diff --git "a/PAM-3B/trainer_state.json" "b/PAM-3B/trainer_state.json" new file mode 100644--- /dev/null +++ "b/PAM-3B/trainer_state.json" @@ -0,0 +1,343021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9809073392888422, + "eval_steps": 500, + "global_step": 49000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.0018517128343718e-05, + "grad_norm": 1.1067396402359009, + "learning_rate": 6.671114076050701e-09, + "loss": 0.4315, + "step": 1 + }, + { + "epoch": 4.0037034256687435e-05, + "grad_norm": 1.0725362300872803, + "learning_rate": 1.3342228152101401e-08, + "loss": 0.3445, + "step": 2 + }, + { + "epoch": 6.005555138503115e-05, + "grad_norm": 0.97669517993927, + "learning_rate": 2.0013342228152105e-08, + "loss": 0.2993, + "step": 3 + }, + { + "epoch": 8.007406851337487e-05, + "grad_norm": 1.0655372142791748, + "learning_rate": 2.6684456304202803e-08, + "loss": 0.3243, + "step": 4 + }, + { + "epoch": 0.0001000925856417186, + "grad_norm": 0.8762655854225159, + "learning_rate": 3.3355570380253504e-08, + "loss": 0.33, + "step": 5 + }, + { + "epoch": 0.0001201111027700623, + "grad_norm": 0.9498075842857361, + "learning_rate": 4.002668445630421e-08, + "loss": 0.2853, + "step": 6 + }, + { + "epoch": 0.00014012961989840603, + "grad_norm": 1.2386378049850464, + "learning_rate": 4.6697798532354914e-08, + "loss": 0.3552, + "step": 7 + }, + { + "epoch": 0.00016014813702674974, + "grad_norm": 1.0732017755508423, + "learning_rate": 5.3368912608405606e-08, + "loss": 0.3583, + "step": 8 + }, + { + "epoch": 0.00018016665415509345, + "grad_norm": 1.0446075201034546, + "learning_rate": 6.00400266844563e-08, + "loss": 0.3573, + "step": 9 + }, + { + "epoch": 0.0002001851712834372, + "grad_norm": 0.9875965118408203, + "learning_rate": 6.671114076050701e-08, + "loss": 0.316, + "step": 10 + }, + { + "epoch": 0.0002202036884117809, + "grad_norm": 1.0224372148513794, + "learning_rate": 7.338225483655771e-08, + "loss": 0.3068, + "step": 11 + }, + { + "epoch": 0.0002402222055401246, + "grad_norm": 1.0233546495437622, + "learning_rate": 8.005336891260842e-08, + "loss": 0.3017, + "step": 12 + }, + { + "epoch": 0.0002602407226684683, + "grad_norm": 1.0910632610321045, + "learning_rate": 8.672448298865911e-08, + "loss": 0.3607, + "step": 13 + }, + { + "epoch": 0.00028025923979681206, + "grad_norm": 1.0302242040634155, + "learning_rate": 9.339559706470983e-08, + "loss": 0.3064, + "step": 14 + }, + { + "epoch": 0.00030027775692515574, + "grad_norm": 1.076331377029419, + "learning_rate": 1.0006671114076052e-07, + "loss": 0.3676, + "step": 15 + }, + { + "epoch": 0.0003202962740534995, + "grad_norm": 1.1522259712219238, + "learning_rate": 1.0673782521681121e-07, + "loss": 0.3133, + "step": 16 + }, + { + "epoch": 0.0003403147911818432, + "grad_norm": 1.8172268867492676, + "learning_rate": 1.1340893929286192e-07, + "loss": 0.8751, + "step": 17 + }, + { + "epoch": 0.0003603333083101869, + "grad_norm": 1.039529800415039, + "learning_rate": 1.200800533689126e-07, + "loss": 0.3137, + "step": 18 + }, + { + "epoch": 0.00038035182543853064, + "grad_norm": 1.0794923305511475, + "learning_rate": 1.267511674449633e-07, + "loss": 0.3354, + "step": 19 + }, + { + "epoch": 0.0004003703425668744, + "grad_norm": 1.170980453491211, + "learning_rate": 1.3342228152101402e-07, + "loss": 0.3494, + "step": 20 + }, + { + "epoch": 0.00042038885969521806, + "grad_norm": 1.122810959815979, + "learning_rate": 1.4009339559706472e-07, + "loss": 0.3221, + "step": 21 + }, + { + "epoch": 0.0004404073768235618, + "grad_norm": 1.1371028423309326, + "learning_rate": 1.4676450967311543e-07, + "loss": 0.2987, + "step": 22 + }, + { + "epoch": 0.0004604258939519055, + "grad_norm": 1.0752114057540894, + "learning_rate": 1.5343562374916613e-07, + "loss": 0.3248, + "step": 23 + }, + { + "epoch": 0.0004804444110802492, + "grad_norm": 0.9785743355751038, + "learning_rate": 1.6010673782521684e-07, + "loss": 0.3867, + "step": 24 + }, + { + "epoch": 0.0005004629282085929, + "grad_norm": 1.0122798681259155, + "learning_rate": 1.6677785190126754e-07, + "loss": 0.3551, + "step": 25 + }, + { + "epoch": 0.0005204814453369366, + "grad_norm": 1.1035405397415161, + "learning_rate": 1.7344896597731822e-07, + "loss": 0.3329, + "step": 26 + }, + { + "epoch": 0.0005404999624652804, + "grad_norm": 1.0019606351852417, + "learning_rate": 1.8012008005336893e-07, + "loss": 0.3818, + "step": 27 + }, + { + "epoch": 0.0005605184795936241, + "grad_norm": 1.1014189720153809, + "learning_rate": 1.8679119412941966e-07, + "loss": 0.3445, + "step": 28 + }, + { + "epoch": 0.0005805369967219679, + "grad_norm": 1.25911545753479, + "learning_rate": 1.9346230820547034e-07, + "loss": 0.3866, + "step": 29 + }, + { + "epoch": 0.0006005555138503115, + "grad_norm": 1.1129298210144043, + "learning_rate": 2.0013342228152104e-07, + "loss": 0.3346, + "step": 30 + }, + { + "epoch": 0.0006205740309786552, + "grad_norm": 1.0189913511276245, + "learning_rate": 2.0680453635757172e-07, + "loss": 0.3404, + "step": 31 + }, + { + "epoch": 0.000640592548106999, + "grad_norm": 0.9685162305831909, + "learning_rate": 2.1347565043362242e-07, + "loss": 0.3269, + "step": 32 + }, + { + "epoch": 0.0006606110652353427, + "grad_norm": 1.0794732570648193, + "learning_rate": 2.2014676450967315e-07, + "loss": 0.3694, + "step": 33 + }, + { + "epoch": 0.0006806295823636864, + "grad_norm": 1.0381780862808228, + "learning_rate": 2.2681787858572383e-07, + "loss": 0.3164, + "step": 34 + }, + { + "epoch": 0.0007006480994920302, + "grad_norm": 1.0603187084197998, + "learning_rate": 2.3348899266177454e-07, + "loss": 0.3298, + "step": 35 + }, + { + "epoch": 0.0007206666166203738, + "grad_norm": 1.8474775552749634, + "learning_rate": 2.401601067378252e-07, + "loss": 0.8961, + "step": 36 + }, + { + "epoch": 0.0007406851337487175, + "grad_norm": 1.087700605392456, + "learning_rate": 2.46831220813876e-07, + "loss": 0.3555, + "step": 37 + }, + { + "epoch": 0.0007607036508770613, + "grad_norm": 1.0459905862808228, + "learning_rate": 2.535023348899266e-07, + "loss": 0.3197, + "step": 38 + }, + { + "epoch": 0.000780722168005405, + "grad_norm": 1.0142173767089844, + "learning_rate": 2.6017344896597733e-07, + "loss": 0.3418, + "step": 39 + }, + { + "epoch": 0.0008007406851337488, + "grad_norm": 1.8717455863952637, + "learning_rate": 2.6684456304202804e-07, + "loss": 0.3534, + "step": 40 + }, + { + "epoch": 0.0008207592022620924, + "grad_norm": 1.027757167816162, + "learning_rate": 2.7351567711807874e-07, + "loss": 0.3595, + "step": 41 + }, + { + "epoch": 0.0008407777193904361, + "grad_norm": 1.0134693384170532, + "learning_rate": 2.8018679119412945e-07, + "loss": 0.312, + "step": 42 + }, + { + "epoch": 0.0008607962365187799, + "grad_norm": 1.175888180732727, + "learning_rate": 2.8685790527018015e-07, + "loss": 0.3454, + "step": 43 + }, + { + "epoch": 0.0008808147536471236, + "grad_norm": 1.048987865447998, + "learning_rate": 2.9352901934623086e-07, + "loss": 0.3362, + "step": 44 + }, + { + "epoch": 0.0009008332707754673, + "grad_norm": 1.0333203077316284, + "learning_rate": 3.0020013342228156e-07, + "loss": 0.3471, + "step": 45 + }, + { + "epoch": 0.000920851787903811, + "grad_norm": 1.1571308374404907, + "learning_rate": 3.0687124749833226e-07, + "loss": 0.3289, + "step": 46 + }, + { + "epoch": 0.0009408703050321547, + "grad_norm": 0.9667331576347351, + "learning_rate": 3.1354236157438297e-07, + "loss": 0.3637, + "step": 47 + }, + { + "epoch": 0.0009608888221604984, + "grad_norm": 1.1592222452163696, + "learning_rate": 3.202134756504337e-07, + "loss": 0.3429, + "step": 48 + }, + { + "epoch": 0.000980907339288842, + "grad_norm": 0.9826697707176208, + "learning_rate": 3.2688458972648433e-07, + "loss": 0.345, + "step": 49 + }, + { + "epoch": 0.0010009258564171858, + "grad_norm": 0.9778319597244263, + "learning_rate": 3.335557038025351e-07, + "loss": 0.294, + "step": 50 + }, + { + "epoch": 0.0010209443735455296, + "grad_norm": 0.9872627258300781, + "learning_rate": 3.4022681787858574e-07, + "loss": 0.3273, + "step": 51 + }, + { + "epoch": 0.0010409628906738733, + "grad_norm": 0.9080860614776611, + "learning_rate": 3.4689793195463644e-07, + "loss": 0.329, + "step": 52 + }, + { + "epoch": 0.001060981407802217, + "grad_norm": 1.017682433128357, + "learning_rate": 3.535690460306872e-07, + "loss": 0.3333, + "step": 53 + }, + { + "epoch": 0.0010809999249305608, + "grad_norm": 1.0238699913024902, + "learning_rate": 3.6024016010673785e-07, + "loss": 0.3517, + "step": 54 + }, + { + "epoch": 0.0011010184420589045, + "grad_norm": 1.2585257291793823, + "learning_rate": 3.6691127418278856e-07, + "loss": 0.4341, + "step": 55 + }, + { + "epoch": 0.0011210369591872482, + "grad_norm": 1.1194818019866943, + "learning_rate": 3.735823882588393e-07, + "loss": 0.3454, + "step": 56 + }, + { + "epoch": 0.001141055476315592, + "grad_norm": 0.9502558708190918, + "learning_rate": 3.8025350233488997e-07, + "loss": 0.3624, + "step": 57 + }, + { + "epoch": 0.0011610739934439357, + "grad_norm": 0.9737577438354492, + "learning_rate": 3.8692461641094067e-07, + "loss": 0.3082, + "step": 58 + }, + { + "epoch": 0.0011810925105722795, + "grad_norm": 1.142785668373108, + "learning_rate": 3.935957304869913e-07, + "loss": 0.284, + "step": 59 + }, + { + "epoch": 0.001201111027700623, + "grad_norm": 1.1513887643814087, + "learning_rate": 4.002668445630421e-07, + "loss": 0.3974, + "step": 60 + }, + { + "epoch": 0.0012211295448289667, + "grad_norm": 1.0596894025802612, + "learning_rate": 4.069379586390928e-07, + "loss": 0.3435, + "step": 61 + }, + { + "epoch": 0.0012411480619573105, + "grad_norm": 1.0198768377304077, + "learning_rate": 4.1360907271514344e-07, + "loss": 0.3706, + "step": 62 + }, + { + "epoch": 0.0012611665790856542, + "grad_norm": 1.7840301990509033, + "learning_rate": 4.202801867911942e-07, + "loss": 0.8184, + "step": 63 + }, + { + "epoch": 0.001281185096213998, + "grad_norm": 1.0830214023590088, + "learning_rate": 4.2695130086724485e-07, + "loss": 0.3467, + "step": 64 + }, + { + "epoch": 0.0013012036133423417, + "grad_norm": 1.7385045289993286, + "learning_rate": 4.3362241494329555e-07, + "loss": 0.8842, + "step": 65 + }, + { + "epoch": 0.0013212221304706854, + "grad_norm": 1.0867854356765747, + "learning_rate": 4.402935290193463e-07, + "loss": 0.3372, + "step": 66 + }, + { + "epoch": 0.0013412406475990291, + "grad_norm": 1.040658950805664, + "learning_rate": 4.4696464309539696e-07, + "loss": 0.3723, + "step": 67 + }, + { + "epoch": 0.0013612591647273729, + "grad_norm": 0.9857014417648315, + "learning_rate": 4.5363575717144767e-07, + "loss": 0.3355, + "step": 68 + }, + { + "epoch": 0.0013812776818557166, + "grad_norm": 0.988731324672699, + "learning_rate": 4.603068712474984e-07, + "loss": 0.3099, + "step": 69 + }, + { + "epoch": 0.0014012961989840604, + "grad_norm": 1.0434221029281616, + "learning_rate": 4.669779853235491e-07, + "loss": 0.3448, + "step": 70 + }, + { + "epoch": 0.0014213147161124039, + "grad_norm": 1.0196505784988403, + "learning_rate": 4.736490993995998e-07, + "loss": 0.3099, + "step": 71 + }, + { + "epoch": 0.0014413332332407476, + "grad_norm": 1.132448673248291, + "learning_rate": 4.803202134756504e-07, + "loss": 0.3288, + "step": 72 + }, + { + "epoch": 0.0014613517503690914, + "grad_norm": 1.1343308687210083, + "learning_rate": 4.869913275517012e-07, + "loss": 0.3795, + "step": 73 + }, + { + "epoch": 0.001481370267497435, + "grad_norm": 1.018228530883789, + "learning_rate": 4.93662441627752e-07, + "loss": 0.3463, + "step": 74 + }, + { + "epoch": 0.0015013887846257788, + "grad_norm": 1.1776738166809082, + "learning_rate": 5.003335557038026e-07, + "loss": 0.3752, + "step": 75 + }, + { + "epoch": 0.0015214073017541226, + "grad_norm": 0.9954798817634583, + "learning_rate": 5.070046697798533e-07, + "loss": 0.3357, + "step": 76 + }, + { + "epoch": 0.0015414258188824663, + "grad_norm": 0.9657215476036072, + "learning_rate": 5.13675783855904e-07, + "loss": 0.3135, + "step": 77 + }, + { + "epoch": 0.00156144433601081, + "grad_norm": 0.9859840869903564, + "learning_rate": 5.203468979319547e-07, + "loss": 0.339, + "step": 78 + }, + { + "epoch": 0.0015814628531391538, + "grad_norm": 1.0755646228790283, + "learning_rate": 5.270180120080054e-07, + "loss": 0.3694, + "step": 79 + }, + { + "epoch": 0.0016014813702674975, + "grad_norm": 1.0204200744628906, + "learning_rate": 5.336891260840561e-07, + "loss": 0.3322, + "step": 80 + }, + { + "epoch": 0.0016214998873958413, + "grad_norm": 0.9571223258972168, + "learning_rate": 5.403602401601068e-07, + "loss": 0.2972, + "step": 81 + }, + { + "epoch": 0.0016415184045241848, + "grad_norm": 1.0934810638427734, + "learning_rate": 5.470313542361575e-07, + "loss": 0.3768, + "step": 82 + }, + { + "epoch": 0.0016615369216525285, + "grad_norm": 1.7731283903121948, + "learning_rate": 5.537024683122081e-07, + "loss": 0.9365, + "step": 83 + }, + { + "epoch": 0.0016815554387808723, + "grad_norm": 1.8018783330917358, + "learning_rate": 5.603735823882589e-07, + "loss": 0.9206, + "step": 84 + }, + { + "epoch": 0.001701573955909216, + "grad_norm": 1.0727746486663818, + "learning_rate": 5.670446964643095e-07, + "loss": 0.3491, + "step": 85 + }, + { + "epoch": 0.0017215924730375597, + "grad_norm": 0.98478764295578, + "learning_rate": 5.737158105403603e-07, + "loss": 0.2928, + "step": 86 + }, + { + "epoch": 0.0017416109901659035, + "grad_norm": 0.9946699738502502, + "learning_rate": 5.803869246164111e-07, + "loss": 0.3066, + "step": 87 + }, + { + "epoch": 0.0017616295072942472, + "grad_norm": 1.1565228700637817, + "learning_rate": 5.870580386924617e-07, + "loss": 0.3761, + "step": 88 + }, + { + "epoch": 0.001781648024422591, + "grad_norm": 1.110931634902954, + "learning_rate": 5.937291527685124e-07, + "loss": 0.3117, + "step": 89 + }, + { + "epoch": 0.0018016665415509347, + "grad_norm": 0.9905818700790405, + "learning_rate": 6.004002668445631e-07, + "loss": 0.3258, + "step": 90 + }, + { + "epoch": 0.0018216850586792784, + "grad_norm": 1.0751181840896606, + "learning_rate": 6.070713809206138e-07, + "loss": 0.3739, + "step": 91 + }, + { + "epoch": 0.001841703575807622, + "grad_norm": 1.1745609045028687, + "learning_rate": 6.137424949966645e-07, + "loss": 0.362, + "step": 92 + }, + { + "epoch": 0.0018617220929359657, + "grad_norm": 1.1062904596328735, + "learning_rate": 6.204136090727152e-07, + "loss": 0.3358, + "step": 93 + }, + { + "epoch": 0.0018817406100643094, + "grad_norm": 0.9975125193595886, + "learning_rate": 6.270847231487659e-07, + "loss": 0.306, + "step": 94 + }, + { + "epoch": 0.0019017591271926532, + "grad_norm": 1.0093201398849487, + "learning_rate": 6.337558372248166e-07, + "loss": 0.3557, + "step": 95 + }, + { + "epoch": 0.001921777644320997, + "grad_norm": 0.9739794135093689, + "learning_rate": 6.404269513008673e-07, + "loss": 0.3334, + "step": 96 + }, + { + "epoch": 0.0019417961614493406, + "grad_norm": 1.7329802513122559, + "learning_rate": 6.470980653769179e-07, + "loss": 0.8976, + "step": 97 + }, + { + "epoch": 0.001961814678577684, + "grad_norm": 0.9927840828895569, + "learning_rate": 6.537691794529687e-07, + "loss": 0.3453, + "step": 98 + }, + { + "epoch": 0.001981833195706028, + "grad_norm": 1.0084737539291382, + "learning_rate": 6.604402935290194e-07, + "loss": 0.3432, + "step": 99 + }, + { + "epoch": 0.0020018517128343716, + "grad_norm": 1.0927097797393799, + "learning_rate": 6.671114076050702e-07, + "loss": 0.3546, + "step": 100 + }, + { + "epoch": 0.0020218702299627154, + "grad_norm": 1.051785945892334, + "learning_rate": 6.737825216811208e-07, + "loss": 0.3399, + "step": 101 + }, + { + "epoch": 0.002041888747091059, + "grad_norm": 1.1340142488479614, + "learning_rate": 6.804536357571715e-07, + "loss": 0.384, + "step": 102 + }, + { + "epoch": 0.002061907264219403, + "grad_norm": 1.0251572132110596, + "learning_rate": 6.871247498332221e-07, + "loss": 0.3289, + "step": 103 + }, + { + "epoch": 0.0020819257813477466, + "grad_norm": 1.0008225440979004, + "learning_rate": 6.937958639092729e-07, + "loss": 0.3198, + "step": 104 + }, + { + "epoch": 0.0021019442984760903, + "grad_norm": 1.7709410190582275, + "learning_rate": 7.004669779853236e-07, + "loss": 0.9205, + "step": 105 + }, + { + "epoch": 0.002121962815604434, + "grad_norm": 1.0657304525375366, + "learning_rate": 7.071380920613744e-07, + "loss": 0.358, + "step": 106 + }, + { + "epoch": 0.002141981332732778, + "grad_norm": 1.0649075508117676, + "learning_rate": 7.13809206137425e-07, + "loss": 0.3453, + "step": 107 + }, + { + "epoch": 0.0021619998498611215, + "grad_norm": 1.1097196340560913, + "learning_rate": 7.204803202134757e-07, + "loss": 0.3443, + "step": 108 + }, + { + "epoch": 0.0021820183669894653, + "grad_norm": 1.021958589553833, + "learning_rate": 7.271514342895264e-07, + "loss": 0.3116, + "step": 109 + }, + { + "epoch": 0.002202036884117809, + "grad_norm": 1.1110048294067383, + "learning_rate": 7.338225483655771e-07, + "loss": 0.2938, + "step": 110 + }, + { + "epoch": 0.0022220554012461527, + "grad_norm": 1.1143723726272583, + "learning_rate": 7.404936624416279e-07, + "loss": 0.3933, + "step": 111 + }, + { + "epoch": 0.0022420739183744965, + "grad_norm": 1.090186595916748, + "learning_rate": 7.471647765176786e-07, + "loss": 0.3568, + "step": 112 + }, + { + "epoch": 0.0022620924355028402, + "grad_norm": 1.324747920036316, + "learning_rate": 7.538358905937292e-07, + "loss": 0.4214, + "step": 113 + }, + { + "epoch": 0.002282110952631184, + "grad_norm": 0.9432870149612427, + "learning_rate": 7.605070046697799e-07, + "loss": 0.3722, + "step": 114 + }, + { + "epoch": 0.0023021294697595277, + "grad_norm": 1.1803218126296997, + "learning_rate": 7.671781187458306e-07, + "loss": 0.3419, + "step": 115 + }, + { + "epoch": 0.0023221479868878714, + "grad_norm": 1.0272248983383179, + "learning_rate": 7.738492328218813e-07, + "loss": 0.351, + "step": 116 + }, + { + "epoch": 0.002342166504016215, + "grad_norm": 1.1677035093307495, + "learning_rate": 7.805203468979321e-07, + "loss": 0.3208, + "step": 117 + }, + { + "epoch": 0.002362185021144559, + "grad_norm": 1.0401628017425537, + "learning_rate": 7.871914609739826e-07, + "loss": 0.3405, + "step": 118 + }, + { + "epoch": 0.0023822035382729027, + "grad_norm": 1.0827909708023071, + "learning_rate": 7.938625750500334e-07, + "loss": 0.3418, + "step": 119 + }, + { + "epoch": 0.002402222055401246, + "grad_norm": 1.04204261302948, + "learning_rate": 8.005336891260842e-07, + "loss": 0.332, + "step": 120 + }, + { + "epoch": 0.0024222405725295897, + "grad_norm": 1.0163073539733887, + "learning_rate": 8.072048032021348e-07, + "loss": 0.3394, + "step": 121 + }, + { + "epoch": 0.0024422590896579334, + "grad_norm": 1.0605405569076538, + "learning_rate": 8.138759172781856e-07, + "loss": 0.3648, + "step": 122 + }, + { + "epoch": 0.002462277606786277, + "grad_norm": 1.0280210971832275, + "learning_rate": 8.205470313542361e-07, + "loss": 0.3712, + "step": 123 + }, + { + "epoch": 0.002482296123914621, + "grad_norm": 0.9879762530326843, + "learning_rate": 8.272181454302869e-07, + "loss": 0.3572, + "step": 124 + }, + { + "epoch": 0.0025023146410429646, + "grad_norm": 1.2364951372146606, + "learning_rate": 8.338892595063376e-07, + "loss": 0.3298, + "step": 125 + }, + { + "epoch": 0.0025223331581713084, + "grad_norm": 1.0359711647033691, + "learning_rate": 8.405603735823884e-07, + "loss": 0.3159, + "step": 126 + }, + { + "epoch": 0.002542351675299652, + "grad_norm": 1.0296694040298462, + "learning_rate": 8.47231487658439e-07, + "loss": 0.3848, + "step": 127 + }, + { + "epoch": 0.002562370192427996, + "grad_norm": 0.9627702832221985, + "learning_rate": 8.539026017344897e-07, + "loss": 0.3267, + "step": 128 + }, + { + "epoch": 0.0025823887095563396, + "grad_norm": 1.012309193611145, + "learning_rate": 8.605737158105403e-07, + "loss": 0.3583, + "step": 129 + }, + { + "epoch": 0.0026024072266846833, + "grad_norm": 1.0465233325958252, + "learning_rate": 8.672448298865911e-07, + "loss": 0.3152, + "step": 130 + }, + { + "epoch": 0.002622425743813027, + "grad_norm": 1.0889161825180054, + "learning_rate": 8.739159439626419e-07, + "loss": 0.4065, + "step": 131 + }, + { + "epoch": 0.002642444260941371, + "grad_norm": 1.7590560913085938, + "learning_rate": 8.805870580386926e-07, + "loss": 0.8468, + "step": 132 + }, + { + "epoch": 0.0026624627780697145, + "grad_norm": 1.0346946716308594, + "learning_rate": 8.872581721147433e-07, + "loss": 0.3536, + "step": 133 + }, + { + "epoch": 0.0026824812951980583, + "grad_norm": 1.0108641386032104, + "learning_rate": 8.939292861907939e-07, + "loss": 0.3568, + "step": 134 + }, + { + "epoch": 0.002702499812326402, + "grad_norm": 1.0371404886245728, + "learning_rate": 9.006004002668446e-07, + "loss": 0.3298, + "step": 135 + }, + { + "epoch": 0.0027225183294547458, + "grad_norm": 0.9999841451644897, + "learning_rate": 9.072715143428953e-07, + "loss": 0.3346, + "step": 136 + }, + { + "epoch": 0.0027425368465830895, + "grad_norm": 1.0597223043441772, + "learning_rate": 9.139426284189461e-07, + "loss": 0.3369, + "step": 137 + }, + { + "epoch": 0.0027625553637114332, + "grad_norm": 1.14918851852417, + "learning_rate": 9.206137424949968e-07, + "loss": 0.3747, + "step": 138 + }, + { + "epoch": 0.002782573880839777, + "grad_norm": 1.1091169118881226, + "learning_rate": 9.272848565710474e-07, + "loss": 0.3729, + "step": 139 + }, + { + "epoch": 0.0028025923979681207, + "grad_norm": 1.8006303310394287, + "learning_rate": 9.339559706470982e-07, + "loss": 0.8698, + "step": 140 + }, + { + "epoch": 0.002822610915096464, + "grad_norm": 1.021198034286499, + "learning_rate": 9.406270847231488e-07, + "loss": 0.2873, + "step": 141 + }, + { + "epoch": 0.0028426294322248078, + "grad_norm": 0.9878223538398743, + "learning_rate": 9.472981987991996e-07, + "loss": 0.3451, + "step": 142 + }, + { + "epoch": 0.0028626479493531515, + "grad_norm": 1.2528680562973022, + "learning_rate": 9.539693128752503e-07, + "loss": 0.3595, + "step": 143 + }, + { + "epoch": 0.0028826664664814952, + "grad_norm": 1.165534496307373, + "learning_rate": 9.606404269513009e-07, + "loss": 0.357, + "step": 144 + }, + { + "epoch": 0.002902684983609839, + "grad_norm": 1.0182348489761353, + "learning_rate": 9.673115410273516e-07, + "loss": 0.3106, + "step": 145 + }, + { + "epoch": 0.0029227035007381827, + "grad_norm": 1.086139440536499, + "learning_rate": 9.739826551034024e-07, + "loss": 0.3398, + "step": 146 + }, + { + "epoch": 0.0029427220178665264, + "grad_norm": 1.1960740089416504, + "learning_rate": 9.806537691794531e-07, + "loss": 0.3395, + "step": 147 + }, + { + "epoch": 0.00296274053499487, + "grad_norm": 1.073056936264038, + "learning_rate": 9.87324883255504e-07, + "loss": 0.3454, + "step": 148 + }, + { + "epoch": 0.002982759052123214, + "grad_norm": 1.0450530052185059, + "learning_rate": 9.939959973315544e-07, + "loss": 0.3553, + "step": 149 + }, + { + "epoch": 0.0030027775692515577, + "grad_norm": 1.1226109266281128, + "learning_rate": 1.0006671114076052e-06, + "loss": 0.3655, + "step": 150 + }, + { + "epoch": 0.0030227960863799014, + "grad_norm": 1.2579210996627808, + "learning_rate": 1.0073382254836557e-06, + "loss": 0.3876, + "step": 151 + }, + { + "epoch": 0.003042814603508245, + "grad_norm": 1.0431383848190308, + "learning_rate": 1.0140093395597065e-06, + "loss": 0.3386, + "step": 152 + }, + { + "epoch": 0.003062833120636589, + "grad_norm": 1.0774379968643188, + "learning_rate": 1.0206804536357573e-06, + "loss": 0.3752, + "step": 153 + }, + { + "epoch": 0.0030828516377649326, + "grad_norm": 1.0190558433532715, + "learning_rate": 1.027351567711808e-06, + "loss": 0.3275, + "step": 154 + }, + { + "epoch": 0.0031028701548932764, + "grad_norm": 0.9620422720909119, + "learning_rate": 1.0340226817878586e-06, + "loss": 0.3392, + "step": 155 + }, + { + "epoch": 0.00312288867202162, + "grad_norm": 1.0382415056228638, + "learning_rate": 1.0406937958639093e-06, + "loss": 0.3742, + "step": 156 + }, + { + "epoch": 0.003142907189149964, + "grad_norm": 1.0754891633987427, + "learning_rate": 1.04736490993996e-06, + "loss": 0.3622, + "step": 157 + }, + { + "epoch": 0.0031629257062783076, + "grad_norm": 1.0172077417373657, + "learning_rate": 1.0540360240160108e-06, + "loss": 0.3393, + "step": 158 + }, + { + "epoch": 0.0031829442234066513, + "grad_norm": 0.9822896122932434, + "learning_rate": 1.0607071380920616e-06, + "loss": 0.3304, + "step": 159 + }, + { + "epoch": 0.003202962740534995, + "grad_norm": 1.0372042655944824, + "learning_rate": 1.0673782521681121e-06, + "loss": 0.2922, + "step": 160 + }, + { + "epoch": 0.0032229812576633388, + "grad_norm": 1.2081612348556519, + "learning_rate": 1.074049366244163e-06, + "loss": 0.3532, + "step": 161 + }, + { + "epoch": 0.0032429997747916825, + "grad_norm": 1.0281058549880981, + "learning_rate": 1.0807204803202137e-06, + "loss": 0.3271, + "step": 162 + }, + { + "epoch": 0.003263018291920026, + "grad_norm": 0.9874348044395447, + "learning_rate": 1.0873915943962642e-06, + "loss": 0.3455, + "step": 163 + }, + { + "epoch": 0.0032830368090483696, + "grad_norm": 1.0538359880447388, + "learning_rate": 1.094062708472315e-06, + "loss": 0.3573, + "step": 164 + }, + { + "epoch": 0.0033030553261767133, + "grad_norm": 1.1657860279083252, + "learning_rate": 1.1007338225483655e-06, + "loss": 0.4043, + "step": 165 + }, + { + "epoch": 0.003323073843305057, + "grad_norm": 1.223834753036499, + "learning_rate": 1.1074049366244163e-06, + "loss": 0.384, + "step": 166 + }, + { + "epoch": 0.0033430923604334008, + "grad_norm": 1.0674268007278442, + "learning_rate": 1.114076050700467e-06, + "loss": 0.336, + "step": 167 + }, + { + "epoch": 0.0033631108775617445, + "grad_norm": 0.973685622215271, + "learning_rate": 1.1207471647765178e-06, + "loss": 0.3217, + "step": 168 + }, + { + "epoch": 0.0033831293946900882, + "grad_norm": 1.2414227724075317, + "learning_rate": 1.1274182788525685e-06, + "loss": 0.3418, + "step": 169 + }, + { + "epoch": 0.003403147911818432, + "grad_norm": 1.072571039199829, + "learning_rate": 1.134089392928619e-06, + "loss": 0.3568, + "step": 170 + }, + { + "epoch": 0.0034231664289467757, + "grad_norm": 0.9765994548797607, + "learning_rate": 1.1407605070046698e-06, + "loss": 0.3373, + "step": 171 + }, + { + "epoch": 0.0034431849460751195, + "grad_norm": 0.9319426417350769, + "learning_rate": 1.1474316210807206e-06, + "loss": 0.3529, + "step": 172 + }, + { + "epoch": 0.003463203463203463, + "grad_norm": 0.9557063579559326, + "learning_rate": 1.1541027351567714e-06, + "loss": 0.3516, + "step": 173 + }, + { + "epoch": 0.003483221980331807, + "grad_norm": 0.9252992272377014, + "learning_rate": 1.1607738492328221e-06, + "loss": 0.3306, + "step": 174 + }, + { + "epoch": 0.0035032404974601507, + "grad_norm": 1.1234521865844727, + "learning_rate": 1.1674449633088727e-06, + "loss": 0.385, + "step": 175 + }, + { + "epoch": 0.0035232590145884944, + "grad_norm": 1.171687126159668, + "learning_rate": 1.1741160773849234e-06, + "loss": 0.3069, + "step": 176 + }, + { + "epoch": 0.003543277531716838, + "grad_norm": 1.008914589881897, + "learning_rate": 1.180787191460974e-06, + "loss": 0.3174, + "step": 177 + }, + { + "epoch": 0.003563296048845182, + "grad_norm": 1.017923355102539, + "learning_rate": 1.1874583055370247e-06, + "loss": 0.3466, + "step": 178 + }, + { + "epoch": 0.0035833145659735256, + "grad_norm": 1.1146148443222046, + "learning_rate": 1.1941294196130755e-06, + "loss": 0.3418, + "step": 179 + }, + { + "epoch": 0.0036033330831018694, + "grad_norm": 0.9781566858291626, + "learning_rate": 1.2008005336891262e-06, + "loss": 0.3612, + "step": 180 + }, + { + "epoch": 0.003623351600230213, + "grad_norm": 1.0716923475265503, + "learning_rate": 1.2074716477651768e-06, + "loss": 0.3367, + "step": 181 + }, + { + "epoch": 0.003643370117358557, + "grad_norm": 0.9827850461006165, + "learning_rate": 1.2141427618412275e-06, + "loss": 0.3513, + "step": 182 + }, + { + "epoch": 0.0036633886344869006, + "grad_norm": 0.9571705460548401, + "learning_rate": 1.2208138759172783e-06, + "loss": 0.317, + "step": 183 + }, + { + "epoch": 0.003683407151615244, + "grad_norm": 1.0889248847961426, + "learning_rate": 1.227484989993329e-06, + "loss": 0.3884, + "step": 184 + }, + { + "epoch": 0.0037034256687435876, + "grad_norm": 1.7236579656600952, + "learning_rate": 1.2341561040693798e-06, + "loss": 0.9124, + "step": 185 + }, + { + "epoch": 0.0037234441858719314, + "grad_norm": 1.1589921712875366, + "learning_rate": 1.2408272181454304e-06, + "loss": 0.3278, + "step": 186 + }, + { + "epoch": 0.003743462703000275, + "grad_norm": 1.0943868160247803, + "learning_rate": 1.2474983322214811e-06, + "loss": 0.3853, + "step": 187 + }, + { + "epoch": 0.003763481220128619, + "grad_norm": 1.1057530641555786, + "learning_rate": 1.2541694462975319e-06, + "loss": 0.333, + "step": 188 + }, + { + "epoch": 0.0037834997372569626, + "grad_norm": 1.075685977935791, + "learning_rate": 1.2608405603735824e-06, + "loss": 0.326, + "step": 189 + }, + { + "epoch": 0.0038035182543853063, + "grad_norm": 1.0488795042037964, + "learning_rate": 1.2675116744496332e-06, + "loss": 0.3503, + "step": 190 + }, + { + "epoch": 0.00382353677151365, + "grad_norm": 1.2556462287902832, + "learning_rate": 1.2741827885256837e-06, + "loss": 0.3575, + "step": 191 + }, + { + "epoch": 0.003843555288641994, + "grad_norm": 1.0615943670272827, + "learning_rate": 1.2808539026017347e-06, + "loss": 0.3932, + "step": 192 + }, + { + "epoch": 0.0038635738057703375, + "grad_norm": 1.0440254211425781, + "learning_rate": 1.2875250166777852e-06, + "loss": 0.3177, + "step": 193 + }, + { + "epoch": 0.0038835923228986813, + "grad_norm": 1.0929633378982544, + "learning_rate": 1.2941961307538358e-06, + "loss": 0.3117, + "step": 194 + }, + { + "epoch": 0.003903610840027025, + "grad_norm": 1.1033145189285278, + "learning_rate": 1.3008672448298868e-06, + "loss": 0.2966, + "step": 195 + }, + { + "epoch": 0.003923629357155368, + "grad_norm": 2.0144643783569336, + "learning_rate": 1.3075383589059373e-06, + "loss": 0.8817, + "step": 196 + }, + { + "epoch": 0.003943647874283712, + "grad_norm": 0.9662773013114929, + "learning_rate": 1.3142094729819883e-06, + "loss": 0.3459, + "step": 197 + }, + { + "epoch": 0.003963666391412056, + "grad_norm": 1.0172979831695557, + "learning_rate": 1.3208805870580388e-06, + "loss": 0.3635, + "step": 198 + }, + { + "epoch": 0.0039836849085403995, + "grad_norm": 1.0197914838790894, + "learning_rate": 1.3275517011340894e-06, + "loss": 0.3683, + "step": 199 + }, + { + "epoch": 0.004003703425668743, + "grad_norm": 1.091792345046997, + "learning_rate": 1.3342228152101403e-06, + "loss": 0.3284, + "step": 200 + }, + { + "epoch": 0.004023721942797087, + "grad_norm": 1.0397071838378906, + "learning_rate": 1.3408939292861909e-06, + "loss": 0.3472, + "step": 201 + }, + { + "epoch": 0.004043740459925431, + "grad_norm": 0.9911607503890991, + "learning_rate": 1.3475650433622416e-06, + "loss": 0.3604, + "step": 202 + }, + { + "epoch": 0.0040637589770537745, + "grad_norm": 1.0942814350128174, + "learning_rate": 1.3542361574382922e-06, + "loss": 0.3586, + "step": 203 + }, + { + "epoch": 0.004083777494182118, + "grad_norm": 1.1271568536758423, + "learning_rate": 1.360907271514343e-06, + "loss": 0.3727, + "step": 204 + }, + { + "epoch": 0.004103796011310462, + "grad_norm": 1.1404681205749512, + "learning_rate": 1.3675783855903937e-06, + "loss": 0.4032, + "step": 205 + }, + { + "epoch": 0.004123814528438806, + "grad_norm": 1.256198763847351, + "learning_rate": 1.3742494996664442e-06, + "loss": 0.3873, + "step": 206 + }, + { + "epoch": 0.004143833045567149, + "grad_norm": 1.8433817625045776, + "learning_rate": 1.3809206137424952e-06, + "loss": 0.9578, + "step": 207 + }, + { + "epoch": 0.004163851562695493, + "grad_norm": 1.0420496463775635, + "learning_rate": 1.3875917278185458e-06, + "loss": 0.325, + "step": 208 + }, + { + "epoch": 0.004183870079823837, + "grad_norm": 1.1182948350906372, + "learning_rate": 1.3942628418945963e-06, + "loss": 0.3382, + "step": 209 + }, + { + "epoch": 0.004203888596952181, + "grad_norm": 0.977677583694458, + "learning_rate": 1.4009339559706473e-06, + "loss": 0.3161, + "step": 210 + }, + { + "epoch": 0.004223907114080524, + "grad_norm": 1.0230404138565063, + "learning_rate": 1.4076050700466978e-06, + "loss": 0.3216, + "step": 211 + }, + { + "epoch": 0.004243925631208868, + "grad_norm": 1.0275135040283203, + "learning_rate": 1.4142761841227488e-06, + "loss": 0.4029, + "step": 212 + }, + { + "epoch": 0.004263944148337212, + "grad_norm": 1.0983688831329346, + "learning_rate": 1.4209472981987993e-06, + "loss": 0.4173, + "step": 213 + }, + { + "epoch": 0.004283962665465556, + "grad_norm": 1.0400152206420898, + "learning_rate": 1.42761841227485e-06, + "loss": 0.3699, + "step": 214 + }, + { + "epoch": 0.004303981182593899, + "grad_norm": 1.0193389654159546, + "learning_rate": 1.4342895263509006e-06, + "loss": 0.3701, + "step": 215 + }, + { + "epoch": 0.004323999699722243, + "grad_norm": 1.0483285188674927, + "learning_rate": 1.4409606404269514e-06, + "loss": 0.3419, + "step": 216 + }, + { + "epoch": 0.004344018216850587, + "grad_norm": 1.0377894639968872, + "learning_rate": 1.4476317545030022e-06, + "loss": 0.3557, + "step": 217 + }, + { + "epoch": 0.0043640367339789305, + "grad_norm": 1.1148982048034668, + "learning_rate": 1.4543028685790527e-06, + "loss": 0.3939, + "step": 218 + }, + { + "epoch": 0.004384055251107274, + "grad_norm": 1.0581110715866089, + "learning_rate": 1.4609739826551037e-06, + "loss": 0.3575, + "step": 219 + }, + { + "epoch": 0.004404073768235618, + "grad_norm": 1.1981613636016846, + "learning_rate": 1.4676450967311542e-06, + "loss": 0.3194, + "step": 220 + }, + { + "epoch": 0.004424092285363962, + "grad_norm": 0.9546608328819275, + "learning_rate": 1.4743162108072048e-06, + "loss": 0.3837, + "step": 221 + }, + { + "epoch": 0.0044441108024923055, + "grad_norm": 1.0209063291549683, + "learning_rate": 1.4809873248832557e-06, + "loss": 0.3301, + "step": 222 + }, + { + "epoch": 0.004464129319620649, + "grad_norm": 1.8525937795639038, + "learning_rate": 1.4876584389593063e-06, + "loss": 0.8869, + "step": 223 + }, + { + "epoch": 0.004484147836748993, + "grad_norm": 1.030745029449463, + "learning_rate": 1.4943295530353573e-06, + "loss": 0.3465, + "step": 224 + }, + { + "epoch": 0.004504166353877337, + "grad_norm": 1.1171189546585083, + "learning_rate": 1.5010006671114078e-06, + "loss": 0.3131, + "step": 225 + }, + { + "epoch": 0.0045241848710056804, + "grad_norm": 1.7455319166183472, + "learning_rate": 1.5076717811874583e-06, + "loss": 0.8586, + "step": 226 + }, + { + "epoch": 0.004544203388134024, + "grad_norm": 1.8052003383636475, + "learning_rate": 1.514342895263509e-06, + "loss": 0.8369, + "step": 227 + }, + { + "epoch": 0.004564221905262368, + "grad_norm": 1.0082547664642334, + "learning_rate": 1.5210140093395599e-06, + "loss": 0.3421, + "step": 228 + }, + { + "epoch": 0.004584240422390712, + "grad_norm": 1.0003489255905151, + "learning_rate": 1.5276851234156106e-06, + "loss": 0.3579, + "step": 229 + }, + { + "epoch": 0.004604258939519055, + "grad_norm": 1.0908241271972656, + "learning_rate": 1.5343562374916612e-06, + "loss": 0.3618, + "step": 230 + }, + { + "epoch": 0.004624277456647399, + "grad_norm": 1.0856441259384155, + "learning_rate": 1.541027351567712e-06, + "loss": 0.3637, + "step": 231 + }, + { + "epoch": 0.004644295973775743, + "grad_norm": 1.0969706773757935, + "learning_rate": 1.5476984656437627e-06, + "loss": 0.3741, + "step": 232 + }, + { + "epoch": 0.004664314490904087, + "grad_norm": 1.055543303489685, + "learning_rate": 1.5543695797198132e-06, + "loss": 0.3569, + "step": 233 + }, + { + "epoch": 0.00468433300803243, + "grad_norm": 1.0877448320388794, + "learning_rate": 1.5610406937958642e-06, + "loss": 0.3215, + "step": 234 + }, + { + "epoch": 0.004704351525160774, + "grad_norm": 1.2703063488006592, + "learning_rate": 1.5677118078719147e-06, + "loss": 0.3579, + "step": 235 + }, + { + "epoch": 0.004724370042289118, + "grad_norm": 1.0638283491134644, + "learning_rate": 1.5743829219479653e-06, + "loss": 0.3205, + "step": 236 + }, + { + "epoch": 0.0047443885594174616, + "grad_norm": 1.066827654838562, + "learning_rate": 1.5810540360240163e-06, + "loss": 0.3503, + "step": 237 + }, + { + "epoch": 0.004764407076545805, + "grad_norm": 1.0281940698623657, + "learning_rate": 1.5877251501000668e-06, + "loss": 0.2936, + "step": 238 + }, + { + "epoch": 0.004784425593674148, + "grad_norm": 1.0779025554656982, + "learning_rate": 1.5943962641761176e-06, + "loss": 0.3831, + "step": 239 + }, + { + "epoch": 0.004804444110802492, + "grad_norm": 1.1495797634124756, + "learning_rate": 1.6010673782521683e-06, + "loss": 0.3181, + "step": 240 + }, + { + "epoch": 0.004824462627930836, + "grad_norm": 1.8079701662063599, + "learning_rate": 1.6077384923282189e-06, + "loss": 0.8688, + "step": 241 + }, + { + "epoch": 0.004844481145059179, + "grad_norm": 1.1432288885116577, + "learning_rate": 1.6144096064042696e-06, + "loss": 0.353, + "step": 242 + }, + { + "epoch": 0.004864499662187523, + "grad_norm": 1.0066691637039185, + "learning_rate": 1.6210807204803204e-06, + "loss": 0.3454, + "step": 243 + }, + { + "epoch": 0.004884518179315867, + "grad_norm": 1.0439289808273315, + "learning_rate": 1.6277518345563711e-06, + "loss": 0.3324, + "step": 244 + }, + { + "epoch": 0.004904536696444211, + "grad_norm": 1.171670913696289, + "learning_rate": 1.6344229486324217e-06, + "loss": 0.3609, + "step": 245 + }, + { + "epoch": 0.004924555213572554, + "grad_norm": 1.8439239263534546, + "learning_rate": 1.6410940627084722e-06, + "loss": 0.8521, + "step": 246 + }, + { + "epoch": 0.004944573730700898, + "grad_norm": 1.1497564315795898, + "learning_rate": 1.6477651767845232e-06, + "loss": 0.3441, + "step": 247 + }, + { + "epoch": 0.004964592247829242, + "grad_norm": 1.1153236627578735, + "learning_rate": 1.6544362908605737e-06, + "loss": 0.3426, + "step": 248 + }, + { + "epoch": 0.0049846107649575856, + "grad_norm": 1.1753289699554443, + "learning_rate": 1.6611074049366247e-06, + "loss": 0.3906, + "step": 249 + }, + { + "epoch": 0.005004629282085929, + "grad_norm": 0.9694640636444092, + "learning_rate": 1.6677785190126753e-06, + "loss": 0.3735, + "step": 250 + }, + { + "epoch": 0.005024647799214273, + "grad_norm": 1.788311243057251, + "learning_rate": 1.6744496330887258e-06, + "loss": 0.9262, + "step": 251 + }, + { + "epoch": 0.005044666316342617, + "grad_norm": 1.1049309968948364, + "learning_rate": 1.6811207471647768e-06, + "loss": 0.3867, + "step": 252 + }, + { + "epoch": 0.0050646848334709605, + "grad_norm": 1.0925676822662354, + "learning_rate": 1.6877918612408273e-06, + "loss": 0.4289, + "step": 253 + }, + { + "epoch": 0.005084703350599304, + "grad_norm": 1.2277631759643555, + "learning_rate": 1.694462975316878e-06, + "loss": 0.3377, + "step": 254 + }, + { + "epoch": 0.005104721867727648, + "grad_norm": 1.0777363777160645, + "learning_rate": 1.7011340893929286e-06, + "loss": 0.3322, + "step": 255 + }, + { + "epoch": 0.005124740384855992, + "grad_norm": 1.8172038793563843, + "learning_rate": 1.7078052034689794e-06, + "loss": 0.9185, + "step": 256 + }, + { + "epoch": 0.0051447589019843355, + "grad_norm": 1.0424140691757202, + "learning_rate": 1.7144763175450301e-06, + "loss": 0.3524, + "step": 257 + }, + { + "epoch": 0.005164777419112679, + "grad_norm": 1.065719723701477, + "learning_rate": 1.7211474316210807e-06, + "loss": 0.3329, + "step": 258 + }, + { + "epoch": 0.005184795936241023, + "grad_norm": 0.9916853904724121, + "learning_rate": 1.7278185456971317e-06, + "loss": 0.3458, + "step": 259 + }, + { + "epoch": 0.005204814453369367, + "grad_norm": 1.0367039442062378, + "learning_rate": 1.7344896597731822e-06, + "loss": 0.3396, + "step": 260 + }, + { + "epoch": 0.00522483297049771, + "grad_norm": 1.0323669910430908, + "learning_rate": 1.7411607738492332e-06, + "loss": 0.3781, + "step": 261 + }, + { + "epoch": 0.005244851487626054, + "grad_norm": 0.9594296813011169, + "learning_rate": 1.7478318879252837e-06, + "loss": 0.2911, + "step": 262 + }, + { + "epoch": 0.005264870004754398, + "grad_norm": 1.1367038488388062, + "learning_rate": 1.7545030020013343e-06, + "loss": 0.3288, + "step": 263 + }, + { + "epoch": 0.005284888521882742, + "grad_norm": 1.1530425548553467, + "learning_rate": 1.7611741160773852e-06, + "loss": 0.3737, + "step": 264 + }, + { + "epoch": 0.005304907039011085, + "grad_norm": 0.98500657081604, + "learning_rate": 1.7678452301534358e-06, + "loss": 0.2933, + "step": 265 + }, + { + "epoch": 0.005324925556139429, + "grad_norm": 1.8943291902542114, + "learning_rate": 1.7745163442294865e-06, + "loss": 0.8797, + "step": 266 + }, + { + "epoch": 0.005344944073267773, + "grad_norm": 1.0326361656188965, + "learning_rate": 1.781187458305537e-06, + "loss": 0.3201, + "step": 267 + }, + { + "epoch": 0.005364962590396117, + "grad_norm": 1.9771965742111206, + "learning_rate": 1.7878585723815878e-06, + "loss": 0.8351, + "step": 268 + }, + { + "epoch": 0.00538498110752446, + "grad_norm": 2.011528968811035, + "learning_rate": 1.7945296864576386e-06, + "loss": 0.8452, + "step": 269 + }, + { + "epoch": 0.005404999624652804, + "grad_norm": 1.1136507987976074, + "learning_rate": 1.8012008005336891e-06, + "loss": 0.3463, + "step": 270 + }, + { + "epoch": 0.005425018141781148, + "grad_norm": 1.0327386856079102, + "learning_rate": 1.8078719146097401e-06, + "loss": 0.3197, + "step": 271 + }, + { + "epoch": 0.0054450366589094915, + "grad_norm": 1.0785300731658936, + "learning_rate": 1.8145430286857907e-06, + "loss": 0.3513, + "step": 272 + }, + { + "epoch": 0.005465055176037835, + "grad_norm": 1.0967133045196533, + "learning_rate": 1.8212141427618412e-06, + "loss": 0.369, + "step": 273 + }, + { + "epoch": 0.005485073693166179, + "grad_norm": 1.8590859174728394, + "learning_rate": 1.8278852568378922e-06, + "loss": 0.8837, + "step": 274 + }, + { + "epoch": 0.005505092210294523, + "grad_norm": 0.9598642587661743, + "learning_rate": 1.8345563709139427e-06, + "loss": 0.3624, + "step": 275 + }, + { + "epoch": 0.0055251107274228665, + "grad_norm": 1.7625129222869873, + "learning_rate": 1.8412274849899937e-06, + "loss": 0.3578, + "step": 276 + }, + { + "epoch": 0.00554512924455121, + "grad_norm": 0.9183222055435181, + "learning_rate": 1.8478985990660442e-06, + "loss": 0.3113, + "step": 277 + }, + { + "epoch": 0.005565147761679554, + "grad_norm": 1.1281927824020386, + "learning_rate": 1.8545697131420948e-06, + "loss": 0.3275, + "step": 278 + }, + { + "epoch": 0.005585166278807898, + "grad_norm": 1.036133050918579, + "learning_rate": 1.8612408272181455e-06, + "loss": 0.309, + "step": 279 + }, + { + "epoch": 0.005605184795936241, + "grad_norm": 1.0365924835205078, + "learning_rate": 1.8679119412941963e-06, + "loss": 0.3469, + "step": 280 + }, + { + "epoch": 0.005625203313064585, + "grad_norm": 0.9987282156944275, + "learning_rate": 1.874583055370247e-06, + "loss": 0.2971, + "step": 281 + }, + { + "epoch": 0.005645221830192928, + "grad_norm": 1.086187481880188, + "learning_rate": 1.8812541694462976e-06, + "loss": 0.3651, + "step": 282 + }, + { + "epoch": 0.005665240347321272, + "grad_norm": 1.0714961290359497, + "learning_rate": 1.8879252835223484e-06, + "loss": 0.3628, + "step": 283 + }, + { + "epoch": 0.0056852588644496155, + "grad_norm": 1.0615737438201904, + "learning_rate": 1.8945963975983991e-06, + "loss": 0.3258, + "step": 284 + }, + { + "epoch": 0.005705277381577959, + "grad_norm": 1.049548864364624, + "learning_rate": 1.9012675116744497e-06, + "loss": 0.3559, + "step": 285 + }, + { + "epoch": 0.005725295898706303, + "grad_norm": 1.3814584016799927, + "learning_rate": 1.9079386257505006e-06, + "loss": 0.3153, + "step": 286 + }, + { + "epoch": 0.005745314415834647, + "grad_norm": 1.090623378753662, + "learning_rate": 1.914609739826551e-06, + "loss": 0.3391, + "step": 287 + }, + { + "epoch": 0.0057653329329629905, + "grad_norm": 0.9347370862960815, + "learning_rate": 1.9212808539026017e-06, + "loss": 0.336, + "step": 288 + }, + { + "epoch": 0.005785351450091334, + "grad_norm": 1.1534956693649292, + "learning_rate": 1.9279519679786525e-06, + "loss": 0.3408, + "step": 289 + }, + { + "epoch": 0.005805369967219678, + "grad_norm": 1.0252344608306885, + "learning_rate": 1.9346230820547032e-06, + "loss": 0.3081, + "step": 290 + }, + { + "epoch": 0.005825388484348022, + "grad_norm": 1.0597635507583618, + "learning_rate": 1.941294196130754e-06, + "loss": 0.3037, + "step": 291 + }, + { + "epoch": 0.005845407001476365, + "grad_norm": 1.0722402334213257, + "learning_rate": 1.9479653102068048e-06, + "loss": 0.3244, + "step": 292 + }, + { + "epoch": 0.005865425518604709, + "grad_norm": 1.09663724899292, + "learning_rate": 1.954636424282855e-06, + "loss": 0.3631, + "step": 293 + }, + { + "epoch": 0.005885444035733053, + "grad_norm": 1.0617138147354126, + "learning_rate": 1.9613075383589063e-06, + "loss": 0.3594, + "step": 294 + }, + { + "epoch": 0.005905462552861397, + "grad_norm": 1.0880051851272583, + "learning_rate": 1.9679786524349566e-06, + "loss": 0.3427, + "step": 295 + }, + { + "epoch": 0.00592548106998974, + "grad_norm": 1.0406452417373657, + "learning_rate": 1.974649766511008e-06, + "loss": 0.3637, + "step": 296 + }, + { + "epoch": 0.005945499587118084, + "grad_norm": 1.0645713806152344, + "learning_rate": 1.981320880587058e-06, + "loss": 0.3785, + "step": 297 + }, + { + "epoch": 0.005965518104246428, + "grad_norm": 1.0827786922454834, + "learning_rate": 1.987991994663109e-06, + "loss": 0.3688, + "step": 298 + }, + { + "epoch": 0.005985536621374772, + "grad_norm": 1.1697412729263306, + "learning_rate": 1.9946631087391596e-06, + "loss": 0.32, + "step": 299 + }, + { + "epoch": 0.006005555138503115, + "grad_norm": 1.0148099660873413, + "learning_rate": 2.0013342228152104e-06, + "loss": 0.397, + "step": 300 + }, + { + "epoch": 0.006025573655631459, + "grad_norm": 1.0741328001022339, + "learning_rate": 2.008005336891261e-06, + "loss": 0.3073, + "step": 301 + }, + { + "epoch": 0.006045592172759803, + "grad_norm": 1.0663073062896729, + "learning_rate": 2.0146764509673115e-06, + "loss": 0.3303, + "step": 302 + }, + { + "epoch": 0.0060656106898881465, + "grad_norm": 1.6855080127716064, + "learning_rate": 2.0213475650433622e-06, + "loss": 0.9847, + "step": 303 + }, + { + "epoch": 0.00608562920701649, + "grad_norm": 1.0405930280685425, + "learning_rate": 2.028018679119413e-06, + "loss": 0.3634, + "step": 304 + }, + { + "epoch": 0.006105647724144834, + "grad_norm": 1.1455109119415283, + "learning_rate": 2.0346897931954638e-06, + "loss": 0.339, + "step": 305 + }, + { + "epoch": 0.006125666241273178, + "grad_norm": 1.1068627834320068, + "learning_rate": 2.0413609072715145e-06, + "loss": 0.3723, + "step": 306 + }, + { + "epoch": 0.0061456847584015215, + "grad_norm": 1.082398772239685, + "learning_rate": 2.0480320213475653e-06, + "loss": 0.3594, + "step": 307 + }, + { + "epoch": 0.006165703275529865, + "grad_norm": 1.0157562494277954, + "learning_rate": 2.054703135423616e-06, + "loss": 0.3462, + "step": 308 + }, + { + "epoch": 0.006185721792658209, + "grad_norm": 1.0950570106506348, + "learning_rate": 2.061374249499667e-06, + "loss": 0.3848, + "step": 309 + }, + { + "epoch": 0.006205740309786553, + "grad_norm": 1.068679690361023, + "learning_rate": 2.068045363575717e-06, + "loss": 0.3309, + "step": 310 + }, + { + "epoch": 0.0062257588269148964, + "grad_norm": 1.0534955263137817, + "learning_rate": 2.074716477651768e-06, + "loss": 0.356, + "step": 311 + }, + { + "epoch": 0.00624577734404324, + "grad_norm": 1.022519588470459, + "learning_rate": 2.0813875917278186e-06, + "loss": 0.3379, + "step": 312 + }, + { + "epoch": 0.006265795861171584, + "grad_norm": 1.036375641822815, + "learning_rate": 2.0880587058038694e-06, + "loss": 0.3138, + "step": 313 + }, + { + "epoch": 0.006285814378299928, + "grad_norm": 2.0150673389434814, + "learning_rate": 2.09472981987992e-06, + "loss": 0.9284, + "step": 314 + }, + { + "epoch": 0.006305832895428271, + "grad_norm": 1.10751473903656, + "learning_rate": 2.101400933955971e-06, + "loss": 0.3274, + "step": 315 + }, + { + "epoch": 0.006325851412556615, + "grad_norm": 1.2553911209106445, + "learning_rate": 2.1080720480320217e-06, + "loss": 0.3138, + "step": 316 + }, + { + "epoch": 0.006345869929684959, + "grad_norm": 1.1637681722640991, + "learning_rate": 2.114743162108072e-06, + "loss": 0.2968, + "step": 317 + }, + { + "epoch": 0.006365888446813303, + "grad_norm": 1.1143245697021484, + "learning_rate": 2.121414276184123e-06, + "loss": 0.362, + "step": 318 + }, + { + "epoch": 0.006385906963941646, + "grad_norm": 1.1440308094024658, + "learning_rate": 2.1280853902601735e-06, + "loss": 0.3721, + "step": 319 + }, + { + "epoch": 0.00640592548106999, + "grad_norm": 1.052011489868164, + "learning_rate": 2.1347565043362243e-06, + "loss": 0.3062, + "step": 320 + }, + { + "epoch": 0.006425943998198334, + "grad_norm": 1.2596919536590576, + "learning_rate": 2.141427618412275e-06, + "loss": 0.3684, + "step": 321 + }, + { + "epoch": 0.0064459625153266776, + "grad_norm": 1.0625238418579102, + "learning_rate": 2.148098732488326e-06, + "loss": 0.3436, + "step": 322 + }, + { + "epoch": 0.006465981032455021, + "grad_norm": 1.0771417617797852, + "learning_rate": 2.1547698465643766e-06, + "loss": 0.3309, + "step": 323 + }, + { + "epoch": 0.006485999549583365, + "grad_norm": 0.9920762181282043, + "learning_rate": 2.1614409606404273e-06, + "loss": 0.3419, + "step": 324 + }, + { + "epoch": 0.006506018066711708, + "grad_norm": 1.0715546607971191, + "learning_rate": 2.1681120747164777e-06, + "loss": 0.3358, + "step": 325 + }, + { + "epoch": 0.006526036583840052, + "grad_norm": 1.010159969329834, + "learning_rate": 2.1747831887925284e-06, + "loss": 0.3211, + "step": 326 + }, + { + "epoch": 0.006546055100968395, + "grad_norm": 1.0055676698684692, + "learning_rate": 2.181454302868579e-06, + "loss": 0.3218, + "step": 327 + }, + { + "epoch": 0.006566073618096739, + "grad_norm": 1.0454553365707397, + "learning_rate": 2.18812541694463e-06, + "loss": 0.3136, + "step": 328 + }, + { + "epoch": 0.006586092135225083, + "grad_norm": 1.1168930530548096, + "learning_rate": 2.1947965310206807e-06, + "loss": 0.3598, + "step": 329 + }, + { + "epoch": 0.006606110652353427, + "grad_norm": 1.0181013345718384, + "learning_rate": 2.201467645096731e-06, + "loss": 0.3826, + "step": 330 + }, + { + "epoch": 0.00662612916948177, + "grad_norm": 1.209134578704834, + "learning_rate": 2.208138759172782e-06, + "loss": 0.3049, + "step": 331 + }, + { + "epoch": 0.006646147686610114, + "grad_norm": 1.0619711875915527, + "learning_rate": 2.2148098732488325e-06, + "loss": 0.285, + "step": 332 + }, + { + "epoch": 0.006666166203738458, + "grad_norm": 1.0385758876800537, + "learning_rate": 2.2214809873248837e-06, + "loss": 0.3465, + "step": 333 + }, + { + "epoch": 0.0066861847208668015, + "grad_norm": 1.0020352602005005, + "learning_rate": 2.228152101400934e-06, + "loss": 0.3228, + "step": 334 + }, + { + "epoch": 0.006706203237995145, + "grad_norm": 1.0524910688400269, + "learning_rate": 2.234823215476985e-06, + "loss": 0.3282, + "step": 335 + }, + { + "epoch": 0.006726221755123489, + "grad_norm": 1.8803147077560425, + "learning_rate": 2.2414943295530356e-06, + "loss": 0.8942, + "step": 336 + }, + { + "epoch": 0.006746240272251833, + "grad_norm": 1.0486823320388794, + "learning_rate": 2.2481654436290863e-06, + "loss": 0.3186, + "step": 337 + }, + { + "epoch": 0.0067662587893801765, + "grad_norm": 1.0936228036880493, + "learning_rate": 2.254836557705137e-06, + "loss": 0.3348, + "step": 338 + }, + { + "epoch": 0.00678627730650852, + "grad_norm": 1.1012150049209595, + "learning_rate": 2.261507671781188e-06, + "loss": 0.2857, + "step": 339 + }, + { + "epoch": 0.006806295823636864, + "grad_norm": 1.2144168615341187, + "learning_rate": 2.268178785857238e-06, + "loss": 0.3637, + "step": 340 + }, + { + "epoch": 0.006826314340765208, + "grad_norm": 1.1189838647842407, + "learning_rate": 2.274849899933289e-06, + "loss": 0.3228, + "step": 341 + }, + { + "epoch": 0.0068463328578935514, + "grad_norm": 1.668177604675293, + "learning_rate": 2.2815210140093397e-06, + "loss": 0.8625, + "step": 342 + }, + { + "epoch": 0.006866351375021895, + "grad_norm": 1.7185291051864624, + "learning_rate": 2.2881921280853904e-06, + "loss": 0.9171, + "step": 343 + }, + { + "epoch": 0.006886369892150239, + "grad_norm": 1.4093014001846313, + "learning_rate": 2.294863242161441e-06, + "loss": 0.3964, + "step": 344 + }, + { + "epoch": 0.006906388409278583, + "grad_norm": 0.9531022310256958, + "learning_rate": 2.3015343562374915e-06, + "loss": 0.3637, + "step": 345 + }, + { + "epoch": 0.006926406926406926, + "grad_norm": 1.0518234968185425, + "learning_rate": 2.3082054703135427e-06, + "loss": 0.3517, + "step": 346 + }, + { + "epoch": 0.00694642544353527, + "grad_norm": 1.8019517660140991, + "learning_rate": 2.314876584389593e-06, + "loss": 0.8052, + "step": 347 + }, + { + "epoch": 0.006966443960663614, + "grad_norm": 1.03178071975708, + "learning_rate": 2.3215476984656442e-06, + "loss": 0.3286, + "step": 348 + }, + { + "epoch": 0.006986462477791958, + "grad_norm": 1.82966947555542, + "learning_rate": 2.3282188125416946e-06, + "loss": 0.7881, + "step": 349 + }, + { + "epoch": 0.007006480994920301, + "grad_norm": 1.1379799842834473, + "learning_rate": 2.3348899266177453e-06, + "loss": 0.3219, + "step": 350 + }, + { + "epoch": 0.007026499512048645, + "grad_norm": 1.129220724105835, + "learning_rate": 2.341561040693796e-06, + "loss": 0.3793, + "step": 351 + }, + { + "epoch": 0.007046518029176989, + "grad_norm": 1.1165426969528198, + "learning_rate": 2.348232154769847e-06, + "loss": 0.3739, + "step": 352 + }, + { + "epoch": 0.007066536546305333, + "grad_norm": 1.032211184501648, + "learning_rate": 2.3549032688458976e-06, + "loss": 0.297, + "step": 353 + }, + { + "epoch": 0.007086555063433676, + "grad_norm": 1.1529245376586914, + "learning_rate": 2.361574382921948e-06, + "loss": 0.2931, + "step": 354 + }, + { + "epoch": 0.00710657358056202, + "grad_norm": 1.0350041389465332, + "learning_rate": 2.368245496997999e-06, + "loss": 0.3795, + "step": 355 + }, + { + "epoch": 0.007126592097690364, + "grad_norm": 1.0913448333740234, + "learning_rate": 2.3749166110740494e-06, + "loss": 0.3354, + "step": 356 + }, + { + "epoch": 0.0071466106148187075, + "grad_norm": 1.0956742763519287, + "learning_rate": 2.3815877251501e-06, + "loss": 0.3252, + "step": 357 + }, + { + "epoch": 0.007166629131947051, + "grad_norm": 1.8407657146453857, + "learning_rate": 2.388258839226151e-06, + "loss": 0.8529, + "step": 358 + }, + { + "epoch": 0.007186647649075395, + "grad_norm": 1.0007177591323853, + "learning_rate": 2.3949299533022017e-06, + "loss": 0.3366, + "step": 359 + }, + { + "epoch": 0.007206666166203739, + "grad_norm": 0.9966747164726257, + "learning_rate": 2.4016010673782525e-06, + "loss": 0.3425, + "step": 360 + }, + { + "epoch": 0.0072266846833320825, + "grad_norm": 1.0885522365570068, + "learning_rate": 2.4082721814543032e-06, + "loss": 0.3075, + "step": 361 + }, + { + "epoch": 0.007246703200460426, + "grad_norm": 1.025892734527588, + "learning_rate": 2.4149432955303536e-06, + "loss": 0.3376, + "step": 362 + }, + { + "epoch": 0.00726672171758877, + "grad_norm": 0.9684320688247681, + "learning_rate": 2.4216144096064043e-06, + "loss": 0.3205, + "step": 363 + }, + { + "epoch": 0.007286740234717114, + "grad_norm": 1.2536265850067139, + "learning_rate": 2.428285523682455e-06, + "loss": 0.3389, + "step": 364 + }, + { + "epoch": 0.007306758751845457, + "grad_norm": 0.9933950304985046, + "learning_rate": 2.434956637758506e-06, + "loss": 0.306, + "step": 365 + }, + { + "epoch": 0.007326777268973801, + "grad_norm": 1.2199444770812988, + "learning_rate": 2.4416277518345566e-06, + "loss": 0.3117, + "step": 366 + }, + { + "epoch": 0.007346795786102145, + "grad_norm": 1.0599265098571777, + "learning_rate": 2.4482988659106074e-06, + "loss": 0.3145, + "step": 367 + }, + { + "epoch": 0.007366814303230488, + "grad_norm": 1.1761066913604736, + "learning_rate": 2.454969979986658e-06, + "loss": 0.3425, + "step": 368 + }, + { + "epoch": 0.0073868328203588315, + "grad_norm": 1.17820143699646, + "learning_rate": 2.4616410940627085e-06, + "loss": 0.3617, + "step": 369 + }, + { + "epoch": 0.007406851337487175, + "grad_norm": 1.177098274230957, + "learning_rate": 2.4683122081387596e-06, + "loss": 0.3068, + "step": 370 + }, + { + "epoch": 0.007426869854615519, + "grad_norm": 1.077748417854309, + "learning_rate": 2.47498332221481e-06, + "loss": 0.3858, + "step": 371 + }, + { + "epoch": 0.007446888371743863, + "grad_norm": 1.101712703704834, + "learning_rate": 2.4816544362908607e-06, + "loss": 0.3606, + "step": 372 + }, + { + "epoch": 0.0074669068888722065, + "grad_norm": 1.202863335609436, + "learning_rate": 2.4883255503669115e-06, + "loss": 0.3568, + "step": 373 + }, + { + "epoch": 0.00748692540600055, + "grad_norm": 1.1819506883621216, + "learning_rate": 2.4949966644429622e-06, + "loss": 0.3575, + "step": 374 + }, + { + "epoch": 0.007506943923128894, + "grad_norm": 1.1113324165344238, + "learning_rate": 2.501667778519013e-06, + "loss": 0.3428, + "step": 375 + }, + { + "epoch": 0.007526962440257238, + "grad_norm": 1.4090632200241089, + "learning_rate": 2.5083388925950638e-06, + "loss": 0.368, + "step": 376 + }, + { + "epoch": 0.007546980957385581, + "grad_norm": 1.00551176071167, + "learning_rate": 2.515010006671114e-06, + "loss": 0.336, + "step": 377 + }, + { + "epoch": 0.007566999474513925, + "grad_norm": 1.041032075881958, + "learning_rate": 2.521681120747165e-06, + "loss": 0.3353, + "step": 378 + }, + { + "epoch": 0.007587017991642269, + "grad_norm": 1.0576586723327637, + "learning_rate": 2.528352234823216e-06, + "loss": 0.3437, + "step": 379 + }, + { + "epoch": 0.007607036508770613, + "grad_norm": 1.0032610893249512, + "learning_rate": 2.5350233488992664e-06, + "loss": 0.3361, + "step": 380 + }, + { + "epoch": 0.007627055025898956, + "grad_norm": 1.1521880626678467, + "learning_rate": 2.541694462975317e-06, + "loss": 0.3677, + "step": 381 + }, + { + "epoch": 0.0076470735430273, + "grad_norm": 1.1664042472839355, + "learning_rate": 2.5483655770513675e-06, + "loss": 0.3215, + "step": 382 + }, + { + "epoch": 0.007667092060155644, + "grad_norm": 1.0379317998886108, + "learning_rate": 2.5550366911274182e-06, + "loss": 0.3084, + "step": 383 + }, + { + "epoch": 0.007687110577283988, + "grad_norm": 1.2153745889663696, + "learning_rate": 2.5617078052034694e-06, + "loss": 0.3516, + "step": 384 + }, + { + "epoch": 0.007707129094412331, + "grad_norm": 1.0464789867401123, + "learning_rate": 2.56837891927952e-06, + "loss": 0.3439, + "step": 385 + }, + { + "epoch": 0.007727147611540675, + "grad_norm": 1.8024146556854248, + "learning_rate": 2.5750500333555705e-06, + "loss": 0.8524, + "step": 386 + }, + { + "epoch": 0.007747166128669019, + "grad_norm": 1.2757402658462524, + "learning_rate": 2.5817211474316212e-06, + "loss": 0.2875, + "step": 387 + }, + { + "epoch": 0.0077671846457973625, + "grad_norm": 0.9905286431312561, + "learning_rate": 2.5883922615076716e-06, + "loss": 0.3133, + "step": 388 + }, + { + "epoch": 0.007787203162925706, + "grad_norm": 1.133623719215393, + "learning_rate": 2.5950633755837228e-06, + "loss": 0.3285, + "step": 389 + }, + { + "epoch": 0.00780722168005405, + "grad_norm": 1.904404640197754, + "learning_rate": 2.6017344896597735e-06, + "loss": 0.8823, + "step": 390 + }, + { + "epoch": 0.007827240197182393, + "grad_norm": 1.3842976093292236, + "learning_rate": 2.6084056037358243e-06, + "loss": 0.3591, + "step": 391 + }, + { + "epoch": 0.007847258714310737, + "grad_norm": 1.0311110019683838, + "learning_rate": 2.6150767178118746e-06, + "loss": 0.31, + "step": 392 + }, + { + "epoch": 0.00786727723143908, + "grad_norm": 0.9883089661598206, + "learning_rate": 2.6217478318879254e-06, + "loss": 0.3341, + "step": 393 + }, + { + "epoch": 0.007887295748567424, + "grad_norm": 1.0640028715133667, + "learning_rate": 2.6284189459639766e-06, + "loss": 0.3508, + "step": 394 + }, + { + "epoch": 0.007907314265695768, + "grad_norm": 1.0630801916122437, + "learning_rate": 2.635090060040027e-06, + "loss": 0.3566, + "step": 395 + }, + { + "epoch": 0.007927332782824112, + "grad_norm": 1.1612716913223267, + "learning_rate": 2.6417611741160776e-06, + "loss": 0.3903, + "step": 396 + }, + { + "epoch": 0.007947351299952455, + "grad_norm": 0.9282535314559937, + "learning_rate": 2.648432288192128e-06, + "loss": 0.3266, + "step": 397 + }, + { + "epoch": 0.007967369817080799, + "grad_norm": 0.9833594560623169, + "learning_rate": 2.6551034022681787e-06, + "loss": 0.3364, + "step": 398 + }, + { + "epoch": 0.007987388334209143, + "grad_norm": 1.2890042066574097, + "learning_rate": 2.66177451634423e-06, + "loss": 0.3493, + "step": 399 + }, + { + "epoch": 0.008007406851337487, + "grad_norm": 1.244901180267334, + "learning_rate": 2.6684456304202807e-06, + "loss": 0.3512, + "step": 400 + }, + { + "epoch": 0.00802742536846583, + "grad_norm": 2.103975296020508, + "learning_rate": 2.675116744496331e-06, + "loss": 0.8447, + "step": 401 + }, + { + "epoch": 0.008047443885594174, + "grad_norm": 1.109789252281189, + "learning_rate": 2.6817878585723818e-06, + "loss": 0.3054, + "step": 402 + }, + { + "epoch": 0.008067462402722518, + "grad_norm": 1.3734267950057983, + "learning_rate": 2.688458972648432e-06, + "loss": 0.3098, + "step": 403 + }, + { + "epoch": 0.008087480919850861, + "grad_norm": 1.030332088470459, + "learning_rate": 2.6951300867244833e-06, + "loss": 0.296, + "step": 404 + }, + { + "epoch": 0.008107499436979205, + "grad_norm": 1.0455570220947266, + "learning_rate": 2.701801200800534e-06, + "loss": 0.2945, + "step": 405 + }, + { + "epoch": 0.008127517954107549, + "grad_norm": 1.281455636024475, + "learning_rate": 2.7084723148765844e-06, + "loss": 0.2942, + "step": 406 + }, + { + "epoch": 0.008147536471235893, + "grad_norm": 1.108974814414978, + "learning_rate": 2.715143428952635e-06, + "loss": 0.33, + "step": 407 + }, + { + "epoch": 0.008167554988364236, + "grad_norm": 1.1995090246200562, + "learning_rate": 2.721814543028686e-06, + "loss": 0.2875, + "step": 408 + }, + { + "epoch": 0.00818757350549258, + "grad_norm": 1.032166838645935, + "learning_rate": 2.728485657104737e-06, + "loss": 0.2968, + "step": 409 + }, + { + "epoch": 0.008207592022620924, + "grad_norm": 1.2511330842971802, + "learning_rate": 2.7351567711807874e-06, + "loss": 0.3407, + "step": 410 + }, + { + "epoch": 0.008227610539749268, + "grad_norm": 1.0399694442749023, + "learning_rate": 2.741827885256838e-06, + "loss": 0.334, + "step": 411 + }, + { + "epoch": 0.008247629056877611, + "grad_norm": 1.2650386095046997, + "learning_rate": 2.7484989993328885e-06, + "loss": 0.3441, + "step": 412 + }, + { + "epoch": 0.008267647574005955, + "grad_norm": 1.037491798400879, + "learning_rate": 2.7551701134089393e-06, + "loss": 0.3381, + "step": 413 + }, + { + "epoch": 0.008287666091134299, + "grad_norm": 1.0637662410736084, + "learning_rate": 2.7618412274849904e-06, + "loss": 0.3496, + "step": 414 + }, + { + "epoch": 0.008307684608262643, + "grad_norm": 1.16436767578125, + "learning_rate": 2.768512341561041e-06, + "loss": 0.3361, + "step": 415 + }, + { + "epoch": 0.008327703125390986, + "grad_norm": 0.985469400882721, + "learning_rate": 2.7751834556370915e-06, + "loss": 0.3334, + "step": 416 + }, + { + "epoch": 0.00834772164251933, + "grad_norm": 1.1123677492141724, + "learning_rate": 2.7818545697131423e-06, + "loss": 0.3482, + "step": 417 + }, + { + "epoch": 0.008367740159647674, + "grad_norm": 1.1437528133392334, + "learning_rate": 2.7885256837891926e-06, + "loss": 0.2999, + "step": 418 + }, + { + "epoch": 0.008387758676776018, + "grad_norm": 1.0342841148376465, + "learning_rate": 2.795196797865244e-06, + "loss": 0.3415, + "step": 419 + }, + { + "epoch": 0.008407777193904361, + "grad_norm": 1.8477303981781006, + "learning_rate": 2.8018679119412946e-06, + "loss": 0.8531, + "step": 420 + }, + { + "epoch": 0.008427795711032705, + "grad_norm": 0.9786385297775269, + "learning_rate": 2.808539026017345e-06, + "loss": 0.3371, + "step": 421 + }, + { + "epoch": 0.008447814228161049, + "grad_norm": 1.0052202939987183, + "learning_rate": 2.8152101400933957e-06, + "loss": 0.3026, + "step": 422 + }, + { + "epoch": 0.008467832745289392, + "grad_norm": 0.9509280323982239, + "learning_rate": 2.821881254169447e-06, + "loss": 0.3253, + "step": 423 + }, + { + "epoch": 0.008487851262417736, + "grad_norm": 1.0963438749313354, + "learning_rate": 2.8285523682454976e-06, + "loss": 0.3693, + "step": 424 + }, + { + "epoch": 0.00850786977954608, + "grad_norm": 1.1362642049789429, + "learning_rate": 2.835223482321548e-06, + "loss": 0.335, + "step": 425 + }, + { + "epoch": 0.008527888296674424, + "grad_norm": 1.1166688203811646, + "learning_rate": 2.8418945963975987e-06, + "loss": 0.3012, + "step": 426 + }, + { + "epoch": 0.008547906813802767, + "grad_norm": 1.1361026763916016, + "learning_rate": 2.848565710473649e-06, + "loss": 0.3209, + "step": 427 + }, + { + "epoch": 0.008567925330931111, + "grad_norm": 1.102122187614441, + "learning_rate": 2.8552368245497e-06, + "loss": 0.2973, + "step": 428 + }, + { + "epoch": 0.008587943848059455, + "grad_norm": 1.0612908601760864, + "learning_rate": 2.861907938625751e-06, + "loss": 0.3528, + "step": 429 + }, + { + "epoch": 0.008607962365187799, + "grad_norm": 1.1448071002960205, + "learning_rate": 2.8685790527018013e-06, + "loss": 0.3351, + "step": 430 + }, + { + "epoch": 0.008627980882316142, + "grad_norm": 1.1287763118743896, + "learning_rate": 2.875250166777852e-06, + "loss": 0.2939, + "step": 431 + }, + { + "epoch": 0.008647999399444486, + "grad_norm": 1.0534394979476929, + "learning_rate": 2.881921280853903e-06, + "loss": 0.3188, + "step": 432 + }, + { + "epoch": 0.00866801791657283, + "grad_norm": 1.1234763860702515, + "learning_rate": 2.888592394929954e-06, + "loss": 0.3263, + "step": 433 + }, + { + "epoch": 0.008688036433701174, + "grad_norm": 1.1106873750686646, + "learning_rate": 2.8952635090060043e-06, + "loss": 0.3102, + "step": 434 + }, + { + "epoch": 0.008708054950829517, + "grad_norm": 1.0464071035385132, + "learning_rate": 2.901934623082055e-06, + "loss": 0.3424, + "step": 435 + }, + { + "epoch": 0.008728073467957861, + "grad_norm": 1.1490641832351685, + "learning_rate": 2.9086057371581054e-06, + "loss": 0.3306, + "step": 436 + }, + { + "epoch": 0.008748091985086205, + "grad_norm": 1.113077998161316, + "learning_rate": 2.915276851234156e-06, + "loss": 0.3577, + "step": 437 + }, + { + "epoch": 0.008768110502214549, + "grad_norm": 1.0919040441513062, + "learning_rate": 2.9219479653102074e-06, + "loss": 0.3031, + "step": 438 + }, + { + "epoch": 0.008788129019342892, + "grad_norm": 1.6532115936279297, + "learning_rate": 2.9286190793862577e-06, + "loss": 0.9026, + "step": 439 + }, + { + "epoch": 0.008808147536471236, + "grad_norm": 1.0310717821121216, + "learning_rate": 2.9352901934623084e-06, + "loss": 0.304, + "step": 440 + }, + { + "epoch": 0.00882816605359958, + "grad_norm": 1.120715618133545, + "learning_rate": 2.941961307538359e-06, + "loss": 0.3293, + "step": 441 + }, + { + "epoch": 0.008848184570727924, + "grad_norm": 1.8131555318832397, + "learning_rate": 2.9486324216144095e-06, + "loss": 0.8152, + "step": 442 + }, + { + "epoch": 0.008868203087856267, + "grad_norm": 0.9654719829559326, + "learning_rate": 2.9553035356904607e-06, + "loss": 0.3337, + "step": 443 + }, + { + "epoch": 0.008888221604984611, + "grad_norm": 1.2053179740905762, + "learning_rate": 2.9619746497665115e-06, + "loss": 0.3809, + "step": 444 + }, + { + "epoch": 0.008908240122112955, + "grad_norm": 0.9652197957038879, + "learning_rate": 2.968645763842562e-06, + "loss": 0.3682, + "step": 445 + }, + { + "epoch": 0.008928258639241298, + "grad_norm": 1.0323768854141235, + "learning_rate": 2.9753168779186126e-06, + "loss": 0.3208, + "step": 446 + }, + { + "epoch": 0.008948277156369642, + "grad_norm": 1.8004246950149536, + "learning_rate": 2.9819879919946633e-06, + "loss": 0.854, + "step": 447 + }, + { + "epoch": 0.008968295673497986, + "grad_norm": 1.090123176574707, + "learning_rate": 2.9886591060707145e-06, + "loss": 0.3583, + "step": 448 + }, + { + "epoch": 0.00898831419062633, + "grad_norm": 1.121842622756958, + "learning_rate": 2.995330220146765e-06, + "loss": 0.3352, + "step": 449 + }, + { + "epoch": 0.009008332707754673, + "grad_norm": 1.8716092109680176, + "learning_rate": 3.0020013342228156e-06, + "loss": 0.8378, + "step": 450 + }, + { + "epoch": 0.009028351224883017, + "grad_norm": 0.9642249941825867, + "learning_rate": 3.008672448298866e-06, + "loss": 0.3507, + "step": 451 + }, + { + "epoch": 0.009048369742011361, + "grad_norm": 2.002427101135254, + "learning_rate": 3.0153435623749167e-06, + "loss": 0.8603, + "step": 452 + }, + { + "epoch": 0.009068388259139705, + "grad_norm": 1.034248948097229, + "learning_rate": 3.022014676450968e-06, + "loss": 0.3091, + "step": 453 + }, + { + "epoch": 0.009088406776268048, + "grad_norm": 1.0077989101409912, + "learning_rate": 3.028685790527018e-06, + "loss": 0.3404, + "step": 454 + }, + { + "epoch": 0.009108425293396392, + "grad_norm": 1.0961769819259644, + "learning_rate": 3.035356904603069e-06, + "loss": 0.3556, + "step": 455 + }, + { + "epoch": 0.009128443810524736, + "grad_norm": 1.179197907447815, + "learning_rate": 3.0420280186791197e-06, + "loss": 0.3438, + "step": 456 + }, + { + "epoch": 0.00914846232765308, + "grad_norm": 1.107642412185669, + "learning_rate": 3.04869913275517e-06, + "loss": 0.3876, + "step": 457 + }, + { + "epoch": 0.009168480844781423, + "grad_norm": 1.0938575267791748, + "learning_rate": 3.0553702468312212e-06, + "loss": 0.3299, + "step": 458 + }, + { + "epoch": 0.009188499361909767, + "grad_norm": 1.1617940664291382, + "learning_rate": 3.062041360907272e-06, + "loss": 0.3974, + "step": 459 + }, + { + "epoch": 0.00920851787903811, + "grad_norm": 1.0600109100341797, + "learning_rate": 3.0687124749833223e-06, + "loss": 0.2996, + "step": 460 + }, + { + "epoch": 0.009228536396166455, + "grad_norm": 0.961048424243927, + "learning_rate": 3.075383589059373e-06, + "loss": 0.3159, + "step": 461 + }, + { + "epoch": 0.009248554913294798, + "grad_norm": 1.8622393608093262, + "learning_rate": 3.082054703135424e-06, + "loss": 0.8758, + "step": 462 + }, + { + "epoch": 0.009268573430423142, + "grad_norm": 1.0733202695846558, + "learning_rate": 3.0887258172114746e-06, + "loss": 0.3192, + "step": 463 + }, + { + "epoch": 0.009288591947551486, + "grad_norm": 1.081894874572754, + "learning_rate": 3.0953969312875254e-06, + "loss": 0.3642, + "step": 464 + }, + { + "epoch": 0.00930861046467983, + "grad_norm": 1.3075908422470093, + "learning_rate": 3.102068045363576e-06, + "loss": 0.3533, + "step": 465 + }, + { + "epoch": 0.009328628981808173, + "grad_norm": 1.175094485282898, + "learning_rate": 3.1087391594396265e-06, + "loss": 0.3395, + "step": 466 + }, + { + "epoch": 0.009348647498936517, + "grad_norm": 1.0453943014144897, + "learning_rate": 3.1154102735156772e-06, + "loss": 0.3298, + "step": 467 + }, + { + "epoch": 0.00936866601606486, + "grad_norm": 1.0775824785232544, + "learning_rate": 3.1220813875917284e-06, + "loss": 0.3732, + "step": 468 + }, + { + "epoch": 0.009388684533193204, + "grad_norm": 1.0514930486679077, + "learning_rate": 3.1287525016677787e-06, + "loss": 0.3735, + "step": 469 + }, + { + "epoch": 0.009408703050321548, + "grad_norm": 1.1263853311538696, + "learning_rate": 3.1354236157438295e-06, + "loss": 0.3674, + "step": 470 + }, + { + "epoch": 0.009428721567449892, + "grad_norm": 1.286760687828064, + "learning_rate": 3.1420947298198802e-06, + "loss": 0.3458, + "step": 471 + }, + { + "epoch": 0.009448740084578236, + "grad_norm": 1.0521570444107056, + "learning_rate": 3.1487658438959306e-06, + "loss": 0.356, + "step": 472 + }, + { + "epoch": 0.00946875860170658, + "grad_norm": 1.1519792079925537, + "learning_rate": 3.1554369579719818e-06, + "loss": 0.3305, + "step": 473 + }, + { + "epoch": 0.009488777118834923, + "grad_norm": 1.0583549737930298, + "learning_rate": 3.1621080720480325e-06, + "loss": 0.4315, + "step": 474 + }, + { + "epoch": 0.009508795635963267, + "grad_norm": 1.100572109222412, + "learning_rate": 3.168779186124083e-06, + "loss": 0.3575, + "step": 475 + }, + { + "epoch": 0.00952881415309161, + "grad_norm": 1.8676751852035522, + "learning_rate": 3.1754503002001336e-06, + "loss": 0.8519, + "step": 476 + }, + { + "epoch": 0.009548832670219953, + "grad_norm": 1.3589838743209839, + "learning_rate": 3.182121414276184e-06, + "loss": 0.3284, + "step": 477 + }, + { + "epoch": 0.009568851187348296, + "grad_norm": 1.2678040266036987, + "learning_rate": 3.188792528352235e-06, + "loss": 0.3386, + "step": 478 + }, + { + "epoch": 0.00958886970447664, + "grad_norm": 1.0981509685516357, + "learning_rate": 3.195463642428286e-06, + "loss": 0.3446, + "step": 479 + }, + { + "epoch": 0.009608888221604984, + "grad_norm": 0.9769441485404968, + "learning_rate": 3.2021347565043366e-06, + "loss": 0.3524, + "step": 480 + }, + { + "epoch": 0.009628906738733328, + "grad_norm": 1.1154288053512573, + "learning_rate": 3.208805870580387e-06, + "loss": 0.3213, + "step": 481 + }, + { + "epoch": 0.009648925255861671, + "grad_norm": 1.8607627153396606, + "learning_rate": 3.2154769846564377e-06, + "loss": 0.822, + "step": 482 + }, + { + "epoch": 0.009668943772990015, + "grad_norm": 1.0499634742736816, + "learning_rate": 3.222148098732489e-06, + "loss": 0.3395, + "step": 483 + }, + { + "epoch": 0.009688962290118359, + "grad_norm": 1.0847530364990234, + "learning_rate": 3.2288192128085392e-06, + "loss": 0.3309, + "step": 484 + }, + { + "epoch": 0.009708980807246703, + "grad_norm": 1.0477259159088135, + "learning_rate": 3.23549032688459e-06, + "loss": 0.3192, + "step": 485 + }, + { + "epoch": 0.009728999324375046, + "grad_norm": 1.1331231594085693, + "learning_rate": 3.2421614409606408e-06, + "loss": 0.3113, + "step": 486 + }, + { + "epoch": 0.00974901784150339, + "grad_norm": 1.1846524477005005, + "learning_rate": 3.248832555036691e-06, + "loss": 0.3654, + "step": 487 + }, + { + "epoch": 0.009769036358631734, + "grad_norm": 1.0010573863983154, + "learning_rate": 3.2555036691127423e-06, + "loss": 0.2995, + "step": 488 + }, + { + "epoch": 0.009789054875760077, + "grad_norm": 1.142266035079956, + "learning_rate": 3.262174783188793e-06, + "loss": 0.3178, + "step": 489 + }, + { + "epoch": 0.009809073392888421, + "grad_norm": 1.0898741483688354, + "learning_rate": 3.2688458972648434e-06, + "loss": 0.368, + "step": 490 + }, + { + "epoch": 0.009829091910016765, + "grad_norm": 1.1005135774612427, + "learning_rate": 3.275517011340894e-06, + "loss": 0.3072, + "step": 491 + }, + { + "epoch": 0.009849110427145109, + "grad_norm": 1.0404720306396484, + "learning_rate": 3.2821881254169445e-06, + "loss": 0.3659, + "step": 492 + }, + { + "epoch": 0.009869128944273452, + "grad_norm": 1.0370279550552368, + "learning_rate": 3.2888592394929956e-06, + "loss": 0.366, + "step": 493 + }, + { + "epoch": 0.009889147461401796, + "grad_norm": 1.103554368019104, + "learning_rate": 3.2955303535690464e-06, + "loss": 0.3042, + "step": 494 + }, + { + "epoch": 0.00990916597853014, + "grad_norm": 1.1565337181091309, + "learning_rate": 3.302201467645097e-06, + "loss": 0.3653, + "step": 495 + }, + { + "epoch": 0.009929184495658484, + "grad_norm": 1.1764150857925415, + "learning_rate": 3.3088725817211475e-06, + "loss": 0.3578, + "step": 496 + }, + { + "epoch": 0.009949203012786827, + "grad_norm": 1.0749928951263428, + "learning_rate": 3.3155436957971983e-06, + "loss": 0.3756, + "step": 497 + }, + { + "epoch": 0.009969221529915171, + "grad_norm": 1.0230286121368408, + "learning_rate": 3.3222148098732494e-06, + "loss": 0.3226, + "step": 498 + }, + { + "epoch": 0.009989240047043515, + "grad_norm": 1.1076538562774658, + "learning_rate": 3.3288859239492998e-06, + "loss": 0.3654, + "step": 499 + }, + { + "epoch": 0.010009258564171859, + "grad_norm": 1.0579122304916382, + "learning_rate": 3.3355570380253505e-06, + "loss": 0.3841, + "step": 500 + }, + { + "epoch": 0.010029277081300202, + "grad_norm": 1.0281120538711548, + "learning_rate": 3.342228152101401e-06, + "loss": 0.3137, + "step": 501 + }, + { + "epoch": 0.010049295598428546, + "grad_norm": 0.9778279066085815, + "learning_rate": 3.3488992661774516e-06, + "loss": 0.3178, + "step": 502 + }, + { + "epoch": 0.01006931411555689, + "grad_norm": 1.012542486190796, + "learning_rate": 3.355570380253503e-06, + "loss": 0.3373, + "step": 503 + }, + { + "epoch": 0.010089332632685234, + "grad_norm": 1.1380778551101685, + "learning_rate": 3.3622414943295536e-06, + "loss": 0.3112, + "step": 504 + }, + { + "epoch": 0.010109351149813577, + "grad_norm": 1.0207196474075317, + "learning_rate": 3.368912608405604e-06, + "loss": 0.3005, + "step": 505 + }, + { + "epoch": 0.010129369666941921, + "grad_norm": 1.1385140419006348, + "learning_rate": 3.3755837224816546e-06, + "loss": 0.343, + "step": 506 + }, + { + "epoch": 0.010149388184070265, + "grad_norm": 1.1670206785202026, + "learning_rate": 3.382254836557705e-06, + "loss": 0.365, + "step": 507 + }, + { + "epoch": 0.010169406701198608, + "grad_norm": 1.009905219078064, + "learning_rate": 3.388925950633756e-06, + "loss": 0.3255, + "step": 508 + }, + { + "epoch": 0.010189425218326952, + "grad_norm": 1.403417944908142, + "learning_rate": 3.395597064709807e-06, + "loss": 0.3306, + "step": 509 + }, + { + "epoch": 0.010209443735455296, + "grad_norm": 1.109103798866272, + "learning_rate": 3.4022681787858573e-06, + "loss": 0.3442, + "step": 510 + }, + { + "epoch": 0.01022946225258364, + "grad_norm": 0.9878956079483032, + "learning_rate": 3.408939292861908e-06, + "loss": 0.3073, + "step": 511 + }, + { + "epoch": 0.010249480769711983, + "grad_norm": 1.107586145401001, + "learning_rate": 3.4156104069379588e-06, + "loss": 0.3087, + "step": 512 + }, + { + "epoch": 0.010269499286840327, + "grad_norm": 1.2124078273773193, + "learning_rate": 3.42228152101401e-06, + "loss": 0.3202, + "step": 513 + }, + { + "epoch": 0.010289517803968671, + "grad_norm": 1.8299435377120972, + "learning_rate": 3.4289526350900603e-06, + "loss": 0.8274, + "step": 514 + }, + { + "epoch": 0.010309536321097015, + "grad_norm": 1.1025861501693726, + "learning_rate": 3.435623749166111e-06, + "loss": 0.3409, + "step": 515 + }, + { + "epoch": 0.010329554838225358, + "grad_norm": 1.0312137603759766, + "learning_rate": 3.4422948632421614e-06, + "loss": 0.3266, + "step": 516 + }, + { + "epoch": 0.010349573355353702, + "grad_norm": 1.8884310722351074, + "learning_rate": 3.4489659773182126e-06, + "loss": 0.8748, + "step": 517 + }, + { + "epoch": 0.010369591872482046, + "grad_norm": 0.9991918206214905, + "learning_rate": 3.4556370913942633e-06, + "loss": 0.3046, + "step": 518 + }, + { + "epoch": 0.01038961038961039, + "grad_norm": 0.9954031109809875, + "learning_rate": 3.462308205470314e-06, + "loss": 0.3145, + "step": 519 + }, + { + "epoch": 0.010409628906738733, + "grad_norm": 1.8633460998535156, + "learning_rate": 3.4689793195463644e-06, + "loss": 0.8059, + "step": 520 + }, + { + "epoch": 0.010429647423867077, + "grad_norm": 1.0385255813598633, + "learning_rate": 3.475650433622415e-06, + "loss": 0.334, + "step": 521 + }, + { + "epoch": 0.01044966594099542, + "grad_norm": 1.0052446126937866, + "learning_rate": 3.4823215476984663e-06, + "loss": 0.2905, + "step": 522 + }, + { + "epoch": 0.010469684458123765, + "grad_norm": 1.0553507804870605, + "learning_rate": 3.4889926617745167e-06, + "loss": 0.3102, + "step": 523 + }, + { + "epoch": 0.010489702975252108, + "grad_norm": 1.0677298307418823, + "learning_rate": 3.4956637758505674e-06, + "loss": 0.297, + "step": 524 + }, + { + "epoch": 0.010509721492380452, + "grad_norm": 1.174546480178833, + "learning_rate": 3.5023348899266178e-06, + "loss": 0.3375, + "step": 525 + }, + { + "epoch": 0.010529740009508796, + "grad_norm": 1.053247094154358, + "learning_rate": 3.5090060040026685e-06, + "loss": 0.3405, + "step": 526 + }, + { + "epoch": 0.01054975852663714, + "grad_norm": 1.0867242813110352, + "learning_rate": 3.5156771180787197e-06, + "loss": 0.3258, + "step": 527 + }, + { + "epoch": 0.010569777043765483, + "grad_norm": 1.0293691158294678, + "learning_rate": 3.5223482321547705e-06, + "loss": 0.3225, + "step": 528 + }, + { + "epoch": 0.010589795560893827, + "grad_norm": 1.1773707866668701, + "learning_rate": 3.529019346230821e-06, + "loss": 0.2936, + "step": 529 + }, + { + "epoch": 0.01060981407802217, + "grad_norm": 1.0127743482589722, + "learning_rate": 3.5356904603068716e-06, + "loss": 0.3075, + "step": 530 + }, + { + "epoch": 0.010629832595150514, + "grad_norm": 1.1471476554870605, + "learning_rate": 3.542361574382922e-06, + "loss": 0.3295, + "step": 531 + }, + { + "epoch": 0.010649851112278858, + "grad_norm": 1.2964189052581787, + "learning_rate": 3.549032688458973e-06, + "loss": 0.366, + "step": 532 + }, + { + "epoch": 0.010669869629407202, + "grad_norm": 0.9850085377693176, + "learning_rate": 3.555703802535024e-06, + "loss": 0.3151, + "step": 533 + }, + { + "epoch": 0.010689888146535546, + "grad_norm": 1.060073971748352, + "learning_rate": 3.562374916611074e-06, + "loss": 0.3371, + "step": 534 + }, + { + "epoch": 0.01070990666366389, + "grad_norm": 1.0383378267288208, + "learning_rate": 3.569046030687125e-06, + "loss": 0.3205, + "step": 535 + }, + { + "epoch": 0.010729925180792233, + "grad_norm": 1.2513034343719482, + "learning_rate": 3.5757171447631757e-06, + "loss": 0.3229, + "step": 536 + }, + { + "epoch": 0.010749943697920577, + "grad_norm": 1.0815403461456299, + "learning_rate": 3.582388258839227e-06, + "loss": 0.3436, + "step": 537 + }, + { + "epoch": 0.01076996221504892, + "grad_norm": 1.1383118629455566, + "learning_rate": 3.589059372915277e-06, + "loss": 0.317, + "step": 538 + }, + { + "epoch": 0.010789980732177264, + "grad_norm": 1.025688886642456, + "learning_rate": 3.595730486991328e-06, + "loss": 0.3489, + "step": 539 + }, + { + "epoch": 0.010809999249305608, + "grad_norm": 1.8311266899108887, + "learning_rate": 3.6024016010673783e-06, + "loss": 0.8379, + "step": 540 + }, + { + "epoch": 0.010830017766433952, + "grad_norm": 1.0315309762954712, + "learning_rate": 3.609072715143429e-06, + "loss": 0.3592, + "step": 541 + }, + { + "epoch": 0.010850036283562296, + "grad_norm": 1.7086553573608398, + "learning_rate": 3.6157438292194802e-06, + "loss": 0.9336, + "step": 542 + }, + { + "epoch": 0.01087005480069064, + "grad_norm": 1.0413861274719238, + "learning_rate": 3.622414943295531e-06, + "loss": 0.3232, + "step": 543 + }, + { + "epoch": 0.010890073317818983, + "grad_norm": 1.093281626701355, + "learning_rate": 3.6290860573715813e-06, + "loss": 0.293, + "step": 544 + }, + { + "epoch": 0.010910091834947327, + "grad_norm": 1.0125129222869873, + "learning_rate": 3.635757171447632e-06, + "loss": 0.3947, + "step": 545 + }, + { + "epoch": 0.01093011035207567, + "grad_norm": 0.960318922996521, + "learning_rate": 3.6424282855236824e-06, + "loss": 0.2931, + "step": 546 + }, + { + "epoch": 0.010950128869204014, + "grad_norm": 1.088364601135254, + "learning_rate": 3.6490993995997336e-06, + "loss": 0.3456, + "step": 547 + }, + { + "epoch": 0.010970147386332358, + "grad_norm": 1.0800566673278809, + "learning_rate": 3.6557705136757844e-06, + "loss": 0.3551, + "step": 548 + }, + { + "epoch": 0.010990165903460702, + "grad_norm": 1.216808795928955, + "learning_rate": 3.6624416277518347e-06, + "loss": 0.3281, + "step": 549 + }, + { + "epoch": 0.011010184420589045, + "grad_norm": 1.10563063621521, + "learning_rate": 3.6691127418278855e-06, + "loss": 0.2966, + "step": 550 + }, + { + "epoch": 0.01103020293771739, + "grad_norm": 1.049318790435791, + "learning_rate": 3.675783855903936e-06, + "loss": 0.3191, + "step": 551 + }, + { + "epoch": 0.011050221454845733, + "grad_norm": 1.0769530534744263, + "learning_rate": 3.6824549699799874e-06, + "loss": 0.3249, + "step": 552 + }, + { + "epoch": 0.011070239971974077, + "grad_norm": 1.2300605773925781, + "learning_rate": 3.6891260840560377e-06, + "loss": 0.3655, + "step": 553 + }, + { + "epoch": 0.01109025848910242, + "grad_norm": 1.7954230308532715, + "learning_rate": 3.6957971981320885e-06, + "loss": 0.84, + "step": 554 + }, + { + "epoch": 0.011110277006230764, + "grad_norm": 1.1987940073013306, + "learning_rate": 3.702468312208139e-06, + "loss": 0.3727, + "step": 555 + }, + { + "epoch": 0.011130295523359108, + "grad_norm": 1.0549994707107544, + "learning_rate": 3.7091394262841896e-06, + "loss": 0.3494, + "step": 556 + }, + { + "epoch": 0.011150314040487452, + "grad_norm": 1.1700570583343506, + "learning_rate": 3.7158105403602408e-06, + "loss": 0.297, + "step": 557 + }, + { + "epoch": 0.011170332557615795, + "grad_norm": 1.1900895833969116, + "learning_rate": 3.722481654436291e-06, + "loss": 0.354, + "step": 558 + }, + { + "epoch": 0.011190351074744139, + "grad_norm": 1.0252695083618164, + "learning_rate": 3.729152768512342e-06, + "loss": 0.2824, + "step": 559 + }, + { + "epoch": 0.011210369591872483, + "grad_norm": 2.010692596435547, + "learning_rate": 3.7358238825883926e-06, + "loss": 0.95, + "step": 560 + }, + { + "epoch": 0.011230388109000827, + "grad_norm": 1.1479687690734863, + "learning_rate": 3.742494996664443e-06, + "loss": 0.349, + "step": 561 + }, + { + "epoch": 0.01125040662612917, + "grad_norm": 1.1856940984725952, + "learning_rate": 3.749166110740494e-06, + "loss": 0.3098, + "step": 562 + }, + { + "epoch": 0.011270425143257512, + "grad_norm": 1.1494914293289185, + "learning_rate": 3.755837224816545e-06, + "loss": 0.3121, + "step": 563 + }, + { + "epoch": 0.011290443660385856, + "grad_norm": 0.9775360822677612, + "learning_rate": 3.7625083388925952e-06, + "loss": 0.3659, + "step": 564 + }, + { + "epoch": 0.0113104621775142, + "grad_norm": 1.0859959125518799, + "learning_rate": 3.769179452968646e-06, + "loss": 0.3259, + "step": 565 + }, + { + "epoch": 0.011330480694642544, + "grad_norm": 1.0885742902755737, + "learning_rate": 3.7758505670446967e-06, + "loss": 0.3909, + "step": 566 + }, + { + "epoch": 0.011350499211770887, + "grad_norm": 1.772613286972046, + "learning_rate": 3.7825216811207475e-06, + "loss": 0.8777, + "step": 567 + }, + { + "epoch": 0.011370517728899231, + "grad_norm": 1.125032663345337, + "learning_rate": 3.7891927951967982e-06, + "loss": 0.3301, + "step": 568 + }, + { + "epoch": 0.011390536246027575, + "grad_norm": 1.1916085481643677, + "learning_rate": 3.795863909272849e-06, + "loss": 0.3314, + "step": 569 + }, + { + "epoch": 0.011410554763155919, + "grad_norm": 1.1391597986221313, + "learning_rate": 3.8025350233488993e-06, + "loss": 0.3126, + "step": 570 + }, + { + "epoch": 0.011430573280284262, + "grad_norm": 0.9555512070655823, + "learning_rate": 3.80920613742495e-06, + "loss": 0.3086, + "step": 571 + }, + { + "epoch": 0.011450591797412606, + "grad_norm": 1.077433466911316, + "learning_rate": 3.815877251501001e-06, + "loss": 0.3098, + "step": 572 + }, + { + "epoch": 0.01147061031454095, + "grad_norm": 1.306694507598877, + "learning_rate": 3.822548365577052e-06, + "loss": 0.3422, + "step": 573 + }, + { + "epoch": 0.011490628831669293, + "grad_norm": 1.0814247131347656, + "learning_rate": 3.829219479653102e-06, + "loss": 0.306, + "step": 574 + }, + { + "epoch": 0.011510647348797637, + "grad_norm": 1.0253669023513794, + "learning_rate": 3.835890593729153e-06, + "loss": 0.2951, + "step": 575 + }, + { + "epoch": 0.011530665865925981, + "grad_norm": 1.0793170928955078, + "learning_rate": 3.8425617078052035e-06, + "loss": 0.3397, + "step": 576 + }, + { + "epoch": 0.011550684383054325, + "grad_norm": 1.2146247625350952, + "learning_rate": 3.849232821881255e-06, + "loss": 0.2974, + "step": 577 + }, + { + "epoch": 0.011570702900182668, + "grad_norm": 1.0380836725234985, + "learning_rate": 3.855903935957305e-06, + "loss": 0.337, + "step": 578 + }, + { + "epoch": 0.011590721417311012, + "grad_norm": 1.081396222114563, + "learning_rate": 3.862575050033356e-06, + "loss": 0.3772, + "step": 579 + }, + { + "epoch": 0.011610739934439356, + "grad_norm": 0.9943265914916992, + "learning_rate": 3.8692461641094065e-06, + "loss": 0.289, + "step": 580 + }, + { + "epoch": 0.0116307584515677, + "grad_norm": 1.273555874824524, + "learning_rate": 3.875917278185457e-06, + "loss": 0.3258, + "step": 581 + }, + { + "epoch": 0.011650776968696043, + "grad_norm": 1.0617831945419312, + "learning_rate": 3.882588392261508e-06, + "loss": 0.3193, + "step": 582 + }, + { + "epoch": 0.011670795485824387, + "grad_norm": 1.1736116409301758, + "learning_rate": 3.889259506337559e-06, + "loss": 0.3543, + "step": 583 + }, + { + "epoch": 0.01169081400295273, + "grad_norm": 1.075273871421814, + "learning_rate": 3.8959306204136095e-06, + "loss": 0.3427, + "step": 584 + }, + { + "epoch": 0.011710832520081075, + "grad_norm": 1.1273791790008545, + "learning_rate": 3.90260173448966e-06, + "loss": 0.3189, + "step": 585 + }, + { + "epoch": 0.011730851037209418, + "grad_norm": 1.1166162490844727, + "learning_rate": 3.90927284856571e-06, + "loss": 0.3361, + "step": 586 + }, + { + "epoch": 0.011750869554337762, + "grad_norm": 1.8246535062789917, + "learning_rate": 3.915943962641761e-06, + "loss": 0.8342, + "step": 587 + }, + { + "epoch": 0.011770888071466106, + "grad_norm": 1.0799263715744019, + "learning_rate": 3.9226150767178126e-06, + "loss": 0.3484, + "step": 588 + }, + { + "epoch": 0.01179090658859445, + "grad_norm": 1.0228983163833618, + "learning_rate": 3.929286190793863e-06, + "loss": 0.2832, + "step": 589 + }, + { + "epoch": 0.011810925105722793, + "grad_norm": 1.0778990983963013, + "learning_rate": 3.935957304869913e-06, + "loss": 0.3178, + "step": 590 + }, + { + "epoch": 0.011830943622851137, + "grad_norm": 1.057736873626709, + "learning_rate": 3.942628418945964e-06, + "loss": 0.3343, + "step": 591 + }, + { + "epoch": 0.01185096213997948, + "grad_norm": 1.117945671081543, + "learning_rate": 3.949299533022016e-06, + "loss": 0.3676, + "step": 592 + }, + { + "epoch": 0.011870980657107824, + "grad_norm": 1.1533067226409912, + "learning_rate": 3.955970647098066e-06, + "loss": 0.3486, + "step": 593 + }, + { + "epoch": 0.011890999174236168, + "grad_norm": 0.9574215412139893, + "learning_rate": 3.962641761174116e-06, + "loss": 0.3227, + "step": 594 + }, + { + "epoch": 0.011911017691364512, + "grad_norm": 1.048217535018921, + "learning_rate": 3.969312875250167e-06, + "loss": 0.3453, + "step": 595 + }, + { + "epoch": 0.011931036208492856, + "grad_norm": 1.1853567361831665, + "learning_rate": 3.975983989326218e-06, + "loss": 0.3563, + "step": 596 + }, + { + "epoch": 0.0119510547256212, + "grad_norm": 1.0789257287979126, + "learning_rate": 3.982655103402269e-06, + "loss": 0.3086, + "step": 597 + }, + { + "epoch": 0.011971073242749543, + "grad_norm": 1.0702587366104126, + "learning_rate": 3.989326217478319e-06, + "loss": 0.3416, + "step": 598 + }, + { + "epoch": 0.011991091759877887, + "grad_norm": 1.0946346521377563, + "learning_rate": 3.99599733155437e-06, + "loss": 0.3827, + "step": 599 + }, + { + "epoch": 0.01201111027700623, + "grad_norm": 1.0305372476577759, + "learning_rate": 4.002668445630421e-06, + "loss": 0.3436, + "step": 600 + }, + { + "epoch": 0.012031128794134574, + "grad_norm": 1.0979708433151245, + "learning_rate": 4.009339559706471e-06, + "loss": 0.3302, + "step": 601 + }, + { + "epoch": 0.012051147311262918, + "grad_norm": 1.0583938360214233, + "learning_rate": 4.016010673782522e-06, + "loss": 0.3399, + "step": 602 + }, + { + "epoch": 0.012071165828391262, + "grad_norm": 1.0054504871368408, + "learning_rate": 4.022681787858573e-06, + "loss": 0.3407, + "step": 603 + }, + { + "epoch": 0.012091184345519606, + "grad_norm": 1.4358744621276855, + "learning_rate": 4.029352901934623e-06, + "loss": 0.3145, + "step": 604 + }, + { + "epoch": 0.01211120286264795, + "grad_norm": 1.7911733388900757, + "learning_rate": 4.036024016010674e-06, + "loss": 0.8923, + "step": 605 + }, + { + "epoch": 0.012131221379776293, + "grad_norm": 1.0117384195327759, + "learning_rate": 4.0426951300867245e-06, + "loss": 0.3305, + "step": 606 + }, + { + "epoch": 0.012151239896904637, + "grad_norm": 1.012426495552063, + "learning_rate": 4.049366244162776e-06, + "loss": 0.3292, + "step": 607 + }, + { + "epoch": 0.01217125841403298, + "grad_norm": 1.2818412780761719, + "learning_rate": 4.056037358238826e-06, + "loss": 0.3822, + "step": 608 + }, + { + "epoch": 0.012191276931161324, + "grad_norm": 1.2952145338058472, + "learning_rate": 4.062708472314877e-06, + "loss": 0.3459, + "step": 609 + }, + { + "epoch": 0.012211295448289668, + "grad_norm": 1.2246981859207153, + "learning_rate": 4.0693795863909275e-06, + "loss": 0.3718, + "step": 610 + }, + { + "epoch": 0.012231313965418012, + "grad_norm": 1.1137161254882812, + "learning_rate": 4.076050700466979e-06, + "loss": 0.3072, + "step": 611 + }, + { + "epoch": 0.012251332482546355, + "grad_norm": 1.1160073280334473, + "learning_rate": 4.082721814543029e-06, + "loss": 0.3305, + "step": 612 + }, + { + "epoch": 0.0122713509996747, + "grad_norm": 1.1289676427841187, + "learning_rate": 4.089392928619079e-06, + "loss": 0.3582, + "step": 613 + }, + { + "epoch": 0.012291369516803043, + "grad_norm": 1.1623421907424927, + "learning_rate": 4.0960640426951306e-06, + "loss": 0.3425, + "step": 614 + }, + { + "epoch": 0.012311388033931387, + "grad_norm": 1.053159236907959, + "learning_rate": 4.102735156771181e-06, + "loss": 0.2962, + "step": 615 + }, + { + "epoch": 0.01233140655105973, + "grad_norm": 1.1663142442703247, + "learning_rate": 4.109406270847232e-06, + "loss": 0.3658, + "step": 616 + }, + { + "epoch": 0.012351425068188074, + "grad_norm": 1.0641961097717285, + "learning_rate": 4.116077384923282e-06, + "loss": 0.3245, + "step": 617 + }, + { + "epoch": 0.012371443585316418, + "grad_norm": 1.0217220783233643, + "learning_rate": 4.122748498999334e-06, + "loss": 0.3348, + "step": 618 + }, + { + "epoch": 0.012391462102444762, + "grad_norm": 1.1131799221038818, + "learning_rate": 4.129419613075384e-06, + "loss": 0.328, + "step": 619 + }, + { + "epoch": 0.012411480619573105, + "grad_norm": 1.9548743963241577, + "learning_rate": 4.136090727151434e-06, + "loss": 0.9018, + "step": 620 + }, + { + "epoch": 0.01243149913670145, + "grad_norm": 1.0293314456939697, + "learning_rate": 4.1427618412274854e-06, + "loss": 0.3281, + "step": 621 + }, + { + "epoch": 0.012451517653829793, + "grad_norm": 1.0007556676864624, + "learning_rate": 4.149432955303536e-06, + "loss": 0.344, + "step": 622 + }, + { + "epoch": 0.012471536170958137, + "grad_norm": 1.0996240377426147, + "learning_rate": 4.156104069379587e-06, + "loss": 0.3793, + "step": 623 + }, + { + "epoch": 0.01249155468808648, + "grad_norm": 1.9567642211914062, + "learning_rate": 4.162775183455637e-06, + "loss": 0.8572, + "step": 624 + }, + { + "epoch": 0.012511573205214824, + "grad_norm": 0.9039920568466187, + "learning_rate": 4.169446297531688e-06, + "loss": 0.2868, + "step": 625 + }, + { + "epoch": 0.012531591722343168, + "grad_norm": 0.9794298410415649, + "learning_rate": 4.176117411607739e-06, + "loss": 0.3446, + "step": 626 + }, + { + "epoch": 0.012551610239471512, + "grad_norm": 1.2308950424194336, + "learning_rate": 4.18278852568379e-06, + "loss": 0.3226, + "step": 627 + }, + { + "epoch": 0.012571628756599855, + "grad_norm": 1.1297086477279663, + "learning_rate": 4.18945963975984e-06, + "loss": 0.3578, + "step": 628 + }, + { + "epoch": 0.012591647273728199, + "grad_norm": 1.0977541208267212, + "learning_rate": 4.196130753835891e-06, + "loss": 0.3534, + "step": 629 + }, + { + "epoch": 0.012611665790856543, + "grad_norm": 0.9931874871253967, + "learning_rate": 4.202801867911942e-06, + "loss": 0.2923, + "step": 630 + }, + { + "epoch": 0.012631684307984887, + "grad_norm": 0.9475472569465637, + "learning_rate": 4.209472981987992e-06, + "loss": 0.3516, + "step": 631 + }, + { + "epoch": 0.01265170282511323, + "grad_norm": 0.9974777698516846, + "learning_rate": 4.216144096064043e-06, + "loss": 0.3343, + "step": 632 + }, + { + "epoch": 0.012671721342241574, + "grad_norm": 1.0053889751434326, + "learning_rate": 4.222815210140094e-06, + "loss": 0.3283, + "step": 633 + }, + { + "epoch": 0.012691739859369918, + "grad_norm": 1.0101441144943237, + "learning_rate": 4.229486324216144e-06, + "loss": 0.2823, + "step": 634 + }, + { + "epoch": 0.012711758376498261, + "grad_norm": 1.1309189796447754, + "learning_rate": 4.236157438292195e-06, + "loss": 0.4209, + "step": 635 + }, + { + "epoch": 0.012731776893626605, + "grad_norm": 1.135191798210144, + "learning_rate": 4.242828552368246e-06, + "loss": 0.3237, + "step": 636 + }, + { + "epoch": 0.012751795410754949, + "grad_norm": 1.8790745735168457, + "learning_rate": 4.249499666444297e-06, + "loss": 0.891, + "step": 637 + }, + { + "epoch": 0.012771813927883293, + "grad_norm": 1.1304638385772705, + "learning_rate": 4.256170780520347e-06, + "loss": 0.391, + "step": 638 + }, + { + "epoch": 0.012791832445011636, + "grad_norm": 1.1830172538757324, + "learning_rate": 4.262841894596398e-06, + "loss": 0.3581, + "step": 639 + }, + { + "epoch": 0.01281185096213998, + "grad_norm": 1.1919174194335938, + "learning_rate": 4.2695130086724486e-06, + "loss": 0.3345, + "step": 640 + }, + { + "epoch": 0.012831869479268324, + "grad_norm": 1.0451879501342773, + "learning_rate": 4.2761841227485e-06, + "loss": 0.3524, + "step": 641 + }, + { + "epoch": 0.012851887996396668, + "grad_norm": 1.2176847457885742, + "learning_rate": 4.28285523682455e-06, + "loss": 0.2887, + "step": 642 + }, + { + "epoch": 0.012871906513525011, + "grad_norm": 1.4422607421875, + "learning_rate": 4.2895263509006e-06, + "loss": 0.3973, + "step": 643 + }, + { + "epoch": 0.012891925030653355, + "grad_norm": 1.200056791305542, + "learning_rate": 4.296197464976652e-06, + "loss": 0.3619, + "step": 644 + }, + { + "epoch": 0.012911943547781699, + "grad_norm": 1.032428503036499, + "learning_rate": 4.302868579052702e-06, + "loss": 0.3031, + "step": 645 + }, + { + "epoch": 0.012931962064910043, + "grad_norm": 1.840067744255066, + "learning_rate": 4.309539693128753e-06, + "loss": 0.9108, + "step": 646 + }, + { + "epoch": 0.012951980582038386, + "grad_norm": 0.9780956506729126, + "learning_rate": 4.3162108072048035e-06, + "loss": 0.3108, + "step": 647 + }, + { + "epoch": 0.01297199909916673, + "grad_norm": 1.0557570457458496, + "learning_rate": 4.322881921280855e-06, + "loss": 0.3513, + "step": 648 + }, + { + "epoch": 0.012992017616295074, + "grad_norm": 1.379356026649475, + "learning_rate": 4.329553035356905e-06, + "loss": 0.3381, + "step": 649 + }, + { + "epoch": 0.013012036133423416, + "grad_norm": 1.2378547191619873, + "learning_rate": 4.336224149432955e-06, + "loss": 0.3041, + "step": 650 + }, + { + "epoch": 0.01303205465055176, + "grad_norm": 1.044453740119934, + "learning_rate": 4.3428952635090065e-06, + "loss": 0.3056, + "step": 651 + }, + { + "epoch": 0.013052073167680103, + "grad_norm": 2.0525758266448975, + "learning_rate": 4.349566377585057e-06, + "loss": 0.8765, + "step": 652 + }, + { + "epoch": 0.013072091684808447, + "grad_norm": 1.1354281902313232, + "learning_rate": 4.356237491661108e-06, + "loss": 0.3335, + "step": 653 + }, + { + "epoch": 0.01309211020193679, + "grad_norm": 1.2277562618255615, + "learning_rate": 4.362908605737158e-06, + "loss": 0.3427, + "step": 654 + }, + { + "epoch": 0.013112128719065134, + "grad_norm": 1.0401055812835693, + "learning_rate": 4.369579719813209e-06, + "loss": 0.3962, + "step": 655 + }, + { + "epoch": 0.013132147236193478, + "grad_norm": 0.9584609866142273, + "learning_rate": 4.37625083388926e-06, + "loss": 0.31, + "step": 656 + }, + { + "epoch": 0.013152165753321822, + "grad_norm": 1.0532187223434448, + "learning_rate": 4.382921947965311e-06, + "loss": 0.3656, + "step": 657 + }, + { + "epoch": 0.013172184270450166, + "grad_norm": 1.218270182609558, + "learning_rate": 4.389593062041361e-06, + "loss": 0.3366, + "step": 658 + }, + { + "epoch": 0.01319220278757851, + "grad_norm": 1.0967011451721191, + "learning_rate": 4.396264176117412e-06, + "loss": 0.3531, + "step": 659 + }, + { + "epoch": 0.013212221304706853, + "grad_norm": 1.014241099357605, + "learning_rate": 4.402935290193462e-06, + "loss": 0.2883, + "step": 660 + }, + { + "epoch": 0.013232239821835197, + "grad_norm": 1.0188730955123901, + "learning_rate": 4.409606404269513e-06, + "loss": 0.3595, + "step": 661 + }, + { + "epoch": 0.01325225833896354, + "grad_norm": 1.8835114240646362, + "learning_rate": 4.416277518345564e-06, + "loss": 0.8798, + "step": 662 + }, + { + "epoch": 0.013272276856091884, + "grad_norm": 0.9688794016838074, + "learning_rate": 4.422948632421615e-06, + "loss": 0.3089, + "step": 663 + }, + { + "epoch": 0.013292295373220228, + "grad_norm": 1.1278038024902344, + "learning_rate": 4.429619746497665e-06, + "loss": 0.3586, + "step": 664 + }, + { + "epoch": 0.013312313890348572, + "grad_norm": 1.0792821645736694, + "learning_rate": 4.436290860573716e-06, + "loss": 0.3721, + "step": 665 + }, + { + "epoch": 0.013332332407476916, + "grad_norm": 1.1023694276809692, + "learning_rate": 4.4429619746497674e-06, + "loss": 0.3323, + "step": 666 + }, + { + "epoch": 0.01335235092460526, + "grad_norm": 1.2988243103027344, + "learning_rate": 4.449633088725818e-06, + "loss": 0.3502, + "step": 667 + }, + { + "epoch": 0.013372369441733603, + "grad_norm": 1.2119553089141846, + "learning_rate": 4.456304202801868e-06, + "loss": 0.3004, + "step": 668 + }, + { + "epoch": 0.013392387958861947, + "grad_norm": 1.2960237264633179, + "learning_rate": 4.4629753168779184e-06, + "loss": 0.3748, + "step": 669 + }, + { + "epoch": 0.01341240647599029, + "grad_norm": 1.080484390258789, + "learning_rate": 4.46964643095397e-06, + "loss": 0.3306, + "step": 670 + }, + { + "epoch": 0.013432424993118634, + "grad_norm": 1.0395945310592651, + "learning_rate": 4.476317545030021e-06, + "loss": 0.3288, + "step": 671 + }, + { + "epoch": 0.013452443510246978, + "grad_norm": 1.446455717086792, + "learning_rate": 4.482988659106071e-06, + "loss": 0.3099, + "step": 672 + }, + { + "epoch": 0.013472462027375322, + "grad_norm": 1.1712431907653809, + "learning_rate": 4.4896597731821215e-06, + "loss": 0.3129, + "step": 673 + }, + { + "epoch": 0.013492480544503666, + "grad_norm": 1.9456350803375244, + "learning_rate": 4.496330887258173e-06, + "loss": 0.8918, + "step": 674 + }, + { + "epoch": 0.01351249906163201, + "grad_norm": 1.2756091356277466, + "learning_rate": 4.503002001334223e-06, + "loss": 0.3822, + "step": 675 + }, + { + "epoch": 0.013532517578760353, + "grad_norm": 1.1245425939559937, + "learning_rate": 4.509673115410274e-06, + "loss": 0.3046, + "step": 676 + }, + { + "epoch": 0.013552536095888697, + "grad_norm": 0.9850724339485168, + "learning_rate": 4.5163442294863245e-06, + "loss": 0.2883, + "step": 677 + }, + { + "epoch": 0.01357255461301704, + "grad_norm": 1.3024473190307617, + "learning_rate": 4.523015343562376e-06, + "loss": 0.3834, + "step": 678 + }, + { + "epoch": 0.013592573130145384, + "grad_norm": 0.9729016423225403, + "learning_rate": 4.529686457638426e-06, + "loss": 0.3458, + "step": 679 + }, + { + "epoch": 0.013612591647273728, + "grad_norm": 0.9915385246276855, + "learning_rate": 4.536357571714476e-06, + "loss": 0.2846, + "step": 680 + }, + { + "epoch": 0.013632610164402072, + "grad_norm": 1.2044706344604492, + "learning_rate": 4.5430286857905275e-06, + "loss": 0.3433, + "step": 681 + }, + { + "epoch": 0.013652628681530415, + "grad_norm": 0.9916861653327942, + "learning_rate": 4.549699799866578e-06, + "loss": 0.3348, + "step": 682 + }, + { + "epoch": 0.01367264719865876, + "grad_norm": 1.054591417312622, + "learning_rate": 4.556370913942629e-06, + "loss": 0.3165, + "step": 683 + }, + { + "epoch": 0.013692665715787103, + "grad_norm": 1.062982201576233, + "learning_rate": 4.563042028018679e-06, + "loss": 0.3419, + "step": 684 + }, + { + "epoch": 0.013712684232915447, + "grad_norm": 1.7670471668243408, + "learning_rate": 4.56971314209473e-06, + "loss": 0.8415, + "step": 685 + }, + { + "epoch": 0.01373270275004379, + "grad_norm": 1.1971651315689087, + "learning_rate": 4.576384256170781e-06, + "loss": 0.3369, + "step": 686 + }, + { + "epoch": 0.013752721267172134, + "grad_norm": 1.3625233173370361, + "learning_rate": 4.583055370246832e-06, + "loss": 0.3707, + "step": 687 + }, + { + "epoch": 0.013772739784300478, + "grad_norm": 1.1944586038589478, + "learning_rate": 4.589726484322882e-06, + "loss": 0.363, + "step": 688 + }, + { + "epoch": 0.013792758301428822, + "grad_norm": 1.0634516477584839, + "learning_rate": 4.596397598398933e-06, + "loss": 0.3118, + "step": 689 + }, + { + "epoch": 0.013812776818557165, + "grad_norm": 1.111507534980774, + "learning_rate": 4.603068712474983e-06, + "loss": 0.3704, + "step": 690 + }, + { + "epoch": 0.013832795335685509, + "grad_norm": 1.0305033922195435, + "learning_rate": 4.609739826551034e-06, + "loss": 0.3058, + "step": 691 + }, + { + "epoch": 0.013852813852813853, + "grad_norm": 1.076710820198059, + "learning_rate": 4.6164109406270854e-06, + "loss": 0.2921, + "step": 692 + }, + { + "epoch": 0.013872832369942197, + "grad_norm": 1.2386780977249146, + "learning_rate": 4.623082054703136e-06, + "loss": 0.3152, + "step": 693 + }, + { + "epoch": 0.01389285088707054, + "grad_norm": 1.0874847173690796, + "learning_rate": 4.629753168779186e-06, + "loss": 0.3377, + "step": 694 + }, + { + "epoch": 0.013912869404198884, + "grad_norm": 0.9927990436553955, + "learning_rate": 4.636424282855237e-06, + "loss": 0.3405, + "step": 695 + }, + { + "epoch": 0.013932887921327228, + "grad_norm": 1.1948816776275635, + "learning_rate": 4.6430953969312885e-06, + "loss": 0.3585, + "step": 696 + }, + { + "epoch": 0.013952906438455571, + "grad_norm": 1.143511176109314, + "learning_rate": 4.649766511007339e-06, + "loss": 0.4206, + "step": 697 + }, + { + "epoch": 0.013972924955583915, + "grad_norm": 1.3767986297607422, + "learning_rate": 4.656437625083389e-06, + "loss": 0.3753, + "step": 698 + }, + { + "epoch": 0.013992943472712259, + "grad_norm": 1.929076910018921, + "learning_rate": 4.6631087391594395e-06, + "loss": 0.7952, + "step": 699 + }, + { + "epoch": 0.014012961989840603, + "grad_norm": 1.115427851676941, + "learning_rate": 4.669779853235491e-06, + "loss": 0.3455, + "step": 700 + }, + { + "epoch": 0.014032980506968946, + "grad_norm": 1.0962141752243042, + "learning_rate": 4.676450967311542e-06, + "loss": 0.3802, + "step": 701 + }, + { + "epoch": 0.01405299902409729, + "grad_norm": 0.992607831954956, + "learning_rate": 4.683122081387592e-06, + "loss": 0.3136, + "step": 702 + }, + { + "epoch": 0.014073017541225634, + "grad_norm": 1.991264820098877, + "learning_rate": 4.6897931954636425e-06, + "loss": 0.8426, + "step": 703 + }, + { + "epoch": 0.014093036058353978, + "grad_norm": 1.1921156644821167, + "learning_rate": 4.696464309539694e-06, + "loss": 0.3304, + "step": 704 + }, + { + "epoch": 0.014113054575482321, + "grad_norm": 1.2436918020248413, + "learning_rate": 4.703135423615745e-06, + "loss": 0.3738, + "step": 705 + }, + { + "epoch": 0.014133073092610665, + "grad_norm": 1.9215762615203857, + "learning_rate": 4.709806537691795e-06, + "loss": 0.8442, + "step": 706 + }, + { + "epoch": 0.014153091609739009, + "grad_norm": 1.0876973867416382, + "learning_rate": 4.7164776517678455e-06, + "loss": 0.3831, + "step": 707 + }, + { + "epoch": 0.014173110126867353, + "grad_norm": 0.9643149375915527, + "learning_rate": 4.723148765843896e-06, + "loss": 0.3526, + "step": 708 + }, + { + "epoch": 0.014193128643995696, + "grad_norm": 1.1013644933700562, + "learning_rate": 4.729819879919947e-06, + "loss": 0.3093, + "step": 709 + }, + { + "epoch": 0.01421314716112404, + "grad_norm": 1.100867748260498, + "learning_rate": 4.736490993995998e-06, + "loss": 0.3496, + "step": 710 + }, + { + "epoch": 0.014233165678252384, + "grad_norm": 1.0025042295455933, + "learning_rate": 4.7431621080720486e-06, + "loss": 0.3222, + "step": 711 + }, + { + "epoch": 0.014253184195380728, + "grad_norm": 1.0350797176361084, + "learning_rate": 4.749833222148099e-06, + "loss": 0.3032, + "step": 712 + }, + { + "epoch": 0.014273202712509071, + "grad_norm": 1.7595738172531128, + "learning_rate": 4.75650433622415e-06, + "loss": 0.8547, + "step": 713 + }, + { + "epoch": 0.014293221229637415, + "grad_norm": 1.158739447593689, + "learning_rate": 4.7631754503002e-06, + "loss": 0.3528, + "step": 714 + }, + { + "epoch": 0.014313239746765759, + "grad_norm": 1.008568525314331, + "learning_rate": 4.769846564376252e-06, + "loss": 0.3502, + "step": 715 + }, + { + "epoch": 0.014333258263894103, + "grad_norm": 1.2603434324264526, + "learning_rate": 4.776517678452302e-06, + "loss": 0.321, + "step": 716 + }, + { + "epoch": 0.014353276781022446, + "grad_norm": 1.0145790576934814, + "learning_rate": 4.783188792528352e-06, + "loss": 0.3461, + "step": 717 + }, + { + "epoch": 0.01437329529815079, + "grad_norm": 1.1148978471755981, + "learning_rate": 4.7898599066044034e-06, + "loss": 0.3323, + "step": 718 + }, + { + "epoch": 0.014393313815279134, + "grad_norm": 1.0567805767059326, + "learning_rate": 4.796531020680454e-06, + "loss": 0.3346, + "step": 719 + }, + { + "epoch": 0.014413332332407477, + "grad_norm": 1.018010139465332, + "learning_rate": 4.803202134756505e-06, + "loss": 0.3359, + "step": 720 + }, + { + "epoch": 0.014433350849535821, + "grad_norm": 1.066069483757019, + "learning_rate": 4.809873248832555e-06, + "loss": 0.3385, + "step": 721 + }, + { + "epoch": 0.014453369366664165, + "grad_norm": 1.1362656354904175, + "learning_rate": 4.8165443629086065e-06, + "loss": 0.3412, + "step": 722 + }, + { + "epoch": 0.014473387883792509, + "grad_norm": 1.0688517093658447, + "learning_rate": 4.823215476984657e-06, + "loss": 0.3448, + "step": 723 + }, + { + "epoch": 0.014493406400920852, + "grad_norm": 1.0384124517440796, + "learning_rate": 4.829886591060707e-06, + "loss": 0.3349, + "step": 724 + }, + { + "epoch": 0.014513424918049196, + "grad_norm": 1.1227242946624756, + "learning_rate": 4.836557705136758e-06, + "loss": 0.3762, + "step": 725 + }, + { + "epoch": 0.01453344343517754, + "grad_norm": 1.0677860975265503, + "learning_rate": 4.843228819212809e-06, + "loss": 0.3321, + "step": 726 + }, + { + "epoch": 0.014553461952305884, + "grad_norm": 1.1179914474487305, + "learning_rate": 4.84989993328886e-06, + "loss": 0.3971, + "step": 727 + }, + { + "epoch": 0.014573480469434227, + "grad_norm": 1.8414344787597656, + "learning_rate": 4.85657104736491e-06, + "loss": 0.9214, + "step": 728 + }, + { + "epoch": 0.014593498986562571, + "grad_norm": 1.021141529083252, + "learning_rate": 4.8632421614409605e-06, + "loss": 0.3891, + "step": 729 + }, + { + "epoch": 0.014613517503690915, + "grad_norm": 1.040903925895691, + "learning_rate": 4.869913275517012e-06, + "loss": 0.3653, + "step": 730 + }, + { + "epoch": 0.014633536020819259, + "grad_norm": 1.2216596603393555, + "learning_rate": 4.876584389593063e-06, + "loss": 0.325, + "step": 731 + }, + { + "epoch": 0.014653554537947602, + "grad_norm": 1.1543312072753906, + "learning_rate": 4.883255503669113e-06, + "loss": 0.3123, + "step": 732 + }, + { + "epoch": 0.014673573055075946, + "grad_norm": 1.1752945184707642, + "learning_rate": 4.8899266177451635e-06, + "loss": 0.3271, + "step": 733 + }, + { + "epoch": 0.01469359157220429, + "grad_norm": 1.024526596069336, + "learning_rate": 4.896597731821215e-06, + "loss": 0.3384, + "step": 734 + }, + { + "epoch": 0.014713610089332634, + "grad_norm": 1.1389421224594116, + "learning_rate": 4.903268845897266e-06, + "loss": 0.3324, + "step": 735 + }, + { + "epoch": 0.014733628606460976, + "grad_norm": 1.0850285291671753, + "learning_rate": 4.909939959973316e-06, + "loss": 0.3559, + "step": 736 + }, + { + "epoch": 0.01475364712358932, + "grad_norm": 1.0560129880905151, + "learning_rate": 4.9166110740493666e-06, + "loss": 0.2636, + "step": 737 + }, + { + "epoch": 0.014773665640717663, + "grad_norm": 1.0359876155853271, + "learning_rate": 4.923282188125417e-06, + "loss": 0.3056, + "step": 738 + }, + { + "epoch": 0.014793684157846007, + "grad_norm": 1.0231523513793945, + "learning_rate": 4.929953302201468e-06, + "loss": 0.3181, + "step": 739 + }, + { + "epoch": 0.01481370267497435, + "grad_norm": 1.0434857606887817, + "learning_rate": 4.936624416277519e-06, + "loss": 0.2861, + "step": 740 + }, + { + "epoch": 0.014833721192102694, + "grad_norm": 2.046856641769409, + "learning_rate": 4.94329553035357e-06, + "loss": 0.835, + "step": 741 + }, + { + "epoch": 0.014853739709231038, + "grad_norm": 1.1138389110565186, + "learning_rate": 4.94996664442962e-06, + "loss": 0.3898, + "step": 742 + }, + { + "epoch": 0.014873758226359382, + "grad_norm": 1.0657380819320679, + "learning_rate": 4.956637758505671e-06, + "loss": 0.3314, + "step": 743 + }, + { + "epoch": 0.014893776743487725, + "grad_norm": 1.2122976779937744, + "learning_rate": 4.9633088725817215e-06, + "loss": 0.3229, + "step": 744 + }, + { + "epoch": 0.01491379526061607, + "grad_norm": 1.0217492580413818, + "learning_rate": 4.969979986657773e-06, + "loss": 0.3172, + "step": 745 + }, + { + "epoch": 0.014933813777744413, + "grad_norm": 1.0692464113235474, + "learning_rate": 4.976651100733823e-06, + "loss": 0.3031, + "step": 746 + }, + { + "epoch": 0.014953832294872757, + "grad_norm": 1.0635149478912354, + "learning_rate": 4.983322214809873e-06, + "loss": 0.3182, + "step": 747 + }, + { + "epoch": 0.0149738508120011, + "grad_norm": 1.236802101135254, + "learning_rate": 4.9899933288859245e-06, + "loss": 0.3956, + "step": 748 + }, + { + "epoch": 0.014993869329129444, + "grad_norm": 1.1125539541244507, + "learning_rate": 4.996664442961975e-06, + "loss": 0.3091, + "step": 749 + }, + { + "epoch": 0.015013887846257788, + "grad_norm": 2.112318754196167, + "learning_rate": 5.003335557038026e-06, + "loss": 0.3146, + "step": 750 + }, + { + "epoch": 0.015033906363386132, + "grad_norm": 1.4249215126037598, + "learning_rate": 5.010006671114076e-06, + "loss": 0.3732, + "step": 751 + }, + { + "epoch": 0.015053924880514475, + "grad_norm": 1.1134761571884155, + "learning_rate": 5.0166777851901275e-06, + "loss": 0.3749, + "step": 752 + }, + { + "epoch": 0.015073943397642819, + "grad_norm": 1.1651510000228882, + "learning_rate": 5.023348899266179e-06, + "loss": 0.3398, + "step": 753 + }, + { + "epoch": 0.015093961914771163, + "grad_norm": 1.0603492259979248, + "learning_rate": 5.030020013342228e-06, + "loss": 0.3357, + "step": 754 + }, + { + "epoch": 0.015113980431899507, + "grad_norm": 1.0952327251434326, + "learning_rate": 5.036691127418279e-06, + "loss": 0.3361, + "step": 755 + }, + { + "epoch": 0.01513399894902785, + "grad_norm": 1.0937250852584839, + "learning_rate": 5.04336224149433e-06, + "loss": 0.3747, + "step": 756 + }, + { + "epoch": 0.015154017466156194, + "grad_norm": 1.0588411092758179, + "learning_rate": 5.050033355570381e-06, + "loss": 0.3545, + "step": 757 + }, + { + "epoch": 0.015174035983284538, + "grad_norm": 1.1448148488998413, + "learning_rate": 5.056704469646432e-06, + "loss": 0.3715, + "step": 758 + }, + { + "epoch": 0.015194054500412882, + "grad_norm": 0.9355542063713074, + "learning_rate": 5.0633755837224815e-06, + "loss": 0.3293, + "step": 759 + }, + { + "epoch": 0.015214073017541225, + "grad_norm": 1.128651738166809, + "learning_rate": 5.070046697798533e-06, + "loss": 0.3323, + "step": 760 + }, + { + "epoch": 0.015234091534669569, + "grad_norm": 1.889409065246582, + "learning_rate": 5.076717811874583e-06, + "loss": 0.7897, + "step": 761 + }, + { + "epoch": 0.015254110051797913, + "grad_norm": 1.0051321983337402, + "learning_rate": 5.083388925950634e-06, + "loss": 0.3137, + "step": 762 + }, + { + "epoch": 0.015274128568926256, + "grad_norm": 1.1863850355148315, + "learning_rate": 5.0900600400266854e-06, + "loss": 0.3347, + "step": 763 + }, + { + "epoch": 0.0152941470860546, + "grad_norm": 1.1127883195877075, + "learning_rate": 5.096731154102735e-06, + "loss": 0.3095, + "step": 764 + }, + { + "epoch": 0.015314165603182944, + "grad_norm": 1.0931371450424194, + "learning_rate": 5.103402268178786e-06, + "loss": 0.3201, + "step": 765 + }, + { + "epoch": 0.015334184120311288, + "grad_norm": 1.4586753845214844, + "learning_rate": 5.1100733822548364e-06, + "loss": 0.317, + "step": 766 + }, + { + "epoch": 0.015354202637439631, + "grad_norm": 1.1003224849700928, + "learning_rate": 5.116744496330888e-06, + "loss": 0.3103, + "step": 767 + }, + { + "epoch": 0.015374221154567975, + "grad_norm": 1.2381736040115356, + "learning_rate": 5.123415610406939e-06, + "loss": 0.3553, + "step": 768 + }, + { + "epoch": 0.015394239671696319, + "grad_norm": 1.236075758934021, + "learning_rate": 5.130086724482989e-06, + "loss": 0.3185, + "step": 769 + }, + { + "epoch": 0.015414258188824663, + "grad_norm": 1.128480076789856, + "learning_rate": 5.13675783855904e-06, + "loss": 0.3732, + "step": 770 + }, + { + "epoch": 0.015434276705953006, + "grad_norm": 1.1019165515899658, + "learning_rate": 5.14342895263509e-06, + "loss": 0.3249, + "step": 771 + }, + { + "epoch": 0.01545429522308135, + "grad_norm": 1.0681803226470947, + "learning_rate": 5.150100066711141e-06, + "loss": 0.367, + "step": 772 + }, + { + "epoch": 0.015474313740209694, + "grad_norm": 1.8151113986968994, + "learning_rate": 5.156771180787192e-06, + "loss": 0.8764, + "step": 773 + }, + { + "epoch": 0.015494332257338038, + "grad_norm": 1.1761078834533691, + "learning_rate": 5.1634422948632425e-06, + "loss": 0.3354, + "step": 774 + }, + { + "epoch": 0.015514350774466381, + "grad_norm": 1.213462233543396, + "learning_rate": 5.170113408939294e-06, + "loss": 0.3215, + "step": 775 + }, + { + "epoch": 0.015534369291594725, + "grad_norm": 1.9677165746688843, + "learning_rate": 5.176784523015343e-06, + "loss": 0.886, + "step": 776 + }, + { + "epoch": 0.015554387808723069, + "grad_norm": 1.0188323259353638, + "learning_rate": 5.183455637091394e-06, + "loss": 0.3407, + "step": 777 + }, + { + "epoch": 0.015574406325851413, + "grad_norm": 0.9662430286407471, + "learning_rate": 5.1901267511674455e-06, + "loss": 0.3357, + "step": 778 + }, + { + "epoch": 0.015594424842979756, + "grad_norm": 1.083368182182312, + "learning_rate": 5.196797865243496e-06, + "loss": 0.3388, + "step": 779 + }, + { + "epoch": 0.0156144433601081, + "grad_norm": 1.0954108238220215, + "learning_rate": 5.203468979319547e-06, + "loss": 0.3338, + "step": 780 + }, + { + "epoch": 0.015634461877236442, + "grad_norm": 1.208626389503479, + "learning_rate": 5.210140093395597e-06, + "loss": 0.3144, + "step": 781 + }, + { + "epoch": 0.015654480394364786, + "grad_norm": 1.1051464080810547, + "learning_rate": 5.2168112074716486e-06, + "loss": 0.3316, + "step": 782 + }, + { + "epoch": 0.01567449891149313, + "grad_norm": 1.0029922723770142, + "learning_rate": 5.223482321547699e-06, + "loss": 0.3358, + "step": 783 + }, + { + "epoch": 0.015694517428621473, + "grad_norm": 1.0703754425048828, + "learning_rate": 5.230153435623749e-06, + "loss": 0.3042, + "step": 784 + }, + { + "epoch": 0.015714535945749817, + "grad_norm": 1.2437200546264648, + "learning_rate": 5.2368245496998e-06, + "loss": 0.3599, + "step": 785 + }, + { + "epoch": 0.01573455446287816, + "grad_norm": 1.0184565782546997, + "learning_rate": 5.243495663775851e-06, + "loss": 0.3131, + "step": 786 + }, + { + "epoch": 0.015754572980006504, + "grad_norm": 1.9161826372146606, + "learning_rate": 5.250166777851902e-06, + "loss": 0.8341, + "step": 787 + }, + { + "epoch": 0.015774591497134848, + "grad_norm": 0.9633497595787048, + "learning_rate": 5.256837891927953e-06, + "loss": 0.3205, + "step": 788 + }, + { + "epoch": 0.015794610014263192, + "grad_norm": 1.0221731662750244, + "learning_rate": 5.263509006004003e-06, + "loss": 0.2977, + "step": 789 + }, + { + "epoch": 0.015814628531391536, + "grad_norm": 1.08966064453125, + "learning_rate": 5.270180120080054e-06, + "loss": 0.3069, + "step": 790 + }, + { + "epoch": 0.01583464704851988, + "grad_norm": 1.850947380065918, + "learning_rate": 5.276851234156104e-06, + "loss": 0.8542, + "step": 791 + }, + { + "epoch": 0.015854665565648223, + "grad_norm": 1.013253092765808, + "learning_rate": 5.283522348232155e-06, + "loss": 0.3516, + "step": 792 + }, + { + "epoch": 0.015874684082776567, + "grad_norm": 1.0138709545135498, + "learning_rate": 5.2901934623082065e-06, + "loss": 0.3402, + "step": 793 + }, + { + "epoch": 0.01589470259990491, + "grad_norm": 1.137337327003479, + "learning_rate": 5.296864576384256e-06, + "loss": 0.3566, + "step": 794 + }, + { + "epoch": 0.015914721117033254, + "grad_norm": 1.0801748037338257, + "learning_rate": 5.303535690460307e-06, + "loss": 0.2875, + "step": 795 + }, + { + "epoch": 0.015934739634161598, + "grad_norm": 1.1423110961914062, + "learning_rate": 5.3102068045363575e-06, + "loss": 0.3712, + "step": 796 + }, + { + "epoch": 0.015954758151289942, + "grad_norm": 1.172385811805725, + "learning_rate": 5.316877918612409e-06, + "loss": 0.3909, + "step": 797 + }, + { + "epoch": 0.015974776668418286, + "grad_norm": 1.0006663799285889, + "learning_rate": 5.32354903268846e-06, + "loss": 0.2855, + "step": 798 + }, + { + "epoch": 0.01599479518554663, + "grad_norm": 1.3470708131790161, + "learning_rate": 5.33022014676451e-06, + "loss": 0.3509, + "step": 799 + }, + { + "epoch": 0.016014813702674973, + "grad_norm": 1.1017462015151978, + "learning_rate": 5.336891260840561e-06, + "loss": 0.3158, + "step": 800 + }, + { + "epoch": 0.016034832219803317, + "grad_norm": 1.890378475189209, + "learning_rate": 5.343562374916611e-06, + "loss": 0.851, + "step": 801 + }, + { + "epoch": 0.01605485073693166, + "grad_norm": 1.2325584888458252, + "learning_rate": 5.350233488992662e-06, + "loss": 0.3544, + "step": 802 + }, + { + "epoch": 0.016074869254060004, + "grad_norm": 1.092528223991394, + "learning_rate": 5.356904603068713e-06, + "loss": 0.319, + "step": 803 + }, + { + "epoch": 0.016094887771188348, + "grad_norm": 1.9685660600662231, + "learning_rate": 5.3635757171447635e-06, + "loss": 0.8865, + "step": 804 + }, + { + "epoch": 0.01611490628831669, + "grad_norm": 1.0674697160720825, + "learning_rate": 5.370246831220815e-06, + "loss": 0.3068, + "step": 805 + }, + { + "epoch": 0.016134924805445035, + "grad_norm": 1.135088562965393, + "learning_rate": 5.376917945296864e-06, + "loss": 0.3236, + "step": 806 + }, + { + "epoch": 0.01615494332257338, + "grad_norm": 1.0648424625396729, + "learning_rate": 5.383589059372915e-06, + "loss": 0.3562, + "step": 807 + }, + { + "epoch": 0.016174961839701723, + "grad_norm": 1.0895888805389404, + "learning_rate": 5.3902601734489666e-06, + "loss": 0.3702, + "step": 808 + }, + { + "epoch": 0.016194980356830067, + "grad_norm": 1.9647891521453857, + "learning_rate": 5.396931287525017e-06, + "loss": 0.8728, + "step": 809 + }, + { + "epoch": 0.01621499887395841, + "grad_norm": 1.0998228788375854, + "learning_rate": 5.403602401601068e-06, + "loss": 0.3268, + "step": 810 + }, + { + "epoch": 0.016235017391086754, + "grad_norm": 1.070011854171753, + "learning_rate": 5.410273515677118e-06, + "loss": 0.3296, + "step": 811 + }, + { + "epoch": 0.016255035908215098, + "grad_norm": 1.0291463136672974, + "learning_rate": 5.416944629753169e-06, + "loss": 0.3355, + "step": 812 + }, + { + "epoch": 0.01627505442534344, + "grad_norm": 1.1080970764160156, + "learning_rate": 5.42361574382922e-06, + "loss": 0.3381, + "step": 813 + }, + { + "epoch": 0.016295072942471785, + "grad_norm": 1.1334047317504883, + "learning_rate": 5.43028685790527e-06, + "loss": 0.3532, + "step": 814 + }, + { + "epoch": 0.01631509145960013, + "grad_norm": 1.223921298980713, + "learning_rate": 5.4369579719813214e-06, + "loss": 0.3477, + "step": 815 + }, + { + "epoch": 0.016335109976728473, + "grad_norm": 1.1393804550170898, + "learning_rate": 5.443629086057372e-06, + "loss": 0.3329, + "step": 816 + }, + { + "epoch": 0.016355128493856817, + "grad_norm": 1.1745020151138306, + "learning_rate": 5.450300200133423e-06, + "loss": 0.3145, + "step": 817 + }, + { + "epoch": 0.01637514701098516, + "grad_norm": 1.0427478551864624, + "learning_rate": 5.456971314209474e-06, + "loss": 0.2978, + "step": 818 + }, + { + "epoch": 0.016395165528113504, + "grad_norm": 1.0486774444580078, + "learning_rate": 5.463642428285524e-06, + "loss": 0.3521, + "step": 819 + }, + { + "epoch": 0.016415184045241848, + "grad_norm": 1.0728741884231567, + "learning_rate": 5.470313542361575e-06, + "loss": 0.2964, + "step": 820 + }, + { + "epoch": 0.01643520256237019, + "grad_norm": 0.9963114261627197, + "learning_rate": 5.476984656437625e-06, + "loss": 0.313, + "step": 821 + }, + { + "epoch": 0.016455221079498535, + "grad_norm": 1.8569154739379883, + "learning_rate": 5.483655770513676e-06, + "loss": 0.8526, + "step": 822 + }, + { + "epoch": 0.01647523959662688, + "grad_norm": 1.048340082168579, + "learning_rate": 5.4903268845897275e-06, + "loss": 0.3195, + "step": 823 + }, + { + "epoch": 0.016495258113755223, + "grad_norm": 1.846267580986023, + "learning_rate": 5.496997998665777e-06, + "loss": 0.8736, + "step": 824 + }, + { + "epoch": 0.016515276630883566, + "grad_norm": 1.1590656042099, + "learning_rate": 5.503669112741828e-06, + "loss": 0.3317, + "step": 825 + }, + { + "epoch": 0.01653529514801191, + "grad_norm": 1.163628339767456, + "learning_rate": 5.5103402268178785e-06, + "loss": 0.3596, + "step": 826 + }, + { + "epoch": 0.016555313665140254, + "grad_norm": 1.8624252080917358, + "learning_rate": 5.51701134089393e-06, + "loss": 0.8068, + "step": 827 + }, + { + "epoch": 0.016575332182268598, + "grad_norm": 0.9878004789352417, + "learning_rate": 5.523682454969981e-06, + "loss": 0.3065, + "step": 828 + }, + { + "epoch": 0.01659535069939694, + "grad_norm": 1.0852710008621216, + "learning_rate": 5.530353569046031e-06, + "loss": 0.3321, + "step": 829 + }, + { + "epoch": 0.016615369216525285, + "grad_norm": 1.061721682548523, + "learning_rate": 5.537024683122082e-06, + "loss": 0.3265, + "step": 830 + }, + { + "epoch": 0.01663538773365363, + "grad_norm": 1.8287514448165894, + "learning_rate": 5.543695797198132e-06, + "loss": 0.8665, + "step": 831 + }, + { + "epoch": 0.016655406250781973, + "grad_norm": 1.0924748182296753, + "learning_rate": 5.550366911274183e-06, + "loss": 0.2878, + "step": 832 + }, + { + "epoch": 0.016675424767910316, + "grad_norm": 1.0256153345108032, + "learning_rate": 5.557038025350234e-06, + "loss": 0.3306, + "step": 833 + }, + { + "epoch": 0.01669544328503866, + "grad_norm": 1.119415283203125, + "learning_rate": 5.5637091394262846e-06, + "loss": 0.3321, + "step": 834 + }, + { + "epoch": 0.016715461802167004, + "grad_norm": 1.0442734956741333, + "learning_rate": 5.570380253502336e-06, + "loss": 0.3713, + "step": 835 + }, + { + "epoch": 0.016735480319295348, + "grad_norm": 0.9835049510002136, + "learning_rate": 5.577051367578385e-06, + "loss": 0.323, + "step": 836 + }, + { + "epoch": 0.01675549883642369, + "grad_norm": 1.281340479850769, + "learning_rate": 5.583722481654436e-06, + "loss": 0.3491, + "step": 837 + }, + { + "epoch": 0.016775517353552035, + "grad_norm": 1.1203947067260742, + "learning_rate": 5.590393595730488e-06, + "loss": 0.312, + "step": 838 + }, + { + "epoch": 0.01679553587068038, + "grad_norm": 1.0834065675735474, + "learning_rate": 5.597064709806538e-06, + "loss": 0.307, + "step": 839 + }, + { + "epoch": 0.016815554387808723, + "grad_norm": 1.1250184774398804, + "learning_rate": 5.603735823882589e-06, + "loss": 0.3112, + "step": 840 + }, + { + "epoch": 0.016835572904937066, + "grad_norm": 1.63748300075531, + "learning_rate": 5.610406937958639e-06, + "loss": 0.8391, + "step": 841 + }, + { + "epoch": 0.01685559142206541, + "grad_norm": 1.1188149452209473, + "learning_rate": 5.61707805203469e-06, + "loss": 0.3486, + "step": 842 + }, + { + "epoch": 0.016875609939193754, + "grad_norm": 1.0218806266784668, + "learning_rate": 5.623749166110741e-06, + "loss": 0.3438, + "step": 843 + }, + { + "epoch": 0.016895628456322098, + "grad_norm": 0.968826949596405, + "learning_rate": 5.630420280186791e-06, + "loss": 0.2733, + "step": 844 + }, + { + "epoch": 0.01691564697345044, + "grad_norm": 1.172285795211792, + "learning_rate": 5.6370913942628425e-06, + "loss": 0.3537, + "step": 845 + }, + { + "epoch": 0.016935665490578785, + "grad_norm": 1.1464693546295166, + "learning_rate": 5.643762508338894e-06, + "loss": 0.3476, + "step": 846 + }, + { + "epoch": 0.01695568400770713, + "grad_norm": 1.1187208890914917, + "learning_rate": 5.650433622414944e-06, + "loss": 0.357, + "step": 847 + }, + { + "epoch": 0.016975702524835472, + "grad_norm": 1.7815724611282349, + "learning_rate": 5.657104736490995e-06, + "loss": 0.8621, + "step": 848 + }, + { + "epoch": 0.016995721041963816, + "grad_norm": 1.0849648714065552, + "learning_rate": 5.663775850567045e-06, + "loss": 0.3428, + "step": 849 + }, + { + "epoch": 0.01701573955909216, + "grad_norm": 1.0064018964767456, + "learning_rate": 5.670446964643096e-06, + "loss": 0.3226, + "step": 850 + }, + { + "epoch": 0.017035758076220504, + "grad_norm": 1.9144442081451416, + "learning_rate": 5.677118078719147e-06, + "loss": 0.8369, + "step": 851 + }, + { + "epoch": 0.017055776593348847, + "grad_norm": 1.4115127325057983, + "learning_rate": 5.683789192795197e-06, + "loss": 0.4057, + "step": 852 + }, + { + "epoch": 0.01707579511047719, + "grad_norm": 1.061948537826538, + "learning_rate": 5.6904603068712485e-06, + "loss": 0.3562, + "step": 853 + }, + { + "epoch": 0.017095813627605535, + "grad_norm": 1.1567476987838745, + "learning_rate": 5.697131420947298e-06, + "loss": 0.326, + "step": 854 + }, + { + "epoch": 0.01711583214473388, + "grad_norm": 1.1022838354110718, + "learning_rate": 5.703802535023349e-06, + "loss": 0.3449, + "step": 855 + }, + { + "epoch": 0.017135850661862222, + "grad_norm": 1.828279972076416, + "learning_rate": 5.7104736490994e-06, + "loss": 0.8465, + "step": 856 + }, + { + "epoch": 0.017155869178990566, + "grad_norm": 1.3594900369644165, + "learning_rate": 5.717144763175451e-06, + "loss": 0.404, + "step": 857 + }, + { + "epoch": 0.01717588769611891, + "grad_norm": 1.13335120677948, + "learning_rate": 5.723815877251502e-06, + "loss": 0.3562, + "step": 858 + }, + { + "epoch": 0.017195906213247254, + "grad_norm": 1.055601716041565, + "learning_rate": 5.730486991327551e-06, + "loss": 0.353, + "step": 859 + }, + { + "epoch": 0.017215924730375597, + "grad_norm": 1.19294011592865, + "learning_rate": 5.737158105403603e-06, + "loss": 0.3961, + "step": 860 + }, + { + "epoch": 0.01723594324750394, + "grad_norm": 1.0711638927459717, + "learning_rate": 5.743829219479654e-06, + "loss": 0.3471, + "step": 861 + }, + { + "epoch": 0.017255961764632285, + "grad_norm": 1.9245998859405518, + "learning_rate": 5.750500333555704e-06, + "loss": 0.7856, + "step": 862 + }, + { + "epoch": 0.01727598028176063, + "grad_norm": 1.0810885429382324, + "learning_rate": 5.757171447631755e-06, + "loss": 0.3157, + "step": 863 + }, + { + "epoch": 0.017295998798888972, + "grad_norm": 1.0890722274780273, + "learning_rate": 5.763842561707806e-06, + "loss": 0.3725, + "step": 864 + }, + { + "epoch": 0.017316017316017316, + "grad_norm": 1.1739184856414795, + "learning_rate": 5.770513675783857e-06, + "loss": 0.3351, + "step": 865 + }, + { + "epoch": 0.01733603583314566, + "grad_norm": 1.0819408893585205, + "learning_rate": 5.777184789859908e-06, + "loss": 0.3573, + "step": 866 + }, + { + "epoch": 0.017356054350274003, + "grad_norm": 1.0698814392089844, + "learning_rate": 5.7838559039359575e-06, + "loss": 0.3535, + "step": 867 + }, + { + "epoch": 0.017376072867402347, + "grad_norm": 1.1386035680770874, + "learning_rate": 5.790527018012009e-06, + "loss": 0.3648, + "step": 868 + }, + { + "epoch": 0.01739609138453069, + "grad_norm": 1.056754231452942, + "learning_rate": 5.797198132088059e-06, + "loss": 0.3491, + "step": 869 + }, + { + "epoch": 0.017416109901659035, + "grad_norm": 1.1884483098983765, + "learning_rate": 5.80386924616411e-06, + "loss": 0.3127, + "step": 870 + }, + { + "epoch": 0.01743612841878738, + "grad_norm": 1.1252326965332031, + "learning_rate": 5.810540360240161e-06, + "loss": 0.324, + "step": 871 + }, + { + "epoch": 0.017456146935915722, + "grad_norm": 1.8834737539291382, + "learning_rate": 5.817211474316211e-06, + "loss": 0.8654, + "step": 872 + }, + { + "epoch": 0.017476165453044066, + "grad_norm": 1.0128881931304932, + "learning_rate": 5.823882588392262e-06, + "loss": 0.3036, + "step": 873 + }, + { + "epoch": 0.01749618397017241, + "grad_norm": 1.038265585899353, + "learning_rate": 5.830553702468312e-06, + "loss": 0.3201, + "step": 874 + }, + { + "epoch": 0.017516202487300753, + "grad_norm": 1.0621566772460938, + "learning_rate": 5.8372248165443635e-06, + "loss": 0.3186, + "step": 875 + }, + { + "epoch": 0.017536221004429097, + "grad_norm": 0.9577281475067139, + "learning_rate": 5.843895930620415e-06, + "loss": 0.321, + "step": 876 + }, + { + "epoch": 0.01755623952155744, + "grad_norm": 1.038634181022644, + "learning_rate": 5.850567044696465e-06, + "loss": 0.2912, + "step": 877 + }, + { + "epoch": 0.017576258038685785, + "grad_norm": 1.0303009748458862, + "learning_rate": 5.857238158772515e-06, + "loss": 0.3127, + "step": 878 + }, + { + "epoch": 0.01759627655581413, + "grad_norm": 1.0453342199325562, + "learning_rate": 5.863909272848566e-06, + "loss": 0.327, + "step": 879 + }, + { + "epoch": 0.017616295072942472, + "grad_norm": 1.2400598526000977, + "learning_rate": 5.870580386924617e-06, + "loss": 0.3506, + "step": 880 + }, + { + "epoch": 0.017636313590070816, + "grad_norm": 1.0996888875961304, + "learning_rate": 5.877251501000668e-06, + "loss": 0.3781, + "step": 881 + }, + { + "epoch": 0.01765633210719916, + "grad_norm": 1.1969300508499146, + "learning_rate": 5.883922615076718e-06, + "loss": 0.3696, + "step": 882 + }, + { + "epoch": 0.017676350624327503, + "grad_norm": 0.988248884677887, + "learning_rate": 5.89059372915277e-06, + "loss": 0.3131, + "step": 883 + }, + { + "epoch": 0.017696369141455847, + "grad_norm": 1.0559196472167969, + "learning_rate": 5.897264843228819e-06, + "loss": 0.3277, + "step": 884 + }, + { + "epoch": 0.01771638765858419, + "grad_norm": 1.0820832252502441, + "learning_rate": 5.90393595730487e-06, + "loss": 0.3923, + "step": 885 + }, + { + "epoch": 0.017736406175712534, + "grad_norm": 1.15842866897583, + "learning_rate": 5.9106070713809214e-06, + "loss": 0.3186, + "step": 886 + }, + { + "epoch": 0.017756424692840878, + "grad_norm": 1.099192500114441, + "learning_rate": 5.917278185456972e-06, + "loss": 0.3097, + "step": 887 + }, + { + "epoch": 0.017776443209969222, + "grad_norm": 0.9573950171470642, + "learning_rate": 5.923949299533023e-06, + "loss": 0.3201, + "step": 888 + }, + { + "epoch": 0.017796461727097566, + "grad_norm": 1.0747041702270508, + "learning_rate": 5.9306204136090724e-06, + "loss": 0.3732, + "step": 889 + }, + { + "epoch": 0.01781648024422591, + "grad_norm": 1.835673451423645, + "learning_rate": 5.937291527685124e-06, + "loss": 0.8355, + "step": 890 + }, + { + "epoch": 0.017836498761354253, + "grad_norm": 1.0603370666503906, + "learning_rate": 5.943962641761175e-06, + "loss": 0.3259, + "step": 891 + }, + { + "epoch": 0.017856517278482597, + "grad_norm": 1.0948320627212524, + "learning_rate": 5.950633755837225e-06, + "loss": 0.3565, + "step": 892 + }, + { + "epoch": 0.01787653579561094, + "grad_norm": 1.133957862854004, + "learning_rate": 5.957304869913276e-06, + "loss": 0.3059, + "step": 893 + }, + { + "epoch": 0.017896554312739284, + "grad_norm": 1.07373046875, + "learning_rate": 5.963975983989327e-06, + "loss": 0.3926, + "step": 894 + }, + { + "epoch": 0.017916572829867628, + "grad_norm": 1.0662471055984497, + "learning_rate": 5.970647098065378e-06, + "loss": 0.3239, + "step": 895 + }, + { + "epoch": 0.017936591346995972, + "grad_norm": 1.2440329790115356, + "learning_rate": 5.977318212141429e-06, + "loss": 0.3442, + "step": 896 + }, + { + "epoch": 0.017956609864124316, + "grad_norm": 1.2145576477050781, + "learning_rate": 5.9839893262174785e-06, + "loss": 0.3329, + "step": 897 + }, + { + "epoch": 0.01797662838125266, + "grad_norm": 1.0369491577148438, + "learning_rate": 5.99066044029353e-06, + "loss": 0.3256, + "step": 898 + }, + { + "epoch": 0.017996646898381003, + "grad_norm": 1.9298086166381836, + "learning_rate": 5.99733155436958e-06, + "loss": 0.8823, + "step": 899 + }, + { + "epoch": 0.018016665415509347, + "grad_norm": 1.1741888523101807, + "learning_rate": 6.004002668445631e-06, + "loss": 0.3611, + "step": 900 + }, + { + "epoch": 0.01803668393263769, + "grad_norm": 1.7162913084030151, + "learning_rate": 6.010673782521682e-06, + "loss": 0.8396, + "step": 901 + }, + { + "epoch": 0.018056702449766034, + "grad_norm": 1.093433141708374, + "learning_rate": 6.017344896597732e-06, + "loss": 0.3496, + "step": 902 + }, + { + "epoch": 0.018076720966894378, + "grad_norm": 1.0372273921966553, + "learning_rate": 6.024016010673783e-06, + "loss": 0.3534, + "step": 903 + }, + { + "epoch": 0.018096739484022722, + "grad_norm": 1.0775638818740845, + "learning_rate": 6.030687124749833e-06, + "loss": 0.3203, + "step": 904 + }, + { + "epoch": 0.018116758001151066, + "grad_norm": 1.1541810035705566, + "learning_rate": 6.0373582388258846e-06, + "loss": 0.3279, + "step": 905 + }, + { + "epoch": 0.01813677651827941, + "grad_norm": 1.058952808380127, + "learning_rate": 6.044029352901936e-06, + "loss": 0.3624, + "step": 906 + }, + { + "epoch": 0.018156795035407753, + "grad_norm": 1.1640199422836304, + "learning_rate": 6.050700466977985e-06, + "loss": 0.3383, + "step": 907 + }, + { + "epoch": 0.018176813552536097, + "grad_norm": 1.7934690713882446, + "learning_rate": 6.057371581054036e-06, + "loss": 0.8363, + "step": 908 + }, + { + "epoch": 0.01819683206966444, + "grad_norm": 0.9772834777832031, + "learning_rate": 6.064042695130087e-06, + "loss": 0.277, + "step": 909 + }, + { + "epoch": 0.018216850586792784, + "grad_norm": 1.0080132484436035, + "learning_rate": 6.070713809206138e-06, + "loss": 0.3153, + "step": 910 + }, + { + "epoch": 0.018236869103921128, + "grad_norm": 1.0553396940231323, + "learning_rate": 6.077384923282189e-06, + "loss": 0.2676, + "step": 911 + }, + { + "epoch": 0.01825688762104947, + "grad_norm": 1.0713484287261963, + "learning_rate": 6.0840560373582394e-06, + "loss": 0.3033, + "step": 912 + }, + { + "epoch": 0.018276906138177815, + "grad_norm": 1.8927271366119385, + "learning_rate": 6.090727151434291e-06, + "loss": 0.8535, + "step": 913 + }, + { + "epoch": 0.01829692465530616, + "grad_norm": 1.080771803855896, + "learning_rate": 6.09739826551034e-06, + "loss": 0.363, + "step": 914 + }, + { + "epoch": 0.018316943172434503, + "grad_norm": 1.006669521331787, + "learning_rate": 6.104069379586391e-06, + "loss": 0.3887, + "step": 915 + }, + { + "epoch": 0.018336961689562847, + "grad_norm": 1.1825398206710815, + "learning_rate": 6.1107404936624425e-06, + "loss": 0.3152, + "step": 916 + }, + { + "epoch": 0.01835698020669119, + "grad_norm": 1.1862787008285522, + "learning_rate": 6.117411607738493e-06, + "loss": 0.3439, + "step": 917 + }, + { + "epoch": 0.018376998723819534, + "grad_norm": 1.0685172080993652, + "learning_rate": 6.124082721814544e-06, + "loss": 0.3684, + "step": 918 + }, + { + "epoch": 0.018397017240947878, + "grad_norm": 1.0573515892028809, + "learning_rate": 6.1307538358905935e-06, + "loss": 0.3116, + "step": 919 + }, + { + "epoch": 0.01841703575807622, + "grad_norm": 1.1212364435195923, + "learning_rate": 6.137424949966645e-06, + "loss": 0.372, + "step": 920 + }, + { + "epoch": 0.018437054275204565, + "grad_norm": 1.0330435037612915, + "learning_rate": 6.144096064042696e-06, + "loss": 0.3654, + "step": 921 + }, + { + "epoch": 0.01845707279233291, + "grad_norm": 1.0125515460968018, + "learning_rate": 6.150767178118746e-06, + "loss": 0.3132, + "step": 922 + }, + { + "epoch": 0.018477091309461253, + "grad_norm": 1.049013614654541, + "learning_rate": 6.157438292194797e-06, + "loss": 0.3357, + "step": 923 + }, + { + "epoch": 0.018497109826589597, + "grad_norm": 1.0915933847427368, + "learning_rate": 6.164109406270848e-06, + "loss": 0.2994, + "step": 924 + }, + { + "epoch": 0.01851712834371794, + "grad_norm": 1.3991899490356445, + "learning_rate": 6.170780520346899e-06, + "loss": 0.3262, + "step": 925 + }, + { + "epoch": 0.018537146860846284, + "grad_norm": 1.282048225402832, + "learning_rate": 6.177451634422949e-06, + "loss": 0.3709, + "step": 926 + }, + { + "epoch": 0.018557165377974628, + "grad_norm": 1.0293883085250854, + "learning_rate": 6.1841227484989995e-06, + "loss": 0.2991, + "step": 927 + }, + { + "epoch": 0.01857718389510297, + "grad_norm": 1.8715457916259766, + "learning_rate": 6.190793862575051e-06, + "loss": 0.8594, + "step": 928 + }, + { + "epoch": 0.018597202412231315, + "grad_norm": 1.2297749519348145, + "learning_rate": 6.197464976651101e-06, + "loss": 0.2859, + "step": 929 + }, + { + "epoch": 0.01861722092935966, + "grad_norm": 1.2872673273086548, + "learning_rate": 6.204136090727152e-06, + "loss": 0.3637, + "step": 930 + }, + { + "epoch": 0.018637239446488003, + "grad_norm": 1.9366660118103027, + "learning_rate": 6.210807204803203e-06, + "loss": 0.8748, + "step": 931 + }, + { + "epoch": 0.018657257963616346, + "grad_norm": 1.0404589176177979, + "learning_rate": 6.217478318879253e-06, + "loss": 0.3492, + "step": 932 + }, + { + "epoch": 0.01867727648074469, + "grad_norm": 1.3305368423461914, + "learning_rate": 6.224149432955304e-06, + "loss": 0.3564, + "step": 933 + }, + { + "epoch": 0.018697294997873034, + "grad_norm": 1.185019612312317, + "learning_rate": 6.2308205470313544e-06, + "loss": 0.3235, + "step": 934 + }, + { + "epoch": 0.018717313515001378, + "grad_norm": 1.2668317556381226, + "learning_rate": 6.237491661107406e-06, + "loss": 0.3299, + "step": 935 + }, + { + "epoch": 0.01873733203212972, + "grad_norm": 1.1652400493621826, + "learning_rate": 6.244162775183457e-06, + "loss": 0.3093, + "step": 936 + }, + { + "epoch": 0.018757350549258065, + "grad_norm": 1.0012394189834595, + "learning_rate": 6.250833889259506e-06, + "loss": 0.3091, + "step": 937 + }, + { + "epoch": 0.01877736906638641, + "grad_norm": 1.8712888956069946, + "learning_rate": 6.2575050033355575e-06, + "loss": 0.9182, + "step": 938 + }, + { + "epoch": 0.018797387583514753, + "grad_norm": 1.0119309425354004, + "learning_rate": 6.264176117411608e-06, + "loss": 0.3314, + "step": 939 + }, + { + "epoch": 0.018817406100643096, + "grad_norm": 1.2637819051742554, + "learning_rate": 6.270847231487659e-06, + "loss": 0.352, + "step": 940 + }, + { + "epoch": 0.01883742461777144, + "grad_norm": 1.0763734579086304, + "learning_rate": 6.27751834556371e-06, + "loss": 0.4039, + "step": 941 + }, + { + "epoch": 0.018857443134899784, + "grad_norm": 1.9068410396575928, + "learning_rate": 6.2841894596397605e-06, + "loss": 0.8934, + "step": 942 + }, + { + "epoch": 0.018877461652028128, + "grad_norm": 0.9878477454185486, + "learning_rate": 6.290860573715812e-06, + "loss": 0.3323, + "step": 943 + }, + { + "epoch": 0.01889748016915647, + "grad_norm": 1.0765917301177979, + "learning_rate": 6.297531687791861e-06, + "loss": 0.298, + "step": 944 + }, + { + "epoch": 0.018917498686284815, + "grad_norm": 1.095545768737793, + "learning_rate": 6.304202801867912e-06, + "loss": 0.333, + "step": 945 + }, + { + "epoch": 0.01893751720341316, + "grad_norm": 1.1114487648010254, + "learning_rate": 6.3108739159439635e-06, + "loss": 0.3393, + "step": 946 + }, + { + "epoch": 0.018957535720541503, + "grad_norm": 1.0850645303726196, + "learning_rate": 6.317545030020014e-06, + "loss": 0.2884, + "step": 947 + }, + { + "epoch": 0.018977554237669846, + "grad_norm": 1.0461398363113403, + "learning_rate": 6.324216144096065e-06, + "loss": 0.3354, + "step": 948 + }, + { + "epoch": 0.01899757275479819, + "grad_norm": 1.063124179840088, + "learning_rate": 6.3308872581721145e-06, + "loss": 0.3637, + "step": 949 + }, + { + "epoch": 0.019017591271926534, + "grad_norm": 1.225517749786377, + "learning_rate": 6.337558372248166e-06, + "loss": 0.3733, + "step": 950 + }, + { + "epoch": 0.019037609789054877, + "grad_norm": 1.1174752712249756, + "learning_rate": 6.344229486324217e-06, + "loss": 0.2993, + "step": 951 + }, + { + "epoch": 0.01905762830618322, + "grad_norm": 0.9823200702667236, + "learning_rate": 6.350900600400267e-06, + "loss": 0.3038, + "step": 952 + }, + { + "epoch": 0.01907764682331156, + "grad_norm": 1.1508935689926147, + "learning_rate": 6.357571714476318e-06, + "loss": 0.3397, + "step": 953 + }, + { + "epoch": 0.019097665340439905, + "grad_norm": 1.0612282752990723, + "learning_rate": 6.364242828552368e-06, + "loss": 0.3142, + "step": 954 + }, + { + "epoch": 0.01911768385756825, + "grad_norm": 1.1840678453445435, + "learning_rate": 6.370913942628419e-06, + "loss": 0.3629, + "step": 955 + }, + { + "epoch": 0.019137702374696593, + "grad_norm": 1.173342227935791, + "learning_rate": 6.37758505670447e-06, + "loss": 0.3129, + "step": 956 + }, + { + "epoch": 0.019157720891824936, + "grad_norm": 1.0196101665496826, + "learning_rate": 6.384256170780521e-06, + "loss": 0.3358, + "step": 957 + }, + { + "epoch": 0.01917773940895328, + "grad_norm": 1.1059520244598389, + "learning_rate": 6.390927284856572e-06, + "loss": 0.2866, + "step": 958 + }, + { + "epoch": 0.019197757926081624, + "grad_norm": 1.1599034070968628, + "learning_rate": 6.397598398932622e-06, + "loss": 0.3277, + "step": 959 + }, + { + "epoch": 0.019217776443209968, + "grad_norm": 1.0359245538711548, + "learning_rate": 6.404269513008673e-06, + "loss": 0.3439, + "step": 960 + }, + { + "epoch": 0.01923779496033831, + "grad_norm": 1.0040650367736816, + "learning_rate": 6.4109406270847245e-06, + "loss": 0.3388, + "step": 961 + }, + { + "epoch": 0.019257813477466655, + "grad_norm": 1.0717928409576416, + "learning_rate": 6.417611741160774e-06, + "loss": 0.3235, + "step": 962 + }, + { + "epoch": 0.019277831994595, + "grad_norm": 1.095108985900879, + "learning_rate": 6.424282855236825e-06, + "loss": 0.3329, + "step": 963 + }, + { + "epoch": 0.019297850511723343, + "grad_norm": 0.9689841270446777, + "learning_rate": 6.4309539693128755e-06, + "loss": 0.319, + "step": 964 + }, + { + "epoch": 0.019317869028851686, + "grad_norm": 1.2157001495361328, + "learning_rate": 6.437625083388927e-06, + "loss": 0.3769, + "step": 965 + }, + { + "epoch": 0.01933788754598003, + "grad_norm": 1.2527369260787964, + "learning_rate": 6.444296197464978e-06, + "loss": 0.3348, + "step": 966 + }, + { + "epoch": 0.019357906063108374, + "grad_norm": 1.0733178853988647, + "learning_rate": 6.450967311541027e-06, + "loss": 0.3495, + "step": 967 + }, + { + "epoch": 0.019377924580236718, + "grad_norm": 1.0510330200195312, + "learning_rate": 6.4576384256170785e-06, + "loss": 0.3317, + "step": 968 + }, + { + "epoch": 0.01939794309736506, + "grad_norm": 1.0518503189086914, + "learning_rate": 6.464309539693129e-06, + "loss": 0.2969, + "step": 969 + }, + { + "epoch": 0.019417961614493405, + "grad_norm": 1.1840444803237915, + "learning_rate": 6.47098065376918e-06, + "loss": 0.3206, + "step": 970 + }, + { + "epoch": 0.01943798013162175, + "grad_norm": 1.386938214302063, + "learning_rate": 6.477651767845231e-06, + "loss": 0.3488, + "step": 971 + }, + { + "epoch": 0.019457998648750092, + "grad_norm": 1.194237232208252, + "learning_rate": 6.4843228819212815e-06, + "loss": 0.329, + "step": 972 + }, + { + "epoch": 0.019478017165878436, + "grad_norm": 1.1681545972824097, + "learning_rate": 6.490993995997332e-06, + "loss": 0.3739, + "step": 973 + }, + { + "epoch": 0.01949803568300678, + "grad_norm": 1.1011244058609009, + "learning_rate": 6.497665110073382e-06, + "loss": 0.3357, + "step": 974 + }, + { + "epoch": 0.019518054200135124, + "grad_norm": 1.1287283897399902, + "learning_rate": 6.504336224149433e-06, + "loss": 0.314, + "step": 975 + }, + { + "epoch": 0.019538072717263467, + "grad_norm": 1.0869028568267822, + "learning_rate": 6.5110073382254846e-06, + "loss": 0.359, + "step": 976 + }, + { + "epoch": 0.01955809123439181, + "grad_norm": 1.015711784362793, + "learning_rate": 6.517678452301535e-06, + "loss": 0.2996, + "step": 977 + }, + { + "epoch": 0.019578109751520155, + "grad_norm": 1.2362161874771118, + "learning_rate": 6.524349566377586e-06, + "loss": 0.3104, + "step": 978 + }, + { + "epoch": 0.0195981282686485, + "grad_norm": 1.1348440647125244, + "learning_rate": 6.5310206804536356e-06, + "loss": 0.3536, + "step": 979 + }, + { + "epoch": 0.019618146785776842, + "grad_norm": 1.0817854404449463, + "learning_rate": 6.537691794529687e-06, + "loss": 0.3182, + "step": 980 + }, + { + "epoch": 0.019638165302905186, + "grad_norm": 1.063151478767395, + "learning_rate": 6.544362908605738e-06, + "loss": 0.3561, + "step": 981 + }, + { + "epoch": 0.01965818382003353, + "grad_norm": 1.0871738195419312, + "learning_rate": 6.551034022681788e-06, + "loss": 0.3343, + "step": 982 + }, + { + "epoch": 0.019678202337161874, + "grad_norm": 1.0467379093170166, + "learning_rate": 6.5577051367578394e-06, + "loss": 0.3415, + "step": 983 + }, + { + "epoch": 0.019698220854290217, + "grad_norm": 1.3127398490905762, + "learning_rate": 6.564376250833889e-06, + "loss": 0.3631, + "step": 984 + }, + { + "epoch": 0.01971823937141856, + "grad_norm": 1.1198155879974365, + "learning_rate": 6.57104736490994e-06, + "loss": 0.3565, + "step": 985 + }, + { + "epoch": 0.019738257888546905, + "grad_norm": 1.1768903732299805, + "learning_rate": 6.577718478985991e-06, + "loss": 0.3896, + "step": 986 + }, + { + "epoch": 0.01975827640567525, + "grad_norm": 1.203739881515503, + "learning_rate": 6.584389593062042e-06, + "loss": 0.3308, + "step": 987 + }, + { + "epoch": 0.019778294922803592, + "grad_norm": 1.02766752243042, + "learning_rate": 6.591060707138093e-06, + "loss": 0.3235, + "step": 988 + }, + { + "epoch": 0.019798313439931936, + "grad_norm": 1.132454752922058, + "learning_rate": 6.597731821214143e-06, + "loss": 0.3378, + "step": 989 + }, + { + "epoch": 0.01981833195706028, + "grad_norm": 1.0537437200546265, + "learning_rate": 6.604402935290194e-06, + "loss": 0.2999, + "step": 990 + }, + { + "epoch": 0.019838350474188624, + "grad_norm": 1.3054285049438477, + "learning_rate": 6.6110740493662455e-06, + "loss": 0.3581, + "step": 991 + }, + { + "epoch": 0.019858368991316967, + "grad_norm": 1.1002572774887085, + "learning_rate": 6.617745163442295e-06, + "loss": 0.3281, + "step": 992 + }, + { + "epoch": 0.01987838750844531, + "grad_norm": 1.2135038375854492, + "learning_rate": 6.624416277518346e-06, + "loss": 0.3401, + "step": 993 + }, + { + "epoch": 0.019898406025573655, + "grad_norm": 1.040556788444519, + "learning_rate": 6.6310873915943965e-06, + "loss": 0.3265, + "step": 994 + }, + { + "epoch": 0.019918424542702, + "grad_norm": 1.1694504022598267, + "learning_rate": 6.637758505670448e-06, + "loss": 0.3524, + "step": 995 + }, + { + "epoch": 0.019938443059830342, + "grad_norm": 1.4285082817077637, + "learning_rate": 6.644429619746499e-06, + "loss": 0.3396, + "step": 996 + }, + { + "epoch": 0.019958461576958686, + "grad_norm": 0.9526458382606506, + "learning_rate": 6.651100733822548e-06, + "loss": 0.2988, + "step": 997 + }, + { + "epoch": 0.01997848009408703, + "grad_norm": 1.0638072490692139, + "learning_rate": 6.6577718478985995e-06, + "loss": 0.3262, + "step": 998 + }, + { + "epoch": 0.019998498611215373, + "grad_norm": 1.07565176486969, + "learning_rate": 6.66444296197465e-06, + "loss": 0.3282, + "step": 999 + }, + { + "epoch": 0.020018517128343717, + "grad_norm": 1.9840214252471924, + "learning_rate": 6.671114076050701e-06, + "loss": 0.8625, + "step": 1000 + }, + { + "epoch": 0.02003853564547206, + "grad_norm": 1.1044836044311523, + "learning_rate": 6.677785190126752e-06, + "loss": 0.3798, + "step": 1001 + }, + { + "epoch": 0.020058554162600405, + "grad_norm": 1.0479382276535034, + "learning_rate": 6.684456304202802e-06, + "loss": 0.3289, + "step": 1002 + }, + { + "epoch": 0.02007857267972875, + "grad_norm": 1.2521297931671143, + "learning_rate": 6.691127418278853e-06, + "loss": 0.3131, + "step": 1003 + }, + { + "epoch": 0.020098591196857092, + "grad_norm": 1.1567481756210327, + "learning_rate": 6.697798532354903e-06, + "loss": 0.3364, + "step": 1004 + }, + { + "epoch": 0.020118609713985436, + "grad_norm": 1.8258700370788574, + "learning_rate": 6.704469646430954e-06, + "loss": 0.7834, + "step": 1005 + }, + { + "epoch": 0.02013862823111378, + "grad_norm": 1.273269772529602, + "learning_rate": 6.711140760507006e-06, + "loss": 0.295, + "step": 1006 + }, + { + "epoch": 0.020158646748242123, + "grad_norm": 1.2305594682693481, + "learning_rate": 6.717811874583056e-06, + "loss": 0.3513, + "step": 1007 + }, + { + "epoch": 0.020178665265370467, + "grad_norm": 1.0698548555374146, + "learning_rate": 6.724482988659107e-06, + "loss": 0.3285, + "step": 1008 + }, + { + "epoch": 0.02019868378249881, + "grad_norm": 1.1007624864578247, + "learning_rate": 6.731154102735157e-06, + "loss": 0.3049, + "step": 1009 + }, + { + "epoch": 0.020218702299627155, + "grad_norm": 1.0433034896850586, + "learning_rate": 6.737825216811208e-06, + "loss": 0.3514, + "step": 1010 + }, + { + "epoch": 0.0202387208167555, + "grad_norm": 1.1325896978378296, + "learning_rate": 6.744496330887259e-06, + "loss": 0.3401, + "step": 1011 + }, + { + "epoch": 0.020258739333883842, + "grad_norm": 0.9892399311065674, + "learning_rate": 6.751167444963309e-06, + "loss": 0.3162, + "step": 1012 + }, + { + "epoch": 0.020278757851012186, + "grad_norm": 1.1675078868865967, + "learning_rate": 6.7578385590393605e-06, + "loss": 0.355, + "step": 1013 + }, + { + "epoch": 0.02029877636814053, + "grad_norm": 1.1372942924499512, + "learning_rate": 6.76450967311541e-06, + "loss": 0.3937, + "step": 1014 + }, + { + "epoch": 0.020318794885268873, + "grad_norm": 1.1125662326812744, + "learning_rate": 6.771180787191461e-06, + "loss": 0.3478, + "step": 1015 + }, + { + "epoch": 0.020338813402397217, + "grad_norm": 1.079825520515442, + "learning_rate": 6.777851901267512e-06, + "loss": 0.3348, + "step": 1016 + }, + { + "epoch": 0.02035883191952556, + "grad_norm": 1.1418625116348267, + "learning_rate": 6.784523015343563e-06, + "loss": 0.3389, + "step": 1017 + }, + { + "epoch": 0.020378850436653904, + "grad_norm": 1.016863226890564, + "learning_rate": 6.791194129419614e-06, + "loss": 0.3355, + "step": 1018 + }, + { + "epoch": 0.020398868953782248, + "grad_norm": 1.6813374757766724, + "learning_rate": 6.797865243495664e-06, + "loss": 0.904, + "step": 1019 + }, + { + "epoch": 0.020418887470910592, + "grad_norm": 2.0513174533843994, + "learning_rate": 6.8045363575717145e-06, + "loss": 0.9351, + "step": 1020 + }, + { + "epoch": 0.020438905988038936, + "grad_norm": 1.124208688735962, + "learning_rate": 6.811207471647766e-06, + "loss": 0.3759, + "step": 1021 + }, + { + "epoch": 0.02045892450516728, + "grad_norm": 1.2255420684814453, + "learning_rate": 6.817878585723816e-06, + "loss": 0.33, + "step": 1022 + }, + { + "epoch": 0.020478943022295623, + "grad_norm": 1.0655773878097534, + "learning_rate": 6.824549699799867e-06, + "loss": 0.3168, + "step": 1023 + }, + { + "epoch": 0.020498961539423967, + "grad_norm": 1.7735413312911987, + "learning_rate": 6.8312208138759175e-06, + "loss": 0.8826, + "step": 1024 + }, + { + "epoch": 0.02051898005655231, + "grad_norm": 0.9711496829986572, + "learning_rate": 6.837891927951969e-06, + "loss": 0.3106, + "step": 1025 + }, + { + "epoch": 0.020538998573680654, + "grad_norm": 1.7917323112487793, + "learning_rate": 6.84456304202802e-06, + "loss": 0.8238, + "step": 1026 + }, + { + "epoch": 0.020559017090808998, + "grad_norm": 1.1145143508911133, + "learning_rate": 6.851234156104069e-06, + "loss": 0.3056, + "step": 1027 + }, + { + "epoch": 0.020579035607937342, + "grad_norm": 1.0885852575302124, + "learning_rate": 6.8579052701801206e-06, + "loss": 0.3195, + "step": 1028 + }, + { + "epoch": 0.020599054125065686, + "grad_norm": 1.1155507564544678, + "learning_rate": 6.864576384256171e-06, + "loss": 0.2981, + "step": 1029 + }, + { + "epoch": 0.02061907264219403, + "grad_norm": 1.8491030931472778, + "learning_rate": 6.871247498332222e-06, + "loss": 0.8422, + "step": 1030 + }, + { + "epoch": 0.020639091159322373, + "grad_norm": 1.1401875019073486, + "learning_rate": 6.877918612408273e-06, + "loss": 0.367, + "step": 1031 + }, + { + "epoch": 0.020659109676450717, + "grad_norm": 1.0456392765045166, + "learning_rate": 6.884589726484323e-06, + "loss": 0.3745, + "step": 1032 + }, + { + "epoch": 0.02067912819357906, + "grad_norm": 1.1427019834518433, + "learning_rate": 6.891260840560374e-06, + "loss": 0.3341, + "step": 1033 + }, + { + "epoch": 0.020699146710707404, + "grad_norm": 1.8743263483047485, + "learning_rate": 6.897931954636425e-06, + "loss": 0.8351, + "step": 1034 + }, + { + "epoch": 0.020719165227835748, + "grad_norm": 1.1577600240707397, + "learning_rate": 6.9046030687124755e-06, + "loss": 0.323, + "step": 1035 + }, + { + "epoch": 0.02073918374496409, + "grad_norm": 1.1102601289749146, + "learning_rate": 6.911274182788527e-06, + "loss": 0.3265, + "step": 1036 + }, + { + "epoch": 0.020759202262092435, + "grad_norm": 1.0735063552856445, + "learning_rate": 6.917945296864577e-06, + "loss": 0.3288, + "step": 1037 + }, + { + "epoch": 0.02077922077922078, + "grad_norm": 1.160347819328308, + "learning_rate": 6.924616410940628e-06, + "loss": 0.3413, + "step": 1038 + }, + { + "epoch": 0.020799239296349123, + "grad_norm": 1.0433303117752075, + "learning_rate": 6.9312875250166785e-06, + "loss": 0.3207, + "step": 1039 + }, + { + "epoch": 0.020819257813477467, + "grad_norm": 1.1428951025009155, + "learning_rate": 6.937958639092729e-06, + "loss": 0.3796, + "step": 1040 + }, + { + "epoch": 0.02083927633060581, + "grad_norm": 1.0905587673187256, + "learning_rate": 6.94462975316878e-06, + "loss": 0.359, + "step": 1041 + }, + { + "epoch": 0.020859294847734154, + "grad_norm": 0.9736509919166565, + "learning_rate": 6.95130086724483e-06, + "loss": 0.3099, + "step": 1042 + }, + { + "epoch": 0.020879313364862498, + "grad_norm": 1.1650631427764893, + "learning_rate": 6.9579719813208815e-06, + "loss": 0.3199, + "step": 1043 + }, + { + "epoch": 0.02089933188199084, + "grad_norm": 1.1025390625, + "learning_rate": 6.964643095396933e-06, + "loss": 0.3138, + "step": 1044 + }, + { + "epoch": 0.020919350399119185, + "grad_norm": 1.006978988647461, + "learning_rate": 6.971314209472982e-06, + "loss": 0.3096, + "step": 1045 + }, + { + "epoch": 0.02093936891624753, + "grad_norm": 1.1025735139846802, + "learning_rate": 6.977985323549033e-06, + "loss": 0.3182, + "step": 1046 + }, + { + "epoch": 0.020959387433375873, + "grad_norm": 0.9852421879768372, + "learning_rate": 6.984656437625084e-06, + "loss": 0.3308, + "step": 1047 + }, + { + "epoch": 0.020979405950504217, + "grad_norm": 0.9476369619369507, + "learning_rate": 6.991327551701135e-06, + "loss": 0.3032, + "step": 1048 + }, + { + "epoch": 0.02099942446763256, + "grad_norm": 1.0396767854690552, + "learning_rate": 6.997998665777186e-06, + "loss": 0.3294, + "step": 1049 + }, + { + "epoch": 0.021019442984760904, + "grad_norm": 0.9662322402000427, + "learning_rate": 7.0046697798532356e-06, + "loss": 0.317, + "step": 1050 + }, + { + "epoch": 0.021039461501889248, + "grad_norm": 1.7733781337738037, + "learning_rate": 7.011340893929287e-06, + "loss": 0.8069, + "step": 1051 + }, + { + "epoch": 0.02105948001901759, + "grad_norm": 1.900394082069397, + "learning_rate": 7.018012008005337e-06, + "loss": 0.8942, + "step": 1052 + }, + { + "epoch": 0.021079498536145935, + "grad_norm": 1.0083163976669312, + "learning_rate": 7.024683122081388e-06, + "loss": 0.3456, + "step": 1053 + }, + { + "epoch": 0.02109951705327428, + "grad_norm": 1.072644829750061, + "learning_rate": 7.0313542361574394e-06, + "loss": 0.3451, + "step": 1054 + }, + { + "epoch": 0.021119535570402623, + "grad_norm": 1.018554925918579, + "learning_rate": 7.03802535023349e-06, + "loss": 0.3645, + "step": 1055 + }, + { + "epoch": 0.021139554087530966, + "grad_norm": 1.8062158823013306, + "learning_rate": 7.044696464309541e-06, + "loss": 0.9557, + "step": 1056 + }, + { + "epoch": 0.02115957260465931, + "grad_norm": 1.0941405296325684, + "learning_rate": 7.0513675783855904e-06, + "loss": 0.3364, + "step": 1057 + }, + { + "epoch": 0.021179591121787654, + "grad_norm": 0.9776818752288818, + "learning_rate": 7.058038692461642e-06, + "loss": 0.336, + "step": 1058 + }, + { + "epoch": 0.021199609638915998, + "grad_norm": 0.992139995098114, + "learning_rate": 7.064709806537693e-06, + "loss": 0.3072, + "step": 1059 + }, + { + "epoch": 0.02121962815604434, + "grad_norm": 1.0873030424118042, + "learning_rate": 7.071380920613743e-06, + "loss": 0.3184, + "step": 1060 + }, + { + "epoch": 0.021239646673172685, + "grad_norm": 1.0433558225631714, + "learning_rate": 7.078052034689794e-06, + "loss": 0.3334, + "step": 1061 + }, + { + "epoch": 0.02125966519030103, + "grad_norm": 1.100573182106018, + "learning_rate": 7.084723148765844e-06, + "loss": 0.3617, + "step": 1062 + }, + { + "epoch": 0.021279683707429373, + "grad_norm": 1.9153515100479126, + "learning_rate": 7.091394262841895e-06, + "loss": 0.8492, + "step": 1063 + }, + { + "epoch": 0.021299702224557716, + "grad_norm": 1.074064016342163, + "learning_rate": 7.098065376917946e-06, + "loss": 0.3103, + "step": 1064 + }, + { + "epoch": 0.02131972074168606, + "grad_norm": 1.1459167003631592, + "learning_rate": 7.1047364909939965e-06, + "loss": 0.3234, + "step": 1065 + }, + { + "epoch": 0.021339739258814404, + "grad_norm": 1.0475773811340332, + "learning_rate": 7.111407605070048e-06, + "loss": 0.3133, + "step": 1066 + }, + { + "epoch": 0.021359757775942748, + "grad_norm": 1.0898821353912354, + "learning_rate": 7.118078719146098e-06, + "loss": 0.3019, + "step": 1067 + }, + { + "epoch": 0.02137977629307109, + "grad_norm": 1.1551363468170166, + "learning_rate": 7.124749833222148e-06, + "loss": 0.3314, + "step": 1068 + }, + { + "epoch": 0.021399794810199435, + "grad_norm": 1.0886967182159424, + "learning_rate": 7.1314209472981995e-06, + "loss": 0.3003, + "step": 1069 + }, + { + "epoch": 0.02141981332732778, + "grad_norm": 1.0575133562088013, + "learning_rate": 7.13809206137425e-06, + "loss": 0.2944, + "step": 1070 + }, + { + "epoch": 0.021439831844456123, + "grad_norm": 1.0824483633041382, + "learning_rate": 7.144763175450301e-06, + "loss": 0.3336, + "step": 1071 + }, + { + "epoch": 0.021459850361584466, + "grad_norm": 1.8709638118743896, + "learning_rate": 7.151434289526351e-06, + "loss": 0.8911, + "step": 1072 + }, + { + "epoch": 0.02147986887871281, + "grad_norm": 1.0787087678909302, + "learning_rate": 7.1581054036024026e-06, + "loss": 0.317, + "step": 1073 + }, + { + "epoch": 0.021499887395841154, + "grad_norm": 1.0662670135498047, + "learning_rate": 7.164776517678454e-06, + "loss": 0.3714, + "step": 1074 + }, + { + "epoch": 0.021519905912969498, + "grad_norm": 1.2429754734039307, + "learning_rate": 7.171447631754503e-06, + "loss": 0.3297, + "step": 1075 + }, + { + "epoch": 0.02153992443009784, + "grad_norm": 1.0334290266036987, + "learning_rate": 7.178118745830554e-06, + "loss": 0.3294, + "step": 1076 + }, + { + "epoch": 0.021559942947226185, + "grad_norm": 1.153558373451233, + "learning_rate": 7.184789859906605e-06, + "loss": 0.3379, + "step": 1077 + }, + { + "epoch": 0.02157996146435453, + "grad_norm": 0.9929841756820679, + "learning_rate": 7.191460973982656e-06, + "loss": 0.3093, + "step": 1078 + }, + { + "epoch": 0.021599979981482872, + "grad_norm": 1.0227924585342407, + "learning_rate": 7.198132088058707e-06, + "loss": 0.3223, + "step": 1079 + }, + { + "epoch": 0.021619998498611216, + "grad_norm": 1.0113757848739624, + "learning_rate": 7.204803202134757e-06, + "loss": 0.2932, + "step": 1080 + }, + { + "epoch": 0.02164001701573956, + "grad_norm": 1.1093692779541016, + "learning_rate": 7.211474316210808e-06, + "loss": 0.3692, + "step": 1081 + }, + { + "epoch": 0.021660035532867904, + "grad_norm": 1.1276627779006958, + "learning_rate": 7.218145430286858e-06, + "loss": 0.2945, + "step": 1082 + }, + { + "epoch": 0.021680054049996247, + "grad_norm": 1.1241506338119507, + "learning_rate": 7.224816544362909e-06, + "loss": 0.3781, + "step": 1083 + }, + { + "epoch": 0.02170007256712459, + "grad_norm": 1.2980332374572754, + "learning_rate": 7.2314876584389605e-06, + "loss": 0.3648, + "step": 1084 + }, + { + "epoch": 0.021720091084252935, + "grad_norm": 1.1195547580718994, + "learning_rate": 7.238158772515011e-06, + "loss": 0.3489, + "step": 1085 + }, + { + "epoch": 0.02174010960138128, + "grad_norm": 1.3110889196395874, + "learning_rate": 7.244829886591062e-06, + "loss": 0.3182, + "step": 1086 + }, + { + "epoch": 0.021760128118509622, + "grad_norm": 1.093862533569336, + "learning_rate": 7.2515010006671115e-06, + "loss": 0.2886, + "step": 1087 + }, + { + "epoch": 0.021780146635637966, + "grad_norm": 1.113844871520996, + "learning_rate": 7.258172114743163e-06, + "loss": 0.3531, + "step": 1088 + }, + { + "epoch": 0.02180016515276631, + "grad_norm": 1.8341865539550781, + "learning_rate": 7.264843228819214e-06, + "loss": 0.8701, + "step": 1089 + }, + { + "epoch": 0.021820183669894654, + "grad_norm": 1.9541444778442383, + "learning_rate": 7.271514342895264e-06, + "loss": 0.8354, + "step": 1090 + }, + { + "epoch": 0.021840202187022997, + "grad_norm": 1.1834313869476318, + "learning_rate": 7.278185456971315e-06, + "loss": 0.3244, + "step": 1091 + }, + { + "epoch": 0.02186022070415134, + "grad_norm": 1.9561468362808228, + "learning_rate": 7.284856571047365e-06, + "loss": 0.8908, + "step": 1092 + }, + { + "epoch": 0.021880239221279685, + "grad_norm": 1.264538288116455, + "learning_rate": 7.291527685123416e-06, + "loss": 0.3829, + "step": 1093 + }, + { + "epoch": 0.02190025773840803, + "grad_norm": 1.0289448499679565, + "learning_rate": 7.298198799199467e-06, + "loss": 0.3265, + "step": 1094 + }, + { + "epoch": 0.021920276255536372, + "grad_norm": 1.0430935621261597, + "learning_rate": 7.3048699132755175e-06, + "loss": 0.3509, + "step": 1095 + }, + { + "epoch": 0.021940294772664716, + "grad_norm": 1.1383155584335327, + "learning_rate": 7.311541027351569e-06, + "loss": 0.3391, + "step": 1096 + }, + { + "epoch": 0.02196031328979306, + "grad_norm": 1.1395373344421387, + "learning_rate": 7.318212141427618e-06, + "loss": 0.383, + "step": 1097 + }, + { + "epoch": 0.021980331806921403, + "grad_norm": 1.3137199878692627, + "learning_rate": 7.324883255503669e-06, + "loss": 0.3525, + "step": 1098 + }, + { + "epoch": 0.022000350324049747, + "grad_norm": 1.0751022100448608, + "learning_rate": 7.3315543695797206e-06, + "loss": 0.3504, + "step": 1099 + }, + { + "epoch": 0.02202036884117809, + "grad_norm": 1.0998790264129639, + "learning_rate": 7.338225483655771e-06, + "loss": 0.2911, + "step": 1100 + }, + { + "epoch": 0.022040387358306435, + "grad_norm": 1.0602506399154663, + "learning_rate": 7.344896597731822e-06, + "loss": 0.3243, + "step": 1101 + }, + { + "epoch": 0.02206040587543478, + "grad_norm": 0.9799483418464661, + "learning_rate": 7.351567711807872e-06, + "loss": 0.2914, + "step": 1102 + }, + { + "epoch": 0.022080424392563122, + "grad_norm": 1.106614112854004, + "learning_rate": 7.358238825883924e-06, + "loss": 0.318, + "step": 1103 + }, + { + "epoch": 0.022100442909691466, + "grad_norm": 1.0848655700683594, + "learning_rate": 7.364909939959975e-06, + "loss": 0.3478, + "step": 1104 + }, + { + "epoch": 0.02212046142681981, + "grad_norm": 1.1039791107177734, + "learning_rate": 7.371581054036024e-06, + "loss": 0.3246, + "step": 1105 + }, + { + "epoch": 0.022140479943948153, + "grad_norm": 1.7357810735702515, + "learning_rate": 7.3782521681120754e-06, + "loss": 0.8883, + "step": 1106 + }, + { + "epoch": 0.022160498461076497, + "grad_norm": 1.0980650186538696, + "learning_rate": 7.384923282188126e-06, + "loss": 0.3017, + "step": 1107 + }, + { + "epoch": 0.02218051697820484, + "grad_norm": 1.6408816576004028, + "learning_rate": 7.391594396264177e-06, + "loss": 0.3857, + "step": 1108 + }, + { + "epoch": 0.022200535495333185, + "grad_norm": 1.3199666738510132, + "learning_rate": 7.398265510340228e-06, + "loss": 0.3671, + "step": 1109 + }, + { + "epoch": 0.02222055401246153, + "grad_norm": 1.0642187595367432, + "learning_rate": 7.404936624416278e-06, + "loss": 0.2577, + "step": 1110 + }, + { + "epoch": 0.022240572529589872, + "grad_norm": 1.1537659168243408, + "learning_rate": 7.411607738492329e-06, + "loss": 0.3945, + "step": 1111 + }, + { + "epoch": 0.022260591046718216, + "grad_norm": 1.0282909870147705, + "learning_rate": 7.418278852568379e-06, + "loss": 0.3042, + "step": 1112 + }, + { + "epoch": 0.02228060956384656, + "grad_norm": 1.290846824645996, + "learning_rate": 7.42494996664443e-06, + "loss": 0.323, + "step": 1113 + }, + { + "epoch": 0.022300628080974903, + "grad_norm": 1.0234074592590332, + "learning_rate": 7.4316210807204815e-06, + "loss": 0.3252, + "step": 1114 + }, + { + "epoch": 0.022320646598103247, + "grad_norm": 1.1617181301116943, + "learning_rate": 7.438292194796531e-06, + "loss": 0.3145, + "step": 1115 + }, + { + "epoch": 0.02234066511523159, + "grad_norm": 2.1507842540740967, + "learning_rate": 7.444963308872582e-06, + "loss": 0.8274, + "step": 1116 + }, + { + "epoch": 0.022360683632359935, + "grad_norm": 1.0967153310775757, + "learning_rate": 7.4516344229486325e-06, + "loss": 0.3579, + "step": 1117 + }, + { + "epoch": 0.022380702149488278, + "grad_norm": 1.17815101146698, + "learning_rate": 7.458305537024684e-06, + "loss": 0.3409, + "step": 1118 + }, + { + "epoch": 0.022400720666616622, + "grad_norm": 1.0701566934585571, + "learning_rate": 7.464976651100735e-06, + "loss": 0.3481, + "step": 1119 + }, + { + "epoch": 0.022420739183744966, + "grad_norm": 0.9788215160369873, + "learning_rate": 7.471647765176785e-06, + "loss": 0.3337, + "step": 1120 + }, + { + "epoch": 0.02244075770087331, + "grad_norm": 0.9978281855583191, + "learning_rate": 7.478318879252836e-06, + "loss": 0.3273, + "step": 1121 + }, + { + "epoch": 0.022460776218001653, + "grad_norm": 1.8171412944793701, + "learning_rate": 7.484989993328886e-06, + "loss": 0.9012, + "step": 1122 + }, + { + "epoch": 0.022480794735129997, + "grad_norm": 1.0873616933822632, + "learning_rate": 7.491661107404937e-06, + "loss": 0.3017, + "step": 1123 + }, + { + "epoch": 0.02250081325225834, + "grad_norm": 1.1321251392364502, + "learning_rate": 7.498332221480988e-06, + "loss": 0.3568, + "step": 1124 + }, + { + "epoch": 0.022520831769386684, + "grad_norm": 1.9617125988006592, + "learning_rate": 7.5050033355570386e-06, + "loss": 0.9292, + "step": 1125 + }, + { + "epoch": 0.022540850286515025, + "grad_norm": 1.1833101511001587, + "learning_rate": 7.51167444963309e-06, + "loss": 0.3275, + "step": 1126 + }, + { + "epoch": 0.02256086880364337, + "grad_norm": 0.9939690232276917, + "learning_rate": 7.518345563709139e-06, + "loss": 0.2987, + "step": 1127 + }, + { + "epoch": 0.022580887320771712, + "grad_norm": 1.9488919973373413, + "learning_rate": 7.5250166777851904e-06, + "loss": 0.8636, + "step": 1128 + }, + { + "epoch": 0.022600905837900056, + "grad_norm": 1.1791512966156006, + "learning_rate": 7.531687791861242e-06, + "loss": 0.3046, + "step": 1129 + }, + { + "epoch": 0.0226209243550284, + "grad_norm": 1.1462900638580322, + "learning_rate": 7.538358905937292e-06, + "loss": 0.3631, + "step": 1130 + }, + { + "epoch": 0.022640942872156743, + "grad_norm": 1.191728949546814, + "learning_rate": 7.545030020013343e-06, + "loss": 0.3376, + "step": 1131 + }, + { + "epoch": 0.022660961389285087, + "grad_norm": 1.1403051614761353, + "learning_rate": 7.5517011340893935e-06, + "loss": 0.3504, + "step": 1132 + }, + { + "epoch": 0.02268097990641343, + "grad_norm": 1.248295545578003, + "learning_rate": 7.558372248165445e-06, + "loss": 0.2942, + "step": 1133 + }, + { + "epoch": 0.022700998423541775, + "grad_norm": 1.0760040283203125, + "learning_rate": 7.565043362241495e-06, + "loss": 0.3342, + "step": 1134 + }, + { + "epoch": 0.02272101694067012, + "grad_norm": 1.3183097839355469, + "learning_rate": 7.571714476317545e-06, + "loss": 0.3388, + "step": 1135 + }, + { + "epoch": 0.022741035457798462, + "grad_norm": 1.0743608474731445, + "learning_rate": 7.5783855903935965e-06, + "loss": 0.3635, + "step": 1136 + }, + { + "epoch": 0.022761053974926806, + "grad_norm": 1.2210850715637207, + "learning_rate": 7.585056704469647e-06, + "loss": 0.3411, + "step": 1137 + }, + { + "epoch": 0.02278107249205515, + "grad_norm": 0.9872978329658508, + "learning_rate": 7.591727818545698e-06, + "loss": 0.3306, + "step": 1138 + }, + { + "epoch": 0.022801091009183493, + "grad_norm": 1.9445725679397583, + "learning_rate": 7.598398932621749e-06, + "loss": 0.7796, + "step": 1139 + }, + { + "epoch": 0.022821109526311837, + "grad_norm": 1.2128832340240479, + "learning_rate": 7.605070046697799e-06, + "loss": 0.3767, + "step": 1140 + }, + { + "epoch": 0.02284112804344018, + "grad_norm": 1.204514980316162, + "learning_rate": 7.61174116077385e-06, + "loss": 0.3542, + "step": 1141 + }, + { + "epoch": 0.022861146560568524, + "grad_norm": 1.1240060329437256, + "learning_rate": 7.6184122748499e-06, + "loss": 0.3477, + "step": 1142 + }, + { + "epoch": 0.022881165077696868, + "grad_norm": 1.8857252597808838, + "learning_rate": 7.625083388925951e-06, + "loss": 0.7978, + "step": 1143 + }, + { + "epoch": 0.022901183594825212, + "grad_norm": 1.9593137502670288, + "learning_rate": 7.631754503002003e-06, + "loss": 0.9689, + "step": 1144 + }, + { + "epoch": 0.022921202111953556, + "grad_norm": 1.132453203201294, + "learning_rate": 7.638425617078052e-06, + "loss": 0.3211, + "step": 1145 + }, + { + "epoch": 0.0229412206290819, + "grad_norm": 1.6749963760375977, + "learning_rate": 7.645096731154103e-06, + "loss": 0.8141, + "step": 1146 + }, + { + "epoch": 0.022961239146210243, + "grad_norm": 1.0468940734863281, + "learning_rate": 7.651767845230154e-06, + "loss": 0.311, + "step": 1147 + }, + { + "epoch": 0.022981257663338587, + "grad_norm": 1.185658574104309, + "learning_rate": 7.658438959306204e-06, + "loss": 0.2774, + "step": 1148 + }, + { + "epoch": 0.02300127618046693, + "grad_norm": 1.0577335357666016, + "learning_rate": 7.665110073382255e-06, + "loss": 0.3024, + "step": 1149 + }, + { + "epoch": 0.023021294697595274, + "grad_norm": 1.174895167350769, + "learning_rate": 7.671781187458306e-06, + "loss": 0.3625, + "step": 1150 + }, + { + "epoch": 0.023041313214723618, + "grad_norm": 1.009640097618103, + "learning_rate": 7.678452301534357e-06, + "loss": 0.3171, + "step": 1151 + }, + { + "epoch": 0.023061331731851962, + "grad_norm": 1.034883975982666, + "learning_rate": 7.685123415610407e-06, + "loss": 0.321, + "step": 1152 + }, + { + "epoch": 0.023081350248980306, + "grad_norm": 1.0555998086929321, + "learning_rate": 7.691794529686458e-06, + "loss": 0.3342, + "step": 1153 + }, + { + "epoch": 0.02310136876610865, + "grad_norm": 1.3065000772476196, + "learning_rate": 7.69846564376251e-06, + "loss": 0.3169, + "step": 1154 + }, + { + "epoch": 0.023121387283236993, + "grad_norm": 1.1877477169036865, + "learning_rate": 7.705136757838559e-06, + "loss": 0.3734, + "step": 1155 + }, + { + "epoch": 0.023141405800365337, + "grad_norm": 1.2171247005462646, + "learning_rate": 7.71180787191461e-06, + "loss": 0.3233, + "step": 1156 + }, + { + "epoch": 0.02316142431749368, + "grad_norm": 0.9288862943649292, + "learning_rate": 7.718478985990661e-06, + "loss": 0.2781, + "step": 1157 + }, + { + "epoch": 0.023181442834622024, + "grad_norm": 1.102313756942749, + "learning_rate": 7.725150100066712e-06, + "loss": 0.3489, + "step": 1158 + }, + { + "epoch": 0.023201461351750368, + "grad_norm": 1.2208869457244873, + "learning_rate": 7.731821214142763e-06, + "loss": 0.377, + "step": 1159 + }, + { + "epoch": 0.023221479868878712, + "grad_norm": 1.0194939374923706, + "learning_rate": 7.738492328218813e-06, + "loss": 0.3407, + "step": 1160 + }, + { + "epoch": 0.023241498386007056, + "grad_norm": 1.0509802103042603, + "learning_rate": 7.745163442294864e-06, + "loss": 0.3422, + "step": 1161 + }, + { + "epoch": 0.0232615169031354, + "grad_norm": 1.077462077140808, + "learning_rate": 7.751834556370914e-06, + "loss": 0.3073, + "step": 1162 + }, + { + "epoch": 0.023281535420263743, + "grad_norm": 1.161503791809082, + "learning_rate": 7.758505670446965e-06, + "loss": 0.31, + "step": 1163 + }, + { + "epoch": 0.023301553937392087, + "grad_norm": 1.0370782613754272, + "learning_rate": 7.765176784523016e-06, + "loss": 0.3409, + "step": 1164 + }, + { + "epoch": 0.02332157245452043, + "grad_norm": 2.3395919799804688, + "learning_rate": 7.771847898599067e-06, + "loss": 0.8591, + "step": 1165 + }, + { + "epoch": 0.023341590971648774, + "grad_norm": 1.5079351663589478, + "learning_rate": 7.778519012675118e-06, + "loss": 0.3635, + "step": 1166 + }, + { + "epoch": 0.023361609488777118, + "grad_norm": 1.081386923789978, + "learning_rate": 7.785190126751168e-06, + "loss": 0.363, + "step": 1167 + }, + { + "epoch": 0.02338162800590546, + "grad_norm": 2.019080638885498, + "learning_rate": 7.791861240827219e-06, + "loss": 0.833, + "step": 1168 + }, + { + "epoch": 0.023401646523033805, + "grad_norm": 1.1209263801574707, + "learning_rate": 7.79853235490327e-06, + "loss": 0.2718, + "step": 1169 + }, + { + "epoch": 0.02342166504016215, + "grad_norm": 1.1308484077453613, + "learning_rate": 7.80520346897932e-06, + "loss": 0.2738, + "step": 1170 + }, + { + "epoch": 0.023441683557290493, + "grad_norm": 1.1334562301635742, + "learning_rate": 7.811874583055371e-06, + "loss": 0.3304, + "step": 1171 + }, + { + "epoch": 0.023461702074418837, + "grad_norm": 1.0365233421325684, + "learning_rate": 7.81854569713142e-06, + "loss": 0.3454, + "step": 1172 + }, + { + "epoch": 0.02348172059154718, + "grad_norm": 1.755020260810852, + "learning_rate": 7.825216811207472e-06, + "loss": 0.3841, + "step": 1173 + }, + { + "epoch": 0.023501739108675524, + "grad_norm": 1.0225244760513306, + "learning_rate": 7.831887925283523e-06, + "loss": 0.3592, + "step": 1174 + }, + { + "epoch": 0.023521757625803868, + "grad_norm": 0.9744156002998352, + "learning_rate": 7.838559039359574e-06, + "loss": 0.3119, + "step": 1175 + }, + { + "epoch": 0.02354177614293221, + "grad_norm": 1.0913710594177246, + "learning_rate": 7.845230153435625e-06, + "loss": 0.3318, + "step": 1176 + }, + { + "epoch": 0.023561794660060555, + "grad_norm": 1.1873514652252197, + "learning_rate": 7.851901267511675e-06, + "loss": 0.3562, + "step": 1177 + }, + { + "epoch": 0.0235818131771889, + "grad_norm": 1.1285017728805542, + "learning_rate": 7.858572381587726e-06, + "loss": 0.3311, + "step": 1178 + }, + { + "epoch": 0.023601831694317243, + "grad_norm": 1.0721806287765503, + "learning_rate": 7.865243495663777e-06, + "loss": 0.3453, + "step": 1179 + }, + { + "epoch": 0.023621850211445587, + "grad_norm": 1.2082446813583374, + "learning_rate": 7.871914609739826e-06, + "loss": 0.3271, + "step": 1180 + }, + { + "epoch": 0.02364186872857393, + "grad_norm": 1.1329911947250366, + "learning_rate": 7.878585723815878e-06, + "loss": 0.2931, + "step": 1181 + }, + { + "epoch": 0.023661887245702274, + "grad_norm": 1.1168811321258545, + "learning_rate": 7.885256837891929e-06, + "loss": 0.3328, + "step": 1182 + }, + { + "epoch": 0.023681905762830618, + "grad_norm": 1.329690933227539, + "learning_rate": 7.89192795196798e-06, + "loss": 0.3238, + "step": 1183 + }, + { + "epoch": 0.02370192427995896, + "grad_norm": 1.163675308227539, + "learning_rate": 7.898599066044031e-06, + "loss": 0.3016, + "step": 1184 + }, + { + "epoch": 0.023721942797087305, + "grad_norm": 0.9913971424102783, + "learning_rate": 7.90527018012008e-06, + "loss": 0.3289, + "step": 1185 + }, + { + "epoch": 0.02374196131421565, + "grad_norm": 1.1071884632110596, + "learning_rate": 7.911941294196132e-06, + "loss": 0.314, + "step": 1186 + }, + { + "epoch": 0.023761979831343993, + "grad_norm": 1.140504240989685, + "learning_rate": 7.918612408272181e-06, + "loss": 0.3267, + "step": 1187 + }, + { + "epoch": 0.023781998348472336, + "grad_norm": 1.2475415468215942, + "learning_rate": 7.925283522348233e-06, + "loss": 0.3508, + "step": 1188 + }, + { + "epoch": 0.02380201686560068, + "grad_norm": 1.1611597537994385, + "learning_rate": 7.931954636424284e-06, + "loss": 0.3679, + "step": 1189 + }, + { + "epoch": 0.023822035382729024, + "grad_norm": 0.9057615399360657, + "learning_rate": 7.938625750500333e-06, + "loss": 0.2802, + "step": 1190 + }, + { + "epoch": 0.023842053899857368, + "grad_norm": 0.9739316701889038, + "learning_rate": 7.945296864576384e-06, + "loss": 0.3301, + "step": 1191 + }, + { + "epoch": 0.02386207241698571, + "grad_norm": 1.0137630701065063, + "learning_rate": 7.951967978652436e-06, + "loss": 0.3507, + "step": 1192 + }, + { + "epoch": 0.023882090934114055, + "grad_norm": 1.2436658143997192, + "learning_rate": 7.958639092728487e-06, + "loss": 0.2977, + "step": 1193 + }, + { + "epoch": 0.0239021094512424, + "grad_norm": 1.1021672487258911, + "learning_rate": 7.965310206804538e-06, + "loss": 0.3255, + "step": 1194 + }, + { + "epoch": 0.023922127968370743, + "grad_norm": 1.1073501110076904, + "learning_rate": 7.971981320880587e-06, + "loss": 0.347, + "step": 1195 + }, + { + "epoch": 0.023942146485499086, + "grad_norm": 1.0797268152236938, + "learning_rate": 7.978652434956639e-06, + "loss": 0.3156, + "step": 1196 + }, + { + "epoch": 0.02396216500262743, + "grad_norm": 0.9944286346435547, + "learning_rate": 7.985323549032688e-06, + "loss": 0.297, + "step": 1197 + }, + { + "epoch": 0.023982183519755774, + "grad_norm": 1.0344575643539429, + "learning_rate": 7.99199466310874e-06, + "loss": 0.3549, + "step": 1198 + }, + { + "epoch": 0.024002202036884118, + "grad_norm": 1.3129273653030396, + "learning_rate": 7.99866577718479e-06, + "loss": 0.2682, + "step": 1199 + }, + { + "epoch": 0.02402222055401246, + "grad_norm": 1.9924086332321167, + "learning_rate": 8.005336891260842e-06, + "loss": 0.9565, + "step": 1200 + }, + { + "epoch": 0.024042239071140805, + "grad_norm": 1.2418079376220703, + "learning_rate": 8.012008005336893e-06, + "loss": 0.3455, + "step": 1201 + }, + { + "epoch": 0.02406225758826915, + "grad_norm": 1.1954073905944824, + "learning_rate": 8.018679119412942e-06, + "loss": 0.3626, + "step": 1202 + }, + { + "epoch": 0.024082276105397492, + "grad_norm": 1.0009914636611938, + "learning_rate": 8.025350233488993e-06, + "loss": 0.342, + "step": 1203 + }, + { + "epoch": 0.024102294622525836, + "grad_norm": 1.0434298515319824, + "learning_rate": 8.032021347565045e-06, + "loss": 0.3474, + "step": 1204 + }, + { + "epoch": 0.02412231313965418, + "grad_norm": 1.078075885772705, + "learning_rate": 8.038692461641094e-06, + "loss": 0.3685, + "step": 1205 + }, + { + "epoch": 0.024142331656782524, + "grad_norm": 1.1885218620300293, + "learning_rate": 8.045363575717145e-06, + "loss": 0.3415, + "step": 1206 + }, + { + "epoch": 0.024162350173910867, + "grad_norm": 1.1103718280792236, + "learning_rate": 8.052034689793195e-06, + "loss": 0.3053, + "step": 1207 + }, + { + "epoch": 0.02418236869103921, + "grad_norm": 1.037015438079834, + "learning_rate": 8.058705803869246e-06, + "loss": 0.3209, + "step": 1208 + }, + { + "epoch": 0.024202387208167555, + "grad_norm": 1.2558320760726929, + "learning_rate": 8.065376917945297e-06, + "loss": 0.3777, + "step": 1209 + }, + { + "epoch": 0.0242224057252959, + "grad_norm": 1.1009705066680908, + "learning_rate": 8.072048032021348e-06, + "loss": 0.3206, + "step": 1210 + }, + { + "epoch": 0.024242424242424242, + "grad_norm": 1.0607351064682007, + "learning_rate": 8.0787191460974e-06, + "loss": 0.3105, + "step": 1211 + }, + { + "epoch": 0.024262442759552586, + "grad_norm": 1.1299145221710205, + "learning_rate": 8.085390260173449e-06, + "loss": 0.3674, + "step": 1212 + }, + { + "epoch": 0.02428246127668093, + "grad_norm": 0.9775823354721069, + "learning_rate": 8.0920613742495e-06, + "loss": 0.3203, + "step": 1213 + }, + { + "epoch": 0.024302479793809274, + "grad_norm": 1.1473829746246338, + "learning_rate": 8.098732488325551e-06, + "loss": 0.3658, + "step": 1214 + }, + { + "epoch": 0.024322498310937617, + "grad_norm": 2.0062334537506104, + "learning_rate": 8.105403602401601e-06, + "loss": 0.9017, + "step": 1215 + }, + { + "epoch": 0.02434251682806596, + "grad_norm": 1.3465856313705444, + "learning_rate": 8.112074716477652e-06, + "loss": 0.3417, + "step": 1216 + }, + { + "epoch": 0.024362535345194305, + "grad_norm": 1.220474123954773, + "learning_rate": 8.118745830553703e-06, + "loss": 0.3689, + "step": 1217 + }, + { + "epoch": 0.02438255386232265, + "grad_norm": 1.8266620635986328, + "learning_rate": 8.125416944629754e-06, + "loss": 0.9246, + "step": 1218 + }, + { + "epoch": 0.024402572379450992, + "grad_norm": 1.0112626552581787, + "learning_rate": 8.132088058705806e-06, + "loss": 0.3441, + "step": 1219 + }, + { + "epoch": 0.024422590896579336, + "grad_norm": 1.047417163848877, + "learning_rate": 8.138759172781855e-06, + "loss": 0.3291, + "step": 1220 + }, + { + "epoch": 0.02444260941370768, + "grad_norm": 1.1457053422927856, + "learning_rate": 8.145430286857906e-06, + "loss": 0.346, + "step": 1221 + }, + { + "epoch": 0.024462627930836024, + "grad_norm": 1.0498987436294556, + "learning_rate": 8.152101400933957e-06, + "loss": 0.3449, + "step": 1222 + }, + { + "epoch": 0.024482646447964367, + "grad_norm": 1.1608288288116455, + "learning_rate": 8.158772515010007e-06, + "loss": 0.3282, + "step": 1223 + }, + { + "epoch": 0.02450266496509271, + "grad_norm": 1.0375367403030396, + "learning_rate": 8.165443629086058e-06, + "loss": 0.2989, + "step": 1224 + }, + { + "epoch": 0.024522683482221055, + "grad_norm": 1.0461174249649048, + "learning_rate": 8.172114743162108e-06, + "loss": 0.3729, + "step": 1225 + }, + { + "epoch": 0.0245427019993494, + "grad_norm": 1.034311294555664, + "learning_rate": 8.178785857238159e-06, + "loss": 0.3418, + "step": 1226 + }, + { + "epoch": 0.024562720516477742, + "grad_norm": 1.0283914804458618, + "learning_rate": 8.18545697131421e-06, + "loss": 0.3244, + "step": 1227 + }, + { + "epoch": 0.024582739033606086, + "grad_norm": 1.9132063388824463, + "learning_rate": 8.192128085390261e-06, + "loss": 0.9435, + "step": 1228 + }, + { + "epoch": 0.02460275755073443, + "grad_norm": 1.1722739934921265, + "learning_rate": 8.198799199466312e-06, + "loss": 0.3583, + "step": 1229 + }, + { + "epoch": 0.024622776067862773, + "grad_norm": 1.082772135734558, + "learning_rate": 8.205470313542362e-06, + "loss": 0.3642, + "step": 1230 + }, + { + "epoch": 0.024642794584991117, + "grad_norm": 0.989289402961731, + "learning_rate": 8.212141427618413e-06, + "loss": 0.3271, + "step": 1231 + }, + { + "epoch": 0.02466281310211946, + "grad_norm": 1.1584094762802124, + "learning_rate": 8.218812541694464e-06, + "loss": 0.3043, + "step": 1232 + }, + { + "epoch": 0.024682831619247805, + "grad_norm": 1.0964710712432861, + "learning_rate": 8.225483655770514e-06, + "loss": 0.3267, + "step": 1233 + }, + { + "epoch": 0.02470285013637615, + "grad_norm": 1.138398289680481, + "learning_rate": 8.232154769846565e-06, + "loss": 0.3479, + "step": 1234 + }, + { + "epoch": 0.024722868653504492, + "grad_norm": 1.0941911935806274, + "learning_rate": 8.238825883922616e-06, + "loss": 0.3116, + "step": 1235 + }, + { + "epoch": 0.024742887170632836, + "grad_norm": 2.0664141178131104, + "learning_rate": 8.245496997998667e-06, + "loss": 0.8682, + "step": 1236 + }, + { + "epoch": 0.02476290568776118, + "grad_norm": 1.1162687540054321, + "learning_rate": 8.252168112074718e-06, + "loss": 0.3365, + "step": 1237 + }, + { + "epoch": 0.024782924204889523, + "grad_norm": 1.189969539642334, + "learning_rate": 8.258839226150768e-06, + "loss": 0.353, + "step": 1238 + }, + { + "epoch": 0.024802942722017867, + "grad_norm": 1.0210338830947876, + "learning_rate": 8.265510340226819e-06, + "loss": 0.3022, + "step": 1239 + }, + { + "epoch": 0.02482296123914621, + "grad_norm": 1.9541223049163818, + "learning_rate": 8.272181454302869e-06, + "loss": 0.8988, + "step": 1240 + }, + { + "epoch": 0.024842979756274555, + "grad_norm": 1.2375385761260986, + "learning_rate": 8.27885256837892e-06, + "loss": 0.3392, + "step": 1241 + }, + { + "epoch": 0.0248629982734029, + "grad_norm": 1.0619560480117798, + "learning_rate": 8.285523682454971e-06, + "loss": 0.3634, + "step": 1242 + }, + { + "epoch": 0.024883016790531242, + "grad_norm": 1.0730780363082886, + "learning_rate": 8.29219479653102e-06, + "loss": 0.3648, + "step": 1243 + }, + { + "epoch": 0.024903035307659586, + "grad_norm": 1.111710786819458, + "learning_rate": 8.298865910607072e-06, + "loss": 0.3184, + "step": 1244 + }, + { + "epoch": 0.02492305382478793, + "grad_norm": 1.156899094581604, + "learning_rate": 8.305537024683123e-06, + "loss": 0.3426, + "step": 1245 + }, + { + "epoch": 0.024943072341916273, + "grad_norm": 1.2607083320617676, + "learning_rate": 8.312208138759174e-06, + "loss": 0.3521, + "step": 1246 + }, + { + "epoch": 0.024963090859044617, + "grad_norm": 1.0577391386032104, + "learning_rate": 8.318879252835225e-06, + "loss": 0.3424, + "step": 1247 + }, + { + "epoch": 0.02498310937617296, + "grad_norm": 1.8957799673080444, + "learning_rate": 8.325550366911275e-06, + "loss": 0.8909, + "step": 1248 + }, + { + "epoch": 0.025003127893301304, + "grad_norm": 0.9904258847236633, + "learning_rate": 8.332221480987326e-06, + "loss": 0.3093, + "step": 1249 + }, + { + "epoch": 0.025023146410429648, + "grad_norm": 1.0696825981140137, + "learning_rate": 8.338892595063375e-06, + "loss": 0.3253, + "step": 1250 + }, + { + "epoch": 0.025043164927557992, + "grad_norm": 1.7849441766738892, + "learning_rate": 8.345563709139426e-06, + "loss": 0.8421, + "step": 1251 + }, + { + "epoch": 0.025063183444686336, + "grad_norm": 1.359799861907959, + "learning_rate": 8.352234823215478e-06, + "loss": 0.3552, + "step": 1252 + }, + { + "epoch": 0.02508320196181468, + "grad_norm": 1.925704836845398, + "learning_rate": 8.358905937291529e-06, + "loss": 0.7573, + "step": 1253 + }, + { + "epoch": 0.025103220478943023, + "grad_norm": 1.1005263328552246, + "learning_rate": 8.36557705136758e-06, + "loss": 0.3555, + "step": 1254 + }, + { + "epoch": 0.025123238996071367, + "grad_norm": 0.9861041903495789, + "learning_rate": 8.37224816544363e-06, + "loss": 0.3261, + "step": 1255 + }, + { + "epoch": 0.02514325751319971, + "grad_norm": 1.0013712644577026, + "learning_rate": 8.37891927951968e-06, + "loss": 0.2819, + "step": 1256 + }, + { + "epoch": 0.025163276030328054, + "grad_norm": 1.0836821794509888, + "learning_rate": 8.385590393595732e-06, + "loss": 0.3295, + "step": 1257 + }, + { + "epoch": 0.025183294547456398, + "grad_norm": 1.0602467060089111, + "learning_rate": 8.392261507671781e-06, + "loss": 0.3311, + "step": 1258 + }, + { + "epoch": 0.025203313064584742, + "grad_norm": 1.9728256464004517, + "learning_rate": 8.398932621747832e-06, + "loss": 0.8646, + "step": 1259 + }, + { + "epoch": 0.025223331581713086, + "grad_norm": 1.12026047706604, + "learning_rate": 8.405603735823884e-06, + "loss": 0.3553, + "step": 1260 + }, + { + "epoch": 0.02524335009884143, + "grad_norm": 1.0526251792907715, + "learning_rate": 8.412274849899935e-06, + "loss": 0.3276, + "step": 1261 + }, + { + "epoch": 0.025263368615969773, + "grad_norm": 1.1091495752334595, + "learning_rate": 8.418945963975984e-06, + "loss": 0.367, + "step": 1262 + }, + { + "epoch": 0.025283387133098117, + "grad_norm": 1.0740065574645996, + "learning_rate": 8.425617078052036e-06, + "loss": 0.353, + "step": 1263 + }, + { + "epoch": 0.02530340565022646, + "grad_norm": 1.0349940061569214, + "learning_rate": 8.432288192128087e-06, + "loss": 0.2969, + "step": 1264 + }, + { + "epoch": 0.025323424167354804, + "grad_norm": 1.2059777975082397, + "learning_rate": 8.438959306204136e-06, + "loss": 0.3409, + "step": 1265 + }, + { + "epoch": 0.025343442684483148, + "grad_norm": 1.2385181188583374, + "learning_rate": 8.445630420280187e-06, + "loss": 0.3379, + "step": 1266 + }, + { + "epoch": 0.02536346120161149, + "grad_norm": 1.1391537189483643, + "learning_rate": 8.452301534356239e-06, + "loss": 0.3036, + "step": 1267 + }, + { + "epoch": 0.025383479718739835, + "grad_norm": 1.2156697511672974, + "learning_rate": 8.458972648432288e-06, + "loss": 0.3323, + "step": 1268 + }, + { + "epoch": 0.02540349823586818, + "grad_norm": 1.1899728775024414, + "learning_rate": 8.46564376250834e-06, + "loss": 0.3385, + "step": 1269 + }, + { + "epoch": 0.025423516752996523, + "grad_norm": 1.860045313835144, + "learning_rate": 8.47231487658439e-06, + "loss": 0.88, + "step": 1270 + }, + { + "epoch": 0.025443535270124867, + "grad_norm": 1.1740342378616333, + "learning_rate": 8.478985990660442e-06, + "loss": 0.3524, + "step": 1271 + }, + { + "epoch": 0.02546355378725321, + "grad_norm": 1.1045976877212524, + "learning_rate": 8.485657104736493e-06, + "loss": 0.3379, + "step": 1272 + }, + { + "epoch": 0.025483572304381554, + "grad_norm": 1.2288641929626465, + "learning_rate": 8.492328218812542e-06, + "loss": 0.3234, + "step": 1273 + }, + { + "epoch": 0.025503590821509898, + "grad_norm": 1.0416815280914307, + "learning_rate": 8.498999332888593e-06, + "loss": 0.3325, + "step": 1274 + }, + { + "epoch": 0.02552360933863824, + "grad_norm": 1.0879848003387451, + "learning_rate": 8.505670446964643e-06, + "loss": 0.3221, + "step": 1275 + }, + { + "epoch": 0.025543627855766585, + "grad_norm": 1.0648183822631836, + "learning_rate": 8.512341561040694e-06, + "loss": 0.346, + "step": 1276 + }, + { + "epoch": 0.02556364637289493, + "grad_norm": 1.1195354461669922, + "learning_rate": 8.519012675116745e-06, + "loss": 0.3129, + "step": 1277 + }, + { + "epoch": 0.025583664890023273, + "grad_norm": 1.1268938779830933, + "learning_rate": 8.525683789192796e-06, + "loss": 0.33, + "step": 1278 + }, + { + "epoch": 0.025603683407151617, + "grad_norm": 1.7228801250457764, + "learning_rate": 8.532354903268848e-06, + "loss": 0.8583, + "step": 1279 + }, + { + "epoch": 0.02562370192427996, + "grad_norm": 1.0704169273376465, + "learning_rate": 8.539026017344897e-06, + "loss": 0.3448, + "step": 1280 + }, + { + "epoch": 0.025643720441408304, + "grad_norm": 1.045586347579956, + "learning_rate": 8.545697131420948e-06, + "loss": 0.2951, + "step": 1281 + }, + { + "epoch": 0.025663738958536648, + "grad_norm": 1.0112544298171997, + "learning_rate": 8.552368245497e-06, + "loss": 0.2983, + "step": 1282 + }, + { + "epoch": 0.02568375747566499, + "grad_norm": 1.1209088563919067, + "learning_rate": 8.559039359573049e-06, + "loss": 0.3096, + "step": 1283 + }, + { + "epoch": 0.025703775992793335, + "grad_norm": 1.0915536880493164, + "learning_rate": 8.5657104736491e-06, + "loss": 0.3234, + "step": 1284 + }, + { + "epoch": 0.02572379450992168, + "grad_norm": 1.0997188091278076, + "learning_rate": 8.57238158772515e-06, + "loss": 0.3704, + "step": 1285 + }, + { + "epoch": 0.025743813027050023, + "grad_norm": 1.049125075340271, + "learning_rate": 8.5790527018012e-06, + "loss": 0.3452, + "step": 1286 + }, + { + "epoch": 0.025763831544178366, + "grad_norm": 1.7042368650436401, + "learning_rate": 8.585723815877252e-06, + "loss": 0.8727, + "step": 1287 + }, + { + "epoch": 0.02578385006130671, + "grad_norm": 1.1369749307632446, + "learning_rate": 8.592394929953303e-06, + "loss": 0.3755, + "step": 1288 + }, + { + "epoch": 0.025803868578435054, + "grad_norm": 1.437058925628662, + "learning_rate": 8.599066044029354e-06, + "loss": 0.3124, + "step": 1289 + }, + { + "epoch": 0.025823887095563398, + "grad_norm": 1.2607816457748413, + "learning_rate": 8.605737158105404e-06, + "loss": 0.3358, + "step": 1290 + }, + { + "epoch": 0.02584390561269174, + "grad_norm": 1.0703694820404053, + "learning_rate": 8.612408272181455e-06, + "loss": 0.4077, + "step": 1291 + }, + { + "epoch": 0.025863924129820085, + "grad_norm": 0.9493529796600342, + "learning_rate": 8.619079386257506e-06, + "loss": 0.2862, + "step": 1292 + }, + { + "epoch": 0.02588394264694843, + "grad_norm": 1.1109678745269775, + "learning_rate": 8.625750500333556e-06, + "loss": 0.3509, + "step": 1293 + }, + { + "epoch": 0.025903961164076773, + "grad_norm": 1.1079449653625488, + "learning_rate": 8.632421614409607e-06, + "loss": 0.3442, + "step": 1294 + }, + { + "epoch": 0.025923979681205116, + "grad_norm": 1.0219770669937134, + "learning_rate": 8.639092728485658e-06, + "loss": 0.3694, + "step": 1295 + }, + { + "epoch": 0.02594399819833346, + "grad_norm": 1.1148929595947266, + "learning_rate": 8.64576384256171e-06, + "loss": 0.3076, + "step": 1296 + }, + { + "epoch": 0.025964016715461804, + "grad_norm": 1.0050580501556396, + "learning_rate": 8.65243495663776e-06, + "loss": 0.2987, + "step": 1297 + }, + { + "epoch": 0.025984035232590148, + "grad_norm": 2.071463108062744, + "learning_rate": 8.65910607071381e-06, + "loss": 0.777, + "step": 1298 + }, + { + "epoch": 0.026004053749718488, + "grad_norm": 1.0292030572891235, + "learning_rate": 8.665777184789861e-06, + "loss": 0.3213, + "step": 1299 + }, + { + "epoch": 0.02602407226684683, + "grad_norm": 1.0564597845077515, + "learning_rate": 8.67244829886591e-06, + "loss": 0.3188, + "step": 1300 + }, + { + "epoch": 0.026044090783975175, + "grad_norm": 1.0800395011901855, + "learning_rate": 8.679119412941962e-06, + "loss": 0.3596, + "step": 1301 + }, + { + "epoch": 0.02606410930110352, + "grad_norm": 0.9918821454048157, + "learning_rate": 8.685790527018013e-06, + "loss": 0.3041, + "step": 1302 + }, + { + "epoch": 0.026084127818231863, + "grad_norm": 1.2692756652832031, + "learning_rate": 8.692461641094062e-06, + "loss": 0.3727, + "step": 1303 + }, + { + "epoch": 0.026104146335360207, + "grad_norm": 1.1341508626937866, + "learning_rate": 8.699132755170114e-06, + "loss": 0.3873, + "step": 1304 + }, + { + "epoch": 0.02612416485248855, + "grad_norm": 1.1268997192382812, + "learning_rate": 8.705803869246165e-06, + "loss": 0.3376, + "step": 1305 + }, + { + "epoch": 0.026144183369616894, + "grad_norm": 1.7238949537277222, + "learning_rate": 8.712474983322216e-06, + "loss": 0.3064, + "step": 1306 + }, + { + "epoch": 0.026164201886745238, + "grad_norm": 1.8890324831008911, + "learning_rate": 8.719146097398267e-06, + "loss": 0.8405, + "step": 1307 + }, + { + "epoch": 0.02618422040387358, + "grad_norm": 1.8250318765640259, + "learning_rate": 8.725817211474317e-06, + "loss": 0.8659, + "step": 1308 + }, + { + "epoch": 0.026204238921001925, + "grad_norm": 1.0690343379974365, + "learning_rate": 8.732488325550368e-06, + "loss": 0.3322, + "step": 1309 + }, + { + "epoch": 0.02622425743813027, + "grad_norm": 1.0759838819503784, + "learning_rate": 8.739159439626417e-06, + "loss": 0.3398, + "step": 1310 + }, + { + "epoch": 0.026244275955258613, + "grad_norm": 1.133201003074646, + "learning_rate": 8.745830553702469e-06, + "loss": 0.3313, + "step": 1311 + }, + { + "epoch": 0.026264294472386956, + "grad_norm": 1.0321481227874756, + "learning_rate": 8.75250166777852e-06, + "loss": 0.2964, + "step": 1312 + }, + { + "epoch": 0.0262843129895153, + "grad_norm": 1.0806580781936646, + "learning_rate": 8.759172781854571e-06, + "loss": 0.3514, + "step": 1313 + }, + { + "epoch": 0.026304331506643644, + "grad_norm": 1.12923002243042, + "learning_rate": 8.765843895930622e-06, + "loss": 0.3532, + "step": 1314 + }, + { + "epoch": 0.026324350023771988, + "grad_norm": 0.9444090127944946, + "learning_rate": 8.772515010006672e-06, + "loss": 0.3317, + "step": 1315 + }, + { + "epoch": 0.02634436854090033, + "grad_norm": 1.8287254571914673, + "learning_rate": 8.779186124082723e-06, + "loss": 0.9029, + "step": 1316 + }, + { + "epoch": 0.026364387058028675, + "grad_norm": 1.0615464448928833, + "learning_rate": 8.785857238158774e-06, + "loss": 0.3537, + "step": 1317 + }, + { + "epoch": 0.02638440557515702, + "grad_norm": 1.0996452569961548, + "learning_rate": 8.792528352234823e-06, + "loss": 0.33, + "step": 1318 + }, + { + "epoch": 0.026404424092285363, + "grad_norm": 1.0422141551971436, + "learning_rate": 8.799199466310875e-06, + "loss": 0.3433, + "step": 1319 + }, + { + "epoch": 0.026424442609413706, + "grad_norm": 1.0550864934921265, + "learning_rate": 8.805870580386924e-06, + "loss": 0.3201, + "step": 1320 + }, + { + "epoch": 0.02644446112654205, + "grad_norm": 1.1128175258636475, + "learning_rate": 8.812541694462975e-06, + "loss": 0.3418, + "step": 1321 + }, + { + "epoch": 0.026464479643670394, + "grad_norm": 1.2235355377197266, + "learning_rate": 8.819212808539026e-06, + "loss": 0.3146, + "step": 1322 + }, + { + "epoch": 0.026484498160798738, + "grad_norm": 1.1604535579681396, + "learning_rate": 8.825883922615078e-06, + "loss": 0.3556, + "step": 1323 + }, + { + "epoch": 0.02650451667792708, + "grad_norm": 1.0513086318969727, + "learning_rate": 8.832555036691129e-06, + "loss": 0.2871, + "step": 1324 + }, + { + "epoch": 0.026524535195055425, + "grad_norm": 0.9920433759689331, + "learning_rate": 8.839226150767178e-06, + "loss": 0.3048, + "step": 1325 + }, + { + "epoch": 0.02654455371218377, + "grad_norm": 1.0793688297271729, + "learning_rate": 8.84589726484323e-06, + "loss": 0.3054, + "step": 1326 + }, + { + "epoch": 0.026564572229312113, + "grad_norm": 1.1242942810058594, + "learning_rate": 8.85256837891928e-06, + "loss": 0.336, + "step": 1327 + }, + { + "epoch": 0.026584590746440456, + "grad_norm": 1.7033774852752686, + "learning_rate": 8.85923949299533e-06, + "loss": 0.7882, + "step": 1328 + }, + { + "epoch": 0.0266046092635688, + "grad_norm": 1.1023041009902954, + "learning_rate": 8.865910607071381e-06, + "loss": 0.343, + "step": 1329 + }, + { + "epoch": 0.026624627780697144, + "grad_norm": 1.036612868309021, + "learning_rate": 8.872581721147432e-06, + "loss": 0.3434, + "step": 1330 + }, + { + "epoch": 0.026644646297825487, + "grad_norm": 1.1210730075836182, + "learning_rate": 8.879252835223484e-06, + "loss": 0.2563, + "step": 1331 + }, + { + "epoch": 0.02666466481495383, + "grad_norm": 1.1667319536209106, + "learning_rate": 8.885923949299535e-06, + "loss": 0.3579, + "step": 1332 + }, + { + "epoch": 0.026684683332082175, + "grad_norm": 1.244649052619934, + "learning_rate": 8.892595063375584e-06, + "loss": 0.315, + "step": 1333 + }, + { + "epoch": 0.02670470184921052, + "grad_norm": 1.0468945503234863, + "learning_rate": 8.899266177451636e-06, + "loss": 0.3375, + "step": 1334 + }, + { + "epoch": 0.026724720366338862, + "grad_norm": 1.027390956878662, + "learning_rate": 8.905937291527685e-06, + "loss": 0.3219, + "step": 1335 + }, + { + "epoch": 0.026744738883467206, + "grad_norm": 1.1912212371826172, + "learning_rate": 8.912608405603736e-06, + "loss": 0.3486, + "step": 1336 + }, + { + "epoch": 0.02676475740059555, + "grad_norm": 1.1187552213668823, + "learning_rate": 8.919279519679787e-06, + "loss": 0.3327, + "step": 1337 + }, + { + "epoch": 0.026784775917723894, + "grad_norm": 1.0465471744537354, + "learning_rate": 8.925950633755837e-06, + "loss": 0.3227, + "step": 1338 + }, + { + "epoch": 0.026804794434852237, + "grad_norm": 1.2274906635284424, + "learning_rate": 8.932621747831888e-06, + "loss": 0.3703, + "step": 1339 + }, + { + "epoch": 0.02682481295198058, + "grad_norm": 1.171038031578064, + "learning_rate": 8.93929286190794e-06, + "loss": 0.3526, + "step": 1340 + }, + { + "epoch": 0.026844831469108925, + "grad_norm": 1.1936107873916626, + "learning_rate": 8.94596397598399e-06, + "loss": 0.3823, + "step": 1341 + }, + { + "epoch": 0.02686484998623727, + "grad_norm": 1.1462758779525757, + "learning_rate": 8.952635090060042e-06, + "loss": 0.3499, + "step": 1342 + }, + { + "epoch": 0.026884868503365612, + "grad_norm": 1.0331107378005981, + "learning_rate": 8.959306204136091e-06, + "loss": 0.3312, + "step": 1343 + }, + { + "epoch": 0.026904887020493956, + "grad_norm": 1.1644614934921265, + "learning_rate": 8.965977318212142e-06, + "loss": 0.3538, + "step": 1344 + }, + { + "epoch": 0.0269249055376223, + "grad_norm": 1.2726134061813354, + "learning_rate": 8.972648432288192e-06, + "loss": 0.3778, + "step": 1345 + }, + { + "epoch": 0.026944924054750644, + "grad_norm": 1.1586723327636719, + "learning_rate": 8.979319546364243e-06, + "loss": 0.3352, + "step": 1346 + }, + { + "epoch": 0.026964942571878987, + "grad_norm": 1.0955003499984741, + "learning_rate": 8.985990660440294e-06, + "loss": 0.3769, + "step": 1347 + }, + { + "epoch": 0.02698496108900733, + "grad_norm": 1.0283830165863037, + "learning_rate": 8.992661774516345e-06, + "loss": 0.3352, + "step": 1348 + }, + { + "epoch": 0.027004979606135675, + "grad_norm": 1.0646953582763672, + "learning_rate": 8.999332888592396e-06, + "loss": 0.3359, + "step": 1349 + }, + { + "epoch": 0.02702499812326402, + "grad_norm": 1.1613490581512451, + "learning_rate": 9.006004002668446e-06, + "loss": 0.3516, + "step": 1350 + }, + { + "epoch": 0.027045016640392362, + "grad_norm": 1.3094879388809204, + "learning_rate": 9.012675116744497e-06, + "loss": 0.3151, + "step": 1351 + }, + { + "epoch": 0.027065035157520706, + "grad_norm": 1.1699349880218506, + "learning_rate": 9.019346230820548e-06, + "loss": 0.3655, + "step": 1352 + }, + { + "epoch": 0.02708505367464905, + "grad_norm": 1.2512881755828857, + "learning_rate": 9.026017344896598e-06, + "loss": 0.3187, + "step": 1353 + }, + { + "epoch": 0.027105072191777393, + "grad_norm": 1.0029000043869019, + "learning_rate": 9.032688458972649e-06, + "loss": 0.3616, + "step": 1354 + }, + { + "epoch": 0.027125090708905737, + "grad_norm": 1.0639808177947998, + "learning_rate": 9.0393595730487e-06, + "loss": 0.3011, + "step": 1355 + }, + { + "epoch": 0.02714510922603408, + "grad_norm": 1.0217839479446411, + "learning_rate": 9.046030687124751e-06, + "loss": 0.3597, + "step": 1356 + }, + { + "epoch": 0.027165127743162425, + "grad_norm": 1.1317204236984253, + "learning_rate": 9.0527018012008e-06, + "loss": 0.2739, + "step": 1357 + }, + { + "epoch": 0.02718514626029077, + "grad_norm": 1.2191599607467651, + "learning_rate": 9.059372915276852e-06, + "loss": 0.3378, + "step": 1358 + }, + { + "epoch": 0.027205164777419112, + "grad_norm": 1.187351942062378, + "learning_rate": 9.066044029352903e-06, + "loss": 0.3121, + "step": 1359 + }, + { + "epoch": 0.027225183294547456, + "grad_norm": 1.8308689594268799, + "learning_rate": 9.072715143428953e-06, + "loss": 0.7976, + "step": 1360 + }, + { + "epoch": 0.0272452018116758, + "grad_norm": 1.0801403522491455, + "learning_rate": 9.079386257505004e-06, + "loss": 0.3356, + "step": 1361 + }, + { + "epoch": 0.027265220328804143, + "grad_norm": 1.4171199798583984, + "learning_rate": 9.086057371581055e-06, + "loss": 0.3078, + "step": 1362 + }, + { + "epoch": 0.027285238845932487, + "grad_norm": 1.0697338581085205, + "learning_rate": 9.092728485657105e-06, + "loss": 0.3075, + "step": 1363 + }, + { + "epoch": 0.02730525736306083, + "grad_norm": 1.078705906867981, + "learning_rate": 9.099399599733156e-06, + "loss": 0.3422, + "step": 1364 + }, + { + "epoch": 0.027325275880189175, + "grad_norm": 1.0665252208709717, + "learning_rate": 9.106070713809207e-06, + "loss": 0.3197, + "step": 1365 + }, + { + "epoch": 0.02734529439731752, + "grad_norm": 1.1059143543243408, + "learning_rate": 9.112741827885258e-06, + "loss": 0.3019, + "step": 1366 + }, + { + "epoch": 0.027365312914445862, + "grad_norm": 1.0601049661636353, + "learning_rate": 9.11941294196131e-06, + "loss": 0.3372, + "step": 1367 + }, + { + "epoch": 0.027385331431574206, + "grad_norm": 1.1464101076126099, + "learning_rate": 9.126084056037359e-06, + "loss": 0.3439, + "step": 1368 + }, + { + "epoch": 0.02740534994870255, + "grad_norm": 1.938177466392517, + "learning_rate": 9.13275517011341e-06, + "loss": 0.7863, + "step": 1369 + }, + { + "epoch": 0.027425368465830893, + "grad_norm": 1.1011924743652344, + "learning_rate": 9.13942628418946e-06, + "loss": 0.3362, + "step": 1370 + }, + { + "epoch": 0.027445386982959237, + "grad_norm": 0.9635065793991089, + "learning_rate": 9.14609739826551e-06, + "loss": 0.2964, + "step": 1371 + }, + { + "epoch": 0.02746540550008758, + "grad_norm": 1.0150116682052612, + "learning_rate": 9.152768512341562e-06, + "loss": 0.3076, + "step": 1372 + }, + { + "epoch": 0.027485424017215924, + "grad_norm": 0.9806089401245117, + "learning_rate": 9.159439626417613e-06, + "loss": 0.2993, + "step": 1373 + }, + { + "epoch": 0.027505442534344268, + "grad_norm": 1.0405566692352295, + "learning_rate": 9.166110740493664e-06, + "loss": 0.3122, + "step": 1374 + }, + { + "epoch": 0.027525461051472612, + "grad_norm": 1.2521451711654663, + "learning_rate": 9.172781854569714e-06, + "loss": 0.329, + "step": 1375 + }, + { + "epoch": 0.027545479568600956, + "grad_norm": 1.0396900177001953, + "learning_rate": 9.179452968645765e-06, + "loss": 0.3639, + "step": 1376 + }, + { + "epoch": 0.0275654980857293, + "grad_norm": 1.0235275030136108, + "learning_rate": 9.186124082721816e-06, + "loss": 0.3535, + "step": 1377 + }, + { + "epoch": 0.027585516602857643, + "grad_norm": 1.9738744497299194, + "learning_rate": 9.192795196797865e-06, + "loss": 0.8797, + "step": 1378 + }, + { + "epoch": 0.027605535119985987, + "grad_norm": 2.04766583442688, + "learning_rate": 9.199466310873917e-06, + "loss": 0.8798, + "step": 1379 + }, + { + "epoch": 0.02762555363711433, + "grad_norm": 1.0344334840774536, + "learning_rate": 9.206137424949966e-06, + "loss": 0.3559, + "step": 1380 + }, + { + "epoch": 0.027645572154242674, + "grad_norm": 1.0900436639785767, + "learning_rate": 9.212808539026017e-06, + "loss": 0.3305, + "step": 1381 + }, + { + "epoch": 0.027665590671371018, + "grad_norm": 1.0929325819015503, + "learning_rate": 9.219479653102069e-06, + "loss": 0.3231, + "step": 1382 + }, + { + "epoch": 0.027685609188499362, + "grad_norm": 1.0366623401641846, + "learning_rate": 9.22615076717812e-06, + "loss": 0.3123, + "step": 1383 + }, + { + "epoch": 0.027705627705627706, + "grad_norm": 1.2142359018325806, + "learning_rate": 9.232821881254171e-06, + "loss": 0.3406, + "step": 1384 + }, + { + "epoch": 0.02772564622275605, + "grad_norm": 1.2368052005767822, + "learning_rate": 9.23949299533022e-06, + "loss": 0.3975, + "step": 1385 + }, + { + "epoch": 0.027745664739884393, + "grad_norm": 0.9918451309204102, + "learning_rate": 9.246164109406272e-06, + "loss": 0.325, + "step": 1386 + }, + { + "epoch": 0.027765683257012737, + "grad_norm": 1.2680548429489136, + "learning_rate": 9.252835223482323e-06, + "loss": 0.3314, + "step": 1387 + }, + { + "epoch": 0.02778570177414108, + "grad_norm": 1.2299551963806152, + "learning_rate": 9.259506337558372e-06, + "loss": 0.3008, + "step": 1388 + }, + { + "epoch": 0.027805720291269424, + "grad_norm": 1.979215383529663, + "learning_rate": 9.266177451634423e-06, + "loss": 0.777, + "step": 1389 + }, + { + "epoch": 0.027825738808397768, + "grad_norm": 1.1245867013931274, + "learning_rate": 9.272848565710475e-06, + "loss": 0.3249, + "step": 1390 + }, + { + "epoch": 0.027845757325526112, + "grad_norm": 1.1792199611663818, + "learning_rate": 9.279519679786526e-06, + "loss": 0.3718, + "step": 1391 + }, + { + "epoch": 0.027865775842654456, + "grad_norm": 1.0712285041809082, + "learning_rate": 9.286190793862577e-06, + "loss": 0.3365, + "step": 1392 + }, + { + "epoch": 0.0278857943597828, + "grad_norm": 1.075650930404663, + "learning_rate": 9.292861907938626e-06, + "loss": 0.3493, + "step": 1393 + }, + { + "epoch": 0.027905812876911143, + "grad_norm": 1.084611415863037, + "learning_rate": 9.299533022014678e-06, + "loss": 0.3509, + "step": 1394 + }, + { + "epoch": 0.027925831394039487, + "grad_norm": 0.9927454590797424, + "learning_rate": 9.306204136090727e-06, + "loss": 0.3309, + "step": 1395 + }, + { + "epoch": 0.02794584991116783, + "grad_norm": 1.2301838397979736, + "learning_rate": 9.312875250166778e-06, + "loss": 0.3317, + "step": 1396 + }, + { + "epoch": 0.027965868428296174, + "grad_norm": 1.0411440134048462, + "learning_rate": 9.31954636424283e-06, + "loss": 0.336, + "step": 1397 + }, + { + "epoch": 0.027985886945424518, + "grad_norm": 1.2461317777633667, + "learning_rate": 9.326217478318879e-06, + "loss": 0.3282, + "step": 1398 + }, + { + "epoch": 0.02800590546255286, + "grad_norm": 1.0360419750213623, + "learning_rate": 9.33288859239493e-06, + "loss": 0.3358, + "step": 1399 + }, + { + "epoch": 0.028025923979681205, + "grad_norm": 1.104390263557434, + "learning_rate": 9.339559706470981e-06, + "loss": 0.3428, + "step": 1400 + }, + { + "epoch": 0.02804594249680955, + "grad_norm": 1.0890368223190308, + "learning_rate": 9.346230820547032e-06, + "loss": 0.3504, + "step": 1401 + }, + { + "epoch": 0.028065961013937893, + "grad_norm": 1.1064190864562988, + "learning_rate": 9.352901934623084e-06, + "loss": 0.3378, + "step": 1402 + }, + { + "epoch": 0.028085979531066237, + "grad_norm": 1.2431039810180664, + "learning_rate": 9.359573048699133e-06, + "loss": 0.3014, + "step": 1403 + }, + { + "epoch": 0.02810599804819458, + "grad_norm": 1.1736196279525757, + "learning_rate": 9.366244162775184e-06, + "loss": 0.2937, + "step": 1404 + }, + { + "epoch": 0.028126016565322924, + "grad_norm": 1.0165077447891235, + "learning_rate": 9.372915276851234e-06, + "loss": 0.3144, + "step": 1405 + }, + { + "epoch": 0.028146035082451268, + "grad_norm": 1.8517041206359863, + "learning_rate": 9.379586390927285e-06, + "loss": 0.8447, + "step": 1406 + }, + { + "epoch": 0.02816605359957961, + "grad_norm": 1.0733088254928589, + "learning_rate": 9.386257505003336e-06, + "loss": 0.3848, + "step": 1407 + }, + { + "epoch": 0.028186072116707955, + "grad_norm": 1.1276354789733887, + "learning_rate": 9.392928619079387e-06, + "loss": 0.3171, + "step": 1408 + }, + { + "epoch": 0.0282060906338363, + "grad_norm": 1.13438081741333, + "learning_rate": 9.399599733155439e-06, + "loss": 0.3489, + "step": 1409 + }, + { + "epoch": 0.028226109150964643, + "grad_norm": 1.031434178352356, + "learning_rate": 9.40627084723149e-06, + "loss": 0.3808, + "step": 1410 + }, + { + "epoch": 0.028246127668092987, + "grad_norm": 1.0034047365188599, + "learning_rate": 9.41294196130754e-06, + "loss": 0.2585, + "step": 1411 + }, + { + "epoch": 0.02826614618522133, + "grad_norm": 1.0342501401901245, + "learning_rate": 9.41961307538359e-06, + "loss": 0.3521, + "step": 1412 + }, + { + "epoch": 0.028286164702349674, + "grad_norm": 1.1927624940872192, + "learning_rate": 9.42628418945964e-06, + "loss": 0.357, + "step": 1413 + }, + { + "epoch": 0.028306183219478018, + "grad_norm": 1.0651521682739258, + "learning_rate": 9.432955303535691e-06, + "loss": 0.3535, + "step": 1414 + }, + { + "epoch": 0.02832620173660636, + "grad_norm": 1.7441775798797607, + "learning_rate": 9.439626417611742e-06, + "loss": 0.8889, + "step": 1415 + }, + { + "epoch": 0.028346220253734705, + "grad_norm": 1.0994638204574585, + "learning_rate": 9.446297531687792e-06, + "loss": 0.33, + "step": 1416 + }, + { + "epoch": 0.02836623877086305, + "grad_norm": 1.1209523677825928, + "learning_rate": 9.452968645763843e-06, + "loss": 0.3106, + "step": 1417 + }, + { + "epoch": 0.028386257287991393, + "grad_norm": 1.0673469305038452, + "learning_rate": 9.459639759839894e-06, + "loss": 0.3501, + "step": 1418 + }, + { + "epoch": 0.028406275805119736, + "grad_norm": 1.1716209650039673, + "learning_rate": 9.466310873915945e-06, + "loss": 0.3623, + "step": 1419 + }, + { + "epoch": 0.02842629432224808, + "grad_norm": 1.0322257280349731, + "learning_rate": 9.472981987991996e-06, + "loss": 0.3525, + "step": 1420 + }, + { + "epoch": 0.028446312839376424, + "grad_norm": 1.1828871965408325, + "learning_rate": 9.479653102068046e-06, + "loss": 0.3, + "step": 1421 + }, + { + "epoch": 0.028466331356504768, + "grad_norm": 1.2000142335891724, + "learning_rate": 9.486324216144097e-06, + "loss": 0.331, + "step": 1422 + }, + { + "epoch": 0.02848634987363311, + "grad_norm": 1.1380128860473633, + "learning_rate": 9.492995330220147e-06, + "loss": 0.3543, + "step": 1423 + }, + { + "epoch": 0.028506368390761455, + "grad_norm": 1.2292174100875854, + "learning_rate": 9.499666444296198e-06, + "loss": 0.3678, + "step": 1424 + }, + { + "epoch": 0.0285263869078898, + "grad_norm": 1.7162597179412842, + "learning_rate": 9.506337558372249e-06, + "loss": 0.865, + "step": 1425 + }, + { + "epoch": 0.028546405425018143, + "grad_norm": 1.0871999263763428, + "learning_rate": 9.5130086724483e-06, + "loss": 0.3082, + "step": 1426 + }, + { + "epoch": 0.028566423942146486, + "grad_norm": 1.9022663831710815, + "learning_rate": 9.519679786524351e-06, + "loss": 0.8485, + "step": 1427 + }, + { + "epoch": 0.02858644245927483, + "grad_norm": 1.9444665908813477, + "learning_rate": 9.5263509006004e-06, + "loss": 0.8023, + "step": 1428 + }, + { + "epoch": 0.028606460976403174, + "grad_norm": 1.3108558654785156, + "learning_rate": 9.533022014676452e-06, + "loss": 0.3649, + "step": 1429 + }, + { + "epoch": 0.028626479493531518, + "grad_norm": 1.734695315361023, + "learning_rate": 9.539693128752503e-06, + "loss": 0.8466, + "step": 1430 + }, + { + "epoch": 0.02864649801065986, + "grad_norm": 1.1107368469238281, + "learning_rate": 9.546364242828553e-06, + "loss": 0.3386, + "step": 1431 + }, + { + "epoch": 0.028666516527788205, + "grad_norm": 1.112393856048584, + "learning_rate": 9.553035356904604e-06, + "loss": 0.3365, + "step": 1432 + }, + { + "epoch": 0.02868653504491655, + "grad_norm": 2.0055887699127197, + "learning_rate": 9.559706470980653e-06, + "loss": 0.8227, + "step": 1433 + }, + { + "epoch": 0.028706553562044893, + "grad_norm": 1.0430375337600708, + "learning_rate": 9.566377585056705e-06, + "loss": 0.3067, + "step": 1434 + }, + { + "epoch": 0.028726572079173236, + "grad_norm": 1.2919635772705078, + "learning_rate": 9.573048699132756e-06, + "loss": 0.3539, + "step": 1435 + }, + { + "epoch": 0.02874659059630158, + "grad_norm": 1.0541462898254395, + "learning_rate": 9.579719813208807e-06, + "loss": 0.3187, + "step": 1436 + }, + { + "epoch": 0.028766609113429924, + "grad_norm": 1.8622363805770874, + "learning_rate": 9.586390927284858e-06, + "loss": 0.844, + "step": 1437 + }, + { + "epoch": 0.028786627630558267, + "grad_norm": 1.0886917114257812, + "learning_rate": 9.593062041360908e-06, + "loss": 0.3922, + "step": 1438 + }, + { + "epoch": 0.02880664614768661, + "grad_norm": 1.0653184652328491, + "learning_rate": 9.599733155436959e-06, + "loss": 0.387, + "step": 1439 + }, + { + "epoch": 0.028826664664814955, + "grad_norm": 1.0717154741287231, + "learning_rate": 9.60640426951301e-06, + "loss": 0.334, + "step": 1440 + }, + { + "epoch": 0.0288466831819433, + "grad_norm": 1.0925018787384033, + "learning_rate": 9.61307538358906e-06, + "loss": 0.2919, + "step": 1441 + }, + { + "epoch": 0.028866701699071642, + "grad_norm": 1.0993189811706543, + "learning_rate": 9.61974649766511e-06, + "loss": 0.3022, + "step": 1442 + }, + { + "epoch": 0.028886720216199986, + "grad_norm": 1.0238358974456787, + "learning_rate": 9.626417611741162e-06, + "loss": 0.3212, + "step": 1443 + }, + { + "epoch": 0.02890673873332833, + "grad_norm": 1.1169497966766357, + "learning_rate": 9.633088725817213e-06, + "loss": 0.3512, + "step": 1444 + }, + { + "epoch": 0.028926757250456674, + "grad_norm": 1.0994998216629028, + "learning_rate": 9.639759839893264e-06, + "loss": 0.3295, + "step": 1445 + }, + { + "epoch": 0.028946775767585017, + "grad_norm": 1.1093957424163818, + "learning_rate": 9.646430953969314e-06, + "loss": 0.3266, + "step": 1446 + }, + { + "epoch": 0.02896679428471336, + "grad_norm": 1.2534593343734741, + "learning_rate": 9.653102068045365e-06, + "loss": 0.3609, + "step": 1447 + }, + { + "epoch": 0.028986812801841705, + "grad_norm": 1.1917108297348022, + "learning_rate": 9.659773182121414e-06, + "loss": 0.3303, + "step": 1448 + }, + { + "epoch": 0.02900683131897005, + "grad_norm": 1.019168496131897, + "learning_rate": 9.666444296197465e-06, + "loss": 0.3163, + "step": 1449 + }, + { + "epoch": 0.029026849836098392, + "grad_norm": 1.1420341730117798, + "learning_rate": 9.673115410273517e-06, + "loss": 0.3291, + "step": 1450 + }, + { + "epoch": 0.029046868353226736, + "grad_norm": 0.9351878762245178, + "learning_rate": 9.679786524349568e-06, + "loss": 0.2857, + "step": 1451 + }, + { + "epoch": 0.02906688687035508, + "grad_norm": 1.1828585863113403, + "learning_rate": 9.686457638425617e-06, + "loss": 0.3405, + "step": 1452 + }, + { + "epoch": 0.029086905387483424, + "grad_norm": 1.318495273590088, + "learning_rate": 9.693128752501669e-06, + "loss": 0.3456, + "step": 1453 + }, + { + "epoch": 0.029106923904611767, + "grad_norm": 1.1992021799087524, + "learning_rate": 9.69979986657772e-06, + "loss": 0.3225, + "step": 1454 + }, + { + "epoch": 0.02912694242174011, + "grad_norm": 1.0422725677490234, + "learning_rate": 9.706470980653771e-06, + "loss": 0.3524, + "step": 1455 + }, + { + "epoch": 0.029146960938868455, + "grad_norm": 1.0147497653961182, + "learning_rate": 9.71314209472982e-06, + "loss": 0.3261, + "step": 1456 + }, + { + "epoch": 0.0291669794559968, + "grad_norm": 1.088788628578186, + "learning_rate": 9.719813208805872e-06, + "loss": 0.3084, + "step": 1457 + }, + { + "epoch": 0.029186997973125142, + "grad_norm": 1.3319823741912842, + "learning_rate": 9.726484322881921e-06, + "loss": 0.3193, + "step": 1458 + }, + { + "epoch": 0.029207016490253486, + "grad_norm": 1.1466509103775024, + "learning_rate": 9.733155436957972e-06, + "loss": 0.2916, + "step": 1459 + }, + { + "epoch": 0.02922703500738183, + "grad_norm": 1.1676831245422363, + "learning_rate": 9.739826551034023e-06, + "loss": 0.3458, + "step": 1460 + }, + { + "epoch": 0.029247053524510173, + "grad_norm": 1.036505937576294, + "learning_rate": 9.746497665110075e-06, + "loss": 0.3707, + "step": 1461 + }, + { + "epoch": 0.029267072041638517, + "grad_norm": 1.037941336631775, + "learning_rate": 9.753168779186126e-06, + "loss": 0.3195, + "step": 1462 + }, + { + "epoch": 0.02928709055876686, + "grad_norm": 1.0457934141159058, + "learning_rate": 9.759839893262175e-06, + "loss": 0.2948, + "step": 1463 + }, + { + "epoch": 0.029307109075895205, + "grad_norm": 1.2480484247207642, + "learning_rate": 9.766511007338226e-06, + "loss": 0.3368, + "step": 1464 + }, + { + "epoch": 0.02932712759302355, + "grad_norm": 1.1208112239837646, + "learning_rate": 9.773182121414278e-06, + "loss": 0.289, + "step": 1465 + }, + { + "epoch": 0.029347146110151892, + "grad_norm": 1.1873396635055542, + "learning_rate": 9.779853235490327e-06, + "loss": 0.3379, + "step": 1466 + }, + { + "epoch": 0.029367164627280236, + "grad_norm": 1.2219895124435425, + "learning_rate": 9.786524349566378e-06, + "loss": 0.2802, + "step": 1467 + }, + { + "epoch": 0.02938718314440858, + "grad_norm": 1.121216893196106, + "learning_rate": 9.79319546364243e-06, + "loss": 0.3295, + "step": 1468 + }, + { + "epoch": 0.029407201661536923, + "grad_norm": 1.0648289918899536, + "learning_rate": 9.79986657771848e-06, + "loss": 0.3325, + "step": 1469 + }, + { + "epoch": 0.029427220178665267, + "grad_norm": 2.0970330238342285, + "learning_rate": 9.806537691794532e-06, + "loss": 0.8431, + "step": 1470 + }, + { + "epoch": 0.02944723869579361, + "grad_norm": 1.0162684917449951, + "learning_rate": 9.813208805870581e-06, + "loss": 0.3389, + "step": 1471 + }, + { + "epoch": 0.02946725721292195, + "grad_norm": 0.9914348125457764, + "learning_rate": 9.819879919946632e-06, + "loss": 0.2884, + "step": 1472 + }, + { + "epoch": 0.029487275730050295, + "grad_norm": 1.0210304260253906, + "learning_rate": 9.826551034022682e-06, + "loss": 0.3036, + "step": 1473 + }, + { + "epoch": 0.02950729424717864, + "grad_norm": 1.121854543685913, + "learning_rate": 9.833222148098733e-06, + "loss": 0.3218, + "step": 1474 + }, + { + "epoch": 0.029527312764306982, + "grad_norm": 1.78669011592865, + "learning_rate": 9.839893262174784e-06, + "loss": 0.8658, + "step": 1475 + }, + { + "epoch": 0.029547331281435326, + "grad_norm": 1.0754499435424805, + "learning_rate": 9.846564376250834e-06, + "loss": 0.3522, + "step": 1476 + }, + { + "epoch": 0.02956734979856367, + "grad_norm": 1.0502017736434937, + "learning_rate": 9.853235490326885e-06, + "loss": 0.3608, + "step": 1477 + }, + { + "epoch": 0.029587368315692014, + "grad_norm": 1.1093580722808838, + "learning_rate": 9.859906604402936e-06, + "loss": 0.3648, + "step": 1478 + }, + { + "epoch": 0.029607386832820357, + "grad_norm": 1.1386771202087402, + "learning_rate": 9.866577718478987e-06, + "loss": 0.3064, + "step": 1479 + }, + { + "epoch": 0.0296274053499487, + "grad_norm": 1.094520926475525, + "learning_rate": 9.873248832555039e-06, + "loss": 0.3248, + "step": 1480 + }, + { + "epoch": 0.029647423867077045, + "grad_norm": 1.9170621633529663, + "learning_rate": 9.879919946631088e-06, + "loss": 0.8368, + "step": 1481 + }, + { + "epoch": 0.02966744238420539, + "grad_norm": 0.9649667143821716, + "learning_rate": 9.88659106070714e-06, + "loss": 0.3364, + "step": 1482 + }, + { + "epoch": 0.029687460901333732, + "grad_norm": 1.1160008907318115, + "learning_rate": 9.893262174783189e-06, + "loss": 0.3113, + "step": 1483 + }, + { + "epoch": 0.029707479418462076, + "grad_norm": 1.0532255172729492, + "learning_rate": 9.89993328885924e-06, + "loss": 0.282, + "step": 1484 + }, + { + "epoch": 0.02972749793559042, + "grad_norm": 1.0824507474899292, + "learning_rate": 9.906604402935291e-06, + "loss": 0.2934, + "step": 1485 + }, + { + "epoch": 0.029747516452718763, + "grad_norm": 1.0200762748718262, + "learning_rate": 9.913275517011342e-06, + "loss": 0.2778, + "step": 1486 + }, + { + "epoch": 0.029767534969847107, + "grad_norm": 1.1599574089050293, + "learning_rate": 9.919946631087393e-06, + "loss": 0.3264, + "step": 1487 + }, + { + "epoch": 0.02978755348697545, + "grad_norm": 1.0729082822799683, + "learning_rate": 9.926617745163443e-06, + "loss": 0.3596, + "step": 1488 + }, + { + "epoch": 0.029807572004103795, + "grad_norm": 1.0751487016677856, + "learning_rate": 9.933288859239494e-06, + "loss": 0.3535, + "step": 1489 + }, + { + "epoch": 0.02982759052123214, + "grad_norm": 1.228847622871399, + "learning_rate": 9.939959973315545e-06, + "loss": 0.3687, + "step": 1490 + }, + { + "epoch": 0.029847609038360482, + "grad_norm": 1.6613085269927979, + "learning_rate": 9.946631087391595e-06, + "loss": 0.8432, + "step": 1491 + }, + { + "epoch": 0.029867627555488826, + "grad_norm": 1.2017768621444702, + "learning_rate": 9.953302201467646e-06, + "loss": 0.3379, + "step": 1492 + }, + { + "epoch": 0.02988764607261717, + "grad_norm": 1.0490432977676392, + "learning_rate": 9.959973315543695e-06, + "loss": 0.3022, + "step": 1493 + }, + { + "epoch": 0.029907664589745513, + "grad_norm": 1.0102957487106323, + "learning_rate": 9.966644429619747e-06, + "loss": 0.2961, + "step": 1494 + }, + { + "epoch": 0.029927683106873857, + "grad_norm": 1.1349836587905884, + "learning_rate": 9.973315543695798e-06, + "loss": 0.3402, + "step": 1495 + }, + { + "epoch": 0.0299477016240022, + "grad_norm": 1.1525903940200806, + "learning_rate": 9.979986657771849e-06, + "loss": 0.3563, + "step": 1496 + }, + { + "epoch": 0.029967720141130545, + "grad_norm": 1.8539518117904663, + "learning_rate": 9.9866577718479e-06, + "loss": 0.8926, + "step": 1497 + }, + { + "epoch": 0.029987738658258888, + "grad_norm": 1.2212989330291748, + "learning_rate": 9.99332888592395e-06, + "loss": 0.2938, + "step": 1498 + }, + { + "epoch": 0.030007757175387232, + "grad_norm": 1.0062109231948853, + "learning_rate": 1e-05, + "loss": 0.3644, + "step": 1499 + }, + { + "epoch": 0.030027775692515576, + "grad_norm": 1.1754419803619385, + "learning_rate": 9.99999998949054e-06, + "loss": 0.3617, + "step": 1500 + }, + { + "epoch": 0.03004779420964392, + "grad_norm": 1.0757583379745483, + "learning_rate": 9.999999957962152e-06, + "loss": 0.3416, + "step": 1501 + }, + { + "epoch": 0.030067812726772263, + "grad_norm": 1.0048353672027588, + "learning_rate": 9.999999905414844e-06, + "loss": 0.3111, + "step": 1502 + }, + { + "epoch": 0.030087831243900607, + "grad_norm": 1.1286895275115967, + "learning_rate": 9.999999831848611e-06, + "loss": 0.3865, + "step": 1503 + }, + { + "epoch": 0.03010784976102895, + "grad_norm": 1.7797127962112427, + "learning_rate": 9.999999737263452e-06, + "loss": 0.9848, + "step": 1504 + }, + { + "epoch": 0.030127868278157294, + "grad_norm": 1.0798958539962769, + "learning_rate": 9.999999621659374e-06, + "loss": 0.3598, + "step": 1505 + }, + { + "epoch": 0.030147886795285638, + "grad_norm": 1.1404478549957275, + "learning_rate": 9.999999485036371e-06, + "loss": 0.3355, + "step": 1506 + }, + { + "epoch": 0.030167905312413982, + "grad_norm": 1.0478119850158691, + "learning_rate": 9.999999327394448e-06, + "loss": 0.301, + "step": 1507 + }, + { + "epoch": 0.030187923829542326, + "grad_norm": 1.3970191478729248, + "learning_rate": 9.999999148733603e-06, + "loss": 0.3395, + "step": 1508 + }, + { + "epoch": 0.03020794234667067, + "grad_norm": 1.3390991687774658, + "learning_rate": 9.999998949053838e-06, + "loss": 0.3415, + "step": 1509 + }, + { + "epoch": 0.030227960863799013, + "grad_norm": 1.084641695022583, + "learning_rate": 9.999998728355153e-06, + "loss": 0.3309, + "step": 1510 + }, + { + "epoch": 0.030247979380927357, + "grad_norm": 1.2019455432891846, + "learning_rate": 9.99999848663755e-06, + "loss": 0.3091, + "step": 1511 + }, + { + "epoch": 0.0302679978980557, + "grad_norm": 1.1363435983657837, + "learning_rate": 9.999998223901029e-06, + "loss": 0.3569, + "step": 1512 + }, + { + "epoch": 0.030288016415184044, + "grad_norm": 1.1568135023117065, + "learning_rate": 9.999997940145591e-06, + "loss": 0.3613, + "step": 1513 + }, + { + "epoch": 0.030308034932312388, + "grad_norm": 1.2323588132858276, + "learning_rate": 9.999997635371238e-06, + "loss": 0.3356, + "step": 1514 + }, + { + "epoch": 0.030328053449440732, + "grad_norm": 1.1573387384414673, + "learning_rate": 9.999997309577972e-06, + "loss": 0.3411, + "step": 1515 + }, + { + "epoch": 0.030348071966569076, + "grad_norm": 1.0529884099960327, + "learning_rate": 9.999996962765791e-06, + "loss": 0.3193, + "step": 1516 + }, + { + "epoch": 0.03036809048369742, + "grad_norm": 1.0283738374710083, + "learning_rate": 9.9999965949347e-06, + "loss": 0.3384, + "step": 1517 + }, + { + "epoch": 0.030388109000825763, + "grad_norm": 1.0076981782913208, + "learning_rate": 9.999996206084698e-06, + "loss": 0.3571, + "step": 1518 + }, + { + "epoch": 0.030408127517954107, + "grad_norm": 1.0041804313659668, + "learning_rate": 9.99999579621579e-06, + "loss": 0.3131, + "step": 1519 + }, + { + "epoch": 0.03042814603508245, + "grad_norm": 1.0425035953521729, + "learning_rate": 9.999995365327975e-06, + "loss": 0.3488, + "step": 1520 + }, + { + "epoch": 0.030448164552210794, + "grad_norm": 1.0413280725479126, + "learning_rate": 9.999994913421256e-06, + "loss": 0.2828, + "step": 1521 + }, + { + "epoch": 0.030468183069339138, + "grad_norm": 1.1379567384719849, + "learning_rate": 9.999994440495635e-06, + "loss": 0.3381, + "step": 1522 + }, + { + "epoch": 0.03048820158646748, + "grad_norm": 1.0906318426132202, + "learning_rate": 9.999993946551112e-06, + "loss": 0.3283, + "step": 1523 + }, + { + "epoch": 0.030508220103595825, + "grad_norm": 1.1192667484283447, + "learning_rate": 9.999993431587691e-06, + "loss": 0.2874, + "step": 1524 + }, + { + "epoch": 0.03052823862072417, + "grad_norm": 1.0821757316589355, + "learning_rate": 9.999992895605373e-06, + "loss": 0.3515, + "step": 1525 + }, + { + "epoch": 0.030548257137852513, + "grad_norm": 1.1917606592178345, + "learning_rate": 9.99999233860416e-06, + "loss": 0.3687, + "step": 1526 + }, + { + "epoch": 0.030568275654980857, + "grad_norm": 1.0650259256362915, + "learning_rate": 9.999991760584058e-06, + "loss": 0.3664, + "step": 1527 + }, + { + "epoch": 0.0305882941721092, + "grad_norm": 1.0984774827957153, + "learning_rate": 9.999991161545064e-06, + "loss": 0.2743, + "step": 1528 + }, + { + "epoch": 0.030608312689237544, + "grad_norm": 1.1877598762512207, + "learning_rate": 9.999990541487186e-06, + "loss": 0.2531, + "step": 1529 + }, + { + "epoch": 0.030628331206365888, + "grad_norm": 1.0520845651626587, + "learning_rate": 9.99998990041042e-06, + "loss": 0.3529, + "step": 1530 + }, + { + "epoch": 0.03064834972349423, + "grad_norm": 0.976096510887146, + "learning_rate": 9.999989238314775e-06, + "loss": 0.3395, + "step": 1531 + }, + { + "epoch": 0.030668368240622575, + "grad_norm": 1.101197361946106, + "learning_rate": 9.99998855520025e-06, + "loss": 0.3541, + "step": 1532 + }, + { + "epoch": 0.03068838675775092, + "grad_norm": 0.9900884032249451, + "learning_rate": 9.999987851066852e-06, + "loss": 0.3574, + "step": 1533 + }, + { + "epoch": 0.030708405274879263, + "grad_norm": 1.0374438762664795, + "learning_rate": 9.999987125914578e-06, + "loss": 0.3813, + "step": 1534 + }, + { + "epoch": 0.030728423792007607, + "grad_norm": 1.140032172203064, + "learning_rate": 9.999986379743436e-06, + "loss": 0.3501, + "step": 1535 + }, + { + "epoch": 0.03074844230913595, + "grad_norm": 1.1986942291259766, + "learning_rate": 9.999985612553427e-06, + "loss": 0.3661, + "step": 1536 + }, + { + "epoch": 0.030768460826264294, + "grad_norm": 1.1275585889816284, + "learning_rate": 9.999984824344553e-06, + "loss": 0.4002, + "step": 1537 + }, + { + "epoch": 0.030788479343392638, + "grad_norm": 0.9958738088607788, + "learning_rate": 9.99998401511682e-06, + "loss": 0.3351, + "step": 1538 + }, + { + "epoch": 0.03080849786052098, + "grad_norm": 1.0842604637145996, + "learning_rate": 9.99998318487023e-06, + "loss": 0.3802, + "step": 1539 + }, + { + "epoch": 0.030828516377649325, + "grad_norm": 1.2425230741500854, + "learning_rate": 9.999982333604787e-06, + "loss": 0.3327, + "step": 1540 + }, + { + "epoch": 0.03084853489477767, + "grad_norm": 1.045507788658142, + "learning_rate": 9.999981461320493e-06, + "loss": 0.3491, + "step": 1541 + }, + { + "epoch": 0.030868553411906013, + "grad_norm": 1.7933317422866821, + "learning_rate": 9.999980568017353e-06, + "loss": 0.8778, + "step": 1542 + }, + { + "epoch": 0.030888571929034356, + "grad_norm": 1.1540610790252686, + "learning_rate": 9.999979653695373e-06, + "loss": 0.3371, + "step": 1543 + }, + { + "epoch": 0.0309085904461627, + "grad_norm": 1.1266117095947266, + "learning_rate": 9.999978718354552e-06, + "loss": 0.3527, + "step": 1544 + }, + { + "epoch": 0.030928608963291044, + "grad_norm": 1.0819565057754517, + "learning_rate": 9.999977761994898e-06, + "loss": 0.3911, + "step": 1545 + }, + { + "epoch": 0.030948627480419388, + "grad_norm": 1.8606925010681152, + "learning_rate": 9.999976784616413e-06, + "loss": 0.8295, + "step": 1546 + }, + { + "epoch": 0.03096864599754773, + "grad_norm": 1.061132550239563, + "learning_rate": 9.999975786219101e-06, + "loss": 0.3111, + "step": 1547 + }, + { + "epoch": 0.030988664514676075, + "grad_norm": 1.1494570970535278, + "learning_rate": 9.999974766802967e-06, + "loss": 0.302, + "step": 1548 + }, + { + "epoch": 0.03100868303180442, + "grad_norm": 1.037157416343689, + "learning_rate": 9.999973726368017e-06, + "loss": 0.3584, + "step": 1549 + }, + { + "epoch": 0.031028701548932763, + "grad_norm": 1.011846899986267, + "learning_rate": 9.999972664914251e-06, + "loss": 0.2939, + "step": 1550 + }, + { + "epoch": 0.031048720066061106, + "grad_norm": 0.9896759390830994, + "learning_rate": 9.999971582441677e-06, + "loss": 0.2888, + "step": 1551 + }, + { + "epoch": 0.03106873858318945, + "grad_norm": 1.155336856842041, + "learning_rate": 9.999970478950298e-06, + "loss": 0.3106, + "step": 1552 + }, + { + "epoch": 0.031088757100317794, + "grad_norm": 1.1576656103134155, + "learning_rate": 9.99996935444012e-06, + "loss": 0.3045, + "step": 1553 + }, + { + "epoch": 0.031108775617446138, + "grad_norm": 1.0395667552947998, + "learning_rate": 9.999968208911146e-06, + "loss": 0.3323, + "step": 1554 + }, + { + "epoch": 0.03112879413457448, + "grad_norm": 1.1641321182250977, + "learning_rate": 9.999967042363383e-06, + "loss": 0.3391, + "step": 1555 + }, + { + "epoch": 0.031148812651702825, + "grad_norm": 1.1154643297195435, + "learning_rate": 9.999965854796834e-06, + "loss": 0.2756, + "step": 1556 + }, + { + "epoch": 0.03116883116883117, + "grad_norm": 1.0999890565872192, + "learning_rate": 9.999964646211504e-06, + "loss": 0.2857, + "step": 1557 + }, + { + "epoch": 0.031188849685959513, + "grad_norm": 1.0001853704452515, + "learning_rate": 9.999963416607398e-06, + "loss": 0.3296, + "step": 1558 + }, + { + "epoch": 0.031208868203087856, + "grad_norm": 1.048601508140564, + "learning_rate": 9.999962165984522e-06, + "loss": 0.3624, + "step": 1559 + }, + { + "epoch": 0.0312288867202162, + "grad_norm": 1.0443121194839478, + "learning_rate": 9.999960894342883e-06, + "loss": 0.3355, + "step": 1560 + }, + { + "epoch": 0.031248905237344544, + "grad_norm": 1.0706007480621338, + "learning_rate": 9.999959601682484e-06, + "loss": 0.2842, + "step": 1561 + }, + { + "epoch": 0.031268923754472884, + "grad_norm": 1.193405270576477, + "learning_rate": 9.999958288003329e-06, + "loss": 0.4105, + "step": 1562 + }, + { + "epoch": 0.03128894227160123, + "grad_norm": 1.1663804054260254, + "learning_rate": 9.999956953305425e-06, + "loss": 0.3806, + "step": 1563 + }, + { + "epoch": 0.03130896078872957, + "grad_norm": 1.8991243839263916, + "learning_rate": 9.999955597588779e-06, + "loss": 0.8256, + "step": 1564 + }, + { + "epoch": 0.031328979305857915, + "grad_norm": 1.0166816711425781, + "learning_rate": 9.999954220853395e-06, + "loss": 0.3142, + "step": 1565 + }, + { + "epoch": 0.03134899782298626, + "grad_norm": 1.2141555547714233, + "learning_rate": 9.999952823099282e-06, + "loss": 0.3702, + "step": 1566 + }, + { + "epoch": 0.0313690163401146, + "grad_norm": 1.1713138818740845, + "learning_rate": 9.999951404326443e-06, + "loss": 0.3201, + "step": 1567 + }, + { + "epoch": 0.031389034857242946, + "grad_norm": 1.2579386234283447, + "learning_rate": 9.99994996453488e-06, + "loss": 0.3107, + "step": 1568 + }, + { + "epoch": 0.03140905337437129, + "grad_norm": 2.054107427597046, + "learning_rate": 9.999948503724609e-06, + "loss": 0.8517, + "step": 1569 + }, + { + "epoch": 0.031429071891499634, + "grad_norm": 1.2674864530563354, + "learning_rate": 9.999947021895626e-06, + "loss": 0.3215, + "step": 1570 + }, + { + "epoch": 0.03144909040862798, + "grad_norm": 1.0627853870391846, + "learning_rate": 9.999945519047945e-06, + "loss": 0.3365, + "step": 1571 + }, + { + "epoch": 0.03146910892575632, + "grad_norm": 1.025206446647644, + "learning_rate": 9.999943995181568e-06, + "loss": 0.3291, + "step": 1572 + }, + { + "epoch": 0.031489127442884665, + "grad_norm": 1.1861144304275513, + "learning_rate": 9.9999424502965e-06, + "loss": 0.3588, + "step": 1573 + }, + { + "epoch": 0.03150914596001301, + "grad_norm": 1.1547447443008423, + "learning_rate": 9.999940884392753e-06, + "loss": 0.3157, + "step": 1574 + }, + { + "epoch": 0.03152916447714135, + "grad_norm": 1.0610146522521973, + "learning_rate": 9.99993929747033e-06, + "loss": 0.3728, + "step": 1575 + }, + { + "epoch": 0.031549182994269696, + "grad_norm": 1.974165439605713, + "learning_rate": 9.999937689529236e-06, + "loss": 0.8253, + "step": 1576 + }, + { + "epoch": 0.03156920151139804, + "grad_norm": 1.065116047859192, + "learning_rate": 9.999936060569482e-06, + "loss": 0.3129, + "step": 1577 + }, + { + "epoch": 0.031589220028526384, + "grad_norm": 1.108520269393921, + "learning_rate": 9.999934410591073e-06, + "loss": 0.3897, + "step": 1578 + }, + { + "epoch": 0.03160923854565473, + "grad_norm": 1.1879100799560547, + "learning_rate": 9.999932739594014e-06, + "loss": 0.3426, + "step": 1579 + }, + { + "epoch": 0.03162925706278307, + "grad_norm": 1.250545859336853, + "learning_rate": 9.999931047578315e-06, + "loss": 0.3339, + "step": 1580 + }, + { + "epoch": 0.031649275579911415, + "grad_norm": 1.0216411352157593, + "learning_rate": 9.999929334543984e-06, + "loss": 0.3411, + "step": 1581 + }, + { + "epoch": 0.03166929409703976, + "grad_norm": 1.0261826515197754, + "learning_rate": 9.999927600491022e-06, + "loss": 0.3095, + "step": 1582 + }, + { + "epoch": 0.0316893126141681, + "grad_norm": 1.142471432685852, + "learning_rate": 9.999925845419443e-06, + "loss": 0.3837, + "step": 1583 + }, + { + "epoch": 0.031709331131296446, + "grad_norm": 1.1693854331970215, + "learning_rate": 9.99992406932925e-06, + "loss": 0.3363, + "step": 1584 + }, + { + "epoch": 0.03172934964842479, + "grad_norm": 0.9992582201957703, + "learning_rate": 9.999922272220454e-06, + "loss": 0.3299, + "step": 1585 + }, + { + "epoch": 0.031749368165553134, + "grad_norm": 1.1510417461395264, + "learning_rate": 9.999920454093059e-06, + "loss": 0.3404, + "step": 1586 + }, + { + "epoch": 0.03176938668268148, + "grad_norm": 1.1009752750396729, + "learning_rate": 9.999918614947075e-06, + "loss": 0.3055, + "step": 1587 + }, + { + "epoch": 0.03178940519980982, + "grad_norm": 1.892890214920044, + "learning_rate": 9.99991675478251e-06, + "loss": 0.8901, + "step": 1588 + }, + { + "epoch": 0.031809423716938165, + "grad_norm": 1.1551491022109985, + "learning_rate": 9.99991487359937e-06, + "loss": 0.3099, + "step": 1589 + }, + { + "epoch": 0.03182944223406651, + "grad_norm": 1.1236600875854492, + "learning_rate": 9.999912971397665e-06, + "loss": 0.3099, + "step": 1590 + }, + { + "epoch": 0.03184946075119485, + "grad_norm": 1.2350786924362183, + "learning_rate": 9.9999110481774e-06, + "loss": 0.3856, + "step": 1591 + }, + { + "epoch": 0.031869479268323196, + "grad_norm": 1.171216607093811, + "learning_rate": 9.999909103938588e-06, + "loss": 0.3476, + "step": 1592 + }, + { + "epoch": 0.03188949778545154, + "grad_norm": 1.0433286428451538, + "learning_rate": 9.999907138681231e-06, + "loss": 0.3396, + "step": 1593 + }, + { + "epoch": 0.031909516302579884, + "grad_norm": 1.1002634763717651, + "learning_rate": 9.999905152405343e-06, + "loss": 0.3128, + "step": 1594 + }, + { + "epoch": 0.03192953481970823, + "grad_norm": 1.0884828567504883, + "learning_rate": 9.999903145110927e-06, + "loss": 0.3295, + "step": 1595 + }, + { + "epoch": 0.03194955333683657, + "grad_norm": 1.054162621498108, + "learning_rate": 9.999901116797995e-06, + "loss": 0.2738, + "step": 1596 + }, + { + "epoch": 0.031969571853964915, + "grad_norm": 1.9274765253067017, + "learning_rate": 9.999899067466557e-06, + "loss": 0.8865, + "step": 1597 + }, + { + "epoch": 0.03198959037109326, + "grad_norm": 1.1334692239761353, + "learning_rate": 9.999896997116616e-06, + "loss": 0.3421, + "step": 1598 + }, + { + "epoch": 0.0320096088882216, + "grad_norm": 1.0379023551940918, + "learning_rate": 9.999894905748186e-06, + "loss": 0.3232, + "step": 1599 + }, + { + "epoch": 0.032029627405349946, + "grad_norm": 1.0749579668045044, + "learning_rate": 9.999892793361272e-06, + "loss": 0.3494, + "step": 1600 + }, + { + "epoch": 0.03204964592247829, + "grad_norm": 1.0384395122528076, + "learning_rate": 9.999890659955887e-06, + "loss": 0.3337, + "step": 1601 + }, + { + "epoch": 0.032069664439606634, + "grad_norm": 1.9814587831497192, + "learning_rate": 9.999888505532036e-06, + "loss": 0.786, + "step": 1602 + }, + { + "epoch": 0.03208968295673498, + "grad_norm": 1.0932834148406982, + "learning_rate": 9.999886330089731e-06, + "loss": 0.3059, + "step": 1603 + }, + { + "epoch": 0.03210970147386332, + "grad_norm": 1.0051442384719849, + "learning_rate": 9.999884133628978e-06, + "loss": 0.3033, + "step": 1604 + }, + { + "epoch": 0.032129719990991665, + "grad_norm": 1.0065243244171143, + "learning_rate": 9.99988191614979e-06, + "loss": 0.3029, + "step": 1605 + }, + { + "epoch": 0.03214973850812001, + "grad_norm": 1.0791497230529785, + "learning_rate": 9.999879677652175e-06, + "loss": 0.359, + "step": 1606 + }, + { + "epoch": 0.03216975702524835, + "grad_norm": 1.1998486518859863, + "learning_rate": 9.999877418136138e-06, + "loss": 0.36, + "step": 1607 + }, + { + "epoch": 0.032189775542376696, + "grad_norm": 1.2373602390289307, + "learning_rate": 9.999875137601697e-06, + "loss": 0.2875, + "step": 1608 + }, + { + "epoch": 0.03220979405950504, + "grad_norm": 1.0355348587036133, + "learning_rate": 9.999872836048853e-06, + "loss": 0.3221, + "step": 1609 + }, + { + "epoch": 0.03222981257663338, + "grad_norm": 1.0796796083450317, + "learning_rate": 9.999870513477622e-06, + "loss": 0.3418, + "step": 1610 + }, + { + "epoch": 0.03224983109376173, + "grad_norm": 1.0321221351623535, + "learning_rate": 9.999868169888012e-06, + "loss": 0.3593, + "step": 1611 + }, + { + "epoch": 0.03226984961089007, + "grad_norm": 1.0579622983932495, + "learning_rate": 9.99986580528003e-06, + "loss": 0.3061, + "step": 1612 + }, + { + "epoch": 0.032289868128018415, + "grad_norm": 1.2582190036773682, + "learning_rate": 9.99986341965369e-06, + "loss": 0.3458, + "step": 1613 + }, + { + "epoch": 0.03230988664514676, + "grad_norm": 1.9242020845413208, + "learning_rate": 9.999861013008999e-06, + "loss": 0.8417, + "step": 1614 + }, + { + "epoch": 0.0323299051622751, + "grad_norm": 1.088417649269104, + "learning_rate": 9.999858585345968e-06, + "loss": 0.3455, + "step": 1615 + }, + { + "epoch": 0.032349923679403446, + "grad_norm": 1.0996322631835938, + "learning_rate": 9.999856136664607e-06, + "loss": 0.3477, + "step": 1616 + }, + { + "epoch": 0.03236994219653179, + "grad_norm": 1.9115993976593018, + "learning_rate": 9.999853666964929e-06, + "loss": 0.9447, + "step": 1617 + }, + { + "epoch": 0.03238996071366013, + "grad_norm": 1.90034019947052, + "learning_rate": 9.999851176246941e-06, + "loss": 0.779, + "step": 1618 + }, + { + "epoch": 0.03240997923078848, + "grad_norm": 1.987371802330017, + "learning_rate": 9.999848664510654e-06, + "loss": 0.8305, + "step": 1619 + }, + { + "epoch": 0.03242999774791682, + "grad_norm": 1.0926971435546875, + "learning_rate": 9.99984613175608e-06, + "loss": 0.2993, + "step": 1620 + }, + { + "epoch": 0.032450016265045165, + "grad_norm": 1.2204536199569702, + "learning_rate": 9.999843577983228e-06, + "loss": 0.3053, + "step": 1621 + }, + { + "epoch": 0.03247003478217351, + "grad_norm": 1.0145082473754883, + "learning_rate": 9.99984100319211e-06, + "loss": 0.3415, + "step": 1622 + }, + { + "epoch": 0.03249005329930185, + "grad_norm": 1.0230543613433838, + "learning_rate": 9.999838407382737e-06, + "loss": 0.2928, + "step": 1623 + }, + { + "epoch": 0.032510071816430196, + "grad_norm": 1.2050732374191284, + "learning_rate": 9.999835790555119e-06, + "loss": 0.3348, + "step": 1624 + }, + { + "epoch": 0.03253009033355854, + "grad_norm": 1.1623824834823608, + "learning_rate": 9.999833152709267e-06, + "loss": 0.3501, + "step": 1625 + }, + { + "epoch": 0.03255010885068688, + "grad_norm": 1.122550368309021, + "learning_rate": 9.999830493845193e-06, + "loss": 0.3737, + "step": 1626 + }, + { + "epoch": 0.03257012736781523, + "grad_norm": 1.224914312362671, + "learning_rate": 9.999827813962907e-06, + "loss": 0.3993, + "step": 1627 + }, + { + "epoch": 0.03259014588494357, + "grad_norm": 1.0966848134994507, + "learning_rate": 9.99982511306242e-06, + "loss": 0.3489, + "step": 1628 + }, + { + "epoch": 0.032610164402071914, + "grad_norm": 1.112405776977539, + "learning_rate": 9.999822391143746e-06, + "loss": 0.3137, + "step": 1629 + }, + { + "epoch": 0.03263018291920026, + "grad_norm": 1.1113168001174927, + "learning_rate": 9.999819648206895e-06, + "loss": 0.3292, + "step": 1630 + }, + { + "epoch": 0.0326502014363286, + "grad_norm": 1.0824131965637207, + "learning_rate": 9.999816884251876e-06, + "loss": 0.3028, + "step": 1631 + }, + { + "epoch": 0.032670219953456946, + "grad_norm": 1.1000314950942993, + "learning_rate": 9.999814099278704e-06, + "loss": 0.3478, + "step": 1632 + }, + { + "epoch": 0.03269023847058529, + "grad_norm": 0.9842463731765747, + "learning_rate": 9.99981129328739e-06, + "loss": 0.335, + "step": 1633 + }, + { + "epoch": 0.03271025698771363, + "grad_norm": 1.0947173833847046, + "learning_rate": 9.999808466277943e-06, + "loss": 0.3218, + "step": 1634 + }, + { + "epoch": 0.03273027550484198, + "grad_norm": 1.0321308374404907, + "learning_rate": 9.99980561825038e-06, + "loss": 0.2809, + "step": 1635 + }, + { + "epoch": 0.03275029402197032, + "grad_norm": 1.1661434173583984, + "learning_rate": 9.999802749204708e-06, + "loss": 0.3951, + "step": 1636 + }, + { + "epoch": 0.032770312539098664, + "grad_norm": 1.4552842378616333, + "learning_rate": 9.999799859140942e-06, + "loss": 0.3204, + "step": 1637 + }, + { + "epoch": 0.03279033105622701, + "grad_norm": 0.9347301721572876, + "learning_rate": 9.999796948059093e-06, + "loss": 0.2988, + "step": 1638 + }, + { + "epoch": 0.03281034957335535, + "grad_norm": 1.1018630266189575, + "learning_rate": 9.999794015959174e-06, + "loss": 0.3045, + "step": 1639 + }, + { + "epoch": 0.032830368090483696, + "grad_norm": 1.0216457843780518, + "learning_rate": 9.999791062841197e-06, + "loss": 0.2841, + "step": 1640 + }, + { + "epoch": 0.03285038660761204, + "grad_norm": 1.1165132522583008, + "learning_rate": 9.999788088705176e-06, + "loss": 0.293, + "step": 1641 + }, + { + "epoch": 0.03287040512474038, + "grad_norm": 1.8954179286956787, + "learning_rate": 9.999785093551119e-06, + "loss": 0.8742, + "step": 1642 + }, + { + "epoch": 0.03289042364186873, + "grad_norm": 1.7928073406219482, + "learning_rate": 9.999782077379043e-06, + "loss": 0.84, + "step": 1643 + }, + { + "epoch": 0.03291044215899707, + "grad_norm": 1.263667345046997, + "learning_rate": 9.99977904018896e-06, + "loss": 0.3682, + "step": 1644 + }, + { + "epoch": 0.032930460676125414, + "grad_norm": 1.0866490602493286, + "learning_rate": 9.99977598198088e-06, + "loss": 0.2957, + "step": 1645 + }, + { + "epoch": 0.03295047919325376, + "grad_norm": 1.0578480958938599, + "learning_rate": 9.999772902754819e-06, + "loss": 0.3504, + "step": 1646 + }, + { + "epoch": 0.0329704977103821, + "grad_norm": 1.0971423387527466, + "learning_rate": 9.999769802510787e-06, + "loss": 0.3106, + "step": 1647 + }, + { + "epoch": 0.032990516227510445, + "grad_norm": 1.0912978649139404, + "learning_rate": 9.9997666812488e-06, + "loss": 0.3463, + "step": 1648 + }, + { + "epoch": 0.03301053474463879, + "grad_norm": 0.9772865772247314, + "learning_rate": 9.99976353896887e-06, + "loss": 0.2789, + "step": 1649 + }, + { + "epoch": 0.03303055326176713, + "grad_norm": 1.2106389999389648, + "learning_rate": 9.999760375671009e-06, + "loss": 0.3396, + "step": 1650 + }, + { + "epoch": 0.03305057177889548, + "grad_norm": 0.9566429853439331, + "learning_rate": 9.999757191355234e-06, + "loss": 0.3115, + "step": 1651 + }, + { + "epoch": 0.03307059029602382, + "grad_norm": 0.9831568002700806, + "learning_rate": 9.999753986021552e-06, + "loss": 0.3033, + "step": 1652 + }, + { + "epoch": 0.033090608813152164, + "grad_norm": 1.1353390216827393, + "learning_rate": 9.999750759669981e-06, + "loss": 0.3153, + "step": 1653 + }, + { + "epoch": 0.03311062733028051, + "grad_norm": 1.0740755796432495, + "learning_rate": 9.999747512300535e-06, + "loss": 0.3214, + "step": 1654 + }, + { + "epoch": 0.03313064584740885, + "grad_norm": 0.9828640222549438, + "learning_rate": 9.999744243913226e-06, + "loss": 0.3781, + "step": 1655 + }, + { + "epoch": 0.033150664364537195, + "grad_norm": 1.8873589038848877, + "learning_rate": 9.999740954508068e-06, + "loss": 0.8671, + "step": 1656 + }, + { + "epoch": 0.03317068288166554, + "grad_norm": 1.1236472129821777, + "learning_rate": 9.999737644085078e-06, + "loss": 0.3436, + "step": 1657 + }, + { + "epoch": 0.03319070139879388, + "grad_norm": 1.4543882608413696, + "learning_rate": 9.999734312644262e-06, + "loss": 0.3545, + "step": 1658 + }, + { + "epoch": 0.03321071991592223, + "grad_norm": 1.1351834535598755, + "learning_rate": 9.999730960185641e-06, + "loss": 0.3193, + "step": 1659 + }, + { + "epoch": 0.03323073843305057, + "grad_norm": 1.071945071220398, + "learning_rate": 9.999727586709227e-06, + "loss": 0.3109, + "step": 1660 + }, + { + "epoch": 0.033250756950178914, + "grad_norm": 1.2814466953277588, + "learning_rate": 9.999724192215034e-06, + "loss": 0.3535, + "step": 1661 + }, + { + "epoch": 0.03327077546730726, + "grad_norm": 1.3107901811599731, + "learning_rate": 9.999720776703076e-06, + "loss": 0.3455, + "step": 1662 + }, + { + "epoch": 0.0332907939844356, + "grad_norm": 1.0964138507843018, + "learning_rate": 9.99971734017337e-06, + "loss": 0.3981, + "step": 1663 + }, + { + "epoch": 0.033310812501563945, + "grad_norm": 1.2454878091812134, + "learning_rate": 9.999713882625925e-06, + "loss": 0.3335, + "step": 1664 + }, + { + "epoch": 0.03333083101869229, + "grad_norm": 1.2444007396697998, + "learning_rate": 9.999710404060761e-06, + "loss": 0.3372, + "step": 1665 + }, + { + "epoch": 0.03335084953582063, + "grad_norm": 1.245661735534668, + "learning_rate": 9.99970690447789e-06, + "loss": 0.3151, + "step": 1666 + }, + { + "epoch": 0.033370868052948977, + "grad_norm": 1.1431536674499512, + "learning_rate": 9.999703383877329e-06, + "loss": 0.3318, + "step": 1667 + }, + { + "epoch": 0.03339088657007732, + "grad_norm": 1.1071300506591797, + "learning_rate": 9.999699842259087e-06, + "loss": 0.3059, + "step": 1668 + }, + { + "epoch": 0.033410905087205664, + "grad_norm": 1.0525224208831787, + "learning_rate": 9.999696279623185e-06, + "loss": 0.2876, + "step": 1669 + }, + { + "epoch": 0.03343092360433401, + "grad_norm": 1.044603943824768, + "learning_rate": 9.999692695969635e-06, + "loss": 0.3191, + "step": 1670 + }, + { + "epoch": 0.03345094212146235, + "grad_norm": 1.2515753507614136, + "learning_rate": 9.999689091298454e-06, + "loss": 0.3215, + "step": 1671 + }, + { + "epoch": 0.033470960638590695, + "grad_norm": 1.471955418586731, + "learning_rate": 9.999685465609656e-06, + "loss": 0.3395, + "step": 1672 + }, + { + "epoch": 0.03349097915571904, + "grad_norm": 1.1259628534317017, + "learning_rate": 9.999681818903256e-06, + "loss": 0.4177, + "step": 1673 + }, + { + "epoch": 0.03351099767284738, + "grad_norm": 1.0607832670211792, + "learning_rate": 9.999678151179269e-06, + "loss": 0.3136, + "step": 1674 + }, + { + "epoch": 0.033531016189975726, + "grad_norm": 1.1572691202163696, + "learning_rate": 9.999674462437711e-06, + "loss": 0.318, + "step": 1675 + }, + { + "epoch": 0.03355103470710407, + "grad_norm": 1.107702374458313, + "learning_rate": 9.999670752678598e-06, + "loss": 0.3345, + "step": 1676 + }, + { + "epoch": 0.033571053224232414, + "grad_norm": 1.1796607971191406, + "learning_rate": 9.999667021901944e-06, + "loss": 0.3331, + "step": 1677 + }, + { + "epoch": 0.03359107174136076, + "grad_norm": 1.1327977180480957, + "learning_rate": 9.999663270107767e-06, + "loss": 0.344, + "step": 1678 + }, + { + "epoch": 0.0336110902584891, + "grad_norm": 1.2302765846252441, + "learning_rate": 9.999659497296082e-06, + "loss": 0.3424, + "step": 1679 + }, + { + "epoch": 0.033631108775617445, + "grad_norm": 0.990686297416687, + "learning_rate": 9.999655703466902e-06, + "loss": 0.3363, + "step": 1680 + }, + { + "epoch": 0.03365112729274579, + "grad_norm": 1.1301137208938599, + "learning_rate": 9.999651888620247e-06, + "loss": 0.3374, + "step": 1681 + }, + { + "epoch": 0.03367114580987413, + "grad_norm": 0.9751896858215332, + "learning_rate": 9.999648052756132e-06, + "loss": 0.2898, + "step": 1682 + }, + { + "epoch": 0.033691164327002476, + "grad_norm": 1.2033792734146118, + "learning_rate": 9.999644195874572e-06, + "loss": 0.352, + "step": 1683 + }, + { + "epoch": 0.03371118284413082, + "grad_norm": 1.9230962991714478, + "learning_rate": 9.999640317975583e-06, + "loss": 0.9018, + "step": 1684 + }, + { + "epoch": 0.033731201361259164, + "grad_norm": 1.852220892906189, + "learning_rate": 9.999636419059183e-06, + "loss": 0.8444, + "step": 1685 + }, + { + "epoch": 0.03375121987838751, + "grad_norm": 0.9913758635520935, + "learning_rate": 9.999632499125386e-06, + "loss": 0.3491, + "step": 1686 + }, + { + "epoch": 0.03377123839551585, + "grad_norm": 1.0125178098678589, + "learning_rate": 9.999628558174212e-06, + "loss": 0.3222, + "step": 1687 + }, + { + "epoch": 0.033791256912644195, + "grad_norm": 1.0344505310058594, + "learning_rate": 9.999624596205675e-06, + "loss": 0.3339, + "step": 1688 + }, + { + "epoch": 0.03381127542977254, + "grad_norm": 1.9638826847076416, + "learning_rate": 9.999620613219792e-06, + "loss": 0.8212, + "step": 1689 + }, + { + "epoch": 0.03383129394690088, + "grad_norm": 1.137292742729187, + "learning_rate": 9.999616609216577e-06, + "loss": 0.339, + "step": 1690 + }, + { + "epoch": 0.033851312464029226, + "grad_norm": 1.161903738975525, + "learning_rate": 9.999612584196055e-06, + "loss": 0.334, + "step": 1691 + }, + { + "epoch": 0.03387133098115757, + "grad_norm": 1.1438614130020142, + "learning_rate": 9.999608538158234e-06, + "loss": 0.372, + "step": 1692 + }, + { + "epoch": 0.033891349498285914, + "grad_norm": 1.051919937133789, + "learning_rate": 9.999604471103134e-06, + "loss": 0.326, + "step": 1693 + }, + { + "epoch": 0.03391136801541426, + "grad_norm": 1.1149356365203857, + "learning_rate": 9.999600383030775e-06, + "loss": 0.3068, + "step": 1694 + }, + { + "epoch": 0.0339313865325426, + "grad_norm": 1.0593624114990234, + "learning_rate": 9.99959627394117e-06, + "loss": 0.3665, + "step": 1695 + }, + { + "epoch": 0.033951405049670945, + "grad_norm": 1.0969752073287964, + "learning_rate": 9.999592143834341e-06, + "loss": 0.3065, + "step": 1696 + }, + { + "epoch": 0.03397142356679929, + "grad_norm": 1.2670115232467651, + "learning_rate": 9.9995879927103e-06, + "loss": 0.3813, + "step": 1697 + }, + { + "epoch": 0.03399144208392763, + "grad_norm": 1.820241928100586, + "learning_rate": 9.999583820569068e-06, + "loss": 0.8688, + "step": 1698 + }, + { + "epoch": 0.034011460601055976, + "grad_norm": 1.8901313543319702, + "learning_rate": 9.999579627410661e-06, + "loss": 0.8877, + "step": 1699 + }, + { + "epoch": 0.03403147911818432, + "grad_norm": 1.0088996887207031, + "learning_rate": 9.999575413235098e-06, + "loss": 0.3255, + "step": 1700 + }, + { + "epoch": 0.034051497635312664, + "grad_norm": 1.9684480428695679, + "learning_rate": 9.999571178042396e-06, + "loss": 0.8402, + "step": 1701 + }, + { + "epoch": 0.03407151615244101, + "grad_norm": 1.219041347503662, + "learning_rate": 9.99956692183257e-06, + "loss": 0.3455, + "step": 1702 + }, + { + "epoch": 0.03409153466956935, + "grad_norm": 1.1380664110183716, + "learning_rate": 9.999562644605643e-06, + "loss": 0.3475, + "step": 1703 + }, + { + "epoch": 0.034111553186697695, + "grad_norm": 1.1856576204299927, + "learning_rate": 9.999558346361632e-06, + "loss": 0.331, + "step": 1704 + }, + { + "epoch": 0.03413157170382604, + "grad_norm": 1.157383680343628, + "learning_rate": 9.99955402710055e-06, + "loss": 0.3277, + "step": 1705 + }, + { + "epoch": 0.03415159022095438, + "grad_norm": 1.814713954925537, + "learning_rate": 9.99954968682242e-06, + "loss": 0.8355, + "step": 1706 + }, + { + "epoch": 0.034171608738082726, + "grad_norm": 1.0457628965377808, + "learning_rate": 9.999545325527262e-06, + "loss": 0.3469, + "step": 1707 + }, + { + "epoch": 0.03419162725521107, + "grad_norm": 1.1369332075119019, + "learning_rate": 9.999540943215088e-06, + "loss": 0.3428, + "step": 1708 + }, + { + "epoch": 0.034211645772339414, + "grad_norm": 1.0896940231323242, + "learning_rate": 9.99953653988592e-06, + "loss": 0.3935, + "step": 1709 + }, + { + "epoch": 0.03423166428946776, + "grad_norm": 1.0184578895568848, + "learning_rate": 9.999532115539779e-06, + "loss": 0.2903, + "step": 1710 + }, + { + "epoch": 0.0342516828065961, + "grad_norm": 1.0936189889907837, + "learning_rate": 9.999527670176678e-06, + "loss": 0.3331, + "step": 1711 + }, + { + "epoch": 0.034271701323724445, + "grad_norm": 0.9978766441345215, + "learning_rate": 9.99952320379664e-06, + "loss": 0.3006, + "step": 1712 + }, + { + "epoch": 0.03429171984085279, + "grad_norm": 1.253991723060608, + "learning_rate": 9.999518716399681e-06, + "loss": 0.3535, + "step": 1713 + }, + { + "epoch": 0.03431173835798113, + "grad_norm": 1.2782618999481201, + "learning_rate": 9.999514207985823e-06, + "loss": 0.3276, + "step": 1714 + }, + { + "epoch": 0.034331756875109476, + "grad_norm": 1.2013955116271973, + "learning_rate": 9.999509678555081e-06, + "loss": 0.3488, + "step": 1715 + }, + { + "epoch": 0.03435177539223782, + "grad_norm": 1.0308786630630493, + "learning_rate": 9.999505128107479e-06, + "loss": 0.2995, + "step": 1716 + }, + { + "epoch": 0.03437179390936616, + "grad_norm": 1.913626790046692, + "learning_rate": 9.99950055664303e-06, + "loss": 0.8567, + "step": 1717 + }, + { + "epoch": 0.03439181242649451, + "grad_norm": 1.0639028549194336, + "learning_rate": 9.99949596416176e-06, + "loss": 0.3015, + "step": 1718 + }, + { + "epoch": 0.03441183094362285, + "grad_norm": 1.0483906269073486, + "learning_rate": 9.999491350663683e-06, + "loss": 0.3432, + "step": 1719 + }, + { + "epoch": 0.034431849460751195, + "grad_norm": 1.0902190208435059, + "learning_rate": 9.999486716148821e-06, + "loss": 0.3119, + "step": 1720 + }, + { + "epoch": 0.03445186797787954, + "grad_norm": 1.1039341688156128, + "learning_rate": 9.999482060617192e-06, + "loss": 0.3084, + "step": 1721 + }, + { + "epoch": 0.03447188649500788, + "grad_norm": 1.1052287817001343, + "learning_rate": 9.999477384068816e-06, + "loss": 0.3134, + "step": 1722 + }, + { + "epoch": 0.034491905012136226, + "grad_norm": 1.1480827331542969, + "learning_rate": 9.999472686503715e-06, + "loss": 0.3663, + "step": 1723 + }, + { + "epoch": 0.03451192352926457, + "grad_norm": 1.1027244329452515, + "learning_rate": 9.999467967921906e-06, + "loss": 0.3559, + "step": 1724 + }, + { + "epoch": 0.03453194204639291, + "grad_norm": 1.158332109451294, + "learning_rate": 9.999463228323408e-06, + "loss": 0.3832, + "step": 1725 + }, + { + "epoch": 0.03455196056352126, + "grad_norm": 1.0612889528274536, + "learning_rate": 9.999458467708242e-06, + "loss": 0.3156, + "step": 1726 + }, + { + "epoch": 0.0345719790806496, + "grad_norm": 1.1581839323043823, + "learning_rate": 9.99945368607643e-06, + "loss": 0.3076, + "step": 1727 + }, + { + "epoch": 0.034591997597777945, + "grad_norm": 1.0034266710281372, + "learning_rate": 9.99944888342799e-06, + "loss": 0.3031, + "step": 1728 + }, + { + "epoch": 0.03461201611490629, + "grad_norm": 1.0517688989639282, + "learning_rate": 9.999444059762945e-06, + "loss": 0.3604, + "step": 1729 + }, + { + "epoch": 0.03463203463203463, + "grad_norm": 1.0749903917312622, + "learning_rate": 9.999439215081311e-06, + "loss": 0.384, + "step": 1730 + }, + { + "epoch": 0.034652053149162976, + "grad_norm": 1.0981889963150024, + "learning_rate": 9.99943434938311e-06, + "loss": 0.308, + "step": 1731 + }, + { + "epoch": 0.03467207166629132, + "grad_norm": 2.146247148513794, + "learning_rate": 9.999429462668365e-06, + "loss": 0.8489, + "step": 1732 + }, + { + "epoch": 0.03469209018341966, + "grad_norm": 1.0057995319366455, + "learning_rate": 9.999424554937093e-06, + "loss": 0.2947, + "step": 1733 + }, + { + "epoch": 0.03471210870054801, + "grad_norm": 1.9282325506210327, + "learning_rate": 9.999419626189316e-06, + "loss": 0.3343, + "step": 1734 + }, + { + "epoch": 0.03473212721767635, + "grad_norm": 0.9841139316558838, + "learning_rate": 9.999414676425056e-06, + "loss": 0.3157, + "step": 1735 + }, + { + "epoch": 0.034752145734804694, + "grad_norm": 0.9698187708854675, + "learning_rate": 9.999409705644331e-06, + "loss": 0.3001, + "step": 1736 + }, + { + "epoch": 0.03477216425193304, + "grad_norm": 2.0692574977874756, + "learning_rate": 9.999404713847164e-06, + "loss": 0.7961, + "step": 1737 + }, + { + "epoch": 0.03479218276906138, + "grad_norm": 1.109845519065857, + "learning_rate": 9.999399701033575e-06, + "loss": 0.3256, + "step": 1738 + }, + { + "epoch": 0.034812201286189726, + "grad_norm": 2.0007567405700684, + "learning_rate": 9.999394667203585e-06, + "loss": 0.9232, + "step": 1739 + }, + { + "epoch": 0.03483221980331807, + "grad_norm": 1.1904983520507812, + "learning_rate": 9.999389612357217e-06, + "loss": 0.2955, + "step": 1740 + }, + { + "epoch": 0.03485223832044641, + "grad_norm": 1.101305365562439, + "learning_rate": 9.999384536494491e-06, + "loss": 0.306, + "step": 1741 + }, + { + "epoch": 0.03487225683757476, + "grad_norm": 1.1924898624420166, + "learning_rate": 9.999379439615428e-06, + "loss": 0.3176, + "step": 1742 + }, + { + "epoch": 0.0348922753547031, + "grad_norm": 1.0586838722229004, + "learning_rate": 9.99937432172005e-06, + "loss": 0.3268, + "step": 1743 + }, + { + "epoch": 0.034912293871831444, + "grad_norm": 1.0847808122634888, + "learning_rate": 9.999369182808378e-06, + "loss": 0.3357, + "step": 1744 + }, + { + "epoch": 0.03493231238895979, + "grad_norm": 1.194129467010498, + "learning_rate": 9.999364022880433e-06, + "loss": 0.3538, + "step": 1745 + }, + { + "epoch": 0.03495233090608813, + "grad_norm": 1.2468175888061523, + "learning_rate": 9.99935884193624e-06, + "loss": 0.342, + "step": 1746 + }, + { + "epoch": 0.034972349423216476, + "grad_norm": 1.0275647640228271, + "learning_rate": 9.999353639975815e-06, + "loss": 0.3226, + "step": 1747 + }, + { + "epoch": 0.03499236794034482, + "grad_norm": 1.0417237281799316, + "learning_rate": 9.999348416999186e-06, + "loss": 0.3279, + "step": 1748 + }, + { + "epoch": 0.03501238645747316, + "grad_norm": 1.089349627494812, + "learning_rate": 9.999343173006371e-06, + "loss": 0.2931, + "step": 1749 + }, + { + "epoch": 0.03503240497460151, + "grad_norm": 1.1844408512115479, + "learning_rate": 9.999337907997394e-06, + "loss": 0.3278, + "step": 1750 + }, + { + "epoch": 0.03505242349172985, + "grad_norm": 1.0126473903656006, + "learning_rate": 9.999332621972273e-06, + "loss": 0.3526, + "step": 1751 + }, + { + "epoch": 0.035072442008858194, + "grad_norm": 1.939924716949463, + "learning_rate": 9.999327314931037e-06, + "loss": 0.7837, + "step": 1752 + }, + { + "epoch": 0.03509246052598654, + "grad_norm": 1.0865716934204102, + "learning_rate": 9.999321986873703e-06, + "loss": 0.3491, + "step": 1753 + }, + { + "epoch": 0.03511247904311488, + "grad_norm": 1.8939428329467773, + "learning_rate": 9.999316637800296e-06, + "loss": 0.8832, + "step": 1754 + }, + { + "epoch": 0.035132497560243225, + "grad_norm": 1.04414963722229, + "learning_rate": 9.999311267710839e-06, + "loss": 0.3487, + "step": 1755 + }, + { + "epoch": 0.03515251607737157, + "grad_norm": 1.0882223844528198, + "learning_rate": 9.99930587660535e-06, + "loss": 0.3784, + "step": 1756 + }, + { + "epoch": 0.03517253459449991, + "grad_norm": 1.1683406829833984, + "learning_rate": 9.999300464483858e-06, + "loss": 0.2915, + "step": 1757 + }, + { + "epoch": 0.03519255311162826, + "grad_norm": 1.1711411476135254, + "learning_rate": 9.999295031346381e-06, + "loss": 0.3726, + "step": 1758 + }, + { + "epoch": 0.0352125716287566, + "grad_norm": 2.0617518424987793, + "learning_rate": 9.999289577192944e-06, + "loss": 0.8702, + "step": 1759 + }, + { + "epoch": 0.035232590145884944, + "grad_norm": 1.0517627000808716, + "learning_rate": 9.999284102023569e-06, + "loss": 0.2813, + "step": 1760 + }, + { + "epoch": 0.03525260866301329, + "grad_norm": 1.1426165103912354, + "learning_rate": 9.99927860583828e-06, + "loss": 0.4153, + "step": 1761 + }, + { + "epoch": 0.03527262718014163, + "grad_norm": 1.1517713069915771, + "learning_rate": 9.9992730886371e-06, + "loss": 0.3133, + "step": 1762 + }, + { + "epoch": 0.035292645697269975, + "grad_norm": 1.1182194948196411, + "learning_rate": 9.999267550420053e-06, + "loss": 0.3053, + "step": 1763 + }, + { + "epoch": 0.03531266421439832, + "grad_norm": 1.0935876369476318, + "learning_rate": 9.99926199118716e-06, + "loss": 0.3393, + "step": 1764 + }, + { + "epoch": 0.03533268273152666, + "grad_norm": 1.0749467611312866, + "learning_rate": 9.999256410938445e-06, + "loss": 0.3609, + "step": 1765 + }, + { + "epoch": 0.03535270124865501, + "grad_norm": 1.0155376195907593, + "learning_rate": 9.99925080967393e-06, + "loss": 0.3503, + "step": 1766 + }, + { + "epoch": 0.03537271976578335, + "grad_norm": 1.1216758489608765, + "learning_rate": 9.999245187393643e-06, + "loss": 0.3422, + "step": 1767 + }, + { + "epoch": 0.035392738282911694, + "grad_norm": 1.0514335632324219, + "learning_rate": 9.999239544097605e-06, + "loss": 0.3323, + "step": 1768 + }, + { + "epoch": 0.03541275680004004, + "grad_norm": 0.9349673986434937, + "learning_rate": 9.999233879785839e-06, + "loss": 0.2766, + "step": 1769 + }, + { + "epoch": 0.03543277531716838, + "grad_norm": 1.0821118354797363, + "learning_rate": 9.99922819445837e-06, + "loss": 0.3234, + "step": 1770 + }, + { + "epoch": 0.035452793834296725, + "grad_norm": 1.0882554054260254, + "learning_rate": 9.99922248811522e-06, + "loss": 0.3454, + "step": 1771 + }, + { + "epoch": 0.03547281235142507, + "grad_norm": 1.2258822917938232, + "learning_rate": 9.999216760756417e-06, + "loss": 0.3409, + "step": 1772 + }, + { + "epoch": 0.03549283086855341, + "grad_norm": 1.151849389076233, + "learning_rate": 9.999211012381983e-06, + "loss": 0.3406, + "step": 1773 + }, + { + "epoch": 0.035512849385681756, + "grad_norm": 1.0181057453155518, + "learning_rate": 9.99920524299194e-06, + "loss": 0.3112, + "step": 1774 + }, + { + "epoch": 0.0355328679028101, + "grad_norm": 1.104857087135315, + "learning_rate": 9.999199452586314e-06, + "loss": 0.3378, + "step": 1775 + }, + { + "epoch": 0.035552886419938444, + "grad_norm": 1.093328595161438, + "learning_rate": 9.99919364116513e-06, + "loss": 0.3016, + "step": 1776 + }, + { + "epoch": 0.03557290493706679, + "grad_norm": 1.8650366067886353, + "learning_rate": 9.99918780872841e-06, + "loss": 0.8211, + "step": 1777 + }, + { + "epoch": 0.03559292345419513, + "grad_norm": 1.043068289756775, + "learning_rate": 9.999181955276182e-06, + "loss": 0.323, + "step": 1778 + }, + { + "epoch": 0.035612941971323475, + "grad_norm": 1.1160471439361572, + "learning_rate": 9.999176080808468e-06, + "loss": 0.2994, + "step": 1779 + }, + { + "epoch": 0.03563296048845182, + "grad_norm": 1.1272475719451904, + "learning_rate": 9.999170185325295e-06, + "loss": 0.326, + "step": 1780 + }, + { + "epoch": 0.03565297900558016, + "grad_norm": 1.0118786096572876, + "learning_rate": 9.999164268826684e-06, + "loss": 0.3234, + "step": 1781 + }, + { + "epoch": 0.035672997522708506, + "grad_norm": 1.0193904638290405, + "learning_rate": 9.999158331312666e-06, + "loss": 0.3171, + "step": 1782 + }, + { + "epoch": 0.03569301603983685, + "grad_norm": 1.8612452745437622, + "learning_rate": 9.999152372783259e-06, + "loss": 0.9225, + "step": 1783 + }, + { + "epoch": 0.035713034556965194, + "grad_norm": 1.066795825958252, + "learning_rate": 9.999146393238493e-06, + "loss": 0.3294, + "step": 1784 + }, + { + "epoch": 0.03573305307409354, + "grad_norm": 1.1243849992752075, + "learning_rate": 9.999140392678391e-06, + "loss": 0.3369, + "step": 1785 + }, + { + "epoch": 0.03575307159122188, + "grad_norm": 1.114353060722351, + "learning_rate": 9.999134371102976e-06, + "loss": 0.3704, + "step": 1786 + }, + { + "epoch": 0.035773090108350225, + "grad_norm": 1.0968401432037354, + "learning_rate": 9.999128328512278e-06, + "loss": 0.3522, + "step": 1787 + }, + { + "epoch": 0.03579310862547857, + "grad_norm": 1.255424976348877, + "learning_rate": 9.999122264906321e-06, + "loss": 0.3165, + "step": 1788 + }, + { + "epoch": 0.03581312714260691, + "grad_norm": 1.0358394384384155, + "learning_rate": 9.99911618028513e-06, + "loss": 0.3039, + "step": 1789 + }, + { + "epoch": 0.035833145659735256, + "grad_norm": 2.025722026824951, + "learning_rate": 9.999110074648729e-06, + "loss": 0.8803, + "step": 1790 + }, + { + "epoch": 0.0358531641768636, + "grad_norm": 1.0888633728027344, + "learning_rate": 9.999103947997146e-06, + "loss": 0.3243, + "step": 1791 + }, + { + "epoch": 0.035873182693991944, + "grad_norm": 1.184116244316101, + "learning_rate": 9.999097800330406e-06, + "loss": 0.3354, + "step": 1792 + }, + { + "epoch": 0.03589320121112029, + "grad_norm": 1.3048081398010254, + "learning_rate": 9.999091631648535e-06, + "loss": 0.3259, + "step": 1793 + }, + { + "epoch": 0.03591321972824863, + "grad_norm": 1.135545015335083, + "learning_rate": 9.999085441951557e-06, + "loss": 0.3713, + "step": 1794 + }, + { + "epoch": 0.035933238245376975, + "grad_norm": 1.093761920928955, + "learning_rate": 9.9990792312395e-06, + "loss": 0.3699, + "step": 1795 + }, + { + "epoch": 0.03595325676250532, + "grad_norm": 1.0878596305847168, + "learning_rate": 9.99907299951239e-06, + "loss": 0.3034, + "step": 1796 + }, + { + "epoch": 0.03597327527963366, + "grad_norm": 1.1930042505264282, + "learning_rate": 9.999066746770254e-06, + "loss": 0.3624, + "step": 1797 + }, + { + "epoch": 0.035993293796762006, + "grad_norm": 1.152584195137024, + "learning_rate": 9.999060473013116e-06, + "loss": 0.3436, + "step": 1798 + }, + { + "epoch": 0.03601331231389035, + "grad_norm": 1.1099684238433838, + "learning_rate": 9.999054178241004e-06, + "loss": 0.3214, + "step": 1799 + }, + { + "epoch": 0.036033330831018694, + "grad_norm": 1.2117854356765747, + "learning_rate": 9.999047862453943e-06, + "loss": 0.3274, + "step": 1800 + }, + { + "epoch": 0.03605334934814704, + "grad_norm": 1.886657476425171, + "learning_rate": 9.999041525651963e-06, + "loss": 0.905, + "step": 1801 + }, + { + "epoch": 0.03607336786527538, + "grad_norm": 1.0848982334136963, + "learning_rate": 9.999035167835087e-06, + "loss": 0.3302, + "step": 1802 + }, + { + "epoch": 0.036093386382403725, + "grad_norm": 1.0934535264968872, + "learning_rate": 9.999028789003342e-06, + "loss": 0.3809, + "step": 1803 + }, + { + "epoch": 0.03611340489953207, + "grad_norm": 1.7973464727401733, + "learning_rate": 9.999022389156755e-06, + "loss": 0.8401, + "step": 1804 + }, + { + "epoch": 0.03613342341666041, + "grad_norm": 1.8020024299621582, + "learning_rate": 9.999015968295356e-06, + "loss": 0.8654, + "step": 1805 + }, + { + "epoch": 0.036153441933788756, + "grad_norm": 1.8059031963348389, + "learning_rate": 9.999009526419169e-06, + "loss": 0.9045, + "step": 1806 + }, + { + "epoch": 0.0361734604509171, + "grad_norm": 1.0707128047943115, + "learning_rate": 9.999003063528222e-06, + "loss": 0.3383, + "step": 1807 + }, + { + "epoch": 0.036193478968045444, + "grad_norm": 1.1659594774246216, + "learning_rate": 9.998996579622541e-06, + "loss": 0.3204, + "step": 1808 + }, + { + "epoch": 0.03621349748517379, + "grad_norm": 1.8678781986236572, + "learning_rate": 9.998990074702154e-06, + "loss": 0.8468, + "step": 1809 + }, + { + "epoch": 0.03623351600230213, + "grad_norm": 1.2087656259536743, + "learning_rate": 9.998983548767089e-06, + "loss": 0.3202, + "step": 1810 + }, + { + "epoch": 0.036253534519430475, + "grad_norm": 1.0285853147506714, + "learning_rate": 9.998977001817374e-06, + "loss": 0.3312, + "step": 1811 + }, + { + "epoch": 0.03627355303655882, + "grad_norm": 1.1947088241577148, + "learning_rate": 9.998970433853035e-06, + "loss": 0.3092, + "step": 1812 + }, + { + "epoch": 0.03629357155368716, + "grad_norm": 1.084144115447998, + "learning_rate": 9.998963844874098e-06, + "loss": 0.3593, + "step": 1813 + }, + { + "epoch": 0.036313590070815506, + "grad_norm": 1.1086739301681519, + "learning_rate": 9.998957234880596e-06, + "loss": 0.3238, + "step": 1814 + }, + { + "epoch": 0.03633360858794385, + "grad_norm": 1.035625696182251, + "learning_rate": 9.998950603872553e-06, + "loss": 0.3425, + "step": 1815 + }, + { + "epoch": 0.036353627105072193, + "grad_norm": 1.0904909372329712, + "learning_rate": 9.998943951849996e-06, + "loss": 0.3492, + "step": 1816 + }, + { + "epoch": 0.03637364562220054, + "grad_norm": 1.0992521047592163, + "learning_rate": 9.998937278812954e-06, + "loss": 0.3696, + "step": 1817 + }, + { + "epoch": 0.03639366413932888, + "grad_norm": 1.1618120670318604, + "learning_rate": 9.998930584761457e-06, + "loss": 0.3508, + "step": 1818 + }, + { + "epoch": 0.036413682656457225, + "grad_norm": 1.1151210069656372, + "learning_rate": 9.998923869695532e-06, + "loss": 0.3, + "step": 1819 + }, + { + "epoch": 0.03643370117358557, + "grad_norm": 1.1549023389816284, + "learning_rate": 9.998917133615205e-06, + "loss": 0.3353, + "step": 1820 + }, + { + "epoch": 0.03645371969071391, + "grad_norm": 1.1116284132003784, + "learning_rate": 9.998910376520508e-06, + "loss": 0.3238, + "step": 1821 + }, + { + "epoch": 0.036473738207842256, + "grad_norm": 1.1011135578155518, + "learning_rate": 9.998903598411464e-06, + "loss": 0.3052, + "step": 1822 + }, + { + "epoch": 0.0364937567249706, + "grad_norm": 1.132655382156372, + "learning_rate": 9.99889679928811e-06, + "loss": 0.3646, + "step": 1823 + }, + { + "epoch": 0.03651377524209894, + "grad_norm": 1.156638503074646, + "learning_rate": 9.998889979150466e-06, + "loss": 0.3111, + "step": 1824 + }, + { + "epoch": 0.03653379375922729, + "grad_norm": 1.0787848234176636, + "learning_rate": 9.998883137998566e-06, + "loss": 0.3374, + "step": 1825 + }, + { + "epoch": 0.03655381227635563, + "grad_norm": 1.147817850112915, + "learning_rate": 9.998876275832437e-06, + "loss": 0.2821, + "step": 1826 + }, + { + "epoch": 0.036573830793483975, + "grad_norm": 1.2288016080856323, + "learning_rate": 9.998869392652106e-06, + "loss": 0.2853, + "step": 1827 + }, + { + "epoch": 0.03659384931061232, + "grad_norm": 1.0648117065429688, + "learning_rate": 9.998862488457607e-06, + "loss": 0.3005, + "step": 1828 + }, + { + "epoch": 0.03661386782774066, + "grad_norm": 1.1006801128387451, + "learning_rate": 9.998855563248963e-06, + "loss": 0.3774, + "step": 1829 + }, + { + "epoch": 0.036633886344869006, + "grad_norm": 1.1931520700454712, + "learning_rate": 9.998848617026207e-06, + "loss": 0.3315, + "step": 1830 + }, + { + "epoch": 0.03665390486199735, + "grad_norm": 1.0571529865264893, + "learning_rate": 9.998841649789367e-06, + "loss": 0.3442, + "step": 1831 + }, + { + "epoch": 0.03667392337912569, + "grad_norm": 1.1277869939804077, + "learning_rate": 9.998834661538474e-06, + "loss": 0.3162, + "step": 1832 + }, + { + "epoch": 0.03669394189625404, + "grad_norm": 1.1222736835479736, + "learning_rate": 9.998827652273554e-06, + "loss": 0.2853, + "step": 1833 + }, + { + "epoch": 0.03671396041338238, + "grad_norm": 1.1016128063201904, + "learning_rate": 9.99882062199464e-06, + "loss": 0.3287, + "step": 1834 + }, + { + "epoch": 0.036733978930510724, + "grad_norm": 1.0431855916976929, + "learning_rate": 9.998813570701757e-06, + "loss": 0.3177, + "step": 1835 + }, + { + "epoch": 0.03675399744763907, + "grad_norm": 1.191482424736023, + "learning_rate": 9.998806498394941e-06, + "loss": 0.3499, + "step": 1836 + }, + { + "epoch": 0.03677401596476741, + "grad_norm": 1.0836870670318604, + "learning_rate": 9.998799405074215e-06, + "loss": 0.2826, + "step": 1837 + }, + { + "epoch": 0.036794034481895756, + "grad_norm": 1.041977047920227, + "learning_rate": 9.998792290739614e-06, + "loss": 0.3276, + "step": 1838 + }, + { + "epoch": 0.0368140529990241, + "grad_norm": 1.0546263456344604, + "learning_rate": 9.998785155391166e-06, + "loss": 0.3484, + "step": 1839 + }, + { + "epoch": 0.03683407151615244, + "grad_norm": 1.0964781045913696, + "learning_rate": 9.998777999028902e-06, + "loss": 0.3454, + "step": 1840 + }, + { + "epoch": 0.03685409003328079, + "grad_norm": 1.9023783206939697, + "learning_rate": 9.998770821652848e-06, + "loss": 0.8499, + "step": 1841 + }, + { + "epoch": 0.03687410855040913, + "grad_norm": 1.029005527496338, + "learning_rate": 9.99876362326304e-06, + "loss": 0.325, + "step": 1842 + }, + { + "epoch": 0.036894127067537474, + "grad_norm": 1.063676118850708, + "learning_rate": 9.998756403859505e-06, + "loss": 0.3394, + "step": 1843 + }, + { + "epoch": 0.03691414558466582, + "grad_norm": 1.2545037269592285, + "learning_rate": 9.998749163442272e-06, + "loss": 0.3473, + "step": 1844 + }, + { + "epoch": 0.03693416410179416, + "grad_norm": 1.2369219064712524, + "learning_rate": 9.998741902011374e-06, + "loss": 0.3453, + "step": 1845 + }, + { + "epoch": 0.036954182618922506, + "grad_norm": 1.1881126165390015, + "learning_rate": 9.998734619566841e-06, + "loss": 0.3351, + "step": 1846 + }, + { + "epoch": 0.03697420113605085, + "grad_norm": 1.1115642786026, + "learning_rate": 9.998727316108703e-06, + "loss": 0.3385, + "step": 1847 + }, + { + "epoch": 0.03699421965317919, + "grad_norm": 0.9965870380401611, + "learning_rate": 9.99871999163699e-06, + "loss": 0.3405, + "step": 1848 + }, + { + "epoch": 0.03701423817030754, + "grad_norm": 1.1304209232330322, + "learning_rate": 9.998712646151737e-06, + "loss": 0.3511, + "step": 1849 + }, + { + "epoch": 0.03703425668743588, + "grad_norm": 1.1511421203613281, + "learning_rate": 9.99870527965297e-06, + "loss": 0.3473, + "step": 1850 + }, + { + "epoch": 0.037054275204564224, + "grad_norm": 1.08097243309021, + "learning_rate": 9.998697892140723e-06, + "loss": 0.3388, + "step": 1851 + }, + { + "epoch": 0.03707429372169257, + "grad_norm": 1.0525221824645996, + "learning_rate": 9.998690483615026e-06, + "loss": 0.3393, + "step": 1852 + }, + { + "epoch": 0.03709431223882091, + "grad_norm": 1.1627823114395142, + "learning_rate": 9.998683054075908e-06, + "loss": 0.3389, + "step": 1853 + }, + { + "epoch": 0.037114330755949256, + "grad_norm": 1.1707899570465088, + "learning_rate": 9.998675603523402e-06, + "loss": 0.3411, + "step": 1854 + }, + { + "epoch": 0.0371343492730776, + "grad_norm": 1.083594560623169, + "learning_rate": 9.99866813195754e-06, + "loss": 0.3365, + "step": 1855 + }, + { + "epoch": 0.03715436779020594, + "grad_norm": 1.199436068534851, + "learning_rate": 9.998660639378355e-06, + "loss": 0.3573, + "step": 1856 + }, + { + "epoch": 0.03717438630733429, + "grad_norm": 1.2971649169921875, + "learning_rate": 9.998653125785874e-06, + "loss": 0.3425, + "step": 1857 + }, + { + "epoch": 0.03719440482446263, + "grad_norm": 1.1510752439498901, + "learning_rate": 9.998645591180131e-06, + "loss": 0.3152, + "step": 1858 + }, + { + "epoch": 0.037214423341590974, + "grad_norm": 1.0847671031951904, + "learning_rate": 9.998638035561159e-06, + "loss": 0.3115, + "step": 1859 + }, + { + "epoch": 0.03723444185871932, + "grad_norm": 1.026432991027832, + "learning_rate": 9.998630458928988e-06, + "loss": 0.3121, + "step": 1860 + }, + { + "epoch": 0.03725446037584766, + "grad_norm": 1.0535895824432373, + "learning_rate": 9.99862286128365e-06, + "loss": 0.3279, + "step": 1861 + }, + { + "epoch": 0.037274478892976005, + "grad_norm": 1.0695078372955322, + "learning_rate": 9.998615242625177e-06, + "loss": 0.3902, + "step": 1862 + }, + { + "epoch": 0.03729449741010435, + "grad_norm": 1.8856699466705322, + "learning_rate": 9.998607602953602e-06, + "loss": 0.8711, + "step": 1863 + }, + { + "epoch": 0.03731451592723269, + "grad_norm": 1.0073590278625488, + "learning_rate": 9.998599942268956e-06, + "loss": 0.3109, + "step": 1864 + }, + { + "epoch": 0.03733453444436104, + "grad_norm": 1.0141781568527222, + "learning_rate": 9.998592260571271e-06, + "loss": 0.3136, + "step": 1865 + }, + { + "epoch": 0.03735455296148938, + "grad_norm": 1.0737700462341309, + "learning_rate": 9.99858455786058e-06, + "loss": 0.3751, + "step": 1866 + }, + { + "epoch": 0.037374571478617724, + "grad_norm": 1.0659059286117554, + "learning_rate": 9.998576834136915e-06, + "loss": 0.297, + "step": 1867 + }, + { + "epoch": 0.03739458999574607, + "grad_norm": 1.1861141920089722, + "learning_rate": 9.998569089400309e-06, + "loss": 0.344, + "step": 1868 + }, + { + "epoch": 0.03741460851287441, + "grad_norm": 1.043556809425354, + "learning_rate": 9.998561323650796e-06, + "loss": 0.3146, + "step": 1869 + }, + { + "epoch": 0.037434627030002755, + "grad_norm": 1.0351130962371826, + "learning_rate": 9.998553536888404e-06, + "loss": 0.3215, + "step": 1870 + }, + { + "epoch": 0.0374546455471311, + "grad_norm": 1.049180507659912, + "learning_rate": 9.99854572911317e-06, + "loss": 0.2928, + "step": 1871 + }, + { + "epoch": 0.03747466406425944, + "grad_norm": 1.225328803062439, + "learning_rate": 9.998537900325127e-06, + "loss": 0.3357, + "step": 1872 + }, + { + "epoch": 0.03749468258138779, + "grad_norm": 1.1050776243209839, + "learning_rate": 9.998530050524304e-06, + "loss": 0.2839, + "step": 1873 + }, + { + "epoch": 0.03751470109851613, + "grad_norm": 1.0506142377853394, + "learning_rate": 9.998522179710736e-06, + "loss": 0.3381, + "step": 1874 + }, + { + "epoch": 0.037534719615644474, + "grad_norm": 1.1164172887802124, + "learning_rate": 9.998514287884458e-06, + "loss": 0.2634, + "step": 1875 + }, + { + "epoch": 0.03755473813277282, + "grad_norm": 1.1491533517837524, + "learning_rate": 9.998506375045502e-06, + "loss": 0.3262, + "step": 1876 + }, + { + "epoch": 0.03757475664990116, + "grad_norm": 1.1441524028778076, + "learning_rate": 9.998498441193901e-06, + "loss": 0.3189, + "step": 1877 + }, + { + "epoch": 0.037594775167029505, + "grad_norm": 1.0893343687057495, + "learning_rate": 9.998490486329686e-06, + "loss": 0.3055, + "step": 1878 + }, + { + "epoch": 0.03761479368415785, + "grad_norm": 1.0596356391906738, + "learning_rate": 9.998482510452894e-06, + "loss": 0.3605, + "step": 1879 + }, + { + "epoch": 0.03763481220128619, + "grad_norm": 1.9039376974105835, + "learning_rate": 9.998474513563557e-06, + "loss": 0.8327, + "step": 1880 + }, + { + "epoch": 0.037654830718414536, + "grad_norm": 1.1645699739456177, + "learning_rate": 9.99846649566171e-06, + "loss": 0.3382, + "step": 1881 + }, + { + "epoch": 0.03767484923554288, + "grad_norm": 1.1918621063232422, + "learning_rate": 9.998458456747385e-06, + "loss": 0.3389, + "step": 1882 + }, + { + "epoch": 0.037694867752671224, + "grad_norm": 1.095022201538086, + "learning_rate": 9.998450396820615e-06, + "loss": 0.3626, + "step": 1883 + }, + { + "epoch": 0.03771488626979957, + "grad_norm": 1.0896610021591187, + "learning_rate": 9.998442315881436e-06, + "loss": 0.3539, + "step": 1884 + }, + { + "epoch": 0.03773490478692791, + "grad_norm": 1.1084388494491577, + "learning_rate": 9.998434213929882e-06, + "loss": 0.3333, + "step": 1885 + }, + { + "epoch": 0.037754923304056255, + "grad_norm": 1.1192190647125244, + "learning_rate": 9.998426090965985e-06, + "loss": 0.3325, + "step": 1886 + }, + { + "epoch": 0.0377749418211846, + "grad_norm": 1.1705330610275269, + "learning_rate": 9.99841794698978e-06, + "loss": 0.342, + "step": 1887 + }, + { + "epoch": 0.03779496033831294, + "grad_norm": 1.978147268295288, + "learning_rate": 9.998409782001304e-06, + "loss": 0.8292, + "step": 1888 + }, + { + "epoch": 0.037814978855441286, + "grad_norm": 1.0344191789627075, + "learning_rate": 9.998401596000586e-06, + "loss": 0.3381, + "step": 1889 + }, + { + "epoch": 0.03783499737256963, + "grad_norm": 1.1258119344711304, + "learning_rate": 9.998393388987667e-06, + "loss": 0.353, + "step": 1890 + }, + { + "epoch": 0.037855015889697974, + "grad_norm": 1.1699010133743286, + "learning_rate": 9.998385160962574e-06, + "loss": 0.3045, + "step": 1891 + }, + { + "epoch": 0.03787503440682632, + "grad_norm": 1.1214052438735962, + "learning_rate": 9.998376911925348e-06, + "loss": 0.3648, + "step": 1892 + }, + { + "epoch": 0.03789505292395466, + "grad_norm": 1.0550041198730469, + "learning_rate": 9.99836864187602e-06, + "loss": 0.2788, + "step": 1893 + }, + { + "epoch": 0.037915071441083005, + "grad_norm": 0.9972654581069946, + "learning_rate": 9.998360350814626e-06, + "loss": 0.3423, + "step": 1894 + }, + { + "epoch": 0.03793508995821135, + "grad_norm": 1.0279712677001953, + "learning_rate": 9.998352038741201e-06, + "loss": 0.3759, + "step": 1895 + }, + { + "epoch": 0.03795510847533969, + "grad_norm": 1.0634080171585083, + "learning_rate": 9.998343705655779e-06, + "loss": 0.3623, + "step": 1896 + }, + { + "epoch": 0.037975126992468036, + "grad_norm": 1.2658634185791016, + "learning_rate": 9.998335351558397e-06, + "loss": 0.3287, + "step": 1897 + }, + { + "epoch": 0.03799514550959638, + "grad_norm": 1.2203177213668823, + "learning_rate": 9.998326976449087e-06, + "loss": 0.3286, + "step": 1898 + }, + { + "epoch": 0.038015164026724724, + "grad_norm": 1.0394972562789917, + "learning_rate": 9.998318580327888e-06, + "loss": 0.3746, + "step": 1899 + }, + { + "epoch": 0.03803518254385307, + "grad_norm": 1.023566722869873, + "learning_rate": 9.998310163194834e-06, + "loss": 0.3367, + "step": 1900 + }, + { + "epoch": 0.03805520106098141, + "grad_norm": 1.010127305984497, + "learning_rate": 9.998301725049957e-06, + "loss": 0.3626, + "step": 1901 + }, + { + "epoch": 0.038075219578109755, + "grad_norm": 1.1416878700256348, + "learning_rate": 9.998293265893296e-06, + "loss": 0.3052, + "step": 1902 + }, + { + "epoch": 0.0380952380952381, + "grad_norm": 1.0605201721191406, + "learning_rate": 9.998284785724887e-06, + "loss": 0.3023, + "step": 1903 + }, + { + "epoch": 0.03811525661236644, + "grad_norm": 1.073678731918335, + "learning_rate": 9.998276284544764e-06, + "loss": 0.3436, + "step": 1904 + }, + { + "epoch": 0.03813527512949478, + "grad_norm": 1.0575833320617676, + "learning_rate": 9.998267762352963e-06, + "loss": 0.3568, + "step": 1905 + }, + { + "epoch": 0.03815529364662312, + "grad_norm": 1.9761908054351807, + "learning_rate": 9.998259219149519e-06, + "loss": 0.8893, + "step": 1906 + }, + { + "epoch": 0.03817531216375147, + "grad_norm": 1.0156794786453247, + "learning_rate": 9.99825065493447e-06, + "loss": 0.3148, + "step": 1907 + }, + { + "epoch": 0.03819533068087981, + "grad_norm": 1.202756404876709, + "learning_rate": 9.99824206970785e-06, + "loss": 0.3508, + "step": 1908 + }, + { + "epoch": 0.038215349198008154, + "grad_norm": 1.1239569187164307, + "learning_rate": 9.998233463469695e-06, + "loss": 0.306, + "step": 1909 + }, + { + "epoch": 0.0382353677151365, + "grad_norm": 1.0549237728118896, + "learning_rate": 9.998224836220046e-06, + "loss": 0.3222, + "step": 1910 + }, + { + "epoch": 0.03825538623226484, + "grad_norm": 1.824440360069275, + "learning_rate": 9.998216187958931e-06, + "loss": 0.8396, + "step": 1911 + }, + { + "epoch": 0.038275404749393185, + "grad_norm": 1.0607959032058716, + "learning_rate": 9.998207518686393e-06, + "loss": 0.3394, + "step": 1912 + }, + { + "epoch": 0.03829542326652153, + "grad_norm": 1.0571677684783936, + "learning_rate": 9.998198828402468e-06, + "loss": 0.3285, + "step": 1913 + }, + { + "epoch": 0.03831544178364987, + "grad_norm": 1.0742751359939575, + "learning_rate": 9.998190117107187e-06, + "loss": 0.3192, + "step": 1914 + }, + { + "epoch": 0.03833546030077822, + "grad_norm": 1.1620067358016968, + "learning_rate": 9.998181384800593e-06, + "loss": 0.3637, + "step": 1915 + }, + { + "epoch": 0.03835547881790656, + "grad_norm": 1.1152145862579346, + "learning_rate": 9.998172631482719e-06, + "loss": 0.3791, + "step": 1916 + }, + { + "epoch": 0.038375497335034904, + "grad_norm": 1.2616711854934692, + "learning_rate": 9.998163857153603e-06, + "loss": 0.2938, + "step": 1917 + }, + { + "epoch": 0.03839551585216325, + "grad_norm": 1.1039583683013916, + "learning_rate": 9.998155061813282e-06, + "loss": 0.3033, + "step": 1918 + }, + { + "epoch": 0.03841553436929159, + "grad_norm": 1.0358655452728271, + "learning_rate": 9.998146245461792e-06, + "loss": 0.3262, + "step": 1919 + }, + { + "epoch": 0.038435552886419935, + "grad_norm": 1.997412919998169, + "learning_rate": 9.99813740809917e-06, + "loss": 0.8652, + "step": 1920 + }, + { + "epoch": 0.03845557140354828, + "grad_norm": 1.1423338651657104, + "learning_rate": 9.998128549725457e-06, + "loss": 0.3664, + "step": 1921 + }, + { + "epoch": 0.03847558992067662, + "grad_norm": 1.0646077394485474, + "learning_rate": 9.998119670340686e-06, + "loss": 0.3134, + "step": 1922 + }, + { + "epoch": 0.038495608437804966, + "grad_norm": 1.0413974523544312, + "learning_rate": 9.998110769944896e-06, + "loss": 0.3044, + "step": 1923 + }, + { + "epoch": 0.03851562695493331, + "grad_norm": 1.124876618385315, + "learning_rate": 9.998101848538123e-06, + "loss": 0.3799, + "step": 1924 + }, + { + "epoch": 0.038535645472061654, + "grad_norm": 1.141158938407898, + "learning_rate": 9.998092906120405e-06, + "loss": 0.3403, + "step": 1925 + }, + { + "epoch": 0.03855566398919, + "grad_norm": 1.1341991424560547, + "learning_rate": 9.998083942691778e-06, + "loss": 0.3363, + "step": 1926 + }, + { + "epoch": 0.03857568250631834, + "grad_norm": 1.1142312288284302, + "learning_rate": 9.998074958252285e-06, + "loss": 0.3171, + "step": 1927 + }, + { + "epoch": 0.038595701023446685, + "grad_norm": 1.168458104133606, + "learning_rate": 9.99806595280196e-06, + "loss": 0.3151, + "step": 1928 + }, + { + "epoch": 0.03861571954057503, + "grad_norm": 1.072303056716919, + "learning_rate": 9.998056926340842e-06, + "loss": 0.3896, + "step": 1929 + }, + { + "epoch": 0.03863573805770337, + "grad_norm": 0.9045822620391846, + "learning_rate": 9.998047878868967e-06, + "loss": 0.2535, + "step": 1930 + }, + { + "epoch": 0.038655756574831716, + "grad_norm": 1.0306459665298462, + "learning_rate": 9.998038810386374e-06, + "loss": 0.3247, + "step": 1931 + }, + { + "epoch": 0.03867577509196006, + "grad_norm": 1.2243311405181885, + "learning_rate": 9.998029720893101e-06, + "loss": 0.3586, + "step": 1932 + }, + { + "epoch": 0.038695793609088404, + "grad_norm": 1.2201858758926392, + "learning_rate": 9.998020610389187e-06, + "loss": 0.321, + "step": 1933 + }, + { + "epoch": 0.03871581212621675, + "grad_norm": 1.917679786682129, + "learning_rate": 9.998011478874673e-06, + "loss": 0.8417, + "step": 1934 + }, + { + "epoch": 0.03873583064334509, + "grad_norm": 1.0557212829589844, + "learning_rate": 9.99800232634959e-06, + "loss": 0.327, + "step": 1935 + }, + { + "epoch": 0.038755849160473435, + "grad_norm": 1.1097906827926636, + "learning_rate": 9.997993152813982e-06, + "loss": 0.3782, + "step": 1936 + }, + { + "epoch": 0.03877586767760178, + "grad_norm": 1.1330056190490723, + "learning_rate": 9.997983958267889e-06, + "loss": 0.3545, + "step": 1937 + }, + { + "epoch": 0.03879588619473012, + "grad_norm": 1.031516671180725, + "learning_rate": 9.997974742711345e-06, + "loss": 0.3497, + "step": 1938 + }, + { + "epoch": 0.038815904711858466, + "grad_norm": 1.1789659261703491, + "learning_rate": 9.997965506144391e-06, + "loss": 0.3374, + "step": 1939 + }, + { + "epoch": 0.03883592322898681, + "grad_norm": 1.2068508863449097, + "learning_rate": 9.997956248567064e-06, + "loss": 0.3316, + "step": 1940 + }, + { + "epoch": 0.038855941746115154, + "grad_norm": 1.0916774272918701, + "learning_rate": 9.997946969979406e-06, + "loss": 0.3642, + "step": 1941 + }, + { + "epoch": 0.0388759602632435, + "grad_norm": 1.0502979755401611, + "learning_rate": 9.997937670381454e-06, + "loss": 0.3445, + "step": 1942 + }, + { + "epoch": 0.03889597878037184, + "grad_norm": 1.7982829809188843, + "learning_rate": 9.99792834977325e-06, + "loss": 0.9314, + "step": 1943 + }, + { + "epoch": 0.038915997297500185, + "grad_norm": 1.1371873617172241, + "learning_rate": 9.997919008154828e-06, + "loss": 0.2872, + "step": 1944 + }, + { + "epoch": 0.03893601581462853, + "grad_norm": 1.9858472347259521, + "learning_rate": 9.99790964552623e-06, + "loss": 0.8057, + "step": 1945 + }, + { + "epoch": 0.03895603433175687, + "grad_norm": 1.7085695266723633, + "learning_rate": 9.997900261887496e-06, + "loss": 0.8697, + "step": 1946 + }, + { + "epoch": 0.038976052848885216, + "grad_norm": 1.0228829383850098, + "learning_rate": 9.997890857238665e-06, + "loss": 0.3387, + "step": 1947 + }, + { + "epoch": 0.03899607136601356, + "grad_norm": 1.0423047542572021, + "learning_rate": 9.997881431579776e-06, + "loss": 0.3752, + "step": 1948 + }, + { + "epoch": 0.039016089883141904, + "grad_norm": 1.1670204401016235, + "learning_rate": 9.99787198491087e-06, + "loss": 0.3665, + "step": 1949 + }, + { + "epoch": 0.03903610840027025, + "grad_norm": 1.0919054746627808, + "learning_rate": 9.997862517231986e-06, + "loss": 0.3019, + "step": 1950 + }, + { + "epoch": 0.03905612691739859, + "grad_norm": 1.0267773866653442, + "learning_rate": 9.997853028543163e-06, + "loss": 0.327, + "step": 1951 + }, + { + "epoch": 0.039076145434526935, + "grad_norm": 1.1112818717956543, + "learning_rate": 9.997843518844442e-06, + "loss": 0.3702, + "step": 1952 + }, + { + "epoch": 0.03909616395165528, + "grad_norm": 1.0872013568878174, + "learning_rate": 9.997833988135861e-06, + "loss": 0.308, + "step": 1953 + }, + { + "epoch": 0.03911618246878362, + "grad_norm": 1.1673612594604492, + "learning_rate": 9.997824436417461e-06, + "loss": 0.3628, + "step": 1954 + }, + { + "epoch": 0.039136200985911966, + "grad_norm": 1.1233820915222168, + "learning_rate": 9.997814863689284e-06, + "loss": 0.337, + "step": 1955 + }, + { + "epoch": 0.03915621950304031, + "grad_norm": 1.0386592149734497, + "learning_rate": 9.997805269951369e-06, + "loss": 0.3105, + "step": 1956 + }, + { + "epoch": 0.039176238020168654, + "grad_norm": 1.0999335050582886, + "learning_rate": 9.997795655203755e-06, + "loss": 0.3312, + "step": 1957 + }, + { + "epoch": 0.039196256537297, + "grad_norm": 1.1270971298217773, + "learning_rate": 9.997786019446485e-06, + "loss": 0.367, + "step": 1958 + }, + { + "epoch": 0.03921627505442534, + "grad_norm": 0.9849525094032288, + "learning_rate": 9.997776362679596e-06, + "loss": 0.3464, + "step": 1959 + }, + { + "epoch": 0.039236293571553685, + "grad_norm": 1.0669937133789062, + "learning_rate": 9.997766684903133e-06, + "loss": 0.3607, + "step": 1960 + }, + { + "epoch": 0.03925631208868203, + "grad_norm": 1.2137843370437622, + "learning_rate": 9.997756986117133e-06, + "loss": 0.3964, + "step": 1961 + }, + { + "epoch": 0.03927633060581037, + "grad_norm": 1.0993109941482544, + "learning_rate": 9.99774726632164e-06, + "loss": 0.3431, + "step": 1962 + }, + { + "epoch": 0.039296349122938716, + "grad_norm": 1.1633988618850708, + "learning_rate": 9.997737525516691e-06, + "loss": 0.3479, + "step": 1963 + }, + { + "epoch": 0.03931636764006706, + "grad_norm": 1.1080349683761597, + "learning_rate": 9.99772776370233e-06, + "loss": 0.3587, + "step": 1964 + }, + { + "epoch": 0.039336386157195403, + "grad_norm": 1.1060230731964111, + "learning_rate": 9.997717980878596e-06, + "loss": 0.3708, + "step": 1965 + }, + { + "epoch": 0.03935640467432375, + "grad_norm": 0.971972644329071, + "learning_rate": 9.997708177045532e-06, + "loss": 0.3093, + "step": 1966 + }, + { + "epoch": 0.03937642319145209, + "grad_norm": 1.1188706159591675, + "learning_rate": 9.997698352203178e-06, + "loss": 0.3699, + "step": 1967 + }, + { + "epoch": 0.039396441708580435, + "grad_norm": 1.0239676237106323, + "learning_rate": 9.997688506351576e-06, + "loss": 0.3329, + "step": 1968 + }, + { + "epoch": 0.03941646022570878, + "grad_norm": 1.0810621976852417, + "learning_rate": 9.997678639490767e-06, + "loss": 0.3577, + "step": 1969 + }, + { + "epoch": 0.03943647874283712, + "grad_norm": 1.221549391746521, + "learning_rate": 9.997668751620793e-06, + "loss": 0.3727, + "step": 1970 + }, + { + "epoch": 0.039456497259965466, + "grad_norm": 1.1589019298553467, + "learning_rate": 9.997658842741693e-06, + "loss": 0.3174, + "step": 1971 + }, + { + "epoch": 0.03947651577709381, + "grad_norm": 1.101334810256958, + "learning_rate": 9.997648912853513e-06, + "loss": 0.307, + "step": 1972 + }, + { + "epoch": 0.03949653429422215, + "grad_norm": 1.025956630706787, + "learning_rate": 9.99763896195629e-06, + "loss": 0.3471, + "step": 1973 + }, + { + "epoch": 0.0395165528113505, + "grad_norm": 1.2714897394180298, + "learning_rate": 9.997628990050072e-06, + "loss": 0.295, + "step": 1974 + }, + { + "epoch": 0.03953657132847884, + "grad_norm": 1.0914175510406494, + "learning_rate": 9.997618997134895e-06, + "loss": 0.3362, + "step": 1975 + }, + { + "epoch": 0.039556589845607185, + "grad_norm": 1.0275261402130127, + "learning_rate": 9.997608983210803e-06, + "loss": 0.3227, + "step": 1976 + }, + { + "epoch": 0.03957660836273553, + "grad_norm": 1.1896603107452393, + "learning_rate": 9.997598948277838e-06, + "loss": 0.3358, + "step": 1977 + }, + { + "epoch": 0.03959662687986387, + "grad_norm": 1.016655683517456, + "learning_rate": 9.997588892336043e-06, + "loss": 0.2834, + "step": 1978 + }, + { + "epoch": 0.039616645396992216, + "grad_norm": 1.1779074668884277, + "learning_rate": 9.99757881538546e-06, + "loss": 0.3619, + "step": 1979 + }, + { + "epoch": 0.03963666391412056, + "grad_norm": 1.0258437395095825, + "learning_rate": 9.997568717426132e-06, + "loss": 0.3335, + "step": 1980 + }, + { + "epoch": 0.0396566824312489, + "grad_norm": 1.1420619487762451, + "learning_rate": 9.997558598458099e-06, + "loss": 0.306, + "step": 1981 + }, + { + "epoch": 0.03967670094837725, + "grad_norm": 1.2195706367492676, + "learning_rate": 9.997548458481406e-06, + "loss": 0.3303, + "step": 1982 + }, + { + "epoch": 0.03969671946550559, + "grad_norm": 1.3050674200057983, + "learning_rate": 9.997538297496095e-06, + "loss": 0.3284, + "step": 1983 + }, + { + "epoch": 0.039716737982633935, + "grad_norm": 1.0905691385269165, + "learning_rate": 9.99752811550221e-06, + "loss": 0.3353, + "step": 1984 + }, + { + "epoch": 0.03973675649976228, + "grad_norm": 1.853681206703186, + "learning_rate": 9.997517912499789e-06, + "loss": 0.9169, + "step": 1985 + }, + { + "epoch": 0.03975677501689062, + "grad_norm": 1.0105738639831543, + "learning_rate": 9.997507688488879e-06, + "loss": 0.2998, + "step": 1986 + }, + { + "epoch": 0.039776793534018966, + "grad_norm": 1.2309454679489136, + "learning_rate": 9.997497443469524e-06, + "loss": 0.3303, + "step": 1987 + }, + { + "epoch": 0.03979681205114731, + "grad_norm": 1.090983271598816, + "learning_rate": 9.997487177441762e-06, + "loss": 0.3493, + "step": 1988 + }, + { + "epoch": 0.03981683056827565, + "grad_norm": 1.1623830795288086, + "learning_rate": 9.997476890405642e-06, + "loss": 0.35, + "step": 1989 + }, + { + "epoch": 0.039836849085404, + "grad_norm": 1.057342290878296, + "learning_rate": 9.997466582361203e-06, + "loss": 0.3697, + "step": 1990 + }, + { + "epoch": 0.03985686760253234, + "grad_norm": 1.0644198656082153, + "learning_rate": 9.997456253308493e-06, + "loss": 0.3187, + "step": 1991 + }, + { + "epoch": 0.039876886119660684, + "grad_norm": 0.9905849099159241, + "learning_rate": 9.997445903247549e-06, + "loss": 0.3048, + "step": 1992 + }, + { + "epoch": 0.03989690463678903, + "grad_norm": 1.9988597631454468, + "learning_rate": 9.99743553217842e-06, + "loss": 0.8698, + "step": 1993 + }, + { + "epoch": 0.03991692315391737, + "grad_norm": 1.0989331007003784, + "learning_rate": 9.997425140101147e-06, + "loss": 0.3501, + "step": 1994 + }, + { + "epoch": 0.039936941671045716, + "grad_norm": 1.137603998184204, + "learning_rate": 9.997414727015773e-06, + "loss": 0.3053, + "step": 1995 + }, + { + "epoch": 0.03995696018817406, + "grad_norm": 1.0732382535934448, + "learning_rate": 9.997404292922344e-06, + "loss": 0.329, + "step": 1996 + }, + { + "epoch": 0.0399769787053024, + "grad_norm": 1.0057621002197266, + "learning_rate": 9.997393837820904e-06, + "loss": 0.3352, + "step": 1997 + }, + { + "epoch": 0.03999699722243075, + "grad_norm": 1.0217247009277344, + "learning_rate": 9.997383361711492e-06, + "loss": 0.354, + "step": 1998 + }, + { + "epoch": 0.04001701573955909, + "grad_norm": 1.0577061176300049, + "learning_rate": 9.99737286459416e-06, + "loss": 0.3296, + "step": 1999 + }, + { + "epoch": 0.040037034256687434, + "grad_norm": 1.8342037200927734, + "learning_rate": 9.997362346468945e-06, + "loss": 0.8594, + "step": 2000 + }, + { + "epoch": 0.04005705277381578, + "grad_norm": 1.7669711112976074, + "learning_rate": 9.997351807335896e-06, + "loss": 0.8326, + "step": 2001 + }, + { + "epoch": 0.04007707129094412, + "grad_norm": 1.0924798250198364, + "learning_rate": 9.997341247195053e-06, + "loss": 0.3113, + "step": 2002 + }, + { + "epoch": 0.040097089808072466, + "grad_norm": 1.1210696697235107, + "learning_rate": 9.997330666046466e-06, + "loss": 0.3004, + "step": 2003 + }, + { + "epoch": 0.04011710832520081, + "grad_norm": 1.0665093660354614, + "learning_rate": 9.997320063890175e-06, + "loss": 0.3466, + "step": 2004 + }, + { + "epoch": 0.04013712684232915, + "grad_norm": 1.0429553985595703, + "learning_rate": 9.997309440726224e-06, + "loss": 0.2772, + "step": 2005 + }, + { + "epoch": 0.0401571453594575, + "grad_norm": 1.2243150472640991, + "learning_rate": 9.997298796554662e-06, + "loss": 0.3509, + "step": 2006 + }, + { + "epoch": 0.04017716387658584, + "grad_norm": 1.8147337436676025, + "learning_rate": 9.99728813137553e-06, + "loss": 0.8397, + "step": 2007 + }, + { + "epoch": 0.040197182393714184, + "grad_norm": 1.8658524751663208, + "learning_rate": 9.997277445188876e-06, + "loss": 0.8662, + "step": 2008 + }, + { + "epoch": 0.04021720091084253, + "grad_norm": 1.076796054840088, + "learning_rate": 9.997266737994742e-06, + "loss": 0.3425, + "step": 2009 + }, + { + "epoch": 0.04023721942797087, + "grad_norm": 0.9855514168739319, + "learning_rate": 9.997256009793173e-06, + "loss": 0.3186, + "step": 2010 + }, + { + "epoch": 0.040257237945099215, + "grad_norm": 1.0373985767364502, + "learning_rate": 9.997245260584216e-06, + "loss": 0.3209, + "step": 2011 + }, + { + "epoch": 0.04027725646222756, + "grad_norm": 1.2157648801803589, + "learning_rate": 9.997234490367915e-06, + "loss": 0.3559, + "step": 2012 + }, + { + "epoch": 0.0402972749793559, + "grad_norm": 1.7648773193359375, + "learning_rate": 9.997223699144316e-06, + "loss": 0.8338, + "step": 2013 + }, + { + "epoch": 0.04031729349648425, + "grad_norm": 1.3354415893554688, + "learning_rate": 9.997212886913463e-06, + "loss": 0.2972, + "step": 2014 + }, + { + "epoch": 0.04033731201361259, + "grad_norm": 0.956444501876831, + "learning_rate": 9.997202053675404e-06, + "loss": 0.305, + "step": 2015 + }, + { + "epoch": 0.040357330530740934, + "grad_norm": 0.94142746925354, + "learning_rate": 9.997191199430182e-06, + "loss": 0.3065, + "step": 2016 + }, + { + "epoch": 0.04037734904786928, + "grad_norm": 1.0357478857040405, + "learning_rate": 9.997180324177844e-06, + "loss": 0.2647, + "step": 2017 + }, + { + "epoch": 0.04039736756499762, + "grad_norm": 1.9409213066101074, + "learning_rate": 9.997169427918436e-06, + "loss": 0.9002, + "step": 2018 + }, + { + "epoch": 0.040417386082125965, + "grad_norm": 1.126991629600525, + "learning_rate": 9.997158510652001e-06, + "loss": 0.3726, + "step": 2019 + }, + { + "epoch": 0.04043740459925431, + "grad_norm": 1.1374036073684692, + "learning_rate": 9.997147572378589e-06, + "loss": 0.3316, + "step": 2020 + }, + { + "epoch": 0.04045742311638265, + "grad_norm": 1.1287643909454346, + "learning_rate": 9.997136613098244e-06, + "loss": 0.3193, + "step": 2021 + }, + { + "epoch": 0.040477441633511, + "grad_norm": 1.0224415063858032, + "learning_rate": 9.997125632811011e-06, + "loss": 0.3262, + "step": 2022 + }, + { + "epoch": 0.04049746015063934, + "grad_norm": 0.9906805157661438, + "learning_rate": 9.997114631516938e-06, + "loss": 0.3232, + "step": 2023 + }, + { + "epoch": 0.040517478667767684, + "grad_norm": 0.9763866662979126, + "learning_rate": 9.99710360921607e-06, + "loss": 0.2712, + "step": 2024 + }, + { + "epoch": 0.04053749718489603, + "grad_norm": 1.0383392572402954, + "learning_rate": 9.997092565908454e-06, + "loss": 0.3009, + "step": 2025 + }, + { + "epoch": 0.04055751570202437, + "grad_norm": 1.0793828964233398, + "learning_rate": 9.997081501594135e-06, + "loss": 0.3142, + "step": 2026 + }, + { + "epoch": 0.040577534219152715, + "grad_norm": 1.1233234405517578, + "learning_rate": 9.997070416273162e-06, + "loss": 0.3659, + "step": 2027 + }, + { + "epoch": 0.04059755273628106, + "grad_norm": 1.0533583164215088, + "learning_rate": 9.99705930994558e-06, + "loss": 0.3337, + "step": 2028 + }, + { + "epoch": 0.0406175712534094, + "grad_norm": 1.304885983467102, + "learning_rate": 9.997048182611437e-06, + "loss": 0.3529, + "step": 2029 + }, + { + "epoch": 0.040637589770537746, + "grad_norm": 0.990925669670105, + "learning_rate": 9.997037034270777e-06, + "loss": 0.3243, + "step": 2030 + }, + { + "epoch": 0.04065760828766609, + "grad_norm": 1.135526418685913, + "learning_rate": 9.997025864923651e-06, + "loss": 0.3428, + "step": 2031 + }, + { + "epoch": 0.040677626804794434, + "grad_norm": 1.134792685508728, + "learning_rate": 9.997014674570103e-06, + "loss": 0.4106, + "step": 2032 + }, + { + "epoch": 0.04069764532192278, + "grad_norm": 1.1669819355010986, + "learning_rate": 9.99700346321018e-06, + "loss": 0.349, + "step": 2033 + }, + { + "epoch": 0.04071766383905112, + "grad_norm": 0.9597114324569702, + "learning_rate": 9.99699223084393e-06, + "loss": 0.3822, + "step": 2034 + }, + { + "epoch": 0.040737682356179465, + "grad_norm": 1.093478798866272, + "learning_rate": 9.9969809774714e-06, + "loss": 0.3406, + "step": 2035 + }, + { + "epoch": 0.04075770087330781, + "grad_norm": 1.304222822189331, + "learning_rate": 9.996969703092637e-06, + "loss": 0.3309, + "step": 2036 + }, + { + "epoch": 0.04077771939043615, + "grad_norm": 1.0537159442901611, + "learning_rate": 9.996958407707689e-06, + "loss": 0.3076, + "step": 2037 + }, + { + "epoch": 0.040797737907564496, + "grad_norm": 1.283251404762268, + "learning_rate": 9.996947091316604e-06, + "loss": 0.2889, + "step": 2038 + }, + { + "epoch": 0.04081775642469284, + "grad_norm": 1.1270101070404053, + "learning_rate": 9.996935753919428e-06, + "loss": 0.3712, + "step": 2039 + }, + { + "epoch": 0.040837774941821184, + "grad_norm": 1.0973116159439087, + "learning_rate": 9.99692439551621e-06, + "loss": 0.2926, + "step": 2040 + }, + { + "epoch": 0.04085779345894953, + "grad_norm": 1.0844310522079468, + "learning_rate": 9.996913016106995e-06, + "loss": 0.3142, + "step": 2041 + }, + { + "epoch": 0.04087781197607787, + "grad_norm": 1.2656044960021973, + "learning_rate": 9.996901615691836e-06, + "loss": 0.3564, + "step": 2042 + }, + { + "epoch": 0.040897830493206215, + "grad_norm": 1.088368535041809, + "learning_rate": 9.996890194270776e-06, + "loss": 0.3514, + "step": 2043 + }, + { + "epoch": 0.04091784901033456, + "grad_norm": 1.0664342641830444, + "learning_rate": 9.996878751843866e-06, + "loss": 0.3222, + "step": 2044 + }, + { + "epoch": 0.0409378675274629, + "grad_norm": 1.142798900604248, + "learning_rate": 9.996867288411151e-06, + "loss": 0.367, + "step": 2045 + }, + { + "epoch": 0.040957886044591246, + "grad_norm": 1.1938608884811401, + "learning_rate": 9.996855803972685e-06, + "loss": 0.2932, + "step": 2046 + }, + { + "epoch": 0.04097790456171959, + "grad_norm": 1.1816320419311523, + "learning_rate": 9.99684429852851e-06, + "loss": 0.3038, + "step": 2047 + }, + { + "epoch": 0.040997923078847934, + "grad_norm": 1.014769434928894, + "learning_rate": 9.996832772078678e-06, + "loss": 0.329, + "step": 2048 + }, + { + "epoch": 0.04101794159597628, + "grad_norm": 1.1305902004241943, + "learning_rate": 9.996821224623234e-06, + "loss": 0.3125, + "step": 2049 + }, + { + "epoch": 0.04103796011310462, + "grad_norm": 1.8727554082870483, + "learning_rate": 9.996809656162231e-06, + "loss": 0.804, + "step": 2050 + }, + { + "epoch": 0.041057978630232965, + "grad_norm": 1.1007851362228394, + "learning_rate": 9.996798066695717e-06, + "loss": 0.3407, + "step": 2051 + }, + { + "epoch": 0.04107799714736131, + "grad_norm": 1.1723440885543823, + "learning_rate": 9.996786456223737e-06, + "loss": 0.2931, + "step": 2052 + }, + { + "epoch": 0.04109801566448965, + "grad_norm": 1.0916526317596436, + "learning_rate": 9.996774824746341e-06, + "loss": 0.3102, + "step": 2053 + }, + { + "epoch": 0.041118034181617996, + "grad_norm": 1.187381386756897, + "learning_rate": 9.996763172263582e-06, + "loss": 0.3279, + "step": 2054 + }, + { + "epoch": 0.04113805269874634, + "grad_norm": 0.9807420969009399, + "learning_rate": 9.996751498775503e-06, + "loss": 0.2896, + "step": 2055 + }, + { + "epoch": 0.041158071215874684, + "grad_norm": 1.0696611404418945, + "learning_rate": 9.996739804282158e-06, + "loss": 0.3314, + "step": 2056 + }, + { + "epoch": 0.04117808973300303, + "grad_norm": 1.2115075588226318, + "learning_rate": 9.996728088783593e-06, + "loss": 0.3035, + "step": 2057 + }, + { + "epoch": 0.04119810825013137, + "grad_norm": 1.1720521450042725, + "learning_rate": 9.99671635227986e-06, + "loss": 0.3427, + "step": 2058 + }, + { + "epoch": 0.041218126767259715, + "grad_norm": 1.0452213287353516, + "learning_rate": 9.996704594771006e-06, + "loss": 0.2984, + "step": 2059 + }, + { + "epoch": 0.04123814528438806, + "grad_norm": 1.086497187614441, + "learning_rate": 9.99669281625708e-06, + "loss": 0.3425, + "step": 2060 + }, + { + "epoch": 0.0412581638015164, + "grad_norm": 1.099318027496338, + "learning_rate": 9.996681016738133e-06, + "loss": 0.3127, + "step": 2061 + }, + { + "epoch": 0.041278182318644746, + "grad_norm": 1.9339369535446167, + "learning_rate": 9.996669196214215e-06, + "loss": 0.9025, + "step": 2062 + }, + { + "epoch": 0.04129820083577309, + "grad_norm": 1.0386745929718018, + "learning_rate": 9.996657354685375e-06, + "loss": 0.3436, + "step": 2063 + }, + { + "epoch": 0.041318219352901434, + "grad_norm": 1.1589102745056152, + "learning_rate": 9.996645492151663e-06, + "loss": 0.3433, + "step": 2064 + }, + { + "epoch": 0.04133823787002978, + "grad_norm": 0.9763083457946777, + "learning_rate": 9.996633608613129e-06, + "loss": 0.2954, + "step": 2065 + }, + { + "epoch": 0.04135825638715812, + "grad_norm": 1.0078392028808594, + "learning_rate": 9.996621704069823e-06, + "loss": 0.3078, + "step": 2066 + }, + { + "epoch": 0.041378274904286465, + "grad_norm": 1.0159573554992676, + "learning_rate": 9.996609778521792e-06, + "loss": 0.3226, + "step": 2067 + }, + { + "epoch": 0.04139829342141481, + "grad_norm": 1.0286202430725098, + "learning_rate": 9.99659783196909e-06, + "loss": 0.3214, + "step": 2068 + }, + { + "epoch": 0.04141831193854315, + "grad_norm": 1.8201864957809448, + "learning_rate": 9.996585864411767e-06, + "loss": 0.8859, + "step": 2069 + }, + { + "epoch": 0.041438330455671496, + "grad_norm": 1.2775133848190308, + "learning_rate": 9.996573875849872e-06, + "loss": 0.3341, + "step": 2070 + }, + { + "epoch": 0.04145834897279984, + "grad_norm": 1.1716541051864624, + "learning_rate": 9.996561866283453e-06, + "loss": 0.3304, + "step": 2071 + }, + { + "epoch": 0.04147836748992818, + "grad_norm": 1.3337781429290771, + "learning_rate": 9.996549835712568e-06, + "loss": 0.2644, + "step": 2072 + }, + { + "epoch": 0.04149838600705653, + "grad_norm": 1.1819106340408325, + "learning_rate": 9.996537784137259e-06, + "loss": 0.3326, + "step": 2073 + }, + { + "epoch": 0.04151840452418487, + "grad_norm": 1.11174738407135, + "learning_rate": 9.996525711557581e-06, + "loss": 0.3158, + "step": 2074 + }, + { + "epoch": 0.041538423041313215, + "grad_norm": 1.142469048500061, + "learning_rate": 9.996513617973587e-06, + "loss": 0.3326, + "step": 2075 + }, + { + "epoch": 0.04155844155844156, + "grad_norm": 1.7583271265029907, + "learning_rate": 9.996501503385322e-06, + "loss": 0.8672, + "step": 2076 + }, + { + "epoch": 0.0415784600755699, + "grad_norm": 1.1151862144470215, + "learning_rate": 9.99648936779284e-06, + "loss": 0.3547, + "step": 2077 + }, + { + "epoch": 0.041598478592698246, + "grad_norm": 1.1966886520385742, + "learning_rate": 9.996477211196194e-06, + "loss": 0.366, + "step": 2078 + }, + { + "epoch": 0.04161849710982659, + "grad_norm": 1.07932448387146, + "learning_rate": 9.996465033595432e-06, + "loss": 0.3183, + "step": 2079 + }, + { + "epoch": 0.04163851562695493, + "grad_norm": 1.1476505994796753, + "learning_rate": 9.996452834990606e-06, + "loss": 0.3241, + "step": 2080 + }, + { + "epoch": 0.04165853414408328, + "grad_norm": 0.9639757871627808, + "learning_rate": 9.996440615381766e-06, + "loss": 0.2994, + "step": 2081 + }, + { + "epoch": 0.04167855266121162, + "grad_norm": 1.0752438306808472, + "learning_rate": 9.996428374768967e-06, + "loss": 0.3071, + "step": 2082 + }, + { + "epoch": 0.041698571178339965, + "grad_norm": 1.131439208984375, + "learning_rate": 9.996416113152258e-06, + "loss": 0.3479, + "step": 2083 + }, + { + "epoch": 0.04171858969546831, + "grad_norm": 1.134437084197998, + "learning_rate": 9.996403830531691e-06, + "loss": 0.3215, + "step": 2084 + }, + { + "epoch": 0.04173860821259665, + "grad_norm": 1.0752251148223877, + "learning_rate": 9.996391526907318e-06, + "loss": 0.3706, + "step": 2085 + }, + { + "epoch": 0.041758626729724996, + "grad_norm": 1.156659483909607, + "learning_rate": 9.99637920227919e-06, + "loss": 0.3471, + "step": 2086 + }, + { + "epoch": 0.04177864524685334, + "grad_norm": 1.1182397603988647, + "learning_rate": 9.996366856647357e-06, + "loss": 0.3675, + "step": 2087 + }, + { + "epoch": 0.04179866376398168, + "grad_norm": 1.1533757448196411, + "learning_rate": 9.996354490011874e-06, + "loss": 0.3611, + "step": 2088 + }, + { + "epoch": 0.04181868228111003, + "grad_norm": 1.0101131200790405, + "learning_rate": 9.996342102372794e-06, + "loss": 0.3168, + "step": 2089 + }, + { + "epoch": 0.04183870079823837, + "grad_norm": 1.1425679922103882, + "learning_rate": 9.996329693730166e-06, + "loss": 0.3144, + "step": 2090 + }, + { + "epoch": 0.041858719315366714, + "grad_norm": 1.2029873132705688, + "learning_rate": 9.996317264084042e-06, + "loss": 0.3327, + "step": 2091 + }, + { + "epoch": 0.04187873783249506, + "grad_norm": 1.0979995727539062, + "learning_rate": 9.996304813434476e-06, + "loss": 0.3384, + "step": 2092 + }, + { + "epoch": 0.0418987563496234, + "grad_norm": 0.9580891728401184, + "learning_rate": 9.99629234178152e-06, + "loss": 0.2901, + "step": 2093 + }, + { + "epoch": 0.041918774866751746, + "grad_norm": 1.2427797317504883, + "learning_rate": 9.996279849125227e-06, + "loss": 0.3668, + "step": 2094 + }, + { + "epoch": 0.04193879338388009, + "grad_norm": 1.1077007055282593, + "learning_rate": 9.996267335465648e-06, + "loss": 0.3183, + "step": 2095 + }, + { + "epoch": 0.04195881190100843, + "grad_norm": 1.173149585723877, + "learning_rate": 9.996254800802836e-06, + "loss": 0.3515, + "step": 2096 + }, + { + "epoch": 0.04197883041813678, + "grad_norm": 1.0484726428985596, + "learning_rate": 9.996242245136845e-06, + "loss": 0.3328, + "step": 2097 + }, + { + "epoch": 0.04199884893526512, + "grad_norm": 1.3190069198608398, + "learning_rate": 9.996229668467725e-06, + "loss": 0.3611, + "step": 2098 + }, + { + "epoch": 0.042018867452393464, + "grad_norm": 1.1554067134857178, + "learning_rate": 9.996217070795531e-06, + "loss": 0.2894, + "step": 2099 + }, + { + "epoch": 0.04203888596952181, + "grad_norm": 1.243655800819397, + "learning_rate": 9.996204452120317e-06, + "loss": 0.3687, + "step": 2100 + }, + { + "epoch": 0.04205890448665015, + "grad_norm": 1.1341453790664673, + "learning_rate": 9.996191812442136e-06, + "loss": 0.3327, + "step": 2101 + }, + { + "epoch": 0.042078923003778496, + "grad_norm": 1.0719830989837646, + "learning_rate": 9.996179151761037e-06, + "loss": 0.3204, + "step": 2102 + }, + { + "epoch": 0.04209894152090684, + "grad_norm": 1.0886139869689941, + "learning_rate": 9.996166470077076e-06, + "loss": 0.341, + "step": 2103 + }, + { + "epoch": 0.04211896003803518, + "grad_norm": 1.0651167631149292, + "learning_rate": 9.996153767390307e-06, + "loss": 0.3166, + "step": 2104 + }, + { + "epoch": 0.04213897855516353, + "grad_norm": 0.9559372067451477, + "learning_rate": 9.996141043700784e-06, + "loss": 0.3142, + "step": 2105 + }, + { + "epoch": 0.04215899707229187, + "grad_norm": 1.0893781185150146, + "learning_rate": 9.996128299008559e-06, + "loss": 0.3266, + "step": 2106 + }, + { + "epoch": 0.042179015589420214, + "grad_norm": 1.1121478080749512, + "learning_rate": 9.996115533313684e-06, + "loss": 0.362, + "step": 2107 + }, + { + "epoch": 0.04219903410654856, + "grad_norm": 1.1473913192749023, + "learning_rate": 9.996102746616217e-06, + "loss": 0.3637, + "step": 2108 + }, + { + "epoch": 0.0422190526236769, + "grad_norm": 1.1801767349243164, + "learning_rate": 9.996089938916207e-06, + "loss": 0.3308, + "step": 2109 + }, + { + "epoch": 0.042239071140805245, + "grad_norm": 1.8738359212875366, + "learning_rate": 9.99607711021371e-06, + "loss": 0.892, + "step": 2110 + }, + { + "epoch": 0.04225908965793359, + "grad_norm": 1.0044337511062622, + "learning_rate": 9.996064260508782e-06, + "loss": 0.2932, + "step": 2111 + }, + { + "epoch": 0.04227910817506193, + "grad_norm": 1.0812182426452637, + "learning_rate": 9.996051389801474e-06, + "loss": 0.3298, + "step": 2112 + }, + { + "epoch": 0.04229912669219028, + "grad_norm": 1.0744026899337769, + "learning_rate": 9.99603849809184e-06, + "loss": 0.3247, + "step": 2113 + }, + { + "epoch": 0.04231914520931862, + "grad_norm": 1.1706432104110718, + "learning_rate": 9.996025585379935e-06, + "loss": 0.3591, + "step": 2114 + }, + { + "epoch": 0.042339163726446964, + "grad_norm": 1.0837723016738892, + "learning_rate": 9.996012651665816e-06, + "loss": 0.3506, + "step": 2115 + }, + { + "epoch": 0.04235918224357531, + "grad_norm": 1.211907148361206, + "learning_rate": 9.995999696949534e-06, + "loss": 0.2967, + "step": 2116 + }, + { + "epoch": 0.04237920076070365, + "grad_norm": 1.156572699546814, + "learning_rate": 9.995986721231144e-06, + "loss": 0.3121, + "step": 2117 + }, + { + "epoch": 0.042399219277831995, + "grad_norm": 1.0665194988250732, + "learning_rate": 9.995973724510702e-06, + "loss": 0.2708, + "step": 2118 + }, + { + "epoch": 0.04241923779496034, + "grad_norm": 1.095603346824646, + "learning_rate": 9.99596070678826e-06, + "loss": 0.2968, + "step": 2119 + }, + { + "epoch": 0.04243925631208868, + "grad_norm": 1.0536435842514038, + "learning_rate": 9.995947668063875e-06, + "loss": 0.3199, + "step": 2120 + }, + { + "epoch": 0.04245927482921703, + "grad_norm": 1.1007959842681885, + "learning_rate": 9.9959346083376e-06, + "loss": 0.3345, + "step": 2121 + }, + { + "epoch": 0.04247929334634537, + "grad_norm": 1.1373059749603271, + "learning_rate": 9.995921527609494e-06, + "loss": 0.3316, + "step": 2122 + }, + { + "epoch": 0.042499311863473714, + "grad_norm": 1.8625472784042358, + "learning_rate": 9.995908425879608e-06, + "loss": 0.9512, + "step": 2123 + }, + { + "epoch": 0.04251933038060206, + "grad_norm": 1.085578203201294, + "learning_rate": 9.995895303147996e-06, + "loss": 0.2846, + "step": 2124 + }, + { + "epoch": 0.0425393488977304, + "grad_norm": 1.0977070331573486, + "learning_rate": 9.995882159414719e-06, + "loss": 0.2972, + "step": 2125 + }, + { + "epoch": 0.042559367414858745, + "grad_norm": 1.1224850416183472, + "learning_rate": 9.995868994679826e-06, + "loss": 0.3257, + "step": 2126 + }, + { + "epoch": 0.04257938593198709, + "grad_norm": 1.0647398233413696, + "learning_rate": 9.995855808943374e-06, + "loss": 0.347, + "step": 2127 + }, + { + "epoch": 0.04259940444911543, + "grad_norm": 1.3186900615692139, + "learning_rate": 9.995842602205421e-06, + "loss": 0.3437, + "step": 2128 + }, + { + "epoch": 0.042619422966243777, + "grad_norm": 1.0426546335220337, + "learning_rate": 9.99582937446602e-06, + "loss": 0.3381, + "step": 2129 + }, + { + "epoch": 0.04263944148337212, + "grad_norm": 1.1722526550292969, + "learning_rate": 9.995816125725228e-06, + "loss": 0.3158, + "step": 2130 + }, + { + "epoch": 0.042659460000500464, + "grad_norm": 1.1162172555923462, + "learning_rate": 9.9958028559831e-06, + "loss": 0.3782, + "step": 2131 + }, + { + "epoch": 0.04267947851762881, + "grad_norm": 1.20915687084198, + "learning_rate": 9.99578956523969e-06, + "loss": 0.3359, + "step": 2132 + }, + { + "epoch": 0.04269949703475715, + "grad_norm": 1.148875117301941, + "learning_rate": 9.995776253495058e-06, + "loss": 0.3309, + "step": 2133 + }, + { + "epoch": 0.042719515551885495, + "grad_norm": 1.318799376487732, + "learning_rate": 9.995762920749258e-06, + "loss": 0.3403, + "step": 2134 + }, + { + "epoch": 0.04273953406901384, + "grad_norm": 1.1337841749191284, + "learning_rate": 9.995749567002344e-06, + "loss": 0.3655, + "step": 2135 + }, + { + "epoch": 0.04275955258614218, + "grad_norm": 1.1447855234146118, + "learning_rate": 9.995736192254375e-06, + "loss": 0.3352, + "step": 2136 + }, + { + "epoch": 0.042779571103270526, + "grad_norm": 1.0970243215560913, + "learning_rate": 9.995722796505408e-06, + "loss": 0.3194, + "step": 2137 + }, + { + "epoch": 0.04279958962039887, + "grad_norm": 1.033121109008789, + "learning_rate": 9.995709379755493e-06, + "loss": 0.3411, + "step": 2138 + }, + { + "epoch": 0.042819608137527214, + "grad_norm": 1.0387176275253296, + "learning_rate": 9.995695942004695e-06, + "loss": 0.3496, + "step": 2139 + }, + { + "epoch": 0.04283962665465556, + "grad_norm": 1.0988421440124512, + "learning_rate": 9.995682483253065e-06, + "loss": 0.3768, + "step": 2140 + }, + { + "epoch": 0.0428596451717839, + "grad_norm": 2.0165505409240723, + "learning_rate": 9.99566900350066e-06, + "loss": 0.8312, + "step": 2141 + }, + { + "epoch": 0.042879663688912245, + "grad_norm": 1.0989900827407837, + "learning_rate": 9.99565550274754e-06, + "loss": 0.3806, + "step": 2142 + }, + { + "epoch": 0.04289968220604059, + "grad_norm": 1.1474913358688354, + "learning_rate": 9.995641980993756e-06, + "loss": 0.3341, + "step": 2143 + }, + { + "epoch": 0.04291970072316893, + "grad_norm": 1.8770421743392944, + "learning_rate": 9.99562843823937e-06, + "loss": 0.8602, + "step": 2144 + }, + { + "epoch": 0.042939719240297276, + "grad_norm": 1.0543153285980225, + "learning_rate": 9.995614874484436e-06, + "loss": 0.3219, + "step": 2145 + }, + { + "epoch": 0.04295973775742562, + "grad_norm": 1.2237838506698608, + "learning_rate": 9.995601289729013e-06, + "loss": 0.2812, + "step": 2146 + }, + { + "epoch": 0.042979756274553964, + "grad_norm": 1.3404568433761597, + "learning_rate": 9.995587683973158e-06, + "loss": 0.3712, + "step": 2147 + }, + { + "epoch": 0.04299977479168231, + "grad_norm": 1.2128270864486694, + "learning_rate": 9.995574057216927e-06, + "loss": 0.3069, + "step": 2148 + }, + { + "epoch": 0.04301979330881065, + "grad_norm": 1.0244061946868896, + "learning_rate": 9.995560409460377e-06, + "loss": 0.3273, + "step": 2149 + }, + { + "epoch": 0.043039811825938995, + "grad_norm": 2.1715176105499268, + "learning_rate": 9.995546740703567e-06, + "loss": 0.9246, + "step": 2150 + }, + { + "epoch": 0.04305983034306734, + "grad_norm": 1.0126855373382568, + "learning_rate": 9.995533050946553e-06, + "loss": 0.3168, + "step": 2151 + }, + { + "epoch": 0.04307984886019568, + "grad_norm": 1.1287282705307007, + "learning_rate": 9.995519340189392e-06, + "loss": 0.3395, + "step": 2152 + }, + { + "epoch": 0.043099867377324026, + "grad_norm": 1.8463242053985596, + "learning_rate": 9.995505608432146e-06, + "loss": 0.8653, + "step": 2153 + }, + { + "epoch": 0.04311988589445237, + "grad_norm": 1.9617253541946411, + "learning_rate": 9.995491855674865e-06, + "loss": 0.8264, + "step": 2154 + }, + { + "epoch": 0.043139904411580714, + "grad_norm": 1.1780502796173096, + "learning_rate": 9.995478081917616e-06, + "loss": 0.3428, + "step": 2155 + }, + { + "epoch": 0.04315992292870906, + "grad_norm": 1.0482494831085205, + "learning_rate": 9.995464287160448e-06, + "loss": 0.3431, + "step": 2156 + }, + { + "epoch": 0.0431799414458374, + "grad_norm": 1.0676642656326294, + "learning_rate": 9.995450471403425e-06, + "loss": 0.3051, + "step": 2157 + }, + { + "epoch": 0.043199959962965745, + "grad_norm": 1.125418782234192, + "learning_rate": 9.995436634646604e-06, + "loss": 0.3302, + "step": 2158 + }, + { + "epoch": 0.04321997848009409, + "grad_norm": 1.1792640686035156, + "learning_rate": 9.995422776890043e-06, + "loss": 0.3401, + "step": 2159 + }, + { + "epoch": 0.04323999699722243, + "grad_norm": 1.0987868309020996, + "learning_rate": 9.995408898133797e-06, + "loss": 0.3447, + "step": 2160 + }, + { + "epoch": 0.043260015514350776, + "grad_norm": 1.0175203084945679, + "learning_rate": 9.995394998377929e-06, + "loss": 0.2811, + "step": 2161 + }, + { + "epoch": 0.04328003403147912, + "grad_norm": 1.0411385297775269, + "learning_rate": 9.995381077622495e-06, + "loss": 0.3031, + "step": 2162 + }, + { + "epoch": 0.043300052548607464, + "grad_norm": 1.0402593612670898, + "learning_rate": 9.995367135867553e-06, + "loss": 0.2934, + "step": 2163 + }, + { + "epoch": 0.04332007106573581, + "grad_norm": 1.0623233318328857, + "learning_rate": 9.995353173113164e-06, + "loss": 0.3186, + "step": 2164 + }, + { + "epoch": 0.04334008958286415, + "grad_norm": 1.1399996280670166, + "learning_rate": 9.995339189359385e-06, + "loss": 0.35, + "step": 2165 + }, + { + "epoch": 0.043360108099992495, + "grad_norm": 2.009143352508545, + "learning_rate": 9.995325184606275e-06, + "loss": 0.8905, + "step": 2166 + }, + { + "epoch": 0.04338012661712084, + "grad_norm": 1.0238271951675415, + "learning_rate": 9.995311158853891e-06, + "loss": 0.3464, + "step": 2167 + }, + { + "epoch": 0.04340014513424918, + "grad_norm": 1.0825005769729614, + "learning_rate": 9.995297112102296e-06, + "loss": 0.3388, + "step": 2168 + }, + { + "epoch": 0.043420163651377526, + "grad_norm": 1.193447470664978, + "learning_rate": 9.995283044351547e-06, + "loss": 0.3411, + "step": 2169 + }, + { + "epoch": 0.04344018216850587, + "grad_norm": 1.750802993774414, + "learning_rate": 9.995268955601702e-06, + "loss": 0.8893, + "step": 2170 + }, + { + "epoch": 0.043460200685634214, + "grad_norm": 1.2286944389343262, + "learning_rate": 9.995254845852822e-06, + "loss": 0.319, + "step": 2171 + }, + { + "epoch": 0.04348021920276256, + "grad_norm": 1.4605220556259155, + "learning_rate": 9.995240715104965e-06, + "loss": 0.3603, + "step": 2172 + }, + { + "epoch": 0.0435002377198909, + "grad_norm": 1.128003478050232, + "learning_rate": 9.995226563358191e-06, + "loss": 0.3346, + "step": 2173 + }, + { + "epoch": 0.043520256237019245, + "grad_norm": 1.0322375297546387, + "learning_rate": 9.99521239061256e-06, + "loss": 0.3358, + "step": 2174 + }, + { + "epoch": 0.04354027475414759, + "grad_norm": 1.7913432121276855, + "learning_rate": 9.995198196868133e-06, + "loss": 0.7901, + "step": 2175 + }, + { + "epoch": 0.04356029327127593, + "grad_norm": 1.0580177307128906, + "learning_rate": 9.995183982124965e-06, + "loss": 0.3047, + "step": 2176 + }, + { + "epoch": 0.043580311788404276, + "grad_norm": 1.8889857530593872, + "learning_rate": 9.99516974638312e-06, + "loss": 0.9895, + "step": 2177 + }, + { + "epoch": 0.04360033030553262, + "grad_norm": 1.0959181785583496, + "learning_rate": 9.995155489642654e-06, + "loss": 0.3435, + "step": 2178 + }, + { + "epoch": 0.04362034882266096, + "grad_norm": 1.0850383043289185, + "learning_rate": 9.995141211903632e-06, + "loss": 0.3376, + "step": 2179 + }, + { + "epoch": 0.04364036733978931, + "grad_norm": 1.1642390489578247, + "learning_rate": 9.995126913166112e-06, + "loss": 0.3374, + "step": 2180 + }, + { + "epoch": 0.04366038585691765, + "grad_norm": 1.0845831632614136, + "learning_rate": 9.99511259343015e-06, + "loss": 0.3465, + "step": 2181 + }, + { + "epoch": 0.043680404374045995, + "grad_norm": 1.080854892730713, + "learning_rate": 9.995098252695812e-06, + "loss": 0.3224, + "step": 2182 + }, + { + "epoch": 0.04370042289117434, + "grad_norm": 1.8591818809509277, + "learning_rate": 9.995083890963156e-06, + "loss": 0.8572, + "step": 2183 + }, + { + "epoch": 0.04372044140830268, + "grad_norm": 1.116549015045166, + "learning_rate": 9.99506950823224e-06, + "loss": 0.3466, + "step": 2184 + }, + { + "epoch": 0.043740459925431026, + "grad_norm": 1.0959951877593994, + "learning_rate": 9.995055104503129e-06, + "loss": 0.357, + "step": 2185 + }, + { + "epoch": 0.04376047844255937, + "grad_norm": 1.1842494010925293, + "learning_rate": 9.99504067977588e-06, + "loss": 0.334, + "step": 2186 + }, + { + "epoch": 0.04378049695968771, + "grad_norm": 1.1027365922927856, + "learning_rate": 9.995026234050556e-06, + "loss": 0.3295, + "step": 2187 + }, + { + "epoch": 0.04380051547681606, + "grad_norm": 1.093526005744934, + "learning_rate": 9.995011767327216e-06, + "loss": 0.3649, + "step": 2188 + }, + { + "epoch": 0.0438205339939444, + "grad_norm": 1.0740318298339844, + "learning_rate": 9.994997279605922e-06, + "loss": 0.3671, + "step": 2189 + }, + { + "epoch": 0.043840552511072745, + "grad_norm": 1.132738471031189, + "learning_rate": 9.994982770886734e-06, + "loss": 0.3583, + "step": 2190 + }, + { + "epoch": 0.04386057102820109, + "grad_norm": 1.1455227136611938, + "learning_rate": 9.994968241169714e-06, + "loss": 0.3026, + "step": 2191 + }, + { + "epoch": 0.04388058954532943, + "grad_norm": 1.2176604270935059, + "learning_rate": 9.99495369045492e-06, + "loss": 0.3079, + "step": 2192 + }, + { + "epoch": 0.043900608062457776, + "grad_norm": 1.1060655117034912, + "learning_rate": 9.994939118742416e-06, + "loss": 0.3083, + "step": 2193 + }, + { + "epoch": 0.04392062657958612, + "grad_norm": 1.2281713485717773, + "learning_rate": 9.994924526032265e-06, + "loss": 0.3606, + "step": 2194 + }, + { + "epoch": 0.04394064509671446, + "grad_norm": 1.1379424333572388, + "learning_rate": 9.994909912324526e-06, + "loss": 0.3518, + "step": 2195 + }, + { + "epoch": 0.04396066361384281, + "grad_norm": 1.1379344463348389, + "learning_rate": 9.994895277619261e-06, + "loss": 0.3299, + "step": 2196 + }, + { + "epoch": 0.04398068213097115, + "grad_norm": 1.2044591903686523, + "learning_rate": 9.99488062191653e-06, + "loss": 0.3809, + "step": 2197 + }, + { + "epoch": 0.044000700648099494, + "grad_norm": 1.2266852855682373, + "learning_rate": 9.994865945216396e-06, + "loss": 0.341, + "step": 2198 + }, + { + "epoch": 0.04402071916522784, + "grad_norm": 1.0619769096374512, + "learning_rate": 9.99485124751892e-06, + "loss": 0.3431, + "step": 2199 + }, + { + "epoch": 0.04404073768235618, + "grad_norm": 1.1526741981506348, + "learning_rate": 9.994836528824166e-06, + "loss": 0.3444, + "step": 2200 + }, + { + "epoch": 0.044060756199484526, + "grad_norm": 1.1036101579666138, + "learning_rate": 9.994821789132191e-06, + "loss": 0.3603, + "step": 2201 + }, + { + "epoch": 0.04408077471661287, + "grad_norm": 1.1434147357940674, + "learning_rate": 9.994807028443064e-06, + "loss": 0.3005, + "step": 2202 + }, + { + "epoch": 0.04410079323374121, + "grad_norm": 1.8312723636627197, + "learning_rate": 9.994792246756841e-06, + "loss": 0.8924, + "step": 2203 + }, + { + "epoch": 0.04412081175086956, + "grad_norm": 1.0785163640975952, + "learning_rate": 9.994777444073587e-06, + "loss": 0.3579, + "step": 2204 + }, + { + "epoch": 0.0441408302679979, + "grad_norm": 1.1298187971115112, + "learning_rate": 9.994762620393362e-06, + "loss": 0.3462, + "step": 2205 + }, + { + "epoch": 0.044160848785126244, + "grad_norm": 1.0004268884658813, + "learning_rate": 9.994747775716232e-06, + "loss": 0.306, + "step": 2206 + }, + { + "epoch": 0.04418086730225459, + "grad_norm": 1.1139830350875854, + "learning_rate": 9.994732910042255e-06, + "loss": 0.3709, + "step": 2207 + }, + { + "epoch": 0.04420088581938293, + "grad_norm": 1.1851303577423096, + "learning_rate": 9.994718023371498e-06, + "loss": 0.2783, + "step": 2208 + }, + { + "epoch": 0.044220904336511276, + "grad_norm": 1.0699018239974976, + "learning_rate": 9.994703115704019e-06, + "loss": 0.3328, + "step": 2209 + }, + { + "epoch": 0.04424092285363962, + "grad_norm": 1.1810691356658936, + "learning_rate": 9.994688187039884e-06, + "loss": 0.3075, + "step": 2210 + }, + { + "epoch": 0.04426094137076796, + "grad_norm": 1.108271837234497, + "learning_rate": 9.994673237379156e-06, + "loss": 0.3449, + "step": 2211 + }, + { + "epoch": 0.04428095988789631, + "grad_norm": 1.0863518714904785, + "learning_rate": 9.994658266721894e-06, + "loss": 0.3173, + "step": 2212 + }, + { + "epoch": 0.04430097840502465, + "grad_norm": 1.1691877841949463, + "learning_rate": 9.994643275068166e-06, + "loss": 0.2972, + "step": 2213 + }, + { + "epoch": 0.044320996922152994, + "grad_norm": 1.088608741760254, + "learning_rate": 9.99462826241803e-06, + "loss": 0.3249, + "step": 2214 + }, + { + "epoch": 0.04434101543928134, + "grad_norm": 1.3141306638717651, + "learning_rate": 9.994613228771554e-06, + "loss": 0.4132, + "step": 2215 + }, + { + "epoch": 0.04436103395640968, + "grad_norm": 1.0564565658569336, + "learning_rate": 9.994598174128798e-06, + "loss": 0.3206, + "step": 2216 + }, + { + "epoch": 0.044381052473538025, + "grad_norm": 1.1509324312210083, + "learning_rate": 9.994583098489826e-06, + "loss": 0.3572, + "step": 2217 + }, + { + "epoch": 0.04440107099066637, + "grad_norm": 1.0982789993286133, + "learning_rate": 9.9945680018547e-06, + "loss": 0.3216, + "step": 2218 + }, + { + "epoch": 0.04442108950779471, + "grad_norm": 1.072442650794983, + "learning_rate": 9.994552884223487e-06, + "loss": 0.3202, + "step": 2219 + }, + { + "epoch": 0.04444110802492306, + "grad_norm": 1.306989312171936, + "learning_rate": 9.994537745596249e-06, + "loss": 0.3072, + "step": 2220 + }, + { + "epoch": 0.0444611265420514, + "grad_norm": 1.122847557067871, + "learning_rate": 9.994522585973048e-06, + "loss": 0.3207, + "step": 2221 + }, + { + "epoch": 0.044481145059179744, + "grad_norm": 1.3451961278915405, + "learning_rate": 9.994507405353947e-06, + "loss": 0.3982, + "step": 2222 + }, + { + "epoch": 0.04450116357630809, + "grad_norm": 1.1623497009277344, + "learning_rate": 9.994492203739015e-06, + "loss": 0.3293, + "step": 2223 + }, + { + "epoch": 0.04452118209343643, + "grad_norm": 1.0474461317062378, + "learning_rate": 9.994476981128312e-06, + "loss": 0.3231, + "step": 2224 + }, + { + "epoch": 0.044541200610564775, + "grad_norm": 1.216821312904358, + "learning_rate": 9.994461737521901e-06, + "loss": 0.3394, + "step": 2225 + }, + { + "epoch": 0.04456121912769312, + "grad_norm": 1.175278663635254, + "learning_rate": 9.99444647291985e-06, + "loss": 0.3212, + "step": 2226 + }, + { + "epoch": 0.04458123764482146, + "grad_norm": 1.1070423126220703, + "learning_rate": 9.994431187322217e-06, + "loss": 0.3155, + "step": 2227 + }, + { + "epoch": 0.04460125616194981, + "grad_norm": 1.2327916622161865, + "learning_rate": 9.994415880729073e-06, + "loss": 0.3007, + "step": 2228 + }, + { + "epoch": 0.04462127467907815, + "grad_norm": 2.024733543395996, + "learning_rate": 9.994400553140479e-06, + "loss": 0.8416, + "step": 2229 + }, + { + "epoch": 0.044641293196206494, + "grad_norm": 1.0504567623138428, + "learning_rate": 9.994385204556501e-06, + "loss": 0.3425, + "step": 2230 + }, + { + "epoch": 0.04466131171333484, + "grad_norm": 1.0570951700210571, + "learning_rate": 9.9943698349772e-06, + "loss": 0.3579, + "step": 2231 + }, + { + "epoch": 0.04468133023046318, + "grad_norm": 2.245278835296631, + "learning_rate": 9.994354444402645e-06, + "loss": 0.8993, + "step": 2232 + }, + { + "epoch": 0.044701348747591525, + "grad_norm": 1.15447998046875, + "learning_rate": 9.994339032832898e-06, + "loss": 0.3581, + "step": 2233 + }, + { + "epoch": 0.04472136726471987, + "grad_norm": 1.935319423675537, + "learning_rate": 9.994323600268026e-06, + "loss": 0.8168, + "step": 2234 + }, + { + "epoch": 0.04474138578184821, + "grad_norm": 1.1446583271026611, + "learning_rate": 9.994308146708089e-06, + "loss": 0.3577, + "step": 2235 + }, + { + "epoch": 0.044761404298976556, + "grad_norm": 1.08036208152771, + "learning_rate": 9.994292672153158e-06, + "loss": 0.3372, + "step": 2236 + }, + { + "epoch": 0.0447814228161049, + "grad_norm": 1.2189617156982422, + "learning_rate": 9.994277176603295e-06, + "loss": 0.3438, + "step": 2237 + }, + { + "epoch": 0.044801441333233244, + "grad_norm": 1.0059349536895752, + "learning_rate": 9.994261660058564e-06, + "loss": 0.3375, + "step": 2238 + }, + { + "epoch": 0.04482145985036159, + "grad_norm": 1.125089406967163, + "learning_rate": 9.994246122519034e-06, + "loss": 0.343, + "step": 2239 + }, + { + "epoch": 0.04484147836748993, + "grad_norm": 1.0425301790237427, + "learning_rate": 9.994230563984766e-06, + "loss": 0.3158, + "step": 2240 + }, + { + "epoch": 0.044861496884618275, + "grad_norm": 1.2853972911834717, + "learning_rate": 9.994214984455828e-06, + "loss": 0.328, + "step": 2241 + }, + { + "epoch": 0.04488151540174662, + "grad_norm": 1.186503291130066, + "learning_rate": 9.994199383932287e-06, + "loss": 0.3061, + "step": 2242 + }, + { + "epoch": 0.04490153391887496, + "grad_norm": 1.0797151327133179, + "learning_rate": 9.994183762414205e-06, + "loss": 0.3324, + "step": 2243 + }, + { + "epoch": 0.044921552436003306, + "grad_norm": 1.0287950038909912, + "learning_rate": 9.994168119901648e-06, + "loss": 0.2975, + "step": 2244 + }, + { + "epoch": 0.04494157095313165, + "grad_norm": 1.156805396080017, + "learning_rate": 9.994152456394685e-06, + "loss": 0.3417, + "step": 2245 + }, + { + "epoch": 0.044961589470259994, + "grad_norm": 1.1293671131134033, + "learning_rate": 9.99413677189338e-06, + "loss": 0.3262, + "step": 2246 + }, + { + "epoch": 0.04498160798738834, + "grad_norm": 1.3277298212051392, + "learning_rate": 9.994121066397797e-06, + "loss": 0.3491, + "step": 2247 + }, + { + "epoch": 0.04500162650451668, + "grad_norm": 1.2203444242477417, + "learning_rate": 9.994105339908005e-06, + "loss": 0.3638, + "step": 2248 + }, + { + "epoch": 0.045021645021645025, + "grad_norm": 1.2052611112594604, + "learning_rate": 9.994089592424067e-06, + "loss": 0.3586, + "step": 2249 + }, + { + "epoch": 0.04504166353877337, + "grad_norm": 1.817412257194519, + "learning_rate": 9.994073823946054e-06, + "loss": 0.8238, + "step": 2250 + }, + { + "epoch": 0.045061682055901706, + "grad_norm": 1.1873505115509033, + "learning_rate": 9.994058034474027e-06, + "loss": 0.3139, + "step": 2251 + }, + { + "epoch": 0.04508170057303005, + "grad_norm": 1.0598655939102173, + "learning_rate": 9.994042224008056e-06, + "loss": 0.2776, + "step": 2252 + }, + { + "epoch": 0.04510171909015839, + "grad_norm": 1.003609538078308, + "learning_rate": 9.994026392548206e-06, + "loss": 0.3149, + "step": 2253 + }, + { + "epoch": 0.04512173760728674, + "grad_norm": 1.137892723083496, + "learning_rate": 9.994010540094543e-06, + "loss": 0.2965, + "step": 2254 + }, + { + "epoch": 0.04514175612441508, + "grad_norm": 0.9889925718307495, + "learning_rate": 9.993994666647135e-06, + "loss": 0.316, + "step": 2255 + }, + { + "epoch": 0.045161774641543424, + "grad_norm": 1.1475555896759033, + "learning_rate": 9.993978772206048e-06, + "loss": 0.3223, + "step": 2256 + }, + { + "epoch": 0.04518179315867177, + "grad_norm": 1.1023750305175781, + "learning_rate": 9.993962856771348e-06, + "loss": 0.3284, + "step": 2257 + }, + { + "epoch": 0.04520181167580011, + "grad_norm": 1.0353046655654907, + "learning_rate": 9.993946920343105e-06, + "loss": 0.3433, + "step": 2258 + }, + { + "epoch": 0.045221830192928456, + "grad_norm": 1.0344873666763306, + "learning_rate": 9.993930962921383e-06, + "loss": 0.3535, + "step": 2259 + }, + { + "epoch": 0.0452418487100568, + "grad_norm": 1.0851657390594482, + "learning_rate": 9.99391498450625e-06, + "loss": 0.3966, + "step": 2260 + }, + { + "epoch": 0.04526186722718514, + "grad_norm": 1.089604377746582, + "learning_rate": 9.993898985097773e-06, + "loss": 0.3427, + "step": 2261 + }, + { + "epoch": 0.04528188574431349, + "grad_norm": 1.1148300170898438, + "learning_rate": 9.99388296469602e-06, + "loss": 0.3358, + "step": 2262 + }, + { + "epoch": 0.04530190426144183, + "grad_norm": 1.0553241968154907, + "learning_rate": 9.993866923301057e-06, + "loss": 0.3268, + "step": 2263 + }, + { + "epoch": 0.045321922778570174, + "grad_norm": 1.0099258422851562, + "learning_rate": 9.993850860912952e-06, + "loss": 0.3086, + "step": 2264 + }, + { + "epoch": 0.04534194129569852, + "grad_norm": 1.1624608039855957, + "learning_rate": 9.993834777531772e-06, + "loss": 0.3185, + "step": 2265 + }, + { + "epoch": 0.04536195981282686, + "grad_norm": 1.0394432544708252, + "learning_rate": 9.993818673157586e-06, + "loss": 0.3222, + "step": 2266 + }, + { + "epoch": 0.045381978329955205, + "grad_norm": 1.0326104164123535, + "learning_rate": 9.99380254779046e-06, + "loss": 0.314, + "step": 2267 + }, + { + "epoch": 0.04540199684708355, + "grad_norm": 1.0755152702331543, + "learning_rate": 9.993786401430465e-06, + "loss": 0.3564, + "step": 2268 + }, + { + "epoch": 0.04542201536421189, + "grad_norm": 1.1379894018173218, + "learning_rate": 9.993770234077667e-06, + "loss": 0.3356, + "step": 2269 + }, + { + "epoch": 0.04544203388134024, + "grad_norm": 1.880204677581787, + "learning_rate": 9.993754045732132e-06, + "loss": 0.8785, + "step": 2270 + }, + { + "epoch": 0.04546205239846858, + "grad_norm": 1.1606082916259766, + "learning_rate": 9.993737836393929e-06, + "loss": 0.3265, + "step": 2271 + }, + { + "epoch": 0.045482070915596924, + "grad_norm": 1.0871330499649048, + "learning_rate": 9.993721606063127e-06, + "loss": 0.3236, + "step": 2272 + }, + { + "epoch": 0.04550208943272527, + "grad_norm": 1.14255690574646, + "learning_rate": 9.993705354739796e-06, + "loss": 0.3353, + "step": 2273 + }, + { + "epoch": 0.04552210794985361, + "grad_norm": 1.0514248609542847, + "learning_rate": 9.993689082424e-06, + "loss": 0.3184, + "step": 2274 + }, + { + "epoch": 0.045542126466981955, + "grad_norm": 0.9740574359893799, + "learning_rate": 9.993672789115811e-06, + "loss": 0.3345, + "step": 2275 + }, + { + "epoch": 0.0455621449841103, + "grad_norm": 1.2824358940124512, + "learning_rate": 9.993656474815296e-06, + "loss": 0.3717, + "step": 2276 + }, + { + "epoch": 0.04558216350123864, + "grad_norm": 1.1015516519546509, + "learning_rate": 9.993640139522523e-06, + "loss": 0.3402, + "step": 2277 + }, + { + "epoch": 0.04560218201836699, + "grad_norm": 1.0001215934753418, + "learning_rate": 9.993623783237563e-06, + "loss": 0.3111, + "step": 2278 + }, + { + "epoch": 0.04562220053549533, + "grad_norm": 1.1183156967163086, + "learning_rate": 9.993607405960482e-06, + "loss": 0.3801, + "step": 2279 + }, + { + "epoch": 0.045642219052623674, + "grad_norm": 1.1204416751861572, + "learning_rate": 9.99359100769135e-06, + "loss": 0.3274, + "step": 2280 + }, + { + "epoch": 0.04566223756975202, + "grad_norm": 1.166388988494873, + "learning_rate": 9.993574588430237e-06, + "loss": 0.3332, + "step": 2281 + }, + { + "epoch": 0.04568225608688036, + "grad_norm": 1.0555371046066284, + "learning_rate": 9.993558148177212e-06, + "loss": 0.2757, + "step": 2282 + }, + { + "epoch": 0.045702274604008705, + "grad_norm": 1.1025668382644653, + "learning_rate": 9.99354168693234e-06, + "loss": 0.2586, + "step": 2283 + }, + { + "epoch": 0.04572229312113705, + "grad_norm": 1.0503649711608887, + "learning_rate": 9.993525204695697e-06, + "loss": 0.3331, + "step": 2284 + }, + { + "epoch": 0.04574231163826539, + "grad_norm": 1.4039632081985474, + "learning_rate": 9.993508701467346e-06, + "loss": 0.4111, + "step": 2285 + }, + { + "epoch": 0.045762330155393736, + "grad_norm": 1.2082031965255737, + "learning_rate": 9.99349217724736e-06, + "loss": 0.344, + "step": 2286 + }, + { + "epoch": 0.04578234867252208, + "grad_norm": 1.0923006534576416, + "learning_rate": 9.993475632035807e-06, + "loss": 0.3492, + "step": 2287 + }, + { + "epoch": 0.045802367189650424, + "grad_norm": 1.2638243436813354, + "learning_rate": 9.993459065832757e-06, + "loss": 0.334, + "step": 2288 + }, + { + "epoch": 0.04582238570677877, + "grad_norm": 1.6103383302688599, + "learning_rate": 9.993442478638281e-06, + "loss": 0.3865, + "step": 2289 + }, + { + "epoch": 0.04584240422390711, + "grad_norm": 1.0129643678665161, + "learning_rate": 9.993425870452446e-06, + "loss": 0.3272, + "step": 2290 + }, + { + "epoch": 0.045862422741035455, + "grad_norm": 1.122186541557312, + "learning_rate": 9.993409241275325e-06, + "loss": 0.3732, + "step": 2291 + }, + { + "epoch": 0.0458824412581638, + "grad_norm": 1.0804294347763062, + "learning_rate": 9.993392591106985e-06, + "loss": 0.3491, + "step": 2292 + }, + { + "epoch": 0.04590245977529214, + "grad_norm": 1.124137043952942, + "learning_rate": 9.993375919947496e-06, + "loss": 0.3491, + "step": 2293 + }, + { + "epoch": 0.045922478292420486, + "grad_norm": 1.9506192207336426, + "learning_rate": 9.99335922779693e-06, + "loss": 0.917, + "step": 2294 + }, + { + "epoch": 0.04594249680954883, + "grad_norm": 2.215247392654419, + "learning_rate": 9.993342514655358e-06, + "loss": 0.8249, + "step": 2295 + }, + { + "epoch": 0.045962515326677174, + "grad_norm": 1.0561368465423584, + "learning_rate": 9.993325780522846e-06, + "loss": 0.3026, + "step": 2296 + }, + { + "epoch": 0.04598253384380552, + "grad_norm": 1.1995785236358643, + "learning_rate": 9.99330902539947e-06, + "loss": 0.3484, + "step": 2297 + }, + { + "epoch": 0.04600255236093386, + "grad_norm": 1.142163634300232, + "learning_rate": 9.993292249285296e-06, + "loss": 0.3335, + "step": 2298 + }, + { + "epoch": 0.046022570878062205, + "grad_norm": 1.1245436668395996, + "learning_rate": 9.993275452180395e-06, + "loss": 0.3609, + "step": 2299 + }, + { + "epoch": 0.04604258939519055, + "grad_norm": 1.0106446743011475, + "learning_rate": 9.993258634084839e-06, + "loss": 0.3099, + "step": 2300 + }, + { + "epoch": 0.04606260791231889, + "grad_norm": 1.1483893394470215, + "learning_rate": 9.993241794998699e-06, + "loss": 0.3168, + "step": 2301 + }, + { + "epoch": 0.046082626429447236, + "grad_norm": 1.0218827724456787, + "learning_rate": 9.993224934922045e-06, + "loss": 0.3247, + "step": 2302 + }, + { + "epoch": 0.04610264494657558, + "grad_norm": 1.0639474391937256, + "learning_rate": 9.993208053854947e-06, + "loss": 0.3386, + "step": 2303 + }, + { + "epoch": 0.046122663463703924, + "grad_norm": 1.0683151483535767, + "learning_rate": 9.993191151797477e-06, + "loss": 0.2964, + "step": 2304 + }, + { + "epoch": 0.04614268198083227, + "grad_norm": 1.2994225025177002, + "learning_rate": 9.993174228749708e-06, + "loss": 0.3503, + "step": 2305 + }, + { + "epoch": 0.04616270049796061, + "grad_norm": 1.2254910469055176, + "learning_rate": 9.993157284711706e-06, + "loss": 0.3843, + "step": 2306 + }, + { + "epoch": 0.046182719015088955, + "grad_norm": 1.175897479057312, + "learning_rate": 9.993140319683548e-06, + "loss": 0.33, + "step": 2307 + }, + { + "epoch": 0.0462027375322173, + "grad_norm": 1.2618318796157837, + "learning_rate": 9.993123333665301e-06, + "loss": 0.3239, + "step": 2308 + }, + { + "epoch": 0.04622275604934564, + "grad_norm": 1.1451348066329956, + "learning_rate": 9.993106326657039e-06, + "loss": 0.3579, + "step": 2309 + }, + { + "epoch": 0.046242774566473986, + "grad_norm": 1.0885359048843384, + "learning_rate": 9.99308929865883e-06, + "loss": 0.3687, + "step": 2310 + }, + { + "epoch": 0.04626279308360233, + "grad_norm": 1.3215333223342896, + "learning_rate": 9.993072249670752e-06, + "loss": 0.348, + "step": 2311 + }, + { + "epoch": 0.046282811600730674, + "grad_norm": 1.258288860321045, + "learning_rate": 9.99305517969287e-06, + "loss": 0.316, + "step": 2312 + }, + { + "epoch": 0.04630283011785902, + "grad_norm": 1.2202645540237427, + "learning_rate": 9.99303808872526e-06, + "loss": 0.3515, + "step": 2313 + }, + { + "epoch": 0.04632284863498736, + "grad_norm": 1.1034808158874512, + "learning_rate": 9.99302097676799e-06, + "loss": 0.3796, + "step": 2314 + }, + { + "epoch": 0.046342867152115705, + "grad_norm": 1.1374340057373047, + "learning_rate": 9.993003843821138e-06, + "loss": 0.3004, + "step": 2315 + }, + { + "epoch": 0.04636288566924405, + "grad_norm": 1.1034232378005981, + "learning_rate": 9.99298668988477e-06, + "loss": 0.3545, + "step": 2316 + }, + { + "epoch": 0.04638290418637239, + "grad_norm": 1.195245385169983, + "learning_rate": 9.992969514958962e-06, + "loss": 0.3631, + "step": 2317 + }, + { + "epoch": 0.046402922703500736, + "grad_norm": 1.3483343124389648, + "learning_rate": 9.992952319043783e-06, + "loss": 0.3536, + "step": 2318 + }, + { + "epoch": 0.04642294122062908, + "grad_norm": 1.012702226638794, + "learning_rate": 9.992935102139307e-06, + "loss": 0.3307, + "step": 2319 + }, + { + "epoch": 0.046442959737757424, + "grad_norm": 2.0024755001068115, + "learning_rate": 9.992917864245607e-06, + "loss": 0.9093, + "step": 2320 + }, + { + "epoch": 0.04646297825488577, + "grad_norm": 1.1642816066741943, + "learning_rate": 9.992900605362754e-06, + "loss": 0.3455, + "step": 2321 + }, + { + "epoch": 0.04648299677201411, + "grad_norm": 1.2822809219360352, + "learning_rate": 9.992883325490823e-06, + "loss": 0.3551, + "step": 2322 + }, + { + "epoch": 0.046503015289142455, + "grad_norm": 1.2395588159561157, + "learning_rate": 9.992866024629884e-06, + "loss": 0.3815, + "step": 2323 + }, + { + "epoch": 0.0465230338062708, + "grad_norm": 1.0374945402145386, + "learning_rate": 9.99284870278001e-06, + "loss": 0.3514, + "step": 2324 + }, + { + "epoch": 0.04654305232339914, + "grad_norm": 1.0405844449996948, + "learning_rate": 9.992831359941276e-06, + "loss": 0.3022, + "step": 2325 + }, + { + "epoch": 0.046563070840527486, + "grad_norm": 1.2096573114395142, + "learning_rate": 9.992813996113752e-06, + "loss": 0.3753, + "step": 2326 + }, + { + "epoch": 0.04658308935765583, + "grad_norm": 1.0399781465530396, + "learning_rate": 9.992796611297513e-06, + "loss": 0.3262, + "step": 2327 + }, + { + "epoch": 0.04660310787478417, + "grad_norm": 1.0478134155273438, + "learning_rate": 9.992779205492632e-06, + "loss": 0.3074, + "step": 2328 + }, + { + "epoch": 0.04662312639191252, + "grad_norm": 1.1461273431777954, + "learning_rate": 9.992761778699181e-06, + "loss": 0.3115, + "step": 2329 + }, + { + "epoch": 0.04664314490904086, + "grad_norm": 1.1352365016937256, + "learning_rate": 9.992744330917235e-06, + "loss": 0.3081, + "step": 2330 + }, + { + "epoch": 0.046663163426169205, + "grad_norm": 1.0443785190582275, + "learning_rate": 9.992726862146864e-06, + "loss": 0.3005, + "step": 2331 + }, + { + "epoch": 0.04668318194329755, + "grad_norm": 2.0726215839385986, + "learning_rate": 9.992709372388147e-06, + "loss": 0.889, + "step": 2332 + }, + { + "epoch": 0.04670320046042589, + "grad_norm": 1.126634955406189, + "learning_rate": 9.992691861641152e-06, + "loss": 0.3031, + "step": 2333 + }, + { + "epoch": 0.046723218977554236, + "grad_norm": 1.078032374382019, + "learning_rate": 9.992674329905956e-06, + "loss": 0.3467, + "step": 2334 + }, + { + "epoch": 0.04674323749468258, + "grad_norm": 1.0291377305984497, + "learning_rate": 9.99265677718263e-06, + "loss": 0.3599, + "step": 2335 + }, + { + "epoch": 0.04676325601181092, + "grad_norm": 1.141851782798767, + "learning_rate": 9.992639203471251e-06, + "loss": 0.3603, + "step": 2336 + }, + { + "epoch": 0.04678327452893927, + "grad_norm": 1.1023681163787842, + "learning_rate": 9.99262160877189e-06, + "loss": 0.3178, + "step": 2337 + }, + { + "epoch": 0.04680329304606761, + "grad_norm": 1.0569430589675903, + "learning_rate": 9.992603993084622e-06, + "loss": 0.3577, + "step": 2338 + }, + { + "epoch": 0.046823311563195955, + "grad_norm": 1.0912096500396729, + "learning_rate": 9.99258635640952e-06, + "loss": 0.3391, + "step": 2339 + }, + { + "epoch": 0.0468433300803243, + "grad_norm": 1.1265205144882202, + "learning_rate": 9.992568698746663e-06, + "loss": 0.3483, + "step": 2340 + }, + { + "epoch": 0.04686334859745264, + "grad_norm": 1.188368797302246, + "learning_rate": 9.99255102009612e-06, + "loss": 0.3455, + "step": 2341 + }, + { + "epoch": 0.046883367114580986, + "grad_norm": 1.0525486469268799, + "learning_rate": 9.992533320457966e-06, + "loss": 0.3316, + "step": 2342 + }, + { + "epoch": 0.04690338563170933, + "grad_norm": 1.1914708614349365, + "learning_rate": 9.992515599832278e-06, + "loss": 0.3511, + "step": 2343 + }, + { + "epoch": 0.04692340414883767, + "grad_norm": 1.1990878582000732, + "learning_rate": 9.992497858219128e-06, + "loss": 0.3078, + "step": 2344 + }, + { + "epoch": 0.04694342266596602, + "grad_norm": 1.1526302099227905, + "learning_rate": 9.992480095618591e-06, + "loss": 0.3621, + "step": 2345 + }, + { + "epoch": 0.04696344118309436, + "grad_norm": 1.0633209943771362, + "learning_rate": 9.992462312030741e-06, + "loss": 0.3483, + "step": 2346 + }, + { + "epoch": 0.046983459700222704, + "grad_norm": 1.040841817855835, + "learning_rate": 9.992444507455656e-06, + "loss": 0.3498, + "step": 2347 + }, + { + "epoch": 0.04700347821735105, + "grad_norm": 0.9880803823471069, + "learning_rate": 9.992426681893407e-06, + "loss": 0.317, + "step": 2348 + }, + { + "epoch": 0.04702349673447939, + "grad_norm": 1.1877728700637817, + "learning_rate": 9.992408835344072e-06, + "loss": 0.3778, + "step": 2349 + }, + { + "epoch": 0.047043515251607736, + "grad_norm": 1.7857612371444702, + "learning_rate": 9.992390967807725e-06, + "loss": 0.8514, + "step": 2350 + }, + { + "epoch": 0.04706353376873608, + "grad_norm": 1.0769907236099243, + "learning_rate": 9.99237307928444e-06, + "loss": 0.3341, + "step": 2351 + }, + { + "epoch": 0.04708355228586442, + "grad_norm": 1.0510722398757935, + "learning_rate": 9.992355169774293e-06, + "loss": 0.3493, + "step": 2352 + }, + { + "epoch": 0.04710357080299277, + "grad_norm": 1.0298597812652588, + "learning_rate": 9.992337239277359e-06, + "loss": 0.3243, + "step": 2353 + }, + { + "epoch": 0.04712358932012111, + "grad_norm": 1.0792897939682007, + "learning_rate": 9.992319287793712e-06, + "loss": 0.3398, + "step": 2354 + }, + { + "epoch": 0.047143607837249454, + "grad_norm": 1.1115344762802124, + "learning_rate": 9.992301315323431e-06, + "loss": 0.3606, + "step": 2355 + }, + { + "epoch": 0.0471636263543778, + "grad_norm": 1.1916935443878174, + "learning_rate": 9.99228332186659e-06, + "loss": 0.3407, + "step": 2356 + }, + { + "epoch": 0.04718364487150614, + "grad_norm": 1.2253594398498535, + "learning_rate": 9.992265307423263e-06, + "loss": 0.3175, + "step": 2357 + }, + { + "epoch": 0.047203663388634486, + "grad_norm": 1.1995521783828735, + "learning_rate": 9.992247271993529e-06, + "loss": 0.3528, + "step": 2358 + }, + { + "epoch": 0.04722368190576283, + "grad_norm": 1.1212608814239502, + "learning_rate": 9.99222921557746e-06, + "loss": 0.4127, + "step": 2359 + }, + { + "epoch": 0.04724370042289117, + "grad_norm": 1.0646780729293823, + "learning_rate": 9.992211138175134e-06, + "loss": 0.3863, + "step": 2360 + }, + { + "epoch": 0.04726371894001952, + "grad_norm": 0.9528916478157043, + "learning_rate": 9.992193039786626e-06, + "loss": 0.2981, + "step": 2361 + }, + { + "epoch": 0.04728373745714786, + "grad_norm": 1.1421271562576294, + "learning_rate": 9.992174920412014e-06, + "loss": 0.3381, + "step": 2362 + }, + { + "epoch": 0.047303755974276204, + "grad_norm": 1.0029963254928589, + "learning_rate": 9.992156780051373e-06, + "loss": 0.3099, + "step": 2363 + }, + { + "epoch": 0.04732377449140455, + "grad_norm": 1.0835223197937012, + "learning_rate": 9.992138618704779e-06, + "loss": 0.3054, + "step": 2364 + }, + { + "epoch": 0.04734379300853289, + "grad_norm": 1.1856191158294678, + "learning_rate": 9.992120436372308e-06, + "loss": 0.3427, + "step": 2365 + }, + { + "epoch": 0.047363811525661235, + "grad_norm": 0.9733125567436218, + "learning_rate": 9.992102233054037e-06, + "loss": 0.2931, + "step": 2366 + }, + { + "epoch": 0.04738383004278958, + "grad_norm": 1.1029770374298096, + "learning_rate": 9.992084008750042e-06, + "loss": 0.331, + "step": 2367 + }, + { + "epoch": 0.04740384855991792, + "grad_norm": 1.0833194255828857, + "learning_rate": 9.9920657634604e-06, + "loss": 0.3109, + "step": 2368 + }, + { + "epoch": 0.04742386707704627, + "grad_norm": 1.0998040437698364, + "learning_rate": 9.992047497185191e-06, + "loss": 0.3255, + "step": 2369 + }, + { + "epoch": 0.04744388559417461, + "grad_norm": 1.1961724758148193, + "learning_rate": 9.992029209924486e-06, + "loss": 0.3552, + "step": 2370 + }, + { + "epoch": 0.047463904111302954, + "grad_norm": 1.116494059562683, + "learning_rate": 9.992010901678366e-06, + "loss": 0.3552, + "step": 2371 + }, + { + "epoch": 0.0474839226284313, + "grad_norm": 1.102137804031372, + "learning_rate": 9.991992572446904e-06, + "loss": 0.3093, + "step": 2372 + }, + { + "epoch": 0.04750394114555964, + "grad_norm": 1.091383934020996, + "learning_rate": 9.991974222230182e-06, + "loss": 0.3054, + "step": 2373 + }, + { + "epoch": 0.047523959662687985, + "grad_norm": 1.136234164237976, + "learning_rate": 9.991955851028273e-06, + "loss": 0.3551, + "step": 2374 + }, + { + "epoch": 0.04754397817981633, + "grad_norm": 1.14642333984375, + "learning_rate": 9.991937458841257e-06, + "loss": 0.3305, + "step": 2375 + }, + { + "epoch": 0.04756399669694467, + "grad_norm": 1.0799221992492676, + "learning_rate": 9.99191904566921e-06, + "loss": 0.3007, + "step": 2376 + }, + { + "epoch": 0.04758401521407302, + "grad_norm": 1.1281441450119019, + "learning_rate": 9.991900611512209e-06, + "loss": 0.3484, + "step": 2377 + }, + { + "epoch": 0.04760403373120136, + "grad_norm": 1.217971682548523, + "learning_rate": 9.991882156370331e-06, + "loss": 0.3507, + "step": 2378 + }, + { + "epoch": 0.047624052248329704, + "grad_norm": 1.8032764196395874, + "learning_rate": 9.991863680243658e-06, + "loss": 0.8089, + "step": 2379 + }, + { + "epoch": 0.04764407076545805, + "grad_norm": 1.1957142353057861, + "learning_rate": 9.991845183132262e-06, + "loss": 0.3117, + "step": 2380 + }, + { + "epoch": 0.04766408928258639, + "grad_norm": 1.1810592412948608, + "learning_rate": 9.991826665036224e-06, + "loss": 0.3301, + "step": 2381 + }, + { + "epoch": 0.047684107799714735, + "grad_norm": 1.880863904953003, + "learning_rate": 9.99180812595562e-06, + "loss": 0.8621, + "step": 2382 + }, + { + "epoch": 0.04770412631684308, + "grad_norm": 1.104122281074524, + "learning_rate": 9.991789565890528e-06, + "loss": 0.3383, + "step": 2383 + }, + { + "epoch": 0.04772414483397142, + "grad_norm": 1.1792340278625488, + "learning_rate": 9.99177098484103e-06, + "loss": 0.3288, + "step": 2384 + }, + { + "epoch": 0.047744163351099767, + "grad_norm": 1.3607752323150635, + "learning_rate": 9.991752382807198e-06, + "loss": 0.3575, + "step": 2385 + }, + { + "epoch": 0.04776418186822811, + "grad_norm": 1.121131420135498, + "learning_rate": 9.991733759789114e-06, + "loss": 0.3459, + "step": 2386 + }, + { + "epoch": 0.047784200385356454, + "grad_norm": 1.8121556043624878, + "learning_rate": 9.991715115786858e-06, + "loss": 0.8631, + "step": 2387 + }, + { + "epoch": 0.0478042189024848, + "grad_norm": 1.8978564739227295, + "learning_rate": 9.991696450800503e-06, + "loss": 0.7718, + "step": 2388 + }, + { + "epoch": 0.04782423741961314, + "grad_norm": 1.8635432720184326, + "learning_rate": 9.991677764830133e-06, + "loss": 0.8184, + "step": 2389 + }, + { + "epoch": 0.047844255936741485, + "grad_norm": 1.0108131170272827, + "learning_rate": 9.99165905787582e-06, + "loss": 0.3307, + "step": 2390 + }, + { + "epoch": 0.04786427445386983, + "grad_norm": 1.0867140293121338, + "learning_rate": 9.991640329937648e-06, + "loss": 0.2939, + "step": 2391 + }, + { + "epoch": 0.04788429297099817, + "grad_norm": 1.0621188879013062, + "learning_rate": 9.991621581015696e-06, + "loss": 0.3469, + "step": 2392 + }, + { + "epoch": 0.047904311488126516, + "grad_norm": 1.1077337265014648, + "learning_rate": 9.991602811110041e-06, + "loss": 0.3284, + "step": 2393 + }, + { + "epoch": 0.04792433000525486, + "grad_norm": 1.1746258735656738, + "learning_rate": 9.99158402022076e-06, + "loss": 0.3194, + "step": 2394 + }, + { + "epoch": 0.047944348522383204, + "grad_norm": 1.051879644393921, + "learning_rate": 9.991565208347935e-06, + "loss": 0.3526, + "step": 2395 + }, + { + "epoch": 0.04796436703951155, + "grad_norm": 1.2454825639724731, + "learning_rate": 9.991546375491643e-06, + "loss": 0.3563, + "step": 2396 + }, + { + "epoch": 0.04798438555663989, + "grad_norm": 1.2867087125778198, + "learning_rate": 9.991527521651967e-06, + "loss": 0.3543, + "step": 2397 + }, + { + "epoch": 0.048004404073768235, + "grad_norm": 2.044262647628784, + "learning_rate": 9.991508646828983e-06, + "loss": 0.8609, + "step": 2398 + }, + { + "epoch": 0.04802442259089658, + "grad_norm": 1.8140026330947876, + "learning_rate": 9.991489751022768e-06, + "loss": 0.8616, + "step": 2399 + }, + { + "epoch": 0.04804444110802492, + "grad_norm": 0.9931821823120117, + "learning_rate": 9.991470834233406e-06, + "loss": 0.3509, + "step": 2400 + }, + { + "epoch": 0.048064459625153266, + "grad_norm": 1.1175925731658936, + "learning_rate": 9.991451896460976e-06, + "loss": 0.3718, + "step": 2401 + }, + { + "epoch": 0.04808447814228161, + "grad_norm": 1.066075086593628, + "learning_rate": 9.991432937705554e-06, + "loss": 0.3188, + "step": 2402 + }, + { + "epoch": 0.048104496659409954, + "grad_norm": 1.0645126104354858, + "learning_rate": 9.991413957967224e-06, + "loss": 0.3227, + "step": 2403 + }, + { + "epoch": 0.0481245151765383, + "grad_norm": 1.0552592277526855, + "learning_rate": 9.991394957246063e-06, + "loss": 0.3257, + "step": 2404 + }, + { + "epoch": 0.04814453369366664, + "grad_norm": 1.1935101747512817, + "learning_rate": 9.991375935542151e-06, + "loss": 0.3494, + "step": 2405 + }, + { + "epoch": 0.048164552210794985, + "grad_norm": 1.2178493738174438, + "learning_rate": 9.99135689285557e-06, + "loss": 0.3656, + "step": 2406 + }, + { + "epoch": 0.04818457072792333, + "grad_norm": 1.0273008346557617, + "learning_rate": 9.991337829186398e-06, + "loss": 0.2924, + "step": 2407 + }, + { + "epoch": 0.04820458924505167, + "grad_norm": 1.0902758836746216, + "learning_rate": 9.991318744534716e-06, + "loss": 0.3506, + "step": 2408 + }, + { + "epoch": 0.048224607762180016, + "grad_norm": 1.052201509475708, + "learning_rate": 9.991299638900604e-06, + "loss": 0.3878, + "step": 2409 + }, + { + "epoch": 0.04824462627930836, + "grad_norm": 1.079473853111267, + "learning_rate": 9.991280512284143e-06, + "loss": 0.3228, + "step": 2410 + }, + { + "epoch": 0.048264644796436704, + "grad_norm": 1.04761803150177, + "learning_rate": 9.991261364685413e-06, + "loss": 0.2904, + "step": 2411 + }, + { + "epoch": 0.04828466331356505, + "grad_norm": 2.108748435974121, + "learning_rate": 9.991242196104494e-06, + "loss": 0.8718, + "step": 2412 + }, + { + "epoch": 0.04830468183069339, + "grad_norm": 1.0605406761169434, + "learning_rate": 9.991223006541466e-06, + "loss": 0.3028, + "step": 2413 + }, + { + "epoch": 0.048324700347821735, + "grad_norm": 1.0636625289916992, + "learning_rate": 9.991203795996412e-06, + "loss": 0.3438, + "step": 2414 + }, + { + "epoch": 0.04834471886495008, + "grad_norm": 1.1161481142044067, + "learning_rate": 9.991184564469412e-06, + "loss": 0.3291, + "step": 2415 + }, + { + "epoch": 0.04836473738207842, + "grad_norm": 1.1312364339828491, + "learning_rate": 9.991165311960545e-06, + "loss": 0.3416, + "step": 2416 + }, + { + "epoch": 0.048384755899206766, + "grad_norm": 1.9103132486343384, + "learning_rate": 9.991146038469893e-06, + "loss": 0.8676, + "step": 2417 + }, + { + "epoch": 0.04840477441633511, + "grad_norm": 1.1460410356521606, + "learning_rate": 9.991126743997536e-06, + "loss": 0.3392, + "step": 2418 + }, + { + "epoch": 0.048424792933463454, + "grad_norm": 1.2249715328216553, + "learning_rate": 9.991107428543559e-06, + "loss": 0.3726, + "step": 2419 + }, + { + "epoch": 0.0484448114505918, + "grad_norm": 1.260068655014038, + "learning_rate": 9.991088092108039e-06, + "loss": 0.277, + "step": 2420 + }, + { + "epoch": 0.04846482996772014, + "grad_norm": 1.4430899620056152, + "learning_rate": 9.991068734691058e-06, + "loss": 0.3222, + "step": 2421 + }, + { + "epoch": 0.048484848484848485, + "grad_norm": 1.2122209072113037, + "learning_rate": 9.9910493562927e-06, + "loss": 0.3809, + "step": 2422 + }, + { + "epoch": 0.04850486700197683, + "grad_norm": 1.1279281377792358, + "learning_rate": 9.991029956913043e-06, + "loss": 0.351, + "step": 2423 + }, + { + "epoch": 0.04852488551910517, + "grad_norm": 1.0255385637283325, + "learning_rate": 9.991010536552172e-06, + "loss": 0.3263, + "step": 2424 + }, + { + "epoch": 0.048544904036233516, + "grad_norm": 2.133787155151367, + "learning_rate": 9.990991095210166e-06, + "loss": 0.3176, + "step": 2425 + }, + { + "epoch": 0.04856492255336186, + "grad_norm": 1.1785962581634521, + "learning_rate": 9.990971632887106e-06, + "loss": 0.3534, + "step": 2426 + }, + { + "epoch": 0.048584941070490203, + "grad_norm": 1.0079900026321411, + "learning_rate": 9.990952149583078e-06, + "loss": 0.2704, + "step": 2427 + }, + { + "epoch": 0.04860495958761855, + "grad_norm": 1.111312985420227, + "learning_rate": 9.99093264529816e-06, + "loss": 0.3458, + "step": 2428 + }, + { + "epoch": 0.04862497810474689, + "grad_norm": 1.1629897356033325, + "learning_rate": 9.990913120032433e-06, + "loss": 0.3003, + "step": 2429 + }, + { + "epoch": 0.048644996621875235, + "grad_norm": 1.1343327760696411, + "learning_rate": 9.990893573785984e-06, + "loss": 0.286, + "step": 2430 + }, + { + "epoch": 0.04866501513900358, + "grad_norm": 1.0505938529968262, + "learning_rate": 9.990874006558891e-06, + "loss": 0.3359, + "step": 2431 + }, + { + "epoch": 0.04868503365613192, + "grad_norm": 1.1189619302749634, + "learning_rate": 9.990854418351238e-06, + "loss": 0.342, + "step": 2432 + }, + { + "epoch": 0.048705052173260266, + "grad_norm": 1.041853666305542, + "learning_rate": 9.990834809163108e-06, + "loss": 0.3697, + "step": 2433 + }, + { + "epoch": 0.04872507069038861, + "grad_norm": 1.0714869499206543, + "learning_rate": 9.990815178994581e-06, + "loss": 0.2901, + "step": 2434 + }, + { + "epoch": 0.04874508920751695, + "grad_norm": 1.2010362148284912, + "learning_rate": 9.990795527845743e-06, + "loss": 0.3431, + "step": 2435 + }, + { + "epoch": 0.0487651077246453, + "grad_norm": 1.3356117010116577, + "learning_rate": 9.990775855716674e-06, + "loss": 0.353, + "step": 2436 + }, + { + "epoch": 0.04878512624177364, + "grad_norm": 1.0574299097061157, + "learning_rate": 9.990756162607455e-06, + "loss": 0.3226, + "step": 2437 + }, + { + "epoch": 0.048805144758901985, + "grad_norm": 1.0348913669586182, + "learning_rate": 9.990736448518175e-06, + "loss": 0.3522, + "step": 2438 + }, + { + "epoch": 0.04882516327603033, + "grad_norm": 1.0171167850494385, + "learning_rate": 9.99071671344891e-06, + "loss": 0.3239, + "step": 2439 + }, + { + "epoch": 0.04884518179315867, + "grad_norm": 1.0028027296066284, + "learning_rate": 9.990696957399748e-06, + "loss": 0.3219, + "step": 2440 + }, + { + "epoch": 0.048865200310287016, + "grad_norm": 1.1312111616134644, + "learning_rate": 9.99067718037077e-06, + "loss": 0.3602, + "step": 2441 + }, + { + "epoch": 0.04888521882741536, + "grad_norm": 1.041509985923767, + "learning_rate": 9.990657382362056e-06, + "loss": 0.3494, + "step": 2442 + }, + { + "epoch": 0.0489052373445437, + "grad_norm": 1.1668667793273926, + "learning_rate": 9.990637563373695e-06, + "loss": 0.3693, + "step": 2443 + }, + { + "epoch": 0.04892525586167205, + "grad_norm": 1.240339994430542, + "learning_rate": 9.990617723405767e-06, + "loss": 0.3901, + "step": 2444 + }, + { + "epoch": 0.04894527437880039, + "grad_norm": 1.1001423597335815, + "learning_rate": 9.990597862458356e-06, + "loss": 0.336, + "step": 2445 + }, + { + "epoch": 0.048965292895928735, + "grad_norm": 1.1525803804397583, + "learning_rate": 9.990577980531546e-06, + "loss": 0.2878, + "step": 2446 + }, + { + "epoch": 0.04898531141305708, + "grad_norm": 1.1905262470245361, + "learning_rate": 9.99055807762542e-06, + "loss": 0.3129, + "step": 2447 + }, + { + "epoch": 0.04900532993018542, + "grad_norm": 1.1534578800201416, + "learning_rate": 9.990538153740062e-06, + "loss": 0.3435, + "step": 2448 + }, + { + "epoch": 0.049025348447313766, + "grad_norm": 1.1146619319915771, + "learning_rate": 9.990518208875555e-06, + "loss": 0.344, + "step": 2449 + }, + { + "epoch": 0.04904536696444211, + "grad_norm": 1.8302712440490723, + "learning_rate": 9.990498243031984e-06, + "loss": 0.8053, + "step": 2450 + }, + { + "epoch": 0.04906538548157045, + "grad_norm": 1.06768798828125, + "learning_rate": 9.990478256209433e-06, + "loss": 0.2914, + "step": 2451 + }, + { + "epoch": 0.0490854039986988, + "grad_norm": 1.0575385093688965, + "learning_rate": 9.990458248407984e-06, + "loss": 0.3189, + "step": 2452 + }, + { + "epoch": 0.04910542251582714, + "grad_norm": 1.0716954469680786, + "learning_rate": 9.990438219627722e-06, + "loss": 0.3371, + "step": 2453 + }, + { + "epoch": 0.049125441032955484, + "grad_norm": 1.1289045810699463, + "learning_rate": 9.990418169868733e-06, + "loss": 0.3441, + "step": 2454 + }, + { + "epoch": 0.04914545955008383, + "grad_norm": 1.166378378868103, + "learning_rate": 9.9903980991311e-06, + "loss": 0.3269, + "step": 2455 + }, + { + "epoch": 0.04916547806721217, + "grad_norm": 1.1310464143753052, + "learning_rate": 9.990378007414908e-06, + "loss": 0.3146, + "step": 2456 + }, + { + "epoch": 0.049185496584340516, + "grad_norm": 1.1405013799667358, + "learning_rate": 9.990357894720239e-06, + "loss": 0.3061, + "step": 2457 + }, + { + "epoch": 0.04920551510146886, + "grad_norm": 1.9510096311569214, + "learning_rate": 9.99033776104718e-06, + "loss": 0.8619, + "step": 2458 + }, + { + "epoch": 0.0492255336185972, + "grad_norm": 1.0061469078063965, + "learning_rate": 9.990317606395815e-06, + "loss": 0.3469, + "step": 2459 + }, + { + "epoch": 0.04924555213572555, + "grad_norm": 1.0503324270248413, + "learning_rate": 9.990297430766229e-06, + "loss": 0.3569, + "step": 2460 + }, + { + "epoch": 0.04926557065285389, + "grad_norm": 1.1681385040283203, + "learning_rate": 9.990277234158506e-06, + "loss": 0.3342, + "step": 2461 + }, + { + "epoch": 0.049285589169982234, + "grad_norm": 1.1871788501739502, + "learning_rate": 9.990257016572732e-06, + "loss": 0.3783, + "step": 2462 + }, + { + "epoch": 0.04930560768711058, + "grad_norm": 1.0471277236938477, + "learning_rate": 9.990236778008991e-06, + "loss": 0.3257, + "step": 2463 + }, + { + "epoch": 0.04932562620423892, + "grad_norm": 1.1617408990859985, + "learning_rate": 9.990216518467368e-06, + "loss": 0.3152, + "step": 2464 + }, + { + "epoch": 0.049345644721367266, + "grad_norm": 1.3776533603668213, + "learning_rate": 9.99019623794795e-06, + "loss": 0.3418, + "step": 2465 + }, + { + "epoch": 0.04936566323849561, + "grad_norm": 1.1207393407821655, + "learning_rate": 9.990175936450822e-06, + "loss": 0.385, + "step": 2466 + }, + { + "epoch": 0.04938568175562395, + "grad_norm": 1.0107362270355225, + "learning_rate": 9.990155613976065e-06, + "loss": 0.315, + "step": 2467 + }, + { + "epoch": 0.0494057002727523, + "grad_norm": 1.1434035301208496, + "learning_rate": 9.99013527052377e-06, + "loss": 0.3065, + "step": 2468 + }, + { + "epoch": 0.04942571878988064, + "grad_norm": 1.2199816703796387, + "learning_rate": 9.99011490609402e-06, + "loss": 0.3338, + "step": 2469 + }, + { + "epoch": 0.049445737307008984, + "grad_norm": 1.0971944332122803, + "learning_rate": 9.990094520686899e-06, + "loss": 0.3465, + "step": 2470 + }, + { + "epoch": 0.04946575582413733, + "grad_norm": 1.155154824256897, + "learning_rate": 9.990074114302495e-06, + "loss": 0.3456, + "step": 2471 + }, + { + "epoch": 0.04948577434126567, + "grad_norm": 1.0548152923583984, + "learning_rate": 9.990053686940895e-06, + "loss": 0.3365, + "step": 2472 + }, + { + "epoch": 0.049505792858394015, + "grad_norm": 1.064415454864502, + "learning_rate": 9.990033238602181e-06, + "loss": 0.3518, + "step": 2473 + }, + { + "epoch": 0.04952581137552236, + "grad_norm": 1.1336432695388794, + "learning_rate": 9.990012769286443e-06, + "loss": 0.3611, + "step": 2474 + }, + { + "epoch": 0.0495458298926507, + "grad_norm": 2.223184823989868, + "learning_rate": 9.989992278993765e-06, + "loss": 0.8431, + "step": 2475 + }, + { + "epoch": 0.04956584840977905, + "grad_norm": 1.1917736530303955, + "learning_rate": 9.989971767724234e-06, + "loss": 0.3305, + "step": 2476 + }, + { + "epoch": 0.04958586692690739, + "grad_norm": 1.1804149150848389, + "learning_rate": 9.989951235477933e-06, + "loss": 0.3473, + "step": 2477 + }, + { + "epoch": 0.049605885444035734, + "grad_norm": 1.1953307390213013, + "learning_rate": 9.989930682254954e-06, + "loss": 0.3136, + "step": 2478 + }, + { + "epoch": 0.04962590396116408, + "grad_norm": 1.0340763330459595, + "learning_rate": 9.989910108055378e-06, + "loss": 0.3123, + "step": 2479 + }, + { + "epoch": 0.04964592247829242, + "grad_norm": 1.0921577215194702, + "learning_rate": 9.989889512879294e-06, + "loss": 0.3172, + "step": 2480 + }, + { + "epoch": 0.049665940995420765, + "grad_norm": 1.05033540725708, + "learning_rate": 9.98986889672679e-06, + "loss": 0.3401, + "step": 2481 + }, + { + "epoch": 0.04968595951254911, + "grad_norm": 1.1180877685546875, + "learning_rate": 9.989848259597948e-06, + "loss": 0.3342, + "step": 2482 + }, + { + "epoch": 0.04970597802967745, + "grad_norm": 1.085943579673767, + "learning_rate": 9.989827601492859e-06, + "loss": 0.3096, + "step": 2483 + }, + { + "epoch": 0.0497259965468058, + "grad_norm": 1.1141583919525146, + "learning_rate": 9.989806922411609e-06, + "loss": 0.3967, + "step": 2484 + }, + { + "epoch": 0.04974601506393414, + "grad_norm": 1.1426039934158325, + "learning_rate": 9.989786222354286e-06, + "loss": 0.3475, + "step": 2485 + }, + { + "epoch": 0.049766033581062484, + "grad_norm": 1.0326093435287476, + "learning_rate": 9.989765501320971e-06, + "loss": 0.3268, + "step": 2486 + }, + { + "epoch": 0.04978605209819083, + "grad_norm": 1.204393982887268, + "learning_rate": 9.98974475931176e-06, + "loss": 0.3467, + "step": 2487 + }, + { + "epoch": 0.04980607061531917, + "grad_norm": 1.0206434726715088, + "learning_rate": 9.989723996326734e-06, + "loss": 0.3406, + "step": 2488 + }, + { + "epoch": 0.049826089132447515, + "grad_norm": 1.168546199798584, + "learning_rate": 9.989703212365983e-06, + "loss": 0.3137, + "step": 2489 + }, + { + "epoch": 0.04984610764957586, + "grad_norm": 1.068556308746338, + "learning_rate": 9.989682407429593e-06, + "loss": 0.3074, + "step": 2490 + }, + { + "epoch": 0.0498661261667042, + "grad_norm": 1.2548465728759766, + "learning_rate": 9.989661581517653e-06, + "loss": 0.3069, + "step": 2491 + }, + { + "epoch": 0.049886144683832546, + "grad_norm": 1.110405445098877, + "learning_rate": 9.989640734630248e-06, + "loss": 0.3479, + "step": 2492 + }, + { + "epoch": 0.04990616320096089, + "grad_norm": 1.1231626272201538, + "learning_rate": 9.989619866767467e-06, + "loss": 0.3606, + "step": 2493 + }, + { + "epoch": 0.049926181718089234, + "grad_norm": 1.186233639717102, + "learning_rate": 9.989598977929398e-06, + "loss": 0.3362, + "step": 2494 + }, + { + "epoch": 0.04994620023521758, + "grad_norm": 1.0351669788360596, + "learning_rate": 9.98957806811613e-06, + "loss": 0.3296, + "step": 2495 + }, + { + "epoch": 0.04996621875234592, + "grad_norm": 1.03407621383667, + "learning_rate": 9.989557137327748e-06, + "loss": 0.3531, + "step": 2496 + }, + { + "epoch": 0.049986237269474265, + "grad_norm": 1.0999308824539185, + "learning_rate": 9.989536185564344e-06, + "loss": 0.2816, + "step": 2497 + }, + { + "epoch": 0.05000625578660261, + "grad_norm": 1.1528520584106445, + "learning_rate": 9.989515212826e-06, + "loss": 0.4174, + "step": 2498 + }, + { + "epoch": 0.05002627430373095, + "grad_norm": 1.863580584526062, + "learning_rate": 9.98949421911281e-06, + "loss": 0.8406, + "step": 2499 + }, + { + "epoch": 0.050046292820859296, + "grad_norm": 1.1538156270980835, + "learning_rate": 9.98947320442486e-06, + "loss": 0.409, + "step": 2500 + }, + { + "epoch": 0.05006631133798764, + "grad_norm": 1.0219039916992188, + "learning_rate": 9.98945216876224e-06, + "loss": 0.3119, + "step": 2501 + }, + { + "epoch": 0.050086329855115984, + "grad_norm": 1.1651192903518677, + "learning_rate": 9.989431112125034e-06, + "loss": 0.34, + "step": 2502 + }, + { + "epoch": 0.05010634837224433, + "grad_norm": 1.069038987159729, + "learning_rate": 9.989410034513334e-06, + "loss": 0.326, + "step": 2503 + }, + { + "epoch": 0.05012636688937267, + "grad_norm": 1.1491878032684326, + "learning_rate": 9.989388935927229e-06, + "loss": 0.3152, + "step": 2504 + }, + { + "epoch": 0.050146385406501015, + "grad_norm": 1.0667234659194946, + "learning_rate": 9.989367816366805e-06, + "loss": 0.2989, + "step": 2505 + }, + { + "epoch": 0.05016640392362936, + "grad_norm": 1.047333002090454, + "learning_rate": 9.989346675832153e-06, + "loss": 0.3399, + "step": 2506 + }, + { + "epoch": 0.0501864224407577, + "grad_norm": 1.2692863941192627, + "learning_rate": 9.989325514323362e-06, + "loss": 0.3921, + "step": 2507 + }, + { + "epoch": 0.050206440957886046, + "grad_norm": 1.1165698766708374, + "learning_rate": 9.989304331840521e-06, + "loss": 0.3717, + "step": 2508 + }, + { + "epoch": 0.05022645947501439, + "grad_norm": 1.0988506078720093, + "learning_rate": 9.989283128383716e-06, + "loss": 0.3862, + "step": 2509 + }, + { + "epoch": 0.050246477992142734, + "grad_norm": 0.9764906167984009, + "learning_rate": 9.98926190395304e-06, + "loss": 0.3301, + "step": 2510 + }, + { + "epoch": 0.05026649650927108, + "grad_norm": 1.2210191488265991, + "learning_rate": 9.98924065854858e-06, + "loss": 0.3441, + "step": 2511 + }, + { + "epoch": 0.05028651502639942, + "grad_norm": 1.0834475755691528, + "learning_rate": 9.989219392170426e-06, + "loss": 0.3453, + "step": 2512 + }, + { + "epoch": 0.050306533543527765, + "grad_norm": 0.9791473746299744, + "learning_rate": 9.989198104818667e-06, + "loss": 0.3386, + "step": 2513 + }, + { + "epoch": 0.05032655206065611, + "grad_norm": 1.0579532384872437, + "learning_rate": 9.989176796493393e-06, + "loss": 0.3314, + "step": 2514 + }, + { + "epoch": 0.05034657057778445, + "grad_norm": 1.0931456089019775, + "learning_rate": 9.989155467194696e-06, + "loss": 0.3073, + "step": 2515 + }, + { + "epoch": 0.050366589094912796, + "grad_norm": 0.9321861863136292, + "learning_rate": 9.989134116922661e-06, + "loss": 0.2813, + "step": 2516 + }, + { + "epoch": 0.05038660761204114, + "grad_norm": 1.1199179887771606, + "learning_rate": 9.98911274567738e-06, + "loss": 0.3239, + "step": 2517 + }, + { + "epoch": 0.050406626129169484, + "grad_norm": 1.2945748567581177, + "learning_rate": 9.989091353458942e-06, + "loss": 0.356, + "step": 2518 + }, + { + "epoch": 0.05042664464629783, + "grad_norm": 1.0714926719665527, + "learning_rate": 9.989069940267438e-06, + "loss": 0.2995, + "step": 2519 + }, + { + "epoch": 0.05044666316342617, + "grad_norm": 1.0253726243972778, + "learning_rate": 9.989048506102959e-06, + "loss": 0.3266, + "step": 2520 + }, + { + "epoch": 0.050466681680554515, + "grad_norm": 1.1063284873962402, + "learning_rate": 9.989027050965592e-06, + "loss": 0.3249, + "step": 2521 + }, + { + "epoch": 0.05048670019768286, + "grad_norm": 0.9872124791145325, + "learning_rate": 9.989005574855428e-06, + "loss": 0.3124, + "step": 2522 + }, + { + "epoch": 0.0505067187148112, + "grad_norm": 1.0168815851211548, + "learning_rate": 9.988984077772561e-06, + "loss": 0.3766, + "step": 2523 + }, + { + "epoch": 0.050526737231939546, + "grad_norm": 1.1441401243209839, + "learning_rate": 9.988962559717078e-06, + "loss": 0.3975, + "step": 2524 + }, + { + "epoch": 0.05054675574906789, + "grad_norm": 1.1075559854507446, + "learning_rate": 9.988941020689068e-06, + "loss": 0.3636, + "step": 2525 + }, + { + "epoch": 0.050566774266196234, + "grad_norm": 1.2931842803955078, + "learning_rate": 9.988919460688625e-06, + "loss": 0.3757, + "step": 2526 + }, + { + "epoch": 0.05058679278332458, + "grad_norm": 1.4530831575393677, + "learning_rate": 9.988897879715838e-06, + "loss": 0.3687, + "step": 2527 + }, + { + "epoch": 0.05060681130045292, + "grad_norm": 1.0536283254623413, + "learning_rate": 9.988876277770799e-06, + "loss": 0.2721, + "step": 2528 + }, + { + "epoch": 0.050626829817581265, + "grad_norm": 1.2230892181396484, + "learning_rate": 9.988854654853595e-06, + "loss": 0.3556, + "step": 2529 + }, + { + "epoch": 0.05064684833470961, + "grad_norm": 1.050588846206665, + "learning_rate": 9.988833010964323e-06, + "loss": 0.3889, + "step": 2530 + }, + { + "epoch": 0.05066686685183795, + "grad_norm": 1.073000431060791, + "learning_rate": 9.988811346103068e-06, + "loss": 0.3035, + "step": 2531 + }, + { + "epoch": 0.050686885368966296, + "grad_norm": 1.204756259918213, + "learning_rate": 9.988789660269926e-06, + "loss": 0.3423, + "step": 2532 + }, + { + "epoch": 0.05070690388609464, + "grad_norm": 1.0623542070388794, + "learning_rate": 9.988767953464985e-06, + "loss": 0.3481, + "step": 2533 + }, + { + "epoch": 0.05072692240322298, + "grad_norm": 1.7957032918930054, + "learning_rate": 9.988746225688335e-06, + "loss": 0.8428, + "step": 2534 + }, + { + "epoch": 0.05074694092035133, + "grad_norm": 1.0250463485717773, + "learning_rate": 9.988724476940072e-06, + "loss": 0.3119, + "step": 2535 + }, + { + "epoch": 0.05076695943747967, + "grad_norm": 1.0673370361328125, + "learning_rate": 9.988702707220283e-06, + "loss": 0.3478, + "step": 2536 + }, + { + "epoch": 0.050786977954608015, + "grad_norm": 1.0179194211959839, + "learning_rate": 9.988680916529063e-06, + "loss": 0.371, + "step": 2537 + }, + { + "epoch": 0.05080699647173636, + "grad_norm": 1.1486629247665405, + "learning_rate": 9.988659104866502e-06, + "loss": 0.3238, + "step": 2538 + }, + { + "epoch": 0.0508270149888647, + "grad_norm": 1.270818829536438, + "learning_rate": 9.98863727223269e-06, + "loss": 0.3731, + "step": 2539 + }, + { + "epoch": 0.050847033505993046, + "grad_norm": 1.1812187433242798, + "learning_rate": 9.988615418627724e-06, + "loss": 0.3475, + "step": 2540 + }, + { + "epoch": 0.05086705202312139, + "grad_norm": 1.1733285188674927, + "learning_rate": 9.988593544051688e-06, + "loss": 0.3543, + "step": 2541 + }, + { + "epoch": 0.05088707054024973, + "grad_norm": 1.1774191856384277, + "learning_rate": 9.988571648504681e-06, + "loss": 0.3452, + "step": 2542 + }, + { + "epoch": 0.05090708905737808, + "grad_norm": 0.9969701170921326, + "learning_rate": 9.988549731986791e-06, + "loss": 0.3547, + "step": 2543 + }, + { + "epoch": 0.05092710757450642, + "grad_norm": 1.0747995376586914, + "learning_rate": 9.988527794498112e-06, + "loss": 0.3394, + "step": 2544 + }, + { + "epoch": 0.050947126091634765, + "grad_norm": 1.0537792444229126, + "learning_rate": 9.988505836038735e-06, + "loss": 0.3274, + "step": 2545 + }, + { + "epoch": 0.05096714460876311, + "grad_norm": 0.9508224725723267, + "learning_rate": 9.988483856608754e-06, + "loss": 0.3507, + "step": 2546 + }, + { + "epoch": 0.05098716312589145, + "grad_norm": 1.0360627174377441, + "learning_rate": 9.988461856208262e-06, + "loss": 0.3228, + "step": 2547 + }, + { + "epoch": 0.051007181643019796, + "grad_norm": 1.0848535299301147, + "learning_rate": 9.988439834837347e-06, + "loss": 0.3507, + "step": 2548 + }, + { + "epoch": 0.05102720016014814, + "grad_norm": 1.0985947847366333, + "learning_rate": 9.988417792496106e-06, + "loss": 0.3053, + "step": 2549 + }, + { + "epoch": 0.05104721867727648, + "grad_norm": 1.2075483798980713, + "learning_rate": 9.98839572918463e-06, + "loss": 0.2941, + "step": 2550 + }, + { + "epoch": 0.05106723719440483, + "grad_norm": 1.1042966842651367, + "learning_rate": 9.98837364490301e-06, + "loss": 0.3493, + "step": 2551 + }, + { + "epoch": 0.05108725571153317, + "grad_norm": 1.1398712396621704, + "learning_rate": 9.988351539651343e-06, + "loss": 0.3502, + "step": 2552 + }, + { + "epoch": 0.051107274228661514, + "grad_norm": 1.1513117551803589, + "learning_rate": 9.988329413429719e-06, + "loss": 0.3404, + "step": 2553 + }, + { + "epoch": 0.05112729274578986, + "grad_norm": 1.1268163919448853, + "learning_rate": 9.988307266238232e-06, + "loss": 0.3177, + "step": 2554 + }, + { + "epoch": 0.0511473112629182, + "grad_norm": 1.4803009033203125, + "learning_rate": 9.988285098076974e-06, + "loss": 0.3543, + "step": 2555 + }, + { + "epoch": 0.051167329780046546, + "grad_norm": 1.1592209339141846, + "learning_rate": 9.98826290894604e-06, + "loss": 0.3585, + "step": 2556 + }, + { + "epoch": 0.05118734829717489, + "grad_norm": 1.0118657350540161, + "learning_rate": 9.98824069884552e-06, + "loss": 0.2823, + "step": 2557 + }, + { + "epoch": 0.05120736681430323, + "grad_norm": 1.3030250072479248, + "learning_rate": 9.988218467775512e-06, + "loss": 0.3282, + "step": 2558 + }, + { + "epoch": 0.05122738533143158, + "grad_norm": 1.067016363143921, + "learning_rate": 9.988196215736106e-06, + "loss": 0.356, + "step": 2559 + }, + { + "epoch": 0.05124740384855992, + "grad_norm": 1.734088659286499, + "learning_rate": 9.988173942727396e-06, + "loss": 0.8303, + "step": 2560 + }, + { + "epoch": 0.051267422365688264, + "grad_norm": 1.2919552326202393, + "learning_rate": 9.988151648749476e-06, + "loss": 0.3012, + "step": 2561 + }, + { + "epoch": 0.05128744088281661, + "grad_norm": 1.102341651916504, + "learning_rate": 9.988129333802441e-06, + "loss": 0.3253, + "step": 2562 + }, + { + "epoch": 0.05130745939994495, + "grad_norm": 1.1726127862930298, + "learning_rate": 9.988106997886384e-06, + "loss": 0.2654, + "step": 2563 + }, + { + "epoch": 0.051327477917073296, + "grad_norm": 1.0616096258163452, + "learning_rate": 9.988084641001398e-06, + "loss": 0.3455, + "step": 2564 + }, + { + "epoch": 0.05134749643420164, + "grad_norm": 1.1273480653762817, + "learning_rate": 9.988062263147578e-06, + "loss": 0.3552, + "step": 2565 + }, + { + "epoch": 0.05136751495132998, + "grad_norm": 1.04810631275177, + "learning_rate": 9.988039864325018e-06, + "loss": 0.399, + "step": 2566 + }, + { + "epoch": 0.05138753346845833, + "grad_norm": 1.0687825679779053, + "learning_rate": 9.98801744453381e-06, + "loss": 0.3295, + "step": 2567 + }, + { + "epoch": 0.05140755198558667, + "grad_norm": 1.089050054550171, + "learning_rate": 9.987995003774053e-06, + "loss": 0.3899, + "step": 2568 + }, + { + "epoch": 0.051427570502715014, + "grad_norm": 1.361708641052246, + "learning_rate": 9.987972542045836e-06, + "loss": 0.3477, + "step": 2569 + }, + { + "epoch": 0.05144758901984336, + "grad_norm": 1.2698029279708862, + "learning_rate": 9.987950059349256e-06, + "loss": 0.3184, + "step": 2570 + }, + { + "epoch": 0.0514676075369717, + "grad_norm": 1.1449213027954102, + "learning_rate": 9.987927555684408e-06, + "loss": 0.3714, + "step": 2571 + }, + { + "epoch": 0.051487626054100046, + "grad_norm": 1.168701171875, + "learning_rate": 9.987905031051388e-06, + "loss": 0.3279, + "step": 2572 + }, + { + "epoch": 0.05150764457122839, + "grad_norm": 1.1290918588638306, + "learning_rate": 9.987882485450285e-06, + "loss": 0.3649, + "step": 2573 + }, + { + "epoch": 0.05152766308835673, + "grad_norm": 1.0489658117294312, + "learning_rate": 9.9878599188812e-06, + "loss": 0.3326, + "step": 2574 + }, + { + "epoch": 0.05154768160548508, + "grad_norm": 1.0277647972106934, + "learning_rate": 9.987837331344223e-06, + "loss": 0.3475, + "step": 2575 + }, + { + "epoch": 0.05156770012261342, + "grad_norm": 1.1803975105285645, + "learning_rate": 9.987814722839453e-06, + "loss": 0.3015, + "step": 2576 + }, + { + "epoch": 0.051587718639741764, + "grad_norm": 1.1479253768920898, + "learning_rate": 9.987792093366984e-06, + "loss": 0.3513, + "step": 2577 + }, + { + "epoch": 0.05160773715687011, + "grad_norm": 1.7835365533828735, + "learning_rate": 9.98776944292691e-06, + "loss": 0.8346, + "step": 2578 + }, + { + "epoch": 0.05162775567399845, + "grad_norm": 1.1695246696472168, + "learning_rate": 9.987746771519325e-06, + "loss": 0.3322, + "step": 2579 + }, + { + "epoch": 0.051647774191126795, + "grad_norm": 1.067273497581482, + "learning_rate": 9.987724079144328e-06, + "loss": 0.2871, + "step": 2580 + }, + { + "epoch": 0.05166779270825514, + "grad_norm": 1.0018481016159058, + "learning_rate": 9.98770136580201e-06, + "loss": 0.306, + "step": 2581 + }, + { + "epoch": 0.05168781122538348, + "grad_norm": 0.9910014867782593, + "learning_rate": 9.98767863149247e-06, + "loss": 0.3288, + "step": 2582 + }, + { + "epoch": 0.05170782974251183, + "grad_norm": 0.9951345324516296, + "learning_rate": 9.987655876215803e-06, + "loss": 0.3295, + "step": 2583 + }, + { + "epoch": 0.05172784825964017, + "grad_norm": 1.0699207782745361, + "learning_rate": 9.987633099972103e-06, + "loss": 0.3923, + "step": 2584 + }, + { + "epoch": 0.051747866776768514, + "grad_norm": 1.0302437543869019, + "learning_rate": 9.987610302761468e-06, + "loss": 0.2947, + "step": 2585 + }, + { + "epoch": 0.05176788529389686, + "grad_norm": 1.169559359550476, + "learning_rate": 9.987587484583992e-06, + "loss": 0.2623, + "step": 2586 + }, + { + "epoch": 0.0517879038110252, + "grad_norm": 1.0416319370269775, + "learning_rate": 9.98756464543977e-06, + "loss": 0.3348, + "step": 2587 + }, + { + "epoch": 0.051807922328153545, + "grad_norm": 1.219727873802185, + "learning_rate": 9.987541785328902e-06, + "loss": 0.3298, + "step": 2588 + }, + { + "epoch": 0.05182794084528189, + "grad_norm": 1.0563938617706299, + "learning_rate": 9.987518904251481e-06, + "loss": 0.3482, + "step": 2589 + }, + { + "epoch": 0.05184795936241023, + "grad_norm": 1.057615876197815, + "learning_rate": 9.987496002207605e-06, + "loss": 0.2791, + "step": 2590 + }, + { + "epoch": 0.051867977879538577, + "grad_norm": 1.2039433717727661, + "learning_rate": 9.987473079197367e-06, + "loss": 0.3277, + "step": 2591 + }, + { + "epoch": 0.05188799639666692, + "grad_norm": 1.1794098615646362, + "learning_rate": 9.987450135220867e-06, + "loss": 0.3732, + "step": 2592 + }, + { + "epoch": 0.051908014913795264, + "grad_norm": 1.1042228937149048, + "learning_rate": 9.987427170278199e-06, + "loss": 0.3456, + "step": 2593 + }, + { + "epoch": 0.05192803343092361, + "grad_norm": 1.0687549114227295, + "learning_rate": 9.98740418436946e-06, + "loss": 0.3147, + "step": 2594 + }, + { + "epoch": 0.05194805194805195, + "grad_norm": 1.1323522329330444, + "learning_rate": 9.987381177494748e-06, + "loss": 0.3896, + "step": 2595 + }, + { + "epoch": 0.051968070465180295, + "grad_norm": 1.1729549169540405, + "learning_rate": 9.987358149654158e-06, + "loss": 0.355, + "step": 2596 + }, + { + "epoch": 0.05198808898230864, + "grad_norm": 1.916853666305542, + "learning_rate": 9.987335100847788e-06, + "loss": 0.8896, + "step": 2597 + }, + { + "epoch": 0.052008107499436976, + "grad_norm": 1.1228861808776855, + "learning_rate": 9.987312031075736e-06, + "loss": 0.2988, + "step": 2598 + }, + { + "epoch": 0.05202812601656532, + "grad_norm": 1.064122200012207, + "learning_rate": 9.987288940338096e-06, + "loss": 0.328, + "step": 2599 + }, + { + "epoch": 0.05204814453369366, + "grad_norm": 1.0623666048049927, + "learning_rate": 9.987265828634967e-06, + "loss": 0.3425, + "step": 2600 + }, + { + "epoch": 0.05206816305082201, + "grad_norm": 1.7999166250228882, + "learning_rate": 9.987242695966445e-06, + "loss": 0.7564, + "step": 2601 + }, + { + "epoch": 0.05208818156795035, + "grad_norm": 1.0549057722091675, + "learning_rate": 9.98721954233263e-06, + "loss": 0.2844, + "step": 2602 + }, + { + "epoch": 0.052108200085078694, + "grad_norm": 1.2711933851242065, + "learning_rate": 9.987196367733615e-06, + "loss": 0.3732, + "step": 2603 + }, + { + "epoch": 0.05212821860220704, + "grad_norm": 1.1005253791809082, + "learning_rate": 9.9871731721695e-06, + "loss": 0.3936, + "step": 2604 + }, + { + "epoch": 0.05214823711933538, + "grad_norm": 1.9680663347244263, + "learning_rate": 9.987149955640383e-06, + "loss": 0.8395, + "step": 2605 + }, + { + "epoch": 0.052168255636463726, + "grad_norm": 1.9770278930664062, + "learning_rate": 9.98712671814636e-06, + "loss": 0.7809, + "step": 2606 + }, + { + "epoch": 0.05218827415359207, + "grad_norm": 1.1562292575836182, + "learning_rate": 9.987103459687531e-06, + "loss": 0.3264, + "step": 2607 + }, + { + "epoch": 0.05220829267072041, + "grad_norm": 1.0558909177780151, + "learning_rate": 9.98708018026399e-06, + "loss": 0.3403, + "step": 2608 + }, + { + "epoch": 0.05222831118784876, + "grad_norm": 1.051920771598816, + "learning_rate": 9.987056879875841e-06, + "loss": 0.3533, + "step": 2609 + }, + { + "epoch": 0.0522483297049771, + "grad_norm": 1.172982096672058, + "learning_rate": 9.987033558523174e-06, + "loss": 0.3453, + "step": 2610 + }, + { + "epoch": 0.052268348222105444, + "grad_norm": 1.022819995880127, + "learning_rate": 9.987010216206093e-06, + "loss": 0.2857, + "step": 2611 + }, + { + "epoch": 0.05228836673923379, + "grad_norm": 1.0940053462982178, + "learning_rate": 9.986986852924694e-06, + "loss": 0.3222, + "step": 2612 + }, + { + "epoch": 0.05230838525636213, + "grad_norm": 1.269423484802246, + "learning_rate": 9.986963468679076e-06, + "loss": 0.353, + "step": 2613 + }, + { + "epoch": 0.052328403773490476, + "grad_norm": 1.0866320133209229, + "learning_rate": 9.986940063469338e-06, + "loss": 0.3454, + "step": 2614 + }, + { + "epoch": 0.05234842229061882, + "grad_norm": 1.9024600982666016, + "learning_rate": 9.986916637295573e-06, + "loss": 0.8747, + "step": 2615 + }, + { + "epoch": 0.05236844080774716, + "grad_norm": 1.1546070575714111, + "learning_rate": 9.986893190157887e-06, + "loss": 0.3518, + "step": 2616 + }, + { + "epoch": 0.05238845932487551, + "grad_norm": 1.1516867876052856, + "learning_rate": 9.986869722056374e-06, + "loss": 0.3399, + "step": 2617 + }, + { + "epoch": 0.05240847784200385, + "grad_norm": 1.0483494997024536, + "learning_rate": 9.986846232991135e-06, + "loss": 0.2941, + "step": 2618 + }, + { + "epoch": 0.052428496359132194, + "grad_norm": 1.0833667516708374, + "learning_rate": 9.986822722962267e-06, + "loss": 0.4245, + "step": 2619 + }, + { + "epoch": 0.05244851487626054, + "grad_norm": 1.0169271230697632, + "learning_rate": 9.98679919196987e-06, + "loss": 0.2907, + "step": 2620 + }, + { + "epoch": 0.05246853339338888, + "grad_norm": 1.1360039710998535, + "learning_rate": 9.986775640014042e-06, + "loss": 0.3615, + "step": 2621 + }, + { + "epoch": 0.052488551910517225, + "grad_norm": 1.1192755699157715, + "learning_rate": 9.986752067094882e-06, + "loss": 0.3384, + "step": 2622 + }, + { + "epoch": 0.05250857042764557, + "grad_norm": 1.050897479057312, + "learning_rate": 9.98672847321249e-06, + "loss": 0.3261, + "step": 2623 + }, + { + "epoch": 0.05252858894477391, + "grad_norm": 1.1609562635421753, + "learning_rate": 9.986704858366963e-06, + "loss": 0.3521, + "step": 2624 + }, + { + "epoch": 0.05254860746190226, + "grad_norm": 1.344098687171936, + "learning_rate": 9.986681222558403e-06, + "loss": 0.3197, + "step": 2625 + }, + { + "epoch": 0.0525686259790306, + "grad_norm": 1.1328376531600952, + "learning_rate": 9.986657565786909e-06, + "loss": 0.3498, + "step": 2626 + }, + { + "epoch": 0.052588644496158944, + "grad_norm": 1.8394622802734375, + "learning_rate": 9.98663388805258e-06, + "loss": 0.8404, + "step": 2627 + }, + { + "epoch": 0.05260866301328729, + "grad_norm": 1.1681597232818604, + "learning_rate": 9.986610189355514e-06, + "loss": 0.3783, + "step": 2628 + }, + { + "epoch": 0.05262868153041563, + "grad_norm": 1.1272789239883423, + "learning_rate": 9.98658646969581e-06, + "loss": 0.3438, + "step": 2629 + }, + { + "epoch": 0.052648700047543975, + "grad_norm": 1.1146621704101562, + "learning_rate": 9.986562729073575e-06, + "loss": 0.3577, + "step": 2630 + }, + { + "epoch": 0.05266871856467232, + "grad_norm": 1.0819238424301147, + "learning_rate": 9.986538967488899e-06, + "loss": 0.3312, + "step": 2631 + }, + { + "epoch": 0.05268873708180066, + "grad_norm": 1.061537742614746, + "learning_rate": 9.986515184941888e-06, + "loss": 0.3408, + "step": 2632 + }, + { + "epoch": 0.05270875559892901, + "grad_norm": 1.0475950241088867, + "learning_rate": 9.98649138143264e-06, + "loss": 0.3171, + "step": 2633 + }, + { + "epoch": 0.05272877411605735, + "grad_norm": 1.7540725469589233, + "learning_rate": 9.986467556961254e-06, + "loss": 0.8344, + "step": 2634 + }, + { + "epoch": 0.052748792633185694, + "grad_norm": 1.143708348274231, + "learning_rate": 9.986443711527834e-06, + "loss": 0.3329, + "step": 2635 + }, + { + "epoch": 0.05276881115031404, + "grad_norm": 1.0907825231552124, + "learning_rate": 9.986419845132476e-06, + "loss": 0.315, + "step": 2636 + }, + { + "epoch": 0.05278882966744238, + "grad_norm": 1.7662110328674316, + "learning_rate": 9.986395957775283e-06, + "loss": 0.8779, + "step": 2637 + }, + { + "epoch": 0.052808848184570725, + "grad_norm": 1.054876446723938, + "learning_rate": 9.986372049456355e-06, + "loss": 0.3404, + "step": 2638 + }, + { + "epoch": 0.05282886670169907, + "grad_norm": 1.0627148151397705, + "learning_rate": 9.98634812017579e-06, + "loss": 0.3066, + "step": 2639 + }, + { + "epoch": 0.05284888521882741, + "grad_norm": 1.1411479711532593, + "learning_rate": 9.986324169933692e-06, + "loss": 0.3422, + "step": 2640 + }, + { + "epoch": 0.052868903735955756, + "grad_norm": 1.2381484508514404, + "learning_rate": 9.98630019873016e-06, + "loss": 0.4499, + "step": 2641 + }, + { + "epoch": 0.0528889222530841, + "grad_norm": 1.03895902633667, + "learning_rate": 9.986276206565295e-06, + "loss": 0.3072, + "step": 2642 + }, + { + "epoch": 0.052908940770212444, + "grad_norm": 1.1107079982757568, + "learning_rate": 9.986252193439198e-06, + "loss": 0.3932, + "step": 2643 + }, + { + "epoch": 0.05292895928734079, + "grad_norm": 1.9884697198867798, + "learning_rate": 9.986228159351969e-06, + "loss": 0.8636, + "step": 2644 + }, + { + "epoch": 0.05294897780446913, + "grad_norm": 1.23809814453125, + "learning_rate": 9.98620410430371e-06, + "loss": 0.3265, + "step": 2645 + }, + { + "epoch": 0.052968996321597475, + "grad_norm": 1.0744578838348389, + "learning_rate": 9.986180028294524e-06, + "loss": 0.3019, + "step": 2646 + }, + { + "epoch": 0.05298901483872582, + "grad_norm": 1.1643067598342896, + "learning_rate": 9.986155931324509e-06, + "loss": 0.3633, + "step": 2647 + }, + { + "epoch": 0.05300903335585416, + "grad_norm": 1.2716611623764038, + "learning_rate": 9.986131813393766e-06, + "loss": 0.3298, + "step": 2648 + }, + { + "epoch": 0.053029051872982506, + "grad_norm": 1.0504041910171509, + "learning_rate": 9.986107674502399e-06, + "loss": 0.3246, + "step": 2649 + }, + { + "epoch": 0.05304907039011085, + "grad_norm": 1.843673586845398, + "learning_rate": 9.986083514650508e-06, + "loss": 0.8648, + "step": 2650 + }, + { + "epoch": 0.053069088907239194, + "grad_norm": 1.0033262968063354, + "learning_rate": 9.986059333838196e-06, + "loss": 0.3171, + "step": 2651 + }, + { + "epoch": 0.05308910742436754, + "grad_norm": 1.1016075611114502, + "learning_rate": 9.986035132065562e-06, + "loss": 0.3301, + "step": 2652 + }, + { + "epoch": 0.05310912594149588, + "grad_norm": 1.885260820388794, + "learning_rate": 9.986010909332711e-06, + "loss": 0.8511, + "step": 2653 + }, + { + "epoch": 0.053129144458624225, + "grad_norm": 1.080768346786499, + "learning_rate": 9.985986665639741e-06, + "loss": 0.3698, + "step": 2654 + }, + { + "epoch": 0.05314916297575257, + "grad_norm": 1.1645855903625488, + "learning_rate": 9.985962400986758e-06, + "loss": 0.3407, + "step": 2655 + }, + { + "epoch": 0.05316918149288091, + "grad_norm": 1.0773111581802368, + "learning_rate": 9.985938115373862e-06, + "loss": 0.2828, + "step": 2656 + }, + { + "epoch": 0.053189200010009256, + "grad_norm": 1.796323299407959, + "learning_rate": 9.985913808801156e-06, + "loss": 0.8011, + "step": 2657 + }, + { + "epoch": 0.0532092185271376, + "grad_norm": 1.231564998626709, + "learning_rate": 9.985889481268737e-06, + "loss": 0.3124, + "step": 2658 + }, + { + "epoch": 0.053229237044265944, + "grad_norm": 1.8422002792358398, + "learning_rate": 9.985865132776715e-06, + "loss": 0.8657, + "step": 2659 + }, + { + "epoch": 0.05324925556139429, + "grad_norm": 1.1097655296325684, + "learning_rate": 9.98584076332519e-06, + "loss": 0.3865, + "step": 2660 + }, + { + "epoch": 0.05326927407852263, + "grad_norm": 1.1338186264038086, + "learning_rate": 9.985816372914262e-06, + "loss": 0.3648, + "step": 2661 + }, + { + "epoch": 0.053289292595650975, + "grad_norm": 1.88276207447052, + "learning_rate": 9.985791961544033e-06, + "loss": 0.8625, + "step": 2662 + }, + { + "epoch": 0.05330931111277932, + "grad_norm": 1.1040550470352173, + "learning_rate": 9.98576752921461e-06, + "loss": 0.3283, + "step": 2663 + }, + { + "epoch": 0.05332932962990766, + "grad_norm": 1.1724274158477783, + "learning_rate": 9.985743075926093e-06, + "loss": 0.3575, + "step": 2664 + }, + { + "epoch": 0.053349348147036006, + "grad_norm": 1.197234869003296, + "learning_rate": 9.985718601678584e-06, + "loss": 0.348, + "step": 2665 + }, + { + "epoch": 0.05336936666416435, + "grad_norm": 1.0122147798538208, + "learning_rate": 9.985694106472187e-06, + "loss": 0.3221, + "step": 2666 + }, + { + "epoch": 0.053389385181292694, + "grad_norm": 1.0619056224822998, + "learning_rate": 9.985669590307007e-06, + "loss": 0.3687, + "step": 2667 + }, + { + "epoch": 0.05340940369842104, + "grad_norm": 2.116952896118164, + "learning_rate": 9.985645053183143e-06, + "loss": 0.8172, + "step": 2668 + }, + { + "epoch": 0.05342942221554938, + "grad_norm": 1.1392003297805786, + "learning_rate": 9.985620495100701e-06, + "loss": 0.323, + "step": 2669 + }, + { + "epoch": 0.053449440732677725, + "grad_norm": 1.1261049509048462, + "learning_rate": 9.985595916059782e-06, + "loss": 0.3294, + "step": 2670 + }, + { + "epoch": 0.05346945924980607, + "grad_norm": 1.2648844718933105, + "learning_rate": 9.985571316060492e-06, + "loss": 0.3536, + "step": 2671 + }, + { + "epoch": 0.05348947776693441, + "grad_norm": 1.0510059595108032, + "learning_rate": 9.985546695102933e-06, + "loss": 0.3385, + "step": 2672 + }, + { + "epoch": 0.053509496284062756, + "grad_norm": 1.1444265842437744, + "learning_rate": 9.98552205318721e-06, + "loss": 0.3364, + "step": 2673 + }, + { + "epoch": 0.0535295148011911, + "grad_norm": 1.24550461769104, + "learning_rate": 9.985497390313422e-06, + "loss": 0.3342, + "step": 2674 + }, + { + "epoch": 0.053549533318319444, + "grad_norm": 0.9929347634315491, + "learning_rate": 9.985472706481678e-06, + "loss": 0.303, + "step": 2675 + }, + { + "epoch": 0.05356955183544779, + "grad_norm": 1.1073237657546997, + "learning_rate": 9.985448001692078e-06, + "loss": 0.3469, + "step": 2676 + }, + { + "epoch": 0.05358957035257613, + "grad_norm": 1.1285063028335571, + "learning_rate": 9.985423275944729e-06, + "loss": 0.3388, + "step": 2677 + }, + { + "epoch": 0.053609588869704475, + "grad_norm": 1.0858851671218872, + "learning_rate": 9.985398529239733e-06, + "loss": 0.2941, + "step": 2678 + }, + { + "epoch": 0.05362960738683282, + "grad_norm": 1.0904327630996704, + "learning_rate": 9.985373761577195e-06, + "loss": 0.3473, + "step": 2679 + }, + { + "epoch": 0.05364962590396116, + "grad_norm": 1.189211368560791, + "learning_rate": 9.985348972957218e-06, + "loss": 0.347, + "step": 2680 + }, + { + "epoch": 0.053669644421089506, + "grad_norm": 0.9827781915664673, + "learning_rate": 9.985324163379906e-06, + "loss": 0.3165, + "step": 2681 + }, + { + "epoch": 0.05368966293821785, + "grad_norm": 1.1224504709243774, + "learning_rate": 9.985299332845364e-06, + "loss": 0.3766, + "step": 2682 + }, + { + "epoch": 0.053709681455346193, + "grad_norm": 1.8431191444396973, + "learning_rate": 9.985274481353697e-06, + "loss": 0.9342, + "step": 2683 + }, + { + "epoch": 0.05372969997247454, + "grad_norm": 1.0928475856781006, + "learning_rate": 9.98524960890501e-06, + "loss": 0.2983, + "step": 2684 + }, + { + "epoch": 0.05374971848960288, + "grad_norm": 1.1880748271942139, + "learning_rate": 9.985224715499406e-06, + "loss": 0.3792, + "step": 2685 + }, + { + "epoch": 0.053769737006731225, + "grad_norm": 1.0256543159484863, + "learning_rate": 9.985199801136988e-06, + "loss": 0.3769, + "step": 2686 + }, + { + "epoch": 0.05378975552385957, + "grad_norm": 1.1428543329238892, + "learning_rate": 9.985174865817866e-06, + "loss": 0.3756, + "step": 2687 + }, + { + "epoch": 0.05380977404098791, + "grad_norm": 1.15771484375, + "learning_rate": 9.98514990954214e-06, + "loss": 0.3297, + "step": 2688 + }, + { + "epoch": 0.053829792558116256, + "grad_norm": 1.134904146194458, + "learning_rate": 9.985124932309918e-06, + "loss": 0.4027, + "step": 2689 + }, + { + "epoch": 0.0538498110752446, + "grad_norm": 1.040604591369629, + "learning_rate": 9.985099934121301e-06, + "loss": 0.351, + "step": 2690 + }, + { + "epoch": 0.05386982959237294, + "grad_norm": 1.0445470809936523, + "learning_rate": 9.9850749149764e-06, + "loss": 0.3567, + "step": 2691 + }, + { + "epoch": 0.05388984810950129, + "grad_norm": 1.2725718021392822, + "learning_rate": 9.985049874875314e-06, + "loss": 0.3464, + "step": 2692 + }, + { + "epoch": 0.05390986662662963, + "grad_norm": 1.0416477918624878, + "learning_rate": 9.985024813818152e-06, + "loss": 0.3006, + "step": 2693 + }, + { + "epoch": 0.053929885143757975, + "grad_norm": 1.0291458368301392, + "learning_rate": 9.984999731805019e-06, + "loss": 0.3377, + "step": 2694 + }, + { + "epoch": 0.05394990366088632, + "grad_norm": 0.9829162359237671, + "learning_rate": 9.984974628836019e-06, + "loss": 0.34, + "step": 2695 + }, + { + "epoch": 0.05396992217801466, + "grad_norm": 2.0263872146606445, + "learning_rate": 9.984949504911258e-06, + "loss": 0.8097, + "step": 2696 + }, + { + "epoch": 0.053989940695143006, + "grad_norm": 1.0199275016784668, + "learning_rate": 9.984924360030845e-06, + "loss": 0.3081, + "step": 2697 + }, + { + "epoch": 0.05400995921227135, + "grad_norm": 1.0620203018188477, + "learning_rate": 9.98489919419488e-06, + "loss": 0.3319, + "step": 2698 + }, + { + "epoch": 0.05402997772939969, + "grad_norm": 1.0494670867919922, + "learning_rate": 9.984874007403473e-06, + "loss": 0.3252, + "step": 2699 + }, + { + "epoch": 0.05404999624652804, + "grad_norm": 1.0881576538085938, + "learning_rate": 9.984848799656729e-06, + "loss": 0.3766, + "step": 2700 + }, + { + "epoch": 0.05407001476365638, + "grad_norm": 1.1179640293121338, + "learning_rate": 9.984823570954751e-06, + "loss": 0.3646, + "step": 2701 + }, + { + "epoch": 0.054090033280784725, + "grad_norm": 1.0283734798431396, + "learning_rate": 9.984798321297648e-06, + "loss": 0.2794, + "step": 2702 + }, + { + "epoch": 0.05411005179791307, + "grad_norm": 1.0992398262023926, + "learning_rate": 9.984773050685527e-06, + "loss": 0.3366, + "step": 2703 + }, + { + "epoch": 0.05413007031504141, + "grad_norm": 1.0487655401229858, + "learning_rate": 9.984747759118491e-06, + "loss": 0.3151, + "step": 2704 + }, + { + "epoch": 0.054150088832169756, + "grad_norm": 1.037052035331726, + "learning_rate": 9.984722446596649e-06, + "loss": 0.3049, + "step": 2705 + }, + { + "epoch": 0.0541701073492981, + "grad_norm": 1.059334635734558, + "learning_rate": 9.984697113120108e-06, + "loss": 0.2884, + "step": 2706 + }, + { + "epoch": 0.05419012586642644, + "grad_norm": 1.1582491397857666, + "learning_rate": 9.984671758688971e-06, + "loss": 0.3589, + "step": 2707 + }, + { + "epoch": 0.05421014438355479, + "grad_norm": 1.1371362209320068, + "learning_rate": 9.984646383303347e-06, + "loss": 0.3351, + "step": 2708 + }, + { + "epoch": 0.05423016290068313, + "grad_norm": 1.0514832735061646, + "learning_rate": 9.984620986963343e-06, + "loss": 0.3185, + "step": 2709 + }, + { + "epoch": 0.054250181417811474, + "grad_norm": 1.189158320426941, + "learning_rate": 9.984595569669065e-06, + "loss": 0.3211, + "step": 2710 + }, + { + "epoch": 0.05427019993493982, + "grad_norm": 1.739924430847168, + "learning_rate": 9.98457013142062e-06, + "loss": 0.8566, + "step": 2711 + }, + { + "epoch": 0.05429021845206816, + "grad_norm": 1.8572497367858887, + "learning_rate": 9.984544672218115e-06, + "loss": 0.8683, + "step": 2712 + }, + { + "epoch": 0.054310236969196506, + "grad_norm": 1.0841237306594849, + "learning_rate": 9.984519192061657e-06, + "loss": 0.309, + "step": 2713 + }, + { + "epoch": 0.05433025548632485, + "grad_norm": 1.1488194465637207, + "learning_rate": 9.984493690951351e-06, + "loss": 0.3312, + "step": 2714 + }, + { + "epoch": 0.05435027400345319, + "grad_norm": 1.1527326107025146, + "learning_rate": 9.984468168887309e-06, + "loss": 0.3335, + "step": 2715 + }, + { + "epoch": 0.05437029252058154, + "grad_norm": 1.0958017110824585, + "learning_rate": 9.984442625869633e-06, + "loss": 0.3609, + "step": 2716 + }, + { + "epoch": 0.05439031103770988, + "grad_norm": 1.1124324798583984, + "learning_rate": 9.984417061898436e-06, + "loss": 0.3195, + "step": 2717 + }, + { + "epoch": 0.054410329554838224, + "grad_norm": 1.0229228734970093, + "learning_rate": 9.98439147697382e-06, + "loss": 0.3258, + "step": 2718 + }, + { + "epoch": 0.05443034807196657, + "grad_norm": 1.2634977102279663, + "learning_rate": 9.984365871095894e-06, + "loss": 0.3494, + "step": 2719 + }, + { + "epoch": 0.05445036658909491, + "grad_norm": 1.101944923400879, + "learning_rate": 9.984340244264769e-06, + "loss": 0.3487, + "step": 2720 + }, + { + "epoch": 0.054470385106223256, + "grad_norm": 1.1110401153564453, + "learning_rate": 9.98431459648055e-06, + "loss": 0.3542, + "step": 2721 + }, + { + "epoch": 0.0544904036233516, + "grad_norm": 1.9812017679214478, + "learning_rate": 9.984288927743341e-06, + "loss": 0.7897, + "step": 2722 + }, + { + "epoch": 0.05451042214047994, + "grad_norm": 2.006283760070801, + "learning_rate": 9.984263238053259e-06, + "loss": 0.8482, + "step": 2723 + }, + { + "epoch": 0.05453044065760829, + "grad_norm": 1.093960165977478, + "learning_rate": 9.984237527410406e-06, + "loss": 0.3659, + "step": 2724 + }, + { + "epoch": 0.05455045917473663, + "grad_norm": 1.0443720817565918, + "learning_rate": 9.98421179581489e-06, + "loss": 0.3167, + "step": 2725 + }, + { + "epoch": 0.054570477691864974, + "grad_norm": 1.1948970556259155, + "learning_rate": 9.984186043266819e-06, + "loss": 0.3028, + "step": 2726 + }, + { + "epoch": 0.05459049620899332, + "grad_norm": 1.0792640447616577, + "learning_rate": 9.984160269766305e-06, + "loss": 0.3837, + "step": 2727 + }, + { + "epoch": 0.05461051472612166, + "grad_norm": 1.0385138988494873, + "learning_rate": 9.984134475313451e-06, + "loss": 0.3208, + "step": 2728 + }, + { + "epoch": 0.054630533243250005, + "grad_norm": 1.24711275100708, + "learning_rate": 9.984108659908372e-06, + "loss": 0.3571, + "step": 2729 + }, + { + "epoch": 0.05465055176037835, + "grad_norm": 1.1072717905044556, + "learning_rate": 9.984082823551168e-06, + "loss": 0.3905, + "step": 2730 + }, + { + "epoch": 0.05467057027750669, + "grad_norm": 1.0779409408569336, + "learning_rate": 9.984056966241958e-06, + "loss": 0.3132, + "step": 2731 + }, + { + "epoch": 0.05469058879463504, + "grad_norm": 1.1360198259353638, + "learning_rate": 9.98403108798084e-06, + "loss": 0.3692, + "step": 2732 + }, + { + "epoch": 0.05471060731176338, + "grad_norm": 1.097269058227539, + "learning_rate": 9.984005188767931e-06, + "loss": 0.3506, + "step": 2733 + }, + { + "epoch": 0.054730625828891724, + "grad_norm": 1.221848487854004, + "learning_rate": 9.983979268603335e-06, + "loss": 0.3423, + "step": 2734 + }, + { + "epoch": 0.05475064434602007, + "grad_norm": 1.1706182956695557, + "learning_rate": 9.983953327487164e-06, + "loss": 0.382, + "step": 2735 + }, + { + "epoch": 0.05477066286314841, + "grad_norm": 1.4513355493545532, + "learning_rate": 9.983927365419526e-06, + "loss": 0.2813, + "step": 2736 + }, + { + "epoch": 0.054790681380276755, + "grad_norm": 1.1810230016708374, + "learning_rate": 9.98390138240053e-06, + "loss": 0.3371, + "step": 2737 + }, + { + "epoch": 0.0548106998974051, + "grad_norm": 1.0149033069610596, + "learning_rate": 9.983875378430283e-06, + "loss": 0.3229, + "step": 2738 + }, + { + "epoch": 0.05483071841453344, + "grad_norm": 1.1616690158843994, + "learning_rate": 9.983849353508899e-06, + "loss": 0.33, + "step": 2739 + }, + { + "epoch": 0.05485073693166179, + "grad_norm": 1.0868967771530151, + "learning_rate": 9.983823307636482e-06, + "loss": 0.357, + "step": 2740 + }, + { + "epoch": 0.05487075544879013, + "grad_norm": 0.9368399381637573, + "learning_rate": 9.983797240813147e-06, + "loss": 0.2785, + "step": 2741 + }, + { + "epoch": 0.054890773965918474, + "grad_norm": 1.0895918607711792, + "learning_rate": 9.983771153038999e-06, + "loss": 0.3653, + "step": 2742 + }, + { + "epoch": 0.05491079248304682, + "grad_norm": 1.0794566869735718, + "learning_rate": 9.983745044314152e-06, + "loss": 0.3318, + "step": 2743 + }, + { + "epoch": 0.05493081100017516, + "grad_norm": 1.0390751361846924, + "learning_rate": 9.98371891463871e-06, + "loss": 0.3413, + "step": 2744 + }, + { + "epoch": 0.054950829517303505, + "grad_norm": 0.9903419613838196, + "learning_rate": 9.983692764012789e-06, + "loss": 0.3392, + "step": 2745 + }, + { + "epoch": 0.05497084803443185, + "grad_norm": 1.0734095573425293, + "learning_rate": 9.983666592436496e-06, + "loss": 0.3317, + "step": 2746 + }, + { + "epoch": 0.05499086655156019, + "grad_norm": 1.1242622137069702, + "learning_rate": 9.98364039990994e-06, + "loss": 0.3254, + "step": 2747 + }, + { + "epoch": 0.055010885068688536, + "grad_norm": 1.1259100437164307, + "learning_rate": 9.983614186433233e-06, + "loss": 0.3051, + "step": 2748 + }, + { + "epoch": 0.05503090358581688, + "grad_norm": 1.1536272764205933, + "learning_rate": 9.983587952006484e-06, + "loss": 0.3717, + "step": 2749 + }, + { + "epoch": 0.055050922102945224, + "grad_norm": 1.1382960081100464, + "learning_rate": 9.983561696629803e-06, + "loss": 0.3459, + "step": 2750 + }, + { + "epoch": 0.05507094062007357, + "grad_norm": 1.0979496240615845, + "learning_rate": 9.983535420303303e-06, + "loss": 0.3202, + "step": 2751 + }, + { + "epoch": 0.05509095913720191, + "grad_norm": 1.0440138578414917, + "learning_rate": 9.98350912302709e-06, + "loss": 0.3528, + "step": 2752 + }, + { + "epoch": 0.055110977654330255, + "grad_norm": 1.078014612197876, + "learning_rate": 9.983482804801278e-06, + "loss": 0.3446, + "step": 2753 + }, + { + "epoch": 0.0551309961714586, + "grad_norm": 1.0940184593200684, + "learning_rate": 9.983456465625978e-06, + "loss": 0.3821, + "step": 2754 + }, + { + "epoch": 0.05515101468858694, + "grad_norm": 1.1043784618377686, + "learning_rate": 9.9834301055013e-06, + "loss": 0.3356, + "step": 2755 + }, + { + "epoch": 0.055171033205715286, + "grad_norm": 1.1962807178497314, + "learning_rate": 9.983403724427352e-06, + "loss": 0.36, + "step": 2756 + }, + { + "epoch": 0.05519105172284363, + "grad_norm": 1.1042901277542114, + "learning_rate": 9.983377322404249e-06, + "loss": 0.2998, + "step": 2757 + }, + { + "epoch": 0.055211070239971974, + "grad_norm": 1.095822811126709, + "learning_rate": 9.983350899432098e-06, + "loss": 0.2983, + "step": 2758 + }, + { + "epoch": 0.05523108875710032, + "grad_norm": 1.4097141027450562, + "learning_rate": 9.983324455511014e-06, + "loss": 0.3478, + "step": 2759 + }, + { + "epoch": 0.05525110727422866, + "grad_norm": 1.098899483680725, + "learning_rate": 9.983297990641107e-06, + "loss": 0.2966, + "step": 2760 + }, + { + "epoch": 0.055271125791357005, + "grad_norm": 1.2749474048614502, + "learning_rate": 9.983271504822487e-06, + "loss": 0.3859, + "step": 2761 + }, + { + "epoch": 0.05529114430848535, + "grad_norm": 0.9334107041358948, + "learning_rate": 9.983244998055264e-06, + "loss": 0.2841, + "step": 2762 + }, + { + "epoch": 0.05531116282561369, + "grad_norm": 1.1172173023223877, + "learning_rate": 9.983218470339554e-06, + "loss": 0.2889, + "step": 2763 + }, + { + "epoch": 0.055331181342742036, + "grad_norm": 1.113742470741272, + "learning_rate": 9.983191921675466e-06, + "loss": 0.3495, + "step": 2764 + }, + { + "epoch": 0.05535119985987038, + "grad_norm": 1.1517632007598877, + "learning_rate": 9.98316535206311e-06, + "loss": 0.3192, + "step": 2765 + }, + { + "epoch": 0.055371218376998724, + "grad_norm": 1.0857974290847778, + "learning_rate": 9.9831387615026e-06, + "loss": 0.2963, + "step": 2766 + }, + { + "epoch": 0.05539123689412707, + "grad_norm": 1.210383653640747, + "learning_rate": 9.983112149994048e-06, + "loss": 0.3647, + "step": 2767 + }, + { + "epoch": 0.05541125541125541, + "grad_norm": 1.2497317790985107, + "learning_rate": 9.983085517537563e-06, + "loss": 0.332, + "step": 2768 + }, + { + "epoch": 0.055431273928383755, + "grad_norm": 1.2112404108047485, + "learning_rate": 9.98305886413326e-06, + "loss": 0.3771, + "step": 2769 + }, + { + "epoch": 0.0554512924455121, + "grad_norm": 1.2363109588623047, + "learning_rate": 9.98303218978125e-06, + "loss": 0.3401, + "step": 2770 + }, + { + "epoch": 0.05547131096264044, + "grad_norm": 1.1384549140930176, + "learning_rate": 9.983005494481644e-06, + "loss": 0.3487, + "step": 2771 + }, + { + "epoch": 0.055491329479768786, + "grad_norm": 1.0927283763885498, + "learning_rate": 9.982978778234555e-06, + "loss": 0.3527, + "step": 2772 + }, + { + "epoch": 0.05551134799689713, + "grad_norm": 1.0406335592269897, + "learning_rate": 9.982952041040098e-06, + "loss": 0.2847, + "step": 2773 + }, + { + "epoch": 0.055531366514025474, + "grad_norm": 1.022986888885498, + "learning_rate": 9.98292528289838e-06, + "loss": 0.3021, + "step": 2774 + }, + { + "epoch": 0.05555138503115382, + "grad_norm": 1.0745693445205688, + "learning_rate": 9.982898503809519e-06, + "loss": 0.3422, + "step": 2775 + }, + { + "epoch": 0.05557140354828216, + "grad_norm": 1.0986601114273071, + "learning_rate": 9.982871703773624e-06, + "loss": 0.339, + "step": 2776 + }, + { + "epoch": 0.055591422065410505, + "grad_norm": 1.103365182876587, + "learning_rate": 9.982844882790808e-06, + "loss": 0.3634, + "step": 2777 + }, + { + "epoch": 0.05561144058253885, + "grad_norm": 1.2008719444274902, + "learning_rate": 9.982818040861184e-06, + "loss": 0.3572, + "step": 2778 + }, + { + "epoch": 0.05563145909966719, + "grad_norm": 1.1005200147628784, + "learning_rate": 9.982791177984867e-06, + "loss": 0.3269, + "step": 2779 + }, + { + "epoch": 0.055651477616795536, + "grad_norm": 1.1233447790145874, + "learning_rate": 9.982764294161968e-06, + "loss": 0.3068, + "step": 2780 + }, + { + "epoch": 0.05567149613392388, + "grad_norm": 1.758442759513855, + "learning_rate": 9.982737389392599e-06, + "loss": 0.8795, + "step": 2781 + }, + { + "epoch": 0.055691514651052224, + "grad_norm": 1.1406465768814087, + "learning_rate": 9.982710463676875e-06, + "loss": 0.3879, + "step": 2782 + }, + { + "epoch": 0.05571153316818057, + "grad_norm": 1.049817681312561, + "learning_rate": 9.982683517014908e-06, + "loss": 0.3624, + "step": 2783 + }, + { + "epoch": 0.05573155168530891, + "grad_norm": 1.0983257293701172, + "learning_rate": 9.982656549406813e-06, + "loss": 0.396, + "step": 2784 + }, + { + "epoch": 0.055751570202437255, + "grad_norm": 1.9405479431152344, + "learning_rate": 9.982629560852701e-06, + "loss": 0.8678, + "step": 2785 + }, + { + "epoch": 0.0557715887195656, + "grad_norm": 1.186302661895752, + "learning_rate": 9.982602551352687e-06, + "loss": 0.3116, + "step": 2786 + }, + { + "epoch": 0.05579160723669394, + "grad_norm": 1.2606810331344604, + "learning_rate": 9.982575520906885e-06, + "loss": 0.3641, + "step": 2787 + }, + { + "epoch": 0.055811625753822286, + "grad_norm": 1.2187261581420898, + "learning_rate": 9.982548469515407e-06, + "loss": 0.3648, + "step": 2788 + }, + { + "epoch": 0.05583164427095063, + "grad_norm": 1.074079155921936, + "learning_rate": 9.982521397178367e-06, + "loss": 0.2578, + "step": 2789 + }, + { + "epoch": 0.05585166278807897, + "grad_norm": 1.8481009006500244, + "learning_rate": 9.98249430389588e-06, + "loss": 0.856, + "step": 2790 + }, + { + "epoch": 0.05587168130520732, + "grad_norm": 1.2187868356704712, + "learning_rate": 9.982467189668059e-06, + "loss": 0.3214, + "step": 2791 + }, + { + "epoch": 0.05589169982233566, + "grad_norm": 1.2738778591156006, + "learning_rate": 9.982440054495017e-06, + "loss": 0.3247, + "step": 2792 + }, + { + "epoch": 0.055911718339464005, + "grad_norm": 0.9899984002113342, + "learning_rate": 9.982412898376872e-06, + "loss": 0.3155, + "step": 2793 + }, + { + "epoch": 0.05593173685659235, + "grad_norm": 1.8194667100906372, + "learning_rate": 9.982385721313734e-06, + "loss": 0.8701, + "step": 2794 + }, + { + "epoch": 0.05595175537372069, + "grad_norm": 1.0361899137496948, + "learning_rate": 9.982358523305718e-06, + "loss": 0.3001, + "step": 2795 + }, + { + "epoch": 0.055971773890849036, + "grad_norm": 1.0842434167861938, + "learning_rate": 9.982331304352938e-06, + "loss": 0.3272, + "step": 2796 + }, + { + "epoch": 0.05599179240797738, + "grad_norm": 1.8334909677505493, + "learning_rate": 9.982304064455511e-06, + "loss": 0.8402, + "step": 2797 + }, + { + "epoch": 0.05601181092510572, + "grad_norm": 1.0583014488220215, + "learning_rate": 9.98227680361355e-06, + "loss": 0.3676, + "step": 2798 + }, + { + "epoch": 0.05603182944223407, + "grad_norm": 1.1670310497283936, + "learning_rate": 9.98224952182717e-06, + "loss": 0.3642, + "step": 2799 + }, + { + "epoch": 0.05605184795936241, + "grad_norm": 0.9678025245666504, + "learning_rate": 9.982222219096484e-06, + "loss": 0.3326, + "step": 2800 + }, + { + "epoch": 0.056071866476490755, + "grad_norm": 1.0766966342926025, + "learning_rate": 9.982194895421608e-06, + "loss": 0.3943, + "step": 2801 + }, + { + "epoch": 0.0560918849936191, + "grad_norm": 1.1758393049240112, + "learning_rate": 9.982167550802658e-06, + "loss": 0.3114, + "step": 2802 + }, + { + "epoch": 0.05611190351074744, + "grad_norm": 1.752100944519043, + "learning_rate": 9.982140185239746e-06, + "loss": 0.8115, + "step": 2803 + }, + { + "epoch": 0.056131922027875786, + "grad_norm": 1.0338770151138306, + "learning_rate": 9.98211279873299e-06, + "loss": 0.3359, + "step": 2804 + }, + { + "epoch": 0.05615194054500413, + "grad_norm": 1.0549848079681396, + "learning_rate": 9.982085391282504e-06, + "loss": 0.367, + "step": 2805 + }, + { + "epoch": 0.05617195906213247, + "grad_norm": 1.1154850721359253, + "learning_rate": 9.982057962888402e-06, + "loss": 0.2792, + "step": 2806 + }, + { + "epoch": 0.05619197757926082, + "grad_norm": 1.029963493347168, + "learning_rate": 9.982030513550803e-06, + "loss": 0.3093, + "step": 2807 + }, + { + "epoch": 0.05621199609638916, + "grad_norm": 1.0795496702194214, + "learning_rate": 9.982003043269818e-06, + "loss": 0.3512, + "step": 2808 + }, + { + "epoch": 0.056232014613517504, + "grad_norm": 1.1395608186721802, + "learning_rate": 9.981975552045563e-06, + "loss": 0.3074, + "step": 2809 + }, + { + "epoch": 0.05625203313064585, + "grad_norm": 1.0932399034500122, + "learning_rate": 9.981948039878158e-06, + "loss": 0.3077, + "step": 2810 + }, + { + "epoch": 0.05627205164777419, + "grad_norm": 1.1656169891357422, + "learning_rate": 9.981920506767715e-06, + "loss": 0.4022, + "step": 2811 + }, + { + "epoch": 0.056292070164902536, + "grad_norm": 1.188760757446289, + "learning_rate": 9.98189295271435e-06, + "loss": 0.3067, + "step": 2812 + }, + { + "epoch": 0.05631208868203088, + "grad_norm": 1.1125824451446533, + "learning_rate": 9.981865377718177e-06, + "loss": 0.3216, + "step": 2813 + }, + { + "epoch": 0.05633210719915922, + "grad_norm": 1.1738746166229248, + "learning_rate": 9.981837781779316e-06, + "loss": 0.3233, + "step": 2814 + }, + { + "epoch": 0.05635212571628757, + "grad_norm": 1.859043002128601, + "learning_rate": 9.981810164897881e-06, + "loss": 0.9515, + "step": 2815 + }, + { + "epoch": 0.05637214423341591, + "grad_norm": 1.1372193098068237, + "learning_rate": 9.981782527073987e-06, + "loss": 0.2842, + "step": 2816 + }, + { + "epoch": 0.056392162750544254, + "grad_norm": 1.1621432304382324, + "learning_rate": 9.981754868307754e-06, + "loss": 0.3302, + "step": 2817 + }, + { + "epoch": 0.0564121812676726, + "grad_norm": 1.0231481790542603, + "learning_rate": 9.981727188599292e-06, + "loss": 0.3371, + "step": 2818 + }, + { + "epoch": 0.05643219978480094, + "grad_norm": 1.2305047512054443, + "learning_rate": 9.981699487948724e-06, + "loss": 0.3183, + "step": 2819 + }, + { + "epoch": 0.056452218301929286, + "grad_norm": 1.0080205202102661, + "learning_rate": 9.981671766356163e-06, + "loss": 0.2982, + "step": 2820 + }, + { + "epoch": 0.05647223681905763, + "grad_norm": 1.146132230758667, + "learning_rate": 9.981644023821724e-06, + "loss": 0.3787, + "step": 2821 + }, + { + "epoch": 0.05649225533618597, + "grad_norm": 1.2894717454910278, + "learning_rate": 9.981616260345527e-06, + "loss": 0.3559, + "step": 2822 + }, + { + "epoch": 0.05651227385331432, + "grad_norm": 1.2639812231063843, + "learning_rate": 9.981588475927688e-06, + "loss": 0.4268, + "step": 2823 + }, + { + "epoch": 0.05653229237044266, + "grad_norm": 1.102890133857727, + "learning_rate": 9.981560670568322e-06, + "loss": 0.3534, + "step": 2824 + }, + { + "epoch": 0.056552310887571004, + "grad_norm": 1.1059447526931763, + "learning_rate": 9.981532844267546e-06, + "loss": 0.2944, + "step": 2825 + }, + { + "epoch": 0.05657232940469935, + "grad_norm": 1.0895012617111206, + "learning_rate": 9.981504997025479e-06, + "loss": 0.3306, + "step": 2826 + }, + { + "epoch": 0.05659234792182769, + "grad_norm": 1.0166003704071045, + "learning_rate": 9.981477128842239e-06, + "loss": 0.348, + "step": 2827 + }, + { + "epoch": 0.056612366438956035, + "grad_norm": 1.1034355163574219, + "learning_rate": 9.981449239717937e-06, + "loss": 0.3392, + "step": 2828 + }, + { + "epoch": 0.05663238495608438, + "grad_norm": 1.0399361848831177, + "learning_rate": 9.981421329652699e-06, + "loss": 0.3073, + "step": 2829 + }, + { + "epoch": 0.05665240347321272, + "grad_norm": 1.0722737312316895, + "learning_rate": 9.981393398646634e-06, + "loss": 0.3782, + "step": 2830 + }, + { + "epoch": 0.05667242199034107, + "grad_norm": 1.1278220415115356, + "learning_rate": 9.981365446699864e-06, + "loss": 0.3299, + "step": 2831 + }, + { + "epoch": 0.05669244050746941, + "grad_norm": 1.0993874073028564, + "learning_rate": 9.981337473812507e-06, + "loss": 0.3337, + "step": 2832 + }, + { + "epoch": 0.056712459024597754, + "grad_norm": 1.2451269626617432, + "learning_rate": 9.98130947998468e-06, + "loss": 0.3309, + "step": 2833 + }, + { + "epoch": 0.0567324775417261, + "grad_norm": 1.1188284158706665, + "learning_rate": 9.981281465216497e-06, + "loss": 0.3166, + "step": 2834 + }, + { + "epoch": 0.05675249605885444, + "grad_norm": 1.9429106712341309, + "learning_rate": 9.981253429508082e-06, + "loss": 0.8523, + "step": 2835 + }, + { + "epoch": 0.056772514575982785, + "grad_norm": 1.0778486728668213, + "learning_rate": 9.981225372859547e-06, + "loss": 0.3773, + "step": 2836 + }, + { + "epoch": 0.05679253309311113, + "grad_norm": 1.1373918056488037, + "learning_rate": 9.981197295271012e-06, + "loss": 0.348, + "step": 2837 + }, + { + "epoch": 0.05681255161023947, + "grad_norm": 1.129986047744751, + "learning_rate": 9.981169196742598e-06, + "loss": 0.3453, + "step": 2838 + }, + { + "epoch": 0.05683257012736782, + "grad_norm": 0.9908447861671448, + "learning_rate": 9.981141077274418e-06, + "loss": 0.2813, + "step": 2839 + }, + { + "epoch": 0.05685258864449616, + "grad_norm": 1.089407205581665, + "learning_rate": 9.981112936866595e-06, + "loss": 0.3224, + "step": 2840 + }, + { + "epoch": 0.056872607161624504, + "grad_norm": 1.1684730052947998, + "learning_rate": 9.981084775519246e-06, + "loss": 0.3704, + "step": 2841 + }, + { + "epoch": 0.05689262567875285, + "grad_norm": 1.1700712442398071, + "learning_rate": 9.981056593232486e-06, + "loss": 0.3118, + "step": 2842 + }, + { + "epoch": 0.05691264419588119, + "grad_norm": 1.1522855758666992, + "learning_rate": 9.981028390006437e-06, + "loss": 0.3162, + "step": 2843 + }, + { + "epoch": 0.056932662713009535, + "grad_norm": 1.003163456916809, + "learning_rate": 9.981000165841218e-06, + "loss": 0.3007, + "step": 2844 + }, + { + "epoch": 0.05695268123013788, + "grad_norm": 1.0153565406799316, + "learning_rate": 9.980971920736944e-06, + "loss": 0.3362, + "step": 2845 + }, + { + "epoch": 0.05697269974726622, + "grad_norm": 1.2046868801116943, + "learning_rate": 9.980943654693736e-06, + "loss": 0.3177, + "step": 2846 + }, + { + "epoch": 0.056992718264394567, + "grad_norm": 1.3628720045089722, + "learning_rate": 9.980915367711714e-06, + "loss": 0.3125, + "step": 2847 + }, + { + "epoch": 0.05701273678152291, + "grad_norm": 1.1550989151000977, + "learning_rate": 9.980887059790996e-06, + "loss": 0.3492, + "step": 2848 + }, + { + "epoch": 0.057032755298651254, + "grad_norm": 1.731429100036621, + "learning_rate": 9.9808587309317e-06, + "loss": 0.3447, + "step": 2849 + }, + { + "epoch": 0.0570527738157796, + "grad_norm": 0.9265353679656982, + "learning_rate": 9.980830381133945e-06, + "loss": 0.2885, + "step": 2850 + }, + { + "epoch": 0.05707279233290794, + "grad_norm": 1.2225332260131836, + "learning_rate": 9.980802010397853e-06, + "loss": 0.3638, + "step": 2851 + }, + { + "epoch": 0.057092810850036285, + "grad_norm": 1.1198986768722534, + "learning_rate": 9.98077361872354e-06, + "loss": 0.3216, + "step": 2852 + }, + { + "epoch": 0.05711282936716463, + "grad_norm": 1.076850414276123, + "learning_rate": 9.980745206111126e-06, + "loss": 0.3188, + "step": 2853 + }, + { + "epoch": 0.05713284788429297, + "grad_norm": 1.1311007738113403, + "learning_rate": 9.980716772560733e-06, + "loss": 0.3301, + "step": 2854 + }, + { + "epoch": 0.057152866401421316, + "grad_norm": 1.1921943426132202, + "learning_rate": 9.980688318072476e-06, + "loss": 0.2943, + "step": 2855 + }, + { + "epoch": 0.05717288491854966, + "grad_norm": 1.012850046157837, + "learning_rate": 9.980659842646479e-06, + "loss": 0.3126, + "step": 2856 + }, + { + "epoch": 0.057192903435678004, + "grad_norm": 1.1089595556259155, + "learning_rate": 9.980631346282858e-06, + "loss": 0.3453, + "step": 2857 + }, + { + "epoch": 0.05721292195280635, + "grad_norm": 1.861614465713501, + "learning_rate": 9.980602828981736e-06, + "loss": 0.8777, + "step": 2858 + }, + { + "epoch": 0.05723294046993469, + "grad_norm": 1.218097448348999, + "learning_rate": 9.980574290743232e-06, + "loss": 0.3756, + "step": 2859 + }, + { + "epoch": 0.057252958987063035, + "grad_norm": 1.0892572402954102, + "learning_rate": 9.980545731567464e-06, + "loss": 0.3505, + "step": 2860 + }, + { + "epoch": 0.05727297750419138, + "grad_norm": 1.059362769126892, + "learning_rate": 9.980517151454555e-06, + "loss": 0.345, + "step": 2861 + }, + { + "epoch": 0.05729299602131972, + "grad_norm": 1.171406865119934, + "learning_rate": 9.980488550404624e-06, + "loss": 0.3573, + "step": 2862 + }, + { + "epoch": 0.057313014538448066, + "grad_norm": 1.115626335144043, + "learning_rate": 9.980459928417789e-06, + "loss": 0.3529, + "step": 2863 + }, + { + "epoch": 0.05733303305557641, + "grad_norm": 1.0599946975708008, + "learning_rate": 9.980431285494174e-06, + "loss": 0.3431, + "step": 2864 + }, + { + "epoch": 0.057353051572704754, + "grad_norm": 1.192286491394043, + "learning_rate": 9.980402621633896e-06, + "loss": 0.3244, + "step": 2865 + }, + { + "epoch": 0.0573730700898331, + "grad_norm": 1.0249625444412231, + "learning_rate": 9.980373936837079e-06, + "loss": 0.346, + "step": 2866 + }, + { + "epoch": 0.05739308860696144, + "grad_norm": 2.164804697036743, + "learning_rate": 9.98034523110384e-06, + "loss": 0.8649, + "step": 2867 + }, + { + "epoch": 0.057413107124089785, + "grad_norm": 1.1302603483200073, + "learning_rate": 9.980316504434304e-06, + "loss": 0.3783, + "step": 2868 + }, + { + "epoch": 0.05743312564121813, + "grad_norm": 1.106321930885315, + "learning_rate": 9.980287756828588e-06, + "loss": 0.3507, + "step": 2869 + }, + { + "epoch": 0.05745314415834647, + "grad_norm": 1.2066855430603027, + "learning_rate": 9.980258988286813e-06, + "loss": 0.354, + "step": 2870 + }, + { + "epoch": 0.057473162675474816, + "grad_norm": 1.2082695960998535, + "learning_rate": 9.980230198809102e-06, + "loss": 0.3691, + "step": 2871 + }, + { + "epoch": 0.05749318119260316, + "grad_norm": 1.2086701393127441, + "learning_rate": 9.980201388395575e-06, + "loss": 0.3733, + "step": 2872 + }, + { + "epoch": 0.057513199709731504, + "grad_norm": 1.1667283773422241, + "learning_rate": 9.980172557046352e-06, + "loss": 0.3386, + "step": 2873 + }, + { + "epoch": 0.05753321822685985, + "grad_norm": 1.9683513641357422, + "learning_rate": 9.980143704761555e-06, + "loss": 0.8322, + "step": 2874 + }, + { + "epoch": 0.05755323674398819, + "grad_norm": 1.1667633056640625, + "learning_rate": 9.980114831541307e-06, + "loss": 0.3306, + "step": 2875 + }, + { + "epoch": 0.057573255261116535, + "grad_norm": 1.0293995141983032, + "learning_rate": 9.980085937385728e-06, + "loss": 0.3407, + "step": 2876 + }, + { + "epoch": 0.05759327377824488, + "grad_norm": 1.0535123348236084, + "learning_rate": 9.98005702229494e-06, + "loss": 0.295, + "step": 2877 + }, + { + "epoch": 0.05761329229537322, + "grad_norm": 1.1200186014175415, + "learning_rate": 9.980028086269063e-06, + "loss": 0.3499, + "step": 2878 + }, + { + "epoch": 0.057633310812501566, + "grad_norm": 1.066319227218628, + "learning_rate": 9.979999129308219e-06, + "loss": 0.3176, + "step": 2879 + }, + { + "epoch": 0.05765332932962991, + "grad_norm": 1.9040563106536865, + "learning_rate": 9.979970151412532e-06, + "loss": 0.9357, + "step": 2880 + }, + { + "epoch": 0.057673347846758254, + "grad_norm": 1.1438212394714355, + "learning_rate": 9.979941152582121e-06, + "loss": 0.3342, + "step": 2881 + }, + { + "epoch": 0.0576933663638866, + "grad_norm": 1.845268964767456, + "learning_rate": 9.97991213281711e-06, + "loss": 0.8742, + "step": 2882 + }, + { + "epoch": 0.05771338488101494, + "grad_norm": 1.2743475437164307, + "learning_rate": 9.979883092117619e-06, + "loss": 0.3343, + "step": 2883 + }, + { + "epoch": 0.057733403398143285, + "grad_norm": 0.9206478595733643, + "learning_rate": 9.97985403048377e-06, + "loss": 0.3268, + "step": 2884 + }, + { + "epoch": 0.05775342191527163, + "grad_norm": 1.081490159034729, + "learning_rate": 9.979824947915689e-06, + "loss": 0.3423, + "step": 2885 + }, + { + "epoch": 0.05777344043239997, + "grad_norm": 1.0142256021499634, + "learning_rate": 9.979795844413496e-06, + "loss": 0.2913, + "step": 2886 + }, + { + "epoch": 0.057793458949528316, + "grad_norm": 1.114656686782837, + "learning_rate": 9.979766719977312e-06, + "loss": 0.3402, + "step": 2887 + }, + { + "epoch": 0.05781347746665666, + "grad_norm": 1.1370785236358643, + "learning_rate": 9.979737574607258e-06, + "loss": 0.3336, + "step": 2888 + }, + { + "epoch": 0.057833495983785004, + "grad_norm": 1.1398563385009766, + "learning_rate": 9.979708408303461e-06, + "loss": 0.3698, + "step": 2889 + }, + { + "epoch": 0.05785351450091335, + "grad_norm": 1.101674199104309, + "learning_rate": 9.979679221066041e-06, + "loss": 0.3413, + "step": 2890 + }, + { + "epoch": 0.05787353301804169, + "grad_norm": 1.0917261838912964, + "learning_rate": 9.979650012895121e-06, + "loss": 0.3355, + "step": 2891 + }, + { + "epoch": 0.057893551535170035, + "grad_norm": 1.0508161783218384, + "learning_rate": 9.979620783790826e-06, + "loss": 0.3163, + "step": 2892 + }, + { + "epoch": 0.05791357005229838, + "grad_norm": 1.1455236673355103, + "learning_rate": 9.979591533753274e-06, + "loss": 0.3364, + "step": 2893 + }, + { + "epoch": 0.05793358856942672, + "grad_norm": 1.170191764831543, + "learning_rate": 9.979562262782593e-06, + "loss": 0.3684, + "step": 2894 + }, + { + "epoch": 0.057953607086555066, + "grad_norm": 1.377484917640686, + "learning_rate": 9.979532970878902e-06, + "loss": 0.3626, + "step": 2895 + }, + { + "epoch": 0.05797362560368341, + "grad_norm": 1.0975406169891357, + "learning_rate": 9.979503658042327e-06, + "loss": 0.3064, + "step": 2896 + }, + { + "epoch": 0.05799364412081175, + "grad_norm": 1.1536184549331665, + "learning_rate": 9.979474324272991e-06, + "loss": 0.3658, + "step": 2897 + }, + { + "epoch": 0.0580136626379401, + "grad_norm": 1.1402798891067505, + "learning_rate": 9.979444969571015e-06, + "loss": 0.3433, + "step": 2898 + }, + { + "epoch": 0.05803368115506844, + "grad_norm": 1.0742930173873901, + "learning_rate": 9.979415593936525e-06, + "loss": 0.3437, + "step": 2899 + }, + { + "epoch": 0.058053699672196785, + "grad_norm": 1.2377701997756958, + "learning_rate": 9.979386197369642e-06, + "loss": 0.3739, + "step": 2900 + }, + { + "epoch": 0.05807371818932513, + "grad_norm": 1.0746464729309082, + "learning_rate": 9.979356779870493e-06, + "loss": 0.3166, + "step": 2901 + }, + { + "epoch": 0.05809373670645347, + "grad_norm": 1.1550605297088623, + "learning_rate": 9.9793273414392e-06, + "loss": 0.3308, + "step": 2902 + }, + { + "epoch": 0.058113755223581816, + "grad_norm": 1.1278862953186035, + "learning_rate": 9.979297882075884e-06, + "loss": 0.2452, + "step": 2903 + }, + { + "epoch": 0.05813377374071016, + "grad_norm": 1.2116376161575317, + "learning_rate": 9.979268401780673e-06, + "loss": 0.3246, + "step": 2904 + }, + { + "epoch": 0.0581537922578385, + "grad_norm": 1.0500940084457397, + "learning_rate": 9.979238900553688e-06, + "loss": 0.3494, + "step": 2905 + }, + { + "epoch": 0.05817381077496685, + "grad_norm": 1.0930711030960083, + "learning_rate": 9.979209378395055e-06, + "loss": 0.3015, + "step": 2906 + }, + { + "epoch": 0.05819382929209519, + "grad_norm": 1.3285727500915527, + "learning_rate": 9.979179835304897e-06, + "loss": 0.3419, + "step": 2907 + }, + { + "epoch": 0.058213847809223535, + "grad_norm": 0.9927680492401123, + "learning_rate": 9.979150271283339e-06, + "loss": 0.2983, + "step": 2908 + }, + { + "epoch": 0.05823386632635188, + "grad_norm": 1.14529550075531, + "learning_rate": 9.979120686330504e-06, + "loss": 0.3501, + "step": 2909 + }, + { + "epoch": 0.05825388484348022, + "grad_norm": 0.9200878739356995, + "learning_rate": 9.979091080446518e-06, + "loss": 0.3021, + "step": 2910 + }, + { + "epoch": 0.058273903360608566, + "grad_norm": 1.2099276781082153, + "learning_rate": 9.979061453631504e-06, + "loss": 0.3456, + "step": 2911 + }, + { + "epoch": 0.05829392187773691, + "grad_norm": 1.1256566047668457, + "learning_rate": 9.979031805885588e-06, + "loss": 0.3269, + "step": 2912 + }, + { + "epoch": 0.05831394039486525, + "grad_norm": 1.2064746618270874, + "learning_rate": 9.979002137208893e-06, + "loss": 0.3465, + "step": 2913 + }, + { + "epoch": 0.0583339589119936, + "grad_norm": 1.0296043157577515, + "learning_rate": 9.978972447601544e-06, + "loss": 0.3356, + "step": 2914 + }, + { + "epoch": 0.05835397742912194, + "grad_norm": 1.896231770515442, + "learning_rate": 9.978942737063668e-06, + "loss": 0.9172, + "step": 2915 + }, + { + "epoch": 0.058373995946250284, + "grad_norm": 1.0715471506118774, + "learning_rate": 9.978913005595387e-06, + "loss": 0.3378, + "step": 2916 + }, + { + "epoch": 0.05839401446337863, + "grad_norm": 1.0654600858688354, + "learning_rate": 9.978883253196827e-06, + "loss": 0.3176, + "step": 2917 + }, + { + "epoch": 0.05841403298050697, + "grad_norm": 1.0645768642425537, + "learning_rate": 9.978853479868114e-06, + "loss": 0.3116, + "step": 2918 + }, + { + "epoch": 0.058434051497635316, + "grad_norm": 1.158400535583496, + "learning_rate": 9.978823685609372e-06, + "loss": 0.3452, + "step": 2919 + }, + { + "epoch": 0.05845407001476366, + "grad_norm": 1.787590503692627, + "learning_rate": 9.978793870420727e-06, + "loss": 0.9626, + "step": 2920 + }, + { + "epoch": 0.058474088531892, + "grad_norm": 1.148016333580017, + "learning_rate": 9.978764034302303e-06, + "loss": 0.3313, + "step": 2921 + }, + { + "epoch": 0.05849410704902035, + "grad_norm": 1.1518537998199463, + "learning_rate": 9.978734177254227e-06, + "loss": 0.3291, + "step": 2922 + }, + { + "epoch": 0.05851412556614869, + "grad_norm": 1.0652616024017334, + "learning_rate": 9.978704299276624e-06, + "loss": 0.3433, + "step": 2923 + }, + { + "epoch": 0.058534144083277034, + "grad_norm": 1.0575156211853027, + "learning_rate": 9.97867440036962e-06, + "loss": 0.3653, + "step": 2924 + }, + { + "epoch": 0.05855416260040538, + "grad_norm": 1.045267939567566, + "learning_rate": 9.97864448053334e-06, + "loss": 0.3491, + "step": 2925 + }, + { + "epoch": 0.05857418111753372, + "grad_norm": 1.0313396453857422, + "learning_rate": 9.97861453976791e-06, + "loss": 0.3235, + "step": 2926 + }, + { + "epoch": 0.058594199634662066, + "grad_norm": 1.7427642345428467, + "learning_rate": 9.978584578073457e-06, + "loss": 0.8727, + "step": 2927 + }, + { + "epoch": 0.05861421815179041, + "grad_norm": 1.0742568969726562, + "learning_rate": 9.978554595450103e-06, + "loss": 0.3477, + "step": 2928 + }, + { + "epoch": 0.05863423666891875, + "grad_norm": 1.03793203830719, + "learning_rate": 9.978524591897979e-06, + "loss": 0.301, + "step": 2929 + }, + { + "epoch": 0.0586542551860471, + "grad_norm": 1.1422395706176758, + "learning_rate": 9.97849456741721e-06, + "loss": 0.375, + "step": 2930 + }, + { + "epoch": 0.05867427370317544, + "grad_norm": 1.1430498361587524, + "learning_rate": 9.978464522007917e-06, + "loss": 0.3754, + "step": 2931 + }, + { + "epoch": 0.058694292220303784, + "grad_norm": 1.2256768941879272, + "learning_rate": 9.978434455670234e-06, + "loss": 0.3165, + "step": 2932 + }, + { + "epoch": 0.05871431073743213, + "grad_norm": 1.0458096265792847, + "learning_rate": 9.978404368404284e-06, + "loss": 0.3086, + "step": 2933 + }, + { + "epoch": 0.05873432925456047, + "grad_norm": 1.1166177988052368, + "learning_rate": 9.978374260210193e-06, + "loss": 0.363, + "step": 2934 + }, + { + "epoch": 0.058754347771688815, + "grad_norm": 1.2253096103668213, + "learning_rate": 9.978344131088087e-06, + "loss": 0.3412, + "step": 2935 + }, + { + "epoch": 0.05877436628881716, + "grad_norm": 1.0546482801437378, + "learning_rate": 9.978313981038094e-06, + "loss": 0.3491, + "step": 2936 + }, + { + "epoch": 0.0587943848059455, + "grad_norm": 1.0292261838912964, + "learning_rate": 9.978283810060341e-06, + "loss": 0.3405, + "step": 2937 + }, + { + "epoch": 0.05881440332307385, + "grad_norm": 1.1666241884231567, + "learning_rate": 9.978253618154954e-06, + "loss": 0.3208, + "step": 2938 + }, + { + "epoch": 0.05883442184020219, + "grad_norm": 1.065185308456421, + "learning_rate": 9.97822340532206e-06, + "loss": 0.3332, + "step": 2939 + }, + { + "epoch": 0.058854440357330534, + "grad_norm": 1.0134800672531128, + "learning_rate": 9.978193171561786e-06, + "loss": 0.3046, + "step": 2940 + }, + { + "epoch": 0.05887445887445888, + "grad_norm": 1.1103891134262085, + "learning_rate": 9.97816291687426e-06, + "loss": 0.3538, + "step": 2941 + }, + { + "epoch": 0.05889447739158722, + "grad_norm": 1.1726977825164795, + "learning_rate": 9.978132641259607e-06, + "loss": 0.3558, + "step": 2942 + }, + { + "epoch": 0.058914495908715565, + "grad_norm": 1.1863741874694824, + "learning_rate": 9.978102344717956e-06, + "loss": 0.3509, + "step": 2943 + }, + { + "epoch": 0.0589345144258439, + "grad_norm": 1.8066883087158203, + "learning_rate": 9.978072027249435e-06, + "loss": 0.8164, + "step": 2944 + }, + { + "epoch": 0.058954532942972246, + "grad_norm": 1.0867273807525635, + "learning_rate": 9.97804168885417e-06, + "loss": 0.3455, + "step": 2945 + }, + { + "epoch": 0.05897455146010059, + "grad_norm": 1.9298561811447144, + "learning_rate": 9.978011329532288e-06, + "loss": 0.8795, + "step": 2946 + }, + { + "epoch": 0.05899456997722893, + "grad_norm": 1.461113452911377, + "learning_rate": 9.977980949283919e-06, + "loss": 0.3517, + "step": 2947 + }, + { + "epoch": 0.05901458849435728, + "grad_norm": 1.101182460784912, + "learning_rate": 9.977950548109189e-06, + "loss": 0.3639, + "step": 2948 + }, + { + "epoch": 0.05903460701148562, + "grad_norm": 1.158292531967163, + "learning_rate": 9.977920126008227e-06, + "loss": 0.3661, + "step": 2949 + }, + { + "epoch": 0.059054625528613965, + "grad_norm": 1.2735042572021484, + "learning_rate": 9.97788968298116e-06, + "loss": 0.3681, + "step": 2950 + }, + { + "epoch": 0.05907464404574231, + "grad_norm": 1.1058733463287354, + "learning_rate": 9.977859219028115e-06, + "loss": 0.2988, + "step": 2951 + }, + { + "epoch": 0.05909466256287065, + "grad_norm": 1.125184416770935, + "learning_rate": 9.97782873414922e-06, + "loss": 0.3622, + "step": 2952 + }, + { + "epoch": 0.059114681079998996, + "grad_norm": 1.0981866121292114, + "learning_rate": 9.977798228344606e-06, + "loss": 0.3079, + "step": 2953 + }, + { + "epoch": 0.05913469959712734, + "grad_norm": 0.9805325269699097, + "learning_rate": 9.977767701614398e-06, + "loss": 0.3542, + "step": 2954 + }, + { + "epoch": 0.05915471811425568, + "grad_norm": 1.0061376094818115, + "learning_rate": 9.977737153958727e-06, + "loss": 0.3329, + "step": 2955 + }, + { + "epoch": 0.05917473663138403, + "grad_norm": 1.1635322570800781, + "learning_rate": 9.97770658537772e-06, + "loss": 0.4361, + "step": 2956 + }, + { + "epoch": 0.05919475514851237, + "grad_norm": 1.2631189823150635, + "learning_rate": 9.977675995871506e-06, + "loss": 0.3585, + "step": 2957 + }, + { + "epoch": 0.059214773665640714, + "grad_norm": 1.0511317253112793, + "learning_rate": 9.977645385440214e-06, + "loss": 0.3145, + "step": 2958 + }, + { + "epoch": 0.05923479218276906, + "grad_norm": 1.798878788948059, + "learning_rate": 9.97761475408397e-06, + "loss": 0.2769, + "step": 2959 + }, + { + "epoch": 0.0592548106998974, + "grad_norm": 1.3178894519805908, + "learning_rate": 9.977584101802907e-06, + "loss": 0.3668, + "step": 2960 + }, + { + "epoch": 0.059274829217025746, + "grad_norm": 1.0931627750396729, + "learning_rate": 9.97755342859715e-06, + "loss": 0.3538, + "step": 2961 + }, + { + "epoch": 0.05929484773415409, + "grad_norm": 1.0614244937896729, + "learning_rate": 9.97752273446683e-06, + "loss": 0.3417, + "step": 2962 + }, + { + "epoch": 0.05931486625128243, + "grad_norm": 1.8097690343856812, + "learning_rate": 9.977492019412076e-06, + "loss": 0.9246, + "step": 2963 + }, + { + "epoch": 0.05933488476841078, + "grad_norm": 1.2077616453170776, + "learning_rate": 9.977461283433017e-06, + "loss": 0.3408, + "step": 2964 + }, + { + "epoch": 0.05935490328553912, + "grad_norm": 1.1889795064926147, + "learning_rate": 9.97743052652978e-06, + "loss": 0.3685, + "step": 2965 + }, + { + "epoch": 0.059374921802667464, + "grad_norm": 1.0264613628387451, + "learning_rate": 9.977399748702498e-06, + "loss": 0.2785, + "step": 2966 + }, + { + "epoch": 0.05939494031979581, + "grad_norm": 1.7499700784683228, + "learning_rate": 9.977368949951299e-06, + "loss": 0.811, + "step": 2967 + }, + { + "epoch": 0.05941495883692415, + "grad_norm": 1.0797419548034668, + "learning_rate": 9.97733813027631e-06, + "loss": 0.3881, + "step": 2968 + }, + { + "epoch": 0.059434977354052496, + "grad_norm": 1.3869818449020386, + "learning_rate": 9.977307289677664e-06, + "loss": 0.3566, + "step": 2969 + }, + { + "epoch": 0.05945499587118084, + "grad_norm": 1.9466567039489746, + "learning_rate": 9.97727642815549e-06, + "loss": 0.7908, + "step": 2970 + }, + { + "epoch": 0.05947501438830918, + "grad_norm": 1.1949458122253418, + "learning_rate": 9.977245545709916e-06, + "loss": 0.3868, + "step": 2971 + }, + { + "epoch": 0.05949503290543753, + "grad_norm": 1.1117379665374756, + "learning_rate": 9.977214642341073e-06, + "loss": 0.3415, + "step": 2972 + }, + { + "epoch": 0.05951505142256587, + "grad_norm": 1.1462364196777344, + "learning_rate": 9.977183718049091e-06, + "loss": 0.3457, + "step": 2973 + }, + { + "epoch": 0.059535069939694214, + "grad_norm": 1.1201046705245972, + "learning_rate": 9.977152772834098e-06, + "loss": 0.3411, + "step": 2974 + }, + { + "epoch": 0.05955508845682256, + "grad_norm": 1.2163636684417725, + "learning_rate": 9.977121806696227e-06, + "loss": 0.3893, + "step": 2975 + }, + { + "epoch": 0.0595751069739509, + "grad_norm": 1.0444387197494507, + "learning_rate": 9.977090819635608e-06, + "loss": 0.3342, + "step": 2976 + }, + { + "epoch": 0.059595125491079246, + "grad_norm": 1.1662039756774902, + "learning_rate": 9.97705981165237e-06, + "loss": 0.3291, + "step": 2977 + }, + { + "epoch": 0.05961514400820759, + "grad_norm": 1.8134143352508545, + "learning_rate": 9.977028782746643e-06, + "loss": 0.8582, + "step": 2978 + }, + { + "epoch": 0.05963516252533593, + "grad_norm": 1.1650774478912354, + "learning_rate": 9.976997732918559e-06, + "loss": 0.3869, + "step": 2979 + }, + { + "epoch": 0.05965518104246428, + "grad_norm": 1.8038339614868164, + "learning_rate": 9.976966662168245e-06, + "loss": 0.8393, + "step": 2980 + }, + { + "epoch": 0.05967519955959262, + "grad_norm": 1.0741010904312134, + "learning_rate": 9.976935570495836e-06, + "loss": 0.3329, + "step": 2981 + }, + { + "epoch": 0.059695218076720964, + "grad_norm": 1.1138238906860352, + "learning_rate": 9.976904457901461e-06, + "loss": 0.3325, + "step": 2982 + }, + { + "epoch": 0.05971523659384931, + "grad_norm": 1.1186857223510742, + "learning_rate": 9.976873324385252e-06, + "loss": 0.3328, + "step": 2983 + }, + { + "epoch": 0.05973525511097765, + "grad_norm": 1.0881599187850952, + "learning_rate": 9.976842169947338e-06, + "loss": 0.327, + "step": 2984 + }, + { + "epoch": 0.059755273628105995, + "grad_norm": 0.9791388511657715, + "learning_rate": 9.976810994587848e-06, + "loss": 0.3007, + "step": 2985 + }, + { + "epoch": 0.05977529214523434, + "grad_norm": 1.008437991142273, + "learning_rate": 9.97677979830692e-06, + "loss": 0.3174, + "step": 2986 + }, + { + "epoch": 0.05979531066236268, + "grad_norm": 1.0366746187210083, + "learning_rate": 9.976748581104677e-06, + "loss": 0.3108, + "step": 2987 + }, + { + "epoch": 0.05981532917949103, + "grad_norm": 1.0402477979660034, + "learning_rate": 9.976717342981254e-06, + "loss": 0.2926, + "step": 2988 + }, + { + "epoch": 0.05983534769661937, + "grad_norm": 1.0841318368911743, + "learning_rate": 9.976686083936785e-06, + "loss": 0.318, + "step": 2989 + }, + { + "epoch": 0.059855366213747714, + "grad_norm": 1.0009002685546875, + "learning_rate": 9.976654803971396e-06, + "loss": 0.3098, + "step": 2990 + }, + { + "epoch": 0.05987538473087606, + "grad_norm": 1.1632750034332275, + "learning_rate": 9.976623503085224e-06, + "loss": 0.33, + "step": 2991 + }, + { + "epoch": 0.0598954032480044, + "grad_norm": 1.9725910425186157, + "learning_rate": 9.976592181278395e-06, + "loss": 0.8494, + "step": 2992 + }, + { + "epoch": 0.059915421765132745, + "grad_norm": 1.1801953315734863, + "learning_rate": 9.976560838551043e-06, + "loss": 0.3573, + "step": 2993 + }, + { + "epoch": 0.05993544028226109, + "grad_norm": 1.0849015712738037, + "learning_rate": 9.976529474903302e-06, + "loss": 0.3099, + "step": 2994 + }, + { + "epoch": 0.05995545879938943, + "grad_norm": 1.7384717464447021, + "learning_rate": 9.976498090335303e-06, + "loss": 0.8033, + "step": 2995 + }, + { + "epoch": 0.059975477316517777, + "grad_norm": 1.2624199390411377, + "learning_rate": 9.976466684847175e-06, + "loss": 0.3514, + "step": 2996 + }, + { + "epoch": 0.05999549583364612, + "grad_norm": 1.147136926651001, + "learning_rate": 9.976435258439053e-06, + "loss": 0.3638, + "step": 2997 + }, + { + "epoch": 0.060015514350774464, + "grad_norm": 1.2905652523040771, + "learning_rate": 9.97640381111107e-06, + "loss": 0.2721, + "step": 2998 + }, + { + "epoch": 0.06003553286790281, + "grad_norm": 1.1511489152908325, + "learning_rate": 9.976372342863352e-06, + "loss": 0.4021, + "step": 2999 + }, + { + "epoch": 0.06005555138503115, + "grad_norm": 1.257918119430542, + "learning_rate": 9.976340853696038e-06, + "loss": 0.3219, + "step": 3000 + }, + { + "epoch": 0.060075569902159495, + "grad_norm": 1.1556947231292725, + "learning_rate": 9.976309343609258e-06, + "loss": 0.2692, + "step": 3001 + }, + { + "epoch": 0.06009558841928784, + "grad_norm": 1.1957048177719116, + "learning_rate": 9.976277812603145e-06, + "loss": 0.3537, + "step": 3002 + }, + { + "epoch": 0.06011560693641618, + "grad_norm": 1.2751399278640747, + "learning_rate": 9.97624626067783e-06, + "loss": 0.3768, + "step": 3003 + }, + { + "epoch": 0.060135625453544526, + "grad_norm": 1.1388423442840576, + "learning_rate": 9.976214687833446e-06, + "loss": 0.3632, + "step": 3004 + }, + { + "epoch": 0.06015564397067287, + "grad_norm": 1.160815715789795, + "learning_rate": 9.976183094070127e-06, + "loss": 0.3796, + "step": 3005 + }, + { + "epoch": 0.060175662487801214, + "grad_norm": 1.018699288368225, + "learning_rate": 9.976151479388006e-06, + "loss": 0.2926, + "step": 3006 + }, + { + "epoch": 0.06019568100492956, + "grad_norm": 1.0709218978881836, + "learning_rate": 9.976119843787215e-06, + "loss": 0.3544, + "step": 3007 + }, + { + "epoch": 0.0602156995220579, + "grad_norm": 1.1632269620895386, + "learning_rate": 9.976088187267887e-06, + "loss": 0.3411, + "step": 3008 + }, + { + "epoch": 0.060235718039186245, + "grad_norm": 1.0779131650924683, + "learning_rate": 9.976056509830155e-06, + "loss": 0.3525, + "step": 3009 + }, + { + "epoch": 0.06025573655631459, + "grad_norm": 1.1335103511810303, + "learning_rate": 9.97602481147415e-06, + "loss": 0.3455, + "step": 3010 + }, + { + "epoch": 0.06027575507344293, + "grad_norm": 1.1530095338821411, + "learning_rate": 9.975993092200009e-06, + "loss": 0.3749, + "step": 3011 + }, + { + "epoch": 0.060295773590571276, + "grad_norm": 1.2236636877059937, + "learning_rate": 9.975961352007866e-06, + "loss": 0.3367, + "step": 3012 + }, + { + "epoch": 0.06031579210769962, + "grad_norm": 1.041991114616394, + "learning_rate": 9.975929590897851e-06, + "loss": 0.3544, + "step": 3013 + }, + { + "epoch": 0.060335810624827964, + "grad_norm": 1.015735387802124, + "learning_rate": 9.975897808870097e-06, + "loss": 0.3431, + "step": 3014 + }, + { + "epoch": 0.06035582914195631, + "grad_norm": 1.029818058013916, + "learning_rate": 9.975866005924742e-06, + "loss": 0.336, + "step": 3015 + }, + { + "epoch": 0.06037584765908465, + "grad_norm": 1.2089017629623413, + "learning_rate": 9.975834182061914e-06, + "loss": 0.3196, + "step": 3016 + }, + { + "epoch": 0.060395866176212995, + "grad_norm": 1.1978492736816406, + "learning_rate": 9.975802337281753e-06, + "loss": 0.3555, + "step": 3017 + }, + { + "epoch": 0.06041588469334134, + "grad_norm": 1.197115421295166, + "learning_rate": 9.975770471584389e-06, + "loss": 0.3426, + "step": 3018 + }, + { + "epoch": 0.06043590321046968, + "grad_norm": 1.1803284883499146, + "learning_rate": 9.975738584969956e-06, + "loss": 0.3315, + "step": 3019 + }, + { + "epoch": 0.060455921727598026, + "grad_norm": 1.2297037839889526, + "learning_rate": 9.975706677438589e-06, + "loss": 0.3399, + "step": 3020 + }, + { + "epoch": 0.06047594024472637, + "grad_norm": 1.108139991760254, + "learning_rate": 9.975674748990422e-06, + "loss": 0.3314, + "step": 3021 + }, + { + "epoch": 0.060495958761854714, + "grad_norm": 1.0474640130996704, + "learning_rate": 9.975642799625589e-06, + "loss": 0.3407, + "step": 3022 + }, + { + "epoch": 0.06051597727898306, + "grad_norm": 1.0748794078826904, + "learning_rate": 9.975610829344225e-06, + "loss": 0.3447, + "step": 3023 + }, + { + "epoch": 0.0605359957961114, + "grad_norm": 1.1247010231018066, + "learning_rate": 9.975578838146463e-06, + "loss": 0.328, + "step": 3024 + }, + { + "epoch": 0.060556014313239745, + "grad_norm": 1.2098904848098755, + "learning_rate": 9.97554682603244e-06, + "loss": 0.3427, + "step": 3025 + }, + { + "epoch": 0.06057603283036809, + "grad_norm": 0.9816367626190186, + "learning_rate": 9.975514793002287e-06, + "loss": 0.2543, + "step": 3026 + }, + { + "epoch": 0.06059605134749643, + "grad_norm": 1.8695212602615356, + "learning_rate": 9.975482739056142e-06, + "loss": 0.8921, + "step": 3027 + }, + { + "epoch": 0.060616069864624776, + "grad_norm": 1.3549376726150513, + "learning_rate": 9.975450664194138e-06, + "loss": 0.3172, + "step": 3028 + }, + { + "epoch": 0.06063608838175312, + "grad_norm": 1.2600187063217163, + "learning_rate": 9.975418568416408e-06, + "loss": 0.2891, + "step": 3029 + }, + { + "epoch": 0.060656106898881464, + "grad_norm": 1.1335060596466064, + "learning_rate": 9.975386451723092e-06, + "loss": 0.2756, + "step": 3030 + }, + { + "epoch": 0.06067612541600981, + "grad_norm": 1.1229732036590576, + "learning_rate": 9.97535431411432e-06, + "loss": 0.3196, + "step": 3031 + }, + { + "epoch": 0.06069614393313815, + "grad_norm": 2.1651611328125, + "learning_rate": 9.97532215559023e-06, + "loss": 0.8389, + "step": 3032 + }, + { + "epoch": 0.060716162450266495, + "grad_norm": 1.0078918933868408, + "learning_rate": 9.975289976150957e-06, + "loss": 0.2824, + "step": 3033 + }, + { + "epoch": 0.06073618096739484, + "grad_norm": 1.4525777101516724, + "learning_rate": 9.975257775796634e-06, + "loss": 0.3495, + "step": 3034 + }, + { + "epoch": 0.06075619948452318, + "grad_norm": 1.2528884410858154, + "learning_rate": 9.9752255545274e-06, + "loss": 0.3642, + "step": 3035 + }, + { + "epoch": 0.060776218001651526, + "grad_norm": 1.0907000303268433, + "learning_rate": 9.975193312343387e-06, + "loss": 0.325, + "step": 3036 + }, + { + "epoch": 0.06079623651877987, + "grad_norm": 1.9567937850952148, + "learning_rate": 9.975161049244733e-06, + "loss": 0.7793, + "step": 3037 + }, + { + "epoch": 0.060816255035908214, + "grad_norm": 1.8879141807556152, + "learning_rate": 9.975128765231573e-06, + "loss": 0.8811, + "step": 3038 + }, + { + "epoch": 0.06083627355303656, + "grad_norm": 1.0449583530426025, + "learning_rate": 9.97509646030404e-06, + "loss": 0.3363, + "step": 3039 + }, + { + "epoch": 0.0608562920701649, + "grad_norm": 1.2431726455688477, + "learning_rate": 9.975064134462273e-06, + "loss": 0.3625, + "step": 3040 + }, + { + "epoch": 0.060876310587293245, + "grad_norm": 1.0844252109527588, + "learning_rate": 9.975031787706409e-06, + "loss": 0.3339, + "step": 3041 + }, + { + "epoch": 0.06089632910442159, + "grad_norm": 1.0163707733154297, + "learning_rate": 9.97499942003658e-06, + "loss": 0.3047, + "step": 3042 + }, + { + "epoch": 0.06091634762154993, + "grad_norm": 1.184043049812317, + "learning_rate": 9.974967031452925e-06, + "loss": 0.3125, + "step": 3043 + }, + { + "epoch": 0.060936366138678276, + "grad_norm": 1.07407546043396, + "learning_rate": 9.97493462195558e-06, + "loss": 0.3438, + "step": 3044 + }, + { + "epoch": 0.06095638465580662, + "grad_norm": 1.2284603118896484, + "learning_rate": 9.974902191544677e-06, + "loss": 0.3314, + "step": 3045 + }, + { + "epoch": 0.06097640317293496, + "grad_norm": 1.1759357452392578, + "learning_rate": 9.974869740220358e-06, + "loss": 0.3384, + "step": 3046 + }, + { + "epoch": 0.06099642169006331, + "grad_norm": 1.1344419717788696, + "learning_rate": 9.974837267982759e-06, + "loss": 0.3247, + "step": 3047 + }, + { + "epoch": 0.06101644020719165, + "grad_norm": 1.1375809907913208, + "learning_rate": 9.974804774832012e-06, + "loss": 0.3402, + "step": 3048 + }, + { + "epoch": 0.061036458724319995, + "grad_norm": 1.0771703720092773, + "learning_rate": 9.974772260768258e-06, + "loss": 0.301, + "step": 3049 + }, + { + "epoch": 0.06105647724144834, + "grad_norm": 1.1354421377182007, + "learning_rate": 9.97473972579163e-06, + "loss": 0.3539, + "step": 3050 + }, + { + "epoch": 0.06107649575857668, + "grad_norm": 1.1766704320907593, + "learning_rate": 9.974707169902268e-06, + "loss": 0.364, + "step": 3051 + }, + { + "epoch": 0.061096514275705026, + "grad_norm": 1.1325165033340454, + "learning_rate": 9.974674593100308e-06, + "loss": 0.3405, + "step": 3052 + }, + { + "epoch": 0.06111653279283337, + "grad_norm": 1.121610164642334, + "learning_rate": 9.974641995385887e-06, + "loss": 0.3333, + "step": 3053 + }, + { + "epoch": 0.06113655130996171, + "grad_norm": 1.1689404249191284, + "learning_rate": 9.974609376759141e-06, + "loss": 0.3448, + "step": 3054 + }, + { + "epoch": 0.06115656982709006, + "grad_norm": 1.1144801378250122, + "learning_rate": 9.974576737220208e-06, + "loss": 0.2885, + "step": 3055 + }, + { + "epoch": 0.0611765883442184, + "grad_norm": 1.9353452920913696, + "learning_rate": 9.974544076769225e-06, + "loss": 0.8408, + "step": 3056 + }, + { + "epoch": 0.061196606861346745, + "grad_norm": 1.1330691576004028, + "learning_rate": 9.97451139540633e-06, + "loss": 0.3135, + "step": 3057 + }, + { + "epoch": 0.06121662537847509, + "grad_norm": 1.1250351667404175, + "learning_rate": 9.974478693131657e-06, + "loss": 0.3231, + "step": 3058 + }, + { + "epoch": 0.06123664389560343, + "grad_norm": 1.1940796375274658, + "learning_rate": 9.974445969945348e-06, + "loss": 0.3572, + "step": 3059 + }, + { + "epoch": 0.061256662412731776, + "grad_norm": 1.1981908082962036, + "learning_rate": 9.974413225847538e-06, + "loss": 0.3027, + "step": 3060 + }, + { + "epoch": 0.06127668092986012, + "grad_norm": 1.1399548053741455, + "learning_rate": 9.974380460838366e-06, + "loss": 0.3323, + "step": 3061 + }, + { + "epoch": 0.06129669944698846, + "grad_norm": 1.3243807554244995, + "learning_rate": 9.974347674917967e-06, + "loss": 0.3509, + "step": 3062 + }, + { + "epoch": 0.06131671796411681, + "grad_norm": 1.005210280418396, + "learning_rate": 9.974314868086483e-06, + "loss": 0.2984, + "step": 3063 + }, + { + "epoch": 0.06133673648124515, + "grad_norm": 1.1161983013153076, + "learning_rate": 9.97428204034405e-06, + "loss": 0.3632, + "step": 3064 + }, + { + "epoch": 0.061356754998373494, + "grad_norm": 1.1294488906860352, + "learning_rate": 9.974249191690804e-06, + "loss": 0.3522, + "step": 3065 + }, + { + "epoch": 0.06137677351550184, + "grad_norm": 1.0742034912109375, + "learning_rate": 9.974216322126886e-06, + "loss": 0.3093, + "step": 3066 + }, + { + "epoch": 0.06139679203263018, + "grad_norm": 1.0209532976150513, + "learning_rate": 9.974183431652434e-06, + "loss": 0.2961, + "step": 3067 + }, + { + "epoch": 0.061416810549758526, + "grad_norm": 1.2194410562515259, + "learning_rate": 9.974150520267584e-06, + "loss": 0.3109, + "step": 3068 + }, + { + "epoch": 0.06143682906688687, + "grad_norm": 2.002161979675293, + "learning_rate": 9.974117587972475e-06, + "loss": 0.9067, + "step": 3069 + }, + { + "epoch": 0.06145684758401521, + "grad_norm": 1.1045942306518555, + "learning_rate": 9.974084634767247e-06, + "loss": 0.3354, + "step": 3070 + }, + { + "epoch": 0.06147686610114356, + "grad_norm": 0.9835676550865173, + "learning_rate": 9.974051660652037e-06, + "loss": 0.3137, + "step": 3071 + }, + { + "epoch": 0.0614968846182719, + "grad_norm": 1.0987021923065186, + "learning_rate": 9.974018665626985e-06, + "loss": 0.3463, + "step": 3072 + }, + { + "epoch": 0.061516903135400244, + "grad_norm": 1.1215314865112305, + "learning_rate": 9.97398564969223e-06, + "loss": 0.3389, + "step": 3073 + }, + { + "epoch": 0.06153692165252859, + "grad_norm": 1.1565272808074951, + "learning_rate": 9.973952612847907e-06, + "loss": 0.3128, + "step": 3074 + }, + { + "epoch": 0.06155694016965693, + "grad_norm": 1.1492927074432373, + "learning_rate": 9.97391955509416e-06, + "loss": 0.3722, + "step": 3075 + }, + { + "epoch": 0.061576958686785276, + "grad_norm": 1.0584275722503662, + "learning_rate": 9.973886476431125e-06, + "loss": 0.3318, + "step": 3076 + }, + { + "epoch": 0.06159697720391362, + "grad_norm": 1.16837477684021, + "learning_rate": 9.973853376858942e-06, + "loss": 0.3012, + "step": 3077 + }, + { + "epoch": 0.06161699572104196, + "grad_norm": 1.0664072036743164, + "learning_rate": 9.973820256377748e-06, + "loss": 0.3689, + "step": 3078 + }, + { + "epoch": 0.06163701423817031, + "grad_norm": 1.0834522247314453, + "learning_rate": 9.973787114987686e-06, + "loss": 0.3205, + "step": 3079 + }, + { + "epoch": 0.06165703275529865, + "grad_norm": 1.3478378057479858, + "learning_rate": 9.973753952688892e-06, + "loss": 0.336, + "step": 3080 + }, + { + "epoch": 0.061677051272426994, + "grad_norm": 1.1869052648544312, + "learning_rate": 9.973720769481508e-06, + "loss": 0.3638, + "step": 3081 + }, + { + "epoch": 0.06169706978955534, + "grad_norm": 1.0160908699035645, + "learning_rate": 9.973687565365671e-06, + "loss": 0.29, + "step": 3082 + }, + { + "epoch": 0.06171708830668368, + "grad_norm": 1.0889091491699219, + "learning_rate": 9.973654340341523e-06, + "loss": 0.3535, + "step": 3083 + }, + { + "epoch": 0.061737106823812025, + "grad_norm": 1.0628653764724731, + "learning_rate": 9.973621094409203e-06, + "loss": 0.3611, + "step": 3084 + }, + { + "epoch": 0.06175712534094037, + "grad_norm": 1.0469284057617188, + "learning_rate": 9.973587827568848e-06, + "loss": 0.3375, + "step": 3085 + }, + { + "epoch": 0.06177714385806871, + "grad_norm": 1.0870873928070068, + "learning_rate": 9.9735545398206e-06, + "loss": 0.3606, + "step": 3086 + }, + { + "epoch": 0.06179716237519706, + "grad_norm": 1.0486867427825928, + "learning_rate": 9.973521231164601e-06, + "loss": 0.3284, + "step": 3087 + }, + { + "epoch": 0.0618171808923254, + "grad_norm": 1.083859920501709, + "learning_rate": 9.97348790160099e-06, + "loss": 0.3081, + "step": 3088 + }, + { + "epoch": 0.061837199409453744, + "grad_norm": 1.0696872472763062, + "learning_rate": 9.973454551129905e-06, + "loss": 0.3054, + "step": 3089 + }, + { + "epoch": 0.06185721792658209, + "grad_norm": 1.1164323091506958, + "learning_rate": 9.973421179751485e-06, + "loss": 0.3652, + "step": 3090 + }, + { + "epoch": 0.06187723644371043, + "grad_norm": 2.0553925037384033, + "learning_rate": 9.973387787465875e-06, + "loss": 0.8722, + "step": 3091 + }, + { + "epoch": 0.061897254960838775, + "grad_norm": 1.1189223527908325, + "learning_rate": 9.973354374273211e-06, + "loss": 0.3466, + "step": 3092 + }, + { + "epoch": 0.06191727347796712, + "grad_norm": 1.1330360174179077, + "learning_rate": 9.973320940173638e-06, + "loss": 0.3399, + "step": 3093 + }, + { + "epoch": 0.06193729199509546, + "grad_norm": 1.1392123699188232, + "learning_rate": 9.973287485167293e-06, + "loss": 0.3695, + "step": 3094 + }, + { + "epoch": 0.06195731051222381, + "grad_norm": 1.0769712924957275, + "learning_rate": 9.973254009254318e-06, + "loss": 0.3341, + "step": 3095 + }, + { + "epoch": 0.06197732902935215, + "grad_norm": 1.2803901433944702, + "learning_rate": 9.973220512434854e-06, + "loss": 0.3483, + "step": 3096 + }, + { + "epoch": 0.061997347546480494, + "grad_norm": 1.0304144620895386, + "learning_rate": 9.973186994709038e-06, + "loss": 0.3238, + "step": 3097 + }, + { + "epoch": 0.06201736606360884, + "grad_norm": 1.0688509941101074, + "learning_rate": 9.973153456077018e-06, + "loss": 0.3363, + "step": 3098 + }, + { + "epoch": 0.06203738458073718, + "grad_norm": 1.8615642786026, + "learning_rate": 9.97311989653893e-06, + "loss": 0.8659, + "step": 3099 + }, + { + "epoch": 0.062057403097865525, + "grad_norm": 2.0189595222473145, + "learning_rate": 9.973086316094916e-06, + "loss": 0.8895, + "step": 3100 + }, + { + "epoch": 0.06207742161499387, + "grad_norm": 1.1965023279190063, + "learning_rate": 9.973052714745117e-06, + "loss": 0.3594, + "step": 3101 + }, + { + "epoch": 0.06209744013212221, + "grad_norm": 1.0523109436035156, + "learning_rate": 9.973019092489673e-06, + "loss": 0.3652, + "step": 3102 + }, + { + "epoch": 0.062117458649250556, + "grad_norm": 1.4408777952194214, + "learning_rate": 9.972985449328729e-06, + "loss": 0.3531, + "step": 3103 + }, + { + "epoch": 0.0621374771663789, + "grad_norm": 1.1493027210235596, + "learning_rate": 9.972951785262424e-06, + "loss": 0.3432, + "step": 3104 + }, + { + "epoch": 0.062157495683507244, + "grad_norm": 1.1311469078063965, + "learning_rate": 9.9729181002909e-06, + "loss": 0.3138, + "step": 3105 + }, + { + "epoch": 0.06217751420063559, + "grad_norm": 1.021727204322815, + "learning_rate": 9.972884394414299e-06, + "loss": 0.3006, + "step": 3106 + }, + { + "epoch": 0.06219753271776393, + "grad_norm": 1.1381559371948242, + "learning_rate": 9.97285066763276e-06, + "loss": 0.3859, + "step": 3107 + }, + { + "epoch": 0.062217551234892275, + "grad_norm": 1.054542899131775, + "learning_rate": 9.97281691994643e-06, + "loss": 0.3604, + "step": 3108 + }, + { + "epoch": 0.06223756975202062, + "grad_norm": 1.0991244316101074, + "learning_rate": 9.972783151355446e-06, + "loss": 0.3081, + "step": 3109 + }, + { + "epoch": 0.06225758826914896, + "grad_norm": 1.0359601974487305, + "learning_rate": 9.972749361859952e-06, + "loss": 0.37, + "step": 3110 + }, + { + "epoch": 0.062277606786277306, + "grad_norm": 1.1205203533172607, + "learning_rate": 9.97271555146009e-06, + "loss": 0.2742, + "step": 3111 + }, + { + "epoch": 0.06229762530340565, + "grad_norm": 1.8217939138412476, + "learning_rate": 9.972681720156002e-06, + "loss": 0.8438, + "step": 3112 + }, + { + "epoch": 0.062317643820533994, + "grad_norm": 1.1656900644302368, + "learning_rate": 9.97264786794783e-06, + "loss": 0.3759, + "step": 3113 + }, + { + "epoch": 0.06233766233766234, + "grad_norm": 1.0171337127685547, + "learning_rate": 9.972613994835716e-06, + "loss": 0.3135, + "step": 3114 + }, + { + "epoch": 0.06235768085479068, + "grad_norm": 1.2011618614196777, + "learning_rate": 9.972580100819804e-06, + "loss": 0.3336, + "step": 3115 + }, + { + "epoch": 0.062377699371919025, + "grad_norm": 1.0216902494430542, + "learning_rate": 9.972546185900236e-06, + "loss": 0.3121, + "step": 3116 + }, + { + "epoch": 0.06239771788904737, + "grad_norm": 1.9720712900161743, + "learning_rate": 9.972512250077153e-06, + "loss": 0.8316, + "step": 3117 + }, + { + "epoch": 0.06241773640617571, + "grad_norm": 1.316367506980896, + "learning_rate": 9.972478293350698e-06, + "loss": 0.3727, + "step": 3118 + }, + { + "epoch": 0.062437754923304056, + "grad_norm": 1.144375205039978, + "learning_rate": 9.972444315721014e-06, + "loss": 0.3552, + "step": 3119 + }, + { + "epoch": 0.0624577734404324, + "grad_norm": 1.2324864864349365, + "learning_rate": 9.972410317188245e-06, + "loss": 0.3547, + "step": 3120 + }, + { + "epoch": 0.062477791957560744, + "grad_norm": 1.1374146938323975, + "learning_rate": 9.972376297752532e-06, + "loss": 0.3799, + "step": 3121 + }, + { + "epoch": 0.06249781047468909, + "grad_norm": 1.9821016788482666, + "learning_rate": 9.972342257414022e-06, + "loss": 0.8906, + "step": 3122 + }, + { + "epoch": 0.06251782899181743, + "grad_norm": 2.047835111618042, + "learning_rate": 9.972308196172852e-06, + "loss": 0.8883, + "step": 3123 + }, + { + "epoch": 0.06253784750894577, + "grad_norm": 1.1259249448776245, + "learning_rate": 9.97227411402917e-06, + "loss": 0.3266, + "step": 3124 + }, + { + "epoch": 0.06255786602607412, + "grad_norm": 0.9182649850845337, + "learning_rate": 9.972240010983119e-06, + "loss": 0.2668, + "step": 3125 + }, + { + "epoch": 0.06257788454320246, + "grad_norm": 1.1432316303253174, + "learning_rate": 9.972205887034838e-06, + "loss": 0.3311, + "step": 3126 + }, + { + "epoch": 0.0625979030603308, + "grad_norm": 1.0392261743545532, + "learning_rate": 9.972171742184475e-06, + "loss": 0.3418, + "step": 3127 + }, + { + "epoch": 0.06261792157745914, + "grad_norm": 1.0786080360412598, + "learning_rate": 9.972137576432172e-06, + "loss": 0.3771, + "step": 3128 + }, + { + "epoch": 0.0626379400945875, + "grad_norm": 1.0251283645629883, + "learning_rate": 9.972103389778073e-06, + "loss": 0.3156, + "step": 3129 + }, + { + "epoch": 0.06265795861171583, + "grad_norm": 1.0891693830490112, + "learning_rate": 9.97206918222232e-06, + "loss": 0.3596, + "step": 3130 + }, + { + "epoch": 0.06267797712884418, + "grad_norm": 1.0598433017730713, + "learning_rate": 9.97203495376506e-06, + "loss": 0.3424, + "step": 3131 + }, + { + "epoch": 0.06269799564597252, + "grad_norm": 1.1695560216903687, + "learning_rate": 9.972000704406435e-06, + "loss": 0.3665, + "step": 3132 + }, + { + "epoch": 0.06271801416310087, + "grad_norm": 1.072617769241333, + "learning_rate": 9.971966434146587e-06, + "loss": 0.3149, + "step": 3133 + }, + { + "epoch": 0.0627380326802292, + "grad_norm": 1.2667814493179321, + "learning_rate": 9.971932142985663e-06, + "loss": 0.322, + "step": 3134 + }, + { + "epoch": 0.06275805119735756, + "grad_norm": 1.0934998989105225, + "learning_rate": 9.971897830923807e-06, + "loss": 0.3205, + "step": 3135 + }, + { + "epoch": 0.06277806971448589, + "grad_norm": 1.1219562292099, + "learning_rate": 9.971863497961163e-06, + "loss": 0.3259, + "step": 3136 + }, + { + "epoch": 0.06279808823161424, + "grad_norm": 1.1769037246704102, + "learning_rate": 9.971829144097873e-06, + "loss": 0.3279, + "step": 3137 + }, + { + "epoch": 0.06281810674874258, + "grad_norm": 1.0996859073638916, + "learning_rate": 9.971794769334085e-06, + "loss": 0.3302, + "step": 3138 + }, + { + "epoch": 0.06283812526587093, + "grad_norm": 1.9062299728393555, + "learning_rate": 9.97176037366994e-06, + "loss": 0.898, + "step": 3139 + }, + { + "epoch": 0.06285814378299927, + "grad_norm": 1.233168601989746, + "learning_rate": 9.971725957105585e-06, + "loss": 0.3124, + "step": 3140 + }, + { + "epoch": 0.06287816230012762, + "grad_norm": 1.1390423774719238, + "learning_rate": 9.971691519641164e-06, + "loss": 0.3118, + "step": 3141 + }, + { + "epoch": 0.06289818081725596, + "grad_norm": 0.999279260635376, + "learning_rate": 9.971657061276822e-06, + "loss": 0.3107, + "step": 3142 + }, + { + "epoch": 0.0629181993343843, + "grad_norm": 1.1795002222061157, + "learning_rate": 9.971622582012702e-06, + "loss": 0.3614, + "step": 3143 + }, + { + "epoch": 0.06293821785151264, + "grad_norm": 1.046667218208313, + "learning_rate": 9.971588081848953e-06, + "loss": 0.3222, + "step": 3144 + }, + { + "epoch": 0.062958236368641, + "grad_norm": 1.1079281568527222, + "learning_rate": 9.971553560785716e-06, + "loss": 0.3405, + "step": 3145 + }, + { + "epoch": 0.06297825488576933, + "grad_norm": 1.0542404651641846, + "learning_rate": 9.971519018823138e-06, + "loss": 0.3941, + "step": 3146 + }, + { + "epoch": 0.06299827340289768, + "grad_norm": 1.0969517230987549, + "learning_rate": 9.971484455961365e-06, + "loss": 0.3124, + "step": 3147 + }, + { + "epoch": 0.06301829192002602, + "grad_norm": 1.0323278903961182, + "learning_rate": 9.97144987220054e-06, + "loss": 0.2931, + "step": 3148 + }, + { + "epoch": 0.06303831043715437, + "grad_norm": 1.1215001344680786, + "learning_rate": 9.971415267540811e-06, + "loss": 0.3589, + "step": 3149 + }, + { + "epoch": 0.0630583289542827, + "grad_norm": 1.0126482248306274, + "learning_rate": 9.971380641982321e-06, + "loss": 0.3132, + "step": 3150 + }, + { + "epoch": 0.06307834747141106, + "grad_norm": 1.0966291427612305, + "learning_rate": 9.971345995525216e-06, + "loss": 0.2647, + "step": 3151 + }, + { + "epoch": 0.06309836598853939, + "grad_norm": 1.209218978881836, + "learning_rate": 9.971311328169643e-06, + "loss": 0.3707, + "step": 3152 + }, + { + "epoch": 0.06311838450566774, + "grad_norm": 1.1012327671051025, + "learning_rate": 9.971276639915748e-06, + "loss": 0.3245, + "step": 3153 + }, + { + "epoch": 0.06313840302279608, + "grad_norm": 1.1261523962020874, + "learning_rate": 9.971241930763676e-06, + "loss": 0.3592, + "step": 3154 + }, + { + "epoch": 0.06315842153992443, + "grad_norm": 1.1973289251327515, + "learning_rate": 9.97120720071357e-06, + "loss": 0.3459, + "step": 3155 + }, + { + "epoch": 0.06317844005705277, + "grad_norm": 1.8347737789154053, + "learning_rate": 9.971172449765581e-06, + "loss": 0.9, + "step": 3156 + }, + { + "epoch": 0.06319845857418112, + "grad_norm": 1.179221510887146, + "learning_rate": 9.971137677919853e-06, + "loss": 0.336, + "step": 3157 + }, + { + "epoch": 0.06321847709130946, + "grad_norm": 1.144659399986267, + "learning_rate": 9.97110288517653e-06, + "loss": 0.3857, + "step": 3158 + }, + { + "epoch": 0.0632384956084378, + "grad_norm": 1.1500778198242188, + "learning_rate": 9.971068071535764e-06, + "loss": 0.4012, + "step": 3159 + }, + { + "epoch": 0.06325851412556614, + "grad_norm": 1.0886046886444092, + "learning_rate": 9.971033236997694e-06, + "loss": 0.3668, + "step": 3160 + }, + { + "epoch": 0.0632785326426945, + "grad_norm": 1.7019189596176147, + "learning_rate": 9.97099838156247e-06, + "loss": 0.8531, + "step": 3161 + }, + { + "epoch": 0.06329855115982283, + "grad_norm": 1.4199696779251099, + "learning_rate": 9.97096350523024e-06, + "loss": 0.3484, + "step": 3162 + }, + { + "epoch": 0.06331856967695118, + "grad_norm": 1.1402970552444458, + "learning_rate": 9.970928608001149e-06, + "loss": 0.3318, + "step": 3163 + }, + { + "epoch": 0.06333858819407952, + "grad_norm": 1.1070771217346191, + "learning_rate": 9.970893689875344e-06, + "loss": 0.3408, + "step": 3164 + }, + { + "epoch": 0.06335860671120787, + "grad_norm": 1.0090633630752563, + "learning_rate": 9.970858750852973e-06, + "loss": 0.3288, + "step": 3165 + }, + { + "epoch": 0.0633786252283362, + "grad_norm": 1.185854196548462, + "learning_rate": 9.97082379093418e-06, + "loss": 0.3462, + "step": 3166 + }, + { + "epoch": 0.06339864374546456, + "grad_norm": 1.1529113054275513, + "learning_rate": 9.970788810119112e-06, + "loss": 0.333, + "step": 3167 + }, + { + "epoch": 0.06341866226259289, + "grad_norm": 1.9339501857757568, + "learning_rate": 9.97075380840792e-06, + "loss": 0.9092, + "step": 3168 + }, + { + "epoch": 0.06343868077972124, + "grad_norm": 1.1303108930587769, + "learning_rate": 9.970718785800748e-06, + "loss": 0.3798, + "step": 3169 + }, + { + "epoch": 0.06345869929684958, + "grad_norm": 0.976895809173584, + "learning_rate": 9.970683742297745e-06, + "loss": 0.3223, + "step": 3170 + }, + { + "epoch": 0.06347871781397793, + "grad_norm": 1.0783404111862183, + "learning_rate": 9.970648677899055e-06, + "loss": 0.3172, + "step": 3171 + }, + { + "epoch": 0.06349873633110627, + "grad_norm": 1.1692982912063599, + "learning_rate": 9.970613592604829e-06, + "loss": 0.3425, + "step": 3172 + }, + { + "epoch": 0.06351875484823462, + "grad_norm": 1.006308674812317, + "learning_rate": 9.970578486415213e-06, + "loss": 0.2775, + "step": 3173 + }, + { + "epoch": 0.06353877336536295, + "grad_norm": 1.2098089456558228, + "learning_rate": 9.970543359330353e-06, + "loss": 0.3521, + "step": 3174 + }, + { + "epoch": 0.0635587918824913, + "grad_norm": 1.0463027954101562, + "learning_rate": 9.970508211350401e-06, + "loss": 0.2779, + "step": 3175 + }, + { + "epoch": 0.06357881039961964, + "grad_norm": 1.231911540031433, + "learning_rate": 9.970473042475502e-06, + "loss": 0.3776, + "step": 3176 + }, + { + "epoch": 0.063598828916748, + "grad_norm": 1.1242519617080688, + "learning_rate": 9.970437852705803e-06, + "loss": 0.3104, + "step": 3177 + }, + { + "epoch": 0.06361884743387633, + "grad_norm": 1.1755127906799316, + "learning_rate": 9.970402642041452e-06, + "loss": 0.2977, + "step": 3178 + }, + { + "epoch": 0.06363886595100468, + "grad_norm": 1.0856910943984985, + "learning_rate": 9.9703674104826e-06, + "loss": 0.3045, + "step": 3179 + }, + { + "epoch": 0.06365888446813302, + "grad_norm": 1.0235792398452759, + "learning_rate": 9.970332158029393e-06, + "loss": 0.3149, + "step": 3180 + }, + { + "epoch": 0.06367890298526137, + "grad_norm": 1.6876051425933838, + "learning_rate": 9.970296884681977e-06, + "loss": 0.3349, + "step": 3181 + }, + { + "epoch": 0.0636989215023897, + "grad_norm": 1.8995873928070068, + "learning_rate": 9.970261590440504e-06, + "loss": 0.8507, + "step": 3182 + }, + { + "epoch": 0.06371894001951806, + "grad_norm": 1.0874712467193604, + "learning_rate": 9.970226275305121e-06, + "loss": 0.343, + "step": 3183 + }, + { + "epoch": 0.06373895853664639, + "grad_norm": 1.1463243961334229, + "learning_rate": 9.970190939275974e-06, + "loss": 0.3163, + "step": 3184 + }, + { + "epoch": 0.06375897705377474, + "grad_norm": 1.0909626483917236, + "learning_rate": 9.970155582353217e-06, + "loss": 0.3157, + "step": 3185 + }, + { + "epoch": 0.06377899557090308, + "grad_norm": 1.2099027633666992, + "learning_rate": 9.970120204536996e-06, + "loss": 0.288, + "step": 3186 + }, + { + "epoch": 0.06379901408803143, + "grad_norm": 1.1042038202285767, + "learning_rate": 9.970084805827455e-06, + "loss": 0.3174, + "step": 3187 + }, + { + "epoch": 0.06381903260515977, + "grad_norm": 1.1752009391784668, + "learning_rate": 9.970049386224752e-06, + "loss": 0.2795, + "step": 3188 + }, + { + "epoch": 0.06383905112228812, + "grad_norm": 1.238309383392334, + "learning_rate": 9.970013945729026e-06, + "loss": 0.3182, + "step": 3189 + }, + { + "epoch": 0.06385906963941645, + "grad_norm": 1.0651532411575317, + "learning_rate": 9.969978484340435e-06, + "loss": 0.3204, + "step": 3190 + }, + { + "epoch": 0.0638790881565448, + "grad_norm": 1.0657612085342407, + "learning_rate": 9.969943002059122e-06, + "loss": 0.3208, + "step": 3191 + }, + { + "epoch": 0.06389910667367314, + "grad_norm": 2.215639352798462, + "learning_rate": 9.96990749888524e-06, + "loss": 0.8882, + "step": 3192 + }, + { + "epoch": 0.06391912519080149, + "grad_norm": 0.9984332323074341, + "learning_rate": 9.969871974818935e-06, + "loss": 0.3091, + "step": 3193 + }, + { + "epoch": 0.06393914370792983, + "grad_norm": 1.0918464660644531, + "learning_rate": 9.969836429860358e-06, + "loss": 0.3454, + "step": 3194 + }, + { + "epoch": 0.06395916222505818, + "grad_norm": 1.0931994915008545, + "learning_rate": 9.969800864009658e-06, + "loss": 0.3112, + "step": 3195 + }, + { + "epoch": 0.06397918074218652, + "grad_norm": 1.2776381969451904, + "learning_rate": 9.969765277266984e-06, + "loss": 0.2808, + "step": 3196 + }, + { + "epoch": 0.06399919925931487, + "grad_norm": 1.0947905778884888, + "learning_rate": 9.969729669632488e-06, + "loss": 0.3341, + "step": 3197 + }, + { + "epoch": 0.0640192177764432, + "grad_norm": 1.2327406406402588, + "learning_rate": 9.969694041106317e-06, + "loss": 0.2828, + "step": 3198 + }, + { + "epoch": 0.06403923629357156, + "grad_norm": 1.3664759397506714, + "learning_rate": 9.969658391688622e-06, + "loss": 0.3117, + "step": 3199 + }, + { + "epoch": 0.06405925481069989, + "grad_norm": 1.156369924545288, + "learning_rate": 9.969622721379552e-06, + "loss": 0.3406, + "step": 3200 + }, + { + "epoch": 0.06407927332782824, + "grad_norm": 1.0793430805206299, + "learning_rate": 9.969587030179257e-06, + "loss": 0.3844, + "step": 3201 + }, + { + "epoch": 0.06409929184495658, + "grad_norm": 1.2098366022109985, + "learning_rate": 9.969551318087888e-06, + "loss": 0.3804, + "step": 3202 + }, + { + "epoch": 0.06411931036208493, + "grad_norm": 1.008349061012268, + "learning_rate": 9.969515585105595e-06, + "loss": 0.2653, + "step": 3203 + }, + { + "epoch": 0.06413932887921327, + "grad_norm": 1.0643773078918457, + "learning_rate": 9.96947983123253e-06, + "loss": 0.3568, + "step": 3204 + }, + { + "epoch": 0.06415934739634162, + "grad_norm": 1.2499033212661743, + "learning_rate": 9.969444056468837e-06, + "loss": 0.3277, + "step": 3205 + }, + { + "epoch": 0.06417936591346995, + "grad_norm": 1.0802092552185059, + "learning_rate": 9.969408260814675e-06, + "loss": 0.3072, + "step": 3206 + }, + { + "epoch": 0.0641993844305983, + "grad_norm": 1.295272707939148, + "learning_rate": 9.969372444270187e-06, + "loss": 0.3088, + "step": 3207 + }, + { + "epoch": 0.06421940294772664, + "grad_norm": 1.142231822013855, + "learning_rate": 9.969336606835528e-06, + "loss": 0.3286, + "step": 3208 + }, + { + "epoch": 0.06423942146485499, + "grad_norm": 1.032381534576416, + "learning_rate": 9.969300748510847e-06, + "loss": 0.3173, + "step": 3209 + }, + { + "epoch": 0.06425943998198333, + "grad_norm": 0.9943848848342896, + "learning_rate": 9.969264869296296e-06, + "loss": 0.3017, + "step": 3210 + }, + { + "epoch": 0.06427945849911168, + "grad_norm": 1.1759555339813232, + "learning_rate": 9.969228969192025e-06, + "loss": 0.3204, + "step": 3211 + }, + { + "epoch": 0.06429947701624002, + "grad_norm": 1.1271679401397705, + "learning_rate": 9.969193048198182e-06, + "loss": 0.316, + "step": 3212 + }, + { + "epoch": 0.06431949553336837, + "grad_norm": 1.276343822479248, + "learning_rate": 9.969157106314922e-06, + "loss": 0.3638, + "step": 3213 + }, + { + "epoch": 0.0643395140504967, + "grad_norm": 1.9958728551864624, + "learning_rate": 9.969121143542396e-06, + "loss": 0.8477, + "step": 3214 + }, + { + "epoch": 0.06435953256762506, + "grad_norm": 1.0760035514831543, + "learning_rate": 9.969085159880753e-06, + "loss": 0.3337, + "step": 3215 + }, + { + "epoch": 0.06437955108475339, + "grad_norm": 1.0223288536071777, + "learning_rate": 9.969049155330147e-06, + "loss": 0.2757, + "step": 3216 + }, + { + "epoch": 0.06439956960188174, + "grad_norm": 1.9682186841964722, + "learning_rate": 9.969013129890727e-06, + "loss": 0.8743, + "step": 3217 + }, + { + "epoch": 0.06441958811901008, + "grad_norm": 1.7462078332901, + "learning_rate": 9.968977083562644e-06, + "loss": 0.8105, + "step": 3218 + }, + { + "epoch": 0.06443960663613843, + "grad_norm": 1.09239661693573, + "learning_rate": 9.968941016346053e-06, + "loss": 0.3451, + "step": 3219 + }, + { + "epoch": 0.06445962515326677, + "grad_norm": 1.901999831199646, + "learning_rate": 9.9689049282411e-06, + "loss": 0.9195, + "step": 3220 + }, + { + "epoch": 0.06447964367039512, + "grad_norm": 1.1317265033721924, + "learning_rate": 9.968868819247944e-06, + "loss": 0.3429, + "step": 3221 + }, + { + "epoch": 0.06449966218752345, + "grad_norm": 1.0815366506576538, + "learning_rate": 9.96883268936673e-06, + "loss": 0.317, + "step": 3222 + }, + { + "epoch": 0.0645196807046518, + "grad_norm": 1.174164891242981, + "learning_rate": 9.968796538597611e-06, + "loss": 0.3412, + "step": 3223 + }, + { + "epoch": 0.06453969922178014, + "grad_norm": 1.1872766017913818, + "learning_rate": 9.968760366940743e-06, + "loss": 0.3403, + "step": 3224 + }, + { + "epoch": 0.06455971773890849, + "grad_norm": 1.9703155755996704, + "learning_rate": 9.968724174396277e-06, + "loss": 0.8336, + "step": 3225 + }, + { + "epoch": 0.06457973625603683, + "grad_norm": 2.0641772747039795, + "learning_rate": 9.968687960964363e-06, + "loss": 0.8359, + "step": 3226 + }, + { + "epoch": 0.06459975477316518, + "grad_norm": 1.0898977518081665, + "learning_rate": 9.968651726645153e-06, + "loss": 0.3485, + "step": 3227 + }, + { + "epoch": 0.06461977329029352, + "grad_norm": 1.0882524251937866, + "learning_rate": 9.9686154714388e-06, + "loss": 0.3246, + "step": 3228 + }, + { + "epoch": 0.06463979180742187, + "grad_norm": 1.1372787952423096, + "learning_rate": 9.968579195345458e-06, + "loss": 0.3378, + "step": 3229 + }, + { + "epoch": 0.0646598103245502, + "grad_norm": 1.0670764446258545, + "learning_rate": 9.96854289836528e-06, + "loss": 0.3436, + "step": 3230 + }, + { + "epoch": 0.06467982884167855, + "grad_norm": 1.1880117654800415, + "learning_rate": 9.968506580498414e-06, + "loss": 0.4007, + "step": 3231 + }, + { + "epoch": 0.06469984735880689, + "grad_norm": 1.1275169849395752, + "learning_rate": 9.968470241745017e-06, + "loss": 0.3351, + "step": 3232 + }, + { + "epoch": 0.06471986587593524, + "grad_norm": 1.086534023284912, + "learning_rate": 9.968433882105239e-06, + "loss": 0.3331, + "step": 3233 + }, + { + "epoch": 0.06473988439306358, + "grad_norm": 1.1553704738616943, + "learning_rate": 9.968397501579234e-06, + "loss": 0.3149, + "step": 3234 + }, + { + "epoch": 0.06475990291019193, + "grad_norm": 0.9965959191322327, + "learning_rate": 9.968361100167156e-06, + "loss": 0.3379, + "step": 3235 + }, + { + "epoch": 0.06477992142732027, + "grad_norm": 1.7884498834609985, + "learning_rate": 9.96832467786916e-06, + "loss": 0.906, + "step": 3236 + }, + { + "epoch": 0.06479993994444862, + "grad_norm": 1.1190307140350342, + "learning_rate": 9.968288234685392e-06, + "loss": 0.3186, + "step": 3237 + }, + { + "epoch": 0.06481995846157695, + "grad_norm": 1.0150614976882935, + "learning_rate": 9.968251770616012e-06, + "loss": 0.2806, + "step": 3238 + }, + { + "epoch": 0.0648399769787053, + "grad_norm": 1.1655209064483643, + "learning_rate": 9.968215285661168e-06, + "loss": 0.3354, + "step": 3239 + }, + { + "epoch": 0.06485999549583364, + "grad_norm": 1.1253992319107056, + "learning_rate": 9.968178779821019e-06, + "loss": 0.3541, + "step": 3240 + }, + { + "epoch": 0.06488001401296199, + "grad_norm": 1.1134356260299683, + "learning_rate": 9.968142253095713e-06, + "loss": 0.3187, + "step": 3241 + }, + { + "epoch": 0.06490003253009033, + "grad_norm": 1.425672173500061, + "learning_rate": 9.96810570548541e-06, + "loss": 0.3475, + "step": 3242 + }, + { + "epoch": 0.06492005104721868, + "grad_norm": 1.2214186191558838, + "learning_rate": 9.968069136990256e-06, + "loss": 0.3194, + "step": 3243 + }, + { + "epoch": 0.06494006956434702, + "grad_norm": 1.1614638566970825, + "learning_rate": 9.96803254761041e-06, + "loss": 0.3865, + "step": 3244 + }, + { + "epoch": 0.06496008808147537, + "grad_norm": 1.2375388145446777, + "learning_rate": 9.967995937346024e-06, + "loss": 0.2968, + "step": 3245 + }, + { + "epoch": 0.0649801065986037, + "grad_norm": 4.7421875, + "learning_rate": 9.967959306197252e-06, + "loss": 0.8658, + "step": 3246 + }, + { + "epoch": 0.06500012511573205, + "grad_norm": 1.1437573432922363, + "learning_rate": 9.967922654164248e-06, + "loss": 0.3357, + "step": 3247 + }, + { + "epoch": 0.06502014363286039, + "grad_norm": 1.1495351791381836, + "learning_rate": 9.967885981247167e-06, + "loss": 0.3477, + "step": 3248 + }, + { + "epoch": 0.06504016214998874, + "grad_norm": 1.0624096393585205, + "learning_rate": 9.967849287446162e-06, + "loss": 0.3022, + "step": 3249 + }, + { + "epoch": 0.06506018066711708, + "grad_norm": 1.0394967794418335, + "learning_rate": 9.967812572761388e-06, + "loss": 0.3526, + "step": 3250 + }, + { + "epoch": 0.06508019918424543, + "grad_norm": 1.7073103189468384, + "learning_rate": 9.967775837192999e-06, + "loss": 0.8537, + "step": 3251 + }, + { + "epoch": 0.06510021770137377, + "grad_norm": 1.0014762878417969, + "learning_rate": 9.96773908074115e-06, + "loss": 0.2956, + "step": 3252 + }, + { + "epoch": 0.06512023621850212, + "grad_norm": 1.85820734500885, + "learning_rate": 9.967702303405995e-06, + "loss": 0.9081, + "step": 3253 + }, + { + "epoch": 0.06514025473563045, + "grad_norm": 1.0829529762268066, + "learning_rate": 9.967665505187687e-06, + "loss": 0.3491, + "step": 3254 + }, + { + "epoch": 0.0651602732527588, + "grad_norm": 0.9878683090209961, + "learning_rate": 9.967628686086383e-06, + "loss": 0.3026, + "step": 3255 + }, + { + "epoch": 0.06518029176988714, + "grad_norm": 1.1791328191757202, + "learning_rate": 9.967591846102237e-06, + "loss": 0.2808, + "step": 3256 + }, + { + "epoch": 0.06520031028701549, + "grad_norm": 1.1037887334823608, + "learning_rate": 9.967554985235405e-06, + "loss": 0.4156, + "step": 3257 + }, + { + "epoch": 0.06522032880414383, + "grad_norm": 1.056565761566162, + "learning_rate": 9.96751810348604e-06, + "loss": 0.2932, + "step": 3258 + }, + { + "epoch": 0.06524034732127218, + "grad_norm": 1.2072982788085938, + "learning_rate": 9.967481200854297e-06, + "loss": 0.3064, + "step": 3259 + }, + { + "epoch": 0.06526036583840052, + "grad_norm": 1.9492223262786865, + "learning_rate": 9.967444277340335e-06, + "loss": 0.8787, + "step": 3260 + }, + { + "epoch": 0.06528038435552887, + "grad_norm": 1.128557562828064, + "learning_rate": 9.967407332944304e-06, + "loss": 0.3256, + "step": 3261 + }, + { + "epoch": 0.0653004028726572, + "grad_norm": 1.943425178527832, + "learning_rate": 9.96737036766636e-06, + "loss": 0.8688, + "step": 3262 + }, + { + "epoch": 0.06532042138978555, + "grad_norm": 1.7099891901016235, + "learning_rate": 9.967333381506664e-06, + "loss": 0.83, + "step": 3263 + }, + { + "epoch": 0.06534043990691389, + "grad_norm": 1.126011848449707, + "learning_rate": 9.967296374465364e-06, + "loss": 0.361, + "step": 3264 + }, + { + "epoch": 0.06536045842404224, + "grad_norm": 1.2244880199432373, + "learning_rate": 9.96725934654262e-06, + "loss": 0.3554, + "step": 3265 + }, + { + "epoch": 0.06538047694117058, + "grad_norm": 1.2146755456924438, + "learning_rate": 9.967222297738588e-06, + "loss": 0.3603, + "step": 3266 + }, + { + "epoch": 0.06540049545829893, + "grad_norm": 1.1232801675796509, + "learning_rate": 9.96718522805342e-06, + "loss": 0.3307, + "step": 3267 + }, + { + "epoch": 0.06542051397542727, + "grad_norm": 1.0681787729263306, + "learning_rate": 9.967148137487277e-06, + "loss": 0.2991, + "step": 3268 + }, + { + "epoch": 0.06544053249255562, + "grad_norm": 1.1946090459823608, + "learning_rate": 9.96711102604031e-06, + "loss": 0.3549, + "step": 3269 + }, + { + "epoch": 0.06546055100968395, + "grad_norm": 1.9232794046401978, + "learning_rate": 9.967073893712678e-06, + "loss": 0.8204, + "step": 3270 + }, + { + "epoch": 0.0654805695268123, + "grad_norm": 0.9894646406173706, + "learning_rate": 9.967036740504537e-06, + "loss": 0.3348, + "step": 3271 + }, + { + "epoch": 0.06550058804394064, + "grad_norm": 1.0159804821014404, + "learning_rate": 9.96699956641604e-06, + "loss": 0.2992, + "step": 3272 + }, + { + "epoch": 0.06552060656106899, + "grad_norm": 1.1769922971725464, + "learning_rate": 9.966962371447348e-06, + "loss": 0.3926, + "step": 3273 + }, + { + "epoch": 0.06554062507819733, + "grad_norm": 1.0463097095489502, + "learning_rate": 9.966925155598615e-06, + "loss": 0.3266, + "step": 3274 + }, + { + "epoch": 0.06556064359532568, + "grad_norm": 1.0328830480575562, + "learning_rate": 9.966887918869997e-06, + "loss": 0.3145, + "step": 3275 + }, + { + "epoch": 0.06558066211245402, + "grad_norm": 1.0260206460952759, + "learning_rate": 9.966850661261652e-06, + "loss": 0.3476, + "step": 3276 + }, + { + "epoch": 0.06560068062958237, + "grad_norm": 1.0768510103225708, + "learning_rate": 9.966813382773735e-06, + "loss": 0.3034, + "step": 3277 + }, + { + "epoch": 0.0656206991467107, + "grad_norm": 1.0463812351226807, + "learning_rate": 9.966776083406403e-06, + "loss": 0.3286, + "step": 3278 + }, + { + "epoch": 0.06564071766383905, + "grad_norm": 1.7395151853561401, + "learning_rate": 9.966738763159812e-06, + "loss": 0.8547, + "step": 3279 + }, + { + "epoch": 0.06566073618096739, + "grad_norm": 1.0640419721603394, + "learning_rate": 9.966701422034122e-06, + "loss": 0.3117, + "step": 3280 + }, + { + "epoch": 0.06568075469809574, + "grad_norm": 1.9256013631820679, + "learning_rate": 9.966664060029488e-06, + "loss": 0.8401, + "step": 3281 + }, + { + "epoch": 0.06570077321522408, + "grad_norm": 1.062780499458313, + "learning_rate": 9.966626677146066e-06, + "loss": 0.3037, + "step": 3282 + }, + { + "epoch": 0.06572079173235243, + "grad_norm": 1.2454150915145874, + "learning_rate": 9.966589273384016e-06, + "loss": 0.3503, + "step": 3283 + }, + { + "epoch": 0.06574081024948077, + "grad_norm": 1.003523349761963, + "learning_rate": 9.966551848743493e-06, + "loss": 0.3143, + "step": 3284 + }, + { + "epoch": 0.06576082876660912, + "grad_norm": 1.0945993661880493, + "learning_rate": 9.966514403224654e-06, + "loss": 0.3024, + "step": 3285 + }, + { + "epoch": 0.06578084728373745, + "grad_norm": 1.0357599258422852, + "learning_rate": 9.966476936827657e-06, + "loss": 0.3428, + "step": 3286 + }, + { + "epoch": 0.0658008658008658, + "grad_norm": 1.0822951793670654, + "learning_rate": 9.96643944955266e-06, + "loss": 0.3571, + "step": 3287 + }, + { + "epoch": 0.06582088431799414, + "grad_norm": 1.084435224533081, + "learning_rate": 9.966401941399822e-06, + "loss": 0.3597, + "step": 3288 + }, + { + "epoch": 0.06584090283512249, + "grad_norm": 1.8497185707092285, + "learning_rate": 9.966364412369296e-06, + "loss": 0.7931, + "step": 3289 + }, + { + "epoch": 0.06586092135225083, + "grad_norm": 1.0712547302246094, + "learning_rate": 9.966326862461245e-06, + "loss": 0.3361, + "step": 3290 + }, + { + "epoch": 0.06588093986937918, + "grad_norm": 1.0122599601745605, + "learning_rate": 9.966289291675824e-06, + "loss": 0.343, + "step": 3291 + }, + { + "epoch": 0.06590095838650752, + "grad_norm": 1.2672194242477417, + "learning_rate": 9.966251700013192e-06, + "loss": 0.3181, + "step": 3292 + }, + { + "epoch": 0.06592097690363587, + "grad_norm": 1.0361254215240479, + "learning_rate": 9.966214087473507e-06, + "loss": 0.3314, + "step": 3293 + }, + { + "epoch": 0.0659409954207642, + "grad_norm": 1.1576392650604248, + "learning_rate": 9.966176454056926e-06, + "loss": 0.3071, + "step": 3294 + }, + { + "epoch": 0.06596101393789255, + "grad_norm": 1.8222436904907227, + "learning_rate": 9.966138799763608e-06, + "loss": 0.7618, + "step": 3295 + }, + { + "epoch": 0.06598103245502089, + "grad_norm": 1.0854392051696777, + "learning_rate": 9.96610112459371e-06, + "loss": 0.3231, + "step": 3296 + }, + { + "epoch": 0.06600105097214924, + "grad_norm": 1.0719813108444214, + "learning_rate": 9.966063428547395e-06, + "loss": 0.244, + "step": 3297 + }, + { + "epoch": 0.06602106948927758, + "grad_norm": 1.1494060754776, + "learning_rate": 9.966025711624814e-06, + "loss": 0.3653, + "step": 3298 + }, + { + "epoch": 0.06604108800640593, + "grad_norm": 1.7810943126678467, + "learning_rate": 9.965987973826133e-06, + "loss": 0.8783, + "step": 3299 + }, + { + "epoch": 0.06606110652353427, + "grad_norm": 1.074949860572815, + "learning_rate": 9.965950215151506e-06, + "loss": 0.3164, + "step": 3300 + }, + { + "epoch": 0.06608112504066262, + "grad_norm": 1.251968502998352, + "learning_rate": 9.965912435601091e-06, + "loss": 0.3516, + "step": 3301 + }, + { + "epoch": 0.06610114355779095, + "grad_norm": 1.1466460227966309, + "learning_rate": 9.96587463517505e-06, + "loss": 0.3446, + "step": 3302 + }, + { + "epoch": 0.0661211620749193, + "grad_norm": 1.2237224578857422, + "learning_rate": 9.965836813873543e-06, + "loss": 0.3373, + "step": 3303 + }, + { + "epoch": 0.06614118059204764, + "grad_norm": 1.0478448867797852, + "learning_rate": 9.965798971696724e-06, + "loss": 0.2992, + "step": 3304 + }, + { + "epoch": 0.06616119910917599, + "grad_norm": 2.008185386657715, + "learning_rate": 9.965761108644756e-06, + "loss": 0.831, + "step": 3305 + }, + { + "epoch": 0.06618121762630433, + "grad_norm": 1.1038799285888672, + "learning_rate": 9.965723224717797e-06, + "loss": 0.356, + "step": 3306 + }, + { + "epoch": 0.06620123614343268, + "grad_norm": 1.240586519241333, + "learning_rate": 9.965685319916005e-06, + "loss": 0.3053, + "step": 3307 + }, + { + "epoch": 0.06622125466056102, + "grad_norm": 1.0252612829208374, + "learning_rate": 9.965647394239542e-06, + "loss": 0.3753, + "step": 3308 + }, + { + "epoch": 0.06624127317768937, + "grad_norm": 1.0568987131118774, + "learning_rate": 9.965609447688564e-06, + "loss": 0.3586, + "step": 3309 + }, + { + "epoch": 0.0662612916948177, + "grad_norm": 1.1784697771072388, + "learning_rate": 9.965571480263235e-06, + "loss": 0.3557, + "step": 3310 + }, + { + "epoch": 0.06628131021194605, + "grad_norm": 1.066069483757019, + "learning_rate": 9.96553349196371e-06, + "loss": 0.3315, + "step": 3311 + }, + { + "epoch": 0.06630132872907439, + "grad_norm": 1.835569977760315, + "learning_rate": 9.965495482790151e-06, + "loss": 0.8411, + "step": 3312 + }, + { + "epoch": 0.06632134724620274, + "grad_norm": 1.061038613319397, + "learning_rate": 9.965457452742719e-06, + "loss": 0.3023, + "step": 3313 + }, + { + "epoch": 0.06634136576333108, + "grad_norm": 1.0616942644119263, + "learning_rate": 9.965419401821573e-06, + "loss": 0.343, + "step": 3314 + }, + { + "epoch": 0.06636138428045943, + "grad_norm": 1.1613932847976685, + "learning_rate": 9.965381330026871e-06, + "loss": 0.3327, + "step": 3315 + }, + { + "epoch": 0.06638140279758777, + "grad_norm": 1.0484378337860107, + "learning_rate": 9.965343237358773e-06, + "loss": 0.3689, + "step": 3316 + }, + { + "epoch": 0.06640142131471612, + "grad_norm": 1.0879658460617065, + "learning_rate": 9.965305123817443e-06, + "loss": 0.3549, + "step": 3317 + }, + { + "epoch": 0.06642143983184445, + "grad_norm": 1.1628421545028687, + "learning_rate": 9.965266989403037e-06, + "loss": 0.3682, + "step": 3318 + }, + { + "epoch": 0.0664414583489728, + "grad_norm": 1.0523748397827148, + "learning_rate": 9.96522883411572e-06, + "loss": 0.3206, + "step": 3319 + }, + { + "epoch": 0.06646147686610114, + "grad_norm": 1.1486812829971313, + "learning_rate": 9.965190657955646e-06, + "loss": 0.3616, + "step": 3320 + }, + { + "epoch": 0.06648149538322949, + "grad_norm": 1.2203584909439087, + "learning_rate": 9.965152460922981e-06, + "loss": 0.3344, + "step": 3321 + }, + { + "epoch": 0.06650151390035783, + "grad_norm": 1.1770668029785156, + "learning_rate": 9.965114243017883e-06, + "loss": 0.4105, + "step": 3322 + }, + { + "epoch": 0.06652153241748618, + "grad_norm": 1.0082812309265137, + "learning_rate": 9.965076004240514e-06, + "loss": 0.2934, + "step": 3323 + }, + { + "epoch": 0.06654155093461452, + "grad_norm": 1.0493754148483276, + "learning_rate": 9.965037744591032e-06, + "loss": 0.3439, + "step": 3324 + }, + { + "epoch": 0.06656156945174287, + "grad_norm": 1.1694221496582031, + "learning_rate": 9.964999464069602e-06, + "loss": 0.3651, + "step": 3325 + }, + { + "epoch": 0.0665815879688712, + "grad_norm": 1.1875442266464233, + "learning_rate": 9.964961162676383e-06, + "loss": 0.3646, + "step": 3326 + }, + { + "epoch": 0.06660160648599955, + "grad_norm": 1.2294321060180664, + "learning_rate": 9.964922840411533e-06, + "loss": 0.2971, + "step": 3327 + }, + { + "epoch": 0.06662162500312789, + "grad_norm": 1.9506299495697021, + "learning_rate": 9.964884497275218e-06, + "loss": 0.8201, + "step": 3328 + }, + { + "epoch": 0.06664164352025624, + "grad_norm": 1.8335992097854614, + "learning_rate": 9.964846133267596e-06, + "loss": 0.8369, + "step": 3329 + }, + { + "epoch": 0.06666166203738458, + "grad_norm": 1.2291728258132935, + "learning_rate": 9.964807748388831e-06, + "loss": 0.3483, + "step": 3330 + }, + { + "epoch": 0.06668168055451293, + "grad_norm": 1.195691704750061, + "learning_rate": 9.96476934263908e-06, + "loss": 0.3569, + "step": 3331 + }, + { + "epoch": 0.06670169907164127, + "grad_norm": 1.0760632753372192, + "learning_rate": 9.96473091601851e-06, + "loss": 0.3214, + "step": 3332 + }, + { + "epoch": 0.06672171758876962, + "grad_norm": 1.0916870832443237, + "learning_rate": 9.964692468527277e-06, + "loss": 0.3211, + "step": 3333 + }, + { + "epoch": 0.06674173610589795, + "grad_norm": 1.0991261005401611, + "learning_rate": 9.964654000165548e-06, + "loss": 0.3385, + "step": 3334 + }, + { + "epoch": 0.0667617546230263, + "grad_norm": 1.0762385129928589, + "learning_rate": 9.96461551093348e-06, + "loss": 0.3352, + "step": 3335 + }, + { + "epoch": 0.06678177314015464, + "grad_norm": 1.8420144319534302, + "learning_rate": 9.964577000831237e-06, + "loss": 0.7911, + "step": 3336 + }, + { + "epoch": 0.06680179165728299, + "grad_norm": 1.1904484033584595, + "learning_rate": 9.964538469858981e-06, + "loss": 0.2737, + "step": 3337 + }, + { + "epoch": 0.06682181017441133, + "grad_norm": 1.1280877590179443, + "learning_rate": 9.964499918016875e-06, + "loss": 0.3457, + "step": 3338 + }, + { + "epoch": 0.06684182869153968, + "grad_norm": 1.1948074102401733, + "learning_rate": 9.964461345305076e-06, + "loss": 0.3293, + "step": 3339 + }, + { + "epoch": 0.06686184720866802, + "grad_norm": 1.0028650760650635, + "learning_rate": 9.964422751723752e-06, + "loss": 0.3291, + "step": 3340 + }, + { + "epoch": 0.06688186572579637, + "grad_norm": 1.050834059715271, + "learning_rate": 9.964384137273065e-06, + "loss": 0.3556, + "step": 3341 + }, + { + "epoch": 0.0669018842429247, + "grad_norm": 1.1545203924179077, + "learning_rate": 9.964345501953173e-06, + "loss": 0.3624, + "step": 3342 + }, + { + "epoch": 0.06692190276005305, + "grad_norm": 1.1062548160552979, + "learning_rate": 9.964306845764243e-06, + "loss": 0.3686, + "step": 3343 + }, + { + "epoch": 0.06694192127718139, + "grad_norm": 1.0991013050079346, + "learning_rate": 9.964268168706432e-06, + "loss": 0.3061, + "step": 3344 + }, + { + "epoch": 0.06696193979430974, + "grad_norm": 1.1827929019927979, + "learning_rate": 9.964229470779909e-06, + "loss": 0.3052, + "step": 3345 + }, + { + "epoch": 0.06698195831143808, + "grad_norm": 1.189076542854309, + "learning_rate": 9.964190751984832e-06, + "loss": 0.3191, + "step": 3346 + }, + { + "epoch": 0.06700197682856643, + "grad_norm": 1.1855696439743042, + "learning_rate": 9.964152012321367e-06, + "loss": 0.3051, + "step": 3347 + }, + { + "epoch": 0.06702199534569477, + "grad_norm": 1.0787440538406372, + "learning_rate": 9.964113251789673e-06, + "loss": 0.3134, + "step": 3348 + }, + { + "epoch": 0.06704201386282312, + "grad_norm": 1.0217846632003784, + "learning_rate": 9.964074470389917e-06, + "loss": 0.3698, + "step": 3349 + }, + { + "epoch": 0.06706203237995145, + "grad_norm": 1.15177321434021, + "learning_rate": 9.964035668122259e-06, + "loss": 0.3339, + "step": 3350 + }, + { + "epoch": 0.0670820508970798, + "grad_norm": 1.2096729278564453, + "learning_rate": 9.963996844986863e-06, + "loss": 0.3058, + "step": 3351 + }, + { + "epoch": 0.06710206941420814, + "grad_norm": 1.1273024082183838, + "learning_rate": 9.963958000983895e-06, + "loss": 0.3452, + "step": 3352 + }, + { + "epoch": 0.06712208793133649, + "grad_norm": 1.0161066055297852, + "learning_rate": 9.963919136113514e-06, + "loss": 0.3033, + "step": 3353 + }, + { + "epoch": 0.06714210644846483, + "grad_norm": 0.908512532711029, + "learning_rate": 9.963880250375885e-06, + "loss": 0.3303, + "step": 3354 + }, + { + "epoch": 0.06716212496559318, + "grad_norm": 1.024250864982605, + "learning_rate": 9.963841343771173e-06, + "loss": 0.3091, + "step": 3355 + }, + { + "epoch": 0.06718214348272152, + "grad_norm": 1.142723798751831, + "learning_rate": 9.963802416299538e-06, + "loss": 0.3341, + "step": 3356 + }, + { + "epoch": 0.06720216199984987, + "grad_norm": 1.0446056127548218, + "learning_rate": 9.963763467961148e-06, + "loss": 0.3259, + "step": 3357 + }, + { + "epoch": 0.0672221805169782, + "grad_norm": 1.2046325206756592, + "learning_rate": 9.963724498756164e-06, + "loss": 0.3107, + "step": 3358 + }, + { + "epoch": 0.06724219903410655, + "grad_norm": 1.1584341526031494, + "learning_rate": 9.963685508684749e-06, + "loss": 0.2949, + "step": 3359 + }, + { + "epoch": 0.06726221755123489, + "grad_norm": 1.1816984415054321, + "learning_rate": 9.96364649774707e-06, + "loss": 0.3161, + "step": 3360 + }, + { + "epoch": 0.06728223606836324, + "grad_norm": 1.9006730318069458, + "learning_rate": 9.963607465943288e-06, + "loss": 0.8941, + "step": 3361 + }, + { + "epoch": 0.06730225458549158, + "grad_norm": 1.1276495456695557, + "learning_rate": 9.96356841327357e-06, + "loss": 0.3397, + "step": 3362 + }, + { + "epoch": 0.06732227310261993, + "grad_norm": 1.1917152404785156, + "learning_rate": 9.963529339738078e-06, + "loss": 0.323, + "step": 3363 + }, + { + "epoch": 0.06734229161974827, + "grad_norm": 1.0560493469238281, + "learning_rate": 9.963490245336976e-06, + "loss": 0.3146, + "step": 3364 + }, + { + "epoch": 0.06736231013687662, + "grad_norm": 1.1062129735946655, + "learning_rate": 9.96345113007043e-06, + "loss": 0.3187, + "step": 3365 + }, + { + "epoch": 0.06738232865400495, + "grad_norm": 0.9889781475067139, + "learning_rate": 9.963411993938603e-06, + "loss": 0.3181, + "step": 3366 + }, + { + "epoch": 0.0674023471711333, + "grad_norm": 2.0376052856445312, + "learning_rate": 9.963372836941661e-06, + "loss": 0.884, + "step": 3367 + }, + { + "epoch": 0.06742236568826164, + "grad_norm": 1.2654449939727783, + "learning_rate": 9.963333659079765e-06, + "loss": 0.3164, + "step": 3368 + }, + { + "epoch": 0.06744238420538999, + "grad_norm": 1.8424842357635498, + "learning_rate": 9.963294460353086e-06, + "loss": 0.8508, + "step": 3369 + }, + { + "epoch": 0.06746240272251833, + "grad_norm": 1.0685938596725464, + "learning_rate": 9.963255240761785e-06, + "loss": 0.3477, + "step": 3370 + }, + { + "epoch": 0.06748242123964668, + "grad_norm": 1.046120524406433, + "learning_rate": 9.963216000306025e-06, + "loss": 0.3087, + "step": 3371 + }, + { + "epoch": 0.06750243975677502, + "grad_norm": 1.0874814987182617, + "learning_rate": 9.963176738985974e-06, + "loss": 0.3135, + "step": 3372 + }, + { + "epoch": 0.06752245827390337, + "grad_norm": 1.7712503671646118, + "learning_rate": 9.963137456801794e-06, + "loss": 0.8787, + "step": 3373 + }, + { + "epoch": 0.0675424767910317, + "grad_norm": 1.6770843267440796, + "learning_rate": 9.963098153753657e-06, + "loss": 0.8045, + "step": 3374 + }, + { + "epoch": 0.06756249530816005, + "grad_norm": 1.7484486103057861, + "learning_rate": 9.963058829841719e-06, + "loss": 0.8868, + "step": 3375 + }, + { + "epoch": 0.06758251382528839, + "grad_norm": 1.0862362384796143, + "learning_rate": 9.963019485066153e-06, + "loss": 0.3261, + "step": 3376 + }, + { + "epoch": 0.06760253234241674, + "grad_norm": 1.1288243532180786, + "learning_rate": 9.962980119427119e-06, + "loss": 0.3307, + "step": 3377 + }, + { + "epoch": 0.06762255085954508, + "grad_norm": 1.153892159461975, + "learning_rate": 9.962940732924784e-06, + "loss": 0.3572, + "step": 3378 + }, + { + "epoch": 0.06764256937667343, + "grad_norm": 1.0012222528457642, + "learning_rate": 9.962901325559316e-06, + "loss": 0.2715, + "step": 3379 + }, + { + "epoch": 0.06766258789380176, + "grad_norm": 0.9979632496833801, + "learning_rate": 9.962861897330877e-06, + "loss": 0.2755, + "step": 3380 + }, + { + "epoch": 0.06768260641093012, + "grad_norm": 1.3203336000442505, + "learning_rate": 9.962822448239636e-06, + "loss": 0.3588, + "step": 3381 + }, + { + "epoch": 0.06770262492805845, + "grad_norm": 1.0695198774337769, + "learning_rate": 9.962782978285757e-06, + "loss": 0.3401, + "step": 3382 + }, + { + "epoch": 0.0677226434451868, + "grad_norm": 1.0858087539672852, + "learning_rate": 9.962743487469406e-06, + "loss": 0.3059, + "step": 3383 + }, + { + "epoch": 0.06774266196231514, + "grad_norm": 1.092000126838684, + "learning_rate": 9.962703975790751e-06, + "loss": 0.2967, + "step": 3384 + }, + { + "epoch": 0.06776268047944349, + "grad_norm": 1.1552363634109497, + "learning_rate": 9.962664443249955e-06, + "loss": 0.33, + "step": 3385 + }, + { + "epoch": 0.06778269899657183, + "grad_norm": 1.0360097885131836, + "learning_rate": 9.962624889847186e-06, + "loss": 0.3209, + "step": 3386 + }, + { + "epoch": 0.06780271751370018, + "grad_norm": 1.1608620882034302, + "learning_rate": 9.96258531558261e-06, + "loss": 0.3143, + "step": 3387 + }, + { + "epoch": 0.06782273603082851, + "grad_norm": 1.9197279214859009, + "learning_rate": 9.962545720456394e-06, + "loss": 0.9103, + "step": 3388 + }, + { + "epoch": 0.06784275454795687, + "grad_norm": 1.093279480934143, + "learning_rate": 9.962506104468703e-06, + "loss": 0.2688, + "step": 3389 + }, + { + "epoch": 0.0678627730650852, + "grad_norm": 1.1171238422393799, + "learning_rate": 9.962466467619704e-06, + "loss": 0.4108, + "step": 3390 + }, + { + "epoch": 0.06788279158221355, + "grad_norm": 1.8744860887527466, + "learning_rate": 9.962426809909564e-06, + "loss": 0.9032, + "step": 3391 + }, + { + "epoch": 0.06790281009934189, + "grad_norm": 1.1561179161071777, + "learning_rate": 9.96238713133845e-06, + "loss": 0.3349, + "step": 3392 + }, + { + "epoch": 0.06792282861647024, + "grad_norm": 1.095119595527649, + "learning_rate": 9.962347431906528e-06, + "loss": 0.3606, + "step": 3393 + }, + { + "epoch": 0.06794284713359858, + "grad_norm": 1.1234813928604126, + "learning_rate": 9.962307711613965e-06, + "loss": 0.3502, + "step": 3394 + }, + { + "epoch": 0.06796286565072693, + "grad_norm": 1.0797977447509766, + "learning_rate": 9.96226797046093e-06, + "loss": 0.3225, + "step": 3395 + }, + { + "epoch": 0.06798288416785526, + "grad_norm": 1.8455955982208252, + "learning_rate": 9.962228208447586e-06, + "loss": 0.8438, + "step": 3396 + }, + { + "epoch": 0.06800290268498362, + "grad_norm": 1.1175036430358887, + "learning_rate": 9.962188425574104e-06, + "loss": 0.3383, + "step": 3397 + }, + { + "epoch": 0.06802292120211195, + "grad_norm": 1.2265604734420776, + "learning_rate": 9.96214862184065e-06, + "loss": 0.3206, + "step": 3398 + }, + { + "epoch": 0.0680429397192403, + "grad_norm": 1.1112983226776123, + "learning_rate": 9.962108797247389e-06, + "loss": 0.331, + "step": 3399 + }, + { + "epoch": 0.06806295823636864, + "grad_norm": 1.2156968116760254, + "learning_rate": 9.962068951794492e-06, + "loss": 0.2854, + "step": 3400 + }, + { + "epoch": 0.06808297675349699, + "grad_norm": 1.1348828077316284, + "learning_rate": 9.962029085482126e-06, + "loss": 0.361, + "step": 3401 + }, + { + "epoch": 0.06810299527062533, + "grad_norm": 1.0058681964874268, + "learning_rate": 9.961989198310456e-06, + "loss": 0.3207, + "step": 3402 + }, + { + "epoch": 0.06812301378775368, + "grad_norm": 1.1177507638931274, + "learning_rate": 9.961949290279652e-06, + "loss": 0.3197, + "step": 3403 + }, + { + "epoch": 0.06814303230488201, + "grad_norm": 1.025559663772583, + "learning_rate": 9.961909361389881e-06, + "loss": 0.3096, + "step": 3404 + }, + { + "epoch": 0.06816305082201037, + "grad_norm": 1.1396435499191284, + "learning_rate": 9.961869411641311e-06, + "loss": 0.3745, + "step": 3405 + }, + { + "epoch": 0.0681830693391387, + "grad_norm": 1.1548805236816406, + "learning_rate": 9.961829441034109e-06, + "loss": 0.3991, + "step": 3406 + }, + { + "epoch": 0.06820308785626705, + "grad_norm": 0.9941993355751038, + "learning_rate": 9.961789449568444e-06, + "loss": 0.3013, + "step": 3407 + }, + { + "epoch": 0.06822310637339539, + "grad_norm": 1.0565557479858398, + "learning_rate": 9.961749437244485e-06, + "loss": 0.3162, + "step": 3408 + }, + { + "epoch": 0.06824312489052374, + "grad_norm": 1.1725131273269653, + "learning_rate": 9.961709404062398e-06, + "loss": 0.3434, + "step": 3409 + }, + { + "epoch": 0.06826314340765208, + "grad_norm": 1.8887081146240234, + "learning_rate": 9.961669350022352e-06, + "loss": 0.881, + "step": 3410 + }, + { + "epoch": 0.06828316192478043, + "grad_norm": 1.1740139722824097, + "learning_rate": 9.961629275124516e-06, + "loss": 0.3614, + "step": 3411 + }, + { + "epoch": 0.06830318044190876, + "grad_norm": 1.9562021493911743, + "learning_rate": 9.96158917936906e-06, + "loss": 0.8502, + "step": 3412 + }, + { + "epoch": 0.06832319895903712, + "grad_norm": 1.0608314275741577, + "learning_rate": 9.96154906275615e-06, + "loss": 0.3638, + "step": 3413 + }, + { + "epoch": 0.06834321747616545, + "grad_norm": 1.1397418975830078, + "learning_rate": 9.961508925285954e-06, + "loss": 0.3527, + "step": 3414 + }, + { + "epoch": 0.0683632359932938, + "grad_norm": 1.0360329151153564, + "learning_rate": 9.961468766958644e-06, + "loss": 0.3621, + "step": 3415 + }, + { + "epoch": 0.06838325451042214, + "grad_norm": 1.0811762809753418, + "learning_rate": 9.961428587774386e-06, + "loss": 0.3495, + "step": 3416 + }, + { + "epoch": 0.06840327302755049, + "grad_norm": 1.283936858177185, + "learning_rate": 9.961388387733351e-06, + "loss": 0.3091, + "step": 3417 + }, + { + "epoch": 0.06842329154467883, + "grad_norm": 1.8696056604385376, + "learning_rate": 9.961348166835706e-06, + "loss": 0.8162, + "step": 3418 + }, + { + "epoch": 0.06844331006180718, + "grad_norm": 1.782128930091858, + "learning_rate": 9.96130792508162e-06, + "loss": 0.8864, + "step": 3419 + }, + { + "epoch": 0.06846332857893551, + "grad_norm": 1.0372041463851929, + "learning_rate": 9.961267662471265e-06, + "loss": 0.3339, + "step": 3420 + }, + { + "epoch": 0.06848334709606387, + "grad_norm": 1.775267481803894, + "learning_rate": 9.961227379004807e-06, + "loss": 0.8705, + "step": 3421 + }, + { + "epoch": 0.0685033656131922, + "grad_norm": 1.0923564434051514, + "learning_rate": 9.961187074682419e-06, + "loss": 0.3221, + "step": 3422 + }, + { + "epoch": 0.06852338413032055, + "grad_norm": 1.2025026082992554, + "learning_rate": 9.961146749504267e-06, + "loss": 0.389, + "step": 3423 + }, + { + "epoch": 0.06854340264744889, + "grad_norm": 1.0589823722839355, + "learning_rate": 9.961106403470522e-06, + "loss": 0.3777, + "step": 3424 + }, + { + "epoch": 0.06856342116457724, + "grad_norm": 1.1723219156265259, + "learning_rate": 9.961066036581353e-06, + "loss": 0.3184, + "step": 3425 + }, + { + "epoch": 0.06858343968170558, + "grad_norm": 0.9340587854385376, + "learning_rate": 9.961025648836928e-06, + "loss": 0.2662, + "step": 3426 + }, + { + "epoch": 0.06860345819883393, + "grad_norm": 1.159299373626709, + "learning_rate": 9.960985240237421e-06, + "loss": 0.4301, + "step": 3427 + }, + { + "epoch": 0.06862347671596226, + "grad_norm": 1.1729450225830078, + "learning_rate": 9.960944810783e-06, + "loss": 0.3836, + "step": 3428 + }, + { + "epoch": 0.06864349523309062, + "grad_norm": 1.0598018169403076, + "learning_rate": 9.960904360473833e-06, + "loss": 0.3556, + "step": 3429 + }, + { + "epoch": 0.06866351375021895, + "grad_norm": 1.1794241666793823, + "learning_rate": 9.960863889310093e-06, + "loss": 0.3374, + "step": 3430 + }, + { + "epoch": 0.0686835322673473, + "grad_norm": 1.059971570968628, + "learning_rate": 9.960823397291948e-06, + "loss": 0.3121, + "step": 3431 + }, + { + "epoch": 0.06870355078447564, + "grad_norm": 1.3798702955245972, + "learning_rate": 9.960782884419569e-06, + "loss": 0.3621, + "step": 3432 + }, + { + "epoch": 0.06872356930160399, + "grad_norm": 1.0847079753875732, + "learning_rate": 9.960742350693129e-06, + "loss": 0.378, + "step": 3433 + }, + { + "epoch": 0.06874358781873233, + "grad_norm": 1.0853790044784546, + "learning_rate": 9.960701796112793e-06, + "loss": 0.3827, + "step": 3434 + }, + { + "epoch": 0.06876360633586068, + "grad_norm": 1.1017963886260986, + "learning_rate": 9.960661220678733e-06, + "loss": 0.3463, + "step": 3435 + }, + { + "epoch": 0.06878362485298901, + "grad_norm": 1.8176664113998413, + "learning_rate": 9.960620624391124e-06, + "loss": 0.8653, + "step": 3436 + }, + { + "epoch": 0.06880364337011736, + "grad_norm": 1.9251198768615723, + "learning_rate": 9.96058000725013e-06, + "loss": 0.9219, + "step": 3437 + }, + { + "epoch": 0.0688236618872457, + "grad_norm": 0.9489299654960632, + "learning_rate": 9.960539369255928e-06, + "loss": 0.3008, + "step": 3438 + }, + { + "epoch": 0.06884368040437405, + "grad_norm": 1.1891435384750366, + "learning_rate": 9.960498710408686e-06, + "loss": 0.3515, + "step": 3439 + }, + { + "epoch": 0.06886369892150239, + "grad_norm": 1.8210101127624512, + "learning_rate": 9.960458030708574e-06, + "loss": 0.8155, + "step": 3440 + }, + { + "epoch": 0.06888371743863074, + "grad_norm": 1.2367397546768188, + "learning_rate": 9.960417330155762e-06, + "loss": 0.3767, + "step": 3441 + }, + { + "epoch": 0.06890373595575908, + "grad_norm": 0.9775677919387817, + "learning_rate": 9.960376608750426e-06, + "loss": 0.3114, + "step": 3442 + }, + { + "epoch": 0.06892375447288743, + "grad_norm": 1.0766409635543823, + "learning_rate": 9.960335866492734e-06, + "loss": 0.3586, + "step": 3443 + }, + { + "epoch": 0.06894377299001576, + "grad_norm": 1.1534315347671509, + "learning_rate": 9.960295103382856e-06, + "loss": 0.3243, + "step": 3444 + }, + { + "epoch": 0.06896379150714411, + "grad_norm": 1.0433926582336426, + "learning_rate": 9.960254319420965e-06, + "loss": 0.33, + "step": 3445 + }, + { + "epoch": 0.06898381002427245, + "grad_norm": 1.1869477033615112, + "learning_rate": 9.960213514607232e-06, + "loss": 0.3044, + "step": 3446 + }, + { + "epoch": 0.0690038285414008, + "grad_norm": 1.0557265281677246, + "learning_rate": 9.960172688941832e-06, + "loss": 0.3241, + "step": 3447 + }, + { + "epoch": 0.06902384705852914, + "grad_norm": 1.021751880645752, + "learning_rate": 9.96013184242493e-06, + "loss": 0.3565, + "step": 3448 + }, + { + "epoch": 0.06904386557565749, + "grad_norm": 1.0822818279266357, + "learning_rate": 9.960090975056703e-06, + "loss": 0.364, + "step": 3449 + }, + { + "epoch": 0.06906388409278583, + "grad_norm": 1.012094259262085, + "learning_rate": 9.96005008683732e-06, + "loss": 0.3139, + "step": 3450 + }, + { + "epoch": 0.06908390260991418, + "grad_norm": 1.0585174560546875, + "learning_rate": 9.960009177766955e-06, + "loss": 0.3105, + "step": 3451 + }, + { + "epoch": 0.06910392112704251, + "grad_norm": 1.0815178155899048, + "learning_rate": 9.959968247845778e-06, + "loss": 0.2844, + "step": 3452 + }, + { + "epoch": 0.06912393964417086, + "grad_norm": 2.020177125930786, + "learning_rate": 9.95992729707396e-06, + "loss": 0.8093, + "step": 3453 + }, + { + "epoch": 0.0691439581612992, + "grad_norm": 1.078015685081482, + "learning_rate": 9.959886325451677e-06, + "loss": 0.3133, + "step": 3454 + }, + { + "epoch": 0.06916397667842755, + "grad_norm": 0.9187019467353821, + "learning_rate": 9.9598453329791e-06, + "loss": 0.3214, + "step": 3455 + }, + { + "epoch": 0.06918399519555589, + "grad_norm": 0.9821882843971252, + "learning_rate": 9.959804319656399e-06, + "loss": 0.3281, + "step": 3456 + }, + { + "epoch": 0.06920401371268424, + "grad_norm": 1.0960639715194702, + "learning_rate": 9.959763285483749e-06, + "loss": 0.3149, + "step": 3457 + }, + { + "epoch": 0.06922403222981258, + "grad_norm": 1.18851900100708, + "learning_rate": 9.959722230461321e-06, + "loss": 0.3311, + "step": 3458 + }, + { + "epoch": 0.06924405074694093, + "grad_norm": 1.0725539922714233, + "learning_rate": 9.95968115458929e-06, + "loss": 0.3295, + "step": 3459 + }, + { + "epoch": 0.06926406926406926, + "grad_norm": 1.1042627096176147, + "learning_rate": 9.959640057867824e-06, + "loss": 0.3402, + "step": 3460 + }, + { + "epoch": 0.06928408778119761, + "grad_norm": 1.1402196884155273, + "learning_rate": 9.959598940297099e-06, + "loss": 0.3361, + "step": 3461 + }, + { + "epoch": 0.06930410629832595, + "grad_norm": 1.8813188076019287, + "learning_rate": 9.959557801877288e-06, + "loss": 0.8285, + "step": 3462 + }, + { + "epoch": 0.06932412481545429, + "grad_norm": 1.969835877418518, + "learning_rate": 9.959516642608563e-06, + "loss": 0.923, + "step": 3463 + }, + { + "epoch": 0.06934414333258264, + "grad_norm": 1.135347604751587, + "learning_rate": 9.959475462491097e-06, + "loss": 0.3184, + "step": 3464 + }, + { + "epoch": 0.06936416184971098, + "grad_norm": 1.1502805948257446, + "learning_rate": 9.959434261525064e-06, + "loss": 0.3015, + "step": 3465 + }, + { + "epoch": 0.06938418036683933, + "grad_norm": 1.4624720811843872, + "learning_rate": 9.959393039710638e-06, + "loss": 0.3447, + "step": 3466 + }, + { + "epoch": 0.06940419888396766, + "grad_norm": 1.0927000045776367, + "learning_rate": 9.959351797047989e-06, + "loss": 0.3694, + "step": 3467 + }, + { + "epoch": 0.06942421740109601, + "grad_norm": 1.2143806219100952, + "learning_rate": 9.959310533537294e-06, + "loss": 0.3113, + "step": 3468 + }, + { + "epoch": 0.06944423591822435, + "grad_norm": 1.2280004024505615, + "learning_rate": 9.959269249178723e-06, + "loss": 0.3877, + "step": 3469 + }, + { + "epoch": 0.0694642544353527, + "grad_norm": 1.1324739456176758, + "learning_rate": 9.959227943972451e-06, + "loss": 0.3798, + "step": 3470 + }, + { + "epoch": 0.06948427295248104, + "grad_norm": 1.269641637802124, + "learning_rate": 9.959186617918654e-06, + "loss": 0.3475, + "step": 3471 + }, + { + "epoch": 0.06950429146960939, + "grad_norm": 1.089984655380249, + "learning_rate": 9.959145271017503e-06, + "loss": 0.3455, + "step": 3472 + }, + { + "epoch": 0.06952430998673773, + "grad_norm": 1.091401219367981, + "learning_rate": 9.959103903269171e-06, + "loss": 0.3369, + "step": 3473 + }, + { + "epoch": 0.06954432850386608, + "grad_norm": 0.9884762763977051, + "learning_rate": 9.959062514673834e-06, + "loss": 0.3074, + "step": 3474 + }, + { + "epoch": 0.06956434702099441, + "grad_norm": 1.0186023712158203, + "learning_rate": 9.959021105231668e-06, + "loss": 0.3585, + "step": 3475 + }, + { + "epoch": 0.06958436553812276, + "grad_norm": 1.4433766603469849, + "learning_rate": 9.958979674942842e-06, + "loss": 0.3463, + "step": 3476 + }, + { + "epoch": 0.0696043840552511, + "grad_norm": 1.1307473182678223, + "learning_rate": 9.958938223807532e-06, + "loss": 0.3474, + "step": 3477 + }, + { + "epoch": 0.06962440257237945, + "grad_norm": 0.982184648513794, + "learning_rate": 9.958896751825915e-06, + "loss": 0.2961, + "step": 3478 + }, + { + "epoch": 0.06964442108950779, + "grad_norm": 1.956289529800415, + "learning_rate": 9.958855258998161e-06, + "loss": 0.8988, + "step": 3479 + }, + { + "epoch": 0.06966443960663614, + "grad_norm": 1.2524787187576294, + "learning_rate": 9.958813745324449e-06, + "loss": 0.3085, + "step": 3480 + }, + { + "epoch": 0.06968445812376448, + "grad_norm": 1.0338693857192993, + "learning_rate": 9.95877221080495e-06, + "loss": 0.3445, + "step": 3481 + }, + { + "epoch": 0.06970447664089283, + "grad_norm": 1.0922595262527466, + "learning_rate": 9.95873065543984e-06, + "loss": 0.3289, + "step": 3482 + }, + { + "epoch": 0.06972449515802116, + "grad_norm": 1.0390692949295044, + "learning_rate": 9.958689079229292e-06, + "loss": 0.3501, + "step": 3483 + }, + { + "epoch": 0.06974451367514951, + "grad_norm": 1.1895378828048706, + "learning_rate": 9.958647482173483e-06, + "loss": 0.3217, + "step": 3484 + }, + { + "epoch": 0.06976453219227785, + "grad_norm": 1.031930923461914, + "learning_rate": 9.958605864272589e-06, + "loss": 0.313, + "step": 3485 + }, + { + "epoch": 0.0697845507094062, + "grad_norm": 1.2143375873565674, + "learning_rate": 9.95856422552678e-06, + "loss": 0.3111, + "step": 3486 + }, + { + "epoch": 0.06980456922653454, + "grad_norm": 1.142746090888977, + "learning_rate": 9.958522565936237e-06, + "loss": 0.3135, + "step": 3487 + }, + { + "epoch": 0.06982458774366289, + "grad_norm": 1.0953084230422974, + "learning_rate": 9.958480885501131e-06, + "loss": 0.351, + "step": 3488 + }, + { + "epoch": 0.06984460626079123, + "grad_norm": 1.1336721181869507, + "learning_rate": 9.958439184221637e-06, + "loss": 0.331, + "step": 3489 + }, + { + "epoch": 0.06986462477791958, + "grad_norm": 0.991931676864624, + "learning_rate": 9.958397462097935e-06, + "loss": 0.2738, + "step": 3490 + }, + { + "epoch": 0.06988464329504791, + "grad_norm": 1.1298378705978394, + "learning_rate": 9.958355719130194e-06, + "loss": 0.3111, + "step": 3491 + }, + { + "epoch": 0.06990466181217626, + "grad_norm": 1.085416316986084, + "learning_rate": 9.958313955318594e-06, + "loss": 0.3097, + "step": 3492 + }, + { + "epoch": 0.0699246803293046, + "grad_norm": 1.2206556797027588, + "learning_rate": 9.95827217066331e-06, + "loss": 0.3777, + "step": 3493 + }, + { + "epoch": 0.06994469884643295, + "grad_norm": 1.3551194667816162, + "learning_rate": 9.958230365164514e-06, + "loss": 0.3694, + "step": 3494 + }, + { + "epoch": 0.06996471736356129, + "grad_norm": 1.2306287288665771, + "learning_rate": 9.958188538822387e-06, + "loss": 0.3329, + "step": 3495 + }, + { + "epoch": 0.06998473588068964, + "grad_norm": 1.839336633682251, + "learning_rate": 9.958146691637102e-06, + "loss": 0.8201, + "step": 3496 + }, + { + "epoch": 0.07000475439781798, + "grad_norm": 1.797885537147522, + "learning_rate": 9.958104823608834e-06, + "loss": 0.8258, + "step": 3497 + }, + { + "epoch": 0.07002477291494633, + "grad_norm": 1.0677052736282349, + "learning_rate": 9.958062934737762e-06, + "loss": 0.3517, + "step": 3498 + }, + { + "epoch": 0.07004479143207466, + "grad_norm": 1.099535346031189, + "learning_rate": 9.958021025024061e-06, + "loss": 0.3267, + "step": 3499 + }, + { + "epoch": 0.07006480994920301, + "grad_norm": 2.1549971103668213, + "learning_rate": 9.957979094467905e-06, + "loss": 0.8179, + "step": 3500 + }, + { + "epoch": 0.07008482846633135, + "grad_norm": 1.1023693084716797, + "learning_rate": 9.957937143069472e-06, + "loss": 0.365, + "step": 3501 + }, + { + "epoch": 0.0701048469834597, + "grad_norm": 0.995390772819519, + "learning_rate": 9.957895170828937e-06, + "loss": 0.3288, + "step": 3502 + }, + { + "epoch": 0.07012486550058804, + "grad_norm": 1.2489906549453735, + "learning_rate": 9.957853177746479e-06, + "loss": 0.327, + "step": 3503 + }, + { + "epoch": 0.07014488401771639, + "grad_norm": 1.1447558403015137, + "learning_rate": 9.957811163822274e-06, + "loss": 0.313, + "step": 3504 + }, + { + "epoch": 0.07016490253484473, + "grad_norm": 1.12969172000885, + "learning_rate": 9.957769129056498e-06, + "loss": 0.377, + "step": 3505 + }, + { + "epoch": 0.07018492105197308, + "grad_norm": 1.044472098350525, + "learning_rate": 9.957727073449325e-06, + "loss": 0.3171, + "step": 3506 + }, + { + "epoch": 0.07020493956910141, + "grad_norm": 1.1648565530776978, + "learning_rate": 9.957684997000938e-06, + "loss": 0.3491, + "step": 3507 + }, + { + "epoch": 0.07022495808622976, + "grad_norm": 2.1389260292053223, + "learning_rate": 9.957642899711508e-06, + "loss": 0.8282, + "step": 3508 + }, + { + "epoch": 0.0702449766033581, + "grad_norm": 1.137825846672058, + "learning_rate": 9.957600781581213e-06, + "loss": 0.3193, + "step": 3509 + }, + { + "epoch": 0.07026499512048645, + "grad_norm": 1.2567644119262695, + "learning_rate": 9.957558642610232e-06, + "loss": 0.3105, + "step": 3510 + }, + { + "epoch": 0.07028501363761479, + "grad_norm": 1.909730076789856, + "learning_rate": 9.957516482798742e-06, + "loss": 0.902, + "step": 3511 + }, + { + "epoch": 0.07030503215474314, + "grad_norm": 1.237120270729065, + "learning_rate": 9.95747430214692e-06, + "loss": 0.3688, + "step": 3512 + }, + { + "epoch": 0.07032505067187148, + "grad_norm": 1.1167432069778442, + "learning_rate": 9.957432100654943e-06, + "loss": 0.313, + "step": 3513 + }, + { + "epoch": 0.07034506918899983, + "grad_norm": 1.1463669538497925, + "learning_rate": 9.957389878322987e-06, + "loss": 0.3367, + "step": 3514 + }, + { + "epoch": 0.07036508770612816, + "grad_norm": 1.0865923166275024, + "learning_rate": 9.95734763515123e-06, + "loss": 0.3378, + "step": 3515 + }, + { + "epoch": 0.07038510622325651, + "grad_norm": 1.1058915853500366, + "learning_rate": 9.957305371139853e-06, + "loss": 0.3347, + "step": 3516 + }, + { + "epoch": 0.07040512474038485, + "grad_norm": 1.11403226852417, + "learning_rate": 9.95726308628903e-06, + "loss": 0.3058, + "step": 3517 + }, + { + "epoch": 0.0704251432575132, + "grad_norm": 1.1536625623703003, + "learning_rate": 9.95722078059894e-06, + "loss": 0.3546, + "step": 3518 + }, + { + "epoch": 0.07044516177464154, + "grad_norm": 1.2028545141220093, + "learning_rate": 9.957178454069758e-06, + "loss": 0.2871, + "step": 3519 + }, + { + "epoch": 0.07046518029176989, + "grad_norm": 1.1127521991729736, + "learning_rate": 9.957136106701669e-06, + "loss": 0.3083, + "step": 3520 + }, + { + "epoch": 0.07048519880889823, + "grad_norm": 1.0606945753097534, + "learning_rate": 9.957093738494842e-06, + "loss": 0.3019, + "step": 3521 + }, + { + "epoch": 0.07050521732602658, + "grad_norm": 1.163633942604065, + "learning_rate": 9.957051349449462e-06, + "loss": 0.3251, + "step": 3522 + }, + { + "epoch": 0.07052523584315491, + "grad_norm": 1.047458291053772, + "learning_rate": 9.957008939565706e-06, + "loss": 0.3239, + "step": 3523 + }, + { + "epoch": 0.07054525436028326, + "grad_norm": 1.3136066198349, + "learning_rate": 9.95696650884375e-06, + "loss": 0.3846, + "step": 3524 + }, + { + "epoch": 0.0705652728774116, + "grad_norm": 2.078026056289673, + "learning_rate": 9.956924057283774e-06, + "loss": 0.8688, + "step": 3525 + }, + { + "epoch": 0.07058529139453995, + "grad_norm": 1.1997312307357788, + "learning_rate": 9.956881584885956e-06, + "loss": 0.3075, + "step": 3526 + }, + { + "epoch": 0.07060530991166829, + "grad_norm": 1.2615983486175537, + "learning_rate": 9.956839091650474e-06, + "loss": 0.3517, + "step": 3527 + }, + { + "epoch": 0.07062532842879664, + "grad_norm": 1.2323122024536133, + "learning_rate": 9.956796577577508e-06, + "loss": 0.2981, + "step": 3528 + }, + { + "epoch": 0.07064534694592497, + "grad_norm": 1.0866742134094238, + "learning_rate": 9.956754042667233e-06, + "loss": 0.3619, + "step": 3529 + }, + { + "epoch": 0.07066536546305333, + "grad_norm": 1.069307565689087, + "learning_rate": 9.956711486919835e-06, + "loss": 0.3199, + "step": 3530 + }, + { + "epoch": 0.07068538398018166, + "grad_norm": 1.2129956483840942, + "learning_rate": 9.956668910335487e-06, + "loss": 0.344, + "step": 3531 + }, + { + "epoch": 0.07070540249731001, + "grad_norm": 1.9119417667388916, + "learning_rate": 9.956626312914369e-06, + "loss": 0.8641, + "step": 3532 + }, + { + "epoch": 0.07072542101443835, + "grad_norm": 1.2169864177703857, + "learning_rate": 9.95658369465666e-06, + "loss": 0.3523, + "step": 3533 + }, + { + "epoch": 0.0707454395315667, + "grad_norm": 1.1241931915283203, + "learning_rate": 9.956541055562542e-06, + "loss": 0.3228, + "step": 3534 + }, + { + "epoch": 0.07076545804869504, + "grad_norm": 1.1420586109161377, + "learning_rate": 9.956498395632191e-06, + "loss": 0.32, + "step": 3535 + }, + { + "epoch": 0.07078547656582339, + "grad_norm": 1.1644810438156128, + "learning_rate": 9.956455714865786e-06, + "loss": 0.2829, + "step": 3536 + }, + { + "epoch": 0.07080549508295172, + "grad_norm": 1.135717749595642, + "learning_rate": 9.956413013263508e-06, + "loss": 0.3301, + "step": 3537 + }, + { + "epoch": 0.07082551360008008, + "grad_norm": 1.2582666873931885, + "learning_rate": 9.956370290825538e-06, + "loss": 0.3092, + "step": 3538 + }, + { + "epoch": 0.07084553211720841, + "grad_norm": 1.0261152982711792, + "learning_rate": 9.956327547552054e-06, + "loss": 0.3492, + "step": 3539 + }, + { + "epoch": 0.07086555063433676, + "grad_norm": 1.1430644989013672, + "learning_rate": 9.956284783443234e-06, + "loss": 0.2984, + "step": 3540 + }, + { + "epoch": 0.0708855691514651, + "grad_norm": 1.2976495027542114, + "learning_rate": 9.95624199849926e-06, + "loss": 0.3501, + "step": 3541 + }, + { + "epoch": 0.07090558766859345, + "grad_norm": 1.0155748128890991, + "learning_rate": 9.956199192720311e-06, + "loss": 0.3254, + "step": 3542 + }, + { + "epoch": 0.07092560618572179, + "grad_norm": 1.2294831275939941, + "learning_rate": 9.956156366106569e-06, + "loss": 0.3597, + "step": 3543 + }, + { + "epoch": 0.07094562470285014, + "grad_norm": 1.1705795526504517, + "learning_rate": 9.956113518658211e-06, + "loss": 0.3356, + "step": 3544 + }, + { + "epoch": 0.07096564321997847, + "grad_norm": 1.0536468029022217, + "learning_rate": 9.956070650375418e-06, + "loss": 0.3122, + "step": 3545 + }, + { + "epoch": 0.07098566173710683, + "grad_norm": 1.2799030542373657, + "learning_rate": 9.95602776125837e-06, + "loss": 0.3241, + "step": 3546 + }, + { + "epoch": 0.07100568025423516, + "grad_norm": 1.1389650106430054, + "learning_rate": 9.955984851307249e-06, + "loss": 0.3411, + "step": 3547 + }, + { + "epoch": 0.07102569877136351, + "grad_norm": 1.776821494102478, + "learning_rate": 9.955941920522233e-06, + "loss": 0.3254, + "step": 3548 + }, + { + "epoch": 0.07104571728849185, + "grad_norm": 1.1424671411514282, + "learning_rate": 9.955898968903506e-06, + "loss": 0.3583, + "step": 3549 + }, + { + "epoch": 0.0710657358056202, + "grad_norm": 1.0576704740524292, + "learning_rate": 9.955855996451244e-06, + "loss": 0.3052, + "step": 3550 + }, + { + "epoch": 0.07108575432274854, + "grad_norm": 1.1430370807647705, + "learning_rate": 9.95581300316563e-06, + "loss": 0.3122, + "step": 3551 + }, + { + "epoch": 0.07110577283987689, + "grad_norm": 1.066888451576233, + "learning_rate": 9.955769989046846e-06, + "loss": 0.3298, + "step": 3552 + }, + { + "epoch": 0.07112579135700522, + "grad_norm": 1.4912919998168945, + "learning_rate": 9.95572695409507e-06, + "loss": 0.3691, + "step": 3553 + }, + { + "epoch": 0.07114580987413358, + "grad_norm": 1.099988579750061, + "learning_rate": 9.955683898310486e-06, + "loss": 0.3417, + "step": 3554 + }, + { + "epoch": 0.07116582839126191, + "grad_norm": 1.025530457496643, + "learning_rate": 9.955640821693272e-06, + "loss": 0.3456, + "step": 3555 + }, + { + "epoch": 0.07118584690839026, + "grad_norm": 1.1233402490615845, + "learning_rate": 9.95559772424361e-06, + "loss": 0.3478, + "step": 3556 + }, + { + "epoch": 0.0712058654255186, + "grad_norm": 1.079017162322998, + "learning_rate": 9.955554605961684e-06, + "loss": 0.3104, + "step": 3557 + }, + { + "epoch": 0.07122588394264695, + "grad_norm": 1.2606801986694336, + "learning_rate": 9.95551146684767e-06, + "loss": 0.3443, + "step": 3558 + }, + { + "epoch": 0.07124590245977529, + "grad_norm": 1.2576348781585693, + "learning_rate": 9.955468306901754e-06, + "loss": 0.3185, + "step": 3559 + }, + { + "epoch": 0.07126592097690364, + "grad_norm": 1.0555061101913452, + "learning_rate": 9.955425126124117e-06, + "loss": 0.2969, + "step": 3560 + }, + { + "epoch": 0.07128593949403197, + "grad_norm": 1.0179532766342163, + "learning_rate": 9.955381924514936e-06, + "loss": 0.3272, + "step": 3561 + }, + { + "epoch": 0.07130595801116033, + "grad_norm": 1.1265766620635986, + "learning_rate": 9.955338702074397e-06, + "loss": 0.3018, + "step": 3562 + }, + { + "epoch": 0.07132597652828866, + "grad_norm": 1.0007654428482056, + "learning_rate": 9.955295458802681e-06, + "loss": 0.2944, + "step": 3563 + }, + { + "epoch": 0.07134599504541701, + "grad_norm": 1.3176681995391846, + "learning_rate": 9.955252194699969e-06, + "loss": 0.3565, + "step": 3564 + }, + { + "epoch": 0.07136601356254535, + "grad_norm": 1.0388821363449097, + "learning_rate": 9.955208909766443e-06, + "loss": 0.3036, + "step": 3565 + }, + { + "epoch": 0.0713860320796737, + "grad_norm": 1.1255333423614502, + "learning_rate": 9.955165604002286e-06, + "loss": 0.3187, + "step": 3566 + }, + { + "epoch": 0.07140605059680204, + "grad_norm": 1.286716103553772, + "learning_rate": 9.955122277407679e-06, + "loss": 0.3688, + "step": 3567 + }, + { + "epoch": 0.07142606911393039, + "grad_norm": 1.042026400566101, + "learning_rate": 9.955078929982803e-06, + "loss": 0.3217, + "step": 3568 + }, + { + "epoch": 0.07144608763105872, + "grad_norm": 1.0332869291305542, + "learning_rate": 9.955035561727842e-06, + "loss": 0.3122, + "step": 3569 + }, + { + "epoch": 0.07146610614818708, + "grad_norm": 1.2269071340560913, + "learning_rate": 9.954992172642979e-06, + "loss": 0.3725, + "step": 3570 + }, + { + "epoch": 0.07148612466531541, + "grad_norm": 1.1874827146530151, + "learning_rate": 9.954948762728393e-06, + "loss": 0.4109, + "step": 3571 + }, + { + "epoch": 0.07150614318244376, + "grad_norm": 1.1358457803726196, + "learning_rate": 9.954905331984271e-06, + "loss": 0.2924, + "step": 3572 + }, + { + "epoch": 0.0715261616995721, + "grad_norm": 2.198942184448242, + "learning_rate": 9.954861880410794e-06, + "loss": 0.8487, + "step": 3573 + }, + { + "epoch": 0.07154618021670045, + "grad_norm": 1.1254088878631592, + "learning_rate": 9.954818408008142e-06, + "loss": 0.3582, + "step": 3574 + }, + { + "epoch": 0.07156619873382879, + "grad_norm": 0.9854891300201416, + "learning_rate": 9.9547749147765e-06, + "loss": 0.3248, + "step": 3575 + }, + { + "epoch": 0.07158621725095714, + "grad_norm": 1.1405030488967896, + "learning_rate": 9.954731400716049e-06, + "loss": 0.3145, + "step": 3576 + }, + { + "epoch": 0.07160623576808547, + "grad_norm": 1.176939845085144, + "learning_rate": 9.954687865826974e-06, + "loss": 0.3638, + "step": 3577 + }, + { + "epoch": 0.07162625428521383, + "grad_norm": 2.0089452266693115, + "learning_rate": 9.95464431010946e-06, + "loss": 0.8899, + "step": 3578 + }, + { + "epoch": 0.07164627280234216, + "grad_norm": 1.82561194896698, + "learning_rate": 9.954600733563686e-06, + "loss": 0.8486, + "step": 3579 + }, + { + "epoch": 0.07166629131947051, + "grad_norm": 1.2340413331985474, + "learning_rate": 9.954557136189837e-06, + "loss": 0.3395, + "step": 3580 + }, + { + "epoch": 0.07168630983659885, + "grad_norm": 1.1066492795944214, + "learning_rate": 9.954513517988096e-06, + "loss": 0.2967, + "step": 3581 + }, + { + "epoch": 0.0717063283537272, + "grad_norm": 1.1308255195617676, + "learning_rate": 9.954469878958645e-06, + "loss": 0.317, + "step": 3582 + }, + { + "epoch": 0.07172634687085554, + "grad_norm": 1.2436579465866089, + "learning_rate": 9.954426219101672e-06, + "loss": 0.3318, + "step": 3583 + }, + { + "epoch": 0.07174636538798389, + "grad_norm": 1.0660312175750732, + "learning_rate": 9.954382538417355e-06, + "loss": 0.3369, + "step": 3584 + }, + { + "epoch": 0.07176638390511222, + "grad_norm": 1.117604374885559, + "learning_rate": 9.954338836905882e-06, + "loss": 0.3642, + "step": 3585 + }, + { + "epoch": 0.07178640242224057, + "grad_norm": 1.1680235862731934, + "learning_rate": 9.954295114567432e-06, + "loss": 0.3759, + "step": 3586 + }, + { + "epoch": 0.07180642093936891, + "grad_norm": 1.0925761461257935, + "learning_rate": 9.954251371402192e-06, + "loss": 0.352, + "step": 3587 + }, + { + "epoch": 0.07182643945649726, + "grad_norm": 1.1300158500671387, + "learning_rate": 9.954207607410346e-06, + "loss": 0.3341, + "step": 3588 + }, + { + "epoch": 0.0718464579736256, + "grad_norm": 1.2360974550247192, + "learning_rate": 9.954163822592078e-06, + "loss": 0.3436, + "step": 3589 + }, + { + "epoch": 0.07186647649075395, + "grad_norm": 1.1736817359924316, + "learning_rate": 9.95412001694757e-06, + "loss": 0.315, + "step": 3590 + }, + { + "epoch": 0.07188649500788229, + "grad_norm": 1.0249799489974976, + "learning_rate": 9.95407619047701e-06, + "loss": 0.3622, + "step": 3591 + }, + { + "epoch": 0.07190651352501064, + "grad_norm": 1.2328526973724365, + "learning_rate": 9.954032343180578e-06, + "loss": 0.3651, + "step": 3592 + }, + { + "epoch": 0.07192653204213897, + "grad_norm": 1.0645921230316162, + "learning_rate": 9.953988475058461e-06, + "loss": 0.3652, + "step": 3593 + }, + { + "epoch": 0.07194655055926732, + "grad_norm": 1.1036951541900635, + "learning_rate": 9.953944586110841e-06, + "loss": 0.3007, + "step": 3594 + }, + { + "epoch": 0.07196656907639566, + "grad_norm": 1.093099594116211, + "learning_rate": 9.953900676337907e-06, + "loss": 0.3758, + "step": 3595 + }, + { + "epoch": 0.07198658759352401, + "grad_norm": 1.0573543310165405, + "learning_rate": 9.953856745739839e-06, + "loss": 0.3065, + "step": 3596 + }, + { + "epoch": 0.07200660611065235, + "grad_norm": 1.9358431100845337, + "learning_rate": 9.953812794316822e-06, + "loss": 0.8203, + "step": 3597 + }, + { + "epoch": 0.0720266246277807, + "grad_norm": 1.052194595336914, + "learning_rate": 9.953768822069043e-06, + "loss": 0.3084, + "step": 3598 + }, + { + "epoch": 0.07204664314490904, + "grad_norm": 0.962150514125824, + "learning_rate": 9.953724828996687e-06, + "loss": 0.3131, + "step": 3599 + }, + { + "epoch": 0.07206666166203739, + "grad_norm": 1.3936660289764404, + "learning_rate": 9.953680815099935e-06, + "loss": 0.369, + "step": 3600 + }, + { + "epoch": 0.07208668017916572, + "grad_norm": 1.114108681678772, + "learning_rate": 9.953636780378979e-06, + "loss": 0.3317, + "step": 3601 + }, + { + "epoch": 0.07210669869629407, + "grad_norm": 1.1517930030822754, + "learning_rate": 9.953592724833996e-06, + "loss": 0.3345, + "step": 3602 + }, + { + "epoch": 0.07212671721342241, + "grad_norm": 1.1273679733276367, + "learning_rate": 9.953548648465179e-06, + "loss": 0.3905, + "step": 3603 + }, + { + "epoch": 0.07214673573055076, + "grad_norm": 1.2142757177352905, + "learning_rate": 9.953504551272706e-06, + "loss": 0.3073, + "step": 3604 + }, + { + "epoch": 0.0721667542476791, + "grad_norm": 1.028929352760315, + "learning_rate": 9.953460433256767e-06, + "loss": 0.2913, + "step": 3605 + }, + { + "epoch": 0.07218677276480745, + "grad_norm": 1.1184883117675781, + "learning_rate": 9.953416294417549e-06, + "loss": 0.3405, + "step": 3606 + }, + { + "epoch": 0.07220679128193579, + "grad_norm": 1.1577190160751343, + "learning_rate": 9.953372134755233e-06, + "loss": 0.3228, + "step": 3607 + }, + { + "epoch": 0.07222680979906414, + "grad_norm": 1.0793678760528564, + "learning_rate": 9.953327954270004e-06, + "loss": 0.3066, + "step": 3608 + }, + { + "epoch": 0.07224682831619247, + "grad_norm": 1.0609214305877686, + "learning_rate": 9.953283752962053e-06, + "loss": 0.3116, + "step": 3609 + }, + { + "epoch": 0.07226684683332082, + "grad_norm": 1.0520975589752197, + "learning_rate": 9.953239530831563e-06, + "loss": 0.3159, + "step": 3610 + }, + { + "epoch": 0.07228686535044916, + "grad_norm": 1.1083052158355713, + "learning_rate": 9.953195287878719e-06, + "loss": 0.3217, + "step": 3611 + }, + { + "epoch": 0.07230688386757751, + "grad_norm": 1.0358437299728394, + "learning_rate": 9.953151024103709e-06, + "loss": 0.2979, + "step": 3612 + }, + { + "epoch": 0.07232690238470585, + "grad_norm": 1.087172269821167, + "learning_rate": 9.953106739506717e-06, + "loss": 0.3235, + "step": 3613 + }, + { + "epoch": 0.0723469209018342, + "grad_norm": 0.9897400736808777, + "learning_rate": 9.953062434087932e-06, + "loss": 0.2815, + "step": 3614 + }, + { + "epoch": 0.07236693941896254, + "grad_norm": 1.1259161233901978, + "learning_rate": 9.953018107847537e-06, + "loss": 0.3563, + "step": 3615 + }, + { + "epoch": 0.07238695793609089, + "grad_norm": 1.0956171751022339, + "learning_rate": 9.952973760785719e-06, + "loss": 0.3563, + "step": 3616 + }, + { + "epoch": 0.07240697645321922, + "grad_norm": 1.0996458530426025, + "learning_rate": 9.952929392902667e-06, + "loss": 0.3519, + "step": 3617 + }, + { + "epoch": 0.07242699497034757, + "grad_norm": 0.975287139415741, + "learning_rate": 9.952885004198564e-06, + "loss": 0.3072, + "step": 3618 + }, + { + "epoch": 0.07244701348747591, + "grad_norm": 1.2046988010406494, + "learning_rate": 9.952840594673599e-06, + "loss": 0.3533, + "step": 3619 + }, + { + "epoch": 0.07246703200460426, + "grad_norm": 1.1997807025909424, + "learning_rate": 9.952796164327957e-06, + "loss": 0.3743, + "step": 3620 + }, + { + "epoch": 0.0724870505217326, + "grad_norm": 1.9016391038894653, + "learning_rate": 9.952751713161828e-06, + "loss": 0.8067, + "step": 3621 + }, + { + "epoch": 0.07250706903886095, + "grad_norm": 1.9525127410888672, + "learning_rate": 9.952707241175396e-06, + "loss": 0.8836, + "step": 3622 + }, + { + "epoch": 0.07252708755598929, + "grad_norm": 0.9929125308990479, + "learning_rate": 9.952662748368847e-06, + "loss": 0.2853, + "step": 3623 + }, + { + "epoch": 0.07254710607311764, + "grad_norm": 0.9844908118247986, + "learning_rate": 9.952618234742372e-06, + "loss": 0.303, + "step": 3624 + }, + { + "epoch": 0.07256712459024597, + "grad_norm": 1.2455158233642578, + "learning_rate": 9.952573700296154e-06, + "loss": 0.3431, + "step": 3625 + }, + { + "epoch": 0.07258714310737432, + "grad_norm": 1.0675244331359863, + "learning_rate": 9.952529145030383e-06, + "loss": 0.289, + "step": 3626 + }, + { + "epoch": 0.07260716162450266, + "grad_norm": 1.1559057235717773, + "learning_rate": 9.952484568945244e-06, + "loss": 0.3647, + "step": 3627 + }, + { + "epoch": 0.07262718014163101, + "grad_norm": 1.2844491004943848, + "learning_rate": 9.952439972040927e-06, + "loss": 0.3452, + "step": 3628 + }, + { + "epoch": 0.07264719865875935, + "grad_norm": 1.2009692192077637, + "learning_rate": 9.952395354317617e-06, + "loss": 0.3781, + "step": 3629 + }, + { + "epoch": 0.0726672171758877, + "grad_norm": 1.0841243267059326, + "learning_rate": 9.952350715775503e-06, + "loss": 0.3917, + "step": 3630 + }, + { + "epoch": 0.07268723569301604, + "grad_norm": 1.1056485176086426, + "learning_rate": 9.952306056414774e-06, + "loss": 0.3207, + "step": 3631 + }, + { + "epoch": 0.07270725421014439, + "grad_norm": 1.228663444519043, + "learning_rate": 9.952261376235614e-06, + "loss": 0.3319, + "step": 3632 + }, + { + "epoch": 0.07272727272727272, + "grad_norm": 1.0650193691253662, + "learning_rate": 9.952216675238213e-06, + "loss": 0.3178, + "step": 3633 + }, + { + "epoch": 0.07274729124440107, + "grad_norm": 1.0919585227966309, + "learning_rate": 9.95217195342276e-06, + "loss": 0.3289, + "step": 3634 + }, + { + "epoch": 0.07276730976152941, + "grad_norm": 1.1371562480926514, + "learning_rate": 9.952127210789443e-06, + "loss": 0.3453, + "step": 3635 + }, + { + "epoch": 0.07278732827865776, + "grad_norm": 1.103724718093872, + "learning_rate": 9.952082447338445e-06, + "loss": 0.3279, + "step": 3636 + }, + { + "epoch": 0.0728073467957861, + "grad_norm": 1.191855788230896, + "learning_rate": 9.95203766306996e-06, + "loss": 0.3705, + "step": 3637 + }, + { + "epoch": 0.07282736531291445, + "grad_norm": 1.021930456161499, + "learning_rate": 9.951992857984176e-06, + "loss": 0.2685, + "step": 3638 + }, + { + "epoch": 0.07284738383004279, + "grad_norm": 1.1111531257629395, + "learning_rate": 9.951948032081278e-06, + "loss": 0.3332, + "step": 3639 + }, + { + "epoch": 0.07286740234717114, + "grad_norm": 0.9889205694198608, + "learning_rate": 9.951903185361455e-06, + "loss": 0.3134, + "step": 3640 + }, + { + "epoch": 0.07288742086429947, + "grad_norm": 1.0385966300964355, + "learning_rate": 9.951858317824899e-06, + "loss": 0.3464, + "step": 3641 + }, + { + "epoch": 0.07290743938142782, + "grad_norm": 1.2104254961013794, + "learning_rate": 9.951813429471795e-06, + "loss": 0.3079, + "step": 3642 + }, + { + "epoch": 0.07292745789855616, + "grad_norm": 1.0809649229049683, + "learning_rate": 9.951768520302333e-06, + "loss": 0.3567, + "step": 3643 + }, + { + "epoch": 0.07294747641568451, + "grad_norm": 1.0696309804916382, + "learning_rate": 9.951723590316703e-06, + "loss": 0.335, + "step": 3644 + }, + { + "epoch": 0.07296749493281285, + "grad_norm": 1.2961785793304443, + "learning_rate": 9.951678639515093e-06, + "loss": 0.3189, + "step": 3645 + }, + { + "epoch": 0.0729875134499412, + "grad_norm": 1.2899774312973022, + "learning_rate": 9.951633667897688e-06, + "loss": 0.3709, + "step": 3646 + }, + { + "epoch": 0.07300753196706954, + "grad_norm": 1.0896679162979126, + "learning_rate": 9.951588675464686e-06, + "loss": 0.3266, + "step": 3647 + }, + { + "epoch": 0.07302755048419789, + "grad_norm": 1.1585667133331299, + "learning_rate": 9.951543662216266e-06, + "loss": 0.3416, + "step": 3648 + }, + { + "epoch": 0.07304756900132622, + "grad_norm": 1.0507780313491821, + "learning_rate": 9.951498628152624e-06, + "loss": 0.3224, + "step": 3649 + }, + { + "epoch": 0.07306758751845457, + "grad_norm": 1.335923671722412, + "learning_rate": 9.951453573273948e-06, + "loss": 0.3618, + "step": 3650 + }, + { + "epoch": 0.07308760603558291, + "grad_norm": 1.798439860343933, + "learning_rate": 9.951408497580426e-06, + "loss": 0.8343, + "step": 3651 + }, + { + "epoch": 0.07310762455271126, + "grad_norm": 1.2486222982406616, + "learning_rate": 9.951363401072247e-06, + "loss": 0.3451, + "step": 3652 + }, + { + "epoch": 0.0731276430698396, + "grad_norm": 1.1936792135238647, + "learning_rate": 9.951318283749603e-06, + "loss": 0.3115, + "step": 3653 + }, + { + "epoch": 0.07314766158696795, + "grad_norm": 1.0362664461135864, + "learning_rate": 9.951273145612684e-06, + "loss": 0.3469, + "step": 3654 + }, + { + "epoch": 0.07316768010409629, + "grad_norm": 1.887290596961975, + "learning_rate": 9.951227986661676e-06, + "loss": 0.7509, + "step": 3655 + }, + { + "epoch": 0.07318769862122464, + "grad_norm": 1.1362814903259277, + "learning_rate": 9.951182806896772e-06, + "loss": 0.348, + "step": 3656 + }, + { + "epoch": 0.07320771713835297, + "grad_norm": 1.0679346323013306, + "learning_rate": 9.95113760631816e-06, + "loss": 0.3261, + "step": 3657 + }, + { + "epoch": 0.07322773565548132, + "grad_norm": 1.1277480125427246, + "learning_rate": 9.951092384926034e-06, + "loss": 0.2978, + "step": 3658 + }, + { + "epoch": 0.07324775417260966, + "grad_norm": 1.0139156579971313, + "learning_rate": 9.951047142720578e-06, + "loss": 0.3164, + "step": 3659 + }, + { + "epoch": 0.07326777268973801, + "grad_norm": 1.123599886894226, + "learning_rate": 9.951001879701986e-06, + "loss": 0.3938, + "step": 3660 + }, + { + "epoch": 0.07328779120686635, + "grad_norm": 1.1993396282196045, + "learning_rate": 9.950956595870446e-06, + "loss": 0.3323, + "step": 3661 + }, + { + "epoch": 0.0733078097239947, + "grad_norm": 1.0368472337722778, + "learning_rate": 9.950911291226153e-06, + "loss": 0.2924, + "step": 3662 + }, + { + "epoch": 0.07332782824112304, + "grad_norm": 1.1633145809173584, + "learning_rate": 9.950865965769293e-06, + "loss": 0.3591, + "step": 3663 + }, + { + "epoch": 0.07334784675825139, + "grad_norm": 1.18228280544281, + "learning_rate": 9.950820619500058e-06, + "loss": 0.3601, + "step": 3664 + }, + { + "epoch": 0.07336786527537972, + "grad_norm": 1.1662870645523071, + "learning_rate": 9.950775252418639e-06, + "loss": 0.3341, + "step": 3665 + }, + { + "epoch": 0.07338788379250807, + "grad_norm": 1.8354909420013428, + "learning_rate": 9.950729864525227e-06, + "loss": 0.8449, + "step": 3666 + }, + { + "epoch": 0.07340790230963641, + "grad_norm": 0.9856070280075073, + "learning_rate": 9.95068445582001e-06, + "loss": 0.3229, + "step": 3667 + }, + { + "epoch": 0.07342792082676476, + "grad_norm": 1.0637444257736206, + "learning_rate": 9.950639026303181e-06, + "loss": 0.2788, + "step": 3668 + }, + { + "epoch": 0.0734479393438931, + "grad_norm": 1.2075693607330322, + "learning_rate": 9.950593575974932e-06, + "loss": 0.3353, + "step": 3669 + }, + { + "epoch": 0.07346795786102145, + "grad_norm": 1.169466257095337, + "learning_rate": 9.950548104835452e-06, + "loss": 0.3336, + "step": 3670 + }, + { + "epoch": 0.07348797637814979, + "grad_norm": 1.0804457664489746, + "learning_rate": 9.950502612884934e-06, + "loss": 0.3804, + "step": 3671 + }, + { + "epoch": 0.07350799489527814, + "grad_norm": 1.0780271291732788, + "learning_rate": 9.950457100123568e-06, + "loss": 0.349, + "step": 3672 + }, + { + "epoch": 0.07352801341240647, + "grad_norm": 1.0388237237930298, + "learning_rate": 9.950411566551545e-06, + "loss": 0.3373, + "step": 3673 + }, + { + "epoch": 0.07354803192953482, + "grad_norm": 1.7049542665481567, + "learning_rate": 9.950366012169059e-06, + "loss": 0.3048, + "step": 3674 + }, + { + "epoch": 0.07356805044666316, + "grad_norm": 1.2129600048065186, + "learning_rate": 9.9503204369763e-06, + "loss": 0.2968, + "step": 3675 + }, + { + "epoch": 0.07358806896379151, + "grad_norm": 1.0270106792449951, + "learning_rate": 9.950274840973457e-06, + "loss": 0.3378, + "step": 3676 + }, + { + "epoch": 0.07360808748091985, + "grad_norm": 1.2627781629562378, + "learning_rate": 9.950229224160725e-06, + "loss": 0.3082, + "step": 3677 + }, + { + "epoch": 0.0736281059980482, + "grad_norm": 1.0000196695327759, + "learning_rate": 9.950183586538293e-06, + "loss": 0.3394, + "step": 3678 + }, + { + "epoch": 0.07364812451517654, + "grad_norm": 1.1884526014328003, + "learning_rate": 9.950137928106356e-06, + "loss": 0.3271, + "step": 3679 + }, + { + "epoch": 0.07366814303230489, + "grad_norm": 1.1322723627090454, + "learning_rate": 9.950092248865105e-06, + "loss": 0.3581, + "step": 3680 + }, + { + "epoch": 0.07368816154943322, + "grad_norm": 1.1299158334732056, + "learning_rate": 9.95004654881473e-06, + "loss": 0.3565, + "step": 3681 + }, + { + "epoch": 0.07370818006656157, + "grad_norm": 1.2694621086120605, + "learning_rate": 9.950000827955425e-06, + "loss": 0.3527, + "step": 3682 + }, + { + "epoch": 0.07372819858368991, + "grad_norm": 1.1538890600204468, + "learning_rate": 9.949955086287383e-06, + "loss": 0.336, + "step": 3683 + }, + { + "epoch": 0.07374821710081826, + "grad_norm": 1.0234498977661133, + "learning_rate": 9.949909323810793e-06, + "loss": 0.2918, + "step": 3684 + }, + { + "epoch": 0.0737682356179466, + "grad_norm": 1.1237927675247192, + "learning_rate": 9.949863540525852e-06, + "loss": 0.3816, + "step": 3685 + }, + { + "epoch": 0.07378825413507495, + "grad_norm": 1.059909701347351, + "learning_rate": 9.949817736432747e-06, + "loss": 0.3474, + "step": 3686 + }, + { + "epoch": 0.07380827265220329, + "grad_norm": 1.0402708053588867, + "learning_rate": 9.949771911531675e-06, + "loss": 0.3583, + "step": 3687 + }, + { + "epoch": 0.07382829116933164, + "grad_norm": 1.1951931715011597, + "learning_rate": 9.949726065822829e-06, + "loss": 0.3191, + "step": 3688 + }, + { + "epoch": 0.07384830968645997, + "grad_norm": 1.2192699909210205, + "learning_rate": 9.949680199306398e-06, + "loss": 0.3132, + "step": 3689 + }, + { + "epoch": 0.07386832820358832, + "grad_norm": 1.851244330406189, + "learning_rate": 9.949634311982575e-06, + "loss": 0.8346, + "step": 3690 + }, + { + "epoch": 0.07388834672071666, + "grad_norm": 1.9963014125823975, + "learning_rate": 9.949588403851556e-06, + "loss": 0.836, + "step": 3691 + }, + { + "epoch": 0.07390836523784501, + "grad_norm": 1.1488771438598633, + "learning_rate": 9.949542474913533e-06, + "loss": 0.2802, + "step": 3692 + }, + { + "epoch": 0.07392838375497335, + "grad_norm": 1.2408676147460938, + "learning_rate": 9.949496525168699e-06, + "loss": 0.2896, + "step": 3693 + }, + { + "epoch": 0.0739484022721017, + "grad_norm": 1.2402442693710327, + "learning_rate": 9.949450554617245e-06, + "loss": 0.3466, + "step": 3694 + }, + { + "epoch": 0.07396842078923004, + "grad_norm": 1.2152924537658691, + "learning_rate": 9.949404563259365e-06, + "loss": 0.3182, + "step": 3695 + }, + { + "epoch": 0.07398843930635839, + "grad_norm": 1.2233531475067139, + "learning_rate": 9.949358551095257e-06, + "loss": 0.2878, + "step": 3696 + }, + { + "epoch": 0.07400845782348672, + "grad_norm": 1.1159881353378296, + "learning_rate": 9.94931251812511e-06, + "loss": 0.3765, + "step": 3697 + }, + { + "epoch": 0.07402847634061507, + "grad_norm": 1.0880985260009766, + "learning_rate": 9.949266464349117e-06, + "loss": 0.3262, + "step": 3698 + }, + { + "epoch": 0.07404849485774341, + "grad_norm": 1.1509181261062622, + "learning_rate": 9.949220389767472e-06, + "loss": 0.3841, + "step": 3699 + }, + { + "epoch": 0.07406851337487176, + "grad_norm": 1.0881990194320679, + "learning_rate": 9.94917429438037e-06, + "loss": 0.3199, + "step": 3700 + }, + { + "epoch": 0.0740885318920001, + "grad_norm": 0.9993201494216919, + "learning_rate": 9.949128178188006e-06, + "loss": 0.2766, + "step": 3701 + }, + { + "epoch": 0.07410855040912845, + "grad_norm": 1.0477569103240967, + "learning_rate": 9.94908204119057e-06, + "loss": 0.3426, + "step": 3702 + }, + { + "epoch": 0.07412856892625679, + "grad_norm": 1.0799566507339478, + "learning_rate": 9.94903588338826e-06, + "loss": 0.3378, + "step": 3703 + }, + { + "epoch": 0.07414858744338514, + "grad_norm": 1.1102019548416138, + "learning_rate": 9.948989704781267e-06, + "loss": 0.3563, + "step": 3704 + }, + { + "epoch": 0.07416860596051347, + "grad_norm": 1.0936744213104248, + "learning_rate": 9.948943505369789e-06, + "loss": 0.332, + "step": 3705 + }, + { + "epoch": 0.07418862447764182, + "grad_norm": 1.0312613248825073, + "learning_rate": 9.948897285154013e-06, + "loss": 0.3052, + "step": 3706 + }, + { + "epoch": 0.07420864299477016, + "grad_norm": 1.0955712795257568, + "learning_rate": 9.948851044134142e-06, + "loss": 0.307, + "step": 3707 + }, + { + "epoch": 0.07422866151189851, + "grad_norm": 0.9891958236694336, + "learning_rate": 9.948804782310363e-06, + "loss": 0.3194, + "step": 3708 + }, + { + "epoch": 0.07424868002902685, + "grad_norm": 1.147829532623291, + "learning_rate": 9.948758499682876e-06, + "loss": 0.3085, + "step": 3709 + }, + { + "epoch": 0.0742686985461552, + "grad_norm": 1.162734031677246, + "learning_rate": 9.948712196251872e-06, + "loss": 0.3405, + "step": 3710 + }, + { + "epoch": 0.07428871706328354, + "grad_norm": 1.0063444375991821, + "learning_rate": 9.948665872017547e-06, + "loss": 0.3451, + "step": 3711 + }, + { + "epoch": 0.07430873558041189, + "grad_norm": 1.0683777332305908, + "learning_rate": 9.948619526980097e-06, + "loss": 0.3588, + "step": 3712 + }, + { + "epoch": 0.07432875409754022, + "grad_norm": 1.0922502279281616, + "learning_rate": 9.948573161139713e-06, + "loss": 0.3287, + "step": 3713 + }, + { + "epoch": 0.07434877261466857, + "grad_norm": 1.1914483308792114, + "learning_rate": 9.948526774496594e-06, + "loss": 0.4107, + "step": 3714 + }, + { + "epoch": 0.07436879113179691, + "grad_norm": 1.2166935205459595, + "learning_rate": 9.948480367050933e-06, + "loss": 0.3921, + "step": 3715 + }, + { + "epoch": 0.07438880964892526, + "grad_norm": 1.2110329866409302, + "learning_rate": 9.948433938802926e-06, + "loss": 0.3573, + "step": 3716 + }, + { + "epoch": 0.0744088281660536, + "grad_norm": 1.0940743684768677, + "learning_rate": 9.948387489752767e-06, + "loss": 0.3584, + "step": 3717 + }, + { + "epoch": 0.07442884668318195, + "grad_norm": 1.0224676132202148, + "learning_rate": 9.948341019900653e-06, + "loss": 0.3187, + "step": 3718 + }, + { + "epoch": 0.07444886520031029, + "grad_norm": 0.9443845152854919, + "learning_rate": 9.948294529246776e-06, + "loss": 0.3204, + "step": 3719 + }, + { + "epoch": 0.07446888371743864, + "grad_norm": 1.0516207218170166, + "learning_rate": 9.948248017791336e-06, + "loss": 0.3619, + "step": 3720 + }, + { + "epoch": 0.07448890223456697, + "grad_norm": 1.365993857383728, + "learning_rate": 9.948201485534525e-06, + "loss": 0.3315, + "step": 3721 + }, + { + "epoch": 0.07450892075169532, + "grad_norm": 1.03854238986969, + "learning_rate": 9.94815493247654e-06, + "loss": 0.3267, + "step": 3722 + }, + { + "epoch": 0.07452893926882366, + "grad_norm": 1.1523327827453613, + "learning_rate": 9.948108358617576e-06, + "loss": 0.3299, + "step": 3723 + }, + { + "epoch": 0.07454895778595201, + "grad_norm": 1.0572643280029297, + "learning_rate": 9.94806176395783e-06, + "loss": 0.3163, + "step": 3724 + }, + { + "epoch": 0.07456897630308035, + "grad_norm": 1.0929064750671387, + "learning_rate": 9.948015148497498e-06, + "loss": 0.3542, + "step": 3725 + }, + { + "epoch": 0.0745889948202087, + "grad_norm": 1.0557035207748413, + "learning_rate": 9.947968512236774e-06, + "loss": 0.2822, + "step": 3726 + }, + { + "epoch": 0.07460901333733704, + "grad_norm": 1.1842734813690186, + "learning_rate": 9.947921855175855e-06, + "loss": 0.3357, + "step": 3727 + }, + { + "epoch": 0.07462903185446539, + "grad_norm": 1.1011269092559814, + "learning_rate": 9.947875177314938e-06, + "loss": 0.3688, + "step": 3728 + }, + { + "epoch": 0.07464905037159372, + "grad_norm": 1.1015762090682983, + "learning_rate": 9.947828478654219e-06, + "loss": 0.3673, + "step": 3729 + }, + { + "epoch": 0.07466906888872207, + "grad_norm": 1.0854692459106445, + "learning_rate": 9.947781759193894e-06, + "loss": 0.3021, + "step": 3730 + }, + { + "epoch": 0.07468908740585041, + "grad_norm": 1.8757264614105225, + "learning_rate": 9.947735018934156e-06, + "loss": 0.8684, + "step": 3731 + }, + { + "epoch": 0.07470910592297876, + "grad_norm": 1.283703327178955, + "learning_rate": 9.947688257875208e-06, + "loss": 0.3517, + "step": 3732 + }, + { + "epoch": 0.0747291244401071, + "grad_norm": 1.084742546081543, + "learning_rate": 9.947641476017243e-06, + "loss": 0.3383, + "step": 3733 + }, + { + "epoch": 0.07474914295723545, + "grad_norm": 1.115262508392334, + "learning_rate": 9.947594673360457e-06, + "loss": 0.3289, + "step": 3734 + }, + { + "epoch": 0.07476916147436379, + "grad_norm": 1.0506126880645752, + "learning_rate": 9.947547849905047e-06, + "loss": 0.3504, + "step": 3735 + }, + { + "epoch": 0.07478917999149214, + "grad_norm": 1.2562881708145142, + "learning_rate": 9.947501005651213e-06, + "loss": 0.3417, + "step": 3736 + }, + { + "epoch": 0.07480919850862047, + "grad_norm": 0.9576063752174377, + "learning_rate": 9.947454140599148e-06, + "loss": 0.2945, + "step": 3737 + }, + { + "epoch": 0.07482921702574882, + "grad_norm": 1.1171773672103882, + "learning_rate": 9.94740725474905e-06, + "loss": 0.3259, + "step": 3738 + }, + { + "epoch": 0.07484923554287716, + "grad_norm": 1.1139070987701416, + "learning_rate": 9.947360348101118e-06, + "loss": 0.3516, + "step": 3739 + }, + { + "epoch": 0.07486925406000551, + "grad_norm": 0.9774764180183411, + "learning_rate": 9.947313420655546e-06, + "loss": 0.3147, + "step": 3740 + }, + { + "epoch": 0.07488927257713385, + "grad_norm": 1.1233608722686768, + "learning_rate": 9.947266472412536e-06, + "loss": 0.3609, + "step": 3741 + }, + { + "epoch": 0.0749092910942622, + "grad_norm": 1.0881507396697998, + "learning_rate": 9.94721950337228e-06, + "loss": 0.3123, + "step": 3742 + }, + { + "epoch": 0.07492930961139053, + "grad_norm": 1.1175475120544434, + "learning_rate": 9.947172513534978e-06, + "loss": 0.3643, + "step": 3743 + }, + { + "epoch": 0.07494932812851889, + "grad_norm": 1.1588375568389893, + "learning_rate": 9.947125502900827e-06, + "loss": 0.3571, + "step": 3744 + }, + { + "epoch": 0.07496934664564722, + "grad_norm": 1.1325132846832275, + "learning_rate": 9.947078471470026e-06, + "loss": 0.3686, + "step": 3745 + }, + { + "epoch": 0.07498936516277557, + "grad_norm": 1.201094388961792, + "learning_rate": 9.947031419242772e-06, + "loss": 0.3541, + "step": 3746 + }, + { + "epoch": 0.07500938367990391, + "grad_norm": 1.1053818464279175, + "learning_rate": 9.946984346219263e-06, + "loss": 0.3509, + "step": 3747 + }, + { + "epoch": 0.07502940219703226, + "grad_norm": 1.24420964717865, + "learning_rate": 9.946937252399695e-06, + "loss": 0.3567, + "step": 3748 + }, + { + "epoch": 0.0750494207141606, + "grad_norm": 1.7725199460983276, + "learning_rate": 9.946890137784268e-06, + "loss": 0.4242, + "step": 3749 + }, + { + "epoch": 0.07506943923128895, + "grad_norm": 1.1747941970825195, + "learning_rate": 9.946843002373178e-06, + "loss": 0.3562, + "step": 3750 + }, + { + "epoch": 0.07508945774841728, + "grad_norm": 1.018791675567627, + "learning_rate": 9.946795846166628e-06, + "loss": 0.3125, + "step": 3751 + }, + { + "epoch": 0.07510947626554564, + "grad_norm": 1.0609639883041382, + "learning_rate": 9.946748669164809e-06, + "loss": 0.3416, + "step": 3752 + }, + { + "epoch": 0.07512949478267397, + "grad_norm": 1.2936408519744873, + "learning_rate": 9.946701471367925e-06, + "loss": 0.306, + "step": 3753 + }, + { + "epoch": 0.07514951329980232, + "grad_norm": 1.086276650428772, + "learning_rate": 9.946654252776173e-06, + "loss": 0.3349, + "step": 3754 + }, + { + "epoch": 0.07516953181693066, + "grad_norm": 1.22760808467865, + "learning_rate": 9.946607013389751e-06, + "loss": 0.2981, + "step": 3755 + }, + { + "epoch": 0.07518955033405901, + "grad_norm": 1.9053375720977783, + "learning_rate": 9.946559753208858e-06, + "loss": 0.8264, + "step": 3756 + }, + { + "epoch": 0.07520956885118735, + "grad_norm": 1.1379491090774536, + "learning_rate": 9.94651247223369e-06, + "loss": 0.3612, + "step": 3757 + }, + { + "epoch": 0.0752295873683157, + "grad_norm": 1.0509377717971802, + "learning_rate": 9.94646517046445e-06, + "loss": 0.3344, + "step": 3758 + }, + { + "epoch": 0.07524960588544403, + "grad_norm": 1.5081924200057983, + "learning_rate": 9.946417847901336e-06, + "loss": 0.3422, + "step": 3759 + }, + { + "epoch": 0.07526962440257239, + "grad_norm": 1.1649309396743774, + "learning_rate": 9.946370504544543e-06, + "loss": 0.39, + "step": 3760 + }, + { + "epoch": 0.07528964291970072, + "grad_norm": 1.1133756637573242, + "learning_rate": 9.946323140394274e-06, + "loss": 0.2969, + "step": 3761 + }, + { + "epoch": 0.07530966143682907, + "grad_norm": 1.1690571308135986, + "learning_rate": 9.946275755450727e-06, + "loss": 0.3545, + "step": 3762 + }, + { + "epoch": 0.07532967995395741, + "grad_norm": 1.0737501382827759, + "learning_rate": 9.946228349714101e-06, + "loss": 0.2997, + "step": 3763 + }, + { + "epoch": 0.07534969847108576, + "grad_norm": 1.0295671224594116, + "learning_rate": 9.946180923184596e-06, + "loss": 0.319, + "step": 3764 + }, + { + "epoch": 0.0753697169882141, + "grad_norm": 1.1796709299087524, + "learning_rate": 9.94613347586241e-06, + "loss": 0.3092, + "step": 3765 + }, + { + "epoch": 0.07538973550534245, + "grad_norm": 1.1948140859603882, + "learning_rate": 9.946086007747744e-06, + "loss": 0.3389, + "step": 3766 + }, + { + "epoch": 0.07540975402247078, + "grad_norm": 1.1588499546051025, + "learning_rate": 9.946038518840797e-06, + "loss": 0.3642, + "step": 3767 + }, + { + "epoch": 0.07542977253959914, + "grad_norm": 1.1511059999465942, + "learning_rate": 9.945991009141769e-06, + "loss": 0.2813, + "step": 3768 + }, + { + "epoch": 0.07544979105672747, + "grad_norm": 1.0436484813690186, + "learning_rate": 9.945943478650858e-06, + "loss": 0.3155, + "step": 3769 + }, + { + "epoch": 0.07546980957385582, + "grad_norm": 1.045157551765442, + "learning_rate": 9.945895927368265e-06, + "loss": 0.2888, + "step": 3770 + }, + { + "epoch": 0.07548982809098416, + "grad_norm": 1.1013197898864746, + "learning_rate": 9.94584835529419e-06, + "loss": 0.3496, + "step": 3771 + }, + { + "epoch": 0.07550984660811251, + "grad_norm": 1.1061947345733643, + "learning_rate": 9.945800762428834e-06, + "loss": 0.3356, + "step": 3772 + }, + { + "epoch": 0.07552986512524085, + "grad_norm": 1.772997260093689, + "learning_rate": 9.945753148772395e-06, + "loss": 0.8657, + "step": 3773 + }, + { + "epoch": 0.0755498836423692, + "grad_norm": 1.08683180809021, + "learning_rate": 9.945705514325074e-06, + "loss": 0.3224, + "step": 3774 + }, + { + "epoch": 0.07556990215949753, + "grad_norm": 1.23146653175354, + "learning_rate": 9.94565785908707e-06, + "loss": 0.3451, + "step": 3775 + }, + { + "epoch": 0.07558992067662589, + "grad_norm": 1.0845564603805542, + "learning_rate": 9.945610183058588e-06, + "loss": 0.3177, + "step": 3776 + }, + { + "epoch": 0.07560993919375422, + "grad_norm": 1.1065605878829956, + "learning_rate": 9.945562486239822e-06, + "loss": 0.3073, + "step": 3777 + }, + { + "epoch": 0.07562995771088257, + "grad_norm": 1.223395586013794, + "learning_rate": 9.945514768630978e-06, + "loss": 0.3437, + "step": 3778 + }, + { + "epoch": 0.07564997622801091, + "grad_norm": 1.063165545463562, + "learning_rate": 9.945467030232252e-06, + "loss": 0.3297, + "step": 3779 + }, + { + "epoch": 0.07566999474513926, + "grad_norm": 1.1512867212295532, + "learning_rate": 9.945419271043848e-06, + "loss": 0.3115, + "step": 3780 + }, + { + "epoch": 0.0756900132622676, + "grad_norm": 1.0832527875900269, + "learning_rate": 9.945371491065966e-06, + "loss": 0.3063, + "step": 3781 + }, + { + "epoch": 0.07571003177939595, + "grad_norm": 1.0505383014678955, + "learning_rate": 9.945323690298807e-06, + "loss": 0.3333, + "step": 3782 + }, + { + "epoch": 0.07573005029652428, + "grad_norm": 1.1705418825149536, + "learning_rate": 9.94527586874257e-06, + "loss": 0.3331, + "step": 3783 + }, + { + "epoch": 0.07575006881365264, + "grad_norm": 1.106520414352417, + "learning_rate": 9.945228026397457e-06, + "loss": 0.3224, + "step": 3784 + }, + { + "epoch": 0.07577008733078097, + "grad_norm": 1.1606041193008423, + "learning_rate": 9.94518016326367e-06, + "loss": 0.3742, + "step": 3785 + }, + { + "epoch": 0.07579010584790932, + "grad_norm": 1.111770510673523, + "learning_rate": 9.945132279341412e-06, + "loss": 0.2872, + "step": 3786 + }, + { + "epoch": 0.07581012436503766, + "grad_norm": 1.2423454523086548, + "learning_rate": 9.94508437463088e-06, + "loss": 0.3288, + "step": 3787 + }, + { + "epoch": 0.07583014288216601, + "grad_norm": 2.1501150131225586, + "learning_rate": 9.945036449132279e-06, + "loss": 0.9024, + "step": 3788 + }, + { + "epoch": 0.07585016139929435, + "grad_norm": 1.184483289718628, + "learning_rate": 9.944988502845808e-06, + "loss": 0.3307, + "step": 3789 + }, + { + "epoch": 0.0758701799164227, + "grad_norm": 1.0615160465240479, + "learning_rate": 9.94494053577167e-06, + "loss": 0.3045, + "step": 3790 + }, + { + "epoch": 0.07589019843355103, + "grad_norm": 1.256896734237671, + "learning_rate": 9.944892547910065e-06, + "loss": 0.355, + "step": 3791 + }, + { + "epoch": 0.07591021695067939, + "grad_norm": 1.378766655921936, + "learning_rate": 9.944844539261195e-06, + "loss": 0.3432, + "step": 3792 + }, + { + "epoch": 0.07593023546780772, + "grad_norm": 1.8634898662567139, + "learning_rate": 9.944796509825266e-06, + "loss": 0.7889, + "step": 3793 + }, + { + "epoch": 0.07595025398493607, + "grad_norm": 1.3043878078460693, + "learning_rate": 9.944748459602474e-06, + "loss": 0.3358, + "step": 3794 + }, + { + "epoch": 0.07597027250206441, + "grad_norm": 1.0624397993087769, + "learning_rate": 9.944700388593026e-06, + "loss": 0.3778, + "step": 3795 + }, + { + "epoch": 0.07599029101919276, + "grad_norm": 1.1307618618011475, + "learning_rate": 9.94465229679712e-06, + "loss": 0.3746, + "step": 3796 + }, + { + "epoch": 0.0760103095363211, + "grad_norm": 1.045487403869629, + "learning_rate": 9.944604184214959e-06, + "loss": 0.2975, + "step": 3797 + }, + { + "epoch": 0.07603032805344945, + "grad_norm": 1.1744674444198608, + "learning_rate": 9.944556050846748e-06, + "loss": 0.302, + "step": 3798 + }, + { + "epoch": 0.07605034657057778, + "grad_norm": 1.0851839780807495, + "learning_rate": 9.944507896692685e-06, + "loss": 0.2987, + "step": 3799 + }, + { + "epoch": 0.07607036508770613, + "grad_norm": 1.077057123184204, + "learning_rate": 9.944459721752977e-06, + "loss": 0.3745, + "step": 3800 + }, + { + "epoch": 0.07609038360483447, + "grad_norm": 1.308598518371582, + "learning_rate": 9.944411526027824e-06, + "loss": 0.3399, + "step": 3801 + }, + { + "epoch": 0.07611040212196282, + "grad_norm": 1.9999120235443115, + "learning_rate": 9.94436330951743e-06, + "loss": 0.8498, + "step": 3802 + }, + { + "epoch": 0.07613042063909116, + "grad_norm": 1.1205174922943115, + "learning_rate": 9.944315072221993e-06, + "loss": 0.334, + "step": 3803 + }, + { + "epoch": 0.07615043915621951, + "grad_norm": 1.1204091310501099, + "learning_rate": 9.944266814141724e-06, + "loss": 0.3459, + "step": 3804 + }, + { + "epoch": 0.07617045767334785, + "grad_norm": 1.212080717086792, + "learning_rate": 9.944218535276818e-06, + "loss": 0.3088, + "step": 3805 + }, + { + "epoch": 0.0761904761904762, + "grad_norm": 1.1576719284057617, + "learning_rate": 9.944170235627483e-06, + "loss": 0.3365, + "step": 3806 + }, + { + "epoch": 0.07621049470760453, + "grad_norm": 1.1070597171783447, + "learning_rate": 9.94412191519392e-06, + "loss": 0.3615, + "step": 3807 + }, + { + "epoch": 0.07623051322473288, + "grad_norm": 1.219165563583374, + "learning_rate": 9.944073573976332e-06, + "loss": 0.3947, + "step": 3808 + }, + { + "epoch": 0.07625053174186122, + "grad_norm": 1.1614969968795776, + "learning_rate": 9.944025211974923e-06, + "loss": 0.3438, + "step": 3809 + }, + { + "epoch": 0.07627055025898956, + "grad_norm": 1.0536631345748901, + "learning_rate": 9.943976829189896e-06, + "loss": 0.2776, + "step": 3810 + }, + { + "epoch": 0.07629056877611791, + "grad_norm": 0.9865900874137878, + "learning_rate": 9.943928425621454e-06, + "loss": 0.325, + "step": 3811 + }, + { + "epoch": 0.07631058729324625, + "grad_norm": 1.8490235805511475, + "learning_rate": 9.9438800012698e-06, + "loss": 0.8411, + "step": 3812 + }, + { + "epoch": 0.0763306058103746, + "grad_norm": 1.0922092199325562, + "learning_rate": 9.94383155613514e-06, + "loss": 0.3194, + "step": 3813 + }, + { + "epoch": 0.07635062432750293, + "grad_norm": 1.9626195430755615, + "learning_rate": 9.943783090217675e-06, + "loss": 0.7486, + "step": 3814 + }, + { + "epoch": 0.07637064284463128, + "grad_norm": 1.254043459892273, + "learning_rate": 9.943734603517611e-06, + "loss": 0.3715, + "step": 3815 + }, + { + "epoch": 0.07639066136175962, + "grad_norm": 1.1233303546905518, + "learning_rate": 9.943686096035152e-06, + "loss": 0.3446, + "step": 3816 + }, + { + "epoch": 0.07641067987888797, + "grad_norm": 1.1176633834838867, + "learning_rate": 9.943637567770498e-06, + "loss": 0.3548, + "step": 3817 + }, + { + "epoch": 0.07643069839601631, + "grad_norm": 1.101880669593811, + "learning_rate": 9.943589018723855e-06, + "loss": 0.3654, + "step": 3818 + }, + { + "epoch": 0.07645071691314466, + "grad_norm": 1.1515573263168335, + "learning_rate": 9.94354044889543e-06, + "loss": 0.3305, + "step": 3819 + }, + { + "epoch": 0.076470735430273, + "grad_norm": 1.0764057636260986, + "learning_rate": 9.943491858285423e-06, + "loss": 0.3125, + "step": 3820 + }, + { + "epoch": 0.07649075394740135, + "grad_norm": 1.347089171409607, + "learning_rate": 9.94344324689404e-06, + "loss": 0.3336, + "step": 3821 + }, + { + "epoch": 0.07651077246452968, + "grad_norm": 1.1486046314239502, + "learning_rate": 9.943394614721487e-06, + "loss": 0.3496, + "step": 3822 + }, + { + "epoch": 0.07653079098165803, + "grad_norm": 1.0787795782089233, + "learning_rate": 9.943345961767966e-06, + "loss": 0.3478, + "step": 3823 + }, + { + "epoch": 0.07655080949878637, + "grad_norm": 1.1074767112731934, + "learning_rate": 9.943297288033683e-06, + "loss": 0.3135, + "step": 3824 + }, + { + "epoch": 0.07657082801591472, + "grad_norm": 1.0331381559371948, + "learning_rate": 9.943248593518842e-06, + "loss": 0.3379, + "step": 3825 + }, + { + "epoch": 0.07659084653304306, + "grad_norm": 1.0008652210235596, + "learning_rate": 9.943199878223647e-06, + "loss": 0.3141, + "step": 3826 + }, + { + "epoch": 0.07661086505017141, + "grad_norm": 1.0101898908615112, + "learning_rate": 9.943151142148301e-06, + "loss": 0.3566, + "step": 3827 + }, + { + "epoch": 0.07663088356729975, + "grad_norm": 1.11006760597229, + "learning_rate": 9.943102385293016e-06, + "loss": 0.3871, + "step": 3828 + }, + { + "epoch": 0.0766509020844281, + "grad_norm": 1.126786231994629, + "learning_rate": 9.94305360765799e-06, + "loss": 0.3141, + "step": 3829 + }, + { + "epoch": 0.07667092060155643, + "grad_norm": 1.0582767724990845, + "learning_rate": 9.94300480924343e-06, + "loss": 0.3359, + "step": 3830 + }, + { + "epoch": 0.07669093911868478, + "grad_norm": 1.0669224262237549, + "learning_rate": 9.942955990049544e-06, + "loss": 0.3234, + "step": 3831 + }, + { + "epoch": 0.07671095763581312, + "grad_norm": 1.088014841079712, + "learning_rate": 9.942907150076532e-06, + "loss": 0.3134, + "step": 3832 + }, + { + "epoch": 0.07673097615294147, + "grad_norm": 1.3802852630615234, + "learning_rate": 9.942858289324602e-06, + "loss": 0.3839, + "step": 3833 + }, + { + "epoch": 0.07675099467006981, + "grad_norm": 1.3447515964508057, + "learning_rate": 9.94280940779396e-06, + "loss": 0.3497, + "step": 3834 + }, + { + "epoch": 0.07677101318719816, + "grad_norm": 1.204565167427063, + "learning_rate": 9.942760505484814e-06, + "loss": 0.345, + "step": 3835 + }, + { + "epoch": 0.0767910317043265, + "grad_norm": 1.2568567991256714, + "learning_rate": 9.942711582397363e-06, + "loss": 0.3403, + "step": 3836 + }, + { + "epoch": 0.07681105022145485, + "grad_norm": 1.1632041931152344, + "learning_rate": 9.942662638531816e-06, + "loss": 0.2963, + "step": 3837 + }, + { + "epoch": 0.07683106873858318, + "grad_norm": 1.3393030166625977, + "learning_rate": 9.942613673888382e-06, + "loss": 0.3435, + "step": 3838 + }, + { + "epoch": 0.07685108725571153, + "grad_norm": 1.1110936403274536, + "learning_rate": 9.942564688467262e-06, + "loss": 0.2949, + "step": 3839 + }, + { + "epoch": 0.07687110577283987, + "grad_norm": 1.0578140020370483, + "learning_rate": 9.942515682268664e-06, + "loss": 0.2887, + "step": 3840 + }, + { + "epoch": 0.07689112428996822, + "grad_norm": 1.171064019203186, + "learning_rate": 9.942466655292793e-06, + "loss": 0.3634, + "step": 3841 + }, + { + "epoch": 0.07691114280709656, + "grad_norm": 1.111125111579895, + "learning_rate": 9.942417607539856e-06, + "loss": 0.3079, + "step": 3842 + }, + { + "epoch": 0.07693116132422491, + "grad_norm": 1.1965240240097046, + "learning_rate": 9.942368539010059e-06, + "loss": 0.3924, + "step": 3843 + }, + { + "epoch": 0.07695117984135325, + "grad_norm": 1.1277326345443726, + "learning_rate": 9.942319449703608e-06, + "loss": 0.3151, + "step": 3844 + }, + { + "epoch": 0.0769711983584816, + "grad_norm": 1.147170901298523, + "learning_rate": 9.94227033962071e-06, + "loss": 0.3607, + "step": 3845 + }, + { + "epoch": 0.07699121687560993, + "grad_norm": 1.0647727251052856, + "learning_rate": 9.942221208761572e-06, + "loss": 0.3549, + "step": 3846 + }, + { + "epoch": 0.07701123539273828, + "grad_norm": 1.1380199193954468, + "learning_rate": 9.942172057126399e-06, + "loss": 0.3306, + "step": 3847 + }, + { + "epoch": 0.07703125390986662, + "grad_norm": 1.3055638074874878, + "learning_rate": 9.942122884715398e-06, + "loss": 0.2998, + "step": 3848 + }, + { + "epoch": 0.07705127242699497, + "grad_norm": 1.0192110538482666, + "learning_rate": 9.942073691528776e-06, + "loss": 0.2989, + "step": 3849 + }, + { + "epoch": 0.07707129094412331, + "grad_norm": 1.109601378440857, + "learning_rate": 9.942024477566738e-06, + "loss": 0.328, + "step": 3850 + }, + { + "epoch": 0.07709130946125166, + "grad_norm": 1.877355933189392, + "learning_rate": 9.941975242829496e-06, + "loss": 0.8829, + "step": 3851 + }, + { + "epoch": 0.07711132797838, + "grad_norm": 1.131367564201355, + "learning_rate": 9.941925987317252e-06, + "loss": 0.3031, + "step": 3852 + }, + { + "epoch": 0.07713134649550835, + "grad_norm": 1.2130240201950073, + "learning_rate": 9.941876711030213e-06, + "loss": 0.3136, + "step": 3853 + }, + { + "epoch": 0.07715136501263668, + "grad_norm": 1.249745488166809, + "learning_rate": 9.94182741396859e-06, + "loss": 0.3617, + "step": 3854 + }, + { + "epoch": 0.07717138352976503, + "grad_norm": 1.0100380182266235, + "learning_rate": 9.941778096132586e-06, + "loss": 0.3186, + "step": 3855 + }, + { + "epoch": 0.07719140204689337, + "grad_norm": 2.540311813354492, + "learning_rate": 9.941728757522414e-06, + "loss": 0.8183, + "step": 3856 + }, + { + "epoch": 0.07721142056402172, + "grad_norm": 1.0605167150497437, + "learning_rate": 9.941679398138275e-06, + "loss": 0.301, + "step": 3857 + }, + { + "epoch": 0.07723143908115006, + "grad_norm": 1.1726182699203491, + "learning_rate": 9.941630017980379e-06, + "loss": 0.3656, + "step": 3858 + }, + { + "epoch": 0.07725145759827841, + "grad_norm": 0.9783897995948792, + "learning_rate": 9.941580617048937e-06, + "loss": 0.2809, + "step": 3859 + }, + { + "epoch": 0.07727147611540675, + "grad_norm": 1.233133316040039, + "learning_rate": 9.941531195344149e-06, + "loss": 0.3567, + "step": 3860 + }, + { + "epoch": 0.0772914946325351, + "grad_norm": 1.119086742401123, + "learning_rate": 9.94148175286623e-06, + "loss": 0.3213, + "step": 3861 + }, + { + "epoch": 0.07731151314966343, + "grad_norm": 1.9844753742218018, + "learning_rate": 9.941432289615385e-06, + "loss": 0.9264, + "step": 3862 + }, + { + "epoch": 0.07733153166679178, + "grad_norm": 1.0794705152511597, + "learning_rate": 9.941382805591822e-06, + "loss": 0.3377, + "step": 3863 + }, + { + "epoch": 0.07735155018392012, + "grad_norm": 1.092604398727417, + "learning_rate": 9.941333300795748e-06, + "loss": 0.3449, + "step": 3864 + }, + { + "epoch": 0.07737156870104847, + "grad_norm": 1.0322352647781372, + "learning_rate": 9.941283775227373e-06, + "loss": 0.3775, + "step": 3865 + }, + { + "epoch": 0.07739158721817681, + "grad_norm": 1.0817766189575195, + "learning_rate": 9.941234228886905e-06, + "loss": 0.3748, + "step": 3866 + }, + { + "epoch": 0.07741160573530516, + "grad_norm": 1.237642526626587, + "learning_rate": 9.94118466177455e-06, + "loss": 0.368, + "step": 3867 + }, + { + "epoch": 0.0774316242524335, + "grad_norm": 1.2024537324905396, + "learning_rate": 9.94113507389052e-06, + "loss": 0.318, + "step": 3868 + }, + { + "epoch": 0.07745164276956185, + "grad_norm": 2.2129344940185547, + "learning_rate": 9.94108546523502e-06, + "loss": 0.7886, + "step": 3869 + }, + { + "epoch": 0.07747166128669018, + "grad_norm": 1.085873007774353, + "learning_rate": 9.94103583580826e-06, + "loss": 0.4044, + "step": 3870 + }, + { + "epoch": 0.07749167980381853, + "grad_norm": 1.2188074588775635, + "learning_rate": 9.94098618561045e-06, + "loss": 0.3263, + "step": 3871 + }, + { + "epoch": 0.07751169832094687, + "grad_norm": 1.0897117853164673, + "learning_rate": 9.940936514641796e-06, + "loss": 0.3758, + "step": 3872 + }, + { + "epoch": 0.07753171683807522, + "grad_norm": 1.191046953201294, + "learning_rate": 9.940886822902508e-06, + "loss": 0.3169, + "step": 3873 + }, + { + "epoch": 0.07755173535520356, + "grad_norm": 1.130911111831665, + "learning_rate": 9.940837110392799e-06, + "loss": 0.3144, + "step": 3874 + }, + { + "epoch": 0.07757175387233191, + "grad_norm": 1.0413967370986938, + "learning_rate": 9.94078737711287e-06, + "loss": 0.3152, + "step": 3875 + }, + { + "epoch": 0.07759177238946025, + "grad_norm": 1.0251702070236206, + "learning_rate": 9.940737623062936e-06, + "loss": 0.2776, + "step": 3876 + }, + { + "epoch": 0.0776117909065886, + "grad_norm": 1.1311230659484863, + "learning_rate": 9.940687848243202e-06, + "loss": 0.3854, + "step": 3877 + }, + { + "epoch": 0.07763180942371693, + "grad_norm": 1.1797353029251099, + "learning_rate": 9.940638052653882e-06, + "loss": 0.3368, + "step": 3878 + }, + { + "epoch": 0.07765182794084528, + "grad_norm": 1.1205464601516724, + "learning_rate": 9.940588236295182e-06, + "loss": 0.3477, + "step": 3879 + }, + { + "epoch": 0.07767184645797362, + "grad_norm": 1.997694969177246, + "learning_rate": 9.94053839916731e-06, + "loss": 0.8114, + "step": 3880 + }, + { + "epoch": 0.07769186497510197, + "grad_norm": 1.0355018377304077, + "learning_rate": 9.94048854127048e-06, + "loss": 0.3277, + "step": 3881 + }, + { + "epoch": 0.07771188349223031, + "grad_norm": 1.2225775718688965, + "learning_rate": 9.9404386626049e-06, + "loss": 0.3007, + "step": 3882 + }, + { + "epoch": 0.07773190200935866, + "grad_norm": 1.2145802974700928, + "learning_rate": 9.940388763170778e-06, + "loss": 0.3442, + "step": 3883 + }, + { + "epoch": 0.077751920526487, + "grad_norm": 1.8255534172058105, + "learning_rate": 9.940338842968327e-06, + "loss": 0.8632, + "step": 3884 + }, + { + "epoch": 0.07777193904361535, + "grad_norm": 0.9927339553833008, + "learning_rate": 9.940288901997753e-06, + "loss": 0.3064, + "step": 3885 + }, + { + "epoch": 0.07779195756074368, + "grad_norm": 1.172226071357727, + "learning_rate": 9.940238940259266e-06, + "loss": 0.349, + "step": 3886 + }, + { + "epoch": 0.07781197607787203, + "grad_norm": 1.064351201057434, + "learning_rate": 9.940188957753078e-06, + "loss": 0.3412, + "step": 3887 + }, + { + "epoch": 0.07783199459500037, + "grad_norm": 1.1071271896362305, + "learning_rate": 9.9401389544794e-06, + "loss": 0.3046, + "step": 3888 + }, + { + "epoch": 0.07785201311212872, + "grad_norm": 1.0725750923156738, + "learning_rate": 9.940088930438442e-06, + "loss": 0.3383, + "step": 3889 + }, + { + "epoch": 0.07787203162925706, + "grad_norm": 1.8931580781936646, + "learning_rate": 9.940038885630413e-06, + "loss": 0.8502, + "step": 3890 + }, + { + "epoch": 0.07789205014638541, + "grad_norm": 1.0634416341781616, + "learning_rate": 9.939988820055522e-06, + "loss": 0.3551, + "step": 3891 + }, + { + "epoch": 0.07791206866351374, + "grad_norm": 1.1093530654907227, + "learning_rate": 9.939938733713981e-06, + "loss": 0.2965, + "step": 3892 + }, + { + "epoch": 0.0779320871806421, + "grad_norm": 1.0444687604904175, + "learning_rate": 9.939888626606002e-06, + "loss": 0.3658, + "step": 3893 + }, + { + "epoch": 0.07795210569777043, + "grad_norm": 1.1517235040664673, + "learning_rate": 9.939838498731795e-06, + "loss": 0.3556, + "step": 3894 + }, + { + "epoch": 0.07797212421489878, + "grad_norm": 0.9782913327217102, + "learning_rate": 9.939788350091568e-06, + "loss": 0.2973, + "step": 3895 + }, + { + "epoch": 0.07799214273202712, + "grad_norm": 1.1637403964996338, + "learning_rate": 9.939738180685534e-06, + "loss": 0.3537, + "step": 3896 + }, + { + "epoch": 0.07801216124915547, + "grad_norm": 1.6623976230621338, + "learning_rate": 9.939687990513905e-06, + "loss": 0.3088, + "step": 3897 + }, + { + "epoch": 0.07803217976628381, + "grad_norm": 1.2163937091827393, + "learning_rate": 9.93963777957689e-06, + "loss": 0.3565, + "step": 3898 + }, + { + "epoch": 0.07805219828341216, + "grad_norm": 1.9321321249008179, + "learning_rate": 9.9395875478747e-06, + "loss": 0.8081, + "step": 3899 + }, + { + "epoch": 0.0780722168005405, + "grad_norm": 1.0148521661758423, + "learning_rate": 9.939537295407548e-06, + "loss": 0.3498, + "step": 3900 + }, + { + "epoch": 0.07809223531766885, + "grad_norm": 1.186668038368225, + "learning_rate": 9.939487022175644e-06, + "loss": 0.3907, + "step": 3901 + }, + { + "epoch": 0.07811225383479718, + "grad_norm": 0.9993528127670288, + "learning_rate": 9.939436728179199e-06, + "loss": 0.3278, + "step": 3902 + }, + { + "epoch": 0.07813227235192553, + "grad_norm": 1.029727816581726, + "learning_rate": 9.939386413418426e-06, + "loss": 0.2595, + "step": 3903 + }, + { + "epoch": 0.07815229086905387, + "grad_norm": 1.0100795030593872, + "learning_rate": 9.939336077893536e-06, + "loss": 0.303, + "step": 3904 + }, + { + "epoch": 0.07817230938618222, + "grad_norm": 1.1669197082519531, + "learning_rate": 9.93928572160474e-06, + "loss": 0.2856, + "step": 3905 + }, + { + "epoch": 0.07819232790331056, + "grad_norm": 1.1396745443344116, + "learning_rate": 9.939235344552248e-06, + "loss": 0.3586, + "step": 3906 + }, + { + "epoch": 0.07821234642043891, + "grad_norm": 1.062453269958496, + "learning_rate": 9.939184946736273e-06, + "loss": 0.3088, + "step": 3907 + }, + { + "epoch": 0.07823236493756724, + "grad_norm": 1.2394617795944214, + "learning_rate": 9.93913452815703e-06, + "loss": 0.3555, + "step": 3908 + }, + { + "epoch": 0.0782523834546956, + "grad_norm": 1.1229712963104248, + "learning_rate": 9.939084088814727e-06, + "loss": 0.3476, + "step": 3909 + }, + { + "epoch": 0.07827240197182393, + "grad_norm": 1.0716800689697266, + "learning_rate": 9.939033628709578e-06, + "loss": 0.3529, + "step": 3910 + }, + { + "epoch": 0.07829242048895228, + "grad_norm": 1.0597212314605713, + "learning_rate": 9.938983147841794e-06, + "loss": 0.3008, + "step": 3911 + }, + { + "epoch": 0.07831243900608062, + "grad_norm": 1.2138644456863403, + "learning_rate": 9.938932646211588e-06, + "loss": 0.388, + "step": 3912 + }, + { + "epoch": 0.07833245752320897, + "grad_norm": 1.0721075534820557, + "learning_rate": 9.938882123819171e-06, + "loss": 0.3211, + "step": 3913 + }, + { + "epoch": 0.07835247604033731, + "grad_norm": 0.9777457118034363, + "learning_rate": 9.938831580664758e-06, + "loss": 0.3089, + "step": 3914 + }, + { + "epoch": 0.07837249455746566, + "grad_norm": 1.0896443128585815, + "learning_rate": 9.938781016748559e-06, + "loss": 0.3463, + "step": 3915 + }, + { + "epoch": 0.078392513074594, + "grad_norm": 1.150007963180542, + "learning_rate": 9.938730432070787e-06, + "loss": 0.3146, + "step": 3916 + }, + { + "epoch": 0.07841253159172235, + "grad_norm": 1.0821434259414673, + "learning_rate": 9.938679826631657e-06, + "loss": 0.3339, + "step": 3917 + }, + { + "epoch": 0.07843255010885068, + "grad_norm": 2.0417559146881104, + "learning_rate": 9.938629200431378e-06, + "loss": 0.9105, + "step": 3918 + }, + { + "epoch": 0.07845256862597903, + "grad_norm": 1.0530747175216675, + "learning_rate": 9.938578553470164e-06, + "loss": 0.3318, + "step": 3919 + }, + { + "epoch": 0.07847258714310737, + "grad_norm": 1.1139302253723145, + "learning_rate": 9.93852788574823e-06, + "loss": 0.3258, + "step": 3920 + }, + { + "epoch": 0.07849260566023572, + "grad_norm": 1.1646438837051392, + "learning_rate": 9.938477197265788e-06, + "loss": 0.3267, + "step": 3921 + }, + { + "epoch": 0.07851262417736406, + "grad_norm": 1.1338707208633423, + "learning_rate": 9.93842648802305e-06, + "loss": 0.3004, + "step": 3922 + }, + { + "epoch": 0.07853264269449241, + "grad_norm": 1.0841695070266724, + "learning_rate": 9.93837575802023e-06, + "loss": 0.3665, + "step": 3923 + }, + { + "epoch": 0.07855266121162074, + "grad_norm": 1.1354460716247559, + "learning_rate": 9.93832500725754e-06, + "loss": 0.386, + "step": 3924 + }, + { + "epoch": 0.0785726797287491, + "grad_norm": 1.0284326076507568, + "learning_rate": 9.938274235735195e-06, + "loss": 0.3215, + "step": 3925 + }, + { + "epoch": 0.07859269824587743, + "grad_norm": 1.0772720575332642, + "learning_rate": 9.938223443453409e-06, + "loss": 0.334, + "step": 3926 + }, + { + "epoch": 0.07861271676300578, + "grad_norm": 1.0229113101959229, + "learning_rate": 9.938172630412393e-06, + "loss": 0.3261, + "step": 3927 + }, + { + "epoch": 0.07863273528013412, + "grad_norm": 1.0459364652633667, + "learning_rate": 9.938121796612362e-06, + "loss": 0.2808, + "step": 3928 + }, + { + "epoch": 0.07865275379726247, + "grad_norm": 1.931260108947754, + "learning_rate": 9.93807094205353e-06, + "loss": 0.899, + "step": 3929 + }, + { + "epoch": 0.07867277231439081, + "grad_norm": 1.1828049421310425, + "learning_rate": 9.93802006673611e-06, + "loss": 0.3395, + "step": 3930 + }, + { + "epoch": 0.07869279083151916, + "grad_norm": 1.3119642734527588, + "learning_rate": 9.937969170660317e-06, + "loss": 0.3898, + "step": 3931 + }, + { + "epoch": 0.0787128093486475, + "grad_norm": 1.1367045640945435, + "learning_rate": 9.937918253826365e-06, + "loss": 0.3522, + "step": 3932 + }, + { + "epoch": 0.07873282786577585, + "grad_norm": 1.0758975744247437, + "learning_rate": 9.937867316234467e-06, + "loss": 0.3609, + "step": 3933 + }, + { + "epoch": 0.07875284638290418, + "grad_norm": 1.0450986623764038, + "learning_rate": 9.937816357884836e-06, + "loss": 0.3361, + "step": 3934 + }, + { + "epoch": 0.07877286490003253, + "grad_norm": 1.1982755661010742, + "learning_rate": 9.937765378777688e-06, + "loss": 0.3346, + "step": 3935 + }, + { + "epoch": 0.07879288341716087, + "grad_norm": 1.0907838344573975, + "learning_rate": 9.937714378913236e-06, + "loss": 0.3327, + "step": 3936 + }, + { + "epoch": 0.07881290193428922, + "grad_norm": 1.163532018661499, + "learning_rate": 9.937663358291698e-06, + "loss": 0.3568, + "step": 3937 + }, + { + "epoch": 0.07883292045141756, + "grad_norm": 1.0832281112670898, + "learning_rate": 9.937612316913285e-06, + "loss": 0.3588, + "step": 3938 + }, + { + "epoch": 0.07885293896854591, + "grad_norm": 1.0628596544265747, + "learning_rate": 9.937561254778211e-06, + "loss": 0.352, + "step": 3939 + }, + { + "epoch": 0.07887295748567424, + "grad_norm": 1.0652732849121094, + "learning_rate": 9.937510171886693e-06, + "loss": 0.3623, + "step": 3940 + }, + { + "epoch": 0.0788929760028026, + "grad_norm": 1.055010199546814, + "learning_rate": 9.937459068238947e-06, + "loss": 0.3535, + "step": 3941 + }, + { + "epoch": 0.07891299451993093, + "grad_norm": 1.0883605480194092, + "learning_rate": 9.937407943835183e-06, + "loss": 0.3776, + "step": 3942 + }, + { + "epoch": 0.07893301303705928, + "grad_norm": 1.0953338146209717, + "learning_rate": 9.93735679867562e-06, + "loss": 0.3713, + "step": 3943 + }, + { + "epoch": 0.07895303155418762, + "grad_norm": 1.082729697227478, + "learning_rate": 9.937305632760469e-06, + "loss": 0.3429, + "step": 3944 + }, + { + "epoch": 0.07897305007131597, + "grad_norm": 1.1969388723373413, + "learning_rate": 9.93725444608995e-06, + "loss": 0.3542, + "step": 3945 + }, + { + "epoch": 0.0789930685884443, + "grad_norm": 1.1150273084640503, + "learning_rate": 9.937203238664274e-06, + "loss": 0.3391, + "step": 3946 + }, + { + "epoch": 0.07901308710557266, + "grad_norm": 1.1962906122207642, + "learning_rate": 9.93715201048366e-06, + "loss": 0.3811, + "step": 3947 + }, + { + "epoch": 0.079033105622701, + "grad_norm": 0.99776691198349, + "learning_rate": 9.937100761548319e-06, + "loss": 0.3106, + "step": 3948 + }, + { + "epoch": 0.07905312413982934, + "grad_norm": 2.0036349296569824, + "learning_rate": 9.937049491858472e-06, + "loss": 0.9215, + "step": 3949 + }, + { + "epoch": 0.07907314265695768, + "grad_norm": 1.175212025642395, + "learning_rate": 9.93699820141433e-06, + "loss": 0.3284, + "step": 3950 + }, + { + "epoch": 0.07909316117408603, + "grad_norm": 1.0091904401779175, + "learning_rate": 9.936946890216109e-06, + "loss": 0.3134, + "step": 3951 + }, + { + "epoch": 0.07911317969121437, + "grad_norm": 1.0707565546035767, + "learning_rate": 9.936895558264028e-06, + "loss": 0.3129, + "step": 3952 + }, + { + "epoch": 0.07913319820834272, + "grad_norm": 1.179427981376648, + "learning_rate": 9.936844205558298e-06, + "loss": 0.2805, + "step": 3953 + }, + { + "epoch": 0.07915321672547106, + "grad_norm": 1.1131353378295898, + "learning_rate": 9.936792832099137e-06, + "loss": 0.3056, + "step": 3954 + }, + { + "epoch": 0.07917323524259941, + "grad_norm": 1.0623805522918701, + "learning_rate": 9.936741437886765e-06, + "loss": 0.3264, + "step": 3955 + }, + { + "epoch": 0.07919325375972774, + "grad_norm": 1.0933363437652588, + "learning_rate": 9.936690022921391e-06, + "loss": 0.3709, + "step": 3956 + }, + { + "epoch": 0.0792132722768561, + "grad_norm": 1.71523916721344, + "learning_rate": 9.936638587203236e-06, + "loss": 0.8492, + "step": 3957 + }, + { + "epoch": 0.07923329079398443, + "grad_norm": 1.0855333805084229, + "learning_rate": 9.936587130732515e-06, + "loss": 0.2967, + "step": 3958 + }, + { + "epoch": 0.07925330931111278, + "grad_norm": 1.1001545190811157, + "learning_rate": 9.936535653509442e-06, + "loss": 0.3301, + "step": 3959 + }, + { + "epoch": 0.07927332782824112, + "grad_norm": 1.0885906219482422, + "learning_rate": 9.936484155534238e-06, + "loss": 0.3354, + "step": 3960 + }, + { + "epoch": 0.07929334634536947, + "grad_norm": 1.8121992349624634, + "learning_rate": 9.936432636807116e-06, + "loss": 0.8749, + "step": 3961 + }, + { + "epoch": 0.0793133648624978, + "grad_norm": 1.0695419311523438, + "learning_rate": 9.936381097328294e-06, + "loss": 0.3268, + "step": 3962 + }, + { + "epoch": 0.07933338337962616, + "grad_norm": 1.1381394863128662, + "learning_rate": 9.936329537097989e-06, + "loss": 0.3593, + "step": 3963 + }, + { + "epoch": 0.0793534018967545, + "grad_norm": 1.2475541830062866, + "learning_rate": 9.936277956116414e-06, + "loss": 0.3085, + "step": 3964 + }, + { + "epoch": 0.07937342041388284, + "grad_norm": 1.8905246257781982, + "learning_rate": 9.936226354383792e-06, + "loss": 0.8481, + "step": 3965 + }, + { + "epoch": 0.07939343893101118, + "grad_norm": 1.1443490982055664, + "learning_rate": 9.936174731900335e-06, + "loss": 0.3283, + "step": 3966 + }, + { + "epoch": 0.07941345744813953, + "grad_norm": 1.0706737041473389, + "learning_rate": 9.936123088666261e-06, + "loss": 0.3276, + "step": 3967 + }, + { + "epoch": 0.07943347596526787, + "grad_norm": 1.0129283666610718, + "learning_rate": 9.936071424681788e-06, + "loss": 0.3135, + "step": 3968 + }, + { + "epoch": 0.07945349448239622, + "grad_norm": 1.2438167333602905, + "learning_rate": 9.936019739947134e-06, + "loss": 0.3325, + "step": 3969 + }, + { + "epoch": 0.07947351299952456, + "grad_norm": 1.3236531019210815, + "learning_rate": 9.935968034462515e-06, + "loss": 0.3566, + "step": 3970 + }, + { + "epoch": 0.07949353151665291, + "grad_norm": 1.040830373764038, + "learning_rate": 9.935916308228149e-06, + "loss": 0.3282, + "step": 3971 + }, + { + "epoch": 0.07951355003378124, + "grad_norm": 1.0714529752731323, + "learning_rate": 9.935864561244252e-06, + "loss": 0.368, + "step": 3972 + }, + { + "epoch": 0.0795335685509096, + "grad_norm": 1.0451710224151611, + "learning_rate": 9.935812793511043e-06, + "loss": 0.3739, + "step": 3973 + }, + { + "epoch": 0.07955358706803793, + "grad_norm": 1.2468717098236084, + "learning_rate": 9.935761005028739e-06, + "loss": 0.3423, + "step": 3974 + }, + { + "epoch": 0.07957360558516628, + "grad_norm": 1.129802942276001, + "learning_rate": 9.935709195797558e-06, + "loss": 0.3443, + "step": 3975 + }, + { + "epoch": 0.07959362410229462, + "grad_norm": 1.0735912322998047, + "learning_rate": 9.935657365817719e-06, + "loss": 0.3276, + "step": 3976 + }, + { + "epoch": 0.07961364261942297, + "grad_norm": 1.049054741859436, + "learning_rate": 9.935605515089437e-06, + "loss": 0.3234, + "step": 3977 + }, + { + "epoch": 0.0796336611365513, + "grad_norm": 1.0641090869903564, + "learning_rate": 9.935553643612932e-06, + "loss": 0.3743, + "step": 3978 + }, + { + "epoch": 0.07965367965367966, + "grad_norm": 1.109554409980774, + "learning_rate": 9.93550175138842e-06, + "loss": 0.352, + "step": 3979 + }, + { + "epoch": 0.079673698170808, + "grad_norm": 1.1764675378799438, + "learning_rate": 9.935449838416124e-06, + "loss": 0.2938, + "step": 3980 + }, + { + "epoch": 0.07969371668793634, + "grad_norm": 0.9958846569061279, + "learning_rate": 9.935397904696258e-06, + "loss": 0.3243, + "step": 3981 + }, + { + "epoch": 0.07971373520506468, + "grad_norm": 1.0391417741775513, + "learning_rate": 9.93534595022904e-06, + "loss": 0.3169, + "step": 3982 + }, + { + "epoch": 0.07973375372219303, + "grad_norm": 1.1885117292404175, + "learning_rate": 9.935293975014691e-06, + "loss": 0.3455, + "step": 3983 + }, + { + "epoch": 0.07975377223932137, + "grad_norm": 1.1455856561660767, + "learning_rate": 9.935241979053428e-06, + "loss": 0.3494, + "step": 3984 + }, + { + "epoch": 0.07977379075644972, + "grad_norm": 1.2975037097930908, + "learning_rate": 9.935189962345469e-06, + "loss": 0.3686, + "step": 3985 + }, + { + "epoch": 0.07979380927357806, + "grad_norm": 1.1437679529190063, + "learning_rate": 9.935137924891034e-06, + "loss": 0.3114, + "step": 3986 + }, + { + "epoch": 0.07981382779070641, + "grad_norm": 1.0396806001663208, + "learning_rate": 9.935085866690342e-06, + "loss": 0.3281, + "step": 3987 + }, + { + "epoch": 0.07983384630783474, + "grad_norm": 1.2385646104812622, + "learning_rate": 9.935033787743612e-06, + "loss": 0.3815, + "step": 3988 + }, + { + "epoch": 0.0798538648249631, + "grad_norm": 1.2474825382232666, + "learning_rate": 9.934981688051059e-06, + "loss": 0.3547, + "step": 3989 + }, + { + "epoch": 0.07987388334209143, + "grad_norm": 1.055444598197937, + "learning_rate": 9.934929567612905e-06, + "loss": 0.3229, + "step": 3990 + }, + { + "epoch": 0.07989390185921978, + "grad_norm": 1.2024710178375244, + "learning_rate": 9.934877426429372e-06, + "loss": 0.3964, + "step": 3991 + }, + { + "epoch": 0.07991392037634812, + "grad_norm": 1.0755517482757568, + "learning_rate": 9.934825264500673e-06, + "loss": 0.3207, + "step": 3992 + }, + { + "epoch": 0.07993393889347647, + "grad_norm": 1.0946810245513916, + "learning_rate": 9.934773081827033e-06, + "loss": 0.3107, + "step": 3993 + }, + { + "epoch": 0.0799539574106048, + "grad_norm": 1.906601071357727, + "learning_rate": 9.934720878408668e-06, + "loss": 0.8969, + "step": 3994 + }, + { + "epoch": 0.07997397592773316, + "grad_norm": 0.9477238059043884, + "learning_rate": 9.9346686542458e-06, + "loss": 0.3203, + "step": 3995 + }, + { + "epoch": 0.0799939944448615, + "grad_norm": 1.2105908393859863, + "learning_rate": 9.934616409338645e-06, + "loss": 0.3269, + "step": 3996 + }, + { + "epoch": 0.08001401296198984, + "grad_norm": 1.1612190008163452, + "learning_rate": 9.934564143687425e-06, + "loss": 0.3134, + "step": 3997 + }, + { + "epoch": 0.08003403147911818, + "grad_norm": 1.0832881927490234, + "learning_rate": 9.934511857292359e-06, + "loss": 0.3276, + "step": 3998 + }, + { + "epoch": 0.08005404999624653, + "grad_norm": 1.0569422245025635, + "learning_rate": 9.934459550153668e-06, + "loss": 0.3689, + "step": 3999 + }, + { + "epoch": 0.08007406851337487, + "grad_norm": 1.8481897115707397, + "learning_rate": 9.934407222271571e-06, + "loss": 0.8785, + "step": 4000 + }, + { + "epoch": 0.08009408703050322, + "grad_norm": 1.0351654291152954, + "learning_rate": 9.934354873646287e-06, + "loss": 0.3497, + "step": 4001 + }, + { + "epoch": 0.08011410554763156, + "grad_norm": 1.088699221611023, + "learning_rate": 9.934302504278037e-06, + "loss": 0.3023, + "step": 4002 + }, + { + "epoch": 0.0801341240647599, + "grad_norm": 1.144000768661499, + "learning_rate": 9.934250114167042e-06, + "loss": 0.3185, + "step": 4003 + }, + { + "epoch": 0.08015414258188824, + "grad_norm": 1.0723270177841187, + "learning_rate": 9.93419770331352e-06, + "loss": 0.3034, + "step": 4004 + }, + { + "epoch": 0.0801741610990166, + "grad_norm": 1.7941583395004272, + "learning_rate": 9.934145271717695e-06, + "loss": 0.821, + "step": 4005 + }, + { + "epoch": 0.08019417961614493, + "grad_norm": 1.0828838348388672, + "learning_rate": 9.934092819379783e-06, + "loss": 0.3474, + "step": 4006 + }, + { + "epoch": 0.08021419813327328, + "grad_norm": 1.2630879878997803, + "learning_rate": 9.934040346300006e-06, + "loss": 0.3885, + "step": 4007 + }, + { + "epoch": 0.08023421665040162, + "grad_norm": 1.1889894008636475, + "learning_rate": 9.933987852478588e-06, + "loss": 0.3365, + "step": 4008 + }, + { + "epoch": 0.08025423516752997, + "grad_norm": 2.0204825401306152, + "learning_rate": 9.933935337915745e-06, + "loss": 0.8527, + "step": 4009 + }, + { + "epoch": 0.0802742536846583, + "grad_norm": 1.0509498119354248, + "learning_rate": 9.933882802611701e-06, + "loss": 0.3277, + "step": 4010 + }, + { + "epoch": 0.08029427220178666, + "grad_norm": 1.002483606338501, + "learning_rate": 9.933830246566673e-06, + "loss": 0.2832, + "step": 4011 + }, + { + "epoch": 0.080314290718915, + "grad_norm": 1.164443850517273, + "learning_rate": 9.933777669780886e-06, + "loss": 0.3139, + "step": 4012 + }, + { + "epoch": 0.08033430923604334, + "grad_norm": 1.2275580167770386, + "learning_rate": 9.93372507225456e-06, + "loss": 0.3221, + "step": 4013 + }, + { + "epoch": 0.08035432775317168, + "grad_norm": 1.0403838157653809, + "learning_rate": 9.933672453987914e-06, + "loss": 0.3426, + "step": 4014 + }, + { + "epoch": 0.08037434627030003, + "grad_norm": 1.3604618310928345, + "learning_rate": 9.933619814981173e-06, + "loss": 0.3545, + "step": 4015 + }, + { + "epoch": 0.08039436478742837, + "grad_norm": 0.9734665155410767, + "learning_rate": 9.933567155234553e-06, + "loss": 0.3431, + "step": 4016 + }, + { + "epoch": 0.08041438330455672, + "grad_norm": 1.1796927452087402, + "learning_rate": 9.93351447474828e-06, + "loss": 0.3478, + "step": 4017 + }, + { + "epoch": 0.08043440182168506, + "grad_norm": 1.984893560409546, + "learning_rate": 9.933461773522575e-06, + "loss": 0.8201, + "step": 4018 + }, + { + "epoch": 0.0804544203388134, + "grad_norm": 0.9958094358444214, + "learning_rate": 9.933409051557657e-06, + "loss": 0.3213, + "step": 4019 + }, + { + "epoch": 0.08047443885594174, + "grad_norm": 1.2222583293914795, + "learning_rate": 9.93335630885375e-06, + "loss": 0.3586, + "step": 4020 + }, + { + "epoch": 0.0804944573730701, + "grad_norm": 1.031615972518921, + "learning_rate": 9.933303545411074e-06, + "loss": 0.3211, + "step": 4021 + }, + { + "epoch": 0.08051447589019843, + "grad_norm": 1.2407567501068115, + "learning_rate": 9.933250761229852e-06, + "loss": 0.3612, + "step": 4022 + }, + { + "epoch": 0.08053449440732678, + "grad_norm": 1.1522034406661987, + "learning_rate": 9.933197956310304e-06, + "loss": 0.3312, + "step": 4023 + }, + { + "epoch": 0.08055451292445512, + "grad_norm": 1.16855788230896, + "learning_rate": 9.933145130652655e-06, + "loss": 0.3449, + "step": 4024 + }, + { + "epoch": 0.08057453144158347, + "grad_norm": 1.8041471242904663, + "learning_rate": 9.933092284257125e-06, + "loss": 0.9164, + "step": 4025 + }, + { + "epoch": 0.0805945499587118, + "grad_norm": 1.0632367134094238, + "learning_rate": 9.933039417123936e-06, + "loss": 0.3139, + "step": 4026 + }, + { + "epoch": 0.08061456847584016, + "grad_norm": 1.3329882621765137, + "learning_rate": 9.932986529253312e-06, + "loss": 0.3338, + "step": 4027 + }, + { + "epoch": 0.0806345869929685, + "grad_norm": 1.1558383703231812, + "learning_rate": 9.932933620645474e-06, + "loss": 0.3488, + "step": 4028 + }, + { + "epoch": 0.08065460551009684, + "grad_norm": 1.1464720964431763, + "learning_rate": 9.932880691300644e-06, + "loss": 0.3396, + "step": 4029 + }, + { + "epoch": 0.08067462402722518, + "grad_norm": 1.0459463596343994, + "learning_rate": 9.932827741219047e-06, + "loss": 0.3748, + "step": 4030 + }, + { + "epoch": 0.08069464254435353, + "grad_norm": 1.0981171131134033, + "learning_rate": 9.932774770400902e-06, + "loss": 0.3396, + "step": 4031 + }, + { + "epoch": 0.08071466106148187, + "grad_norm": 1.2516851425170898, + "learning_rate": 9.932721778846432e-06, + "loss": 0.3485, + "step": 4032 + }, + { + "epoch": 0.08073467957861022, + "grad_norm": 1.017539143562317, + "learning_rate": 9.932668766555864e-06, + "loss": 0.2814, + "step": 4033 + }, + { + "epoch": 0.08075469809573856, + "grad_norm": 1.130815029144287, + "learning_rate": 9.932615733529416e-06, + "loss": 0.3507, + "step": 4034 + }, + { + "epoch": 0.0807747166128669, + "grad_norm": 1.912869930267334, + "learning_rate": 9.932562679767315e-06, + "loss": 0.8617, + "step": 4035 + }, + { + "epoch": 0.08079473512999524, + "grad_norm": 1.1536166667938232, + "learning_rate": 9.93250960526978e-06, + "loss": 0.322, + "step": 4036 + }, + { + "epoch": 0.0808147536471236, + "grad_norm": 1.1199806928634644, + "learning_rate": 9.932456510037037e-06, + "loss": 0.3193, + "step": 4037 + }, + { + "epoch": 0.08083477216425193, + "grad_norm": 1.0928692817687988, + "learning_rate": 9.932403394069309e-06, + "loss": 0.349, + "step": 4038 + }, + { + "epoch": 0.08085479068138028, + "grad_norm": 1.0061649084091187, + "learning_rate": 9.932350257366817e-06, + "loss": 0.2651, + "step": 4039 + }, + { + "epoch": 0.08087480919850862, + "grad_norm": 1.1228867769241333, + "learning_rate": 9.932297099929788e-06, + "loss": 0.3259, + "step": 4040 + }, + { + "epoch": 0.08089482771563697, + "grad_norm": 1.2533624172210693, + "learning_rate": 9.932243921758442e-06, + "loss": 0.308, + "step": 4041 + }, + { + "epoch": 0.0809148462327653, + "grad_norm": 1.09130859375, + "learning_rate": 9.932190722853003e-06, + "loss": 0.3417, + "step": 4042 + }, + { + "epoch": 0.08093486474989366, + "grad_norm": 1.1033600568771362, + "learning_rate": 9.932137503213697e-06, + "loss": 0.3193, + "step": 4043 + }, + { + "epoch": 0.080954883267022, + "grad_norm": 1.0864218473434448, + "learning_rate": 9.932084262840746e-06, + "loss": 0.3436, + "step": 4044 + }, + { + "epoch": 0.08097490178415034, + "grad_norm": 1.0296226739883423, + "learning_rate": 9.932031001734375e-06, + "loss": 0.3222, + "step": 4045 + }, + { + "epoch": 0.08099492030127868, + "grad_norm": 1.0993242263793945, + "learning_rate": 9.931977719894807e-06, + "loss": 0.3461, + "step": 4046 + }, + { + "epoch": 0.08101493881840703, + "grad_norm": 1.1395691633224487, + "learning_rate": 9.931924417322265e-06, + "loss": 0.3123, + "step": 4047 + }, + { + "epoch": 0.08103495733553537, + "grad_norm": 1.7330687046051025, + "learning_rate": 9.931871094016975e-06, + "loss": 0.8662, + "step": 4048 + }, + { + "epoch": 0.08105497585266372, + "grad_norm": 1.2162230014801025, + "learning_rate": 9.93181774997916e-06, + "loss": 0.3424, + "step": 4049 + }, + { + "epoch": 0.08107499436979206, + "grad_norm": 1.9464330673217773, + "learning_rate": 9.931764385209045e-06, + "loss": 0.8773, + "step": 4050 + }, + { + "epoch": 0.0810950128869204, + "grad_norm": 1.1174490451812744, + "learning_rate": 9.931710999706852e-06, + "loss": 0.2968, + "step": 4051 + }, + { + "epoch": 0.08111503140404874, + "grad_norm": 1.2121940851211548, + "learning_rate": 9.931657593472809e-06, + "loss": 0.346, + "step": 4052 + }, + { + "epoch": 0.0811350499211771, + "grad_norm": 1.7873210906982422, + "learning_rate": 9.931604166507139e-06, + "loss": 0.3727, + "step": 4053 + }, + { + "epoch": 0.08115506843830543, + "grad_norm": 1.1766512393951416, + "learning_rate": 9.931550718810065e-06, + "loss": 0.374, + "step": 4054 + }, + { + "epoch": 0.08117508695543378, + "grad_norm": 1.1274827718734741, + "learning_rate": 9.931497250381814e-06, + "loss": 0.3924, + "step": 4055 + }, + { + "epoch": 0.08119510547256212, + "grad_norm": 1.3173555135726929, + "learning_rate": 9.93144376122261e-06, + "loss": 0.3665, + "step": 4056 + }, + { + "epoch": 0.08121512398969047, + "grad_norm": 1.215133547782898, + "learning_rate": 9.931390251332677e-06, + "loss": 0.3575, + "step": 4057 + }, + { + "epoch": 0.0812351425068188, + "grad_norm": 1.1709688901901245, + "learning_rate": 9.93133672071224e-06, + "loss": 0.3017, + "step": 4058 + }, + { + "epoch": 0.08125516102394716, + "grad_norm": 1.082587480545044, + "learning_rate": 9.931283169361525e-06, + "loss": 0.3172, + "step": 4059 + }, + { + "epoch": 0.08127517954107549, + "grad_norm": 1.0064316987991333, + "learning_rate": 9.931229597280758e-06, + "loss": 0.3316, + "step": 4060 + }, + { + "epoch": 0.08129519805820384, + "grad_norm": 1.153601884841919, + "learning_rate": 9.931176004470162e-06, + "loss": 0.2826, + "step": 4061 + }, + { + "epoch": 0.08131521657533218, + "grad_norm": 1.217543601989746, + "learning_rate": 9.931122390929963e-06, + "loss": 0.3451, + "step": 4062 + }, + { + "epoch": 0.08133523509246053, + "grad_norm": 1.9040578603744507, + "learning_rate": 9.931068756660388e-06, + "loss": 0.8995, + "step": 4063 + }, + { + "epoch": 0.08135525360958887, + "grad_norm": 1.1932445764541626, + "learning_rate": 9.931015101661661e-06, + "loss": 0.3332, + "step": 4064 + }, + { + "epoch": 0.08137527212671722, + "grad_norm": 1.0974425077438354, + "learning_rate": 9.930961425934008e-06, + "loss": 0.3572, + "step": 4065 + }, + { + "epoch": 0.08139529064384556, + "grad_norm": 1.244520902633667, + "learning_rate": 9.930907729477652e-06, + "loss": 0.3418, + "step": 4066 + }, + { + "epoch": 0.0814153091609739, + "grad_norm": 1.1001559495925903, + "learning_rate": 9.930854012292822e-06, + "loss": 0.299, + "step": 4067 + }, + { + "epoch": 0.08143532767810224, + "grad_norm": 1.1085789203643799, + "learning_rate": 9.930800274379743e-06, + "loss": 0.3565, + "step": 4068 + }, + { + "epoch": 0.0814553461952306, + "grad_norm": 1.7209197282791138, + "learning_rate": 9.930746515738641e-06, + "loss": 0.8517, + "step": 4069 + }, + { + "epoch": 0.08147536471235893, + "grad_norm": 1.1541061401367188, + "learning_rate": 9.930692736369742e-06, + "loss": 0.3102, + "step": 4070 + }, + { + "epoch": 0.08149538322948728, + "grad_norm": 1.1148855686187744, + "learning_rate": 9.93063893627327e-06, + "loss": 0.35, + "step": 4071 + }, + { + "epoch": 0.08151540174661562, + "grad_norm": 1.1994045972824097, + "learning_rate": 9.930585115449456e-06, + "loss": 0.3521, + "step": 4072 + }, + { + "epoch": 0.08153542026374397, + "grad_norm": 1.1107510328292847, + "learning_rate": 9.930531273898523e-06, + "loss": 0.3485, + "step": 4073 + }, + { + "epoch": 0.0815554387808723, + "grad_norm": 1.0500845909118652, + "learning_rate": 9.930477411620695e-06, + "loss": 0.3278, + "step": 4074 + }, + { + "epoch": 0.08157545729800066, + "grad_norm": 1.1444159746170044, + "learning_rate": 9.930423528616203e-06, + "loss": 0.3497, + "step": 4075 + }, + { + "epoch": 0.08159547581512899, + "grad_norm": 1.148336410522461, + "learning_rate": 9.930369624885273e-06, + "loss": 0.3327, + "step": 4076 + }, + { + "epoch": 0.08161549433225734, + "grad_norm": 1.0917296409606934, + "learning_rate": 9.930315700428125e-06, + "loss": 0.3481, + "step": 4077 + }, + { + "epoch": 0.08163551284938568, + "grad_norm": 2.045956611633301, + "learning_rate": 9.930261755244995e-06, + "loss": 0.8458, + "step": 4078 + }, + { + "epoch": 0.08165553136651403, + "grad_norm": 1.0256450176239014, + "learning_rate": 9.930207789336105e-06, + "loss": 0.2879, + "step": 4079 + }, + { + "epoch": 0.08167554988364237, + "grad_norm": 1.1776610612869263, + "learning_rate": 9.930153802701682e-06, + "loss": 0.354, + "step": 4080 + }, + { + "epoch": 0.08169556840077072, + "grad_norm": 1.1132967472076416, + "learning_rate": 9.930099795341953e-06, + "loss": 0.3439, + "step": 4081 + }, + { + "epoch": 0.08171558691789906, + "grad_norm": 1.1493117809295654, + "learning_rate": 9.930045767257146e-06, + "loss": 0.3589, + "step": 4082 + }, + { + "epoch": 0.0817356054350274, + "grad_norm": 1.124147891998291, + "learning_rate": 9.929991718447486e-06, + "loss": 0.3685, + "step": 4083 + }, + { + "epoch": 0.08175562395215574, + "grad_norm": 1.016496181488037, + "learning_rate": 9.929937648913204e-06, + "loss": 0.2842, + "step": 4084 + }, + { + "epoch": 0.0817756424692841, + "grad_norm": 1.1749078035354614, + "learning_rate": 9.929883558654526e-06, + "loss": 0.3558, + "step": 4085 + }, + { + "epoch": 0.08179566098641243, + "grad_norm": 1.2352509498596191, + "learning_rate": 9.929829447671675e-06, + "loss": 0.349, + "step": 4086 + }, + { + "epoch": 0.08181567950354078, + "grad_norm": 1.032522439956665, + "learning_rate": 9.929775315964883e-06, + "loss": 0.3246, + "step": 4087 + }, + { + "epoch": 0.08183569802066912, + "grad_norm": 1.167728304862976, + "learning_rate": 9.929721163534378e-06, + "loss": 0.3987, + "step": 4088 + }, + { + "epoch": 0.08185571653779747, + "grad_norm": 1.0381450653076172, + "learning_rate": 9.929666990380385e-06, + "loss": 0.3619, + "step": 4089 + }, + { + "epoch": 0.0818757350549258, + "grad_norm": 1.1916298866271973, + "learning_rate": 9.929612796503133e-06, + "loss": 0.3712, + "step": 4090 + }, + { + "epoch": 0.08189575357205416, + "grad_norm": 1.1439037322998047, + "learning_rate": 9.92955858190285e-06, + "loss": 0.3114, + "step": 4091 + }, + { + "epoch": 0.08191577208918249, + "grad_norm": 1.9547662734985352, + "learning_rate": 9.929504346579763e-06, + "loss": 0.8785, + "step": 4092 + }, + { + "epoch": 0.08193579060631084, + "grad_norm": 1.2622709274291992, + "learning_rate": 9.9294500905341e-06, + "loss": 0.3592, + "step": 4093 + }, + { + "epoch": 0.08195580912343918, + "grad_norm": 1.0674231052398682, + "learning_rate": 9.92939581376609e-06, + "loss": 0.3352, + "step": 4094 + }, + { + "epoch": 0.08197582764056753, + "grad_norm": 1.8453986644744873, + "learning_rate": 9.92934151627596e-06, + "loss": 0.8945, + "step": 4095 + }, + { + "epoch": 0.08199584615769587, + "grad_norm": 0.932470440864563, + "learning_rate": 9.929287198063941e-06, + "loss": 0.3104, + "step": 4096 + }, + { + "epoch": 0.08201586467482422, + "grad_norm": 1.4087491035461426, + "learning_rate": 9.92923285913026e-06, + "loss": 0.3466, + "step": 4097 + }, + { + "epoch": 0.08203588319195255, + "grad_norm": 1.821608066558838, + "learning_rate": 9.929178499475142e-06, + "loss": 0.8701, + "step": 4098 + }, + { + "epoch": 0.0820559017090809, + "grad_norm": 0.9602922797203064, + "learning_rate": 9.929124119098821e-06, + "loss": 0.3579, + "step": 4099 + }, + { + "epoch": 0.08207592022620924, + "grad_norm": 1.1096724271774292, + "learning_rate": 9.929069718001523e-06, + "loss": 0.3426, + "step": 4100 + }, + { + "epoch": 0.0820959387433376, + "grad_norm": 1.0581536293029785, + "learning_rate": 9.929015296183475e-06, + "loss": 0.323, + "step": 4101 + }, + { + "epoch": 0.08211595726046593, + "grad_norm": 1.1814923286437988, + "learning_rate": 9.928960853644908e-06, + "loss": 0.3475, + "step": 4102 + }, + { + "epoch": 0.08213597577759428, + "grad_norm": 1.0812647342681885, + "learning_rate": 9.928906390386051e-06, + "loss": 0.295, + "step": 4103 + }, + { + "epoch": 0.08215599429472262, + "grad_norm": 1.038635492324829, + "learning_rate": 9.928851906407131e-06, + "loss": 0.3631, + "step": 4104 + }, + { + "epoch": 0.08217601281185097, + "grad_norm": 1.0744996070861816, + "learning_rate": 9.92879740170838e-06, + "loss": 0.3325, + "step": 4105 + }, + { + "epoch": 0.0821960313289793, + "grad_norm": 1.0913522243499756, + "learning_rate": 9.928742876290023e-06, + "loss": 0.3437, + "step": 4106 + }, + { + "epoch": 0.08221604984610766, + "grad_norm": 1.9444268941879272, + "learning_rate": 9.928688330152292e-06, + "loss": 0.7453, + "step": 4107 + }, + { + "epoch": 0.08223606836323599, + "grad_norm": 1.1610909700393677, + "learning_rate": 9.928633763295416e-06, + "loss": 0.3357, + "step": 4108 + }, + { + "epoch": 0.08225608688036434, + "grad_norm": 1.0860559940338135, + "learning_rate": 9.928579175719625e-06, + "loss": 0.3428, + "step": 4109 + }, + { + "epoch": 0.08227610539749268, + "grad_norm": 1.1535439491271973, + "learning_rate": 9.92852456742515e-06, + "loss": 0.3312, + "step": 4110 + }, + { + "epoch": 0.08229612391462103, + "grad_norm": 1.9037271738052368, + "learning_rate": 9.928469938412215e-06, + "loss": 0.9025, + "step": 4111 + }, + { + "epoch": 0.08231614243174937, + "grad_norm": 1.9430683851242065, + "learning_rate": 9.928415288681055e-06, + "loss": 0.7818, + "step": 4112 + }, + { + "epoch": 0.08233616094887772, + "grad_norm": 1.4393724203109741, + "learning_rate": 9.928360618231896e-06, + "loss": 0.3899, + "step": 4113 + }, + { + "epoch": 0.08235617946600605, + "grad_norm": 1.149025559425354, + "learning_rate": 9.92830592706497e-06, + "loss": 0.3163, + "step": 4114 + }, + { + "epoch": 0.0823761979831344, + "grad_norm": 1.0871038436889648, + "learning_rate": 9.928251215180508e-06, + "loss": 0.3159, + "step": 4115 + }, + { + "epoch": 0.08239621650026274, + "grad_norm": 1.0157279968261719, + "learning_rate": 9.928196482578738e-06, + "loss": 0.2998, + "step": 4116 + }, + { + "epoch": 0.08241623501739109, + "grad_norm": 1.077441692352295, + "learning_rate": 9.928141729259888e-06, + "loss": 0.3573, + "step": 4117 + }, + { + "epoch": 0.08243625353451943, + "grad_norm": 1.1291999816894531, + "learning_rate": 9.928086955224194e-06, + "loss": 0.3466, + "step": 4118 + }, + { + "epoch": 0.08245627205164778, + "grad_norm": 1.0637458562850952, + "learning_rate": 9.92803216047188e-06, + "loss": 0.3487, + "step": 4119 + }, + { + "epoch": 0.08247629056877612, + "grad_norm": 1.014211654663086, + "learning_rate": 9.927977345003181e-06, + "loss": 0.327, + "step": 4120 + }, + { + "epoch": 0.08249630908590447, + "grad_norm": 1.8574823141098022, + "learning_rate": 9.927922508818326e-06, + "loss": 0.7784, + "step": 4121 + }, + { + "epoch": 0.0825163276030328, + "grad_norm": 1.1304144859313965, + "learning_rate": 9.927867651917546e-06, + "loss": 0.3115, + "step": 4122 + }, + { + "epoch": 0.08253634612016116, + "grad_norm": 1.1009846925735474, + "learning_rate": 9.92781277430107e-06, + "loss": 0.3142, + "step": 4123 + }, + { + "epoch": 0.08255636463728949, + "grad_norm": 1.2238235473632812, + "learning_rate": 9.927757875969128e-06, + "loss": 0.3422, + "step": 4124 + }, + { + "epoch": 0.08257638315441784, + "grad_norm": 1.1431763172149658, + "learning_rate": 9.927702956921956e-06, + "loss": 0.3248, + "step": 4125 + }, + { + "epoch": 0.08259640167154618, + "grad_norm": 1.1582608222961426, + "learning_rate": 9.927648017159777e-06, + "loss": 0.3743, + "step": 4126 + }, + { + "epoch": 0.08261642018867453, + "grad_norm": 1.2032524347305298, + "learning_rate": 9.927593056682829e-06, + "loss": 0.3124, + "step": 4127 + }, + { + "epoch": 0.08263643870580287, + "grad_norm": 1.1412779092788696, + "learning_rate": 9.92753807549134e-06, + "loss": 0.3254, + "step": 4128 + }, + { + "epoch": 0.08265645722293122, + "grad_norm": 1.1674284934997559, + "learning_rate": 9.92748307358554e-06, + "loss": 0.319, + "step": 4129 + }, + { + "epoch": 0.08267647574005955, + "grad_norm": 1.8105981349945068, + "learning_rate": 9.927428050965663e-06, + "loss": 0.9293, + "step": 4130 + }, + { + "epoch": 0.0826964942571879, + "grad_norm": 1.083459496498108, + "learning_rate": 9.927373007631938e-06, + "loss": 0.3594, + "step": 4131 + }, + { + "epoch": 0.08271651277431624, + "grad_norm": 1.0981539487838745, + "learning_rate": 9.927317943584599e-06, + "loss": 0.3304, + "step": 4132 + }, + { + "epoch": 0.08273653129144459, + "grad_norm": 1.377024531364441, + "learning_rate": 9.927262858823873e-06, + "loss": 0.3514, + "step": 4133 + }, + { + "epoch": 0.08275654980857293, + "grad_norm": 1.59926438331604, + "learning_rate": 9.927207753349995e-06, + "loss": 0.3384, + "step": 4134 + }, + { + "epoch": 0.08277656832570128, + "grad_norm": 1.1573315858840942, + "learning_rate": 9.927152627163197e-06, + "loss": 0.3234, + "step": 4135 + }, + { + "epoch": 0.08279658684282962, + "grad_norm": 1.0385850667953491, + "learning_rate": 9.927097480263709e-06, + "loss": 0.3636, + "step": 4136 + }, + { + "epoch": 0.08281660535995797, + "grad_norm": 1.1815167665481567, + "learning_rate": 9.927042312651763e-06, + "loss": 0.3386, + "step": 4137 + }, + { + "epoch": 0.0828366238770863, + "grad_norm": 1.0811591148376465, + "learning_rate": 9.926987124327592e-06, + "loss": 0.2993, + "step": 4138 + }, + { + "epoch": 0.08285664239421466, + "grad_norm": 1.0453777313232422, + "learning_rate": 9.926931915291427e-06, + "loss": 0.3091, + "step": 4139 + }, + { + "epoch": 0.08287666091134299, + "grad_norm": 0.9934354424476624, + "learning_rate": 9.926876685543503e-06, + "loss": 0.3525, + "step": 4140 + }, + { + "epoch": 0.08289667942847134, + "grad_norm": 1.1636255979537964, + "learning_rate": 9.926821435084047e-06, + "loss": 0.313, + "step": 4141 + }, + { + "epoch": 0.08291669794559968, + "grad_norm": 1.131213903427124, + "learning_rate": 9.926766163913294e-06, + "loss": 0.338, + "step": 4142 + }, + { + "epoch": 0.08293671646272803, + "grad_norm": 1.0262198448181152, + "learning_rate": 9.926710872031478e-06, + "loss": 0.2999, + "step": 4143 + }, + { + "epoch": 0.08295673497985637, + "grad_norm": 1.0881271362304688, + "learning_rate": 9.926655559438829e-06, + "loss": 0.3065, + "step": 4144 + }, + { + "epoch": 0.08297675349698472, + "grad_norm": 1.1728193759918213, + "learning_rate": 9.926600226135578e-06, + "loss": 0.3597, + "step": 4145 + }, + { + "epoch": 0.08299677201411305, + "grad_norm": 1.9512919187545776, + "learning_rate": 9.926544872121964e-06, + "loss": 0.8196, + "step": 4146 + }, + { + "epoch": 0.0830167905312414, + "grad_norm": 1.8240430355072021, + "learning_rate": 9.926489497398213e-06, + "loss": 0.9069, + "step": 4147 + }, + { + "epoch": 0.08303680904836974, + "grad_norm": 1.3310291767120361, + "learning_rate": 9.926434101964559e-06, + "loss": 0.3241, + "step": 4148 + }, + { + "epoch": 0.08305682756549809, + "grad_norm": 1.0362364053726196, + "learning_rate": 9.926378685821239e-06, + "loss": 0.3062, + "step": 4149 + }, + { + "epoch": 0.08307684608262643, + "grad_norm": 1.2488195896148682, + "learning_rate": 9.926323248968482e-06, + "loss": 0.3273, + "step": 4150 + }, + { + "epoch": 0.08309686459975478, + "grad_norm": 1.0440703630447388, + "learning_rate": 9.926267791406523e-06, + "loss": 0.297, + "step": 4151 + }, + { + "epoch": 0.08311688311688312, + "grad_norm": 1.0143001079559326, + "learning_rate": 9.926212313135593e-06, + "loss": 0.3393, + "step": 4152 + }, + { + "epoch": 0.08313690163401147, + "grad_norm": 1.9815021753311157, + "learning_rate": 9.926156814155927e-06, + "loss": 0.9088, + "step": 4153 + }, + { + "epoch": 0.0831569201511398, + "grad_norm": 1.0155720710754395, + "learning_rate": 9.926101294467757e-06, + "loss": 0.3295, + "step": 4154 + }, + { + "epoch": 0.08317693866826815, + "grad_norm": 1.2410987615585327, + "learning_rate": 9.926045754071321e-06, + "loss": 0.3346, + "step": 4155 + }, + { + "epoch": 0.08319695718539649, + "grad_norm": 1.0871373414993286, + "learning_rate": 9.925990192966845e-06, + "loss": 0.3539, + "step": 4156 + }, + { + "epoch": 0.08321697570252483, + "grad_norm": 1.1799746751785278, + "learning_rate": 9.925934611154567e-06, + "loss": 0.3367, + "step": 4157 + }, + { + "epoch": 0.08323699421965318, + "grad_norm": 1.8529225587844849, + "learning_rate": 9.925879008634722e-06, + "loss": 0.8374, + "step": 4158 + }, + { + "epoch": 0.08325701273678152, + "grad_norm": 1.0693800449371338, + "learning_rate": 9.925823385407538e-06, + "loss": 0.3452, + "step": 4159 + }, + { + "epoch": 0.08327703125390987, + "grad_norm": 1.0569632053375244, + "learning_rate": 9.925767741473256e-06, + "loss": 0.3859, + "step": 4160 + }, + { + "epoch": 0.0832970497710382, + "grad_norm": 1.1115388870239258, + "learning_rate": 9.925712076832104e-06, + "loss": 0.3749, + "step": 4161 + }, + { + "epoch": 0.08331706828816655, + "grad_norm": 1.083061695098877, + "learning_rate": 9.92565639148432e-06, + "loss": 0.3455, + "step": 4162 + }, + { + "epoch": 0.08333708680529489, + "grad_norm": 1.146290898323059, + "learning_rate": 9.925600685430137e-06, + "loss": 0.3782, + "step": 4163 + }, + { + "epoch": 0.08335710532242324, + "grad_norm": 1.0530872344970703, + "learning_rate": 9.925544958669788e-06, + "loss": 0.3348, + "step": 4164 + }, + { + "epoch": 0.08337712383955158, + "grad_norm": 1.2983973026275635, + "learning_rate": 9.925489211203506e-06, + "loss": 0.3848, + "step": 4165 + }, + { + "epoch": 0.08339714235667993, + "grad_norm": 1.2288119792938232, + "learning_rate": 9.92543344303153e-06, + "loss": 0.3602, + "step": 4166 + }, + { + "epoch": 0.08341716087380827, + "grad_norm": 1.1494470834732056, + "learning_rate": 9.925377654154091e-06, + "loss": 0.306, + "step": 4167 + }, + { + "epoch": 0.08343717939093662, + "grad_norm": 1.049165964126587, + "learning_rate": 9.925321844571424e-06, + "loss": 0.3087, + "step": 4168 + }, + { + "epoch": 0.08345719790806495, + "grad_norm": 1.2093969583511353, + "learning_rate": 9.925266014283766e-06, + "loss": 0.3047, + "step": 4169 + }, + { + "epoch": 0.0834772164251933, + "grad_norm": 1.0711253881454468, + "learning_rate": 9.925210163291347e-06, + "loss": 0.3518, + "step": 4170 + }, + { + "epoch": 0.08349723494232164, + "grad_norm": 1.0905609130859375, + "learning_rate": 9.925154291594404e-06, + "loss": 0.2778, + "step": 4171 + }, + { + "epoch": 0.08351725345944999, + "grad_norm": 1.0313435792922974, + "learning_rate": 9.925098399193174e-06, + "loss": 0.3141, + "step": 4172 + }, + { + "epoch": 0.08353727197657833, + "grad_norm": 1.0727057456970215, + "learning_rate": 9.92504248608789e-06, + "loss": 0.3577, + "step": 4173 + }, + { + "epoch": 0.08355729049370668, + "grad_norm": 1.0515327453613281, + "learning_rate": 9.924986552278786e-06, + "loss": 0.3424, + "step": 4174 + }, + { + "epoch": 0.08357730901083502, + "grad_norm": 1.1797802448272705, + "learning_rate": 9.9249305977661e-06, + "loss": 0.3532, + "step": 4175 + }, + { + "epoch": 0.08359732752796337, + "grad_norm": 1.2014238834381104, + "learning_rate": 9.924874622550064e-06, + "loss": 0.4127, + "step": 4176 + }, + { + "epoch": 0.0836173460450917, + "grad_norm": 1.6954572200775146, + "learning_rate": 9.924818626630917e-06, + "loss": 0.777, + "step": 4177 + }, + { + "epoch": 0.08363736456222005, + "grad_norm": 0.9653947353363037, + "learning_rate": 9.924762610008891e-06, + "loss": 0.3337, + "step": 4178 + }, + { + "epoch": 0.08365738307934839, + "grad_norm": 1.0998190641403198, + "learning_rate": 9.924706572684224e-06, + "loss": 0.3281, + "step": 4179 + }, + { + "epoch": 0.08367740159647674, + "grad_norm": 1.189430594444275, + "learning_rate": 9.924650514657148e-06, + "loss": 0.3406, + "step": 4180 + }, + { + "epoch": 0.08369742011360508, + "grad_norm": 1.865696907043457, + "learning_rate": 9.924594435927902e-06, + "loss": 0.8108, + "step": 4181 + }, + { + "epoch": 0.08371743863073343, + "grad_norm": 1.319665551185608, + "learning_rate": 9.92453833649672e-06, + "loss": 0.3894, + "step": 4182 + }, + { + "epoch": 0.08373745714786177, + "grad_norm": 1.9885162115097046, + "learning_rate": 9.924482216363842e-06, + "loss": 0.8766, + "step": 4183 + }, + { + "epoch": 0.08375747566499012, + "grad_norm": 1.1983298063278198, + "learning_rate": 9.924426075529498e-06, + "loss": 0.3585, + "step": 4184 + }, + { + "epoch": 0.08377749418211845, + "grad_norm": 1.0739542245864868, + "learning_rate": 9.924369913993927e-06, + "loss": 0.2982, + "step": 4185 + }, + { + "epoch": 0.0837975126992468, + "grad_norm": 1.0617430210113525, + "learning_rate": 9.924313731757365e-06, + "loss": 0.3431, + "step": 4186 + }, + { + "epoch": 0.08381753121637514, + "grad_norm": 1.0831835269927979, + "learning_rate": 9.924257528820047e-06, + "loss": 0.3537, + "step": 4187 + }, + { + "epoch": 0.08383754973350349, + "grad_norm": 1.1397972106933594, + "learning_rate": 9.92420130518221e-06, + "loss": 0.286, + "step": 4188 + }, + { + "epoch": 0.08385756825063183, + "grad_norm": 1.0774506330490112, + "learning_rate": 9.924145060844091e-06, + "loss": 0.303, + "step": 4189 + }, + { + "epoch": 0.08387758676776018, + "grad_norm": 1.1273064613342285, + "learning_rate": 9.924088795805924e-06, + "loss": 0.3238, + "step": 4190 + }, + { + "epoch": 0.08389760528488852, + "grad_norm": 1.2873705625534058, + "learning_rate": 9.92403251006795e-06, + "loss": 0.2962, + "step": 4191 + }, + { + "epoch": 0.08391762380201687, + "grad_norm": 1.2232427597045898, + "learning_rate": 9.923976203630401e-06, + "loss": 0.3363, + "step": 4192 + }, + { + "epoch": 0.0839376423191452, + "grad_norm": 1.1135412454605103, + "learning_rate": 9.923919876493517e-06, + "loss": 0.3634, + "step": 4193 + }, + { + "epoch": 0.08395766083627355, + "grad_norm": 1.0983872413635254, + "learning_rate": 9.923863528657534e-06, + "loss": 0.3088, + "step": 4194 + }, + { + "epoch": 0.08397767935340189, + "grad_norm": 1.179770827293396, + "learning_rate": 9.923807160122687e-06, + "loss": 0.3099, + "step": 4195 + }, + { + "epoch": 0.08399769787053024, + "grad_norm": 1.3191704750061035, + "learning_rate": 9.923750770889215e-06, + "loss": 0.3268, + "step": 4196 + }, + { + "epoch": 0.08401771638765858, + "grad_norm": 1.153022050857544, + "learning_rate": 9.923694360957352e-06, + "loss": 0.3317, + "step": 4197 + }, + { + "epoch": 0.08403773490478693, + "grad_norm": 1.197909951210022, + "learning_rate": 9.92363793032734e-06, + "loss": 0.3206, + "step": 4198 + }, + { + "epoch": 0.08405775342191527, + "grad_norm": 1.3397226333618164, + "learning_rate": 9.923581478999414e-06, + "loss": 0.3677, + "step": 4199 + }, + { + "epoch": 0.08407777193904362, + "grad_norm": 1.1219841241836548, + "learning_rate": 9.923525006973811e-06, + "loss": 0.3441, + "step": 4200 + }, + { + "epoch": 0.08409779045617195, + "grad_norm": 1.0342981815338135, + "learning_rate": 9.923468514250767e-06, + "loss": 0.3416, + "step": 4201 + }, + { + "epoch": 0.0841178089733003, + "grad_norm": 1.1148380041122437, + "learning_rate": 9.923412000830523e-06, + "loss": 0.3141, + "step": 4202 + }, + { + "epoch": 0.08413782749042864, + "grad_norm": 1.0631965398788452, + "learning_rate": 9.923355466713313e-06, + "loss": 0.3458, + "step": 4203 + }, + { + "epoch": 0.08415784600755699, + "grad_norm": 1.1125887632369995, + "learning_rate": 9.923298911899376e-06, + "loss": 0.3173, + "step": 4204 + }, + { + "epoch": 0.08417786452468533, + "grad_norm": 1.6136586666107178, + "learning_rate": 9.92324233638895e-06, + "loss": 0.3264, + "step": 4205 + }, + { + "epoch": 0.08419788304181368, + "grad_norm": 1.126905083656311, + "learning_rate": 9.923185740182273e-06, + "loss": 0.3467, + "step": 4206 + }, + { + "epoch": 0.08421790155894202, + "grad_norm": 1.0526769161224365, + "learning_rate": 9.923129123279582e-06, + "loss": 0.3068, + "step": 4207 + }, + { + "epoch": 0.08423792007607037, + "grad_norm": 1.1955782175064087, + "learning_rate": 9.923072485681116e-06, + "loss": 0.3297, + "step": 4208 + }, + { + "epoch": 0.0842579385931987, + "grad_norm": 1.1354798078536987, + "learning_rate": 9.923015827387113e-06, + "loss": 0.3297, + "step": 4209 + }, + { + "epoch": 0.08427795711032705, + "grad_norm": 1.0855114459991455, + "learning_rate": 9.92295914839781e-06, + "loss": 0.3233, + "step": 4210 + }, + { + "epoch": 0.08429797562745539, + "grad_norm": 1.1807401180267334, + "learning_rate": 9.922902448713449e-06, + "loss": 0.4057, + "step": 4211 + }, + { + "epoch": 0.08431799414458374, + "grad_norm": 1.2503042221069336, + "learning_rate": 9.922845728334262e-06, + "loss": 0.3268, + "step": 4212 + }, + { + "epoch": 0.08433801266171208, + "grad_norm": 1.1023023128509521, + "learning_rate": 9.922788987260493e-06, + "loss": 0.3396, + "step": 4213 + }, + { + "epoch": 0.08435803117884043, + "grad_norm": 1.1465411186218262, + "learning_rate": 9.922732225492378e-06, + "loss": 0.3505, + "step": 4214 + }, + { + "epoch": 0.08437804969596877, + "grad_norm": 1.1181741952896118, + "learning_rate": 9.922675443030157e-06, + "loss": 0.3136, + "step": 4215 + }, + { + "epoch": 0.08439806821309712, + "grad_norm": 1.8921459913253784, + "learning_rate": 9.922618639874067e-06, + "loss": 0.8114, + "step": 4216 + }, + { + "epoch": 0.08441808673022545, + "grad_norm": 1.1772712469100952, + "learning_rate": 9.922561816024347e-06, + "loss": 0.3066, + "step": 4217 + }, + { + "epoch": 0.0844381052473538, + "grad_norm": 1.1366243362426758, + "learning_rate": 9.922504971481236e-06, + "loss": 0.3598, + "step": 4218 + }, + { + "epoch": 0.08445812376448214, + "grad_norm": 1.157926321029663, + "learning_rate": 9.922448106244975e-06, + "loss": 0.3405, + "step": 4219 + }, + { + "epoch": 0.08447814228161049, + "grad_norm": 1.2500531673431396, + "learning_rate": 9.922391220315801e-06, + "loss": 0.3979, + "step": 4220 + }, + { + "epoch": 0.08449816079873883, + "grad_norm": 1.2345335483551025, + "learning_rate": 9.922334313693954e-06, + "loss": 0.3376, + "step": 4221 + }, + { + "epoch": 0.08451817931586718, + "grad_norm": 1.812201738357544, + "learning_rate": 9.922277386379671e-06, + "loss": 0.8471, + "step": 4222 + }, + { + "epoch": 0.08453819783299552, + "grad_norm": 1.1556447744369507, + "learning_rate": 9.922220438373195e-06, + "loss": 0.3258, + "step": 4223 + }, + { + "epoch": 0.08455821635012387, + "grad_norm": 1.0191962718963623, + "learning_rate": 9.922163469674763e-06, + "loss": 0.3083, + "step": 4224 + }, + { + "epoch": 0.0845782348672522, + "grad_norm": 1.1475481986999512, + "learning_rate": 9.922106480284614e-06, + "loss": 0.3191, + "step": 4225 + }, + { + "epoch": 0.08459825338438055, + "grad_norm": 0.9512898921966553, + "learning_rate": 9.92204947020299e-06, + "loss": 0.3111, + "step": 4226 + }, + { + "epoch": 0.08461827190150889, + "grad_norm": 1.1763678789138794, + "learning_rate": 9.92199243943013e-06, + "loss": 0.3204, + "step": 4227 + }, + { + "epoch": 0.08463829041863724, + "grad_norm": 1.113446831703186, + "learning_rate": 9.921935387966272e-06, + "loss": 0.3254, + "step": 4228 + }, + { + "epoch": 0.08465830893576558, + "grad_norm": 1.980424165725708, + "learning_rate": 9.921878315811658e-06, + "loss": 0.8267, + "step": 4229 + }, + { + "epoch": 0.08467832745289393, + "grad_norm": 1.0325480699539185, + "learning_rate": 9.921821222966524e-06, + "loss": 0.2936, + "step": 4230 + }, + { + "epoch": 0.08469834597002227, + "grad_norm": 1.1583102941513062, + "learning_rate": 9.921764109431114e-06, + "loss": 0.3529, + "step": 4231 + }, + { + "epoch": 0.08471836448715062, + "grad_norm": 1.115012764930725, + "learning_rate": 9.921706975205668e-06, + "loss": 0.3635, + "step": 4232 + }, + { + "epoch": 0.08473838300427895, + "grad_norm": 1.070404291152954, + "learning_rate": 9.921649820290424e-06, + "loss": 0.3366, + "step": 4233 + }, + { + "epoch": 0.0847584015214073, + "grad_norm": 1.6932693719863892, + "learning_rate": 9.921592644685622e-06, + "loss": 0.9465, + "step": 4234 + }, + { + "epoch": 0.08477842003853564, + "grad_norm": 1.0677307844161987, + "learning_rate": 9.921535448391506e-06, + "loss": 0.3294, + "step": 4235 + }, + { + "epoch": 0.08479843855566399, + "grad_norm": 1.05168879032135, + "learning_rate": 9.921478231408313e-06, + "loss": 0.313, + "step": 4236 + }, + { + "epoch": 0.08481845707279233, + "grad_norm": 1.1686666011810303, + "learning_rate": 9.921420993736287e-06, + "loss": 0.3126, + "step": 4237 + }, + { + "epoch": 0.08483847558992068, + "grad_norm": 1.2010530233383179, + "learning_rate": 9.921363735375663e-06, + "loss": 0.3186, + "step": 4238 + }, + { + "epoch": 0.08485849410704902, + "grad_norm": 1.1222022771835327, + "learning_rate": 9.921306456326685e-06, + "loss": 0.3178, + "step": 4239 + }, + { + "epoch": 0.08487851262417737, + "grad_norm": 1.0770388841629028, + "learning_rate": 9.921249156589597e-06, + "loss": 0.3535, + "step": 4240 + }, + { + "epoch": 0.0848985311413057, + "grad_norm": 1.0782599449157715, + "learning_rate": 9.921191836164633e-06, + "loss": 0.3854, + "step": 4241 + }, + { + "epoch": 0.08491854965843405, + "grad_norm": 1.167563557624817, + "learning_rate": 9.92113449505204e-06, + "loss": 0.2995, + "step": 4242 + }, + { + "epoch": 0.08493856817556239, + "grad_norm": 1.0572949647903442, + "learning_rate": 9.921077133252055e-06, + "loss": 0.319, + "step": 4243 + }, + { + "epoch": 0.08495858669269074, + "grad_norm": 1.1057909727096558, + "learning_rate": 9.921019750764923e-06, + "loss": 0.331, + "step": 4244 + }, + { + "epoch": 0.08497860520981908, + "grad_norm": 1.0772103071212769, + "learning_rate": 9.92096234759088e-06, + "loss": 0.3188, + "step": 4245 + }, + { + "epoch": 0.08499862372694743, + "grad_norm": 1.2120106220245361, + "learning_rate": 9.92090492373017e-06, + "loss": 0.2948, + "step": 4246 + }, + { + "epoch": 0.08501864224407576, + "grad_norm": 1.0593475103378296, + "learning_rate": 9.920847479183039e-06, + "loss": 0.3346, + "step": 4247 + }, + { + "epoch": 0.08503866076120412, + "grad_norm": 1.106019377708435, + "learning_rate": 9.92079001394972e-06, + "loss": 0.3197, + "step": 4248 + }, + { + "epoch": 0.08505867927833245, + "grad_norm": 1.1456780433654785, + "learning_rate": 9.920732528030462e-06, + "loss": 0.3213, + "step": 4249 + }, + { + "epoch": 0.0850786977954608, + "grad_norm": 1.2783695459365845, + "learning_rate": 9.920675021425502e-06, + "loss": 0.37, + "step": 4250 + }, + { + "epoch": 0.08509871631258914, + "grad_norm": 1.163663387298584, + "learning_rate": 9.92061749413508e-06, + "loss": 0.355, + "step": 4251 + }, + { + "epoch": 0.08511873482971749, + "grad_norm": 1.1547659635543823, + "learning_rate": 9.920559946159445e-06, + "loss": 0.284, + "step": 4252 + }, + { + "epoch": 0.08513875334684583, + "grad_norm": 1.0805667638778687, + "learning_rate": 9.920502377498834e-06, + "loss": 0.3329, + "step": 4253 + }, + { + "epoch": 0.08515877186397418, + "grad_norm": 1.1738667488098145, + "learning_rate": 9.92044478815349e-06, + "loss": 0.3662, + "step": 4254 + }, + { + "epoch": 0.08517879038110251, + "grad_norm": 1.140300989151001, + "learning_rate": 9.920387178123653e-06, + "loss": 0.3603, + "step": 4255 + }, + { + "epoch": 0.08519880889823087, + "grad_norm": 1.1628905534744263, + "learning_rate": 9.92032954740957e-06, + "loss": 0.2892, + "step": 4256 + }, + { + "epoch": 0.0852188274153592, + "grad_norm": 1.1246602535247803, + "learning_rate": 9.92027189601148e-06, + "loss": 0.3104, + "step": 4257 + }, + { + "epoch": 0.08523884593248755, + "grad_norm": 1.0347059965133667, + "learning_rate": 9.920214223929625e-06, + "loss": 0.3083, + "step": 4258 + }, + { + "epoch": 0.08525886444961589, + "grad_norm": 1.0355284214019775, + "learning_rate": 9.920156531164248e-06, + "loss": 0.3508, + "step": 4259 + }, + { + "epoch": 0.08527888296674424, + "grad_norm": 1.106124758720398, + "learning_rate": 9.920098817715592e-06, + "loss": 0.3109, + "step": 4260 + }, + { + "epoch": 0.08529890148387258, + "grad_norm": 1.996665120124817, + "learning_rate": 9.920041083583899e-06, + "loss": 0.9586, + "step": 4261 + }, + { + "epoch": 0.08531892000100093, + "grad_norm": 1.113111138343811, + "learning_rate": 9.919983328769413e-06, + "loss": 0.3156, + "step": 4262 + }, + { + "epoch": 0.08533893851812926, + "grad_norm": 1.1731376647949219, + "learning_rate": 9.919925553272377e-06, + "loss": 0.3326, + "step": 4263 + }, + { + "epoch": 0.08535895703525762, + "grad_norm": 1.1055896282196045, + "learning_rate": 9.919867757093028e-06, + "loss": 0.3159, + "step": 4264 + }, + { + "epoch": 0.08537897555238595, + "grad_norm": 0.9942158460617065, + "learning_rate": 9.919809940231619e-06, + "loss": 0.3027, + "step": 4265 + }, + { + "epoch": 0.0853989940695143, + "grad_norm": 1.138849139213562, + "learning_rate": 9.919752102688384e-06, + "loss": 0.327, + "step": 4266 + }, + { + "epoch": 0.08541901258664264, + "grad_norm": 1.0521060228347778, + "learning_rate": 9.919694244463573e-06, + "loss": 0.3247, + "step": 4267 + }, + { + "epoch": 0.08543903110377099, + "grad_norm": 1.0174131393432617, + "learning_rate": 9.919636365557423e-06, + "loss": 0.3387, + "step": 4268 + }, + { + "epoch": 0.08545904962089933, + "grad_norm": 1.1843243837356567, + "learning_rate": 9.919578465970184e-06, + "loss": 0.3063, + "step": 4269 + }, + { + "epoch": 0.08547906813802768, + "grad_norm": 1.0007866621017456, + "learning_rate": 9.919520545702093e-06, + "loss": 0.3135, + "step": 4270 + }, + { + "epoch": 0.08549908665515601, + "grad_norm": 1.2089602947235107, + "learning_rate": 9.919462604753398e-06, + "loss": 0.3821, + "step": 4271 + }, + { + "epoch": 0.08551910517228437, + "grad_norm": 1.1350117921829224, + "learning_rate": 9.919404643124339e-06, + "loss": 0.3279, + "step": 4272 + }, + { + "epoch": 0.0855391236894127, + "grad_norm": 1.1100870370864868, + "learning_rate": 9.919346660815164e-06, + "loss": 0.384, + "step": 4273 + }, + { + "epoch": 0.08555914220654105, + "grad_norm": 1.077157735824585, + "learning_rate": 9.919288657826113e-06, + "loss": 0.2913, + "step": 4274 + }, + { + "epoch": 0.08557916072366939, + "grad_norm": 1.1308165788650513, + "learning_rate": 9.91923063415743e-06, + "loss": 0.309, + "step": 4275 + }, + { + "epoch": 0.08559917924079774, + "grad_norm": 1.1375426054000854, + "learning_rate": 9.919172589809363e-06, + "loss": 0.29, + "step": 4276 + }, + { + "epoch": 0.08561919775792608, + "grad_norm": 1.1520205736160278, + "learning_rate": 9.91911452478215e-06, + "loss": 0.3204, + "step": 4277 + }, + { + "epoch": 0.08563921627505443, + "grad_norm": 1.1499239206314087, + "learning_rate": 9.91905643907604e-06, + "loss": 0.345, + "step": 4278 + }, + { + "epoch": 0.08565923479218276, + "grad_norm": 1.0572859048843384, + "learning_rate": 9.918998332691274e-06, + "loss": 0.3092, + "step": 4279 + }, + { + "epoch": 0.08567925330931112, + "grad_norm": 1.5116634368896484, + "learning_rate": 9.918940205628099e-06, + "loss": 0.3608, + "step": 4280 + }, + { + "epoch": 0.08569927182643945, + "grad_norm": 1.1613273620605469, + "learning_rate": 9.918882057886757e-06, + "loss": 0.2875, + "step": 4281 + }, + { + "epoch": 0.0857192903435678, + "grad_norm": 1.1211880445480347, + "learning_rate": 9.918823889467494e-06, + "loss": 0.3395, + "step": 4282 + }, + { + "epoch": 0.08573930886069614, + "grad_norm": 1.075716495513916, + "learning_rate": 9.918765700370554e-06, + "loss": 0.3141, + "step": 4283 + }, + { + "epoch": 0.08575932737782449, + "grad_norm": 1.8341091871261597, + "learning_rate": 9.91870749059618e-06, + "loss": 0.8163, + "step": 4284 + }, + { + "epoch": 0.08577934589495283, + "grad_norm": 1.3195271492004395, + "learning_rate": 9.918649260144619e-06, + "loss": 0.314, + "step": 4285 + }, + { + "epoch": 0.08579936441208118, + "grad_norm": 1.1296862363815308, + "learning_rate": 9.918591009016116e-06, + "loss": 0.331, + "step": 4286 + }, + { + "epoch": 0.08581938292920951, + "grad_norm": 1.2433534860610962, + "learning_rate": 9.918532737210913e-06, + "loss": 0.3154, + "step": 4287 + }, + { + "epoch": 0.08583940144633787, + "grad_norm": 1.0710750818252563, + "learning_rate": 9.918474444729259e-06, + "loss": 0.2894, + "step": 4288 + }, + { + "epoch": 0.0858594199634662, + "grad_norm": 1.7890875339508057, + "learning_rate": 9.918416131571395e-06, + "loss": 0.7916, + "step": 4289 + }, + { + "epoch": 0.08587943848059455, + "grad_norm": 1.0778192281723022, + "learning_rate": 9.91835779773757e-06, + "loss": 0.3424, + "step": 4290 + }, + { + "epoch": 0.08589945699772289, + "grad_norm": 1.0874780416488647, + "learning_rate": 9.918299443228027e-06, + "loss": 0.2884, + "step": 4291 + }, + { + "epoch": 0.08591947551485124, + "grad_norm": 1.0825140476226807, + "learning_rate": 9.91824106804301e-06, + "loss": 0.3649, + "step": 4292 + }, + { + "epoch": 0.08593949403197958, + "grad_norm": 1.1816365718841553, + "learning_rate": 9.918182672182767e-06, + "loss": 0.3283, + "step": 4293 + }, + { + "epoch": 0.08595951254910793, + "grad_norm": 1.0873218774795532, + "learning_rate": 9.918124255647542e-06, + "loss": 0.3882, + "step": 4294 + }, + { + "epoch": 0.08597953106623626, + "grad_norm": 1.074283480644226, + "learning_rate": 9.918065818437581e-06, + "loss": 0.316, + "step": 4295 + }, + { + "epoch": 0.08599954958336462, + "grad_norm": 1.0873156785964966, + "learning_rate": 9.91800736055313e-06, + "loss": 0.3339, + "step": 4296 + }, + { + "epoch": 0.08601956810049295, + "grad_norm": 1.1511449813842773, + "learning_rate": 9.917948881994435e-06, + "loss": 0.3021, + "step": 4297 + }, + { + "epoch": 0.0860395866176213, + "grad_norm": 1.2129093408584595, + "learning_rate": 9.91789038276174e-06, + "loss": 0.2833, + "step": 4298 + }, + { + "epoch": 0.08605960513474964, + "grad_norm": 1.1392320394515991, + "learning_rate": 9.917831862855294e-06, + "loss": 0.3499, + "step": 4299 + }, + { + "epoch": 0.08607962365187799, + "grad_norm": 1.2036124467849731, + "learning_rate": 9.91777332227534e-06, + "loss": 0.3044, + "step": 4300 + }, + { + "epoch": 0.08609964216900633, + "grad_norm": 1.054579734802246, + "learning_rate": 9.917714761022125e-06, + "loss": 0.3729, + "step": 4301 + }, + { + "epoch": 0.08611966068613468, + "grad_norm": 1.2042070627212524, + "learning_rate": 9.917656179095895e-06, + "loss": 0.2746, + "step": 4302 + }, + { + "epoch": 0.08613967920326301, + "grad_norm": 1.9294242858886719, + "learning_rate": 9.917597576496899e-06, + "loss": 0.8927, + "step": 4303 + }, + { + "epoch": 0.08615969772039136, + "grad_norm": 1.049448013305664, + "learning_rate": 9.917538953225378e-06, + "loss": 0.3215, + "step": 4304 + }, + { + "epoch": 0.0861797162375197, + "grad_norm": 1.018904447555542, + "learning_rate": 9.917480309281583e-06, + "loss": 0.3312, + "step": 4305 + }, + { + "epoch": 0.08619973475464805, + "grad_norm": 1.1420189142227173, + "learning_rate": 9.91742164466576e-06, + "loss": 0.349, + "step": 4306 + }, + { + "epoch": 0.08621975327177639, + "grad_norm": 1.1243597269058228, + "learning_rate": 9.917362959378153e-06, + "loss": 0.3197, + "step": 4307 + }, + { + "epoch": 0.08623977178890474, + "grad_norm": 1.0608179569244385, + "learning_rate": 9.917304253419012e-06, + "loss": 0.3388, + "step": 4308 + }, + { + "epoch": 0.08625979030603308, + "grad_norm": 1.0480222702026367, + "learning_rate": 9.917245526788579e-06, + "loss": 0.285, + "step": 4309 + }, + { + "epoch": 0.08627980882316143, + "grad_norm": 1.1205495595932007, + "learning_rate": 9.917186779487107e-06, + "loss": 0.3845, + "step": 4310 + }, + { + "epoch": 0.08629982734028976, + "grad_norm": 1.0099523067474365, + "learning_rate": 9.917128011514839e-06, + "loss": 0.3358, + "step": 4311 + }, + { + "epoch": 0.08631984585741811, + "grad_norm": 1.1008718013763428, + "learning_rate": 9.917069222872023e-06, + "loss": 0.3498, + "step": 4312 + }, + { + "epoch": 0.08633986437454645, + "grad_norm": 1.1492177248001099, + "learning_rate": 9.917010413558908e-06, + "loss": 0.3567, + "step": 4313 + }, + { + "epoch": 0.0863598828916748, + "grad_norm": 1.3107143640518188, + "learning_rate": 9.916951583575735e-06, + "loss": 0.3362, + "step": 4314 + }, + { + "epoch": 0.08637990140880314, + "grad_norm": 1.1333361864089966, + "learning_rate": 9.916892732922759e-06, + "loss": 0.3256, + "step": 4315 + }, + { + "epoch": 0.08639991992593149, + "grad_norm": 1.7057044506072998, + "learning_rate": 9.916833861600223e-06, + "loss": 0.8643, + "step": 4316 + }, + { + "epoch": 0.08641993844305983, + "grad_norm": 0.9891005158424377, + "learning_rate": 9.916774969608378e-06, + "loss": 0.3594, + "step": 4317 + }, + { + "epoch": 0.08643995696018818, + "grad_norm": 1.2710455656051636, + "learning_rate": 9.916716056947467e-06, + "loss": 0.3901, + "step": 4318 + }, + { + "epoch": 0.08645997547731651, + "grad_norm": 1.1593756675720215, + "learning_rate": 9.916657123617738e-06, + "loss": 0.331, + "step": 4319 + }, + { + "epoch": 0.08647999399444486, + "grad_norm": 1.0413247346878052, + "learning_rate": 9.916598169619444e-06, + "loss": 0.3178, + "step": 4320 + }, + { + "epoch": 0.0865000125115732, + "grad_norm": 1.1062769889831543, + "learning_rate": 9.916539194952828e-06, + "loss": 0.3638, + "step": 4321 + }, + { + "epoch": 0.08652003102870155, + "grad_norm": 1.9335839748382568, + "learning_rate": 9.916480199618138e-06, + "loss": 0.8281, + "step": 4322 + }, + { + "epoch": 0.08654004954582989, + "grad_norm": 1.2002092599868774, + "learning_rate": 9.916421183615626e-06, + "loss": 0.3536, + "step": 4323 + }, + { + "epoch": 0.08656006806295824, + "grad_norm": 1.2753268480300903, + "learning_rate": 9.916362146945535e-06, + "loss": 0.3597, + "step": 4324 + }, + { + "epoch": 0.08658008658008658, + "grad_norm": 1.0168468952178955, + "learning_rate": 9.916303089608115e-06, + "loss": 0.2782, + "step": 4325 + }, + { + "epoch": 0.08660010509721493, + "grad_norm": 1.8442221879959106, + "learning_rate": 9.916244011603617e-06, + "loss": 0.9682, + "step": 4326 + }, + { + "epoch": 0.08662012361434326, + "grad_norm": 1.1290582418441772, + "learning_rate": 9.916184912932285e-06, + "loss": 0.3283, + "step": 4327 + }, + { + "epoch": 0.08664014213147161, + "grad_norm": 1.0773190259933472, + "learning_rate": 9.91612579359437e-06, + "loss": 0.3793, + "step": 4328 + }, + { + "epoch": 0.08666016064859995, + "grad_norm": 1.1712124347686768, + "learning_rate": 9.916066653590123e-06, + "loss": 0.3331, + "step": 4329 + }, + { + "epoch": 0.0866801791657283, + "grad_norm": 1.3038796186447144, + "learning_rate": 9.916007492919787e-06, + "loss": 0.3299, + "step": 4330 + }, + { + "epoch": 0.08670019768285664, + "grad_norm": 1.0918993949890137, + "learning_rate": 9.915948311583615e-06, + "loss": 0.3469, + "step": 4331 + }, + { + "epoch": 0.08672021619998499, + "grad_norm": 1.0864537954330444, + "learning_rate": 9.915889109581853e-06, + "loss": 0.3668, + "step": 4332 + }, + { + "epoch": 0.08674023471711333, + "grad_norm": 1.114355206489563, + "learning_rate": 9.915829886914752e-06, + "loss": 0.3526, + "step": 4333 + }, + { + "epoch": 0.08676025323424168, + "grad_norm": 1.1825306415557861, + "learning_rate": 9.915770643582559e-06, + "loss": 0.3125, + "step": 4334 + }, + { + "epoch": 0.08678027175137001, + "grad_norm": 1.199136734008789, + "learning_rate": 9.915711379585524e-06, + "loss": 0.3597, + "step": 4335 + }, + { + "epoch": 0.08680029026849836, + "grad_norm": 1.2278403043746948, + "learning_rate": 9.915652094923898e-06, + "loss": 0.3475, + "step": 4336 + }, + { + "epoch": 0.0868203087856267, + "grad_norm": 1.1971405744552612, + "learning_rate": 9.915592789597927e-06, + "loss": 0.3526, + "step": 4337 + }, + { + "epoch": 0.08684032730275505, + "grad_norm": 1.8599355220794678, + "learning_rate": 9.915533463607861e-06, + "loss": 0.8942, + "step": 4338 + }, + { + "epoch": 0.08686034581988339, + "grad_norm": 1.0959563255310059, + "learning_rate": 9.915474116953951e-06, + "loss": 0.324, + "step": 4339 + }, + { + "epoch": 0.08688036433701174, + "grad_norm": 1.8590853214263916, + "learning_rate": 9.915414749636447e-06, + "loss": 0.9082, + "step": 4340 + }, + { + "epoch": 0.08690038285414008, + "grad_norm": 1.0779131650924683, + "learning_rate": 9.915355361655596e-06, + "loss": 0.3545, + "step": 4341 + }, + { + "epoch": 0.08692040137126843, + "grad_norm": 0.999178946018219, + "learning_rate": 9.915295953011649e-06, + "loss": 0.3101, + "step": 4342 + }, + { + "epoch": 0.08694041988839676, + "grad_norm": 1.0471551418304443, + "learning_rate": 9.915236523704855e-06, + "loss": 0.3013, + "step": 4343 + }, + { + "epoch": 0.08696043840552511, + "grad_norm": 1.1145210266113281, + "learning_rate": 9.915177073735465e-06, + "loss": 0.2883, + "step": 4344 + }, + { + "epoch": 0.08698045692265345, + "grad_norm": 1.0908440351486206, + "learning_rate": 9.915117603103727e-06, + "loss": 0.3315, + "step": 4345 + }, + { + "epoch": 0.0870004754397818, + "grad_norm": 1.9246352910995483, + "learning_rate": 9.915058111809895e-06, + "loss": 0.9475, + "step": 4346 + }, + { + "epoch": 0.08702049395691014, + "grad_norm": 1.088944673538208, + "learning_rate": 9.914998599854215e-06, + "loss": 0.3515, + "step": 4347 + }, + { + "epoch": 0.08704051247403849, + "grad_norm": 1.1828076839447021, + "learning_rate": 9.914939067236939e-06, + "loss": 0.3969, + "step": 4348 + }, + { + "epoch": 0.08706053099116683, + "grad_norm": 1.0638176202774048, + "learning_rate": 9.914879513958315e-06, + "loss": 0.3286, + "step": 4349 + }, + { + "epoch": 0.08708054950829518, + "grad_norm": 1.1528265476226807, + "learning_rate": 9.914819940018599e-06, + "loss": 0.2994, + "step": 4350 + }, + { + "epoch": 0.08710056802542351, + "grad_norm": 1.1888515949249268, + "learning_rate": 9.914760345418034e-06, + "loss": 0.2849, + "step": 4351 + }, + { + "epoch": 0.08712058654255186, + "grad_norm": 1.0600512027740479, + "learning_rate": 9.914700730156876e-06, + "loss": 0.3209, + "step": 4352 + }, + { + "epoch": 0.0871406050596802, + "grad_norm": 1.2092409133911133, + "learning_rate": 9.914641094235374e-06, + "loss": 0.3404, + "step": 4353 + }, + { + "epoch": 0.08716062357680855, + "grad_norm": 1.0201025009155273, + "learning_rate": 9.914581437653777e-06, + "loss": 0.3049, + "step": 4354 + }, + { + "epoch": 0.08718064209393689, + "grad_norm": 1.1188523769378662, + "learning_rate": 9.914521760412338e-06, + "loss": 0.3411, + "step": 4355 + }, + { + "epoch": 0.08720066061106524, + "grad_norm": 1.1077110767364502, + "learning_rate": 9.914462062511307e-06, + "loss": 0.3101, + "step": 4356 + }, + { + "epoch": 0.08722067912819358, + "grad_norm": 1.1466349363327026, + "learning_rate": 9.914402343950935e-06, + "loss": 0.2855, + "step": 4357 + }, + { + "epoch": 0.08724069764532193, + "grad_norm": 1.0857799053192139, + "learning_rate": 9.914342604731474e-06, + "loss": 0.3139, + "step": 4358 + }, + { + "epoch": 0.08726071616245026, + "grad_norm": 1.0626784563064575, + "learning_rate": 9.914282844853174e-06, + "loss": 0.3181, + "step": 4359 + }, + { + "epoch": 0.08728073467957861, + "grad_norm": 1.0887314081192017, + "learning_rate": 9.914223064316286e-06, + "loss": 0.3785, + "step": 4360 + }, + { + "epoch": 0.08730075319670695, + "grad_norm": 1.0420037508010864, + "learning_rate": 9.914163263121062e-06, + "loss": 0.3024, + "step": 4361 + }, + { + "epoch": 0.0873207717138353, + "grad_norm": 1.287888526916504, + "learning_rate": 9.914103441267754e-06, + "loss": 0.343, + "step": 4362 + }, + { + "epoch": 0.08734079023096364, + "grad_norm": 1.7384189367294312, + "learning_rate": 9.91404359875661e-06, + "loss": 0.8419, + "step": 4363 + }, + { + "epoch": 0.08736080874809199, + "grad_norm": 1.0954792499542236, + "learning_rate": 9.913983735587886e-06, + "loss": 0.3337, + "step": 4364 + }, + { + "epoch": 0.08738082726522033, + "grad_norm": 1.2322510480880737, + "learning_rate": 9.913923851761834e-06, + "loss": 0.3139, + "step": 4365 + }, + { + "epoch": 0.08740084578234868, + "grad_norm": 1.0979845523834229, + "learning_rate": 9.9138639472787e-06, + "loss": 0.3889, + "step": 4366 + }, + { + "epoch": 0.08742086429947701, + "grad_norm": 1.1488280296325684, + "learning_rate": 9.913804022138742e-06, + "loss": 0.3488, + "step": 4367 + }, + { + "epoch": 0.08744088281660536, + "grad_norm": 1.091407060623169, + "learning_rate": 9.913744076342207e-06, + "loss": 0.3571, + "step": 4368 + }, + { + "epoch": 0.0874609013337337, + "grad_norm": 1.1210383176803589, + "learning_rate": 9.913684109889353e-06, + "loss": 0.3117, + "step": 4369 + }, + { + "epoch": 0.08748091985086205, + "grad_norm": 1.1027172803878784, + "learning_rate": 9.913624122780426e-06, + "loss": 0.3184, + "step": 4370 + }, + { + "epoch": 0.08750093836799039, + "grad_norm": 1.0621306896209717, + "learning_rate": 9.91356411501568e-06, + "loss": 0.3693, + "step": 4371 + }, + { + "epoch": 0.08752095688511874, + "grad_norm": 1.0625238418579102, + "learning_rate": 9.913504086595368e-06, + "loss": 0.3362, + "step": 4372 + }, + { + "epoch": 0.08754097540224708, + "grad_norm": 1.7191768884658813, + "learning_rate": 9.913444037519742e-06, + "loss": 0.8197, + "step": 4373 + }, + { + "epoch": 0.08756099391937543, + "grad_norm": 1.2970112562179565, + "learning_rate": 9.913383967789057e-06, + "loss": 0.3384, + "step": 4374 + }, + { + "epoch": 0.08758101243650376, + "grad_norm": 0.9694825410842896, + "learning_rate": 9.91332387740356e-06, + "loss": 0.3713, + "step": 4375 + }, + { + "epoch": 0.08760103095363211, + "grad_norm": 1.1142252683639526, + "learning_rate": 9.91326376636351e-06, + "loss": 0.3626, + "step": 4376 + }, + { + "epoch": 0.08762104947076045, + "grad_norm": 1.0666464567184448, + "learning_rate": 9.913203634669155e-06, + "loss": 0.3332, + "step": 4377 + }, + { + "epoch": 0.0876410679878888, + "grad_norm": 0.9891493320465088, + "learning_rate": 9.913143482320748e-06, + "loss": 0.2777, + "step": 4378 + }, + { + "epoch": 0.08766108650501714, + "grad_norm": 1.1222069263458252, + "learning_rate": 9.913083309318545e-06, + "loss": 0.3564, + "step": 4379 + }, + { + "epoch": 0.08768110502214549, + "grad_norm": 1.2851475477218628, + "learning_rate": 9.913023115662797e-06, + "loss": 0.323, + "step": 4380 + }, + { + "epoch": 0.08770112353927383, + "grad_norm": 1.0863291025161743, + "learning_rate": 9.912962901353755e-06, + "loss": 0.3214, + "step": 4381 + }, + { + "epoch": 0.08772114205640218, + "grad_norm": 1.0899766683578491, + "learning_rate": 9.912902666391675e-06, + "loss": 0.3148, + "step": 4382 + }, + { + "epoch": 0.08774116057353051, + "grad_norm": 1.0170652866363525, + "learning_rate": 9.912842410776811e-06, + "loss": 0.3158, + "step": 4383 + }, + { + "epoch": 0.08776117909065886, + "grad_norm": 1.171999454498291, + "learning_rate": 9.912782134509415e-06, + "loss": 0.2936, + "step": 4384 + }, + { + "epoch": 0.0877811976077872, + "grad_norm": 1.6778985261917114, + "learning_rate": 9.912721837589738e-06, + "loss": 0.9114, + "step": 4385 + }, + { + "epoch": 0.08780121612491555, + "grad_norm": 1.1175833940505981, + "learning_rate": 9.912661520018037e-06, + "loss": 0.2978, + "step": 4386 + }, + { + "epoch": 0.08782123464204389, + "grad_norm": 1.1399961709976196, + "learning_rate": 9.912601181794563e-06, + "loss": 0.3326, + "step": 4387 + }, + { + "epoch": 0.08784125315917224, + "grad_norm": 1.1901938915252686, + "learning_rate": 9.912540822919572e-06, + "loss": 0.3347, + "step": 4388 + }, + { + "epoch": 0.08786127167630058, + "grad_norm": 1.0915675163269043, + "learning_rate": 9.912480443393317e-06, + "loss": 0.3374, + "step": 4389 + }, + { + "epoch": 0.08788129019342893, + "grad_norm": 1.1647111177444458, + "learning_rate": 9.912420043216051e-06, + "loss": 0.3433, + "step": 4390 + }, + { + "epoch": 0.08790130871055726, + "grad_norm": 1.192793607711792, + "learning_rate": 9.912359622388029e-06, + "loss": 0.3197, + "step": 4391 + }, + { + "epoch": 0.08792132722768561, + "grad_norm": 1.008718729019165, + "learning_rate": 9.9122991809095e-06, + "loss": 0.2881, + "step": 4392 + }, + { + "epoch": 0.08794134574481395, + "grad_norm": 1.1177843809127808, + "learning_rate": 9.912238718780728e-06, + "loss": 0.3228, + "step": 4393 + }, + { + "epoch": 0.0879613642619423, + "grad_norm": 1.2060991525650024, + "learning_rate": 9.91217823600196e-06, + "loss": 0.3141, + "step": 4394 + }, + { + "epoch": 0.08798138277907064, + "grad_norm": 1.0577843189239502, + "learning_rate": 9.91211773257345e-06, + "loss": 0.351, + "step": 4395 + }, + { + "epoch": 0.08800140129619899, + "grad_norm": 1.2143586874008179, + "learning_rate": 9.912057208495456e-06, + "loss": 0.3187, + "step": 4396 + }, + { + "epoch": 0.08802141981332733, + "grad_norm": 1.866955041885376, + "learning_rate": 9.911996663768228e-06, + "loss": 0.8502, + "step": 4397 + }, + { + "epoch": 0.08804143833045568, + "grad_norm": 1.103511095046997, + "learning_rate": 9.911936098392026e-06, + "loss": 0.3337, + "step": 4398 + }, + { + "epoch": 0.08806145684758401, + "grad_norm": 1.1024733781814575, + "learning_rate": 9.9118755123671e-06, + "loss": 0.3039, + "step": 4399 + }, + { + "epoch": 0.08808147536471236, + "grad_norm": 1.1512324810028076, + "learning_rate": 9.911814905693708e-06, + "loss": 0.3723, + "step": 4400 + }, + { + "epoch": 0.0881014938818407, + "grad_norm": 1.1983509063720703, + "learning_rate": 9.9117542783721e-06, + "loss": 0.3439, + "step": 4401 + }, + { + "epoch": 0.08812151239896905, + "grad_norm": 1.19255530834198, + "learning_rate": 9.911693630402537e-06, + "loss": 0.3282, + "step": 4402 + }, + { + "epoch": 0.08814153091609739, + "grad_norm": 1.9178898334503174, + "learning_rate": 9.911632961785272e-06, + "loss": 0.8255, + "step": 4403 + }, + { + "epoch": 0.08816154943322574, + "grad_norm": 1.130454421043396, + "learning_rate": 9.911572272520555e-06, + "loss": 0.3168, + "step": 4404 + }, + { + "epoch": 0.08818156795035408, + "grad_norm": 1.0439140796661377, + "learning_rate": 9.911511562608648e-06, + "loss": 0.3214, + "step": 4405 + }, + { + "epoch": 0.08820158646748243, + "grad_norm": 1.2040311098098755, + "learning_rate": 9.911450832049803e-06, + "loss": 0.3572, + "step": 4406 + }, + { + "epoch": 0.08822160498461076, + "grad_norm": 1.1363927125930786, + "learning_rate": 9.911390080844276e-06, + "loss": 0.3193, + "step": 4407 + }, + { + "epoch": 0.08824162350173911, + "grad_norm": 1.0115723609924316, + "learning_rate": 9.91132930899232e-06, + "loss": 0.3299, + "step": 4408 + }, + { + "epoch": 0.08826164201886745, + "grad_norm": 2.0066215991973877, + "learning_rate": 9.911268516494195e-06, + "loss": 0.7924, + "step": 4409 + }, + { + "epoch": 0.0882816605359958, + "grad_norm": 1.1597548723220825, + "learning_rate": 9.911207703350152e-06, + "loss": 0.3448, + "step": 4410 + }, + { + "epoch": 0.08830167905312414, + "grad_norm": 1.1499748229980469, + "learning_rate": 9.91114686956045e-06, + "loss": 0.3558, + "step": 4411 + }, + { + "epoch": 0.08832169757025249, + "grad_norm": 1.1106935739517212, + "learning_rate": 9.911086015125344e-06, + "loss": 0.3776, + "step": 4412 + }, + { + "epoch": 0.08834171608738083, + "grad_norm": 1.1495697498321533, + "learning_rate": 9.911025140045087e-06, + "loss": 0.3442, + "step": 4413 + }, + { + "epoch": 0.08836173460450918, + "grad_norm": 1.0918656587600708, + "learning_rate": 9.910964244319939e-06, + "loss": 0.3447, + "step": 4414 + }, + { + "epoch": 0.08838175312163751, + "grad_norm": 1.0418100357055664, + "learning_rate": 9.910903327950154e-06, + "loss": 0.3283, + "step": 4415 + }, + { + "epoch": 0.08840177163876586, + "grad_norm": 1.200629472732544, + "learning_rate": 9.910842390935988e-06, + "loss": 0.3422, + "step": 4416 + }, + { + "epoch": 0.0884217901558942, + "grad_norm": 1.207767128944397, + "learning_rate": 9.910781433277698e-06, + "loss": 0.329, + "step": 4417 + }, + { + "epoch": 0.08844180867302255, + "grad_norm": 1.194298267364502, + "learning_rate": 9.91072045497554e-06, + "loss": 0.297, + "step": 4418 + }, + { + "epoch": 0.08846182719015089, + "grad_norm": 1.0318443775177002, + "learning_rate": 9.910659456029768e-06, + "loss": 0.3422, + "step": 4419 + }, + { + "epoch": 0.08848184570727924, + "grad_norm": 1.0293086767196655, + "learning_rate": 9.910598436440642e-06, + "loss": 0.3178, + "step": 4420 + }, + { + "epoch": 0.08850186422440758, + "grad_norm": 1.2282847166061401, + "learning_rate": 9.910537396208417e-06, + "loss": 0.3277, + "step": 4421 + }, + { + "epoch": 0.08852188274153593, + "grad_norm": 1.0421355962753296, + "learning_rate": 9.910476335333348e-06, + "loss": 0.2973, + "step": 4422 + }, + { + "epoch": 0.08854190125866426, + "grad_norm": 1.1392991542816162, + "learning_rate": 9.910415253815695e-06, + "loss": 0.3265, + "step": 4423 + }, + { + "epoch": 0.08856191977579261, + "grad_norm": 1.0859341621398926, + "learning_rate": 9.910354151655713e-06, + "loss": 0.2894, + "step": 4424 + }, + { + "epoch": 0.08858193829292095, + "grad_norm": 1.1299556493759155, + "learning_rate": 9.910293028853657e-06, + "loss": 0.3426, + "step": 4425 + }, + { + "epoch": 0.0886019568100493, + "grad_norm": 1.8021178245544434, + "learning_rate": 9.910231885409786e-06, + "loss": 0.8087, + "step": 4426 + }, + { + "epoch": 0.08862197532717764, + "grad_norm": 1.0157936811447144, + "learning_rate": 9.910170721324357e-06, + "loss": 0.3292, + "step": 4427 + }, + { + "epoch": 0.08864199384430599, + "grad_norm": 1.0689380168914795, + "learning_rate": 9.91010953659763e-06, + "loss": 0.3489, + "step": 4428 + }, + { + "epoch": 0.08866201236143433, + "grad_norm": 1.0685222148895264, + "learning_rate": 9.910048331229855e-06, + "loss": 0.3592, + "step": 4429 + }, + { + "epoch": 0.08868203087856268, + "grad_norm": 1.2605525255203247, + "learning_rate": 9.909987105221297e-06, + "loss": 0.3625, + "step": 4430 + }, + { + "epoch": 0.08870204939569101, + "grad_norm": 1.1535818576812744, + "learning_rate": 9.909925858572207e-06, + "loss": 0.3443, + "step": 4431 + }, + { + "epoch": 0.08872206791281936, + "grad_norm": 1.1000970602035522, + "learning_rate": 9.909864591282848e-06, + "loss": 0.3631, + "step": 4432 + }, + { + "epoch": 0.0887420864299477, + "grad_norm": 1.1233367919921875, + "learning_rate": 9.909803303353474e-06, + "loss": 0.3358, + "step": 4433 + }, + { + "epoch": 0.08876210494707605, + "grad_norm": 1.1066864728927612, + "learning_rate": 9.909741994784341e-06, + "loss": 0.3357, + "step": 4434 + }, + { + "epoch": 0.08878212346420439, + "grad_norm": 1.1120336055755615, + "learning_rate": 9.909680665575711e-06, + "loss": 0.3133, + "step": 4435 + }, + { + "epoch": 0.08880214198133274, + "grad_norm": 1.1097193956375122, + "learning_rate": 9.90961931572784e-06, + "loss": 0.3267, + "step": 4436 + }, + { + "epoch": 0.08882216049846108, + "grad_norm": 1.0546767711639404, + "learning_rate": 9.909557945240988e-06, + "loss": 0.3177, + "step": 4437 + }, + { + "epoch": 0.08884217901558943, + "grad_norm": 1.875500202178955, + "learning_rate": 9.909496554115408e-06, + "loss": 0.8003, + "step": 4438 + }, + { + "epoch": 0.08886219753271776, + "grad_norm": 1.1843831539154053, + "learning_rate": 9.909435142351362e-06, + "loss": 0.2691, + "step": 4439 + }, + { + "epoch": 0.08888221604984611, + "grad_norm": 1.1200350522994995, + "learning_rate": 9.909373709949108e-06, + "loss": 0.3651, + "step": 4440 + }, + { + "epoch": 0.08890223456697445, + "grad_norm": 1.103477120399475, + "learning_rate": 9.909312256908902e-06, + "loss": 0.3573, + "step": 4441 + }, + { + "epoch": 0.0889222530841028, + "grad_norm": 1.0771708488464355, + "learning_rate": 9.909250783231005e-06, + "loss": 0.3435, + "step": 4442 + }, + { + "epoch": 0.08894227160123114, + "grad_norm": 1.0658185482025146, + "learning_rate": 9.909189288915676e-06, + "loss": 0.3481, + "step": 4443 + }, + { + "epoch": 0.08896229011835949, + "grad_norm": 1.775486707687378, + "learning_rate": 9.909127773963167e-06, + "loss": 0.8722, + "step": 4444 + }, + { + "epoch": 0.08898230863548783, + "grad_norm": 1.2739814519882202, + "learning_rate": 9.909066238373745e-06, + "loss": 0.3413, + "step": 4445 + }, + { + "epoch": 0.08900232715261618, + "grad_norm": 1.8261008262634277, + "learning_rate": 9.909004682147663e-06, + "loss": 0.8378, + "step": 4446 + }, + { + "epoch": 0.08902234566974451, + "grad_norm": 1.078903079032898, + "learning_rate": 9.908943105285183e-06, + "loss": 0.3441, + "step": 4447 + }, + { + "epoch": 0.08904236418687286, + "grad_norm": 1.2222471237182617, + "learning_rate": 9.90888150778656e-06, + "loss": 0.3715, + "step": 4448 + }, + { + "epoch": 0.0890623827040012, + "grad_norm": 1.2712477445602417, + "learning_rate": 9.90881988965206e-06, + "loss": 0.3371, + "step": 4449 + }, + { + "epoch": 0.08908240122112955, + "grad_norm": 1.065148949623108, + "learning_rate": 9.908758250881934e-06, + "loss": 0.3127, + "step": 4450 + }, + { + "epoch": 0.08910241973825789, + "grad_norm": 1.2283669710159302, + "learning_rate": 9.908696591476446e-06, + "loss": 0.3685, + "step": 4451 + }, + { + "epoch": 0.08912243825538624, + "grad_norm": 1.7754262685775757, + "learning_rate": 9.908634911435856e-06, + "loss": 0.8543, + "step": 4452 + }, + { + "epoch": 0.08914245677251458, + "grad_norm": 1.0711501836776733, + "learning_rate": 9.908573210760417e-06, + "loss": 0.3474, + "step": 4453 + }, + { + "epoch": 0.08916247528964293, + "grad_norm": 1.3972952365875244, + "learning_rate": 9.908511489450394e-06, + "loss": 0.3113, + "step": 4454 + }, + { + "epoch": 0.08918249380677126, + "grad_norm": 1.114725112915039, + "learning_rate": 9.908449747506046e-06, + "loss": 0.3491, + "step": 4455 + }, + { + "epoch": 0.08920251232389961, + "grad_norm": 1.0977556705474854, + "learning_rate": 9.90838798492763e-06, + "loss": 0.335, + "step": 4456 + }, + { + "epoch": 0.08922253084102795, + "grad_norm": 1.164865255355835, + "learning_rate": 9.90832620171541e-06, + "loss": 0.2948, + "step": 4457 + }, + { + "epoch": 0.0892425493581563, + "grad_norm": 1.1943424940109253, + "learning_rate": 9.90826439786964e-06, + "loss": 0.3386, + "step": 4458 + }, + { + "epoch": 0.08926256787528464, + "grad_norm": 1.1062726974487305, + "learning_rate": 9.908202573390583e-06, + "loss": 0.3193, + "step": 4459 + }, + { + "epoch": 0.08928258639241299, + "grad_norm": 1.0231980085372925, + "learning_rate": 9.908140728278501e-06, + "loss": 0.2981, + "step": 4460 + }, + { + "epoch": 0.08930260490954132, + "grad_norm": 1.2110004425048828, + "learning_rate": 9.90807886253365e-06, + "loss": 0.3085, + "step": 4461 + }, + { + "epoch": 0.08932262342666968, + "grad_norm": 1.155126929283142, + "learning_rate": 9.908016976156292e-06, + "loss": 0.3156, + "step": 4462 + }, + { + "epoch": 0.08934264194379801, + "grad_norm": 1.0806769132614136, + "learning_rate": 9.907955069146687e-06, + "loss": 0.2793, + "step": 4463 + }, + { + "epoch": 0.08936266046092636, + "grad_norm": 1.3153818845748901, + "learning_rate": 9.907893141505095e-06, + "loss": 0.3221, + "step": 4464 + }, + { + "epoch": 0.0893826789780547, + "grad_norm": 1.174806833267212, + "learning_rate": 9.907831193231776e-06, + "loss": 0.3415, + "step": 4465 + }, + { + "epoch": 0.08940269749518305, + "grad_norm": 1.9755046367645264, + "learning_rate": 9.907769224326992e-06, + "loss": 0.8279, + "step": 4466 + }, + { + "epoch": 0.08942271601231139, + "grad_norm": 1.4113526344299316, + "learning_rate": 9.907707234791002e-06, + "loss": 0.3615, + "step": 4467 + }, + { + "epoch": 0.08944273452943974, + "grad_norm": 1.2957814931869507, + "learning_rate": 9.907645224624065e-06, + "loss": 0.3287, + "step": 4468 + }, + { + "epoch": 0.08946275304656807, + "grad_norm": 1.1288410425186157, + "learning_rate": 9.907583193826446e-06, + "loss": 0.3175, + "step": 4469 + }, + { + "epoch": 0.08948277156369643, + "grad_norm": 1.7788071632385254, + "learning_rate": 9.907521142398402e-06, + "loss": 0.8192, + "step": 4470 + }, + { + "epoch": 0.08950279008082476, + "grad_norm": 1.2286107540130615, + "learning_rate": 9.907459070340195e-06, + "loss": 0.3134, + "step": 4471 + }, + { + "epoch": 0.08952280859795311, + "grad_norm": 1.1280518770217896, + "learning_rate": 9.907396977652087e-06, + "loss": 0.3131, + "step": 4472 + }, + { + "epoch": 0.08954282711508145, + "grad_norm": 1.0890430212020874, + "learning_rate": 9.90733486433434e-06, + "loss": 0.3294, + "step": 4473 + }, + { + "epoch": 0.0895628456322098, + "grad_norm": 0.9777441620826721, + "learning_rate": 9.90727273038721e-06, + "loss": 0.2954, + "step": 4474 + }, + { + "epoch": 0.08958286414933814, + "grad_norm": 1.242693543434143, + "learning_rate": 9.907210575810963e-06, + "loss": 0.3085, + "step": 4475 + }, + { + "epoch": 0.08960288266646649, + "grad_norm": 1.102349877357483, + "learning_rate": 9.907148400605859e-06, + "loss": 0.3315, + "step": 4476 + }, + { + "epoch": 0.08962290118359482, + "grad_norm": 1.3304697275161743, + "learning_rate": 9.907086204772159e-06, + "loss": 0.3269, + "step": 4477 + }, + { + "epoch": 0.08964291970072318, + "grad_norm": 1.0823067426681519, + "learning_rate": 9.907023988310123e-06, + "loss": 0.3536, + "step": 4478 + }, + { + "epoch": 0.08966293821785151, + "grad_norm": 1.0764245986938477, + "learning_rate": 9.906961751220016e-06, + "loss": 0.3079, + "step": 4479 + }, + { + "epoch": 0.08968295673497986, + "grad_norm": 1.9199515581130981, + "learning_rate": 9.906899493502097e-06, + "loss": 0.8427, + "step": 4480 + }, + { + "epoch": 0.0897029752521082, + "grad_norm": 1.0721784830093384, + "learning_rate": 9.906837215156629e-06, + "loss": 0.3686, + "step": 4481 + }, + { + "epoch": 0.08972299376923655, + "grad_norm": 1.0147932767868042, + "learning_rate": 9.906774916183872e-06, + "loss": 0.3379, + "step": 4482 + }, + { + "epoch": 0.08974301228636489, + "grad_norm": 1.0175271034240723, + "learning_rate": 9.906712596584091e-06, + "loss": 0.3258, + "step": 4483 + }, + { + "epoch": 0.08976303080349324, + "grad_norm": 1.2193679809570312, + "learning_rate": 9.906650256357547e-06, + "loss": 0.3086, + "step": 4484 + }, + { + "epoch": 0.08978304932062157, + "grad_norm": 0.9778397679328918, + "learning_rate": 9.906587895504498e-06, + "loss": 0.2984, + "step": 4485 + }, + { + "epoch": 0.08980306783774993, + "grad_norm": 1.0688260793685913, + "learning_rate": 9.906525514025211e-06, + "loss": 0.3388, + "step": 4486 + }, + { + "epoch": 0.08982308635487826, + "grad_norm": 1.0440651178359985, + "learning_rate": 9.906463111919947e-06, + "loss": 0.3624, + "step": 4487 + }, + { + "epoch": 0.08984310487200661, + "grad_norm": 1.2213373184204102, + "learning_rate": 9.906400689188968e-06, + "loss": 0.3057, + "step": 4488 + }, + { + "epoch": 0.08986312338913495, + "grad_norm": 1.0552055835723877, + "learning_rate": 9.906338245832537e-06, + "loss": 0.2986, + "step": 4489 + }, + { + "epoch": 0.0898831419062633, + "grad_norm": 1.111575722694397, + "learning_rate": 9.906275781850913e-06, + "loss": 0.3463, + "step": 4490 + }, + { + "epoch": 0.08990316042339164, + "grad_norm": 1.0962426662445068, + "learning_rate": 9.906213297244365e-06, + "loss": 0.3588, + "step": 4491 + }, + { + "epoch": 0.08992317894051999, + "grad_norm": 1.1157150268554688, + "learning_rate": 9.90615079201315e-06, + "loss": 0.3092, + "step": 4492 + }, + { + "epoch": 0.08994319745764832, + "grad_norm": 1.0314955711364746, + "learning_rate": 9.906088266157534e-06, + "loss": 0.336, + "step": 4493 + }, + { + "epoch": 0.08996321597477668, + "grad_norm": 1.2346258163452148, + "learning_rate": 9.906025719677777e-06, + "loss": 0.2862, + "step": 4494 + }, + { + "epoch": 0.08998323449190501, + "grad_norm": 1.109327793121338, + "learning_rate": 9.905963152574146e-06, + "loss": 0.3239, + "step": 4495 + }, + { + "epoch": 0.09000325300903336, + "grad_norm": 1.8793240785598755, + "learning_rate": 9.9059005648469e-06, + "loss": 0.869, + "step": 4496 + }, + { + "epoch": 0.0900232715261617, + "grad_norm": 1.0109522342681885, + "learning_rate": 9.905837956496305e-06, + "loss": 0.3398, + "step": 4497 + }, + { + "epoch": 0.09004329004329005, + "grad_norm": 1.1457949876785278, + "learning_rate": 9.905775327522622e-06, + "loss": 0.3475, + "step": 4498 + }, + { + "epoch": 0.09006330856041839, + "grad_norm": 1.1683732271194458, + "learning_rate": 9.905712677926116e-06, + "loss": 0.3142, + "step": 4499 + }, + { + "epoch": 0.09008332707754674, + "grad_norm": 1.1059399843215942, + "learning_rate": 9.905650007707048e-06, + "loss": 0.3015, + "step": 4500 + }, + { + "epoch": 0.09010334559467507, + "grad_norm": 1.055745005607605, + "learning_rate": 9.905587316865684e-06, + "loss": 0.3477, + "step": 4501 + }, + { + "epoch": 0.09012336411180341, + "grad_norm": 1.9645942449569702, + "learning_rate": 9.905524605402289e-06, + "loss": 0.858, + "step": 4502 + }, + { + "epoch": 0.09014338262893176, + "grad_norm": 1.0364000797271729, + "learning_rate": 9.90546187331712e-06, + "loss": 0.3341, + "step": 4503 + }, + { + "epoch": 0.0901634011460601, + "grad_norm": 1.2310649156570435, + "learning_rate": 9.905399120610448e-06, + "loss": 0.3182, + "step": 4504 + }, + { + "epoch": 0.09018341966318845, + "grad_norm": 1.7061055898666382, + "learning_rate": 9.905336347282533e-06, + "loss": 0.8832, + "step": 4505 + }, + { + "epoch": 0.09020343818031679, + "grad_norm": 1.0372726917266846, + "learning_rate": 9.905273553333639e-06, + "loss": 0.3766, + "step": 4506 + }, + { + "epoch": 0.09022345669744514, + "grad_norm": 1.0978988409042358, + "learning_rate": 9.90521073876403e-06, + "loss": 0.3504, + "step": 4507 + }, + { + "epoch": 0.09024347521457347, + "grad_norm": 1.2374908924102783, + "learning_rate": 9.905147903573973e-06, + "loss": 0.2892, + "step": 4508 + }, + { + "epoch": 0.09026349373170182, + "grad_norm": 1.0967093706130981, + "learning_rate": 9.905085047763727e-06, + "loss": 0.3138, + "step": 4509 + }, + { + "epoch": 0.09028351224883016, + "grad_norm": 1.1000465154647827, + "learning_rate": 9.905022171333562e-06, + "loss": 0.297, + "step": 4510 + }, + { + "epoch": 0.09030353076595851, + "grad_norm": 0.9973841905593872, + "learning_rate": 9.904959274283738e-06, + "loss": 0.3377, + "step": 4511 + }, + { + "epoch": 0.09032354928308685, + "grad_norm": 1.1619874238967896, + "learning_rate": 9.90489635661452e-06, + "loss": 0.316, + "step": 4512 + }, + { + "epoch": 0.0903435678002152, + "grad_norm": 1.1538119316101074, + "learning_rate": 9.904833418326173e-06, + "loss": 0.3411, + "step": 4513 + }, + { + "epoch": 0.09036358631734354, + "grad_norm": 1.951154351234436, + "learning_rate": 9.904770459418965e-06, + "loss": 0.8482, + "step": 4514 + }, + { + "epoch": 0.09038360483447189, + "grad_norm": 1.1655902862548828, + "learning_rate": 9.904707479893155e-06, + "loss": 0.3689, + "step": 4515 + }, + { + "epoch": 0.09040362335160022, + "grad_norm": 1.1958460807800293, + "learning_rate": 9.904644479749011e-06, + "loss": 0.3412, + "step": 4516 + }, + { + "epoch": 0.09042364186872857, + "grad_norm": 1.2247161865234375, + "learning_rate": 9.904581458986795e-06, + "loss": 0.3866, + "step": 4517 + }, + { + "epoch": 0.09044366038585691, + "grad_norm": 2.01920485496521, + "learning_rate": 9.904518417606776e-06, + "loss": 0.9062, + "step": 4518 + }, + { + "epoch": 0.09046367890298526, + "grad_norm": 1.2248637676239014, + "learning_rate": 9.904455355609217e-06, + "loss": 0.343, + "step": 4519 + }, + { + "epoch": 0.0904836974201136, + "grad_norm": 1.1189290285110474, + "learning_rate": 9.904392272994383e-06, + "loss": 0.3572, + "step": 4520 + }, + { + "epoch": 0.09050371593724195, + "grad_norm": 0.9572097063064575, + "learning_rate": 9.904329169762538e-06, + "loss": 0.276, + "step": 4521 + }, + { + "epoch": 0.09052373445437029, + "grad_norm": 1.228082537651062, + "learning_rate": 9.90426604591395e-06, + "loss": 0.3463, + "step": 4522 + }, + { + "epoch": 0.09054375297149864, + "grad_norm": 1.8940426111221313, + "learning_rate": 9.904202901448883e-06, + "loss": 0.8952, + "step": 4523 + }, + { + "epoch": 0.09056377148862697, + "grad_norm": 1.2532976865768433, + "learning_rate": 9.904139736367602e-06, + "loss": 0.3441, + "step": 4524 + }, + { + "epoch": 0.09058379000575532, + "grad_norm": 1.0768765211105347, + "learning_rate": 9.904076550670375e-06, + "loss": 0.3242, + "step": 4525 + }, + { + "epoch": 0.09060380852288366, + "grad_norm": 1.0897853374481201, + "learning_rate": 9.904013344357461e-06, + "loss": 0.3275, + "step": 4526 + }, + { + "epoch": 0.09062382704001201, + "grad_norm": 1.8153637647628784, + "learning_rate": 9.903950117429135e-06, + "loss": 0.8531, + "step": 4527 + }, + { + "epoch": 0.09064384555714035, + "grad_norm": 1.1772209405899048, + "learning_rate": 9.903886869885654e-06, + "loss": 0.3102, + "step": 4528 + }, + { + "epoch": 0.0906638640742687, + "grad_norm": 1.1179651021957397, + "learning_rate": 9.90382360172729e-06, + "loss": 0.3258, + "step": 4529 + }, + { + "epoch": 0.09068388259139704, + "grad_norm": 1.0901089906692505, + "learning_rate": 9.903760312954307e-06, + "loss": 0.3098, + "step": 4530 + }, + { + "epoch": 0.09070390110852539, + "grad_norm": 1.162866473197937, + "learning_rate": 9.90369700356697e-06, + "loss": 0.3714, + "step": 4531 + }, + { + "epoch": 0.09072391962565372, + "grad_norm": 1.2250335216522217, + "learning_rate": 9.903633673565548e-06, + "loss": 0.3627, + "step": 4532 + }, + { + "epoch": 0.09074393814278207, + "grad_norm": 1.0798519849777222, + "learning_rate": 9.903570322950304e-06, + "loss": 0.3174, + "step": 4533 + }, + { + "epoch": 0.09076395665991041, + "grad_norm": 1.075773000717163, + "learning_rate": 9.903506951721506e-06, + "loss": 0.3259, + "step": 4534 + }, + { + "epoch": 0.09078397517703876, + "grad_norm": 1.178781270980835, + "learning_rate": 9.90344355987942e-06, + "loss": 0.2979, + "step": 4535 + }, + { + "epoch": 0.0908039936941671, + "grad_norm": 1.099730134010315, + "learning_rate": 9.90338014742431e-06, + "loss": 0.3212, + "step": 4536 + }, + { + "epoch": 0.09082401221129545, + "grad_norm": 1.0759941339492798, + "learning_rate": 9.903316714356448e-06, + "loss": 0.3141, + "step": 4537 + }, + { + "epoch": 0.09084403072842379, + "grad_norm": 1.060404896736145, + "learning_rate": 9.903253260676097e-06, + "loss": 0.3228, + "step": 4538 + }, + { + "epoch": 0.09086404924555214, + "grad_norm": 1.0253387689590454, + "learning_rate": 9.903189786383525e-06, + "loss": 0.3459, + "step": 4539 + }, + { + "epoch": 0.09088406776268047, + "grad_norm": 1.0938268899917603, + "learning_rate": 9.903126291478999e-06, + "loss": 0.2958, + "step": 4540 + }, + { + "epoch": 0.09090408627980882, + "grad_norm": 1.0275803804397583, + "learning_rate": 9.903062775962785e-06, + "loss": 0.3465, + "step": 4541 + }, + { + "epoch": 0.09092410479693716, + "grad_norm": 1.1823298931121826, + "learning_rate": 9.902999239835147e-06, + "loss": 0.3139, + "step": 4542 + }, + { + "epoch": 0.09094412331406551, + "grad_norm": 1.2088675498962402, + "learning_rate": 9.902935683096358e-06, + "loss": 0.3556, + "step": 4543 + }, + { + "epoch": 0.09096414183119385, + "grad_norm": 1.1428550481796265, + "learning_rate": 9.902872105746683e-06, + "loss": 0.3228, + "step": 4544 + }, + { + "epoch": 0.0909841603483222, + "grad_norm": 1.0195096731185913, + "learning_rate": 9.902808507786387e-06, + "loss": 0.3649, + "step": 4545 + }, + { + "epoch": 0.09100417886545054, + "grad_norm": 1.1416423320770264, + "learning_rate": 9.90274488921574e-06, + "loss": 0.3579, + "step": 4546 + }, + { + "epoch": 0.09102419738257889, + "grad_norm": 1.9804986715316772, + "learning_rate": 9.90268125003501e-06, + "loss": 0.8648, + "step": 4547 + }, + { + "epoch": 0.09104421589970722, + "grad_norm": 1.378353238105774, + "learning_rate": 9.90261759024446e-06, + "loss": 0.2852, + "step": 4548 + }, + { + "epoch": 0.09106423441683557, + "grad_norm": 0.9754900336265564, + "learning_rate": 9.902553909844364e-06, + "loss": 0.2955, + "step": 4549 + }, + { + "epoch": 0.09108425293396391, + "grad_norm": 1.8257478475570679, + "learning_rate": 9.902490208834984e-06, + "loss": 0.8714, + "step": 4550 + }, + { + "epoch": 0.09110427145109226, + "grad_norm": 0.9922580122947693, + "learning_rate": 9.902426487216592e-06, + "loss": 0.3379, + "step": 4551 + }, + { + "epoch": 0.0911242899682206, + "grad_norm": 1.1285138130187988, + "learning_rate": 9.902362744989453e-06, + "loss": 0.3102, + "step": 4552 + }, + { + "epoch": 0.09114430848534895, + "grad_norm": 1.0694379806518555, + "learning_rate": 9.902298982153835e-06, + "loss": 0.3697, + "step": 4553 + }, + { + "epoch": 0.09116432700247729, + "grad_norm": 1.0841273069381714, + "learning_rate": 9.902235198710008e-06, + "loss": 0.3407, + "step": 4554 + }, + { + "epoch": 0.09118434551960564, + "grad_norm": 1.1188149452209473, + "learning_rate": 9.90217139465824e-06, + "loss": 0.3548, + "step": 4555 + }, + { + "epoch": 0.09120436403673397, + "grad_norm": 1.0822696685791016, + "learning_rate": 9.902107569998798e-06, + "loss": 0.3197, + "step": 4556 + }, + { + "epoch": 0.09122438255386232, + "grad_norm": 1.239465355873108, + "learning_rate": 9.90204372473195e-06, + "loss": 0.3598, + "step": 4557 + }, + { + "epoch": 0.09124440107099066, + "grad_norm": 1.139055848121643, + "learning_rate": 9.901979858857966e-06, + "loss": 0.3453, + "step": 4558 + }, + { + "epoch": 0.09126441958811901, + "grad_norm": 1.184098720550537, + "learning_rate": 9.901915972377113e-06, + "loss": 0.3089, + "step": 4559 + }, + { + "epoch": 0.09128443810524735, + "grad_norm": 1.0734050273895264, + "learning_rate": 9.901852065289662e-06, + "loss": 0.3331, + "step": 4560 + }, + { + "epoch": 0.0913044566223757, + "grad_norm": 1.8679628372192383, + "learning_rate": 9.901788137595875e-06, + "loss": 0.9157, + "step": 4561 + }, + { + "epoch": 0.09132447513950404, + "grad_norm": 1.0731556415557861, + "learning_rate": 9.90172418929603e-06, + "loss": 0.3371, + "step": 4562 + }, + { + "epoch": 0.09134449365663239, + "grad_norm": 1.1470234394073486, + "learning_rate": 9.90166022039039e-06, + "loss": 0.3071, + "step": 4563 + }, + { + "epoch": 0.09136451217376072, + "grad_norm": 1.0789611339569092, + "learning_rate": 9.901596230879226e-06, + "loss": 0.3154, + "step": 4564 + }, + { + "epoch": 0.09138453069088907, + "grad_norm": 1.1437666416168213, + "learning_rate": 9.901532220762804e-06, + "loss": 0.4236, + "step": 4565 + }, + { + "epoch": 0.09140454920801741, + "grad_norm": 1.0675418376922607, + "learning_rate": 9.901468190041399e-06, + "loss": 0.3183, + "step": 4566 + }, + { + "epoch": 0.09142456772514576, + "grad_norm": 1.085932970046997, + "learning_rate": 9.901404138715273e-06, + "loss": 0.336, + "step": 4567 + }, + { + "epoch": 0.0914445862422741, + "grad_norm": 1.2051723003387451, + "learning_rate": 9.901340066784701e-06, + "loss": 0.3016, + "step": 4568 + }, + { + "epoch": 0.09146460475940245, + "grad_norm": 1.0066205263137817, + "learning_rate": 9.90127597424995e-06, + "loss": 0.3331, + "step": 4569 + }, + { + "epoch": 0.09148462327653079, + "grad_norm": 1.0809834003448486, + "learning_rate": 9.901211861111288e-06, + "loss": 0.389, + "step": 4570 + }, + { + "epoch": 0.09150464179365914, + "grad_norm": 0.9854878187179565, + "learning_rate": 9.901147727368987e-06, + "loss": 0.3128, + "step": 4571 + }, + { + "epoch": 0.09152466031078747, + "grad_norm": 1.1694514751434326, + "learning_rate": 9.901083573023318e-06, + "loss": 0.3549, + "step": 4572 + }, + { + "epoch": 0.09154467882791582, + "grad_norm": 1.365750789642334, + "learning_rate": 9.901019398074546e-06, + "loss": 0.3309, + "step": 4573 + }, + { + "epoch": 0.09156469734504416, + "grad_norm": 1.036965250968933, + "learning_rate": 9.900955202522941e-06, + "loss": 0.3328, + "step": 4574 + }, + { + "epoch": 0.09158471586217251, + "grad_norm": 1.082316279411316, + "learning_rate": 9.900890986368779e-06, + "loss": 0.371, + "step": 4575 + }, + { + "epoch": 0.09160473437930085, + "grad_norm": 1.7927395105361938, + "learning_rate": 9.900826749612325e-06, + "loss": 0.3627, + "step": 4576 + }, + { + "epoch": 0.0916247528964292, + "grad_norm": 1.0552059412002563, + "learning_rate": 9.900762492253847e-06, + "loss": 0.3421, + "step": 4577 + }, + { + "epoch": 0.09164477141355754, + "grad_norm": 1.0350114107131958, + "learning_rate": 9.900698214293623e-06, + "loss": 0.3123, + "step": 4578 + }, + { + "epoch": 0.09166478993068589, + "grad_norm": 1.3170589208602905, + "learning_rate": 9.900633915731914e-06, + "loss": 0.3856, + "step": 4579 + }, + { + "epoch": 0.09168480844781422, + "grad_norm": 1.1291133165359497, + "learning_rate": 9.900569596568998e-06, + "loss": 0.3796, + "step": 4580 + }, + { + "epoch": 0.09170482696494257, + "grad_norm": 2.03820538520813, + "learning_rate": 9.90050525680514e-06, + "loss": 0.8387, + "step": 4581 + }, + { + "epoch": 0.09172484548207091, + "grad_norm": 1.1206454038619995, + "learning_rate": 9.900440896440613e-06, + "loss": 0.3012, + "step": 4582 + }, + { + "epoch": 0.09174486399919926, + "grad_norm": 1.01094651222229, + "learning_rate": 9.900376515475687e-06, + "loss": 0.302, + "step": 4583 + }, + { + "epoch": 0.0917648825163276, + "grad_norm": 1.0964053869247437, + "learning_rate": 9.900312113910633e-06, + "loss": 0.3345, + "step": 4584 + }, + { + "epoch": 0.09178490103345595, + "grad_norm": 1.6681385040283203, + "learning_rate": 9.900247691745722e-06, + "loss": 0.8663, + "step": 4585 + }, + { + "epoch": 0.09180491955058429, + "grad_norm": 1.1712350845336914, + "learning_rate": 9.900183248981224e-06, + "loss": 0.3494, + "step": 4586 + }, + { + "epoch": 0.09182493806771264, + "grad_norm": 1.1187553405761719, + "learning_rate": 9.900118785617408e-06, + "loss": 0.3017, + "step": 4587 + }, + { + "epoch": 0.09184495658484097, + "grad_norm": 1.0617516040802002, + "learning_rate": 9.90005430165455e-06, + "loss": 0.3516, + "step": 4588 + }, + { + "epoch": 0.09186497510196932, + "grad_norm": 1.0939133167266846, + "learning_rate": 9.899989797092916e-06, + "loss": 0.33, + "step": 4589 + }, + { + "epoch": 0.09188499361909766, + "grad_norm": 1.243449091911316, + "learning_rate": 9.89992527193278e-06, + "loss": 0.3493, + "step": 4590 + }, + { + "epoch": 0.09190501213622601, + "grad_norm": 2.057506799697876, + "learning_rate": 9.899860726174414e-06, + "loss": 0.864, + "step": 4591 + }, + { + "epoch": 0.09192503065335435, + "grad_norm": 1.8580602407455444, + "learning_rate": 9.899796159818087e-06, + "loss": 0.9281, + "step": 4592 + }, + { + "epoch": 0.0919450491704827, + "grad_norm": 1.1280392408370972, + "learning_rate": 9.89973157286407e-06, + "loss": 0.3055, + "step": 4593 + }, + { + "epoch": 0.09196506768761104, + "grad_norm": 1.1583906412124634, + "learning_rate": 9.899666965312638e-06, + "loss": 0.2783, + "step": 4594 + }, + { + "epoch": 0.09198508620473939, + "grad_norm": 1.0250298976898193, + "learning_rate": 9.89960233716406e-06, + "loss": 0.3437, + "step": 4595 + }, + { + "epoch": 0.09200510472186772, + "grad_norm": 1.7894600629806519, + "learning_rate": 9.899537688418608e-06, + "loss": 0.8596, + "step": 4596 + }, + { + "epoch": 0.09202512323899607, + "grad_norm": 1.048621416091919, + "learning_rate": 9.899473019076554e-06, + "loss": 0.3826, + "step": 4597 + }, + { + "epoch": 0.09204514175612441, + "grad_norm": 1.038752555847168, + "learning_rate": 9.89940832913817e-06, + "loss": 0.3277, + "step": 4598 + }, + { + "epoch": 0.09206516027325276, + "grad_norm": 1.116684079170227, + "learning_rate": 9.899343618603728e-06, + "loss": 0.3396, + "step": 4599 + }, + { + "epoch": 0.0920851787903811, + "grad_norm": 1.8693821430206299, + "learning_rate": 9.899278887473499e-06, + "loss": 0.8775, + "step": 4600 + }, + { + "epoch": 0.09210519730750945, + "grad_norm": 1.0677250623703003, + "learning_rate": 9.899214135747755e-06, + "loss": 0.3451, + "step": 4601 + }, + { + "epoch": 0.09212521582463779, + "grad_norm": 1.0426604747772217, + "learning_rate": 9.899149363426771e-06, + "loss": 0.3135, + "step": 4602 + }, + { + "epoch": 0.09214523434176614, + "grad_norm": 1.0893967151641846, + "learning_rate": 9.899084570510818e-06, + "loss": 0.3125, + "step": 4603 + }, + { + "epoch": 0.09216525285889447, + "grad_norm": 1.1202201843261719, + "learning_rate": 9.899019757000165e-06, + "loss": 0.3547, + "step": 4604 + }, + { + "epoch": 0.09218527137602282, + "grad_norm": 1.0886441469192505, + "learning_rate": 9.898954922895089e-06, + "loss": 0.2963, + "step": 4605 + }, + { + "epoch": 0.09220528989315116, + "grad_norm": 1.0714854001998901, + "learning_rate": 9.89889006819586e-06, + "loss": 0.363, + "step": 4606 + }, + { + "epoch": 0.09222530841027951, + "grad_norm": 1.0643562078475952, + "learning_rate": 9.898825192902752e-06, + "loss": 0.3116, + "step": 4607 + }, + { + "epoch": 0.09224532692740785, + "grad_norm": 1.2334948778152466, + "learning_rate": 9.898760297016036e-06, + "loss": 0.3158, + "step": 4608 + }, + { + "epoch": 0.0922653454445362, + "grad_norm": 1.276962161064148, + "learning_rate": 9.898695380535988e-06, + "loss": 0.3348, + "step": 4609 + }, + { + "epoch": 0.09228536396166453, + "grad_norm": 1.7824065685272217, + "learning_rate": 9.898630443462877e-06, + "loss": 0.8516, + "step": 4610 + }, + { + "epoch": 0.09230538247879289, + "grad_norm": 1.2007246017456055, + "learning_rate": 9.898565485796978e-06, + "loss": 0.4065, + "step": 4611 + }, + { + "epoch": 0.09232540099592122, + "grad_norm": 1.0970957279205322, + "learning_rate": 9.898500507538566e-06, + "loss": 0.3178, + "step": 4612 + }, + { + "epoch": 0.09234541951304957, + "grad_norm": 1.0581783056259155, + "learning_rate": 9.898435508687909e-06, + "loss": 0.3183, + "step": 4613 + }, + { + "epoch": 0.09236543803017791, + "grad_norm": 0.99844890832901, + "learning_rate": 9.898370489245286e-06, + "loss": 0.3025, + "step": 4614 + }, + { + "epoch": 0.09238545654730626, + "grad_norm": 1.1082627773284912, + "learning_rate": 9.898305449210966e-06, + "loss": 0.3471, + "step": 4615 + }, + { + "epoch": 0.0924054750644346, + "grad_norm": 1.0546773672103882, + "learning_rate": 9.898240388585223e-06, + "loss": 0.3283, + "step": 4616 + }, + { + "epoch": 0.09242549358156295, + "grad_norm": 0.9470946192741394, + "learning_rate": 9.898175307368334e-06, + "loss": 0.2898, + "step": 4617 + }, + { + "epoch": 0.09244551209869128, + "grad_norm": 1.082358956336975, + "learning_rate": 9.89811020556057e-06, + "loss": 0.3034, + "step": 4618 + }, + { + "epoch": 0.09246553061581964, + "grad_norm": 1.1866834163665771, + "learning_rate": 9.898045083162202e-06, + "loss": 0.3739, + "step": 4619 + }, + { + "epoch": 0.09248554913294797, + "grad_norm": 2.02947998046875, + "learning_rate": 9.897979940173508e-06, + "loss": 0.9043, + "step": 4620 + }, + { + "epoch": 0.09250556765007632, + "grad_norm": 1.026078224182129, + "learning_rate": 9.897914776594762e-06, + "loss": 0.3236, + "step": 4621 + }, + { + "epoch": 0.09252558616720466, + "grad_norm": 1.0906840562820435, + "learning_rate": 9.897849592426234e-06, + "loss": 0.3209, + "step": 4622 + }, + { + "epoch": 0.09254560468433301, + "grad_norm": 1.1673007011413574, + "learning_rate": 9.897784387668201e-06, + "loss": 0.3362, + "step": 4623 + }, + { + "epoch": 0.09256562320146135, + "grad_norm": 1.1673083305358887, + "learning_rate": 9.897719162320937e-06, + "loss": 0.317, + "step": 4624 + }, + { + "epoch": 0.0925856417185897, + "grad_norm": 1.059312105178833, + "learning_rate": 9.897653916384715e-06, + "loss": 0.3269, + "step": 4625 + }, + { + "epoch": 0.09260566023571803, + "grad_norm": 1.048789143562317, + "learning_rate": 9.89758864985981e-06, + "loss": 0.3592, + "step": 4626 + }, + { + "epoch": 0.09262567875284639, + "grad_norm": 1.0986623764038086, + "learning_rate": 9.897523362746497e-06, + "loss": 0.3489, + "step": 4627 + }, + { + "epoch": 0.09264569726997472, + "grad_norm": 1.0742748975753784, + "learning_rate": 9.89745805504505e-06, + "loss": 0.2977, + "step": 4628 + }, + { + "epoch": 0.09266571578710307, + "grad_norm": 1.0346406698226929, + "learning_rate": 9.897392726755744e-06, + "loss": 0.2642, + "step": 4629 + }, + { + "epoch": 0.09268573430423141, + "grad_norm": 1.0985323190689087, + "learning_rate": 9.89732737787885e-06, + "loss": 0.3058, + "step": 4630 + }, + { + "epoch": 0.09270575282135976, + "grad_norm": 1.7785038948059082, + "learning_rate": 9.897262008414648e-06, + "loss": 0.8648, + "step": 4631 + }, + { + "epoch": 0.0927257713384881, + "grad_norm": 1.2662051916122437, + "learning_rate": 9.89719661836341e-06, + "loss": 0.3743, + "step": 4632 + }, + { + "epoch": 0.09274578985561645, + "grad_norm": 2.0504214763641357, + "learning_rate": 9.89713120772541e-06, + "loss": 0.8369, + "step": 4633 + }, + { + "epoch": 0.09276580837274478, + "grad_norm": 1.0297949314117432, + "learning_rate": 9.897065776500925e-06, + "loss": 0.3444, + "step": 4634 + }, + { + "epoch": 0.09278582688987314, + "grad_norm": 1.1406381130218506, + "learning_rate": 9.89700032469023e-06, + "loss": 0.3762, + "step": 4635 + }, + { + "epoch": 0.09280584540700147, + "grad_norm": 1.0615925788879395, + "learning_rate": 9.896934852293598e-06, + "loss": 0.3275, + "step": 4636 + }, + { + "epoch": 0.09282586392412982, + "grad_norm": 1.1477808952331543, + "learning_rate": 9.896869359311307e-06, + "loss": 0.3385, + "step": 4637 + }, + { + "epoch": 0.09284588244125816, + "grad_norm": 1.1339662075042725, + "learning_rate": 9.89680384574363e-06, + "loss": 0.2874, + "step": 4638 + }, + { + "epoch": 0.09286590095838651, + "grad_norm": 1.3237355947494507, + "learning_rate": 9.896738311590846e-06, + "loss": 0.3414, + "step": 4639 + }, + { + "epoch": 0.09288591947551485, + "grad_norm": 1.072548747062683, + "learning_rate": 9.896672756853225e-06, + "loss": 0.3312, + "step": 4640 + }, + { + "epoch": 0.0929059379926432, + "grad_norm": 3.1323890686035156, + "learning_rate": 9.896607181531046e-06, + "loss": 0.3217, + "step": 4641 + }, + { + "epoch": 0.09292595650977153, + "grad_norm": 1.0480324029922485, + "learning_rate": 9.896541585624586e-06, + "loss": 0.2936, + "step": 4642 + }, + { + "epoch": 0.09294597502689989, + "grad_norm": 1.0680619478225708, + "learning_rate": 9.896475969134118e-06, + "loss": 0.3686, + "step": 4643 + }, + { + "epoch": 0.09296599354402822, + "grad_norm": 1.060720443725586, + "learning_rate": 9.896410332059919e-06, + "loss": 0.3291, + "step": 4644 + }, + { + "epoch": 0.09298601206115657, + "grad_norm": 0.9471954107284546, + "learning_rate": 9.896344674402263e-06, + "loss": 0.3508, + "step": 4645 + }, + { + "epoch": 0.09300603057828491, + "grad_norm": 1.2965914011001587, + "learning_rate": 9.896278996161428e-06, + "loss": 0.3409, + "step": 4646 + }, + { + "epoch": 0.09302604909541326, + "grad_norm": 1.0916121006011963, + "learning_rate": 9.896213297337691e-06, + "loss": 0.3477, + "step": 4647 + }, + { + "epoch": 0.0930460676125416, + "grad_norm": 1.104055404663086, + "learning_rate": 9.896147577931327e-06, + "loss": 0.2899, + "step": 4648 + }, + { + "epoch": 0.09306608612966995, + "grad_norm": 1.1396214962005615, + "learning_rate": 9.896081837942613e-06, + "loss": 0.384, + "step": 4649 + }, + { + "epoch": 0.09308610464679828, + "grad_norm": 1.859874963760376, + "learning_rate": 9.896016077371822e-06, + "loss": 0.8486, + "step": 4650 + }, + { + "epoch": 0.09310612316392664, + "grad_norm": 1.0569965839385986, + "learning_rate": 9.895950296219234e-06, + "loss": 0.3157, + "step": 4651 + }, + { + "epoch": 0.09312614168105497, + "grad_norm": 2.4091086387634277, + "learning_rate": 9.895884494485127e-06, + "loss": 0.3025, + "step": 4652 + }, + { + "epoch": 0.09314616019818332, + "grad_norm": 1.1101043224334717, + "learning_rate": 9.895818672169772e-06, + "loss": 0.3359, + "step": 4653 + }, + { + "epoch": 0.09316617871531166, + "grad_norm": 1.7535053491592407, + "learning_rate": 9.895752829273451e-06, + "loss": 0.791, + "step": 4654 + }, + { + "epoch": 0.09318619723244001, + "grad_norm": 1.1618802547454834, + "learning_rate": 9.895686965796437e-06, + "loss": 0.3607, + "step": 4655 + }, + { + "epoch": 0.09320621574956835, + "grad_norm": 1.7920253276824951, + "learning_rate": 9.89562108173901e-06, + "loss": 0.8636, + "step": 4656 + }, + { + "epoch": 0.0932262342666967, + "grad_norm": 1.098017692565918, + "learning_rate": 9.895555177101444e-06, + "loss": 0.3361, + "step": 4657 + }, + { + "epoch": 0.09324625278382503, + "grad_norm": 1.233797311782837, + "learning_rate": 9.895489251884019e-06, + "loss": 0.3445, + "step": 4658 + }, + { + "epoch": 0.09326627130095339, + "grad_norm": 1.1414278745651245, + "learning_rate": 9.89542330608701e-06, + "loss": 0.3197, + "step": 4659 + }, + { + "epoch": 0.09328628981808172, + "grad_norm": 1.1127519607543945, + "learning_rate": 9.895357339710694e-06, + "loss": 0.3685, + "step": 4660 + }, + { + "epoch": 0.09330630833521007, + "grad_norm": 1.085418462753296, + "learning_rate": 9.895291352755352e-06, + "loss": 0.347, + "step": 4661 + }, + { + "epoch": 0.09332632685233841, + "grad_norm": 1.0751233100891113, + "learning_rate": 9.895225345221257e-06, + "loss": 0.3492, + "step": 4662 + }, + { + "epoch": 0.09334634536946676, + "grad_norm": 1.1771029233932495, + "learning_rate": 9.895159317108687e-06, + "loss": 0.3477, + "step": 4663 + }, + { + "epoch": 0.0933663638865951, + "grad_norm": 1.0103355646133423, + "learning_rate": 9.895093268417923e-06, + "loss": 0.3603, + "step": 4664 + }, + { + "epoch": 0.09338638240372345, + "grad_norm": 1.0986500978469849, + "learning_rate": 9.895027199149238e-06, + "loss": 0.3619, + "step": 4665 + }, + { + "epoch": 0.09340640092085178, + "grad_norm": 1.213437795639038, + "learning_rate": 9.894961109302915e-06, + "loss": 0.3628, + "step": 4666 + }, + { + "epoch": 0.09342641943798013, + "grad_norm": 1.1605104207992554, + "learning_rate": 9.894894998879225e-06, + "loss": 0.3576, + "step": 4667 + }, + { + "epoch": 0.09344643795510847, + "grad_norm": 1.119326114654541, + "learning_rate": 9.894828867878452e-06, + "loss": 0.341, + "step": 4668 + }, + { + "epoch": 0.09346645647223682, + "grad_norm": 1.9592630863189697, + "learning_rate": 9.894762716300872e-06, + "loss": 0.8993, + "step": 4669 + }, + { + "epoch": 0.09348647498936516, + "grad_norm": 1.2156556844711304, + "learning_rate": 9.894696544146762e-06, + "loss": 0.3722, + "step": 4670 + }, + { + "epoch": 0.09350649350649351, + "grad_norm": 1.994773268699646, + "learning_rate": 9.894630351416403e-06, + "loss": 0.796, + "step": 4671 + }, + { + "epoch": 0.09352651202362185, + "grad_norm": 1.2572414875030518, + "learning_rate": 9.89456413811007e-06, + "loss": 0.355, + "step": 4672 + }, + { + "epoch": 0.0935465305407502, + "grad_norm": 1.0644713640213013, + "learning_rate": 9.894497904228042e-06, + "loss": 0.3517, + "step": 4673 + }, + { + "epoch": 0.09356654905787853, + "grad_norm": 1.1227126121520996, + "learning_rate": 9.8944316497706e-06, + "loss": 0.3506, + "step": 4674 + }, + { + "epoch": 0.09358656757500688, + "grad_norm": 1.0442564487457275, + "learning_rate": 9.89436537473802e-06, + "loss": 0.3416, + "step": 4675 + }, + { + "epoch": 0.09360658609213522, + "grad_norm": 1.1026885509490967, + "learning_rate": 9.89429907913058e-06, + "loss": 0.3065, + "step": 4676 + }, + { + "epoch": 0.09362660460926357, + "grad_norm": 1.2127817869186401, + "learning_rate": 9.894232762948561e-06, + "loss": 0.3427, + "step": 4677 + }, + { + "epoch": 0.09364662312639191, + "grad_norm": 1.123039722442627, + "learning_rate": 9.89416642619224e-06, + "loss": 0.294, + "step": 4678 + }, + { + "epoch": 0.09366664164352026, + "grad_norm": 1.0192450284957886, + "learning_rate": 9.894100068861896e-06, + "loss": 0.3642, + "step": 4679 + }, + { + "epoch": 0.0936866601606486, + "grad_norm": 1.1823303699493408, + "learning_rate": 9.89403369095781e-06, + "loss": 0.3677, + "step": 4680 + }, + { + "epoch": 0.09370667867777695, + "grad_norm": 0.975394606590271, + "learning_rate": 9.89396729248026e-06, + "loss": 0.3116, + "step": 4681 + }, + { + "epoch": 0.09372669719490528, + "grad_norm": 1.1052601337432861, + "learning_rate": 9.893900873429523e-06, + "loss": 0.3298, + "step": 4682 + }, + { + "epoch": 0.09374671571203363, + "grad_norm": 1.2103461027145386, + "learning_rate": 9.89383443380588e-06, + "loss": 0.3434, + "step": 4683 + }, + { + "epoch": 0.09376673422916197, + "grad_norm": 1.1634281873703003, + "learning_rate": 9.893767973609612e-06, + "loss": 0.3349, + "step": 4684 + }, + { + "epoch": 0.09378675274629032, + "grad_norm": 1.116780400276184, + "learning_rate": 9.893701492840995e-06, + "loss": 0.324, + "step": 4685 + }, + { + "epoch": 0.09380677126341866, + "grad_norm": 1.2125052213668823, + "learning_rate": 9.893634991500311e-06, + "loss": 0.3684, + "step": 4686 + }, + { + "epoch": 0.09382678978054701, + "grad_norm": 1.0433224439620972, + "learning_rate": 9.893568469587838e-06, + "loss": 0.3022, + "step": 4687 + }, + { + "epoch": 0.09384680829767535, + "grad_norm": 1.2358704805374146, + "learning_rate": 9.893501927103857e-06, + "loss": 0.3337, + "step": 4688 + }, + { + "epoch": 0.0938668268148037, + "grad_norm": 1.1539499759674072, + "learning_rate": 9.893435364048645e-06, + "loss": 0.3329, + "step": 4689 + }, + { + "epoch": 0.09388684533193203, + "grad_norm": 1.1220489740371704, + "learning_rate": 9.893368780422485e-06, + "loss": 0.3546, + "step": 4690 + }, + { + "epoch": 0.09390686384906038, + "grad_norm": 1.034408688545227, + "learning_rate": 9.893302176225656e-06, + "loss": 0.3351, + "step": 4691 + }, + { + "epoch": 0.09392688236618872, + "grad_norm": 1.1430492401123047, + "learning_rate": 9.893235551458438e-06, + "loss": 0.3794, + "step": 4692 + }, + { + "epoch": 0.09394690088331707, + "grad_norm": 1.379705786705017, + "learning_rate": 9.893168906121111e-06, + "loss": 0.3416, + "step": 4693 + }, + { + "epoch": 0.09396691940044541, + "grad_norm": 1.1758195161819458, + "learning_rate": 9.893102240213954e-06, + "loss": 0.3728, + "step": 4694 + }, + { + "epoch": 0.09398693791757376, + "grad_norm": 1.2358494997024536, + "learning_rate": 9.89303555373725e-06, + "loss": 0.3595, + "step": 4695 + }, + { + "epoch": 0.0940069564347021, + "grad_norm": 1.1291847229003906, + "learning_rate": 9.892968846691275e-06, + "loss": 0.3495, + "step": 4696 + }, + { + "epoch": 0.09402697495183045, + "grad_norm": 1.118160367012024, + "learning_rate": 9.892902119076314e-06, + "loss": 0.3749, + "step": 4697 + }, + { + "epoch": 0.09404699346895878, + "grad_norm": 1.201233983039856, + "learning_rate": 9.892835370892645e-06, + "loss": 0.3356, + "step": 4698 + }, + { + "epoch": 0.09406701198608713, + "grad_norm": 2.0595428943634033, + "learning_rate": 9.892768602140547e-06, + "loss": 0.8348, + "step": 4699 + }, + { + "epoch": 0.09408703050321547, + "grad_norm": 0.9716939330101013, + "learning_rate": 9.892701812820304e-06, + "loss": 0.2946, + "step": 4700 + }, + { + "epoch": 0.09410704902034382, + "grad_norm": 1.1745827198028564, + "learning_rate": 9.892635002932196e-06, + "loss": 0.3288, + "step": 4701 + }, + { + "epoch": 0.09412706753747216, + "grad_norm": 1.0536141395568848, + "learning_rate": 9.892568172476503e-06, + "loss": 0.3619, + "step": 4702 + }, + { + "epoch": 0.09414708605460051, + "grad_norm": 1.1211212873458862, + "learning_rate": 9.892501321453506e-06, + "loss": 0.3418, + "step": 4703 + }, + { + "epoch": 0.09416710457172885, + "grad_norm": 1.2103880643844604, + "learning_rate": 9.892434449863488e-06, + "loss": 0.3174, + "step": 4704 + }, + { + "epoch": 0.0941871230888572, + "grad_norm": 1.0721328258514404, + "learning_rate": 9.892367557706726e-06, + "loss": 0.3498, + "step": 4705 + }, + { + "epoch": 0.09420714160598553, + "grad_norm": 1.1151432991027832, + "learning_rate": 9.892300644983504e-06, + "loss": 0.3474, + "step": 4706 + }, + { + "epoch": 0.09422716012311388, + "grad_norm": 1.245870590209961, + "learning_rate": 9.892233711694104e-06, + "loss": 0.3242, + "step": 4707 + }, + { + "epoch": 0.09424717864024222, + "grad_norm": 1.8334282636642456, + "learning_rate": 9.892166757838806e-06, + "loss": 0.9308, + "step": 4708 + }, + { + "epoch": 0.09426719715737057, + "grad_norm": 1.202724814414978, + "learning_rate": 9.892099783417892e-06, + "loss": 0.3625, + "step": 4709 + }, + { + "epoch": 0.09428721567449891, + "grad_norm": 1.050378680229187, + "learning_rate": 9.892032788431644e-06, + "loss": 0.3176, + "step": 4710 + }, + { + "epoch": 0.09430723419162726, + "grad_norm": 1.0870568752288818, + "learning_rate": 9.891965772880341e-06, + "loss": 0.3905, + "step": 4711 + }, + { + "epoch": 0.0943272527087556, + "grad_norm": 1.036117672920227, + "learning_rate": 9.891898736764267e-06, + "loss": 0.3127, + "step": 4712 + }, + { + "epoch": 0.09434727122588395, + "grad_norm": 1.1029070615768433, + "learning_rate": 9.891831680083706e-06, + "loss": 0.3488, + "step": 4713 + }, + { + "epoch": 0.09436728974301228, + "grad_norm": 1.149531602859497, + "learning_rate": 9.891764602838934e-06, + "loss": 0.3647, + "step": 4714 + }, + { + "epoch": 0.09438730826014063, + "grad_norm": 1.142680287361145, + "learning_rate": 9.891697505030238e-06, + "loss": 0.3491, + "step": 4715 + }, + { + "epoch": 0.09440732677726897, + "grad_norm": 1.0157469511032104, + "learning_rate": 9.891630386657898e-06, + "loss": 0.3548, + "step": 4716 + }, + { + "epoch": 0.09442734529439732, + "grad_norm": 1.1988171339035034, + "learning_rate": 9.891563247722198e-06, + "loss": 0.3202, + "step": 4717 + }, + { + "epoch": 0.09444736381152566, + "grad_norm": 1.0825161933898926, + "learning_rate": 9.891496088223417e-06, + "loss": 0.3171, + "step": 4718 + }, + { + "epoch": 0.09446738232865401, + "grad_norm": 1.3321304321289062, + "learning_rate": 9.89142890816184e-06, + "loss": 0.3075, + "step": 4719 + }, + { + "epoch": 0.09448740084578235, + "grad_norm": 1.065247654914856, + "learning_rate": 9.891361707537747e-06, + "loss": 0.3442, + "step": 4720 + }, + { + "epoch": 0.0945074193629107, + "grad_norm": 1.1788313388824463, + "learning_rate": 9.891294486351424e-06, + "loss": 0.354, + "step": 4721 + }, + { + "epoch": 0.09452743788003903, + "grad_norm": 1.1217268705368042, + "learning_rate": 9.89122724460315e-06, + "loss": 0.2645, + "step": 4722 + }, + { + "epoch": 0.09454745639716738, + "grad_norm": 1.8309698104858398, + "learning_rate": 9.89115998229321e-06, + "loss": 0.8102, + "step": 4723 + }, + { + "epoch": 0.09456747491429572, + "grad_norm": 1.049710988998413, + "learning_rate": 9.891092699421887e-06, + "loss": 0.2891, + "step": 4724 + }, + { + "epoch": 0.09458749343142407, + "grad_norm": 1.1675221920013428, + "learning_rate": 9.891025395989462e-06, + "loss": 0.3391, + "step": 4725 + }, + { + "epoch": 0.09460751194855241, + "grad_norm": 1.1657054424285889, + "learning_rate": 9.890958071996219e-06, + "loss": 0.3273, + "step": 4726 + }, + { + "epoch": 0.09462753046568076, + "grad_norm": 1.0783360004425049, + "learning_rate": 9.890890727442439e-06, + "loss": 0.3239, + "step": 4727 + }, + { + "epoch": 0.0946475489828091, + "grad_norm": 1.347533941268921, + "learning_rate": 9.890823362328409e-06, + "loss": 0.3831, + "step": 4728 + }, + { + "epoch": 0.09466756749993745, + "grad_norm": 1.1152640581130981, + "learning_rate": 9.89075597665441e-06, + "loss": 0.3565, + "step": 4729 + }, + { + "epoch": 0.09468758601706578, + "grad_norm": 1.1825289726257324, + "learning_rate": 9.890688570420724e-06, + "loss": 0.3224, + "step": 4730 + }, + { + "epoch": 0.09470760453419413, + "grad_norm": 1.0933661460876465, + "learning_rate": 9.890621143627637e-06, + "loss": 0.3244, + "step": 4731 + }, + { + "epoch": 0.09472762305132247, + "grad_norm": 1.0455986261367798, + "learning_rate": 9.890553696275431e-06, + "loss": 0.3201, + "step": 4732 + }, + { + "epoch": 0.09474764156845082, + "grad_norm": 1.198011875152588, + "learning_rate": 9.89048622836439e-06, + "loss": 0.3413, + "step": 4733 + }, + { + "epoch": 0.09476766008557916, + "grad_norm": 1.0400904417037964, + "learning_rate": 9.890418739894796e-06, + "loss": 0.3274, + "step": 4734 + }, + { + "epoch": 0.09478767860270751, + "grad_norm": 2.078843593597412, + "learning_rate": 9.890351230866935e-06, + "loss": 0.9153, + "step": 4735 + }, + { + "epoch": 0.09480769711983585, + "grad_norm": 1.1871179342269897, + "learning_rate": 9.89028370128109e-06, + "loss": 0.3337, + "step": 4736 + }, + { + "epoch": 0.0948277156369642, + "grad_norm": 1.257460594177246, + "learning_rate": 9.890216151137545e-06, + "loss": 0.3914, + "step": 4737 + }, + { + "epoch": 0.09484773415409253, + "grad_norm": 1.0355006456375122, + "learning_rate": 9.890148580436583e-06, + "loss": 0.3004, + "step": 4738 + }, + { + "epoch": 0.09486775267122088, + "grad_norm": 1.1348845958709717, + "learning_rate": 9.890080989178488e-06, + "loss": 0.3374, + "step": 4739 + }, + { + "epoch": 0.09488777118834922, + "grad_norm": 1.0905195474624634, + "learning_rate": 9.890013377363546e-06, + "loss": 0.3408, + "step": 4740 + }, + { + "epoch": 0.09490778970547757, + "grad_norm": 1.133828043937683, + "learning_rate": 9.88994574499204e-06, + "loss": 0.3605, + "step": 4741 + }, + { + "epoch": 0.09492780822260591, + "grad_norm": 1.0514259338378906, + "learning_rate": 9.889878092064256e-06, + "loss": 0.3638, + "step": 4742 + }, + { + "epoch": 0.09494782673973426, + "grad_norm": 1.0332003831863403, + "learning_rate": 9.889810418580474e-06, + "loss": 0.3663, + "step": 4743 + }, + { + "epoch": 0.0949678452568626, + "grad_norm": 1.0774060487747192, + "learning_rate": 9.889742724540983e-06, + "loss": 0.2971, + "step": 4744 + }, + { + "epoch": 0.09498786377399095, + "grad_norm": 1.254685640335083, + "learning_rate": 9.889675009946065e-06, + "loss": 0.3404, + "step": 4745 + }, + { + "epoch": 0.09500788229111928, + "grad_norm": 1.2123788595199585, + "learning_rate": 9.889607274796004e-06, + "loss": 0.347, + "step": 4746 + }, + { + "epoch": 0.09502790080824763, + "grad_norm": 1.1635265350341797, + "learning_rate": 9.88953951909109e-06, + "loss": 0.3474, + "step": 4747 + }, + { + "epoch": 0.09504791932537597, + "grad_norm": 1.0815621614456177, + "learning_rate": 9.889471742831603e-06, + "loss": 0.3105, + "step": 4748 + }, + { + "epoch": 0.09506793784250432, + "grad_norm": 1.0267966985702515, + "learning_rate": 9.889403946017828e-06, + "loss": 0.3282, + "step": 4749 + }, + { + "epoch": 0.09508795635963266, + "grad_norm": 0.9914000630378723, + "learning_rate": 9.889336128650051e-06, + "loss": 0.3043, + "step": 4750 + }, + { + "epoch": 0.09510797487676101, + "grad_norm": 1.1657911539077759, + "learning_rate": 9.889268290728558e-06, + "loss": 0.3322, + "step": 4751 + }, + { + "epoch": 0.09512799339388935, + "grad_norm": 1.1263083219528198, + "learning_rate": 9.889200432253632e-06, + "loss": 0.3159, + "step": 4752 + }, + { + "epoch": 0.0951480119110177, + "grad_norm": 1.0946401357650757, + "learning_rate": 9.88913255322556e-06, + "loss": 0.3267, + "step": 4753 + }, + { + "epoch": 0.09516803042814603, + "grad_norm": 1.1351045370101929, + "learning_rate": 9.889064653644627e-06, + "loss": 0.312, + "step": 4754 + }, + { + "epoch": 0.09518804894527438, + "grad_norm": 1.9033021926879883, + "learning_rate": 9.888996733511119e-06, + "loss": 0.8573, + "step": 4755 + }, + { + "epoch": 0.09520806746240272, + "grad_norm": 1.0198180675506592, + "learning_rate": 9.888928792825318e-06, + "loss": 0.3263, + "step": 4756 + }, + { + "epoch": 0.09522808597953107, + "grad_norm": 1.2272439002990723, + "learning_rate": 9.888860831587517e-06, + "loss": 0.3275, + "step": 4757 + }, + { + "epoch": 0.09524810449665941, + "grad_norm": 1.0826627016067505, + "learning_rate": 9.888792849797994e-06, + "loss": 0.3262, + "step": 4758 + }, + { + "epoch": 0.09526812301378776, + "grad_norm": 1.1081750392913818, + "learning_rate": 9.888724847457038e-06, + "loss": 0.3391, + "step": 4759 + }, + { + "epoch": 0.0952881415309161, + "grad_norm": 1.1615592241287231, + "learning_rate": 9.888656824564938e-06, + "loss": 0.339, + "step": 4760 + }, + { + "epoch": 0.09530816004804445, + "grad_norm": 1.0256024599075317, + "learning_rate": 9.888588781121972e-06, + "loss": 0.3059, + "step": 4761 + }, + { + "epoch": 0.09532817856517278, + "grad_norm": 1.0979586839675903, + "learning_rate": 9.888520717128434e-06, + "loss": 0.3124, + "step": 4762 + }, + { + "epoch": 0.09534819708230113, + "grad_norm": 1.224400281906128, + "learning_rate": 9.888452632584606e-06, + "loss": 0.2841, + "step": 4763 + }, + { + "epoch": 0.09536821559942947, + "grad_norm": 1.154288649559021, + "learning_rate": 9.888384527490775e-06, + "loss": 0.3316, + "step": 4764 + }, + { + "epoch": 0.09538823411655782, + "grad_norm": 1.0539854764938354, + "learning_rate": 9.888316401847228e-06, + "loss": 0.3062, + "step": 4765 + }, + { + "epoch": 0.09540825263368616, + "grad_norm": 0.9903861880302429, + "learning_rate": 9.888248255654251e-06, + "loss": 0.3049, + "step": 4766 + }, + { + "epoch": 0.09542827115081451, + "grad_norm": 1.2941110134124756, + "learning_rate": 9.888180088912129e-06, + "loss": 0.3387, + "step": 4767 + }, + { + "epoch": 0.09544828966794285, + "grad_norm": 1.1480180025100708, + "learning_rate": 9.88811190162115e-06, + "loss": 0.3869, + "step": 4768 + }, + { + "epoch": 0.0954683081850712, + "grad_norm": 1.052952527999878, + "learning_rate": 9.888043693781602e-06, + "loss": 0.3095, + "step": 4769 + }, + { + "epoch": 0.09548832670219953, + "grad_norm": 1.765926480293274, + "learning_rate": 9.88797546539377e-06, + "loss": 0.8615, + "step": 4770 + }, + { + "epoch": 0.09550834521932788, + "grad_norm": 1.1631460189819336, + "learning_rate": 9.88790721645794e-06, + "loss": 0.3455, + "step": 4771 + }, + { + "epoch": 0.09552836373645622, + "grad_norm": 1.2044446468353271, + "learning_rate": 9.887838946974401e-06, + "loss": 0.3565, + "step": 4772 + }, + { + "epoch": 0.09554838225358457, + "grad_norm": 1.0234086513519287, + "learning_rate": 9.887770656943438e-06, + "loss": 0.2958, + "step": 4773 + }, + { + "epoch": 0.09556840077071291, + "grad_norm": 1.155669927597046, + "learning_rate": 9.88770234636534e-06, + "loss": 0.352, + "step": 4774 + }, + { + "epoch": 0.09558841928784126, + "grad_norm": 1.326493263244629, + "learning_rate": 9.887634015240392e-06, + "loss": 0.3627, + "step": 4775 + }, + { + "epoch": 0.0956084378049696, + "grad_norm": 1.0318498611450195, + "learning_rate": 9.887565663568883e-06, + "loss": 0.3282, + "step": 4776 + }, + { + "epoch": 0.09562845632209795, + "grad_norm": 1.0610835552215576, + "learning_rate": 9.8874972913511e-06, + "loss": 0.3412, + "step": 4777 + }, + { + "epoch": 0.09564847483922628, + "grad_norm": 1.1866885423660278, + "learning_rate": 9.887428898587331e-06, + "loss": 0.3187, + "step": 4778 + }, + { + "epoch": 0.09566849335635463, + "grad_norm": 1.1226054430007935, + "learning_rate": 9.88736048527786e-06, + "loss": 0.2971, + "step": 4779 + }, + { + "epoch": 0.09568851187348297, + "grad_norm": 1.1117651462554932, + "learning_rate": 9.88729205142298e-06, + "loss": 0.371, + "step": 4780 + }, + { + "epoch": 0.09570853039061132, + "grad_norm": 1.1999461650848389, + "learning_rate": 9.887223597022974e-06, + "loss": 0.3163, + "step": 4781 + }, + { + "epoch": 0.09572854890773966, + "grad_norm": 1.794084072113037, + "learning_rate": 9.887155122078133e-06, + "loss": 0.9471, + "step": 4782 + }, + { + "epoch": 0.09574856742486801, + "grad_norm": 1.0045244693756104, + "learning_rate": 9.887086626588744e-06, + "loss": 0.3202, + "step": 4783 + }, + { + "epoch": 0.09576858594199635, + "grad_norm": 1.1080970764160156, + "learning_rate": 9.887018110555094e-06, + "loss": 0.3334, + "step": 4784 + }, + { + "epoch": 0.0957886044591247, + "grad_norm": 0.996189534664154, + "learning_rate": 9.886949573977471e-06, + "loss": 0.2729, + "step": 4785 + }, + { + "epoch": 0.09580862297625303, + "grad_norm": 1.2396429777145386, + "learning_rate": 9.886881016856164e-06, + "loss": 0.3037, + "step": 4786 + }, + { + "epoch": 0.09582864149338138, + "grad_norm": 1.0555733442306519, + "learning_rate": 9.88681243919146e-06, + "loss": 0.3154, + "step": 4787 + }, + { + "epoch": 0.09584866001050972, + "grad_norm": 1.07500422000885, + "learning_rate": 9.88674384098365e-06, + "loss": 0.3389, + "step": 4788 + }, + { + "epoch": 0.09586867852763807, + "grad_norm": 1.2470420598983765, + "learning_rate": 9.88667522223302e-06, + "loss": 0.3586, + "step": 4789 + }, + { + "epoch": 0.09588869704476641, + "grad_norm": 2.0616562366485596, + "learning_rate": 9.886606582939857e-06, + "loss": 0.8671, + "step": 4790 + }, + { + "epoch": 0.09590871556189476, + "grad_norm": 1.1751099824905396, + "learning_rate": 9.886537923104453e-06, + "loss": 0.3448, + "step": 4791 + }, + { + "epoch": 0.0959287340790231, + "grad_norm": 1.1996451616287231, + "learning_rate": 9.886469242727096e-06, + "loss": 0.3636, + "step": 4792 + }, + { + "epoch": 0.09594875259615145, + "grad_norm": 1.0485643148422241, + "learning_rate": 9.886400541808073e-06, + "loss": 0.3064, + "step": 4793 + }, + { + "epoch": 0.09596877111327978, + "grad_norm": 1.1557272672653198, + "learning_rate": 9.886331820347674e-06, + "loss": 0.3749, + "step": 4794 + }, + { + "epoch": 0.09598878963040813, + "grad_norm": 1.1303563117980957, + "learning_rate": 9.886263078346188e-06, + "loss": 0.365, + "step": 4795 + }, + { + "epoch": 0.09600880814753647, + "grad_norm": 1.029091715812683, + "learning_rate": 9.886194315803904e-06, + "loss": 0.3377, + "step": 4796 + }, + { + "epoch": 0.09602882666466482, + "grad_norm": 1.1461259126663208, + "learning_rate": 9.886125532721108e-06, + "loss": 0.3187, + "step": 4797 + }, + { + "epoch": 0.09604884518179316, + "grad_norm": 1.1568981409072876, + "learning_rate": 9.886056729098093e-06, + "loss": 0.328, + "step": 4798 + }, + { + "epoch": 0.09606886369892151, + "grad_norm": 1.15010666847229, + "learning_rate": 9.885987904935148e-06, + "loss": 0.3848, + "step": 4799 + }, + { + "epoch": 0.09608888221604985, + "grad_norm": 1.24551522731781, + "learning_rate": 9.88591906023256e-06, + "loss": 0.3424, + "step": 4800 + }, + { + "epoch": 0.0961089007331782, + "grad_norm": 1.086617350578308, + "learning_rate": 9.885850194990621e-06, + "loss": 0.33, + "step": 4801 + }, + { + "epoch": 0.09612891925030653, + "grad_norm": 1.183618187904358, + "learning_rate": 9.885781309209619e-06, + "loss": 0.3302, + "step": 4802 + }, + { + "epoch": 0.09614893776743488, + "grad_norm": 1.0868160724639893, + "learning_rate": 9.885712402889843e-06, + "loss": 0.3422, + "step": 4803 + }, + { + "epoch": 0.09616895628456322, + "grad_norm": 1.1149718761444092, + "learning_rate": 9.885643476031583e-06, + "loss": 0.3722, + "step": 4804 + }, + { + "epoch": 0.09618897480169157, + "grad_norm": 1.1104072332382202, + "learning_rate": 9.88557452863513e-06, + "loss": 0.2832, + "step": 4805 + }, + { + "epoch": 0.09620899331881991, + "grad_norm": 1.1792092323303223, + "learning_rate": 9.885505560700774e-06, + "loss": 0.3538, + "step": 4806 + }, + { + "epoch": 0.09622901183594826, + "grad_norm": 1.1499812602996826, + "learning_rate": 9.885436572228803e-06, + "loss": 0.3193, + "step": 4807 + }, + { + "epoch": 0.0962490303530766, + "grad_norm": 1.927562952041626, + "learning_rate": 9.885367563219508e-06, + "loss": 0.799, + "step": 4808 + }, + { + "epoch": 0.09626904887020495, + "grad_norm": 1.1121716499328613, + "learning_rate": 9.885298533673179e-06, + "loss": 0.3384, + "step": 4809 + }, + { + "epoch": 0.09628906738733328, + "grad_norm": 1.0505938529968262, + "learning_rate": 9.885229483590108e-06, + "loss": 0.3333, + "step": 4810 + }, + { + "epoch": 0.09630908590446163, + "grad_norm": 1.7979706525802612, + "learning_rate": 9.885160412970582e-06, + "loss": 0.9044, + "step": 4811 + }, + { + "epoch": 0.09632910442158997, + "grad_norm": 1.014290690422058, + "learning_rate": 9.885091321814893e-06, + "loss": 0.3028, + "step": 4812 + }, + { + "epoch": 0.09634912293871832, + "grad_norm": 1.0876604318618774, + "learning_rate": 9.885022210123332e-06, + "loss": 0.3184, + "step": 4813 + }, + { + "epoch": 0.09636914145584666, + "grad_norm": 1.2544245719909668, + "learning_rate": 9.884953077896188e-06, + "loss": 0.3301, + "step": 4814 + }, + { + "epoch": 0.09638915997297501, + "grad_norm": 1.2041120529174805, + "learning_rate": 9.884883925133753e-06, + "loss": 0.3251, + "step": 4815 + }, + { + "epoch": 0.09640917849010334, + "grad_norm": 1.0676162242889404, + "learning_rate": 9.884814751836318e-06, + "loss": 0.3371, + "step": 4816 + }, + { + "epoch": 0.0964291970072317, + "grad_norm": 1.099949598312378, + "learning_rate": 9.88474555800417e-06, + "loss": 0.2815, + "step": 4817 + }, + { + "epoch": 0.09644921552436003, + "grad_norm": 1.1128952503204346, + "learning_rate": 9.884676343637606e-06, + "loss": 0.3434, + "step": 4818 + }, + { + "epoch": 0.09646923404148838, + "grad_norm": 1.189819574356079, + "learning_rate": 9.884607108736912e-06, + "loss": 0.3321, + "step": 4819 + }, + { + "epoch": 0.09648925255861672, + "grad_norm": 1.0534950494766235, + "learning_rate": 9.884537853302383e-06, + "loss": 0.3223, + "step": 4820 + }, + { + "epoch": 0.09650927107574507, + "grad_norm": 1.1589940786361694, + "learning_rate": 9.884468577334307e-06, + "loss": 0.3492, + "step": 4821 + }, + { + "epoch": 0.09652928959287341, + "grad_norm": 1.1045091152191162, + "learning_rate": 9.884399280832974e-06, + "loss": 0.3677, + "step": 4822 + }, + { + "epoch": 0.09654930811000176, + "grad_norm": 1.2444247007369995, + "learning_rate": 9.88432996379868e-06, + "loss": 0.3313, + "step": 4823 + }, + { + "epoch": 0.0965693266271301, + "grad_norm": 1.0676895380020142, + "learning_rate": 9.884260626231714e-06, + "loss": 0.3506, + "step": 4824 + }, + { + "epoch": 0.09658934514425845, + "grad_norm": 1.0368902683258057, + "learning_rate": 9.884191268132367e-06, + "loss": 0.2837, + "step": 4825 + }, + { + "epoch": 0.09660936366138678, + "grad_norm": 1.0802346467971802, + "learning_rate": 9.884121889500931e-06, + "loss": 0.3377, + "step": 4826 + }, + { + "epoch": 0.09662938217851513, + "grad_norm": 1.057187557220459, + "learning_rate": 9.884052490337698e-06, + "loss": 0.3506, + "step": 4827 + }, + { + "epoch": 0.09664940069564347, + "grad_norm": 1.2748908996582031, + "learning_rate": 9.883983070642959e-06, + "loss": 0.3095, + "step": 4828 + }, + { + "epoch": 0.09666941921277182, + "grad_norm": 1.2515181303024292, + "learning_rate": 9.883913630417006e-06, + "loss": 0.317, + "step": 4829 + }, + { + "epoch": 0.09668943772990016, + "grad_norm": 1.0599900484085083, + "learning_rate": 9.88384416966013e-06, + "loss": 0.3236, + "step": 4830 + }, + { + "epoch": 0.09670945624702851, + "grad_norm": 1.1172637939453125, + "learning_rate": 9.883774688372627e-06, + "loss": 0.356, + "step": 4831 + }, + { + "epoch": 0.09672947476415684, + "grad_norm": 1.06431245803833, + "learning_rate": 9.883705186554784e-06, + "loss": 0.3065, + "step": 4832 + }, + { + "epoch": 0.0967494932812852, + "grad_norm": 1.0676147937774658, + "learning_rate": 9.883635664206897e-06, + "loss": 0.3116, + "step": 4833 + }, + { + "epoch": 0.09676951179841353, + "grad_norm": 1.0689797401428223, + "learning_rate": 9.883566121329255e-06, + "loss": 0.2878, + "step": 4834 + }, + { + "epoch": 0.09678953031554188, + "grad_norm": 2.0205514430999756, + "learning_rate": 9.883496557922152e-06, + "loss": 0.8075, + "step": 4835 + }, + { + "epoch": 0.09680954883267022, + "grad_norm": 1.1926428079605103, + "learning_rate": 9.883426973985882e-06, + "loss": 0.3106, + "step": 4836 + }, + { + "epoch": 0.09682956734979857, + "grad_norm": 1.1756786108016968, + "learning_rate": 9.883357369520734e-06, + "loss": 0.3217, + "step": 4837 + }, + { + "epoch": 0.09684958586692691, + "grad_norm": 1.0530675649642944, + "learning_rate": 9.883287744527004e-06, + "loss": 0.3812, + "step": 4838 + }, + { + "epoch": 0.09686960438405526, + "grad_norm": 1.1208337545394897, + "learning_rate": 9.883218099004983e-06, + "loss": 0.3639, + "step": 4839 + }, + { + "epoch": 0.0968896229011836, + "grad_norm": 1.104323387145996, + "learning_rate": 9.883148432954963e-06, + "loss": 0.3194, + "step": 4840 + }, + { + "epoch": 0.09690964141831195, + "grad_norm": 1.2977190017700195, + "learning_rate": 9.883078746377239e-06, + "loss": 0.3227, + "step": 4841 + }, + { + "epoch": 0.09692965993544028, + "grad_norm": 1.22068452835083, + "learning_rate": 9.883009039272103e-06, + "loss": 0.3259, + "step": 4842 + }, + { + "epoch": 0.09694967845256863, + "grad_norm": 1.1843440532684326, + "learning_rate": 9.882939311639847e-06, + "loss": 0.3357, + "step": 4843 + }, + { + "epoch": 0.09696969696969697, + "grad_norm": 1.056255578994751, + "learning_rate": 9.882869563480765e-06, + "loss": 0.3486, + "step": 4844 + }, + { + "epoch": 0.09698971548682532, + "grad_norm": 1.049831748008728, + "learning_rate": 9.88279979479515e-06, + "loss": 0.3542, + "step": 4845 + }, + { + "epoch": 0.09700973400395366, + "grad_norm": 1.0490485429763794, + "learning_rate": 9.882730005583297e-06, + "loss": 0.3295, + "step": 4846 + }, + { + "epoch": 0.09702975252108201, + "grad_norm": 1.6617568731307983, + "learning_rate": 9.882660195845496e-06, + "loss": 0.8844, + "step": 4847 + }, + { + "epoch": 0.09704977103821034, + "grad_norm": 1.0676888227462769, + "learning_rate": 9.882590365582043e-06, + "loss": 0.2794, + "step": 4848 + }, + { + "epoch": 0.09706978955533868, + "grad_norm": 1.1322253942489624, + "learning_rate": 9.88252051479323e-06, + "loss": 0.3502, + "step": 4849 + }, + { + "epoch": 0.09708980807246703, + "grad_norm": 1.2288010120391846, + "learning_rate": 9.882450643479353e-06, + "loss": 0.3553, + "step": 4850 + }, + { + "epoch": 0.09710982658959537, + "grad_norm": 1.0410529375076294, + "learning_rate": 9.882380751640704e-06, + "loss": 0.3143, + "step": 4851 + }, + { + "epoch": 0.09712984510672372, + "grad_norm": 1.066434383392334, + "learning_rate": 9.882310839277576e-06, + "loss": 0.3331, + "step": 4852 + }, + { + "epoch": 0.09714986362385206, + "grad_norm": 1.0974609851837158, + "learning_rate": 9.882240906390264e-06, + "loss": 0.3371, + "step": 4853 + }, + { + "epoch": 0.09716988214098041, + "grad_norm": 1.0183688402175903, + "learning_rate": 9.882170952979062e-06, + "loss": 0.3141, + "step": 4854 + }, + { + "epoch": 0.09718990065810874, + "grad_norm": 1.9416894912719727, + "learning_rate": 9.882100979044265e-06, + "loss": 0.7764, + "step": 4855 + }, + { + "epoch": 0.0972099191752371, + "grad_norm": 1.92948317527771, + "learning_rate": 9.882030984586166e-06, + "loss": 0.7758, + "step": 4856 + }, + { + "epoch": 0.09722993769236543, + "grad_norm": 1.0436077117919922, + "learning_rate": 9.881960969605057e-06, + "loss": 0.3786, + "step": 4857 + }, + { + "epoch": 0.09724995620949378, + "grad_norm": 1.1063661575317383, + "learning_rate": 9.881890934101236e-06, + "loss": 0.3066, + "step": 4858 + }, + { + "epoch": 0.09726997472662212, + "grad_norm": 2.021845817565918, + "learning_rate": 9.881820878075e-06, + "loss": 0.8569, + "step": 4859 + }, + { + "epoch": 0.09728999324375047, + "grad_norm": 1.2758482694625854, + "learning_rate": 9.881750801526635e-06, + "loss": 0.3808, + "step": 4860 + }, + { + "epoch": 0.0973100117608788, + "grad_norm": 1.0433465242385864, + "learning_rate": 9.88168070445644e-06, + "loss": 0.3447, + "step": 4861 + }, + { + "epoch": 0.09733003027800716, + "grad_norm": 1.1395028829574585, + "learning_rate": 9.881610586864712e-06, + "loss": 0.3601, + "step": 4862 + }, + { + "epoch": 0.0973500487951355, + "grad_norm": 1.111998200416565, + "learning_rate": 9.881540448751743e-06, + "loss": 0.362, + "step": 4863 + }, + { + "epoch": 0.09737006731226384, + "grad_norm": 1.1252309083938599, + "learning_rate": 9.881470290117829e-06, + "loss": 0.3284, + "step": 4864 + }, + { + "epoch": 0.09739008582939218, + "grad_norm": 1.116455316543579, + "learning_rate": 9.881400110963263e-06, + "loss": 0.3221, + "step": 4865 + }, + { + "epoch": 0.09741010434652053, + "grad_norm": 1.0585514307022095, + "learning_rate": 9.881329911288342e-06, + "loss": 0.3154, + "step": 4866 + }, + { + "epoch": 0.09743012286364887, + "grad_norm": 1.0358922481536865, + "learning_rate": 9.881259691093361e-06, + "loss": 0.2868, + "step": 4867 + }, + { + "epoch": 0.09745014138077722, + "grad_norm": 1.1228505373001099, + "learning_rate": 9.881189450378614e-06, + "loss": 0.365, + "step": 4868 + }, + { + "epoch": 0.09747015989790556, + "grad_norm": 1.0319477319717407, + "learning_rate": 9.881119189144396e-06, + "loss": 0.2834, + "step": 4869 + }, + { + "epoch": 0.0974901784150339, + "grad_norm": 1.040061593055725, + "learning_rate": 9.881048907391006e-06, + "loss": 0.3576, + "step": 4870 + }, + { + "epoch": 0.09751019693216224, + "grad_norm": 1.011289119720459, + "learning_rate": 9.880978605118736e-06, + "loss": 0.2983, + "step": 4871 + }, + { + "epoch": 0.0975302154492906, + "grad_norm": 1.2626581192016602, + "learning_rate": 9.880908282327881e-06, + "loss": 0.3304, + "step": 4872 + }, + { + "epoch": 0.09755023396641893, + "grad_norm": 1.745839238166809, + "learning_rate": 9.88083793901874e-06, + "loss": 0.8698, + "step": 4873 + }, + { + "epoch": 0.09757025248354728, + "grad_norm": 1.111366868019104, + "learning_rate": 9.880767575191606e-06, + "loss": 0.3352, + "step": 4874 + }, + { + "epoch": 0.09759027100067562, + "grad_norm": 1.0827950239181519, + "learning_rate": 9.880697190846776e-06, + "loss": 0.3276, + "step": 4875 + }, + { + "epoch": 0.09761028951780397, + "grad_norm": 1.2585773468017578, + "learning_rate": 9.880626785984543e-06, + "loss": 0.3605, + "step": 4876 + }, + { + "epoch": 0.0976303080349323, + "grad_norm": 1.0599263906478882, + "learning_rate": 9.880556360605207e-06, + "loss": 0.3702, + "step": 4877 + }, + { + "epoch": 0.09765032655206066, + "grad_norm": 1.1192649602890015, + "learning_rate": 9.880485914709062e-06, + "loss": 0.3567, + "step": 4878 + }, + { + "epoch": 0.097670345069189, + "grad_norm": 1.0577534437179565, + "learning_rate": 9.880415448296404e-06, + "loss": 0.2938, + "step": 4879 + }, + { + "epoch": 0.09769036358631734, + "grad_norm": 1.0200973749160767, + "learning_rate": 9.88034496136753e-06, + "loss": 0.3036, + "step": 4880 + }, + { + "epoch": 0.09771038210344568, + "grad_norm": 1.1892204284667969, + "learning_rate": 9.880274453922737e-06, + "loss": 0.3469, + "step": 4881 + }, + { + "epoch": 0.09773040062057403, + "grad_norm": 1.1974914073944092, + "learning_rate": 9.880203925962319e-06, + "loss": 0.3595, + "step": 4882 + }, + { + "epoch": 0.09775041913770237, + "grad_norm": 0.9932042956352234, + "learning_rate": 9.880133377486576e-06, + "loss": 0.3116, + "step": 4883 + }, + { + "epoch": 0.09777043765483072, + "grad_norm": 1.036340594291687, + "learning_rate": 9.8800628084958e-06, + "loss": 0.3677, + "step": 4884 + }, + { + "epoch": 0.09779045617195906, + "grad_norm": 1.2204599380493164, + "learning_rate": 9.879992218990292e-06, + "loss": 0.3578, + "step": 4885 + }, + { + "epoch": 0.0978104746890874, + "grad_norm": 1.542702317237854, + "learning_rate": 9.879921608970347e-06, + "loss": 0.409, + "step": 4886 + }, + { + "epoch": 0.09783049320621574, + "grad_norm": 1.0858573913574219, + "learning_rate": 9.87985097843626e-06, + "loss": 0.2921, + "step": 4887 + }, + { + "epoch": 0.0978505117233441, + "grad_norm": 1.20797860622406, + "learning_rate": 9.879780327388331e-06, + "loss": 0.2975, + "step": 4888 + }, + { + "epoch": 0.09787053024047243, + "grad_norm": 1.0750926733016968, + "learning_rate": 9.879709655826855e-06, + "loss": 0.2767, + "step": 4889 + }, + { + "epoch": 0.09789054875760078, + "grad_norm": 1.8525513410568237, + "learning_rate": 9.879638963752129e-06, + "loss": 0.9187, + "step": 4890 + }, + { + "epoch": 0.09791056727472912, + "grad_norm": 1.072407841682434, + "learning_rate": 9.879568251164453e-06, + "loss": 0.3505, + "step": 4891 + }, + { + "epoch": 0.09793058579185747, + "grad_norm": 1.7507973909378052, + "learning_rate": 9.87949751806412e-06, + "loss": 0.8446, + "step": 4892 + }, + { + "epoch": 0.0979506043089858, + "grad_norm": 1.6644302606582642, + "learning_rate": 9.879426764451431e-06, + "loss": 0.8283, + "step": 4893 + }, + { + "epoch": 0.09797062282611416, + "grad_norm": 1.0257943868637085, + "learning_rate": 9.879355990326682e-06, + "loss": 0.3343, + "step": 4894 + }, + { + "epoch": 0.0979906413432425, + "grad_norm": 1.0659366846084595, + "learning_rate": 9.879285195690169e-06, + "loss": 0.3327, + "step": 4895 + }, + { + "epoch": 0.09801065986037084, + "grad_norm": 1.1708827018737793, + "learning_rate": 9.879214380542193e-06, + "loss": 0.3339, + "step": 4896 + }, + { + "epoch": 0.09803067837749918, + "grad_norm": 1.114704966545105, + "learning_rate": 9.879143544883048e-06, + "loss": 0.3211, + "step": 4897 + }, + { + "epoch": 0.09805069689462753, + "grad_norm": 1.050057053565979, + "learning_rate": 9.879072688713035e-06, + "loss": 0.2878, + "step": 4898 + }, + { + "epoch": 0.09807071541175587, + "grad_norm": 1.179585576057434, + "learning_rate": 9.879001812032449e-06, + "loss": 0.3016, + "step": 4899 + }, + { + "epoch": 0.09809073392888422, + "grad_norm": 1.0158482789993286, + "learning_rate": 9.87893091484159e-06, + "loss": 0.307, + "step": 4900 + }, + { + "epoch": 0.09811075244601256, + "grad_norm": 1.1363279819488525, + "learning_rate": 9.878859997140757e-06, + "loss": 0.3528, + "step": 4901 + }, + { + "epoch": 0.0981307709631409, + "grad_norm": 1.0419906377792358, + "learning_rate": 9.878789058930245e-06, + "loss": 0.3777, + "step": 4902 + }, + { + "epoch": 0.09815078948026924, + "grad_norm": 1.0592695474624634, + "learning_rate": 9.878718100210354e-06, + "loss": 0.2976, + "step": 4903 + }, + { + "epoch": 0.0981708079973976, + "grad_norm": 1.1750754117965698, + "learning_rate": 9.878647120981381e-06, + "loss": 0.3157, + "step": 4904 + }, + { + "epoch": 0.09819082651452593, + "grad_norm": 1.227695107460022, + "learning_rate": 9.878576121243625e-06, + "loss": 0.3527, + "step": 4905 + }, + { + "epoch": 0.09821084503165428, + "grad_norm": 1.3159340620040894, + "learning_rate": 9.878505100997387e-06, + "loss": 0.3133, + "step": 4906 + }, + { + "epoch": 0.09823086354878262, + "grad_norm": 1.0173606872558594, + "learning_rate": 9.878434060242963e-06, + "loss": 0.2668, + "step": 4907 + }, + { + "epoch": 0.09825088206591097, + "grad_norm": 1.1925272941589355, + "learning_rate": 9.87836299898065e-06, + "loss": 0.2903, + "step": 4908 + }, + { + "epoch": 0.0982709005830393, + "grad_norm": 0.9848401546478271, + "learning_rate": 9.878291917210752e-06, + "loss": 0.2772, + "step": 4909 + }, + { + "epoch": 0.09829091910016766, + "grad_norm": 1.1030389070510864, + "learning_rate": 9.878220814933562e-06, + "loss": 0.3374, + "step": 4910 + }, + { + "epoch": 0.098310937617296, + "grad_norm": 1.1660927534103394, + "learning_rate": 9.878149692149383e-06, + "loss": 0.3577, + "step": 4911 + }, + { + "epoch": 0.09833095613442434, + "grad_norm": 1.2795947790145874, + "learning_rate": 9.878078548858511e-06, + "loss": 0.3772, + "step": 4912 + }, + { + "epoch": 0.09835097465155268, + "grad_norm": 1.1659621000289917, + "learning_rate": 9.878007385061247e-06, + "loss": 0.2915, + "step": 4913 + }, + { + "epoch": 0.09837099316868103, + "grad_norm": 1.9360493421554565, + "learning_rate": 9.877936200757892e-06, + "loss": 0.9026, + "step": 4914 + }, + { + "epoch": 0.09839101168580937, + "grad_norm": 1.1920723915100098, + "learning_rate": 9.87786499594874e-06, + "loss": 0.3546, + "step": 4915 + }, + { + "epoch": 0.09841103020293772, + "grad_norm": 1.3022890090942383, + "learning_rate": 9.877793770634094e-06, + "loss": 0.312, + "step": 4916 + }, + { + "epoch": 0.09843104872006606, + "grad_norm": 1.1435141563415527, + "learning_rate": 9.877722524814254e-06, + "loss": 0.2971, + "step": 4917 + }, + { + "epoch": 0.0984510672371944, + "grad_norm": 1.269625186920166, + "learning_rate": 9.877651258489518e-06, + "loss": 0.3601, + "step": 4918 + }, + { + "epoch": 0.09847108575432274, + "grad_norm": 1.2971936464309692, + "learning_rate": 9.877579971660185e-06, + "loss": 0.3765, + "step": 4919 + }, + { + "epoch": 0.0984911042714511, + "grad_norm": 0.9996562600135803, + "learning_rate": 9.877508664326555e-06, + "loss": 0.3098, + "step": 4920 + }, + { + "epoch": 0.09851112278857943, + "grad_norm": 1.0122495889663696, + "learning_rate": 9.877437336488928e-06, + "loss": 0.2674, + "step": 4921 + }, + { + "epoch": 0.09853114130570778, + "grad_norm": 0.9880119562149048, + "learning_rate": 9.877365988147605e-06, + "loss": 0.3248, + "step": 4922 + }, + { + "epoch": 0.09855115982283612, + "grad_norm": 1.1196590662002563, + "learning_rate": 9.877294619302886e-06, + "loss": 0.3746, + "step": 4923 + }, + { + "epoch": 0.09857117833996447, + "grad_norm": 1.1339309215545654, + "learning_rate": 9.877223229955068e-06, + "loss": 0.3437, + "step": 4924 + }, + { + "epoch": 0.0985911968570928, + "grad_norm": 1.045304775238037, + "learning_rate": 9.877151820104454e-06, + "loss": 0.2934, + "step": 4925 + }, + { + "epoch": 0.09861121537422116, + "grad_norm": 1.0445045232772827, + "learning_rate": 9.877080389751344e-06, + "loss": 0.3116, + "step": 4926 + }, + { + "epoch": 0.09863123389134949, + "grad_norm": 1.8301078081130981, + "learning_rate": 9.877008938896036e-06, + "loss": 0.8755, + "step": 4927 + }, + { + "epoch": 0.09865125240847784, + "grad_norm": 1.0702145099639893, + "learning_rate": 9.876937467538834e-06, + "loss": 0.2921, + "step": 4928 + }, + { + "epoch": 0.09867127092560618, + "grad_norm": 1.0525407791137695, + "learning_rate": 9.876865975680035e-06, + "loss": 0.3453, + "step": 4929 + }, + { + "epoch": 0.09869128944273453, + "grad_norm": 1.0127171277999878, + "learning_rate": 9.876794463319941e-06, + "loss": 0.324, + "step": 4930 + }, + { + "epoch": 0.09871130795986287, + "grad_norm": 1.0977084636688232, + "learning_rate": 9.876722930458854e-06, + "loss": 0.3043, + "step": 4931 + }, + { + "epoch": 0.09873132647699122, + "grad_norm": 1.0374420881271362, + "learning_rate": 9.87665137709707e-06, + "loss": 0.3636, + "step": 4932 + }, + { + "epoch": 0.09875134499411956, + "grad_norm": 1.076244592666626, + "learning_rate": 9.876579803234895e-06, + "loss": 0.2803, + "step": 4933 + }, + { + "epoch": 0.0987713635112479, + "grad_norm": 1.0903531312942505, + "learning_rate": 9.876508208872629e-06, + "loss": 0.3439, + "step": 4934 + }, + { + "epoch": 0.09879138202837624, + "grad_norm": 1.076688289642334, + "learning_rate": 9.876436594010571e-06, + "loss": 0.3081, + "step": 4935 + }, + { + "epoch": 0.0988114005455046, + "grad_norm": 1.5176844596862793, + "learning_rate": 9.876364958649024e-06, + "loss": 0.3375, + "step": 4936 + }, + { + "epoch": 0.09883141906263293, + "grad_norm": 1.0679084062576294, + "learning_rate": 9.876293302788286e-06, + "loss": 0.289, + "step": 4937 + }, + { + "epoch": 0.09885143757976128, + "grad_norm": 1.085098385810852, + "learning_rate": 9.876221626428662e-06, + "loss": 0.2947, + "step": 4938 + }, + { + "epoch": 0.09887145609688962, + "grad_norm": 1.130636215209961, + "learning_rate": 9.87614992957045e-06, + "loss": 0.3401, + "step": 4939 + }, + { + "epoch": 0.09889147461401797, + "grad_norm": 1.1207669973373413, + "learning_rate": 9.876078212213956e-06, + "loss": 0.3431, + "step": 4940 + }, + { + "epoch": 0.0989114931311463, + "grad_norm": 1.2494789361953735, + "learning_rate": 9.876006474359477e-06, + "loss": 0.3796, + "step": 4941 + }, + { + "epoch": 0.09893151164827466, + "grad_norm": 1.2301146984100342, + "learning_rate": 9.875934716007316e-06, + "loss": 0.3659, + "step": 4942 + }, + { + "epoch": 0.09895153016540299, + "grad_norm": 1.187388300895691, + "learning_rate": 9.875862937157774e-06, + "loss": 0.3578, + "step": 4943 + }, + { + "epoch": 0.09897154868253134, + "grad_norm": 1.018320083618164, + "learning_rate": 9.875791137811154e-06, + "loss": 0.3261, + "step": 4944 + }, + { + "epoch": 0.09899156719965968, + "grad_norm": 1.1071068048477173, + "learning_rate": 9.875719317967759e-06, + "loss": 0.311, + "step": 4945 + }, + { + "epoch": 0.09901158571678803, + "grad_norm": 1.076352834701538, + "learning_rate": 9.875647477627887e-06, + "loss": 0.3367, + "step": 4946 + }, + { + "epoch": 0.09903160423391637, + "grad_norm": 1.0306727886199951, + "learning_rate": 9.875575616791842e-06, + "loss": 0.3074, + "step": 4947 + }, + { + "epoch": 0.09905162275104472, + "grad_norm": 1.1096670627593994, + "learning_rate": 9.875503735459928e-06, + "loss": 0.3098, + "step": 4948 + }, + { + "epoch": 0.09907164126817306, + "grad_norm": 1.0653374195098877, + "learning_rate": 9.875431833632446e-06, + "loss": 0.3043, + "step": 4949 + }, + { + "epoch": 0.0990916597853014, + "grad_norm": 1.189347505569458, + "learning_rate": 9.875359911309697e-06, + "loss": 0.3328, + "step": 4950 + }, + { + "epoch": 0.09911167830242974, + "grad_norm": 1.0722906589508057, + "learning_rate": 9.875287968491983e-06, + "loss": 0.3222, + "step": 4951 + }, + { + "epoch": 0.0991316968195581, + "grad_norm": 1.0191322565078735, + "learning_rate": 9.875216005179609e-06, + "loss": 0.3266, + "step": 4952 + }, + { + "epoch": 0.09915171533668643, + "grad_norm": 1.0952293872833252, + "learning_rate": 9.875144021372874e-06, + "loss": 0.3387, + "step": 4953 + }, + { + "epoch": 0.09917173385381478, + "grad_norm": 1.106354832649231, + "learning_rate": 9.875072017072084e-06, + "loss": 0.3507, + "step": 4954 + }, + { + "epoch": 0.09919175237094312, + "grad_norm": 1.0427035093307495, + "learning_rate": 9.87499999227754e-06, + "loss": 0.3942, + "step": 4955 + }, + { + "epoch": 0.09921177088807147, + "grad_norm": 1.173105239868164, + "learning_rate": 9.874927946989547e-06, + "loss": 0.3028, + "step": 4956 + }, + { + "epoch": 0.0992317894051998, + "grad_norm": 1.1835278272628784, + "learning_rate": 9.874855881208404e-06, + "loss": 0.3921, + "step": 4957 + }, + { + "epoch": 0.09925180792232816, + "grad_norm": 1.1066086292266846, + "learning_rate": 9.874783794934416e-06, + "loss": 0.3105, + "step": 4958 + }, + { + "epoch": 0.09927182643945649, + "grad_norm": 1.1220873594284058, + "learning_rate": 9.874711688167887e-06, + "loss": 0.3292, + "step": 4959 + }, + { + "epoch": 0.09929184495658484, + "grad_norm": 1.012262225151062, + "learning_rate": 9.874639560909118e-06, + "loss": 0.3305, + "step": 4960 + }, + { + "epoch": 0.09931186347371318, + "grad_norm": 1.1137455701828003, + "learning_rate": 9.874567413158414e-06, + "loss": 0.3567, + "step": 4961 + }, + { + "epoch": 0.09933188199084153, + "grad_norm": 1.121317982673645, + "learning_rate": 9.874495244916078e-06, + "loss": 0.3543, + "step": 4962 + }, + { + "epoch": 0.09935190050796987, + "grad_norm": 1.3972392082214355, + "learning_rate": 9.874423056182412e-06, + "loss": 0.3312, + "step": 4963 + }, + { + "epoch": 0.09937191902509822, + "grad_norm": 1.3026347160339355, + "learning_rate": 9.874350846957722e-06, + "loss": 0.3941, + "step": 4964 + }, + { + "epoch": 0.09939193754222655, + "grad_norm": 2.036729574203491, + "learning_rate": 9.87427861724231e-06, + "loss": 0.8889, + "step": 4965 + }, + { + "epoch": 0.0994119560593549, + "grad_norm": 1.0462844371795654, + "learning_rate": 9.874206367036478e-06, + "loss": 0.2915, + "step": 4966 + }, + { + "epoch": 0.09943197457648324, + "grad_norm": 1.1012617349624634, + "learning_rate": 9.874134096340532e-06, + "loss": 0.3432, + "step": 4967 + }, + { + "epoch": 0.0994519930936116, + "grad_norm": 2.0033626556396484, + "learning_rate": 9.874061805154775e-06, + "loss": 0.8193, + "step": 4968 + }, + { + "epoch": 0.09947201161073993, + "grad_norm": 1.9485143423080444, + "learning_rate": 9.873989493479512e-06, + "loss": 0.8489, + "step": 4969 + }, + { + "epoch": 0.09949203012786828, + "grad_norm": 1.0910447835922241, + "learning_rate": 9.873917161315045e-06, + "loss": 0.3323, + "step": 4970 + }, + { + "epoch": 0.09951204864499662, + "grad_norm": 1.1408485174179077, + "learning_rate": 9.87384480866168e-06, + "loss": 0.3232, + "step": 4971 + }, + { + "epoch": 0.09953206716212497, + "grad_norm": 1.1829254627227783, + "learning_rate": 9.87377243551972e-06, + "loss": 0.3749, + "step": 4972 + }, + { + "epoch": 0.0995520856792533, + "grad_norm": 1.1076608896255493, + "learning_rate": 9.873700041889469e-06, + "loss": 0.3445, + "step": 4973 + }, + { + "epoch": 0.09957210419638166, + "grad_norm": 1.1170778274536133, + "learning_rate": 9.873627627771232e-06, + "loss": 0.3263, + "step": 4974 + }, + { + "epoch": 0.09959212271350999, + "grad_norm": 1.052656888961792, + "learning_rate": 9.873555193165315e-06, + "loss": 0.2944, + "step": 4975 + }, + { + "epoch": 0.09961214123063834, + "grad_norm": 1.1935912370681763, + "learning_rate": 9.873482738072018e-06, + "loss": 0.2962, + "step": 4976 + }, + { + "epoch": 0.09963215974776668, + "grad_norm": 1.1908090114593506, + "learning_rate": 9.87341026249165e-06, + "loss": 0.3136, + "step": 4977 + }, + { + "epoch": 0.09965217826489503, + "grad_norm": 1.253764271736145, + "learning_rate": 9.873337766424513e-06, + "loss": 0.3519, + "step": 4978 + }, + { + "epoch": 0.09967219678202337, + "grad_norm": 1.2491072416305542, + "learning_rate": 9.873265249870913e-06, + "loss": 0.2776, + "step": 4979 + }, + { + "epoch": 0.09969221529915172, + "grad_norm": 1.2098546028137207, + "learning_rate": 9.873192712831155e-06, + "loss": 0.356, + "step": 4980 + }, + { + "epoch": 0.09971223381628005, + "grad_norm": 1.1576889753341675, + "learning_rate": 9.873120155305544e-06, + "loss": 0.3256, + "step": 4981 + }, + { + "epoch": 0.0997322523334084, + "grad_norm": 1.1601530313491821, + "learning_rate": 9.873047577294384e-06, + "loss": 0.3385, + "step": 4982 + }, + { + "epoch": 0.09975227085053674, + "grad_norm": 1.1609619855880737, + "learning_rate": 9.872974978797981e-06, + "loss": 0.3108, + "step": 4983 + }, + { + "epoch": 0.09977228936766509, + "grad_norm": 1.347659707069397, + "learning_rate": 9.87290235981664e-06, + "loss": 0.355, + "step": 4984 + }, + { + "epoch": 0.09979230788479343, + "grad_norm": 1.154422402381897, + "learning_rate": 9.872829720350665e-06, + "loss": 0.3777, + "step": 4985 + }, + { + "epoch": 0.09981232640192178, + "grad_norm": 1.051641821861267, + "learning_rate": 9.872757060400365e-06, + "loss": 0.2829, + "step": 4986 + }, + { + "epoch": 0.09983234491905012, + "grad_norm": 1.062400460243225, + "learning_rate": 9.87268437996604e-06, + "loss": 0.2891, + "step": 4987 + }, + { + "epoch": 0.09985236343617847, + "grad_norm": 1.2329248189926147, + "learning_rate": 9.872611679048e-06, + "loss": 0.3554, + "step": 4988 + }, + { + "epoch": 0.0998723819533068, + "grad_norm": 1.160451054573059, + "learning_rate": 9.87253895764655e-06, + "loss": 0.3082, + "step": 4989 + }, + { + "epoch": 0.09989240047043516, + "grad_norm": 1.9484034776687622, + "learning_rate": 9.872466215761993e-06, + "loss": 0.8356, + "step": 4990 + }, + { + "epoch": 0.09991241898756349, + "grad_norm": 1.1067944765090942, + "learning_rate": 9.872393453394637e-06, + "loss": 0.351, + "step": 4991 + }, + { + "epoch": 0.09993243750469184, + "grad_norm": 1.2330536842346191, + "learning_rate": 9.872320670544788e-06, + "loss": 0.3352, + "step": 4992 + }, + { + "epoch": 0.09995245602182018, + "grad_norm": 1.8052631616592407, + "learning_rate": 9.87224786721275e-06, + "loss": 0.8804, + "step": 4993 + }, + { + "epoch": 0.09997247453894853, + "grad_norm": 1.143804907798767, + "learning_rate": 9.872175043398833e-06, + "loss": 0.3662, + "step": 4994 + }, + { + "epoch": 0.09999249305607687, + "grad_norm": 1.0806972980499268, + "learning_rate": 9.87210219910334e-06, + "loss": 0.3749, + "step": 4995 + }, + { + "epoch": 0.10001251157320522, + "grad_norm": 1.176962971687317, + "learning_rate": 9.872029334326577e-06, + "loss": 0.3127, + "step": 4996 + }, + { + "epoch": 0.10003253009033355, + "grad_norm": 1.1762467622756958, + "learning_rate": 9.87195644906885e-06, + "loss": 0.3263, + "step": 4997 + }, + { + "epoch": 0.1000525486074619, + "grad_norm": 1.890404224395752, + "learning_rate": 9.871883543330468e-06, + "loss": 0.8628, + "step": 4998 + }, + { + "epoch": 0.10007256712459024, + "grad_norm": 1.1092689037322998, + "learning_rate": 9.871810617111737e-06, + "loss": 0.363, + "step": 4999 + }, + { + "epoch": 0.10009258564171859, + "grad_norm": 1.106379747390747, + "learning_rate": 9.871737670412961e-06, + "loss": 0.3414, + "step": 5000 + }, + { + "epoch": 0.10011260415884693, + "grad_norm": 1.9398916959762573, + "learning_rate": 9.871664703234448e-06, + "loss": 0.7901, + "step": 5001 + }, + { + "epoch": 0.10013262267597528, + "grad_norm": 1.0493838787078857, + "learning_rate": 9.871591715576506e-06, + "loss": 0.3565, + "step": 5002 + }, + { + "epoch": 0.10015264119310362, + "grad_norm": 1.1400166749954224, + "learning_rate": 9.87151870743944e-06, + "loss": 0.3421, + "step": 5003 + }, + { + "epoch": 0.10017265971023197, + "grad_norm": 1.1012909412384033, + "learning_rate": 9.871445678823559e-06, + "loss": 0.2767, + "step": 5004 + }, + { + "epoch": 0.1001926782273603, + "grad_norm": 1.2249962091445923, + "learning_rate": 9.871372629729167e-06, + "loss": 0.2993, + "step": 5005 + }, + { + "epoch": 0.10021269674448866, + "grad_norm": 1.0944358110427856, + "learning_rate": 9.871299560156575e-06, + "loss": 0.3446, + "step": 5006 + }, + { + "epoch": 0.10023271526161699, + "grad_norm": 1.8348307609558105, + "learning_rate": 9.871226470106085e-06, + "loss": 0.8567, + "step": 5007 + }, + { + "epoch": 0.10025273377874534, + "grad_norm": 1.1155494451522827, + "learning_rate": 9.87115335957801e-06, + "loss": 0.3162, + "step": 5008 + }, + { + "epoch": 0.10027275229587368, + "grad_norm": 1.1201800107955933, + "learning_rate": 9.871080228572654e-06, + "loss": 0.3757, + "step": 5009 + }, + { + "epoch": 0.10029277081300203, + "grad_norm": 1.1565730571746826, + "learning_rate": 9.871007077090324e-06, + "loss": 0.3097, + "step": 5010 + }, + { + "epoch": 0.10031278933013037, + "grad_norm": 1.060375452041626, + "learning_rate": 9.870933905131328e-06, + "loss": 0.2978, + "step": 5011 + }, + { + "epoch": 0.10033280784725872, + "grad_norm": 1.8963279724121094, + "learning_rate": 9.870860712695976e-06, + "loss": 0.843, + "step": 5012 + }, + { + "epoch": 0.10035282636438705, + "grad_norm": 1.1066961288452148, + "learning_rate": 9.870787499784574e-06, + "loss": 0.326, + "step": 5013 + }, + { + "epoch": 0.1003728448815154, + "grad_norm": 1.17388916015625, + "learning_rate": 9.870714266397427e-06, + "loss": 0.3404, + "step": 5014 + }, + { + "epoch": 0.10039286339864374, + "grad_norm": 1.1320459842681885, + "learning_rate": 9.870641012534848e-06, + "loss": 0.3226, + "step": 5015 + }, + { + "epoch": 0.10041288191577209, + "grad_norm": 1.1390630006790161, + "learning_rate": 9.87056773819714e-06, + "loss": 0.349, + "step": 5016 + }, + { + "epoch": 0.10043290043290043, + "grad_norm": 1.2034924030303955, + "learning_rate": 9.870494443384616e-06, + "loss": 0.3584, + "step": 5017 + }, + { + "epoch": 0.10045291895002878, + "grad_norm": 1.0310410261154175, + "learning_rate": 9.870421128097577e-06, + "loss": 0.359, + "step": 5018 + }, + { + "epoch": 0.10047293746715712, + "grad_norm": 1.038183569908142, + "learning_rate": 9.87034779233634e-06, + "loss": 0.2918, + "step": 5019 + }, + { + "epoch": 0.10049295598428547, + "grad_norm": 1.1432676315307617, + "learning_rate": 9.870274436101208e-06, + "loss": 0.3522, + "step": 5020 + }, + { + "epoch": 0.1005129745014138, + "grad_norm": 1.9711573123931885, + "learning_rate": 9.87020105939249e-06, + "loss": 0.9099, + "step": 5021 + }, + { + "epoch": 0.10053299301854215, + "grad_norm": 1.9334080219268799, + "learning_rate": 9.870127662210497e-06, + "loss": 0.8146, + "step": 5022 + }, + { + "epoch": 0.10055301153567049, + "grad_norm": 1.1272016763687134, + "learning_rate": 9.870054244555532e-06, + "loss": 0.3449, + "step": 5023 + }, + { + "epoch": 0.10057303005279884, + "grad_norm": 1.1260795593261719, + "learning_rate": 9.869980806427908e-06, + "loss": 0.3397, + "step": 5024 + }, + { + "epoch": 0.10059304856992718, + "grad_norm": 1.1727811098098755, + "learning_rate": 9.869907347827933e-06, + "loss": 0.2902, + "step": 5025 + }, + { + "epoch": 0.10061306708705553, + "grad_norm": 1.1133711338043213, + "learning_rate": 9.869833868755915e-06, + "loss": 0.3576, + "step": 5026 + }, + { + "epoch": 0.10063308560418387, + "grad_norm": 1.2088497877120972, + "learning_rate": 9.869760369212165e-06, + "loss": 0.2989, + "step": 5027 + }, + { + "epoch": 0.10065310412131222, + "grad_norm": 1.2153496742248535, + "learning_rate": 9.869686849196989e-06, + "loss": 0.3147, + "step": 5028 + }, + { + "epoch": 0.10067312263844055, + "grad_norm": 1.2988152503967285, + "learning_rate": 9.869613308710697e-06, + "loss": 0.3321, + "step": 5029 + }, + { + "epoch": 0.1006931411555689, + "grad_norm": 1.1471930742263794, + "learning_rate": 9.8695397477536e-06, + "loss": 0.3847, + "step": 5030 + }, + { + "epoch": 0.10071315967269724, + "grad_norm": 1.8808902502059937, + "learning_rate": 9.869466166326005e-06, + "loss": 0.8511, + "step": 5031 + }, + { + "epoch": 0.10073317818982559, + "grad_norm": 1.3042417764663696, + "learning_rate": 9.869392564428221e-06, + "loss": 0.3597, + "step": 5032 + }, + { + "epoch": 0.10075319670695393, + "grad_norm": 1.1226698160171509, + "learning_rate": 9.869318942060562e-06, + "loss": 0.359, + "step": 5033 + }, + { + "epoch": 0.10077321522408228, + "grad_norm": 1.0466622114181519, + "learning_rate": 9.869245299223332e-06, + "loss": 0.2741, + "step": 5034 + }, + { + "epoch": 0.10079323374121062, + "grad_norm": 0.9787752032279968, + "learning_rate": 9.869171635916842e-06, + "loss": 0.3182, + "step": 5035 + }, + { + "epoch": 0.10081325225833897, + "grad_norm": 1.3835843801498413, + "learning_rate": 9.869097952141403e-06, + "loss": 0.3196, + "step": 5036 + }, + { + "epoch": 0.1008332707754673, + "grad_norm": 1.0562630891799927, + "learning_rate": 9.869024247897325e-06, + "loss": 0.3142, + "step": 5037 + }, + { + "epoch": 0.10085328929259565, + "grad_norm": 1.0726351737976074, + "learning_rate": 9.868950523184914e-06, + "loss": 0.4009, + "step": 5038 + }, + { + "epoch": 0.10087330780972399, + "grad_norm": 1.1444380283355713, + "learning_rate": 9.868876778004486e-06, + "loss": 0.3376, + "step": 5039 + }, + { + "epoch": 0.10089332632685234, + "grad_norm": 0.9720107316970825, + "learning_rate": 9.868803012356347e-06, + "loss": 0.2989, + "step": 5040 + }, + { + "epoch": 0.10091334484398068, + "grad_norm": 1.099777102470398, + "learning_rate": 9.868729226240808e-06, + "loss": 0.3464, + "step": 5041 + }, + { + "epoch": 0.10093336336110903, + "grad_norm": 1.1716656684875488, + "learning_rate": 9.868655419658177e-06, + "loss": 0.3819, + "step": 5042 + }, + { + "epoch": 0.10095338187823737, + "grad_norm": 0.9717981219291687, + "learning_rate": 9.868581592608768e-06, + "loss": 0.3199, + "step": 5043 + }, + { + "epoch": 0.10097340039536572, + "grad_norm": 1.0629785060882568, + "learning_rate": 9.86850774509289e-06, + "loss": 0.3244, + "step": 5044 + }, + { + "epoch": 0.10099341891249405, + "grad_norm": 1.116470217704773, + "learning_rate": 9.868433877110852e-06, + "loss": 0.3178, + "step": 5045 + }, + { + "epoch": 0.1010134374296224, + "grad_norm": 1.8946943283081055, + "learning_rate": 9.868359988662968e-06, + "loss": 0.866, + "step": 5046 + }, + { + "epoch": 0.10103345594675074, + "grad_norm": 1.8390378952026367, + "learning_rate": 9.868286079749545e-06, + "loss": 0.9044, + "step": 5047 + }, + { + "epoch": 0.10105347446387909, + "grad_norm": 1.1270538568496704, + "learning_rate": 9.868212150370892e-06, + "loss": 0.3266, + "step": 5048 + }, + { + "epoch": 0.10107349298100743, + "grad_norm": 1.0793366432189941, + "learning_rate": 9.868138200527326e-06, + "loss": 0.3878, + "step": 5049 + }, + { + "epoch": 0.10109351149813578, + "grad_norm": 1.1697165966033936, + "learning_rate": 9.868064230219155e-06, + "loss": 0.3596, + "step": 5050 + }, + { + "epoch": 0.10111353001526412, + "grad_norm": 1.0391603708267212, + "learning_rate": 9.867990239446689e-06, + "loss": 0.2876, + "step": 5051 + }, + { + "epoch": 0.10113354853239247, + "grad_norm": 1.0893937349319458, + "learning_rate": 9.867916228210239e-06, + "loss": 0.2955, + "step": 5052 + }, + { + "epoch": 0.1011535670495208, + "grad_norm": 1.245302677154541, + "learning_rate": 9.867842196510118e-06, + "loss": 0.3479, + "step": 5053 + }, + { + "epoch": 0.10117358556664915, + "grad_norm": 1.0923261642456055, + "learning_rate": 9.867768144346635e-06, + "loss": 0.3379, + "step": 5054 + }, + { + "epoch": 0.10119360408377749, + "grad_norm": 1.874123215675354, + "learning_rate": 9.867694071720101e-06, + "loss": 0.8611, + "step": 5055 + }, + { + "epoch": 0.10121362260090584, + "grad_norm": 1.9814552068710327, + "learning_rate": 9.86761997863083e-06, + "loss": 0.8549, + "step": 5056 + }, + { + "epoch": 0.10123364111803418, + "grad_norm": 1.1271400451660156, + "learning_rate": 9.867545865079133e-06, + "loss": 0.3235, + "step": 5057 + }, + { + "epoch": 0.10125365963516253, + "grad_norm": 1.1785475015640259, + "learning_rate": 9.86747173106532e-06, + "loss": 0.2976, + "step": 5058 + }, + { + "epoch": 0.10127367815229087, + "grad_norm": 1.1420981884002686, + "learning_rate": 9.867397576589703e-06, + "loss": 0.3192, + "step": 5059 + }, + { + "epoch": 0.10129369666941922, + "grad_norm": 1.1611121892929077, + "learning_rate": 9.867323401652594e-06, + "loss": 0.3737, + "step": 5060 + }, + { + "epoch": 0.10131371518654755, + "grad_norm": 1.1899077892303467, + "learning_rate": 9.867249206254307e-06, + "loss": 0.341, + "step": 5061 + }, + { + "epoch": 0.1013337337036759, + "grad_norm": 1.0837793350219727, + "learning_rate": 9.867174990395149e-06, + "loss": 0.3346, + "step": 5062 + }, + { + "epoch": 0.10135375222080424, + "grad_norm": 1.1208491325378418, + "learning_rate": 9.867100754075436e-06, + "loss": 0.3396, + "step": 5063 + }, + { + "epoch": 0.10137377073793259, + "grad_norm": 1.0394303798675537, + "learning_rate": 9.867026497295478e-06, + "loss": 0.3465, + "step": 5064 + }, + { + "epoch": 0.10139378925506093, + "grad_norm": 1.2376683950424194, + "learning_rate": 9.866952220055588e-06, + "loss": 0.2909, + "step": 5065 + }, + { + "epoch": 0.10141380777218928, + "grad_norm": 1.0582754611968994, + "learning_rate": 9.86687792235608e-06, + "loss": 0.3445, + "step": 5066 + }, + { + "epoch": 0.10143382628931762, + "grad_norm": 1.0311459302902222, + "learning_rate": 9.866803604197263e-06, + "loss": 0.3098, + "step": 5067 + }, + { + "epoch": 0.10145384480644597, + "grad_norm": 1.2829021215438843, + "learning_rate": 9.866729265579452e-06, + "loss": 0.3589, + "step": 5068 + }, + { + "epoch": 0.1014738633235743, + "grad_norm": 1.1734776496887207, + "learning_rate": 9.866654906502957e-06, + "loss": 0.355, + "step": 5069 + }, + { + "epoch": 0.10149388184070265, + "grad_norm": 1.0225294828414917, + "learning_rate": 9.866580526968092e-06, + "loss": 0.3201, + "step": 5070 + }, + { + "epoch": 0.10151390035783099, + "grad_norm": 1.169145941734314, + "learning_rate": 9.866506126975171e-06, + "loss": 0.2878, + "step": 5071 + }, + { + "epoch": 0.10153391887495934, + "grad_norm": 1.2724480628967285, + "learning_rate": 9.866431706524507e-06, + "loss": 0.3857, + "step": 5072 + }, + { + "epoch": 0.10155393739208768, + "grad_norm": 1.0119167566299438, + "learning_rate": 9.866357265616408e-06, + "loss": 0.349, + "step": 5073 + }, + { + "epoch": 0.10157395590921603, + "grad_norm": 1.1070060729980469, + "learning_rate": 9.866282804251193e-06, + "loss": 0.3273, + "step": 5074 + }, + { + "epoch": 0.10159397442634437, + "grad_norm": 1.1597703695297241, + "learning_rate": 9.86620832242917e-06, + "loss": 0.3028, + "step": 5075 + }, + { + "epoch": 0.10161399294347272, + "grad_norm": 1.0175923109054565, + "learning_rate": 9.866133820150656e-06, + "loss": 0.3457, + "step": 5076 + }, + { + "epoch": 0.10163401146060105, + "grad_norm": 1.1140170097351074, + "learning_rate": 9.866059297415961e-06, + "loss": 0.3353, + "step": 5077 + }, + { + "epoch": 0.1016540299777294, + "grad_norm": 1.2668273448944092, + "learning_rate": 9.865984754225404e-06, + "loss": 0.3362, + "step": 5078 + }, + { + "epoch": 0.10167404849485774, + "grad_norm": 1.1490881443023682, + "learning_rate": 9.865910190579292e-06, + "loss": 0.3135, + "step": 5079 + }, + { + "epoch": 0.10169406701198609, + "grad_norm": 0.9523840546607971, + "learning_rate": 9.865835606477939e-06, + "loss": 0.3046, + "step": 5080 + }, + { + "epoch": 0.10171408552911443, + "grad_norm": 1.0593706369400024, + "learning_rate": 9.865761001921662e-06, + "loss": 0.3512, + "step": 5081 + }, + { + "epoch": 0.10173410404624278, + "grad_norm": 1.2538670301437378, + "learning_rate": 9.86568637691077e-06, + "loss": 0.3355, + "step": 5082 + }, + { + "epoch": 0.10175412256337112, + "grad_norm": 2.1902124881744385, + "learning_rate": 9.865611731445584e-06, + "loss": 0.8787, + "step": 5083 + }, + { + "epoch": 0.10177414108049947, + "grad_norm": 1.2411996126174927, + "learning_rate": 9.86553706552641e-06, + "loss": 0.3391, + "step": 5084 + }, + { + "epoch": 0.1017941595976278, + "grad_norm": 1.1100836992263794, + "learning_rate": 9.865462379153567e-06, + "loss": 0.3768, + "step": 5085 + }, + { + "epoch": 0.10181417811475615, + "grad_norm": 1.2053476572036743, + "learning_rate": 9.865387672327367e-06, + "loss": 0.3822, + "step": 5086 + }, + { + "epoch": 0.10183419663188449, + "grad_norm": 1.1629736423492432, + "learning_rate": 9.865312945048123e-06, + "loss": 0.3095, + "step": 5087 + }, + { + "epoch": 0.10185421514901284, + "grad_norm": 1.0988339185714722, + "learning_rate": 9.865238197316151e-06, + "loss": 0.3372, + "step": 5088 + }, + { + "epoch": 0.10187423366614118, + "grad_norm": 1.0719857215881348, + "learning_rate": 9.865163429131766e-06, + "loss": 0.3386, + "step": 5089 + }, + { + "epoch": 0.10189425218326953, + "grad_norm": 1.0849369764328003, + "learning_rate": 9.86508864049528e-06, + "loss": 0.346, + "step": 5090 + }, + { + "epoch": 0.10191427070039787, + "grad_norm": 1.0914897918701172, + "learning_rate": 9.865013831407009e-06, + "loss": 0.3253, + "step": 5091 + }, + { + "epoch": 0.10193428921752622, + "grad_norm": 1.1698235273361206, + "learning_rate": 9.864939001867265e-06, + "loss": 0.3387, + "step": 5092 + }, + { + "epoch": 0.10195430773465455, + "grad_norm": 1.1611109972000122, + "learning_rate": 9.864864151876366e-06, + "loss": 0.3628, + "step": 5093 + }, + { + "epoch": 0.1019743262517829, + "grad_norm": 1.0656850337982178, + "learning_rate": 9.864789281434624e-06, + "loss": 0.3189, + "step": 5094 + }, + { + "epoch": 0.10199434476891124, + "grad_norm": 1.047637701034546, + "learning_rate": 9.864714390542356e-06, + "loss": 0.339, + "step": 5095 + }, + { + "epoch": 0.10201436328603959, + "grad_norm": 1.0663492679595947, + "learning_rate": 9.864639479199874e-06, + "loss": 0.3418, + "step": 5096 + }, + { + "epoch": 0.10203438180316793, + "grad_norm": 1.0975680351257324, + "learning_rate": 9.864564547407495e-06, + "loss": 0.3347, + "step": 5097 + }, + { + "epoch": 0.10205440032029628, + "grad_norm": 1.1250332593917847, + "learning_rate": 9.864489595165535e-06, + "loss": 0.3138, + "step": 5098 + }, + { + "epoch": 0.10207441883742462, + "grad_norm": 1.061482548713684, + "learning_rate": 9.864414622474306e-06, + "loss": 0.2895, + "step": 5099 + }, + { + "epoch": 0.10209443735455297, + "grad_norm": 1.0754374265670776, + "learning_rate": 9.864339629334126e-06, + "loss": 0.3282, + "step": 5100 + }, + { + "epoch": 0.1021144558716813, + "grad_norm": 1.0652308464050293, + "learning_rate": 9.864264615745308e-06, + "loss": 0.3067, + "step": 5101 + }, + { + "epoch": 0.10213447438880965, + "grad_norm": 2.0151760578155518, + "learning_rate": 9.864189581708168e-06, + "loss": 0.8193, + "step": 5102 + }, + { + "epoch": 0.10215449290593799, + "grad_norm": 1.1622790098190308, + "learning_rate": 9.864114527223023e-06, + "loss": 0.33, + "step": 5103 + }, + { + "epoch": 0.10217451142306634, + "grad_norm": 1.075360894203186, + "learning_rate": 9.864039452290185e-06, + "loss": 0.3154, + "step": 5104 + }, + { + "epoch": 0.10219452994019468, + "grad_norm": 1.1184909343719482, + "learning_rate": 9.863964356909976e-06, + "loss": 0.3339, + "step": 5105 + }, + { + "epoch": 0.10221454845732303, + "grad_norm": 1.2330677509307861, + "learning_rate": 9.863889241082704e-06, + "loss": 0.2848, + "step": 5106 + }, + { + "epoch": 0.10223456697445137, + "grad_norm": 1.1072622537612915, + "learning_rate": 9.86381410480869e-06, + "loss": 0.3314, + "step": 5107 + }, + { + "epoch": 0.10225458549157972, + "grad_norm": 1.5885339975357056, + "learning_rate": 9.863738948088248e-06, + "loss": 0.3505, + "step": 5108 + }, + { + "epoch": 0.10227460400870805, + "grad_norm": 1.1391868591308594, + "learning_rate": 9.863663770921694e-06, + "loss": 0.3629, + "step": 5109 + }, + { + "epoch": 0.1022946225258364, + "grad_norm": 1.2233569622039795, + "learning_rate": 9.863588573309345e-06, + "loss": 0.3492, + "step": 5110 + }, + { + "epoch": 0.10231464104296474, + "grad_norm": 1.0696356296539307, + "learning_rate": 9.863513355251515e-06, + "loss": 0.3589, + "step": 5111 + }, + { + "epoch": 0.10233465956009309, + "grad_norm": 1.1153876781463623, + "learning_rate": 9.86343811674852e-06, + "loss": 0.3656, + "step": 5112 + }, + { + "epoch": 0.10235467807722143, + "grad_norm": 1.0358924865722656, + "learning_rate": 9.86336285780068e-06, + "loss": 0.3245, + "step": 5113 + }, + { + "epoch": 0.10237469659434978, + "grad_norm": 1.9970965385437012, + "learning_rate": 9.863287578408309e-06, + "loss": 0.8567, + "step": 5114 + }, + { + "epoch": 0.10239471511147812, + "grad_norm": 1.178884506225586, + "learning_rate": 9.863212278571725e-06, + "loss": 0.3718, + "step": 5115 + }, + { + "epoch": 0.10241473362860647, + "grad_norm": 1.1266835927963257, + "learning_rate": 9.86313695829124e-06, + "loss": 0.359, + "step": 5116 + }, + { + "epoch": 0.1024347521457348, + "grad_norm": 1.1183634996414185, + "learning_rate": 9.863061617567175e-06, + "loss": 0.3121, + "step": 5117 + }, + { + "epoch": 0.10245477066286315, + "grad_norm": 1.127980351448059, + "learning_rate": 9.862986256399847e-06, + "loss": 0.3245, + "step": 5118 + }, + { + "epoch": 0.10247478917999149, + "grad_norm": 1.0080995559692383, + "learning_rate": 9.86291087478957e-06, + "loss": 0.3375, + "step": 5119 + }, + { + "epoch": 0.10249480769711984, + "grad_norm": 1.1114534139633179, + "learning_rate": 9.862835472736663e-06, + "loss": 0.3264, + "step": 5120 + }, + { + "epoch": 0.10251482621424818, + "grad_norm": 1.1266762018203735, + "learning_rate": 9.862760050241441e-06, + "loss": 0.3264, + "step": 5121 + }, + { + "epoch": 0.10253484473137653, + "grad_norm": 1.1664109230041504, + "learning_rate": 9.862684607304223e-06, + "loss": 0.3761, + "step": 5122 + }, + { + "epoch": 0.10255486324850487, + "grad_norm": 1.180371880531311, + "learning_rate": 9.862609143925325e-06, + "loss": 0.2976, + "step": 5123 + }, + { + "epoch": 0.10257488176563322, + "grad_norm": 1.572569727897644, + "learning_rate": 9.862533660105064e-06, + "loss": 0.9256, + "step": 5124 + }, + { + "epoch": 0.10259490028276155, + "grad_norm": 1.025048017501831, + "learning_rate": 9.86245815584376e-06, + "loss": 0.3149, + "step": 5125 + }, + { + "epoch": 0.1026149187998899, + "grad_norm": 1.095594048500061, + "learning_rate": 9.862382631141726e-06, + "loss": 0.3556, + "step": 5126 + }, + { + "epoch": 0.10263493731701824, + "grad_norm": 1.2443324327468872, + "learning_rate": 9.862307085999283e-06, + "loss": 0.3356, + "step": 5127 + }, + { + "epoch": 0.10265495583414659, + "grad_norm": 1.156268835067749, + "learning_rate": 9.862231520416748e-06, + "loss": 0.3313, + "step": 5128 + }, + { + "epoch": 0.10267497435127493, + "grad_norm": 1.1159950494766235, + "learning_rate": 9.862155934394437e-06, + "loss": 0.2944, + "step": 5129 + }, + { + "epoch": 0.10269499286840328, + "grad_norm": 1.265263557434082, + "learning_rate": 9.862080327932668e-06, + "loss": 0.3737, + "step": 5130 + }, + { + "epoch": 0.10271501138553162, + "grad_norm": 1.1692945957183838, + "learning_rate": 9.862004701031763e-06, + "loss": 0.3629, + "step": 5131 + }, + { + "epoch": 0.10273502990265997, + "grad_norm": 1.0515458583831787, + "learning_rate": 9.861929053692032e-06, + "loss": 0.3775, + "step": 5132 + }, + { + "epoch": 0.1027550484197883, + "grad_norm": 1.2214363813400269, + "learning_rate": 9.8618533859138e-06, + "loss": 0.3446, + "step": 5133 + }, + { + "epoch": 0.10277506693691665, + "grad_norm": 1.8024132251739502, + "learning_rate": 9.861777697697383e-06, + "loss": 0.8524, + "step": 5134 + }, + { + "epoch": 0.10279508545404499, + "grad_norm": 1.886073350906372, + "learning_rate": 9.861701989043098e-06, + "loss": 0.9416, + "step": 5135 + }, + { + "epoch": 0.10281510397117334, + "grad_norm": 1.2398325204849243, + "learning_rate": 9.861626259951264e-06, + "loss": 0.3509, + "step": 5136 + }, + { + "epoch": 0.10283512248830168, + "grad_norm": 1.0595982074737549, + "learning_rate": 9.8615505104222e-06, + "loss": 0.3241, + "step": 5137 + }, + { + "epoch": 0.10285514100543003, + "grad_norm": 0.998489499092102, + "learning_rate": 9.861474740456224e-06, + "loss": 0.3241, + "step": 5138 + }, + { + "epoch": 0.10287515952255837, + "grad_norm": 1.0348749160766602, + "learning_rate": 9.861398950053653e-06, + "loss": 0.3598, + "step": 5139 + }, + { + "epoch": 0.10289517803968672, + "grad_norm": 1.0564407110214233, + "learning_rate": 9.86132313921481e-06, + "loss": 0.2923, + "step": 5140 + }, + { + "epoch": 0.10291519655681505, + "grad_norm": 1.1679407358169556, + "learning_rate": 9.861247307940007e-06, + "loss": 0.3226, + "step": 5141 + }, + { + "epoch": 0.1029352150739434, + "grad_norm": 1.281810998916626, + "learning_rate": 9.861171456229569e-06, + "loss": 0.3039, + "step": 5142 + }, + { + "epoch": 0.10295523359107174, + "grad_norm": 1.150320291519165, + "learning_rate": 9.861095584083811e-06, + "loss": 0.3335, + "step": 5143 + }, + { + "epoch": 0.10297525210820009, + "grad_norm": 1.025681734085083, + "learning_rate": 9.861019691503054e-06, + "loss": 0.3217, + "step": 5144 + }, + { + "epoch": 0.10299527062532843, + "grad_norm": 1.0704721212387085, + "learning_rate": 9.860943778487617e-06, + "loss": 0.3475, + "step": 5145 + }, + { + "epoch": 0.10301528914245678, + "grad_norm": 1.1190447807312012, + "learning_rate": 9.860867845037818e-06, + "loss": 0.3565, + "step": 5146 + }, + { + "epoch": 0.10303530765958512, + "grad_norm": 1.1218262910842896, + "learning_rate": 9.860791891153976e-06, + "loss": 0.3027, + "step": 5147 + }, + { + "epoch": 0.10305532617671347, + "grad_norm": 1.8859808444976807, + "learning_rate": 9.86071591683641e-06, + "loss": 0.8564, + "step": 5148 + }, + { + "epoch": 0.1030753446938418, + "grad_norm": 1.0053856372833252, + "learning_rate": 9.860639922085442e-06, + "loss": 0.3135, + "step": 5149 + }, + { + "epoch": 0.10309536321097015, + "grad_norm": 1.2202775478363037, + "learning_rate": 9.860563906901389e-06, + "loss": 0.3485, + "step": 5150 + }, + { + "epoch": 0.10311538172809849, + "grad_norm": 1.1661128997802734, + "learning_rate": 9.860487871284572e-06, + "loss": 0.3613, + "step": 5151 + }, + { + "epoch": 0.10313540024522684, + "grad_norm": 1.7926071882247925, + "learning_rate": 9.86041181523531e-06, + "loss": 0.7732, + "step": 5152 + }, + { + "epoch": 0.10315541876235518, + "grad_norm": 1.4990886449813843, + "learning_rate": 9.86033573875392e-06, + "loss": 0.3563, + "step": 5153 + }, + { + "epoch": 0.10317543727948353, + "grad_norm": 1.1037259101867676, + "learning_rate": 9.860259641840728e-06, + "loss": 0.3461, + "step": 5154 + }, + { + "epoch": 0.10319545579661187, + "grad_norm": 1.1786459684371948, + "learning_rate": 9.86018352449605e-06, + "loss": 0.3342, + "step": 5155 + }, + { + "epoch": 0.10321547431374022, + "grad_norm": 1.8745580911636353, + "learning_rate": 9.860107386720205e-06, + "loss": 0.8151, + "step": 5156 + }, + { + "epoch": 0.10323549283086855, + "grad_norm": 1.2443102598190308, + "learning_rate": 9.860031228513516e-06, + "loss": 0.3258, + "step": 5157 + }, + { + "epoch": 0.1032555113479969, + "grad_norm": 1.1073604822158813, + "learning_rate": 9.859955049876299e-06, + "loss": 0.4027, + "step": 5158 + }, + { + "epoch": 0.10327552986512524, + "grad_norm": 2.010652780532837, + "learning_rate": 9.85987885080888e-06, + "loss": 0.8591, + "step": 5159 + }, + { + "epoch": 0.10329554838225359, + "grad_norm": 1.1102567911148071, + "learning_rate": 9.859802631311574e-06, + "loss": 0.346, + "step": 5160 + }, + { + "epoch": 0.10331556689938193, + "grad_norm": 1.0206471681594849, + "learning_rate": 9.859726391384705e-06, + "loss": 0.3371, + "step": 5161 + }, + { + "epoch": 0.10333558541651028, + "grad_norm": 1.1448599100112915, + "learning_rate": 9.85965013102859e-06, + "loss": 0.3346, + "step": 5162 + }, + { + "epoch": 0.10335560393363862, + "grad_norm": 1.1237448453903198, + "learning_rate": 9.859573850243554e-06, + "loss": 0.3392, + "step": 5163 + }, + { + "epoch": 0.10337562245076697, + "grad_norm": 1.1641740798950195, + "learning_rate": 9.859497549029915e-06, + "loss": 0.3277, + "step": 5164 + }, + { + "epoch": 0.1033956409678953, + "grad_norm": 0.9734706878662109, + "learning_rate": 9.859421227387994e-06, + "loss": 0.3038, + "step": 5165 + }, + { + "epoch": 0.10341565948502365, + "grad_norm": 1.0300816297531128, + "learning_rate": 9.859344885318109e-06, + "loss": 0.3039, + "step": 5166 + }, + { + "epoch": 0.10343567800215199, + "grad_norm": 1.0957149267196655, + "learning_rate": 9.859268522820588e-06, + "loss": 0.3008, + "step": 5167 + }, + { + "epoch": 0.10345569651928034, + "grad_norm": 1.141078233718872, + "learning_rate": 9.859192139895745e-06, + "loss": 0.3103, + "step": 5168 + }, + { + "epoch": 0.10347571503640868, + "grad_norm": 1.262823462486267, + "learning_rate": 9.859115736543906e-06, + "loss": 0.3909, + "step": 5169 + }, + { + "epoch": 0.10349573355353703, + "grad_norm": 1.383810043334961, + "learning_rate": 9.859039312765388e-06, + "loss": 0.3155, + "step": 5170 + }, + { + "epoch": 0.10351575207066536, + "grad_norm": 1.1721335649490356, + "learning_rate": 9.858962868560516e-06, + "loss": 0.3078, + "step": 5171 + }, + { + "epoch": 0.10353577058779372, + "grad_norm": 1.2428193092346191, + "learning_rate": 9.858886403929609e-06, + "loss": 0.3757, + "step": 5172 + }, + { + "epoch": 0.10355578910492205, + "grad_norm": 1.0835086107254028, + "learning_rate": 9.858809918872989e-06, + "loss": 0.3607, + "step": 5173 + }, + { + "epoch": 0.1035758076220504, + "grad_norm": 1.1081438064575195, + "learning_rate": 9.85873341339098e-06, + "loss": 0.3651, + "step": 5174 + }, + { + "epoch": 0.10359582613917874, + "grad_norm": 1.1055444478988647, + "learning_rate": 9.858656887483899e-06, + "loss": 0.3383, + "step": 5175 + }, + { + "epoch": 0.10361584465630709, + "grad_norm": 1.0013424158096313, + "learning_rate": 9.85858034115207e-06, + "loss": 0.3176, + "step": 5176 + }, + { + "epoch": 0.10363586317343543, + "grad_norm": 1.0837260484695435, + "learning_rate": 9.858503774395815e-06, + "loss": 0.3666, + "step": 5177 + }, + { + "epoch": 0.10365588169056378, + "grad_norm": 1.243679404258728, + "learning_rate": 9.858427187215456e-06, + "loss": 0.3734, + "step": 5178 + }, + { + "epoch": 0.10367590020769211, + "grad_norm": 1.877756118774414, + "learning_rate": 9.858350579611315e-06, + "loss": 0.885, + "step": 5179 + }, + { + "epoch": 0.10369591872482047, + "grad_norm": 1.0515966415405273, + "learning_rate": 9.858273951583714e-06, + "loss": 0.3332, + "step": 5180 + }, + { + "epoch": 0.1037159372419488, + "grad_norm": 1.1083440780639648, + "learning_rate": 9.858197303132975e-06, + "loss": 0.3296, + "step": 5181 + }, + { + "epoch": 0.10373595575907715, + "grad_norm": 1.0733981132507324, + "learning_rate": 9.858120634259419e-06, + "loss": 0.3426, + "step": 5182 + }, + { + "epoch": 0.10375597427620549, + "grad_norm": 1.1335915327072144, + "learning_rate": 9.858043944963368e-06, + "loss": 0.3235, + "step": 5183 + }, + { + "epoch": 0.10377599279333384, + "grad_norm": 1.1355681419372559, + "learning_rate": 9.857967235245148e-06, + "loss": 0.399, + "step": 5184 + }, + { + "epoch": 0.10379601131046218, + "grad_norm": 1.2177523374557495, + "learning_rate": 9.85789050510508e-06, + "loss": 0.2922, + "step": 5185 + }, + { + "epoch": 0.10381602982759053, + "grad_norm": 1.1120381355285645, + "learning_rate": 9.857813754543483e-06, + "loss": 0.3603, + "step": 5186 + }, + { + "epoch": 0.10383604834471886, + "grad_norm": 1.1468193531036377, + "learning_rate": 9.857736983560685e-06, + "loss": 0.3404, + "step": 5187 + }, + { + "epoch": 0.10385606686184722, + "grad_norm": 1.0507789850234985, + "learning_rate": 9.857660192157004e-06, + "loss": 0.3373, + "step": 5188 + }, + { + "epoch": 0.10387608537897555, + "grad_norm": 1.0586001873016357, + "learning_rate": 9.857583380332764e-06, + "loss": 0.3071, + "step": 5189 + }, + { + "epoch": 0.1038961038961039, + "grad_norm": 1.1988015174865723, + "learning_rate": 9.857506548088291e-06, + "loss": 0.3323, + "step": 5190 + }, + { + "epoch": 0.10391612241323224, + "grad_norm": 1.1702077388763428, + "learning_rate": 9.857429695423905e-06, + "loss": 0.3583, + "step": 5191 + }, + { + "epoch": 0.10393614093036059, + "grad_norm": 1.115631341934204, + "learning_rate": 9.857352822339931e-06, + "loss": 0.2679, + "step": 5192 + }, + { + "epoch": 0.10395615944748893, + "grad_norm": 1.2235742807388306, + "learning_rate": 9.857275928836692e-06, + "loss": 0.353, + "step": 5193 + }, + { + "epoch": 0.10397617796461728, + "grad_norm": 1.0346180200576782, + "learning_rate": 9.857199014914507e-06, + "loss": 0.3223, + "step": 5194 + }, + { + "epoch": 0.10399619648174561, + "grad_norm": 1.1795527935028076, + "learning_rate": 9.857122080573705e-06, + "loss": 0.3076, + "step": 5195 + }, + { + "epoch": 0.10401621499887395, + "grad_norm": 1.1315855979919434, + "learning_rate": 9.857045125814607e-06, + "loss": 0.3287, + "step": 5196 + }, + { + "epoch": 0.1040362335160023, + "grad_norm": 1.1696420907974243, + "learning_rate": 9.856968150637535e-06, + "loss": 0.347, + "step": 5197 + }, + { + "epoch": 0.10405625203313064, + "grad_norm": 1.0117690563201904, + "learning_rate": 9.856891155042816e-06, + "loss": 0.2989, + "step": 5198 + }, + { + "epoch": 0.10407627055025899, + "grad_norm": 1.7679462432861328, + "learning_rate": 9.85681413903077e-06, + "loss": 0.842, + "step": 5199 + }, + { + "epoch": 0.10409628906738733, + "grad_norm": 1.0910658836364746, + "learning_rate": 9.856737102601724e-06, + "loss": 0.3341, + "step": 5200 + }, + { + "epoch": 0.10411630758451568, + "grad_norm": 1.2136356830596924, + "learning_rate": 9.856660045755999e-06, + "loss": 0.3664, + "step": 5201 + }, + { + "epoch": 0.10413632610164401, + "grad_norm": 1.033828616142273, + "learning_rate": 9.85658296849392e-06, + "loss": 0.3609, + "step": 5202 + }, + { + "epoch": 0.10415634461877236, + "grad_norm": 1.1442960500717163, + "learning_rate": 9.856505870815813e-06, + "loss": 0.2966, + "step": 5203 + }, + { + "epoch": 0.1041763631359007, + "grad_norm": 1.1148661375045776, + "learning_rate": 9.856428752722001e-06, + "loss": 0.3314, + "step": 5204 + }, + { + "epoch": 0.10419638165302905, + "grad_norm": 1.2237435579299927, + "learning_rate": 9.856351614212805e-06, + "loss": 0.3741, + "step": 5205 + }, + { + "epoch": 0.10421640017015739, + "grad_norm": 1.8454469442367554, + "learning_rate": 9.856274455288554e-06, + "loss": 0.7651, + "step": 5206 + }, + { + "epoch": 0.10423641868728574, + "grad_norm": 1.040086030960083, + "learning_rate": 9.856197275949569e-06, + "loss": 0.3314, + "step": 5207 + }, + { + "epoch": 0.10425643720441408, + "grad_norm": 1.0859894752502441, + "learning_rate": 9.856120076196177e-06, + "loss": 0.3013, + "step": 5208 + }, + { + "epoch": 0.10427645572154243, + "grad_norm": 1.270702838897705, + "learning_rate": 9.8560428560287e-06, + "loss": 0.4284, + "step": 5209 + }, + { + "epoch": 0.10429647423867076, + "grad_norm": 1.083738088607788, + "learning_rate": 9.855965615447464e-06, + "loss": 0.3522, + "step": 5210 + }, + { + "epoch": 0.10431649275579911, + "grad_norm": 1.1227809190750122, + "learning_rate": 9.855888354452796e-06, + "loss": 0.3465, + "step": 5211 + }, + { + "epoch": 0.10433651127292745, + "grad_norm": 0.9483809471130371, + "learning_rate": 9.855811073045016e-06, + "loss": 0.2949, + "step": 5212 + }, + { + "epoch": 0.1043565297900558, + "grad_norm": 1.1233323812484741, + "learning_rate": 9.855733771224452e-06, + "loss": 0.3265, + "step": 5213 + }, + { + "epoch": 0.10437654830718414, + "grad_norm": 1.2180542945861816, + "learning_rate": 9.855656448991426e-06, + "loss": 0.3313, + "step": 5214 + }, + { + "epoch": 0.10439656682431249, + "grad_norm": 1.1500656604766846, + "learning_rate": 9.855579106346267e-06, + "loss": 0.3008, + "step": 5215 + }, + { + "epoch": 0.10441658534144083, + "grad_norm": 1.1846777200698853, + "learning_rate": 9.855501743289299e-06, + "loss": 0.3444, + "step": 5216 + }, + { + "epoch": 0.10443660385856918, + "grad_norm": 1.0658611059188843, + "learning_rate": 9.855424359820846e-06, + "loss": 0.3357, + "step": 5217 + }, + { + "epoch": 0.10445662237569751, + "grad_norm": 1.2709217071533203, + "learning_rate": 9.855346955941235e-06, + "loss": 0.3294, + "step": 5218 + }, + { + "epoch": 0.10447664089282586, + "grad_norm": 1.8392184972763062, + "learning_rate": 9.855269531650789e-06, + "loss": 0.8972, + "step": 5219 + }, + { + "epoch": 0.1044966594099542, + "grad_norm": 1.1614891290664673, + "learning_rate": 9.855192086949834e-06, + "loss": 0.3193, + "step": 5220 + }, + { + "epoch": 0.10451667792708255, + "grad_norm": 1.0212011337280273, + "learning_rate": 9.855114621838698e-06, + "loss": 0.3176, + "step": 5221 + }, + { + "epoch": 0.10453669644421089, + "grad_norm": 1.0677632093429565, + "learning_rate": 9.855037136317704e-06, + "loss": 0.3307, + "step": 5222 + }, + { + "epoch": 0.10455671496133924, + "grad_norm": 1.2050167322158813, + "learning_rate": 9.854959630387179e-06, + "loss": 0.3419, + "step": 5223 + }, + { + "epoch": 0.10457673347846758, + "grad_norm": 1.2834599018096924, + "learning_rate": 9.854882104047447e-06, + "loss": 0.3636, + "step": 5224 + }, + { + "epoch": 0.10459675199559593, + "grad_norm": 1.0827090740203857, + "learning_rate": 9.854804557298838e-06, + "loss": 0.3516, + "step": 5225 + }, + { + "epoch": 0.10461677051272426, + "grad_norm": 1.8324650526046753, + "learning_rate": 9.854726990141674e-06, + "loss": 0.8688, + "step": 5226 + }, + { + "epoch": 0.10463678902985261, + "grad_norm": 1.011949896812439, + "learning_rate": 9.854649402576282e-06, + "loss": 0.347, + "step": 5227 + }, + { + "epoch": 0.10465680754698095, + "grad_norm": 1.143586277961731, + "learning_rate": 9.854571794602991e-06, + "loss": 0.3785, + "step": 5228 + }, + { + "epoch": 0.1046768260641093, + "grad_norm": 1.0757858753204346, + "learning_rate": 9.854494166222122e-06, + "loss": 0.3119, + "step": 5229 + }, + { + "epoch": 0.10469684458123764, + "grad_norm": 1.1193090677261353, + "learning_rate": 9.854416517434007e-06, + "loss": 0.3718, + "step": 5230 + }, + { + "epoch": 0.10471686309836599, + "grad_norm": 1.12441885471344, + "learning_rate": 9.854338848238967e-06, + "loss": 0.3056, + "step": 5231 + }, + { + "epoch": 0.10473688161549433, + "grad_norm": 1.081398844718933, + "learning_rate": 9.854261158637333e-06, + "loss": 0.3587, + "step": 5232 + }, + { + "epoch": 0.10475690013262268, + "grad_norm": 0.9743266701698303, + "learning_rate": 9.854183448629428e-06, + "loss": 0.27, + "step": 5233 + }, + { + "epoch": 0.10477691864975101, + "grad_norm": 1.919697880744934, + "learning_rate": 9.854105718215582e-06, + "loss": 0.8327, + "step": 5234 + }, + { + "epoch": 0.10479693716687936, + "grad_norm": 1.966170072555542, + "learning_rate": 9.85402796739612e-06, + "loss": 0.8631, + "step": 5235 + }, + { + "epoch": 0.1048169556840077, + "grad_norm": 1.1270724534988403, + "learning_rate": 9.85395019617137e-06, + "loss": 0.3351, + "step": 5236 + }, + { + "epoch": 0.10483697420113605, + "grad_norm": 1.0334503650665283, + "learning_rate": 9.853872404541656e-06, + "loss": 0.3631, + "step": 5237 + }, + { + "epoch": 0.10485699271826439, + "grad_norm": 1.297415852546692, + "learning_rate": 9.853794592507306e-06, + "loss": 0.3533, + "step": 5238 + }, + { + "epoch": 0.10487701123539274, + "grad_norm": 1.111788034439087, + "learning_rate": 9.85371676006865e-06, + "loss": 0.3595, + "step": 5239 + }, + { + "epoch": 0.10489702975252108, + "grad_norm": 1.1099284887313843, + "learning_rate": 9.853638907226013e-06, + "loss": 0.3075, + "step": 5240 + }, + { + "epoch": 0.10491704826964943, + "grad_norm": 1.066067099571228, + "learning_rate": 9.853561033979723e-06, + "loss": 0.3356, + "step": 5241 + }, + { + "epoch": 0.10493706678677776, + "grad_norm": 1.1087578535079956, + "learning_rate": 9.853483140330105e-06, + "loss": 0.3206, + "step": 5242 + }, + { + "epoch": 0.10495708530390611, + "grad_norm": 1.0429092645645142, + "learning_rate": 9.853405226277491e-06, + "loss": 0.3014, + "step": 5243 + }, + { + "epoch": 0.10497710382103445, + "grad_norm": 1.3931323289871216, + "learning_rate": 9.853327291822205e-06, + "loss": 0.3433, + "step": 5244 + }, + { + "epoch": 0.1049971223381628, + "grad_norm": 0.9894484281539917, + "learning_rate": 9.853249336964575e-06, + "loss": 0.3228, + "step": 5245 + }, + { + "epoch": 0.10501714085529114, + "grad_norm": 1.1276218891143799, + "learning_rate": 9.85317136170493e-06, + "loss": 0.3378, + "step": 5246 + }, + { + "epoch": 0.10503715937241949, + "grad_norm": 1.0780223608016968, + "learning_rate": 9.853093366043596e-06, + "loss": 0.3086, + "step": 5247 + }, + { + "epoch": 0.10505717788954783, + "grad_norm": 1.2054356336593628, + "learning_rate": 9.853015349980901e-06, + "loss": 0.3597, + "step": 5248 + }, + { + "epoch": 0.10507719640667618, + "grad_norm": 1.1360735893249512, + "learning_rate": 9.852937313517177e-06, + "loss": 0.3517, + "step": 5249 + }, + { + "epoch": 0.10509721492380451, + "grad_norm": 1.0366064310073853, + "learning_rate": 9.852859256652744e-06, + "loss": 0.3253, + "step": 5250 + }, + { + "epoch": 0.10511723344093286, + "grad_norm": 1.1546847820281982, + "learning_rate": 9.85278117938794e-06, + "loss": 0.294, + "step": 5251 + }, + { + "epoch": 0.1051372519580612, + "grad_norm": 1.161771535873413, + "learning_rate": 9.852703081723085e-06, + "loss": 0.3923, + "step": 5252 + }, + { + "epoch": 0.10515727047518955, + "grad_norm": 1.0039829015731812, + "learning_rate": 9.852624963658512e-06, + "loss": 0.3323, + "step": 5253 + }, + { + "epoch": 0.10517728899231789, + "grad_norm": 1.1828868389129639, + "learning_rate": 9.852546825194549e-06, + "loss": 0.3249, + "step": 5254 + }, + { + "epoch": 0.10519730750944624, + "grad_norm": 1.12959623336792, + "learning_rate": 9.85246866633152e-06, + "loss": 0.3012, + "step": 5255 + }, + { + "epoch": 0.10521732602657458, + "grad_norm": 1.095025658607483, + "learning_rate": 9.85239048706976e-06, + "loss": 0.3369, + "step": 5256 + }, + { + "epoch": 0.10523734454370293, + "grad_norm": 1.1317540407180786, + "learning_rate": 9.852312287409595e-06, + "loss": 0.3355, + "step": 5257 + }, + { + "epoch": 0.10525736306083126, + "grad_norm": 1.2372995615005493, + "learning_rate": 9.852234067351352e-06, + "loss": 0.3369, + "step": 5258 + }, + { + "epoch": 0.10527738157795961, + "grad_norm": 1.0705517530441284, + "learning_rate": 9.852155826895363e-06, + "loss": 0.3294, + "step": 5259 + }, + { + "epoch": 0.10529740009508795, + "grad_norm": 1.1428719758987427, + "learning_rate": 9.852077566041953e-06, + "loss": 0.3815, + "step": 5260 + }, + { + "epoch": 0.1053174186122163, + "grad_norm": 1.0866589546203613, + "learning_rate": 9.851999284791453e-06, + "loss": 0.3588, + "step": 5261 + }, + { + "epoch": 0.10533743712934464, + "grad_norm": 1.058692455291748, + "learning_rate": 9.851920983144192e-06, + "loss": 0.3078, + "step": 5262 + }, + { + "epoch": 0.10535745564647299, + "grad_norm": 1.0421427488327026, + "learning_rate": 9.8518426611005e-06, + "loss": 0.3416, + "step": 5263 + }, + { + "epoch": 0.10537747416360133, + "grad_norm": 1.0131919384002686, + "learning_rate": 9.851764318660706e-06, + "loss": 0.2647, + "step": 5264 + }, + { + "epoch": 0.10539749268072968, + "grad_norm": 1.1189948320388794, + "learning_rate": 9.851685955825139e-06, + "loss": 0.3601, + "step": 5265 + }, + { + "epoch": 0.10541751119785801, + "grad_norm": 1.1410179138183594, + "learning_rate": 9.851607572594128e-06, + "loss": 0.3384, + "step": 5266 + }, + { + "epoch": 0.10543752971498636, + "grad_norm": 1.0766783952713013, + "learning_rate": 9.851529168968002e-06, + "loss": 0.3234, + "step": 5267 + }, + { + "epoch": 0.1054575482321147, + "grad_norm": 1.0427227020263672, + "learning_rate": 9.851450744947093e-06, + "loss": 0.3153, + "step": 5268 + }, + { + "epoch": 0.10547756674924305, + "grad_norm": 1.075554370880127, + "learning_rate": 9.851372300531727e-06, + "loss": 0.3333, + "step": 5269 + }, + { + "epoch": 0.10549758526637139, + "grad_norm": 1.0363037586212158, + "learning_rate": 9.851293835722238e-06, + "loss": 0.3471, + "step": 5270 + }, + { + "epoch": 0.10551760378349974, + "grad_norm": 1.1098095178604126, + "learning_rate": 9.851215350518953e-06, + "loss": 0.3502, + "step": 5271 + }, + { + "epoch": 0.10553762230062808, + "grad_norm": 0.9867364764213562, + "learning_rate": 9.851136844922202e-06, + "loss": 0.2952, + "step": 5272 + }, + { + "epoch": 0.10555764081775643, + "grad_norm": 1.0482569932937622, + "learning_rate": 9.851058318932318e-06, + "loss": 0.3222, + "step": 5273 + }, + { + "epoch": 0.10557765933488476, + "grad_norm": 1.0205650329589844, + "learning_rate": 9.850979772549625e-06, + "loss": 0.326, + "step": 5274 + }, + { + "epoch": 0.10559767785201311, + "grad_norm": 1.1425485610961914, + "learning_rate": 9.850901205774461e-06, + "loss": 0.3246, + "step": 5275 + }, + { + "epoch": 0.10561769636914145, + "grad_norm": 0.9881823658943176, + "learning_rate": 9.85082261860715e-06, + "loss": 0.343, + "step": 5276 + }, + { + "epoch": 0.1056377148862698, + "grad_norm": 1.118388295173645, + "learning_rate": 9.850744011048024e-06, + "loss": 0.3748, + "step": 5277 + }, + { + "epoch": 0.10565773340339814, + "grad_norm": 1.0703531503677368, + "learning_rate": 9.850665383097415e-06, + "loss": 0.3055, + "step": 5278 + }, + { + "epoch": 0.10567775192052649, + "grad_norm": 1.1348673105239868, + "learning_rate": 9.850586734755652e-06, + "loss": 0.3336, + "step": 5279 + }, + { + "epoch": 0.10569777043765483, + "grad_norm": 1.1924076080322266, + "learning_rate": 9.850508066023067e-06, + "loss": 0.3311, + "step": 5280 + }, + { + "epoch": 0.10571778895478318, + "grad_norm": 1.0435810089111328, + "learning_rate": 9.85042937689999e-06, + "loss": 0.3292, + "step": 5281 + }, + { + "epoch": 0.10573780747191151, + "grad_norm": 1.1323755979537964, + "learning_rate": 9.850350667386752e-06, + "loss": 0.3268, + "step": 5282 + }, + { + "epoch": 0.10575782598903986, + "grad_norm": 1.09293532371521, + "learning_rate": 9.850271937483683e-06, + "loss": 0.3265, + "step": 5283 + }, + { + "epoch": 0.1057778445061682, + "grad_norm": 1.0537525415420532, + "learning_rate": 9.850193187191115e-06, + "loss": 0.2995, + "step": 5284 + }, + { + "epoch": 0.10579786302329655, + "grad_norm": 1.1570711135864258, + "learning_rate": 9.850114416509377e-06, + "loss": 0.3393, + "step": 5285 + }, + { + "epoch": 0.10581788154042489, + "grad_norm": 1.1199419498443604, + "learning_rate": 9.850035625438804e-06, + "loss": 0.4007, + "step": 5286 + }, + { + "epoch": 0.10583790005755324, + "grad_norm": 1.0479490756988525, + "learning_rate": 9.849956813979725e-06, + "loss": 0.297, + "step": 5287 + }, + { + "epoch": 0.10585791857468158, + "grad_norm": 1.1382496356964111, + "learning_rate": 9.84987798213247e-06, + "loss": 0.3382, + "step": 5288 + }, + { + "epoch": 0.10587793709180993, + "grad_norm": 0.9704822301864624, + "learning_rate": 9.84979912989737e-06, + "loss": 0.3475, + "step": 5289 + }, + { + "epoch": 0.10589795560893826, + "grad_norm": 1.0733559131622314, + "learning_rate": 9.849720257274761e-06, + "loss": 0.3501, + "step": 5290 + }, + { + "epoch": 0.10591797412606661, + "grad_norm": 1.1112279891967773, + "learning_rate": 9.84964136426497e-06, + "loss": 0.2792, + "step": 5291 + }, + { + "epoch": 0.10593799264319495, + "grad_norm": 1.1677021980285645, + "learning_rate": 9.849562450868332e-06, + "loss": 0.3003, + "step": 5292 + }, + { + "epoch": 0.1059580111603233, + "grad_norm": 1.0627375841140747, + "learning_rate": 9.849483517085177e-06, + "loss": 0.3503, + "step": 5293 + }, + { + "epoch": 0.10597802967745164, + "grad_norm": 1.1932255029678345, + "learning_rate": 9.849404562915835e-06, + "loss": 0.3145, + "step": 5294 + }, + { + "epoch": 0.10599804819457999, + "grad_norm": 1.0688520669937134, + "learning_rate": 9.849325588360641e-06, + "loss": 0.344, + "step": 5295 + }, + { + "epoch": 0.10601806671170833, + "grad_norm": 1.0738303661346436, + "learning_rate": 9.849246593419925e-06, + "loss": 0.3136, + "step": 5296 + }, + { + "epoch": 0.10603808522883668, + "grad_norm": 1.8859679698944092, + "learning_rate": 9.84916757809402e-06, + "loss": 0.887, + "step": 5297 + }, + { + "epoch": 0.10605810374596501, + "grad_norm": 1.2422382831573486, + "learning_rate": 9.84908854238326e-06, + "loss": 0.3386, + "step": 5298 + }, + { + "epoch": 0.10607812226309336, + "grad_norm": 1.0371476411819458, + "learning_rate": 9.849009486287974e-06, + "loss": 0.3193, + "step": 5299 + }, + { + "epoch": 0.1060981407802217, + "grad_norm": 1.1480865478515625, + "learning_rate": 9.848930409808496e-06, + "loss": 0.3185, + "step": 5300 + }, + { + "epoch": 0.10611815929735005, + "grad_norm": 1.1420879364013672, + "learning_rate": 9.848851312945158e-06, + "loss": 0.3011, + "step": 5301 + }, + { + "epoch": 0.10613817781447839, + "grad_norm": 1.0901683568954468, + "learning_rate": 9.848772195698292e-06, + "loss": 0.3553, + "step": 5302 + }, + { + "epoch": 0.10615819633160674, + "grad_norm": 1.2463239431381226, + "learning_rate": 9.848693058068232e-06, + "loss": 0.3674, + "step": 5303 + }, + { + "epoch": 0.10617821484873508, + "grad_norm": 1.7155601978302002, + "learning_rate": 9.848613900055309e-06, + "loss": 0.8907, + "step": 5304 + }, + { + "epoch": 0.10619823336586343, + "grad_norm": 1.1545689105987549, + "learning_rate": 9.848534721659857e-06, + "loss": 0.3356, + "step": 5305 + }, + { + "epoch": 0.10621825188299176, + "grad_norm": 1.2768514156341553, + "learning_rate": 9.848455522882208e-06, + "loss": 0.3464, + "step": 5306 + }, + { + "epoch": 0.10623827040012011, + "grad_norm": 1.126941442489624, + "learning_rate": 9.848376303722696e-06, + "loss": 0.3025, + "step": 5307 + }, + { + "epoch": 0.10625828891724845, + "grad_norm": 1.067276954650879, + "learning_rate": 9.848297064181654e-06, + "loss": 0.3402, + "step": 5308 + }, + { + "epoch": 0.1062783074343768, + "grad_norm": 1.2110872268676758, + "learning_rate": 9.848217804259413e-06, + "loss": 0.3457, + "step": 5309 + }, + { + "epoch": 0.10629832595150514, + "grad_norm": 0.9972941875457764, + "learning_rate": 9.848138523956308e-06, + "loss": 0.3422, + "step": 5310 + }, + { + "epoch": 0.10631834446863349, + "grad_norm": 1.0428922176361084, + "learning_rate": 9.848059223272672e-06, + "loss": 0.3596, + "step": 5311 + }, + { + "epoch": 0.10633836298576183, + "grad_norm": 0.9784172773361206, + "learning_rate": 9.847979902208838e-06, + "loss": 0.3123, + "step": 5312 + }, + { + "epoch": 0.10635838150289018, + "grad_norm": 1.1630983352661133, + "learning_rate": 9.847900560765141e-06, + "loss": 0.3545, + "step": 5313 + }, + { + "epoch": 0.10637840002001851, + "grad_norm": 1.2699795961380005, + "learning_rate": 9.847821198941913e-06, + "loss": 0.3649, + "step": 5314 + }, + { + "epoch": 0.10639841853714686, + "grad_norm": 1.0690141916275024, + "learning_rate": 9.847741816739488e-06, + "loss": 0.3008, + "step": 5315 + }, + { + "epoch": 0.1064184370542752, + "grad_norm": 1.0848478078842163, + "learning_rate": 9.8476624141582e-06, + "loss": 0.3213, + "step": 5316 + }, + { + "epoch": 0.10643845557140355, + "grad_norm": 1.0705442428588867, + "learning_rate": 9.847582991198381e-06, + "loss": 0.2793, + "step": 5317 + }, + { + "epoch": 0.10645847408853189, + "grad_norm": 1.8252289295196533, + "learning_rate": 9.847503547860367e-06, + "loss": 0.8526, + "step": 5318 + }, + { + "epoch": 0.10647849260566024, + "grad_norm": 1.102009892463684, + "learning_rate": 9.84742408414449e-06, + "loss": 0.2913, + "step": 5319 + }, + { + "epoch": 0.10649851112278858, + "grad_norm": 1.0179623365402222, + "learning_rate": 9.847344600051088e-06, + "loss": 0.3569, + "step": 5320 + }, + { + "epoch": 0.10651852963991693, + "grad_norm": 2.007408380508423, + "learning_rate": 9.847265095580491e-06, + "loss": 0.8248, + "step": 5321 + }, + { + "epoch": 0.10653854815704526, + "grad_norm": 1.1503909826278687, + "learning_rate": 9.847185570733034e-06, + "loss": 0.297, + "step": 5322 + }, + { + "epoch": 0.10655856667417361, + "grad_norm": 2.040682554244995, + "learning_rate": 9.847106025509054e-06, + "loss": 0.9003, + "step": 5323 + }, + { + "epoch": 0.10657858519130195, + "grad_norm": 1.0140066146850586, + "learning_rate": 9.847026459908882e-06, + "loss": 0.3211, + "step": 5324 + }, + { + "epoch": 0.1065986037084303, + "grad_norm": 1.1137783527374268, + "learning_rate": 9.846946873932853e-06, + "loss": 0.3126, + "step": 5325 + }, + { + "epoch": 0.10661862222555864, + "grad_norm": 1.0256415605545044, + "learning_rate": 9.846867267581303e-06, + "loss": 0.3325, + "step": 5326 + }, + { + "epoch": 0.10663864074268699, + "grad_norm": 1.159629464149475, + "learning_rate": 9.846787640854567e-06, + "loss": 0.3242, + "step": 5327 + }, + { + "epoch": 0.10665865925981532, + "grad_norm": 1.1620213985443115, + "learning_rate": 9.846707993752978e-06, + "loss": 0.4012, + "step": 5328 + }, + { + "epoch": 0.10667867777694368, + "grad_norm": 1.06884765625, + "learning_rate": 9.846628326276873e-06, + "loss": 0.2699, + "step": 5329 + }, + { + "epoch": 0.10669869629407201, + "grad_norm": 1.2242668867111206, + "learning_rate": 9.846548638426584e-06, + "loss": 0.3794, + "step": 5330 + }, + { + "epoch": 0.10671871481120036, + "grad_norm": 1.0639331340789795, + "learning_rate": 9.846468930202449e-06, + "loss": 0.3127, + "step": 5331 + }, + { + "epoch": 0.1067387333283287, + "grad_norm": 1.2183395624160767, + "learning_rate": 9.8463892016048e-06, + "loss": 0.3833, + "step": 5332 + }, + { + "epoch": 0.10675875184545705, + "grad_norm": 1.1703081130981445, + "learning_rate": 9.846309452633974e-06, + "loss": 0.3254, + "step": 5333 + }, + { + "epoch": 0.10677877036258539, + "grad_norm": 1.1469722986221313, + "learning_rate": 9.846229683290306e-06, + "loss": 0.3544, + "step": 5334 + }, + { + "epoch": 0.10679878887971374, + "grad_norm": 1.81344735622406, + "learning_rate": 9.846149893574133e-06, + "loss": 0.8831, + "step": 5335 + }, + { + "epoch": 0.10681880739684207, + "grad_norm": 1.1748303174972534, + "learning_rate": 9.846070083485787e-06, + "loss": 0.3655, + "step": 5336 + }, + { + "epoch": 0.10683882591397043, + "grad_norm": 1.048671007156372, + "learning_rate": 9.845990253025605e-06, + "loss": 0.3743, + "step": 5337 + }, + { + "epoch": 0.10685884443109876, + "grad_norm": 1.0956170558929443, + "learning_rate": 9.845910402193923e-06, + "loss": 0.257, + "step": 5338 + }, + { + "epoch": 0.10687886294822711, + "grad_norm": 1.1174969673156738, + "learning_rate": 9.845830530991077e-06, + "loss": 0.3405, + "step": 5339 + }, + { + "epoch": 0.10689888146535545, + "grad_norm": 1.088494896888733, + "learning_rate": 9.845750639417403e-06, + "loss": 0.3601, + "step": 5340 + }, + { + "epoch": 0.1069188999824838, + "grad_norm": 1.739548683166504, + "learning_rate": 9.845670727473236e-06, + "loss": 0.8105, + "step": 5341 + }, + { + "epoch": 0.10693891849961214, + "grad_norm": 0.9404656887054443, + "learning_rate": 9.84559079515891e-06, + "loss": 0.3141, + "step": 5342 + }, + { + "epoch": 0.10695893701674049, + "grad_norm": 1.1709855794906616, + "learning_rate": 9.845510842474766e-06, + "loss": 0.3469, + "step": 5343 + }, + { + "epoch": 0.10697895553386882, + "grad_norm": 1.1093802452087402, + "learning_rate": 9.845430869421135e-06, + "loss": 0.2977, + "step": 5344 + }, + { + "epoch": 0.10699897405099718, + "grad_norm": 1.0254151821136475, + "learning_rate": 9.845350875998356e-06, + "loss": 0.3381, + "step": 5345 + }, + { + "epoch": 0.10701899256812551, + "grad_norm": 1.911950945854187, + "learning_rate": 9.845270862206765e-06, + "loss": 0.8536, + "step": 5346 + }, + { + "epoch": 0.10703901108525386, + "grad_norm": 1.0261378288269043, + "learning_rate": 9.845190828046698e-06, + "loss": 0.3793, + "step": 5347 + }, + { + "epoch": 0.1070590296023822, + "grad_norm": 1.1085313558578491, + "learning_rate": 9.845110773518491e-06, + "loss": 0.3108, + "step": 5348 + }, + { + "epoch": 0.10707904811951055, + "grad_norm": 0.9973580241203308, + "learning_rate": 9.845030698622482e-06, + "loss": 0.3163, + "step": 5349 + }, + { + "epoch": 0.10709906663663889, + "grad_norm": 1.1968073844909668, + "learning_rate": 9.844950603359005e-06, + "loss": 0.323, + "step": 5350 + }, + { + "epoch": 0.10711908515376724, + "grad_norm": 1.0596221685409546, + "learning_rate": 9.844870487728399e-06, + "loss": 0.3128, + "step": 5351 + }, + { + "epoch": 0.10713910367089557, + "grad_norm": 1.1476212739944458, + "learning_rate": 9.844790351731001e-06, + "loss": 0.3829, + "step": 5352 + }, + { + "epoch": 0.10715912218802393, + "grad_norm": 1.1651347875595093, + "learning_rate": 9.844710195367146e-06, + "loss": 0.2941, + "step": 5353 + }, + { + "epoch": 0.10717914070515226, + "grad_norm": 1.8511347770690918, + "learning_rate": 9.844630018637172e-06, + "loss": 0.9873, + "step": 5354 + }, + { + "epoch": 0.10719915922228061, + "grad_norm": 1.1951746940612793, + "learning_rate": 9.844549821541416e-06, + "loss": 0.3223, + "step": 5355 + }, + { + "epoch": 0.10721917773940895, + "grad_norm": 1.056623101234436, + "learning_rate": 9.844469604080216e-06, + "loss": 0.3176, + "step": 5356 + }, + { + "epoch": 0.1072391962565373, + "grad_norm": 1.1782379150390625, + "learning_rate": 9.844389366253906e-06, + "loss": 0.3266, + "step": 5357 + }, + { + "epoch": 0.10725921477366564, + "grad_norm": 1.1268658638000488, + "learning_rate": 9.844309108062828e-06, + "loss": 0.3924, + "step": 5358 + }, + { + "epoch": 0.10727923329079399, + "grad_norm": 1.0712181329727173, + "learning_rate": 9.844228829507317e-06, + "loss": 0.3315, + "step": 5359 + }, + { + "epoch": 0.10729925180792232, + "grad_norm": 1.0263322591781616, + "learning_rate": 9.844148530587709e-06, + "loss": 0.3348, + "step": 5360 + }, + { + "epoch": 0.10731927032505068, + "grad_norm": 1.062761664390564, + "learning_rate": 9.844068211304343e-06, + "loss": 0.3673, + "step": 5361 + }, + { + "epoch": 0.10733928884217901, + "grad_norm": 1.4839907884597778, + "learning_rate": 9.843987871657557e-06, + "loss": 0.3555, + "step": 5362 + }, + { + "epoch": 0.10735930735930736, + "grad_norm": 1.0987942218780518, + "learning_rate": 9.84390751164769e-06, + "loss": 0.3468, + "step": 5363 + }, + { + "epoch": 0.1073793258764357, + "grad_norm": 1.10943603515625, + "learning_rate": 9.843827131275076e-06, + "loss": 0.3659, + "step": 5364 + }, + { + "epoch": 0.10739934439356405, + "grad_norm": 1.940420150756836, + "learning_rate": 9.843746730540057e-06, + "loss": 0.8559, + "step": 5365 + }, + { + "epoch": 0.10741936291069239, + "grad_norm": 1.1021716594696045, + "learning_rate": 9.843666309442968e-06, + "loss": 0.3065, + "step": 5366 + }, + { + "epoch": 0.10743938142782074, + "grad_norm": 1.0305289030075073, + "learning_rate": 9.843585867984149e-06, + "loss": 0.3404, + "step": 5367 + }, + { + "epoch": 0.10745939994494907, + "grad_norm": 1.790729284286499, + "learning_rate": 9.843505406163937e-06, + "loss": 0.878, + "step": 5368 + }, + { + "epoch": 0.10747941846207743, + "grad_norm": 1.1035012006759644, + "learning_rate": 9.84342492398267e-06, + "loss": 0.3307, + "step": 5369 + }, + { + "epoch": 0.10749943697920576, + "grad_norm": 1.2640787363052368, + "learning_rate": 9.843344421440689e-06, + "loss": 0.3453, + "step": 5370 + }, + { + "epoch": 0.10751945549633411, + "grad_norm": 1.0853309631347656, + "learning_rate": 9.843263898538327e-06, + "loss": 0.3529, + "step": 5371 + }, + { + "epoch": 0.10753947401346245, + "grad_norm": 1.1851922273635864, + "learning_rate": 9.84318335527593e-06, + "loss": 0.3532, + "step": 5372 + }, + { + "epoch": 0.1075594925305908, + "grad_norm": 1.0729670524597168, + "learning_rate": 9.84310279165383e-06, + "loss": 0.3458, + "step": 5373 + }, + { + "epoch": 0.10757951104771914, + "grad_norm": 1.4604177474975586, + "learning_rate": 9.843022207672367e-06, + "loss": 0.3171, + "step": 5374 + }, + { + "epoch": 0.10759952956484749, + "grad_norm": 1.2376679182052612, + "learning_rate": 9.84294160333188e-06, + "loss": 0.2746, + "step": 5375 + }, + { + "epoch": 0.10761954808197582, + "grad_norm": 1.203576922416687, + "learning_rate": 9.842860978632713e-06, + "loss": 0.3635, + "step": 5376 + }, + { + "epoch": 0.10763956659910418, + "grad_norm": 1.077223539352417, + "learning_rate": 9.842780333575199e-06, + "loss": 0.278, + "step": 5377 + }, + { + "epoch": 0.10765958511623251, + "grad_norm": 1.0414397716522217, + "learning_rate": 9.842699668159677e-06, + "loss": 0.3055, + "step": 5378 + }, + { + "epoch": 0.10767960363336086, + "grad_norm": 1.8955103158950806, + "learning_rate": 9.842618982386488e-06, + "loss": 0.794, + "step": 5379 + }, + { + "epoch": 0.1076996221504892, + "grad_norm": 1.060389518737793, + "learning_rate": 9.842538276255972e-06, + "loss": 0.3153, + "step": 5380 + }, + { + "epoch": 0.10771964066761755, + "grad_norm": 1.168236494064331, + "learning_rate": 9.842457549768468e-06, + "loss": 0.3513, + "step": 5381 + }, + { + "epoch": 0.10773965918474589, + "grad_norm": 1.0705726146697998, + "learning_rate": 9.842376802924313e-06, + "loss": 0.2946, + "step": 5382 + }, + { + "epoch": 0.10775967770187424, + "grad_norm": 1.0711942911148071, + "learning_rate": 9.84229603572385e-06, + "loss": 0.3016, + "step": 5383 + }, + { + "epoch": 0.10777969621900257, + "grad_norm": 1.79459810256958, + "learning_rate": 9.842215248167414e-06, + "loss": 0.8339, + "step": 5384 + }, + { + "epoch": 0.10779971473613092, + "grad_norm": 1.0896803140640259, + "learning_rate": 9.842134440255349e-06, + "loss": 0.3105, + "step": 5385 + }, + { + "epoch": 0.10781973325325926, + "grad_norm": 1.3172422647476196, + "learning_rate": 9.842053611987993e-06, + "loss": 0.3331, + "step": 5386 + }, + { + "epoch": 0.10783975177038761, + "grad_norm": 1.1763824224472046, + "learning_rate": 9.841972763365684e-06, + "loss": 0.3043, + "step": 5387 + }, + { + "epoch": 0.10785977028751595, + "grad_norm": 1.0846519470214844, + "learning_rate": 9.841891894388764e-06, + "loss": 0.3143, + "step": 5388 + }, + { + "epoch": 0.1078797888046443, + "grad_norm": 1.720238447189331, + "learning_rate": 9.841811005057574e-06, + "loss": 0.8542, + "step": 5389 + }, + { + "epoch": 0.10789980732177264, + "grad_norm": 1.2452443838119507, + "learning_rate": 9.84173009537245e-06, + "loss": 0.4062, + "step": 5390 + }, + { + "epoch": 0.10791982583890099, + "grad_norm": 1.1342657804489136, + "learning_rate": 9.841649165333735e-06, + "loss": 0.3663, + "step": 5391 + }, + { + "epoch": 0.10793984435602932, + "grad_norm": 1.0610198974609375, + "learning_rate": 9.84156821494177e-06, + "loss": 0.3311, + "step": 5392 + }, + { + "epoch": 0.10795986287315767, + "grad_norm": 1.0920524597167969, + "learning_rate": 9.841487244196895e-06, + "loss": 0.2873, + "step": 5393 + }, + { + "epoch": 0.10797988139028601, + "grad_norm": 1.117134928703308, + "learning_rate": 9.841406253099448e-06, + "loss": 0.3293, + "step": 5394 + }, + { + "epoch": 0.10799989990741436, + "grad_norm": 1.0871680974960327, + "learning_rate": 9.84132524164977e-06, + "loss": 0.3022, + "step": 5395 + }, + { + "epoch": 0.1080199184245427, + "grad_norm": 1.2638015747070312, + "learning_rate": 9.841244209848205e-06, + "loss": 0.3519, + "step": 5396 + }, + { + "epoch": 0.10803993694167105, + "grad_norm": 0.9978744387626648, + "learning_rate": 9.84116315769509e-06, + "loss": 0.335, + "step": 5397 + }, + { + "epoch": 0.10805995545879939, + "grad_norm": 1.2595595121383667, + "learning_rate": 9.841082085190768e-06, + "loss": 0.3503, + "step": 5398 + }, + { + "epoch": 0.10807997397592774, + "grad_norm": 1.1757606267929077, + "learning_rate": 9.841000992335577e-06, + "loss": 0.3697, + "step": 5399 + }, + { + "epoch": 0.10809999249305607, + "grad_norm": 1.1148394346237183, + "learning_rate": 9.84091987912986e-06, + "loss": 0.3944, + "step": 5400 + }, + { + "epoch": 0.10812001101018442, + "grad_norm": 1.1156938076019287, + "learning_rate": 9.840838745573958e-06, + "loss": 0.3558, + "step": 5401 + }, + { + "epoch": 0.10814002952731276, + "grad_norm": 1.1117750406265259, + "learning_rate": 9.840757591668212e-06, + "loss": 0.3607, + "step": 5402 + }, + { + "epoch": 0.10816004804444111, + "grad_norm": 1.0076675415039062, + "learning_rate": 9.840676417412962e-06, + "loss": 0.3572, + "step": 5403 + }, + { + "epoch": 0.10818006656156945, + "grad_norm": 1.2028179168701172, + "learning_rate": 9.84059522280855e-06, + "loss": 0.4147, + "step": 5404 + }, + { + "epoch": 0.1082000850786978, + "grad_norm": 1.9296643733978271, + "learning_rate": 9.84051400785532e-06, + "loss": 0.8048, + "step": 5405 + }, + { + "epoch": 0.10822010359582614, + "grad_norm": 1.8646425008773804, + "learning_rate": 9.840432772553607e-06, + "loss": 0.7882, + "step": 5406 + }, + { + "epoch": 0.10824012211295449, + "grad_norm": 1.1174969673156738, + "learning_rate": 9.84035151690376e-06, + "loss": 0.2828, + "step": 5407 + }, + { + "epoch": 0.10826014063008282, + "grad_norm": 1.039497971534729, + "learning_rate": 9.840270240906114e-06, + "loss": 0.3533, + "step": 5408 + }, + { + "epoch": 0.10828015914721117, + "grad_norm": 1.0242433547973633, + "learning_rate": 9.840188944561014e-06, + "loss": 0.3429, + "step": 5409 + }, + { + "epoch": 0.10830017766433951, + "grad_norm": 1.1203054189682007, + "learning_rate": 9.840107627868802e-06, + "loss": 0.3803, + "step": 5410 + }, + { + "epoch": 0.10832019618146786, + "grad_norm": 1.0808653831481934, + "learning_rate": 9.840026290829818e-06, + "loss": 0.3349, + "step": 5411 + }, + { + "epoch": 0.1083402146985962, + "grad_norm": 1.1568557024002075, + "learning_rate": 9.839944933444407e-06, + "loss": 0.2898, + "step": 5412 + }, + { + "epoch": 0.10836023321572455, + "grad_norm": 1.239492654800415, + "learning_rate": 9.839863555712907e-06, + "loss": 0.3756, + "step": 5413 + }, + { + "epoch": 0.10838025173285289, + "grad_norm": 1.158781886100769, + "learning_rate": 9.839782157635665e-06, + "loss": 0.3819, + "step": 5414 + }, + { + "epoch": 0.10840027024998124, + "grad_norm": 0.9666725397109985, + "learning_rate": 9.839700739213019e-06, + "loss": 0.2868, + "step": 5415 + }, + { + "epoch": 0.10842028876710957, + "grad_norm": 1.1597486734390259, + "learning_rate": 9.83961930044531e-06, + "loss": 0.3409, + "step": 5416 + }, + { + "epoch": 0.10844030728423792, + "grad_norm": 1.1170917749404907, + "learning_rate": 9.839537841332886e-06, + "loss": 0.3421, + "step": 5417 + }, + { + "epoch": 0.10846032580136626, + "grad_norm": 1.14488685131073, + "learning_rate": 9.839456361876084e-06, + "loss": 0.2978, + "step": 5418 + }, + { + "epoch": 0.10848034431849461, + "grad_norm": 1.1331321001052856, + "learning_rate": 9.839374862075253e-06, + "loss": 0.3522, + "step": 5419 + }, + { + "epoch": 0.10850036283562295, + "grad_norm": 0.9368754029273987, + "learning_rate": 9.839293341930728e-06, + "loss": 0.3375, + "step": 5420 + }, + { + "epoch": 0.1085203813527513, + "grad_norm": 1.0372228622436523, + "learning_rate": 9.839211801442856e-06, + "loss": 0.3501, + "step": 5421 + }, + { + "epoch": 0.10854039986987964, + "grad_norm": 1.4468326568603516, + "learning_rate": 9.83913024061198e-06, + "loss": 0.3557, + "step": 5422 + }, + { + "epoch": 0.10856041838700799, + "grad_norm": 1.137157917022705, + "learning_rate": 9.839048659438441e-06, + "loss": 0.2903, + "step": 5423 + }, + { + "epoch": 0.10858043690413632, + "grad_norm": 1.8426939249038696, + "learning_rate": 9.838967057922581e-06, + "loss": 0.8366, + "step": 5424 + }, + { + "epoch": 0.10860045542126467, + "grad_norm": 1.1195135116577148, + "learning_rate": 9.838885436064747e-06, + "loss": 0.3318, + "step": 5425 + }, + { + "epoch": 0.10862047393839301, + "grad_norm": 1.0648138523101807, + "learning_rate": 9.83880379386528e-06, + "loss": 0.3481, + "step": 5426 + }, + { + "epoch": 0.10864049245552136, + "grad_norm": 0.9871672987937927, + "learning_rate": 9.838722131324523e-06, + "loss": 0.3461, + "step": 5427 + }, + { + "epoch": 0.1086605109726497, + "grad_norm": 1.1160229444503784, + "learning_rate": 9.83864044844282e-06, + "loss": 0.3436, + "step": 5428 + }, + { + "epoch": 0.10868052948977805, + "grad_norm": 1.0558440685272217, + "learning_rate": 9.838558745220513e-06, + "loss": 0.2769, + "step": 5429 + }, + { + "epoch": 0.10870054800690639, + "grad_norm": 1.0590310096740723, + "learning_rate": 9.838477021657946e-06, + "loss": 0.3821, + "step": 5430 + }, + { + "epoch": 0.10872056652403474, + "grad_norm": 1.134751796722412, + "learning_rate": 9.838395277755463e-06, + "loss": 0.3472, + "step": 5431 + }, + { + "epoch": 0.10874058504116307, + "grad_norm": 2.4025073051452637, + "learning_rate": 9.838313513513408e-06, + "loss": 0.9103, + "step": 5432 + }, + { + "epoch": 0.10876060355829142, + "grad_norm": 1.0338445901870728, + "learning_rate": 9.838231728932124e-06, + "loss": 0.2952, + "step": 5433 + }, + { + "epoch": 0.10878062207541976, + "grad_norm": 0.9801994562149048, + "learning_rate": 9.838149924011954e-06, + "loss": 0.3088, + "step": 5434 + }, + { + "epoch": 0.10880064059254811, + "grad_norm": 1.147883653640747, + "learning_rate": 9.838068098753244e-06, + "loss": 0.385, + "step": 5435 + }, + { + "epoch": 0.10882065910967645, + "grad_norm": 1.0851935148239136, + "learning_rate": 9.837986253156338e-06, + "loss": 0.2928, + "step": 5436 + }, + { + "epoch": 0.1088406776268048, + "grad_norm": 1.9055863618850708, + "learning_rate": 9.837904387221578e-06, + "loss": 0.8822, + "step": 5437 + }, + { + "epoch": 0.10886069614393314, + "grad_norm": 1.3064690828323364, + "learning_rate": 9.837822500949308e-06, + "loss": 0.3702, + "step": 5438 + }, + { + "epoch": 0.10888071466106149, + "grad_norm": 1.1515617370605469, + "learning_rate": 9.837740594339875e-06, + "loss": 0.3663, + "step": 5439 + }, + { + "epoch": 0.10890073317818982, + "grad_norm": 1.1121827363967896, + "learning_rate": 9.83765866739362e-06, + "loss": 0.3326, + "step": 5440 + }, + { + "epoch": 0.10892075169531817, + "grad_norm": 1.1644610166549683, + "learning_rate": 9.83757672011089e-06, + "loss": 0.347, + "step": 5441 + }, + { + "epoch": 0.10894077021244651, + "grad_norm": 1.187464952468872, + "learning_rate": 9.837494752492028e-06, + "loss": 0.3241, + "step": 5442 + }, + { + "epoch": 0.10896078872957486, + "grad_norm": 1.0563071966171265, + "learning_rate": 9.837412764537379e-06, + "loss": 0.3545, + "step": 5443 + }, + { + "epoch": 0.1089808072467032, + "grad_norm": 1.2930268049240112, + "learning_rate": 9.837330756247289e-06, + "loss": 0.3408, + "step": 5444 + }, + { + "epoch": 0.10900082576383155, + "grad_norm": 1.2225385904312134, + "learning_rate": 9.837248727622099e-06, + "loss": 0.2962, + "step": 5445 + }, + { + "epoch": 0.10902084428095989, + "grad_norm": 1.0885366201400757, + "learning_rate": 9.837166678662159e-06, + "loss": 0.3232, + "step": 5446 + }, + { + "epoch": 0.10904086279808824, + "grad_norm": 1.1617685556411743, + "learning_rate": 9.83708460936781e-06, + "loss": 0.3506, + "step": 5447 + }, + { + "epoch": 0.10906088131521657, + "grad_norm": 1.0768897533416748, + "learning_rate": 9.837002519739397e-06, + "loss": 0.3666, + "step": 5448 + }, + { + "epoch": 0.10908089983234492, + "grad_norm": 1.153808355331421, + "learning_rate": 9.83692040977727e-06, + "loss": 0.3235, + "step": 5449 + }, + { + "epoch": 0.10910091834947326, + "grad_norm": 1.2259526252746582, + "learning_rate": 9.836838279481768e-06, + "loss": 0.2825, + "step": 5450 + }, + { + "epoch": 0.10912093686660161, + "grad_norm": 1.8001729249954224, + "learning_rate": 9.83675612885324e-06, + "loss": 0.8956, + "step": 5451 + }, + { + "epoch": 0.10914095538372995, + "grad_norm": 1.828489899635315, + "learning_rate": 9.83667395789203e-06, + "loss": 0.9426, + "step": 5452 + }, + { + "epoch": 0.1091609739008583, + "grad_norm": 1.0423773527145386, + "learning_rate": 9.83659176659848e-06, + "loss": 0.288, + "step": 5453 + }, + { + "epoch": 0.10918099241798664, + "grad_norm": 1.1429193019866943, + "learning_rate": 9.836509554972944e-06, + "loss": 0.289, + "step": 5454 + }, + { + "epoch": 0.10920101093511499, + "grad_norm": 1.162245512008667, + "learning_rate": 9.83642732301576e-06, + "loss": 0.3219, + "step": 5455 + }, + { + "epoch": 0.10922102945224332, + "grad_norm": 1.129418969154358, + "learning_rate": 9.836345070727277e-06, + "loss": 0.3476, + "step": 5456 + }, + { + "epoch": 0.10924104796937167, + "grad_norm": 1.1939444541931152, + "learning_rate": 9.83626279810784e-06, + "loss": 0.3423, + "step": 5457 + }, + { + "epoch": 0.10926106648650001, + "grad_norm": 1.092636227607727, + "learning_rate": 9.836180505157794e-06, + "loss": 0.3615, + "step": 5458 + }, + { + "epoch": 0.10928108500362836, + "grad_norm": 1.3024793863296509, + "learning_rate": 9.836098191877488e-06, + "loss": 0.3756, + "step": 5459 + }, + { + "epoch": 0.1093011035207567, + "grad_norm": 1.0902153253555298, + "learning_rate": 9.836015858267265e-06, + "loss": 0.3054, + "step": 5460 + }, + { + "epoch": 0.10932112203788505, + "grad_norm": 0.9893777370452881, + "learning_rate": 9.835933504327471e-06, + "loss": 0.3095, + "step": 5461 + }, + { + "epoch": 0.10934114055501339, + "grad_norm": 1.0149024724960327, + "learning_rate": 9.835851130058454e-06, + "loss": 0.312, + "step": 5462 + }, + { + "epoch": 0.10936115907214174, + "grad_norm": 1.8758915662765503, + "learning_rate": 9.83576873546056e-06, + "loss": 0.8546, + "step": 5463 + }, + { + "epoch": 0.10938117758927007, + "grad_norm": 1.120868444442749, + "learning_rate": 9.835686320534135e-06, + "loss": 0.3386, + "step": 5464 + }, + { + "epoch": 0.10940119610639842, + "grad_norm": 1.1029714345932007, + "learning_rate": 9.835603885279525e-06, + "loss": 0.3273, + "step": 5465 + }, + { + "epoch": 0.10942121462352676, + "grad_norm": 1.0849841833114624, + "learning_rate": 9.835521429697077e-06, + "loss": 0.3712, + "step": 5466 + }, + { + "epoch": 0.10944123314065511, + "grad_norm": 1.3354721069335938, + "learning_rate": 9.835438953787137e-06, + "loss": 0.3617, + "step": 5467 + }, + { + "epoch": 0.10946125165778345, + "grad_norm": 1.4053242206573486, + "learning_rate": 9.835356457550051e-06, + "loss": 0.3032, + "step": 5468 + }, + { + "epoch": 0.1094812701749118, + "grad_norm": 1.060948371887207, + "learning_rate": 9.83527394098617e-06, + "loss": 0.2781, + "step": 5469 + }, + { + "epoch": 0.10950128869204014, + "grad_norm": 1.2173576354980469, + "learning_rate": 9.835191404095837e-06, + "loss": 0.3377, + "step": 5470 + }, + { + "epoch": 0.10952130720916849, + "grad_norm": 1.127221941947937, + "learning_rate": 9.835108846879399e-06, + "loss": 0.349, + "step": 5471 + }, + { + "epoch": 0.10954132572629682, + "grad_norm": 1.184349536895752, + "learning_rate": 9.835026269337205e-06, + "loss": 0.3532, + "step": 5472 + }, + { + "epoch": 0.10956134424342517, + "grad_norm": 1.0459589958190918, + "learning_rate": 9.8349436714696e-06, + "loss": 0.3455, + "step": 5473 + }, + { + "epoch": 0.10958136276055351, + "grad_norm": 1.1421619653701782, + "learning_rate": 9.834861053276932e-06, + "loss": 0.3178, + "step": 5474 + }, + { + "epoch": 0.10960138127768186, + "grad_norm": 1.0339640378952026, + "learning_rate": 9.83477841475955e-06, + "loss": 0.3068, + "step": 5475 + }, + { + "epoch": 0.1096213997948102, + "grad_norm": 1.9028568267822266, + "learning_rate": 9.834695755917798e-06, + "loss": 0.8878, + "step": 5476 + }, + { + "epoch": 0.10964141831193855, + "grad_norm": 1.5203691720962524, + "learning_rate": 9.834613076752027e-06, + "loss": 0.3136, + "step": 5477 + }, + { + "epoch": 0.10966143682906689, + "grad_norm": 1.0090004205703735, + "learning_rate": 9.834530377262584e-06, + "loss": 0.3246, + "step": 5478 + }, + { + "epoch": 0.10968145534619524, + "grad_norm": 1.3092262744903564, + "learning_rate": 9.834447657449813e-06, + "loss": 0.3637, + "step": 5479 + }, + { + "epoch": 0.10970147386332357, + "grad_norm": 1.1263779401779175, + "learning_rate": 9.834364917314066e-06, + "loss": 0.3494, + "step": 5480 + }, + { + "epoch": 0.10972149238045192, + "grad_norm": 1.2023879289627075, + "learning_rate": 9.83428215685569e-06, + "loss": 0.3594, + "step": 5481 + }, + { + "epoch": 0.10974151089758026, + "grad_norm": 1.1229254007339478, + "learning_rate": 9.834199376075031e-06, + "loss": 0.325, + "step": 5482 + }, + { + "epoch": 0.10976152941470861, + "grad_norm": 1.0956573486328125, + "learning_rate": 9.83411657497244e-06, + "loss": 0.3513, + "step": 5483 + }, + { + "epoch": 0.10978154793183695, + "grad_norm": 2.039783000946045, + "learning_rate": 9.83403375354826e-06, + "loss": 0.8857, + "step": 5484 + }, + { + "epoch": 0.1098015664489653, + "grad_norm": 1.9809268712997437, + "learning_rate": 9.833950911802845e-06, + "loss": 0.8294, + "step": 5485 + }, + { + "epoch": 0.10982158496609364, + "grad_norm": 1.0693717002868652, + "learning_rate": 9.83386804973654e-06, + "loss": 0.3106, + "step": 5486 + }, + { + "epoch": 0.10984160348322199, + "grad_norm": 1.1426606178283691, + "learning_rate": 9.833785167349693e-06, + "loss": 0.378, + "step": 5487 + }, + { + "epoch": 0.10986162200035032, + "grad_norm": 1.1248866319656372, + "learning_rate": 9.833702264642655e-06, + "loss": 0.3353, + "step": 5488 + }, + { + "epoch": 0.10988164051747867, + "grad_norm": 1.131622552871704, + "learning_rate": 9.833619341615773e-06, + "loss": 0.3155, + "step": 5489 + }, + { + "epoch": 0.10990165903460701, + "grad_norm": 1.0909090042114258, + "learning_rate": 9.833536398269393e-06, + "loss": 0.3435, + "step": 5490 + }, + { + "epoch": 0.10992167755173536, + "grad_norm": 1.7993440628051758, + "learning_rate": 9.833453434603869e-06, + "loss": 0.3647, + "step": 5491 + }, + { + "epoch": 0.1099416960688637, + "grad_norm": 1.2136948108673096, + "learning_rate": 9.833370450619545e-06, + "loss": 0.345, + "step": 5492 + }, + { + "epoch": 0.10996171458599205, + "grad_norm": 1.8324638605117798, + "learning_rate": 9.833287446316773e-06, + "loss": 0.8224, + "step": 5493 + }, + { + "epoch": 0.10998173310312039, + "grad_norm": 1.209181308746338, + "learning_rate": 9.833204421695899e-06, + "loss": 0.3292, + "step": 5494 + }, + { + "epoch": 0.11000175162024874, + "grad_norm": 1.0514775514602661, + "learning_rate": 9.833121376757275e-06, + "loss": 0.3164, + "step": 5495 + }, + { + "epoch": 0.11002177013737707, + "grad_norm": 1.091613531112671, + "learning_rate": 9.833038311501248e-06, + "loss": 0.346, + "step": 5496 + }, + { + "epoch": 0.11004178865450542, + "grad_norm": 1.184418797492981, + "learning_rate": 9.832955225928169e-06, + "loss": 0.3676, + "step": 5497 + }, + { + "epoch": 0.11006180717163376, + "grad_norm": 1.6777600049972534, + "learning_rate": 9.832872120038386e-06, + "loss": 0.7895, + "step": 5498 + }, + { + "epoch": 0.11008182568876211, + "grad_norm": 1.0629661083221436, + "learning_rate": 9.83278899383225e-06, + "loss": 0.3651, + "step": 5499 + }, + { + "epoch": 0.11010184420589045, + "grad_norm": 1.0188835859298706, + "learning_rate": 9.832705847310107e-06, + "loss": 0.2823, + "step": 5500 + }, + { + "epoch": 0.1101218627230188, + "grad_norm": 1.0871864557266235, + "learning_rate": 9.83262268047231e-06, + "loss": 0.3114, + "step": 5501 + }, + { + "epoch": 0.11014188124014714, + "grad_norm": 1.0832504034042358, + "learning_rate": 9.832539493319206e-06, + "loss": 0.3678, + "step": 5502 + }, + { + "epoch": 0.11016189975727549, + "grad_norm": 1.0860093832015991, + "learning_rate": 9.832456285851147e-06, + "loss": 0.3165, + "step": 5503 + }, + { + "epoch": 0.11018191827440382, + "grad_norm": 1.0541222095489502, + "learning_rate": 9.832373058068481e-06, + "loss": 0.3477, + "step": 5504 + }, + { + "epoch": 0.11020193679153217, + "grad_norm": 1.0952270030975342, + "learning_rate": 9.832289809971558e-06, + "loss": 0.3442, + "step": 5505 + }, + { + "epoch": 0.11022195530866051, + "grad_norm": 1.167872428894043, + "learning_rate": 9.83220654156073e-06, + "loss": 0.3171, + "step": 5506 + }, + { + "epoch": 0.11024197382578886, + "grad_norm": 1.1151875257492065, + "learning_rate": 9.832123252836345e-06, + "loss": 0.3127, + "step": 5507 + }, + { + "epoch": 0.1102619923429172, + "grad_norm": 1.8298722505569458, + "learning_rate": 9.832039943798755e-06, + "loss": 0.8181, + "step": 5508 + }, + { + "epoch": 0.11028201086004555, + "grad_norm": 1.0912370681762695, + "learning_rate": 9.831956614448308e-06, + "loss": 0.2991, + "step": 5509 + }, + { + "epoch": 0.11030202937717389, + "grad_norm": 1.1330149173736572, + "learning_rate": 9.831873264785355e-06, + "loss": 0.3181, + "step": 5510 + }, + { + "epoch": 0.11032204789430224, + "grad_norm": 1.133797287940979, + "learning_rate": 9.831789894810247e-06, + "loss": 0.3083, + "step": 5511 + }, + { + "epoch": 0.11034206641143057, + "grad_norm": 1.1271988153457642, + "learning_rate": 9.831706504523333e-06, + "loss": 0.3502, + "step": 5512 + }, + { + "epoch": 0.11036208492855892, + "grad_norm": 1.1430543661117554, + "learning_rate": 9.831623093924965e-06, + "loss": 0.3278, + "step": 5513 + }, + { + "epoch": 0.11038210344568726, + "grad_norm": 1.1243857145309448, + "learning_rate": 9.831539663015494e-06, + "loss": 0.3452, + "step": 5514 + }, + { + "epoch": 0.11040212196281561, + "grad_norm": 1.1274110078811646, + "learning_rate": 9.83145621179527e-06, + "loss": 0.3378, + "step": 5515 + }, + { + "epoch": 0.11042214047994395, + "grad_norm": 1.120334506034851, + "learning_rate": 9.831372740264643e-06, + "loss": 0.3337, + "step": 5516 + }, + { + "epoch": 0.1104421589970723, + "grad_norm": 1.142676830291748, + "learning_rate": 9.831289248423966e-06, + "loss": 0.3622, + "step": 5517 + }, + { + "epoch": 0.11046217751420064, + "grad_norm": 1.0283111333847046, + "learning_rate": 9.831205736273588e-06, + "loss": 0.2857, + "step": 5518 + }, + { + "epoch": 0.11048219603132899, + "grad_norm": 1.1070581674575806, + "learning_rate": 9.83112220381386e-06, + "loss": 0.3329, + "step": 5519 + }, + { + "epoch": 0.11050221454845732, + "grad_norm": 1.175041913986206, + "learning_rate": 9.831038651045134e-06, + "loss": 0.283, + "step": 5520 + }, + { + "epoch": 0.11052223306558567, + "grad_norm": 1.1489295959472656, + "learning_rate": 9.830955077967763e-06, + "loss": 0.409, + "step": 5521 + }, + { + "epoch": 0.11054225158271401, + "grad_norm": 1.9537850618362427, + "learning_rate": 9.830871484582094e-06, + "loss": 0.8367, + "step": 5522 + }, + { + "epoch": 0.11056227009984236, + "grad_norm": 1.9468073844909668, + "learning_rate": 9.830787870888484e-06, + "loss": 0.8771, + "step": 5523 + }, + { + "epoch": 0.1105822886169707, + "grad_norm": 1.0656944513320923, + "learning_rate": 9.830704236887278e-06, + "loss": 0.3013, + "step": 5524 + }, + { + "epoch": 0.11060230713409905, + "grad_norm": 1.0532011985778809, + "learning_rate": 9.830620582578834e-06, + "loss": 0.2764, + "step": 5525 + }, + { + "epoch": 0.11062232565122739, + "grad_norm": 1.185855746269226, + "learning_rate": 9.830536907963499e-06, + "loss": 0.3308, + "step": 5526 + }, + { + "epoch": 0.11064234416835574, + "grad_norm": 1.0952082872390747, + "learning_rate": 9.830453213041627e-06, + "loss": 0.4126, + "step": 5527 + }, + { + "epoch": 0.11066236268548407, + "grad_norm": 1.2055294513702393, + "learning_rate": 9.830369497813567e-06, + "loss": 0.3627, + "step": 5528 + }, + { + "epoch": 0.11068238120261242, + "grad_norm": 1.0430521965026855, + "learning_rate": 9.830285762279676e-06, + "loss": 0.3263, + "step": 5529 + }, + { + "epoch": 0.11070239971974076, + "grad_norm": 1.1948474645614624, + "learning_rate": 9.830202006440301e-06, + "loss": 0.3639, + "step": 5530 + }, + { + "epoch": 0.11072241823686911, + "grad_norm": 1.0982588529586792, + "learning_rate": 9.830118230295797e-06, + "loss": 0.3319, + "step": 5531 + }, + { + "epoch": 0.11074243675399745, + "grad_norm": 1.1377613544464111, + "learning_rate": 9.830034433846515e-06, + "loss": 0.3651, + "step": 5532 + }, + { + "epoch": 0.1107624552711258, + "grad_norm": 1.1325535774230957, + "learning_rate": 9.82995061709281e-06, + "loss": 0.3247, + "step": 5533 + }, + { + "epoch": 0.11078247378825413, + "grad_norm": 1.8298019170761108, + "learning_rate": 9.82986678003503e-06, + "loss": 0.8087, + "step": 5534 + }, + { + "epoch": 0.11080249230538249, + "grad_norm": 1.1685231924057007, + "learning_rate": 9.82978292267353e-06, + "loss": 0.3283, + "step": 5535 + }, + { + "epoch": 0.11082251082251082, + "grad_norm": 1.1401351690292358, + "learning_rate": 9.829699045008663e-06, + "loss": 0.3085, + "step": 5536 + }, + { + "epoch": 0.11084252933963917, + "grad_norm": 1.1027277708053589, + "learning_rate": 9.829615147040779e-06, + "loss": 0.2921, + "step": 5537 + }, + { + "epoch": 0.11086254785676751, + "grad_norm": 1.1719273328781128, + "learning_rate": 9.829531228770232e-06, + "loss": 0.3764, + "step": 5538 + }, + { + "epoch": 0.11088256637389586, + "grad_norm": 1.1411765813827515, + "learning_rate": 9.829447290197375e-06, + "loss": 0.3079, + "step": 5539 + }, + { + "epoch": 0.1109025848910242, + "grad_norm": 1.1304254531860352, + "learning_rate": 9.829363331322562e-06, + "loss": 0.3517, + "step": 5540 + }, + { + "epoch": 0.11092260340815253, + "grad_norm": 1.1169819831848145, + "learning_rate": 9.829279352146145e-06, + "loss": 0.3692, + "step": 5541 + }, + { + "epoch": 0.11094262192528088, + "grad_norm": 1.0246773958206177, + "learning_rate": 9.829195352668477e-06, + "loss": 0.3501, + "step": 5542 + }, + { + "epoch": 0.11096264044240922, + "grad_norm": 1.1691144704818726, + "learning_rate": 9.829111332889909e-06, + "loss": 0.3224, + "step": 5543 + }, + { + "epoch": 0.11098265895953757, + "grad_norm": 1.013434886932373, + "learning_rate": 9.829027292810798e-06, + "loss": 0.3219, + "step": 5544 + }, + { + "epoch": 0.11100267747666591, + "grad_norm": 1.109615445137024, + "learning_rate": 9.828943232431495e-06, + "loss": 0.3369, + "step": 5545 + }, + { + "epoch": 0.11102269599379426, + "grad_norm": 1.1700116395950317, + "learning_rate": 9.828859151752354e-06, + "loss": 0.2756, + "step": 5546 + }, + { + "epoch": 0.1110427145109226, + "grad_norm": 1.1881787776947021, + "learning_rate": 9.828775050773728e-06, + "loss": 0.3242, + "step": 5547 + }, + { + "epoch": 0.11106273302805095, + "grad_norm": 1.161833643913269, + "learning_rate": 9.828690929495971e-06, + "loss": 0.3121, + "step": 5548 + }, + { + "epoch": 0.11108275154517928, + "grad_norm": 1.0411911010742188, + "learning_rate": 9.828606787919437e-06, + "loss": 0.3372, + "step": 5549 + }, + { + "epoch": 0.11110277006230763, + "grad_norm": 1.121153473854065, + "learning_rate": 9.828522626044478e-06, + "loss": 0.3612, + "step": 5550 + }, + { + "epoch": 0.11112278857943597, + "grad_norm": 1.3160239458084106, + "learning_rate": 9.828438443871449e-06, + "loss": 0.3264, + "step": 5551 + }, + { + "epoch": 0.11114280709656432, + "grad_norm": 1.1356556415557861, + "learning_rate": 9.828354241400707e-06, + "loss": 0.3332, + "step": 5552 + }, + { + "epoch": 0.11116282561369266, + "grad_norm": 1.084226131439209, + "learning_rate": 9.828270018632599e-06, + "loss": 0.3477, + "step": 5553 + }, + { + "epoch": 0.11118284413082101, + "grad_norm": 1.6521865129470825, + "learning_rate": 9.828185775567484e-06, + "loss": 0.8942, + "step": 5554 + }, + { + "epoch": 0.11120286264794935, + "grad_norm": 1.0883070230484009, + "learning_rate": 9.828101512205715e-06, + "loss": 0.3124, + "step": 5555 + }, + { + "epoch": 0.1112228811650777, + "grad_norm": 1.052089810371399, + "learning_rate": 9.828017228547647e-06, + "loss": 0.3201, + "step": 5556 + }, + { + "epoch": 0.11124289968220603, + "grad_norm": 1.1296353340148926, + "learning_rate": 9.827932924593632e-06, + "loss": 0.2997, + "step": 5557 + }, + { + "epoch": 0.11126291819933438, + "grad_norm": 1.127122402191162, + "learning_rate": 9.827848600344028e-06, + "loss": 0.3273, + "step": 5558 + }, + { + "epoch": 0.11128293671646272, + "grad_norm": 1.1115148067474365, + "learning_rate": 9.827764255799187e-06, + "loss": 0.3324, + "step": 5559 + }, + { + "epoch": 0.11130295523359107, + "grad_norm": 1.0669755935668945, + "learning_rate": 9.827679890959463e-06, + "loss": 0.3205, + "step": 5560 + }, + { + "epoch": 0.11132297375071941, + "grad_norm": 1.1556174755096436, + "learning_rate": 9.827595505825213e-06, + "loss": 0.293, + "step": 5561 + }, + { + "epoch": 0.11134299226784776, + "grad_norm": 1.539739966392517, + "learning_rate": 9.82751110039679e-06, + "loss": 0.3582, + "step": 5562 + }, + { + "epoch": 0.1113630107849761, + "grad_norm": 1.0675950050354004, + "learning_rate": 9.82742667467455e-06, + "loss": 0.3373, + "step": 5563 + }, + { + "epoch": 0.11138302930210445, + "grad_norm": 1.1585440635681152, + "learning_rate": 9.827342228658844e-06, + "loss": 0.318, + "step": 5564 + }, + { + "epoch": 0.11140304781923278, + "grad_norm": 1.0631507635116577, + "learning_rate": 9.827257762350033e-06, + "loss": 0.3336, + "step": 5565 + }, + { + "epoch": 0.11142306633636113, + "grad_norm": 1.9137489795684814, + "learning_rate": 9.827173275748467e-06, + "loss": 0.9183, + "step": 5566 + }, + { + "epoch": 0.11144308485348947, + "grad_norm": 1.0930290222167969, + "learning_rate": 9.827088768854505e-06, + "loss": 0.3306, + "step": 5567 + }, + { + "epoch": 0.11146310337061782, + "grad_norm": 1.792893409729004, + "learning_rate": 9.8270042416685e-06, + "loss": 0.918, + "step": 5568 + }, + { + "epoch": 0.11148312188774616, + "grad_norm": 1.1557950973510742, + "learning_rate": 9.826919694190808e-06, + "loss": 0.3198, + "step": 5569 + }, + { + "epoch": 0.11150314040487451, + "grad_norm": 1.1277974843978882, + "learning_rate": 9.826835126421786e-06, + "loss": 0.3105, + "step": 5570 + }, + { + "epoch": 0.11152315892200285, + "grad_norm": 1.0994408130645752, + "learning_rate": 9.826750538361787e-06, + "loss": 0.3012, + "step": 5571 + }, + { + "epoch": 0.1115431774391312, + "grad_norm": 1.2065731287002563, + "learning_rate": 9.826665930011166e-06, + "loss": 0.3007, + "step": 5572 + }, + { + "epoch": 0.11156319595625953, + "grad_norm": 1.951310634613037, + "learning_rate": 9.826581301370282e-06, + "loss": 0.8078, + "step": 5573 + }, + { + "epoch": 0.11158321447338788, + "grad_norm": 1.101845145225525, + "learning_rate": 9.826496652439488e-06, + "loss": 0.3036, + "step": 5574 + }, + { + "epoch": 0.11160323299051622, + "grad_norm": 1.2001259326934814, + "learning_rate": 9.82641198321914e-06, + "loss": 0.3356, + "step": 5575 + }, + { + "epoch": 0.11162325150764457, + "grad_norm": 1.137520432472229, + "learning_rate": 9.826327293709596e-06, + "loss": 0.3595, + "step": 5576 + }, + { + "epoch": 0.11164327002477291, + "grad_norm": 1.2017158269882202, + "learning_rate": 9.82624258391121e-06, + "loss": 0.3194, + "step": 5577 + }, + { + "epoch": 0.11166328854190126, + "grad_norm": 1.2167681455612183, + "learning_rate": 9.826157853824339e-06, + "loss": 0.3458, + "step": 5578 + }, + { + "epoch": 0.1116833070590296, + "grad_norm": 1.0417145490646362, + "learning_rate": 9.826073103449338e-06, + "loss": 0.3483, + "step": 5579 + }, + { + "epoch": 0.11170332557615795, + "grad_norm": 1.9107884168624878, + "learning_rate": 9.825988332786566e-06, + "loss": 0.766, + "step": 5580 + }, + { + "epoch": 0.11172334409328628, + "grad_norm": 1.0812625885009766, + "learning_rate": 9.825903541836377e-06, + "loss": 0.3483, + "step": 5581 + }, + { + "epoch": 0.11174336261041463, + "grad_norm": 1.2413257360458374, + "learning_rate": 9.825818730599128e-06, + "loss": 0.4134, + "step": 5582 + }, + { + "epoch": 0.11176338112754297, + "grad_norm": 1.1179863214492798, + "learning_rate": 9.825733899075175e-06, + "loss": 0.3198, + "step": 5583 + }, + { + "epoch": 0.11178339964467132, + "grad_norm": 1.3101351261138916, + "learning_rate": 9.825649047264876e-06, + "loss": 0.3084, + "step": 5584 + }, + { + "epoch": 0.11180341816179966, + "grad_norm": 1.8581595420837402, + "learning_rate": 9.825564175168586e-06, + "loss": 0.8338, + "step": 5585 + }, + { + "epoch": 0.11182343667892801, + "grad_norm": 1.3120543956756592, + "learning_rate": 9.825479282786663e-06, + "loss": 0.3797, + "step": 5586 + }, + { + "epoch": 0.11184345519605635, + "grad_norm": 1.0443739891052246, + "learning_rate": 9.825394370119464e-06, + "loss": 0.3525, + "step": 5587 + }, + { + "epoch": 0.1118634737131847, + "grad_norm": 1.1845548152923584, + "learning_rate": 9.825309437167345e-06, + "loss": 0.3595, + "step": 5588 + }, + { + "epoch": 0.11188349223031303, + "grad_norm": 1.2309067249298096, + "learning_rate": 9.825224483930664e-06, + "loss": 0.3451, + "step": 5589 + }, + { + "epoch": 0.11190351074744138, + "grad_norm": 1.169100284576416, + "learning_rate": 9.825139510409777e-06, + "loss": 0.3311, + "step": 5590 + }, + { + "epoch": 0.11192352926456972, + "grad_norm": 1.0753772258758545, + "learning_rate": 9.825054516605043e-06, + "loss": 0.3273, + "step": 5591 + }, + { + "epoch": 0.11194354778169807, + "grad_norm": 1.0955504179000854, + "learning_rate": 9.824969502516818e-06, + "loss": 0.3515, + "step": 5592 + }, + { + "epoch": 0.11196356629882641, + "grad_norm": 1.8309879302978516, + "learning_rate": 9.82488446814546e-06, + "loss": 0.8863, + "step": 5593 + }, + { + "epoch": 0.11198358481595476, + "grad_norm": 1.5933431386947632, + "learning_rate": 9.824799413491324e-06, + "loss": 0.377, + "step": 5594 + }, + { + "epoch": 0.1120036033330831, + "grad_norm": 1.1399905681610107, + "learning_rate": 9.82471433855477e-06, + "loss": 0.3311, + "step": 5595 + }, + { + "epoch": 0.11202362185021145, + "grad_norm": 1.0628376007080078, + "learning_rate": 9.824629243336157e-06, + "loss": 0.3291, + "step": 5596 + }, + { + "epoch": 0.11204364036733978, + "grad_norm": 1.220232367515564, + "learning_rate": 9.82454412783584e-06, + "loss": 0.3823, + "step": 5597 + }, + { + "epoch": 0.11206365888446813, + "grad_norm": 1.0933141708374023, + "learning_rate": 9.824458992054177e-06, + "loss": 0.3512, + "step": 5598 + }, + { + "epoch": 0.11208367740159647, + "grad_norm": 1.3031456470489502, + "learning_rate": 9.824373835991527e-06, + "loss": 0.3025, + "step": 5599 + }, + { + "epoch": 0.11210369591872482, + "grad_norm": 1.0480185747146606, + "learning_rate": 9.824288659648248e-06, + "loss": 0.3585, + "step": 5600 + }, + { + "epoch": 0.11212371443585316, + "grad_norm": 1.2574129104614258, + "learning_rate": 9.824203463024697e-06, + "loss": 0.3251, + "step": 5601 + }, + { + "epoch": 0.11214373295298151, + "grad_norm": 1.037689208984375, + "learning_rate": 9.824118246121234e-06, + "loss": 0.3649, + "step": 5602 + }, + { + "epoch": 0.11216375147010985, + "grad_norm": 1.1660054922103882, + "learning_rate": 9.824033008938213e-06, + "loss": 0.32, + "step": 5603 + }, + { + "epoch": 0.1121837699872382, + "grad_norm": 0.9877206087112427, + "learning_rate": 9.823947751475999e-06, + "loss": 0.3016, + "step": 5604 + }, + { + "epoch": 0.11220378850436653, + "grad_norm": 1.111042857170105, + "learning_rate": 9.823862473734947e-06, + "loss": 0.3229, + "step": 5605 + }, + { + "epoch": 0.11222380702149488, + "grad_norm": 1.017491102218628, + "learning_rate": 9.823777175715412e-06, + "loss": 0.2942, + "step": 5606 + }, + { + "epoch": 0.11224382553862322, + "grad_norm": 1.0845882892608643, + "learning_rate": 9.823691857417758e-06, + "loss": 0.3312, + "step": 5607 + }, + { + "epoch": 0.11226384405575157, + "grad_norm": 1.1659650802612305, + "learning_rate": 9.82360651884234e-06, + "loss": 0.351, + "step": 5608 + }, + { + "epoch": 0.11228386257287991, + "grad_norm": 0.9962424039840698, + "learning_rate": 9.82352115998952e-06, + "loss": 0.2934, + "step": 5609 + }, + { + "epoch": 0.11230388109000826, + "grad_norm": 1.141605019569397, + "learning_rate": 9.823435780859656e-06, + "loss": 0.3407, + "step": 5610 + }, + { + "epoch": 0.1123238996071366, + "grad_norm": 2.1220998764038086, + "learning_rate": 9.823350381453103e-06, + "loss": 0.8707, + "step": 5611 + }, + { + "epoch": 0.11234391812426495, + "grad_norm": 1.9721949100494385, + "learning_rate": 9.823264961770224e-06, + "loss": 0.8908, + "step": 5612 + }, + { + "epoch": 0.11236393664139328, + "grad_norm": 1.0549675226211548, + "learning_rate": 9.82317952181138e-06, + "loss": 0.3231, + "step": 5613 + }, + { + "epoch": 0.11238395515852163, + "grad_norm": 1.0482521057128906, + "learning_rate": 9.823094061576923e-06, + "loss": 0.3244, + "step": 5614 + }, + { + "epoch": 0.11240397367564997, + "grad_norm": 1.3395791053771973, + "learning_rate": 9.82300858106722e-06, + "loss": 0.344, + "step": 5615 + }, + { + "epoch": 0.11242399219277832, + "grad_norm": 1.1632184982299805, + "learning_rate": 9.822923080282625e-06, + "loss": 0.3446, + "step": 5616 + }, + { + "epoch": 0.11244401070990666, + "grad_norm": 1.0338358879089355, + "learning_rate": 9.8228375592235e-06, + "loss": 0.3353, + "step": 5617 + }, + { + "epoch": 0.11246402922703501, + "grad_norm": 1.2760497331619263, + "learning_rate": 9.822752017890202e-06, + "loss": 0.2912, + "step": 5618 + }, + { + "epoch": 0.11248404774416335, + "grad_norm": 1.0310602188110352, + "learning_rate": 9.822666456283093e-06, + "loss": 0.3183, + "step": 5619 + }, + { + "epoch": 0.1125040662612917, + "grad_norm": 1.0645183324813843, + "learning_rate": 9.822580874402533e-06, + "loss": 0.411, + "step": 5620 + }, + { + "epoch": 0.11252408477842003, + "grad_norm": 1.2421149015426636, + "learning_rate": 9.82249527224888e-06, + "loss": 0.3518, + "step": 5621 + }, + { + "epoch": 0.11254410329554838, + "grad_norm": 0.9145286083221436, + "learning_rate": 9.822409649822495e-06, + "loss": 0.2547, + "step": 5622 + }, + { + "epoch": 0.11256412181267672, + "grad_norm": 1.0744372606277466, + "learning_rate": 9.822324007123738e-06, + "loss": 0.3171, + "step": 5623 + }, + { + "epoch": 0.11258414032980507, + "grad_norm": 1.0671278238296509, + "learning_rate": 9.822238344152967e-06, + "loss": 0.3396, + "step": 5624 + }, + { + "epoch": 0.11260415884693341, + "grad_norm": 0.9441510438919067, + "learning_rate": 9.822152660910546e-06, + "loss": 0.2789, + "step": 5625 + }, + { + "epoch": 0.11262417736406176, + "grad_norm": 1.164859652519226, + "learning_rate": 9.822066957396833e-06, + "loss": 0.3266, + "step": 5626 + }, + { + "epoch": 0.1126441958811901, + "grad_norm": 1.1158093214035034, + "learning_rate": 9.821981233612188e-06, + "loss": 0.3166, + "step": 5627 + }, + { + "epoch": 0.11266421439831845, + "grad_norm": 1.2364130020141602, + "learning_rate": 9.821895489556969e-06, + "loss": 0.4043, + "step": 5628 + }, + { + "epoch": 0.11268423291544678, + "grad_norm": 1.1641485691070557, + "learning_rate": 9.821809725231542e-06, + "loss": 0.3077, + "step": 5629 + }, + { + "epoch": 0.11270425143257513, + "grad_norm": 2.665925979614258, + "learning_rate": 9.821723940636264e-06, + "loss": 0.8066, + "step": 5630 + }, + { + "epoch": 0.11272426994970347, + "grad_norm": 1.3042341470718384, + "learning_rate": 9.821638135771495e-06, + "loss": 0.3539, + "step": 5631 + }, + { + "epoch": 0.11274428846683182, + "grad_norm": 1.1022326946258545, + "learning_rate": 9.821552310637598e-06, + "loss": 0.3381, + "step": 5632 + }, + { + "epoch": 0.11276430698396016, + "grad_norm": 1.0432599782943726, + "learning_rate": 9.821466465234933e-06, + "loss": 0.3298, + "step": 5633 + }, + { + "epoch": 0.11278432550108851, + "grad_norm": 1.034253478050232, + "learning_rate": 9.82138059956386e-06, + "loss": 0.2943, + "step": 5634 + }, + { + "epoch": 0.11280434401821685, + "grad_norm": 1.3848210573196411, + "learning_rate": 9.82129471362474e-06, + "loss": 0.343, + "step": 5635 + }, + { + "epoch": 0.1128243625353452, + "grad_norm": 1.1368154287338257, + "learning_rate": 9.821208807417935e-06, + "loss": 0.3778, + "step": 5636 + }, + { + "epoch": 0.11284438105247353, + "grad_norm": 1.0801472663879395, + "learning_rate": 9.821122880943806e-06, + "loss": 0.312, + "step": 5637 + }, + { + "epoch": 0.11286439956960188, + "grad_norm": 1.1111067533493042, + "learning_rate": 9.821036934202715e-06, + "loss": 0.3209, + "step": 5638 + }, + { + "epoch": 0.11288441808673022, + "grad_norm": 2.237574815750122, + "learning_rate": 9.82095096719502e-06, + "loss": 0.9703, + "step": 5639 + }, + { + "epoch": 0.11290443660385857, + "grad_norm": 1.1679872274398804, + "learning_rate": 9.820864979921086e-06, + "loss": 0.338, + "step": 5640 + }, + { + "epoch": 0.11292445512098691, + "grad_norm": 1.2290687561035156, + "learning_rate": 9.820778972381271e-06, + "loss": 0.3274, + "step": 5641 + }, + { + "epoch": 0.11294447363811526, + "grad_norm": 1.0753413438796997, + "learning_rate": 9.82069294457594e-06, + "loss": 0.3403, + "step": 5642 + }, + { + "epoch": 0.1129644921552436, + "grad_norm": 1.1051714420318604, + "learning_rate": 9.820606896505455e-06, + "loss": 0.3755, + "step": 5643 + }, + { + "epoch": 0.11298451067237195, + "grad_norm": 1.1090887784957886, + "learning_rate": 9.820520828170174e-06, + "loss": 0.3637, + "step": 5644 + }, + { + "epoch": 0.11300452918950028, + "grad_norm": 1.0321043729782104, + "learning_rate": 9.820434739570461e-06, + "loss": 0.3103, + "step": 5645 + }, + { + "epoch": 0.11302454770662863, + "grad_norm": 1.177655816078186, + "learning_rate": 9.82034863070668e-06, + "loss": 0.3582, + "step": 5646 + }, + { + "epoch": 0.11304456622375697, + "grad_norm": 1.0462843179702759, + "learning_rate": 9.820262501579187e-06, + "loss": 0.3256, + "step": 5647 + }, + { + "epoch": 0.11306458474088532, + "grad_norm": 1.28451406955719, + "learning_rate": 9.820176352188352e-06, + "loss": 0.3723, + "step": 5648 + }, + { + "epoch": 0.11308460325801366, + "grad_norm": 1.107845425605774, + "learning_rate": 9.82009018253453e-06, + "loss": 0.327, + "step": 5649 + }, + { + "epoch": 0.11310462177514201, + "grad_norm": 1.1834548711776733, + "learning_rate": 9.820003992618086e-06, + "loss": 0.3457, + "step": 5650 + }, + { + "epoch": 0.11312464029227035, + "grad_norm": 1.4103748798370361, + "learning_rate": 9.819917782439382e-06, + "loss": 0.4075, + "step": 5651 + }, + { + "epoch": 0.1131446588093987, + "grad_norm": 1.1626948118209839, + "learning_rate": 9.819831551998783e-06, + "loss": 0.2964, + "step": 5652 + }, + { + "epoch": 0.11316467732652703, + "grad_norm": 1.0353596210479736, + "learning_rate": 9.819745301296648e-06, + "loss": 0.2674, + "step": 5653 + }, + { + "epoch": 0.11318469584365538, + "grad_norm": 1.0233283042907715, + "learning_rate": 9.819659030333342e-06, + "loss": 0.2873, + "step": 5654 + }, + { + "epoch": 0.11320471436078372, + "grad_norm": 0.996379017829895, + "learning_rate": 9.819572739109227e-06, + "loss": 0.2918, + "step": 5655 + }, + { + "epoch": 0.11322473287791207, + "grad_norm": 1.0723545551300049, + "learning_rate": 9.819486427624665e-06, + "loss": 0.3177, + "step": 5656 + }, + { + "epoch": 0.11324475139504041, + "grad_norm": 1.0164848566055298, + "learning_rate": 9.819400095880018e-06, + "loss": 0.3197, + "step": 5657 + }, + { + "epoch": 0.11326476991216876, + "grad_norm": 1.0843580961227417, + "learning_rate": 9.81931374387565e-06, + "loss": 0.3349, + "step": 5658 + }, + { + "epoch": 0.1132847884292971, + "grad_norm": 1.1532362699508667, + "learning_rate": 9.819227371611926e-06, + "loss": 0.2863, + "step": 5659 + }, + { + "epoch": 0.11330480694642545, + "grad_norm": 1.1344727277755737, + "learning_rate": 9.819140979089206e-06, + "loss": 0.311, + "step": 5660 + }, + { + "epoch": 0.11332482546355378, + "grad_norm": 1.1273999214172363, + "learning_rate": 9.819054566307854e-06, + "loss": 0.3106, + "step": 5661 + }, + { + "epoch": 0.11334484398068213, + "grad_norm": 1.1100654602050781, + "learning_rate": 9.818968133268235e-06, + "loss": 0.3207, + "step": 5662 + }, + { + "epoch": 0.11336486249781047, + "grad_norm": 1.0300827026367188, + "learning_rate": 9.81888167997071e-06, + "loss": 0.3481, + "step": 5663 + }, + { + "epoch": 0.11338488101493882, + "grad_norm": 1.058120846748352, + "learning_rate": 9.818795206415644e-06, + "loss": 0.3318, + "step": 5664 + }, + { + "epoch": 0.11340489953206716, + "grad_norm": 1.17728853225708, + "learning_rate": 9.818708712603399e-06, + "loss": 0.3294, + "step": 5665 + }, + { + "epoch": 0.11342491804919551, + "grad_norm": 1.2060896158218384, + "learning_rate": 9.81862219853434e-06, + "loss": 0.3137, + "step": 5666 + }, + { + "epoch": 0.11344493656632385, + "grad_norm": 1.1902201175689697, + "learning_rate": 9.81853566420883e-06, + "loss": 0.2631, + "step": 5667 + }, + { + "epoch": 0.1134649550834522, + "grad_norm": 1.0183838605880737, + "learning_rate": 9.818449109627236e-06, + "loss": 0.3146, + "step": 5668 + }, + { + "epoch": 0.11348497360058053, + "grad_norm": 1.129237174987793, + "learning_rate": 9.818362534789916e-06, + "loss": 0.3532, + "step": 5669 + }, + { + "epoch": 0.11350499211770888, + "grad_norm": 1.688705563545227, + "learning_rate": 9.818275939697237e-06, + "loss": 0.3293, + "step": 5670 + }, + { + "epoch": 0.11352501063483722, + "grad_norm": 1.204996109008789, + "learning_rate": 9.818189324349563e-06, + "loss": 0.3507, + "step": 5671 + }, + { + "epoch": 0.11354502915196557, + "grad_norm": 1.0917553901672363, + "learning_rate": 9.818102688747257e-06, + "loss": 0.3759, + "step": 5672 + }, + { + "epoch": 0.11356504766909391, + "grad_norm": 1.0941133499145508, + "learning_rate": 9.818016032890688e-06, + "loss": 0.3435, + "step": 5673 + }, + { + "epoch": 0.11358506618622226, + "grad_norm": 1.7895931005477905, + "learning_rate": 9.817929356780212e-06, + "loss": 0.8425, + "step": 5674 + }, + { + "epoch": 0.1136050847033506, + "grad_norm": 1.1056618690490723, + "learning_rate": 9.8178426604162e-06, + "loss": 0.4064, + "step": 5675 + }, + { + "epoch": 0.11362510322047895, + "grad_norm": 1.1799503564834595, + "learning_rate": 9.817755943799014e-06, + "loss": 0.3116, + "step": 5676 + }, + { + "epoch": 0.11364512173760728, + "grad_norm": 1.3699195384979248, + "learning_rate": 9.817669206929018e-06, + "loss": 0.3501, + "step": 5677 + }, + { + "epoch": 0.11366514025473563, + "grad_norm": 1.1778146028518677, + "learning_rate": 9.817582449806579e-06, + "loss": 0.3388, + "step": 5678 + }, + { + "epoch": 0.11368515877186397, + "grad_norm": 1.1523669958114624, + "learning_rate": 9.817495672432059e-06, + "loss": 0.3538, + "step": 5679 + }, + { + "epoch": 0.11370517728899232, + "grad_norm": 1.0147525072097778, + "learning_rate": 9.817408874805823e-06, + "loss": 0.309, + "step": 5680 + }, + { + "epoch": 0.11372519580612066, + "grad_norm": 1.0345324277877808, + "learning_rate": 9.81732205692824e-06, + "loss": 0.3206, + "step": 5681 + }, + { + "epoch": 0.11374521432324901, + "grad_norm": 1.0725951194763184, + "learning_rate": 9.81723521879967e-06, + "loss": 0.3397, + "step": 5682 + }, + { + "epoch": 0.11376523284037734, + "grad_norm": 1.8288567066192627, + "learning_rate": 9.817148360420479e-06, + "loss": 0.8353, + "step": 5683 + }, + { + "epoch": 0.1137852513575057, + "grad_norm": 1.123624324798584, + "learning_rate": 9.817061481791032e-06, + "loss": 0.2751, + "step": 5684 + }, + { + "epoch": 0.11380526987463403, + "grad_norm": 1.1074109077453613, + "learning_rate": 9.816974582911698e-06, + "loss": 0.3424, + "step": 5685 + }, + { + "epoch": 0.11382528839176238, + "grad_norm": 1.2432111501693726, + "learning_rate": 9.816887663782838e-06, + "loss": 0.3565, + "step": 5686 + }, + { + "epoch": 0.11384530690889072, + "grad_norm": 1.0575053691864014, + "learning_rate": 9.816800724404818e-06, + "loss": 0.3098, + "step": 5687 + }, + { + "epoch": 0.11386532542601907, + "grad_norm": 1.2809914350509644, + "learning_rate": 9.816713764778006e-06, + "loss": 0.3254, + "step": 5688 + }, + { + "epoch": 0.11388534394314741, + "grad_norm": 1.1159437894821167, + "learning_rate": 9.816626784902765e-06, + "loss": 0.3334, + "step": 5689 + }, + { + "epoch": 0.11390536246027576, + "grad_norm": 1.013149619102478, + "learning_rate": 9.816539784779461e-06, + "loss": 0.3818, + "step": 5690 + }, + { + "epoch": 0.1139253809774041, + "grad_norm": 1.0007907152175903, + "learning_rate": 9.816452764408461e-06, + "loss": 0.3114, + "step": 5691 + }, + { + "epoch": 0.11394539949453245, + "grad_norm": 1.098410964012146, + "learning_rate": 9.816365723790132e-06, + "loss": 0.326, + "step": 5692 + }, + { + "epoch": 0.11396541801166078, + "grad_norm": 1.7514374256134033, + "learning_rate": 9.816278662924837e-06, + "loss": 0.865, + "step": 5693 + }, + { + "epoch": 0.11398543652878913, + "grad_norm": 1.0859371423721313, + "learning_rate": 9.81619158181294e-06, + "loss": 0.3352, + "step": 5694 + }, + { + "epoch": 0.11400545504591747, + "grad_norm": 1.103734016418457, + "learning_rate": 9.816104480454813e-06, + "loss": 0.288, + "step": 5695 + }, + { + "epoch": 0.11402547356304582, + "grad_norm": 1.094892144203186, + "learning_rate": 9.816017358850818e-06, + "loss": 0.2996, + "step": 5696 + }, + { + "epoch": 0.11404549208017416, + "grad_norm": 1.032420039176941, + "learning_rate": 9.815930217001324e-06, + "loss": 0.2924, + "step": 5697 + }, + { + "epoch": 0.11406551059730251, + "grad_norm": 1.131307601928711, + "learning_rate": 9.815843054906693e-06, + "loss": 0.3427, + "step": 5698 + }, + { + "epoch": 0.11408552911443084, + "grad_norm": 1.213504433631897, + "learning_rate": 9.815755872567296e-06, + "loss": 0.3329, + "step": 5699 + }, + { + "epoch": 0.1141055476315592, + "grad_norm": 1.0641907453536987, + "learning_rate": 9.815668669983497e-06, + "loss": 0.3305, + "step": 5700 + }, + { + "epoch": 0.11412556614868753, + "grad_norm": 1.0791696310043335, + "learning_rate": 9.815581447155665e-06, + "loss": 0.3395, + "step": 5701 + }, + { + "epoch": 0.11414558466581588, + "grad_norm": 1.0879569053649902, + "learning_rate": 9.815494204084164e-06, + "loss": 0.3005, + "step": 5702 + }, + { + "epoch": 0.11416560318294422, + "grad_norm": 1.196492075920105, + "learning_rate": 9.815406940769361e-06, + "loss": 0.3314, + "step": 5703 + }, + { + "epoch": 0.11418562170007257, + "grad_norm": 1.0993776321411133, + "learning_rate": 9.815319657211623e-06, + "loss": 0.3293, + "step": 5704 + }, + { + "epoch": 0.11420564021720091, + "grad_norm": 1.1279680728912354, + "learning_rate": 9.815232353411319e-06, + "loss": 0.3437, + "step": 5705 + }, + { + "epoch": 0.11422565873432926, + "grad_norm": 1.9346669912338257, + "learning_rate": 9.815145029368813e-06, + "loss": 0.8966, + "step": 5706 + }, + { + "epoch": 0.1142456772514576, + "grad_norm": 1.0939468145370483, + "learning_rate": 9.815057685084475e-06, + "loss": 0.3232, + "step": 5707 + }, + { + "epoch": 0.11426569576858595, + "grad_norm": 1.1659274101257324, + "learning_rate": 9.81497032055867e-06, + "loss": 0.3354, + "step": 5708 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 1.0809766054153442, + "learning_rate": 9.814882935791765e-06, + "loss": 0.3491, + "step": 5709 + }, + { + "epoch": 0.11430573280284263, + "grad_norm": 1.0648614168167114, + "learning_rate": 9.814795530784129e-06, + "loss": 0.3157, + "step": 5710 + }, + { + "epoch": 0.11432575131997097, + "grad_norm": 1.1579340696334839, + "learning_rate": 9.81470810553613e-06, + "loss": 0.2784, + "step": 5711 + }, + { + "epoch": 0.11434576983709932, + "grad_norm": 1.0690542459487915, + "learning_rate": 9.814620660048133e-06, + "loss": 0.3645, + "step": 5712 + }, + { + "epoch": 0.11436578835422766, + "grad_norm": 1.123995065689087, + "learning_rate": 9.814533194320508e-06, + "loss": 0.3102, + "step": 5713 + }, + { + "epoch": 0.11438580687135601, + "grad_norm": 1.1099348068237305, + "learning_rate": 9.81444570835362e-06, + "loss": 0.3797, + "step": 5714 + }, + { + "epoch": 0.11440582538848434, + "grad_norm": 1.1868480443954468, + "learning_rate": 9.814358202147837e-06, + "loss": 0.3047, + "step": 5715 + }, + { + "epoch": 0.1144258439056127, + "grad_norm": 1.116043210029602, + "learning_rate": 9.814270675703532e-06, + "loss": 0.3305, + "step": 5716 + }, + { + "epoch": 0.11444586242274103, + "grad_norm": 1.0983924865722656, + "learning_rate": 9.814183129021067e-06, + "loss": 0.3183, + "step": 5717 + }, + { + "epoch": 0.11446588093986938, + "grad_norm": 1.0410614013671875, + "learning_rate": 9.814095562100811e-06, + "loss": 0.3093, + "step": 5718 + }, + { + "epoch": 0.11448589945699772, + "grad_norm": 1.0225714445114136, + "learning_rate": 9.814007974943135e-06, + "loss": 0.2833, + "step": 5719 + }, + { + "epoch": 0.11450591797412607, + "grad_norm": 1.3034288883209229, + "learning_rate": 9.813920367548406e-06, + "loss": 0.369, + "step": 5720 + }, + { + "epoch": 0.11452593649125441, + "grad_norm": 1.102905511856079, + "learning_rate": 9.813832739916991e-06, + "loss": 0.3181, + "step": 5721 + }, + { + "epoch": 0.11454595500838276, + "grad_norm": 1.2632758617401123, + "learning_rate": 9.813745092049258e-06, + "loss": 0.3297, + "step": 5722 + }, + { + "epoch": 0.1145659735255111, + "grad_norm": 1.9593231678009033, + "learning_rate": 9.813657423945578e-06, + "loss": 0.8457, + "step": 5723 + }, + { + "epoch": 0.11458599204263945, + "grad_norm": 1.014145016670227, + "learning_rate": 9.813569735606318e-06, + "loss": 0.3299, + "step": 5724 + }, + { + "epoch": 0.11460601055976778, + "grad_norm": 1.281339406967163, + "learning_rate": 9.813482027031846e-06, + "loss": 0.2891, + "step": 5725 + }, + { + "epoch": 0.11462602907689613, + "grad_norm": 0.973191499710083, + "learning_rate": 9.813394298222532e-06, + "loss": 0.2774, + "step": 5726 + }, + { + "epoch": 0.11464604759402447, + "grad_norm": 1.041935682296753, + "learning_rate": 9.813306549178744e-06, + "loss": 0.3352, + "step": 5727 + }, + { + "epoch": 0.11466606611115282, + "grad_norm": 1.0106875896453857, + "learning_rate": 9.81321877990085e-06, + "loss": 0.3192, + "step": 5728 + }, + { + "epoch": 0.11468608462828116, + "grad_norm": 1.1656808853149414, + "learning_rate": 9.813130990389223e-06, + "loss": 0.3766, + "step": 5729 + }, + { + "epoch": 0.11470610314540951, + "grad_norm": 1.0558302402496338, + "learning_rate": 9.813043180644226e-06, + "loss": 0.3526, + "step": 5730 + }, + { + "epoch": 0.11472612166253784, + "grad_norm": 1.1920254230499268, + "learning_rate": 9.812955350666234e-06, + "loss": 0.3039, + "step": 5731 + }, + { + "epoch": 0.1147461401796662, + "grad_norm": 1.0785984992980957, + "learning_rate": 9.812867500455612e-06, + "loss": 0.3398, + "step": 5732 + }, + { + "epoch": 0.11476615869679453, + "grad_norm": 0.9834795594215393, + "learning_rate": 9.81277963001273e-06, + "loss": 0.2953, + "step": 5733 + }, + { + "epoch": 0.11478617721392288, + "grad_norm": 1.0604711771011353, + "learning_rate": 9.812691739337958e-06, + "loss": 0.2999, + "step": 5734 + }, + { + "epoch": 0.11480619573105122, + "grad_norm": 1.2056111097335815, + "learning_rate": 9.812603828431666e-06, + "loss": 0.3646, + "step": 5735 + }, + { + "epoch": 0.11482621424817957, + "grad_norm": 1.1633539199829102, + "learning_rate": 9.812515897294225e-06, + "loss": 0.3692, + "step": 5736 + }, + { + "epoch": 0.1148462327653079, + "grad_norm": 1.0248491764068604, + "learning_rate": 9.812427945926002e-06, + "loss": 0.3064, + "step": 5737 + }, + { + "epoch": 0.11486625128243626, + "grad_norm": 1.0255454778671265, + "learning_rate": 9.812339974327367e-06, + "loss": 0.2942, + "step": 5738 + }, + { + "epoch": 0.1148862697995646, + "grad_norm": 1.1901400089263916, + "learning_rate": 9.812251982498691e-06, + "loss": 0.3373, + "step": 5739 + }, + { + "epoch": 0.11490628831669294, + "grad_norm": 1.0771243572235107, + "learning_rate": 9.812163970440343e-06, + "loss": 0.302, + "step": 5740 + }, + { + "epoch": 0.11492630683382128, + "grad_norm": 1.0564745664596558, + "learning_rate": 9.812075938152691e-06, + "loss": 0.3507, + "step": 5741 + }, + { + "epoch": 0.11494632535094963, + "grad_norm": 1.0002447366714478, + "learning_rate": 9.811987885636111e-06, + "loss": 0.2919, + "step": 5742 + }, + { + "epoch": 0.11496634386807797, + "grad_norm": 1.7869065999984741, + "learning_rate": 9.811899812890967e-06, + "loss": 0.8381, + "step": 5743 + }, + { + "epoch": 0.11498636238520632, + "grad_norm": 1.1258320808410645, + "learning_rate": 9.811811719917632e-06, + "loss": 0.336, + "step": 5744 + }, + { + "epoch": 0.11500638090233466, + "grad_norm": 1.0715543031692505, + "learning_rate": 9.811723606716477e-06, + "loss": 0.3117, + "step": 5745 + }, + { + "epoch": 0.11502639941946301, + "grad_norm": 1.154240608215332, + "learning_rate": 9.811635473287872e-06, + "loss": 0.389, + "step": 5746 + }, + { + "epoch": 0.11504641793659134, + "grad_norm": 1.1366416215896606, + "learning_rate": 9.811547319632185e-06, + "loss": 0.382, + "step": 5747 + }, + { + "epoch": 0.1150664364537197, + "grad_norm": 1.0480308532714844, + "learning_rate": 9.81145914574979e-06, + "loss": 0.327, + "step": 5748 + }, + { + "epoch": 0.11508645497084803, + "grad_norm": 1.0691876411437988, + "learning_rate": 9.811370951641056e-06, + "loss": 0.3455, + "step": 5749 + }, + { + "epoch": 0.11510647348797638, + "grad_norm": 1.103724479675293, + "learning_rate": 9.811282737306354e-06, + "loss": 0.3043, + "step": 5750 + }, + { + "epoch": 0.11512649200510472, + "grad_norm": 1.0842095613479614, + "learning_rate": 9.811194502746054e-06, + "loss": 0.3092, + "step": 5751 + }, + { + "epoch": 0.11514651052223307, + "grad_norm": 2.0119645595550537, + "learning_rate": 9.811106247960528e-06, + "loss": 0.8546, + "step": 5752 + }, + { + "epoch": 0.1151665290393614, + "grad_norm": 1.0894110202789307, + "learning_rate": 9.811017972950146e-06, + "loss": 0.3496, + "step": 5753 + }, + { + "epoch": 0.11518654755648976, + "grad_norm": 1.1126588582992554, + "learning_rate": 9.810929677715282e-06, + "loss": 0.3267, + "step": 5754 + }, + { + "epoch": 0.1152065660736181, + "grad_norm": 1.0947548151016235, + "learning_rate": 9.810841362256303e-06, + "loss": 0.3159, + "step": 5755 + }, + { + "epoch": 0.11522658459074644, + "grad_norm": 1.1541821956634521, + "learning_rate": 9.810753026573583e-06, + "loss": 0.3647, + "step": 5756 + }, + { + "epoch": 0.11524660310787478, + "grad_norm": 1.304207444190979, + "learning_rate": 9.810664670667491e-06, + "loss": 0.3505, + "step": 5757 + }, + { + "epoch": 0.11526662162500313, + "grad_norm": 0.9884036779403687, + "learning_rate": 9.810576294538402e-06, + "loss": 0.2948, + "step": 5758 + }, + { + "epoch": 0.11528664014213147, + "grad_norm": 1.111599326133728, + "learning_rate": 9.810487898186685e-06, + "loss": 0.3367, + "step": 5759 + }, + { + "epoch": 0.11530665865925982, + "grad_norm": 1.2040486335754395, + "learning_rate": 9.810399481612711e-06, + "loss": 0.3555, + "step": 5760 + }, + { + "epoch": 0.11532667717638816, + "grad_norm": 1.8394426107406616, + "learning_rate": 9.810311044816855e-06, + "loss": 0.8338, + "step": 5761 + }, + { + "epoch": 0.11534669569351651, + "grad_norm": 1.2178510427474976, + "learning_rate": 9.810222587799484e-06, + "loss": 0.3013, + "step": 5762 + }, + { + "epoch": 0.11536671421064484, + "grad_norm": 1.2166634798049927, + "learning_rate": 9.810134110560975e-06, + "loss": 0.3345, + "step": 5763 + }, + { + "epoch": 0.1153867327277732, + "grad_norm": 1.0704078674316406, + "learning_rate": 9.810045613101696e-06, + "loss": 0.3545, + "step": 5764 + }, + { + "epoch": 0.11540675124490153, + "grad_norm": 1.0413813591003418, + "learning_rate": 9.80995709542202e-06, + "loss": 0.3449, + "step": 5765 + }, + { + "epoch": 0.11542676976202988, + "grad_norm": 1.0797944068908691, + "learning_rate": 9.80986855752232e-06, + "loss": 0.2841, + "step": 5766 + }, + { + "epoch": 0.11544678827915822, + "grad_norm": 1.1562069654464722, + "learning_rate": 9.809779999402968e-06, + "loss": 0.3503, + "step": 5767 + }, + { + "epoch": 0.11546680679628657, + "grad_norm": 1.1074095964431763, + "learning_rate": 9.809691421064336e-06, + "loss": 0.327, + "step": 5768 + }, + { + "epoch": 0.1154868253134149, + "grad_norm": 1.2085872888565063, + "learning_rate": 9.809602822506795e-06, + "loss": 0.3374, + "step": 5769 + }, + { + "epoch": 0.11550684383054326, + "grad_norm": 1.0792181491851807, + "learning_rate": 9.809514203730719e-06, + "loss": 0.2919, + "step": 5770 + }, + { + "epoch": 0.1155268623476716, + "grad_norm": 1.174818515777588, + "learning_rate": 9.809425564736481e-06, + "loss": 0.3345, + "step": 5771 + }, + { + "epoch": 0.11554688086479994, + "grad_norm": 1.217300534248352, + "learning_rate": 9.809336905524453e-06, + "loss": 0.2915, + "step": 5772 + }, + { + "epoch": 0.11556689938192828, + "grad_norm": 1.195068359375, + "learning_rate": 9.809248226095007e-06, + "loss": 0.3619, + "step": 5773 + }, + { + "epoch": 0.11558691789905663, + "grad_norm": 1.177497148513794, + "learning_rate": 9.809159526448517e-06, + "loss": 0.3813, + "step": 5774 + }, + { + "epoch": 0.11560693641618497, + "grad_norm": 1.0484766960144043, + "learning_rate": 9.809070806585357e-06, + "loss": 0.2796, + "step": 5775 + }, + { + "epoch": 0.11562695493331332, + "grad_norm": 1.0988609790802002, + "learning_rate": 9.808982066505896e-06, + "loss": 0.3135, + "step": 5776 + }, + { + "epoch": 0.11564697345044166, + "grad_norm": 1.1160938739776611, + "learning_rate": 9.80889330621051e-06, + "loss": 0.3328, + "step": 5777 + }, + { + "epoch": 0.11566699196757001, + "grad_norm": 1.0474236011505127, + "learning_rate": 9.808804525699572e-06, + "loss": 0.3043, + "step": 5778 + }, + { + "epoch": 0.11568701048469834, + "grad_norm": 1.1348321437835693, + "learning_rate": 9.808715724973452e-06, + "loss": 0.3402, + "step": 5779 + }, + { + "epoch": 0.1157070290018267, + "grad_norm": 1.1846650838851929, + "learning_rate": 9.808626904032528e-06, + "loss": 0.3124, + "step": 5780 + }, + { + "epoch": 0.11572704751895503, + "grad_norm": 1.0150587558746338, + "learning_rate": 9.808538062877172e-06, + "loss": 0.2961, + "step": 5781 + }, + { + "epoch": 0.11574706603608338, + "grad_norm": 1.3584458827972412, + "learning_rate": 9.808449201507756e-06, + "loss": 0.3793, + "step": 5782 + }, + { + "epoch": 0.11576708455321172, + "grad_norm": 1.0847522020339966, + "learning_rate": 9.808360319924654e-06, + "loss": 0.3602, + "step": 5783 + }, + { + "epoch": 0.11578710307034007, + "grad_norm": 1.1352285146713257, + "learning_rate": 9.808271418128239e-06, + "loss": 0.3084, + "step": 5784 + }, + { + "epoch": 0.1158071215874684, + "grad_norm": 1.7050132751464844, + "learning_rate": 9.808182496118887e-06, + "loss": 0.8542, + "step": 5785 + }, + { + "epoch": 0.11582714010459676, + "grad_norm": 1.0303210020065308, + "learning_rate": 9.808093553896971e-06, + "loss": 0.3343, + "step": 5786 + }, + { + "epoch": 0.1158471586217251, + "grad_norm": 1.1557062864303589, + "learning_rate": 9.808004591462862e-06, + "loss": 0.3587, + "step": 5787 + }, + { + "epoch": 0.11586717713885344, + "grad_norm": 1.0459954738616943, + "learning_rate": 9.807915608816941e-06, + "loss": 0.343, + "step": 5788 + }, + { + "epoch": 0.11588719565598178, + "grad_norm": 1.95121169090271, + "learning_rate": 9.807826605959573e-06, + "loss": 0.3558, + "step": 5789 + }, + { + "epoch": 0.11590721417311013, + "grad_norm": 1.1829581260681152, + "learning_rate": 9.807737582891139e-06, + "loss": 0.3298, + "step": 5790 + }, + { + "epoch": 0.11592723269023847, + "grad_norm": 1.1127119064331055, + "learning_rate": 9.807648539612011e-06, + "loss": 0.3012, + "step": 5791 + }, + { + "epoch": 0.11594725120736682, + "grad_norm": 1.458113431930542, + "learning_rate": 9.807559476122561e-06, + "loss": 0.3348, + "step": 5792 + }, + { + "epoch": 0.11596726972449516, + "grad_norm": 0.9857649207115173, + "learning_rate": 9.807470392423167e-06, + "loss": 0.2973, + "step": 5793 + }, + { + "epoch": 0.1159872882416235, + "grad_norm": 1.1751545667648315, + "learning_rate": 9.807381288514201e-06, + "loss": 0.3635, + "step": 5794 + }, + { + "epoch": 0.11600730675875184, + "grad_norm": 1.1465880870819092, + "learning_rate": 9.80729216439604e-06, + "loss": 0.2661, + "step": 5795 + }, + { + "epoch": 0.1160273252758802, + "grad_norm": 1.1098843812942505, + "learning_rate": 9.807203020069058e-06, + "loss": 0.3347, + "step": 5796 + }, + { + "epoch": 0.11604734379300853, + "grad_norm": 1.1538820266723633, + "learning_rate": 9.807113855533628e-06, + "loss": 0.3072, + "step": 5797 + }, + { + "epoch": 0.11606736231013688, + "grad_norm": 1.1551008224487305, + "learning_rate": 9.807024670790124e-06, + "loss": 0.348, + "step": 5798 + }, + { + "epoch": 0.11608738082726522, + "grad_norm": 1.0749659538269043, + "learning_rate": 9.806935465838925e-06, + "loss": 0.3405, + "step": 5799 + }, + { + "epoch": 0.11610739934439357, + "grad_norm": 1.8284672498703003, + "learning_rate": 9.806846240680404e-06, + "loss": 0.836, + "step": 5800 + }, + { + "epoch": 0.1161274178615219, + "grad_norm": 1.467962622642517, + "learning_rate": 9.806756995314935e-06, + "loss": 0.3581, + "step": 5801 + }, + { + "epoch": 0.11614743637865026, + "grad_norm": 1.1526727676391602, + "learning_rate": 9.806667729742894e-06, + "loss": 0.3237, + "step": 5802 + }, + { + "epoch": 0.1161674548957786, + "grad_norm": 1.1197706460952759, + "learning_rate": 9.806578443964655e-06, + "loss": 0.295, + "step": 5803 + }, + { + "epoch": 0.11618747341290694, + "grad_norm": 1.0913697481155396, + "learning_rate": 9.806489137980596e-06, + "loss": 0.3264, + "step": 5804 + }, + { + "epoch": 0.11620749193003528, + "grad_norm": 1.851243019104004, + "learning_rate": 9.806399811791092e-06, + "loss": 0.8447, + "step": 5805 + }, + { + "epoch": 0.11622751044716363, + "grad_norm": 1.1248865127563477, + "learning_rate": 9.806310465396517e-06, + "loss": 0.3637, + "step": 5806 + }, + { + "epoch": 0.11624752896429197, + "grad_norm": 1.2097954750061035, + "learning_rate": 9.806221098797246e-06, + "loss": 0.3589, + "step": 5807 + }, + { + "epoch": 0.11626754748142032, + "grad_norm": 0.9961126446723938, + "learning_rate": 9.806131711993656e-06, + "loss": 0.3015, + "step": 5808 + }, + { + "epoch": 0.11628756599854866, + "grad_norm": 1.0577985048294067, + "learning_rate": 9.806042304986124e-06, + "loss": 0.3085, + "step": 5809 + }, + { + "epoch": 0.116307584515677, + "grad_norm": 1.1497925519943237, + "learning_rate": 9.805952877775023e-06, + "loss": 0.3291, + "step": 5810 + }, + { + "epoch": 0.11632760303280534, + "grad_norm": 1.1972349882125854, + "learning_rate": 9.805863430360731e-06, + "loss": 0.3447, + "step": 5811 + }, + { + "epoch": 0.1163476215499337, + "grad_norm": 1.1324024200439453, + "learning_rate": 9.805773962743622e-06, + "loss": 0.318, + "step": 5812 + }, + { + "epoch": 0.11636764006706203, + "grad_norm": 1.1013227701187134, + "learning_rate": 9.805684474924077e-06, + "loss": 0.3151, + "step": 5813 + }, + { + "epoch": 0.11638765858419038, + "grad_norm": 1.2765012979507446, + "learning_rate": 9.805594966902465e-06, + "loss": 0.3298, + "step": 5814 + }, + { + "epoch": 0.11640767710131872, + "grad_norm": 1.2803256511688232, + "learning_rate": 9.805505438679168e-06, + "loss": 0.2896, + "step": 5815 + }, + { + "epoch": 0.11642769561844707, + "grad_norm": 0.998851478099823, + "learning_rate": 9.805415890254559e-06, + "loss": 0.3171, + "step": 5816 + }, + { + "epoch": 0.1164477141355754, + "grad_norm": 1.1288840770721436, + "learning_rate": 9.805326321629017e-06, + "loss": 0.3115, + "step": 5817 + }, + { + "epoch": 0.11646773265270376, + "grad_norm": 1.0797345638275146, + "learning_rate": 9.805236732802917e-06, + "loss": 0.3319, + "step": 5818 + }, + { + "epoch": 0.1164877511698321, + "grad_norm": 1.135475516319275, + "learning_rate": 9.805147123776635e-06, + "loss": 0.3461, + "step": 5819 + }, + { + "epoch": 0.11650776968696044, + "grad_norm": 1.0193688869476318, + "learning_rate": 9.80505749455055e-06, + "loss": 0.301, + "step": 5820 + }, + { + "epoch": 0.11652778820408878, + "grad_norm": 1.2457656860351562, + "learning_rate": 9.804967845125036e-06, + "loss": 0.3476, + "step": 5821 + }, + { + "epoch": 0.11654780672121713, + "grad_norm": 1.0822465419769287, + "learning_rate": 9.804878175500471e-06, + "loss": 0.3292, + "step": 5822 + }, + { + "epoch": 0.11656782523834547, + "grad_norm": 1.0997899770736694, + "learning_rate": 9.804788485677232e-06, + "loss": 0.3009, + "step": 5823 + }, + { + "epoch": 0.11658784375547382, + "grad_norm": 1.066670536994934, + "learning_rate": 9.804698775655697e-06, + "loss": 0.3693, + "step": 5824 + }, + { + "epoch": 0.11660786227260216, + "grad_norm": 1.2510387897491455, + "learning_rate": 9.804609045436244e-06, + "loss": 0.344, + "step": 5825 + }, + { + "epoch": 0.1166278807897305, + "grad_norm": 1.1713920831680298, + "learning_rate": 9.804519295019246e-06, + "loss": 0.3493, + "step": 5826 + }, + { + "epoch": 0.11664789930685884, + "grad_norm": 1.1429235935211182, + "learning_rate": 9.804429524405084e-06, + "loss": 0.2941, + "step": 5827 + }, + { + "epoch": 0.1166679178239872, + "grad_norm": 1.1271113157272339, + "learning_rate": 9.804339733594133e-06, + "loss": 0.3017, + "step": 5828 + }, + { + "epoch": 0.11668793634111553, + "grad_norm": 0.9986041784286499, + "learning_rate": 9.804249922586772e-06, + "loss": 0.2934, + "step": 5829 + }, + { + "epoch": 0.11670795485824388, + "grad_norm": 1.1667211055755615, + "learning_rate": 9.804160091383378e-06, + "loss": 0.3775, + "step": 5830 + }, + { + "epoch": 0.11672797337537222, + "grad_norm": 1.2523915767669678, + "learning_rate": 9.804070239984332e-06, + "loss": 0.3691, + "step": 5831 + }, + { + "epoch": 0.11674799189250057, + "grad_norm": 1.1197593212127686, + "learning_rate": 9.803980368390004e-06, + "loss": 0.3278, + "step": 5832 + }, + { + "epoch": 0.1167680104096289, + "grad_norm": 1.0902984142303467, + "learning_rate": 9.80389047660078e-06, + "loss": 0.315, + "step": 5833 + }, + { + "epoch": 0.11678802892675726, + "grad_norm": 1.0798537731170654, + "learning_rate": 9.803800564617032e-06, + "loss": 0.3126, + "step": 5834 + }, + { + "epoch": 0.1168080474438856, + "grad_norm": 1.0604674816131592, + "learning_rate": 9.803710632439142e-06, + "loss": 0.3269, + "step": 5835 + }, + { + "epoch": 0.11682806596101394, + "grad_norm": 1.2591792345046997, + "learning_rate": 9.803620680067485e-06, + "loss": 0.3317, + "step": 5836 + }, + { + "epoch": 0.11684808447814228, + "grad_norm": 1.1615911722183228, + "learning_rate": 9.80353070750244e-06, + "loss": 0.2875, + "step": 5837 + }, + { + "epoch": 0.11686810299527063, + "grad_norm": 1.063598394393921, + "learning_rate": 9.803440714744386e-06, + "loss": 0.3074, + "step": 5838 + }, + { + "epoch": 0.11688812151239897, + "grad_norm": 1.1023844480514526, + "learning_rate": 9.803350701793702e-06, + "loss": 0.2816, + "step": 5839 + }, + { + "epoch": 0.11690814002952732, + "grad_norm": 1.1311748027801514, + "learning_rate": 9.803260668650765e-06, + "loss": 0.3233, + "step": 5840 + }, + { + "epoch": 0.11692815854665566, + "grad_norm": 1.0446381568908691, + "learning_rate": 9.803170615315954e-06, + "loss": 0.266, + "step": 5841 + }, + { + "epoch": 0.116948177063784, + "grad_norm": 1.2107049226760864, + "learning_rate": 9.803080541789645e-06, + "loss": 0.2874, + "step": 5842 + }, + { + "epoch": 0.11696819558091234, + "grad_norm": 1.2230730056762695, + "learning_rate": 9.802990448072221e-06, + "loss": 0.2957, + "step": 5843 + }, + { + "epoch": 0.1169882140980407, + "grad_norm": 1.028782844543457, + "learning_rate": 9.802900334164058e-06, + "loss": 0.3077, + "step": 5844 + }, + { + "epoch": 0.11700823261516903, + "grad_norm": 1.116304636001587, + "learning_rate": 9.802810200065538e-06, + "loss": 0.3475, + "step": 5845 + }, + { + "epoch": 0.11702825113229738, + "grad_norm": 1.1364127397537231, + "learning_rate": 9.802720045777035e-06, + "loss": 0.3214, + "step": 5846 + }, + { + "epoch": 0.11704826964942572, + "grad_norm": 1.0874329805374146, + "learning_rate": 9.802629871298931e-06, + "loss": 0.3474, + "step": 5847 + }, + { + "epoch": 0.11706828816655407, + "grad_norm": 1.2717114686965942, + "learning_rate": 9.802539676631605e-06, + "loss": 0.346, + "step": 5848 + }, + { + "epoch": 0.1170883066836824, + "grad_norm": 1.0645358562469482, + "learning_rate": 9.802449461775436e-06, + "loss": 0.3173, + "step": 5849 + }, + { + "epoch": 0.11710832520081076, + "grad_norm": 1.2783520221710205, + "learning_rate": 9.802359226730802e-06, + "loss": 0.3392, + "step": 5850 + }, + { + "epoch": 0.11712834371793909, + "grad_norm": 0.965937614440918, + "learning_rate": 9.802268971498083e-06, + "loss": 0.3012, + "step": 5851 + }, + { + "epoch": 0.11714836223506744, + "grad_norm": 1.1248631477355957, + "learning_rate": 9.80217869607766e-06, + "loss": 0.3492, + "step": 5852 + }, + { + "epoch": 0.11716838075219578, + "grad_norm": 1.0939463376998901, + "learning_rate": 9.802088400469911e-06, + "loss": 0.3207, + "step": 5853 + }, + { + "epoch": 0.11718839926932413, + "grad_norm": 2.0292959213256836, + "learning_rate": 9.801998084675215e-06, + "loss": 0.8819, + "step": 5854 + }, + { + "epoch": 0.11720841778645247, + "grad_norm": 1.058800458908081, + "learning_rate": 9.801907748693953e-06, + "loss": 0.3638, + "step": 5855 + }, + { + "epoch": 0.11722843630358082, + "grad_norm": 1.463722586631775, + "learning_rate": 9.801817392526504e-06, + "loss": 0.3248, + "step": 5856 + }, + { + "epoch": 0.11724845482070916, + "grad_norm": 1.0870285034179688, + "learning_rate": 9.801727016173247e-06, + "loss": 0.2974, + "step": 5857 + }, + { + "epoch": 0.1172684733378375, + "grad_norm": 1.0349962711334229, + "learning_rate": 9.801636619634565e-06, + "loss": 0.2975, + "step": 5858 + }, + { + "epoch": 0.11728849185496584, + "grad_norm": 1.0930224657058716, + "learning_rate": 9.801546202910834e-06, + "loss": 0.3387, + "step": 5859 + }, + { + "epoch": 0.1173085103720942, + "grad_norm": 1.0733407735824585, + "learning_rate": 9.801455766002438e-06, + "loss": 0.2726, + "step": 5860 + }, + { + "epoch": 0.11732852888922253, + "grad_norm": 1.1772637367248535, + "learning_rate": 9.801365308909753e-06, + "loss": 0.3511, + "step": 5861 + }, + { + "epoch": 0.11734854740635088, + "grad_norm": 1.1501187086105347, + "learning_rate": 9.801274831633164e-06, + "loss": 0.3655, + "step": 5862 + }, + { + "epoch": 0.11736856592347922, + "grad_norm": 1.0604357719421387, + "learning_rate": 9.801184334173048e-06, + "loss": 0.3124, + "step": 5863 + }, + { + "epoch": 0.11738858444060757, + "grad_norm": 1.110060691833496, + "learning_rate": 9.801093816529786e-06, + "loss": 0.3805, + "step": 5864 + }, + { + "epoch": 0.1174086029577359, + "grad_norm": 1.1111136674880981, + "learning_rate": 9.801003278703758e-06, + "loss": 0.3538, + "step": 5865 + }, + { + "epoch": 0.11742862147486426, + "grad_norm": 1.0964797735214233, + "learning_rate": 9.800912720695346e-06, + "loss": 0.3045, + "step": 5866 + }, + { + "epoch": 0.11744863999199259, + "grad_norm": 1.0688945055007935, + "learning_rate": 9.800822142504931e-06, + "loss": 0.3514, + "step": 5867 + }, + { + "epoch": 0.11746865850912094, + "grad_norm": 1.8908580541610718, + "learning_rate": 9.800731544132892e-06, + "loss": 0.9728, + "step": 5868 + }, + { + "epoch": 0.11748867702624928, + "grad_norm": 1.1582287549972534, + "learning_rate": 9.80064092557961e-06, + "loss": 0.282, + "step": 5869 + }, + { + "epoch": 0.11750869554337763, + "grad_norm": 2.032886028289795, + "learning_rate": 9.800550286845469e-06, + "loss": 0.8646, + "step": 5870 + }, + { + "epoch": 0.11752871406050597, + "grad_norm": 1.0913437604904175, + "learning_rate": 9.800459627930845e-06, + "loss": 0.2859, + "step": 5871 + }, + { + "epoch": 0.11754873257763432, + "grad_norm": 1.047313928604126, + "learning_rate": 9.800368948836124e-06, + "loss": 0.3236, + "step": 5872 + }, + { + "epoch": 0.11756875109476266, + "grad_norm": 1.1325474977493286, + "learning_rate": 9.800278249561683e-06, + "loss": 0.37, + "step": 5873 + }, + { + "epoch": 0.117588769611891, + "grad_norm": 1.0206854343414307, + "learning_rate": 9.800187530107907e-06, + "loss": 0.3105, + "step": 5874 + }, + { + "epoch": 0.11760878812901934, + "grad_norm": 1.1076475381851196, + "learning_rate": 9.800096790475175e-06, + "loss": 0.3258, + "step": 5875 + }, + { + "epoch": 0.1176288066461477, + "grad_norm": 1.1357741355895996, + "learning_rate": 9.800006030663869e-06, + "loss": 0.3045, + "step": 5876 + }, + { + "epoch": 0.11764882516327603, + "grad_norm": 1.221313238143921, + "learning_rate": 9.79991525067437e-06, + "loss": 0.3633, + "step": 5877 + }, + { + "epoch": 0.11766884368040438, + "grad_norm": 1.1355400085449219, + "learning_rate": 9.799824450507061e-06, + "loss": 0.3186, + "step": 5878 + }, + { + "epoch": 0.11768886219753272, + "grad_norm": 1.047662615776062, + "learning_rate": 9.799733630162322e-06, + "loss": 0.3309, + "step": 5879 + }, + { + "epoch": 0.11770888071466107, + "grad_norm": 1.1070692539215088, + "learning_rate": 9.799642789640538e-06, + "loss": 0.3085, + "step": 5880 + }, + { + "epoch": 0.1177288992317894, + "grad_norm": 1.2235273122787476, + "learning_rate": 9.799551928942086e-06, + "loss": 0.3794, + "step": 5881 + }, + { + "epoch": 0.11774891774891776, + "grad_norm": 1.2624906301498413, + "learning_rate": 9.799461048067353e-06, + "loss": 0.3559, + "step": 5882 + }, + { + "epoch": 0.11776893626604609, + "grad_norm": 1.1255367994308472, + "learning_rate": 9.799370147016716e-06, + "loss": 0.3604, + "step": 5883 + }, + { + "epoch": 0.11778895478317444, + "grad_norm": 1.0154634714126587, + "learning_rate": 9.799279225790561e-06, + "loss": 0.3104, + "step": 5884 + }, + { + "epoch": 0.11780897330030278, + "grad_norm": 1.2445316314697266, + "learning_rate": 9.799188284389269e-06, + "loss": 0.3506, + "step": 5885 + }, + { + "epoch": 0.11782899181743113, + "grad_norm": 1.1415098905563354, + "learning_rate": 9.799097322813222e-06, + "loss": 0.3366, + "step": 5886 + }, + { + "epoch": 0.11784901033455947, + "grad_norm": 1.0338064432144165, + "learning_rate": 9.799006341062804e-06, + "loss": 0.3091, + "step": 5887 + }, + { + "epoch": 0.1178690288516878, + "grad_norm": 1.3537148237228394, + "learning_rate": 9.798915339138395e-06, + "loss": 0.3264, + "step": 5888 + }, + { + "epoch": 0.11788904736881615, + "grad_norm": 1.1307764053344727, + "learning_rate": 9.798824317040379e-06, + "loss": 0.319, + "step": 5889 + }, + { + "epoch": 0.11790906588594449, + "grad_norm": 1.185907244682312, + "learning_rate": 9.798733274769137e-06, + "loss": 0.3976, + "step": 5890 + }, + { + "epoch": 0.11792908440307284, + "grad_norm": 1.133839726448059, + "learning_rate": 9.798642212325054e-06, + "loss": 0.3593, + "step": 5891 + }, + { + "epoch": 0.11794910292020118, + "grad_norm": 1.0928219556808472, + "learning_rate": 9.79855112970851e-06, + "loss": 0.3072, + "step": 5892 + }, + { + "epoch": 0.11796912143732953, + "grad_norm": 1.1223503351211548, + "learning_rate": 9.798460026919893e-06, + "loss": 0.3754, + "step": 5893 + }, + { + "epoch": 0.11798913995445787, + "grad_norm": 1.1068170070648193, + "learning_rate": 9.79836890395958e-06, + "loss": 0.3353, + "step": 5894 + }, + { + "epoch": 0.11800915847158622, + "grad_norm": 1.081620454788208, + "learning_rate": 9.798277760827959e-06, + "loss": 0.31, + "step": 5895 + }, + { + "epoch": 0.11802917698871455, + "grad_norm": 1.1305142641067505, + "learning_rate": 9.79818659752541e-06, + "loss": 0.3504, + "step": 5896 + }, + { + "epoch": 0.1180491955058429, + "grad_norm": 1.2646148204803467, + "learning_rate": 9.798095414052317e-06, + "loss": 0.3413, + "step": 5897 + }, + { + "epoch": 0.11806921402297124, + "grad_norm": 1.1041628122329712, + "learning_rate": 9.798004210409063e-06, + "loss": 0.326, + "step": 5898 + }, + { + "epoch": 0.11808923254009959, + "grad_norm": 0.9969498515129089, + "learning_rate": 9.797912986596033e-06, + "loss": 0.2852, + "step": 5899 + }, + { + "epoch": 0.11810925105722793, + "grad_norm": 1.0106565952301025, + "learning_rate": 9.797821742613608e-06, + "loss": 0.3454, + "step": 5900 + }, + { + "epoch": 0.11812926957435628, + "grad_norm": 1.1195095777511597, + "learning_rate": 9.797730478462173e-06, + "loss": 0.3462, + "step": 5901 + }, + { + "epoch": 0.11814928809148462, + "grad_norm": 1.9610058069229126, + "learning_rate": 9.797639194142112e-06, + "loss": 0.8112, + "step": 5902 + }, + { + "epoch": 0.11816930660861297, + "grad_norm": 1.0278459787368774, + "learning_rate": 9.79754788965381e-06, + "loss": 0.3067, + "step": 5903 + }, + { + "epoch": 0.1181893251257413, + "grad_norm": 1.2452284097671509, + "learning_rate": 9.797456564997646e-06, + "loss": 0.363, + "step": 5904 + }, + { + "epoch": 0.11820934364286965, + "grad_norm": 1.1353226900100708, + "learning_rate": 9.797365220174009e-06, + "loss": 0.3483, + "step": 5905 + }, + { + "epoch": 0.11822936215999799, + "grad_norm": 1.2447335720062256, + "learning_rate": 9.797273855183281e-06, + "loss": 0.3292, + "step": 5906 + }, + { + "epoch": 0.11824938067712634, + "grad_norm": 1.133086085319519, + "learning_rate": 9.797182470025844e-06, + "loss": 0.3059, + "step": 5907 + }, + { + "epoch": 0.11826939919425468, + "grad_norm": 1.2407147884368896, + "learning_rate": 9.797091064702087e-06, + "loss": 0.3643, + "step": 5908 + }, + { + "epoch": 0.11828941771138303, + "grad_norm": 1.0831490755081177, + "learning_rate": 9.79699963921239e-06, + "loss": 0.3371, + "step": 5909 + }, + { + "epoch": 0.11830943622851137, + "grad_norm": 1.1352243423461914, + "learning_rate": 9.796908193557138e-06, + "loss": 0.3328, + "step": 5910 + }, + { + "epoch": 0.11832945474563972, + "grad_norm": 1.1719887256622314, + "learning_rate": 9.796816727736718e-06, + "loss": 0.3341, + "step": 5911 + }, + { + "epoch": 0.11834947326276805, + "grad_norm": 1.183854103088379, + "learning_rate": 9.796725241751513e-06, + "loss": 0.3469, + "step": 5912 + }, + { + "epoch": 0.1183694917798964, + "grad_norm": 0.9665386080741882, + "learning_rate": 9.796633735601906e-06, + "loss": 0.2905, + "step": 5913 + }, + { + "epoch": 0.11838951029702474, + "grad_norm": 1.244348168373108, + "learning_rate": 9.796542209288282e-06, + "loss": 0.3483, + "step": 5914 + }, + { + "epoch": 0.11840952881415309, + "grad_norm": 1.2157155275344849, + "learning_rate": 9.796450662811028e-06, + "loss": 0.341, + "step": 5915 + }, + { + "epoch": 0.11842954733128143, + "grad_norm": 1.1672216653823853, + "learning_rate": 9.79635909617053e-06, + "loss": 0.2991, + "step": 5916 + }, + { + "epoch": 0.11844956584840978, + "grad_norm": 1.1739879846572876, + "learning_rate": 9.796267509367167e-06, + "loss": 0.4011, + "step": 5917 + }, + { + "epoch": 0.11846958436553812, + "grad_norm": 1.1560640335083008, + "learning_rate": 9.79617590240133e-06, + "loss": 0.3507, + "step": 5918 + }, + { + "epoch": 0.11848960288266647, + "grad_norm": 1.2550700902938843, + "learning_rate": 9.796084275273401e-06, + "loss": 0.3153, + "step": 5919 + }, + { + "epoch": 0.1185096213997948, + "grad_norm": 1.1942781209945679, + "learning_rate": 9.795992627983765e-06, + "loss": 0.3705, + "step": 5920 + }, + { + "epoch": 0.11852963991692315, + "grad_norm": 1.0711127519607544, + "learning_rate": 9.795900960532808e-06, + "loss": 0.3086, + "step": 5921 + }, + { + "epoch": 0.11854965843405149, + "grad_norm": 1.1039619445800781, + "learning_rate": 9.795809272920917e-06, + "loss": 0.3362, + "step": 5922 + }, + { + "epoch": 0.11856967695117984, + "grad_norm": 1.207712173461914, + "learning_rate": 9.795717565148475e-06, + "loss": 0.3274, + "step": 5923 + }, + { + "epoch": 0.11858969546830818, + "grad_norm": 1.2737128734588623, + "learning_rate": 9.795625837215867e-06, + "loss": 0.3178, + "step": 5924 + }, + { + "epoch": 0.11860971398543653, + "grad_norm": 1.015939712524414, + "learning_rate": 9.795534089123481e-06, + "loss": 0.35, + "step": 5925 + }, + { + "epoch": 0.11862973250256487, + "grad_norm": 1.1064858436584473, + "learning_rate": 9.795442320871702e-06, + "loss": 0.358, + "step": 5926 + }, + { + "epoch": 0.11864975101969322, + "grad_norm": 1.0383602380752563, + "learning_rate": 9.795350532460917e-06, + "loss": 0.3487, + "step": 5927 + }, + { + "epoch": 0.11866976953682155, + "grad_norm": 1.0892976522445679, + "learning_rate": 9.795258723891508e-06, + "loss": 0.3203, + "step": 5928 + }, + { + "epoch": 0.1186897880539499, + "grad_norm": 1.0952045917510986, + "learning_rate": 9.795166895163864e-06, + "loss": 0.306, + "step": 5929 + }, + { + "epoch": 0.11870980657107824, + "grad_norm": 1.1456177234649658, + "learning_rate": 9.79507504627837e-06, + "loss": 0.3291, + "step": 5930 + }, + { + "epoch": 0.11872982508820659, + "grad_norm": 1.0717132091522217, + "learning_rate": 9.794983177235414e-06, + "loss": 0.3362, + "step": 5931 + }, + { + "epoch": 0.11874984360533493, + "grad_norm": 1.038156270980835, + "learning_rate": 9.79489128803538e-06, + "loss": 0.3161, + "step": 5932 + }, + { + "epoch": 0.11876986212246328, + "grad_norm": 1.1129212379455566, + "learning_rate": 9.794799378678655e-06, + "loss": 0.3577, + "step": 5933 + }, + { + "epoch": 0.11878988063959162, + "grad_norm": 1.107392430305481, + "learning_rate": 9.794707449165623e-06, + "loss": 0.3019, + "step": 5934 + }, + { + "epoch": 0.11880989915671997, + "grad_norm": 1.897583246231079, + "learning_rate": 9.794615499496674e-06, + "loss": 0.8106, + "step": 5935 + }, + { + "epoch": 0.1188299176738483, + "grad_norm": 1.111325740814209, + "learning_rate": 9.794523529672194e-06, + "loss": 0.3292, + "step": 5936 + }, + { + "epoch": 0.11884993619097665, + "grad_norm": 1.2226531505584717, + "learning_rate": 9.794431539692569e-06, + "loss": 0.3377, + "step": 5937 + }, + { + "epoch": 0.11886995470810499, + "grad_norm": 1.291908621788025, + "learning_rate": 9.794339529558186e-06, + "loss": 0.3153, + "step": 5938 + }, + { + "epoch": 0.11888997322523334, + "grad_norm": 1.1254326105117798, + "learning_rate": 9.794247499269429e-06, + "loss": 0.343, + "step": 5939 + }, + { + "epoch": 0.11890999174236168, + "grad_norm": 1.0727940797805786, + "learning_rate": 9.79415544882669e-06, + "loss": 0.2945, + "step": 5940 + }, + { + "epoch": 0.11893001025949003, + "grad_norm": 1.0747590065002441, + "learning_rate": 9.794063378230352e-06, + "loss": 0.3416, + "step": 5941 + }, + { + "epoch": 0.11895002877661837, + "grad_norm": 1.0324013233184814, + "learning_rate": 9.793971287480802e-06, + "loss": 0.3527, + "step": 5942 + }, + { + "epoch": 0.11897004729374672, + "grad_norm": 1.8579986095428467, + "learning_rate": 9.79387917657843e-06, + "loss": 0.8387, + "step": 5943 + }, + { + "epoch": 0.11899006581087505, + "grad_norm": 1.1013269424438477, + "learning_rate": 9.793787045523622e-06, + "loss": 0.2911, + "step": 5944 + }, + { + "epoch": 0.1190100843280034, + "grad_norm": 1.0880616903305054, + "learning_rate": 9.793694894316763e-06, + "loss": 0.34, + "step": 5945 + }, + { + "epoch": 0.11903010284513174, + "grad_norm": 1.0836533308029175, + "learning_rate": 9.793602722958242e-06, + "loss": 0.2902, + "step": 5946 + }, + { + "epoch": 0.11905012136226009, + "grad_norm": 1.282501220703125, + "learning_rate": 9.793510531448449e-06, + "loss": 0.3231, + "step": 5947 + }, + { + "epoch": 0.11907013987938843, + "grad_norm": 1.0692869424819946, + "learning_rate": 9.793418319787768e-06, + "loss": 0.3228, + "step": 5948 + }, + { + "epoch": 0.11909015839651678, + "grad_norm": 1.233176827430725, + "learning_rate": 9.793326087976589e-06, + "loss": 0.356, + "step": 5949 + }, + { + "epoch": 0.11911017691364512, + "grad_norm": 1.8508241176605225, + "learning_rate": 9.793233836015298e-06, + "loss": 0.8179, + "step": 5950 + }, + { + "epoch": 0.11913019543077347, + "grad_norm": 1.115771770477295, + "learning_rate": 9.793141563904282e-06, + "loss": 0.3319, + "step": 5951 + }, + { + "epoch": 0.1191502139479018, + "grad_norm": 1.1702953577041626, + "learning_rate": 9.793049271643934e-06, + "loss": 0.3483, + "step": 5952 + }, + { + "epoch": 0.11917023246503015, + "grad_norm": 1.1133959293365479, + "learning_rate": 9.792956959234635e-06, + "loss": 0.3185, + "step": 5953 + }, + { + "epoch": 0.11919025098215849, + "grad_norm": 1.095049262046814, + "learning_rate": 9.792864626676778e-06, + "loss": 0.3431, + "step": 5954 + }, + { + "epoch": 0.11921026949928684, + "grad_norm": 1.1842433214187622, + "learning_rate": 9.792772273970749e-06, + "loss": 0.3315, + "step": 5955 + }, + { + "epoch": 0.11923028801641518, + "grad_norm": 1.9337095022201538, + "learning_rate": 9.792679901116937e-06, + "loss": 0.7752, + "step": 5956 + }, + { + "epoch": 0.11925030653354353, + "grad_norm": 1.35171639919281, + "learning_rate": 9.79258750811573e-06, + "loss": 0.3603, + "step": 5957 + }, + { + "epoch": 0.11927032505067187, + "grad_norm": 1.1987619400024414, + "learning_rate": 9.792495094967516e-06, + "loss": 0.3356, + "step": 5958 + }, + { + "epoch": 0.11929034356780022, + "grad_norm": 1.1010502576828003, + "learning_rate": 9.792402661672684e-06, + "loss": 0.3098, + "step": 5959 + }, + { + "epoch": 0.11931036208492855, + "grad_norm": 0.9898505210876465, + "learning_rate": 9.792310208231624e-06, + "loss": 0.3073, + "step": 5960 + }, + { + "epoch": 0.1193303806020569, + "grad_norm": 1.9867379665374756, + "learning_rate": 9.792217734644722e-06, + "loss": 0.8411, + "step": 5961 + }, + { + "epoch": 0.11935039911918524, + "grad_norm": 0.9872413277626038, + "learning_rate": 9.792125240912366e-06, + "loss": 0.32, + "step": 5962 + }, + { + "epoch": 0.11937041763631359, + "grad_norm": 1.0976699590682983, + "learning_rate": 9.79203272703495e-06, + "loss": 0.3414, + "step": 5963 + }, + { + "epoch": 0.11939043615344193, + "grad_norm": 1.1103805303573608, + "learning_rate": 9.791940193012859e-06, + "loss": 0.3205, + "step": 5964 + }, + { + "epoch": 0.11941045467057028, + "grad_norm": 1.0922762155532837, + "learning_rate": 9.791847638846483e-06, + "loss": 0.3154, + "step": 5965 + }, + { + "epoch": 0.11943047318769862, + "grad_norm": 1.104567527770996, + "learning_rate": 9.79175506453621e-06, + "loss": 0.3281, + "step": 5966 + }, + { + "epoch": 0.11945049170482697, + "grad_norm": 1.0467839241027832, + "learning_rate": 9.791662470082428e-06, + "loss": 0.2978, + "step": 5967 + }, + { + "epoch": 0.1194705102219553, + "grad_norm": 1.1457312107086182, + "learning_rate": 9.791569855485531e-06, + "loss": 0.2995, + "step": 5968 + }, + { + "epoch": 0.11949052873908365, + "grad_norm": 1.1639505624771118, + "learning_rate": 9.791477220745903e-06, + "loss": 0.3269, + "step": 5969 + }, + { + "epoch": 0.11951054725621199, + "grad_norm": 1.1713837385177612, + "learning_rate": 9.791384565863939e-06, + "loss": 0.3405, + "step": 5970 + }, + { + "epoch": 0.11953056577334034, + "grad_norm": 1.0574373006820679, + "learning_rate": 9.791291890840024e-06, + "loss": 0.3408, + "step": 5971 + }, + { + "epoch": 0.11955058429046868, + "grad_norm": 1.0660258531570435, + "learning_rate": 9.791199195674547e-06, + "loss": 0.3298, + "step": 5972 + }, + { + "epoch": 0.11957060280759703, + "grad_norm": 1.2389370203018188, + "learning_rate": 9.791106480367902e-06, + "loss": 0.3209, + "step": 5973 + }, + { + "epoch": 0.11959062132472537, + "grad_norm": 1.211846947669983, + "learning_rate": 9.791013744920476e-06, + "loss": 0.3244, + "step": 5974 + }, + { + "epoch": 0.11961063984185372, + "grad_norm": 0.9941926002502441, + "learning_rate": 9.790920989332659e-06, + "loss": 0.3219, + "step": 5975 + }, + { + "epoch": 0.11963065835898205, + "grad_norm": 1.147355079650879, + "learning_rate": 9.790828213604841e-06, + "loss": 0.3489, + "step": 5976 + }, + { + "epoch": 0.1196506768761104, + "grad_norm": 1.0236185789108276, + "learning_rate": 9.790735417737414e-06, + "loss": 0.3591, + "step": 5977 + }, + { + "epoch": 0.11967069539323874, + "grad_norm": 1.1775033473968506, + "learning_rate": 9.790642601730765e-06, + "loss": 0.302, + "step": 5978 + }, + { + "epoch": 0.11969071391036709, + "grad_norm": 1.027721881866455, + "learning_rate": 9.790549765585284e-06, + "loss": 0.2255, + "step": 5979 + }, + { + "epoch": 0.11971073242749543, + "grad_norm": 1.241586446762085, + "learning_rate": 9.790456909301365e-06, + "loss": 0.3084, + "step": 5980 + }, + { + "epoch": 0.11973075094462378, + "grad_norm": 1.1519112586975098, + "learning_rate": 9.790364032879393e-06, + "loss": 0.3196, + "step": 5981 + }, + { + "epoch": 0.11975076946175212, + "grad_norm": 1.231252670288086, + "learning_rate": 9.790271136319764e-06, + "loss": 0.3904, + "step": 5982 + }, + { + "epoch": 0.11977078797888047, + "grad_norm": 1.0989680290222168, + "learning_rate": 9.790178219622866e-06, + "loss": 0.3395, + "step": 5983 + }, + { + "epoch": 0.1197908064960088, + "grad_norm": 1.034087061882019, + "learning_rate": 9.790085282789089e-06, + "loss": 0.329, + "step": 5984 + }, + { + "epoch": 0.11981082501313715, + "grad_norm": 1.2915723323822021, + "learning_rate": 9.789992325818825e-06, + "loss": 0.3086, + "step": 5985 + }, + { + "epoch": 0.11983084353026549, + "grad_norm": 1.0710768699645996, + "learning_rate": 9.789899348712465e-06, + "loss": 0.2951, + "step": 5986 + }, + { + "epoch": 0.11985086204739384, + "grad_norm": 1.112351417541504, + "learning_rate": 9.789806351470396e-06, + "loss": 0.3013, + "step": 5987 + }, + { + "epoch": 0.11987088056452218, + "grad_norm": 1.307437777519226, + "learning_rate": 9.789713334093013e-06, + "loss": 0.3401, + "step": 5988 + }, + { + "epoch": 0.11989089908165053, + "grad_norm": 1.1704449653625488, + "learning_rate": 9.789620296580706e-06, + "loss": 0.3272, + "step": 5989 + }, + { + "epoch": 0.11991091759877887, + "grad_norm": 1.1128283739089966, + "learning_rate": 9.789527238933866e-06, + "loss": 0.2971, + "step": 5990 + }, + { + "epoch": 0.11993093611590722, + "grad_norm": 1.132734775543213, + "learning_rate": 9.789434161152885e-06, + "loss": 0.3292, + "step": 5991 + }, + { + "epoch": 0.11995095463303555, + "grad_norm": 1.1788363456726074, + "learning_rate": 9.789341063238153e-06, + "loss": 0.298, + "step": 5992 + }, + { + "epoch": 0.1199709731501639, + "grad_norm": 1.206682801246643, + "learning_rate": 9.78924794519006e-06, + "loss": 0.3398, + "step": 5993 + }, + { + "epoch": 0.11999099166729224, + "grad_norm": 1.0728418827056885, + "learning_rate": 9.789154807009003e-06, + "loss": 0.3285, + "step": 5994 + }, + { + "epoch": 0.12001101018442059, + "grad_norm": 1.1045422554016113, + "learning_rate": 9.789061648695369e-06, + "loss": 0.3308, + "step": 5995 + }, + { + "epoch": 0.12003102870154893, + "grad_norm": 1.048689365386963, + "learning_rate": 9.788968470249548e-06, + "loss": 0.312, + "step": 5996 + }, + { + "epoch": 0.12005104721867728, + "grad_norm": 1.2025482654571533, + "learning_rate": 9.788875271671935e-06, + "loss": 0.3828, + "step": 5997 + }, + { + "epoch": 0.12007106573580562, + "grad_norm": 1.2353179454803467, + "learning_rate": 9.788782052962921e-06, + "loss": 0.3255, + "step": 5998 + }, + { + "epoch": 0.12009108425293397, + "grad_norm": 1.1643112897872925, + "learning_rate": 9.7886888141229e-06, + "loss": 0.3687, + "step": 5999 + }, + { + "epoch": 0.1201111027700623, + "grad_norm": 1.166045069694519, + "learning_rate": 9.788595555152259e-06, + "loss": 0.3505, + "step": 6000 + }, + { + "epoch": 0.12013112128719065, + "grad_norm": 1.7773704528808594, + "learning_rate": 9.788502276051395e-06, + "loss": 0.8485, + "step": 6001 + }, + { + "epoch": 0.12015113980431899, + "grad_norm": 1.0702155828475952, + "learning_rate": 9.788408976820695e-06, + "loss": 0.3059, + "step": 6002 + }, + { + "epoch": 0.12017115832144734, + "grad_norm": 1.0528669357299805, + "learning_rate": 9.788315657460557e-06, + "loss": 0.2785, + "step": 6003 + }, + { + "epoch": 0.12019117683857568, + "grad_norm": 1.3079895973205566, + "learning_rate": 9.788222317971368e-06, + "loss": 0.3532, + "step": 6004 + }, + { + "epoch": 0.12021119535570403, + "grad_norm": 1.173012137413025, + "learning_rate": 9.788128958353524e-06, + "loss": 0.3377, + "step": 6005 + }, + { + "epoch": 0.12023121387283237, + "grad_norm": 1.1916303634643555, + "learning_rate": 9.788035578607418e-06, + "loss": 0.3247, + "step": 6006 + }, + { + "epoch": 0.12025123238996072, + "grad_norm": 1.1292309761047363, + "learning_rate": 9.787942178733438e-06, + "loss": 0.2786, + "step": 6007 + }, + { + "epoch": 0.12027125090708905, + "grad_norm": 1.2222508192062378, + "learning_rate": 9.787848758731981e-06, + "loss": 0.3593, + "step": 6008 + }, + { + "epoch": 0.1202912694242174, + "grad_norm": 1.1514519453048706, + "learning_rate": 9.787755318603439e-06, + "loss": 0.3298, + "step": 6009 + }, + { + "epoch": 0.12031128794134574, + "grad_norm": 1.1438623666763306, + "learning_rate": 9.787661858348202e-06, + "loss": 0.3161, + "step": 6010 + }, + { + "epoch": 0.12033130645847409, + "grad_norm": 1.050207495689392, + "learning_rate": 9.787568377966665e-06, + "loss": 0.2984, + "step": 6011 + }, + { + "epoch": 0.12035132497560243, + "grad_norm": 1.0767202377319336, + "learning_rate": 9.787474877459222e-06, + "loss": 0.2753, + "step": 6012 + }, + { + "epoch": 0.12037134349273078, + "grad_norm": 1.1753085851669312, + "learning_rate": 9.787381356826265e-06, + "loss": 0.3345, + "step": 6013 + }, + { + "epoch": 0.12039136200985912, + "grad_norm": 1.062309741973877, + "learning_rate": 9.787287816068185e-06, + "loss": 0.3046, + "step": 6014 + }, + { + "epoch": 0.12041138052698747, + "grad_norm": 1.0785021781921387, + "learning_rate": 9.78719425518538e-06, + "loss": 0.311, + "step": 6015 + }, + { + "epoch": 0.1204313990441158, + "grad_norm": 1.267130732536316, + "learning_rate": 9.787100674178239e-06, + "loss": 0.3629, + "step": 6016 + }, + { + "epoch": 0.12045141756124415, + "grad_norm": 1.1789685487747192, + "learning_rate": 9.787007073047156e-06, + "loss": 0.3293, + "step": 6017 + }, + { + "epoch": 0.12047143607837249, + "grad_norm": 1.3016093969345093, + "learning_rate": 9.786913451792527e-06, + "loss": 0.3539, + "step": 6018 + }, + { + "epoch": 0.12049145459550084, + "grad_norm": 1.0441534519195557, + "learning_rate": 9.786819810414745e-06, + "loss": 0.3089, + "step": 6019 + }, + { + "epoch": 0.12051147311262918, + "grad_norm": 1.0927373170852661, + "learning_rate": 9.786726148914201e-06, + "loss": 0.3303, + "step": 6020 + }, + { + "epoch": 0.12053149162975753, + "grad_norm": 1.2225852012634277, + "learning_rate": 9.78663246729129e-06, + "loss": 0.3466, + "step": 6021 + }, + { + "epoch": 0.12055151014688587, + "grad_norm": 1.1134642362594604, + "learning_rate": 9.786538765546407e-06, + "loss": 0.3499, + "step": 6022 + }, + { + "epoch": 0.12057152866401422, + "grad_norm": 1.0162843465805054, + "learning_rate": 9.786445043679946e-06, + "loss": 0.3163, + "step": 6023 + }, + { + "epoch": 0.12059154718114255, + "grad_norm": 1.1343531608581543, + "learning_rate": 9.786351301692298e-06, + "loss": 0.3277, + "step": 6024 + }, + { + "epoch": 0.1206115656982709, + "grad_norm": 1.1082910299301147, + "learning_rate": 9.78625753958386e-06, + "loss": 0.3169, + "step": 6025 + }, + { + "epoch": 0.12063158421539924, + "grad_norm": 1.1581951379776, + "learning_rate": 9.786163757355026e-06, + "loss": 0.3656, + "step": 6026 + }, + { + "epoch": 0.12065160273252759, + "grad_norm": 1.1126484870910645, + "learning_rate": 9.78606995500619e-06, + "loss": 0.3195, + "step": 6027 + }, + { + "epoch": 0.12067162124965593, + "grad_norm": 1.0145149230957031, + "learning_rate": 9.785976132537744e-06, + "loss": 0.343, + "step": 6028 + }, + { + "epoch": 0.12069163976678428, + "grad_norm": 1.8815504312515259, + "learning_rate": 9.785882289950086e-06, + "loss": 0.808, + "step": 6029 + }, + { + "epoch": 0.12071165828391262, + "grad_norm": 1.0759077072143555, + "learning_rate": 9.78578842724361e-06, + "loss": 0.3199, + "step": 6030 + }, + { + "epoch": 0.12073167680104097, + "grad_norm": 1.0666776895523071, + "learning_rate": 9.785694544418706e-06, + "loss": 0.3328, + "step": 6031 + }, + { + "epoch": 0.1207516953181693, + "grad_norm": 1.102785587310791, + "learning_rate": 9.785600641475774e-06, + "loss": 0.3351, + "step": 6032 + }, + { + "epoch": 0.12077171383529765, + "grad_norm": 1.8643577098846436, + "learning_rate": 9.785506718415205e-06, + "loss": 0.8864, + "step": 6033 + }, + { + "epoch": 0.12079173235242599, + "grad_norm": 0.9502999782562256, + "learning_rate": 9.785412775237397e-06, + "loss": 0.3242, + "step": 6034 + }, + { + "epoch": 0.12081175086955434, + "grad_norm": 1.1312826871871948, + "learning_rate": 9.785318811942744e-06, + "loss": 0.3278, + "step": 6035 + }, + { + "epoch": 0.12083176938668268, + "grad_norm": 1.1196542978286743, + "learning_rate": 9.78522482853164e-06, + "loss": 0.3274, + "step": 6036 + }, + { + "epoch": 0.12085178790381103, + "grad_norm": 1.0828348398208618, + "learning_rate": 9.78513082500448e-06, + "loss": 0.3016, + "step": 6037 + }, + { + "epoch": 0.12087180642093937, + "grad_norm": 1.2451266050338745, + "learning_rate": 9.78503680136166e-06, + "loss": 0.3071, + "step": 6038 + }, + { + "epoch": 0.12089182493806772, + "grad_norm": 1.1193499565124512, + "learning_rate": 9.784942757603576e-06, + "loss": 0.2942, + "step": 6039 + }, + { + "epoch": 0.12091184345519605, + "grad_norm": 1.3182637691497803, + "learning_rate": 9.784848693730622e-06, + "loss": 0.3304, + "step": 6040 + }, + { + "epoch": 0.1209318619723244, + "grad_norm": 1.1310067176818848, + "learning_rate": 9.784754609743195e-06, + "loss": 0.3868, + "step": 6041 + }, + { + "epoch": 0.12095188048945274, + "grad_norm": 1.0372475385665894, + "learning_rate": 9.784660505641687e-06, + "loss": 0.3312, + "step": 6042 + }, + { + "epoch": 0.12097189900658109, + "grad_norm": 1.393670678138733, + "learning_rate": 9.784566381426497e-06, + "loss": 0.412, + "step": 6043 + }, + { + "epoch": 0.12099191752370943, + "grad_norm": 1.125438928604126, + "learning_rate": 9.78447223709802e-06, + "loss": 0.3459, + "step": 6044 + }, + { + "epoch": 0.12101193604083778, + "grad_norm": 1.1443012952804565, + "learning_rate": 9.784378072656649e-06, + "loss": 0.3268, + "step": 6045 + }, + { + "epoch": 0.12103195455796611, + "grad_norm": 0.9934729337692261, + "learning_rate": 9.784283888102784e-06, + "loss": 0.2778, + "step": 6046 + }, + { + "epoch": 0.12105197307509447, + "grad_norm": 1.0246340036392212, + "learning_rate": 9.78418968343682e-06, + "loss": 0.3252, + "step": 6047 + }, + { + "epoch": 0.1210719915922228, + "grad_norm": 1.8608125448226929, + "learning_rate": 9.78409545865915e-06, + "loss": 0.817, + "step": 6048 + }, + { + "epoch": 0.12109201010935115, + "grad_norm": 1.031862497329712, + "learning_rate": 9.784001213770173e-06, + "loss": 0.2579, + "step": 6049 + }, + { + "epoch": 0.12111202862647949, + "grad_norm": 1.0785313844680786, + "learning_rate": 9.783906948770286e-06, + "loss": 0.2776, + "step": 6050 + }, + { + "epoch": 0.12113204714360784, + "grad_norm": 1.2863869667053223, + "learning_rate": 9.783812663659881e-06, + "loss": 0.2848, + "step": 6051 + }, + { + "epoch": 0.12115206566073618, + "grad_norm": 1.0557093620300293, + "learning_rate": 9.783718358439361e-06, + "loss": 0.3486, + "step": 6052 + }, + { + "epoch": 0.12117208417786453, + "grad_norm": 1.1905149221420288, + "learning_rate": 9.783624033109115e-06, + "loss": 0.3376, + "step": 6053 + }, + { + "epoch": 0.12119210269499286, + "grad_norm": 1.2391918897628784, + "learning_rate": 9.783529687669544e-06, + "loss": 0.3771, + "step": 6054 + }, + { + "epoch": 0.12121212121212122, + "grad_norm": 1.0424093008041382, + "learning_rate": 9.783435322121044e-06, + "loss": 0.2867, + "step": 6055 + }, + { + "epoch": 0.12123213972924955, + "grad_norm": 1.0881881713867188, + "learning_rate": 9.783340936464012e-06, + "loss": 0.3346, + "step": 6056 + }, + { + "epoch": 0.1212521582463779, + "grad_norm": 1.2624235153198242, + "learning_rate": 9.783246530698843e-06, + "loss": 0.3141, + "step": 6057 + }, + { + "epoch": 0.12127217676350624, + "grad_norm": 1.0462937355041504, + "learning_rate": 9.783152104825933e-06, + "loss": 0.316, + "step": 6058 + }, + { + "epoch": 0.12129219528063459, + "grad_norm": 1.0694715976715088, + "learning_rate": 9.783057658845684e-06, + "loss": 0.283, + "step": 6059 + }, + { + "epoch": 0.12131221379776293, + "grad_norm": 1.0762724876403809, + "learning_rate": 9.78296319275849e-06, + "loss": 0.3054, + "step": 6060 + }, + { + "epoch": 0.12133223231489128, + "grad_norm": 1.4886648654937744, + "learning_rate": 9.782868706564745e-06, + "loss": 0.3368, + "step": 6061 + }, + { + "epoch": 0.12135225083201961, + "grad_norm": 1.1734189987182617, + "learning_rate": 9.782774200264851e-06, + "loss": 0.3238, + "step": 6062 + }, + { + "epoch": 0.12137226934914797, + "grad_norm": 1.246532917022705, + "learning_rate": 9.782679673859204e-06, + "loss": 0.3444, + "step": 6063 + }, + { + "epoch": 0.1213922878662763, + "grad_norm": 1.1242600679397583, + "learning_rate": 9.782585127348201e-06, + "loss": 0.3832, + "step": 6064 + }, + { + "epoch": 0.12141230638340465, + "grad_norm": 1.2236586809158325, + "learning_rate": 9.78249056073224e-06, + "loss": 0.378, + "step": 6065 + }, + { + "epoch": 0.12143232490053299, + "grad_norm": 1.2225069999694824, + "learning_rate": 9.782395974011715e-06, + "loss": 0.3336, + "step": 6066 + }, + { + "epoch": 0.12145234341766134, + "grad_norm": 1.038483738899231, + "learning_rate": 9.78230136718703e-06, + "loss": 0.3218, + "step": 6067 + }, + { + "epoch": 0.12147236193478968, + "grad_norm": 1.0628381967544556, + "learning_rate": 9.782206740258577e-06, + "loss": 0.3095, + "step": 6068 + }, + { + "epoch": 0.12149238045191803, + "grad_norm": 1.0642142295837402, + "learning_rate": 9.782112093226757e-06, + "loss": 0.3632, + "step": 6069 + }, + { + "epoch": 0.12151239896904636, + "grad_norm": 2.0144591331481934, + "learning_rate": 9.782017426091966e-06, + "loss": 0.831, + "step": 6070 + }, + { + "epoch": 0.12153241748617472, + "grad_norm": 1.0547136068344116, + "learning_rate": 9.781922738854604e-06, + "loss": 0.3543, + "step": 6071 + }, + { + "epoch": 0.12155243600330305, + "grad_norm": 1.1572160720825195, + "learning_rate": 9.781828031515065e-06, + "loss": 0.3069, + "step": 6072 + }, + { + "epoch": 0.1215724545204314, + "grad_norm": 1.122975468635559, + "learning_rate": 9.781733304073752e-06, + "loss": 0.339, + "step": 6073 + }, + { + "epoch": 0.12159247303755974, + "grad_norm": 1.1552737951278687, + "learning_rate": 9.781638556531064e-06, + "loss": 0.3514, + "step": 6074 + }, + { + "epoch": 0.12161249155468809, + "grad_norm": 1.0535609722137451, + "learning_rate": 9.781543788887394e-06, + "loss": 0.3431, + "step": 6075 + }, + { + "epoch": 0.12163251007181643, + "grad_norm": 1.0548633337020874, + "learning_rate": 9.781449001143142e-06, + "loss": 0.347, + "step": 6076 + }, + { + "epoch": 0.12165252858894478, + "grad_norm": 1.1358119249343872, + "learning_rate": 9.781354193298707e-06, + "loss": 0.326, + "step": 6077 + }, + { + "epoch": 0.12167254710607311, + "grad_norm": 1.9146989583969116, + "learning_rate": 9.781259365354489e-06, + "loss": 0.8512, + "step": 6078 + }, + { + "epoch": 0.12169256562320147, + "grad_norm": 1.0409339666366577, + "learning_rate": 9.781164517310885e-06, + "loss": 0.314, + "step": 6079 + }, + { + "epoch": 0.1217125841403298, + "grad_norm": 1.1202629804611206, + "learning_rate": 9.781069649168296e-06, + "loss": 0.367, + "step": 6080 + }, + { + "epoch": 0.12173260265745815, + "grad_norm": 1.1940323114395142, + "learning_rate": 9.780974760927119e-06, + "loss": 0.3136, + "step": 6081 + }, + { + "epoch": 0.12175262117458649, + "grad_norm": 1.1271708011627197, + "learning_rate": 9.780879852587753e-06, + "loss": 0.341, + "step": 6082 + }, + { + "epoch": 0.12177263969171484, + "grad_norm": 1.1776930093765259, + "learning_rate": 9.780784924150595e-06, + "loss": 0.3106, + "step": 6083 + }, + { + "epoch": 0.12179265820884318, + "grad_norm": 1.0063817501068115, + "learning_rate": 9.780689975616046e-06, + "loss": 0.3082, + "step": 6084 + }, + { + "epoch": 0.12181267672597153, + "grad_norm": 1.1969635486602783, + "learning_rate": 9.780595006984507e-06, + "loss": 0.3336, + "step": 6085 + }, + { + "epoch": 0.12183269524309986, + "grad_norm": 1.1254395246505737, + "learning_rate": 9.780500018256374e-06, + "loss": 0.35, + "step": 6086 + }, + { + "epoch": 0.12185271376022822, + "grad_norm": 1.8774789571762085, + "learning_rate": 9.78040500943205e-06, + "loss": 0.8036, + "step": 6087 + }, + { + "epoch": 0.12187273227735655, + "grad_norm": 1.0903574228286743, + "learning_rate": 9.78030998051193e-06, + "loss": 0.3538, + "step": 6088 + }, + { + "epoch": 0.1218927507944849, + "grad_norm": 1.1583794355392456, + "learning_rate": 9.780214931496415e-06, + "loss": 0.3475, + "step": 6089 + }, + { + "epoch": 0.12191276931161324, + "grad_norm": 1.2205220460891724, + "learning_rate": 9.780119862385906e-06, + "loss": 0.3431, + "step": 6090 + }, + { + "epoch": 0.12193278782874159, + "grad_norm": 1.008321762084961, + "learning_rate": 9.780024773180802e-06, + "loss": 0.357, + "step": 6091 + }, + { + "epoch": 0.12195280634586993, + "grad_norm": 1.1188292503356934, + "learning_rate": 9.779929663881502e-06, + "loss": 0.3064, + "step": 6092 + }, + { + "epoch": 0.12197282486299828, + "grad_norm": 1.2059340476989746, + "learning_rate": 9.779834534488407e-06, + "loss": 0.3421, + "step": 6093 + }, + { + "epoch": 0.12199284338012661, + "grad_norm": 1.143858790397644, + "learning_rate": 9.779739385001917e-06, + "loss": 0.3444, + "step": 6094 + }, + { + "epoch": 0.12201286189725497, + "grad_norm": 1.0858427286148071, + "learning_rate": 9.779644215422431e-06, + "loss": 0.3504, + "step": 6095 + }, + { + "epoch": 0.1220328804143833, + "grad_norm": 0.972972571849823, + "learning_rate": 9.779549025750348e-06, + "loss": 0.2862, + "step": 6096 + }, + { + "epoch": 0.12205289893151165, + "grad_norm": 1.180083155632019, + "learning_rate": 9.77945381598607e-06, + "loss": 0.3311, + "step": 6097 + }, + { + "epoch": 0.12207291744863999, + "grad_norm": 1.1228947639465332, + "learning_rate": 9.779358586129996e-06, + "loss": 0.3209, + "step": 6098 + }, + { + "epoch": 0.12209293596576834, + "grad_norm": 1.1327667236328125, + "learning_rate": 9.779263336182527e-06, + "loss": 0.3294, + "step": 6099 + }, + { + "epoch": 0.12211295448289668, + "grad_norm": 1.0789666175842285, + "learning_rate": 9.779168066144065e-06, + "loss": 0.3298, + "step": 6100 + }, + { + "epoch": 0.12213297300002503, + "grad_norm": 1.1384456157684326, + "learning_rate": 9.779072776015009e-06, + "loss": 0.3114, + "step": 6101 + }, + { + "epoch": 0.12215299151715336, + "grad_norm": 1.2366811037063599, + "learning_rate": 9.778977465795758e-06, + "loss": 0.3361, + "step": 6102 + }, + { + "epoch": 0.12217301003428171, + "grad_norm": 1.0443657636642456, + "learning_rate": 9.778882135486714e-06, + "loss": 0.2814, + "step": 6103 + }, + { + "epoch": 0.12219302855141005, + "grad_norm": 1.1083587408065796, + "learning_rate": 9.77878678508828e-06, + "loss": 0.3652, + "step": 6104 + }, + { + "epoch": 0.1222130470685384, + "grad_norm": 1.8502060174942017, + "learning_rate": 9.778691414600854e-06, + "loss": 0.7931, + "step": 6105 + }, + { + "epoch": 0.12223306558566674, + "grad_norm": 1.0737732648849487, + "learning_rate": 9.778596024024835e-06, + "loss": 0.3696, + "step": 6106 + }, + { + "epoch": 0.12225308410279509, + "grad_norm": 1.082669973373413, + "learning_rate": 9.778500613360631e-06, + "loss": 0.3271, + "step": 6107 + }, + { + "epoch": 0.12227310261992343, + "grad_norm": 1.0767431259155273, + "learning_rate": 9.778405182608636e-06, + "loss": 0.3642, + "step": 6108 + }, + { + "epoch": 0.12229312113705178, + "grad_norm": 0.9948060512542725, + "learning_rate": 9.778309731769255e-06, + "loss": 0.3151, + "step": 6109 + }, + { + "epoch": 0.12231313965418011, + "grad_norm": 1.0846139192581177, + "learning_rate": 9.778214260842887e-06, + "loss": 0.2978, + "step": 6110 + }, + { + "epoch": 0.12233315817130846, + "grad_norm": 1.1493171453475952, + "learning_rate": 9.778118769829936e-06, + "loss": 0.3803, + "step": 6111 + }, + { + "epoch": 0.1223531766884368, + "grad_norm": 1.069675326347351, + "learning_rate": 9.778023258730801e-06, + "loss": 0.3641, + "step": 6112 + }, + { + "epoch": 0.12237319520556515, + "grad_norm": 1.0442882776260376, + "learning_rate": 9.777927727545884e-06, + "loss": 0.3343, + "step": 6113 + }, + { + "epoch": 0.12239321372269349, + "grad_norm": 1.1821331977844238, + "learning_rate": 9.777832176275589e-06, + "loss": 0.3198, + "step": 6114 + }, + { + "epoch": 0.12241323223982184, + "grad_norm": 1.2758187055587769, + "learning_rate": 9.777736604920314e-06, + "loss": 0.3143, + "step": 6115 + }, + { + "epoch": 0.12243325075695018, + "grad_norm": 1.2237439155578613, + "learning_rate": 9.777641013480463e-06, + "loss": 0.3163, + "step": 6116 + }, + { + "epoch": 0.12245326927407853, + "grad_norm": 1.189746379852295, + "learning_rate": 9.777545401956438e-06, + "loss": 0.3345, + "step": 6117 + }, + { + "epoch": 0.12247328779120686, + "grad_norm": 1.09661066532135, + "learning_rate": 9.77744977034864e-06, + "loss": 0.2946, + "step": 6118 + }, + { + "epoch": 0.12249330630833521, + "grad_norm": 1.0790170431137085, + "learning_rate": 9.777354118657469e-06, + "loss": 0.3169, + "step": 6119 + }, + { + "epoch": 0.12251332482546355, + "grad_norm": 1.1543468236923218, + "learning_rate": 9.777258446883331e-06, + "loss": 0.2947, + "step": 6120 + }, + { + "epoch": 0.1225333433425919, + "grad_norm": 1.2385540008544922, + "learning_rate": 9.777162755026627e-06, + "loss": 0.3793, + "step": 6121 + }, + { + "epoch": 0.12255336185972024, + "grad_norm": 1.1327365636825562, + "learning_rate": 9.77706704308776e-06, + "loss": 0.3086, + "step": 6122 + }, + { + "epoch": 0.12257338037684859, + "grad_norm": 1.8260663747787476, + "learning_rate": 9.77697131106713e-06, + "loss": 0.9175, + "step": 6123 + }, + { + "epoch": 0.12259339889397693, + "grad_norm": 1.1740604639053345, + "learning_rate": 9.77687555896514e-06, + "loss": 0.3329, + "step": 6124 + }, + { + "epoch": 0.12261341741110528, + "grad_norm": 1.2690902948379517, + "learning_rate": 9.776779786782193e-06, + "loss": 0.3471, + "step": 6125 + }, + { + "epoch": 0.12263343592823361, + "grad_norm": 1.1829618215560913, + "learning_rate": 9.776683994518693e-06, + "loss": 0.3433, + "step": 6126 + }, + { + "epoch": 0.12265345444536196, + "grad_norm": 1.0078959465026855, + "learning_rate": 9.776588182175041e-06, + "loss": 0.2897, + "step": 6127 + }, + { + "epoch": 0.1226734729624903, + "grad_norm": 1.1969152688980103, + "learning_rate": 9.77649234975164e-06, + "loss": 0.3294, + "step": 6128 + }, + { + "epoch": 0.12269349147961865, + "grad_norm": 1.8475252389907837, + "learning_rate": 9.776396497248893e-06, + "loss": 0.8182, + "step": 6129 + }, + { + "epoch": 0.12271350999674699, + "grad_norm": 1.0449978113174438, + "learning_rate": 9.776300624667203e-06, + "loss": 0.3195, + "step": 6130 + }, + { + "epoch": 0.12273352851387534, + "grad_norm": 1.0208821296691895, + "learning_rate": 9.776204732006973e-06, + "loss": 0.2909, + "step": 6131 + }, + { + "epoch": 0.12275354703100368, + "grad_norm": 1.1148886680603027, + "learning_rate": 9.776108819268608e-06, + "loss": 0.3315, + "step": 6132 + }, + { + "epoch": 0.12277356554813203, + "grad_norm": 1.137071132659912, + "learning_rate": 9.776012886452506e-06, + "loss": 0.3295, + "step": 6133 + }, + { + "epoch": 0.12279358406526036, + "grad_norm": 1.1023801565170288, + "learning_rate": 9.775916933559075e-06, + "loss": 0.3632, + "step": 6134 + }, + { + "epoch": 0.12281360258238871, + "grad_norm": 1.168445348739624, + "learning_rate": 9.775820960588717e-06, + "loss": 0.3021, + "step": 6135 + }, + { + "epoch": 0.12283362109951705, + "grad_norm": 1.0884038209915161, + "learning_rate": 9.775724967541837e-06, + "loss": 0.3319, + "step": 6136 + }, + { + "epoch": 0.1228536396166454, + "grad_norm": 1.1695427894592285, + "learning_rate": 9.775628954418837e-06, + "loss": 0.3441, + "step": 6137 + }, + { + "epoch": 0.12287365813377374, + "grad_norm": 1.1783636808395386, + "learning_rate": 9.775532921220119e-06, + "loss": 0.3702, + "step": 6138 + }, + { + "epoch": 0.12289367665090209, + "grad_norm": 1.1320701837539673, + "learning_rate": 9.775436867946089e-06, + "loss": 0.3373, + "step": 6139 + }, + { + "epoch": 0.12291369516803043, + "grad_norm": 1.1826587915420532, + "learning_rate": 9.775340794597148e-06, + "loss": 0.3027, + "step": 6140 + }, + { + "epoch": 0.12293371368515878, + "grad_norm": 0.9951531291007996, + "learning_rate": 9.775244701173706e-06, + "loss": 0.2865, + "step": 6141 + }, + { + "epoch": 0.12295373220228711, + "grad_norm": 1.1163098812103271, + "learning_rate": 9.77514858767616e-06, + "loss": 0.3136, + "step": 6142 + }, + { + "epoch": 0.12297375071941546, + "grad_norm": 1.1436728239059448, + "learning_rate": 9.775052454104916e-06, + "loss": 0.3233, + "step": 6143 + }, + { + "epoch": 0.1229937692365438, + "grad_norm": 1.0756536722183228, + "learning_rate": 9.774956300460381e-06, + "loss": 0.3453, + "step": 6144 + }, + { + "epoch": 0.12301378775367215, + "grad_norm": 1.1226028203964233, + "learning_rate": 9.774860126742957e-06, + "loss": 0.3699, + "step": 6145 + }, + { + "epoch": 0.12303380627080049, + "grad_norm": 1.0981645584106445, + "learning_rate": 9.77476393295305e-06, + "loss": 0.3897, + "step": 6146 + }, + { + "epoch": 0.12305382478792884, + "grad_norm": 1.051344394683838, + "learning_rate": 9.77466771909106e-06, + "loss": 0.3587, + "step": 6147 + }, + { + "epoch": 0.12307384330505718, + "grad_norm": 1.2793101072311401, + "learning_rate": 9.774571485157396e-06, + "loss": 0.3676, + "step": 6148 + }, + { + "epoch": 0.12309386182218553, + "grad_norm": 1.0163931846618652, + "learning_rate": 9.774475231152461e-06, + "loss": 0.3592, + "step": 6149 + }, + { + "epoch": 0.12311388033931386, + "grad_norm": 1.1536391973495483, + "learning_rate": 9.77437895707666e-06, + "loss": 0.3537, + "step": 6150 + }, + { + "epoch": 0.12313389885644221, + "grad_norm": 1.114013433456421, + "learning_rate": 9.774282662930397e-06, + "loss": 0.3171, + "step": 6151 + }, + { + "epoch": 0.12315391737357055, + "grad_norm": 1.0095527172088623, + "learning_rate": 9.774186348714076e-06, + "loss": 0.2933, + "step": 6152 + }, + { + "epoch": 0.1231739358906989, + "grad_norm": 1.9634172916412354, + "learning_rate": 9.774090014428105e-06, + "loss": 0.9083, + "step": 6153 + }, + { + "epoch": 0.12319395440782724, + "grad_norm": 1.17432701587677, + "learning_rate": 9.773993660072887e-06, + "loss": 0.3342, + "step": 6154 + }, + { + "epoch": 0.12321397292495559, + "grad_norm": 1.1564204692840576, + "learning_rate": 9.773897285648825e-06, + "loss": 0.3885, + "step": 6155 + }, + { + "epoch": 0.12323399144208393, + "grad_norm": 1.1375066041946411, + "learning_rate": 9.773800891156326e-06, + "loss": 0.2927, + "step": 6156 + }, + { + "epoch": 0.12325400995921228, + "grad_norm": 1.0615565776824951, + "learning_rate": 9.773704476595799e-06, + "loss": 0.3313, + "step": 6157 + }, + { + "epoch": 0.12327402847634061, + "grad_norm": 1.1511119604110718, + "learning_rate": 9.773608041967641e-06, + "loss": 0.3122, + "step": 6158 + }, + { + "epoch": 0.12329404699346896, + "grad_norm": 1.148268699645996, + "learning_rate": 9.773511587272265e-06, + "loss": 0.3567, + "step": 6159 + }, + { + "epoch": 0.1233140655105973, + "grad_norm": 1.0237799882888794, + "learning_rate": 9.773415112510075e-06, + "loss": 0.317, + "step": 6160 + }, + { + "epoch": 0.12333408402772565, + "grad_norm": 1.2112245559692383, + "learning_rate": 9.773318617681471e-06, + "loss": 0.3068, + "step": 6161 + }, + { + "epoch": 0.12335410254485399, + "grad_norm": 2.1852426528930664, + "learning_rate": 9.773222102786865e-06, + "loss": 0.831, + "step": 6162 + }, + { + "epoch": 0.12337412106198234, + "grad_norm": 1.0649793148040771, + "learning_rate": 9.77312556782666e-06, + "loss": 0.3358, + "step": 6163 + }, + { + "epoch": 0.12339413957911068, + "grad_norm": 1.8915570974349976, + "learning_rate": 9.773029012801264e-06, + "loss": 0.849, + "step": 6164 + }, + { + "epoch": 0.12341415809623903, + "grad_norm": 1.2847765684127808, + "learning_rate": 9.772932437711079e-06, + "loss": 0.3217, + "step": 6165 + }, + { + "epoch": 0.12343417661336736, + "grad_norm": 1.7214245796203613, + "learning_rate": 9.772835842556513e-06, + "loss": 0.8459, + "step": 6166 + }, + { + "epoch": 0.12345419513049571, + "grad_norm": 1.7480636835098267, + "learning_rate": 9.772739227337973e-06, + "loss": 0.3389, + "step": 6167 + }, + { + "epoch": 0.12347421364762405, + "grad_norm": 1.1134681701660156, + "learning_rate": 9.772642592055866e-06, + "loss": 0.3125, + "step": 6168 + }, + { + "epoch": 0.1234942321647524, + "grad_norm": 1.1373836994171143, + "learning_rate": 9.772545936710595e-06, + "loss": 0.3142, + "step": 6169 + }, + { + "epoch": 0.12351425068188074, + "grad_norm": 1.021615743637085, + "learning_rate": 9.772449261302568e-06, + "loss": 0.2876, + "step": 6170 + }, + { + "epoch": 0.12353426919900909, + "grad_norm": 1.1298037767410278, + "learning_rate": 9.772352565832192e-06, + "loss": 0.3372, + "step": 6171 + }, + { + "epoch": 0.12355428771613743, + "grad_norm": 1.0334177017211914, + "learning_rate": 9.772255850299872e-06, + "loss": 0.3026, + "step": 6172 + }, + { + "epoch": 0.12357430623326578, + "grad_norm": 1.1032403707504272, + "learning_rate": 9.772159114706015e-06, + "loss": 0.3271, + "step": 6173 + }, + { + "epoch": 0.12359432475039411, + "grad_norm": 1.1381525993347168, + "learning_rate": 9.772062359051029e-06, + "loss": 0.3211, + "step": 6174 + }, + { + "epoch": 0.12361434326752246, + "grad_norm": 1.1948943138122559, + "learning_rate": 9.771965583335319e-06, + "loss": 0.323, + "step": 6175 + }, + { + "epoch": 0.1236343617846508, + "grad_norm": 1.0380139350891113, + "learning_rate": 9.771868787559294e-06, + "loss": 0.2967, + "step": 6176 + }, + { + "epoch": 0.12365438030177915, + "grad_norm": 1.1471928358078003, + "learning_rate": 9.771771971723359e-06, + "loss": 0.3282, + "step": 6177 + }, + { + "epoch": 0.12367439881890749, + "grad_norm": 1.1354238986968994, + "learning_rate": 9.77167513582792e-06, + "loss": 0.3323, + "step": 6178 + }, + { + "epoch": 0.12369441733603584, + "grad_norm": 1.2698073387145996, + "learning_rate": 9.771578279873389e-06, + "loss": 0.4023, + "step": 6179 + }, + { + "epoch": 0.12371443585316418, + "grad_norm": 1.6960557699203491, + "learning_rate": 9.771481403860167e-06, + "loss": 0.8264, + "step": 6180 + }, + { + "epoch": 0.12373445437029253, + "grad_norm": 1.0773463249206543, + "learning_rate": 9.771384507788663e-06, + "loss": 0.3414, + "step": 6181 + }, + { + "epoch": 0.12375447288742086, + "grad_norm": 1.12729811668396, + "learning_rate": 9.771287591659288e-06, + "loss": 0.334, + "step": 6182 + }, + { + "epoch": 0.12377449140454921, + "grad_norm": 1.0645456314086914, + "learning_rate": 9.771190655472446e-06, + "loss": 0.3813, + "step": 6183 + }, + { + "epoch": 0.12379450992167755, + "grad_norm": 1.1510064601898193, + "learning_rate": 9.771093699228545e-06, + "loss": 0.296, + "step": 6184 + }, + { + "epoch": 0.1238145284388059, + "grad_norm": 1.3041083812713623, + "learning_rate": 9.770996722927993e-06, + "loss": 0.2976, + "step": 6185 + }, + { + "epoch": 0.12383454695593424, + "grad_norm": 1.068156361579895, + "learning_rate": 9.770899726571197e-06, + "loss": 0.3225, + "step": 6186 + }, + { + "epoch": 0.12385456547306259, + "grad_norm": 1.0819246768951416, + "learning_rate": 9.770802710158568e-06, + "loss": 0.3261, + "step": 6187 + }, + { + "epoch": 0.12387458399019093, + "grad_norm": 1.1173183917999268, + "learning_rate": 9.770705673690507e-06, + "loss": 0.3245, + "step": 6188 + }, + { + "epoch": 0.12389460250731928, + "grad_norm": 1.102087378501892, + "learning_rate": 9.770608617167427e-06, + "loss": 0.3483, + "step": 6189 + }, + { + "epoch": 0.12391462102444761, + "grad_norm": 1.147773027420044, + "learning_rate": 9.770511540589736e-06, + "loss": 0.2842, + "step": 6190 + }, + { + "epoch": 0.12393463954157596, + "grad_norm": 1.2363866567611694, + "learning_rate": 9.77041444395784e-06, + "loss": 0.2728, + "step": 6191 + }, + { + "epoch": 0.1239546580587043, + "grad_norm": 1.003109097480774, + "learning_rate": 9.77031732727215e-06, + "loss": 0.2934, + "step": 6192 + }, + { + "epoch": 0.12397467657583265, + "grad_norm": 1.1184660196304321, + "learning_rate": 9.770220190533073e-06, + "loss": 0.32, + "step": 6193 + }, + { + "epoch": 0.12399469509296099, + "grad_norm": 1.8629510402679443, + "learning_rate": 9.770123033741015e-06, + "loss": 0.8521, + "step": 6194 + }, + { + "epoch": 0.12401471361008934, + "grad_norm": 1.1249754428863525, + "learning_rate": 9.770025856896386e-06, + "loss": 0.3619, + "step": 6195 + }, + { + "epoch": 0.12403473212721768, + "grad_norm": 1.006135106086731, + "learning_rate": 9.769928659999596e-06, + "loss": 0.3049, + "step": 6196 + }, + { + "epoch": 0.12405475064434603, + "grad_norm": 1.0548616647720337, + "learning_rate": 9.769831443051051e-06, + "loss": 0.3672, + "step": 6197 + }, + { + "epoch": 0.12407476916147436, + "grad_norm": 2.125108242034912, + "learning_rate": 9.769734206051165e-06, + "loss": 0.8352, + "step": 6198 + }, + { + "epoch": 0.12409478767860271, + "grad_norm": 0.9898504018783569, + "learning_rate": 9.769636949000338e-06, + "loss": 0.2965, + "step": 6199 + }, + { + "epoch": 0.12411480619573105, + "grad_norm": 1.061612844467163, + "learning_rate": 9.769539671898987e-06, + "loss": 0.2986, + "step": 6200 + }, + { + "epoch": 0.1241348247128594, + "grad_norm": 1.4296356439590454, + "learning_rate": 9.769442374747517e-06, + "loss": 0.3409, + "step": 6201 + }, + { + "epoch": 0.12415484322998774, + "grad_norm": 2.0389480590820312, + "learning_rate": 9.769345057546335e-06, + "loss": 0.8378, + "step": 6202 + }, + { + "epoch": 0.12417486174711609, + "grad_norm": 1.1162673234939575, + "learning_rate": 9.769247720295856e-06, + "loss": 0.3119, + "step": 6203 + }, + { + "epoch": 0.12419488026424443, + "grad_norm": 1.048523187637329, + "learning_rate": 9.769150362996484e-06, + "loss": 0.3596, + "step": 6204 + }, + { + "epoch": 0.12421489878137278, + "grad_norm": 1.1867414712905884, + "learning_rate": 9.769052985648632e-06, + "loss": 0.3425, + "step": 6205 + }, + { + "epoch": 0.12423491729850111, + "grad_norm": 1.130270004272461, + "learning_rate": 9.768955588252706e-06, + "loss": 0.3343, + "step": 6206 + }, + { + "epoch": 0.12425493581562946, + "grad_norm": 1.1505765914916992, + "learning_rate": 9.768858170809117e-06, + "loss": 0.3849, + "step": 6207 + }, + { + "epoch": 0.1242749543327578, + "grad_norm": 1.102278232574463, + "learning_rate": 9.768760733318277e-06, + "loss": 0.3395, + "step": 6208 + }, + { + "epoch": 0.12429497284988615, + "grad_norm": 1.28217351436615, + "learning_rate": 9.76866327578059e-06, + "loss": 0.2862, + "step": 6209 + }, + { + "epoch": 0.12431499136701449, + "grad_norm": 0.9474124908447266, + "learning_rate": 9.768565798196469e-06, + "loss": 0.311, + "step": 6210 + }, + { + "epoch": 0.12433500988414284, + "grad_norm": 1.1334965229034424, + "learning_rate": 9.768468300566325e-06, + "loss": 0.3119, + "step": 6211 + }, + { + "epoch": 0.12435502840127118, + "grad_norm": 1.2119470834732056, + "learning_rate": 9.768370782890567e-06, + "loss": 0.3384, + "step": 6212 + }, + { + "epoch": 0.12437504691839953, + "grad_norm": 1.1740658283233643, + "learning_rate": 9.768273245169604e-06, + "loss": 0.3576, + "step": 6213 + }, + { + "epoch": 0.12439506543552786, + "grad_norm": 1.175039529800415, + "learning_rate": 9.768175687403844e-06, + "loss": 0.351, + "step": 6214 + }, + { + "epoch": 0.12441508395265621, + "grad_norm": 1.166452169418335, + "learning_rate": 9.768078109593702e-06, + "loss": 0.2823, + "step": 6215 + }, + { + "epoch": 0.12443510246978455, + "grad_norm": 1.8823827505111694, + "learning_rate": 9.767980511739586e-06, + "loss": 0.8027, + "step": 6216 + }, + { + "epoch": 0.1244551209869129, + "grad_norm": 1.149725317955017, + "learning_rate": 9.767882893841905e-06, + "loss": 0.2915, + "step": 6217 + }, + { + "epoch": 0.12447513950404124, + "grad_norm": 1.258075475692749, + "learning_rate": 9.76778525590107e-06, + "loss": 0.3332, + "step": 6218 + }, + { + "epoch": 0.12449515802116959, + "grad_norm": 1.2580506801605225, + "learning_rate": 9.767687597917492e-06, + "loss": 0.3382, + "step": 6219 + }, + { + "epoch": 0.12451517653829793, + "grad_norm": 1.1142369508743286, + "learning_rate": 9.767589919891582e-06, + "loss": 0.2979, + "step": 6220 + }, + { + "epoch": 0.12453519505542628, + "grad_norm": 1.2272626161575317, + "learning_rate": 9.767492221823749e-06, + "loss": 0.3609, + "step": 6221 + }, + { + "epoch": 0.12455521357255461, + "grad_norm": 1.1728705167770386, + "learning_rate": 9.767394503714406e-06, + "loss": 0.3589, + "step": 6222 + }, + { + "epoch": 0.12457523208968296, + "grad_norm": 1.0678564310073853, + "learning_rate": 9.767296765563961e-06, + "loss": 0.3136, + "step": 6223 + }, + { + "epoch": 0.1245952506068113, + "grad_norm": 1.116307258605957, + "learning_rate": 9.767199007372828e-06, + "loss": 0.3554, + "step": 6224 + }, + { + "epoch": 0.12461526912393965, + "grad_norm": 1.1145707368850708, + "learning_rate": 9.767101229141416e-06, + "loss": 0.3208, + "step": 6225 + }, + { + "epoch": 0.12463528764106799, + "grad_norm": 1.1180787086486816, + "learning_rate": 9.767003430870134e-06, + "loss": 0.345, + "step": 6226 + }, + { + "epoch": 0.12465530615819634, + "grad_norm": 1.0899235010147095, + "learning_rate": 9.7669056125594e-06, + "loss": 0.3111, + "step": 6227 + }, + { + "epoch": 0.12467532467532468, + "grad_norm": 2.0769870281219482, + "learning_rate": 9.766807774209615e-06, + "loss": 0.8327, + "step": 6228 + }, + { + "epoch": 0.12469534319245303, + "grad_norm": 1.2820560932159424, + "learning_rate": 9.7667099158212e-06, + "loss": 0.3585, + "step": 6229 + }, + { + "epoch": 0.12471536170958136, + "grad_norm": 1.7619149684906006, + "learning_rate": 9.76661203739456e-06, + "loss": 0.8316, + "step": 6230 + }, + { + "epoch": 0.12473538022670971, + "grad_norm": 1.773268222808838, + "learning_rate": 9.76651413893011e-06, + "loss": 0.814, + "step": 6231 + }, + { + "epoch": 0.12475539874383805, + "grad_norm": 1.3161890506744385, + "learning_rate": 9.76641622042826e-06, + "loss": 0.3343, + "step": 6232 + }, + { + "epoch": 0.1247754172609664, + "grad_norm": 1.4078068733215332, + "learning_rate": 9.766318281889423e-06, + "loss": 0.336, + "step": 6233 + }, + { + "epoch": 0.12479543577809474, + "grad_norm": 1.6787580251693726, + "learning_rate": 9.766220323314009e-06, + "loss": 0.8856, + "step": 6234 + }, + { + "epoch": 0.12481545429522307, + "grad_norm": 1.1751456260681152, + "learning_rate": 9.76612234470243e-06, + "loss": 0.3077, + "step": 6235 + }, + { + "epoch": 0.12483547281235143, + "grad_norm": 1.1715257167816162, + "learning_rate": 9.766024346055099e-06, + "loss": 0.3685, + "step": 6236 + }, + { + "epoch": 0.12485549132947976, + "grad_norm": 1.1619205474853516, + "learning_rate": 9.765926327372425e-06, + "loss": 0.3605, + "step": 6237 + }, + { + "epoch": 0.12487550984660811, + "grad_norm": 1.1660691499710083, + "learning_rate": 9.765828288654826e-06, + "loss": 0.3544, + "step": 6238 + }, + { + "epoch": 0.12489552836373645, + "grad_norm": 1.354094386100769, + "learning_rate": 9.765730229902708e-06, + "loss": 0.317, + "step": 6239 + }, + { + "epoch": 0.1249155468808648, + "grad_norm": 1.1025446653366089, + "learning_rate": 9.765632151116486e-06, + "loss": 0.3507, + "step": 6240 + }, + { + "epoch": 0.12493556539799314, + "grad_norm": 1.0935924053192139, + "learning_rate": 9.765534052296574e-06, + "loss": 0.3343, + "step": 6241 + }, + { + "epoch": 0.12495558391512149, + "grad_norm": 1.0363564491271973, + "learning_rate": 9.765435933443381e-06, + "loss": 0.2711, + "step": 6242 + }, + { + "epoch": 0.12497560243224982, + "grad_norm": 1.270302176475525, + "learning_rate": 9.76533779455732e-06, + "loss": 0.343, + "step": 6243 + }, + { + "epoch": 0.12499562094937818, + "grad_norm": 0.9972089529037476, + "learning_rate": 9.765239635638805e-06, + "loss": 0.3136, + "step": 6244 + }, + { + "epoch": 0.1250156394665065, + "grad_norm": 1.2389980554580688, + "learning_rate": 9.765141456688249e-06, + "loss": 0.3718, + "step": 6245 + }, + { + "epoch": 0.12503565798363486, + "grad_norm": 1.226765513420105, + "learning_rate": 9.765043257706062e-06, + "loss": 0.3456, + "step": 6246 + }, + { + "epoch": 0.1250556765007632, + "grad_norm": 1.1725748777389526, + "learning_rate": 9.76494503869266e-06, + "loss": 0.3456, + "step": 6247 + }, + { + "epoch": 0.12507569501789154, + "grad_norm": 1.1143162250518799, + "learning_rate": 9.764846799648455e-06, + "loss": 0.3294, + "step": 6248 + }, + { + "epoch": 0.1250957135350199, + "grad_norm": 1.226401448249817, + "learning_rate": 9.764748540573858e-06, + "loss": 0.357, + "step": 6249 + }, + { + "epoch": 0.12511573205214824, + "grad_norm": 1.236059546470642, + "learning_rate": 9.764650261469284e-06, + "loss": 0.3158, + "step": 6250 + }, + { + "epoch": 0.1251357505692766, + "grad_norm": 1.0976401567459106, + "learning_rate": 9.764551962335145e-06, + "loss": 0.2973, + "step": 6251 + }, + { + "epoch": 0.1251557690864049, + "grad_norm": 1.0972721576690674, + "learning_rate": 9.764453643171857e-06, + "loss": 0.2858, + "step": 6252 + }, + { + "epoch": 0.12517578760353326, + "grad_norm": 1.1149942874908447, + "learning_rate": 9.76435530397983e-06, + "loss": 0.2959, + "step": 6253 + }, + { + "epoch": 0.1251958061206616, + "grad_norm": 0.9944084882736206, + "learning_rate": 9.764256944759479e-06, + "loss": 0.3465, + "step": 6254 + }, + { + "epoch": 0.12521582463778996, + "grad_norm": 1.7860376834869385, + "learning_rate": 9.764158565511214e-06, + "loss": 0.8759, + "step": 6255 + }, + { + "epoch": 0.12523584315491829, + "grad_norm": 1.8857566118240356, + "learning_rate": 9.764060166235456e-06, + "loss": 0.8058, + "step": 6256 + }, + { + "epoch": 0.12525586167204664, + "grad_norm": 1.1214383840560913, + "learning_rate": 9.763961746932611e-06, + "loss": 0.3213, + "step": 6257 + }, + { + "epoch": 0.125275880189175, + "grad_norm": 1.0474897623062134, + "learning_rate": 9.763863307603098e-06, + "loss": 0.3119, + "step": 6258 + }, + { + "epoch": 0.12529589870630334, + "grad_norm": 1.0432307720184326, + "learning_rate": 9.76376484824733e-06, + "loss": 0.3357, + "step": 6259 + }, + { + "epoch": 0.12531591722343166, + "grad_norm": 1.1084338426589966, + "learning_rate": 9.763666368865718e-06, + "loss": 0.3393, + "step": 6260 + }, + { + "epoch": 0.12533593574056, + "grad_norm": 1.0565639734268188, + "learning_rate": 9.763567869458677e-06, + "loss": 0.3466, + "step": 6261 + }, + { + "epoch": 0.12535595425768836, + "grad_norm": 1.3191624879837036, + "learning_rate": 9.763469350026622e-06, + "loss": 0.3486, + "step": 6262 + }, + { + "epoch": 0.1253759727748167, + "grad_norm": 1.1076747179031372, + "learning_rate": 9.763370810569968e-06, + "loss": 0.341, + "step": 6263 + }, + { + "epoch": 0.12539599129194504, + "grad_norm": 1.1089152097702026, + "learning_rate": 9.76327225108913e-06, + "loss": 0.358, + "step": 6264 + }, + { + "epoch": 0.1254160098090734, + "grad_norm": 1.167356252670288, + "learning_rate": 9.763173671584515e-06, + "loss": 0.3216, + "step": 6265 + }, + { + "epoch": 0.12543602832620174, + "grad_norm": 1.1484163999557495, + "learning_rate": 9.763075072056547e-06, + "loss": 0.3467, + "step": 6266 + }, + { + "epoch": 0.1254560468433301, + "grad_norm": 1.104707956314087, + "learning_rate": 9.762976452505636e-06, + "loss": 0.374, + "step": 6267 + }, + { + "epoch": 0.1254760653604584, + "grad_norm": 1.0497245788574219, + "learning_rate": 9.762877812932197e-06, + "loss": 0.3357, + "step": 6268 + }, + { + "epoch": 0.12549608387758676, + "grad_norm": 1.0947792530059814, + "learning_rate": 9.762779153336645e-06, + "loss": 0.3674, + "step": 6269 + }, + { + "epoch": 0.1255161023947151, + "grad_norm": 0.9710773229598999, + "learning_rate": 9.762680473719394e-06, + "loss": 0.3217, + "step": 6270 + }, + { + "epoch": 0.12553612091184346, + "grad_norm": 1.8043075799942017, + "learning_rate": 9.762581774080857e-06, + "loss": 0.7875, + "step": 6271 + }, + { + "epoch": 0.12555613942897179, + "grad_norm": 1.2026680707931519, + "learning_rate": 9.762483054421454e-06, + "loss": 0.3615, + "step": 6272 + }, + { + "epoch": 0.12557615794610014, + "grad_norm": 1.1837992668151855, + "learning_rate": 9.762384314741598e-06, + "loss": 0.4226, + "step": 6273 + }, + { + "epoch": 0.1255961764632285, + "grad_norm": 1.0433382987976074, + "learning_rate": 9.7622855550417e-06, + "loss": 0.3518, + "step": 6274 + }, + { + "epoch": 0.12561619498035684, + "grad_norm": 1.212459921836853, + "learning_rate": 9.76218677532218e-06, + "loss": 0.3304, + "step": 6275 + }, + { + "epoch": 0.12563621349748516, + "grad_norm": 1.1226568222045898, + "learning_rate": 9.762087975583452e-06, + "loss": 0.3715, + "step": 6276 + }, + { + "epoch": 0.1256562320146135, + "grad_norm": 1.075540542602539, + "learning_rate": 9.761989155825931e-06, + "loss": 0.293, + "step": 6277 + }, + { + "epoch": 0.12567625053174186, + "grad_norm": 1.178351640701294, + "learning_rate": 9.76189031605003e-06, + "loss": 0.354, + "step": 6278 + }, + { + "epoch": 0.1256962690488702, + "grad_norm": 1.1021300554275513, + "learning_rate": 9.76179145625617e-06, + "loss": 0.3024, + "step": 6279 + }, + { + "epoch": 0.12571628756599854, + "grad_norm": 1.2460631132125854, + "learning_rate": 9.761692576444762e-06, + "loss": 0.4281, + "step": 6280 + }, + { + "epoch": 0.1257363060831269, + "grad_norm": 1.055121898651123, + "learning_rate": 9.761593676616223e-06, + "loss": 0.3614, + "step": 6281 + }, + { + "epoch": 0.12575632460025524, + "grad_norm": 1.0512710809707642, + "learning_rate": 9.76149475677097e-06, + "loss": 0.3184, + "step": 6282 + }, + { + "epoch": 0.1257763431173836, + "grad_norm": 1.852571725845337, + "learning_rate": 9.761395816909416e-06, + "loss": 0.847, + "step": 6283 + }, + { + "epoch": 0.1257963616345119, + "grad_norm": 1.0368530750274658, + "learning_rate": 9.76129685703198e-06, + "loss": 0.2775, + "step": 6284 + }, + { + "epoch": 0.12581638015164026, + "grad_norm": 1.248214602470398, + "learning_rate": 9.761197877139077e-06, + "loss": 0.3591, + "step": 6285 + }, + { + "epoch": 0.1258363986687686, + "grad_norm": 1.0746017694473267, + "learning_rate": 9.76109887723112e-06, + "loss": 0.3109, + "step": 6286 + }, + { + "epoch": 0.12585641718589696, + "grad_norm": 1.2107523679733276, + "learning_rate": 9.76099985730853e-06, + "loss": 0.3725, + "step": 6287 + }, + { + "epoch": 0.12587643570302529, + "grad_norm": 1.1477288007736206, + "learning_rate": 9.760900817371722e-06, + "loss": 0.3746, + "step": 6288 + }, + { + "epoch": 0.12589645422015364, + "grad_norm": 1.0295555591583252, + "learning_rate": 9.760801757421111e-06, + "loss": 0.3046, + "step": 6289 + }, + { + "epoch": 0.125916472737282, + "grad_norm": 1.79914391040802, + "learning_rate": 9.760702677457114e-06, + "loss": 0.8922, + "step": 6290 + }, + { + "epoch": 0.12593649125441034, + "grad_norm": 1.0428630113601685, + "learning_rate": 9.760603577480147e-06, + "loss": 0.3034, + "step": 6291 + }, + { + "epoch": 0.12595650977153866, + "grad_norm": 1.158469319343567, + "learning_rate": 9.760504457490626e-06, + "loss": 0.3204, + "step": 6292 + }, + { + "epoch": 0.125976528288667, + "grad_norm": 1.3114768266677856, + "learning_rate": 9.76040531748897e-06, + "loss": 0.34, + "step": 6293 + }, + { + "epoch": 0.12599654680579536, + "grad_norm": 1.0637744665145874, + "learning_rate": 9.760306157475593e-06, + "loss": 0.3172, + "step": 6294 + }, + { + "epoch": 0.1260165653229237, + "grad_norm": 1.1680506467819214, + "learning_rate": 9.760206977450917e-06, + "loss": 0.3325, + "step": 6295 + }, + { + "epoch": 0.12603658384005204, + "grad_norm": 1.0470678806304932, + "learning_rate": 9.760107777415352e-06, + "loss": 0.3889, + "step": 6296 + }, + { + "epoch": 0.1260566023571804, + "grad_norm": 1.054765224456787, + "learning_rate": 9.760008557369321e-06, + "loss": 0.3337, + "step": 6297 + }, + { + "epoch": 0.12607662087430874, + "grad_norm": 1.078279733657837, + "learning_rate": 9.759909317313235e-06, + "loss": 0.3111, + "step": 6298 + }, + { + "epoch": 0.1260966393914371, + "grad_norm": 1.0656626224517822, + "learning_rate": 9.759810057247516e-06, + "loss": 0.3403, + "step": 6299 + }, + { + "epoch": 0.1261166579085654, + "grad_norm": 1.0981663465499878, + "learning_rate": 9.75971077717258e-06, + "loss": 0.3089, + "step": 6300 + }, + { + "epoch": 0.12613667642569376, + "grad_norm": 1.1095492839813232, + "learning_rate": 9.759611477088843e-06, + "loss": 0.3634, + "step": 6301 + }, + { + "epoch": 0.1261566949428221, + "grad_norm": 1.9856692552566528, + "learning_rate": 9.759512156996726e-06, + "loss": 0.9018, + "step": 6302 + }, + { + "epoch": 0.12617671345995046, + "grad_norm": 1.0501636266708374, + "learning_rate": 9.759412816896643e-06, + "loss": 0.3483, + "step": 6303 + }, + { + "epoch": 0.12619673197707879, + "grad_norm": 1.3970680236816406, + "learning_rate": 9.759313456789012e-06, + "loss": 0.3344, + "step": 6304 + }, + { + "epoch": 0.12621675049420714, + "grad_norm": 1.1492431163787842, + "learning_rate": 9.759214076674252e-06, + "loss": 0.3354, + "step": 6305 + }, + { + "epoch": 0.1262367690113355, + "grad_norm": 1.0724592208862305, + "learning_rate": 9.759114676552782e-06, + "loss": 0.2939, + "step": 6306 + }, + { + "epoch": 0.12625678752846384, + "grad_norm": 1.0507043600082397, + "learning_rate": 9.759015256425017e-06, + "loss": 0.3139, + "step": 6307 + }, + { + "epoch": 0.12627680604559216, + "grad_norm": 1.841373085975647, + "learning_rate": 9.758915816291374e-06, + "loss": 0.8391, + "step": 6308 + }, + { + "epoch": 0.1262968245627205, + "grad_norm": 1.1300442218780518, + "learning_rate": 9.758816356152275e-06, + "loss": 0.3, + "step": 6309 + }, + { + "epoch": 0.12631684307984886, + "grad_norm": 1.0877799987792969, + "learning_rate": 9.758716876008136e-06, + "loss": 0.3434, + "step": 6310 + }, + { + "epoch": 0.1263368615969772, + "grad_norm": 1.1412862539291382, + "learning_rate": 9.758617375859374e-06, + "loss": 0.2989, + "step": 6311 + }, + { + "epoch": 0.12635688011410554, + "grad_norm": 1.126047134399414, + "learning_rate": 9.75851785570641e-06, + "loss": 0.3212, + "step": 6312 + }, + { + "epoch": 0.12637689863123389, + "grad_norm": 1.2450473308563232, + "learning_rate": 9.758418315549662e-06, + "loss": 0.3565, + "step": 6313 + }, + { + "epoch": 0.12639691714836224, + "grad_norm": 1.0557701587677002, + "learning_rate": 9.758318755389546e-06, + "loss": 0.3136, + "step": 6314 + }, + { + "epoch": 0.1264169356654906, + "grad_norm": 1.132950782775879, + "learning_rate": 9.758219175226483e-06, + "loss": 0.3346, + "step": 6315 + }, + { + "epoch": 0.1264369541826189, + "grad_norm": 1.180273413658142, + "learning_rate": 9.758119575060887e-06, + "loss": 0.2899, + "step": 6316 + }, + { + "epoch": 0.12645697269974726, + "grad_norm": 1.050392746925354, + "learning_rate": 9.758019954893183e-06, + "loss": 0.3653, + "step": 6317 + }, + { + "epoch": 0.1264769912168756, + "grad_norm": 1.1520836353302002, + "learning_rate": 9.757920314723787e-06, + "loss": 0.3141, + "step": 6318 + }, + { + "epoch": 0.12649700973400396, + "grad_norm": 1.5148073434829712, + "learning_rate": 9.757820654553119e-06, + "loss": 0.3297, + "step": 6319 + }, + { + "epoch": 0.12651702825113229, + "grad_norm": 1.890015959739685, + "learning_rate": 9.757720974381594e-06, + "loss": 0.9442, + "step": 6320 + }, + { + "epoch": 0.12653704676826064, + "grad_norm": 1.1184862852096558, + "learning_rate": 9.757621274209636e-06, + "loss": 0.2756, + "step": 6321 + }, + { + "epoch": 0.126557065285389, + "grad_norm": 1.1319047212600708, + "learning_rate": 9.757521554037663e-06, + "loss": 0.3509, + "step": 6322 + }, + { + "epoch": 0.12657708380251734, + "grad_norm": 1.1592198610305786, + "learning_rate": 9.75742181386609e-06, + "loss": 0.3723, + "step": 6323 + }, + { + "epoch": 0.12659710231964566, + "grad_norm": 1.1659363508224487, + "learning_rate": 9.757322053695342e-06, + "loss": 0.3338, + "step": 6324 + }, + { + "epoch": 0.126617120836774, + "grad_norm": 1.0215429067611694, + "learning_rate": 9.757222273525835e-06, + "loss": 0.2708, + "step": 6325 + }, + { + "epoch": 0.12663713935390236, + "grad_norm": 1.1121952533721924, + "learning_rate": 9.75712247335799e-06, + "loss": 0.2816, + "step": 6326 + }, + { + "epoch": 0.1266571578710307, + "grad_norm": 1.1700729131698608, + "learning_rate": 9.757022653192225e-06, + "loss": 0.2887, + "step": 6327 + }, + { + "epoch": 0.12667717638815904, + "grad_norm": 1.0656797885894775, + "learning_rate": 9.756922813028962e-06, + "loss": 0.3212, + "step": 6328 + }, + { + "epoch": 0.12669719490528739, + "grad_norm": 1.1211776733398438, + "learning_rate": 9.756822952868616e-06, + "loss": 0.3164, + "step": 6329 + }, + { + "epoch": 0.12671721342241574, + "grad_norm": 1.0847185850143433, + "learning_rate": 9.756723072711614e-06, + "loss": 0.2706, + "step": 6330 + }, + { + "epoch": 0.1267372319395441, + "grad_norm": 1.8647958040237427, + "learning_rate": 9.756623172558369e-06, + "loss": 0.8421, + "step": 6331 + }, + { + "epoch": 0.1267572504566724, + "grad_norm": 1.2099066972732544, + "learning_rate": 9.756523252409305e-06, + "loss": 0.3418, + "step": 6332 + }, + { + "epoch": 0.12677726897380076, + "grad_norm": 1.2113299369812012, + "learning_rate": 9.75642331226484e-06, + "loss": 0.3186, + "step": 6333 + }, + { + "epoch": 0.1267972874909291, + "grad_norm": 1.046597957611084, + "learning_rate": 9.756323352125396e-06, + "loss": 0.3496, + "step": 6334 + }, + { + "epoch": 0.12681730600805746, + "grad_norm": 1.058515191078186, + "learning_rate": 9.756223371991391e-06, + "loss": 0.2987, + "step": 6335 + }, + { + "epoch": 0.12683732452518579, + "grad_norm": 1.0299433469772339, + "learning_rate": 9.756123371863248e-06, + "loss": 0.3016, + "step": 6336 + }, + { + "epoch": 0.12685734304231414, + "grad_norm": 1.1406701803207397, + "learning_rate": 9.756023351741384e-06, + "loss": 0.3199, + "step": 6337 + }, + { + "epoch": 0.1268773615594425, + "grad_norm": 1.1682183742523193, + "learning_rate": 9.755923311626224e-06, + "loss": 0.3635, + "step": 6338 + }, + { + "epoch": 0.12689738007657084, + "grad_norm": 1.1499614715576172, + "learning_rate": 9.755823251518184e-06, + "loss": 0.304, + "step": 6339 + }, + { + "epoch": 0.12691739859369916, + "grad_norm": 1.2118921279907227, + "learning_rate": 9.755723171417686e-06, + "loss": 0.3271, + "step": 6340 + }, + { + "epoch": 0.1269374171108275, + "grad_norm": 1.1943105459213257, + "learning_rate": 9.755623071325153e-06, + "loss": 0.365, + "step": 6341 + }, + { + "epoch": 0.12695743562795586, + "grad_norm": 1.0655102729797363, + "learning_rate": 9.755522951241004e-06, + "loss": 0.305, + "step": 6342 + }, + { + "epoch": 0.1269774541450842, + "grad_norm": 1.9730840921401978, + "learning_rate": 9.755422811165657e-06, + "loss": 0.8653, + "step": 6343 + }, + { + "epoch": 0.12699747266221253, + "grad_norm": 1.1212098598480225, + "learning_rate": 9.755322651099539e-06, + "loss": 0.3137, + "step": 6344 + }, + { + "epoch": 0.12701749117934089, + "grad_norm": 1.1232235431671143, + "learning_rate": 9.755222471043067e-06, + "loss": 0.3337, + "step": 6345 + }, + { + "epoch": 0.12703750969646924, + "grad_norm": 1.2112985849380493, + "learning_rate": 9.755122270996663e-06, + "loss": 0.3211, + "step": 6346 + }, + { + "epoch": 0.1270575282135976, + "grad_norm": 1.0915533304214478, + "learning_rate": 9.755022050960747e-06, + "loss": 0.3297, + "step": 6347 + }, + { + "epoch": 0.1270775467307259, + "grad_norm": 1.209226369857788, + "learning_rate": 9.754921810935743e-06, + "loss": 0.3939, + "step": 6348 + }, + { + "epoch": 0.12709756524785426, + "grad_norm": 1.0549256801605225, + "learning_rate": 9.754821550922071e-06, + "loss": 0.3205, + "step": 6349 + }, + { + "epoch": 0.1271175837649826, + "grad_norm": 1.2050505876541138, + "learning_rate": 9.754721270920152e-06, + "loss": 0.3325, + "step": 6350 + }, + { + "epoch": 0.12713760228211096, + "grad_norm": 1.1102712154388428, + "learning_rate": 9.75462097093041e-06, + "loss": 0.3086, + "step": 6351 + }, + { + "epoch": 0.12715762079923928, + "grad_norm": 1.0386483669281006, + "learning_rate": 9.75452065095326e-06, + "loss": 0.3115, + "step": 6352 + }, + { + "epoch": 0.12717763931636764, + "grad_norm": 1.1935067176818848, + "learning_rate": 9.754420310989132e-06, + "loss": 0.3856, + "step": 6353 + }, + { + "epoch": 0.127197657833496, + "grad_norm": 1.1027631759643555, + "learning_rate": 9.754319951038445e-06, + "loss": 0.3053, + "step": 6354 + }, + { + "epoch": 0.12721767635062434, + "grad_norm": 1.0617579221725464, + "learning_rate": 9.754219571101618e-06, + "loss": 0.3192, + "step": 6355 + }, + { + "epoch": 0.12723769486775266, + "grad_norm": 0.9793545603752136, + "learning_rate": 9.754119171179074e-06, + "loss": 0.2667, + "step": 6356 + }, + { + "epoch": 0.127257713384881, + "grad_norm": 1.4707568883895874, + "learning_rate": 9.754018751271239e-06, + "loss": 0.3233, + "step": 6357 + }, + { + "epoch": 0.12727773190200936, + "grad_norm": 1.0128155946731567, + "learning_rate": 9.753918311378531e-06, + "loss": 0.3433, + "step": 6358 + }, + { + "epoch": 0.1272977504191377, + "grad_norm": 1.1046375036239624, + "learning_rate": 9.753817851501373e-06, + "loss": 0.3398, + "step": 6359 + }, + { + "epoch": 0.12731776893626603, + "grad_norm": 1.12384831905365, + "learning_rate": 9.753717371640187e-06, + "loss": 0.336, + "step": 6360 + }, + { + "epoch": 0.12733778745339439, + "grad_norm": 1.11854887008667, + "learning_rate": 9.753616871795398e-06, + "loss": 0.3466, + "step": 6361 + }, + { + "epoch": 0.12735780597052274, + "grad_norm": 1.1668343544006348, + "learning_rate": 9.753516351967427e-06, + "loss": 0.3376, + "step": 6362 + }, + { + "epoch": 0.1273778244876511, + "grad_norm": 1.1635265350341797, + "learning_rate": 9.753415812156694e-06, + "loss": 0.3435, + "step": 6363 + }, + { + "epoch": 0.1273978430047794, + "grad_norm": 1.1926460266113281, + "learning_rate": 9.753315252363625e-06, + "loss": 0.4093, + "step": 6364 + }, + { + "epoch": 0.12741786152190776, + "grad_norm": 1.1516090631484985, + "learning_rate": 9.753214672588643e-06, + "loss": 0.3415, + "step": 6365 + }, + { + "epoch": 0.1274378800390361, + "grad_norm": 1.0941046476364136, + "learning_rate": 9.753114072832167e-06, + "loss": 0.3225, + "step": 6366 + }, + { + "epoch": 0.12745789855616446, + "grad_norm": 1.8195669651031494, + "learning_rate": 9.753013453094624e-06, + "loss": 0.828, + "step": 6367 + }, + { + "epoch": 0.12747791707329278, + "grad_norm": 1.0885564088821411, + "learning_rate": 9.752912813376434e-06, + "loss": 0.3386, + "step": 6368 + }, + { + "epoch": 0.12749793559042114, + "grad_norm": 1.1294115781784058, + "learning_rate": 9.752812153678021e-06, + "loss": 0.3305, + "step": 6369 + }, + { + "epoch": 0.12751795410754949, + "grad_norm": 1.064098834991455, + "learning_rate": 9.75271147399981e-06, + "loss": 0.3211, + "step": 6370 + }, + { + "epoch": 0.12753797262467784, + "grad_norm": 0.9624274373054504, + "learning_rate": 9.752610774342224e-06, + "loss": 0.3037, + "step": 6371 + }, + { + "epoch": 0.12755799114180616, + "grad_norm": 1.0374432802200317, + "learning_rate": 9.752510054705683e-06, + "loss": 0.3954, + "step": 6372 + }, + { + "epoch": 0.1275780096589345, + "grad_norm": 1.2398273944854736, + "learning_rate": 9.752409315090613e-06, + "loss": 0.3247, + "step": 6373 + }, + { + "epoch": 0.12759802817606286, + "grad_norm": 1.237375259399414, + "learning_rate": 9.752308555497436e-06, + "loss": 0.3237, + "step": 6374 + }, + { + "epoch": 0.1276180466931912, + "grad_norm": 1.1332165002822876, + "learning_rate": 9.752207775926578e-06, + "loss": 0.3187, + "step": 6375 + }, + { + "epoch": 0.12763806521031953, + "grad_norm": 1.2214890718460083, + "learning_rate": 9.752106976378462e-06, + "loss": 0.3445, + "step": 6376 + }, + { + "epoch": 0.12765808372744789, + "grad_norm": 1.0913110971450806, + "learning_rate": 9.75200615685351e-06, + "loss": 0.3702, + "step": 6377 + }, + { + "epoch": 0.12767810224457624, + "grad_norm": 1.056366205215454, + "learning_rate": 9.751905317352143e-06, + "loss": 0.3212, + "step": 6378 + }, + { + "epoch": 0.1276981207617046, + "grad_norm": 1.1594728231430054, + "learning_rate": 9.751804457874794e-06, + "loss": 0.326, + "step": 6379 + }, + { + "epoch": 0.1277181392788329, + "grad_norm": 1.2658147811889648, + "learning_rate": 9.75170357842188e-06, + "loss": 0.3742, + "step": 6380 + }, + { + "epoch": 0.12773815779596126, + "grad_norm": 0.9856467247009277, + "learning_rate": 9.751602678993825e-06, + "loss": 0.2974, + "step": 6381 + }, + { + "epoch": 0.1277581763130896, + "grad_norm": 1.2191824913024902, + "learning_rate": 9.751501759591057e-06, + "loss": 0.3105, + "step": 6382 + }, + { + "epoch": 0.12777819483021796, + "grad_norm": 1.0267612934112549, + "learning_rate": 9.751400820213996e-06, + "loss": 0.3835, + "step": 6383 + }, + { + "epoch": 0.12779821334734628, + "grad_norm": 1.1053544282913208, + "learning_rate": 9.75129986086307e-06, + "loss": 0.3234, + "step": 6384 + }, + { + "epoch": 0.12781823186447464, + "grad_norm": 1.130990982055664, + "learning_rate": 9.751198881538703e-06, + "loss": 0.386, + "step": 6385 + }, + { + "epoch": 0.12783825038160299, + "grad_norm": 1.1332812309265137, + "learning_rate": 9.751097882241317e-06, + "loss": 0.3615, + "step": 6386 + }, + { + "epoch": 0.12785826889873134, + "grad_norm": 1.1768543720245361, + "learning_rate": 9.750996862971339e-06, + "loss": 0.3116, + "step": 6387 + }, + { + "epoch": 0.12787828741585966, + "grad_norm": 1.0991284847259521, + "learning_rate": 9.75089582372919e-06, + "loss": 0.3125, + "step": 6388 + }, + { + "epoch": 0.127898305932988, + "grad_norm": 1.0028953552246094, + "learning_rate": 9.7507947645153e-06, + "loss": 0.3516, + "step": 6389 + }, + { + "epoch": 0.12791832445011636, + "grad_norm": 1.0549436807632446, + "learning_rate": 9.75069368533009e-06, + "loss": 0.3641, + "step": 6390 + }, + { + "epoch": 0.1279383429672447, + "grad_norm": 1.2594190835952759, + "learning_rate": 9.750592586173985e-06, + "loss": 0.3568, + "step": 6391 + }, + { + "epoch": 0.12795836148437303, + "grad_norm": 1.7131150960922241, + "learning_rate": 9.750491467047414e-06, + "loss": 0.9165, + "step": 6392 + }, + { + "epoch": 0.12797838000150139, + "grad_norm": 1.1322869062423706, + "learning_rate": 9.750390327950798e-06, + "loss": 0.3485, + "step": 6393 + }, + { + "epoch": 0.12799839851862974, + "grad_norm": 1.0896422863006592, + "learning_rate": 9.750289168884562e-06, + "loss": 0.2984, + "step": 6394 + }, + { + "epoch": 0.1280184170357581, + "grad_norm": 1.763944149017334, + "learning_rate": 9.750187989849134e-06, + "loss": 0.831, + "step": 6395 + }, + { + "epoch": 0.1280384355528864, + "grad_norm": 1.2450103759765625, + "learning_rate": 9.750086790844937e-06, + "loss": 0.3698, + "step": 6396 + }, + { + "epoch": 0.12805845407001476, + "grad_norm": 1.1224535703659058, + "learning_rate": 9.749985571872397e-06, + "loss": 0.3181, + "step": 6397 + }, + { + "epoch": 0.1280784725871431, + "grad_norm": 1.2008520364761353, + "learning_rate": 9.749884332931942e-06, + "loss": 0.3678, + "step": 6398 + }, + { + "epoch": 0.12809849110427146, + "grad_norm": 1.080175518989563, + "learning_rate": 9.749783074023994e-06, + "loss": 0.3273, + "step": 6399 + }, + { + "epoch": 0.12811850962139978, + "grad_norm": 1.1309335231781006, + "learning_rate": 9.749681795148981e-06, + "loss": 0.3032, + "step": 6400 + }, + { + "epoch": 0.12813852813852813, + "grad_norm": 1.1081855297088623, + "learning_rate": 9.749580496307327e-06, + "loss": 0.3128, + "step": 6401 + }, + { + "epoch": 0.12815854665565649, + "grad_norm": 1.0338574647903442, + "learning_rate": 9.749479177499459e-06, + "loss": 0.2805, + "step": 6402 + }, + { + "epoch": 0.12817856517278484, + "grad_norm": 1.4481492042541504, + "learning_rate": 9.749377838725803e-06, + "loss": 0.3203, + "step": 6403 + }, + { + "epoch": 0.12819858368991316, + "grad_norm": 1.8739150762557983, + "learning_rate": 9.749276479986784e-06, + "loss": 0.8536, + "step": 6404 + }, + { + "epoch": 0.1282186022070415, + "grad_norm": 1.8115801811218262, + "learning_rate": 9.749175101282828e-06, + "loss": 0.8418, + "step": 6405 + }, + { + "epoch": 0.12823862072416986, + "grad_norm": 1.0875650644302368, + "learning_rate": 9.749073702614364e-06, + "loss": 0.3409, + "step": 6406 + }, + { + "epoch": 0.1282586392412982, + "grad_norm": 1.040429949760437, + "learning_rate": 9.748972283981815e-06, + "loss": 0.3227, + "step": 6407 + }, + { + "epoch": 0.12827865775842653, + "grad_norm": 1.1416816711425781, + "learning_rate": 9.748870845385607e-06, + "loss": 0.3228, + "step": 6408 + }, + { + "epoch": 0.12829867627555488, + "grad_norm": 1.0671026706695557, + "learning_rate": 9.748769386826172e-06, + "loss": 0.3251, + "step": 6409 + }, + { + "epoch": 0.12831869479268324, + "grad_norm": 1.0871573686599731, + "learning_rate": 9.748667908303928e-06, + "loss": 0.3265, + "step": 6410 + }, + { + "epoch": 0.1283387133098116, + "grad_norm": 0.946879506111145, + "learning_rate": 9.74856640981931e-06, + "loss": 0.3138, + "step": 6411 + }, + { + "epoch": 0.1283587318269399, + "grad_norm": 1.9220916032791138, + "learning_rate": 9.748464891372738e-06, + "loss": 0.8546, + "step": 6412 + }, + { + "epoch": 0.12837875034406826, + "grad_norm": 1.1280403137207031, + "learning_rate": 9.74836335296464e-06, + "loss": 0.3645, + "step": 6413 + }, + { + "epoch": 0.1283987688611966, + "grad_norm": 1.0791360139846802, + "learning_rate": 9.748261794595448e-06, + "loss": 0.3606, + "step": 6414 + }, + { + "epoch": 0.12841878737832496, + "grad_norm": 1.8433213233947754, + "learning_rate": 9.748160216265584e-06, + "loss": 0.88, + "step": 6415 + }, + { + "epoch": 0.12843880589545328, + "grad_norm": 1.2154048681259155, + "learning_rate": 9.748058617975476e-06, + "loss": 0.3901, + "step": 6416 + }, + { + "epoch": 0.12845882441258163, + "grad_norm": 1.0566126108169556, + "learning_rate": 9.74795699972555e-06, + "loss": 0.361, + "step": 6417 + }, + { + "epoch": 0.12847884292970999, + "grad_norm": 1.1572256088256836, + "learning_rate": 9.747855361516236e-06, + "loss": 0.3301, + "step": 6418 + }, + { + "epoch": 0.12849886144683834, + "grad_norm": 1.1123520135879517, + "learning_rate": 9.74775370334796e-06, + "loss": 0.3724, + "step": 6419 + }, + { + "epoch": 0.12851887996396666, + "grad_norm": 1.1592282056808472, + "learning_rate": 9.74765202522115e-06, + "loss": 0.3047, + "step": 6420 + }, + { + "epoch": 0.128538898481095, + "grad_norm": 0.9630717635154724, + "learning_rate": 9.74755032713623e-06, + "loss": 0.3267, + "step": 6421 + }, + { + "epoch": 0.12855891699822336, + "grad_norm": 1.7274537086486816, + "learning_rate": 9.747448609093632e-06, + "loss": 0.8373, + "step": 6422 + }, + { + "epoch": 0.1285789355153517, + "grad_norm": 1.0688493251800537, + "learning_rate": 9.747346871093782e-06, + "loss": 0.3655, + "step": 6423 + }, + { + "epoch": 0.12859895403248003, + "grad_norm": 1.1006789207458496, + "learning_rate": 9.747245113137107e-06, + "loss": 0.3507, + "step": 6424 + }, + { + "epoch": 0.12861897254960838, + "grad_norm": 1.8732939958572388, + "learning_rate": 9.747143335224034e-06, + "loss": 0.8506, + "step": 6425 + }, + { + "epoch": 0.12863899106673674, + "grad_norm": 1.7333544492721558, + "learning_rate": 9.747041537354993e-06, + "loss": 0.8634, + "step": 6426 + }, + { + "epoch": 0.12865900958386509, + "grad_norm": 1.8051646947860718, + "learning_rate": 9.74693971953041e-06, + "loss": 0.8309, + "step": 6427 + }, + { + "epoch": 0.1286790281009934, + "grad_norm": 1.179795503616333, + "learning_rate": 9.746837881750714e-06, + "loss": 0.3428, + "step": 6428 + }, + { + "epoch": 0.12869904661812176, + "grad_norm": 1.1194993257522583, + "learning_rate": 9.746736024016335e-06, + "loss": 0.3519, + "step": 6429 + }, + { + "epoch": 0.1287190651352501, + "grad_norm": 1.245816946029663, + "learning_rate": 9.746634146327698e-06, + "loss": 0.361, + "step": 6430 + }, + { + "epoch": 0.12873908365237846, + "grad_norm": 1.084832787513733, + "learning_rate": 9.746532248685233e-06, + "loss": 0.2964, + "step": 6431 + }, + { + "epoch": 0.12875910216950678, + "grad_norm": 1.169694423675537, + "learning_rate": 9.746430331089367e-06, + "loss": 0.3349, + "step": 6432 + }, + { + "epoch": 0.12877912068663513, + "grad_norm": 1.0745455026626587, + "learning_rate": 9.74632839354053e-06, + "loss": 0.3891, + "step": 6433 + }, + { + "epoch": 0.12879913920376349, + "grad_norm": 1.874819278717041, + "learning_rate": 9.74622643603915e-06, + "loss": 0.8011, + "step": 6434 + }, + { + "epoch": 0.12881915772089184, + "grad_norm": 0.9765132665634155, + "learning_rate": 9.746124458585656e-06, + "loss": 0.3089, + "step": 6435 + }, + { + "epoch": 0.12883917623802016, + "grad_norm": 1.0747318267822266, + "learning_rate": 9.746022461180474e-06, + "loss": 0.3813, + "step": 6436 + }, + { + "epoch": 0.1288591947551485, + "grad_norm": 1.150234341621399, + "learning_rate": 9.745920443824037e-06, + "loss": 0.3228, + "step": 6437 + }, + { + "epoch": 0.12887921327227686, + "grad_norm": 1.1359777450561523, + "learning_rate": 9.745818406516771e-06, + "loss": 0.3426, + "step": 6438 + }, + { + "epoch": 0.1288992317894052, + "grad_norm": 1.8417631387710571, + "learning_rate": 9.745716349259105e-06, + "loss": 0.9135, + "step": 6439 + }, + { + "epoch": 0.12891925030653353, + "grad_norm": 1.1859664916992188, + "learning_rate": 9.74561427205147e-06, + "loss": 0.342, + "step": 6440 + }, + { + "epoch": 0.12893926882366188, + "grad_norm": 1.0652467012405396, + "learning_rate": 9.745512174894294e-06, + "loss": 0.3208, + "step": 6441 + }, + { + "epoch": 0.12895928734079024, + "grad_norm": 1.248525619506836, + "learning_rate": 9.745410057788005e-06, + "loss": 0.3421, + "step": 6442 + }, + { + "epoch": 0.12897930585791859, + "grad_norm": 1.1244655847549438, + "learning_rate": 9.745307920733034e-06, + "loss": 0.3727, + "step": 6443 + }, + { + "epoch": 0.1289993243750469, + "grad_norm": 1.000389575958252, + "learning_rate": 9.74520576372981e-06, + "loss": 0.356, + "step": 6444 + }, + { + "epoch": 0.12901934289217526, + "grad_norm": 1.134397268295288, + "learning_rate": 9.745103586778761e-06, + "loss": 0.3219, + "step": 6445 + }, + { + "epoch": 0.1290393614093036, + "grad_norm": 1.1119928359985352, + "learning_rate": 9.74500138988032e-06, + "loss": 0.3324, + "step": 6446 + }, + { + "epoch": 0.12905937992643196, + "grad_norm": 1.321523904800415, + "learning_rate": 9.744899173034912e-06, + "loss": 0.3584, + "step": 6447 + }, + { + "epoch": 0.12907939844356028, + "grad_norm": 1.0584375858306885, + "learning_rate": 9.744796936242968e-06, + "loss": 0.3228, + "step": 6448 + }, + { + "epoch": 0.12909941696068863, + "grad_norm": 1.0555955171585083, + "learning_rate": 9.74469467950492e-06, + "loss": 0.3261, + "step": 6449 + }, + { + "epoch": 0.12911943547781699, + "grad_norm": 1.231580138206482, + "learning_rate": 9.744592402821198e-06, + "loss": 0.3152, + "step": 6450 + }, + { + "epoch": 0.12913945399494534, + "grad_norm": 1.1654595136642456, + "learning_rate": 9.74449010619223e-06, + "loss": 0.3165, + "step": 6451 + }, + { + "epoch": 0.12915947251207366, + "grad_norm": 1.0984246730804443, + "learning_rate": 9.744387789618446e-06, + "loss": 0.3404, + "step": 6452 + }, + { + "epoch": 0.129179491029202, + "grad_norm": 1.1310163736343384, + "learning_rate": 9.744285453100277e-06, + "loss": 0.3537, + "step": 6453 + }, + { + "epoch": 0.12919950954633036, + "grad_norm": 1.267899513244629, + "learning_rate": 9.744183096638154e-06, + "loss": 0.329, + "step": 6454 + }, + { + "epoch": 0.1292195280634587, + "grad_norm": 1.0613025426864624, + "learning_rate": 9.744080720232504e-06, + "loss": 0.2752, + "step": 6455 + }, + { + "epoch": 0.12923954658058703, + "grad_norm": 1.1127252578735352, + "learning_rate": 9.74397832388376e-06, + "loss": 0.3074, + "step": 6456 + }, + { + "epoch": 0.12925956509771538, + "grad_norm": 1.2872743606567383, + "learning_rate": 9.743875907592352e-06, + "loss": 0.3417, + "step": 6457 + }, + { + "epoch": 0.12927958361484373, + "grad_norm": 1.1507816314697266, + "learning_rate": 9.743773471358713e-06, + "loss": 0.3642, + "step": 6458 + }, + { + "epoch": 0.12929960213197209, + "grad_norm": 1.0743318796157837, + "learning_rate": 9.743671015183269e-06, + "loss": 0.3326, + "step": 6459 + }, + { + "epoch": 0.1293196206491004, + "grad_norm": 1.0309035778045654, + "learning_rate": 9.743568539066454e-06, + "loss": 0.2794, + "step": 6460 + }, + { + "epoch": 0.12933963916622876, + "grad_norm": 1.0733654499053955, + "learning_rate": 9.743466043008697e-06, + "loss": 0.3068, + "step": 6461 + }, + { + "epoch": 0.1293596576833571, + "grad_norm": 1.46000075340271, + "learning_rate": 9.74336352701043e-06, + "loss": 0.306, + "step": 6462 + }, + { + "epoch": 0.12937967620048546, + "grad_norm": 1.090735912322998, + "learning_rate": 9.743260991072083e-06, + "loss": 0.3442, + "step": 6463 + }, + { + "epoch": 0.12939969471761378, + "grad_norm": 1.1630405187606812, + "learning_rate": 9.743158435194089e-06, + "loss": 0.3412, + "step": 6464 + }, + { + "epoch": 0.12941971323474213, + "grad_norm": 1.0259557962417603, + "learning_rate": 9.743055859376875e-06, + "loss": 0.3146, + "step": 6465 + }, + { + "epoch": 0.12943973175187048, + "grad_norm": 1.0460166931152344, + "learning_rate": 9.742953263620876e-06, + "loss": 0.3275, + "step": 6466 + }, + { + "epoch": 0.12945975026899884, + "grad_norm": 1.0478922128677368, + "learning_rate": 9.742850647926522e-06, + "loss": 0.3379, + "step": 6467 + }, + { + "epoch": 0.12947976878612716, + "grad_norm": 1.1357595920562744, + "learning_rate": 9.742748012294245e-06, + "loss": 0.3383, + "step": 6468 + }, + { + "epoch": 0.1294997873032555, + "grad_norm": 1.1672873497009277, + "learning_rate": 9.742645356724477e-06, + "loss": 0.3721, + "step": 6469 + }, + { + "epoch": 0.12951980582038386, + "grad_norm": 1.5937448740005493, + "learning_rate": 9.742542681217649e-06, + "loss": 0.3835, + "step": 6470 + }, + { + "epoch": 0.1295398243375122, + "grad_norm": 1.1423635482788086, + "learning_rate": 9.74243998577419e-06, + "loss": 0.2937, + "step": 6471 + }, + { + "epoch": 0.12955984285464053, + "grad_norm": 1.0459821224212646, + "learning_rate": 9.742337270394534e-06, + "loss": 0.3362, + "step": 6472 + }, + { + "epoch": 0.12957986137176888, + "grad_norm": 1.183976173400879, + "learning_rate": 9.742234535079113e-06, + "loss": 0.3026, + "step": 6473 + }, + { + "epoch": 0.12959987988889723, + "grad_norm": 1.19537353515625, + "learning_rate": 9.742131779828358e-06, + "loss": 0.3358, + "step": 6474 + }, + { + "epoch": 0.12961989840602559, + "grad_norm": 1.0241177082061768, + "learning_rate": 9.742029004642703e-06, + "loss": 0.3358, + "step": 6475 + }, + { + "epoch": 0.1296399169231539, + "grad_norm": 1.217983365058899, + "learning_rate": 9.741926209522578e-06, + "loss": 0.3604, + "step": 6476 + }, + { + "epoch": 0.12965993544028226, + "grad_norm": 1.0577080249786377, + "learning_rate": 9.741823394468416e-06, + "loss": 0.2882, + "step": 6477 + }, + { + "epoch": 0.1296799539574106, + "grad_norm": 1.1876256465911865, + "learning_rate": 9.741720559480649e-06, + "loss": 0.3263, + "step": 6478 + }, + { + "epoch": 0.12969997247453896, + "grad_norm": 1.16926109790802, + "learning_rate": 9.741617704559708e-06, + "loss": 0.3624, + "step": 6479 + }, + { + "epoch": 0.12971999099166728, + "grad_norm": 1.0310100317001343, + "learning_rate": 9.741514829706028e-06, + "loss": 0.3538, + "step": 6480 + }, + { + "epoch": 0.12974000950879563, + "grad_norm": 1.1454585790634155, + "learning_rate": 9.74141193492004e-06, + "loss": 0.3118, + "step": 6481 + }, + { + "epoch": 0.12976002802592398, + "grad_norm": 1.1693428754806519, + "learning_rate": 9.741309020202175e-06, + "loss": 0.305, + "step": 6482 + }, + { + "epoch": 0.12978004654305234, + "grad_norm": 1.1467604637145996, + "learning_rate": 9.741206085552868e-06, + "loss": 0.3227, + "step": 6483 + }, + { + "epoch": 0.12980006506018066, + "grad_norm": 1.0384118556976318, + "learning_rate": 9.741103130972551e-06, + "loss": 0.3249, + "step": 6484 + }, + { + "epoch": 0.129820083577309, + "grad_norm": 0.9834336638450623, + "learning_rate": 9.741000156461658e-06, + "loss": 0.3312, + "step": 6485 + }, + { + "epoch": 0.12984010209443736, + "grad_norm": 1.0946496725082397, + "learning_rate": 9.74089716202062e-06, + "loss": 0.3106, + "step": 6486 + }, + { + "epoch": 0.1298601206115657, + "grad_norm": 1.092555046081543, + "learning_rate": 9.740794147649869e-06, + "loss": 0.3957, + "step": 6487 + }, + { + "epoch": 0.12988013912869403, + "grad_norm": 1.0914911031723022, + "learning_rate": 9.74069111334984e-06, + "loss": 0.3716, + "step": 6488 + }, + { + "epoch": 0.12990015764582238, + "grad_norm": 1.0791802406311035, + "learning_rate": 9.740588059120967e-06, + "loss": 0.334, + "step": 6489 + }, + { + "epoch": 0.12992017616295073, + "grad_norm": 1.0410985946655273, + "learning_rate": 9.740484984963681e-06, + "loss": 0.3786, + "step": 6490 + }, + { + "epoch": 0.12994019468007909, + "grad_norm": 1.113437294960022, + "learning_rate": 9.740381890878416e-06, + "loss": 0.3662, + "step": 6491 + }, + { + "epoch": 0.1299602131972074, + "grad_norm": 1.1499593257904053, + "learning_rate": 9.740278776865607e-06, + "loss": 0.2777, + "step": 6492 + }, + { + "epoch": 0.12998023171433576, + "grad_norm": 1.0919454097747803, + "learning_rate": 9.740175642925687e-06, + "loss": 0.3116, + "step": 6493 + }, + { + "epoch": 0.1300002502314641, + "grad_norm": 0.977616012096405, + "learning_rate": 9.740072489059087e-06, + "loss": 0.2697, + "step": 6494 + }, + { + "epoch": 0.13002026874859246, + "grad_norm": 1.1280057430267334, + "learning_rate": 9.739969315266242e-06, + "loss": 0.372, + "step": 6495 + }, + { + "epoch": 0.13004028726572078, + "grad_norm": 1.059941053390503, + "learning_rate": 9.739866121547586e-06, + "loss": 0.3525, + "step": 6496 + }, + { + "epoch": 0.13006030578284913, + "grad_norm": 1.1686564683914185, + "learning_rate": 9.739762907903554e-06, + "loss": 0.381, + "step": 6497 + }, + { + "epoch": 0.13008032429997748, + "grad_norm": 1.0946497917175293, + "learning_rate": 9.739659674334578e-06, + "loss": 0.3385, + "step": 6498 + }, + { + "epoch": 0.13010034281710584, + "grad_norm": 1.062034249305725, + "learning_rate": 9.739556420841093e-06, + "loss": 0.299, + "step": 6499 + }, + { + "epoch": 0.13012036133423416, + "grad_norm": 1.043156623840332, + "learning_rate": 9.739453147423532e-06, + "loss": 0.35, + "step": 6500 + }, + { + "epoch": 0.1301403798513625, + "grad_norm": 1.0459846258163452, + "learning_rate": 9.739349854082332e-06, + "loss": 0.3275, + "step": 6501 + }, + { + "epoch": 0.13016039836849086, + "grad_norm": 1.1030350923538208, + "learning_rate": 9.739246540817924e-06, + "loss": 0.3272, + "step": 6502 + }, + { + "epoch": 0.1301804168856192, + "grad_norm": 1.2808101177215576, + "learning_rate": 9.739143207630742e-06, + "loss": 0.3889, + "step": 6503 + }, + { + "epoch": 0.13020043540274753, + "grad_norm": 1.1690137386322021, + "learning_rate": 9.739039854521224e-06, + "loss": 0.3239, + "step": 6504 + }, + { + "epoch": 0.13022045391987588, + "grad_norm": 1.032508373260498, + "learning_rate": 9.7389364814898e-06, + "loss": 0.3478, + "step": 6505 + }, + { + "epoch": 0.13024047243700423, + "grad_norm": 1.1246180534362793, + "learning_rate": 9.73883308853691e-06, + "loss": 0.3215, + "step": 6506 + }, + { + "epoch": 0.13026049095413259, + "grad_norm": 1.0587786436080933, + "learning_rate": 9.738729675662983e-06, + "loss": 0.2916, + "step": 6507 + }, + { + "epoch": 0.1302805094712609, + "grad_norm": 1.1492681503295898, + "learning_rate": 9.738626242868456e-06, + "loss": 0.3304, + "step": 6508 + }, + { + "epoch": 0.13030052798838926, + "grad_norm": 0.9826022982597351, + "learning_rate": 9.738522790153765e-06, + "loss": 0.3082, + "step": 6509 + }, + { + "epoch": 0.1303205465055176, + "grad_norm": 1.4475785493850708, + "learning_rate": 9.738419317519345e-06, + "loss": 0.3999, + "step": 6510 + }, + { + "epoch": 0.13034056502264596, + "grad_norm": 1.0282583236694336, + "learning_rate": 9.738315824965627e-06, + "loss": 0.3358, + "step": 6511 + }, + { + "epoch": 0.13036058353977428, + "grad_norm": 1.0285871028900146, + "learning_rate": 9.738212312493052e-06, + "loss": 0.2964, + "step": 6512 + }, + { + "epoch": 0.13038060205690263, + "grad_norm": 1.2229187488555908, + "learning_rate": 9.738108780102051e-06, + "loss": 0.327, + "step": 6513 + }, + { + "epoch": 0.13040062057403098, + "grad_norm": 1.2102245092391968, + "learning_rate": 9.73800522779306e-06, + "loss": 0.3334, + "step": 6514 + }, + { + "epoch": 0.13042063909115933, + "grad_norm": 1.1050978899002075, + "learning_rate": 9.737901655566514e-06, + "loss": 0.3093, + "step": 6515 + }, + { + "epoch": 0.13044065760828766, + "grad_norm": 1.055673599243164, + "learning_rate": 9.73779806342285e-06, + "loss": 0.3385, + "step": 6516 + }, + { + "epoch": 0.130460676125416, + "grad_norm": 1.0818339586257935, + "learning_rate": 9.737694451362502e-06, + "loss": 0.3409, + "step": 6517 + }, + { + "epoch": 0.13048069464254436, + "grad_norm": 1.1221303939819336, + "learning_rate": 9.737590819385908e-06, + "loss": 0.3204, + "step": 6518 + }, + { + "epoch": 0.1305007131596727, + "grad_norm": 1.0076961517333984, + "learning_rate": 9.7374871674935e-06, + "loss": 0.3262, + "step": 6519 + }, + { + "epoch": 0.13052073167680103, + "grad_norm": 1.1529382467269897, + "learning_rate": 9.737383495685715e-06, + "loss": 0.3559, + "step": 6520 + }, + { + "epoch": 0.13054075019392938, + "grad_norm": 1.0901050567626953, + "learning_rate": 9.73727980396299e-06, + "loss": 0.3421, + "step": 6521 + }, + { + "epoch": 0.13056076871105773, + "grad_norm": 1.9209908246994019, + "learning_rate": 9.737176092325761e-06, + "loss": 0.843, + "step": 6522 + }, + { + "epoch": 0.13058078722818608, + "grad_norm": 1.117836356163025, + "learning_rate": 9.737072360774462e-06, + "loss": 0.28, + "step": 6523 + }, + { + "epoch": 0.1306008057453144, + "grad_norm": 1.1132444143295288, + "learning_rate": 9.736968609309531e-06, + "loss": 0.3013, + "step": 6524 + }, + { + "epoch": 0.13062082426244276, + "grad_norm": 1.0981403589248657, + "learning_rate": 9.736864837931403e-06, + "loss": 0.3503, + "step": 6525 + }, + { + "epoch": 0.1306408427795711, + "grad_norm": 1.0661031007766724, + "learning_rate": 9.736761046640515e-06, + "loss": 0.3187, + "step": 6526 + }, + { + "epoch": 0.13066086129669946, + "grad_norm": 1.03561532497406, + "learning_rate": 9.7366572354373e-06, + "loss": 0.306, + "step": 6527 + }, + { + "epoch": 0.13068087981382778, + "grad_norm": 1.0906040668487549, + "learning_rate": 9.736553404322203e-06, + "loss": 0.3115, + "step": 6528 + }, + { + "epoch": 0.13070089833095613, + "grad_norm": 1.1654847860336304, + "learning_rate": 9.736449553295651e-06, + "loss": 0.34, + "step": 6529 + }, + { + "epoch": 0.13072091684808448, + "grad_norm": 0.9962111711502075, + "learning_rate": 9.736345682358084e-06, + "loss": 0.2971, + "step": 6530 + }, + { + "epoch": 0.13074093536521283, + "grad_norm": 1.0634119510650635, + "learning_rate": 9.73624179150994e-06, + "loss": 0.3516, + "step": 6531 + }, + { + "epoch": 0.13076095388234116, + "grad_norm": 1.0227922201156616, + "learning_rate": 9.736137880751655e-06, + "loss": 0.3314, + "step": 6532 + }, + { + "epoch": 0.1307809723994695, + "grad_norm": 1.078486442565918, + "learning_rate": 9.736033950083665e-06, + "loss": 0.3094, + "step": 6533 + }, + { + "epoch": 0.13080099091659786, + "grad_norm": 1.136522889137268, + "learning_rate": 9.735929999506408e-06, + "loss": 0.3207, + "step": 6534 + }, + { + "epoch": 0.1308210094337262, + "grad_norm": 1.0450291633605957, + "learning_rate": 9.735826029020321e-06, + "loss": 0.336, + "step": 6535 + }, + { + "epoch": 0.13084102795085453, + "grad_norm": 1.078373670578003, + "learning_rate": 9.735722038625838e-06, + "loss": 0.3592, + "step": 6536 + }, + { + "epoch": 0.13086104646798288, + "grad_norm": 1.080197811126709, + "learning_rate": 9.7356180283234e-06, + "loss": 0.3026, + "step": 6537 + }, + { + "epoch": 0.13088106498511123, + "grad_norm": 1.182147741317749, + "learning_rate": 9.735513998113444e-06, + "loss": 0.3412, + "step": 6538 + }, + { + "epoch": 0.13090108350223958, + "grad_norm": 0.9979256391525269, + "learning_rate": 9.735409947996408e-06, + "loss": 0.3129, + "step": 6539 + }, + { + "epoch": 0.1309211020193679, + "grad_norm": 1.0895335674285889, + "learning_rate": 9.735305877972723e-06, + "loss": 0.3369, + "step": 6540 + }, + { + "epoch": 0.13094112053649626, + "grad_norm": 1.1368077993392944, + "learning_rate": 9.735201788042834e-06, + "loss": 0.345, + "step": 6541 + }, + { + "epoch": 0.1309611390536246, + "grad_norm": 1.115004539489746, + "learning_rate": 9.735097678207176e-06, + "loss": 0.3527, + "step": 6542 + }, + { + "epoch": 0.13098115757075296, + "grad_norm": 1.0467232465744019, + "learning_rate": 9.734993548466185e-06, + "loss": 0.2909, + "step": 6543 + }, + { + "epoch": 0.13100117608788128, + "grad_norm": 1.349840521812439, + "learning_rate": 9.7348893988203e-06, + "loss": 0.3136, + "step": 6544 + }, + { + "epoch": 0.13102119460500963, + "grad_norm": 1.0788456201553345, + "learning_rate": 9.734785229269961e-06, + "loss": 0.3751, + "step": 6545 + }, + { + "epoch": 0.13104121312213798, + "grad_norm": 1.9767612218856812, + "learning_rate": 9.734681039815602e-06, + "loss": 0.8764, + "step": 6546 + }, + { + "epoch": 0.13106123163926633, + "grad_norm": 1.104344367980957, + "learning_rate": 9.734576830457662e-06, + "loss": 0.3324, + "step": 6547 + }, + { + "epoch": 0.13108125015639466, + "grad_norm": 1.183204174041748, + "learning_rate": 9.734472601196583e-06, + "loss": 0.3766, + "step": 6548 + }, + { + "epoch": 0.131101268673523, + "grad_norm": 1.2431873083114624, + "learning_rate": 9.734368352032797e-06, + "loss": 0.3404, + "step": 6549 + }, + { + "epoch": 0.13112128719065136, + "grad_norm": 1.199102759361267, + "learning_rate": 9.734264082966748e-06, + "loss": 0.3577, + "step": 6550 + }, + { + "epoch": 0.1311413057077797, + "grad_norm": 1.0735626220703125, + "learning_rate": 9.73415979399887e-06, + "loss": 0.3164, + "step": 6551 + }, + { + "epoch": 0.13116132422490803, + "grad_norm": 1.0696462392807007, + "learning_rate": 9.734055485129602e-06, + "loss": 0.3343, + "step": 6552 + }, + { + "epoch": 0.13118134274203638, + "grad_norm": 1.1910624504089355, + "learning_rate": 9.733951156359385e-06, + "loss": 0.338, + "step": 6553 + }, + { + "epoch": 0.13120136125916473, + "grad_norm": 1.1193779706954956, + "learning_rate": 9.733846807688656e-06, + "loss": 0.3552, + "step": 6554 + }, + { + "epoch": 0.13122137977629308, + "grad_norm": 1.1115459203720093, + "learning_rate": 9.733742439117853e-06, + "loss": 0.3416, + "step": 6555 + }, + { + "epoch": 0.1312413982934214, + "grad_norm": 1.3562941551208496, + "learning_rate": 9.733638050647415e-06, + "loss": 0.2948, + "step": 6556 + }, + { + "epoch": 0.13126141681054976, + "grad_norm": 1.1710741519927979, + "learning_rate": 9.733533642277784e-06, + "loss": 0.3712, + "step": 6557 + }, + { + "epoch": 0.1312814353276781, + "grad_norm": 1.1357722282409668, + "learning_rate": 9.733429214009393e-06, + "loss": 0.3066, + "step": 6558 + }, + { + "epoch": 0.13130145384480646, + "grad_norm": 1.079872965812683, + "learning_rate": 9.733324765842687e-06, + "loss": 0.3645, + "step": 6559 + }, + { + "epoch": 0.13132147236193478, + "grad_norm": 1.0460352897644043, + "learning_rate": 9.7332202977781e-06, + "loss": 0.2892, + "step": 6560 + }, + { + "epoch": 0.13134149087906313, + "grad_norm": 1.178972840309143, + "learning_rate": 9.733115809816076e-06, + "loss": 0.3172, + "step": 6561 + }, + { + "epoch": 0.13136150939619148, + "grad_norm": 1.0327768325805664, + "learning_rate": 9.733011301957049e-06, + "loss": 0.3131, + "step": 6562 + }, + { + "epoch": 0.13138152791331983, + "grad_norm": 1.101193904876709, + "learning_rate": 9.732906774201462e-06, + "loss": 0.3024, + "step": 6563 + }, + { + "epoch": 0.13140154643044816, + "grad_norm": 1.1899369955062866, + "learning_rate": 9.732802226549754e-06, + "loss": 0.3508, + "step": 6564 + }, + { + "epoch": 0.1314215649475765, + "grad_norm": 1.2080096006393433, + "learning_rate": 9.732697659002364e-06, + "loss": 0.3307, + "step": 6565 + }, + { + "epoch": 0.13144158346470486, + "grad_norm": 1.0189863443374634, + "learning_rate": 9.732593071559732e-06, + "loss": 0.2979, + "step": 6566 + }, + { + "epoch": 0.1314616019818332, + "grad_norm": 2.0771093368530273, + "learning_rate": 9.732488464222296e-06, + "loss": 0.8213, + "step": 6567 + }, + { + "epoch": 0.13148162049896153, + "grad_norm": 1.0944105386734009, + "learning_rate": 9.732383836990498e-06, + "loss": 0.3327, + "step": 6568 + }, + { + "epoch": 0.13150163901608988, + "grad_norm": 1.0548067092895508, + "learning_rate": 9.732279189864776e-06, + "loss": 0.3157, + "step": 6569 + }, + { + "epoch": 0.13152165753321823, + "grad_norm": 1.060503602027893, + "learning_rate": 9.73217452284557e-06, + "loss": 0.3109, + "step": 6570 + }, + { + "epoch": 0.13154167605034658, + "grad_norm": 1.1180607080459595, + "learning_rate": 9.732069835933322e-06, + "loss": 0.3643, + "step": 6571 + }, + { + "epoch": 0.1315616945674749, + "grad_norm": 1.211753487586975, + "learning_rate": 9.73196512912847e-06, + "loss": 0.3216, + "step": 6572 + }, + { + "epoch": 0.13158171308460326, + "grad_norm": 1.8546277284622192, + "learning_rate": 9.731860402431455e-06, + "loss": 0.9123, + "step": 6573 + }, + { + "epoch": 0.1316017316017316, + "grad_norm": 1.0801833868026733, + "learning_rate": 9.731755655842718e-06, + "loss": 0.3288, + "step": 6574 + }, + { + "epoch": 0.13162175011885996, + "grad_norm": 1.816819667816162, + "learning_rate": 9.731650889362697e-06, + "loss": 0.8754, + "step": 6575 + }, + { + "epoch": 0.13164176863598828, + "grad_norm": 1.144765853881836, + "learning_rate": 9.731546102991836e-06, + "loss": 0.3253, + "step": 6576 + }, + { + "epoch": 0.13166178715311663, + "grad_norm": 1.8645519018173218, + "learning_rate": 9.731441296730571e-06, + "loss": 0.8299, + "step": 6577 + }, + { + "epoch": 0.13168180567024498, + "grad_norm": 1.1211544275283813, + "learning_rate": 9.731336470579347e-06, + "loss": 0.3733, + "step": 6578 + }, + { + "epoch": 0.13170182418737333, + "grad_norm": 1.0172096490859985, + "learning_rate": 9.731231624538602e-06, + "loss": 0.3256, + "step": 6579 + }, + { + "epoch": 0.13172184270450166, + "grad_norm": 1.3973846435546875, + "learning_rate": 9.731126758608778e-06, + "loss": 0.3291, + "step": 6580 + }, + { + "epoch": 0.13174186122163, + "grad_norm": 1.0211713314056396, + "learning_rate": 9.731021872790314e-06, + "loss": 0.3306, + "step": 6581 + }, + { + "epoch": 0.13176187973875836, + "grad_norm": 1.7764794826507568, + "learning_rate": 9.730916967083653e-06, + "loss": 0.8054, + "step": 6582 + }, + { + "epoch": 0.13178189825588668, + "grad_norm": 1.0167312622070312, + "learning_rate": 9.730812041489235e-06, + "loss": 0.334, + "step": 6583 + }, + { + "epoch": 0.13180191677301503, + "grad_norm": 1.0834875106811523, + "learning_rate": 9.7307070960075e-06, + "loss": 0.3015, + "step": 6584 + }, + { + "epoch": 0.13182193529014338, + "grad_norm": 1.0960688591003418, + "learning_rate": 9.730602130638893e-06, + "loss": 0.3034, + "step": 6585 + }, + { + "epoch": 0.13184195380727173, + "grad_norm": 1.1301748752593994, + "learning_rate": 9.73049714538385e-06, + "loss": 0.3264, + "step": 6586 + }, + { + "epoch": 0.13186197232440006, + "grad_norm": 1.1072819232940674, + "learning_rate": 9.730392140242817e-06, + "loss": 0.3419, + "step": 6587 + }, + { + "epoch": 0.1318819908415284, + "grad_norm": 1.1008484363555908, + "learning_rate": 9.730287115216232e-06, + "loss": 0.3316, + "step": 6588 + }, + { + "epoch": 0.13190200935865676, + "grad_norm": 1.187926173210144, + "learning_rate": 9.730182070304539e-06, + "loss": 0.2984, + "step": 6589 + }, + { + "epoch": 0.1319220278757851, + "grad_norm": 1.0552030801773071, + "learning_rate": 9.730077005508178e-06, + "loss": 0.3051, + "step": 6590 + }, + { + "epoch": 0.13194204639291343, + "grad_norm": 1.2222312688827515, + "learning_rate": 9.729971920827592e-06, + "loss": 0.3219, + "step": 6591 + }, + { + "epoch": 0.13196206491004178, + "grad_norm": 1.227253794670105, + "learning_rate": 9.729866816263222e-06, + "loss": 0.3119, + "step": 6592 + }, + { + "epoch": 0.13198208342717013, + "grad_norm": 3.1291043758392334, + "learning_rate": 9.72976169181551e-06, + "loss": 0.3513, + "step": 6593 + }, + { + "epoch": 0.13200210194429848, + "grad_norm": 1.0898855924606323, + "learning_rate": 9.729656547484896e-06, + "loss": 0.2899, + "step": 6594 + }, + { + "epoch": 0.1320221204614268, + "grad_norm": 1.2008633613586426, + "learning_rate": 9.729551383271824e-06, + "loss": 0.3746, + "step": 6595 + }, + { + "epoch": 0.13204213897855516, + "grad_norm": 1.1565054655075073, + "learning_rate": 9.729446199176736e-06, + "loss": 0.3542, + "step": 6596 + }, + { + "epoch": 0.1320621574956835, + "grad_norm": 1.1081093549728394, + "learning_rate": 9.729340995200073e-06, + "loss": 0.3439, + "step": 6597 + }, + { + "epoch": 0.13208217601281186, + "grad_norm": 1.1482890844345093, + "learning_rate": 9.729235771342282e-06, + "loss": 0.3345, + "step": 6598 + }, + { + "epoch": 0.13210219452994018, + "grad_norm": 1.88942551612854, + "learning_rate": 9.729130527603799e-06, + "loss": 0.7681, + "step": 6599 + }, + { + "epoch": 0.13212221304706853, + "grad_norm": 1.2933402061462402, + "learning_rate": 9.729025263985068e-06, + "loss": 0.3661, + "step": 6600 + }, + { + "epoch": 0.13214223156419688, + "grad_norm": 2.0950350761413574, + "learning_rate": 9.728919980486533e-06, + "loss": 0.7929, + "step": 6601 + }, + { + "epoch": 0.13216225008132523, + "grad_norm": 1.130665898323059, + "learning_rate": 9.728814677108638e-06, + "loss": 0.3516, + "step": 6602 + }, + { + "epoch": 0.13218226859845356, + "grad_norm": 1.1349526643753052, + "learning_rate": 9.728709353851822e-06, + "loss": 0.3183, + "step": 6603 + }, + { + "epoch": 0.1322022871155819, + "grad_norm": 1.197660207748413, + "learning_rate": 9.72860401071653e-06, + "loss": 0.3241, + "step": 6604 + }, + { + "epoch": 0.13222230563271026, + "grad_norm": 1.1362519264221191, + "learning_rate": 9.728498647703204e-06, + "loss": 0.3156, + "step": 6605 + }, + { + "epoch": 0.1322423241498386, + "grad_norm": 1.1066160202026367, + "learning_rate": 9.728393264812289e-06, + "loss": 0.3211, + "step": 6606 + }, + { + "epoch": 0.13226234266696693, + "grad_norm": 1.3858270645141602, + "learning_rate": 9.728287862044223e-06, + "loss": 0.3312, + "step": 6607 + }, + { + "epoch": 0.13228236118409528, + "grad_norm": 1.1109449863433838, + "learning_rate": 9.728182439399455e-06, + "loss": 0.3015, + "step": 6608 + }, + { + "epoch": 0.13230237970122363, + "grad_norm": 1.3143022060394287, + "learning_rate": 9.728076996878424e-06, + "loss": 0.3152, + "step": 6609 + }, + { + "epoch": 0.13232239821835198, + "grad_norm": 1.1016464233398438, + "learning_rate": 9.727971534481576e-06, + "loss": 0.329, + "step": 6610 + }, + { + "epoch": 0.1323424167354803, + "grad_norm": 1.9545472860336304, + "learning_rate": 9.727866052209353e-06, + "loss": 0.7843, + "step": 6611 + }, + { + "epoch": 0.13236243525260866, + "grad_norm": 1.1943928003311157, + "learning_rate": 9.727760550062197e-06, + "loss": 0.3154, + "step": 6612 + }, + { + "epoch": 0.132382453769737, + "grad_norm": 1.215720295906067, + "learning_rate": 9.727655028040555e-06, + "loss": 0.3511, + "step": 6613 + }, + { + "epoch": 0.13240247228686536, + "grad_norm": 1.1778512001037598, + "learning_rate": 9.727549486144867e-06, + "loss": 0.3638, + "step": 6614 + }, + { + "epoch": 0.13242249080399368, + "grad_norm": 1.2112489938735962, + "learning_rate": 9.72744392437558e-06, + "loss": 0.3289, + "step": 6615 + }, + { + "epoch": 0.13244250932112203, + "grad_norm": 1.0999218225479126, + "learning_rate": 9.727338342733135e-06, + "loss": 0.3191, + "step": 6616 + }, + { + "epoch": 0.13246252783825038, + "grad_norm": 1.118740200996399, + "learning_rate": 9.727232741217978e-06, + "loss": 0.3179, + "step": 6617 + }, + { + "epoch": 0.13248254635537873, + "grad_norm": 1.0926125049591064, + "learning_rate": 9.72712711983055e-06, + "loss": 0.3321, + "step": 6618 + }, + { + "epoch": 0.13250256487250706, + "grad_norm": 1.1798914670944214, + "learning_rate": 9.727021478571299e-06, + "loss": 0.3879, + "step": 6619 + }, + { + "epoch": 0.1325225833896354, + "grad_norm": 1.1628397703170776, + "learning_rate": 9.726915817440665e-06, + "loss": 0.3151, + "step": 6620 + }, + { + "epoch": 0.13254260190676376, + "grad_norm": 1.717314600944519, + "learning_rate": 9.726810136439095e-06, + "loss": 0.2882, + "step": 6621 + }, + { + "epoch": 0.1325626204238921, + "grad_norm": 1.0823793411254883, + "learning_rate": 9.726704435567034e-06, + "loss": 0.3316, + "step": 6622 + }, + { + "epoch": 0.13258263894102043, + "grad_norm": 1.0761853456497192, + "learning_rate": 9.726598714824923e-06, + "loss": 0.3364, + "step": 6623 + }, + { + "epoch": 0.13260265745814878, + "grad_norm": 1.1788654327392578, + "learning_rate": 9.726492974213208e-06, + "loss": 0.3212, + "step": 6624 + }, + { + "epoch": 0.13262267597527713, + "grad_norm": 1.1279618740081787, + "learning_rate": 9.726387213732335e-06, + "loss": 0.3478, + "step": 6625 + }, + { + "epoch": 0.13264269449240548, + "grad_norm": 1.1500580310821533, + "learning_rate": 9.726281433382747e-06, + "loss": 0.3101, + "step": 6626 + }, + { + "epoch": 0.1326627130095338, + "grad_norm": 1.9397742748260498, + "learning_rate": 9.726175633164887e-06, + "loss": 0.8662, + "step": 6627 + }, + { + "epoch": 0.13268273152666216, + "grad_norm": 1.858139157295227, + "learning_rate": 9.726069813079205e-06, + "loss": 0.8683, + "step": 6628 + }, + { + "epoch": 0.1327027500437905, + "grad_norm": 1.1595039367675781, + "learning_rate": 9.72596397312614e-06, + "loss": 0.3529, + "step": 6629 + }, + { + "epoch": 0.13272276856091886, + "grad_norm": 1.1188926696777344, + "learning_rate": 9.72585811330614e-06, + "loss": 0.3552, + "step": 6630 + }, + { + "epoch": 0.13274278707804718, + "grad_norm": 1.0909161567687988, + "learning_rate": 9.72575223361965e-06, + "loss": 0.3326, + "step": 6631 + }, + { + "epoch": 0.13276280559517553, + "grad_norm": 2.045191526412964, + "learning_rate": 9.725646334067114e-06, + "loss": 0.7765, + "step": 6632 + }, + { + "epoch": 0.13278282411230388, + "grad_norm": 1.0824998617172241, + "learning_rate": 9.725540414648979e-06, + "loss": 0.2938, + "step": 6633 + }, + { + "epoch": 0.13280284262943223, + "grad_norm": 1.191960334777832, + "learning_rate": 9.725434475365688e-06, + "loss": 0.346, + "step": 6634 + }, + { + "epoch": 0.13282286114656056, + "grad_norm": 1.238154649734497, + "learning_rate": 9.725328516217687e-06, + "loss": 0.3106, + "step": 6635 + }, + { + "epoch": 0.1328428796636889, + "grad_norm": 1.109942078590393, + "learning_rate": 9.725222537205422e-06, + "loss": 0.3113, + "step": 6636 + }, + { + "epoch": 0.13286289818081726, + "grad_norm": 1.1481144428253174, + "learning_rate": 9.725116538329338e-06, + "loss": 0.3559, + "step": 6637 + }, + { + "epoch": 0.1328829166979456, + "grad_norm": 1.793737530708313, + "learning_rate": 9.725010519589883e-06, + "loss": 0.8395, + "step": 6638 + }, + { + "epoch": 0.13290293521507393, + "grad_norm": 1.1631641387939453, + "learning_rate": 9.724904480987498e-06, + "loss": 0.3032, + "step": 6639 + }, + { + "epoch": 0.13292295373220228, + "grad_norm": 1.4461331367492676, + "learning_rate": 9.724798422522635e-06, + "loss": 0.3062, + "step": 6640 + }, + { + "epoch": 0.13294297224933063, + "grad_norm": 1.0517877340316772, + "learning_rate": 9.724692344195731e-06, + "loss": 0.3209, + "step": 6641 + }, + { + "epoch": 0.13296299076645898, + "grad_norm": 1.119677186012268, + "learning_rate": 9.724586246007242e-06, + "loss": 0.3335, + "step": 6642 + }, + { + "epoch": 0.1329830092835873, + "grad_norm": 1.12467360496521, + "learning_rate": 9.724480127957607e-06, + "loss": 0.3241, + "step": 6643 + }, + { + "epoch": 0.13300302780071566, + "grad_norm": 1.3760126829147339, + "learning_rate": 9.724373990047273e-06, + "loss": 0.3419, + "step": 6644 + }, + { + "epoch": 0.133023046317844, + "grad_norm": 1.1243537664413452, + "learning_rate": 9.72426783227669e-06, + "loss": 0.3, + "step": 6645 + }, + { + "epoch": 0.13304306483497236, + "grad_norm": 1.0629383325576782, + "learning_rate": 9.7241616546463e-06, + "loss": 0.3251, + "step": 6646 + }, + { + "epoch": 0.13306308335210068, + "grad_norm": 1.0541223287582397, + "learning_rate": 9.724055457156552e-06, + "loss": 0.356, + "step": 6647 + }, + { + "epoch": 0.13308310186922903, + "grad_norm": 1.968999981880188, + "learning_rate": 9.723949239807891e-06, + "loss": 0.7975, + "step": 6648 + }, + { + "epoch": 0.13310312038635738, + "grad_norm": 1.1084611415863037, + "learning_rate": 9.723843002600763e-06, + "loss": 0.3684, + "step": 6649 + }, + { + "epoch": 0.13312313890348573, + "grad_norm": 1.048282265663147, + "learning_rate": 9.723736745535618e-06, + "loss": 0.3474, + "step": 6650 + }, + { + "epoch": 0.13314315742061406, + "grad_norm": 1.1072698831558228, + "learning_rate": 9.723630468612899e-06, + "loss": 0.3665, + "step": 6651 + }, + { + "epoch": 0.1331631759377424, + "grad_norm": 0.9953985214233398, + "learning_rate": 9.723524171833054e-06, + "loss": 0.3083, + "step": 6652 + }, + { + "epoch": 0.13318319445487076, + "grad_norm": 1.2848637104034424, + "learning_rate": 9.72341785519653e-06, + "loss": 0.3245, + "step": 6653 + }, + { + "epoch": 0.1332032129719991, + "grad_norm": 1.1946340799331665, + "learning_rate": 9.723311518703774e-06, + "loss": 0.3483, + "step": 6654 + }, + { + "epoch": 0.13322323148912743, + "grad_norm": 1.2766199111938477, + "learning_rate": 9.723205162355232e-06, + "loss": 0.283, + "step": 6655 + }, + { + "epoch": 0.13324325000625578, + "grad_norm": 1.1823229789733887, + "learning_rate": 9.723098786151353e-06, + "loss": 0.3085, + "step": 6656 + }, + { + "epoch": 0.13326326852338413, + "grad_norm": 1.5052876472473145, + "learning_rate": 9.722992390092582e-06, + "loss": 0.3506, + "step": 6657 + }, + { + "epoch": 0.13328328704051248, + "grad_norm": 1.0895317792892456, + "learning_rate": 9.722885974179368e-06, + "loss": 0.3319, + "step": 6658 + }, + { + "epoch": 0.1333033055576408, + "grad_norm": 1.1156203746795654, + "learning_rate": 9.72277953841216e-06, + "loss": 0.3465, + "step": 6659 + }, + { + "epoch": 0.13332332407476916, + "grad_norm": 1.1341431140899658, + "learning_rate": 9.7226730827914e-06, + "loss": 0.3541, + "step": 6660 + }, + { + "epoch": 0.1333433425918975, + "grad_norm": 0.9839117527008057, + "learning_rate": 9.722566607317539e-06, + "loss": 0.3639, + "step": 6661 + }, + { + "epoch": 0.13336336110902586, + "grad_norm": 1.0625444650650024, + "learning_rate": 9.722460111991026e-06, + "loss": 0.3188, + "step": 6662 + }, + { + "epoch": 0.13338337962615418, + "grad_norm": 1.0394335985183716, + "learning_rate": 9.722353596812305e-06, + "loss": 0.3112, + "step": 6663 + }, + { + "epoch": 0.13340339814328253, + "grad_norm": 1.1158626079559326, + "learning_rate": 9.722247061781826e-06, + "loss": 0.3119, + "step": 6664 + }, + { + "epoch": 0.13342341666041088, + "grad_norm": 1.164049506187439, + "learning_rate": 9.722140506900038e-06, + "loss": 0.3107, + "step": 6665 + }, + { + "epoch": 0.13344343517753923, + "grad_norm": 1.129599928855896, + "learning_rate": 9.722033932167387e-06, + "loss": 0.3351, + "step": 6666 + }, + { + "epoch": 0.13346345369466756, + "grad_norm": 0.9390685558319092, + "learning_rate": 9.72192733758432e-06, + "loss": 0.3075, + "step": 6667 + }, + { + "epoch": 0.1334834722117959, + "grad_norm": 1.186315894126892, + "learning_rate": 9.721820723151289e-06, + "loss": 0.3268, + "step": 6668 + }, + { + "epoch": 0.13350349072892426, + "grad_norm": 1.0796316862106323, + "learning_rate": 9.721714088868739e-06, + "loss": 0.2984, + "step": 6669 + }, + { + "epoch": 0.1335235092460526, + "grad_norm": 1.0326557159423828, + "learning_rate": 9.721607434737118e-06, + "loss": 0.3157, + "step": 6670 + }, + { + "epoch": 0.13354352776318093, + "grad_norm": 1.9247268438339233, + "learning_rate": 9.721500760756877e-06, + "loss": 0.8258, + "step": 6671 + }, + { + "epoch": 0.13356354628030928, + "grad_norm": 1.1107591390609741, + "learning_rate": 9.721394066928463e-06, + "loss": 0.3113, + "step": 6672 + }, + { + "epoch": 0.13358356479743763, + "grad_norm": 1.1614962816238403, + "learning_rate": 9.721287353252322e-06, + "loss": 0.3105, + "step": 6673 + }, + { + "epoch": 0.13360358331456598, + "grad_norm": 1.217699646949768, + "learning_rate": 9.721180619728908e-06, + "loss": 0.3498, + "step": 6674 + }, + { + "epoch": 0.1336236018316943, + "grad_norm": 1.073752522468567, + "learning_rate": 9.721073866358665e-06, + "loss": 0.3099, + "step": 6675 + }, + { + "epoch": 0.13364362034882266, + "grad_norm": 1.157285213470459, + "learning_rate": 9.720967093142044e-06, + "loss": 0.3543, + "step": 6676 + }, + { + "epoch": 0.133663638865951, + "grad_norm": 1.0040746927261353, + "learning_rate": 9.720860300079493e-06, + "loss": 0.3285, + "step": 6677 + }, + { + "epoch": 0.13368365738307936, + "grad_norm": 1.13627290725708, + "learning_rate": 9.720753487171462e-06, + "loss": 0.3292, + "step": 6678 + }, + { + "epoch": 0.13370367590020768, + "grad_norm": 1.0506689548492432, + "learning_rate": 9.720646654418398e-06, + "loss": 0.3153, + "step": 6679 + }, + { + "epoch": 0.13372369441733603, + "grad_norm": 1.2191215753555298, + "learning_rate": 9.720539801820752e-06, + "loss": 0.3614, + "step": 6680 + }, + { + "epoch": 0.13374371293446438, + "grad_norm": 1.1170300245285034, + "learning_rate": 9.720432929378971e-06, + "loss": 0.2938, + "step": 6681 + }, + { + "epoch": 0.13376373145159273, + "grad_norm": 1.0648695230484009, + "learning_rate": 9.720326037093508e-06, + "loss": 0.3072, + "step": 6682 + }, + { + "epoch": 0.13378374996872106, + "grad_norm": 1.1202211380004883, + "learning_rate": 9.72021912496481e-06, + "loss": 0.3472, + "step": 6683 + }, + { + "epoch": 0.1338037684858494, + "grad_norm": 1.0986639261245728, + "learning_rate": 9.720112192993325e-06, + "loss": 0.337, + "step": 6684 + }, + { + "epoch": 0.13382378700297776, + "grad_norm": 1.0805888175964355, + "learning_rate": 9.720005241179507e-06, + "loss": 0.3179, + "step": 6685 + }, + { + "epoch": 0.1338438055201061, + "grad_norm": 1.1336098909378052, + "learning_rate": 9.7198982695238e-06, + "loss": 0.363, + "step": 6686 + }, + { + "epoch": 0.13386382403723443, + "grad_norm": 1.123491644859314, + "learning_rate": 9.719791278026657e-06, + "loss": 0.3371, + "step": 6687 + }, + { + "epoch": 0.13388384255436278, + "grad_norm": 1.227304458618164, + "learning_rate": 9.719684266688527e-06, + "loss": 0.3805, + "step": 6688 + }, + { + "epoch": 0.13390386107149113, + "grad_norm": 1.1462751626968384, + "learning_rate": 9.71957723550986e-06, + "loss": 0.3345, + "step": 6689 + }, + { + "epoch": 0.13392387958861948, + "grad_norm": 1.0745457410812378, + "learning_rate": 9.719470184491106e-06, + "loss": 0.3127, + "step": 6690 + }, + { + "epoch": 0.1339438981057478, + "grad_norm": 1.109066128730774, + "learning_rate": 9.719363113632717e-06, + "loss": 0.3627, + "step": 6691 + }, + { + "epoch": 0.13396391662287616, + "grad_norm": 1.0564625263214111, + "learning_rate": 9.71925602293514e-06, + "loss": 0.294, + "step": 6692 + }, + { + "epoch": 0.1339839351400045, + "grad_norm": 1.0335952043533325, + "learning_rate": 9.719148912398827e-06, + "loss": 0.3386, + "step": 6693 + }, + { + "epoch": 0.13400395365713286, + "grad_norm": 1.0323264598846436, + "learning_rate": 9.719041782024224e-06, + "loss": 0.2972, + "step": 6694 + }, + { + "epoch": 0.13402397217426118, + "grad_norm": 1.0467805862426758, + "learning_rate": 9.718934631811786e-06, + "loss": 0.3329, + "step": 6695 + }, + { + "epoch": 0.13404399069138953, + "grad_norm": 1.0210810899734497, + "learning_rate": 9.718827461761966e-06, + "loss": 0.2809, + "step": 6696 + }, + { + "epoch": 0.13406400920851788, + "grad_norm": 1.0578551292419434, + "learning_rate": 9.718720271875208e-06, + "loss": 0.3506, + "step": 6697 + }, + { + "epoch": 0.13408402772564623, + "grad_norm": 1.186647653579712, + "learning_rate": 9.718613062151965e-06, + "loss": 0.3532, + "step": 6698 + }, + { + "epoch": 0.13410404624277455, + "grad_norm": 1.1168493032455444, + "learning_rate": 9.71850583259269e-06, + "loss": 0.3473, + "step": 6699 + }, + { + "epoch": 0.1341240647599029, + "grad_norm": 1.0625241994857788, + "learning_rate": 9.71839858319783e-06, + "loss": 0.337, + "step": 6700 + }, + { + "epoch": 0.13414408327703126, + "grad_norm": 1.183680534362793, + "learning_rate": 9.718291313967841e-06, + "loss": 0.3521, + "step": 6701 + }, + { + "epoch": 0.1341641017941596, + "grad_norm": 1.1423691511154175, + "learning_rate": 9.718184024903166e-06, + "loss": 0.3459, + "step": 6702 + }, + { + "epoch": 0.13418412031128793, + "grad_norm": 1.094375491142273, + "learning_rate": 9.718076716004264e-06, + "loss": 0.3693, + "step": 6703 + }, + { + "epoch": 0.13420413882841628, + "grad_norm": 1.8142591714859009, + "learning_rate": 9.717969387271582e-06, + "loss": 0.8207, + "step": 6704 + }, + { + "epoch": 0.13422415734554463, + "grad_norm": 1.0451856851577759, + "learning_rate": 9.717862038705572e-06, + "loss": 0.2997, + "step": 6705 + }, + { + "epoch": 0.13424417586267298, + "grad_norm": 1.3092824220657349, + "learning_rate": 9.717754670306685e-06, + "loss": 0.3253, + "step": 6706 + }, + { + "epoch": 0.1342641943798013, + "grad_norm": 1.362202525138855, + "learning_rate": 9.717647282075373e-06, + "loss": 0.3175, + "step": 6707 + }, + { + "epoch": 0.13428421289692966, + "grad_norm": 0.9347793459892273, + "learning_rate": 9.717539874012088e-06, + "loss": 0.3058, + "step": 6708 + }, + { + "epoch": 0.134304231414058, + "grad_norm": 1.0420717000961304, + "learning_rate": 9.717432446117278e-06, + "loss": 0.3053, + "step": 6709 + }, + { + "epoch": 0.13432424993118636, + "grad_norm": 1.1562663316726685, + "learning_rate": 9.717324998391399e-06, + "loss": 0.3173, + "step": 6710 + }, + { + "epoch": 0.13434426844831468, + "grad_norm": 1.13978910446167, + "learning_rate": 9.717217530834901e-06, + "loss": 0.3107, + "step": 6711 + }, + { + "epoch": 0.13436428696544303, + "grad_norm": 1.9309741258621216, + "learning_rate": 9.717110043448236e-06, + "loss": 0.8294, + "step": 6712 + }, + { + "epoch": 0.13438430548257138, + "grad_norm": 1.2733685970306396, + "learning_rate": 9.717002536231855e-06, + "loss": 0.3012, + "step": 6713 + }, + { + "epoch": 0.13440432399969973, + "grad_norm": 1.154436707496643, + "learning_rate": 9.71689500918621e-06, + "loss": 0.3419, + "step": 6714 + }, + { + "epoch": 0.13442434251682805, + "grad_norm": 1.1543984413146973, + "learning_rate": 9.716787462311754e-06, + "loss": 0.3737, + "step": 6715 + }, + { + "epoch": 0.1344443610339564, + "grad_norm": 1.0747637748718262, + "learning_rate": 9.716679895608939e-06, + "loss": 0.2907, + "step": 6716 + }, + { + "epoch": 0.13446437955108476, + "grad_norm": 1.212151050567627, + "learning_rate": 9.716572309078216e-06, + "loss": 0.3512, + "step": 6717 + }, + { + "epoch": 0.1344843980682131, + "grad_norm": 1.387977957725525, + "learning_rate": 9.716464702720037e-06, + "loss": 0.2846, + "step": 6718 + }, + { + "epoch": 0.13450441658534143, + "grad_norm": 1.271675705909729, + "learning_rate": 9.716357076534858e-06, + "loss": 0.3914, + "step": 6719 + }, + { + "epoch": 0.13452443510246978, + "grad_norm": 1.0740751028060913, + "learning_rate": 9.716249430523126e-06, + "loss": 0.3696, + "step": 6720 + }, + { + "epoch": 0.13454445361959813, + "grad_norm": 1.905863642692566, + "learning_rate": 9.716141764685297e-06, + "loss": 0.8372, + "step": 6721 + }, + { + "epoch": 0.13456447213672648, + "grad_norm": 1.0536149740219116, + "learning_rate": 9.716034079021824e-06, + "loss": 0.3002, + "step": 6722 + }, + { + "epoch": 0.1345844906538548, + "grad_norm": 1.0631581544876099, + "learning_rate": 9.715926373533158e-06, + "loss": 0.3883, + "step": 6723 + }, + { + "epoch": 0.13460450917098316, + "grad_norm": 1.226151943206787, + "learning_rate": 9.715818648219753e-06, + "loss": 0.3352, + "step": 6724 + }, + { + "epoch": 0.1346245276881115, + "grad_norm": 1.0756891965866089, + "learning_rate": 9.71571090308206e-06, + "loss": 0.3395, + "step": 6725 + }, + { + "epoch": 0.13464454620523986, + "grad_norm": 1.1319581270217896, + "learning_rate": 9.715603138120535e-06, + "loss": 0.3261, + "step": 6726 + }, + { + "epoch": 0.13466456472236818, + "grad_norm": 0.9853795170783997, + "learning_rate": 9.715495353335628e-06, + "loss": 0.2903, + "step": 6727 + }, + { + "epoch": 0.13468458323949653, + "grad_norm": 1.1560412645339966, + "learning_rate": 9.715387548727794e-06, + "loss": 0.3588, + "step": 6728 + }, + { + "epoch": 0.13470460175662488, + "grad_norm": 1.0911812782287598, + "learning_rate": 9.715279724297484e-06, + "loss": 0.2936, + "step": 6729 + }, + { + "epoch": 0.13472462027375323, + "grad_norm": 1.88239324092865, + "learning_rate": 9.715171880045156e-06, + "loss": 0.8853, + "step": 6730 + }, + { + "epoch": 0.13474463879088155, + "grad_norm": 1.096230149269104, + "learning_rate": 9.715064015971258e-06, + "loss": 0.3101, + "step": 6731 + }, + { + "epoch": 0.1347646573080099, + "grad_norm": 1.0675084590911865, + "learning_rate": 9.714956132076246e-06, + "loss": 0.342, + "step": 6732 + }, + { + "epoch": 0.13478467582513826, + "grad_norm": 1.1309630870819092, + "learning_rate": 9.714848228360572e-06, + "loss": 0.4119, + "step": 6733 + }, + { + "epoch": 0.1348046943422666, + "grad_norm": 1.1101137399673462, + "learning_rate": 9.71474030482469e-06, + "loss": 0.3605, + "step": 6734 + }, + { + "epoch": 0.13482471285939493, + "grad_norm": 1.1641794443130493, + "learning_rate": 9.714632361469056e-06, + "loss": 0.3072, + "step": 6735 + }, + { + "epoch": 0.13484473137652328, + "grad_norm": 1.0139760971069336, + "learning_rate": 9.714524398294123e-06, + "loss": 0.3233, + "step": 6736 + }, + { + "epoch": 0.13486474989365163, + "grad_norm": 1.0455889701843262, + "learning_rate": 9.714416415300342e-06, + "loss": 0.3623, + "step": 6737 + }, + { + "epoch": 0.13488476841077998, + "grad_norm": 1.1601179838180542, + "learning_rate": 9.71430841248817e-06, + "loss": 0.332, + "step": 6738 + }, + { + "epoch": 0.1349047869279083, + "grad_norm": 1.250433325767517, + "learning_rate": 9.71420038985806e-06, + "loss": 0.3299, + "step": 6739 + }, + { + "epoch": 0.13492480544503666, + "grad_norm": 1.1070860624313354, + "learning_rate": 9.714092347410467e-06, + "loss": 0.3695, + "step": 6740 + }, + { + "epoch": 0.134944823962165, + "grad_norm": 1.1075358390808105, + "learning_rate": 9.713984285145844e-06, + "loss": 0.3212, + "step": 6741 + }, + { + "epoch": 0.13496484247929336, + "grad_norm": 1.074533462524414, + "learning_rate": 9.713876203064643e-06, + "loss": 0.3279, + "step": 6742 + }, + { + "epoch": 0.13498486099642168, + "grad_norm": 1.210523009300232, + "learning_rate": 9.713768101167322e-06, + "loss": 0.3193, + "step": 6743 + }, + { + "epoch": 0.13500487951355003, + "grad_norm": 1.0984344482421875, + "learning_rate": 9.713659979454335e-06, + "loss": 0.3128, + "step": 6744 + }, + { + "epoch": 0.13502489803067838, + "grad_norm": 1.2606819868087769, + "learning_rate": 9.713551837926136e-06, + "loss": 0.2976, + "step": 6745 + }, + { + "epoch": 0.13504491654780673, + "grad_norm": 1.8345636129379272, + "learning_rate": 9.71344367658318e-06, + "loss": 0.8127, + "step": 6746 + }, + { + "epoch": 0.13506493506493505, + "grad_norm": 1.1626760959625244, + "learning_rate": 9.71333549542592e-06, + "loss": 0.3461, + "step": 6747 + }, + { + "epoch": 0.1350849535820634, + "grad_norm": 1.0984580516815186, + "learning_rate": 9.713227294454812e-06, + "loss": 0.3677, + "step": 6748 + }, + { + "epoch": 0.13510497209919176, + "grad_norm": 1.0259934663772583, + "learning_rate": 9.713119073670312e-06, + "loss": 0.296, + "step": 6749 + }, + { + "epoch": 0.1351249906163201, + "grad_norm": 1.1361032724380493, + "learning_rate": 9.713010833072871e-06, + "loss": 0.3169, + "step": 6750 + }, + { + "epoch": 0.13514500913344843, + "grad_norm": 1.0881190299987793, + "learning_rate": 9.712902572662948e-06, + "loss": 0.3912, + "step": 6751 + }, + { + "epoch": 0.13516502765057678, + "grad_norm": 0.9853938817977905, + "learning_rate": 9.712794292441e-06, + "loss": 0.2811, + "step": 6752 + }, + { + "epoch": 0.13518504616770513, + "grad_norm": 1.1344223022460938, + "learning_rate": 9.712685992407474e-06, + "loss": 0.3688, + "step": 6753 + }, + { + "epoch": 0.13520506468483348, + "grad_norm": 1.0760624408721924, + "learning_rate": 9.712577672562833e-06, + "loss": 0.3262, + "step": 6754 + }, + { + "epoch": 0.1352250832019618, + "grad_norm": 1.0754791498184204, + "learning_rate": 9.712469332907529e-06, + "loss": 0.3785, + "step": 6755 + }, + { + "epoch": 0.13524510171909015, + "grad_norm": 1.9022268056869507, + "learning_rate": 9.71236097344202e-06, + "loss": 0.8792, + "step": 6756 + }, + { + "epoch": 0.1352651202362185, + "grad_norm": 1.1231882572174072, + "learning_rate": 9.712252594166755e-06, + "loss": 0.3284, + "step": 6757 + }, + { + "epoch": 0.13528513875334686, + "grad_norm": 1.1692802906036377, + "learning_rate": 9.712144195082198e-06, + "loss": 0.3338, + "step": 6758 + }, + { + "epoch": 0.13530515727047518, + "grad_norm": 1.9113080501556396, + "learning_rate": 9.712035776188799e-06, + "loss": 0.8261, + "step": 6759 + }, + { + "epoch": 0.13532517578760353, + "grad_norm": 1.1388700008392334, + "learning_rate": 9.711927337487017e-06, + "loss": 0.3803, + "step": 6760 + }, + { + "epoch": 0.13534519430473188, + "grad_norm": 1.017876148223877, + "learning_rate": 9.711818878977306e-06, + "loss": 0.343, + "step": 6761 + }, + { + "epoch": 0.13536521282186023, + "grad_norm": 1.0962836742401123, + "learning_rate": 9.711710400660121e-06, + "loss": 0.3444, + "step": 6762 + }, + { + "epoch": 0.13538523133898855, + "grad_norm": 1.1489356756210327, + "learning_rate": 9.711601902535921e-06, + "loss": 0.3429, + "step": 6763 + }, + { + "epoch": 0.1354052498561169, + "grad_norm": 1.1461758613586426, + "learning_rate": 9.711493384605162e-06, + "loss": 0.3082, + "step": 6764 + }, + { + "epoch": 0.13542526837324526, + "grad_norm": 1.8304152488708496, + "learning_rate": 9.711384846868297e-06, + "loss": 0.8782, + "step": 6765 + }, + { + "epoch": 0.1354452868903736, + "grad_norm": 1.1936838626861572, + "learning_rate": 9.711276289325784e-06, + "loss": 0.3073, + "step": 6766 + }, + { + "epoch": 0.13546530540750193, + "grad_norm": 1.0155839920043945, + "learning_rate": 9.711167711978078e-06, + "loss": 0.3377, + "step": 6767 + }, + { + "epoch": 0.13548532392463028, + "grad_norm": 1.821820616722107, + "learning_rate": 9.711059114825638e-06, + "loss": 0.8729, + "step": 6768 + }, + { + "epoch": 0.13550534244175863, + "grad_norm": 1.2993887662887573, + "learning_rate": 9.710950497868921e-06, + "loss": 0.3381, + "step": 6769 + }, + { + "epoch": 0.13552536095888698, + "grad_norm": 2.016204595565796, + "learning_rate": 9.710841861108379e-06, + "loss": 0.8718, + "step": 6770 + }, + { + "epoch": 0.1355453794760153, + "grad_norm": 1.1357827186584473, + "learning_rate": 9.710733204544474e-06, + "loss": 0.3501, + "step": 6771 + }, + { + "epoch": 0.13556539799314365, + "grad_norm": 1.178355097770691, + "learning_rate": 9.710624528177659e-06, + "loss": 0.2977, + "step": 6772 + }, + { + "epoch": 0.135585416510272, + "grad_norm": 1.851447582244873, + "learning_rate": 9.710515832008393e-06, + "loss": 0.8304, + "step": 6773 + }, + { + "epoch": 0.13560543502740036, + "grad_norm": 1.9629795551300049, + "learning_rate": 9.71040711603713e-06, + "loss": 0.8565, + "step": 6774 + }, + { + "epoch": 0.13562545354452868, + "grad_norm": 1.1171159744262695, + "learning_rate": 9.710298380264332e-06, + "loss": 0.3448, + "step": 6775 + }, + { + "epoch": 0.13564547206165703, + "grad_norm": 1.0435938835144043, + "learning_rate": 9.710189624690453e-06, + "loss": 0.3118, + "step": 6776 + }, + { + "epoch": 0.13566549057878538, + "grad_norm": 1.3921563625335693, + "learning_rate": 9.710080849315949e-06, + "loss": 0.3421, + "step": 6777 + }, + { + "epoch": 0.13568550909591373, + "grad_norm": 1.136380672454834, + "learning_rate": 9.709972054141279e-06, + "loss": 0.3557, + "step": 6778 + }, + { + "epoch": 0.13570552761304205, + "grad_norm": 1.0842053890228271, + "learning_rate": 9.7098632391669e-06, + "loss": 0.2695, + "step": 6779 + }, + { + "epoch": 0.1357255461301704, + "grad_norm": 1.2502189874649048, + "learning_rate": 9.70975440439327e-06, + "loss": 0.3549, + "step": 6780 + }, + { + "epoch": 0.13574556464729876, + "grad_norm": 1.7485352754592896, + "learning_rate": 9.709645549820847e-06, + "loss": 0.8132, + "step": 6781 + }, + { + "epoch": 0.1357655831644271, + "grad_norm": 1.8940653800964355, + "learning_rate": 9.709536675450087e-06, + "loss": 0.8993, + "step": 6782 + }, + { + "epoch": 0.13578560168155543, + "grad_norm": 1.037412405014038, + "learning_rate": 9.709427781281448e-06, + "loss": 0.3102, + "step": 6783 + }, + { + "epoch": 0.13580562019868378, + "grad_norm": 1.2178394794464111, + "learning_rate": 9.709318867315388e-06, + "loss": 0.3201, + "step": 6784 + }, + { + "epoch": 0.13582563871581213, + "grad_norm": 1.0287821292877197, + "learning_rate": 9.709209933552367e-06, + "loss": 0.3261, + "step": 6785 + }, + { + "epoch": 0.13584565723294048, + "grad_norm": 1.1532042026519775, + "learning_rate": 9.709100979992837e-06, + "loss": 0.3603, + "step": 6786 + }, + { + "epoch": 0.1358656757500688, + "grad_norm": 1.1267855167388916, + "learning_rate": 9.708992006637262e-06, + "loss": 0.3626, + "step": 6787 + }, + { + "epoch": 0.13588569426719715, + "grad_norm": 1.1609238386154175, + "learning_rate": 9.708883013486097e-06, + "loss": 0.324, + "step": 6788 + }, + { + "epoch": 0.1359057127843255, + "grad_norm": 1.1561014652252197, + "learning_rate": 9.708774000539802e-06, + "loss": 0.3103, + "step": 6789 + }, + { + "epoch": 0.13592573130145386, + "grad_norm": 1.0917832851409912, + "learning_rate": 9.708664967798835e-06, + "loss": 0.368, + "step": 6790 + }, + { + "epoch": 0.13594574981858218, + "grad_norm": 1.906381368637085, + "learning_rate": 9.708555915263653e-06, + "loss": 0.7991, + "step": 6791 + }, + { + "epoch": 0.13596576833571053, + "grad_norm": 1.8038350343704224, + "learning_rate": 9.708446842934715e-06, + "loss": 0.8369, + "step": 6792 + }, + { + "epoch": 0.13598578685283888, + "grad_norm": 1.0302793979644775, + "learning_rate": 9.70833775081248e-06, + "loss": 0.3057, + "step": 6793 + }, + { + "epoch": 0.13600580536996723, + "grad_norm": 1.1278808116912842, + "learning_rate": 9.708228638897407e-06, + "loss": 0.398, + "step": 6794 + }, + { + "epoch": 0.13602582388709555, + "grad_norm": 1.275697112083435, + "learning_rate": 9.708119507189954e-06, + "loss": 0.3334, + "step": 6795 + }, + { + "epoch": 0.1360458424042239, + "grad_norm": 1.0484554767608643, + "learning_rate": 9.70801035569058e-06, + "loss": 0.3082, + "step": 6796 + }, + { + "epoch": 0.13606586092135226, + "grad_norm": 1.1476736068725586, + "learning_rate": 9.707901184399742e-06, + "loss": 0.3591, + "step": 6797 + }, + { + "epoch": 0.1360858794384806, + "grad_norm": 1.106402039527893, + "learning_rate": 9.707791993317901e-06, + "loss": 0.3032, + "step": 6798 + }, + { + "epoch": 0.13610589795560893, + "grad_norm": 1.1392631530761719, + "learning_rate": 9.707682782445517e-06, + "loss": 0.3491, + "step": 6799 + }, + { + "epoch": 0.13612591647273728, + "grad_norm": 1.0276297330856323, + "learning_rate": 9.707573551783047e-06, + "loss": 0.3093, + "step": 6800 + }, + { + "epoch": 0.13614593498986563, + "grad_norm": 1.180768609046936, + "learning_rate": 9.707464301330949e-06, + "loss": 0.3066, + "step": 6801 + }, + { + "epoch": 0.13616595350699398, + "grad_norm": 1.025296926498413, + "learning_rate": 9.707355031089687e-06, + "loss": 0.3186, + "step": 6802 + }, + { + "epoch": 0.1361859720241223, + "grad_norm": 1.8550466299057007, + "learning_rate": 9.707245741059717e-06, + "loss": 0.7932, + "step": 6803 + }, + { + "epoch": 0.13620599054125065, + "grad_norm": 1.807600736618042, + "learning_rate": 9.707136431241498e-06, + "loss": 0.8037, + "step": 6804 + }, + { + "epoch": 0.136226009058379, + "grad_norm": 1.1442123651504517, + "learning_rate": 9.70702710163549e-06, + "loss": 0.3334, + "step": 6805 + }, + { + "epoch": 0.13624602757550736, + "grad_norm": 1.061274528503418, + "learning_rate": 9.706917752242153e-06, + "loss": 0.3004, + "step": 6806 + }, + { + "epoch": 0.13626604609263568, + "grad_norm": 1.0564666986465454, + "learning_rate": 9.706808383061948e-06, + "loss": 0.3419, + "step": 6807 + }, + { + "epoch": 0.13628606460976403, + "grad_norm": 1.3174246549606323, + "learning_rate": 9.706698994095334e-06, + "loss": 0.3332, + "step": 6808 + }, + { + "epoch": 0.13630608312689238, + "grad_norm": 1.022478699684143, + "learning_rate": 9.706589585342768e-06, + "loss": 0.3454, + "step": 6809 + }, + { + "epoch": 0.13632610164402073, + "grad_norm": 1.2079474925994873, + "learning_rate": 9.706480156804714e-06, + "loss": 0.3454, + "step": 6810 + }, + { + "epoch": 0.13634612016114905, + "grad_norm": 1.0548207759857178, + "learning_rate": 9.706370708481629e-06, + "loss": 0.3426, + "step": 6811 + }, + { + "epoch": 0.1363661386782774, + "grad_norm": 1.0554134845733643, + "learning_rate": 9.706261240373975e-06, + "loss": 0.336, + "step": 6812 + }, + { + "epoch": 0.13638615719540575, + "grad_norm": 1.7972086668014526, + "learning_rate": 9.706151752482211e-06, + "loss": 0.8462, + "step": 6813 + }, + { + "epoch": 0.1364061757125341, + "grad_norm": 0.9883899092674255, + "learning_rate": 9.706042244806799e-06, + "loss": 0.321, + "step": 6814 + }, + { + "epoch": 0.13642619422966243, + "grad_norm": 1.0032764673233032, + "learning_rate": 9.705932717348197e-06, + "loss": 0.3476, + "step": 6815 + }, + { + "epoch": 0.13644621274679078, + "grad_norm": 1.090331792831421, + "learning_rate": 9.705823170106868e-06, + "loss": 0.3387, + "step": 6816 + }, + { + "epoch": 0.13646623126391913, + "grad_norm": 1.0806944370269775, + "learning_rate": 9.70571360308327e-06, + "loss": 0.3554, + "step": 6817 + }, + { + "epoch": 0.13648624978104748, + "grad_norm": 1.0080631971359253, + "learning_rate": 9.705604016277866e-06, + "loss": 0.309, + "step": 6818 + }, + { + "epoch": 0.1365062682981758, + "grad_norm": 1.1155697107315063, + "learning_rate": 9.705494409691115e-06, + "loss": 0.3117, + "step": 6819 + }, + { + "epoch": 0.13652628681530415, + "grad_norm": 1.0880341529846191, + "learning_rate": 9.705384783323476e-06, + "loss": 0.3183, + "step": 6820 + }, + { + "epoch": 0.1365463053324325, + "grad_norm": 1.0976721048355103, + "learning_rate": 9.705275137175414e-06, + "loss": 0.3234, + "step": 6821 + }, + { + "epoch": 0.13656632384956086, + "grad_norm": 1.1783584356307983, + "learning_rate": 9.705165471247386e-06, + "loss": 0.3736, + "step": 6822 + }, + { + "epoch": 0.13658634236668918, + "grad_norm": 1.0760515928268433, + "learning_rate": 9.705055785539856e-06, + "loss": 0.3684, + "step": 6823 + }, + { + "epoch": 0.13660636088381753, + "grad_norm": 1.084916114807129, + "learning_rate": 9.704946080053285e-06, + "loss": 0.3163, + "step": 6824 + }, + { + "epoch": 0.13662637940094588, + "grad_norm": 1.1016472578048706, + "learning_rate": 9.704836354788131e-06, + "loss": 0.3323, + "step": 6825 + }, + { + "epoch": 0.13664639791807423, + "grad_norm": 1.0239633321762085, + "learning_rate": 9.704726609744861e-06, + "loss": 0.3432, + "step": 6826 + }, + { + "epoch": 0.13666641643520255, + "grad_norm": 1.1488909721374512, + "learning_rate": 9.70461684492393e-06, + "loss": 0.3098, + "step": 6827 + }, + { + "epoch": 0.1366864349523309, + "grad_norm": 1.07908296585083, + "learning_rate": 9.704507060325803e-06, + "loss": 0.3315, + "step": 6828 + }, + { + "epoch": 0.13670645346945925, + "grad_norm": 1.1004350185394287, + "learning_rate": 9.70439725595094e-06, + "loss": 0.3204, + "step": 6829 + }, + { + "epoch": 0.1367264719865876, + "grad_norm": 1.1500353813171387, + "learning_rate": 9.704287431799804e-06, + "loss": 0.3344, + "step": 6830 + }, + { + "epoch": 0.13674649050371593, + "grad_norm": 1.1283326148986816, + "learning_rate": 9.704177587872858e-06, + "loss": 0.3829, + "step": 6831 + }, + { + "epoch": 0.13676650902084428, + "grad_norm": 1.1798287630081177, + "learning_rate": 9.704067724170558e-06, + "loss": 0.328, + "step": 6832 + }, + { + "epoch": 0.13678652753797263, + "grad_norm": 1.1630533933639526, + "learning_rate": 9.703957840693373e-06, + "loss": 0.3784, + "step": 6833 + }, + { + "epoch": 0.13680654605510098, + "grad_norm": 1.1538820266723633, + "learning_rate": 9.70384793744176e-06, + "loss": 0.3538, + "step": 6834 + }, + { + "epoch": 0.1368265645722293, + "grad_norm": 1.2018616199493408, + "learning_rate": 9.703738014416181e-06, + "loss": 0.3372, + "step": 6835 + }, + { + "epoch": 0.13684658308935765, + "grad_norm": 1.2257919311523438, + "learning_rate": 9.703628071617101e-06, + "loss": 0.3477, + "step": 6836 + }, + { + "epoch": 0.136866601606486, + "grad_norm": 1.107135534286499, + "learning_rate": 9.703518109044982e-06, + "loss": 0.3569, + "step": 6837 + }, + { + "epoch": 0.13688662012361436, + "grad_norm": 1.2788028717041016, + "learning_rate": 9.703408126700284e-06, + "loss": 0.3172, + "step": 6838 + }, + { + "epoch": 0.13690663864074268, + "grad_norm": 1.1149821281433105, + "learning_rate": 9.70329812458347e-06, + "loss": 0.2797, + "step": 6839 + }, + { + "epoch": 0.13692665715787103, + "grad_norm": 1.2463610172271729, + "learning_rate": 9.703188102695004e-06, + "loss": 0.3231, + "step": 6840 + }, + { + "epoch": 0.13694667567499938, + "grad_norm": 1.149895191192627, + "learning_rate": 9.703078061035348e-06, + "loss": 0.3481, + "step": 6841 + }, + { + "epoch": 0.13696669419212773, + "grad_norm": 1.0324183702468872, + "learning_rate": 9.70296799960496e-06, + "loss": 0.2598, + "step": 6842 + }, + { + "epoch": 0.13698671270925605, + "grad_norm": 1.0917689800262451, + "learning_rate": 9.70285791840431e-06, + "loss": 0.2753, + "step": 6843 + }, + { + "epoch": 0.1370067312263844, + "grad_norm": 1.988234043121338, + "learning_rate": 9.702747817433857e-06, + "loss": 0.843, + "step": 6844 + }, + { + "epoch": 0.13702674974351275, + "grad_norm": 1.1382176876068115, + "learning_rate": 9.702637696694064e-06, + "loss": 0.3401, + "step": 6845 + }, + { + "epoch": 0.1370467682606411, + "grad_norm": 1.1583616733551025, + "learning_rate": 9.702527556185394e-06, + "loss": 0.295, + "step": 6846 + }, + { + "epoch": 0.13706678677776943, + "grad_norm": 1.144903540611267, + "learning_rate": 9.702417395908311e-06, + "loss": 0.3296, + "step": 6847 + }, + { + "epoch": 0.13708680529489778, + "grad_norm": 1.096016764640808, + "learning_rate": 9.702307215863275e-06, + "loss": 0.3141, + "step": 6848 + }, + { + "epoch": 0.13710682381202613, + "grad_norm": 1.8264421224594116, + "learning_rate": 9.702197016050753e-06, + "loss": 0.9022, + "step": 6849 + }, + { + "epoch": 0.13712684232915448, + "grad_norm": 1.0917634963989258, + "learning_rate": 9.702086796471206e-06, + "loss": 0.3246, + "step": 6850 + }, + { + "epoch": 0.1371468608462828, + "grad_norm": 1.1311746835708618, + "learning_rate": 9.701976557125098e-06, + "loss": 0.3395, + "step": 6851 + }, + { + "epoch": 0.13716687936341115, + "grad_norm": 1.065631628036499, + "learning_rate": 9.701866298012894e-06, + "loss": 0.2979, + "step": 6852 + }, + { + "epoch": 0.1371868978805395, + "grad_norm": 1.183111310005188, + "learning_rate": 9.701756019135054e-06, + "loss": 0.3814, + "step": 6853 + }, + { + "epoch": 0.13720691639766786, + "grad_norm": 1.1029537916183472, + "learning_rate": 9.701645720492044e-06, + "loss": 0.3753, + "step": 6854 + }, + { + "epoch": 0.13722693491479618, + "grad_norm": 1.1108888387680054, + "learning_rate": 9.701535402084329e-06, + "loss": 0.2977, + "step": 6855 + }, + { + "epoch": 0.13724695343192453, + "grad_norm": 1.0276216268539429, + "learning_rate": 9.701425063912368e-06, + "loss": 0.3436, + "step": 6856 + }, + { + "epoch": 0.13726697194905288, + "grad_norm": 2.183756113052368, + "learning_rate": 9.70131470597663e-06, + "loss": 0.8771, + "step": 6857 + }, + { + "epoch": 0.13728699046618123, + "grad_norm": 1.22761070728302, + "learning_rate": 9.701204328277575e-06, + "loss": 0.3312, + "step": 6858 + }, + { + "epoch": 0.13730700898330955, + "grad_norm": 1.0194144248962402, + "learning_rate": 9.70109393081567e-06, + "loss": 0.3238, + "step": 6859 + }, + { + "epoch": 0.1373270275004379, + "grad_norm": 1.0502839088439941, + "learning_rate": 9.700983513591378e-06, + "loss": 0.3438, + "step": 6860 + }, + { + "epoch": 0.13734704601756625, + "grad_norm": 1.2493683099746704, + "learning_rate": 9.700873076605162e-06, + "loss": 0.3428, + "step": 6861 + }, + { + "epoch": 0.1373670645346946, + "grad_norm": 1.580027461051941, + "learning_rate": 9.700762619857489e-06, + "loss": 0.3759, + "step": 6862 + }, + { + "epoch": 0.13738708305182293, + "grad_norm": 1.0477721691131592, + "learning_rate": 9.700652143348819e-06, + "loss": 0.3361, + "step": 6863 + }, + { + "epoch": 0.13740710156895128, + "grad_norm": 1.1208014488220215, + "learning_rate": 9.70054164707962e-06, + "loss": 0.3404, + "step": 6864 + }, + { + "epoch": 0.13742712008607963, + "grad_norm": 1.0179088115692139, + "learning_rate": 9.700431131050357e-06, + "loss": 0.315, + "step": 6865 + }, + { + "epoch": 0.13744713860320798, + "grad_norm": 1.1756985187530518, + "learning_rate": 9.700320595261491e-06, + "loss": 0.3687, + "step": 6866 + }, + { + "epoch": 0.1374671571203363, + "grad_norm": 1.1183675527572632, + "learning_rate": 9.700210039713488e-06, + "loss": 0.3238, + "step": 6867 + }, + { + "epoch": 0.13748717563746465, + "grad_norm": 1.1962755918502808, + "learning_rate": 9.700099464406817e-06, + "loss": 0.3327, + "step": 6868 + }, + { + "epoch": 0.137507194154593, + "grad_norm": 1.123247504234314, + "learning_rate": 9.699988869341936e-06, + "loss": 0.3574, + "step": 6869 + }, + { + "epoch": 0.13752721267172135, + "grad_norm": 1.1507996320724487, + "learning_rate": 9.699878254519316e-06, + "loss": 0.3474, + "step": 6870 + }, + { + "epoch": 0.13754723118884968, + "grad_norm": 1.7935082912445068, + "learning_rate": 9.699767619939417e-06, + "loss": 0.7923, + "step": 6871 + }, + { + "epoch": 0.13756724970597803, + "grad_norm": 1.2961797714233398, + "learning_rate": 9.699656965602709e-06, + "loss": 0.3299, + "step": 6872 + }, + { + "epoch": 0.13758726822310638, + "grad_norm": 1.7860639095306396, + "learning_rate": 9.699546291509652e-06, + "loss": 0.9217, + "step": 6873 + }, + { + "epoch": 0.13760728674023473, + "grad_norm": 1.2317957878112793, + "learning_rate": 9.699435597660713e-06, + "loss": 0.3472, + "step": 6874 + }, + { + "epoch": 0.13762730525736305, + "grad_norm": 1.1547430753707886, + "learning_rate": 9.69932488405636e-06, + "loss": 0.3222, + "step": 6875 + }, + { + "epoch": 0.1376473237744914, + "grad_norm": 1.039678692817688, + "learning_rate": 9.699214150697057e-06, + "loss": 0.3531, + "step": 6876 + }, + { + "epoch": 0.13766734229161975, + "grad_norm": 1.1064952611923218, + "learning_rate": 9.699103397583267e-06, + "loss": 0.2913, + "step": 6877 + }, + { + "epoch": 0.1376873608087481, + "grad_norm": 1.1084628105163574, + "learning_rate": 9.698992624715458e-06, + "loss": 0.3547, + "step": 6878 + }, + { + "epoch": 0.13770737932587643, + "grad_norm": 1.1123253107070923, + "learning_rate": 9.698881832094096e-06, + "loss": 0.3249, + "step": 6879 + }, + { + "epoch": 0.13772739784300478, + "grad_norm": 1.1115162372589111, + "learning_rate": 9.698771019719645e-06, + "loss": 0.3036, + "step": 6880 + }, + { + "epoch": 0.13774741636013313, + "grad_norm": 1.2014553546905518, + "learning_rate": 9.698660187592575e-06, + "loss": 0.3443, + "step": 6881 + }, + { + "epoch": 0.13776743487726148, + "grad_norm": 1.1480379104614258, + "learning_rate": 9.698549335713346e-06, + "loss": 0.3469, + "step": 6882 + }, + { + "epoch": 0.1377874533943898, + "grad_norm": 1.2343831062316895, + "learning_rate": 9.698438464082427e-06, + "loss": 0.3533, + "step": 6883 + }, + { + "epoch": 0.13780747191151815, + "grad_norm": 1.0365945100784302, + "learning_rate": 9.698327572700284e-06, + "loss": 0.3106, + "step": 6884 + }, + { + "epoch": 0.1378274904286465, + "grad_norm": 1.0715489387512207, + "learning_rate": 9.69821666156738e-06, + "loss": 0.2886, + "step": 6885 + }, + { + "epoch": 0.13784750894577485, + "grad_norm": 1.0379022359848022, + "learning_rate": 9.698105730684188e-06, + "loss": 0.3417, + "step": 6886 + }, + { + "epoch": 0.13786752746290318, + "grad_norm": 1.1374130249023438, + "learning_rate": 9.69799478005117e-06, + "loss": 0.3606, + "step": 6887 + }, + { + "epoch": 0.13788754598003153, + "grad_norm": 1.8784011602401733, + "learning_rate": 9.697883809668793e-06, + "loss": 0.8766, + "step": 6888 + }, + { + "epoch": 0.13790756449715988, + "grad_norm": 1.099941611289978, + "learning_rate": 9.697772819537523e-06, + "loss": 0.3141, + "step": 6889 + }, + { + "epoch": 0.13792758301428823, + "grad_norm": 1.0324846506118774, + "learning_rate": 9.697661809657827e-06, + "loss": 0.3223, + "step": 6890 + }, + { + "epoch": 0.13794760153141655, + "grad_norm": 1.0663495063781738, + "learning_rate": 9.697550780030171e-06, + "loss": 0.3106, + "step": 6891 + }, + { + "epoch": 0.1379676200485449, + "grad_norm": 1.1016290187835693, + "learning_rate": 9.697439730655025e-06, + "loss": 0.3287, + "step": 6892 + }, + { + "epoch": 0.13798763856567325, + "grad_norm": 1.0566672086715698, + "learning_rate": 9.69732866153285e-06, + "loss": 0.324, + "step": 6893 + }, + { + "epoch": 0.1380076570828016, + "grad_norm": 1.1295630931854248, + "learning_rate": 9.697217572664118e-06, + "loss": 0.3096, + "step": 6894 + }, + { + "epoch": 0.13802767559992993, + "grad_norm": 1.0306090116500854, + "learning_rate": 9.697106464049294e-06, + "loss": 0.3171, + "step": 6895 + }, + { + "epoch": 0.13804769411705828, + "grad_norm": 1.0821726322174072, + "learning_rate": 9.696995335688846e-06, + "loss": 0.3172, + "step": 6896 + }, + { + "epoch": 0.13806771263418663, + "grad_norm": 1.192863941192627, + "learning_rate": 9.696884187583239e-06, + "loss": 0.3294, + "step": 6897 + }, + { + "epoch": 0.13808773115131498, + "grad_norm": 1.1775108575820923, + "learning_rate": 9.696773019732942e-06, + "loss": 0.3568, + "step": 6898 + }, + { + "epoch": 0.1381077496684433, + "grad_norm": 1.1366995573043823, + "learning_rate": 9.696661832138422e-06, + "loss": 0.321, + "step": 6899 + }, + { + "epoch": 0.13812776818557165, + "grad_norm": 1.0610084533691406, + "learning_rate": 9.696550624800148e-06, + "loss": 0.3314, + "step": 6900 + }, + { + "epoch": 0.1381477867027, + "grad_norm": 1.0835587978363037, + "learning_rate": 9.696439397718583e-06, + "loss": 0.2913, + "step": 6901 + }, + { + "epoch": 0.13816780521982835, + "grad_norm": 1.1499499082565308, + "learning_rate": 9.696328150894199e-06, + "loss": 0.2848, + "step": 6902 + }, + { + "epoch": 0.13818782373695668, + "grad_norm": 1.1122804880142212, + "learning_rate": 9.696216884327462e-06, + "loss": 0.3235, + "step": 6903 + }, + { + "epoch": 0.13820784225408503, + "grad_norm": 1.2483779191970825, + "learning_rate": 9.69610559801884e-06, + "loss": 0.383, + "step": 6904 + }, + { + "epoch": 0.13822786077121338, + "grad_norm": 1.9439197778701782, + "learning_rate": 9.6959942919688e-06, + "loss": 0.8009, + "step": 6905 + }, + { + "epoch": 0.13824787928834173, + "grad_norm": 1.1603366136550903, + "learning_rate": 9.695882966177812e-06, + "loss": 0.3286, + "step": 6906 + }, + { + "epoch": 0.13826789780547005, + "grad_norm": 1.799537181854248, + "learning_rate": 9.695771620646342e-06, + "loss": 0.8083, + "step": 6907 + }, + { + "epoch": 0.1382879163225984, + "grad_norm": 1.0137639045715332, + "learning_rate": 9.695660255374857e-06, + "loss": 0.2724, + "step": 6908 + }, + { + "epoch": 0.13830793483972675, + "grad_norm": 1.119105577468872, + "learning_rate": 9.69554887036383e-06, + "loss": 0.3113, + "step": 6909 + }, + { + "epoch": 0.1383279533568551, + "grad_norm": 1.062191367149353, + "learning_rate": 9.695437465613723e-06, + "loss": 0.3361, + "step": 6910 + }, + { + "epoch": 0.13834797187398343, + "grad_norm": 1.903435468673706, + "learning_rate": 9.695326041125008e-06, + "loss": 0.9057, + "step": 6911 + }, + { + "epoch": 0.13836799039111178, + "grad_norm": 1.109925627708435, + "learning_rate": 9.695214596898153e-06, + "loss": 0.311, + "step": 6912 + }, + { + "epoch": 0.13838800890824013, + "grad_norm": 1.063989520072937, + "learning_rate": 9.695103132933628e-06, + "loss": 0.3469, + "step": 6913 + }, + { + "epoch": 0.13840802742536848, + "grad_norm": 1.133002519607544, + "learning_rate": 9.694991649231897e-06, + "loss": 0.314, + "step": 6914 + }, + { + "epoch": 0.1384280459424968, + "grad_norm": 1.081836223602295, + "learning_rate": 9.694880145793433e-06, + "loss": 0.31, + "step": 6915 + }, + { + "epoch": 0.13844806445962515, + "grad_norm": 1.0072565078735352, + "learning_rate": 9.694768622618705e-06, + "loss": 0.26, + "step": 6916 + }, + { + "epoch": 0.1384680829767535, + "grad_norm": 1.133890151977539, + "learning_rate": 9.694657079708177e-06, + "loss": 0.3273, + "step": 6917 + }, + { + "epoch": 0.13848810149388185, + "grad_norm": 1.0725189447402954, + "learning_rate": 9.694545517062321e-06, + "loss": 0.323, + "step": 6918 + }, + { + "epoch": 0.13850812001101018, + "grad_norm": 1.088349461555481, + "learning_rate": 9.694433934681608e-06, + "loss": 0.3199, + "step": 6919 + }, + { + "epoch": 0.13852813852813853, + "grad_norm": 1.0077892541885376, + "learning_rate": 9.694322332566504e-06, + "loss": 0.3316, + "step": 6920 + }, + { + "epoch": 0.13854815704526688, + "grad_norm": 1.2367048263549805, + "learning_rate": 9.694210710717479e-06, + "loss": 0.2911, + "step": 6921 + }, + { + "epoch": 0.13856817556239523, + "grad_norm": 1.1938735246658325, + "learning_rate": 9.694099069135002e-06, + "loss": 0.3569, + "step": 6922 + }, + { + "epoch": 0.13858819407952355, + "grad_norm": 1.2206465005874634, + "learning_rate": 9.693987407819544e-06, + "loss": 0.3409, + "step": 6923 + }, + { + "epoch": 0.1386082125966519, + "grad_norm": 1.1686705350875854, + "learning_rate": 9.693875726771572e-06, + "loss": 0.3384, + "step": 6924 + }, + { + "epoch": 0.13862823111378025, + "grad_norm": 1.0326236486434937, + "learning_rate": 9.693764025991557e-06, + "loss": 0.3426, + "step": 6925 + }, + { + "epoch": 0.13864824963090858, + "grad_norm": 2.0804824829101562, + "learning_rate": 9.693652305479968e-06, + "loss": 0.8891, + "step": 6926 + }, + { + "epoch": 0.13866826814803693, + "grad_norm": 1.1866555213928223, + "learning_rate": 9.693540565237275e-06, + "loss": 0.3282, + "step": 6927 + }, + { + "epoch": 0.13868828666516528, + "grad_norm": 1.0381542444229126, + "learning_rate": 9.693428805263948e-06, + "loss": 0.3428, + "step": 6928 + }, + { + "epoch": 0.13870830518229363, + "grad_norm": 2.0176408290863037, + "learning_rate": 9.693317025560455e-06, + "loss": 0.8235, + "step": 6929 + }, + { + "epoch": 0.13872832369942195, + "grad_norm": 1.0486245155334473, + "learning_rate": 9.693205226127267e-06, + "loss": 0.3424, + "step": 6930 + }, + { + "epoch": 0.1387483422165503, + "grad_norm": 1.033315658569336, + "learning_rate": 9.693093406964855e-06, + "loss": 0.3033, + "step": 6931 + }, + { + "epoch": 0.13876836073367865, + "grad_norm": 1.1383594274520874, + "learning_rate": 9.69298156807369e-06, + "loss": 0.3483, + "step": 6932 + }, + { + "epoch": 0.138788379250807, + "grad_norm": 1.0908787250518799, + "learning_rate": 9.692869709454238e-06, + "loss": 0.3507, + "step": 6933 + }, + { + "epoch": 0.13880839776793533, + "grad_norm": 1.051340103149414, + "learning_rate": 9.692757831106973e-06, + "loss": 0.2974, + "step": 6934 + }, + { + "epoch": 0.13882841628506368, + "grad_norm": 1.1065285205841064, + "learning_rate": 9.692645933032362e-06, + "loss": 0.3148, + "step": 6935 + }, + { + "epoch": 0.13884843480219203, + "grad_norm": 1.0391744375228882, + "learning_rate": 9.69253401523088e-06, + "loss": 0.2707, + "step": 6936 + }, + { + "epoch": 0.13886845331932038, + "grad_norm": 1.122929573059082, + "learning_rate": 9.692422077702995e-06, + "loss": 0.3389, + "step": 6937 + }, + { + "epoch": 0.1388884718364487, + "grad_norm": 1.0802719593048096, + "learning_rate": 9.692310120449175e-06, + "loss": 0.3575, + "step": 6938 + }, + { + "epoch": 0.13890849035357705, + "grad_norm": 1.0543098449707031, + "learning_rate": 9.692198143469893e-06, + "loss": 0.3339, + "step": 6939 + }, + { + "epoch": 0.1389285088707054, + "grad_norm": 1.1648740768432617, + "learning_rate": 9.692086146765622e-06, + "loss": 0.2911, + "step": 6940 + }, + { + "epoch": 0.13894852738783375, + "grad_norm": 1.1017965078353882, + "learning_rate": 9.691974130336829e-06, + "loss": 0.3579, + "step": 6941 + }, + { + "epoch": 0.13896854590496208, + "grad_norm": 1.2398887872695923, + "learning_rate": 9.691862094183988e-06, + "loss": 0.2975, + "step": 6942 + }, + { + "epoch": 0.13898856442209043, + "grad_norm": 1.1847478151321411, + "learning_rate": 9.691750038307566e-06, + "loss": 0.3569, + "step": 6943 + }, + { + "epoch": 0.13900858293921878, + "grad_norm": 1.0916811227798462, + "learning_rate": 9.691637962708037e-06, + "loss": 0.3121, + "step": 6944 + }, + { + "epoch": 0.13902860145634713, + "grad_norm": 1.1228249073028564, + "learning_rate": 9.691525867385874e-06, + "loss": 0.3789, + "step": 6945 + }, + { + "epoch": 0.13904861997347545, + "grad_norm": 1.8276885747909546, + "learning_rate": 9.691413752341546e-06, + "loss": 0.7893, + "step": 6946 + }, + { + "epoch": 0.1390686384906038, + "grad_norm": 1.0798879861831665, + "learning_rate": 9.691301617575521e-06, + "loss": 0.3255, + "step": 6947 + }, + { + "epoch": 0.13908865700773215, + "grad_norm": 1.0247958898544312, + "learning_rate": 9.691189463088275e-06, + "loss": 0.3287, + "step": 6948 + }, + { + "epoch": 0.1391086755248605, + "grad_norm": 1.1315659284591675, + "learning_rate": 9.69107728888028e-06, + "loss": 0.3612, + "step": 6949 + }, + { + "epoch": 0.13912869404198883, + "grad_norm": 1.1262439489364624, + "learning_rate": 9.690965094952002e-06, + "loss": 0.3086, + "step": 6950 + }, + { + "epoch": 0.13914871255911718, + "grad_norm": 1.985521674156189, + "learning_rate": 9.690852881303918e-06, + "loss": 0.8335, + "step": 6951 + }, + { + "epoch": 0.13916873107624553, + "grad_norm": 1.1526981592178345, + "learning_rate": 9.690740647936499e-06, + "loss": 0.3857, + "step": 6952 + }, + { + "epoch": 0.13918874959337388, + "grad_norm": 1.0678625106811523, + "learning_rate": 9.690628394850214e-06, + "loss": 0.3281, + "step": 6953 + }, + { + "epoch": 0.1392087681105022, + "grad_norm": 1.1814990043640137, + "learning_rate": 9.690516122045539e-06, + "loss": 0.3129, + "step": 6954 + }, + { + "epoch": 0.13922878662763055, + "grad_norm": 1.0760489702224731, + "learning_rate": 9.690403829522943e-06, + "loss": 0.3326, + "step": 6955 + }, + { + "epoch": 0.1392488051447589, + "grad_norm": 1.9126065969467163, + "learning_rate": 9.690291517282897e-06, + "loss": 0.8588, + "step": 6956 + }, + { + "epoch": 0.13926882366188725, + "grad_norm": 1.163390874862671, + "learning_rate": 9.690179185325876e-06, + "loss": 0.3769, + "step": 6957 + }, + { + "epoch": 0.13928884217901558, + "grad_norm": 1.1557910442352295, + "learning_rate": 9.69006683365235e-06, + "loss": 0.34, + "step": 6958 + }, + { + "epoch": 0.13930886069614393, + "grad_norm": 1.2405563592910767, + "learning_rate": 9.689954462262794e-06, + "loss": 0.4028, + "step": 6959 + }, + { + "epoch": 0.13932887921327228, + "grad_norm": 1.0492080450057983, + "learning_rate": 9.689842071157678e-06, + "loss": 0.2891, + "step": 6960 + }, + { + "epoch": 0.13934889773040063, + "grad_norm": 1.0765935182571411, + "learning_rate": 9.689729660337474e-06, + "loss": 0.2905, + "step": 6961 + }, + { + "epoch": 0.13936891624752895, + "grad_norm": 1.0152970552444458, + "learning_rate": 9.689617229802657e-06, + "loss": 0.2955, + "step": 6962 + }, + { + "epoch": 0.1393889347646573, + "grad_norm": 1.0990002155303955, + "learning_rate": 9.689504779553698e-06, + "loss": 0.3064, + "step": 6963 + }, + { + "epoch": 0.13940895328178565, + "grad_norm": 1.1042711734771729, + "learning_rate": 9.68939230959107e-06, + "loss": 0.3575, + "step": 6964 + }, + { + "epoch": 0.139428971798914, + "grad_norm": 1.0860389471054077, + "learning_rate": 9.689279819915247e-06, + "loss": 0.2731, + "step": 6965 + }, + { + "epoch": 0.13944899031604233, + "grad_norm": 1.1669923067092896, + "learning_rate": 9.6891673105267e-06, + "loss": 0.3327, + "step": 6966 + }, + { + "epoch": 0.13946900883317068, + "grad_norm": 1.0185400247573853, + "learning_rate": 9.689054781425901e-06, + "loss": 0.3026, + "step": 6967 + }, + { + "epoch": 0.13948902735029903, + "grad_norm": 1.083240270614624, + "learning_rate": 9.688942232613328e-06, + "loss": 0.3197, + "step": 6968 + }, + { + "epoch": 0.13950904586742738, + "grad_norm": 1.105974555015564, + "learning_rate": 9.68882966408945e-06, + "loss": 0.2953, + "step": 6969 + }, + { + "epoch": 0.1395290643845557, + "grad_norm": 1.4820740222930908, + "learning_rate": 9.688717075854739e-06, + "loss": 0.3167, + "step": 6970 + }, + { + "epoch": 0.13954908290168405, + "grad_norm": 1.0406438112258911, + "learning_rate": 9.688604467909672e-06, + "loss": 0.2868, + "step": 6971 + }, + { + "epoch": 0.1395691014188124, + "grad_norm": 1.9887216091156006, + "learning_rate": 9.688491840254721e-06, + "loss": 0.8841, + "step": 6972 + }, + { + "epoch": 0.13958911993594075, + "grad_norm": 1.0042381286621094, + "learning_rate": 9.688379192890359e-06, + "loss": 0.39, + "step": 6973 + }, + { + "epoch": 0.13960913845306908, + "grad_norm": 1.0784459114074707, + "learning_rate": 9.688266525817061e-06, + "loss": 0.3276, + "step": 6974 + }, + { + "epoch": 0.13962915697019743, + "grad_norm": 1.0142782926559448, + "learning_rate": 9.688153839035297e-06, + "loss": 0.3036, + "step": 6975 + }, + { + "epoch": 0.13964917548732578, + "grad_norm": 1.9032169580459595, + "learning_rate": 9.688041132545545e-06, + "loss": 0.8394, + "step": 6976 + }, + { + "epoch": 0.13966919400445413, + "grad_norm": 1.293859601020813, + "learning_rate": 9.687928406348277e-06, + "loss": 0.3514, + "step": 6977 + }, + { + "epoch": 0.13968921252158245, + "grad_norm": 1.0764647722244263, + "learning_rate": 9.687815660443966e-06, + "loss": 0.3177, + "step": 6978 + }, + { + "epoch": 0.1397092310387108, + "grad_norm": 1.9531248807907104, + "learning_rate": 9.687702894833086e-06, + "loss": 0.8336, + "step": 6979 + }, + { + "epoch": 0.13972924955583915, + "grad_norm": 1.1172585487365723, + "learning_rate": 9.687590109516115e-06, + "loss": 0.3892, + "step": 6980 + }, + { + "epoch": 0.1397492680729675, + "grad_norm": 1.7794188261032104, + "learning_rate": 9.687477304493522e-06, + "loss": 0.8417, + "step": 6981 + }, + { + "epoch": 0.13976928659009583, + "grad_norm": 1.058469533920288, + "learning_rate": 9.687364479765782e-06, + "loss": 0.2898, + "step": 6982 + }, + { + "epoch": 0.13978930510722418, + "grad_norm": 1.9453635215759277, + "learning_rate": 9.687251635333373e-06, + "loss": 0.8547, + "step": 6983 + }, + { + "epoch": 0.13980932362435253, + "grad_norm": 1.1488869190216064, + "learning_rate": 9.687138771196764e-06, + "loss": 0.3102, + "step": 6984 + }, + { + "epoch": 0.13982934214148088, + "grad_norm": 1.0712130069732666, + "learning_rate": 9.687025887356434e-06, + "loss": 0.3518, + "step": 6985 + }, + { + "epoch": 0.1398493606586092, + "grad_norm": 1.1032907962799072, + "learning_rate": 9.686912983812856e-06, + "loss": 0.3635, + "step": 6986 + }, + { + "epoch": 0.13986937917573755, + "grad_norm": 1.3364216089248657, + "learning_rate": 9.686800060566503e-06, + "loss": 0.3856, + "step": 6987 + }, + { + "epoch": 0.1398893976928659, + "grad_norm": 1.059266209602356, + "learning_rate": 9.686687117617853e-06, + "loss": 0.3333, + "step": 6988 + }, + { + "epoch": 0.13990941620999425, + "grad_norm": 1.2046605348587036, + "learning_rate": 9.686574154967377e-06, + "loss": 0.3436, + "step": 6989 + }, + { + "epoch": 0.13992943472712258, + "grad_norm": 1.768189787864685, + "learning_rate": 9.686461172615553e-06, + "loss": 0.807, + "step": 6990 + }, + { + "epoch": 0.13994945324425093, + "grad_norm": 1.2851632833480835, + "learning_rate": 9.686348170562854e-06, + "loss": 0.3563, + "step": 6991 + }, + { + "epoch": 0.13996947176137928, + "grad_norm": 1.06267511844635, + "learning_rate": 9.686235148809756e-06, + "loss": 0.3258, + "step": 6992 + }, + { + "epoch": 0.13998949027850763, + "grad_norm": 1.9270027875900269, + "learning_rate": 9.686122107356734e-06, + "loss": 0.784, + "step": 6993 + }, + { + "epoch": 0.14000950879563595, + "grad_norm": 1.6242198944091797, + "learning_rate": 9.686009046204266e-06, + "loss": 0.3526, + "step": 6994 + }, + { + "epoch": 0.1400295273127643, + "grad_norm": 1.2584102153778076, + "learning_rate": 9.685895965352821e-06, + "loss": 0.3633, + "step": 6995 + }, + { + "epoch": 0.14004954582989265, + "grad_norm": 1.1217372417449951, + "learning_rate": 9.685782864802879e-06, + "loss": 0.3288, + "step": 6996 + }, + { + "epoch": 0.140069564347021, + "grad_norm": 1.101611852645874, + "learning_rate": 9.685669744554913e-06, + "loss": 0.3417, + "step": 6997 + }, + { + "epoch": 0.14008958286414933, + "grad_norm": 1.2135757207870483, + "learning_rate": 9.685556604609401e-06, + "loss": 0.3159, + "step": 6998 + }, + { + "epoch": 0.14010960138127768, + "grad_norm": 1.1524466276168823, + "learning_rate": 9.685443444966817e-06, + "loss": 0.3324, + "step": 6999 + }, + { + "epoch": 0.14012961989840603, + "grad_norm": 1.0976437330245972, + "learning_rate": 9.685330265627637e-06, + "loss": 0.3025, + "step": 7000 + }, + { + "epoch": 0.14014963841553438, + "grad_norm": 1.0283803939819336, + "learning_rate": 9.685217066592336e-06, + "loss": 0.3048, + "step": 7001 + }, + { + "epoch": 0.1401696569326627, + "grad_norm": 1.1021077632904053, + "learning_rate": 9.685103847861393e-06, + "loss": 0.3517, + "step": 7002 + }, + { + "epoch": 0.14018967544979105, + "grad_norm": 1.210026502609253, + "learning_rate": 9.68499060943528e-06, + "loss": 0.3495, + "step": 7003 + }, + { + "epoch": 0.1402096939669194, + "grad_norm": 1.25302255153656, + "learning_rate": 9.684877351314475e-06, + "loss": 0.3877, + "step": 7004 + }, + { + "epoch": 0.14022971248404775, + "grad_norm": 1.085310459136963, + "learning_rate": 9.684764073499453e-06, + "loss": 0.3107, + "step": 7005 + }, + { + "epoch": 0.14024973100117608, + "grad_norm": 1.207401156425476, + "learning_rate": 9.684650775990692e-06, + "loss": 0.3068, + "step": 7006 + }, + { + "epoch": 0.14026974951830443, + "grad_norm": 1.0921655893325806, + "learning_rate": 9.684537458788667e-06, + "loss": 0.3411, + "step": 7007 + }, + { + "epoch": 0.14028976803543278, + "grad_norm": 1.093906044960022, + "learning_rate": 9.684424121893856e-06, + "loss": 0.3229, + "step": 7008 + }, + { + "epoch": 0.14030978655256113, + "grad_norm": 1.151693344116211, + "learning_rate": 9.684310765306733e-06, + "loss": 0.3533, + "step": 7009 + }, + { + "epoch": 0.14032980506968945, + "grad_norm": 1.095555305480957, + "learning_rate": 9.684197389027773e-06, + "loss": 0.3734, + "step": 7010 + }, + { + "epoch": 0.1403498235868178, + "grad_norm": 1.1341652870178223, + "learning_rate": 9.684083993057461e-06, + "loss": 0.3223, + "step": 7011 + }, + { + "epoch": 0.14036984210394615, + "grad_norm": 1.1185798645019531, + "learning_rate": 9.683970577396263e-06, + "loss": 0.2911, + "step": 7012 + }, + { + "epoch": 0.1403898606210745, + "grad_norm": 1.2830544710159302, + "learning_rate": 9.683857142044662e-06, + "loss": 0.3563, + "step": 7013 + }, + { + "epoch": 0.14040987913820283, + "grad_norm": 1.7663716077804565, + "learning_rate": 9.683743687003134e-06, + "loss": 0.858, + "step": 7014 + }, + { + "epoch": 0.14042989765533118, + "grad_norm": 1.085105538368225, + "learning_rate": 9.683630212272156e-06, + "loss": 0.2876, + "step": 7015 + }, + { + "epoch": 0.14044991617245953, + "grad_norm": 1.162183165550232, + "learning_rate": 9.683516717852203e-06, + "loss": 0.3269, + "step": 7016 + }, + { + "epoch": 0.14046993468958788, + "grad_norm": 1.192633867263794, + "learning_rate": 9.683403203743756e-06, + "loss": 0.3663, + "step": 7017 + }, + { + "epoch": 0.1404899532067162, + "grad_norm": 1.7739461660385132, + "learning_rate": 9.683289669947287e-06, + "loss": 0.8181, + "step": 7018 + }, + { + "epoch": 0.14050997172384455, + "grad_norm": 1.088566780090332, + "learning_rate": 9.683176116463278e-06, + "loss": 0.3205, + "step": 7019 + }, + { + "epoch": 0.1405299902409729, + "grad_norm": 1.0896488428115845, + "learning_rate": 9.683062543292202e-06, + "loss": 0.3257, + "step": 7020 + }, + { + "epoch": 0.14055000875810125, + "grad_norm": 1.3241270780563354, + "learning_rate": 9.682948950434543e-06, + "loss": 0.3188, + "step": 7021 + }, + { + "epoch": 0.14057002727522958, + "grad_norm": 1.1208970546722412, + "learning_rate": 9.682835337890772e-06, + "loss": 0.3284, + "step": 7022 + }, + { + "epoch": 0.14059004579235793, + "grad_norm": 1.1158692836761475, + "learning_rate": 9.682721705661368e-06, + "loss": 0.3147, + "step": 7023 + }, + { + "epoch": 0.14061006430948628, + "grad_norm": 1.154223084449768, + "learning_rate": 9.682608053746812e-06, + "loss": 0.3162, + "step": 7024 + }, + { + "epoch": 0.14063008282661463, + "grad_norm": 1.0560215711593628, + "learning_rate": 9.682494382147579e-06, + "loss": 0.3031, + "step": 7025 + }, + { + "epoch": 0.14065010134374295, + "grad_norm": 1.3336336612701416, + "learning_rate": 9.682380690864145e-06, + "loss": 0.3292, + "step": 7026 + }, + { + "epoch": 0.1406701198608713, + "grad_norm": 1.19513738155365, + "learning_rate": 9.682266979896993e-06, + "loss": 0.3035, + "step": 7027 + }, + { + "epoch": 0.14069013837799965, + "grad_norm": 1.7809897661209106, + "learning_rate": 9.682153249246597e-06, + "loss": 0.8892, + "step": 7028 + }, + { + "epoch": 0.140710156895128, + "grad_norm": 1.1812111139297485, + "learning_rate": 9.682039498913436e-06, + "loss": 0.3104, + "step": 7029 + }, + { + "epoch": 0.14073017541225633, + "grad_norm": 1.11698579788208, + "learning_rate": 9.681925728897988e-06, + "loss": 0.3276, + "step": 7030 + }, + { + "epoch": 0.14075019392938468, + "grad_norm": 1.7379623651504517, + "learning_rate": 9.681811939200735e-06, + "loss": 0.4115, + "step": 7031 + }, + { + "epoch": 0.14077021244651303, + "grad_norm": 1.2405322790145874, + "learning_rate": 9.681698129822148e-06, + "loss": 0.3064, + "step": 7032 + }, + { + "epoch": 0.14079023096364138, + "grad_norm": 1.0014032125473022, + "learning_rate": 9.681584300762713e-06, + "loss": 0.3046, + "step": 7033 + }, + { + "epoch": 0.1408102494807697, + "grad_norm": 1.0808733701705933, + "learning_rate": 9.681470452022902e-06, + "loss": 0.3211, + "step": 7034 + }, + { + "epoch": 0.14083026799789805, + "grad_norm": 1.095973253250122, + "learning_rate": 9.681356583603198e-06, + "loss": 0.3008, + "step": 7035 + }, + { + "epoch": 0.1408502865150264, + "grad_norm": 1.170365333557129, + "learning_rate": 9.681242695504079e-06, + "loss": 0.3724, + "step": 7036 + }, + { + "epoch": 0.14087030503215475, + "grad_norm": 1.188308835029602, + "learning_rate": 9.681128787726022e-06, + "loss": 0.376, + "step": 7037 + }, + { + "epoch": 0.14089032354928308, + "grad_norm": 1.061743974685669, + "learning_rate": 9.681014860269508e-06, + "loss": 0.3269, + "step": 7038 + }, + { + "epoch": 0.14091034206641143, + "grad_norm": 1.6284314393997192, + "learning_rate": 9.680900913135015e-06, + "loss": 0.884, + "step": 7039 + }, + { + "epoch": 0.14093036058353978, + "grad_norm": 1.0180343389511108, + "learning_rate": 9.680786946323022e-06, + "loss": 0.3273, + "step": 7040 + }, + { + "epoch": 0.14095037910066813, + "grad_norm": 0.990260899066925, + "learning_rate": 9.680672959834006e-06, + "loss": 0.313, + "step": 7041 + }, + { + "epoch": 0.14097039761779645, + "grad_norm": 0.9656175971031189, + "learning_rate": 9.680558953668451e-06, + "loss": 0.2795, + "step": 7042 + }, + { + "epoch": 0.1409904161349248, + "grad_norm": 1.054056167602539, + "learning_rate": 9.680444927826831e-06, + "loss": 0.3484, + "step": 7043 + }, + { + "epoch": 0.14101043465205315, + "grad_norm": 1.0905691385269165, + "learning_rate": 9.68033088230963e-06, + "loss": 0.34, + "step": 7044 + }, + { + "epoch": 0.1410304531691815, + "grad_norm": 1.1307487487792969, + "learning_rate": 9.680216817117322e-06, + "loss": 0.3725, + "step": 7045 + }, + { + "epoch": 0.14105047168630983, + "grad_norm": 1.0039288997650146, + "learning_rate": 9.680102732250392e-06, + "loss": 0.3336, + "step": 7046 + }, + { + "epoch": 0.14107049020343818, + "grad_norm": 0.9584987759590149, + "learning_rate": 9.679988627709318e-06, + "loss": 0.325, + "step": 7047 + }, + { + "epoch": 0.14109050872056653, + "grad_norm": 1.341261625289917, + "learning_rate": 9.679874503494577e-06, + "loss": 0.3677, + "step": 7048 + }, + { + "epoch": 0.14111052723769488, + "grad_norm": 1.08066725730896, + "learning_rate": 9.679760359606653e-06, + "loss": 0.3562, + "step": 7049 + }, + { + "epoch": 0.1411305457548232, + "grad_norm": 1.1578668355941772, + "learning_rate": 9.679646196046022e-06, + "loss": 0.3198, + "step": 7050 + }, + { + "epoch": 0.14115056427195155, + "grad_norm": 1.2342262268066406, + "learning_rate": 9.679532012813166e-06, + "loss": 0.3156, + "step": 7051 + }, + { + "epoch": 0.1411705827890799, + "grad_norm": 1.1139438152313232, + "learning_rate": 9.679417809908563e-06, + "loss": 0.3441, + "step": 7052 + }, + { + "epoch": 0.14119060130620825, + "grad_norm": 1.037419080734253, + "learning_rate": 9.679303587332695e-06, + "loss": 0.2986, + "step": 7053 + }, + { + "epoch": 0.14121061982333658, + "grad_norm": 1.149057388305664, + "learning_rate": 9.679189345086044e-06, + "loss": 0.3682, + "step": 7054 + }, + { + "epoch": 0.14123063834046493, + "grad_norm": 1.0603246688842773, + "learning_rate": 9.679075083169084e-06, + "loss": 0.3161, + "step": 7055 + }, + { + "epoch": 0.14125065685759328, + "grad_norm": 1.1062062978744507, + "learning_rate": 9.678960801582303e-06, + "loss": 0.3232, + "step": 7056 + }, + { + "epoch": 0.14127067537472163, + "grad_norm": 1.0866256952285767, + "learning_rate": 9.678846500326177e-06, + "loss": 0.3213, + "step": 7057 + }, + { + "epoch": 0.14129069389184995, + "grad_norm": 1.08173668384552, + "learning_rate": 9.678732179401187e-06, + "loss": 0.3524, + "step": 7058 + }, + { + "epoch": 0.1413107124089783, + "grad_norm": 1.1640293598175049, + "learning_rate": 9.678617838807814e-06, + "loss": 0.3244, + "step": 7059 + }, + { + "epoch": 0.14133073092610665, + "grad_norm": 1.0630388259887695, + "learning_rate": 9.678503478546538e-06, + "loss": 0.3202, + "step": 7060 + }, + { + "epoch": 0.141350749443235, + "grad_norm": 1.0260555744171143, + "learning_rate": 9.678389098617839e-06, + "loss": 0.3271, + "step": 7061 + }, + { + "epoch": 0.14137076796036332, + "grad_norm": 1.0939161777496338, + "learning_rate": 9.678274699022199e-06, + "loss": 0.3081, + "step": 7062 + }, + { + "epoch": 0.14139078647749168, + "grad_norm": 1.1355197429656982, + "learning_rate": 9.6781602797601e-06, + "loss": 0.3016, + "step": 7063 + }, + { + "epoch": 0.14141080499462003, + "grad_norm": 1.0778489112854004, + "learning_rate": 9.678045840832022e-06, + "loss": 0.2905, + "step": 7064 + }, + { + "epoch": 0.14143082351174838, + "grad_norm": 1.0670050382614136, + "learning_rate": 9.677931382238446e-06, + "loss": 0.3239, + "step": 7065 + }, + { + "epoch": 0.1414508420288767, + "grad_norm": 0.9740689396858215, + "learning_rate": 9.677816903979853e-06, + "loss": 0.3477, + "step": 7066 + }, + { + "epoch": 0.14147086054600505, + "grad_norm": 1.0985506772994995, + "learning_rate": 9.677702406056725e-06, + "loss": 0.2949, + "step": 7067 + }, + { + "epoch": 0.1414908790631334, + "grad_norm": 1.02018404006958, + "learning_rate": 9.677587888469541e-06, + "loss": 0.3316, + "step": 7068 + }, + { + "epoch": 0.14151089758026175, + "grad_norm": 1.143438458442688, + "learning_rate": 9.677473351218787e-06, + "loss": 0.3494, + "step": 7069 + }, + { + "epoch": 0.14153091609739007, + "grad_norm": 1.7967884540557861, + "learning_rate": 9.67735879430494e-06, + "loss": 0.8764, + "step": 7070 + }, + { + "epoch": 0.14155093461451843, + "grad_norm": 1.036461353302002, + "learning_rate": 9.677244217728482e-06, + "loss": 0.3455, + "step": 7071 + }, + { + "epoch": 0.14157095313164678, + "grad_norm": 1.8144116401672363, + "learning_rate": 9.677129621489898e-06, + "loss": 0.8337, + "step": 7072 + }, + { + "epoch": 0.14159097164877513, + "grad_norm": 1.3293614387512207, + "learning_rate": 9.677015005589667e-06, + "loss": 0.357, + "step": 7073 + }, + { + "epoch": 0.14161099016590345, + "grad_norm": 1.2701411247253418, + "learning_rate": 9.67690037002827e-06, + "loss": 0.3781, + "step": 7074 + }, + { + "epoch": 0.1416310086830318, + "grad_norm": 1.0917173624038696, + "learning_rate": 9.676785714806192e-06, + "loss": 0.3182, + "step": 7075 + }, + { + "epoch": 0.14165102720016015, + "grad_norm": 0.9894595146179199, + "learning_rate": 9.676671039923913e-06, + "loss": 0.3262, + "step": 7076 + }, + { + "epoch": 0.1416710457172885, + "grad_norm": 1.217036485671997, + "learning_rate": 9.676556345381915e-06, + "loss": 0.3513, + "step": 7077 + }, + { + "epoch": 0.14169106423441682, + "grad_norm": 0.9813264608383179, + "learning_rate": 9.676441631180682e-06, + "loss": 0.3023, + "step": 7078 + }, + { + "epoch": 0.14171108275154518, + "grad_norm": 1.1394871473312378, + "learning_rate": 9.676326897320692e-06, + "loss": 0.3296, + "step": 7079 + }, + { + "epoch": 0.14173110126867353, + "grad_norm": 1.7813880443572998, + "learning_rate": 9.676212143802432e-06, + "loss": 0.8277, + "step": 7080 + }, + { + "epoch": 0.14175111978580188, + "grad_norm": 1.373026728630066, + "learning_rate": 9.676097370626382e-06, + "loss": 0.3262, + "step": 7081 + }, + { + "epoch": 0.1417711383029302, + "grad_norm": 1.2102168798446655, + "learning_rate": 9.675982577793024e-06, + "loss": 0.3173, + "step": 7082 + }, + { + "epoch": 0.14179115682005855, + "grad_norm": 1.200320839881897, + "learning_rate": 9.675867765302843e-06, + "loss": 0.2994, + "step": 7083 + }, + { + "epoch": 0.1418111753371869, + "grad_norm": 1.0901074409484863, + "learning_rate": 9.67575293315632e-06, + "loss": 0.3115, + "step": 7084 + }, + { + "epoch": 0.14183119385431525, + "grad_norm": 1.2408546209335327, + "learning_rate": 9.675638081353937e-06, + "loss": 0.3031, + "step": 7085 + }, + { + "epoch": 0.14185121237144357, + "grad_norm": 1.2233760356903076, + "learning_rate": 9.675523209896178e-06, + "loss": 0.3422, + "step": 7086 + }, + { + "epoch": 0.14187123088857193, + "grad_norm": 1.1859766244888306, + "learning_rate": 9.675408318783527e-06, + "loss": 0.3622, + "step": 7087 + }, + { + "epoch": 0.14189124940570028, + "grad_norm": 1.2336907386779785, + "learning_rate": 9.675293408016465e-06, + "loss": 0.3393, + "step": 7088 + }, + { + "epoch": 0.14191126792282863, + "grad_norm": 1.1379334926605225, + "learning_rate": 9.675178477595475e-06, + "loss": 0.3156, + "step": 7089 + }, + { + "epoch": 0.14193128643995695, + "grad_norm": 1.0242695808410645, + "learning_rate": 9.67506352752104e-06, + "loss": 0.3364, + "step": 7090 + }, + { + "epoch": 0.1419513049570853, + "grad_norm": 2.0966062545776367, + "learning_rate": 9.674948557793645e-06, + "loss": 0.771, + "step": 7091 + }, + { + "epoch": 0.14197132347421365, + "grad_norm": 1.0858396291732788, + "learning_rate": 9.674833568413773e-06, + "loss": 0.3542, + "step": 7092 + }, + { + "epoch": 0.141991341991342, + "grad_norm": 1.256247878074646, + "learning_rate": 9.674718559381905e-06, + "loss": 0.391, + "step": 7093 + }, + { + "epoch": 0.14201136050847032, + "grad_norm": 1.067842960357666, + "learning_rate": 9.674603530698527e-06, + "loss": 0.3048, + "step": 7094 + }, + { + "epoch": 0.14203137902559868, + "grad_norm": 1.0380351543426514, + "learning_rate": 9.674488482364122e-06, + "loss": 0.3366, + "step": 7095 + }, + { + "epoch": 0.14205139754272703, + "grad_norm": 0.9593302607536316, + "learning_rate": 9.674373414379173e-06, + "loss": 0.2909, + "step": 7096 + }, + { + "epoch": 0.14207141605985538, + "grad_norm": 1.1220929622650146, + "learning_rate": 9.674258326744166e-06, + "loss": 0.3804, + "step": 7097 + }, + { + "epoch": 0.1420914345769837, + "grad_norm": 1.0505815744400024, + "learning_rate": 9.67414321945958e-06, + "loss": 0.3554, + "step": 7098 + }, + { + "epoch": 0.14211145309411205, + "grad_norm": 1.1681469678878784, + "learning_rate": 9.674028092525904e-06, + "loss": 0.3386, + "step": 7099 + }, + { + "epoch": 0.1421314716112404, + "grad_norm": 1.1784014701843262, + "learning_rate": 9.673912945943617e-06, + "loss": 0.31, + "step": 7100 + }, + { + "epoch": 0.14215149012836875, + "grad_norm": 2.00223970413208, + "learning_rate": 9.673797779713209e-06, + "loss": 0.8283, + "step": 7101 + }, + { + "epoch": 0.14217150864549707, + "grad_norm": 0.9832313060760498, + "learning_rate": 9.67368259383516e-06, + "loss": 0.3156, + "step": 7102 + }, + { + "epoch": 0.14219152716262543, + "grad_norm": 1.0402039289474487, + "learning_rate": 9.673567388309954e-06, + "loss": 0.3126, + "step": 7103 + }, + { + "epoch": 0.14221154567975378, + "grad_norm": 1.0192680358886719, + "learning_rate": 9.673452163138077e-06, + "loss": 0.2884, + "step": 7104 + }, + { + "epoch": 0.14223156419688213, + "grad_norm": 1.1906071901321411, + "learning_rate": 9.673336918320014e-06, + "loss": 0.3443, + "step": 7105 + }, + { + "epoch": 0.14225158271401045, + "grad_norm": 1.0549646615982056, + "learning_rate": 9.673221653856247e-06, + "loss": 0.3769, + "step": 7106 + }, + { + "epoch": 0.1422716012311388, + "grad_norm": 1.2446188926696777, + "learning_rate": 9.673106369747261e-06, + "loss": 0.3482, + "step": 7107 + }, + { + "epoch": 0.14229161974826715, + "grad_norm": 1.1351737976074219, + "learning_rate": 9.672991065993544e-06, + "loss": 0.3718, + "step": 7108 + }, + { + "epoch": 0.1423116382653955, + "grad_norm": 1.0713187456130981, + "learning_rate": 9.672875742595577e-06, + "loss": 0.3166, + "step": 7109 + }, + { + "epoch": 0.14233165678252382, + "grad_norm": 1.3162932395935059, + "learning_rate": 9.672760399553846e-06, + "loss": 0.3177, + "step": 7110 + }, + { + "epoch": 0.14235167529965218, + "grad_norm": 1.060451626777649, + "learning_rate": 9.672645036868835e-06, + "loss": 0.3318, + "step": 7111 + }, + { + "epoch": 0.14237169381678053, + "grad_norm": 1.0779210329055786, + "learning_rate": 9.67252965454103e-06, + "loss": 0.3184, + "step": 7112 + }, + { + "epoch": 0.14239171233390888, + "grad_norm": 1.2283653020858765, + "learning_rate": 9.672414252570916e-06, + "loss": 0.3293, + "step": 7113 + }, + { + "epoch": 0.1424117308510372, + "grad_norm": 1.0067946910858154, + "learning_rate": 9.67229883095898e-06, + "loss": 0.2944, + "step": 7114 + }, + { + "epoch": 0.14243174936816555, + "grad_norm": 1.1646424531936646, + "learning_rate": 9.672183389705702e-06, + "loss": 0.3382, + "step": 7115 + }, + { + "epoch": 0.1424517678852939, + "grad_norm": 1.10996413230896, + "learning_rate": 9.672067928811573e-06, + "loss": 0.3303, + "step": 7116 + }, + { + "epoch": 0.14247178640242225, + "grad_norm": 1.2610872983932495, + "learning_rate": 9.671952448277075e-06, + "loss": 0.2979, + "step": 7117 + }, + { + "epoch": 0.14249180491955057, + "grad_norm": 1.0884367227554321, + "learning_rate": 9.671836948102695e-06, + "loss": 0.3489, + "step": 7118 + }, + { + "epoch": 0.14251182343667892, + "grad_norm": 1.0562076568603516, + "learning_rate": 9.671721428288916e-06, + "loss": 0.32, + "step": 7119 + }, + { + "epoch": 0.14253184195380728, + "grad_norm": 1.136776328086853, + "learning_rate": 9.671605888836228e-06, + "loss": 0.3249, + "step": 7120 + }, + { + "epoch": 0.14255186047093563, + "grad_norm": 1.0787209272384644, + "learning_rate": 9.671490329745112e-06, + "loss": 0.3562, + "step": 7121 + }, + { + "epoch": 0.14257187898806395, + "grad_norm": 1.034277081489563, + "learning_rate": 9.671374751016058e-06, + "loss": 0.3055, + "step": 7122 + }, + { + "epoch": 0.1425918975051923, + "grad_norm": 0.9811363816261292, + "learning_rate": 9.671259152649548e-06, + "loss": 0.3106, + "step": 7123 + }, + { + "epoch": 0.14261191602232065, + "grad_norm": 1.0340535640716553, + "learning_rate": 9.67114353464607e-06, + "loss": 0.3517, + "step": 7124 + }, + { + "epoch": 0.142631934539449, + "grad_norm": 1.1224415302276611, + "learning_rate": 9.67102789700611e-06, + "loss": 0.3496, + "step": 7125 + }, + { + "epoch": 0.14265195305657732, + "grad_norm": 0.9963855743408203, + "learning_rate": 9.670912239730155e-06, + "loss": 0.287, + "step": 7126 + }, + { + "epoch": 0.14267197157370567, + "grad_norm": 1.0391844511032104, + "learning_rate": 9.67079656281869e-06, + "loss": 0.3717, + "step": 7127 + }, + { + "epoch": 0.14269199009083403, + "grad_norm": 0.9991568922996521, + "learning_rate": 9.6706808662722e-06, + "loss": 0.3172, + "step": 7128 + }, + { + "epoch": 0.14271200860796238, + "grad_norm": 1.0665093660354614, + "learning_rate": 9.670565150091173e-06, + "loss": 0.3333, + "step": 7129 + }, + { + "epoch": 0.1427320271250907, + "grad_norm": 1.0992127656936646, + "learning_rate": 9.670449414276097e-06, + "loss": 0.3184, + "step": 7130 + }, + { + "epoch": 0.14275204564221905, + "grad_norm": 1.0749688148498535, + "learning_rate": 9.670333658827456e-06, + "loss": 0.3605, + "step": 7131 + }, + { + "epoch": 0.1427720641593474, + "grad_norm": 0.9691889882087708, + "learning_rate": 9.670217883745736e-06, + "loss": 0.3113, + "step": 7132 + }, + { + "epoch": 0.14279208267647575, + "grad_norm": 2.0044853687286377, + "learning_rate": 9.670102089031426e-06, + "loss": 0.8492, + "step": 7133 + }, + { + "epoch": 0.14281210119360407, + "grad_norm": 1.2326279878616333, + "learning_rate": 9.669986274685013e-06, + "loss": 0.3275, + "step": 7134 + }, + { + "epoch": 0.14283211971073242, + "grad_norm": 1.1254336833953857, + "learning_rate": 9.66987044070698e-06, + "loss": 0.3472, + "step": 7135 + }, + { + "epoch": 0.14285213822786078, + "grad_norm": 1.304681420326233, + "learning_rate": 9.669754587097819e-06, + "loss": 0.3216, + "step": 7136 + }, + { + "epoch": 0.14287215674498913, + "grad_norm": 1.1405121088027954, + "learning_rate": 9.669638713858015e-06, + "loss": 0.3062, + "step": 7137 + }, + { + "epoch": 0.14289217526211745, + "grad_norm": 1.0830261707305908, + "learning_rate": 9.669522820988051e-06, + "loss": 0.3065, + "step": 7138 + }, + { + "epoch": 0.1429121937792458, + "grad_norm": 1.1037774085998535, + "learning_rate": 9.66940690848842e-06, + "loss": 0.2889, + "step": 7139 + }, + { + "epoch": 0.14293221229637415, + "grad_norm": 1.236912727355957, + "learning_rate": 9.669290976359608e-06, + "loss": 0.3426, + "step": 7140 + }, + { + "epoch": 0.1429522308135025, + "grad_norm": 1.04560124874115, + "learning_rate": 9.669175024602103e-06, + "loss": 0.308, + "step": 7141 + }, + { + "epoch": 0.14297224933063082, + "grad_norm": 1.2020806074142456, + "learning_rate": 9.669059053216388e-06, + "loss": 0.3444, + "step": 7142 + }, + { + "epoch": 0.14299226784775917, + "grad_norm": 1.0602840185165405, + "learning_rate": 9.668943062202956e-06, + "loss": 0.3513, + "step": 7143 + }, + { + "epoch": 0.14301228636488753, + "grad_norm": 1.2023444175720215, + "learning_rate": 9.66882705156229e-06, + "loss": 0.3165, + "step": 7144 + }, + { + "epoch": 0.14303230488201588, + "grad_norm": 1.9106961488723755, + "learning_rate": 9.66871102129488e-06, + "loss": 0.8963, + "step": 7145 + }, + { + "epoch": 0.1430523233991442, + "grad_norm": 1.2443640232086182, + "learning_rate": 9.668594971401216e-06, + "loss": 0.3776, + "step": 7146 + }, + { + "epoch": 0.14307234191627255, + "grad_norm": 1.1286839246749878, + "learning_rate": 9.668478901881782e-06, + "loss": 0.2852, + "step": 7147 + }, + { + "epoch": 0.1430923604334009, + "grad_norm": 1.0915751457214355, + "learning_rate": 9.668362812737066e-06, + "loss": 0.3235, + "step": 7148 + }, + { + "epoch": 0.14311237895052925, + "grad_norm": 1.1789501905441284, + "learning_rate": 9.668246703967559e-06, + "loss": 0.3006, + "step": 7149 + }, + { + "epoch": 0.14313239746765757, + "grad_norm": 1.0779813528060913, + "learning_rate": 9.668130575573747e-06, + "loss": 0.2865, + "step": 7150 + }, + { + "epoch": 0.14315241598478592, + "grad_norm": 1.1646466255187988, + "learning_rate": 9.668014427556119e-06, + "loss": 0.3279, + "step": 7151 + }, + { + "epoch": 0.14317243450191428, + "grad_norm": 1.0718955993652344, + "learning_rate": 9.667898259915162e-06, + "loss": 0.3366, + "step": 7152 + }, + { + "epoch": 0.14319245301904263, + "grad_norm": 1.7964303493499756, + "learning_rate": 9.667782072651366e-06, + "loss": 0.9006, + "step": 7153 + }, + { + "epoch": 0.14321247153617095, + "grad_norm": 1.3249651193618774, + "learning_rate": 9.66766586576522e-06, + "loss": 0.3781, + "step": 7154 + }, + { + "epoch": 0.1432324900532993, + "grad_norm": 1.087753176689148, + "learning_rate": 9.66754963925721e-06, + "loss": 0.3668, + "step": 7155 + }, + { + "epoch": 0.14325250857042765, + "grad_norm": 1.0921791791915894, + "learning_rate": 9.667433393127823e-06, + "loss": 0.3168, + "step": 7156 + }, + { + "epoch": 0.143272527087556, + "grad_norm": 1.0769894123077393, + "learning_rate": 9.667317127377553e-06, + "loss": 0.351, + "step": 7157 + }, + { + "epoch": 0.14329254560468432, + "grad_norm": 1.0309964418411255, + "learning_rate": 9.667200842006887e-06, + "loss": 0.3302, + "step": 7158 + }, + { + "epoch": 0.14331256412181267, + "grad_norm": 1.0463893413543701, + "learning_rate": 9.667084537016312e-06, + "loss": 0.3178, + "step": 7159 + }, + { + "epoch": 0.14333258263894103, + "grad_norm": 1.0680627822875977, + "learning_rate": 9.666968212406319e-06, + "loss": 0.3247, + "step": 7160 + }, + { + "epoch": 0.14335260115606938, + "grad_norm": 1.0702455043792725, + "learning_rate": 9.666851868177395e-06, + "loss": 0.2759, + "step": 7161 + }, + { + "epoch": 0.1433726196731977, + "grad_norm": 1.9264837503433228, + "learning_rate": 9.66673550433003e-06, + "loss": 0.9052, + "step": 7162 + }, + { + "epoch": 0.14339263819032605, + "grad_norm": 1.1750775575637817, + "learning_rate": 9.666619120864714e-06, + "loss": 0.3709, + "step": 7163 + }, + { + "epoch": 0.1434126567074544, + "grad_norm": 1.1501604318618774, + "learning_rate": 9.666502717781935e-06, + "loss": 0.3494, + "step": 7164 + }, + { + "epoch": 0.14343267522458275, + "grad_norm": 1.3938554525375366, + "learning_rate": 9.666386295082184e-06, + "loss": 0.3304, + "step": 7165 + }, + { + "epoch": 0.14345269374171107, + "grad_norm": 1.1510801315307617, + "learning_rate": 9.666269852765945e-06, + "loss": 0.4039, + "step": 7166 + }, + { + "epoch": 0.14347271225883942, + "grad_norm": 1.3617697954177856, + "learning_rate": 9.666153390833716e-06, + "loss": 0.3618, + "step": 7167 + }, + { + "epoch": 0.14349273077596778, + "grad_norm": 1.0125318765640259, + "learning_rate": 9.666036909285981e-06, + "loss": 0.3059, + "step": 7168 + }, + { + "epoch": 0.14351274929309613, + "grad_norm": 0.9500599503517151, + "learning_rate": 9.665920408123232e-06, + "loss": 0.322, + "step": 7169 + }, + { + "epoch": 0.14353276781022445, + "grad_norm": 1.1453050374984741, + "learning_rate": 9.665803887345957e-06, + "loss": 0.3229, + "step": 7170 + }, + { + "epoch": 0.1435527863273528, + "grad_norm": 1.0761605501174927, + "learning_rate": 9.665687346954647e-06, + "loss": 0.3277, + "step": 7171 + }, + { + "epoch": 0.14357280484448115, + "grad_norm": 1.0539476871490479, + "learning_rate": 9.665570786949789e-06, + "loss": 0.3075, + "step": 7172 + }, + { + "epoch": 0.1435928233616095, + "grad_norm": 1.1372395753860474, + "learning_rate": 9.665454207331878e-06, + "loss": 0.3324, + "step": 7173 + }, + { + "epoch": 0.14361284187873782, + "grad_norm": 1.7282453775405884, + "learning_rate": 9.6653376081014e-06, + "loss": 0.8715, + "step": 7174 + }, + { + "epoch": 0.14363286039586617, + "grad_norm": 1.0749566555023193, + "learning_rate": 9.665220989258847e-06, + "loss": 0.3157, + "step": 7175 + }, + { + "epoch": 0.14365287891299452, + "grad_norm": 1.214095950126648, + "learning_rate": 9.66510435080471e-06, + "loss": 0.3401, + "step": 7176 + }, + { + "epoch": 0.14367289743012288, + "grad_norm": 1.8746081590652466, + "learning_rate": 9.664987692739478e-06, + "loss": 0.8438, + "step": 7177 + }, + { + "epoch": 0.1436929159472512, + "grad_norm": 1.2347310781478882, + "learning_rate": 9.664871015063641e-06, + "loss": 0.3415, + "step": 7178 + }, + { + "epoch": 0.14371293446437955, + "grad_norm": 1.026940107345581, + "learning_rate": 9.664754317777692e-06, + "loss": 0.3234, + "step": 7179 + }, + { + "epoch": 0.1437329529815079, + "grad_norm": 1.0950400829315186, + "learning_rate": 9.664637600882117e-06, + "loss": 0.329, + "step": 7180 + }, + { + "epoch": 0.14375297149863625, + "grad_norm": 1.0040154457092285, + "learning_rate": 9.66452086437741e-06, + "loss": 0.2968, + "step": 7181 + }, + { + "epoch": 0.14377299001576457, + "grad_norm": 1.0149179697036743, + "learning_rate": 9.664404108264063e-06, + "loss": 0.3369, + "step": 7182 + }, + { + "epoch": 0.14379300853289292, + "grad_norm": 1.1833168268203735, + "learning_rate": 9.664287332542563e-06, + "loss": 0.2919, + "step": 7183 + }, + { + "epoch": 0.14381302705002127, + "grad_norm": 1.1319597959518433, + "learning_rate": 9.664170537213404e-06, + "loss": 0.3272, + "step": 7184 + }, + { + "epoch": 0.14383304556714963, + "grad_norm": 1.2254897356033325, + "learning_rate": 9.664053722277074e-06, + "loss": 0.3342, + "step": 7185 + }, + { + "epoch": 0.14385306408427795, + "grad_norm": 1.993420124053955, + "learning_rate": 9.663936887734067e-06, + "loss": 0.8646, + "step": 7186 + }, + { + "epoch": 0.1438730826014063, + "grad_norm": 1.1317472457885742, + "learning_rate": 9.663820033584874e-06, + "loss": 0.3331, + "step": 7187 + }, + { + "epoch": 0.14389310111853465, + "grad_norm": 1.1260157823562622, + "learning_rate": 9.663703159829984e-06, + "loss": 0.3908, + "step": 7188 + }, + { + "epoch": 0.143913119635663, + "grad_norm": 1.1214920282363892, + "learning_rate": 9.663586266469889e-06, + "loss": 0.2821, + "step": 7189 + }, + { + "epoch": 0.14393313815279132, + "grad_norm": 1.2078737020492554, + "learning_rate": 9.663469353505082e-06, + "loss": 0.3932, + "step": 7190 + }, + { + "epoch": 0.14395315666991967, + "grad_norm": 1.1810439825057983, + "learning_rate": 9.663352420936052e-06, + "loss": 0.3234, + "step": 7191 + }, + { + "epoch": 0.14397317518704802, + "grad_norm": 1.15718674659729, + "learning_rate": 9.663235468763295e-06, + "loss": 0.3877, + "step": 7192 + }, + { + "epoch": 0.14399319370417638, + "grad_norm": 1.037497639656067, + "learning_rate": 9.663118496987297e-06, + "loss": 0.298, + "step": 7193 + }, + { + "epoch": 0.1440132122213047, + "grad_norm": 1.1246082782745361, + "learning_rate": 9.663001505608554e-06, + "loss": 0.3866, + "step": 7194 + }, + { + "epoch": 0.14403323073843305, + "grad_norm": 1.2016477584838867, + "learning_rate": 9.662884494627554e-06, + "loss": 0.3254, + "step": 7195 + }, + { + "epoch": 0.1440532492555614, + "grad_norm": 1.0863473415374756, + "learning_rate": 9.662767464044793e-06, + "loss": 0.342, + "step": 7196 + }, + { + "epoch": 0.14407326777268975, + "grad_norm": 1.103602409362793, + "learning_rate": 9.662650413860762e-06, + "loss": 0.3599, + "step": 7197 + }, + { + "epoch": 0.14409328628981807, + "grad_norm": 1.0767040252685547, + "learning_rate": 9.662533344075948e-06, + "loss": 0.3252, + "step": 7198 + }, + { + "epoch": 0.14411330480694642, + "grad_norm": 1.15738844871521, + "learning_rate": 9.66241625469085e-06, + "loss": 0.321, + "step": 7199 + }, + { + "epoch": 0.14413332332407477, + "grad_norm": 1.0587210655212402, + "learning_rate": 9.662299145705958e-06, + "loss": 0.3879, + "step": 7200 + }, + { + "epoch": 0.14415334184120313, + "grad_norm": 1.0571171045303345, + "learning_rate": 9.662182017121763e-06, + "loss": 0.3469, + "step": 7201 + }, + { + "epoch": 0.14417336035833145, + "grad_norm": 1.002548336982727, + "learning_rate": 9.662064868938758e-06, + "loss": 0.301, + "step": 7202 + }, + { + "epoch": 0.1441933788754598, + "grad_norm": 1.1928738355636597, + "learning_rate": 9.661947701157435e-06, + "loss": 0.3789, + "step": 7203 + }, + { + "epoch": 0.14421339739258815, + "grad_norm": 1.0556995868682861, + "learning_rate": 9.661830513778288e-06, + "loss": 0.3466, + "step": 7204 + }, + { + "epoch": 0.1442334159097165, + "grad_norm": 1.0713247060775757, + "learning_rate": 9.661713306801808e-06, + "loss": 0.3137, + "step": 7205 + }, + { + "epoch": 0.14425343442684482, + "grad_norm": 1.0814292430877686, + "learning_rate": 9.661596080228489e-06, + "loss": 0.3097, + "step": 7206 + }, + { + "epoch": 0.14427345294397317, + "grad_norm": 1.1923273801803589, + "learning_rate": 9.661478834058824e-06, + "loss": 0.3175, + "step": 7207 + }, + { + "epoch": 0.14429347146110152, + "grad_norm": 1.0744882822036743, + "learning_rate": 9.661361568293304e-06, + "loss": 0.3118, + "step": 7208 + }, + { + "epoch": 0.14431348997822988, + "grad_norm": 2.0208513736724854, + "learning_rate": 9.661244282932423e-06, + "loss": 0.8545, + "step": 7209 + }, + { + "epoch": 0.1443335084953582, + "grad_norm": 1.0147427320480347, + "learning_rate": 9.661126977976674e-06, + "loss": 0.2879, + "step": 7210 + }, + { + "epoch": 0.14435352701248655, + "grad_norm": 1.1445280313491821, + "learning_rate": 9.661009653426551e-06, + "loss": 0.3313, + "step": 7211 + }, + { + "epoch": 0.1443735455296149, + "grad_norm": 1.0247496366500854, + "learning_rate": 9.660892309282547e-06, + "loss": 0.314, + "step": 7212 + }, + { + "epoch": 0.14439356404674325, + "grad_norm": 1.1649870872497559, + "learning_rate": 9.660774945545154e-06, + "loss": 0.3522, + "step": 7213 + }, + { + "epoch": 0.14441358256387157, + "grad_norm": 1.175153136253357, + "learning_rate": 9.660657562214866e-06, + "loss": 0.3612, + "step": 7214 + }, + { + "epoch": 0.14443360108099992, + "grad_norm": 1.282518744468689, + "learning_rate": 9.660540159292177e-06, + "loss": 0.3467, + "step": 7215 + }, + { + "epoch": 0.14445361959812827, + "grad_norm": 1.0428186655044556, + "learning_rate": 9.660422736777579e-06, + "loss": 0.3335, + "step": 7216 + }, + { + "epoch": 0.14447363811525663, + "grad_norm": 1.0544002056121826, + "learning_rate": 9.660305294671568e-06, + "loss": 0.3224, + "step": 7217 + }, + { + "epoch": 0.14449365663238495, + "grad_norm": 1.0205819606781006, + "learning_rate": 9.660187832974637e-06, + "loss": 0.3222, + "step": 7218 + }, + { + "epoch": 0.1445136751495133, + "grad_norm": 1.0139970779418945, + "learning_rate": 9.660070351687279e-06, + "loss": 0.3056, + "step": 7219 + }, + { + "epoch": 0.14453369366664165, + "grad_norm": 1.0644532442092896, + "learning_rate": 9.659952850809987e-06, + "loss": 0.3253, + "step": 7220 + }, + { + "epoch": 0.14455371218377, + "grad_norm": 1.1414334774017334, + "learning_rate": 9.659835330343257e-06, + "loss": 0.346, + "step": 7221 + }, + { + "epoch": 0.14457373070089832, + "grad_norm": 1.0368765592575073, + "learning_rate": 9.65971779028758e-06, + "loss": 0.3169, + "step": 7222 + }, + { + "epoch": 0.14459374921802667, + "grad_norm": 1.1203594207763672, + "learning_rate": 9.659600230643455e-06, + "loss": 0.3468, + "step": 7223 + }, + { + "epoch": 0.14461376773515502, + "grad_norm": 1.1653168201446533, + "learning_rate": 9.659482651411371e-06, + "loss": 0.3605, + "step": 7224 + }, + { + "epoch": 0.14463378625228338, + "grad_norm": 1.2243916988372803, + "learning_rate": 9.659365052591827e-06, + "loss": 0.3594, + "step": 7225 + }, + { + "epoch": 0.1446538047694117, + "grad_norm": 2.011148691177368, + "learning_rate": 9.659247434185312e-06, + "loss": 0.8142, + "step": 7226 + }, + { + "epoch": 0.14467382328654005, + "grad_norm": 1.8661372661590576, + "learning_rate": 9.659129796192325e-06, + "loss": 0.8945, + "step": 7227 + }, + { + "epoch": 0.1446938418036684, + "grad_norm": 1.094066858291626, + "learning_rate": 9.659012138613362e-06, + "loss": 0.3492, + "step": 7228 + }, + { + "epoch": 0.14471386032079675, + "grad_norm": 1.1204087734222412, + "learning_rate": 9.658894461448911e-06, + "loss": 0.3372, + "step": 7229 + }, + { + "epoch": 0.14473387883792507, + "grad_norm": 1.075434923171997, + "learning_rate": 9.658776764699472e-06, + "loss": 0.3263, + "step": 7230 + }, + { + "epoch": 0.14475389735505342, + "grad_norm": 1.7871273756027222, + "learning_rate": 9.658659048365536e-06, + "loss": 0.836, + "step": 7231 + }, + { + "epoch": 0.14477391587218177, + "grad_norm": 1.2634602785110474, + "learning_rate": 9.658541312447603e-06, + "loss": 0.3393, + "step": 7232 + }, + { + "epoch": 0.14479393438931012, + "grad_norm": 1.1187530755996704, + "learning_rate": 9.658423556946162e-06, + "loss": 0.3168, + "step": 7233 + }, + { + "epoch": 0.14481395290643845, + "grad_norm": 1.0668280124664307, + "learning_rate": 9.658305781861713e-06, + "loss": 0.3291, + "step": 7234 + }, + { + "epoch": 0.1448339714235668, + "grad_norm": 2.0092689990997314, + "learning_rate": 9.658187987194746e-06, + "loss": 0.7905, + "step": 7235 + }, + { + "epoch": 0.14485398994069515, + "grad_norm": 1.2369533777236938, + "learning_rate": 9.658070172945762e-06, + "loss": 0.2918, + "step": 7236 + }, + { + "epoch": 0.1448740084578235, + "grad_norm": 1.3040107488632202, + "learning_rate": 9.657952339115251e-06, + "loss": 0.3624, + "step": 7237 + }, + { + "epoch": 0.14489402697495182, + "grad_norm": 1.1669714450836182, + "learning_rate": 9.657834485703714e-06, + "loss": 0.3598, + "step": 7238 + }, + { + "epoch": 0.14491404549208017, + "grad_norm": 1.2053037881851196, + "learning_rate": 9.657716612711641e-06, + "loss": 0.308, + "step": 7239 + }, + { + "epoch": 0.14493406400920852, + "grad_norm": 1.0623819828033447, + "learning_rate": 9.65759872013953e-06, + "loss": 0.3262, + "step": 7240 + }, + { + "epoch": 0.14495408252633687, + "grad_norm": 1.060082197189331, + "learning_rate": 9.657480807987875e-06, + "loss": 0.3259, + "step": 7241 + }, + { + "epoch": 0.1449741010434652, + "grad_norm": 1.1989562511444092, + "learning_rate": 9.657362876257176e-06, + "loss": 0.3689, + "step": 7242 + }, + { + "epoch": 0.14499411956059355, + "grad_norm": 1.098762035369873, + "learning_rate": 9.657244924947923e-06, + "loss": 0.3737, + "step": 7243 + }, + { + "epoch": 0.1450141380777219, + "grad_norm": 1.931837558746338, + "learning_rate": 9.657126954060616e-06, + "loss": 0.8814, + "step": 7244 + }, + { + "epoch": 0.14503415659485025, + "grad_norm": 1.084559440612793, + "learning_rate": 9.65700896359575e-06, + "loss": 0.3367, + "step": 7245 + }, + { + "epoch": 0.14505417511197857, + "grad_norm": 1.1232494115829468, + "learning_rate": 9.656890953553818e-06, + "loss": 0.3478, + "step": 7246 + }, + { + "epoch": 0.14507419362910692, + "grad_norm": 1.2414408922195435, + "learning_rate": 9.656772923935323e-06, + "loss": 0.3305, + "step": 7247 + }, + { + "epoch": 0.14509421214623527, + "grad_norm": 1.0488752126693726, + "learning_rate": 9.656654874740753e-06, + "loss": 0.3563, + "step": 7248 + }, + { + "epoch": 0.14511423066336362, + "grad_norm": 1.7855809926986694, + "learning_rate": 9.65653680597061e-06, + "loss": 0.8255, + "step": 7249 + }, + { + "epoch": 0.14513424918049195, + "grad_norm": 0.983617901802063, + "learning_rate": 9.656418717625387e-06, + "loss": 0.3204, + "step": 7250 + }, + { + "epoch": 0.1451542676976203, + "grad_norm": 1.0779659748077393, + "learning_rate": 9.656300609705582e-06, + "loss": 0.3652, + "step": 7251 + }, + { + "epoch": 0.14517428621474865, + "grad_norm": 1.0413007736206055, + "learning_rate": 9.656182482211691e-06, + "loss": 0.3043, + "step": 7252 + }, + { + "epoch": 0.145194304731877, + "grad_norm": 1.0872801542282104, + "learning_rate": 9.656064335144212e-06, + "loss": 0.3039, + "step": 7253 + }, + { + "epoch": 0.14521432324900532, + "grad_norm": 1.0213342905044556, + "learning_rate": 9.65594616850364e-06, + "loss": 0.3357, + "step": 7254 + }, + { + "epoch": 0.14523434176613367, + "grad_norm": 1.1900683641433716, + "learning_rate": 9.655827982290474e-06, + "loss": 0.3104, + "step": 7255 + }, + { + "epoch": 0.14525436028326202, + "grad_norm": 1.098550796508789, + "learning_rate": 9.655709776505207e-06, + "loss": 0.3108, + "step": 7256 + }, + { + "epoch": 0.14527437880039037, + "grad_norm": 1.8382856845855713, + "learning_rate": 9.655591551148338e-06, + "loss": 0.8453, + "step": 7257 + }, + { + "epoch": 0.1452943973175187, + "grad_norm": 1.2145599126815796, + "learning_rate": 9.655473306220365e-06, + "loss": 0.3477, + "step": 7258 + }, + { + "epoch": 0.14531441583464705, + "grad_norm": 0.9933026432991028, + "learning_rate": 9.655355041721783e-06, + "loss": 0.3298, + "step": 7259 + }, + { + "epoch": 0.1453344343517754, + "grad_norm": 1.2319567203521729, + "learning_rate": 9.655236757653091e-06, + "loss": 0.3113, + "step": 7260 + }, + { + "epoch": 0.14535445286890375, + "grad_norm": 1.1979540586471558, + "learning_rate": 9.655118454014785e-06, + "loss": 0.3348, + "step": 7261 + }, + { + "epoch": 0.14537447138603207, + "grad_norm": 1.788125991821289, + "learning_rate": 9.655000130807362e-06, + "loss": 0.8176, + "step": 7262 + }, + { + "epoch": 0.14539448990316042, + "grad_norm": 1.125949740409851, + "learning_rate": 9.654881788031323e-06, + "loss": 0.3124, + "step": 7263 + }, + { + "epoch": 0.14541450842028877, + "grad_norm": 1.073028564453125, + "learning_rate": 9.65476342568716e-06, + "loss": 0.308, + "step": 7264 + }, + { + "epoch": 0.14543452693741712, + "grad_norm": 1.0509729385375977, + "learning_rate": 9.654645043775375e-06, + "loss": 0.3122, + "step": 7265 + }, + { + "epoch": 0.14545454545454545, + "grad_norm": 1.0925220251083374, + "learning_rate": 9.654526642296463e-06, + "loss": 0.2872, + "step": 7266 + }, + { + "epoch": 0.1454745639716738, + "grad_norm": 1.1591722965240479, + "learning_rate": 9.654408221250922e-06, + "loss": 0.3133, + "step": 7267 + }, + { + "epoch": 0.14549458248880215, + "grad_norm": 1.3404357433319092, + "learning_rate": 9.654289780639252e-06, + "loss": 0.3596, + "step": 7268 + }, + { + "epoch": 0.1455146010059305, + "grad_norm": 1.089316964149475, + "learning_rate": 9.65417132046195e-06, + "loss": 0.3363, + "step": 7269 + }, + { + "epoch": 0.14553461952305882, + "grad_norm": 1.0113790035247803, + "learning_rate": 9.65405284071951e-06, + "loss": 0.3389, + "step": 7270 + }, + { + "epoch": 0.14555463804018717, + "grad_norm": 0.9952141642570496, + "learning_rate": 9.653934341412437e-06, + "loss": 0.2893, + "step": 7271 + }, + { + "epoch": 0.14557465655731552, + "grad_norm": 1.9697203636169434, + "learning_rate": 9.653815822541223e-06, + "loss": 0.7956, + "step": 7272 + }, + { + "epoch": 0.14559467507444385, + "grad_norm": 1.0978196859359741, + "learning_rate": 9.653697284106369e-06, + "loss": 0.3281, + "step": 7273 + }, + { + "epoch": 0.1456146935915722, + "grad_norm": 1.1486504077911377, + "learning_rate": 9.653578726108374e-06, + "loss": 0.3574, + "step": 7274 + }, + { + "epoch": 0.14563471210870055, + "grad_norm": 1.0956379175186157, + "learning_rate": 9.653460148547736e-06, + "loss": 0.3627, + "step": 7275 + }, + { + "epoch": 0.1456547306258289, + "grad_norm": 1.0964808464050293, + "learning_rate": 9.653341551424952e-06, + "loss": 0.3357, + "step": 7276 + }, + { + "epoch": 0.14567474914295722, + "grad_norm": 1.1038018465042114, + "learning_rate": 9.653222934740523e-06, + "loss": 0.336, + "step": 7277 + }, + { + "epoch": 0.14569476766008557, + "grad_norm": 1.075989842414856, + "learning_rate": 9.653104298494944e-06, + "loss": 0.3184, + "step": 7278 + }, + { + "epoch": 0.14571478617721392, + "grad_norm": 1.0710783004760742, + "learning_rate": 9.652985642688718e-06, + "loss": 0.2946, + "step": 7279 + }, + { + "epoch": 0.14573480469434227, + "grad_norm": 1.1469048261642456, + "learning_rate": 9.652866967322341e-06, + "loss": 0.3487, + "step": 7280 + }, + { + "epoch": 0.1457548232114706, + "grad_norm": 1.1375309228897095, + "learning_rate": 9.652748272396312e-06, + "loss": 0.3191, + "step": 7281 + }, + { + "epoch": 0.14577484172859895, + "grad_norm": 1.1175742149353027, + "learning_rate": 9.652629557911131e-06, + "loss": 0.3625, + "step": 7282 + }, + { + "epoch": 0.1457948602457273, + "grad_norm": 1.1089483499526978, + "learning_rate": 9.652510823867296e-06, + "loss": 0.2814, + "step": 7283 + }, + { + "epoch": 0.14581487876285565, + "grad_norm": 1.8656607866287231, + "learning_rate": 9.652392070265307e-06, + "loss": 0.8089, + "step": 7284 + }, + { + "epoch": 0.14583489727998397, + "grad_norm": 1.1768875122070312, + "learning_rate": 9.652273297105663e-06, + "loss": 0.346, + "step": 7285 + }, + { + "epoch": 0.14585491579711232, + "grad_norm": 1.1954083442687988, + "learning_rate": 9.652154504388865e-06, + "loss": 0.3243, + "step": 7286 + }, + { + "epoch": 0.14587493431424067, + "grad_norm": 1.105099081993103, + "learning_rate": 9.652035692115408e-06, + "loss": 0.3656, + "step": 7287 + }, + { + "epoch": 0.14589495283136902, + "grad_norm": 1.1090556383132935, + "learning_rate": 9.651916860285794e-06, + "loss": 0.3598, + "step": 7288 + }, + { + "epoch": 0.14591497134849735, + "grad_norm": 1.2020008563995361, + "learning_rate": 9.651798008900523e-06, + "loss": 0.4161, + "step": 7289 + }, + { + "epoch": 0.1459349898656257, + "grad_norm": 1.0298092365264893, + "learning_rate": 9.651679137960097e-06, + "loss": 0.3304, + "step": 7290 + }, + { + "epoch": 0.14595500838275405, + "grad_norm": 1.0525033473968506, + "learning_rate": 9.65156024746501e-06, + "loss": 0.3502, + "step": 7291 + }, + { + "epoch": 0.1459750268998824, + "grad_norm": 1.2469102144241333, + "learning_rate": 9.651441337415765e-06, + "loss": 0.3548, + "step": 7292 + }, + { + "epoch": 0.14599504541701072, + "grad_norm": 1.0748122930526733, + "learning_rate": 9.651322407812863e-06, + "loss": 0.3301, + "step": 7293 + }, + { + "epoch": 0.14601506393413907, + "grad_norm": 1.1156587600708008, + "learning_rate": 9.651203458656801e-06, + "loss": 0.3579, + "step": 7294 + }, + { + "epoch": 0.14603508245126742, + "grad_norm": 1.0901602506637573, + "learning_rate": 9.651084489948081e-06, + "loss": 0.3291, + "step": 7295 + }, + { + "epoch": 0.14605510096839577, + "grad_norm": 1.1813479661941528, + "learning_rate": 9.650965501687203e-06, + "loss": 0.3151, + "step": 7296 + }, + { + "epoch": 0.1460751194855241, + "grad_norm": 1.137376070022583, + "learning_rate": 9.650846493874667e-06, + "loss": 0.3307, + "step": 7297 + }, + { + "epoch": 0.14609513800265245, + "grad_norm": 1.0516289472579956, + "learning_rate": 9.650727466510974e-06, + "loss": 0.3514, + "step": 7298 + }, + { + "epoch": 0.1461151565197808, + "grad_norm": 1.1379557847976685, + "learning_rate": 9.650608419596623e-06, + "loss": 0.3415, + "step": 7299 + }, + { + "epoch": 0.14613517503690915, + "grad_norm": 1.0174612998962402, + "learning_rate": 9.650489353132115e-06, + "loss": 0.356, + "step": 7300 + }, + { + "epoch": 0.14615519355403747, + "grad_norm": 1.0737378597259521, + "learning_rate": 9.650370267117952e-06, + "loss": 0.3293, + "step": 7301 + }, + { + "epoch": 0.14617521207116582, + "grad_norm": 1.130890965461731, + "learning_rate": 9.650251161554632e-06, + "loss": 0.3419, + "step": 7302 + }, + { + "epoch": 0.14619523058829417, + "grad_norm": 1.0765379667282104, + "learning_rate": 9.650132036442656e-06, + "loss": 0.3048, + "step": 7303 + }, + { + "epoch": 0.14621524910542252, + "grad_norm": 1.0927784442901611, + "learning_rate": 9.650012891782525e-06, + "loss": 0.2943, + "step": 7304 + }, + { + "epoch": 0.14623526762255085, + "grad_norm": 1.1516592502593994, + "learning_rate": 9.649893727574742e-06, + "loss": 0.337, + "step": 7305 + }, + { + "epoch": 0.1462552861396792, + "grad_norm": 1.1806594133377075, + "learning_rate": 9.649774543819805e-06, + "loss": 0.3597, + "step": 7306 + }, + { + "epoch": 0.14627530465680755, + "grad_norm": 1.0914493799209595, + "learning_rate": 9.649655340518218e-06, + "loss": 0.3589, + "step": 7307 + }, + { + "epoch": 0.1462953231739359, + "grad_norm": 1.148205041885376, + "learning_rate": 9.649536117670481e-06, + "loss": 0.3505, + "step": 7308 + }, + { + "epoch": 0.14631534169106422, + "grad_norm": 0.9958631992340088, + "learning_rate": 9.649416875277093e-06, + "loss": 0.3261, + "step": 7309 + }, + { + "epoch": 0.14633536020819257, + "grad_norm": 1.0505625009536743, + "learning_rate": 9.649297613338557e-06, + "loss": 0.323, + "step": 7310 + }, + { + "epoch": 0.14635537872532092, + "grad_norm": 1.186075210571289, + "learning_rate": 9.649178331855376e-06, + "loss": 0.3406, + "step": 7311 + }, + { + "epoch": 0.14637539724244927, + "grad_norm": 1.1833747625350952, + "learning_rate": 9.64905903082805e-06, + "loss": 0.3528, + "step": 7312 + }, + { + "epoch": 0.1463954157595776, + "grad_norm": 0.9913502931594849, + "learning_rate": 9.648939710257079e-06, + "loss": 0.3364, + "step": 7313 + }, + { + "epoch": 0.14641543427670595, + "grad_norm": 1.0447653532028198, + "learning_rate": 9.648820370142965e-06, + "loss": 0.3488, + "step": 7314 + }, + { + "epoch": 0.1464354527938343, + "grad_norm": 1.0579092502593994, + "learning_rate": 9.648701010486213e-06, + "loss": 0.3169, + "step": 7315 + }, + { + "epoch": 0.14645547131096265, + "grad_norm": 1.0060230493545532, + "learning_rate": 9.648581631287322e-06, + "loss": 0.3452, + "step": 7316 + }, + { + "epoch": 0.14647548982809097, + "grad_norm": 1.2289423942565918, + "learning_rate": 9.648462232546792e-06, + "loss": 0.3098, + "step": 7317 + }, + { + "epoch": 0.14649550834521932, + "grad_norm": 1.0198065042495728, + "learning_rate": 9.648342814265129e-06, + "loss": 0.2955, + "step": 7318 + }, + { + "epoch": 0.14651552686234767, + "grad_norm": 1.1263002157211304, + "learning_rate": 9.648223376442834e-06, + "loss": 0.3481, + "step": 7319 + }, + { + "epoch": 0.14653554537947602, + "grad_norm": 1.1029913425445557, + "learning_rate": 9.648103919080406e-06, + "loss": 0.322, + "step": 7320 + }, + { + "epoch": 0.14655556389660435, + "grad_norm": 1.0148649215698242, + "learning_rate": 9.64798444217835e-06, + "loss": 0.3321, + "step": 7321 + }, + { + "epoch": 0.1465755824137327, + "grad_norm": 1.0603314638137817, + "learning_rate": 9.64786494573717e-06, + "loss": 0.3428, + "step": 7322 + }, + { + "epoch": 0.14659560093086105, + "grad_norm": 1.144494891166687, + "learning_rate": 9.647745429757362e-06, + "loss": 0.3113, + "step": 7323 + }, + { + "epoch": 0.1466156194479894, + "grad_norm": 1.8720934391021729, + "learning_rate": 9.647625894239434e-06, + "loss": 0.8301, + "step": 7324 + }, + { + "epoch": 0.14663563796511772, + "grad_norm": 0.9921284914016724, + "learning_rate": 9.647506339183888e-06, + "loss": 0.303, + "step": 7325 + }, + { + "epoch": 0.14665565648224607, + "grad_norm": 1.2102177143096924, + "learning_rate": 9.647386764591224e-06, + "loss": 0.2929, + "step": 7326 + }, + { + "epoch": 0.14667567499937442, + "grad_norm": 1.0541170835494995, + "learning_rate": 9.647267170461948e-06, + "loss": 0.2687, + "step": 7327 + }, + { + "epoch": 0.14669569351650277, + "grad_norm": 1.0378738641738892, + "learning_rate": 9.64714755679656e-06, + "loss": 0.389, + "step": 7328 + }, + { + "epoch": 0.1467157120336311, + "grad_norm": 1.0646730661392212, + "learning_rate": 9.647027923595564e-06, + "loss": 0.3389, + "step": 7329 + }, + { + "epoch": 0.14673573055075945, + "grad_norm": 1.1236284971237183, + "learning_rate": 9.646908270859462e-06, + "loss": 0.3045, + "step": 7330 + }, + { + "epoch": 0.1467557490678878, + "grad_norm": 1.1117558479309082, + "learning_rate": 9.646788598588757e-06, + "loss": 0.3014, + "step": 7331 + }, + { + "epoch": 0.14677576758501615, + "grad_norm": 1.1907124519348145, + "learning_rate": 9.646668906783955e-06, + "loss": 0.3711, + "step": 7332 + }, + { + "epoch": 0.14679578610214447, + "grad_norm": 1.0178682804107666, + "learning_rate": 9.646549195445555e-06, + "loss": 0.3093, + "step": 7333 + }, + { + "epoch": 0.14681580461927282, + "grad_norm": 1.1325809955596924, + "learning_rate": 9.646429464574064e-06, + "loss": 0.2925, + "step": 7334 + }, + { + "epoch": 0.14683582313640117, + "grad_norm": 1.0600478649139404, + "learning_rate": 9.64630971416998e-06, + "loss": 0.2963, + "step": 7335 + }, + { + "epoch": 0.14685584165352952, + "grad_norm": 1.3261151313781738, + "learning_rate": 9.646189944233814e-06, + "loss": 0.328, + "step": 7336 + }, + { + "epoch": 0.14687586017065785, + "grad_norm": 1.0592281818389893, + "learning_rate": 9.646070154766062e-06, + "loss": 0.3216, + "step": 7337 + }, + { + "epoch": 0.1468958786877862, + "grad_norm": 1.1554559469223022, + "learning_rate": 9.645950345767233e-06, + "loss": 0.3473, + "step": 7338 + }, + { + "epoch": 0.14691589720491455, + "grad_norm": 1.035110354423523, + "learning_rate": 9.645830517237829e-06, + "loss": 0.2923, + "step": 7339 + }, + { + "epoch": 0.1469359157220429, + "grad_norm": 1.187111496925354, + "learning_rate": 9.645710669178352e-06, + "loss": 0.3517, + "step": 7340 + }, + { + "epoch": 0.14695593423917122, + "grad_norm": 1.1669803857803345, + "learning_rate": 9.645590801589307e-06, + "loss": 0.3242, + "step": 7341 + }, + { + "epoch": 0.14697595275629957, + "grad_norm": 1.252651333808899, + "learning_rate": 9.645470914471197e-06, + "loss": 0.3292, + "step": 7342 + }, + { + "epoch": 0.14699597127342792, + "grad_norm": 1.1760532855987549, + "learning_rate": 9.64535100782453e-06, + "loss": 0.3318, + "step": 7343 + }, + { + "epoch": 0.14701598979055627, + "grad_norm": 1.7164164781570435, + "learning_rate": 9.645231081649804e-06, + "loss": 0.8101, + "step": 7344 + }, + { + "epoch": 0.1470360083076846, + "grad_norm": 1.2463525533676147, + "learning_rate": 9.64511113594753e-06, + "loss": 0.3784, + "step": 7345 + }, + { + "epoch": 0.14705602682481295, + "grad_norm": 2.038027048110962, + "learning_rate": 9.644991170718203e-06, + "loss": 0.847, + "step": 7346 + }, + { + "epoch": 0.1470760453419413, + "grad_norm": 1.1099374294281006, + "learning_rate": 9.644871185962337e-06, + "loss": 0.335, + "step": 7347 + }, + { + "epoch": 0.14709606385906965, + "grad_norm": 1.2550081014633179, + "learning_rate": 9.644751181680431e-06, + "loss": 0.3375, + "step": 7348 + }, + { + "epoch": 0.14711608237619797, + "grad_norm": 1.2142462730407715, + "learning_rate": 9.64463115787299e-06, + "loss": 0.3467, + "step": 7349 + }, + { + "epoch": 0.14713610089332632, + "grad_norm": 1.0520662069320679, + "learning_rate": 9.64451111454052e-06, + "loss": 0.3089, + "step": 7350 + }, + { + "epoch": 0.14715611941045467, + "grad_norm": 0.9941586256027222, + "learning_rate": 9.644391051683524e-06, + "loss": 0.3097, + "step": 7351 + }, + { + "epoch": 0.14717613792758302, + "grad_norm": 1.2247107028961182, + "learning_rate": 9.644270969302509e-06, + "loss": 0.3387, + "step": 7352 + }, + { + "epoch": 0.14719615644471135, + "grad_norm": 2.0372695922851562, + "learning_rate": 9.644150867397977e-06, + "loss": 0.8421, + "step": 7353 + }, + { + "epoch": 0.1472161749618397, + "grad_norm": 1.0569889545440674, + "learning_rate": 9.644030745970434e-06, + "loss": 0.3112, + "step": 7354 + }, + { + "epoch": 0.14723619347896805, + "grad_norm": 1.026806116104126, + "learning_rate": 9.643910605020386e-06, + "loss": 0.3097, + "step": 7355 + }, + { + "epoch": 0.1472562119960964, + "grad_norm": 1.12815523147583, + "learning_rate": 9.643790444548336e-06, + "loss": 0.2905, + "step": 7356 + }, + { + "epoch": 0.14727623051322472, + "grad_norm": 1.194126009941101, + "learning_rate": 9.643670264554792e-06, + "loss": 0.3874, + "step": 7357 + }, + { + "epoch": 0.14729624903035307, + "grad_norm": 1.23789644241333, + "learning_rate": 9.643550065040255e-06, + "loss": 0.3535, + "step": 7358 + }, + { + "epoch": 0.14731626754748142, + "grad_norm": 1.050846815109253, + "learning_rate": 9.643429846005235e-06, + "loss": 0.3584, + "step": 7359 + }, + { + "epoch": 0.14733628606460977, + "grad_norm": 1.0335520505905151, + "learning_rate": 9.643309607450235e-06, + "loss": 0.3281, + "step": 7360 + }, + { + "epoch": 0.1473563045817381, + "grad_norm": 1.074622392654419, + "learning_rate": 9.64318934937576e-06, + "loss": 0.2863, + "step": 7361 + }, + { + "epoch": 0.14737632309886645, + "grad_norm": 1.0591808557510376, + "learning_rate": 9.643069071782316e-06, + "loss": 0.3109, + "step": 7362 + }, + { + "epoch": 0.1473963416159948, + "grad_norm": 1.1424193382263184, + "learning_rate": 9.64294877467041e-06, + "loss": 0.3287, + "step": 7363 + }, + { + "epoch": 0.14741636013312315, + "grad_norm": 1.077181339263916, + "learning_rate": 9.642828458040546e-06, + "loss": 0.335, + "step": 7364 + }, + { + "epoch": 0.14743637865025147, + "grad_norm": 1.1568220853805542, + "learning_rate": 9.64270812189323e-06, + "loss": 0.3132, + "step": 7365 + }, + { + "epoch": 0.14745639716737982, + "grad_norm": 1.1832594871520996, + "learning_rate": 9.642587766228968e-06, + "loss": 0.3224, + "step": 7366 + }, + { + "epoch": 0.14747641568450817, + "grad_norm": 1.9311223030090332, + "learning_rate": 9.642467391048267e-06, + "loss": 0.8842, + "step": 7367 + }, + { + "epoch": 0.14749643420163652, + "grad_norm": 1.886549472808838, + "learning_rate": 9.642346996351632e-06, + "loss": 0.7768, + "step": 7368 + }, + { + "epoch": 0.14751645271876485, + "grad_norm": 1.1981465816497803, + "learning_rate": 9.64222658213957e-06, + "loss": 0.3076, + "step": 7369 + }, + { + "epoch": 0.1475364712358932, + "grad_norm": 1.1236112117767334, + "learning_rate": 9.642106148412583e-06, + "loss": 0.3057, + "step": 7370 + }, + { + "epoch": 0.14755648975302155, + "grad_norm": 1.6720690727233887, + "learning_rate": 9.641985695171182e-06, + "loss": 0.8465, + "step": 7371 + }, + { + "epoch": 0.1475765082701499, + "grad_norm": 1.0504647493362427, + "learning_rate": 9.641865222415876e-06, + "loss": 0.3108, + "step": 7372 + }, + { + "epoch": 0.14759652678727822, + "grad_norm": 1.3233832120895386, + "learning_rate": 9.641744730147164e-06, + "loss": 0.3523, + "step": 7373 + }, + { + "epoch": 0.14761654530440657, + "grad_norm": 1.2752934694290161, + "learning_rate": 9.641624218365556e-06, + "loss": 0.3557, + "step": 7374 + }, + { + "epoch": 0.14763656382153492, + "grad_norm": 1.0861536264419556, + "learning_rate": 9.64150368707156e-06, + "loss": 0.359, + "step": 7375 + }, + { + "epoch": 0.14765658233866327, + "grad_norm": 1.8696833848953247, + "learning_rate": 9.64138313626568e-06, + "loss": 0.7958, + "step": 7376 + }, + { + "epoch": 0.1476766008557916, + "grad_norm": 1.084925651550293, + "learning_rate": 9.641262565948426e-06, + "loss": 0.3082, + "step": 7377 + }, + { + "epoch": 0.14769661937291995, + "grad_norm": 1.1288584470748901, + "learning_rate": 9.641141976120301e-06, + "loss": 0.303, + "step": 7378 + }, + { + "epoch": 0.1477166378900483, + "grad_norm": 1.1770589351654053, + "learning_rate": 9.641021366781816e-06, + "loss": 0.306, + "step": 7379 + }, + { + "epoch": 0.14773665640717665, + "grad_norm": 1.07692289352417, + "learning_rate": 9.640900737933473e-06, + "loss": 0.3226, + "step": 7380 + }, + { + "epoch": 0.14775667492430497, + "grad_norm": 1.1270968914031982, + "learning_rate": 9.640780089575785e-06, + "loss": 0.2561, + "step": 7381 + }, + { + "epoch": 0.14777669344143332, + "grad_norm": 1.1681838035583496, + "learning_rate": 9.640659421709255e-06, + "loss": 0.2911, + "step": 7382 + }, + { + "epoch": 0.14779671195856167, + "grad_norm": 0.9581688642501831, + "learning_rate": 9.640538734334391e-06, + "loss": 0.2418, + "step": 7383 + }, + { + "epoch": 0.14781673047569002, + "grad_norm": 1.0937418937683105, + "learning_rate": 9.640418027451702e-06, + "loss": 0.3279, + "step": 7384 + }, + { + "epoch": 0.14783674899281835, + "grad_norm": 1.1707878112792969, + "learning_rate": 9.640297301061695e-06, + "loss": 0.3827, + "step": 7385 + }, + { + "epoch": 0.1478567675099467, + "grad_norm": 1.9093852043151855, + "learning_rate": 9.640176555164874e-06, + "loss": 0.9042, + "step": 7386 + }, + { + "epoch": 0.14787678602707505, + "grad_norm": 1.1509640216827393, + "learning_rate": 9.640055789761751e-06, + "loss": 0.296, + "step": 7387 + }, + { + "epoch": 0.1478968045442034, + "grad_norm": 1.0272995233535767, + "learning_rate": 9.639935004852831e-06, + "loss": 0.3148, + "step": 7388 + }, + { + "epoch": 0.14791682306133172, + "grad_norm": 1.2529542446136475, + "learning_rate": 9.639814200438624e-06, + "loss": 0.3888, + "step": 7389 + }, + { + "epoch": 0.14793684157846007, + "grad_norm": 1.2303582429885864, + "learning_rate": 9.639693376519636e-06, + "loss": 0.3395, + "step": 7390 + }, + { + "epoch": 0.14795686009558842, + "grad_norm": 0.9674539566040039, + "learning_rate": 9.639572533096376e-06, + "loss": 0.3045, + "step": 7391 + }, + { + "epoch": 0.14797687861271677, + "grad_norm": 1.0662362575531006, + "learning_rate": 9.639451670169352e-06, + "loss": 0.3396, + "step": 7392 + }, + { + "epoch": 0.1479968971298451, + "grad_norm": 1.1037893295288086, + "learning_rate": 9.63933078773907e-06, + "loss": 0.3635, + "step": 7393 + }, + { + "epoch": 0.14801691564697345, + "grad_norm": 1.1017428636550903, + "learning_rate": 9.63920988580604e-06, + "loss": 0.3223, + "step": 7394 + }, + { + "epoch": 0.1480369341641018, + "grad_norm": 1.094617247581482, + "learning_rate": 9.639088964370769e-06, + "loss": 0.3227, + "step": 7395 + }, + { + "epoch": 0.14805695268123015, + "grad_norm": 1.2473796606063843, + "learning_rate": 9.638968023433767e-06, + "loss": 0.3689, + "step": 7396 + }, + { + "epoch": 0.14807697119835847, + "grad_norm": 1.169235110282898, + "learning_rate": 9.638847062995545e-06, + "loss": 0.3491, + "step": 7397 + }, + { + "epoch": 0.14809698971548682, + "grad_norm": 1.1680095195770264, + "learning_rate": 9.638726083056604e-06, + "loss": 0.3856, + "step": 7398 + }, + { + "epoch": 0.14811700823261517, + "grad_norm": 1.164941668510437, + "learning_rate": 9.63860508361746e-06, + "loss": 0.374, + "step": 7399 + }, + { + "epoch": 0.14813702674974352, + "grad_norm": 1.0402846336364746, + "learning_rate": 9.638484064678616e-06, + "loss": 0.3443, + "step": 7400 + }, + { + "epoch": 0.14815704526687185, + "grad_norm": 1.071044683456421, + "learning_rate": 9.638363026240586e-06, + "loss": 0.2879, + "step": 7401 + }, + { + "epoch": 0.1481770637840002, + "grad_norm": 1.1027556657791138, + "learning_rate": 9.638241968303873e-06, + "loss": 0.3371, + "step": 7402 + }, + { + "epoch": 0.14819708230112855, + "grad_norm": 1.1343200206756592, + "learning_rate": 9.638120890868991e-06, + "loss": 0.3319, + "step": 7403 + }, + { + "epoch": 0.1482171008182569, + "grad_norm": 1.2269200086593628, + "learning_rate": 9.637999793936445e-06, + "loss": 0.3165, + "step": 7404 + }, + { + "epoch": 0.14823711933538522, + "grad_norm": 1.0865672826766968, + "learning_rate": 9.637878677506746e-06, + "loss": 0.2624, + "step": 7405 + }, + { + "epoch": 0.14825713785251357, + "grad_norm": 1.0674270391464233, + "learning_rate": 9.637757541580405e-06, + "loss": 0.3388, + "step": 7406 + }, + { + "epoch": 0.14827715636964192, + "grad_norm": 1.0277761220932007, + "learning_rate": 9.637636386157929e-06, + "loss": 0.3177, + "step": 7407 + }, + { + "epoch": 0.14829717488677027, + "grad_norm": 2.136819362640381, + "learning_rate": 9.637515211239827e-06, + "loss": 0.7974, + "step": 7408 + }, + { + "epoch": 0.1483171934038986, + "grad_norm": 1.740027666091919, + "learning_rate": 9.63739401682661e-06, + "loss": 0.8498, + "step": 7409 + }, + { + "epoch": 0.14833721192102695, + "grad_norm": 0.9648458361625671, + "learning_rate": 9.637272802918785e-06, + "loss": 0.2902, + "step": 7410 + }, + { + "epoch": 0.1483572304381553, + "grad_norm": 1.0982650518417358, + "learning_rate": 9.637151569516865e-06, + "loss": 0.3797, + "step": 7411 + }, + { + "epoch": 0.14837724895528365, + "grad_norm": 1.0447463989257812, + "learning_rate": 9.637030316621356e-06, + "loss": 0.356, + "step": 7412 + }, + { + "epoch": 0.14839726747241197, + "grad_norm": 1.159214973449707, + "learning_rate": 9.636909044232769e-06, + "loss": 0.3166, + "step": 7413 + }, + { + "epoch": 0.14841728598954032, + "grad_norm": 1.8103842735290527, + "learning_rate": 9.636787752351618e-06, + "loss": 0.8845, + "step": 7414 + }, + { + "epoch": 0.14843730450666867, + "grad_norm": 1.838596224784851, + "learning_rate": 9.636666440978404e-06, + "loss": 0.8507, + "step": 7415 + }, + { + "epoch": 0.14845732302379702, + "grad_norm": 1.044043779373169, + "learning_rate": 9.636545110113646e-06, + "loss": 0.3121, + "step": 7416 + }, + { + "epoch": 0.14847734154092534, + "grad_norm": 1.0978038311004639, + "learning_rate": 9.63642375975785e-06, + "loss": 0.2599, + "step": 7417 + }, + { + "epoch": 0.1484973600580537, + "grad_norm": 1.092827558517456, + "learning_rate": 9.636302389911522e-06, + "loss": 0.299, + "step": 7418 + }, + { + "epoch": 0.14851737857518205, + "grad_norm": 1.1813033819198608, + "learning_rate": 9.636181000575182e-06, + "loss": 0.3222, + "step": 7419 + }, + { + "epoch": 0.1485373970923104, + "grad_norm": 1.126658320426941, + "learning_rate": 9.636059591749332e-06, + "loss": 0.326, + "step": 7420 + }, + { + "epoch": 0.14855741560943872, + "grad_norm": 1.2649730443954468, + "learning_rate": 9.635938163434485e-06, + "loss": 0.3302, + "step": 7421 + }, + { + "epoch": 0.14857743412656707, + "grad_norm": 1.100911021232605, + "learning_rate": 9.635816715631153e-06, + "loss": 0.322, + "step": 7422 + }, + { + "epoch": 0.14859745264369542, + "grad_norm": 1.050976276397705, + "learning_rate": 9.635695248339843e-06, + "loss": 0.3132, + "step": 7423 + }, + { + "epoch": 0.14861747116082377, + "grad_norm": 1.0657455921173096, + "learning_rate": 9.63557376156107e-06, + "loss": 0.3464, + "step": 7424 + }, + { + "epoch": 0.1486374896779521, + "grad_norm": 1.1433916091918945, + "learning_rate": 9.63545225529534e-06, + "loss": 0.3604, + "step": 7425 + }, + { + "epoch": 0.14865750819508045, + "grad_norm": 1.12540602684021, + "learning_rate": 9.635330729543168e-06, + "loss": 0.3113, + "step": 7426 + }, + { + "epoch": 0.1486775267122088, + "grad_norm": 1.7963076829910278, + "learning_rate": 9.635209184305064e-06, + "loss": 0.8675, + "step": 7427 + }, + { + "epoch": 0.14869754522933715, + "grad_norm": 1.1188493967056274, + "learning_rate": 9.635087619581536e-06, + "loss": 0.334, + "step": 7428 + }, + { + "epoch": 0.14871756374646547, + "grad_norm": 1.1849746704101562, + "learning_rate": 9.6349660353731e-06, + "loss": 0.3538, + "step": 7429 + }, + { + "epoch": 0.14873758226359382, + "grad_norm": 1.1186388731002808, + "learning_rate": 9.634844431680262e-06, + "loss": 0.3531, + "step": 7430 + }, + { + "epoch": 0.14875760078072217, + "grad_norm": 1.093907117843628, + "learning_rate": 9.634722808503536e-06, + "loss": 0.3011, + "step": 7431 + }, + { + "epoch": 0.14877761929785052, + "grad_norm": 1.0974085330963135, + "learning_rate": 9.634601165843432e-06, + "loss": 0.3789, + "step": 7432 + }, + { + "epoch": 0.14879763781497884, + "grad_norm": 1.0951359272003174, + "learning_rate": 9.634479503700463e-06, + "loss": 0.3533, + "step": 7433 + }, + { + "epoch": 0.1488176563321072, + "grad_norm": 1.0405538082122803, + "learning_rate": 9.634357822075138e-06, + "loss": 0.3371, + "step": 7434 + }, + { + "epoch": 0.14883767484923555, + "grad_norm": 1.1150808334350586, + "learning_rate": 9.634236120967972e-06, + "loss": 0.3183, + "step": 7435 + }, + { + "epoch": 0.1488576933663639, + "grad_norm": 1.161342978477478, + "learning_rate": 9.634114400379474e-06, + "loss": 0.3017, + "step": 7436 + }, + { + "epoch": 0.14887771188349222, + "grad_norm": 1.0899001359939575, + "learning_rate": 9.633992660310156e-06, + "loss": 0.3302, + "step": 7437 + }, + { + "epoch": 0.14889773040062057, + "grad_norm": 1.2949333190917969, + "learning_rate": 9.633870900760529e-06, + "loss": 0.3118, + "step": 7438 + }, + { + "epoch": 0.14891774891774892, + "grad_norm": 1.1567902565002441, + "learning_rate": 9.63374912173111e-06, + "loss": 0.3526, + "step": 7439 + }, + { + "epoch": 0.14893776743487727, + "grad_norm": 1.0948638916015625, + "learning_rate": 9.633627323222403e-06, + "loss": 0.3289, + "step": 7440 + }, + { + "epoch": 0.1489577859520056, + "grad_norm": 1.0377388000488281, + "learning_rate": 9.633505505234925e-06, + "loss": 0.3166, + "step": 7441 + }, + { + "epoch": 0.14897780446913395, + "grad_norm": 1.0721237659454346, + "learning_rate": 9.633383667769186e-06, + "loss": 0.317, + "step": 7442 + }, + { + "epoch": 0.1489978229862623, + "grad_norm": 1.2838016748428345, + "learning_rate": 9.6332618108257e-06, + "loss": 0.3549, + "step": 7443 + }, + { + "epoch": 0.14901784150339065, + "grad_norm": 1.0757168531417847, + "learning_rate": 9.63313993440498e-06, + "loss": 0.3327, + "step": 7444 + }, + { + "epoch": 0.14903786002051897, + "grad_norm": 1.0600496530532837, + "learning_rate": 9.633018038507534e-06, + "loss": 0.3035, + "step": 7445 + }, + { + "epoch": 0.14905787853764732, + "grad_norm": 1.1406447887420654, + "learning_rate": 9.632896123133879e-06, + "loss": 0.3836, + "step": 7446 + }, + { + "epoch": 0.14907789705477567, + "grad_norm": 1.1216615438461304, + "learning_rate": 9.632774188284525e-06, + "loss": 0.3138, + "step": 7447 + }, + { + "epoch": 0.14909791557190402, + "grad_norm": 1.1392959356307983, + "learning_rate": 9.632652233959987e-06, + "loss": 0.3208, + "step": 7448 + }, + { + "epoch": 0.14911793408903234, + "grad_norm": 1.1522797346115112, + "learning_rate": 9.632530260160774e-06, + "loss": 0.3498, + "step": 7449 + }, + { + "epoch": 0.1491379526061607, + "grad_norm": 1.1392918825149536, + "learning_rate": 9.632408266887401e-06, + "loss": 0.3403, + "step": 7450 + }, + { + "epoch": 0.14915797112328905, + "grad_norm": 1.1223440170288086, + "learning_rate": 9.632286254140382e-06, + "loss": 0.3307, + "step": 7451 + }, + { + "epoch": 0.1491779896404174, + "grad_norm": 1.1003977060317993, + "learning_rate": 9.632164221920225e-06, + "loss": 0.3275, + "step": 7452 + }, + { + "epoch": 0.14919800815754572, + "grad_norm": 1.2726330757141113, + "learning_rate": 9.632042170227451e-06, + "loss": 0.3168, + "step": 7453 + }, + { + "epoch": 0.14921802667467407, + "grad_norm": 1.2714276313781738, + "learning_rate": 9.631920099062566e-06, + "loss": 0.3598, + "step": 7454 + }, + { + "epoch": 0.14923804519180242, + "grad_norm": 1.0944172143936157, + "learning_rate": 9.631798008426085e-06, + "loss": 0.3729, + "step": 7455 + }, + { + "epoch": 0.14925806370893077, + "grad_norm": 1.0725939273834229, + "learning_rate": 9.631675898318525e-06, + "loss": 0.3074, + "step": 7456 + }, + { + "epoch": 0.1492780822260591, + "grad_norm": 1.1989465951919556, + "learning_rate": 9.631553768740393e-06, + "loss": 0.2992, + "step": 7457 + }, + { + "epoch": 0.14929810074318745, + "grad_norm": 1.1815646886825562, + "learning_rate": 9.631431619692207e-06, + "loss": 0.2991, + "step": 7458 + }, + { + "epoch": 0.1493181192603158, + "grad_norm": 1.066227912902832, + "learning_rate": 9.63130945117448e-06, + "loss": 0.3781, + "step": 7459 + }, + { + "epoch": 0.14933813777744415, + "grad_norm": 1.0536569356918335, + "learning_rate": 9.631187263187722e-06, + "loss": 0.3118, + "step": 7460 + }, + { + "epoch": 0.14935815629457247, + "grad_norm": 1.2080720663070679, + "learning_rate": 9.631065055732451e-06, + "loss": 0.3353, + "step": 7461 + }, + { + "epoch": 0.14937817481170082, + "grad_norm": 1.0606136322021484, + "learning_rate": 9.63094282880918e-06, + "loss": 0.2895, + "step": 7462 + }, + { + "epoch": 0.14939819332882917, + "grad_norm": 1.1037566661834717, + "learning_rate": 9.630820582418422e-06, + "loss": 0.3179, + "step": 7463 + }, + { + "epoch": 0.14941821184595752, + "grad_norm": 1.3069863319396973, + "learning_rate": 9.630698316560689e-06, + "loss": 0.2878, + "step": 7464 + }, + { + "epoch": 0.14943823036308584, + "grad_norm": 1.3039653301239014, + "learning_rate": 9.630576031236498e-06, + "loss": 0.3313, + "step": 7465 + }, + { + "epoch": 0.1494582488802142, + "grad_norm": 1.7167370319366455, + "learning_rate": 9.630453726446362e-06, + "loss": 0.8309, + "step": 7466 + }, + { + "epoch": 0.14947826739734255, + "grad_norm": 1.074121356010437, + "learning_rate": 9.630331402190796e-06, + "loss": 0.3037, + "step": 7467 + }, + { + "epoch": 0.1494982859144709, + "grad_norm": 1.0569562911987305, + "learning_rate": 9.630209058470313e-06, + "loss": 0.3398, + "step": 7468 + }, + { + "epoch": 0.14951830443159922, + "grad_norm": 1.155329704284668, + "learning_rate": 9.630086695285426e-06, + "loss": 0.3182, + "step": 7469 + }, + { + "epoch": 0.14953832294872757, + "grad_norm": 1.0970523357391357, + "learning_rate": 9.62996431263665e-06, + "loss": 0.3242, + "step": 7470 + }, + { + "epoch": 0.14955834146585592, + "grad_norm": 1.4639631509780884, + "learning_rate": 9.629841910524502e-06, + "loss": 0.3585, + "step": 7471 + }, + { + "epoch": 0.14957835998298427, + "grad_norm": 1.0397019386291504, + "learning_rate": 9.629719488949497e-06, + "loss": 0.3113, + "step": 7472 + }, + { + "epoch": 0.1495983785001126, + "grad_norm": 1.0715875625610352, + "learning_rate": 9.629597047912144e-06, + "loss": 0.3199, + "step": 7473 + }, + { + "epoch": 0.14961839701724094, + "grad_norm": 1.850277066230774, + "learning_rate": 9.629474587412963e-06, + "loss": 0.7556, + "step": 7474 + }, + { + "epoch": 0.1496384155343693, + "grad_norm": 1.863253116607666, + "learning_rate": 9.629352107452467e-06, + "loss": 0.7597, + "step": 7475 + }, + { + "epoch": 0.14965843405149765, + "grad_norm": 1.4082789421081543, + "learning_rate": 9.629229608031172e-06, + "loss": 0.3636, + "step": 7476 + }, + { + "epoch": 0.14967845256862597, + "grad_norm": 1.1310375928878784, + "learning_rate": 9.629107089149589e-06, + "loss": 0.2983, + "step": 7477 + }, + { + "epoch": 0.14969847108575432, + "grad_norm": 1.1545395851135254, + "learning_rate": 9.62898455080824e-06, + "loss": 0.3464, + "step": 7478 + }, + { + "epoch": 0.14971848960288267, + "grad_norm": 1.0958735942840576, + "learning_rate": 9.628861993007633e-06, + "loss": 0.3014, + "step": 7479 + }, + { + "epoch": 0.14973850812001102, + "grad_norm": 1.0552146434783936, + "learning_rate": 9.628739415748287e-06, + "loss": 0.3083, + "step": 7480 + }, + { + "epoch": 0.14975852663713934, + "grad_norm": 1.0768520832061768, + "learning_rate": 9.628616819030717e-06, + "loss": 0.2945, + "step": 7481 + }, + { + "epoch": 0.1497785451542677, + "grad_norm": 1.0562996864318848, + "learning_rate": 9.628494202855439e-06, + "loss": 0.3411, + "step": 7482 + }, + { + "epoch": 0.14979856367139605, + "grad_norm": 1.7685493230819702, + "learning_rate": 9.628371567222965e-06, + "loss": 0.922, + "step": 7483 + }, + { + "epoch": 0.1498185821885244, + "grad_norm": 1.2032133340835571, + "learning_rate": 9.628248912133816e-06, + "loss": 0.3242, + "step": 7484 + }, + { + "epoch": 0.14983860070565272, + "grad_norm": 1.1415404081344604, + "learning_rate": 9.628126237588503e-06, + "loss": 0.3196, + "step": 7485 + }, + { + "epoch": 0.14985861922278107, + "grad_norm": 1.0124249458312988, + "learning_rate": 9.628003543587543e-06, + "loss": 0.3669, + "step": 7486 + }, + { + "epoch": 0.14987863773990942, + "grad_norm": 1.2830299139022827, + "learning_rate": 9.62788083013145e-06, + "loss": 0.3419, + "step": 7487 + }, + { + "epoch": 0.14989865625703777, + "grad_norm": 1.0882130861282349, + "learning_rate": 9.627758097220746e-06, + "loss": 0.3293, + "step": 7488 + }, + { + "epoch": 0.1499186747741661, + "grad_norm": 0.9901626110076904, + "learning_rate": 9.62763534485594e-06, + "loss": 0.347, + "step": 7489 + }, + { + "epoch": 0.14993869329129444, + "grad_norm": 1.0275726318359375, + "learning_rate": 9.627512573037552e-06, + "loss": 0.3541, + "step": 7490 + }, + { + "epoch": 0.1499587118084228, + "grad_norm": 1.1445142030715942, + "learning_rate": 9.627389781766096e-06, + "loss": 0.3554, + "step": 7491 + }, + { + "epoch": 0.14997873032555115, + "grad_norm": 1.10397469997406, + "learning_rate": 9.627266971042091e-06, + "loss": 0.3698, + "step": 7492 + }, + { + "epoch": 0.14999874884267947, + "grad_norm": 0.9700477123260498, + "learning_rate": 9.627144140866052e-06, + "loss": 0.3068, + "step": 7493 + }, + { + "epoch": 0.15001876735980782, + "grad_norm": 1.04306161403656, + "learning_rate": 9.627021291238491e-06, + "loss": 0.3159, + "step": 7494 + }, + { + "epoch": 0.15003878587693617, + "grad_norm": 1.2613708972930908, + "learning_rate": 9.626898422159932e-06, + "loss": 0.3594, + "step": 7495 + }, + { + "epoch": 0.15005880439406452, + "grad_norm": 1.2393676042556763, + "learning_rate": 9.626775533630886e-06, + "loss": 0.371, + "step": 7496 + }, + { + "epoch": 0.15007882291119284, + "grad_norm": 1.0509380102157593, + "learning_rate": 9.626652625651872e-06, + "loss": 0.2975, + "step": 7497 + }, + { + "epoch": 0.1500988414283212, + "grad_norm": 1.162516474723816, + "learning_rate": 9.626529698223405e-06, + "loss": 0.3144, + "step": 7498 + }, + { + "epoch": 0.15011885994544955, + "grad_norm": 1.1428099870681763, + "learning_rate": 9.626406751346005e-06, + "loss": 0.3446, + "step": 7499 + }, + { + "epoch": 0.1501388784625779, + "grad_norm": 0.9386906027793884, + "learning_rate": 9.626283785020184e-06, + "loss": 0.3045, + "step": 7500 + }, + { + "epoch": 0.15015889697970622, + "grad_norm": 1.0355632305145264, + "learning_rate": 9.626160799246462e-06, + "loss": 0.2895, + "step": 7501 + }, + { + "epoch": 0.15017891549683457, + "grad_norm": 1.1626096963882446, + "learning_rate": 9.626037794025358e-06, + "loss": 0.3429, + "step": 7502 + }, + { + "epoch": 0.15019893401396292, + "grad_norm": 1.836624026298523, + "learning_rate": 9.625914769357384e-06, + "loss": 0.8143, + "step": 7503 + }, + { + "epoch": 0.15021895253109127, + "grad_norm": 1.0982645750045776, + "learning_rate": 9.625791725243061e-06, + "loss": 0.3436, + "step": 7504 + }, + { + "epoch": 0.1502389710482196, + "grad_norm": 1.1651325225830078, + "learning_rate": 9.625668661682905e-06, + "loss": 0.3418, + "step": 7505 + }, + { + "epoch": 0.15025898956534794, + "grad_norm": 1.2375208139419556, + "learning_rate": 9.625545578677434e-06, + "loss": 0.2837, + "step": 7506 + }, + { + "epoch": 0.1502790080824763, + "grad_norm": 1.127305507659912, + "learning_rate": 9.625422476227162e-06, + "loss": 0.2925, + "step": 7507 + }, + { + "epoch": 0.15029902659960465, + "grad_norm": 1.1171008348464966, + "learning_rate": 9.625299354332613e-06, + "loss": 0.3807, + "step": 7508 + }, + { + "epoch": 0.15031904511673297, + "grad_norm": 0.989334762096405, + "learning_rate": 9.625176212994298e-06, + "loss": 0.2607, + "step": 7509 + }, + { + "epoch": 0.15033906363386132, + "grad_norm": 1.154258370399475, + "learning_rate": 9.625053052212739e-06, + "loss": 0.3179, + "step": 7510 + }, + { + "epoch": 0.15035908215098967, + "grad_norm": 1.054072618484497, + "learning_rate": 9.624929871988451e-06, + "loss": 0.3222, + "step": 7511 + }, + { + "epoch": 0.15037910066811802, + "grad_norm": 1.3300002813339233, + "learning_rate": 9.624806672321956e-06, + "loss": 0.3513, + "step": 7512 + }, + { + "epoch": 0.15039911918524634, + "grad_norm": 1.0567213296890259, + "learning_rate": 9.624683453213767e-06, + "loss": 0.3247, + "step": 7513 + }, + { + "epoch": 0.1504191377023747, + "grad_norm": 1.1048624515533447, + "learning_rate": 9.624560214664403e-06, + "loss": 0.37, + "step": 7514 + }, + { + "epoch": 0.15043915621950305, + "grad_norm": 1.2163639068603516, + "learning_rate": 9.624436956674385e-06, + "loss": 0.2999, + "step": 7515 + }, + { + "epoch": 0.1504591747366314, + "grad_norm": 1.1903135776519775, + "learning_rate": 9.624313679244229e-06, + "loss": 0.3926, + "step": 7516 + }, + { + "epoch": 0.15047919325375972, + "grad_norm": 1.1396996974945068, + "learning_rate": 9.624190382374451e-06, + "loss": 0.3231, + "step": 7517 + }, + { + "epoch": 0.15049921177088807, + "grad_norm": 2.0336759090423584, + "learning_rate": 9.624067066065574e-06, + "loss": 0.8072, + "step": 7518 + }, + { + "epoch": 0.15051923028801642, + "grad_norm": 1.095416784286499, + "learning_rate": 9.623943730318115e-06, + "loss": 0.3387, + "step": 7519 + }, + { + "epoch": 0.15053924880514477, + "grad_norm": 1.112327218055725, + "learning_rate": 9.62382037513259e-06, + "loss": 0.3337, + "step": 7520 + }, + { + "epoch": 0.1505592673222731, + "grad_norm": 1.125236988067627, + "learning_rate": 9.62369700050952e-06, + "loss": 0.3474, + "step": 7521 + }, + { + "epoch": 0.15057928583940144, + "grad_norm": 1.0853992700576782, + "learning_rate": 9.623573606449424e-06, + "loss": 0.2911, + "step": 7522 + }, + { + "epoch": 0.1505993043565298, + "grad_norm": 1.1573352813720703, + "learning_rate": 9.623450192952816e-06, + "loss": 0.3291, + "step": 7523 + }, + { + "epoch": 0.15061932287365815, + "grad_norm": 1.0325123071670532, + "learning_rate": 9.623326760020222e-06, + "loss": 0.2866, + "step": 7524 + }, + { + "epoch": 0.15063934139078647, + "grad_norm": 1.0148109197616577, + "learning_rate": 9.623203307652156e-06, + "loss": 0.3294, + "step": 7525 + }, + { + "epoch": 0.15065935990791482, + "grad_norm": 1.051623821258545, + "learning_rate": 9.623079835849138e-06, + "loss": 0.3189, + "step": 7526 + }, + { + "epoch": 0.15067937842504317, + "grad_norm": 1.1030793190002441, + "learning_rate": 9.622956344611687e-06, + "loss": 0.356, + "step": 7527 + }, + { + "epoch": 0.15069939694217152, + "grad_norm": 1.1592961549758911, + "learning_rate": 9.622832833940321e-06, + "loss": 0.3369, + "step": 7528 + }, + { + "epoch": 0.15071941545929984, + "grad_norm": 1.066511631011963, + "learning_rate": 9.622709303835563e-06, + "loss": 0.3082, + "step": 7529 + }, + { + "epoch": 0.1507394339764282, + "grad_norm": 1.1632213592529297, + "learning_rate": 9.62258575429793e-06, + "loss": 0.3637, + "step": 7530 + }, + { + "epoch": 0.15075945249355654, + "grad_norm": 1.0823568105697632, + "learning_rate": 9.62246218532794e-06, + "loss": 0.3534, + "step": 7531 + }, + { + "epoch": 0.1507794710106849, + "grad_norm": 1.1838451623916626, + "learning_rate": 9.622338596926114e-06, + "loss": 0.3472, + "step": 7532 + }, + { + "epoch": 0.15079948952781322, + "grad_norm": 1.2283546924591064, + "learning_rate": 9.622214989092972e-06, + "loss": 0.3447, + "step": 7533 + }, + { + "epoch": 0.15081950804494157, + "grad_norm": 1.1347922086715698, + "learning_rate": 9.622091361829032e-06, + "loss": 0.318, + "step": 7534 + }, + { + "epoch": 0.15083952656206992, + "grad_norm": 1.0492907762527466, + "learning_rate": 9.621967715134815e-06, + "loss": 0.3195, + "step": 7535 + }, + { + "epoch": 0.15085954507919827, + "grad_norm": 1.9463748931884766, + "learning_rate": 9.62184404901084e-06, + "loss": 0.8748, + "step": 7536 + }, + { + "epoch": 0.1508795635963266, + "grad_norm": 1.3750015497207642, + "learning_rate": 9.621720363457627e-06, + "loss": 0.3611, + "step": 7537 + }, + { + "epoch": 0.15089958211345494, + "grad_norm": 1.864507794380188, + "learning_rate": 9.621596658475699e-06, + "loss": 0.8817, + "step": 7538 + }, + { + "epoch": 0.1509196006305833, + "grad_norm": 1.3517290353775024, + "learning_rate": 9.62147293406557e-06, + "loss": 0.4049, + "step": 7539 + }, + { + "epoch": 0.15093961914771165, + "grad_norm": 1.120086908340454, + "learning_rate": 9.621349190227765e-06, + "loss": 0.308, + "step": 7540 + }, + { + "epoch": 0.15095963766483997, + "grad_norm": 1.1726479530334473, + "learning_rate": 9.621225426962802e-06, + "loss": 0.3487, + "step": 7541 + }, + { + "epoch": 0.15097965618196832, + "grad_norm": 1.0713284015655518, + "learning_rate": 9.621101644271201e-06, + "loss": 0.3411, + "step": 7542 + }, + { + "epoch": 0.15099967469909667, + "grad_norm": 1.051052212715149, + "learning_rate": 9.620977842153482e-06, + "loss": 0.2612, + "step": 7543 + }, + { + "epoch": 0.15101969321622502, + "grad_norm": 1.1700029373168945, + "learning_rate": 9.62085402061017e-06, + "loss": 0.3166, + "step": 7544 + }, + { + "epoch": 0.15103971173335334, + "grad_norm": 2.12058162689209, + "learning_rate": 9.62073017964178e-06, + "loss": 0.8294, + "step": 7545 + }, + { + "epoch": 0.1510597302504817, + "grad_norm": 1.0161343812942505, + "learning_rate": 9.620606319248836e-06, + "loss": 0.3064, + "step": 7546 + }, + { + "epoch": 0.15107974876761004, + "grad_norm": 1.0726680755615234, + "learning_rate": 9.620482439431855e-06, + "loss": 0.3281, + "step": 7547 + }, + { + "epoch": 0.1510997672847384, + "grad_norm": 1.2254724502563477, + "learning_rate": 9.620358540191363e-06, + "loss": 0.2672, + "step": 7548 + }, + { + "epoch": 0.15111978580186672, + "grad_norm": 1.0745279788970947, + "learning_rate": 9.620234621527877e-06, + "loss": 0.3108, + "step": 7549 + }, + { + "epoch": 0.15113980431899507, + "grad_norm": 1.1849088668823242, + "learning_rate": 9.620110683441919e-06, + "loss": 0.3559, + "step": 7550 + }, + { + "epoch": 0.15115982283612342, + "grad_norm": 1.0676729679107666, + "learning_rate": 9.61998672593401e-06, + "loss": 0.3163, + "step": 7551 + }, + { + "epoch": 0.15117984135325177, + "grad_norm": 1.2122427225112915, + "learning_rate": 9.619862749004669e-06, + "loss": 0.3672, + "step": 7552 + }, + { + "epoch": 0.1511998598703801, + "grad_norm": 1.0198107957839966, + "learning_rate": 9.619738752654421e-06, + "loss": 0.3322, + "step": 7553 + }, + { + "epoch": 0.15121987838750844, + "grad_norm": 1.0596429109573364, + "learning_rate": 9.619614736883787e-06, + "loss": 0.2924, + "step": 7554 + }, + { + "epoch": 0.1512398969046368, + "grad_norm": 1.245444655418396, + "learning_rate": 9.619490701693284e-06, + "loss": 0.3322, + "step": 7555 + }, + { + "epoch": 0.15125991542176515, + "grad_norm": 1.1071763038635254, + "learning_rate": 9.619366647083437e-06, + "loss": 0.3776, + "step": 7556 + }, + { + "epoch": 0.15127993393889347, + "grad_norm": 1.0718626976013184, + "learning_rate": 9.619242573054768e-06, + "loss": 0.3343, + "step": 7557 + }, + { + "epoch": 0.15129995245602182, + "grad_norm": 2.1325879096984863, + "learning_rate": 9.619118479607797e-06, + "loss": 0.8006, + "step": 7558 + }, + { + "epoch": 0.15131997097315017, + "grad_norm": 0.9909068942070007, + "learning_rate": 9.618994366743044e-06, + "loss": 0.3178, + "step": 7559 + }, + { + "epoch": 0.15133998949027852, + "grad_norm": 1.8255478143692017, + "learning_rate": 9.618870234461036e-06, + "loss": 0.9079, + "step": 7560 + }, + { + "epoch": 0.15136000800740684, + "grad_norm": 1.108872890472412, + "learning_rate": 9.618746082762289e-06, + "loss": 0.3576, + "step": 7561 + }, + { + "epoch": 0.1513800265245352, + "grad_norm": 1.0739960670471191, + "learning_rate": 9.618621911647328e-06, + "loss": 0.3579, + "step": 7562 + }, + { + "epoch": 0.15140004504166354, + "grad_norm": 1.1669025421142578, + "learning_rate": 9.618497721116675e-06, + "loss": 0.2951, + "step": 7563 + }, + { + "epoch": 0.1514200635587919, + "grad_norm": 1.682974100112915, + "learning_rate": 9.61837351117085e-06, + "loss": 0.8414, + "step": 7564 + }, + { + "epoch": 0.15144008207592022, + "grad_norm": 1.202345609664917, + "learning_rate": 9.618249281810378e-06, + "loss": 0.335, + "step": 7565 + }, + { + "epoch": 0.15146010059304857, + "grad_norm": 1.152785062789917, + "learning_rate": 9.61812503303578e-06, + "loss": 0.3649, + "step": 7566 + }, + { + "epoch": 0.15148011911017692, + "grad_norm": 1.3227273225784302, + "learning_rate": 9.618000764847577e-06, + "loss": 0.324, + "step": 7567 + }, + { + "epoch": 0.15150013762730527, + "grad_norm": 1.0577311515808105, + "learning_rate": 9.617876477246293e-06, + "loss": 0.3133, + "step": 7568 + }, + { + "epoch": 0.1515201561444336, + "grad_norm": 1.0174111127853394, + "learning_rate": 9.61775217023245e-06, + "loss": 0.2894, + "step": 7569 + }, + { + "epoch": 0.15154017466156194, + "grad_norm": 1.0761737823486328, + "learning_rate": 9.617627843806571e-06, + "loss": 0.3771, + "step": 7570 + }, + { + "epoch": 0.1515601931786903, + "grad_norm": 1.2173644304275513, + "learning_rate": 9.617503497969178e-06, + "loss": 0.2999, + "step": 7571 + }, + { + "epoch": 0.15158021169581865, + "grad_norm": 0.9784196615219116, + "learning_rate": 9.617379132720794e-06, + "loss": 0.3097, + "step": 7572 + }, + { + "epoch": 0.15160023021294697, + "grad_norm": 1.201116919517517, + "learning_rate": 9.617254748061943e-06, + "loss": 0.3161, + "step": 7573 + }, + { + "epoch": 0.15162024873007532, + "grad_norm": 1.0513964891433716, + "learning_rate": 9.617130343993146e-06, + "loss": 0.3183, + "step": 7574 + }, + { + "epoch": 0.15164026724720367, + "grad_norm": 1.1071587800979614, + "learning_rate": 9.617005920514926e-06, + "loss": 0.2901, + "step": 7575 + }, + { + "epoch": 0.15166028576433202, + "grad_norm": 1.2260563373565674, + "learning_rate": 9.616881477627807e-06, + "loss": 0.3579, + "step": 7576 + }, + { + "epoch": 0.15168030428146034, + "grad_norm": 1.0822960138320923, + "learning_rate": 9.616757015332313e-06, + "loss": 0.3134, + "step": 7577 + }, + { + "epoch": 0.1517003227985887, + "grad_norm": 1.1120370626449585, + "learning_rate": 9.616632533628964e-06, + "loss": 0.3812, + "step": 7578 + }, + { + "epoch": 0.15172034131571704, + "grad_norm": 0.9893361330032349, + "learning_rate": 9.616508032518286e-06, + "loss": 0.2942, + "step": 7579 + }, + { + "epoch": 0.1517403598328454, + "grad_norm": 1.2050873041152954, + "learning_rate": 9.616383512000802e-06, + "loss": 0.3571, + "step": 7580 + }, + { + "epoch": 0.15176037834997372, + "grad_norm": 1.175075888633728, + "learning_rate": 9.616258972077035e-06, + "loss": 0.3012, + "step": 7581 + }, + { + "epoch": 0.15178039686710207, + "grad_norm": 1.2084403038024902, + "learning_rate": 9.616134412747509e-06, + "loss": 0.3727, + "step": 7582 + }, + { + "epoch": 0.15180041538423042, + "grad_norm": 1.1168588399887085, + "learning_rate": 9.616009834012747e-06, + "loss": 0.298, + "step": 7583 + }, + { + "epoch": 0.15182043390135877, + "grad_norm": 1.2370144128799438, + "learning_rate": 9.615885235873273e-06, + "loss": 0.3227, + "step": 7584 + }, + { + "epoch": 0.1518404524184871, + "grad_norm": 1.0626380443572998, + "learning_rate": 9.615760618329612e-06, + "loss": 0.2738, + "step": 7585 + }, + { + "epoch": 0.15186047093561544, + "grad_norm": 1.1568732261657715, + "learning_rate": 9.615635981382285e-06, + "loss": 0.3723, + "step": 7586 + }, + { + "epoch": 0.1518804894527438, + "grad_norm": 1.151432752609253, + "learning_rate": 9.615511325031819e-06, + "loss": 0.3497, + "step": 7587 + }, + { + "epoch": 0.15190050796987214, + "grad_norm": 2.0415420532226562, + "learning_rate": 9.615386649278736e-06, + "loss": 0.9291, + "step": 7588 + }, + { + "epoch": 0.15192052648700047, + "grad_norm": 1.0121595859527588, + "learning_rate": 9.615261954123561e-06, + "loss": 0.3339, + "step": 7589 + }, + { + "epoch": 0.15194054500412882, + "grad_norm": 1.1213785409927368, + "learning_rate": 9.615137239566818e-06, + "loss": 0.2904, + "step": 7590 + }, + { + "epoch": 0.15196056352125717, + "grad_norm": 1.1178362369537354, + "learning_rate": 9.61501250560903e-06, + "loss": 0.2975, + "step": 7591 + }, + { + "epoch": 0.15198058203838552, + "grad_norm": 1.8645631074905396, + "learning_rate": 9.614887752250724e-06, + "loss": 0.8266, + "step": 7592 + }, + { + "epoch": 0.15200060055551384, + "grad_norm": 1.8793549537658691, + "learning_rate": 9.614762979492423e-06, + "loss": 0.8039, + "step": 7593 + }, + { + "epoch": 0.1520206190726422, + "grad_norm": 1.1324964761734009, + "learning_rate": 9.61463818733465e-06, + "loss": 0.3164, + "step": 7594 + }, + { + "epoch": 0.15204063758977054, + "grad_norm": 0.9710376858711243, + "learning_rate": 9.614513375777934e-06, + "loss": 0.3205, + "step": 7595 + }, + { + "epoch": 0.1520606561068989, + "grad_norm": 1.0884448289871216, + "learning_rate": 9.614388544822794e-06, + "loss": 0.2779, + "step": 7596 + }, + { + "epoch": 0.15208067462402722, + "grad_norm": 1.1557542085647583, + "learning_rate": 9.614263694469758e-06, + "loss": 0.3818, + "step": 7597 + }, + { + "epoch": 0.15210069314115557, + "grad_norm": 1.085734486579895, + "learning_rate": 9.614138824719352e-06, + "loss": 0.3186, + "step": 7598 + }, + { + "epoch": 0.15212071165828392, + "grad_norm": 1.1519880294799805, + "learning_rate": 9.614013935572099e-06, + "loss": 0.3048, + "step": 7599 + }, + { + "epoch": 0.15214073017541227, + "grad_norm": 1.2315142154693604, + "learning_rate": 9.613889027028523e-06, + "loss": 0.3698, + "step": 7600 + }, + { + "epoch": 0.1521607486925406, + "grad_norm": 1.1023588180541992, + "learning_rate": 9.61376409908915e-06, + "loss": 0.3416, + "step": 7601 + }, + { + "epoch": 0.15218076720966894, + "grad_norm": 1.0950613021850586, + "learning_rate": 9.613639151754508e-06, + "loss": 0.313, + "step": 7602 + }, + { + "epoch": 0.1522007857267973, + "grad_norm": 1.149497151374817, + "learning_rate": 9.613514185025118e-06, + "loss": 0.298, + "step": 7603 + }, + { + "epoch": 0.15222080424392564, + "grad_norm": 1.1129595041275024, + "learning_rate": 9.61338919890151e-06, + "loss": 0.3176, + "step": 7604 + }, + { + "epoch": 0.15224082276105397, + "grad_norm": 1.0977658033370972, + "learning_rate": 9.613264193384205e-06, + "loss": 0.3145, + "step": 7605 + }, + { + "epoch": 0.15226084127818232, + "grad_norm": 1.0246082544326782, + "learning_rate": 9.613139168473728e-06, + "loss": 0.3368, + "step": 7606 + }, + { + "epoch": 0.15228085979531067, + "grad_norm": 1.1624239683151245, + "learning_rate": 9.61301412417061e-06, + "loss": 0.3011, + "step": 7607 + }, + { + "epoch": 0.15230087831243902, + "grad_norm": 1.0839470624923706, + "learning_rate": 9.612889060475371e-06, + "loss": 0.3477, + "step": 7608 + }, + { + "epoch": 0.15232089682956734, + "grad_norm": 1.0763992071151733, + "learning_rate": 9.612763977388539e-06, + "loss": 0.3263, + "step": 7609 + }, + { + "epoch": 0.1523409153466957, + "grad_norm": 1.0228602886199951, + "learning_rate": 9.612638874910642e-06, + "loss": 0.3049, + "step": 7610 + }, + { + "epoch": 0.15236093386382404, + "grad_norm": 1.9839078187942505, + "learning_rate": 9.612513753042203e-06, + "loss": 0.8483, + "step": 7611 + }, + { + "epoch": 0.1523809523809524, + "grad_norm": 1.165502905845642, + "learning_rate": 9.612388611783748e-06, + "loss": 0.3148, + "step": 7612 + }, + { + "epoch": 0.15240097089808072, + "grad_norm": 1.0360426902770996, + "learning_rate": 9.612263451135804e-06, + "loss": 0.3305, + "step": 7613 + }, + { + "epoch": 0.15242098941520907, + "grad_norm": 1.5198798179626465, + "learning_rate": 9.612138271098898e-06, + "loss": 0.3418, + "step": 7614 + }, + { + "epoch": 0.15244100793233742, + "grad_norm": 1.0844494104385376, + "learning_rate": 9.612013071673554e-06, + "loss": 0.3654, + "step": 7615 + }, + { + "epoch": 0.15246102644946577, + "grad_norm": 1.1480200290679932, + "learning_rate": 9.6118878528603e-06, + "loss": 0.3209, + "step": 7616 + }, + { + "epoch": 0.1524810449665941, + "grad_norm": 1.1807992458343506, + "learning_rate": 9.611762614659663e-06, + "loss": 0.3406, + "step": 7617 + }, + { + "epoch": 0.15250106348372244, + "grad_norm": 1.1136963367462158, + "learning_rate": 9.611637357072166e-06, + "loss": 0.3149, + "step": 7618 + }, + { + "epoch": 0.1525210820008508, + "grad_norm": 1.0418490171432495, + "learning_rate": 9.61151208009834e-06, + "loss": 0.312, + "step": 7619 + }, + { + "epoch": 0.15254110051797912, + "grad_norm": 1.9364067316055298, + "learning_rate": 9.61138678373871e-06, + "loss": 0.8066, + "step": 7620 + }, + { + "epoch": 0.15256111903510747, + "grad_norm": 1.0137138366699219, + "learning_rate": 9.6112614679938e-06, + "loss": 0.3437, + "step": 7621 + }, + { + "epoch": 0.15258113755223582, + "grad_norm": 1.2534488439559937, + "learning_rate": 9.61113613286414e-06, + "loss": 0.3059, + "step": 7622 + }, + { + "epoch": 0.15260115606936417, + "grad_norm": 1.0359609127044678, + "learning_rate": 9.611010778350258e-06, + "loss": 0.3152, + "step": 7623 + }, + { + "epoch": 0.1526211745864925, + "grad_norm": 1.246767282485962, + "learning_rate": 9.610885404452678e-06, + "loss": 0.3413, + "step": 7624 + }, + { + "epoch": 0.15264119310362084, + "grad_norm": 1.099203109741211, + "learning_rate": 9.610760011171928e-06, + "loss": 0.344, + "step": 7625 + }, + { + "epoch": 0.1526612116207492, + "grad_norm": 1.1279245615005493, + "learning_rate": 9.610634598508534e-06, + "loss": 0.3557, + "step": 7626 + }, + { + "epoch": 0.15268123013787754, + "grad_norm": 1.0481963157653809, + "learning_rate": 9.610509166463025e-06, + "loss": 0.3231, + "step": 7627 + }, + { + "epoch": 0.15270124865500587, + "grad_norm": 1.1040151119232178, + "learning_rate": 9.610383715035928e-06, + "loss": 0.3335, + "step": 7628 + }, + { + "epoch": 0.15272126717213422, + "grad_norm": 1.141844391822815, + "learning_rate": 9.61025824422777e-06, + "loss": 0.2935, + "step": 7629 + }, + { + "epoch": 0.15274128568926257, + "grad_norm": 1.2122033834457397, + "learning_rate": 9.61013275403908e-06, + "loss": 0.3416, + "step": 7630 + }, + { + "epoch": 0.15276130420639092, + "grad_norm": 1.0743902921676636, + "learning_rate": 9.61000724447038e-06, + "loss": 0.3377, + "step": 7631 + }, + { + "epoch": 0.15278132272351924, + "grad_norm": 1.1145132780075073, + "learning_rate": 9.609881715522204e-06, + "loss": 0.3619, + "step": 7632 + }, + { + "epoch": 0.1528013412406476, + "grad_norm": 1.9348700046539307, + "learning_rate": 9.609756167195078e-06, + "loss": 0.8933, + "step": 7633 + }, + { + "epoch": 0.15282135975777594, + "grad_norm": 1.057477593421936, + "learning_rate": 9.609630599489527e-06, + "loss": 0.284, + "step": 7634 + }, + { + "epoch": 0.1528413782749043, + "grad_norm": 1.214787483215332, + "learning_rate": 9.609505012406084e-06, + "loss": 0.3713, + "step": 7635 + }, + { + "epoch": 0.15286139679203262, + "grad_norm": 1.186693787574768, + "learning_rate": 9.609379405945272e-06, + "loss": 0.3397, + "step": 7636 + }, + { + "epoch": 0.15288141530916097, + "grad_norm": 1.1046900749206543, + "learning_rate": 9.609253780107622e-06, + "loss": 0.3272, + "step": 7637 + }, + { + "epoch": 0.15290143382628932, + "grad_norm": 1.1064990758895874, + "learning_rate": 9.609128134893659e-06, + "loss": 0.3368, + "step": 7638 + }, + { + "epoch": 0.15292145234341767, + "grad_norm": 1.1451305150985718, + "learning_rate": 9.609002470303913e-06, + "loss": 0.385, + "step": 7639 + }, + { + "epoch": 0.152941470860546, + "grad_norm": 1.1711512804031372, + "learning_rate": 9.608876786338916e-06, + "loss": 0.3144, + "step": 7640 + }, + { + "epoch": 0.15296148937767434, + "grad_norm": 1.1958355903625488, + "learning_rate": 9.608751082999191e-06, + "loss": 0.3475, + "step": 7641 + }, + { + "epoch": 0.1529815078948027, + "grad_norm": 1.0370464324951172, + "learning_rate": 9.608625360285267e-06, + "loss": 0.3998, + "step": 7642 + }, + { + "epoch": 0.15300152641193104, + "grad_norm": 1.0512551069259644, + "learning_rate": 9.608499618197676e-06, + "loss": 0.3376, + "step": 7643 + }, + { + "epoch": 0.15302154492905937, + "grad_norm": 1.9560366868972778, + "learning_rate": 9.608373856736943e-06, + "loss": 0.8573, + "step": 7644 + }, + { + "epoch": 0.15304156344618772, + "grad_norm": 1.1412670612335205, + "learning_rate": 9.608248075903598e-06, + "loss": 0.322, + "step": 7645 + }, + { + "epoch": 0.15306158196331607, + "grad_norm": 1.175676703453064, + "learning_rate": 9.608122275698171e-06, + "loss": 0.3309, + "step": 7646 + }, + { + "epoch": 0.15308160048044442, + "grad_norm": 1.2250761985778809, + "learning_rate": 9.607996456121187e-06, + "loss": 0.3217, + "step": 7647 + }, + { + "epoch": 0.15310161899757274, + "grad_norm": 1.7322553396224976, + "learning_rate": 9.60787061717318e-06, + "loss": 0.8836, + "step": 7648 + }, + { + "epoch": 0.1531216375147011, + "grad_norm": 1.8030929565429688, + "learning_rate": 9.607744758854676e-06, + "loss": 0.8508, + "step": 7649 + }, + { + "epoch": 0.15314165603182944, + "grad_norm": 1.926685094833374, + "learning_rate": 9.607618881166203e-06, + "loss": 0.865, + "step": 7650 + }, + { + "epoch": 0.1531616745489578, + "grad_norm": 1.1745356321334839, + "learning_rate": 9.607492984108295e-06, + "loss": 0.3395, + "step": 7651 + }, + { + "epoch": 0.15318169306608612, + "grad_norm": 1.1918423175811768, + "learning_rate": 9.607367067681475e-06, + "loss": 0.315, + "step": 7652 + }, + { + "epoch": 0.15320171158321447, + "grad_norm": 1.2879959344863892, + "learning_rate": 9.607241131886276e-06, + "loss": 0.3352, + "step": 7653 + }, + { + "epoch": 0.15322173010034282, + "grad_norm": 1.0933490991592407, + "learning_rate": 9.607115176723227e-06, + "loss": 0.3203, + "step": 7654 + }, + { + "epoch": 0.15324174861747117, + "grad_norm": 1.0834192037582397, + "learning_rate": 9.606989202192858e-06, + "loss": 0.3538, + "step": 7655 + }, + { + "epoch": 0.1532617671345995, + "grad_norm": 1.2063971757888794, + "learning_rate": 9.606863208295698e-06, + "loss": 0.3318, + "step": 7656 + }, + { + "epoch": 0.15328178565172784, + "grad_norm": 1.1175389289855957, + "learning_rate": 9.606737195032276e-06, + "loss": 0.372, + "step": 7657 + }, + { + "epoch": 0.1533018041688562, + "grad_norm": 1.2125097513198853, + "learning_rate": 9.606611162403122e-06, + "loss": 0.3552, + "step": 7658 + }, + { + "epoch": 0.15332182268598454, + "grad_norm": 1.12649667263031, + "learning_rate": 9.606485110408767e-06, + "loss": 0.3662, + "step": 7659 + }, + { + "epoch": 0.15334184120311287, + "grad_norm": 1.2211347818374634, + "learning_rate": 9.606359039049738e-06, + "loss": 0.3743, + "step": 7660 + }, + { + "epoch": 0.15336185972024122, + "grad_norm": 1.1011632680892944, + "learning_rate": 9.606232948326569e-06, + "loss": 0.3852, + "step": 7661 + }, + { + "epoch": 0.15338187823736957, + "grad_norm": 1.1608421802520752, + "learning_rate": 9.606106838239786e-06, + "loss": 0.3095, + "step": 7662 + }, + { + "epoch": 0.15340189675449792, + "grad_norm": 1.149251937866211, + "learning_rate": 9.60598070878992e-06, + "loss": 0.3749, + "step": 7663 + }, + { + "epoch": 0.15342191527162624, + "grad_norm": 1.0543819665908813, + "learning_rate": 9.605854559977506e-06, + "loss": 0.2754, + "step": 7664 + }, + { + "epoch": 0.1534419337887546, + "grad_norm": 1.0831776857376099, + "learning_rate": 9.605728391803069e-06, + "loss": 0.3552, + "step": 7665 + }, + { + "epoch": 0.15346195230588294, + "grad_norm": 1.2468315362930298, + "learning_rate": 9.605602204267142e-06, + "loss": 0.341, + "step": 7666 + }, + { + "epoch": 0.1534819708230113, + "grad_norm": 1.1430555582046509, + "learning_rate": 9.605475997370253e-06, + "loss": 0.3835, + "step": 7667 + }, + { + "epoch": 0.15350198934013962, + "grad_norm": 1.0147596597671509, + "learning_rate": 9.605349771112935e-06, + "loss": 0.3187, + "step": 7668 + }, + { + "epoch": 0.15352200785726797, + "grad_norm": 1.2765378952026367, + "learning_rate": 9.605223525495717e-06, + "loss": 0.3252, + "step": 7669 + }, + { + "epoch": 0.15354202637439632, + "grad_norm": 1.1006402969360352, + "learning_rate": 9.605097260519129e-06, + "loss": 0.3315, + "step": 7670 + }, + { + "epoch": 0.15356204489152467, + "grad_norm": 1.143637776374817, + "learning_rate": 9.604970976183705e-06, + "loss": 0.3385, + "step": 7671 + }, + { + "epoch": 0.153582063408653, + "grad_norm": 1.0773849487304688, + "learning_rate": 9.604844672489974e-06, + "loss": 0.3177, + "step": 7672 + }, + { + "epoch": 0.15360208192578134, + "grad_norm": 1.6870054006576538, + "learning_rate": 9.604718349438466e-06, + "loss": 0.8221, + "step": 7673 + }, + { + "epoch": 0.1536221004429097, + "grad_norm": 1.0652357339859009, + "learning_rate": 9.604592007029714e-06, + "loss": 0.3446, + "step": 7674 + }, + { + "epoch": 0.15364211896003804, + "grad_norm": 1.086782693862915, + "learning_rate": 9.604465645264248e-06, + "loss": 0.3296, + "step": 7675 + }, + { + "epoch": 0.15366213747716637, + "grad_norm": 1.0499563217163086, + "learning_rate": 9.604339264142599e-06, + "loss": 0.3139, + "step": 7676 + }, + { + "epoch": 0.15368215599429472, + "grad_norm": 1.2560882568359375, + "learning_rate": 9.6042128636653e-06, + "loss": 0.3685, + "step": 7677 + }, + { + "epoch": 0.15370217451142307, + "grad_norm": 1.086113691329956, + "learning_rate": 9.60408644383288e-06, + "loss": 0.3432, + "step": 7678 + }, + { + "epoch": 0.15372219302855142, + "grad_norm": 1.1040139198303223, + "learning_rate": 9.60396000464587e-06, + "loss": 0.3145, + "step": 7679 + }, + { + "epoch": 0.15374221154567974, + "grad_norm": 1.076257348060608, + "learning_rate": 9.603833546104804e-06, + "loss": 0.3849, + "step": 7680 + }, + { + "epoch": 0.1537622300628081, + "grad_norm": 1.1285667419433594, + "learning_rate": 9.603707068210211e-06, + "loss": 0.3161, + "step": 7681 + }, + { + "epoch": 0.15378224857993644, + "grad_norm": 1.069342017173767, + "learning_rate": 9.603580570962627e-06, + "loss": 0.3365, + "step": 7682 + }, + { + "epoch": 0.1538022670970648, + "grad_norm": 1.1824322938919067, + "learning_rate": 9.60345405436258e-06, + "loss": 0.3425, + "step": 7683 + }, + { + "epoch": 0.15382228561419312, + "grad_norm": 1.2027957439422607, + "learning_rate": 9.603327518410602e-06, + "loss": 0.2796, + "step": 7684 + }, + { + "epoch": 0.15384230413132147, + "grad_norm": 1.2355786561965942, + "learning_rate": 9.603200963107225e-06, + "loss": 0.3341, + "step": 7685 + }, + { + "epoch": 0.15386232264844982, + "grad_norm": 1.7699496746063232, + "learning_rate": 9.603074388452984e-06, + "loss": 0.8646, + "step": 7686 + }, + { + "epoch": 0.15388234116557817, + "grad_norm": 1.031805157661438, + "learning_rate": 9.602947794448408e-06, + "loss": 0.3362, + "step": 7687 + }, + { + "epoch": 0.1539023596827065, + "grad_norm": 1.180560827255249, + "learning_rate": 9.60282118109403e-06, + "loss": 0.3171, + "step": 7688 + }, + { + "epoch": 0.15392237819983484, + "grad_norm": 1.1844761371612549, + "learning_rate": 9.602694548390381e-06, + "loss": 0.3044, + "step": 7689 + }, + { + "epoch": 0.1539423967169632, + "grad_norm": 1.791489839553833, + "learning_rate": 9.602567896337996e-06, + "loss": 0.8531, + "step": 7690 + }, + { + "epoch": 0.15396241523409154, + "grad_norm": 1.0661613941192627, + "learning_rate": 9.602441224937407e-06, + "loss": 0.356, + "step": 7691 + }, + { + "epoch": 0.15398243375121987, + "grad_norm": 1.1652653217315674, + "learning_rate": 9.602314534189144e-06, + "loss": 0.3469, + "step": 7692 + }, + { + "epoch": 0.15400245226834822, + "grad_norm": 1.0430279970169067, + "learning_rate": 9.60218782409374e-06, + "loss": 0.3323, + "step": 7693 + }, + { + "epoch": 0.15402247078547657, + "grad_norm": 1.17161226272583, + "learning_rate": 9.602061094651731e-06, + "loss": 0.3573, + "step": 7694 + }, + { + "epoch": 0.15404248930260492, + "grad_norm": 1.2656468152999878, + "learning_rate": 9.601934345863647e-06, + "loss": 0.3205, + "step": 7695 + }, + { + "epoch": 0.15406250781973324, + "grad_norm": 1.1830711364746094, + "learning_rate": 9.601807577730022e-06, + "loss": 0.294, + "step": 7696 + }, + { + "epoch": 0.1540825263368616, + "grad_norm": 1.1607887744903564, + "learning_rate": 9.601680790251386e-06, + "loss": 0.377, + "step": 7697 + }, + { + "epoch": 0.15410254485398994, + "grad_norm": 1.2201989889144897, + "learning_rate": 9.601553983428277e-06, + "loss": 0.382, + "step": 7698 + }, + { + "epoch": 0.1541225633711183, + "grad_norm": 1.039723515510559, + "learning_rate": 9.601427157261224e-06, + "loss": 0.3388, + "step": 7699 + }, + { + "epoch": 0.15414258188824662, + "grad_norm": 1.9157936573028564, + "learning_rate": 9.601300311750762e-06, + "loss": 0.8557, + "step": 7700 + }, + { + "epoch": 0.15416260040537497, + "grad_norm": 1.0976697206497192, + "learning_rate": 9.601173446897422e-06, + "loss": 0.3394, + "step": 7701 + }, + { + "epoch": 0.15418261892250332, + "grad_norm": 1.8096438646316528, + "learning_rate": 9.601046562701742e-06, + "loss": 0.9031, + "step": 7702 + }, + { + "epoch": 0.15420263743963167, + "grad_norm": 1.054250717163086, + "learning_rate": 9.60091965916425e-06, + "loss": 0.2912, + "step": 7703 + }, + { + "epoch": 0.15422265595676, + "grad_norm": 1.135999083518982, + "learning_rate": 9.600792736285483e-06, + "loss": 0.3338, + "step": 7704 + }, + { + "epoch": 0.15424267447388834, + "grad_norm": 1.1537855863571167, + "learning_rate": 9.600665794065972e-06, + "loss": 0.325, + "step": 7705 + }, + { + "epoch": 0.1542626929910167, + "grad_norm": 1.0753037929534912, + "learning_rate": 9.600538832506254e-06, + "loss": 0.2997, + "step": 7706 + }, + { + "epoch": 0.15428271150814504, + "grad_norm": 1.1044820547103882, + "learning_rate": 9.60041185160686e-06, + "loss": 0.3264, + "step": 7707 + }, + { + "epoch": 0.15430273002527337, + "grad_norm": 1.1263606548309326, + "learning_rate": 9.600284851368323e-06, + "loss": 0.3159, + "step": 7708 + }, + { + "epoch": 0.15432274854240172, + "grad_norm": 1.163712739944458, + "learning_rate": 9.60015783179118e-06, + "loss": 0.342, + "step": 7709 + }, + { + "epoch": 0.15434276705953007, + "grad_norm": 0.9431812167167664, + "learning_rate": 9.600030792875963e-06, + "loss": 0.3187, + "step": 7710 + }, + { + "epoch": 0.15436278557665842, + "grad_norm": 1.1527680158615112, + "learning_rate": 9.599903734623207e-06, + "loss": 0.2874, + "step": 7711 + }, + { + "epoch": 0.15438280409378674, + "grad_norm": 1.0112221240997314, + "learning_rate": 9.599776657033445e-06, + "loss": 0.2912, + "step": 7712 + }, + { + "epoch": 0.1544028226109151, + "grad_norm": 1.1232308149337769, + "learning_rate": 9.599649560107214e-06, + "loss": 0.364, + "step": 7713 + }, + { + "epoch": 0.15442284112804344, + "grad_norm": 1.058592438697815, + "learning_rate": 9.599522443845044e-06, + "loss": 0.2855, + "step": 7714 + }, + { + "epoch": 0.1544428596451718, + "grad_norm": 1.0986422300338745, + "learning_rate": 9.599395308247471e-06, + "loss": 0.3542, + "step": 7715 + }, + { + "epoch": 0.15446287816230012, + "grad_norm": 1.0795061588287354, + "learning_rate": 9.59926815331503e-06, + "loss": 0.3276, + "step": 7716 + }, + { + "epoch": 0.15448289667942847, + "grad_norm": 1.1871172189712524, + "learning_rate": 9.599140979048257e-06, + "loss": 0.3301, + "step": 7717 + }, + { + "epoch": 0.15450291519655682, + "grad_norm": 1.1908167600631714, + "learning_rate": 9.599013785447683e-06, + "loss": 0.3522, + "step": 7718 + }, + { + "epoch": 0.15452293371368517, + "grad_norm": 1.1702630519866943, + "learning_rate": 9.598886572513849e-06, + "loss": 0.3384, + "step": 7719 + }, + { + "epoch": 0.1545429522308135, + "grad_norm": 1.1409859657287598, + "learning_rate": 9.598759340247282e-06, + "loss": 0.3147, + "step": 7720 + }, + { + "epoch": 0.15456297074794184, + "grad_norm": 1.0599910020828247, + "learning_rate": 9.598632088648521e-06, + "loss": 0.3674, + "step": 7721 + }, + { + "epoch": 0.1545829892650702, + "grad_norm": 1.0634069442749023, + "learning_rate": 9.5985048177181e-06, + "loss": 0.3217, + "step": 7722 + }, + { + "epoch": 0.15460300778219854, + "grad_norm": 1.2706836462020874, + "learning_rate": 9.598377527456556e-06, + "loss": 0.3446, + "step": 7723 + }, + { + "epoch": 0.15462302629932687, + "grad_norm": 1.0666441917419434, + "learning_rate": 9.59825021786442e-06, + "loss": 0.2983, + "step": 7724 + }, + { + "epoch": 0.15464304481645522, + "grad_norm": 1.811206579208374, + "learning_rate": 9.598122888942229e-06, + "loss": 0.7652, + "step": 7725 + }, + { + "epoch": 0.15466306333358357, + "grad_norm": 1.1197798252105713, + "learning_rate": 9.59799554069052e-06, + "loss": 0.3325, + "step": 7726 + }, + { + "epoch": 0.15468308185071192, + "grad_norm": 1.2450422048568726, + "learning_rate": 9.59786817310983e-06, + "loss": 0.3358, + "step": 7727 + }, + { + "epoch": 0.15470310036784024, + "grad_norm": 1.1114763021469116, + "learning_rate": 9.597740786200688e-06, + "loss": 0.3364, + "step": 7728 + }, + { + "epoch": 0.1547231188849686, + "grad_norm": 1.2072596549987793, + "learning_rate": 9.597613379963637e-06, + "loss": 0.3524, + "step": 7729 + }, + { + "epoch": 0.15474313740209694, + "grad_norm": 1.9781118631362915, + "learning_rate": 9.597485954399206e-06, + "loss": 0.8769, + "step": 7730 + }, + { + "epoch": 0.1547631559192253, + "grad_norm": 1.774681806564331, + "learning_rate": 9.597358509507933e-06, + "loss": 0.8647, + "step": 7731 + }, + { + "epoch": 0.15478317443635362, + "grad_norm": 1.1619762182235718, + "learning_rate": 9.597231045290355e-06, + "loss": 0.3305, + "step": 7732 + }, + { + "epoch": 0.15480319295348197, + "grad_norm": 1.80233633518219, + "learning_rate": 9.597103561747007e-06, + "loss": 0.8424, + "step": 7733 + }, + { + "epoch": 0.15482321147061032, + "grad_norm": 2.052253246307373, + "learning_rate": 9.596976058878426e-06, + "loss": 0.8208, + "step": 7734 + }, + { + "epoch": 0.15484322998773867, + "grad_norm": 1.0954781770706177, + "learning_rate": 9.596848536685146e-06, + "loss": 0.327, + "step": 7735 + }, + { + "epoch": 0.154863248504867, + "grad_norm": 1.0575382709503174, + "learning_rate": 9.596720995167704e-06, + "loss": 0.3134, + "step": 7736 + }, + { + "epoch": 0.15488326702199534, + "grad_norm": 1.1449639797210693, + "learning_rate": 9.596593434326637e-06, + "loss": 0.3315, + "step": 7737 + }, + { + "epoch": 0.1549032855391237, + "grad_norm": 1.813029170036316, + "learning_rate": 9.596465854162479e-06, + "loss": 0.8305, + "step": 7738 + }, + { + "epoch": 0.15492330405625204, + "grad_norm": 1.1131341457366943, + "learning_rate": 9.596338254675767e-06, + "loss": 0.2908, + "step": 7739 + }, + { + "epoch": 0.15494332257338037, + "grad_norm": 1.0773040056228638, + "learning_rate": 9.596210635867041e-06, + "loss": 0.2851, + "step": 7740 + }, + { + "epoch": 0.15496334109050872, + "grad_norm": 1.0992214679718018, + "learning_rate": 9.596082997736833e-06, + "loss": 0.3047, + "step": 7741 + }, + { + "epoch": 0.15498335960763707, + "grad_norm": 1.8938263654708862, + "learning_rate": 9.59595534028568e-06, + "loss": 0.8045, + "step": 7742 + }, + { + "epoch": 0.15500337812476542, + "grad_norm": 1.0934334993362427, + "learning_rate": 9.595827663514122e-06, + "loss": 0.2822, + "step": 7743 + }, + { + "epoch": 0.15502339664189374, + "grad_norm": 1.83582603931427, + "learning_rate": 9.595699967422693e-06, + "loss": 0.8505, + "step": 7744 + }, + { + "epoch": 0.1550434151590221, + "grad_norm": 1.0668100118637085, + "learning_rate": 9.59557225201193e-06, + "loss": 0.3333, + "step": 7745 + }, + { + "epoch": 0.15506343367615044, + "grad_norm": 1.1623690128326416, + "learning_rate": 9.59544451728237e-06, + "loss": 0.3044, + "step": 7746 + }, + { + "epoch": 0.1550834521932788, + "grad_norm": 1.161233901977539, + "learning_rate": 9.59531676323455e-06, + "loss": 0.3444, + "step": 7747 + }, + { + "epoch": 0.15510347071040712, + "grad_norm": 1.170732021331787, + "learning_rate": 9.595188989869007e-06, + "loss": 0.3806, + "step": 7748 + }, + { + "epoch": 0.15512348922753547, + "grad_norm": 1.0974057912826538, + "learning_rate": 9.595061197186279e-06, + "loss": 0.3347, + "step": 7749 + }, + { + "epoch": 0.15514350774466382, + "grad_norm": 1.0834195613861084, + "learning_rate": 9.594933385186903e-06, + "loss": 0.3094, + "step": 7750 + }, + { + "epoch": 0.15516352626179217, + "grad_norm": 1.2352675199508667, + "learning_rate": 9.594805553871416e-06, + "loss": 0.3932, + "step": 7751 + }, + { + "epoch": 0.1551835447789205, + "grad_norm": 0.9757612943649292, + "learning_rate": 9.594677703240353e-06, + "loss": 0.2958, + "step": 7752 + }, + { + "epoch": 0.15520356329604884, + "grad_norm": 1.1803021430969238, + "learning_rate": 9.594549833294258e-06, + "loss": 0.336, + "step": 7753 + }, + { + "epoch": 0.1552235818131772, + "grad_norm": 1.1226335763931274, + "learning_rate": 9.594421944033661e-06, + "loss": 0.3421, + "step": 7754 + }, + { + "epoch": 0.15524360033030554, + "grad_norm": 1.0184857845306396, + "learning_rate": 9.594294035459102e-06, + "loss": 0.3575, + "step": 7755 + }, + { + "epoch": 0.15526361884743387, + "grad_norm": 1.0685914754867554, + "learning_rate": 9.594166107571122e-06, + "loss": 0.3514, + "step": 7756 + }, + { + "epoch": 0.15528363736456222, + "grad_norm": 1.0701016187667847, + "learning_rate": 9.594038160370256e-06, + "loss": 0.3533, + "step": 7757 + }, + { + "epoch": 0.15530365588169057, + "grad_norm": 1.17106032371521, + "learning_rate": 9.593910193857041e-06, + "loss": 0.3669, + "step": 7758 + }, + { + "epoch": 0.15532367439881892, + "grad_norm": 1.081178069114685, + "learning_rate": 9.593782208032017e-06, + "loss": 0.3572, + "step": 7759 + }, + { + "epoch": 0.15534369291594724, + "grad_norm": 1.178625226020813, + "learning_rate": 9.593654202895721e-06, + "loss": 0.3379, + "step": 7760 + }, + { + "epoch": 0.1553637114330756, + "grad_norm": 1.1670035123825073, + "learning_rate": 9.593526178448692e-06, + "loss": 0.3203, + "step": 7761 + }, + { + "epoch": 0.15538372995020394, + "grad_norm": 1.0295519828796387, + "learning_rate": 9.593398134691467e-06, + "loss": 0.2804, + "step": 7762 + }, + { + "epoch": 0.1554037484673323, + "grad_norm": 1.1885651350021362, + "learning_rate": 9.593270071624586e-06, + "loss": 0.368, + "step": 7763 + }, + { + "epoch": 0.15542376698446062, + "grad_norm": 1.1806472539901733, + "learning_rate": 9.593141989248583e-06, + "loss": 0.3128, + "step": 7764 + }, + { + "epoch": 0.15544378550158897, + "grad_norm": 1.824507713317871, + "learning_rate": 9.593013887564002e-06, + "loss": 0.7907, + "step": 7765 + }, + { + "epoch": 0.15546380401871732, + "grad_norm": 1.036562204360962, + "learning_rate": 9.592885766571378e-06, + "loss": 0.2893, + "step": 7766 + }, + { + "epoch": 0.15548382253584567, + "grad_norm": 1.1085351705551147, + "learning_rate": 9.592757626271251e-06, + "loss": 0.2919, + "step": 7767 + }, + { + "epoch": 0.155503841052974, + "grad_norm": 1.165805697441101, + "learning_rate": 9.592629466664159e-06, + "loss": 0.3372, + "step": 7768 + }, + { + "epoch": 0.15552385957010234, + "grad_norm": 1.1629635095596313, + "learning_rate": 9.592501287750642e-06, + "loss": 0.3167, + "step": 7769 + }, + { + "epoch": 0.1555438780872307, + "grad_norm": 1.138270378112793, + "learning_rate": 9.592373089531238e-06, + "loss": 0.3169, + "step": 7770 + }, + { + "epoch": 0.15556389660435904, + "grad_norm": 1.048746943473816, + "learning_rate": 9.592244872006484e-06, + "loss": 0.2737, + "step": 7771 + }, + { + "epoch": 0.15558391512148737, + "grad_norm": 1.1381316184997559, + "learning_rate": 9.592116635176922e-06, + "loss": 0.2983, + "step": 7772 + }, + { + "epoch": 0.15560393363861572, + "grad_norm": 1.0018562078475952, + "learning_rate": 9.59198837904309e-06, + "loss": 0.3277, + "step": 7773 + }, + { + "epoch": 0.15562395215574407, + "grad_norm": 0.9761331677436829, + "learning_rate": 9.591860103605525e-06, + "loss": 0.3234, + "step": 7774 + }, + { + "epoch": 0.15564397067287242, + "grad_norm": 1.1982197761535645, + "learning_rate": 9.591731808864772e-06, + "loss": 0.3668, + "step": 7775 + }, + { + "epoch": 0.15566398919000074, + "grad_norm": 1.0533124208450317, + "learning_rate": 9.591603494821364e-06, + "loss": 0.3145, + "step": 7776 + }, + { + "epoch": 0.1556840077071291, + "grad_norm": 1.1065688133239746, + "learning_rate": 9.591475161475843e-06, + "loss": 0.3715, + "step": 7777 + }, + { + "epoch": 0.15570402622425744, + "grad_norm": 0.9927852153778076, + "learning_rate": 9.59134680882875e-06, + "loss": 0.3337, + "step": 7778 + }, + { + "epoch": 0.1557240447413858, + "grad_norm": 1.124715805053711, + "learning_rate": 9.591218436880622e-06, + "loss": 0.3035, + "step": 7779 + }, + { + "epoch": 0.15574406325851411, + "grad_norm": 1.0680665969848633, + "learning_rate": 9.591090045632e-06, + "loss": 0.2956, + "step": 7780 + }, + { + "epoch": 0.15576408177564247, + "grad_norm": 1.1618393659591675, + "learning_rate": 9.590961635083425e-06, + "loss": 0.3005, + "step": 7781 + }, + { + "epoch": 0.15578410029277082, + "grad_norm": 0.9378149509429932, + "learning_rate": 9.590833205235433e-06, + "loss": 0.3232, + "step": 7782 + }, + { + "epoch": 0.15580411880989917, + "grad_norm": 1.3568968772888184, + "learning_rate": 9.590704756088567e-06, + "loss": 0.3894, + "step": 7783 + }, + { + "epoch": 0.1558241373270275, + "grad_norm": 1.2030413150787354, + "learning_rate": 9.590576287643367e-06, + "loss": 0.3985, + "step": 7784 + }, + { + "epoch": 0.15584415584415584, + "grad_norm": 1.1000707149505615, + "learning_rate": 9.59044779990037e-06, + "loss": 0.3061, + "step": 7785 + }, + { + "epoch": 0.1558641743612842, + "grad_norm": 1.9377055168151855, + "learning_rate": 9.59031929286012e-06, + "loss": 0.8061, + "step": 7786 + }, + { + "epoch": 0.15588419287841254, + "grad_norm": 1.8907173871994019, + "learning_rate": 9.590190766523155e-06, + "loss": 0.9072, + "step": 7787 + }, + { + "epoch": 0.15590421139554086, + "grad_norm": 1.130437970161438, + "learning_rate": 9.590062220890017e-06, + "loss": 0.3042, + "step": 7788 + }, + { + "epoch": 0.15592422991266922, + "grad_norm": 1.0510733127593994, + "learning_rate": 9.589933655961244e-06, + "loss": 0.3164, + "step": 7789 + }, + { + "epoch": 0.15594424842979757, + "grad_norm": 2.444420337677002, + "learning_rate": 9.589805071737376e-06, + "loss": 0.3575, + "step": 7790 + }, + { + "epoch": 0.15596426694692592, + "grad_norm": 1.1661380529403687, + "learning_rate": 9.589676468218958e-06, + "loss": 0.3436, + "step": 7791 + }, + { + "epoch": 0.15598428546405424, + "grad_norm": 2.1513214111328125, + "learning_rate": 9.589547845406528e-06, + "loss": 0.8405, + "step": 7792 + }, + { + "epoch": 0.1560043039811826, + "grad_norm": 1.2074427604675293, + "learning_rate": 9.589419203300625e-06, + "loss": 0.3173, + "step": 7793 + }, + { + "epoch": 0.15602432249831094, + "grad_norm": 1.0707082748413086, + "learning_rate": 9.589290541901792e-06, + "loss": 0.3201, + "step": 7794 + }, + { + "epoch": 0.1560443410154393, + "grad_norm": 1.1041356325149536, + "learning_rate": 9.589161861210569e-06, + "loss": 0.3284, + "step": 7795 + }, + { + "epoch": 0.15606435953256761, + "grad_norm": 1.1417312622070312, + "learning_rate": 9.589033161227498e-06, + "loss": 0.3511, + "step": 7796 + }, + { + "epoch": 0.15608437804969597, + "grad_norm": 1.0002354383468628, + "learning_rate": 9.588904441953118e-06, + "loss": 0.2802, + "step": 7797 + }, + { + "epoch": 0.15610439656682432, + "grad_norm": 1.0594677925109863, + "learning_rate": 9.58877570338797e-06, + "loss": 0.3372, + "step": 7798 + }, + { + "epoch": 0.15612441508395267, + "grad_norm": 1.004191279411316, + "learning_rate": 9.5886469455326e-06, + "loss": 0.3128, + "step": 7799 + }, + { + "epoch": 0.156144433601081, + "grad_norm": 1.981383204460144, + "learning_rate": 9.588518168387543e-06, + "loss": 0.825, + "step": 7800 + }, + { + "epoch": 0.15616445211820934, + "grad_norm": 1.8326143026351929, + "learning_rate": 9.588389371953344e-06, + "loss": 0.8449, + "step": 7801 + }, + { + "epoch": 0.1561844706353377, + "grad_norm": 1.1611295938491821, + "learning_rate": 9.588260556230544e-06, + "loss": 0.3199, + "step": 7802 + }, + { + "epoch": 0.15620448915246604, + "grad_norm": 1.0693951845169067, + "learning_rate": 9.588131721219683e-06, + "loss": 0.3509, + "step": 7803 + }, + { + "epoch": 0.15622450766959436, + "grad_norm": 1.1734495162963867, + "learning_rate": 9.588002866921304e-06, + "loss": 0.3509, + "step": 7804 + }, + { + "epoch": 0.15624452618672272, + "grad_norm": 1.2706624269485474, + "learning_rate": 9.587873993335947e-06, + "loss": 0.3483, + "step": 7805 + }, + { + "epoch": 0.15626454470385107, + "grad_norm": 1.1213786602020264, + "learning_rate": 9.587745100464158e-06, + "loss": 0.3132, + "step": 7806 + }, + { + "epoch": 0.15628456322097942, + "grad_norm": 1.0851658582687378, + "learning_rate": 9.587616188306474e-06, + "loss": 0.3202, + "step": 7807 + }, + { + "epoch": 0.15630458173810774, + "grad_norm": 1.0615644454956055, + "learning_rate": 9.58748725686344e-06, + "loss": 0.3066, + "step": 7808 + }, + { + "epoch": 0.1563246002552361, + "grad_norm": 1.3591907024383545, + "learning_rate": 9.587358306135594e-06, + "loss": 0.3609, + "step": 7809 + }, + { + "epoch": 0.15634461877236444, + "grad_norm": 1.009501338005066, + "learning_rate": 9.587229336123485e-06, + "loss": 0.3149, + "step": 7810 + }, + { + "epoch": 0.1563646372894928, + "grad_norm": 1.1126925945281982, + "learning_rate": 9.587100346827647e-06, + "loss": 0.3042, + "step": 7811 + }, + { + "epoch": 0.15638465580662111, + "grad_norm": 0.9585458636283875, + "learning_rate": 9.58697133824863e-06, + "loss": 0.3055, + "step": 7812 + }, + { + "epoch": 0.15640467432374947, + "grad_norm": 1.0931930541992188, + "learning_rate": 9.58684231038697e-06, + "loss": 0.395, + "step": 7813 + }, + { + "epoch": 0.15642469284087782, + "grad_norm": 0.9803481698036194, + "learning_rate": 9.586713263243213e-06, + "loss": 0.289, + "step": 7814 + }, + { + "epoch": 0.15644471135800617, + "grad_norm": 1.1510908603668213, + "learning_rate": 9.586584196817899e-06, + "loss": 0.3709, + "step": 7815 + }, + { + "epoch": 0.1564647298751345, + "grad_norm": 1.0084691047668457, + "learning_rate": 9.586455111111574e-06, + "loss": 0.3146, + "step": 7816 + }, + { + "epoch": 0.15648474839226284, + "grad_norm": 1.0925819873809814, + "learning_rate": 9.586326006124778e-06, + "loss": 0.2997, + "step": 7817 + }, + { + "epoch": 0.1565047669093912, + "grad_norm": 1.2073441743850708, + "learning_rate": 9.586196881858054e-06, + "loss": 0.3071, + "step": 7818 + }, + { + "epoch": 0.15652478542651954, + "grad_norm": 1.0698457956314087, + "learning_rate": 9.586067738311946e-06, + "loss": 0.3006, + "step": 7819 + }, + { + "epoch": 0.15654480394364786, + "grad_norm": 1.0214476585388184, + "learning_rate": 9.585938575486996e-06, + "loss": 0.3153, + "step": 7820 + }, + { + "epoch": 0.15656482246077622, + "grad_norm": 1.132632851600647, + "learning_rate": 9.585809393383748e-06, + "loss": 0.3102, + "step": 7821 + }, + { + "epoch": 0.15658484097790457, + "grad_norm": 1.1710399389266968, + "learning_rate": 9.585680192002742e-06, + "loss": 0.3743, + "step": 7822 + }, + { + "epoch": 0.15660485949503292, + "grad_norm": 1.2286489009857178, + "learning_rate": 9.585550971344526e-06, + "loss": 0.3402, + "step": 7823 + }, + { + "epoch": 0.15662487801216124, + "grad_norm": 1.207867980003357, + "learning_rate": 9.585421731409639e-06, + "loss": 0.3272, + "step": 7824 + }, + { + "epoch": 0.1566448965292896, + "grad_norm": 1.918067216873169, + "learning_rate": 9.585292472198626e-06, + "loss": 0.8306, + "step": 7825 + }, + { + "epoch": 0.15666491504641794, + "grad_norm": 1.170538306236267, + "learning_rate": 9.58516319371203e-06, + "loss": 0.3394, + "step": 7826 + }, + { + "epoch": 0.1566849335635463, + "grad_norm": 1.1560496091842651, + "learning_rate": 9.585033895950395e-06, + "loss": 0.3438, + "step": 7827 + }, + { + "epoch": 0.15670495208067461, + "grad_norm": 1.1143052577972412, + "learning_rate": 9.584904578914264e-06, + "loss": 0.3124, + "step": 7828 + }, + { + "epoch": 0.15672497059780297, + "grad_norm": 1.2195292711257935, + "learning_rate": 9.58477524260418e-06, + "loss": 0.3705, + "step": 7829 + }, + { + "epoch": 0.15674498911493132, + "grad_norm": 1.1212464570999146, + "learning_rate": 9.58464588702069e-06, + "loss": 0.3294, + "step": 7830 + }, + { + "epoch": 0.15676500763205967, + "grad_norm": 1.0918105840682983, + "learning_rate": 9.584516512164333e-06, + "loss": 0.3405, + "step": 7831 + }, + { + "epoch": 0.156785026149188, + "grad_norm": 1.073365569114685, + "learning_rate": 9.584387118035656e-06, + "loss": 0.2776, + "step": 7832 + }, + { + "epoch": 0.15680504466631634, + "grad_norm": 1.0556292533874512, + "learning_rate": 9.584257704635202e-06, + "loss": 0.35, + "step": 7833 + }, + { + "epoch": 0.1568250631834447, + "grad_norm": 1.1790627241134644, + "learning_rate": 9.584128271963516e-06, + "loss": 0.3391, + "step": 7834 + }, + { + "epoch": 0.15684508170057304, + "grad_norm": 1.6714876890182495, + "learning_rate": 9.58399882002114e-06, + "loss": 0.8653, + "step": 7835 + }, + { + "epoch": 0.15686510021770136, + "grad_norm": 1.1057798862457275, + "learning_rate": 9.583869348808622e-06, + "loss": 0.3585, + "step": 7836 + }, + { + "epoch": 0.15688511873482971, + "grad_norm": 1.2316445112228394, + "learning_rate": 9.5837398583265e-06, + "loss": 0.3887, + "step": 7837 + }, + { + "epoch": 0.15690513725195807, + "grad_norm": 1.20656156539917, + "learning_rate": 9.583610348575323e-06, + "loss": 0.3516, + "step": 7838 + }, + { + "epoch": 0.15692515576908642, + "grad_norm": 1.1359935998916626, + "learning_rate": 9.583480819555636e-06, + "loss": 0.3839, + "step": 7839 + }, + { + "epoch": 0.15694517428621474, + "grad_norm": 1.0781608819961548, + "learning_rate": 9.583351271267983e-06, + "loss": 0.3179, + "step": 7840 + }, + { + "epoch": 0.1569651928033431, + "grad_norm": 1.158195972442627, + "learning_rate": 9.583221703712903e-06, + "loss": 0.3095, + "step": 7841 + }, + { + "epoch": 0.15698521132047144, + "grad_norm": 1.1234056949615479, + "learning_rate": 9.583092116890948e-06, + "loss": 0.3453, + "step": 7842 + }, + { + "epoch": 0.1570052298375998, + "grad_norm": 1.4553757905960083, + "learning_rate": 9.582962510802662e-06, + "loss": 0.3405, + "step": 7843 + }, + { + "epoch": 0.15702524835472811, + "grad_norm": 1.1689478158950806, + "learning_rate": 9.582832885448585e-06, + "loss": 0.3982, + "step": 7844 + }, + { + "epoch": 0.15704526687185646, + "grad_norm": 1.6819359064102173, + "learning_rate": 9.582703240829267e-06, + "loss": 0.889, + "step": 7845 + }, + { + "epoch": 0.15706528538898482, + "grad_norm": 1.0490986108779907, + "learning_rate": 9.582573576945248e-06, + "loss": 0.3256, + "step": 7846 + }, + { + "epoch": 0.15708530390611317, + "grad_norm": 1.0243284702301025, + "learning_rate": 9.582443893797078e-06, + "loss": 0.3655, + "step": 7847 + }, + { + "epoch": 0.1571053224232415, + "grad_norm": 1.0902689695358276, + "learning_rate": 9.5823141913853e-06, + "loss": 0.3159, + "step": 7848 + }, + { + "epoch": 0.15712534094036984, + "grad_norm": 1.0473084449768066, + "learning_rate": 9.582184469710458e-06, + "loss": 0.321, + "step": 7849 + }, + { + "epoch": 0.1571453594574982, + "grad_norm": 1.038442611694336, + "learning_rate": 9.5820547287731e-06, + "loss": 0.3595, + "step": 7850 + }, + { + "epoch": 0.15716537797462654, + "grad_norm": 1.1343592405319214, + "learning_rate": 9.581924968573769e-06, + "loss": 0.3279, + "step": 7851 + }, + { + "epoch": 0.15718539649175486, + "grad_norm": 1.1112486124038696, + "learning_rate": 9.581795189113013e-06, + "loss": 0.336, + "step": 7852 + }, + { + "epoch": 0.15720541500888321, + "grad_norm": 1.0199159383773804, + "learning_rate": 9.581665390391374e-06, + "loss": 0.329, + "step": 7853 + }, + { + "epoch": 0.15722543352601157, + "grad_norm": 1.3514487743377686, + "learning_rate": 9.581535572409401e-06, + "loss": 0.336, + "step": 7854 + }, + { + "epoch": 0.15724545204313992, + "grad_norm": 1.1236214637756348, + "learning_rate": 9.581405735167637e-06, + "loss": 0.3183, + "step": 7855 + }, + { + "epoch": 0.15726547056026824, + "grad_norm": 1.035781741142273, + "learning_rate": 9.581275878666632e-06, + "loss": 0.3426, + "step": 7856 + }, + { + "epoch": 0.1572854890773966, + "grad_norm": 0.985037624835968, + "learning_rate": 9.581146002906925e-06, + "loss": 0.2966, + "step": 7857 + }, + { + "epoch": 0.15730550759452494, + "grad_norm": 1.0243183374404907, + "learning_rate": 9.581016107889069e-06, + "loss": 0.3034, + "step": 7858 + }, + { + "epoch": 0.1573255261116533, + "grad_norm": 1.1443638801574707, + "learning_rate": 9.580886193613607e-06, + "loss": 0.3096, + "step": 7859 + }, + { + "epoch": 0.15734554462878161, + "grad_norm": 1.1897759437561035, + "learning_rate": 9.580756260081085e-06, + "loss": 0.36, + "step": 7860 + }, + { + "epoch": 0.15736556314590996, + "grad_norm": 1.0970100164413452, + "learning_rate": 9.58062630729205e-06, + "loss": 0.3318, + "step": 7861 + }, + { + "epoch": 0.15738558166303832, + "grad_norm": 1.0776265859603882, + "learning_rate": 9.580496335247046e-06, + "loss": 0.3192, + "step": 7862 + }, + { + "epoch": 0.15740560018016667, + "grad_norm": 1.071009874343872, + "learning_rate": 9.580366343946621e-06, + "loss": 0.3543, + "step": 7863 + }, + { + "epoch": 0.157425618697295, + "grad_norm": 1.2189332246780396, + "learning_rate": 9.580236333391325e-06, + "loss": 0.2707, + "step": 7864 + }, + { + "epoch": 0.15744563721442334, + "grad_norm": 1.139135479927063, + "learning_rate": 9.580106303581697e-06, + "loss": 0.312, + "step": 7865 + }, + { + "epoch": 0.1574656557315517, + "grad_norm": 1.0446088314056396, + "learning_rate": 9.57997625451829e-06, + "loss": 0.3148, + "step": 7866 + }, + { + "epoch": 0.15748567424868004, + "grad_norm": 1.0757346153259277, + "learning_rate": 9.579846186201649e-06, + "loss": 0.3442, + "step": 7867 + }, + { + "epoch": 0.15750569276580836, + "grad_norm": 1.1637879610061646, + "learning_rate": 9.579716098632319e-06, + "loss": 0.347, + "step": 7868 + }, + { + "epoch": 0.15752571128293671, + "grad_norm": 1.9698370695114136, + "learning_rate": 9.579585991810849e-06, + "loss": 0.8684, + "step": 7869 + }, + { + "epoch": 0.15754572980006507, + "grad_norm": 1.094220757484436, + "learning_rate": 9.579455865737783e-06, + "loss": 0.34, + "step": 7870 + }, + { + "epoch": 0.15756574831719342, + "grad_norm": 1.0939587354660034, + "learning_rate": 9.579325720413671e-06, + "loss": 0.34, + "step": 7871 + }, + { + "epoch": 0.15758576683432174, + "grad_norm": 1.007719874382019, + "learning_rate": 9.57919555583906e-06, + "loss": 0.3025, + "step": 7872 + }, + { + "epoch": 0.1576057853514501, + "grad_norm": 1.0805392265319824, + "learning_rate": 9.579065372014496e-06, + "loss": 0.3411, + "step": 7873 + }, + { + "epoch": 0.15762580386857844, + "grad_norm": 1.1076641082763672, + "learning_rate": 9.578935168940526e-06, + "loss": 0.2955, + "step": 7874 + }, + { + "epoch": 0.1576458223857068, + "grad_norm": 1.0459951162338257, + "learning_rate": 9.5788049466177e-06, + "loss": 0.3443, + "step": 7875 + }, + { + "epoch": 0.1576658409028351, + "grad_norm": 1.0398112535476685, + "learning_rate": 9.578674705046562e-06, + "loss": 0.2935, + "step": 7876 + }, + { + "epoch": 0.15768585941996346, + "grad_norm": 1.084368348121643, + "learning_rate": 9.57854444422766e-06, + "loss": 0.3452, + "step": 7877 + }, + { + "epoch": 0.15770587793709182, + "grad_norm": 1.1162482500076294, + "learning_rate": 9.578414164161543e-06, + "loss": 0.321, + "step": 7878 + }, + { + "epoch": 0.15772589645422017, + "grad_norm": 1.0873990058898926, + "learning_rate": 9.578283864848759e-06, + "loss": 0.3154, + "step": 7879 + }, + { + "epoch": 0.1577459149713485, + "grad_norm": 1.0689940452575684, + "learning_rate": 9.578153546289853e-06, + "loss": 0.3177, + "step": 7880 + }, + { + "epoch": 0.15776593348847684, + "grad_norm": 1.1097041368484497, + "learning_rate": 9.578023208485376e-06, + "loss": 0.343, + "step": 7881 + }, + { + "epoch": 0.1577859520056052, + "grad_norm": 1.0649675130844116, + "learning_rate": 9.577892851435875e-06, + "loss": 0.3007, + "step": 7882 + }, + { + "epoch": 0.15780597052273354, + "grad_norm": 1.1073813438415527, + "learning_rate": 9.577762475141898e-06, + "loss": 0.39, + "step": 7883 + }, + { + "epoch": 0.15782598903986186, + "grad_norm": 1.025041937828064, + "learning_rate": 9.57763207960399e-06, + "loss": 0.3943, + "step": 7884 + }, + { + "epoch": 0.15784600755699021, + "grad_norm": 1.107092261314392, + "learning_rate": 9.577501664822705e-06, + "loss": 0.3566, + "step": 7885 + }, + { + "epoch": 0.15786602607411857, + "grad_norm": 1.138059139251709, + "learning_rate": 9.577371230798587e-06, + "loss": 0.3617, + "step": 7886 + }, + { + "epoch": 0.15788604459124692, + "grad_norm": 1.186140537261963, + "learning_rate": 9.577240777532184e-06, + "loss": 0.3594, + "step": 7887 + }, + { + "epoch": 0.15790606310837524, + "grad_norm": 0.989190399646759, + "learning_rate": 9.57711030502405e-06, + "loss": 0.3002, + "step": 7888 + }, + { + "epoch": 0.1579260816255036, + "grad_norm": 1.1946479082107544, + "learning_rate": 9.576979813274724e-06, + "loss": 0.3203, + "step": 7889 + }, + { + "epoch": 0.15794610014263194, + "grad_norm": 1.1582772731781006, + "learning_rate": 9.576849302284763e-06, + "loss": 0.3272, + "step": 7890 + }, + { + "epoch": 0.1579661186597603, + "grad_norm": 1.0255438089370728, + "learning_rate": 9.576718772054712e-06, + "loss": 0.3966, + "step": 7891 + }, + { + "epoch": 0.1579861371768886, + "grad_norm": 1.115800380706787, + "learning_rate": 9.57658822258512e-06, + "loss": 0.3165, + "step": 7892 + }, + { + "epoch": 0.15800615569401696, + "grad_norm": 1.1146488189697266, + "learning_rate": 9.576457653876536e-06, + "loss": 0.3326, + "step": 7893 + }, + { + "epoch": 0.15802617421114531, + "grad_norm": 1.163818359375, + "learning_rate": 9.576327065929506e-06, + "loss": 0.3418, + "step": 7894 + }, + { + "epoch": 0.15804619272827367, + "grad_norm": 1.2018743753433228, + "learning_rate": 9.576196458744586e-06, + "loss": 0.2967, + "step": 7895 + }, + { + "epoch": 0.158066211245402, + "grad_norm": 1.0877243280410767, + "learning_rate": 9.57606583232232e-06, + "loss": 0.3235, + "step": 7896 + }, + { + "epoch": 0.15808622976253034, + "grad_norm": 1.3226137161254883, + "learning_rate": 9.575935186663256e-06, + "loss": 0.3461, + "step": 7897 + }, + { + "epoch": 0.1581062482796587, + "grad_norm": 1.1157575845718384, + "learning_rate": 9.575804521767949e-06, + "loss": 0.3181, + "step": 7898 + }, + { + "epoch": 0.15812626679678704, + "grad_norm": 1.1119784116744995, + "learning_rate": 9.575673837636941e-06, + "loss": 0.3413, + "step": 7899 + }, + { + "epoch": 0.15814628531391536, + "grad_norm": 1.1964361667633057, + "learning_rate": 9.575543134270786e-06, + "loss": 0.3613, + "step": 7900 + }, + { + "epoch": 0.15816630383104371, + "grad_norm": 1.0828288793563843, + "learning_rate": 9.575412411670033e-06, + "loss": 0.4105, + "step": 7901 + }, + { + "epoch": 0.15818632234817206, + "grad_norm": 1.263344168663025, + "learning_rate": 9.57528166983523e-06, + "loss": 0.3347, + "step": 7902 + }, + { + "epoch": 0.15820634086530042, + "grad_norm": 1.083782434463501, + "learning_rate": 9.575150908766929e-06, + "loss": 0.3294, + "step": 7903 + }, + { + "epoch": 0.15822635938242874, + "grad_norm": 1.3582265377044678, + "learning_rate": 9.575020128465676e-06, + "loss": 0.3439, + "step": 7904 + }, + { + "epoch": 0.1582463778995571, + "grad_norm": 1.0933218002319336, + "learning_rate": 9.574889328932025e-06, + "loss": 0.3341, + "step": 7905 + }, + { + "epoch": 0.15826639641668544, + "grad_norm": 1.024864912033081, + "learning_rate": 9.574758510166521e-06, + "loss": 0.3115, + "step": 7906 + }, + { + "epoch": 0.1582864149338138, + "grad_norm": 1.059528112411499, + "learning_rate": 9.57462767216972e-06, + "loss": 0.337, + "step": 7907 + }, + { + "epoch": 0.1583064334509421, + "grad_norm": 1.0503499507904053, + "learning_rate": 9.574496814942168e-06, + "loss": 0.2473, + "step": 7908 + }, + { + "epoch": 0.15832645196807046, + "grad_norm": 1.196358561515808, + "learning_rate": 9.574365938484413e-06, + "loss": 0.3456, + "step": 7909 + }, + { + "epoch": 0.15834647048519881, + "grad_norm": 1.2747505903244019, + "learning_rate": 9.57423504279701e-06, + "loss": 0.3583, + "step": 7910 + }, + { + "epoch": 0.15836648900232717, + "grad_norm": 1.0263255834579468, + "learning_rate": 9.574104127880507e-06, + "loss": 0.2982, + "step": 7911 + }, + { + "epoch": 0.1583865075194555, + "grad_norm": 1.063651442527771, + "learning_rate": 9.573973193735456e-06, + "loss": 0.2767, + "step": 7912 + }, + { + "epoch": 0.15840652603658384, + "grad_norm": 1.193004846572876, + "learning_rate": 9.573842240362405e-06, + "loss": 0.3261, + "step": 7913 + }, + { + "epoch": 0.1584265445537122, + "grad_norm": 1.1229115724563599, + "learning_rate": 9.573711267761905e-06, + "loss": 0.3423, + "step": 7914 + }, + { + "epoch": 0.15844656307084054, + "grad_norm": 1.0293867588043213, + "learning_rate": 9.573580275934508e-06, + "loss": 0.3325, + "step": 7915 + }, + { + "epoch": 0.15846658158796886, + "grad_norm": 1.0455213785171509, + "learning_rate": 9.573449264880763e-06, + "loss": 0.3273, + "step": 7916 + }, + { + "epoch": 0.15848660010509721, + "grad_norm": 1.058482050895691, + "learning_rate": 9.57331823460122e-06, + "loss": 0.3359, + "step": 7917 + }, + { + "epoch": 0.15850661862222556, + "grad_norm": 1.383649230003357, + "learning_rate": 9.573187185096436e-06, + "loss": 0.3075, + "step": 7918 + }, + { + "epoch": 0.15852663713935392, + "grad_norm": 1.0997247695922852, + "learning_rate": 9.573056116366953e-06, + "loss": 0.353, + "step": 7919 + }, + { + "epoch": 0.15854665565648224, + "grad_norm": 1.1377720832824707, + "learning_rate": 9.572925028413328e-06, + "loss": 0.3158, + "step": 7920 + }, + { + "epoch": 0.1585666741736106, + "grad_norm": 1.1175479888916016, + "learning_rate": 9.57279392123611e-06, + "loss": 0.3465, + "step": 7921 + }, + { + "epoch": 0.15858669269073894, + "grad_norm": 1.1087878942489624, + "learning_rate": 9.57266279483585e-06, + "loss": 0.3525, + "step": 7922 + }, + { + "epoch": 0.1586067112078673, + "grad_norm": 1.1419494152069092, + "learning_rate": 9.5725316492131e-06, + "loss": 0.2942, + "step": 7923 + }, + { + "epoch": 0.1586267297249956, + "grad_norm": 0.9846611022949219, + "learning_rate": 9.572400484368412e-06, + "loss": 0.3307, + "step": 7924 + }, + { + "epoch": 0.15864674824212396, + "grad_norm": 1.307277798652649, + "learning_rate": 9.572269300302335e-06, + "loss": 0.3418, + "step": 7925 + }, + { + "epoch": 0.15866676675925231, + "grad_norm": 1.8081525564193726, + "learning_rate": 9.572138097015422e-06, + "loss": 0.8625, + "step": 7926 + }, + { + "epoch": 0.15868678527638067, + "grad_norm": 1.1643462181091309, + "learning_rate": 9.572006874508223e-06, + "loss": 0.3937, + "step": 7927 + }, + { + "epoch": 0.158706803793509, + "grad_norm": 1.1256905794143677, + "learning_rate": 9.571875632781293e-06, + "loss": 0.3324, + "step": 7928 + }, + { + "epoch": 0.15872682231063734, + "grad_norm": 1.0846478939056396, + "learning_rate": 9.57174437183518e-06, + "loss": 0.3229, + "step": 7929 + }, + { + "epoch": 0.1587468408277657, + "grad_norm": 0.9973533153533936, + "learning_rate": 9.571613091670441e-06, + "loss": 0.3266, + "step": 7930 + }, + { + "epoch": 0.15876685934489404, + "grad_norm": 1.1736732721328735, + "learning_rate": 9.571481792287622e-06, + "loss": 0.361, + "step": 7931 + }, + { + "epoch": 0.15878687786202236, + "grad_norm": 1.0842642784118652, + "learning_rate": 9.571350473687278e-06, + "loss": 0.2933, + "step": 7932 + }, + { + "epoch": 0.1588068963791507, + "grad_norm": 1.2043380737304688, + "learning_rate": 9.57121913586996e-06, + "loss": 0.3606, + "step": 7933 + }, + { + "epoch": 0.15882691489627906, + "grad_norm": 1.1467008590698242, + "learning_rate": 9.571087778836219e-06, + "loss": 0.3195, + "step": 7934 + }, + { + "epoch": 0.15884693341340742, + "grad_norm": 1.9380789995193481, + "learning_rate": 9.570956402586611e-06, + "loss": 0.816, + "step": 7935 + }, + { + "epoch": 0.15886695193053574, + "grad_norm": 1.0884954929351807, + "learning_rate": 9.570825007121685e-06, + "loss": 0.3761, + "step": 7936 + }, + { + "epoch": 0.1588869704476641, + "grad_norm": 1.915725827217102, + "learning_rate": 9.570693592441993e-06, + "loss": 0.8574, + "step": 7937 + }, + { + "epoch": 0.15890698896479244, + "grad_norm": 1.0874195098876953, + "learning_rate": 9.57056215854809e-06, + "loss": 0.3564, + "step": 7938 + }, + { + "epoch": 0.1589270074819208, + "grad_norm": 1.068363904953003, + "learning_rate": 9.570430705440527e-06, + "loss": 0.3288, + "step": 7939 + }, + { + "epoch": 0.1589470259990491, + "grad_norm": 1.090112566947937, + "learning_rate": 9.570299233119857e-06, + "loss": 0.332, + "step": 7940 + }, + { + "epoch": 0.15896704451617746, + "grad_norm": 1.0417697429656982, + "learning_rate": 9.570167741586634e-06, + "loss": 0.3198, + "step": 7941 + }, + { + "epoch": 0.15898706303330581, + "grad_norm": 1.2539687156677246, + "learning_rate": 9.570036230841408e-06, + "loss": 0.3341, + "step": 7942 + }, + { + "epoch": 0.15900708155043417, + "grad_norm": 1.1159682273864746, + "learning_rate": 9.569904700884733e-06, + "loss": 0.3766, + "step": 7943 + }, + { + "epoch": 0.1590271000675625, + "grad_norm": 1.9508602619171143, + "learning_rate": 9.569773151717162e-06, + "loss": 0.8611, + "step": 7944 + }, + { + "epoch": 0.15904711858469084, + "grad_norm": 1.0882519483566284, + "learning_rate": 9.569641583339249e-06, + "loss": 0.3638, + "step": 7945 + }, + { + "epoch": 0.1590671371018192, + "grad_norm": 0.9716752171516418, + "learning_rate": 9.569509995751546e-06, + "loss": 0.3171, + "step": 7946 + }, + { + "epoch": 0.15908715561894754, + "grad_norm": 1.0423816442489624, + "learning_rate": 9.569378388954605e-06, + "loss": 0.3529, + "step": 7947 + }, + { + "epoch": 0.15910717413607586, + "grad_norm": 1.0464038848876953, + "learning_rate": 9.569246762948983e-06, + "loss": 0.3299, + "step": 7948 + }, + { + "epoch": 0.1591271926532042, + "grad_norm": 1.1055278778076172, + "learning_rate": 9.569115117735229e-06, + "loss": 0.3366, + "step": 7949 + }, + { + "epoch": 0.15914721117033256, + "grad_norm": 1.3922065496444702, + "learning_rate": 9.568983453313898e-06, + "loss": 0.3302, + "step": 7950 + }, + { + "epoch": 0.15916722968746091, + "grad_norm": 1.1348899602890015, + "learning_rate": 9.568851769685546e-06, + "loss": 0.3516, + "step": 7951 + }, + { + "epoch": 0.15918724820458924, + "grad_norm": 1.0870438814163208, + "learning_rate": 9.568720066850724e-06, + "loss": 0.3141, + "step": 7952 + }, + { + "epoch": 0.1592072667217176, + "grad_norm": 1.4005894660949707, + "learning_rate": 9.568588344809983e-06, + "loss": 0.3504, + "step": 7953 + }, + { + "epoch": 0.15922728523884594, + "grad_norm": 1.057403802871704, + "learning_rate": 9.568456603563882e-06, + "loss": 0.3232, + "step": 7954 + }, + { + "epoch": 0.1592473037559743, + "grad_norm": 1.0615683794021606, + "learning_rate": 9.568324843112972e-06, + "loss": 0.3825, + "step": 7955 + }, + { + "epoch": 0.1592673222731026, + "grad_norm": 2.002534866333008, + "learning_rate": 9.56819306345781e-06, + "loss": 0.7429, + "step": 7956 + }, + { + "epoch": 0.15928734079023096, + "grad_norm": 1.103549599647522, + "learning_rate": 9.568061264598945e-06, + "loss": 0.3688, + "step": 7957 + }, + { + "epoch": 0.15930735930735931, + "grad_norm": 1.1360515356063843, + "learning_rate": 9.567929446536931e-06, + "loss": 0.3472, + "step": 7958 + }, + { + "epoch": 0.15932737782448766, + "grad_norm": 1.0996699333190918, + "learning_rate": 9.567797609272329e-06, + "loss": 0.3098, + "step": 7959 + }, + { + "epoch": 0.159347396341616, + "grad_norm": 1.8613429069519043, + "learning_rate": 9.567665752805688e-06, + "loss": 0.7496, + "step": 7960 + }, + { + "epoch": 0.15936741485874434, + "grad_norm": 1.1375547647476196, + "learning_rate": 9.567533877137562e-06, + "loss": 0.3111, + "step": 7961 + }, + { + "epoch": 0.1593874333758727, + "grad_norm": 1.1842551231384277, + "learning_rate": 9.567401982268507e-06, + "loss": 0.3404, + "step": 7962 + }, + { + "epoch": 0.15940745189300104, + "grad_norm": 0.9688184857368469, + "learning_rate": 9.567270068199077e-06, + "loss": 0.3278, + "step": 7963 + }, + { + "epoch": 0.15942747041012936, + "grad_norm": 1.0851640701293945, + "learning_rate": 9.567138134929827e-06, + "loss": 0.3667, + "step": 7964 + }, + { + "epoch": 0.1594474889272577, + "grad_norm": 1.1238129138946533, + "learning_rate": 9.56700618246131e-06, + "loss": 0.3602, + "step": 7965 + }, + { + "epoch": 0.15946750744438606, + "grad_norm": 1.0973060131072998, + "learning_rate": 9.566874210794083e-06, + "loss": 0.3193, + "step": 7966 + }, + { + "epoch": 0.1594875259615144, + "grad_norm": 1.0115365982055664, + "learning_rate": 9.566742219928699e-06, + "loss": 0.3542, + "step": 7967 + }, + { + "epoch": 0.15950754447864274, + "grad_norm": 1.1209731101989746, + "learning_rate": 9.566610209865713e-06, + "loss": 0.3447, + "step": 7968 + }, + { + "epoch": 0.1595275629957711, + "grad_norm": 1.7852482795715332, + "learning_rate": 9.56647818060568e-06, + "loss": 0.8677, + "step": 7969 + }, + { + "epoch": 0.15954758151289944, + "grad_norm": 1.2384068965911865, + "learning_rate": 9.566346132149159e-06, + "loss": 0.3953, + "step": 7970 + }, + { + "epoch": 0.15956760003002776, + "grad_norm": 1.0827888250350952, + "learning_rate": 9.566214064496697e-06, + "loss": 0.3106, + "step": 7971 + }, + { + "epoch": 0.1595876185471561, + "grad_norm": 0.9802738428115845, + "learning_rate": 9.566081977648856e-06, + "loss": 0.2844, + "step": 7972 + }, + { + "epoch": 0.15960763706428446, + "grad_norm": 1.096470594406128, + "learning_rate": 9.56594987160619e-06, + "loss": 0.3204, + "step": 7973 + }, + { + "epoch": 0.15962765558141281, + "grad_norm": 1.0659066438674927, + "learning_rate": 9.565817746369252e-06, + "loss": 0.3463, + "step": 7974 + }, + { + "epoch": 0.15964767409854114, + "grad_norm": 1.1880357265472412, + "learning_rate": 9.565685601938598e-06, + "loss": 0.3981, + "step": 7975 + }, + { + "epoch": 0.1596676926156695, + "grad_norm": 0.9914990663528442, + "learning_rate": 9.565553438314785e-06, + "loss": 0.3334, + "step": 7976 + }, + { + "epoch": 0.15968771113279784, + "grad_norm": 1.121476411819458, + "learning_rate": 9.565421255498368e-06, + "loss": 0.2594, + "step": 7977 + }, + { + "epoch": 0.1597077296499262, + "grad_norm": 1.1107065677642822, + "learning_rate": 9.565289053489903e-06, + "loss": 0.3683, + "step": 7978 + }, + { + "epoch": 0.1597277481670545, + "grad_norm": 1.1076303720474243, + "learning_rate": 9.565156832289945e-06, + "loss": 0.3235, + "step": 7979 + }, + { + "epoch": 0.15974776668418286, + "grad_norm": 1.8753606081008911, + "learning_rate": 9.56502459189905e-06, + "loss": 0.8877, + "step": 7980 + }, + { + "epoch": 0.1597677852013112, + "grad_norm": 1.0415345430374146, + "learning_rate": 9.564892332317775e-06, + "loss": 0.3717, + "step": 7981 + }, + { + "epoch": 0.15978780371843956, + "grad_norm": 1.1036303043365479, + "learning_rate": 9.564760053546673e-06, + "loss": 0.2767, + "step": 7982 + }, + { + "epoch": 0.1598078222355679, + "grad_norm": 1.0918946266174316, + "learning_rate": 9.564627755586302e-06, + "loss": 0.3592, + "step": 7983 + }, + { + "epoch": 0.15982784075269624, + "grad_norm": 1.9674147367477417, + "learning_rate": 9.564495438437221e-06, + "loss": 0.8676, + "step": 7984 + }, + { + "epoch": 0.1598478592698246, + "grad_norm": 1.8505865335464478, + "learning_rate": 9.564363102099981e-06, + "loss": 0.8831, + "step": 7985 + }, + { + "epoch": 0.15986787778695294, + "grad_norm": 1.1059225797653198, + "learning_rate": 9.56423074657514e-06, + "loss": 0.3791, + "step": 7986 + }, + { + "epoch": 0.15988789630408126, + "grad_norm": 1.0562368631362915, + "learning_rate": 9.564098371863258e-06, + "loss": 0.3077, + "step": 7987 + }, + { + "epoch": 0.1599079148212096, + "grad_norm": 1.047182559967041, + "learning_rate": 9.563965977964886e-06, + "loss": 0.3158, + "step": 7988 + }, + { + "epoch": 0.15992793333833796, + "grad_norm": 1.785157322883606, + "learning_rate": 9.563833564880586e-06, + "loss": 0.8567, + "step": 7989 + }, + { + "epoch": 0.1599479518554663, + "grad_norm": 1.0819425582885742, + "learning_rate": 9.56370113261091e-06, + "loss": 0.322, + "step": 7990 + }, + { + "epoch": 0.15996797037259464, + "grad_norm": 1.8839622735977173, + "learning_rate": 9.563568681156417e-06, + "loss": 0.8645, + "step": 7991 + }, + { + "epoch": 0.159987988889723, + "grad_norm": 1.0736804008483887, + "learning_rate": 9.563436210517663e-06, + "loss": 0.3298, + "step": 7992 + }, + { + "epoch": 0.16000800740685134, + "grad_norm": 1.131402850151062, + "learning_rate": 9.563303720695205e-06, + "loss": 0.3375, + "step": 7993 + }, + { + "epoch": 0.1600280259239797, + "grad_norm": 1.0940308570861816, + "learning_rate": 9.563171211689601e-06, + "loss": 0.3353, + "step": 7994 + }, + { + "epoch": 0.160048044441108, + "grad_norm": 1.0838998556137085, + "learning_rate": 9.563038683501406e-06, + "loss": 0.3306, + "step": 7995 + }, + { + "epoch": 0.16006806295823636, + "grad_norm": 1.013405680656433, + "learning_rate": 9.56290613613118e-06, + "loss": 0.325, + "step": 7996 + }, + { + "epoch": 0.1600880814753647, + "grad_norm": 1.1902762651443481, + "learning_rate": 9.56277356957948e-06, + "loss": 0.4063, + "step": 7997 + }, + { + "epoch": 0.16010809999249306, + "grad_norm": 1.049285888671875, + "learning_rate": 9.562640983846857e-06, + "loss": 0.3285, + "step": 7998 + }, + { + "epoch": 0.1601281185096214, + "grad_norm": 1.0882009267807007, + "learning_rate": 9.562508378933878e-06, + "loss": 0.3616, + "step": 7999 + }, + { + "epoch": 0.16014813702674974, + "grad_norm": 1.1455146074295044, + "learning_rate": 9.562375754841094e-06, + "loss": 0.3335, + "step": 8000 + }, + { + "epoch": 0.1601681555438781, + "grad_norm": 1.1152582168579102, + "learning_rate": 9.562243111569063e-06, + "loss": 0.3536, + "step": 8001 + }, + { + "epoch": 0.16018817406100644, + "grad_norm": 1.8239188194274902, + "learning_rate": 9.562110449118343e-06, + "loss": 0.8105, + "step": 8002 + }, + { + "epoch": 0.16020819257813476, + "grad_norm": 1.0053664445877075, + "learning_rate": 9.561977767489494e-06, + "loss": 0.3303, + "step": 8003 + }, + { + "epoch": 0.1602282110952631, + "grad_norm": 0.9975497126579285, + "learning_rate": 9.561845066683074e-06, + "loss": 0.3002, + "step": 8004 + }, + { + "epoch": 0.16024822961239146, + "grad_norm": 0.9919027090072632, + "learning_rate": 9.561712346699636e-06, + "loss": 0.278, + "step": 8005 + }, + { + "epoch": 0.1602682481295198, + "grad_norm": 1.1221795082092285, + "learning_rate": 9.561579607539742e-06, + "loss": 0.3393, + "step": 8006 + }, + { + "epoch": 0.16028826664664814, + "grad_norm": 1.0778273344039917, + "learning_rate": 9.561446849203949e-06, + "loss": 0.3096, + "step": 8007 + }, + { + "epoch": 0.1603082851637765, + "grad_norm": 1.8445454835891724, + "learning_rate": 9.561314071692816e-06, + "loss": 0.9162, + "step": 8008 + }, + { + "epoch": 0.16032830368090484, + "grad_norm": 1.2299718856811523, + "learning_rate": 9.561181275006899e-06, + "loss": 0.3781, + "step": 8009 + }, + { + "epoch": 0.1603483221980332, + "grad_norm": 1.1195390224456787, + "learning_rate": 9.561048459146757e-06, + "loss": 0.3349, + "step": 8010 + }, + { + "epoch": 0.1603683407151615, + "grad_norm": 1.0657691955566406, + "learning_rate": 9.56091562411295e-06, + "loss": 0.321, + "step": 8011 + }, + { + "epoch": 0.16038835923228986, + "grad_norm": 1.995724081993103, + "learning_rate": 9.560782769906036e-06, + "loss": 0.8124, + "step": 8012 + }, + { + "epoch": 0.1604083777494182, + "grad_norm": 1.0276798009872437, + "learning_rate": 9.560649896526572e-06, + "loss": 0.3505, + "step": 8013 + }, + { + "epoch": 0.16042839626654656, + "grad_norm": 1.0640437602996826, + "learning_rate": 9.560517003975116e-06, + "loss": 0.3503, + "step": 8014 + }, + { + "epoch": 0.1604484147836749, + "grad_norm": 1.0772621631622314, + "learning_rate": 9.560384092252229e-06, + "loss": 0.3535, + "step": 8015 + }, + { + "epoch": 0.16046843330080324, + "grad_norm": 1.212906837463379, + "learning_rate": 9.560251161358469e-06, + "loss": 0.3895, + "step": 8016 + }, + { + "epoch": 0.1604884518179316, + "grad_norm": 1.1419777870178223, + "learning_rate": 9.560118211294394e-06, + "loss": 0.3636, + "step": 8017 + }, + { + "epoch": 0.16050847033505994, + "grad_norm": 1.1002240180969238, + "learning_rate": 9.559985242060563e-06, + "loss": 0.3196, + "step": 8018 + }, + { + "epoch": 0.16052848885218826, + "grad_norm": 1.8877092599868774, + "learning_rate": 9.559852253657536e-06, + "loss": 0.8425, + "step": 8019 + }, + { + "epoch": 0.1605485073693166, + "grad_norm": 2.0699751377105713, + "learning_rate": 9.559719246085873e-06, + "loss": 0.8566, + "step": 8020 + }, + { + "epoch": 0.16056852588644496, + "grad_norm": 1.1039334535598755, + "learning_rate": 9.559586219346128e-06, + "loss": 0.3875, + "step": 8021 + }, + { + "epoch": 0.1605885444035733, + "grad_norm": 1.1296674013137817, + "learning_rate": 9.559453173438866e-06, + "loss": 0.3676, + "step": 8022 + }, + { + "epoch": 0.16060856292070164, + "grad_norm": 1.0853019952774048, + "learning_rate": 9.559320108364645e-06, + "loss": 0.2894, + "step": 8023 + }, + { + "epoch": 0.16062858143783, + "grad_norm": 1.0952372550964355, + "learning_rate": 9.559187024124021e-06, + "loss": 0.322, + "step": 8024 + }, + { + "epoch": 0.16064859995495834, + "grad_norm": 1.1654390096664429, + "learning_rate": 9.559053920717557e-06, + "loss": 0.3498, + "step": 8025 + }, + { + "epoch": 0.1606686184720867, + "grad_norm": 0.9909247159957886, + "learning_rate": 9.558920798145813e-06, + "loss": 0.3375, + "step": 8026 + }, + { + "epoch": 0.160688636989215, + "grad_norm": 1.0186806917190552, + "learning_rate": 9.558787656409345e-06, + "loss": 0.3066, + "step": 8027 + }, + { + "epoch": 0.16070865550634336, + "grad_norm": 1.7791423797607422, + "learning_rate": 9.558654495508714e-06, + "loss": 0.833, + "step": 8028 + }, + { + "epoch": 0.1607286740234717, + "grad_norm": 1.0501712560653687, + "learning_rate": 9.558521315444483e-06, + "loss": 0.3045, + "step": 8029 + }, + { + "epoch": 0.16074869254060006, + "grad_norm": 1.1134159564971924, + "learning_rate": 9.558388116217208e-06, + "loss": 0.3676, + "step": 8030 + }, + { + "epoch": 0.1607687110577284, + "grad_norm": 1.1452388763427734, + "learning_rate": 9.55825489782745e-06, + "loss": 0.339, + "step": 8031 + }, + { + "epoch": 0.16078872957485674, + "grad_norm": 1.0870625972747803, + "learning_rate": 9.55812166027577e-06, + "loss": 0.3064, + "step": 8032 + }, + { + "epoch": 0.1608087480919851, + "grad_norm": 1.0388257503509521, + "learning_rate": 9.557988403562728e-06, + "loss": 0.3438, + "step": 8033 + }, + { + "epoch": 0.16082876660911344, + "grad_norm": 1.1651090383529663, + "learning_rate": 9.557855127688884e-06, + "loss": 0.3261, + "step": 8034 + }, + { + "epoch": 0.16084878512624176, + "grad_norm": 1.813384771347046, + "learning_rate": 9.557721832654796e-06, + "loss": 0.8413, + "step": 8035 + }, + { + "epoch": 0.1608688036433701, + "grad_norm": 1.1822389364242554, + "learning_rate": 9.557588518461027e-06, + "loss": 0.3324, + "step": 8036 + }, + { + "epoch": 0.16088882216049846, + "grad_norm": 1.1620758771896362, + "learning_rate": 9.557455185108137e-06, + "loss": 0.3443, + "step": 8037 + }, + { + "epoch": 0.1609088406776268, + "grad_norm": 1.1005998849868774, + "learning_rate": 9.557321832596687e-06, + "loss": 0.3099, + "step": 8038 + }, + { + "epoch": 0.16092885919475514, + "grad_norm": 1.0384550094604492, + "learning_rate": 9.557188460927237e-06, + "loss": 0.283, + "step": 8039 + }, + { + "epoch": 0.1609488777118835, + "grad_norm": 1.105718731880188, + "learning_rate": 9.557055070100346e-06, + "loss": 0.3327, + "step": 8040 + }, + { + "epoch": 0.16096889622901184, + "grad_norm": 1.3006700277328491, + "learning_rate": 9.556921660116575e-06, + "loss": 0.3377, + "step": 8041 + }, + { + "epoch": 0.1609889147461402, + "grad_norm": 1.1139487028121948, + "learning_rate": 9.556788230976486e-06, + "loss": 0.354, + "step": 8042 + }, + { + "epoch": 0.1610089332632685, + "grad_norm": 1.0663161277770996, + "learning_rate": 9.556654782680643e-06, + "loss": 0.3285, + "step": 8043 + }, + { + "epoch": 0.16102895178039686, + "grad_norm": 1.0580027103424072, + "learning_rate": 9.556521315229601e-06, + "loss": 0.2801, + "step": 8044 + }, + { + "epoch": 0.1610489702975252, + "grad_norm": 1.4002572298049927, + "learning_rate": 9.556387828623926e-06, + "loss": 0.4023, + "step": 8045 + }, + { + "epoch": 0.16106898881465356, + "grad_norm": 0.9858920574188232, + "learning_rate": 9.556254322864176e-06, + "loss": 0.3093, + "step": 8046 + }, + { + "epoch": 0.16108900733178189, + "grad_norm": 1.0078320503234863, + "learning_rate": 9.556120797950915e-06, + "loss": 0.2816, + "step": 8047 + }, + { + "epoch": 0.16110902584891024, + "grad_norm": 1.0958106517791748, + "learning_rate": 9.5559872538847e-06, + "loss": 0.3586, + "step": 8048 + }, + { + "epoch": 0.1611290443660386, + "grad_norm": 1.2254987955093384, + "learning_rate": 9.555853690666097e-06, + "loss": 0.3523, + "step": 8049 + }, + { + "epoch": 0.16114906288316694, + "grad_norm": 1.1485824584960938, + "learning_rate": 9.555720108295665e-06, + "loss": 0.3129, + "step": 8050 + }, + { + "epoch": 0.16116908140029526, + "grad_norm": 0.9994816184043884, + "learning_rate": 9.555586506773965e-06, + "loss": 0.3052, + "step": 8051 + }, + { + "epoch": 0.1611890999174236, + "grad_norm": 1.096266269683838, + "learning_rate": 9.555452886101562e-06, + "loss": 0.3449, + "step": 8052 + }, + { + "epoch": 0.16120911843455196, + "grad_norm": 1.1112885475158691, + "learning_rate": 9.555319246279014e-06, + "loss": 0.3172, + "step": 8053 + }, + { + "epoch": 0.1612291369516803, + "grad_norm": 1.1045202016830444, + "learning_rate": 9.555185587306883e-06, + "loss": 0.3183, + "step": 8054 + }, + { + "epoch": 0.16124915546880864, + "grad_norm": 1.2090864181518555, + "learning_rate": 9.555051909185733e-06, + "loss": 0.3233, + "step": 8055 + }, + { + "epoch": 0.161269173985937, + "grad_norm": 1.2853567600250244, + "learning_rate": 9.554918211916127e-06, + "loss": 0.3605, + "step": 8056 + }, + { + "epoch": 0.16128919250306534, + "grad_norm": 1.069488525390625, + "learning_rate": 9.554784495498623e-06, + "loss": 0.3203, + "step": 8057 + }, + { + "epoch": 0.1613092110201937, + "grad_norm": 1.1177939176559448, + "learning_rate": 9.554650759933784e-06, + "loss": 0.3428, + "step": 8058 + }, + { + "epoch": 0.161329229537322, + "grad_norm": 1.0973279476165771, + "learning_rate": 9.554517005222176e-06, + "loss": 0.3479, + "step": 8059 + }, + { + "epoch": 0.16134924805445036, + "grad_norm": 1.0936017036437988, + "learning_rate": 9.554383231364357e-06, + "loss": 0.3121, + "step": 8060 + }, + { + "epoch": 0.1613692665715787, + "grad_norm": 1.2967561483383179, + "learning_rate": 9.554249438360892e-06, + "loss": 0.3277, + "step": 8061 + }, + { + "epoch": 0.16138928508870706, + "grad_norm": 0.9680827260017395, + "learning_rate": 9.554115626212342e-06, + "loss": 0.2726, + "step": 8062 + }, + { + "epoch": 0.16140930360583539, + "grad_norm": 1.2223609685897827, + "learning_rate": 9.553981794919269e-06, + "loss": 0.3585, + "step": 8063 + }, + { + "epoch": 0.16142932212296374, + "grad_norm": 1.0753971338272095, + "learning_rate": 9.553847944482238e-06, + "loss": 0.3244, + "step": 8064 + }, + { + "epoch": 0.1614493406400921, + "grad_norm": 1.0179321765899658, + "learning_rate": 9.55371407490181e-06, + "loss": 0.3301, + "step": 8065 + }, + { + "epoch": 0.16146935915722044, + "grad_norm": 1.0100528001785278, + "learning_rate": 9.553580186178548e-06, + "loss": 0.3176, + "step": 8066 + }, + { + "epoch": 0.16148937767434876, + "grad_norm": 1.1602551937103271, + "learning_rate": 9.553446278313014e-06, + "loss": 0.2786, + "step": 8067 + }, + { + "epoch": 0.1615093961914771, + "grad_norm": 1.2465530633926392, + "learning_rate": 9.553312351305773e-06, + "loss": 0.3868, + "step": 8068 + }, + { + "epoch": 0.16152941470860546, + "grad_norm": 1.07804274559021, + "learning_rate": 9.553178405157387e-06, + "loss": 0.3211, + "step": 8069 + }, + { + "epoch": 0.1615494332257338, + "grad_norm": 1.0338114500045776, + "learning_rate": 9.553044439868418e-06, + "loss": 0.3116, + "step": 8070 + }, + { + "epoch": 0.16156945174286214, + "grad_norm": 1.1050978899002075, + "learning_rate": 9.55291045543943e-06, + "loss": 0.3571, + "step": 8071 + }, + { + "epoch": 0.1615894702599905, + "grad_norm": 1.0685687065124512, + "learning_rate": 9.552776451870987e-06, + "loss": 0.2882, + "step": 8072 + }, + { + "epoch": 0.16160948877711884, + "grad_norm": 1.1271952390670776, + "learning_rate": 9.55264242916365e-06, + "loss": 0.2978, + "step": 8073 + }, + { + "epoch": 0.1616295072942472, + "grad_norm": 1.1842396259307861, + "learning_rate": 9.552508387317984e-06, + "loss": 0.3209, + "step": 8074 + }, + { + "epoch": 0.1616495258113755, + "grad_norm": 1.1386305093765259, + "learning_rate": 9.552374326334555e-06, + "loss": 0.299, + "step": 8075 + }, + { + "epoch": 0.16166954432850386, + "grad_norm": 1.1791895627975464, + "learning_rate": 9.552240246213922e-06, + "loss": 0.3472, + "step": 8076 + }, + { + "epoch": 0.1616895628456322, + "grad_norm": 1.1590032577514648, + "learning_rate": 9.55210614695665e-06, + "loss": 0.3427, + "step": 8077 + }, + { + "epoch": 0.16170958136276056, + "grad_norm": 1.8919438123703003, + "learning_rate": 9.551972028563304e-06, + "loss": 0.8631, + "step": 8078 + }, + { + "epoch": 0.16172959987988889, + "grad_norm": 1.8185158967971802, + "learning_rate": 9.551837891034447e-06, + "loss": 0.8047, + "step": 8079 + }, + { + "epoch": 0.16174961839701724, + "grad_norm": 1.2064357995986938, + "learning_rate": 9.551703734370644e-06, + "loss": 0.3325, + "step": 8080 + }, + { + "epoch": 0.1617696369141456, + "grad_norm": 2.00410795211792, + "learning_rate": 9.551569558572457e-06, + "loss": 0.8111, + "step": 8081 + }, + { + "epoch": 0.16178965543127394, + "grad_norm": 1.812896728515625, + "learning_rate": 9.551435363640454e-06, + "loss": 0.8598, + "step": 8082 + }, + { + "epoch": 0.16180967394840226, + "grad_norm": 1.1205775737762451, + "learning_rate": 9.551301149575192e-06, + "loss": 0.3172, + "step": 8083 + }, + { + "epoch": 0.1618296924655306, + "grad_norm": 1.2707533836364746, + "learning_rate": 9.551166916377242e-06, + "loss": 0.3042, + "step": 8084 + }, + { + "epoch": 0.16184971098265896, + "grad_norm": 1.2341313362121582, + "learning_rate": 9.551032664047166e-06, + "loss": 0.3109, + "step": 8085 + }, + { + "epoch": 0.1618697294997873, + "grad_norm": 1.0586352348327637, + "learning_rate": 9.550898392585525e-06, + "loss": 0.3381, + "step": 8086 + }, + { + "epoch": 0.16188974801691564, + "grad_norm": 1.8027921915054321, + "learning_rate": 9.55076410199289e-06, + "loss": 0.86, + "step": 8087 + }, + { + "epoch": 0.161909766534044, + "grad_norm": 1.1887179613113403, + "learning_rate": 9.550629792269822e-06, + "loss": 0.2963, + "step": 8088 + }, + { + "epoch": 0.16192978505117234, + "grad_norm": 1.101793885231018, + "learning_rate": 9.550495463416883e-06, + "loss": 0.3236, + "step": 8089 + }, + { + "epoch": 0.1619498035683007, + "grad_norm": 1.081451416015625, + "learning_rate": 9.55036111543464e-06, + "loss": 0.2941, + "step": 8090 + }, + { + "epoch": 0.161969822085429, + "grad_norm": 1.0635011196136475, + "learning_rate": 9.550226748323661e-06, + "loss": 0.3051, + "step": 8091 + }, + { + "epoch": 0.16198984060255736, + "grad_norm": 1.0969659090042114, + "learning_rate": 9.550092362084506e-06, + "loss": 0.3187, + "step": 8092 + }, + { + "epoch": 0.1620098591196857, + "grad_norm": 1.1560724973678589, + "learning_rate": 9.549957956717743e-06, + "loss": 0.355, + "step": 8093 + }, + { + "epoch": 0.16202987763681406, + "grad_norm": 1.8653541803359985, + "learning_rate": 9.549823532223936e-06, + "loss": 0.8145, + "step": 8094 + }, + { + "epoch": 0.16204989615394239, + "grad_norm": 1.044193983078003, + "learning_rate": 9.54968908860365e-06, + "loss": 0.3408, + "step": 8095 + }, + { + "epoch": 0.16206991467107074, + "grad_norm": 1.2237398624420166, + "learning_rate": 9.549554625857449e-06, + "loss": 0.3471, + "step": 8096 + }, + { + "epoch": 0.1620899331881991, + "grad_norm": 1.3662539720535278, + "learning_rate": 9.549420143985901e-06, + "loss": 0.352, + "step": 8097 + }, + { + "epoch": 0.16210995170532744, + "grad_norm": 1.8512825965881348, + "learning_rate": 9.54928564298957e-06, + "loss": 0.8361, + "step": 8098 + }, + { + "epoch": 0.16212997022245576, + "grad_norm": 1.15398371219635, + "learning_rate": 9.54915112286902e-06, + "loss": 0.3233, + "step": 8099 + }, + { + "epoch": 0.1621499887395841, + "grad_norm": 1.8591359853744507, + "learning_rate": 9.549016583624816e-06, + "loss": 0.8196, + "step": 8100 + }, + { + "epoch": 0.16217000725671246, + "grad_norm": 1.3319441080093384, + "learning_rate": 9.548882025257528e-06, + "loss": 0.3198, + "step": 8101 + }, + { + "epoch": 0.1621900257738408, + "grad_norm": 1.077914834022522, + "learning_rate": 9.548747447767717e-06, + "loss": 0.3273, + "step": 8102 + }, + { + "epoch": 0.16221004429096914, + "grad_norm": 1.0969352722167969, + "learning_rate": 9.548612851155951e-06, + "loss": 0.3474, + "step": 8103 + }, + { + "epoch": 0.16223006280809749, + "grad_norm": 1.1189500093460083, + "learning_rate": 9.548478235422797e-06, + "loss": 0.3705, + "step": 8104 + }, + { + "epoch": 0.16225008132522584, + "grad_norm": 1.9160890579223633, + "learning_rate": 9.548343600568819e-06, + "loss": 0.8981, + "step": 8105 + }, + { + "epoch": 0.1622700998423542, + "grad_norm": 1.1489614248275757, + "learning_rate": 9.548208946594582e-06, + "loss": 0.3422, + "step": 8106 + }, + { + "epoch": 0.1622901183594825, + "grad_norm": 1.043770670890808, + "learning_rate": 9.548074273500654e-06, + "loss": 0.3139, + "step": 8107 + }, + { + "epoch": 0.16231013687661086, + "grad_norm": 1.0386868715286255, + "learning_rate": 9.5479395812876e-06, + "loss": 0.2813, + "step": 8108 + }, + { + "epoch": 0.1623301553937392, + "grad_norm": 1.1780802011489868, + "learning_rate": 9.547804869955988e-06, + "loss": 0.3147, + "step": 8109 + }, + { + "epoch": 0.16235017391086756, + "grad_norm": 1.1268303394317627, + "learning_rate": 9.547670139506382e-06, + "loss": 0.2908, + "step": 8110 + }, + { + "epoch": 0.16237019242799589, + "grad_norm": 1.106603741645813, + "learning_rate": 9.547535389939351e-06, + "loss": 0.3459, + "step": 8111 + }, + { + "epoch": 0.16239021094512424, + "grad_norm": 0.9789596199989319, + "learning_rate": 9.547400621255457e-06, + "loss": 0.3515, + "step": 8112 + }, + { + "epoch": 0.1624102294622526, + "grad_norm": 1.211459755897522, + "learning_rate": 9.547265833455272e-06, + "loss": 0.3334, + "step": 8113 + }, + { + "epoch": 0.16243024797938094, + "grad_norm": 1.1727697849273682, + "learning_rate": 9.547131026539359e-06, + "loss": 0.3247, + "step": 8114 + }, + { + "epoch": 0.16245026649650926, + "grad_norm": 1.2383038997650146, + "learning_rate": 9.546996200508285e-06, + "loss": 0.3411, + "step": 8115 + }, + { + "epoch": 0.1624702850136376, + "grad_norm": 1.3992693424224854, + "learning_rate": 9.546861355362619e-06, + "loss": 0.2967, + "step": 8116 + }, + { + "epoch": 0.16249030353076596, + "grad_norm": 1.0941928625106812, + "learning_rate": 9.546726491102926e-06, + "loss": 0.3111, + "step": 8117 + }, + { + "epoch": 0.1625103220478943, + "grad_norm": 1.007351040840149, + "learning_rate": 9.546591607729771e-06, + "loss": 0.3299, + "step": 8118 + }, + { + "epoch": 0.16253034056502264, + "grad_norm": 0.9837388396263123, + "learning_rate": 9.546456705243727e-06, + "loss": 0.3266, + "step": 8119 + }, + { + "epoch": 0.16255035908215099, + "grad_norm": 1.0836949348449707, + "learning_rate": 9.546321783645355e-06, + "loss": 0.3093, + "step": 8120 + }, + { + "epoch": 0.16257037759927934, + "grad_norm": 1.1215561628341675, + "learning_rate": 9.546186842935226e-06, + "loss": 0.3095, + "step": 8121 + }, + { + "epoch": 0.1625903961164077, + "grad_norm": 1.0928938388824463, + "learning_rate": 9.546051883113906e-06, + "loss": 0.3466, + "step": 8122 + }, + { + "epoch": 0.162610414633536, + "grad_norm": 1.11616051197052, + "learning_rate": 9.54591690418196e-06, + "loss": 0.3425, + "step": 8123 + }, + { + "epoch": 0.16263043315066436, + "grad_norm": 1.0798685550689697, + "learning_rate": 9.545781906139957e-06, + "loss": 0.3159, + "step": 8124 + }, + { + "epoch": 0.1626504516677927, + "grad_norm": 1.1092851161956787, + "learning_rate": 9.54564688898847e-06, + "loss": 0.3419, + "step": 8125 + }, + { + "epoch": 0.16267047018492106, + "grad_norm": 1.0505784749984741, + "learning_rate": 9.545511852728058e-06, + "loss": 0.3791, + "step": 8126 + }, + { + "epoch": 0.16269048870204939, + "grad_norm": 1.2499905824661255, + "learning_rate": 9.545376797359292e-06, + "loss": 0.356, + "step": 8127 + }, + { + "epoch": 0.16271050721917774, + "grad_norm": 1.1015657186508179, + "learning_rate": 9.54524172288274e-06, + "loss": 0.3229, + "step": 8128 + }, + { + "epoch": 0.1627305257363061, + "grad_norm": 1.4205795526504517, + "learning_rate": 9.545106629298972e-06, + "loss": 0.3215, + "step": 8129 + }, + { + "epoch": 0.16275054425343444, + "grad_norm": 1.771402359008789, + "learning_rate": 9.544971516608554e-06, + "loss": 0.7565, + "step": 8130 + }, + { + "epoch": 0.16277056277056276, + "grad_norm": 1.0828955173492432, + "learning_rate": 9.544836384812053e-06, + "loss": 0.35, + "step": 8131 + }, + { + "epoch": 0.1627905812876911, + "grad_norm": 1.2401878833770752, + "learning_rate": 9.544701233910037e-06, + "loss": 0.3166, + "step": 8132 + }, + { + "epoch": 0.16281059980481946, + "grad_norm": 1.1106555461883545, + "learning_rate": 9.544566063903076e-06, + "loss": 0.3233, + "step": 8133 + }, + { + "epoch": 0.1628306183219478, + "grad_norm": 1.028206706047058, + "learning_rate": 9.544430874791737e-06, + "loss": 0.3448, + "step": 8134 + }, + { + "epoch": 0.16285063683907613, + "grad_norm": 1.0762783288955688, + "learning_rate": 9.544295666576588e-06, + "loss": 0.3414, + "step": 8135 + }, + { + "epoch": 0.16287065535620449, + "grad_norm": 1.1911308765411377, + "learning_rate": 9.5441604392582e-06, + "loss": 0.3512, + "step": 8136 + }, + { + "epoch": 0.16289067387333284, + "grad_norm": 1.0503478050231934, + "learning_rate": 9.544025192837137e-06, + "loss": 0.3257, + "step": 8137 + }, + { + "epoch": 0.1629106923904612, + "grad_norm": 1.1227641105651855, + "learning_rate": 9.543889927313971e-06, + "loss": 0.3705, + "step": 8138 + }, + { + "epoch": 0.1629307109075895, + "grad_norm": 2.0896964073181152, + "learning_rate": 9.54375464268927e-06, + "loss": 0.8186, + "step": 8139 + }, + { + "epoch": 0.16295072942471786, + "grad_norm": 1.2538161277770996, + "learning_rate": 9.543619338963602e-06, + "loss": 0.3285, + "step": 8140 + }, + { + "epoch": 0.1629707479418462, + "grad_norm": 1.0075896978378296, + "learning_rate": 9.543484016137537e-06, + "loss": 0.3126, + "step": 8141 + }, + { + "epoch": 0.16299076645897456, + "grad_norm": 1.0433988571166992, + "learning_rate": 9.543348674211641e-06, + "loss": 0.3155, + "step": 8142 + }, + { + "epoch": 0.16301078497610288, + "grad_norm": 1.060936450958252, + "learning_rate": 9.543213313186486e-06, + "loss": 0.3103, + "step": 8143 + }, + { + "epoch": 0.16303080349323124, + "grad_norm": 1.1993293762207031, + "learning_rate": 9.543077933062639e-06, + "loss": 0.3355, + "step": 8144 + }, + { + "epoch": 0.1630508220103596, + "grad_norm": 1.2345061302185059, + "learning_rate": 9.542942533840671e-06, + "loss": 0.3282, + "step": 8145 + }, + { + "epoch": 0.16307084052748794, + "grad_norm": 1.1850792169570923, + "learning_rate": 9.54280711552115e-06, + "loss": 0.2962, + "step": 8146 + }, + { + "epoch": 0.16309085904461626, + "grad_norm": 1.1145919561386108, + "learning_rate": 9.542671678104647e-06, + "loss": 0.3536, + "step": 8147 + }, + { + "epoch": 0.1631108775617446, + "grad_norm": 1.1978161334991455, + "learning_rate": 9.542536221591728e-06, + "loss": 0.3368, + "step": 8148 + }, + { + "epoch": 0.16313089607887296, + "grad_norm": 1.1041287183761597, + "learning_rate": 9.542400745982966e-06, + "loss": 0.3615, + "step": 8149 + }, + { + "epoch": 0.1631509145960013, + "grad_norm": 1.090808629989624, + "learning_rate": 9.542265251278927e-06, + "loss": 0.311, + "step": 8150 + }, + { + "epoch": 0.16317093311312963, + "grad_norm": 1.0437982082366943, + "learning_rate": 9.542129737480182e-06, + "loss": 0.3608, + "step": 8151 + }, + { + "epoch": 0.16319095163025799, + "grad_norm": 1.1784831285476685, + "learning_rate": 9.541994204587301e-06, + "loss": 0.3868, + "step": 8152 + }, + { + "epoch": 0.16321097014738634, + "grad_norm": 1.1410999298095703, + "learning_rate": 9.541858652600857e-06, + "loss": 0.3205, + "step": 8153 + }, + { + "epoch": 0.1632309886645147, + "grad_norm": 0.9857063293457031, + "learning_rate": 9.541723081521414e-06, + "loss": 0.2708, + "step": 8154 + }, + { + "epoch": 0.163251007181643, + "grad_norm": 1.0997984409332275, + "learning_rate": 9.541587491349545e-06, + "loss": 0.3301, + "step": 8155 + }, + { + "epoch": 0.16327102569877136, + "grad_norm": 1.0606348514556885, + "learning_rate": 9.54145188208582e-06, + "loss": 0.3053, + "step": 8156 + }, + { + "epoch": 0.1632910442158997, + "grad_norm": 1.0428510904312134, + "learning_rate": 9.541316253730806e-06, + "loss": 0.3074, + "step": 8157 + }, + { + "epoch": 0.16331106273302806, + "grad_norm": 1.1005797386169434, + "learning_rate": 9.541180606285079e-06, + "loss": 0.3181, + "step": 8158 + }, + { + "epoch": 0.16333108125015638, + "grad_norm": 2.0063374042510986, + "learning_rate": 9.541044939749205e-06, + "loss": 0.8413, + "step": 8159 + }, + { + "epoch": 0.16335109976728474, + "grad_norm": 1.0844526290893555, + "learning_rate": 9.540909254123755e-06, + "loss": 0.3211, + "step": 8160 + }, + { + "epoch": 0.16337111828441309, + "grad_norm": 1.2647225856781006, + "learning_rate": 9.540773549409301e-06, + "loss": 0.3073, + "step": 8161 + }, + { + "epoch": 0.16339113680154144, + "grad_norm": 1.1532946825027466, + "learning_rate": 9.540637825606411e-06, + "loss": 0.3606, + "step": 8162 + }, + { + "epoch": 0.16341115531866976, + "grad_norm": 1.269783616065979, + "learning_rate": 9.540502082715657e-06, + "loss": 0.3082, + "step": 8163 + }, + { + "epoch": 0.1634311738357981, + "grad_norm": 1.1688168048858643, + "learning_rate": 9.540366320737609e-06, + "loss": 0.3265, + "step": 8164 + }, + { + "epoch": 0.16345119235292646, + "grad_norm": 1.1415767669677734, + "learning_rate": 9.540230539672839e-06, + "loss": 0.3315, + "step": 8165 + }, + { + "epoch": 0.1634712108700548, + "grad_norm": 1.034913420677185, + "learning_rate": 9.540094739521915e-06, + "loss": 0.2617, + "step": 8166 + }, + { + "epoch": 0.16349122938718313, + "grad_norm": 1.2687195539474487, + "learning_rate": 9.539958920285412e-06, + "loss": 0.3168, + "step": 8167 + }, + { + "epoch": 0.16351124790431149, + "grad_norm": 1.030153751373291, + "learning_rate": 9.539823081963896e-06, + "loss": 0.3297, + "step": 8168 + }, + { + "epoch": 0.16353126642143984, + "grad_norm": 1.2599045038223267, + "learning_rate": 9.539687224557944e-06, + "loss": 0.348, + "step": 8169 + }, + { + "epoch": 0.1635512849385682, + "grad_norm": 1.145384669303894, + "learning_rate": 9.539551348068122e-06, + "loss": 0.3177, + "step": 8170 + }, + { + "epoch": 0.1635713034556965, + "grad_norm": 0.9929094314575195, + "learning_rate": 9.539415452495004e-06, + "loss": 0.3368, + "step": 8171 + }, + { + "epoch": 0.16359132197282486, + "grad_norm": 1.1896480321884155, + "learning_rate": 9.539279537839158e-06, + "loss": 0.3007, + "step": 8172 + }, + { + "epoch": 0.1636113404899532, + "grad_norm": 1.9434603452682495, + "learning_rate": 9.53914360410116e-06, + "loss": 0.8057, + "step": 8173 + }, + { + "epoch": 0.16363135900708156, + "grad_norm": 1.322229027748108, + "learning_rate": 9.53900765128158e-06, + "loss": 0.3708, + "step": 8174 + }, + { + "epoch": 0.16365137752420988, + "grad_norm": 2.0411016941070557, + "learning_rate": 9.538871679380985e-06, + "loss": 0.8457, + "step": 8175 + }, + { + "epoch": 0.16367139604133824, + "grad_norm": 1.177030324935913, + "learning_rate": 9.538735688399954e-06, + "loss": 0.3338, + "step": 8176 + }, + { + "epoch": 0.16369141455846659, + "grad_norm": 1.100067138671875, + "learning_rate": 9.538599678339054e-06, + "loss": 0.298, + "step": 8177 + }, + { + "epoch": 0.16371143307559494, + "grad_norm": 0.9833836555480957, + "learning_rate": 9.538463649198855e-06, + "loss": 0.2767, + "step": 8178 + }, + { + "epoch": 0.16373145159272326, + "grad_norm": 1.066989541053772, + "learning_rate": 9.538327600979936e-06, + "loss": 0.2901, + "step": 8179 + }, + { + "epoch": 0.1637514701098516, + "grad_norm": 1.164219617843628, + "learning_rate": 9.538191533682861e-06, + "loss": 0.3276, + "step": 8180 + }, + { + "epoch": 0.16377148862697996, + "grad_norm": 1.1025593280792236, + "learning_rate": 9.538055447308206e-06, + "loss": 0.3264, + "step": 8181 + }, + { + "epoch": 0.1637915071441083, + "grad_norm": 1.0948612689971924, + "learning_rate": 9.537919341856544e-06, + "loss": 0.3797, + "step": 8182 + }, + { + "epoch": 0.16381152566123663, + "grad_norm": 1.26187264919281, + "learning_rate": 9.537783217328443e-06, + "loss": 0.3824, + "step": 8183 + }, + { + "epoch": 0.16383154417836499, + "grad_norm": 1.0197231769561768, + "learning_rate": 9.537647073724479e-06, + "loss": 0.3381, + "step": 8184 + }, + { + "epoch": 0.16385156269549334, + "grad_norm": 1.103060007095337, + "learning_rate": 9.537510911045225e-06, + "loss": 0.3397, + "step": 8185 + }, + { + "epoch": 0.1638715812126217, + "grad_norm": 1.1132124662399292, + "learning_rate": 9.53737472929125e-06, + "loss": 0.3595, + "step": 8186 + }, + { + "epoch": 0.16389159972975, + "grad_norm": 1.2119050025939941, + "learning_rate": 9.537238528463127e-06, + "loss": 0.3344, + "step": 8187 + }, + { + "epoch": 0.16391161824687836, + "grad_norm": 1.1136155128479004, + "learning_rate": 9.537102308561432e-06, + "loss": 0.3315, + "step": 8188 + }, + { + "epoch": 0.1639316367640067, + "grad_norm": 1.2387157678604126, + "learning_rate": 9.536966069586734e-06, + "loss": 0.3183, + "step": 8189 + }, + { + "epoch": 0.16395165528113506, + "grad_norm": 1.1505601406097412, + "learning_rate": 9.536829811539608e-06, + "loss": 0.3127, + "step": 8190 + }, + { + "epoch": 0.16397167379826338, + "grad_norm": 1.8111644983291626, + "learning_rate": 9.536693534420624e-06, + "loss": 0.8853, + "step": 8191 + }, + { + "epoch": 0.16399169231539173, + "grad_norm": 1.8774720430374146, + "learning_rate": 9.536557238230358e-06, + "loss": 0.8631, + "step": 8192 + }, + { + "epoch": 0.16401171083252009, + "grad_norm": 1.2168972492218018, + "learning_rate": 9.536420922969382e-06, + "loss": 0.3301, + "step": 8193 + }, + { + "epoch": 0.16403172934964844, + "grad_norm": 1.1422947645187378, + "learning_rate": 9.536284588638267e-06, + "loss": 0.3492, + "step": 8194 + }, + { + "epoch": 0.16405174786677676, + "grad_norm": 1.2391269207000732, + "learning_rate": 9.53614823523759e-06, + "loss": 0.333, + "step": 8195 + }, + { + "epoch": 0.1640717663839051, + "grad_norm": 1.1011614799499512, + "learning_rate": 9.536011862767919e-06, + "loss": 0.3781, + "step": 8196 + }, + { + "epoch": 0.16409178490103346, + "grad_norm": 1.1390022039413452, + "learning_rate": 9.535875471229834e-06, + "loss": 0.3147, + "step": 8197 + }, + { + "epoch": 0.1641118034181618, + "grad_norm": 1.0873090028762817, + "learning_rate": 9.535739060623902e-06, + "loss": 0.3199, + "step": 8198 + }, + { + "epoch": 0.16413182193529013, + "grad_norm": 1.1595057249069214, + "learning_rate": 9.5356026309507e-06, + "loss": 0.3524, + "step": 8199 + }, + { + "epoch": 0.16415184045241848, + "grad_norm": 1.1588596105575562, + "learning_rate": 9.535466182210801e-06, + "loss": 0.3383, + "step": 8200 + }, + { + "epoch": 0.16417185896954684, + "grad_norm": 1.9055263996124268, + "learning_rate": 9.535329714404777e-06, + "loss": 0.8522, + "step": 8201 + }, + { + "epoch": 0.1641918774866752, + "grad_norm": 1.0565000772476196, + "learning_rate": 9.535193227533205e-06, + "loss": 0.3084, + "step": 8202 + }, + { + "epoch": 0.1642118960038035, + "grad_norm": 1.9226019382476807, + "learning_rate": 9.535056721596654e-06, + "loss": 0.811, + "step": 8203 + }, + { + "epoch": 0.16423191452093186, + "grad_norm": 1.1078705787658691, + "learning_rate": 9.534920196595702e-06, + "loss": 0.3447, + "step": 8204 + }, + { + "epoch": 0.1642519330380602, + "grad_norm": 1.1951732635498047, + "learning_rate": 9.53478365253092e-06, + "loss": 0.3031, + "step": 8205 + }, + { + "epoch": 0.16427195155518856, + "grad_norm": 1.1168373823165894, + "learning_rate": 9.534647089402886e-06, + "loss": 0.3231, + "step": 8206 + }, + { + "epoch": 0.16429197007231688, + "grad_norm": 1.2004048824310303, + "learning_rate": 9.53451050721217e-06, + "loss": 0.3363, + "step": 8207 + }, + { + "epoch": 0.16431198858944523, + "grad_norm": 1.0676082372665405, + "learning_rate": 9.534373905959348e-06, + "loss": 0.3109, + "step": 8208 + }, + { + "epoch": 0.16433200710657359, + "grad_norm": 1.0711838006973267, + "learning_rate": 9.534237285644994e-06, + "loss": 0.3467, + "step": 8209 + }, + { + "epoch": 0.16435202562370194, + "grad_norm": 1.0691018104553223, + "learning_rate": 9.534100646269682e-06, + "loss": 0.3555, + "step": 8210 + }, + { + "epoch": 0.16437204414083026, + "grad_norm": 1.1409361362457275, + "learning_rate": 9.533963987833987e-06, + "loss": 0.3823, + "step": 8211 + }, + { + "epoch": 0.1643920626579586, + "grad_norm": 1.1123512983322144, + "learning_rate": 9.533827310338481e-06, + "loss": 0.2844, + "step": 8212 + }, + { + "epoch": 0.16441208117508696, + "grad_norm": 1.184375286102295, + "learning_rate": 9.533690613783744e-06, + "loss": 0.3157, + "step": 8213 + }, + { + "epoch": 0.1644320996922153, + "grad_norm": 1.0144755840301514, + "learning_rate": 9.533553898170345e-06, + "loss": 0.3319, + "step": 8214 + }, + { + "epoch": 0.16445211820934363, + "grad_norm": 1.1394150257110596, + "learning_rate": 9.53341716349886e-06, + "loss": 0.3185, + "step": 8215 + }, + { + "epoch": 0.16447213672647198, + "grad_norm": 1.0641363859176636, + "learning_rate": 9.533280409769868e-06, + "loss": 0.3242, + "step": 8216 + }, + { + "epoch": 0.16449215524360034, + "grad_norm": 1.8761956691741943, + "learning_rate": 9.53314363698394e-06, + "loss": 0.8574, + "step": 8217 + }, + { + "epoch": 0.16451217376072869, + "grad_norm": 1.0673695802688599, + "learning_rate": 9.53300684514165e-06, + "loss": 0.3355, + "step": 8218 + }, + { + "epoch": 0.164532192277857, + "grad_norm": 1.1010583639144897, + "learning_rate": 9.532870034243576e-06, + "loss": 0.3661, + "step": 8219 + }, + { + "epoch": 0.16455221079498536, + "grad_norm": 1.224862813949585, + "learning_rate": 9.532733204290293e-06, + "loss": 0.4058, + "step": 8220 + }, + { + "epoch": 0.1645722293121137, + "grad_norm": 0.9979454278945923, + "learning_rate": 9.532596355282372e-06, + "loss": 0.3118, + "step": 8221 + }, + { + "epoch": 0.16459224782924206, + "grad_norm": 1.1924692392349243, + "learning_rate": 9.532459487220394e-06, + "loss": 0.3017, + "step": 8222 + }, + { + "epoch": 0.16461226634637038, + "grad_norm": 2.0728437900543213, + "learning_rate": 9.53232260010493e-06, + "loss": 0.8793, + "step": 8223 + }, + { + "epoch": 0.16463228486349873, + "grad_norm": 1.144993782043457, + "learning_rate": 9.532185693936559e-06, + "loss": 0.3067, + "step": 8224 + }, + { + "epoch": 0.16465230338062709, + "grad_norm": 1.0124753713607788, + "learning_rate": 9.532048768715853e-06, + "loss": 0.2702, + "step": 8225 + }, + { + "epoch": 0.16467232189775544, + "grad_norm": 1.2459220886230469, + "learning_rate": 9.53191182444339e-06, + "loss": 0.3294, + "step": 8226 + }, + { + "epoch": 0.16469234041488376, + "grad_norm": 1.1644585132598877, + "learning_rate": 9.531774861119745e-06, + "loss": 0.3188, + "step": 8227 + }, + { + "epoch": 0.1647123589320121, + "grad_norm": 1.302201271057129, + "learning_rate": 9.531637878745493e-06, + "loss": 0.2944, + "step": 8228 + }, + { + "epoch": 0.16473237744914046, + "grad_norm": 1.046473503112793, + "learning_rate": 9.53150087732121e-06, + "loss": 0.2869, + "step": 8229 + }, + { + "epoch": 0.1647523959662688, + "grad_norm": 1.1236213445663452, + "learning_rate": 9.531363856847476e-06, + "loss": 0.3104, + "step": 8230 + }, + { + "epoch": 0.16477241448339713, + "grad_norm": 1.1869909763336182, + "learning_rate": 9.53122681732486e-06, + "loss": 0.3168, + "step": 8231 + }, + { + "epoch": 0.16479243300052548, + "grad_norm": 1.1649373769760132, + "learning_rate": 9.531089758753943e-06, + "loss": 0.3766, + "step": 8232 + }, + { + "epoch": 0.16481245151765384, + "grad_norm": 1.1971354484558105, + "learning_rate": 9.5309526811353e-06, + "loss": 0.3497, + "step": 8233 + }, + { + "epoch": 0.16483247003478219, + "grad_norm": 1.2687510251998901, + "learning_rate": 9.530815584469506e-06, + "loss": 0.3159, + "step": 8234 + }, + { + "epoch": 0.1648524885519105, + "grad_norm": 1.1354371309280396, + "learning_rate": 9.53067846875714e-06, + "loss": 0.3236, + "step": 8235 + }, + { + "epoch": 0.16487250706903886, + "grad_norm": 1.1682262420654297, + "learning_rate": 9.530541333998776e-06, + "loss": 0.3579, + "step": 8236 + }, + { + "epoch": 0.1648925255861672, + "grad_norm": 1.1662179231643677, + "learning_rate": 9.53040418019499e-06, + "loss": 0.3872, + "step": 8237 + }, + { + "epoch": 0.16491254410329556, + "grad_norm": 1.1040512323379517, + "learning_rate": 9.53026700734636e-06, + "loss": 0.3326, + "step": 8238 + }, + { + "epoch": 0.16493256262042388, + "grad_norm": 1.066576361656189, + "learning_rate": 9.530129815453463e-06, + "loss": 0.3002, + "step": 8239 + }, + { + "epoch": 0.16495258113755223, + "grad_norm": 1.1948717832565308, + "learning_rate": 9.529992604516876e-06, + "loss": 0.3653, + "step": 8240 + }, + { + "epoch": 0.16497259965468059, + "grad_norm": 1.123758316040039, + "learning_rate": 9.529855374537173e-06, + "loss": 0.2713, + "step": 8241 + }, + { + "epoch": 0.16499261817180894, + "grad_norm": 1.066294550895691, + "learning_rate": 9.529718125514933e-06, + "loss": 0.3106, + "step": 8242 + }, + { + "epoch": 0.16501263668893726, + "grad_norm": 1.421852707862854, + "learning_rate": 9.529580857450736e-06, + "loss": 0.3362, + "step": 8243 + }, + { + "epoch": 0.1650326552060656, + "grad_norm": 1.881259799003601, + "learning_rate": 9.52944357034515e-06, + "loss": 0.7868, + "step": 8244 + }, + { + "epoch": 0.16505267372319396, + "grad_norm": 1.2114192247390747, + "learning_rate": 9.529306264198763e-06, + "loss": 0.3882, + "step": 8245 + }, + { + "epoch": 0.1650726922403223, + "grad_norm": 0.9292381405830383, + "learning_rate": 9.529168939012146e-06, + "loss": 0.3031, + "step": 8246 + }, + { + "epoch": 0.16509271075745063, + "grad_norm": 1.28181791305542, + "learning_rate": 9.529031594785878e-06, + "loss": 0.3685, + "step": 8247 + }, + { + "epoch": 0.16511272927457898, + "grad_norm": 1.036546230316162, + "learning_rate": 9.528894231520535e-06, + "loss": 0.3408, + "step": 8248 + }, + { + "epoch": 0.16513274779170733, + "grad_norm": 1.24800705909729, + "learning_rate": 9.528756849216695e-06, + "loss": 0.3623, + "step": 8249 + }, + { + "epoch": 0.16515276630883569, + "grad_norm": 1.7219152450561523, + "learning_rate": 9.528619447874936e-06, + "loss": 0.8351, + "step": 8250 + }, + { + "epoch": 0.165172784825964, + "grad_norm": 1.30301034450531, + "learning_rate": 9.528482027495836e-06, + "loss": 0.3009, + "step": 8251 + }, + { + "epoch": 0.16519280334309236, + "grad_norm": 2.219447374343872, + "learning_rate": 9.528344588079971e-06, + "loss": 0.8408, + "step": 8252 + }, + { + "epoch": 0.1652128218602207, + "grad_norm": 1.2677083015441895, + "learning_rate": 9.52820712962792e-06, + "loss": 0.3075, + "step": 8253 + }, + { + "epoch": 0.16523284037734906, + "grad_norm": 1.1002171039581299, + "learning_rate": 9.528069652140261e-06, + "loss": 0.3306, + "step": 8254 + }, + { + "epoch": 0.16525285889447738, + "grad_norm": 1.0596635341644287, + "learning_rate": 9.527932155617572e-06, + "loss": 0.3104, + "step": 8255 + }, + { + "epoch": 0.16527287741160573, + "grad_norm": 1.0797395706176758, + "learning_rate": 9.527794640060432e-06, + "loss": 0.3423, + "step": 8256 + }, + { + "epoch": 0.16529289592873408, + "grad_norm": 1.1030298471450806, + "learning_rate": 9.527657105469416e-06, + "loss": 0.3014, + "step": 8257 + }, + { + "epoch": 0.16531291444586244, + "grad_norm": 1.1881788969039917, + "learning_rate": 9.527519551845104e-06, + "loss": 0.3232, + "step": 8258 + }, + { + "epoch": 0.16533293296299076, + "grad_norm": 1.060510277748108, + "learning_rate": 9.527381979188074e-06, + "loss": 0.3489, + "step": 8259 + }, + { + "epoch": 0.1653529514801191, + "grad_norm": 1.0577375888824463, + "learning_rate": 9.527244387498904e-06, + "loss": 0.3137, + "step": 8260 + }, + { + "epoch": 0.16537296999724746, + "grad_norm": 1.389503002166748, + "learning_rate": 9.527106776778175e-06, + "loss": 0.3503, + "step": 8261 + }, + { + "epoch": 0.1653929885143758, + "grad_norm": 1.131162166595459, + "learning_rate": 9.526969147026462e-06, + "loss": 0.2764, + "step": 8262 + }, + { + "epoch": 0.16541300703150413, + "grad_norm": 1.1674983501434326, + "learning_rate": 9.526831498244344e-06, + "loss": 0.3305, + "step": 8263 + }, + { + "epoch": 0.16543302554863248, + "grad_norm": 1.063100814819336, + "learning_rate": 9.526693830432401e-06, + "loss": 0.3535, + "step": 8264 + }, + { + "epoch": 0.16545304406576083, + "grad_norm": 1.0130208730697632, + "learning_rate": 9.526556143591213e-06, + "loss": 0.3348, + "step": 8265 + }, + { + "epoch": 0.16547306258288919, + "grad_norm": 1.1658108234405518, + "learning_rate": 9.526418437721353e-06, + "loss": 0.3888, + "step": 8266 + }, + { + "epoch": 0.1654930811000175, + "grad_norm": 1.1685863733291626, + "learning_rate": 9.526280712823408e-06, + "loss": 0.3291, + "step": 8267 + }, + { + "epoch": 0.16551309961714586, + "grad_norm": 1.1879746913909912, + "learning_rate": 9.526142968897952e-06, + "loss": 0.3356, + "step": 8268 + }, + { + "epoch": 0.1655331181342742, + "grad_norm": 1.0701496601104736, + "learning_rate": 9.526005205945565e-06, + "loss": 0.3135, + "step": 8269 + }, + { + "epoch": 0.16555313665140256, + "grad_norm": 1.235715627670288, + "learning_rate": 9.525867423966825e-06, + "loss": 0.3433, + "step": 8270 + }, + { + "epoch": 0.16557315516853088, + "grad_norm": 1.060712218284607, + "learning_rate": 9.525729622962314e-06, + "loss": 0.3584, + "step": 8271 + }, + { + "epoch": 0.16559317368565923, + "grad_norm": 1.0648900270462036, + "learning_rate": 9.525591802932608e-06, + "loss": 0.3361, + "step": 8272 + }, + { + "epoch": 0.16561319220278758, + "grad_norm": 1.2776713371276855, + "learning_rate": 9.52545396387829e-06, + "loss": 0.346, + "step": 8273 + }, + { + "epoch": 0.16563321071991594, + "grad_norm": 1.0780998468399048, + "learning_rate": 9.525316105799935e-06, + "loss": 0.3412, + "step": 8274 + }, + { + "epoch": 0.16565322923704426, + "grad_norm": 1.1099398136138916, + "learning_rate": 9.525178228698126e-06, + "loss": 0.3409, + "step": 8275 + }, + { + "epoch": 0.1656732477541726, + "grad_norm": 1.1378346681594849, + "learning_rate": 9.525040332573443e-06, + "loss": 0.3731, + "step": 8276 + }, + { + "epoch": 0.16569326627130096, + "grad_norm": 1.7256892919540405, + "learning_rate": 9.524902417426462e-06, + "loss": 0.9181, + "step": 8277 + }, + { + "epoch": 0.1657132847884293, + "grad_norm": 1.0689839124679565, + "learning_rate": 9.524764483257767e-06, + "loss": 0.334, + "step": 8278 + }, + { + "epoch": 0.16573330330555763, + "grad_norm": 1.232414722442627, + "learning_rate": 9.524626530067934e-06, + "loss": 0.3572, + "step": 8279 + }, + { + "epoch": 0.16575332182268598, + "grad_norm": 1.078688144683838, + "learning_rate": 9.524488557857545e-06, + "loss": 0.3078, + "step": 8280 + }, + { + "epoch": 0.16577334033981433, + "grad_norm": 1.378403902053833, + "learning_rate": 9.52435056662718e-06, + "loss": 0.3271, + "step": 8281 + }, + { + "epoch": 0.16579335885694269, + "grad_norm": 1.0891999006271362, + "learning_rate": 9.52421255637742e-06, + "loss": 0.3658, + "step": 8282 + }, + { + "epoch": 0.165813377374071, + "grad_norm": 1.1094791889190674, + "learning_rate": 9.524074527108843e-06, + "loss": 0.3204, + "step": 8283 + }, + { + "epoch": 0.16583339589119936, + "grad_norm": 1.1187539100646973, + "learning_rate": 9.523936478822032e-06, + "loss": 0.3498, + "step": 8284 + }, + { + "epoch": 0.1658534144083277, + "grad_norm": 1.1509798765182495, + "learning_rate": 9.523798411517564e-06, + "loss": 0.3589, + "step": 8285 + }, + { + "epoch": 0.16587343292545606, + "grad_norm": 1.1187357902526855, + "learning_rate": 9.52366032519602e-06, + "loss": 0.3579, + "step": 8286 + }, + { + "epoch": 0.16589345144258438, + "grad_norm": 1.103869080543518, + "learning_rate": 9.523522219857984e-06, + "loss": 0.3069, + "step": 8287 + }, + { + "epoch": 0.16591346995971273, + "grad_norm": 1.0834050178527832, + "learning_rate": 9.523384095504031e-06, + "loss": 0.3311, + "step": 8288 + }, + { + "epoch": 0.16593348847684108, + "grad_norm": 0.9583265781402588, + "learning_rate": 9.523245952134747e-06, + "loss": 0.2791, + "step": 8289 + }, + { + "epoch": 0.16595350699396944, + "grad_norm": 1.2431739568710327, + "learning_rate": 9.523107789750709e-06, + "loss": 0.3632, + "step": 8290 + }, + { + "epoch": 0.16597352551109776, + "grad_norm": 1.05329430103302, + "learning_rate": 9.522969608352502e-06, + "loss": 0.2942, + "step": 8291 + }, + { + "epoch": 0.1659935440282261, + "grad_norm": 1.8379484415054321, + "learning_rate": 9.522831407940701e-06, + "loss": 0.7925, + "step": 8292 + }, + { + "epoch": 0.16601356254535446, + "grad_norm": 1.3313345909118652, + "learning_rate": 9.522693188515892e-06, + "loss": 0.3348, + "step": 8293 + }, + { + "epoch": 0.1660335810624828, + "grad_norm": 1.1437339782714844, + "learning_rate": 9.522554950078652e-06, + "loss": 0.3765, + "step": 8294 + }, + { + "epoch": 0.16605359957961113, + "grad_norm": 1.1556490659713745, + "learning_rate": 9.522416692629567e-06, + "loss": 0.3242, + "step": 8295 + }, + { + "epoch": 0.16607361809673948, + "grad_norm": 1.074668049812317, + "learning_rate": 9.522278416169213e-06, + "loss": 0.3728, + "step": 8296 + }, + { + "epoch": 0.16609363661386783, + "grad_norm": 1.0475324392318726, + "learning_rate": 9.522140120698176e-06, + "loss": 0.2943, + "step": 8297 + }, + { + "epoch": 0.16611365513099619, + "grad_norm": 1.1385502815246582, + "learning_rate": 9.522001806217034e-06, + "loss": 0.3463, + "step": 8298 + }, + { + "epoch": 0.1661336736481245, + "grad_norm": 1.0557180643081665, + "learning_rate": 9.521863472726369e-06, + "loss": 0.3427, + "step": 8299 + }, + { + "epoch": 0.16615369216525286, + "grad_norm": 2.158945322036743, + "learning_rate": 9.521725120226763e-06, + "loss": 0.8126, + "step": 8300 + }, + { + "epoch": 0.1661737106823812, + "grad_norm": 1.0595115423202515, + "learning_rate": 9.5215867487188e-06, + "loss": 0.3279, + "step": 8301 + }, + { + "epoch": 0.16619372919950956, + "grad_norm": 1.087256669998169, + "learning_rate": 9.521448358203057e-06, + "loss": 0.3191, + "step": 8302 + }, + { + "epoch": 0.16621374771663788, + "grad_norm": 1.1975311040878296, + "learning_rate": 9.521309948680119e-06, + "loss": 0.2924, + "step": 8303 + }, + { + "epoch": 0.16623376623376623, + "grad_norm": 1.076625943183899, + "learning_rate": 9.521171520150566e-06, + "loss": 0.3128, + "step": 8304 + }, + { + "epoch": 0.16625378475089458, + "grad_norm": 1.2082488536834717, + "learning_rate": 9.521033072614982e-06, + "loss": 0.3142, + "step": 8305 + }, + { + "epoch": 0.16627380326802293, + "grad_norm": 2.0221338272094727, + "learning_rate": 9.520894606073947e-06, + "loss": 0.8207, + "step": 8306 + }, + { + "epoch": 0.16629382178515126, + "grad_norm": 0.9763476848602295, + "learning_rate": 9.520756120528045e-06, + "loss": 0.2737, + "step": 8307 + }, + { + "epoch": 0.1663138403022796, + "grad_norm": 1.1398835182189941, + "learning_rate": 9.520617615977856e-06, + "loss": 0.3506, + "step": 8308 + }, + { + "epoch": 0.16633385881940796, + "grad_norm": 1.0597280263900757, + "learning_rate": 9.520479092423965e-06, + "loss": 0.3412, + "step": 8309 + }, + { + "epoch": 0.1663538773365363, + "grad_norm": 1.2271000146865845, + "learning_rate": 9.52034054986695e-06, + "loss": 0.3486, + "step": 8310 + }, + { + "epoch": 0.16637389585366463, + "grad_norm": 1.1755107641220093, + "learning_rate": 9.5202019883074e-06, + "loss": 0.3424, + "step": 8311 + }, + { + "epoch": 0.16639391437079298, + "grad_norm": 1.0518913269042969, + "learning_rate": 9.52006340774589e-06, + "loss": 0.306, + "step": 8312 + }, + { + "epoch": 0.16641393288792133, + "grad_norm": 1.818703055381775, + "learning_rate": 9.51992480818301e-06, + "loss": 0.8147, + "step": 8313 + }, + { + "epoch": 0.16643395140504966, + "grad_norm": 1.2751846313476562, + "learning_rate": 9.519786189619335e-06, + "loss": 0.2774, + "step": 8314 + }, + { + "epoch": 0.166453969922178, + "grad_norm": 1.1570371389389038, + "learning_rate": 9.519647552055453e-06, + "loss": 0.3223, + "step": 8315 + }, + { + "epoch": 0.16647398843930636, + "grad_norm": 2.110852003097534, + "learning_rate": 9.519508895491946e-06, + "loss": 0.7907, + "step": 8316 + }, + { + "epoch": 0.1664940069564347, + "grad_norm": 1.1231752634048462, + "learning_rate": 9.519370219929396e-06, + "loss": 0.2954, + "step": 8317 + }, + { + "epoch": 0.16651402547356303, + "grad_norm": 1.1353230476379395, + "learning_rate": 9.519231525368384e-06, + "loss": 0.3132, + "step": 8318 + }, + { + "epoch": 0.16653404399069138, + "grad_norm": 1.125016450881958, + "learning_rate": 9.519092811809499e-06, + "loss": 0.3699, + "step": 8319 + }, + { + "epoch": 0.16655406250781973, + "grad_norm": 1.0754576921463013, + "learning_rate": 9.518954079253318e-06, + "loss": 0.3555, + "step": 8320 + }, + { + "epoch": 0.16657408102494808, + "grad_norm": 1.1762851476669312, + "learning_rate": 9.518815327700426e-06, + "loss": 0.3545, + "step": 8321 + }, + { + "epoch": 0.1665940995420764, + "grad_norm": 1.0607430934906006, + "learning_rate": 9.518676557151407e-06, + "loss": 0.2987, + "step": 8322 + }, + { + "epoch": 0.16661411805920476, + "grad_norm": 1.0423929691314697, + "learning_rate": 9.518537767606846e-06, + "loss": 0.3601, + "step": 8323 + }, + { + "epoch": 0.1666341365763331, + "grad_norm": 2.118760824203491, + "learning_rate": 9.518398959067323e-06, + "loss": 0.8472, + "step": 8324 + }, + { + "epoch": 0.16665415509346146, + "grad_norm": 1.0797803401947021, + "learning_rate": 9.518260131533423e-06, + "loss": 0.3142, + "step": 8325 + }, + { + "epoch": 0.16667417361058978, + "grad_norm": 1.0800046920776367, + "learning_rate": 9.51812128500573e-06, + "loss": 0.3127, + "step": 8326 + }, + { + "epoch": 0.16669419212771813, + "grad_norm": 1.1901248693466187, + "learning_rate": 9.517982419484827e-06, + "loss": 0.3205, + "step": 8327 + }, + { + "epoch": 0.16671421064484648, + "grad_norm": 1.0562045574188232, + "learning_rate": 9.5178435349713e-06, + "loss": 0.3255, + "step": 8328 + }, + { + "epoch": 0.16673422916197483, + "grad_norm": 1.0827531814575195, + "learning_rate": 9.51770463146573e-06, + "loss": 0.3662, + "step": 8329 + }, + { + "epoch": 0.16675424767910316, + "grad_norm": 1.329858660697937, + "learning_rate": 9.517565708968702e-06, + "loss": 0.3721, + "step": 8330 + }, + { + "epoch": 0.1667742661962315, + "grad_norm": 1.3332351446151733, + "learning_rate": 9.5174267674808e-06, + "loss": 0.3415, + "step": 8331 + }, + { + "epoch": 0.16679428471335986, + "grad_norm": 1.1311787366867065, + "learning_rate": 9.517287807002608e-06, + "loss": 0.2983, + "step": 8332 + }, + { + "epoch": 0.1668143032304882, + "grad_norm": 1.1298388242721558, + "learning_rate": 9.51714882753471e-06, + "loss": 0.3566, + "step": 8333 + }, + { + "epoch": 0.16683432174761653, + "grad_norm": 1.042155146598816, + "learning_rate": 9.51700982907769e-06, + "loss": 0.3282, + "step": 8334 + }, + { + "epoch": 0.16685434026474488, + "grad_norm": 1.0948857069015503, + "learning_rate": 9.516870811632133e-06, + "loss": 0.3154, + "step": 8335 + }, + { + "epoch": 0.16687435878187323, + "grad_norm": 1.2951419353485107, + "learning_rate": 9.516731775198624e-06, + "loss": 0.3307, + "step": 8336 + }, + { + "epoch": 0.16689437729900158, + "grad_norm": 1.1045098304748535, + "learning_rate": 9.516592719777746e-06, + "loss": 0.331, + "step": 8337 + }, + { + "epoch": 0.1669143958161299, + "grad_norm": 1.120322346687317, + "learning_rate": 9.516453645370085e-06, + "loss": 0.2523, + "step": 8338 + }, + { + "epoch": 0.16693441433325826, + "grad_norm": 1.2019339799880981, + "learning_rate": 9.516314551976224e-06, + "loss": 0.3328, + "step": 8339 + }, + { + "epoch": 0.1669544328503866, + "grad_norm": 1.160683512687683, + "learning_rate": 9.516175439596749e-06, + "loss": 0.3314, + "step": 8340 + }, + { + "epoch": 0.16697445136751496, + "grad_norm": 2.0969040393829346, + "learning_rate": 9.516036308232244e-06, + "loss": 0.8511, + "step": 8341 + }, + { + "epoch": 0.16699446988464328, + "grad_norm": 1.120076060295105, + "learning_rate": 9.515897157883294e-06, + "loss": 0.3375, + "step": 8342 + }, + { + "epoch": 0.16701448840177163, + "grad_norm": 1.0796003341674805, + "learning_rate": 9.515757988550484e-06, + "loss": 0.3066, + "step": 8343 + }, + { + "epoch": 0.16703450691889998, + "grad_norm": 1.222031593322754, + "learning_rate": 9.515618800234399e-06, + "loss": 0.3767, + "step": 8344 + }, + { + "epoch": 0.16705452543602833, + "grad_norm": 1.2965574264526367, + "learning_rate": 9.515479592935626e-06, + "loss": 0.3462, + "step": 8345 + }, + { + "epoch": 0.16707454395315666, + "grad_norm": 1.1295697689056396, + "learning_rate": 9.515340366654747e-06, + "loss": 0.3649, + "step": 8346 + }, + { + "epoch": 0.167094562470285, + "grad_norm": 1.0570358037948608, + "learning_rate": 9.515201121392348e-06, + "loss": 0.2981, + "step": 8347 + }, + { + "epoch": 0.16711458098741336, + "grad_norm": 1.1332051753997803, + "learning_rate": 9.515061857149017e-06, + "loss": 0.3195, + "step": 8348 + }, + { + "epoch": 0.1671345995045417, + "grad_norm": 1.1152163743972778, + "learning_rate": 9.514922573925338e-06, + "loss": 0.3628, + "step": 8349 + }, + { + "epoch": 0.16715461802167003, + "grad_norm": 0.9771119952201843, + "learning_rate": 9.514783271721894e-06, + "loss": 0.3327, + "step": 8350 + }, + { + "epoch": 0.16717463653879838, + "grad_norm": 1.9115147590637207, + "learning_rate": 9.514643950539275e-06, + "loss": 0.8171, + "step": 8351 + }, + { + "epoch": 0.16719465505592673, + "grad_norm": 1.1334595680236816, + "learning_rate": 9.514504610378064e-06, + "loss": 0.3603, + "step": 8352 + }, + { + "epoch": 0.16721467357305508, + "grad_norm": 1.1488004922866821, + "learning_rate": 9.514365251238845e-06, + "loss": 0.3503, + "step": 8353 + }, + { + "epoch": 0.1672346920901834, + "grad_norm": 1.1674517393112183, + "learning_rate": 9.514225873122207e-06, + "loss": 0.2714, + "step": 8354 + }, + { + "epoch": 0.16725471060731176, + "grad_norm": 1.2123230695724487, + "learning_rate": 9.514086476028736e-06, + "loss": 0.3166, + "step": 8355 + }, + { + "epoch": 0.1672747291244401, + "grad_norm": 1.1872519254684448, + "learning_rate": 9.513947059959017e-06, + "loss": 0.3771, + "step": 8356 + }, + { + "epoch": 0.16729474764156846, + "grad_norm": 1.6457427740097046, + "learning_rate": 9.513807624913635e-06, + "loss": 0.3361, + "step": 8357 + }, + { + "epoch": 0.16731476615869678, + "grad_norm": 1.130401849746704, + "learning_rate": 9.513668170893177e-06, + "loss": 0.3429, + "step": 8358 + }, + { + "epoch": 0.16733478467582513, + "grad_norm": 1.2042330503463745, + "learning_rate": 9.51352869789823e-06, + "loss": 0.3214, + "step": 8359 + }, + { + "epoch": 0.16735480319295348, + "grad_norm": 1.2001166343688965, + "learning_rate": 9.513389205929379e-06, + "loss": 0.3755, + "step": 8360 + }, + { + "epoch": 0.16737482171008183, + "grad_norm": 1.6959993839263916, + "learning_rate": 9.513249694987211e-06, + "loss": 0.8418, + "step": 8361 + }, + { + "epoch": 0.16739484022721016, + "grad_norm": 1.0522857904434204, + "learning_rate": 9.513110165072313e-06, + "loss": 0.3557, + "step": 8362 + }, + { + "epoch": 0.1674148587443385, + "grad_norm": 1.346592903137207, + "learning_rate": 9.512970616185271e-06, + "loss": 0.3628, + "step": 8363 + }, + { + "epoch": 0.16743487726146686, + "grad_norm": 1.1028270721435547, + "learning_rate": 9.512831048326672e-06, + "loss": 0.3273, + "step": 8364 + }, + { + "epoch": 0.1674548957785952, + "grad_norm": 1.1458550691604614, + "learning_rate": 9.512691461497102e-06, + "loss": 0.3328, + "step": 8365 + }, + { + "epoch": 0.16747491429572353, + "grad_norm": 1.2110978364944458, + "learning_rate": 9.51255185569715e-06, + "loss": 0.3577, + "step": 8366 + }, + { + "epoch": 0.16749493281285188, + "grad_norm": 1.8370283842086792, + "learning_rate": 9.512412230927399e-06, + "loss": 0.7928, + "step": 8367 + }, + { + "epoch": 0.16751495132998023, + "grad_norm": 1.128896951675415, + "learning_rate": 9.512272587188439e-06, + "loss": 0.3539, + "step": 8368 + }, + { + "epoch": 0.16753496984710858, + "grad_norm": 1.2031140327453613, + "learning_rate": 9.512132924480855e-06, + "loss": 0.3433, + "step": 8369 + }, + { + "epoch": 0.1675549883642369, + "grad_norm": 1.146621584892273, + "learning_rate": 9.511993242805236e-06, + "loss": 0.3299, + "step": 8370 + }, + { + "epoch": 0.16757500688136526, + "grad_norm": 1.0344494581222534, + "learning_rate": 9.511853542162168e-06, + "loss": 0.2936, + "step": 8371 + }, + { + "epoch": 0.1675950253984936, + "grad_norm": 1.2064526081085205, + "learning_rate": 9.51171382255224e-06, + "loss": 0.3617, + "step": 8372 + }, + { + "epoch": 0.16761504391562196, + "grad_norm": 1.413295865058899, + "learning_rate": 9.511574083976037e-06, + "loss": 0.3126, + "step": 8373 + }, + { + "epoch": 0.16763506243275028, + "grad_norm": 1.1499042510986328, + "learning_rate": 9.511434326434146e-06, + "loss": 0.4015, + "step": 8374 + }, + { + "epoch": 0.16765508094987863, + "grad_norm": 1.0685120820999146, + "learning_rate": 9.511294549927158e-06, + "loss": 0.2942, + "step": 8375 + }, + { + "epoch": 0.16767509946700698, + "grad_norm": 1.0770466327667236, + "learning_rate": 9.511154754455658e-06, + "loss": 0.3439, + "step": 8376 + }, + { + "epoch": 0.16769511798413533, + "grad_norm": 1.0904039144515991, + "learning_rate": 9.511014940020234e-06, + "loss": 0.3114, + "step": 8377 + }, + { + "epoch": 0.16771513650126366, + "grad_norm": 1.1114219427108765, + "learning_rate": 9.510875106621474e-06, + "loss": 0.3645, + "step": 8378 + }, + { + "epoch": 0.167735155018392, + "grad_norm": 1.2755153179168701, + "learning_rate": 9.510735254259966e-06, + "loss": 0.4074, + "step": 8379 + }, + { + "epoch": 0.16775517353552036, + "grad_norm": 1.1477409601211548, + "learning_rate": 9.510595382936299e-06, + "loss": 0.3693, + "step": 8380 + }, + { + "epoch": 0.1677751920526487, + "grad_norm": 1.9589842557907104, + "learning_rate": 9.510455492651057e-06, + "loss": 0.8145, + "step": 8381 + }, + { + "epoch": 0.16779521056977703, + "grad_norm": 1.0548619031906128, + "learning_rate": 9.510315583404832e-06, + "loss": 0.3587, + "step": 8382 + }, + { + "epoch": 0.16781522908690538, + "grad_norm": 1.1370320320129395, + "learning_rate": 9.510175655198212e-06, + "loss": 0.3749, + "step": 8383 + }, + { + "epoch": 0.16783524760403373, + "grad_norm": 1.2136025428771973, + "learning_rate": 9.510035708031782e-06, + "loss": 0.3618, + "step": 8384 + }, + { + "epoch": 0.16785526612116208, + "grad_norm": 1.1315698623657227, + "learning_rate": 9.509895741906135e-06, + "loss": 0.3348, + "step": 8385 + }, + { + "epoch": 0.1678752846382904, + "grad_norm": 1.0280333757400513, + "learning_rate": 9.509755756821855e-06, + "loss": 0.3495, + "step": 8386 + }, + { + "epoch": 0.16789530315541876, + "grad_norm": 1.136345624923706, + "learning_rate": 9.509615752779533e-06, + "loss": 0.3262, + "step": 8387 + }, + { + "epoch": 0.1679153216725471, + "grad_norm": 1.3241788148880005, + "learning_rate": 9.509475729779756e-06, + "loss": 0.3096, + "step": 8388 + }, + { + "epoch": 0.16793534018967546, + "grad_norm": 1.3095781803131104, + "learning_rate": 9.509335687823115e-06, + "loss": 0.3294, + "step": 8389 + }, + { + "epoch": 0.16795535870680378, + "grad_norm": 1.369828462600708, + "learning_rate": 9.509195626910197e-06, + "loss": 0.3693, + "step": 8390 + }, + { + "epoch": 0.16797537722393213, + "grad_norm": 1.3664065599441528, + "learning_rate": 9.509055547041591e-06, + "loss": 0.3844, + "step": 8391 + }, + { + "epoch": 0.16799539574106048, + "grad_norm": 1.134155035018921, + "learning_rate": 9.508915448217883e-06, + "loss": 0.3322, + "step": 8392 + }, + { + "epoch": 0.16801541425818883, + "grad_norm": 1.9895085096359253, + "learning_rate": 9.508775330439668e-06, + "loss": 0.8709, + "step": 8393 + }, + { + "epoch": 0.16803543277531716, + "grad_norm": 1.0793412923812866, + "learning_rate": 9.50863519370753e-06, + "loss": 0.283, + "step": 8394 + }, + { + "epoch": 0.1680554512924455, + "grad_norm": 1.1732096672058105, + "learning_rate": 9.50849503802206e-06, + "loss": 0.3089, + "step": 8395 + }, + { + "epoch": 0.16807546980957386, + "grad_norm": 1.0249980688095093, + "learning_rate": 9.508354863383849e-06, + "loss": 0.2929, + "step": 8396 + }, + { + "epoch": 0.1680954883267022, + "grad_norm": 1.1306602954864502, + "learning_rate": 9.508214669793483e-06, + "loss": 0.3314, + "step": 8397 + }, + { + "epoch": 0.16811550684383053, + "grad_norm": 1.0452077388763428, + "learning_rate": 9.508074457251553e-06, + "loss": 0.3222, + "step": 8398 + }, + { + "epoch": 0.16813552536095888, + "grad_norm": 1.035905122756958, + "learning_rate": 9.507934225758648e-06, + "loss": 0.3174, + "step": 8399 + }, + { + "epoch": 0.16815554387808723, + "grad_norm": 1.107016921043396, + "learning_rate": 9.507793975315359e-06, + "loss": 0.3563, + "step": 8400 + }, + { + "epoch": 0.16817556239521558, + "grad_norm": 1.1419651508331299, + "learning_rate": 9.507653705922271e-06, + "loss": 0.3382, + "step": 8401 + }, + { + "epoch": 0.1681955809123439, + "grad_norm": 1.1884617805480957, + "learning_rate": 9.50751341757998e-06, + "loss": 0.2791, + "step": 8402 + }, + { + "epoch": 0.16821559942947226, + "grad_norm": 1.3113220930099487, + "learning_rate": 9.507373110289071e-06, + "loss": 0.2993, + "step": 8403 + }, + { + "epoch": 0.1682356179466006, + "grad_norm": 1.1869637966156006, + "learning_rate": 9.507232784050136e-06, + "loss": 0.3679, + "step": 8404 + }, + { + "epoch": 0.16825563646372896, + "grad_norm": 1.0480560064315796, + "learning_rate": 9.507092438863763e-06, + "loss": 0.3193, + "step": 8405 + }, + { + "epoch": 0.16827565498085728, + "grad_norm": 1.0752075910568237, + "learning_rate": 9.506952074730545e-06, + "loss": 0.283, + "step": 8406 + }, + { + "epoch": 0.16829567349798563, + "grad_norm": 1.064512848854065, + "learning_rate": 9.50681169165107e-06, + "loss": 0.2993, + "step": 8407 + }, + { + "epoch": 0.16831569201511398, + "grad_norm": 1.8533991575241089, + "learning_rate": 9.506671289625929e-06, + "loss": 0.8937, + "step": 8408 + }, + { + "epoch": 0.16833571053224233, + "grad_norm": 1.2127654552459717, + "learning_rate": 9.50653086865571e-06, + "loss": 0.3616, + "step": 8409 + }, + { + "epoch": 0.16835572904937066, + "grad_norm": 1.088039517402649, + "learning_rate": 9.506390428741005e-06, + "loss": 0.3498, + "step": 8410 + }, + { + "epoch": 0.168375747566499, + "grad_norm": 1.094178557395935, + "learning_rate": 9.506249969882408e-06, + "loss": 0.3212, + "step": 8411 + }, + { + "epoch": 0.16839576608362736, + "grad_norm": 1.093164324760437, + "learning_rate": 9.506109492080502e-06, + "loss": 0.297, + "step": 8412 + }, + { + "epoch": 0.1684157846007557, + "grad_norm": 1.07393479347229, + "learning_rate": 9.505968995335883e-06, + "loss": 0.3363, + "step": 8413 + }, + { + "epoch": 0.16843580311788403, + "grad_norm": 1.1681597232818604, + "learning_rate": 9.50582847964914e-06, + "loss": 0.319, + "step": 8414 + }, + { + "epoch": 0.16845582163501238, + "grad_norm": 1.2888258695602417, + "learning_rate": 9.505687945020863e-06, + "loss": 0.3533, + "step": 8415 + }, + { + "epoch": 0.16847584015214073, + "grad_norm": 1.0616716146469116, + "learning_rate": 9.505547391451644e-06, + "loss": 0.2951, + "step": 8416 + }, + { + "epoch": 0.16849585866926908, + "grad_norm": 1.080459475517273, + "learning_rate": 9.505406818942074e-06, + "loss": 0.3211, + "step": 8417 + }, + { + "epoch": 0.1685158771863974, + "grad_norm": 1.1705328226089478, + "learning_rate": 9.505266227492743e-06, + "loss": 0.319, + "step": 8418 + }, + { + "epoch": 0.16853589570352576, + "grad_norm": 1.2346818447113037, + "learning_rate": 9.505125617104243e-06, + "loss": 0.3549, + "step": 8419 + }, + { + "epoch": 0.1685559142206541, + "grad_norm": 1.1103193759918213, + "learning_rate": 9.504984987777164e-06, + "loss": 0.3753, + "step": 8420 + }, + { + "epoch": 0.16857593273778246, + "grad_norm": 1.1953319311141968, + "learning_rate": 9.504844339512096e-06, + "loss": 0.4211, + "step": 8421 + }, + { + "epoch": 0.16859595125491078, + "grad_norm": 1.073824167251587, + "learning_rate": 9.504703672309634e-06, + "loss": 0.3222, + "step": 8422 + }, + { + "epoch": 0.16861596977203913, + "grad_norm": 1.178676962852478, + "learning_rate": 9.504562986170365e-06, + "loss": 0.3368, + "step": 8423 + }, + { + "epoch": 0.16863598828916748, + "grad_norm": 1.1316404342651367, + "learning_rate": 9.504422281094885e-06, + "loss": 0.3484, + "step": 8424 + }, + { + "epoch": 0.16865600680629583, + "grad_norm": 1.036855936050415, + "learning_rate": 9.50428155708378e-06, + "loss": 0.336, + "step": 8425 + }, + { + "epoch": 0.16867602532342416, + "grad_norm": 1.0096830129623413, + "learning_rate": 9.504140814137647e-06, + "loss": 0.3078, + "step": 8426 + }, + { + "epoch": 0.1686960438405525, + "grad_norm": 1.1118804216384888, + "learning_rate": 9.504000052257077e-06, + "loss": 0.2914, + "step": 8427 + }, + { + "epoch": 0.16871606235768086, + "grad_norm": 1.140454888343811, + "learning_rate": 9.503859271442657e-06, + "loss": 0.3013, + "step": 8428 + }, + { + "epoch": 0.1687360808748092, + "grad_norm": 1.8759689331054688, + "learning_rate": 9.503718471694982e-06, + "loss": 0.8996, + "step": 8429 + }, + { + "epoch": 0.16875609939193753, + "grad_norm": 1.1043587923049927, + "learning_rate": 9.503577653014647e-06, + "loss": 0.3497, + "step": 8430 + }, + { + "epoch": 0.16877611790906588, + "grad_norm": 1.1239042282104492, + "learning_rate": 9.503436815402236e-06, + "loss": 0.3249, + "step": 8431 + }, + { + "epoch": 0.16879613642619423, + "grad_norm": 1.1303930282592773, + "learning_rate": 9.50329595885835e-06, + "loss": 0.2849, + "step": 8432 + }, + { + "epoch": 0.16881615494332258, + "grad_norm": 1.141845703125, + "learning_rate": 9.503155083383573e-06, + "loss": 0.3692, + "step": 8433 + }, + { + "epoch": 0.1688361734604509, + "grad_norm": 1.0767877101898193, + "learning_rate": 9.503014188978504e-06, + "loss": 0.3058, + "step": 8434 + }, + { + "epoch": 0.16885619197757926, + "grad_norm": 1.0147699117660522, + "learning_rate": 9.502873275643731e-06, + "loss": 0.2874, + "step": 8435 + }, + { + "epoch": 0.1688762104947076, + "grad_norm": 1.094010591506958, + "learning_rate": 9.502732343379847e-06, + "loss": 0.3399, + "step": 8436 + }, + { + "epoch": 0.16889622901183596, + "grad_norm": 1.0104718208312988, + "learning_rate": 9.502591392187449e-06, + "loss": 0.3164, + "step": 8437 + }, + { + "epoch": 0.16891624752896428, + "grad_norm": 1.1268582344055176, + "learning_rate": 9.502450422067121e-06, + "loss": 0.3741, + "step": 8438 + }, + { + "epoch": 0.16893626604609263, + "grad_norm": 1.0777573585510254, + "learning_rate": 9.502309433019463e-06, + "loss": 0.3187, + "step": 8439 + }, + { + "epoch": 0.16895628456322098, + "grad_norm": 1.0384341478347778, + "learning_rate": 9.502168425045066e-06, + "loss": 0.2905, + "step": 8440 + }, + { + "epoch": 0.16897630308034933, + "grad_norm": 1.0296344757080078, + "learning_rate": 9.50202739814452e-06, + "loss": 0.337, + "step": 8441 + }, + { + "epoch": 0.16899632159747766, + "grad_norm": 1.1538969278335571, + "learning_rate": 9.501886352318421e-06, + "loss": 0.3251, + "step": 8442 + }, + { + "epoch": 0.169016340114606, + "grad_norm": 1.0495795011520386, + "learning_rate": 9.50174528756736e-06, + "loss": 0.3237, + "step": 8443 + }, + { + "epoch": 0.16903635863173436, + "grad_norm": 1.1560860872268677, + "learning_rate": 9.50160420389193e-06, + "loss": 0.3289, + "step": 8444 + }, + { + "epoch": 0.1690563771488627, + "grad_norm": 1.3162939548492432, + "learning_rate": 9.501463101292726e-06, + "loss": 0.3581, + "step": 8445 + }, + { + "epoch": 0.16907639566599103, + "grad_norm": 1.3058024644851685, + "learning_rate": 9.501321979770338e-06, + "loss": 0.3502, + "step": 8446 + }, + { + "epoch": 0.16909641418311938, + "grad_norm": 1.114237904548645, + "learning_rate": 9.501180839325361e-06, + "loss": 0.3171, + "step": 8447 + }, + { + "epoch": 0.16911643270024773, + "grad_norm": 1.1437147855758667, + "learning_rate": 9.50103967995839e-06, + "loss": 0.3172, + "step": 8448 + }, + { + "epoch": 0.16913645121737608, + "grad_norm": 1.0671297311782837, + "learning_rate": 9.500898501670017e-06, + "loss": 0.3158, + "step": 8449 + }, + { + "epoch": 0.1691564697345044, + "grad_norm": 1.135785698890686, + "learning_rate": 9.500757304460834e-06, + "loss": 0.3453, + "step": 8450 + }, + { + "epoch": 0.16917648825163276, + "grad_norm": 1.2546110153198242, + "learning_rate": 9.500616088331436e-06, + "loss": 0.3972, + "step": 8451 + }, + { + "epoch": 0.1691965067687611, + "grad_norm": 1.1590967178344727, + "learning_rate": 9.500474853282418e-06, + "loss": 0.3293, + "step": 8452 + }, + { + "epoch": 0.16921652528588946, + "grad_norm": 1.119272232055664, + "learning_rate": 9.50033359931437e-06, + "loss": 0.3212, + "step": 8453 + }, + { + "epoch": 0.16923654380301778, + "grad_norm": 1.3249397277832031, + "learning_rate": 9.500192326427888e-06, + "loss": 0.3024, + "step": 8454 + }, + { + "epoch": 0.16925656232014613, + "grad_norm": 1.1589324474334717, + "learning_rate": 9.500051034623568e-06, + "loss": 0.3325, + "step": 8455 + }, + { + "epoch": 0.16927658083727448, + "grad_norm": 1.0983405113220215, + "learning_rate": 9.499909723902e-06, + "loss": 0.3598, + "step": 8456 + }, + { + "epoch": 0.16929659935440283, + "grad_norm": 1.2297333478927612, + "learning_rate": 9.499768394263782e-06, + "loss": 0.3725, + "step": 8457 + }, + { + "epoch": 0.16931661787153116, + "grad_norm": 1.0653759241104126, + "learning_rate": 9.499627045709505e-06, + "loss": 0.3082, + "step": 8458 + }, + { + "epoch": 0.1693366363886595, + "grad_norm": 1.192030429840088, + "learning_rate": 9.499485678239764e-06, + "loss": 0.3356, + "step": 8459 + }, + { + "epoch": 0.16935665490578786, + "grad_norm": 1.1512292623519897, + "learning_rate": 9.499344291855154e-06, + "loss": 0.3284, + "step": 8460 + }, + { + "epoch": 0.1693766734229162, + "grad_norm": 2.090230941772461, + "learning_rate": 9.499202886556268e-06, + "loss": 0.8344, + "step": 8461 + }, + { + "epoch": 0.16939669194004453, + "grad_norm": 1.1829917430877686, + "learning_rate": 9.499061462343702e-06, + "loss": 0.3331, + "step": 8462 + }, + { + "epoch": 0.16941671045717288, + "grad_norm": 1.1260851621627808, + "learning_rate": 9.49892001921805e-06, + "loss": 0.3684, + "step": 8463 + }, + { + "epoch": 0.16943672897430123, + "grad_norm": 1.1590335369110107, + "learning_rate": 9.498778557179907e-06, + "loss": 0.3566, + "step": 8464 + }, + { + "epoch": 0.16945674749142958, + "grad_norm": 1.1773637533187866, + "learning_rate": 9.498637076229864e-06, + "loss": 0.3597, + "step": 8465 + }, + { + "epoch": 0.1694767660085579, + "grad_norm": 1.1010128259658813, + "learning_rate": 9.498495576368521e-06, + "loss": 0.3499, + "step": 8466 + }, + { + "epoch": 0.16949678452568626, + "grad_norm": 1.0821943283081055, + "learning_rate": 9.498354057596471e-06, + "loss": 0.3221, + "step": 8467 + }, + { + "epoch": 0.1695168030428146, + "grad_norm": 1.1301887035369873, + "learning_rate": 9.498212519914308e-06, + "loss": 0.3576, + "step": 8468 + }, + { + "epoch": 0.16953682155994296, + "grad_norm": 1.3224074840545654, + "learning_rate": 9.498070963322627e-06, + "loss": 0.3556, + "step": 8469 + }, + { + "epoch": 0.16955684007707128, + "grad_norm": 1.0811189413070679, + "learning_rate": 9.497929387822026e-06, + "loss": 0.3341, + "step": 8470 + }, + { + "epoch": 0.16957685859419963, + "grad_norm": 1.0517946481704712, + "learning_rate": 9.497787793413095e-06, + "loss": 0.3989, + "step": 8471 + }, + { + "epoch": 0.16959687711132798, + "grad_norm": 1.1239689588546753, + "learning_rate": 9.497646180096434e-06, + "loss": 0.3252, + "step": 8472 + }, + { + "epoch": 0.16961689562845633, + "grad_norm": 1.1030429601669312, + "learning_rate": 9.497504547872636e-06, + "loss": 0.2929, + "step": 8473 + }, + { + "epoch": 0.16963691414558466, + "grad_norm": 1.0378870964050293, + "learning_rate": 9.497362896742296e-06, + "loss": 0.3583, + "step": 8474 + }, + { + "epoch": 0.169656932662713, + "grad_norm": 1.143768072128296, + "learning_rate": 9.497221226706012e-06, + "loss": 0.3222, + "step": 8475 + }, + { + "epoch": 0.16967695117984136, + "grad_norm": 1.0924484729766846, + "learning_rate": 9.497079537764376e-06, + "loss": 0.3262, + "step": 8476 + }, + { + "epoch": 0.1696969696969697, + "grad_norm": 1.0326080322265625, + "learning_rate": 9.496937829917986e-06, + "loss": 0.3475, + "step": 8477 + }, + { + "epoch": 0.16971698821409803, + "grad_norm": 1.859553575515747, + "learning_rate": 9.496796103167437e-06, + "loss": 0.8979, + "step": 8478 + }, + { + "epoch": 0.16973700673122638, + "grad_norm": 1.0563092231750488, + "learning_rate": 9.496654357513324e-06, + "loss": 0.3555, + "step": 8479 + }, + { + "epoch": 0.16975702524835473, + "grad_norm": 1.147207260131836, + "learning_rate": 9.496512592956246e-06, + "loss": 0.3331, + "step": 8480 + }, + { + "epoch": 0.16977704376548308, + "grad_norm": 1.1391174793243408, + "learning_rate": 9.496370809496795e-06, + "loss": 0.2903, + "step": 8481 + }, + { + "epoch": 0.1697970622826114, + "grad_norm": 0.9999846816062927, + "learning_rate": 9.496229007135568e-06, + "loss": 0.3065, + "step": 8482 + }, + { + "epoch": 0.16981708079973976, + "grad_norm": 1.2723900079727173, + "learning_rate": 9.496087185873164e-06, + "loss": 0.3074, + "step": 8483 + }, + { + "epoch": 0.1698370993168681, + "grad_norm": 1.1103018522262573, + "learning_rate": 9.495945345710177e-06, + "loss": 0.311, + "step": 8484 + }, + { + "epoch": 0.16985711783399646, + "grad_norm": 1.1698020696640015, + "learning_rate": 9.495803486647201e-06, + "loss": 0.3826, + "step": 8485 + }, + { + "epoch": 0.16987713635112478, + "grad_norm": 1.1275358200073242, + "learning_rate": 9.495661608684836e-06, + "loss": 0.3082, + "step": 8486 + }, + { + "epoch": 0.16989715486825313, + "grad_norm": 1.3094754219055176, + "learning_rate": 9.495519711823678e-06, + "loss": 0.3312, + "step": 8487 + }, + { + "epoch": 0.16991717338538148, + "grad_norm": 1.241001009941101, + "learning_rate": 9.495377796064323e-06, + "loss": 0.3504, + "step": 8488 + }, + { + "epoch": 0.16993719190250983, + "grad_norm": 1.2081571817398071, + "learning_rate": 9.495235861407363e-06, + "loss": 0.3292, + "step": 8489 + }, + { + "epoch": 0.16995721041963816, + "grad_norm": 1.116631031036377, + "learning_rate": 9.495093907853403e-06, + "loss": 0.317, + "step": 8490 + }, + { + "epoch": 0.1699772289367665, + "grad_norm": 0.9705914258956909, + "learning_rate": 9.494951935403035e-06, + "loss": 0.3294, + "step": 8491 + }, + { + "epoch": 0.16999724745389486, + "grad_norm": 1.1221884489059448, + "learning_rate": 9.494809944056855e-06, + "loss": 0.3495, + "step": 8492 + }, + { + "epoch": 0.1700172659710232, + "grad_norm": 1.268308401107788, + "learning_rate": 9.494667933815463e-06, + "loss": 0.2967, + "step": 8493 + }, + { + "epoch": 0.17003728448815153, + "grad_norm": 1.1287181377410889, + "learning_rate": 9.494525904679452e-06, + "loss": 0.3499, + "step": 8494 + }, + { + "epoch": 0.17005730300527988, + "grad_norm": 1.2216113805770874, + "learning_rate": 9.494383856649423e-06, + "loss": 0.3166, + "step": 8495 + }, + { + "epoch": 0.17007732152240823, + "grad_norm": 1.114240050315857, + "learning_rate": 9.494241789725971e-06, + "loss": 0.3456, + "step": 8496 + }, + { + "epoch": 0.17009734003953658, + "grad_norm": 1.0594960451126099, + "learning_rate": 9.494099703909694e-06, + "loss": 0.2969, + "step": 8497 + }, + { + "epoch": 0.1701173585566649, + "grad_norm": 1.1639113426208496, + "learning_rate": 9.493957599201188e-06, + "loss": 0.3276, + "step": 8498 + }, + { + "epoch": 0.17013737707379326, + "grad_norm": 1.104840636253357, + "learning_rate": 9.493815475601051e-06, + "loss": 0.353, + "step": 8499 + }, + { + "epoch": 0.1701573955909216, + "grad_norm": 1.1949419975280762, + "learning_rate": 9.493673333109882e-06, + "loss": 0.3489, + "step": 8500 + }, + { + "epoch": 0.17017741410804996, + "grad_norm": 1.2416460514068604, + "learning_rate": 9.493531171728278e-06, + "loss": 0.3528, + "step": 8501 + }, + { + "epoch": 0.17019743262517828, + "grad_norm": 1.1789971590042114, + "learning_rate": 9.493388991456834e-06, + "loss": 0.2907, + "step": 8502 + }, + { + "epoch": 0.17021745114230663, + "grad_norm": 1.1376349925994873, + "learning_rate": 9.493246792296152e-06, + "loss": 0.3158, + "step": 8503 + }, + { + "epoch": 0.17023746965943498, + "grad_norm": 0.9670250415802002, + "learning_rate": 9.493104574246825e-06, + "loss": 0.3133, + "step": 8504 + }, + { + "epoch": 0.17025748817656333, + "grad_norm": 1.2839900255203247, + "learning_rate": 9.492962337309455e-06, + "loss": 0.3771, + "step": 8505 + }, + { + "epoch": 0.17027750669369165, + "grad_norm": 1.1266742944717407, + "learning_rate": 9.492820081484639e-06, + "loss": 0.3059, + "step": 8506 + }, + { + "epoch": 0.17029752521082, + "grad_norm": 1.060611605644226, + "learning_rate": 9.492677806772972e-06, + "loss": 0.3434, + "step": 8507 + }, + { + "epoch": 0.17031754372794836, + "grad_norm": 1.0410473346710205, + "learning_rate": 9.492535513175057e-06, + "loss": 0.2913, + "step": 8508 + }, + { + "epoch": 0.1703375622450767, + "grad_norm": 1.2167344093322754, + "learning_rate": 9.492393200691487e-06, + "loss": 0.3514, + "step": 8509 + }, + { + "epoch": 0.17035758076220503, + "grad_norm": 1.024926781654358, + "learning_rate": 9.492250869322864e-06, + "loss": 0.3434, + "step": 8510 + }, + { + "epoch": 0.17037759927933338, + "grad_norm": 1.0014008283615112, + "learning_rate": 9.492108519069783e-06, + "loss": 0.327, + "step": 8511 + }, + { + "epoch": 0.17039761779646173, + "grad_norm": 1.232720971107483, + "learning_rate": 9.491966149932848e-06, + "loss": 0.2573, + "step": 8512 + }, + { + "epoch": 0.17041763631359008, + "grad_norm": 1.0694611072540283, + "learning_rate": 9.49182376191265e-06, + "loss": 0.3338, + "step": 8513 + }, + { + "epoch": 0.1704376548307184, + "grad_norm": 1.1261770725250244, + "learning_rate": 9.491681355009795e-06, + "loss": 0.3668, + "step": 8514 + }, + { + "epoch": 0.17045767334784676, + "grad_norm": 1.091599464416504, + "learning_rate": 9.491538929224877e-06, + "loss": 0.3008, + "step": 8515 + }, + { + "epoch": 0.1704776918649751, + "grad_norm": 1.0477298498153687, + "learning_rate": 9.491396484558495e-06, + "loss": 0.2985, + "step": 8516 + }, + { + "epoch": 0.17049771038210346, + "grad_norm": 1.0755692720413208, + "learning_rate": 9.491254021011251e-06, + "loss": 0.2916, + "step": 8517 + }, + { + "epoch": 0.17051772889923178, + "grad_norm": 1.1569340229034424, + "learning_rate": 9.49111153858374e-06, + "loss": 0.3293, + "step": 8518 + }, + { + "epoch": 0.17053774741636013, + "grad_norm": 1.2732672691345215, + "learning_rate": 9.490969037276563e-06, + "loss": 0.3543, + "step": 8519 + }, + { + "epoch": 0.17055776593348848, + "grad_norm": 1.2371712923049927, + "learning_rate": 9.490826517090318e-06, + "loss": 0.3415, + "step": 8520 + }, + { + "epoch": 0.17057778445061683, + "grad_norm": 1.1212031841278076, + "learning_rate": 9.490683978025606e-06, + "loss": 0.3267, + "step": 8521 + }, + { + "epoch": 0.17059780296774515, + "grad_norm": 1.9268611669540405, + "learning_rate": 9.490541420083024e-06, + "loss": 0.8381, + "step": 8522 + }, + { + "epoch": 0.1706178214848735, + "grad_norm": 1.2358540296554565, + "learning_rate": 9.49039884326317e-06, + "loss": 0.3301, + "step": 8523 + }, + { + "epoch": 0.17063784000200186, + "grad_norm": 1.2166916131973267, + "learning_rate": 9.490256247566649e-06, + "loss": 0.3657, + "step": 8524 + }, + { + "epoch": 0.1706578585191302, + "grad_norm": 1.2729090452194214, + "learning_rate": 9.490113632994059e-06, + "loss": 0.3435, + "step": 8525 + }, + { + "epoch": 0.17067787703625853, + "grad_norm": 1.2300058603286743, + "learning_rate": 9.489970999545992e-06, + "loss": 0.3719, + "step": 8526 + }, + { + "epoch": 0.17069789555338688, + "grad_norm": 1.180575966835022, + "learning_rate": 9.489828347223057e-06, + "loss": 0.3325, + "step": 8527 + }, + { + "epoch": 0.17071791407051523, + "grad_norm": 1.1474217176437378, + "learning_rate": 9.489685676025849e-06, + "loss": 0.3559, + "step": 8528 + }, + { + "epoch": 0.17073793258764358, + "grad_norm": 1.9264042377471924, + "learning_rate": 9.489542985954968e-06, + "loss": 0.8727, + "step": 8529 + }, + { + "epoch": 0.1707579511047719, + "grad_norm": 1.0017138719558716, + "learning_rate": 9.489400277011013e-06, + "loss": 0.3069, + "step": 8530 + }, + { + "epoch": 0.17077796962190026, + "grad_norm": 1.1569925546646118, + "learning_rate": 9.489257549194589e-06, + "loss": 0.3169, + "step": 8531 + }, + { + "epoch": 0.1707979881390286, + "grad_norm": 1.1442879438400269, + "learning_rate": 9.489114802506289e-06, + "loss": 0.3169, + "step": 8532 + }, + { + "epoch": 0.17081800665615696, + "grad_norm": 1.2933369874954224, + "learning_rate": 9.488972036946718e-06, + "loss": 0.3489, + "step": 8533 + }, + { + "epoch": 0.17083802517328528, + "grad_norm": 1.9579424858093262, + "learning_rate": 9.488829252516475e-06, + "loss": 0.8489, + "step": 8534 + }, + { + "epoch": 0.17085804369041363, + "grad_norm": 1.941367506980896, + "learning_rate": 9.488686449216161e-06, + "loss": 0.8508, + "step": 8535 + }, + { + "epoch": 0.17087806220754198, + "grad_norm": 1.1454668045043945, + "learning_rate": 9.488543627046373e-06, + "loss": 0.2908, + "step": 8536 + }, + { + "epoch": 0.17089808072467033, + "grad_norm": 1.1076428890228271, + "learning_rate": 9.488400786007714e-06, + "loss": 0.3628, + "step": 8537 + }, + { + "epoch": 0.17091809924179865, + "grad_norm": 1.1270560026168823, + "learning_rate": 9.488257926100784e-06, + "loss": 0.3791, + "step": 8538 + }, + { + "epoch": 0.170938117758927, + "grad_norm": 1.0581121444702148, + "learning_rate": 9.488115047326183e-06, + "loss": 0.3832, + "step": 8539 + }, + { + "epoch": 0.17095813627605536, + "grad_norm": 1.0078046321868896, + "learning_rate": 9.487972149684513e-06, + "loss": 0.2769, + "step": 8540 + }, + { + "epoch": 0.1709781547931837, + "grad_norm": 1.0680932998657227, + "learning_rate": 9.487829233176373e-06, + "loss": 0.3254, + "step": 8541 + }, + { + "epoch": 0.17099817331031203, + "grad_norm": 1.1213198900222778, + "learning_rate": 9.487686297802365e-06, + "loss": 0.3495, + "step": 8542 + }, + { + "epoch": 0.17101819182744038, + "grad_norm": 1.0000331401824951, + "learning_rate": 9.48754334356309e-06, + "loss": 0.3013, + "step": 8543 + }, + { + "epoch": 0.17103821034456873, + "grad_norm": 1.1996629238128662, + "learning_rate": 9.48740037045915e-06, + "loss": 0.3351, + "step": 8544 + }, + { + "epoch": 0.17105822886169708, + "grad_norm": 0.9359925389289856, + "learning_rate": 9.487257378491142e-06, + "loss": 0.31, + "step": 8545 + }, + { + "epoch": 0.1710782473788254, + "grad_norm": 1.0431572198867798, + "learning_rate": 9.487114367659671e-06, + "loss": 0.3154, + "step": 8546 + }, + { + "epoch": 0.17109826589595376, + "grad_norm": 1.0598433017730713, + "learning_rate": 9.486971337965336e-06, + "loss": 0.3239, + "step": 8547 + }, + { + "epoch": 0.1711182844130821, + "grad_norm": 1.0565835237503052, + "learning_rate": 9.48682828940874e-06, + "loss": 0.3453, + "step": 8548 + }, + { + "epoch": 0.17113830293021046, + "grad_norm": 1.0854580402374268, + "learning_rate": 9.486685221990484e-06, + "loss": 0.3477, + "step": 8549 + }, + { + "epoch": 0.17115832144733878, + "grad_norm": 1.1923571825027466, + "learning_rate": 9.486542135711168e-06, + "loss": 0.3691, + "step": 8550 + }, + { + "epoch": 0.17117833996446713, + "grad_norm": 1.067612648010254, + "learning_rate": 9.486399030571394e-06, + "loss": 0.3364, + "step": 8551 + }, + { + "epoch": 0.17119835848159548, + "grad_norm": 1.0291002988815308, + "learning_rate": 9.486255906571763e-06, + "loss": 0.2947, + "step": 8552 + }, + { + "epoch": 0.17121837699872383, + "grad_norm": 1.087537407875061, + "learning_rate": 9.48611276371288e-06, + "loss": 0.3426, + "step": 8553 + }, + { + "epoch": 0.17123839551585215, + "grad_norm": 1.128929853439331, + "learning_rate": 9.485969601995342e-06, + "loss": 0.342, + "step": 8554 + }, + { + "epoch": 0.1712584140329805, + "grad_norm": 1.075587272644043, + "learning_rate": 9.485826421419755e-06, + "loss": 0.3711, + "step": 8555 + }, + { + "epoch": 0.17127843255010886, + "grad_norm": 1.0051624774932861, + "learning_rate": 9.485683221986719e-06, + "loss": 0.3041, + "step": 8556 + }, + { + "epoch": 0.1712984510672372, + "grad_norm": 1.0575687885284424, + "learning_rate": 9.485540003696837e-06, + "loss": 0.3103, + "step": 8557 + }, + { + "epoch": 0.17131846958436553, + "grad_norm": 1.2224934101104736, + "learning_rate": 9.485396766550707e-06, + "loss": 0.3432, + "step": 8558 + }, + { + "epoch": 0.17133848810149388, + "grad_norm": 1.2439826726913452, + "learning_rate": 9.485253510548935e-06, + "loss": 0.329, + "step": 8559 + }, + { + "epoch": 0.17135850661862223, + "grad_norm": 1.0045560598373413, + "learning_rate": 9.485110235692124e-06, + "loss": 0.3106, + "step": 8560 + }, + { + "epoch": 0.17137852513575058, + "grad_norm": 1.8858126401901245, + "learning_rate": 9.484966941980874e-06, + "loss": 0.8127, + "step": 8561 + }, + { + "epoch": 0.1713985436528789, + "grad_norm": 1.1730777025222778, + "learning_rate": 9.48482362941579e-06, + "loss": 0.3905, + "step": 8562 + }, + { + "epoch": 0.17141856217000725, + "grad_norm": 1.2433221340179443, + "learning_rate": 9.48468029799747e-06, + "loss": 0.3322, + "step": 8563 + }, + { + "epoch": 0.1714385806871356, + "grad_norm": 1.0685582160949707, + "learning_rate": 9.484536947726519e-06, + "loss": 0.2986, + "step": 8564 + }, + { + "epoch": 0.17145859920426396, + "grad_norm": 1.107431411743164, + "learning_rate": 9.484393578603542e-06, + "loss": 0.3275, + "step": 8565 + }, + { + "epoch": 0.17147861772139228, + "grad_norm": 1.2755037546157837, + "learning_rate": 9.484250190629139e-06, + "loss": 0.3751, + "step": 8566 + }, + { + "epoch": 0.17149863623852063, + "grad_norm": 1.1788337230682373, + "learning_rate": 9.48410678380391e-06, + "loss": 0.2982, + "step": 8567 + }, + { + "epoch": 0.17151865475564898, + "grad_norm": 1.2037296295166016, + "learning_rate": 9.483963358128466e-06, + "loss": 0.3385, + "step": 8568 + }, + { + "epoch": 0.17153867327277733, + "grad_norm": 1.4071272611618042, + "learning_rate": 9.483819913603402e-06, + "loss": 0.3267, + "step": 8569 + }, + { + "epoch": 0.17155869178990565, + "grad_norm": 1.2678663730621338, + "learning_rate": 9.483676450229324e-06, + "loss": 0.3626, + "step": 8570 + }, + { + "epoch": 0.171578710307034, + "grad_norm": 1.0917319059371948, + "learning_rate": 9.483532968006837e-06, + "loss": 0.3222, + "step": 8571 + }, + { + "epoch": 0.17159872882416236, + "grad_norm": 1.2173523902893066, + "learning_rate": 9.483389466936541e-06, + "loss": 0.3583, + "step": 8572 + }, + { + "epoch": 0.1716187473412907, + "grad_norm": 1.075987696647644, + "learning_rate": 9.48324594701904e-06, + "loss": 0.3514, + "step": 8573 + }, + { + "epoch": 0.17163876585841903, + "grad_norm": 1.1142867803573608, + "learning_rate": 9.483102408254938e-06, + "loss": 0.2953, + "step": 8574 + }, + { + "epoch": 0.17165878437554738, + "grad_norm": 2.0467686653137207, + "learning_rate": 9.482958850644839e-06, + "loss": 0.8294, + "step": 8575 + }, + { + "epoch": 0.17167880289267573, + "grad_norm": 1.1968437433242798, + "learning_rate": 9.482815274189345e-06, + "loss": 0.3117, + "step": 8576 + }, + { + "epoch": 0.17169882140980408, + "grad_norm": 1.0394283533096313, + "learning_rate": 9.482671678889061e-06, + "loss": 0.2998, + "step": 8577 + }, + { + "epoch": 0.1717188399269324, + "grad_norm": 1.1380592584609985, + "learning_rate": 9.482528064744589e-06, + "loss": 0.2911, + "step": 8578 + }, + { + "epoch": 0.17173885844406075, + "grad_norm": 1.1885831356048584, + "learning_rate": 9.482384431756534e-06, + "loss": 0.3764, + "step": 8579 + }, + { + "epoch": 0.1717588769611891, + "grad_norm": 1.1483350992202759, + "learning_rate": 9.4822407799255e-06, + "loss": 0.3501, + "step": 8580 + }, + { + "epoch": 0.17177889547831746, + "grad_norm": 1.1690093278884888, + "learning_rate": 9.48209710925209e-06, + "loss": 0.2996, + "step": 8581 + }, + { + "epoch": 0.17179891399544578, + "grad_norm": 1.083495020866394, + "learning_rate": 9.481953419736909e-06, + "loss": 0.3284, + "step": 8582 + }, + { + "epoch": 0.17181893251257413, + "grad_norm": 1.0722639560699463, + "learning_rate": 9.481809711380559e-06, + "loss": 0.3574, + "step": 8583 + }, + { + "epoch": 0.17183895102970248, + "grad_norm": 1.1441268920898438, + "learning_rate": 9.481665984183645e-06, + "loss": 0.3377, + "step": 8584 + }, + { + "epoch": 0.17185896954683083, + "grad_norm": 1.0378152132034302, + "learning_rate": 9.481522238146775e-06, + "loss": 0.3045, + "step": 8585 + }, + { + "epoch": 0.17187898806395915, + "grad_norm": 1.1105862855911255, + "learning_rate": 9.481378473270547e-06, + "loss": 0.3413, + "step": 8586 + }, + { + "epoch": 0.1718990065810875, + "grad_norm": 1.1849017143249512, + "learning_rate": 9.48123468955557e-06, + "loss": 0.3262, + "step": 8587 + }, + { + "epoch": 0.17191902509821586, + "grad_norm": 1.1411731243133545, + "learning_rate": 9.481090887002444e-06, + "loss": 0.3087, + "step": 8588 + }, + { + "epoch": 0.1719390436153442, + "grad_norm": 1.2417714595794678, + "learning_rate": 9.480947065611778e-06, + "loss": 0.313, + "step": 8589 + }, + { + "epoch": 0.17195906213247253, + "grad_norm": 1.1142499446868896, + "learning_rate": 9.480803225384177e-06, + "loss": 0.348, + "step": 8590 + }, + { + "epoch": 0.17197908064960088, + "grad_norm": 1.048216462135315, + "learning_rate": 9.48065936632024e-06, + "loss": 0.2585, + "step": 8591 + }, + { + "epoch": 0.17199909916672923, + "grad_norm": 1.0962004661560059, + "learning_rate": 9.480515488420578e-06, + "loss": 0.3409, + "step": 8592 + }, + { + "epoch": 0.17201911768385758, + "grad_norm": 1.9864606857299805, + "learning_rate": 9.48037159168579e-06, + "loss": 0.8408, + "step": 8593 + }, + { + "epoch": 0.1720391362009859, + "grad_norm": 1.2193892002105713, + "learning_rate": 9.480227676116488e-06, + "loss": 0.3183, + "step": 8594 + }, + { + "epoch": 0.17205915471811425, + "grad_norm": 1.215061902999878, + "learning_rate": 9.48008374171327e-06, + "loss": 0.3321, + "step": 8595 + }, + { + "epoch": 0.1720791732352426, + "grad_norm": 1.1426069736480713, + "learning_rate": 9.479939788476744e-06, + "loss": 0.3688, + "step": 8596 + }, + { + "epoch": 0.17209919175237096, + "grad_norm": 1.0740348100662231, + "learning_rate": 9.479795816407517e-06, + "loss": 0.3215, + "step": 8597 + }, + { + "epoch": 0.17211921026949928, + "grad_norm": 1.1778957843780518, + "learning_rate": 9.47965182550619e-06, + "loss": 0.2689, + "step": 8598 + }, + { + "epoch": 0.17213922878662763, + "grad_norm": 1.026254653930664, + "learning_rate": 9.479507815773375e-06, + "loss": 0.3345, + "step": 8599 + }, + { + "epoch": 0.17215924730375598, + "grad_norm": 1.092879295349121, + "learning_rate": 9.47936378720967e-06, + "loss": 0.2963, + "step": 8600 + }, + { + "epoch": 0.17217926582088433, + "grad_norm": 1.110101342201233, + "learning_rate": 9.479219739815684e-06, + "loss": 0.3332, + "step": 8601 + }, + { + "epoch": 0.17219928433801265, + "grad_norm": 0.9884871244430542, + "learning_rate": 9.479075673592022e-06, + "loss": 0.317, + "step": 8602 + }, + { + "epoch": 0.172219302855141, + "grad_norm": 1.115868330001831, + "learning_rate": 9.47893158853929e-06, + "loss": 0.3276, + "step": 8603 + }, + { + "epoch": 0.17223932137226936, + "grad_norm": 1.706907868385315, + "learning_rate": 9.478787484658094e-06, + "loss": 0.8789, + "step": 8604 + }, + { + "epoch": 0.1722593398893977, + "grad_norm": 1.8541735410690308, + "learning_rate": 9.478643361949039e-06, + "loss": 0.88, + "step": 8605 + }, + { + "epoch": 0.17227935840652603, + "grad_norm": 1.1026548147201538, + "learning_rate": 9.47849922041273e-06, + "loss": 0.3246, + "step": 8606 + }, + { + "epoch": 0.17229937692365438, + "grad_norm": 1.0840214490890503, + "learning_rate": 9.478355060049775e-06, + "loss": 0.3551, + "step": 8607 + }, + { + "epoch": 0.17231939544078273, + "grad_norm": 1.9329473972320557, + "learning_rate": 9.47821088086078e-06, + "loss": 0.9166, + "step": 8608 + }, + { + "epoch": 0.17233941395791108, + "grad_norm": 1.0439398288726807, + "learning_rate": 9.47806668284635e-06, + "loss": 0.3509, + "step": 8609 + }, + { + "epoch": 0.1723594324750394, + "grad_norm": 1.1543785333633423, + "learning_rate": 9.477922466007088e-06, + "loss": 0.2929, + "step": 8610 + }, + { + "epoch": 0.17237945099216775, + "grad_norm": 1.1412564516067505, + "learning_rate": 9.477778230343608e-06, + "loss": 0.3276, + "step": 8611 + }, + { + "epoch": 0.1723994695092961, + "grad_norm": 0.9818516969680786, + "learning_rate": 9.47763397585651e-06, + "loss": 0.308, + "step": 8612 + }, + { + "epoch": 0.17241948802642446, + "grad_norm": 1.2231367826461792, + "learning_rate": 9.477489702546401e-06, + "loss": 0.3609, + "step": 8613 + }, + { + "epoch": 0.17243950654355278, + "grad_norm": 1.0835483074188232, + "learning_rate": 9.477345410413892e-06, + "loss": 0.328, + "step": 8614 + }, + { + "epoch": 0.17245952506068113, + "grad_norm": 1.7396308183670044, + "learning_rate": 9.477201099459583e-06, + "loss": 0.8653, + "step": 8615 + }, + { + "epoch": 0.17247954357780948, + "grad_norm": 1.123392105102539, + "learning_rate": 9.477056769684086e-06, + "loss": 0.3387, + "step": 8616 + }, + { + "epoch": 0.17249956209493783, + "grad_norm": 1.0646190643310547, + "learning_rate": 9.476912421088007e-06, + "loss": 0.3567, + "step": 8617 + }, + { + "epoch": 0.17251958061206615, + "grad_norm": 1.111498236656189, + "learning_rate": 9.47676805367195e-06, + "loss": 0.3289, + "step": 8618 + }, + { + "epoch": 0.1725395991291945, + "grad_norm": 1.717483639717102, + "learning_rate": 9.476623667436524e-06, + "loss": 0.8349, + "step": 8619 + }, + { + "epoch": 0.17255961764632285, + "grad_norm": 1.1687648296356201, + "learning_rate": 9.476479262382334e-06, + "loss": 0.3336, + "step": 8620 + }, + { + "epoch": 0.1725796361634512, + "grad_norm": 1.2103664875030518, + "learning_rate": 9.47633483850999e-06, + "loss": 0.326, + "step": 8621 + }, + { + "epoch": 0.17259965468057953, + "grad_norm": 1.9054211378097534, + "learning_rate": 9.476190395820099e-06, + "loss": 0.8303, + "step": 8622 + }, + { + "epoch": 0.17261967319770788, + "grad_norm": 1.048837661743164, + "learning_rate": 9.476045934313265e-06, + "loss": 0.3193, + "step": 8623 + }, + { + "epoch": 0.17263969171483623, + "grad_norm": 1.1803865432739258, + "learning_rate": 9.475901453990098e-06, + "loss": 0.3078, + "step": 8624 + }, + { + "epoch": 0.17265971023196458, + "grad_norm": 1.1566344499588013, + "learning_rate": 9.475756954851203e-06, + "loss": 0.3485, + "step": 8625 + }, + { + "epoch": 0.1726797287490929, + "grad_norm": 1.0638387203216553, + "learning_rate": 9.475612436897191e-06, + "loss": 0.3542, + "step": 8626 + }, + { + "epoch": 0.17269974726622125, + "grad_norm": 1.0541167259216309, + "learning_rate": 9.475467900128666e-06, + "loss": 0.3262, + "step": 8627 + }, + { + "epoch": 0.1727197657833496, + "grad_norm": 1.2429373264312744, + "learning_rate": 9.475323344546237e-06, + "loss": 0.3048, + "step": 8628 + }, + { + "epoch": 0.17273978430047796, + "grad_norm": 1.161981463432312, + "learning_rate": 9.475178770150513e-06, + "loss": 0.2688, + "step": 8629 + }, + { + "epoch": 0.17275980281760628, + "grad_norm": 1.0854871273040771, + "learning_rate": 9.475034176942099e-06, + "loss": 0.3145, + "step": 8630 + }, + { + "epoch": 0.17277982133473463, + "grad_norm": 1.1006335020065308, + "learning_rate": 9.474889564921605e-06, + "loss": 0.3601, + "step": 8631 + }, + { + "epoch": 0.17279983985186298, + "grad_norm": 1.1125062704086304, + "learning_rate": 9.47474493408964e-06, + "loss": 0.3433, + "step": 8632 + }, + { + "epoch": 0.17281985836899133, + "grad_norm": 1.156718134880066, + "learning_rate": 9.474600284446807e-06, + "loss": 0.3516, + "step": 8633 + }, + { + "epoch": 0.17283987688611965, + "grad_norm": 1.1728975772857666, + "learning_rate": 9.474455615993719e-06, + "loss": 0.2825, + "step": 8634 + }, + { + "epoch": 0.172859895403248, + "grad_norm": 1.1345341205596924, + "learning_rate": 9.474310928730983e-06, + "loss": 0.3011, + "step": 8635 + }, + { + "epoch": 0.17287991392037635, + "grad_norm": 1.8677380084991455, + "learning_rate": 9.474166222659207e-06, + "loss": 0.8134, + "step": 8636 + }, + { + "epoch": 0.1728999324375047, + "grad_norm": 1.2658694982528687, + "learning_rate": 9.474021497778998e-06, + "loss": 0.2986, + "step": 8637 + }, + { + "epoch": 0.17291995095463303, + "grad_norm": 1.1842602491378784, + "learning_rate": 9.473876754090966e-06, + "loss": 0.3427, + "step": 8638 + }, + { + "epoch": 0.17293996947176138, + "grad_norm": 1.1019059419631958, + "learning_rate": 9.473731991595718e-06, + "loss": 0.334, + "step": 8639 + }, + { + "epoch": 0.17295998798888973, + "grad_norm": 1.817600131034851, + "learning_rate": 9.473587210293864e-06, + "loss": 0.8143, + "step": 8640 + }, + { + "epoch": 0.17298000650601808, + "grad_norm": 1.0779578685760498, + "learning_rate": 9.473442410186013e-06, + "loss": 0.3345, + "step": 8641 + }, + { + "epoch": 0.1730000250231464, + "grad_norm": 1.2022477388381958, + "learning_rate": 9.473297591272772e-06, + "loss": 0.3548, + "step": 8642 + }, + { + "epoch": 0.17302004354027475, + "grad_norm": 1.0516893863677979, + "learning_rate": 9.47315275355475e-06, + "loss": 0.2553, + "step": 8643 + }, + { + "epoch": 0.1730400620574031, + "grad_norm": 1.062384843826294, + "learning_rate": 9.473007897032558e-06, + "loss": 0.3182, + "step": 8644 + }, + { + "epoch": 0.17306008057453146, + "grad_norm": 1.132143259048462, + "learning_rate": 9.472863021706802e-06, + "loss": 0.3325, + "step": 8645 + }, + { + "epoch": 0.17308009909165978, + "grad_norm": 1.1146316528320312, + "learning_rate": 9.472718127578093e-06, + "loss": 0.387, + "step": 8646 + }, + { + "epoch": 0.17310011760878813, + "grad_norm": 1.0583466291427612, + "learning_rate": 9.47257321464704e-06, + "loss": 0.2974, + "step": 8647 + }, + { + "epoch": 0.17312013612591648, + "grad_norm": 1.1127753257751465, + "learning_rate": 9.472428282914252e-06, + "loss": 0.3262, + "step": 8648 + }, + { + "epoch": 0.17314015464304483, + "grad_norm": 1.062788486480713, + "learning_rate": 9.472283332380337e-06, + "loss": 0.3566, + "step": 8649 + }, + { + "epoch": 0.17316017316017315, + "grad_norm": 1.1663917303085327, + "learning_rate": 9.472138363045905e-06, + "loss": 0.2839, + "step": 8650 + }, + { + "epoch": 0.1731801916773015, + "grad_norm": 1.194652795791626, + "learning_rate": 9.471993374911566e-06, + "loss": 0.2886, + "step": 8651 + }, + { + "epoch": 0.17320021019442985, + "grad_norm": 1.0657219886779785, + "learning_rate": 9.471848367977929e-06, + "loss": 0.3509, + "step": 8652 + }, + { + "epoch": 0.1732202287115582, + "grad_norm": 1.7429536581039429, + "learning_rate": 9.471703342245605e-06, + "loss": 0.8511, + "step": 8653 + }, + { + "epoch": 0.17324024722868653, + "grad_norm": 1.243834137916565, + "learning_rate": 9.471558297715202e-06, + "loss": 0.3137, + "step": 8654 + }, + { + "epoch": 0.17326026574581488, + "grad_norm": 1.1751083135604858, + "learning_rate": 9.47141323438733e-06, + "loss": 0.2996, + "step": 8655 + }, + { + "epoch": 0.17328028426294323, + "grad_norm": 2.000994920730591, + "learning_rate": 9.471268152262598e-06, + "loss": 0.8313, + "step": 8656 + }, + { + "epoch": 0.17330030278007158, + "grad_norm": 1.0437722206115723, + "learning_rate": 9.471123051341617e-06, + "loss": 0.3036, + "step": 8657 + }, + { + "epoch": 0.1733203212971999, + "grad_norm": 1.0447582006454468, + "learning_rate": 9.470977931624998e-06, + "loss": 0.2989, + "step": 8658 + }, + { + "epoch": 0.17334033981432825, + "grad_norm": 1.0367087125778198, + "learning_rate": 9.47083279311335e-06, + "loss": 0.3165, + "step": 8659 + }, + { + "epoch": 0.1733603583314566, + "grad_norm": 1.2705910205841064, + "learning_rate": 9.470687635807282e-06, + "loss": 0.3335, + "step": 8660 + }, + { + "epoch": 0.17338037684858493, + "grad_norm": 1.9793862104415894, + "learning_rate": 9.470542459707408e-06, + "loss": 0.8085, + "step": 8661 + }, + { + "epoch": 0.17340039536571328, + "grad_norm": 1.1757299900054932, + "learning_rate": 9.470397264814333e-06, + "loss": 0.3075, + "step": 8662 + }, + { + "epoch": 0.17342041388284163, + "grad_norm": 1.1792525053024292, + "learning_rate": 9.47025205112867e-06, + "loss": 0.3332, + "step": 8663 + }, + { + "epoch": 0.17344043239996998, + "grad_norm": 1.1161797046661377, + "learning_rate": 9.47010681865103e-06, + "loss": 0.3176, + "step": 8664 + }, + { + "epoch": 0.1734604509170983, + "grad_norm": 1.1528358459472656, + "learning_rate": 9.469961567382024e-06, + "loss": 0.3302, + "step": 8665 + }, + { + "epoch": 0.17348046943422665, + "grad_norm": 1.1746151447296143, + "learning_rate": 9.46981629732226e-06, + "loss": 0.3467, + "step": 8666 + }, + { + "epoch": 0.173500487951355, + "grad_norm": 1.0567115545272827, + "learning_rate": 9.469671008472351e-06, + "loss": 0.306, + "step": 8667 + }, + { + "epoch": 0.17352050646848335, + "grad_norm": 1.036629319190979, + "learning_rate": 9.469525700832907e-06, + "loss": 0.338, + "step": 8668 + }, + { + "epoch": 0.17354052498561168, + "grad_norm": 1.0960789918899536, + "learning_rate": 9.469380374404538e-06, + "loss": 0.3593, + "step": 8669 + }, + { + "epoch": 0.17356054350274003, + "grad_norm": 1.0866221189498901, + "learning_rate": 9.469235029187856e-06, + "loss": 0.3303, + "step": 8670 + }, + { + "epoch": 0.17358056201986838, + "grad_norm": 1.1346549987792969, + "learning_rate": 9.469089665183471e-06, + "loss": 0.2884, + "step": 8671 + }, + { + "epoch": 0.17360058053699673, + "grad_norm": 1.0710006952285767, + "learning_rate": 9.468944282391996e-06, + "loss": 0.3084, + "step": 8672 + }, + { + "epoch": 0.17362059905412505, + "grad_norm": 1.040248990058899, + "learning_rate": 9.46879888081404e-06, + "loss": 0.3179, + "step": 8673 + }, + { + "epoch": 0.1736406175712534, + "grad_norm": 1.1208994388580322, + "learning_rate": 9.468653460450216e-06, + "loss": 0.3219, + "step": 8674 + }, + { + "epoch": 0.17366063608838175, + "grad_norm": 1.1989964246749878, + "learning_rate": 9.468508021301135e-06, + "loss": 0.356, + "step": 8675 + }, + { + "epoch": 0.1736806546055101, + "grad_norm": 1.0330291986465454, + "learning_rate": 9.468362563367407e-06, + "loss": 0.3393, + "step": 8676 + }, + { + "epoch": 0.17370067312263843, + "grad_norm": 1.1140763759613037, + "learning_rate": 9.468217086649644e-06, + "loss": 0.3837, + "step": 8677 + }, + { + "epoch": 0.17372069163976678, + "grad_norm": 1.7715492248535156, + "learning_rate": 9.46807159114846e-06, + "loss": 0.8747, + "step": 8678 + }, + { + "epoch": 0.17374071015689513, + "grad_norm": 1.0684149265289307, + "learning_rate": 9.467926076864461e-06, + "loss": 0.2858, + "step": 8679 + }, + { + "epoch": 0.17376072867402348, + "grad_norm": 1.2018427848815918, + "learning_rate": 9.467780543798265e-06, + "loss": 0.3091, + "step": 8680 + }, + { + "epoch": 0.1737807471911518, + "grad_norm": 1.1241744756698608, + "learning_rate": 9.467634991950481e-06, + "loss": 0.3086, + "step": 8681 + }, + { + "epoch": 0.17380076570828015, + "grad_norm": 1.2945607900619507, + "learning_rate": 9.46748942132172e-06, + "loss": 0.3064, + "step": 8682 + }, + { + "epoch": 0.1738207842254085, + "grad_norm": 1.0696152448654175, + "learning_rate": 9.467343831912597e-06, + "loss": 0.3434, + "step": 8683 + }, + { + "epoch": 0.17384080274253685, + "grad_norm": 1.5526598691940308, + "learning_rate": 9.46719822372372e-06, + "loss": 0.3432, + "step": 8684 + }, + { + "epoch": 0.17386082125966518, + "grad_norm": 0.9679871201515198, + "learning_rate": 9.467052596755702e-06, + "loss": 0.2783, + "step": 8685 + }, + { + "epoch": 0.17388083977679353, + "grad_norm": 1.049141764640808, + "learning_rate": 9.466906951009158e-06, + "loss": 0.2832, + "step": 8686 + }, + { + "epoch": 0.17390085829392188, + "grad_norm": 1.074548602104187, + "learning_rate": 9.466761286484699e-06, + "loss": 0.3637, + "step": 8687 + }, + { + "epoch": 0.17392087681105023, + "grad_norm": 1.1151925325393677, + "learning_rate": 9.466615603182935e-06, + "loss": 0.3179, + "step": 8688 + }, + { + "epoch": 0.17394089532817855, + "grad_norm": 1.0905219316482544, + "learning_rate": 9.46646990110448e-06, + "loss": 0.2997, + "step": 8689 + }, + { + "epoch": 0.1739609138453069, + "grad_norm": 1.1518796682357788, + "learning_rate": 9.466324180249948e-06, + "loss": 0.3173, + "step": 8690 + }, + { + "epoch": 0.17398093236243525, + "grad_norm": 1.215408444404602, + "learning_rate": 9.46617844061995e-06, + "loss": 0.3877, + "step": 8691 + }, + { + "epoch": 0.1740009508795636, + "grad_norm": 1.0571671724319458, + "learning_rate": 9.4660326822151e-06, + "loss": 0.3091, + "step": 8692 + }, + { + "epoch": 0.17402096939669193, + "grad_norm": 1.1145819425582886, + "learning_rate": 9.465886905036009e-06, + "loss": 0.305, + "step": 8693 + }, + { + "epoch": 0.17404098791382028, + "grad_norm": 1.0754234790802002, + "learning_rate": 9.465741109083289e-06, + "loss": 0.3175, + "step": 8694 + }, + { + "epoch": 0.17406100643094863, + "grad_norm": 1.045850396156311, + "learning_rate": 9.465595294357555e-06, + "loss": 0.2874, + "step": 8695 + }, + { + "epoch": 0.17408102494807698, + "grad_norm": 1.116585373878479, + "learning_rate": 9.46544946085942e-06, + "loss": 0.3283, + "step": 8696 + }, + { + "epoch": 0.1741010434652053, + "grad_norm": 1.4471484422683716, + "learning_rate": 9.465303608589497e-06, + "loss": 0.3473, + "step": 8697 + }, + { + "epoch": 0.17412106198233365, + "grad_norm": 1.147851824760437, + "learning_rate": 9.465157737548399e-06, + "loss": 0.3011, + "step": 8698 + }, + { + "epoch": 0.174141080499462, + "grad_norm": 1.0562357902526855, + "learning_rate": 9.465011847736737e-06, + "loss": 0.3145, + "step": 8699 + }, + { + "epoch": 0.17416109901659035, + "grad_norm": 1.0093882083892822, + "learning_rate": 9.464865939155126e-06, + "loss": 0.3182, + "step": 8700 + }, + { + "epoch": 0.17418111753371868, + "grad_norm": 1.149389624595642, + "learning_rate": 9.46472001180418e-06, + "loss": 0.3401, + "step": 8701 + }, + { + "epoch": 0.17420113605084703, + "grad_norm": 1.9225562810897827, + "learning_rate": 9.464574065684513e-06, + "loss": 0.8552, + "step": 8702 + }, + { + "epoch": 0.17422115456797538, + "grad_norm": 1.077206015586853, + "learning_rate": 9.464428100796736e-06, + "loss": 0.4073, + "step": 8703 + }, + { + "epoch": 0.17424117308510373, + "grad_norm": 1.1083430051803589, + "learning_rate": 9.464282117141466e-06, + "loss": 0.3111, + "step": 8704 + }, + { + "epoch": 0.17426119160223205, + "grad_norm": 1.2517534494400024, + "learning_rate": 9.464136114719313e-06, + "loss": 0.3369, + "step": 8705 + }, + { + "epoch": 0.1742812101193604, + "grad_norm": 1.800757646560669, + "learning_rate": 9.463990093530893e-06, + "loss": 0.7755, + "step": 8706 + }, + { + "epoch": 0.17430122863648875, + "grad_norm": 1.200947880744934, + "learning_rate": 9.463844053576819e-06, + "loss": 0.3373, + "step": 8707 + }, + { + "epoch": 0.1743212471536171, + "grad_norm": 1.0407016277313232, + "learning_rate": 9.463697994857705e-06, + "loss": 0.3272, + "step": 8708 + }, + { + "epoch": 0.17434126567074543, + "grad_norm": 1.0426939725875854, + "learning_rate": 9.463551917374164e-06, + "loss": 0.339, + "step": 8709 + }, + { + "epoch": 0.17436128418787378, + "grad_norm": 1.1784512996673584, + "learning_rate": 9.463405821126814e-06, + "loss": 0.2973, + "step": 8710 + }, + { + "epoch": 0.17438130270500213, + "grad_norm": 1.290295958518982, + "learning_rate": 9.463259706116266e-06, + "loss": 0.3409, + "step": 8711 + }, + { + "epoch": 0.17440132122213048, + "grad_norm": 1.008744239807129, + "learning_rate": 9.463113572343134e-06, + "loss": 0.3677, + "step": 8712 + }, + { + "epoch": 0.1744213397392588, + "grad_norm": 1.2345898151397705, + "learning_rate": 9.462967419808033e-06, + "loss": 0.3045, + "step": 8713 + }, + { + "epoch": 0.17444135825638715, + "grad_norm": 1.1194878816604614, + "learning_rate": 9.462821248511579e-06, + "loss": 0.3073, + "step": 8714 + }, + { + "epoch": 0.1744613767735155, + "grad_norm": 1.038956880569458, + "learning_rate": 9.46267505845438e-06, + "loss": 0.3326, + "step": 8715 + }, + { + "epoch": 0.17448139529064385, + "grad_norm": 1.1051274538040161, + "learning_rate": 9.46252884963706e-06, + "loss": 0.3364, + "step": 8716 + }, + { + "epoch": 0.17450141380777218, + "grad_norm": 1.0414713621139526, + "learning_rate": 9.46238262206023e-06, + "loss": 0.3628, + "step": 8717 + }, + { + "epoch": 0.17452143232490053, + "grad_norm": 1.4372111558914185, + "learning_rate": 9.462236375724499e-06, + "loss": 0.343, + "step": 8718 + }, + { + "epoch": 0.17454145084202888, + "grad_norm": 1.2261828184127808, + "learning_rate": 9.46209011063049e-06, + "loss": 0.3843, + "step": 8719 + }, + { + "epoch": 0.17456146935915723, + "grad_norm": 1.074965000152588, + "learning_rate": 9.461943826778813e-06, + "loss": 0.3693, + "step": 8720 + }, + { + "epoch": 0.17458148787628555, + "grad_norm": 1.0522022247314453, + "learning_rate": 9.461797524170085e-06, + "loss": 0.3733, + "step": 8721 + }, + { + "epoch": 0.1746015063934139, + "grad_norm": 1.932390809059143, + "learning_rate": 9.461651202804921e-06, + "loss": 0.7206, + "step": 8722 + }, + { + "epoch": 0.17462152491054225, + "grad_norm": 1.065623164176941, + "learning_rate": 9.461504862683934e-06, + "loss": 0.286, + "step": 8723 + }, + { + "epoch": 0.1746415434276706, + "grad_norm": 1.1624540090560913, + "learning_rate": 9.461358503807742e-06, + "loss": 0.3784, + "step": 8724 + }, + { + "epoch": 0.17466156194479893, + "grad_norm": 1.0686490535736084, + "learning_rate": 9.46121212617696e-06, + "loss": 0.3255, + "step": 8725 + }, + { + "epoch": 0.17468158046192728, + "grad_norm": 1.1424386501312256, + "learning_rate": 9.461065729792199e-06, + "loss": 0.3529, + "step": 8726 + }, + { + "epoch": 0.17470159897905563, + "grad_norm": 0.9989859461784363, + "learning_rate": 9.46091931465408e-06, + "loss": 0.3122, + "step": 8727 + }, + { + "epoch": 0.17472161749618398, + "grad_norm": 1.1180009841918945, + "learning_rate": 9.460772880763216e-06, + "loss": 0.3197, + "step": 8728 + }, + { + "epoch": 0.1747416360133123, + "grad_norm": 1.1209970712661743, + "learning_rate": 9.460626428120221e-06, + "loss": 0.3457, + "step": 8729 + }, + { + "epoch": 0.17476165453044065, + "grad_norm": 1.2269974946975708, + "learning_rate": 9.460479956725714e-06, + "loss": 0.3038, + "step": 8730 + }, + { + "epoch": 0.174781673047569, + "grad_norm": 1.242641568183899, + "learning_rate": 9.46033346658031e-06, + "loss": 0.3162, + "step": 8731 + }, + { + "epoch": 0.17480169156469735, + "grad_norm": 1.1262551546096802, + "learning_rate": 9.460186957684623e-06, + "loss": 0.3238, + "step": 8732 + }, + { + "epoch": 0.17482171008182568, + "grad_norm": 1.1109395027160645, + "learning_rate": 9.46004043003927e-06, + "loss": 0.3014, + "step": 8733 + }, + { + "epoch": 0.17484172859895403, + "grad_norm": 1.186954379081726, + "learning_rate": 9.459893883644865e-06, + "loss": 0.2986, + "step": 8734 + }, + { + "epoch": 0.17486174711608238, + "grad_norm": 1.2459241151809692, + "learning_rate": 9.459747318502029e-06, + "loss": 0.3634, + "step": 8735 + }, + { + "epoch": 0.17488176563321073, + "grad_norm": 1.109594464302063, + "learning_rate": 9.459600734611373e-06, + "loss": 0.3146, + "step": 8736 + }, + { + "epoch": 0.17490178415033905, + "grad_norm": 1.1002566814422607, + "learning_rate": 9.459454131973517e-06, + "loss": 0.3289, + "step": 8737 + }, + { + "epoch": 0.1749218026674674, + "grad_norm": 1.8264225721359253, + "learning_rate": 9.459307510589076e-06, + "loss": 0.8017, + "step": 8738 + }, + { + "epoch": 0.17494182118459575, + "grad_norm": 1.0063945055007935, + "learning_rate": 9.459160870458663e-06, + "loss": 0.3319, + "step": 8739 + }, + { + "epoch": 0.1749618397017241, + "grad_norm": 1.0820807218551636, + "learning_rate": 9.459014211582898e-06, + "loss": 0.3353, + "step": 8740 + }, + { + "epoch": 0.17498185821885243, + "grad_norm": 1.8313504457473755, + "learning_rate": 9.458867533962397e-06, + "loss": 0.7883, + "step": 8741 + }, + { + "epoch": 0.17500187673598078, + "grad_norm": 1.1830121278762817, + "learning_rate": 9.458720837597777e-06, + "loss": 0.3315, + "step": 8742 + }, + { + "epoch": 0.17502189525310913, + "grad_norm": 1.1197636127471924, + "learning_rate": 9.458574122489655e-06, + "loss": 0.3318, + "step": 8743 + }, + { + "epoch": 0.17504191377023748, + "grad_norm": 1.1192289590835571, + "learning_rate": 9.458427388638647e-06, + "loss": 0.349, + "step": 8744 + }, + { + "epoch": 0.1750619322873658, + "grad_norm": 1.8929544687271118, + "learning_rate": 9.458280636045367e-06, + "loss": 0.8256, + "step": 8745 + }, + { + "epoch": 0.17508195080449415, + "grad_norm": 1.2235618829727173, + "learning_rate": 9.458133864710437e-06, + "loss": 0.3644, + "step": 8746 + }, + { + "epoch": 0.1751019693216225, + "grad_norm": 1.1462231874465942, + "learning_rate": 9.457987074634471e-06, + "loss": 0.3756, + "step": 8747 + }, + { + "epoch": 0.17512198783875085, + "grad_norm": 1.0879154205322266, + "learning_rate": 9.457840265818086e-06, + "loss": 0.3369, + "step": 8748 + }, + { + "epoch": 0.17514200635587918, + "grad_norm": 1.0525081157684326, + "learning_rate": 9.457693438261901e-06, + "loss": 0.2955, + "step": 8749 + }, + { + "epoch": 0.17516202487300753, + "grad_norm": 1.204796314239502, + "learning_rate": 9.45754659196653e-06, + "loss": 0.3837, + "step": 8750 + }, + { + "epoch": 0.17518204339013588, + "grad_norm": 1.0704677104949951, + "learning_rate": 9.457399726932594e-06, + "loss": 0.3029, + "step": 8751 + }, + { + "epoch": 0.17520206190726423, + "grad_norm": 1.1102190017700195, + "learning_rate": 9.45725284316071e-06, + "loss": 0.3058, + "step": 8752 + }, + { + "epoch": 0.17522208042439255, + "grad_norm": 1.0412521362304688, + "learning_rate": 9.457105940651492e-06, + "loss": 0.2932, + "step": 8753 + }, + { + "epoch": 0.1752420989415209, + "grad_norm": 1.0096936225891113, + "learning_rate": 9.456959019405563e-06, + "loss": 0.3588, + "step": 8754 + }, + { + "epoch": 0.17526211745864925, + "grad_norm": 1.055399775505066, + "learning_rate": 9.456812079423534e-06, + "loss": 0.3443, + "step": 8755 + }, + { + "epoch": 0.1752821359757776, + "grad_norm": 1.040313482284546, + "learning_rate": 9.456665120706028e-06, + "loss": 0.3642, + "step": 8756 + }, + { + "epoch": 0.17530215449290593, + "grad_norm": 1.1801862716674805, + "learning_rate": 9.456518143253659e-06, + "loss": 0.3664, + "step": 8757 + }, + { + "epoch": 0.17532217301003428, + "grad_norm": 1.0395174026489258, + "learning_rate": 9.45637114706705e-06, + "loss": 0.2675, + "step": 8758 + }, + { + "epoch": 0.17534219152716263, + "grad_norm": 1.0536623001098633, + "learning_rate": 9.456224132146813e-06, + "loss": 0.3279, + "step": 8759 + }, + { + "epoch": 0.17536221004429098, + "grad_norm": 1.240739345550537, + "learning_rate": 9.456077098493568e-06, + "loss": 0.3498, + "step": 8760 + }, + { + "epoch": 0.1753822285614193, + "grad_norm": 1.2606383562088013, + "learning_rate": 9.455930046107938e-06, + "loss": 0.3391, + "step": 8761 + }, + { + "epoch": 0.17540224707854765, + "grad_norm": 1.0581316947937012, + "learning_rate": 9.455782974990533e-06, + "loss": 0.3317, + "step": 8762 + }, + { + "epoch": 0.175422265595676, + "grad_norm": 1.3458576202392578, + "learning_rate": 9.455635885141976e-06, + "loss": 0.3221, + "step": 8763 + }, + { + "epoch": 0.17544228411280435, + "grad_norm": 1.0921944379806519, + "learning_rate": 9.455488776562886e-06, + "loss": 0.3316, + "step": 8764 + }, + { + "epoch": 0.17546230262993268, + "grad_norm": 1.0017658472061157, + "learning_rate": 9.45534164925388e-06, + "loss": 0.3539, + "step": 8765 + }, + { + "epoch": 0.17548232114706103, + "grad_norm": 1.061270833015442, + "learning_rate": 9.455194503215575e-06, + "loss": 0.3785, + "step": 8766 + }, + { + "epoch": 0.17550233966418938, + "grad_norm": 1.1002299785614014, + "learning_rate": 9.455047338448593e-06, + "loss": 0.3598, + "step": 8767 + }, + { + "epoch": 0.17552235818131773, + "grad_norm": 1.0830715894699097, + "learning_rate": 9.454900154953548e-06, + "loss": 0.2816, + "step": 8768 + }, + { + "epoch": 0.17554237669844605, + "grad_norm": 1.2115079164505005, + "learning_rate": 9.454752952731063e-06, + "loss": 0.33, + "step": 8769 + }, + { + "epoch": 0.1755623952155744, + "grad_norm": 1.1007877588272095, + "learning_rate": 9.454605731781756e-06, + "loss": 0.3013, + "step": 8770 + }, + { + "epoch": 0.17558241373270275, + "grad_norm": 1.1791163682937622, + "learning_rate": 9.454458492106245e-06, + "loss": 0.3694, + "step": 8771 + }, + { + "epoch": 0.1756024322498311, + "grad_norm": 1.0480252504348755, + "learning_rate": 9.454311233705147e-06, + "loss": 0.3347, + "step": 8772 + }, + { + "epoch": 0.17562245076695943, + "grad_norm": 1.284185528755188, + "learning_rate": 9.454163956579085e-06, + "loss": 0.3245, + "step": 8773 + }, + { + "epoch": 0.17564246928408778, + "grad_norm": 1.073176383972168, + "learning_rate": 9.454016660728677e-06, + "loss": 0.3071, + "step": 8774 + }, + { + "epoch": 0.17566248780121613, + "grad_norm": 1.0831624269485474, + "learning_rate": 9.45386934615454e-06, + "loss": 0.3263, + "step": 8775 + }, + { + "epoch": 0.17568250631834448, + "grad_norm": 1.8582913875579834, + "learning_rate": 9.453722012857295e-06, + "loss": 0.8682, + "step": 8776 + }, + { + "epoch": 0.1757025248354728, + "grad_norm": 1.2010111808776855, + "learning_rate": 9.453574660837561e-06, + "loss": 0.3259, + "step": 8777 + }, + { + "epoch": 0.17572254335260115, + "grad_norm": 1.1203081607818604, + "learning_rate": 9.453427290095957e-06, + "loss": 0.3654, + "step": 8778 + }, + { + "epoch": 0.1757425618697295, + "grad_norm": 1.1059248447418213, + "learning_rate": 9.453279900633105e-06, + "loss": 0.3352, + "step": 8779 + }, + { + "epoch": 0.17576258038685785, + "grad_norm": 1.1053162813186646, + "learning_rate": 9.453132492449621e-06, + "loss": 0.3024, + "step": 8780 + }, + { + "epoch": 0.17578259890398618, + "grad_norm": 1.118826150894165, + "learning_rate": 9.452985065546128e-06, + "loss": 0.3258, + "step": 8781 + }, + { + "epoch": 0.17580261742111453, + "grad_norm": 1.0814738273620605, + "learning_rate": 9.452837619923243e-06, + "loss": 0.3292, + "step": 8782 + }, + { + "epoch": 0.17582263593824288, + "grad_norm": 1.016046404838562, + "learning_rate": 9.452690155581588e-06, + "loss": 0.3285, + "step": 8783 + }, + { + "epoch": 0.17584265445537123, + "grad_norm": 1.1855944395065308, + "learning_rate": 9.45254267252178e-06, + "loss": 0.3743, + "step": 8784 + }, + { + "epoch": 0.17586267297249955, + "grad_norm": 1.00544273853302, + "learning_rate": 9.452395170744442e-06, + "loss": 0.3225, + "step": 8785 + }, + { + "epoch": 0.1758826914896279, + "grad_norm": 0.9647132754325867, + "learning_rate": 9.452247650250191e-06, + "loss": 0.3374, + "step": 8786 + }, + { + "epoch": 0.17590271000675625, + "grad_norm": 0.9908398985862732, + "learning_rate": 9.45210011103965e-06, + "loss": 0.3256, + "step": 8787 + }, + { + "epoch": 0.1759227285238846, + "grad_norm": 1.1472265720367432, + "learning_rate": 9.451952553113438e-06, + "loss": 0.3239, + "step": 8788 + }, + { + "epoch": 0.17594274704101293, + "grad_norm": 1.071740746498108, + "learning_rate": 9.451804976472178e-06, + "loss": 0.3015, + "step": 8789 + }, + { + "epoch": 0.17596276555814128, + "grad_norm": 1.0840173959732056, + "learning_rate": 9.451657381116486e-06, + "loss": 0.3368, + "step": 8790 + }, + { + "epoch": 0.17598278407526963, + "grad_norm": 1.2743760347366333, + "learning_rate": 9.451509767046985e-06, + "loss": 0.3346, + "step": 8791 + }, + { + "epoch": 0.17600280259239798, + "grad_norm": 1.0656158924102783, + "learning_rate": 9.451362134264294e-06, + "loss": 0.3212, + "step": 8792 + }, + { + "epoch": 0.1760228211095263, + "grad_norm": 2.065422534942627, + "learning_rate": 9.451214482769034e-06, + "loss": 0.8508, + "step": 8793 + }, + { + "epoch": 0.17604283962665465, + "grad_norm": 1.2046005725860596, + "learning_rate": 9.451066812561827e-06, + "loss": 0.3706, + "step": 8794 + }, + { + "epoch": 0.176062858143783, + "grad_norm": 1.1837447881698608, + "learning_rate": 9.450919123643293e-06, + "loss": 0.3709, + "step": 8795 + }, + { + "epoch": 0.17608287666091135, + "grad_norm": 1.0630128383636475, + "learning_rate": 9.450771416014054e-06, + "loss": 0.3497, + "step": 8796 + }, + { + "epoch": 0.17610289517803968, + "grad_norm": 1.1025415658950806, + "learning_rate": 9.450623689674729e-06, + "loss": 0.332, + "step": 8797 + }, + { + "epoch": 0.17612291369516803, + "grad_norm": 1.1202142238616943, + "learning_rate": 9.450475944625939e-06, + "loss": 0.3695, + "step": 8798 + }, + { + "epoch": 0.17614293221229638, + "grad_norm": 1.0479586124420166, + "learning_rate": 9.450328180868307e-06, + "loss": 0.3028, + "step": 8799 + }, + { + "epoch": 0.17616295072942473, + "grad_norm": 1.292597770690918, + "learning_rate": 9.450180398402452e-06, + "loss": 0.3416, + "step": 8800 + }, + { + "epoch": 0.17618296924655305, + "grad_norm": 1.1951143741607666, + "learning_rate": 9.450032597228998e-06, + "loss": 0.3964, + "step": 8801 + }, + { + "epoch": 0.1762029877636814, + "grad_norm": 1.177708625793457, + "learning_rate": 9.449884777348562e-06, + "loss": 0.3218, + "step": 8802 + }, + { + "epoch": 0.17622300628080975, + "grad_norm": 1.1072311401367188, + "learning_rate": 9.44973693876177e-06, + "loss": 0.2739, + "step": 8803 + }, + { + "epoch": 0.1762430247979381, + "grad_norm": 1.8351500034332275, + "learning_rate": 9.44958908146924e-06, + "loss": 0.8865, + "step": 8804 + }, + { + "epoch": 0.17626304331506643, + "grad_norm": 1.149023413658142, + "learning_rate": 9.449441205471597e-06, + "loss": 0.3341, + "step": 8805 + }, + { + "epoch": 0.17628306183219478, + "grad_norm": 1.250559687614441, + "learning_rate": 9.44929331076946e-06, + "loss": 0.3032, + "step": 8806 + }, + { + "epoch": 0.17630308034932313, + "grad_norm": 1.1611579656600952, + "learning_rate": 9.449145397363451e-06, + "loss": 0.2958, + "step": 8807 + }, + { + "epoch": 0.17632309886645148, + "grad_norm": 1.134872317314148, + "learning_rate": 9.448997465254192e-06, + "loss": 0.3681, + "step": 8808 + }, + { + "epoch": 0.1763431173835798, + "grad_norm": 1.0720412731170654, + "learning_rate": 9.448849514442305e-06, + "loss": 0.3384, + "step": 8809 + }, + { + "epoch": 0.17636313590070815, + "grad_norm": 1.1651616096496582, + "learning_rate": 9.448701544928412e-06, + "loss": 0.3524, + "step": 8810 + }, + { + "epoch": 0.1763831544178365, + "grad_norm": 1.0400514602661133, + "learning_rate": 9.448553556713136e-06, + "loss": 0.3738, + "step": 8811 + }, + { + "epoch": 0.17640317293496485, + "grad_norm": 1.1243940591812134, + "learning_rate": 9.448405549797097e-06, + "loss": 0.3596, + "step": 8812 + }, + { + "epoch": 0.17642319145209318, + "grad_norm": 1.0786904096603394, + "learning_rate": 9.44825752418092e-06, + "loss": 0.3129, + "step": 8813 + }, + { + "epoch": 0.17644320996922153, + "grad_norm": 1.1613701581954956, + "learning_rate": 9.448109479865223e-06, + "loss": 0.3571, + "step": 8814 + }, + { + "epoch": 0.17646322848634988, + "grad_norm": 1.0670504570007324, + "learning_rate": 9.447961416850634e-06, + "loss": 0.3213, + "step": 8815 + }, + { + "epoch": 0.17648324700347823, + "grad_norm": 1.057228922843933, + "learning_rate": 9.447813335137773e-06, + "loss": 0.3071, + "step": 8816 + }, + { + "epoch": 0.17650326552060655, + "grad_norm": 1.2174181938171387, + "learning_rate": 9.447665234727258e-06, + "loss": 0.3361, + "step": 8817 + }, + { + "epoch": 0.1765232840377349, + "grad_norm": 1.0695263147354126, + "learning_rate": 9.447517115619718e-06, + "loss": 0.2802, + "step": 8818 + }, + { + "epoch": 0.17654330255486325, + "grad_norm": 1.7952532768249512, + "learning_rate": 9.447368977815772e-06, + "loss": 0.8242, + "step": 8819 + }, + { + "epoch": 0.1765633210719916, + "grad_norm": 1.0877163410186768, + "learning_rate": 9.447220821316046e-06, + "loss": 0.3455, + "step": 8820 + }, + { + "epoch": 0.17658333958911993, + "grad_norm": 1.129256248474121, + "learning_rate": 9.447072646121159e-06, + "loss": 0.3285, + "step": 8821 + }, + { + "epoch": 0.17660335810624828, + "grad_norm": 1.116456389427185, + "learning_rate": 9.446924452231736e-06, + "loss": 0.3626, + "step": 8822 + }, + { + "epoch": 0.17662337662337663, + "grad_norm": 1.0009442567825317, + "learning_rate": 9.4467762396484e-06, + "loss": 0.2838, + "step": 8823 + }, + { + "epoch": 0.17664339514050498, + "grad_norm": 1.761358380317688, + "learning_rate": 9.446628008371772e-06, + "loss": 0.8924, + "step": 8824 + }, + { + "epoch": 0.1766634136576333, + "grad_norm": 1.1800496578216553, + "learning_rate": 9.446479758402478e-06, + "loss": 0.2895, + "step": 8825 + }, + { + "epoch": 0.17668343217476165, + "grad_norm": 1.1064062118530273, + "learning_rate": 9.446331489741141e-06, + "loss": 0.285, + "step": 8826 + }, + { + "epoch": 0.17670345069189, + "grad_norm": 1.224461555480957, + "learning_rate": 9.446183202388382e-06, + "loss": 0.3318, + "step": 8827 + }, + { + "epoch": 0.17672346920901835, + "grad_norm": 1.1504580974578857, + "learning_rate": 9.446034896344827e-06, + "loss": 0.3489, + "step": 8828 + }, + { + "epoch": 0.17674348772614668, + "grad_norm": 1.088697075843811, + "learning_rate": 9.445886571611096e-06, + "loss": 0.2731, + "step": 8829 + }, + { + "epoch": 0.17676350624327503, + "grad_norm": 1.1590601205825806, + "learning_rate": 9.445738228187815e-06, + "loss": 0.2893, + "step": 8830 + }, + { + "epoch": 0.17678352476040338, + "grad_norm": 1.9407397508621216, + "learning_rate": 9.445589866075608e-06, + "loss": 0.8467, + "step": 8831 + }, + { + "epoch": 0.17680354327753173, + "grad_norm": 1.2505663633346558, + "learning_rate": 9.445441485275098e-06, + "loss": 0.327, + "step": 8832 + }, + { + "epoch": 0.17682356179466005, + "grad_norm": 1.128652572631836, + "learning_rate": 9.445293085786909e-06, + "loss": 0.3312, + "step": 8833 + }, + { + "epoch": 0.1768435803117884, + "grad_norm": 1.1249761581420898, + "learning_rate": 9.445144667611663e-06, + "loss": 0.3417, + "step": 8834 + }, + { + "epoch": 0.17686359882891675, + "grad_norm": 1.082648754119873, + "learning_rate": 9.444996230749987e-06, + "loss": 0.3352, + "step": 8835 + }, + { + "epoch": 0.1768836173460451, + "grad_norm": 1.2222235202789307, + "learning_rate": 9.444847775202504e-06, + "loss": 0.3497, + "step": 8836 + }, + { + "epoch": 0.17690363586317343, + "grad_norm": 1.069444179534912, + "learning_rate": 9.444699300969836e-06, + "loss": 0.346, + "step": 8837 + }, + { + "epoch": 0.17692365438030178, + "grad_norm": 1.097232460975647, + "learning_rate": 9.444550808052609e-06, + "loss": 0.3129, + "step": 8838 + }, + { + "epoch": 0.17694367289743013, + "grad_norm": 1.116456389427185, + "learning_rate": 9.444402296451446e-06, + "loss": 0.3297, + "step": 8839 + }, + { + "epoch": 0.17696369141455848, + "grad_norm": 1.0560048818588257, + "learning_rate": 9.444253766166973e-06, + "loss": 0.3255, + "step": 8840 + }, + { + "epoch": 0.1769837099316868, + "grad_norm": 1.8809410333633423, + "learning_rate": 9.444105217199812e-06, + "loss": 0.8564, + "step": 8841 + }, + { + "epoch": 0.17700372844881515, + "grad_norm": 1.1902815103530884, + "learning_rate": 9.443956649550591e-06, + "loss": 0.3182, + "step": 8842 + }, + { + "epoch": 0.1770237469659435, + "grad_norm": 1.259452223777771, + "learning_rate": 9.443808063219933e-06, + "loss": 0.3098, + "step": 8843 + }, + { + "epoch": 0.17704376548307185, + "grad_norm": 1.1932246685028076, + "learning_rate": 9.44365945820846e-06, + "loss": 0.4065, + "step": 8844 + }, + { + "epoch": 0.17706378400020018, + "grad_norm": 1.0844146013259888, + "learning_rate": 9.443510834516801e-06, + "loss": 0.3572, + "step": 8845 + }, + { + "epoch": 0.17708380251732853, + "grad_norm": 1.0124086141586304, + "learning_rate": 9.443362192145578e-06, + "loss": 0.294, + "step": 8846 + }, + { + "epoch": 0.17710382103445688, + "grad_norm": 1.0440075397491455, + "learning_rate": 9.443213531095418e-06, + "loss": 0.3283, + "step": 8847 + }, + { + "epoch": 0.17712383955158523, + "grad_norm": 1.1457792520523071, + "learning_rate": 9.443064851366943e-06, + "loss": 0.3506, + "step": 8848 + }, + { + "epoch": 0.17714385806871355, + "grad_norm": 2.875674247741699, + "learning_rate": 9.44291615296078e-06, + "loss": 0.8806, + "step": 8849 + }, + { + "epoch": 0.1771638765858419, + "grad_norm": 1.0879716873168945, + "learning_rate": 9.442767435877553e-06, + "loss": 0.3314, + "step": 8850 + }, + { + "epoch": 0.17718389510297025, + "grad_norm": 1.0268728733062744, + "learning_rate": 9.442618700117887e-06, + "loss": 0.2968, + "step": 8851 + }, + { + "epoch": 0.1772039136200986, + "grad_norm": 1.092078447341919, + "learning_rate": 9.442469945682409e-06, + "loss": 0.3547, + "step": 8852 + }, + { + "epoch": 0.17722393213722692, + "grad_norm": 1.0708366632461548, + "learning_rate": 9.442321172571745e-06, + "loss": 0.3423, + "step": 8853 + }, + { + "epoch": 0.17724395065435528, + "grad_norm": 1.048056721687317, + "learning_rate": 9.442172380786517e-06, + "loss": 0.3029, + "step": 8854 + }, + { + "epoch": 0.17726396917148363, + "grad_norm": 1.891675353050232, + "learning_rate": 9.442023570327353e-06, + "loss": 0.8483, + "step": 8855 + }, + { + "epoch": 0.17728398768861198, + "grad_norm": 0.949200451374054, + "learning_rate": 9.441874741194877e-06, + "loss": 0.3123, + "step": 8856 + }, + { + "epoch": 0.1773040062057403, + "grad_norm": 1.0300564765930176, + "learning_rate": 9.441725893389715e-06, + "loss": 0.3116, + "step": 8857 + }, + { + "epoch": 0.17732402472286865, + "grad_norm": 1.743106722831726, + "learning_rate": 9.441577026912493e-06, + "loss": 0.8799, + "step": 8858 + }, + { + "epoch": 0.177344043239997, + "grad_norm": 1.0633968114852905, + "learning_rate": 9.44142814176384e-06, + "loss": 0.3071, + "step": 8859 + }, + { + "epoch": 0.17736406175712535, + "grad_norm": 1.109047532081604, + "learning_rate": 9.441279237944377e-06, + "loss": 0.3307, + "step": 8860 + }, + { + "epoch": 0.17738408027425367, + "grad_norm": 1.044442057609558, + "learning_rate": 9.441130315454731e-06, + "loss": 0.3259, + "step": 8861 + }, + { + "epoch": 0.17740409879138203, + "grad_norm": 1.172340750694275, + "learning_rate": 9.440981374295531e-06, + "loss": 0.3864, + "step": 8862 + }, + { + "epoch": 0.17742411730851038, + "grad_norm": 1.1395457983016968, + "learning_rate": 9.440832414467399e-06, + "loss": 0.2918, + "step": 8863 + }, + { + "epoch": 0.17744413582563873, + "grad_norm": 1.1358901262283325, + "learning_rate": 9.440683435970963e-06, + "loss": 0.3742, + "step": 8864 + }, + { + "epoch": 0.17746415434276705, + "grad_norm": 1.2349404096603394, + "learning_rate": 9.440534438806852e-06, + "loss": 0.3538, + "step": 8865 + }, + { + "epoch": 0.1774841728598954, + "grad_norm": 1.0774716138839722, + "learning_rate": 9.440385422975688e-06, + "loss": 0.3411, + "step": 8866 + }, + { + "epoch": 0.17750419137702375, + "grad_norm": 1.7963073253631592, + "learning_rate": 9.440236388478099e-06, + "loss": 0.8909, + "step": 8867 + }, + { + "epoch": 0.1775242098941521, + "grad_norm": 1.2271299362182617, + "learning_rate": 9.440087335314713e-06, + "loss": 0.3667, + "step": 8868 + }, + { + "epoch": 0.17754422841128042, + "grad_norm": 1.0882755517959595, + "learning_rate": 9.439938263486154e-06, + "loss": 0.3339, + "step": 8869 + }, + { + "epoch": 0.17756424692840878, + "grad_norm": 1.00246262550354, + "learning_rate": 9.439789172993051e-06, + "loss": 0.2848, + "step": 8870 + }, + { + "epoch": 0.17758426544553713, + "grad_norm": 1.1039965152740479, + "learning_rate": 9.43964006383603e-06, + "loss": 0.353, + "step": 8871 + }, + { + "epoch": 0.17760428396266548, + "grad_norm": 1.263123869895935, + "learning_rate": 9.439490936015716e-06, + "loss": 0.3232, + "step": 8872 + }, + { + "epoch": 0.1776243024797938, + "grad_norm": 1.0046221017837524, + "learning_rate": 9.439341789532739e-06, + "loss": 0.3055, + "step": 8873 + }, + { + "epoch": 0.17764432099692215, + "grad_norm": 1.1110798120498657, + "learning_rate": 9.439192624387724e-06, + "loss": 0.3254, + "step": 8874 + }, + { + "epoch": 0.1776643395140505, + "grad_norm": 1.3071876764297485, + "learning_rate": 9.439043440581299e-06, + "loss": 0.3208, + "step": 8875 + }, + { + "epoch": 0.17768435803117885, + "grad_norm": 0.9964504837989807, + "learning_rate": 9.43889423811409e-06, + "loss": 0.3089, + "step": 8876 + }, + { + "epoch": 0.17770437654830717, + "grad_norm": 1.0892573595046997, + "learning_rate": 9.438745016986725e-06, + "loss": 0.3297, + "step": 8877 + }, + { + "epoch": 0.17772439506543553, + "grad_norm": 1.0501455068588257, + "learning_rate": 9.438595777199832e-06, + "loss": 0.3142, + "step": 8878 + }, + { + "epoch": 0.17774441358256388, + "grad_norm": 1.0539096593856812, + "learning_rate": 9.438446518754036e-06, + "loss": 0.3029, + "step": 8879 + }, + { + "epoch": 0.17776443209969223, + "grad_norm": 1.0858958959579468, + "learning_rate": 9.438297241649968e-06, + "loss": 0.3021, + "step": 8880 + }, + { + "epoch": 0.17778445061682055, + "grad_norm": 1.0700680017471313, + "learning_rate": 9.438147945888252e-06, + "loss": 0.36, + "step": 8881 + }, + { + "epoch": 0.1778044691339489, + "grad_norm": 1.0779495239257812, + "learning_rate": 9.437998631469518e-06, + "loss": 0.3363, + "step": 8882 + }, + { + "epoch": 0.17782448765107725, + "grad_norm": 1.1820441484451294, + "learning_rate": 9.437849298394392e-06, + "loss": 0.3021, + "step": 8883 + }, + { + "epoch": 0.1778445061682056, + "grad_norm": 1.1718437671661377, + "learning_rate": 9.437699946663502e-06, + "loss": 0.3119, + "step": 8884 + }, + { + "epoch": 0.17786452468533392, + "grad_norm": 1.079917550086975, + "learning_rate": 9.437550576277477e-06, + "loss": 0.3537, + "step": 8885 + }, + { + "epoch": 0.17788454320246228, + "grad_norm": 1.3618475198745728, + "learning_rate": 9.437401187236945e-06, + "loss": 0.3277, + "step": 8886 + }, + { + "epoch": 0.17790456171959063, + "grad_norm": 1.1673811674118042, + "learning_rate": 9.437251779542534e-06, + "loss": 0.3321, + "step": 8887 + }, + { + "epoch": 0.17792458023671898, + "grad_norm": 1.974327564239502, + "learning_rate": 9.437102353194869e-06, + "loss": 0.8261, + "step": 8888 + }, + { + "epoch": 0.1779445987538473, + "grad_norm": 1.0888655185699463, + "learning_rate": 9.436952908194582e-06, + "loss": 0.347, + "step": 8889 + }, + { + "epoch": 0.17796461727097565, + "grad_norm": 1.1224925518035889, + "learning_rate": 9.436803444542302e-06, + "loss": 0.3277, + "step": 8890 + }, + { + "epoch": 0.177984635788104, + "grad_norm": 1.943023443222046, + "learning_rate": 9.436653962238653e-06, + "loss": 0.8516, + "step": 8891 + }, + { + "epoch": 0.17800465430523235, + "grad_norm": 1.063457727432251, + "learning_rate": 9.436504461284264e-06, + "loss": 0.3107, + "step": 8892 + }, + { + "epoch": 0.17802467282236067, + "grad_norm": 1.3029613494873047, + "learning_rate": 9.436354941679766e-06, + "loss": 0.3343, + "step": 8893 + }, + { + "epoch": 0.17804469133948903, + "grad_norm": 2.1062185764312744, + "learning_rate": 9.436205403425789e-06, + "loss": 0.8002, + "step": 8894 + }, + { + "epoch": 0.17806470985661738, + "grad_norm": 1.0588444471359253, + "learning_rate": 9.436055846522955e-06, + "loss": 0.3588, + "step": 8895 + }, + { + "epoch": 0.17808472837374573, + "grad_norm": 1.0788609981536865, + "learning_rate": 9.4359062709719e-06, + "loss": 0.3221, + "step": 8896 + }, + { + "epoch": 0.17810474689087405, + "grad_norm": 1.1996396780014038, + "learning_rate": 9.435756676773248e-06, + "loss": 0.3214, + "step": 8897 + }, + { + "epoch": 0.1781247654080024, + "grad_norm": 2.057831048965454, + "learning_rate": 9.435607063927632e-06, + "loss": 0.7959, + "step": 8898 + }, + { + "epoch": 0.17814478392513075, + "grad_norm": 1.2746669054031372, + "learning_rate": 9.435457432435677e-06, + "loss": 0.3435, + "step": 8899 + }, + { + "epoch": 0.1781648024422591, + "grad_norm": 1.0745645761489868, + "learning_rate": 9.435307782298014e-06, + "loss": 0.3526, + "step": 8900 + }, + { + "epoch": 0.17818482095938742, + "grad_norm": 1.0415700674057007, + "learning_rate": 9.435158113515269e-06, + "loss": 0.3244, + "step": 8901 + }, + { + "epoch": 0.17820483947651578, + "grad_norm": 1.132400393486023, + "learning_rate": 9.435008426088076e-06, + "loss": 0.3246, + "step": 8902 + }, + { + "epoch": 0.17822485799364413, + "grad_norm": 1.0760810375213623, + "learning_rate": 9.434858720017062e-06, + "loss": 0.3439, + "step": 8903 + }, + { + "epoch": 0.17824487651077248, + "grad_norm": 1.2755604982376099, + "learning_rate": 9.434708995302854e-06, + "loss": 0.378, + "step": 8904 + }, + { + "epoch": 0.1782648950279008, + "grad_norm": 1.0089625120162964, + "learning_rate": 9.434559251946087e-06, + "loss": 0.3323, + "step": 8905 + }, + { + "epoch": 0.17828491354502915, + "grad_norm": 1.1450735330581665, + "learning_rate": 9.434409489947384e-06, + "loss": 0.2931, + "step": 8906 + }, + { + "epoch": 0.1783049320621575, + "grad_norm": 2.1012275218963623, + "learning_rate": 9.43425970930738e-06, + "loss": 0.8312, + "step": 8907 + }, + { + "epoch": 0.17832495057928585, + "grad_norm": 1.0760927200317383, + "learning_rate": 9.434109910026702e-06, + "loss": 0.3625, + "step": 8908 + }, + { + "epoch": 0.17834496909641417, + "grad_norm": 1.1146876811981201, + "learning_rate": 9.433960092105982e-06, + "loss": 0.316, + "step": 8909 + }, + { + "epoch": 0.17836498761354252, + "grad_norm": 1.0919535160064697, + "learning_rate": 9.433810255545846e-06, + "loss": 0.3247, + "step": 8910 + }, + { + "epoch": 0.17838500613067088, + "grad_norm": 1.9363322257995605, + "learning_rate": 9.433660400346926e-06, + "loss": 0.8283, + "step": 8911 + }, + { + "epoch": 0.17840502464779923, + "grad_norm": 1.1514781713485718, + "learning_rate": 9.433510526509852e-06, + "loss": 0.3171, + "step": 8912 + }, + { + "epoch": 0.17842504316492755, + "grad_norm": 1.1018341779708862, + "learning_rate": 9.433360634035253e-06, + "loss": 0.3077, + "step": 8913 + }, + { + "epoch": 0.1784450616820559, + "grad_norm": 1.028151273727417, + "learning_rate": 9.433210722923761e-06, + "loss": 0.3718, + "step": 8914 + }, + { + "epoch": 0.17846508019918425, + "grad_norm": 1.3677985668182373, + "learning_rate": 9.433060793176006e-06, + "loss": 0.3017, + "step": 8915 + }, + { + "epoch": 0.1784850987163126, + "grad_norm": 1.0502687692642212, + "learning_rate": 9.432910844792616e-06, + "loss": 0.3304, + "step": 8916 + }, + { + "epoch": 0.17850511723344092, + "grad_norm": 0.9999079704284668, + "learning_rate": 9.432760877774222e-06, + "loss": 0.3095, + "step": 8917 + }, + { + "epoch": 0.17852513575056927, + "grad_norm": 1.0518256425857544, + "learning_rate": 9.432610892121457e-06, + "loss": 0.3297, + "step": 8918 + }, + { + "epoch": 0.17854515426769763, + "grad_norm": 1.093801736831665, + "learning_rate": 9.432460887834948e-06, + "loss": 0.3511, + "step": 8919 + }, + { + "epoch": 0.17856517278482598, + "grad_norm": 1.8174166679382324, + "learning_rate": 9.432310864915329e-06, + "loss": 0.8089, + "step": 8920 + }, + { + "epoch": 0.1785851913019543, + "grad_norm": 1.1327770948410034, + "learning_rate": 9.432160823363228e-06, + "loss": 0.3386, + "step": 8921 + }, + { + "epoch": 0.17860520981908265, + "grad_norm": 1.1119129657745361, + "learning_rate": 9.432010763179279e-06, + "loss": 0.3606, + "step": 8922 + }, + { + "epoch": 0.178625228336211, + "grad_norm": 1.054107904434204, + "learning_rate": 9.431860684364108e-06, + "loss": 0.3424, + "step": 8923 + }, + { + "epoch": 0.17864524685333935, + "grad_norm": 1.0960084199905396, + "learning_rate": 9.431710586918347e-06, + "loss": 0.3374, + "step": 8924 + }, + { + "epoch": 0.17866526537046767, + "grad_norm": 1.2222559452056885, + "learning_rate": 9.43156047084263e-06, + "loss": 0.306, + "step": 8925 + }, + { + "epoch": 0.17868528388759602, + "grad_norm": 1.9784268140792847, + "learning_rate": 9.431410336137588e-06, + "loss": 0.7901, + "step": 8926 + }, + { + "epoch": 0.17870530240472438, + "grad_norm": 1.1525381803512573, + "learning_rate": 9.43126018280385e-06, + "loss": 0.325, + "step": 8927 + }, + { + "epoch": 0.17872532092185273, + "grad_norm": 1.063774585723877, + "learning_rate": 9.431110010842046e-06, + "loss": 0.3397, + "step": 8928 + }, + { + "epoch": 0.17874533943898105, + "grad_norm": 1.3100377321243286, + "learning_rate": 9.43095982025281e-06, + "loss": 0.3699, + "step": 8929 + }, + { + "epoch": 0.1787653579561094, + "grad_norm": 1.0935413837432861, + "learning_rate": 9.430809611036774e-06, + "loss": 0.289, + "step": 8930 + }, + { + "epoch": 0.17878537647323775, + "grad_norm": 1.0593583583831787, + "learning_rate": 9.430659383194565e-06, + "loss": 0.3403, + "step": 8931 + }, + { + "epoch": 0.1788053949903661, + "grad_norm": 1.9509830474853516, + "learning_rate": 9.43050913672682e-06, + "loss": 0.821, + "step": 8932 + }, + { + "epoch": 0.17882541350749442, + "grad_norm": 1.1397701501846313, + "learning_rate": 9.430358871634168e-06, + "loss": 0.3316, + "step": 8933 + }, + { + "epoch": 0.17884543202462277, + "grad_norm": 1.1151237487792969, + "learning_rate": 9.43020858791724e-06, + "loss": 0.3671, + "step": 8934 + }, + { + "epoch": 0.17886545054175113, + "grad_norm": 1.2777297496795654, + "learning_rate": 9.430058285576668e-06, + "loss": 0.2868, + "step": 8935 + }, + { + "epoch": 0.17888546905887948, + "grad_norm": 1.1341278553009033, + "learning_rate": 9.429907964613084e-06, + "loss": 0.3585, + "step": 8936 + }, + { + "epoch": 0.1789054875760078, + "grad_norm": 1.1628742218017578, + "learning_rate": 9.429757625027121e-06, + "loss": 0.3365, + "step": 8937 + }, + { + "epoch": 0.17892550609313615, + "grad_norm": 1.1111221313476562, + "learning_rate": 9.42960726681941e-06, + "loss": 0.3465, + "step": 8938 + }, + { + "epoch": 0.1789455246102645, + "grad_norm": 1.0492280721664429, + "learning_rate": 9.429456889990585e-06, + "loss": 0.3174, + "step": 8939 + }, + { + "epoch": 0.17896554312739285, + "grad_norm": 1.188877820968628, + "learning_rate": 9.429306494541273e-06, + "loss": 0.3095, + "step": 8940 + }, + { + "epoch": 0.17898556164452117, + "grad_norm": 1.2017326354980469, + "learning_rate": 9.429156080472114e-06, + "loss": 0.3279, + "step": 8941 + }, + { + "epoch": 0.17900558016164952, + "grad_norm": 1.8570116758346558, + "learning_rate": 9.429005647783733e-06, + "loss": 0.8661, + "step": 8942 + }, + { + "epoch": 0.17902559867877788, + "grad_norm": 1.1362818479537964, + "learning_rate": 9.428855196476765e-06, + "loss": 0.3437, + "step": 8943 + }, + { + "epoch": 0.17904561719590623, + "grad_norm": 1.1524747610092163, + "learning_rate": 9.428704726551846e-06, + "loss": 0.3923, + "step": 8944 + }, + { + "epoch": 0.17906563571303455, + "grad_norm": 1.177388310432434, + "learning_rate": 9.428554238009601e-06, + "loss": 0.3652, + "step": 8945 + }, + { + "epoch": 0.1790856542301629, + "grad_norm": 1.176499843597412, + "learning_rate": 9.42840373085067e-06, + "loss": 0.3314, + "step": 8946 + }, + { + "epoch": 0.17910567274729125, + "grad_norm": 1.1180553436279297, + "learning_rate": 9.428253205075681e-06, + "loss": 0.3374, + "step": 8947 + }, + { + "epoch": 0.1791256912644196, + "grad_norm": 1.175363540649414, + "learning_rate": 9.42810266068527e-06, + "loss": 0.2888, + "step": 8948 + }, + { + "epoch": 0.17914570978154792, + "grad_norm": 1.112331509590149, + "learning_rate": 9.427952097680069e-06, + "loss": 0.3574, + "step": 8949 + }, + { + "epoch": 0.17916572829867627, + "grad_norm": 1.0989550352096558, + "learning_rate": 9.427801516060709e-06, + "loss": 0.3574, + "step": 8950 + }, + { + "epoch": 0.17918574681580463, + "grad_norm": 1.1007795333862305, + "learning_rate": 9.427650915827824e-06, + "loss": 0.3898, + "step": 8951 + }, + { + "epoch": 0.17920576533293298, + "grad_norm": 1.1509406566619873, + "learning_rate": 9.427500296982048e-06, + "loss": 0.3451, + "step": 8952 + }, + { + "epoch": 0.1792257838500613, + "grad_norm": 0.9942290782928467, + "learning_rate": 9.427349659524015e-06, + "loss": 0.3023, + "step": 8953 + }, + { + "epoch": 0.17924580236718965, + "grad_norm": 1.0373649597167969, + "learning_rate": 9.427199003454354e-06, + "loss": 0.2904, + "step": 8954 + }, + { + "epoch": 0.179265820884318, + "grad_norm": 1.1747677326202393, + "learning_rate": 9.427048328773703e-06, + "loss": 0.3494, + "step": 8955 + }, + { + "epoch": 0.17928583940144635, + "grad_norm": 1.2167400121688843, + "learning_rate": 9.426897635482692e-06, + "loss": 0.3659, + "step": 8956 + }, + { + "epoch": 0.17930585791857467, + "grad_norm": 1.0092796087265015, + "learning_rate": 9.426746923581957e-06, + "loss": 0.3455, + "step": 8957 + }, + { + "epoch": 0.17932587643570302, + "grad_norm": 1.049514651298523, + "learning_rate": 9.42659619307213e-06, + "loss": 0.3297, + "step": 8958 + }, + { + "epoch": 0.17934589495283138, + "grad_norm": 1.1298694610595703, + "learning_rate": 9.426445443953847e-06, + "loss": 0.3498, + "step": 8959 + }, + { + "epoch": 0.17936591346995973, + "grad_norm": 1.1246343851089478, + "learning_rate": 9.426294676227739e-06, + "loss": 0.294, + "step": 8960 + }, + { + "epoch": 0.17938593198708805, + "grad_norm": 1.0755566358566284, + "learning_rate": 9.42614388989444e-06, + "loss": 0.3235, + "step": 8961 + }, + { + "epoch": 0.1794059505042164, + "grad_norm": 1.127565622329712, + "learning_rate": 9.425993084954584e-06, + "loss": 0.338, + "step": 8962 + }, + { + "epoch": 0.17942596902134475, + "grad_norm": 1.2470999956130981, + "learning_rate": 9.425842261408808e-06, + "loss": 0.3542, + "step": 8963 + }, + { + "epoch": 0.1794459875384731, + "grad_norm": 1.162621021270752, + "learning_rate": 9.425691419257742e-06, + "loss": 0.3368, + "step": 8964 + }, + { + "epoch": 0.17946600605560142, + "grad_norm": 1.2381311655044556, + "learning_rate": 9.425540558502022e-06, + "loss": 0.3321, + "step": 8965 + }, + { + "epoch": 0.17948602457272977, + "grad_norm": 1.9839686155319214, + "learning_rate": 9.42538967914228e-06, + "loss": 0.8667, + "step": 8966 + }, + { + "epoch": 0.17950604308985812, + "grad_norm": 1.0418946743011475, + "learning_rate": 9.425238781179154e-06, + "loss": 0.3348, + "step": 8967 + }, + { + "epoch": 0.17952606160698648, + "grad_norm": 0.9725578427314758, + "learning_rate": 9.425087864613277e-06, + "loss": 0.335, + "step": 8968 + }, + { + "epoch": 0.1795460801241148, + "grad_norm": 1.1188857555389404, + "learning_rate": 9.424936929445282e-06, + "loss": 0.2888, + "step": 8969 + }, + { + "epoch": 0.17956609864124315, + "grad_norm": 1.0112384557724, + "learning_rate": 9.424785975675806e-06, + "loss": 0.3201, + "step": 8970 + }, + { + "epoch": 0.1795861171583715, + "grad_norm": 1.0100281238555908, + "learning_rate": 9.42463500330548e-06, + "loss": 0.3475, + "step": 8971 + }, + { + "epoch": 0.17960613567549985, + "grad_norm": 1.0505841970443726, + "learning_rate": 9.424484012334942e-06, + "loss": 0.3186, + "step": 8972 + }, + { + "epoch": 0.17962615419262817, + "grad_norm": 1.136738896369934, + "learning_rate": 9.424333002764823e-06, + "loss": 0.3049, + "step": 8973 + }, + { + "epoch": 0.17964617270975652, + "grad_norm": 1.1288421154022217, + "learning_rate": 9.424181974595764e-06, + "loss": 0.3118, + "step": 8974 + }, + { + "epoch": 0.17966619122688487, + "grad_norm": 1.1824451684951782, + "learning_rate": 9.424030927828393e-06, + "loss": 0.3234, + "step": 8975 + }, + { + "epoch": 0.17968620974401323, + "grad_norm": 1.0443540811538696, + "learning_rate": 9.423879862463347e-06, + "loss": 0.3813, + "step": 8976 + }, + { + "epoch": 0.17970622826114155, + "grad_norm": 1.0645831823349, + "learning_rate": 9.423728778501266e-06, + "loss": 0.3177, + "step": 8977 + }, + { + "epoch": 0.1797262467782699, + "grad_norm": 1.8850713968276978, + "learning_rate": 9.42357767594278e-06, + "loss": 0.9169, + "step": 8978 + }, + { + "epoch": 0.17974626529539825, + "grad_norm": 1.255324125289917, + "learning_rate": 9.423426554788525e-06, + "loss": 0.3283, + "step": 8979 + }, + { + "epoch": 0.1797662838125266, + "grad_norm": 1.9327399730682373, + "learning_rate": 9.423275415039134e-06, + "loss": 0.901, + "step": 8980 + }, + { + "epoch": 0.17978630232965492, + "grad_norm": 1.2215608358383179, + "learning_rate": 9.42312425669525e-06, + "loss": 0.312, + "step": 8981 + }, + { + "epoch": 0.17980632084678327, + "grad_norm": 1.1451101303100586, + "learning_rate": 9.422973079757499e-06, + "loss": 0.3185, + "step": 8982 + }, + { + "epoch": 0.17982633936391162, + "grad_norm": 1.0871273279190063, + "learning_rate": 9.422821884226523e-06, + "loss": 0.3802, + "step": 8983 + }, + { + "epoch": 0.17984635788103998, + "grad_norm": 1.2827718257904053, + "learning_rate": 9.422670670102955e-06, + "loss": 0.3209, + "step": 8984 + }, + { + "epoch": 0.1798663763981683, + "grad_norm": 1.1304036378860474, + "learning_rate": 9.422519437387431e-06, + "loss": 0.3259, + "step": 8985 + }, + { + "epoch": 0.17988639491529665, + "grad_norm": 1.1514956951141357, + "learning_rate": 9.422368186080588e-06, + "loss": 0.3471, + "step": 8986 + }, + { + "epoch": 0.179906413432425, + "grad_norm": 1.3862932920455933, + "learning_rate": 9.422216916183062e-06, + "loss": 0.3109, + "step": 8987 + }, + { + "epoch": 0.17992643194955335, + "grad_norm": 1.1334885358810425, + "learning_rate": 9.422065627695485e-06, + "loss": 0.3217, + "step": 8988 + }, + { + "epoch": 0.17994645046668167, + "grad_norm": 1.0938743352890015, + "learning_rate": 9.421914320618498e-06, + "loss": 0.3153, + "step": 8989 + }, + { + "epoch": 0.17996646898381002, + "grad_norm": 1.1080663204193115, + "learning_rate": 9.421762994952735e-06, + "loss": 0.3248, + "step": 8990 + }, + { + "epoch": 0.17998648750093837, + "grad_norm": 1.1982712745666504, + "learning_rate": 9.42161165069883e-06, + "loss": 0.3455, + "step": 8991 + }, + { + "epoch": 0.18000650601806673, + "grad_norm": 1.1938211917877197, + "learning_rate": 9.421460287857422e-06, + "loss": 0.372, + "step": 8992 + }, + { + "epoch": 0.18002652453519505, + "grad_norm": 1.1028573513031006, + "learning_rate": 9.421308906429148e-06, + "loss": 0.335, + "step": 8993 + }, + { + "epoch": 0.1800465430523234, + "grad_norm": 1.0110673904418945, + "learning_rate": 9.421157506414639e-06, + "loss": 0.2984, + "step": 8994 + }, + { + "epoch": 0.18006656156945175, + "grad_norm": 1.144150733947754, + "learning_rate": 9.42100608781454e-06, + "loss": 0.3625, + "step": 8995 + }, + { + "epoch": 0.1800865800865801, + "grad_norm": 1.1857664585113525, + "learning_rate": 9.420854650629479e-06, + "loss": 0.3563, + "step": 8996 + }, + { + "epoch": 0.18010659860370842, + "grad_norm": 1.2435356378555298, + "learning_rate": 9.420703194860098e-06, + "loss": 0.4297, + "step": 8997 + }, + { + "epoch": 0.18012661712083677, + "grad_norm": 1.1031501293182373, + "learning_rate": 9.420551720507032e-06, + "loss": 0.3114, + "step": 8998 + }, + { + "epoch": 0.18014663563796512, + "grad_norm": 1.1243669986724854, + "learning_rate": 9.420400227570918e-06, + "loss": 0.3511, + "step": 8999 + }, + { + "epoch": 0.18016665415509348, + "grad_norm": 1.2150987386703491, + "learning_rate": 9.420248716052393e-06, + "loss": 0.3263, + "step": 9000 + }, + { + "epoch": 0.1801866726722218, + "grad_norm": 1.0019370317459106, + "learning_rate": 9.420097185952092e-06, + "loss": 0.3153, + "step": 9001 + }, + { + "epoch": 0.18020669118935015, + "grad_norm": 1.8606228828430176, + "learning_rate": 9.419945637270656e-06, + "loss": 0.8533, + "step": 9002 + }, + { + "epoch": 0.1802267097064785, + "grad_norm": 1.2698171138763428, + "learning_rate": 9.41979407000872e-06, + "loss": 0.3745, + "step": 9003 + }, + { + "epoch": 0.18024672822360682, + "grad_norm": 1.0688380002975464, + "learning_rate": 9.419642484166918e-06, + "loss": 0.3413, + "step": 9004 + }, + { + "epoch": 0.18026674674073517, + "grad_norm": 1.106673240661621, + "learning_rate": 9.419490879745891e-06, + "loss": 0.3459, + "step": 9005 + }, + { + "epoch": 0.18028676525786352, + "grad_norm": 1.144382357597351, + "learning_rate": 9.419339256746276e-06, + "loss": 0.3466, + "step": 9006 + }, + { + "epoch": 0.18030678377499187, + "grad_norm": 0.9656162261962891, + "learning_rate": 9.41918761516871e-06, + "loss": 0.2828, + "step": 9007 + }, + { + "epoch": 0.1803268022921202, + "grad_norm": 1.1434288024902344, + "learning_rate": 9.41903595501383e-06, + "loss": 0.3507, + "step": 9008 + }, + { + "epoch": 0.18034682080924855, + "grad_norm": 1.093453288078308, + "learning_rate": 9.418884276282274e-06, + "loss": 0.2892, + "step": 9009 + }, + { + "epoch": 0.1803668393263769, + "grad_norm": 1.9080718755722046, + "learning_rate": 9.41873257897468e-06, + "loss": 0.8181, + "step": 9010 + }, + { + "epoch": 0.18038685784350525, + "grad_norm": 1.0022354125976562, + "learning_rate": 9.418580863091684e-06, + "loss": 0.3451, + "step": 9011 + }, + { + "epoch": 0.18040687636063357, + "grad_norm": 1.0561617612838745, + "learning_rate": 9.418429128633925e-06, + "loss": 0.3406, + "step": 9012 + }, + { + "epoch": 0.18042689487776192, + "grad_norm": 1.9931873083114624, + "learning_rate": 9.418277375602042e-06, + "loss": 0.7473, + "step": 9013 + }, + { + "epoch": 0.18044691339489027, + "grad_norm": 1.0969091653823853, + "learning_rate": 9.418125603996671e-06, + "loss": 0.3445, + "step": 9014 + }, + { + "epoch": 0.18046693191201862, + "grad_norm": 1.3270286321640015, + "learning_rate": 9.41797381381845e-06, + "loss": 0.3797, + "step": 9015 + }, + { + "epoch": 0.18048695042914695, + "grad_norm": 1.0434532165527344, + "learning_rate": 9.41782200506802e-06, + "loss": 0.3124, + "step": 9016 + }, + { + "epoch": 0.1805069689462753, + "grad_norm": 1.0233572721481323, + "learning_rate": 9.417670177746016e-06, + "loss": 0.314, + "step": 9017 + }, + { + "epoch": 0.18052698746340365, + "grad_norm": 1.1428526639938354, + "learning_rate": 9.417518331853077e-06, + "loss": 0.3104, + "step": 9018 + }, + { + "epoch": 0.180547005980532, + "grad_norm": 1.1342836618423462, + "learning_rate": 9.417366467389842e-06, + "loss": 0.3323, + "step": 9019 + }, + { + "epoch": 0.18056702449766032, + "grad_norm": 1.1965104341506958, + "learning_rate": 9.417214584356948e-06, + "loss": 0.3219, + "step": 9020 + }, + { + "epoch": 0.18058704301478867, + "grad_norm": 1.0308783054351807, + "learning_rate": 9.417062682755035e-06, + "loss": 0.3614, + "step": 9021 + }, + { + "epoch": 0.18060706153191702, + "grad_norm": 1.2402071952819824, + "learning_rate": 9.416910762584742e-06, + "loss": 0.3358, + "step": 9022 + }, + { + "epoch": 0.18062708004904537, + "grad_norm": 1.123040795326233, + "learning_rate": 9.416758823846704e-06, + "loss": 0.3159, + "step": 9023 + }, + { + "epoch": 0.1806470985661737, + "grad_norm": 1.8776671886444092, + "learning_rate": 9.416606866541565e-06, + "loss": 0.8453, + "step": 9024 + }, + { + "epoch": 0.18066711708330205, + "grad_norm": 1.0943084955215454, + "learning_rate": 9.41645489066996e-06, + "loss": 0.3225, + "step": 9025 + }, + { + "epoch": 0.1806871356004304, + "grad_norm": 1.061980962753296, + "learning_rate": 9.41630289623253e-06, + "loss": 0.2668, + "step": 9026 + }, + { + "epoch": 0.18070715411755875, + "grad_norm": 2.003854274749756, + "learning_rate": 9.416150883229912e-06, + "loss": 0.7902, + "step": 9027 + }, + { + "epoch": 0.18072717263468707, + "grad_norm": 1.3258874416351318, + "learning_rate": 9.415998851662747e-06, + "loss": 0.3697, + "step": 9028 + }, + { + "epoch": 0.18074719115181542, + "grad_norm": 1.0172439813613892, + "learning_rate": 9.415846801531672e-06, + "loss": 0.3201, + "step": 9029 + }, + { + "epoch": 0.18076720966894377, + "grad_norm": 1.110475778579712, + "learning_rate": 9.415694732837328e-06, + "loss": 0.2888, + "step": 9030 + }, + { + "epoch": 0.18078722818607212, + "grad_norm": 1.0447168350219727, + "learning_rate": 9.415542645580353e-06, + "loss": 0.3286, + "step": 9031 + }, + { + "epoch": 0.18080724670320045, + "grad_norm": 1.0655713081359863, + "learning_rate": 9.415390539761389e-06, + "loss": 0.2846, + "step": 9032 + }, + { + "epoch": 0.1808272652203288, + "grad_norm": 1.2025550603866577, + "learning_rate": 9.415238415381071e-06, + "loss": 0.3068, + "step": 9033 + }, + { + "epoch": 0.18084728373745715, + "grad_norm": 1.267480731010437, + "learning_rate": 9.415086272440042e-06, + "loss": 0.3047, + "step": 9034 + }, + { + "epoch": 0.1808673022545855, + "grad_norm": 1.1108241081237793, + "learning_rate": 9.41493411093894e-06, + "loss": 0.3255, + "step": 9035 + }, + { + "epoch": 0.18088732077171382, + "grad_norm": 1.2237883806228638, + "learning_rate": 9.414781930878406e-06, + "loss": 0.3118, + "step": 9036 + }, + { + "epoch": 0.18090733928884217, + "grad_norm": 1.0889184474945068, + "learning_rate": 9.414629732259077e-06, + "loss": 0.3511, + "step": 9037 + }, + { + "epoch": 0.18092735780597052, + "grad_norm": 1.3129335641860962, + "learning_rate": 9.414477515081597e-06, + "loss": 0.3096, + "step": 9038 + }, + { + "epoch": 0.18094737632309887, + "grad_norm": 1.038892388343811, + "learning_rate": 9.414325279346603e-06, + "loss": 0.3752, + "step": 9039 + }, + { + "epoch": 0.1809673948402272, + "grad_norm": 1.1557250022888184, + "learning_rate": 9.414173025054733e-06, + "loss": 0.3044, + "step": 9040 + }, + { + "epoch": 0.18098741335735555, + "grad_norm": 0.9936686158180237, + "learning_rate": 9.414020752206631e-06, + "loss": 0.3151, + "step": 9041 + }, + { + "epoch": 0.1810074318744839, + "grad_norm": 1.1814082860946655, + "learning_rate": 9.413868460802937e-06, + "loss": 0.3516, + "step": 9042 + }, + { + "epoch": 0.18102745039161225, + "grad_norm": 1.0111644268035889, + "learning_rate": 9.413716150844289e-06, + "loss": 0.2994, + "step": 9043 + }, + { + "epoch": 0.18104746890874057, + "grad_norm": 0.9961825609207153, + "learning_rate": 9.413563822331328e-06, + "loss": 0.3662, + "step": 9044 + }, + { + "epoch": 0.18106748742586892, + "grad_norm": 1.1234545707702637, + "learning_rate": 9.413411475264695e-06, + "loss": 0.3092, + "step": 9045 + }, + { + "epoch": 0.18108750594299727, + "grad_norm": 1.026937484741211, + "learning_rate": 9.41325910964503e-06, + "loss": 0.3121, + "step": 9046 + }, + { + "epoch": 0.18110752446012562, + "grad_norm": 1.114938497543335, + "learning_rate": 9.413106725472973e-06, + "loss": 0.3633, + "step": 9047 + }, + { + "epoch": 0.18112754297725395, + "grad_norm": 1.0364986658096313, + "learning_rate": 9.412954322749166e-06, + "loss": 0.3108, + "step": 9048 + }, + { + "epoch": 0.1811475614943823, + "grad_norm": 1.1851550340652466, + "learning_rate": 9.412801901474247e-06, + "loss": 0.3278, + "step": 9049 + }, + { + "epoch": 0.18116758001151065, + "grad_norm": 1.0554677248001099, + "learning_rate": 9.41264946164886e-06, + "loss": 0.3528, + "step": 9050 + }, + { + "epoch": 0.181187598528639, + "grad_norm": 1.9361094236373901, + "learning_rate": 9.412497003273642e-06, + "loss": 0.8286, + "step": 9051 + }, + { + "epoch": 0.18120761704576732, + "grad_norm": 1.0398656129837036, + "learning_rate": 9.412344526349238e-06, + "loss": 0.3, + "step": 9052 + }, + { + "epoch": 0.18122763556289567, + "grad_norm": 1.1634501218795776, + "learning_rate": 9.412192030876286e-06, + "loss": 0.3028, + "step": 9053 + }, + { + "epoch": 0.18124765408002402, + "grad_norm": 1.0059808492660522, + "learning_rate": 9.41203951685543e-06, + "loss": 0.2955, + "step": 9054 + }, + { + "epoch": 0.18126767259715237, + "grad_norm": 1.1773018836975098, + "learning_rate": 9.411886984287308e-06, + "loss": 0.3286, + "step": 9055 + }, + { + "epoch": 0.1812876911142807, + "grad_norm": 1.1538835763931274, + "learning_rate": 9.411734433172564e-06, + "loss": 0.362, + "step": 9056 + }, + { + "epoch": 0.18130770963140905, + "grad_norm": 1.1870853900909424, + "learning_rate": 9.411581863511836e-06, + "loss": 0.3506, + "step": 9057 + }, + { + "epoch": 0.1813277281485374, + "grad_norm": 1.0894356966018677, + "learning_rate": 9.411429275305768e-06, + "loss": 0.3267, + "step": 9058 + }, + { + "epoch": 0.18134774666566575, + "grad_norm": 1.1975756883621216, + "learning_rate": 9.411276668555001e-06, + "loss": 0.3315, + "step": 9059 + }, + { + "epoch": 0.18136776518279407, + "grad_norm": 1.048231601715088, + "learning_rate": 9.411124043260176e-06, + "loss": 0.2698, + "step": 9060 + }, + { + "epoch": 0.18138778369992242, + "grad_norm": 1.1183006763458252, + "learning_rate": 9.410971399421935e-06, + "loss": 0.3163, + "step": 9061 + }, + { + "epoch": 0.18140780221705077, + "grad_norm": 1.2087173461914062, + "learning_rate": 9.410818737040919e-06, + "loss": 0.3022, + "step": 9062 + }, + { + "epoch": 0.18142782073417912, + "grad_norm": 1.0340303182601929, + "learning_rate": 9.41066605611777e-06, + "loss": 0.311, + "step": 9063 + }, + { + "epoch": 0.18144783925130745, + "grad_norm": 1.8695404529571533, + "learning_rate": 9.41051335665313e-06, + "loss": 0.8672, + "step": 9064 + }, + { + "epoch": 0.1814678577684358, + "grad_norm": 1.099518895149231, + "learning_rate": 9.410360638647643e-06, + "loss": 0.2972, + "step": 9065 + }, + { + "epoch": 0.18148787628556415, + "grad_norm": 1.7888394594192505, + "learning_rate": 9.410207902101947e-06, + "loss": 0.828, + "step": 9066 + }, + { + "epoch": 0.1815078948026925, + "grad_norm": 1.125996470451355, + "learning_rate": 9.410055147016686e-06, + "loss": 0.3713, + "step": 9067 + }, + { + "epoch": 0.18152791331982082, + "grad_norm": 1.1359457969665527, + "learning_rate": 9.409902373392503e-06, + "loss": 0.364, + "step": 9068 + }, + { + "epoch": 0.18154793183694917, + "grad_norm": 1.2256101369857788, + "learning_rate": 9.409749581230039e-06, + "loss": 0.3419, + "step": 9069 + }, + { + "epoch": 0.18156795035407752, + "grad_norm": 1.833504557609558, + "learning_rate": 9.409596770529936e-06, + "loss": 0.7392, + "step": 9070 + }, + { + "epoch": 0.18158796887120587, + "grad_norm": 1.1085268259048462, + "learning_rate": 9.40944394129284e-06, + "loss": 0.3179, + "step": 9071 + }, + { + "epoch": 0.1816079873883342, + "grad_norm": 1.0798470973968506, + "learning_rate": 9.409291093519388e-06, + "loss": 0.3413, + "step": 9072 + }, + { + "epoch": 0.18162800590546255, + "grad_norm": 1.2415540218353271, + "learning_rate": 9.409138227210225e-06, + "loss": 0.323, + "step": 9073 + }, + { + "epoch": 0.1816480244225909, + "grad_norm": 1.1496278047561646, + "learning_rate": 9.408985342365996e-06, + "loss": 0.3082, + "step": 9074 + }, + { + "epoch": 0.18166804293971925, + "grad_norm": 1.221386194229126, + "learning_rate": 9.408832438987339e-06, + "loss": 0.3528, + "step": 9075 + }, + { + "epoch": 0.18168806145684757, + "grad_norm": 1.2159712314605713, + "learning_rate": 9.408679517074901e-06, + "loss": 0.3254, + "step": 9076 + }, + { + "epoch": 0.18170807997397592, + "grad_norm": 1.7735586166381836, + "learning_rate": 9.408526576629321e-06, + "loss": 0.8952, + "step": 9077 + }, + { + "epoch": 0.18172809849110427, + "grad_norm": 1.0597050189971924, + "learning_rate": 9.408373617651247e-06, + "loss": 0.3019, + "step": 9078 + }, + { + "epoch": 0.18174811700823262, + "grad_norm": 1.1274856328964233, + "learning_rate": 9.408220640141317e-06, + "loss": 0.3145, + "step": 9079 + }, + { + "epoch": 0.18176813552536095, + "grad_norm": 1.1830425262451172, + "learning_rate": 9.408067644100176e-06, + "loss": 0.3152, + "step": 9080 + }, + { + "epoch": 0.1817881540424893, + "grad_norm": 0.9907019138336182, + "learning_rate": 9.407914629528468e-06, + "loss": 0.3274, + "step": 9081 + }, + { + "epoch": 0.18180817255961765, + "grad_norm": 1.1940499544143677, + "learning_rate": 9.407761596426835e-06, + "loss": 0.3115, + "step": 9082 + }, + { + "epoch": 0.181828191076746, + "grad_norm": 1.9032875299453735, + "learning_rate": 9.407608544795921e-06, + "loss": 0.8518, + "step": 9083 + }, + { + "epoch": 0.18184820959387432, + "grad_norm": 1.131461501121521, + "learning_rate": 9.407455474636368e-06, + "loss": 0.3103, + "step": 9084 + }, + { + "epoch": 0.18186822811100267, + "grad_norm": 1.0636146068572998, + "learning_rate": 9.407302385948822e-06, + "loss": 0.3124, + "step": 9085 + }, + { + "epoch": 0.18188824662813102, + "grad_norm": 1.3427276611328125, + "learning_rate": 9.407149278733926e-06, + "loss": 0.3536, + "step": 9086 + }, + { + "epoch": 0.18190826514525937, + "grad_norm": 1.062813639640808, + "learning_rate": 9.406996152992323e-06, + "loss": 0.3325, + "step": 9087 + }, + { + "epoch": 0.1819282836623877, + "grad_norm": 1.2306314706802368, + "learning_rate": 9.406843008724655e-06, + "loss": 0.3326, + "step": 9088 + }, + { + "epoch": 0.18194830217951605, + "grad_norm": 1.865344762802124, + "learning_rate": 9.406689845931566e-06, + "loss": 0.8905, + "step": 9089 + }, + { + "epoch": 0.1819683206966444, + "grad_norm": 1.1586178541183472, + "learning_rate": 9.406536664613703e-06, + "loss": 0.3262, + "step": 9090 + }, + { + "epoch": 0.18198833921377275, + "grad_norm": 2.1566519737243652, + "learning_rate": 9.406383464771708e-06, + "loss": 0.9047, + "step": 9091 + }, + { + "epoch": 0.18200835773090107, + "grad_norm": 1.1053047180175781, + "learning_rate": 9.406230246406226e-06, + "loss": 0.2999, + "step": 9092 + }, + { + "epoch": 0.18202837624802942, + "grad_norm": 1.170235514640808, + "learning_rate": 9.4060770095179e-06, + "loss": 0.3198, + "step": 9093 + }, + { + "epoch": 0.18204839476515777, + "grad_norm": 1.136817216873169, + "learning_rate": 9.405923754107375e-06, + "loss": 0.361, + "step": 9094 + }, + { + "epoch": 0.18206841328228612, + "grad_norm": 1.2529263496398926, + "learning_rate": 9.405770480175294e-06, + "loss": 0.3939, + "step": 9095 + }, + { + "epoch": 0.18208843179941445, + "grad_norm": 1.086690068244934, + "learning_rate": 9.405617187722301e-06, + "loss": 0.3437, + "step": 9096 + }, + { + "epoch": 0.1821084503165428, + "grad_norm": 1.1548172235488892, + "learning_rate": 9.405463876749044e-06, + "loss": 0.3437, + "step": 9097 + }, + { + "epoch": 0.18212846883367115, + "grad_norm": 1.0778827667236328, + "learning_rate": 9.405310547256163e-06, + "loss": 0.3459, + "step": 9098 + }, + { + "epoch": 0.1821484873507995, + "grad_norm": 1.0270421504974365, + "learning_rate": 9.405157199244304e-06, + "loss": 0.3488, + "step": 9099 + }, + { + "epoch": 0.18216850586792782, + "grad_norm": 1.2981640100479126, + "learning_rate": 9.405003832714113e-06, + "loss": 0.316, + "step": 9100 + }, + { + "epoch": 0.18218852438505617, + "grad_norm": 1.2693068981170654, + "learning_rate": 9.404850447666234e-06, + "loss": 0.3101, + "step": 9101 + }, + { + "epoch": 0.18220854290218452, + "grad_norm": 0.9929118156433105, + "learning_rate": 9.404697044101311e-06, + "loss": 0.294, + "step": 9102 + }, + { + "epoch": 0.18222856141931287, + "grad_norm": 1.2427785396575928, + "learning_rate": 9.40454362201999e-06, + "loss": 0.3398, + "step": 9103 + }, + { + "epoch": 0.1822485799364412, + "grad_norm": 1.1522754430770874, + "learning_rate": 9.404390181422916e-06, + "loss": 0.3002, + "step": 9104 + }, + { + "epoch": 0.18226859845356955, + "grad_norm": 1.1081715822219849, + "learning_rate": 9.404236722310733e-06, + "loss": 0.3199, + "step": 9105 + }, + { + "epoch": 0.1822886169706979, + "grad_norm": 1.2419469356536865, + "learning_rate": 9.404083244684085e-06, + "loss": 0.3473, + "step": 9106 + }, + { + "epoch": 0.18230863548782625, + "grad_norm": 1.1477705240249634, + "learning_rate": 9.403929748543621e-06, + "loss": 0.357, + "step": 9107 + }, + { + "epoch": 0.18232865400495457, + "grad_norm": 1.0558149814605713, + "learning_rate": 9.403776233889984e-06, + "loss": 0.2979, + "step": 9108 + }, + { + "epoch": 0.18234867252208292, + "grad_norm": 1.1348363161087036, + "learning_rate": 9.403622700723817e-06, + "loss": 0.3785, + "step": 9109 + }, + { + "epoch": 0.18236869103921127, + "grad_norm": 1.084916114807129, + "learning_rate": 9.403469149045772e-06, + "loss": 0.3349, + "step": 9110 + }, + { + "epoch": 0.18238870955633962, + "grad_norm": 1.0989309549331665, + "learning_rate": 9.403315578856486e-06, + "loss": 0.3383, + "step": 9111 + }, + { + "epoch": 0.18240872807346795, + "grad_norm": 1.1070646047592163, + "learning_rate": 9.403161990156613e-06, + "loss": 0.3145, + "step": 9112 + }, + { + "epoch": 0.1824287465905963, + "grad_norm": 1.046728491783142, + "learning_rate": 9.403008382946792e-06, + "loss": 0.3172, + "step": 9113 + }, + { + "epoch": 0.18244876510772465, + "grad_norm": 1.1472769975662231, + "learning_rate": 9.40285475722767e-06, + "loss": 0.3087, + "step": 9114 + }, + { + "epoch": 0.182468783624853, + "grad_norm": 1.063609004020691, + "learning_rate": 9.402701112999897e-06, + "loss": 0.3253, + "step": 9115 + }, + { + "epoch": 0.18248880214198132, + "grad_norm": 1.3069000244140625, + "learning_rate": 9.402547450264114e-06, + "loss": 0.346, + "step": 9116 + }, + { + "epoch": 0.18250882065910967, + "grad_norm": 1.1423823833465576, + "learning_rate": 9.402393769020971e-06, + "loss": 0.3018, + "step": 9117 + }, + { + "epoch": 0.18252883917623802, + "grad_norm": 1.0372672080993652, + "learning_rate": 9.40224006927111e-06, + "loss": 0.3387, + "step": 9118 + }, + { + "epoch": 0.18254885769336637, + "grad_norm": 1.1353727579116821, + "learning_rate": 9.40208635101518e-06, + "loss": 0.31, + "step": 9119 + }, + { + "epoch": 0.1825688762104947, + "grad_norm": 1.2438385486602783, + "learning_rate": 9.401932614253827e-06, + "loss": 0.3702, + "step": 9120 + }, + { + "epoch": 0.18258889472762305, + "grad_norm": 1.1457315683364868, + "learning_rate": 9.401778858987696e-06, + "loss": 0.3116, + "step": 9121 + }, + { + "epoch": 0.1826089132447514, + "grad_norm": 1.8044947385787964, + "learning_rate": 9.401625085217434e-06, + "loss": 0.8447, + "step": 9122 + }, + { + "epoch": 0.18262893176187975, + "grad_norm": 1.2197620868682861, + "learning_rate": 9.401471292943687e-06, + "loss": 0.3417, + "step": 9123 + }, + { + "epoch": 0.18264895027900807, + "grad_norm": 1.057081937789917, + "learning_rate": 9.401317482167102e-06, + "loss": 0.3129, + "step": 9124 + }, + { + "epoch": 0.18266896879613642, + "grad_norm": 1.7984849214553833, + "learning_rate": 9.401163652888325e-06, + "loss": 0.9151, + "step": 9125 + }, + { + "epoch": 0.18268898731326477, + "grad_norm": 1.240718960762024, + "learning_rate": 9.401009805108003e-06, + "loss": 0.3836, + "step": 9126 + }, + { + "epoch": 0.18270900583039312, + "grad_norm": 1.0666089057922363, + "learning_rate": 9.400855938826786e-06, + "loss": 0.3645, + "step": 9127 + }, + { + "epoch": 0.18272902434752145, + "grad_norm": 1.7664049863815308, + "learning_rate": 9.400702054045313e-06, + "loss": 0.7836, + "step": 9128 + }, + { + "epoch": 0.1827490428646498, + "grad_norm": 1.0708575248718262, + "learning_rate": 9.40054815076424e-06, + "loss": 0.319, + "step": 9129 + }, + { + "epoch": 0.18276906138177815, + "grad_norm": 1.0982575416564941, + "learning_rate": 9.400394228984206e-06, + "loss": 0.3304, + "step": 9130 + }, + { + "epoch": 0.1827890798989065, + "grad_norm": 1.0270458459854126, + "learning_rate": 9.400240288705864e-06, + "loss": 0.3164, + "step": 9131 + }, + { + "epoch": 0.18280909841603482, + "grad_norm": 1.2442609071731567, + "learning_rate": 9.400086329929858e-06, + "loss": 0.4037, + "step": 9132 + }, + { + "epoch": 0.18282911693316317, + "grad_norm": 1.1723378896713257, + "learning_rate": 9.399932352656835e-06, + "loss": 0.3858, + "step": 9133 + }, + { + "epoch": 0.18284913545029152, + "grad_norm": 1.098459243774414, + "learning_rate": 9.399778356887445e-06, + "loss": 0.3193, + "step": 9134 + }, + { + "epoch": 0.18286915396741987, + "grad_norm": 1.0607900619506836, + "learning_rate": 9.399624342622333e-06, + "loss": 0.331, + "step": 9135 + }, + { + "epoch": 0.1828891724845482, + "grad_norm": 1.021681308746338, + "learning_rate": 9.399470309862147e-06, + "loss": 0.3342, + "step": 9136 + }, + { + "epoch": 0.18290919100167655, + "grad_norm": 1.0383925437927246, + "learning_rate": 9.399316258607533e-06, + "loss": 0.2919, + "step": 9137 + }, + { + "epoch": 0.1829292095188049, + "grad_norm": 1.0782736539840698, + "learning_rate": 9.399162188859142e-06, + "loss": 0.3549, + "step": 9138 + }, + { + "epoch": 0.18294922803593325, + "grad_norm": 1.143803358078003, + "learning_rate": 9.39900810061762e-06, + "loss": 0.3314, + "step": 9139 + }, + { + "epoch": 0.18296924655306157, + "grad_norm": 1.2202619314193726, + "learning_rate": 9.398853993883614e-06, + "loss": 0.3647, + "step": 9140 + }, + { + "epoch": 0.18298926507018992, + "grad_norm": 1.1394895315170288, + "learning_rate": 9.398699868657772e-06, + "loss": 0.3055, + "step": 9141 + }, + { + "epoch": 0.18300928358731827, + "grad_norm": 1.0878404378890991, + "learning_rate": 9.398545724940744e-06, + "loss": 0.309, + "step": 9142 + }, + { + "epoch": 0.18302930210444662, + "grad_norm": 1.1775511503219604, + "learning_rate": 9.398391562733176e-06, + "loss": 0.3552, + "step": 9143 + }, + { + "epoch": 0.18304932062157495, + "grad_norm": 1.1773741245269775, + "learning_rate": 9.398237382035716e-06, + "loss": 0.3722, + "step": 9144 + }, + { + "epoch": 0.1830693391387033, + "grad_norm": 1.1153212785720825, + "learning_rate": 9.398083182849012e-06, + "loss": 0.3046, + "step": 9145 + }, + { + "epoch": 0.18308935765583165, + "grad_norm": 1.119873285293579, + "learning_rate": 9.397928965173713e-06, + "loss": 0.3219, + "step": 9146 + }, + { + "epoch": 0.18310937617296, + "grad_norm": 1.11154305934906, + "learning_rate": 9.397774729010465e-06, + "loss": 0.3299, + "step": 9147 + }, + { + "epoch": 0.18312939469008832, + "grad_norm": 1.8414238691329956, + "learning_rate": 9.397620474359922e-06, + "loss": 0.889, + "step": 9148 + }, + { + "epoch": 0.18314941320721667, + "grad_norm": 1.056065320968628, + "learning_rate": 9.397466201222727e-06, + "loss": 0.3011, + "step": 9149 + }, + { + "epoch": 0.18316943172434502, + "grad_norm": 1.0705994367599487, + "learning_rate": 9.397311909599531e-06, + "loss": 0.2905, + "step": 9150 + }, + { + "epoch": 0.18318945024147337, + "grad_norm": 1.1361687183380127, + "learning_rate": 9.397157599490983e-06, + "loss": 0.368, + "step": 9151 + }, + { + "epoch": 0.1832094687586017, + "grad_norm": 1.1110012531280518, + "learning_rate": 9.397003270897728e-06, + "loss": 0.3935, + "step": 9152 + }, + { + "epoch": 0.18322948727573005, + "grad_norm": 1.8674026727676392, + "learning_rate": 9.39684892382042e-06, + "loss": 0.8122, + "step": 9153 + }, + { + "epoch": 0.1832495057928584, + "grad_norm": 1.101398229598999, + "learning_rate": 9.396694558259705e-06, + "loss": 0.3205, + "step": 9154 + }, + { + "epoch": 0.18326952430998675, + "grad_norm": 1.805783748626709, + "learning_rate": 9.39654017421623e-06, + "loss": 0.7638, + "step": 9155 + }, + { + "epoch": 0.18328954282711507, + "grad_norm": 1.087517499923706, + "learning_rate": 9.39638577169065e-06, + "loss": 0.3286, + "step": 9156 + }, + { + "epoch": 0.18330956134424342, + "grad_norm": 1.0162540674209595, + "learning_rate": 9.396231350683608e-06, + "loss": 0.3391, + "step": 9157 + }, + { + "epoch": 0.18332957986137177, + "grad_norm": 1.1284332275390625, + "learning_rate": 9.396076911195755e-06, + "loss": 0.3461, + "step": 9158 + }, + { + "epoch": 0.18334959837850012, + "grad_norm": 1.0335649251937866, + "learning_rate": 9.395922453227741e-06, + "loss": 0.3295, + "step": 9159 + }, + { + "epoch": 0.18336961689562845, + "grad_norm": 1.087564468383789, + "learning_rate": 9.395767976780216e-06, + "loss": 0.3248, + "step": 9160 + }, + { + "epoch": 0.1833896354127568, + "grad_norm": 1.0302523374557495, + "learning_rate": 9.395613481853829e-06, + "loss": 0.3477, + "step": 9161 + }, + { + "epoch": 0.18340965392988515, + "grad_norm": 1.8177673816680908, + "learning_rate": 9.395458968449227e-06, + "loss": 0.8439, + "step": 9162 + }, + { + "epoch": 0.1834296724470135, + "grad_norm": 1.8916685581207275, + "learning_rate": 9.395304436567063e-06, + "loss": 0.8702, + "step": 9163 + }, + { + "epoch": 0.18344969096414182, + "grad_norm": 1.0920840501785278, + "learning_rate": 9.395149886207984e-06, + "loss": 0.3303, + "step": 9164 + }, + { + "epoch": 0.18346970948127017, + "grad_norm": 1.1763969659805298, + "learning_rate": 9.39499531737264e-06, + "loss": 0.3325, + "step": 9165 + }, + { + "epoch": 0.18348972799839852, + "grad_norm": 1.1515153646469116, + "learning_rate": 9.394840730061683e-06, + "loss": 0.361, + "step": 9166 + }, + { + "epoch": 0.18350974651552687, + "grad_norm": 1.0467249155044556, + "learning_rate": 9.394686124275761e-06, + "loss": 0.2953, + "step": 9167 + }, + { + "epoch": 0.1835297650326552, + "grad_norm": 1.1570912599563599, + "learning_rate": 9.394531500015525e-06, + "loss": 0.3437, + "step": 9168 + }, + { + "epoch": 0.18354978354978355, + "grad_norm": 1.0279085636138916, + "learning_rate": 9.394376857281623e-06, + "loss": 0.276, + "step": 9169 + }, + { + "epoch": 0.1835698020669119, + "grad_norm": 1.1429905891418457, + "learning_rate": 9.394222196074708e-06, + "loss": 0.3019, + "step": 9170 + }, + { + "epoch": 0.18358982058404025, + "grad_norm": 1.0626394748687744, + "learning_rate": 9.394067516395427e-06, + "loss": 0.3397, + "step": 9171 + }, + { + "epoch": 0.18360983910116857, + "grad_norm": 1.0694743394851685, + "learning_rate": 9.393912818244431e-06, + "loss": 0.2943, + "step": 9172 + }, + { + "epoch": 0.18362985761829692, + "grad_norm": 1.3474780321121216, + "learning_rate": 9.393758101622373e-06, + "loss": 0.3727, + "step": 9173 + }, + { + "epoch": 0.18364987613542527, + "grad_norm": 1.8415151834487915, + "learning_rate": 9.3936033665299e-06, + "loss": 0.7743, + "step": 9174 + }, + { + "epoch": 0.18366989465255362, + "grad_norm": 1.2335933446884155, + "learning_rate": 9.393448612967664e-06, + "loss": 0.3274, + "step": 9175 + }, + { + "epoch": 0.18368991316968195, + "grad_norm": 1.2444102764129639, + "learning_rate": 9.393293840936317e-06, + "loss": 0.346, + "step": 9176 + }, + { + "epoch": 0.1837099316868103, + "grad_norm": 1.8946888446807861, + "learning_rate": 9.393139050436507e-06, + "loss": 0.834, + "step": 9177 + }, + { + "epoch": 0.18372995020393865, + "grad_norm": 1.2408769130706787, + "learning_rate": 9.392984241468887e-06, + "loss": 0.3051, + "step": 9178 + }, + { + "epoch": 0.183749968721067, + "grad_norm": 1.1642367839813232, + "learning_rate": 9.392829414034106e-06, + "loss": 0.3004, + "step": 9179 + }, + { + "epoch": 0.18376998723819532, + "grad_norm": 1.1509385108947754, + "learning_rate": 9.392674568132814e-06, + "loss": 0.315, + "step": 9180 + }, + { + "epoch": 0.18379000575532367, + "grad_norm": 1.1534974575042725, + "learning_rate": 9.392519703765666e-06, + "loss": 0.344, + "step": 9181 + }, + { + "epoch": 0.18381002427245202, + "grad_norm": 1.1180024147033691, + "learning_rate": 9.392364820933309e-06, + "loss": 0.3012, + "step": 9182 + }, + { + "epoch": 0.18383004278958037, + "grad_norm": 1.1264967918395996, + "learning_rate": 9.392209919636394e-06, + "loss": 0.3115, + "step": 9183 + }, + { + "epoch": 0.1838500613067087, + "grad_norm": 1.1158347129821777, + "learning_rate": 9.392054999875576e-06, + "loss": 0.2934, + "step": 9184 + }, + { + "epoch": 0.18387007982383705, + "grad_norm": 0.9992908239364624, + "learning_rate": 9.391900061651504e-06, + "loss": 0.3336, + "step": 9185 + }, + { + "epoch": 0.1838900983409654, + "grad_norm": 0.9834975004196167, + "learning_rate": 9.39174510496483e-06, + "loss": 0.3303, + "step": 9186 + }, + { + "epoch": 0.18391011685809375, + "grad_norm": 1.42884361743927, + "learning_rate": 9.391590129816201e-06, + "loss": 0.2834, + "step": 9187 + }, + { + "epoch": 0.18393013537522207, + "grad_norm": 1.1518927812576294, + "learning_rate": 9.391435136206275e-06, + "loss": 0.3871, + "step": 9188 + }, + { + "epoch": 0.18395015389235042, + "grad_norm": 1.1773768663406372, + "learning_rate": 9.3912801241357e-06, + "loss": 0.3763, + "step": 9189 + }, + { + "epoch": 0.18397017240947877, + "grad_norm": 1.0221561193466187, + "learning_rate": 9.391125093605129e-06, + "loss": 0.3271, + "step": 9190 + }, + { + "epoch": 0.18399019092660712, + "grad_norm": 1.071066975593567, + "learning_rate": 9.390970044615212e-06, + "loss": 0.3282, + "step": 9191 + }, + { + "epoch": 0.18401020944373545, + "grad_norm": 2.1009602546691895, + "learning_rate": 9.390814977166603e-06, + "loss": 0.8989, + "step": 9192 + }, + { + "epoch": 0.1840302279608638, + "grad_norm": 1.1889805793762207, + "learning_rate": 9.390659891259952e-06, + "loss": 0.3697, + "step": 9193 + }, + { + "epoch": 0.18405024647799215, + "grad_norm": 1.2950129508972168, + "learning_rate": 9.390504786895914e-06, + "loss": 0.34, + "step": 9194 + }, + { + "epoch": 0.1840702649951205, + "grad_norm": 1.0291520357131958, + "learning_rate": 9.390349664075136e-06, + "loss": 0.392, + "step": 9195 + }, + { + "epoch": 0.18409028351224882, + "grad_norm": 1.0238219499588013, + "learning_rate": 9.390194522798273e-06, + "loss": 0.302, + "step": 9196 + }, + { + "epoch": 0.18411030202937717, + "grad_norm": 1.1922764778137207, + "learning_rate": 9.390039363065977e-06, + "loss": 0.385, + "step": 9197 + }, + { + "epoch": 0.18413032054650552, + "grad_norm": 1.8222284317016602, + "learning_rate": 9.3898841848789e-06, + "loss": 0.7955, + "step": 9198 + }, + { + "epoch": 0.18415033906363387, + "grad_norm": 1.0533971786499023, + "learning_rate": 9.389728988237696e-06, + "loss": 0.3499, + "step": 9199 + }, + { + "epoch": 0.1841703575807622, + "grad_norm": 1.1822890043258667, + "learning_rate": 9.389573773143018e-06, + "loss": 0.3278, + "step": 9200 + }, + { + "epoch": 0.18419037609789055, + "grad_norm": 1.1787832975387573, + "learning_rate": 9.389418539595511e-06, + "loss": 0.295, + "step": 9201 + }, + { + "epoch": 0.1842103946150189, + "grad_norm": 1.1793688535690308, + "learning_rate": 9.389263287595838e-06, + "loss": 0.279, + "step": 9202 + }, + { + "epoch": 0.18423041313214725, + "grad_norm": 1.1946836709976196, + "learning_rate": 9.389108017144643e-06, + "loss": 0.3512, + "step": 9203 + }, + { + "epoch": 0.18425043164927557, + "grad_norm": 1.1398462057113647, + "learning_rate": 9.388952728242587e-06, + "loss": 0.3599, + "step": 9204 + }, + { + "epoch": 0.18427045016640392, + "grad_norm": 1.1142514944076538, + "learning_rate": 9.388797420890315e-06, + "loss": 0.3895, + "step": 9205 + }, + { + "epoch": 0.18429046868353227, + "grad_norm": 1.0864949226379395, + "learning_rate": 9.388642095088483e-06, + "loss": 0.3257, + "step": 9206 + }, + { + "epoch": 0.18431048720066062, + "grad_norm": 1.096488118171692, + "learning_rate": 9.388486750837745e-06, + "loss": 0.3171, + "step": 9207 + }, + { + "epoch": 0.18433050571778894, + "grad_norm": 1.082716464996338, + "learning_rate": 9.388331388138751e-06, + "loss": 0.2731, + "step": 9208 + }, + { + "epoch": 0.1843505242349173, + "grad_norm": 2.0605580806732178, + "learning_rate": 9.388176006992159e-06, + "loss": 0.7841, + "step": 9209 + }, + { + "epoch": 0.18437054275204565, + "grad_norm": 1.2506136894226074, + "learning_rate": 9.388020607398618e-06, + "loss": 0.3815, + "step": 9210 + }, + { + "epoch": 0.184390561269174, + "grad_norm": 1.0601571798324585, + "learning_rate": 9.387865189358782e-06, + "loss": 0.3233, + "step": 9211 + }, + { + "epoch": 0.18441057978630232, + "grad_norm": 1.0038862228393555, + "learning_rate": 9.387709752873307e-06, + "loss": 0.2724, + "step": 9212 + }, + { + "epoch": 0.18443059830343067, + "grad_norm": 1.178159236907959, + "learning_rate": 9.387554297942845e-06, + "loss": 0.3158, + "step": 9213 + }, + { + "epoch": 0.18445061682055902, + "grad_norm": 1.144627332687378, + "learning_rate": 9.387398824568047e-06, + "loss": 0.3349, + "step": 9214 + }, + { + "epoch": 0.18447063533768737, + "grad_norm": 1.2081037759780884, + "learning_rate": 9.38724333274957e-06, + "loss": 0.3332, + "step": 9215 + }, + { + "epoch": 0.1844906538548157, + "grad_norm": 1.2895985841751099, + "learning_rate": 9.387087822488063e-06, + "loss": 0.3258, + "step": 9216 + }, + { + "epoch": 0.18451067237194405, + "grad_norm": 1.2264764308929443, + "learning_rate": 9.386932293784185e-06, + "loss": 0.3523, + "step": 9217 + }, + { + "epoch": 0.1845306908890724, + "grad_norm": 0.9971306920051575, + "learning_rate": 9.386776746638588e-06, + "loss": 0.314, + "step": 9218 + }, + { + "epoch": 0.18455070940620075, + "grad_norm": 1.2271100282669067, + "learning_rate": 9.386621181051925e-06, + "loss": 0.3349, + "step": 9219 + }, + { + "epoch": 0.18457072792332907, + "grad_norm": 1.032974362373352, + "learning_rate": 9.386465597024852e-06, + "loss": 0.3236, + "step": 9220 + }, + { + "epoch": 0.18459074644045742, + "grad_norm": 1.1442564725875854, + "learning_rate": 9.386309994558021e-06, + "loss": 0.3505, + "step": 9221 + }, + { + "epoch": 0.18461076495758577, + "grad_norm": 1.085509181022644, + "learning_rate": 9.386154373652085e-06, + "loss": 0.3041, + "step": 9222 + }, + { + "epoch": 0.18463078347471412, + "grad_norm": 1.1697641611099243, + "learning_rate": 9.385998734307701e-06, + "loss": 0.3187, + "step": 9223 + }, + { + "epoch": 0.18465080199184244, + "grad_norm": 1.1402192115783691, + "learning_rate": 9.385843076525524e-06, + "loss": 0.3849, + "step": 9224 + }, + { + "epoch": 0.1846708205089708, + "grad_norm": 1.946442723274231, + "learning_rate": 9.385687400306206e-06, + "loss": 0.8094, + "step": 9225 + }, + { + "epoch": 0.18469083902609915, + "grad_norm": 1.2095541954040527, + "learning_rate": 9.385531705650399e-06, + "loss": 0.3234, + "step": 9226 + }, + { + "epoch": 0.1847108575432275, + "grad_norm": 1.95658278465271, + "learning_rate": 9.385375992558762e-06, + "loss": 0.8838, + "step": 9227 + }, + { + "epoch": 0.18473087606035582, + "grad_norm": 0.9381659626960754, + "learning_rate": 9.385220261031948e-06, + "loss": 0.2877, + "step": 9228 + }, + { + "epoch": 0.18475089457748417, + "grad_norm": 1.036955714225769, + "learning_rate": 9.385064511070614e-06, + "loss": 0.3265, + "step": 9229 + }, + { + "epoch": 0.18477091309461252, + "grad_norm": 1.1696152687072754, + "learning_rate": 9.38490874267541e-06, + "loss": 0.2996, + "step": 9230 + }, + { + "epoch": 0.18479093161174087, + "grad_norm": 1.0980945825576782, + "learning_rate": 9.384752955846995e-06, + "loss": 0.3045, + "step": 9231 + }, + { + "epoch": 0.1848109501288692, + "grad_norm": 1.1714197397232056, + "learning_rate": 9.38459715058602e-06, + "loss": 0.3511, + "step": 9232 + }, + { + "epoch": 0.18483096864599755, + "grad_norm": 1.050646424293518, + "learning_rate": 9.384441326893143e-06, + "loss": 0.3322, + "step": 9233 + }, + { + "epoch": 0.1848509871631259, + "grad_norm": 1.1503783464431763, + "learning_rate": 9.38428548476902e-06, + "loss": 0.3168, + "step": 9234 + }, + { + "epoch": 0.18487100568025425, + "grad_norm": 1.036772608757019, + "learning_rate": 9.384129624214301e-06, + "loss": 0.3323, + "step": 9235 + }, + { + "epoch": 0.18489102419738257, + "grad_norm": 1.2160924673080444, + "learning_rate": 9.38397374522965e-06, + "loss": 0.3031, + "step": 9236 + }, + { + "epoch": 0.18491104271451092, + "grad_norm": 1.1779392957687378, + "learning_rate": 9.38381784781571e-06, + "loss": 0.3948, + "step": 9237 + }, + { + "epoch": 0.18493106123163927, + "grad_norm": 2.033639669418335, + "learning_rate": 9.383661931973147e-06, + "loss": 0.7383, + "step": 9238 + }, + { + "epoch": 0.18495107974876762, + "grad_norm": 1.0567395687103271, + "learning_rate": 9.383505997702613e-06, + "loss": 0.3173, + "step": 9239 + }, + { + "epoch": 0.18497109826589594, + "grad_norm": 1.1914093494415283, + "learning_rate": 9.383350045004762e-06, + "loss": 0.2924, + "step": 9240 + }, + { + "epoch": 0.1849911167830243, + "grad_norm": 1.133324146270752, + "learning_rate": 9.38319407388025e-06, + "loss": 0.2892, + "step": 9241 + }, + { + "epoch": 0.18501113530015265, + "grad_norm": 1.1392018795013428, + "learning_rate": 9.383038084329735e-06, + "loss": 0.3197, + "step": 9242 + }, + { + "epoch": 0.185031153817281, + "grad_norm": 1.15122652053833, + "learning_rate": 9.38288207635387e-06, + "loss": 0.3275, + "step": 9243 + }, + { + "epoch": 0.18505117233440932, + "grad_norm": 1.1466283798217773, + "learning_rate": 9.382726049953313e-06, + "loss": 0.3264, + "step": 9244 + }, + { + "epoch": 0.18507119085153767, + "grad_norm": 1.8578839302062988, + "learning_rate": 9.382570005128717e-06, + "loss": 0.7535, + "step": 9245 + }, + { + "epoch": 0.18509120936866602, + "grad_norm": 1.1099364757537842, + "learning_rate": 9.382413941880741e-06, + "loss": 0.3058, + "step": 9246 + }, + { + "epoch": 0.18511122788579437, + "grad_norm": 1.909237027168274, + "learning_rate": 9.38225786021004e-06, + "loss": 0.8324, + "step": 9247 + }, + { + "epoch": 0.1851312464029227, + "grad_norm": 1.143892765045166, + "learning_rate": 9.38210176011727e-06, + "loss": 0.3214, + "step": 9248 + }, + { + "epoch": 0.18515126492005105, + "grad_norm": 0.9838765263557434, + "learning_rate": 9.381945641603087e-06, + "loss": 0.3052, + "step": 9249 + }, + { + "epoch": 0.1851712834371794, + "grad_norm": 0.9818832278251648, + "learning_rate": 9.381789504668148e-06, + "loss": 0.3228, + "step": 9250 + }, + { + "epoch": 0.18519130195430775, + "grad_norm": 1.053030252456665, + "learning_rate": 9.38163334931311e-06, + "loss": 0.3505, + "step": 9251 + }, + { + "epoch": 0.18521132047143607, + "grad_norm": 1.1013790369033813, + "learning_rate": 9.381477175538627e-06, + "loss": 0.3203, + "step": 9252 + }, + { + "epoch": 0.18523133898856442, + "grad_norm": 1.1285721063613892, + "learning_rate": 9.381320983345357e-06, + "loss": 0.355, + "step": 9253 + }, + { + "epoch": 0.18525135750569277, + "grad_norm": 1.1969982385635376, + "learning_rate": 9.381164772733956e-06, + "loss": 0.3467, + "step": 9254 + }, + { + "epoch": 0.18527137602282112, + "grad_norm": 1.2650690078735352, + "learning_rate": 9.381008543705081e-06, + "loss": 0.3595, + "step": 9255 + }, + { + "epoch": 0.18529139453994944, + "grad_norm": 1.4244577884674072, + "learning_rate": 9.380852296259388e-06, + "loss": 0.3146, + "step": 9256 + }, + { + "epoch": 0.1853114130570778, + "grad_norm": 1.3106263875961304, + "learning_rate": 9.380696030397538e-06, + "loss": 0.3636, + "step": 9257 + }, + { + "epoch": 0.18533143157420615, + "grad_norm": 1.192922592163086, + "learning_rate": 9.380539746120182e-06, + "loss": 0.3216, + "step": 9258 + }, + { + "epoch": 0.1853514500913345, + "grad_norm": 1.123777151107788, + "learning_rate": 9.380383443427981e-06, + "loss": 0.3522, + "step": 9259 + }, + { + "epoch": 0.18537146860846282, + "grad_norm": 1.1318717002868652, + "learning_rate": 9.380227122321589e-06, + "loss": 0.3539, + "step": 9260 + }, + { + "epoch": 0.18539148712559117, + "grad_norm": 1.0873265266418457, + "learning_rate": 9.380070782801666e-06, + "loss": 0.3376, + "step": 9261 + }, + { + "epoch": 0.18541150564271952, + "grad_norm": 1.2236629724502563, + "learning_rate": 9.379914424868868e-06, + "loss": 0.3292, + "step": 9262 + }, + { + "epoch": 0.18543152415984787, + "grad_norm": 1.0209420919418335, + "learning_rate": 9.37975804852385e-06, + "loss": 0.3154, + "step": 9263 + }, + { + "epoch": 0.1854515426769762, + "grad_norm": 1.2519395351409912, + "learning_rate": 9.379601653767275e-06, + "loss": 0.3041, + "step": 9264 + }, + { + "epoch": 0.18547156119410455, + "grad_norm": 1.1792488098144531, + "learning_rate": 9.379445240599795e-06, + "loss": 0.3299, + "step": 9265 + }, + { + "epoch": 0.1854915797112329, + "grad_norm": 1.971786379814148, + "learning_rate": 9.379288809022072e-06, + "loss": 0.854, + "step": 9266 + }, + { + "epoch": 0.18551159822836125, + "grad_norm": 1.1507508754730225, + "learning_rate": 9.37913235903476e-06, + "loss": 0.3598, + "step": 9267 + }, + { + "epoch": 0.18553161674548957, + "grad_norm": 1.2610969543457031, + "learning_rate": 9.378975890638518e-06, + "loss": 0.2904, + "step": 9268 + }, + { + "epoch": 0.18555163526261792, + "grad_norm": 1.0254868268966675, + "learning_rate": 9.378819403834002e-06, + "loss": 0.2774, + "step": 9269 + }, + { + "epoch": 0.18557165377974627, + "grad_norm": 1.2043033838272095, + "learning_rate": 9.378662898621873e-06, + "loss": 0.3248, + "step": 9270 + }, + { + "epoch": 0.18559167229687462, + "grad_norm": 1.719233512878418, + "learning_rate": 9.37850637500279e-06, + "loss": 0.32, + "step": 9271 + }, + { + "epoch": 0.18561169081400294, + "grad_norm": 1.1253859996795654, + "learning_rate": 9.378349832977405e-06, + "loss": 0.3124, + "step": 9272 + }, + { + "epoch": 0.1856317093311313, + "grad_norm": 1.7724738121032715, + "learning_rate": 9.378193272546382e-06, + "loss": 0.8717, + "step": 9273 + }, + { + "epoch": 0.18565172784825965, + "grad_norm": 1.3011665344238281, + "learning_rate": 9.378036693710374e-06, + "loss": 0.3712, + "step": 9274 + }, + { + "epoch": 0.185671746365388, + "grad_norm": 1.037127137184143, + "learning_rate": 9.377880096470043e-06, + "loss": 0.344, + "step": 9275 + }, + { + "epoch": 0.18569176488251632, + "grad_norm": 1.2148789167404175, + "learning_rate": 9.377723480826046e-06, + "loss": 0.3307, + "step": 9276 + }, + { + "epoch": 0.18571178339964467, + "grad_norm": 1.068575143814087, + "learning_rate": 9.377566846779042e-06, + "loss": 0.3529, + "step": 9277 + }, + { + "epoch": 0.18573180191677302, + "grad_norm": 1.0728070735931396, + "learning_rate": 9.37741019432969e-06, + "loss": 0.34, + "step": 9278 + }, + { + "epoch": 0.18575182043390137, + "grad_norm": 1.7721937894821167, + "learning_rate": 9.377253523478647e-06, + "loss": 0.8352, + "step": 9279 + }, + { + "epoch": 0.1857718389510297, + "grad_norm": 1.089540719985962, + "learning_rate": 9.377096834226571e-06, + "loss": 0.3808, + "step": 9280 + }, + { + "epoch": 0.18579185746815804, + "grad_norm": 0.9743115901947021, + "learning_rate": 9.376940126574122e-06, + "loss": 0.3174, + "step": 9281 + }, + { + "epoch": 0.1858118759852864, + "grad_norm": 1.2171385288238525, + "learning_rate": 9.37678340052196e-06, + "loss": 0.334, + "step": 9282 + }, + { + "epoch": 0.18583189450241475, + "grad_norm": 1.0449997186660767, + "learning_rate": 9.376626656070742e-06, + "loss": 0.3242, + "step": 9283 + }, + { + "epoch": 0.18585191301954307, + "grad_norm": 1.056536316871643, + "learning_rate": 9.376469893221128e-06, + "loss": 0.3391, + "step": 9284 + }, + { + "epoch": 0.18587193153667142, + "grad_norm": 1.1931109428405762, + "learning_rate": 9.376313111973774e-06, + "loss": 0.3485, + "step": 9285 + }, + { + "epoch": 0.18589195005379977, + "grad_norm": 1.154502511024475, + "learning_rate": 9.376156312329343e-06, + "loss": 0.3601, + "step": 9286 + }, + { + "epoch": 0.18591196857092812, + "grad_norm": 1.0857867002487183, + "learning_rate": 9.375999494288492e-06, + "loss": 0.3542, + "step": 9287 + }, + { + "epoch": 0.18593198708805644, + "grad_norm": 1.0799301862716675, + "learning_rate": 9.375842657851883e-06, + "loss": 0.3154, + "step": 9288 + }, + { + "epoch": 0.1859520056051848, + "grad_norm": 1.1907317638397217, + "learning_rate": 9.375685803020171e-06, + "loss": 0.3538, + "step": 9289 + }, + { + "epoch": 0.18597202412231315, + "grad_norm": 1.827498197555542, + "learning_rate": 9.375528929794018e-06, + "loss": 0.8915, + "step": 9290 + }, + { + "epoch": 0.1859920426394415, + "grad_norm": 1.1227067708969116, + "learning_rate": 9.375372038174084e-06, + "loss": 0.3329, + "step": 9291 + }, + { + "epoch": 0.18601206115656982, + "grad_norm": 1.101853370666504, + "learning_rate": 9.375215128161025e-06, + "loss": 0.3727, + "step": 9292 + }, + { + "epoch": 0.18603207967369817, + "grad_norm": 1.1790258884429932, + "learning_rate": 9.375058199755507e-06, + "loss": 0.315, + "step": 9293 + }, + { + "epoch": 0.18605209819082652, + "grad_norm": 1.8436824083328247, + "learning_rate": 9.374901252958184e-06, + "loss": 0.7767, + "step": 9294 + }, + { + "epoch": 0.18607211670795487, + "grad_norm": 1.1588706970214844, + "learning_rate": 9.374744287769717e-06, + "loss": 0.3281, + "step": 9295 + }, + { + "epoch": 0.1860921352250832, + "grad_norm": 1.2787370681762695, + "learning_rate": 9.374587304190766e-06, + "loss": 0.3309, + "step": 9296 + }, + { + "epoch": 0.18611215374221154, + "grad_norm": 1.1288220882415771, + "learning_rate": 9.374430302221993e-06, + "loss": 0.3457, + "step": 9297 + }, + { + "epoch": 0.1861321722593399, + "grad_norm": 1.342373251914978, + "learning_rate": 9.374273281864054e-06, + "loss": 0.3525, + "step": 9298 + }, + { + "epoch": 0.18615219077646825, + "grad_norm": 1.1913151741027832, + "learning_rate": 9.374116243117615e-06, + "loss": 0.4058, + "step": 9299 + }, + { + "epoch": 0.18617220929359657, + "grad_norm": 1.049622654914856, + "learning_rate": 9.37395918598333e-06, + "loss": 0.352, + "step": 9300 + }, + { + "epoch": 0.18619222781072492, + "grad_norm": 1.0000560283660889, + "learning_rate": 9.373802110461862e-06, + "loss": 0.2932, + "step": 9301 + }, + { + "epoch": 0.18621224632785327, + "grad_norm": 1.3307545185089111, + "learning_rate": 9.373645016553872e-06, + "loss": 0.3407, + "step": 9302 + }, + { + "epoch": 0.18623226484498162, + "grad_norm": 1.1733908653259277, + "learning_rate": 9.37348790426002e-06, + "loss": 0.3884, + "step": 9303 + }, + { + "epoch": 0.18625228336210994, + "grad_norm": 2.053712844848633, + "learning_rate": 9.373330773580965e-06, + "loss": 0.8199, + "step": 9304 + }, + { + "epoch": 0.1862723018792383, + "grad_norm": 1.2055972814559937, + "learning_rate": 9.373173624517368e-06, + "loss": 0.3487, + "step": 9305 + }, + { + "epoch": 0.18629232039636665, + "grad_norm": 1.146728515625, + "learning_rate": 9.373016457069892e-06, + "loss": 0.3524, + "step": 9306 + }, + { + "epoch": 0.186312338913495, + "grad_norm": 1.0451436042785645, + "learning_rate": 9.372859271239195e-06, + "loss": 0.2963, + "step": 9307 + }, + { + "epoch": 0.18633235743062332, + "grad_norm": 1.1659445762634277, + "learning_rate": 9.372702067025939e-06, + "loss": 0.3107, + "step": 9308 + }, + { + "epoch": 0.18635237594775167, + "grad_norm": 1.9003548622131348, + "learning_rate": 9.372544844430785e-06, + "loss": 0.8824, + "step": 9309 + }, + { + "epoch": 0.18637239446488002, + "grad_norm": 1.0641738176345825, + "learning_rate": 9.372387603454391e-06, + "loss": 0.3288, + "step": 9310 + }, + { + "epoch": 0.18639241298200837, + "grad_norm": 1.0665698051452637, + "learning_rate": 9.372230344097425e-06, + "loss": 0.2941, + "step": 9311 + }, + { + "epoch": 0.1864124314991367, + "grad_norm": 1.6378517150878906, + "learning_rate": 9.372073066360538e-06, + "loss": 0.8501, + "step": 9312 + }, + { + "epoch": 0.18643245001626504, + "grad_norm": 1.3291970491409302, + "learning_rate": 9.3719157702444e-06, + "loss": 0.2989, + "step": 9313 + }, + { + "epoch": 0.1864524685333934, + "grad_norm": 1.1023225784301758, + "learning_rate": 9.37175845574967e-06, + "loss": 0.319, + "step": 9314 + }, + { + "epoch": 0.18647248705052175, + "grad_norm": 1.057682752609253, + "learning_rate": 9.371601122877006e-06, + "loss": 0.2845, + "step": 9315 + }, + { + "epoch": 0.18649250556765007, + "grad_norm": 1.0607917308807373, + "learning_rate": 9.371443771627071e-06, + "loss": 0.2875, + "step": 9316 + }, + { + "epoch": 0.18651252408477842, + "grad_norm": 1.0841474533081055, + "learning_rate": 9.37128640200053e-06, + "loss": 0.33, + "step": 9317 + }, + { + "epoch": 0.18653254260190677, + "grad_norm": 1.0142306089401245, + "learning_rate": 9.37112901399804e-06, + "loss": 0.3186, + "step": 9318 + }, + { + "epoch": 0.18655256111903512, + "grad_norm": 1.133187174797058, + "learning_rate": 9.370971607620263e-06, + "loss": 0.2987, + "step": 9319 + }, + { + "epoch": 0.18657257963616344, + "grad_norm": 1.0659197568893433, + "learning_rate": 9.370814182867864e-06, + "loss": 0.2901, + "step": 9320 + }, + { + "epoch": 0.1865925981532918, + "grad_norm": 1.1288658380508423, + "learning_rate": 9.370656739741503e-06, + "loss": 0.3132, + "step": 9321 + }, + { + "epoch": 0.18661261667042015, + "grad_norm": 1.131247878074646, + "learning_rate": 9.370499278241841e-06, + "loss": 0.3272, + "step": 9322 + }, + { + "epoch": 0.1866326351875485, + "grad_norm": 1.9488836526870728, + "learning_rate": 9.37034179836954e-06, + "loss": 0.8197, + "step": 9323 + }, + { + "epoch": 0.18665265370467682, + "grad_norm": 1.1774210929870605, + "learning_rate": 9.370184300125263e-06, + "loss": 0.3336, + "step": 9324 + }, + { + "epoch": 0.18667267222180517, + "grad_norm": 1.040201187133789, + "learning_rate": 9.370026783509672e-06, + "loss": 0.3146, + "step": 9325 + }, + { + "epoch": 0.18669269073893352, + "grad_norm": 1.130162000656128, + "learning_rate": 9.369869248523428e-06, + "loss": 0.3326, + "step": 9326 + }, + { + "epoch": 0.18671270925606187, + "grad_norm": 1.0746878385543823, + "learning_rate": 9.369711695167195e-06, + "loss": 0.3084, + "step": 9327 + }, + { + "epoch": 0.1867327277731902, + "grad_norm": 1.1722115278244019, + "learning_rate": 9.369554123441633e-06, + "loss": 0.2907, + "step": 9328 + }, + { + "epoch": 0.18675274629031854, + "grad_norm": 0.979815661907196, + "learning_rate": 9.369396533347408e-06, + "loss": 0.3042, + "step": 9329 + }, + { + "epoch": 0.1867727648074469, + "grad_norm": 1.1824184656143188, + "learning_rate": 9.36923892488518e-06, + "loss": 0.3717, + "step": 9330 + }, + { + "epoch": 0.18679278332457525, + "grad_norm": 2.000737190246582, + "learning_rate": 9.36908129805561e-06, + "loss": 0.846, + "step": 9331 + }, + { + "epoch": 0.18681280184170357, + "grad_norm": 1.1384574174880981, + "learning_rate": 9.368923652859362e-06, + "loss": 0.3324, + "step": 9332 + }, + { + "epoch": 0.18683282035883192, + "grad_norm": 1.0940192937850952, + "learning_rate": 9.368765989297101e-06, + "loss": 0.3238, + "step": 9333 + }, + { + "epoch": 0.18685283887596027, + "grad_norm": 1.2970852851867676, + "learning_rate": 9.368608307369489e-06, + "loss": 0.2946, + "step": 9334 + }, + { + "epoch": 0.18687285739308862, + "grad_norm": 1.0479799509048462, + "learning_rate": 9.368450607077185e-06, + "loss": 0.3052, + "step": 9335 + }, + { + "epoch": 0.18689287591021694, + "grad_norm": 1.2624907493591309, + "learning_rate": 9.368292888420855e-06, + "loss": 0.3339, + "step": 9336 + }, + { + "epoch": 0.1869128944273453, + "grad_norm": 1.0097028017044067, + "learning_rate": 9.368135151401166e-06, + "loss": 0.2988, + "step": 9337 + }, + { + "epoch": 0.18693291294447364, + "grad_norm": 1.044772982597351, + "learning_rate": 9.367977396018773e-06, + "loss": 0.3606, + "step": 9338 + }, + { + "epoch": 0.186952931461602, + "grad_norm": 1.173030138015747, + "learning_rate": 9.367819622274343e-06, + "loss": 0.359, + "step": 9339 + }, + { + "epoch": 0.18697294997873032, + "grad_norm": 1.0684304237365723, + "learning_rate": 9.36766183016854e-06, + "loss": 0.2978, + "step": 9340 + }, + { + "epoch": 0.18699296849585867, + "grad_norm": 1.1437054872512817, + "learning_rate": 9.367504019702028e-06, + "loss": 0.354, + "step": 9341 + }, + { + "epoch": 0.18701298701298702, + "grad_norm": 1.082692265510559, + "learning_rate": 9.367346190875466e-06, + "loss": 0.3436, + "step": 9342 + }, + { + "epoch": 0.18703300553011537, + "grad_norm": 0.9664984345436096, + "learning_rate": 9.367188343689522e-06, + "loss": 0.2322, + "step": 9343 + }, + { + "epoch": 0.1870530240472437, + "grad_norm": 0.9729611277580261, + "learning_rate": 9.367030478144858e-06, + "loss": 0.2389, + "step": 9344 + }, + { + "epoch": 0.18707304256437204, + "grad_norm": 1.1233468055725098, + "learning_rate": 9.366872594242137e-06, + "loss": 0.3435, + "step": 9345 + }, + { + "epoch": 0.1870930610815004, + "grad_norm": 1.948485255241394, + "learning_rate": 9.366714691982026e-06, + "loss": 0.793, + "step": 9346 + }, + { + "epoch": 0.18711307959862875, + "grad_norm": 1.139007806777954, + "learning_rate": 9.366556771365184e-06, + "loss": 0.3717, + "step": 9347 + }, + { + "epoch": 0.18713309811575707, + "grad_norm": 2.119114637374878, + "learning_rate": 9.366398832392278e-06, + "loss": 0.8268, + "step": 9348 + }, + { + "epoch": 0.18715311663288542, + "grad_norm": 1.0849040746688843, + "learning_rate": 9.366240875063969e-06, + "loss": 0.3413, + "step": 9349 + }, + { + "epoch": 0.18717313515001377, + "grad_norm": 0.9731661677360535, + "learning_rate": 9.366082899380924e-06, + "loss": 0.2751, + "step": 9350 + }, + { + "epoch": 0.1871931536671421, + "grad_norm": 1.082475185394287, + "learning_rate": 9.365924905343807e-06, + "loss": 0.3726, + "step": 9351 + }, + { + "epoch": 0.18721317218427044, + "grad_norm": 1.8871475458145142, + "learning_rate": 9.365766892953281e-06, + "loss": 0.8334, + "step": 9352 + }, + { + "epoch": 0.1872331907013988, + "grad_norm": 1.0944082736968994, + "learning_rate": 9.36560886221001e-06, + "loss": 0.3334, + "step": 9353 + }, + { + "epoch": 0.18725320921852714, + "grad_norm": 0.9740158915519714, + "learning_rate": 9.365450813114658e-06, + "loss": 0.2928, + "step": 9354 + }, + { + "epoch": 0.18727322773565547, + "grad_norm": 1.0693544149398804, + "learning_rate": 9.36529274566789e-06, + "loss": 0.3556, + "step": 9355 + }, + { + "epoch": 0.18729324625278382, + "grad_norm": 1.0873359441757202, + "learning_rate": 9.365134659870375e-06, + "loss": 0.3389, + "step": 9356 + }, + { + "epoch": 0.18731326476991217, + "grad_norm": 1.0677963495254517, + "learning_rate": 9.364976555722769e-06, + "loss": 0.3005, + "step": 9357 + }, + { + "epoch": 0.18733328328704052, + "grad_norm": 2.08933687210083, + "learning_rate": 9.364818433225743e-06, + "loss": 0.746, + "step": 9358 + }, + { + "epoch": 0.18735330180416884, + "grad_norm": 1.132611632347107, + "learning_rate": 9.364660292379958e-06, + "loss": 0.3657, + "step": 9359 + }, + { + "epoch": 0.1873733203212972, + "grad_norm": 1.1110206842422485, + "learning_rate": 9.364502133186082e-06, + "loss": 0.3105, + "step": 9360 + }, + { + "epoch": 0.18739333883842554, + "grad_norm": 1.1519988775253296, + "learning_rate": 9.364343955644777e-06, + "loss": 0.3466, + "step": 9361 + }, + { + "epoch": 0.1874133573555539, + "grad_norm": 1.3369238376617432, + "learning_rate": 9.36418575975671e-06, + "loss": 0.3727, + "step": 9362 + }, + { + "epoch": 0.18743337587268222, + "grad_norm": 1.0442001819610596, + "learning_rate": 9.364027545522545e-06, + "loss": 0.2524, + "step": 9363 + }, + { + "epoch": 0.18745339438981057, + "grad_norm": 1.0535337924957275, + "learning_rate": 9.363869312942947e-06, + "loss": 0.3355, + "step": 9364 + }, + { + "epoch": 0.18747341290693892, + "grad_norm": 1.1210263967514038, + "learning_rate": 9.363711062018582e-06, + "loss": 0.3244, + "step": 9365 + }, + { + "epoch": 0.18749343142406727, + "grad_norm": 1.202736735343933, + "learning_rate": 9.363552792750114e-06, + "loss": 0.321, + "step": 9366 + }, + { + "epoch": 0.1875134499411956, + "grad_norm": 1.0880070924758911, + "learning_rate": 9.36339450513821e-06, + "loss": 0.3371, + "step": 9367 + }, + { + "epoch": 0.18753346845832394, + "grad_norm": 1.1366417407989502, + "learning_rate": 9.363236199183534e-06, + "loss": 0.3631, + "step": 9368 + }, + { + "epoch": 0.1875534869754523, + "grad_norm": 1.0330318212509155, + "learning_rate": 9.363077874886752e-06, + "loss": 0.3043, + "step": 9369 + }, + { + "epoch": 0.18757350549258064, + "grad_norm": 1.070624828338623, + "learning_rate": 9.36291953224853e-06, + "loss": 0.3206, + "step": 9370 + }, + { + "epoch": 0.18759352400970897, + "grad_norm": 1.0776876211166382, + "learning_rate": 9.362761171269533e-06, + "loss": 0.3219, + "step": 9371 + }, + { + "epoch": 0.18761354252683732, + "grad_norm": 1.0031139850616455, + "learning_rate": 9.362602791950426e-06, + "loss": 0.3426, + "step": 9372 + }, + { + "epoch": 0.18763356104396567, + "grad_norm": 1.0459312200546265, + "learning_rate": 9.362444394291876e-06, + "loss": 0.3528, + "step": 9373 + }, + { + "epoch": 0.18765357956109402, + "grad_norm": 1.075258731842041, + "learning_rate": 9.36228597829455e-06, + "loss": 0.3359, + "step": 9374 + }, + { + "epoch": 0.18767359807822234, + "grad_norm": 1.9751243591308594, + "learning_rate": 9.362127543959113e-06, + "loss": 0.8938, + "step": 9375 + }, + { + "epoch": 0.1876936165953507, + "grad_norm": 1.2169157266616821, + "learning_rate": 9.361969091286228e-06, + "loss": 0.3431, + "step": 9376 + }, + { + "epoch": 0.18771363511247904, + "grad_norm": 1.1643775701522827, + "learning_rate": 9.361810620276565e-06, + "loss": 0.3825, + "step": 9377 + }, + { + "epoch": 0.1877336536296074, + "grad_norm": 1.8534913063049316, + "learning_rate": 9.361652130930788e-06, + "loss": 0.8167, + "step": 9378 + }, + { + "epoch": 0.18775367214673572, + "grad_norm": 1.11756432056427, + "learning_rate": 9.361493623249564e-06, + "loss": 0.3245, + "step": 9379 + }, + { + "epoch": 0.18777369066386407, + "grad_norm": 1.1838874816894531, + "learning_rate": 9.361335097233562e-06, + "loss": 0.3294, + "step": 9380 + }, + { + "epoch": 0.18779370918099242, + "grad_norm": 1.1210777759552002, + "learning_rate": 9.361176552883443e-06, + "loss": 0.3742, + "step": 9381 + }, + { + "epoch": 0.18781372769812077, + "grad_norm": 1.2266855239868164, + "learning_rate": 9.361017990199878e-06, + "loss": 0.3388, + "step": 9382 + }, + { + "epoch": 0.1878337462152491, + "grad_norm": 1.1018587350845337, + "learning_rate": 9.360859409183531e-06, + "loss": 0.2959, + "step": 9383 + }, + { + "epoch": 0.18785376473237744, + "grad_norm": 1.122115969657898, + "learning_rate": 9.36070080983507e-06, + "loss": 0.3342, + "step": 9384 + }, + { + "epoch": 0.1878737832495058, + "grad_norm": 1.9070713520050049, + "learning_rate": 9.36054219215516e-06, + "loss": 0.787, + "step": 9385 + }, + { + "epoch": 0.18789380176663414, + "grad_norm": 1.0628900527954102, + "learning_rate": 9.36038355614447e-06, + "loss": 0.2965, + "step": 9386 + }, + { + "epoch": 0.18791382028376247, + "grad_norm": 1.0646051168441772, + "learning_rate": 9.360224901803666e-06, + "loss": 0.3094, + "step": 9387 + }, + { + "epoch": 0.18793383880089082, + "grad_norm": 1.1909178495407104, + "learning_rate": 9.360066229133416e-06, + "loss": 0.3105, + "step": 9388 + }, + { + "epoch": 0.18795385731801917, + "grad_norm": 1.2982994318008423, + "learning_rate": 9.359907538134383e-06, + "loss": 0.3399, + "step": 9389 + }, + { + "epoch": 0.18797387583514752, + "grad_norm": 1.82614004611969, + "learning_rate": 9.359748828807239e-06, + "loss": 0.7729, + "step": 9390 + }, + { + "epoch": 0.18799389435227584, + "grad_norm": 1.0500962734222412, + "learning_rate": 9.35959010115265e-06, + "loss": 0.3486, + "step": 9391 + }, + { + "epoch": 0.1880139128694042, + "grad_norm": 1.0027011632919312, + "learning_rate": 9.35943135517128e-06, + "loss": 0.3044, + "step": 9392 + }, + { + "epoch": 0.18803393138653254, + "grad_norm": 1.0819947719573975, + "learning_rate": 9.3592725908638e-06, + "loss": 0.2684, + "step": 9393 + }, + { + "epoch": 0.1880539499036609, + "grad_norm": 1.4139269590377808, + "learning_rate": 9.359113808230875e-06, + "loss": 0.3245, + "step": 9394 + }, + { + "epoch": 0.18807396842078922, + "grad_norm": 1.068144679069519, + "learning_rate": 9.358955007273175e-06, + "loss": 0.3382, + "step": 9395 + }, + { + "epoch": 0.18809398693791757, + "grad_norm": 1.1788142919540405, + "learning_rate": 9.358796187991366e-06, + "loss": 0.393, + "step": 9396 + }, + { + "epoch": 0.18811400545504592, + "grad_norm": 1.3543397188186646, + "learning_rate": 9.358637350386116e-06, + "loss": 0.3915, + "step": 9397 + }, + { + "epoch": 0.18813402397217427, + "grad_norm": 1.0485090017318726, + "learning_rate": 9.358478494458092e-06, + "loss": 0.347, + "step": 9398 + }, + { + "epoch": 0.1881540424893026, + "grad_norm": 1.148363471031189, + "learning_rate": 9.358319620207963e-06, + "loss": 0.3374, + "step": 9399 + }, + { + "epoch": 0.18817406100643094, + "grad_norm": 2.100086212158203, + "learning_rate": 9.358160727636394e-06, + "loss": 0.8498, + "step": 9400 + }, + { + "epoch": 0.1881940795235593, + "grad_norm": 1.079271912574768, + "learning_rate": 9.358001816744057e-06, + "loss": 0.297, + "step": 9401 + }, + { + "epoch": 0.18821409804068764, + "grad_norm": 1.1523125171661377, + "learning_rate": 9.357842887531619e-06, + "loss": 0.2905, + "step": 9402 + }, + { + "epoch": 0.18823411655781597, + "grad_norm": 1.2908012866973877, + "learning_rate": 9.357683939999747e-06, + "loss": 0.3206, + "step": 9403 + }, + { + "epoch": 0.18825413507494432, + "grad_norm": 1.1720280647277832, + "learning_rate": 9.35752497414911e-06, + "loss": 0.3221, + "step": 9404 + }, + { + "epoch": 0.18827415359207267, + "grad_norm": 1.099307894706726, + "learning_rate": 9.357365989980374e-06, + "loss": 0.3328, + "step": 9405 + }, + { + "epoch": 0.18829417210920102, + "grad_norm": 1.0781779289245605, + "learning_rate": 9.357206987494209e-06, + "loss": 0.3504, + "step": 9406 + }, + { + "epoch": 0.18831419062632934, + "grad_norm": 1.0934360027313232, + "learning_rate": 9.357047966691285e-06, + "loss": 0.3052, + "step": 9407 + }, + { + "epoch": 0.1883342091434577, + "grad_norm": 1.8130406141281128, + "learning_rate": 9.35688892757227e-06, + "loss": 0.8448, + "step": 9408 + }, + { + "epoch": 0.18835422766058604, + "grad_norm": 1.068660020828247, + "learning_rate": 9.356729870137828e-06, + "loss": 0.3103, + "step": 9409 + }, + { + "epoch": 0.1883742461777144, + "grad_norm": 1.0445294380187988, + "learning_rate": 9.356570794388632e-06, + "loss": 0.3389, + "step": 9410 + }, + { + "epoch": 0.18839426469484272, + "grad_norm": 1.223968505859375, + "learning_rate": 9.356411700325353e-06, + "loss": 0.3562, + "step": 9411 + }, + { + "epoch": 0.18841428321197107, + "grad_norm": 1.0768879652023315, + "learning_rate": 9.356252587948655e-06, + "loss": 0.2951, + "step": 9412 + }, + { + "epoch": 0.18843430172909942, + "grad_norm": 1.9256985187530518, + "learning_rate": 9.356093457259208e-06, + "loss": 0.8504, + "step": 9413 + }, + { + "epoch": 0.18845432024622777, + "grad_norm": 1.0732721090316772, + "learning_rate": 9.355934308257681e-06, + "loss": 0.3443, + "step": 9414 + }, + { + "epoch": 0.1884743387633561, + "grad_norm": 1.148089051246643, + "learning_rate": 9.355775140944745e-06, + "loss": 0.3281, + "step": 9415 + }, + { + "epoch": 0.18849435728048444, + "grad_norm": 1.2787420749664307, + "learning_rate": 9.355615955321066e-06, + "loss": 0.3582, + "step": 9416 + }, + { + "epoch": 0.1885143757976128, + "grad_norm": 1.1481270790100098, + "learning_rate": 9.355456751387317e-06, + "loss": 0.3045, + "step": 9417 + }, + { + "epoch": 0.18853439431474114, + "grad_norm": 1.2197821140289307, + "learning_rate": 9.355297529144166e-06, + "loss": 0.373, + "step": 9418 + }, + { + "epoch": 0.18855441283186947, + "grad_norm": 1.8019211292266846, + "learning_rate": 9.355138288592279e-06, + "loss": 0.8394, + "step": 9419 + }, + { + "epoch": 0.18857443134899782, + "grad_norm": 1.1324214935302734, + "learning_rate": 9.354979029732329e-06, + "loss": 0.2902, + "step": 9420 + }, + { + "epoch": 0.18859444986612617, + "grad_norm": 1.113982081413269, + "learning_rate": 9.354819752564983e-06, + "loss": 0.3226, + "step": 9421 + }, + { + "epoch": 0.18861446838325452, + "grad_norm": 1.09498131275177, + "learning_rate": 9.354660457090912e-06, + "loss": 0.3174, + "step": 9422 + }, + { + "epoch": 0.18863448690038284, + "grad_norm": 1.2522779703140259, + "learning_rate": 9.354501143310788e-06, + "loss": 0.3339, + "step": 9423 + }, + { + "epoch": 0.1886545054175112, + "grad_norm": 1.2815154790878296, + "learning_rate": 9.354341811225277e-06, + "loss": 0.3084, + "step": 9424 + }, + { + "epoch": 0.18867452393463954, + "grad_norm": 1.1861443519592285, + "learning_rate": 9.354182460835048e-06, + "loss": 0.3405, + "step": 9425 + }, + { + "epoch": 0.1886945424517679, + "grad_norm": 1.0902656316757202, + "learning_rate": 9.354023092140776e-06, + "loss": 0.335, + "step": 9426 + }, + { + "epoch": 0.18871456096889622, + "grad_norm": 1.119844913482666, + "learning_rate": 9.353863705143127e-06, + "loss": 0.3336, + "step": 9427 + }, + { + "epoch": 0.18873457948602457, + "grad_norm": 1.0054066181182861, + "learning_rate": 9.353704299842771e-06, + "loss": 0.2976, + "step": 9428 + }, + { + "epoch": 0.18875459800315292, + "grad_norm": 1.2711838483810425, + "learning_rate": 9.35354487624038e-06, + "loss": 0.3427, + "step": 9429 + }, + { + "epoch": 0.18877461652028127, + "grad_norm": 1.334396481513977, + "learning_rate": 9.353385434336621e-06, + "loss": 0.3529, + "step": 9430 + }, + { + "epoch": 0.1887946350374096, + "grad_norm": 1.0577473640441895, + "learning_rate": 9.35322597413217e-06, + "loss": 0.3502, + "step": 9431 + }, + { + "epoch": 0.18881465355453794, + "grad_norm": 1.0403735637664795, + "learning_rate": 9.353066495627692e-06, + "loss": 0.3064, + "step": 9432 + }, + { + "epoch": 0.1888346720716663, + "grad_norm": 1.2299277782440186, + "learning_rate": 9.352906998823858e-06, + "loss": 0.3621, + "step": 9433 + }, + { + "epoch": 0.18885469058879464, + "grad_norm": 1.1831188201904297, + "learning_rate": 9.352747483721342e-06, + "loss": 0.3204, + "step": 9434 + }, + { + "epoch": 0.18887470910592297, + "grad_norm": 1.144119381904602, + "learning_rate": 9.352587950320811e-06, + "loss": 0.3343, + "step": 9435 + }, + { + "epoch": 0.18889472762305132, + "grad_norm": 1.083450198173523, + "learning_rate": 9.35242839862294e-06, + "loss": 0.2818, + "step": 9436 + }, + { + "epoch": 0.18891474614017967, + "grad_norm": 1.0602953433990479, + "learning_rate": 9.352268828628393e-06, + "loss": 0.3116, + "step": 9437 + }, + { + "epoch": 0.18893476465730802, + "grad_norm": 1.096150279045105, + "learning_rate": 9.352109240337846e-06, + "loss": 0.3193, + "step": 9438 + }, + { + "epoch": 0.18895478317443634, + "grad_norm": 1.1080553531646729, + "learning_rate": 9.351949633751967e-06, + "loss": 0.3301, + "step": 9439 + }, + { + "epoch": 0.1889748016915647, + "grad_norm": 1.3402842283248901, + "learning_rate": 9.351790008871428e-06, + "loss": 0.3425, + "step": 9440 + }, + { + "epoch": 0.18899482020869304, + "grad_norm": 1.052371859550476, + "learning_rate": 9.351630365696903e-06, + "loss": 0.3032, + "step": 9441 + }, + { + "epoch": 0.1890148387258214, + "grad_norm": 1.0538502931594849, + "learning_rate": 9.351470704229059e-06, + "loss": 0.3422, + "step": 9442 + }, + { + "epoch": 0.18903485724294972, + "grad_norm": 1.1019114255905151, + "learning_rate": 9.351311024468568e-06, + "loss": 0.3648, + "step": 9443 + }, + { + "epoch": 0.18905487576007807, + "grad_norm": 1.1295393705368042, + "learning_rate": 9.351151326416102e-06, + "loss": 0.3498, + "step": 9444 + }, + { + "epoch": 0.18907489427720642, + "grad_norm": 1.298710584640503, + "learning_rate": 9.350991610072334e-06, + "loss": 0.3734, + "step": 9445 + }, + { + "epoch": 0.18909491279433477, + "grad_norm": 1.1471196413040161, + "learning_rate": 9.350831875437931e-06, + "loss": 0.3381, + "step": 9446 + }, + { + "epoch": 0.1891149313114631, + "grad_norm": 1.1802310943603516, + "learning_rate": 9.350672122513568e-06, + "loss": 0.3673, + "step": 9447 + }, + { + "epoch": 0.18913494982859144, + "grad_norm": 1.0701504945755005, + "learning_rate": 9.350512351299916e-06, + "loss": 0.3185, + "step": 9448 + }, + { + "epoch": 0.1891549683457198, + "grad_norm": 1.1780211925506592, + "learning_rate": 9.350352561797646e-06, + "loss": 0.3177, + "step": 9449 + }, + { + "epoch": 0.18917498686284814, + "grad_norm": 1.208452582359314, + "learning_rate": 9.35019275400743e-06, + "loss": 0.3431, + "step": 9450 + }, + { + "epoch": 0.18919500537997647, + "grad_norm": 1.1484673023223877, + "learning_rate": 9.350032927929941e-06, + "loss": 0.3052, + "step": 9451 + }, + { + "epoch": 0.18921502389710482, + "grad_norm": 1.1306320428848267, + "learning_rate": 9.349873083565851e-06, + "loss": 0.3257, + "step": 9452 + }, + { + "epoch": 0.18923504241423317, + "grad_norm": 1.2953839302062988, + "learning_rate": 9.349713220915827e-06, + "loss": 0.3035, + "step": 9453 + }, + { + "epoch": 0.18925506093136152, + "grad_norm": 1.030592679977417, + "learning_rate": 9.349553339980545e-06, + "loss": 0.3121, + "step": 9454 + }, + { + "epoch": 0.18927507944848984, + "grad_norm": 1.0098881721496582, + "learning_rate": 9.349393440760679e-06, + "loss": 0.3431, + "step": 9455 + }, + { + "epoch": 0.1892950979656182, + "grad_norm": 1.0330064296722412, + "learning_rate": 9.349233523256897e-06, + "loss": 0.3519, + "step": 9456 + }, + { + "epoch": 0.18931511648274654, + "grad_norm": 1.2053924798965454, + "learning_rate": 9.349073587469875e-06, + "loss": 0.3707, + "step": 9457 + }, + { + "epoch": 0.1893351349998749, + "grad_norm": 2.04439377784729, + "learning_rate": 9.348913633400283e-06, + "loss": 0.7201, + "step": 9458 + }, + { + "epoch": 0.18935515351700322, + "grad_norm": 1.392842411994934, + "learning_rate": 9.348753661048794e-06, + "loss": 0.3196, + "step": 9459 + }, + { + "epoch": 0.18937517203413157, + "grad_norm": 1.187294840812683, + "learning_rate": 9.34859367041608e-06, + "loss": 0.3267, + "step": 9460 + }, + { + "epoch": 0.18939519055125992, + "grad_norm": 1.1525380611419678, + "learning_rate": 9.348433661502813e-06, + "loss": 0.3374, + "step": 9461 + }, + { + "epoch": 0.18941520906838827, + "grad_norm": 1.0311368703842163, + "learning_rate": 9.34827363430967e-06, + "loss": 0.3174, + "step": 9462 + }, + { + "epoch": 0.1894352275855166, + "grad_norm": 1.2270207405090332, + "learning_rate": 9.348113588837318e-06, + "loss": 0.2924, + "step": 9463 + }, + { + "epoch": 0.18945524610264494, + "grad_norm": 1.2368131875991821, + "learning_rate": 9.347953525086433e-06, + "loss": 0.354, + "step": 9464 + }, + { + "epoch": 0.1894752646197733, + "grad_norm": 1.0956538915634155, + "learning_rate": 9.347793443057686e-06, + "loss": 0.2832, + "step": 9465 + }, + { + "epoch": 0.18949528313690164, + "grad_norm": 1.1044065952301025, + "learning_rate": 9.347633342751752e-06, + "loss": 0.3342, + "step": 9466 + }, + { + "epoch": 0.18951530165402997, + "grad_norm": 1.0717358589172363, + "learning_rate": 9.347473224169302e-06, + "loss": 0.3471, + "step": 9467 + }, + { + "epoch": 0.18953532017115832, + "grad_norm": 0.994202196598053, + "learning_rate": 9.347313087311011e-06, + "loss": 0.2713, + "step": 9468 + }, + { + "epoch": 0.18955533868828667, + "grad_norm": 1.0556105375289917, + "learning_rate": 9.347152932177552e-06, + "loss": 0.3183, + "step": 9469 + }, + { + "epoch": 0.18957535720541502, + "grad_norm": 1.038812279701233, + "learning_rate": 9.346992758769597e-06, + "loss": 0.3425, + "step": 9470 + }, + { + "epoch": 0.18959537572254334, + "grad_norm": 1.0579471588134766, + "learning_rate": 9.346832567087819e-06, + "loss": 0.3331, + "step": 9471 + }, + { + "epoch": 0.1896153942396717, + "grad_norm": 1.8002562522888184, + "learning_rate": 9.346672357132894e-06, + "loss": 0.8219, + "step": 9472 + }, + { + "epoch": 0.18963541275680004, + "grad_norm": 1.0511070489883423, + "learning_rate": 9.346512128905491e-06, + "loss": 0.3117, + "step": 9473 + }, + { + "epoch": 0.1896554312739284, + "grad_norm": 1.920459508895874, + "learning_rate": 9.34635188240629e-06, + "loss": 0.8812, + "step": 9474 + }, + { + "epoch": 0.18967544979105672, + "grad_norm": 0.9742267727851868, + "learning_rate": 9.346191617635958e-06, + "loss": 0.3059, + "step": 9475 + }, + { + "epoch": 0.18969546830818507, + "grad_norm": 1.0796504020690918, + "learning_rate": 9.346031334595173e-06, + "loss": 0.3575, + "step": 9476 + }, + { + "epoch": 0.18971548682531342, + "grad_norm": 1.0779645442962646, + "learning_rate": 9.345871033284607e-06, + "loss": 0.2799, + "step": 9477 + }, + { + "epoch": 0.18973550534244177, + "grad_norm": 1.047416090965271, + "learning_rate": 9.345710713704937e-06, + "loss": 0.2881, + "step": 9478 + }, + { + "epoch": 0.1897555238595701, + "grad_norm": 1.1381585597991943, + "learning_rate": 9.34555037585683e-06, + "loss": 0.3147, + "step": 9479 + }, + { + "epoch": 0.18977554237669844, + "grad_norm": 0.9610379338264465, + "learning_rate": 9.345390019740968e-06, + "loss": 0.3489, + "step": 9480 + }, + { + "epoch": 0.1897955608938268, + "grad_norm": 0.9761527180671692, + "learning_rate": 9.34522964535802e-06, + "loss": 0.3027, + "step": 9481 + }, + { + "epoch": 0.18981557941095514, + "grad_norm": 1.1672879457473755, + "learning_rate": 9.345069252708663e-06, + "loss": 0.3622, + "step": 9482 + }, + { + "epoch": 0.18983559792808347, + "grad_norm": 1.3392497301101685, + "learning_rate": 9.344908841793569e-06, + "loss": 0.3128, + "step": 9483 + }, + { + "epoch": 0.18985561644521182, + "grad_norm": 1.7428901195526123, + "learning_rate": 9.344748412613413e-06, + "loss": 0.7716, + "step": 9484 + }, + { + "epoch": 0.18987563496234017, + "grad_norm": 1.0991196632385254, + "learning_rate": 9.34458796516887e-06, + "loss": 0.3672, + "step": 9485 + }, + { + "epoch": 0.18989565347946852, + "grad_norm": 1.0420119762420654, + "learning_rate": 9.344427499460613e-06, + "loss": 0.3564, + "step": 9486 + }, + { + "epoch": 0.18991567199659684, + "grad_norm": 1.1358752250671387, + "learning_rate": 9.344267015489318e-06, + "loss": 0.3787, + "step": 9487 + }, + { + "epoch": 0.1899356905137252, + "grad_norm": 1.038804531097412, + "learning_rate": 9.344106513255661e-06, + "loss": 0.3154, + "step": 9488 + }, + { + "epoch": 0.18995570903085354, + "grad_norm": 1.0189186334609985, + "learning_rate": 9.343945992760314e-06, + "loss": 0.3089, + "step": 9489 + }, + { + "epoch": 0.1899757275479819, + "grad_norm": 1.1064847707748413, + "learning_rate": 9.343785454003953e-06, + "loss": 0.2959, + "step": 9490 + }, + { + "epoch": 0.18999574606511022, + "grad_norm": 1.0538078546524048, + "learning_rate": 9.343624896987253e-06, + "loss": 0.3013, + "step": 9491 + }, + { + "epoch": 0.19001576458223857, + "grad_norm": 1.1587589979171753, + "learning_rate": 9.343464321710889e-06, + "loss": 0.3624, + "step": 9492 + }, + { + "epoch": 0.19003578309936692, + "grad_norm": 1.045237421989441, + "learning_rate": 9.343303728175535e-06, + "loss": 0.3438, + "step": 9493 + }, + { + "epoch": 0.19005580161649527, + "grad_norm": 1.1618638038635254, + "learning_rate": 9.343143116381868e-06, + "loss": 0.3107, + "step": 9494 + }, + { + "epoch": 0.1900758201336236, + "grad_norm": 1.1599942445755005, + "learning_rate": 9.34298248633056e-06, + "loss": 0.3266, + "step": 9495 + }, + { + "epoch": 0.19009583865075194, + "grad_norm": 1.0145117044448853, + "learning_rate": 9.34282183802229e-06, + "loss": 0.3105, + "step": 9496 + }, + { + "epoch": 0.1901158571678803, + "grad_norm": 1.040149211883545, + "learning_rate": 9.34266117145773e-06, + "loss": 0.3212, + "step": 9497 + }, + { + "epoch": 0.19013587568500864, + "grad_norm": 1.0692713260650635, + "learning_rate": 9.342500486637558e-06, + "loss": 0.3377, + "step": 9498 + }, + { + "epoch": 0.19015589420213697, + "grad_norm": 1.2185957431793213, + "learning_rate": 9.342339783562448e-06, + "loss": 0.3547, + "step": 9499 + }, + { + "epoch": 0.19017591271926532, + "grad_norm": 1.131534457206726, + "learning_rate": 9.342179062233077e-06, + "loss": 0.3237, + "step": 9500 + }, + { + "epoch": 0.19019593123639367, + "grad_norm": 1.2415355443954468, + "learning_rate": 9.342018322650117e-06, + "loss": 0.3528, + "step": 9501 + }, + { + "epoch": 0.19021594975352202, + "grad_norm": 1.106290578842163, + "learning_rate": 9.34185756481425e-06, + "loss": 0.3287, + "step": 9502 + }, + { + "epoch": 0.19023596827065034, + "grad_norm": 1.3788784742355347, + "learning_rate": 9.341696788726145e-06, + "loss": 0.3661, + "step": 9503 + }, + { + "epoch": 0.1902559867877787, + "grad_norm": 1.116472840309143, + "learning_rate": 9.341535994386484e-06, + "loss": 0.3405, + "step": 9504 + }, + { + "epoch": 0.19027600530490704, + "grad_norm": 1.9076331853866577, + "learning_rate": 9.341375181795937e-06, + "loss": 0.8014, + "step": 9505 + }, + { + "epoch": 0.1902960238220354, + "grad_norm": 1.1476070880889893, + "learning_rate": 9.341214350955186e-06, + "loss": 0.3217, + "step": 9506 + }, + { + "epoch": 0.19031604233916372, + "grad_norm": 1.0734155178070068, + "learning_rate": 9.341053501864903e-06, + "loss": 0.3533, + "step": 9507 + }, + { + "epoch": 0.19033606085629207, + "grad_norm": 1.0857675075531006, + "learning_rate": 9.340892634525764e-06, + "loss": 0.3524, + "step": 9508 + }, + { + "epoch": 0.19035607937342042, + "grad_norm": 1.127243995666504, + "learning_rate": 9.340731748938448e-06, + "loss": 0.2869, + "step": 9509 + }, + { + "epoch": 0.19037609789054877, + "grad_norm": 1.1276401281356812, + "learning_rate": 9.34057084510363e-06, + "loss": 0.3643, + "step": 9510 + }, + { + "epoch": 0.1903961164076771, + "grad_norm": 1.1264973878860474, + "learning_rate": 9.340409923021985e-06, + "loss": 0.3177, + "step": 9511 + }, + { + "epoch": 0.19041613492480544, + "grad_norm": 1.0946948528289795, + "learning_rate": 9.340248982694192e-06, + "loss": 0.3529, + "step": 9512 + }, + { + "epoch": 0.1904361534419338, + "grad_norm": 1.0272300243377686, + "learning_rate": 9.340088024120924e-06, + "loss": 0.3218, + "step": 9513 + }, + { + "epoch": 0.19045617195906214, + "grad_norm": 1.0660109519958496, + "learning_rate": 9.33992704730286e-06, + "loss": 0.3447, + "step": 9514 + }, + { + "epoch": 0.19047619047619047, + "grad_norm": 1.224469780921936, + "learning_rate": 9.33976605224068e-06, + "loss": 0.3456, + "step": 9515 + }, + { + "epoch": 0.19049620899331882, + "grad_norm": 1.1336978673934937, + "learning_rate": 9.339605038935055e-06, + "loss": 0.3129, + "step": 9516 + }, + { + "epoch": 0.19051622751044717, + "grad_norm": 1.101038932800293, + "learning_rate": 9.339444007386667e-06, + "loss": 0.3236, + "step": 9517 + }, + { + "epoch": 0.19053624602757552, + "grad_norm": 1.027925968170166, + "learning_rate": 9.339282957596188e-06, + "loss": 0.2856, + "step": 9518 + }, + { + "epoch": 0.19055626454470384, + "grad_norm": 1.275011420249939, + "learning_rate": 9.339121889564297e-06, + "loss": 0.346, + "step": 9519 + }, + { + "epoch": 0.1905762830618322, + "grad_norm": 1.1543980836868286, + "learning_rate": 9.338960803291672e-06, + "loss": 0.3362, + "step": 9520 + }, + { + "epoch": 0.19059630157896054, + "grad_norm": 1.1141841411590576, + "learning_rate": 9.338799698778988e-06, + "loss": 0.3004, + "step": 9521 + }, + { + "epoch": 0.1906163200960889, + "grad_norm": 1.1591721773147583, + "learning_rate": 9.338638576026925e-06, + "loss": 0.3564, + "step": 9522 + }, + { + "epoch": 0.19063633861321722, + "grad_norm": 1.1639065742492676, + "learning_rate": 9.33847743503616e-06, + "loss": 0.2914, + "step": 9523 + }, + { + "epoch": 0.19065635713034557, + "grad_norm": 1.1395447254180908, + "learning_rate": 9.33831627580737e-06, + "loss": 0.3464, + "step": 9524 + }, + { + "epoch": 0.19067637564747392, + "grad_norm": 1.020411729812622, + "learning_rate": 9.33815509834123e-06, + "loss": 0.3212, + "step": 9525 + }, + { + "epoch": 0.19069639416460227, + "grad_norm": 1.1391760110855103, + "learning_rate": 9.337993902638421e-06, + "loss": 0.3756, + "step": 9526 + }, + { + "epoch": 0.1907164126817306, + "grad_norm": 1.084157943725586, + "learning_rate": 9.337832688699618e-06, + "loss": 0.3689, + "step": 9527 + }, + { + "epoch": 0.19073643119885894, + "grad_norm": 1.1470504999160767, + "learning_rate": 9.337671456525501e-06, + "loss": 0.3134, + "step": 9528 + }, + { + "epoch": 0.1907564497159873, + "grad_norm": 1.1371221542358398, + "learning_rate": 9.337510206116747e-06, + "loss": 0.3762, + "step": 9529 + }, + { + "epoch": 0.19077646823311564, + "grad_norm": 1.8001720905303955, + "learning_rate": 9.337348937474032e-06, + "loss": 0.8166, + "step": 9530 + }, + { + "epoch": 0.19079648675024397, + "grad_norm": 0.9613967537879944, + "learning_rate": 9.337187650598037e-06, + "loss": 0.3167, + "step": 9531 + }, + { + "epoch": 0.19081650526737232, + "grad_norm": 1.2253798246383667, + "learning_rate": 9.337026345489437e-06, + "loss": 0.3397, + "step": 9532 + }, + { + "epoch": 0.19083652378450067, + "grad_norm": 1.181817889213562, + "learning_rate": 9.336865022148912e-06, + "loss": 0.3484, + "step": 9533 + }, + { + "epoch": 0.19085654230162902, + "grad_norm": 1.0543694496154785, + "learning_rate": 9.336703680577139e-06, + "loss": 0.3042, + "step": 9534 + }, + { + "epoch": 0.19087656081875734, + "grad_norm": 1.0488168001174927, + "learning_rate": 9.3365423207748e-06, + "loss": 0.3724, + "step": 9535 + }, + { + "epoch": 0.1908965793358857, + "grad_norm": 1.1421557664871216, + "learning_rate": 9.336380942742567e-06, + "loss": 0.3613, + "step": 9536 + }, + { + "epoch": 0.19091659785301404, + "grad_norm": 1.1246862411499023, + "learning_rate": 9.336219546481123e-06, + "loss": 0.3486, + "step": 9537 + }, + { + "epoch": 0.1909366163701424, + "grad_norm": 1.0554344654083252, + "learning_rate": 9.336058131991146e-06, + "loss": 0.3134, + "step": 9538 + }, + { + "epoch": 0.19095663488727072, + "grad_norm": 1.0505086183547974, + "learning_rate": 9.335896699273312e-06, + "loss": 0.3034, + "step": 9539 + }, + { + "epoch": 0.19097665340439907, + "grad_norm": 1.1380547285079956, + "learning_rate": 9.335735248328303e-06, + "loss": 0.3839, + "step": 9540 + }, + { + "epoch": 0.19099667192152742, + "grad_norm": 1.09473717212677, + "learning_rate": 9.335573779156794e-06, + "loss": 0.3684, + "step": 9541 + }, + { + "epoch": 0.19101669043865577, + "grad_norm": 1.07272469997406, + "learning_rate": 9.335412291759469e-06, + "loss": 0.3185, + "step": 9542 + }, + { + "epoch": 0.1910367089557841, + "grad_norm": 1.1571239233016968, + "learning_rate": 9.335250786137001e-06, + "loss": 0.324, + "step": 9543 + }, + { + "epoch": 0.19105672747291244, + "grad_norm": 0.9749355316162109, + "learning_rate": 9.335089262290074e-06, + "loss": 0.3178, + "step": 9544 + }, + { + "epoch": 0.1910767459900408, + "grad_norm": 1.1386971473693848, + "learning_rate": 9.334927720219362e-06, + "loss": 0.342, + "step": 9545 + }, + { + "epoch": 0.19109676450716914, + "grad_norm": 1.1909139156341553, + "learning_rate": 9.334766159925548e-06, + "loss": 0.2838, + "step": 9546 + }, + { + "epoch": 0.19111678302429747, + "grad_norm": 1.1304603815078735, + "learning_rate": 9.33460458140931e-06, + "loss": 0.3357, + "step": 9547 + }, + { + "epoch": 0.19113680154142582, + "grad_norm": 1.0697492361068726, + "learning_rate": 9.334442984671329e-06, + "loss": 0.3261, + "step": 9548 + }, + { + "epoch": 0.19115682005855417, + "grad_norm": 1.3694778680801392, + "learning_rate": 9.33428136971228e-06, + "loss": 0.3426, + "step": 9549 + }, + { + "epoch": 0.19117683857568252, + "grad_norm": 1.149630069732666, + "learning_rate": 9.334119736532845e-06, + "loss": 0.283, + "step": 9550 + }, + { + "epoch": 0.19119685709281084, + "grad_norm": 1.1705940961837769, + "learning_rate": 9.333958085133704e-06, + "loss": 0.337, + "step": 9551 + }, + { + "epoch": 0.1912168756099392, + "grad_norm": 1.1056398153305054, + "learning_rate": 9.333796415515535e-06, + "loss": 0.3483, + "step": 9552 + }, + { + "epoch": 0.19123689412706754, + "grad_norm": 1.14877188205719, + "learning_rate": 9.33363472767902e-06, + "loss": 0.3473, + "step": 9553 + }, + { + "epoch": 0.1912569126441959, + "grad_norm": 1.304114580154419, + "learning_rate": 9.333473021624835e-06, + "loss": 0.3382, + "step": 9554 + }, + { + "epoch": 0.19127693116132422, + "grad_norm": 1.0575870275497437, + "learning_rate": 9.333311297353665e-06, + "loss": 0.283, + "step": 9555 + }, + { + "epoch": 0.19129694967845257, + "grad_norm": 1.1132817268371582, + "learning_rate": 9.333149554866184e-06, + "loss": 0.306, + "step": 9556 + }, + { + "epoch": 0.19131696819558092, + "grad_norm": 1.1465339660644531, + "learning_rate": 9.332987794163077e-06, + "loss": 0.3447, + "step": 9557 + }, + { + "epoch": 0.19133698671270927, + "grad_norm": 1.305328130722046, + "learning_rate": 9.33282601524502e-06, + "loss": 0.3535, + "step": 9558 + }, + { + "epoch": 0.1913570052298376, + "grad_norm": 1.1129130125045776, + "learning_rate": 9.332664218112696e-06, + "loss": 0.3337, + "step": 9559 + }, + { + "epoch": 0.19137702374696594, + "grad_norm": 1.0788969993591309, + "learning_rate": 9.332502402766783e-06, + "loss": 0.3496, + "step": 9560 + }, + { + "epoch": 0.1913970422640943, + "grad_norm": 1.081824779510498, + "learning_rate": 9.332340569207963e-06, + "loss": 0.318, + "step": 9561 + }, + { + "epoch": 0.19141706078122264, + "grad_norm": 1.2304913997650146, + "learning_rate": 9.332178717436917e-06, + "loss": 0.3737, + "step": 9562 + }, + { + "epoch": 0.19143707929835097, + "grad_norm": 1.1991417407989502, + "learning_rate": 9.332016847454321e-06, + "loss": 0.3451, + "step": 9563 + }, + { + "epoch": 0.19145709781547932, + "grad_norm": 1.136746883392334, + "learning_rate": 9.33185495926086e-06, + "loss": 0.3465, + "step": 9564 + }, + { + "epoch": 0.19147711633260767, + "grad_norm": 1.0664364099502563, + "learning_rate": 9.331693052857214e-06, + "loss": 0.3361, + "step": 9565 + }, + { + "epoch": 0.19149713484973602, + "grad_norm": 1.0039142370224, + "learning_rate": 9.33153112824406e-06, + "loss": 0.3159, + "step": 9566 + }, + { + "epoch": 0.19151715336686434, + "grad_norm": 1.0909534692764282, + "learning_rate": 9.331369185422084e-06, + "loss": 0.3536, + "step": 9567 + }, + { + "epoch": 0.1915371718839927, + "grad_norm": 1.152481198310852, + "learning_rate": 9.331207224391962e-06, + "loss": 0.321, + "step": 9568 + }, + { + "epoch": 0.19155719040112104, + "grad_norm": 1.0925105810165405, + "learning_rate": 9.331045245154378e-06, + "loss": 0.2866, + "step": 9569 + }, + { + "epoch": 0.1915772089182494, + "grad_norm": 1.035976767539978, + "learning_rate": 9.330883247710012e-06, + "loss": 0.323, + "step": 9570 + }, + { + "epoch": 0.19159722743537771, + "grad_norm": 1.1540205478668213, + "learning_rate": 9.330721232059544e-06, + "loss": 0.3264, + "step": 9571 + }, + { + "epoch": 0.19161724595250607, + "grad_norm": 1.1242775917053223, + "learning_rate": 9.330559198203658e-06, + "loss": 0.3604, + "step": 9572 + }, + { + "epoch": 0.19163726446963442, + "grad_norm": 1.9029438495635986, + "learning_rate": 9.33039714614303e-06, + "loss": 0.8352, + "step": 9573 + }, + { + "epoch": 0.19165728298676277, + "grad_norm": 1.9556167125701904, + "learning_rate": 9.330235075878345e-06, + "loss": 0.8836, + "step": 9574 + }, + { + "epoch": 0.1916773015038911, + "grad_norm": 0.9625478982925415, + "learning_rate": 9.330072987410285e-06, + "loss": 0.3281, + "step": 9575 + }, + { + "epoch": 0.19169732002101944, + "grad_norm": 1.066133975982666, + "learning_rate": 9.32991088073953e-06, + "loss": 0.3028, + "step": 9576 + }, + { + "epoch": 0.1917173385381478, + "grad_norm": 1.368266224861145, + "learning_rate": 9.329748755866762e-06, + "loss": 0.3524, + "step": 9577 + }, + { + "epoch": 0.19173735705527614, + "grad_norm": 1.1632037162780762, + "learning_rate": 9.32958661279266e-06, + "loss": 0.3356, + "step": 9578 + }, + { + "epoch": 0.19175737557240446, + "grad_norm": 1.0663126707077026, + "learning_rate": 9.329424451517907e-06, + "loss": 0.3433, + "step": 9579 + }, + { + "epoch": 0.19177739408953282, + "grad_norm": 1.8919156789779663, + "learning_rate": 9.329262272043186e-06, + "loss": 0.8207, + "step": 9580 + }, + { + "epoch": 0.19179741260666117, + "grad_norm": 1.145593285560608, + "learning_rate": 9.329100074369181e-06, + "loss": 0.322, + "step": 9581 + }, + { + "epoch": 0.19181743112378952, + "grad_norm": 1.1602751016616821, + "learning_rate": 9.328937858496567e-06, + "loss": 0.3459, + "step": 9582 + }, + { + "epoch": 0.19183744964091784, + "grad_norm": 1.1289316415786743, + "learning_rate": 9.328775624426032e-06, + "loss": 0.3454, + "step": 9583 + }, + { + "epoch": 0.1918574681580462, + "grad_norm": 1.292126178741455, + "learning_rate": 9.328613372158257e-06, + "loss": 0.3387, + "step": 9584 + }, + { + "epoch": 0.19187748667517454, + "grad_norm": 1.0755709409713745, + "learning_rate": 9.32845110169392e-06, + "loss": 0.3731, + "step": 9585 + }, + { + "epoch": 0.1918975051923029, + "grad_norm": 1.1152241230010986, + "learning_rate": 9.328288813033708e-06, + "loss": 0.3052, + "step": 9586 + }, + { + "epoch": 0.19191752370943121, + "grad_norm": 1.1058557033538818, + "learning_rate": 9.328126506178301e-06, + "loss": 0.2944, + "step": 9587 + }, + { + "epoch": 0.19193754222655957, + "grad_norm": 1.1792995929718018, + "learning_rate": 9.327964181128381e-06, + "loss": 0.316, + "step": 9588 + }, + { + "epoch": 0.19195756074368792, + "grad_norm": 1.1222962141036987, + "learning_rate": 9.32780183788463e-06, + "loss": 0.3008, + "step": 9589 + }, + { + "epoch": 0.19197757926081627, + "grad_norm": 1.1905908584594727, + "learning_rate": 9.327639476447734e-06, + "loss": 0.3582, + "step": 9590 + }, + { + "epoch": 0.1919975977779446, + "grad_norm": 1.101034164428711, + "learning_rate": 9.327477096818372e-06, + "loss": 0.37, + "step": 9591 + }, + { + "epoch": 0.19201761629507294, + "grad_norm": 1.8105194568634033, + "learning_rate": 9.327314698997227e-06, + "loss": 0.8228, + "step": 9592 + }, + { + "epoch": 0.1920376348122013, + "grad_norm": 1.1072372198104858, + "learning_rate": 9.327152282984983e-06, + "loss": 0.3297, + "step": 9593 + }, + { + "epoch": 0.19205765332932964, + "grad_norm": 1.072431206703186, + "learning_rate": 9.326989848782322e-06, + "loss": 0.3708, + "step": 9594 + }, + { + "epoch": 0.19207767184645796, + "grad_norm": 1.1566712856292725, + "learning_rate": 9.326827396389926e-06, + "loss": 0.3623, + "step": 9595 + }, + { + "epoch": 0.19209769036358632, + "grad_norm": 1.0085828304290771, + "learning_rate": 9.326664925808478e-06, + "loss": 0.3179, + "step": 9596 + }, + { + "epoch": 0.19211770888071467, + "grad_norm": 1.8138822317123413, + "learning_rate": 9.326502437038663e-06, + "loss": 0.7857, + "step": 9597 + }, + { + "epoch": 0.19213772739784302, + "grad_norm": 1.1145302057266235, + "learning_rate": 9.326339930081162e-06, + "loss": 0.316, + "step": 9598 + }, + { + "epoch": 0.19215774591497134, + "grad_norm": 1.0594854354858398, + "learning_rate": 9.326177404936661e-06, + "loss": 0.331, + "step": 9599 + }, + { + "epoch": 0.1921777644320997, + "grad_norm": 1.1357619762420654, + "learning_rate": 9.32601486160584e-06, + "loss": 0.3514, + "step": 9600 + }, + { + "epoch": 0.19219778294922804, + "grad_norm": 0.9741693735122681, + "learning_rate": 9.325852300089382e-06, + "loss": 0.3158, + "step": 9601 + }, + { + "epoch": 0.1922178014663564, + "grad_norm": 1.1222821474075317, + "learning_rate": 9.325689720387974e-06, + "loss": 0.3307, + "step": 9602 + }, + { + "epoch": 0.19223781998348471, + "grad_norm": 1.2298732995986938, + "learning_rate": 9.325527122502297e-06, + "loss": 0.3658, + "step": 9603 + }, + { + "epoch": 0.19225783850061307, + "grad_norm": 1.2320115566253662, + "learning_rate": 9.325364506433034e-06, + "loss": 0.3238, + "step": 9604 + }, + { + "epoch": 0.19227785701774142, + "grad_norm": 1.2028695344924927, + "learning_rate": 9.32520187218087e-06, + "loss": 0.3238, + "step": 9605 + }, + { + "epoch": 0.19229787553486977, + "grad_norm": 1.2686009407043457, + "learning_rate": 9.325039219746487e-06, + "loss": 0.2516, + "step": 9606 + }, + { + "epoch": 0.1923178940519981, + "grad_norm": 1.2301132678985596, + "learning_rate": 9.324876549130572e-06, + "loss": 0.3393, + "step": 9607 + }, + { + "epoch": 0.19233791256912644, + "grad_norm": 1.2577970027923584, + "learning_rate": 9.324713860333805e-06, + "loss": 0.2978, + "step": 9608 + }, + { + "epoch": 0.1923579310862548, + "grad_norm": 1.1815093755722046, + "learning_rate": 9.324551153356873e-06, + "loss": 0.317, + "step": 9609 + }, + { + "epoch": 0.19237794960338314, + "grad_norm": 1.1983213424682617, + "learning_rate": 9.324388428200456e-06, + "loss": 0.3596, + "step": 9610 + }, + { + "epoch": 0.19239796812051146, + "grad_norm": 1.0774919986724854, + "learning_rate": 9.324225684865245e-06, + "loss": 0.3513, + "step": 9611 + }, + { + "epoch": 0.19241798663763982, + "grad_norm": 1.2144452333450317, + "learning_rate": 9.324062923351916e-06, + "loss": 0.3017, + "step": 9612 + }, + { + "epoch": 0.19243800515476817, + "grad_norm": 1.2252373695373535, + "learning_rate": 9.32390014366116e-06, + "loss": 0.3061, + "step": 9613 + }, + { + "epoch": 0.19245802367189652, + "grad_norm": 1.0620207786560059, + "learning_rate": 9.323737345793655e-06, + "loss": 0.3373, + "step": 9614 + }, + { + "epoch": 0.19247804218902484, + "grad_norm": 1.0098366737365723, + "learning_rate": 9.32357452975009e-06, + "loss": 0.33, + "step": 9615 + }, + { + "epoch": 0.1924980607061532, + "grad_norm": 0.9907435774803162, + "learning_rate": 9.323411695531148e-06, + "loss": 0.3589, + "step": 9616 + }, + { + "epoch": 0.19251807922328154, + "grad_norm": 1.1221086978912354, + "learning_rate": 9.323248843137514e-06, + "loss": 0.3754, + "step": 9617 + }, + { + "epoch": 0.1925380977404099, + "grad_norm": 0.9624258875846863, + "learning_rate": 9.323085972569872e-06, + "loss": 0.3343, + "step": 9618 + }, + { + "epoch": 0.19255811625753821, + "grad_norm": 1.1344542503356934, + "learning_rate": 9.322923083828907e-06, + "loss": 0.3098, + "step": 9619 + }, + { + "epoch": 0.19257813477466657, + "grad_norm": 1.1803605556488037, + "learning_rate": 9.322760176915305e-06, + "loss": 0.328, + "step": 9620 + }, + { + "epoch": 0.19259815329179492, + "grad_norm": 1.2254399061203003, + "learning_rate": 9.322597251829747e-06, + "loss": 0.3098, + "step": 9621 + }, + { + "epoch": 0.19261817180892327, + "grad_norm": 1.1298497915267944, + "learning_rate": 9.322434308572922e-06, + "loss": 0.3483, + "step": 9622 + }, + { + "epoch": 0.1926381903260516, + "grad_norm": 1.2181800603866577, + "learning_rate": 9.322271347145514e-06, + "loss": 0.3286, + "step": 9623 + }, + { + "epoch": 0.19265820884317994, + "grad_norm": 1.2313982248306274, + "learning_rate": 9.322108367548206e-06, + "loss": 0.3304, + "step": 9624 + }, + { + "epoch": 0.1926782273603083, + "grad_norm": 1.0754573345184326, + "learning_rate": 9.321945369781685e-06, + "loss": 0.3381, + "step": 9625 + }, + { + "epoch": 0.19269824587743664, + "grad_norm": 1.0960378646850586, + "learning_rate": 9.321782353846635e-06, + "loss": 0.3568, + "step": 9626 + }, + { + "epoch": 0.19271826439456496, + "grad_norm": 1.133618712425232, + "learning_rate": 9.321619319743744e-06, + "loss": 0.3134, + "step": 9627 + }, + { + "epoch": 0.19273828291169331, + "grad_norm": 1.1364002227783203, + "learning_rate": 9.321456267473694e-06, + "loss": 0.3019, + "step": 9628 + }, + { + "epoch": 0.19275830142882167, + "grad_norm": 1.177280068397522, + "learning_rate": 9.321293197037173e-06, + "loss": 0.3281, + "step": 9629 + }, + { + "epoch": 0.19277831994595002, + "grad_norm": 1.1607331037521362, + "learning_rate": 9.321130108434864e-06, + "loss": 0.3053, + "step": 9630 + }, + { + "epoch": 0.19279833846307834, + "grad_norm": 1.1251558065414429, + "learning_rate": 9.320967001667453e-06, + "loss": 0.3231, + "step": 9631 + }, + { + "epoch": 0.1928183569802067, + "grad_norm": 1.1663106679916382, + "learning_rate": 9.320803876735628e-06, + "loss": 0.3086, + "step": 9632 + }, + { + "epoch": 0.19283837549733504, + "grad_norm": 1.089110255241394, + "learning_rate": 9.320640733640074e-06, + "loss": 0.3574, + "step": 9633 + }, + { + "epoch": 0.1928583940144634, + "grad_norm": 1.0468310117721558, + "learning_rate": 9.320477572381476e-06, + "loss": 0.3156, + "step": 9634 + }, + { + "epoch": 0.19287841253159171, + "grad_norm": 1.1411076784133911, + "learning_rate": 9.320314392960518e-06, + "loss": 0.3774, + "step": 9635 + }, + { + "epoch": 0.19289843104872006, + "grad_norm": 1.0702110528945923, + "learning_rate": 9.32015119537789e-06, + "loss": 0.3134, + "step": 9636 + }, + { + "epoch": 0.19291844956584842, + "grad_norm": 1.187835693359375, + "learning_rate": 9.319987979634278e-06, + "loss": 0.3564, + "step": 9637 + }, + { + "epoch": 0.19293846808297677, + "grad_norm": 1.0833007097244263, + "learning_rate": 9.319824745730362e-06, + "loss": 0.3538, + "step": 9638 + }, + { + "epoch": 0.1929584866001051, + "grad_norm": 1.186240315437317, + "learning_rate": 9.319661493666836e-06, + "loss": 0.3037, + "step": 9639 + }, + { + "epoch": 0.19297850511723344, + "grad_norm": 1.2139800786972046, + "learning_rate": 9.31949822344438e-06, + "loss": 0.3092, + "step": 9640 + }, + { + "epoch": 0.1929985236343618, + "grad_norm": 1.0054675340652466, + "learning_rate": 9.319334935063684e-06, + "loss": 0.3424, + "step": 9641 + }, + { + "epoch": 0.19301854215149014, + "grad_norm": 1.9482778310775757, + "learning_rate": 9.319171628525435e-06, + "loss": 0.8074, + "step": 9642 + }, + { + "epoch": 0.19303856066861846, + "grad_norm": 1.1188209056854248, + "learning_rate": 9.319008303830315e-06, + "loss": 0.292, + "step": 9643 + }, + { + "epoch": 0.19305857918574681, + "grad_norm": 1.1192998886108398, + "learning_rate": 9.318844960979014e-06, + "loss": 0.3472, + "step": 9644 + }, + { + "epoch": 0.19307859770287517, + "grad_norm": 1.142500400543213, + "learning_rate": 9.31868159997222e-06, + "loss": 0.3556, + "step": 9645 + }, + { + "epoch": 0.19309861622000352, + "grad_norm": 1.11190927028656, + "learning_rate": 9.318518220810617e-06, + "loss": 0.3954, + "step": 9646 + }, + { + "epoch": 0.19311863473713184, + "grad_norm": 1.913390874862671, + "learning_rate": 9.318354823494892e-06, + "loss": 0.8723, + "step": 9647 + }, + { + "epoch": 0.1931386532542602, + "grad_norm": 1.1799030303955078, + "learning_rate": 9.318191408025734e-06, + "loss": 0.3565, + "step": 9648 + }, + { + "epoch": 0.19315867177138854, + "grad_norm": 1.1135927438735962, + "learning_rate": 9.31802797440383e-06, + "loss": 0.3601, + "step": 9649 + }, + { + "epoch": 0.1931786902885169, + "grad_norm": 1.881561517715454, + "learning_rate": 9.317864522629863e-06, + "loss": 0.8417, + "step": 9650 + }, + { + "epoch": 0.19319870880564521, + "grad_norm": 1.1872401237487793, + "learning_rate": 9.317701052704523e-06, + "loss": 0.3339, + "step": 9651 + }, + { + "epoch": 0.19321872732277356, + "grad_norm": 1.1851907968521118, + "learning_rate": 9.317537564628498e-06, + "loss": 0.3504, + "step": 9652 + }, + { + "epoch": 0.19323874583990192, + "grad_norm": 1.0556694269180298, + "learning_rate": 9.317374058402473e-06, + "loss": 0.2844, + "step": 9653 + }, + { + "epoch": 0.19325876435703027, + "grad_norm": 1.9414554834365845, + "learning_rate": 9.317210534027136e-06, + "loss": 0.8398, + "step": 9654 + }, + { + "epoch": 0.1932787828741586, + "grad_norm": 1.1302801370620728, + "learning_rate": 9.317046991503178e-06, + "loss": 0.317, + "step": 9655 + }, + { + "epoch": 0.19329880139128694, + "grad_norm": 1.167558193206787, + "learning_rate": 9.316883430831281e-06, + "loss": 0.3899, + "step": 9656 + }, + { + "epoch": 0.1933188199084153, + "grad_norm": 1.8169444799423218, + "learning_rate": 9.316719852012136e-06, + "loss": 0.8469, + "step": 9657 + }, + { + "epoch": 0.19333883842554364, + "grad_norm": 1.0974022150039673, + "learning_rate": 9.31655625504643e-06, + "loss": 0.3398, + "step": 9658 + }, + { + "epoch": 0.19335885694267196, + "grad_norm": 1.125767707824707, + "learning_rate": 9.316392639934851e-06, + "loss": 0.3825, + "step": 9659 + }, + { + "epoch": 0.19337887545980031, + "grad_norm": 1.004018783569336, + "learning_rate": 9.316229006678084e-06, + "loss": 0.288, + "step": 9660 + }, + { + "epoch": 0.19339889397692867, + "grad_norm": 1.864392876625061, + "learning_rate": 9.316065355276819e-06, + "loss": 0.8474, + "step": 9661 + }, + { + "epoch": 0.19341891249405702, + "grad_norm": 1.7326452732086182, + "learning_rate": 9.315901685731748e-06, + "loss": 0.9082, + "step": 9662 + }, + { + "epoch": 0.19343893101118534, + "grad_norm": 1.0328141450881958, + "learning_rate": 9.31573799804355e-06, + "loss": 0.3293, + "step": 9663 + }, + { + "epoch": 0.1934589495283137, + "grad_norm": 1.1990082263946533, + "learning_rate": 9.315574292212923e-06, + "loss": 0.2909, + "step": 9664 + }, + { + "epoch": 0.19347896804544204, + "grad_norm": 1.3890926837921143, + "learning_rate": 9.315410568240546e-06, + "loss": 0.35, + "step": 9665 + }, + { + "epoch": 0.1934989865625704, + "grad_norm": 1.733670711517334, + "learning_rate": 9.315246826127116e-06, + "loss": 0.8458, + "step": 9666 + }, + { + "epoch": 0.1935190050796987, + "grad_norm": 1.8283828496932983, + "learning_rate": 9.315083065873314e-06, + "loss": 0.8272, + "step": 9667 + }, + { + "epoch": 0.19353902359682706, + "grad_norm": 1.2915064096450806, + "learning_rate": 9.314919287479833e-06, + "loss": 0.3469, + "step": 9668 + }, + { + "epoch": 0.19355904211395542, + "grad_norm": 0.9852789044380188, + "learning_rate": 9.314755490947358e-06, + "loss": 0.3022, + "step": 9669 + }, + { + "epoch": 0.19357906063108377, + "grad_norm": 1.244927167892456, + "learning_rate": 9.314591676276581e-06, + "loss": 0.3346, + "step": 9670 + }, + { + "epoch": 0.1935990791482121, + "grad_norm": 1.1179766654968262, + "learning_rate": 9.314427843468188e-06, + "loss": 0.319, + "step": 9671 + }, + { + "epoch": 0.19361909766534044, + "grad_norm": 1.2181154489517212, + "learning_rate": 9.314263992522869e-06, + "loss": 0.3736, + "step": 9672 + }, + { + "epoch": 0.1936391161824688, + "grad_norm": 1.048505187034607, + "learning_rate": 9.314100123441314e-06, + "loss": 0.3083, + "step": 9673 + }, + { + "epoch": 0.19365913469959714, + "grad_norm": 1.1233433485031128, + "learning_rate": 9.313936236224208e-06, + "loss": 0.3612, + "step": 9674 + }, + { + "epoch": 0.19367915321672546, + "grad_norm": 1.0933263301849365, + "learning_rate": 9.313772330872246e-06, + "loss": 0.3654, + "step": 9675 + }, + { + "epoch": 0.19369917173385381, + "grad_norm": 1.1318482160568237, + "learning_rate": 9.31360840738611e-06, + "loss": 0.2876, + "step": 9676 + }, + { + "epoch": 0.19371919025098217, + "grad_norm": 1.8671678304672241, + "learning_rate": 9.313444465766494e-06, + "loss": 0.8951, + "step": 9677 + }, + { + "epoch": 0.19373920876811052, + "grad_norm": 1.261229395866394, + "learning_rate": 9.313280506014086e-06, + "loss": 0.3204, + "step": 9678 + }, + { + "epoch": 0.19375922728523884, + "grad_norm": 1.9195021390914917, + "learning_rate": 9.313116528129575e-06, + "loss": 0.7965, + "step": 9679 + }, + { + "epoch": 0.1937792458023672, + "grad_norm": 1.1503546237945557, + "learning_rate": 9.31295253211365e-06, + "loss": 0.321, + "step": 9680 + }, + { + "epoch": 0.19379926431949554, + "grad_norm": 1.1755867004394531, + "learning_rate": 9.312788517967e-06, + "loss": 0.3449, + "step": 9681 + }, + { + "epoch": 0.1938192828366239, + "grad_norm": 1.0150456428527832, + "learning_rate": 9.312624485690317e-06, + "loss": 0.3193, + "step": 9682 + }, + { + "epoch": 0.1938393013537522, + "grad_norm": 1.3651788234710693, + "learning_rate": 9.312460435284287e-06, + "loss": 0.3493, + "step": 9683 + }, + { + "epoch": 0.19385931987088056, + "grad_norm": 1.1400573253631592, + "learning_rate": 9.312296366749602e-06, + "loss": 0.3521, + "step": 9684 + }, + { + "epoch": 0.19387933838800891, + "grad_norm": 1.0534390211105347, + "learning_rate": 9.312132280086953e-06, + "loss": 0.3725, + "step": 9685 + }, + { + "epoch": 0.19389935690513727, + "grad_norm": 1.0884860754013062, + "learning_rate": 9.311968175297025e-06, + "loss": 0.3242, + "step": 9686 + }, + { + "epoch": 0.1939193754222656, + "grad_norm": 1.0572680234909058, + "learning_rate": 9.311804052380511e-06, + "loss": 0.3268, + "step": 9687 + }, + { + "epoch": 0.19393939393939394, + "grad_norm": 1.1629329919815063, + "learning_rate": 9.311639911338102e-06, + "loss": 0.3419, + "step": 9688 + }, + { + "epoch": 0.1939594124565223, + "grad_norm": 1.8825721740722656, + "learning_rate": 9.311475752170487e-06, + "loss": 0.8088, + "step": 9689 + }, + { + "epoch": 0.19397943097365064, + "grad_norm": 1.0711147785186768, + "learning_rate": 9.311311574878354e-06, + "loss": 0.342, + "step": 9690 + }, + { + "epoch": 0.19399944949077896, + "grad_norm": 1.0709255933761597, + "learning_rate": 9.311147379462395e-06, + "loss": 0.2873, + "step": 9691 + }, + { + "epoch": 0.19401946800790731, + "grad_norm": 1.110417366027832, + "learning_rate": 9.310983165923302e-06, + "loss": 0.3202, + "step": 9692 + }, + { + "epoch": 0.19403948652503566, + "grad_norm": 1.2098039388656616, + "learning_rate": 9.310818934261762e-06, + "loss": 0.3549, + "step": 9693 + }, + { + "epoch": 0.19405950504216402, + "grad_norm": 1.872176170349121, + "learning_rate": 9.310654684478468e-06, + "loss": 0.8178, + "step": 9694 + }, + { + "epoch": 0.19407952355929234, + "grad_norm": 1.1137391328811646, + "learning_rate": 9.31049041657411e-06, + "loss": 0.3385, + "step": 9695 + }, + { + "epoch": 0.1940995420764207, + "grad_norm": 1.5327684879302979, + "learning_rate": 9.310326130549376e-06, + "loss": 0.3028, + "step": 9696 + }, + { + "epoch": 0.19411956059354904, + "grad_norm": 1.112273097038269, + "learning_rate": 9.310161826404958e-06, + "loss": 0.336, + "step": 9697 + }, + { + "epoch": 0.19413957911067736, + "grad_norm": 1.7901757955551147, + "learning_rate": 9.30999750414155e-06, + "loss": 0.8154, + "step": 9698 + }, + { + "epoch": 0.1941595976278057, + "grad_norm": 0.9162383079528809, + "learning_rate": 9.309833163759838e-06, + "loss": 0.277, + "step": 9699 + }, + { + "epoch": 0.19417961614493406, + "grad_norm": 1.1501739025115967, + "learning_rate": 9.309668805260514e-06, + "loss": 0.322, + "step": 9700 + }, + { + "epoch": 0.19419963466206241, + "grad_norm": 1.1528629064559937, + "learning_rate": 9.309504428644271e-06, + "loss": 0.3262, + "step": 9701 + }, + { + "epoch": 0.19421965317919074, + "grad_norm": 1.0343068838119507, + "learning_rate": 9.3093400339118e-06, + "loss": 0.3317, + "step": 9702 + }, + { + "epoch": 0.1942396716963191, + "grad_norm": 1.3695194721221924, + "learning_rate": 9.30917562106379e-06, + "loss": 0.3777, + "step": 9703 + }, + { + "epoch": 0.19425969021344744, + "grad_norm": 1.0637565851211548, + "learning_rate": 9.309011190100933e-06, + "loss": 0.2866, + "step": 9704 + }, + { + "epoch": 0.1942797087305758, + "grad_norm": 1.055230736732483, + "learning_rate": 9.30884674102392e-06, + "loss": 0.3013, + "step": 9705 + }, + { + "epoch": 0.1942997272477041, + "grad_norm": 1.029978632926941, + "learning_rate": 9.308682273833443e-06, + "loss": 0.2683, + "step": 9706 + }, + { + "epoch": 0.19431974576483246, + "grad_norm": 1.1268161535263062, + "learning_rate": 9.308517788530193e-06, + "loss": 0.3394, + "step": 9707 + }, + { + "epoch": 0.19433976428196081, + "grad_norm": 1.0324466228485107, + "learning_rate": 9.308353285114861e-06, + "loss": 0.2607, + "step": 9708 + }, + { + "epoch": 0.19435978279908916, + "grad_norm": 1.1088221073150635, + "learning_rate": 9.308188763588139e-06, + "loss": 0.3308, + "step": 9709 + }, + { + "epoch": 0.1943798013162175, + "grad_norm": 1.0912164449691772, + "learning_rate": 9.308024223950717e-06, + "loss": 0.3446, + "step": 9710 + }, + { + "epoch": 0.19439981983334584, + "grad_norm": 1.0914353132247925, + "learning_rate": 9.307859666203292e-06, + "loss": 0.325, + "step": 9711 + }, + { + "epoch": 0.1944198383504742, + "grad_norm": 1.2072049379348755, + "learning_rate": 9.307695090346549e-06, + "loss": 0.3816, + "step": 9712 + }, + { + "epoch": 0.19443985686760254, + "grad_norm": 1.110324740409851, + "learning_rate": 9.307530496381184e-06, + "loss": 0.3201, + "step": 9713 + }, + { + "epoch": 0.19445987538473086, + "grad_norm": 1.2344400882720947, + "learning_rate": 9.307365884307887e-06, + "loss": 0.3442, + "step": 9714 + }, + { + "epoch": 0.1944798939018592, + "grad_norm": 1.980103850364685, + "learning_rate": 9.307201254127352e-06, + "loss": 0.9153, + "step": 9715 + }, + { + "epoch": 0.19449991241898756, + "grad_norm": 1.1614347696304321, + "learning_rate": 9.307036605840269e-06, + "loss": 0.3635, + "step": 9716 + }, + { + "epoch": 0.19451993093611591, + "grad_norm": 1.2683613300323486, + "learning_rate": 9.306871939447331e-06, + "loss": 0.3262, + "step": 9717 + }, + { + "epoch": 0.19453994945324424, + "grad_norm": 1.057735800743103, + "learning_rate": 9.30670725494923e-06, + "loss": 0.3425, + "step": 9718 + }, + { + "epoch": 0.1945599679703726, + "grad_norm": 1.057391881942749, + "learning_rate": 9.306542552346661e-06, + "loss": 0.3319, + "step": 9719 + }, + { + "epoch": 0.19457998648750094, + "grad_norm": 2.014594554901123, + "learning_rate": 9.30637783164031e-06, + "loss": 0.8299, + "step": 9720 + }, + { + "epoch": 0.1946000050046293, + "grad_norm": 1.0779272317886353, + "learning_rate": 9.306213092830875e-06, + "loss": 0.3514, + "step": 9721 + }, + { + "epoch": 0.1946200235217576, + "grad_norm": 1.1928112506866455, + "learning_rate": 9.306048335919047e-06, + "loss": 0.3232, + "step": 9722 + }, + { + "epoch": 0.19464004203888596, + "grad_norm": 1.0012366771697998, + "learning_rate": 9.30588356090552e-06, + "loss": 0.319, + "step": 9723 + }, + { + "epoch": 0.1946600605560143, + "grad_norm": 1.1411347389221191, + "learning_rate": 9.305718767790983e-06, + "loss": 0.3083, + "step": 9724 + }, + { + "epoch": 0.19468007907314266, + "grad_norm": 1.1002476215362549, + "learning_rate": 9.305553956576131e-06, + "loss": 0.327, + "step": 9725 + }, + { + "epoch": 0.194700097590271, + "grad_norm": 1.2052112817764282, + "learning_rate": 9.305389127261658e-06, + "loss": 0.3074, + "step": 9726 + }, + { + "epoch": 0.19472011610739934, + "grad_norm": 1.2251249551773071, + "learning_rate": 9.305224279848255e-06, + "loss": 0.34, + "step": 9727 + }, + { + "epoch": 0.1947401346245277, + "grad_norm": 1.0879302024841309, + "learning_rate": 9.305059414336615e-06, + "loss": 0.3085, + "step": 9728 + }, + { + "epoch": 0.19476015314165604, + "grad_norm": 1.0362352132797241, + "learning_rate": 9.304894530727433e-06, + "loss": 0.2869, + "step": 9729 + }, + { + "epoch": 0.19478017165878436, + "grad_norm": 1.9081987142562866, + "learning_rate": 9.304729629021398e-06, + "loss": 0.8308, + "step": 9730 + }, + { + "epoch": 0.1948001901759127, + "grad_norm": 1.8354251384735107, + "learning_rate": 9.30456470921921e-06, + "loss": 0.8492, + "step": 9731 + }, + { + "epoch": 0.19482020869304106, + "grad_norm": 1.1415960788726807, + "learning_rate": 9.304399771321555e-06, + "loss": 0.3211, + "step": 9732 + }, + { + "epoch": 0.19484022721016941, + "grad_norm": 1.837564468383789, + "learning_rate": 9.304234815329132e-06, + "loss": 0.8213, + "step": 9733 + }, + { + "epoch": 0.19486024572729774, + "grad_norm": 1.0692059993743896, + "learning_rate": 9.304069841242631e-06, + "loss": 0.3429, + "step": 9734 + }, + { + "epoch": 0.1948802642444261, + "grad_norm": 1.7922594547271729, + "learning_rate": 9.303904849062747e-06, + "loss": 0.8362, + "step": 9735 + }, + { + "epoch": 0.19490028276155444, + "grad_norm": 1.296528697013855, + "learning_rate": 9.303739838790172e-06, + "loss": 0.3145, + "step": 9736 + }, + { + "epoch": 0.1949203012786828, + "grad_norm": 1.8926401138305664, + "learning_rate": 9.303574810425602e-06, + "loss": 0.7899, + "step": 9737 + }, + { + "epoch": 0.1949403197958111, + "grad_norm": 1.043668508529663, + "learning_rate": 9.303409763969731e-06, + "loss": 0.2867, + "step": 9738 + }, + { + "epoch": 0.19496033831293946, + "grad_norm": 1.0986714363098145, + "learning_rate": 9.303244699423248e-06, + "loss": 0.2933, + "step": 9739 + }, + { + "epoch": 0.1949803568300678, + "grad_norm": 1.0513242483139038, + "learning_rate": 9.303079616786853e-06, + "loss": 0.3004, + "step": 9740 + }, + { + "epoch": 0.19500037534719616, + "grad_norm": 1.1194316148757935, + "learning_rate": 9.302914516061237e-06, + "loss": 0.3092, + "step": 9741 + }, + { + "epoch": 0.1950203938643245, + "grad_norm": 1.9266886711120605, + "learning_rate": 9.302749397247094e-06, + "loss": 0.8175, + "step": 9742 + }, + { + "epoch": 0.19504041238145284, + "grad_norm": 1.2045804262161255, + "learning_rate": 9.302584260345118e-06, + "loss": 0.2974, + "step": 9743 + }, + { + "epoch": 0.1950604308985812, + "grad_norm": 1.1470069885253906, + "learning_rate": 9.302419105356004e-06, + "loss": 0.3085, + "step": 9744 + }, + { + "epoch": 0.19508044941570954, + "grad_norm": 0.9638363718986511, + "learning_rate": 9.302253932280448e-06, + "loss": 0.2902, + "step": 9745 + }, + { + "epoch": 0.19510046793283786, + "grad_norm": 1.1531705856323242, + "learning_rate": 9.30208874111914e-06, + "loss": 0.3574, + "step": 9746 + }, + { + "epoch": 0.1951204864499662, + "grad_norm": 1.1880909204483032, + "learning_rate": 9.301923531872777e-06, + "loss": 0.3345, + "step": 9747 + }, + { + "epoch": 0.19514050496709456, + "grad_norm": 1.3919925689697266, + "learning_rate": 9.301758304542053e-06, + "loss": 0.321, + "step": 9748 + }, + { + "epoch": 0.19516052348422291, + "grad_norm": 1.8147735595703125, + "learning_rate": 9.301593059127663e-06, + "loss": 0.8008, + "step": 9749 + }, + { + "epoch": 0.19518054200135124, + "grad_norm": 1.2406718730926514, + "learning_rate": 9.301427795630301e-06, + "loss": 0.3295, + "step": 9750 + }, + { + "epoch": 0.1952005605184796, + "grad_norm": 1.1848052740097046, + "learning_rate": 9.301262514050665e-06, + "loss": 0.3196, + "step": 9751 + }, + { + "epoch": 0.19522057903560794, + "grad_norm": 0.947276771068573, + "learning_rate": 9.301097214389445e-06, + "loss": 0.2928, + "step": 9752 + }, + { + "epoch": 0.1952405975527363, + "grad_norm": 1.1054332256317139, + "learning_rate": 9.300931896647339e-06, + "loss": 0.3385, + "step": 9753 + }, + { + "epoch": 0.1952606160698646, + "grad_norm": 2.124401569366455, + "learning_rate": 9.300766560825039e-06, + "loss": 0.8602, + "step": 9754 + }, + { + "epoch": 0.19528063458699296, + "grad_norm": 1.213034987449646, + "learning_rate": 9.300601206923244e-06, + "loss": 0.311, + "step": 9755 + }, + { + "epoch": 0.1953006531041213, + "grad_norm": 1.019866704940796, + "learning_rate": 9.300435834942645e-06, + "loss": 0.3496, + "step": 9756 + }, + { + "epoch": 0.19532067162124966, + "grad_norm": 1.3024414777755737, + "learning_rate": 9.30027044488394e-06, + "loss": 0.311, + "step": 9757 + }, + { + "epoch": 0.195340690138378, + "grad_norm": 1.2215814590454102, + "learning_rate": 9.300105036747823e-06, + "loss": 0.3419, + "step": 9758 + }, + { + "epoch": 0.19536070865550634, + "grad_norm": 1.0041157007217407, + "learning_rate": 9.299939610534991e-06, + "loss": 0.3197, + "step": 9759 + }, + { + "epoch": 0.1953807271726347, + "grad_norm": 1.0965478420257568, + "learning_rate": 9.299774166246138e-06, + "loss": 0.3127, + "step": 9760 + }, + { + "epoch": 0.19540074568976304, + "grad_norm": 1.076177954673767, + "learning_rate": 9.299608703881958e-06, + "loss": 0.3292, + "step": 9761 + }, + { + "epoch": 0.19542076420689136, + "grad_norm": 1.275655746459961, + "learning_rate": 9.29944322344315e-06, + "loss": 0.3716, + "step": 9762 + }, + { + "epoch": 0.1954407827240197, + "grad_norm": 1.2304857969284058, + "learning_rate": 9.299277724930407e-06, + "loss": 0.3088, + "step": 9763 + }, + { + "epoch": 0.19546080124114806, + "grad_norm": 1.176882266998291, + "learning_rate": 9.299112208344428e-06, + "loss": 0.315, + "step": 9764 + }, + { + "epoch": 0.19548081975827641, + "grad_norm": 1.1949712038040161, + "learning_rate": 9.298946673685904e-06, + "loss": 0.3627, + "step": 9765 + }, + { + "epoch": 0.19550083827540474, + "grad_norm": 1.1040621995925903, + "learning_rate": 9.298781120955534e-06, + "loss": 0.3568, + "step": 9766 + }, + { + "epoch": 0.1955208567925331, + "grad_norm": 1.089952826499939, + "learning_rate": 9.298615550154014e-06, + "loss": 0.3021, + "step": 9767 + }, + { + "epoch": 0.19554087530966144, + "grad_norm": 1.1699968576431274, + "learning_rate": 9.298449961282037e-06, + "loss": 0.3125, + "step": 9768 + }, + { + "epoch": 0.1955608938267898, + "grad_norm": 1.2437121868133545, + "learning_rate": 9.298284354340304e-06, + "loss": 0.3523, + "step": 9769 + }, + { + "epoch": 0.1955809123439181, + "grad_norm": 1.3473464250564575, + "learning_rate": 9.298118729329507e-06, + "loss": 0.3264, + "step": 9770 + }, + { + "epoch": 0.19560093086104646, + "grad_norm": 1.3415600061416626, + "learning_rate": 9.297953086250344e-06, + "loss": 0.3344, + "step": 9771 + }, + { + "epoch": 0.1956209493781748, + "grad_norm": 1.2438457012176514, + "learning_rate": 9.297787425103512e-06, + "loss": 0.3596, + "step": 9772 + }, + { + "epoch": 0.19564096789530316, + "grad_norm": 0.9371715784072876, + "learning_rate": 9.297621745889707e-06, + "loss": 0.3034, + "step": 9773 + }, + { + "epoch": 0.1956609864124315, + "grad_norm": 1.2100675106048584, + "learning_rate": 9.297456048609623e-06, + "loss": 0.3715, + "step": 9774 + }, + { + "epoch": 0.19568100492955984, + "grad_norm": 1.0279061794281006, + "learning_rate": 9.29729033326396e-06, + "loss": 0.3296, + "step": 9775 + }, + { + "epoch": 0.1957010234466882, + "grad_norm": 1.1050922870635986, + "learning_rate": 9.297124599853414e-06, + "loss": 0.3382, + "step": 9776 + }, + { + "epoch": 0.19572104196381654, + "grad_norm": 1.0785280466079712, + "learning_rate": 9.296958848378679e-06, + "loss": 0.3258, + "step": 9777 + }, + { + "epoch": 0.19574106048094486, + "grad_norm": 1.1854526996612549, + "learning_rate": 9.296793078840454e-06, + "loss": 0.3135, + "step": 9778 + }, + { + "epoch": 0.1957610789980732, + "grad_norm": 1.889604091644287, + "learning_rate": 9.296627291239437e-06, + "loss": 0.837, + "step": 9779 + }, + { + "epoch": 0.19578109751520156, + "grad_norm": 1.2681617736816406, + "learning_rate": 9.296461485576323e-06, + "loss": 0.3497, + "step": 9780 + }, + { + "epoch": 0.1958011160323299, + "grad_norm": 1.180429458618164, + "learning_rate": 9.296295661851808e-06, + "loss": 0.3518, + "step": 9781 + }, + { + "epoch": 0.19582113454945824, + "grad_norm": 1.151361346244812, + "learning_rate": 9.296129820066593e-06, + "loss": 0.3697, + "step": 9782 + }, + { + "epoch": 0.1958411530665866, + "grad_norm": 1.1097697019577026, + "learning_rate": 9.29596396022137e-06, + "loss": 0.3071, + "step": 9783 + }, + { + "epoch": 0.19586117158371494, + "grad_norm": 1.1397520303726196, + "learning_rate": 9.295798082316842e-06, + "loss": 0.3326, + "step": 9784 + }, + { + "epoch": 0.1958811901008433, + "grad_norm": 1.109926700592041, + "learning_rate": 9.295632186353702e-06, + "loss": 0.3372, + "step": 9785 + }, + { + "epoch": 0.1959012086179716, + "grad_norm": 1.0727858543395996, + "learning_rate": 9.295466272332649e-06, + "loss": 0.3577, + "step": 9786 + }, + { + "epoch": 0.19592122713509996, + "grad_norm": 1.0589224100112915, + "learning_rate": 9.295300340254379e-06, + "loss": 0.342, + "step": 9787 + }, + { + "epoch": 0.1959412456522283, + "grad_norm": 1.2644184827804565, + "learning_rate": 9.29513439011959e-06, + "loss": 0.3283, + "step": 9788 + }, + { + "epoch": 0.19596126416935666, + "grad_norm": 1.1319222450256348, + "learning_rate": 9.294968421928984e-06, + "loss": 0.3462, + "step": 9789 + }, + { + "epoch": 0.195981282686485, + "grad_norm": 1.0787386894226074, + "learning_rate": 9.294802435683251e-06, + "loss": 0.3231, + "step": 9790 + }, + { + "epoch": 0.19600130120361334, + "grad_norm": 1.2277830839157104, + "learning_rate": 9.294636431383097e-06, + "loss": 0.3313, + "step": 9791 + }, + { + "epoch": 0.1960213197207417, + "grad_norm": 1.1261717081069946, + "learning_rate": 9.294470409029212e-06, + "loss": 0.3565, + "step": 9792 + }, + { + "epoch": 0.19604133823787004, + "grad_norm": 1.1259071826934814, + "learning_rate": 9.2943043686223e-06, + "loss": 0.3712, + "step": 9793 + }, + { + "epoch": 0.19606135675499836, + "grad_norm": 1.1503459215164185, + "learning_rate": 9.294138310163054e-06, + "loss": 0.3383, + "step": 9794 + }, + { + "epoch": 0.1960813752721267, + "grad_norm": 0.9812018275260925, + "learning_rate": 9.293972233652178e-06, + "loss": 0.333, + "step": 9795 + }, + { + "epoch": 0.19610139378925506, + "grad_norm": 1.1479341983795166, + "learning_rate": 9.293806139090364e-06, + "loss": 0.302, + "step": 9796 + }, + { + "epoch": 0.1961214123063834, + "grad_norm": 1.2031644582748413, + "learning_rate": 9.293640026478314e-06, + "loss": 0.3353, + "step": 9797 + }, + { + "epoch": 0.19614143082351174, + "grad_norm": 1.1673598289489746, + "learning_rate": 9.293473895816727e-06, + "loss": 0.3825, + "step": 9798 + }, + { + "epoch": 0.1961614493406401, + "grad_norm": 1.051876187324524, + "learning_rate": 9.293307747106297e-06, + "loss": 0.3181, + "step": 9799 + }, + { + "epoch": 0.19618146785776844, + "grad_norm": 1.3582618236541748, + "learning_rate": 9.293141580347727e-06, + "loss": 0.3423, + "step": 9800 + }, + { + "epoch": 0.1962014863748968, + "grad_norm": 1.9000400304794312, + "learning_rate": 9.292975395541712e-06, + "loss": 0.8257, + "step": 9801 + }, + { + "epoch": 0.1962215048920251, + "grad_norm": 2.214991807937622, + "learning_rate": 9.292809192688953e-06, + "loss": 0.8164, + "step": 9802 + }, + { + "epoch": 0.19624152340915346, + "grad_norm": 1.0694612264633179, + "learning_rate": 9.29264297179015e-06, + "loss": 0.327, + "step": 9803 + }, + { + "epoch": 0.1962615419262818, + "grad_norm": 1.1730531454086304, + "learning_rate": 9.292476732845996e-06, + "loss": 0.3769, + "step": 9804 + }, + { + "epoch": 0.19628156044341016, + "grad_norm": 1.0645390748977661, + "learning_rate": 9.292310475857196e-06, + "loss": 0.3483, + "step": 9805 + }, + { + "epoch": 0.1963015789605385, + "grad_norm": 1.1326909065246582, + "learning_rate": 9.292144200824447e-06, + "loss": 0.3311, + "step": 9806 + }, + { + "epoch": 0.19632159747766684, + "grad_norm": 1.2684733867645264, + "learning_rate": 9.291977907748446e-06, + "loss": 0.3176, + "step": 9807 + }, + { + "epoch": 0.1963416159947952, + "grad_norm": 1.192487120628357, + "learning_rate": 9.291811596629894e-06, + "loss": 0.3443, + "step": 9808 + }, + { + "epoch": 0.19636163451192354, + "grad_norm": 1.213110327720642, + "learning_rate": 9.29164526746949e-06, + "loss": 0.3577, + "step": 9809 + }, + { + "epoch": 0.19638165302905186, + "grad_norm": 1.0684828758239746, + "learning_rate": 9.291478920267933e-06, + "loss": 0.3776, + "step": 9810 + }, + { + "epoch": 0.1964016715461802, + "grad_norm": 1.2221487760543823, + "learning_rate": 9.291312555025921e-06, + "loss": 0.2851, + "step": 9811 + }, + { + "epoch": 0.19642169006330856, + "grad_norm": 1.190969467163086, + "learning_rate": 9.291146171744155e-06, + "loss": 0.3189, + "step": 9812 + }, + { + "epoch": 0.1964417085804369, + "grad_norm": 1.1397662162780762, + "learning_rate": 9.290979770423334e-06, + "loss": 0.3568, + "step": 9813 + }, + { + "epoch": 0.19646172709756524, + "grad_norm": 1.0480061769485474, + "learning_rate": 9.290813351064158e-06, + "loss": 0.3344, + "step": 9814 + }, + { + "epoch": 0.1964817456146936, + "grad_norm": 1.1185535192489624, + "learning_rate": 9.290646913667325e-06, + "loss": 0.382, + "step": 9815 + }, + { + "epoch": 0.19650176413182194, + "grad_norm": 1.3730192184448242, + "learning_rate": 9.290480458233537e-06, + "loss": 0.3561, + "step": 9816 + }, + { + "epoch": 0.1965217826489503, + "grad_norm": 1.0980812311172485, + "learning_rate": 9.290313984763493e-06, + "loss": 0.3491, + "step": 9817 + }, + { + "epoch": 0.1965418011660786, + "grad_norm": 1.051744818687439, + "learning_rate": 9.29014749325789e-06, + "loss": 0.3327, + "step": 9818 + }, + { + "epoch": 0.19656181968320696, + "grad_norm": 2.0847511291503906, + "learning_rate": 9.289980983717432e-06, + "loss": 0.8245, + "step": 9819 + }, + { + "epoch": 0.1965818382003353, + "grad_norm": 1.9601449966430664, + "learning_rate": 9.289814456142816e-06, + "loss": 0.7606, + "step": 9820 + }, + { + "epoch": 0.19660185671746366, + "grad_norm": 1.1059528589248657, + "learning_rate": 9.289647910534743e-06, + "loss": 0.3242, + "step": 9821 + }, + { + "epoch": 0.196621875234592, + "grad_norm": 1.4078954458236694, + "learning_rate": 9.289481346893915e-06, + "loss": 0.3082, + "step": 9822 + }, + { + "epoch": 0.19664189375172034, + "grad_norm": 1.210611343383789, + "learning_rate": 9.28931476522103e-06, + "loss": 0.3578, + "step": 9823 + }, + { + "epoch": 0.1966619122688487, + "grad_norm": 1.9097355604171753, + "learning_rate": 9.289148165516787e-06, + "loss": 0.8697, + "step": 9824 + }, + { + "epoch": 0.19668193078597704, + "grad_norm": 1.021782636642456, + "learning_rate": 9.28898154778189e-06, + "loss": 0.2826, + "step": 9825 + }, + { + "epoch": 0.19670194930310536, + "grad_norm": 1.1231648921966553, + "learning_rate": 9.288814912017037e-06, + "loss": 0.3041, + "step": 9826 + }, + { + "epoch": 0.1967219678202337, + "grad_norm": 1.1480658054351807, + "learning_rate": 9.28864825822293e-06, + "loss": 0.3564, + "step": 9827 + }, + { + "epoch": 0.19674198633736206, + "grad_norm": 1.0626847743988037, + "learning_rate": 9.288481586400268e-06, + "loss": 0.33, + "step": 9828 + }, + { + "epoch": 0.1967620048544904, + "grad_norm": 1.1278573274612427, + "learning_rate": 9.288314896549752e-06, + "loss": 0.2682, + "step": 9829 + }, + { + "epoch": 0.19678202337161874, + "grad_norm": 1.0414944887161255, + "learning_rate": 9.288148188672084e-06, + "loss": 0.3507, + "step": 9830 + }, + { + "epoch": 0.1968020418887471, + "grad_norm": 1.2236229181289673, + "learning_rate": 9.287981462767961e-06, + "loss": 0.3763, + "step": 9831 + }, + { + "epoch": 0.19682206040587544, + "grad_norm": 1.130576491355896, + "learning_rate": 9.28781471883809e-06, + "loss": 0.3176, + "step": 9832 + }, + { + "epoch": 0.1968420789230038, + "grad_norm": 1.01539146900177, + "learning_rate": 9.287647956883167e-06, + "loss": 0.3344, + "step": 9833 + }, + { + "epoch": 0.1968620974401321, + "grad_norm": 1.0956693887710571, + "learning_rate": 9.287481176903895e-06, + "loss": 0.3455, + "step": 9834 + }, + { + "epoch": 0.19688211595726046, + "grad_norm": 1.1808130741119385, + "learning_rate": 9.287314378900975e-06, + "loss": 0.3226, + "step": 9835 + }, + { + "epoch": 0.1969021344743888, + "grad_norm": 1.166848063468933, + "learning_rate": 9.287147562875108e-06, + "loss": 0.3421, + "step": 9836 + }, + { + "epoch": 0.19692215299151716, + "grad_norm": 1.1330100297927856, + "learning_rate": 9.286980728826995e-06, + "loss": 0.3732, + "step": 9837 + }, + { + "epoch": 0.19694217150864549, + "grad_norm": 1.0407170057296753, + "learning_rate": 9.286813876757339e-06, + "loss": 0.3429, + "step": 9838 + }, + { + "epoch": 0.19696219002577384, + "grad_norm": 1.3486642837524414, + "learning_rate": 9.286647006666837e-06, + "loss": 0.3844, + "step": 9839 + }, + { + "epoch": 0.1969822085429022, + "grad_norm": 1.29725182056427, + "learning_rate": 9.286480118556197e-06, + "loss": 0.3201, + "step": 9840 + }, + { + "epoch": 0.19700222706003054, + "grad_norm": 1.1925921440124512, + "learning_rate": 9.286313212426114e-06, + "loss": 0.3469, + "step": 9841 + }, + { + "epoch": 0.19702224557715886, + "grad_norm": 1.3410935401916504, + "learning_rate": 9.286146288277294e-06, + "loss": 0.3944, + "step": 9842 + }, + { + "epoch": 0.1970422640942872, + "grad_norm": 1.1449511051177979, + "learning_rate": 9.285979346110437e-06, + "loss": 0.2982, + "step": 9843 + }, + { + "epoch": 0.19706228261141556, + "grad_norm": 2.264404296875, + "learning_rate": 9.285812385926245e-06, + "loss": 0.8384, + "step": 9844 + }, + { + "epoch": 0.1970823011285439, + "grad_norm": 1.0006184577941895, + "learning_rate": 9.28564540772542e-06, + "loss": 0.3077, + "step": 9845 + }, + { + "epoch": 0.19710231964567224, + "grad_norm": 1.9395604133605957, + "learning_rate": 9.285478411508666e-06, + "loss": 0.8152, + "step": 9846 + }, + { + "epoch": 0.1971223381628006, + "grad_norm": 1.2158539295196533, + "learning_rate": 9.285311397276681e-06, + "loss": 0.3088, + "step": 9847 + }, + { + "epoch": 0.19714235667992894, + "grad_norm": 1.26833975315094, + "learning_rate": 9.28514436503017e-06, + "loss": 0.327, + "step": 9848 + }, + { + "epoch": 0.1971623751970573, + "grad_norm": 1.2590094804763794, + "learning_rate": 9.284977314769834e-06, + "loss": 0.3106, + "step": 9849 + }, + { + "epoch": 0.1971823937141856, + "grad_norm": 1.8749792575836182, + "learning_rate": 9.284810246496376e-06, + "loss": 0.8603, + "step": 9850 + }, + { + "epoch": 0.19720241223131396, + "grad_norm": 1.0553922653198242, + "learning_rate": 9.284643160210495e-06, + "loss": 0.3038, + "step": 9851 + }, + { + "epoch": 0.1972224307484423, + "grad_norm": 1.1452016830444336, + "learning_rate": 9.2844760559129e-06, + "loss": 0.3161, + "step": 9852 + }, + { + "epoch": 0.19724244926557066, + "grad_norm": 1.843088150024414, + "learning_rate": 9.284308933604287e-06, + "loss": 0.8163, + "step": 9853 + }, + { + "epoch": 0.19726246778269899, + "grad_norm": 1.1713663339614868, + "learning_rate": 9.284141793285365e-06, + "loss": 0.3595, + "step": 9854 + }, + { + "epoch": 0.19728248629982734, + "grad_norm": 1.1727125644683838, + "learning_rate": 9.28397463495683e-06, + "loss": 0.2892, + "step": 9855 + }, + { + "epoch": 0.1973025048169557, + "grad_norm": 1.081590175628662, + "learning_rate": 9.283807458619387e-06, + "loss": 0.3785, + "step": 9856 + }, + { + "epoch": 0.19732252333408404, + "grad_norm": 1.0325770378112793, + "learning_rate": 9.28364026427374e-06, + "loss": 0.3116, + "step": 9857 + }, + { + "epoch": 0.19734254185121236, + "grad_norm": 1.1020910739898682, + "learning_rate": 9.28347305192059e-06, + "loss": 0.3671, + "step": 9858 + }, + { + "epoch": 0.1973625603683407, + "grad_norm": 1.1914170980453491, + "learning_rate": 9.283305821560644e-06, + "loss": 0.3406, + "step": 9859 + }, + { + "epoch": 0.19738257888546906, + "grad_norm": 1.1067562103271484, + "learning_rate": 9.2831385731946e-06, + "loss": 0.3424, + "step": 9860 + }, + { + "epoch": 0.1974025974025974, + "grad_norm": 1.2488898038864136, + "learning_rate": 9.282971306823163e-06, + "loss": 0.3536, + "step": 9861 + }, + { + "epoch": 0.19742261591972574, + "grad_norm": 1.1168568134307861, + "learning_rate": 9.282804022447038e-06, + "loss": 0.2795, + "step": 9862 + }, + { + "epoch": 0.1974426344368541, + "grad_norm": 1.1766798496246338, + "learning_rate": 9.282636720066925e-06, + "loss": 0.3441, + "step": 9863 + }, + { + "epoch": 0.19746265295398244, + "grad_norm": 1.127409815788269, + "learning_rate": 9.28246939968353e-06, + "loss": 0.3218, + "step": 9864 + }, + { + "epoch": 0.1974826714711108, + "grad_norm": 0.9935093522071838, + "learning_rate": 9.282302061297555e-06, + "loss": 0.3116, + "step": 9865 + }, + { + "epoch": 0.1975026899882391, + "grad_norm": 1.2046256065368652, + "learning_rate": 9.282134704909703e-06, + "loss": 0.3178, + "step": 9866 + }, + { + "epoch": 0.19752270850536746, + "grad_norm": 1.1747899055480957, + "learning_rate": 9.281967330520679e-06, + "loss": 0.3958, + "step": 9867 + }, + { + "epoch": 0.1975427270224958, + "grad_norm": 1.1804721355438232, + "learning_rate": 9.281799938131184e-06, + "loss": 0.3638, + "step": 9868 + }, + { + "epoch": 0.19756274553962416, + "grad_norm": 1.0659247636795044, + "learning_rate": 9.281632527741926e-06, + "loss": 0.3597, + "step": 9869 + }, + { + "epoch": 0.19758276405675249, + "grad_norm": 1.0961360931396484, + "learning_rate": 9.281465099353605e-06, + "loss": 0.3124, + "step": 9870 + }, + { + "epoch": 0.19760278257388084, + "grad_norm": 1.0595752000808716, + "learning_rate": 9.281297652966927e-06, + "loss": 0.3457, + "step": 9871 + }, + { + "epoch": 0.1976228010910092, + "grad_norm": 1.0489152669906616, + "learning_rate": 9.281130188582594e-06, + "loss": 0.3634, + "step": 9872 + }, + { + "epoch": 0.19764281960813754, + "grad_norm": 1.7935099601745605, + "learning_rate": 9.280962706201312e-06, + "loss": 0.3702, + "step": 9873 + }, + { + "epoch": 0.19766283812526586, + "grad_norm": 1.2024282217025757, + "learning_rate": 9.280795205823782e-06, + "loss": 0.3294, + "step": 9874 + }, + { + "epoch": 0.1976828566423942, + "grad_norm": 1.1542423963546753, + "learning_rate": 9.280627687450711e-06, + "loss": 0.314, + "step": 9875 + }, + { + "epoch": 0.19770287515952256, + "grad_norm": 1.1372069120407104, + "learning_rate": 9.280460151082805e-06, + "loss": 0.3521, + "step": 9876 + }, + { + "epoch": 0.1977228936766509, + "grad_norm": 1.2296520471572876, + "learning_rate": 9.280292596720762e-06, + "loss": 0.3374, + "step": 9877 + }, + { + "epoch": 0.19774291219377924, + "grad_norm": 1.074312686920166, + "learning_rate": 9.280125024365292e-06, + "loss": 0.3674, + "step": 9878 + }, + { + "epoch": 0.1977629307109076, + "grad_norm": 1.3096686601638794, + "learning_rate": 9.279957434017097e-06, + "loss": 0.3741, + "step": 9879 + }, + { + "epoch": 0.19778294922803594, + "grad_norm": 1.8604652881622314, + "learning_rate": 9.279789825676882e-06, + "loss": 0.8037, + "step": 9880 + }, + { + "epoch": 0.1978029677451643, + "grad_norm": 1.1436002254486084, + "learning_rate": 9.279622199345352e-06, + "loss": 0.2808, + "step": 9881 + }, + { + "epoch": 0.1978229862622926, + "grad_norm": 1.0760548114776611, + "learning_rate": 9.279454555023212e-06, + "loss": 0.3249, + "step": 9882 + }, + { + "epoch": 0.19784300477942096, + "grad_norm": 1.1693077087402344, + "learning_rate": 9.279286892711165e-06, + "loss": 0.3102, + "step": 9883 + }, + { + "epoch": 0.1978630232965493, + "grad_norm": 1.2048789262771606, + "learning_rate": 9.279119212409918e-06, + "loss": 0.3485, + "step": 9884 + }, + { + "epoch": 0.19788304181367766, + "grad_norm": 1.116324543952942, + "learning_rate": 9.27895151412017e-06, + "loss": 0.3868, + "step": 9885 + }, + { + "epoch": 0.19790306033080599, + "grad_norm": 1.0686681270599365, + "learning_rate": 9.278783797842637e-06, + "loss": 0.2946, + "step": 9886 + }, + { + "epoch": 0.19792307884793434, + "grad_norm": 1.0187578201293945, + "learning_rate": 9.278616063578014e-06, + "loss": 0.2781, + "step": 9887 + }, + { + "epoch": 0.1979430973650627, + "grad_norm": 1.1290251016616821, + "learning_rate": 9.278448311327011e-06, + "loss": 0.3035, + "step": 9888 + }, + { + "epoch": 0.19796311588219104, + "grad_norm": 1.9719951152801514, + "learning_rate": 9.278280541090333e-06, + "loss": 0.8599, + "step": 9889 + }, + { + "epoch": 0.19798313439931936, + "grad_norm": 1.1863977909088135, + "learning_rate": 9.278112752868682e-06, + "loss": 0.3607, + "step": 9890 + }, + { + "epoch": 0.1980031529164477, + "grad_norm": 1.1505026817321777, + "learning_rate": 9.277944946662767e-06, + "loss": 0.335, + "step": 9891 + }, + { + "epoch": 0.19802317143357606, + "grad_norm": 1.081140398979187, + "learning_rate": 9.277777122473293e-06, + "loss": 0.3307, + "step": 9892 + }, + { + "epoch": 0.1980431899507044, + "grad_norm": 1.1954957246780396, + "learning_rate": 9.277609280300962e-06, + "loss": 0.3219, + "step": 9893 + }, + { + "epoch": 0.19806320846783274, + "grad_norm": 1.2263745069503784, + "learning_rate": 9.277441420146484e-06, + "loss": 0.3465, + "step": 9894 + }, + { + "epoch": 0.19808322698496109, + "grad_norm": 1.1055225133895874, + "learning_rate": 9.277273542010563e-06, + "loss": 0.2849, + "step": 9895 + }, + { + "epoch": 0.19810324550208944, + "grad_norm": 1.208026647567749, + "learning_rate": 9.277105645893903e-06, + "loss": 0.3707, + "step": 9896 + }, + { + "epoch": 0.1981232640192178, + "grad_norm": 1.1173852682113647, + "learning_rate": 9.276937731797212e-06, + "loss": 0.3057, + "step": 9897 + }, + { + "epoch": 0.1981432825363461, + "grad_norm": 1.094284176826477, + "learning_rate": 9.276769799721196e-06, + "loss": 0.3397, + "step": 9898 + }, + { + "epoch": 0.19816330105347446, + "grad_norm": 1.1140079498291016, + "learning_rate": 9.27660184966656e-06, + "loss": 0.3039, + "step": 9899 + }, + { + "epoch": 0.1981833195706028, + "grad_norm": 0.9770106077194214, + "learning_rate": 9.276433881634008e-06, + "loss": 0.3126, + "step": 9900 + }, + { + "epoch": 0.19820333808773116, + "grad_norm": 1.1861884593963623, + "learning_rate": 9.27626589562425e-06, + "loss": 0.3004, + "step": 9901 + }, + { + "epoch": 0.19822335660485949, + "grad_norm": 1.0070523023605347, + "learning_rate": 9.276097891637991e-06, + "loss": 0.3225, + "step": 9902 + }, + { + "epoch": 0.19824337512198784, + "grad_norm": 1.0775893926620483, + "learning_rate": 9.275929869675937e-06, + "loss": 0.3617, + "step": 9903 + }, + { + "epoch": 0.1982633936391162, + "grad_norm": 1.2248834371566772, + "learning_rate": 9.275761829738791e-06, + "loss": 0.2948, + "step": 9904 + }, + { + "epoch": 0.19828341215624454, + "grad_norm": 1.1190416812896729, + "learning_rate": 9.275593771827266e-06, + "loss": 0.3841, + "step": 9905 + }, + { + "epoch": 0.19830343067337286, + "grad_norm": 1.1494114398956299, + "learning_rate": 9.275425695942062e-06, + "loss": 0.3282, + "step": 9906 + }, + { + "epoch": 0.1983234491905012, + "grad_norm": 2.162946939468384, + "learning_rate": 9.275257602083892e-06, + "loss": 0.8514, + "step": 9907 + }, + { + "epoch": 0.19834346770762956, + "grad_norm": 1.099748134613037, + "learning_rate": 9.275089490253456e-06, + "loss": 0.2496, + "step": 9908 + }, + { + "epoch": 0.1983634862247579, + "grad_norm": 1.2781845331192017, + "learning_rate": 9.274921360451465e-06, + "loss": 0.4001, + "step": 9909 + }, + { + "epoch": 0.19838350474188624, + "grad_norm": 1.1025036573410034, + "learning_rate": 9.274753212678624e-06, + "loss": 0.3377, + "step": 9910 + }, + { + "epoch": 0.19840352325901459, + "grad_norm": 1.2105122804641724, + "learning_rate": 9.27458504693564e-06, + "loss": 0.3992, + "step": 9911 + }, + { + "epoch": 0.19842354177614294, + "grad_norm": 1.0172101259231567, + "learning_rate": 9.27441686322322e-06, + "loss": 0.3554, + "step": 9912 + }, + { + "epoch": 0.1984435602932713, + "grad_norm": 1.1969687938690186, + "learning_rate": 9.274248661542074e-06, + "loss": 0.2613, + "step": 9913 + }, + { + "epoch": 0.1984635788103996, + "grad_norm": 1.0476840734481812, + "learning_rate": 9.274080441892906e-06, + "loss": 0.2658, + "step": 9914 + }, + { + "epoch": 0.19848359732752796, + "grad_norm": 1.0406274795532227, + "learning_rate": 9.273912204276422e-06, + "loss": 0.3073, + "step": 9915 + }, + { + "epoch": 0.1985036158446563, + "grad_norm": 1.256075382232666, + "learning_rate": 9.273743948693332e-06, + "loss": 0.3519, + "step": 9916 + }, + { + "epoch": 0.19852363436178466, + "grad_norm": 1.036183476448059, + "learning_rate": 9.27357567514434e-06, + "loss": 0.3251, + "step": 9917 + }, + { + "epoch": 0.19854365287891299, + "grad_norm": 1.024431586265564, + "learning_rate": 9.273407383630157e-06, + "loss": 0.3631, + "step": 9918 + }, + { + "epoch": 0.19856367139604134, + "grad_norm": 1.2184685468673706, + "learning_rate": 9.27323907415149e-06, + "loss": 0.3163, + "step": 9919 + }, + { + "epoch": 0.1985836899131697, + "grad_norm": 1.0580404996871948, + "learning_rate": 9.273070746709044e-06, + "loss": 0.3183, + "step": 9920 + }, + { + "epoch": 0.19860370843029804, + "grad_norm": 1.824089527130127, + "learning_rate": 9.27290240130353e-06, + "loss": 0.8589, + "step": 9921 + }, + { + "epoch": 0.19862372694742636, + "grad_norm": 1.3922175168991089, + "learning_rate": 9.272734037935651e-06, + "loss": 0.2952, + "step": 9922 + }, + { + "epoch": 0.1986437454645547, + "grad_norm": 1.59686279296875, + "learning_rate": 9.272565656606121e-06, + "loss": 0.3762, + "step": 9923 + }, + { + "epoch": 0.19866376398168306, + "grad_norm": 1.0867177248001099, + "learning_rate": 9.272397257315641e-06, + "loss": 0.2877, + "step": 9924 + }, + { + "epoch": 0.1986837824988114, + "grad_norm": 1.0899198055267334, + "learning_rate": 9.272228840064925e-06, + "loss": 0.3134, + "step": 9925 + }, + { + "epoch": 0.19870380101593973, + "grad_norm": 1.0082701444625854, + "learning_rate": 9.272060404854677e-06, + "loss": 0.3421, + "step": 9926 + }, + { + "epoch": 0.19872381953306809, + "grad_norm": 1.001529335975647, + "learning_rate": 9.271891951685608e-06, + "loss": 0.2872, + "step": 9927 + }, + { + "epoch": 0.19874383805019644, + "grad_norm": 0.9719015955924988, + "learning_rate": 9.271723480558423e-06, + "loss": 0.2848, + "step": 9928 + }, + { + "epoch": 0.1987638565673248, + "grad_norm": 1.2862308025360107, + "learning_rate": 9.27155499147383e-06, + "loss": 0.3258, + "step": 9929 + }, + { + "epoch": 0.1987838750844531, + "grad_norm": 0.9571006298065186, + "learning_rate": 9.271386484432543e-06, + "loss": 0.2839, + "step": 9930 + }, + { + "epoch": 0.19880389360158146, + "grad_norm": 1.0676665306091309, + "learning_rate": 9.271217959435263e-06, + "loss": 0.2959, + "step": 9931 + }, + { + "epoch": 0.1988239121187098, + "grad_norm": 1.044813871383667, + "learning_rate": 9.271049416482704e-06, + "loss": 0.3215, + "step": 9932 + }, + { + "epoch": 0.19884393063583816, + "grad_norm": 1.075934886932373, + "learning_rate": 9.270880855575573e-06, + "loss": 0.2878, + "step": 9933 + }, + { + "epoch": 0.19886394915296648, + "grad_norm": 1.11234712600708, + "learning_rate": 9.270712276714577e-06, + "loss": 0.3786, + "step": 9934 + }, + { + "epoch": 0.19888396767009484, + "grad_norm": 1.129236102104187, + "learning_rate": 9.270543679900426e-06, + "loss": 0.332, + "step": 9935 + }, + { + "epoch": 0.1989039861872232, + "grad_norm": 1.1200367212295532, + "learning_rate": 9.270375065133827e-06, + "loss": 0.3218, + "step": 9936 + }, + { + "epoch": 0.19892400470435154, + "grad_norm": 1.1342973709106445, + "learning_rate": 9.270206432415491e-06, + "loss": 0.3685, + "step": 9937 + }, + { + "epoch": 0.19894402322147986, + "grad_norm": 1.0882017612457275, + "learning_rate": 9.270037781746127e-06, + "loss": 0.3068, + "step": 9938 + }, + { + "epoch": 0.1989640417386082, + "grad_norm": 1.1880083084106445, + "learning_rate": 9.269869113126442e-06, + "loss": 0.3668, + "step": 9939 + }, + { + "epoch": 0.19898406025573656, + "grad_norm": 1.221863031387329, + "learning_rate": 9.269700426557146e-06, + "loss": 0.3412, + "step": 9940 + }, + { + "epoch": 0.1990040787728649, + "grad_norm": 1.131237506866455, + "learning_rate": 9.26953172203895e-06, + "loss": 0.313, + "step": 9941 + }, + { + "epoch": 0.19902409728999323, + "grad_norm": 0.989184558391571, + "learning_rate": 9.26936299957256e-06, + "loss": 0.3191, + "step": 9942 + }, + { + "epoch": 0.19904411580712159, + "grad_norm": 0.9520108699798584, + "learning_rate": 9.269194259158686e-06, + "loss": 0.2514, + "step": 9943 + }, + { + "epoch": 0.19906413432424994, + "grad_norm": 1.8023185729980469, + "learning_rate": 9.269025500798039e-06, + "loss": 0.8365, + "step": 9944 + }, + { + "epoch": 0.1990841528413783, + "grad_norm": 0.9984174966812134, + "learning_rate": 9.268856724491329e-06, + "loss": 0.3124, + "step": 9945 + }, + { + "epoch": 0.1991041713585066, + "grad_norm": 1.0488241910934448, + "learning_rate": 9.268687930239261e-06, + "loss": 0.3073, + "step": 9946 + }, + { + "epoch": 0.19912418987563496, + "grad_norm": 1.6837666034698486, + "learning_rate": 9.268519118042549e-06, + "loss": 0.8918, + "step": 9947 + }, + { + "epoch": 0.1991442083927633, + "grad_norm": 1.0479145050048828, + "learning_rate": 9.268350287901902e-06, + "loss": 0.3579, + "step": 9948 + }, + { + "epoch": 0.19916422690989166, + "grad_norm": 1.1299667358398438, + "learning_rate": 9.268181439818028e-06, + "loss": 0.3581, + "step": 9949 + }, + { + "epoch": 0.19918424542701998, + "grad_norm": 1.011599063873291, + "learning_rate": 9.268012573791638e-06, + "loss": 0.3309, + "step": 9950 + }, + { + "epoch": 0.19920426394414834, + "grad_norm": 1.112295389175415, + "learning_rate": 9.267843689823441e-06, + "loss": 0.2924, + "step": 9951 + }, + { + "epoch": 0.19922428246127669, + "grad_norm": 1.0127040147781372, + "learning_rate": 9.267674787914149e-06, + "loss": 0.3151, + "step": 9952 + }, + { + "epoch": 0.19924430097840504, + "grad_norm": 1.2543814182281494, + "learning_rate": 9.26750586806447e-06, + "loss": 0.3307, + "step": 9953 + }, + { + "epoch": 0.19926431949553336, + "grad_norm": 1.0421534776687622, + "learning_rate": 9.267336930275116e-06, + "loss": 0.2813, + "step": 9954 + }, + { + "epoch": 0.1992843380126617, + "grad_norm": 1.1975044012069702, + "learning_rate": 9.267167974546793e-06, + "loss": 0.3286, + "step": 9955 + }, + { + "epoch": 0.19930435652979006, + "grad_norm": 1.1845884323120117, + "learning_rate": 9.266999000880216e-06, + "loss": 0.3493, + "step": 9956 + }, + { + "epoch": 0.1993243750469184, + "grad_norm": 1.1945521831512451, + "learning_rate": 9.266830009276093e-06, + "loss": 0.3579, + "step": 9957 + }, + { + "epoch": 0.19934439356404673, + "grad_norm": 1.190796136856079, + "learning_rate": 9.266660999735135e-06, + "loss": 0.3334, + "step": 9958 + }, + { + "epoch": 0.19936441208117509, + "grad_norm": 1.1614381074905396, + "learning_rate": 9.266491972258051e-06, + "loss": 0.3678, + "step": 9959 + }, + { + "epoch": 0.19938443059830344, + "grad_norm": 1.1014318466186523, + "learning_rate": 9.266322926845555e-06, + "loss": 0.3936, + "step": 9960 + }, + { + "epoch": 0.1994044491154318, + "grad_norm": 1.2433035373687744, + "learning_rate": 9.266153863498356e-06, + "loss": 0.2968, + "step": 9961 + }, + { + "epoch": 0.1994244676325601, + "grad_norm": 1.0771769285202026, + "learning_rate": 9.265984782217161e-06, + "loss": 0.3412, + "step": 9962 + }, + { + "epoch": 0.19944448614968846, + "grad_norm": 1.2124738693237305, + "learning_rate": 9.265815683002687e-06, + "loss": 0.3611, + "step": 9963 + }, + { + "epoch": 0.1994645046668168, + "grad_norm": 1.083483099937439, + "learning_rate": 9.26564656585564e-06, + "loss": 0.3415, + "step": 9964 + }, + { + "epoch": 0.19948452318394516, + "grad_norm": 1.111466884613037, + "learning_rate": 9.265477430776735e-06, + "loss": 0.3647, + "step": 9965 + }, + { + "epoch": 0.19950454170107348, + "grad_norm": 1.1590626239776611, + "learning_rate": 9.265308277766679e-06, + "loss": 0.3497, + "step": 9966 + }, + { + "epoch": 0.19952456021820184, + "grad_norm": 1.1060117483139038, + "learning_rate": 9.265139106826185e-06, + "loss": 0.2758, + "step": 9967 + }, + { + "epoch": 0.19954457873533019, + "grad_norm": 1.1422604322433472, + "learning_rate": 9.264969917955963e-06, + "loss": 0.3262, + "step": 9968 + }, + { + "epoch": 0.19956459725245854, + "grad_norm": 1.128458023071289, + "learning_rate": 9.264800711156728e-06, + "loss": 0.3472, + "step": 9969 + }, + { + "epoch": 0.19958461576958686, + "grad_norm": 1.1610939502716064, + "learning_rate": 9.264631486429188e-06, + "loss": 0.335, + "step": 9970 + }, + { + "epoch": 0.1996046342867152, + "grad_norm": 1.1032575368881226, + "learning_rate": 9.264462243774053e-06, + "loss": 0.3607, + "step": 9971 + }, + { + "epoch": 0.19962465280384356, + "grad_norm": 1.196006178855896, + "learning_rate": 9.264292983192038e-06, + "loss": 0.3487, + "step": 9972 + }, + { + "epoch": 0.1996446713209719, + "grad_norm": 1.245444416999817, + "learning_rate": 9.264123704683854e-06, + "loss": 0.3229, + "step": 9973 + }, + { + "epoch": 0.19966468983810023, + "grad_norm": 1.132495403289795, + "learning_rate": 9.26395440825021e-06, + "loss": 0.3026, + "step": 9974 + }, + { + "epoch": 0.19968470835522859, + "grad_norm": 0.9956991076469421, + "learning_rate": 9.26378509389182e-06, + "loss": 0.2899, + "step": 9975 + }, + { + "epoch": 0.19970472687235694, + "grad_norm": 1.1701290607452393, + "learning_rate": 9.263615761609395e-06, + "loss": 0.3178, + "step": 9976 + }, + { + "epoch": 0.1997247453894853, + "grad_norm": 1.2681561708450317, + "learning_rate": 9.263446411403644e-06, + "loss": 0.3355, + "step": 9977 + }, + { + "epoch": 0.1997447639066136, + "grad_norm": 1.4816615581512451, + "learning_rate": 9.263277043275286e-06, + "loss": 0.3128, + "step": 9978 + }, + { + "epoch": 0.19976478242374196, + "grad_norm": 1.0977959632873535, + "learning_rate": 9.263107657225027e-06, + "loss": 0.3798, + "step": 9979 + }, + { + "epoch": 0.1997848009408703, + "grad_norm": 1.2355189323425293, + "learning_rate": 9.262938253253581e-06, + "loss": 0.3625, + "step": 9980 + }, + { + "epoch": 0.19980481945799866, + "grad_norm": 1.0975152254104614, + "learning_rate": 9.26276883136166e-06, + "loss": 0.3139, + "step": 9981 + }, + { + "epoch": 0.19982483797512698, + "grad_norm": 1.1607472896575928, + "learning_rate": 9.262599391549976e-06, + "loss": 0.268, + "step": 9982 + }, + { + "epoch": 0.19984485649225533, + "grad_norm": 1.027876377105713, + "learning_rate": 9.26242993381924e-06, + "loss": 0.3093, + "step": 9983 + }, + { + "epoch": 0.19986487500938369, + "grad_norm": 1.117197036743164, + "learning_rate": 9.262260458170169e-06, + "loss": 0.3799, + "step": 9984 + }, + { + "epoch": 0.19988489352651204, + "grad_norm": 1.4360411167144775, + "learning_rate": 9.262090964603472e-06, + "loss": 0.3013, + "step": 9985 + }, + { + "epoch": 0.19990491204364036, + "grad_norm": 1.1592766046524048, + "learning_rate": 9.26192145311986e-06, + "loss": 0.4018, + "step": 9986 + }, + { + "epoch": 0.1999249305607687, + "grad_norm": 1.3361706733703613, + "learning_rate": 9.261751923720048e-06, + "loss": 0.3372, + "step": 9987 + }, + { + "epoch": 0.19994494907789706, + "grad_norm": 1.0940693616867065, + "learning_rate": 9.261582376404747e-06, + "loss": 0.3615, + "step": 9988 + }, + { + "epoch": 0.1999649675950254, + "grad_norm": 1.0897221565246582, + "learning_rate": 9.261412811174673e-06, + "loss": 0.3572, + "step": 9989 + }, + { + "epoch": 0.19998498611215373, + "grad_norm": 1.1080057621002197, + "learning_rate": 9.261243228030534e-06, + "loss": 0.3332, + "step": 9990 + }, + { + "epoch": 0.20000500462928208, + "grad_norm": 1.1359713077545166, + "learning_rate": 9.261073626973047e-06, + "loss": 0.2915, + "step": 9991 + }, + { + "epoch": 0.20002502314641044, + "grad_norm": 1.1919008493423462, + "learning_rate": 9.260904008002924e-06, + "loss": 0.3147, + "step": 9992 + }, + { + "epoch": 0.2000450416635388, + "grad_norm": 1.0225290060043335, + "learning_rate": 9.260734371120876e-06, + "loss": 0.2884, + "step": 9993 + }, + { + "epoch": 0.2000650601806671, + "grad_norm": 1.0931751728057861, + "learning_rate": 9.26056471632762e-06, + "loss": 0.3237, + "step": 9994 + }, + { + "epoch": 0.20008507869779546, + "grad_norm": 1.1706743240356445, + "learning_rate": 9.260395043623864e-06, + "loss": 0.3373, + "step": 9995 + }, + { + "epoch": 0.2001050972149238, + "grad_norm": 1.148962140083313, + "learning_rate": 9.260225353010325e-06, + "loss": 0.3416, + "step": 9996 + }, + { + "epoch": 0.20012511573205216, + "grad_norm": 0.9929469227790833, + "learning_rate": 9.260055644487718e-06, + "loss": 0.3049, + "step": 9997 + }, + { + "epoch": 0.20014513424918048, + "grad_norm": 1.2325903177261353, + "learning_rate": 9.25988591805675e-06, + "loss": 0.3514, + "step": 9998 + }, + { + "epoch": 0.20016515276630883, + "grad_norm": 0.9959611296653748, + "learning_rate": 9.25971617371814e-06, + "loss": 0.2609, + "step": 9999 + }, + { + "epoch": 0.20018517128343719, + "grad_norm": 1.0854700803756714, + "learning_rate": 9.2595464114726e-06, + "loss": 0.3667, + "step": 10000 + }, + { + "epoch": 0.20020518980056554, + "grad_norm": 1.284654140472412, + "learning_rate": 9.259376631320844e-06, + "loss": 0.3102, + "step": 10001 + }, + { + "epoch": 0.20022520831769386, + "grad_norm": 1.9175924062728882, + "learning_rate": 9.259206833263586e-06, + "loss": 0.8098, + "step": 10002 + }, + { + "epoch": 0.2002452268348222, + "grad_norm": 1.136863350868225, + "learning_rate": 9.259037017301536e-06, + "loss": 0.265, + "step": 10003 + }, + { + "epoch": 0.20026524535195056, + "grad_norm": 1.078291893005371, + "learning_rate": 9.258867183435414e-06, + "loss": 0.3432, + "step": 10004 + }, + { + "epoch": 0.2002852638690789, + "grad_norm": 1.1587601900100708, + "learning_rate": 9.258697331665929e-06, + "loss": 0.347, + "step": 10005 + }, + { + "epoch": 0.20030528238620723, + "grad_norm": 1.0271068811416626, + "learning_rate": 9.258527461993797e-06, + "loss": 0.2805, + "step": 10006 + }, + { + "epoch": 0.20032530090333558, + "grad_norm": 1.1522905826568604, + "learning_rate": 9.258357574419733e-06, + "loss": 0.2619, + "step": 10007 + }, + { + "epoch": 0.20034531942046394, + "grad_norm": 1.1235626935958862, + "learning_rate": 9.25818766894445e-06, + "loss": 0.3189, + "step": 10008 + }, + { + "epoch": 0.20036533793759229, + "grad_norm": 1.1465208530426025, + "learning_rate": 9.258017745568661e-06, + "loss": 0.3581, + "step": 10009 + }, + { + "epoch": 0.2003853564547206, + "grad_norm": 1.552483320236206, + "learning_rate": 9.257847804293083e-06, + "loss": 0.3739, + "step": 10010 + }, + { + "epoch": 0.20040537497184896, + "grad_norm": 1.104089379310608, + "learning_rate": 9.257677845118429e-06, + "loss": 0.3469, + "step": 10011 + }, + { + "epoch": 0.2004253934889773, + "grad_norm": 1.1260194778442383, + "learning_rate": 9.257507868045414e-06, + "loss": 0.3501, + "step": 10012 + }, + { + "epoch": 0.20044541200610566, + "grad_norm": 1.128109097480774, + "learning_rate": 9.257337873074751e-06, + "loss": 0.3299, + "step": 10013 + }, + { + "epoch": 0.20046543052323398, + "grad_norm": 1.0789340734481812, + "learning_rate": 9.257167860207157e-06, + "loss": 0.3338, + "step": 10014 + }, + { + "epoch": 0.20048544904036233, + "grad_norm": 1.165132761001587, + "learning_rate": 9.256997829443346e-06, + "loss": 0.3349, + "step": 10015 + }, + { + "epoch": 0.20050546755749069, + "grad_norm": 1.0389786958694458, + "learning_rate": 9.25682778078403e-06, + "loss": 0.3564, + "step": 10016 + }, + { + "epoch": 0.20052548607461904, + "grad_norm": 1.1434226036071777, + "learning_rate": 9.256657714229927e-06, + "loss": 0.3834, + "step": 10017 + }, + { + "epoch": 0.20054550459174736, + "grad_norm": 1.245309591293335, + "learning_rate": 9.256487629781752e-06, + "loss": 0.3224, + "step": 10018 + }, + { + "epoch": 0.2005655231088757, + "grad_norm": 1.9631167650222778, + "learning_rate": 9.256317527440218e-06, + "loss": 0.3366, + "step": 10019 + }, + { + "epoch": 0.20058554162600406, + "grad_norm": 1.182482361793518, + "learning_rate": 9.256147407206042e-06, + "loss": 0.345, + "step": 10020 + }, + { + "epoch": 0.2006055601431324, + "grad_norm": 2.0907888412475586, + "learning_rate": 9.255977269079936e-06, + "loss": 0.8102, + "step": 10021 + }, + { + "epoch": 0.20062557866026073, + "grad_norm": 1.8465917110443115, + "learning_rate": 9.25580711306262e-06, + "loss": 0.8649, + "step": 10022 + }, + { + "epoch": 0.20064559717738908, + "grad_norm": 1.0293223857879639, + "learning_rate": 9.255636939154806e-06, + "loss": 0.3392, + "step": 10023 + }, + { + "epoch": 0.20066561569451744, + "grad_norm": 1.0495877265930176, + "learning_rate": 9.255466747357209e-06, + "loss": 0.3049, + "step": 10024 + }, + { + "epoch": 0.20068563421164579, + "grad_norm": 1.2397894859313965, + "learning_rate": 9.255296537670547e-06, + "loss": 0.3476, + "step": 10025 + }, + { + "epoch": 0.2007056527287741, + "grad_norm": 1.1622378826141357, + "learning_rate": 9.255126310095535e-06, + "loss": 0.3198, + "step": 10026 + }, + { + "epoch": 0.20072567124590246, + "grad_norm": 1.1602529287338257, + "learning_rate": 9.254956064632885e-06, + "loss": 0.3406, + "step": 10027 + }, + { + "epoch": 0.2007456897630308, + "grad_norm": 1.0748237371444702, + "learning_rate": 9.254785801283318e-06, + "loss": 0.2961, + "step": 10028 + }, + { + "epoch": 0.20076570828015916, + "grad_norm": 1.0242730379104614, + "learning_rate": 9.254615520047545e-06, + "loss": 0.3115, + "step": 10029 + }, + { + "epoch": 0.20078572679728748, + "grad_norm": 1.082666277885437, + "learning_rate": 9.254445220926285e-06, + "loss": 0.3582, + "step": 10030 + }, + { + "epoch": 0.20080574531441583, + "grad_norm": 1.3034943342208862, + "learning_rate": 9.254274903920254e-06, + "loss": 0.3394, + "step": 10031 + }, + { + "epoch": 0.20082576383154419, + "grad_norm": 1.2000046968460083, + "learning_rate": 9.254104569030164e-06, + "loss": 0.3563, + "step": 10032 + }, + { + "epoch": 0.20084578234867254, + "grad_norm": 1.0974136590957642, + "learning_rate": 9.253934216256735e-06, + "loss": 0.3763, + "step": 10033 + }, + { + "epoch": 0.20086580086580086, + "grad_norm": 1.071305513381958, + "learning_rate": 9.253763845600682e-06, + "loss": 0.342, + "step": 10034 + }, + { + "epoch": 0.2008858193829292, + "grad_norm": 1.0373220443725586, + "learning_rate": 9.253593457062722e-06, + "loss": 0.3415, + "step": 10035 + }, + { + "epoch": 0.20090583790005756, + "grad_norm": 1.179024577140808, + "learning_rate": 9.253423050643571e-06, + "loss": 0.3035, + "step": 10036 + }, + { + "epoch": 0.2009258564171859, + "grad_norm": 1.3597183227539062, + "learning_rate": 9.253252626343943e-06, + "loss": 0.3587, + "step": 10037 + }, + { + "epoch": 0.20094587493431423, + "grad_norm": 1.0526717901229858, + "learning_rate": 9.253082184164557e-06, + "loss": 0.2901, + "step": 10038 + }, + { + "epoch": 0.20096589345144258, + "grad_norm": 1.8775025606155396, + "learning_rate": 9.25291172410613e-06, + "loss": 0.853, + "step": 10039 + }, + { + "epoch": 0.20098591196857093, + "grad_norm": 1.1443082094192505, + "learning_rate": 9.252741246169376e-06, + "loss": 0.3309, + "step": 10040 + }, + { + "epoch": 0.20100593048569929, + "grad_norm": 1.1290773153305054, + "learning_rate": 9.252570750355012e-06, + "loss": 0.3464, + "step": 10041 + }, + { + "epoch": 0.2010259490028276, + "grad_norm": 1.0935543775558472, + "learning_rate": 9.252400236663758e-06, + "loss": 0.3011, + "step": 10042 + }, + { + "epoch": 0.20104596751995596, + "grad_norm": 1.1052955389022827, + "learning_rate": 9.252229705096328e-06, + "loss": 0.3565, + "step": 10043 + }, + { + "epoch": 0.2010659860370843, + "grad_norm": 1.1995781660079956, + "learning_rate": 9.25205915565344e-06, + "loss": 0.3376, + "step": 10044 + }, + { + "epoch": 0.20108600455421263, + "grad_norm": 1.855176329612732, + "learning_rate": 9.251888588335808e-06, + "loss": 0.8327, + "step": 10045 + }, + { + "epoch": 0.20110602307134098, + "grad_norm": 1.2878904342651367, + "learning_rate": 9.251718003144153e-06, + "loss": 0.279, + "step": 10046 + }, + { + "epoch": 0.20112604158846933, + "grad_norm": 1.1539198160171509, + "learning_rate": 9.25154740007919e-06, + "loss": 0.3725, + "step": 10047 + }, + { + "epoch": 0.20114606010559768, + "grad_norm": 1.0910358428955078, + "learning_rate": 9.251376779141637e-06, + "loss": 0.3642, + "step": 10048 + }, + { + "epoch": 0.201166078622726, + "grad_norm": 1.7554537057876587, + "learning_rate": 9.251206140332213e-06, + "loss": 0.896, + "step": 10049 + }, + { + "epoch": 0.20118609713985436, + "grad_norm": 1.037346601486206, + "learning_rate": 9.25103548365163e-06, + "loss": 0.3751, + "step": 10050 + }, + { + "epoch": 0.2012061156569827, + "grad_norm": 1.197303295135498, + "learning_rate": 9.250864809100608e-06, + "loss": 0.2955, + "step": 10051 + }, + { + "epoch": 0.20122613417411106, + "grad_norm": 1.1321377754211426, + "learning_rate": 9.250694116679867e-06, + "loss": 0.3278, + "step": 10052 + }, + { + "epoch": 0.20124615269123938, + "grad_norm": 1.084660291671753, + "learning_rate": 9.250523406390123e-06, + "loss": 0.3162, + "step": 10053 + }, + { + "epoch": 0.20126617120836773, + "grad_norm": 1.1045033931732178, + "learning_rate": 9.250352678232092e-06, + "loss": 0.3092, + "step": 10054 + }, + { + "epoch": 0.20128618972549608, + "grad_norm": 1.1911664009094238, + "learning_rate": 9.250181932206494e-06, + "loss": 0.3166, + "step": 10055 + }, + { + "epoch": 0.20130620824262443, + "grad_norm": 1.2811572551727295, + "learning_rate": 9.250011168314047e-06, + "loss": 0.3251, + "step": 10056 + }, + { + "epoch": 0.20132622675975276, + "grad_norm": 1.1134858131408691, + "learning_rate": 9.249840386555466e-06, + "loss": 0.3263, + "step": 10057 + }, + { + "epoch": 0.2013462452768811, + "grad_norm": 1.0782763957977295, + "learning_rate": 9.249669586931471e-06, + "loss": 0.315, + "step": 10058 + }, + { + "epoch": 0.20136626379400946, + "grad_norm": 1.186153531074524, + "learning_rate": 9.249498769442779e-06, + "loss": 0.2971, + "step": 10059 + }, + { + "epoch": 0.2013862823111378, + "grad_norm": 1.103639006614685, + "learning_rate": 9.249327934090111e-06, + "loss": 0.3297, + "step": 10060 + }, + { + "epoch": 0.20140630082826613, + "grad_norm": 1.9333194494247437, + "learning_rate": 9.24915708087418e-06, + "loss": 0.8485, + "step": 10061 + }, + { + "epoch": 0.20142631934539448, + "grad_norm": 1.066717267036438, + "learning_rate": 9.248986209795708e-06, + "loss": 0.3543, + "step": 10062 + }, + { + "epoch": 0.20144633786252283, + "grad_norm": 2.0664384365081787, + "learning_rate": 9.248815320855413e-06, + "loss": 0.87, + "step": 10063 + }, + { + "epoch": 0.20146635637965118, + "grad_norm": 1.2658871412277222, + "learning_rate": 9.248644414054013e-06, + "loss": 0.345, + "step": 10064 + }, + { + "epoch": 0.2014863748967795, + "grad_norm": 1.1696134805679321, + "learning_rate": 9.248473489392224e-06, + "loss": 0.379, + "step": 10065 + }, + { + "epoch": 0.20150639341390786, + "grad_norm": 1.2121063470840454, + "learning_rate": 9.248302546870769e-06, + "loss": 0.2905, + "step": 10066 + }, + { + "epoch": 0.2015264119310362, + "grad_norm": 1.0255446434020996, + "learning_rate": 9.248131586490363e-06, + "loss": 0.3128, + "step": 10067 + }, + { + "epoch": 0.20154643044816456, + "grad_norm": 1.0172473192214966, + "learning_rate": 9.247960608251728e-06, + "loss": 0.3022, + "step": 10068 + }, + { + "epoch": 0.20156644896529288, + "grad_norm": 1.1465964317321777, + "learning_rate": 9.24778961215558e-06, + "loss": 0.2992, + "step": 10069 + }, + { + "epoch": 0.20158646748242123, + "grad_norm": 1.820846438407898, + "learning_rate": 9.247618598202637e-06, + "loss": 0.778, + "step": 10070 + }, + { + "epoch": 0.20160648599954958, + "grad_norm": 1.0372116565704346, + "learning_rate": 9.24744756639362e-06, + "loss": 0.3224, + "step": 10071 + }, + { + "epoch": 0.20162650451667793, + "grad_norm": 1.161537766456604, + "learning_rate": 9.247276516729247e-06, + "loss": 0.3795, + "step": 10072 + }, + { + "epoch": 0.20164652303380626, + "grad_norm": 1.093733787536621, + "learning_rate": 9.24710544921024e-06, + "loss": 0.3242, + "step": 10073 + }, + { + "epoch": 0.2016665415509346, + "grad_norm": 1.1236188411712646, + "learning_rate": 9.246934363837313e-06, + "loss": 0.3213, + "step": 10074 + }, + { + "epoch": 0.20168656006806296, + "grad_norm": 1.0514018535614014, + "learning_rate": 9.246763260611189e-06, + "loss": 0.3186, + "step": 10075 + }, + { + "epoch": 0.2017065785851913, + "grad_norm": 2.059244394302368, + "learning_rate": 9.246592139532586e-06, + "loss": 0.7943, + "step": 10076 + }, + { + "epoch": 0.20172659710231963, + "grad_norm": 1.938002347946167, + "learning_rate": 9.246421000602224e-06, + "loss": 0.8552, + "step": 10077 + }, + { + "epoch": 0.20174661561944798, + "grad_norm": 1.1715563535690308, + "learning_rate": 9.246249843820823e-06, + "loss": 0.3103, + "step": 10078 + }, + { + "epoch": 0.20176663413657633, + "grad_norm": 1.1976065635681152, + "learning_rate": 9.2460786691891e-06, + "loss": 0.3403, + "step": 10079 + }, + { + "epoch": 0.20178665265370468, + "grad_norm": 1.0738790035247803, + "learning_rate": 9.245907476707776e-06, + "loss": 0.329, + "step": 10080 + }, + { + "epoch": 0.201806671170833, + "grad_norm": 1.1998862028121948, + "learning_rate": 9.245736266377571e-06, + "loss": 0.3423, + "step": 10081 + }, + { + "epoch": 0.20182668968796136, + "grad_norm": 1.2463736534118652, + "learning_rate": 9.245565038199205e-06, + "loss": 0.2864, + "step": 10082 + }, + { + "epoch": 0.2018467082050897, + "grad_norm": 1.086573839187622, + "learning_rate": 9.245393792173395e-06, + "loss": 0.3545, + "step": 10083 + }, + { + "epoch": 0.20186672672221806, + "grad_norm": 1.034505844116211, + "learning_rate": 9.245222528300866e-06, + "loss": 0.3518, + "step": 10084 + }, + { + "epoch": 0.20188674523934638, + "grad_norm": 1.046373963356018, + "learning_rate": 9.245051246582333e-06, + "loss": 0.2998, + "step": 10085 + }, + { + "epoch": 0.20190676375647473, + "grad_norm": 1.1563196182250977, + "learning_rate": 9.24487994701852e-06, + "loss": 0.2996, + "step": 10086 + }, + { + "epoch": 0.20192678227360308, + "grad_norm": 1.0715749263763428, + "learning_rate": 9.244708629610144e-06, + "loss": 0.3207, + "step": 10087 + }, + { + "epoch": 0.20194680079073143, + "grad_norm": 1.1905035972595215, + "learning_rate": 9.244537294357924e-06, + "loss": 0.3563, + "step": 10088 + }, + { + "epoch": 0.20196681930785976, + "grad_norm": 1.143599510192871, + "learning_rate": 9.244365941262586e-06, + "loss": 0.3697, + "step": 10089 + }, + { + "epoch": 0.2019868378249881, + "grad_norm": 1.8433201313018799, + "learning_rate": 9.244194570324846e-06, + "loss": 0.8583, + "step": 10090 + }, + { + "epoch": 0.20200685634211646, + "grad_norm": 1.0166462659835815, + "learning_rate": 9.244023181545427e-06, + "loss": 0.3141, + "step": 10091 + }, + { + "epoch": 0.2020268748592448, + "grad_norm": 1.0701943635940552, + "learning_rate": 9.243851774925045e-06, + "loss": 0.3091, + "step": 10092 + }, + { + "epoch": 0.20204689337637313, + "grad_norm": 1.2150137424468994, + "learning_rate": 9.243680350464425e-06, + "loss": 0.2989, + "step": 10093 + }, + { + "epoch": 0.20206691189350148, + "grad_norm": 1.9396603107452393, + "learning_rate": 9.243508908164287e-06, + "loss": 0.8893, + "step": 10094 + }, + { + "epoch": 0.20208693041062983, + "grad_norm": 1.3343857526779175, + "learning_rate": 9.24333744802535e-06, + "loss": 0.3481, + "step": 10095 + }, + { + "epoch": 0.20210694892775818, + "grad_norm": 1.2074790000915527, + "learning_rate": 9.243165970048334e-06, + "loss": 0.3769, + "step": 10096 + }, + { + "epoch": 0.2021269674448865, + "grad_norm": 1.3304412364959717, + "learning_rate": 9.242994474233962e-06, + "loss": 0.361, + "step": 10097 + }, + { + "epoch": 0.20214698596201486, + "grad_norm": 1.1605461835861206, + "learning_rate": 9.242822960582956e-06, + "loss": 0.3689, + "step": 10098 + }, + { + "epoch": 0.2021670044791432, + "grad_norm": 1.1482418775558472, + "learning_rate": 9.242651429096034e-06, + "loss": 0.3476, + "step": 10099 + }, + { + "epoch": 0.20218702299627156, + "grad_norm": 1.289986252784729, + "learning_rate": 9.242479879773917e-06, + "loss": 0.3312, + "step": 10100 + }, + { + "epoch": 0.20220704151339988, + "grad_norm": 0.9776738286018372, + "learning_rate": 9.24230831261733e-06, + "loss": 0.3067, + "step": 10101 + }, + { + "epoch": 0.20222706003052823, + "grad_norm": 1.1121506690979004, + "learning_rate": 9.242136727626991e-06, + "loss": 0.3067, + "step": 10102 + }, + { + "epoch": 0.20224707854765658, + "grad_norm": 1.0755380392074585, + "learning_rate": 9.241965124803622e-06, + "loss": 0.2897, + "step": 10103 + }, + { + "epoch": 0.20226709706478493, + "grad_norm": 1.2707542181015015, + "learning_rate": 9.241793504147945e-06, + "loss": 0.3007, + "step": 10104 + }, + { + "epoch": 0.20228711558191326, + "grad_norm": 1.1206610202789307, + "learning_rate": 9.241621865660681e-06, + "loss": 0.322, + "step": 10105 + }, + { + "epoch": 0.2023071340990416, + "grad_norm": 1.0867027044296265, + "learning_rate": 9.24145020934255e-06, + "loss": 0.3454, + "step": 10106 + }, + { + "epoch": 0.20232715261616996, + "grad_norm": 1.1502076387405396, + "learning_rate": 9.241278535194277e-06, + "loss": 0.2917, + "step": 10107 + }, + { + "epoch": 0.2023471711332983, + "grad_norm": 1.0424436330795288, + "learning_rate": 9.241106843216581e-06, + "loss": 0.33, + "step": 10108 + }, + { + "epoch": 0.20236718965042663, + "grad_norm": 1.1273841857910156, + "learning_rate": 9.240935133410185e-06, + "loss": 0.3187, + "step": 10109 + }, + { + "epoch": 0.20238720816755498, + "grad_norm": 1.3972605466842651, + "learning_rate": 9.240763405775809e-06, + "loss": 0.3329, + "step": 10110 + }, + { + "epoch": 0.20240722668468333, + "grad_norm": 1.3740278482437134, + "learning_rate": 9.240591660314178e-06, + "loss": 0.3191, + "step": 10111 + }, + { + "epoch": 0.20242724520181168, + "grad_norm": 1.105674147605896, + "learning_rate": 9.24041989702601e-06, + "loss": 0.3075, + "step": 10112 + }, + { + "epoch": 0.20244726371894, + "grad_norm": 1.1739884614944458, + "learning_rate": 9.240248115912032e-06, + "loss": 0.3045, + "step": 10113 + }, + { + "epoch": 0.20246728223606836, + "grad_norm": 1.1832125186920166, + "learning_rate": 9.240076316972962e-06, + "loss": 0.3657, + "step": 10114 + }, + { + "epoch": 0.2024873007531967, + "grad_norm": 2.0604255199432373, + "learning_rate": 9.239904500209522e-06, + "loss": 0.8356, + "step": 10115 + }, + { + "epoch": 0.20250731927032506, + "grad_norm": 1.200481653213501, + "learning_rate": 9.239732665622439e-06, + "loss": 0.3224, + "step": 10116 + }, + { + "epoch": 0.20252733778745338, + "grad_norm": 1.0908550024032593, + "learning_rate": 9.23956081321243e-06, + "loss": 0.3079, + "step": 10117 + }, + { + "epoch": 0.20254735630458173, + "grad_norm": 0.9445507526397705, + "learning_rate": 9.23938894298022e-06, + "loss": 0.2875, + "step": 10118 + }, + { + "epoch": 0.20256737482171008, + "grad_norm": 1.1222883462905884, + "learning_rate": 9.239217054926532e-06, + "loss": 0.3255, + "step": 10119 + }, + { + "epoch": 0.20258739333883843, + "grad_norm": 1.021012306213379, + "learning_rate": 9.239045149052088e-06, + "loss": 0.3465, + "step": 10120 + }, + { + "epoch": 0.20260741185596676, + "grad_norm": 1.0573076009750366, + "learning_rate": 9.23887322535761e-06, + "loss": 0.306, + "step": 10121 + }, + { + "epoch": 0.2026274303730951, + "grad_norm": 1.1666070222854614, + "learning_rate": 9.238701283843822e-06, + "loss": 0.3776, + "step": 10122 + }, + { + "epoch": 0.20264744889022346, + "grad_norm": 1.355292558670044, + "learning_rate": 9.238529324511445e-06, + "loss": 0.3235, + "step": 10123 + }, + { + "epoch": 0.2026674674073518, + "grad_norm": 1.2759573459625244, + "learning_rate": 9.238357347361202e-06, + "loss": 0.3474, + "step": 10124 + }, + { + "epoch": 0.20268748592448013, + "grad_norm": 1.3376269340515137, + "learning_rate": 9.238185352393819e-06, + "loss": 0.3506, + "step": 10125 + }, + { + "epoch": 0.20270750444160848, + "grad_norm": 1.0907201766967773, + "learning_rate": 9.238013339610014e-06, + "loss": 0.3403, + "step": 10126 + }, + { + "epoch": 0.20272752295873683, + "grad_norm": 1.0847712755203247, + "learning_rate": 9.237841309010516e-06, + "loss": 0.3275, + "step": 10127 + }, + { + "epoch": 0.20274754147586518, + "grad_norm": 1.1632025241851807, + "learning_rate": 9.237669260596044e-06, + "loss": 0.3552, + "step": 10128 + }, + { + "epoch": 0.2027675599929935, + "grad_norm": 1.0999641418457031, + "learning_rate": 9.237497194367322e-06, + "loss": 0.3362, + "step": 10129 + }, + { + "epoch": 0.20278757851012186, + "grad_norm": 1.0021308660507202, + "learning_rate": 9.237325110325075e-06, + "loss": 0.3057, + "step": 10130 + }, + { + "epoch": 0.2028075970272502, + "grad_norm": 1.0563745498657227, + "learning_rate": 9.237153008470023e-06, + "loss": 0.2883, + "step": 10131 + }, + { + "epoch": 0.20282761554437856, + "grad_norm": 1.0726983547210693, + "learning_rate": 9.236980888802893e-06, + "loss": 0.3459, + "step": 10132 + }, + { + "epoch": 0.20284763406150688, + "grad_norm": 1.2603439092636108, + "learning_rate": 9.236808751324407e-06, + "loss": 0.3349, + "step": 10133 + }, + { + "epoch": 0.20286765257863523, + "grad_norm": 1.1191242933273315, + "learning_rate": 9.236636596035288e-06, + "loss": 0.3251, + "step": 10134 + }, + { + "epoch": 0.20288767109576358, + "grad_norm": 1.0116047859191895, + "learning_rate": 9.236464422936261e-06, + "loss": 0.3199, + "step": 10135 + }, + { + "epoch": 0.20290768961289193, + "grad_norm": 1.2136332988739014, + "learning_rate": 9.23629223202805e-06, + "loss": 0.324, + "step": 10136 + }, + { + "epoch": 0.20292770813002026, + "grad_norm": 1.0779225826263428, + "learning_rate": 9.236120023311378e-06, + "loss": 0.3479, + "step": 10137 + }, + { + "epoch": 0.2029477266471486, + "grad_norm": 1.8922204971313477, + "learning_rate": 9.235947796786967e-06, + "loss": 0.8779, + "step": 10138 + }, + { + "epoch": 0.20296774516427696, + "grad_norm": 1.2895818948745728, + "learning_rate": 9.235775552455544e-06, + "loss": 0.3734, + "step": 10139 + }, + { + "epoch": 0.2029877636814053, + "grad_norm": 1.0804928541183472, + "learning_rate": 9.235603290317834e-06, + "loss": 0.3291, + "step": 10140 + }, + { + "epoch": 0.20300778219853363, + "grad_norm": 1.0965378284454346, + "learning_rate": 9.235431010374557e-06, + "loss": 0.3936, + "step": 10141 + }, + { + "epoch": 0.20302780071566198, + "grad_norm": 1.868080973625183, + "learning_rate": 9.23525871262644e-06, + "loss": 0.8186, + "step": 10142 + }, + { + "epoch": 0.20304781923279033, + "grad_norm": 1.1004791259765625, + "learning_rate": 9.235086397074208e-06, + "loss": 0.2989, + "step": 10143 + }, + { + "epoch": 0.20306783774991868, + "grad_norm": 1.0905712842941284, + "learning_rate": 9.234914063718582e-06, + "loss": 0.3402, + "step": 10144 + }, + { + "epoch": 0.203087856267047, + "grad_norm": 1.5069215297698975, + "learning_rate": 9.23474171256029e-06, + "loss": 0.2787, + "step": 10145 + }, + { + "epoch": 0.20310787478417536, + "grad_norm": 1.144656777381897, + "learning_rate": 9.234569343600057e-06, + "loss": 0.3528, + "step": 10146 + }, + { + "epoch": 0.2031278933013037, + "grad_norm": 1.1134461164474487, + "learning_rate": 9.234396956838602e-06, + "loss": 0.3367, + "step": 10147 + }, + { + "epoch": 0.20314791181843206, + "grad_norm": 1.1415746212005615, + "learning_rate": 9.234224552276657e-06, + "loss": 0.3509, + "step": 10148 + }, + { + "epoch": 0.20316793033556038, + "grad_norm": 1.0171163082122803, + "learning_rate": 9.234052129914941e-06, + "loss": 0.3062, + "step": 10149 + }, + { + "epoch": 0.20318794885268873, + "grad_norm": 1.2113643884658813, + "learning_rate": 9.233879689754181e-06, + "loss": 0.3719, + "step": 10150 + }, + { + "epoch": 0.20320796736981708, + "grad_norm": 1.903977394104004, + "learning_rate": 9.233707231795103e-06, + "loss": 0.8512, + "step": 10151 + }, + { + "epoch": 0.20322798588694543, + "grad_norm": 1.2099788188934326, + "learning_rate": 9.23353475603843e-06, + "loss": 0.3416, + "step": 10152 + }, + { + "epoch": 0.20324800440407376, + "grad_norm": 0.942586362361908, + "learning_rate": 9.23336226248489e-06, + "loss": 0.299, + "step": 10153 + }, + { + "epoch": 0.2032680229212021, + "grad_norm": 1.0614310503005981, + "learning_rate": 9.233189751135204e-06, + "loss": 0.3238, + "step": 10154 + }, + { + "epoch": 0.20328804143833046, + "grad_norm": 1.0473048686981201, + "learning_rate": 9.233017221990101e-06, + "loss": 0.3033, + "step": 10155 + }, + { + "epoch": 0.2033080599554588, + "grad_norm": 1.44100821018219, + "learning_rate": 9.232844675050303e-06, + "loss": 0.3476, + "step": 10156 + }, + { + "epoch": 0.20332807847258713, + "grad_norm": 1.2072153091430664, + "learning_rate": 9.232672110316537e-06, + "loss": 0.305, + "step": 10157 + }, + { + "epoch": 0.20334809698971548, + "grad_norm": 1.153228521347046, + "learning_rate": 9.232499527789529e-06, + "loss": 0.3468, + "step": 10158 + }, + { + "epoch": 0.20336811550684383, + "grad_norm": 0.9560301899909973, + "learning_rate": 9.232326927470004e-06, + "loss": 0.2713, + "step": 10159 + }, + { + "epoch": 0.20338813402397218, + "grad_norm": 0.9922974109649658, + "learning_rate": 9.232154309358686e-06, + "loss": 0.3383, + "step": 10160 + }, + { + "epoch": 0.2034081525411005, + "grad_norm": 1.0465675592422485, + "learning_rate": 9.231981673456303e-06, + "loss": 0.322, + "step": 10161 + }, + { + "epoch": 0.20342817105822886, + "grad_norm": 1.4664769172668457, + "learning_rate": 9.23180901976358e-06, + "loss": 0.3131, + "step": 10162 + }, + { + "epoch": 0.2034481895753572, + "grad_norm": 1.051617980003357, + "learning_rate": 9.23163634828124e-06, + "loss": 0.3654, + "step": 10163 + }, + { + "epoch": 0.20346820809248556, + "grad_norm": 1.213036298751831, + "learning_rate": 9.231463659010015e-06, + "loss": 0.2885, + "step": 10164 + }, + { + "epoch": 0.20348822660961388, + "grad_norm": 1.795698881149292, + "learning_rate": 9.231290951950626e-06, + "loss": 0.8685, + "step": 10165 + }, + { + "epoch": 0.20350824512674223, + "grad_norm": 1.1351091861724854, + "learning_rate": 9.231118227103802e-06, + "loss": 0.377, + "step": 10166 + }, + { + "epoch": 0.20352826364387058, + "grad_norm": 1.1577543020248413, + "learning_rate": 9.230945484470264e-06, + "loss": 0.3251, + "step": 10167 + }, + { + "epoch": 0.20354828216099893, + "grad_norm": 1.4692268371582031, + "learning_rate": 9.230772724050744e-06, + "loss": 0.3418, + "step": 10168 + }, + { + "epoch": 0.20356830067812726, + "grad_norm": 1.1820958852767944, + "learning_rate": 9.230599945845966e-06, + "loss": 0.3294, + "step": 10169 + }, + { + "epoch": 0.2035883191952556, + "grad_norm": 1.083914041519165, + "learning_rate": 9.230427149856655e-06, + "loss": 0.312, + "step": 10170 + }, + { + "epoch": 0.20360833771238396, + "grad_norm": 1.0731992721557617, + "learning_rate": 9.23025433608354e-06, + "loss": 0.3118, + "step": 10171 + }, + { + "epoch": 0.2036283562295123, + "grad_norm": 1.0258835554122925, + "learning_rate": 9.230081504527346e-06, + "loss": 0.2924, + "step": 10172 + }, + { + "epoch": 0.20364837474664063, + "grad_norm": 2.0969674587249756, + "learning_rate": 9.2299086551888e-06, + "loss": 0.8581, + "step": 10173 + }, + { + "epoch": 0.20366839326376898, + "grad_norm": 0.9383949041366577, + "learning_rate": 9.229735788068627e-06, + "loss": 0.3099, + "step": 10174 + }, + { + "epoch": 0.20368841178089733, + "grad_norm": 1.1040699481964111, + "learning_rate": 9.229562903167555e-06, + "loss": 0.3201, + "step": 10175 + }, + { + "epoch": 0.20370843029802568, + "grad_norm": 0.9982942938804626, + "learning_rate": 9.229390000486313e-06, + "loss": 0.2909, + "step": 10176 + }, + { + "epoch": 0.203728448815154, + "grad_norm": 1.133649230003357, + "learning_rate": 9.229217080025623e-06, + "loss": 0.3187, + "step": 10177 + }, + { + "epoch": 0.20374846733228236, + "grad_norm": 1.0786741971969604, + "learning_rate": 9.229044141786217e-06, + "loss": 0.3383, + "step": 10178 + }, + { + "epoch": 0.2037684858494107, + "grad_norm": 1.0922728776931763, + "learning_rate": 9.228871185768818e-06, + "loss": 0.3319, + "step": 10179 + }, + { + "epoch": 0.20378850436653906, + "grad_norm": 1.026909589767456, + "learning_rate": 9.228698211974154e-06, + "loss": 0.3007, + "step": 10180 + }, + { + "epoch": 0.20380852288366738, + "grad_norm": 1.1214320659637451, + "learning_rate": 9.228525220402953e-06, + "loss": 0.336, + "step": 10181 + }, + { + "epoch": 0.20382854140079573, + "grad_norm": 1.0854641199111938, + "learning_rate": 9.228352211055943e-06, + "loss": 0.3119, + "step": 10182 + }, + { + "epoch": 0.20384855991792408, + "grad_norm": 1.1342819929122925, + "learning_rate": 9.22817918393385e-06, + "loss": 0.3613, + "step": 10183 + }, + { + "epoch": 0.20386857843505243, + "grad_norm": 1.1180336475372314, + "learning_rate": 9.2280061390374e-06, + "loss": 0.3003, + "step": 10184 + }, + { + "epoch": 0.20388859695218076, + "grad_norm": 1.1261996030807495, + "learning_rate": 9.227833076367324e-06, + "loss": 0.3281, + "step": 10185 + }, + { + "epoch": 0.2039086154693091, + "grad_norm": 1.1313756704330444, + "learning_rate": 9.227659995924347e-06, + "loss": 0.3313, + "step": 10186 + }, + { + "epoch": 0.20392863398643746, + "grad_norm": 1.124720811843872, + "learning_rate": 9.227486897709197e-06, + "loss": 0.3222, + "step": 10187 + }, + { + "epoch": 0.2039486525035658, + "grad_norm": 1.085709571838379, + "learning_rate": 9.227313781722602e-06, + "loss": 0.33, + "step": 10188 + }, + { + "epoch": 0.20396867102069413, + "grad_norm": 2.062391757965088, + "learning_rate": 9.22714064796529e-06, + "loss": 0.7789, + "step": 10189 + }, + { + "epoch": 0.20398868953782248, + "grad_norm": 1.0589711666107178, + "learning_rate": 9.226967496437989e-06, + "loss": 0.3197, + "step": 10190 + }, + { + "epoch": 0.20400870805495083, + "grad_norm": 1.0785573720932007, + "learning_rate": 9.226794327141424e-06, + "loss": 0.3252, + "step": 10191 + }, + { + "epoch": 0.20402872657207918, + "grad_norm": 1.0915486812591553, + "learning_rate": 9.226621140076327e-06, + "loss": 0.3057, + "step": 10192 + }, + { + "epoch": 0.2040487450892075, + "grad_norm": 1.1823041439056396, + "learning_rate": 9.226447935243423e-06, + "loss": 0.338, + "step": 10193 + }, + { + "epoch": 0.20406876360633586, + "grad_norm": 1.1437792778015137, + "learning_rate": 9.226274712643444e-06, + "loss": 0.3635, + "step": 10194 + }, + { + "epoch": 0.2040887821234642, + "grad_norm": 1.0476374626159668, + "learning_rate": 9.226101472277112e-06, + "loss": 0.3565, + "step": 10195 + }, + { + "epoch": 0.20410880064059256, + "grad_norm": 1.0854674577713013, + "learning_rate": 9.225928214145161e-06, + "loss": 0.2923, + "step": 10196 + }, + { + "epoch": 0.20412881915772088, + "grad_norm": 1.1604875326156616, + "learning_rate": 9.225754938248318e-06, + "loss": 0.3591, + "step": 10197 + }, + { + "epoch": 0.20414883767484923, + "grad_norm": 1.1038402318954468, + "learning_rate": 9.225581644587309e-06, + "loss": 0.3344, + "step": 10198 + }, + { + "epoch": 0.20416885619197758, + "grad_norm": 1.824371576309204, + "learning_rate": 9.225408333162864e-06, + "loss": 0.7701, + "step": 10199 + }, + { + "epoch": 0.20418887470910593, + "grad_norm": 1.8016142845153809, + "learning_rate": 9.225235003975714e-06, + "loss": 0.8899, + "step": 10200 + }, + { + "epoch": 0.20420889322623426, + "grad_norm": 1.0336889028549194, + "learning_rate": 9.225061657026583e-06, + "loss": 0.3266, + "step": 10201 + }, + { + "epoch": 0.2042289117433626, + "grad_norm": 1.0422405004501343, + "learning_rate": 9.224888292316202e-06, + "loss": 0.2968, + "step": 10202 + }, + { + "epoch": 0.20424893026049096, + "grad_norm": 1.1015902757644653, + "learning_rate": 9.224714909845302e-06, + "loss": 0.2789, + "step": 10203 + }, + { + "epoch": 0.2042689487776193, + "grad_norm": 1.1906239986419678, + "learning_rate": 9.224541509614608e-06, + "loss": 0.3181, + "step": 10204 + }, + { + "epoch": 0.20428896729474763, + "grad_norm": 0.9685229659080505, + "learning_rate": 9.224368091624852e-06, + "loss": 0.2907, + "step": 10205 + }, + { + "epoch": 0.20430898581187598, + "grad_norm": 1.0600258111953735, + "learning_rate": 9.22419465587676e-06, + "loss": 0.2817, + "step": 10206 + }, + { + "epoch": 0.20432900432900433, + "grad_norm": 1.1639081239700317, + "learning_rate": 9.224021202371063e-06, + "loss": 0.357, + "step": 10207 + }, + { + "epoch": 0.20434902284613268, + "grad_norm": 1.3804861307144165, + "learning_rate": 9.223847731108489e-06, + "loss": 0.3728, + "step": 10208 + }, + { + "epoch": 0.204369041363261, + "grad_norm": 1.144345998764038, + "learning_rate": 9.22367424208977e-06, + "loss": 0.373, + "step": 10209 + }, + { + "epoch": 0.20438905988038936, + "grad_norm": 1.1764124631881714, + "learning_rate": 9.223500735315633e-06, + "loss": 0.3029, + "step": 10210 + }, + { + "epoch": 0.2044090783975177, + "grad_norm": 1.1155484914779663, + "learning_rate": 9.223327210786808e-06, + "loss": 0.2857, + "step": 10211 + }, + { + "epoch": 0.20442909691464606, + "grad_norm": 1.0663111209869385, + "learning_rate": 9.223153668504023e-06, + "loss": 0.2831, + "step": 10212 + }, + { + "epoch": 0.20444911543177438, + "grad_norm": 1.1788278818130493, + "learning_rate": 9.222980108468008e-06, + "loss": 0.3387, + "step": 10213 + }, + { + "epoch": 0.20446913394890273, + "grad_norm": 0.9209933280944824, + "learning_rate": 9.222806530679496e-06, + "loss": 0.2705, + "step": 10214 + }, + { + "epoch": 0.20448915246603108, + "grad_norm": 1.141566514968872, + "learning_rate": 9.222632935139213e-06, + "loss": 0.3192, + "step": 10215 + }, + { + "epoch": 0.20450917098315943, + "grad_norm": 1.3557627201080322, + "learning_rate": 9.222459321847891e-06, + "loss": 0.3148, + "step": 10216 + }, + { + "epoch": 0.20452918950028776, + "grad_norm": 1.083013892173767, + "learning_rate": 9.222285690806257e-06, + "loss": 0.3563, + "step": 10217 + }, + { + "epoch": 0.2045492080174161, + "grad_norm": 1.0127400159835815, + "learning_rate": 9.222112042015044e-06, + "loss": 0.3329, + "step": 10218 + }, + { + "epoch": 0.20456922653454446, + "grad_norm": 1.1416430473327637, + "learning_rate": 9.221938375474979e-06, + "loss": 0.3578, + "step": 10219 + }, + { + "epoch": 0.2045892450516728, + "grad_norm": 1.071589708328247, + "learning_rate": 9.221764691186795e-06, + "loss": 0.3062, + "step": 10220 + }, + { + "epoch": 0.20460926356880113, + "grad_norm": 1.888027310371399, + "learning_rate": 9.221590989151221e-06, + "loss": 0.8413, + "step": 10221 + }, + { + "epoch": 0.20462928208592948, + "grad_norm": 1.0539374351501465, + "learning_rate": 9.221417269368986e-06, + "loss": 0.3156, + "step": 10222 + }, + { + "epoch": 0.20464930060305783, + "grad_norm": 1.1045329570770264, + "learning_rate": 9.22124353184082e-06, + "loss": 0.3339, + "step": 10223 + }, + { + "epoch": 0.20466931912018618, + "grad_norm": 1.1754587888717651, + "learning_rate": 9.221069776567457e-06, + "loss": 0.3433, + "step": 10224 + }, + { + "epoch": 0.2046893376373145, + "grad_norm": 1.6789039373397827, + "learning_rate": 9.220896003549625e-06, + "loss": 0.8473, + "step": 10225 + }, + { + "epoch": 0.20470935615444286, + "grad_norm": 1.0517959594726562, + "learning_rate": 9.220722212788054e-06, + "loss": 0.3407, + "step": 10226 + }, + { + "epoch": 0.2047293746715712, + "grad_norm": 1.3475054502487183, + "learning_rate": 9.220548404283473e-06, + "loss": 0.3739, + "step": 10227 + }, + { + "epoch": 0.20474939318869956, + "grad_norm": 1.0535223484039307, + "learning_rate": 9.220374578036618e-06, + "loss": 0.3218, + "step": 10228 + }, + { + "epoch": 0.20476941170582788, + "grad_norm": 1.0047190189361572, + "learning_rate": 9.220200734048215e-06, + "loss": 0.2568, + "step": 10229 + }, + { + "epoch": 0.20478943022295623, + "grad_norm": 1.1075921058654785, + "learning_rate": 9.220026872318998e-06, + "loss": 0.3065, + "step": 10230 + }, + { + "epoch": 0.20480944874008458, + "grad_norm": 1.6095036268234253, + "learning_rate": 9.219852992849695e-06, + "loss": 0.3394, + "step": 10231 + }, + { + "epoch": 0.20482946725721293, + "grad_norm": 1.214214563369751, + "learning_rate": 9.219679095641036e-06, + "loss": 0.3282, + "step": 10232 + }, + { + "epoch": 0.20484948577434126, + "grad_norm": 2.0443155765533447, + "learning_rate": 9.219505180693755e-06, + "loss": 0.7859, + "step": 10233 + }, + { + "epoch": 0.2048695042914696, + "grad_norm": 1.8901625871658325, + "learning_rate": 9.219331248008585e-06, + "loss": 0.7826, + "step": 10234 + }, + { + "epoch": 0.20488952280859796, + "grad_norm": 1.1292510032653809, + "learning_rate": 9.219157297586251e-06, + "loss": 0.3903, + "step": 10235 + }, + { + "epoch": 0.2049095413257263, + "grad_norm": 1.1537697315216064, + "learning_rate": 9.21898332942749e-06, + "loss": 0.3217, + "step": 10236 + }, + { + "epoch": 0.20492955984285463, + "grad_norm": 1.0872056484222412, + "learning_rate": 9.218809343533031e-06, + "loss": 0.383, + "step": 10237 + }, + { + "epoch": 0.20494957835998298, + "grad_norm": 1.1601405143737793, + "learning_rate": 9.218635339903604e-06, + "loss": 0.3049, + "step": 10238 + }, + { + "epoch": 0.20496959687711133, + "grad_norm": 1.0466126203536987, + "learning_rate": 9.218461318539943e-06, + "loss": 0.2889, + "step": 10239 + }, + { + "epoch": 0.20498961539423968, + "grad_norm": 1.1254316568374634, + "learning_rate": 9.218287279442778e-06, + "loss": 0.3224, + "step": 10240 + }, + { + "epoch": 0.205009633911368, + "grad_norm": 1.0822169780731201, + "learning_rate": 9.218113222612842e-06, + "loss": 0.3274, + "step": 10241 + }, + { + "epoch": 0.20502965242849636, + "grad_norm": 1.2695144414901733, + "learning_rate": 9.217939148050865e-06, + "loss": 0.3369, + "step": 10242 + }, + { + "epoch": 0.2050496709456247, + "grad_norm": 1.1122488975524902, + "learning_rate": 9.21776505575758e-06, + "loss": 0.3473, + "step": 10243 + }, + { + "epoch": 0.20506968946275306, + "grad_norm": 0.9819684624671936, + "learning_rate": 9.217590945733716e-06, + "loss": 0.3407, + "step": 10244 + }, + { + "epoch": 0.20508970797988138, + "grad_norm": 0.9922499060630798, + "learning_rate": 9.21741681798001e-06, + "loss": 0.3028, + "step": 10245 + }, + { + "epoch": 0.20510972649700973, + "grad_norm": 1.0548985004425049, + "learning_rate": 9.21724267249719e-06, + "loss": 0.3123, + "step": 10246 + }, + { + "epoch": 0.20512974501413808, + "grad_norm": 1.0908018350601196, + "learning_rate": 9.21706850928599e-06, + "loss": 0.3578, + "step": 10247 + }, + { + "epoch": 0.20514976353126643, + "grad_norm": 1.0677204132080078, + "learning_rate": 9.216894328347142e-06, + "loss": 0.3685, + "step": 10248 + }, + { + "epoch": 0.20516978204839476, + "grad_norm": 1.2270373106002808, + "learning_rate": 9.216720129681377e-06, + "loss": 0.3386, + "step": 10249 + }, + { + "epoch": 0.2051898005655231, + "grad_norm": 1.1564449071884155, + "learning_rate": 9.216545913289427e-06, + "loss": 0.3383, + "step": 10250 + }, + { + "epoch": 0.20520981908265146, + "grad_norm": 1.066683292388916, + "learning_rate": 9.216371679172026e-06, + "loss": 0.3477, + "step": 10251 + }, + { + "epoch": 0.2052298375997798, + "grad_norm": 1.149986982345581, + "learning_rate": 9.216197427329908e-06, + "loss": 0.3225, + "step": 10252 + }, + { + "epoch": 0.20524985611690813, + "grad_norm": 1.842989444732666, + "learning_rate": 9.2160231577638e-06, + "loss": 0.8407, + "step": 10253 + }, + { + "epoch": 0.20526987463403648, + "grad_norm": 1.970654845237732, + "learning_rate": 9.21584887047444e-06, + "loss": 0.8243, + "step": 10254 + }, + { + "epoch": 0.20528989315116483, + "grad_norm": 1.11310613155365, + "learning_rate": 9.215674565462561e-06, + "loss": 0.3954, + "step": 10255 + }, + { + "epoch": 0.20530991166829318, + "grad_norm": 1.1007325649261475, + "learning_rate": 9.215500242728889e-06, + "loss": 0.2775, + "step": 10256 + }, + { + "epoch": 0.2053299301854215, + "grad_norm": 1.05418062210083, + "learning_rate": 9.215325902274162e-06, + "loss": 0.3178, + "step": 10257 + }, + { + "epoch": 0.20534994870254986, + "grad_norm": 1.2473381757736206, + "learning_rate": 9.215151544099114e-06, + "loss": 0.3487, + "step": 10258 + }, + { + "epoch": 0.2053699672196782, + "grad_norm": 1.0930122137069702, + "learning_rate": 9.214977168204475e-06, + "loss": 0.325, + "step": 10259 + }, + { + "epoch": 0.20538998573680656, + "grad_norm": 1.1073105335235596, + "learning_rate": 9.214802774590979e-06, + "loss": 0.3103, + "step": 10260 + }, + { + "epoch": 0.20541000425393488, + "grad_norm": 1.1491726636886597, + "learning_rate": 9.214628363259358e-06, + "loss": 0.3136, + "step": 10261 + }, + { + "epoch": 0.20543002277106323, + "grad_norm": 1.063636064529419, + "learning_rate": 9.214453934210348e-06, + "loss": 0.2893, + "step": 10262 + }, + { + "epoch": 0.20545004128819158, + "grad_norm": 1.07569420337677, + "learning_rate": 9.21427948744468e-06, + "loss": 0.3156, + "step": 10263 + }, + { + "epoch": 0.20547005980531993, + "grad_norm": 1.1346455812454224, + "learning_rate": 9.214105022963086e-06, + "loss": 0.3691, + "step": 10264 + }, + { + "epoch": 0.20549007832244826, + "grad_norm": 1.1606605052947998, + "learning_rate": 9.213930540766305e-06, + "loss": 0.354, + "step": 10265 + }, + { + "epoch": 0.2055100968395766, + "grad_norm": 1.2281309366226196, + "learning_rate": 9.213756040855065e-06, + "loss": 0.3475, + "step": 10266 + }, + { + "epoch": 0.20553011535670496, + "grad_norm": 1.0692782402038574, + "learning_rate": 9.213581523230103e-06, + "loss": 0.3223, + "step": 10267 + }, + { + "epoch": 0.2055501338738333, + "grad_norm": 1.1344308853149414, + "learning_rate": 9.213406987892148e-06, + "loss": 0.3596, + "step": 10268 + }, + { + "epoch": 0.20557015239096163, + "grad_norm": 1.1035354137420654, + "learning_rate": 9.213232434841939e-06, + "loss": 0.3608, + "step": 10269 + }, + { + "epoch": 0.20559017090808998, + "grad_norm": 1.2628955841064453, + "learning_rate": 9.213057864080206e-06, + "loss": 0.3755, + "step": 10270 + }, + { + "epoch": 0.20561018942521833, + "grad_norm": 1.166751503944397, + "learning_rate": 9.212883275607685e-06, + "loss": 0.3285, + "step": 10271 + }, + { + "epoch": 0.20563020794234668, + "grad_norm": 1.2695748805999756, + "learning_rate": 9.21270866942511e-06, + "loss": 0.3215, + "step": 10272 + }, + { + "epoch": 0.205650226459475, + "grad_norm": 1.2892638444900513, + "learning_rate": 9.212534045533215e-06, + "loss": 0.3959, + "step": 10273 + }, + { + "epoch": 0.20567024497660336, + "grad_norm": 1.1415055990219116, + "learning_rate": 9.212359403932731e-06, + "loss": 0.3672, + "step": 10274 + }, + { + "epoch": 0.2056902634937317, + "grad_norm": 1.1968249082565308, + "learning_rate": 9.212184744624396e-06, + "loss": 0.3367, + "step": 10275 + }, + { + "epoch": 0.20571028201086006, + "grad_norm": 1.1258583068847656, + "learning_rate": 9.212010067608945e-06, + "loss": 0.3097, + "step": 10276 + }, + { + "epoch": 0.20573030052798838, + "grad_norm": 1.1216250658035278, + "learning_rate": 9.211835372887108e-06, + "loss": 0.3405, + "step": 10277 + }, + { + "epoch": 0.20575031904511673, + "grad_norm": 1.1534349918365479, + "learning_rate": 9.21166066045962e-06, + "loss": 0.3585, + "step": 10278 + }, + { + "epoch": 0.20577033756224508, + "grad_norm": 1.182976484298706, + "learning_rate": 9.211485930327218e-06, + "loss": 0.3466, + "step": 10279 + }, + { + "epoch": 0.20579035607937343, + "grad_norm": 1.1520801782608032, + "learning_rate": 9.211311182490636e-06, + "loss": 0.3276, + "step": 10280 + }, + { + "epoch": 0.20581037459650176, + "grad_norm": 1.0311590433120728, + "learning_rate": 9.211136416950609e-06, + "loss": 0.2946, + "step": 10281 + }, + { + "epoch": 0.2058303931136301, + "grad_norm": 0.9886132478713989, + "learning_rate": 9.21096163370787e-06, + "loss": 0.2688, + "step": 10282 + }, + { + "epoch": 0.20585041163075846, + "grad_norm": 1.096164584159851, + "learning_rate": 9.210786832763153e-06, + "loss": 0.3745, + "step": 10283 + }, + { + "epoch": 0.2058704301478868, + "grad_norm": 1.8346264362335205, + "learning_rate": 9.210612014117196e-06, + "loss": 0.8319, + "step": 10284 + }, + { + "epoch": 0.20589044866501513, + "grad_norm": 1.1446046829223633, + "learning_rate": 9.210437177770732e-06, + "loss": 0.3614, + "step": 10285 + }, + { + "epoch": 0.20591046718214348, + "grad_norm": 1.0987097024917603, + "learning_rate": 9.210262323724496e-06, + "loss": 0.3566, + "step": 10286 + }, + { + "epoch": 0.20593048569927183, + "grad_norm": 1.035478949546814, + "learning_rate": 9.210087451979223e-06, + "loss": 0.3361, + "step": 10287 + }, + { + "epoch": 0.20595050421640018, + "grad_norm": 1.1677570343017578, + "learning_rate": 9.209912562535649e-06, + "loss": 0.32, + "step": 10288 + }, + { + "epoch": 0.2059705227335285, + "grad_norm": 1.0302115678787231, + "learning_rate": 9.209737655394506e-06, + "loss": 0.3439, + "step": 10289 + }, + { + "epoch": 0.20599054125065686, + "grad_norm": 1.1460946798324585, + "learning_rate": 9.209562730556535e-06, + "loss": 0.3797, + "step": 10290 + }, + { + "epoch": 0.2060105597677852, + "grad_norm": 1.0497550964355469, + "learning_rate": 9.209387788022469e-06, + "loss": 0.3331, + "step": 10291 + }, + { + "epoch": 0.20603057828491356, + "grad_norm": 1.075548768043518, + "learning_rate": 9.209212827793039e-06, + "loss": 0.3276, + "step": 10292 + }, + { + "epoch": 0.20605059680204188, + "grad_norm": 1.1306062936782837, + "learning_rate": 9.209037849868987e-06, + "loss": 0.3118, + "step": 10293 + }, + { + "epoch": 0.20607061531917023, + "grad_norm": 1.0118027925491333, + "learning_rate": 9.208862854251044e-06, + "loss": 0.3398, + "step": 10294 + }, + { + "epoch": 0.20609063383629858, + "grad_norm": 1.28236985206604, + "learning_rate": 9.208687840939948e-06, + "loss": 0.3561, + "step": 10295 + }, + { + "epoch": 0.20611065235342693, + "grad_norm": 1.1416337490081787, + "learning_rate": 9.208512809936434e-06, + "loss": 0.3498, + "step": 10296 + }, + { + "epoch": 0.20613067087055525, + "grad_norm": 1.2803945541381836, + "learning_rate": 9.208337761241238e-06, + "loss": 0.3509, + "step": 10297 + }, + { + "epoch": 0.2061506893876836, + "grad_norm": 1.0854971408843994, + "learning_rate": 9.208162694855095e-06, + "loss": 0.3077, + "step": 10298 + }, + { + "epoch": 0.20617070790481196, + "grad_norm": 1.086237907409668, + "learning_rate": 9.207987610778741e-06, + "loss": 0.3418, + "step": 10299 + }, + { + "epoch": 0.2061907264219403, + "grad_norm": 1.2626709938049316, + "learning_rate": 9.207812509012914e-06, + "loss": 0.326, + "step": 10300 + }, + { + "epoch": 0.20621074493906863, + "grad_norm": 1.9421111345291138, + "learning_rate": 9.207637389558348e-06, + "loss": 0.8566, + "step": 10301 + }, + { + "epoch": 0.20623076345619698, + "grad_norm": 1.9621607065200806, + "learning_rate": 9.207462252415782e-06, + "loss": 0.9221, + "step": 10302 + }, + { + "epoch": 0.20625078197332533, + "grad_norm": 1.011940836906433, + "learning_rate": 9.207287097585948e-06, + "loss": 0.3347, + "step": 10303 + }, + { + "epoch": 0.20627080049045368, + "grad_norm": 1.2436347007751465, + "learning_rate": 9.207111925069585e-06, + "loss": 0.3599, + "step": 10304 + }, + { + "epoch": 0.206290819007582, + "grad_norm": 1.1479798555374146, + "learning_rate": 9.206936734867428e-06, + "loss": 0.3407, + "step": 10305 + }, + { + "epoch": 0.20631083752471036, + "grad_norm": 1.1918541193008423, + "learning_rate": 9.206761526980216e-06, + "loss": 0.3497, + "step": 10306 + }, + { + "epoch": 0.2063308560418387, + "grad_norm": 1.1277815103530884, + "learning_rate": 9.206586301408683e-06, + "loss": 0.3095, + "step": 10307 + }, + { + "epoch": 0.20635087455896706, + "grad_norm": 1.0548670291900635, + "learning_rate": 9.206411058153566e-06, + "loss": 0.3175, + "step": 10308 + }, + { + "epoch": 0.20637089307609538, + "grad_norm": 1.0862610340118408, + "learning_rate": 9.206235797215603e-06, + "loss": 0.3027, + "step": 10309 + }, + { + "epoch": 0.20639091159322373, + "grad_norm": 1.0208393335342407, + "learning_rate": 9.20606051859553e-06, + "loss": 0.3141, + "step": 10310 + }, + { + "epoch": 0.20641093011035208, + "grad_norm": 1.0583839416503906, + "learning_rate": 9.205885222294082e-06, + "loss": 0.3205, + "step": 10311 + }, + { + "epoch": 0.20643094862748043, + "grad_norm": 1.0617507696151733, + "learning_rate": 9.205709908312e-06, + "loss": 0.337, + "step": 10312 + }, + { + "epoch": 0.20645096714460875, + "grad_norm": 1.1175267696380615, + "learning_rate": 9.205534576650017e-06, + "loss": 0.3093, + "step": 10313 + }, + { + "epoch": 0.2064709856617371, + "grad_norm": 1.120284080505371, + "learning_rate": 9.205359227308872e-06, + "loss": 0.3295, + "step": 10314 + }, + { + "epoch": 0.20649100417886546, + "grad_norm": 1.0485568046569824, + "learning_rate": 9.205183860289302e-06, + "loss": 0.3436, + "step": 10315 + }, + { + "epoch": 0.2065110226959938, + "grad_norm": 1.9451888799667358, + "learning_rate": 9.205008475592043e-06, + "loss": 0.8094, + "step": 10316 + }, + { + "epoch": 0.20653104121312213, + "grad_norm": 1.0655361413955688, + "learning_rate": 9.204833073217835e-06, + "loss": 0.32, + "step": 10317 + }, + { + "epoch": 0.20655105973025048, + "grad_norm": 1.1968109607696533, + "learning_rate": 9.204657653167413e-06, + "loss": 0.3462, + "step": 10318 + }, + { + "epoch": 0.20657107824737883, + "grad_norm": 1.8396344184875488, + "learning_rate": 9.204482215441514e-06, + "loss": 0.8356, + "step": 10319 + }, + { + "epoch": 0.20659109676450718, + "grad_norm": 1.1494803428649902, + "learning_rate": 9.204306760040877e-06, + "loss": 0.3162, + "step": 10320 + }, + { + "epoch": 0.2066111152816355, + "grad_norm": 1.8297499418258667, + "learning_rate": 9.204131286966242e-06, + "loss": 0.8346, + "step": 10321 + }, + { + "epoch": 0.20663113379876386, + "grad_norm": 1.1703884601593018, + "learning_rate": 9.20395579621834e-06, + "loss": 0.3373, + "step": 10322 + }, + { + "epoch": 0.2066511523158922, + "grad_norm": 1.0521156787872314, + "learning_rate": 9.203780287797914e-06, + "loss": 0.3372, + "step": 10323 + }, + { + "epoch": 0.20667117083302056, + "grad_norm": 1.910956859588623, + "learning_rate": 9.203604761705701e-06, + "loss": 0.7627, + "step": 10324 + }, + { + "epoch": 0.20669118935014888, + "grad_norm": 1.0885401964187622, + "learning_rate": 9.203429217942438e-06, + "loss": 0.3486, + "step": 10325 + }, + { + "epoch": 0.20671120786727723, + "grad_norm": 0.9945553541183472, + "learning_rate": 9.203253656508863e-06, + "loss": 0.3463, + "step": 10326 + }, + { + "epoch": 0.20673122638440558, + "grad_norm": 1.0864285230636597, + "learning_rate": 9.203078077405714e-06, + "loss": 0.3612, + "step": 10327 + }, + { + "epoch": 0.20675124490153393, + "grad_norm": 1.8983513116836548, + "learning_rate": 9.202902480633728e-06, + "loss": 0.8228, + "step": 10328 + }, + { + "epoch": 0.20677126341866225, + "grad_norm": 1.1715255975723267, + "learning_rate": 9.202726866193646e-06, + "loss": 0.3558, + "step": 10329 + }, + { + "epoch": 0.2067912819357906, + "grad_norm": 1.3094916343688965, + "learning_rate": 9.202551234086205e-06, + "loss": 0.4357, + "step": 10330 + }, + { + "epoch": 0.20681130045291896, + "grad_norm": 1.1039901971817017, + "learning_rate": 9.202375584312142e-06, + "loss": 0.3558, + "step": 10331 + }, + { + "epoch": 0.2068313189700473, + "grad_norm": 1.1091537475585938, + "learning_rate": 9.202199916872196e-06, + "loss": 0.2802, + "step": 10332 + }, + { + "epoch": 0.20685133748717563, + "grad_norm": 1.2848517894744873, + "learning_rate": 9.202024231767108e-06, + "loss": 0.2738, + "step": 10333 + }, + { + "epoch": 0.20687135600430398, + "grad_norm": 1.087033748626709, + "learning_rate": 9.201848528997612e-06, + "loss": 0.314, + "step": 10334 + }, + { + "epoch": 0.20689137452143233, + "grad_norm": 1.1515867710113525, + "learning_rate": 9.20167280856445e-06, + "loss": 0.3453, + "step": 10335 + }, + { + "epoch": 0.20691139303856068, + "grad_norm": 1.1443328857421875, + "learning_rate": 9.20149707046836e-06, + "loss": 0.3263, + "step": 10336 + }, + { + "epoch": 0.206931411555689, + "grad_norm": 1.0081201791763306, + "learning_rate": 9.20132131471008e-06, + "loss": 0.3455, + "step": 10337 + }, + { + "epoch": 0.20695143007281736, + "grad_norm": 1.0919601917266846, + "learning_rate": 9.20114554129035e-06, + "loss": 0.344, + "step": 10338 + }, + { + "epoch": 0.2069714485899457, + "grad_norm": 1.0631309747695923, + "learning_rate": 9.20096975020991e-06, + "loss": 0.343, + "step": 10339 + }, + { + "epoch": 0.20699146710707406, + "grad_norm": 1.3194853067398071, + "learning_rate": 9.200793941469493e-06, + "loss": 0.3172, + "step": 10340 + }, + { + "epoch": 0.20701148562420238, + "grad_norm": 1.1295442581176758, + "learning_rate": 9.200618115069847e-06, + "loss": 0.3104, + "step": 10341 + }, + { + "epoch": 0.20703150414133073, + "grad_norm": 1.2962923049926758, + "learning_rate": 9.200442271011702e-06, + "loss": 0.3285, + "step": 10342 + }, + { + "epoch": 0.20705152265845908, + "grad_norm": 2.0643465518951416, + "learning_rate": 9.200266409295804e-06, + "loss": 0.8129, + "step": 10343 + }, + { + "epoch": 0.20707154117558743, + "grad_norm": 0.9783579111099243, + "learning_rate": 9.20009052992289e-06, + "loss": 0.3122, + "step": 10344 + }, + { + "epoch": 0.20709155969271575, + "grad_norm": 1.1101467609405518, + "learning_rate": 9.199914632893699e-06, + "loss": 0.3334, + "step": 10345 + }, + { + "epoch": 0.2071115782098441, + "grad_norm": 1.0533066987991333, + "learning_rate": 9.199738718208968e-06, + "loss": 0.3057, + "step": 10346 + }, + { + "epoch": 0.20713159672697246, + "grad_norm": 1.0421836376190186, + "learning_rate": 9.199562785869442e-06, + "loss": 0.2751, + "step": 10347 + }, + { + "epoch": 0.2071516152441008, + "grad_norm": 1.583567500114441, + "learning_rate": 9.199386835875858e-06, + "loss": 0.2917, + "step": 10348 + }, + { + "epoch": 0.20717163376122913, + "grad_norm": 1.0512763261795044, + "learning_rate": 9.199210868228955e-06, + "loss": 0.3099, + "step": 10349 + }, + { + "epoch": 0.20719165227835748, + "grad_norm": 1.1475318670272827, + "learning_rate": 9.199034882929473e-06, + "loss": 0.3432, + "step": 10350 + }, + { + "epoch": 0.20721167079548583, + "grad_norm": 1.2191983461380005, + "learning_rate": 9.198858879978153e-06, + "loss": 0.3323, + "step": 10351 + }, + { + "epoch": 0.20723168931261418, + "grad_norm": 1.061420202255249, + "learning_rate": 9.198682859375731e-06, + "loss": 0.2888, + "step": 10352 + }, + { + "epoch": 0.2072517078297425, + "grad_norm": 1.042993187904358, + "learning_rate": 9.198506821122953e-06, + "loss": 0.3254, + "step": 10353 + }, + { + "epoch": 0.20727172634687085, + "grad_norm": 1.8871203660964966, + "learning_rate": 9.198330765220554e-06, + "loss": 0.8468, + "step": 10354 + }, + { + "epoch": 0.2072917448639992, + "grad_norm": 1.0896726846694946, + "learning_rate": 9.198154691669277e-06, + "loss": 0.3114, + "step": 10355 + }, + { + "epoch": 0.20731176338112756, + "grad_norm": 1.0082679986953735, + "learning_rate": 9.19797860046986e-06, + "loss": 0.2925, + "step": 10356 + }, + { + "epoch": 0.20733178189825588, + "grad_norm": 1.898139238357544, + "learning_rate": 9.197802491623046e-06, + "loss": 0.8264, + "step": 10357 + }, + { + "epoch": 0.20735180041538423, + "grad_norm": 1.0988343954086304, + "learning_rate": 9.197626365129572e-06, + "loss": 0.3237, + "step": 10358 + }, + { + "epoch": 0.20737181893251258, + "grad_norm": 1.1674216985702515, + "learning_rate": 9.19745022099018e-06, + "loss": 0.2966, + "step": 10359 + }, + { + "epoch": 0.20739183744964093, + "grad_norm": 1.1139081716537476, + "learning_rate": 9.197274059205612e-06, + "loss": 0.311, + "step": 10360 + }, + { + "epoch": 0.20741185596676925, + "grad_norm": 1.1589887142181396, + "learning_rate": 9.197097879776606e-06, + "loss": 0.3392, + "step": 10361 + }, + { + "epoch": 0.2074318744838976, + "grad_norm": 1.0351486206054688, + "learning_rate": 9.196921682703906e-06, + "loss": 0.3408, + "step": 10362 + }, + { + "epoch": 0.20745189300102596, + "grad_norm": 1.2893574237823486, + "learning_rate": 9.196745467988249e-06, + "loss": 0.3301, + "step": 10363 + }, + { + "epoch": 0.2074719115181543, + "grad_norm": 1.4429072141647339, + "learning_rate": 9.196569235630378e-06, + "loss": 0.3163, + "step": 10364 + }, + { + "epoch": 0.20749193003528263, + "grad_norm": 1.1826575994491577, + "learning_rate": 9.196392985631031e-06, + "loss": 0.3118, + "step": 10365 + }, + { + "epoch": 0.20751194855241098, + "grad_norm": 1.2106091976165771, + "learning_rate": 9.196216717990951e-06, + "loss": 0.3703, + "step": 10366 + }, + { + "epoch": 0.20753196706953933, + "grad_norm": 1.1290909051895142, + "learning_rate": 9.19604043271088e-06, + "loss": 0.3288, + "step": 10367 + }, + { + "epoch": 0.20755198558666768, + "grad_norm": 1.220089316368103, + "learning_rate": 9.19586412979156e-06, + "loss": 0.3619, + "step": 10368 + }, + { + "epoch": 0.207572004103796, + "grad_norm": 1.124098539352417, + "learning_rate": 9.195687809233726e-06, + "loss": 0.3335, + "step": 10369 + }, + { + "epoch": 0.20759202262092435, + "grad_norm": 1.1581352949142456, + "learning_rate": 9.195511471038126e-06, + "loss": 0.3786, + "step": 10370 + }, + { + "epoch": 0.2076120411380527, + "grad_norm": 0.9976120591163635, + "learning_rate": 9.195335115205497e-06, + "loss": 0.2988, + "step": 10371 + }, + { + "epoch": 0.20763205965518106, + "grad_norm": 1.123876929283142, + "learning_rate": 9.195158741736584e-06, + "loss": 0.331, + "step": 10372 + }, + { + "epoch": 0.20765207817230938, + "grad_norm": 1.870271921157837, + "learning_rate": 9.194982350632126e-06, + "loss": 0.8123, + "step": 10373 + }, + { + "epoch": 0.20767209668943773, + "grad_norm": 0.990347683429718, + "learning_rate": 9.194805941892864e-06, + "loss": 0.3504, + "step": 10374 + }, + { + "epoch": 0.20769211520656608, + "grad_norm": 1.0263121128082275, + "learning_rate": 9.194629515519542e-06, + "loss": 0.3175, + "step": 10375 + }, + { + "epoch": 0.20771213372369443, + "grad_norm": 1.1424765586853027, + "learning_rate": 9.194453071512899e-06, + "loss": 0.3401, + "step": 10376 + }, + { + "epoch": 0.20773215224082275, + "grad_norm": 1.080376386642456, + "learning_rate": 9.194276609873678e-06, + "loss": 0.3166, + "step": 10377 + }, + { + "epoch": 0.2077521707579511, + "grad_norm": 1.2157952785491943, + "learning_rate": 9.194100130602622e-06, + "loss": 0.3024, + "step": 10378 + }, + { + "epoch": 0.20777218927507946, + "grad_norm": 1.1277222633361816, + "learning_rate": 9.193923633700469e-06, + "loss": 0.3001, + "step": 10379 + }, + { + "epoch": 0.2077922077922078, + "grad_norm": 1.231005311012268, + "learning_rate": 9.193747119167966e-06, + "loss": 0.3427, + "step": 10380 + }, + { + "epoch": 0.20781222630933613, + "grad_norm": 1.0093778371810913, + "learning_rate": 9.193570587005851e-06, + "loss": 0.2984, + "step": 10381 + }, + { + "epoch": 0.20783224482646448, + "grad_norm": 1.8046903610229492, + "learning_rate": 9.19339403721487e-06, + "loss": 0.8498, + "step": 10382 + }, + { + "epoch": 0.20785226334359283, + "grad_norm": 1.1050105094909668, + "learning_rate": 9.19321746979576e-06, + "loss": 0.3253, + "step": 10383 + }, + { + "epoch": 0.20787228186072118, + "grad_norm": 1.2248620986938477, + "learning_rate": 9.193040884749269e-06, + "loss": 0.3587, + "step": 10384 + }, + { + "epoch": 0.2078923003778495, + "grad_norm": 0.9721242189407349, + "learning_rate": 9.192864282076135e-06, + "loss": 0.3255, + "step": 10385 + }, + { + "epoch": 0.20791231889497785, + "grad_norm": 1.1248340606689453, + "learning_rate": 9.192687661777101e-06, + "loss": 0.3081, + "step": 10386 + }, + { + "epoch": 0.2079323374121062, + "grad_norm": 1.1264445781707764, + "learning_rate": 9.192511023852911e-06, + "loss": 0.3972, + "step": 10387 + }, + { + "epoch": 0.20795235592923456, + "grad_norm": 1.0517915487289429, + "learning_rate": 9.192334368304308e-06, + "loss": 0.2588, + "step": 10388 + }, + { + "epoch": 0.20797237444636288, + "grad_norm": 1.008948564529419, + "learning_rate": 9.192157695132032e-06, + "loss": 0.2834, + "step": 10389 + }, + { + "epoch": 0.20799239296349123, + "grad_norm": 1.711674451828003, + "learning_rate": 9.191981004336828e-06, + "loss": 0.8197, + "step": 10390 + }, + { + "epoch": 0.20801241148061958, + "grad_norm": 1.0545340776443481, + "learning_rate": 9.191804295919437e-06, + "loss": 0.3584, + "step": 10391 + }, + { + "epoch": 0.2080324299977479, + "grad_norm": 1.197544813156128, + "learning_rate": 9.191627569880605e-06, + "loss": 0.3525, + "step": 10392 + }, + { + "epoch": 0.20805244851487625, + "grad_norm": 1.07572340965271, + "learning_rate": 9.191450826221072e-06, + "loss": 0.2939, + "step": 10393 + }, + { + "epoch": 0.2080724670320046, + "grad_norm": 1.1567240953445435, + "learning_rate": 9.191274064941579e-06, + "loss": 0.3233, + "step": 10394 + }, + { + "epoch": 0.20809248554913296, + "grad_norm": 1.3136587142944336, + "learning_rate": 9.191097286042876e-06, + "loss": 0.3491, + "step": 10395 + }, + { + "epoch": 0.20811250406626128, + "grad_norm": 1.0940358638763428, + "learning_rate": 9.1909204895257e-06, + "loss": 0.3466, + "step": 10396 + }, + { + "epoch": 0.20813252258338963, + "grad_norm": 1.2114722728729248, + "learning_rate": 9.190743675390795e-06, + "loss": 0.3274, + "step": 10397 + }, + { + "epoch": 0.20815254110051798, + "grad_norm": 1.0767229795455933, + "learning_rate": 9.190566843638907e-06, + "loss": 0.3461, + "step": 10398 + }, + { + "epoch": 0.20817255961764633, + "grad_norm": 1.084808588027954, + "learning_rate": 9.190389994270778e-06, + "loss": 0.277, + "step": 10399 + }, + { + "epoch": 0.20819257813477465, + "grad_norm": 1.0830072164535522, + "learning_rate": 9.19021312728715e-06, + "loss": 0.2754, + "step": 10400 + }, + { + "epoch": 0.208212596651903, + "grad_norm": 1.0586042404174805, + "learning_rate": 9.190036242688768e-06, + "loss": 0.3357, + "step": 10401 + }, + { + "epoch": 0.20823261516903135, + "grad_norm": 1.1818135976791382, + "learning_rate": 9.189859340476377e-06, + "loss": 0.3118, + "step": 10402 + }, + { + "epoch": 0.2082526336861597, + "grad_norm": 1.464551329612732, + "learning_rate": 9.189682420650716e-06, + "loss": 0.3283, + "step": 10403 + }, + { + "epoch": 0.20827265220328803, + "grad_norm": 1.1048136949539185, + "learning_rate": 9.189505483212534e-06, + "loss": 0.3632, + "step": 10404 + }, + { + "epoch": 0.20829267072041638, + "grad_norm": 1.447494387626648, + "learning_rate": 9.189328528162573e-06, + "loss": 0.3579, + "step": 10405 + }, + { + "epoch": 0.20831268923754473, + "grad_norm": 1.1493196487426758, + "learning_rate": 9.189151555501575e-06, + "loss": 0.3233, + "step": 10406 + }, + { + "epoch": 0.20833270775467308, + "grad_norm": 1.8755731582641602, + "learning_rate": 9.188974565230287e-06, + "loss": 0.7882, + "step": 10407 + }, + { + "epoch": 0.2083527262718014, + "grad_norm": 1.1087855100631714, + "learning_rate": 9.188797557349449e-06, + "loss": 0.2742, + "step": 10408 + }, + { + "epoch": 0.20837274478892975, + "grad_norm": 1.383333444595337, + "learning_rate": 9.18862053185981e-06, + "loss": 0.2957, + "step": 10409 + }, + { + "epoch": 0.2083927633060581, + "grad_norm": 1.0922131538391113, + "learning_rate": 9.18844348876211e-06, + "loss": 0.3161, + "step": 10410 + }, + { + "epoch": 0.20841278182318645, + "grad_norm": 1.2050659656524658, + "learning_rate": 9.188266428057094e-06, + "loss": 0.3111, + "step": 10411 + }, + { + "epoch": 0.20843280034031478, + "grad_norm": 1.7690181732177734, + "learning_rate": 9.188089349745509e-06, + "loss": 0.8078, + "step": 10412 + }, + { + "epoch": 0.20845281885744313, + "grad_norm": 1.1643213033676147, + "learning_rate": 9.187912253828097e-06, + "loss": 0.3668, + "step": 10413 + }, + { + "epoch": 0.20847283737457148, + "grad_norm": 1.1362122297286987, + "learning_rate": 9.187735140305603e-06, + "loss": 0.3674, + "step": 10414 + }, + { + "epoch": 0.20849285589169983, + "grad_norm": 1.1735988855361938, + "learning_rate": 9.187558009178773e-06, + "loss": 0.3376, + "step": 10415 + }, + { + "epoch": 0.20851287440882815, + "grad_norm": 1.062941551208496, + "learning_rate": 9.18738086044835e-06, + "loss": 0.2908, + "step": 10416 + }, + { + "epoch": 0.2085328929259565, + "grad_norm": 1.4703161716461182, + "learning_rate": 9.187203694115076e-06, + "loss": 0.3377, + "step": 10417 + }, + { + "epoch": 0.20855291144308485, + "grad_norm": 1.1100661754608154, + "learning_rate": 9.187026510179702e-06, + "loss": 0.3129, + "step": 10418 + }, + { + "epoch": 0.2085729299602132, + "grad_norm": 1.174039602279663, + "learning_rate": 9.186849308642969e-06, + "loss": 0.3397, + "step": 10419 + }, + { + "epoch": 0.20859294847734153, + "grad_norm": 0.9795107245445251, + "learning_rate": 9.186672089505622e-06, + "loss": 0.3282, + "step": 10420 + }, + { + "epoch": 0.20861296699446988, + "grad_norm": 1.1854979991912842, + "learning_rate": 9.186494852768407e-06, + "loss": 0.3893, + "step": 10421 + }, + { + "epoch": 0.20863298551159823, + "grad_norm": 1.3044466972351074, + "learning_rate": 9.186317598432069e-06, + "loss": 0.3168, + "step": 10422 + }, + { + "epoch": 0.20865300402872658, + "grad_norm": 1.042393445968628, + "learning_rate": 9.186140326497353e-06, + "loss": 0.3191, + "step": 10423 + }, + { + "epoch": 0.2086730225458549, + "grad_norm": 1.189673662185669, + "learning_rate": 9.185963036965003e-06, + "loss": 0.255, + "step": 10424 + }, + { + "epoch": 0.20869304106298325, + "grad_norm": 1.9072753190994263, + "learning_rate": 9.185785729835764e-06, + "loss": 0.8362, + "step": 10425 + }, + { + "epoch": 0.2087130595801116, + "grad_norm": 1.2205559015274048, + "learning_rate": 9.185608405110384e-06, + "loss": 0.3434, + "step": 10426 + }, + { + "epoch": 0.20873307809723995, + "grad_norm": 1.060403823852539, + "learning_rate": 9.185431062789607e-06, + "loss": 0.277, + "step": 10427 + }, + { + "epoch": 0.20875309661436828, + "grad_norm": 1.0573015213012695, + "learning_rate": 9.185253702874179e-06, + "loss": 0.2741, + "step": 10428 + }, + { + "epoch": 0.20877311513149663, + "grad_norm": 1.199479341506958, + "learning_rate": 9.185076325364844e-06, + "loss": 0.3095, + "step": 10429 + }, + { + "epoch": 0.20879313364862498, + "grad_norm": 1.1541510820388794, + "learning_rate": 9.184898930262349e-06, + "loss": 0.3805, + "step": 10430 + }, + { + "epoch": 0.20881315216575333, + "grad_norm": 1.749720573425293, + "learning_rate": 9.184721517567439e-06, + "loss": 0.8154, + "step": 10431 + }, + { + "epoch": 0.20883317068288165, + "grad_norm": 1.1124566793441772, + "learning_rate": 9.184544087280862e-06, + "loss": 0.3274, + "step": 10432 + }, + { + "epoch": 0.20885318920001, + "grad_norm": 1.176698088645935, + "learning_rate": 9.18436663940336e-06, + "loss": 0.3161, + "step": 10433 + }, + { + "epoch": 0.20887320771713835, + "grad_norm": 1.0980011224746704, + "learning_rate": 9.184189173935682e-06, + "loss": 0.3258, + "step": 10434 + }, + { + "epoch": 0.2088932262342667, + "grad_norm": 2.0881311893463135, + "learning_rate": 9.184011690878575e-06, + "loss": 0.8898, + "step": 10435 + }, + { + "epoch": 0.20891324475139503, + "grad_norm": 1.040565848350525, + "learning_rate": 9.183834190232781e-06, + "loss": 0.3045, + "step": 10436 + }, + { + "epoch": 0.20893326326852338, + "grad_norm": 1.1185684204101562, + "learning_rate": 9.18365667199905e-06, + "loss": 0.3235, + "step": 10437 + }, + { + "epoch": 0.20895328178565173, + "grad_norm": 1.8814423084259033, + "learning_rate": 9.183479136178126e-06, + "loss": 0.8541, + "step": 10438 + }, + { + "epoch": 0.20897330030278008, + "grad_norm": 1.118491530418396, + "learning_rate": 9.183301582770757e-06, + "loss": 0.3109, + "step": 10439 + }, + { + "epoch": 0.2089933188199084, + "grad_norm": 1.0387574434280396, + "learning_rate": 9.183124011777686e-06, + "loss": 0.2991, + "step": 10440 + }, + { + "epoch": 0.20901333733703675, + "grad_norm": 1.0820621252059937, + "learning_rate": 9.182946423199664e-06, + "loss": 0.3313, + "step": 10441 + }, + { + "epoch": 0.2090333558541651, + "grad_norm": 1.2367557287216187, + "learning_rate": 9.182768817037432e-06, + "loss": 0.2898, + "step": 10442 + }, + { + "epoch": 0.20905337437129345, + "grad_norm": 1.1658005714416504, + "learning_rate": 9.182591193291744e-06, + "loss": 0.3249, + "step": 10443 + }, + { + "epoch": 0.20907339288842178, + "grad_norm": 1.2093818187713623, + "learning_rate": 9.182413551963343e-06, + "loss": 0.3202, + "step": 10444 + }, + { + "epoch": 0.20909341140555013, + "grad_norm": 1.0505290031433105, + "learning_rate": 9.182235893052973e-06, + "loss": 0.2963, + "step": 10445 + }, + { + "epoch": 0.20911342992267848, + "grad_norm": 1.0024943351745605, + "learning_rate": 9.182058216561384e-06, + "loss": 0.2786, + "step": 10446 + }, + { + "epoch": 0.20913344843980683, + "grad_norm": 1.0728833675384521, + "learning_rate": 9.181880522489322e-06, + "loss": 0.3188, + "step": 10447 + }, + { + "epoch": 0.20915346695693515, + "grad_norm": 1.0710073709487915, + "learning_rate": 9.181702810837535e-06, + "loss": 0.3052, + "step": 10448 + }, + { + "epoch": 0.2091734854740635, + "grad_norm": 1.1006872653961182, + "learning_rate": 9.181525081606768e-06, + "loss": 0.3668, + "step": 10449 + }, + { + "epoch": 0.20919350399119185, + "grad_norm": 1.0594589710235596, + "learning_rate": 9.181347334797772e-06, + "loss": 0.2664, + "step": 10450 + }, + { + "epoch": 0.2092135225083202, + "grad_norm": 1.0743751525878906, + "learning_rate": 9.181169570411289e-06, + "loss": 0.3514, + "step": 10451 + }, + { + "epoch": 0.20923354102544853, + "grad_norm": 1.1055574417114258, + "learning_rate": 9.18099178844807e-06, + "loss": 0.266, + "step": 10452 + }, + { + "epoch": 0.20925355954257688, + "grad_norm": 1.168384075164795, + "learning_rate": 9.180813988908861e-06, + "loss": 0.3138, + "step": 10453 + }, + { + "epoch": 0.20927357805970523, + "grad_norm": 1.1481635570526123, + "learning_rate": 9.180636171794411e-06, + "loss": 0.3259, + "step": 10454 + }, + { + "epoch": 0.20929359657683358, + "grad_norm": 1.0402411222457886, + "learning_rate": 9.180458337105465e-06, + "loss": 0.3054, + "step": 10455 + }, + { + "epoch": 0.2093136150939619, + "grad_norm": 1.237042784690857, + "learning_rate": 9.180280484842771e-06, + "loss": 0.3742, + "step": 10456 + }, + { + "epoch": 0.20933363361109025, + "grad_norm": 1.12618887424469, + "learning_rate": 9.180102615007078e-06, + "loss": 0.3818, + "step": 10457 + }, + { + "epoch": 0.2093536521282186, + "grad_norm": 1.0989091396331787, + "learning_rate": 9.179924727599134e-06, + "loss": 0.3455, + "step": 10458 + }, + { + "epoch": 0.20937367064534695, + "grad_norm": 2.3752803802490234, + "learning_rate": 9.179746822619686e-06, + "loss": 0.8313, + "step": 10459 + }, + { + "epoch": 0.20939368916247528, + "grad_norm": 1.0627554655075073, + "learning_rate": 9.179568900069482e-06, + "loss": 0.3183, + "step": 10460 + }, + { + "epoch": 0.20941370767960363, + "grad_norm": 1.2261366844177246, + "learning_rate": 9.179390959949268e-06, + "loss": 0.3447, + "step": 10461 + }, + { + "epoch": 0.20943372619673198, + "grad_norm": 1.215401530265808, + "learning_rate": 9.179213002259795e-06, + "loss": 0.2983, + "step": 10462 + }, + { + "epoch": 0.20945374471386033, + "grad_norm": 1.0661813020706177, + "learning_rate": 9.17903502700181e-06, + "loss": 0.3396, + "step": 10463 + }, + { + "epoch": 0.20947376323098865, + "grad_norm": 1.383752465248108, + "learning_rate": 9.17885703417606e-06, + "loss": 0.3332, + "step": 10464 + }, + { + "epoch": 0.209493781748117, + "grad_norm": 1.1838289499282837, + "learning_rate": 9.178679023783296e-06, + "loss": 0.3408, + "step": 10465 + }, + { + "epoch": 0.20951380026524535, + "grad_norm": 1.2504374980926514, + "learning_rate": 9.178500995824264e-06, + "loss": 0.3419, + "step": 10466 + }, + { + "epoch": 0.2095338187823737, + "grad_norm": 1.2038966417312622, + "learning_rate": 9.178322950299714e-06, + "loss": 0.3194, + "step": 10467 + }, + { + "epoch": 0.20955383729950203, + "grad_norm": 1.1416107416152954, + "learning_rate": 9.178144887210391e-06, + "loss": 0.3753, + "step": 10468 + }, + { + "epoch": 0.20957385581663038, + "grad_norm": 1.094916582107544, + "learning_rate": 9.177966806557049e-06, + "loss": 0.3063, + "step": 10469 + }, + { + "epoch": 0.20959387433375873, + "grad_norm": 1.0590498447418213, + "learning_rate": 9.177788708340432e-06, + "loss": 0.2983, + "step": 10470 + }, + { + "epoch": 0.20961389285088708, + "grad_norm": 1.0725020170211792, + "learning_rate": 9.177610592561291e-06, + "loss": 0.3308, + "step": 10471 + }, + { + "epoch": 0.2096339113680154, + "grad_norm": 1.221928596496582, + "learning_rate": 9.177432459220373e-06, + "loss": 0.2979, + "step": 10472 + }, + { + "epoch": 0.20965392988514375, + "grad_norm": 1.1392563581466675, + "learning_rate": 9.177254308318428e-06, + "loss": 0.3762, + "step": 10473 + }, + { + "epoch": 0.2096739484022721, + "grad_norm": 0.9655948877334595, + "learning_rate": 9.177076139856207e-06, + "loss": 0.2632, + "step": 10474 + }, + { + "epoch": 0.20969396691940045, + "grad_norm": 1.153519868850708, + "learning_rate": 9.176897953834454e-06, + "loss": 0.3276, + "step": 10475 + }, + { + "epoch": 0.20971398543652878, + "grad_norm": 1.0384917259216309, + "learning_rate": 9.176719750253924e-06, + "loss": 0.3424, + "step": 10476 + }, + { + "epoch": 0.20973400395365713, + "grad_norm": 1.116724967956543, + "learning_rate": 9.176541529115361e-06, + "loss": 0.2937, + "step": 10477 + }, + { + "epoch": 0.20975402247078548, + "grad_norm": 1.0814722776412964, + "learning_rate": 9.176363290419515e-06, + "loss": 0.3273, + "step": 10478 + }, + { + "epoch": 0.20977404098791383, + "grad_norm": 0.9823485612869263, + "learning_rate": 9.176185034167138e-06, + "loss": 0.3232, + "step": 10479 + }, + { + "epoch": 0.20979405950504215, + "grad_norm": 1.0275593996047974, + "learning_rate": 9.176006760358978e-06, + "loss": 0.3551, + "step": 10480 + }, + { + "epoch": 0.2098140780221705, + "grad_norm": 2.0342657566070557, + "learning_rate": 9.175828468995784e-06, + "loss": 0.8532, + "step": 10481 + }, + { + "epoch": 0.20983409653929885, + "grad_norm": 1.1715422868728638, + "learning_rate": 9.175650160078306e-06, + "loss": 0.3195, + "step": 10482 + }, + { + "epoch": 0.2098541150564272, + "grad_norm": 1.9081692695617676, + "learning_rate": 9.175471833607292e-06, + "loss": 0.8534, + "step": 10483 + }, + { + "epoch": 0.20987413357355553, + "grad_norm": 1.0873446464538574, + "learning_rate": 9.175293489583494e-06, + "loss": 0.3027, + "step": 10484 + }, + { + "epoch": 0.20989415209068388, + "grad_norm": 1.0792672634124756, + "learning_rate": 9.175115128007662e-06, + "loss": 0.3439, + "step": 10485 + }, + { + "epoch": 0.20991417060781223, + "grad_norm": 0.9598633646965027, + "learning_rate": 9.174936748880542e-06, + "loss": 0.312, + "step": 10486 + }, + { + "epoch": 0.20993418912494058, + "grad_norm": 1.0697683095932007, + "learning_rate": 9.174758352202888e-06, + "loss": 0.3219, + "step": 10487 + }, + { + "epoch": 0.2099542076420689, + "grad_norm": 1.0037899017333984, + "learning_rate": 9.174579937975449e-06, + "loss": 0.3433, + "step": 10488 + }, + { + "epoch": 0.20997422615919725, + "grad_norm": 1.3974783420562744, + "learning_rate": 9.174401506198974e-06, + "loss": 0.3334, + "step": 10489 + }, + { + "epoch": 0.2099942446763256, + "grad_norm": 1.1848278045654297, + "learning_rate": 9.174223056874213e-06, + "loss": 0.3439, + "step": 10490 + }, + { + "epoch": 0.21001426319345395, + "grad_norm": 1.1179907321929932, + "learning_rate": 9.174044590001915e-06, + "loss": 0.33, + "step": 10491 + }, + { + "epoch": 0.21003428171058228, + "grad_norm": 1.0968573093414307, + "learning_rate": 9.173866105582832e-06, + "loss": 0.3656, + "step": 10492 + }, + { + "epoch": 0.21005430022771063, + "grad_norm": 1.0812875032424927, + "learning_rate": 9.173687603617716e-06, + "loss": 0.3658, + "step": 10493 + }, + { + "epoch": 0.21007431874483898, + "grad_norm": 1.1656779050827026, + "learning_rate": 9.173509084107315e-06, + "loss": 0.2958, + "step": 10494 + }, + { + "epoch": 0.21009433726196733, + "grad_norm": 1.9088852405548096, + "learning_rate": 9.17333054705238e-06, + "loss": 0.8019, + "step": 10495 + }, + { + "epoch": 0.21011435577909565, + "grad_norm": 1.0928453207015991, + "learning_rate": 9.17315199245366e-06, + "loss": 0.3235, + "step": 10496 + }, + { + "epoch": 0.210134374296224, + "grad_norm": 1.1902903318405151, + "learning_rate": 9.172973420311909e-06, + "loss": 0.3487, + "step": 10497 + }, + { + "epoch": 0.21015439281335235, + "grad_norm": 1.1059372425079346, + "learning_rate": 9.172794830627874e-06, + "loss": 0.3457, + "step": 10498 + }, + { + "epoch": 0.2101744113304807, + "grad_norm": 1.124740719795227, + "learning_rate": 9.172616223402309e-06, + "loss": 0.3185, + "step": 10499 + }, + { + "epoch": 0.21019442984760903, + "grad_norm": 1.160465955734253, + "learning_rate": 9.172437598635963e-06, + "loss": 0.3318, + "step": 10500 + }, + { + "epoch": 0.21021444836473738, + "grad_norm": 1.0444087982177734, + "learning_rate": 9.172258956329586e-06, + "loss": 0.3547, + "step": 10501 + }, + { + "epoch": 0.21023446688186573, + "grad_norm": 1.6796554327011108, + "learning_rate": 9.172080296483932e-06, + "loss": 0.9376, + "step": 10502 + }, + { + "epoch": 0.21025448539899408, + "grad_norm": 1.2094558477401733, + "learning_rate": 9.17190161909975e-06, + "loss": 0.3507, + "step": 10503 + }, + { + "epoch": 0.2102745039161224, + "grad_norm": 1.0795005559921265, + "learning_rate": 9.17172292417779e-06, + "loss": 0.292, + "step": 10504 + }, + { + "epoch": 0.21029452243325075, + "grad_norm": 1.4287364482879639, + "learning_rate": 9.171544211718806e-06, + "loss": 0.3481, + "step": 10505 + }, + { + "epoch": 0.2103145409503791, + "grad_norm": 1.914048671722412, + "learning_rate": 9.171365481723549e-06, + "loss": 0.7952, + "step": 10506 + }, + { + "epoch": 0.21033455946750745, + "grad_norm": 1.2244189977645874, + "learning_rate": 9.171186734192768e-06, + "loss": 0.3216, + "step": 10507 + }, + { + "epoch": 0.21035457798463578, + "grad_norm": 1.038416862487793, + "learning_rate": 9.171007969127215e-06, + "loss": 0.3232, + "step": 10508 + }, + { + "epoch": 0.21037459650176413, + "grad_norm": 1.116821050643921, + "learning_rate": 9.17082918652764e-06, + "loss": 0.3439, + "step": 10509 + }, + { + "epoch": 0.21039461501889248, + "grad_norm": 1.1529295444488525, + "learning_rate": 9.1706503863948e-06, + "loss": 0.3749, + "step": 10510 + }, + { + "epoch": 0.21041463353602083, + "grad_norm": 1.202628254890442, + "learning_rate": 9.170471568729443e-06, + "loss": 0.3403, + "step": 10511 + }, + { + "epoch": 0.21043465205314915, + "grad_norm": 1.123460292816162, + "learning_rate": 9.17029273353232e-06, + "loss": 0.3393, + "step": 10512 + }, + { + "epoch": 0.2104546705702775, + "grad_norm": 1.0247567892074585, + "learning_rate": 9.170113880804184e-06, + "loss": 0.3022, + "step": 10513 + }, + { + "epoch": 0.21047468908740585, + "grad_norm": 1.1171679496765137, + "learning_rate": 9.169935010545788e-06, + "loss": 0.3299, + "step": 10514 + }, + { + "epoch": 0.2104947076045342, + "grad_norm": 1.0621843338012695, + "learning_rate": 9.16975612275788e-06, + "loss": 0.3478, + "step": 10515 + }, + { + "epoch": 0.21051472612166253, + "grad_norm": 1.0224827527999878, + "learning_rate": 9.169577217441216e-06, + "loss": 0.3074, + "step": 10516 + }, + { + "epoch": 0.21053474463879088, + "grad_norm": 1.1097612380981445, + "learning_rate": 9.169398294596548e-06, + "loss": 0.332, + "step": 10517 + }, + { + "epoch": 0.21055476315591923, + "grad_norm": 1.3053232431411743, + "learning_rate": 9.169219354224625e-06, + "loss": 0.345, + "step": 10518 + }, + { + "epoch": 0.21057478167304758, + "grad_norm": 1.9362661838531494, + "learning_rate": 9.169040396326202e-06, + "loss": 0.8523, + "step": 10519 + }, + { + "epoch": 0.2105948001901759, + "grad_norm": 1.094778299331665, + "learning_rate": 9.16886142090203e-06, + "loss": 0.3266, + "step": 10520 + }, + { + "epoch": 0.21061481870730425, + "grad_norm": 1.034218668937683, + "learning_rate": 9.168682427952861e-06, + "loss": 0.3316, + "step": 10521 + }, + { + "epoch": 0.2106348372244326, + "grad_norm": 1.0116297006607056, + "learning_rate": 9.16850341747945e-06, + "loss": 0.3338, + "step": 10522 + }, + { + "epoch": 0.21065485574156095, + "grad_norm": 1.1903996467590332, + "learning_rate": 9.168324389482546e-06, + "loss": 0.3318, + "step": 10523 + }, + { + "epoch": 0.21067487425868928, + "grad_norm": 1.1523863077163696, + "learning_rate": 9.168145343962905e-06, + "loss": 0.3478, + "step": 10524 + }, + { + "epoch": 0.21069489277581763, + "grad_norm": 1.1318382024765015, + "learning_rate": 9.167966280921277e-06, + "loss": 0.3862, + "step": 10525 + }, + { + "epoch": 0.21071491129294598, + "grad_norm": 1.0863654613494873, + "learning_rate": 9.167787200358415e-06, + "loss": 0.3585, + "step": 10526 + }, + { + "epoch": 0.21073492981007433, + "grad_norm": 1.1437917947769165, + "learning_rate": 9.167608102275073e-06, + "loss": 0.3367, + "step": 10527 + }, + { + "epoch": 0.21075494832720265, + "grad_norm": 1.1253639459609985, + "learning_rate": 9.167428986672003e-06, + "loss": 0.3735, + "step": 10528 + }, + { + "epoch": 0.210774966844331, + "grad_norm": 1.1838990449905396, + "learning_rate": 9.16724985354996e-06, + "loss": 0.2961, + "step": 10529 + }, + { + "epoch": 0.21079498536145935, + "grad_norm": 1.161635160446167, + "learning_rate": 9.167070702909694e-06, + "loss": 0.3669, + "step": 10530 + }, + { + "epoch": 0.2108150038785877, + "grad_norm": 1.1103252172470093, + "learning_rate": 9.16689153475196e-06, + "loss": 0.3129, + "step": 10531 + }, + { + "epoch": 0.21083502239571603, + "grad_norm": 1.0775941610336304, + "learning_rate": 9.16671234907751e-06, + "loss": 0.2887, + "step": 10532 + }, + { + "epoch": 0.21085504091284438, + "grad_norm": 0.9694082736968994, + "learning_rate": 9.166533145887098e-06, + "loss": 0.2889, + "step": 10533 + }, + { + "epoch": 0.21087505942997273, + "grad_norm": 1.4287363290786743, + "learning_rate": 9.16635392518148e-06, + "loss": 0.3393, + "step": 10534 + }, + { + "epoch": 0.21089507794710108, + "grad_norm": 1.379599690437317, + "learning_rate": 9.166174686961404e-06, + "loss": 0.3558, + "step": 10535 + }, + { + "epoch": 0.2109150964642294, + "grad_norm": 1.1724857091903687, + "learning_rate": 9.165995431227626e-06, + "loss": 0.3348, + "step": 10536 + }, + { + "epoch": 0.21093511498135775, + "grad_norm": 1.8484692573547363, + "learning_rate": 9.1658161579809e-06, + "loss": 0.8538, + "step": 10537 + }, + { + "epoch": 0.2109551334984861, + "grad_norm": 1.024907112121582, + "learning_rate": 9.16563686722198e-06, + "loss": 0.2852, + "step": 10538 + }, + { + "epoch": 0.21097515201561445, + "grad_norm": 1.0003657341003418, + "learning_rate": 9.165457558951619e-06, + "loss": 0.2911, + "step": 10539 + }, + { + "epoch": 0.21099517053274278, + "grad_norm": 1.1733821630477905, + "learning_rate": 9.16527823317057e-06, + "loss": 0.3108, + "step": 10540 + }, + { + "epoch": 0.21101518904987113, + "grad_norm": 1.1228238344192505, + "learning_rate": 9.16509888987959e-06, + "loss": 0.2951, + "step": 10541 + }, + { + "epoch": 0.21103520756699948, + "grad_norm": 1.0086238384246826, + "learning_rate": 9.164919529079428e-06, + "loss": 0.3147, + "step": 10542 + }, + { + "epoch": 0.21105522608412783, + "grad_norm": 1.1241174936294556, + "learning_rate": 9.164740150770843e-06, + "loss": 0.3195, + "step": 10543 + }, + { + "epoch": 0.21107524460125615, + "grad_norm": 1.3344265222549438, + "learning_rate": 9.164560754954585e-06, + "loss": 0.3033, + "step": 10544 + }, + { + "epoch": 0.2110952631183845, + "grad_norm": 1.244726300239563, + "learning_rate": 9.16438134163141e-06, + "loss": 0.3716, + "step": 10545 + }, + { + "epoch": 0.21111528163551285, + "grad_norm": 1.117560625076294, + "learning_rate": 9.164201910802072e-06, + "loss": 0.3388, + "step": 10546 + }, + { + "epoch": 0.2111353001526412, + "grad_norm": 1.066213607788086, + "learning_rate": 9.164022462467325e-06, + "loss": 0.3537, + "step": 10547 + }, + { + "epoch": 0.21115531866976953, + "grad_norm": 0.9861123561859131, + "learning_rate": 9.163842996627926e-06, + "loss": 0.3194, + "step": 10548 + }, + { + "epoch": 0.21117533718689788, + "grad_norm": 1.1098456382751465, + "learning_rate": 9.163663513284624e-06, + "loss": 0.3335, + "step": 10549 + }, + { + "epoch": 0.21119535570402623, + "grad_norm": 1.0097914934158325, + "learning_rate": 9.163484012438178e-06, + "loss": 0.3076, + "step": 10550 + }, + { + "epoch": 0.21121537422115458, + "grad_norm": 1.1108694076538086, + "learning_rate": 9.16330449408934e-06, + "loss": 0.3359, + "step": 10551 + }, + { + "epoch": 0.2112353927382829, + "grad_norm": 0.9917749166488647, + "learning_rate": 9.163124958238867e-06, + "loss": 0.3126, + "step": 10552 + }, + { + "epoch": 0.21125541125541125, + "grad_norm": 1.2723636627197266, + "learning_rate": 9.162945404887513e-06, + "loss": 0.3209, + "step": 10553 + }, + { + "epoch": 0.2112754297725396, + "grad_norm": 1.1043411493301392, + "learning_rate": 9.162765834036032e-06, + "loss": 0.3289, + "step": 10554 + }, + { + "epoch": 0.21129544828966795, + "grad_norm": 1.0121785402297974, + "learning_rate": 9.162586245685179e-06, + "loss": 0.2698, + "step": 10555 + }, + { + "epoch": 0.21131546680679628, + "grad_norm": 1.1845077276229858, + "learning_rate": 9.16240663983571e-06, + "loss": 0.3491, + "step": 10556 + }, + { + "epoch": 0.21133548532392463, + "grad_norm": 1.1229267120361328, + "learning_rate": 9.162227016488378e-06, + "loss": 0.3643, + "step": 10557 + }, + { + "epoch": 0.21135550384105298, + "grad_norm": 1.0811940431594849, + "learning_rate": 9.162047375643942e-06, + "loss": 0.3709, + "step": 10558 + }, + { + "epoch": 0.21137552235818133, + "grad_norm": 1.204463005065918, + "learning_rate": 9.16186771730315e-06, + "loss": 0.3616, + "step": 10559 + }, + { + "epoch": 0.21139554087530965, + "grad_norm": 0.9230156540870667, + "learning_rate": 9.161688041466765e-06, + "loss": 0.2872, + "step": 10560 + }, + { + "epoch": 0.211415559392438, + "grad_norm": 1.1290150880813599, + "learning_rate": 9.16150834813554e-06, + "loss": 0.36, + "step": 10561 + }, + { + "epoch": 0.21143557790956635, + "grad_norm": 1.1606392860412598, + "learning_rate": 9.161328637310227e-06, + "loss": 0.3279, + "step": 10562 + }, + { + "epoch": 0.2114555964266947, + "grad_norm": 1.0342963933944702, + "learning_rate": 9.161148908991584e-06, + "loss": 0.3071, + "step": 10563 + }, + { + "epoch": 0.21147561494382303, + "grad_norm": 1.0479702949523926, + "learning_rate": 9.160969163180366e-06, + "loss": 0.3414, + "step": 10564 + }, + { + "epoch": 0.21149563346095138, + "grad_norm": 1.2101750373840332, + "learning_rate": 9.160789399877333e-06, + "loss": 0.3235, + "step": 10565 + }, + { + "epoch": 0.21151565197807973, + "grad_norm": 1.8888020515441895, + "learning_rate": 9.160609619083233e-06, + "loss": 0.8621, + "step": 10566 + }, + { + "epoch": 0.21153567049520808, + "grad_norm": 1.1552828550338745, + "learning_rate": 9.160429820798826e-06, + "loss": 0.3206, + "step": 10567 + }, + { + "epoch": 0.2115556890123364, + "grad_norm": 1.0504111051559448, + "learning_rate": 9.160250005024868e-06, + "loss": 0.3367, + "step": 10568 + }, + { + "epoch": 0.21157570752946475, + "grad_norm": 1.123623013496399, + "learning_rate": 9.160070171762114e-06, + "loss": 0.3296, + "step": 10569 + }, + { + "epoch": 0.2115957260465931, + "grad_norm": 1.169211506843567, + "learning_rate": 9.159890321011322e-06, + "loss": 0.3119, + "step": 10570 + }, + { + "epoch": 0.21161574456372145, + "grad_norm": 1.1693202257156372, + "learning_rate": 9.159710452773244e-06, + "loss": 0.3541, + "step": 10571 + }, + { + "epoch": 0.21163576308084978, + "grad_norm": 1.098193883895874, + "learning_rate": 9.15953056704864e-06, + "loss": 0.3891, + "step": 10572 + }, + { + "epoch": 0.21165578159797813, + "grad_norm": 1.0922967195510864, + "learning_rate": 9.159350663838263e-06, + "loss": 0.3476, + "step": 10573 + }, + { + "epoch": 0.21167580011510648, + "grad_norm": 1.1873527765274048, + "learning_rate": 9.159170743142872e-06, + "loss": 0.3642, + "step": 10574 + }, + { + "epoch": 0.21169581863223483, + "grad_norm": 1.1615257263183594, + "learning_rate": 9.158990804963222e-06, + "loss": 0.3361, + "step": 10575 + }, + { + "epoch": 0.21171583714936315, + "grad_norm": 1.1581203937530518, + "learning_rate": 9.15881084930007e-06, + "loss": 0.3594, + "step": 10576 + }, + { + "epoch": 0.2117358556664915, + "grad_norm": 1.2987878322601318, + "learning_rate": 9.158630876154171e-06, + "loss": 0.3511, + "step": 10577 + }, + { + "epoch": 0.21175587418361985, + "grad_norm": 1.0560003519058228, + "learning_rate": 9.158450885526284e-06, + "loss": 0.3034, + "step": 10578 + }, + { + "epoch": 0.2117758927007482, + "grad_norm": 1.155967116355896, + "learning_rate": 9.158270877417166e-06, + "loss": 0.3288, + "step": 10579 + }, + { + "epoch": 0.21179591121787653, + "grad_norm": 1.1022164821624756, + "learning_rate": 9.15809085182757e-06, + "loss": 0.3489, + "step": 10580 + }, + { + "epoch": 0.21181592973500488, + "grad_norm": 1.0986436605453491, + "learning_rate": 9.157910808758254e-06, + "loss": 0.3133, + "step": 10581 + }, + { + "epoch": 0.21183594825213323, + "grad_norm": 1.9382531642913818, + "learning_rate": 9.157730748209977e-06, + "loss": 0.809, + "step": 10582 + }, + { + "epoch": 0.21185596676926158, + "grad_norm": 1.0214604139328003, + "learning_rate": 9.157550670183495e-06, + "loss": 0.3383, + "step": 10583 + }, + { + "epoch": 0.2118759852863899, + "grad_norm": 1.1834995746612549, + "learning_rate": 9.157370574679563e-06, + "loss": 0.3343, + "step": 10584 + }, + { + "epoch": 0.21189600380351825, + "grad_norm": 1.2799783945083618, + "learning_rate": 9.157190461698942e-06, + "loss": 0.3529, + "step": 10585 + }, + { + "epoch": 0.2119160223206466, + "grad_norm": 1.0698362588882446, + "learning_rate": 9.157010331242384e-06, + "loss": 0.3123, + "step": 10586 + }, + { + "epoch": 0.21193604083777495, + "grad_norm": 1.0816177129745483, + "learning_rate": 9.156830183310653e-06, + "loss": 0.3791, + "step": 10587 + }, + { + "epoch": 0.21195605935490328, + "grad_norm": 1.1250885725021362, + "learning_rate": 9.156650017904501e-06, + "loss": 0.316, + "step": 10588 + }, + { + "epoch": 0.21197607787203163, + "grad_norm": 1.2951782941818237, + "learning_rate": 9.156469835024686e-06, + "loss": 0.385, + "step": 10589 + }, + { + "epoch": 0.21199609638915998, + "grad_norm": 1.373679518699646, + "learning_rate": 9.156289634671966e-06, + "loss": 0.3956, + "step": 10590 + }, + { + "epoch": 0.21201611490628833, + "grad_norm": 1.0635583400726318, + "learning_rate": 9.1561094168471e-06, + "loss": 0.3574, + "step": 10591 + }, + { + "epoch": 0.21203613342341665, + "grad_norm": 1.7435940504074097, + "learning_rate": 9.155929181550844e-06, + "loss": 0.862, + "step": 10592 + }, + { + "epoch": 0.212056151940545, + "grad_norm": 1.8640875816345215, + "learning_rate": 9.155748928783957e-06, + "loss": 0.8326, + "step": 10593 + }, + { + "epoch": 0.21207617045767335, + "grad_norm": 1.1405972242355347, + "learning_rate": 9.155568658547193e-06, + "loss": 0.3113, + "step": 10594 + }, + { + "epoch": 0.2120961889748017, + "grad_norm": 1.8282827138900757, + "learning_rate": 9.155388370841317e-06, + "loss": 0.8568, + "step": 10595 + }, + { + "epoch": 0.21211620749193003, + "grad_norm": 1.0990601778030396, + "learning_rate": 9.155208065667078e-06, + "loss": 0.3907, + "step": 10596 + }, + { + "epoch": 0.21213622600905838, + "grad_norm": 1.156341314315796, + "learning_rate": 9.155027743025242e-06, + "loss": 0.3393, + "step": 10597 + }, + { + "epoch": 0.21215624452618673, + "grad_norm": 1.0863615274429321, + "learning_rate": 9.15484740291656e-06, + "loss": 0.3267, + "step": 10598 + }, + { + "epoch": 0.21217626304331508, + "grad_norm": 1.0993856191635132, + "learning_rate": 9.154667045341796e-06, + "loss": 0.3596, + "step": 10599 + }, + { + "epoch": 0.2121962815604434, + "grad_norm": 1.0715044736862183, + "learning_rate": 9.154486670301707e-06, + "loss": 0.3222, + "step": 10600 + }, + { + "epoch": 0.21221630007757175, + "grad_norm": 1.128158450126648, + "learning_rate": 9.154306277797048e-06, + "loss": 0.2842, + "step": 10601 + }, + { + "epoch": 0.2122363185947001, + "grad_norm": 1.078744888305664, + "learning_rate": 9.154125867828581e-06, + "loss": 0.3942, + "step": 10602 + }, + { + "epoch": 0.21225633711182845, + "grad_norm": 1.1166574954986572, + "learning_rate": 9.153945440397061e-06, + "loss": 0.3329, + "step": 10603 + }, + { + "epoch": 0.21227635562895678, + "grad_norm": 2.0188231468200684, + "learning_rate": 9.153764995503249e-06, + "loss": 0.3062, + "step": 10604 + }, + { + "epoch": 0.21229637414608513, + "grad_norm": 1.0684032440185547, + "learning_rate": 9.153584533147903e-06, + "loss": 0.2875, + "step": 10605 + }, + { + "epoch": 0.21231639266321348, + "grad_norm": 1.7437838315963745, + "learning_rate": 9.153404053331783e-06, + "loss": 0.8683, + "step": 10606 + }, + { + "epoch": 0.21233641118034183, + "grad_norm": 1.1532554626464844, + "learning_rate": 9.153223556055644e-06, + "loss": 0.3425, + "step": 10607 + }, + { + "epoch": 0.21235642969747015, + "grad_norm": 1.0835331678390503, + "learning_rate": 9.153043041320247e-06, + "loss": 0.305, + "step": 10608 + }, + { + "epoch": 0.2123764482145985, + "grad_norm": 1.0837287902832031, + "learning_rate": 9.152862509126352e-06, + "loss": 0.2956, + "step": 10609 + }, + { + "epoch": 0.21239646673172685, + "grad_norm": 1.0987238883972168, + "learning_rate": 9.152681959474717e-06, + "loss": 0.306, + "step": 10610 + }, + { + "epoch": 0.2124164852488552, + "grad_norm": 1.9248327016830444, + "learning_rate": 9.1525013923661e-06, + "loss": 0.8421, + "step": 10611 + }, + { + "epoch": 0.21243650376598353, + "grad_norm": 1.272436499595642, + "learning_rate": 9.152320807801261e-06, + "loss": 0.3275, + "step": 10612 + }, + { + "epoch": 0.21245652228311188, + "grad_norm": 1.070361614227295, + "learning_rate": 9.152140205780957e-06, + "loss": 0.3419, + "step": 10613 + }, + { + "epoch": 0.21247654080024023, + "grad_norm": 1.085684061050415, + "learning_rate": 9.151959586305951e-06, + "loss": 0.3194, + "step": 10614 + }, + { + "epoch": 0.21249655931736858, + "grad_norm": 1.0113322734832764, + "learning_rate": 9.151778949377e-06, + "loss": 0.3013, + "step": 10615 + }, + { + "epoch": 0.2125165778344969, + "grad_norm": 1.8470544815063477, + "learning_rate": 9.151598294994864e-06, + "loss": 0.7934, + "step": 10616 + }, + { + "epoch": 0.21253659635162525, + "grad_norm": 1.1401629447937012, + "learning_rate": 9.151417623160303e-06, + "loss": 0.3447, + "step": 10617 + }, + { + "epoch": 0.2125566148687536, + "grad_norm": 1.0724139213562012, + "learning_rate": 9.151236933874075e-06, + "loss": 0.3028, + "step": 10618 + }, + { + "epoch": 0.21257663338588195, + "grad_norm": 1.0641441345214844, + "learning_rate": 9.151056227136939e-06, + "loss": 0.3166, + "step": 10619 + }, + { + "epoch": 0.21259665190301028, + "grad_norm": 1.154597282409668, + "learning_rate": 9.150875502949657e-06, + "loss": 0.3096, + "step": 10620 + }, + { + "epoch": 0.21261667042013863, + "grad_norm": 1.2011257410049438, + "learning_rate": 9.150694761312988e-06, + "loss": 0.3239, + "step": 10621 + }, + { + "epoch": 0.21263668893726698, + "grad_norm": 1.0784375667572021, + "learning_rate": 9.150514002227691e-06, + "loss": 0.3102, + "step": 10622 + }, + { + "epoch": 0.21265670745439533, + "grad_norm": 0.9885956048965454, + "learning_rate": 9.150333225694527e-06, + "loss": 0.3225, + "step": 10623 + }, + { + "epoch": 0.21267672597152365, + "grad_norm": 1.020182490348816, + "learning_rate": 9.150152431714255e-06, + "loss": 0.2743, + "step": 10624 + }, + { + "epoch": 0.212696744488652, + "grad_norm": 1.0255776643753052, + "learning_rate": 9.149971620287634e-06, + "loss": 0.3353, + "step": 10625 + }, + { + "epoch": 0.21271676300578035, + "grad_norm": 1.1401844024658203, + "learning_rate": 9.149790791415427e-06, + "loss": 0.3465, + "step": 10626 + }, + { + "epoch": 0.2127367815229087, + "grad_norm": 1.0884209871292114, + "learning_rate": 9.149609945098393e-06, + "loss": 0.3286, + "step": 10627 + }, + { + "epoch": 0.21275680004003703, + "grad_norm": 1.017776370048523, + "learning_rate": 9.14942908133729e-06, + "loss": 0.3209, + "step": 10628 + }, + { + "epoch": 0.21277681855716538, + "grad_norm": 1.0727547407150269, + "learning_rate": 9.14924820013288e-06, + "loss": 0.3454, + "step": 10629 + }, + { + "epoch": 0.21279683707429373, + "grad_norm": 2.041468381881714, + "learning_rate": 9.149067301485926e-06, + "loss": 0.8119, + "step": 10630 + }, + { + "epoch": 0.21281685559142208, + "grad_norm": 1.087188959121704, + "learning_rate": 9.148886385397184e-06, + "loss": 0.2864, + "step": 10631 + }, + { + "epoch": 0.2128368741085504, + "grad_norm": 2.0155248641967773, + "learning_rate": 9.148705451867417e-06, + "loss": 0.8061, + "step": 10632 + }, + { + "epoch": 0.21285689262567875, + "grad_norm": 1.0728837251663208, + "learning_rate": 9.148524500897385e-06, + "loss": 0.3287, + "step": 10633 + }, + { + "epoch": 0.2128769111428071, + "grad_norm": 1.1484804153442383, + "learning_rate": 9.14834353248785e-06, + "loss": 0.3279, + "step": 10634 + }, + { + "epoch": 0.21289692965993545, + "grad_norm": 1.9600653648376465, + "learning_rate": 9.148162546639568e-06, + "loss": 0.8907, + "step": 10635 + }, + { + "epoch": 0.21291694817706378, + "grad_norm": 1.4103115797042847, + "learning_rate": 9.147981543353307e-06, + "loss": 0.2973, + "step": 10636 + }, + { + "epoch": 0.21293696669419213, + "grad_norm": 1.155062198638916, + "learning_rate": 9.147800522629822e-06, + "loss": 0.3385, + "step": 10637 + }, + { + "epoch": 0.21295698521132048, + "grad_norm": 1.1161679029464722, + "learning_rate": 9.147619484469877e-06, + "loss": 0.3303, + "step": 10638 + }, + { + "epoch": 0.21297700372844883, + "grad_norm": 1.1617045402526855, + "learning_rate": 9.147438428874234e-06, + "loss": 0.382, + "step": 10639 + }, + { + "epoch": 0.21299702224557715, + "grad_norm": 1.1886544227600098, + "learning_rate": 9.147257355843649e-06, + "loss": 0.3413, + "step": 10640 + }, + { + "epoch": 0.2130170407627055, + "grad_norm": 2.14846134185791, + "learning_rate": 9.147076265378889e-06, + "loss": 0.8675, + "step": 10641 + }, + { + "epoch": 0.21303705927983385, + "grad_norm": 1.9214156866073608, + "learning_rate": 9.146895157480711e-06, + "loss": 0.9332, + "step": 10642 + }, + { + "epoch": 0.2130570777969622, + "grad_norm": 0.9615404009819031, + "learning_rate": 9.146714032149881e-06, + "loss": 0.274, + "step": 10643 + }, + { + "epoch": 0.21307709631409052, + "grad_norm": 1.2992058992385864, + "learning_rate": 9.146532889387155e-06, + "loss": 0.315, + "step": 10644 + }, + { + "epoch": 0.21309711483121888, + "grad_norm": 1.0277292728424072, + "learning_rate": 9.146351729193299e-06, + "loss": 0.305, + "step": 10645 + }, + { + "epoch": 0.21311713334834723, + "grad_norm": 1.7020995616912842, + "learning_rate": 9.146170551569072e-06, + "loss": 0.8552, + "step": 10646 + }, + { + "epoch": 0.21313715186547558, + "grad_norm": 1.1227563619613647, + "learning_rate": 9.145989356515234e-06, + "loss": 0.3597, + "step": 10647 + }, + { + "epoch": 0.2131571703826039, + "grad_norm": 1.1866625547409058, + "learning_rate": 9.14580814403255e-06, + "loss": 0.3581, + "step": 10648 + }, + { + "epoch": 0.21317718889973225, + "grad_norm": 1.0366032123565674, + "learning_rate": 9.145626914121784e-06, + "loss": 0.3281, + "step": 10649 + }, + { + "epoch": 0.2131972074168606, + "grad_norm": 1.10933256149292, + "learning_rate": 9.145445666783693e-06, + "loss": 0.3292, + "step": 10650 + }, + { + "epoch": 0.21321722593398895, + "grad_norm": 1.1263189315795898, + "learning_rate": 9.145264402019039e-06, + "loss": 0.3157, + "step": 10651 + }, + { + "epoch": 0.21323724445111727, + "grad_norm": 1.1388047933578491, + "learning_rate": 9.145083119828586e-06, + "loss": 0.3312, + "step": 10652 + }, + { + "epoch": 0.21325726296824563, + "grad_norm": 1.109838843345642, + "learning_rate": 9.144901820213095e-06, + "loss": 0.3265, + "step": 10653 + }, + { + "epoch": 0.21327728148537398, + "grad_norm": 1.2372139692306519, + "learning_rate": 9.14472050317333e-06, + "loss": 0.302, + "step": 10654 + }, + { + "epoch": 0.21329730000250233, + "grad_norm": 1.1560417413711548, + "learning_rate": 9.14453916871005e-06, + "loss": 0.264, + "step": 10655 + }, + { + "epoch": 0.21331731851963065, + "grad_norm": 1.0896052122116089, + "learning_rate": 9.144357816824023e-06, + "loss": 0.3396, + "step": 10656 + }, + { + "epoch": 0.213337337036759, + "grad_norm": 1.049208402633667, + "learning_rate": 9.144176447516004e-06, + "loss": 0.258, + "step": 10657 + }, + { + "epoch": 0.21335735555388735, + "grad_norm": 1.074613094329834, + "learning_rate": 9.143995060786761e-06, + "loss": 0.3582, + "step": 10658 + }, + { + "epoch": 0.2133773740710157, + "grad_norm": 1.929482340812683, + "learning_rate": 9.143813656637055e-06, + "loss": 0.3207, + "step": 10659 + }, + { + "epoch": 0.21339739258814402, + "grad_norm": 2.114015579223633, + "learning_rate": 9.143632235067646e-06, + "loss": 0.8428, + "step": 10660 + }, + { + "epoch": 0.21341741110527238, + "grad_norm": 1.1458778381347656, + "learning_rate": 9.1434507960793e-06, + "loss": 0.3692, + "step": 10661 + }, + { + "epoch": 0.21343742962240073, + "grad_norm": 1.3422961235046387, + "learning_rate": 9.14326933967278e-06, + "loss": 0.2859, + "step": 10662 + }, + { + "epoch": 0.21345744813952908, + "grad_norm": 1.192120909690857, + "learning_rate": 9.143087865848846e-06, + "loss": 0.316, + "step": 10663 + }, + { + "epoch": 0.2134774666566574, + "grad_norm": 1.0442416667938232, + "learning_rate": 9.142906374608261e-06, + "loss": 0.3171, + "step": 10664 + }, + { + "epoch": 0.21349748517378575, + "grad_norm": 1.2546491622924805, + "learning_rate": 9.142724865951791e-06, + "loss": 0.3213, + "step": 10665 + }, + { + "epoch": 0.2135175036909141, + "grad_norm": 1.1934419870376587, + "learning_rate": 9.142543339880197e-06, + "loss": 0.3108, + "step": 10666 + }, + { + "epoch": 0.21353752220804245, + "grad_norm": 1.0811713933944702, + "learning_rate": 9.142361796394242e-06, + "loss": 0.3543, + "step": 10667 + }, + { + "epoch": 0.21355754072517077, + "grad_norm": 1.1753120422363281, + "learning_rate": 9.142180235494687e-06, + "loss": 0.3063, + "step": 10668 + }, + { + "epoch": 0.21357755924229913, + "grad_norm": 1.0418697595596313, + "learning_rate": 9.1419986571823e-06, + "loss": 0.3106, + "step": 10669 + }, + { + "epoch": 0.21359757775942748, + "grad_norm": 1.06178879737854, + "learning_rate": 9.141817061457844e-06, + "loss": 0.3595, + "step": 10670 + }, + { + "epoch": 0.21361759627655583, + "grad_norm": 1.1172537803649902, + "learning_rate": 9.141635448322077e-06, + "loss": 0.3491, + "step": 10671 + }, + { + "epoch": 0.21363761479368415, + "grad_norm": 1.0484521389007568, + "learning_rate": 9.141453817775767e-06, + "loss": 0.3037, + "step": 10672 + }, + { + "epoch": 0.2136576333108125, + "grad_norm": 1.1086833477020264, + "learning_rate": 9.141272169819677e-06, + "loss": 0.2825, + "step": 10673 + }, + { + "epoch": 0.21367765182794085, + "grad_norm": 1.189035415649414, + "learning_rate": 9.14109050445457e-06, + "loss": 0.3359, + "step": 10674 + }, + { + "epoch": 0.2136976703450692, + "grad_norm": 1.143536925315857, + "learning_rate": 9.14090882168121e-06, + "loss": 0.3416, + "step": 10675 + }, + { + "epoch": 0.21371768886219752, + "grad_norm": 1.3391485214233398, + "learning_rate": 9.140727121500358e-06, + "loss": 0.3854, + "step": 10676 + }, + { + "epoch": 0.21373770737932588, + "grad_norm": 1.052363395690918, + "learning_rate": 9.140545403912782e-06, + "loss": 0.3546, + "step": 10677 + }, + { + "epoch": 0.21375772589645423, + "grad_norm": 2.0243422985076904, + "learning_rate": 9.140363668919244e-06, + "loss": 0.8555, + "step": 10678 + }, + { + "epoch": 0.21377774441358258, + "grad_norm": 1.0565305948257446, + "learning_rate": 9.140181916520509e-06, + "loss": 0.2586, + "step": 10679 + }, + { + "epoch": 0.2137977629307109, + "grad_norm": 1.0765527486801147, + "learning_rate": 9.140000146717339e-06, + "loss": 0.2923, + "step": 10680 + }, + { + "epoch": 0.21381778144783925, + "grad_norm": 1.0784754753112793, + "learning_rate": 9.1398183595105e-06, + "loss": 0.3065, + "step": 10681 + }, + { + "epoch": 0.2138377999649676, + "grad_norm": 1.2075698375701904, + "learning_rate": 9.139636554900755e-06, + "loss": 0.3332, + "step": 10682 + }, + { + "epoch": 0.21385781848209595, + "grad_norm": 1.0824624300003052, + "learning_rate": 9.13945473288887e-06, + "loss": 0.311, + "step": 10683 + }, + { + "epoch": 0.21387783699922427, + "grad_norm": 1.9148622751235962, + "learning_rate": 9.139272893475607e-06, + "loss": 0.7422, + "step": 10684 + }, + { + "epoch": 0.21389785551635263, + "grad_norm": 1.0695061683654785, + "learning_rate": 9.139091036661731e-06, + "loss": 0.2772, + "step": 10685 + }, + { + "epoch": 0.21391787403348098, + "grad_norm": 1.0212565660476685, + "learning_rate": 9.138909162448008e-06, + "loss": 0.3428, + "step": 10686 + }, + { + "epoch": 0.21393789255060933, + "grad_norm": 0.9486506581306458, + "learning_rate": 9.138727270835202e-06, + "loss": 0.2982, + "step": 10687 + }, + { + "epoch": 0.21395791106773765, + "grad_norm": 1.0248522758483887, + "learning_rate": 9.138545361824076e-06, + "loss": 0.3345, + "step": 10688 + }, + { + "epoch": 0.213977929584866, + "grad_norm": 1.7591538429260254, + "learning_rate": 9.138363435415397e-06, + "loss": 0.8537, + "step": 10689 + }, + { + "epoch": 0.21399794810199435, + "grad_norm": 1.048416018486023, + "learning_rate": 9.138181491609928e-06, + "loss": 0.2952, + "step": 10690 + }, + { + "epoch": 0.2140179666191227, + "grad_norm": 1.1414837837219238, + "learning_rate": 9.137999530408435e-06, + "loss": 0.3002, + "step": 10691 + }, + { + "epoch": 0.21403798513625102, + "grad_norm": 1.2860820293426514, + "learning_rate": 9.137817551811683e-06, + "loss": 0.3453, + "step": 10692 + }, + { + "epoch": 0.21405800365337938, + "grad_norm": 1.105345368385315, + "learning_rate": 9.137635555820434e-06, + "loss": 0.3652, + "step": 10693 + }, + { + "epoch": 0.21407802217050773, + "grad_norm": 1.0301969051361084, + "learning_rate": 9.13745354243546e-06, + "loss": 0.2812, + "step": 10694 + }, + { + "epoch": 0.21409804068763608, + "grad_norm": 0.9772236943244934, + "learning_rate": 9.137271511657518e-06, + "loss": 0.327, + "step": 10695 + }, + { + "epoch": 0.2141180592047644, + "grad_norm": 1.009577989578247, + "learning_rate": 9.137089463487378e-06, + "loss": 0.3062, + "step": 10696 + }, + { + "epoch": 0.21413807772189275, + "grad_norm": 1.8624851703643799, + "learning_rate": 9.136907397925802e-06, + "loss": 0.7834, + "step": 10697 + }, + { + "epoch": 0.2141580962390211, + "grad_norm": 1.1416220664978027, + "learning_rate": 9.136725314973562e-06, + "loss": 0.3613, + "step": 10698 + }, + { + "epoch": 0.21417811475614945, + "grad_norm": 1.0979151725769043, + "learning_rate": 9.136543214631416e-06, + "loss": 0.3106, + "step": 10699 + }, + { + "epoch": 0.21419813327327777, + "grad_norm": 1.2439484596252441, + "learning_rate": 9.136361096900133e-06, + "loss": 0.3357, + "step": 10700 + }, + { + "epoch": 0.21421815179040612, + "grad_norm": 1.1332429647445679, + "learning_rate": 9.136178961780479e-06, + "loss": 0.3798, + "step": 10701 + }, + { + "epoch": 0.21423817030753448, + "grad_norm": 1.051742672920227, + "learning_rate": 9.135996809273217e-06, + "loss": 0.307, + "step": 10702 + }, + { + "epoch": 0.21425818882466283, + "grad_norm": 1.1569843292236328, + "learning_rate": 9.135814639379117e-06, + "loss": 0.3447, + "step": 10703 + }, + { + "epoch": 0.21427820734179115, + "grad_norm": 1.1025689840316772, + "learning_rate": 9.13563245209894e-06, + "loss": 0.2919, + "step": 10704 + }, + { + "epoch": 0.2142982258589195, + "grad_norm": 1.1253212690353394, + "learning_rate": 9.135450247433454e-06, + "loss": 0.3155, + "step": 10705 + }, + { + "epoch": 0.21431824437604785, + "grad_norm": 1.1758267879486084, + "learning_rate": 9.135268025383427e-06, + "loss": 0.3355, + "step": 10706 + }, + { + "epoch": 0.2143382628931762, + "grad_norm": 1.2514333724975586, + "learning_rate": 9.135085785949623e-06, + "loss": 0.3282, + "step": 10707 + }, + { + "epoch": 0.21435828141030452, + "grad_norm": 1.0537853240966797, + "learning_rate": 9.134903529132806e-06, + "loss": 0.2773, + "step": 10708 + }, + { + "epoch": 0.21437829992743287, + "grad_norm": 1.2307196855545044, + "learning_rate": 9.134721254933747e-06, + "loss": 0.3332, + "step": 10709 + }, + { + "epoch": 0.21439831844456123, + "grad_norm": 1.6439181566238403, + "learning_rate": 9.13453896335321e-06, + "loss": 0.7931, + "step": 10710 + }, + { + "epoch": 0.21441833696168958, + "grad_norm": 1.2987748384475708, + "learning_rate": 9.134356654391958e-06, + "loss": 0.3505, + "step": 10711 + }, + { + "epoch": 0.2144383554788179, + "grad_norm": 1.5884850025177002, + "learning_rate": 9.134174328050762e-06, + "loss": 0.3383, + "step": 10712 + }, + { + "epoch": 0.21445837399594625, + "grad_norm": 1.132163405418396, + "learning_rate": 9.133991984330388e-06, + "loss": 0.3547, + "step": 10713 + }, + { + "epoch": 0.2144783925130746, + "grad_norm": 0.9860289096832275, + "learning_rate": 9.1338096232316e-06, + "loss": 0.3241, + "step": 10714 + }, + { + "epoch": 0.21449841103020295, + "grad_norm": 1.030963659286499, + "learning_rate": 9.133627244755165e-06, + "loss": 0.3058, + "step": 10715 + }, + { + "epoch": 0.21451842954733127, + "grad_norm": 1.0013006925582886, + "learning_rate": 9.133444848901851e-06, + "loss": 0.2699, + "step": 10716 + }, + { + "epoch": 0.21453844806445962, + "grad_norm": 1.1601600646972656, + "learning_rate": 9.133262435672425e-06, + "loss": 0.3731, + "step": 10717 + }, + { + "epoch": 0.21455846658158798, + "grad_norm": 1.138594388961792, + "learning_rate": 9.133080005067653e-06, + "loss": 0.3097, + "step": 10718 + }, + { + "epoch": 0.21457848509871633, + "grad_norm": 1.122436761856079, + "learning_rate": 9.132897557088303e-06, + "loss": 0.3099, + "step": 10719 + }, + { + "epoch": 0.21459850361584465, + "grad_norm": 1.1405667066574097, + "learning_rate": 9.132715091735141e-06, + "loss": 0.3656, + "step": 10720 + }, + { + "epoch": 0.214618522132973, + "grad_norm": 1.166162371635437, + "learning_rate": 9.132532609008932e-06, + "loss": 0.3627, + "step": 10721 + }, + { + "epoch": 0.21463854065010135, + "grad_norm": 1.124064326286316, + "learning_rate": 9.132350108910448e-06, + "loss": 0.3334, + "step": 10722 + }, + { + "epoch": 0.2146585591672297, + "grad_norm": 1.1088491678237915, + "learning_rate": 9.132167591440452e-06, + "loss": 0.3201, + "step": 10723 + }, + { + "epoch": 0.21467857768435802, + "grad_norm": 1.1314064264297485, + "learning_rate": 9.131985056599713e-06, + "loss": 0.3394, + "step": 10724 + }, + { + "epoch": 0.21469859620148637, + "grad_norm": 1.1032207012176514, + "learning_rate": 9.131802504388997e-06, + "loss": 0.3357, + "step": 10725 + }, + { + "epoch": 0.21471861471861473, + "grad_norm": 0.9973805546760559, + "learning_rate": 9.131619934809075e-06, + "loss": 0.3259, + "step": 10726 + }, + { + "epoch": 0.21473863323574308, + "grad_norm": 1.1763275861740112, + "learning_rate": 9.13143734786071e-06, + "loss": 0.3182, + "step": 10727 + }, + { + "epoch": 0.2147586517528714, + "grad_norm": 1.1413694620132446, + "learning_rate": 9.13125474354467e-06, + "loss": 0.3039, + "step": 10728 + }, + { + "epoch": 0.21477867026999975, + "grad_norm": 1.0688806772232056, + "learning_rate": 9.131072121861729e-06, + "loss": 0.3172, + "step": 10729 + }, + { + "epoch": 0.2147986887871281, + "grad_norm": 1.0712732076644897, + "learning_rate": 9.130889482812647e-06, + "loss": 0.3585, + "step": 10730 + }, + { + "epoch": 0.21481870730425645, + "grad_norm": 1.1297175884246826, + "learning_rate": 9.130706826398193e-06, + "loss": 0.3551, + "step": 10731 + }, + { + "epoch": 0.21483872582138477, + "grad_norm": 1.1024394035339355, + "learning_rate": 9.130524152619137e-06, + "loss": 0.3071, + "step": 10732 + }, + { + "epoch": 0.21485874433851312, + "grad_norm": 1.1342105865478516, + "learning_rate": 9.130341461476247e-06, + "loss": 0.3574, + "step": 10733 + }, + { + "epoch": 0.21487876285564148, + "grad_norm": 1.092123031616211, + "learning_rate": 9.13015875297029e-06, + "loss": 0.3347, + "step": 10734 + }, + { + "epoch": 0.21489878137276983, + "grad_norm": 1.0323936939239502, + "learning_rate": 9.129976027102037e-06, + "loss": 0.343, + "step": 10735 + }, + { + "epoch": 0.21491879988989815, + "grad_norm": 1.0752836465835571, + "learning_rate": 9.12979328387225e-06, + "loss": 0.3055, + "step": 10736 + }, + { + "epoch": 0.2149388184070265, + "grad_norm": 1.1118744611740112, + "learning_rate": 9.129610523281702e-06, + "loss": 0.3444, + "step": 10737 + }, + { + "epoch": 0.21495883692415485, + "grad_norm": 1.0660390853881836, + "learning_rate": 9.129427745331161e-06, + "loss": 0.3284, + "step": 10738 + }, + { + "epoch": 0.21497885544128317, + "grad_norm": 1.2423580884933472, + "learning_rate": 9.129244950021394e-06, + "loss": 0.2851, + "step": 10739 + }, + { + "epoch": 0.21499887395841152, + "grad_norm": 1.082008719444275, + "learning_rate": 9.129062137353168e-06, + "loss": 0.2938, + "step": 10740 + }, + { + "epoch": 0.21501889247553987, + "grad_norm": 1.1388225555419922, + "learning_rate": 9.128879307327256e-06, + "loss": 0.3211, + "step": 10741 + }, + { + "epoch": 0.21503891099266823, + "grad_norm": 1.0598233938217163, + "learning_rate": 9.128696459944423e-06, + "loss": 0.3114, + "step": 10742 + }, + { + "epoch": 0.21505892950979655, + "grad_norm": 1.1059532165527344, + "learning_rate": 9.128513595205439e-06, + "loss": 0.3479, + "step": 10743 + }, + { + "epoch": 0.2150789480269249, + "grad_norm": 1.0926216840744019, + "learning_rate": 9.12833071311107e-06, + "loss": 0.3199, + "step": 10744 + }, + { + "epoch": 0.21509896654405325, + "grad_norm": 1.1722824573516846, + "learning_rate": 9.12814781366209e-06, + "loss": 0.3337, + "step": 10745 + }, + { + "epoch": 0.2151189850611816, + "grad_norm": 1.3011195659637451, + "learning_rate": 9.127964896859263e-06, + "loss": 0.322, + "step": 10746 + }, + { + "epoch": 0.21513900357830992, + "grad_norm": 1.0691207647323608, + "learning_rate": 9.127781962703362e-06, + "loss": 0.313, + "step": 10747 + }, + { + "epoch": 0.21515902209543827, + "grad_norm": 1.1870592832565308, + "learning_rate": 9.127599011195153e-06, + "loss": 0.3283, + "step": 10748 + }, + { + "epoch": 0.21517904061256662, + "grad_norm": 1.1806952953338623, + "learning_rate": 9.127416042335405e-06, + "loss": 0.3572, + "step": 10749 + }, + { + "epoch": 0.21519905912969498, + "grad_norm": 1.166808843612671, + "learning_rate": 9.12723305612489e-06, + "loss": 0.3329, + "step": 10750 + }, + { + "epoch": 0.2152190776468233, + "grad_norm": 1.0861183404922485, + "learning_rate": 9.127050052564374e-06, + "loss": 0.3097, + "step": 10751 + }, + { + "epoch": 0.21523909616395165, + "grad_norm": 1.0951212644577026, + "learning_rate": 9.126867031654628e-06, + "loss": 0.3295, + "step": 10752 + }, + { + "epoch": 0.21525911468108, + "grad_norm": 1.1328099966049194, + "learning_rate": 9.12668399339642e-06, + "loss": 0.3156, + "step": 10753 + }, + { + "epoch": 0.21527913319820835, + "grad_norm": 1.0258970260620117, + "learning_rate": 9.126500937790523e-06, + "loss": 0.3338, + "step": 10754 + }, + { + "epoch": 0.21529915171533667, + "grad_norm": 0.9612321257591248, + "learning_rate": 9.126317864837703e-06, + "loss": 0.3196, + "step": 10755 + }, + { + "epoch": 0.21531917023246502, + "grad_norm": 1.0833779573440552, + "learning_rate": 9.126134774538731e-06, + "loss": 0.2677, + "step": 10756 + }, + { + "epoch": 0.21533918874959337, + "grad_norm": 1.008012294769287, + "learning_rate": 9.125951666894375e-06, + "loss": 0.3194, + "step": 10757 + }, + { + "epoch": 0.21535920726672172, + "grad_norm": 1.1955174207687378, + "learning_rate": 9.125768541905407e-06, + "loss": 0.356, + "step": 10758 + }, + { + "epoch": 0.21537922578385005, + "grad_norm": 0.9981282353401184, + "learning_rate": 9.125585399572596e-06, + "loss": 0.3395, + "step": 10759 + }, + { + "epoch": 0.2153992443009784, + "grad_norm": 1.2669951915740967, + "learning_rate": 9.125402239896713e-06, + "loss": 0.3178, + "step": 10760 + }, + { + "epoch": 0.21541926281810675, + "grad_norm": 1.2403130531311035, + "learning_rate": 9.125219062878524e-06, + "loss": 0.3648, + "step": 10761 + }, + { + "epoch": 0.2154392813352351, + "grad_norm": 1.208748459815979, + "learning_rate": 9.125035868518803e-06, + "loss": 0.3026, + "step": 10762 + }, + { + "epoch": 0.21545929985236342, + "grad_norm": 1.1767303943634033, + "learning_rate": 9.12485265681832e-06, + "loss": 0.3087, + "step": 10763 + }, + { + "epoch": 0.21547931836949177, + "grad_norm": 1.109298586845398, + "learning_rate": 9.124669427777843e-06, + "loss": 0.3641, + "step": 10764 + }, + { + "epoch": 0.21549933688662012, + "grad_norm": 1.1624650955200195, + "learning_rate": 9.124486181398144e-06, + "loss": 0.3381, + "step": 10765 + }, + { + "epoch": 0.21551935540374847, + "grad_norm": 1.2099324464797974, + "learning_rate": 9.124302917679992e-06, + "loss": 0.281, + "step": 10766 + }, + { + "epoch": 0.2155393739208768, + "grad_norm": 1.0649350881576538, + "learning_rate": 9.12411963662416e-06, + "loss": 0.3552, + "step": 10767 + }, + { + "epoch": 0.21555939243800515, + "grad_norm": 1.0979259014129639, + "learning_rate": 9.123936338231414e-06, + "loss": 0.3048, + "step": 10768 + }, + { + "epoch": 0.2155794109551335, + "grad_norm": 1.1721196174621582, + "learning_rate": 9.123753022502528e-06, + "loss": 0.3081, + "step": 10769 + }, + { + "epoch": 0.21559942947226185, + "grad_norm": 1.1089004278182983, + "learning_rate": 9.123569689438272e-06, + "loss": 0.2769, + "step": 10770 + }, + { + "epoch": 0.21561944798939017, + "grad_norm": 1.0311559438705444, + "learning_rate": 9.123386339039418e-06, + "loss": 0.3333, + "step": 10771 + }, + { + "epoch": 0.21563946650651852, + "grad_norm": 1.0226740837097168, + "learning_rate": 9.123202971306733e-06, + "loss": 0.3254, + "step": 10772 + }, + { + "epoch": 0.21565948502364687, + "grad_norm": 1.2462403774261475, + "learning_rate": 9.12301958624099e-06, + "loss": 0.3217, + "step": 10773 + }, + { + "epoch": 0.21567950354077522, + "grad_norm": 1.118704080581665, + "learning_rate": 9.12283618384296e-06, + "loss": 0.3134, + "step": 10774 + }, + { + "epoch": 0.21569952205790355, + "grad_norm": 1.3599518537521362, + "learning_rate": 9.122652764113416e-06, + "loss": 0.3755, + "step": 10775 + }, + { + "epoch": 0.2157195405750319, + "grad_norm": 1.0195140838623047, + "learning_rate": 9.122469327053126e-06, + "loss": 0.281, + "step": 10776 + }, + { + "epoch": 0.21573955909216025, + "grad_norm": 1.0891666412353516, + "learning_rate": 9.12228587266286e-06, + "loss": 0.3412, + "step": 10777 + }, + { + "epoch": 0.2157595776092886, + "grad_norm": 1.1158207654953003, + "learning_rate": 9.122102400943394e-06, + "loss": 0.3242, + "step": 10778 + }, + { + "epoch": 0.21577959612641692, + "grad_norm": 1.0856046676635742, + "learning_rate": 9.121918911895494e-06, + "loss": 0.2852, + "step": 10779 + }, + { + "epoch": 0.21579961464354527, + "grad_norm": 1.0417671203613281, + "learning_rate": 9.121735405519936e-06, + "loss": 0.3206, + "step": 10780 + }, + { + "epoch": 0.21581963316067362, + "grad_norm": 1.1353660821914673, + "learning_rate": 9.121551881817489e-06, + "loss": 0.283, + "step": 10781 + }, + { + "epoch": 0.21583965167780197, + "grad_norm": 1.131513237953186, + "learning_rate": 9.121368340788923e-06, + "loss": 0.3426, + "step": 10782 + }, + { + "epoch": 0.2158596701949303, + "grad_norm": 1.1251274347305298, + "learning_rate": 9.121184782435014e-06, + "loss": 0.2872, + "step": 10783 + }, + { + "epoch": 0.21587968871205865, + "grad_norm": 1.1909040212631226, + "learning_rate": 9.12100120675653e-06, + "loss": 0.2942, + "step": 10784 + }, + { + "epoch": 0.215899707229187, + "grad_norm": 1.0910170078277588, + "learning_rate": 9.120817613754245e-06, + "loss": 0.2819, + "step": 10785 + }, + { + "epoch": 0.21591972574631535, + "grad_norm": 0.9709217548370361, + "learning_rate": 9.120634003428928e-06, + "loss": 0.3006, + "step": 10786 + }, + { + "epoch": 0.21593974426344367, + "grad_norm": 1.2255984544754028, + "learning_rate": 9.120450375781353e-06, + "loss": 0.3027, + "step": 10787 + }, + { + "epoch": 0.21595976278057202, + "grad_norm": 1.1646993160247803, + "learning_rate": 9.12026673081229e-06, + "loss": 0.3359, + "step": 10788 + }, + { + "epoch": 0.21597978129770037, + "grad_norm": 1.2398115396499634, + "learning_rate": 9.120083068522514e-06, + "loss": 0.3319, + "step": 10789 + }, + { + "epoch": 0.21599979981482872, + "grad_norm": 1.0647475719451904, + "learning_rate": 9.119899388912797e-06, + "loss": 0.3295, + "step": 10790 + }, + { + "epoch": 0.21601981833195705, + "grad_norm": 1.0455141067504883, + "learning_rate": 9.119715691983906e-06, + "loss": 0.3242, + "step": 10791 + }, + { + "epoch": 0.2160398368490854, + "grad_norm": 1.0657765865325928, + "learning_rate": 9.119531977736619e-06, + "loss": 0.3268, + "step": 10792 + }, + { + "epoch": 0.21605985536621375, + "grad_norm": 1.1545583009719849, + "learning_rate": 9.119348246171705e-06, + "loss": 0.3935, + "step": 10793 + }, + { + "epoch": 0.2160798738833421, + "grad_norm": 1.2175079584121704, + "learning_rate": 9.119164497289939e-06, + "loss": 0.3408, + "step": 10794 + }, + { + "epoch": 0.21609989240047042, + "grad_norm": 1.131477952003479, + "learning_rate": 9.11898073109209e-06, + "loss": 0.3399, + "step": 10795 + }, + { + "epoch": 0.21611991091759877, + "grad_norm": 1.1506763696670532, + "learning_rate": 9.118796947578933e-06, + "loss": 0.3348, + "step": 10796 + }, + { + "epoch": 0.21613992943472712, + "grad_norm": 0.9766368865966797, + "learning_rate": 9.118613146751241e-06, + "loss": 0.3042, + "step": 10797 + }, + { + "epoch": 0.21615994795185547, + "grad_norm": 1.0921894311904907, + "learning_rate": 9.118429328609784e-06, + "loss": 0.3834, + "step": 10798 + }, + { + "epoch": 0.2161799664689838, + "grad_norm": 1.0941540002822876, + "learning_rate": 9.118245493155338e-06, + "loss": 0.33, + "step": 10799 + }, + { + "epoch": 0.21619998498611215, + "grad_norm": 1.2030526399612427, + "learning_rate": 9.118061640388674e-06, + "loss": 0.327, + "step": 10800 + }, + { + "epoch": 0.2162200035032405, + "grad_norm": 1.145263910293579, + "learning_rate": 9.117877770310563e-06, + "loss": 0.3404, + "step": 10801 + }, + { + "epoch": 0.21624002202036885, + "grad_norm": 1.8669861555099487, + "learning_rate": 9.117693882921782e-06, + "loss": 0.799, + "step": 10802 + }, + { + "epoch": 0.21626004053749717, + "grad_norm": 1.1828057765960693, + "learning_rate": 9.117509978223102e-06, + "loss": 0.3164, + "step": 10803 + }, + { + "epoch": 0.21628005905462552, + "grad_norm": 1.1210800409317017, + "learning_rate": 9.117326056215296e-06, + "loss": 0.3407, + "step": 10804 + }, + { + "epoch": 0.21630007757175387, + "grad_norm": 1.0110828876495361, + "learning_rate": 9.117142116899138e-06, + "loss": 0.2748, + "step": 10805 + }, + { + "epoch": 0.21632009608888222, + "grad_norm": 1.168073058128357, + "learning_rate": 9.1169581602754e-06, + "loss": 0.3229, + "step": 10806 + }, + { + "epoch": 0.21634011460601055, + "grad_norm": 1.0491594076156616, + "learning_rate": 9.116774186344856e-06, + "loss": 0.3542, + "step": 10807 + }, + { + "epoch": 0.2163601331231389, + "grad_norm": 1.1965564489364624, + "learning_rate": 9.116590195108279e-06, + "loss": 0.3305, + "step": 10808 + }, + { + "epoch": 0.21638015164026725, + "grad_norm": 1.15718674659729, + "learning_rate": 9.116406186566443e-06, + "loss": 0.3499, + "step": 10809 + }, + { + "epoch": 0.2164001701573956, + "grad_norm": 1.0446664094924927, + "learning_rate": 9.11622216072012e-06, + "loss": 0.32, + "step": 10810 + }, + { + "epoch": 0.21642018867452392, + "grad_norm": 1.239439606666565, + "learning_rate": 9.116038117570085e-06, + "loss": 0.2958, + "step": 10811 + }, + { + "epoch": 0.21644020719165227, + "grad_norm": 1.118152379989624, + "learning_rate": 9.115854057117113e-06, + "loss": 0.2983, + "step": 10812 + }, + { + "epoch": 0.21646022570878062, + "grad_norm": 1.3076348304748535, + "learning_rate": 9.115669979361976e-06, + "loss": 0.337, + "step": 10813 + }, + { + "epoch": 0.21648024422590897, + "grad_norm": 1.7897619009017944, + "learning_rate": 9.115485884305446e-06, + "loss": 0.8808, + "step": 10814 + }, + { + "epoch": 0.2165002627430373, + "grad_norm": 1.0602978467941284, + "learning_rate": 9.115301771948301e-06, + "loss": 0.3451, + "step": 10815 + }, + { + "epoch": 0.21652028126016565, + "grad_norm": 1.105238914489746, + "learning_rate": 9.115117642291313e-06, + "loss": 0.361, + "step": 10816 + }, + { + "epoch": 0.216540299777294, + "grad_norm": 1.2088208198547363, + "learning_rate": 9.114933495335257e-06, + "loss": 0.3484, + "step": 10817 + }, + { + "epoch": 0.21656031829442235, + "grad_norm": 1.7394616603851318, + "learning_rate": 9.114749331080903e-06, + "loss": 0.8295, + "step": 10818 + }, + { + "epoch": 0.21658033681155067, + "grad_norm": 1.1847978830337524, + "learning_rate": 9.114565149529032e-06, + "loss": 0.3247, + "step": 10819 + }, + { + "epoch": 0.21660035532867902, + "grad_norm": 1.1008120775222778, + "learning_rate": 9.114380950680412e-06, + "loss": 0.3273, + "step": 10820 + }, + { + "epoch": 0.21662037384580737, + "grad_norm": 1.2105203866958618, + "learning_rate": 9.11419673453582e-06, + "loss": 0.3113, + "step": 10821 + }, + { + "epoch": 0.21664039236293572, + "grad_norm": 1.0705389976501465, + "learning_rate": 9.11401250109603e-06, + "loss": 0.317, + "step": 10822 + }, + { + "epoch": 0.21666041088006405, + "grad_norm": 1.1851117610931396, + "learning_rate": 9.11382825036182e-06, + "loss": 0.3145, + "step": 10823 + }, + { + "epoch": 0.2166804293971924, + "grad_norm": 1.1189889907836914, + "learning_rate": 9.113643982333958e-06, + "loss": 0.2998, + "step": 10824 + }, + { + "epoch": 0.21670044791432075, + "grad_norm": 1.1904903650283813, + "learning_rate": 9.113459697013222e-06, + "loss": 0.3168, + "step": 10825 + }, + { + "epoch": 0.2167204664314491, + "grad_norm": 1.089547038078308, + "learning_rate": 9.11327539440039e-06, + "loss": 0.332, + "step": 10826 + }, + { + "epoch": 0.21674048494857742, + "grad_norm": 1.148722767829895, + "learning_rate": 9.11309107449623e-06, + "loss": 0.2858, + "step": 10827 + }, + { + "epoch": 0.21676050346570577, + "grad_norm": 1.0493788719177246, + "learning_rate": 9.112906737301522e-06, + "loss": 0.2992, + "step": 10828 + }, + { + "epoch": 0.21678052198283412, + "grad_norm": 1.9661991596221924, + "learning_rate": 9.112722382817037e-06, + "loss": 0.8995, + "step": 10829 + }, + { + "epoch": 0.21680054049996247, + "grad_norm": 1.2939397096633911, + "learning_rate": 9.112538011043555e-06, + "loss": 0.3151, + "step": 10830 + }, + { + "epoch": 0.2168205590170908, + "grad_norm": 1.9161677360534668, + "learning_rate": 9.112353621981847e-06, + "loss": 0.8302, + "step": 10831 + }, + { + "epoch": 0.21684057753421915, + "grad_norm": 1.0163532495498657, + "learning_rate": 9.112169215632688e-06, + "loss": 0.3283, + "step": 10832 + }, + { + "epoch": 0.2168605960513475, + "grad_norm": 1.1784242391586304, + "learning_rate": 9.111984791996856e-06, + "loss": 0.2971, + "step": 10833 + }, + { + "epoch": 0.21688061456847585, + "grad_norm": 1.0834667682647705, + "learning_rate": 9.111800351075126e-06, + "loss": 0.3581, + "step": 10834 + }, + { + "epoch": 0.21690063308560417, + "grad_norm": 1.1139438152313232, + "learning_rate": 9.11161589286827e-06, + "loss": 0.2816, + "step": 10835 + }, + { + "epoch": 0.21692065160273252, + "grad_norm": 1.1008244752883911, + "learning_rate": 9.111431417377067e-06, + "loss": 0.3438, + "step": 10836 + }, + { + "epoch": 0.21694067011986087, + "grad_norm": 0.9836719036102295, + "learning_rate": 9.111246924602291e-06, + "loss": 0.2904, + "step": 10837 + }, + { + "epoch": 0.21696068863698922, + "grad_norm": 1.1376938819885254, + "learning_rate": 9.111062414544715e-06, + "loss": 0.3192, + "step": 10838 + }, + { + "epoch": 0.21698070715411755, + "grad_norm": 1.3159455060958862, + "learning_rate": 9.110877887205121e-06, + "loss": 0.3187, + "step": 10839 + }, + { + "epoch": 0.2170007256712459, + "grad_norm": 1.2103831768035889, + "learning_rate": 9.11069334258428e-06, + "loss": 0.323, + "step": 10840 + }, + { + "epoch": 0.21702074418837425, + "grad_norm": 1.0333672761917114, + "learning_rate": 9.110508780682968e-06, + "loss": 0.3239, + "step": 10841 + }, + { + "epoch": 0.2170407627055026, + "grad_norm": 1.060171365737915, + "learning_rate": 9.110324201501963e-06, + "loss": 0.3165, + "step": 10842 + }, + { + "epoch": 0.21706078122263092, + "grad_norm": 1.8010128736495972, + "learning_rate": 9.110139605042039e-06, + "loss": 0.7586, + "step": 10843 + }, + { + "epoch": 0.21708079973975927, + "grad_norm": 1.1291065216064453, + "learning_rate": 9.109954991303971e-06, + "loss": 0.3728, + "step": 10844 + }, + { + "epoch": 0.21710081825688762, + "grad_norm": 1.1123576164245605, + "learning_rate": 9.109770360288539e-06, + "loss": 0.3586, + "step": 10845 + }, + { + "epoch": 0.21712083677401597, + "grad_norm": 0.9934029579162598, + "learning_rate": 9.109585711996516e-06, + "loss": 0.3779, + "step": 10846 + }, + { + "epoch": 0.2171408552911443, + "grad_norm": 1.0900671482086182, + "learning_rate": 9.109401046428678e-06, + "loss": 0.3131, + "step": 10847 + }, + { + "epoch": 0.21716087380827265, + "grad_norm": 1.0561467409133911, + "learning_rate": 9.109216363585804e-06, + "loss": 0.3364, + "step": 10848 + }, + { + "epoch": 0.217180892325401, + "grad_norm": 1.107898473739624, + "learning_rate": 9.109031663468668e-06, + "loss": 0.306, + "step": 10849 + }, + { + "epoch": 0.21720091084252935, + "grad_norm": 1.0172511339187622, + "learning_rate": 9.108846946078047e-06, + "loss": 0.3011, + "step": 10850 + }, + { + "epoch": 0.21722092935965767, + "grad_norm": 0.9803413152694702, + "learning_rate": 9.108662211414718e-06, + "loss": 0.2855, + "step": 10851 + }, + { + "epoch": 0.21724094787678602, + "grad_norm": 1.0645772218704224, + "learning_rate": 9.108477459479456e-06, + "loss": 0.3231, + "step": 10852 + }, + { + "epoch": 0.21726096639391437, + "grad_norm": 1.0622891187667847, + "learning_rate": 9.108292690273041e-06, + "loss": 0.3049, + "step": 10853 + }, + { + "epoch": 0.21728098491104272, + "grad_norm": 1.0957343578338623, + "learning_rate": 9.108107903796246e-06, + "loss": 0.3651, + "step": 10854 + }, + { + "epoch": 0.21730100342817105, + "grad_norm": 1.022043228149414, + "learning_rate": 9.107923100049852e-06, + "loss": 0.3149, + "step": 10855 + }, + { + "epoch": 0.2173210219452994, + "grad_norm": 1.0378153324127197, + "learning_rate": 9.10773827903463e-06, + "loss": 0.2879, + "step": 10856 + }, + { + "epoch": 0.21734104046242775, + "grad_norm": 1.059095025062561, + "learning_rate": 9.107553440751362e-06, + "loss": 0.3313, + "step": 10857 + }, + { + "epoch": 0.2173610589795561, + "grad_norm": 1.1606066226959229, + "learning_rate": 9.107368585200822e-06, + "loss": 0.3401, + "step": 10858 + }, + { + "epoch": 0.21738107749668442, + "grad_norm": 1.2893507480621338, + "learning_rate": 9.10718371238379e-06, + "loss": 0.3559, + "step": 10859 + }, + { + "epoch": 0.21740109601381277, + "grad_norm": 0.9254217147827148, + "learning_rate": 9.106998822301041e-06, + "loss": 0.2909, + "step": 10860 + }, + { + "epoch": 0.21742111453094112, + "grad_norm": 1.2001844644546509, + "learning_rate": 9.106813914953354e-06, + "loss": 0.2968, + "step": 10861 + }, + { + "epoch": 0.21744113304806947, + "grad_norm": 1.1848708391189575, + "learning_rate": 9.106628990341503e-06, + "loss": 0.3441, + "step": 10862 + }, + { + "epoch": 0.2174611515651978, + "grad_norm": 1.07466721534729, + "learning_rate": 9.106444048466268e-06, + "loss": 0.3192, + "step": 10863 + }, + { + "epoch": 0.21748117008232615, + "grad_norm": 1.0184837579727173, + "learning_rate": 9.106259089328426e-06, + "loss": 0.2785, + "step": 10864 + }, + { + "epoch": 0.2175011885994545, + "grad_norm": 1.0159516334533691, + "learning_rate": 9.106074112928755e-06, + "loss": 0.3073, + "step": 10865 + }, + { + "epoch": 0.21752120711658285, + "grad_norm": 1.0117793083190918, + "learning_rate": 9.105889119268033e-06, + "loss": 0.3308, + "step": 10866 + }, + { + "epoch": 0.21754122563371117, + "grad_norm": 1.2019569873809814, + "learning_rate": 9.105704108347036e-06, + "loss": 0.3102, + "step": 10867 + }, + { + "epoch": 0.21756124415083952, + "grad_norm": 1.1512678861618042, + "learning_rate": 9.10551908016654e-06, + "loss": 0.2854, + "step": 10868 + }, + { + "epoch": 0.21758126266796787, + "grad_norm": 1.1033746004104614, + "learning_rate": 9.105334034727328e-06, + "loss": 0.3128, + "step": 10869 + }, + { + "epoch": 0.21760128118509622, + "grad_norm": 1.1154102087020874, + "learning_rate": 9.105148972030175e-06, + "loss": 0.3426, + "step": 10870 + }, + { + "epoch": 0.21762129970222455, + "grad_norm": 1.0206245183944702, + "learning_rate": 9.104963892075858e-06, + "loss": 0.2938, + "step": 10871 + }, + { + "epoch": 0.2176413182193529, + "grad_norm": 1.0568134784698486, + "learning_rate": 9.104778794865156e-06, + "loss": 0.3617, + "step": 10872 + }, + { + "epoch": 0.21766133673648125, + "grad_norm": 1.0869332551956177, + "learning_rate": 9.10459368039885e-06, + "loss": 0.3285, + "step": 10873 + }, + { + "epoch": 0.2176813552536096, + "grad_norm": 1.1532161235809326, + "learning_rate": 9.104408548677712e-06, + "loss": 0.3516, + "step": 10874 + }, + { + "epoch": 0.21770137377073792, + "grad_norm": 1.0697290897369385, + "learning_rate": 9.104223399702526e-06, + "loss": 0.2882, + "step": 10875 + }, + { + "epoch": 0.21772139228786627, + "grad_norm": 1.9336971044540405, + "learning_rate": 9.104038233474068e-06, + "loss": 0.8303, + "step": 10876 + }, + { + "epoch": 0.21774141080499462, + "grad_norm": 1.1269522905349731, + "learning_rate": 9.103853049993117e-06, + "loss": 0.3044, + "step": 10877 + }, + { + "epoch": 0.21776142932212297, + "grad_norm": 1.0440502166748047, + "learning_rate": 9.10366784926045e-06, + "loss": 0.3271, + "step": 10878 + }, + { + "epoch": 0.2177814478392513, + "grad_norm": 1.1406863927841187, + "learning_rate": 9.103482631276845e-06, + "loss": 0.3337, + "step": 10879 + }, + { + "epoch": 0.21780146635637965, + "grad_norm": 0.9615980386734009, + "learning_rate": 9.103297396043082e-06, + "loss": 0.2983, + "step": 10880 + }, + { + "epoch": 0.217821484873508, + "grad_norm": 1.1301755905151367, + "learning_rate": 9.103112143559942e-06, + "loss": 0.32, + "step": 10881 + }, + { + "epoch": 0.21784150339063635, + "grad_norm": 1.2157691717147827, + "learning_rate": 9.102926873828201e-06, + "loss": 0.3242, + "step": 10882 + }, + { + "epoch": 0.21786152190776467, + "grad_norm": 1.1699484586715698, + "learning_rate": 9.102741586848638e-06, + "loss": 0.3039, + "step": 10883 + }, + { + "epoch": 0.21788154042489302, + "grad_norm": 1.0648841857910156, + "learning_rate": 9.102556282622033e-06, + "loss": 0.2817, + "step": 10884 + }, + { + "epoch": 0.21790155894202137, + "grad_norm": 1.0850542783737183, + "learning_rate": 9.102370961149164e-06, + "loss": 0.3308, + "step": 10885 + }, + { + "epoch": 0.21792157745914972, + "grad_norm": 1.1060028076171875, + "learning_rate": 9.10218562243081e-06, + "loss": 0.3459, + "step": 10886 + }, + { + "epoch": 0.21794159597627805, + "grad_norm": 1.093893051147461, + "learning_rate": 9.10200026646775e-06, + "loss": 0.365, + "step": 10887 + }, + { + "epoch": 0.2179616144934064, + "grad_norm": 1.1165003776550293, + "learning_rate": 9.101814893260765e-06, + "loss": 0.3249, + "step": 10888 + }, + { + "epoch": 0.21798163301053475, + "grad_norm": 1.3000894784927368, + "learning_rate": 9.101629502810632e-06, + "loss": 0.3315, + "step": 10889 + }, + { + "epoch": 0.2180016515276631, + "grad_norm": 1.2021156549453735, + "learning_rate": 9.10144409511813e-06, + "loss": 0.331, + "step": 10890 + }, + { + "epoch": 0.21802167004479142, + "grad_norm": 1.8721216917037964, + "learning_rate": 9.10125867018404e-06, + "loss": 0.7458, + "step": 10891 + }, + { + "epoch": 0.21804168856191977, + "grad_norm": 1.120368480682373, + "learning_rate": 9.101073228009144e-06, + "loss": 0.3085, + "step": 10892 + }, + { + "epoch": 0.21806170707904812, + "grad_norm": 1.0989794731140137, + "learning_rate": 9.100887768594216e-06, + "loss": 0.3047, + "step": 10893 + }, + { + "epoch": 0.21808172559617647, + "grad_norm": 1.0312864780426025, + "learning_rate": 9.10070229194004e-06, + "loss": 0.3171, + "step": 10894 + }, + { + "epoch": 0.2181017441133048, + "grad_norm": 1.1271371841430664, + "learning_rate": 9.100516798047393e-06, + "loss": 0.3066, + "step": 10895 + }, + { + "epoch": 0.21812176263043315, + "grad_norm": 1.1143749952316284, + "learning_rate": 9.100331286917056e-06, + "loss": 0.3414, + "step": 10896 + }, + { + "epoch": 0.2181417811475615, + "grad_norm": 1.0097547769546509, + "learning_rate": 9.100145758549808e-06, + "loss": 0.3008, + "step": 10897 + }, + { + "epoch": 0.21816179966468985, + "grad_norm": 1.2105507850646973, + "learning_rate": 9.09996021294643e-06, + "loss": 0.3769, + "step": 10898 + }, + { + "epoch": 0.21818181818181817, + "grad_norm": 1.0881450176239014, + "learning_rate": 9.099774650107702e-06, + "loss": 0.3143, + "step": 10899 + }, + { + "epoch": 0.21820183669894652, + "grad_norm": 1.1913270950317383, + "learning_rate": 9.099589070034405e-06, + "loss": 0.3045, + "step": 10900 + }, + { + "epoch": 0.21822185521607487, + "grad_norm": 1.1391754150390625, + "learning_rate": 9.099403472727315e-06, + "loss": 0.3106, + "step": 10901 + }, + { + "epoch": 0.21824187373320322, + "grad_norm": 1.0773835182189941, + "learning_rate": 9.099217858187217e-06, + "loss": 0.3383, + "step": 10902 + }, + { + "epoch": 0.21826189225033155, + "grad_norm": 1.2221271991729736, + "learning_rate": 9.09903222641489e-06, + "loss": 0.325, + "step": 10903 + }, + { + "epoch": 0.2182819107674599, + "grad_norm": 1.0457628965377808, + "learning_rate": 9.09884657741111e-06, + "loss": 0.3566, + "step": 10904 + }, + { + "epoch": 0.21830192928458825, + "grad_norm": 1.1718575954437256, + "learning_rate": 9.098660911176664e-06, + "loss": 0.3661, + "step": 10905 + }, + { + "epoch": 0.2183219478017166, + "grad_norm": 1.284532070159912, + "learning_rate": 9.098475227712329e-06, + "loss": 0.2845, + "step": 10906 + }, + { + "epoch": 0.21834196631884492, + "grad_norm": 1.0864670276641846, + "learning_rate": 9.098289527018886e-06, + "loss": 0.3207, + "step": 10907 + }, + { + "epoch": 0.21836198483597327, + "grad_norm": 1.0340100526809692, + "learning_rate": 9.098103809097116e-06, + "loss": 0.2888, + "step": 10908 + }, + { + "epoch": 0.21838200335310162, + "grad_norm": 1.003647804260254, + "learning_rate": 9.0979180739478e-06, + "loss": 0.3311, + "step": 10909 + }, + { + "epoch": 0.21840202187022997, + "grad_norm": 0.9285929203033447, + "learning_rate": 9.097732321571717e-06, + "loss": 0.3026, + "step": 10910 + }, + { + "epoch": 0.2184220403873583, + "grad_norm": 1.214970350265503, + "learning_rate": 9.09754655196965e-06, + "loss": 0.3407, + "step": 10911 + }, + { + "epoch": 0.21844205890448665, + "grad_norm": 1.0719660520553589, + "learning_rate": 9.097360765142377e-06, + "loss": 0.3502, + "step": 10912 + }, + { + "epoch": 0.218462077421615, + "grad_norm": 1.1715083122253418, + "learning_rate": 9.097174961090684e-06, + "loss": 0.3497, + "step": 10913 + }, + { + "epoch": 0.21848209593874335, + "grad_norm": 1.5466614961624146, + "learning_rate": 9.096989139815349e-06, + "loss": 0.3336, + "step": 10914 + }, + { + "epoch": 0.21850211445587167, + "grad_norm": 1.1550322771072388, + "learning_rate": 9.096803301317152e-06, + "loss": 0.3179, + "step": 10915 + }, + { + "epoch": 0.21852213297300002, + "grad_norm": 2.1688661575317383, + "learning_rate": 9.096617445596876e-06, + "loss": 0.8766, + "step": 10916 + }, + { + "epoch": 0.21854215149012837, + "grad_norm": 1.0810730457305908, + "learning_rate": 9.096431572655302e-06, + "loss": 0.3011, + "step": 10917 + }, + { + "epoch": 0.21856217000725672, + "grad_norm": 1.205833077430725, + "learning_rate": 9.096245682493212e-06, + "loss": 0.3387, + "step": 10918 + }, + { + "epoch": 0.21858218852438505, + "grad_norm": 1.088600754737854, + "learning_rate": 9.096059775111385e-06, + "loss": 0.3105, + "step": 10919 + }, + { + "epoch": 0.2186022070415134, + "grad_norm": 1.0161488056182861, + "learning_rate": 9.095873850510605e-06, + "loss": 0.3284, + "step": 10920 + }, + { + "epoch": 0.21862222555864175, + "grad_norm": 0.983819842338562, + "learning_rate": 9.095687908691653e-06, + "loss": 0.2812, + "step": 10921 + }, + { + "epoch": 0.2186422440757701, + "grad_norm": 1.686140537261963, + "learning_rate": 9.09550194965531e-06, + "loss": 0.8623, + "step": 10922 + }, + { + "epoch": 0.21866226259289842, + "grad_norm": 1.0935332775115967, + "learning_rate": 9.095315973402359e-06, + "loss": 0.37, + "step": 10923 + }, + { + "epoch": 0.21868228111002677, + "grad_norm": 0.9784026741981506, + "learning_rate": 9.09512997993358e-06, + "loss": 0.3054, + "step": 10924 + }, + { + "epoch": 0.21870229962715512, + "grad_norm": 1.0611704587936401, + "learning_rate": 9.094943969249757e-06, + "loss": 0.3122, + "step": 10925 + }, + { + "epoch": 0.21872231814428347, + "grad_norm": 1.1900455951690674, + "learning_rate": 9.094757941351669e-06, + "loss": 0.3435, + "step": 10926 + }, + { + "epoch": 0.2187423366614118, + "grad_norm": 1.0291460752487183, + "learning_rate": 9.094571896240101e-06, + "loss": 0.2906, + "step": 10927 + }, + { + "epoch": 0.21876235517854015, + "grad_norm": 1.1562687158584595, + "learning_rate": 9.094385833915833e-06, + "loss": 0.3492, + "step": 10928 + }, + { + "epoch": 0.2187823736956685, + "grad_norm": 1.0767797231674194, + "learning_rate": 9.09419975437965e-06, + "loss": 0.3851, + "step": 10929 + }, + { + "epoch": 0.21880239221279685, + "grad_norm": 1.1725863218307495, + "learning_rate": 9.09401365763233e-06, + "loss": 0.3568, + "step": 10930 + }, + { + "epoch": 0.21882241072992517, + "grad_norm": 1.1411372423171997, + "learning_rate": 9.09382754367466e-06, + "loss": 0.3366, + "step": 10931 + }, + { + "epoch": 0.21884242924705352, + "grad_norm": 1.1158368587493896, + "learning_rate": 9.093641412507418e-06, + "loss": 0.3163, + "step": 10932 + }, + { + "epoch": 0.21886244776418187, + "grad_norm": 1.1321383714675903, + "learning_rate": 9.09345526413139e-06, + "loss": 0.3227, + "step": 10933 + }, + { + "epoch": 0.21888246628131022, + "grad_norm": 1.24186372756958, + "learning_rate": 9.093269098547355e-06, + "loss": 0.3226, + "step": 10934 + }, + { + "epoch": 0.21890248479843855, + "grad_norm": 1.090515375137329, + "learning_rate": 9.0930829157561e-06, + "loss": 0.3486, + "step": 10935 + }, + { + "epoch": 0.2189225033155669, + "grad_norm": 1.0362533330917358, + "learning_rate": 9.092896715758403e-06, + "loss": 0.3315, + "step": 10936 + }, + { + "epoch": 0.21894252183269525, + "grad_norm": 1.7704278230667114, + "learning_rate": 9.09271049855505e-06, + "loss": 0.846, + "step": 10937 + }, + { + "epoch": 0.2189625403498236, + "grad_norm": 1.088600516319275, + "learning_rate": 9.092524264146822e-06, + "loss": 0.3198, + "step": 10938 + }, + { + "epoch": 0.21898255886695192, + "grad_norm": 1.0892200469970703, + "learning_rate": 9.092338012534505e-06, + "loss": 0.332, + "step": 10939 + }, + { + "epoch": 0.21900257738408027, + "grad_norm": 1.1070526838302612, + "learning_rate": 9.092151743718878e-06, + "loss": 0.3413, + "step": 10940 + }, + { + "epoch": 0.21902259590120862, + "grad_norm": 1.801144003868103, + "learning_rate": 9.091965457700727e-06, + "loss": 0.8497, + "step": 10941 + }, + { + "epoch": 0.21904261441833697, + "grad_norm": 1.1662195920944214, + "learning_rate": 9.091779154480831e-06, + "loss": 0.3175, + "step": 10942 + }, + { + "epoch": 0.2190626329354653, + "grad_norm": 1.0558431148529053, + "learning_rate": 9.09159283405998e-06, + "loss": 0.3382, + "step": 10943 + }, + { + "epoch": 0.21908265145259365, + "grad_norm": 1.1138232946395874, + "learning_rate": 9.09140649643895e-06, + "loss": 0.3264, + "step": 10944 + }, + { + "epoch": 0.219102669969722, + "grad_norm": 1.0647950172424316, + "learning_rate": 9.091220141618528e-06, + "loss": 0.3162, + "step": 10945 + }, + { + "epoch": 0.21912268848685035, + "grad_norm": 2.03226375579834, + "learning_rate": 9.091033769599498e-06, + "loss": 0.8256, + "step": 10946 + }, + { + "epoch": 0.21914270700397867, + "grad_norm": 1.1169995069503784, + "learning_rate": 9.090847380382642e-06, + "loss": 0.3483, + "step": 10947 + }, + { + "epoch": 0.21916272552110702, + "grad_norm": 1.0400128364562988, + "learning_rate": 9.090660973968744e-06, + "loss": 0.2973, + "step": 10948 + }, + { + "epoch": 0.21918274403823537, + "grad_norm": 1.0337885618209839, + "learning_rate": 9.090474550358588e-06, + "loss": 0.331, + "step": 10949 + }, + { + "epoch": 0.21920276255536372, + "grad_norm": 1.104859471321106, + "learning_rate": 9.090288109552957e-06, + "loss": 0.3733, + "step": 10950 + }, + { + "epoch": 0.21922278107249205, + "grad_norm": 1.0453420877456665, + "learning_rate": 9.090101651552635e-06, + "loss": 0.3663, + "step": 10951 + }, + { + "epoch": 0.2192427995896204, + "grad_norm": 1.0782870054244995, + "learning_rate": 9.089915176358404e-06, + "loss": 0.3147, + "step": 10952 + }, + { + "epoch": 0.21926281810674875, + "grad_norm": 1.229343056678772, + "learning_rate": 9.089728683971053e-06, + "loss": 0.3783, + "step": 10953 + }, + { + "epoch": 0.2192828366238771, + "grad_norm": 1.2062225341796875, + "learning_rate": 9.089542174391361e-06, + "loss": 0.3354, + "step": 10954 + }, + { + "epoch": 0.21930285514100542, + "grad_norm": 1.1149815320968628, + "learning_rate": 9.089355647620114e-06, + "loss": 0.3445, + "step": 10955 + }, + { + "epoch": 0.21932287365813377, + "grad_norm": 1.0432606935501099, + "learning_rate": 9.089169103658096e-06, + "loss": 0.2939, + "step": 10956 + }, + { + "epoch": 0.21934289217526212, + "grad_norm": 1.1213319301605225, + "learning_rate": 9.088982542506091e-06, + "loss": 0.3171, + "step": 10957 + }, + { + "epoch": 0.21936291069239047, + "grad_norm": 1.2591111660003662, + "learning_rate": 9.088795964164883e-06, + "loss": 0.323, + "step": 10958 + }, + { + "epoch": 0.2193829292095188, + "grad_norm": 1.131608247756958, + "learning_rate": 9.088609368635257e-06, + "loss": 0.3525, + "step": 10959 + }, + { + "epoch": 0.21940294772664715, + "grad_norm": 1.2945607900619507, + "learning_rate": 9.088422755917997e-06, + "loss": 0.4012, + "step": 10960 + }, + { + "epoch": 0.2194229662437755, + "grad_norm": 1.073706865310669, + "learning_rate": 9.088236126013887e-06, + "loss": 0.3441, + "step": 10961 + }, + { + "epoch": 0.21944298476090385, + "grad_norm": 1.0493203401565552, + "learning_rate": 9.088049478923713e-06, + "loss": 0.3053, + "step": 10962 + }, + { + "epoch": 0.21946300327803217, + "grad_norm": 1.0742923021316528, + "learning_rate": 9.087862814648258e-06, + "loss": 0.3545, + "step": 10963 + }, + { + "epoch": 0.21948302179516052, + "grad_norm": 1.9335976839065552, + "learning_rate": 9.08767613318831e-06, + "loss": 0.8109, + "step": 10964 + }, + { + "epoch": 0.21950304031228887, + "grad_norm": 1.0970675945281982, + "learning_rate": 9.087489434544648e-06, + "loss": 0.346, + "step": 10965 + }, + { + "epoch": 0.21952305882941722, + "grad_norm": 1.1107240915298462, + "learning_rate": 9.087302718718061e-06, + "loss": 0.354, + "step": 10966 + }, + { + "epoch": 0.21954307734654555, + "grad_norm": 1.1440484523773193, + "learning_rate": 9.087115985709335e-06, + "loss": 0.2956, + "step": 10967 + }, + { + "epoch": 0.2195630958636739, + "grad_norm": 1.1081881523132324, + "learning_rate": 9.086929235519251e-06, + "loss": 0.3112, + "step": 10968 + }, + { + "epoch": 0.21958311438080225, + "grad_norm": 1.1900756359100342, + "learning_rate": 9.086742468148595e-06, + "loss": 0.3299, + "step": 10969 + }, + { + "epoch": 0.2196031328979306, + "grad_norm": 1.08937668800354, + "learning_rate": 9.086555683598154e-06, + "loss": 0.3533, + "step": 10970 + }, + { + "epoch": 0.21962315141505892, + "grad_norm": 1.310442566871643, + "learning_rate": 9.086368881868713e-06, + "loss": 0.3137, + "step": 10971 + }, + { + "epoch": 0.21964316993218727, + "grad_norm": 1.8668326139450073, + "learning_rate": 9.086182062961057e-06, + "loss": 0.8427, + "step": 10972 + }, + { + "epoch": 0.21966318844931562, + "grad_norm": 0.9814786314964294, + "learning_rate": 9.08599522687597e-06, + "loss": 0.2991, + "step": 10973 + }, + { + "epoch": 0.21968320696644397, + "grad_norm": 1.0871150493621826, + "learning_rate": 9.08580837361424e-06, + "loss": 0.2945, + "step": 10974 + }, + { + "epoch": 0.2197032254835723, + "grad_norm": 1.1472578048706055, + "learning_rate": 9.085621503176648e-06, + "loss": 0.3142, + "step": 10975 + }, + { + "epoch": 0.21972324400070065, + "grad_norm": 1.1451969146728516, + "learning_rate": 9.085434615563984e-06, + "loss": 0.3552, + "step": 10976 + }, + { + "epoch": 0.219743262517829, + "grad_norm": 1.0723575353622437, + "learning_rate": 9.085247710777033e-06, + "loss": 0.3088, + "step": 10977 + }, + { + "epoch": 0.21976328103495735, + "grad_norm": 1.2813302278518677, + "learning_rate": 9.085060788816577e-06, + "loss": 0.3464, + "step": 10978 + }, + { + "epoch": 0.21978329955208567, + "grad_norm": 1.1298465728759766, + "learning_rate": 9.084873849683408e-06, + "loss": 0.3366, + "step": 10979 + }, + { + "epoch": 0.21980331806921402, + "grad_norm": 1.168120265007019, + "learning_rate": 9.084686893378305e-06, + "loss": 0.3232, + "step": 10980 + }, + { + "epoch": 0.21982333658634237, + "grad_norm": 1.207044243812561, + "learning_rate": 9.08449991990206e-06, + "loss": 0.3541, + "step": 10981 + }, + { + "epoch": 0.21984335510347072, + "grad_norm": 1.1744433641433716, + "learning_rate": 9.084312929255455e-06, + "loss": 0.3118, + "step": 10982 + }, + { + "epoch": 0.21986337362059905, + "grad_norm": 1.117355227470398, + "learning_rate": 9.08412592143928e-06, + "loss": 0.342, + "step": 10983 + }, + { + "epoch": 0.2198833921377274, + "grad_norm": 1.2258689403533936, + "learning_rate": 9.083938896454315e-06, + "loss": 0.3721, + "step": 10984 + }, + { + "epoch": 0.21990341065485575, + "grad_norm": 1.174758791923523, + "learning_rate": 9.08375185430135e-06, + "loss": 0.297, + "step": 10985 + }, + { + "epoch": 0.2199234291719841, + "grad_norm": 1.043089509010315, + "learning_rate": 9.083564794981174e-06, + "loss": 0.3444, + "step": 10986 + }, + { + "epoch": 0.21994344768911242, + "grad_norm": 2.009030342102051, + "learning_rate": 9.083377718494568e-06, + "loss": 0.825, + "step": 10987 + }, + { + "epoch": 0.21996346620624077, + "grad_norm": 1.121885895729065, + "learning_rate": 9.083190624842321e-06, + "loss": 0.3981, + "step": 10988 + }, + { + "epoch": 0.21998348472336912, + "grad_norm": 1.8520073890686035, + "learning_rate": 9.08300351402522e-06, + "loss": 0.8567, + "step": 10989 + }, + { + "epoch": 0.22000350324049747, + "grad_norm": 1.9645076990127563, + "learning_rate": 9.08281638604405e-06, + "loss": 0.9024, + "step": 10990 + }, + { + "epoch": 0.2200235217576258, + "grad_norm": 1.2158408164978027, + "learning_rate": 9.0826292408996e-06, + "loss": 0.3524, + "step": 10991 + }, + { + "epoch": 0.22004354027475415, + "grad_norm": 1.186946153640747, + "learning_rate": 9.082442078592655e-06, + "loss": 0.371, + "step": 10992 + }, + { + "epoch": 0.2200635587918825, + "grad_norm": 1.0300073623657227, + "learning_rate": 9.082254899124002e-06, + "loss": 0.2884, + "step": 10993 + }, + { + "epoch": 0.22008357730901085, + "grad_norm": 1.0445308685302734, + "learning_rate": 9.082067702494426e-06, + "loss": 0.3231, + "step": 10994 + }, + { + "epoch": 0.22010359582613917, + "grad_norm": 1.0958926677703857, + "learning_rate": 9.081880488704718e-06, + "loss": 0.3293, + "step": 10995 + }, + { + "epoch": 0.22012361434326752, + "grad_norm": 1.0467838048934937, + "learning_rate": 9.081693257755662e-06, + "loss": 0.3153, + "step": 10996 + }, + { + "epoch": 0.22014363286039587, + "grad_norm": 1.1248114109039307, + "learning_rate": 9.081506009648045e-06, + "loss": 0.3727, + "step": 10997 + }, + { + "epoch": 0.22016365137752422, + "grad_norm": 0.9979984164237976, + "learning_rate": 9.081318744382657e-06, + "loss": 0.2982, + "step": 10998 + }, + { + "epoch": 0.22018366989465255, + "grad_norm": 1.0605117082595825, + "learning_rate": 9.081131461960282e-06, + "loss": 0.3598, + "step": 10999 + }, + { + "epoch": 0.2202036884117809, + "grad_norm": 1.0586336851119995, + "learning_rate": 9.080944162381708e-06, + "loss": 0.3727, + "step": 11000 + }, + { + "epoch": 0.22022370692890925, + "grad_norm": 1.0564547777175903, + "learning_rate": 9.080756845647724e-06, + "loss": 0.3472, + "step": 11001 + }, + { + "epoch": 0.2202437254460376, + "grad_norm": 1.039544701576233, + "learning_rate": 9.080569511759117e-06, + "loss": 0.2894, + "step": 11002 + }, + { + "epoch": 0.22026374396316592, + "grad_norm": 1.0818405151367188, + "learning_rate": 9.080382160716675e-06, + "loss": 0.3477, + "step": 11003 + }, + { + "epoch": 0.22028376248029427, + "grad_norm": 1.1809881925582886, + "learning_rate": 9.080194792521182e-06, + "loss": 0.3505, + "step": 11004 + }, + { + "epoch": 0.22030378099742262, + "grad_norm": 1.0056679248809814, + "learning_rate": 9.080007407173427e-06, + "loss": 0.2949, + "step": 11005 + }, + { + "epoch": 0.22032379951455097, + "grad_norm": 1.2012484073638916, + "learning_rate": 9.079820004674203e-06, + "loss": 0.3693, + "step": 11006 + }, + { + "epoch": 0.2203438180316793, + "grad_norm": 1.0952982902526855, + "learning_rate": 9.079632585024292e-06, + "loss": 0.3555, + "step": 11007 + }, + { + "epoch": 0.22036383654880765, + "grad_norm": 1.027313232421875, + "learning_rate": 9.079445148224482e-06, + "loss": 0.298, + "step": 11008 + }, + { + "epoch": 0.220383855065936, + "grad_norm": 1.1522948741912842, + "learning_rate": 9.079257694275563e-06, + "loss": 0.3229, + "step": 11009 + }, + { + "epoch": 0.22040387358306435, + "grad_norm": 1.0466880798339844, + "learning_rate": 9.079070223178323e-06, + "loss": 0.3384, + "step": 11010 + }, + { + "epoch": 0.22042389210019267, + "grad_norm": 1.4029738903045654, + "learning_rate": 9.07888273493355e-06, + "loss": 0.341, + "step": 11011 + }, + { + "epoch": 0.22044391061732102, + "grad_norm": 1.1190824508666992, + "learning_rate": 9.078695229542032e-06, + "loss": 0.3203, + "step": 11012 + }, + { + "epoch": 0.22046392913444937, + "grad_norm": 1.0963505506515503, + "learning_rate": 9.078507707004558e-06, + "loss": 0.2838, + "step": 11013 + }, + { + "epoch": 0.22048394765157772, + "grad_norm": 1.1641874313354492, + "learning_rate": 9.078320167321913e-06, + "loss": 0.3436, + "step": 11014 + }, + { + "epoch": 0.22050396616870604, + "grad_norm": 1.9497936964035034, + "learning_rate": 9.078132610494888e-06, + "loss": 0.8111, + "step": 11015 + }, + { + "epoch": 0.2205239846858344, + "grad_norm": 1.1348745822906494, + "learning_rate": 9.077945036524272e-06, + "loss": 0.3213, + "step": 11016 + }, + { + "epoch": 0.22054400320296275, + "grad_norm": 1.1938644647598267, + "learning_rate": 9.077757445410853e-06, + "loss": 0.3601, + "step": 11017 + }, + { + "epoch": 0.2205640217200911, + "grad_norm": 1.1842845678329468, + "learning_rate": 9.077569837155418e-06, + "loss": 0.3899, + "step": 11018 + }, + { + "epoch": 0.22058404023721942, + "grad_norm": 1.048963189125061, + "learning_rate": 9.077382211758757e-06, + "loss": 0.3552, + "step": 11019 + }, + { + "epoch": 0.22060405875434777, + "grad_norm": 1.202757716178894, + "learning_rate": 9.077194569221659e-06, + "loss": 0.3395, + "step": 11020 + }, + { + "epoch": 0.22062407727147612, + "grad_norm": 1.0637495517730713, + "learning_rate": 9.077006909544912e-06, + "loss": 0.2984, + "step": 11021 + }, + { + "epoch": 0.22064409578860447, + "grad_norm": 1.0619527101516724, + "learning_rate": 9.076819232729305e-06, + "loss": 0.3569, + "step": 11022 + }, + { + "epoch": 0.2206641143057328, + "grad_norm": 1.1552358865737915, + "learning_rate": 9.076631538775628e-06, + "loss": 0.3364, + "step": 11023 + }, + { + "epoch": 0.22068413282286115, + "grad_norm": 1.251786470413208, + "learning_rate": 9.076443827684671e-06, + "loss": 0.3363, + "step": 11024 + }, + { + "epoch": 0.2207041513399895, + "grad_norm": 1.1748411655426025, + "learning_rate": 9.07625609945722e-06, + "loss": 0.3819, + "step": 11025 + }, + { + "epoch": 0.22072416985711785, + "grad_norm": 1.0423948764801025, + "learning_rate": 9.076068354094065e-06, + "loss": 0.3245, + "step": 11026 + }, + { + "epoch": 0.22074418837424617, + "grad_norm": 1.0114784240722656, + "learning_rate": 9.075880591595997e-06, + "loss": 0.3417, + "step": 11027 + }, + { + "epoch": 0.22076420689137452, + "grad_norm": 1.1225765943527222, + "learning_rate": 9.075692811963803e-06, + "loss": 0.3052, + "step": 11028 + }, + { + "epoch": 0.22078422540850287, + "grad_norm": 1.268830418586731, + "learning_rate": 9.075505015198273e-06, + "loss": 0.3453, + "step": 11029 + }, + { + "epoch": 0.22080424392563122, + "grad_norm": 1.8412610292434692, + "learning_rate": 9.0753172013002e-06, + "loss": 0.8381, + "step": 11030 + }, + { + "epoch": 0.22082426244275954, + "grad_norm": 1.9111363887786865, + "learning_rate": 9.075129370270368e-06, + "loss": 0.9028, + "step": 11031 + }, + { + "epoch": 0.2208442809598879, + "grad_norm": 1.0560253858566284, + "learning_rate": 9.074941522109568e-06, + "loss": 0.3515, + "step": 11032 + }, + { + "epoch": 0.22086429947701625, + "grad_norm": 1.2430269718170166, + "learning_rate": 9.074753656818593e-06, + "loss": 0.3385, + "step": 11033 + }, + { + "epoch": 0.2208843179941446, + "grad_norm": 1.0443793535232544, + "learning_rate": 9.07456577439823e-06, + "loss": 0.33, + "step": 11034 + }, + { + "epoch": 0.22090433651127292, + "grad_norm": 2.3539838790893555, + "learning_rate": 9.07437787484927e-06, + "loss": 0.8019, + "step": 11035 + }, + { + "epoch": 0.22092435502840127, + "grad_norm": 1.0522743463516235, + "learning_rate": 9.0741899581725e-06, + "loss": 0.3526, + "step": 11036 + }, + { + "epoch": 0.22094437354552962, + "grad_norm": 1.059677243232727, + "learning_rate": 9.074002024368715e-06, + "loss": 0.3206, + "step": 11037 + }, + { + "epoch": 0.22096439206265797, + "grad_norm": 1.9636274576187134, + "learning_rate": 9.073814073438701e-06, + "loss": 0.8651, + "step": 11038 + }, + { + "epoch": 0.2209844105797863, + "grad_norm": 1.0834810733795166, + "learning_rate": 9.073626105383249e-06, + "loss": 0.3028, + "step": 11039 + }, + { + "epoch": 0.22100442909691465, + "grad_norm": 1.0153627395629883, + "learning_rate": 9.073438120203148e-06, + "loss": 0.3109, + "step": 11040 + }, + { + "epoch": 0.221024447614043, + "grad_norm": 1.1637861728668213, + "learning_rate": 9.073250117899193e-06, + "loss": 0.3014, + "step": 11041 + }, + { + "epoch": 0.22104446613117135, + "grad_norm": 1.2631967067718506, + "learning_rate": 9.073062098472168e-06, + "loss": 0.3176, + "step": 11042 + }, + { + "epoch": 0.22106448464829967, + "grad_norm": 1.216373085975647, + "learning_rate": 9.072874061922868e-06, + "loss": 0.3023, + "step": 11043 + }, + { + "epoch": 0.22108450316542802, + "grad_norm": 1.0256139039993286, + "learning_rate": 9.072686008252083e-06, + "loss": 0.2824, + "step": 11044 + }, + { + "epoch": 0.22110452168255637, + "grad_norm": 1.007285237312317, + "learning_rate": 9.0724979374606e-06, + "loss": 0.3125, + "step": 11045 + }, + { + "epoch": 0.22112454019968472, + "grad_norm": 1.1295342445373535, + "learning_rate": 9.072309849549215e-06, + "loss": 0.3209, + "step": 11046 + }, + { + "epoch": 0.22114455871681304, + "grad_norm": 1.0689500570297241, + "learning_rate": 9.072121744518715e-06, + "loss": 0.3715, + "step": 11047 + }, + { + "epoch": 0.2211645772339414, + "grad_norm": 1.3384068012237549, + "learning_rate": 9.071933622369888e-06, + "loss": 0.3903, + "step": 11048 + }, + { + "epoch": 0.22118459575106975, + "grad_norm": 1.177771806716919, + "learning_rate": 9.071745483103532e-06, + "loss": 0.336, + "step": 11049 + }, + { + "epoch": 0.2212046142681981, + "grad_norm": 1.0434706211090088, + "learning_rate": 9.071557326720434e-06, + "loss": 0.3275, + "step": 11050 + }, + { + "epoch": 0.22122463278532642, + "grad_norm": 1.0808252096176147, + "learning_rate": 9.071369153221383e-06, + "loss": 0.3584, + "step": 11051 + }, + { + "epoch": 0.22124465130245477, + "grad_norm": 1.0954235792160034, + "learning_rate": 9.071180962607175e-06, + "loss": 0.3447, + "step": 11052 + }, + { + "epoch": 0.22126466981958312, + "grad_norm": 1.0721518993377686, + "learning_rate": 9.070992754878597e-06, + "loss": 0.3372, + "step": 11053 + }, + { + "epoch": 0.22128468833671147, + "grad_norm": 1.0150389671325684, + "learning_rate": 9.07080453003644e-06, + "loss": 0.3182, + "step": 11054 + }, + { + "epoch": 0.2213047068538398, + "grad_norm": 1.244419813156128, + "learning_rate": 9.070616288081499e-06, + "loss": 0.3375, + "step": 11055 + }, + { + "epoch": 0.22132472537096815, + "grad_norm": 1.0442944765090942, + "learning_rate": 9.070428029014563e-06, + "loss": 0.3242, + "step": 11056 + }, + { + "epoch": 0.2213447438880965, + "grad_norm": 0.9761172533035278, + "learning_rate": 9.070239752836422e-06, + "loss": 0.3253, + "step": 11057 + }, + { + "epoch": 0.22136476240522485, + "grad_norm": 0.9692269563674927, + "learning_rate": 9.070051459547872e-06, + "loss": 0.3172, + "step": 11058 + }, + { + "epoch": 0.22138478092235317, + "grad_norm": 1.0265554189682007, + "learning_rate": 9.0698631491497e-06, + "loss": 0.3279, + "step": 11059 + }, + { + "epoch": 0.22140479943948152, + "grad_norm": 1.057489275932312, + "learning_rate": 9.069674821642697e-06, + "loss": 0.3161, + "step": 11060 + }, + { + "epoch": 0.22142481795660987, + "grad_norm": 1.0726054906845093, + "learning_rate": 9.06948647702766e-06, + "loss": 0.3019, + "step": 11061 + }, + { + "epoch": 0.22144483647373822, + "grad_norm": 1.067445158958435, + "learning_rate": 9.069298115305376e-06, + "loss": 0.3169, + "step": 11062 + }, + { + "epoch": 0.22146485499086654, + "grad_norm": 1.1239862442016602, + "learning_rate": 9.069109736476639e-06, + "loss": 0.3639, + "step": 11063 + }, + { + "epoch": 0.2214848735079949, + "grad_norm": 1.0495407581329346, + "learning_rate": 9.06892134054224e-06, + "loss": 0.2626, + "step": 11064 + }, + { + "epoch": 0.22150489202512325, + "grad_norm": 1.110830307006836, + "learning_rate": 9.06873292750297e-06, + "loss": 0.3295, + "step": 11065 + }, + { + "epoch": 0.2215249105422516, + "grad_norm": 1.005647897720337, + "learning_rate": 9.068544497359625e-06, + "loss": 0.2886, + "step": 11066 + }, + { + "epoch": 0.22154492905937992, + "grad_norm": 1.2201424837112427, + "learning_rate": 9.068356050112994e-06, + "loss": 0.3655, + "step": 11067 + }, + { + "epoch": 0.22156494757650827, + "grad_norm": 1.1649433374404907, + "learning_rate": 9.068167585763868e-06, + "loss": 0.3676, + "step": 11068 + }, + { + "epoch": 0.22158496609363662, + "grad_norm": 1.2123082876205444, + "learning_rate": 9.067979104313042e-06, + "loss": 0.315, + "step": 11069 + }, + { + "epoch": 0.22160498461076497, + "grad_norm": 1.0169225931167603, + "learning_rate": 9.067790605761307e-06, + "loss": 0.3037, + "step": 11070 + }, + { + "epoch": 0.2216250031278933, + "grad_norm": 1.1458736658096313, + "learning_rate": 9.067602090109457e-06, + "loss": 0.3241, + "step": 11071 + }, + { + "epoch": 0.22164502164502164, + "grad_norm": 1.211708664894104, + "learning_rate": 9.067413557358282e-06, + "loss": 0.3556, + "step": 11072 + }, + { + "epoch": 0.22166504016215, + "grad_norm": 1.2025648355484009, + "learning_rate": 9.067225007508576e-06, + "loss": 0.3308, + "step": 11073 + }, + { + "epoch": 0.22168505867927835, + "grad_norm": 1.0854352712631226, + "learning_rate": 9.06703644056113e-06, + "loss": 0.2871, + "step": 11074 + }, + { + "epoch": 0.22170507719640667, + "grad_norm": 1.2001644372940063, + "learning_rate": 9.06684785651674e-06, + "loss": 0.3081, + "step": 11075 + }, + { + "epoch": 0.22172509571353502, + "grad_norm": 1.1247673034667969, + "learning_rate": 9.066659255376195e-06, + "loss": 0.3315, + "step": 11076 + }, + { + "epoch": 0.22174511423066337, + "grad_norm": 1.1035995483398438, + "learning_rate": 9.06647063714029e-06, + "loss": 0.2804, + "step": 11077 + }, + { + "epoch": 0.22176513274779172, + "grad_norm": 1.8577120304107666, + "learning_rate": 9.06628200180982e-06, + "loss": 0.8665, + "step": 11078 + }, + { + "epoch": 0.22178515126492004, + "grad_norm": 1.0644879341125488, + "learning_rate": 9.066093349385573e-06, + "loss": 0.3202, + "step": 11079 + }, + { + "epoch": 0.2218051697820484, + "grad_norm": 1.065322756767273, + "learning_rate": 9.065904679868346e-06, + "loss": 0.3194, + "step": 11080 + }, + { + "epoch": 0.22182518829917675, + "grad_norm": 1.2096656560897827, + "learning_rate": 9.06571599325893e-06, + "loss": 0.3782, + "step": 11081 + }, + { + "epoch": 0.22184520681630507, + "grad_norm": 1.0119011402130127, + "learning_rate": 9.065527289558119e-06, + "loss": 0.2945, + "step": 11082 + }, + { + "epoch": 0.22186522533343342, + "grad_norm": 1.2137908935546875, + "learning_rate": 9.065338568766707e-06, + "loss": 0.4008, + "step": 11083 + }, + { + "epoch": 0.22188524385056177, + "grad_norm": 1.098631739616394, + "learning_rate": 9.065149830885485e-06, + "loss": 0.3614, + "step": 11084 + }, + { + "epoch": 0.22190526236769012, + "grad_norm": 1.1168683767318726, + "learning_rate": 9.064961075915249e-06, + "loss": 0.3489, + "step": 11085 + }, + { + "epoch": 0.22192528088481844, + "grad_norm": 1.0617729425430298, + "learning_rate": 9.06477230385679e-06, + "loss": 0.3142, + "step": 11086 + }, + { + "epoch": 0.2219452994019468, + "grad_norm": 1.0932449102401733, + "learning_rate": 9.064583514710903e-06, + "loss": 0.3228, + "step": 11087 + }, + { + "epoch": 0.22196531791907514, + "grad_norm": 0.9989539384841919, + "learning_rate": 9.064394708478384e-06, + "loss": 0.3626, + "step": 11088 + }, + { + "epoch": 0.2219853364362035, + "grad_norm": 1.1271553039550781, + "learning_rate": 9.064205885160022e-06, + "loss": 0.3437, + "step": 11089 + }, + { + "epoch": 0.22200535495333182, + "grad_norm": 1.1558088064193726, + "learning_rate": 9.064017044756614e-06, + "loss": 0.3257, + "step": 11090 + }, + { + "epoch": 0.22202537347046017, + "grad_norm": 1.1133641004562378, + "learning_rate": 9.063828187268954e-06, + "loss": 0.3599, + "step": 11091 + }, + { + "epoch": 0.22204539198758852, + "grad_norm": 1.3666144609451294, + "learning_rate": 9.063639312697834e-06, + "loss": 0.3326, + "step": 11092 + }, + { + "epoch": 0.22206541050471687, + "grad_norm": 1.1926414966583252, + "learning_rate": 9.063450421044048e-06, + "loss": 0.3213, + "step": 11093 + }, + { + "epoch": 0.2220854290218452, + "grad_norm": 1.1337169408798218, + "learning_rate": 9.063261512308391e-06, + "loss": 0.3687, + "step": 11094 + }, + { + "epoch": 0.22210544753897354, + "grad_norm": 1.1462966203689575, + "learning_rate": 9.063072586491659e-06, + "loss": 0.3371, + "step": 11095 + }, + { + "epoch": 0.2221254660561019, + "grad_norm": 1.2435551881790161, + "learning_rate": 9.062883643594642e-06, + "loss": 0.2845, + "step": 11096 + }, + { + "epoch": 0.22214548457323025, + "grad_norm": 0.9598131775856018, + "learning_rate": 9.062694683618137e-06, + "loss": 0.3069, + "step": 11097 + }, + { + "epoch": 0.22216550309035857, + "grad_norm": 1.0616568326950073, + "learning_rate": 9.062505706562938e-06, + "loss": 0.3169, + "step": 11098 + }, + { + "epoch": 0.22218552160748692, + "grad_norm": 1.1617026329040527, + "learning_rate": 9.062316712429839e-06, + "loss": 0.3046, + "step": 11099 + }, + { + "epoch": 0.22220554012461527, + "grad_norm": 1.0879430770874023, + "learning_rate": 9.062127701219637e-06, + "loss": 0.3419, + "step": 11100 + }, + { + "epoch": 0.22222555864174362, + "grad_norm": 1.2251503467559814, + "learning_rate": 9.061938672933122e-06, + "loss": 0.3688, + "step": 11101 + }, + { + "epoch": 0.22224557715887194, + "grad_norm": 1.3231656551361084, + "learning_rate": 9.06174962757109e-06, + "loss": 0.3264, + "step": 11102 + }, + { + "epoch": 0.2222655956760003, + "grad_norm": 1.871527075767517, + "learning_rate": 9.06156056513434e-06, + "loss": 0.8948, + "step": 11103 + }, + { + "epoch": 0.22228561419312864, + "grad_norm": 1.1268930435180664, + "learning_rate": 9.06137148562366e-06, + "loss": 0.2924, + "step": 11104 + }, + { + "epoch": 0.222305632710257, + "grad_norm": 0.9766378402709961, + "learning_rate": 9.06118238903985e-06, + "loss": 0.2952, + "step": 11105 + }, + { + "epoch": 0.22232565122738532, + "grad_norm": 1.0785956382751465, + "learning_rate": 9.060993275383702e-06, + "loss": 0.3356, + "step": 11106 + }, + { + "epoch": 0.22234566974451367, + "grad_norm": 1.0979955196380615, + "learning_rate": 9.060804144656015e-06, + "loss": 0.333, + "step": 11107 + }, + { + "epoch": 0.22236568826164202, + "grad_norm": 1.2033321857452393, + "learning_rate": 9.06061499685758e-06, + "loss": 0.3487, + "step": 11108 + }, + { + "epoch": 0.22238570677877037, + "grad_norm": 1.0799299478530884, + "learning_rate": 9.060425831989192e-06, + "loss": 0.3478, + "step": 11109 + }, + { + "epoch": 0.2224057252958987, + "grad_norm": 1.1002691984176636, + "learning_rate": 9.06023665005165e-06, + "loss": 0.3599, + "step": 11110 + }, + { + "epoch": 0.22242574381302704, + "grad_norm": 1.089638113975525, + "learning_rate": 9.060047451045744e-06, + "loss": 0.3564, + "step": 11111 + }, + { + "epoch": 0.2224457623301554, + "grad_norm": 1.097367763519287, + "learning_rate": 9.059858234972274e-06, + "loss": 0.3569, + "step": 11112 + }, + { + "epoch": 0.22246578084728375, + "grad_norm": 1.2034902572631836, + "learning_rate": 9.059669001832033e-06, + "loss": 0.3525, + "step": 11113 + }, + { + "epoch": 0.22248579936441207, + "grad_norm": 1.0703043937683105, + "learning_rate": 9.059479751625819e-06, + "loss": 0.3497, + "step": 11114 + }, + { + "epoch": 0.22250581788154042, + "grad_norm": 1.2950148582458496, + "learning_rate": 9.059290484354424e-06, + "loss": 0.3609, + "step": 11115 + }, + { + "epoch": 0.22252583639866877, + "grad_norm": 1.1227977275848389, + "learning_rate": 9.059101200018645e-06, + "loss": 0.3084, + "step": 11116 + }, + { + "epoch": 0.22254585491579712, + "grad_norm": 1.0332690477371216, + "learning_rate": 9.058911898619279e-06, + "loss": 0.3061, + "step": 11117 + }, + { + "epoch": 0.22256587343292544, + "grad_norm": 1.3033006191253662, + "learning_rate": 9.05872258015712e-06, + "loss": 0.3329, + "step": 11118 + }, + { + "epoch": 0.2225858919500538, + "grad_norm": 1.8428131341934204, + "learning_rate": 9.058533244632966e-06, + "loss": 0.7854, + "step": 11119 + }, + { + "epoch": 0.22260591046718214, + "grad_norm": 1.1384005546569824, + "learning_rate": 9.05834389204761e-06, + "loss": 0.3196, + "step": 11120 + }, + { + "epoch": 0.2226259289843105, + "grad_norm": 1.1970328092575073, + "learning_rate": 9.058154522401853e-06, + "loss": 0.3523, + "step": 11121 + }, + { + "epoch": 0.22264594750143882, + "grad_norm": 1.1152215003967285, + "learning_rate": 9.057965135696482e-06, + "loss": 0.3217, + "step": 11122 + }, + { + "epoch": 0.22266596601856717, + "grad_norm": 1.1682285070419312, + "learning_rate": 9.057775731932303e-06, + "loss": 0.3571, + "step": 11123 + }, + { + "epoch": 0.22268598453569552, + "grad_norm": 1.194123387336731, + "learning_rate": 9.057586311110109e-06, + "loss": 0.316, + "step": 11124 + }, + { + "epoch": 0.22270600305282387, + "grad_norm": 1.0914865732192993, + "learning_rate": 9.057396873230694e-06, + "loss": 0.3477, + "step": 11125 + }, + { + "epoch": 0.2227260215699522, + "grad_norm": 1.099207878112793, + "learning_rate": 9.057207418294854e-06, + "loss": 0.3508, + "step": 11126 + }, + { + "epoch": 0.22274604008708054, + "grad_norm": 1.4294557571411133, + "learning_rate": 9.057017946303387e-06, + "loss": 0.308, + "step": 11127 + }, + { + "epoch": 0.2227660586042089, + "grad_norm": 1.111910343170166, + "learning_rate": 9.056828457257092e-06, + "loss": 0.2964, + "step": 11128 + }, + { + "epoch": 0.22278607712133724, + "grad_norm": 1.0690853595733643, + "learning_rate": 9.056638951156762e-06, + "loss": 0.3594, + "step": 11129 + }, + { + "epoch": 0.22280609563846557, + "grad_norm": 1.8477513790130615, + "learning_rate": 9.056449428003196e-06, + "loss": 0.8484, + "step": 11130 + }, + { + "epoch": 0.22282611415559392, + "grad_norm": 1.2697104215621948, + "learning_rate": 9.056259887797187e-06, + "loss": 0.3687, + "step": 11131 + }, + { + "epoch": 0.22284613267272227, + "grad_norm": 1.222800850868225, + "learning_rate": 9.056070330539536e-06, + "loss": 0.312, + "step": 11132 + }, + { + "epoch": 0.22286615118985062, + "grad_norm": 1.3312478065490723, + "learning_rate": 9.055880756231038e-06, + "loss": 0.3911, + "step": 11133 + }, + { + "epoch": 0.22288616970697894, + "grad_norm": 1.855214238166809, + "learning_rate": 9.055691164872492e-06, + "loss": 0.8181, + "step": 11134 + }, + { + "epoch": 0.2229061882241073, + "grad_norm": 1.2105638980865479, + "learning_rate": 9.055501556464693e-06, + "loss": 0.3457, + "step": 11135 + }, + { + "epoch": 0.22292620674123564, + "grad_norm": 1.031540870666504, + "learning_rate": 9.055311931008435e-06, + "loss": 0.3394, + "step": 11136 + }, + { + "epoch": 0.222946225258364, + "grad_norm": 1.090873122215271, + "learning_rate": 9.05512228850452e-06, + "loss": 0.3198, + "step": 11137 + }, + { + "epoch": 0.22296624377549232, + "grad_norm": 1.052448034286499, + "learning_rate": 9.054932628953744e-06, + "loss": 0.3252, + "step": 11138 + }, + { + "epoch": 0.22298626229262067, + "grad_norm": 1.1117446422576904, + "learning_rate": 9.054742952356903e-06, + "loss": 0.3651, + "step": 11139 + }, + { + "epoch": 0.22300628080974902, + "grad_norm": 1.0377109050750732, + "learning_rate": 9.054553258714795e-06, + "loss": 0.2909, + "step": 11140 + }, + { + "epoch": 0.22302629932687737, + "grad_norm": 1.0566376447677612, + "learning_rate": 9.05436354802822e-06, + "loss": 0.3366, + "step": 11141 + }, + { + "epoch": 0.2230463178440057, + "grad_norm": 1.107884407043457, + "learning_rate": 9.054173820297972e-06, + "loss": 0.3437, + "step": 11142 + }, + { + "epoch": 0.22306633636113404, + "grad_norm": 1.120270013809204, + "learning_rate": 9.053984075524849e-06, + "loss": 0.3087, + "step": 11143 + }, + { + "epoch": 0.2230863548782624, + "grad_norm": 1.0237221717834473, + "learning_rate": 9.05379431370965e-06, + "loss": 0.3366, + "step": 11144 + }, + { + "epoch": 0.22310637339539074, + "grad_norm": 1.226957082748413, + "learning_rate": 9.053604534853171e-06, + "loss": 0.3474, + "step": 11145 + }, + { + "epoch": 0.22312639191251907, + "grad_norm": 1.0985623598098755, + "learning_rate": 9.05341473895621e-06, + "loss": 0.3012, + "step": 11146 + }, + { + "epoch": 0.22314641042964742, + "grad_norm": 1.1059749126434326, + "learning_rate": 9.053224926019568e-06, + "loss": 0.3287, + "step": 11147 + }, + { + "epoch": 0.22316642894677577, + "grad_norm": 1.1694879531860352, + "learning_rate": 9.053035096044041e-06, + "loss": 0.3677, + "step": 11148 + }, + { + "epoch": 0.22318644746390412, + "grad_norm": 1.114156723022461, + "learning_rate": 9.052845249030425e-06, + "loss": 0.3262, + "step": 11149 + }, + { + "epoch": 0.22320646598103244, + "grad_norm": 1.0913375616073608, + "learning_rate": 9.052655384979521e-06, + "loss": 0.3475, + "step": 11150 + }, + { + "epoch": 0.2232264844981608, + "grad_norm": 1.0901813507080078, + "learning_rate": 9.052465503892126e-06, + "loss": 0.3257, + "step": 11151 + }, + { + "epoch": 0.22324650301528914, + "grad_norm": 1.1593576669692993, + "learning_rate": 9.052275605769036e-06, + "loss": 0.3582, + "step": 11152 + }, + { + "epoch": 0.2232665215324175, + "grad_norm": 1.2195230722427368, + "learning_rate": 9.052085690611054e-06, + "loss": 0.3708, + "step": 11153 + }, + { + "epoch": 0.22328654004954582, + "grad_norm": 1.7967803478240967, + "learning_rate": 9.051895758418974e-06, + "loss": 0.8289, + "step": 11154 + }, + { + "epoch": 0.22330655856667417, + "grad_norm": 1.0331071615219116, + "learning_rate": 9.051705809193597e-06, + "loss": 0.3193, + "step": 11155 + }, + { + "epoch": 0.22332657708380252, + "grad_norm": 1.7760339975357056, + "learning_rate": 9.05151584293572e-06, + "loss": 0.8033, + "step": 11156 + }, + { + "epoch": 0.22334659560093087, + "grad_norm": 1.0680649280548096, + "learning_rate": 9.051325859646143e-06, + "loss": 0.3013, + "step": 11157 + }, + { + "epoch": 0.2233666141180592, + "grad_norm": 1.0806792974472046, + "learning_rate": 9.051135859325663e-06, + "loss": 0.3406, + "step": 11158 + }, + { + "epoch": 0.22338663263518754, + "grad_norm": 1.0981056690216064, + "learning_rate": 9.050945841975081e-06, + "loss": 0.3999, + "step": 11159 + }, + { + "epoch": 0.2234066511523159, + "grad_norm": 1.1133962869644165, + "learning_rate": 9.050755807595195e-06, + "loss": 0.322, + "step": 11160 + }, + { + "epoch": 0.22342666966944424, + "grad_norm": 1.1232248544692993, + "learning_rate": 9.050565756186802e-06, + "loss": 0.3498, + "step": 11161 + }, + { + "epoch": 0.22344668818657257, + "grad_norm": 1.199586272239685, + "learning_rate": 9.050375687750703e-06, + "loss": 0.3505, + "step": 11162 + }, + { + "epoch": 0.22346670670370092, + "grad_norm": 1.0873416662216187, + "learning_rate": 9.050185602287696e-06, + "loss": 0.343, + "step": 11163 + }, + { + "epoch": 0.22348672522082927, + "grad_norm": 1.0812196731567383, + "learning_rate": 9.04999549979858e-06, + "loss": 0.2806, + "step": 11164 + }, + { + "epoch": 0.22350674373795762, + "grad_norm": 1.9335286617279053, + "learning_rate": 9.049805380284154e-06, + "loss": 0.8827, + "step": 11165 + }, + { + "epoch": 0.22352676225508594, + "grad_norm": 1.0979456901550293, + "learning_rate": 9.049615243745218e-06, + "loss": 0.3357, + "step": 11166 + }, + { + "epoch": 0.2235467807722143, + "grad_norm": 1.0792865753173828, + "learning_rate": 9.049425090182571e-06, + "loss": 0.2572, + "step": 11167 + }, + { + "epoch": 0.22356679928934264, + "grad_norm": 1.130392074584961, + "learning_rate": 9.049234919597012e-06, + "loss": 0.3522, + "step": 11168 + }, + { + "epoch": 0.223586817806471, + "grad_norm": 1.2689648866653442, + "learning_rate": 9.049044731989342e-06, + "loss": 0.3291, + "step": 11169 + }, + { + "epoch": 0.22360683632359932, + "grad_norm": 1.217605710029602, + "learning_rate": 9.04885452736036e-06, + "loss": 0.3277, + "step": 11170 + }, + { + "epoch": 0.22362685484072767, + "grad_norm": 1.1770073175430298, + "learning_rate": 9.048664305710864e-06, + "loss": 0.3368, + "step": 11171 + }, + { + "epoch": 0.22364687335785602, + "grad_norm": 1.350934386253357, + "learning_rate": 9.048474067041652e-06, + "loss": 0.3573, + "step": 11172 + }, + { + "epoch": 0.22366689187498437, + "grad_norm": 1.0967262983322144, + "learning_rate": 9.04828381135353e-06, + "loss": 0.3176, + "step": 11173 + }, + { + "epoch": 0.2236869103921127, + "grad_norm": 1.0649124383926392, + "learning_rate": 9.048093538647292e-06, + "loss": 0.3363, + "step": 11174 + }, + { + "epoch": 0.22370692890924104, + "grad_norm": 1.064754843711853, + "learning_rate": 9.04790324892374e-06, + "loss": 0.3452, + "step": 11175 + }, + { + "epoch": 0.2237269474263694, + "grad_norm": 1.1794342994689941, + "learning_rate": 9.047712942183674e-06, + "loss": 0.3327, + "step": 11176 + }, + { + "epoch": 0.22374696594349774, + "grad_norm": 1.0944762229919434, + "learning_rate": 9.047522618427896e-06, + "loss": 0.2946, + "step": 11177 + }, + { + "epoch": 0.22376698446062607, + "grad_norm": 1.123242735862732, + "learning_rate": 9.047332277657202e-06, + "loss": 0.3134, + "step": 11178 + }, + { + "epoch": 0.22378700297775442, + "grad_norm": 1.1204957962036133, + "learning_rate": 9.047141919872393e-06, + "loss": 0.3577, + "step": 11179 + }, + { + "epoch": 0.22380702149488277, + "grad_norm": 0.9855847954750061, + "learning_rate": 9.046951545074271e-06, + "loss": 0.3204, + "step": 11180 + }, + { + "epoch": 0.22382704001201112, + "grad_norm": 1.08473539352417, + "learning_rate": 9.046761153263637e-06, + "loss": 0.3228, + "step": 11181 + }, + { + "epoch": 0.22384705852913944, + "grad_norm": 1.0731611251831055, + "learning_rate": 9.04657074444129e-06, + "loss": 0.3163, + "step": 11182 + }, + { + "epoch": 0.2238670770462678, + "grad_norm": 1.2161595821380615, + "learning_rate": 9.046380318608028e-06, + "loss": 0.3614, + "step": 11183 + }, + { + "epoch": 0.22388709556339614, + "grad_norm": 1.0448076725006104, + "learning_rate": 9.046189875764657e-06, + "loss": 0.3435, + "step": 11184 + }, + { + "epoch": 0.2239071140805245, + "grad_norm": 1.1454447507858276, + "learning_rate": 9.045999415911973e-06, + "loss": 0.3211, + "step": 11185 + }, + { + "epoch": 0.22392713259765282, + "grad_norm": 1.0679442882537842, + "learning_rate": 9.045808939050777e-06, + "loss": 0.3683, + "step": 11186 + }, + { + "epoch": 0.22394715111478117, + "grad_norm": 1.8644156455993652, + "learning_rate": 9.045618445181873e-06, + "loss": 0.8297, + "step": 11187 + }, + { + "epoch": 0.22396716963190952, + "grad_norm": 2.033912181854248, + "learning_rate": 9.045427934306059e-06, + "loss": 0.8428, + "step": 11188 + }, + { + "epoch": 0.22398718814903787, + "grad_norm": 1.834896445274353, + "learning_rate": 9.045237406424135e-06, + "loss": 0.856, + "step": 11189 + }, + { + "epoch": 0.2240072066661662, + "grad_norm": 1.9229224920272827, + "learning_rate": 9.045046861536906e-06, + "loss": 0.8001, + "step": 11190 + }, + { + "epoch": 0.22402722518329454, + "grad_norm": 1.1575303077697754, + "learning_rate": 9.044856299645168e-06, + "loss": 0.3345, + "step": 11191 + }, + { + "epoch": 0.2240472437004229, + "grad_norm": 1.0871139764785767, + "learning_rate": 9.044665720749728e-06, + "loss": 0.3055, + "step": 11192 + }, + { + "epoch": 0.22406726221755124, + "grad_norm": 1.3067206144332886, + "learning_rate": 9.04447512485138e-06, + "loss": 0.2857, + "step": 11193 + }, + { + "epoch": 0.22408728073467957, + "grad_norm": 1.0475177764892578, + "learning_rate": 9.044284511950932e-06, + "loss": 0.2956, + "step": 11194 + }, + { + "epoch": 0.22410729925180792, + "grad_norm": 1.2637358903884888, + "learning_rate": 9.04409388204918e-06, + "loss": 0.3846, + "step": 11195 + }, + { + "epoch": 0.22412731776893627, + "grad_norm": 1.1482317447662354, + "learning_rate": 9.043903235146929e-06, + "loss": 0.3139, + "step": 11196 + }, + { + "epoch": 0.22414733628606462, + "grad_norm": 1.0850677490234375, + "learning_rate": 9.043712571244978e-06, + "loss": 0.2857, + "step": 11197 + }, + { + "epoch": 0.22416735480319294, + "grad_norm": 1.4114108085632324, + "learning_rate": 9.04352189034413e-06, + "loss": 0.3624, + "step": 11198 + }, + { + "epoch": 0.2241873733203213, + "grad_norm": 1.0480616092681885, + "learning_rate": 9.043331192445186e-06, + "loss": 0.2715, + "step": 11199 + }, + { + "epoch": 0.22420739183744964, + "grad_norm": 1.1373844146728516, + "learning_rate": 9.043140477548949e-06, + "loss": 0.3356, + "step": 11200 + }, + { + "epoch": 0.224227410354578, + "grad_norm": 1.2131848335266113, + "learning_rate": 9.042949745656217e-06, + "loss": 0.3121, + "step": 11201 + }, + { + "epoch": 0.22424742887170632, + "grad_norm": 1.123949646949768, + "learning_rate": 9.042758996767797e-06, + "loss": 0.3009, + "step": 11202 + }, + { + "epoch": 0.22426744738883467, + "grad_norm": 1.1887595653533936, + "learning_rate": 9.042568230884486e-06, + "loss": 0.314, + "step": 11203 + }, + { + "epoch": 0.22428746590596302, + "grad_norm": 1.9633554220199585, + "learning_rate": 9.042377448007088e-06, + "loss": 0.8417, + "step": 11204 + }, + { + "epoch": 0.22430748442309137, + "grad_norm": 0.9877387881278992, + "learning_rate": 9.042186648136409e-06, + "loss": 0.3113, + "step": 11205 + }, + { + "epoch": 0.2243275029402197, + "grad_norm": 1.9187296628952026, + "learning_rate": 9.041995831273244e-06, + "loss": 0.7614, + "step": 11206 + }, + { + "epoch": 0.22434752145734804, + "grad_norm": 1.1976207494735718, + "learning_rate": 9.041804997418398e-06, + "loss": 0.3428, + "step": 11207 + }, + { + "epoch": 0.2243675399744764, + "grad_norm": 1.2946295738220215, + "learning_rate": 9.041614146572675e-06, + "loss": 0.3117, + "step": 11208 + }, + { + "epoch": 0.22438755849160474, + "grad_norm": 1.1438987255096436, + "learning_rate": 9.041423278736875e-06, + "loss": 0.3556, + "step": 11209 + }, + { + "epoch": 0.22440757700873307, + "grad_norm": 1.1395390033721924, + "learning_rate": 9.041232393911801e-06, + "loss": 0.3378, + "step": 11210 + }, + { + "epoch": 0.22442759552586142, + "grad_norm": 1.15071702003479, + "learning_rate": 9.041041492098257e-06, + "loss": 0.3624, + "step": 11211 + }, + { + "epoch": 0.22444761404298977, + "grad_norm": 1.9481128454208374, + "learning_rate": 9.040850573297044e-06, + "loss": 0.8736, + "step": 11212 + }, + { + "epoch": 0.22446763256011812, + "grad_norm": 1.2083582878112793, + "learning_rate": 9.040659637508964e-06, + "loss": 0.3039, + "step": 11213 + }, + { + "epoch": 0.22448765107724644, + "grad_norm": 1.0386502742767334, + "learning_rate": 9.040468684734821e-06, + "loss": 0.2613, + "step": 11214 + }, + { + "epoch": 0.2245076695943748, + "grad_norm": 1.1624504327774048, + "learning_rate": 9.040277714975416e-06, + "loss": 0.3225, + "step": 11215 + }, + { + "epoch": 0.22452768811150314, + "grad_norm": 1.8897124528884888, + "learning_rate": 9.040086728231555e-06, + "loss": 0.9034, + "step": 11216 + }, + { + "epoch": 0.2245477066286315, + "grad_norm": 1.2067993879318237, + "learning_rate": 9.039895724504037e-06, + "loss": 0.3267, + "step": 11217 + }, + { + "epoch": 0.22456772514575982, + "grad_norm": 2.0121655464172363, + "learning_rate": 9.039704703793669e-06, + "loss": 0.8442, + "step": 11218 + }, + { + "epoch": 0.22458774366288817, + "grad_norm": 1.1955522298812866, + "learning_rate": 9.039513666101249e-06, + "loss": 0.3544, + "step": 11219 + }, + { + "epoch": 0.22460776218001652, + "grad_norm": 1.1489983797073364, + "learning_rate": 9.039322611427584e-06, + "loss": 0.3147, + "step": 11220 + }, + { + "epoch": 0.22462778069714487, + "grad_norm": 1.1168721914291382, + "learning_rate": 9.039131539773476e-06, + "loss": 0.313, + "step": 11221 + }, + { + "epoch": 0.2246477992142732, + "grad_norm": 1.0968941450119019, + "learning_rate": 9.038940451139728e-06, + "loss": 0.333, + "step": 11222 + }, + { + "epoch": 0.22466781773140154, + "grad_norm": 1.1043877601623535, + "learning_rate": 9.038749345527144e-06, + "loss": 0.2806, + "step": 11223 + }, + { + "epoch": 0.2246878362485299, + "grad_norm": 1.1794092655181885, + "learning_rate": 9.03855822293653e-06, + "loss": 0.3303, + "step": 11224 + }, + { + "epoch": 0.22470785476565824, + "grad_norm": 1.0562763214111328, + "learning_rate": 9.038367083368682e-06, + "loss": 0.2769, + "step": 11225 + }, + { + "epoch": 0.22472787328278657, + "grad_norm": 1.1376696825027466, + "learning_rate": 9.038175926824409e-06, + "loss": 0.2984, + "step": 11226 + }, + { + "epoch": 0.22474789179991492, + "grad_norm": 1.1643097400665283, + "learning_rate": 9.037984753304513e-06, + "loss": 0.3522, + "step": 11227 + }, + { + "epoch": 0.22476791031704327, + "grad_norm": 0.9588891863822937, + "learning_rate": 9.037793562809798e-06, + "loss": 0.2611, + "step": 11228 + }, + { + "epoch": 0.22478792883417162, + "grad_norm": 1.1533914804458618, + "learning_rate": 9.037602355341069e-06, + "loss": 0.2872, + "step": 11229 + }, + { + "epoch": 0.22480794735129994, + "grad_norm": 1.1149966716766357, + "learning_rate": 9.037411130899128e-06, + "loss": 0.3473, + "step": 11230 + }, + { + "epoch": 0.2248279658684283, + "grad_norm": 0.9412341713905334, + "learning_rate": 9.037219889484779e-06, + "loss": 0.2942, + "step": 11231 + }, + { + "epoch": 0.22484798438555664, + "grad_norm": 1.2852129936218262, + "learning_rate": 9.037028631098826e-06, + "loss": 0.3213, + "step": 11232 + }, + { + "epoch": 0.224868002902685, + "grad_norm": 1.0553572177886963, + "learning_rate": 9.036837355742075e-06, + "loss": 0.3293, + "step": 11233 + }, + { + "epoch": 0.22488802141981332, + "grad_norm": 1.069210410118103, + "learning_rate": 9.036646063415329e-06, + "loss": 0.3399, + "step": 11234 + }, + { + "epoch": 0.22490803993694167, + "grad_norm": 1.146375298500061, + "learning_rate": 9.036454754119389e-06, + "loss": 0.361, + "step": 11235 + }, + { + "epoch": 0.22492805845407002, + "grad_norm": 1.0392862558364868, + "learning_rate": 9.036263427855063e-06, + "loss": 0.2661, + "step": 11236 + }, + { + "epoch": 0.22494807697119837, + "grad_norm": 1.059125542640686, + "learning_rate": 9.036072084623155e-06, + "loss": 0.2853, + "step": 11237 + }, + { + "epoch": 0.2249680954883267, + "grad_norm": 1.2141201496124268, + "learning_rate": 9.035880724424469e-06, + "loss": 0.3365, + "step": 11238 + }, + { + "epoch": 0.22498811400545504, + "grad_norm": 1.065651535987854, + "learning_rate": 9.035689347259805e-06, + "loss": 0.3382, + "step": 11239 + }, + { + "epoch": 0.2250081325225834, + "grad_norm": 1.1966801881790161, + "learning_rate": 9.035497953129975e-06, + "loss": 0.3099, + "step": 11240 + }, + { + "epoch": 0.22502815103971174, + "grad_norm": 1.0810145139694214, + "learning_rate": 9.03530654203578e-06, + "loss": 0.3452, + "step": 11241 + }, + { + "epoch": 0.22504816955684007, + "grad_norm": 1.2513002157211304, + "learning_rate": 9.035115113978024e-06, + "loss": 0.3496, + "step": 11242 + }, + { + "epoch": 0.22506818807396842, + "grad_norm": 1.1999911069869995, + "learning_rate": 9.034923668957514e-06, + "loss": 0.3232, + "step": 11243 + }, + { + "epoch": 0.22508820659109677, + "grad_norm": 1.8560709953308105, + "learning_rate": 9.03473220697505e-06, + "loss": 0.8798, + "step": 11244 + }, + { + "epoch": 0.22510822510822512, + "grad_norm": 1.8718641996383667, + "learning_rate": 9.034540728031443e-06, + "loss": 0.8338, + "step": 11245 + }, + { + "epoch": 0.22512824362535344, + "grad_norm": 1.1529103517532349, + "learning_rate": 9.034349232127494e-06, + "loss": 0.305, + "step": 11246 + }, + { + "epoch": 0.2251482621424818, + "grad_norm": 1.1251609325408936, + "learning_rate": 9.034157719264009e-06, + "loss": 0.2997, + "step": 11247 + }, + { + "epoch": 0.22516828065961014, + "grad_norm": 1.249900460243225, + "learning_rate": 9.033966189441792e-06, + "loss": 0.3643, + "step": 11248 + }, + { + "epoch": 0.2251882991767385, + "grad_norm": 1.084465742111206, + "learning_rate": 9.03377464266165e-06, + "loss": 0.3519, + "step": 11249 + }, + { + "epoch": 0.22520831769386682, + "grad_norm": 1.130433201789856, + "learning_rate": 9.033583078924388e-06, + "loss": 0.3419, + "step": 11250 + }, + { + "epoch": 0.22522833621099517, + "grad_norm": 1.0922645330429077, + "learning_rate": 9.03339149823081e-06, + "loss": 0.3413, + "step": 11251 + }, + { + "epoch": 0.22524835472812352, + "grad_norm": 1.003255844116211, + "learning_rate": 9.033199900581724e-06, + "loss": 0.3302, + "step": 11252 + }, + { + "epoch": 0.22526837324525187, + "grad_norm": 1.0505280494689941, + "learning_rate": 9.033008285977932e-06, + "loss": 0.2877, + "step": 11253 + }, + { + "epoch": 0.2252883917623802, + "grad_norm": 1.0864838361740112, + "learning_rate": 9.032816654420242e-06, + "loss": 0.3348, + "step": 11254 + }, + { + "epoch": 0.22530841027950854, + "grad_norm": 1.1194521188735962, + "learning_rate": 9.032625005909456e-06, + "loss": 0.3243, + "step": 11255 + }, + { + "epoch": 0.2253284287966369, + "grad_norm": 1.0989997386932373, + "learning_rate": 9.032433340446384e-06, + "loss": 0.3704, + "step": 11256 + }, + { + "epoch": 0.22534844731376524, + "grad_norm": 1.203802227973938, + "learning_rate": 9.032241658031831e-06, + "loss": 0.3219, + "step": 11257 + }, + { + "epoch": 0.22536846583089357, + "grad_norm": 1.1681710481643677, + "learning_rate": 9.032049958666602e-06, + "loss": 0.2985, + "step": 11258 + }, + { + "epoch": 0.22538848434802192, + "grad_norm": 1.206816554069519, + "learning_rate": 9.031858242351502e-06, + "loss": 0.3474, + "step": 11259 + }, + { + "epoch": 0.22540850286515027, + "grad_norm": 1.1047799587249756, + "learning_rate": 9.031666509087337e-06, + "loss": 0.3281, + "step": 11260 + }, + { + "epoch": 0.22542852138227862, + "grad_norm": 1.2493717670440674, + "learning_rate": 9.031474758874914e-06, + "loss": 0.3207, + "step": 11261 + }, + { + "epoch": 0.22544853989940694, + "grad_norm": 1.1564494371414185, + "learning_rate": 9.03128299171504e-06, + "loss": 0.3176, + "step": 11262 + }, + { + "epoch": 0.2254685584165353, + "grad_norm": 1.0546237230300903, + "learning_rate": 9.031091207608518e-06, + "loss": 0.287, + "step": 11263 + }, + { + "epoch": 0.22548857693366364, + "grad_norm": 1.0088567733764648, + "learning_rate": 9.030899406556156e-06, + "loss": 0.3304, + "step": 11264 + }, + { + "epoch": 0.225508595450792, + "grad_norm": 1.1332827806472778, + "learning_rate": 9.030707588558762e-06, + "loss": 0.2742, + "step": 11265 + }, + { + "epoch": 0.22552861396792032, + "grad_norm": 0.9951285719871521, + "learning_rate": 9.03051575361714e-06, + "loss": 0.2949, + "step": 11266 + }, + { + "epoch": 0.22554863248504867, + "grad_norm": 1.0756343603134155, + "learning_rate": 9.030323901732097e-06, + "loss": 0.3069, + "step": 11267 + }, + { + "epoch": 0.22556865100217702, + "grad_norm": 1.2387809753417969, + "learning_rate": 9.03013203290444e-06, + "loss": 0.3178, + "step": 11268 + }, + { + "epoch": 0.22558866951930537, + "grad_norm": 1.0809509754180908, + "learning_rate": 9.029940147134972e-06, + "loss": 0.2912, + "step": 11269 + }, + { + "epoch": 0.2256086880364337, + "grad_norm": 1.084803581237793, + "learning_rate": 9.029748244424507e-06, + "loss": 0.3171, + "step": 11270 + }, + { + "epoch": 0.22562870655356204, + "grad_norm": 1.1160683631896973, + "learning_rate": 9.029556324773845e-06, + "loss": 0.3409, + "step": 11271 + }, + { + "epoch": 0.2256487250706904, + "grad_norm": 1.178199052810669, + "learning_rate": 9.029364388183797e-06, + "loss": 0.3715, + "step": 11272 + }, + { + "epoch": 0.22566874358781874, + "grad_norm": 1.1361770629882812, + "learning_rate": 9.029172434655169e-06, + "loss": 0.3462, + "step": 11273 + }, + { + "epoch": 0.22568876210494707, + "grad_norm": 1.172127604484558, + "learning_rate": 9.028980464188766e-06, + "loss": 0.3572, + "step": 11274 + }, + { + "epoch": 0.22570878062207542, + "grad_norm": 1.8897247314453125, + "learning_rate": 9.028788476785394e-06, + "loss": 0.8431, + "step": 11275 + }, + { + "epoch": 0.22572879913920377, + "grad_norm": 1.1122596263885498, + "learning_rate": 9.028596472445865e-06, + "loss": 0.3363, + "step": 11276 + }, + { + "epoch": 0.22574881765633212, + "grad_norm": 1.1036876440048218, + "learning_rate": 9.028404451170984e-06, + "loss": 0.2945, + "step": 11277 + }, + { + "epoch": 0.22576883617346044, + "grad_norm": 1.0846070051193237, + "learning_rate": 9.028212412961556e-06, + "loss": 0.2628, + "step": 11278 + }, + { + "epoch": 0.2257888546905888, + "grad_norm": 1.2042226791381836, + "learning_rate": 9.02802035781839e-06, + "loss": 0.321, + "step": 11279 + }, + { + "epoch": 0.22580887320771714, + "grad_norm": 1.0526809692382812, + "learning_rate": 9.027828285742294e-06, + "loss": 0.2959, + "step": 11280 + }, + { + "epoch": 0.2258288917248455, + "grad_norm": 1.2321901321411133, + "learning_rate": 9.027636196734075e-06, + "loss": 0.3391, + "step": 11281 + }, + { + "epoch": 0.22584891024197382, + "grad_norm": 1.0761419534683228, + "learning_rate": 9.027444090794539e-06, + "loss": 0.3141, + "step": 11282 + }, + { + "epoch": 0.22586892875910217, + "grad_norm": 0.987086296081543, + "learning_rate": 9.027251967924495e-06, + "loss": 0.3324, + "step": 11283 + }, + { + "epoch": 0.22588894727623052, + "grad_norm": 1.1161671876907349, + "learning_rate": 9.027059828124751e-06, + "loss": 0.2746, + "step": 11284 + }, + { + "epoch": 0.22590896579335887, + "grad_norm": 1.2098307609558105, + "learning_rate": 9.026867671396113e-06, + "loss": 0.3325, + "step": 11285 + }, + { + "epoch": 0.2259289843104872, + "grad_norm": 1.0703681707382202, + "learning_rate": 9.026675497739392e-06, + "loss": 0.31, + "step": 11286 + }, + { + "epoch": 0.22594900282761554, + "grad_norm": 1.0966315269470215, + "learning_rate": 9.026483307155391e-06, + "loss": 0.3064, + "step": 11287 + }, + { + "epoch": 0.2259690213447439, + "grad_norm": 1.071413278579712, + "learning_rate": 9.026291099644923e-06, + "loss": 0.3097, + "step": 11288 + }, + { + "epoch": 0.22598903986187224, + "grad_norm": 1.1042619943618774, + "learning_rate": 9.026098875208793e-06, + "loss": 0.2896, + "step": 11289 + }, + { + "epoch": 0.22600905837900057, + "grad_norm": 1.2345596551895142, + "learning_rate": 9.02590663384781e-06, + "loss": 0.356, + "step": 11290 + }, + { + "epoch": 0.22602907689612892, + "grad_norm": 1.0973485708236694, + "learning_rate": 9.02571437556278e-06, + "loss": 0.3238, + "step": 11291 + }, + { + "epoch": 0.22604909541325727, + "grad_norm": 1.1000239849090576, + "learning_rate": 9.025522100354514e-06, + "loss": 0.3277, + "step": 11292 + }, + { + "epoch": 0.22606911393038562, + "grad_norm": 1.3286256790161133, + "learning_rate": 9.025329808223821e-06, + "loss": 0.3149, + "step": 11293 + }, + { + "epoch": 0.22608913244751394, + "grad_norm": 1.1177242994308472, + "learning_rate": 9.025137499171506e-06, + "loss": 0.3382, + "step": 11294 + }, + { + "epoch": 0.2261091509646423, + "grad_norm": 1.0420153141021729, + "learning_rate": 9.02494517319838e-06, + "loss": 0.3112, + "step": 11295 + }, + { + "epoch": 0.22612916948177064, + "grad_norm": 1.021514654159546, + "learning_rate": 9.02475283030525e-06, + "loss": 0.2795, + "step": 11296 + }, + { + "epoch": 0.226149187998899, + "grad_norm": 1.2553017139434814, + "learning_rate": 9.024560470492927e-06, + "loss": 0.363, + "step": 11297 + }, + { + "epoch": 0.22616920651602732, + "grad_norm": 1.1426178216934204, + "learning_rate": 9.024368093762216e-06, + "loss": 0.2781, + "step": 11298 + }, + { + "epoch": 0.22618922503315567, + "grad_norm": 1.175115704536438, + "learning_rate": 9.024175700113928e-06, + "loss": 0.3461, + "step": 11299 + }, + { + "epoch": 0.22620924355028402, + "grad_norm": 1.4269534349441528, + "learning_rate": 9.023983289548872e-06, + "loss": 0.2847, + "step": 11300 + }, + { + "epoch": 0.22622926206741237, + "grad_norm": 1.9004682302474976, + "learning_rate": 9.023790862067856e-06, + "loss": 0.8362, + "step": 11301 + }, + { + "epoch": 0.2262492805845407, + "grad_norm": 1.322303056716919, + "learning_rate": 9.023598417671688e-06, + "loss": 0.3586, + "step": 11302 + }, + { + "epoch": 0.22626929910166904, + "grad_norm": 1.041205644607544, + "learning_rate": 9.02340595636118e-06, + "loss": 0.3024, + "step": 11303 + }, + { + "epoch": 0.2262893176187974, + "grad_norm": 1.0901583433151245, + "learning_rate": 9.023213478137138e-06, + "loss": 0.3172, + "step": 11304 + }, + { + "epoch": 0.22630933613592574, + "grad_norm": 1.1046545505523682, + "learning_rate": 9.023020983000373e-06, + "loss": 0.3168, + "step": 11305 + }, + { + "epoch": 0.22632935465305407, + "grad_norm": 1.0373759269714355, + "learning_rate": 9.022828470951692e-06, + "loss": 0.33, + "step": 11306 + }, + { + "epoch": 0.22634937317018242, + "grad_norm": 1.0080807209014893, + "learning_rate": 9.022635941991907e-06, + "loss": 0.3786, + "step": 11307 + }, + { + "epoch": 0.22636939168731077, + "grad_norm": 0.9654861688613892, + "learning_rate": 9.022443396121827e-06, + "loss": 0.2836, + "step": 11308 + }, + { + "epoch": 0.22638941020443912, + "grad_norm": 1.0128744840621948, + "learning_rate": 9.02225083334226e-06, + "loss": 0.2978, + "step": 11309 + }, + { + "epoch": 0.22640942872156744, + "grad_norm": 1.6908714771270752, + "learning_rate": 9.022058253654016e-06, + "loss": 0.8185, + "step": 11310 + }, + { + "epoch": 0.2264294472386958, + "grad_norm": 1.346588134765625, + "learning_rate": 9.021865657057904e-06, + "loss": 0.398, + "step": 11311 + }, + { + "epoch": 0.22644946575582414, + "grad_norm": 1.111560344696045, + "learning_rate": 9.021673043554736e-06, + "loss": 0.3692, + "step": 11312 + }, + { + "epoch": 0.2264694842729525, + "grad_norm": 1.1408717632293701, + "learning_rate": 9.021480413145318e-06, + "loss": 0.36, + "step": 11313 + }, + { + "epoch": 0.22648950279008082, + "grad_norm": 1.1931304931640625, + "learning_rate": 9.021287765830462e-06, + "loss": 0.3081, + "step": 11314 + }, + { + "epoch": 0.22650952130720917, + "grad_norm": 2.08009672164917, + "learning_rate": 9.021095101610977e-06, + "loss": 0.8305, + "step": 11315 + }, + { + "epoch": 0.22652953982433752, + "grad_norm": 1.7746888399124146, + "learning_rate": 9.020902420487676e-06, + "loss": 0.8543, + "step": 11316 + }, + { + "epoch": 0.22654955834146587, + "grad_norm": 1.0985041856765747, + "learning_rate": 9.020709722461365e-06, + "loss": 0.3033, + "step": 11317 + }, + { + "epoch": 0.2265695768585942, + "grad_norm": 1.033627986907959, + "learning_rate": 9.020517007532856e-06, + "loss": 0.2714, + "step": 11318 + }, + { + "epoch": 0.22658959537572254, + "grad_norm": 1.1375190019607544, + "learning_rate": 9.02032427570296e-06, + "loss": 0.3117, + "step": 11319 + }, + { + "epoch": 0.2266096138928509, + "grad_norm": 1.0599298477172852, + "learning_rate": 9.020131526972485e-06, + "loss": 0.3252, + "step": 11320 + }, + { + "epoch": 0.22662963240997924, + "grad_norm": 1.0844513177871704, + "learning_rate": 9.019938761342241e-06, + "loss": 0.3125, + "step": 11321 + }, + { + "epoch": 0.22664965092710757, + "grad_norm": 1.0693104267120361, + "learning_rate": 9.01974597881304e-06, + "loss": 0.3226, + "step": 11322 + }, + { + "epoch": 0.22666966944423592, + "grad_norm": 1.9261951446533203, + "learning_rate": 9.019553179385692e-06, + "loss": 0.8341, + "step": 11323 + }, + { + "epoch": 0.22668968796136427, + "grad_norm": 2.050907611846924, + "learning_rate": 9.019360363061007e-06, + "loss": 0.8224, + "step": 11324 + }, + { + "epoch": 0.22670970647849262, + "grad_norm": 1.229322075843811, + "learning_rate": 9.019167529839798e-06, + "loss": 0.3467, + "step": 11325 + }, + { + "epoch": 0.22672972499562094, + "grad_norm": 1.166595458984375, + "learning_rate": 9.018974679722871e-06, + "loss": 0.3363, + "step": 11326 + }, + { + "epoch": 0.2267497435127493, + "grad_norm": 1.2329416275024414, + "learning_rate": 9.018781812711042e-06, + "loss": 0.3286, + "step": 11327 + }, + { + "epoch": 0.22676976202987764, + "grad_norm": 1.0160740613937378, + "learning_rate": 9.018588928805116e-06, + "loss": 0.3251, + "step": 11328 + }, + { + "epoch": 0.226789780547006, + "grad_norm": 1.0667719841003418, + "learning_rate": 9.01839602800591e-06, + "loss": 0.308, + "step": 11329 + }, + { + "epoch": 0.22680979906413432, + "grad_norm": 1.153166651725769, + "learning_rate": 9.018203110314231e-06, + "loss": 0.2977, + "step": 11330 + }, + { + "epoch": 0.22682981758126267, + "grad_norm": 1.0863734483718872, + "learning_rate": 9.01801017573089e-06, + "loss": 0.3489, + "step": 11331 + }, + { + "epoch": 0.22684983609839102, + "grad_norm": 1.0985065698623657, + "learning_rate": 9.0178172242567e-06, + "loss": 0.301, + "step": 11332 + }, + { + "epoch": 0.22686985461551937, + "grad_norm": 1.1259907484054565, + "learning_rate": 9.01762425589247e-06, + "loss": 0.2963, + "step": 11333 + }, + { + "epoch": 0.2268898731326477, + "grad_norm": 1.1254314184188843, + "learning_rate": 9.017431270639011e-06, + "loss": 0.3301, + "step": 11334 + }, + { + "epoch": 0.22690989164977604, + "grad_norm": 1.9719711542129517, + "learning_rate": 9.01723826849714e-06, + "loss": 0.299, + "step": 11335 + }, + { + "epoch": 0.2269299101669044, + "grad_norm": 1.0301926136016846, + "learning_rate": 9.017045249467659e-06, + "loss": 0.3363, + "step": 11336 + }, + { + "epoch": 0.22694992868403274, + "grad_norm": 1.047505497932434, + "learning_rate": 9.016852213551387e-06, + "loss": 0.3206, + "step": 11337 + }, + { + "epoch": 0.22696994720116107, + "grad_norm": 1.0420688390731812, + "learning_rate": 9.016659160749133e-06, + "loss": 0.2698, + "step": 11338 + }, + { + "epoch": 0.22698996571828942, + "grad_norm": 1.169847846031189, + "learning_rate": 9.016466091061706e-06, + "loss": 0.3343, + "step": 11339 + }, + { + "epoch": 0.22700998423541777, + "grad_norm": 1.2557587623596191, + "learning_rate": 9.016273004489922e-06, + "loss": 0.3619, + "step": 11340 + }, + { + "epoch": 0.22703000275254612, + "grad_norm": 1.1559299230575562, + "learning_rate": 9.016079901034588e-06, + "loss": 0.3383, + "step": 11341 + }, + { + "epoch": 0.22705002126967444, + "grad_norm": 1.0767357349395752, + "learning_rate": 9.015886780696521e-06, + "loss": 0.3165, + "step": 11342 + }, + { + "epoch": 0.2270700397868028, + "grad_norm": 1.0324456691741943, + "learning_rate": 9.01569364347653e-06, + "loss": 0.261, + "step": 11343 + }, + { + "epoch": 0.22709005830393114, + "grad_norm": 1.0024865865707397, + "learning_rate": 9.015500489375427e-06, + "loss": 0.317, + "step": 11344 + }, + { + "epoch": 0.2271100768210595, + "grad_norm": 1.103222131729126, + "learning_rate": 9.015307318394022e-06, + "loss": 0.301, + "step": 11345 + }, + { + "epoch": 0.22713009533818782, + "grad_norm": 1.974299430847168, + "learning_rate": 9.015114130533132e-06, + "loss": 0.8983, + "step": 11346 + }, + { + "epoch": 0.22715011385531617, + "grad_norm": 1.1752758026123047, + "learning_rate": 9.014920925793564e-06, + "loss": 0.3309, + "step": 11347 + }, + { + "epoch": 0.22717013237244452, + "grad_norm": 1.1720021963119507, + "learning_rate": 9.014727704176134e-06, + "loss": 0.3615, + "step": 11348 + }, + { + "epoch": 0.22719015088957287, + "grad_norm": 1.328330397605896, + "learning_rate": 9.014534465681653e-06, + "loss": 0.3016, + "step": 11349 + }, + { + "epoch": 0.2272101694067012, + "grad_norm": 1.22648024559021, + "learning_rate": 9.01434121031093e-06, + "loss": 0.3019, + "step": 11350 + }, + { + "epoch": 0.22723018792382954, + "grad_norm": 1.1653180122375488, + "learning_rate": 9.014147938064783e-06, + "loss": 0.3473, + "step": 11351 + }, + { + "epoch": 0.2272502064409579, + "grad_norm": 1.2331185340881348, + "learning_rate": 9.013954648944023e-06, + "loss": 0.336, + "step": 11352 + }, + { + "epoch": 0.22727022495808624, + "grad_norm": 1.0668755769729614, + "learning_rate": 9.01376134294946e-06, + "loss": 0.3259, + "step": 11353 + }, + { + "epoch": 0.22729024347521457, + "grad_norm": 1.070904016494751, + "learning_rate": 9.013568020081906e-06, + "loss": 0.3013, + "step": 11354 + }, + { + "epoch": 0.22731026199234292, + "grad_norm": 0.9926783442497253, + "learning_rate": 9.013374680342179e-06, + "loss": 0.3328, + "step": 11355 + }, + { + "epoch": 0.22733028050947127, + "grad_norm": 1.1431318521499634, + "learning_rate": 9.013181323731086e-06, + "loss": 0.3727, + "step": 11356 + }, + { + "epoch": 0.22735029902659962, + "grad_norm": 1.0349884033203125, + "learning_rate": 9.012987950249446e-06, + "loss": 0.3216, + "step": 11357 + }, + { + "epoch": 0.22737031754372794, + "grad_norm": 1.1212482452392578, + "learning_rate": 9.012794559898065e-06, + "loss": 0.3505, + "step": 11358 + }, + { + "epoch": 0.2273903360608563, + "grad_norm": 1.0699657201766968, + "learning_rate": 9.01260115267776e-06, + "loss": 0.3082, + "step": 11359 + }, + { + "epoch": 0.22741035457798464, + "grad_norm": 1.0605727434158325, + "learning_rate": 9.012407728589343e-06, + "loss": 0.3251, + "step": 11360 + }, + { + "epoch": 0.227430373095113, + "grad_norm": 1.1192899942398071, + "learning_rate": 9.012214287633629e-06, + "loss": 0.3463, + "step": 11361 + }, + { + "epoch": 0.22745039161224131, + "grad_norm": 1.208548903465271, + "learning_rate": 9.012020829811428e-06, + "loss": 0.2958, + "step": 11362 + }, + { + "epoch": 0.22747041012936967, + "grad_norm": 1.0632262229919434, + "learning_rate": 9.011827355123555e-06, + "loss": 0.2956, + "step": 11363 + }, + { + "epoch": 0.22749042864649802, + "grad_norm": 1.112878441810608, + "learning_rate": 9.011633863570824e-06, + "loss": 0.3264, + "step": 11364 + }, + { + "epoch": 0.22751044716362637, + "grad_norm": 1.2824211120605469, + "learning_rate": 9.011440355154046e-06, + "loss": 0.3303, + "step": 11365 + }, + { + "epoch": 0.2275304656807547, + "grad_norm": 0.9962632060050964, + "learning_rate": 9.011246829874037e-06, + "loss": 0.3042, + "step": 11366 + }, + { + "epoch": 0.22755048419788304, + "grad_norm": 1.8058375120162964, + "learning_rate": 9.01105328773161e-06, + "loss": 0.3848, + "step": 11367 + }, + { + "epoch": 0.2275705027150114, + "grad_norm": 1.0125893354415894, + "learning_rate": 9.010859728727578e-06, + "loss": 0.3012, + "step": 11368 + }, + { + "epoch": 0.22759052123213974, + "grad_norm": 1.1741271018981934, + "learning_rate": 9.010666152862756e-06, + "loss": 0.3358, + "step": 11369 + }, + { + "epoch": 0.22761053974926806, + "grad_norm": 1.0971494913101196, + "learning_rate": 9.010472560137953e-06, + "loss": 0.311, + "step": 11370 + }, + { + "epoch": 0.22763055826639642, + "grad_norm": 1.251253366470337, + "learning_rate": 9.01027895055399e-06, + "loss": 0.3377, + "step": 11371 + }, + { + "epoch": 0.22765057678352477, + "grad_norm": 1.1072778701782227, + "learning_rate": 9.010085324111675e-06, + "loss": 0.3821, + "step": 11372 + }, + { + "epoch": 0.22767059530065312, + "grad_norm": 1.1506773233413696, + "learning_rate": 9.009891680811825e-06, + "loss": 0.3652, + "step": 11373 + }, + { + "epoch": 0.22769061381778144, + "grad_norm": 1.1366188526153564, + "learning_rate": 9.009698020655255e-06, + "loss": 0.3042, + "step": 11374 + }, + { + "epoch": 0.2277106323349098, + "grad_norm": 1.0466035604476929, + "learning_rate": 9.009504343642774e-06, + "loss": 0.292, + "step": 11375 + }, + { + "epoch": 0.22773065085203814, + "grad_norm": 1.1198354959487915, + "learning_rate": 9.009310649775202e-06, + "loss": 0.3398, + "step": 11376 + }, + { + "epoch": 0.2277506693691665, + "grad_norm": 1.5715272426605225, + "learning_rate": 9.00911693905335e-06, + "loss": 0.339, + "step": 11377 + }, + { + "epoch": 0.22777068788629481, + "grad_norm": 1.1432750225067139, + "learning_rate": 9.008923211478034e-06, + "loss": 0.3441, + "step": 11378 + }, + { + "epoch": 0.22779070640342317, + "grad_norm": 1.861100673675537, + "learning_rate": 9.008729467050067e-06, + "loss": 0.8814, + "step": 11379 + }, + { + "epoch": 0.22781072492055152, + "grad_norm": 1.2847849130630493, + "learning_rate": 9.008535705770265e-06, + "loss": 0.3455, + "step": 11380 + }, + { + "epoch": 0.22783074343767987, + "grad_norm": 1.0629247426986694, + "learning_rate": 9.008341927639439e-06, + "loss": 0.3002, + "step": 11381 + }, + { + "epoch": 0.2278507619548082, + "grad_norm": 1.1008212566375732, + "learning_rate": 9.008148132658408e-06, + "loss": 0.3254, + "step": 11382 + }, + { + "epoch": 0.22787078047193654, + "grad_norm": 1.0708116292953491, + "learning_rate": 9.007954320827984e-06, + "loss": 0.3158, + "step": 11383 + }, + { + "epoch": 0.2278907989890649, + "grad_norm": 1.3858128786087036, + "learning_rate": 9.007760492148984e-06, + "loss": 0.2865, + "step": 11384 + }, + { + "epoch": 0.22791081750619324, + "grad_norm": 1.1763639450073242, + "learning_rate": 9.007566646622221e-06, + "loss": 0.3302, + "step": 11385 + }, + { + "epoch": 0.22793083602332156, + "grad_norm": 1.1396604776382446, + "learning_rate": 9.00737278424851e-06, + "loss": 0.3199, + "step": 11386 + }, + { + "epoch": 0.22795085454044992, + "grad_norm": 1.1332060098648071, + "learning_rate": 9.007178905028667e-06, + "loss": 0.3105, + "step": 11387 + }, + { + "epoch": 0.22797087305757827, + "grad_norm": 1.1775199174880981, + "learning_rate": 9.006985008963504e-06, + "loss": 0.3855, + "step": 11388 + }, + { + "epoch": 0.22799089157470662, + "grad_norm": 1.0566685199737549, + "learning_rate": 9.00679109605384e-06, + "loss": 0.3539, + "step": 11389 + }, + { + "epoch": 0.22801091009183494, + "grad_norm": 1.0276679992675781, + "learning_rate": 9.006597166300487e-06, + "loss": 0.2911, + "step": 11390 + }, + { + "epoch": 0.2280309286089633, + "grad_norm": 1.0170050859451294, + "learning_rate": 9.006403219704264e-06, + "loss": 0.3058, + "step": 11391 + }, + { + "epoch": 0.22805094712609164, + "grad_norm": 1.1015064716339111, + "learning_rate": 9.006209256265983e-06, + "loss": 0.3177, + "step": 11392 + }, + { + "epoch": 0.22807096564322, + "grad_norm": 1.0756614208221436, + "learning_rate": 9.006015275986459e-06, + "loss": 0.3203, + "step": 11393 + }, + { + "epoch": 0.22809098416034831, + "grad_norm": 1.11837637424469, + "learning_rate": 9.00582127886651e-06, + "loss": 0.3555, + "step": 11394 + }, + { + "epoch": 0.22811100267747667, + "grad_norm": 1.2809398174285889, + "learning_rate": 9.005627264906952e-06, + "loss": 0.3549, + "step": 11395 + }, + { + "epoch": 0.22813102119460502, + "grad_norm": 1.1736762523651123, + "learning_rate": 9.005433234108596e-06, + "loss": 0.3645, + "step": 11396 + }, + { + "epoch": 0.22815103971173337, + "grad_norm": 1.126785397529602, + "learning_rate": 9.005239186472263e-06, + "loss": 0.3396, + "step": 11397 + }, + { + "epoch": 0.2281710582288617, + "grad_norm": 1.9688904285430908, + "learning_rate": 9.005045121998766e-06, + "loss": 0.7858, + "step": 11398 + }, + { + "epoch": 0.22819107674599004, + "grad_norm": 1.0636416673660278, + "learning_rate": 9.00485104068892e-06, + "loss": 0.352, + "step": 11399 + }, + { + "epoch": 0.2282110952631184, + "grad_norm": 1.1990644931793213, + "learning_rate": 9.004656942543542e-06, + "loss": 0.3151, + "step": 11400 + }, + { + "epoch": 0.22823111378024674, + "grad_norm": 1.1298646926879883, + "learning_rate": 9.004462827563449e-06, + "loss": 0.3377, + "step": 11401 + }, + { + "epoch": 0.22825113229737506, + "grad_norm": 1.1065539121627808, + "learning_rate": 9.004268695749456e-06, + "loss": 0.3334, + "step": 11402 + }, + { + "epoch": 0.22827115081450342, + "grad_norm": 1.085526704788208, + "learning_rate": 9.00407454710238e-06, + "loss": 0.3146, + "step": 11403 + }, + { + "epoch": 0.22829116933163177, + "grad_norm": 1.2160770893096924, + "learning_rate": 9.003880381623036e-06, + "loss": 0.3008, + "step": 11404 + }, + { + "epoch": 0.22831118784876012, + "grad_norm": 1.1558233499526978, + "learning_rate": 9.003686199312238e-06, + "loss": 0.3215, + "step": 11405 + }, + { + "epoch": 0.22833120636588844, + "grad_norm": 1.194383144378662, + "learning_rate": 9.003492000170808e-06, + "loss": 0.3405, + "step": 11406 + }, + { + "epoch": 0.2283512248830168, + "grad_norm": 1.3235877752304077, + "learning_rate": 9.003297784199558e-06, + "loss": 0.3123, + "step": 11407 + }, + { + "epoch": 0.22837124340014514, + "grad_norm": 1.280962586402893, + "learning_rate": 9.003103551399304e-06, + "loss": 0.3222, + "step": 11408 + }, + { + "epoch": 0.2283912619172735, + "grad_norm": 1.140146017074585, + "learning_rate": 9.002909301770867e-06, + "loss": 0.3093, + "step": 11409 + }, + { + "epoch": 0.22841128043440181, + "grad_norm": 1.0698044300079346, + "learning_rate": 9.002715035315059e-06, + "loss": 0.3582, + "step": 11410 + }, + { + "epoch": 0.22843129895153017, + "grad_norm": 1.0195432901382446, + "learning_rate": 9.0025207520327e-06, + "loss": 0.2962, + "step": 11411 + }, + { + "epoch": 0.22845131746865852, + "grad_norm": 1.2121355533599854, + "learning_rate": 9.002326451924603e-06, + "loss": 0.3546, + "step": 11412 + }, + { + "epoch": 0.22847133598578687, + "grad_norm": 1.3831855058670044, + "learning_rate": 9.002132134991587e-06, + "loss": 0.3624, + "step": 11413 + }, + { + "epoch": 0.2284913545029152, + "grad_norm": 1.0626920461654663, + "learning_rate": 9.001937801234471e-06, + "loss": 0.3145, + "step": 11414 + }, + { + "epoch": 0.22851137302004354, + "grad_norm": 0.9725205898284912, + "learning_rate": 9.001743450654067e-06, + "loss": 0.2952, + "step": 11415 + }, + { + "epoch": 0.2285313915371719, + "grad_norm": 1.1598355770111084, + "learning_rate": 9.001549083251198e-06, + "loss": 0.3211, + "step": 11416 + }, + { + "epoch": 0.22855141005430024, + "grad_norm": 1.83851957321167, + "learning_rate": 9.001354699026675e-06, + "loss": 0.7684, + "step": 11417 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 1.2039746046066284, + "learning_rate": 9.001160297981319e-06, + "loss": 0.3365, + "step": 11418 + }, + { + "epoch": 0.22859144708855691, + "grad_norm": 1.0898540019989014, + "learning_rate": 9.000965880115947e-06, + "loss": 0.2998, + "step": 11419 + }, + { + "epoch": 0.22861146560568527, + "grad_norm": 1.2112725973129272, + "learning_rate": 9.000771445431373e-06, + "loss": 0.3425, + "step": 11420 + }, + { + "epoch": 0.22863148412281362, + "grad_norm": 1.07542884349823, + "learning_rate": 9.000576993928419e-06, + "loss": 0.3365, + "step": 11421 + }, + { + "epoch": 0.22865150263994194, + "grad_norm": 1.1212787628173828, + "learning_rate": 9.000382525607898e-06, + "loss": 0.3036, + "step": 11422 + }, + { + "epoch": 0.2286715211570703, + "grad_norm": 1.1734592914581299, + "learning_rate": 9.000188040470632e-06, + "loss": 0.3363, + "step": 11423 + }, + { + "epoch": 0.22869153967419864, + "grad_norm": 1.0599849224090576, + "learning_rate": 8.999993538517435e-06, + "loss": 0.3149, + "step": 11424 + }, + { + "epoch": 0.228711558191327, + "grad_norm": 1.2877063751220703, + "learning_rate": 8.999799019749128e-06, + "loss": 0.2973, + "step": 11425 + }, + { + "epoch": 0.22873157670845531, + "grad_norm": 1.1380857229232788, + "learning_rate": 8.999604484166525e-06, + "loss": 0.3615, + "step": 11426 + }, + { + "epoch": 0.22875159522558366, + "grad_norm": 1.2689305543899536, + "learning_rate": 8.999409931770444e-06, + "loss": 0.3478, + "step": 11427 + }, + { + "epoch": 0.22877161374271202, + "grad_norm": 1.0704785585403442, + "learning_rate": 8.999215362561705e-06, + "loss": 0.3051, + "step": 11428 + }, + { + "epoch": 0.22879163225984034, + "grad_norm": 1.823859453201294, + "learning_rate": 8.999020776541127e-06, + "loss": 0.8463, + "step": 11429 + }, + { + "epoch": 0.2288116507769687, + "grad_norm": 1.82759690284729, + "learning_rate": 8.998826173709526e-06, + "loss": 0.8649, + "step": 11430 + }, + { + "epoch": 0.22883166929409704, + "grad_norm": 1.0504481792449951, + "learning_rate": 8.99863155406772e-06, + "loss": 0.2878, + "step": 11431 + }, + { + "epoch": 0.2288516878112254, + "grad_norm": 0.9857547283172607, + "learning_rate": 8.998436917616528e-06, + "loss": 0.3106, + "step": 11432 + }, + { + "epoch": 0.2288717063283537, + "grad_norm": 1.200071930885315, + "learning_rate": 8.998242264356767e-06, + "loss": 0.3462, + "step": 11433 + }, + { + "epoch": 0.22889172484548206, + "grad_norm": 1.304253339767456, + "learning_rate": 8.998047594289256e-06, + "loss": 0.3116, + "step": 11434 + }, + { + "epoch": 0.22891174336261041, + "grad_norm": 1.1176763772964478, + "learning_rate": 8.997852907414812e-06, + "loss": 0.3088, + "step": 11435 + }, + { + "epoch": 0.22893176187973877, + "grad_norm": 1.1846086978912354, + "learning_rate": 8.997658203734257e-06, + "loss": 0.3513, + "step": 11436 + }, + { + "epoch": 0.2289517803968671, + "grad_norm": 1.1233569383621216, + "learning_rate": 8.997463483248406e-06, + "loss": 0.3275, + "step": 11437 + }, + { + "epoch": 0.22897179891399544, + "grad_norm": 1.3032900094985962, + "learning_rate": 8.997268745958078e-06, + "loss": 0.4096, + "step": 11438 + }, + { + "epoch": 0.2289918174311238, + "grad_norm": 1.3240575790405273, + "learning_rate": 8.997073991864094e-06, + "loss": 0.3279, + "step": 11439 + }, + { + "epoch": 0.22901183594825214, + "grad_norm": 1.2056703567504883, + "learning_rate": 8.99687922096727e-06, + "loss": 0.3056, + "step": 11440 + }, + { + "epoch": 0.22903185446538046, + "grad_norm": 1.1668531894683838, + "learning_rate": 8.996684433268426e-06, + "loss": 0.3367, + "step": 11441 + }, + { + "epoch": 0.22905187298250881, + "grad_norm": 1.8787331581115723, + "learning_rate": 8.996489628768382e-06, + "loss": 0.8009, + "step": 11442 + }, + { + "epoch": 0.22907189149963716, + "grad_norm": 1.0750362873077393, + "learning_rate": 8.996294807467954e-06, + "loss": 0.3535, + "step": 11443 + }, + { + "epoch": 0.22909191001676552, + "grad_norm": 1.1166456937789917, + "learning_rate": 8.996099969367963e-06, + "loss": 0.3138, + "step": 11444 + }, + { + "epoch": 0.22911192853389384, + "grad_norm": 1.1388611793518066, + "learning_rate": 8.99590511446923e-06, + "loss": 0.3503, + "step": 11445 + }, + { + "epoch": 0.2291319470510222, + "grad_norm": 1.3123537302017212, + "learning_rate": 8.995710242772568e-06, + "loss": 0.3112, + "step": 11446 + }, + { + "epoch": 0.22915196556815054, + "grad_norm": 1.099303960800171, + "learning_rate": 8.995515354278803e-06, + "loss": 0.2842, + "step": 11447 + }, + { + "epoch": 0.2291719840852789, + "grad_norm": 1.7357209920883179, + "learning_rate": 8.99532044898875e-06, + "loss": 0.8668, + "step": 11448 + }, + { + "epoch": 0.2291920026024072, + "grad_norm": 0.9763599634170532, + "learning_rate": 8.99512552690323e-06, + "loss": 0.3329, + "step": 11449 + }, + { + "epoch": 0.22921202111953556, + "grad_norm": 1.0301337242126465, + "learning_rate": 8.994930588023062e-06, + "loss": 0.3542, + "step": 11450 + }, + { + "epoch": 0.22923203963666391, + "grad_norm": 0.997170090675354, + "learning_rate": 8.994735632349065e-06, + "loss": 0.2861, + "step": 11451 + }, + { + "epoch": 0.22925205815379227, + "grad_norm": 1.1304521560668945, + "learning_rate": 8.99454065988206e-06, + "loss": 0.334, + "step": 11452 + }, + { + "epoch": 0.2292720766709206, + "grad_norm": 1.1708062887191772, + "learning_rate": 8.994345670622867e-06, + "loss": 0.3225, + "step": 11453 + }, + { + "epoch": 0.22929209518804894, + "grad_norm": 1.1968297958374023, + "learning_rate": 8.9941506645723e-06, + "loss": 0.3718, + "step": 11454 + }, + { + "epoch": 0.2293121137051773, + "grad_norm": 0.9893842339515686, + "learning_rate": 8.993955641731186e-06, + "loss": 0.2905, + "step": 11455 + }, + { + "epoch": 0.22933213222230564, + "grad_norm": 1.134812593460083, + "learning_rate": 8.993760602100344e-06, + "loss": 0.3984, + "step": 11456 + }, + { + "epoch": 0.22935215073943396, + "grad_norm": 1.132163643836975, + "learning_rate": 8.993565545680589e-06, + "loss": 0.2657, + "step": 11457 + }, + { + "epoch": 0.2293721692565623, + "grad_norm": 1.1295713186264038, + "learning_rate": 8.993370472472744e-06, + "loss": 0.37, + "step": 11458 + }, + { + "epoch": 0.22939218777369066, + "grad_norm": 0.9411097168922424, + "learning_rate": 8.99317538247763e-06, + "loss": 0.2674, + "step": 11459 + }, + { + "epoch": 0.22941220629081902, + "grad_norm": 1.4868415594100952, + "learning_rate": 8.992980275696065e-06, + "loss": 0.2865, + "step": 11460 + }, + { + "epoch": 0.22943222480794734, + "grad_norm": 1.3051973581314087, + "learning_rate": 8.992785152128871e-06, + "loss": 0.3454, + "step": 11461 + }, + { + "epoch": 0.2294522433250757, + "grad_norm": 1.0936161279678345, + "learning_rate": 8.992590011776867e-06, + "loss": 0.3939, + "step": 11462 + }, + { + "epoch": 0.22947226184220404, + "grad_norm": 1.0983160734176636, + "learning_rate": 8.992394854640874e-06, + "loss": 0.3308, + "step": 11463 + }, + { + "epoch": 0.2294922803593324, + "grad_norm": 1.0972633361816406, + "learning_rate": 8.992199680721712e-06, + "loss": 0.2745, + "step": 11464 + }, + { + "epoch": 0.2295122988764607, + "grad_norm": 1.0293619632720947, + "learning_rate": 8.992004490020202e-06, + "loss": 0.3137, + "step": 11465 + }, + { + "epoch": 0.22953231739358906, + "grad_norm": 1.0674077272415161, + "learning_rate": 8.991809282537164e-06, + "loss": 0.2788, + "step": 11466 + }, + { + "epoch": 0.22955233591071741, + "grad_norm": 1.1157236099243164, + "learning_rate": 8.99161405827342e-06, + "loss": 0.3824, + "step": 11467 + }, + { + "epoch": 0.22957235442784577, + "grad_norm": 1.081534504890442, + "learning_rate": 8.991418817229787e-06, + "loss": 0.3341, + "step": 11468 + }, + { + "epoch": 0.2295923729449741, + "grad_norm": 1.1976064443588257, + "learning_rate": 8.99122355940709e-06, + "loss": 0.3388, + "step": 11469 + }, + { + "epoch": 0.22961239146210244, + "grad_norm": 1.0622437000274658, + "learning_rate": 8.991028284806147e-06, + "loss": 0.315, + "step": 11470 + }, + { + "epoch": 0.2296324099792308, + "grad_norm": 2.0303802490234375, + "learning_rate": 8.990832993427782e-06, + "loss": 0.7716, + "step": 11471 + }, + { + "epoch": 0.22965242849635914, + "grad_norm": 1.1438719034194946, + "learning_rate": 8.990637685272812e-06, + "loss": 0.3596, + "step": 11472 + }, + { + "epoch": 0.22967244701348746, + "grad_norm": 1.1727076768875122, + "learning_rate": 8.99044236034206e-06, + "loss": 0.3042, + "step": 11473 + }, + { + "epoch": 0.2296924655306158, + "grad_norm": 1.08249032497406, + "learning_rate": 8.990247018636349e-06, + "loss": 0.2693, + "step": 11474 + }, + { + "epoch": 0.22971248404774416, + "grad_norm": 1.1181679964065552, + "learning_rate": 8.990051660156496e-06, + "loss": 0.2964, + "step": 11475 + }, + { + "epoch": 0.22973250256487251, + "grad_norm": 0.9883805513381958, + "learning_rate": 8.989856284903326e-06, + "loss": 0.3223, + "step": 11476 + }, + { + "epoch": 0.22975252108200084, + "grad_norm": 1.0845108032226562, + "learning_rate": 8.989660892877659e-06, + "loss": 0.321, + "step": 11477 + }, + { + "epoch": 0.2297725395991292, + "grad_norm": 1.113852620124817, + "learning_rate": 8.989465484080314e-06, + "loss": 0.3321, + "step": 11478 + }, + { + "epoch": 0.22979255811625754, + "grad_norm": 1.9689868688583374, + "learning_rate": 8.989270058512116e-06, + "loss": 0.7985, + "step": 11479 + }, + { + "epoch": 0.2298125766333859, + "grad_norm": 1.143507719039917, + "learning_rate": 8.989074616173884e-06, + "loss": 0.3364, + "step": 11480 + }, + { + "epoch": 0.2298325951505142, + "grad_norm": 1.0985755920410156, + "learning_rate": 8.988879157066443e-06, + "loss": 0.2749, + "step": 11481 + }, + { + "epoch": 0.22985261366764256, + "grad_norm": 1.0828756093978882, + "learning_rate": 8.988683681190612e-06, + "loss": 0.3058, + "step": 11482 + }, + { + "epoch": 0.22987263218477091, + "grad_norm": 1.1470954418182373, + "learning_rate": 8.988488188547212e-06, + "loss": 0.3418, + "step": 11483 + }, + { + "epoch": 0.22989265070189926, + "grad_norm": 1.0469655990600586, + "learning_rate": 8.988292679137068e-06, + "loss": 0.3524, + "step": 11484 + }, + { + "epoch": 0.2299126692190276, + "grad_norm": 1.10244882106781, + "learning_rate": 8.988097152960997e-06, + "loss": 0.2957, + "step": 11485 + }, + { + "epoch": 0.22993268773615594, + "grad_norm": 1.2511277198791504, + "learning_rate": 8.987901610019827e-06, + "loss": 0.299, + "step": 11486 + }, + { + "epoch": 0.2299527062532843, + "grad_norm": 1.166206955909729, + "learning_rate": 8.987706050314374e-06, + "loss": 0.3099, + "step": 11487 + }, + { + "epoch": 0.22997272477041264, + "grad_norm": 1.1560848951339722, + "learning_rate": 8.987510473845464e-06, + "loss": 0.3105, + "step": 11488 + }, + { + "epoch": 0.22999274328754096, + "grad_norm": 1.133444905281067, + "learning_rate": 8.987314880613919e-06, + "loss": 0.3238, + "step": 11489 + }, + { + "epoch": 0.2300127618046693, + "grad_norm": 1.258102536201477, + "learning_rate": 8.98711927062056e-06, + "loss": 0.3556, + "step": 11490 + }, + { + "epoch": 0.23003278032179766, + "grad_norm": 1.2067437171936035, + "learning_rate": 8.986923643866208e-06, + "loss": 0.3266, + "step": 11491 + }, + { + "epoch": 0.23005279883892601, + "grad_norm": 1.113952875137329, + "learning_rate": 8.986728000351688e-06, + "loss": 0.3299, + "step": 11492 + }, + { + "epoch": 0.23007281735605434, + "grad_norm": 1.0877447128295898, + "learning_rate": 8.986532340077821e-06, + "loss": 0.3598, + "step": 11493 + }, + { + "epoch": 0.2300928358731827, + "grad_norm": 1.0942026376724243, + "learning_rate": 8.98633666304543e-06, + "loss": 0.3429, + "step": 11494 + }, + { + "epoch": 0.23011285439031104, + "grad_norm": 1.189361333847046, + "learning_rate": 8.986140969255339e-06, + "loss": 0.3454, + "step": 11495 + }, + { + "epoch": 0.2301328729074394, + "grad_norm": 1.1216157674789429, + "learning_rate": 8.985945258708368e-06, + "loss": 0.2992, + "step": 11496 + }, + { + "epoch": 0.2301528914245677, + "grad_norm": 1.204774022102356, + "learning_rate": 8.98574953140534e-06, + "loss": 0.3024, + "step": 11497 + }, + { + "epoch": 0.23017290994169606, + "grad_norm": 1.1816742420196533, + "learning_rate": 8.985553787347081e-06, + "loss": 0.3206, + "step": 11498 + }, + { + "epoch": 0.23019292845882441, + "grad_norm": 1.181705117225647, + "learning_rate": 8.98535802653441e-06, + "loss": 0.3288, + "step": 11499 + }, + { + "epoch": 0.23021294697595276, + "grad_norm": 1.0831091403961182, + "learning_rate": 8.985162248968153e-06, + "loss": 0.3339, + "step": 11500 + }, + { + "epoch": 0.2302329654930811, + "grad_norm": 1.147282600402832, + "learning_rate": 8.98496645464913e-06, + "loss": 0.3045, + "step": 11501 + }, + { + "epoch": 0.23025298401020944, + "grad_norm": 1.0382187366485596, + "learning_rate": 8.984770643578166e-06, + "loss": 0.3403, + "step": 11502 + }, + { + "epoch": 0.2302730025273378, + "grad_norm": 1.086918830871582, + "learning_rate": 8.984574815756084e-06, + "loss": 0.3459, + "step": 11503 + }, + { + "epoch": 0.23029302104446614, + "grad_norm": 1.2204090356826782, + "learning_rate": 8.984378971183708e-06, + "loss": 0.2885, + "step": 11504 + }, + { + "epoch": 0.23031303956159446, + "grad_norm": 1.143280267715454, + "learning_rate": 8.984183109861857e-06, + "loss": 0.3214, + "step": 11505 + }, + { + "epoch": 0.2303330580787228, + "grad_norm": 1.7916463613510132, + "learning_rate": 8.98398723179136e-06, + "loss": 0.8371, + "step": 11506 + }, + { + "epoch": 0.23035307659585116, + "grad_norm": 1.028581142425537, + "learning_rate": 8.983791336973038e-06, + "loss": 0.3137, + "step": 11507 + }, + { + "epoch": 0.23037309511297951, + "grad_norm": 1.0404123067855835, + "learning_rate": 8.983595425407715e-06, + "loss": 0.2787, + "step": 11508 + }, + { + "epoch": 0.23039311363010784, + "grad_norm": 1.069088339805603, + "learning_rate": 8.983399497096213e-06, + "loss": 0.3424, + "step": 11509 + }, + { + "epoch": 0.2304131321472362, + "grad_norm": 1.1385914087295532, + "learning_rate": 8.983203552039357e-06, + "loss": 0.3, + "step": 11510 + }, + { + "epoch": 0.23043315066436454, + "grad_norm": 1.1726891994476318, + "learning_rate": 8.983007590237971e-06, + "loss": 0.3984, + "step": 11511 + }, + { + "epoch": 0.2304531691814929, + "grad_norm": 1.877030372619629, + "learning_rate": 8.982811611692876e-06, + "loss": 0.8317, + "step": 11512 + }, + { + "epoch": 0.2304731876986212, + "grad_norm": 1.0163298845291138, + "learning_rate": 8.982615616404902e-06, + "loss": 0.3541, + "step": 11513 + }, + { + "epoch": 0.23049320621574956, + "grad_norm": 1.2206602096557617, + "learning_rate": 8.982419604374866e-06, + "loss": 0.3488, + "step": 11514 + }, + { + "epoch": 0.2305132247328779, + "grad_norm": 1.1047371625900269, + "learning_rate": 8.982223575603595e-06, + "loss": 0.3323, + "step": 11515 + }, + { + "epoch": 0.23053324325000626, + "grad_norm": 1.8849292993545532, + "learning_rate": 8.982027530091914e-06, + "loss": 0.8154, + "step": 11516 + }, + { + "epoch": 0.2305532617671346, + "grad_norm": 1.093271017074585, + "learning_rate": 8.981831467840646e-06, + "loss": 0.3073, + "step": 11517 + }, + { + "epoch": 0.23057328028426294, + "grad_norm": 1.1854634284973145, + "learning_rate": 8.981635388850616e-06, + "loss": 0.3418, + "step": 11518 + }, + { + "epoch": 0.2305932988013913, + "grad_norm": 1.1147148609161377, + "learning_rate": 8.981439293122648e-06, + "loss": 0.3206, + "step": 11519 + }, + { + "epoch": 0.23061331731851964, + "grad_norm": 1.8861758708953857, + "learning_rate": 8.981243180657564e-06, + "loss": 0.7465, + "step": 11520 + }, + { + "epoch": 0.23063333583564796, + "grad_norm": 1.0515018701553345, + "learning_rate": 8.981047051456194e-06, + "loss": 0.3243, + "step": 11521 + }, + { + "epoch": 0.2306533543527763, + "grad_norm": 1.1762620210647583, + "learning_rate": 8.980850905519357e-06, + "loss": 0.3234, + "step": 11522 + }, + { + "epoch": 0.23067337286990466, + "grad_norm": 1.1524239778518677, + "learning_rate": 8.980654742847879e-06, + "loss": 0.3691, + "step": 11523 + }, + { + "epoch": 0.23069339138703301, + "grad_norm": 1.065828561782837, + "learning_rate": 8.980458563442584e-06, + "loss": 0.3394, + "step": 11524 + }, + { + "epoch": 0.23071340990416134, + "grad_norm": 1.8783609867095947, + "learning_rate": 8.9802623673043e-06, + "loss": 0.843, + "step": 11525 + }, + { + "epoch": 0.2307334284212897, + "grad_norm": 1.0162534713745117, + "learning_rate": 8.980066154433848e-06, + "loss": 0.294, + "step": 11526 + }, + { + "epoch": 0.23075344693841804, + "grad_norm": 1.2726526260375977, + "learning_rate": 8.979869924832055e-06, + "loss": 0.349, + "step": 11527 + }, + { + "epoch": 0.2307734654555464, + "grad_norm": 1.2074675559997559, + "learning_rate": 8.979673678499745e-06, + "loss": 0.3418, + "step": 11528 + }, + { + "epoch": 0.2307934839726747, + "grad_norm": 1.9146257638931274, + "learning_rate": 8.979477415437744e-06, + "loss": 0.8317, + "step": 11529 + }, + { + "epoch": 0.23081350248980306, + "grad_norm": 1.4555658102035522, + "learning_rate": 8.979281135646875e-06, + "loss": 0.3296, + "step": 11530 + }, + { + "epoch": 0.2308335210069314, + "grad_norm": 1.0098199844360352, + "learning_rate": 8.979084839127966e-06, + "loss": 0.3218, + "step": 11531 + }, + { + "epoch": 0.23085353952405976, + "grad_norm": 1.0182549953460693, + "learning_rate": 8.97888852588184e-06, + "loss": 0.3415, + "step": 11532 + }, + { + "epoch": 0.2308735580411881, + "grad_norm": 1.1594196557998657, + "learning_rate": 8.978692195909323e-06, + "loss": 0.3636, + "step": 11533 + }, + { + "epoch": 0.23089357655831644, + "grad_norm": 1.9393857717514038, + "learning_rate": 8.97849584921124e-06, + "loss": 0.8057, + "step": 11534 + }, + { + "epoch": 0.2309135950754448, + "grad_norm": 1.0626611709594727, + "learning_rate": 8.978299485788417e-06, + "loss": 0.3437, + "step": 11535 + }, + { + "epoch": 0.23093361359257314, + "grad_norm": 1.067830204963684, + "learning_rate": 8.97810310564168e-06, + "loss": 0.3503, + "step": 11536 + }, + { + "epoch": 0.23095363210970146, + "grad_norm": 1.0551114082336426, + "learning_rate": 8.977906708771851e-06, + "loss": 0.3327, + "step": 11537 + }, + { + "epoch": 0.2309736506268298, + "grad_norm": 1.098252296447754, + "learning_rate": 8.977710295179762e-06, + "loss": 0.2966, + "step": 11538 + }, + { + "epoch": 0.23099366914395816, + "grad_norm": 0.991810142993927, + "learning_rate": 8.977513864866232e-06, + "loss": 0.3456, + "step": 11539 + }, + { + "epoch": 0.23101368766108651, + "grad_norm": 1.0409139394760132, + "learning_rate": 8.977317417832089e-06, + "loss": 0.3692, + "step": 11540 + }, + { + "epoch": 0.23103370617821484, + "grad_norm": 1.1061335802078247, + "learning_rate": 8.977120954078163e-06, + "loss": 0.386, + "step": 11541 + }, + { + "epoch": 0.2310537246953432, + "grad_norm": 1.0573198795318604, + "learning_rate": 8.976924473605273e-06, + "loss": 0.335, + "step": 11542 + }, + { + "epoch": 0.23107374321247154, + "grad_norm": 1.0603152513504028, + "learning_rate": 8.97672797641425e-06, + "loss": 0.3616, + "step": 11543 + }, + { + "epoch": 0.2310937617295999, + "grad_norm": 1.2428473234176636, + "learning_rate": 8.976531462505918e-06, + "loss": 0.3406, + "step": 11544 + }, + { + "epoch": 0.2311137802467282, + "grad_norm": 1.1070375442504883, + "learning_rate": 8.976334931881104e-06, + "loss": 0.3616, + "step": 11545 + }, + { + "epoch": 0.23113379876385656, + "grad_norm": 0.9641518592834473, + "learning_rate": 8.976138384540633e-06, + "loss": 0.3167, + "step": 11546 + }, + { + "epoch": 0.2311538172809849, + "grad_norm": 1.099840521812439, + "learning_rate": 8.975941820485332e-06, + "loss": 0.3419, + "step": 11547 + }, + { + "epoch": 0.23117383579811326, + "grad_norm": 1.0732321739196777, + "learning_rate": 8.975745239716027e-06, + "loss": 0.3493, + "step": 11548 + }, + { + "epoch": 0.2311938543152416, + "grad_norm": 1.0169868469238281, + "learning_rate": 8.975548642233546e-06, + "loss": 0.3133, + "step": 11549 + }, + { + "epoch": 0.23121387283236994, + "grad_norm": 1.0870237350463867, + "learning_rate": 8.975352028038712e-06, + "loss": 0.3432, + "step": 11550 + }, + { + "epoch": 0.2312338913494983, + "grad_norm": 1.1363987922668457, + "learning_rate": 8.975155397132356e-06, + "loss": 0.3259, + "step": 11551 + }, + { + "epoch": 0.23125390986662664, + "grad_norm": 2.0720255374908447, + "learning_rate": 8.9749587495153e-06, + "loss": 0.9323, + "step": 11552 + }, + { + "epoch": 0.23127392838375496, + "grad_norm": 1.081886649131775, + "learning_rate": 8.974762085188374e-06, + "loss": 0.3311, + "step": 11553 + }, + { + "epoch": 0.2312939469008833, + "grad_norm": 1.08842134475708, + "learning_rate": 8.974565404152404e-06, + "loss": 0.3158, + "step": 11554 + }, + { + "epoch": 0.23131396541801166, + "grad_norm": 1.0987271070480347, + "learning_rate": 8.974368706408215e-06, + "loss": 0.3324, + "step": 11555 + }, + { + "epoch": 0.23133398393514001, + "grad_norm": 1.9340386390686035, + "learning_rate": 8.974171991956636e-06, + "loss": 0.8354, + "step": 11556 + }, + { + "epoch": 0.23135400245226834, + "grad_norm": 1.1327894926071167, + "learning_rate": 8.973975260798492e-06, + "loss": 0.3249, + "step": 11557 + }, + { + "epoch": 0.2313740209693967, + "grad_norm": 1.081203579902649, + "learning_rate": 8.973778512934611e-06, + "loss": 0.3032, + "step": 11558 + }, + { + "epoch": 0.23139403948652504, + "grad_norm": 1.1644210815429688, + "learning_rate": 8.973581748365823e-06, + "loss": 0.3323, + "step": 11559 + }, + { + "epoch": 0.2314140580036534, + "grad_norm": 1.2648389339447021, + "learning_rate": 8.973384967092951e-06, + "loss": 0.3164, + "step": 11560 + }, + { + "epoch": 0.2314340765207817, + "grad_norm": 1.0584992170333862, + "learning_rate": 8.973188169116824e-06, + "loss": 0.328, + "step": 11561 + }, + { + "epoch": 0.23145409503791006, + "grad_norm": 1.1914870738983154, + "learning_rate": 8.97299135443827e-06, + "loss": 0.3222, + "step": 11562 + }, + { + "epoch": 0.2314741135550384, + "grad_norm": 1.1767568588256836, + "learning_rate": 8.972794523058112e-06, + "loss": 0.3469, + "step": 11563 + }, + { + "epoch": 0.23149413207216676, + "grad_norm": 1.1481302976608276, + "learning_rate": 8.972597674977184e-06, + "loss": 0.3289, + "step": 11564 + }, + { + "epoch": 0.2315141505892951, + "grad_norm": 1.073926329612732, + "learning_rate": 8.972400810196308e-06, + "loss": 0.3542, + "step": 11565 + }, + { + "epoch": 0.23153416910642344, + "grad_norm": 1.1695170402526855, + "learning_rate": 8.972203928716316e-06, + "loss": 0.2767, + "step": 11566 + }, + { + "epoch": 0.2315541876235518, + "grad_norm": 1.07219398021698, + "learning_rate": 8.972007030538032e-06, + "loss": 0.3588, + "step": 11567 + }, + { + "epoch": 0.23157420614068014, + "grad_norm": 1.129325270652771, + "learning_rate": 8.971810115662285e-06, + "loss": 0.2989, + "step": 11568 + }, + { + "epoch": 0.23159422465780846, + "grad_norm": 1.2284715175628662, + "learning_rate": 8.971613184089905e-06, + "loss": 0.3339, + "step": 11569 + }, + { + "epoch": 0.2316142431749368, + "grad_norm": 1.0599733591079712, + "learning_rate": 8.971416235821717e-06, + "loss": 0.3052, + "step": 11570 + }, + { + "epoch": 0.23163426169206516, + "grad_norm": 1.0973554849624634, + "learning_rate": 8.97121927085855e-06, + "loss": 0.3291, + "step": 11571 + }, + { + "epoch": 0.2316542802091935, + "grad_norm": 1.1616863012313843, + "learning_rate": 8.97102228920123e-06, + "loss": 0.3321, + "step": 11572 + }, + { + "epoch": 0.23167429872632184, + "grad_norm": 1.0787311792373657, + "learning_rate": 8.970825290850586e-06, + "loss": 0.3253, + "step": 11573 + }, + { + "epoch": 0.2316943172434502, + "grad_norm": 1.0651150941848755, + "learning_rate": 8.97062827580745e-06, + "loss": 0.3008, + "step": 11574 + }, + { + "epoch": 0.23171433576057854, + "grad_norm": 1.0436867475509644, + "learning_rate": 8.970431244072647e-06, + "loss": 0.4003, + "step": 11575 + }, + { + "epoch": 0.2317343542777069, + "grad_norm": 1.2123923301696777, + "learning_rate": 8.970234195647005e-06, + "loss": 0.3573, + "step": 11576 + }, + { + "epoch": 0.2317543727948352, + "grad_norm": 1.0832347869873047, + "learning_rate": 8.970037130531353e-06, + "loss": 0.2828, + "step": 11577 + }, + { + "epoch": 0.23177439131196356, + "grad_norm": 1.068522334098816, + "learning_rate": 8.969840048726517e-06, + "loss": 0.3196, + "step": 11578 + }, + { + "epoch": 0.2317944098290919, + "grad_norm": 1.1788692474365234, + "learning_rate": 8.96964295023333e-06, + "loss": 0.338, + "step": 11579 + }, + { + "epoch": 0.23181442834622026, + "grad_norm": 1.7843188047409058, + "learning_rate": 8.969445835052619e-06, + "loss": 0.8641, + "step": 11580 + }, + { + "epoch": 0.2318344468633486, + "grad_norm": 1.2125409841537476, + "learning_rate": 8.969248703185211e-06, + "loss": 0.3414, + "step": 11581 + }, + { + "epoch": 0.23185446538047694, + "grad_norm": 1.0760526657104492, + "learning_rate": 8.969051554631935e-06, + "loss": 0.3214, + "step": 11582 + }, + { + "epoch": 0.2318744838976053, + "grad_norm": 1.0874409675598145, + "learning_rate": 8.968854389393622e-06, + "loss": 0.2829, + "step": 11583 + }, + { + "epoch": 0.23189450241473364, + "grad_norm": 1.1944429874420166, + "learning_rate": 8.968657207471097e-06, + "loss": 0.322, + "step": 11584 + }, + { + "epoch": 0.23191452093186196, + "grad_norm": 1.1786503791809082, + "learning_rate": 8.968460008865192e-06, + "loss": 0.3419, + "step": 11585 + }, + { + "epoch": 0.2319345394489903, + "grad_norm": 1.1263879537582397, + "learning_rate": 8.968262793576736e-06, + "loss": 0.3397, + "step": 11586 + }, + { + "epoch": 0.23195455796611866, + "grad_norm": 1.0498411655426025, + "learning_rate": 8.968065561606557e-06, + "loss": 0.2991, + "step": 11587 + }, + { + "epoch": 0.231974576483247, + "grad_norm": 1.1145068407058716, + "learning_rate": 8.967868312955484e-06, + "loss": 0.3473, + "step": 11588 + }, + { + "epoch": 0.23199459500037534, + "grad_norm": 1.0285676717758179, + "learning_rate": 8.967671047624347e-06, + "loss": 0.2938, + "step": 11589 + }, + { + "epoch": 0.2320146135175037, + "grad_norm": 1.2282696962356567, + "learning_rate": 8.967473765613973e-06, + "loss": 0.3142, + "step": 11590 + }, + { + "epoch": 0.23203463203463204, + "grad_norm": 1.6068209409713745, + "learning_rate": 8.967276466925195e-06, + "loss": 0.3539, + "step": 11591 + }, + { + "epoch": 0.2320546505517604, + "grad_norm": 1.1348001956939697, + "learning_rate": 8.96707915155884e-06, + "loss": 0.316, + "step": 11592 + }, + { + "epoch": 0.2320746690688887, + "grad_norm": 1.1251474618911743, + "learning_rate": 8.966881819515739e-06, + "loss": 0.3893, + "step": 11593 + }, + { + "epoch": 0.23209468758601706, + "grad_norm": 1.181235909461975, + "learning_rate": 8.966684470796719e-06, + "loss": 0.363, + "step": 11594 + }, + { + "epoch": 0.2321147061031454, + "grad_norm": 1.1968415975570679, + "learning_rate": 8.96648710540261e-06, + "loss": 0.374, + "step": 11595 + }, + { + "epoch": 0.23213472462027376, + "grad_norm": 1.9754101037979126, + "learning_rate": 8.966289723334245e-06, + "loss": 0.8192, + "step": 11596 + }, + { + "epoch": 0.2321547431374021, + "grad_norm": 1.1829622983932495, + "learning_rate": 8.96609232459245e-06, + "loss": 0.3299, + "step": 11597 + }, + { + "epoch": 0.23217476165453044, + "grad_norm": 0.9768645763397217, + "learning_rate": 8.965894909178057e-06, + "loss": 0.3204, + "step": 11598 + }, + { + "epoch": 0.2321947801716588, + "grad_norm": 1.032434344291687, + "learning_rate": 8.965697477091897e-06, + "loss": 0.3062, + "step": 11599 + }, + { + "epoch": 0.23221479868878714, + "grad_norm": 0.9823652505874634, + "learning_rate": 8.965500028334796e-06, + "loss": 0.3178, + "step": 11600 + }, + { + "epoch": 0.23223481720591546, + "grad_norm": 1.1617255210876465, + "learning_rate": 8.965302562907587e-06, + "loss": 0.3719, + "step": 11601 + }, + { + "epoch": 0.2322548357230438, + "grad_norm": 1.058067798614502, + "learning_rate": 8.965105080811099e-06, + "loss": 0.3077, + "step": 11602 + }, + { + "epoch": 0.23227485424017216, + "grad_norm": 1.223235011100769, + "learning_rate": 8.964907582046163e-06, + "loss": 0.3299, + "step": 11603 + }, + { + "epoch": 0.2322948727573005, + "grad_norm": 1.0069929361343384, + "learning_rate": 8.964710066613608e-06, + "loss": 0.3167, + "step": 11604 + }, + { + "epoch": 0.23231489127442884, + "grad_norm": 1.0169787406921387, + "learning_rate": 8.964512534514266e-06, + "loss": 0.2731, + "step": 11605 + }, + { + "epoch": 0.2323349097915572, + "grad_norm": 1.1921722888946533, + "learning_rate": 8.964314985748968e-06, + "loss": 0.3306, + "step": 11606 + }, + { + "epoch": 0.23235492830868554, + "grad_norm": 1.9052563905715942, + "learning_rate": 8.96411742031854e-06, + "loss": 0.829, + "step": 11607 + }, + { + "epoch": 0.2323749468258139, + "grad_norm": 1.2603732347488403, + "learning_rate": 8.963919838223817e-06, + "loss": 0.3679, + "step": 11608 + }, + { + "epoch": 0.2323949653429422, + "grad_norm": 1.1831514835357666, + "learning_rate": 8.963722239465627e-06, + "loss": 0.353, + "step": 11609 + }, + { + "epoch": 0.23241498386007056, + "grad_norm": 0.989844024181366, + "learning_rate": 8.963524624044802e-06, + "loss": 0.3196, + "step": 11610 + }, + { + "epoch": 0.2324350023771989, + "grad_norm": 1.0953056812286377, + "learning_rate": 8.963326991962174e-06, + "loss": 0.2988, + "step": 11611 + }, + { + "epoch": 0.23245502089432726, + "grad_norm": 1.1709524393081665, + "learning_rate": 8.96312934321857e-06, + "loss": 0.3019, + "step": 11612 + }, + { + "epoch": 0.2324750394114556, + "grad_norm": 1.215047001838684, + "learning_rate": 8.962931677814826e-06, + "loss": 0.3617, + "step": 11613 + }, + { + "epoch": 0.23249505792858394, + "grad_norm": 1.2241586446762085, + "learning_rate": 8.962733995751767e-06, + "loss": 0.3002, + "step": 11614 + }, + { + "epoch": 0.2325150764457123, + "grad_norm": 1.050825834274292, + "learning_rate": 8.962536297030229e-06, + "loss": 0.3139, + "step": 11615 + }, + { + "epoch": 0.23253509496284064, + "grad_norm": 1.2602263689041138, + "learning_rate": 8.96233858165104e-06, + "loss": 0.3491, + "step": 11616 + }, + { + "epoch": 0.23255511347996896, + "grad_norm": 1.11530339717865, + "learning_rate": 8.962140849615034e-06, + "loss": 0.3504, + "step": 11617 + }, + { + "epoch": 0.2325751319970973, + "grad_norm": 1.041263222694397, + "learning_rate": 8.961943100923038e-06, + "loss": 0.3298, + "step": 11618 + }, + { + "epoch": 0.23259515051422566, + "grad_norm": 1.186808466911316, + "learning_rate": 8.961745335575886e-06, + "loss": 0.3129, + "step": 11619 + }, + { + "epoch": 0.232615169031354, + "grad_norm": 1.3217296600341797, + "learning_rate": 8.961547553574413e-06, + "loss": 0.3467, + "step": 11620 + }, + { + "epoch": 0.23263518754848234, + "grad_norm": 1.1004325151443481, + "learning_rate": 8.961349754919443e-06, + "loss": 0.3437, + "step": 11621 + }, + { + "epoch": 0.2326552060656107, + "grad_norm": 1.1744130849838257, + "learning_rate": 8.961151939611811e-06, + "loss": 0.3181, + "step": 11622 + }, + { + "epoch": 0.23267522458273904, + "grad_norm": 1.1392351388931274, + "learning_rate": 8.960954107652352e-06, + "loss": 0.3505, + "step": 11623 + }, + { + "epoch": 0.2326952430998674, + "grad_norm": 1.1146137714385986, + "learning_rate": 8.960756259041891e-06, + "loss": 0.3205, + "step": 11624 + }, + { + "epoch": 0.2327152616169957, + "grad_norm": 1.0888468027114868, + "learning_rate": 8.960558393781264e-06, + "loss": 0.3242, + "step": 11625 + }, + { + "epoch": 0.23273528013412406, + "grad_norm": 1.0197070837020874, + "learning_rate": 8.960360511871302e-06, + "loss": 0.3038, + "step": 11626 + }, + { + "epoch": 0.2327552986512524, + "grad_norm": 1.16287362575531, + "learning_rate": 8.960162613312837e-06, + "loss": 0.331, + "step": 11627 + }, + { + "epoch": 0.23277531716838076, + "grad_norm": 1.1700330972671509, + "learning_rate": 8.9599646981067e-06, + "loss": 0.3269, + "step": 11628 + }, + { + "epoch": 0.23279533568550909, + "grad_norm": 1.1345221996307373, + "learning_rate": 8.959766766253723e-06, + "loss": 0.3285, + "step": 11629 + }, + { + "epoch": 0.23281535420263744, + "grad_norm": 2.0244054794311523, + "learning_rate": 8.95956881775474e-06, + "loss": 0.8786, + "step": 11630 + }, + { + "epoch": 0.2328353727197658, + "grad_norm": 1.2071152925491333, + "learning_rate": 8.959370852610582e-06, + "loss": 0.3303, + "step": 11631 + }, + { + "epoch": 0.23285539123689414, + "grad_norm": 1.991201639175415, + "learning_rate": 8.95917287082208e-06, + "loss": 0.8135, + "step": 11632 + }, + { + "epoch": 0.23287540975402246, + "grad_norm": 1.1127138137817383, + "learning_rate": 8.958974872390067e-06, + "loss": 0.327, + "step": 11633 + }, + { + "epoch": 0.2328954282711508, + "grad_norm": 0.9886870384216309, + "learning_rate": 8.958776857315377e-06, + "loss": 0.3012, + "step": 11634 + }, + { + "epoch": 0.23291544678827916, + "grad_norm": 1.0533000230789185, + "learning_rate": 8.95857882559884e-06, + "loss": 0.2822, + "step": 11635 + }, + { + "epoch": 0.2329354653054075, + "grad_norm": 1.1405609846115112, + "learning_rate": 8.95838077724129e-06, + "loss": 0.2847, + "step": 11636 + }, + { + "epoch": 0.23295548382253584, + "grad_norm": 1.7669615745544434, + "learning_rate": 8.95818271224356e-06, + "loss": 0.8677, + "step": 11637 + }, + { + "epoch": 0.2329755023396642, + "grad_norm": 1.102799654006958, + "learning_rate": 8.95798463060648e-06, + "loss": 0.3153, + "step": 11638 + }, + { + "epoch": 0.23299552085679254, + "grad_norm": 1.8544378280639648, + "learning_rate": 8.957786532330884e-06, + "loss": 0.3144, + "step": 11639 + }, + { + "epoch": 0.2330155393739209, + "grad_norm": 1.1528449058532715, + "learning_rate": 8.957588417417607e-06, + "loss": 0.333, + "step": 11640 + }, + { + "epoch": 0.2330355578910492, + "grad_norm": 0.9939308762550354, + "learning_rate": 8.957390285867478e-06, + "loss": 0.2977, + "step": 11641 + }, + { + "epoch": 0.23305557640817756, + "grad_norm": 1.066721796989441, + "learning_rate": 8.957192137681334e-06, + "loss": 0.2821, + "step": 11642 + }, + { + "epoch": 0.2330755949253059, + "grad_norm": 1.0799623727798462, + "learning_rate": 8.956993972860005e-06, + "loss": 0.344, + "step": 11643 + }, + { + "epoch": 0.23309561344243426, + "grad_norm": 1.0923664569854736, + "learning_rate": 8.956795791404324e-06, + "loss": 0.3429, + "step": 11644 + }, + { + "epoch": 0.23311563195956259, + "grad_norm": 1.2024762630462646, + "learning_rate": 8.956597593315126e-06, + "loss": 0.3799, + "step": 11645 + }, + { + "epoch": 0.23313565047669094, + "grad_norm": 1.003176212310791, + "learning_rate": 8.956399378593243e-06, + "loss": 0.3121, + "step": 11646 + }, + { + "epoch": 0.2331556689938193, + "grad_norm": 1.0986299514770508, + "learning_rate": 8.956201147239509e-06, + "loss": 0.3247, + "step": 11647 + }, + { + "epoch": 0.23317568751094764, + "grad_norm": 1.1299653053283691, + "learning_rate": 8.956002899254754e-06, + "loss": 0.3441, + "step": 11648 + }, + { + "epoch": 0.23319570602807596, + "grad_norm": 1.3360787630081177, + "learning_rate": 8.955804634639818e-06, + "loss": 0.2962, + "step": 11649 + }, + { + "epoch": 0.2332157245452043, + "grad_norm": 1.0336107015609741, + "learning_rate": 8.955606353395528e-06, + "loss": 0.3357, + "step": 11650 + }, + { + "epoch": 0.23323574306233266, + "grad_norm": 1.0974678993225098, + "learning_rate": 8.955408055522721e-06, + "loss": 0.3367, + "step": 11651 + }, + { + "epoch": 0.233255761579461, + "grad_norm": 1.105245590209961, + "learning_rate": 8.95520974102223e-06, + "loss": 0.3465, + "step": 11652 + }, + { + "epoch": 0.23327578009658934, + "grad_norm": 1.1610753536224365, + "learning_rate": 8.955011409894888e-06, + "loss": 0.3384, + "step": 11653 + }, + { + "epoch": 0.2332957986137177, + "grad_norm": 1.0206538438796997, + "learning_rate": 8.95481306214153e-06, + "loss": 0.3259, + "step": 11654 + }, + { + "epoch": 0.23331581713084604, + "grad_norm": 1.0297168493270874, + "learning_rate": 8.954614697762988e-06, + "loss": 0.3179, + "step": 11655 + }, + { + "epoch": 0.2333358356479744, + "grad_norm": 1.110559344291687, + "learning_rate": 8.954416316760097e-06, + "loss": 0.3421, + "step": 11656 + }, + { + "epoch": 0.2333558541651027, + "grad_norm": 1.1900253295898438, + "learning_rate": 8.95421791913369e-06, + "loss": 0.3233, + "step": 11657 + }, + { + "epoch": 0.23337587268223106, + "grad_norm": 1.0859469175338745, + "learning_rate": 8.954019504884603e-06, + "loss": 0.3298, + "step": 11658 + }, + { + "epoch": 0.2333958911993594, + "grad_norm": 1.0887947082519531, + "learning_rate": 8.953821074013669e-06, + "loss": 0.3651, + "step": 11659 + }, + { + "epoch": 0.23341590971648776, + "grad_norm": 1.0788053274154663, + "learning_rate": 8.953622626521721e-06, + "loss": 0.3477, + "step": 11660 + }, + { + "epoch": 0.23343592823361609, + "grad_norm": 1.0551866292953491, + "learning_rate": 8.953424162409596e-06, + "loss": 0.2978, + "step": 11661 + }, + { + "epoch": 0.23345594675074444, + "grad_norm": 1.032851219177246, + "learning_rate": 8.953225681678126e-06, + "loss": 0.3232, + "step": 11662 + }, + { + "epoch": 0.2334759652678728, + "grad_norm": 1.1537199020385742, + "learning_rate": 8.953027184328145e-06, + "loss": 0.3762, + "step": 11663 + }, + { + "epoch": 0.23349598378500114, + "grad_norm": 0.9580202102661133, + "learning_rate": 8.952828670360489e-06, + "loss": 0.2667, + "step": 11664 + }, + { + "epoch": 0.23351600230212946, + "grad_norm": 1.0728133916854858, + "learning_rate": 8.952630139775993e-06, + "loss": 0.3002, + "step": 11665 + }, + { + "epoch": 0.2335360208192578, + "grad_norm": 1.148095965385437, + "learning_rate": 8.952431592575489e-06, + "loss": 0.3326, + "step": 11666 + }, + { + "epoch": 0.23355603933638616, + "grad_norm": 1.9050889015197754, + "learning_rate": 8.952233028759814e-06, + "loss": 0.9111, + "step": 11667 + }, + { + "epoch": 0.2335760578535145, + "grad_norm": 1.105225682258606, + "learning_rate": 8.9520344483298e-06, + "loss": 0.3461, + "step": 11668 + }, + { + "epoch": 0.23359607637064284, + "grad_norm": 1.063377022743225, + "learning_rate": 8.951835851286286e-06, + "loss": 0.325, + "step": 11669 + }, + { + "epoch": 0.2336160948877712, + "grad_norm": 2.0921733379364014, + "learning_rate": 8.951637237630105e-06, + "loss": 0.7842, + "step": 11670 + }, + { + "epoch": 0.23363611340489954, + "grad_norm": 0.9789608716964722, + "learning_rate": 8.95143860736209e-06, + "loss": 0.3461, + "step": 11671 + }, + { + "epoch": 0.2336561319220279, + "grad_norm": 1.0446542501449585, + "learning_rate": 8.951239960483077e-06, + "loss": 0.3096, + "step": 11672 + }, + { + "epoch": 0.2336761504391562, + "grad_norm": 1.948056697845459, + "learning_rate": 8.951041296993902e-06, + "loss": 0.8085, + "step": 11673 + }, + { + "epoch": 0.23369616895628456, + "grad_norm": 1.122861623764038, + "learning_rate": 8.9508426168954e-06, + "loss": 0.3077, + "step": 11674 + }, + { + "epoch": 0.2337161874734129, + "grad_norm": 1.1750775575637817, + "learning_rate": 8.950643920188406e-06, + "loss": 0.3615, + "step": 11675 + }, + { + "epoch": 0.23373620599054126, + "grad_norm": 1.2455804347991943, + "learning_rate": 8.950445206873755e-06, + "loss": 0.3161, + "step": 11676 + }, + { + "epoch": 0.23375622450766959, + "grad_norm": 1.2336653470993042, + "learning_rate": 8.950246476952283e-06, + "loss": 0.3308, + "step": 11677 + }, + { + "epoch": 0.23377624302479794, + "grad_norm": 1.2255020141601562, + "learning_rate": 8.950047730424823e-06, + "loss": 0.3356, + "step": 11678 + }, + { + "epoch": 0.2337962615419263, + "grad_norm": 1.2216520309448242, + "learning_rate": 8.949848967292213e-06, + "loss": 0.3105, + "step": 11679 + }, + { + "epoch": 0.23381628005905464, + "grad_norm": 1.2936322689056396, + "learning_rate": 8.949650187555288e-06, + "loss": 0.3175, + "step": 11680 + }, + { + "epoch": 0.23383629857618296, + "grad_norm": 1.2044484615325928, + "learning_rate": 8.949451391214885e-06, + "loss": 0.3127, + "step": 11681 + }, + { + "epoch": 0.2338563170933113, + "grad_norm": 2.018134832382202, + "learning_rate": 8.949252578271837e-06, + "loss": 0.8985, + "step": 11682 + }, + { + "epoch": 0.23387633561043966, + "grad_norm": 1.1464112997055054, + "learning_rate": 8.949053748726983e-06, + "loss": 0.2893, + "step": 11683 + }, + { + "epoch": 0.233896354127568, + "grad_norm": 1.067225456237793, + "learning_rate": 8.948854902581154e-06, + "loss": 0.3396, + "step": 11684 + }, + { + "epoch": 0.23391637264469634, + "grad_norm": 1.1245737075805664, + "learning_rate": 8.948656039835191e-06, + "loss": 0.2821, + "step": 11685 + }, + { + "epoch": 0.23393639116182469, + "grad_norm": 1.2514209747314453, + "learning_rate": 8.948457160489928e-06, + "loss": 0.3483, + "step": 11686 + }, + { + "epoch": 0.23395640967895304, + "grad_norm": 1.0586559772491455, + "learning_rate": 8.9482582645462e-06, + "loss": 0.3527, + "step": 11687 + }, + { + "epoch": 0.2339764281960814, + "grad_norm": 1.089403510093689, + "learning_rate": 8.948059352004842e-06, + "loss": 0.3295, + "step": 11688 + }, + { + "epoch": 0.2339964467132097, + "grad_norm": 1.1771562099456787, + "learning_rate": 8.947860422866694e-06, + "loss": 0.3417, + "step": 11689 + }, + { + "epoch": 0.23401646523033806, + "grad_norm": 1.8608752489089966, + "learning_rate": 8.94766147713259e-06, + "loss": 0.8621, + "step": 11690 + }, + { + "epoch": 0.2340364837474664, + "grad_norm": 1.161569595336914, + "learning_rate": 8.947462514803367e-06, + "loss": 0.3397, + "step": 11691 + }, + { + "epoch": 0.23405650226459476, + "grad_norm": 1.0677558183670044, + "learning_rate": 8.947263535879862e-06, + "loss": 0.3411, + "step": 11692 + }, + { + "epoch": 0.23407652078172309, + "grad_norm": 1.1146271228790283, + "learning_rate": 8.947064540362908e-06, + "loss": 0.3223, + "step": 11693 + }, + { + "epoch": 0.23409653929885144, + "grad_norm": 1.0771387815475464, + "learning_rate": 8.946865528253346e-06, + "loss": 0.3114, + "step": 11694 + }, + { + "epoch": 0.2341165578159798, + "grad_norm": 1.1472176313400269, + "learning_rate": 8.94666649955201e-06, + "loss": 0.3866, + "step": 11695 + }, + { + "epoch": 0.23413657633310814, + "grad_norm": 1.0011813640594482, + "learning_rate": 8.946467454259739e-06, + "loss": 0.3201, + "step": 11696 + }, + { + "epoch": 0.23415659485023646, + "grad_norm": 1.0398286581039429, + "learning_rate": 8.946268392377366e-06, + "loss": 0.3218, + "step": 11697 + }, + { + "epoch": 0.2341766133673648, + "grad_norm": 1.1858572959899902, + "learning_rate": 8.946069313905732e-06, + "loss": 0.3096, + "step": 11698 + }, + { + "epoch": 0.23419663188449316, + "grad_norm": 1.1848894357681274, + "learning_rate": 8.94587021884567e-06, + "loss": 0.3524, + "step": 11699 + }, + { + "epoch": 0.2342166504016215, + "grad_norm": 1.0290287733078003, + "learning_rate": 8.94567110719802e-06, + "loss": 0.3621, + "step": 11700 + }, + { + "epoch": 0.23423666891874984, + "grad_norm": 1.0371230840682983, + "learning_rate": 8.945471978963616e-06, + "loss": 0.3305, + "step": 11701 + }, + { + "epoch": 0.23425668743587819, + "grad_norm": 1.0732815265655518, + "learning_rate": 8.945272834143299e-06, + "loss": 0.314, + "step": 11702 + }, + { + "epoch": 0.23427670595300654, + "grad_norm": 1.0190829038619995, + "learning_rate": 8.945073672737902e-06, + "loss": 0.3297, + "step": 11703 + }, + { + "epoch": 0.2342967244701349, + "grad_norm": 1.0446348190307617, + "learning_rate": 8.944874494748265e-06, + "loss": 0.2976, + "step": 11704 + }, + { + "epoch": 0.2343167429872632, + "grad_norm": 1.784479022026062, + "learning_rate": 8.944675300175225e-06, + "loss": 0.851, + "step": 11705 + }, + { + "epoch": 0.23433676150439156, + "grad_norm": 1.1028403043746948, + "learning_rate": 8.944476089019619e-06, + "loss": 0.3629, + "step": 11706 + }, + { + "epoch": 0.2343567800215199, + "grad_norm": 0.9767746329307556, + "learning_rate": 8.944276861282284e-06, + "loss": 0.3166, + "step": 11707 + }, + { + "epoch": 0.23437679853864826, + "grad_norm": 1.1907250881195068, + "learning_rate": 8.944077616964057e-06, + "loss": 0.3612, + "step": 11708 + }, + { + "epoch": 0.23439681705577659, + "grad_norm": 1.0458984375, + "learning_rate": 8.943878356065779e-06, + "loss": 0.3578, + "step": 11709 + }, + { + "epoch": 0.23441683557290494, + "grad_norm": 1.1066762208938599, + "learning_rate": 8.943679078588284e-06, + "loss": 0.3238, + "step": 11710 + }, + { + "epoch": 0.2344368540900333, + "grad_norm": 1.8653497695922852, + "learning_rate": 8.94347978453241e-06, + "loss": 0.7585, + "step": 11711 + }, + { + "epoch": 0.23445687260716164, + "grad_norm": 1.0231345891952515, + "learning_rate": 8.943280473898996e-06, + "loss": 0.3026, + "step": 11712 + }, + { + "epoch": 0.23447689112428996, + "grad_norm": 1.1520283222198486, + "learning_rate": 8.943081146688878e-06, + "loss": 0.3495, + "step": 11713 + }, + { + "epoch": 0.2344969096414183, + "grad_norm": 1.1408292055130005, + "learning_rate": 8.942881802902898e-06, + "loss": 0.3378, + "step": 11714 + }, + { + "epoch": 0.23451692815854666, + "grad_norm": 1.9410152435302734, + "learning_rate": 8.942682442541891e-06, + "loss": 0.7922, + "step": 11715 + }, + { + "epoch": 0.234536946675675, + "grad_norm": 1.1045130491256714, + "learning_rate": 8.942483065606696e-06, + "loss": 0.3095, + "step": 11716 + }, + { + "epoch": 0.23455696519280334, + "grad_norm": 1.0979180335998535, + "learning_rate": 8.942283672098149e-06, + "loss": 0.3699, + "step": 11717 + }, + { + "epoch": 0.23457698370993169, + "grad_norm": 1.1514315605163574, + "learning_rate": 8.94208426201709e-06, + "loss": 0.3074, + "step": 11718 + }, + { + "epoch": 0.23459700222706004, + "grad_norm": 1.0841532945632935, + "learning_rate": 8.941884835364358e-06, + "loss": 0.3111, + "step": 11719 + }, + { + "epoch": 0.2346170207441884, + "grad_norm": 1.080785870552063, + "learning_rate": 8.94168539214079e-06, + "loss": 0.2996, + "step": 11720 + }, + { + "epoch": 0.2346370392613167, + "grad_norm": 1.2794755697250366, + "learning_rate": 8.941485932347225e-06, + "loss": 0.3112, + "step": 11721 + }, + { + "epoch": 0.23465705777844506, + "grad_norm": 1.083659052848816, + "learning_rate": 8.9412864559845e-06, + "loss": 0.3616, + "step": 11722 + }, + { + "epoch": 0.2346770762955734, + "grad_norm": 2.070772171020508, + "learning_rate": 8.941086963053455e-06, + "loss": 0.8082, + "step": 11723 + }, + { + "epoch": 0.23469709481270176, + "grad_norm": 1.7909489870071411, + "learning_rate": 8.940887453554932e-06, + "loss": 0.8163, + "step": 11724 + }, + { + "epoch": 0.23471711332983008, + "grad_norm": 1.0749258995056152, + "learning_rate": 8.940687927489763e-06, + "loss": 0.3518, + "step": 11725 + }, + { + "epoch": 0.23473713184695844, + "grad_norm": 1.0453907251358032, + "learning_rate": 8.94048838485879e-06, + "loss": 0.304, + "step": 11726 + }, + { + "epoch": 0.2347571503640868, + "grad_norm": 1.1162590980529785, + "learning_rate": 8.940288825662853e-06, + "loss": 0.3179, + "step": 11727 + }, + { + "epoch": 0.23477716888121514, + "grad_norm": 1.305151343345642, + "learning_rate": 8.940089249902791e-06, + "loss": 0.3632, + "step": 11728 + }, + { + "epoch": 0.23479718739834346, + "grad_norm": 1.0941097736358643, + "learning_rate": 8.93988965757944e-06, + "loss": 0.3025, + "step": 11729 + }, + { + "epoch": 0.2348172059154718, + "grad_norm": 1.8038444519042969, + "learning_rate": 8.93969004869364e-06, + "loss": 0.8249, + "step": 11730 + }, + { + "epoch": 0.23483722443260016, + "grad_norm": 1.0716339349746704, + "learning_rate": 8.939490423246232e-06, + "loss": 0.3554, + "step": 11731 + }, + { + "epoch": 0.2348572429497285, + "grad_norm": 1.1946489810943604, + "learning_rate": 8.939290781238054e-06, + "loss": 0.3082, + "step": 11732 + }, + { + "epoch": 0.23487726146685683, + "grad_norm": 1.0961054563522339, + "learning_rate": 8.939091122669945e-06, + "loss": 0.3432, + "step": 11733 + }, + { + "epoch": 0.23489727998398519, + "grad_norm": 1.0390746593475342, + "learning_rate": 8.938891447542745e-06, + "loss": 0.3138, + "step": 11734 + }, + { + "epoch": 0.23491729850111354, + "grad_norm": 1.2293426990509033, + "learning_rate": 8.938691755857291e-06, + "loss": 0.3209, + "step": 11735 + }, + { + "epoch": 0.2349373170182419, + "grad_norm": 1.082489013671875, + "learning_rate": 8.938492047614428e-06, + "loss": 0.3695, + "step": 11736 + }, + { + "epoch": 0.2349573355353702, + "grad_norm": 1.067879557609558, + "learning_rate": 8.93829232281499e-06, + "loss": 0.3076, + "step": 11737 + }, + { + "epoch": 0.23497735405249856, + "grad_norm": 1.0770344734191895, + "learning_rate": 8.938092581459819e-06, + "loss": 0.2925, + "step": 11738 + }, + { + "epoch": 0.2349973725696269, + "grad_norm": 0.9545282125473022, + "learning_rate": 8.937892823549754e-06, + "loss": 0.2906, + "step": 11739 + }, + { + "epoch": 0.23501739108675526, + "grad_norm": 1.09294855594635, + "learning_rate": 8.937693049085636e-06, + "loss": 0.2877, + "step": 11740 + }, + { + "epoch": 0.23503740960388358, + "grad_norm": 1.086772084236145, + "learning_rate": 8.9374932580683e-06, + "loss": 0.3654, + "step": 11741 + }, + { + "epoch": 0.23505742812101194, + "grad_norm": 1.1528656482696533, + "learning_rate": 8.937293450498594e-06, + "loss": 0.3467, + "step": 11742 + }, + { + "epoch": 0.23507744663814029, + "grad_norm": 1.993855357170105, + "learning_rate": 8.937093626377353e-06, + "loss": 0.8145, + "step": 11743 + }, + { + "epoch": 0.23509746515526864, + "grad_norm": 1.8110814094543457, + "learning_rate": 8.936893785705416e-06, + "loss": 0.845, + "step": 11744 + }, + { + "epoch": 0.23511748367239696, + "grad_norm": 1.1588151454925537, + "learning_rate": 8.936693928483627e-06, + "loss": 0.3224, + "step": 11745 + }, + { + "epoch": 0.2351375021895253, + "grad_norm": 1.1231520175933838, + "learning_rate": 8.936494054712823e-06, + "loss": 0.3664, + "step": 11746 + }, + { + "epoch": 0.23515752070665366, + "grad_norm": 1.057801365852356, + "learning_rate": 8.936294164393843e-06, + "loss": 0.3334, + "step": 11747 + }, + { + "epoch": 0.235177539223782, + "grad_norm": 1.1571705341339111, + "learning_rate": 8.936094257527533e-06, + "loss": 0.3298, + "step": 11748 + }, + { + "epoch": 0.23519755774091033, + "grad_norm": 1.9786126613616943, + "learning_rate": 8.935894334114728e-06, + "loss": 0.8316, + "step": 11749 + }, + { + "epoch": 0.23521757625803869, + "grad_norm": 1.1593838930130005, + "learning_rate": 8.935694394156269e-06, + "loss": 0.3374, + "step": 11750 + }, + { + "epoch": 0.23523759477516704, + "grad_norm": 1.1572284698486328, + "learning_rate": 8.935494437653e-06, + "loss": 0.2959, + "step": 11751 + }, + { + "epoch": 0.2352576132922954, + "grad_norm": 1.1130822896957397, + "learning_rate": 8.935294464605757e-06, + "loss": 0.3035, + "step": 11752 + }, + { + "epoch": 0.2352776318094237, + "grad_norm": 1.09548819065094, + "learning_rate": 8.935094475015384e-06, + "loss": 0.3288, + "step": 11753 + }, + { + "epoch": 0.23529765032655206, + "grad_norm": 1.247061014175415, + "learning_rate": 8.934894468882721e-06, + "loss": 0.3629, + "step": 11754 + }, + { + "epoch": 0.2353176688436804, + "grad_norm": 1.996963381767273, + "learning_rate": 8.93469444620861e-06, + "loss": 0.8327, + "step": 11755 + }, + { + "epoch": 0.23533768736080876, + "grad_norm": 1.1803034543991089, + "learning_rate": 8.934494406993887e-06, + "loss": 0.3257, + "step": 11756 + }, + { + "epoch": 0.23535770587793708, + "grad_norm": 1.2175770998001099, + "learning_rate": 8.934294351239398e-06, + "loss": 0.3123, + "step": 11757 + }, + { + "epoch": 0.23537772439506544, + "grad_norm": 1.9012969732284546, + "learning_rate": 8.934094278945983e-06, + "loss": 0.8273, + "step": 11758 + }, + { + "epoch": 0.23539774291219379, + "grad_norm": 1.251008152961731, + "learning_rate": 8.933894190114481e-06, + "loss": 0.3387, + "step": 11759 + }, + { + "epoch": 0.23541776142932214, + "grad_norm": 1.104595422744751, + "learning_rate": 8.933694084745734e-06, + "loss": 0.3319, + "step": 11760 + }, + { + "epoch": 0.23543777994645046, + "grad_norm": 1.175912618637085, + "learning_rate": 8.933493962840585e-06, + "loss": 0.3091, + "step": 11761 + }, + { + "epoch": 0.2354577984635788, + "grad_norm": 1.078609585762024, + "learning_rate": 8.933293824399873e-06, + "loss": 0.3311, + "step": 11762 + }, + { + "epoch": 0.23547781698070716, + "grad_norm": 1.2044907808303833, + "learning_rate": 8.933093669424441e-06, + "loss": 0.3745, + "step": 11763 + }, + { + "epoch": 0.2354978354978355, + "grad_norm": 1.1416409015655518, + "learning_rate": 8.93289349791513e-06, + "loss": 0.3449, + "step": 11764 + }, + { + "epoch": 0.23551785401496383, + "grad_norm": 0.999531626701355, + "learning_rate": 8.93269330987278e-06, + "loss": 0.2836, + "step": 11765 + }, + { + "epoch": 0.23553787253209219, + "grad_norm": 1.0916664600372314, + "learning_rate": 8.932493105298234e-06, + "loss": 0.2867, + "step": 11766 + }, + { + "epoch": 0.23555789104922054, + "grad_norm": 1.1143255233764648, + "learning_rate": 8.932292884192332e-06, + "loss": 0.3586, + "step": 11767 + }, + { + "epoch": 0.2355779095663489, + "grad_norm": 1.2533656358718872, + "learning_rate": 8.932092646555919e-06, + "loss": 0.3349, + "step": 11768 + }, + { + "epoch": 0.2355979280834772, + "grad_norm": 1.1557797193527222, + "learning_rate": 8.931892392389834e-06, + "loss": 0.3353, + "step": 11769 + }, + { + "epoch": 0.23561794660060556, + "grad_norm": 1.1336684226989746, + "learning_rate": 8.931692121694919e-06, + "loss": 0.3466, + "step": 11770 + }, + { + "epoch": 0.2356379651177339, + "grad_norm": 1.093472957611084, + "learning_rate": 8.931491834472017e-06, + "loss": 0.322, + "step": 11771 + }, + { + "epoch": 0.23565798363486226, + "grad_norm": 1.0962278842926025, + "learning_rate": 8.931291530721968e-06, + "loss": 0.3, + "step": 11772 + }, + { + "epoch": 0.23567800215199058, + "grad_norm": 1.0312974452972412, + "learning_rate": 8.931091210445617e-06, + "loss": 0.3512, + "step": 11773 + }, + { + "epoch": 0.23569802066911894, + "grad_norm": 1.2035775184631348, + "learning_rate": 8.930890873643806e-06, + "loss": 0.3273, + "step": 11774 + }, + { + "epoch": 0.23571803918624729, + "grad_norm": 1.1392487287521362, + "learning_rate": 8.930690520317372e-06, + "loss": 0.3229, + "step": 11775 + }, + { + "epoch": 0.2357380577033756, + "grad_norm": 1.040474534034729, + "learning_rate": 8.930490150467164e-06, + "loss": 0.3125, + "step": 11776 + }, + { + "epoch": 0.23575807622050396, + "grad_norm": 1.7693300247192383, + "learning_rate": 8.930289764094019e-06, + "loss": 0.8819, + "step": 11777 + }, + { + "epoch": 0.2357780947376323, + "grad_norm": 1.28923499584198, + "learning_rate": 8.930089361198783e-06, + "loss": 0.3771, + "step": 11778 + }, + { + "epoch": 0.23579811325476066, + "grad_norm": 1.1820710897445679, + "learning_rate": 8.929888941782296e-06, + "loss": 0.3908, + "step": 11779 + }, + { + "epoch": 0.23581813177188898, + "grad_norm": 1.0905324220657349, + "learning_rate": 8.929688505845404e-06, + "loss": 0.3059, + "step": 11780 + }, + { + "epoch": 0.23583815028901733, + "grad_norm": 1.2481787204742432, + "learning_rate": 8.929488053388944e-06, + "loss": 0.308, + "step": 11781 + }, + { + "epoch": 0.23585816880614568, + "grad_norm": 1.0998278856277466, + "learning_rate": 8.929287584413764e-06, + "loss": 0.3907, + "step": 11782 + }, + { + "epoch": 0.23587818732327404, + "grad_norm": 1.1173491477966309, + "learning_rate": 8.929087098920704e-06, + "loss": 0.3093, + "step": 11783 + }, + { + "epoch": 0.23589820584040236, + "grad_norm": 1.3767613172531128, + "learning_rate": 8.928886596910606e-06, + "loss": 0.3665, + "step": 11784 + }, + { + "epoch": 0.2359182243575307, + "grad_norm": 1.00790536403656, + "learning_rate": 8.928686078384317e-06, + "loss": 0.369, + "step": 11785 + }, + { + "epoch": 0.23593824287465906, + "grad_norm": 1.1887986660003662, + "learning_rate": 8.928485543342676e-06, + "loss": 0.2934, + "step": 11786 + }, + { + "epoch": 0.2359582613917874, + "grad_norm": 1.2621864080429077, + "learning_rate": 8.928284991786526e-06, + "loss": 0.3386, + "step": 11787 + }, + { + "epoch": 0.23597827990891573, + "grad_norm": 1.234105110168457, + "learning_rate": 8.928084423716714e-06, + "loss": 0.328, + "step": 11788 + }, + { + "epoch": 0.23599829842604408, + "grad_norm": 1.076379418373108, + "learning_rate": 8.927883839134077e-06, + "loss": 0.341, + "step": 11789 + }, + { + "epoch": 0.23601831694317243, + "grad_norm": 1.1891028881072998, + "learning_rate": 8.927683238039465e-06, + "loss": 0.3692, + "step": 11790 + }, + { + "epoch": 0.23603833546030079, + "grad_norm": 1.8512327671051025, + "learning_rate": 8.927482620433717e-06, + "loss": 0.8687, + "step": 11791 + }, + { + "epoch": 0.2360583539774291, + "grad_norm": 1.1051926612854004, + "learning_rate": 8.927281986317674e-06, + "loss": 0.3134, + "step": 11792 + }, + { + "epoch": 0.23607837249455746, + "grad_norm": 1.1257991790771484, + "learning_rate": 8.927081335692185e-06, + "loss": 0.3115, + "step": 11793 + }, + { + "epoch": 0.2360983910116858, + "grad_norm": 0.9880839586257935, + "learning_rate": 8.926880668558094e-06, + "loss": 0.3498, + "step": 11794 + }, + { + "epoch": 0.23611840952881416, + "grad_norm": 1.0150190591812134, + "learning_rate": 8.926679984916238e-06, + "loss": 0.2978, + "step": 11795 + }, + { + "epoch": 0.23613842804594248, + "grad_norm": 1.0748423337936401, + "learning_rate": 8.926479284767465e-06, + "loss": 0.3388, + "step": 11796 + }, + { + "epoch": 0.23615844656307083, + "grad_norm": 1.114076018333435, + "learning_rate": 8.926278568112619e-06, + "loss": 0.2836, + "step": 11797 + }, + { + "epoch": 0.23617846508019918, + "grad_norm": 1.0543029308319092, + "learning_rate": 8.926077834952543e-06, + "loss": 0.313, + "step": 11798 + }, + { + "epoch": 0.23619848359732754, + "grad_norm": 1.0978283882141113, + "learning_rate": 8.925877085288079e-06, + "loss": 0.3177, + "step": 11799 + }, + { + "epoch": 0.23621850211445586, + "grad_norm": 1.0369036197662354, + "learning_rate": 8.925676319120074e-06, + "loss": 0.3193, + "step": 11800 + }, + { + "epoch": 0.2362385206315842, + "grad_norm": 1.1010699272155762, + "learning_rate": 8.92547553644937e-06, + "loss": 0.3608, + "step": 11801 + }, + { + "epoch": 0.23625853914871256, + "grad_norm": 1.120125651359558, + "learning_rate": 8.925274737276813e-06, + "loss": 0.3274, + "step": 11802 + }, + { + "epoch": 0.2362785576658409, + "grad_norm": 1.096861720085144, + "learning_rate": 8.925073921603244e-06, + "loss": 0.3036, + "step": 11803 + }, + { + "epoch": 0.23629857618296923, + "grad_norm": 1.9306288957595825, + "learning_rate": 8.92487308942951e-06, + "loss": 0.7882, + "step": 11804 + }, + { + "epoch": 0.23631859470009758, + "grad_norm": 1.0574545860290527, + "learning_rate": 8.924672240756453e-06, + "loss": 0.3862, + "step": 11805 + }, + { + "epoch": 0.23633861321722593, + "grad_norm": 1.1379494667053223, + "learning_rate": 8.92447137558492e-06, + "loss": 0.3697, + "step": 11806 + }, + { + "epoch": 0.23635863173435429, + "grad_norm": 1.231246829032898, + "learning_rate": 8.924270493915752e-06, + "loss": 0.3113, + "step": 11807 + }, + { + "epoch": 0.2363786502514826, + "grad_norm": 1.2246079444885254, + "learning_rate": 8.924069595749797e-06, + "loss": 0.3183, + "step": 11808 + }, + { + "epoch": 0.23639866876861096, + "grad_norm": 1.0621706247329712, + "learning_rate": 8.923868681087897e-06, + "loss": 0.3241, + "step": 11809 + }, + { + "epoch": 0.2364186872857393, + "grad_norm": 1.1140340566635132, + "learning_rate": 8.923667749930898e-06, + "loss": 0.2967, + "step": 11810 + }, + { + "epoch": 0.23643870580286766, + "grad_norm": 1.1379016637802124, + "learning_rate": 8.923466802279645e-06, + "loss": 0.3218, + "step": 11811 + }, + { + "epoch": 0.23645872431999598, + "grad_norm": 1.0780153274536133, + "learning_rate": 8.92326583813498e-06, + "loss": 0.3353, + "step": 11812 + }, + { + "epoch": 0.23647874283712433, + "grad_norm": 1.2518739700317383, + "learning_rate": 8.92306485749775e-06, + "loss": 0.3862, + "step": 11813 + }, + { + "epoch": 0.23649876135425268, + "grad_norm": 1.0802104473114014, + "learning_rate": 8.922863860368801e-06, + "loss": 0.3471, + "step": 11814 + }, + { + "epoch": 0.23651877987138104, + "grad_norm": 1.1599420309066772, + "learning_rate": 8.922662846748976e-06, + "loss": 0.3506, + "step": 11815 + }, + { + "epoch": 0.23653879838850936, + "grad_norm": 1.098418951034546, + "learning_rate": 8.922461816639121e-06, + "loss": 0.3262, + "step": 11816 + }, + { + "epoch": 0.2365588169056377, + "grad_norm": 1.068804383277893, + "learning_rate": 8.922260770040078e-06, + "loss": 0.3415, + "step": 11817 + }, + { + "epoch": 0.23657883542276606, + "grad_norm": 1.121490716934204, + "learning_rate": 8.922059706952697e-06, + "loss": 0.261, + "step": 11818 + }, + { + "epoch": 0.2365988539398944, + "grad_norm": 1.0448391437530518, + "learning_rate": 8.921858627377822e-06, + "loss": 0.3263, + "step": 11819 + }, + { + "epoch": 0.23661887245702273, + "grad_norm": 1.1438305377960205, + "learning_rate": 8.921657531316297e-06, + "loss": 0.3273, + "step": 11820 + }, + { + "epoch": 0.23663889097415108, + "grad_norm": 1.2077592611312866, + "learning_rate": 8.921456418768966e-06, + "loss": 0.3383, + "step": 11821 + }, + { + "epoch": 0.23665890949127943, + "grad_norm": 1.0479258298873901, + "learning_rate": 8.921255289736677e-06, + "loss": 0.349, + "step": 11822 + }, + { + "epoch": 0.23667892800840779, + "grad_norm": 1.0244218111038208, + "learning_rate": 8.921054144220274e-06, + "loss": 0.3335, + "step": 11823 + }, + { + "epoch": 0.2366989465255361, + "grad_norm": 1.3010425567626953, + "learning_rate": 8.920852982220605e-06, + "loss": 0.3573, + "step": 11824 + }, + { + "epoch": 0.23671896504266446, + "grad_norm": 0.9567465782165527, + "learning_rate": 8.920651803738512e-06, + "loss": 0.2648, + "step": 11825 + }, + { + "epoch": 0.2367389835597928, + "grad_norm": 1.1390869617462158, + "learning_rate": 8.920450608774843e-06, + "loss": 0.3713, + "step": 11826 + }, + { + "epoch": 0.23675900207692116, + "grad_norm": 1.1022576093673706, + "learning_rate": 8.920249397330442e-06, + "loss": 0.3859, + "step": 11827 + }, + { + "epoch": 0.23677902059404948, + "grad_norm": 1.1674872636795044, + "learning_rate": 8.92004816940616e-06, + "loss": 0.3351, + "step": 11828 + }, + { + "epoch": 0.23679903911117783, + "grad_norm": 1.108107328414917, + "learning_rate": 8.919846925002835e-06, + "loss": 0.3795, + "step": 11829 + }, + { + "epoch": 0.23681905762830618, + "grad_norm": 1.1882473230361938, + "learning_rate": 8.919645664121319e-06, + "loss": 0.3691, + "step": 11830 + }, + { + "epoch": 0.23683907614543454, + "grad_norm": 1.083210825920105, + "learning_rate": 8.919444386762457e-06, + "loss": 0.2837, + "step": 11831 + }, + { + "epoch": 0.23685909466256286, + "grad_norm": 1.994312047958374, + "learning_rate": 8.919243092927092e-06, + "loss": 0.7715, + "step": 11832 + }, + { + "epoch": 0.2368791131796912, + "grad_norm": 1.9507384300231934, + "learning_rate": 8.919041782616074e-06, + "loss": 0.8165, + "step": 11833 + }, + { + "epoch": 0.23689913169681956, + "grad_norm": 1.0827041864395142, + "learning_rate": 8.918840455830248e-06, + "loss": 0.3447, + "step": 11834 + }, + { + "epoch": 0.2369191502139479, + "grad_norm": 1.1524747610092163, + "learning_rate": 8.91863911257046e-06, + "loss": 0.3491, + "step": 11835 + }, + { + "epoch": 0.23693916873107623, + "grad_norm": 1.1871901750564575, + "learning_rate": 8.918437752837556e-06, + "loss": 0.3107, + "step": 11836 + }, + { + "epoch": 0.23695918724820458, + "grad_norm": 1.098077416419983, + "learning_rate": 8.918236376632382e-06, + "loss": 0.3405, + "step": 11837 + }, + { + "epoch": 0.23697920576533293, + "grad_norm": 1.8033771514892578, + "learning_rate": 8.918034983955787e-06, + "loss": 0.8858, + "step": 11838 + }, + { + "epoch": 0.23699922428246128, + "grad_norm": 1.109659194946289, + "learning_rate": 8.917833574808615e-06, + "loss": 0.3091, + "step": 11839 + }, + { + "epoch": 0.2370192427995896, + "grad_norm": 1.2611831426620483, + "learning_rate": 8.917632149191714e-06, + "loss": 0.3795, + "step": 11840 + }, + { + "epoch": 0.23703926131671796, + "grad_norm": 1.1189926862716675, + "learning_rate": 8.917430707105933e-06, + "loss": 0.3491, + "step": 11841 + }, + { + "epoch": 0.2370592798338463, + "grad_norm": 1.1395405530929565, + "learning_rate": 8.917229248552112e-06, + "loss": 0.362, + "step": 11842 + }, + { + "epoch": 0.23707929835097466, + "grad_norm": 1.163612961769104, + "learning_rate": 8.917027773531106e-06, + "loss": 0.3154, + "step": 11843 + }, + { + "epoch": 0.23709931686810298, + "grad_norm": 1.0599907636642456, + "learning_rate": 8.916826282043755e-06, + "loss": 0.3426, + "step": 11844 + }, + { + "epoch": 0.23711933538523133, + "grad_norm": 1.8224337100982666, + "learning_rate": 8.916624774090911e-06, + "loss": 0.8612, + "step": 11845 + }, + { + "epoch": 0.23713935390235968, + "grad_norm": 1.1589113473892212, + "learning_rate": 8.91642324967342e-06, + "loss": 0.3791, + "step": 11846 + }, + { + "epoch": 0.23715937241948803, + "grad_norm": 1.684733510017395, + "learning_rate": 8.916221708792127e-06, + "loss": 0.8646, + "step": 11847 + }, + { + "epoch": 0.23717939093661636, + "grad_norm": 1.1292699575424194, + "learning_rate": 8.916020151447882e-06, + "loss": 0.3563, + "step": 11848 + }, + { + "epoch": 0.2371994094537447, + "grad_norm": 1.1460847854614258, + "learning_rate": 8.915818577641531e-06, + "loss": 0.3361, + "step": 11849 + }, + { + "epoch": 0.23721942797087306, + "grad_norm": 1.0993794202804565, + "learning_rate": 8.91561698737392e-06, + "loss": 0.3497, + "step": 11850 + }, + { + "epoch": 0.2372394464880014, + "grad_norm": 1.1020680665969849, + "learning_rate": 8.915415380645899e-06, + "loss": 0.3135, + "step": 11851 + }, + { + "epoch": 0.23725946500512973, + "grad_norm": 1.1583054065704346, + "learning_rate": 8.915213757458315e-06, + "loss": 0.3313, + "step": 11852 + }, + { + "epoch": 0.23727948352225808, + "grad_norm": 1.183249592781067, + "learning_rate": 8.915012117812012e-06, + "loss": 0.3378, + "step": 11853 + }, + { + "epoch": 0.23729950203938643, + "grad_norm": 2.015324592590332, + "learning_rate": 8.914810461707843e-06, + "loss": 0.7302, + "step": 11854 + }, + { + "epoch": 0.23731952055651478, + "grad_norm": 1.005705714225769, + "learning_rate": 8.914608789146652e-06, + "loss": 0.318, + "step": 11855 + }, + { + "epoch": 0.2373395390736431, + "grad_norm": 1.327820062637329, + "learning_rate": 8.914407100129289e-06, + "loss": 0.3849, + "step": 11856 + }, + { + "epoch": 0.23735955759077146, + "grad_norm": 0.9431267976760864, + "learning_rate": 8.914205394656601e-06, + "loss": 0.267, + "step": 11857 + }, + { + "epoch": 0.2373795761078998, + "grad_norm": 1.1596602201461792, + "learning_rate": 8.914003672729436e-06, + "loss": 0.3341, + "step": 11858 + }, + { + "epoch": 0.23739959462502816, + "grad_norm": 1.8778213262557983, + "learning_rate": 8.913801934348641e-06, + "loss": 0.8739, + "step": 11859 + }, + { + "epoch": 0.23741961314215648, + "grad_norm": 1.0336300134658813, + "learning_rate": 8.913600179515066e-06, + "loss": 0.3148, + "step": 11860 + }, + { + "epoch": 0.23743963165928483, + "grad_norm": 1.1602343320846558, + "learning_rate": 8.913398408229558e-06, + "loss": 0.3397, + "step": 11861 + }, + { + "epoch": 0.23745965017641318, + "grad_norm": 1.9055272340774536, + "learning_rate": 8.913196620492964e-06, + "loss": 0.8249, + "step": 11862 + }, + { + "epoch": 0.23747966869354153, + "grad_norm": 1.0740779638290405, + "learning_rate": 8.912994816306134e-06, + "loss": 0.3143, + "step": 11863 + }, + { + "epoch": 0.23749968721066986, + "grad_norm": 0.9188457727432251, + "learning_rate": 8.912792995669916e-06, + "loss": 0.246, + "step": 11864 + }, + { + "epoch": 0.2375197057277982, + "grad_norm": 1.0727025270462036, + "learning_rate": 8.91259115858516e-06, + "loss": 0.3225, + "step": 11865 + }, + { + "epoch": 0.23753972424492656, + "grad_norm": 1.0408214330673218, + "learning_rate": 8.91238930505271e-06, + "loss": 0.3112, + "step": 11866 + }, + { + "epoch": 0.2375597427620549, + "grad_norm": 1.125105381011963, + "learning_rate": 8.912187435073419e-06, + "loss": 0.3583, + "step": 11867 + }, + { + "epoch": 0.23757976127918323, + "grad_norm": 1.043652892112732, + "learning_rate": 8.911985548648134e-06, + "loss": 0.3058, + "step": 11868 + }, + { + "epoch": 0.23759977979631158, + "grad_norm": 1.1755483150482178, + "learning_rate": 8.911783645777702e-06, + "loss": 0.3744, + "step": 11869 + }, + { + "epoch": 0.23761979831343993, + "grad_norm": 1.8406740427017212, + "learning_rate": 8.911581726462975e-06, + "loss": 0.8354, + "step": 11870 + }, + { + "epoch": 0.23763981683056828, + "grad_norm": 1.0196621417999268, + "learning_rate": 8.911379790704799e-06, + "loss": 0.3466, + "step": 11871 + }, + { + "epoch": 0.2376598353476966, + "grad_norm": 1.2963327169418335, + "learning_rate": 8.911177838504026e-06, + "loss": 0.2825, + "step": 11872 + }, + { + "epoch": 0.23767985386482496, + "grad_norm": 1.1119000911712646, + "learning_rate": 8.910975869861501e-06, + "loss": 0.2926, + "step": 11873 + }, + { + "epoch": 0.2376998723819533, + "grad_norm": 1.0405703783035278, + "learning_rate": 8.910773884778076e-06, + "loss": 0.3452, + "step": 11874 + }, + { + "epoch": 0.23771989089908166, + "grad_norm": 1.2172672748565674, + "learning_rate": 8.9105718832546e-06, + "loss": 0.3378, + "step": 11875 + }, + { + "epoch": 0.23773990941620998, + "grad_norm": 0.9961640238761902, + "learning_rate": 8.910369865291919e-06, + "loss": 0.2793, + "step": 11876 + }, + { + "epoch": 0.23775992793333833, + "grad_norm": 1.1987650394439697, + "learning_rate": 8.910167830890887e-06, + "loss": 0.3666, + "step": 11877 + }, + { + "epoch": 0.23777994645046668, + "grad_norm": 1.0778720378875732, + "learning_rate": 8.90996578005235e-06, + "loss": 0.2797, + "step": 11878 + }, + { + "epoch": 0.23779996496759503, + "grad_norm": 1.1269499063491821, + "learning_rate": 8.909763712777157e-06, + "loss": 0.3673, + "step": 11879 + }, + { + "epoch": 0.23781998348472336, + "grad_norm": 1.0754666328430176, + "learning_rate": 8.90956162906616e-06, + "loss": 0.3194, + "step": 11880 + }, + { + "epoch": 0.2378400020018517, + "grad_norm": 1.0261446237564087, + "learning_rate": 8.909359528920208e-06, + "loss": 0.3783, + "step": 11881 + }, + { + "epoch": 0.23786002051898006, + "grad_norm": 1.1102408170700073, + "learning_rate": 8.90915741234015e-06, + "loss": 0.2542, + "step": 11882 + }, + { + "epoch": 0.2378800390361084, + "grad_norm": 1.1202603578567505, + "learning_rate": 8.908955279326833e-06, + "loss": 0.3497, + "step": 11883 + }, + { + "epoch": 0.23790005755323673, + "grad_norm": 1.096828818321228, + "learning_rate": 8.908753129881113e-06, + "loss": 0.35, + "step": 11884 + }, + { + "epoch": 0.23792007607036508, + "grad_norm": 1.1218723058700562, + "learning_rate": 8.908550964003833e-06, + "loss": 0.3379, + "step": 11885 + }, + { + "epoch": 0.23794009458749343, + "grad_norm": 1.0128053426742554, + "learning_rate": 8.908348781695847e-06, + "loss": 0.3551, + "step": 11886 + }, + { + "epoch": 0.23796011310462178, + "grad_norm": 1.1354930400848389, + "learning_rate": 8.908146582958005e-06, + "loss": 0.3483, + "step": 11887 + }, + { + "epoch": 0.2379801316217501, + "grad_norm": 1.2295856475830078, + "learning_rate": 8.907944367791152e-06, + "loss": 0.3226, + "step": 11888 + }, + { + "epoch": 0.23800015013887846, + "grad_norm": 1.1495866775512695, + "learning_rate": 8.907742136196145e-06, + "loss": 0.3312, + "step": 11889 + }, + { + "epoch": 0.2380201686560068, + "grad_norm": 1.0311073064804077, + "learning_rate": 8.907539888173831e-06, + "loss": 0.365, + "step": 11890 + }, + { + "epoch": 0.23804018717313516, + "grad_norm": 1.0775750875473022, + "learning_rate": 8.907337623725059e-06, + "loss": 0.3315, + "step": 11891 + }, + { + "epoch": 0.23806020569026348, + "grad_norm": 1.1994154453277588, + "learning_rate": 8.90713534285068e-06, + "loss": 0.3157, + "step": 11892 + }, + { + "epoch": 0.23808022420739183, + "grad_norm": 1.1147997379302979, + "learning_rate": 8.906933045551546e-06, + "loss": 0.3229, + "step": 11893 + }, + { + "epoch": 0.23810024272452018, + "grad_norm": 1.1660150289535522, + "learning_rate": 8.906730731828507e-06, + "loss": 0.3598, + "step": 11894 + }, + { + "epoch": 0.23812026124164853, + "grad_norm": 1.8454127311706543, + "learning_rate": 8.906528401682412e-06, + "loss": 0.7779, + "step": 11895 + }, + { + "epoch": 0.23814027975877686, + "grad_norm": 1.1083155870437622, + "learning_rate": 8.906326055114112e-06, + "loss": 0.3648, + "step": 11896 + }, + { + "epoch": 0.2381602982759052, + "grad_norm": 1.1637765169143677, + "learning_rate": 8.906123692124457e-06, + "loss": 0.3694, + "step": 11897 + }, + { + "epoch": 0.23818031679303356, + "grad_norm": 1.2992876768112183, + "learning_rate": 8.905921312714301e-06, + "loss": 0.2928, + "step": 11898 + }, + { + "epoch": 0.2382003353101619, + "grad_norm": 1.2769745588302612, + "learning_rate": 8.905718916884491e-06, + "loss": 0.3367, + "step": 11899 + }, + { + "epoch": 0.23822035382729023, + "grad_norm": 1.0945887565612793, + "learning_rate": 8.905516504635879e-06, + "loss": 0.3495, + "step": 11900 + }, + { + "epoch": 0.23824037234441858, + "grad_norm": 1.8481597900390625, + "learning_rate": 8.905314075969315e-06, + "loss": 0.8211, + "step": 11901 + }, + { + "epoch": 0.23826039086154693, + "grad_norm": 1.189721941947937, + "learning_rate": 8.905111630885654e-06, + "loss": 0.3711, + "step": 11902 + }, + { + "epoch": 0.23828040937867528, + "grad_norm": 1.0292465686798096, + "learning_rate": 8.904909169385742e-06, + "loss": 0.2994, + "step": 11903 + }, + { + "epoch": 0.2383004278958036, + "grad_norm": 1.70267653465271, + "learning_rate": 8.904706691470433e-06, + "loss": 0.2989, + "step": 11904 + }, + { + "epoch": 0.23832044641293196, + "grad_norm": 1.1844565868377686, + "learning_rate": 8.904504197140578e-06, + "loss": 0.361, + "step": 11905 + }, + { + "epoch": 0.2383404649300603, + "grad_norm": 1.1991639137268066, + "learning_rate": 8.904301686397026e-06, + "loss": 0.328, + "step": 11906 + }, + { + "epoch": 0.23836048344718866, + "grad_norm": 1.0486637353897095, + "learning_rate": 8.90409915924063e-06, + "loss": 0.3239, + "step": 11907 + }, + { + "epoch": 0.23838050196431698, + "grad_norm": 1.1467044353485107, + "learning_rate": 8.903896615672242e-06, + "loss": 0.3043, + "step": 11908 + }, + { + "epoch": 0.23840052048144533, + "grad_norm": 1.7421468496322632, + "learning_rate": 8.903694055692714e-06, + "loss": 0.823, + "step": 11909 + }, + { + "epoch": 0.23842053899857368, + "grad_norm": 1.1819533109664917, + "learning_rate": 8.903491479302894e-06, + "loss": 0.2996, + "step": 11910 + }, + { + "epoch": 0.23844055751570203, + "grad_norm": 1.9436298608779907, + "learning_rate": 8.903288886503636e-06, + "loss": 0.9193, + "step": 11911 + }, + { + "epoch": 0.23846057603283036, + "grad_norm": 1.2487547397613525, + "learning_rate": 8.903086277295793e-06, + "loss": 0.3198, + "step": 11912 + }, + { + "epoch": 0.2384805945499587, + "grad_norm": 1.1072313785552979, + "learning_rate": 8.902883651680215e-06, + "loss": 0.327, + "step": 11913 + }, + { + "epoch": 0.23850061306708706, + "grad_norm": 1.0468069314956665, + "learning_rate": 8.902681009657753e-06, + "loss": 0.3539, + "step": 11914 + }, + { + "epoch": 0.2385206315842154, + "grad_norm": 1.1914820671081543, + "learning_rate": 8.90247835122926e-06, + "loss": 0.3642, + "step": 11915 + }, + { + "epoch": 0.23854065010134373, + "grad_norm": 0.9950615763664246, + "learning_rate": 8.902275676395588e-06, + "loss": 0.3248, + "step": 11916 + }, + { + "epoch": 0.23856066861847208, + "grad_norm": 1.2263774871826172, + "learning_rate": 8.902072985157591e-06, + "loss": 0.3532, + "step": 11917 + }, + { + "epoch": 0.23858068713560043, + "grad_norm": 1.065355658531189, + "learning_rate": 8.901870277516117e-06, + "loss": 0.3505, + "step": 11918 + }, + { + "epoch": 0.23860070565272878, + "grad_norm": 1.0528472661972046, + "learning_rate": 8.901667553472019e-06, + "loss": 0.2973, + "step": 11919 + }, + { + "epoch": 0.2386207241698571, + "grad_norm": 1.1460754871368408, + "learning_rate": 8.90146481302615e-06, + "loss": 0.3145, + "step": 11920 + }, + { + "epoch": 0.23864074268698546, + "grad_norm": 1.058590292930603, + "learning_rate": 8.901262056179365e-06, + "loss": 0.3413, + "step": 11921 + }, + { + "epoch": 0.2386607612041138, + "grad_norm": 1.1805087327957153, + "learning_rate": 8.901059282932513e-06, + "loss": 0.3679, + "step": 11922 + }, + { + "epoch": 0.23868077972124216, + "grad_norm": 1.0749706029891968, + "learning_rate": 8.900856493286446e-06, + "loss": 0.35, + "step": 11923 + }, + { + "epoch": 0.23870079823837048, + "grad_norm": 1.8550373315811157, + "learning_rate": 8.90065368724202e-06, + "loss": 0.8289, + "step": 11924 + }, + { + "epoch": 0.23872081675549883, + "grad_norm": 1.1017522811889648, + "learning_rate": 8.900450864800084e-06, + "loss": 0.3052, + "step": 11925 + }, + { + "epoch": 0.23874083527262718, + "grad_norm": 1.1533429622650146, + "learning_rate": 8.900248025961492e-06, + "loss": 0.3242, + "step": 11926 + }, + { + "epoch": 0.23876085378975553, + "grad_norm": 1.0372902154922485, + "learning_rate": 8.900045170727096e-06, + "loss": 0.3257, + "step": 11927 + }, + { + "epoch": 0.23878087230688386, + "grad_norm": 1.1423149108886719, + "learning_rate": 8.899842299097751e-06, + "loss": 0.3666, + "step": 11928 + }, + { + "epoch": 0.2388008908240122, + "grad_norm": 1.0998048782348633, + "learning_rate": 8.899639411074307e-06, + "loss": 0.2704, + "step": 11929 + }, + { + "epoch": 0.23882090934114056, + "grad_norm": 1.0190056562423706, + "learning_rate": 8.89943650665762e-06, + "loss": 0.3598, + "step": 11930 + }, + { + "epoch": 0.2388409278582689, + "grad_norm": 1.048572063446045, + "learning_rate": 8.899233585848539e-06, + "loss": 0.3626, + "step": 11931 + }, + { + "epoch": 0.23886094637539723, + "grad_norm": 1.1113265752792358, + "learning_rate": 8.89903064864792e-06, + "loss": 0.343, + "step": 11932 + }, + { + "epoch": 0.23888096489252558, + "grad_norm": 1.0852946043014526, + "learning_rate": 8.898827695056613e-06, + "loss": 0.3031, + "step": 11933 + }, + { + "epoch": 0.23890098340965393, + "grad_norm": 1.0969798564910889, + "learning_rate": 8.898624725075477e-06, + "loss": 0.3405, + "step": 11934 + }, + { + "epoch": 0.23892100192678228, + "grad_norm": 1.1543670892715454, + "learning_rate": 8.89842173870536e-06, + "loss": 0.3522, + "step": 11935 + }, + { + "epoch": 0.2389410204439106, + "grad_norm": 1.451582908630371, + "learning_rate": 8.898218735947115e-06, + "loss": 0.3794, + "step": 11936 + }, + { + "epoch": 0.23896103896103896, + "grad_norm": 1.0953737497329712, + "learning_rate": 8.8980157168016e-06, + "loss": 0.3063, + "step": 11937 + }, + { + "epoch": 0.2389810574781673, + "grad_norm": 1.1634067296981812, + "learning_rate": 8.897812681269666e-06, + "loss": 0.3429, + "step": 11938 + }, + { + "epoch": 0.23900107599529566, + "grad_norm": 1.0517140626907349, + "learning_rate": 8.897609629352165e-06, + "loss": 0.3054, + "step": 11939 + }, + { + "epoch": 0.23902109451242398, + "grad_norm": 1.8599985837936401, + "learning_rate": 8.897406561049952e-06, + "loss": 0.8683, + "step": 11940 + }, + { + "epoch": 0.23904111302955233, + "grad_norm": 1.172069787979126, + "learning_rate": 8.89720347636388e-06, + "loss": 0.3371, + "step": 11941 + }, + { + "epoch": 0.23906113154668068, + "grad_norm": 1.102986216545105, + "learning_rate": 8.897000375294805e-06, + "loss": 0.3503, + "step": 11942 + }, + { + "epoch": 0.23908115006380903, + "grad_norm": 0.9992896318435669, + "learning_rate": 8.89679725784358e-06, + "loss": 0.3126, + "step": 11943 + }, + { + "epoch": 0.23910116858093736, + "grad_norm": 1.1313711404800415, + "learning_rate": 8.896594124011055e-06, + "loss": 0.3208, + "step": 11944 + }, + { + "epoch": 0.2391211870980657, + "grad_norm": 1.171513557434082, + "learning_rate": 8.896390973798088e-06, + "loss": 0.3246, + "step": 11945 + }, + { + "epoch": 0.23914120561519406, + "grad_norm": 0.9804176092147827, + "learning_rate": 8.896187807205532e-06, + "loss": 0.305, + "step": 11946 + }, + { + "epoch": 0.2391612241323224, + "grad_norm": 1.193313479423523, + "learning_rate": 8.89598462423424e-06, + "loss": 0.3416, + "step": 11947 + }, + { + "epoch": 0.23918124264945073, + "grad_norm": 1.165018916130066, + "learning_rate": 8.895781424885069e-06, + "loss": 0.3781, + "step": 11948 + }, + { + "epoch": 0.23920126116657908, + "grad_norm": 2.0234272480010986, + "learning_rate": 8.895578209158869e-06, + "loss": 0.8563, + "step": 11949 + }, + { + "epoch": 0.23922127968370743, + "grad_norm": 1.102445125579834, + "learning_rate": 8.895374977056498e-06, + "loss": 0.3234, + "step": 11950 + }, + { + "epoch": 0.23924129820083578, + "grad_norm": 1.1346713304519653, + "learning_rate": 8.895171728578808e-06, + "loss": 0.311, + "step": 11951 + }, + { + "epoch": 0.2392613167179641, + "grad_norm": 1.0625272989273071, + "learning_rate": 8.894968463726655e-06, + "loss": 0.3425, + "step": 11952 + }, + { + "epoch": 0.23928133523509246, + "grad_norm": 1.3009121417999268, + "learning_rate": 8.894765182500893e-06, + "loss": 0.3079, + "step": 11953 + }, + { + "epoch": 0.2393013537522208, + "grad_norm": 1.0146560668945312, + "learning_rate": 8.894561884902375e-06, + "loss": 0.2887, + "step": 11954 + }, + { + "epoch": 0.23932137226934916, + "grad_norm": 1.7088384628295898, + "learning_rate": 8.89435857093196e-06, + "loss": 0.7597, + "step": 11955 + }, + { + "epoch": 0.23934139078647748, + "grad_norm": 1.348168969154358, + "learning_rate": 8.894155240590497e-06, + "loss": 0.3109, + "step": 11956 + }, + { + "epoch": 0.23936140930360583, + "grad_norm": 1.0306925773620605, + "learning_rate": 8.893951893878842e-06, + "loss": 0.2945, + "step": 11957 + }, + { + "epoch": 0.23938142782073418, + "grad_norm": 1.109023928642273, + "learning_rate": 8.893748530797854e-06, + "loss": 0.2685, + "step": 11958 + }, + { + "epoch": 0.23940144633786253, + "grad_norm": 1.094745397567749, + "learning_rate": 8.893545151348384e-06, + "loss": 0.3223, + "step": 11959 + }, + { + "epoch": 0.23942146485499086, + "grad_norm": 1.919101357460022, + "learning_rate": 8.893341755531289e-06, + "loss": 0.8457, + "step": 11960 + }, + { + "epoch": 0.2394414833721192, + "grad_norm": 1.007434368133545, + "learning_rate": 8.893138343347423e-06, + "loss": 0.3111, + "step": 11961 + }, + { + "epoch": 0.23946150188924756, + "grad_norm": 1.162366509437561, + "learning_rate": 8.89293491479764e-06, + "loss": 0.3522, + "step": 11962 + }, + { + "epoch": 0.2394815204063759, + "grad_norm": 1.2319492101669312, + "learning_rate": 8.892731469882796e-06, + "loss": 0.3582, + "step": 11963 + }, + { + "epoch": 0.23950153892350423, + "grad_norm": 1.1010133028030396, + "learning_rate": 8.892528008603749e-06, + "loss": 0.2985, + "step": 11964 + }, + { + "epoch": 0.23952155744063258, + "grad_norm": 1.88377845287323, + "learning_rate": 8.892324530961351e-06, + "loss": 0.837, + "step": 11965 + }, + { + "epoch": 0.23954157595776093, + "grad_norm": 1.2440509796142578, + "learning_rate": 8.892121036956457e-06, + "loss": 0.3445, + "step": 11966 + }, + { + "epoch": 0.23956159447488928, + "grad_norm": 1.1179654598236084, + "learning_rate": 8.891917526589926e-06, + "loss": 0.3495, + "step": 11967 + }, + { + "epoch": 0.2395816129920176, + "grad_norm": 1.029915452003479, + "learning_rate": 8.89171399986261e-06, + "loss": 0.3043, + "step": 11968 + }, + { + "epoch": 0.23960163150914596, + "grad_norm": 1.067525029182434, + "learning_rate": 8.891510456775365e-06, + "loss": 0.3654, + "step": 11969 + }, + { + "epoch": 0.2396216500262743, + "grad_norm": 1.9325933456420898, + "learning_rate": 8.891306897329048e-06, + "loss": 0.8474, + "step": 11970 + }, + { + "epoch": 0.23964166854340266, + "grad_norm": 1.883150577545166, + "learning_rate": 8.891103321524515e-06, + "loss": 0.7533, + "step": 11971 + }, + { + "epoch": 0.23966168706053098, + "grad_norm": 1.0633337497711182, + "learning_rate": 8.89089972936262e-06, + "loss": 0.299, + "step": 11972 + }, + { + "epoch": 0.23968170557765933, + "grad_norm": 1.1361339092254639, + "learning_rate": 8.89069612084422e-06, + "loss": 0.368, + "step": 11973 + }, + { + "epoch": 0.23970172409478768, + "grad_norm": 1.0334173440933228, + "learning_rate": 8.890492495970171e-06, + "loss": 0.3519, + "step": 11974 + }, + { + "epoch": 0.23972174261191603, + "grad_norm": 1.9607605934143066, + "learning_rate": 8.89028885474133e-06, + "loss": 0.7891, + "step": 11975 + }, + { + "epoch": 0.23974176112904436, + "grad_norm": 1.8271899223327637, + "learning_rate": 8.89008519715855e-06, + "loss": 0.8314, + "step": 11976 + }, + { + "epoch": 0.2397617796461727, + "grad_norm": 1.1075855493545532, + "learning_rate": 8.889881523222693e-06, + "loss": 0.3555, + "step": 11977 + }, + { + "epoch": 0.23978179816330106, + "grad_norm": 1.137406826019287, + "learning_rate": 8.889677832934607e-06, + "loss": 0.3592, + "step": 11978 + }, + { + "epoch": 0.2398018166804294, + "grad_norm": 0.9367457032203674, + "learning_rate": 8.889474126295153e-06, + "loss": 0.2654, + "step": 11979 + }, + { + "epoch": 0.23982183519755773, + "grad_norm": 1.1723428964614868, + "learning_rate": 8.889270403305188e-06, + "loss": 0.3334, + "step": 11980 + }, + { + "epoch": 0.23984185371468608, + "grad_norm": 1.2047324180603027, + "learning_rate": 8.889066663965566e-06, + "loss": 0.3379, + "step": 11981 + }, + { + "epoch": 0.23986187223181443, + "grad_norm": 1.050333023071289, + "learning_rate": 8.888862908277144e-06, + "loss": 0.3003, + "step": 11982 + }, + { + "epoch": 0.23988189074894278, + "grad_norm": 1.1268911361694336, + "learning_rate": 8.888659136240782e-06, + "loss": 0.3694, + "step": 11983 + }, + { + "epoch": 0.2399019092660711, + "grad_norm": 1.0276243686676025, + "learning_rate": 8.888455347857332e-06, + "loss": 0.3422, + "step": 11984 + }, + { + "epoch": 0.23992192778319946, + "grad_norm": 1.0393187999725342, + "learning_rate": 8.888251543127653e-06, + "loss": 0.2836, + "step": 11985 + }, + { + "epoch": 0.2399419463003278, + "grad_norm": 1.1001044511795044, + "learning_rate": 8.888047722052601e-06, + "loss": 0.313, + "step": 11986 + }, + { + "epoch": 0.23996196481745616, + "grad_norm": 0.9994146823883057, + "learning_rate": 8.887843884633032e-06, + "loss": 0.2897, + "step": 11987 + }, + { + "epoch": 0.23998198333458448, + "grad_norm": 1.093280553817749, + "learning_rate": 8.887640030869806e-06, + "loss": 0.2816, + "step": 11988 + }, + { + "epoch": 0.24000200185171283, + "grad_norm": 1.0877354145050049, + "learning_rate": 8.887436160763778e-06, + "loss": 0.3525, + "step": 11989 + }, + { + "epoch": 0.24002202036884118, + "grad_norm": 1.2772902250289917, + "learning_rate": 8.887232274315802e-06, + "loss": 0.3197, + "step": 11990 + }, + { + "epoch": 0.24004203888596953, + "grad_norm": 1.1940240859985352, + "learning_rate": 8.88702837152674e-06, + "loss": 0.3155, + "step": 11991 + }, + { + "epoch": 0.24006205740309786, + "grad_norm": 1.0975196361541748, + "learning_rate": 8.88682445239745e-06, + "loss": 0.3457, + "step": 11992 + }, + { + "epoch": 0.2400820759202262, + "grad_norm": 1.1130471229553223, + "learning_rate": 8.886620516928782e-06, + "loss": 0.3251, + "step": 11993 + }, + { + "epoch": 0.24010209443735456, + "grad_norm": 0.9987073540687561, + "learning_rate": 8.886416565121601e-06, + "loss": 0.3114, + "step": 11994 + }, + { + "epoch": 0.2401221129544829, + "grad_norm": 1.1607977151870728, + "learning_rate": 8.886212596976757e-06, + "loss": 0.3504, + "step": 11995 + }, + { + "epoch": 0.24014213147161123, + "grad_norm": 1.041229009628296, + "learning_rate": 8.886008612495116e-06, + "loss": 0.3093, + "step": 11996 + }, + { + "epoch": 0.24016214998873958, + "grad_norm": 1.2234859466552734, + "learning_rate": 8.88580461167753e-06, + "loss": 0.3332, + "step": 11997 + }, + { + "epoch": 0.24018216850586793, + "grad_norm": 1.0499461889266968, + "learning_rate": 8.885600594524855e-06, + "loss": 0.2909, + "step": 11998 + }, + { + "epoch": 0.24020218702299628, + "grad_norm": 1.0607846975326538, + "learning_rate": 8.885396561037955e-06, + "loss": 0.3315, + "step": 11999 + }, + { + "epoch": 0.2402222055401246, + "grad_norm": 1.1911245584487915, + "learning_rate": 8.885192511217681e-06, + "loss": 0.324, + "step": 12000 + }, + { + "epoch": 0.24024222405725296, + "grad_norm": 1.222516417503357, + "learning_rate": 8.884988445064895e-06, + "loss": 0.2988, + "step": 12001 + }, + { + "epoch": 0.2402622425743813, + "grad_norm": 1.0894986391067505, + "learning_rate": 8.884784362580454e-06, + "loss": 0.3343, + "step": 12002 + }, + { + "epoch": 0.24028226109150966, + "grad_norm": 1.227034330368042, + "learning_rate": 8.884580263765216e-06, + "loss": 0.3491, + "step": 12003 + }, + { + "epoch": 0.24030227960863798, + "grad_norm": 1.1193033456802368, + "learning_rate": 8.884376148620037e-06, + "loss": 0.3054, + "step": 12004 + }, + { + "epoch": 0.24032229812576633, + "grad_norm": 1.0050495862960815, + "learning_rate": 8.884172017145779e-06, + "loss": 0.3074, + "step": 12005 + }, + { + "epoch": 0.24034231664289468, + "grad_norm": 2.0262317657470703, + "learning_rate": 8.883967869343295e-06, + "loss": 0.8601, + "step": 12006 + }, + { + "epoch": 0.24036233516002303, + "grad_norm": 1.1297132968902588, + "learning_rate": 8.883763705213447e-06, + "loss": 0.3111, + "step": 12007 + }, + { + "epoch": 0.24038235367715136, + "grad_norm": 1.1131443977355957, + "learning_rate": 8.883559524757093e-06, + "loss": 0.3163, + "step": 12008 + }, + { + "epoch": 0.2404023721942797, + "grad_norm": 1.0956858396530151, + "learning_rate": 8.88335532797509e-06, + "loss": 0.3174, + "step": 12009 + }, + { + "epoch": 0.24042239071140806, + "grad_norm": 1.3044061660766602, + "learning_rate": 8.883151114868295e-06, + "loss": 0.3372, + "step": 12010 + }, + { + "epoch": 0.2404424092285364, + "grad_norm": 1.0990198850631714, + "learning_rate": 8.88294688543757e-06, + "loss": 0.3295, + "step": 12011 + }, + { + "epoch": 0.24046242774566473, + "grad_norm": 1.14749276638031, + "learning_rate": 8.882742639683773e-06, + "loss": 0.3774, + "step": 12012 + }, + { + "epoch": 0.24048244626279308, + "grad_norm": 1.1196305751800537, + "learning_rate": 8.88253837760776e-06, + "loss": 0.3438, + "step": 12013 + }, + { + "epoch": 0.24050246477992143, + "grad_norm": 1.053963303565979, + "learning_rate": 8.88233409921039e-06, + "loss": 0.3177, + "step": 12014 + }, + { + "epoch": 0.24052248329704978, + "grad_norm": 1.9226605892181396, + "learning_rate": 8.882129804492525e-06, + "loss": 0.8107, + "step": 12015 + }, + { + "epoch": 0.2405425018141781, + "grad_norm": 1.196110725402832, + "learning_rate": 8.88192549345502e-06, + "loss": 0.3235, + "step": 12016 + }, + { + "epoch": 0.24056252033130646, + "grad_norm": 1.1359248161315918, + "learning_rate": 8.881721166098739e-06, + "loss": 0.3348, + "step": 12017 + }, + { + "epoch": 0.2405825388484348, + "grad_norm": 1.1561682224273682, + "learning_rate": 8.881516822424534e-06, + "loss": 0.3212, + "step": 12018 + }, + { + "epoch": 0.24060255736556316, + "grad_norm": 1.0327779054641724, + "learning_rate": 8.88131246243327e-06, + "loss": 0.3547, + "step": 12019 + }, + { + "epoch": 0.24062257588269148, + "grad_norm": 1.1221740245819092, + "learning_rate": 8.881108086125802e-06, + "loss": 0.3413, + "step": 12020 + }, + { + "epoch": 0.24064259439981983, + "grad_norm": 1.1716352701187134, + "learning_rate": 8.880903693502992e-06, + "loss": 0.3295, + "step": 12021 + }, + { + "epoch": 0.24066261291694818, + "grad_norm": 1.0807337760925293, + "learning_rate": 8.880699284565698e-06, + "loss": 0.3368, + "step": 12022 + }, + { + "epoch": 0.24068263143407653, + "grad_norm": 1.1158610582351685, + "learning_rate": 8.880494859314779e-06, + "loss": 0.3095, + "step": 12023 + }, + { + "epoch": 0.24070264995120486, + "grad_norm": 1.1011008024215698, + "learning_rate": 8.880290417751094e-06, + "loss": 0.4138, + "step": 12024 + }, + { + "epoch": 0.2407226684683332, + "grad_norm": 1.1701602935791016, + "learning_rate": 8.880085959875504e-06, + "loss": 0.3289, + "step": 12025 + }, + { + "epoch": 0.24074268698546156, + "grad_norm": 1.0817879438400269, + "learning_rate": 8.879881485688869e-06, + "loss": 0.2946, + "step": 12026 + }, + { + "epoch": 0.2407627055025899, + "grad_norm": 1.0208466053009033, + "learning_rate": 8.879676995192044e-06, + "loss": 0.313, + "step": 12027 + }, + { + "epoch": 0.24078272401971823, + "grad_norm": 1.1039867401123047, + "learning_rate": 8.879472488385894e-06, + "loss": 0.2701, + "step": 12028 + }, + { + "epoch": 0.24080274253684658, + "grad_norm": 1.0293858051300049, + "learning_rate": 8.879267965271277e-06, + "loss": 0.3167, + "step": 12029 + }, + { + "epoch": 0.24082276105397493, + "grad_norm": 1.1355756521224976, + "learning_rate": 8.87906342584905e-06, + "loss": 0.3391, + "step": 12030 + }, + { + "epoch": 0.24084277957110328, + "grad_norm": 1.2591419219970703, + "learning_rate": 8.878858870120078e-06, + "loss": 0.3345, + "step": 12031 + }, + { + "epoch": 0.2408627980882316, + "grad_norm": 1.127134919166565, + "learning_rate": 8.878654298085217e-06, + "loss": 0.2946, + "step": 12032 + }, + { + "epoch": 0.24088281660535996, + "grad_norm": 1.062420129776001, + "learning_rate": 8.878449709745326e-06, + "loss": 0.3166, + "step": 12033 + }, + { + "epoch": 0.2409028351224883, + "grad_norm": 1.0927470922470093, + "learning_rate": 8.878245105101268e-06, + "loss": 0.3129, + "step": 12034 + }, + { + "epoch": 0.24092285363961666, + "grad_norm": 1.0601089000701904, + "learning_rate": 8.878040484153902e-06, + "loss": 0.2967, + "step": 12035 + }, + { + "epoch": 0.24094287215674498, + "grad_norm": 1.133577823638916, + "learning_rate": 8.87783584690409e-06, + "loss": 0.3131, + "step": 12036 + }, + { + "epoch": 0.24096289067387333, + "grad_norm": 1.184983491897583, + "learning_rate": 8.87763119335269e-06, + "loss": 0.3236, + "step": 12037 + }, + { + "epoch": 0.24098290919100168, + "grad_norm": 1.1740481853485107, + "learning_rate": 8.877426523500563e-06, + "loss": 0.3582, + "step": 12038 + }, + { + "epoch": 0.24100292770813003, + "grad_norm": 1.1694592237472534, + "learning_rate": 8.877221837348568e-06, + "loss": 0.3607, + "step": 12039 + }, + { + "epoch": 0.24102294622525836, + "grad_norm": 1.125679612159729, + "learning_rate": 8.877017134897569e-06, + "loss": 0.3365, + "step": 12040 + }, + { + "epoch": 0.2410429647423867, + "grad_norm": 1.247483491897583, + "learning_rate": 8.876812416148424e-06, + "loss": 0.3263, + "step": 12041 + }, + { + "epoch": 0.24106298325951506, + "grad_norm": 1.1142672300338745, + "learning_rate": 8.876607681101991e-06, + "loss": 0.2421, + "step": 12042 + }, + { + "epoch": 0.2410830017766434, + "grad_norm": 1.159313440322876, + "learning_rate": 8.876402929759136e-06, + "loss": 0.3395, + "step": 12043 + }, + { + "epoch": 0.24110302029377173, + "grad_norm": 1.4119333028793335, + "learning_rate": 8.876198162120718e-06, + "loss": 0.3696, + "step": 12044 + }, + { + "epoch": 0.24112303881090008, + "grad_norm": 1.0644197463989258, + "learning_rate": 8.875993378187595e-06, + "loss": 0.2907, + "step": 12045 + }, + { + "epoch": 0.24114305732802843, + "grad_norm": 0.9664831757545471, + "learning_rate": 8.87578857796063e-06, + "loss": 0.3182, + "step": 12046 + }, + { + "epoch": 0.24116307584515678, + "grad_norm": 1.1037538051605225, + "learning_rate": 8.875583761440687e-06, + "loss": 0.272, + "step": 12047 + }, + { + "epoch": 0.2411830943622851, + "grad_norm": 1.1572965383529663, + "learning_rate": 8.87537892862862e-06, + "loss": 0.3949, + "step": 12048 + }, + { + "epoch": 0.24120311287941346, + "grad_norm": 1.3628493547439575, + "learning_rate": 8.875174079525297e-06, + "loss": 0.3465, + "step": 12049 + }, + { + "epoch": 0.2412231313965418, + "grad_norm": 1.0224993228912354, + "learning_rate": 8.874969214131575e-06, + "loss": 0.3261, + "step": 12050 + }, + { + "epoch": 0.24124314991367016, + "grad_norm": 1.0998568534851074, + "learning_rate": 8.874764332448316e-06, + "loss": 0.3007, + "step": 12051 + }, + { + "epoch": 0.24126316843079848, + "grad_norm": 1.035690426826477, + "learning_rate": 8.874559434476381e-06, + "loss": 0.3467, + "step": 12052 + }, + { + "epoch": 0.24128318694792683, + "grad_norm": 1.283219337463379, + "learning_rate": 8.874354520216633e-06, + "loss": 0.3338, + "step": 12053 + }, + { + "epoch": 0.24130320546505518, + "grad_norm": 1.2743338346481323, + "learning_rate": 8.87414958966993e-06, + "loss": 0.3263, + "step": 12054 + }, + { + "epoch": 0.24132322398218353, + "grad_norm": 1.1074639558792114, + "learning_rate": 8.87394464283714e-06, + "loss": 0.3302, + "step": 12055 + }, + { + "epoch": 0.24134324249931186, + "grad_norm": 1.1907964944839478, + "learning_rate": 8.873739679719118e-06, + "loss": 0.3756, + "step": 12056 + }, + { + "epoch": 0.2413632610164402, + "grad_norm": 1.1291399002075195, + "learning_rate": 8.873534700316728e-06, + "loss": 0.3202, + "step": 12057 + }, + { + "epoch": 0.24138327953356856, + "grad_norm": 1.2367607355117798, + "learning_rate": 8.873329704630833e-06, + "loss": 0.2758, + "step": 12058 + }, + { + "epoch": 0.2414032980506969, + "grad_norm": 1.96138334274292, + "learning_rate": 8.87312469266229e-06, + "loss": 0.8851, + "step": 12059 + }, + { + "epoch": 0.24142331656782523, + "grad_norm": 0.9922702312469482, + "learning_rate": 8.87291966441197e-06, + "loss": 0.2914, + "step": 12060 + }, + { + "epoch": 0.24144333508495358, + "grad_norm": 1.0801632404327393, + "learning_rate": 8.872714619880724e-06, + "loss": 0.2875, + "step": 12061 + }, + { + "epoch": 0.24146335360208193, + "grad_norm": 1.0296356678009033, + "learning_rate": 8.872509559069423e-06, + "loss": 0.3441, + "step": 12062 + }, + { + "epoch": 0.24148337211921028, + "grad_norm": 1.0817195177078247, + "learning_rate": 8.872304481978922e-06, + "loss": 0.3144, + "step": 12063 + }, + { + "epoch": 0.2415033906363386, + "grad_norm": 1.1115511655807495, + "learning_rate": 8.872099388610088e-06, + "loss": 0.3201, + "step": 12064 + }, + { + "epoch": 0.24152340915346696, + "grad_norm": 1.180415153503418, + "learning_rate": 8.871894278963782e-06, + "loss": 0.3421, + "step": 12065 + }, + { + "epoch": 0.2415434276705953, + "grad_norm": 1.140541434288025, + "learning_rate": 8.871689153040863e-06, + "loss": 0.3342, + "step": 12066 + }, + { + "epoch": 0.24156344618772366, + "grad_norm": 1.8984310626983643, + "learning_rate": 8.871484010842199e-06, + "loss": 0.8116, + "step": 12067 + }, + { + "epoch": 0.24158346470485198, + "grad_norm": 1.0680515766143799, + "learning_rate": 8.871278852368647e-06, + "loss": 0.2989, + "step": 12068 + }, + { + "epoch": 0.24160348322198033, + "grad_norm": 1.0270580053329468, + "learning_rate": 8.871073677621074e-06, + "loss": 0.3231, + "step": 12069 + }, + { + "epoch": 0.24162350173910868, + "grad_norm": 1.339857816696167, + "learning_rate": 8.870868486600339e-06, + "loss": 0.3716, + "step": 12070 + }, + { + "epoch": 0.24164352025623703, + "grad_norm": 1.2346380949020386, + "learning_rate": 8.870663279307307e-06, + "loss": 0.2999, + "step": 12071 + }, + { + "epoch": 0.24166353877336536, + "grad_norm": 1.320114254951477, + "learning_rate": 8.87045805574284e-06, + "loss": 0.3176, + "step": 12072 + }, + { + "epoch": 0.2416835572904937, + "grad_norm": 1.0484448671340942, + "learning_rate": 8.870252815907797e-06, + "loss": 0.3313, + "step": 12073 + }, + { + "epoch": 0.24170357580762206, + "grad_norm": 1.0978227853775024, + "learning_rate": 8.870047559803047e-06, + "loss": 0.2805, + "step": 12074 + }, + { + "epoch": 0.2417235943247504, + "grad_norm": 1.9033340215682983, + "learning_rate": 8.869842287429449e-06, + "loss": 0.7536, + "step": 12075 + }, + { + "epoch": 0.24174361284187873, + "grad_norm": 1.0188127756118774, + "learning_rate": 8.869636998787865e-06, + "loss": 0.3362, + "step": 12076 + }, + { + "epoch": 0.24176363135900708, + "grad_norm": 1.220363974571228, + "learning_rate": 8.869431693879163e-06, + "loss": 0.3296, + "step": 12077 + }, + { + "epoch": 0.24178364987613543, + "grad_norm": 1.1020429134368896, + "learning_rate": 8.869226372704201e-06, + "loss": 0.3321, + "step": 12078 + }, + { + "epoch": 0.24180366839326378, + "grad_norm": 1.2092195749282837, + "learning_rate": 8.869021035263845e-06, + "loss": 0.331, + "step": 12079 + }, + { + "epoch": 0.2418236869103921, + "grad_norm": 1.056834101676941, + "learning_rate": 8.868815681558957e-06, + "loss": 0.3195, + "step": 12080 + }, + { + "epoch": 0.24184370542752046, + "grad_norm": 1.1051137447357178, + "learning_rate": 8.8686103115904e-06, + "loss": 0.3633, + "step": 12081 + }, + { + "epoch": 0.2418637239446488, + "grad_norm": 1.0936921834945679, + "learning_rate": 8.868404925359038e-06, + "loss": 0.3529, + "step": 12082 + }, + { + "epoch": 0.24188374246177716, + "grad_norm": 1.3528053760528564, + "learning_rate": 8.868199522865734e-06, + "loss": 0.3864, + "step": 12083 + }, + { + "epoch": 0.24190376097890548, + "grad_norm": 1.1694157123565674, + "learning_rate": 8.867994104111351e-06, + "loss": 0.3686, + "step": 12084 + }, + { + "epoch": 0.24192377949603383, + "grad_norm": 1.1144499778747559, + "learning_rate": 8.867788669096754e-06, + "loss": 0.3264, + "step": 12085 + }, + { + "epoch": 0.24194379801316218, + "grad_norm": 1.0933114290237427, + "learning_rate": 8.867583217822806e-06, + "loss": 0.3541, + "step": 12086 + }, + { + "epoch": 0.24196381653029053, + "grad_norm": 1.0228468179702759, + "learning_rate": 8.86737775029037e-06, + "loss": 0.3269, + "step": 12087 + }, + { + "epoch": 0.24198383504741885, + "grad_norm": 1.043900489807129, + "learning_rate": 8.86717226650031e-06, + "loss": 0.301, + "step": 12088 + }, + { + "epoch": 0.2420038535645472, + "grad_norm": 1.0718798637390137, + "learning_rate": 8.86696676645349e-06, + "loss": 0.2956, + "step": 12089 + }, + { + "epoch": 0.24202387208167556, + "grad_norm": 1.1192748546600342, + "learning_rate": 8.866761250150775e-06, + "loss": 0.3329, + "step": 12090 + }, + { + "epoch": 0.2420438905988039, + "grad_norm": 1.0954848527908325, + "learning_rate": 8.866555717593027e-06, + "loss": 0.3765, + "step": 12091 + }, + { + "epoch": 0.24206390911593223, + "grad_norm": 1.79780912399292, + "learning_rate": 8.866350168781112e-06, + "loss": 0.8469, + "step": 12092 + }, + { + "epoch": 0.24208392763306058, + "grad_norm": 1.0677273273468018, + "learning_rate": 8.866144603715891e-06, + "loss": 0.3337, + "step": 12093 + }, + { + "epoch": 0.24210394615018893, + "grad_norm": 1.1193668842315674, + "learning_rate": 8.86593902239823e-06, + "loss": 0.3608, + "step": 12094 + }, + { + "epoch": 0.24212396466731728, + "grad_norm": 1.0924491882324219, + "learning_rate": 8.865733424828994e-06, + "loss": 0.3832, + "step": 12095 + }, + { + "epoch": 0.2421439831844456, + "grad_norm": 1.0810540914535522, + "learning_rate": 8.86552781100905e-06, + "loss": 0.2861, + "step": 12096 + }, + { + "epoch": 0.24216400170157396, + "grad_norm": 1.0575040578842163, + "learning_rate": 8.865322180939255e-06, + "loss": 0.2693, + "step": 12097 + }, + { + "epoch": 0.2421840202187023, + "grad_norm": 1.207291841506958, + "learning_rate": 8.865116534620477e-06, + "loss": 0.3223, + "step": 12098 + }, + { + "epoch": 0.24220403873583066, + "grad_norm": 1.0440870523452759, + "learning_rate": 8.864910872053583e-06, + "loss": 0.3386, + "step": 12099 + }, + { + "epoch": 0.24222405725295898, + "grad_norm": 1.0424416065216064, + "learning_rate": 8.864705193239434e-06, + "loss": 0.2993, + "step": 12100 + }, + { + "epoch": 0.24224407577008733, + "grad_norm": 1.0770725011825562, + "learning_rate": 8.864499498178896e-06, + "loss": 0.3356, + "step": 12101 + }, + { + "epoch": 0.24226409428721568, + "grad_norm": 1.1436530351638794, + "learning_rate": 8.864293786872835e-06, + "loss": 0.3306, + "step": 12102 + }, + { + "epoch": 0.24228411280434403, + "grad_norm": 1.0477255582809448, + "learning_rate": 8.864088059322114e-06, + "loss": 0.3205, + "step": 12103 + }, + { + "epoch": 0.24230413132147235, + "grad_norm": 1.0429102182388306, + "learning_rate": 8.863882315527598e-06, + "loss": 0.3176, + "step": 12104 + }, + { + "epoch": 0.2423241498386007, + "grad_norm": 1.2102339267730713, + "learning_rate": 8.863676555490154e-06, + "loss": 0.3425, + "step": 12105 + }, + { + "epoch": 0.24234416835572906, + "grad_norm": 1.0674138069152832, + "learning_rate": 8.863470779210643e-06, + "loss": 0.314, + "step": 12106 + }, + { + "epoch": 0.2423641868728574, + "grad_norm": 1.8192042112350464, + "learning_rate": 8.863264986689932e-06, + "loss": 0.8489, + "step": 12107 + }, + { + "epoch": 0.24238420538998573, + "grad_norm": 1.1949928998947144, + "learning_rate": 8.863059177928887e-06, + "loss": 0.3356, + "step": 12108 + }, + { + "epoch": 0.24240422390711408, + "grad_norm": 1.0348913669586182, + "learning_rate": 8.862853352928374e-06, + "loss": 0.3085, + "step": 12109 + }, + { + "epoch": 0.24242424242424243, + "grad_norm": 1.08402419090271, + "learning_rate": 8.862647511689255e-06, + "loss": 0.3103, + "step": 12110 + }, + { + "epoch": 0.24244426094137078, + "grad_norm": 1.9376912117004395, + "learning_rate": 8.8624416542124e-06, + "loss": 0.833, + "step": 12111 + }, + { + "epoch": 0.2424642794584991, + "grad_norm": 1.1249638795852661, + "learning_rate": 8.862235780498669e-06, + "loss": 0.3088, + "step": 12112 + }, + { + "epoch": 0.24248429797562746, + "grad_norm": 1.1572681665420532, + "learning_rate": 8.862029890548928e-06, + "loss": 0.3236, + "step": 12113 + }, + { + "epoch": 0.2425043164927558, + "grad_norm": 1.1515522003173828, + "learning_rate": 8.861823984364049e-06, + "loss": 0.3219, + "step": 12114 + }, + { + "epoch": 0.24252433500988416, + "grad_norm": 1.0183550119400024, + "learning_rate": 8.861618061944889e-06, + "loss": 0.281, + "step": 12115 + }, + { + "epoch": 0.24254435352701248, + "grad_norm": 1.0628602504730225, + "learning_rate": 8.86141212329232e-06, + "loss": 0.334, + "step": 12116 + }, + { + "epoch": 0.24256437204414083, + "grad_norm": 0.9735977649688721, + "learning_rate": 8.861206168407206e-06, + "loss": 0.319, + "step": 12117 + }, + { + "epoch": 0.24258439056126918, + "grad_norm": 1.8378798961639404, + "learning_rate": 8.86100019729041e-06, + "loss": 0.8902, + "step": 12118 + }, + { + "epoch": 0.24260440907839753, + "grad_norm": 1.2571594715118408, + "learning_rate": 8.860794209942803e-06, + "loss": 0.3289, + "step": 12119 + }, + { + "epoch": 0.24262442759552585, + "grad_norm": 1.0637853145599365, + "learning_rate": 8.860588206365244e-06, + "loss": 0.2941, + "step": 12120 + }, + { + "epoch": 0.2426444461126542, + "grad_norm": 1.0443716049194336, + "learning_rate": 8.860382186558607e-06, + "loss": 0.2548, + "step": 12121 + }, + { + "epoch": 0.24266446462978256, + "grad_norm": 1.0859525203704834, + "learning_rate": 8.860176150523751e-06, + "loss": 0.311, + "step": 12122 + }, + { + "epoch": 0.24268448314691088, + "grad_norm": 1.024005651473999, + "learning_rate": 8.859970098261547e-06, + "loss": 0.2726, + "step": 12123 + }, + { + "epoch": 0.24270450166403923, + "grad_norm": 1.0898048877716064, + "learning_rate": 8.85976402977286e-06, + "loss": 0.4076, + "step": 12124 + }, + { + "epoch": 0.24272452018116758, + "grad_norm": 1.1126375198364258, + "learning_rate": 8.859557945058554e-06, + "loss": 0.3465, + "step": 12125 + }, + { + "epoch": 0.24274453869829593, + "grad_norm": 1.003202199935913, + "learning_rate": 8.859351844119497e-06, + "loss": 0.2932, + "step": 12126 + }, + { + "epoch": 0.24276455721542425, + "grad_norm": 1.1843440532684326, + "learning_rate": 8.859145726956555e-06, + "loss": 0.3488, + "step": 12127 + }, + { + "epoch": 0.2427845757325526, + "grad_norm": 1.853724479675293, + "learning_rate": 8.858939593570597e-06, + "loss": 0.7975, + "step": 12128 + }, + { + "epoch": 0.24280459424968096, + "grad_norm": 1.1128790378570557, + "learning_rate": 8.858733443962484e-06, + "loss": 0.3095, + "step": 12129 + }, + { + "epoch": 0.2428246127668093, + "grad_norm": 1.123989224433899, + "learning_rate": 8.85852727813309e-06, + "loss": 0.316, + "step": 12130 + }, + { + "epoch": 0.24284463128393763, + "grad_norm": 1.1289746761322021, + "learning_rate": 8.858321096083276e-06, + "loss": 0.3085, + "step": 12131 + }, + { + "epoch": 0.24286464980106598, + "grad_norm": 1.0913585424423218, + "learning_rate": 8.85811489781391e-06, + "loss": 0.3246, + "step": 12132 + }, + { + "epoch": 0.24288466831819433, + "grad_norm": 1.1468702554702759, + "learning_rate": 8.857908683325858e-06, + "loss": 0.3494, + "step": 12133 + }, + { + "epoch": 0.24290468683532268, + "grad_norm": 1.0203853845596313, + "learning_rate": 8.85770245261999e-06, + "loss": 0.3383, + "step": 12134 + }, + { + "epoch": 0.242924705352451, + "grad_norm": 1.0405125617980957, + "learning_rate": 8.85749620569717e-06, + "loss": 0.2871, + "step": 12135 + }, + { + "epoch": 0.24294472386957935, + "grad_norm": 1.0225465297698975, + "learning_rate": 8.857289942558268e-06, + "loss": 0.3161, + "step": 12136 + }, + { + "epoch": 0.2429647423867077, + "grad_norm": 1.8970533609390259, + "learning_rate": 8.857083663204148e-06, + "loss": 0.8935, + "step": 12137 + }, + { + "epoch": 0.24298476090383606, + "grad_norm": 1.2469645738601685, + "learning_rate": 8.85687736763568e-06, + "loss": 0.3624, + "step": 12138 + }, + { + "epoch": 0.24300477942096438, + "grad_norm": 0.993392825126648, + "learning_rate": 8.856671055853725e-06, + "loss": 0.3101, + "step": 12139 + }, + { + "epoch": 0.24302479793809273, + "grad_norm": 1.021374225616455, + "learning_rate": 8.85646472785916e-06, + "loss": 0.3183, + "step": 12140 + }, + { + "epoch": 0.24304481645522108, + "grad_norm": 0.986042857170105, + "learning_rate": 8.856258383652844e-06, + "loss": 0.2736, + "step": 12141 + }, + { + "epoch": 0.24306483497234943, + "grad_norm": 1.1170777082443237, + "learning_rate": 8.856052023235649e-06, + "loss": 0.3143, + "step": 12142 + }, + { + "epoch": 0.24308485348947775, + "grad_norm": 1.0939786434173584, + "learning_rate": 8.85584564660844e-06, + "loss": 0.3496, + "step": 12143 + }, + { + "epoch": 0.2431048720066061, + "grad_norm": 1.1502841711044312, + "learning_rate": 8.855639253772086e-06, + "loss": 0.3227, + "step": 12144 + }, + { + "epoch": 0.24312489052373445, + "grad_norm": 1.0241117477416992, + "learning_rate": 8.855432844727455e-06, + "loss": 0.3344, + "step": 12145 + }, + { + "epoch": 0.2431449090408628, + "grad_norm": 1.0772784948349, + "learning_rate": 8.855226419475414e-06, + "loss": 0.2979, + "step": 12146 + }, + { + "epoch": 0.24316492755799113, + "grad_norm": 1.034889578819275, + "learning_rate": 8.85501997801683e-06, + "loss": 0.2946, + "step": 12147 + }, + { + "epoch": 0.24318494607511948, + "grad_norm": 1.163450837135315, + "learning_rate": 8.854813520352573e-06, + "loss": 0.3347, + "step": 12148 + }, + { + "epoch": 0.24320496459224783, + "grad_norm": 1.105904221534729, + "learning_rate": 8.854607046483507e-06, + "loss": 0.3321, + "step": 12149 + }, + { + "epoch": 0.24322498310937618, + "grad_norm": 1.0386934280395508, + "learning_rate": 8.854400556410505e-06, + "loss": 0.336, + "step": 12150 + }, + { + "epoch": 0.2432450016265045, + "grad_norm": 1.1219515800476074, + "learning_rate": 8.854194050134431e-06, + "loss": 0.3391, + "step": 12151 + }, + { + "epoch": 0.24326502014363285, + "grad_norm": 1.223249912261963, + "learning_rate": 8.853987527656155e-06, + "loss": 0.337, + "step": 12152 + }, + { + "epoch": 0.2432850386607612, + "grad_norm": 1.0464086532592773, + "learning_rate": 8.853780988976545e-06, + "loss": 0.3021, + "step": 12153 + }, + { + "epoch": 0.24330505717788956, + "grad_norm": 1.2057421207427979, + "learning_rate": 8.85357443409647e-06, + "loss": 0.3425, + "step": 12154 + }, + { + "epoch": 0.24332507569501788, + "grad_norm": 1.836969017982483, + "learning_rate": 8.853367863016796e-06, + "loss": 0.7774, + "step": 12155 + }, + { + "epoch": 0.24334509421214623, + "grad_norm": 1.123119831085205, + "learning_rate": 8.853161275738393e-06, + "loss": 0.3274, + "step": 12156 + }, + { + "epoch": 0.24336511272927458, + "grad_norm": 1.2431789636611938, + "learning_rate": 8.852954672262131e-06, + "loss": 0.3242, + "step": 12157 + }, + { + "epoch": 0.24338513124640293, + "grad_norm": 1.220773696899414, + "learning_rate": 8.852748052588874e-06, + "loss": 0.3559, + "step": 12158 + }, + { + "epoch": 0.24340514976353125, + "grad_norm": 1.138598918914795, + "learning_rate": 8.852541416719494e-06, + "loss": 0.3049, + "step": 12159 + }, + { + "epoch": 0.2434251682806596, + "grad_norm": 1.0976389646530151, + "learning_rate": 8.85233476465486e-06, + "loss": 0.3165, + "step": 12160 + }, + { + "epoch": 0.24344518679778795, + "grad_norm": 0.9958465099334717, + "learning_rate": 8.85212809639584e-06, + "loss": 0.3171, + "step": 12161 + }, + { + "epoch": 0.2434652053149163, + "grad_norm": 1.0611366033554077, + "learning_rate": 8.851921411943301e-06, + "loss": 0.3644, + "step": 12162 + }, + { + "epoch": 0.24348522383204463, + "grad_norm": 1.8276078701019287, + "learning_rate": 8.851714711298113e-06, + "loss": 0.8087, + "step": 12163 + }, + { + "epoch": 0.24350524234917298, + "grad_norm": 2.0181093215942383, + "learning_rate": 8.851507994461147e-06, + "loss": 0.8604, + "step": 12164 + }, + { + "epoch": 0.24352526086630133, + "grad_norm": 1.012521743774414, + "learning_rate": 8.851301261433269e-06, + "loss": 0.3186, + "step": 12165 + }, + { + "epoch": 0.24354527938342968, + "grad_norm": 1.8897349834442139, + "learning_rate": 8.85109451221535e-06, + "loss": 0.8671, + "step": 12166 + }, + { + "epoch": 0.243565297900558, + "grad_norm": 1.7183151245117188, + "learning_rate": 8.850887746808258e-06, + "loss": 0.8228, + "step": 12167 + }, + { + "epoch": 0.24358531641768635, + "grad_norm": 1.229622721672058, + "learning_rate": 8.850680965212863e-06, + "loss": 0.3861, + "step": 12168 + }, + { + "epoch": 0.2436053349348147, + "grad_norm": 1.1097606420516968, + "learning_rate": 8.850474167430035e-06, + "loss": 0.3871, + "step": 12169 + }, + { + "epoch": 0.24362535345194306, + "grad_norm": 1.139269471168518, + "learning_rate": 8.85026735346064e-06, + "loss": 0.3663, + "step": 12170 + }, + { + "epoch": 0.24364537196907138, + "grad_norm": 1.0506223440170288, + "learning_rate": 8.85006052330555e-06, + "loss": 0.3124, + "step": 12171 + }, + { + "epoch": 0.24366539048619973, + "grad_norm": 1.1426198482513428, + "learning_rate": 8.849853676965636e-06, + "loss": 0.3034, + "step": 12172 + }, + { + "epoch": 0.24368540900332808, + "grad_norm": 1.033322811126709, + "learning_rate": 8.849646814441764e-06, + "loss": 0.3029, + "step": 12173 + }, + { + "epoch": 0.24370542752045643, + "grad_norm": 1.1623483896255493, + "learning_rate": 8.849439935734805e-06, + "loss": 0.3807, + "step": 12174 + }, + { + "epoch": 0.24372544603758475, + "grad_norm": 0.9834067225456238, + "learning_rate": 8.849233040845631e-06, + "loss": 0.3086, + "step": 12175 + }, + { + "epoch": 0.2437454645547131, + "grad_norm": 1.1451002359390259, + "learning_rate": 8.849026129775106e-06, + "loss": 0.3216, + "step": 12176 + }, + { + "epoch": 0.24376548307184145, + "grad_norm": 1.1413054466247559, + "learning_rate": 8.848819202524107e-06, + "loss": 0.3526, + "step": 12177 + }, + { + "epoch": 0.2437855015889698, + "grad_norm": 1.0864160060882568, + "learning_rate": 8.848612259093499e-06, + "loss": 0.2911, + "step": 12178 + }, + { + "epoch": 0.24380552010609813, + "grad_norm": 1.110916256904602, + "learning_rate": 8.848405299484154e-06, + "loss": 0.3625, + "step": 12179 + }, + { + "epoch": 0.24382553862322648, + "grad_norm": 1.0845375061035156, + "learning_rate": 8.84819832369694e-06, + "loss": 0.3213, + "step": 12180 + }, + { + "epoch": 0.24384555714035483, + "grad_norm": 1.1444790363311768, + "learning_rate": 8.847991331732728e-06, + "loss": 0.348, + "step": 12181 + }, + { + "epoch": 0.24386557565748318, + "grad_norm": 1.7456084489822388, + "learning_rate": 8.84778432359239e-06, + "loss": 0.8495, + "step": 12182 + }, + { + "epoch": 0.2438855941746115, + "grad_norm": 1.2124141454696655, + "learning_rate": 8.847577299276795e-06, + "loss": 0.3421, + "step": 12183 + }, + { + "epoch": 0.24390561269173985, + "grad_norm": 1.0049970149993896, + "learning_rate": 8.847370258786811e-06, + "loss": 0.2961, + "step": 12184 + }, + { + "epoch": 0.2439256312088682, + "grad_norm": 1.1192573308944702, + "learning_rate": 8.847163202123313e-06, + "loss": 0.3174, + "step": 12185 + }, + { + "epoch": 0.24394564972599656, + "grad_norm": 1.8671761751174927, + "learning_rate": 8.846956129287167e-06, + "loss": 0.796, + "step": 12186 + }, + { + "epoch": 0.24396566824312488, + "grad_norm": 2.2295897006988525, + "learning_rate": 8.846749040279246e-06, + "loss": 0.8458, + "step": 12187 + }, + { + "epoch": 0.24398568676025323, + "grad_norm": 1.1362240314483643, + "learning_rate": 8.84654193510042e-06, + "loss": 0.3237, + "step": 12188 + }, + { + "epoch": 0.24400570527738158, + "grad_norm": 1.2127741575241089, + "learning_rate": 8.846334813751559e-06, + "loss": 0.3618, + "step": 12189 + }, + { + "epoch": 0.24402572379450993, + "grad_norm": 1.1330317258834839, + "learning_rate": 8.846127676233533e-06, + "loss": 0.3721, + "step": 12190 + }, + { + "epoch": 0.24404574231163825, + "grad_norm": 1.1373014450073242, + "learning_rate": 8.845920522547216e-06, + "loss": 0.323, + "step": 12191 + }, + { + "epoch": 0.2440657608287666, + "grad_norm": 1.1984143257141113, + "learning_rate": 8.845713352693477e-06, + "loss": 0.3063, + "step": 12192 + }, + { + "epoch": 0.24408577934589495, + "grad_norm": 1.3365925550460815, + "learning_rate": 8.845506166673185e-06, + "loss": 0.3815, + "step": 12193 + }, + { + "epoch": 0.2441057978630233, + "grad_norm": 1.0722655057907104, + "learning_rate": 8.845298964487214e-06, + "loss": 0.2829, + "step": 12194 + }, + { + "epoch": 0.24412581638015163, + "grad_norm": 1.1406114101409912, + "learning_rate": 8.845091746136434e-06, + "loss": 0.3372, + "step": 12195 + }, + { + "epoch": 0.24414583489727998, + "grad_norm": 1.2146832942962646, + "learning_rate": 8.844884511621714e-06, + "loss": 0.2882, + "step": 12196 + }, + { + "epoch": 0.24416585341440833, + "grad_norm": 2.113335371017456, + "learning_rate": 8.844677260943928e-06, + "loss": 0.8341, + "step": 12197 + }, + { + "epoch": 0.24418587193153668, + "grad_norm": 1.7884886264801025, + "learning_rate": 8.844469994103945e-06, + "loss": 0.8147, + "step": 12198 + }, + { + "epoch": 0.244205890448665, + "grad_norm": 1.1023715734481812, + "learning_rate": 8.844262711102638e-06, + "loss": 0.3183, + "step": 12199 + }, + { + "epoch": 0.24422590896579335, + "grad_norm": 1.0591542720794678, + "learning_rate": 8.84405541194088e-06, + "loss": 0.3283, + "step": 12200 + }, + { + "epoch": 0.2442459274829217, + "grad_norm": 1.1453295946121216, + "learning_rate": 8.843848096619538e-06, + "loss": 0.3925, + "step": 12201 + }, + { + "epoch": 0.24426594600005005, + "grad_norm": 1.1366239786148071, + "learning_rate": 8.843640765139487e-06, + "loss": 0.3399, + "step": 12202 + }, + { + "epoch": 0.24428596451717838, + "grad_norm": 1.8579161167144775, + "learning_rate": 8.843433417501595e-06, + "loss": 0.8296, + "step": 12203 + }, + { + "epoch": 0.24430598303430673, + "grad_norm": 1.8882323503494263, + "learning_rate": 8.843226053706739e-06, + "loss": 0.8157, + "step": 12204 + }, + { + "epoch": 0.24432600155143508, + "grad_norm": 1.6955959796905518, + "learning_rate": 8.843018673755787e-06, + "loss": 0.8317, + "step": 12205 + }, + { + "epoch": 0.24434602006856343, + "grad_norm": 1.0919735431671143, + "learning_rate": 8.84281127764961e-06, + "loss": 0.3631, + "step": 12206 + }, + { + "epoch": 0.24436603858569175, + "grad_norm": 1.079028844833374, + "learning_rate": 8.842603865389084e-06, + "loss": 0.274, + "step": 12207 + }, + { + "epoch": 0.2443860571028201, + "grad_norm": 1.0940951108932495, + "learning_rate": 8.842396436975076e-06, + "loss": 0.3273, + "step": 12208 + }, + { + "epoch": 0.24440607561994845, + "grad_norm": 1.2481123208999634, + "learning_rate": 8.84218899240846e-06, + "loss": 0.2827, + "step": 12209 + }, + { + "epoch": 0.2444260941370768, + "grad_norm": 1.2093136310577393, + "learning_rate": 8.84198153169011e-06, + "loss": 0.348, + "step": 12210 + }, + { + "epoch": 0.24444611265420513, + "grad_norm": 1.0654351711273193, + "learning_rate": 8.841774054820895e-06, + "loss": 0.3252, + "step": 12211 + }, + { + "epoch": 0.24446613117133348, + "grad_norm": 1.075774073600769, + "learning_rate": 8.84156656180169e-06, + "loss": 0.3475, + "step": 12212 + }, + { + "epoch": 0.24448614968846183, + "grad_norm": 1.102203130722046, + "learning_rate": 8.841359052633365e-06, + "loss": 0.2936, + "step": 12213 + }, + { + "epoch": 0.24450616820559018, + "grad_norm": 1.4367296695709229, + "learning_rate": 8.841151527316794e-06, + "loss": 0.316, + "step": 12214 + }, + { + "epoch": 0.2445261867227185, + "grad_norm": 2.2755231857299805, + "learning_rate": 8.84094398585285e-06, + "loss": 0.8637, + "step": 12215 + }, + { + "epoch": 0.24454620523984685, + "grad_norm": 1.0869489908218384, + "learning_rate": 8.8407364282424e-06, + "loss": 0.3063, + "step": 12216 + }, + { + "epoch": 0.2445662237569752, + "grad_norm": 1.216107964515686, + "learning_rate": 8.840528854486323e-06, + "loss": 0.3466, + "step": 12217 + }, + { + "epoch": 0.24458624227410355, + "grad_norm": 1.1770395040512085, + "learning_rate": 8.84032126458549e-06, + "loss": 0.3438, + "step": 12218 + }, + { + "epoch": 0.24460626079123188, + "grad_norm": 0.9827327728271484, + "learning_rate": 8.840113658540772e-06, + "loss": 0.3003, + "step": 12219 + }, + { + "epoch": 0.24462627930836023, + "grad_norm": 1.3142269849777222, + "learning_rate": 8.839906036353041e-06, + "loss": 0.3306, + "step": 12220 + }, + { + "epoch": 0.24464629782548858, + "grad_norm": 1.064346432685852, + "learning_rate": 8.839698398023173e-06, + "loss": 0.3658, + "step": 12221 + }, + { + "epoch": 0.24466631634261693, + "grad_norm": 1.1718016862869263, + "learning_rate": 8.83949074355204e-06, + "loss": 0.2965, + "step": 12222 + }, + { + "epoch": 0.24468633485974525, + "grad_norm": 1.1382101774215698, + "learning_rate": 8.839283072940511e-06, + "loss": 0.355, + "step": 12223 + }, + { + "epoch": 0.2447063533768736, + "grad_norm": 1.8346483707427979, + "learning_rate": 8.839075386189466e-06, + "loss": 0.8241, + "step": 12224 + }, + { + "epoch": 0.24472637189400195, + "grad_norm": 1.0674829483032227, + "learning_rate": 8.838867683299772e-06, + "loss": 0.3004, + "step": 12225 + }, + { + "epoch": 0.2447463904111303, + "grad_norm": 1.1802313327789307, + "learning_rate": 8.838659964272306e-06, + "loss": 0.3438, + "step": 12226 + }, + { + "epoch": 0.24476640892825863, + "grad_norm": 0.9934674501419067, + "learning_rate": 8.838452229107939e-06, + "loss": 0.3242, + "step": 12227 + }, + { + "epoch": 0.24478642744538698, + "grad_norm": 1.0671833753585815, + "learning_rate": 8.838244477807543e-06, + "loss": 0.313, + "step": 12228 + }, + { + "epoch": 0.24480644596251533, + "grad_norm": 1.0809502601623535, + "learning_rate": 8.838036710371995e-06, + "loss": 0.307, + "step": 12229 + }, + { + "epoch": 0.24482646447964368, + "grad_norm": 1.1446384191513062, + "learning_rate": 8.837828926802167e-06, + "loss": 0.3487, + "step": 12230 + }, + { + "epoch": 0.244846482996772, + "grad_norm": 1.1307772397994995, + "learning_rate": 8.837621127098933e-06, + "loss": 0.3106, + "step": 12231 + }, + { + "epoch": 0.24486650151390035, + "grad_norm": 1.1102958917617798, + "learning_rate": 8.837413311263163e-06, + "loss": 0.2923, + "step": 12232 + }, + { + "epoch": 0.2448865200310287, + "grad_norm": 1.0459957122802734, + "learning_rate": 8.837205479295734e-06, + "loss": 0.2773, + "step": 12233 + }, + { + "epoch": 0.24490653854815705, + "grad_norm": 1.0133417844772339, + "learning_rate": 8.83699763119752e-06, + "loss": 0.2948, + "step": 12234 + }, + { + "epoch": 0.24492655706528538, + "grad_norm": 1.0497992038726807, + "learning_rate": 8.836789766969392e-06, + "loss": 0.3416, + "step": 12235 + }, + { + "epoch": 0.24494657558241373, + "grad_norm": 1.0186662673950195, + "learning_rate": 8.836581886612226e-06, + "loss": 0.3453, + "step": 12236 + }, + { + "epoch": 0.24496659409954208, + "grad_norm": 1.1537413597106934, + "learning_rate": 8.836373990126897e-06, + "loss": 0.3613, + "step": 12237 + }, + { + "epoch": 0.24498661261667043, + "grad_norm": 1.6901018619537354, + "learning_rate": 8.836166077514276e-06, + "loss": 0.8613, + "step": 12238 + }, + { + "epoch": 0.24500663113379875, + "grad_norm": 1.0287081003189087, + "learning_rate": 8.835958148775238e-06, + "loss": 0.278, + "step": 12239 + }, + { + "epoch": 0.2450266496509271, + "grad_norm": 1.170642614364624, + "learning_rate": 8.835750203910659e-06, + "loss": 0.3377, + "step": 12240 + }, + { + "epoch": 0.24504666816805545, + "grad_norm": 1.1856958866119385, + "learning_rate": 8.83554224292141e-06, + "loss": 0.3185, + "step": 12241 + }, + { + "epoch": 0.2450666866851838, + "grad_norm": 1.1481517553329468, + "learning_rate": 8.835334265808368e-06, + "loss": 0.3062, + "step": 12242 + }, + { + "epoch": 0.24508670520231213, + "grad_norm": 1.1133029460906982, + "learning_rate": 8.835126272572404e-06, + "loss": 0.3478, + "step": 12243 + }, + { + "epoch": 0.24510672371944048, + "grad_norm": 1.0846295356750488, + "learning_rate": 8.834918263214397e-06, + "loss": 0.3191, + "step": 12244 + }, + { + "epoch": 0.24512674223656883, + "grad_norm": 1.1505950689315796, + "learning_rate": 8.834710237735217e-06, + "loss": 0.3278, + "step": 12245 + }, + { + "epoch": 0.24514676075369718, + "grad_norm": 1.1336605548858643, + "learning_rate": 8.834502196135743e-06, + "loss": 0.3237, + "step": 12246 + }, + { + "epoch": 0.2451667792708255, + "grad_norm": 1.1136738061904907, + "learning_rate": 8.834294138416845e-06, + "loss": 0.3348, + "step": 12247 + }, + { + "epoch": 0.24518679778795385, + "grad_norm": 1.200194001197815, + "learning_rate": 8.834086064579399e-06, + "loss": 0.296, + "step": 12248 + }, + { + "epoch": 0.2452068163050822, + "grad_norm": 1.0195374488830566, + "learning_rate": 8.833877974624282e-06, + "loss": 0.3274, + "step": 12249 + }, + { + "epoch": 0.24522683482221055, + "grad_norm": 1.9066509008407593, + "learning_rate": 8.833669868552366e-06, + "loss": 0.7888, + "step": 12250 + }, + { + "epoch": 0.24524685333933888, + "grad_norm": 1.1286592483520508, + "learning_rate": 8.833461746364526e-06, + "loss": 0.2935, + "step": 12251 + }, + { + "epoch": 0.24526687185646723, + "grad_norm": 1.1907083988189697, + "learning_rate": 8.833253608061641e-06, + "loss": 0.3475, + "step": 12252 + }, + { + "epoch": 0.24528689037359558, + "grad_norm": 1.1250122785568237, + "learning_rate": 8.83304545364458e-06, + "loss": 0.2985, + "step": 12253 + }, + { + "epoch": 0.24530690889072393, + "grad_norm": 1.1776522397994995, + "learning_rate": 8.832837283114221e-06, + "loss": 0.278, + "step": 12254 + }, + { + "epoch": 0.24532692740785225, + "grad_norm": 1.005232572555542, + "learning_rate": 8.832629096471439e-06, + "loss": 0.3139, + "step": 12255 + }, + { + "epoch": 0.2453469459249806, + "grad_norm": 1.1397701501846313, + "learning_rate": 8.83242089371711e-06, + "loss": 0.3702, + "step": 12256 + }, + { + "epoch": 0.24536696444210895, + "grad_norm": 1.0678296089172363, + "learning_rate": 8.832212674852107e-06, + "loss": 0.3276, + "step": 12257 + }, + { + "epoch": 0.2453869829592373, + "grad_norm": 1.2311992645263672, + "learning_rate": 8.832004439877308e-06, + "loss": 0.3509, + "step": 12258 + }, + { + "epoch": 0.24540700147636563, + "grad_norm": 1.1096601486206055, + "learning_rate": 8.831796188793586e-06, + "loss": 0.3025, + "step": 12259 + }, + { + "epoch": 0.24542701999349398, + "grad_norm": 1.2431167364120483, + "learning_rate": 8.831587921601818e-06, + "loss": 0.3275, + "step": 12260 + }, + { + "epoch": 0.24544703851062233, + "grad_norm": 1.1657295227050781, + "learning_rate": 8.83137963830288e-06, + "loss": 0.3199, + "step": 12261 + }, + { + "epoch": 0.24546705702775068, + "grad_norm": 1.1279455423355103, + "learning_rate": 8.831171338897645e-06, + "loss": 0.3144, + "step": 12262 + }, + { + "epoch": 0.245487075544879, + "grad_norm": 1.060212254524231, + "learning_rate": 8.83096302338699e-06, + "loss": 0.344, + "step": 12263 + }, + { + "epoch": 0.24550709406200735, + "grad_norm": 1.0946437120437622, + "learning_rate": 8.83075469177179e-06, + "loss": 0.3466, + "step": 12264 + }, + { + "epoch": 0.2455271125791357, + "grad_norm": 1.2193135023117065, + "learning_rate": 8.830546344052923e-06, + "loss": 0.3308, + "step": 12265 + }, + { + "epoch": 0.24554713109626405, + "grad_norm": 0.9766149520874023, + "learning_rate": 8.830337980231266e-06, + "loss": 0.3388, + "step": 12266 + }, + { + "epoch": 0.24556714961339238, + "grad_norm": 1.97589111328125, + "learning_rate": 8.830129600307689e-06, + "loss": 0.8744, + "step": 12267 + }, + { + "epoch": 0.24558716813052073, + "grad_norm": 1.2431284189224243, + "learning_rate": 8.829921204283072e-06, + "loss": 0.3842, + "step": 12268 + }, + { + "epoch": 0.24560718664764908, + "grad_norm": 1.2032407522201538, + "learning_rate": 8.829712792158291e-06, + "loss": 0.2925, + "step": 12269 + }, + { + "epoch": 0.24562720516477743, + "grad_norm": 1.193739414215088, + "learning_rate": 8.829504363934221e-06, + "loss": 0.3549, + "step": 12270 + }, + { + "epoch": 0.24564722368190575, + "grad_norm": 1.213774561882019, + "learning_rate": 8.82929591961174e-06, + "loss": 0.3236, + "step": 12271 + }, + { + "epoch": 0.2456672421990341, + "grad_norm": 0.9756123423576355, + "learning_rate": 8.829087459191722e-06, + "loss": 0.3537, + "step": 12272 + }, + { + "epoch": 0.24568726071616245, + "grad_norm": 1.202427864074707, + "learning_rate": 8.828878982675044e-06, + "loss": 0.3452, + "step": 12273 + }, + { + "epoch": 0.2457072792332908, + "grad_norm": 0.9774007797241211, + "learning_rate": 8.828670490062582e-06, + "loss": 0.2687, + "step": 12274 + }, + { + "epoch": 0.24572729775041913, + "grad_norm": 1.057750940322876, + "learning_rate": 8.828461981355217e-06, + "loss": 0.2828, + "step": 12275 + }, + { + "epoch": 0.24574731626754748, + "grad_norm": 1.183967113494873, + "learning_rate": 8.82825345655382e-06, + "loss": 0.2954, + "step": 12276 + }, + { + "epoch": 0.24576733478467583, + "grad_norm": 1.8018683195114136, + "learning_rate": 8.828044915659266e-06, + "loss": 0.805, + "step": 12277 + }, + { + "epoch": 0.24578735330180418, + "grad_norm": 1.1835579872131348, + "learning_rate": 8.82783635867244e-06, + "loss": 0.3336, + "step": 12278 + }, + { + "epoch": 0.2458073718189325, + "grad_norm": 1.2168563604354858, + "learning_rate": 8.82762778559421e-06, + "loss": 0.3431, + "step": 12279 + }, + { + "epoch": 0.24582739033606085, + "grad_norm": 1.8463772535324097, + "learning_rate": 8.827419196425458e-06, + "loss": 0.8492, + "step": 12280 + }, + { + "epoch": 0.2458474088531892, + "grad_norm": 1.083670973777771, + "learning_rate": 8.827210591167059e-06, + "loss": 0.2932, + "step": 12281 + }, + { + "epoch": 0.24586742737031755, + "grad_norm": 1.0842094421386719, + "learning_rate": 8.82700196981989e-06, + "loss": 0.3206, + "step": 12282 + }, + { + "epoch": 0.24588744588744588, + "grad_norm": 1.0730115175247192, + "learning_rate": 8.826793332384828e-06, + "loss": 0.3448, + "step": 12283 + }, + { + "epoch": 0.24590746440457423, + "grad_norm": 1.0034328699111938, + "learning_rate": 8.826584678862752e-06, + "loss": 0.3313, + "step": 12284 + }, + { + "epoch": 0.24592748292170258, + "grad_norm": 1.0411478281021118, + "learning_rate": 8.826376009254536e-06, + "loss": 0.3641, + "step": 12285 + }, + { + "epoch": 0.24594750143883093, + "grad_norm": 1.072858214378357, + "learning_rate": 8.82616732356106e-06, + "loss": 0.3546, + "step": 12286 + }, + { + "epoch": 0.24596751995595925, + "grad_norm": 1.0737109184265137, + "learning_rate": 8.825958621783198e-06, + "loss": 0.3091, + "step": 12287 + }, + { + "epoch": 0.2459875384730876, + "grad_norm": 1.040658712387085, + "learning_rate": 8.825749903921831e-06, + "loss": 0.3237, + "step": 12288 + }, + { + "epoch": 0.24600755699021595, + "grad_norm": 1.0114041566848755, + "learning_rate": 8.825541169977835e-06, + "loss": 0.3237, + "step": 12289 + }, + { + "epoch": 0.2460275755073443, + "grad_norm": 0.9901450276374817, + "learning_rate": 8.825332419952087e-06, + "loss": 0.2875, + "step": 12290 + }, + { + "epoch": 0.24604759402447263, + "grad_norm": 1.7534679174423218, + "learning_rate": 8.825123653845463e-06, + "loss": 0.8456, + "step": 12291 + }, + { + "epoch": 0.24606761254160098, + "grad_norm": 1.2165197134017944, + "learning_rate": 8.824914871658844e-06, + "loss": 0.312, + "step": 12292 + }, + { + "epoch": 0.24608763105872933, + "grad_norm": 1.09016752243042, + "learning_rate": 8.824706073393106e-06, + "loss": 0.2628, + "step": 12293 + }, + { + "epoch": 0.24610764957585768, + "grad_norm": 1.1390691995620728, + "learning_rate": 8.824497259049125e-06, + "loss": 0.3678, + "step": 12294 + }, + { + "epoch": 0.246127668092986, + "grad_norm": 1.1352901458740234, + "learning_rate": 8.824288428627782e-06, + "loss": 0.3205, + "step": 12295 + }, + { + "epoch": 0.24614768661011435, + "grad_norm": 1.2163968086242676, + "learning_rate": 8.824079582129954e-06, + "loss": 0.2914, + "step": 12296 + }, + { + "epoch": 0.2461677051272427, + "grad_norm": 0.9309481978416443, + "learning_rate": 8.823870719556518e-06, + "loss": 0.2919, + "step": 12297 + }, + { + "epoch": 0.24618772364437105, + "grad_norm": 1.0591586828231812, + "learning_rate": 8.82366184090835e-06, + "loss": 0.2694, + "step": 12298 + }, + { + "epoch": 0.24620774216149938, + "grad_norm": 1.0603128671646118, + "learning_rate": 8.823452946186334e-06, + "loss": 0.3137, + "step": 12299 + }, + { + "epoch": 0.24622776067862773, + "grad_norm": 1.0909372568130493, + "learning_rate": 8.823244035391343e-06, + "loss": 0.3119, + "step": 12300 + }, + { + "epoch": 0.24624777919575608, + "grad_norm": 1.2437644004821777, + "learning_rate": 8.823035108524257e-06, + "loss": 0.3444, + "step": 12301 + }, + { + "epoch": 0.24626779771288443, + "grad_norm": 1.132865071296692, + "learning_rate": 8.822826165585953e-06, + "loss": 0.3025, + "step": 12302 + }, + { + "epoch": 0.24628781623001275, + "grad_norm": 1.3058656454086304, + "learning_rate": 8.822617206577313e-06, + "loss": 0.3141, + "step": 12303 + }, + { + "epoch": 0.2463078347471411, + "grad_norm": 1.2072128057479858, + "learning_rate": 8.822408231499211e-06, + "loss": 0.3187, + "step": 12304 + }, + { + "epoch": 0.24632785326426945, + "grad_norm": 1.155640959739685, + "learning_rate": 8.82219924035253e-06, + "loss": 0.3354, + "step": 12305 + }, + { + "epoch": 0.2463478717813978, + "grad_norm": 1.9301050901412964, + "learning_rate": 8.821990233138142e-06, + "loss": 0.8631, + "step": 12306 + }, + { + "epoch": 0.24636789029852613, + "grad_norm": 1.099686622619629, + "learning_rate": 8.821781209856933e-06, + "loss": 0.298, + "step": 12307 + }, + { + "epoch": 0.24638790881565448, + "grad_norm": 1.1800510883331299, + "learning_rate": 8.821572170509776e-06, + "loss": 0.2943, + "step": 12308 + }, + { + "epoch": 0.24640792733278283, + "grad_norm": 1.0839101076126099, + "learning_rate": 8.821363115097553e-06, + "loss": 0.3095, + "step": 12309 + }, + { + "epoch": 0.24642794584991118, + "grad_norm": 1.2501286268234253, + "learning_rate": 8.82115404362114e-06, + "loss": 0.3408, + "step": 12310 + }, + { + "epoch": 0.2464479643670395, + "grad_norm": 1.1365939378738403, + "learning_rate": 8.82094495608142e-06, + "loss": 0.3275, + "step": 12311 + }, + { + "epoch": 0.24646798288416785, + "grad_norm": 1.0141327381134033, + "learning_rate": 8.820735852479268e-06, + "loss": 0.2719, + "step": 12312 + }, + { + "epoch": 0.2464880014012962, + "grad_norm": 1.2310670614242554, + "learning_rate": 8.820526732815566e-06, + "loss": 0.3333, + "step": 12313 + }, + { + "epoch": 0.24650801991842455, + "grad_norm": 1.1661251783370972, + "learning_rate": 8.820317597091191e-06, + "loss": 0.3308, + "step": 12314 + }, + { + "epoch": 0.24652803843555288, + "grad_norm": 1.087640404701233, + "learning_rate": 8.820108445307024e-06, + "loss": 0.32, + "step": 12315 + }, + { + "epoch": 0.24654805695268123, + "grad_norm": 1.1918867826461792, + "learning_rate": 8.819899277463942e-06, + "loss": 0.331, + "step": 12316 + }, + { + "epoch": 0.24656807546980958, + "grad_norm": 1.1180249452590942, + "learning_rate": 8.819690093562828e-06, + "loss": 0.2831, + "step": 12317 + }, + { + "epoch": 0.24658809398693793, + "grad_norm": 1.847017526626587, + "learning_rate": 8.819480893604555e-06, + "loss": 0.858, + "step": 12318 + }, + { + "epoch": 0.24660811250406625, + "grad_norm": 1.0366075038909912, + "learning_rate": 8.81927167759001e-06, + "loss": 0.3292, + "step": 12319 + }, + { + "epoch": 0.2466281310211946, + "grad_norm": 1.1422046422958374, + "learning_rate": 8.819062445520066e-06, + "loss": 0.312, + "step": 12320 + }, + { + "epoch": 0.24664814953832295, + "grad_norm": 1.0996167659759521, + "learning_rate": 8.818853197395607e-06, + "loss": 0.3203, + "step": 12321 + }, + { + "epoch": 0.2466681680554513, + "grad_norm": 1.1061278581619263, + "learning_rate": 8.81864393321751e-06, + "loss": 0.2753, + "step": 12322 + }, + { + "epoch": 0.24668818657257963, + "grad_norm": 1.1135412454605103, + "learning_rate": 8.818434652986655e-06, + "loss": 0.3266, + "step": 12323 + }, + { + "epoch": 0.24670820508970798, + "grad_norm": 0.995026707649231, + "learning_rate": 8.818225356703924e-06, + "loss": 0.2814, + "step": 12324 + }, + { + "epoch": 0.24672822360683633, + "grad_norm": 1.1426105499267578, + "learning_rate": 8.818016044370194e-06, + "loss": 0.3707, + "step": 12325 + }, + { + "epoch": 0.24674824212396468, + "grad_norm": 1.8720115423202515, + "learning_rate": 8.817806715986347e-06, + "loss": 0.8236, + "step": 12326 + }, + { + "epoch": 0.246768260641093, + "grad_norm": 1.253223180770874, + "learning_rate": 8.817597371553262e-06, + "loss": 0.3445, + "step": 12327 + }, + { + "epoch": 0.24678827915822135, + "grad_norm": 2.0306715965270996, + "learning_rate": 8.817388011071819e-06, + "loss": 0.8205, + "step": 12328 + }, + { + "epoch": 0.2468082976753497, + "grad_norm": 1.1683670282363892, + "learning_rate": 8.8171786345429e-06, + "loss": 0.3539, + "step": 12329 + }, + { + "epoch": 0.24682831619247805, + "grad_norm": 1.0705047845840454, + "learning_rate": 8.816969241967381e-06, + "loss": 0.3123, + "step": 12330 + }, + { + "epoch": 0.24684833470960638, + "grad_norm": 1.178600549697876, + "learning_rate": 8.816759833346146e-06, + "loss": 0.3108, + "step": 12331 + }, + { + "epoch": 0.24686835322673473, + "grad_norm": 1.133712887763977, + "learning_rate": 8.816550408680074e-06, + "loss": 0.329, + "step": 12332 + }, + { + "epoch": 0.24688837174386308, + "grad_norm": 1.0927772521972656, + "learning_rate": 8.816340967970045e-06, + "loss": 0.3136, + "step": 12333 + }, + { + "epoch": 0.24690839026099143, + "grad_norm": 1.0748449563980103, + "learning_rate": 8.81613151121694e-06, + "loss": 0.2979, + "step": 12334 + }, + { + "epoch": 0.24692840877811975, + "grad_norm": 1.0242416858673096, + "learning_rate": 8.81592203842164e-06, + "loss": 0.2874, + "step": 12335 + }, + { + "epoch": 0.2469484272952481, + "grad_norm": 1.1802762746810913, + "learning_rate": 8.815712549585024e-06, + "loss": 0.2939, + "step": 12336 + }, + { + "epoch": 0.24696844581237645, + "grad_norm": 1.1673085689544678, + "learning_rate": 8.815503044707975e-06, + "loss": 0.3043, + "step": 12337 + }, + { + "epoch": 0.2469884643295048, + "grad_norm": 1.135756015777588, + "learning_rate": 8.81529352379137e-06, + "loss": 0.3338, + "step": 12338 + }, + { + "epoch": 0.24700848284663313, + "grad_norm": 1.0323922634124756, + "learning_rate": 8.815083986836094e-06, + "loss": 0.3288, + "step": 12339 + }, + { + "epoch": 0.24702850136376148, + "grad_norm": 1.144331932067871, + "learning_rate": 8.814874433843026e-06, + "loss": 0.3565, + "step": 12340 + }, + { + "epoch": 0.24704851988088983, + "grad_norm": 1.829595685005188, + "learning_rate": 8.814664864813047e-06, + "loss": 0.8272, + "step": 12341 + }, + { + "epoch": 0.24706853839801818, + "grad_norm": 1.191626787185669, + "learning_rate": 8.814455279747036e-06, + "loss": 0.2914, + "step": 12342 + }, + { + "epoch": 0.2470885569151465, + "grad_norm": 1.4284707307815552, + "learning_rate": 8.814245678645878e-06, + "loss": 0.3523, + "step": 12343 + }, + { + "epoch": 0.24710857543227485, + "grad_norm": 1.122626781463623, + "learning_rate": 8.814036061510451e-06, + "loss": 0.3351, + "step": 12344 + }, + { + "epoch": 0.2471285939494032, + "grad_norm": 1.9534345865249634, + "learning_rate": 8.813826428341637e-06, + "loss": 0.8937, + "step": 12345 + }, + { + "epoch": 0.24714861246653155, + "grad_norm": 1.1104947328567505, + "learning_rate": 8.813616779140318e-06, + "loss": 0.3795, + "step": 12346 + }, + { + "epoch": 0.24716863098365988, + "grad_norm": 1.1914383172988892, + "learning_rate": 8.813407113907373e-06, + "loss": 0.2966, + "step": 12347 + }, + { + "epoch": 0.24718864950078823, + "grad_norm": 1.1799784898757935, + "learning_rate": 8.813197432643688e-06, + "loss": 0.3126, + "step": 12348 + }, + { + "epoch": 0.24720866801791658, + "grad_norm": 1.2500622272491455, + "learning_rate": 8.81298773535014e-06, + "loss": 0.3267, + "step": 12349 + }, + { + "epoch": 0.24722868653504493, + "grad_norm": 1.0491284132003784, + "learning_rate": 8.81277802202761e-06, + "loss": 0.2527, + "step": 12350 + }, + { + "epoch": 0.24724870505217325, + "grad_norm": 1.8393332958221436, + "learning_rate": 8.812568292676985e-06, + "loss": 0.8379, + "step": 12351 + }, + { + "epoch": 0.2472687235693016, + "grad_norm": 1.2353194952011108, + "learning_rate": 8.812358547299142e-06, + "loss": 0.3645, + "step": 12352 + }, + { + "epoch": 0.24728874208642995, + "grad_norm": 1.0077590942382812, + "learning_rate": 8.812148785894966e-06, + "loss": 0.3099, + "step": 12353 + }, + { + "epoch": 0.2473087606035583, + "grad_norm": 1.0232408046722412, + "learning_rate": 8.811939008465333e-06, + "loss": 0.2915, + "step": 12354 + }, + { + "epoch": 0.24732877912068663, + "grad_norm": 1.3045600652694702, + "learning_rate": 8.811729215011133e-06, + "loss": 0.3453, + "step": 12355 + }, + { + "epoch": 0.24734879763781498, + "grad_norm": 1.0880051851272583, + "learning_rate": 8.811519405533241e-06, + "loss": 0.2921, + "step": 12356 + }, + { + "epoch": 0.24736881615494333, + "grad_norm": 1.0360898971557617, + "learning_rate": 8.811309580032542e-06, + "loss": 0.3014, + "step": 12357 + }, + { + "epoch": 0.24738883467207168, + "grad_norm": 1.1453174352645874, + "learning_rate": 8.811099738509919e-06, + "loss": 0.3137, + "step": 12358 + }, + { + "epoch": 0.2474088531892, + "grad_norm": 1.1719872951507568, + "learning_rate": 8.810889880966251e-06, + "loss": 0.3087, + "step": 12359 + }, + { + "epoch": 0.24742887170632835, + "grad_norm": 1.021242618560791, + "learning_rate": 8.810680007402423e-06, + "loss": 0.303, + "step": 12360 + }, + { + "epoch": 0.2474488902234567, + "grad_norm": 1.0912386178970337, + "learning_rate": 8.810470117819316e-06, + "loss": 0.3564, + "step": 12361 + }, + { + "epoch": 0.24746890874058505, + "grad_norm": 1.2327598333358765, + "learning_rate": 8.810260212217813e-06, + "loss": 0.3322, + "step": 12362 + }, + { + "epoch": 0.24748892725771338, + "grad_norm": 1.2345328330993652, + "learning_rate": 8.810050290598797e-06, + "loss": 0.3627, + "step": 12363 + }, + { + "epoch": 0.24750894577484173, + "grad_norm": 1.836236596107483, + "learning_rate": 8.809840352963148e-06, + "loss": 0.8144, + "step": 12364 + }, + { + "epoch": 0.24752896429197008, + "grad_norm": 1.1260737180709839, + "learning_rate": 8.80963039931175e-06, + "loss": 0.3784, + "step": 12365 + }, + { + "epoch": 0.24754898280909843, + "grad_norm": 1.0034652948379517, + "learning_rate": 8.809420429645488e-06, + "loss": 0.2914, + "step": 12366 + }, + { + "epoch": 0.24756900132622675, + "grad_norm": 1.2671825885772705, + "learning_rate": 8.809210443965239e-06, + "loss": 0.3566, + "step": 12367 + }, + { + "epoch": 0.2475890198433551, + "grad_norm": 1.041676640510559, + "learning_rate": 8.80900044227189e-06, + "loss": 0.3098, + "step": 12368 + }, + { + "epoch": 0.24760903836048345, + "grad_norm": 1.158549189567566, + "learning_rate": 8.808790424566324e-06, + "loss": 0.3531, + "step": 12369 + }, + { + "epoch": 0.2476290568776118, + "grad_norm": 1.2140685319900513, + "learning_rate": 8.80858039084942e-06, + "loss": 0.3416, + "step": 12370 + }, + { + "epoch": 0.24764907539474013, + "grad_norm": 1.174128770828247, + "learning_rate": 8.808370341122069e-06, + "loss": 0.275, + "step": 12371 + }, + { + "epoch": 0.24766909391186848, + "grad_norm": 1.1854649782180786, + "learning_rate": 8.808160275385145e-06, + "loss": 0.3389, + "step": 12372 + }, + { + "epoch": 0.24768911242899683, + "grad_norm": 1.2624876499176025, + "learning_rate": 8.807950193639536e-06, + "loss": 0.3273, + "step": 12373 + }, + { + "epoch": 0.24770913094612518, + "grad_norm": 1.1098530292510986, + "learning_rate": 8.807740095886122e-06, + "loss": 0.2993, + "step": 12374 + }, + { + "epoch": 0.2477291494632535, + "grad_norm": 1.1156585216522217, + "learning_rate": 8.807529982125789e-06, + "loss": 0.3003, + "step": 12375 + }, + { + "epoch": 0.24774916798038185, + "grad_norm": 1.198922872543335, + "learning_rate": 8.807319852359419e-06, + "loss": 0.3286, + "step": 12376 + }, + { + "epoch": 0.2477691864975102, + "grad_norm": 1.1458321809768677, + "learning_rate": 8.807109706587897e-06, + "loss": 0.3308, + "step": 12377 + }, + { + "epoch": 0.24778920501463855, + "grad_norm": 1.1921054124832153, + "learning_rate": 8.806899544812104e-06, + "loss": 0.3271, + "step": 12378 + }, + { + "epoch": 0.24780922353176688, + "grad_norm": 1.0751917362213135, + "learning_rate": 8.806689367032925e-06, + "loss": 0.2995, + "step": 12379 + }, + { + "epoch": 0.24782924204889523, + "grad_norm": 1.0506972074508667, + "learning_rate": 8.806479173251242e-06, + "loss": 0.2799, + "step": 12380 + }, + { + "epoch": 0.24784926056602358, + "grad_norm": 1.9385489225387573, + "learning_rate": 8.806268963467941e-06, + "loss": 0.8949, + "step": 12381 + }, + { + "epoch": 0.24786927908315193, + "grad_norm": 1.1391011476516724, + "learning_rate": 8.806058737683905e-06, + "loss": 0.3213, + "step": 12382 + }, + { + "epoch": 0.24788929760028025, + "grad_norm": 1.1563206911087036, + "learning_rate": 8.805848495900016e-06, + "loss": 0.3024, + "step": 12383 + }, + { + "epoch": 0.2479093161174086, + "grad_norm": 1.157623529434204, + "learning_rate": 8.805638238117158e-06, + "loss": 0.2662, + "step": 12384 + }, + { + "epoch": 0.24792933463453695, + "grad_norm": 1.0074563026428223, + "learning_rate": 8.805427964336217e-06, + "loss": 0.3158, + "step": 12385 + }, + { + "epoch": 0.2479493531516653, + "grad_norm": 1.1132268905639648, + "learning_rate": 8.805217674558076e-06, + "loss": 0.3007, + "step": 12386 + }, + { + "epoch": 0.24796937166879363, + "grad_norm": 1.2066264152526855, + "learning_rate": 8.805007368783617e-06, + "loss": 0.3351, + "step": 12387 + }, + { + "epoch": 0.24798939018592198, + "grad_norm": 1.0643197298049927, + "learning_rate": 8.80479704701373e-06, + "loss": 0.3481, + "step": 12388 + }, + { + "epoch": 0.24800940870305033, + "grad_norm": 1.265409231185913, + "learning_rate": 8.804586709249291e-06, + "loss": 0.344, + "step": 12389 + }, + { + "epoch": 0.24802942722017868, + "grad_norm": 1.0711863040924072, + "learning_rate": 8.804376355491189e-06, + "loss": 0.3287, + "step": 12390 + }, + { + "epoch": 0.248049445737307, + "grad_norm": 1.0433886051177979, + "learning_rate": 8.80416598574031e-06, + "loss": 0.3042, + "step": 12391 + }, + { + "epoch": 0.24806946425443535, + "grad_norm": 1.0868487358093262, + "learning_rate": 8.803955599997534e-06, + "loss": 0.3612, + "step": 12392 + }, + { + "epoch": 0.2480894827715637, + "grad_norm": 1.1498123407363892, + "learning_rate": 8.803745198263747e-06, + "loss": 0.3388, + "step": 12393 + }, + { + "epoch": 0.24810950128869205, + "grad_norm": 1.1829978227615356, + "learning_rate": 8.803534780539834e-06, + "loss": 0.36, + "step": 12394 + }, + { + "epoch": 0.24812951980582038, + "grad_norm": 1.3233814239501953, + "learning_rate": 8.803324346826678e-06, + "loss": 0.3396, + "step": 12395 + }, + { + "epoch": 0.24814953832294873, + "grad_norm": 1.6970009803771973, + "learning_rate": 8.803113897125167e-06, + "loss": 0.8207, + "step": 12396 + }, + { + "epoch": 0.24816955684007708, + "grad_norm": 1.0820673704147339, + "learning_rate": 8.802903431436182e-06, + "loss": 0.3051, + "step": 12397 + }, + { + "epoch": 0.24818957535720543, + "grad_norm": 1.1197348833084106, + "learning_rate": 8.802692949760611e-06, + "loss": 0.3277, + "step": 12398 + }, + { + "epoch": 0.24820959387433375, + "grad_norm": 1.0946868658065796, + "learning_rate": 8.802482452099336e-06, + "loss": 0.2834, + "step": 12399 + }, + { + "epoch": 0.2482296123914621, + "grad_norm": 1.1433449983596802, + "learning_rate": 8.802271938453245e-06, + "loss": 0.2944, + "step": 12400 + }, + { + "epoch": 0.24824963090859045, + "grad_norm": 1.0269055366516113, + "learning_rate": 8.80206140882322e-06, + "loss": 0.3312, + "step": 12401 + }, + { + "epoch": 0.2482696494257188, + "grad_norm": 0.9716321229934692, + "learning_rate": 8.801850863210144e-06, + "loss": 0.3281, + "step": 12402 + }, + { + "epoch": 0.24828966794284713, + "grad_norm": 1.9504553079605103, + "learning_rate": 8.801640301614909e-06, + "loss": 0.7938, + "step": 12403 + }, + { + "epoch": 0.24830968645997548, + "grad_norm": 1.054892659187317, + "learning_rate": 8.801429724038394e-06, + "loss": 0.2756, + "step": 12404 + }, + { + "epoch": 0.24832970497710383, + "grad_norm": 1.1098710298538208, + "learning_rate": 8.801219130481489e-06, + "loss": 0.3319, + "step": 12405 + }, + { + "epoch": 0.24834972349423218, + "grad_norm": 1.0191636085510254, + "learning_rate": 8.801008520945076e-06, + "loss": 0.352, + "step": 12406 + }, + { + "epoch": 0.2483697420113605, + "grad_norm": 1.050573706626892, + "learning_rate": 8.80079789543004e-06, + "loss": 0.3075, + "step": 12407 + }, + { + "epoch": 0.24838976052848885, + "grad_norm": 1.6957615613937378, + "learning_rate": 8.800587253937267e-06, + "loss": 0.8107, + "step": 12408 + }, + { + "epoch": 0.2484097790456172, + "grad_norm": 1.0881729125976562, + "learning_rate": 8.800376596467644e-06, + "loss": 0.3036, + "step": 12409 + }, + { + "epoch": 0.24842979756274555, + "grad_norm": 1.0346543788909912, + "learning_rate": 8.800165923022057e-06, + "loss": 0.2989, + "step": 12410 + }, + { + "epoch": 0.24844981607987388, + "grad_norm": 1.116837501525879, + "learning_rate": 8.79995523360139e-06, + "loss": 0.3293, + "step": 12411 + }, + { + "epoch": 0.24846983459700223, + "grad_norm": 1.0839874744415283, + "learning_rate": 8.799744528206526e-06, + "loss": 0.324, + "step": 12412 + }, + { + "epoch": 0.24848985311413058, + "grad_norm": 1.079092264175415, + "learning_rate": 8.799533806838357e-06, + "loss": 0.3466, + "step": 12413 + }, + { + "epoch": 0.24850987163125893, + "grad_norm": 1.166017770767212, + "learning_rate": 8.799323069497764e-06, + "loss": 0.3701, + "step": 12414 + }, + { + "epoch": 0.24852989014838725, + "grad_norm": 1.1152186393737793, + "learning_rate": 8.799112316185634e-06, + "loss": 0.3367, + "step": 12415 + }, + { + "epoch": 0.2485499086655156, + "grad_norm": 1.1971065998077393, + "learning_rate": 8.798901546902854e-06, + "loss": 0.3893, + "step": 12416 + }, + { + "epoch": 0.24856992718264395, + "grad_norm": 1.133281946182251, + "learning_rate": 8.79869076165031e-06, + "loss": 0.3519, + "step": 12417 + }, + { + "epoch": 0.2485899456997723, + "grad_norm": 1.0878969430923462, + "learning_rate": 8.798479960428887e-06, + "loss": 0.3916, + "step": 12418 + }, + { + "epoch": 0.24860996421690063, + "grad_norm": 1.2492306232452393, + "learning_rate": 8.798269143239474e-06, + "loss": 0.2702, + "step": 12419 + }, + { + "epoch": 0.24862998273402898, + "grad_norm": 1.136456847190857, + "learning_rate": 8.798058310082951e-06, + "loss": 0.3926, + "step": 12420 + }, + { + "epoch": 0.24865000125115733, + "grad_norm": 1.2509859800338745, + "learning_rate": 8.79784746096021e-06, + "loss": 0.385, + "step": 12421 + }, + { + "epoch": 0.24867001976828568, + "grad_norm": 1.066239356994629, + "learning_rate": 8.797636595872136e-06, + "loss": 0.2889, + "step": 12422 + }, + { + "epoch": 0.248690038285414, + "grad_norm": 1.1660654544830322, + "learning_rate": 8.797425714819613e-06, + "loss": 0.3526, + "step": 12423 + }, + { + "epoch": 0.24871005680254235, + "grad_norm": 1.1315579414367676, + "learning_rate": 8.797214817803532e-06, + "loss": 0.3394, + "step": 12424 + }, + { + "epoch": 0.2487300753196707, + "grad_norm": 1.0979794263839722, + "learning_rate": 8.797003904824778e-06, + "loss": 0.3095, + "step": 12425 + }, + { + "epoch": 0.24875009383679905, + "grad_norm": 1.063501238822937, + "learning_rate": 8.796792975884233e-06, + "loss": 0.2789, + "step": 12426 + }, + { + "epoch": 0.24877011235392738, + "grad_norm": 1.1864581108093262, + "learning_rate": 8.79658203098279e-06, + "loss": 0.3299, + "step": 12427 + }, + { + "epoch": 0.24879013087105573, + "grad_norm": 1.1555726528167725, + "learning_rate": 8.796371070121333e-06, + "loss": 0.3322, + "step": 12428 + }, + { + "epoch": 0.24881014938818408, + "grad_norm": 1.0596574544906616, + "learning_rate": 8.796160093300748e-06, + "loss": 0.3181, + "step": 12429 + }, + { + "epoch": 0.24883016790531243, + "grad_norm": 1.1199923753738403, + "learning_rate": 8.795949100521922e-06, + "loss": 0.3173, + "step": 12430 + }, + { + "epoch": 0.24885018642244075, + "grad_norm": 1.8673040866851807, + "learning_rate": 8.795738091785744e-06, + "loss": 0.7678, + "step": 12431 + }, + { + "epoch": 0.2488702049395691, + "grad_norm": 1.0765029191970825, + "learning_rate": 8.7955270670931e-06, + "loss": 0.3214, + "step": 12432 + }, + { + "epoch": 0.24889022345669745, + "grad_norm": 1.1134217977523804, + "learning_rate": 8.795316026444878e-06, + "loss": 0.3341, + "step": 12433 + }, + { + "epoch": 0.2489102419738258, + "grad_norm": 1.0683954954147339, + "learning_rate": 8.795104969841963e-06, + "loss": 0.3056, + "step": 12434 + }, + { + "epoch": 0.24893026049095412, + "grad_norm": 1.1149147748947144, + "learning_rate": 8.794893897285244e-06, + "loss": 0.3223, + "step": 12435 + }, + { + "epoch": 0.24895027900808248, + "grad_norm": 1.1402121782302856, + "learning_rate": 8.794682808775608e-06, + "loss": 0.2916, + "step": 12436 + }, + { + "epoch": 0.24897029752521083, + "grad_norm": 1.0692601203918457, + "learning_rate": 8.794471704313941e-06, + "loss": 0.2969, + "step": 12437 + }, + { + "epoch": 0.24899031604233918, + "grad_norm": 1.0454086065292358, + "learning_rate": 8.794260583901133e-06, + "loss": 0.3295, + "step": 12438 + }, + { + "epoch": 0.2490103345594675, + "grad_norm": 1.0458582639694214, + "learning_rate": 8.794049447538069e-06, + "loss": 0.3341, + "step": 12439 + }, + { + "epoch": 0.24903035307659585, + "grad_norm": 1.8736515045166016, + "learning_rate": 8.793838295225637e-06, + "loss": 0.8244, + "step": 12440 + }, + { + "epoch": 0.2490503715937242, + "grad_norm": 1.822278618812561, + "learning_rate": 8.793627126964725e-06, + "loss": 0.8591, + "step": 12441 + }, + { + "epoch": 0.24907039011085255, + "grad_norm": 1.9039299488067627, + "learning_rate": 8.793415942756223e-06, + "loss": 0.7861, + "step": 12442 + }, + { + "epoch": 0.24909040862798087, + "grad_norm": 1.098771333694458, + "learning_rate": 8.793204742601015e-06, + "loss": 0.3336, + "step": 12443 + }, + { + "epoch": 0.24911042714510923, + "grad_norm": 1.0655869245529175, + "learning_rate": 8.79299352649999e-06, + "loss": 0.3605, + "step": 12444 + }, + { + "epoch": 0.24913044566223758, + "grad_norm": 1.215250849723816, + "learning_rate": 8.79278229445404e-06, + "loss": 0.2941, + "step": 12445 + }, + { + "epoch": 0.24915046417936593, + "grad_norm": 1.2195864915847778, + "learning_rate": 8.792571046464047e-06, + "loss": 0.3188, + "step": 12446 + }, + { + "epoch": 0.24917048269649425, + "grad_norm": 1.187050461769104, + "learning_rate": 8.7923597825309e-06, + "loss": 0.3667, + "step": 12447 + }, + { + "epoch": 0.2491905012136226, + "grad_norm": 1.132363200187683, + "learning_rate": 8.792148502655492e-06, + "loss": 0.3353, + "step": 12448 + }, + { + "epoch": 0.24921051973075095, + "grad_norm": 1.1198638677597046, + "learning_rate": 8.791937206838706e-06, + "loss": 0.3132, + "step": 12449 + }, + { + "epoch": 0.2492305382478793, + "grad_norm": 1.078134536743164, + "learning_rate": 8.79172589508143e-06, + "loss": 0.3099, + "step": 12450 + }, + { + "epoch": 0.24925055676500762, + "grad_norm": 1.075893521308899, + "learning_rate": 8.791514567384558e-06, + "loss": 0.3629, + "step": 12451 + }, + { + "epoch": 0.24927057528213598, + "grad_norm": 1.055891752243042, + "learning_rate": 8.791303223748972e-06, + "loss": 0.2923, + "step": 12452 + }, + { + "epoch": 0.24929059379926433, + "grad_norm": 1.071901559829712, + "learning_rate": 8.791091864175564e-06, + "loss": 0.3478, + "step": 12453 + }, + { + "epoch": 0.24931061231639268, + "grad_norm": 1.0493190288543701, + "learning_rate": 8.790880488665223e-06, + "loss": 0.3121, + "step": 12454 + }, + { + "epoch": 0.249330630833521, + "grad_norm": 1.7961502075195312, + "learning_rate": 8.790669097218836e-06, + "loss": 0.8576, + "step": 12455 + }, + { + "epoch": 0.24935064935064935, + "grad_norm": 1.076765775680542, + "learning_rate": 8.79045768983729e-06, + "loss": 0.3482, + "step": 12456 + }, + { + "epoch": 0.2493706678677777, + "grad_norm": 1.1641674041748047, + "learning_rate": 8.790246266521477e-06, + "loss": 0.3424, + "step": 12457 + }, + { + "epoch": 0.24939068638490605, + "grad_norm": 1.0068377256393433, + "learning_rate": 8.790034827272284e-06, + "loss": 0.2878, + "step": 12458 + }, + { + "epoch": 0.24941070490203437, + "grad_norm": 1.0418152809143066, + "learning_rate": 8.789823372090601e-06, + "loss": 0.3442, + "step": 12459 + }, + { + "epoch": 0.24943072341916273, + "grad_norm": 1.8157049417495728, + "learning_rate": 8.789611900977314e-06, + "loss": 0.8658, + "step": 12460 + }, + { + "epoch": 0.24945074193629108, + "grad_norm": 1.9144541025161743, + "learning_rate": 8.789400413933315e-06, + "loss": 0.862, + "step": 12461 + }, + { + "epoch": 0.24947076045341943, + "grad_norm": 1.2859878540039062, + "learning_rate": 8.789188910959494e-06, + "loss": 0.3294, + "step": 12462 + }, + { + "epoch": 0.24949077897054775, + "grad_norm": 1.1055513620376587, + "learning_rate": 8.788977392056735e-06, + "loss": 0.3321, + "step": 12463 + }, + { + "epoch": 0.2495107974876761, + "grad_norm": 1.6648001670837402, + "learning_rate": 8.788765857225932e-06, + "loss": 0.8353, + "step": 12464 + }, + { + "epoch": 0.24953081600480445, + "grad_norm": 1.1272931098937988, + "learning_rate": 8.788554306467974e-06, + "loss": 0.3334, + "step": 12465 + }, + { + "epoch": 0.2495508345219328, + "grad_norm": 1.0873563289642334, + "learning_rate": 8.788342739783748e-06, + "loss": 0.335, + "step": 12466 + }, + { + "epoch": 0.24957085303906112, + "grad_norm": 1.1452226638793945, + "learning_rate": 8.788131157174144e-06, + "loss": 0.3047, + "step": 12467 + }, + { + "epoch": 0.24959087155618948, + "grad_norm": 1.1106441020965576, + "learning_rate": 8.787919558640053e-06, + "loss": 0.358, + "step": 12468 + }, + { + "epoch": 0.24961089007331783, + "grad_norm": 1.1040061712265015, + "learning_rate": 8.787707944182363e-06, + "loss": 0.3311, + "step": 12469 + }, + { + "epoch": 0.24963090859044615, + "grad_norm": 1.1136162281036377, + "learning_rate": 8.787496313801962e-06, + "loss": 0.3206, + "step": 12470 + }, + { + "epoch": 0.2496509271075745, + "grad_norm": 1.1010074615478516, + "learning_rate": 8.787284667499743e-06, + "loss": 0.3575, + "step": 12471 + }, + { + "epoch": 0.24967094562470285, + "grad_norm": 1.0933022499084473, + "learning_rate": 8.787073005276594e-06, + "loss": 0.2911, + "step": 12472 + }, + { + "epoch": 0.2496909641418312, + "grad_norm": 1.1612727642059326, + "learning_rate": 8.786861327133407e-06, + "loss": 0.3262, + "step": 12473 + }, + { + "epoch": 0.24971098265895952, + "grad_norm": 1.033752679824829, + "learning_rate": 8.786649633071067e-06, + "loss": 0.3205, + "step": 12474 + }, + { + "epoch": 0.24973100117608787, + "grad_norm": 1.0084530115127563, + "learning_rate": 8.786437923090467e-06, + "loss": 0.3286, + "step": 12475 + }, + { + "epoch": 0.24975101969321623, + "grad_norm": 1.1002469062805176, + "learning_rate": 8.786226197192499e-06, + "loss": 0.3181, + "step": 12476 + }, + { + "epoch": 0.24977103821034458, + "grad_norm": 1.0943125486373901, + "learning_rate": 8.786014455378048e-06, + "loss": 0.2739, + "step": 12477 + }, + { + "epoch": 0.2497910567274729, + "grad_norm": 1.210038185119629, + "learning_rate": 8.785802697648011e-06, + "loss": 0.3107, + "step": 12478 + }, + { + "epoch": 0.24981107524460125, + "grad_norm": 1.1943448781967163, + "learning_rate": 8.78559092400327e-06, + "loss": 0.3198, + "step": 12479 + }, + { + "epoch": 0.2498310937617296, + "grad_norm": 1.0836254358291626, + "learning_rate": 8.785379134444723e-06, + "loss": 0.3137, + "step": 12480 + }, + { + "epoch": 0.24985111227885795, + "grad_norm": 1.1521000862121582, + "learning_rate": 8.785167328973255e-06, + "loss": 0.3971, + "step": 12481 + }, + { + "epoch": 0.24987113079598627, + "grad_norm": 1.301863193511963, + "learning_rate": 8.784955507589759e-06, + "loss": 0.2796, + "step": 12482 + }, + { + "epoch": 0.24989114931311462, + "grad_norm": 0.9972172379493713, + "learning_rate": 8.784743670295123e-06, + "loss": 0.3009, + "step": 12483 + }, + { + "epoch": 0.24991116783024298, + "grad_norm": 1.066738247871399, + "learning_rate": 8.784531817090241e-06, + "loss": 0.3305, + "step": 12484 + }, + { + "epoch": 0.24993118634737133, + "grad_norm": 1.1565077304840088, + "learning_rate": 8.784319947975998e-06, + "loss": 0.3574, + "step": 12485 + }, + { + "epoch": 0.24995120486449965, + "grad_norm": 0.9572039842605591, + "learning_rate": 8.78410806295329e-06, + "loss": 0.2981, + "step": 12486 + }, + { + "epoch": 0.249971223381628, + "grad_norm": 1.0164690017700195, + "learning_rate": 8.78389616202301e-06, + "loss": 0.3022, + "step": 12487 + }, + { + "epoch": 0.24999124189875635, + "grad_norm": 1.321899652481079, + "learning_rate": 8.783684245186041e-06, + "loss": 0.3471, + "step": 12488 + }, + { + "epoch": 0.2500112604158847, + "grad_norm": 1.0528373718261719, + "learning_rate": 8.783472312443278e-06, + "loss": 0.3179, + "step": 12489 + }, + { + "epoch": 0.250031278933013, + "grad_norm": 1.2425212860107422, + "learning_rate": 8.783260363795614e-06, + "loss": 0.3511, + "step": 12490 + }, + { + "epoch": 0.2500512974501414, + "grad_norm": 1.005876064300537, + "learning_rate": 8.783048399243935e-06, + "loss": 0.3206, + "step": 12491 + }, + { + "epoch": 0.2500713159672697, + "grad_norm": 1.2132350206375122, + "learning_rate": 8.782836418789135e-06, + "loss": 0.2972, + "step": 12492 + }, + { + "epoch": 0.2500913344843981, + "grad_norm": 1.767160415649414, + "learning_rate": 8.782624422432104e-06, + "loss": 0.801, + "step": 12493 + }, + { + "epoch": 0.2501113530015264, + "grad_norm": 1.9287852048873901, + "learning_rate": 8.782412410173737e-06, + "loss": 0.7924, + "step": 12494 + }, + { + "epoch": 0.2501313715186548, + "grad_norm": 1.1353806257247925, + "learning_rate": 8.782200382014919e-06, + "loss": 0.3217, + "step": 12495 + }, + { + "epoch": 0.25015139003578307, + "grad_norm": 1.197640061378479, + "learning_rate": 8.781988337956546e-06, + "loss": 0.3351, + "step": 12496 + }, + { + "epoch": 0.2501714085529114, + "grad_norm": 1.1050174236297607, + "learning_rate": 8.781776277999509e-06, + "loss": 0.3162, + "step": 12497 + }, + { + "epoch": 0.2501914270700398, + "grad_norm": 1.8060054779052734, + "learning_rate": 8.781564202144696e-06, + "loss": 0.8231, + "step": 12498 + }, + { + "epoch": 0.2502114455871681, + "grad_norm": 1.1025792360305786, + "learning_rate": 8.781352110393002e-06, + "loss": 0.2973, + "step": 12499 + }, + { + "epoch": 0.2502314641042965, + "grad_norm": 1.9244052171707153, + "learning_rate": 8.78114000274532e-06, + "loss": 0.8313, + "step": 12500 + }, + { + "epoch": 0.2502514826214248, + "grad_norm": 1.9352489709854126, + "learning_rate": 8.780927879202535e-06, + "loss": 0.7828, + "step": 12501 + }, + { + "epoch": 0.2502715011385532, + "grad_norm": 1.0331979990005493, + "learning_rate": 8.780715739765543e-06, + "loss": 0.2716, + "step": 12502 + }, + { + "epoch": 0.2502915196556815, + "grad_norm": 1.0699973106384277, + "learning_rate": 8.780503584435239e-06, + "loss": 0.3241, + "step": 12503 + }, + { + "epoch": 0.2503115381728098, + "grad_norm": 1.1193124055862427, + "learning_rate": 8.78029141321251e-06, + "loss": 0.2982, + "step": 12504 + }, + { + "epoch": 0.2503315566899382, + "grad_norm": 1.163817286491394, + "learning_rate": 8.780079226098249e-06, + "loss": 0.3201, + "step": 12505 + }, + { + "epoch": 0.2503515752070665, + "grad_norm": 1.045409083366394, + "learning_rate": 8.779867023093347e-06, + "loss": 0.3018, + "step": 12506 + }, + { + "epoch": 0.2503715937241949, + "grad_norm": 1.1330373287200928, + "learning_rate": 8.7796548041987e-06, + "loss": 0.3006, + "step": 12507 + }, + { + "epoch": 0.2503916122413232, + "grad_norm": 0.9857560396194458, + "learning_rate": 8.779442569415196e-06, + "loss": 0.322, + "step": 12508 + }, + { + "epoch": 0.2504116307584516, + "grad_norm": 1.037584900856018, + "learning_rate": 8.77923031874373e-06, + "loss": 0.3001, + "step": 12509 + }, + { + "epoch": 0.2504316492755799, + "grad_norm": 1.2827246189117432, + "learning_rate": 8.779018052185193e-06, + "loss": 0.2981, + "step": 12510 + }, + { + "epoch": 0.2504516677927083, + "grad_norm": 1.191698670387268, + "learning_rate": 8.778805769740474e-06, + "loss": 0.2986, + "step": 12511 + }, + { + "epoch": 0.25047168630983657, + "grad_norm": 1.0639501810073853, + "learning_rate": 8.778593471410473e-06, + "loss": 0.3362, + "step": 12512 + }, + { + "epoch": 0.2504917048269649, + "grad_norm": 1.0614426136016846, + "learning_rate": 8.778381157196076e-06, + "loss": 0.3069, + "step": 12513 + }, + { + "epoch": 0.2505117233440933, + "grad_norm": 1.2593474388122559, + "learning_rate": 8.778168827098179e-06, + "loss": 0.3245, + "step": 12514 + }, + { + "epoch": 0.2505317418612216, + "grad_norm": 1.1036221981048584, + "learning_rate": 8.777956481117673e-06, + "loss": 0.3083, + "step": 12515 + }, + { + "epoch": 0.25055176037835, + "grad_norm": 1.0506525039672852, + "learning_rate": 8.777744119255452e-06, + "loss": 0.3174, + "step": 12516 + }, + { + "epoch": 0.2505717788954783, + "grad_norm": 1.1935895681381226, + "learning_rate": 8.777531741512405e-06, + "loss": 0.3535, + "step": 12517 + }, + { + "epoch": 0.2505917974126067, + "grad_norm": 1.163587212562561, + "learning_rate": 8.777319347889428e-06, + "loss": 0.3119, + "step": 12518 + }, + { + "epoch": 0.250611815929735, + "grad_norm": 1.0659762620925903, + "learning_rate": 8.777106938387417e-06, + "loss": 0.3255, + "step": 12519 + }, + { + "epoch": 0.2506318344468633, + "grad_norm": 1.019968867301941, + "learning_rate": 8.776894513007257e-06, + "loss": 0.3042, + "step": 12520 + }, + { + "epoch": 0.2506518529639917, + "grad_norm": 1.9509352445602417, + "learning_rate": 8.776682071749846e-06, + "loss": 0.8584, + "step": 12521 + }, + { + "epoch": 0.25067187148112, + "grad_norm": 1.1968191862106323, + "learning_rate": 8.776469614616078e-06, + "loss": 0.388, + "step": 12522 + }, + { + "epoch": 0.2506918899982484, + "grad_norm": 1.2267720699310303, + "learning_rate": 8.776257141606843e-06, + "loss": 0.3084, + "step": 12523 + }, + { + "epoch": 0.2507119085153767, + "grad_norm": 1.1558120250701904, + "learning_rate": 8.776044652723038e-06, + "loss": 0.3219, + "step": 12524 + }, + { + "epoch": 0.2507319270325051, + "grad_norm": 1.3763080835342407, + "learning_rate": 8.77583214796555e-06, + "loss": 0.3865, + "step": 12525 + }, + { + "epoch": 0.2507519455496334, + "grad_norm": 1.2009906768798828, + "learning_rate": 8.775619627335279e-06, + "loss": 0.3525, + "step": 12526 + }, + { + "epoch": 0.2507719640667618, + "grad_norm": 1.029126763343811, + "learning_rate": 8.775407090833114e-06, + "loss": 0.3647, + "step": 12527 + }, + { + "epoch": 0.25079198258389007, + "grad_norm": 1.2184321880340576, + "learning_rate": 8.775194538459953e-06, + "loss": 0.352, + "step": 12528 + }, + { + "epoch": 0.2508120011010184, + "grad_norm": 1.225319743156433, + "learning_rate": 8.774981970216686e-06, + "loss": 0.3684, + "step": 12529 + }, + { + "epoch": 0.2508320196181468, + "grad_norm": 1.2559951543807983, + "learning_rate": 8.774769386104204e-06, + "loss": 0.3241, + "step": 12530 + }, + { + "epoch": 0.2508520381352751, + "grad_norm": 1.10614013671875, + "learning_rate": 8.774556786123405e-06, + "loss": 0.3259, + "step": 12531 + }, + { + "epoch": 0.2508720566524035, + "grad_norm": 1.1351791620254517, + "learning_rate": 8.774344170275183e-06, + "loss": 0.3404, + "step": 12532 + }, + { + "epoch": 0.2508920751695318, + "grad_norm": 1.1218904256820679, + "learning_rate": 8.774131538560429e-06, + "loss": 0.3379, + "step": 12533 + }, + { + "epoch": 0.2509120936866602, + "grad_norm": 1.0680482387542725, + "learning_rate": 8.773918890980039e-06, + "loss": 0.3548, + "step": 12534 + }, + { + "epoch": 0.2509321122037885, + "grad_norm": 1.191180944442749, + "learning_rate": 8.773706227534907e-06, + "loss": 0.339, + "step": 12535 + }, + { + "epoch": 0.2509521307209168, + "grad_norm": 1.0821186304092407, + "learning_rate": 8.773493548225925e-06, + "loss": 0.3326, + "step": 12536 + }, + { + "epoch": 0.2509721492380452, + "grad_norm": 1.8808887004852295, + "learning_rate": 8.773280853053988e-06, + "loss": 0.8092, + "step": 12537 + }, + { + "epoch": 0.2509921677551735, + "grad_norm": 1.0750597715377808, + "learning_rate": 8.77306814201999e-06, + "loss": 0.3246, + "step": 12538 + }, + { + "epoch": 0.2510121862723019, + "grad_norm": 0.9890678524971008, + "learning_rate": 8.772855415124826e-06, + "loss": 0.3188, + "step": 12539 + }, + { + "epoch": 0.2510322047894302, + "grad_norm": 1.1354115009307861, + "learning_rate": 8.77264267236939e-06, + "loss": 0.337, + "step": 12540 + }, + { + "epoch": 0.2510522233065586, + "grad_norm": 1.9874035120010376, + "learning_rate": 8.772429913754575e-06, + "loss": 0.8016, + "step": 12541 + }, + { + "epoch": 0.2510722418236869, + "grad_norm": 1.0638713836669922, + "learning_rate": 8.772217139281279e-06, + "loss": 0.3598, + "step": 12542 + }, + { + "epoch": 0.2510922603408153, + "grad_norm": 1.045015811920166, + "learning_rate": 8.77200434895039e-06, + "loss": 0.2789, + "step": 12543 + }, + { + "epoch": 0.25111227885794357, + "grad_norm": 1.099678635597229, + "learning_rate": 8.77179154276281e-06, + "loss": 0.3186, + "step": 12544 + }, + { + "epoch": 0.2511322973750719, + "grad_norm": 1.8936690092086792, + "learning_rate": 8.771578720719429e-06, + "loss": 0.8953, + "step": 12545 + }, + { + "epoch": 0.2511523158922003, + "grad_norm": 1.0301544666290283, + "learning_rate": 8.771365882821143e-06, + "loss": 0.3309, + "step": 12546 + }, + { + "epoch": 0.2511723344093286, + "grad_norm": 1.082108497619629, + "learning_rate": 8.771153029068846e-06, + "loss": 0.3432, + "step": 12547 + }, + { + "epoch": 0.251192352926457, + "grad_norm": 1.1912933588027954, + "learning_rate": 8.770940159463433e-06, + "loss": 0.3626, + "step": 12548 + }, + { + "epoch": 0.2512123714435853, + "grad_norm": 1.2177056074142456, + "learning_rate": 8.7707272740058e-06, + "loss": 0.355, + "step": 12549 + }, + { + "epoch": 0.2512323899607137, + "grad_norm": 1.1134005784988403, + "learning_rate": 8.770514372696838e-06, + "loss": 0.3305, + "step": 12550 + }, + { + "epoch": 0.251252408477842, + "grad_norm": 1.1188180446624756, + "learning_rate": 8.770301455537449e-06, + "loss": 0.3418, + "step": 12551 + }, + { + "epoch": 0.2512724269949703, + "grad_norm": 0.9869982004165649, + "learning_rate": 8.770088522528521e-06, + "loss": 0.3045, + "step": 12552 + }, + { + "epoch": 0.25129244551209867, + "grad_norm": 1.862847089767456, + "learning_rate": 8.769875573670953e-06, + "loss": 0.7869, + "step": 12553 + }, + { + "epoch": 0.251312464029227, + "grad_norm": 1.1027318239212036, + "learning_rate": 8.769662608965641e-06, + "loss": 0.329, + "step": 12554 + }, + { + "epoch": 0.2513324825463554, + "grad_norm": 1.060997486114502, + "learning_rate": 8.769449628413478e-06, + "loss": 0.2796, + "step": 12555 + }, + { + "epoch": 0.2513525010634837, + "grad_norm": 1.0592550039291382, + "learning_rate": 8.769236632015359e-06, + "loss": 0.2882, + "step": 12556 + }, + { + "epoch": 0.2513725195806121, + "grad_norm": 1.0877902507781982, + "learning_rate": 8.769023619772181e-06, + "loss": 0.3194, + "step": 12557 + }, + { + "epoch": 0.2513925380977404, + "grad_norm": 1.0989959239959717, + "learning_rate": 8.768810591684839e-06, + "loss": 0.3432, + "step": 12558 + }, + { + "epoch": 0.2514125566148688, + "grad_norm": 1.012192726135254, + "learning_rate": 8.768597547754228e-06, + "loss": 0.3368, + "step": 12559 + }, + { + "epoch": 0.25143257513199707, + "grad_norm": 1.1077227592468262, + "learning_rate": 8.768384487981244e-06, + "loss": 0.3272, + "step": 12560 + }, + { + "epoch": 0.2514525936491254, + "grad_norm": 1.0903282165527344, + "learning_rate": 8.768171412366783e-06, + "loss": 0.3752, + "step": 12561 + }, + { + "epoch": 0.2514726121662538, + "grad_norm": 1.0987114906311035, + "learning_rate": 8.767958320911739e-06, + "loss": 0.3507, + "step": 12562 + }, + { + "epoch": 0.2514926306833821, + "grad_norm": 1.7841185331344604, + "learning_rate": 8.767745213617009e-06, + "loss": 0.8256, + "step": 12563 + }, + { + "epoch": 0.2515126492005105, + "grad_norm": 1.1166871786117554, + "learning_rate": 8.767532090483491e-06, + "loss": 0.3233, + "step": 12564 + }, + { + "epoch": 0.2515326677176388, + "grad_norm": 1.057423710823059, + "learning_rate": 8.767318951512079e-06, + "loss": 0.2889, + "step": 12565 + }, + { + "epoch": 0.2515526862347672, + "grad_norm": 1.1419044733047485, + "learning_rate": 8.767105796703666e-06, + "loss": 0.3062, + "step": 12566 + }, + { + "epoch": 0.2515727047518955, + "grad_norm": 0.9500020146369934, + "learning_rate": 8.766892626059152e-06, + "loss": 0.2554, + "step": 12567 + }, + { + "epoch": 0.2515927232690238, + "grad_norm": 1.0818995237350464, + "learning_rate": 8.766679439579433e-06, + "loss": 0.3385, + "step": 12568 + }, + { + "epoch": 0.25161274178615217, + "grad_norm": 1.0888935327529907, + "learning_rate": 8.766466237265403e-06, + "loss": 0.3284, + "step": 12569 + }, + { + "epoch": 0.2516327603032805, + "grad_norm": 1.8965775966644287, + "learning_rate": 8.766253019117961e-06, + "loss": 0.8591, + "step": 12570 + }, + { + "epoch": 0.2516527788204089, + "grad_norm": 1.1676088571548462, + "learning_rate": 8.766039785138e-06, + "loss": 0.3266, + "step": 12571 + }, + { + "epoch": 0.2516727973375372, + "grad_norm": 1.129138708114624, + "learning_rate": 8.765826535326421e-06, + "loss": 0.3441, + "step": 12572 + }, + { + "epoch": 0.2516928158546656, + "grad_norm": 1.0435333251953125, + "learning_rate": 8.765613269684116e-06, + "loss": 0.31, + "step": 12573 + }, + { + "epoch": 0.2517128343717939, + "grad_norm": 2.084705114364624, + "learning_rate": 8.765399988211982e-06, + "loss": 0.3125, + "step": 12574 + }, + { + "epoch": 0.2517328528889223, + "grad_norm": 1.0901968479156494, + "learning_rate": 8.765186690910918e-06, + "loss": 0.2829, + "step": 12575 + }, + { + "epoch": 0.25175287140605057, + "grad_norm": 1.1532299518585205, + "learning_rate": 8.76497337778182e-06, + "loss": 0.3372, + "step": 12576 + }, + { + "epoch": 0.2517728899231789, + "grad_norm": 2.0082740783691406, + "learning_rate": 8.764760048825583e-06, + "loss": 0.8497, + "step": 12577 + }, + { + "epoch": 0.2517929084403073, + "grad_norm": 1.1061313152313232, + "learning_rate": 8.764546704043106e-06, + "loss": 0.3214, + "step": 12578 + }, + { + "epoch": 0.2518129269574356, + "grad_norm": 1.0877007246017456, + "learning_rate": 8.764333343435282e-06, + "loss": 0.336, + "step": 12579 + }, + { + "epoch": 0.251832945474564, + "grad_norm": 1.054513692855835, + "learning_rate": 8.764119967003016e-06, + "loss": 0.2995, + "step": 12580 + }, + { + "epoch": 0.2518529639916923, + "grad_norm": 1.259325385093689, + "learning_rate": 8.763906574747195e-06, + "loss": 0.3282, + "step": 12581 + }, + { + "epoch": 0.2518729825088207, + "grad_norm": 1.910713791847229, + "learning_rate": 8.763693166668723e-06, + "loss": 0.8414, + "step": 12582 + }, + { + "epoch": 0.251893001025949, + "grad_norm": 1.0407087802886963, + "learning_rate": 8.763479742768495e-06, + "loss": 0.3087, + "step": 12583 + }, + { + "epoch": 0.2519130195430773, + "grad_norm": 1.2265937328338623, + "learning_rate": 8.763266303047406e-06, + "loss": 0.3457, + "step": 12584 + }, + { + "epoch": 0.25193303806020567, + "grad_norm": 1.0243682861328125, + "learning_rate": 8.763052847506357e-06, + "loss": 0.3036, + "step": 12585 + }, + { + "epoch": 0.251953056577334, + "grad_norm": 1.0134886503219604, + "learning_rate": 8.762839376146244e-06, + "loss": 0.3172, + "step": 12586 + }, + { + "epoch": 0.2519730750944624, + "grad_norm": 1.0950323343276978, + "learning_rate": 8.762625888967963e-06, + "loss": 0.3719, + "step": 12587 + }, + { + "epoch": 0.2519930936115907, + "grad_norm": 1.071509599685669, + "learning_rate": 8.762412385972411e-06, + "loss": 0.3393, + "step": 12588 + }, + { + "epoch": 0.2520131121287191, + "grad_norm": 1.1983438730239868, + "learning_rate": 8.76219886716049e-06, + "loss": 0.3464, + "step": 12589 + }, + { + "epoch": 0.2520331306458474, + "grad_norm": 1.0492016077041626, + "learning_rate": 8.761985332533094e-06, + "loss": 0.3382, + "step": 12590 + }, + { + "epoch": 0.2520531491629758, + "grad_norm": 1.010087013244629, + "learning_rate": 8.761771782091121e-06, + "loss": 0.3233, + "step": 12591 + }, + { + "epoch": 0.25207316768010407, + "grad_norm": 1.5217615365982056, + "learning_rate": 8.761558215835468e-06, + "loss": 0.3543, + "step": 12592 + }, + { + "epoch": 0.2520931861972324, + "grad_norm": 1.226563811302185, + "learning_rate": 8.761344633767035e-06, + "loss": 0.297, + "step": 12593 + }, + { + "epoch": 0.2521132047143608, + "grad_norm": 1.0847445726394653, + "learning_rate": 8.761131035886717e-06, + "loss": 0.3074, + "step": 12594 + }, + { + "epoch": 0.2521332232314891, + "grad_norm": 1.0359288454055786, + "learning_rate": 8.760917422195415e-06, + "loss": 0.2624, + "step": 12595 + }, + { + "epoch": 0.2521532417486175, + "grad_norm": 1.10703706741333, + "learning_rate": 8.760703792694026e-06, + "loss": 0.3053, + "step": 12596 + }, + { + "epoch": 0.2521732602657458, + "grad_norm": 1.0739878416061401, + "learning_rate": 8.760490147383448e-06, + "loss": 0.297, + "step": 12597 + }, + { + "epoch": 0.2521932787828742, + "grad_norm": 1.7532691955566406, + "learning_rate": 8.760276486264575e-06, + "loss": 0.9181, + "step": 12598 + }, + { + "epoch": 0.2522132973000025, + "grad_norm": 1.2620460987091064, + "learning_rate": 8.760062809338313e-06, + "loss": 0.3263, + "step": 12599 + }, + { + "epoch": 0.2522333158171308, + "grad_norm": 1.094037652015686, + "learning_rate": 8.759849116605552e-06, + "loss": 0.3481, + "step": 12600 + }, + { + "epoch": 0.25225333433425917, + "grad_norm": 1.1176979541778564, + "learning_rate": 8.759635408067198e-06, + "loss": 0.3405, + "step": 12601 + }, + { + "epoch": 0.2522733528513875, + "grad_norm": 1.1801329851150513, + "learning_rate": 8.759421683724145e-06, + "loss": 0.3519, + "step": 12602 + }, + { + "epoch": 0.2522933713685159, + "grad_norm": 1.0377036333084106, + "learning_rate": 8.759207943577292e-06, + "loss": 0.3021, + "step": 12603 + }, + { + "epoch": 0.2523133898856442, + "grad_norm": 1.215828776359558, + "learning_rate": 8.758994187627536e-06, + "loss": 0.3367, + "step": 12604 + }, + { + "epoch": 0.2523334084027726, + "grad_norm": 1.1661036014556885, + "learning_rate": 8.758780415875781e-06, + "loss": 0.3356, + "step": 12605 + }, + { + "epoch": 0.2523534269199009, + "grad_norm": 1.84264075756073, + "learning_rate": 8.75856662832292e-06, + "loss": 0.8394, + "step": 12606 + }, + { + "epoch": 0.2523734454370293, + "grad_norm": 1.347976565361023, + "learning_rate": 8.758352824969853e-06, + "loss": 0.3397, + "step": 12607 + }, + { + "epoch": 0.25239346395415757, + "grad_norm": 1.2061958312988281, + "learning_rate": 8.75813900581748e-06, + "loss": 0.3082, + "step": 12608 + }, + { + "epoch": 0.2524134824712859, + "grad_norm": 1.078150749206543, + "learning_rate": 8.7579251708667e-06, + "loss": 0.3148, + "step": 12609 + }, + { + "epoch": 0.25243350098841427, + "grad_norm": 1.065792202949524, + "learning_rate": 8.757711320118412e-06, + "loss": 0.3, + "step": 12610 + }, + { + "epoch": 0.2524535195055426, + "grad_norm": 1.2276498079299927, + "learning_rate": 8.757497453573511e-06, + "loss": 0.3206, + "step": 12611 + }, + { + "epoch": 0.252473538022671, + "grad_norm": 1.124289870262146, + "learning_rate": 8.757283571232903e-06, + "loss": 0.3555, + "step": 12612 + }, + { + "epoch": 0.2524935565397993, + "grad_norm": 1.0867723226547241, + "learning_rate": 8.757069673097482e-06, + "loss": 0.3075, + "step": 12613 + }, + { + "epoch": 0.2525135750569277, + "grad_norm": 1.774786114692688, + "learning_rate": 8.756855759168149e-06, + "loss": 0.7839, + "step": 12614 + }, + { + "epoch": 0.252533593574056, + "grad_norm": 1.831817626953125, + "learning_rate": 8.756641829445803e-06, + "loss": 0.782, + "step": 12615 + }, + { + "epoch": 0.2525536120911843, + "grad_norm": 1.2921855449676514, + "learning_rate": 8.756427883931343e-06, + "loss": 0.3085, + "step": 12616 + }, + { + "epoch": 0.25257363060831267, + "grad_norm": 1.2208728790283203, + "learning_rate": 8.756213922625667e-06, + "loss": 0.3296, + "step": 12617 + }, + { + "epoch": 0.252593649125441, + "grad_norm": 1.0983744859695435, + "learning_rate": 8.755999945529678e-06, + "loss": 0.2775, + "step": 12618 + }, + { + "epoch": 0.2526136676425694, + "grad_norm": 1.2059708833694458, + "learning_rate": 8.755785952644274e-06, + "loss": 0.3919, + "step": 12619 + }, + { + "epoch": 0.2526336861596977, + "grad_norm": 1.005927562713623, + "learning_rate": 8.75557194397035e-06, + "loss": 0.3052, + "step": 12620 + }, + { + "epoch": 0.2526537046768261, + "grad_norm": 1.120572805404663, + "learning_rate": 8.755357919508813e-06, + "loss": 0.3243, + "step": 12621 + }, + { + "epoch": 0.2526737231939544, + "grad_norm": 0.9616006016731262, + "learning_rate": 8.755143879260562e-06, + "loss": 0.3395, + "step": 12622 + }, + { + "epoch": 0.2526937417110828, + "grad_norm": 1.0200767517089844, + "learning_rate": 8.75492982322649e-06, + "loss": 0.299, + "step": 12623 + }, + { + "epoch": 0.25271376022821107, + "grad_norm": 1.0609792470932007, + "learning_rate": 8.754715751407504e-06, + "loss": 0.293, + "step": 12624 + }, + { + "epoch": 0.2527337787453394, + "grad_norm": 1.2780191898345947, + "learning_rate": 8.754501663804498e-06, + "loss": 0.3201, + "step": 12625 + }, + { + "epoch": 0.25275379726246777, + "grad_norm": 1.9990102052688599, + "learning_rate": 8.754287560418377e-06, + "loss": 0.8541, + "step": 12626 + }, + { + "epoch": 0.2527738157795961, + "grad_norm": 1.0691273212432861, + "learning_rate": 8.754073441250038e-06, + "loss": 0.3215, + "step": 12627 + }, + { + "epoch": 0.2527938342967245, + "grad_norm": 1.092187523841858, + "learning_rate": 8.753859306300382e-06, + "loss": 0.3111, + "step": 12628 + }, + { + "epoch": 0.2528138528138528, + "grad_norm": 1.1361992359161377, + "learning_rate": 8.753645155570311e-06, + "loss": 0.3762, + "step": 12629 + }, + { + "epoch": 0.2528338713309812, + "grad_norm": 1.0921180248260498, + "learning_rate": 8.753430989060723e-06, + "loss": 0.3301, + "step": 12630 + }, + { + "epoch": 0.2528538898481095, + "grad_norm": 1.0060651302337646, + "learning_rate": 8.753216806772517e-06, + "loss": 0.3185, + "step": 12631 + }, + { + "epoch": 0.2528739083652378, + "grad_norm": 1.0387167930603027, + "learning_rate": 8.753002608706598e-06, + "loss": 0.2993, + "step": 12632 + }, + { + "epoch": 0.25289392688236617, + "grad_norm": 1.1004116535186768, + "learning_rate": 8.752788394863862e-06, + "loss": 0.3115, + "step": 12633 + }, + { + "epoch": 0.2529139453994945, + "grad_norm": 1.0980098247528076, + "learning_rate": 8.752574165245212e-06, + "loss": 0.3161, + "step": 12634 + }, + { + "epoch": 0.2529339639166229, + "grad_norm": 1.0472145080566406, + "learning_rate": 8.752359919851547e-06, + "loss": 0.3047, + "step": 12635 + }, + { + "epoch": 0.2529539824337512, + "grad_norm": 1.1175463199615479, + "learning_rate": 8.752145658683769e-06, + "loss": 0.3306, + "step": 12636 + }, + { + "epoch": 0.2529740009508796, + "grad_norm": 1.038461685180664, + "learning_rate": 8.751931381742777e-06, + "loss": 0.3, + "step": 12637 + }, + { + "epoch": 0.2529940194680079, + "grad_norm": 1.022945523262024, + "learning_rate": 8.751717089029474e-06, + "loss": 0.3303, + "step": 12638 + }, + { + "epoch": 0.2530140379851363, + "grad_norm": 1.0185437202453613, + "learning_rate": 8.75150278054476e-06, + "loss": 0.2924, + "step": 12639 + }, + { + "epoch": 0.25303405650226457, + "grad_norm": 1.1126433610916138, + "learning_rate": 8.751288456289535e-06, + "loss": 0.2946, + "step": 12640 + }, + { + "epoch": 0.2530540750193929, + "grad_norm": 1.1160088777542114, + "learning_rate": 8.751074116264702e-06, + "loss": 0.3461, + "step": 12641 + }, + { + "epoch": 0.25307409353652127, + "grad_norm": 1.0630371570587158, + "learning_rate": 8.750859760471158e-06, + "loss": 0.3427, + "step": 12642 + }, + { + "epoch": 0.2530941120536496, + "grad_norm": 1.004939079284668, + "learning_rate": 8.750645388909808e-06, + "loss": 0.3058, + "step": 12643 + }, + { + "epoch": 0.253114130570778, + "grad_norm": 1.3602534532546997, + "learning_rate": 8.750431001581553e-06, + "loss": 0.323, + "step": 12644 + }, + { + "epoch": 0.2531341490879063, + "grad_norm": 1.0930613279342651, + "learning_rate": 8.750216598487292e-06, + "loss": 0.3302, + "step": 12645 + }, + { + "epoch": 0.2531541676050347, + "grad_norm": 1.0155036449432373, + "learning_rate": 8.750002179627927e-06, + "loss": 0.3252, + "step": 12646 + }, + { + "epoch": 0.253174186122163, + "grad_norm": 1.183912754058838, + "learning_rate": 8.749787745004361e-06, + "loss": 0.3719, + "step": 12647 + }, + { + "epoch": 0.2531942046392913, + "grad_norm": 1.0778571367263794, + "learning_rate": 8.749573294617495e-06, + "loss": 0.3097, + "step": 12648 + }, + { + "epoch": 0.25321422315641967, + "grad_norm": 1.289831280708313, + "learning_rate": 8.749358828468228e-06, + "loss": 0.3229, + "step": 12649 + }, + { + "epoch": 0.253234241673548, + "grad_norm": 1.0893223285675049, + "learning_rate": 8.749144346557465e-06, + "loss": 0.3425, + "step": 12650 + }, + { + "epoch": 0.2532542601906764, + "grad_norm": 1.1068307161331177, + "learning_rate": 8.748929848886104e-06, + "loss": 0.3149, + "step": 12651 + }, + { + "epoch": 0.2532742787078047, + "grad_norm": 1.2659748792648315, + "learning_rate": 8.74871533545505e-06, + "loss": 0.3878, + "step": 12652 + }, + { + "epoch": 0.2532942972249331, + "grad_norm": 1.1355737447738647, + "learning_rate": 8.748500806265203e-06, + "loss": 0.3339, + "step": 12653 + }, + { + "epoch": 0.2533143157420614, + "grad_norm": 1.056535005569458, + "learning_rate": 8.748286261317466e-06, + "loss": 0.3349, + "step": 12654 + }, + { + "epoch": 0.2533343342591898, + "grad_norm": 1.1513218879699707, + "learning_rate": 8.748071700612739e-06, + "loss": 0.3253, + "step": 12655 + }, + { + "epoch": 0.25335435277631807, + "grad_norm": 1.1811695098876953, + "learning_rate": 8.747857124151927e-06, + "loss": 0.316, + "step": 12656 + }, + { + "epoch": 0.2533743712934464, + "grad_norm": 1.1044974327087402, + "learning_rate": 8.74764253193593e-06, + "loss": 0.2966, + "step": 12657 + }, + { + "epoch": 0.25339438981057477, + "grad_norm": 1.129223108291626, + "learning_rate": 8.747427923965647e-06, + "loss": 0.3335, + "step": 12658 + }, + { + "epoch": 0.2534144083277031, + "grad_norm": 1.1572952270507812, + "learning_rate": 8.747213300241986e-06, + "loss": 0.3263, + "step": 12659 + }, + { + "epoch": 0.2534344268448315, + "grad_norm": 0.9842306971549988, + "learning_rate": 8.746998660765846e-06, + "loss": 0.2861, + "step": 12660 + }, + { + "epoch": 0.2534544453619598, + "grad_norm": 1.913807988166809, + "learning_rate": 8.746784005538132e-06, + "loss": 0.9043, + "step": 12661 + }, + { + "epoch": 0.2534744638790882, + "grad_norm": 1.0414689779281616, + "learning_rate": 8.746569334559742e-06, + "loss": 0.2796, + "step": 12662 + }, + { + "epoch": 0.2534944823962165, + "grad_norm": 1.047266960144043, + "learning_rate": 8.746354647831582e-06, + "loss": 0.3268, + "step": 12663 + }, + { + "epoch": 0.2535145009133448, + "grad_norm": 1.0966449975967407, + "learning_rate": 8.746139945354552e-06, + "loss": 0.31, + "step": 12664 + }, + { + "epoch": 0.25353451943047317, + "grad_norm": 1.0357871055603027, + "learning_rate": 8.745925227129558e-06, + "loss": 0.3157, + "step": 12665 + }, + { + "epoch": 0.2535545379476015, + "grad_norm": 1.1046903133392334, + "learning_rate": 8.7457104931575e-06, + "loss": 0.3248, + "step": 12666 + }, + { + "epoch": 0.25357455646472987, + "grad_norm": 1.0856003761291504, + "learning_rate": 8.745495743439279e-06, + "loss": 0.3221, + "step": 12667 + }, + { + "epoch": 0.2535945749818582, + "grad_norm": 1.1450546979904175, + "learning_rate": 8.745280977975802e-06, + "loss": 0.3161, + "step": 12668 + }, + { + "epoch": 0.2536145934989866, + "grad_norm": 1.2309324741363525, + "learning_rate": 8.745066196767968e-06, + "loss": 0.3287, + "step": 12669 + }, + { + "epoch": 0.2536346120161149, + "grad_norm": 1.019522786140442, + "learning_rate": 8.744851399816684e-06, + "loss": 0.2993, + "step": 12670 + }, + { + "epoch": 0.2536546305332433, + "grad_norm": 1.1559005975723267, + "learning_rate": 8.74463658712285e-06, + "loss": 0.3278, + "step": 12671 + }, + { + "epoch": 0.25367464905037157, + "grad_norm": 1.1365123987197876, + "learning_rate": 8.744421758687368e-06, + "loss": 0.3135, + "step": 12672 + }, + { + "epoch": 0.2536946675674999, + "grad_norm": 1.1397068500518799, + "learning_rate": 8.744206914511144e-06, + "loss": 0.3379, + "step": 12673 + }, + { + "epoch": 0.25371468608462827, + "grad_norm": 1.104716181755066, + "learning_rate": 8.74399205459508e-06, + "loss": 0.3487, + "step": 12674 + }, + { + "epoch": 0.2537347046017566, + "grad_norm": 1.7892659902572632, + "learning_rate": 8.743777178940079e-06, + "loss": 0.7991, + "step": 12675 + }, + { + "epoch": 0.253754723118885, + "grad_norm": 1.0976026058197021, + "learning_rate": 8.743562287547043e-06, + "loss": 0.3296, + "step": 12676 + }, + { + "epoch": 0.2537747416360133, + "grad_norm": 1.1689956188201904, + "learning_rate": 8.74334738041688e-06, + "loss": 0.3352, + "step": 12677 + }, + { + "epoch": 0.2537947601531417, + "grad_norm": 1.7117336988449097, + "learning_rate": 8.743132457550488e-06, + "loss": 0.7827, + "step": 12678 + }, + { + "epoch": 0.25381477867027, + "grad_norm": 1.0821532011032104, + "learning_rate": 8.742917518948773e-06, + "loss": 0.3269, + "step": 12679 + }, + { + "epoch": 0.2538347971873983, + "grad_norm": 1.1301816701889038, + "learning_rate": 8.742702564612637e-06, + "loss": 0.3038, + "step": 12680 + }, + { + "epoch": 0.25385481570452667, + "grad_norm": 1.0523601770401, + "learning_rate": 8.742487594542986e-06, + "loss": 0.3472, + "step": 12681 + }, + { + "epoch": 0.253874834221655, + "grad_norm": 1.0578904151916504, + "learning_rate": 8.742272608740723e-06, + "loss": 0.2752, + "step": 12682 + }, + { + "epoch": 0.25389485273878337, + "grad_norm": 1.3328386545181274, + "learning_rate": 8.74205760720675e-06, + "loss": 0.3295, + "step": 12683 + }, + { + "epoch": 0.2539148712559117, + "grad_norm": 1.1198011636734009, + "learning_rate": 8.741842589941973e-06, + "loss": 0.2815, + "step": 12684 + }, + { + "epoch": 0.2539348897730401, + "grad_norm": 1.0693795680999756, + "learning_rate": 8.741627556947294e-06, + "loss": 0.3098, + "step": 12685 + }, + { + "epoch": 0.2539549082901684, + "grad_norm": 1.199412226676941, + "learning_rate": 8.741412508223621e-06, + "loss": 0.3163, + "step": 12686 + }, + { + "epoch": 0.2539749268072968, + "grad_norm": 1.1733165979385376, + "learning_rate": 8.741197443771852e-06, + "loss": 0.3046, + "step": 12687 + }, + { + "epoch": 0.25399494532442507, + "grad_norm": 1.7735766172409058, + "learning_rate": 8.740982363592893e-06, + "loss": 0.8247, + "step": 12688 + }, + { + "epoch": 0.2540149638415534, + "grad_norm": 1.8079276084899902, + "learning_rate": 8.740767267687654e-06, + "loss": 0.7995, + "step": 12689 + }, + { + "epoch": 0.25403498235868177, + "grad_norm": 0.9248507022857666, + "learning_rate": 8.74055215605703e-06, + "loss": 0.3549, + "step": 12690 + }, + { + "epoch": 0.2540550008758101, + "grad_norm": 1.0755960941314697, + "learning_rate": 8.740337028701931e-06, + "loss": 0.2903, + "step": 12691 + }, + { + "epoch": 0.2540750193929385, + "grad_norm": 1.1335835456848145, + "learning_rate": 8.74012188562326e-06, + "loss": 0.3282, + "step": 12692 + }, + { + "epoch": 0.2540950379100668, + "grad_norm": 1.2601613998413086, + "learning_rate": 8.739906726821924e-06, + "loss": 0.3118, + "step": 12693 + }, + { + "epoch": 0.2541150564271952, + "grad_norm": 1.1011124849319458, + "learning_rate": 8.739691552298822e-06, + "loss": 0.3393, + "step": 12694 + }, + { + "epoch": 0.2541350749443235, + "grad_norm": 1.3032050132751465, + "learning_rate": 8.739476362054861e-06, + "loss": 0.3636, + "step": 12695 + }, + { + "epoch": 0.2541550934614518, + "grad_norm": 1.8122693300247192, + "learning_rate": 8.739261156090948e-06, + "loss": 0.8207, + "step": 12696 + }, + { + "epoch": 0.25417511197858017, + "grad_norm": 1.042790412902832, + "learning_rate": 8.739045934407984e-06, + "loss": 0.3044, + "step": 12697 + }, + { + "epoch": 0.2541951304957085, + "grad_norm": 1.0327003002166748, + "learning_rate": 8.738830697006877e-06, + "loss": 0.3208, + "step": 12698 + }, + { + "epoch": 0.25421514901283687, + "grad_norm": 1.0254185199737549, + "learning_rate": 8.738615443888532e-06, + "loss": 0.3363, + "step": 12699 + }, + { + "epoch": 0.2542351675299652, + "grad_norm": 1.015138864517212, + "learning_rate": 8.738400175053849e-06, + "loss": 0.381, + "step": 12700 + }, + { + "epoch": 0.2542551860470936, + "grad_norm": 1.070020318031311, + "learning_rate": 8.738184890503739e-06, + "loss": 0.3163, + "step": 12701 + }, + { + "epoch": 0.2542752045642219, + "grad_norm": 1.176885724067688, + "learning_rate": 8.737969590239102e-06, + "loss": 0.3352, + "step": 12702 + }, + { + "epoch": 0.2542952230813503, + "grad_norm": 1.0287704467773438, + "learning_rate": 8.737754274260846e-06, + "loss": 0.3157, + "step": 12703 + }, + { + "epoch": 0.25431524159847857, + "grad_norm": 1.1354432106018066, + "learning_rate": 8.737538942569874e-06, + "loss": 0.3231, + "step": 12704 + }, + { + "epoch": 0.2543352601156069, + "grad_norm": 1.8540042638778687, + "learning_rate": 8.737323595167094e-06, + "loss": 0.8756, + "step": 12705 + }, + { + "epoch": 0.25435527863273527, + "grad_norm": 1.1575465202331543, + "learning_rate": 8.73710823205341e-06, + "loss": 0.338, + "step": 12706 + }, + { + "epoch": 0.2543752971498636, + "grad_norm": 1.1542987823486328, + "learning_rate": 8.736892853229728e-06, + "loss": 0.31, + "step": 12707 + }, + { + "epoch": 0.254395315666992, + "grad_norm": 1.1258975267410278, + "learning_rate": 8.736677458696951e-06, + "loss": 0.2932, + "step": 12708 + }, + { + "epoch": 0.2544153341841203, + "grad_norm": 1.0728294849395752, + "learning_rate": 8.736462048455986e-06, + "loss": 0.3245, + "step": 12709 + }, + { + "epoch": 0.2544353527012487, + "grad_norm": 1.2102183103561401, + "learning_rate": 8.73624662250774e-06, + "loss": 0.3104, + "step": 12710 + }, + { + "epoch": 0.254455371218377, + "grad_norm": 1.1636775732040405, + "learning_rate": 8.736031180853117e-06, + "loss": 0.3366, + "step": 12711 + }, + { + "epoch": 0.2544753897355053, + "grad_norm": 1.1407803297042847, + "learning_rate": 8.735815723493022e-06, + "loss": 0.3011, + "step": 12712 + }, + { + "epoch": 0.25449540825263367, + "grad_norm": 1.0361226797103882, + "learning_rate": 8.735600250428362e-06, + "loss": 0.2843, + "step": 12713 + }, + { + "epoch": 0.254515426769762, + "grad_norm": 1.0010572671890259, + "learning_rate": 8.735384761660043e-06, + "loss": 0.3048, + "step": 12714 + }, + { + "epoch": 0.25453544528689037, + "grad_norm": 1.014646291732788, + "learning_rate": 8.73516925718897e-06, + "loss": 0.3206, + "step": 12715 + }, + { + "epoch": 0.2545554638040187, + "grad_norm": 1.3681309223175049, + "learning_rate": 8.73495373701605e-06, + "loss": 0.3523, + "step": 12716 + }, + { + "epoch": 0.2545754823211471, + "grad_norm": 1.15306556224823, + "learning_rate": 8.734738201142188e-06, + "loss": 0.3032, + "step": 12717 + }, + { + "epoch": 0.2545955008382754, + "grad_norm": 1.7600430250167847, + "learning_rate": 8.734522649568289e-06, + "loss": 0.8184, + "step": 12718 + }, + { + "epoch": 0.2546155193554038, + "grad_norm": 1.1184903383255005, + "learning_rate": 8.734307082295261e-06, + "loss": 0.3043, + "step": 12719 + }, + { + "epoch": 0.25463553787253207, + "grad_norm": 1.849475622177124, + "learning_rate": 8.734091499324012e-06, + "loss": 0.8983, + "step": 12720 + }, + { + "epoch": 0.2546555563896604, + "grad_norm": 2.0266287326812744, + "learning_rate": 8.733875900655441e-06, + "loss": 0.7989, + "step": 12721 + }, + { + "epoch": 0.25467557490678877, + "grad_norm": 1.1380845308303833, + "learning_rate": 8.733660286290465e-06, + "loss": 0.3377, + "step": 12722 + }, + { + "epoch": 0.2546955934239171, + "grad_norm": 1.0208338499069214, + "learning_rate": 8.73344465622998e-06, + "loss": 0.326, + "step": 12723 + }, + { + "epoch": 0.25471561194104547, + "grad_norm": 1.9375367164611816, + "learning_rate": 8.7332290104749e-06, + "loss": 0.8165, + "step": 12724 + }, + { + "epoch": 0.2547356304581738, + "grad_norm": 1.0736595392227173, + "learning_rate": 8.733013349026128e-06, + "loss": 0.283, + "step": 12725 + }, + { + "epoch": 0.2547556489753022, + "grad_norm": 1.2860326766967773, + "learning_rate": 8.73279767188457e-06, + "loss": 0.3165, + "step": 12726 + }, + { + "epoch": 0.2547756674924305, + "grad_norm": 1.1426692008972168, + "learning_rate": 8.732581979051135e-06, + "loss": 0.3357, + "step": 12727 + }, + { + "epoch": 0.2547956860095588, + "grad_norm": 1.040955662727356, + "learning_rate": 8.73236627052673e-06, + "loss": 0.3585, + "step": 12728 + }, + { + "epoch": 0.25481570452668717, + "grad_norm": 1.1715682744979858, + "learning_rate": 8.732150546312257e-06, + "loss": 0.3085, + "step": 12729 + }, + { + "epoch": 0.2548357230438155, + "grad_norm": 1.1124616861343384, + "learning_rate": 8.731934806408627e-06, + "loss": 0.2955, + "step": 12730 + }, + { + "epoch": 0.25485574156094387, + "grad_norm": 1.8004248142242432, + "learning_rate": 8.731719050816747e-06, + "loss": 0.8143, + "step": 12731 + }, + { + "epoch": 0.2548757600780722, + "grad_norm": 1.0798910856246948, + "learning_rate": 8.731503279537523e-06, + "loss": 0.3433, + "step": 12732 + }, + { + "epoch": 0.2548957785952006, + "grad_norm": 0.9834892749786377, + "learning_rate": 8.731287492571865e-06, + "loss": 0.2686, + "step": 12733 + }, + { + "epoch": 0.2549157971123289, + "grad_norm": 1.0938457250595093, + "learning_rate": 8.731071689920674e-06, + "loss": 0.3465, + "step": 12734 + }, + { + "epoch": 0.2549358156294573, + "grad_norm": 1.0180243253707886, + "learning_rate": 8.730855871584861e-06, + "loss": 0.3349, + "step": 12735 + }, + { + "epoch": 0.25495583414658557, + "grad_norm": 1.1079822778701782, + "learning_rate": 8.730640037565334e-06, + "loss": 0.2916, + "step": 12736 + }, + { + "epoch": 0.2549758526637139, + "grad_norm": 1.8466089963912964, + "learning_rate": 8.730424187862998e-06, + "loss": 0.8321, + "step": 12737 + }, + { + "epoch": 0.25499587118084227, + "grad_norm": 1.0203336477279663, + "learning_rate": 8.730208322478762e-06, + "loss": 0.2812, + "step": 12738 + }, + { + "epoch": 0.2550158896979706, + "grad_norm": 1.0397922992706299, + "learning_rate": 8.729992441413534e-06, + "loss": 0.3098, + "step": 12739 + }, + { + "epoch": 0.25503590821509897, + "grad_norm": 1.1977102756500244, + "learning_rate": 8.729776544668218e-06, + "loss": 0.2936, + "step": 12740 + }, + { + "epoch": 0.2550559267322273, + "grad_norm": 1.073537826538086, + "learning_rate": 8.729560632243726e-06, + "loss": 0.3585, + "step": 12741 + }, + { + "epoch": 0.2550759452493557, + "grad_norm": 1.1725777387619019, + "learning_rate": 8.729344704140962e-06, + "loss": 0.3408, + "step": 12742 + }, + { + "epoch": 0.255095963766484, + "grad_norm": 1.0486112833023071, + "learning_rate": 8.729128760360837e-06, + "loss": 0.3462, + "step": 12743 + }, + { + "epoch": 0.2551159822836123, + "grad_norm": 1.1563029289245605, + "learning_rate": 8.728912800904258e-06, + "loss": 0.3403, + "step": 12744 + }, + { + "epoch": 0.25513600080074067, + "grad_norm": 1.0683012008666992, + "learning_rate": 8.728696825772131e-06, + "loss": 0.3315, + "step": 12745 + }, + { + "epoch": 0.255156019317869, + "grad_norm": 1.1118606328964233, + "learning_rate": 8.728480834965365e-06, + "loss": 0.3666, + "step": 12746 + }, + { + "epoch": 0.25517603783499737, + "grad_norm": 1.098428726196289, + "learning_rate": 8.728264828484868e-06, + "loss": 0.3143, + "step": 12747 + }, + { + "epoch": 0.2551960563521257, + "grad_norm": 1.0179213285446167, + "learning_rate": 8.728048806331547e-06, + "loss": 0.3378, + "step": 12748 + }, + { + "epoch": 0.2552160748692541, + "grad_norm": 1.0952575206756592, + "learning_rate": 8.727832768506312e-06, + "loss": 0.3411, + "step": 12749 + }, + { + "epoch": 0.2552360933863824, + "grad_norm": 1.0837868452072144, + "learning_rate": 8.72761671501007e-06, + "loss": 0.3386, + "step": 12750 + }, + { + "epoch": 0.2552561119035108, + "grad_norm": 1.186306357383728, + "learning_rate": 8.72740064584373e-06, + "loss": 0.3113, + "step": 12751 + }, + { + "epoch": 0.25527613042063907, + "grad_norm": 1.166763186454773, + "learning_rate": 8.7271845610082e-06, + "loss": 0.3587, + "step": 12752 + }, + { + "epoch": 0.2552961489377674, + "grad_norm": 1.2158987522125244, + "learning_rate": 8.726968460504387e-06, + "loss": 0.3452, + "step": 12753 + }, + { + "epoch": 0.25531616745489577, + "grad_norm": 1.1724706888198853, + "learning_rate": 8.7267523443332e-06, + "loss": 0.3016, + "step": 12754 + }, + { + "epoch": 0.2553361859720241, + "grad_norm": 1.1568161249160767, + "learning_rate": 8.72653621249555e-06, + "loss": 0.3387, + "step": 12755 + }, + { + "epoch": 0.25535620448915247, + "grad_norm": 1.1049537658691406, + "learning_rate": 8.726320064992345e-06, + "loss": 0.2695, + "step": 12756 + }, + { + "epoch": 0.2553762230062808, + "grad_norm": 1.1462184190750122, + "learning_rate": 8.726103901824489e-06, + "loss": 0.3485, + "step": 12757 + }, + { + "epoch": 0.2553962415234092, + "grad_norm": 1.1633723974227905, + "learning_rate": 8.725887722992897e-06, + "loss": 0.3382, + "step": 12758 + }, + { + "epoch": 0.2554162600405375, + "grad_norm": 1.2874789237976074, + "learning_rate": 8.725671528498473e-06, + "loss": 0.3347, + "step": 12759 + }, + { + "epoch": 0.2554362785576658, + "grad_norm": 1.0984044075012207, + "learning_rate": 8.725455318342129e-06, + "loss": 0.2768, + "step": 12760 + }, + { + "epoch": 0.25545629707479417, + "grad_norm": 1.067650556564331, + "learning_rate": 8.72523909252477e-06, + "loss": 0.3701, + "step": 12761 + }, + { + "epoch": 0.2554763155919225, + "grad_norm": 1.0467854738235474, + "learning_rate": 8.725022851047311e-06, + "loss": 0.326, + "step": 12762 + }, + { + "epoch": 0.25549633410905087, + "grad_norm": 1.1255449056625366, + "learning_rate": 8.724806593910656e-06, + "loss": 0.3479, + "step": 12763 + }, + { + "epoch": 0.2555163526261792, + "grad_norm": 1.070990800857544, + "learning_rate": 8.724590321115715e-06, + "loss": 0.3138, + "step": 12764 + }, + { + "epoch": 0.2555363711433076, + "grad_norm": 1.043969988822937, + "learning_rate": 8.7243740326634e-06, + "loss": 0.2818, + "step": 12765 + }, + { + "epoch": 0.2555563896604359, + "grad_norm": 0.9887592196464539, + "learning_rate": 8.724157728554615e-06, + "loss": 0.3354, + "step": 12766 + }, + { + "epoch": 0.2555764081775643, + "grad_norm": 1.5540179014205933, + "learning_rate": 8.723941408790272e-06, + "loss": 0.3416, + "step": 12767 + }, + { + "epoch": 0.25559642669469257, + "grad_norm": 1.4529495239257812, + "learning_rate": 8.723725073371282e-06, + "loss": 0.3194, + "step": 12768 + }, + { + "epoch": 0.2556164452118209, + "grad_norm": 1.0187690258026123, + "learning_rate": 8.723508722298554e-06, + "loss": 0.2913, + "step": 12769 + }, + { + "epoch": 0.25563646372894927, + "grad_norm": 1.1443250179290771, + "learning_rate": 8.723292355573e-06, + "loss": 0.3087, + "step": 12770 + }, + { + "epoch": 0.2556564822460776, + "grad_norm": 1.2300233840942383, + "learning_rate": 8.723075973195521e-06, + "loss": 0.3295, + "step": 12771 + }, + { + "epoch": 0.25567650076320597, + "grad_norm": 1.0486111640930176, + "learning_rate": 8.722859575167034e-06, + "loss": 0.3217, + "step": 12772 + }, + { + "epoch": 0.2556965192803343, + "grad_norm": 0.9914010763168335, + "learning_rate": 8.722643161488444e-06, + "loss": 0.3336, + "step": 12773 + }, + { + "epoch": 0.2557165377974627, + "grad_norm": 1.1515154838562012, + "learning_rate": 8.722426732160664e-06, + "loss": 0.3615, + "step": 12774 + }, + { + "epoch": 0.255736556314591, + "grad_norm": 1.1268267631530762, + "learning_rate": 8.722210287184605e-06, + "loss": 0.312, + "step": 12775 + }, + { + "epoch": 0.2557565748317193, + "grad_norm": 1.850724697113037, + "learning_rate": 8.721993826561175e-06, + "loss": 0.8329, + "step": 12776 + }, + { + "epoch": 0.25577659334884767, + "grad_norm": 1.1348820924758911, + "learning_rate": 8.721777350291282e-06, + "loss": 0.3425, + "step": 12777 + }, + { + "epoch": 0.255796611865976, + "grad_norm": 1.019675850868225, + "learning_rate": 8.72156085837584e-06, + "loss": 0.3059, + "step": 12778 + }, + { + "epoch": 0.25581663038310437, + "grad_norm": 1.079237461090088, + "learning_rate": 8.721344350815757e-06, + "loss": 0.3002, + "step": 12779 + }, + { + "epoch": 0.2558366489002327, + "grad_norm": 1.0859291553497314, + "learning_rate": 8.721127827611941e-06, + "loss": 0.3535, + "step": 12780 + }, + { + "epoch": 0.25585666741736107, + "grad_norm": 1.0549376010894775, + "learning_rate": 8.720911288765305e-06, + "loss": 0.3104, + "step": 12781 + }, + { + "epoch": 0.2558766859344894, + "grad_norm": 1.060839056968689, + "learning_rate": 8.72069473427676e-06, + "loss": 0.3184, + "step": 12782 + }, + { + "epoch": 0.2558967044516178, + "grad_norm": 1.0334250926971436, + "learning_rate": 8.720478164147215e-06, + "loss": 0.3491, + "step": 12783 + }, + { + "epoch": 0.25591672296874607, + "grad_norm": 1.0754191875457764, + "learning_rate": 8.72026157837758e-06, + "loss": 0.2963, + "step": 12784 + }, + { + "epoch": 0.2559367414858744, + "grad_norm": 1.1104940176010132, + "learning_rate": 8.720044976968765e-06, + "loss": 0.3375, + "step": 12785 + }, + { + "epoch": 0.25595676000300277, + "grad_norm": 1.9823429584503174, + "learning_rate": 8.719828359921683e-06, + "loss": 0.7681, + "step": 12786 + }, + { + "epoch": 0.2559767785201311, + "grad_norm": 1.2133362293243408, + "learning_rate": 8.719611727237242e-06, + "loss": 0.374, + "step": 12787 + }, + { + "epoch": 0.25599679703725947, + "grad_norm": 1.0305293798446655, + "learning_rate": 8.719395078916353e-06, + "loss": 0.3082, + "step": 12788 + }, + { + "epoch": 0.2560168155543878, + "grad_norm": 1.2623378038406372, + "learning_rate": 8.71917841495993e-06, + "loss": 0.3656, + "step": 12789 + }, + { + "epoch": 0.2560368340715162, + "grad_norm": 1.8046987056732178, + "learning_rate": 8.71896173536888e-06, + "loss": 0.8098, + "step": 12790 + }, + { + "epoch": 0.2560568525886445, + "grad_norm": 1.1696271896362305, + "learning_rate": 8.718745040144115e-06, + "loss": 0.3035, + "step": 12791 + }, + { + "epoch": 0.2560768711057728, + "grad_norm": 1.95561683177948, + "learning_rate": 8.718528329286546e-06, + "loss": 0.8021, + "step": 12792 + }, + { + "epoch": 0.25609688962290117, + "grad_norm": 1.0889511108398438, + "learning_rate": 8.718311602797085e-06, + "loss": 0.3777, + "step": 12793 + }, + { + "epoch": 0.2561169081400295, + "grad_norm": 1.0896775722503662, + "learning_rate": 8.718094860676641e-06, + "loss": 0.3546, + "step": 12794 + }, + { + "epoch": 0.25613692665715787, + "grad_norm": 1.103559136390686, + "learning_rate": 8.717878102926128e-06, + "loss": 0.3752, + "step": 12795 + }, + { + "epoch": 0.2561569451742862, + "grad_norm": 1.1863291263580322, + "learning_rate": 8.717661329546453e-06, + "loss": 0.372, + "step": 12796 + }, + { + "epoch": 0.25617696369141457, + "grad_norm": 1.216766357421875, + "learning_rate": 8.717444540538532e-06, + "loss": 0.2909, + "step": 12797 + }, + { + "epoch": 0.2561969822085429, + "grad_norm": 1.1241731643676758, + "learning_rate": 8.717227735903274e-06, + "loss": 0.3179, + "step": 12798 + }, + { + "epoch": 0.2562170007256713, + "grad_norm": 1.1561928987503052, + "learning_rate": 8.71701091564159e-06, + "loss": 0.3346, + "step": 12799 + }, + { + "epoch": 0.25623701924279957, + "grad_norm": 1.0740605592727661, + "learning_rate": 8.71679407975439e-06, + "loss": 0.2935, + "step": 12800 + }, + { + "epoch": 0.2562570377599279, + "grad_norm": 1.2109700441360474, + "learning_rate": 8.716577228242592e-06, + "loss": 0.3339, + "step": 12801 + }, + { + "epoch": 0.25627705627705627, + "grad_norm": 1.9878720045089722, + "learning_rate": 8.716360361107102e-06, + "loss": 0.893, + "step": 12802 + }, + { + "epoch": 0.2562970747941846, + "grad_norm": 1.1195874214172363, + "learning_rate": 8.71614347834883e-06, + "loss": 0.3215, + "step": 12803 + }, + { + "epoch": 0.25631709331131297, + "grad_norm": 1.0733749866485596, + "learning_rate": 8.715926579968692e-06, + "loss": 0.2931, + "step": 12804 + }, + { + "epoch": 0.2563371118284413, + "grad_norm": 1.3442109823226929, + "learning_rate": 8.715709665967598e-06, + "loss": 0.3231, + "step": 12805 + }, + { + "epoch": 0.2563571303455697, + "grad_norm": 1.287150502204895, + "learning_rate": 8.71549273634646e-06, + "loss": 0.3175, + "step": 12806 + }, + { + "epoch": 0.256377148862698, + "grad_norm": 1.0933666229248047, + "learning_rate": 8.715275791106192e-06, + "loss": 0.3553, + "step": 12807 + }, + { + "epoch": 0.2563971673798263, + "grad_norm": 1.2263472080230713, + "learning_rate": 8.715058830247703e-06, + "loss": 0.3409, + "step": 12808 + }, + { + "epoch": 0.25641718589695467, + "grad_norm": 1.2214879989624023, + "learning_rate": 8.714841853771906e-06, + "loss": 0.3483, + "step": 12809 + }, + { + "epoch": 0.256437204414083, + "grad_norm": 1.1047526597976685, + "learning_rate": 8.714624861679714e-06, + "loss": 0.2922, + "step": 12810 + }, + { + "epoch": 0.25645722293121137, + "grad_norm": 1.103157639503479, + "learning_rate": 8.714407853972038e-06, + "loss": 0.2958, + "step": 12811 + }, + { + "epoch": 0.2564772414483397, + "grad_norm": 1.2611725330352783, + "learning_rate": 8.71419083064979e-06, + "loss": 0.3416, + "step": 12812 + }, + { + "epoch": 0.25649725996546807, + "grad_norm": 1.912988305091858, + "learning_rate": 8.713973791713884e-06, + "loss": 0.8475, + "step": 12813 + }, + { + "epoch": 0.2565172784825964, + "grad_norm": 1.1138750314712524, + "learning_rate": 8.713756737165231e-06, + "loss": 0.3338, + "step": 12814 + }, + { + "epoch": 0.2565372969997247, + "grad_norm": 1.1163357496261597, + "learning_rate": 8.713539667004745e-06, + "loss": 0.308, + "step": 12815 + }, + { + "epoch": 0.25655731551685307, + "grad_norm": 1.077010154724121, + "learning_rate": 8.713322581233334e-06, + "loss": 0.3448, + "step": 12816 + }, + { + "epoch": 0.2565773340339814, + "grad_norm": 1.912248969078064, + "learning_rate": 8.713105479851918e-06, + "loss": 0.7937, + "step": 12817 + }, + { + "epoch": 0.25659735255110977, + "grad_norm": 1.215071678161621, + "learning_rate": 8.712888362861403e-06, + "loss": 0.3684, + "step": 12818 + }, + { + "epoch": 0.2566173710682381, + "grad_norm": 1.1133310794830322, + "learning_rate": 8.712671230262706e-06, + "loss": 0.2923, + "step": 12819 + }, + { + "epoch": 0.25663738958536647, + "grad_norm": 1.1248654127120972, + "learning_rate": 8.712454082056738e-06, + "loss": 0.3521, + "step": 12820 + }, + { + "epoch": 0.2566574081024948, + "grad_norm": 1.073631763458252, + "learning_rate": 8.71223691824441e-06, + "loss": 0.2901, + "step": 12821 + }, + { + "epoch": 0.2566774266196232, + "grad_norm": 1.069663405418396, + "learning_rate": 8.712019738826639e-06, + "loss": 0.3271, + "step": 12822 + }, + { + "epoch": 0.25669744513675147, + "grad_norm": 0.9997437000274658, + "learning_rate": 8.711802543804335e-06, + "loss": 0.3206, + "step": 12823 + }, + { + "epoch": 0.2567174636538798, + "grad_norm": 1.0689617395401, + "learning_rate": 8.711585333178411e-06, + "loss": 0.3252, + "step": 12824 + }, + { + "epoch": 0.25673748217100817, + "grad_norm": 1.0034252405166626, + "learning_rate": 8.711368106949782e-06, + "loss": 0.3118, + "step": 12825 + }, + { + "epoch": 0.2567575006881365, + "grad_norm": 1.0468369722366333, + "learning_rate": 8.711150865119361e-06, + "loss": 0.3442, + "step": 12826 + }, + { + "epoch": 0.25677751920526487, + "grad_norm": 1.847260594367981, + "learning_rate": 8.710933607688059e-06, + "loss": 0.8618, + "step": 12827 + }, + { + "epoch": 0.2567975377223932, + "grad_norm": 1.1627521514892578, + "learning_rate": 8.710716334656793e-06, + "loss": 0.3384, + "step": 12828 + }, + { + "epoch": 0.25681755623952157, + "grad_norm": 1.162543773651123, + "learning_rate": 8.710499046026472e-06, + "loss": 0.3159, + "step": 12829 + }, + { + "epoch": 0.2568375747566499, + "grad_norm": 1.343761920928955, + "learning_rate": 8.710281741798011e-06, + "loss": 0.3406, + "step": 12830 + }, + { + "epoch": 0.2568575932737782, + "grad_norm": 1.09699285030365, + "learning_rate": 8.710064421972325e-06, + "loss": 0.3279, + "step": 12831 + }, + { + "epoch": 0.25687761179090657, + "grad_norm": 1.1196645498275757, + "learning_rate": 8.709847086550327e-06, + "loss": 0.3195, + "step": 12832 + }, + { + "epoch": 0.2568976303080349, + "grad_norm": 1.1131081581115723, + "learning_rate": 8.70962973553293e-06, + "loss": 0.3331, + "step": 12833 + }, + { + "epoch": 0.25691764882516327, + "grad_norm": 1.1492451429367065, + "learning_rate": 8.709412368921048e-06, + "loss": 0.3631, + "step": 12834 + }, + { + "epoch": 0.2569376673422916, + "grad_norm": 1.2128653526306152, + "learning_rate": 8.709194986715594e-06, + "loss": 0.3312, + "step": 12835 + }, + { + "epoch": 0.25695768585941997, + "grad_norm": 0.9482560157775879, + "learning_rate": 8.708977588917483e-06, + "loss": 0.3134, + "step": 12836 + }, + { + "epoch": 0.2569777043765483, + "grad_norm": 1.1325160264968872, + "learning_rate": 8.708760175527628e-06, + "loss": 0.2925, + "step": 12837 + }, + { + "epoch": 0.25699772289367667, + "grad_norm": 1.0814666748046875, + "learning_rate": 8.708542746546943e-06, + "loss": 0.3167, + "step": 12838 + }, + { + "epoch": 0.25701774141080497, + "grad_norm": 1.0845437049865723, + "learning_rate": 8.708325301976344e-06, + "loss": 0.3328, + "step": 12839 + }, + { + "epoch": 0.2570377599279333, + "grad_norm": 1.3152315616607666, + "learning_rate": 8.708107841816742e-06, + "loss": 0.3019, + "step": 12840 + }, + { + "epoch": 0.25705777844506167, + "grad_norm": 1.190335988998413, + "learning_rate": 8.707890366069054e-06, + "loss": 0.3459, + "step": 12841 + }, + { + "epoch": 0.25707779696219, + "grad_norm": 1.1561884880065918, + "learning_rate": 8.70767287473419e-06, + "loss": 0.2991, + "step": 12842 + }, + { + "epoch": 0.25709781547931837, + "grad_norm": 1.160051941871643, + "learning_rate": 8.70745536781307e-06, + "loss": 0.2954, + "step": 12843 + }, + { + "epoch": 0.2571178339964467, + "grad_norm": 1.9023784399032593, + "learning_rate": 8.707237845306604e-06, + "loss": 0.7644, + "step": 12844 + }, + { + "epoch": 0.25713785251357507, + "grad_norm": 1.1485373973846436, + "learning_rate": 8.707020307215709e-06, + "loss": 0.3127, + "step": 12845 + }, + { + "epoch": 0.2571578710307034, + "grad_norm": 2.024094820022583, + "learning_rate": 8.706802753541298e-06, + "loss": 0.8946, + "step": 12846 + }, + { + "epoch": 0.2571778895478317, + "grad_norm": 1.153916358947754, + "learning_rate": 8.706585184284286e-06, + "loss": 0.2712, + "step": 12847 + }, + { + "epoch": 0.25719790806496007, + "grad_norm": 1.1423978805541992, + "learning_rate": 8.706367599445586e-06, + "loss": 0.3617, + "step": 12848 + }, + { + "epoch": 0.2572179265820884, + "grad_norm": 1.045815348625183, + "learning_rate": 8.706149999026117e-06, + "loss": 0.3186, + "step": 12849 + }, + { + "epoch": 0.25723794509921677, + "grad_norm": 1.2032939195632935, + "learning_rate": 8.70593238302679e-06, + "loss": 0.3338, + "step": 12850 + }, + { + "epoch": 0.2572579636163451, + "grad_norm": 1.0831533670425415, + "learning_rate": 8.70571475144852e-06, + "loss": 0.3099, + "step": 12851 + }, + { + "epoch": 0.25727798213347347, + "grad_norm": 1.1825897693634033, + "learning_rate": 8.705497104292223e-06, + "loss": 0.3402, + "step": 12852 + }, + { + "epoch": 0.2572980006506018, + "grad_norm": 1.0997116565704346, + "learning_rate": 8.705279441558814e-06, + "loss": 0.3177, + "step": 12853 + }, + { + "epoch": 0.25731801916773017, + "grad_norm": 1.1546857357025146, + "learning_rate": 8.705061763249206e-06, + "loss": 0.3264, + "step": 12854 + }, + { + "epoch": 0.25733803768485847, + "grad_norm": 1.1350586414337158, + "learning_rate": 8.704844069364318e-06, + "loss": 0.3007, + "step": 12855 + }, + { + "epoch": 0.2573580562019868, + "grad_norm": 1.0927956104278564, + "learning_rate": 8.70462635990506e-06, + "loss": 0.3465, + "step": 12856 + }, + { + "epoch": 0.25737807471911517, + "grad_norm": 1.8767138719558716, + "learning_rate": 8.704408634872352e-06, + "loss": 0.8104, + "step": 12857 + }, + { + "epoch": 0.2573980932362435, + "grad_norm": 1.1110095977783203, + "learning_rate": 8.704190894267107e-06, + "loss": 0.3234, + "step": 12858 + }, + { + "epoch": 0.25741811175337187, + "grad_norm": 1.8690959215164185, + "learning_rate": 8.703973138090241e-06, + "loss": 0.801, + "step": 12859 + }, + { + "epoch": 0.2574381302705002, + "grad_norm": 1.0378772020339966, + "learning_rate": 8.70375536634267e-06, + "loss": 0.3182, + "step": 12860 + }, + { + "epoch": 0.25745814878762857, + "grad_norm": 1.0643155574798584, + "learning_rate": 8.703537579025306e-06, + "loss": 0.3043, + "step": 12861 + }, + { + "epoch": 0.2574781673047569, + "grad_norm": 1.1171563863754272, + "learning_rate": 8.703319776139068e-06, + "loss": 0.3517, + "step": 12862 + }, + { + "epoch": 0.2574981858218852, + "grad_norm": 1.030678629875183, + "learning_rate": 8.703101957684872e-06, + "loss": 0.2903, + "step": 12863 + }, + { + "epoch": 0.25751820433901357, + "grad_norm": 2.0840108394622803, + "learning_rate": 8.702884123663631e-06, + "loss": 0.7856, + "step": 12864 + }, + { + "epoch": 0.2575382228561419, + "grad_norm": 1.1187877655029297, + "learning_rate": 8.702666274076261e-06, + "loss": 0.3723, + "step": 12865 + }, + { + "epoch": 0.25755824137327027, + "grad_norm": 1.7041974067687988, + "learning_rate": 8.702448408923681e-06, + "loss": 0.7941, + "step": 12866 + }, + { + "epoch": 0.2575782598903986, + "grad_norm": 1.1333329677581787, + "learning_rate": 8.702230528206803e-06, + "loss": 0.3628, + "step": 12867 + }, + { + "epoch": 0.25759827840752697, + "grad_norm": 1.0536463260650635, + "learning_rate": 8.702012631926545e-06, + "loss": 0.345, + "step": 12868 + }, + { + "epoch": 0.2576182969246553, + "grad_norm": 1.1131852865219116, + "learning_rate": 8.701794720083822e-06, + "loss": 0.3454, + "step": 12869 + }, + { + "epoch": 0.25763831544178367, + "grad_norm": 0.9599569439888, + "learning_rate": 8.701576792679552e-06, + "loss": 0.3312, + "step": 12870 + }, + { + "epoch": 0.25765833395891197, + "grad_norm": 1.198203444480896, + "learning_rate": 8.70135884971465e-06, + "loss": 0.3097, + "step": 12871 + }, + { + "epoch": 0.2576783524760403, + "grad_norm": 1.0674285888671875, + "learning_rate": 8.701140891190032e-06, + "loss": 0.311, + "step": 12872 + }, + { + "epoch": 0.25769837099316867, + "grad_norm": 1.0499742031097412, + "learning_rate": 8.700922917106611e-06, + "loss": 0.3309, + "step": 12873 + }, + { + "epoch": 0.257718389510297, + "grad_norm": 1.078906774520874, + "learning_rate": 8.70070492746531e-06, + "loss": 0.3188, + "step": 12874 + }, + { + "epoch": 0.25773840802742537, + "grad_norm": 0.948141872882843, + "learning_rate": 8.700486922267041e-06, + "loss": 0.2901, + "step": 12875 + }, + { + "epoch": 0.2577584265445537, + "grad_norm": 1.2092392444610596, + "learning_rate": 8.70026890151272e-06, + "loss": 0.3571, + "step": 12876 + }, + { + "epoch": 0.25777844506168207, + "grad_norm": 1.2694947719573975, + "learning_rate": 8.700050865203267e-06, + "loss": 0.3273, + "step": 12877 + }, + { + "epoch": 0.2577984635788104, + "grad_norm": 1.3235490322113037, + "learning_rate": 8.699832813339595e-06, + "loss": 0.3122, + "step": 12878 + }, + { + "epoch": 0.2578184820959387, + "grad_norm": 1.163771629333496, + "learning_rate": 8.699614745922621e-06, + "loss": 0.338, + "step": 12879 + }, + { + "epoch": 0.25783850061306707, + "grad_norm": 1.0546342134475708, + "learning_rate": 8.699396662953266e-06, + "loss": 0.2779, + "step": 12880 + }, + { + "epoch": 0.2578585191301954, + "grad_norm": 1.0292770862579346, + "learning_rate": 8.69917856443244e-06, + "loss": 0.3221, + "step": 12881 + }, + { + "epoch": 0.25787853764732377, + "grad_norm": 1.0982791185379028, + "learning_rate": 8.698960450361065e-06, + "loss": 0.3295, + "step": 12882 + }, + { + "epoch": 0.2578985561644521, + "grad_norm": 1.0855838060379028, + "learning_rate": 8.698742320740057e-06, + "loss": 0.3171, + "step": 12883 + }, + { + "epoch": 0.25791857468158047, + "grad_norm": 1.0431158542633057, + "learning_rate": 8.698524175570332e-06, + "loss": 0.2845, + "step": 12884 + }, + { + "epoch": 0.2579385931987088, + "grad_norm": 1.1047241687774658, + "learning_rate": 8.698306014852807e-06, + "loss": 0.3704, + "step": 12885 + }, + { + "epoch": 0.25795861171583717, + "grad_norm": 1.1447049379348755, + "learning_rate": 8.698087838588398e-06, + "loss": 0.3428, + "step": 12886 + }, + { + "epoch": 0.25797863023296547, + "grad_norm": 1.0568500757217407, + "learning_rate": 8.697869646778025e-06, + "loss": 0.328, + "step": 12887 + }, + { + "epoch": 0.2579986487500938, + "grad_norm": 1.1308727264404297, + "learning_rate": 8.697651439422605e-06, + "loss": 0.3247, + "step": 12888 + }, + { + "epoch": 0.25801866726722217, + "grad_norm": 1.0703647136688232, + "learning_rate": 8.697433216523052e-06, + "loss": 0.353, + "step": 12889 + }, + { + "epoch": 0.2580386857843505, + "grad_norm": 1.0794821977615356, + "learning_rate": 8.697214978080286e-06, + "loss": 0.3362, + "step": 12890 + }, + { + "epoch": 0.25805870430147887, + "grad_norm": 1.1625255346298218, + "learning_rate": 8.696996724095225e-06, + "loss": 0.2892, + "step": 12891 + }, + { + "epoch": 0.2580787228186072, + "grad_norm": 1.1799921989440918, + "learning_rate": 8.696778454568784e-06, + "loss": 0.3342, + "step": 12892 + }, + { + "epoch": 0.25809874133573557, + "grad_norm": 2.0221235752105713, + "learning_rate": 8.696560169501882e-06, + "loss": 0.8359, + "step": 12893 + }, + { + "epoch": 0.2581187598528639, + "grad_norm": 1.0940066576004028, + "learning_rate": 8.696341868895437e-06, + "loss": 0.305, + "step": 12894 + }, + { + "epoch": 0.2581387783699922, + "grad_norm": 1.015837550163269, + "learning_rate": 8.696123552750367e-06, + "loss": 0.3411, + "step": 12895 + }, + { + "epoch": 0.25815879688712057, + "grad_norm": 1.0879402160644531, + "learning_rate": 8.695905221067588e-06, + "loss": 0.3417, + "step": 12896 + }, + { + "epoch": 0.2581788154042489, + "grad_norm": 1.0934703350067139, + "learning_rate": 8.695686873848019e-06, + "loss": 0.3434, + "step": 12897 + }, + { + "epoch": 0.25819883392137727, + "grad_norm": 1.2493155002593994, + "learning_rate": 8.695468511092578e-06, + "loss": 0.3174, + "step": 12898 + }, + { + "epoch": 0.2582188524385056, + "grad_norm": 1.8578451871871948, + "learning_rate": 8.695250132802182e-06, + "loss": 0.8793, + "step": 12899 + }, + { + "epoch": 0.25823887095563397, + "grad_norm": 1.2971333265304565, + "learning_rate": 8.69503173897775e-06, + "loss": 0.2988, + "step": 12900 + }, + { + "epoch": 0.2582588894727623, + "grad_norm": 1.0339449644088745, + "learning_rate": 8.694813329620199e-06, + "loss": 0.3148, + "step": 12901 + }, + { + "epoch": 0.25827890798989067, + "grad_norm": 1.0995392799377441, + "learning_rate": 8.69459490473045e-06, + "loss": 0.3362, + "step": 12902 + }, + { + "epoch": 0.25829892650701897, + "grad_norm": 1.1008007526397705, + "learning_rate": 8.694376464309418e-06, + "loss": 0.3629, + "step": 12903 + }, + { + "epoch": 0.2583189450241473, + "grad_norm": 1.240759253501892, + "learning_rate": 8.694158008358022e-06, + "loss": 0.3289, + "step": 12904 + }, + { + "epoch": 0.25833896354127567, + "grad_norm": 1.2057825326919556, + "learning_rate": 8.69393953687718e-06, + "loss": 0.3, + "step": 12905 + }, + { + "epoch": 0.258358982058404, + "grad_norm": 1.1705729961395264, + "learning_rate": 8.693721049867813e-06, + "loss": 0.2962, + "step": 12906 + }, + { + "epoch": 0.25837900057553237, + "grad_norm": 1.0278176069259644, + "learning_rate": 8.693502547330837e-06, + "loss": 0.3328, + "step": 12907 + }, + { + "epoch": 0.2583990190926607, + "grad_norm": 1.1570379734039307, + "learning_rate": 8.69328402926717e-06, + "loss": 0.3483, + "step": 12908 + }, + { + "epoch": 0.25841903760978907, + "grad_norm": 1.2165848016738892, + "learning_rate": 8.693065495677732e-06, + "loss": 0.3421, + "step": 12909 + }, + { + "epoch": 0.2584390561269174, + "grad_norm": 0.9782730340957642, + "learning_rate": 8.692846946563442e-06, + "loss": 0.3074, + "step": 12910 + }, + { + "epoch": 0.2584590746440457, + "grad_norm": 1.124529242515564, + "learning_rate": 8.692628381925218e-06, + "loss": 0.3073, + "step": 12911 + }, + { + "epoch": 0.25847909316117407, + "grad_norm": 1.7751706838607788, + "learning_rate": 8.692409801763979e-06, + "loss": 0.8339, + "step": 12912 + }, + { + "epoch": 0.2584991116783024, + "grad_norm": 1.0589592456817627, + "learning_rate": 8.692191206080644e-06, + "loss": 0.2865, + "step": 12913 + }, + { + "epoch": 0.25851913019543077, + "grad_norm": 1.129104733467102, + "learning_rate": 8.691972594876132e-06, + "loss": 0.3567, + "step": 12914 + }, + { + "epoch": 0.2585391487125591, + "grad_norm": 1.1050479412078857, + "learning_rate": 8.691753968151362e-06, + "loss": 0.3191, + "step": 12915 + }, + { + "epoch": 0.25855916722968747, + "grad_norm": 1.070462703704834, + "learning_rate": 8.691535325907252e-06, + "loss": 0.3382, + "step": 12916 + }, + { + "epoch": 0.2585791857468158, + "grad_norm": 1.0988932847976685, + "learning_rate": 8.691316668144722e-06, + "loss": 0.315, + "step": 12917 + }, + { + "epoch": 0.25859920426394417, + "grad_norm": 1.0291465520858765, + "learning_rate": 8.69109799486469e-06, + "loss": 0.2856, + "step": 12918 + }, + { + "epoch": 0.25861922278107247, + "grad_norm": 1.13346529006958, + "learning_rate": 8.690879306068079e-06, + "loss": 0.3408, + "step": 12919 + }, + { + "epoch": 0.2586392412982008, + "grad_norm": 0.96500563621521, + "learning_rate": 8.690660601755803e-06, + "loss": 0.3046, + "step": 12920 + }, + { + "epoch": 0.25865925981532917, + "grad_norm": 1.1384905576705933, + "learning_rate": 8.690441881928786e-06, + "loss": 0.3431, + "step": 12921 + }, + { + "epoch": 0.2586792783324575, + "grad_norm": 1.098323941230774, + "learning_rate": 8.690223146587943e-06, + "loss": 0.3169, + "step": 12922 + }, + { + "epoch": 0.25869929684958587, + "grad_norm": 1.1822853088378906, + "learning_rate": 8.6900043957342e-06, + "loss": 0.3548, + "step": 12923 + }, + { + "epoch": 0.2587193153667142, + "grad_norm": 1.8629611730575562, + "learning_rate": 8.689785629368468e-06, + "loss": 0.8093, + "step": 12924 + }, + { + "epoch": 0.25873933388384257, + "grad_norm": 0.959423303604126, + "learning_rate": 8.689566847491676e-06, + "loss": 0.2708, + "step": 12925 + }, + { + "epoch": 0.2587593524009709, + "grad_norm": 2.013598918914795, + "learning_rate": 8.689348050104736e-06, + "loss": 0.8524, + "step": 12926 + }, + { + "epoch": 0.2587793709180992, + "grad_norm": 1.869321584701538, + "learning_rate": 8.68912923720857e-06, + "loss": 0.8348, + "step": 12927 + }, + { + "epoch": 0.25879938943522757, + "grad_norm": 1.1697700023651123, + "learning_rate": 8.6889104088041e-06, + "loss": 0.3042, + "step": 12928 + }, + { + "epoch": 0.2588194079523559, + "grad_norm": 1.1441680192947388, + "learning_rate": 8.688691564892245e-06, + "loss": 0.3283, + "step": 12929 + }, + { + "epoch": 0.25883942646948427, + "grad_norm": 1.166373610496521, + "learning_rate": 8.688472705473925e-06, + "loss": 0.3055, + "step": 12930 + }, + { + "epoch": 0.2588594449866126, + "grad_norm": 2.082895517349243, + "learning_rate": 8.688253830550058e-06, + "loss": 0.8825, + "step": 12931 + }, + { + "epoch": 0.25887946350374097, + "grad_norm": 1.1769886016845703, + "learning_rate": 8.688034940121565e-06, + "loss": 0.3038, + "step": 12932 + }, + { + "epoch": 0.2588994820208693, + "grad_norm": 1.0762430429458618, + "learning_rate": 8.687816034189369e-06, + "loss": 0.3403, + "step": 12933 + }, + { + "epoch": 0.25891950053799767, + "grad_norm": 1.264915108680725, + "learning_rate": 8.687597112754386e-06, + "loss": 0.3307, + "step": 12934 + }, + { + "epoch": 0.25893951905512597, + "grad_norm": 1.0705819129943848, + "learning_rate": 8.68737817581754e-06, + "loss": 0.3336, + "step": 12935 + }, + { + "epoch": 0.2589595375722543, + "grad_norm": 1.0788800716400146, + "learning_rate": 8.687159223379749e-06, + "loss": 0.3059, + "step": 12936 + }, + { + "epoch": 0.25897955608938267, + "grad_norm": 1.1252156496047974, + "learning_rate": 8.686940255441934e-06, + "loss": 0.3202, + "step": 12937 + }, + { + "epoch": 0.258999574606511, + "grad_norm": 1.0437155961990356, + "learning_rate": 8.686721272005017e-06, + "loss": 0.3091, + "step": 12938 + }, + { + "epoch": 0.25901959312363937, + "grad_norm": 1.0463318824768066, + "learning_rate": 8.686502273069915e-06, + "loss": 0.3709, + "step": 12939 + }, + { + "epoch": 0.2590396116407677, + "grad_norm": 0.9532608389854431, + "learning_rate": 8.686283258637552e-06, + "loss": 0.2273, + "step": 12940 + }, + { + "epoch": 0.25905963015789607, + "grad_norm": 1.0413564443588257, + "learning_rate": 8.686064228708846e-06, + "loss": 0.3186, + "step": 12941 + }, + { + "epoch": 0.2590796486750244, + "grad_norm": 1.0352298021316528, + "learning_rate": 8.68584518328472e-06, + "loss": 0.358, + "step": 12942 + }, + { + "epoch": 0.2590996671921527, + "grad_norm": 1.0628458261489868, + "learning_rate": 8.685626122366096e-06, + "loss": 0.3272, + "step": 12943 + }, + { + "epoch": 0.25911968570928107, + "grad_norm": 1.0002249479293823, + "learning_rate": 8.68540704595389e-06, + "loss": 0.3206, + "step": 12944 + }, + { + "epoch": 0.2591397042264094, + "grad_norm": 1.1258562803268433, + "learning_rate": 8.685187954049027e-06, + "loss": 0.2813, + "step": 12945 + }, + { + "epoch": 0.25915972274353777, + "grad_norm": 1.088525414466858, + "learning_rate": 8.684968846652428e-06, + "loss": 0.3654, + "step": 12946 + }, + { + "epoch": 0.2591797412606661, + "grad_norm": 1.1611905097961426, + "learning_rate": 8.68474972376501e-06, + "loss": 0.3323, + "step": 12947 + }, + { + "epoch": 0.25919975977779447, + "grad_norm": 2.919004440307617, + "learning_rate": 8.6845305853877e-06, + "loss": 0.8497, + "step": 12948 + }, + { + "epoch": 0.2592197782949228, + "grad_norm": 1.1801481246948242, + "learning_rate": 8.684311431521415e-06, + "loss": 0.3247, + "step": 12949 + }, + { + "epoch": 0.25923979681205117, + "grad_norm": 1.0735564231872559, + "learning_rate": 8.684092262167079e-06, + "loss": 0.3086, + "step": 12950 + }, + { + "epoch": 0.25925981532917947, + "grad_norm": 1.1011980772018433, + "learning_rate": 8.68387307732561e-06, + "loss": 0.3217, + "step": 12951 + }, + { + "epoch": 0.2592798338463078, + "grad_norm": 1.8031600713729858, + "learning_rate": 8.683653876997935e-06, + "loss": 0.8494, + "step": 12952 + }, + { + "epoch": 0.25929985236343617, + "grad_norm": 1.181524395942688, + "learning_rate": 8.683434661184969e-06, + "loss": 0.3445, + "step": 12953 + }, + { + "epoch": 0.2593198708805645, + "grad_norm": 2.530583143234253, + "learning_rate": 8.683215429887635e-06, + "loss": 0.3215, + "step": 12954 + }, + { + "epoch": 0.25933988939769287, + "grad_norm": 0.974344789981842, + "learning_rate": 8.682996183106858e-06, + "loss": 0.3471, + "step": 12955 + }, + { + "epoch": 0.2593599079148212, + "grad_norm": 1.8134511709213257, + "learning_rate": 8.682776920843558e-06, + "loss": 0.8456, + "step": 12956 + }, + { + "epoch": 0.25937992643194957, + "grad_norm": 1.152009129524231, + "learning_rate": 8.682557643098655e-06, + "loss": 0.322, + "step": 12957 + }, + { + "epoch": 0.2593999449490779, + "grad_norm": 1.9579346179962158, + "learning_rate": 8.682338349873073e-06, + "loss": 0.8549, + "step": 12958 + }, + { + "epoch": 0.2594199634662062, + "grad_norm": 1.7733192443847656, + "learning_rate": 8.682119041167734e-06, + "loss": 0.8761, + "step": 12959 + }, + { + "epoch": 0.25943998198333457, + "grad_norm": 1.955338716506958, + "learning_rate": 8.681899716983557e-06, + "loss": 0.7903, + "step": 12960 + }, + { + "epoch": 0.2594600005004629, + "grad_norm": 1.176419734954834, + "learning_rate": 8.681680377321466e-06, + "loss": 0.3341, + "step": 12961 + }, + { + "epoch": 0.25948001901759127, + "grad_norm": 1.1177676916122437, + "learning_rate": 8.681461022182385e-06, + "loss": 0.3564, + "step": 12962 + }, + { + "epoch": 0.2595000375347196, + "grad_norm": 1.226361632347107, + "learning_rate": 8.681241651567231e-06, + "loss": 0.3524, + "step": 12963 + }, + { + "epoch": 0.25952005605184797, + "grad_norm": 1.1069834232330322, + "learning_rate": 8.681022265476933e-06, + "loss": 0.3029, + "step": 12964 + }, + { + "epoch": 0.2595400745689763, + "grad_norm": 1.0848363637924194, + "learning_rate": 8.680802863912407e-06, + "loss": 0.2834, + "step": 12965 + }, + { + "epoch": 0.25956009308610467, + "grad_norm": 1.0252503156661987, + "learning_rate": 8.680583446874579e-06, + "loss": 0.3079, + "step": 12966 + }, + { + "epoch": 0.25958011160323297, + "grad_norm": 1.029785394668579, + "learning_rate": 8.680364014364368e-06, + "loss": 0.3226, + "step": 12967 + }, + { + "epoch": 0.2596001301203613, + "grad_norm": 1.2198642492294312, + "learning_rate": 8.680144566382702e-06, + "loss": 0.3305, + "step": 12968 + }, + { + "epoch": 0.25962014863748967, + "grad_norm": 1.0854387283325195, + "learning_rate": 8.679925102930498e-06, + "loss": 0.2792, + "step": 12969 + }, + { + "epoch": 0.259640167154618, + "grad_norm": 1.10499107837677, + "learning_rate": 8.679705624008682e-06, + "loss": 0.3016, + "step": 12970 + }, + { + "epoch": 0.25966018567174637, + "grad_norm": 1.0090041160583496, + "learning_rate": 8.679486129618176e-06, + "loss": 0.2981, + "step": 12971 + }, + { + "epoch": 0.2596802041888747, + "grad_norm": 1.9275524616241455, + "learning_rate": 8.679266619759901e-06, + "loss": 0.7781, + "step": 12972 + }, + { + "epoch": 0.25970022270600307, + "grad_norm": 1.0839847326278687, + "learning_rate": 8.67904709443478e-06, + "loss": 0.3708, + "step": 12973 + }, + { + "epoch": 0.2597202412231314, + "grad_norm": 1.243986964225769, + "learning_rate": 8.678827553643738e-06, + "loss": 0.3712, + "step": 12974 + }, + { + "epoch": 0.2597402597402597, + "grad_norm": 1.051308035850525, + "learning_rate": 8.678607997387695e-06, + "loss": 0.3436, + "step": 12975 + }, + { + "epoch": 0.25976027825738807, + "grad_norm": 1.1129435300827026, + "learning_rate": 8.678388425667578e-06, + "loss": 0.352, + "step": 12976 + }, + { + "epoch": 0.2597802967745164, + "grad_norm": 1.0313743352890015, + "learning_rate": 8.678168838484308e-06, + "loss": 0.3272, + "step": 12977 + }, + { + "epoch": 0.25980031529164477, + "grad_norm": 1.0854942798614502, + "learning_rate": 8.677949235838807e-06, + "loss": 0.3542, + "step": 12978 + }, + { + "epoch": 0.2598203338087731, + "grad_norm": 1.1445389986038208, + "learning_rate": 8.677729617731998e-06, + "loss": 0.2824, + "step": 12979 + }, + { + "epoch": 0.25984035232590147, + "grad_norm": 1.229614496231079, + "learning_rate": 8.677509984164804e-06, + "loss": 0.3127, + "step": 12980 + }, + { + "epoch": 0.2598603708430298, + "grad_norm": 1.1937825679779053, + "learning_rate": 8.677290335138152e-06, + "loss": 0.3653, + "step": 12981 + }, + { + "epoch": 0.25988038936015817, + "grad_norm": 1.2252652645111084, + "learning_rate": 8.677070670652962e-06, + "loss": 0.3308, + "step": 12982 + }, + { + "epoch": 0.25990040787728647, + "grad_norm": 1.255355954170227, + "learning_rate": 8.676850990710157e-06, + "loss": 0.3257, + "step": 12983 + }, + { + "epoch": 0.2599204263944148, + "grad_norm": 1.2413263320922852, + "learning_rate": 8.676631295310664e-06, + "loss": 0.3161, + "step": 12984 + }, + { + "epoch": 0.25994044491154317, + "grad_norm": 1.0191203355789185, + "learning_rate": 8.676411584455402e-06, + "loss": 0.3165, + "step": 12985 + }, + { + "epoch": 0.2599604634286715, + "grad_norm": 2.0908796787261963, + "learning_rate": 8.676191858145298e-06, + "loss": 0.8011, + "step": 12986 + }, + { + "epoch": 0.25998048194579987, + "grad_norm": 1.9024838209152222, + "learning_rate": 8.675972116381275e-06, + "loss": 0.7909, + "step": 12987 + }, + { + "epoch": 0.2600005004629282, + "grad_norm": 1.060539960861206, + "learning_rate": 8.675752359164256e-06, + "loss": 0.3037, + "step": 12988 + }, + { + "epoch": 0.26002051898005657, + "grad_norm": 1.0876595973968506, + "learning_rate": 8.675532586495163e-06, + "loss": 0.2593, + "step": 12989 + }, + { + "epoch": 0.2600405374971849, + "grad_norm": 1.1217414140701294, + "learning_rate": 8.675312798374925e-06, + "loss": 0.3566, + "step": 12990 + }, + { + "epoch": 0.2600605560143132, + "grad_norm": 1.1309449672698975, + "learning_rate": 8.675092994804461e-06, + "loss": 0.3738, + "step": 12991 + }, + { + "epoch": 0.26008057453144157, + "grad_norm": 1.1777127981185913, + "learning_rate": 8.674873175784698e-06, + "loss": 0.3314, + "step": 12992 + }, + { + "epoch": 0.2601005930485699, + "grad_norm": 1.958856463432312, + "learning_rate": 8.67465334131656e-06, + "loss": 0.8345, + "step": 12993 + }, + { + "epoch": 0.26012061156569827, + "grad_norm": 1.081147313117981, + "learning_rate": 8.674433491400968e-06, + "loss": 0.2764, + "step": 12994 + }, + { + "epoch": 0.2601406300828266, + "grad_norm": 1.0685677528381348, + "learning_rate": 8.674213626038849e-06, + "loss": 0.3092, + "step": 12995 + }, + { + "epoch": 0.26016064859995497, + "grad_norm": 1.1047629117965698, + "learning_rate": 8.673993745231126e-06, + "loss": 0.295, + "step": 12996 + }, + { + "epoch": 0.2601806671170833, + "grad_norm": 1.1057239770889282, + "learning_rate": 8.673773848978725e-06, + "loss": 0.3016, + "step": 12997 + }, + { + "epoch": 0.26020068563421167, + "grad_norm": 1.0057870149612427, + "learning_rate": 8.673553937282569e-06, + "loss": 0.2691, + "step": 12998 + }, + { + "epoch": 0.26022070415133997, + "grad_norm": 1.1519514322280884, + "learning_rate": 8.673334010143584e-06, + "loss": 0.3407, + "step": 12999 + }, + { + "epoch": 0.2602407226684683, + "grad_norm": 1.1990545988082886, + "learning_rate": 8.673114067562692e-06, + "loss": 0.3464, + "step": 13000 + }, + { + "epoch": 0.26026074118559667, + "grad_norm": 1.0099456310272217, + "learning_rate": 8.67289410954082e-06, + "loss": 0.2983, + "step": 13001 + }, + { + "epoch": 0.260280759702725, + "grad_norm": 1.896289348602295, + "learning_rate": 8.672674136078889e-06, + "loss": 0.8455, + "step": 13002 + }, + { + "epoch": 0.26030077821985337, + "grad_norm": 1.3529977798461914, + "learning_rate": 8.672454147177829e-06, + "loss": 0.3656, + "step": 13003 + }, + { + "epoch": 0.2603207967369817, + "grad_norm": 1.0807899236679077, + "learning_rate": 8.67223414283856e-06, + "loss": 0.3169, + "step": 13004 + }, + { + "epoch": 0.26034081525411007, + "grad_norm": 1.1495872735977173, + "learning_rate": 8.672014123062011e-06, + "loss": 0.3056, + "step": 13005 + }, + { + "epoch": 0.2603608337712384, + "grad_norm": 1.0373958349227905, + "learning_rate": 8.671794087849103e-06, + "loss": 0.2917, + "step": 13006 + }, + { + "epoch": 0.2603808522883667, + "grad_norm": 1.1471449136734009, + "learning_rate": 8.671574037200764e-06, + "loss": 0.325, + "step": 13007 + }, + { + "epoch": 0.26040087080549507, + "grad_norm": 1.1390846967697144, + "learning_rate": 8.671353971117917e-06, + "loss": 0.2936, + "step": 13008 + }, + { + "epoch": 0.2604208893226234, + "grad_norm": 1.0601717233657837, + "learning_rate": 8.67113388960149e-06, + "loss": 0.3305, + "step": 13009 + }, + { + "epoch": 0.26044090783975177, + "grad_norm": 1.3254610300064087, + "learning_rate": 8.670913792652405e-06, + "loss": 0.3318, + "step": 13010 + }, + { + "epoch": 0.2604609263568801, + "grad_norm": 1.8484965562820435, + "learning_rate": 8.670693680271586e-06, + "loss": 0.8101, + "step": 13011 + }, + { + "epoch": 0.26048094487400847, + "grad_norm": 1.0194053649902344, + "learning_rate": 8.670473552459964e-06, + "loss": 0.2915, + "step": 13012 + }, + { + "epoch": 0.2605009633911368, + "grad_norm": 1.0881903171539307, + "learning_rate": 8.670253409218461e-06, + "loss": 0.3387, + "step": 13013 + }, + { + "epoch": 0.26052098190826517, + "grad_norm": 1.1981371641159058, + "learning_rate": 8.670033250548e-06, + "loss": 0.3524, + "step": 13014 + }, + { + "epoch": 0.26054100042539347, + "grad_norm": 1.0907155275344849, + "learning_rate": 8.669813076449511e-06, + "loss": 0.3038, + "step": 13015 + }, + { + "epoch": 0.2605610189425218, + "grad_norm": 0.9367846846580505, + "learning_rate": 8.669592886923917e-06, + "loss": 0.3099, + "step": 13016 + }, + { + "epoch": 0.26058103745965017, + "grad_norm": 1.0268532037734985, + "learning_rate": 8.669372681972145e-06, + "loss": 0.2983, + "step": 13017 + }, + { + "epoch": 0.2606010559767785, + "grad_norm": 1.2090576887130737, + "learning_rate": 8.669152461595118e-06, + "loss": 0.3482, + "step": 13018 + }, + { + "epoch": 0.26062107449390687, + "grad_norm": 1.1331332921981812, + "learning_rate": 8.668932225793766e-06, + "loss": 0.3397, + "step": 13019 + }, + { + "epoch": 0.2606410930110352, + "grad_norm": 1.1613603830337524, + "learning_rate": 8.668711974569011e-06, + "loss": 0.3842, + "step": 13020 + }, + { + "epoch": 0.26066111152816357, + "grad_norm": 1.0684733390808105, + "learning_rate": 8.66849170792178e-06, + "loss": 0.3438, + "step": 13021 + }, + { + "epoch": 0.2606811300452919, + "grad_norm": 1.2087329626083374, + "learning_rate": 8.668271425853e-06, + "loss": 0.3282, + "step": 13022 + }, + { + "epoch": 0.2607011485624202, + "grad_norm": 1.20383882522583, + "learning_rate": 8.668051128363596e-06, + "loss": 0.3615, + "step": 13023 + }, + { + "epoch": 0.26072116707954857, + "grad_norm": 1.313598394393921, + "learning_rate": 8.667830815454496e-06, + "loss": 0.3503, + "step": 13024 + }, + { + "epoch": 0.2607411855966769, + "grad_norm": 1.2707678079605103, + "learning_rate": 8.667610487126622e-06, + "loss": 0.3561, + "step": 13025 + }, + { + "epoch": 0.26076120411380527, + "grad_norm": 1.9428282976150513, + "learning_rate": 8.667390143380905e-06, + "loss": 0.8341, + "step": 13026 + }, + { + "epoch": 0.2607812226309336, + "grad_norm": 1.067115306854248, + "learning_rate": 8.667169784218269e-06, + "loss": 0.3007, + "step": 13027 + }, + { + "epoch": 0.26080124114806197, + "grad_norm": 1.1650021076202393, + "learning_rate": 8.66694940963964e-06, + "loss": 0.359, + "step": 13028 + }, + { + "epoch": 0.2608212596651903, + "grad_norm": 1.1280641555786133, + "learning_rate": 8.666729019645944e-06, + "loss": 0.3206, + "step": 13029 + }, + { + "epoch": 0.26084127818231867, + "grad_norm": 1.125784158706665, + "learning_rate": 8.66650861423811e-06, + "loss": 0.3597, + "step": 13030 + }, + { + "epoch": 0.26086129669944697, + "grad_norm": 1.223179817199707, + "learning_rate": 8.66628819341706e-06, + "loss": 0.3345, + "step": 13031 + }, + { + "epoch": 0.2608813152165753, + "grad_norm": 1.0542397499084473, + "learning_rate": 8.666067757183725e-06, + "loss": 0.3343, + "step": 13032 + }, + { + "epoch": 0.26090133373370367, + "grad_norm": 1.1863164901733398, + "learning_rate": 8.66584730553903e-06, + "loss": 0.3246, + "step": 13033 + }, + { + "epoch": 0.260921352250832, + "grad_norm": 1.2204726934432983, + "learning_rate": 8.665626838483903e-06, + "loss": 0.364, + "step": 13034 + }, + { + "epoch": 0.26094137076796037, + "grad_norm": 1.0810680389404297, + "learning_rate": 8.665406356019267e-06, + "loss": 0.3875, + "step": 13035 + }, + { + "epoch": 0.2609613892850887, + "grad_norm": 1.0180302858352661, + "learning_rate": 8.665185858146055e-06, + "loss": 0.3254, + "step": 13036 + }, + { + "epoch": 0.26098140780221707, + "grad_norm": 1.0605924129486084, + "learning_rate": 8.664965344865187e-06, + "loss": 0.333, + "step": 13037 + }, + { + "epoch": 0.2610014263193454, + "grad_norm": 1.2704534530639648, + "learning_rate": 8.664744816177594e-06, + "loss": 0.3928, + "step": 13038 + }, + { + "epoch": 0.2610214448364737, + "grad_norm": 1.3247058391571045, + "learning_rate": 8.664524272084205e-06, + "loss": 0.3054, + "step": 13039 + }, + { + "epoch": 0.26104146335360207, + "grad_norm": 1.3623605966567993, + "learning_rate": 8.664303712585943e-06, + "loss": 0.3429, + "step": 13040 + }, + { + "epoch": 0.2610614818707304, + "grad_norm": 1.1895623207092285, + "learning_rate": 8.664083137683736e-06, + "loss": 0.3615, + "step": 13041 + }, + { + "epoch": 0.26108150038785877, + "grad_norm": 1.0759474039077759, + "learning_rate": 8.663862547378513e-06, + "loss": 0.292, + "step": 13042 + }, + { + "epoch": 0.2611015189049871, + "grad_norm": 1.1378774642944336, + "learning_rate": 8.6636419416712e-06, + "loss": 0.3269, + "step": 13043 + }, + { + "epoch": 0.26112153742211547, + "grad_norm": 1.0966370105743408, + "learning_rate": 8.663421320562724e-06, + "loss": 0.3325, + "step": 13044 + }, + { + "epoch": 0.2611415559392438, + "grad_norm": 1.225522756576538, + "learning_rate": 8.663200684054013e-06, + "loss": 0.3382, + "step": 13045 + }, + { + "epoch": 0.26116157445637217, + "grad_norm": 1.15049409866333, + "learning_rate": 8.662980032145997e-06, + "loss": 0.3542, + "step": 13046 + }, + { + "epoch": 0.26118159297350046, + "grad_norm": 1.0552763938903809, + "learning_rate": 8.662759364839598e-06, + "loss": 0.3353, + "step": 13047 + }, + { + "epoch": 0.2612016114906288, + "grad_norm": 1.1091372966766357, + "learning_rate": 8.66253868213575e-06, + "loss": 0.3178, + "step": 13048 + }, + { + "epoch": 0.26122163000775717, + "grad_norm": 1.1585416793823242, + "learning_rate": 8.662317984035375e-06, + "loss": 0.3435, + "step": 13049 + }, + { + "epoch": 0.2612416485248855, + "grad_norm": 1.0653157234191895, + "learning_rate": 8.662097270539406e-06, + "loss": 0.2908, + "step": 13050 + }, + { + "epoch": 0.26126166704201387, + "grad_norm": 1.0679525136947632, + "learning_rate": 8.661876541648766e-06, + "loss": 0.2996, + "step": 13051 + }, + { + "epoch": 0.2612816855591422, + "grad_norm": 1.1913610696792603, + "learning_rate": 8.661655797364386e-06, + "loss": 0.3626, + "step": 13052 + }, + { + "epoch": 0.26130170407627057, + "grad_norm": 1.1932915449142456, + "learning_rate": 8.661435037687193e-06, + "loss": 0.3892, + "step": 13053 + }, + { + "epoch": 0.2613217225933989, + "grad_norm": 1.0989867448806763, + "learning_rate": 8.661214262618114e-06, + "loss": 0.3357, + "step": 13054 + }, + { + "epoch": 0.2613417411105272, + "grad_norm": 1.2218557596206665, + "learning_rate": 8.660993472158079e-06, + "loss": 0.3119, + "step": 13055 + }, + { + "epoch": 0.26136175962765557, + "grad_norm": 1.854055643081665, + "learning_rate": 8.660772666308014e-06, + "loss": 0.8721, + "step": 13056 + }, + { + "epoch": 0.2613817781447839, + "grad_norm": 1.1798781156539917, + "learning_rate": 8.660551845068851e-06, + "loss": 0.2902, + "step": 13057 + }, + { + "epoch": 0.26140179666191227, + "grad_norm": 1.1135483980178833, + "learning_rate": 8.660331008441516e-06, + "loss": 0.3344, + "step": 13058 + }, + { + "epoch": 0.2614218151790406, + "grad_norm": 1.0466680526733398, + "learning_rate": 8.660110156426935e-06, + "loss": 0.3217, + "step": 13059 + }, + { + "epoch": 0.26144183369616897, + "grad_norm": 1.315238118171692, + "learning_rate": 8.659889289026039e-06, + "loss": 0.2911, + "step": 13060 + }, + { + "epoch": 0.2614618522132973, + "grad_norm": 1.2013213634490967, + "learning_rate": 8.659668406239756e-06, + "loss": 0.3294, + "step": 13061 + }, + { + "epoch": 0.26148187073042567, + "grad_norm": 1.1643885374069214, + "learning_rate": 8.659447508069014e-06, + "loss": 0.3174, + "step": 13062 + }, + { + "epoch": 0.26150188924755396, + "grad_norm": 1.160482406616211, + "learning_rate": 8.659226594514744e-06, + "loss": 0.3344, + "step": 13063 + }, + { + "epoch": 0.2615219077646823, + "grad_norm": 1.1146094799041748, + "learning_rate": 8.659005665577871e-06, + "loss": 0.3115, + "step": 13064 + }, + { + "epoch": 0.26154192628181067, + "grad_norm": 1.0635154247283936, + "learning_rate": 8.658784721259328e-06, + "loss": 0.2975, + "step": 13065 + }, + { + "epoch": 0.261561944798939, + "grad_norm": 1.127506136894226, + "learning_rate": 8.65856376156004e-06, + "loss": 0.3084, + "step": 13066 + }, + { + "epoch": 0.26158196331606737, + "grad_norm": 1.9229260683059692, + "learning_rate": 8.658342786480937e-06, + "loss": 0.8212, + "step": 13067 + }, + { + "epoch": 0.2616019818331957, + "grad_norm": 1.119720458984375, + "learning_rate": 8.658121796022948e-06, + "loss": 0.3672, + "step": 13068 + }, + { + "epoch": 0.26162200035032407, + "grad_norm": 1.4005037546157837, + "learning_rate": 8.657900790187003e-06, + "loss": 0.3395, + "step": 13069 + }, + { + "epoch": 0.2616420188674524, + "grad_norm": 1.9985625743865967, + "learning_rate": 8.65767976897403e-06, + "loss": 0.8074, + "step": 13070 + }, + { + "epoch": 0.2616620373845807, + "grad_norm": 1.0469346046447754, + "learning_rate": 8.657458732384957e-06, + "loss": 0.3545, + "step": 13071 + }, + { + "epoch": 0.26168205590170907, + "grad_norm": 1.0609257221221924, + "learning_rate": 8.657237680420716e-06, + "loss": 0.3246, + "step": 13072 + }, + { + "epoch": 0.2617020744188374, + "grad_norm": 1.9963579177856445, + "learning_rate": 8.657016613082236e-06, + "loss": 0.8252, + "step": 13073 + }, + { + "epoch": 0.26172209293596577, + "grad_norm": 1.1400195360183716, + "learning_rate": 8.656795530370443e-06, + "loss": 0.2595, + "step": 13074 + }, + { + "epoch": 0.2617421114530941, + "grad_norm": 1.0319775342941284, + "learning_rate": 8.656574432286272e-06, + "loss": 0.2881, + "step": 13075 + }, + { + "epoch": 0.26176212997022247, + "grad_norm": 1.0935289859771729, + "learning_rate": 8.656353318830646e-06, + "loss": 0.3251, + "step": 13076 + }, + { + "epoch": 0.2617821484873508, + "grad_norm": 1.1925783157348633, + "learning_rate": 8.656132190004495e-06, + "loss": 0.348, + "step": 13077 + }, + { + "epoch": 0.26180216700447917, + "grad_norm": 1.0454293489456177, + "learning_rate": 8.655911045808756e-06, + "loss": 0.2916, + "step": 13078 + }, + { + "epoch": 0.26182218552160746, + "grad_norm": 1.1919944286346436, + "learning_rate": 8.655689886244351e-06, + "loss": 0.3339, + "step": 13079 + }, + { + "epoch": 0.2618422040387358, + "grad_norm": 1.116181492805481, + "learning_rate": 8.655468711312214e-06, + "loss": 0.3269, + "step": 13080 + }, + { + "epoch": 0.26186222255586417, + "grad_norm": 1.0633184909820557, + "learning_rate": 8.655247521013273e-06, + "loss": 0.2807, + "step": 13081 + }, + { + "epoch": 0.2618822410729925, + "grad_norm": 1.1012423038482666, + "learning_rate": 8.655026315348456e-06, + "loss": 0.3547, + "step": 13082 + }, + { + "epoch": 0.26190225959012087, + "grad_norm": 1.05229651927948, + "learning_rate": 8.654805094318697e-06, + "loss": 0.3548, + "step": 13083 + }, + { + "epoch": 0.2619222781072492, + "grad_norm": 1.0610078573226929, + "learning_rate": 8.654583857924924e-06, + "loss": 0.3298, + "step": 13084 + }, + { + "epoch": 0.26194229662437757, + "grad_norm": 1.7394332885742188, + "learning_rate": 8.654362606168066e-06, + "loss": 0.8921, + "step": 13085 + }, + { + "epoch": 0.2619623151415059, + "grad_norm": 0.9929311871528625, + "learning_rate": 8.654141339049053e-06, + "loss": 0.3125, + "step": 13086 + }, + { + "epoch": 0.2619823336586342, + "grad_norm": 0.9939424395561218, + "learning_rate": 8.65392005656882e-06, + "loss": 0.2992, + "step": 13087 + }, + { + "epoch": 0.26200235217576257, + "grad_norm": 1.1017563343048096, + "learning_rate": 8.653698758728288e-06, + "loss": 0.3204, + "step": 13088 + }, + { + "epoch": 0.2620223706928909, + "grad_norm": 1.0994272232055664, + "learning_rate": 8.653477445528395e-06, + "loss": 0.3542, + "step": 13089 + }, + { + "epoch": 0.26204238921001927, + "grad_norm": 2.0818285942077637, + "learning_rate": 8.65325611697007e-06, + "loss": 0.7658, + "step": 13090 + }, + { + "epoch": 0.2620624077271476, + "grad_norm": 1.070141315460205, + "learning_rate": 8.653034773054243e-06, + "loss": 0.3219, + "step": 13091 + }, + { + "epoch": 0.26208242624427597, + "grad_norm": 1.2166554927825928, + "learning_rate": 8.652813413781843e-06, + "loss": 0.2935, + "step": 13092 + }, + { + "epoch": 0.2621024447614043, + "grad_norm": 1.0406559705734253, + "learning_rate": 8.652592039153802e-06, + "loss": 0.2652, + "step": 13093 + }, + { + "epoch": 0.26212246327853267, + "grad_norm": 1.0513757467269897, + "learning_rate": 8.652370649171048e-06, + "loss": 0.3191, + "step": 13094 + }, + { + "epoch": 0.26214248179566096, + "grad_norm": 1.1065510511398315, + "learning_rate": 8.652149243834516e-06, + "loss": 0.3125, + "step": 13095 + }, + { + "epoch": 0.2621625003127893, + "grad_norm": 0.9332047700881958, + "learning_rate": 8.651927823145134e-06, + "loss": 0.3049, + "step": 13096 + }, + { + "epoch": 0.26218251882991767, + "grad_norm": 1.059922695159912, + "learning_rate": 8.651706387103834e-06, + "loss": 0.3285, + "step": 13097 + }, + { + "epoch": 0.262202537347046, + "grad_norm": 1.1633344888687134, + "learning_rate": 8.651484935711545e-06, + "loss": 0.3073, + "step": 13098 + }, + { + "epoch": 0.26222255586417437, + "grad_norm": 1.1363415718078613, + "learning_rate": 8.6512634689692e-06, + "loss": 0.3656, + "step": 13099 + }, + { + "epoch": 0.2622425743813027, + "grad_norm": 1.1477214097976685, + "learning_rate": 8.651041986877729e-06, + "loss": 0.3222, + "step": 13100 + }, + { + "epoch": 0.26226259289843107, + "grad_norm": 1.0565431118011475, + "learning_rate": 8.650820489438063e-06, + "loss": 0.3319, + "step": 13101 + }, + { + "epoch": 0.2622826114155594, + "grad_norm": 2.027242660522461, + "learning_rate": 8.650598976651134e-06, + "loss": 0.8596, + "step": 13102 + }, + { + "epoch": 0.2623026299326877, + "grad_norm": 1.061818242073059, + "learning_rate": 8.65037744851787e-06, + "loss": 0.3334, + "step": 13103 + }, + { + "epoch": 0.26232264844981606, + "grad_norm": 1.9179027080535889, + "learning_rate": 8.650155905039208e-06, + "loss": 0.8136, + "step": 13104 + }, + { + "epoch": 0.2623426669669444, + "grad_norm": 1.2382173538208008, + "learning_rate": 8.649934346216074e-06, + "loss": 0.3369, + "step": 13105 + }, + { + "epoch": 0.26236268548407277, + "grad_norm": 1.2523720264434814, + "learning_rate": 8.649712772049402e-06, + "loss": 0.3689, + "step": 13106 + }, + { + "epoch": 0.2623827040012011, + "grad_norm": 1.1146132946014404, + "learning_rate": 8.649491182540124e-06, + "loss": 0.3443, + "step": 13107 + }, + { + "epoch": 0.26240272251832947, + "grad_norm": 1.0591050386428833, + "learning_rate": 8.64926957768917e-06, + "loss": 0.3162, + "step": 13108 + }, + { + "epoch": 0.2624227410354578, + "grad_norm": 1.0926975011825562, + "learning_rate": 8.64904795749747e-06, + "loss": 0.3421, + "step": 13109 + }, + { + "epoch": 0.26244275955258617, + "grad_norm": 1.350135326385498, + "learning_rate": 8.64882632196596e-06, + "loss": 0.2916, + "step": 13110 + }, + { + "epoch": 0.26246277806971446, + "grad_norm": 1.2230347394943237, + "learning_rate": 8.64860467109557e-06, + "loss": 0.3328, + "step": 13111 + }, + { + "epoch": 0.2624827965868428, + "grad_norm": 1.221574068069458, + "learning_rate": 8.648383004887228e-06, + "loss": 0.3343, + "step": 13112 + }, + { + "epoch": 0.26250281510397117, + "grad_norm": 1.146856665611267, + "learning_rate": 8.648161323341871e-06, + "loss": 0.3376, + "step": 13113 + }, + { + "epoch": 0.2625228336210995, + "grad_norm": 1.1441593170166016, + "learning_rate": 8.647939626460429e-06, + "loss": 0.2673, + "step": 13114 + }, + { + "epoch": 0.26254285213822787, + "grad_norm": 1.9637809991836548, + "learning_rate": 8.647717914243834e-06, + "loss": 0.836, + "step": 13115 + }, + { + "epoch": 0.2625628706553562, + "grad_norm": 1.1515324115753174, + "learning_rate": 8.647496186693016e-06, + "loss": 0.3215, + "step": 13116 + }, + { + "epoch": 0.26258288917248457, + "grad_norm": 0.9877830147743225, + "learning_rate": 8.647274443808911e-06, + "loss": 0.3295, + "step": 13117 + }, + { + "epoch": 0.2626029076896129, + "grad_norm": 1.1635520458221436, + "learning_rate": 8.647052685592448e-06, + "loss": 0.3157, + "step": 13118 + }, + { + "epoch": 0.2626229262067412, + "grad_norm": 1.3926377296447754, + "learning_rate": 8.646830912044561e-06, + "loss": 0.2526, + "step": 13119 + }, + { + "epoch": 0.26264294472386956, + "grad_norm": 1.0206035375595093, + "learning_rate": 8.646609123166182e-06, + "loss": 0.2794, + "step": 13120 + }, + { + "epoch": 0.2626629632409979, + "grad_norm": 1.1723154783248901, + "learning_rate": 8.646387318958242e-06, + "loss": 0.3516, + "step": 13121 + }, + { + "epoch": 0.26268298175812627, + "grad_norm": 1.849244236946106, + "learning_rate": 8.646165499421676e-06, + "loss": 0.858, + "step": 13122 + }, + { + "epoch": 0.2627030002752546, + "grad_norm": 1.970043420791626, + "learning_rate": 8.645943664557414e-06, + "loss": 0.7619, + "step": 13123 + }, + { + "epoch": 0.26272301879238297, + "grad_norm": 1.203574776649475, + "learning_rate": 8.64572181436639e-06, + "loss": 0.3284, + "step": 13124 + }, + { + "epoch": 0.2627430373095113, + "grad_norm": 1.119131326675415, + "learning_rate": 8.645499948849534e-06, + "loss": 0.2993, + "step": 13125 + }, + { + "epoch": 0.26276305582663967, + "grad_norm": 1.0401710271835327, + "learning_rate": 8.645278068007782e-06, + "loss": 0.3236, + "step": 13126 + }, + { + "epoch": 0.26278307434376796, + "grad_norm": 1.1722131967544556, + "learning_rate": 8.645056171842066e-06, + "loss": 0.32, + "step": 13127 + }, + { + "epoch": 0.2628030928608963, + "grad_norm": 1.087263822555542, + "learning_rate": 8.644834260353319e-06, + "loss": 0.3125, + "step": 13128 + }, + { + "epoch": 0.26282311137802467, + "grad_norm": 1.0094668865203857, + "learning_rate": 8.64461233354247e-06, + "loss": 0.2679, + "step": 13129 + }, + { + "epoch": 0.262843129895153, + "grad_norm": 1.091804027557373, + "learning_rate": 8.644390391410458e-06, + "loss": 0.3226, + "step": 13130 + }, + { + "epoch": 0.26286314841228137, + "grad_norm": 1.0912282466888428, + "learning_rate": 8.644168433958211e-06, + "loss": 0.3561, + "step": 13131 + }, + { + "epoch": 0.2628831669294097, + "grad_norm": 1.0978257656097412, + "learning_rate": 8.643946461186666e-06, + "loss": 0.3177, + "step": 13132 + }, + { + "epoch": 0.26290318544653807, + "grad_norm": 1.7236303091049194, + "learning_rate": 8.643724473096753e-06, + "loss": 0.8296, + "step": 13133 + }, + { + "epoch": 0.2629232039636664, + "grad_norm": 1.1902167797088623, + "learning_rate": 8.643502469689408e-06, + "loss": 0.3651, + "step": 13134 + }, + { + "epoch": 0.2629432224807947, + "grad_norm": 1.2290809154510498, + "learning_rate": 8.643280450965562e-06, + "loss": 0.3264, + "step": 13135 + }, + { + "epoch": 0.26296324099792306, + "grad_norm": 1.0349233150482178, + "learning_rate": 8.64305841692615e-06, + "loss": 0.3139, + "step": 13136 + }, + { + "epoch": 0.2629832595150514, + "grad_norm": 1.233138918876648, + "learning_rate": 8.642836367572102e-06, + "loss": 0.3106, + "step": 13137 + }, + { + "epoch": 0.26300327803217977, + "grad_norm": 1.7579560279846191, + "learning_rate": 8.642614302904355e-06, + "loss": 0.8646, + "step": 13138 + }, + { + "epoch": 0.2630232965493081, + "grad_norm": 1.1308594942092896, + "learning_rate": 8.64239222292384e-06, + "loss": 0.3014, + "step": 13139 + }, + { + "epoch": 0.26304331506643647, + "grad_norm": 1.8708018064498901, + "learning_rate": 8.642170127631495e-06, + "loss": 0.8267, + "step": 13140 + }, + { + "epoch": 0.2630633335835648, + "grad_norm": 1.1430408954620361, + "learning_rate": 8.64194801702825e-06, + "loss": 0.3113, + "step": 13141 + }, + { + "epoch": 0.26308335210069317, + "grad_norm": 1.1598366498947144, + "learning_rate": 8.641725891115036e-06, + "loss": 0.3702, + "step": 13142 + }, + { + "epoch": 0.26310337061782146, + "grad_norm": 1.138856291770935, + "learning_rate": 8.641503749892791e-06, + "loss": 0.3953, + "step": 13143 + }, + { + "epoch": 0.2631233891349498, + "grad_norm": 1.1643372774124146, + "learning_rate": 8.641281593362452e-06, + "loss": 0.3342, + "step": 13144 + }, + { + "epoch": 0.26314340765207817, + "grad_norm": 1.0690219402313232, + "learning_rate": 8.641059421524946e-06, + "loss": 0.2667, + "step": 13145 + }, + { + "epoch": 0.2631634261692065, + "grad_norm": 1.8048800230026245, + "learning_rate": 8.64083723438121e-06, + "loss": 0.8742, + "step": 13146 + }, + { + "epoch": 0.26318344468633487, + "grad_norm": 1.1179046630859375, + "learning_rate": 8.640615031932177e-06, + "loss": 0.303, + "step": 13147 + }, + { + "epoch": 0.2632034632034632, + "grad_norm": 0.9888165593147278, + "learning_rate": 8.640392814178781e-06, + "loss": 0.304, + "step": 13148 + }, + { + "epoch": 0.26322348172059157, + "grad_norm": 1.089788556098938, + "learning_rate": 8.64017058112196e-06, + "loss": 0.2865, + "step": 13149 + }, + { + "epoch": 0.2632435002377199, + "grad_norm": 1.0282272100448608, + "learning_rate": 8.639948332762643e-06, + "loss": 0.2957, + "step": 13150 + }, + { + "epoch": 0.2632635187548482, + "grad_norm": 1.1303269863128662, + "learning_rate": 8.639726069101769e-06, + "loss": 0.3482, + "step": 13151 + }, + { + "epoch": 0.26328353727197656, + "grad_norm": 1.0626798868179321, + "learning_rate": 8.639503790140267e-06, + "loss": 0.3299, + "step": 13152 + }, + { + "epoch": 0.2633035557891049, + "grad_norm": 1.131779432296753, + "learning_rate": 8.639281495879074e-06, + "loss": 0.3615, + "step": 13153 + }, + { + "epoch": 0.26332357430623327, + "grad_norm": 1.100151777267456, + "learning_rate": 8.639059186319128e-06, + "loss": 0.3464, + "step": 13154 + }, + { + "epoch": 0.2633435928233616, + "grad_norm": 1.0741664171218872, + "learning_rate": 8.638836861461357e-06, + "loss": 0.3109, + "step": 13155 + }, + { + "epoch": 0.26336361134048997, + "grad_norm": 1.7416071891784668, + "learning_rate": 8.6386145213067e-06, + "loss": 0.7819, + "step": 13156 + }, + { + "epoch": 0.2633836298576183, + "grad_norm": 1.0151190757751465, + "learning_rate": 8.638392165856092e-06, + "loss": 0.3402, + "step": 13157 + }, + { + "epoch": 0.26340364837474667, + "grad_norm": 1.0488836765289307, + "learning_rate": 8.638169795110464e-06, + "loss": 0.3412, + "step": 13158 + }, + { + "epoch": 0.26342366689187496, + "grad_norm": 1.1213147640228271, + "learning_rate": 8.637947409070755e-06, + "loss": 0.3688, + "step": 13159 + }, + { + "epoch": 0.2634436854090033, + "grad_norm": 1.83528470993042, + "learning_rate": 8.637725007737899e-06, + "loss": 0.878, + "step": 13160 + }, + { + "epoch": 0.26346370392613166, + "grad_norm": 1.131984829902649, + "learning_rate": 8.637502591112827e-06, + "loss": 0.3727, + "step": 13161 + }, + { + "epoch": 0.26348372244326, + "grad_norm": 0.9910182952880859, + "learning_rate": 8.637280159196478e-06, + "loss": 0.3392, + "step": 13162 + }, + { + "epoch": 0.26350374096038837, + "grad_norm": 1.1098111867904663, + "learning_rate": 8.637057711989786e-06, + "loss": 0.3517, + "step": 13163 + }, + { + "epoch": 0.2635237594775167, + "grad_norm": 1.2040599584579468, + "learning_rate": 8.636835249493688e-06, + "loss": 0.292, + "step": 13164 + }, + { + "epoch": 0.26354377799464507, + "grad_norm": 1.003820776939392, + "learning_rate": 8.636612771709113e-06, + "loss": 0.256, + "step": 13165 + }, + { + "epoch": 0.26356379651177336, + "grad_norm": 1.217658519744873, + "learning_rate": 8.636390278637006e-06, + "loss": 0.377, + "step": 13166 + }, + { + "epoch": 0.2635838150289017, + "grad_norm": 1.0699676275253296, + "learning_rate": 8.636167770278293e-06, + "loss": 0.2871, + "step": 13167 + }, + { + "epoch": 0.26360383354603006, + "grad_norm": 1.1499969959259033, + "learning_rate": 8.635945246633915e-06, + "loss": 0.2914, + "step": 13168 + }, + { + "epoch": 0.2636238520631584, + "grad_norm": 1.176118016242981, + "learning_rate": 8.635722707704804e-06, + "loss": 0.3884, + "step": 13169 + }, + { + "epoch": 0.26364387058028677, + "grad_norm": 1.0136455297470093, + "learning_rate": 8.635500153491898e-06, + "loss": 0.3445, + "step": 13170 + }, + { + "epoch": 0.2636638890974151, + "grad_norm": 1.1392920017242432, + "learning_rate": 8.635277583996131e-06, + "loss": 0.3301, + "step": 13171 + }, + { + "epoch": 0.26368390761454347, + "grad_norm": 1.0597999095916748, + "learning_rate": 8.63505499921844e-06, + "loss": 0.3096, + "step": 13172 + }, + { + "epoch": 0.2637039261316718, + "grad_norm": 1.0671008825302124, + "learning_rate": 8.63483239915976e-06, + "loss": 0.341, + "step": 13173 + }, + { + "epoch": 0.2637239446488001, + "grad_norm": 1.0715019702911377, + "learning_rate": 8.634609783821028e-06, + "loss": 0.3414, + "step": 13174 + }, + { + "epoch": 0.26374396316592846, + "grad_norm": 1.0912922620773315, + "learning_rate": 8.634387153203177e-06, + "loss": 0.3349, + "step": 13175 + }, + { + "epoch": 0.2637639816830568, + "grad_norm": 1.1950232982635498, + "learning_rate": 8.634164507307146e-06, + "loss": 0.2975, + "step": 13176 + }, + { + "epoch": 0.26378400020018516, + "grad_norm": 1.0157363414764404, + "learning_rate": 8.633941846133867e-06, + "loss": 0.3161, + "step": 13177 + }, + { + "epoch": 0.2638040187173135, + "grad_norm": 1.074722170829773, + "learning_rate": 8.633719169684281e-06, + "loss": 0.3171, + "step": 13178 + }, + { + "epoch": 0.26382403723444187, + "grad_norm": 1.0699816942214966, + "learning_rate": 8.63349647795932e-06, + "loss": 0.2915, + "step": 13179 + }, + { + "epoch": 0.2638440557515702, + "grad_norm": 1.107417106628418, + "learning_rate": 8.633273770959922e-06, + "loss": 0.4033, + "step": 13180 + }, + { + "epoch": 0.26386407426869857, + "grad_norm": 1.0218199491500854, + "learning_rate": 8.633051048687025e-06, + "loss": 0.2943, + "step": 13181 + }, + { + "epoch": 0.26388409278582686, + "grad_norm": 1.1218515634536743, + "learning_rate": 8.632828311141561e-06, + "loss": 0.3277, + "step": 13182 + }, + { + "epoch": 0.2639041113029552, + "grad_norm": 1.1159077882766724, + "learning_rate": 8.632605558324469e-06, + "loss": 0.3282, + "step": 13183 + }, + { + "epoch": 0.26392412982008356, + "grad_norm": 1.3032773733139038, + "learning_rate": 8.632382790236684e-06, + "loss": 0.3549, + "step": 13184 + }, + { + "epoch": 0.2639441483372119, + "grad_norm": 1.0895817279815674, + "learning_rate": 8.632160006879145e-06, + "loss": 0.356, + "step": 13185 + }, + { + "epoch": 0.26396416685434027, + "grad_norm": 1.1962116956710815, + "learning_rate": 8.631937208252788e-06, + "loss": 0.3233, + "step": 13186 + }, + { + "epoch": 0.2639841853714686, + "grad_norm": 1.0625947713851929, + "learning_rate": 8.631714394358546e-06, + "loss": 0.3009, + "step": 13187 + }, + { + "epoch": 0.26400420388859697, + "grad_norm": 1.5549672842025757, + "learning_rate": 8.63149156519736e-06, + "loss": 0.3498, + "step": 13188 + }, + { + "epoch": 0.2640242224057253, + "grad_norm": 1.0582517385482788, + "learning_rate": 8.631268720770165e-06, + "loss": 0.2869, + "step": 13189 + }, + { + "epoch": 0.2640442409228536, + "grad_norm": 1.0597281455993652, + "learning_rate": 8.631045861077896e-06, + "loss": 0.3244, + "step": 13190 + }, + { + "epoch": 0.26406425943998196, + "grad_norm": 1.1856322288513184, + "learning_rate": 8.630822986121492e-06, + "loss": 0.3157, + "step": 13191 + }, + { + "epoch": 0.2640842779571103, + "grad_norm": 1.9840896129608154, + "learning_rate": 8.63060009590189e-06, + "loss": 0.8306, + "step": 13192 + }, + { + "epoch": 0.26410429647423866, + "grad_norm": 1.1375224590301514, + "learning_rate": 8.630377190420027e-06, + "loss": 0.341, + "step": 13193 + }, + { + "epoch": 0.264124314991367, + "grad_norm": 1.2383248805999756, + "learning_rate": 8.63015426967684e-06, + "loss": 0.3138, + "step": 13194 + }, + { + "epoch": 0.26414433350849537, + "grad_norm": 1.7732715606689453, + "learning_rate": 8.629931333673264e-06, + "loss": 0.9031, + "step": 13195 + }, + { + "epoch": 0.2641643520256237, + "grad_norm": 1.0394341945648193, + "learning_rate": 8.62970838241024e-06, + "loss": 0.2943, + "step": 13196 + }, + { + "epoch": 0.26418437054275207, + "grad_norm": 1.4801945686340332, + "learning_rate": 8.629485415888701e-06, + "loss": 0.3225, + "step": 13197 + }, + { + "epoch": 0.26420438905988036, + "grad_norm": 1.254811406135559, + "learning_rate": 8.62926243410959e-06, + "loss": 0.3944, + "step": 13198 + }, + { + "epoch": 0.2642244075770087, + "grad_norm": 1.03652822971344, + "learning_rate": 8.629039437073836e-06, + "loss": 0.3438, + "step": 13199 + }, + { + "epoch": 0.26424442609413706, + "grad_norm": 1.1399198770523071, + "learning_rate": 8.628816424782383e-06, + "loss": 0.3638, + "step": 13200 + }, + { + "epoch": 0.2642644446112654, + "grad_norm": 1.8153440952301025, + "learning_rate": 8.628593397236167e-06, + "loss": 0.8149, + "step": 13201 + }, + { + "epoch": 0.26428446312839377, + "grad_norm": 1.7303158044815063, + "learning_rate": 8.628370354436125e-06, + "loss": 0.8218, + "step": 13202 + }, + { + "epoch": 0.2643044816455221, + "grad_norm": 1.1743228435516357, + "learning_rate": 8.628147296383196e-06, + "loss": 0.3497, + "step": 13203 + }, + { + "epoch": 0.26432450016265047, + "grad_norm": 1.0626351833343506, + "learning_rate": 8.627924223078315e-06, + "loss": 0.3147, + "step": 13204 + }, + { + "epoch": 0.2643445186797788, + "grad_norm": 1.1164867877960205, + "learning_rate": 8.627701134522422e-06, + "loss": 0.291, + "step": 13205 + }, + { + "epoch": 0.2643645371969071, + "grad_norm": 1.0024542808532715, + "learning_rate": 8.627478030716454e-06, + "loss": 0.3445, + "step": 13206 + }, + { + "epoch": 0.26438455571403546, + "grad_norm": 1.1824575662612915, + "learning_rate": 8.62725491166135e-06, + "loss": 0.3339, + "step": 13207 + }, + { + "epoch": 0.2644045742311638, + "grad_norm": 1.1312116384506226, + "learning_rate": 8.627031777358045e-06, + "loss": 0.2905, + "step": 13208 + }, + { + "epoch": 0.26442459274829216, + "grad_norm": 1.2050203084945679, + "learning_rate": 8.626808627807478e-06, + "loss": 0.3434, + "step": 13209 + }, + { + "epoch": 0.2644446112654205, + "grad_norm": 1.0219916105270386, + "learning_rate": 8.626585463010591e-06, + "loss": 0.2971, + "step": 13210 + }, + { + "epoch": 0.26446462978254887, + "grad_norm": 1.078378677368164, + "learning_rate": 8.626362282968319e-06, + "loss": 0.3063, + "step": 13211 + }, + { + "epoch": 0.2644846482996772, + "grad_norm": 1.1789010763168335, + "learning_rate": 8.6261390876816e-06, + "loss": 0.3808, + "step": 13212 + }, + { + "epoch": 0.26450466681680557, + "grad_norm": 1.857523798942566, + "learning_rate": 8.625915877151371e-06, + "loss": 0.8508, + "step": 13213 + }, + { + "epoch": 0.26452468533393386, + "grad_norm": 1.130570411682129, + "learning_rate": 8.625692651378573e-06, + "loss": 0.3401, + "step": 13214 + }, + { + "epoch": 0.2645447038510622, + "grad_norm": 1.058326005935669, + "learning_rate": 8.625469410364143e-06, + "loss": 0.2659, + "step": 13215 + }, + { + "epoch": 0.26456472236819056, + "grad_norm": 1.2382148504257202, + "learning_rate": 8.625246154109021e-06, + "loss": 0.3985, + "step": 13216 + }, + { + "epoch": 0.2645847408853189, + "grad_norm": 1.319926142692566, + "learning_rate": 8.625022882614143e-06, + "loss": 0.3293, + "step": 13217 + }, + { + "epoch": 0.26460475940244726, + "grad_norm": 1.0890028476715088, + "learning_rate": 8.62479959588045e-06, + "loss": 0.3054, + "step": 13218 + }, + { + "epoch": 0.2646247779195756, + "grad_norm": 1.1238994598388672, + "learning_rate": 8.624576293908879e-06, + "loss": 0.3409, + "step": 13219 + }, + { + "epoch": 0.26464479643670397, + "grad_norm": 1.0414257049560547, + "learning_rate": 8.624352976700368e-06, + "loss": 0.2975, + "step": 13220 + }, + { + "epoch": 0.2646648149538323, + "grad_norm": 1.2146658897399902, + "learning_rate": 8.624129644255859e-06, + "loss": 0.3359, + "step": 13221 + }, + { + "epoch": 0.2646848334709606, + "grad_norm": 1.0828698873519897, + "learning_rate": 8.62390629657629e-06, + "loss": 0.2916, + "step": 13222 + }, + { + "epoch": 0.26470485198808896, + "grad_norm": 1.1261385679244995, + "learning_rate": 8.623682933662596e-06, + "loss": 0.293, + "step": 13223 + }, + { + "epoch": 0.2647248705052173, + "grad_norm": 1.0497996807098389, + "learning_rate": 8.623459555515719e-06, + "loss": 0.3341, + "step": 13224 + }, + { + "epoch": 0.26474488902234566, + "grad_norm": 1.036991834640503, + "learning_rate": 8.6232361621366e-06, + "loss": 0.2782, + "step": 13225 + }, + { + "epoch": 0.264764907539474, + "grad_norm": 1.1313787698745728, + "learning_rate": 8.623012753526173e-06, + "loss": 0.3299, + "step": 13226 + }, + { + "epoch": 0.26478492605660237, + "grad_norm": 1.1208250522613525, + "learning_rate": 8.622789329685382e-06, + "loss": 0.3349, + "step": 13227 + }, + { + "epoch": 0.2648049445737307, + "grad_norm": 1.14285147190094, + "learning_rate": 8.622565890615164e-06, + "loss": 0.3059, + "step": 13228 + }, + { + "epoch": 0.26482496309085907, + "grad_norm": 1.0843591690063477, + "learning_rate": 8.622342436316458e-06, + "loss": 0.3065, + "step": 13229 + }, + { + "epoch": 0.26484498160798736, + "grad_norm": 1.8907729387283325, + "learning_rate": 8.622118966790205e-06, + "loss": 0.9226, + "step": 13230 + }, + { + "epoch": 0.2648650001251157, + "grad_norm": 1.1966220140457153, + "learning_rate": 8.621895482037342e-06, + "loss": 0.3466, + "step": 13231 + }, + { + "epoch": 0.26488501864224406, + "grad_norm": 1.0674619674682617, + "learning_rate": 8.621671982058811e-06, + "loss": 0.2899, + "step": 13232 + }, + { + "epoch": 0.2649050371593724, + "grad_norm": 1.140877604484558, + "learning_rate": 8.62144846685555e-06, + "loss": 0.3529, + "step": 13233 + }, + { + "epoch": 0.26492505567650076, + "grad_norm": 1.1320135593414307, + "learning_rate": 8.621224936428498e-06, + "loss": 0.3001, + "step": 13234 + }, + { + "epoch": 0.2649450741936291, + "grad_norm": 1.0902117490768433, + "learning_rate": 8.621001390778596e-06, + "loss": 0.304, + "step": 13235 + }, + { + "epoch": 0.26496509271075747, + "grad_norm": 1.0800617933273315, + "learning_rate": 8.620777829906786e-06, + "loss": 0.3286, + "step": 13236 + }, + { + "epoch": 0.2649851112278858, + "grad_norm": 1.074880599975586, + "learning_rate": 8.620554253814001e-06, + "loss": 0.3059, + "step": 13237 + }, + { + "epoch": 0.2650051297450141, + "grad_norm": 1.1069316864013672, + "learning_rate": 8.620330662501188e-06, + "loss": 0.2757, + "step": 13238 + }, + { + "epoch": 0.26502514826214246, + "grad_norm": 1.088367223739624, + "learning_rate": 8.620107055969284e-06, + "loss": 0.3468, + "step": 13239 + }, + { + "epoch": 0.2650451667792708, + "grad_norm": 1.9614059925079346, + "learning_rate": 8.619883434219228e-06, + "loss": 0.7781, + "step": 13240 + }, + { + "epoch": 0.26506518529639916, + "grad_norm": 1.3856056928634644, + "learning_rate": 8.619659797251963e-06, + "loss": 0.3187, + "step": 13241 + }, + { + "epoch": 0.2650852038135275, + "grad_norm": 1.2166229486465454, + "learning_rate": 8.619436145068423e-06, + "loss": 0.3438, + "step": 13242 + }, + { + "epoch": 0.26510522233065587, + "grad_norm": 1.122320294380188, + "learning_rate": 8.619212477669557e-06, + "loss": 0.2744, + "step": 13243 + }, + { + "epoch": 0.2651252408477842, + "grad_norm": 1.0443342924118042, + "learning_rate": 8.618988795056297e-06, + "loss": 0.2867, + "step": 13244 + }, + { + "epoch": 0.26514525936491257, + "grad_norm": 1.2043434381484985, + "learning_rate": 8.61876509722959e-06, + "loss": 0.3426, + "step": 13245 + }, + { + "epoch": 0.26516527788204086, + "grad_norm": 1.0573674440383911, + "learning_rate": 8.618541384190371e-06, + "loss": 0.2848, + "step": 13246 + }, + { + "epoch": 0.2651852963991692, + "grad_norm": 1.0640790462493896, + "learning_rate": 8.618317655939583e-06, + "loss": 0.3096, + "step": 13247 + }, + { + "epoch": 0.26520531491629756, + "grad_norm": 1.124286413192749, + "learning_rate": 8.618093912478168e-06, + "loss": 0.2987, + "step": 13248 + }, + { + "epoch": 0.2652253334334259, + "grad_norm": 1.0591208934783936, + "learning_rate": 8.617870153807062e-06, + "loss": 0.3139, + "step": 13249 + }, + { + "epoch": 0.26524535195055426, + "grad_norm": 1.0508877038955688, + "learning_rate": 8.61764637992721e-06, + "loss": 0.3177, + "step": 13250 + }, + { + "epoch": 0.2652653704676826, + "grad_norm": 1.121888279914856, + "learning_rate": 8.617422590839552e-06, + "loss": 0.323, + "step": 13251 + }, + { + "epoch": 0.26528538898481097, + "grad_norm": 1.2488162517547607, + "learning_rate": 8.617198786545026e-06, + "loss": 0.3124, + "step": 13252 + }, + { + "epoch": 0.2653054075019393, + "grad_norm": 1.1581904888153076, + "learning_rate": 8.616974967044576e-06, + "loss": 0.3472, + "step": 13253 + }, + { + "epoch": 0.2653254260190676, + "grad_norm": 1.132334589958191, + "learning_rate": 8.616751132339141e-06, + "loss": 0.3482, + "step": 13254 + }, + { + "epoch": 0.26534544453619596, + "grad_norm": 1.9931385517120361, + "learning_rate": 8.616527282429663e-06, + "loss": 0.8482, + "step": 13255 + }, + { + "epoch": 0.2653654630533243, + "grad_norm": 1.3261321783065796, + "learning_rate": 8.616303417317083e-06, + "loss": 0.3468, + "step": 13256 + }, + { + "epoch": 0.26538548157045266, + "grad_norm": 1.155861735343933, + "learning_rate": 8.61607953700234e-06, + "loss": 0.3407, + "step": 13257 + }, + { + "epoch": 0.265405500087581, + "grad_norm": 1.0547982454299927, + "learning_rate": 8.61585564148638e-06, + "loss": 0.2992, + "step": 13258 + }, + { + "epoch": 0.26542551860470937, + "grad_norm": 1.103947401046753, + "learning_rate": 8.615631730770137e-06, + "loss": 0.3461, + "step": 13259 + }, + { + "epoch": 0.2654455371218377, + "grad_norm": 1.1555628776550293, + "learning_rate": 8.615407804854558e-06, + "loss": 0.3781, + "step": 13260 + }, + { + "epoch": 0.26546555563896607, + "grad_norm": 1.2017152309417725, + "learning_rate": 8.615183863740584e-06, + "loss": 0.2891, + "step": 13261 + }, + { + "epoch": 0.26548557415609436, + "grad_norm": 0.9703893661499023, + "learning_rate": 8.614959907429155e-06, + "loss": 0.2899, + "step": 13262 + }, + { + "epoch": 0.2655055926732227, + "grad_norm": 1.8019739389419556, + "learning_rate": 8.61473593592121e-06, + "loss": 0.7832, + "step": 13263 + }, + { + "epoch": 0.26552561119035106, + "grad_norm": 1.0783100128173828, + "learning_rate": 8.614511949217695e-06, + "loss": 0.3263, + "step": 13264 + }, + { + "epoch": 0.2655456297074794, + "grad_norm": 1.1610743999481201, + "learning_rate": 8.614287947319549e-06, + "loss": 0.3446, + "step": 13265 + }, + { + "epoch": 0.26556564822460776, + "grad_norm": 1.2139403820037842, + "learning_rate": 8.614063930227713e-06, + "loss": 0.277, + "step": 13266 + }, + { + "epoch": 0.2655856667417361, + "grad_norm": 1.9251614809036255, + "learning_rate": 8.613839897943133e-06, + "loss": 0.8608, + "step": 13267 + }, + { + "epoch": 0.26560568525886447, + "grad_norm": 1.8512178659439087, + "learning_rate": 8.613615850466745e-06, + "loss": 0.8324, + "step": 13268 + }, + { + "epoch": 0.2656257037759928, + "grad_norm": 1.026549220085144, + "learning_rate": 8.613391787799496e-06, + "loss": 0.3404, + "step": 13269 + }, + { + "epoch": 0.2656457222931211, + "grad_norm": 1.1009283065795898, + "learning_rate": 8.613167709942324e-06, + "loss": 0.2958, + "step": 13270 + }, + { + "epoch": 0.26566574081024946, + "grad_norm": 1.0919787883758545, + "learning_rate": 8.612943616896172e-06, + "loss": 0.3332, + "step": 13271 + }, + { + "epoch": 0.2656857593273778, + "grad_norm": 1.1159462928771973, + "learning_rate": 8.612719508661984e-06, + "loss": 0.3659, + "step": 13272 + }, + { + "epoch": 0.26570577784450616, + "grad_norm": 0.9805592894554138, + "learning_rate": 8.6124953852407e-06, + "loss": 0.3055, + "step": 13273 + }, + { + "epoch": 0.2657257963616345, + "grad_norm": 1.845473289489746, + "learning_rate": 8.612271246633264e-06, + "loss": 0.8206, + "step": 13274 + }, + { + "epoch": 0.26574581487876286, + "grad_norm": 1.1924855709075928, + "learning_rate": 8.612047092840613e-06, + "loss": 0.31, + "step": 13275 + }, + { + "epoch": 0.2657658333958912, + "grad_norm": 2.079871416091919, + "learning_rate": 8.611822923863697e-06, + "loss": 0.7865, + "step": 13276 + }, + { + "epoch": 0.26578585191301957, + "grad_norm": 1.0860495567321777, + "learning_rate": 8.611598739703455e-06, + "loss": 0.3559, + "step": 13277 + }, + { + "epoch": 0.26580587043014786, + "grad_norm": 1.1429948806762695, + "learning_rate": 8.611374540360827e-06, + "loss": 0.3356, + "step": 13278 + }, + { + "epoch": 0.2658258889472762, + "grad_norm": 1.1191359758377075, + "learning_rate": 8.61115032583676e-06, + "loss": 0.3625, + "step": 13279 + }, + { + "epoch": 0.26584590746440456, + "grad_norm": 1.088223934173584, + "learning_rate": 8.610926096132192e-06, + "loss": 0.307, + "step": 13280 + }, + { + "epoch": 0.2658659259815329, + "grad_norm": 1.0082505941390991, + "learning_rate": 8.61070185124807e-06, + "loss": 0.3206, + "step": 13281 + }, + { + "epoch": 0.26588594449866126, + "grad_norm": 1.0585567951202393, + "learning_rate": 8.610477591185331e-06, + "loss": 0.3059, + "step": 13282 + }, + { + "epoch": 0.2659059630157896, + "grad_norm": 1.1709548234939575, + "learning_rate": 8.610253315944923e-06, + "loss": 0.3324, + "step": 13283 + }, + { + "epoch": 0.26592598153291797, + "grad_norm": 1.201032280921936, + "learning_rate": 8.610029025527789e-06, + "loss": 0.3048, + "step": 13284 + }, + { + "epoch": 0.2659460000500463, + "grad_norm": 1.0052636861801147, + "learning_rate": 8.609804719934867e-06, + "loss": 0.3682, + "step": 13285 + }, + { + "epoch": 0.2659660185671746, + "grad_norm": 1.0239368677139282, + "learning_rate": 8.609580399167104e-06, + "loss": 0.2823, + "step": 13286 + }, + { + "epoch": 0.26598603708430296, + "grad_norm": 1.002804160118103, + "learning_rate": 8.609356063225442e-06, + "loss": 0.2844, + "step": 13287 + }, + { + "epoch": 0.2660060556014313, + "grad_norm": 1.0490630865097046, + "learning_rate": 8.609131712110823e-06, + "loss": 0.3413, + "step": 13288 + }, + { + "epoch": 0.26602607411855966, + "grad_norm": 1.1424098014831543, + "learning_rate": 8.608907345824192e-06, + "loss": 0.3541, + "step": 13289 + }, + { + "epoch": 0.266046092635688, + "grad_norm": 1.111830711364746, + "learning_rate": 8.60868296436649e-06, + "loss": 0.3551, + "step": 13290 + }, + { + "epoch": 0.26606611115281636, + "grad_norm": 1.175707459449768, + "learning_rate": 8.608458567738662e-06, + "loss": 0.3271, + "step": 13291 + }, + { + "epoch": 0.2660861296699447, + "grad_norm": 1.046958088874817, + "learning_rate": 8.608234155941652e-06, + "loss": 0.3275, + "step": 13292 + }, + { + "epoch": 0.26610614818707307, + "grad_norm": 1.0903851985931396, + "learning_rate": 8.608009728976401e-06, + "loss": 0.3144, + "step": 13293 + }, + { + "epoch": 0.26612616670420136, + "grad_norm": 1.1148449182510376, + "learning_rate": 8.607785286843853e-06, + "loss": 0.3245, + "step": 13294 + }, + { + "epoch": 0.2661461852213297, + "grad_norm": 1.1167140007019043, + "learning_rate": 8.607560829544952e-06, + "loss": 0.3238, + "step": 13295 + }, + { + "epoch": 0.26616620373845806, + "grad_norm": 1.002134919166565, + "learning_rate": 8.607336357080642e-06, + "loss": 0.2995, + "step": 13296 + }, + { + "epoch": 0.2661862222555864, + "grad_norm": 1.0322810411453247, + "learning_rate": 8.607111869451867e-06, + "loss": 0.336, + "step": 13297 + }, + { + "epoch": 0.26620624077271476, + "grad_norm": 1.6508057117462158, + "learning_rate": 8.606887366659568e-06, + "loss": 0.2914, + "step": 13298 + }, + { + "epoch": 0.2662262592898431, + "grad_norm": 1.0250860452651978, + "learning_rate": 8.606662848704693e-06, + "loss": 0.3343, + "step": 13299 + }, + { + "epoch": 0.26624627780697147, + "grad_norm": 1.0214987993240356, + "learning_rate": 8.606438315588184e-06, + "loss": 0.32, + "step": 13300 + }, + { + "epoch": 0.2662662963240998, + "grad_norm": 1.1735320091247559, + "learning_rate": 8.606213767310982e-06, + "loss": 0.297, + "step": 13301 + }, + { + "epoch": 0.2662863148412281, + "grad_norm": 1.2651602029800415, + "learning_rate": 8.605989203874034e-06, + "loss": 0.351, + "step": 13302 + }, + { + "epoch": 0.26630633335835646, + "grad_norm": 1.3232039213180542, + "learning_rate": 8.605764625278285e-06, + "loss": 0.3508, + "step": 13303 + }, + { + "epoch": 0.2663263518754848, + "grad_norm": 1.1764683723449707, + "learning_rate": 8.605540031524674e-06, + "loss": 0.3337, + "step": 13304 + }, + { + "epoch": 0.26634637039261316, + "grad_norm": 1.0449585914611816, + "learning_rate": 8.605315422614153e-06, + "loss": 0.3173, + "step": 13305 + }, + { + "epoch": 0.2663663889097415, + "grad_norm": 1.1396827697753906, + "learning_rate": 8.60509079854766e-06, + "loss": 0.3463, + "step": 13306 + }, + { + "epoch": 0.26638640742686986, + "grad_norm": 0.9707316756248474, + "learning_rate": 8.60486615932614e-06, + "loss": 0.3038, + "step": 13307 + }, + { + "epoch": 0.2664064259439982, + "grad_norm": 1.9387887716293335, + "learning_rate": 8.604641504950539e-06, + "loss": 0.8364, + "step": 13308 + }, + { + "epoch": 0.26642644446112657, + "grad_norm": 1.0611209869384766, + "learning_rate": 8.604416835421803e-06, + "loss": 0.338, + "step": 13309 + }, + { + "epoch": 0.26644646297825486, + "grad_norm": 1.176741600036621, + "learning_rate": 8.604192150740871e-06, + "loss": 0.3836, + "step": 13310 + }, + { + "epoch": 0.2664664814953832, + "grad_norm": 1.0494749546051025, + "learning_rate": 8.603967450908693e-06, + "loss": 0.3368, + "step": 13311 + }, + { + "epoch": 0.26648650001251156, + "grad_norm": 1.0586758852005005, + "learning_rate": 8.603742735926211e-06, + "loss": 0.2986, + "step": 13312 + }, + { + "epoch": 0.2665065185296399, + "grad_norm": 1.1667624711990356, + "learning_rate": 8.603518005794369e-06, + "loss": 0.3014, + "step": 13313 + }, + { + "epoch": 0.26652653704676826, + "grad_norm": 1.8347662687301636, + "learning_rate": 8.603293260514114e-06, + "loss": 0.8488, + "step": 13314 + }, + { + "epoch": 0.2665465555638966, + "grad_norm": 1.1732051372528076, + "learning_rate": 8.603068500086388e-06, + "loss": 0.3068, + "step": 13315 + }, + { + "epoch": 0.26656657408102497, + "grad_norm": 1.87735116481781, + "learning_rate": 8.60284372451214e-06, + "loss": 0.836, + "step": 13316 + }, + { + "epoch": 0.2665865925981533, + "grad_norm": 1.3023661375045776, + "learning_rate": 8.60261893379231e-06, + "loss": 0.357, + "step": 13317 + }, + { + "epoch": 0.2666066111152816, + "grad_norm": 1.7848848104476929, + "learning_rate": 8.602394127927846e-06, + "loss": 0.8388, + "step": 13318 + }, + { + "epoch": 0.26662662963240996, + "grad_norm": 1.244100570678711, + "learning_rate": 8.602169306919694e-06, + "loss": 0.3283, + "step": 13319 + }, + { + "epoch": 0.2666466481495383, + "grad_norm": 1.0732810497283936, + "learning_rate": 8.601944470768795e-06, + "loss": 0.3176, + "step": 13320 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.1096664667129517, + "learning_rate": 8.601719619476099e-06, + "loss": 0.3, + "step": 13321 + }, + { + "epoch": 0.266686685183795, + "grad_norm": 1.7459546327590942, + "learning_rate": 8.601494753042547e-06, + "loss": 0.8568, + "step": 13322 + }, + { + "epoch": 0.26670670370092336, + "grad_norm": 1.2803179025650024, + "learning_rate": 8.601269871469086e-06, + "loss": 0.3589, + "step": 13323 + }, + { + "epoch": 0.2667267222180517, + "grad_norm": 1.092144250869751, + "learning_rate": 8.601044974756662e-06, + "loss": 0.3329, + "step": 13324 + }, + { + "epoch": 0.26674674073518007, + "grad_norm": 1.0874513387680054, + "learning_rate": 8.60082006290622e-06, + "loss": 0.2975, + "step": 13325 + }, + { + "epoch": 0.26676675925230836, + "grad_norm": 1.9451076984405518, + "learning_rate": 8.600595135918705e-06, + "loss": 0.8471, + "step": 13326 + }, + { + "epoch": 0.2667867777694367, + "grad_norm": 1.8637913465499878, + "learning_rate": 8.600370193795063e-06, + "loss": 0.8394, + "step": 13327 + }, + { + "epoch": 0.26680679628656506, + "grad_norm": 1.3113828897476196, + "learning_rate": 8.60014523653624e-06, + "loss": 0.2724, + "step": 13328 + }, + { + "epoch": 0.2668268148036934, + "grad_norm": 1.1301991939544678, + "learning_rate": 8.59992026414318e-06, + "loss": 0.3831, + "step": 13329 + }, + { + "epoch": 0.26684683332082176, + "grad_norm": 1.0344364643096924, + "learning_rate": 8.59969527661683e-06, + "loss": 0.3431, + "step": 13330 + }, + { + "epoch": 0.2668668518379501, + "grad_norm": 1.0832947492599487, + "learning_rate": 8.599470273958137e-06, + "loss": 0.3124, + "step": 13331 + }, + { + "epoch": 0.26688687035507846, + "grad_norm": 1.2154179811477661, + "learning_rate": 8.599245256168046e-06, + "loss": 0.292, + "step": 13332 + }, + { + "epoch": 0.2669068888722068, + "grad_norm": 1.8409188985824585, + "learning_rate": 8.5990202232475e-06, + "loss": 0.7725, + "step": 13333 + }, + { + "epoch": 0.2669269073893351, + "grad_norm": 1.1866732835769653, + "learning_rate": 8.59879517519745e-06, + "loss": 0.3078, + "step": 13334 + }, + { + "epoch": 0.26694692590646346, + "grad_norm": 1.0941312313079834, + "learning_rate": 8.598570112018837e-06, + "loss": 0.3341, + "step": 13335 + }, + { + "epoch": 0.2669669444235918, + "grad_norm": 1.0789299011230469, + "learning_rate": 8.598345033712613e-06, + "loss": 0.3129, + "step": 13336 + }, + { + "epoch": 0.26698696294072016, + "grad_norm": 1.2100226879119873, + "learning_rate": 8.598119940279718e-06, + "loss": 0.3126, + "step": 13337 + }, + { + "epoch": 0.2670069814578485, + "grad_norm": 1.0627901554107666, + "learning_rate": 8.597894831721104e-06, + "loss": 0.3285, + "step": 13338 + }, + { + "epoch": 0.26702699997497686, + "grad_norm": 1.0789157152175903, + "learning_rate": 8.597669708037713e-06, + "loss": 0.3171, + "step": 13339 + }, + { + "epoch": 0.2670470184921052, + "grad_norm": 1.0788756608963013, + "learning_rate": 8.597444569230491e-06, + "loss": 0.3006, + "step": 13340 + }, + { + "epoch": 0.26706703700923357, + "grad_norm": 1.146808385848999, + "learning_rate": 8.597219415300388e-06, + "loss": 0.3393, + "step": 13341 + }, + { + "epoch": 0.26708705552636186, + "grad_norm": 1.0614879131317139, + "learning_rate": 8.596994246248347e-06, + "loss": 0.3048, + "step": 13342 + }, + { + "epoch": 0.2671070740434902, + "grad_norm": 1.043549656867981, + "learning_rate": 8.596769062075318e-06, + "loss": 0.3143, + "step": 13343 + }, + { + "epoch": 0.26712709256061856, + "grad_norm": 1.1298261880874634, + "learning_rate": 8.596543862782247e-06, + "loss": 0.3306, + "step": 13344 + }, + { + "epoch": 0.2671471110777469, + "grad_norm": 1.0316250324249268, + "learning_rate": 8.596318648370077e-06, + "loss": 0.3686, + "step": 13345 + }, + { + "epoch": 0.26716712959487526, + "grad_norm": 1.1598304510116577, + "learning_rate": 8.596093418839759e-06, + "loss": 0.3487, + "step": 13346 + }, + { + "epoch": 0.2671871481120036, + "grad_norm": 1.0589205026626587, + "learning_rate": 8.595868174192237e-06, + "loss": 0.3531, + "step": 13347 + }, + { + "epoch": 0.26720716662913196, + "grad_norm": 1.0213054418563843, + "learning_rate": 8.59564291442846e-06, + "loss": 0.3328, + "step": 13348 + }, + { + "epoch": 0.2672271851462603, + "grad_norm": 1.0737906694412231, + "learning_rate": 8.595417639549373e-06, + "loss": 0.3045, + "step": 13349 + }, + { + "epoch": 0.2672472036633886, + "grad_norm": 1.1385846138000488, + "learning_rate": 8.595192349555925e-06, + "loss": 0.3357, + "step": 13350 + }, + { + "epoch": 0.26726722218051696, + "grad_norm": 1.2358746528625488, + "learning_rate": 8.594967044449061e-06, + "loss": 0.353, + "step": 13351 + }, + { + "epoch": 0.2672872406976453, + "grad_norm": 1.0659924745559692, + "learning_rate": 8.59474172422973e-06, + "loss": 0.2815, + "step": 13352 + }, + { + "epoch": 0.26730725921477366, + "grad_norm": 1.1105159521102905, + "learning_rate": 8.594516388898878e-06, + "loss": 0.3279, + "step": 13353 + }, + { + "epoch": 0.267327277731902, + "grad_norm": 1.214877724647522, + "learning_rate": 8.594291038457454e-06, + "loss": 0.2647, + "step": 13354 + }, + { + "epoch": 0.26734729624903036, + "grad_norm": 1.9366201162338257, + "learning_rate": 8.594065672906403e-06, + "loss": 0.8397, + "step": 13355 + }, + { + "epoch": 0.2673673147661587, + "grad_norm": 1.9452861547470093, + "learning_rate": 8.593840292246672e-06, + "loss": 0.8409, + "step": 13356 + }, + { + "epoch": 0.26738733328328707, + "grad_norm": 1.1045931577682495, + "learning_rate": 8.593614896479212e-06, + "loss": 0.3185, + "step": 13357 + }, + { + "epoch": 0.26740735180041536, + "grad_norm": 1.0644856691360474, + "learning_rate": 8.593389485604966e-06, + "loss": 0.3296, + "step": 13358 + }, + { + "epoch": 0.2674273703175437, + "grad_norm": 1.0181022882461548, + "learning_rate": 8.593164059624886e-06, + "loss": 0.3029, + "step": 13359 + }, + { + "epoch": 0.26744738883467206, + "grad_norm": 1.1722373962402344, + "learning_rate": 8.592938618539917e-06, + "loss": 0.3562, + "step": 13360 + }, + { + "epoch": 0.2674674073518004, + "grad_norm": 1.0587488412857056, + "learning_rate": 8.592713162351007e-06, + "loss": 0.282, + "step": 13361 + }, + { + "epoch": 0.26748742586892876, + "grad_norm": 1.041372537612915, + "learning_rate": 8.592487691059103e-06, + "loss": 0.2903, + "step": 13362 + }, + { + "epoch": 0.2675074443860571, + "grad_norm": 1.845009684562683, + "learning_rate": 8.592262204665156e-06, + "loss": 0.3937, + "step": 13363 + }, + { + "epoch": 0.26752746290318546, + "grad_norm": 0.9597002863883972, + "learning_rate": 8.59203670317011e-06, + "loss": 0.2728, + "step": 13364 + }, + { + "epoch": 0.2675474814203138, + "grad_norm": 1.0772733688354492, + "learning_rate": 8.591811186574915e-06, + "loss": 0.3032, + "step": 13365 + }, + { + "epoch": 0.2675674999374421, + "grad_norm": 1.3093920946121216, + "learning_rate": 8.59158565488052e-06, + "loss": 0.3678, + "step": 13366 + }, + { + "epoch": 0.26758751845457046, + "grad_norm": 1.1742310523986816, + "learning_rate": 8.591360108087871e-06, + "loss": 0.3169, + "step": 13367 + }, + { + "epoch": 0.2676075369716988, + "grad_norm": 1.1611336469650269, + "learning_rate": 8.591134546197916e-06, + "loss": 0.307, + "step": 13368 + }, + { + "epoch": 0.26762755548882716, + "grad_norm": 1.1619129180908203, + "learning_rate": 8.590908969211605e-06, + "loss": 0.305, + "step": 13369 + }, + { + "epoch": 0.2676475740059555, + "grad_norm": 1.0711593627929688, + "learning_rate": 8.590683377129887e-06, + "loss": 0.2906, + "step": 13370 + }, + { + "epoch": 0.26766759252308386, + "grad_norm": 0.9852688312530518, + "learning_rate": 8.590457769953705e-06, + "loss": 0.3053, + "step": 13371 + }, + { + "epoch": 0.2676876110402122, + "grad_norm": 1.2335885763168335, + "learning_rate": 8.590232147684015e-06, + "loss": 0.3181, + "step": 13372 + }, + { + "epoch": 0.26770762955734057, + "grad_norm": 1.1037791967391968, + "learning_rate": 8.59000651032176e-06, + "loss": 0.29, + "step": 13373 + }, + { + "epoch": 0.26772764807446886, + "grad_norm": 0.992099940776825, + "learning_rate": 8.589780857867892e-06, + "loss": 0.2869, + "step": 13374 + }, + { + "epoch": 0.2677476665915972, + "grad_norm": 1.8844480514526367, + "learning_rate": 8.589555190323355e-06, + "loss": 0.818, + "step": 13375 + }, + { + "epoch": 0.26776768510872556, + "grad_norm": 1.2133445739746094, + "learning_rate": 8.5893295076891e-06, + "loss": 0.3788, + "step": 13376 + }, + { + "epoch": 0.2677877036258539, + "grad_norm": 1.099482536315918, + "learning_rate": 8.58910380996608e-06, + "loss": 0.2956, + "step": 13377 + }, + { + "epoch": 0.26780772214298226, + "grad_norm": 1.0237318277359009, + "learning_rate": 8.588878097155238e-06, + "loss": 0.3325, + "step": 13378 + }, + { + "epoch": 0.2678277406601106, + "grad_norm": 1.1517678499221802, + "learning_rate": 8.588652369257523e-06, + "loss": 0.3243, + "step": 13379 + }, + { + "epoch": 0.26784775917723896, + "grad_norm": 1.085566759109497, + "learning_rate": 8.58842662627389e-06, + "loss": 0.3162, + "step": 13380 + }, + { + "epoch": 0.2678677776943673, + "grad_norm": 1.181411623954773, + "learning_rate": 8.58820086820528e-06, + "loss": 0.3714, + "step": 13381 + }, + { + "epoch": 0.2678877962114956, + "grad_norm": 1.0649489164352417, + "learning_rate": 8.587975095052648e-06, + "loss": 0.3709, + "step": 13382 + }, + { + "epoch": 0.26790781472862396, + "grad_norm": 1.0158778429031372, + "learning_rate": 8.58774930681694e-06, + "loss": 0.3248, + "step": 13383 + }, + { + "epoch": 0.2679278332457523, + "grad_norm": 1.2927955389022827, + "learning_rate": 8.587523503499105e-06, + "loss": 0.2991, + "step": 13384 + }, + { + "epoch": 0.26794785176288066, + "grad_norm": 1.1191368103027344, + "learning_rate": 8.587297685100094e-06, + "loss": 0.3393, + "step": 13385 + }, + { + "epoch": 0.267967870280009, + "grad_norm": 1.0368891954421997, + "learning_rate": 8.587071851620856e-06, + "loss": 0.313, + "step": 13386 + }, + { + "epoch": 0.26798788879713736, + "grad_norm": 1.1159605979919434, + "learning_rate": 8.586846003062337e-06, + "loss": 0.3074, + "step": 13387 + }, + { + "epoch": 0.2680079073142657, + "grad_norm": 1.1109328269958496, + "learning_rate": 8.586620139425492e-06, + "loss": 0.3618, + "step": 13388 + }, + { + "epoch": 0.26802792583139406, + "grad_norm": 1.2186319828033447, + "learning_rate": 8.586394260711268e-06, + "loss": 0.2764, + "step": 13389 + }, + { + "epoch": 0.26804794434852236, + "grad_norm": 1.253002643585205, + "learning_rate": 8.586168366920614e-06, + "loss": 0.3438, + "step": 13390 + }, + { + "epoch": 0.2680679628656507, + "grad_norm": 1.2530194520950317, + "learning_rate": 8.58594245805448e-06, + "loss": 0.2708, + "step": 13391 + }, + { + "epoch": 0.26808798138277906, + "grad_norm": 1.0311158895492554, + "learning_rate": 8.585716534113815e-06, + "loss": 0.3151, + "step": 13392 + }, + { + "epoch": 0.2681079998999074, + "grad_norm": 1.1211738586425781, + "learning_rate": 8.58549059509957e-06, + "loss": 0.2971, + "step": 13393 + }, + { + "epoch": 0.26812801841703576, + "grad_norm": 1.071760892868042, + "learning_rate": 8.585264641012693e-06, + "loss": 0.3489, + "step": 13394 + }, + { + "epoch": 0.2681480369341641, + "grad_norm": 1.1690421104431152, + "learning_rate": 8.585038671854137e-06, + "loss": 0.3481, + "step": 13395 + }, + { + "epoch": 0.26816805545129246, + "grad_norm": 1.0781888961791992, + "learning_rate": 8.584812687624849e-06, + "loss": 0.3662, + "step": 13396 + }, + { + "epoch": 0.2681880739684208, + "grad_norm": 1.111655831336975, + "learning_rate": 8.584586688325779e-06, + "loss": 0.3171, + "step": 13397 + }, + { + "epoch": 0.2682080924855491, + "grad_norm": 1.1592686176300049, + "learning_rate": 8.58436067395788e-06, + "loss": 0.3547, + "step": 13398 + }, + { + "epoch": 0.26822811100267746, + "grad_norm": 1.1376941204071045, + "learning_rate": 8.584134644522097e-06, + "loss": 0.3377, + "step": 13399 + }, + { + "epoch": 0.2682481295198058, + "grad_norm": 1.224453091621399, + "learning_rate": 8.583908600019384e-06, + "loss": 0.3243, + "step": 13400 + }, + { + "epoch": 0.26826814803693416, + "grad_norm": 1.1618494987487793, + "learning_rate": 8.583682540450693e-06, + "loss": 0.335, + "step": 13401 + }, + { + "epoch": 0.2682881665540625, + "grad_norm": 0.9862871170043945, + "learning_rate": 8.58345646581697e-06, + "loss": 0.2946, + "step": 13402 + }, + { + "epoch": 0.26830818507119086, + "grad_norm": 1.0950462818145752, + "learning_rate": 8.583230376119168e-06, + "loss": 0.2944, + "step": 13403 + }, + { + "epoch": 0.2683282035883192, + "grad_norm": 1.1786407232284546, + "learning_rate": 8.583004271358235e-06, + "loss": 0.3641, + "step": 13404 + }, + { + "epoch": 0.26834822210544756, + "grad_norm": 1.0996742248535156, + "learning_rate": 8.582778151535124e-06, + "loss": 0.3507, + "step": 13405 + }, + { + "epoch": 0.26836824062257586, + "grad_norm": 1.1758803129196167, + "learning_rate": 8.582552016650786e-06, + "loss": 0.2804, + "step": 13406 + }, + { + "epoch": 0.2683882591397042, + "grad_norm": 1.1199573278427124, + "learning_rate": 8.58232586670617e-06, + "loss": 0.3071, + "step": 13407 + }, + { + "epoch": 0.26840827765683256, + "grad_norm": 1.390017032623291, + "learning_rate": 8.582099701702225e-06, + "loss": 0.3866, + "step": 13408 + }, + { + "epoch": 0.2684282961739609, + "grad_norm": 0.9799193739891052, + "learning_rate": 8.581873521639905e-06, + "loss": 0.2754, + "step": 13409 + }, + { + "epoch": 0.26844831469108926, + "grad_norm": 1.4220613241195679, + "learning_rate": 8.581647326520159e-06, + "loss": 0.3138, + "step": 13410 + }, + { + "epoch": 0.2684683332082176, + "grad_norm": 1.1051626205444336, + "learning_rate": 8.581421116343939e-06, + "loss": 0.3107, + "step": 13411 + }, + { + "epoch": 0.26848835172534596, + "grad_norm": 1.1178892850875854, + "learning_rate": 8.581194891112195e-06, + "loss": 0.3674, + "step": 13412 + }, + { + "epoch": 0.2685083702424743, + "grad_norm": 1.09462571144104, + "learning_rate": 8.58096865082588e-06, + "loss": 0.3231, + "step": 13413 + }, + { + "epoch": 0.2685283887596026, + "grad_norm": 1.1109702587127686, + "learning_rate": 8.580742395485942e-06, + "loss": 0.3061, + "step": 13414 + }, + { + "epoch": 0.26854840727673096, + "grad_norm": 1.1588542461395264, + "learning_rate": 8.580516125093331e-06, + "loss": 0.3266, + "step": 13415 + }, + { + "epoch": 0.2685684257938593, + "grad_norm": 1.106231689453125, + "learning_rate": 8.580289839649005e-06, + "loss": 0.3373, + "step": 13416 + }, + { + "epoch": 0.26858844431098766, + "grad_norm": 1.797897458076477, + "learning_rate": 8.58006353915391e-06, + "loss": 0.8435, + "step": 13417 + }, + { + "epoch": 0.268608462828116, + "grad_norm": 1.0555955171585083, + "learning_rate": 8.579837223608999e-06, + "loss": 0.357, + "step": 13418 + }, + { + "epoch": 0.26862848134524436, + "grad_norm": 1.0247681140899658, + "learning_rate": 8.57961089301522e-06, + "loss": 0.3597, + "step": 13419 + }, + { + "epoch": 0.2686484998623727, + "grad_norm": 1.1653739213943481, + "learning_rate": 8.579384547373528e-06, + "loss": 0.3825, + "step": 13420 + }, + { + "epoch": 0.26866851837950106, + "grad_norm": 1.0536085367202759, + "learning_rate": 8.579158186684875e-06, + "loss": 0.3324, + "step": 13421 + }, + { + "epoch": 0.26868853689662936, + "grad_norm": 1.1935092210769653, + "learning_rate": 8.57893181095021e-06, + "loss": 0.2962, + "step": 13422 + }, + { + "epoch": 0.2687085554137577, + "grad_norm": 1.029174566268921, + "learning_rate": 8.578705420170485e-06, + "loss": 0.2795, + "step": 13423 + }, + { + "epoch": 0.26872857393088606, + "grad_norm": 1.2117677927017212, + "learning_rate": 8.578479014346655e-06, + "loss": 0.3449, + "step": 13424 + }, + { + "epoch": 0.2687485924480144, + "grad_norm": 1.9880813360214233, + "learning_rate": 8.578252593479668e-06, + "loss": 0.8419, + "step": 13425 + }, + { + "epoch": 0.26876861096514276, + "grad_norm": 1.91939115524292, + "learning_rate": 8.578026157570477e-06, + "loss": 0.8113, + "step": 13426 + }, + { + "epoch": 0.2687886294822711, + "grad_norm": 1.0524811744689941, + "learning_rate": 8.577799706620035e-06, + "loss": 0.3215, + "step": 13427 + }, + { + "epoch": 0.26880864799939946, + "grad_norm": 1.0949723720550537, + "learning_rate": 8.57757324062929e-06, + "loss": 0.3331, + "step": 13428 + }, + { + "epoch": 0.2688286665165278, + "grad_norm": 1.1043940782546997, + "learning_rate": 8.5773467595992e-06, + "loss": 0.3333, + "step": 13429 + }, + { + "epoch": 0.2688486850336561, + "grad_norm": 1.9104516506195068, + "learning_rate": 8.577120263530714e-06, + "loss": 0.906, + "step": 13430 + }, + { + "epoch": 0.26886870355078446, + "grad_norm": 1.0913105010986328, + "learning_rate": 8.576893752424785e-06, + "loss": 0.3697, + "step": 13431 + }, + { + "epoch": 0.2688887220679128, + "grad_norm": 1.3172482252120972, + "learning_rate": 8.576667226282362e-06, + "loss": 0.334, + "step": 13432 + }, + { + "epoch": 0.26890874058504116, + "grad_norm": 1.1452513933181763, + "learning_rate": 8.576440685104401e-06, + "loss": 0.3736, + "step": 13433 + }, + { + "epoch": 0.2689287591021695, + "grad_norm": 2.016022205352783, + "learning_rate": 8.576214128891853e-06, + "loss": 0.8411, + "step": 13434 + }, + { + "epoch": 0.26894877761929786, + "grad_norm": 1.0409185886383057, + "learning_rate": 8.575987557645669e-06, + "loss": 0.3273, + "step": 13435 + }, + { + "epoch": 0.2689687961364262, + "grad_norm": 1.234182596206665, + "learning_rate": 8.575760971366805e-06, + "loss": 0.3249, + "step": 13436 + }, + { + "epoch": 0.26898881465355456, + "grad_norm": 1.1574212312698364, + "learning_rate": 8.57553437005621e-06, + "loss": 0.3626, + "step": 13437 + }, + { + "epoch": 0.26900883317068286, + "grad_norm": 1.1564936637878418, + "learning_rate": 8.57530775371484e-06, + "loss": 0.3144, + "step": 13438 + }, + { + "epoch": 0.2690288516878112, + "grad_norm": 1.2021207809448242, + "learning_rate": 8.575081122343643e-06, + "loss": 0.3364, + "step": 13439 + }, + { + "epoch": 0.26904887020493956, + "grad_norm": 1.245468258857727, + "learning_rate": 8.574854475943575e-06, + "loss": 0.2597, + "step": 13440 + }, + { + "epoch": 0.2690688887220679, + "grad_norm": 1.1399811506271362, + "learning_rate": 8.574627814515588e-06, + "loss": 0.3152, + "step": 13441 + }, + { + "epoch": 0.26908890723919626, + "grad_norm": 1.1120485067367554, + "learning_rate": 8.574401138060636e-06, + "loss": 0.3338, + "step": 13442 + }, + { + "epoch": 0.2691089257563246, + "grad_norm": 1.1989490985870361, + "learning_rate": 8.57417444657967e-06, + "loss": 0.3637, + "step": 13443 + }, + { + "epoch": 0.26912894427345296, + "grad_norm": 1.7720110416412354, + "learning_rate": 8.573947740073643e-06, + "loss": 0.835, + "step": 13444 + }, + { + "epoch": 0.2691489627905813, + "grad_norm": 1.1180702447891235, + "learning_rate": 8.573721018543511e-06, + "loss": 0.3247, + "step": 13445 + }, + { + "epoch": 0.2691689813077096, + "grad_norm": 1.198072075843811, + "learning_rate": 8.573494281990223e-06, + "loss": 0.3191, + "step": 13446 + }, + { + "epoch": 0.26918899982483796, + "grad_norm": 1.031134009361267, + "learning_rate": 8.573267530414735e-06, + "loss": 0.3459, + "step": 13447 + }, + { + "epoch": 0.2692090183419663, + "grad_norm": 1.1764962673187256, + "learning_rate": 8.573040763818e-06, + "loss": 0.3252, + "step": 13448 + }, + { + "epoch": 0.26922903685909466, + "grad_norm": 1.0440611839294434, + "learning_rate": 8.572813982200968e-06, + "loss": 0.3367, + "step": 13449 + }, + { + "epoch": 0.269249055376223, + "grad_norm": 1.086463212966919, + "learning_rate": 8.572587185564598e-06, + "loss": 0.3189, + "step": 13450 + }, + { + "epoch": 0.26926907389335136, + "grad_norm": 1.0386302471160889, + "learning_rate": 8.572360373909837e-06, + "loss": 0.3277, + "step": 13451 + }, + { + "epoch": 0.2692890924104797, + "grad_norm": 1.923858880996704, + "learning_rate": 8.572133547237644e-06, + "loss": 0.9454, + "step": 13452 + }, + { + "epoch": 0.26930911092760806, + "grad_norm": 1.1629225015640259, + "learning_rate": 8.57190670554897e-06, + "loss": 0.3753, + "step": 13453 + }, + { + "epoch": 0.26932912944473636, + "grad_norm": 2.033989906311035, + "learning_rate": 8.571679848844768e-06, + "loss": 0.8803, + "step": 13454 + }, + { + "epoch": 0.2693491479618647, + "grad_norm": 1.0215178728103638, + "learning_rate": 8.571452977125993e-06, + "loss": 0.295, + "step": 13455 + }, + { + "epoch": 0.26936916647899306, + "grad_norm": 1.18558669090271, + "learning_rate": 8.571226090393599e-06, + "loss": 0.3336, + "step": 13456 + }, + { + "epoch": 0.2693891849961214, + "grad_norm": 1.0345532894134521, + "learning_rate": 8.570999188648539e-06, + "loss": 0.3478, + "step": 13457 + }, + { + "epoch": 0.26940920351324976, + "grad_norm": 1.0936000347137451, + "learning_rate": 8.570772271891768e-06, + "loss": 0.2861, + "step": 13458 + }, + { + "epoch": 0.2694292220303781, + "grad_norm": 1.020268201828003, + "learning_rate": 8.570545340124237e-06, + "loss": 0.3008, + "step": 13459 + }, + { + "epoch": 0.26944924054750646, + "grad_norm": 1.2145520448684692, + "learning_rate": 8.5703183933469e-06, + "loss": 0.3467, + "step": 13460 + }, + { + "epoch": 0.2694692590646348, + "grad_norm": 1.0217005014419556, + "learning_rate": 8.570091431560717e-06, + "loss": 0.3506, + "step": 13461 + }, + { + "epoch": 0.2694892775817631, + "grad_norm": 1.22589111328125, + "learning_rate": 8.569864454766635e-06, + "loss": 0.2979, + "step": 13462 + }, + { + "epoch": 0.26950929609889146, + "grad_norm": 1.1265735626220703, + "learning_rate": 8.569637462965611e-06, + "loss": 0.297, + "step": 13463 + }, + { + "epoch": 0.2695293146160198, + "grad_norm": 1.085341215133667, + "learning_rate": 8.5694104561586e-06, + "loss": 0.327, + "step": 13464 + }, + { + "epoch": 0.26954933313314816, + "grad_norm": 1.0927201509475708, + "learning_rate": 8.569183434346557e-06, + "loss": 0.3409, + "step": 13465 + }, + { + "epoch": 0.2695693516502765, + "grad_norm": 1.0295039415359497, + "learning_rate": 8.568956397530433e-06, + "loss": 0.3167, + "step": 13466 + }, + { + "epoch": 0.26958937016740486, + "grad_norm": 1.1326829195022583, + "learning_rate": 8.568729345711186e-06, + "loss": 0.3264, + "step": 13467 + }, + { + "epoch": 0.2696093886845332, + "grad_norm": 1.7943074703216553, + "learning_rate": 8.568502278889767e-06, + "loss": 0.7571, + "step": 13468 + }, + { + "epoch": 0.26962940720166156, + "grad_norm": 1.1037060022354126, + "learning_rate": 8.568275197067134e-06, + "loss": 0.3518, + "step": 13469 + }, + { + "epoch": 0.26964942571878986, + "grad_norm": 1.0435909032821655, + "learning_rate": 8.56804810024424e-06, + "loss": 0.341, + "step": 13470 + }, + { + "epoch": 0.2696694442359182, + "grad_norm": 1.056439995765686, + "learning_rate": 8.567820988422037e-06, + "loss": 0.3478, + "step": 13471 + }, + { + "epoch": 0.26968946275304656, + "grad_norm": 1.1737570762634277, + "learning_rate": 8.567593861601485e-06, + "loss": 0.3469, + "step": 13472 + }, + { + "epoch": 0.2697094812701749, + "grad_norm": 1.1120883226394653, + "learning_rate": 8.567366719783537e-06, + "loss": 0.2873, + "step": 13473 + }, + { + "epoch": 0.26972949978730326, + "grad_norm": 1.125952124595642, + "learning_rate": 8.567139562969145e-06, + "loss": 0.366, + "step": 13474 + }, + { + "epoch": 0.2697495183044316, + "grad_norm": 1.0982784032821655, + "learning_rate": 8.566912391159265e-06, + "loss": 0.331, + "step": 13475 + }, + { + "epoch": 0.26976953682155996, + "grad_norm": 1.1270884275436401, + "learning_rate": 8.566685204354855e-06, + "loss": 0.3603, + "step": 13476 + }, + { + "epoch": 0.2697895553386883, + "grad_norm": 1.195778727531433, + "learning_rate": 8.566458002556867e-06, + "loss": 0.3641, + "step": 13477 + }, + { + "epoch": 0.2698095738558166, + "grad_norm": 1.083787202835083, + "learning_rate": 8.566230785766258e-06, + "loss": 0.3373, + "step": 13478 + }, + { + "epoch": 0.26982959237294496, + "grad_norm": 1.1302448511123657, + "learning_rate": 8.566003553983982e-06, + "loss": 0.3195, + "step": 13479 + }, + { + "epoch": 0.2698496108900733, + "grad_norm": 1.8690546751022339, + "learning_rate": 8.565776307210994e-06, + "loss": 0.8088, + "step": 13480 + }, + { + "epoch": 0.26986962940720166, + "grad_norm": 1.0490244626998901, + "learning_rate": 8.56554904544825e-06, + "loss": 0.324, + "step": 13481 + }, + { + "epoch": 0.26988964792433, + "grad_norm": 1.067278504371643, + "learning_rate": 8.565321768696704e-06, + "loss": 0.2932, + "step": 13482 + }, + { + "epoch": 0.26990966644145836, + "grad_norm": 1.003524899482727, + "learning_rate": 8.565094476957313e-06, + "loss": 0.2702, + "step": 13483 + }, + { + "epoch": 0.2699296849585867, + "grad_norm": 1.1015055179595947, + "learning_rate": 8.564867170231033e-06, + "loss": 0.3462, + "step": 13484 + }, + { + "epoch": 0.26994970347571506, + "grad_norm": 1.101934790611267, + "learning_rate": 8.564639848518817e-06, + "loss": 0.329, + "step": 13485 + }, + { + "epoch": 0.26996972199284336, + "grad_norm": 1.0585483312606812, + "learning_rate": 8.564412511821625e-06, + "loss": 0.3042, + "step": 13486 + }, + { + "epoch": 0.2699897405099717, + "grad_norm": 1.169259786605835, + "learning_rate": 8.564185160140407e-06, + "loss": 0.2849, + "step": 13487 + }, + { + "epoch": 0.27000975902710006, + "grad_norm": 1.1312053203582764, + "learning_rate": 8.563957793476123e-06, + "loss": 0.3485, + "step": 13488 + }, + { + "epoch": 0.2700297775442284, + "grad_norm": 1.1574838161468506, + "learning_rate": 8.563730411829727e-06, + "loss": 0.3531, + "step": 13489 + }, + { + "epoch": 0.27004979606135676, + "grad_norm": 1.3670214414596558, + "learning_rate": 8.563503015202176e-06, + "loss": 0.3728, + "step": 13490 + }, + { + "epoch": 0.2700698145784851, + "grad_norm": 1.0240073204040527, + "learning_rate": 8.563275603594424e-06, + "loss": 0.3168, + "step": 13491 + }, + { + "epoch": 0.27008983309561346, + "grad_norm": 1.0710023641586304, + "learning_rate": 8.56304817700743e-06, + "loss": 0.3325, + "step": 13492 + }, + { + "epoch": 0.2701098516127418, + "grad_norm": 1.059984564781189, + "learning_rate": 8.562820735442145e-06, + "loss": 0.3389, + "step": 13493 + }, + { + "epoch": 0.2701298701298701, + "grad_norm": 1.123412013053894, + "learning_rate": 8.56259327889953e-06, + "loss": 0.3102, + "step": 13494 + }, + { + "epoch": 0.27014988864699846, + "grad_norm": 1.0344915390014648, + "learning_rate": 8.562365807380542e-06, + "loss": 0.2862, + "step": 13495 + }, + { + "epoch": 0.2701699071641268, + "grad_norm": 1.1805379390716553, + "learning_rate": 8.562138320886133e-06, + "loss": 0.3482, + "step": 13496 + }, + { + "epoch": 0.27018992568125516, + "grad_norm": 1.0724577903747559, + "learning_rate": 8.56191081941726e-06, + "loss": 0.3082, + "step": 13497 + }, + { + "epoch": 0.2702099441983835, + "grad_norm": 1.1816271543502808, + "learning_rate": 8.561683302974881e-06, + "loss": 0.2767, + "step": 13498 + }, + { + "epoch": 0.27022996271551186, + "grad_norm": 1.379910945892334, + "learning_rate": 8.561455771559953e-06, + "loss": 0.3126, + "step": 13499 + }, + { + "epoch": 0.2702499812326402, + "grad_norm": 1.0842161178588867, + "learning_rate": 8.56122822517343e-06, + "loss": 0.3548, + "step": 13500 + }, + { + "epoch": 0.27026999974976856, + "grad_norm": 1.8388115167617798, + "learning_rate": 8.56100066381627e-06, + "loss": 0.8721, + "step": 13501 + }, + { + "epoch": 0.27029001826689686, + "grad_norm": 1.8704451322555542, + "learning_rate": 8.56077308748943e-06, + "loss": 0.8168, + "step": 13502 + }, + { + "epoch": 0.2703100367840252, + "grad_norm": 1.1370139122009277, + "learning_rate": 8.560545496193866e-06, + "loss": 0.3318, + "step": 13503 + }, + { + "epoch": 0.27033005530115356, + "grad_norm": 1.7553354501724243, + "learning_rate": 8.560317889930536e-06, + "loss": 0.7803, + "step": 13504 + }, + { + "epoch": 0.2703500738182819, + "grad_norm": 1.3036688566207886, + "learning_rate": 8.560090268700395e-06, + "loss": 0.3262, + "step": 13505 + }, + { + "epoch": 0.27037009233541026, + "grad_norm": 1.1887835264205933, + "learning_rate": 8.559862632504401e-06, + "loss": 0.2989, + "step": 13506 + }, + { + "epoch": 0.2703901108525386, + "grad_norm": 1.0487587451934814, + "learning_rate": 8.559634981343509e-06, + "loss": 0.2843, + "step": 13507 + }, + { + "epoch": 0.27041012936966696, + "grad_norm": 1.9040117263793945, + "learning_rate": 8.55940731521868e-06, + "loss": 0.7685, + "step": 13508 + }, + { + "epoch": 0.27043014788679526, + "grad_norm": 1.2457834482192993, + "learning_rate": 8.559179634130867e-06, + "loss": 0.3567, + "step": 13509 + }, + { + "epoch": 0.2704501664039236, + "grad_norm": 0.9992903470993042, + "learning_rate": 8.558951938081029e-06, + "loss": 0.3558, + "step": 13510 + }, + { + "epoch": 0.27047018492105196, + "grad_norm": 1.1000807285308838, + "learning_rate": 8.558724227070123e-06, + "loss": 0.3144, + "step": 13511 + }, + { + "epoch": 0.2704902034381803, + "grad_norm": 1.0736339092254639, + "learning_rate": 8.558496501099106e-06, + "loss": 0.2936, + "step": 13512 + }, + { + "epoch": 0.27051022195530866, + "grad_norm": 1.055569052696228, + "learning_rate": 8.558268760168937e-06, + "loss": 0.3057, + "step": 13513 + }, + { + "epoch": 0.270530240472437, + "grad_norm": 1.9323097467422485, + "learning_rate": 8.558041004280571e-06, + "loss": 0.8329, + "step": 13514 + }, + { + "epoch": 0.27055025898956536, + "grad_norm": 2.03779935836792, + "learning_rate": 8.557813233434965e-06, + "loss": 0.7645, + "step": 13515 + }, + { + "epoch": 0.2705702775066937, + "grad_norm": 1.093073844909668, + "learning_rate": 8.557585447633079e-06, + "loss": 0.2802, + "step": 13516 + }, + { + "epoch": 0.270590296023822, + "grad_norm": 1.1043506860733032, + "learning_rate": 8.55735764687587e-06, + "loss": 0.3648, + "step": 13517 + }, + { + "epoch": 0.27061031454095036, + "grad_norm": 1.3072019815444946, + "learning_rate": 8.557129831164294e-06, + "loss": 0.3316, + "step": 13518 + }, + { + "epoch": 0.2706303330580787, + "grad_norm": 1.0617480278015137, + "learning_rate": 8.55690200049931e-06, + "loss": 0.3288, + "step": 13519 + }, + { + "epoch": 0.27065035157520706, + "grad_norm": 1.0752067565917969, + "learning_rate": 8.556674154881875e-06, + "loss": 0.3092, + "step": 13520 + }, + { + "epoch": 0.2706703700923354, + "grad_norm": 1.2745434045791626, + "learning_rate": 8.556446294312949e-06, + "loss": 0.3026, + "step": 13521 + }, + { + "epoch": 0.27069038860946376, + "grad_norm": 1.1037365198135376, + "learning_rate": 8.556218418793484e-06, + "loss": 0.3183, + "step": 13522 + }, + { + "epoch": 0.2707104071265921, + "grad_norm": 1.2264835834503174, + "learning_rate": 8.555990528324446e-06, + "loss": 0.3034, + "step": 13523 + }, + { + "epoch": 0.27073042564372046, + "grad_norm": 1.0231821537017822, + "learning_rate": 8.555762622906787e-06, + "loss": 0.3328, + "step": 13524 + }, + { + "epoch": 0.27075044416084876, + "grad_norm": 1.122289776802063, + "learning_rate": 8.555534702541469e-06, + "loss": 0.2843, + "step": 13525 + }, + { + "epoch": 0.2707704626779771, + "grad_norm": 1.2047228813171387, + "learning_rate": 8.555306767229447e-06, + "loss": 0.2738, + "step": 13526 + }, + { + "epoch": 0.27079048119510546, + "grad_norm": 1.0118401050567627, + "learning_rate": 8.555078816971682e-06, + "loss": 0.326, + "step": 13527 + }, + { + "epoch": 0.2708104997122338, + "grad_norm": 1.1862101554870605, + "learning_rate": 8.554850851769129e-06, + "loss": 0.2786, + "step": 13528 + }, + { + "epoch": 0.27083051822936216, + "grad_norm": 1.793862223625183, + "learning_rate": 8.554622871622749e-06, + "loss": 0.8163, + "step": 13529 + }, + { + "epoch": 0.2708505367464905, + "grad_norm": 1.1965616941452026, + "learning_rate": 8.554394876533498e-06, + "loss": 0.3512, + "step": 13530 + }, + { + "epoch": 0.27087055526361886, + "grad_norm": 1.199446678161621, + "learning_rate": 8.554166866502337e-06, + "loss": 0.3345, + "step": 13531 + }, + { + "epoch": 0.2708905737807472, + "grad_norm": 1.250293254852295, + "learning_rate": 8.553938841530223e-06, + "loss": 0.3454, + "step": 13532 + }, + { + "epoch": 0.2709105922978755, + "grad_norm": 1.202654242515564, + "learning_rate": 8.553710801618115e-06, + "loss": 0.3437, + "step": 13533 + }, + { + "epoch": 0.27093061081500386, + "grad_norm": 2.0661873817443848, + "learning_rate": 8.553482746766972e-06, + "loss": 0.8125, + "step": 13534 + }, + { + "epoch": 0.2709506293321322, + "grad_norm": 1.184792399406433, + "learning_rate": 8.553254676977752e-06, + "loss": 0.4064, + "step": 13535 + }, + { + "epoch": 0.27097064784926056, + "grad_norm": 1.0996787548065186, + "learning_rate": 8.553026592251414e-06, + "loss": 0.315, + "step": 13536 + }, + { + "epoch": 0.2709906663663889, + "grad_norm": 1.967686414718628, + "learning_rate": 8.552798492588915e-06, + "loss": 0.7975, + "step": 13537 + }, + { + "epoch": 0.27101068488351726, + "grad_norm": 1.061185359954834, + "learning_rate": 8.552570377991219e-06, + "loss": 0.343, + "step": 13538 + }, + { + "epoch": 0.2710307034006456, + "grad_norm": 1.1428605318069458, + "learning_rate": 8.55234224845928e-06, + "loss": 0.3166, + "step": 13539 + }, + { + "epoch": 0.27105072191777396, + "grad_norm": 1.2410244941711426, + "learning_rate": 8.552114103994059e-06, + "loss": 0.3478, + "step": 13540 + }, + { + "epoch": 0.27107074043490226, + "grad_norm": 1.072927474975586, + "learning_rate": 8.551885944596514e-06, + "loss": 0.3207, + "step": 13541 + }, + { + "epoch": 0.2710907589520306, + "grad_norm": 1.8402124643325806, + "learning_rate": 8.551657770267605e-06, + "loss": 0.3095, + "step": 13542 + }, + { + "epoch": 0.27111077746915896, + "grad_norm": 1.2585206031799316, + "learning_rate": 8.551429581008293e-06, + "loss": 0.2999, + "step": 13543 + }, + { + "epoch": 0.2711307959862873, + "grad_norm": 1.1513595581054688, + "learning_rate": 8.551201376819532e-06, + "loss": 0.3508, + "step": 13544 + }, + { + "epoch": 0.27115081450341566, + "grad_norm": 2.0175259113311768, + "learning_rate": 8.550973157702288e-06, + "loss": 0.887, + "step": 13545 + }, + { + "epoch": 0.271170833020544, + "grad_norm": 1.1223714351654053, + "learning_rate": 8.550744923657514e-06, + "loss": 0.3325, + "step": 13546 + }, + { + "epoch": 0.27119085153767236, + "grad_norm": 1.0742509365081787, + "learning_rate": 8.550516674686174e-06, + "loss": 0.2851, + "step": 13547 + }, + { + "epoch": 0.2712108700548007, + "grad_norm": 1.1855798959732056, + "learning_rate": 8.550288410789227e-06, + "loss": 0.3146, + "step": 13548 + }, + { + "epoch": 0.271230888571929, + "grad_norm": 1.1176413297653198, + "learning_rate": 8.550060131967628e-06, + "loss": 0.285, + "step": 13549 + }, + { + "epoch": 0.27125090708905736, + "grad_norm": 1.0100305080413818, + "learning_rate": 8.549831838222345e-06, + "loss": 0.2893, + "step": 13550 + }, + { + "epoch": 0.2712709256061857, + "grad_norm": 1.1467640399932861, + "learning_rate": 8.54960352955433e-06, + "loss": 0.3369, + "step": 13551 + }, + { + "epoch": 0.27129094412331406, + "grad_norm": 1.0536710023880005, + "learning_rate": 8.549375205964545e-06, + "loss": 0.3178, + "step": 13552 + }, + { + "epoch": 0.2713109626404424, + "grad_norm": 1.010048747062683, + "learning_rate": 8.549146867453953e-06, + "loss": 0.3386, + "step": 13553 + }, + { + "epoch": 0.27133098115757076, + "grad_norm": 0.991228461265564, + "learning_rate": 8.548918514023509e-06, + "loss": 0.2942, + "step": 13554 + }, + { + "epoch": 0.2713509996746991, + "grad_norm": 1.0243009328842163, + "learning_rate": 8.548690145674177e-06, + "loss": 0.2689, + "step": 13555 + }, + { + "epoch": 0.27137101819182746, + "grad_norm": 1.8329941034317017, + "learning_rate": 8.548461762406915e-06, + "loss": 0.8325, + "step": 13556 + }, + { + "epoch": 0.27139103670895576, + "grad_norm": 1.2836285829544067, + "learning_rate": 8.54823336422268e-06, + "loss": 0.3538, + "step": 13557 + }, + { + "epoch": 0.2714110552260841, + "grad_norm": 1.1719863414764404, + "learning_rate": 8.548004951122439e-06, + "loss": 0.312, + "step": 13558 + }, + { + "epoch": 0.27143107374321246, + "grad_norm": 1.0985193252563477, + "learning_rate": 8.547776523107149e-06, + "loss": 0.3481, + "step": 13559 + }, + { + "epoch": 0.2714510922603408, + "grad_norm": 1.0640283823013306, + "learning_rate": 8.54754808017777e-06, + "loss": 0.3073, + "step": 13560 + }, + { + "epoch": 0.27147111077746916, + "grad_norm": 1.0624289512634277, + "learning_rate": 8.547319622335261e-06, + "loss": 0.3006, + "step": 13561 + }, + { + "epoch": 0.2714911292945975, + "grad_norm": 1.1062018871307373, + "learning_rate": 8.547091149580584e-06, + "loss": 0.3186, + "step": 13562 + }, + { + "epoch": 0.27151114781172586, + "grad_norm": 1.1222699880599976, + "learning_rate": 8.546862661914698e-06, + "loss": 0.317, + "step": 13563 + }, + { + "epoch": 0.2715311663288542, + "grad_norm": 1.8366297483444214, + "learning_rate": 8.546634159338568e-06, + "loss": 0.8774, + "step": 13564 + }, + { + "epoch": 0.2715511848459825, + "grad_norm": 1.8431497812271118, + "learning_rate": 8.546405641853148e-06, + "loss": 0.8133, + "step": 13565 + }, + { + "epoch": 0.27157120336311086, + "grad_norm": 1.115019679069519, + "learning_rate": 8.546177109459403e-06, + "loss": 0.3465, + "step": 13566 + }, + { + "epoch": 0.2715912218802392, + "grad_norm": 1.7181665897369385, + "learning_rate": 8.545948562158293e-06, + "loss": 0.8278, + "step": 13567 + }, + { + "epoch": 0.27161124039736756, + "grad_norm": 1.1784698963165283, + "learning_rate": 8.545719999950779e-06, + "loss": 0.2838, + "step": 13568 + }, + { + "epoch": 0.2716312589144959, + "grad_norm": 1.2142927646636963, + "learning_rate": 8.545491422837819e-06, + "loss": 0.3093, + "step": 13569 + }, + { + "epoch": 0.27165127743162426, + "grad_norm": 1.1716779470443726, + "learning_rate": 8.545262830820378e-06, + "loss": 0.3456, + "step": 13570 + }, + { + "epoch": 0.2716712959487526, + "grad_norm": 1.053177833557129, + "learning_rate": 8.545034223899414e-06, + "loss": 0.2688, + "step": 13571 + }, + { + "epoch": 0.27169131446588096, + "grad_norm": 1.9340286254882812, + "learning_rate": 8.54480560207589e-06, + "loss": 0.8131, + "step": 13572 + }, + { + "epoch": 0.27171133298300926, + "grad_norm": 1.7640066146850586, + "learning_rate": 8.544576965350764e-06, + "loss": 0.777, + "step": 13573 + }, + { + "epoch": 0.2717313515001376, + "grad_norm": 1.1196141242980957, + "learning_rate": 8.544348313725002e-06, + "loss": 0.2977, + "step": 13574 + }, + { + "epoch": 0.27175137001726596, + "grad_norm": 1.1470303535461426, + "learning_rate": 8.54411964719956e-06, + "loss": 0.2844, + "step": 13575 + }, + { + "epoch": 0.2717713885343943, + "grad_norm": 1.1566343307495117, + "learning_rate": 8.543890965775403e-06, + "loss": 0.3079, + "step": 13576 + }, + { + "epoch": 0.27179140705152266, + "grad_norm": 1.0260236263275146, + "learning_rate": 8.54366226945349e-06, + "loss": 0.3387, + "step": 13577 + }, + { + "epoch": 0.271811425568651, + "grad_norm": 1.1058440208435059, + "learning_rate": 8.543433558234784e-06, + "loss": 0.3559, + "step": 13578 + }, + { + "epoch": 0.27183144408577936, + "grad_norm": 1.1972287893295288, + "learning_rate": 8.543204832120247e-06, + "loss": 0.3726, + "step": 13579 + }, + { + "epoch": 0.2718514626029077, + "grad_norm": 1.2466721534729004, + "learning_rate": 8.542976091110838e-06, + "loss": 0.3258, + "step": 13580 + }, + { + "epoch": 0.271871481120036, + "grad_norm": 1.049887776374817, + "learning_rate": 8.542747335207521e-06, + "loss": 0.3026, + "step": 13581 + }, + { + "epoch": 0.27189149963716436, + "grad_norm": 1.232101559638977, + "learning_rate": 8.542518564411256e-06, + "loss": 0.3621, + "step": 13582 + }, + { + "epoch": 0.2719115181542927, + "grad_norm": 1.161676287651062, + "learning_rate": 8.542289778723006e-06, + "loss": 0.3249, + "step": 13583 + }, + { + "epoch": 0.27193153667142106, + "grad_norm": 1.1853405237197876, + "learning_rate": 8.542060978143732e-06, + "loss": 0.3298, + "step": 13584 + }, + { + "epoch": 0.2719515551885494, + "grad_norm": 1.1163647174835205, + "learning_rate": 8.541832162674395e-06, + "loss": 0.3555, + "step": 13585 + }, + { + "epoch": 0.27197157370567776, + "grad_norm": 1.075483798980713, + "learning_rate": 8.541603332315958e-06, + "loss": 0.3128, + "step": 13586 + }, + { + "epoch": 0.2719915922228061, + "grad_norm": 1.0143396854400635, + "learning_rate": 8.541374487069383e-06, + "loss": 0.3074, + "step": 13587 + }, + { + "epoch": 0.27201161073993446, + "grad_norm": 1.0139247179031372, + "learning_rate": 8.541145626935632e-06, + "loss": 0.322, + "step": 13588 + }, + { + "epoch": 0.27203162925706276, + "grad_norm": 1.0652379989624023, + "learning_rate": 8.540916751915666e-06, + "loss": 0.2983, + "step": 13589 + }, + { + "epoch": 0.2720516477741911, + "grad_norm": 1.19675612449646, + "learning_rate": 8.540687862010449e-06, + "loss": 0.3075, + "step": 13590 + }, + { + "epoch": 0.27207166629131946, + "grad_norm": 1.2172271013259888, + "learning_rate": 8.54045895722094e-06, + "loss": 0.3892, + "step": 13591 + }, + { + "epoch": 0.2720916848084478, + "grad_norm": 0.9996619820594788, + "learning_rate": 8.540230037548105e-06, + "loss": 0.3163, + "step": 13592 + }, + { + "epoch": 0.27211170332557616, + "grad_norm": 1.2411541938781738, + "learning_rate": 8.540001102992905e-06, + "loss": 0.2851, + "step": 13593 + }, + { + "epoch": 0.2721317218427045, + "grad_norm": 1.0299912691116333, + "learning_rate": 8.539772153556303e-06, + "loss": 0.324, + "step": 13594 + }, + { + "epoch": 0.27215174035983286, + "grad_norm": 1.3862121105194092, + "learning_rate": 8.539543189239259e-06, + "loss": 0.3261, + "step": 13595 + }, + { + "epoch": 0.2721717588769612, + "grad_norm": 1.090458631515503, + "learning_rate": 8.539314210042739e-06, + "loss": 0.2834, + "step": 13596 + }, + { + "epoch": 0.2721917773940895, + "grad_norm": 1.0582306385040283, + "learning_rate": 8.539085215967702e-06, + "loss": 0.3172, + "step": 13597 + }, + { + "epoch": 0.27221179591121786, + "grad_norm": 1.1644145250320435, + "learning_rate": 8.538856207015113e-06, + "loss": 0.3278, + "step": 13598 + }, + { + "epoch": 0.2722318144283462, + "grad_norm": 1.0932930707931519, + "learning_rate": 8.538627183185934e-06, + "loss": 0.3333, + "step": 13599 + }, + { + "epoch": 0.27225183294547456, + "grad_norm": 1.145440697669983, + "learning_rate": 8.538398144481128e-06, + "loss": 0.2916, + "step": 13600 + }, + { + "epoch": 0.2722718514626029, + "grad_norm": 1.1739997863769531, + "learning_rate": 8.538169090901657e-06, + "loss": 0.2981, + "step": 13601 + }, + { + "epoch": 0.27229186997973126, + "grad_norm": 1.0405864715576172, + "learning_rate": 8.537940022448485e-06, + "loss": 0.3014, + "step": 13602 + }, + { + "epoch": 0.2723118884968596, + "grad_norm": 1.0708032846450806, + "learning_rate": 8.537710939122576e-06, + "loss": 0.3475, + "step": 13603 + }, + { + "epoch": 0.27233190701398796, + "grad_norm": 1.1006866693496704, + "learning_rate": 8.53748184092489e-06, + "loss": 0.3044, + "step": 13604 + }, + { + "epoch": 0.27235192553111626, + "grad_norm": 1.1286427974700928, + "learning_rate": 8.537252727856392e-06, + "loss": 0.3742, + "step": 13605 + }, + { + "epoch": 0.2723719440482446, + "grad_norm": 0.9672775268554688, + "learning_rate": 8.537023599918043e-06, + "loss": 0.3085, + "step": 13606 + }, + { + "epoch": 0.27239196256537296, + "grad_norm": 1.8766295909881592, + "learning_rate": 8.536794457110811e-06, + "loss": 0.8172, + "step": 13607 + }, + { + "epoch": 0.2724119810825013, + "grad_norm": 1.085064172744751, + "learning_rate": 8.536565299435655e-06, + "loss": 0.3461, + "step": 13608 + }, + { + "epoch": 0.27243199959962966, + "grad_norm": 1.0809426307678223, + "learning_rate": 8.536336126893538e-06, + "loss": 0.3161, + "step": 13609 + }, + { + "epoch": 0.272452018116758, + "grad_norm": 1.121720314025879, + "learning_rate": 8.536106939485427e-06, + "loss": 0.2985, + "step": 13610 + }, + { + "epoch": 0.27247203663388636, + "grad_norm": 1.804560899734497, + "learning_rate": 8.535877737212284e-06, + "loss": 0.746, + "step": 13611 + }, + { + "epoch": 0.2724920551510147, + "grad_norm": 1.1428302526474, + "learning_rate": 8.53564852007507e-06, + "loss": 0.3488, + "step": 13612 + }, + { + "epoch": 0.272512073668143, + "grad_norm": 1.1916580200195312, + "learning_rate": 8.535419288074752e-06, + "loss": 0.3877, + "step": 13613 + }, + { + "epoch": 0.27253209218527136, + "grad_norm": 1.1906523704528809, + "learning_rate": 8.53519004121229e-06, + "loss": 0.3413, + "step": 13614 + }, + { + "epoch": 0.2725521107023997, + "grad_norm": 1.8206251859664917, + "learning_rate": 8.53496077948865e-06, + "loss": 0.8574, + "step": 13615 + }, + { + "epoch": 0.27257212921952806, + "grad_norm": 1.087786078453064, + "learning_rate": 8.534731502904799e-06, + "loss": 0.3238, + "step": 13616 + }, + { + "epoch": 0.2725921477366564, + "grad_norm": 1.1049484014511108, + "learning_rate": 8.534502211461695e-06, + "loss": 0.3299, + "step": 13617 + }, + { + "epoch": 0.27261216625378476, + "grad_norm": 1.0659332275390625, + "learning_rate": 8.534272905160303e-06, + "loss": 0.2979, + "step": 13618 + }, + { + "epoch": 0.2726321847709131, + "grad_norm": 1.1229716539382935, + "learning_rate": 8.53404358400159e-06, + "loss": 0.3615, + "step": 13619 + }, + { + "epoch": 0.27265220328804146, + "grad_norm": 1.0271055698394775, + "learning_rate": 8.533814247986518e-06, + "loss": 0.2987, + "step": 13620 + }, + { + "epoch": 0.27267222180516976, + "grad_norm": 0.9983222484588623, + "learning_rate": 8.533584897116052e-06, + "loss": 0.2822, + "step": 13621 + }, + { + "epoch": 0.2726922403222981, + "grad_norm": 1.1233534812927246, + "learning_rate": 8.533355531391155e-06, + "loss": 0.3277, + "step": 13622 + }, + { + "epoch": 0.27271225883942646, + "grad_norm": 1.117042899131775, + "learning_rate": 8.533126150812792e-06, + "loss": 0.2939, + "step": 13623 + }, + { + "epoch": 0.2727322773565548, + "grad_norm": 1.0870184898376465, + "learning_rate": 8.532896755381927e-06, + "loss": 0.324, + "step": 13624 + }, + { + "epoch": 0.27275229587368316, + "grad_norm": 1.0884287357330322, + "learning_rate": 8.532667345099524e-06, + "loss": 0.4103, + "step": 13625 + }, + { + "epoch": 0.2727723143908115, + "grad_norm": 1.1313966512680054, + "learning_rate": 8.532437919966547e-06, + "loss": 0.3273, + "step": 13626 + }, + { + "epoch": 0.27279233290793986, + "grad_norm": 1.0889776945114136, + "learning_rate": 8.532208479983964e-06, + "loss": 0.3018, + "step": 13627 + }, + { + "epoch": 0.2728123514250682, + "grad_norm": 1.1635074615478516, + "learning_rate": 8.531979025152733e-06, + "loss": 0.3751, + "step": 13628 + }, + { + "epoch": 0.2728323699421965, + "grad_norm": 1.0750455856323242, + "learning_rate": 8.531749555473825e-06, + "loss": 0.304, + "step": 13629 + }, + { + "epoch": 0.27285238845932486, + "grad_norm": 1.2373628616333008, + "learning_rate": 8.5315200709482e-06, + "loss": 0.3985, + "step": 13630 + }, + { + "epoch": 0.2728724069764532, + "grad_norm": 1.336333990097046, + "learning_rate": 8.531290571576825e-06, + "loss": 0.3117, + "step": 13631 + }, + { + "epoch": 0.27289242549358156, + "grad_norm": 0.9325637817382812, + "learning_rate": 8.531061057360665e-06, + "loss": 0.3202, + "step": 13632 + }, + { + "epoch": 0.2729124440107099, + "grad_norm": 1.1010618209838867, + "learning_rate": 8.530831528300685e-06, + "loss": 0.3515, + "step": 13633 + }, + { + "epoch": 0.27293246252783826, + "grad_norm": 1.8746510744094849, + "learning_rate": 8.530601984397848e-06, + "loss": 0.7834, + "step": 13634 + }, + { + "epoch": 0.2729524810449666, + "grad_norm": 1.0801109075546265, + "learning_rate": 8.53037242565312e-06, + "loss": 0.3211, + "step": 13635 + }, + { + "epoch": 0.27297249956209496, + "grad_norm": 1.8408302068710327, + "learning_rate": 8.530142852067467e-06, + "loss": 0.9001, + "step": 13636 + }, + { + "epoch": 0.27299251807922326, + "grad_norm": 1.0381593704223633, + "learning_rate": 8.529913263641851e-06, + "loss": 0.3142, + "step": 13637 + }, + { + "epoch": 0.2730125365963516, + "grad_norm": 1.1304409503936768, + "learning_rate": 8.52968366037724e-06, + "loss": 0.3733, + "step": 13638 + }, + { + "epoch": 0.27303255511347996, + "grad_norm": 1.0835670232772827, + "learning_rate": 8.5294540422746e-06, + "loss": 0.3339, + "step": 13639 + }, + { + "epoch": 0.2730525736306083, + "grad_norm": 1.0854781866073608, + "learning_rate": 8.529224409334896e-06, + "loss": 0.3158, + "step": 13640 + }, + { + "epoch": 0.27307259214773666, + "grad_norm": 1.0310841798782349, + "learning_rate": 8.528994761559089e-06, + "loss": 0.2875, + "step": 13641 + }, + { + "epoch": 0.273092610664865, + "grad_norm": 1.1075714826583862, + "learning_rate": 8.528765098948149e-06, + "loss": 0.3412, + "step": 13642 + }, + { + "epoch": 0.27311262918199336, + "grad_norm": 1.0513923168182373, + "learning_rate": 8.52853542150304e-06, + "loss": 0.3162, + "step": 13643 + }, + { + "epoch": 0.2731326476991217, + "grad_norm": 1.8102067708969116, + "learning_rate": 8.528305729224726e-06, + "loss": 0.7639, + "step": 13644 + }, + { + "epoch": 0.27315266621625, + "grad_norm": 1.0822391510009766, + "learning_rate": 8.528076022114173e-06, + "loss": 0.3353, + "step": 13645 + }, + { + "epoch": 0.27317268473337836, + "grad_norm": 0.9907225966453552, + "learning_rate": 8.527846300172351e-06, + "loss": 0.2857, + "step": 13646 + }, + { + "epoch": 0.2731927032505067, + "grad_norm": 1.0469566583633423, + "learning_rate": 8.527616563400221e-06, + "loss": 0.3083, + "step": 13647 + }, + { + "epoch": 0.27321272176763506, + "grad_norm": 1.093916416168213, + "learning_rate": 8.52738681179875e-06, + "loss": 0.3323, + "step": 13648 + }, + { + "epoch": 0.2732327402847634, + "grad_norm": 1.0042442083358765, + "learning_rate": 8.527157045368903e-06, + "loss": 0.3328, + "step": 13649 + }, + { + "epoch": 0.27325275880189176, + "grad_norm": 1.2448958158493042, + "learning_rate": 8.526927264111648e-06, + "loss": 0.3501, + "step": 13650 + }, + { + "epoch": 0.2732727773190201, + "grad_norm": 1.1157678365707397, + "learning_rate": 8.526697468027951e-06, + "loss": 0.3077, + "step": 13651 + }, + { + "epoch": 0.27329279583614846, + "grad_norm": 1.157732605934143, + "learning_rate": 8.526467657118774e-06, + "loss": 0.3001, + "step": 13652 + }, + { + "epoch": 0.27331281435327676, + "grad_norm": 1.086604356765747, + "learning_rate": 8.526237831385087e-06, + "loss": 0.3055, + "step": 13653 + }, + { + "epoch": 0.2733328328704051, + "grad_norm": 1.1839683055877686, + "learning_rate": 8.526007990827856e-06, + "loss": 0.3245, + "step": 13654 + }, + { + "epoch": 0.27335285138753346, + "grad_norm": 1.1372112035751343, + "learning_rate": 8.525778135448045e-06, + "loss": 0.3564, + "step": 13655 + }, + { + "epoch": 0.2733728699046618, + "grad_norm": 1.1862894296646118, + "learning_rate": 8.525548265246622e-06, + "loss": 0.3895, + "step": 13656 + }, + { + "epoch": 0.27339288842179016, + "grad_norm": 1.2136204242706299, + "learning_rate": 8.525318380224552e-06, + "loss": 0.3251, + "step": 13657 + }, + { + "epoch": 0.2734129069389185, + "grad_norm": 1.181030511856079, + "learning_rate": 8.525088480382804e-06, + "loss": 0.3415, + "step": 13658 + }, + { + "epoch": 0.27343292545604686, + "grad_norm": 1.2772282361984253, + "learning_rate": 8.52485856572234e-06, + "loss": 0.3131, + "step": 13659 + }, + { + "epoch": 0.2734529439731752, + "grad_norm": 1.0914498567581177, + "learning_rate": 8.524628636244132e-06, + "loss": 0.3326, + "step": 13660 + }, + { + "epoch": 0.2734729624903035, + "grad_norm": 1.4001045227050781, + "learning_rate": 8.524398691949143e-06, + "loss": 0.3745, + "step": 13661 + }, + { + "epoch": 0.27349298100743186, + "grad_norm": 1.1346865892410278, + "learning_rate": 8.52416873283834e-06, + "loss": 0.3279, + "step": 13662 + }, + { + "epoch": 0.2735129995245602, + "grad_norm": 1.0093742609024048, + "learning_rate": 8.523938758912688e-06, + "loss": 0.3077, + "step": 13663 + }, + { + "epoch": 0.27353301804168856, + "grad_norm": 1.1604397296905518, + "learning_rate": 8.523708770173159e-06, + "loss": 0.3461, + "step": 13664 + }, + { + "epoch": 0.2735530365588169, + "grad_norm": 1.0891255140304565, + "learning_rate": 8.523478766620713e-06, + "loss": 0.3893, + "step": 13665 + }, + { + "epoch": 0.27357305507594526, + "grad_norm": 0.990492045879364, + "learning_rate": 8.523248748256325e-06, + "loss": 0.3555, + "step": 13666 + }, + { + "epoch": 0.2735930735930736, + "grad_norm": 1.1642100811004639, + "learning_rate": 8.523018715080956e-06, + "loss": 0.3522, + "step": 13667 + }, + { + "epoch": 0.27361309211020196, + "grad_norm": 1.1555590629577637, + "learning_rate": 8.522788667095573e-06, + "loss": 0.329, + "step": 13668 + }, + { + "epoch": 0.27363311062733026, + "grad_norm": 1.9402847290039062, + "learning_rate": 8.522558604301145e-06, + "loss": 0.804, + "step": 13669 + }, + { + "epoch": 0.2736531291444586, + "grad_norm": 1.5140925645828247, + "learning_rate": 8.522328526698639e-06, + "loss": 0.3365, + "step": 13670 + }, + { + "epoch": 0.27367314766158696, + "grad_norm": 1.0639327764511108, + "learning_rate": 8.522098434289022e-06, + "loss": 0.3391, + "step": 13671 + }, + { + "epoch": 0.2736931661787153, + "grad_norm": 1.3301475048065186, + "learning_rate": 8.521868327073261e-06, + "loss": 0.2956, + "step": 13672 + }, + { + "epoch": 0.27371318469584366, + "grad_norm": 0.9819098711013794, + "learning_rate": 8.521638205052323e-06, + "loss": 0.2989, + "step": 13673 + }, + { + "epoch": 0.273733203212972, + "grad_norm": 1.7655303478240967, + "learning_rate": 8.521408068227177e-06, + "loss": 0.7913, + "step": 13674 + }, + { + "epoch": 0.27375322173010036, + "grad_norm": 1.1762685775756836, + "learning_rate": 8.521177916598789e-06, + "loss": 0.3134, + "step": 13675 + }, + { + "epoch": 0.2737732402472287, + "grad_norm": 1.2116539478302002, + "learning_rate": 8.520947750168127e-06, + "loss": 0.2809, + "step": 13676 + }, + { + "epoch": 0.273793258764357, + "grad_norm": 1.1143969297409058, + "learning_rate": 8.520717568936157e-06, + "loss": 0.3195, + "step": 13677 + }, + { + "epoch": 0.27381327728148536, + "grad_norm": 1.1276494264602661, + "learning_rate": 8.52048737290385e-06, + "loss": 0.3478, + "step": 13678 + }, + { + "epoch": 0.2738332957986137, + "grad_norm": 1.0439938306808472, + "learning_rate": 8.52025716207217e-06, + "loss": 0.3347, + "step": 13679 + }, + { + "epoch": 0.27385331431574206, + "grad_norm": 1.052772045135498, + "learning_rate": 8.520026936442087e-06, + "loss": 0.3312, + "step": 13680 + }, + { + "epoch": 0.2738733328328704, + "grad_norm": 1.8801602125167847, + "learning_rate": 8.519796696014569e-06, + "loss": 0.7962, + "step": 13681 + }, + { + "epoch": 0.27389335134999876, + "grad_norm": 1.0618849992752075, + "learning_rate": 8.519566440790582e-06, + "loss": 0.3076, + "step": 13682 + }, + { + "epoch": 0.2739133698671271, + "grad_norm": 1.210499882698059, + "learning_rate": 8.519336170771095e-06, + "loss": 0.3181, + "step": 13683 + }, + { + "epoch": 0.27393338838425546, + "grad_norm": 1.0958226919174194, + "learning_rate": 8.519105885957077e-06, + "loss": 0.3221, + "step": 13684 + }, + { + "epoch": 0.27395340690138376, + "grad_norm": 1.139984369277954, + "learning_rate": 8.518875586349493e-06, + "loss": 0.3518, + "step": 13685 + }, + { + "epoch": 0.2739734254185121, + "grad_norm": 0.985759973526001, + "learning_rate": 8.518645271949318e-06, + "loss": 0.3818, + "step": 13686 + }, + { + "epoch": 0.27399344393564046, + "grad_norm": 1.0160672664642334, + "learning_rate": 8.51841494275751e-06, + "loss": 0.3317, + "step": 13687 + }, + { + "epoch": 0.2740134624527688, + "grad_norm": 1.1370456218719482, + "learning_rate": 8.518184598775046e-06, + "loss": 0.3683, + "step": 13688 + }, + { + "epoch": 0.27403348096989716, + "grad_norm": 1.3202152252197266, + "learning_rate": 8.51795424000289e-06, + "loss": 0.3297, + "step": 13689 + }, + { + "epoch": 0.2740534994870255, + "grad_norm": 1.2022240161895752, + "learning_rate": 8.517723866442014e-06, + "loss": 0.3372, + "step": 13690 + }, + { + "epoch": 0.27407351800415386, + "grad_norm": 1.0784932374954224, + "learning_rate": 8.517493478093382e-06, + "loss": 0.296, + "step": 13691 + }, + { + "epoch": 0.2740935365212822, + "grad_norm": 1.1010862588882446, + "learning_rate": 8.517263074957961e-06, + "loss": 0.3523, + "step": 13692 + }, + { + "epoch": 0.2741135550384105, + "grad_norm": 1.2369815111160278, + "learning_rate": 8.517032657036727e-06, + "loss": 0.3426, + "step": 13693 + }, + { + "epoch": 0.27413357355553886, + "grad_norm": 1.0736573934555054, + "learning_rate": 8.516802224330645e-06, + "loss": 0.2995, + "step": 13694 + }, + { + "epoch": 0.2741535920726672, + "grad_norm": 1.9719361066818237, + "learning_rate": 8.516571776840682e-06, + "loss": 0.826, + "step": 13695 + }, + { + "epoch": 0.27417361058979556, + "grad_norm": 1.0605744123458862, + "learning_rate": 8.516341314567808e-06, + "loss": 0.3362, + "step": 13696 + }, + { + "epoch": 0.2741936291069239, + "grad_norm": 1.1096687316894531, + "learning_rate": 8.516110837512992e-06, + "loss": 0.3179, + "step": 13697 + }, + { + "epoch": 0.27421364762405226, + "grad_norm": 1.0032694339752197, + "learning_rate": 8.515880345677201e-06, + "loss": 0.3189, + "step": 13698 + }, + { + "epoch": 0.2742336661411806, + "grad_norm": 1.125196933746338, + "learning_rate": 8.515649839061408e-06, + "loss": 0.3083, + "step": 13699 + }, + { + "epoch": 0.27425368465830896, + "grad_norm": 1.9410319328308105, + "learning_rate": 8.51541931766658e-06, + "loss": 0.8447, + "step": 13700 + }, + { + "epoch": 0.27427370317543726, + "grad_norm": 1.301645278930664, + "learning_rate": 8.515188781493685e-06, + "loss": 0.374, + "step": 13701 + }, + { + "epoch": 0.2742937216925656, + "grad_norm": 1.0904840230941772, + "learning_rate": 8.514958230543692e-06, + "loss": 0.3262, + "step": 13702 + }, + { + "epoch": 0.27431374020969396, + "grad_norm": 0.9969198107719421, + "learning_rate": 8.51472766481757e-06, + "loss": 0.2667, + "step": 13703 + }, + { + "epoch": 0.2743337587268223, + "grad_norm": 1.1780720949172974, + "learning_rate": 8.514497084316292e-06, + "loss": 0.3528, + "step": 13704 + }, + { + "epoch": 0.27435377724395066, + "grad_norm": 1.07108736038208, + "learning_rate": 8.514266489040823e-06, + "loss": 0.3183, + "step": 13705 + }, + { + "epoch": 0.274373795761079, + "grad_norm": 1.9968032836914062, + "learning_rate": 8.514035878992135e-06, + "loss": 0.8616, + "step": 13706 + }, + { + "epoch": 0.27439381427820736, + "grad_norm": 1.202238917350769, + "learning_rate": 8.513805254171194e-06, + "loss": 0.3469, + "step": 13707 + }, + { + "epoch": 0.2744138327953357, + "grad_norm": 1.1502513885498047, + "learning_rate": 8.513574614578975e-06, + "loss": 0.3305, + "step": 13708 + }, + { + "epoch": 0.274433851312464, + "grad_norm": 1.105546474456787, + "learning_rate": 8.513343960216442e-06, + "loss": 0.3127, + "step": 13709 + }, + { + "epoch": 0.27445386982959236, + "grad_norm": 1.8763964176177979, + "learning_rate": 8.513113291084569e-06, + "loss": 0.8327, + "step": 13710 + }, + { + "epoch": 0.2744738883467207, + "grad_norm": 1.0800567865371704, + "learning_rate": 8.512882607184322e-06, + "loss": 0.3393, + "step": 13711 + }, + { + "epoch": 0.27449390686384906, + "grad_norm": 1.8879332542419434, + "learning_rate": 8.512651908516673e-06, + "loss": 0.8545, + "step": 13712 + }, + { + "epoch": 0.2745139253809774, + "grad_norm": 1.0275417566299438, + "learning_rate": 8.512421195082592e-06, + "loss": 0.3396, + "step": 13713 + }, + { + "epoch": 0.27453394389810576, + "grad_norm": 1.1468158960342407, + "learning_rate": 8.512190466883048e-06, + "loss": 0.2781, + "step": 13714 + }, + { + "epoch": 0.2745539624152341, + "grad_norm": 1.0960355997085571, + "learning_rate": 8.511959723919013e-06, + "loss": 0.3238, + "step": 13715 + }, + { + "epoch": 0.27457398093236246, + "grad_norm": 1.1122103929519653, + "learning_rate": 8.51172896619145e-06, + "loss": 0.3596, + "step": 13716 + }, + { + "epoch": 0.27459399944949076, + "grad_norm": 1.2037330865859985, + "learning_rate": 8.511498193701338e-06, + "loss": 0.3766, + "step": 13717 + }, + { + "epoch": 0.2746140179666191, + "grad_norm": 1.118632197380066, + "learning_rate": 8.511267406449643e-06, + "loss": 0.3308, + "step": 13718 + }, + { + "epoch": 0.27463403648374746, + "grad_norm": 1.1984431743621826, + "learning_rate": 8.511036604437335e-06, + "loss": 0.3455, + "step": 13719 + }, + { + "epoch": 0.2746540550008758, + "grad_norm": 1.896243691444397, + "learning_rate": 8.510805787665384e-06, + "loss": 0.8696, + "step": 13720 + }, + { + "epoch": 0.27467407351800416, + "grad_norm": 1.1370230913162231, + "learning_rate": 8.510574956134763e-06, + "loss": 0.2912, + "step": 13721 + }, + { + "epoch": 0.2746940920351325, + "grad_norm": 1.802722692489624, + "learning_rate": 8.510344109846439e-06, + "loss": 0.8418, + "step": 13722 + }, + { + "epoch": 0.27471411055226086, + "grad_norm": 1.3995901346206665, + "learning_rate": 8.510113248801384e-06, + "loss": 0.3491, + "step": 13723 + }, + { + "epoch": 0.2747341290693892, + "grad_norm": 1.2110053300857544, + "learning_rate": 8.509882373000566e-06, + "loss": 0.3472, + "step": 13724 + }, + { + "epoch": 0.2747541475865175, + "grad_norm": 1.1252288818359375, + "learning_rate": 8.50965148244496e-06, + "loss": 0.2979, + "step": 13725 + }, + { + "epoch": 0.27477416610364586, + "grad_norm": 1.1014069318771362, + "learning_rate": 8.509420577135536e-06, + "loss": 0.3222, + "step": 13726 + }, + { + "epoch": 0.2747941846207742, + "grad_norm": 1.1268961429595947, + "learning_rate": 8.50918965707326e-06, + "loss": 0.3493, + "step": 13727 + }, + { + "epoch": 0.27481420313790256, + "grad_norm": 1.037856936454773, + "learning_rate": 8.508958722259108e-06, + "loss": 0.3274, + "step": 13728 + }, + { + "epoch": 0.2748342216550309, + "grad_norm": 1.0070961713790894, + "learning_rate": 8.508727772694047e-06, + "loss": 0.2894, + "step": 13729 + }, + { + "epoch": 0.27485424017215926, + "grad_norm": 1.1359996795654297, + "learning_rate": 8.508496808379052e-06, + "loss": 0.3664, + "step": 13730 + }, + { + "epoch": 0.2748742586892876, + "grad_norm": 1.196635127067566, + "learning_rate": 8.50826582931509e-06, + "loss": 0.3333, + "step": 13731 + }, + { + "epoch": 0.27489427720641596, + "grad_norm": 1.139794111251831, + "learning_rate": 8.508034835503133e-06, + "loss": 0.3, + "step": 13732 + }, + { + "epoch": 0.27491429572354426, + "grad_norm": 1.8692947626113892, + "learning_rate": 8.507803826944153e-06, + "loss": 0.8498, + "step": 13733 + }, + { + "epoch": 0.2749343142406726, + "grad_norm": 1.7325286865234375, + "learning_rate": 8.507572803639122e-06, + "loss": 0.841, + "step": 13734 + }, + { + "epoch": 0.27495433275780096, + "grad_norm": 1.1283924579620361, + "learning_rate": 8.507341765589008e-06, + "loss": 0.3406, + "step": 13735 + }, + { + "epoch": 0.2749743512749293, + "grad_norm": 1.1727585792541504, + "learning_rate": 8.507110712794784e-06, + "loss": 0.3482, + "step": 13736 + }, + { + "epoch": 0.27499436979205766, + "grad_norm": 1.1708381175994873, + "learning_rate": 8.506879645257421e-06, + "loss": 0.2866, + "step": 13737 + }, + { + "epoch": 0.275014388309186, + "grad_norm": 1.176764726638794, + "learning_rate": 8.50664856297789e-06, + "loss": 0.2793, + "step": 13738 + }, + { + "epoch": 0.27503440682631436, + "grad_norm": 1.1541141271591187, + "learning_rate": 8.506417465957166e-06, + "loss": 0.3189, + "step": 13739 + }, + { + "epoch": 0.2750544253434427, + "grad_norm": 1.3141849040985107, + "learning_rate": 8.506186354196215e-06, + "loss": 0.3127, + "step": 13740 + }, + { + "epoch": 0.275074443860571, + "grad_norm": 1.1406209468841553, + "learning_rate": 8.505955227696012e-06, + "loss": 0.3244, + "step": 13741 + }, + { + "epoch": 0.27509446237769936, + "grad_norm": 1.1758700609207153, + "learning_rate": 8.505724086457528e-06, + "loss": 0.3323, + "step": 13742 + }, + { + "epoch": 0.2751144808948277, + "grad_norm": 1.0804070234298706, + "learning_rate": 8.505492930481735e-06, + "loss": 0.3364, + "step": 13743 + }, + { + "epoch": 0.27513449941195606, + "grad_norm": 1.1217334270477295, + "learning_rate": 8.505261759769604e-06, + "loss": 0.3192, + "step": 13744 + }, + { + "epoch": 0.2751545179290844, + "grad_norm": 1.1867660284042358, + "learning_rate": 8.505030574322105e-06, + "loss": 0.2859, + "step": 13745 + }, + { + "epoch": 0.27517453644621276, + "grad_norm": 1.0423635244369507, + "learning_rate": 8.504799374140214e-06, + "loss": 0.3355, + "step": 13746 + }, + { + "epoch": 0.2751945549633411, + "grad_norm": 1.0091814994812012, + "learning_rate": 8.504568159224898e-06, + "loss": 0.3086, + "step": 13747 + }, + { + "epoch": 0.27521457348046946, + "grad_norm": 0.9919812083244324, + "learning_rate": 8.504336929577134e-06, + "loss": 0.2946, + "step": 13748 + }, + { + "epoch": 0.27523459199759776, + "grad_norm": 1.0451633930206299, + "learning_rate": 8.504105685197891e-06, + "loss": 0.2854, + "step": 13749 + }, + { + "epoch": 0.2752546105147261, + "grad_norm": 1.1230404376983643, + "learning_rate": 8.50387442608814e-06, + "loss": 0.3169, + "step": 13750 + }, + { + "epoch": 0.27527462903185446, + "grad_norm": 1.0831098556518555, + "learning_rate": 8.503643152248857e-06, + "loss": 0.3304, + "step": 13751 + }, + { + "epoch": 0.2752946475489828, + "grad_norm": 1.0646028518676758, + "learning_rate": 8.503411863681013e-06, + "loss": 0.3211, + "step": 13752 + }, + { + "epoch": 0.27531466606611116, + "grad_norm": 1.0825268030166626, + "learning_rate": 8.503180560385577e-06, + "loss": 0.3173, + "step": 13753 + }, + { + "epoch": 0.2753346845832395, + "grad_norm": 1.227569818496704, + "learning_rate": 8.502949242363526e-06, + "loss": 0.3452, + "step": 13754 + }, + { + "epoch": 0.27535470310036786, + "grad_norm": 1.1197057962417603, + "learning_rate": 8.50271790961583e-06, + "loss": 0.2915, + "step": 13755 + }, + { + "epoch": 0.2753747216174962, + "grad_norm": 1.0924407243728638, + "learning_rate": 8.50248656214346e-06, + "loss": 0.2884, + "step": 13756 + }, + { + "epoch": 0.2753947401346245, + "grad_norm": 1.041975498199463, + "learning_rate": 8.502255199947392e-06, + "loss": 0.3217, + "step": 13757 + }, + { + "epoch": 0.27541475865175286, + "grad_norm": 1.030866026878357, + "learning_rate": 8.502023823028596e-06, + "loss": 0.2988, + "step": 13758 + }, + { + "epoch": 0.2754347771688812, + "grad_norm": 1.9884445667266846, + "learning_rate": 8.501792431388046e-06, + "loss": 0.8289, + "step": 13759 + }, + { + "epoch": 0.27545479568600956, + "grad_norm": 1.0747090578079224, + "learning_rate": 8.501561025026713e-06, + "loss": 0.3296, + "step": 13760 + }, + { + "epoch": 0.2754748142031379, + "grad_norm": 1.0489534139633179, + "learning_rate": 8.501329603945573e-06, + "loss": 0.3421, + "step": 13761 + }, + { + "epoch": 0.27549483272026626, + "grad_norm": 1.0527276992797852, + "learning_rate": 8.501098168145595e-06, + "loss": 0.3008, + "step": 13762 + }, + { + "epoch": 0.2755148512373946, + "grad_norm": 1.1076644659042358, + "learning_rate": 8.500866717627755e-06, + "loss": 0.2916, + "step": 13763 + }, + { + "epoch": 0.27553486975452296, + "grad_norm": 1.1550228595733643, + "learning_rate": 8.500635252393023e-06, + "loss": 0.3627, + "step": 13764 + }, + { + "epoch": 0.27555488827165125, + "grad_norm": 1.1124117374420166, + "learning_rate": 8.500403772442376e-06, + "loss": 0.3465, + "step": 13765 + }, + { + "epoch": 0.2755749067887796, + "grad_norm": 1.143113374710083, + "learning_rate": 8.500172277776783e-06, + "loss": 0.3555, + "step": 13766 + }, + { + "epoch": 0.27559492530590796, + "grad_norm": 1.144716739654541, + "learning_rate": 8.49994076839722e-06, + "loss": 0.3838, + "step": 13767 + }, + { + "epoch": 0.2756149438230363, + "grad_norm": 1.9049408435821533, + "learning_rate": 8.499709244304658e-06, + "loss": 0.7846, + "step": 13768 + }, + { + "epoch": 0.27563496234016466, + "grad_norm": 1.1010966300964355, + "learning_rate": 8.499477705500073e-06, + "loss": 0.3157, + "step": 13769 + }, + { + "epoch": 0.275654980857293, + "grad_norm": 1.165933609008789, + "learning_rate": 8.499246151984437e-06, + "loss": 0.3233, + "step": 13770 + }, + { + "epoch": 0.27567499937442136, + "grad_norm": 1.2389286756515503, + "learning_rate": 8.499014583758722e-06, + "loss": 0.3313, + "step": 13771 + }, + { + "epoch": 0.2756950178915497, + "grad_norm": 1.0467135906219482, + "learning_rate": 8.498783000823903e-06, + "loss": 0.2696, + "step": 13772 + }, + { + "epoch": 0.275715036408678, + "grad_norm": 1.0363457202911377, + "learning_rate": 8.498551403180952e-06, + "loss": 0.3094, + "step": 13773 + }, + { + "epoch": 0.27573505492580636, + "grad_norm": 1.8584990501403809, + "learning_rate": 8.498319790830847e-06, + "loss": 0.8357, + "step": 13774 + }, + { + "epoch": 0.2757550734429347, + "grad_norm": 1.311655044555664, + "learning_rate": 8.498088163774555e-06, + "loss": 0.2886, + "step": 13775 + }, + { + "epoch": 0.27577509196006306, + "grad_norm": 1.2975062131881714, + "learning_rate": 8.497856522013056e-06, + "loss": 0.3325, + "step": 13776 + }, + { + "epoch": 0.2757951104771914, + "grad_norm": 1.0128424167633057, + "learning_rate": 8.497624865547321e-06, + "loss": 0.303, + "step": 13777 + }, + { + "epoch": 0.27581512899431976, + "grad_norm": 1.1095688343048096, + "learning_rate": 8.49739319437832e-06, + "loss": 0.2989, + "step": 13778 + }, + { + "epoch": 0.2758351475114481, + "grad_norm": 1.1322956085205078, + "learning_rate": 8.497161508507035e-06, + "loss": 0.3112, + "step": 13779 + }, + { + "epoch": 0.27585516602857646, + "grad_norm": 1.0349440574645996, + "learning_rate": 8.496929807934432e-06, + "loss": 0.3513, + "step": 13780 + }, + { + "epoch": 0.27587518454570475, + "grad_norm": 1.0749132633209229, + "learning_rate": 8.49669809266149e-06, + "loss": 0.3419, + "step": 13781 + }, + { + "epoch": 0.2758952030628331, + "grad_norm": 1.16519296169281, + "learning_rate": 8.496466362689184e-06, + "loss": 0.3615, + "step": 13782 + }, + { + "epoch": 0.27591522157996146, + "grad_norm": 1.0507875680923462, + "learning_rate": 8.496234618018482e-06, + "loss": 0.35, + "step": 13783 + }, + { + "epoch": 0.2759352400970898, + "grad_norm": 1.083421230316162, + "learning_rate": 8.496002858650365e-06, + "loss": 0.3287, + "step": 13784 + }, + { + "epoch": 0.27595525861421816, + "grad_norm": 1.1387262344360352, + "learning_rate": 8.495771084585803e-06, + "loss": 0.3634, + "step": 13785 + }, + { + "epoch": 0.2759752771313465, + "grad_norm": 1.7773308753967285, + "learning_rate": 8.495539295825773e-06, + "loss": 0.7804, + "step": 13786 + }, + { + "epoch": 0.27599529564847486, + "grad_norm": 1.2745319604873657, + "learning_rate": 8.495307492371247e-06, + "loss": 0.3267, + "step": 13787 + }, + { + "epoch": 0.2760153141656032, + "grad_norm": 1.0250349044799805, + "learning_rate": 8.4950756742232e-06, + "loss": 0.3418, + "step": 13788 + }, + { + "epoch": 0.2760353326827315, + "grad_norm": 1.075223684310913, + "learning_rate": 8.494843841382607e-06, + "loss": 0.3102, + "step": 13789 + }, + { + "epoch": 0.27605535119985986, + "grad_norm": 1.0590482950210571, + "learning_rate": 8.494611993850444e-06, + "loss": 0.3431, + "step": 13790 + }, + { + "epoch": 0.2760753697169882, + "grad_norm": 1.1604838371276855, + "learning_rate": 8.494380131627681e-06, + "loss": 0.3115, + "step": 13791 + }, + { + "epoch": 0.27609538823411656, + "grad_norm": 1.1602816581726074, + "learning_rate": 8.4941482547153e-06, + "loss": 0.3716, + "step": 13792 + }, + { + "epoch": 0.2761154067512449, + "grad_norm": 1.138917088508606, + "learning_rate": 8.49391636311427e-06, + "loss": 0.319, + "step": 13793 + }, + { + "epoch": 0.27613542526837326, + "grad_norm": 1.1491960287094116, + "learning_rate": 8.493684456825567e-06, + "loss": 0.3203, + "step": 13794 + }, + { + "epoch": 0.2761554437855016, + "grad_norm": 1.158207654953003, + "learning_rate": 8.493452535850166e-06, + "loss": 0.2991, + "step": 13795 + }, + { + "epoch": 0.27617546230262996, + "grad_norm": 1.0680136680603027, + "learning_rate": 8.493220600189046e-06, + "loss": 0.3405, + "step": 13796 + }, + { + "epoch": 0.27619548081975825, + "grad_norm": 1.1679537296295166, + "learning_rate": 8.492988649843174e-06, + "loss": 0.3242, + "step": 13797 + }, + { + "epoch": 0.2762154993368866, + "grad_norm": 1.0712244510650635, + "learning_rate": 8.49275668481353e-06, + "loss": 0.309, + "step": 13798 + }, + { + "epoch": 0.27623551785401496, + "grad_norm": 1.1895229816436768, + "learning_rate": 8.49252470510109e-06, + "loss": 0.3622, + "step": 13799 + }, + { + "epoch": 0.2762555363711433, + "grad_norm": 1.0949926376342773, + "learning_rate": 8.492292710706828e-06, + "loss": 0.3021, + "step": 13800 + }, + { + "epoch": 0.27627555488827166, + "grad_norm": 0.914607048034668, + "learning_rate": 8.492060701631717e-06, + "loss": 0.2879, + "step": 13801 + }, + { + "epoch": 0.2762955734054, + "grad_norm": 1.0784035921096802, + "learning_rate": 8.491828677876736e-06, + "loss": 0.3142, + "step": 13802 + }, + { + "epoch": 0.27631559192252836, + "grad_norm": 1.0352801084518433, + "learning_rate": 8.491596639442859e-06, + "loss": 0.3072, + "step": 13803 + }, + { + "epoch": 0.2763356104396567, + "grad_norm": 1.016287088394165, + "learning_rate": 8.49136458633106e-06, + "loss": 0.3214, + "step": 13804 + }, + { + "epoch": 0.276355628956785, + "grad_norm": 1.2106812000274658, + "learning_rate": 8.491132518542315e-06, + "loss": 0.3657, + "step": 13805 + }, + { + "epoch": 0.27637564747391336, + "grad_norm": 1.91708242893219, + "learning_rate": 8.490900436077603e-06, + "loss": 0.838, + "step": 13806 + }, + { + "epoch": 0.2763956659910417, + "grad_norm": 1.0425547361373901, + "learning_rate": 8.490668338937893e-06, + "loss": 0.351, + "step": 13807 + }, + { + "epoch": 0.27641568450817006, + "grad_norm": 1.0914310216903687, + "learning_rate": 8.490436227124168e-06, + "loss": 0.2702, + "step": 13808 + }, + { + "epoch": 0.2764357030252984, + "grad_norm": 1.8539776802062988, + "learning_rate": 8.490204100637399e-06, + "loss": 0.7705, + "step": 13809 + }, + { + "epoch": 0.27645572154242676, + "grad_norm": 1.0937203168869019, + "learning_rate": 8.489971959478562e-06, + "loss": 0.3548, + "step": 13810 + }, + { + "epoch": 0.2764757400595551, + "grad_norm": 1.2241321802139282, + "learning_rate": 8.489739803648634e-06, + "loss": 0.3561, + "step": 13811 + }, + { + "epoch": 0.27649575857668346, + "grad_norm": 1.156542420387268, + "learning_rate": 8.489507633148593e-06, + "loss": 0.3247, + "step": 13812 + }, + { + "epoch": 0.27651577709381175, + "grad_norm": 1.1473026275634766, + "learning_rate": 8.48927544797941e-06, + "loss": 0.3632, + "step": 13813 + }, + { + "epoch": 0.2765357956109401, + "grad_norm": 1.0021743774414062, + "learning_rate": 8.489043248142067e-06, + "loss": 0.3391, + "step": 13814 + }, + { + "epoch": 0.27655581412806846, + "grad_norm": 1.0736631155014038, + "learning_rate": 8.488811033637534e-06, + "loss": 0.3479, + "step": 13815 + }, + { + "epoch": 0.2765758326451968, + "grad_norm": 1.2264251708984375, + "learning_rate": 8.488578804466792e-06, + "loss": 0.3209, + "step": 13816 + }, + { + "epoch": 0.27659585116232516, + "grad_norm": 1.094041109085083, + "learning_rate": 8.488346560630815e-06, + "loss": 0.3111, + "step": 13817 + }, + { + "epoch": 0.2766158696794535, + "grad_norm": 1.0050957202911377, + "learning_rate": 8.488114302130582e-06, + "loss": 0.2979, + "step": 13818 + }, + { + "epoch": 0.27663588819658186, + "grad_norm": 1.0303728580474854, + "learning_rate": 8.487882028967063e-06, + "loss": 0.3211, + "step": 13819 + }, + { + "epoch": 0.2766559067137102, + "grad_norm": 1.0375505685806274, + "learning_rate": 8.48764974114124e-06, + "loss": 0.292, + "step": 13820 + }, + { + "epoch": 0.2766759252308385, + "grad_norm": 1.814310908317566, + "learning_rate": 8.48741743865409e-06, + "loss": 0.8579, + "step": 13821 + }, + { + "epoch": 0.27669594374796685, + "grad_norm": 1.1719824075698853, + "learning_rate": 8.487185121506585e-06, + "loss": 0.3193, + "step": 13822 + }, + { + "epoch": 0.2767159622650952, + "grad_norm": 1.2052359580993652, + "learning_rate": 8.486952789699707e-06, + "loss": 0.323, + "step": 13823 + }, + { + "epoch": 0.27673598078222356, + "grad_norm": 1.1375118494033813, + "learning_rate": 8.486720443234426e-06, + "loss": 0.3231, + "step": 13824 + }, + { + "epoch": 0.2767559992993519, + "grad_norm": 1.0290461778640747, + "learning_rate": 8.486488082111726e-06, + "loss": 0.319, + "step": 13825 + }, + { + "epoch": 0.27677601781648026, + "grad_norm": 1.4398294687271118, + "learning_rate": 8.48625570633258e-06, + "loss": 0.3132, + "step": 13826 + }, + { + "epoch": 0.2767960363336086, + "grad_norm": 1.090151071548462, + "learning_rate": 8.486023315897964e-06, + "loss": 0.3224, + "step": 13827 + }, + { + "epoch": 0.27681605485073696, + "grad_norm": 1.0026588439941406, + "learning_rate": 8.485790910808858e-06, + "loss": 0.272, + "step": 13828 + }, + { + "epoch": 0.27683607336786525, + "grad_norm": 1.113468885421753, + "learning_rate": 8.485558491066235e-06, + "loss": 0.3375, + "step": 13829 + }, + { + "epoch": 0.2768560918849936, + "grad_norm": 1.360032558441162, + "learning_rate": 8.485326056671074e-06, + "loss": 0.3248, + "step": 13830 + }, + { + "epoch": 0.27687611040212196, + "grad_norm": 1.2771942615509033, + "learning_rate": 8.485093607624354e-06, + "loss": 0.3647, + "step": 13831 + }, + { + "epoch": 0.2768961289192503, + "grad_norm": 1.0835174322128296, + "learning_rate": 8.48486114392705e-06, + "loss": 0.35, + "step": 13832 + }, + { + "epoch": 0.27691614743637866, + "grad_norm": 1.1990923881530762, + "learning_rate": 8.48462866558014e-06, + "loss": 0.3293, + "step": 13833 + }, + { + "epoch": 0.276936165953507, + "grad_norm": 1.8590543270111084, + "learning_rate": 8.4843961725846e-06, + "loss": 0.7915, + "step": 13834 + }, + { + "epoch": 0.27695618447063536, + "grad_norm": 1.1123247146606445, + "learning_rate": 8.484163664941409e-06, + "loss": 0.3457, + "step": 13835 + }, + { + "epoch": 0.2769762029877637, + "grad_norm": 1.0937412977218628, + "learning_rate": 8.483931142651543e-06, + "loss": 0.3164, + "step": 13836 + }, + { + "epoch": 0.276996221504892, + "grad_norm": 1.2772774696350098, + "learning_rate": 8.483698605715982e-06, + "loss": 0.3275, + "step": 13837 + }, + { + "epoch": 0.27701624002202035, + "grad_norm": 1.2515244483947754, + "learning_rate": 8.4834660541357e-06, + "loss": 0.3261, + "step": 13838 + }, + { + "epoch": 0.2770362585391487, + "grad_norm": 1.7777358293533325, + "learning_rate": 8.483233487911678e-06, + "loss": 0.8238, + "step": 13839 + }, + { + "epoch": 0.27705627705627706, + "grad_norm": 1.2704744338989258, + "learning_rate": 8.48300090704489e-06, + "loss": 0.3455, + "step": 13840 + }, + { + "epoch": 0.2770762955734054, + "grad_norm": 1.1617867946624756, + "learning_rate": 8.482768311536316e-06, + "loss": 0.3472, + "step": 13841 + }, + { + "epoch": 0.27709631409053376, + "grad_norm": 1.1353917121887207, + "learning_rate": 8.482535701386934e-06, + "loss": 0.2793, + "step": 13842 + }, + { + "epoch": 0.2771163326076621, + "grad_norm": 1.0913453102111816, + "learning_rate": 8.482303076597721e-06, + "loss": 0.3285, + "step": 13843 + }, + { + "epoch": 0.27713635112479046, + "grad_norm": 1.9015522003173828, + "learning_rate": 8.482070437169656e-06, + "loss": 0.8692, + "step": 13844 + }, + { + "epoch": 0.27715636964191875, + "grad_norm": 1.0584558248519897, + "learning_rate": 8.481837783103717e-06, + "loss": 0.2625, + "step": 13845 + }, + { + "epoch": 0.2771763881590471, + "grad_norm": 0.9901259541511536, + "learning_rate": 8.481605114400879e-06, + "loss": 0.3331, + "step": 13846 + }, + { + "epoch": 0.27719640667617546, + "grad_norm": 1.0967621803283691, + "learning_rate": 8.481372431062124e-06, + "loss": 0.3246, + "step": 13847 + }, + { + "epoch": 0.2772164251933038, + "grad_norm": 1.2264975309371948, + "learning_rate": 8.48113973308843e-06, + "loss": 0.3498, + "step": 13848 + }, + { + "epoch": 0.27723644371043216, + "grad_norm": 1.12909734249115, + "learning_rate": 8.480907020480772e-06, + "loss": 0.3016, + "step": 13849 + }, + { + "epoch": 0.2772564622275605, + "grad_norm": 1.1558904647827148, + "learning_rate": 8.48067429324013e-06, + "loss": 0.3033, + "step": 13850 + }, + { + "epoch": 0.27727648074468886, + "grad_norm": 1.0585664510726929, + "learning_rate": 8.480441551367482e-06, + "loss": 0.3724, + "step": 13851 + }, + { + "epoch": 0.27729649926181715, + "grad_norm": 1.1290303468704224, + "learning_rate": 8.480208794863806e-06, + "loss": 0.3018, + "step": 13852 + }, + { + "epoch": 0.2773165177789455, + "grad_norm": 1.2032936811447144, + "learning_rate": 8.479976023730081e-06, + "loss": 0.3506, + "step": 13853 + }, + { + "epoch": 0.27733653629607385, + "grad_norm": 1.0322588682174683, + "learning_rate": 8.479743237967288e-06, + "loss": 0.2974, + "step": 13854 + }, + { + "epoch": 0.2773565548132022, + "grad_norm": 1.1739752292633057, + "learning_rate": 8.479510437576402e-06, + "loss": 0.2981, + "step": 13855 + }, + { + "epoch": 0.27737657333033056, + "grad_norm": 1.1158634424209595, + "learning_rate": 8.479277622558404e-06, + "loss": 0.279, + "step": 13856 + }, + { + "epoch": 0.2773965918474589, + "grad_norm": 1.16193687915802, + "learning_rate": 8.479044792914269e-06, + "loss": 0.3158, + "step": 13857 + }, + { + "epoch": 0.27741661036458726, + "grad_norm": 1.1025912761688232, + "learning_rate": 8.47881194864498e-06, + "loss": 0.3355, + "step": 13858 + }, + { + "epoch": 0.2774366288817156, + "grad_norm": 0.9847583174705505, + "learning_rate": 8.478579089751516e-06, + "loss": 0.3025, + "step": 13859 + }, + { + "epoch": 0.2774566473988439, + "grad_norm": 1.1483454704284668, + "learning_rate": 8.478346216234852e-06, + "loss": 0.336, + "step": 13860 + }, + { + "epoch": 0.27747666591597225, + "grad_norm": 1.0560156106948853, + "learning_rate": 8.478113328095969e-06, + "loss": 0.3227, + "step": 13861 + }, + { + "epoch": 0.2774966844331006, + "grad_norm": 2.00089693069458, + "learning_rate": 8.477880425335845e-06, + "loss": 0.7942, + "step": 13862 + }, + { + "epoch": 0.27751670295022896, + "grad_norm": 0.9717238545417786, + "learning_rate": 8.477647507955464e-06, + "loss": 0.2867, + "step": 13863 + }, + { + "epoch": 0.2775367214673573, + "grad_norm": 1.1614315509796143, + "learning_rate": 8.477414575955798e-06, + "loss": 0.3282, + "step": 13864 + }, + { + "epoch": 0.27755673998448566, + "grad_norm": 1.0344315767288208, + "learning_rate": 8.47718162933783e-06, + "loss": 0.2748, + "step": 13865 + }, + { + "epoch": 0.277576758501614, + "grad_norm": 1.2430208921432495, + "learning_rate": 8.476948668102538e-06, + "loss": 0.3464, + "step": 13866 + }, + { + "epoch": 0.27759677701874236, + "grad_norm": 1.1005373001098633, + "learning_rate": 8.476715692250906e-06, + "loss": 0.3197, + "step": 13867 + }, + { + "epoch": 0.27761679553587065, + "grad_norm": 1.2118583917617798, + "learning_rate": 8.476482701783905e-06, + "loss": 0.3732, + "step": 13868 + }, + { + "epoch": 0.277636814052999, + "grad_norm": 1.8664149045944214, + "learning_rate": 8.476249696702522e-06, + "loss": 0.8139, + "step": 13869 + }, + { + "epoch": 0.27765683257012735, + "grad_norm": 1.0741301774978638, + "learning_rate": 8.476016677007731e-06, + "loss": 0.2918, + "step": 13870 + }, + { + "epoch": 0.2776768510872557, + "grad_norm": 1.2302383184432983, + "learning_rate": 8.475783642700514e-06, + "loss": 0.289, + "step": 13871 + }, + { + "epoch": 0.27769686960438406, + "grad_norm": 1.3615806102752686, + "learning_rate": 8.475550593781852e-06, + "loss": 0.3295, + "step": 13872 + }, + { + "epoch": 0.2777168881215124, + "grad_norm": 1.7805403470993042, + "learning_rate": 8.475317530252721e-06, + "loss": 0.8287, + "step": 13873 + }, + { + "epoch": 0.27773690663864076, + "grad_norm": 1.0670300722122192, + "learning_rate": 8.475084452114106e-06, + "loss": 0.3432, + "step": 13874 + }, + { + "epoch": 0.2777569251557691, + "grad_norm": 1.1036542654037476, + "learning_rate": 8.47485135936698e-06, + "loss": 0.2812, + "step": 13875 + }, + { + "epoch": 0.2777769436728974, + "grad_norm": 1.2403442859649658, + "learning_rate": 8.47461825201233e-06, + "loss": 0.353, + "step": 13876 + }, + { + "epoch": 0.27779696219002575, + "grad_norm": 1.1155599355697632, + "learning_rate": 8.474385130051133e-06, + "loss": 0.3139, + "step": 13877 + }, + { + "epoch": 0.2778169807071541, + "grad_norm": 1.1698137521743774, + "learning_rate": 8.474151993484366e-06, + "loss": 0.3328, + "step": 13878 + }, + { + "epoch": 0.27783699922428245, + "grad_norm": 1.092642903327942, + "learning_rate": 8.473918842313013e-06, + "loss": 0.3477, + "step": 13879 + }, + { + "epoch": 0.2778570177414108, + "grad_norm": 1.2418115139007568, + "learning_rate": 8.473685676538053e-06, + "loss": 0.3192, + "step": 13880 + }, + { + "epoch": 0.27787703625853916, + "grad_norm": 1.033583164215088, + "learning_rate": 8.473452496160465e-06, + "loss": 0.2991, + "step": 13881 + }, + { + "epoch": 0.2778970547756675, + "grad_norm": 1.1506847143173218, + "learning_rate": 8.473219301181232e-06, + "loss": 0.3544, + "step": 13882 + }, + { + "epoch": 0.27791707329279586, + "grad_norm": 1.10298752784729, + "learning_rate": 8.472986091601331e-06, + "loss": 0.3231, + "step": 13883 + }, + { + "epoch": 0.27793709180992415, + "grad_norm": 1.1127872467041016, + "learning_rate": 8.472752867421743e-06, + "loss": 0.3054, + "step": 13884 + }, + { + "epoch": 0.2779571103270525, + "grad_norm": 1.1554760932922363, + "learning_rate": 8.472519628643451e-06, + "loss": 0.3676, + "step": 13885 + }, + { + "epoch": 0.27797712884418085, + "grad_norm": 1.0779722929000854, + "learning_rate": 8.472286375267434e-06, + "loss": 0.2776, + "step": 13886 + }, + { + "epoch": 0.2779971473613092, + "grad_norm": 1.1895980834960938, + "learning_rate": 8.472053107294671e-06, + "loss": 0.3579, + "step": 13887 + }, + { + "epoch": 0.27801716587843756, + "grad_norm": 1.2837258577346802, + "learning_rate": 8.471819824726144e-06, + "loss": 0.3517, + "step": 13888 + }, + { + "epoch": 0.2780371843955659, + "grad_norm": 1.1686077117919922, + "learning_rate": 8.471586527562837e-06, + "loss": 0.3211, + "step": 13889 + }, + { + "epoch": 0.27805720291269426, + "grad_norm": 1.057228922843933, + "learning_rate": 8.471353215805722e-06, + "loss": 0.3386, + "step": 13890 + }, + { + "epoch": 0.2780772214298226, + "grad_norm": 1.139481782913208, + "learning_rate": 8.471119889455787e-06, + "loss": 0.3396, + "step": 13891 + }, + { + "epoch": 0.2780972399469509, + "grad_norm": 1.1093745231628418, + "learning_rate": 8.470886548514014e-06, + "loss": 0.3497, + "step": 13892 + }, + { + "epoch": 0.27811725846407925, + "grad_norm": 1.1355831623077393, + "learning_rate": 8.470653192981378e-06, + "loss": 0.3465, + "step": 13893 + }, + { + "epoch": 0.2781372769812076, + "grad_norm": 1.9142520427703857, + "learning_rate": 8.470419822858864e-06, + "loss": 0.8441, + "step": 13894 + }, + { + "epoch": 0.27815729549833595, + "grad_norm": 1.23996901512146, + "learning_rate": 8.470186438147449e-06, + "loss": 0.3046, + "step": 13895 + }, + { + "epoch": 0.2781773140154643, + "grad_norm": 0.982147216796875, + "learning_rate": 8.46995303884812e-06, + "loss": 0.3278, + "step": 13896 + }, + { + "epoch": 0.27819733253259266, + "grad_norm": 1.0530999898910522, + "learning_rate": 8.469719624961854e-06, + "loss": 0.2832, + "step": 13897 + }, + { + "epoch": 0.278217351049721, + "grad_norm": 1.087278127670288, + "learning_rate": 8.469486196489635e-06, + "loss": 0.3353, + "step": 13898 + }, + { + "epoch": 0.27823736956684936, + "grad_norm": 1.2493640184402466, + "learning_rate": 8.469252753432441e-06, + "loss": 0.2931, + "step": 13899 + }, + { + "epoch": 0.27825738808397765, + "grad_norm": 1.1227869987487793, + "learning_rate": 8.469019295791256e-06, + "loss": 0.3366, + "step": 13900 + }, + { + "epoch": 0.278277406601106, + "grad_norm": 1.1172585487365723, + "learning_rate": 8.46878582356706e-06, + "loss": 0.3027, + "step": 13901 + }, + { + "epoch": 0.27829742511823435, + "grad_norm": 1.0579496622085571, + "learning_rate": 8.468552336760834e-06, + "loss": 0.3336, + "step": 13902 + }, + { + "epoch": 0.2783174436353627, + "grad_norm": 1.0589863061904907, + "learning_rate": 8.468318835373562e-06, + "loss": 0.3327, + "step": 13903 + }, + { + "epoch": 0.27833746215249106, + "grad_norm": 1.0847413539886475, + "learning_rate": 8.468085319406223e-06, + "loss": 0.3042, + "step": 13904 + }, + { + "epoch": 0.2783574806696194, + "grad_norm": 1.1189879179000854, + "learning_rate": 8.4678517888598e-06, + "loss": 0.3023, + "step": 13905 + }, + { + "epoch": 0.27837749918674776, + "grad_norm": 1.1316614151000977, + "learning_rate": 8.467618243735273e-06, + "loss": 0.2643, + "step": 13906 + }, + { + "epoch": 0.2783975177038761, + "grad_norm": 0.9650679230690002, + "learning_rate": 8.467384684033628e-06, + "loss": 0.3289, + "step": 13907 + }, + { + "epoch": 0.2784175362210044, + "grad_norm": 1.0759289264678955, + "learning_rate": 8.467151109755843e-06, + "loss": 0.3564, + "step": 13908 + }, + { + "epoch": 0.27843755473813275, + "grad_norm": 1.2408896684646606, + "learning_rate": 8.4669175209029e-06, + "loss": 0.2986, + "step": 13909 + }, + { + "epoch": 0.2784575732552611, + "grad_norm": 0.9991808533668518, + "learning_rate": 8.466683917475782e-06, + "loss": 0.3141, + "step": 13910 + }, + { + "epoch": 0.27847759177238945, + "grad_norm": 1.1186498403549194, + "learning_rate": 8.46645029947547e-06, + "loss": 0.3584, + "step": 13911 + }, + { + "epoch": 0.2784976102895178, + "grad_norm": 1.1077848672866821, + "learning_rate": 8.466216666902949e-06, + "loss": 0.3212, + "step": 13912 + }, + { + "epoch": 0.27851762880664616, + "grad_norm": 1.073891282081604, + "learning_rate": 8.465983019759197e-06, + "loss": 0.3508, + "step": 13913 + }, + { + "epoch": 0.2785376473237745, + "grad_norm": 1.0838758945465088, + "learning_rate": 8.4657493580452e-06, + "loss": 0.2938, + "step": 13914 + }, + { + "epoch": 0.27855766584090286, + "grad_norm": 1.085206389427185, + "learning_rate": 8.465515681761937e-06, + "loss": 0.3244, + "step": 13915 + }, + { + "epoch": 0.27857768435803115, + "grad_norm": 1.1760843992233276, + "learning_rate": 8.465281990910392e-06, + "loss": 0.3583, + "step": 13916 + }, + { + "epoch": 0.2785977028751595, + "grad_norm": 1.0603535175323486, + "learning_rate": 8.465048285491548e-06, + "loss": 0.3144, + "step": 13917 + }, + { + "epoch": 0.27861772139228785, + "grad_norm": 1.1966243982315063, + "learning_rate": 8.464814565506388e-06, + "loss": 0.396, + "step": 13918 + }, + { + "epoch": 0.2786377399094162, + "grad_norm": 1.179930329322815, + "learning_rate": 8.464580830955891e-06, + "loss": 0.3485, + "step": 13919 + }, + { + "epoch": 0.27865775842654456, + "grad_norm": 1.1071481704711914, + "learning_rate": 8.464347081841041e-06, + "loss": 0.3109, + "step": 13920 + }, + { + "epoch": 0.2786777769436729, + "grad_norm": 1.1629306077957153, + "learning_rate": 8.464113318162825e-06, + "loss": 0.316, + "step": 13921 + }, + { + "epoch": 0.27869779546080126, + "grad_norm": 1.046606421470642, + "learning_rate": 8.463879539922219e-06, + "loss": 0.3661, + "step": 13922 + }, + { + "epoch": 0.2787178139779296, + "grad_norm": 1.1156905889511108, + "learning_rate": 8.46364574712021e-06, + "loss": 0.3291, + "step": 13923 + }, + { + "epoch": 0.2787378324950579, + "grad_norm": 1.0659812688827515, + "learning_rate": 8.463411939757779e-06, + "loss": 0.2919, + "step": 13924 + }, + { + "epoch": 0.27875785101218625, + "grad_norm": 1.193996787071228, + "learning_rate": 8.46317811783591e-06, + "loss": 0.3417, + "step": 13925 + }, + { + "epoch": 0.2787778695293146, + "grad_norm": 1.2153552770614624, + "learning_rate": 8.462944281355587e-06, + "loss": 0.3738, + "step": 13926 + }, + { + "epoch": 0.27879788804644295, + "grad_norm": 1.0468146800994873, + "learning_rate": 8.46271043031779e-06, + "loss": 0.3345, + "step": 13927 + }, + { + "epoch": 0.2788179065635713, + "grad_norm": 1.0711668729782104, + "learning_rate": 8.462476564723503e-06, + "loss": 0.3329, + "step": 13928 + }, + { + "epoch": 0.27883792508069966, + "grad_norm": 1.0495785474777222, + "learning_rate": 8.46224268457371e-06, + "loss": 0.302, + "step": 13929 + }, + { + "epoch": 0.278857943597828, + "grad_norm": 1.2709195613861084, + "learning_rate": 8.462008789869394e-06, + "loss": 0.3669, + "step": 13930 + }, + { + "epoch": 0.27887796211495636, + "grad_norm": 1.1319724321365356, + "learning_rate": 8.461774880611539e-06, + "loss": 0.3046, + "step": 13931 + }, + { + "epoch": 0.27889798063208465, + "grad_norm": 1.0765691995620728, + "learning_rate": 8.461540956801126e-06, + "loss": 0.2766, + "step": 13932 + }, + { + "epoch": 0.278917999149213, + "grad_norm": 1.135845422744751, + "learning_rate": 8.46130701843914e-06, + "loss": 0.3165, + "step": 13933 + }, + { + "epoch": 0.27893801766634135, + "grad_norm": 1.0985203981399536, + "learning_rate": 8.461073065526564e-06, + "loss": 0.3348, + "step": 13934 + }, + { + "epoch": 0.2789580361834697, + "grad_norm": 1.245110273361206, + "learning_rate": 8.460839098064383e-06, + "loss": 0.3157, + "step": 13935 + }, + { + "epoch": 0.27897805470059805, + "grad_norm": 1.7487190961837769, + "learning_rate": 8.460605116053577e-06, + "loss": 0.7823, + "step": 13936 + }, + { + "epoch": 0.2789980732177264, + "grad_norm": 1.3732175827026367, + "learning_rate": 8.460371119495134e-06, + "loss": 0.3547, + "step": 13937 + }, + { + "epoch": 0.27901809173485476, + "grad_norm": 1.2597843408584595, + "learning_rate": 8.460137108390034e-06, + "loss": 0.3119, + "step": 13938 + }, + { + "epoch": 0.2790381102519831, + "grad_norm": 1.024440884590149, + "learning_rate": 8.459903082739262e-06, + "loss": 0.2929, + "step": 13939 + }, + { + "epoch": 0.2790581287691114, + "grad_norm": 1.1229361295700073, + "learning_rate": 8.459669042543805e-06, + "loss": 0.3568, + "step": 13940 + }, + { + "epoch": 0.27907814728623975, + "grad_norm": 1.2389973402023315, + "learning_rate": 8.459434987804641e-06, + "loss": 0.3363, + "step": 13941 + }, + { + "epoch": 0.2790981658033681, + "grad_norm": 1.0795763731002808, + "learning_rate": 8.459200918522757e-06, + "loss": 0.3298, + "step": 13942 + }, + { + "epoch": 0.27911818432049645, + "grad_norm": 1.2172696590423584, + "learning_rate": 8.458966834699138e-06, + "loss": 0.3499, + "step": 13943 + }, + { + "epoch": 0.2791382028376248, + "grad_norm": 1.1845507621765137, + "learning_rate": 8.458732736334767e-06, + "loss": 0.3293, + "step": 13944 + }, + { + "epoch": 0.27915822135475316, + "grad_norm": 1.0636651515960693, + "learning_rate": 8.458498623430627e-06, + "loss": 0.2878, + "step": 13945 + }, + { + "epoch": 0.2791782398718815, + "grad_norm": 1.061615228652954, + "learning_rate": 8.458264495987703e-06, + "loss": 0.2866, + "step": 13946 + }, + { + "epoch": 0.27919825838900986, + "grad_norm": 1.2222824096679688, + "learning_rate": 8.458030354006978e-06, + "loss": 0.2964, + "step": 13947 + }, + { + "epoch": 0.27921827690613815, + "grad_norm": 1.0487098693847656, + "learning_rate": 8.45779619748944e-06, + "loss": 0.3182, + "step": 13948 + }, + { + "epoch": 0.2792382954232665, + "grad_norm": 1.0674617290496826, + "learning_rate": 8.45756202643607e-06, + "loss": 0.2827, + "step": 13949 + }, + { + "epoch": 0.27925831394039485, + "grad_norm": 1.2620859146118164, + "learning_rate": 8.457327840847853e-06, + "loss": 0.3264, + "step": 13950 + }, + { + "epoch": 0.2792783324575232, + "grad_norm": 1.1221681833267212, + "learning_rate": 8.457093640725774e-06, + "loss": 0.3149, + "step": 13951 + }, + { + "epoch": 0.27929835097465155, + "grad_norm": 1.134598731994629, + "learning_rate": 8.456859426070818e-06, + "loss": 0.3424, + "step": 13952 + }, + { + "epoch": 0.2793183694917799, + "grad_norm": 1.0701513290405273, + "learning_rate": 8.456625196883967e-06, + "loss": 0.3197, + "step": 13953 + }, + { + "epoch": 0.27933838800890826, + "grad_norm": 1.0865427255630493, + "learning_rate": 8.45639095316621e-06, + "loss": 0.3334, + "step": 13954 + }, + { + "epoch": 0.2793584065260366, + "grad_norm": 1.1239008903503418, + "learning_rate": 8.456156694918527e-06, + "loss": 0.3187, + "step": 13955 + }, + { + "epoch": 0.2793784250431649, + "grad_norm": 1.0926299095153809, + "learning_rate": 8.455922422141907e-06, + "loss": 0.3159, + "step": 13956 + }, + { + "epoch": 0.27939844356029325, + "grad_norm": 0.9466344118118286, + "learning_rate": 8.45568813483733e-06, + "loss": 0.323, + "step": 13957 + }, + { + "epoch": 0.2794184620774216, + "grad_norm": 1.1873531341552734, + "learning_rate": 8.455453833005787e-06, + "loss": 0.3589, + "step": 13958 + }, + { + "epoch": 0.27943848059454995, + "grad_norm": 1.1785939931869507, + "learning_rate": 8.455219516648258e-06, + "loss": 0.3037, + "step": 13959 + }, + { + "epoch": 0.2794584991116783, + "grad_norm": 1.1993377208709717, + "learning_rate": 8.45498518576573e-06, + "loss": 0.3169, + "step": 13960 + }, + { + "epoch": 0.27947851762880666, + "grad_norm": 1.0410109758377075, + "learning_rate": 8.454750840359187e-06, + "loss": 0.3003, + "step": 13961 + }, + { + "epoch": 0.279498536145935, + "grad_norm": 1.060425877571106, + "learning_rate": 8.454516480429616e-06, + "loss": 0.3085, + "step": 13962 + }, + { + "epoch": 0.27951855466306336, + "grad_norm": 1.1290878057479858, + "learning_rate": 8.454282105978001e-06, + "loss": 0.3401, + "step": 13963 + }, + { + "epoch": 0.27953857318019165, + "grad_norm": 1.1146726608276367, + "learning_rate": 8.454047717005327e-06, + "loss": 0.2996, + "step": 13964 + }, + { + "epoch": 0.27955859169732, + "grad_norm": 1.0819902420043945, + "learning_rate": 8.45381331351258e-06, + "loss": 0.3367, + "step": 13965 + }, + { + "epoch": 0.27957861021444835, + "grad_norm": 1.2263070344924927, + "learning_rate": 8.453578895500746e-06, + "loss": 0.3365, + "step": 13966 + }, + { + "epoch": 0.2795986287315767, + "grad_norm": 1.065897822380066, + "learning_rate": 8.453344462970809e-06, + "loss": 0.3155, + "step": 13967 + }, + { + "epoch": 0.27961864724870505, + "grad_norm": 1.1687923669815063, + "learning_rate": 8.453110015923753e-06, + "loss": 0.3644, + "step": 13968 + }, + { + "epoch": 0.2796386657658334, + "grad_norm": 1.0128061771392822, + "learning_rate": 8.452875554360568e-06, + "loss": 0.3263, + "step": 13969 + }, + { + "epoch": 0.27965868428296176, + "grad_norm": 1.0968459844589233, + "learning_rate": 8.452641078282236e-06, + "loss": 0.3239, + "step": 13970 + }, + { + "epoch": 0.2796787028000901, + "grad_norm": 1.0614221096038818, + "learning_rate": 8.452406587689745e-06, + "loss": 0.3222, + "step": 13971 + }, + { + "epoch": 0.2796987213172184, + "grad_norm": 1.1719876527786255, + "learning_rate": 8.45217208258408e-06, + "loss": 0.3149, + "step": 13972 + }, + { + "epoch": 0.27971873983434675, + "grad_norm": 1.0475536584854126, + "learning_rate": 8.451937562966226e-06, + "loss": 0.3458, + "step": 13973 + }, + { + "epoch": 0.2797387583514751, + "grad_norm": 1.0712393522262573, + "learning_rate": 8.451703028837168e-06, + "loss": 0.2939, + "step": 13974 + }, + { + "epoch": 0.27975877686860345, + "grad_norm": 1.3668493032455444, + "learning_rate": 8.451468480197893e-06, + "loss": 0.2862, + "step": 13975 + }, + { + "epoch": 0.2797787953857318, + "grad_norm": 1.1757653951644897, + "learning_rate": 8.45123391704939e-06, + "loss": 0.3802, + "step": 13976 + }, + { + "epoch": 0.27979881390286016, + "grad_norm": 1.88431715965271, + "learning_rate": 8.45099933939264e-06, + "loss": 0.8657, + "step": 13977 + }, + { + "epoch": 0.2798188324199885, + "grad_norm": 1.0826846361160278, + "learning_rate": 8.450764747228634e-06, + "loss": 0.3249, + "step": 13978 + }, + { + "epoch": 0.27983885093711686, + "grad_norm": 1.0556321144104004, + "learning_rate": 8.450530140558353e-06, + "loss": 0.2799, + "step": 13979 + }, + { + "epoch": 0.27985886945424515, + "grad_norm": 1.0395337343215942, + "learning_rate": 8.450295519382787e-06, + "loss": 0.3136, + "step": 13980 + }, + { + "epoch": 0.2798788879713735, + "grad_norm": 1.2470152378082275, + "learning_rate": 8.450060883702919e-06, + "loss": 0.3502, + "step": 13981 + }, + { + "epoch": 0.27989890648850185, + "grad_norm": 1.0422866344451904, + "learning_rate": 8.44982623351974e-06, + "loss": 0.311, + "step": 13982 + }, + { + "epoch": 0.2799189250056302, + "grad_norm": 1.0395023822784424, + "learning_rate": 8.449591568834234e-06, + "loss": 0.3551, + "step": 13983 + }, + { + "epoch": 0.27993894352275855, + "grad_norm": 1.0855820178985596, + "learning_rate": 8.449356889647387e-06, + "loss": 0.3455, + "step": 13984 + }, + { + "epoch": 0.2799589620398869, + "grad_norm": 1.214113712310791, + "learning_rate": 8.449122195960187e-06, + "loss": 0.2893, + "step": 13985 + }, + { + "epoch": 0.27997898055701526, + "grad_norm": 1.1520365476608276, + "learning_rate": 8.448887487773617e-06, + "loss": 0.3258, + "step": 13986 + }, + { + "epoch": 0.2799989990741436, + "grad_norm": 1.077417254447937, + "learning_rate": 8.448652765088667e-06, + "loss": 0.3459, + "step": 13987 + }, + { + "epoch": 0.2800190175912719, + "grad_norm": 1.0485550165176392, + "learning_rate": 8.448418027906325e-06, + "loss": 0.3333, + "step": 13988 + }, + { + "epoch": 0.28003903610840025, + "grad_norm": 1.0263959169387817, + "learning_rate": 8.448183276227573e-06, + "loss": 0.3098, + "step": 13989 + }, + { + "epoch": 0.2800590546255286, + "grad_norm": 1.1147682666778564, + "learning_rate": 8.4479485100534e-06, + "loss": 0.2997, + "step": 13990 + }, + { + "epoch": 0.28007907314265695, + "grad_norm": 1.1024633646011353, + "learning_rate": 8.447713729384797e-06, + "loss": 0.3292, + "step": 13991 + }, + { + "epoch": 0.2800990916597853, + "grad_norm": 1.0477980375289917, + "learning_rate": 8.447478934222746e-06, + "loss": 0.3427, + "step": 13992 + }, + { + "epoch": 0.28011911017691365, + "grad_norm": 1.1887166500091553, + "learning_rate": 8.447244124568234e-06, + "loss": 0.3669, + "step": 13993 + }, + { + "epoch": 0.280139128694042, + "grad_norm": 1.1701219081878662, + "learning_rate": 8.447009300422251e-06, + "loss": 0.291, + "step": 13994 + }, + { + "epoch": 0.28015914721117036, + "grad_norm": 1.1123727560043335, + "learning_rate": 8.446774461785782e-06, + "loss": 0.3761, + "step": 13995 + }, + { + "epoch": 0.28017916572829865, + "grad_norm": 1.1193639039993286, + "learning_rate": 8.446539608659817e-06, + "loss": 0.2942, + "step": 13996 + }, + { + "epoch": 0.280199184245427, + "grad_norm": 1.1840310096740723, + "learning_rate": 8.446304741045338e-06, + "loss": 0.3953, + "step": 13997 + }, + { + "epoch": 0.28021920276255535, + "grad_norm": 1.764742374420166, + "learning_rate": 8.446069858943337e-06, + "loss": 0.8309, + "step": 13998 + }, + { + "epoch": 0.2802392212796837, + "grad_norm": 1.84791100025177, + "learning_rate": 8.4458349623548e-06, + "loss": 0.8478, + "step": 13999 + }, + { + "epoch": 0.28025923979681205, + "grad_norm": 2.0006370544433594, + "learning_rate": 8.445600051280714e-06, + "loss": 0.7927, + "step": 14000 + }, + { + "epoch": 0.2802792583139404, + "grad_norm": 1.0929800271987915, + "learning_rate": 8.445365125722068e-06, + "loss": 0.3199, + "step": 14001 + }, + { + "epoch": 0.28029927683106876, + "grad_norm": 1.1988507509231567, + "learning_rate": 8.445130185679848e-06, + "loss": 0.3673, + "step": 14002 + }, + { + "epoch": 0.2803192953481971, + "grad_norm": 0.9793110489845276, + "learning_rate": 8.44489523115504e-06, + "loss": 0.3102, + "step": 14003 + }, + { + "epoch": 0.2803393138653254, + "grad_norm": 1.1897859573364258, + "learning_rate": 8.444660262148636e-06, + "loss": 0.283, + "step": 14004 + }, + { + "epoch": 0.28035933238245375, + "grad_norm": 1.0896828174591064, + "learning_rate": 8.44442527866162e-06, + "loss": 0.335, + "step": 14005 + }, + { + "epoch": 0.2803793508995821, + "grad_norm": 1.1911969184875488, + "learning_rate": 8.444190280694984e-06, + "loss": 0.3321, + "step": 14006 + }, + { + "epoch": 0.28039936941671045, + "grad_norm": 0.9511134028434753, + "learning_rate": 8.443955268249711e-06, + "loss": 0.2865, + "step": 14007 + }, + { + "epoch": 0.2804193879338388, + "grad_norm": 0.9344732761383057, + "learning_rate": 8.443720241326791e-06, + "loss": 0.2735, + "step": 14008 + }, + { + "epoch": 0.28043940645096715, + "grad_norm": 1.2544772624969482, + "learning_rate": 8.443485199927215e-06, + "loss": 0.3984, + "step": 14009 + }, + { + "epoch": 0.2804594249680955, + "grad_norm": 1.1749407052993774, + "learning_rate": 8.443250144051965e-06, + "loss": 0.3388, + "step": 14010 + }, + { + "epoch": 0.28047944348522386, + "grad_norm": 1.0559908151626587, + "learning_rate": 8.443015073702034e-06, + "loss": 0.2939, + "step": 14011 + }, + { + "epoch": 0.28049946200235215, + "grad_norm": 1.0524535179138184, + "learning_rate": 8.442779988878408e-06, + "loss": 0.3455, + "step": 14012 + }, + { + "epoch": 0.2805194805194805, + "grad_norm": 0.9829584956169128, + "learning_rate": 8.442544889582077e-06, + "loss": 0.3037, + "step": 14013 + }, + { + "epoch": 0.28053949903660885, + "grad_norm": 1.1563090085983276, + "learning_rate": 8.442309775814026e-06, + "loss": 0.3363, + "step": 14014 + }, + { + "epoch": 0.2805595175537372, + "grad_norm": 1.1102708578109741, + "learning_rate": 8.442074647575247e-06, + "loss": 0.3108, + "step": 14015 + }, + { + "epoch": 0.28057953607086555, + "grad_norm": 1.095533013343811, + "learning_rate": 8.441839504866727e-06, + "loss": 0.381, + "step": 14016 + }, + { + "epoch": 0.2805995545879939, + "grad_norm": 1.0531858205795288, + "learning_rate": 8.441604347689454e-06, + "loss": 0.3163, + "step": 14017 + }, + { + "epoch": 0.28061957310512226, + "grad_norm": 1.1165895462036133, + "learning_rate": 8.441369176044418e-06, + "loss": 0.3101, + "step": 14018 + }, + { + "epoch": 0.2806395916222506, + "grad_norm": 1.2389205694198608, + "learning_rate": 8.441133989932604e-06, + "loss": 0.3596, + "step": 14019 + }, + { + "epoch": 0.2806596101393789, + "grad_norm": 1.1968823671340942, + "learning_rate": 8.440898789355005e-06, + "loss": 0.3553, + "step": 14020 + }, + { + "epoch": 0.28067962865650725, + "grad_norm": 1.158113718032837, + "learning_rate": 8.440663574312608e-06, + "loss": 0.312, + "step": 14021 + }, + { + "epoch": 0.2806996471736356, + "grad_norm": 1.1528971195220947, + "learning_rate": 8.440428344806402e-06, + "loss": 0.3166, + "step": 14022 + }, + { + "epoch": 0.28071966569076395, + "grad_norm": 1.0201165676116943, + "learning_rate": 8.440193100837375e-06, + "loss": 0.3373, + "step": 14023 + }, + { + "epoch": 0.2807396842078923, + "grad_norm": 1.1390219926834106, + "learning_rate": 8.439957842406517e-06, + "loss": 0.3048, + "step": 14024 + }, + { + "epoch": 0.28075970272502065, + "grad_norm": 1.78482985496521, + "learning_rate": 8.439722569514816e-06, + "loss": 0.8609, + "step": 14025 + }, + { + "epoch": 0.280779721242149, + "grad_norm": 1.2453893423080444, + "learning_rate": 8.439487282163262e-06, + "loss": 0.3986, + "step": 14026 + }, + { + "epoch": 0.28079973975927736, + "grad_norm": 1.0912461280822754, + "learning_rate": 8.439251980352845e-06, + "loss": 0.307, + "step": 14027 + }, + { + "epoch": 0.28081975827640565, + "grad_norm": 0.9817609190940857, + "learning_rate": 8.439016664084551e-06, + "loss": 0.2904, + "step": 14028 + }, + { + "epoch": 0.280839776793534, + "grad_norm": 1.2268257141113281, + "learning_rate": 8.438781333359372e-06, + "loss": 0.3612, + "step": 14029 + }, + { + "epoch": 0.28085979531066235, + "grad_norm": 1.3073301315307617, + "learning_rate": 8.438545988178295e-06, + "loss": 0.3361, + "step": 14030 + }, + { + "epoch": 0.2808798138277907, + "grad_norm": 1.0883597135543823, + "learning_rate": 8.43831062854231e-06, + "loss": 0.3519, + "step": 14031 + }, + { + "epoch": 0.28089983234491905, + "grad_norm": 1.0698407888412476, + "learning_rate": 8.438075254452409e-06, + "loss": 0.3008, + "step": 14032 + }, + { + "epoch": 0.2809198508620474, + "grad_norm": 1.0984892845153809, + "learning_rate": 8.437839865909579e-06, + "loss": 0.375, + "step": 14033 + }, + { + "epoch": 0.28093986937917576, + "grad_norm": 1.3604179620742798, + "learning_rate": 8.43760446291481e-06, + "loss": 0.3727, + "step": 14034 + }, + { + "epoch": 0.2809598878963041, + "grad_norm": 1.0690958499908447, + "learning_rate": 8.437369045469091e-06, + "loss": 0.3157, + "step": 14035 + }, + { + "epoch": 0.2809799064134324, + "grad_norm": 1.1242793798446655, + "learning_rate": 8.437133613573414e-06, + "loss": 0.3483, + "step": 14036 + }, + { + "epoch": 0.28099992493056075, + "grad_norm": 1.1449434757232666, + "learning_rate": 8.436898167228764e-06, + "loss": 0.312, + "step": 14037 + }, + { + "epoch": 0.2810199434476891, + "grad_norm": 1.0611144304275513, + "learning_rate": 8.436662706436136e-06, + "loss": 0.3473, + "step": 14038 + }, + { + "epoch": 0.28103996196481745, + "grad_norm": 1.1522231101989746, + "learning_rate": 8.436427231196518e-06, + "loss": 0.3357, + "step": 14039 + }, + { + "epoch": 0.2810599804819458, + "grad_norm": 1.298403024673462, + "learning_rate": 8.436191741510898e-06, + "loss": 0.3594, + "step": 14040 + }, + { + "epoch": 0.28107999899907415, + "grad_norm": 1.0292885303497314, + "learning_rate": 8.435956237380267e-06, + "loss": 0.3113, + "step": 14041 + }, + { + "epoch": 0.2811000175162025, + "grad_norm": 1.1717538833618164, + "learning_rate": 8.435720718805618e-06, + "loss": 0.2838, + "step": 14042 + }, + { + "epoch": 0.28112003603333086, + "grad_norm": 1.1624677181243896, + "learning_rate": 8.435485185787935e-06, + "loss": 0.3836, + "step": 14043 + }, + { + "epoch": 0.28114005455045915, + "grad_norm": 1.0891635417938232, + "learning_rate": 8.435249638328214e-06, + "loss": 0.3312, + "step": 14044 + }, + { + "epoch": 0.2811600730675875, + "grad_norm": 1.235148310661316, + "learning_rate": 8.435014076427442e-06, + "loss": 0.3661, + "step": 14045 + }, + { + "epoch": 0.28118009158471585, + "grad_norm": 1.0063292980194092, + "learning_rate": 8.434778500086611e-06, + "loss": 0.3146, + "step": 14046 + }, + { + "epoch": 0.2812001101018442, + "grad_norm": 1.9341943264007568, + "learning_rate": 8.434542909306708e-06, + "loss": 0.8391, + "step": 14047 + }, + { + "epoch": 0.28122012861897255, + "grad_norm": 1.219672679901123, + "learning_rate": 8.434307304088728e-06, + "loss": 0.366, + "step": 14048 + }, + { + "epoch": 0.2812401471361009, + "grad_norm": 1.0217989683151245, + "learning_rate": 8.43407168443366e-06, + "loss": 0.3145, + "step": 14049 + }, + { + "epoch": 0.28126016565322925, + "grad_norm": 0.9948217868804932, + "learning_rate": 8.43383605034249e-06, + "loss": 0.329, + "step": 14050 + }, + { + "epoch": 0.2812801841703576, + "grad_norm": 1.4467294216156006, + "learning_rate": 8.433600401816215e-06, + "loss": 0.3456, + "step": 14051 + }, + { + "epoch": 0.2813002026874859, + "grad_norm": 1.0711346864700317, + "learning_rate": 8.433364738855823e-06, + "loss": 0.3008, + "step": 14052 + }, + { + "epoch": 0.28132022120461425, + "grad_norm": 1.0076619386672974, + "learning_rate": 8.433129061462305e-06, + "loss": 0.3009, + "step": 14053 + }, + { + "epoch": 0.2813402397217426, + "grad_norm": 1.168498158454895, + "learning_rate": 8.432893369636648e-06, + "loss": 0.3269, + "step": 14054 + }, + { + "epoch": 0.28136025823887095, + "grad_norm": 1.2043311595916748, + "learning_rate": 8.432657663379849e-06, + "loss": 0.2954, + "step": 14055 + }, + { + "epoch": 0.2813802767559993, + "grad_norm": 2.1880199909210205, + "learning_rate": 8.432421942692894e-06, + "loss": 0.8304, + "step": 14056 + }, + { + "epoch": 0.28140029527312765, + "grad_norm": 1.79286527633667, + "learning_rate": 8.432186207576776e-06, + "loss": 0.7885, + "step": 14057 + }, + { + "epoch": 0.281420313790256, + "grad_norm": 1.1266731023788452, + "learning_rate": 8.431950458032487e-06, + "loss": 0.2955, + "step": 14058 + }, + { + "epoch": 0.28144033230738436, + "grad_norm": 2.242527961730957, + "learning_rate": 8.431714694061017e-06, + "loss": 0.8287, + "step": 14059 + }, + { + "epoch": 0.28146035082451265, + "grad_norm": 1.129952073097229, + "learning_rate": 8.431478915663355e-06, + "loss": 0.3635, + "step": 14060 + }, + { + "epoch": 0.281480369341641, + "grad_norm": 1.315308928489685, + "learning_rate": 8.431243122840495e-06, + "loss": 0.3249, + "step": 14061 + }, + { + "epoch": 0.28150038785876935, + "grad_norm": 1.0787498950958252, + "learning_rate": 8.43100731559343e-06, + "loss": 0.3099, + "step": 14062 + }, + { + "epoch": 0.2815204063758977, + "grad_norm": 1.1228896379470825, + "learning_rate": 8.430771493923144e-06, + "loss": 0.291, + "step": 14063 + }, + { + "epoch": 0.28154042489302605, + "grad_norm": 1.1768244504928589, + "learning_rate": 8.430535657830636e-06, + "loss": 0.3373, + "step": 14064 + }, + { + "epoch": 0.2815604434101544, + "grad_norm": 1.1162587404251099, + "learning_rate": 8.430299807316892e-06, + "loss": 0.3212, + "step": 14065 + }, + { + "epoch": 0.28158046192728275, + "grad_norm": 1.1372711658477783, + "learning_rate": 8.430063942382909e-06, + "loss": 0.3155, + "step": 14066 + }, + { + "epoch": 0.2816004804444111, + "grad_norm": 1.0304449796676636, + "learning_rate": 8.429828063029675e-06, + "loss": 0.3372, + "step": 14067 + }, + { + "epoch": 0.2816204989615394, + "grad_norm": 1.0595612525939941, + "learning_rate": 8.42959216925818e-06, + "loss": 0.3267, + "step": 14068 + }, + { + "epoch": 0.28164051747866775, + "grad_norm": 1.1221171617507935, + "learning_rate": 8.42935626106942e-06, + "loss": 0.3282, + "step": 14069 + }, + { + "epoch": 0.2816605359957961, + "grad_norm": 1.151098608970642, + "learning_rate": 8.429120338464381e-06, + "loss": 0.3589, + "step": 14070 + }, + { + "epoch": 0.28168055451292445, + "grad_norm": 1.1608681678771973, + "learning_rate": 8.42888440144406e-06, + "loss": 0.3412, + "step": 14071 + }, + { + "epoch": 0.2817005730300528, + "grad_norm": 1.2099123001098633, + "learning_rate": 8.428648450009446e-06, + "loss": 0.2985, + "step": 14072 + }, + { + "epoch": 0.28172059154718115, + "grad_norm": 1.3863099813461304, + "learning_rate": 8.428412484161532e-06, + "loss": 0.2901, + "step": 14073 + }, + { + "epoch": 0.2817406100643095, + "grad_norm": 1.0697740316390991, + "learning_rate": 8.42817650390131e-06, + "loss": 0.3144, + "step": 14074 + }, + { + "epoch": 0.28176062858143786, + "grad_norm": 1.0151286125183105, + "learning_rate": 8.427940509229771e-06, + "loss": 0.3052, + "step": 14075 + }, + { + "epoch": 0.28178064709856615, + "grad_norm": 1.167410969734192, + "learning_rate": 8.427704500147908e-06, + "loss": 0.2691, + "step": 14076 + }, + { + "epoch": 0.2818006656156945, + "grad_norm": 1.1104762554168701, + "learning_rate": 8.427468476656715e-06, + "loss": 0.3345, + "step": 14077 + }, + { + "epoch": 0.28182068413282285, + "grad_norm": 1.1601171493530273, + "learning_rate": 8.42723243875718e-06, + "loss": 0.3377, + "step": 14078 + }, + { + "epoch": 0.2818407026499512, + "grad_norm": 1.1486883163452148, + "learning_rate": 8.426996386450298e-06, + "loss": 0.351, + "step": 14079 + }, + { + "epoch": 0.28186072116707955, + "grad_norm": 1.7341012954711914, + "learning_rate": 8.426760319737061e-06, + "loss": 0.8065, + "step": 14080 + }, + { + "epoch": 0.2818807396842079, + "grad_norm": 1.0328295230865479, + "learning_rate": 8.42652423861846e-06, + "loss": 0.3244, + "step": 14081 + }, + { + "epoch": 0.28190075820133625, + "grad_norm": 1.0104233026504517, + "learning_rate": 8.42628814309549e-06, + "loss": 0.3164, + "step": 14082 + }, + { + "epoch": 0.2819207767184646, + "grad_norm": 1.3017938137054443, + "learning_rate": 8.42605203316914e-06, + "loss": 0.35, + "step": 14083 + }, + { + "epoch": 0.2819407952355929, + "grad_norm": 1.1179531812667847, + "learning_rate": 8.425815908840407e-06, + "loss": 0.2803, + "step": 14084 + }, + { + "epoch": 0.28196081375272125, + "grad_norm": 1.1679450273513794, + "learning_rate": 8.425579770110279e-06, + "loss": 0.346, + "step": 14085 + }, + { + "epoch": 0.2819808322698496, + "grad_norm": 1.8857970237731934, + "learning_rate": 8.42534361697975e-06, + "loss": 0.8152, + "step": 14086 + }, + { + "epoch": 0.28200085078697795, + "grad_norm": 1.2200089693069458, + "learning_rate": 8.425107449449816e-06, + "loss": 0.3242, + "step": 14087 + }, + { + "epoch": 0.2820208693041063, + "grad_norm": 1.0667906999588013, + "learning_rate": 8.424871267521469e-06, + "loss": 0.2879, + "step": 14088 + }, + { + "epoch": 0.28204088782123465, + "grad_norm": 1.1307992935180664, + "learning_rate": 8.424635071195697e-06, + "loss": 0.389, + "step": 14089 + }, + { + "epoch": 0.282060906338363, + "grad_norm": 1.2764816284179688, + "learning_rate": 8.424398860473496e-06, + "loss": 0.2815, + "step": 14090 + }, + { + "epoch": 0.28208092485549136, + "grad_norm": 1.0909326076507568, + "learning_rate": 8.424162635355862e-06, + "loss": 0.3084, + "step": 14091 + }, + { + "epoch": 0.28210094337261965, + "grad_norm": 1.1777156591415405, + "learning_rate": 8.423926395843782e-06, + "loss": 0.3528, + "step": 14092 + }, + { + "epoch": 0.282120961889748, + "grad_norm": 1.0864144563674927, + "learning_rate": 8.423690141938254e-06, + "loss": 0.3397, + "step": 14093 + }, + { + "epoch": 0.28214098040687635, + "grad_norm": 0.9898544549942017, + "learning_rate": 8.423453873640268e-06, + "loss": 0.2952, + "step": 14094 + }, + { + "epoch": 0.2821609989240047, + "grad_norm": 1.1430498361587524, + "learning_rate": 8.42321759095082e-06, + "loss": 0.3108, + "step": 14095 + }, + { + "epoch": 0.28218101744113305, + "grad_norm": 1.1808075904846191, + "learning_rate": 8.422981293870902e-06, + "loss": 0.3045, + "step": 14096 + }, + { + "epoch": 0.2822010359582614, + "grad_norm": 1.2248528003692627, + "learning_rate": 8.422744982401508e-06, + "loss": 0.3297, + "step": 14097 + }, + { + "epoch": 0.28222105447538975, + "grad_norm": 1.2501389980316162, + "learning_rate": 8.42250865654363e-06, + "loss": 0.3752, + "step": 14098 + }, + { + "epoch": 0.2822410729925181, + "grad_norm": 1.060988426208496, + "learning_rate": 8.42227231629826e-06, + "loss": 0.3268, + "step": 14099 + }, + { + "epoch": 0.2822610915096464, + "grad_norm": 1.1201543807983398, + "learning_rate": 8.422035961666396e-06, + "loss": 0.3493, + "step": 14100 + }, + { + "epoch": 0.28228111002677475, + "grad_norm": 1.1437110900878906, + "learning_rate": 8.421799592649028e-06, + "loss": 0.3435, + "step": 14101 + }, + { + "epoch": 0.2823011285439031, + "grad_norm": 1.1268231868743896, + "learning_rate": 8.421563209247153e-06, + "loss": 0.3475, + "step": 14102 + }, + { + "epoch": 0.28232114706103145, + "grad_norm": 1.7419811487197876, + "learning_rate": 8.42132681146176e-06, + "loss": 0.8279, + "step": 14103 + }, + { + "epoch": 0.2823411655781598, + "grad_norm": 0.9826239347457886, + "learning_rate": 8.421090399293846e-06, + "loss": 0.25, + "step": 14104 + }, + { + "epoch": 0.28236118409528815, + "grad_norm": 1.0444648265838623, + "learning_rate": 8.420853972744404e-06, + "loss": 0.3478, + "step": 14105 + }, + { + "epoch": 0.2823812026124165, + "grad_norm": 1.0177613496780396, + "learning_rate": 8.42061753181443e-06, + "loss": 0.3197, + "step": 14106 + }, + { + "epoch": 0.28240122112954485, + "grad_norm": 1.092461347579956, + "learning_rate": 8.420381076504915e-06, + "loss": 0.3402, + "step": 14107 + }, + { + "epoch": 0.28242123964667315, + "grad_norm": 0.9987649917602539, + "learning_rate": 8.420144606816854e-06, + "loss": 0.311, + "step": 14108 + }, + { + "epoch": 0.2824412581638015, + "grad_norm": 1.1531037092208862, + "learning_rate": 8.419908122751241e-06, + "loss": 0.3149, + "step": 14109 + }, + { + "epoch": 0.28246127668092985, + "grad_norm": 0.9648556709289551, + "learning_rate": 8.41967162430907e-06, + "loss": 0.2925, + "step": 14110 + }, + { + "epoch": 0.2824812951980582, + "grad_norm": 1.1392115354537964, + "learning_rate": 8.419435111491334e-06, + "loss": 0.3302, + "step": 14111 + }, + { + "epoch": 0.28250131371518655, + "grad_norm": 1.141656517982483, + "learning_rate": 8.419198584299031e-06, + "loss": 0.3755, + "step": 14112 + }, + { + "epoch": 0.2825213322323149, + "grad_norm": 1.0461176633834839, + "learning_rate": 8.418962042733152e-06, + "loss": 0.3338, + "step": 14113 + }, + { + "epoch": 0.28254135074944325, + "grad_norm": 2.0633695125579834, + "learning_rate": 8.418725486794693e-06, + "loss": 0.7986, + "step": 14114 + }, + { + "epoch": 0.2825613692665716, + "grad_norm": 1.0793092250823975, + "learning_rate": 8.418488916484648e-06, + "loss": 0.3225, + "step": 14115 + }, + { + "epoch": 0.2825813877836999, + "grad_norm": 1.1603397130966187, + "learning_rate": 8.418252331804009e-06, + "loss": 0.3483, + "step": 14116 + }, + { + "epoch": 0.28260140630082825, + "grad_norm": 1.1935501098632812, + "learning_rate": 8.418015732753775e-06, + "loss": 0.3591, + "step": 14117 + }, + { + "epoch": 0.2826214248179566, + "grad_norm": 1.0679585933685303, + "learning_rate": 8.417779119334937e-06, + "loss": 0.3359, + "step": 14118 + }, + { + "epoch": 0.28264144333508495, + "grad_norm": 1.130076289176941, + "learning_rate": 8.417542491548493e-06, + "loss": 0.3143, + "step": 14119 + }, + { + "epoch": 0.2826614618522133, + "grad_norm": 1.2102243900299072, + "learning_rate": 8.417305849395434e-06, + "loss": 0.3382, + "step": 14120 + }, + { + "epoch": 0.28268148036934165, + "grad_norm": 1.210347056388855, + "learning_rate": 8.417069192876757e-06, + "loss": 0.3587, + "step": 14121 + }, + { + "epoch": 0.28270149888647, + "grad_norm": 1.0233005285263062, + "learning_rate": 8.416832521993457e-06, + "loss": 0.3104, + "step": 14122 + }, + { + "epoch": 0.28272151740359835, + "grad_norm": 1.1235820055007935, + "learning_rate": 8.416595836746528e-06, + "loss": 0.3088, + "step": 14123 + }, + { + "epoch": 0.28274153592072665, + "grad_norm": 1.1411372423171997, + "learning_rate": 8.416359137136968e-06, + "loss": 0.312, + "step": 14124 + }, + { + "epoch": 0.282761554437855, + "grad_norm": 1.1512465476989746, + "learning_rate": 8.416122423165764e-06, + "loss": 0.3107, + "step": 14125 + }, + { + "epoch": 0.28278157295498335, + "grad_norm": 1.008664846420288, + "learning_rate": 8.415885694833921e-06, + "loss": 0.2802, + "step": 14126 + }, + { + "epoch": 0.2828015914721117, + "grad_norm": 1.1388144493103027, + "learning_rate": 8.415648952142429e-06, + "loss": 0.2929, + "step": 14127 + }, + { + "epoch": 0.28282160998924005, + "grad_norm": 1.7922970056533813, + "learning_rate": 8.415412195092281e-06, + "loss": 0.7623, + "step": 14128 + }, + { + "epoch": 0.2828416285063684, + "grad_norm": 1.818743348121643, + "learning_rate": 8.415175423684478e-06, + "loss": 0.8223, + "step": 14129 + }, + { + "epoch": 0.28286164702349675, + "grad_norm": 1.0552271604537964, + "learning_rate": 8.414938637920012e-06, + "loss": 0.295, + "step": 14130 + }, + { + "epoch": 0.2828816655406251, + "grad_norm": 1.1085678339004517, + "learning_rate": 8.414701837799877e-06, + "loss": 0.3442, + "step": 14131 + }, + { + "epoch": 0.2829016840577534, + "grad_norm": 1.0353033542633057, + "learning_rate": 8.414465023325071e-06, + "loss": 0.3195, + "step": 14132 + }, + { + "epoch": 0.28292170257488175, + "grad_norm": 1.3191907405853271, + "learning_rate": 8.414228194496591e-06, + "loss": 0.3565, + "step": 14133 + }, + { + "epoch": 0.2829417210920101, + "grad_norm": 1.1462914943695068, + "learning_rate": 8.413991351315429e-06, + "loss": 0.3483, + "step": 14134 + }, + { + "epoch": 0.28296173960913845, + "grad_norm": 1.2380784749984741, + "learning_rate": 8.413754493782583e-06, + "loss": 0.3096, + "step": 14135 + }, + { + "epoch": 0.2829817581262668, + "grad_norm": 1.0806044340133667, + "learning_rate": 8.413517621899045e-06, + "loss": 0.3822, + "step": 14136 + }, + { + "epoch": 0.28300177664339515, + "grad_norm": 1.128771424293518, + "learning_rate": 8.413280735665816e-06, + "loss": 0.3139, + "step": 14137 + }, + { + "epoch": 0.2830217951605235, + "grad_norm": 1.0499789714813232, + "learning_rate": 8.413043835083889e-06, + "loss": 0.3243, + "step": 14138 + }, + { + "epoch": 0.28304181367765185, + "grad_norm": 1.2028062343597412, + "learning_rate": 8.412806920154259e-06, + "loss": 0.3553, + "step": 14139 + }, + { + "epoch": 0.28306183219478015, + "grad_norm": 1.1470576524734497, + "learning_rate": 8.412569990877925e-06, + "loss": 0.3474, + "step": 14140 + }, + { + "epoch": 0.2830818507119085, + "grad_norm": 1.2286571264266968, + "learning_rate": 8.41233304725588e-06, + "loss": 0.31, + "step": 14141 + }, + { + "epoch": 0.28310186922903685, + "grad_norm": 0.9790144562721252, + "learning_rate": 8.412096089289122e-06, + "loss": 0.3432, + "step": 14142 + }, + { + "epoch": 0.2831218877461652, + "grad_norm": 1.0851057767868042, + "learning_rate": 8.411859116978644e-06, + "loss": 0.2868, + "step": 14143 + }, + { + "epoch": 0.28314190626329355, + "grad_norm": 1.1119420528411865, + "learning_rate": 8.411622130325448e-06, + "loss": 0.3589, + "step": 14144 + }, + { + "epoch": 0.2831619247804219, + "grad_norm": 1.1034064292907715, + "learning_rate": 8.411385129330524e-06, + "loss": 0.3172, + "step": 14145 + }, + { + "epoch": 0.28318194329755025, + "grad_norm": 1.046852946281433, + "learning_rate": 8.411148113994873e-06, + "loss": 0.349, + "step": 14146 + }, + { + "epoch": 0.2832019618146786, + "grad_norm": 1.1195957660675049, + "learning_rate": 8.410911084319487e-06, + "loss": 0.3136, + "step": 14147 + }, + { + "epoch": 0.2832219803318069, + "grad_norm": 1.04679274559021, + "learning_rate": 8.410674040305366e-06, + "loss": 0.3173, + "step": 14148 + }, + { + "epoch": 0.28324199884893525, + "grad_norm": 1.090139389038086, + "learning_rate": 8.410436981953507e-06, + "loss": 0.3274, + "step": 14149 + }, + { + "epoch": 0.2832620173660636, + "grad_norm": 1.1417287588119507, + "learning_rate": 8.410199909264902e-06, + "loss": 0.3114, + "step": 14150 + }, + { + "epoch": 0.28328203588319195, + "grad_norm": 1.0647939443588257, + "learning_rate": 8.409962822240554e-06, + "loss": 0.268, + "step": 14151 + }, + { + "epoch": 0.2833020544003203, + "grad_norm": 1.1110889911651611, + "learning_rate": 8.409725720881452e-06, + "loss": 0.3797, + "step": 14152 + }, + { + "epoch": 0.28332207291744865, + "grad_norm": 1.119958519935608, + "learning_rate": 8.4094886051886e-06, + "loss": 0.2683, + "step": 14153 + }, + { + "epoch": 0.283342091434577, + "grad_norm": 1.0429115295410156, + "learning_rate": 8.40925147516299e-06, + "loss": 0.3877, + "step": 14154 + }, + { + "epoch": 0.28336210995170535, + "grad_norm": 1.212833046913147, + "learning_rate": 8.409014330805621e-06, + "loss": 0.3454, + "step": 14155 + }, + { + "epoch": 0.28338212846883365, + "grad_norm": 1.1102408170700073, + "learning_rate": 8.40877717211749e-06, + "loss": 0.3332, + "step": 14156 + }, + { + "epoch": 0.283402146985962, + "grad_norm": 1.115636944770813, + "learning_rate": 8.408539999099592e-06, + "loss": 0.3215, + "step": 14157 + }, + { + "epoch": 0.28342216550309035, + "grad_norm": 1.135043978691101, + "learning_rate": 8.408302811752926e-06, + "loss": 0.347, + "step": 14158 + }, + { + "epoch": 0.2834421840202187, + "grad_norm": 1.2297542095184326, + "learning_rate": 8.40806561007849e-06, + "loss": 0.3394, + "step": 14159 + }, + { + "epoch": 0.28346220253734705, + "grad_norm": 1.2580387592315674, + "learning_rate": 8.407828394077277e-06, + "loss": 0.3102, + "step": 14160 + }, + { + "epoch": 0.2834822210544754, + "grad_norm": 1.4738975763320923, + "learning_rate": 8.407591163750289e-06, + "loss": 0.3382, + "step": 14161 + }, + { + "epoch": 0.28350223957160375, + "grad_norm": 1.0223567485809326, + "learning_rate": 8.40735391909852e-06, + "loss": 0.3676, + "step": 14162 + }, + { + "epoch": 0.2835222580887321, + "grad_norm": 1.049284815788269, + "learning_rate": 8.407116660122967e-06, + "loss": 0.3509, + "step": 14163 + }, + { + "epoch": 0.2835422766058604, + "grad_norm": 1.1964879035949707, + "learning_rate": 8.406879386824631e-06, + "loss": 0.3402, + "step": 14164 + }, + { + "epoch": 0.28356229512298875, + "grad_norm": 2.08124041557312, + "learning_rate": 8.406642099204507e-06, + "loss": 0.8534, + "step": 14165 + }, + { + "epoch": 0.2835823136401171, + "grad_norm": 2.0620181560516357, + "learning_rate": 8.406404797263592e-06, + "loss": 0.8589, + "step": 14166 + }, + { + "epoch": 0.28360233215724545, + "grad_norm": 1.0508567094802856, + "learning_rate": 8.406167481002884e-06, + "loss": 0.3052, + "step": 14167 + }, + { + "epoch": 0.2836223506743738, + "grad_norm": 1.086562156677246, + "learning_rate": 8.405930150423382e-06, + "loss": 0.3098, + "step": 14168 + }, + { + "epoch": 0.28364236919150215, + "grad_norm": 1.1208783388137817, + "learning_rate": 8.405692805526083e-06, + "loss": 0.3196, + "step": 14169 + }, + { + "epoch": 0.2836623877086305, + "grad_norm": 1.2742520570755005, + "learning_rate": 8.405455446311981e-06, + "loss": 0.3097, + "step": 14170 + }, + { + "epoch": 0.28368240622575885, + "grad_norm": 1.0534260272979736, + "learning_rate": 8.405218072782082e-06, + "loss": 0.3347, + "step": 14171 + }, + { + "epoch": 0.28370242474288715, + "grad_norm": 1.82181715965271, + "learning_rate": 8.404980684937376e-06, + "loss": 0.7857, + "step": 14172 + }, + { + "epoch": 0.2837224432600155, + "grad_norm": 1.076897144317627, + "learning_rate": 8.404743282778865e-06, + "loss": 0.3074, + "step": 14173 + }, + { + "epoch": 0.28374246177714385, + "grad_norm": 1.2021468877792358, + "learning_rate": 8.404505866307544e-06, + "loss": 0.3905, + "step": 14174 + }, + { + "epoch": 0.2837624802942722, + "grad_norm": 1.0578161478042603, + "learning_rate": 8.404268435524415e-06, + "loss": 0.3378, + "step": 14175 + }, + { + "epoch": 0.28378249881140055, + "grad_norm": 1.0834687948226929, + "learning_rate": 8.404030990430473e-06, + "loss": 0.31, + "step": 14176 + }, + { + "epoch": 0.2838025173285289, + "grad_norm": 1.1524770259857178, + "learning_rate": 8.403793531026718e-06, + "loss": 0.3453, + "step": 14177 + }, + { + "epoch": 0.28382253584565725, + "grad_norm": 1.095945119857788, + "learning_rate": 8.403556057314147e-06, + "loss": 0.352, + "step": 14178 + }, + { + "epoch": 0.2838425543627856, + "grad_norm": 1.191893219947815, + "learning_rate": 8.40331856929376e-06, + "loss": 0.3115, + "step": 14179 + }, + { + "epoch": 0.2838625728799139, + "grad_norm": 1.0876438617706299, + "learning_rate": 8.403081066966552e-06, + "loss": 0.3288, + "step": 14180 + }, + { + "epoch": 0.28388259139704225, + "grad_norm": 1.1383934020996094, + "learning_rate": 8.402843550333526e-06, + "loss": 0.3589, + "step": 14181 + }, + { + "epoch": 0.2839026099141706, + "grad_norm": 2.2144317626953125, + "learning_rate": 8.402606019395677e-06, + "loss": 0.7605, + "step": 14182 + }, + { + "epoch": 0.28392262843129895, + "grad_norm": 2.1566731929779053, + "learning_rate": 8.402368474154004e-06, + "loss": 0.8585, + "step": 14183 + }, + { + "epoch": 0.2839426469484273, + "grad_norm": 1.1653213500976562, + "learning_rate": 8.402130914609508e-06, + "loss": 0.3401, + "step": 14184 + }, + { + "epoch": 0.28396266546555565, + "grad_norm": 1.1636219024658203, + "learning_rate": 8.401893340763183e-06, + "loss": 0.3351, + "step": 14185 + }, + { + "epoch": 0.283982683982684, + "grad_norm": 1.0343283414840698, + "learning_rate": 8.401655752616031e-06, + "loss": 0.3194, + "step": 14186 + }, + { + "epoch": 0.28400270249981235, + "grad_norm": 1.1473582983016968, + "learning_rate": 8.401418150169051e-06, + "loss": 0.359, + "step": 14187 + }, + { + "epoch": 0.28402272101694065, + "grad_norm": 1.0178284645080566, + "learning_rate": 8.401180533423243e-06, + "loss": 0.3406, + "step": 14188 + }, + { + "epoch": 0.284042739534069, + "grad_norm": 1.107161283493042, + "learning_rate": 8.4009429023796e-06, + "loss": 0.3488, + "step": 14189 + }, + { + "epoch": 0.28406275805119735, + "grad_norm": 1.9086461067199707, + "learning_rate": 8.400705257039128e-06, + "loss": 0.8512, + "step": 14190 + }, + { + "epoch": 0.2840827765683257, + "grad_norm": 1.1517587900161743, + "learning_rate": 8.400467597402822e-06, + "loss": 0.325, + "step": 14191 + }, + { + "epoch": 0.28410279508545405, + "grad_norm": 1.0230481624603271, + "learning_rate": 8.400229923471683e-06, + "loss": 0.3242, + "step": 14192 + }, + { + "epoch": 0.2841228136025824, + "grad_norm": 1.072619915008545, + "learning_rate": 8.399992235246708e-06, + "loss": 0.3295, + "step": 14193 + }, + { + "epoch": 0.28414283211971075, + "grad_norm": 1.1365077495574951, + "learning_rate": 8.399754532728897e-06, + "loss": 0.3164, + "step": 14194 + }, + { + "epoch": 0.2841628506368391, + "grad_norm": 1.3259985446929932, + "learning_rate": 8.39951681591925e-06, + "loss": 0.3433, + "step": 14195 + }, + { + "epoch": 0.2841828691539674, + "grad_norm": 1.0798931121826172, + "learning_rate": 8.399279084818766e-06, + "loss": 0.3272, + "step": 14196 + }, + { + "epoch": 0.28420288767109575, + "grad_norm": 1.1311932802200317, + "learning_rate": 8.399041339428444e-06, + "loss": 0.3205, + "step": 14197 + }, + { + "epoch": 0.2842229061882241, + "grad_norm": 1.8702740669250488, + "learning_rate": 8.398803579749283e-06, + "loss": 0.8562, + "step": 14198 + }, + { + "epoch": 0.28424292470535245, + "grad_norm": 1.1251325607299805, + "learning_rate": 8.398565805782286e-06, + "loss": 0.2875, + "step": 14199 + }, + { + "epoch": 0.2842629432224808, + "grad_norm": 1.1225483417510986, + "learning_rate": 8.398328017528446e-06, + "loss": 0.3065, + "step": 14200 + }, + { + "epoch": 0.28428296173960915, + "grad_norm": 1.1634691953659058, + "learning_rate": 8.39809021498877e-06, + "loss": 0.3015, + "step": 14201 + }, + { + "epoch": 0.2843029802567375, + "grad_norm": 1.142046332359314, + "learning_rate": 8.39785239816425e-06, + "loss": 0.3117, + "step": 14202 + }, + { + "epoch": 0.2843229987738658, + "grad_norm": 0.9660161733627319, + "learning_rate": 8.397614567055892e-06, + "loss": 0.2508, + "step": 14203 + }, + { + "epoch": 0.28434301729099415, + "grad_norm": 1.8843597173690796, + "learning_rate": 8.397376721664695e-06, + "loss": 0.8314, + "step": 14204 + }, + { + "epoch": 0.2843630358081225, + "grad_norm": 1.010677695274353, + "learning_rate": 8.397138861991654e-06, + "loss": 0.3259, + "step": 14205 + }, + { + "epoch": 0.28438305432525085, + "grad_norm": 1.0909264087677002, + "learning_rate": 8.396900988037776e-06, + "loss": 0.3397, + "step": 14206 + }, + { + "epoch": 0.2844030728423792, + "grad_norm": 1.0640342235565186, + "learning_rate": 8.396663099804055e-06, + "loss": 0.3148, + "step": 14207 + }, + { + "epoch": 0.28442309135950755, + "grad_norm": 1.1766284704208374, + "learning_rate": 8.396425197291493e-06, + "loss": 0.3488, + "step": 14208 + }, + { + "epoch": 0.2844431098766359, + "grad_norm": 1.1312850713729858, + "learning_rate": 8.396187280501092e-06, + "loss": 0.2926, + "step": 14209 + }, + { + "epoch": 0.28446312839376425, + "grad_norm": 1.024409532546997, + "learning_rate": 8.39594934943385e-06, + "loss": 0.2513, + "step": 14210 + }, + { + "epoch": 0.28448314691089255, + "grad_norm": 1.2313896417617798, + "learning_rate": 8.395711404090767e-06, + "loss": 0.3145, + "step": 14211 + }, + { + "epoch": 0.2845031654280209, + "grad_norm": 1.0668143033981323, + "learning_rate": 8.395473444472844e-06, + "loss": 0.3227, + "step": 14212 + }, + { + "epoch": 0.28452318394514925, + "grad_norm": 1.2075363397598267, + "learning_rate": 8.395235470581083e-06, + "loss": 0.3484, + "step": 14213 + }, + { + "epoch": 0.2845432024622776, + "grad_norm": 1.0353630781173706, + "learning_rate": 8.39499748241648e-06, + "loss": 0.3033, + "step": 14214 + }, + { + "epoch": 0.28456322097940595, + "grad_norm": 1.006730318069458, + "learning_rate": 8.39475947998004e-06, + "loss": 0.3366, + "step": 14215 + }, + { + "epoch": 0.2845832394965343, + "grad_norm": 1.0968989133834839, + "learning_rate": 8.394521463272761e-06, + "loss": 0.3341, + "step": 14216 + }, + { + "epoch": 0.28460325801366265, + "grad_norm": 1.049656629562378, + "learning_rate": 8.394283432295645e-06, + "loss": 0.328, + "step": 14217 + }, + { + "epoch": 0.284623276530791, + "grad_norm": 1.4799989461898804, + "learning_rate": 8.394045387049692e-06, + "loss": 0.3657, + "step": 14218 + }, + { + "epoch": 0.2846432950479193, + "grad_norm": 1.1002068519592285, + "learning_rate": 8.3938073275359e-06, + "loss": 0.3538, + "step": 14219 + }, + { + "epoch": 0.28466331356504765, + "grad_norm": 1.01111900806427, + "learning_rate": 8.393569253755276e-06, + "loss": 0.3283, + "step": 14220 + }, + { + "epoch": 0.284683332082176, + "grad_norm": 1.0203731060028076, + "learning_rate": 8.393331165708816e-06, + "loss": 0.3541, + "step": 14221 + }, + { + "epoch": 0.28470335059930435, + "grad_norm": 1.0629817247390747, + "learning_rate": 8.393093063397521e-06, + "loss": 0.2961, + "step": 14222 + }, + { + "epoch": 0.2847233691164327, + "grad_norm": 1.1842641830444336, + "learning_rate": 8.392854946822395e-06, + "loss": 0.3205, + "step": 14223 + }, + { + "epoch": 0.28474338763356105, + "grad_norm": 1.0661848783493042, + "learning_rate": 8.392616815984435e-06, + "loss": 0.3069, + "step": 14224 + }, + { + "epoch": 0.2847634061506894, + "grad_norm": 1.0729702711105347, + "learning_rate": 8.392378670884644e-06, + "loss": 0.2841, + "step": 14225 + }, + { + "epoch": 0.28478342466781775, + "grad_norm": 1.1974506378173828, + "learning_rate": 8.392140511524025e-06, + "loss": 0.3201, + "step": 14226 + }, + { + "epoch": 0.28480344318494605, + "grad_norm": 1.1689828634262085, + "learning_rate": 8.391902337903576e-06, + "loss": 0.3163, + "step": 14227 + }, + { + "epoch": 0.2848234617020744, + "grad_norm": 1.0090831518173218, + "learning_rate": 8.391664150024298e-06, + "loss": 0.3077, + "step": 14228 + }, + { + "epoch": 0.28484348021920275, + "grad_norm": 1.0607142448425293, + "learning_rate": 8.391425947887196e-06, + "loss": 0.3137, + "step": 14229 + }, + { + "epoch": 0.2848634987363311, + "grad_norm": 1.0263185501098633, + "learning_rate": 8.391187731493269e-06, + "loss": 0.3521, + "step": 14230 + }, + { + "epoch": 0.28488351725345945, + "grad_norm": 1.1124253273010254, + "learning_rate": 8.390949500843518e-06, + "loss": 0.3926, + "step": 14231 + }, + { + "epoch": 0.2849035357705878, + "grad_norm": 1.1207541227340698, + "learning_rate": 8.390711255938946e-06, + "loss": 0.3384, + "step": 14232 + }, + { + "epoch": 0.28492355428771615, + "grad_norm": 1.090785026550293, + "learning_rate": 8.390472996780551e-06, + "loss": 0.3302, + "step": 14233 + }, + { + "epoch": 0.2849435728048445, + "grad_norm": 1.171958565711975, + "learning_rate": 8.39023472336934e-06, + "loss": 0.3219, + "step": 14234 + }, + { + "epoch": 0.2849635913219728, + "grad_norm": 1.1830793619155884, + "learning_rate": 8.389996435706311e-06, + "loss": 0.3035, + "step": 14235 + }, + { + "epoch": 0.28498360983910115, + "grad_norm": 1.0775153636932373, + "learning_rate": 8.389758133792465e-06, + "loss": 0.3326, + "step": 14236 + }, + { + "epoch": 0.2850036283562295, + "grad_norm": 1.0622628927230835, + "learning_rate": 8.389519817628807e-06, + "loss": 0.3295, + "step": 14237 + }, + { + "epoch": 0.28502364687335785, + "grad_norm": 1.8887826204299927, + "learning_rate": 8.389281487216337e-06, + "loss": 0.8356, + "step": 14238 + }, + { + "epoch": 0.2850436653904862, + "grad_norm": 1.8080723285675049, + "learning_rate": 8.389043142556056e-06, + "loss": 0.795, + "step": 14239 + }, + { + "epoch": 0.28506368390761455, + "grad_norm": 1.3157395124435425, + "learning_rate": 8.388804783648968e-06, + "loss": 0.3533, + "step": 14240 + }, + { + "epoch": 0.2850837024247429, + "grad_norm": 1.1378017663955688, + "learning_rate": 8.388566410496073e-06, + "loss": 0.3498, + "step": 14241 + }, + { + "epoch": 0.28510372094187125, + "grad_norm": 1.037927508354187, + "learning_rate": 8.388328023098376e-06, + "loss": 0.3182, + "step": 14242 + }, + { + "epoch": 0.28512373945899955, + "grad_norm": 1.1726539134979248, + "learning_rate": 8.388089621456874e-06, + "loss": 0.3254, + "step": 14243 + }, + { + "epoch": 0.2851437579761279, + "grad_norm": 1.1399708986282349, + "learning_rate": 8.387851205572574e-06, + "loss": 0.3264, + "step": 14244 + }, + { + "epoch": 0.28516377649325625, + "grad_norm": 1.315376877784729, + "learning_rate": 8.387612775446478e-06, + "loss": 0.3844, + "step": 14245 + }, + { + "epoch": 0.2851837950103846, + "grad_norm": 1.2350835800170898, + "learning_rate": 8.387374331079586e-06, + "loss": 0.355, + "step": 14246 + }, + { + "epoch": 0.28520381352751295, + "grad_norm": 1.1956287622451782, + "learning_rate": 8.3871358724729e-06, + "loss": 0.3508, + "step": 14247 + }, + { + "epoch": 0.2852238320446413, + "grad_norm": 0.996376633644104, + "learning_rate": 8.386897399627425e-06, + "loss": 0.3397, + "step": 14248 + }, + { + "epoch": 0.28524385056176965, + "grad_norm": 0.9756340980529785, + "learning_rate": 8.386658912544162e-06, + "loss": 0.295, + "step": 14249 + }, + { + "epoch": 0.285263869078898, + "grad_norm": 1.0954480171203613, + "learning_rate": 8.386420411224113e-06, + "loss": 0.3355, + "step": 14250 + }, + { + "epoch": 0.2852838875960263, + "grad_norm": 1.127013087272644, + "learning_rate": 8.386181895668282e-06, + "loss": 0.2969, + "step": 14251 + }, + { + "epoch": 0.28530390611315465, + "grad_norm": 1.1200801134109497, + "learning_rate": 8.385943365877672e-06, + "loss": 0.301, + "step": 14252 + }, + { + "epoch": 0.285323924630283, + "grad_norm": 1.0286246538162231, + "learning_rate": 8.385704821853283e-06, + "loss": 0.3055, + "step": 14253 + }, + { + "epoch": 0.28534394314741135, + "grad_norm": 1.1532206535339355, + "learning_rate": 8.385466263596121e-06, + "loss": 0.3542, + "step": 14254 + }, + { + "epoch": 0.2853639616645397, + "grad_norm": 1.021588683128357, + "learning_rate": 8.385227691107186e-06, + "loss": 0.3404, + "step": 14255 + }, + { + "epoch": 0.28538398018166805, + "grad_norm": 1.165146827697754, + "learning_rate": 8.384989104387482e-06, + "loss": 0.3084, + "step": 14256 + }, + { + "epoch": 0.2854039986987964, + "grad_norm": 1.1368225812911987, + "learning_rate": 8.384750503438015e-06, + "loss": 0.3404, + "step": 14257 + }, + { + "epoch": 0.28542401721592475, + "grad_norm": 1.0076607465744019, + "learning_rate": 8.384511888259781e-06, + "loss": 0.2956, + "step": 14258 + }, + { + "epoch": 0.28544403573305305, + "grad_norm": 1.7372604608535767, + "learning_rate": 8.384273258853792e-06, + "loss": 0.8597, + "step": 14259 + }, + { + "epoch": 0.2854640542501814, + "grad_norm": 1.0191969871520996, + "learning_rate": 8.384034615221044e-06, + "loss": 0.2937, + "step": 14260 + }, + { + "epoch": 0.28548407276730975, + "grad_norm": 1.191116213798523, + "learning_rate": 8.383795957362543e-06, + "loss": 0.3851, + "step": 14261 + }, + { + "epoch": 0.2855040912844381, + "grad_norm": 1.1325585842132568, + "learning_rate": 8.383557285279292e-06, + "loss": 0.2995, + "step": 14262 + }, + { + "epoch": 0.28552410980156645, + "grad_norm": 1.0641114711761475, + "learning_rate": 8.383318598972294e-06, + "loss": 0.2796, + "step": 14263 + }, + { + "epoch": 0.2855441283186948, + "grad_norm": 1.132265567779541, + "learning_rate": 8.383079898442552e-06, + "loss": 0.3339, + "step": 14264 + }, + { + "epoch": 0.28556414683582315, + "grad_norm": 1.2285610437393188, + "learning_rate": 8.382841183691073e-06, + "loss": 0.3352, + "step": 14265 + }, + { + "epoch": 0.2855841653529515, + "grad_norm": 1.9472898244857788, + "learning_rate": 8.382602454718855e-06, + "loss": 0.7861, + "step": 14266 + }, + { + "epoch": 0.2856041838700798, + "grad_norm": 1.1220253705978394, + "learning_rate": 8.382363711526907e-06, + "loss": 0.3255, + "step": 14267 + }, + { + "epoch": 0.28562420238720815, + "grad_norm": 1.0405676364898682, + "learning_rate": 8.382124954116226e-06, + "loss": 0.3262, + "step": 14268 + }, + { + "epoch": 0.2856442209043365, + "grad_norm": 1.0536060333251953, + "learning_rate": 8.381886182487821e-06, + "loss": 0.3259, + "step": 14269 + }, + { + "epoch": 0.28566423942146485, + "grad_norm": 1.1582683324813843, + "learning_rate": 8.381647396642696e-06, + "loss": 0.3136, + "step": 14270 + }, + { + "epoch": 0.2856842579385932, + "grad_norm": 1.0987777709960938, + "learning_rate": 8.381408596581853e-06, + "loss": 0.3071, + "step": 14271 + }, + { + "epoch": 0.28570427645572155, + "grad_norm": 1.106719732284546, + "learning_rate": 8.381169782306293e-06, + "loss": 0.293, + "step": 14272 + }, + { + "epoch": 0.2857242949728499, + "grad_norm": 1.1115403175354004, + "learning_rate": 8.380930953817026e-06, + "loss": 0.3309, + "step": 14273 + }, + { + "epoch": 0.28574431348997825, + "grad_norm": 1.1346607208251953, + "learning_rate": 8.380692111115052e-06, + "loss": 0.3232, + "step": 14274 + }, + { + "epoch": 0.28576433200710655, + "grad_norm": 1.0821162462234497, + "learning_rate": 8.380453254201376e-06, + "loss": 0.3255, + "step": 14275 + }, + { + "epoch": 0.2857843505242349, + "grad_norm": 1.0493053197860718, + "learning_rate": 8.380214383077e-06, + "loss": 0.3197, + "step": 14276 + }, + { + "epoch": 0.28580436904136325, + "grad_norm": 2.084324598312378, + "learning_rate": 8.37997549774293e-06, + "loss": 0.8573, + "step": 14277 + }, + { + "epoch": 0.2858243875584916, + "grad_norm": 1.1238837242126465, + "learning_rate": 8.379736598200174e-06, + "loss": 0.3555, + "step": 14278 + }, + { + "epoch": 0.28584440607561995, + "grad_norm": 1.2488597631454468, + "learning_rate": 8.379497684449732e-06, + "loss": 0.308, + "step": 14279 + }, + { + "epoch": 0.2858644245927483, + "grad_norm": 1.0754436254501343, + "learning_rate": 8.379258756492606e-06, + "loss": 0.3134, + "step": 14280 + }, + { + "epoch": 0.28588444310987665, + "grad_norm": 1.2251091003417969, + "learning_rate": 8.379019814329806e-06, + "loss": 0.2658, + "step": 14281 + }, + { + "epoch": 0.285904461627005, + "grad_norm": 1.0850498676300049, + "learning_rate": 8.378780857962332e-06, + "loss": 0.3568, + "step": 14282 + }, + { + "epoch": 0.2859244801441333, + "grad_norm": 1.2659484148025513, + "learning_rate": 8.378541887391192e-06, + "loss": 0.3405, + "step": 14283 + }, + { + "epoch": 0.28594449866126165, + "grad_norm": 1.7841471433639526, + "learning_rate": 8.378302902617388e-06, + "loss": 0.8401, + "step": 14284 + }, + { + "epoch": 0.28596451717839, + "grad_norm": 1.1609724760055542, + "learning_rate": 8.378063903641927e-06, + "loss": 0.2988, + "step": 14285 + }, + { + "epoch": 0.28598453569551835, + "grad_norm": 1.067294716835022, + "learning_rate": 8.37782489046581e-06, + "loss": 0.3346, + "step": 14286 + }, + { + "epoch": 0.2860045542126467, + "grad_norm": 1.0345954895019531, + "learning_rate": 8.377585863090046e-06, + "loss": 0.3121, + "step": 14287 + }, + { + "epoch": 0.28602457272977505, + "grad_norm": 1.9812958240509033, + "learning_rate": 8.377346821515637e-06, + "loss": 0.871, + "step": 14288 + }, + { + "epoch": 0.2860445912469034, + "grad_norm": 1.2240989208221436, + "learning_rate": 8.377107765743589e-06, + "loss": 0.3768, + "step": 14289 + }, + { + "epoch": 0.28606460976403175, + "grad_norm": 1.0952436923980713, + "learning_rate": 8.376868695774906e-06, + "loss": 0.3358, + "step": 14290 + }, + { + "epoch": 0.28608462828116005, + "grad_norm": 1.0623741149902344, + "learning_rate": 8.376629611610597e-06, + "loss": 0.3218, + "step": 14291 + }, + { + "epoch": 0.2861046467982884, + "grad_norm": 1.0508439540863037, + "learning_rate": 8.37639051325166e-06, + "loss": 0.2826, + "step": 14292 + }, + { + "epoch": 0.28612466531541675, + "grad_norm": 1.0898860692977905, + "learning_rate": 8.376151400699104e-06, + "loss": 0.3335, + "step": 14293 + }, + { + "epoch": 0.2861446838325451, + "grad_norm": 1.0876038074493408, + "learning_rate": 8.375912273953937e-06, + "loss": 0.3042, + "step": 14294 + }, + { + "epoch": 0.28616470234967345, + "grad_norm": 1.0301824808120728, + "learning_rate": 8.375673133017157e-06, + "loss": 0.2888, + "step": 14295 + }, + { + "epoch": 0.2861847208668018, + "grad_norm": 1.9005563259124756, + "learning_rate": 8.375433977889778e-06, + "loss": 0.7615, + "step": 14296 + }, + { + "epoch": 0.28620473938393015, + "grad_norm": 1.1453927755355835, + "learning_rate": 8.375194808572799e-06, + "loss": 0.3419, + "step": 14297 + }, + { + "epoch": 0.2862247579010585, + "grad_norm": 1.1617549657821655, + "learning_rate": 8.374955625067227e-06, + "loss": 0.3741, + "step": 14298 + }, + { + "epoch": 0.2862447764181868, + "grad_norm": 1.1574549674987793, + "learning_rate": 8.374716427374068e-06, + "loss": 0.329, + "step": 14299 + }, + { + "epoch": 0.28626479493531515, + "grad_norm": 1.0590356588363647, + "learning_rate": 8.374477215494328e-06, + "loss": 0.2845, + "step": 14300 + }, + { + "epoch": 0.2862848134524435, + "grad_norm": 1.092921495437622, + "learning_rate": 8.374237989429013e-06, + "loss": 0.3524, + "step": 14301 + }, + { + "epoch": 0.28630483196957185, + "grad_norm": 1.0767362117767334, + "learning_rate": 8.373998749179125e-06, + "loss": 0.3261, + "step": 14302 + }, + { + "epoch": 0.2863248504867002, + "grad_norm": 1.064530611038208, + "learning_rate": 8.373759494745675e-06, + "loss": 0.3349, + "step": 14303 + }, + { + "epoch": 0.28634486900382855, + "grad_norm": 1.1497803926467896, + "learning_rate": 8.373520226129664e-06, + "loss": 0.3396, + "step": 14304 + }, + { + "epoch": 0.2863648875209569, + "grad_norm": 1.1946889162063599, + "learning_rate": 8.373280943332101e-06, + "loss": 0.3098, + "step": 14305 + }, + { + "epoch": 0.28638490603808525, + "grad_norm": 1.197160243988037, + "learning_rate": 8.37304164635399e-06, + "loss": 0.2881, + "step": 14306 + }, + { + "epoch": 0.28640492455521355, + "grad_norm": 1.0144075155258179, + "learning_rate": 8.372802335196339e-06, + "loss": 0.2668, + "step": 14307 + }, + { + "epoch": 0.2864249430723419, + "grad_norm": 1.066819667816162, + "learning_rate": 8.372563009860152e-06, + "loss": 0.324, + "step": 14308 + }, + { + "epoch": 0.28644496158947025, + "grad_norm": 1.1143059730529785, + "learning_rate": 8.372323670346436e-06, + "loss": 0.3122, + "step": 14309 + }, + { + "epoch": 0.2864649801065986, + "grad_norm": 1.0996123552322388, + "learning_rate": 8.372084316656197e-06, + "loss": 0.3133, + "step": 14310 + }, + { + "epoch": 0.28648499862372695, + "grad_norm": 0.982552707195282, + "learning_rate": 8.371844948790441e-06, + "loss": 0.3117, + "step": 14311 + }, + { + "epoch": 0.2865050171408553, + "grad_norm": 1.891197681427002, + "learning_rate": 8.371605566750175e-06, + "loss": 0.8103, + "step": 14312 + }, + { + "epoch": 0.28652503565798365, + "grad_norm": 1.150930404663086, + "learning_rate": 8.371366170536404e-06, + "loss": 0.3409, + "step": 14313 + }, + { + "epoch": 0.286545054175112, + "grad_norm": 1.4346247911453247, + "learning_rate": 8.371126760150136e-06, + "loss": 0.3234, + "step": 14314 + }, + { + "epoch": 0.2865650726922403, + "grad_norm": 1.278778314590454, + "learning_rate": 8.370887335592378e-06, + "loss": 0.3165, + "step": 14315 + }, + { + "epoch": 0.28658509120936865, + "grad_norm": 1.0956904888153076, + "learning_rate": 8.370647896864132e-06, + "loss": 0.2793, + "step": 14316 + }, + { + "epoch": 0.286605109726497, + "grad_norm": 2.0115365982055664, + "learning_rate": 8.37040844396641e-06, + "loss": 0.8393, + "step": 14317 + }, + { + "epoch": 0.28662512824362535, + "grad_norm": 1.2038618326187134, + "learning_rate": 8.370168976900214e-06, + "loss": 0.3542, + "step": 14318 + }, + { + "epoch": 0.2866451467607537, + "grad_norm": 1.1253912448883057, + "learning_rate": 8.369929495666554e-06, + "loss": 0.3131, + "step": 14319 + }, + { + "epoch": 0.28666516527788205, + "grad_norm": 1.0654126405715942, + "learning_rate": 8.369690000266436e-06, + "loss": 0.3209, + "step": 14320 + }, + { + "epoch": 0.2866851837950104, + "grad_norm": 1.1634899377822876, + "learning_rate": 8.369450490700865e-06, + "loss": 0.3537, + "step": 14321 + }, + { + "epoch": 0.28670520231213875, + "grad_norm": 1.1110447645187378, + "learning_rate": 8.369210966970851e-06, + "loss": 0.3098, + "step": 14322 + }, + { + "epoch": 0.28672522082926705, + "grad_norm": 1.1603292226791382, + "learning_rate": 8.368971429077399e-06, + "loss": 0.3402, + "step": 14323 + }, + { + "epoch": 0.2867452393463954, + "grad_norm": 1.2077029943466187, + "learning_rate": 8.368731877021515e-06, + "loss": 0.2977, + "step": 14324 + }, + { + "epoch": 0.28676525786352375, + "grad_norm": 1.1286518573760986, + "learning_rate": 8.368492310804208e-06, + "loss": 0.3126, + "step": 14325 + }, + { + "epoch": 0.2867852763806521, + "grad_norm": 0.9250386953353882, + "learning_rate": 8.368252730426485e-06, + "loss": 0.2714, + "step": 14326 + }, + { + "epoch": 0.28680529489778045, + "grad_norm": 1.065506935119629, + "learning_rate": 8.36801313588935e-06, + "loss": 0.3191, + "step": 14327 + }, + { + "epoch": 0.2868253134149088, + "grad_norm": 0.9764115214347839, + "learning_rate": 8.367773527193815e-06, + "loss": 0.2642, + "step": 14328 + }, + { + "epoch": 0.28684533193203715, + "grad_norm": 1.1111277341842651, + "learning_rate": 8.367533904340883e-06, + "loss": 0.2476, + "step": 14329 + }, + { + "epoch": 0.2868653504491655, + "grad_norm": 1.1472216844558716, + "learning_rate": 8.367294267331564e-06, + "loss": 0.2779, + "step": 14330 + }, + { + "epoch": 0.2868853689662938, + "grad_norm": 1.1694457530975342, + "learning_rate": 8.367054616166866e-06, + "loss": 0.3301, + "step": 14331 + }, + { + "epoch": 0.28690538748342215, + "grad_norm": 1.078129768371582, + "learning_rate": 8.366814950847793e-06, + "loss": 0.3316, + "step": 14332 + }, + { + "epoch": 0.2869254060005505, + "grad_norm": 1.1732183694839478, + "learning_rate": 8.366575271375355e-06, + "loss": 0.3589, + "step": 14333 + }, + { + "epoch": 0.28694542451767885, + "grad_norm": 0.9376320242881775, + "learning_rate": 8.366335577750559e-06, + "loss": 0.286, + "step": 14334 + }, + { + "epoch": 0.2869654430348072, + "grad_norm": 1.1126115322113037, + "learning_rate": 8.366095869974412e-06, + "loss": 0.3181, + "step": 14335 + }, + { + "epoch": 0.28698546155193555, + "grad_norm": 1.0132673978805542, + "learning_rate": 8.365856148047924e-06, + "loss": 0.3152, + "step": 14336 + }, + { + "epoch": 0.2870054800690639, + "grad_norm": 1.2426308393478394, + "learning_rate": 8.3656164119721e-06, + "loss": 0.2903, + "step": 14337 + }, + { + "epoch": 0.28702549858619225, + "grad_norm": 1.1333365440368652, + "learning_rate": 8.36537666174795e-06, + "loss": 0.2957, + "step": 14338 + }, + { + "epoch": 0.28704551710332055, + "grad_norm": 1.1616153717041016, + "learning_rate": 8.365136897376478e-06, + "loss": 0.3205, + "step": 14339 + }, + { + "epoch": 0.2870655356204489, + "grad_norm": 1.117626667022705, + "learning_rate": 8.364897118858698e-06, + "loss": 0.3214, + "step": 14340 + }, + { + "epoch": 0.28708555413757725, + "grad_norm": 1.0145354270935059, + "learning_rate": 8.364657326195614e-06, + "loss": 0.3223, + "step": 14341 + }, + { + "epoch": 0.2871055726547056, + "grad_norm": 1.1713758707046509, + "learning_rate": 8.364417519388232e-06, + "loss": 0.3864, + "step": 14342 + }, + { + "epoch": 0.28712559117183395, + "grad_norm": 1.9606382846832275, + "learning_rate": 8.364177698437565e-06, + "loss": 0.8762, + "step": 14343 + }, + { + "epoch": 0.2871456096889623, + "grad_norm": 1.1104698181152344, + "learning_rate": 8.363937863344617e-06, + "loss": 0.2952, + "step": 14344 + }, + { + "epoch": 0.28716562820609065, + "grad_norm": 1.842559576034546, + "learning_rate": 8.363698014110402e-06, + "loss": 0.7617, + "step": 14345 + }, + { + "epoch": 0.287185646723219, + "grad_norm": 1.0414386987686157, + "learning_rate": 8.363458150735922e-06, + "loss": 0.3043, + "step": 14346 + }, + { + "epoch": 0.2872056652403473, + "grad_norm": 1.1259896755218506, + "learning_rate": 8.363218273222187e-06, + "loss": 0.348, + "step": 14347 + }, + { + "epoch": 0.28722568375747565, + "grad_norm": 1.2054927349090576, + "learning_rate": 8.362978381570209e-06, + "loss": 0.3015, + "step": 14348 + }, + { + "epoch": 0.287245702274604, + "grad_norm": 1.134203314781189, + "learning_rate": 8.36273847578099e-06, + "loss": 0.2949, + "step": 14349 + }, + { + "epoch": 0.28726572079173235, + "grad_norm": 1.229541540145874, + "learning_rate": 8.362498555855544e-06, + "loss": 0.351, + "step": 14350 + }, + { + "epoch": 0.2872857393088607, + "grad_norm": 1.0170648097991943, + "learning_rate": 8.362258621794877e-06, + "loss": 0.3416, + "step": 14351 + }, + { + "epoch": 0.28730575782598905, + "grad_norm": 1.0842241048812866, + "learning_rate": 8.362018673599998e-06, + "loss": 0.3034, + "step": 14352 + }, + { + "epoch": 0.2873257763431174, + "grad_norm": 2.013559579849243, + "learning_rate": 8.361778711271918e-06, + "loss": 0.8402, + "step": 14353 + }, + { + "epoch": 0.28734579486024575, + "grad_norm": 1.117398977279663, + "learning_rate": 8.361538734811643e-06, + "loss": 0.2986, + "step": 14354 + }, + { + "epoch": 0.28736581337737405, + "grad_norm": 1.0931860208511353, + "learning_rate": 8.361298744220182e-06, + "loss": 0.3038, + "step": 14355 + }, + { + "epoch": 0.2873858318945024, + "grad_norm": 1.1601707935333252, + "learning_rate": 8.361058739498544e-06, + "loss": 0.3284, + "step": 14356 + }, + { + "epoch": 0.28740585041163075, + "grad_norm": 1.1526175737380981, + "learning_rate": 8.36081872064774e-06, + "loss": 0.3722, + "step": 14357 + }, + { + "epoch": 0.2874258689287591, + "grad_norm": 1.3047763109207153, + "learning_rate": 8.360578687668776e-06, + "loss": 0.328, + "step": 14358 + }, + { + "epoch": 0.28744588744588745, + "grad_norm": 1.011460304260254, + "learning_rate": 8.360338640562662e-06, + "loss": 0.3047, + "step": 14359 + }, + { + "epoch": 0.2874659059630158, + "grad_norm": 1.990922212600708, + "learning_rate": 8.360098579330408e-06, + "loss": 0.8116, + "step": 14360 + }, + { + "epoch": 0.28748592448014415, + "grad_norm": 1.0255464315414429, + "learning_rate": 8.359858503973024e-06, + "loss": 0.3251, + "step": 14361 + }, + { + "epoch": 0.2875059429972725, + "grad_norm": 1.1663414239883423, + "learning_rate": 8.359618414491515e-06, + "loss": 0.3689, + "step": 14362 + }, + { + "epoch": 0.2875259615144008, + "grad_norm": 1.0495314598083496, + "learning_rate": 8.359378310886897e-06, + "loss": 0.336, + "step": 14363 + }, + { + "epoch": 0.28754598003152915, + "grad_norm": 1.893490195274353, + "learning_rate": 8.359138193160171e-06, + "loss": 0.8553, + "step": 14364 + }, + { + "epoch": 0.2875659985486575, + "grad_norm": 1.1855477094650269, + "learning_rate": 8.358898061312355e-06, + "loss": 0.3433, + "step": 14365 + }, + { + "epoch": 0.28758601706578585, + "grad_norm": 1.0023168325424194, + "learning_rate": 8.358657915344453e-06, + "loss": 0.3082, + "step": 14366 + }, + { + "epoch": 0.2876060355829142, + "grad_norm": 1.1903753280639648, + "learning_rate": 8.358417755257476e-06, + "loss": 0.3345, + "step": 14367 + }, + { + "epoch": 0.28762605410004255, + "grad_norm": 1.7863236665725708, + "learning_rate": 8.358177581052433e-06, + "loss": 0.8259, + "step": 14368 + }, + { + "epoch": 0.2876460726171709, + "grad_norm": 1.2343426942825317, + "learning_rate": 8.357937392730332e-06, + "loss": 0.3224, + "step": 14369 + }, + { + "epoch": 0.28766609113429925, + "grad_norm": 1.13099205493927, + "learning_rate": 8.357697190292189e-06, + "loss": 0.2905, + "step": 14370 + }, + { + "epoch": 0.28768610965142755, + "grad_norm": 1.2344067096710205, + "learning_rate": 8.357456973739007e-06, + "loss": 0.3532, + "step": 14371 + }, + { + "epoch": 0.2877061281685559, + "grad_norm": 1.0308544635772705, + "learning_rate": 8.3572167430718e-06, + "loss": 0.3154, + "step": 14372 + }, + { + "epoch": 0.28772614668568425, + "grad_norm": 1.099630355834961, + "learning_rate": 8.356976498291576e-06, + "loss": 0.3343, + "step": 14373 + }, + { + "epoch": 0.2877461652028126, + "grad_norm": 1.9270631074905396, + "learning_rate": 8.356736239399345e-06, + "loss": 0.825, + "step": 14374 + }, + { + "epoch": 0.28776618371994095, + "grad_norm": 1.108442783355713, + "learning_rate": 8.356495966396115e-06, + "loss": 0.296, + "step": 14375 + }, + { + "epoch": 0.2877862022370693, + "grad_norm": 1.0352553129196167, + "learning_rate": 8.3562556792829e-06, + "loss": 0.3256, + "step": 14376 + }, + { + "epoch": 0.28780622075419765, + "grad_norm": 1.1643162965774536, + "learning_rate": 8.356015378060708e-06, + "loss": 0.3392, + "step": 14377 + }, + { + "epoch": 0.287826239271326, + "grad_norm": 1.1341960430145264, + "learning_rate": 8.35577506273055e-06, + "loss": 0.3589, + "step": 14378 + }, + { + "epoch": 0.2878462577884543, + "grad_norm": 1.2560055255889893, + "learning_rate": 8.355534733293437e-06, + "loss": 0.3326, + "step": 14379 + }, + { + "epoch": 0.28786627630558265, + "grad_norm": 1.036094069480896, + "learning_rate": 8.355294389750377e-06, + "loss": 0.3202, + "step": 14380 + }, + { + "epoch": 0.287886294822711, + "grad_norm": 1.1186515092849731, + "learning_rate": 8.355054032102383e-06, + "loss": 0.3484, + "step": 14381 + }, + { + "epoch": 0.28790631333983935, + "grad_norm": 1.12054443359375, + "learning_rate": 8.354813660350462e-06, + "loss": 0.3107, + "step": 14382 + }, + { + "epoch": 0.2879263318569677, + "grad_norm": 1.050488829612732, + "learning_rate": 8.354573274495627e-06, + "loss": 0.325, + "step": 14383 + }, + { + "epoch": 0.28794635037409605, + "grad_norm": 1.8618674278259277, + "learning_rate": 8.354332874538887e-06, + "loss": 0.7821, + "step": 14384 + }, + { + "epoch": 0.2879663688912244, + "grad_norm": 1.0631645917892456, + "learning_rate": 8.354092460481254e-06, + "loss": 0.3048, + "step": 14385 + }, + { + "epoch": 0.28798638740835275, + "grad_norm": 1.1191438436508179, + "learning_rate": 8.35385203232374e-06, + "loss": 0.3257, + "step": 14386 + }, + { + "epoch": 0.28800640592548105, + "grad_norm": 1.1624222993850708, + "learning_rate": 8.353611590067352e-06, + "loss": 0.3099, + "step": 14387 + }, + { + "epoch": 0.2880264244426094, + "grad_norm": 1.0591963529586792, + "learning_rate": 8.353371133713102e-06, + "loss": 0.3115, + "step": 14388 + }, + { + "epoch": 0.28804644295973775, + "grad_norm": 1.1406527757644653, + "learning_rate": 8.353130663262e-06, + "loss": 0.3169, + "step": 14389 + }, + { + "epoch": 0.2880664614768661, + "grad_norm": 1.1117128133773804, + "learning_rate": 8.352890178715062e-06, + "loss": 0.3315, + "step": 14390 + }, + { + "epoch": 0.28808647999399445, + "grad_norm": 1.0302973985671997, + "learning_rate": 8.352649680073293e-06, + "loss": 0.3276, + "step": 14391 + }, + { + "epoch": 0.2881064985111228, + "grad_norm": 1.1389240026474, + "learning_rate": 8.352409167337708e-06, + "loss": 0.3616, + "step": 14392 + }, + { + "epoch": 0.28812651702825115, + "grad_norm": 1.1811916828155518, + "learning_rate": 8.352168640509313e-06, + "loss": 0.3361, + "step": 14393 + }, + { + "epoch": 0.2881465355453795, + "grad_norm": 1.1912908554077148, + "learning_rate": 8.351928099589127e-06, + "loss": 0.3294, + "step": 14394 + }, + { + "epoch": 0.2881665540625078, + "grad_norm": 1.0535199642181396, + "learning_rate": 8.351687544578154e-06, + "loss": 0.3174, + "step": 14395 + }, + { + "epoch": 0.28818657257963615, + "grad_norm": 1.0615912675857544, + "learning_rate": 8.351446975477406e-06, + "loss": 0.3495, + "step": 14396 + }, + { + "epoch": 0.2882065910967645, + "grad_norm": 1.1420159339904785, + "learning_rate": 8.351206392287899e-06, + "loss": 0.3197, + "step": 14397 + }, + { + "epoch": 0.28822660961389285, + "grad_norm": 1.338796854019165, + "learning_rate": 8.350965795010641e-06, + "loss": 0.3484, + "step": 14398 + }, + { + "epoch": 0.2882466281310212, + "grad_norm": 1.1012367010116577, + "learning_rate": 8.350725183646643e-06, + "loss": 0.331, + "step": 14399 + }, + { + "epoch": 0.28826664664814955, + "grad_norm": 1.1552034616470337, + "learning_rate": 8.350484558196918e-06, + "loss": 0.3422, + "step": 14400 + }, + { + "epoch": 0.2882866651652779, + "grad_norm": 1.0709404945373535, + "learning_rate": 8.350243918662478e-06, + "loss": 0.3268, + "step": 14401 + }, + { + "epoch": 0.28830668368240625, + "grad_norm": 1.1223300695419312, + "learning_rate": 8.35000326504433e-06, + "loss": 0.293, + "step": 14402 + }, + { + "epoch": 0.28832670219953455, + "grad_norm": 1.1385536193847656, + "learning_rate": 8.349762597343493e-06, + "loss": 0.28, + "step": 14403 + }, + { + "epoch": 0.2883467207166629, + "grad_norm": 1.7278426885604858, + "learning_rate": 8.349521915560973e-06, + "loss": 0.8844, + "step": 14404 + }, + { + "epoch": 0.28836673923379125, + "grad_norm": 1.0404717922210693, + "learning_rate": 8.349281219697784e-06, + "loss": 0.3274, + "step": 14405 + }, + { + "epoch": 0.2883867577509196, + "grad_norm": 1.1171596050262451, + "learning_rate": 8.349040509754938e-06, + "loss": 0.3227, + "step": 14406 + }, + { + "epoch": 0.28840677626804795, + "grad_norm": 1.0328786373138428, + "learning_rate": 8.348799785733447e-06, + "loss": 0.3168, + "step": 14407 + }, + { + "epoch": 0.2884267947851763, + "grad_norm": 1.0905405282974243, + "learning_rate": 8.348559047634322e-06, + "loss": 0.3209, + "step": 14408 + }, + { + "epoch": 0.28844681330230465, + "grad_norm": 1.0850123167037964, + "learning_rate": 8.348318295458575e-06, + "loss": 0.3476, + "step": 14409 + }, + { + "epoch": 0.288466831819433, + "grad_norm": 1.1098275184631348, + "learning_rate": 8.348077529207216e-06, + "loss": 0.266, + "step": 14410 + }, + { + "epoch": 0.2884868503365613, + "grad_norm": 1.0581454038619995, + "learning_rate": 8.347836748881265e-06, + "loss": 0.3312, + "step": 14411 + }, + { + "epoch": 0.28850686885368965, + "grad_norm": 1.0550332069396973, + "learning_rate": 8.347595954481725e-06, + "loss": 0.3555, + "step": 14412 + }, + { + "epoch": 0.288526887370818, + "grad_norm": 1.1468900442123413, + "learning_rate": 8.347355146009613e-06, + "loss": 0.3549, + "step": 14413 + }, + { + "epoch": 0.28854690588794635, + "grad_norm": 1.1343088150024414, + "learning_rate": 8.34711432346594e-06, + "loss": 0.3048, + "step": 14414 + }, + { + "epoch": 0.2885669244050747, + "grad_norm": 1.0854005813598633, + "learning_rate": 8.346873486851718e-06, + "loss": 0.2967, + "step": 14415 + }, + { + "epoch": 0.28858694292220305, + "grad_norm": 1.0543864965438843, + "learning_rate": 8.346632636167962e-06, + "loss": 0.3292, + "step": 14416 + }, + { + "epoch": 0.2886069614393314, + "grad_norm": 1.0781654119491577, + "learning_rate": 8.34639177141568e-06, + "loss": 0.3142, + "step": 14417 + }, + { + "epoch": 0.28862697995645975, + "grad_norm": 1.78339684009552, + "learning_rate": 8.34615089259589e-06, + "loss": 0.8471, + "step": 14418 + }, + { + "epoch": 0.28864699847358805, + "grad_norm": 1.0682686567306519, + "learning_rate": 8.3459099997096e-06, + "loss": 0.3047, + "step": 14419 + }, + { + "epoch": 0.2886670169907164, + "grad_norm": 1.1497000455856323, + "learning_rate": 8.345669092757825e-06, + "loss": 0.2855, + "step": 14420 + }, + { + "epoch": 0.28868703550784475, + "grad_norm": 1.1681981086730957, + "learning_rate": 8.345428171741578e-06, + "loss": 0.3067, + "step": 14421 + }, + { + "epoch": 0.2887070540249731, + "grad_norm": 2.0695688724517822, + "learning_rate": 8.34518723666187e-06, + "loss": 0.8305, + "step": 14422 + }, + { + "epoch": 0.28872707254210145, + "grad_norm": 1.0678350925445557, + "learning_rate": 8.344946287519713e-06, + "loss": 0.3216, + "step": 14423 + }, + { + "epoch": 0.2887470910592298, + "grad_norm": 1.0605394840240479, + "learning_rate": 8.344705324316124e-06, + "loss": 0.3104, + "step": 14424 + }, + { + "epoch": 0.28876710957635815, + "grad_norm": 1.9472371339797974, + "learning_rate": 8.344464347052112e-06, + "loss": 0.8812, + "step": 14425 + }, + { + "epoch": 0.2887871280934865, + "grad_norm": 1.0358549356460571, + "learning_rate": 8.344223355728693e-06, + "loss": 0.3447, + "step": 14426 + }, + { + "epoch": 0.2888071466106148, + "grad_norm": 1.022013545036316, + "learning_rate": 8.343982350346878e-06, + "loss": 0.3076, + "step": 14427 + }, + { + "epoch": 0.28882716512774315, + "grad_norm": 1.2011899948120117, + "learning_rate": 8.343741330907681e-06, + "loss": 0.3635, + "step": 14428 + }, + { + "epoch": 0.2888471836448715, + "grad_norm": 1.0787097215652466, + "learning_rate": 8.343500297412115e-06, + "loss": 0.3219, + "step": 14429 + }, + { + "epoch": 0.28886720216199985, + "grad_norm": 0.9932916164398193, + "learning_rate": 8.343259249861194e-06, + "loss": 0.3023, + "step": 14430 + }, + { + "epoch": 0.2888872206791282, + "grad_norm": 1.023699402809143, + "learning_rate": 8.34301818825593e-06, + "loss": 0.3265, + "step": 14431 + }, + { + "epoch": 0.28890723919625655, + "grad_norm": 1.0499565601348877, + "learning_rate": 8.342777112597336e-06, + "loss": 0.2853, + "step": 14432 + }, + { + "epoch": 0.2889272577133849, + "grad_norm": 1.0540359020233154, + "learning_rate": 8.342536022886427e-06, + "loss": 0.3056, + "step": 14433 + }, + { + "epoch": 0.28894727623051325, + "grad_norm": 1.8362245559692383, + "learning_rate": 8.342294919124218e-06, + "loss": 0.7971, + "step": 14434 + }, + { + "epoch": 0.28896729474764155, + "grad_norm": 1.177900791168213, + "learning_rate": 8.342053801311719e-06, + "loss": 0.3338, + "step": 14435 + }, + { + "epoch": 0.2889873132647699, + "grad_norm": 1.6276946067810059, + "learning_rate": 8.341812669449943e-06, + "loss": 0.3581, + "step": 14436 + }, + { + "epoch": 0.28900733178189825, + "grad_norm": 1.1495509147644043, + "learning_rate": 8.341571523539908e-06, + "loss": 0.3353, + "step": 14437 + }, + { + "epoch": 0.2890273502990266, + "grad_norm": 1.2222492694854736, + "learning_rate": 8.341330363582624e-06, + "loss": 0.3193, + "step": 14438 + }, + { + "epoch": 0.28904736881615495, + "grad_norm": 1.0917139053344727, + "learning_rate": 8.341089189579108e-06, + "loss": 0.3071, + "step": 14439 + }, + { + "epoch": 0.2890673873332833, + "grad_norm": 1.1311336755752563, + "learning_rate": 8.340848001530371e-06, + "loss": 0.3148, + "step": 14440 + }, + { + "epoch": 0.28908740585041165, + "grad_norm": 1.0743128061294556, + "learning_rate": 8.340606799437428e-06, + "loss": 0.3108, + "step": 14441 + }, + { + "epoch": 0.28910742436754, + "grad_norm": 1.1832318305969238, + "learning_rate": 8.340365583301292e-06, + "loss": 0.3466, + "step": 14442 + }, + { + "epoch": 0.2891274428846683, + "grad_norm": 1.927641749382019, + "learning_rate": 8.34012435312298e-06, + "loss": 0.8669, + "step": 14443 + }, + { + "epoch": 0.28914746140179665, + "grad_norm": 1.0297064781188965, + "learning_rate": 8.339883108903502e-06, + "loss": 0.3432, + "step": 14444 + }, + { + "epoch": 0.289167479918925, + "grad_norm": 1.0601286888122559, + "learning_rate": 8.339641850643875e-06, + "loss": 0.2859, + "step": 14445 + }, + { + "epoch": 0.28918749843605335, + "grad_norm": 1.1585549116134644, + "learning_rate": 8.339400578345111e-06, + "loss": 0.3101, + "step": 14446 + }, + { + "epoch": 0.2892075169531817, + "grad_norm": 1.9419307708740234, + "learning_rate": 8.339159292008228e-06, + "loss": 0.7639, + "step": 14447 + }, + { + "epoch": 0.28922753547031005, + "grad_norm": 1.2025243043899536, + "learning_rate": 8.338917991634237e-06, + "loss": 0.3581, + "step": 14448 + }, + { + "epoch": 0.2892475539874384, + "grad_norm": 1.0017850399017334, + "learning_rate": 8.338676677224154e-06, + "loss": 0.3015, + "step": 14449 + }, + { + "epoch": 0.28926757250456675, + "grad_norm": 1.1380380392074585, + "learning_rate": 8.338435348778991e-06, + "loss": 0.38, + "step": 14450 + }, + { + "epoch": 0.28928759102169505, + "grad_norm": 1.10788094997406, + "learning_rate": 8.338194006299766e-06, + "loss": 0.339, + "step": 14451 + }, + { + "epoch": 0.2893076095388234, + "grad_norm": 1.192640781402588, + "learning_rate": 8.337952649787491e-06, + "loss": 0.297, + "step": 14452 + }, + { + "epoch": 0.28932762805595175, + "grad_norm": 1.2026954889297485, + "learning_rate": 8.337711279243181e-06, + "loss": 0.3322, + "step": 14453 + }, + { + "epoch": 0.2893476465730801, + "grad_norm": 0.9512853026390076, + "learning_rate": 8.337469894667852e-06, + "loss": 0.2827, + "step": 14454 + }, + { + "epoch": 0.28936766509020845, + "grad_norm": 1.0515077114105225, + "learning_rate": 8.337228496062517e-06, + "loss": 0.2959, + "step": 14455 + }, + { + "epoch": 0.2893876836073368, + "grad_norm": 0.9645517468452454, + "learning_rate": 8.336987083428191e-06, + "loss": 0.2982, + "step": 14456 + }, + { + "epoch": 0.28940770212446515, + "grad_norm": 1.1433886289596558, + "learning_rate": 8.33674565676589e-06, + "loss": 0.3212, + "step": 14457 + }, + { + "epoch": 0.2894277206415935, + "grad_norm": 1.303303599357605, + "learning_rate": 8.336504216076629e-06, + "loss": 0.2852, + "step": 14458 + }, + { + "epoch": 0.2894477391587218, + "grad_norm": 1.1019465923309326, + "learning_rate": 8.336262761361422e-06, + "loss": 0.3457, + "step": 14459 + }, + { + "epoch": 0.28946775767585015, + "grad_norm": 1.0853554010391235, + "learning_rate": 8.336021292621286e-06, + "loss": 0.3307, + "step": 14460 + }, + { + "epoch": 0.2894877761929785, + "grad_norm": 1.0991348028182983, + "learning_rate": 8.335779809857232e-06, + "loss": 0.3151, + "step": 14461 + }, + { + "epoch": 0.28950779471010685, + "grad_norm": 1.0694396495819092, + "learning_rate": 8.335538313070278e-06, + "loss": 0.3205, + "step": 14462 + }, + { + "epoch": 0.2895278132272352, + "grad_norm": 1.1657979488372803, + "learning_rate": 8.335296802261439e-06, + "loss": 0.3107, + "step": 14463 + }, + { + "epoch": 0.28954783174436355, + "grad_norm": 1.129514455795288, + "learning_rate": 8.33505527743173e-06, + "loss": 0.3403, + "step": 14464 + }, + { + "epoch": 0.2895678502614919, + "grad_norm": 1.1223760843276978, + "learning_rate": 8.334813738582166e-06, + "loss": 0.3125, + "step": 14465 + }, + { + "epoch": 0.28958786877862025, + "grad_norm": 1.2213441133499146, + "learning_rate": 8.334572185713762e-06, + "loss": 0.3958, + "step": 14466 + }, + { + "epoch": 0.28960788729574855, + "grad_norm": 1.0383315086364746, + "learning_rate": 8.334330618827536e-06, + "loss": 0.3034, + "step": 14467 + }, + { + "epoch": 0.2896279058128769, + "grad_norm": 1.2543702125549316, + "learning_rate": 8.3340890379245e-06, + "loss": 0.3731, + "step": 14468 + }, + { + "epoch": 0.28964792433000525, + "grad_norm": 1.2173690795898438, + "learning_rate": 8.333847443005673e-06, + "loss": 0.3571, + "step": 14469 + }, + { + "epoch": 0.2896679428471336, + "grad_norm": 1.0237289667129517, + "learning_rate": 8.333605834072067e-06, + "loss": 0.2694, + "step": 14470 + }, + { + "epoch": 0.28968796136426195, + "grad_norm": 1.0855863094329834, + "learning_rate": 8.3333642111247e-06, + "loss": 0.2838, + "step": 14471 + }, + { + "epoch": 0.2897079798813903, + "grad_norm": 1.0845052003860474, + "learning_rate": 8.333122574164587e-06, + "loss": 0.3666, + "step": 14472 + }, + { + "epoch": 0.28972799839851865, + "grad_norm": 1.0698200464248657, + "learning_rate": 8.332880923192746e-06, + "loss": 0.3207, + "step": 14473 + }, + { + "epoch": 0.289748016915647, + "grad_norm": 1.1422494649887085, + "learning_rate": 8.332639258210188e-06, + "loss": 0.3473, + "step": 14474 + }, + { + "epoch": 0.2897680354327753, + "grad_norm": 1.0718882083892822, + "learning_rate": 8.332397579217933e-06, + "loss": 0.3399, + "step": 14475 + }, + { + "epoch": 0.28978805394990365, + "grad_norm": 1.2382891178131104, + "learning_rate": 8.332155886216998e-06, + "loss": 0.3601, + "step": 14476 + }, + { + "epoch": 0.289808072467032, + "grad_norm": 1.085684895515442, + "learning_rate": 8.331914179208394e-06, + "loss": 0.3238, + "step": 14477 + }, + { + "epoch": 0.28982809098416035, + "grad_norm": 0.9570134282112122, + "learning_rate": 8.331672458193139e-06, + "loss": 0.292, + "step": 14478 + }, + { + "epoch": 0.2898481095012887, + "grad_norm": 1.27665376663208, + "learning_rate": 8.331430723172251e-06, + "loss": 0.3445, + "step": 14479 + }, + { + "epoch": 0.28986812801841705, + "grad_norm": 1.1665934324264526, + "learning_rate": 8.331188974146746e-06, + "loss": 0.3114, + "step": 14480 + }, + { + "epoch": 0.2898881465355454, + "grad_norm": 1.251071572303772, + "learning_rate": 8.330947211117638e-06, + "loss": 0.3657, + "step": 14481 + }, + { + "epoch": 0.28990816505267375, + "grad_norm": 1.0540400743484497, + "learning_rate": 8.330705434085946e-06, + "loss": 0.337, + "step": 14482 + }, + { + "epoch": 0.28992818356980204, + "grad_norm": 1.9645856618881226, + "learning_rate": 8.330463643052683e-06, + "loss": 0.8543, + "step": 14483 + }, + { + "epoch": 0.2899482020869304, + "grad_norm": 1.0669581890106201, + "learning_rate": 8.330221838018871e-06, + "loss": 0.262, + "step": 14484 + }, + { + "epoch": 0.28996822060405875, + "grad_norm": 1.0040099620819092, + "learning_rate": 8.329980018985522e-06, + "loss": 0.2897, + "step": 14485 + }, + { + "epoch": 0.2899882391211871, + "grad_norm": 1.22767174243927, + "learning_rate": 8.329738185953652e-06, + "loss": 0.3505, + "step": 14486 + }, + { + "epoch": 0.29000825763831545, + "grad_norm": 1.0923359394073486, + "learning_rate": 8.32949633892428e-06, + "loss": 0.2758, + "step": 14487 + }, + { + "epoch": 0.2900282761554438, + "grad_norm": 1.9238402843475342, + "learning_rate": 8.329254477898422e-06, + "loss": 0.8395, + "step": 14488 + }, + { + "epoch": 0.29004829467257215, + "grad_norm": 1.1982969045639038, + "learning_rate": 8.329012602877095e-06, + "loss": 0.3106, + "step": 14489 + }, + { + "epoch": 0.2900683131897005, + "grad_norm": 1.9254125356674194, + "learning_rate": 8.328770713861316e-06, + "loss": 0.8413, + "step": 14490 + }, + { + "epoch": 0.2900883317068288, + "grad_norm": 1.7680959701538086, + "learning_rate": 8.3285288108521e-06, + "loss": 0.8565, + "step": 14491 + }, + { + "epoch": 0.29010835022395715, + "grad_norm": 1.1440407037734985, + "learning_rate": 8.328286893850465e-06, + "loss": 0.3749, + "step": 14492 + }, + { + "epoch": 0.2901283687410855, + "grad_norm": 1.1070455312728882, + "learning_rate": 8.328044962857431e-06, + "loss": 0.3173, + "step": 14493 + }, + { + "epoch": 0.29014838725821385, + "grad_norm": 1.344623327255249, + "learning_rate": 8.32780301787401e-06, + "loss": 0.3429, + "step": 14494 + }, + { + "epoch": 0.2901684057753422, + "grad_norm": 1.0688989162445068, + "learning_rate": 8.32756105890122e-06, + "loss": 0.3403, + "step": 14495 + }, + { + "epoch": 0.29018842429247055, + "grad_norm": 1.1147103309631348, + "learning_rate": 8.32731908594008e-06, + "loss": 0.3298, + "step": 14496 + }, + { + "epoch": 0.2902084428095989, + "grad_norm": 1.168949842453003, + "learning_rate": 8.327077098991608e-06, + "loss": 0.3107, + "step": 14497 + }, + { + "epoch": 0.29022846132672725, + "grad_norm": 1.0890029668807983, + "learning_rate": 8.32683509805682e-06, + "loss": 0.2954, + "step": 14498 + }, + { + "epoch": 0.29024847984385554, + "grad_norm": 1.145479440689087, + "learning_rate": 8.326593083136731e-06, + "loss": 0.3549, + "step": 14499 + }, + { + "epoch": 0.2902684983609839, + "grad_norm": 1.1310710906982422, + "learning_rate": 8.326351054232362e-06, + "loss": 0.3293, + "step": 14500 + }, + { + "epoch": 0.29028851687811225, + "grad_norm": 1.0914952754974365, + "learning_rate": 8.32610901134473e-06, + "loss": 0.3116, + "step": 14501 + }, + { + "epoch": 0.2903085353952406, + "grad_norm": 1.0107269287109375, + "learning_rate": 8.325866954474851e-06, + "loss": 0.2935, + "step": 14502 + }, + { + "epoch": 0.29032855391236895, + "grad_norm": 1.0706592798233032, + "learning_rate": 8.325624883623742e-06, + "loss": 0.3048, + "step": 14503 + }, + { + "epoch": 0.2903485724294973, + "grad_norm": 1.061553716659546, + "learning_rate": 8.325382798792423e-06, + "loss": 0.3231, + "step": 14504 + }, + { + "epoch": 0.29036859094662565, + "grad_norm": 1.0156909227371216, + "learning_rate": 8.325140699981909e-06, + "loss": 0.2746, + "step": 14505 + }, + { + "epoch": 0.290388609463754, + "grad_norm": 1.0540549755096436, + "learning_rate": 8.324898587193219e-06, + "loss": 0.3328, + "step": 14506 + }, + { + "epoch": 0.2904086279808823, + "grad_norm": 1.7349627017974854, + "learning_rate": 8.324656460427372e-06, + "loss": 0.8259, + "step": 14507 + }, + { + "epoch": 0.29042864649801065, + "grad_norm": 1.0197696685791016, + "learning_rate": 8.324414319685384e-06, + "loss": 0.3112, + "step": 14508 + }, + { + "epoch": 0.290448665015139, + "grad_norm": 1.1458232402801514, + "learning_rate": 8.324172164968273e-06, + "loss": 0.2873, + "step": 14509 + }, + { + "epoch": 0.29046868353226735, + "grad_norm": 1.1083385944366455, + "learning_rate": 8.323929996277059e-06, + "loss": 0.3097, + "step": 14510 + }, + { + "epoch": 0.2904887020493957, + "grad_norm": 1.3018351793289185, + "learning_rate": 8.323687813612758e-06, + "loss": 0.3531, + "step": 14511 + }, + { + "epoch": 0.29050872056652405, + "grad_norm": 1.058019995689392, + "learning_rate": 8.323445616976389e-06, + "loss": 0.3183, + "step": 14512 + }, + { + "epoch": 0.2905287390836524, + "grad_norm": 1.0871013402938843, + "learning_rate": 8.32320340636897e-06, + "loss": 0.382, + "step": 14513 + }, + { + "epoch": 0.29054875760078075, + "grad_norm": 1.0433257818222046, + "learning_rate": 8.322961181791517e-06, + "loss": 0.3274, + "step": 14514 + }, + { + "epoch": 0.29056877611790904, + "grad_norm": 1.1132893562316895, + "learning_rate": 8.322718943245051e-06, + "loss": 0.3753, + "step": 14515 + }, + { + "epoch": 0.2905887946350374, + "grad_norm": 1.0141584873199463, + "learning_rate": 8.322476690730592e-06, + "loss": 0.3083, + "step": 14516 + }, + { + "epoch": 0.29060881315216575, + "grad_norm": 1.3364086151123047, + "learning_rate": 8.322234424249153e-06, + "loss": 0.3145, + "step": 14517 + }, + { + "epoch": 0.2906288316692941, + "grad_norm": 1.3218029737472534, + "learning_rate": 8.321992143801758e-06, + "loss": 0.2871, + "step": 14518 + }, + { + "epoch": 0.29064885018642245, + "grad_norm": 1.8559720516204834, + "learning_rate": 8.321749849389423e-06, + "loss": 0.3546, + "step": 14519 + }, + { + "epoch": 0.2906688687035508, + "grad_norm": 1.093065857887268, + "learning_rate": 8.321507541013165e-06, + "loss": 0.3456, + "step": 14520 + }, + { + "epoch": 0.29068888722067915, + "grad_norm": 1.0867406129837036, + "learning_rate": 8.321265218674004e-06, + "loss": 0.3625, + "step": 14521 + }, + { + "epoch": 0.2907089057378075, + "grad_norm": 1.24845290184021, + "learning_rate": 8.321022882372958e-06, + "loss": 0.3536, + "step": 14522 + }, + { + "epoch": 0.2907289242549358, + "grad_norm": 1.8124639987945557, + "learning_rate": 8.320780532111048e-06, + "loss": 0.904, + "step": 14523 + }, + { + "epoch": 0.29074894277206415, + "grad_norm": 1.1364787817001343, + "learning_rate": 8.320538167889291e-06, + "loss": 0.3013, + "step": 14524 + }, + { + "epoch": 0.2907689612891925, + "grad_norm": 1.0828187465667725, + "learning_rate": 8.320295789708705e-06, + "loss": 0.3123, + "step": 14525 + }, + { + "epoch": 0.29078897980632085, + "grad_norm": 1.8580197095870972, + "learning_rate": 8.320053397570313e-06, + "loss": 0.7635, + "step": 14526 + }, + { + "epoch": 0.2908089983234492, + "grad_norm": 1.4796791076660156, + "learning_rate": 8.31981099147513e-06, + "loss": 0.3175, + "step": 14527 + }, + { + "epoch": 0.29082901684057755, + "grad_norm": 1.0710078477859497, + "learning_rate": 8.319568571424174e-06, + "loss": 0.3327, + "step": 14528 + }, + { + "epoch": 0.2908490353577059, + "grad_norm": 1.170589566230774, + "learning_rate": 8.319326137418466e-06, + "loss": 0.3619, + "step": 14529 + }, + { + "epoch": 0.29086905387483425, + "grad_norm": 1.8953585624694824, + "learning_rate": 8.319083689459027e-06, + "loss": 0.7555, + "step": 14530 + }, + { + "epoch": 0.29088907239196254, + "grad_norm": 1.1722770929336548, + "learning_rate": 8.318841227546875e-06, + "loss": 0.3489, + "step": 14531 + }, + { + "epoch": 0.2909090909090909, + "grad_norm": 1.8446656465530396, + "learning_rate": 8.318598751683028e-06, + "loss": 0.8708, + "step": 14532 + }, + { + "epoch": 0.29092910942621925, + "grad_norm": 1.0226411819458008, + "learning_rate": 8.318356261868506e-06, + "loss": 0.3111, + "step": 14533 + }, + { + "epoch": 0.2909491279433476, + "grad_norm": 1.1939115524291992, + "learning_rate": 8.318113758104327e-06, + "loss": 0.3346, + "step": 14534 + }, + { + "epoch": 0.29096914646047595, + "grad_norm": 1.0600541830062866, + "learning_rate": 8.317871240391512e-06, + "loss": 0.2963, + "step": 14535 + }, + { + "epoch": 0.2909891649776043, + "grad_norm": 1.9780184030532837, + "learning_rate": 8.317628708731083e-06, + "loss": 0.8364, + "step": 14536 + }, + { + "epoch": 0.29100918349473265, + "grad_norm": 1.1175693273544312, + "learning_rate": 8.317386163124054e-06, + "loss": 0.3471, + "step": 14537 + }, + { + "epoch": 0.291029202011861, + "grad_norm": 1.1865397691726685, + "learning_rate": 8.31714360357145e-06, + "loss": 0.3398, + "step": 14538 + }, + { + "epoch": 0.2910492205289893, + "grad_norm": 1.1094125509262085, + "learning_rate": 8.316901030074285e-06, + "loss": 0.2803, + "step": 14539 + }, + { + "epoch": 0.29106923904611764, + "grad_norm": 1.0714019536972046, + "learning_rate": 8.316658442633583e-06, + "loss": 0.274, + "step": 14540 + }, + { + "epoch": 0.291089257563246, + "grad_norm": 1.1202348470687866, + "learning_rate": 8.316415841250363e-06, + "loss": 0.3472, + "step": 14541 + }, + { + "epoch": 0.29110927608037435, + "grad_norm": 1.2041016817092896, + "learning_rate": 8.316173225925645e-06, + "loss": 0.3034, + "step": 14542 + }, + { + "epoch": 0.2911292945975027, + "grad_norm": 1.0476000308990479, + "learning_rate": 8.315930596660448e-06, + "loss": 0.2884, + "step": 14543 + }, + { + "epoch": 0.29114931311463105, + "grad_norm": 1.092313528060913, + "learning_rate": 8.315687953455793e-06, + "loss": 0.2866, + "step": 14544 + }, + { + "epoch": 0.2911693316317594, + "grad_norm": 1.3208531141281128, + "learning_rate": 8.3154452963127e-06, + "loss": 0.3195, + "step": 14545 + }, + { + "epoch": 0.2911893501488877, + "grad_norm": 0.9898298382759094, + "learning_rate": 8.315202625232186e-06, + "loss": 0.281, + "step": 14546 + }, + { + "epoch": 0.29120936866601604, + "grad_norm": 1.0872511863708496, + "learning_rate": 8.314959940215276e-06, + "loss": 0.3036, + "step": 14547 + }, + { + "epoch": 0.2912293871831444, + "grad_norm": 1.0474417209625244, + "learning_rate": 8.314717241262986e-06, + "loss": 0.3364, + "step": 14548 + }, + { + "epoch": 0.29124940570027275, + "grad_norm": 1.083863377571106, + "learning_rate": 8.314474528376339e-06, + "loss": 0.2918, + "step": 14549 + }, + { + "epoch": 0.2912694242174011, + "grad_norm": 1.0330700874328613, + "learning_rate": 8.314231801556354e-06, + "loss": 0.3018, + "step": 14550 + }, + { + "epoch": 0.29128944273452945, + "grad_norm": 1.2681628465652466, + "learning_rate": 8.313989060804054e-06, + "loss": 0.2945, + "step": 14551 + }, + { + "epoch": 0.2913094612516578, + "grad_norm": 1.1920926570892334, + "learning_rate": 8.313746306120454e-06, + "loss": 0.3125, + "step": 14552 + }, + { + "epoch": 0.29132947976878615, + "grad_norm": 1.1234837770462036, + "learning_rate": 8.313503537506577e-06, + "loss": 0.3379, + "step": 14553 + }, + { + "epoch": 0.29134949828591444, + "grad_norm": 1.0878198146820068, + "learning_rate": 8.313260754963447e-06, + "loss": 0.2801, + "step": 14554 + }, + { + "epoch": 0.2913695168030428, + "grad_norm": 1.1247048377990723, + "learning_rate": 8.31301795849208e-06, + "loss": 0.3079, + "step": 14555 + }, + { + "epoch": 0.29138953532017114, + "grad_norm": 0.9857357144355774, + "learning_rate": 8.3127751480935e-06, + "loss": 0.3133, + "step": 14556 + }, + { + "epoch": 0.2914095538372995, + "grad_norm": 1.0907845497131348, + "learning_rate": 8.312532323768725e-06, + "loss": 0.3393, + "step": 14557 + }, + { + "epoch": 0.29142957235442785, + "grad_norm": 1.9558985233306885, + "learning_rate": 8.312289485518779e-06, + "loss": 0.7932, + "step": 14558 + }, + { + "epoch": 0.2914495908715562, + "grad_norm": 1.1752305030822754, + "learning_rate": 8.312046633344678e-06, + "loss": 0.3326, + "step": 14559 + }, + { + "epoch": 0.29146960938868455, + "grad_norm": 1.0391508340835571, + "learning_rate": 8.311803767247447e-06, + "loss": 0.2815, + "step": 14560 + }, + { + "epoch": 0.2914896279058129, + "grad_norm": 1.0352487564086914, + "learning_rate": 8.311560887228105e-06, + "loss": 0.2977, + "step": 14561 + }, + { + "epoch": 0.2915096464229412, + "grad_norm": 1.209376573562622, + "learning_rate": 8.311317993287676e-06, + "loss": 0.3367, + "step": 14562 + }, + { + "epoch": 0.29152966494006954, + "grad_norm": 1.204655408859253, + "learning_rate": 8.311075085427177e-06, + "loss": 0.3105, + "step": 14563 + }, + { + "epoch": 0.2915496834571979, + "grad_norm": 1.1869611740112305, + "learning_rate": 8.31083216364763e-06, + "loss": 0.3568, + "step": 14564 + }, + { + "epoch": 0.29156970197432625, + "grad_norm": 1.23771071434021, + "learning_rate": 8.310589227950057e-06, + "loss": 0.3601, + "step": 14565 + }, + { + "epoch": 0.2915897204914546, + "grad_norm": 1.9076759815216064, + "learning_rate": 8.310346278335482e-06, + "loss": 0.8186, + "step": 14566 + }, + { + "epoch": 0.29160973900858295, + "grad_norm": 1.0705312490463257, + "learning_rate": 8.310103314804921e-06, + "loss": 0.3443, + "step": 14567 + }, + { + "epoch": 0.2916297575257113, + "grad_norm": 1.0913180112838745, + "learning_rate": 8.3098603373594e-06, + "loss": 0.2711, + "step": 14568 + }, + { + "epoch": 0.29164977604283965, + "grad_norm": 1.1650075912475586, + "learning_rate": 8.309617345999936e-06, + "loss": 0.3619, + "step": 14569 + }, + { + "epoch": 0.29166979455996794, + "grad_norm": 0.9760818481445312, + "learning_rate": 8.309374340727556e-06, + "loss": 0.2827, + "step": 14570 + }, + { + "epoch": 0.2916898130770963, + "grad_norm": 1.1027860641479492, + "learning_rate": 8.309131321543276e-06, + "loss": 0.2633, + "step": 14571 + }, + { + "epoch": 0.29170983159422464, + "grad_norm": 1.1238678693771362, + "learning_rate": 8.30888828844812e-06, + "loss": 0.2978, + "step": 14572 + }, + { + "epoch": 0.291729850111353, + "grad_norm": 1.0847053527832031, + "learning_rate": 8.308645241443111e-06, + "loss": 0.35, + "step": 14573 + }, + { + "epoch": 0.29174986862848135, + "grad_norm": 0.9996933341026306, + "learning_rate": 8.30840218052927e-06, + "loss": 0.3201, + "step": 14574 + }, + { + "epoch": 0.2917698871456097, + "grad_norm": 1.8742985725402832, + "learning_rate": 8.308159105707615e-06, + "loss": 0.8456, + "step": 14575 + }, + { + "epoch": 0.29178990566273805, + "grad_norm": 1.9053303003311157, + "learning_rate": 8.307916016979175e-06, + "loss": 0.7925, + "step": 14576 + }, + { + "epoch": 0.2918099241798664, + "grad_norm": 1.082801342010498, + "learning_rate": 8.307672914344967e-06, + "loss": 0.3142, + "step": 14577 + }, + { + "epoch": 0.2918299426969947, + "grad_norm": 1.008671760559082, + "learning_rate": 8.307429797806012e-06, + "loss": 0.3495, + "step": 14578 + }, + { + "epoch": 0.29184996121412304, + "grad_norm": 1.0055338144302368, + "learning_rate": 8.307186667363335e-06, + "loss": 0.3114, + "step": 14579 + }, + { + "epoch": 0.2918699797312514, + "grad_norm": 1.0985904932022095, + "learning_rate": 8.306943523017957e-06, + "loss": 0.338, + "step": 14580 + }, + { + "epoch": 0.29188999824837975, + "grad_norm": 1.0896204710006714, + "learning_rate": 8.3067003647709e-06, + "loss": 0.3318, + "step": 14581 + }, + { + "epoch": 0.2919100167655081, + "grad_norm": 1.0762710571289062, + "learning_rate": 8.306457192623186e-06, + "loss": 0.3603, + "step": 14582 + }, + { + "epoch": 0.29193003528263645, + "grad_norm": 1.0854359865188599, + "learning_rate": 8.306214006575837e-06, + "loss": 0.2982, + "step": 14583 + }, + { + "epoch": 0.2919500537997648, + "grad_norm": 1.0712251663208008, + "learning_rate": 8.305970806629877e-06, + "loss": 0.3301, + "step": 14584 + }, + { + "epoch": 0.29197007231689315, + "grad_norm": 1.0194300413131714, + "learning_rate": 8.305727592786328e-06, + "loss": 0.3039, + "step": 14585 + }, + { + "epoch": 0.29199009083402144, + "grad_norm": 1.0321635007858276, + "learning_rate": 8.305484365046211e-06, + "loss": 0.3325, + "step": 14586 + }, + { + "epoch": 0.2920101093511498, + "grad_norm": 1.0837572813034058, + "learning_rate": 8.305241123410547e-06, + "loss": 0.3012, + "step": 14587 + }, + { + "epoch": 0.29203012786827814, + "grad_norm": 1.106027603149414, + "learning_rate": 8.304997867880362e-06, + "loss": 0.3451, + "step": 14588 + }, + { + "epoch": 0.2920501463854065, + "grad_norm": 1.0672662258148193, + "learning_rate": 8.304754598456678e-06, + "loss": 0.3226, + "step": 14589 + }, + { + "epoch": 0.29207016490253485, + "grad_norm": 1.2848039865493774, + "learning_rate": 8.304511315140515e-06, + "loss": 0.3383, + "step": 14590 + }, + { + "epoch": 0.2920901834196632, + "grad_norm": 1.067866325378418, + "learning_rate": 8.304268017932899e-06, + "loss": 0.324, + "step": 14591 + }, + { + "epoch": 0.29211020193679155, + "grad_norm": 0.925532341003418, + "learning_rate": 8.304024706834852e-06, + "loss": 0.2742, + "step": 14592 + }, + { + "epoch": 0.2921302204539199, + "grad_norm": 1.1884374618530273, + "learning_rate": 8.303781381847396e-06, + "loss": 0.3429, + "step": 14593 + }, + { + "epoch": 0.2921502389710482, + "grad_norm": 1.915185809135437, + "learning_rate": 8.303538042971552e-06, + "loss": 0.8037, + "step": 14594 + }, + { + "epoch": 0.29217025748817654, + "grad_norm": 0.9995163679122925, + "learning_rate": 8.303294690208347e-06, + "loss": 0.3122, + "step": 14595 + }, + { + "epoch": 0.2921902760053049, + "grad_norm": 1.061135172843933, + "learning_rate": 8.303051323558801e-06, + "loss": 0.254, + "step": 14596 + }, + { + "epoch": 0.29221029452243324, + "grad_norm": 1.4225317239761353, + "learning_rate": 8.302807943023939e-06, + "loss": 0.3247, + "step": 14597 + }, + { + "epoch": 0.2922303130395616, + "grad_norm": 1.1696586608886719, + "learning_rate": 8.302564548604782e-06, + "loss": 0.3053, + "step": 14598 + }, + { + "epoch": 0.29225033155668995, + "grad_norm": 1.2865593433380127, + "learning_rate": 8.302321140302355e-06, + "loss": 0.3761, + "step": 14599 + }, + { + "epoch": 0.2922703500738183, + "grad_norm": 1.1359035968780518, + "learning_rate": 8.30207771811768e-06, + "loss": 0.4101, + "step": 14600 + }, + { + "epoch": 0.29229036859094665, + "grad_norm": 1.0612668991088867, + "learning_rate": 8.301834282051782e-06, + "loss": 0.3278, + "step": 14601 + }, + { + "epoch": 0.29231038710807494, + "grad_norm": 1.2196638584136963, + "learning_rate": 8.301590832105682e-06, + "loss": 0.2607, + "step": 14602 + }, + { + "epoch": 0.2923304056252033, + "grad_norm": 1.0757122039794922, + "learning_rate": 8.301347368280406e-06, + "loss": 0.3652, + "step": 14603 + }, + { + "epoch": 0.29235042414233164, + "grad_norm": 1.0929211378097534, + "learning_rate": 8.301103890576975e-06, + "loss": 0.3248, + "step": 14604 + }, + { + "epoch": 0.29237044265946, + "grad_norm": 0.9706696271896362, + "learning_rate": 8.300860398996412e-06, + "loss": 0.3165, + "step": 14605 + }, + { + "epoch": 0.29239046117658835, + "grad_norm": 1.0438445806503296, + "learning_rate": 8.300616893539745e-06, + "loss": 0.3082, + "step": 14606 + }, + { + "epoch": 0.2924104796937167, + "grad_norm": 1.0899053812026978, + "learning_rate": 8.300373374207994e-06, + "loss": 0.3321, + "step": 14607 + }, + { + "epoch": 0.29243049821084505, + "grad_norm": 1.1571537256240845, + "learning_rate": 8.300129841002184e-06, + "loss": 0.3252, + "step": 14608 + }, + { + "epoch": 0.2924505167279734, + "grad_norm": 1.1694741249084473, + "learning_rate": 8.29988629392334e-06, + "loss": 0.366, + "step": 14609 + }, + { + "epoch": 0.2924705352451017, + "grad_norm": 1.7721633911132812, + "learning_rate": 8.29964273297248e-06, + "loss": 0.3532, + "step": 14610 + }, + { + "epoch": 0.29249055376223004, + "grad_norm": 1.0010203123092651, + "learning_rate": 8.299399158150636e-06, + "loss": 0.3072, + "step": 14611 + }, + { + "epoch": 0.2925105722793584, + "grad_norm": 1.127463459968567, + "learning_rate": 8.299155569458826e-06, + "loss": 0.2922, + "step": 14612 + }, + { + "epoch": 0.29253059079648674, + "grad_norm": 1.1730679273605347, + "learning_rate": 8.298911966898077e-06, + "loss": 0.312, + "step": 14613 + }, + { + "epoch": 0.2925506093136151, + "grad_norm": 1.0050456523895264, + "learning_rate": 8.29866835046941e-06, + "loss": 0.3221, + "step": 14614 + }, + { + "epoch": 0.29257062783074345, + "grad_norm": 1.9829702377319336, + "learning_rate": 8.298424720173853e-06, + "loss": 0.7919, + "step": 14615 + }, + { + "epoch": 0.2925906463478718, + "grad_norm": 1.268702507019043, + "learning_rate": 8.298181076012428e-06, + "loss": 0.3297, + "step": 14616 + }, + { + "epoch": 0.29261066486500015, + "grad_norm": 1.1720936298370361, + "learning_rate": 8.29793741798616e-06, + "loss": 0.3179, + "step": 14617 + }, + { + "epoch": 0.29263068338212844, + "grad_norm": 1.079781174659729, + "learning_rate": 8.297693746096072e-06, + "loss": 0.4189, + "step": 14618 + }, + { + "epoch": 0.2926507018992568, + "grad_norm": 1.1458256244659424, + "learning_rate": 8.29745006034319e-06, + "loss": 0.3033, + "step": 14619 + }, + { + "epoch": 0.29267072041638514, + "grad_norm": 1.0743516683578491, + "learning_rate": 8.297206360728536e-06, + "loss": 0.322, + "step": 14620 + }, + { + "epoch": 0.2926907389335135, + "grad_norm": 1.234182357788086, + "learning_rate": 8.296962647253137e-06, + "loss": 0.3149, + "step": 14621 + }, + { + "epoch": 0.29271075745064185, + "grad_norm": 1.07308030128479, + "learning_rate": 8.296718919918015e-06, + "loss": 0.2843, + "step": 14622 + }, + { + "epoch": 0.2927307759677702, + "grad_norm": 1.2051811218261719, + "learning_rate": 8.296475178724197e-06, + "loss": 0.3676, + "step": 14623 + }, + { + "epoch": 0.29275079448489855, + "grad_norm": 1.3606679439544678, + "learning_rate": 8.296231423672709e-06, + "loss": 0.3711, + "step": 14624 + }, + { + "epoch": 0.2927708130020269, + "grad_norm": 1.2199625968933105, + "learning_rate": 8.29598765476457e-06, + "loss": 0.3415, + "step": 14625 + }, + { + "epoch": 0.2927908315191552, + "grad_norm": 1.188338041305542, + "learning_rate": 8.29574387200081e-06, + "loss": 0.3024, + "step": 14626 + }, + { + "epoch": 0.29281085003628354, + "grad_norm": 0.9767984747886658, + "learning_rate": 8.295500075382451e-06, + "loss": 0.2731, + "step": 14627 + }, + { + "epoch": 0.2928308685534119, + "grad_norm": 1.738763451576233, + "learning_rate": 8.295256264910521e-06, + "loss": 0.8693, + "step": 14628 + }, + { + "epoch": 0.29285088707054024, + "grad_norm": 1.1389501094818115, + "learning_rate": 8.295012440586041e-06, + "loss": 0.33, + "step": 14629 + }, + { + "epoch": 0.2928709055876686, + "grad_norm": 1.0867445468902588, + "learning_rate": 8.29476860241004e-06, + "loss": 0.3117, + "step": 14630 + }, + { + "epoch": 0.29289092410479695, + "grad_norm": 1.2780380249023438, + "learning_rate": 8.29452475038354e-06, + "loss": 0.3548, + "step": 14631 + }, + { + "epoch": 0.2929109426219253, + "grad_norm": 1.1560524702072144, + "learning_rate": 8.294280884507563e-06, + "loss": 0.3405, + "step": 14632 + }, + { + "epoch": 0.29293096113905365, + "grad_norm": 1.0699400901794434, + "learning_rate": 8.294037004783143e-06, + "loss": 0.3442, + "step": 14633 + }, + { + "epoch": 0.29295097965618194, + "grad_norm": 1.1375312805175781, + "learning_rate": 8.293793111211299e-06, + "loss": 0.3479, + "step": 14634 + }, + { + "epoch": 0.2929709981733103, + "grad_norm": 1.1517568826675415, + "learning_rate": 8.293549203793056e-06, + "loss": 0.3369, + "step": 14635 + }, + { + "epoch": 0.29299101669043864, + "grad_norm": 1.1814719438552856, + "learning_rate": 8.293305282529443e-06, + "loss": 0.3057, + "step": 14636 + }, + { + "epoch": 0.293011035207567, + "grad_norm": 1.1190224885940552, + "learning_rate": 8.293061347421484e-06, + "loss": 0.3094, + "step": 14637 + }, + { + "epoch": 0.29303105372469535, + "grad_norm": 1.7809163331985474, + "learning_rate": 8.292817398470202e-06, + "loss": 0.2824, + "step": 14638 + }, + { + "epoch": 0.2930510722418237, + "grad_norm": 1.1480205059051514, + "learning_rate": 8.292573435676623e-06, + "loss": 0.2858, + "step": 14639 + }, + { + "epoch": 0.29307109075895205, + "grad_norm": 1.8373812437057495, + "learning_rate": 8.292329459041775e-06, + "loss": 0.8132, + "step": 14640 + }, + { + "epoch": 0.2930911092760804, + "grad_norm": 1.1549818515777588, + "learning_rate": 8.292085468566684e-06, + "loss": 0.3559, + "step": 14641 + }, + { + "epoch": 0.2931111277932087, + "grad_norm": 1.218957543373108, + "learning_rate": 8.291841464252373e-06, + "loss": 0.3587, + "step": 14642 + }, + { + "epoch": 0.29313114631033704, + "grad_norm": 1.706304907798767, + "learning_rate": 8.291597446099868e-06, + "loss": 0.7589, + "step": 14643 + }, + { + "epoch": 0.2931511648274654, + "grad_norm": 1.1069482564926147, + "learning_rate": 8.291353414110198e-06, + "loss": 0.336, + "step": 14644 + }, + { + "epoch": 0.29317118334459374, + "grad_norm": 1.163629412651062, + "learning_rate": 8.291109368284383e-06, + "loss": 0.3309, + "step": 14645 + }, + { + "epoch": 0.2931912018617221, + "grad_norm": 1.0747078657150269, + "learning_rate": 8.290865308623454e-06, + "loss": 0.2998, + "step": 14646 + }, + { + "epoch": 0.29321122037885045, + "grad_norm": 1.2905710935592651, + "learning_rate": 8.290621235128437e-06, + "loss": 0.298, + "step": 14647 + }, + { + "epoch": 0.2932312388959788, + "grad_norm": 1.936960220336914, + "learning_rate": 8.290377147800356e-06, + "loss": 0.8339, + "step": 14648 + }, + { + "epoch": 0.29325125741310715, + "grad_norm": 1.0884710550308228, + "learning_rate": 8.290133046640237e-06, + "loss": 0.3345, + "step": 14649 + }, + { + "epoch": 0.29327127593023544, + "grad_norm": 1.0472588539123535, + "learning_rate": 8.289888931649107e-06, + "loss": 0.3079, + "step": 14650 + }, + { + "epoch": 0.2932912944473638, + "grad_norm": 1.2069584131240845, + "learning_rate": 8.289644802827992e-06, + "loss": 0.3345, + "step": 14651 + }, + { + "epoch": 0.29331131296449214, + "grad_norm": 1.8155372142791748, + "learning_rate": 8.289400660177917e-06, + "loss": 0.8713, + "step": 14652 + }, + { + "epoch": 0.2933313314816205, + "grad_norm": 1.227428913116455, + "learning_rate": 8.28915650369991e-06, + "loss": 0.3837, + "step": 14653 + }, + { + "epoch": 0.29335134999874884, + "grad_norm": 1.0407006740570068, + "learning_rate": 8.288912333394998e-06, + "loss": 0.3173, + "step": 14654 + }, + { + "epoch": 0.2933713685158772, + "grad_norm": 1.1483879089355469, + "learning_rate": 8.288668149264205e-06, + "loss": 0.355, + "step": 14655 + }, + { + "epoch": 0.29339138703300555, + "grad_norm": 1.2653777599334717, + "learning_rate": 8.288423951308559e-06, + "loss": 0.2784, + "step": 14656 + }, + { + "epoch": 0.2934114055501339, + "grad_norm": 1.1150370836257935, + "learning_rate": 8.288179739529088e-06, + "loss": 0.3395, + "step": 14657 + }, + { + "epoch": 0.2934314240672622, + "grad_norm": 1.1713508367538452, + "learning_rate": 8.287935513926815e-06, + "loss": 0.366, + "step": 14658 + }, + { + "epoch": 0.29345144258439054, + "grad_norm": 0.9518369436264038, + "learning_rate": 8.28769127450277e-06, + "loss": 0.3006, + "step": 14659 + }, + { + "epoch": 0.2934714611015189, + "grad_norm": 1.1183815002441406, + "learning_rate": 8.287447021257977e-06, + "loss": 0.3379, + "step": 14660 + }, + { + "epoch": 0.29349147961864724, + "grad_norm": 1.1613126993179321, + "learning_rate": 8.287202754193467e-06, + "loss": 0.3211, + "step": 14661 + }, + { + "epoch": 0.2935114981357756, + "grad_norm": 1.0989207029342651, + "learning_rate": 8.286958473310261e-06, + "loss": 0.3427, + "step": 14662 + }, + { + "epoch": 0.29353151665290395, + "grad_norm": 1.696521282196045, + "learning_rate": 8.28671417860939e-06, + "loss": 0.3271, + "step": 14663 + }, + { + "epoch": 0.2935515351700323, + "grad_norm": 1.9422156810760498, + "learning_rate": 8.28646987009188e-06, + "loss": 0.8024, + "step": 14664 + }, + { + "epoch": 0.29357155368716065, + "grad_norm": 1.132171392440796, + "learning_rate": 8.286225547758759e-06, + "loss": 0.2954, + "step": 14665 + }, + { + "epoch": 0.29359157220428894, + "grad_norm": 1.0422682762145996, + "learning_rate": 8.285981211611051e-06, + "loss": 0.2821, + "step": 14666 + }, + { + "epoch": 0.2936115907214173, + "grad_norm": 1.7567353248596191, + "learning_rate": 8.285736861649786e-06, + "loss": 0.9038, + "step": 14667 + }, + { + "epoch": 0.29363160923854564, + "grad_norm": 1.9744970798492432, + "learning_rate": 8.28549249787599e-06, + "loss": 0.7991, + "step": 14668 + }, + { + "epoch": 0.293651627755674, + "grad_norm": 1.842836856842041, + "learning_rate": 8.285248120290692e-06, + "loss": 0.8401, + "step": 14669 + }, + { + "epoch": 0.29367164627280234, + "grad_norm": 1.1226222515106201, + "learning_rate": 8.285003728894915e-06, + "loss": 0.3315, + "step": 14670 + }, + { + "epoch": 0.2936916647899307, + "grad_norm": 1.15520441532135, + "learning_rate": 8.284759323689692e-06, + "loss": 0.3445, + "step": 14671 + }, + { + "epoch": 0.29371168330705905, + "grad_norm": 1.825325846672058, + "learning_rate": 8.284514904676048e-06, + "loss": 0.2937, + "step": 14672 + }, + { + "epoch": 0.2937317018241874, + "grad_norm": 1.110940933227539, + "learning_rate": 8.284270471855008e-06, + "loss": 0.3448, + "step": 14673 + }, + { + "epoch": 0.2937517203413157, + "grad_norm": 1.0857219696044922, + "learning_rate": 8.284026025227602e-06, + "loss": 0.3655, + "step": 14674 + }, + { + "epoch": 0.29377173885844404, + "grad_norm": 1.1225254535675049, + "learning_rate": 8.283781564794859e-06, + "loss": 0.3245, + "step": 14675 + }, + { + "epoch": 0.2937917573755724, + "grad_norm": 1.0751793384552002, + "learning_rate": 8.283537090557804e-06, + "loss": 0.3073, + "step": 14676 + }, + { + "epoch": 0.29381177589270074, + "grad_norm": 1.1421061754226685, + "learning_rate": 8.283292602517465e-06, + "loss": 0.3548, + "step": 14677 + }, + { + "epoch": 0.2938317944098291, + "grad_norm": 1.240086555480957, + "learning_rate": 8.283048100674872e-06, + "loss": 0.3461, + "step": 14678 + }, + { + "epoch": 0.29385181292695745, + "grad_norm": 1.0525922775268555, + "learning_rate": 8.28280358503105e-06, + "loss": 0.3253, + "step": 14679 + }, + { + "epoch": 0.2938718314440858, + "grad_norm": 1.017148733139038, + "learning_rate": 8.282559055587029e-06, + "loss": 0.3149, + "step": 14680 + }, + { + "epoch": 0.29389184996121415, + "grad_norm": 1.1532516479492188, + "learning_rate": 8.282314512343837e-06, + "loss": 0.2999, + "step": 14681 + }, + { + "epoch": 0.29391186847834244, + "grad_norm": 1.0830920934677124, + "learning_rate": 8.282069955302498e-06, + "loss": 0.2823, + "step": 14682 + }, + { + "epoch": 0.2939318869954708, + "grad_norm": 2.001736640930176, + "learning_rate": 8.281825384464046e-06, + "loss": 0.8442, + "step": 14683 + }, + { + "epoch": 0.29395190551259914, + "grad_norm": 0.9916533827781677, + "learning_rate": 8.281580799829504e-06, + "loss": 0.3212, + "step": 14684 + }, + { + "epoch": 0.2939719240297275, + "grad_norm": 1.2131074666976929, + "learning_rate": 8.281336201399905e-06, + "loss": 0.3097, + "step": 14685 + }, + { + "epoch": 0.29399194254685584, + "grad_norm": 0.9711751937866211, + "learning_rate": 8.281091589176275e-06, + "loss": 0.3321, + "step": 14686 + }, + { + "epoch": 0.2940119610639842, + "grad_norm": 0.9864559769630432, + "learning_rate": 8.28084696315964e-06, + "loss": 0.3133, + "step": 14687 + }, + { + "epoch": 0.29403197958111255, + "grad_norm": 1.0199544429779053, + "learning_rate": 8.280602323351032e-06, + "loss": 0.3023, + "step": 14688 + }, + { + "epoch": 0.2940519980982409, + "grad_norm": 1.0709511041641235, + "learning_rate": 8.280357669751476e-06, + "loss": 0.3269, + "step": 14689 + }, + { + "epoch": 0.2940720166153692, + "grad_norm": 1.1142683029174805, + "learning_rate": 8.280113002362005e-06, + "loss": 0.3389, + "step": 14690 + }, + { + "epoch": 0.29409203513249754, + "grad_norm": 1.0786795616149902, + "learning_rate": 8.279868321183641e-06, + "loss": 0.3605, + "step": 14691 + }, + { + "epoch": 0.2941120536496259, + "grad_norm": 1.461992859840393, + "learning_rate": 8.27962362621742e-06, + "loss": 0.3202, + "step": 14692 + }, + { + "epoch": 0.29413207216675424, + "grad_norm": 1.0848757028579712, + "learning_rate": 8.279378917464366e-06, + "loss": 0.3423, + "step": 14693 + }, + { + "epoch": 0.2941520906838826, + "grad_norm": 1.2772696018218994, + "learning_rate": 8.279134194925508e-06, + "loss": 0.331, + "step": 14694 + }, + { + "epoch": 0.29417210920101095, + "grad_norm": 1.775148868560791, + "learning_rate": 8.278889458601875e-06, + "loss": 0.8206, + "step": 14695 + }, + { + "epoch": 0.2941921277181393, + "grad_norm": 1.1538846492767334, + "learning_rate": 8.2786447084945e-06, + "loss": 0.324, + "step": 14696 + }, + { + "epoch": 0.29421214623526765, + "grad_norm": 1.2210088968276978, + "learning_rate": 8.278399944604405e-06, + "loss": 0.3175, + "step": 14697 + }, + { + "epoch": 0.29423216475239594, + "grad_norm": 1.1207077503204346, + "learning_rate": 8.278155166932621e-06, + "loss": 0.3525, + "step": 14698 + }, + { + "epoch": 0.2942521832695243, + "grad_norm": 0.997474193572998, + "learning_rate": 8.27791037548018e-06, + "loss": 0.3078, + "step": 14699 + }, + { + "epoch": 0.29427220178665264, + "grad_norm": 1.2617486715316772, + "learning_rate": 8.27766557024811e-06, + "loss": 0.3344, + "step": 14700 + }, + { + "epoch": 0.294292220303781, + "grad_norm": 1.8720767498016357, + "learning_rate": 8.277420751237437e-06, + "loss": 0.8887, + "step": 14701 + }, + { + "epoch": 0.29431223882090934, + "grad_norm": 1.133615493774414, + "learning_rate": 8.277175918449195e-06, + "loss": 0.3564, + "step": 14702 + }, + { + "epoch": 0.2943322573380377, + "grad_norm": 1.0053126811981201, + "learning_rate": 8.276931071884408e-06, + "loss": 0.3206, + "step": 14703 + }, + { + "epoch": 0.29435227585516605, + "grad_norm": 1.0341193675994873, + "learning_rate": 8.276686211544112e-06, + "loss": 0.3508, + "step": 14704 + }, + { + "epoch": 0.2943722943722944, + "grad_norm": 1.1539229154586792, + "learning_rate": 8.276441337429328e-06, + "loss": 0.3048, + "step": 14705 + }, + { + "epoch": 0.2943923128894227, + "grad_norm": 1.185895562171936, + "learning_rate": 8.276196449541091e-06, + "loss": 0.3141, + "step": 14706 + }, + { + "epoch": 0.29441233140655104, + "grad_norm": 1.8176066875457764, + "learning_rate": 8.27595154788043e-06, + "loss": 0.7933, + "step": 14707 + }, + { + "epoch": 0.2944323499236794, + "grad_norm": 1.0637381076812744, + "learning_rate": 8.275706632448374e-06, + "loss": 0.2905, + "step": 14708 + }, + { + "epoch": 0.29445236844080774, + "grad_norm": 1.1192747354507446, + "learning_rate": 8.27546170324595e-06, + "loss": 0.352, + "step": 14709 + }, + { + "epoch": 0.2944723869579361, + "grad_norm": 0.9940785765647888, + "learning_rate": 8.275216760274194e-06, + "loss": 0.3201, + "step": 14710 + }, + { + "epoch": 0.29449240547506444, + "grad_norm": 1.1364898681640625, + "learning_rate": 8.274971803534128e-06, + "loss": 0.2795, + "step": 14711 + }, + { + "epoch": 0.2945124239921928, + "grad_norm": 1.0786495208740234, + "learning_rate": 8.274726833026787e-06, + "loss": 0.3204, + "step": 14712 + }, + { + "epoch": 0.29453244250932115, + "grad_norm": 1.0910648107528687, + "learning_rate": 8.274481848753198e-06, + "loss": 0.2807, + "step": 14713 + }, + { + "epoch": 0.29455246102644944, + "grad_norm": 1.0006452798843384, + "learning_rate": 8.274236850714393e-06, + "loss": 0.3203, + "step": 14714 + }, + { + "epoch": 0.2945724795435778, + "grad_norm": 2.0290584564208984, + "learning_rate": 8.2739918389114e-06, + "loss": 0.7906, + "step": 14715 + }, + { + "epoch": 0.29459249806070614, + "grad_norm": 0.9837693572044373, + "learning_rate": 8.27374681334525e-06, + "loss": 0.2854, + "step": 14716 + }, + { + "epoch": 0.2946125165778345, + "grad_norm": 1.1378904581069946, + "learning_rate": 8.273501774016975e-06, + "loss": 0.3298, + "step": 14717 + }, + { + "epoch": 0.29463253509496284, + "grad_norm": 1.1245574951171875, + "learning_rate": 8.2732567209276e-06, + "loss": 0.2894, + "step": 14718 + }, + { + "epoch": 0.2946525536120912, + "grad_norm": 1.2367451190948486, + "learning_rate": 8.27301165407816e-06, + "loss": 0.2873, + "step": 14719 + }, + { + "epoch": 0.29467257212921955, + "grad_norm": 1.0909909009933472, + "learning_rate": 8.272766573469681e-06, + "loss": 0.3399, + "step": 14720 + }, + { + "epoch": 0.2946925906463479, + "grad_norm": 1.9173588752746582, + "learning_rate": 8.272521479103199e-06, + "loss": 0.8129, + "step": 14721 + }, + { + "epoch": 0.2947126091634762, + "grad_norm": 1.2492423057556152, + "learning_rate": 8.272276370979738e-06, + "loss": 0.2971, + "step": 14722 + }, + { + "epoch": 0.29473262768060454, + "grad_norm": 2.0029947757720947, + "learning_rate": 8.272031249100333e-06, + "loss": 0.807, + "step": 14723 + }, + { + "epoch": 0.2947526461977329, + "grad_norm": 1.1341339349746704, + "learning_rate": 8.271786113466012e-06, + "loss": 0.3055, + "step": 14724 + }, + { + "epoch": 0.29477266471486124, + "grad_norm": 1.0916907787322998, + "learning_rate": 8.271540964077806e-06, + "loss": 0.3332, + "step": 14725 + }, + { + "epoch": 0.2947926832319896, + "grad_norm": 1.9344539642333984, + "learning_rate": 8.271295800936748e-06, + "loss": 0.8252, + "step": 14726 + }, + { + "epoch": 0.29481270174911794, + "grad_norm": 1.8598588705062866, + "learning_rate": 8.271050624043864e-06, + "loss": 0.8366, + "step": 14727 + }, + { + "epoch": 0.2948327202662463, + "grad_norm": 1.0716201066970825, + "learning_rate": 8.270805433400189e-06, + "loss": 0.3584, + "step": 14728 + }, + { + "epoch": 0.29485273878337465, + "grad_norm": 1.050158143043518, + "learning_rate": 8.27056022900675e-06, + "loss": 0.2992, + "step": 14729 + }, + { + "epoch": 0.29487275730050294, + "grad_norm": 1.2823857069015503, + "learning_rate": 8.270315010864582e-06, + "loss": 0.3506, + "step": 14730 + }, + { + "epoch": 0.2948927758176313, + "grad_norm": 1.0795519351959229, + "learning_rate": 8.270069778974711e-06, + "loss": 0.2776, + "step": 14731 + }, + { + "epoch": 0.29491279433475964, + "grad_norm": 1.0331236124038696, + "learning_rate": 8.269824533338172e-06, + "loss": 0.32, + "step": 14732 + }, + { + "epoch": 0.294932812851888, + "grad_norm": 1.124117136001587, + "learning_rate": 8.269579273955994e-06, + "loss": 0.3616, + "step": 14733 + }, + { + "epoch": 0.29495283136901634, + "grad_norm": 1.0609338283538818, + "learning_rate": 8.269334000829208e-06, + "loss": 0.3293, + "step": 14734 + }, + { + "epoch": 0.2949728498861447, + "grad_norm": 1.1239523887634277, + "learning_rate": 8.269088713958846e-06, + "loss": 0.332, + "step": 14735 + }, + { + "epoch": 0.29499286840327305, + "grad_norm": 1.8982858657836914, + "learning_rate": 8.268843413345938e-06, + "loss": 0.8587, + "step": 14736 + }, + { + "epoch": 0.2950128869204014, + "grad_norm": 1.1511322259902954, + "learning_rate": 8.268598098991516e-06, + "loss": 0.3285, + "step": 14737 + }, + { + "epoch": 0.2950329054375297, + "grad_norm": 1.0670115947723389, + "learning_rate": 8.26835277089661e-06, + "loss": 0.3184, + "step": 14738 + }, + { + "epoch": 0.29505292395465804, + "grad_norm": 1.106302261352539, + "learning_rate": 8.268107429062253e-06, + "loss": 0.3055, + "step": 14739 + }, + { + "epoch": 0.2950729424717864, + "grad_norm": 1.2487809658050537, + "learning_rate": 8.267862073489477e-06, + "loss": 0.2974, + "step": 14740 + }, + { + "epoch": 0.29509296098891474, + "grad_norm": 1.0538545846939087, + "learning_rate": 8.267616704179312e-06, + "loss": 0.3108, + "step": 14741 + }, + { + "epoch": 0.2951129795060431, + "grad_norm": 1.1940888166427612, + "learning_rate": 8.267371321132786e-06, + "loss": 0.3301, + "step": 14742 + }, + { + "epoch": 0.29513299802317144, + "grad_norm": 1.3002320528030396, + "learning_rate": 8.267125924350939e-06, + "loss": 0.335, + "step": 14743 + }, + { + "epoch": 0.2951530165402998, + "grad_norm": 1.1596206426620483, + "learning_rate": 8.266880513834795e-06, + "loss": 0.319, + "step": 14744 + }, + { + "epoch": 0.29517303505742815, + "grad_norm": 1.1022729873657227, + "learning_rate": 8.266635089585388e-06, + "loss": 0.325, + "step": 14745 + }, + { + "epoch": 0.29519305357455644, + "grad_norm": 1.0153027772903442, + "learning_rate": 8.26638965160375e-06, + "loss": 0.2947, + "step": 14746 + }, + { + "epoch": 0.2952130720916848, + "grad_norm": 1.060009479522705, + "learning_rate": 8.266144199890913e-06, + "loss": 0.3539, + "step": 14747 + }, + { + "epoch": 0.29523309060881314, + "grad_norm": 1.0789930820465088, + "learning_rate": 8.26589873444791e-06, + "loss": 0.2349, + "step": 14748 + }, + { + "epoch": 0.2952531091259415, + "grad_norm": 1.2510936260223389, + "learning_rate": 8.26565325527577e-06, + "loss": 0.3467, + "step": 14749 + }, + { + "epoch": 0.29527312764306984, + "grad_norm": 1.3010228872299194, + "learning_rate": 8.265407762375527e-06, + "loss": 0.3097, + "step": 14750 + }, + { + "epoch": 0.2952931461601982, + "grad_norm": 1.179137110710144, + "learning_rate": 8.265162255748213e-06, + "loss": 0.2947, + "step": 14751 + }, + { + "epoch": 0.29531316467732655, + "grad_norm": 1.0817588567733765, + "learning_rate": 8.264916735394862e-06, + "loss": 0.3581, + "step": 14752 + }, + { + "epoch": 0.2953331831944549, + "grad_norm": 1.0396114587783813, + "learning_rate": 8.264671201316499e-06, + "loss": 0.3095, + "step": 14753 + }, + { + "epoch": 0.2953532017115832, + "grad_norm": 1.1247584819793701, + "learning_rate": 8.264425653514162e-06, + "loss": 0.3447, + "step": 14754 + }, + { + "epoch": 0.29537322022871154, + "grad_norm": 1.062049150466919, + "learning_rate": 8.264180091988884e-06, + "loss": 0.3484, + "step": 14755 + }, + { + "epoch": 0.2953932387458399, + "grad_norm": 1.0946998596191406, + "learning_rate": 8.263934516741694e-06, + "loss": 0.3318, + "step": 14756 + }, + { + "epoch": 0.29541325726296824, + "grad_norm": 1.0586544275283813, + "learning_rate": 8.263688927773626e-06, + "loss": 0.2965, + "step": 14757 + }, + { + "epoch": 0.2954332757800966, + "grad_norm": 1.0375882387161255, + "learning_rate": 8.263443325085711e-06, + "loss": 0.3258, + "step": 14758 + }, + { + "epoch": 0.29545329429722494, + "grad_norm": 1.1341326236724854, + "learning_rate": 8.263197708678985e-06, + "loss": 0.33, + "step": 14759 + }, + { + "epoch": 0.2954733128143533, + "grad_norm": 1.006048560142517, + "learning_rate": 8.262952078554477e-06, + "loss": 0.2685, + "step": 14760 + }, + { + "epoch": 0.29549333133148165, + "grad_norm": 1.0925483703613281, + "learning_rate": 8.26270643471322e-06, + "loss": 0.2929, + "step": 14761 + }, + { + "epoch": 0.29551334984860994, + "grad_norm": 1.1639326810836792, + "learning_rate": 8.262460777156248e-06, + "loss": 0.2677, + "step": 14762 + }, + { + "epoch": 0.2955333683657383, + "grad_norm": 1.0926010608673096, + "learning_rate": 8.262215105884592e-06, + "loss": 0.309, + "step": 14763 + }, + { + "epoch": 0.29555338688286664, + "grad_norm": 1.1530803442001343, + "learning_rate": 8.261969420899286e-06, + "loss": 0.3501, + "step": 14764 + }, + { + "epoch": 0.295573405399995, + "grad_norm": 1.876847743988037, + "learning_rate": 8.261723722201365e-06, + "loss": 0.804, + "step": 14765 + }, + { + "epoch": 0.29559342391712334, + "grad_norm": 1.153743028640747, + "learning_rate": 8.261478009791857e-06, + "loss": 0.3362, + "step": 14766 + }, + { + "epoch": 0.2956134424342517, + "grad_norm": 2.15956711769104, + "learning_rate": 8.261232283671795e-06, + "loss": 0.8355, + "step": 14767 + }, + { + "epoch": 0.29563346095138004, + "grad_norm": 1.0684503316879272, + "learning_rate": 8.26098654384222e-06, + "loss": 0.3251, + "step": 14768 + }, + { + "epoch": 0.2956534794685084, + "grad_norm": 1.1221884489059448, + "learning_rate": 8.260740790304155e-06, + "loss": 0.3553, + "step": 14769 + }, + { + "epoch": 0.2956734979856367, + "grad_norm": 0.9710073471069336, + "learning_rate": 8.260495023058639e-06, + "loss": 0.3414, + "step": 14770 + }, + { + "epoch": 0.29569351650276504, + "grad_norm": 1.1054478883743286, + "learning_rate": 8.260249242106702e-06, + "loss": 0.3334, + "step": 14771 + }, + { + "epoch": 0.2957135350198934, + "grad_norm": 1.0499809980392456, + "learning_rate": 8.260003447449381e-06, + "loss": 0.3319, + "step": 14772 + }, + { + "epoch": 0.29573355353702174, + "grad_norm": 1.1016627550125122, + "learning_rate": 8.259757639087706e-06, + "loss": 0.3264, + "step": 14773 + }, + { + "epoch": 0.2957535720541501, + "grad_norm": 1.2435104846954346, + "learning_rate": 8.259511817022712e-06, + "loss": 0.3346, + "step": 14774 + }, + { + "epoch": 0.29577359057127844, + "grad_norm": 1.2211663722991943, + "learning_rate": 8.259265981255431e-06, + "loss": 0.357, + "step": 14775 + }, + { + "epoch": 0.2957936090884068, + "grad_norm": 1.0326242446899414, + "learning_rate": 8.259020131786896e-06, + "loss": 0.3277, + "step": 14776 + }, + { + "epoch": 0.29581362760553515, + "grad_norm": 1.059621810913086, + "learning_rate": 8.258774268618146e-06, + "loss": 0.2732, + "step": 14777 + }, + { + "epoch": 0.29583364612266344, + "grad_norm": 1.1101189851760864, + "learning_rate": 8.258528391750206e-06, + "loss": 0.349, + "step": 14778 + }, + { + "epoch": 0.2958536646397918, + "grad_norm": 1.9729136228561401, + "learning_rate": 8.258282501184115e-06, + "loss": 0.8216, + "step": 14779 + }, + { + "epoch": 0.29587368315692014, + "grad_norm": 1.2717676162719727, + "learning_rate": 8.258036596920906e-06, + "loss": 0.325, + "step": 14780 + }, + { + "epoch": 0.2958937016740485, + "grad_norm": 1.0671510696411133, + "learning_rate": 8.257790678961614e-06, + "loss": 0.2732, + "step": 14781 + }, + { + "epoch": 0.29591372019117684, + "grad_norm": 1.0174400806427002, + "learning_rate": 8.25754474730727e-06, + "loss": 0.3194, + "step": 14782 + }, + { + "epoch": 0.2959337387083052, + "grad_norm": 1.2059720754623413, + "learning_rate": 8.257298801958906e-06, + "loss": 0.259, + "step": 14783 + }, + { + "epoch": 0.29595375722543354, + "grad_norm": 1.1021441221237183, + "learning_rate": 8.257052842917561e-06, + "loss": 0.3343, + "step": 14784 + }, + { + "epoch": 0.2959737757425619, + "grad_norm": 1.0477186441421509, + "learning_rate": 8.256806870184267e-06, + "loss": 0.3339, + "step": 14785 + }, + { + "epoch": 0.2959937942596902, + "grad_norm": 1.1400800943374634, + "learning_rate": 8.256560883760059e-06, + "loss": 0.3185, + "step": 14786 + }, + { + "epoch": 0.29601381277681854, + "grad_norm": 1.1022911071777344, + "learning_rate": 8.256314883645968e-06, + "loss": 0.3151, + "step": 14787 + }, + { + "epoch": 0.2960338312939469, + "grad_norm": 1.932165503501892, + "learning_rate": 8.25606886984303e-06, + "loss": 0.8002, + "step": 14788 + }, + { + "epoch": 0.29605384981107524, + "grad_norm": 1.1183291673660278, + "learning_rate": 8.255822842352279e-06, + "loss": 0.332, + "step": 14789 + }, + { + "epoch": 0.2960738683282036, + "grad_norm": 1.076349139213562, + "learning_rate": 8.255576801174752e-06, + "loss": 0.3337, + "step": 14790 + }, + { + "epoch": 0.29609388684533194, + "grad_norm": 1.1057469844818115, + "learning_rate": 8.255330746311478e-06, + "loss": 0.3263, + "step": 14791 + }, + { + "epoch": 0.2961139053624603, + "grad_norm": 1.03855299949646, + "learning_rate": 8.255084677763495e-06, + "loss": 0.301, + "step": 14792 + }, + { + "epoch": 0.29613392387958865, + "grad_norm": 1.0898165702819824, + "learning_rate": 8.254838595531835e-06, + "loss": 0.4047, + "step": 14793 + }, + { + "epoch": 0.29615394239671694, + "grad_norm": 1.0317736864089966, + "learning_rate": 8.254592499617538e-06, + "loss": 0.29, + "step": 14794 + }, + { + "epoch": 0.2961739609138453, + "grad_norm": 1.0819824934005737, + "learning_rate": 8.25434639002163e-06, + "loss": 0.2811, + "step": 14795 + }, + { + "epoch": 0.29619397943097364, + "grad_norm": 1.0057988166809082, + "learning_rate": 8.254100266745154e-06, + "loss": 0.2853, + "step": 14796 + }, + { + "epoch": 0.296213997948102, + "grad_norm": 1.0995526313781738, + "learning_rate": 8.253854129789139e-06, + "loss": 0.2975, + "step": 14797 + }, + { + "epoch": 0.29623401646523034, + "grad_norm": 1.1611418724060059, + "learning_rate": 8.25360797915462e-06, + "loss": 0.3097, + "step": 14798 + }, + { + "epoch": 0.2962540349823587, + "grad_norm": 0.9972110390663147, + "learning_rate": 8.253361814842635e-06, + "loss": 0.3241, + "step": 14799 + }, + { + "epoch": 0.29627405349948704, + "grad_norm": 1.1906951665878296, + "learning_rate": 8.253115636854217e-06, + "loss": 0.3471, + "step": 14800 + }, + { + "epoch": 0.2962940720166154, + "grad_norm": 1.1269818544387817, + "learning_rate": 8.2528694451904e-06, + "loss": 0.3174, + "step": 14801 + }, + { + "epoch": 0.2963140905337437, + "grad_norm": 1.8257324695587158, + "learning_rate": 8.25262323985222e-06, + "loss": 0.7698, + "step": 14802 + }, + { + "epoch": 0.29633410905087204, + "grad_norm": 1.0194528102874756, + "learning_rate": 8.252377020840713e-06, + "loss": 0.3489, + "step": 14803 + }, + { + "epoch": 0.2963541275680004, + "grad_norm": 1.1113855838775635, + "learning_rate": 8.25213078815691e-06, + "loss": 0.3246, + "step": 14804 + }, + { + "epoch": 0.29637414608512874, + "grad_norm": 0.9885270595550537, + "learning_rate": 8.251884541801854e-06, + "loss": 0.2391, + "step": 14805 + }, + { + "epoch": 0.2963941646022571, + "grad_norm": 1.070351004600525, + "learning_rate": 8.251638281776572e-06, + "loss": 0.3647, + "step": 14806 + }, + { + "epoch": 0.29641418311938544, + "grad_norm": 1.127666711807251, + "learning_rate": 8.251392008082104e-06, + "loss": 0.3362, + "step": 14807 + }, + { + "epoch": 0.2964342016365138, + "grad_norm": 1.2718132734298706, + "learning_rate": 8.251145720719482e-06, + "loss": 0.3613, + "step": 14808 + }, + { + "epoch": 0.29645422015364215, + "grad_norm": 1.090484857559204, + "learning_rate": 8.250899419689743e-06, + "loss": 0.3022, + "step": 14809 + }, + { + "epoch": 0.29647423867077044, + "grad_norm": 0.9535415768623352, + "learning_rate": 8.250653104993923e-06, + "loss": 0.2905, + "step": 14810 + }, + { + "epoch": 0.2964942571878988, + "grad_norm": 1.251534104347229, + "learning_rate": 8.250406776633057e-06, + "loss": 0.2869, + "step": 14811 + }, + { + "epoch": 0.29651427570502714, + "grad_norm": 1.071675181388855, + "learning_rate": 8.25016043460818e-06, + "loss": 0.2882, + "step": 14812 + }, + { + "epoch": 0.2965342942221555, + "grad_norm": 1.1490001678466797, + "learning_rate": 8.249914078920328e-06, + "loss": 0.3312, + "step": 14813 + }, + { + "epoch": 0.29655431273928384, + "grad_norm": 2.1711959838867188, + "learning_rate": 8.249667709570538e-06, + "loss": 0.8165, + "step": 14814 + }, + { + "epoch": 0.2965743312564122, + "grad_norm": 1.1067382097244263, + "learning_rate": 8.249421326559843e-06, + "loss": 0.3105, + "step": 14815 + }, + { + "epoch": 0.29659434977354054, + "grad_norm": 1.8340647220611572, + "learning_rate": 8.249174929889281e-06, + "loss": 0.821, + "step": 14816 + }, + { + "epoch": 0.2966143682906689, + "grad_norm": 1.0882678031921387, + "learning_rate": 8.248928519559886e-06, + "loss": 0.3494, + "step": 14817 + }, + { + "epoch": 0.2966343868077972, + "grad_norm": 1.136963963508606, + "learning_rate": 8.248682095572694e-06, + "loss": 0.3117, + "step": 14818 + }, + { + "epoch": 0.29665440532492554, + "grad_norm": 1.2266895771026611, + "learning_rate": 8.248435657928743e-06, + "loss": 0.3608, + "step": 14819 + }, + { + "epoch": 0.2966744238420539, + "grad_norm": 1.2034685611724854, + "learning_rate": 8.248189206629068e-06, + "loss": 0.3059, + "step": 14820 + }, + { + "epoch": 0.29669444235918224, + "grad_norm": 1.0346043109893799, + "learning_rate": 8.247942741674703e-06, + "loss": 0.3862, + "step": 14821 + }, + { + "epoch": 0.2967144608763106, + "grad_norm": 1.0085469484329224, + "learning_rate": 8.247696263066685e-06, + "loss": 0.3799, + "step": 14822 + }, + { + "epoch": 0.29673447939343894, + "grad_norm": 1.168586254119873, + "learning_rate": 8.247449770806053e-06, + "loss": 0.3467, + "step": 14823 + }, + { + "epoch": 0.2967544979105673, + "grad_norm": 1.0586806535720825, + "learning_rate": 8.24720326489384e-06, + "loss": 0.3085, + "step": 14824 + }, + { + "epoch": 0.29677451642769564, + "grad_norm": 1.0866727828979492, + "learning_rate": 8.246956745331082e-06, + "loss": 0.3008, + "step": 14825 + }, + { + "epoch": 0.29679453494482394, + "grad_norm": 1.1932939291000366, + "learning_rate": 8.246710212118818e-06, + "loss": 0.3529, + "step": 14826 + }, + { + "epoch": 0.2968145534619523, + "grad_norm": 1.1758761405944824, + "learning_rate": 8.246463665258085e-06, + "loss": 0.3278, + "step": 14827 + }, + { + "epoch": 0.29683457197908064, + "grad_norm": 1.2536169290542603, + "learning_rate": 8.246217104749914e-06, + "loss": 0.2959, + "step": 14828 + }, + { + "epoch": 0.296854590496209, + "grad_norm": 1.0827339887619019, + "learning_rate": 8.245970530595346e-06, + "loss": 0.3138, + "step": 14829 + }, + { + "epoch": 0.29687460901333734, + "grad_norm": 1.1757830381393433, + "learning_rate": 8.245723942795417e-06, + "loss": 0.3381, + "step": 14830 + }, + { + "epoch": 0.2968946275304657, + "grad_norm": 1.2168967723846436, + "learning_rate": 8.245477341351162e-06, + "loss": 0.2964, + "step": 14831 + }, + { + "epoch": 0.29691464604759404, + "grad_norm": 1.8201977014541626, + "learning_rate": 8.24523072626362e-06, + "loss": 0.8102, + "step": 14832 + }, + { + "epoch": 0.2969346645647224, + "grad_norm": 1.177889108657837, + "learning_rate": 8.244984097533826e-06, + "loss": 0.3686, + "step": 14833 + }, + { + "epoch": 0.2969546830818507, + "grad_norm": 1.8873896598815918, + "learning_rate": 8.244737455162818e-06, + "loss": 0.7919, + "step": 14834 + }, + { + "epoch": 0.29697470159897904, + "grad_norm": 1.1819199323654175, + "learning_rate": 8.24449079915163e-06, + "loss": 0.3354, + "step": 14835 + }, + { + "epoch": 0.2969947201161074, + "grad_norm": 1.2363064289093018, + "learning_rate": 8.244244129501302e-06, + "loss": 0.2888, + "step": 14836 + }, + { + "epoch": 0.29701473863323574, + "grad_norm": 1.0593162775039673, + "learning_rate": 8.243997446212868e-06, + "loss": 0.3536, + "step": 14837 + }, + { + "epoch": 0.2970347571503641, + "grad_norm": 1.222580909729004, + "learning_rate": 8.243750749287368e-06, + "loss": 0.3909, + "step": 14838 + }, + { + "epoch": 0.29705477566749244, + "grad_norm": 1.097003698348999, + "learning_rate": 8.24350403872584e-06, + "loss": 0.3053, + "step": 14839 + }, + { + "epoch": 0.2970747941846208, + "grad_norm": 1.0823736190795898, + "learning_rate": 8.243257314529319e-06, + "loss": 0.3156, + "step": 14840 + }, + { + "epoch": 0.29709481270174914, + "grad_norm": 1.118787407875061, + "learning_rate": 8.24301057669884e-06, + "loss": 0.3323, + "step": 14841 + }, + { + "epoch": 0.29711483121887744, + "grad_norm": 1.1298840045928955, + "learning_rate": 8.242763825235443e-06, + "loss": 0.3044, + "step": 14842 + }, + { + "epoch": 0.2971348497360058, + "grad_norm": 1.1024047136306763, + "learning_rate": 8.242517060140165e-06, + "loss": 0.3117, + "step": 14843 + }, + { + "epoch": 0.29715486825313414, + "grad_norm": 1.0598845481872559, + "learning_rate": 8.242270281414042e-06, + "loss": 0.3256, + "step": 14844 + }, + { + "epoch": 0.2971748867702625, + "grad_norm": 1.0311033725738525, + "learning_rate": 8.242023489058114e-06, + "loss": 0.297, + "step": 14845 + }, + { + "epoch": 0.29719490528739084, + "grad_norm": 1.1331723928451538, + "learning_rate": 8.241776683073417e-06, + "loss": 0.375, + "step": 14846 + }, + { + "epoch": 0.2972149238045192, + "grad_norm": 1.2054617404937744, + "learning_rate": 8.241529863460987e-06, + "loss": 0.3114, + "step": 14847 + }, + { + "epoch": 0.29723494232164754, + "grad_norm": 1.1069847345352173, + "learning_rate": 8.241283030221863e-06, + "loss": 0.3462, + "step": 14848 + }, + { + "epoch": 0.2972549608387759, + "grad_norm": 1.8394699096679688, + "learning_rate": 8.241036183357085e-06, + "loss": 0.7994, + "step": 14849 + }, + { + "epoch": 0.2972749793559042, + "grad_norm": 1.4020828008651733, + "learning_rate": 8.240789322867688e-06, + "loss": 0.2923, + "step": 14850 + }, + { + "epoch": 0.29729499787303254, + "grad_norm": 1.0890049934387207, + "learning_rate": 8.240542448754706e-06, + "loss": 0.3072, + "step": 14851 + }, + { + "epoch": 0.2973150163901609, + "grad_norm": 1.0437045097351074, + "learning_rate": 8.240295561019185e-06, + "loss": 0.3128, + "step": 14852 + }, + { + "epoch": 0.29733503490728924, + "grad_norm": 1.0664348602294922, + "learning_rate": 8.240048659662158e-06, + "loss": 0.3569, + "step": 14853 + }, + { + "epoch": 0.2973550534244176, + "grad_norm": 1.1501247882843018, + "learning_rate": 8.239801744684663e-06, + "loss": 0.3319, + "step": 14854 + }, + { + "epoch": 0.29737507194154594, + "grad_norm": 1.0424864292144775, + "learning_rate": 8.239554816087741e-06, + "loss": 0.3069, + "step": 14855 + }, + { + "epoch": 0.2973950904586743, + "grad_norm": 1.0868080854415894, + "learning_rate": 8.239307873872425e-06, + "loss": 0.3122, + "step": 14856 + }, + { + "epoch": 0.29741510897580264, + "grad_norm": 0.9520446062088013, + "learning_rate": 8.239060918039756e-06, + "loss": 0.3099, + "step": 14857 + }, + { + "epoch": 0.29743512749293094, + "grad_norm": 1.0709573030471802, + "learning_rate": 8.238813948590773e-06, + "loss": 0.3265, + "step": 14858 + }, + { + "epoch": 0.2974551460100593, + "grad_norm": 1.0225739479064941, + "learning_rate": 8.238566965526514e-06, + "loss": 0.3361, + "step": 14859 + }, + { + "epoch": 0.29747516452718764, + "grad_norm": 1.1618839502334595, + "learning_rate": 8.238319968848014e-06, + "loss": 0.2843, + "step": 14860 + }, + { + "epoch": 0.297495183044316, + "grad_norm": 1.1158965826034546, + "learning_rate": 8.238072958556315e-06, + "loss": 0.3344, + "step": 14861 + }, + { + "epoch": 0.29751520156144434, + "grad_norm": 1.0920382738113403, + "learning_rate": 8.237825934652455e-06, + "loss": 0.3543, + "step": 14862 + }, + { + "epoch": 0.2975352200785727, + "grad_norm": 1.0300142765045166, + "learning_rate": 8.23757889713747e-06, + "loss": 0.3249, + "step": 14863 + }, + { + "epoch": 0.29755523859570104, + "grad_norm": 1.1867882013320923, + "learning_rate": 8.2373318460124e-06, + "loss": 0.3665, + "step": 14864 + }, + { + "epoch": 0.2975752571128294, + "grad_norm": 1.0405055284500122, + "learning_rate": 8.237084781278283e-06, + "loss": 0.3253, + "step": 14865 + }, + { + "epoch": 0.2975952756299577, + "grad_norm": 1.0861763954162598, + "learning_rate": 8.23683770293616e-06, + "loss": 0.3073, + "step": 14866 + }, + { + "epoch": 0.29761529414708604, + "grad_norm": 1.0558918714523315, + "learning_rate": 8.236590610987067e-06, + "loss": 0.3057, + "step": 14867 + }, + { + "epoch": 0.2976353126642144, + "grad_norm": 1.1200453042984009, + "learning_rate": 8.236343505432044e-06, + "loss": 0.3522, + "step": 14868 + }, + { + "epoch": 0.29765533118134274, + "grad_norm": 1.1885913610458374, + "learning_rate": 8.23609638627213e-06, + "loss": 0.2996, + "step": 14869 + }, + { + "epoch": 0.2976753496984711, + "grad_norm": 1.1626828908920288, + "learning_rate": 8.235849253508363e-06, + "loss": 0.3598, + "step": 14870 + }, + { + "epoch": 0.29769536821559944, + "grad_norm": 0.9575257301330566, + "learning_rate": 8.235602107141782e-06, + "loss": 0.3032, + "step": 14871 + }, + { + "epoch": 0.2977153867327278, + "grad_norm": 1.4294058084487915, + "learning_rate": 8.235354947173426e-06, + "loss": 0.3583, + "step": 14872 + }, + { + "epoch": 0.29773540524985614, + "grad_norm": 1.0965361595153809, + "learning_rate": 8.235107773604333e-06, + "loss": 0.307, + "step": 14873 + }, + { + "epoch": 0.29775542376698444, + "grad_norm": 1.0755678415298462, + "learning_rate": 8.234860586435545e-06, + "loss": 0.3143, + "step": 14874 + }, + { + "epoch": 0.2977754422841128, + "grad_norm": 1.3393505811691284, + "learning_rate": 8.234613385668097e-06, + "loss": 0.3499, + "step": 14875 + }, + { + "epoch": 0.29779546080124114, + "grad_norm": 1.0270534753799438, + "learning_rate": 8.234366171303032e-06, + "loss": 0.311, + "step": 14876 + }, + { + "epoch": 0.2978154793183695, + "grad_norm": 1.1078916788101196, + "learning_rate": 8.234118943341387e-06, + "loss": 0.3787, + "step": 14877 + }, + { + "epoch": 0.29783549783549784, + "grad_norm": 1.0890463590621948, + "learning_rate": 8.233871701784202e-06, + "loss": 0.3118, + "step": 14878 + }, + { + "epoch": 0.2978555163526262, + "grad_norm": 1.0379047393798828, + "learning_rate": 8.233624446632515e-06, + "loss": 0.3299, + "step": 14879 + }, + { + "epoch": 0.29787553486975454, + "grad_norm": 1.1377015113830566, + "learning_rate": 8.23337717788737e-06, + "loss": 0.345, + "step": 14880 + }, + { + "epoch": 0.2978955533868829, + "grad_norm": 1.1868985891342163, + "learning_rate": 8.2331298955498e-06, + "loss": 0.3557, + "step": 14881 + }, + { + "epoch": 0.2979155719040112, + "grad_norm": 1.0552647113800049, + "learning_rate": 8.23288259962085e-06, + "loss": 0.3263, + "step": 14882 + }, + { + "epoch": 0.29793559042113954, + "grad_norm": 1.1513183116912842, + "learning_rate": 8.232635290101556e-06, + "loss": 0.3318, + "step": 14883 + }, + { + "epoch": 0.2979556089382679, + "grad_norm": 1.0852501392364502, + "learning_rate": 8.232387966992959e-06, + "loss": 0.3553, + "step": 14884 + }, + { + "epoch": 0.29797562745539624, + "grad_norm": 1.0228767395019531, + "learning_rate": 8.2321406302961e-06, + "loss": 0.3103, + "step": 14885 + }, + { + "epoch": 0.2979956459725246, + "grad_norm": 1.0047107934951782, + "learning_rate": 8.231893280012014e-06, + "loss": 0.2928, + "step": 14886 + }, + { + "epoch": 0.29801566448965294, + "grad_norm": 1.077055811882019, + "learning_rate": 8.231645916141748e-06, + "loss": 0.3159, + "step": 14887 + }, + { + "epoch": 0.2980356830067813, + "grad_norm": 1.0574355125427246, + "learning_rate": 8.231398538686336e-06, + "loss": 0.3162, + "step": 14888 + }, + { + "epoch": 0.29805570152390964, + "grad_norm": 1.1185071468353271, + "learning_rate": 8.23115114764682e-06, + "loss": 0.3488, + "step": 14889 + }, + { + "epoch": 0.29807572004103794, + "grad_norm": 1.1559211015701294, + "learning_rate": 8.23090374302424e-06, + "loss": 0.3419, + "step": 14890 + }, + { + "epoch": 0.2980957385581663, + "grad_norm": 1.2166831493377686, + "learning_rate": 8.230656324819636e-06, + "loss": 0.3591, + "step": 14891 + }, + { + "epoch": 0.29811575707529464, + "grad_norm": 1.1335718631744385, + "learning_rate": 8.230408893034048e-06, + "loss": 0.3201, + "step": 14892 + }, + { + "epoch": 0.298135775592423, + "grad_norm": 1.0923022031784058, + "learning_rate": 8.230161447668517e-06, + "loss": 0.3549, + "step": 14893 + }, + { + "epoch": 0.29815579410955134, + "grad_norm": 1.0312670469284058, + "learning_rate": 8.229913988724081e-06, + "loss": 0.331, + "step": 14894 + }, + { + "epoch": 0.2981758126266797, + "grad_norm": 1.0423246622085571, + "learning_rate": 8.229666516201784e-06, + "loss": 0.2817, + "step": 14895 + }, + { + "epoch": 0.29819583114380804, + "grad_norm": 1.0696946382522583, + "learning_rate": 8.229419030102661e-06, + "loss": 0.3512, + "step": 14896 + }, + { + "epoch": 0.29821584966093634, + "grad_norm": 1.127167820930481, + "learning_rate": 8.229171530427757e-06, + "loss": 0.3075, + "step": 14897 + }, + { + "epoch": 0.2982358681780647, + "grad_norm": 1.0176886320114136, + "learning_rate": 8.228924017178112e-06, + "loss": 0.2923, + "step": 14898 + }, + { + "epoch": 0.29825588669519304, + "grad_norm": 1.0319145917892456, + "learning_rate": 8.228676490354763e-06, + "loss": 0.3041, + "step": 14899 + }, + { + "epoch": 0.2982759052123214, + "grad_norm": 1.9087055921554565, + "learning_rate": 8.228428949958754e-06, + "loss": 0.8676, + "step": 14900 + }, + { + "epoch": 0.29829592372944974, + "grad_norm": 1.1010514497756958, + "learning_rate": 8.228181395991125e-06, + "loss": 0.325, + "step": 14901 + }, + { + "epoch": 0.2983159422465781, + "grad_norm": 1.0965994596481323, + "learning_rate": 8.227933828452914e-06, + "loss": 0.3094, + "step": 14902 + }, + { + "epoch": 0.29833596076370644, + "grad_norm": 1.1444814205169678, + "learning_rate": 8.227686247345167e-06, + "loss": 0.318, + "step": 14903 + }, + { + "epoch": 0.2983559792808348, + "grad_norm": 1.3342183828353882, + "learning_rate": 8.227438652668919e-06, + "loss": 0.3492, + "step": 14904 + }, + { + "epoch": 0.2983759977979631, + "grad_norm": 1.165686011314392, + "learning_rate": 8.227191044425215e-06, + "loss": 0.307, + "step": 14905 + }, + { + "epoch": 0.29839601631509144, + "grad_norm": 2.009225368499756, + "learning_rate": 8.226943422615092e-06, + "loss": 0.877, + "step": 14906 + }, + { + "epoch": 0.2984160348322198, + "grad_norm": 1.1343363523483276, + "learning_rate": 8.226695787239596e-06, + "loss": 0.3037, + "step": 14907 + }, + { + "epoch": 0.29843605334934814, + "grad_norm": 1.184195876121521, + "learning_rate": 8.226448138299765e-06, + "loss": 0.3378, + "step": 14908 + }, + { + "epoch": 0.2984560718664765, + "grad_norm": 1.1664905548095703, + "learning_rate": 8.226200475796639e-06, + "loss": 0.2977, + "step": 14909 + }, + { + "epoch": 0.29847609038360484, + "grad_norm": 1.0354703664779663, + "learning_rate": 8.22595279973126e-06, + "loss": 0.2904, + "step": 14910 + }, + { + "epoch": 0.2984961089007332, + "grad_norm": 1.1069754362106323, + "learning_rate": 8.225705110104671e-06, + "loss": 0.3037, + "step": 14911 + }, + { + "epoch": 0.29851612741786154, + "grad_norm": 1.0712071657180786, + "learning_rate": 8.225457406917914e-06, + "loss": 0.3224, + "step": 14912 + }, + { + "epoch": 0.29853614593498984, + "grad_norm": 1.03237783908844, + "learning_rate": 8.225209690172025e-06, + "loss": 0.3057, + "step": 14913 + }, + { + "epoch": 0.2985561644521182, + "grad_norm": 1.0987917184829712, + "learning_rate": 8.22496195986805e-06, + "loss": 0.3142, + "step": 14914 + }, + { + "epoch": 0.29857618296924654, + "grad_norm": 1.015992522239685, + "learning_rate": 8.224714216007028e-06, + "loss": 0.2947, + "step": 14915 + }, + { + "epoch": 0.2985962014863749, + "grad_norm": 1.0340585708618164, + "learning_rate": 8.224466458590002e-06, + "loss": 0.313, + "step": 14916 + }, + { + "epoch": 0.29861622000350324, + "grad_norm": 1.1416020393371582, + "learning_rate": 8.224218687618012e-06, + "loss": 0.3072, + "step": 14917 + }, + { + "epoch": 0.2986362385206316, + "grad_norm": 1.224306344985962, + "learning_rate": 8.223970903092103e-06, + "loss": 0.3284, + "step": 14918 + }, + { + "epoch": 0.29865625703775994, + "grad_norm": 1.185648798942566, + "learning_rate": 8.22372310501331e-06, + "loss": 0.3297, + "step": 14919 + }, + { + "epoch": 0.2986762755548883, + "grad_norm": 1.1414380073547363, + "learning_rate": 8.22347529338268e-06, + "loss": 0.2867, + "step": 14920 + }, + { + "epoch": 0.2986962940720166, + "grad_norm": 1.1874574422836304, + "learning_rate": 8.223227468201256e-06, + "loss": 0.347, + "step": 14921 + }, + { + "epoch": 0.29871631258914494, + "grad_norm": 1.0897384881973267, + "learning_rate": 8.222979629470077e-06, + "loss": 0.3311, + "step": 14922 + }, + { + "epoch": 0.2987363311062733, + "grad_norm": 1.1540583372116089, + "learning_rate": 8.222731777190185e-06, + "loss": 0.3299, + "step": 14923 + }, + { + "epoch": 0.29875634962340164, + "grad_norm": 1.2838034629821777, + "learning_rate": 8.22248391136262e-06, + "loss": 0.3221, + "step": 14924 + }, + { + "epoch": 0.29877636814053, + "grad_norm": 1.1144299507141113, + "learning_rate": 8.222236031988429e-06, + "loss": 0.3268, + "step": 14925 + }, + { + "epoch": 0.29879638665765834, + "grad_norm": 1.1207014322280884, + "learning_rate": 8.221988139068648e-06, + "loss": 0.3101, + "step": 14926 + }, + { + "epoch": 0.2988164051747867, + "grad_norm": 1.0513153076171875, + "learning_rate": 8.221740232604325e-06, + "loss": 0.2989, + "step": 14927 + }, + { + "epoch": 0.29883642369191504, + "grad_norm": 1.2260563373565674, + "learning_rate": 8.221492312596497e-06, + "loss": 0.3093, + "step": 14928 + }, + { + "epoch": 0.29885644220904334, + "grad_norm": 1.1921638250350952, + "learning_rate": 8.221244379046211e-06, + "loss": 0.3895, + "step": 14929 + }, + { + "epoch": 0.2988764607261717, + "grad_norm": 1.095168948173523, + "learning_rate": 8.220996431954506e-06, + "loss": 0.3103, + "step": 14930 + }, + { + "epoch": 0.29889647924330004, + "grad_norm": 1.137195348739624, + "learning_rate": 8.220748471322426e-06, + "loss": 0.3167, + "step": 14931 + }, + { + "epoch": 0.2989164977604284, + "grad_norm": 1.9468457698822021, + "learning_rate": 8.22050049715101e-06, + "loss": 0.8438, + "step": 14932 + }, + { + "epoch": 0.29893651627755674, + "grad_norm": 1.347656011581421, + "learning_rate": 8.220252509441304e-06, + "loss": 0.3737, + "step": 14933 + }, + { + "epoch": 0.2989565347946851, + "grad_norm": 1.1333963871002197, + "learning_rate": 8.22000450819435e-06, + "loss": 0.3395, + "step": 14934 + }, + { + "epoch": 0.29897655331181344, + "grad_norm": 1.1642587184906006, + "learning_rate": 8.219756493411191e-06, + "loss": 0.3272, + "step": 14935 + }, + { + "epoch": 0.2989965718289418, + "grad_norm": 1.1349180936813354, + "learning_rate": 8.219508465092867e-06, + "loss": 0.2946, + "step": 14936 + }, + { + "epoch": 0.2990165903460701, + "grad_norm": 1.0896944999694824, + "learning_rate": 8.219260423240422e-06, + "loss": 0.2694, + "step": 14937 + }, + { + "epoch": 0.29903660886319844, + "grad_norm": 1.1017934083938599, + "learning_rate": 8.219012367854901e-06, + "loss": 0.3486, + "step": 14938 + }, + { + "epoch": 0.2990566273803268, + "grad_norm": 1.056747317314148, + "learning_rate": 8.218764298937343e-06, + "loss": 0.3321, + "step": 14939 + }, + { + "epoch": 0.29907664589745514, + "grad_norm": 1.908369541168213, + "learning_rate": 8.218516216488794e-06, + "loss": 0.816, + "step": 14940 + }, + { + "epoch": 0.2990966644145835, + "grad_norm": 1.147047996520996, + "learning_rate": 8.218268120510294e-06, + "loss": 0.3632, + "step": 14941 + }, + { + "epoch": 0.29911668293171184, + "grad_norm": 1.2017822265625, + "learning_rate": 8.218020011002888e-06, + "loss": 0.3075, + "step": 14942 + }, + { + "epoch": 0.2991367014488402, + "grad_norm": 1.1744787693023682, + "learning_rate": 8.217771887967619e-06, + "loss": 0.3192, + "step": 14943 + }, + { + "epoch": 0.29915671996596854, + "grad_norm": 1.1511595249176025, + "learning_rate": 8.217523751405528e-06, + "loss": 0.3416, + "step": 14944 + }, + { + "epoch": 0.29917673848309684, + "grad_norm": 1.5113024711608887, + "learning_rate": 8.217275601317661e-06, + "loss": 0.3289, + "step": 14945 + }, + { + "epoch": 0.2991967570002252, + "grad_norm": 1.0864267349243164, + "learning_rate": 8.217027437705058e-06, + "loss": 0.3, + "step": 14946 + }, + { + "epoch": 0.29921677551735354, + "grad_norm": 1.214154839515686, + "learning_rate": 8.216779260568766e-06, + "loss": 0.2747, + "step": 14947 + }, + { + "epoch": 0.2992367940344819, + "grad_norm": 1.1890021562576294, + "learning_rate": 8.216531069909825e-06, + "loss": 0.3148, + "step": 14948 + }, + { + "epoch": 0.29925681255161024, + "grad_norm": 1.0060054063796997, + "learning_rate": 8.21628286572928e-06, + "loss": 0.3169, + "step": 14949 + }, + { + "epoch": 0.2992768310687386, + "grad_norm": 1.1143771409988403, + "learning_rate": 8.216034648028174e-06, + "loss": 0.3132, + "step": 14950 + }, + { + "epoch": 0.29929684958586694, + "grad_norm": 1.1141353845596313, + "learning_rate": 8.215786416807551e-06, + "loss": 0.3272, + "step": 14951 + }, + { + "epoch": 0.2993168681029953, + "grad_norm": 1.0060454607009888, + "learning_rate": 8.215538172068453e-06, + "loss": 0.3167, + "step": 14952 + }, + { + "epoch": 0.2993368866201236, + "grad_norm": 1.067447543144226, + "learning_rate": 8.215289913811924e-06, + "loss": 0.3013, + "step": 14953 + }, + { + "epoch": 0.29935690513725194, + "grad_norm": 1.0780901908874512, + "learning_rate": 8.21504164203901e-06, + "loss": 0.2802, + "step": 14954 + }, + { + "epoch": 0.2993769236543803, + "grad_norm": 1.0972516536712646, + "learning_rate": 8.214793356750752e-06, + "loss": 0.2951, + "step": 14955 + }, + { + "epoch": 0.29939694217150864, + "grad_norm": 1.100191354751587, + "learning_rate": 8.214545057948193e-06, + "loss": 0.2998, + "step": 14956 + }, + { + "epoch": 0.299416960688637, + "grad_norm": 2.0418620109558105, + "learning_rate": 8.21429674563238e-06, + "loss": 0.8305, + "step": 14957 + }, + { + "epoch": 0.29943697920576534, + "grad_norm": 0.9774419069290161, + "learning_rate": 8.214048419804354e-06, + "loss": 0.3014, + "step": 14958 + }, + { + "epoch": 0.2994569977228937, + "grad_norm": 1.9742001295089722, + "learning_rate": 8.213800080465161e-06, + "loss": 0.8127, + "step": 14959 + }, + { + "epoch": 0.29947701624002204, + "grad_norm": 1.1395071744918823, + "learning_rate": 8.213551727615845e-06, + "loss": 0.3477, + "step": 14960 + }, + { + "epoch": 0.29949703475715034, + "grad_norm": 1.166678547859192, + "learning_rate": 8.21330336125745e-06, + "loss": 0.311, + "step": 14961 + }, + { + "epoch": 0.2995170532742787, + "grad_norm": 1.1171863079071045, + "learning_rate": 8.213054981391016e-06, + "loss": 0.387, + "step": 14962 + }, + { + "epoch": 0.29953707179140704, + "grad_norm": 1.0896588563919067, + "learning_rate": 8.21280658801759e-06, + "loss": 0.3051, + "step": 14963 + }, + { + "epoch": 0.2995570903085354, + "grad_norm": 1.2293474674224854, + "learning_rate": 8.21255818113822e-06, + "loss": 0.3225, + "step": 14964 + }, + { + "epoch": 0.29957710882566374, + "grad_norm": 1.078285813331604, + "learning_rate": 8.212309760753945e-06, + "loss": 0.2914, + "step": 14965 + }, + { + "epoch": 0.2995971273427921, + "grad_norm": 1.001785159111023, + "learning_rate": 8.21206132686581e-06, + "loss": 0.2452, + "step": 14966 + }, + { + "epoch": 0.29961714585992044, + "grad_norm": 1.1796958446502686, + "learning_rate": 8.211812879474862e-06, + "loss": 0.3076, + "step": 14967 + }, + { + "epoch": 0.2996371643770488, + "grad_norm": 0.9747113585472107, + "learning_rate": 8.211564418582143e-06, + "loss": 0.2623, + "step": 14968 + }, + { + "epoch": 0.2996571828941771, + "grad_norm": 1.150192379951477, + "learning_rate": 8.211315944188698e-06, + "loss": 0.349, + "step": 14969 + }, + { + "epoch": 0.29967720141130544, + "grad_norm": 1.2416398525238037, + "learning_rate": 8.211067456295574e-06, + "loss": 0.3561, + "step": 14970 + }, + { + "epoch": 0.2996972199284338, + "grad_norm": 1.1417768001556396, + "learning_rate": 8.210818954903811e-06, + "loss": 0.3377, + "step": 14971 + }, + { + "epoch": 0.29971723844556214, + "grad_norm": 1.2253996133804321, + "learning_rate": 8.210570440014456e-06, + "loss": 0.2996, + "step": 14972 + }, + { + "epoch": 0.2997372569626905, + "grad_norm": 1.0949944257736206, + "learning_rate": 8.210321911628556e-06, + "loss": 0.3475, + "step": 14973 + }, + { + "epoch": 0.29975727547981884, + "grad_norm": 1.158990502357483, + "learning_rate": 8.210073369747153e-06, + "loss": 0.3747, + "step": 14974 + }, + { + "epoch": 0.2997772939969472, + "grad_norm": 0.9624007344245911, + "learning_rate": 8.20982481437129e-06, + "loss": 0.2835, + "step": 14975 + }, + { + "epoch": 0.29979731251407554, + "grad_norm": 1.0223915576934814, + "learning_rate": 8.209576245502018e-06, + "loss": 0.279, + "step": 14976 + }, + { + "epoch": 0.29981733103120384, + "grad_norm": 1.050252079963684, + "learning_rate": 8.209327663140375e-06, + "loss": 0.3079, + "step": 14977 + }, + { + "epoch": 0.2998373495483322, + "grad_norm": 1.0646908283233643, + "learning_rate": 8.209079067287411e-06, + "loss": 0.3237, + "step": 14978 + }, + { + "epoch": 0.29985736806546054, + "grad_norm": 0.9930159449577332, + "learning_rate": 8.208830457944167e-06, + "loss": 0.2822, + "step": 14979 + }, + { + "epoch": 0.2998773865825889, + "grad_norm": 1.194489598274231, + "learning_rate": 8.208581835111693e-06, + "loss": 0.3167, + "step": 14980 + }, + { + "epoch": 0.29989740509971724, + "grad_norm": 1.174597144126892, + "learning_rate": 8.208333198791029e-06, + "loss": 0.324, + "step": 14981 + }, + { + "epoch": 0.2999174236168456, + "grad_norm": 1.1016311645507812, + "learning_rate": 8.208084548983225e-06, + "loss": 0.3741, + "step": 14982 + }, + { + "epoch": 0.29993744213397394, + "grad_norm": 1.284650206565857, + "learning_rate": 8.207835885689322e-06, + "loss": 0.277, + "step": 14983 + }, + { + "epoch": 0.2999574606511023, + "grad_norm": 0.9860815405845642, + "learning_rate": 8.207587208910368e-06, + "loss": 0.2966, + "step": 14984 + }, + { + "epoch": 0.2999774791682306, + "grad_norm": 1.1457082033157349, + "learning_rate": 8.20733851864741e-06, + "loss": 0.303, + "step": 14985 + }, + { + "epoch": 0.29999749768535894, + "grad_norm": 1.0373681783676147, + "learning_rate": 8.207089814901487e-06, + "loss": 0.3157, + "step": 14986 + }, + { + "epoch": 0.3000175162024873, + "grad_norm": 1.0885767936706543, + "learning_rate": 8.20684109767365e-06, + "loss": 0.325, + "step": 14987 + }, + { + "epoch": 0.30003753471961564, + "grad_norm": 1.136519432067871, + "learning_rate": 8.206592366964946e-06, + "loss": 0.335, + "step": 14988 + }, + { + "epoch": 0.300057553236744, + "grad_norm": 1.8984935283660889, + "learning_rate": 8.206343622776413e-06, + "loss": 0.958, + "step": 14989 + }, + { + "epoch": 0.30007757175387234, + "grad_norm": 1.102573037147522, + "learning_rate": 8.206094865109104e-06, + "loss": 0.331, + "step": 14990 + }, + { + "epoch": 0.3000975902710007, + "grad_norm": 1.0796674489974976, + "learning_rate": 8.205846093964063e-06, + "loss": 0.2939, + "step": 14991 + }, + { + "epoch": 0.30011760878812904, + "grad_norm": 1.04366135597229, + "learning_rate": 8.205597309342333e-06, + "loss": 0.3362, + "step": 14992 + }, + { + "epoch": 0.30013762730525734, + "grad_norm": 0.9952346086502075, + "learning_rate": 8.205348511244964e-06, + "loss": 0.2718, + "step": 14993 + }, + { + "epoch": 0.3001576458223857, + "grad_norm": 1.1507790088653564, + "learning_rate": 8.205099699672998e-06, + "loss": 0.3149, + "step": 14994 + }, + { + "epoch": 0.30017766433951404, + "grad_norm": 1.1646443605422974, + "learning_rate": 8.204850874627483e-06, + "loss": 0.3419, + "step": 14995 + }, + { + "epoch": 0.3001976828566424, + "grad_norm": 1.390168309211731, + "learning_rate": 8.204602036109465e-06, + "loss": 0.3176, + "step": 14996 + }, + { + "epoch": 0.30021770137377074, + "grad_norm": 1.1161447763442993, + "learning_rate": 8.20435318411999e-06, + "loss": 0.3198, + "step": 14997 + }, + { + "epoch": 0.3002377198908991, + "grad_norm": 1.136250376701355, + "learning_rate": 8.204104318660103e-06, + "loss": 0.342, + "step": 14998 + }, + { + "epoch": 0.30025773840802744, + "grad_norm": 1.1986892223358154, + "learning_rate": 8.203855439730849e-06, + "loss": 0.3887, + "step": 14999 + }, + { + "epoch": 0.3002777569251558, + "grad_norm": 1.1450556516647339, + "learning_rate": 8.203606547333278e-06, + "loss": 0.3481, + "step": 15000 + }, + { + "epoch": 0.3002977754422841, + "grad_norm": 1.2532100677490234, + "learning_rate": 8.203357641468435e-06, + "loss": 0.3401, + "step": 15001 + }, + { + "epoch": 0.30031779395941244, + "grad_norm": 1.285403847694397, + "learning_rate": 8.203108722137365e-06, + "loss": 0.3689, + "step": 15002 + }, + { + "epoch": 0.3003378124765408, + "grad_norm": 2.0593628883361816, + "learning_rate": 8.202859789341115e-06, + "loss": 0.7693, + "step": 15003 + }, + { + "epoch": 0.30035783099366914, + "grad_norm": 1.117128610610962, + "learning_rate": 8.202610843080735e-06, + "loss": 0.334, + "step": 15004 + }, + { + "epoch": 0.3003778495107975, + "grad_norm": 1.0920188426971436, + "learning_rate": 8.202361883357265e-06, + "loss": 0.3137, + "step": 15005 + }, + { + "epoch": 0.30039786802792584, + "grad_norm": 1.338325023651123, + "learning_rate": 8.202112910171756e-06, + "loss": 0.3247, + "step": 15006 + }, + { + "epoch": 0.3004178865450542, + "grad_norm": 1.1314741373062134, + "learning_rate": 8.201863923525251e-06, + "loss": 0.2643, + "step": 15007 + }, + { + "epoch": 0.30043790506218254, + "grad_norm": 1.09135103225708, + "learning_rate": 8.201614923418801e-06, + "loss": 0.3174, + "step": 15008 + }, + { + "epoch": 0.30045792357931084, + "grad_norm": 1.0515512228012085, + "learning_rate": 8.20136590985345e-06, + "loss": 0.3371, + "step": 15009 + }, + { + "epoch": 0.3004779420964392, + "grad_norm": 1.8807672262191772, + "learning_rate": 8.201116882830247e-06, + "loss": 0.8444, + "step": 15010 + }, + { + "epoch": 0.30049796061356754, + "grad_norm": 1.1825355291366577, + "learning_rate": 8.200867842350239e-06, + "loss": 0.3136, + "step": 15011 + }, + { + "epoch": 0.3005179791306959, + "grad_norm": 1.0341142416000366, + "learning_rate": 8.20061878841447e-06, + "loss": 0.3066, + "step": 15012 + }, + { + "epoch": 0.30053799764782424, + "grad_norm": 1.0219651460647583, + "learning_rate": 8.200369721023986e-06, + "loss": 0.3244, + "step": 15013 + }, + { + "epoch": 0.3005580161649526, + "grad_norm": 0.9998983144760132, + "learning_rate": 8.200120640179839e-06, + "loss": 0.2846, + "step": 15014 + }, + { + "epoch": 0.30057803468208094, + "grad_norm": 1.0768464803695679, + "learning_rate": 8.199871545883073e-06, + "loss": 0.2988, + "step": 15015 + }, + { + "epoch": 0.3005980531992093, + "grad_norm": 1.114870548248291, + "learning_rate": 8.199622438134736e-06, + "loss": 0.3073, + "step": 15016 + }, + { + "epoch": 0.3006180717163376, + "grad_norm": 1.0119684934616089, + "learning_rate": 8.199373316935874e-06, + "loss": 0.3029, + "step": 15017 + }, + { + "epoch": 0.30063809023346594, + "grad_norm": 1.0723685026168823, + "learning_rate": 8.199124182287534e-06, + "loss": 0.3447, + "step": 15018 + }, + { + "epoch": 0.3006581087505943, + "grad_norm": 1.1345977783203125, + "learning_rate": 8.198875034190768e-06, + "loss": 0.3149, + "step": 15019 + }, + { + "epoch": 0.30067812726772264, + "grad_norm": 1.0648833513259888, + "learning_rate": 8.198625872646617e-06, + "loss": 0.3085, + "step": 15020 + }, + { + "epoch": 0.300698145784851, + "grad_norm": 1.2358074188232422, + "learning_rate": 8.198376697656131e-06, + "loss": 0.3236, + "step": 15021 + }, + { + "epoch": 0.30071816430197934, + "grad_norm": 1.1781455278396606, + "learning_rate": 8.198127509220359e-06, + "loss": 0.3202, + "step": 15022 + }, + { + "epoch": 0.3007381828191077, + "grad_norm": 1.1569726467132568, + "learning_rate": 8.197878307340346e-06, + "loss": 0.3068, + "step": 15023 + }, + { + "epoch": 0.30075820133623604, + "grad_norm": 1.1411590576171875, + "learning_rate": 8.197629092017142e-06, + "loss": 0.3428, + "step": 15024 + }, + { + "epoch": 0.30077821985336434, + "grad_norm": 1.1875271797180176, + "learning_rate": 8.197379863251792e-06, + "loss": 0.3782, + "step": 15025 + }, + { + "epoch": 0.3007982383704927, + "grad_norm": 1.898052453994751, + "learning_rate": 8.197130621045347e-06, + "loss": 0.8478, + "step": 15026 + }, + { + "epoch": 0.30081825688762104, + "grad_norm": 1.0547538995742798, + "learning_rate": 8.19688136539885e-06, + "loss": 0.269, + "step": 15027 + }, + { + "epoch": 0.3008382754047494, + "grad_norm": 1.8676090240478516, + "learning_rate": 8.196632096313355e-06, + "loss": 0.7811, + "step": 15028 + }, + { + "epoch": 0.30085829392187774, + "grad_norm": 1.0944390296936035, + "learning_rate": 8.196382813789905e-06, + "loss": 0.3497, + "step": 15029 + }, + { + "epoch": 0.3008783124390061, + "grad_norm": 1.1594077348709106, + "learning_rate": 8.196133517829548e-06, + "loss": 0.3576, + "step": 15030 + }, + { + "epoch": 0.30089833095613444, + "grad_norm": 1.0493195056915283, + "learning_rate": 8.195884208433336e-06, + "loss": 0.3437, + "step": 15031 + }, + { + "epoch": 0.3009183494732628, + "grad_norm": 1.101406216621399, + "learning_rate": 8.195634885602314e-06, + "loss": 0.4147, + "step": 15032 + }, + { + "epoch": 0.3009383679903911, + "grad_norm": 1.0976346731185913, + "learning_rate": 8.19538554933753e-06, + "loss": 0.3315, + "step": 15033 + }, + { + "epoch": 0.30095838650751944, + "grad_norm": 1.2604753971099854, + "learning_rate": 8.195136199640032e-06, + "loss": 0.378, + "step": 15034 + }, + { + "epoch": 0.3009784050246478, + "grad_norm": 1.9393459558486938, + "learning_rate": 8.194886836510872e-06, + "loss": 0.8588, + "step": 15035 + }, + { + "epoch": 0.30099842354177614, + "grad_norm": 1.1931782960891724, + "learning_rate": 8.194637459951092e-06, + "loss": 0.3463, + "step": 15036 + }, + { + "epoch": 0.3010184420589045, + "grad_norm": 1.7737780809402466, + "learning_rate": 8.194388069961746e-06, + "loss": 0.9405, + "step": 15037 + }, + { + "epoch": 0.30103846057603284, + "grad_norm": 1.1535353660583496, + "learning_rate": 8.19413866654388e-06, + "loss": 0.3279, + "step": 15038 + }, + { + "epoch": 0.3010584790931612, + "grad_norm": 1.0626378059387207, + "learning_rate": 8.193889249698541e-06, + "loss": 0.2838, + "step": 15039 + }, + { + "epoch": 0.30107849761028954, + "grad_norm": 1.180128574371338, + "learning_rate": 8.19363981942678e-06, + "loss": 0.3693, + "step": 15040 + }, + { + "epoch": 0.30109851612741784, + "grad_norm": 1.7195792198181152, + "learning_rate": 8.193390375729642e-06, + "loss": 0.8189, + "step": 15041 + }, + { + "epoch": 0.3011185346445462, + "grad_norm": 1.3278322219848633, + "learning_rate": 8.19314091860818e-06, + "loss": 0.3167, + "step": 15042 + }, + { + "epoch": 0.30113855316167454, + "grad_norm": 1.0742764472961426, + "learning_rate": 8.192891448063442e-06, + "loss": 0.3003, + "step": 15043 + }, + { + "epoch": 0.3011585716788029, + "grad_norm": 1.0180466175079346, + "learning_rate": 8.192641964096476e-06, + "loss": 0.2929, + "step": 15044 + }, + { + "epoch": 0.30117859019593124, + "grad_norm": 1.061739444732666, + "learning_rate": 8.192392466708326e-06, + "loss": 0.3427, + "step": 15045 + }, + { + "epoch": 0.3011986087130596, + "grad_norm": 1.0824004411697388, + "learning_rate": 8.19214295590005e-06, + "loss": 0.2989, + "step": 15046 + }, + { + "epoch": 0.30121862723018794, + "grad_norm": 1.0840507745742798, + "learning_rate": 8.191893431672689e-06, + "loss": 0.318, + "step": 15047 + }, + { + "epoch": 0.3012386457473163, + "grad_norm": 1.1295427083969116, + "learning_rate": 8.191643894027295e-06, + "loss": 0.2891, + "step": 15048 + }, + { + "epoch": 0.3012586642644446, + "grad_norm": 1.073831558227539, + "learning_rate": 8.191394342964917e-06, + "loss": 0.2994, + "step": 15049 + }, + { + "epoch": 0.30127868278157294, + "grad_norm": 1.2334944009780884, + "learning_rate": 8.191144778486603e-06, + "loss": 0.3461, + "step": 15050 + }, + { + "epoch": 0.3012987012987013, + "grad_norm": 1.0772027969360352, + "learning_rate": 8.190895200593405e-06, + "loss": 0.312, + "step": 15051 + }, + { + "epoch": 0.30131871981582964, + "grad_norm": 1.049338698387146, + "learning_rate": 8.19064560928637e-06, + "loss": 0.3368, + "step": 15052 + }, + { + "epoch": 0.301338738332958, + "grad_norm": 1.221172571182251, + "learning_rate": 8.190396004566547e-06, + "loss": 0.3335, + "step": 15053 + }, + { + "epoch": 0.30135875685008634, + "grad_norm": 1.983237385749817, + "learning_rate": 8.190146386434985e-06, + "loss": 0.8233, + "step": 15054 + }, + { + "epoch": 0.3013787753672147, + "grad_norm": 1.0945470333099365, + "learning_rate": 8.189896754892737e-06, + "loss": 0.274, + "step": 15055 + }, + { + "epoch": 0.30139879388434304, + "grad_norm": 1.0355585813522339, + "learning_rate": 8.189647109940845e-06, + "loss": 0.3346, + "step": 15056 + }, + { + "epoch": 0.30141881240147134, + "grad_norm": 1.072586178779602, + "learning_rate": 8.189397451580368e-06, + "loss": 0.2833, + "step": 15057 + }, + { + "epoch": 0.3014388309185997, + "grad_norm": 1.0887912511825562, + "learning_rate": 8.189147779812348e-06, + "loss": 0.2864, + "step": 15058 + }, + { + "epoch": 0.30145884943572804, + "grad_norm": 1.1414779424667358, + "learning_rate": 8.188898094637836e-06, + "loss": 0.2982, + "step": 15059 + }, + { + "epoch": 0.3014788679528564, + "grad_norm": 1.0657583475112915, + "learning_rate": 8.188648396057884e-06, + "loss": 0.3144, + "step": 15060 + }, + { + "epoch": 0.30149888646998474, + "grad_norm": 1.1630128622055054, + "learning_rate": 8.18839868407354e-06, + "loss": 0.3461, + "step": 15061 + }, + { + "epoch": 0.3015189049871131, + "grad_norm": 1.1409486532211304, + "learning_rate": 8.188148958685854e-06, + "loss": 0.3637, + "step": 15062 + }, + { + "epoch": 0.30153892350424144, + "grad_norm": 1.106844425201416, + "learning_rate": 8.187899219895876e-06, + "loss": 0.3198, + "step": 15063 + }, + { + "epoch": 0.3015589420213698, + "grad_norm": 1.1363731622695923, + "learning_rate": 8.187649467704656e-06, + "loss": 0.3254, + "step": 15064 + }, + { + "epoch": 0.3015789605384981, + "grad_norm": 1.1042163372039795, + "learning_rate": 8.187399702113243e-06, + "loss": 0.3753, + "step": 15065 + }, + { + "epoch": 0.30159897905562644, + "grad_norm": 1.0037490129470825, + "learning_rate": 8.187149923122688e-06, + "loss": 0.255, + "step": 15066 + }, + { + "epoch": 0.3016189975727548, + "grad_norm": 1.1430325508117676, + "learning_rate": 8.186900130734041e-06, + "loss": 0.2949, + "step": 15067 + }, + { + "epoch": 0.30163901608988314, + "grad_norm": 1.0170925855636597, + "learning_rate": 8.186650324948351e-06, + "loss": 0.3223, + "step": 15068 + }, + { + "epoch": 0.3016590346070115, + "grad_norm": 1.1335824728012085, + "learning_rate": 8.18640050576667e-06, + "loss": 0.2939, + "step": 15069 + }, + { + "epoch": 0.30167905312413984, + "grad_norm": 1.1758626699447632, + "learning_rate": 8.186150673190046e-06, + "loss": 0.3089, + "step": 15070 + }, + { + "epoch": 0.3016990716412682, + "grad_norm": 1.2216249704360962, + "learning_rate": 8.185900827219531e-06, + "loss": 0.3179, + "step": 15071 + }, + { + "epoch": 0.30171909015839654, + "grad_norm": 1.0470175743103027, + "learning_rate": 8.185650967856173e-06, + "loss": 0.3261, + "step": 15072 + }, + { + "epoch": 0.30173910867552484, + "grad_norm": 1.0673500299453735, + "learning_rate": 8.185401095101025e-06, + "loss": 0.3023, + "step": 15073 + }, + { + "epoch": 0.3017591271926532, + "grad_norm": 1.087443232536316, + "learning_rate": 8.185151208955136e-06, + "loss": 0.3293, + "step": 15074 + }, + { + "epoch": 0.30177914570978154, + "grad_norm": 1.805654525756836, + "learning_rate": 8.184901309419556e-06, + "loss": 0.7863, + "step": 15075 + }, + { + "epoch": 0.3017991642269099, + "grad_norm": 1.1366721391677856, + "learning_rate": 8.184651396495337e-06, + "loss": 0.3612, + "step": 15076 + }, + { + "epoch": 0.30181918274403824, + "grad_norm": 1.1621944904327393, + "learning_rate": 8.184401470183529e-06, + "loss": 0.3485, + "step": 15077 + }, + { + "epoch": 0.3018392012611666, + "grad_norm": 1.0500383377075195, + "learning_rate": 8.184151530485183e-06, + "loss": 0.348, + "step": 15078 + }, + { + "epoch": 0.30185921977829494, + "grad_norm": 1.0300028324127197, + "learning_rate": 8.18390157740135e-06, + "loss": 0.2826, + "step": 15079 + }, + { + "epoch": 0.3018792382954233, + "grad_norm": 1.0824241638183594, + "learning_rate": 8.183651610933076e-06, + "loss": 0.3212, + "step": 15080 + }, + { + "epoch": 0.3018992568125516, + "grad_norm": 1.1731010675430298, + "learning_rate": 8.183401631081421e-06, + "loss": 0.3056, + "step": 15081 + }, + { + "epoch": 0.30191927532967994, + "grad_norm": 1.150370717048645, + "learning_rate": 8.183151637847426e-06, + "loss": 0.3565, + "step": 15082 + }, + { + "epoch": 0.3019392938468083, + "grad_norm": 1.0188394784927368, + "learning_rate": 8.18290163123215e-06, + "loss": 0.3415, + "step": 15083 + }, + { + "epoch": 0.30195931236393664, + "grad_norm": 1.085593819618225, + "learning_rate": 8.182651611236638e-06, + "loss": 0.2887, + "step": 15084 + }, + { + "epoch": 0.301979330881065, + "grad_norm": 1.289753794670105, + "learning_rate": 8.182401577861944e-06, + "loss": 0.4001, + "step": 15085 + }, + { + "epoch": 0.30199934939819334, + "grad_norm": 1.1274136304855347, + "learning_rate": 8.18215153110912e-06, + "loss": 0.3547, + "step": 15086 + }, + { + "epoch": 0.3020193679153217, + "grad_norm": 1.230702519416809, + "learning_rate": 8.181901470979214e-06, + "loss": 0.3309, + "step": 15087 + }, + { + "epoch": 0.30203938643245004, + "grad_norm": 1.0231045484542847, + "learning_rate": 8.181651397473281e-06, + "loss": 0.319, + "step": 15088 + }, + { + "epoch": 0.30205940494957834, + "grad_norm": 1.0660566091537476, + "learning_rate": 8.181401310592369e-06, + "loss": 0.338, + "step": 15089 + }, + { + "epoch": 0.3020794234667067, + "grad_norm": 1.0899447202682495, + "learning_rate": 8.18115121033753e-06, + "loss": 0.3184, + "step": 15090 + }, + { + "epoch": 0.30209944198383504, + "grad_norm": 1.0085901021957397, + "learning_rate": 8.180901096709818e-06, + "loss": 0.3219, + "step": 15091 + }, + { + "epoch": 0.3021194605009634, + "grad_norm": 1.2444013357162476, + "learning_rate": 8.18065096971028e-06, + "loss": 0.343, + "step": 15092 + }, + { + "epoch": 0.30213947901809174, + "grad_norm": 1.1856069564819336, + "learning_rate": 8.180400829339971e-06, + "loss": 0.3199, + "step": 15093 + }, + { + "epoch": 0.3021594975352201, + "grad_norm": 1.9171521663665771, + "learning_rate": 8.180150675599942e-06, + "loss": 0.7924, + "step": 15094 + }, + { + "epoch": 0.30217951605234844, + "grad_norm": 1.199375867843628, + "learning_rate": 8.179900508491243e-06, + "loss": 0.3327, + "step": 15095 + }, + { + "epoch": 0.3021995345694768, + "grad_norm": 1.1587470769882202, + "learning_rate": 8.179650328014926e-06, + "loss": 0.3285, + "step": 15096 + }, + { + "epoch": 0.3022195530866051, + "grad_norm": 0.9801636934280396, + "learning_rate": 8.179400134172045e-06, + "loss": 0.3169, + "step": 15097 + }, + { + "epoch": 0.30223957160373344, + "grad_norm": 1.0310862064361572, + "learning_rate": 8.179149926963648e-06, + "loss": 0.3459, + "step": 15098 + }, + { + "epoch": 0.3022595901208618, + "grad_norm": 1.0274633169174194, + "learning_rate": 8.17889970639079e-06, + "loss": 0.3213, + "step": 15099 + }, + { + "epoch": 0.30227960863799014, + "grad_norm": 1.2910553216934204, + "learning_rate": 8.178649472454521e-06, + "loss": 0.3543, + "step": 15100 + }, + { + "epoch": 0.3022996271551185, + "grad_norm": 1.0799967050552368, + "learning_rate": 8.178399225155896e-06, + "loss": 0.3027, + "step": 15101 + }, + { + "epoch": 0.30231964567224684, + "grad_norm": 1.8053348064422607, + "learning_rate": 8.178148964495961e-06, + "loss": 0.8728, + "step": 15102 + }, + { + "epoch": 0.3023396641893752, + "grad_norm": 1.154707670211792, + "learning_rate": 8.177898690475774e-06, + "loss": 0.3188, + "step": 15103 + }, + { + "epoch": 0.30235968270650354, + "grad_norm": 1.0172117948532104, + "learning_rate": 8.177648403096385e-06, + "loss": 0.3036, + "step": 15104 + }, + { + "epoch": 0.30237970122363184, + "grad_norm": 1.1426186561584473, + "learning_rate": 8.177398102358845e-06, + "loss": 0.3532, + "step": 15105 + }, + { + "epoch": 0.3023997197407602, + "grad_norm": 0.9880574941635132, + "learning_rate": 8.177147788264208e-06, + "loss": 0.3554, + "step": 15106 + }, + { + "epoch": 0.30241973825788854, + "grad_norm": 1.8488341569900513, + "learning_rate": 8.176897460813524e-06, + "loss": 0.8221, + "step": 15107 + }, + { + "epoch": 0.3024397567750169, + "grad_norm": 1.1103962659835815, + "learning_rate": 8.176647120007847e-06, + "loss": 0.3651, + "step": 15108 + }, + { + "epoch": 0.30245977529214524, + "grad_norm": 1.0459752082824707, + "learning_rate": 8.176396765848232e-06, + "loss": 0.2766, + "step": 15109 + }, + { + "epoch": 0.3024797938092736, + "grad_norm": 1.1725658178329468, + "learning_rate": 8.176146398335724e-06, + "loss": 0.3263, + "step": 15110 + }, + { + "epoch": 0.30249981232640194, + "grad_norm": 1.0737781524658203, + "learning_rate": 8.175896017471386e-06, + "loss": 0.3128, + "step": 15111 + }, + { + "epoch": 0.3025198308435303, + "grad_norm": 1.2866820096969604, + "learning_rate": 8.175645623256258e-06, + "loss": 0.3465, + "step": 15112 + }, + { + "epoch": 0.3025398493606586, + "grad_norm": 1.2101144790649414, + "learning_rate": 8.175395215691405e-06, + "loss": 0.3058, + "step": 15113 + }, + { + "epoch": 0.30255986787778694, + "grad_norm": 1.2586017847061157, + "learning_rate": 8.17514479477787e-06, + "loss": 0.3481, + "step": 15114 + }, + { + "epoch": 0.3025798863949153, + "grad_norm": 1.1322362422943115, + "learning_rate": 8.174894360516711e-06, + "loss": 0.3005, + "step": 15115 + }, + { + "epoch": 0.30259990491204364, + "grad_norm": 1.8822364807128906, + "learning_rate": 8.174643912908977e-06, + "loss": 0.8139, + "step": 15116 + }, + { + "epoch": 0.302619923429172, + "grad_norm": 1.085636854171753, + "learning_rate": 8.174393451955727e-06, + "loss": 0.2982, + "step": 15117 + }, + { + "epoch": 0.30263994194630034, + "grad_norm": 1.8985328674316406, + "learning_rate": 8.174142977658008e-06, + "loss": 0.8646, + "step": 15118 + }, + { + "epoch": 0.3026599604634287, + "grad_norm": 1.002267599105835, + "learning_rate": 8.173892490016874e-06, + "loss": 0.2817, + "step": 15119 + }, + { + "epoch": 0.30267997898055704, + "grad_norm": 1.1391421556472778, + "learning_rate": 8.173641989033382e-06, + "loss": 0.34, + "step": 15120 + }, + { + "epoch": 0.30269999749768534, + "grad_norm": 1.2047981023788452, + "learning_rate": 8.173391474708581e-06, + "loss": 0.378, + "step": 15121 + }, + { + "epoch": 0.3027200160148137, + "grad_norm": 1.0890799760818481, + "learning_rate": 8.173140947043522e-06, + "loss": 0.3242, + "step": 15122 + }, + { + "epoch": 0.30274003453194204, + "grad_norm": 1.1769684553146362, + "learning_rate": 8.172890406039264e-06, + "loss": 0.3357, + "step": 15123 + }, + { + "epoch": 0.3027600530490704, + "grad_norm": 1.2325226068496704, + "learning_rate": 8.17263985169686e-06, + "loss": 0.3137, + "step": 15124 + }, + { + "epoch": 0.30278007156619874, + "grad_norm": 1.0195915699005127, + "learning_rate": 8.172389284017357e-06, + "loss": 0.2989, + "step": 15125 + }, + { + "epoch": 0.3028000900833271, + "grad_norm": 1.1397159099578857, + "learning_rate": 8.172138703001813e-06, + "loss": 0.3354, + "step": 15126 + }, + { + "epoch": 0.30282010860045544, + "grad_norm": 1.155626654624939, + "learning_rate": 8.17188810865128e-06, + "loss": 0.3211, + "step": 15127 + }, + { + "epoch": 0.3028401271175838, + "grad_norm": 1.1167101860046387, + "learning_rate": 8.171637500966814e-06, + "loss": 0.3308, + "step": 15128 + }, + { + "epoch": 0.3028601456347121, + "grad_norm": 1.1699684858322144, + "learning_rate": 8.171386879949465e-06, + "loss": 0.3275, + "step": 15129 + }, + { + "epoch": 0.30288016415184044, + "grad_norm": 1.1625577211380005, + "learning_rate": 8.171136245600289e-06, + "loss": 0.3184, + "step": 15130 + }, + { + "epoch": 0.3029001826689688, + "grad_norm": 1.454315185546875, + "learning_rate": 8.170885597920338e-06, + "loss": 0.3323, + "step": 15131 + }, + { + "epoch": 0.30292020118609714, + "grad_norm": 1.066390872001648, + "learning_rate": 8.170634936910667e-06, + "loss": 0.2967, + "step": 15132 + }, + { + "epoch": 0.3029402197032255, + "grad_norm": 1.1260565519332886, + "learning_rate": 8.170384262572328e-06, + "loss": 0.3153, + "step": 15133 + }, + { + "epoch": 0.30296023822035384, + "grad_norm": 1.095585823059082, + "learning_rate": 8.170133574906376e-06, + "loss": 0.3134, + "step": 15134 + }, + { + "epoch": 0.3029802567374822, + "grad_norm": 1.100920557975769, + "learning_rate": 8.169882873913865e-06, + "loss": 0.3184, + "step": 15135 + }, + { + "epoch": 0.30300027525461054, + "grad_norm": 1.2253128290176392, + "learning_rate": 8.169632159595849e-06, + "loss": 0.3012, + "step": 15136 + }, + { + "epoch": 0.30302029377173884, + "grad_norm": 1.0458921194076538, + "learning_rate": 8.16938143195338e-06, + "loss": 0.3138, + "step": 15137 + }, + { + "epoch": 0.3030403122888672, + "grad_norm": 1.1281386613845825, + "learning_rate": 8.169130690987516e-06, + "loss": 0.3123, + "step": 15138 + }, + { + "epoch": 0.30306033080599554, + "grad_norm": 1.2833935022354126, + "learning_rate": 8.168879936699306e-06, + "loss": 0.3513, + "step": 15139 + }, + { + "epoch": 0.3030803493231239, + "grad_norm": 1.0761473178863525, + "learning_rate": 8.168629169089809e-06, + "loss": 0.2948, + "step": 15140 + }, + { + "epoch": 0.30310036784025224, + "grad_norm": 1.1668068170547485, + "learning_rate": 8.168378388160076e-06, + "loss": 0.3339, + "step": 15141 + }, + { + "epoch": 0.3031203863573806, + "grad_norm": 1.213485598564148, + "learning_rate": 8.168127593911162e-06, + "loss": 0.3161, + "step": 15142 + }, + { + "epoch": 0.30314040487450894, + "grad_norm": 1.0591000318527222, + "learning_rate": 8.16787678634412e-06, + "loss": 0.3555, + "step": 15143 + }, + { + "epoch": 0.3031604233916373, + "grad_norm": 1.0861819982528687, + "learning_rate": 8.167625965460008e-06, + "loss": 0.3976, + "step": 15144 + }, + { + "epoch": 0.3031804419087656, + "grad_norm": 1.0566596984863281, + "learning_rate": 8.167375131259875e-06, + "loss": 0.3504, + "step": 15145 + }, + { + "epoch": 0.30320046042589394, + "grad_norm": 1.083483099937439, + "learning_rate": 8.167124283744783e-06, + "loss": 0.3035, + "step": 15146 + }, + { + "epoch": 0.3032204789430223, + "grad_norm": 1.035142183303833, + "learning_rate": 8.166873422915779e-06, + "loss": 0.3367, + "step": 15147 + }, + { + "epoch": 0.30324049746015064, + "grad_norm": 1.246084213256836, + "learning_rate": 8.166622548773921e-06, + "loss": 0.3193, + "step": 15148 + }, + { + "epoch": 0.303260515977279, + "grad_norm": 1.1911498308181763, + "learning_rate": 8.166371661320263e-06, + "loss": 0.4053, + "step": 15149 + }, + { + "epoch": 0.30328053449440734, + "grad_norm": 1.0199534893035889, + "learning_rate": 8.166120760555862e-06, + "loss": 0.2827, + "step": 15150 + }, + { + "epoch": 0.3033005530115357, + "grad_norm": 1.1188169717788696, + "learning_rate": 8.165869846481769e-06, + "loss": 0.2824, + "step": 15151 + }, + { + "epoch": 0.30332057152866404, + "grad_norm": 1.1464173793792725, + "learning_rate": 8.16561891909904e-06, + "loss": 0.322, + "step": 15152 + }, + { + "epoch": 0.30334059004579234, + "grad_norm": 1.1124825477600098, + "learning_rate": 8.165367978408731e-06, + "loss": 0.289, + "step": 15153 + }, + { + "epoch": 0.3033606085629207, + "grad_norm": 1.147545337677002, + "learning_rate": 8.165117024411897e-06, + "loss": 0.3149, + "step": 15154 + }, + { + "epoch": 0.30338062708004904, + "grad_norm": 1.1221500635147095, + "learning_rate": 8.164866057109591e-06, + "loss": 0.3539, + "step": 15155 + }, + { + "epoch": 0.3034006455971774, + "grad_norm": 1.2195379734039307, + "learning_rate": 8.16461507650287e-06, + "loss": 0.3478, + "step": 15156 + }, + { + "epoch": 0.30342066411430574, + "grad_norm": 1.9430075883865356, + "learning_rate": 8.164364082592788e-06, + "loss": 0.8142, + "step": 15157 + }, + { + "epoch": 0.3034406826314341, + "grad_norm": 0.9456197023391724, + "learning_rate": 8.1641130753804e-06, + "loss": 0.2795, + "step": 15158 + }, + { + "epoch": 0.30346070114856244, + "grad_norm": 0.987610936164856, + "learning_rate": 8.163862054866762e-06, + "loss": 0.3196, + "step": 15159 + }, + { + "epoch": 0.3034807196656908, + "grad_norm": 0.9736295938491821, + "learning_rate": 8.163611021052929e-06, + "loss": 0.3412, + "step": 15160 + }, + { + "epoch": 0.3035007381828191, + "grad_norm": 1.1713422536849976, + "learning_rate": 8.163359973939955e-06, + "loss": 0.3001, + "step": 15161 + }, + { + "epoch": 0.30352075669994744, + "grad_norm": 1.085254192352295, + "learning_rate": 8.163108913528898e-06, + "loss": 0.2954, + "step": 15162 + }, + { + "epoch": 0.3035407752170758, + "grad_norm": 1.2299044132232666, + "learning_rate": 8.16285783982081e-06, + "loss": 0.3518, + "step": 15163 + }, + { + "epoch": 0.30356079373420414, + "grad_norm": 1.1312791109085083, + "learning_rate": 8.16260675281675e-06, + "loss": 0.3425, + "step": 15164 + }, + { + "epoch": 0.3035808122513325, + "grad_norm": 1.0288242101669312, + "learning_rate": 8.16235565251777e-06, + "loss": 0.3118, + "step": 15165 + }, + { + "epoch": 0.30360083076846084, + "grad_norm": 1.1613335609436035, + "learning_rate": 8.162104538924928e-06, + "loss": 0.3147, + "step": 15166 + }, + { + "epoch": 0.3036208492855892, + "grad_norm": 1.0873125791549683, + "learning_rate": 8.161853412039281e-06, + "loss": 0.3224, + "step": 15167 + }, + { + "epoch": 0.30364086780271754, + "grad_norm": 1.036207914352417, + "learning_rate": 8.16160227186188e-06, + "loss": 0.2935, + "step": 15168 + }, + { + "epoch": 0.30366088631984584, + "grad_norm": 1.6782593727111816, + "learning_rate": 8.161351118393785e-06, + "loss": 0.7749, + "step": 15169 + }, + { + "epoch": 0.3036809048369742, + "grad_norm": 1.0844430923461914, + "learning_rate": 8.161099951636049e-06, + "loss": 0.3499, + "step": 15170 + }, + { + "epoch": 0.30370092335410254, + "grad_norm": 1.1003035306930542, + "learning_rate": 8.16084877158973e-06, + "loss": 0.305, + "step": 15171 + }, + { + "epoch": 0.3037209418712309, + "grad_norm": 1.348360300064087, + "learning_rate": 8.160597578255884e-06, + "loss": 0.3343, + "step": 15172 + }, + { + "epoch": 0.30374096038835924, + "grad_norm": 1.1197268962860107, + "learning_rate": 8.160346371635565e-06, + "loss": 0.3845, + "step": 15173 + }, + { + "epoch": 0.3037609789054876, + "grad_norm": 1.196125864982605, + "learning_rate": 8.160095151729828e-06, + "loss": 0.3416, + "step": 15174 + }, + { + "epoch": 0.30378099742261594, + "grad_norm": 1.1406445503234863, + "learning_rate": 8.159843918539733e-06, + "loss": 0.3202, + "step": 15175 + }, + { + "epoch": 0.3038010159397443, + "grad_norm": 1.0096832513809204, + "learning_rate": 8.159592672066334e-06, + "loss": 0.3408, + "step": 15176 + }, + { + "epoch": 0.3038210344568726, + "grad_norm": 1.1264798641204834, + "learning_rate": 8.159341412310689e-06, + "loss": 0.3313, + "step": 15177 + }, + { + "epoch": 0.30384105297400094, + "grad_norm": 0.9975374937057495, + "learning_rate": 8.15909013927385e-06, + "loss": 0.3334, + "step": 15178 + }, + { + "epoch": 0.3038610714911293, + "grad_norm": 2.126549482345581, + "learning_rate": 8.158838852956877e-06, + "loss": 0.8251, + "step": 15179 + }, + { + "epoch": 0.30388109000825764, + "grad_norm": 1.8334481716156006, + "learning_rate": 8.158587553360825e-06, + "loss": 0.8551, + "step": 15180 + }, + { + "epoch": 0.303901108525386, + "grad_norm": 1.0803241729736328, + "learning_rate": 8.158336240486751e-06, + "loss": 0.3215, + "step": 15181 + }, + { + "epoch": 0.30392112704251434, + "grad_norm": 0.953720211982727, + "learning_rate": 8.15808491433571e-06, + "loss": 0.3104, + "step": 15182 + }, + { + "epoch": 0.3039411455596427, + "grad_norm": 1.1371651887893677, + "learning_rate": 8.157833574908761e-06, + "loss": 0.2884, + "step": 15183 + }, + { + "epoch": 0.30396116407677104, + "grad_norm": 1.0762094259262085, + "learning_rate": 8.15758222220696e-06, + "loss": 0.4099, + "step": 15184 + }, + { + "epoch": 0.30398118259389934, + "grad_norm": 1.14704430103302, + "learning_rate": 8.15733085623136e-06, + "loss": 0.3025, + "step": 15185 + }, + { + "epoch": 0.3040012011110277, + "grad_norm": 1.027745246887207, + "learning_rate": 8.157079476983022e-06, + "loss": 0.298, + "step": 15186 + }, + { + "epoch": 0.30402121962815604, + "grad_norm": 1.8572673797607422, + "learning_rate": 8.156828084463003e-06, + "loss": 0.8046, + "step": 15187 + }, + { + "epoch": 0.3040412381452844, + "grad_norm": 1.1388401985168457, + "learning_rate": 8.156576678672357e-06, + "loss": 0.3731, + "step": 15188 + }, + { + "epoch": 0.30406125666241274, + "grad_norm": 0.9933094382286072, + "learning_rate": 8.156325259612141e-06, + "loss": 0.291, + "step": 15189 + }, + { + "epoch": 0.3040812751795411, + "grad_norm": 1.087193250656128, + "learning_rate": 8.156073827283413e-06, + "loss": 0.3001, + "step": 15190 + }, + { + "epoch": 0.30410129369666944, + "grad_norm": 1.0338202714920044, + "learning_rate": 8.155822381687231e-06, + "loss": 0.3327, + "step": 15191 + }, + { + "epoch": 0.3041213122137978, + "grad_norm": 1.1702208518981934, + "learning_rate": 8.155570922824649e-06, + "loss": 0.3758, + "step": 15192 + }, + { + "epoch": 0.3041413307309261, + "grad_norm": 1.1052988767623901, + "learning_rate": 8.155319450696727e-06, + "loss": 0.3048, + "step": 15193 + }, + { + "epoch": 0.30416134924805444, + "grad_norm": 1.2367068529129028, + "learning_rate": 8.155067965304523e-06, + "loss": 0.3305, + "step": 15194 + }, + { + "epoch": 0.3041813677651828, + "grad_norm": 1.0795466899871826, + "learning_rate": 8.15481646664909e-06, + "loss": 0.3252, + "step": 15195 + }, + { + "epoch": 0.30420138628231114, + "grad_norm": 1.2174433469772339, + "learning_rate": 8.154564954731485e-06, + "loss": 0.319, + "step": 15196 + }, + { + "epoch": 0.3042214047994395, + "grad_norm": 1.9760808944702148, + "learning_rate": 8.154313429552772e-06, + "loss": 0.8559, + "step": 15197 + }, + { + "epoch": 0.30424142331656784, + "grad_norm": 0.9732483625411987, + "learning_rate": 8.154061891114003e-06, + "loss": 0.2914, + "step": 15198 + }, + { + "epoch": 0.3042614418336962, + "grad_norm": 1.793947696685791, + "learning_rate": 8.153810339416236e-06, + "loss": 0.8537, + "step": 15199 + }, + { + "epoch": 0.30428146035082454, + "grad_norm": 1.235360026359558, + "learning_rate": 8.153558774460527e-06, + "loss": 0.3574, + "step": 15200 + }, + { + "epoch": 0.30430147886795283, + "grad_norm": 1.1204904317855835, + "learning_rate": 8.15330719624794e-06, + "loss": 0.3107, + "step": 15201 + }, + { + "epoch": 0.3043214973850812, + "grad_norm": 1.1135194301605225, + "learning_rate": 8.153055604779525e-06, + "loss": 0.3875, + "step": 15202 + }, + { + "epoch": 0.30434151590220954, + "grad_norm": 1.1282795667648315, + "learning_rate": 8.152804000056345e-06, + "loss": 0.3258, + "step": 15203 + }, + { + "epoch": 0.3043615344193379, + "grad_norm": 1.0722053050994873, + "learning_rate": 8.152552382079455e-06, + "loss": 0.2811, + "step": 15204 + }, + { + "epoch": 0.30438155293646624, + "grad_norm": 1.2815148830413818, + "learning_rate": 8.152300750849911e-06, + "loss": 0.377, + "step": 15205 + }, + { + "epoch": 0.3044015714535946, + "grad_norm": 1.0013333559036255, + "learning_rate": 8.152049106368776e-06, + "loss": 0.2807, + "step": 15206 + }, + { + "epoch": 0.30442158997072294, + "grad_norm": 1.0443029403686523, + "learning_rate": 8.151797448637101e-06, + "loss": 0.2996, + "step": 15207 + }, + { + "epoch": 0.3044416084878513, + "grad_norm": 1.1775951385498047, + "learning_rate": 8.151545777655953e-06, + "loss": 0.292, + "step": 15208 + }, + { + "epoch": 0.3044616270049796, + "grad_norm": 1.1700891256332397, + "learning_rate": 8.15129409342638e-06, + "loss": 0.3336, + "step": 15209 + }, + { + "epoch": 0.30448164552210794, + "grad_norm": 1.2461405992507935, + "learning_rate": 8.151042395949448e-06, + "loss": 0.3523, + "step": 15210 + }, + { + "epoch": 0.3045016640392363, + "grad_norm": 1.1268302202224731, + "learning_rate": 8.150790685226211e-06, + "loss": 0.3316, + "step": 15211 + }, + { + "epoch": 0.30452168255636464, + "grad_norm": 1.0769623517990112, + "learning_rate": 8.150538961257727e-06, + "loss": 0.3604, + "step": 15212 + }, + { + "epoch": 0.304541701073493, + "grad_norm": 1.0753138065338135, + "learning_rate": 8.150287224045056e-06, + "loss": 0.2839, + "step": 15213 + }, + { + "epoch": 0.30456171959062134, + "grad_norm": 1.153982162475586, + "learning_rate": 8.150035473589256e-06, + "loss": 0.3061, + "step": 15214 + }, + { + "epoch": 0.3045817381077497, + "grad_norm": 1.060286521911621, + "learning_rate": 8.149783709891384e-06, + "loss": 0.3241, + "step": 15215 + }, + { + "epoch": 0.30460175662487804, + "grad_norm": 1.065531849861145, + "learning_rate": 8.149531932952499e-06, + "loss": 0.3128, + "step": 15216 + }, + { + "epoch": 0.30462177514200633, + "grad_norm": 1.2406576871871948, + "learning_rate": 8.14928014277366e-06, + "loss": 0.3788, + "step": 15217 + }, + { + "epoch": 0.3046417936591347, + "grad_norm": 1.1207116842269897, + "learning_rate": 8.149028339355925e-06, + "loss": 0.2892, + "step": 15218 + }, + { + "epoch": 0.30466181217626304, + "grad_norm": 1.342418909072876, + "learning_rate": 8.148776522700352e-06, + "loss": 0.3135, + "step": 15219 + }, + { + "epoch": 0.3046818306933914, + "grad_norm": 1.0172137022018433, + "learning_rate": 8.148524692808e-06, + "loss": 0.3178, + "step": 15220 + }, + { + "epoch": 0.30470184921051974, + "grad_norm": 1.0342686176300049, + "learning_rate": 8.148272849679928e-06, + "loss": 0.3545, + "step": 15221 + }, + { + "epoch": 0.3047218677276481, + "grad_norm": 1.079430103302002, + "learning_rate": 8.148020993317194e-06, + "loss": 0.2882, + "step": 15222 + }, + { + "epoch": 0.30474188624477644, + "grad_norm": 1.0192418098449707, + "learning_rate": 8.147769123720858e-06, + "loss": 0.2826, + "step": 15223 + }, + { + "epoch": 0.3047619047619048, + "grad_norm": 2.086817979812622, + "learning_rate": 8.14751724089198e-06, + "loss": 0.8177, + "step": 15224 + }, + { + "epoch": 0.3047819232790331, + "grad_norm": 1.1295214891433716, + "learning_rate": 8.147265344831614e-06, + "loss": 0.3413, + "step": 15225 + }, + { + "epoch": 0.30480194179616144, + "grad_norm": 1.190187692642212, + "learning_rate": 8.147013435540822e-06, + "loss": 0.3354, + "step": 15226 + }, + { + "epoch": 0.3048219603132898, + "grad_norm": 1.1310412883758545, + "learning_rate": 8.146761513020664e-06, + "loss": 0.3038, + "step": 15227 + }, + { + "epoch": 0.30484197883041814, + "grad_norm": 1.0853350162506104, + "learning_rate": 8.146509577272197e-06, + "loss": 0.3036, + "step": 15228 + }, + { + "epoch": 0.3048619973475465, + "grad_norm": 1.1304363012313843, + "learning_rate": 8.146257628296481e-06, + "loss": 0.3656, + "step": 15229 + }, + { + "epoch": 0.30488201586467484, + "grad_norm": 1.6999435424804688, + "learning_rate": 8.146005666094575e-06, + "loss": 0.3293, + "step": 15230 + }, + { + "epoch": 0.3049020343818032, + "grad_norm": 1.270792007446289, + "learning_rate": 8.145753690667539e-06, + "loss": 0.3298, + "step": 15231 + }, + { + "epoch": 0.30492205289893154, + "grad_norm": 1.092357873916626, + "learning_rate": 8.145501702016429e-06, + "loss": 0.3884, + "step": 15232 + }, + { + "epoch": 0.30494207141605983, + "grad_norm": 1.1269792318344116, + "learning_rate": 8.14524970014231e-06, + "loss": 0.3452, + "step": 15233 + }, + { + "epoch": 0.3049620899331882, + "grad_norm": 1.1753056049346924, + "learning_rate": 8.144997685046237e-06, + "loss": 0.3069, + "step": 15234 + }, + { + "epoch": 0.30498210845031654, + "grad_norm": 1.9669030904769897, + "learning_rate": 8.144745656729269e-06, + "loss": 0.8038, + "step": 15235 + }, + { + "epoch": 0.3050021269674449, + "grad_norm": 1.1608866453170776, + "learning_rate": 8.144493615192468e-06, + "loss": 0.2961, + "step": 15236 + }, + { + "epoch": 0.30502214548457324, + "grad_norm": 1.1593728065490723, + "learning_rate": 8.144241560436893e-06, + "loss": 0.3397, + "step": 15237 + }, + { + "epoch": 0.3050421640017016, + "grad_norm": 1.2198898792266846, + "learning_rate": 8.143989492463602e-06, + "loss": 0.3626, + "step": 15238 + }, + { + "epoch": 0.30506218251882994, + "grad_norm": 1.2517200708389282, + "learning_rate": 8.143737411273658e-06, + "loss": 0.3194, + "step": 15239 + }, + { + "epoch": 0.30508220103595823, + "grad_norm": 1.126637578010559, + "learning_rate": 8.143485316868115e-06, + "loss": 0.3085, + "step": 15240 + }, + { + "epoch": 0.3051022195530866, + "grad_norm": 1.2273386716842651, + "learning_rate": 8.14323320924804e-06, + "loss": 0.3519, + "step": 15241 + }, + { + "epoch": 0.30512223807021494, + "grad_norm": 1.0168853998184204, + "learning_rate": 8.142981088414486e-06, + "loss": 0.3055, + "step": 15242 + }, + { + "epoch": 0.3051422565873433, + "grad_norm": 1.1162837743759155, + "learning_rate": 8.142728954368518e-06, + "loss": 0.2712, + "step": 15243 + }, + { + "epoch": 0.30516227510447164, + "grad_norm": 1.057847499847412, + "learning_rate": 8.142476807111193e-06, + "loss": 0.3189, + "step": 15244 + }, + { + "epoch": 0.3051822936216, + "grad_norm": 1.0640499591827393, + "learning_rate": 8.14222464664357e-06, + "loss": 0.3228, + "step": 15245 + }, + { + "epoch": 0.30520231213872834, + "grad_norm": 1.1686890125274658, + "learning_rate": 8.141972472966716e-06, + "loss": 0.3011, + "step": 15246 + }, + { + "epoch": 0.3052223306558567, + "grad_norm": 1.0472133159637451, + "learning_rate": 8.14172028608168e-06, + "loss": 0.3515, + "step": 15247 + }, + { + "epoch": 0.305242349172985, + "grad_norm": 1.7703304290771484, + "learning_rate": 8.141468085989532e-06, + "loss": 0.8344, + "step": 15248 + }, + { + "epoch": 0.30526236769011333, + "grad_norm": 1.1410914659500122, + "learning_rate": 8.141215872691325e-06, + "loss": 0.2963, + "step": 15249 + }, + { + "epoch": 0.3052823862072417, + "grad_norm": 1.16007399559021, + "learning_rate": 8.140963646188124e-06, + "loss": 0.3383, + "step": 15250 + }, + { + "epoch": 0.30530240472437004, + "grad_norm": 1.0424970388412476, + "learning_rate": 8.140711406480988e-06, + "loss": 0.3202, + "step": 15251 + }, + { + "epoch": 0.3053224232414984, + "grad_norm": 1.1170787811279297, + "learning_rate": 8.140459153570977e-06, + "loss": 0.2892, + "step": 15252 + }, + { + "epoch": 0.30534244175862674, + "grad_norm": 1.2623852491378784, + "learning_rate": 8.140206887459152e-06, + "loss": 0.3433, + "step": 15253 + }, + { + "epoch": 0.3053624602757551, + "grad_norm": 1.2366856336593628, + "learning_rate": 8.139954608146572e-06, + "loss": 0.3286, + "step": 15254 + }, + { + "epoch": 0.30538247879288344, + "grad_norm": 1.142146348953247, + "learning_rate": 8.139702315634298e-06, + "loss": 0.3462, + "step": 15255 + }, + { + "epoch": 0.30540249731001173, + "grad_norm": 1.3731105327606201, + "learning_rate": 8.139450009923395e-06, + "loss": 0.3427, + "step": 15256 + }, + { + "epoch": 0.3054225158271401, + "grad_norm": 1.2377333641052246, + "learning_rate": 8.139197691014916e-06, + "loss": 0.3898, + "step": 15257 + }, + { + "epoch": 0.30544253434426843, + "grad_norm": 1.1746981143951416, + "learning_rate": 8.138945358909927e-06, + "loss": 0.3651, + "step": 15258 + }, + { + "epoch": 0.3054625528613968, + "grad_norm": 0.993679940700531, + "learning_rate": 8.138693013609486e-06, + "loss": 0.3012, + "step": 15259 + }, + { + "epoch": 0.30548257137852514, + "grad_norm": 1.0769388675689697, + "learning_rate": 8.138440655114657e-06, + "loss": 0.3223, + "step": 15260 + }, + { + "epoch": 0.3055025898956535, + "grad_norm": 1.1935389041900635, + "learning_rate": 8.138188283426497e-06, + "loss": 0.3068, + "step": 15261 + }, + { + "epoch": 0.30552260841278184, + "grad_norm": 1.0363240242004395, + "learning_rate": 8.13793589854607e-06, + "loss": 0.3375, + "step": 15262 + }, + { + "epoch": 0.3055426269299102, + "grad_norm": 1.122172236442566, + "learning_rate": 8.137683500474433e-06, + "loss": 0.3419, + "step": 15263 + }, + { + "epoch": 0.3055626454470385, + "grad_norm": 1.0518946647644043, + "learning_rate": 8.137431089212653e-06, + "loss": 0.3145, + "step": 15264 + }, + { + "epoch": 0.30558266396416683, + "grad_norm": 1.173098087310791, + "learning_rate": 8.137178664761788e-06, + "loss": 0.3569, + "step": 15265 + }, + { + "epoch": 0.3056026824812952, + "grad_norm": 2.051013708114624, + "learning_rate": 8.136926227122897e-06, + "loss": 0.7507, + "step": 15266 + }, + { + "epoch": 0.30562270099842354, + "grad_norm": 1.1617844104766846, + "learning_rate": 8.136673776297044e-06, + "loss": 0.3566, + "step": 15267 + }, + { + "epoch": 0.3056427195155519, + "grad_norm": 1.1950571537017822, + "learning_rate": 8.13642131228529e-06, + "loss": 0.3381, + "step": 15268 + }, + { + "epoch": 0.30566273803268024, + "grad_norm": 1.194236159324646, + "learning_rate": 8.136168835088696e-06, + "loss": 0.3267, + "step": 15269 + }, + { + "epoch": 0.3056827565498086, + "grad_norm": 1.0889027118682861, + "learning_rate": 8.135916344708321e-06, + "loss": 0.3341, + "step": 15270 + }, + { + "epoch": 0.30570277506693694, + "grad_norm": 0.9987488985061646, + "learning_rate": 8.135663841145231e-06, + "loss": 0.3203, + "step": 15271 + }, + { + "epoch": 0.30572279358406523, + "grad_norm": 1.147126317024231, + "learning_rate": 8.135411324400483e-06, + "loss": 0.343, + "step": 15272 + }, + { + "epoch": 0.3057428121011936, + "grad_norm": 0.9783841967582703, + "learning_rate": 8.135158794475144e-06, + "loss": 0.2957, + "step": 15273 + }, + { + "epoch": 0.30576283061832193, + "grad_norm": 1.9223902225494385, + "learning_rate": 8.13490625137027e-06, + "loss": 0.8543, + "step": 15274 + }, + { + "epoch": 0.3057828491354503, + "grad_norm": 1.866345763206482, + "learning_rate": 8.134653695086922e-06, + "loss": 0.858, + "step": 15275 + }, + { + "epoch": 0.30580286765257864, + "grad_norm": 1.0414209365844727, + "learning_rate": 8.134401125626168e-06, + "loss": 0.297, + "step": 15276 + }, + { + "epoch": 0.305822886169707, + "grad_norm": 1.1549503803253174, + "learning_rate": 8.134148542989065e-06, + "loss": 0.3287, + "step": 15277 + }, + { + "epoch": 0.30584290468683534, + "grad_norm": 1.3060612678527832, + "learning_rate": 8.133895947176677e-06, + "loss": 0.3249, + "step": 15278 + }, + { + "epoch": 0.3058629232039637, + "grad_norm": 1.7212884426116943, + "learning_rate": 8.133643338190063e-06, + "loss": 0.8292, + "step": 15279 + }, + { + "epoch": 0.305882941721092, + "grad_norm": 1.0955027341842651, + "learning_rate": 8.133390716030288e-06, + "loss": 0.2888, + "step": 15280 + }, + { + "epoch": 0.30590296023822033, + "grad_norm": 0.9319384098052979, + "learning_rate": 8.133138080698414e-06, + "loss": 0.257, + "step": 15281 + }, + { + "epoch": 0.3059229787553487, + "grad_norm": 1.9224547147750854, + "learning_rate": 8.132885432195499e-06, + "loss": 0.8001, + "step": 15282 + }, + { + "epoch": 0.30594299727247704, + "grad_norm": 1.1666653156280518, + "learning_rate": 8.132632770522609e-06, + "loss": 0.3391, + "step": 15283 + }, + { + "epoch": 0.3059630157896054, + "grad_norm": 1.2271478176116943, + "learning_rate": 8.132380095680805e-06, + "loss": 0.3143, + "step": 15284 + }, + { + "epoch": 0.30598303430673374, + "grad_norm": 0.9776285886764526, + "learning_rate": 8.13212740767115e-06, + "loss": 0.3301, + "step": 15285 + }, + { + "epoch": 0.3060030528238621, + "grad_norm": 1.0781387090682983, + "learning_rate": 8.131874706494704e-06, + "loss": 0.3192, + "step": 15286 + }, + { + "epoch": 0.30602307134099044, + "grad_norm": 1.0476106405258179, + "learning_rate": 8.131621992152532e-06, + "loss": 0.3117, + "step": 15287 + }, + { + "epoch": 0.30604308985811873, + "grad_norm": 1.224820852279663, + "learning_rate": 8.131369264645695e-06, + "loss": 0.307, + "step": 15288 + }, + { + "epoch": 0.3060631083752471, + "grad_norm": 1.121566891670227, + "learning_rate": 8.131116523975253e-06, + "loss": 0.2978, + "step": 15289 + }, + { + "epoch": 0.30608312689237543, + "grad_norm": 1.1329360008239746, + "learning_rate": 8.130863770142274e-06, + "loss": 0.3362, + "step": 15290 + }, + { + "epoch": 0.3061031454095038, + "grad_norm": 1.0662460327148438, + "learning_rate": 8.130611003147816e-06, + "loss": 0.2954, + "step": 15291 + }, + { + "epoch": 0.30612316392663214, + "grad_norm": 1.1194078922271729, + "learning_rate": 8.130358222992944e-06, + "loss": 0.3492, + "step": 15292 + }, + { + "epoch": 0.3061431824437605, + "grad_norm": 1.1129541397094727, + "learning_rate": 8.130105429678719e-06, + "loss": 0.3073, + "step": 15293 + }, + { + "epoch": 0.30616320096088884, + "grad_norm": 1.0748800039291382, + "learning_rate": 8.129852623206204e-06, + "loss": 0.2987, + "step": 15294 + }, + { + "epoch": 0.3061832194780172, + "grad_norm": 1.106964111328125, + "learning_rate": 8.129599803576464e-06, + "loss": 0.3144, + "step": 15295 + }, + { + "epoch": 0.3062032379951455, + "grad_norm": 0.9529418349266052, + "learning_rate": 8.129346970790559e-06, + "loss": 0.31, + "step": 15296 + }, + { + "epoch": 0.30622325651227383, + "grad_norm": 1.0942645072937012, + "learning_rate": 8.129094124849553e-06, + "loss": 0.3249, + "step": 15297 + }, + { + "epoch": 0.3062432750294022, + "grad_norm": 1.1566623449325562, + "learning_rate": 8.128841265754507e-06, + "loss": 0.3264, + "step": 15298 + }, + { + "epoch": 0.30626329354653054, + "grad_norm": 1.2194161415100098, + "learning_rate": 8.128588393506487e-06, + "loss": 0.3348, + "step": 15299 + }, + { + "epoch": 0.3062833120636589, + "grad_norm": 1.159818410873413, + "learning_rate": 8.128335508106553e-06, + "loss": 0.3185, + "step": 15300 + }, + { + "epoch": 0.30630333058078724, + "grad_norm": 1.053627610206604, + "learning_rate": 8.128082609555771e-06, + "loss": 0.2919, + "step": 15301 + }, + { + "epoch": 0.3063233490979156, + "grad_norm": 1.2012776136398315, + "learning_rate": 8.127829697855203e-06, + "loss": 0.3723, + "step": 15302 + }, + { + "epoch": 0.30634336761504394, + "grad_norm": 1.1004235744476318, + "learning_rate": 8.127576773005914e-06, + "loss": 0.3199, + "step": 15303 + }, + { + "epoch": 0.30636338613217223, + "grad_norm": 1.0434657335281372, + "learning_rate": 8.127323835008963e-06, + "loss": 0.3478, + "step": 15304 + }, + { + "epoch": 0.3063834046493006, + "grad_norm": 1.2082875967025757, + "learning_rate": 8.127070883865416e-06, + "loss": 0.3482, + "step": 15305 + }, + { + "epoch": 0.30640342316642893, + "grad_norm": 1.4448596239089966, + "learning_rate": 8.126817919576335e-06, + "loss": 0.3264, + "step": 15306 + }, + { + "epoch": 0.3064234416835573, + "grad_norm": 1.1819480657577515, + "learning_rate": 8.126564942142786e-06, + "loss": 0.3062, + "step": 15307 + }, + { + "epoch": 0.30644346020068564, + "grad_norm": 1.0961579084396362, + "learning_rate": 8.126311951565831e-06, + "loss": 0.3386, + "step": 15308 + }, + { + "epoch": 0.306463478717814, + "grad_norm": 1.3188472986221313, + "learning_rate": 8.126058947846531e-06, + "loss": 0.3114, + "step": 15309 + }, + { + "epoch": 0.30648349723494234, + "grad_norm": 1.1530590057373047, + "learning_rate": 8.125805930985955e-06, + "loss": 0.328, + "step": 15310 + }, + { + "epoch": 0.3065035157520707, + "grad_norm": 1.0935999155044556, + "learning_rate": 8.12555290098516e-06, + "loss": 0.3543, + "step": 15311 + }, + { + "epoch": 0.306523534269199, + "grad_norm": 1.2651463747024536, + "learning_rate": 8.125299857845217e-06, + "loss": 0.3618, + "step": 15312 + }, + { + "epoch": 0.30654355278632733, + "grad_norm": 1.1071574687957764, + "learning_rate": 8.125046801567182e-06, + "loss": 0.3388, + "step": 15313 + }, + { + "epoch": 0.3065635713034557, + "grad_norm": 1.1329201459884644, + "learning_rate": 8.124793732152127e-06, + "loss": 0.3344, + "step": 15314 + }, + { + "epoch": 0.30658358982058403, + "grad_norm": 1.999050498008728, + "learning_rate": 8.124540649601108e-06, + "loss": 0.8127, + "step": 15315 + }, + { + "epoch": 0.3066036083377124, + "grad_norm": 1.2300466299057007, + "learning_rate": 8.124287553915196e-06, + "loss": 0.3385, + "step": 15316 + }, + { + "epoch": 0.30662362685484074, + "grad_norm": 1.304672122001648, + "learning_rate": 8.124034445095448e-06, + "loss": 0.3664, + "step": 15317 + }, + { + "epoch": 0.3066436453719691, + "grad_norm": 1.16444993019104, + "learning_rate": 8.123781323142934e-06, + "loss": 0.3458, + "step": 15318 + }, + { + "epoch": 0.30666366388909744, + "grad_norm": 1.1253221035003662, + "learning_rate": 8.123528188058715e-06, + "loss": 0.2981, + "step": 15319 + }, + { + "epoch": 0.30668368240622573, + "grad_norm": 1.0736130475997925, + "learning_rate": 8.123275039843854e-06, + "loss": 0.3299, + "step": 15320 + }, + { + "epoch": 0.3067037009233541, + "grad_norm": 1.7958904504776, + "learning_rate": 8.123021878499419e-06, + "loss": 0.8506, + "step": 15321 + }, + { + "epoch": 0.30672371944048243, + "grad_norm": 1.0249278545379639, + "learning_rate": 8.122768704026471e-06, + "loss": 0.3388, + "step": 15322 + }, + { + "epoch": 0.3067437379576108, + "grad_norm": 1.1397367715835571, + "learning_rate": 8.122515516426076e-06, + "loss": 0.3784, + "step": 15323 + }, + { + "epoch": 0.30676375647473914, + "grad_norm": 1.073671579360962, + "learning_rate": 8.122262315699296e-06, + "loss": 0.294, + "step": 15324 + }, + { + "epoch": 0.3067837749918675, + "grad_norm": 1.1393482685089111, + "learning_rate": 8.122009101847198e-06, + "loss": 0.3143, + "step": 15325 + }, + { + "epoch": 0.30680379350899584, + "grad_norm": 1.898178219795227, + "learning_rate": 8.121755874870847e-06, + "loss": 0.7483, + "step": 15326 + }, + { + "epoch": 0.3068238120261242, + "grad_norm": 1.0051167011260986, + "learning_rate": 8.121502634771306e-06, + "loss": 0.3656, + "step": 15327 + }, + { + "epoch": 0.3068438305432525, + "grad_norm": 1.0945284366607666, + "learning_rate": 8.121249381549639e-06, + "loss": 0.3044, + "step": 15328 + }, + { + "epoch": 0.30686384906038083, + "grad_norm": 0.9820823669433594, + "learning_rate": 8.120996115206911e-06, + "loss": 0.3312, + "step": 15329 + }, + { + "epoch": 0.3068838675775092, + "grad_norm": 1.310113787651062, + "learning_rate": 8.120742835744187e-06, + "loss": 0.2658, + "step": 15330 + }, + { + "epoch": 0.30690388609463753, + "grad_norm": 0.9621457457542419, + "learning_rate": 8.120489543162532e-06, + "loss": 0.2711, + "step": 15331 + }, + { + "epoch": 0.3069239046117659, + "grad_norm": 1.1587169170379639, + "learning_rate": 8.120236237463011e-06, + "loss": 0.3452, + "step": 15332 + }, + { + "epoch": 0.30694392312889424, + "grad_norm": 1.0561808347702026, + "learning_rate": 8.119982918646687e-06, + "loss": 0.3376, + "step": 15333 + }, + { + "epoch": 0.3069639416460226, + "grad_norm": 1.9871188402175903, + "learning_rate": 8.119729586714627e-06, + "loss": 0.7756, + "step": 15334 + }, + { + "epoch": 0.30698396016315094, + "grad_norm": 1.2153009176254272, + "learning_rate": 8.119476241667895e-06, + "loss": 0.3429, + "step": 15335 + }, + { + "epoch": 0.30700397868027923, + "grad_norm": 1.0397405624389648, + "learning_rate": 8.119222883507557e-06, + "loss": 0.3619, + "step": 15336 + }, + { + "epoch": 0.3070239971974076, + "grad_norm": 1.0272611379623413, + "learning_rate": 8.118969512234676e-06, + "loss": 0.3165, + "step": 15337 + }, + { + "epoch": 0.30704401571453593, + "grad_norm": 1.0855320692062378, + "learning_rate": 8.118716127850319e-06, + "loss": 0.3491, + "step": 15338 + }, + { + "epoch": 0.3070640342316643, + "grad_norm": 1.1942144632339478, + "learning_rate": 8.118462730355553e-06, + "loss": 0.3046, + "step": 15339 + }, + { + "epoch": 0.30708405274879264, + "grad_norm": 1.1370271444320679, + "learning_rate": 8.118209319751437e-06, + "loss": 0.3377, + "step": 15340 + }, + { + "epoch": 0.307104071265921, + "grad_norm": 1.4155187606811523, + "learning_rate": 8.11795589603904e-06, + "loss": 0.3441, + "step": 15341 + }, + { + "epoch": 0.30712408978304934, + "grad_norm": 1.061492681503296, + "learning_rate": 8.11770245921943e-06, + "loss": 0.3444, + "step": 15342 + }, + { + "epoch": 0.3071441083001777, + "grad_norm": 1.016148567199707, + "learning_rate": 8.117449009293668e-06, + "loss": 0.3473, + "step": 15343 + }, + { + "epoch": 0.307164126817306, + "grad_norm": 1.7643465995788574, + "learning_rate": 8.117195546262822e-06, + "loss": 0.8909, + "step": 15344 + }, + { + "epoch": 0.30718414533443433, + "grad_norm": 1.327349305152893, + "learning_rate": 8.116942070127958e-06, + "loss": 0.3198, + "step": 15345 + }, + { + "epoch": 0.3072041638515627, + "grad_norm": 1.9528870582580566, + "learning_rate": 8.116688580890137e-06, + "loss": 0.8558, + "step": 15346 + }, + { + "epoch": 0.30722418236869103, + "grad_norm": 1.0995421409606934, + "learning_rate": 8.11643507855043e-06, + "loss": 0.2753, + "step": 15347 + }, + { + "epoch": 0.3072442008858194, + "grad_norm": 1.1293171644210815, + "learning_rate": 8.1161815631099e-06, + "loss": 0.3016, + "step": 15348 + }, + { + "epoch": 0.30726421940294774, + "grad_norm": 1.028125524520874, + "learning_rate": 8.115928034569616e-06, + "loss": 0.3332, + "step": 15349 + }, + { + "epoch": 0.3072842379200761, + "grad_norm": 1.909885048866272, + "learning_rate": 8.115674492930639e-06, + "loss": 0.8696, + "step": 15350 + }, + { + "epoch": 0.30730425643720444, + "grad_norm": 1.1895339488983154, + "learning_rate": 8.115420938194036e-06, + "loss": 0.3424, + "step": 15351 + }, + { + "epoch": 0.30732427495433273, + "grad_norm": 1.1365917921066284, + "learning_rate": 8.115167370360875e-06, + "loss": 0.3727, + "step": 15352 + }, + { + "epoch": 0.3073442934714611, + "grad_norm": 1.1992366313934326, + "learning_rate": 8.11491378943222e-06, + "loss": 0.2879, + "step": 15353 + }, + { + "epoch": 0.30736431198858943, + "grad_norm": 1.1490293741226196, + "learning_rate": 8.114660195409138e-06, + "loss": 0.3845, + "step": 15354 + }, + { + "epoch": 0.3073843305057178, + "grad_norm": 1.3515751361846924, + "learning_rate": 8.114406588292694e-06, + "loss": 0.3076, + "step": 15355 + }, + { + "epoch": 0.30740434902284614, + "grad_norm": 1.049353837966919, + "learning_rate": 8.114152968083957e-06, + "loss": 0.3493, + "step": 15356 + }, + { + "epoch": 0.3074243675399745, + "grad_norm": 1.037859559059143, + "learning_rate": 8.11389933478399e-06, + "loss": 0.3469, + "step": 15357 + }, + { + "epoch": 0.30744438605710284, + "grad_norm": 1.8565868139266968, + "learning_rate": 8.11364568839386e-06, + "loss": 0.8573, + "step": 15358 + }, + { + "epoch": 0.3074644045742312, + "grad_norm": 1.0075069665908813, + "learning_rate": 8.113392028914634e-06, + "loss": 0.2915, + "step": 15359 + }, + { + "epoch": 0.3074844230913595, + "grad_norm": 1.0358431339263916, + "learning_rate": 8.113138356347377e-06, + "loss": 0.3283, + "step": 15360 + }, + { + "epoch": 0.30750444160848783, + "grad_norm": 1.173590064048767, + "learning_rate": 8.112884670693159e-06, + "loss": 0.2843, + "step": 15361 + }, + { + "epoch": 0.3075244601256162, + "grad_norm": 2.03132700920105, + "learning_rate": 8.112630971953042e-06, + "loss": 0.7201, + "step": 15362 + }, + { + "epoch": 0.30754447864274453, + "grad_norm": 1.0241072177886963, + "learning_rate": 8.112377260128093e-06, + "loss": 0.359, + "step": 15363 + }, + { + "epoch": 0.3075644971598729, + "grad_norm": 1.1795631647109985, + "learning_rate": 8.11212353521938e-06, + "loss": 0.3385, + "step": 15364 + }, + { + "epoch": 0.30758451567700124, + "grad_norm": 1.151647686958313, + "learning_rate": 8.111869797227972e-06, + "loss": 0.3306, + "step": 15365 + }, + { + "epoch": 0.3076045341941296, + "grad_norm": 1.1409640312194824, + "learning_rate": 8.111616046154931e-06, + "loss": 0.3583, + "step": 15366 + }, + { + "epoch": 0.30762455271125794, + "grad_norm": 1.1674121618270874, + "learning_rate": 8.111362282001327e-06, + "loss": 0.2835, + "step": 15367 + }, + { + "epoch": 0.30764457122838623, + "grad_norm": 1.1295313835144043, + "learning_rate": 8.111108504768224e-06, + "loss": 0.3471, + "step": 15368 + }, + { + "epoch": 0.3076645897455146, + "grad_norm": 1.269321322441101, + "learning_rate": 8.11085471445669e-06, + "loss": 0.3968, + "step": 15369 + }, + { + "epoch": 0.30768460826264293, + "grad_norm": 1.2200098037719727, + "learning_rate": 8.110600911067794e-06, + "loss": 0.3812, + "step": 15370 + }, + { + "epoch": 0.3077046267797713, + "grad_norm": 1.1350533962249756, + "learning_rate": 8.1103470946026e-06, + "loss": 0.3451, + "step": 15371 + }, + { + "epoch": 0.30772464529689963, + "grad_norm": 1.138370394706726, + "learning_rate": 8.110093265062177e-06, + "loss": 0.3083, + "step": 15372 + }, + { + "epoch": 0.307744663814028, + "grad_norm": 1.8149648904800415, + "learning_rate": 8.10983942244759e-06, + "loss": 0.84, + "step": 15373 + }, + { + "epoch": 0.30776468233115634, + "grad_norm": 1.1737959384918213, + "learning_rate": 8.10958556675991e-06, + "loss": 0.3508, + "step": 15374 + }, + { + "epoch": 0.3077847008482847, + "grad_norm": 1.140092134475708, + "learning_rate": 8.109331698000197e-06, + "loss": 0.3038, + "step": 15375 + }, + { + "epoch": 0.307804719365413, + "grad_norm": 1.1826331615447998, + "learning_rate": 8.109077816169527e-06, + "loss": 0.3271, + "step": 15376 + }, + { + "epoch": 0.30782473788254133, + "grad_norm": 1.9217137098312378, + "learning_rate": 8.10882392126896e-06, + "loss": 0.8487, + "step": 15377 + }, + { + "epoch": 0.3078447563996697, + "grad_norm": 1.2718008756637573, + "learning_rate": 8.108570013299568e-06, + "loss": 0.346, + "step": 15378 + }, + { + "epoch": 0.30786477491679803, + "grad_norm": 1.0327320098876953, + "learning_rate": 8.108316092262415e-06, + "loss": 0.3399, + "step": 15379 + }, + { + "epoch": 0.3078847934339264, + "grad_norm": 1.1244651079177856, + "learning_rate": 8.108062158158574e-06, + "loss": 0.3171, + "step": 15380 + }, + { + "epoch": 0.30790481195105474, + "grad_norm": 1.2782918214797974, + "learning_rate": 8.107808210989106e-06, + "loss": 0.3872, + "step": 15381 + }, + { + "epoch": 0.3079248304681831, + "grad_norm": 1.0415403842926025, + "learning_rate": 8.107554250755079e-06, + "loss": 0.3279, + "step": 15382 + }, + { + "epoch": 0.30794484898531144, + "grad_norm": 1.0909795761108398, + "learning_rate": 8.107300277457566e-06, + "loss": 0.3344, + "step": 15383 + }, + { + "epoch": 0.30796486750243973, + "grad_norm": 1.0554126501083374, + "learning_rate": 8.107046291097629e-06, + "loss": 0.2802, + "step": 15384 + }, + { + "epoch": 0.3079848860195681, + "grad_norm": 1.0721229314804077, + "learning_rate": 8.106792291676339e-06, + "loss": 0.3564, + "step": 15385 + }, + { + "epoch": 0.30800490453669643, + "grad_norm": 1.0526437759399414, + "learning_rate": 8.106538279194762e-06, + "loss": 0.3392, + "step": 15386 + }, + { + "epoch": 0.3080249230538248, + "grad_norm": 1.169018030166626, + "learning_rate": 8.106284253653969e-06, + "loss": 0.3168, + "step": 15387 + }, + { + "epoch": 0.30804494157095313, + "grad_norm": 1.0662169456481934, + "learning_rate": 8.106030215055023e-06, + "loss": 0.248, + "step": 15388 + }, + { + "epoch": 0.3080649600880815, + "grad_norm": 1.1352064609527588, + "learning_rate": 8.105776163398995e-06, + "loss": 0.3493, + "step": 15389 + }, + { + "epoch": 0.30808497860520984, + "grad_norm": 1.0489312410354614, + "learning_rate": 8.105522098686952e-06, + "loss": 0.3486, + "step": 15390 + }, + { + "epoch": 0.3081049971223382, + "grad_norm": 1.0486770868301392, + "learning_rate": 8.105268020919963e-06, + "loss": 0.3174, + "step": 15391 + }, + { + "epoch": 0.3081250156394665, + "grad_norm": 1.9903364181518555, + "learning_rate": 8.105013930099097e-06, + "loss": 0.7951, + "step": 15392 + }, + { + "epoch": 0.30814503415659483, + "grad_norm": 1.1805144548416138, + "learning_rate": 8.104759826225417e-06, + "loss": 0.3679, + "step": 15393 + }, + { + "epoch": 0.3081650526737232, + "grad_norm": 1.069310188293457, + "learning_rate": 8.104505709299998e-06, + "loss": 0.3336, + "step": 15394 + }, + { + "epoch": 0.30818507119085153, + "grad_norm": 1.1435292959213257, + "learning_rate": 8.104251579323902e-06, + "loss": 0.3428, + "step": 15395 + }, + { + "epoch": 0.3082050897079799, + "grad_norm": 1.1177583932876587, + "learning_rate": 8.103997436298205e-06, + "loss": 0.3117, + "step": 15396 + }, + { + "epoch": 0.30822510822510824, + "grad_norm": 1.0116679668426514, + "learning_rate": 8.103743280223969e-06, + "loss": 0.2991, + "step": 15397 + }, + { + "epoch": 0.3082451267422366, + "grad_norm": 1.1854888200759888, + "learning_rate": 8.103489111102262e-06, + "loss": 0.3051, + "step": 15398 + }, + { + "epoch": 0.30826514525936494, + "grad_norm": 1.0610312223434448, + "learning_rate": 8.103234928934156e-06, + "loss": 0.2989, + "step": 15399 + }, + { + "epoch": 0.30828516377649323, + "grad_norm": 1.1036121845245361, + "learning_rate": 8.102980733720717e-06, + "loss": 0.3137, + "step": 15400 + }, + { + "epoch": 0.3083051822936216, + "grad_norm": 2.171123743057251, + "learning_rate": 8.102726525463017e-06, + "loss": 0.8856, + "step": 15401 + }, + { + "epoch": 0.30832520081074993, + "grad_norm": 1.1498990058898926, + "learning_rate": 8.10247230416212e-06, + "loss": 0.3447, + "step": 15402 + }, + { + "epoch": 0.3083452193278783, + "grad_norm": 1.0601731538772583, + "learning_rate": 8.1022180698191e-06, + "loss": 0.3132, + "step": 15403 + }, + { + "epoch": 0.30836523784500663, + "grad_norm": 1.5454212427139282, + "learning_rate": 8.10196382243502e-06, + "loss": 0.3755, + "step": 15404 + }, + { + "epoch": 0.308385256362135, + "grad_norm": 1.1190470457077026, + "learning_rate": 8.101709562010953e-06, + "loss": 0.2852, + "step": 15405 + }, + { + "epoch": 0.30840527487926334, + "grad_norm": 1.267340064048767, + "learning_rate": 8.101455288547968e-06, + "loss": 0.2902, + "step": 15406 + }, + { + "epoch": 0.3084252933963917, + "grad_norm": 1.1871627569198608, + "learning_rate": 8.10120100204713e-06, + "loss": 0.3665, + "step": 15407 + }, + { + "epoch": 0.30844531191352, + "grad_norm": 1.0592174530029297, + "learning_rate": 8.10094670250951e-06, + "loss": 0.3017, + "step": 15408 + }, + { + "epoch": 0.30846533043064833, + "grad_norm": 1.1920603513717651, + "learning_rate": 8.10069238993618e-06, + "loss": 0.3598, + "step": 15409 + }, + { + "epoch": 0.3084853489477767, + "grad_norm": 1.0287890434265137, + "learning_rate": 8.100438064328205e-06, + "loss": 0.3543, + "step": 15410 + }, + { + "epoch": 0.30850536746490503, + "grad_norm": 1.0442572832107544, + "learning_rate": 8.100183725686656e-06, + "loss": 0.2994, + "step": 15411 + }, + { + "epoch": 0.3085253859820334, + "grad_norm": 1.086262583732605, + "learning_rate": 8.099929374012603e-06, + "loss": 0.3346, + "step": 15412 + }, + { + "epoch": 0.30854540449916174, + "grad_norm": 1.0753629207611084, + "learning_rate": 8.099675009307111e-06, + "loss": 0.3504, + "step": 15413 + }, + { + "epoch": 0.3085654230162901, + "grad_norm": 1.1203384399414062, + "learning_rate": 8.099420631571254e-06, + "loss": 0.2827, + "step": 15414 + }, + { + "epoch": 0.30858544153341844, + "grad_norm": 1.1071906089782715, + "learning_rate": 8.0991662408061e-06, + "loss": 0.2744, + "step": 15415 + }, + { + "epoch": 0.30860546005054673, + "grad_norm": 1.1645761728286743, + "learning_rate": 8.098911837012717e-06, + "loss": 0.3027, + "step": 15416 + }, + { + "epoch": 0.3086254785676751, + "grad_norm": 1.1544467210769653, + "learning_rate": 8.098657420192176e-06, + "loss": 0.3394, + "step": 15417 + }, + { + "epoch": 0.30864549708480343, + "grad_norm": 1.1418299674987793, + "learning_rate": 8.098402990345546e-06, + "loss": 0.3434, + "step": 15418 + }, + { + "epoch": 0.3086655156019318, + "grad_norm": 1.0901949405670166, + "learning_rate": 8.098148547473897e-06, + "loss": 0.3232, + "step": 15419 + }, + { + "epoch": 0.30868553411906013, + "grad_norm": 1.8782521486282349, + "learning_rate": 8.097894091578298e-06, + "loss": 0.8274, + "step": 15420 + }, + { + "epoch": 0.3087055526361885, + "grad_norm": 1.1385003328323364, + "learning_rate": 8.097639622659818e-06, + "loss": 0.3258, + "step": 15421 + }, + { + "epoch": 0.30872557115331684, + "grad_norm": 1.03463613986969, + "learning_rate": 8.097385140719529e-06, + "loss": 0.3323, + "step": 15422 + }, + { + "epoch": 0.3087455896704452, + "grad_norm": 1.1769613027572632, + "learning_rate": 8.0971306457585e-06, + "loss": 0.3115, + "step": 15423 + }, + { + "epoch": 0.3087656081875735, + "grad_norm": 1.0333759784698486, + "learning_rate": 8.096876137777798e-06, + "loss": 0.3385, + "step": 15424 + }, + { + "epoch": 0.30878562670470183, + "grad_norm": 1.0377269983291626, + "learning_rate": 8.096621616778498e-06, + "loss": 0.2811, + "step": 15425 + }, + { + "epoch": 0.3088056452218302, + "grad_norm": 1.0172522068023682, + "learning_rate": 8.096367082761665e-06, + "loss": 0.2754, + "step": 15426 + }, + { + "epoch": 0.30882566373895853, + "grad_norm": 1.0654082298278809, + "learning_rate": 8.096112535728373e-06, + "loss": 0.3114, + "step": 15427 + }, + { + "epoch": 0.3088456822560869, + "grad_norm": 1.092921257019043, + "learning_rate": 8.095857975679687e-06, + "loss": 0.328, + "step": 15428 + }, + { + "epoch": 0.30886570077321523, + "grad_norm": 1.3087270259857178, + "learning_rate": 8.095603402616683e-06, + "loss": 0.3309, + "step": 15429 + }, + { + "epoch": 0.3088857192903436, + "grad_norm": 1.1932345628738403, + "learning_rate": 8.095348816540427e-06, + "loss": 0.3223, + "step": 15430 + }, + { + "epoch": 0.30890573780747194, + "grad_norm": 1.938783049583435, + "learning_rate": 8.095094217451992e-06, + "loss": 0.9088, + "step": 15431 + }, + { + "epoch": 0.30892575632460023, + "grad_norm": 1.231571078300476, + "learning_rate": 8.094839605352446e-06, + "loss": 0.3013, + "step": 15432 + }, + { + "epoch": 0.3089457748417286, + "grad_norm": 1.062790036201477, + "learning_rate": 8.094584980242861e-06, + "loss": 0.325, + "step": 15433 + }, + { + "epoch": 0.30896579335885693, + "grad_norm": 1.110206961631775, + "learning_rate": 8.094330342124307e-06, + "loss": 0.3301, + "step": 15434 + }, + { + "epoch": 0.3089858118759853, + "grad_norm": 1.1524996757507324, + "learning_rate": 8.094075690997853e-06, + "loss": 0.3312, + "step": 15435 + }, + { + "epoch": 0.30900583039311363, + "grad_norm": 1.078147292137146, + "learning_rate": 8.093821026864571e-06, + "loss": 0.3092, + "step": 15436 + }, + { + "epoch": 0.309025848910242, + "grad_norm": 1.7662678956985474, + "learning_rate": 8.093566349725532e-06, + "loss": 0.8193, + "step": 15437 + }, + { + "epoch": 0.30904586742737034, + "grad_norm": 1.1124433279037476, + "learning_rate": 8.093311659581806e-06, + "loss": 0.3066, + "step": 15438 + }, + { + "epoch": 0.3090658859444987, + "grad_norm": 1.072014570236206, + "learning_rate": 8.093056956434462e-06, + "loss": 0.3125, + "step": 15439 + }, + { + "epoch": 0.309085904461627, + "grad_norm": 1.1372088193893433, + "learning_rate": 8.092802240284572e-06, + "loss": 0.3345, + "step": 15440 + }, + { + "epoch": 0.30910592297875533, + "grad_norm": 1.1754333972930908, + "learning_rate": 8.092547511133207e-06, + "loss": 0.3036, + "step": 15441 + }, + { + "epoch": 0.3091259414958837, + "grad_norm": 1.2140804529190063, + "learning_rate": 8.09229276898144e-06, + "loss": 0.3359, + "step": 15442 + }, + { + "epoch": 0.30914596001301203, + "grad_norm": 1.1617093086242676, + "learning_rate": 8.092038013830337e-06, + "loss": 0.3579, + "step": 15443 + }, + { + "epoch": 0.3091659785301404, + "grad_norm": 1.0558241605758667, + "learning_rate": 8.091783245680972e-06, + "loss": 0.2777, + "step": 15444 + }, + { + "epoch": 0.30918599704726873, + "grad_norm": 1.017581820487976, + "learning_rate": 8.091528464534419e-06, + "loss": 0.3144, + "step": 15445 + }, + { + "epoch": 0.3092060155643971, + "grad_norm": 1.0346159934997559, + "learning_rate": 8.091273670391742e-06, + "loss": 0.3591, + "step": 15446 + }, + { + "epoch": 0.30922603408152544, + "grad_norm": 1.0178436040878296, + "learning_rate": 8.091018863254016e-06, + "loss": 0.3254, + "step": 15447 + }, + { + "epoch": 0.30924605259865373, + "grad_norm": 1.3415865898132324, + "learning_rate": 8.090764043122314e-06, + "loss": 0.3642, + "step": 15448 + }, + { + "epoch": 0.3092660711157821, + "grad_norm": 1.14958655834198, + "learning_rate": 8.090509209997704e-06, + "loss": 0.3405, + "step": 15449 + }, + { + "epoch": 0.30928608963291043, + "grad_norm": 1.0118741989135742, + "learning_rate": 8.090254363881258e-06, + "loss": 0.3003, + "step": 15450 + }, + { + "epoch": 0.3093061081500388, + "grad_norm": 1.1045341491699219, + "learning_rate": 8.089999504774048e-06, + "loss": 0.2905, + "step": 15451 + }, + { + "epoch": 0.30932612666716713, + "grad_norm": 1.2605441808700562, + "learning_rate": 8.089744632677145e-06, + "loss": 0.3683, + "step": 15452 + }, + { + "epoch": 0.3093461451842955, + "grad_norm": 1.1968536376953125, + "learning_rate": 8.08948974759162e-06, + "loss": 0.3393, + "step": 15453 + }, + { + "epoch": 0.30936616370142384, + "grad_norm": 2.0272157192230225, + "learning_rate": 8.089234849518545e-06, + "loss": 0.8408, + "step": 15454 + }, + { + "epoch": 0.3093861822185522, + "grad_norm": 1.816165804862976, + "learning_rate": 8.088979938458993e-06, + "loss": 0.7797, + "step": 15455 + }, + { + "epoch": 0.3094062007356805, + "grad_norm": 1.9150487184524536, + "learning_rate": 8.088725014414032e-06, + "loss": 0.8238, + "step": 15456 + }, + { + "epoch": 0.30942621925280883, + "grad_norm": 1.0553512573242188, + "learning_rate": 8.088470077384737e-06, + "loss": 0.2816, + "step": 15457 + }, + { + "epoch": 0.3094462377699372, + "grad_norm": 1.1134873628616333, + "learning_rate": 8.088215127372179e-06, + "loss": 0.3036, + "step": 15458 + }, + { + "epoch": 0.30946625628706553, + "grad_norm": 1.159867763519287, + "learning_rate": 8.087960164377428e-06, + "loss": 0.3411, + "step": 15459 + }, + { + "epoch": 0.3094862748041939, + "grad_norm": 1.035557508468628, + "learning_rate": 8.087705188401557e-06, + "loss": 0.2965, + "step": 15460 + }, + { + "epoch": 0.30950629332132223, + "grad_norm": 1.1715139150619507, + "learning_rate": 8.087450199445637e-06, + "loss": 0.3226, + "step": 15461 + }, + { + "epoch": 0.3095263118384506, + "grad_norm": 1.096580982208252, + "learning_rate": 8.087195197510742e-06, + "loss": 0.3077, + "step": 15462 + }, + { + "epoch": 0.30954633035557894, + "grad_norm": 1.2262699604034424, + "learning_rate": 8.086940182597943e-06, + "loss": 0.3491, + "step": 15463 + }, + { + "epoch": 0.30956634887270723, + "grad_norm": 1.1721612215042114, + "learning_rate": 8.086685154708312e-06, + "loss": 0.3312, + "step": 15464 + }, + { + "epoch": 0.3095863673898356, + "grad_norm": 1.0056278705596924, + "learning_rate": 8.08643011384292e-06, + "loss": 0.2606, + "step": 15465 + }, + { + "epoch": 0.30960638590696393, + "grad_norm": 1.1262563467025757, + "learning_rate": 8.08617506000284e-06, + "loss": 0.2983, + "step": 15466 + }, + { + "epoch": 0.3096264044240923, + "grad_norm": 1.0848668813705444, + "learning_rate": 8.085919993189147e-06, + "loss": 0.3147, + "step": 15467 + }, + { + "epoch": 0.30964642294122063, + "grad_norm": 1.1010117530822754, + "learning_rate": 8.085664913402906e-06, + "loss": 0.3551, + "step": 15468 + }, + { + "epoch": 0.309666441458349, + "grad_norm": 1.1991329193115234, + "learning_rate": 8.085409820645196e-06, + "loss": 0.371, + "step": 15469 + }, + { + "epoch": 0.30968645997547734, + "grad_norm": 1.137044906616211, + "learning_rate": 8.085154714917088e-06, + "loss": 0.3068, + "step": 15470 + }, + { + "epoch": 0.3097064784926057, + "grad_norm": 1.3002434968948364, + "learning_rate": 8.084899596219653e-06, + "loss": 0.3707, + "step": 15471 + }, + { + "epoch": 0.309726497009734, + "grad_norm": 0.9627085328102112, + "learning_rate": 8.084644464553964e-06, + "loss": 0.3034, + "step": 15472 + }, + { + "epoch": 0.30974651552686233, + "grad_norm": 1.1224069595336914, + "learning_rate": 8.084389319921092e-06, + "loss": 0.3218, + "step": 15473 + }, + { + "epoch": 0.3097665340439907, + "grad_norm": 1.2585104703903198, + "learning_rate": 8.084134162322112e-06, + "loss": 0.3283, + "step": 15474 + }, + { + "epoch": 0.30978655256111903, + "grad_norm": 1.2652660608291626, + "learning_rate": 8.083878991758095e-06, + "loss": 0.3543, + "step": 15475 + }, + { + "epoch": 0.3098065710782474, + "grad_norm": 1.230024814605713, + "learning_rate": 8.083623808230115e-06, + "loss": 0.3913, + "step": 15476 + }, + { + "epoch": 0.30982658959537573, + "grad_norm": 1.2480404376983643, + "learning_rate": 8.083368611739245e-06, + "loss": 0.3064, + "step": 15477 + }, + { + "epoch": 0.3098466081125041, + "grad_norm": 1.1111053228378296, + "learning_rate": 8.083113402286556e-06, + "loss": 0.3368, + "step": 15478 + }, + { + "epoch": 0.30986662662963244, + "grad_norm": 1.1680870056152344, + "learning_rate": 8.082858179873122e-06, + "loss": 0.3773, + "step": 15479 + }, + { + "epoch": 0.30988664514676073, + "grad_norm": 1.1205155849456787, + "learning_rate": 8.082602944500015e-06, + "loss": 0.3243, + "step": 15480 + }, + { + "epoch": 0.3099066636638891, + "grad_norm": 1.1093144416809082, + "learning_rate": 8.08234769616831e-06, + "loss": 0.3089, + "step": 15481 + }, + { + "epoch": 0.30992668218101743, + "grad_norm": 1.092448353767395, + "learning_rate": 8.082092434879077e-06, + "loss": 0.3719, + "step": 15482 + }, + { + "epoch": 0.3099467006981458, + "grad_norm": 1.0595682859420776, + "learning_rate": 8.08183716063339e-06, + "loss": 0.2485, + "step": 15483 + }, + { + "epoch": 0.30996671921527413, + "grad_norm": 1.1641124486923218, + "learning_rate": 8.081581873432325e-06, + "loss": 0.3087, + "step": 15484 + }, + { + "epoch": 0.3099867377324025, + "grad_norm": 1.126804232597351, + "learning_rate": 8.081326573276953e-06, + "loss": 0.3167, + "step": 15485 + }, + { + "epoch": 0.31000675624953083, + "grad_norm": 1.231791377067566, + "learning_rate": 8.081071260168345e-06, + "loss": 0.339, + "step": 15486 + }, + { + "epoch": 0.3100267747666592, + "grad_norm": 1.1102770566940308, + "learning_rate": 8.08081593410758e-06, + "loss": 0.298, + "step": 15487 + }, + { + "epoch": 0.3100467932837875, + "grad_norm": 1.171085238456726, + "learning_rate": 8.080560595095724e-06, + "loss": 0.3236, + "step": 15488 + }, + { + "epoch": 0.31006681180091583, + "grad_norm": 1.1176741123199463, + "learning_rate": 8.080305243133857e-06, + "loss": 0.2894, + "step": 15489 + }, + { + "epoch": 0.3100868303180442, + "grad_norm": 1.1860147714614868, + "learning_rate": 8.080049878223048e-06, + "loss": 0.3233, + "step": 15490 + }, + { + "epoch": 0.31010684883517253, + "grad_norm": 1.0331463813781738, + "learning_rate": 8.079794500364373e-06, + "loss": 0.3221, + "step": 15491 + }, + { + "epoch": 0.3101268673523009, + "grad_norm": 1.1915379762649536, + "learning_rate": 8.079539109558903e-06, + "loss": 0.3309, + "step": 15492 + }, + { + "epoch": 0.31014688586942923, + "grad_norm": 1.081940770149231, + "learning_rate": 8.079283705807716e-06, + "loss": 0.3021, + "step": 15493 + }, + { + "epoch": 0.3101669043865576, + "grad_norm": 1.0240644216537476, + "learning_rate": 8.079028289111881e-06, + "loss": 0.2789, + "step": 15494 + }, + { + "epoch": 0.31018692290368594, + "grad_norm": 1.914732575416565, + "learning_rate": 8.078772859472475e-06, + "loss": 0.8376, + "step": 15495 + }, + { + "epoch": 0.31020694142081423, + "grad_norm": 1.074008822441101, + "learning_rate": 8.07851741689057e-06, + "loss": 0.3356, + "step": 15496 + }, + { + "epoch": 0.3102269599379426, + "grad_norm": 1.0133734941482544, + "learning_rate": 8.07826196136724e-06, + "loss": 0.2604, + "step": 15497 + }, + { + "epoch": 0.31024697845507093, + "grad_norm": 1.8638442754745483, + "learning_rate": 8.078006492903559e-06, + "loss": 0.7939, + "step": 15498 + }, + { + "epoch": 0.3102669969721993, + "grad_norm": 1.8528732061386108, + "learning_rate": 8.0777510115006e-06, + "loss": 0.7872, + "step": 15499 + }, + { + "epoch": 0.31028701548932763, + "grad_norm": 1.9606046676635742, + "learning_rate": 8.077495517159439e-06, + "loss": 0.8502, + "step": 15500 + }, + { + "epoch": 0.310307034006456, + "grad_norm": 1.80347740650177, + "learning_rate": 8.07724000988115e-06, + "loss": 0.7793, + "step": 15501 + }, + { + "epoch": 0.31032705252358433, + "grad_norm": 1.2161442041397095, + "learning_rate": 8.076984489666806e-06, + "loss": 0.352, + "step": 15502 + }, + { + "epoch": 0.3103470710407127, + "grad_norm": 1.1494582891464233, + "learning_rate": 8.076728956517482e-06, + "loss": 0.3469, + "step": 15503 + }, + { + "epoch": 0.310367089557841, + "grad_norm": 1.1053955554962158, + "learning_rate": 8.076473410434249e-06, + "loss": 0.3231, + "step": 15504 + }, + { + "epoch": 0.31038710807496933, + "grad_norm": 1.3256380558013916, + "learning_rate": 8.076217851418186e-06, + "loss": 0.3662, + "step": 15505 + }, + { + "epoch": 0.3104071265920977, + "grad_norm": 1.1116013526916504, + "learning_rate": 8.075962279470365e-06, + "loss": 0.338, + "step": 15506 + }, + { + "epoch": 0.31042714510922603, + "grad_norm": 1.1008762121200562, + "learning_rate": 8.075706694591861e-06, + "loss": 0.3437, + "step": 15507 + }, + { + "epoch": 0.3104471636263544, + "grad_norm": 1.0870670080184937, + "learning_rate": 8.075451096783747e-06, + "loss": 0.3182, + "step": 15508 + }, + { + "epoch": 0.31046718214348273, + "grad_norm": 1.2081968784332275, + "learning_rate": 8.075195486047099e-06, + "loss": 0.3263, + "step": 15509 + }, + { + "epoch": 0.3104872006606111, + "grad_norm": 1.0807856321334839, + "learning_rate": 8.07493986238299e-06, + "loss": 0.3555, + "step": 15510 + }, + { + "epoch": 0.31050721917773944, + "grad_norm": 1.050170660018921, + "learning_rate": 8.074684225792496e-06, + "loss": 0.3517, + "step": 15511 + }, + { + "epoch": 0.31052723769486773, + "grad_norm": 1.1034810543060303, + "learning_rate": 8.074428576276692e-06, + "loss": 0.3451, + "step": 15512 + }, + { + "epoch": 0.3105472562119961, + "grad_norm": 1.0954618453979492, + "learning_rate": 8.074172913836652e-06, + "loss": 0.3381, + "step": 15513 + }, + { + "epoch": 0.31056727472912443, + "grad_norm": 1.0237303972244263, + "learning_rate": 8.073917238473448e-06, + "loss": 0.3408, + "step": 15514 + }, + { + "epoch": 0.3105872932462528, + "grad_norm": 1.090315580368042, + "learning_rate": 8.073661550188159e-06, + "loss": 0.2973, + "step": 15515 + }, + { + "epoch": 0.31060731176338113, + "grad_norm": 1.1209313869476318, + "learning_rate": 8.073405848981858e-06, + "loss": 0.3487, + "step": 15516 + }, + { + "epoch": 0.3106273302805095, + "grad_norm": 1.1557265520095825, + "learning_rate": 8.073150134855622e-06, + "loss": 0.3404, + "step": 15517 + }, + { + "epoch": 0.31064734879763783, + "grad_norm": 1.01973557472229, + "learning_rate": 8.072894407810523e-06, + "loss": 0.2625, + "step": 15518 + }, + { + "epoch": 0.3106673673147662, + "grad_norm": 1.126299500465393, + "learning_rate": 8.072638667847637e-06, + "loss": 0.3139, + "step": 15519 + }, + { + "epoch": 0.3106873858318945, + "grad_norm": 1.0481189489364624, + "learning_rate": 8.07238291496804e-06, + "loss": 0.2771, + "step": 15520 + }, + { + "epoch": 0.31070740434902283, + "grad_norm": 1.0103671550750732, + "learning_rate": 8.072127149172806e-06, + "loss": 0.3088, + "step": 15521 + }, + { + "epoch": 0.3107274228661512, + "grad_norm": 1.143117070198059, + "learning_rate": 8.071871370463011e-06, + "loss": 0.3178, + "step": 15522 + }, + { + "epoch": 0.31074744138327953, + "grad_norm": 1.1639984846115112, + "learning_rate": 8.07161557883973e-06, + "loss": 0.3328, + "step": 15523 + }, + { + "epoch": 0.3107674599004079, + "grad_norm": 1.0278252363204956, + "learning_rate": 8.071359774304037e-06, + "loss": 0.3465, + "step": 15524 + }, + { + "epoch": 0.31078747841753623, + "grad_norm": 1.2640053033828735, + "learning_rate": 8.071103956857008e-06, + "loss": 0.3536, + "step": 15525 + }, + { + "epoch": 0.3108074969346646, + "grad_norm": 1.0900068283081055, + "learning_rate": 8.070848126499719e-06, + "loss": 0.3145, + "step": 15526 + }, + { + "epoch": 0.31082751545179294, + "grad_norm": 1.8948763608932495, + "learning_rate": 8.070592283233247e-06, + "loss": 0.7948, + "step": 15527 + }, + { + "epoch": 0.31084753396892123, + "grad_norm": 1.473596215248108, + "learning_rate": 8.070336427058665e-06, + "loss": 0.3288, + "step": 15528 + }, + { + "epoch": 0.3108675524860496, + "grad_norm": 1.1071689128875732, + "learning_rate": 8.07008055797705e-06, + "loss": 0.3249, + "step": 15529 + }, + { + "epoch": 0.31088757100317793, + "grad_norm": 1.7752492427825928, + "learning_rate": 8.069824675989474e-06, + "loss": 0.7619, + "step": 15530 + }, + { + "epoch": 0.3109075895203063, + "grad_norm": 1.1475969552993774, + "learning_rate": 8.06956878109702e-06, + "loss": 0.3259, + "step": 15531 + }, + { + "epoch": 0.31092760803743463, + "grad_norm": 1.173184871673584, + "learning_rate": 8.069312873300757e-06, + "loss": 0.3259, + "step": 15532 + }, + { + "epoch": 0.310947626554563, + "grad_norm": 1.1805336475372314, + "learning_rate": 8.069056952601764e-06, + "loss": 0.3489, + "step": 15533 + }, + { + "epoch": 0.31096764507169133, + "grad_norm": 1.0674395561218262, + "learning_rate": 8.068801019001115e-06, + "loss": 0.2958, + "step": 15534 + }, + { + "epoch": 0.3109876635888197, + "grad_norm": 1.0987324714660645, + "learning_rate": 8.06854507249989e-06, + "loss": 0.312, + "step": 15535 + }, + { + "epoch": 0.311007682105948, + "grad_norm": 1.1175756454467773, + "learning_rate": 8.068289113099158e-06, + "loss": 0.3177, + "step": 15536 + }, + { + "epoch": 0.31102770062307633, + "grad_norm": 1.242931604385376, + "learning_rate": 8.0680331408e-06, + "loss": 0.3278, + "step": 15537 + }, + { + "epoch": 0.3110477191402047, + "grad_norm": 1.0326552391052246, + "learning_rate": 8.067777155603493e-06, + "loss": 0.308, + "step": 15538 + }, + { + "epoch": 0.31106773765733303, + "grad_norm": 1.1137994527816772, + "learning_rate": 8.067521157510709e-06, + "loss": 0.2946, + "step": 15539 + }, + { + "epoch": 0.3110877561744614, + "grad_norm": 1.02821946144104, + "learning_rate": 8.067265146522726e-06, + "loss": 0.2893, + "step": 15540 + }, + { + "epoch": 0.31110777469158973, + "grad_norm": 0.9823203682899475, + "learning_rate": 8.067009122640622e-06, + "loss": 0.3193, + "step": 15541 + }, + { + "epoch": 0.3111277932087181, + "grad_norm": 1.13719642162323, + "learning_rate": 8.06675308586547e-06, + "loss": 0.31, + "step": 15542 + }, + { + "epoch": 0.31114781172584643, + "grad_norm": 1.0853917598724365, + "learning_rate": 8.06649703619835e-06, + "loss": 0.3352, + "step": 15543 + }, + { + "epoch": 0.31116783024297473, + "grad_norm": 1.1899343729019165, + "learning_rate": 8.066240973640334e-06, + "loss": 0.3212, + "step": 15544 + }, + { + "epoch": 0.3111878487601031, + "grad_norm": 1.2770116329193115, + "learning_rate": 8.0659848981925e-06, + "loss": 0.3811, + "step": 15545 + }, + { + "epoch": 0.31120786727723143, + "grad_norm": 1.1089086532592773, + "learning_rate": 8.065728809855929e-06, + "loss": 0.3388, + "step": 15546 + }, + { + "epoch": 0.3112278857943598, + "grad_norm": 1.1384453773498535, + "learning_rate": 8.065472708631692e-06, + "loss": 0.3189, + "step": 15547 + }, + { + "epoch": 0.31124790431148813, + "grad_norm": 1.0740376710891724, + "learning_rate": 8.065216594520866e-06, + "loss": 0.321, + "step": 15548 + }, + { + "epoch": 0.3112679228286165, + "grad_norm": 1.2389968633651733, + "learning_rate": 8.064960467524532e-06, + "loss": 0.3807, + "step": 15549 + }, + { + "epoch": 0.31128794134574483, + "grad_norm": 0.9863143563270569, + "learning_rate": 8.064704327643762e-06, + "loss": 0.2878, + "step": 15550 + }, + { + "epoch": 0.3113079598628732, + "grad_norm": 1.0954824686050415, + "learning_rate": 8.064448174879635e-06, + "loss": 0.3026, + "step": 15551 + }, + { + "epoch": 0.3113279783800015, + "grad_norm": 1.2275148630142212, + "learning_rate": 8.064192009233228e-06, + "loss": 0.2907, + "step": 15552 + }, + { + "epoch": 0.31134799689712983, + "grad_norm": 1.1235240697860718, + "learning_rate": 8.063935830705615e-06, + "loss": 0.3527, + "step": 15553 + }, + { + "epoch": 0.3113680154142582, + "grad_norm": 1.2883719205856323, + "learning_rate": 8.063679639297876e-06, + "loss": 0.3392, + "step": 15554 + }, + { + "epoch": 0.31138803393138653, + "grad_norm": 1.1301016807556152, + "learning_rate": 8.063423435011088e-06, + "loss": 0.348, + "step": 15555 + }, + { + "epoch": 0.3114080524485149, + "grad_norm": 1.10042142868042, + "learning_rate": 8.063167217846326e-06, + "loss": 0.3397, + "step": 15556 + }, + { + "epoch": 0.31142807096564323, + "grad_norm": 1.088335394859314, + "learning_rate": 8.062910987804669e-06, + "loss": 0.358, + "step": 15557 + }, + { + "epoch": 0.3114480894827716, + "grad_norm": 1.127225637435913, + "learning_rate": 8.062654744887193e-06, + "loss": 0.3392, + "step": 15558 + }, + { + "epoch": 0.31146810799989993, + "grad_norm": 1.0706309080123901, + "learning_rate": 8.062398489094976e-06, + "loss": 0.3125, + "step": 15559 + }, + { + "epoch": 0.31148812651702823, + "grad_norm": 1.0494557619094849, + "learning_rate": 8.062142220429095e-06, + "loss": 0.3063, + "step": 15560 + }, + { + "epoch": 0.3115081450341566, + "grad_norm": 1.164910078048706, + "learning_rate": 8.061885938890628e-06, + "loss": 0.3492, + "step": 15561 + }, + { + "epoch": 0.31152816355128493, + "grad_norm": 1.0703861713409424, + "learning_rate": 8.06162964448065e-06, + "loss": 0.3072, + "step": 15562 + }, + { + "epoch": 0.3115481820684133, + "grad_norm": 1.0987967252731323, + "learning_rate": 8.061373337200238e-06, + "loss": 0.3606, + "step": 15563 + }, + { + "epoch": 0.31156820058554163, + "grad_norm": 1.247041940689087, + "learning_rate": 8.061117017050475e-06, + "loss": 0.279, + "step": 15564 + }, + { + "epoch": 0.31158821910267, + "grad_norm": 1.2705273628234863, + "learning_rate": 8.060860684032432e-06, + "loss": 0.3062, + "step": 15565 + }, + { + "epoch": 0.31160823761979833, + "grad_norm": 1.1457998752593994, + "learning_rate": 8.06060433814719e-06, + "loss": 0.3331, + "step": 15566 + }, + { + "epoch": 0.3116282561369267, + "grad_norm": 1.1847846508026123, + "learning_rate": 8.060347979395828e-06, + "loss": 0.3119, + "step": 15567 + }, + { + "epoch": 0.311648274654055, + "grad_norm": 1.0958566665649414, + "learning_rate": 8.06009160777942e-06, + "loss": 0.3398, + "step": 15568 + }, + { + "epoch": 0.31166829317118333, + "grad_norm": 1.0121901035308838, + "learning_rate": 8.059835223299047e-06, + "loss": 0.3126, + "step": 15569 + }, + { + "epoch": 0.3116883116883117, + "grad_norm": 1.1481376886367798, + "learning_rate": 8.059578825955782e-06, + "loss": 0.3323, + "step": 15570 + }, + { + "epoch": 0.31170833020544003, + "grad_norm": 1.0551773309707642, + "learning_rate": 8.059322415750709e-06, + "loss": 0.3089, + "step": 15571 + }, + { + "epoch": 0.3117283487225684, + "grad_norm": 1.0234023332595825, + "learning_rate": 8.0590659926849e-06, + "loss": 0.3036, + "step": 15572 + }, + { + "epoch": 0.31174836723969673, + "grad_norm": 1.0738961696624756, + "learning_rate": 8.05880955675944e-06, + "loss": 0.3382, + "step": 15573 + }, + { + "epoch": 0.3117683857568251, + "grad_norm": 1.053776741027832, + "learning_rate": 8.0585531079754e-06, + "loss": 0.3449, + "step": 15574 + }, + { + "epoch": 0.31178840427395343, + "grad_norm": 1.218919038772583, + "learning_rate": 8.05829664633386e-06, + "loss": 0.3751, + "step": 15575 + }, + { + "epoch": 0.31180842279108173, + "grad_norm": 1.1521327495574951, + "learning_rate": 8.058040171835901e-06, + "loss": 0.2992, + "step": 15576 + }, + { + "epoch": 0.3118284413082101, + "grad_norm": 1.2032290697097778, + "learning_rate": 8.057783684482601e-06, + "loss": 0.3173, + "step": 15577 + }, + { + "epoch": 0.31184845982533843, + "grad_norm": 1.1117981672286987, + "learning_rate": 8.057527184275034e-06, + "loss": 0.3462, + "step": 15578 + }, + { + "epoch": 0.3118684783424668, + "grad_norm": 1.0145788192749023, + "learning_rate": 8.05727067121428e-06, + "loss": 0.321, + "step": 15579 + }, + { + "epoch": 0.31188849685959513, + "grad_norm": 1.1814663410186768, + "learning_rate": 8.05701414530142e-06, + "loss": 0.3803, + "step": 15580 + }, + { + "epoch": 0.3119085153767235, + "grad_norm": 1.8632586002349854, + "learning_rate": 8.056757606537528e-06, + "loss": 0.804, + "step": 15581 + }, + { + "epoch": 0.31192853389385183, + "grad_norm": 1.1638599634170532, + "learning_rate": 8.056501054923687e-06, + "loss": 0.3139, + "step": 15582 + }, + { + "epoch": 0.3119485524109802, + "grad_norm": 1.2123302221298218, + "learning_rate": 8.056244490460972e-06, + "loss": 0.3397, + "step": 15583 + }, + { + "epoch": 0.3119685709281085, + "grad_norm": 1.0324643850326538, + "learning_rate": 8.055987913150464e-06, + "loss": 0.2811, + "step": 15584 + }, + { + "epoch": 0.31198858944523683, + "grad_norm": 1.0772039890289307, + "learning_rate": 8.055731322993239e-06, + "loss": 0.2996, + "step": 15585 + }, + { + "epoch": 0.3120086079623652, + "grad_norm": 1.0735371112823486, + "learning_rate": 8.055474719990379e-06, + "loss": 0.3371, + "step": 15586 + }, + { + "epoch": 0.31202862647949353, + "grad_norm": 1.1123563051223755, + "learning_rate": 8.05521810414296e-06, + "loss": 0.2629, + "step": 15587 + }, + { + "epoch": 0.3120486449966219, + "grad_norm": 1.0552252531051636, + "learning_rate": 8.054961475452062e-06, + "loss": 0.3011, + "step": 15588 + }, + { + "epoch": 0.31206866351375023, + "grad_norm": 2.014307737350464, + "learning_rate": 8.054704833918763e-06, + "loss": 0.8192, + "step": 15589 + }, + { + "epoch": 0.3120886820308786, + "grad_norm": 1.207537055015564, + "learning_rate": 8.05444817954414e-06, + "loss": 0.2855, + "step": 15590 + }, + { + "epoch": 0.3121087005480069, + "grad_norm": 1.0965988636016846, + "learning_rate": 8.054191512329276e-06, + "loss": 0.3363, + "step": 15591 + }, + { + "epoch": 0.31212871906513523, + "grad_norm": 1.12181556224823, + "learning_rate": 8.05393483227525e-06, + "loss": 0.3432, + "step": 15592 + }, + { + "epoch": 0.3121487375822636, + "grad_norm": 1.1431176662445068, + "learning_rate": 8.053678139383138e-06, + "loss": 0.3203, + "step": 15593 + }, + { + "epoch": 0.31216875609939193, + "grad_norm": 1.1371500492095947, + "learning_rate": 8.053421433654019e-06, + "loss": 0.3665, + "step": 15594 + }, + { + "epoch": 0.3121887746165203, + "grad_norm": 1.1302365064620972, + "learning_rate": 8.053164715088974e-06, + "loss": 0.3476, + "step": 15595 + }, + { + "epoch": 0.31220879313364863, + "grad_norm": 1.0446758270263672, + "learning_rate": 8.052907983689082e-06, + "loss": 0.3347, + "step": 15596 + }, + { + "epoch": 0.312228811650777, + "grad_norm": 1.9681047201156616, + "learning_rate": 8.052651239455422e-06, + "loss": 0.9005, + "step": 15597 + }, + { + "epoch": 0.31224883016790533, + "grad_norm": 1.2107620239257812, + "learning_rate": 8.052394482389071e-06, + "loss": 0.3497, + "step": 15598 + }, + { + "epoch": 0.31226884868503363, + "grad_norm": 1.0108243227005005, + "learning_rate": 8.052137712491115e-06, + "loss": 0.2862, + "step": 15599 + }, + { + "epoch": 0.312288867202162, + "grad_norm": 1.1710752248764038, + "learning_rate": 8.051880929762625e-06, + "loss": 0.341, + "step": 15600 + }, + { + "epoch": 0.31230888571929033, + "grad_norm": 1.1755807399749756, + "learning_rate": 8.051624134204684e-06, + "loss": 0.2808, + "step": 15601 + }, + { + "epoch": 0.3123289042364187, + "grad_norm": 1.1380386352539062, + "learning_rate": 8.051367325818376e-06, + "loss": 0.3326, + "step": 15602 + }, + { + "epoch": 0.31234892275354703, + "grad_norm": 1.0612947940826416, + "learning_rate": 8.051110504604773e-06, + "loss": 0.3147, + "step": 15603 + }, + { + "epoch": 0.3123689412706754, + "grad_norm": 1.1915894746780396, + "learning_rate": 8.050853670564958e-06, + "loss": 0.3253, + "step": 15604 + }, + { + "epoch": 0.31238895978780373, + "grad_norm": 1.9729158878326416, + "learning_rate": 8.050596823700011e-06, + "loss": 0.7844, + "step": 15605 + }, + { + "epoch": 0.3124089783049321, + "grad_norm": 1.087348461151123, + "learning_rate": 8.050339964011014e-06, + "loss": 0.3036, + "step": 15606 + }, + { + "epoch": 0.3124289968220604, + "grad_norm": 0.99129319190979, + "learning_rate": 8.050083091499041e-06, + "loss": 0.2849, + "step": 15607 + }, + { + "epoch": 0.31244901533918873, + "grad_norm": 1.0322099924087524, + "learning_rate": 8.049826206165176e-06, + "loss": 0.3395, + "step": 15608 + }, + { + "epoch": 0.3124690338563171, + "grad_norm": 1.0499634742736816, + "learning_rate": 8.049569308010499e-06, + "loss": 0.2715, + "step": 15609 + }, + { + "epoch": 0.31248905237344543, + "grad_norm": 1.1836693286895752, + "learning_rate": 8.049312397036088e-06, + "loss": 0.3463, + "step": 15610 + }, + { + "epoch": 0.3125090708905738, + "grad_norm": 1.1681461334228516, + "learning_rate": 8.049055473243024e-06, + "loss": 0.3066, + "step": 15611 + }, + { + "epoch": 0.31252908940770213, + "grad_norm": 1.086311936378479, + "learning_rate": 8.048798536632387e-06, + "loss": 0.3457, + "step": 15612 + }, + { + "epoch": 0.3125491079248305, + "grad_norm": 1.2309616804122925, + "learning_rate": 8.048541587205257e-06, + "loss": 0.3366, + "step": 15613 + }, + { + "epoch": 0.31256912644195883, + "grad_norm": 1.2120205163955688, + "learning_rate": 8.048284624962715e-06, + "loss": 0.3497, + "step": 15614 + }, + { + "epoch": 0.31258914495908713, + "grad_norm": 1.8778371810913086, + "learning_rate": 8.04802764990584e-06, + "loss": 0.7875, + "step": 15615 + }, + { + "epoch": 0.3126091634762155, + "grad_norm": 1.1010512113571167, + "learning_rate": 8.047770662035713e-06, + "loss": 0.3739, + "step": 15616 + }, + { + "epoch": 0.31262918199334383, + "grad_norm": 1.811161994934082, + "learning_rate": 8.047513661353413e-06, + "loss": 0.8696, + "step": 15617 + }, + { + "epoch": 0.3126492005104722, + "grad_norm": 1.1537748575210571, + "learning_rate": 8.04725664786002e-06, + "loss": 0.3231, + "step": 15618 + }, + { + "epoch": 0.31266921902760053, + "grad_norm": 1.1887705326080322, + "learning_rate": 8.046999621556619e-06, + "loss": 0.3381, + "step": 15619 + }, + { + "epoch": 0.3126892375447289, + "grad_norm": 1.1950410604476929, + "learning_rate": 8.046742582444285e-06, + "loss": 0.2891, + "step": 15620 + }, + { + "epoch": 0.31270925606185723, + "grad_norm": 1.0487631559371948, + "learning_rate": 8.046485530524103e-06, + "loss": 0.3113, + "step": 15621 + }, + { + "epoch": 0.3127292745789856, + "grad_norm": 1.245863914489746, + "learning_rate": 8.04622846579715e-06, + "loss": 0.3388, + "step": 15622 + }, + { + "epoch": 0.3127492930961139, + "grad_norm": 1.0595300197601318, + "learning_rate": 8.045971388264509e-06, + "loss": 0.3103, + "step": 15623 + }, + { + "epoch": 0.31276931161324223, + "grad_norm": 1.2012207508087158, + "learning_rate": 8.04571429792726e-06, + "loss": 0.3401, + "step": 15624 + }, + { + "epoch": 0.3127893301303706, + "grad_norm": 1.13100004196167, + "learning_rate": 8.045457194786482e-06, + "loss": 0.3584, + "step": 15625 + }, + { + "epoch": 0.31280934864749893, + "grad_norm": 1.0507965087890625, + "learning_rate": 8.045200078843257e-06, + "loss": 0.3623, + "step": 15626 + }, + { + "epoch": 0.3128293671646273, + "grad_norm": 1.0584055185317993, + "learning_rate": 8.044942950098666e-06, + "loss": 0.3229, + "step": 15627 + }, + { + "epoch": 0.31284938568175563, + "grad_norm": 1.0293183326721191, + "learning_rate": 8.044685808553794e-06, + "loss": 0.3431, + "step": 15628 + }, + { + "epoch": 0.312869404198884, + "grad_norm": 1.0247926712036133, + "learning_rate": 8.044428654209716e-06, + "loss": 0.3096, + "step": 15629 + }, + { + "epoch": 0.31288942271601233, + "grad_norm": 1.2981622219085693, + "learning_rate": 8.044171487067513e-06, + "loss": 0.2935, + "step": 15630 + }, + { + "epoch": 0.31290944123314063, + "grad_norm": 1.04010808467865, + "learning_rate": 8.04391430712827e-06, + "loss": 0.2904, + "step": 15631 + }, + { + "epoch": 0.312929459750269, + "grad_norm": 1.1170920133590698, + "learning_rate": 8.043657114393066e-06, + "loss": 0.3432, + "step": 15632 + }, + { + "epoch": 0.31294947826739733, + "grad_norm": 0.9439957737922668, + "learning_rate": 8.043399908862983e-06, + "loss": 0.2825, + "step": 15633 + }, + { + "epoch": 0.3129694967845257, + "grad_norm": 1.0906403064727783, + "learning_rate": 8.043142690539102e-06, + "loss": 0.3285, + "step": 15634 + }, + { + "epoch": 0.31298951530165403, + "grad_norm": 1.2679654359817505, + "learning_rate": 8.042885459422503e-06, + "loss": 0.3152, + "step": 15635 + }, + { + "epoch": 0.3130095338187824, + "grad_norm": 1.2004622220993042, + "learning_rate": 8.042628215514267e-06, + "loss": 0.306, + "step": 15636 + }, + { + "epoch": 0.31302955233591073, + "grad_norm": 1.2060807943344116, + "learning_rate": 8.04237095881548e-06, + "loss": 0.3532, + "step": 15637 + }, + { + "epoch": 0.3130495708530391, + "grad_norm": 1.181524395942688, + "learning_rate": 8.042113689327218e-06, + "loss": 0.326, + "step": 15638 + }, + { + "epoch": 0.3130695893701674, + "grad_norm": 1.06272554397583, + "learning_rate": 8.041856407050566e-06, + "loss": 0.3045, + "step": 15639 + }, + { + "epoch": 0.31308960788729573, + "grad_norm": 1.1557509899139404, + "learning_rate": 8.041599111986602e-06, + "loss": 0.3494, + "step": 15640 + }, + { + "epoch": 0.3131096264044241, + "grad_norm": 1.320471167564392, + "learning_rate": 8.041341804136413e-06, + "loss": 0.3074, + "step": 15641 + }, + { + "epoch": 0.31312964492155243, + "grad_norm": 1.1629337072372437, + "learning_rate": 8.041084483501076e-06, + "loss": 0.3044, + "step": 15642 + }, + { + "epoch": 0.3131496634386808, + "grad_norm": 1.0787512063980103, + "learning_rate": 8.040827150081673e-06, + "loss": 0.3483, + "step": 15643 + }, + { + "epoch": 0.31316968195580913, + "grad_norm": 1.1974799633026123, + "learning_rate": 8.040569803879291e-06, + "loss": 0.3445, + "step": 15644 + }, + { + "epoch": 0.3131897004729375, + "grad_norm": 1.162218689918518, + "learning_rate": 8.040312444895004e-06, + "loss": 0.3616, + "step": 15645 + }, + { + "epoch": 0.31320971899006583, + "grad_norm": 1.1499762535095215, + "learning_rate": 8.040055073129899e-06, + "loss": 0.3271, + "step": 15646 + }, + { + "epoch": 0.31322973750719413, + "grad_norm": 1.0550448894500732, + "learning_rate": 8.039797688585056e-06, + "loss": 0.3223, + "step": 15647 + }, + { + "epoch": 0.3132497560243225, + "grad_norm": 1.1013814210891724, + "learning_rate": 8.039540291261558e-06, + "loss": 0.2931, + "step": 15648 + }, + { + "epoch": 0.31326977454145083, + "grad_norm": 0.9898095726966858, + "learning_rate": 8.039282881160488e-06, + "loss": 0.3297, + "step": 15649 + }, + { + "epoch": 0.3132897930585792, + "grad_norm": 1.0698925256729126, + "learning_rate": 8.039025458282925e-06, + "loss": 0.3049, + "step": 15650 + }, + { + "epoch": 0.31330981157570753, + "grad_norm": 1.0965434312820435, + "learning_rate": 8.038768022629955e-06, + "loss": 0.3642, + "step": 15651 + }, + { + "epoch": 0.3133298300928359, + "grad_norm": 1.1859889030456543, + "learning_rate": 8.038510574202658e-06, + "loss": 0.3375, + "step": 15652 + }, + { + "epoch": 0.31334984860996423, + "grad_norm": 1.1805647611618042, + "learning_rate": 8.038253113002115e-06, + "loss": 0.3308, + "step": 15653 + }, + { + "epoch": 0.3133698671270926, + "grad_norm": 1.054650068283081, + "learning_rate": 8.03799563902941e-06, + "loss": 0.3141, + "step": 15654 + }, + { + "epoch": 0.3133898856442209, + "grad_norm": 1.1715754270553589, + "learning_rate": 8.037738152285626e-06, + "loss": 0.3119, + "step": 15655 + }, + { + "epoch": 0.31340990416134923, + "grad_norm": 1.0163367986679077, + "learning_rate": 8.037480652771844e-06, + "loss": 0.344, + "step": 15656 + }, + { + "epoch": 0.3134299226784776, + "grad_norm": 1.0841970443725586, + "learning_rate": 8.037223140489147e-06, + "loss": 0.3888, + "step": 15657 + }, + { + "epoch": 0.31344994119560593, + "grad_norm": 1.1270426511764526, + "learning_rate": 8.036965615438617e-06, + "loss": 0.3274, + "step": 15658 + }, + { + "epoch": 0.3134699597127343, + "grad_norm": 1.1162132024765015, + "learning_rate": 8.036708077621338e-06, + "loss": 0.3236, + "step": 15659 + }, + { + "epoch": 0.31348997822986263, + "grad_norm": 1.0272904634475708, + "learning_rate": 8.03645052703839e-06, + "loss": 0.3371, + "step": 15660 + }, + { + "epoch": 0.313509996746991, + "grad_norm": 1.087588906288147, + "learning_rate": 8.036192963690859e-06, + "loss": 0.3322, + "step": 15661 + }, + { + "epoch": 0.31353001526411933, + "grad_norm": 1.094014286994934, + "learning_rate": 8.035935387579828e-06, + "loss": 0.3434, + "step": 15662 + }, + { + "epoch": 0.31355003378124763, + "grad_norm": 1.111309289932251, + "learning_rate": 8.035677798706376e-06, + "loss": 0.3203, + "step": 15663 + }, + { + "epoch": 0.313570052298376, + "grad_norm": 1.2434228658676147, + "learning_rate": 8.035420197071587e-06, + "loss": 0.3354, + "step": 15664 + }, + { + "epoch": 0.31359007081550433, + "grad_norm": 1.046966791152954, + "learning_rate": 8.035162582676546e-06, + "loss": 0.3119, + "step": 15665 + }, + { + "epoch": 0.3136100893326327, + "grad_norm": 1.0154142379760742, + "learning_rate": 8.034904955522334e-06, + "loss": 0.2888, + "step": 15666 + }, + { + "epoch": 0.31363010784976103, + "grad_norm": 1.099287986755371, + "learning_rate": 8.034647315610034e-06, + "loss": 0.3233, + "step": 15667 + }, + { + "epoch": 0.3136501263668894, + "grad_norm": 1.1309412717819214, + "learning_rate": 8.034389662940731e-06, + "loss": 0.3598, + "step": 15668 + }, + { + "epoch": 0.31367014488401773, + "grad_norm": 1.1959844827651978, + "learning_rate": 8.034131997515506e-06, + "loss": 0.3461, + "step": 15669 + }, + { + "epoch": 0.3136901634011461, + "grad_norm": 1.1062365770339966, + "learning_rate": 8.033874319335445e-06, + "loss": 0.2609, + "step": 15670 + }, + { + "epoch": 0.3137101819182744, + "grad_norm": 1.1354551315307617, + "learning_rate": 8.033616628401627e-06, + "loss": 0.3228, + "step": 15671 + }, + { + "epoch": 0.31373020043540273, + "grad_norm": 1.1578699350357056, + "learning_rate": 8.033358924715139e-06, + "loss": 0.3702, + "step": 15672 + }, + { + "epoch": 0.3137502189525311, + "grad_norm": 1.1751383543014526, + "learning_rate": 8.033101208277062e-06, + "loss": 0.3435, + "step": 15673 + }, + { + "epoch": 0.31377023746965943, + "grad_norm": 1.2406147718429565, + "learning_rate": 8.03284347908848e-06, + "loss": 0.2996, + "step": 15674 + }, + { + "epoch": 0.3137902559867878, + "grad_norm": 1.1676526069641113, + "learning_rate": 8.032585737150477e-06, + "loss": 0.3541, + "step": 15675 + }, + { + "epoch": 0.31381027450391613, + "grad_norm": 1.0954184532165527, + "learning_rate": 8.032327982464137e-06, + "loss": 0.3298, + "step": 15676 + }, + { + "epoch": 0.3138302930210445, + "grad_norm": 1.115667700767517, + "learning_rate": 8.032070215030542e-06, + "loss": 0.3312, + "step": 15677 + }, + { + "epoch": 0.31385031153817283, + "grad_norm": 1.004814863204956, + "learning_rate": 8.031812434850777e-06, + "loss": 0.259, + "step": 15678 + }, + { + "epoch": 0.3138703300553011, + "grad_norm": 2.082362174987793, + "learning_rate": 8.031554641925923e-06, + "loss": 0.8612, + "step": 15679 + }, + { + "epoch": 0.3138903485724295, + "grad_norm": 1.2021313905715942, + "learning_rate": 8.031296836257068e-06, + "loss": 0.3731, + "step": 15680 + }, + { + "epoch": 0.31391036708955783, + "grad_norm": 1.8465549945831299, + "learning_rate": 8.031039017845292e-06, + "loss": 0.8002, + "step": 15681 + }, + { + "epoch": 0.3139303856066862, + "grad_norm": 1.0496525764465332, + "learning_rate": 8.03078118669168e-06, + "loss": 0.3298, + "step": 15682 + }, + { + "epoch": 0.31395040412381453, + "grad_norm": 1.1777002811431885, + "learning_rate": 8.030523342797319e-06, + "loss": 0.3131, + "step": 15683 + }, + { + "epoch": 0.3139704226409429, + "grad_norm": 1.0345282554626465, + "learning_rate": 8.030265486163287e-06, + "loss": 0.2878, + "step": 15684 + }, + { + "epoch": 0.31399044115807123, + "grad_norm": 0.9855992197990417, + "learning_rate": 8.030007616790671e-06, + "loss": 0.3177, + "step": 15685 + }, + { + "epoch": 0.3140104596751996, + "grad_norm": 1.1034196615219116, + "learning_rate": 8.029749734680557e-06, + "loss": 0.2943, + "step": 15686 + }, + { + "epoch": 0.3140304781923279, + "grad_norm": 1.9076626300811768, + "learning_rate": 8.029491839834025e-06, + "loss": 0.7607, + "step": 15687 + }, + { + "epoch": 0.31405049670945623, + "grad_norm": 0.9840494990348816, + "learning_rate": 8.029233932252163e-06, + "loss": 0.3023, + "step": 15688 + }, + { + "epoch": 0.3140705152265846, + "grad_norm": 1.2594090700149536, + "learning_rate": 8.028976011936053e-06, + "loss": 0.3655, + "step": 15689 + }, + { + "epoch": 0.31409053374371293, + "grad_norm": 1.1295222043991089, + "learning_rate": 8.02871807888678e-06, + "loss": 0.3205, + "step": 15690 + }, + { + "epoch": 0.3141105522608413, + "grad_norm": 1.0929162502288818, + "learning_rate": 8.028460133105427e-06, + "loss": 0.3359, + "step": 15691 + }, + { + "epoch": 0.31413057077796963, + "grad_norm": 1.0005232095718384, + "learning_rate": 8.02820217459308e-06, + "loss": 0.3196, + "step": 15692 + }, + { + "epoch": 0.314150589295098, + "grad_norm": 1.2098878622055054, + "learning_rate": 8.027944203350822e-06, + "loss": 0.3287, + "step": 15693 + }, + { + "epoch": 0.31417060781222633, + "grad_norm": 1.2848397493362427, + "learning_rate": 8.027686219379739e-06, + "loss": 0.3674, + "step": 15694 + }, + { + "epoch": 0.3141906263293546, + "grad_norm": 1.11765456199646, + "learning_rate": 8.027428222680915e-06, + "loss": 0.2912, + "step": 15695 + }, + { + "epoch": 0.314210644846483, + "grad_norm": 1.1628978252410889, + "learning_rate": 8.027170213255433e-06, + "loss": 0.3803, + "step": 15696 + }, + { + "epoch": 0.31423066336361133, + "grad_norm": 1.2982254028320312, + "learning_rate": 8.02691219110438e-06, + "loss": 0.3327, + "step": 15697 + }, + { + "epoch": 0.3142506818807397, + "grad_norm": 1.1611967086791992, + "learning_rate": 8.026654156228839e-06, + "loss": 0.3142, + "step": 15698 + }, + { + "epoch": 0.31427070039786803, + "grad_norm": 1.2215628623962402, + "learning_rate": 8.026396108629896e-06, + "loss": 0.3199, + "step": 15699 + }, + { + "epoch": 0.3142907189149964, + "grad_norm": 0.9614350199699402, + "learning_rate": 8.026138048308633e-06, + "loss": 0.3247, + "step": 15700 + }, + { + "epoch": 0.31431073743212473, + "grad_norm": 0.9627054333686829, + "learning_rate": 8.025879975266139e-06, + "loss": 0.2931, + "step": 15701 + }, + { + "epoch": 0.3143307559492531, + "grad_norm": 1.2638790607452393, + "learning_rate": 8.025621889503497e-06, + "loss": 0.2777, + "step": 15702 + }, + { + "epoch": 0.3143507744663814, + "grad_norm": 1.1828669309616089, + "learning_rate": 8.025363791021789e-06, + "loss": 0.3503, + "step": 15703 + }, + { + "epoch": 0.31437079298350973, + "grad_norm": 1.098024845123291, + "learning_rate": 8.025105679822107e-06, + "loss": 0.273, + "step": 15704 + }, + { + "epoch": 0.3143908115006381, + "grad_norm": 1.1090959310531616, + "learning_rate": 8.024847555905528e-06, + "loss": 0.3704, + "step": 15705 + }, + { + "epoch": 0.31441083001776643, + "grad_norm": 1.073093295097351, + "learning_rate": 8.024589419273141e-06, + "loss": 0.3599, + "step": 15706 + }, + { + "epoch": 0.3144308485348948, + "grad_norm": 1.0968550443649292, + "learning_rate": 8.024331269926033e-06, + "loss": 0.3408, + "step": 15707 + }, + { + "epoch": 0.31445086705202313, + "grad_norm": 1.1276482343673706, + "learning_rate": 8.024073107865286e-06, + "loss": 0.3099, + "step": 15708 + }, + { + "epoch": 0.3144708855691515, + "grad_norm": 1.1036436557769775, + "learning_rate": 8.023814933091987e-06, + "loss": 0.2975, + "step": 15709 + }, + { + "epoch": 0.31449090408627983, + "grad_norm": 1.1366468667984009, + "learning_rate": 8.02355674560722e-06, + "loss": 0.257, + "step": 15710 + }, + { + "epoch": 0.3145109226034081, + "grad_norm": 1.7914388179779053, + "learning_rate": 8.02329854541207e-06, + "loss": 0.8605, + "step": 15711 + }, + { + "epoch": 0.3145309411205365, + "grad_norm": 1.2225176095962524, + "learning_rate": 8.023040332507627e-06, + "loss": 0.3318, + "step": 15712 + }, + { + "epoch": 0.31455095963766483, + "grad_norm": 1.1552369594573975, + "learning_rate": 8.022782106894971e-06, + "loss": 0.3471, + "step": 15713 + }, + { + "epoch": 0.3145709781547932, + "grad_norm": 1.0867440700531006, + "learning_rate": 8.02252386857519e-06, + "loss": 0.2868, + "step": 15714 + }, + { + "epoch": 0.31459099667192153, + "grad_norm": 1.0487263202667236, + "learning_rate": 8.022265617549367e-06, + "loss": 0.2771, + "step": 15715 + }, + { + "epoch": 0.3146110151890499, + "grad_norm": 1.0115684270858765, + "learning_rate": 8.022007353818592e-06, + "loss": 0.3081, + "step": 15716 + }, + { + "epoch": 0.31463103370617823, + "grad_norm": 1.9023159742355347, + "learning_rate": 8.02174907738395e-06, + "loss": 0.8255, + "step": 15717 + }, + { + "epoch": 0.3146510522233066, + "grad_norm": 1.9991940259933472, + "learning_rate": 8.021490788246522e-06, + "loss": 0.8366, + "step": 15718 + }, + { + "epoch": 0.3146710707404349, + "grad_norm": 1.1342860460281372, + "learning_rate": 8.021232486407397e-06, + "loss": 0.2954, + "step": 15719 + }, + { + "epoch": 0.31469108925756323, + "grad_norm": 1.0752241611480713, + "learning_rate": 8.020974171867661e-06, + "loss": 0.3264, + "step": 15720 + }, + { + "epoch": 0.3147111077746916, + "grad_norm": 1.1524704694747925, + "learning_rate": 8.020715844628402e-06, + "loss": 0.3391, + "step": 15721 + }, + { + "epoch": 0.31473112629181993, + "grad_norm": 0.9620039463043213, + "learning_rate": 8.020457504690702e-06, + "loss": 0.3085, + "step": 15722 + }, + { + "epoch": 0.3147511448089483, + "grad_norm": 1.1481143236160278, + "learning_rate": 8.020199152055647e-06, + "loss": 0.3739, + "step": 15723 + }, + { + "epoch": 0.31477116332607663, + "grad_norm": 0.9750120639801025, + "learning_rate": 8.019940786724327e-06, + "loss": 0.3141, + "step": 15724 + }, + { + "epoch": 0.314791181843205, + "grad_norm": 1.1561002731323242, + "learning_rate": 8.019682408697827e-06, + "loss": 0.3075, + "step": 15725 + }, + { + "epoch": 0.31481120036033333, + "grad_norm": 1.0756703615188599, + "learning_rate": 8.01942401797723e-06, + "loss": 0.3184, + "step": 15726 + }, + { + "epoch": 0.3148312188774616, + "grad_norm": 1.1717413663864136, + "learning_rate": 8.019165614563624e-06, + "loss": 0.333, + "step": 15727 + }, + { + "epoch": 0.31485123739459, + "grad_norm": 0.9970999956130981, + "learning_rate": 8.018907198458096e-06, + "loss": 0.3155, + "step": 15728 + }, + { + "epoch": 0.31487125591171833, + "grad_norm": 1.0899558067321777, + "learning_rate": 8.018648769661732e-06, + "loss": 0.3424, + "step": 15729 + }, + { + "epoch": 0.3148912744288467, + "grad_norm": 1.0257127285003662, + "learning_rate": 8.018390328175618e-06, + "loss": 0.3433, + "step": 15730 + }, + { + "epoch": 0.31491129294597503, + "grad_norm": 1.2286878824234009, + "learning_rate": 8.01813187400084e-06, + "loss": 0.3632, + "step": 15731 + }, + { + "epoch": 0.3149313114631034, + "grad_norm": 1.3243751525878906, + "learning_rate": 8.017873407138486e-06, + "loss": 0.3337, + "step": 15732 + }, + { + "epoch": 0.31495132998023173, + "grad_norm": 0.9809083938598633, + "learning_rate": 8.017614927589642e-06, + "loss": 0.3086, + "step": 15733 + }, + { + "epoch": 0.3149713484973601, + "grad_norm": 1.1247329711914062, + "learning_rate": 8.017356435355394e-06, + "loss": 0.3561, + "step": 15734 + }, + { + "epoch": 0.3149913670144884, + "grad_norm": 1.0849250555038452, + "learning_rate": 8.017097930436827e-06, + "loss": 0.2634, + "step": 15735 + }, + { + "epoch": 0.3150113855316167, + "grad_norm": 1.3560513257980347, + "learning_rate": 8.016839412835032e-06, + "loss": 0.3104, + "step": 15736 + }, + { + "epoch": 0.3150314040487451, + "grad_norm": 1.9807308912277222, + "learning_rate": 8.016580882551094e-06, + "loss": 0.8035, + "step": 15737 + }, + { + "epoch": 0.31505142256587343, + "grad_norm": 1.076993703842163, + "learning_rate": 8.016322339586096e-06, + "loss": 0.3445, + "step": 15738 + }, + { + "epoch": 0.3150714410830018, + "grad_norm": 1.0131973028182983, + "learning_rate": 8.01606378394113e-06, + "loss": 0.2841, + "step": 15739 + }, + { + "epoch": 0.31509145960013013, + "grad_norm": 1.0925086736679077, + "learning_rate": 8.01580521561728e-06, + "loss": 0.2926, + "step": 15740 + }, + { + "epoch": 0.3151114781172585, + "grad_norm": 1.0818530321121216, + "learning_rate": 8.015546634615636e-06, + "loss": 0.3008, + "step": 15741 + }, + { + "epoch": 0.31513149663438683, + "grad_norm": 1.0820523500442505, + "learning_rate": 8.015288040937282e-06, + "loss": 0.3355, + "step": 15742 + }, + { + "epoch": 0.3151515151515151, + "grad_norm": 1.0438135862350464, + "learning_rate": 8.015029434583304e-06, + "loss": 0.3243, + "step": 15743 + }, + { + "epoch": 0.3151715336686435, + "grad_norm": 1.1686077117919922, + "learning_rate": 8.014770815554793e-06, + "loss": 0.3352, + "step": 15744 + }, + { + "epoch": 0.31519155218577183, + "grad_norm": 1.131981611251831, + "learning_rate": 8.014512183852833e-06, + "loss": 0.3262, + "step": 15745 + }, + { + "epoch": 0.3152115707029002, + "grad_norm": 1.0970282554626465, + "learning_rate": 8.014253539478513e-06, + "loss": 0.3044, + "step": 15746 + }, + { + "epoch": 0.31523158922002853, + "grad_norm": 1.9019279479980469, + "learning_rate": 8.01399488243292e-06, + "loss": 0.818, + "step": 15747 + }, + { + "epoch": 0.3152516077371569, + "grad_norm": 1.1281988620758057, + "learning_rate": 8.013736212717142e-06, + "loss": 0.3229, + "step": 15748 + }, + { + "epoch": 0.31527162625428523, + "grad_norm": 2.102296829223633, + "learning_rate": 8.013477530332264e-06, + "loss": 0.8722, + "step": 15749 + }, + { + "epoch": 0.3152916447714136, + "grad_norm": 1.1635243892669678, + "learning_rate": 8.013218835279376e-06, + "loss": 0.3209, + "step": 15750 + }, + { + "epoch": 0.3153116632885419, + "grad_norm": 1.0581918954849243, + "learning_rate": 8.012960127559564e-06, + "loss": 0.3433, + "step": 15751 + }, + { + "epoch": 0.3153316818056702, + "grad_norm": 1.1772412061691284, + "learning_rate": 8.012701407173916e-06, + "loss": 0.3201, + "step": 15752 + }, + { + "epoch": 0.3153517003227986, + "grad_norm": 1.137519121170044, + "learning_rate": 8.012442674123519e-06, + "loss": 0.3442, + "step": 15753 + }, + { + "epoch": 0.31537171883992693, + "grad_norm": 1.1422820091247559, + "learning_rate": 8.012183928409462e-06, + "loss": 0.3465, + "step": 15754 + }, + { + "epoch": 0.3153917373570553, + "grad_norm": 1.042007327079773, + "learning_rate": 8.011925170032833e-06, + "loss": 0.3245, + "step": 15755 + }, + { + "epoch": 0.31541175587418363, + "grad_norm": 1.0892845392227173, + "learning_rate": 8.011666398994718e-06, + "loss": 0.324, + "step": 15756 + }, + { + "epoch": 0.315431774391312, + "grad_norm": 1.047895073890686, + "learning_rate": 8.011407615296205e-06, + "loss": 0.3184, + "step": 15757 + }, + { + "epoch": 0.31545179290844033, + "grad_norm": 1.138633370399475, + "learning_rate": 8.011148818938384e-06, + "loss": 0.3593, + "step": 15758 + }, + { + "epoch": 0.3154718114255686, + "grad_norm": 1.097508430480957, + "learning_rate": 8.01089000992234e-06, + "loss": 0.2635, + "step": 15759 + }, + { + "epoch": 0.315491829942697, + "grad_norm": 1.0854929685592651, + "learning_rate": 8.010631188249162e-06, + "loss": 0.3115, + "step": 15760 + }, + { + "epoch": 0.31551184845982533, + "grad_norm": 1.0693942308425903, + "learning_rate": 8.01037235391994e-06, + "loss": 0.3659, + "step": 15761 + }, + { + "epoch": 0.3155318669769537, + "grad_norm": 1.0053330659866333, + "learning_rate": 8.010113506935757e-06, + "loss": 0.2872, + "step": 15762 + }, + { + "epoch": 0.31555188549408203, + "grad_norm": 1.3271634578704834, + "learning_rate": 8.009854647297708e-06, + "loss": 0.3084, + "step": 15763 + }, + { + "epoch": 0.3155719040112104, + "grad_norm": 1.1900808811187744, + "learning_rate": 8.009595775006878e-06, + "loss": 0.338, + "step": 15764 + }, + { + "epoch": 0.31559192252833873, + "grad_norm": 1.1216686964035034, + "learning_rate": 8.009336890064352e-06, + "loss": 0.3908, + "step": 15765 + }, + { + "epoch": 0.3156119410454671, + "grad_norm": 0.9300796985626221, + "learning_rate": 8.009077992471223e-06, + "loss": 0.3209, + "step": 15766 + }, + { + "epoch": 0.3156319595625954, + "grad_norm": 1.2519354820251465, + "learning_rate": 8.008819082228578e-06, + "loss": 0.3227, + "step": 15767 + }, + { + "epoch": 0.3156519780797237, + "grad_norm": 1.0958812236785889, + "learning_rate": 8.008560159337506e-06, + "loss": 0.3445, + "step": 15768 + }, + { + "epoch": 0.3156719965968521, + "grad_norm": 2.0197560787200928, + "learning_rate": 8.008301223799093e-06, + "loss": 0.8632, + "step": 15769 + }, + { + "epoch": 0.31569201511398043, + "grad_norm": 1.2844959497451782, + "learning_rate": 8.00804227561443e-06, + "loss": 0.325, + "step": 15770 + }, + { + "epoch": 0.3157120336311088, + "grad_norm": 1.2288776636123657, + "learning_rate": 8.007783314784602e-06, + "loss": 0.3595, + "step": 15771 + }, + { + "epoch": 0.31573205214823713, + "grad_norm": 1.1152892112731934, + "learning_rate": 8.007524341310704e-06, + "loss": 0.3138, + "step": 15772 + }, + { + "epoch": 0.3157520706653655, + "grad_norm": 1.0548319816589355, + "learning_rate": 8.007265355193818e-06, + "loss": 0.2837, + "step": 15773 + }, + { + "epoch": 0.31577208918249383, + "grad_norm": 1.0432190895080566, + "learning_rate": 8.007006356435038e-06, + "loss": 0.2986, + "step": 15774 + }, + { + "epoch": 0.3157921076996221, + "grad_norm": 1.0398350954055786, + "learning_rate": 8.006747345035448e-06, + "loss": 0.3304, + "step": 15775 + }, + { + "epoch": 0.3158121262167505, + "grad_norm": 0.998888373374939, + "learning_rate": 8.006488320996142e-06, + "loss": 0.2867, + "step": 15776 + }, + { + "epoch": 0.31583214473387883, + "grad_norm": 1.1826393604278564, + "learning_rate": 8.006229284318204e-06, + "loss": 0.3251, + "step": 15777 + }, + { + "epoch": 0.3158521632510072, + "grad_norm": 1.128859043121338, + "learning_rate": 8.005970235002726e-06, + "loss": 0.3155, + "step": 15778 + }, + { + "epoch": 0.31587218176813553, + "grad_norm": 1.8225364685058594, + "learning_rate": 8.005711173050795e-06, + "loss": 0.7891, + "step": 15779 + }, + { + "epoch": 0.3158922002852639, + "grad_norm": 0.9634138345718384, + "learning_rate": 8.005452098463503e-06, + "loss": 0.3641, + "step": 15780 + }, + { + "epoch": 0.31591221880239223, + "grad_norm": 1.1385380029678345, + "learning_rate": 8.005193011241935e-06, + "loss": 0.3406, + "step": 15781 + }, + { + "epoch": 0.3159322373195206, + "grad_norm": 1.0744577646255493, + "learning_rate": 8.004933911387182e-06, + "loss": 0.3065, + "step": 15782 + }, + { + "epoch": 0.3159522558366489, + "grad_norm": 1.0572575330734253, + "learning_rate": 8.004674798900336e-06, + "loss": 0.3302, + "step": 15783 + }, + { + "epoch": 0.3159722743537772, + "grad_norm": 1.855162501335144, + "learning_rate": 8.004415673782482e-06, + "loss": 0.8026, + "step": 15784 + }, + { + "epoch": 0.3159922928709056, + "grad_norm": 1.1663331985473633, + "learning_rate": 8.004156536034713e-06, + "loss": 0.3491, + "step": 15785 + }, + { + "epoch": 0.31601231138803393, + "grad_norm": 1.0844404697418213, + "learning_rate": 8.003897385658112e-06, + "loss": 0.3397, + "step": 15786 + }, + { + "epoch": 0.3160323299051623, + "grad_norm": 1.092086911201477, + "learning_rate": 8.003638222653777e-06, + "loss": 0.2871, + "step": 15787 + }, + { + "epoch": 0.31605234842229063, + "grad_norm": 1.1577972173690796, + "learning_rate": 8.003379047022793e-06, + "loss": 0.3077, + "step": 15788 + }, + { + "epoch": 0.316072366939419, + "grad_norm": 1.1625703573226929, + "learning_rate": 8.00311985876625e-06, + "loss": 0.3174, + "step": 15789 + }, + { + "epoch": 0.31609238545654733, + "grad_norm": 1.143356442451477, + "learning_rate": 8.002860657885235e-06, + "loss": 0.4038, + "step": 15790 + }, + { + "epoch": 0.3161124039736756, + "grad_norm": 0.9960188269615173, + "learning_rate": 8.00260144438084e-06, + "loss": 0.3059, + "step": 15791 + }, + { + "epoch": 0.316132422490804, + "grad_norm": 1.7026962041854858, + "learning_rate": 8.002342218254157e-06, + "loss": 0.8193, + "step": 15792 + }, + { + "epoch": 0.3161524410079323, + "grad_norm": 1.0080374479293823, + "learning_rate": 8.002082979506271e-06, + "loss": 0.322, + "step": 15793 + }, + { + "epoch": 0.3161724595250607, + "grad_norm": 1.0205527544021606, + "learning_rate": 8.001823728138276e-06, + "loss": 0.3304, + "step": 15794 + }, + { + "epoch": 0.31619247804218903, + "grad_norm": 1.0544840097427368, + "learning_rate": 8.00156446415126e-06, + "loss": 0.3344, + "step": 15795 + }, + { + "epoch": 0.3162124965593174, + "grad_norm": 1.156980037689209, + "learning_rate": 8.001305187546313e-06, + "loss": 0.3218, + "step": 15796 + }, + { + "epoch": 0.31623251507644573, + "grad_norm": 1.2355762720108032, + "learning_rate": 8.001045898324524e-06, + "loss": 0.2932, + "step": 15797 + }, + { + "epoch": 0.3162525335935741, + "grad_norm": 1.1314669847488403, + "learning_rate": 8.000786596486984e-06, + "loss": 0.356, + "step": 15798 + }, + { + "epoch": 0.3162725521107024, + "grad_norm": 1.1068975925445557, + "learning_rate": 8.000527282034785e-06, + "loss": 0.3017, + "step": 15799 + }, + { + "epoch": 0.3162925706278307, + "grad_norm": 1.2005906105041504, + "learning_rate": 8.000267954969012e-06, + "loss": 0.3207, + "step": 15800 + }, + { + "epoch": 0.3163125891449591, + "grad_norm": 1.277336835861206, + "learning_rate": 8.00000861529076e-06, + "loss": 0.28, + "step": 15801 + }, + { + "epoch": 0.31633260766208743, + "grad_norm": 1.0334149599075317, + "learning_rate": 7.999749263001116e-06, + "loss": 0.2761, + "step": 15802 + }, + { + "epoch": 0.3163526261792158, + "grad_norm": 1.139955997467041, + "learning_rate": 7.999489898101174e-06, + "loss": 0.2836, + "step": 15803 + }, + { + "epoch": 0.31637264469634413, + "grad_norm": 1.2117798328399658, + "learning_rate": 7.999230520592018e-06, + "loss": 0.3105, + "step": 15804 + }, + { + "epoch": 0.3163926632134725, + "grad_norm": 1.1137034893035889, + "learning_rate": 7.998971130474746e-06, + "loss": 0.3638, + "step": 15805 + }, + { + "epoch": 0.31641268173060083, + "grad_norm": 0.9849093556404114, + "learning_rate": 7.998711727750444e-06, + "loss": 0.3124, + "step": 15806 + }, + { + "epoch": 0.3164327002477291, + "grad_norm": 1.2504088878631592, + "learning_rate": 7.998452312420202e-06, + "loss": 0.3217, + "step": 15807 + }, + { + "epoch": 0.3164527187648575, + "grad_norm": 1.2415603399276733, + "learning_rate": 7.998192884485113e-06, + "loss": 0.321, + "step": 15808 + }, + { + "epoch": 0.3164727372819858, + "grad_norm": 1.1006156206130981, + "learning_rate": 7.997933443946267e-06, + "loss": 0.3316, + "step": 15809 + }, + { + "epoch": 0.3164927557991142, + "grad_norm": 1.0315626859664917, + "learning_rate": 7.997673990804751e-06, + "loss": 0.3288, + "step": 15810 + }, + { + "epoch": 0.31651277431624253, + "grad_norm": 1.101485013961792, + "learning_rate": 7.997414525061662e-06, + "loss": 0.2827, + "step": 15811 + }, + { + "epoch": 0.3165327928333709, + "grad_norm": 1.9558436870574951, + "learning_rate": 7.997155046718086e-06, + "loss": 0.7891, + "step": 15812 + }, + { + "epoch": 0.31655281135049923, + "grad_norm": 1.3124572038650513, + "learning_rate": 7.996895555775114e-06, + "loss": 0.3443, + "step": 15813 + }, + { + "epoch": 0.3165728298676276, + "grad_norm": 1.8921172618865967, + "learning_rate": 7.99663605223384e-06, + "loss": 0.7682, + "step": 15814 + }, + { + "epoch": 0.3165928483847559, + "grad_norm": 1.0837377309799194, + "learning_rate": 7.996376536095351e-06, + "loss": 0.3183, + "step": 15815 + }, + { + "epoch": 0.3166128669018842, + "grad_norm": 1.1433411836624146, + "learning_rate": 7.996117007360741e-06, + "loss": 0.3611, + "step": 15816 + }, + { + "epoch": 0.3166328854190126, + "grad_norm": 1.043753981590271, + "learning_rate": 7.9958574660311e-06, + "loss": 0.3881, + "step": 15817 + }, + { + "epoch": 0.31665290393614093, + "grad_norm": 1.1111655235290527, + "learning_rate": 7.995597912107517e-06, + "loss": 0.3336, + "step": 15818 + }, + { + "epoch": 0.3166729224532693, + "grad_norm": 1.134453296661377, + "learning_rate": 7.995338345591088e-06, + "loss": 0.3213, + "step": 15819 + }, + { + "epoch": 0.31669294097039763, + "grad_norm": 1.2944285869598389, + "learning_rate": 7.9950787664829e-06, + "loss": 0.3121, + "step": 15820 + }, + { + "epoch": 0.316712959487526, + "grad_norm": 1.2274463176727295, + "learning_rate": 7.994819174784045e-06, + "loss": 0.2873, + "step": 15821 + }, + { + "epoch": 0.31673297800465433, + "grad_norm": 1.0921602249145508, + "learning_rate": 7.994559570495615e-06, + "loss": 0.3605, + "step": 15822 + }, + { + "epoch": 0.3167529965217826, + "grad_norm": 1.0399888753890991, + "learning_rate": 7.9942999536187e-06, + "loss": 0.33, + "step": 15823 + }, + { + "epoch": 0.316773015038911, + "grad_norm": 1.0562633275985718, + "learning_rate": 7.994040324154396e-06, + "loss": 0.3371, + "step": 15824 + }, + { + "epoch": 0.3167930335560393, + "grad_norm": 1.2044246196746826, + "learning_rate": 7.993780682103787e-06, + "loss": 0.324, + "step": 15825 + }, + { + "epoch": 0.3168130520731677, + "grad_norm": 1.105836272239685, + "learning_rate": 7.99352102746797e-06, + "loss": 0.3705, + "step": 15826 + }, + { + "epoch": 0.31683307059029603, + "grad_norm": 1.1767297983169556, + "learning_rate": 7.993261360248034e-06, + "loss": 0.3131, + "step": 15827 + }, + { + "epoch": 0.3168530891074244, + "grad_norm": 1.3309804201126099, + "learning_rate": 7.993001680445072e-06, + "loss": 0.3241, + "step": 15828 + }, + { + "epoch": 0.31687310762455273, + "grad_norm": 1.0447752475738525, + "learning_rate": 7.992741988060173e-06, + "loss": 0.2765, + "step": 15829 + }, + { + "epoch": 0.3168931261416811, + "grad_norm": 1.0529768466949463, + "learning_rate": 7.992482283094434e-06, + "loss": 0.3131, + "step": 15830 + }, + { + "epoch": 0.3169131446588094, + "grad_norm": 1.0819138288497925, + "learning_rate": 7.992222565548942e-06, + "loss": 0.2895, + "step": 15831 + }, + { + "epoch": 0.3169331631759377, + "grad_norm": 1.0702972412109375, + "learning_rate": 7.99196283542479e-06, + "loss": 0.3443, + "step": 15832 + }, + { + "epoch": 0.3169531816930661, + "grad_norm": 1.0058482885360718, + "learning_rate": 7.99170309272307e-06, + "loss": 0.3059, + "step": 15833 + }, + { + "epoch": 0.31697320021019443, + "grad_norm": 1.103512167930603, + "learning_rate": 7.991443337444874e-06, + "loss": 0.3347, + "step": 15834 + }, + { + "epoch": 0.3169932187273228, + "grad_norm": 1.9870537519454956, + "learning_rate": 7.991183569591294e-06, + "loss": 0.8145, + "step": 15835 + }, + { + "epoch": 0.31701323724445113, + "grad_norm": 1.1283202171325684, + "learning_rate": 7.990923789163422e-06, + "loss": 0.2967, + "step": 15836 + }, + { + "epoch": 0.3170332557615795, + "grad_norm": 1.1741104125976562, + "learning_rate": 7.99066399616235e-06, + "loss": 0.3361, + "step": 15837 + }, + { + "epoch": 0.31705327427870783, + "grad_norm": 1.3285850286483765, + "learning_rate": 7.990404190589172e-06, + "loss": 0.3472, + "step": 15838 + }, + { + "epoch": 0.3170732927958361, + "grad_norm": 1.1953144073486328, + "learning_rate": 7.990144372444976e-06, + "loss": 0.3459, + "step": 15839 + }, + { + "epoch": 0.3170933113129645, + "grad_norm": 1.1300404071807861, + "learning_rate": 7.989884541730857e-06, + "loss": 0.3095, + "step": 15840 + }, + { + "epoch": 0.3171133298300928, + "grad_norm": 1.1285746097564697, + "learning_rate": 7.98962469844791e-06, + "loss": 0.3287, + "step": 15841 + }, + { + "epoch": 0.3171333483472212, + "grad_norm": 1.781247854232788, + "learning_rate": 7.989364842597219e-06, + "loss": 0.85, + "step": 15842 + }, + { + "epoch": 0.31715336686434953, + "grad_norm": 1.3064906597137451, + "learning_rate": 7.989104974179884e-06, + "loss": 0.3103, + "step": 15843 + }, + { + "epoch": 0.3171733853814779, + "grad_norm": 1.1467981338500977, + "learning_rate": 7.988845093196996e-06, + "loss": 0.3461, + "step": 15844 + }, + { + "epoch": 0.31719340389860623, + "grad_norm": 1.9014232158660889, + "learning_rate": 7.988585199649644e-06, + "loss": 0.7582, + "step": 15845 + }, + { + "epoch": 0.3172134224157346, + "grad_norm": 1.1187278032302856, + "learning_rate": 7.988325293538923e-06, + "loss": 0.3298, + "step": 15846 + }, + { + "epoch": 0.3172334409328629, + "grad_norm": 1.095849633216858, + "learning_rate": 7.988065374865928e-06, + "loss": 0.3468, + "step": 15847 + }, + { + "epoch": 0.3172534594499912, + "grad_norm": 1.1109914779663086, + "learning_rate": 7.987805443631746e-06, + "loss": 0.3247, + "step": 15848 + }, + { + "epoch": 0.3172734779671196, + "grad_norm": 1.022599220275879, + "learning_rate": 7.987545499837477e-06, + "loss": 0.3272, + "step": 15849 + }, + { + "epoch": 0.3172934964842479, + "grad_norm": 1.1183732748031616, + "learning_rate": 7.987285543484206e-06, + "loss": 0.3693, + "step": 15850 + }, + { + "epoch": 0.3173135150013763, + "grad_norm": 1.0737025737762451, + "learning_rate": 7.987025574573031e-06, + "loss": 0.3619, + "step": 15851 + }, + { + "epoch": 0.31733353351850463, + "grad_norm": 1.881754755973816, + "learning_rate": 7.986765593105043e-06, + "loss": 0.7877, + "step": 15852 + }, + { + "epoch": 0.317353552035633, + "grad_norm": 1.0550525188446045, + "learning_rate": 7.986505599081337e-06, + "loss": 0.3065, + "step": 15853 + }, + { + "epoch": 0.31737357055276133, + "grad_norm": 0.9532001614570618, + "learning_rate": 7.986245592503002e-06, + "loss": 0.2926, + "step": 15854 + }, + { + "epoch": 0.3173935890698896, + "grad_norm": 1.1970770359039307, + "learning_rate": 7.985985573371134e-06, + "loss": 0.3126, + "step": 15855 + }, + { + "epoch": 0.317413607587018, + "grad_norm": 1.1531903743743896, + "learning_rate": 7.985725541686825e-06, + "loss": 0.332, + "step": 15856 + }, + { + "epoch": 0.3174336261041463, + "grad_norm": 1.0695210695266724, + "learning_rate": 7.985465497451168e-06, + "loss": 0.3375, + "step": 15857 + }, + { + "epoch": 0.3174536446212747, + "grad_norm": 1.1166261434555054, + "learning_rate": 7.985205440665258e-06, + "loss": 0.3316, + "step": 15858 + }, + { + "epoch": 0.31747366313840303, + "grad_norm": 1.063260793685913, + "learning_rate": 7.984945371330184e-06, + "loss": 0.3222, + "step": 15859 + }, + { + "epoch": 0.3174936816555314, + "grad_norm": 1.1358400583267212, + "learning_rate": 7.984685289447046e-06, + "loss": 0.3196, + "step": 15860 + }, + { + "epoch": 0.31751370017265973, + "grad_norm": 1.2030786275863647, + "learning_rate": 7.98442519501693e-06, + "loss": 0.3109, + "step": 15861 + }, + { + "epoch": 0.3175337186897881, + "grad_norm": 1.0462323427200317, + "learning_rate": 7.984165088040933e-06, + "loss": 0.3382, + "step": 15862 + }, + { + "epoch": 0.3175537372069164, + "grad_norm": 1.1048104763031006, + "learning_rate": 7.983904968520148e-06, + "loss": 0.3021, + "step": 15863 + }, + { + "epoch": 0.3175737557240447, + "grad_norm": 1.1377933025360107, + "learning_rate": 7.98364483645567e-06, + "loss": 0.3284, + "step": 15864 + }, + { + "epoch": 0.3175937742411731, + "grad_norm": 1.7649656534194946, + "learning_rate": 7.983384691848592e-06, + "loss": 0.8016, + "step": 15865 + }, + { + "epoch": 0.3176137927583014, + "grad_norm": 1.0741307735443115, + "learning_rate": 7.983124534700006e-06, + "loss": 0.2948, + "step": 15866 + }, + { + "epoch": 0.3176338112754298, + "grad_norm": 1.2575838565826416, + "learning_rate": 7.982864365011004e-06, + "loss": 0.3017, + "step": 15867 + }, + { + "epoch": 0.31765382979255813, + "grad_norm": 1.1571115255355835, + "learning_rate": 7.982604182782684e-06, + "loss": 0.3277, + "step": 15868 + }, + { + "epoch": 0.3176738483096865, + "grad_norm": 1.0953725576400757, + "learning_rate": 7.982343988016139e-06, + "loss": 0.3241, + "step": 15869 + }, + { + "epoch": 0.31769386682681483, + "grad_norm": 1.3251255750656128, + "learning_rate": 7.98208378071246e-06, + "loss": 0.342, + "step": 15870 + }, + { + "epoch": 0.3177138853439431, + "grad_norm": 1.042497992515564, + "learning_rate": 7.981823560872744e-06, + "loss": 0.3522, + "step": 15871 + }, + { + "epoch": 0.3177339038610715, + "grad_norm": 1.082552194595337, + "learning_rate": 7.981563328498083e-06, + "loss": 0.3367, + "step": 15872 + }, + { + "epoch": 0.3177539223781998, + "grad_norm": 1.0490095615386963, + "learning_rate": 7.981303083589571e-06, + "loss": 0.3166, + "step": 15873 + }, + { + "epoch": 0.3177739408953282, + "grad_norm": 1.1493165493011475, + "learning_rate": 7.981042826148302e-06, + "loss": 0.3507, + "step": 15874 + }, + { + "epoch": 0.31779395941245653, + "grad_norm": 1.0954639911651611, + "learning_rate": 7.980782556175371e-06, + "loss": 0.3511, + "step": 15875 + }, + { + "epoch": 0.3178139779295849, + "grad_norm": 1.0571439266204834, + "learning_rate": 7.98052227367187e-06, + "loss": 0.2775, + "step": 15876 + }, + { + "epoch": 0.31783399644671323, + "grad_norm": 1.0528301000595093, + "learning_rate": 7.980261978638898e-06, + "loss": 0.2844, + "step": 15877 + }, + { + "epoch": 0.3178540149638416, + "grad_norm": 1.0495128631591797, + "learning_rate": 7.980001671077546e-06, + "loss": 0.3312, + "step": 15878 + }, + { + "epoch": 0.3178740334809699, + "grad_norm": 1.2023578882217407, + "learning_rate": 7.979741350988906e-06, + "loss": 0.331, + "step": 15879 + }, + { + "epoch": 0.3178940519980982, + "grad_norm": 1.0511821508407593, + "learning_rate": 7.979481018374074e-06, + "loss": 0.2834, + "step": 15880 + }, + { + "epoch": 0.3179140705152266, + "grad_norm": 1.1926765441894531, + "learning_rate": 7.979220673234148e-06, + "loss": 0.346, + "step": 15881 + }, + { + "epoch": 0.3179340890323549, + "grad_norm": 1.062485933303833, + "learning_rate": 7.978960315570217e-06, + "loss": 0.3641, + "step": 15882 + }, + { + "epoch": 0.3179541075494833, + "grad_norm": 1.1747099161148071, + "learning_rate": 7.978699945383379e-06, + "loss": 0.3439, + "step": 15883 + }, + { + "epoch": 0.31797412606661163, + "grad_norm": 1.327055811882019, + "learning_rate": 7.978439562674727e-06, + "loss": 0.3925, + "step": 15884 + }, + { + "epoch": 0.31799414458374, + "grad_norm": 1.0818301439285278, + "learning_rate": 7.978179167445354e-06, + "loss": 0.3261, + "step": 15885 + }, + { + "epoch": 0.31801416310086833, + "grad_norm": 0.9691792726516724, + "learning_rate": 7.977918759696359e-06, + "loss": 0.279, + "step": 15886 + }, + { + "epoch": 0.3180341816179966, + "grad_norm": 1.1512354612350464, + "learning_rate": 7.977658339428835e-06, + "loss": 0.3526, + "step": 15887 + }, + { + "epoch": 0.318054200135125, + "grad_norm": 1.0556752681732178, + "learning_rate": 7.977397906643876e-06, + "loss": 0.3338, + "step": 15888 + }, + { + "epoch": 0.3180742186522533, + "grad_norm": 1.0940375328063965, + "learning_rate": 7.977137461342576e-06, + "loss": 0.3327, + "step": 15889 + }, + { + "epoch": 0.3180942371693817, + "grad_norm": 1.8126535415649414, + "learning_rate": 7.97687700352603e-06, + "loss": 0.8367, + "step": 15890 + }, + { + "epoch": 0.31811425568651003, + "grad_norm": 1.3063842058181763, + "learning_rate": 7.976616533195336e-06, + "loss": 0.3432, + "step": 15891 + }, + { + "epoch": 0.3181342742036384, + "grad_norm": 1.121138334274292, + "learning_rate": 7.976356050351586e-06, + "loss": 0.2943, + "step": 15892 + }, + { + "epoch": 0.31815429272076673, + "grad_norm": 1.187639832496643, + "learning_rate": 7.976095554995875e-06, + "loss": 0.2813, + "step": 15893 + }, + { + "epoch": 0.3181743112378951, + "grad_norm": 1.0919607877731323, + "learning_rate": 7.9758350471293e-06, + "loss": 0.3505, + "step": 15894 + }, + { + "epoch": 0.3181943297550234, + "grad_norm": 1.8138633966445923, + "learning_rate": 7.975574526752952e-06, + "loss": 0.8741, + "step": 15895 + }, + { + "epoch": 0.3182143482721517, + "grad_norm": 1.07542085647583, + "learning_rate": 7.97531399386793e-06, + "loss": 0.3476, + "step": 15896 + }, + { + "epoch": 0.3182343667892801, + "grad_norm": 1.0434579849243164, + "learning_rate": 7.97505344847533e-06, + "loss": 0.3433, + "step": 15897 + }, + { + "epoch": 0.3182543853064084, + "grad_norm": 1.4234166145324707, + "learning_rate": 7.974792890576245e-06, + "loss": 0.2878, + "step": 15898 + }, + { + "epoch": 0.3182744038235368, + "grad_norm": 1.1008360385894775, + "learning_rate": 7.974532320171768e-06, + "loss": 0.3598, + "step": 15899 + }, + { + "epoch": 0.31829442234066513, + "grad_norm": 5.684725761413574, + "learning_rate": 7.974271737263002e-06, + "loss": 0.9382, + "step": 15900 + }, + { + "epoch": 0.3183144408577935, + "grad_norm": 1.2124577760696411, + "learning_rate": 7.974011141851034e-06, + "loss": 0.3356, + "step": 15901 + }, + { + "epoch": 0.31833445937492183, + "grad_norm": 1.0695265531539917, + "learning_rate": 7.973750533936966e-06, + "loss": 0.2796, + "step": 15902 + }, + { + "epoch": 0.3183544778920501, + "grad_norm": 1.0909574031829834, + "learning_rate": 7.973489913521888e-06, + "loss": 0.3254, + "step": 15903 + }, + { + "epoch": 0.3183744964091785, + "grad_norm": 1.029857873916626, + "learning_rate": 7.9732292806069e-06, + "loss": 0.3144, + "step": 15904 + }, + { + "epoch": 0.3183945149263068, + "grad_norm": 1.1691703796386719, + "learning_rate": 7.972968635193096e-06, + "loss": 0.3954, + "step": 15905 + }, + { + "epoch": 0.3184145334434352, + "grad_norm": 1.0529288053512573, + "learning_rate": 7.972707977281571e-06, + "loss": 0.3176, + "step": 15906 + }, + { + "epoch": 0.3184345519605635, + "grad_norm": 1.0838969945907593, + "learning_rate": 7.97244730687342e-06, + "loss": 0.3388, + "step": 15907 + }, + { + "epoch": 0.3184545704776919, + "grad_norm": 1.2141493558883667, + "learning_rate": 7.972186623969743e-06, + "loss": 0.3287, + "step": 15908 + }, + { + "epoch": 0.31847458899482023, + "grad_norm": 1.0475459098815918, + "learning_rate": 7.971925928571633e-06, + "loss": 0.3452, + "step": 15909 + }, + { + "epoch": 0.3184946075119486, + "grad_norm": 1.0512219667434692, + "learning_rate": 7.971665220680183e-06, + "loss": 0.318, + "step": 15910 + }, + { + "epoch": 0.3185146260290769, + "grad_norm": 1.1586828231811523, + "learning_rate": 7.971404500296494e-06, + "loss": 0.3841, + "step": 15911 + }, + { + "epoch": 0.3185346445462052, + "grad_norm": 1.1654413938522339, + "learning_rate": 7.97114376742166e-06, + "loss": 0.3165, + "step": 15912 + }, + { + "epoch": 0.3185546630633336, + "grad_norm": 1.0807576179504395, + "learning_rate": 7.970883022056777e-06, + "loss": 0.3504, + "step": 15913 + }, + { + "epoch": 0.3185746815804619, + "grad_norm": 1.1503883600234985, + "learning_rate": 7.97062226420294e-06, + "loss": 0.3086, + "step": 15914 + }, + { + "epoch": 0.3185947000975903, + "grad_norm": 1.31142258644104, + "learning_rate": 7.970361493861248e-06, + "loss": 0.3926, + "step": 15915 + }, + { + "epoch": 0.31861471861471863, + "grad_norm": 1.1350359916687012, + "learning_rate": 7.970100711032794e-06, + "loss": 0.2688, + "step": 15916 + }, + { + "epoch": 0.318634737131847, + "grad_norm": 1.06954824924469, + "learning_rate": 7.969839915718676e-06, + "loss": 0.349, + "step": 15917 + }, + { + "epoch": 0.31865475564897533, + "grad_norm": 1.1605249643325806, + "learning_rate": 7.969579107919992e-06, + "loss": 0.3842, + "step": 15918 + }, + { + "epoch": 0.3186747741661036, + "grad_norm": 1.3090388774871826, + "learning_rate": 7.969318287637833e-06, + "loss": 0.3342, + "step": 15919 + }, + { + "epoch": 0.318694792683232, + "grad_norm": 1.1585639715194702, + "learning_rate": 7.969057454873302e-06, + "loss": 0.3381, + "step": 15920 + }, + { + "epoch": 0.3187148112003603, + "grad_norm": 1.0270460844039917, + "learning_rate": 7.968796609627491e-06, + "loss": 0.2889, + "step": 15921 + }, + { + "epoch": 0.3187348297174887, + "grad_norm": 1.1470599174499512, + "learning_rate": 7.968535751901499e-06, + "loss": 0.3054, + "step": 15922 + }, + { + "epoch": 0.318754848234617, + "grad_norm": 1.0616958141326904, + "learning_rate": 7.96827488169642e-06, + "loss": 0.3351, + "step": 15923 + }, + { + "epoch": 0.3187748667517454, + "grad_norm": 1.113740086555481, + "learning_rate": 7.968013999013353e-06, + "loss": 0.3009, + "step": 15924 + }, + { + "epoch": 0.31879488526887373, + "grad_norm": 1.2132222652435303, + "learning_rate": 7.967753103853393e-06, + "loss": 0.3305, + "step": 15925 + }, + { + "epoch": 0.3188149037860021, + "grad_norm": 1.4278208017349243, + "learning_rate": 7.96749219621764e-06, + "loss": 0.326, + "step": 15926 + }, + { + "epoch": 0.3188349223031304, + "grad_norm": 1.0849207639694214, + "learning_rate": 7.967231276107186e-06, + "loss": 0.3146, + "step": 15927 + }, + { + "epoch": 0.3188549408202587, + "grad_norm": 1.1286039352416992, + "learning_rate": 7.96697034352313e-06, + "loss": 0.3286, + "step": 15928 + }, + { + "epoch": 0.3188749593373871, + "grad_norm": 1.0540021657943726, + "learning_rate": 7.966709398466573e-06, + "loss": 0.2912, + "step": 15929 + }, + { + "epoch": 0.3188949778545154, + "grad_norm": 1.0235848426818848, + "learning_rate": 7.966448440938603e-06, + "loss": 0.2954, + "step": 15930 + }, + { + "epoch": 0.3189149963716438, + "grad_norm": 1.0865809917449951, + "learning_rate": 7.966187470940325e-06, + "loss": 0.2988, + "step": 15931 + }, + { + "epoch": 0.31893501488877213, + "grad_norm": 1.0043267011642456, + "learning_rate": 7.965926488472832e-06, + "loss": 0.2929, + "step": 15932 + }, + { + "epoch": 0.3189550334059005, + "grad_norm": 1.0063058137893677, + "learning_rate": 7.965665493537225e-06, + "loss": 0.3014, + "step": 15933 + }, + { + "epoch": 0.3189750519230288, + "grad_norm": 1.2208715677261353, + "learning_rate": 7.965404486134596e-06, + "loss": 0.3228, + "step": 15934 + }, + { + "epoch": 0.3189950704401571, + "grad_norm": 1.8943649530410767, + "learning_rate": 7.965143466266046e-06, + "loss": 0.8473, + "step": 15935 + }, + { + "epoch": 0.3190150889572855, + "grad_norm": 1.2104442119598389, + "learning_rate": 7.96488243393267e-06, + "loss": 0.358, + "step": 15936 + }, + { + "epoch": 0.3190351074744138, + "grad_norm": 1.2160768508911133, + "learning_rate": 7.964621389135565e-06, + "loss": 0.2886, + "step": 15937 + }, + { + "epoch": 0.3190551259915422, + "grad_norm": 1.1685001850128174, + "learning_rate": 7.964360331875832e-06, + "loss": 0.3332, + "step": 15938 + }, + { + "epoch": 0.3190751445086705, + "grad_norm": 1.0822430849075317, + "learning_rate": 7.964099262154565e-06, + "loss": 0.3444, + "step": 15939 + }, + { + "epoch": 0.3190951630257989, + "grad_norm": 1.1724649667739868, + "learning_rate": 7.963838179972862e-06, + "loss": 0.2744, + "step": 15940 + }, + { + "epoch": 0.31911518154292723, + "grad_norm": 1.4280519485473633, + "learning_rate": 7.963577085331824e-06, + "loss": 0.2804, + "step": 15941 + }, + { + "epoch": 0.3191352000600555, + "grad_norm": 1.0658701658248901, + "learning_rate": 7.963315978232543e-06, + "loss": 0.3062, + "step": 15942 + }, + { + "epoch": 0.3191552185771839, + "grad_norm": 1.1882983446121216, + "learning_rate": 7.96305485867612e-06, + "loss": 0.324, + "step": 15943 + }, + { + "epoch": 0.3191752370943122, + "grad_norm": 1.0637778043746948, + "learning_rate": 7.962793726663652e-06, + "loss": 0.3227, + "step": 15944 + }, + { + "epoch": 0.3191952556114406, + "grad_norm": 1.375177025794983, + "learning_rate": 7.962532582196237e-06, + "loss": 0.2642, + "step": 15945 + }, + { + "epoch": 0.3192152741285689, + "grad_norm": 1.0942161083221436, + "learning_rate": 7.962271425274972e-06, + "loss": 0.2886, + "step": 15946 + }, + { + "epoch": 0.3192352926456973, + "grad_norm": 1.1363954544067383, + "learning_rate": 7.962010255900956e-06, + "loss": 0.3182, + "step": 15947 + }, + { + "epoch": 0.31925531116282563, + "grad_norm": 1.0700794458389282, + "learning_rate": 7.961749074075287e-06, + "loss": 0.36, + "step": 15948 + }, + { + "epoch": 0.319275329679954, + "grad_norm": 1.0689876079559326, + "learning_rate": 7.961487879799061e-06, + "loss": 0.3435, + "step": 15949 + }, + { + "epoch": 0.3192953481970823, + "grad_norm": 1.036600112915039, + "learning_rate": 7.961226673073377e-06, + "loss": 0.3432, + "step": 15950 + }, + { + "epoch": 0.3193153667142106, + "grad_norm": 1.1764057874679565, + "learning_rate": 7.960965453899333e-06, + "loss": 0.278, + "step": 15951 + }, + { + "epoch": 0.319335385231339, + "grad_norm": 1.027419090270996, + "learning_rate": 7.96070422227803e-06, + "loss": 0.3155, + "step": 15952 + }, + { + "epoch": 0.3193554037484673, + "grad_norm": 1.2023357152938843, + "learning_rate": 7.960442978210562e-06, + "loss": 0.3331, + "step": 15953 + }, + { + "epoch": 0.3193754222655957, + "grad_norm": 1.0066418647766113, + "learning_rate": 7.96018172169803e-06, + "loss": 0.2841, + "step": 15954 + }, + { + "epoch": 0.319395440782724, + "grad_norm": 1.1372411251068115, + "learning_rate": 7.959920452741529e-06, + "loss": 0.3168, + "step": 15955 + }, + { + "epoch": 0.3194154592998524, + "grad_norm": 1.121742606163025, + "learning_rate": 7.95965917134216e-06, + "loss": 0.3519, + "step": 15956 + }, + { + "epoch": 0.31943547781698073, + "grad_norm": 1.1315486431121826, + "learning_rate": 7.959397877501021e-06, + "loss": 0.3301, + "step": 15957 + }, + { + "epoch": 0.319455496334109, + "grad_norm": 1.0933513641357422, + "learning_rate": 7.95913657121921e-06, + "loss": 0.333, + "step": 15958 + }, + { + "epoch": 0.3194755148512374, + "grad_norm": 0.9890584349632263, + "learning_rate": 7.958875252497826e-06, + "loss": 0.3172, + "step": 15959 + }, + { + "epoch": 0.3194955333683657, + "grad_norm": 1.9399298429489136, + "learning_rate": 7.958613921337968e-06, + "loss": 0.8704, + "step": 15960 + }, + { + "epoch": 0.3195155518854941, + "grad_norm": 1.0454214811325073, + "learning_rate": 7.958352577740733e-06, + "loss": 0.3346, + "step": 15961 + }, + { + "epoch": 0.3195355704026224, + "grad_norm": 1.143484115600586, + "learning_rate": 7.958091221707221e-06, + "loss": 0.3057, + "step": 15962 + }, + { + "epoch": 0.3195555889197508, + "grad_norm": 1.1103031635284424, + "learning_rate": 7.957829853238527e-06, + "loss": 0.3433, + "step": 15963 + }, + { + "epoch": 0.3195756074368791, + "grad_norm": 1.1421387195587158, + "learning_rate": 7.957568472335757e-06, + "loss": 0.3446, + "step": 15964 + }, + { + "epoch": 0.3195956259540075, + "grad_norm": 1.8416670560836792, + "learning_rate": 7.957307079000003e-06, + "loss": 0.861, + "step": 15965 + }, + { + "epoch": 0.3196156444711358, + "grad_norm": 1.9766820669174194, + "learning_rate": 7.95704567323237e-06, + "loss": 0.8134, + "step": 15966 + }, + { + "epoch": 0.3196356629882641, + "grad_norm": 1.112717628479004, + "learning_rate": 7.956784255033949e-06, + "loss": 0.3204, + "step": 15967 + }, + { + "epoch": 0.3196556815053925, + "grad_norm": 1.1118100881576538, + "learning_rate": 7.956522824405845e-06, + "loss": 0.294, + "step": 15968 + }, + { + "epoch": 0.3196757000225208, + "grad_norm": 1.042443037033081, + "learning_rate": 7.956261381349155e-06, + "loss": 0.3004, + "step": 15969 + }, + { + "epoch": 0.3196957185396492, + "grad_norm": 0.9901010394096375, + "learning_rate": 7.95599992586498e-06, + "loss": 0.3499, + "step": 15970 + }, + { + "epoch": 0.3197157370567775, + "grad_norm": 1.1066582202911377, + "learning_rate": 7.955738457954416e-06, + "loss": 0.3197, + "step": 15971 + }, + { + "epoch": 0.3197357555739059, + "grad_norm": 1.0707855224609375, + "learning_rate": 7.955476977618563e-06, + "loss": 0.3447, + "step": 15972 + }, + { + "epoch": 0.31975577409103423, + "grad_norm": 1.0601770877838135, + "learning_rate": 7.95521548485852e-06, + "loss": 0.3068, + "step": 15973 + }, + { + "epoch": 0.3197757926081625, + "grad_norm": 1.0233275890350342, + "learning_rate": 7.954953979675389e-06, + "loss": 0.2891, + "step": 15974 + }, + { + "epoch": 0.3197958111252909, + "grad_norm": 2.0299901962280273, + "learning_rate": 7.954692462070268e-06, + "loss": 0.7612, + "step": 15975 + }, + { + "epoch": 0.3198158296424192, + "grad_norm": 1.8467440605163574, + "learning_rate": 7.954430932044255e-06, + "loss": 0.8234, + "step": 15976 + }, + { + "epoch": 0.3198358481595476, + "grad_norm": 1.1677742004394531, + "learning_rate": 7.95416938959845e-06, + "loss": 0.3101, + "step": 15977 + }, + { + "epoch": 0.3198558666766759, + "grad_norm": 1.1323002576828003, + "learning_rate": 7.953907834733952e-06, + "loss": 0.302, + "step": 15978 + }, + { + "epoch": 0.3198758851938043, + "grad_norm": 1.165783405303955, + "learning_rate": 7.953646267451861e-06, + "loss": 0.3388, + "step": 15979 + }, + { + "epoch": 0.3198959037109326, + "grad_norm": 1.1811617612838745, + "learning_rate": 7.953384687753279e-06, + "loss": 0.3508, + "step": 15980 + }, + { + "epoch": 0.319915922228061, + "grad_norm": 1.1922128200531006, + "learning_rate": 7.953123095639301e-06, + "loss": 0.2959, + "step": 15981 + }, + { + "epoch": 0.3199359407451893, + "grad_norm": 1.0713189840316772, + "learning_rate": 7.95286149111103e-06, + "loss": 0.326, + "step": 15982 + }, + { + "epoch": 0.3199559592623176, + "grad_norm": 1.114931344985962, + "learning_rate": 7.952599874169565e-06, + "loss": 0.3244, + "step": 15983 + }, + { + "epoch": 0.319975977779446, + "grad_norm": 1.1026209592819214, + "learning_rate": 7.952338244816004e-06, + "loss": 0.2741, + "step": 15984 + }, + { + "epoch": 0.3199959962965743, + "grad_norm": 1.0762616395950317, + "learning_rate": 7.95207660305145e-06, + "loss": 0.3245, + "step": 15985 + }, + { + "epoch": 0.3200160148137027, + "grad_norm": 1.0192564725875854, + "learning_rate": 7.951814948877e-06, + "loss": 0.3319, + "step": 15986 + }, + { + "epoch": 0.320036033330831, + "grad_norm": 1.2564797401428223, + "learning_rate": 7.951553282293755e-06, + "loss": 0.3386, + "step": 15987 + }, + { + "epoch": 0.3200560518479594, + "grad_norm": 1.825385332107544, + "learning_rate": 7.951291603302818e-06, + "loss": 0.865, + "step": 15988 + }, + { + "epoch": 0.32007607036508773, + "grad_norm": 0.9400061964988708, + "learning_rate": 7.951029911905283e-06, + "loss": 0.2807, + "step": 15989 + }, + { + "epoch": 0.320096088882216, + "grad_norm": 1.0542563199996948, + "learning_rate": 7.950768208102256e-06, + "loss": 0.3273, + "step": 15990 + }, + { + "epoch": 0.3201161073993444, + "grad_norm": 1.1054904460906982, + "learning_rate": 7.950506491894832e-06, + "loss": 0.3445, + "step": 15991 + }, + { + "epoch": 0.3201361259164727, + "grad_norm": 1.0205395221710205, + "learning_rate": 7.950244763284114e-06, + "loss": 0.3235, + "step": 15992 + }, + { + "epoch": 0.3201561444336011, + "grad_norm": 1.9740216732025146, + "learning_rate": 7.949983022271204e-06, + "loss": 0.791, + "step": 15993 + }, + { + "epoch": 0.3201761629507294, + "grad_norm": 1.0673481225967407, + "learning_rate": 7.949721268857197e-06, + "loss": 0.3595, + "step": 15994 + }, + { + "epoch": 0.3201961814678578, + "grad_norm": 1.2670310735702515, + "learning_rate": 7.9494595030432e-06, + "loss": 0.3185, + "step": 15995 + }, + { + "epoch": 0.3202161999849861, + "grad_norm": 1.0441516637802124, + "learning_rate": 7.949197724830308e-06, + "loss": 0.3012, + "step": 15996 + }, + { + "epoch": 0.3202362185021145, + "grad_norm": 1.222864031791687, + "learning_rate": 7.948935934219624e-06, + "loss": 0.3041, + "step": 15997 + }, + { + "epoch": 0.3202562370192428, + "grad_norm": 1.1145933866500854, + "learning_rate": 7.948674131212249e-06, + "loss": 0.364, + "step": 15998 + }, + { + "epoch": 0.3202762555363711, + "grad_norm": 1.0446783304214478, + "learning_rate": 7.94841231580928e-06, + "loss": 0.2763, + "step": 15999 + }, + { + "epoch": 0.3202962740534995, + "grad_norm": 1.0252676010131836, + "learning_rate": 7.948150488011822e-06, + "loss": 0.2725, + "step": 16000 + }, + { + "epoch": 0.3203162925706278, + "grad_norm": 1.024547815322876, + "learning_rate": 7.947888647820972e-06, + "loss": 0.3038, + "step": 16001 + }, + { + "epoch": 0.3203363110877562, + "grad_norm": 1.1181195974349976, + "learning_rate": 7.947626795237836e-06, + "loss": 0.3096, + "step": 16002 + }, + { + "epoch": 0.3203563296048845, + "grad_norm": 1.0718649625778198, + "learning_rate": 7.947364930263509e-06, + "loss": 0.304, + "step": 16003 + }, + { + "epoch": 0.3203763481220129, + "grad_norm": 2.1099584102630615, + "learning_rate": 7.947103052899092e-06, + "loss": 0.8441, + "step": 16004 + }, + { + "epoch": 0.32039636663914123, + "grad_norm": 1.166396141052246, + "learning_rate": 7.946841163145692e-06, + "loss": 0.3257, + "step": 16005 + }, + { + "epoch": 0.3204163851562695, + "grad_norm": 0.98058021068573, + "learning_rate": 7.946579261004404e-06, + "loss": 0.2982, + "step": 16006 + }, + { + "epoch": 0.3204364036733979, + "grad_norm": 1.1534782648086548, + "learning_rate": 7.94631734647633e-06, + "loss": 0.3097, + "step": 16007 + }, + { + "epoch": 0.3204564221905262, + "grad_norm": 1.0161395072937012, + "learning_rate": 7.946055419562573e-06, + "loss": 0.2871, + "step": 16008 + }, + { + "epoch": 0.3204764407076546, + "grad_norm": 1.3195303678512573, + "learning_rate": 7.945793480264234e-06, + "loss": 0.2965, + "step": 16009 + }, + { + "epoch": 0.3204964592247829, + "grad_norm": 1.1033633947372437, + "learning_rate": 7.94553152858241e-06, + "loss": 0.3012, + "step": 16010 + }, + { + "epoch": 0.3205164777419113, + "grad_norm": 1.1941496133804321, + "learning_rate": 7.945269564518208e-06, + "loss": 0.3599, + "step": 16011 + }, + { + "epoch": 0.3205364962590396, + "grad_norm": 1.1831756830215454, + "learning_rate": 7.945007588072726e-06, + "loss": 0.333, + "step": 16012 + }, + { + "epoch": 0.320556514776168, + "grad_norm": 1.0773601531982422, + "learning_rate": 7.944745599247067e-06, + "loss": 0.3403, + "step": 16013 + }, + { + "epoch": 0.3205765332932963, + "grad_norm": 1.1774502992630005, + "learning_rate": 7.94448359804233e-06, + "loss": 0.3382, + "step": 16014 + }, + { + "epoch": 0.3205965518104246, + "grad_norm": 1.1177886724472046, + "learning_rate": 7.944221584459617e-06, + "loss": 0.3332, + "step": 16015 + }, + { + "epoch": 0.320616570327553, + "grad_norm": 1.0924806594848633, + "learning_rate": 7.94395955850003e-06, + "loss": 0.3033, + "step": 16016 + }, + { + "epoch": 0.3206365888446813, + "grad_norm": 1.0626869201660156, + "learning_rate": 7.943697520164673e-06, + "loss": 0.2748, + "step": 16017 + }, + { + "epoch": 0.3206566073618097, + "grad_norm": 1.080229640007019, + "learning_rate": 7.943435469454643e-06, + "loss": 0.3361, + "step": 16018 + }, + { + "epoch": 0.320676625878938, + "grad_norm": 1.3598190546035767, + "learning_rate": 7.943173406371043e-06, + "loss": 0.3395, + "step": 16019 + }, + { + "epoch": 0.3206966443960664, + "grad_norm": 1.1227644681930542, + "learning_rate": 7.942911330914978e-06, + "loss": 0.306, + "step": 16020 + }, + { + "epoch": 0.3207166629131947, + "grad_norm": 1.0766648054122925, + "learning_rate": 7.942649243087544e-06, + "loss": 0.3429, + "step": 16021 + }, + { + "epoch": 0.320736681430323, + "grad_norm": 1.302140712738037, + "learning_rate": 7.942387142889848e-06, + "loss": 0.305, + "step": 16022 + }, + { + "epoch": 0.3207566999474514, + "grad_norm": 1.0459681749343872, + "learning_rate": 7.94212503032299e-06, + "loss": 0.3439, + "step": 16023 + }, + { + "epoch": 0.3207767184645797, + "grad_norm": 1.9229519367218018, + "learning_rate": 7.94186290538807e-06, + "loss": 0.8448, + "step": 16024 + }, + { + "epoch": 0.3207967369817081, + "grad_norm": 1.3524985313415527, + "learning_rate": 7.941600768086192e-06, + "loss": 0.3629, + "step": 16025 + }, + { + "epoch": 0.3208167554988364, + "grad_norm": 1.2364529371261597, + "learning_rate": 7.941338618418457e-06, + "loss": 0.3688, + "step": 16026 + }, + { + "epoch": 0.3208367740159648, + "grad_norm": 1.4068673849105835, + "learning_rate": 7.941076456385969e-06, + "loss": 0.2815, + "step": 16027 + }, + { + "epoch": 0.3208567925330931, + "grad_norm": 1.2149852514266968, + "learning_rate": 7.940814281989826e-06, + "loss": 0.359, + "step": 16028 + }, + { + "epoch": 0.3208768110502215, + "grad_norm": 1.1890134811401367, + "learning_rate": 7.940552095231135e-06, + "loss": 0.3454, + "step": 16029 + }, + { + "epoch": 0.3208968295673498, + "grad_norm": 1.1113344430923462, + "learning_rate": 7.940289896110996e-06, + "loss": 0.3293, + "step": 16030 + }, + { + "epoch": 0.3209168480844781, + "grad_norm": 1.9659397602081299, + "learning_rate": 7.94002768463051e-06, + "loss": 0.8799, + "step": 16031 + }, + { + "epoch": 0.3209368666016065, + "grad_norm": 1.1121853590011597, + "learning_rate": 7.939765460790778e-06, + "loss": 0.2877, + "step": 16032 + }, + { + "epoch": 0.3209568851187348, + "grad_norm": 1.0937674045562744, + "learning_rate": 7.939503224592908e-06, + "loss": 0.3262, + "step": 16033 + }, + { + "epoch": 0.3209769036358632, + "grad_norm": 1.0280979871749878, + "learning_rate": 7.939240976037998e-06, + "loss": 0.2906, + "step": 16034 + }, + { + "epoch": 0.3209969221529915, + "grad_norm": 1.9138277769088745, + "learning_rate": 7.938978715127152e-06, + "loss": 0.8386, + "step": 16035 + }, + { + "epoch": 0.3210169406701199, + "grad_norm": 1.1064046621322632, + "learning_rate": 7.93871644186147e-06, + "loss": 0.2974, + "step": 16036 + }, + { + "epoch": 0.3210369591872482, + "grad_norm": 1.975903868675232, + "learning_rate": 7.938454156242059e-06, + "loss": 0.8048, + "step": 16037 + }, + { + "epoch": 0.3210569777043765, + "grad_norm": 1.0572443008422852, + "learning_rate": 7.938191858270017e-06, + "loss": 0.3072, + "step": 16038 + }, + { + "epoch": 0.3210769962215049, + "grad_norm": 1.2503273487091064, + "learning_rate": 7.93792954794645e-06, + "loss": 0.3232, + "step": 16039 + }, + { + "epoch": 0.3210970147386332, + "grad_norm": 1.94866943359375, + "learning_rate": 7.937667225272458e-06, + "loss": 0.7954, + "step": 16040 + }, + { + "epoch": 0.3211170332557616, + "grad_norm": 1.837038278579712, + "learning_rate": 7.937404890249148e-06, + "loss": 0.8032, + "step": 16041 + }, + { + "epoch": 0.3211370517728899, + "grad_norm": 1.2533166408538818, + "learning_rate": 7.937142542877618e-06, + "loss": 0.3149, + "step": 16042 + }, + { + "epoch": 0.3211570702900183, + "grad_norm": 1.8451939821243286, + "learning_rate": 7.936880183158973e-06, + "loss": 0.8581, + "step": 16043 + }, + { + "epoch": 0.3211770888071466, + "grad_norm": 1.8747882843017578, + "learning_rate": 7.936617811094316e-06, + "loss": 0.862, + "step": 16044 + }, + { + "epoch": 0.321197107324275, + "grad_norm": 1.1311297416687012, + "learning_rate": 7.936355426684749e-06, + "loss": 0.2656, + "step": 16045 + }, + { + "epoch": 0.3212171258414033, + "grad_norm": 1.099130630493164, + "learning_rate": 7.936093029931376e-06, + "loss": 0.3134, + "step": 16046 + }, + { + "epoch": 0.3212371443585316, + "grad_norm": 2.064281702041626, + "learning_rate": 7.935830620835299e-06, + "loss": 0.7892, + "step": 16047 + }, + { + "epoch": 0.32125716287566, + "grad_norm": 1.130246639251709, + "learning_rate": 7.935568199397624e-06, + "loss": 0.288, + "step": 16048 + }, + { + "epoch": 0.3212771813927883, + "grad_norm": 1.1016185283660889, + "learning_rate": 7.93530576561945e-06, + "loss": 0.3385, + "step": 16049 + }, + { + "epoch": 0.3212971999099167, + "grad_norm": 1.3103420734405518, + "learning_rate": 7.935043319501883e-06, + "loss": 0.3686, + "step": 16050 + }, + { + "epoch": 0.321317218427045, + "grad_norm": 1.0595530271530151, + "learning_rate": 7.934780861046026e-06, + "loss": 0.3477, + "step": 16051 + }, + { + "epoch": 0.3213372369441734, + "grad_norm": 1.0202035903930664, + "learning_rate": 7.93451839025298e-06, + "loss": 0.2773, + "step": 16052 + }, + { + "epoch": 0.3213572554613017, + "grad_norm": 1.0569735765457153, + "learning_rate": 7.934255907123853e-06, + "loss": 0.3266, + "step": 16053 + }, + { + "epoch": 0.32137727397843, + "grad_norm": 2.113718271255493, + "learning_rate": 7.933993411659743e-06, + "loss": 0.8332, + "step": 16054 + }, + { + "epoch": 0.3213972924955584, + "grad_norm": 1.8945256471633911, + "learning_rate": 7.933730903861759e-06, + "loss": 0.8291, + "step": 16055 + }, + { + "epoch": 0.3214173110126867, + "grad_norm": 1.4318739175796509, + "learning_rate": 7.933468383731e-06, + "loss": 0.322, + "step": 16056 + }, + { + "epoch": 0.3214373295298151, + "grad_norm": 1.149771809577942, + "learning_rate": 7.93320585126857e-06, + "loss": 0.3851, + "step": 16057 + }, + { + "epoch": 0.3214573480469434, + "grad_norm": 1.2350233793258667, + "learning_rate": 7.932943306475576e-06, + "loss": 0.3376, + "step": 16058 + }, + { + "epoch": 0.3214773665640718, + "grad_norm": 1.7120078802108765, + "learning_rate": 7.93268074935312e-06, + "loss": 0.8457, + "step": 16059 + }, + { + "epoch": 0.3214973850812001, + "grad_norm": 1.0713465213775635, + "learning_rate": 7.932418179902303e-06, + "loss": 0.3001, + "step": 16060 + }, + { + "epoch": 0.3215174035983285, + "grad_norm": 1.0612777471542358, + "learning_rate": 7.932155598124232e-06, + "loss": 0.3103, + "step": 16061 + }, + { + "epoch": 0.3215374221154568, + "grad_norm": 1.1093813180923462, + "learning_rate": 7.931893004020008e-06, + "loss": 0.3002, + "step": 16062 + }, + { + "epoch": 0.3215574406325851, + "grad_norm": 1.058369755744934, + "learning_rate": 7.931630397590739e-06, + "loss": 0.3209, + "step": 16063 + }, + { + "epoch": 0.3215774591497135, + "grad_norm": 1.1517219543457031, + "learning_rate": 7.931367778837526e-06, + "loss": 0.3323, + "step": 16064 + }, + { + "epoch": 0.3215974776668418, + "grad_norm": 1.0667530298233032, + "learning_rate": 7.931105147761474e-06, + "loss": 0.3547, + "step": 16065 + }, + { + "epoch": 0.3216174961839702, + "grad_norm": 1.0369693040847778, + "learning_rate": 7.930842504363687e-06, + "loss": 0.3766, + "step": 16066 + }, + { + "epoch": 0.3216375147010985, + "grad_norm": 1.2009707689285278, + "learning_rate": 7.930579848645269e-06, + "loss": 0.3429, + "step": 16067 + }, + { + "epoch": 0.3216575332182269, + "grad_norm": 1.0964951515197754, + "learning_rate": 7.930317180607324e-06, + "loss": 0.335, + "step": 16068 + }, + { + "epoch": 0.3216775517353552, + "grad_norm": 1.0060811042785645, + "learning_rate": 7.930054500250955e-06, + "loss": 0.3401, + "step": 16069 + }, + { + "epoch": 0.3216975702524835, + "grad_norm": 1.0499184131622314, + "learning_rate": 7.929791807577267e-06, + "loss": 0.3064, + "step": 16070 + }, + { + "epoch": 0.3217175887696119, + "grad_norm": 1.3482917547225952, + "learning_rate": 7.929529102587366e-06, + "loss": 0.2945, + "step": 16071 + }, + { + "epoch": 0.3217376072867402, + "grad_norm": 1.0706195831298828, + "learning_rate": 7.929266385282354e-06, + "loss": 0.3194, + "step": 16072 + }, + { + "epoch": 0.3217576258038686, + "grad_norm": 1.1365076303482056, + "learning_rate": 7.929003655663339e-06, + "loss": 0.4189, + "step": 16073 + }, + { + "epoch": 0.3217776443209969, + "grad_norm": 1.2115520238876343, + "learning_rate": 7.92874091373142e-06, + "loss": 0.2915, + "step": 16074 + }, + { + "epoch": 0.3217976628381253, + "grad_norm": 1.8820641040802002, + "learning_rate": 7.928478159487707e-06, + "loss": 0.8561, + "step": 16075 + }, + { + "epoch": 0.3218176813552536, + "grad_norm": 1.8864651918411255, + "learning_rate": 7.928215392933301e-06, + "loss": 0.7847, + "step": 16076 + }, + { + "epoch": 0.321837699872382, + "grad_norm": 1.1186076402664185, + "learning_rate": 7.927952614069308e-06, + "loss": 0.279, + "step": 16077 + }, + { + "epoch": 0.32185771838951027, + "grad_norm": 1.0806411504745483, + "learning_rate": 7.92768982289683e-06, + "loss": 0.3143, + "step": 16078 + }, + { + "epoch": 0.3218777369066386, + "grad_norm": 1.132778525352478, + "learning_rate": 7.927427019416975e-06, + "loss": 0.3072, + "step": 16079 + }, + { + "epoch": 0.321897755423767, + "grad_norm": 1.1170390844345093, + "learning_rate": 7.92716420363085e-06, + "loss": 0.3256, + "step": 16080 + }, + { + "epoch": 0.3219177739408953, + "grad_norm": 1.0695304870605469, + "learning_rate": 7.926901375539553e-06, + "loss": 0.3088, + "step": 16081 + }, + { + "epoch": 0.3219377924580237, + "grad_norm": 1.1016368865966797, + "learning_rate": 7.926638535144193e-06, + "loss": 0.3087, + "step": 16082 + }, + { + "epoch": 0.321957810975152, + "grad_norm": 1.0748343467712402, + "learning_rate": 7.926375682445876e-06, + "loss": 0.3018, + "step": 16083 + }, + { + "epoch": 0.3219778294922804, + "grad_norm": 1.0800751447677612, + "learning_rate": 7.926112817445705e-06, + "loss": 0.2921, + "step": 16084 + }, + { + "epoch": 0.3219978480094087, + "grad_norm": 1.1788190603256226, + "learning_rate": 7.925849940144784e-06, + "loss": 0.3191, + "step": 16085 + }, + { + "epoch": 0.322017866526537, + "grad_norm": 0.9940882325172424, + "learning_rate": 7.925587050544221e-06, + "loss": 0.3171, + "step": 16086 + }, + { + "epoch": 0.3220378850436654, + "grad_norm": 1.0637725591659546, + "learning_rate": 7.92532414864512e-06, + "loss": 0.2827, + "step": 16087 + }, + { + "epoch": 0.3220579035607937, + "grad_norm": 1.1280368566513062, + "learning_rate": 7.925061234448584e-06, + "loss": 0.35, + "step": 16088 + }, + { + "epoch": 0.3220779220779221, + "grad_norm": 1.0970193147659302, + "learning_rate": 7.924798307955721e-06, + "loss": 0.3301, + "step": 16089 + }, + { + "epoch": 0.3220979405950504, + "grad_norm": 1.0308270454406738, + "learning_rate": 7.924535369167635e-06, + "loss": 0.3373, + "step": 16090 + }, + { + "epoch": 0.3221179591121788, + "grad_norm": 1.1281191110610962, + "learning_rate": 7.924272418085432e-06, + "loss": 0.2785, + "step": 16091 + }, + { + "epoch": 0.3221379776293071, + "grad_norm": 1.206221580505371, + "learning_rate": 7.924009454710217e-06, + "loss": 0.3302, + "step": 16092 + }, + { + "epoch": 0.3221579961464355, + "grad_norm": 1.0963261127471924, + "learning_rate": 7.923746479043096e-06, + "loss": 0.3275, + "step": 16093 + }, + { + "epoch": 0.32217801466356377, + "grad_norm": 1.1452805995941162, + "learning_rate": 7.923483491085173e-06, + "loss": 0.3272, + "step": 16094 + }, + { + "epoch": 0.3221980331806921, + "grad_norm": 1.103285551071167, + "learning_rate": 7.923220490837555e-06, + "loss": 0.361, + "step": 16095 + }, + { + "epoch": 0.3222180516978205, + "grad_norm": 1.0283870697021484, + "learning_rate": 7.922957478301348e-06, + "loss": 0.3336, + "step": 16096 + }, + { + "epoch": 0.3222380702149488, + "grad_norm": 1.8576953411102295, + "learning_rate": 7.922694453477655e-06, + "loss": 0.8178, + "step": 16097 + }, + { + "epoch": 0.3222580887320772, + "grad_norm": 1.1587140560150146, + "learning_rate": 7.922431416367584e-06, + "loss": 0.3142, + "step": 16098 + }, + { + "epoch": 0.3222781072492055, + "grad_norm": 1.0796771049499512, + "learning_rate": 7.922168366972243e-06, + "loss": 0.336, + "step": 16099 + }, + { + "epoch": 0.3222981257663339, + "grad_norm": 1.2277714014053345, + "learning_rate": 7.921905305292732e-06, + "loss": 0.3311, + "step": 16100 + }, + { + "epoch": 0.3223181442834622, + "grad_norm": 1.0475188493728638, + "learning_rate": 7.92164223133016e-06, + "loss": 0.2995, + "step": 16101 + }, + { + "epoch": 0.3223381628005905, + "grad_norm": 1.1873611211776733, + "learning_rate": 7.921379145085635e-06, + "loss": 0.346, + "step": 16102 + }, + { + "epoch": 0.3223581813177189, + "grad_norm": 1.1472736597061157, + "learning_rate": 7.921116046560257e-06, + "loss": 0.3096, + "step": 16103 + }, + { + "epoch": 0.3223781998348472, + "grad_norm": 1.1369794607162476, + "learning_rate": 7.920852935755138e-06, + "loss": 0.3383, + "step": 16104 + }, + { + "epoch": 0.3223982183519756, + "grad_norm": 1.089717984199524, + "learning_rate": 7.920589812671382e-06, + "loss": 0.3238, + "step": 16105 + }, + { + "epoch": 0.3224182368691039, + "grad_norm": 1.136135458946228, + "learning_rate": 7.920326677310097e-06, + "loss": 0.3255, + "step": 16106 + }, + { + "epoch": 0.3224382553862323, + "grad_norm": 1.1571234464645386, + "learning_rate": 7.920063529672383e-06, + "loss": 0.2958, + "step": 16107 + }, + { + "epoch": 0.3224582739033606, + "grad_norm": 1.2181396484375, + "learning_rate": 7.919800369759353e-06, + "loss": 0.329, + "step": 16108 + }, + { + "epoch": 0.322478292420489, + "grad_norm": 1.06546151638031, + "learning_rate": 7.91953719757211e-06, + "loss": 0.3642, + "step": 16109 + }, + { + "epoch": 0.32249831093761727, + "grad_norm": 1.0075995922088623, + "learning_rate": 7.91927401311176e-06, + "loss": 0.2559, + "step": 16110 + }, + { + "epoch": 0.3225183294547456, + "grad_norm": 1.1189472675323486, + "learning_rate": 7.919010816379412e-06, + "loss": 0.3534, + "step": 16111 + }, + { + "epoch": 0.322538347971874, + "grad_norm": 1.2900933027267456, + "learning_rate": 7.918747607376169e-06, + "loss": 0.336, + "step": 16112 + }, + { + "epoch": 0.3225583664890023, + "grad_norm": 1.6208049058914185, + "learning_rate": 7.91848438610314e-06, + "loss": 0.3201, + "step": 16113 + }, + { + "epoch": 0.3225783850061307, + "grad_norm": 1.175154209136963, + "learning_rate": 7.91822115256143e-06, + "loss": 0.3377, + "step": 16114 + }, + { + "epoch": 0.322598403523259, + "grad_norm": 1.2785727977752686, + "learning_rate": 7.917957906752146e-06, + "loss": 0.326, + "step": 16115 + }, + { + "epoch": 0.3226184220403874, + "grad_norm": 1.3901379108428955, + "learning_rate": 7.917694648676397e-06, + "loss": 0.3608, + "step": 16116 + }, + { + "epoch": 0.3226384405575157, + "grad_norm": 1.0784764289855957, + "learning_rate": 7.917431378335287e-06, + "loss": 0.3353, + "step": 16117 + }, + { + "epoch": 0.322658459074644, + "grad_norm": 1.044811487197876, + "learning_rate": 7.917168095729922e-06, + "loss": 0.3328, + "step": 16118 + }, + { + "epoch": 0.3226784775917724, + "grad_norm": 1.144865870475769, + "learning_rate": 7.916904800861411e-06, + "loss": 0.3637, + "step": 16119 + }, + { + "epoch": 0.3226984961089007, + "grad_norm": 1.1667673587799072, + "learning_rate": 7.91664149373086e-06, + "loss": 0.3305, + "step": 16120 + }, + { + "epoch": 0.3227185146260291, + "grad_norm": 1.8235106468200684, + "learning_rate": 7.916378174339376e-06, + "loss": 0.7409, + "step": 16121 + }, + { + "epoch": 0.3227385331431574, + "grad_norm": 1.2312806844711304, + "learning_rate": 7.916114842688065e-06, + "loss": 0.3031, + "step": 16122 + }, + { + "epoch": 0.3227585516602858, + "grad_norm": 1.1937977075576782, + "learning_rate": 7.915851498778035e-06, + "loss": 0.2722, + "step": 16123 + }, + { + "epoch": 0.3227785701774141, + "grad_norm": 1.2520259618759155, + "learning_rate": 7.915588142610394e-06, + "loss": 0.3371, + "step": 16124 + }, + { + "epoch": 0.3227985886945425, + "grad_norm": 1.0609010457992554, + "learning_rate": 7.915324774186246e-06, + "loss": 0.3137, + "step": 16125 + }, + { + "epoch": 0.32281860721167077, + "grad_norm": 1.2212260961532593, + "learning_rate": 7.915061393506701e-06, + "loss": 0.3334, + "step": 16126 + }, + { + "epoch": 0.3228386257287991, + "grad_norm": 0.9767196774482727, + "learning_rate": 7.914798000572866e-06, + "loss": 0.2714, + "step": 16127 + }, + { + "epoch": 0.3228586442459275, + "grad_norm": 1.125308632850647, + "learning_rate": 7.914534595385845e-06, + "loss": 0.3325, + "step": 16128 + }, + { + "epoch": 0.3228786627630558, + "grad_norm": 1.1677724123001099, + "learning_rate": 7.914271177946749e-06, + "loss": 0.2981, + "step": 16129 + }, + { + "epoch": 0.3228986812801842, + "grad_norm": 1.042743444442749, + "learning_rate": 7.914007748256685e-06, + "loss": 0.32, + "step": 16130 + }, + { + "epoch": 0.3229186997973125, + "grad_norm": 1.1691100597381592, + "learning_rate": 7.91374430631676e-06, + "loss": 0.3393, + "step": 16131 + }, + { + "epoch": 0.3229387183144409, + "grad_norm": 1.9781666994094849, + "learning_rate": 7.913480852128079e-06, + "loss": 0.8566, + "step": 16132 + }, + { + "epoch": 0.3229587368315692, + "grad_norm": 1.1674858331680298, + "learning_rate": 7.913217385691754e-06, + "loss": 0.3024, + "step": 16133 + }, + { + "epoch": 0.3229787553486975, + "grad_norm": 1.154548168182373, + "learning_rate": 7.912953907008887e-06, + "loss": 0.3076, + "step": 16134 + }, + { + "epoch": 0.32299877386582587, + "grad_norm": 1.1622698307037354, + "learning_rate": 7.91269041608059e-06, + "loss": 0.2735, + "step": 16135 + }, + { + "epoch": 0.3230187923829542, + "grad_norm": 1.1502145528793335, + "learning_rate": 7.912426912907969e-06, + "loss": 0.3377, + "step": 16136 + }, + { + "epoch": 0.3230388109000826, + "grad_norm": 1.0766959190368652, + "learning_rate": 7.912163397492134e-06, + "loss": 0.3231, + "step": 16137 + }, + { + "epoch": 0.3230588294172109, + "grad_norm": 1.0993515253067017, + "learning_rate": 7.911899869834188e-06, + "loss": 0.3453, + "step": 16138 + }, + { + "epoch": 0.3230788479343393, + "grad_norm": 1.199218988418579, + "learning_rate": 7.911636329935244e-06, + "loss": 0.3834, + "step": 16139 + }, + { + "epoch": 0.3230988664514676, + "grad_norm": 1.250146746635437, + "learning_rate": 7.911372777796405e-06, + "loss": 0.2974, + "step": 16140 + }, + { + "epoch": 0.323118884968596, + "grad_norm": 2.1037914752960205, + "learning_rate": 7.911109213418785e-06, + "loss": 0.9703, + "step": 16141 + }, + { + "epoch": 0.32313890348572427, + "grad_norm": 1.0794836282730103, + "learning_rate": 7.910845636803485e-06, + "loss": 0.2981, + "step": 16142 + }, + { + "epoch": 0.3231589220028526, + "grad_norm": 2.2011263370513916, + "learning_rate": 7.910582047951617e-06, + "loss": 0.7704, + "step": 16143 + }, + { + "epoch": 0.323178940519981, + "grad_norm": 1.0841180086135864, + "learning_rate": 7.910318446864288e-06, + "loss": 0.2909, + "step": 16144 + }, + { + "epoch": 0.3231989590371093, + "grad_norm": 1.0557315349578857, + "learning_rate": 7.910054833542609e-06, + "loss": 0.3091, + "step": 16145 + }, + { + "epoch": 0.3232189775542377, + "grad_norm": 1.0566134452819824, + "learning_rate": 7.909791207987684e-06, + "loss": 0.2711, + "step": 16146 + }, + { + "epoch": 0.323238996071366, + "grad_norm": 1.1436489820480347, + "learning_rate": 7.909527570200623e-06, + "loss": 0.312, + "step": 16147 + }, + { + "epoch": 0.3232590145884944, + "grad_norm": 1.0867899656295776, + "learning_rate": 7.909263920182534e-06, + "loss": 0.3192, + "step": 16148 + }, + { + "epoch": 0.3232790331056227, + "grad_norm": 0.9911724925041199, + "learning_rate": 7.909000257934524e-06, + "loss": 0.2982, + "step": 16149 + }, + { + "epoch": 0.323299051622751, + "grad_norm": 1.1812829971313477, + "learning_rate": 7.908736583457705e-06, + "loss": 0.3021, + "step": 16150 + }, + { + "epoch": 0.32331907013987937, + "grad_norm": 1.0922406911849976, + "learning_rate": 7.908472896753184e-06, + "loss": 0.3055, + "step": 16151 + }, + { + "epoch": 0.3233390886570077, + "grad_norm": 1.0808327198028564, + "learning_rate": 7.908209197822068e-06, + "loss": 0.361, + "step": 16152 + }, + { + "epoch": 0.3233591071741361, + "grad_norm": 1.2282137870788574, + "learning_rate": 7.907945486665467e-06, + "loss": 0.2914, + "step": 16153 + }, + { + "epoch": 0.3233791256912644, + "grad_norm": 1.0666550397872925, + "learning_rate": 7.907681763284488e-06, + "loss": 0.2959, + "step": 16154 + }, + { + "epoch": 0.3233991442083928, + "grad_norm": 1.080361247062683, + "learning_rate": 7.907418027680241e-06, + "loss": 0.3387, + "step": 16155 + }, + { + "epoch": 0.3234191627255211, + "grad_norm": 1.3822215795516968, + "learning_rate": 7.907154279853835e-06, + "loss": 0.371, + "step": 16156 + }, + { + "epoch": 0.3234391812426495, + "grad_norm": 1.30974280834198, + "learning_rate": 7.906890519806378e-06, + "loss": 0.3331, + "step": 16157 + }, + { + "epoch": 0.32345919975977777, + "grad_norm": 1.1437638998031616, + "learning_rate": 7.906626747538978e-06, + "loss": 0.3015, + "step": 16158 + }, + { + "epoch": 0.3234792182769061, + "grad_norm": 1.1856536865234375, + "learning_rate": 7.906362963052745e-06, + "loss": 0.2955, + "step": 16159 + }, + { + "epoch": 0.3234992367940345, + "grad_norm": 1.085404872894287, + "learning_rate": 7.906099166348786e-06, + "loss": 0.2898, + "step": 16160 + }, + { + "epoch": 0.3235192553111628, + "grad_norm": 1.0298467874526978, + "learning_rate": 7.905835357428212e-06, + "loss": 0.327, + "step": 16161 + }, + { + "epoch": 0.3235392738282912, + "grad_norm": 1.2704006433486938, + "learning_rate": 7.905571536292134e-06, + "loss": 0.3442, + "step": 16162 + }, + { + "epoch": 0.3235592923454195, + "grad_norm": 2.783914089202881, + "learning_rate": 7.905307702941656e-06, + "loss": 0.8312, + "step": 16163 + }, + { + "epoch": 0.3235793108625479, + "grad_norm": 1.1253961324691772, + "learning_rate": 7.905043857377892e-06, + "loss": 0.3434, + "step": 16164 + }, + { + "epoch": 0.3235993293796762, + "grad_norm": 2.1325180530548096, + "learning_rate": 7.904779999601947e-06, + "loss": 0.7833, + "step": 16165 + }, + { + "epoch": 0.3236193478968045, + "grad_norm": 2.1058168411254883, + "learning_rate": 7.904516129614932e-06, + "loss": 0.7611, + "step": 16166 + }, + { + "epoch": 0.32363936641393287, + "grad_norm": 1.0927650928497314, + "learning_rate": 7.904252247417957e-06, + "loss": 0.3324, + "step": 16167 + }, + { + "epoch": 0.3236593849310612, + "grad_norm": 0.9766761064529419, + "learning_rate": 7.903988353012129e-06, + "loss": 0.3079, + "step": 16168 + }, + { + "epoch": 0.3236794034481896, + "grad_norm": 1.145432710647583, + "learning_rate": 7.90372444639856e-06, + "loss": 0.2779, + "step": 16169 + }, + { + "epoch": 0.3236994219653179, + "grad_norm": 1.9559627771377563, + "learning_rate": 7.90346052757836e-06, + "loss": 0.8393, + "step": 16170 + }, + { + "epoch": 0.3237194404824463, + "grad_norm": 1.091784119606018, + "learning_rate": 7.903196596552633e-06, + "loss": 0.3251, + "step": 16171 + }, + { + "epoch": 0.3237394589995746, + "grad_norm": 1.127421259880066, + "learning_rate": 7.902932653322495e-06, + "loss": 0.375, + "step": 16172 + }, + { + "epoch": 0.323759477516703, + "grad_norm": 2.1296470165252686, + "learning_rate": 7.902668697889053e-06, + "loss": 0.7452, + "step": 16173 + }, + { + "epoch": 0.32377949603383127, + "grad_norm": 1.0812299251556396, + "learning_rate": 7.902404730253416e-06, + "loss": 0.3386, + "step": 16174 + }, + { + "epoch": 0.3237995145509596, + "grad_norm": 1.9175376892089844, + "learning_rate": 7.902140750416694e-06, + "loss": 0.8771, + "step": 16175 + }, + { + "epoch": 0.323819533068088, + "grad_norm": 1.068881869316101, + "learning_rate": 7.901876758379996e-06, + "loss": 0.3094, + "step": 16176 + }, + { + "epoch": 0.3238395515852163, + "grad_norm": 1.2543230056762695, + "learning_rate": 7.901612754144435e-06, + "loss": 0.3241, + "step": 16177 + }, + { + "epoch": 0.3238595701023447, + "grad_norm": 1.0384190082550049, + "learning_rate": 7.901348737711114e-06, + "loss": 0.3102, + "step": 16178 + }, + { + "epoch": 0.323879588619473, + "grad_norm": 1.0341272354125977, + "learning_rate": 7.90108470908115e-06, + "loss": 0.3398, + "step": 16179 + }, + { + "epoch": 0.3238996071366014, + "grad_norm": 1.8289638757705688, + "learning_rate": 7.900820668255651e-06, + "loss": 0.8885, + "step": 16180 + }, + { + "epoch": 0.3239196256537297, + "grad_norm": 1.0507162809371948, + "learning_rate": 7.900556615235727e-06, + "loss": 0.3367, + "step": 16181 + }, + { + "epoch": 0.323939644170858, + "grad_norm": 1.1076455116271973, + "learning_rate": 7.900292550022483e-06, + "loss": 0.2869, + "step": 16182 + }, + { + "epoch": 0.32395966268798637, + "grad_norm": 1.1297917366027832, + "learning_rate": 7.900028472617038e-06, + "loss": 0.2894, + "step": 16183 + }, + { + "epoch": 0.3239796812051147, + "grad_norm": 1.125055193901062, + "learning_rate": 7.899764383020494e-06, + "loss": 0.3112, + "step": 16184 + }, + { + "epoch": 0.3239996997222431, + "grad_norm": 2.017249345779419, + "learning_rate": 7.899500281233966e-06, + "loss": 0.8781, + "step": 16185 + }, + { + "epoch": 0.3240197182393714, + "grad_norm": 1.011231541633606, + "learning_rate": 7.899236167258564e-06, + "loss": 0.3207, + "step": 16186 + }, + { + "epoch": 0.3240397367564998, + "grad_norm": 1.0142754316329956, + "learning_rate": 7.898972041095394e-06, + "loss": 0.296, + "step": 16187 + }, + { + "epoch": 0.3240597552736281, + "grad_norm": 1.2266159057617188, + "learning_rate": 7.898707902745572e-06, + "loss": 0.3281, + "step": 16188 + }, + { + "epoch": 0.3240797737907565, + "grad_norm": 1.9585585594177246, + "learning_rate": 7.898443752210204e-06, + "loss": 0.8434, + "step": 16189 + }, + { + "epoch": 0.32409979230788477, + "grad_norm": 1.2110556364059448, + "learning_rate": 7.898179589490404e-06, + "loss": 0.3664, + "step": 16190 + }, + { + "epoch": 0.3241198108250131, + "grad_norm": 1.048672080039978, + "learning_rate": 7.897915414587278e-06, + "loss": 0.3315, + "step": 16191 + }, + { + "epoch": 0.3241398293421415, + "grad_norm": 1.1184724569320679, + "learning_rate": 7.897651227501942e-06, + "loss": 0.3807, + "step": 16192 + }, + { + "epoch": 0.3241598478592698, + "grad_norm": 1.278669834136963, + "learning_rate": 7.897387028235504e-06, + "loss": 0.3103, + "step": 16193 + }, + { + "epoch": 0.3241798663763982, + "grad_norm": 1.024172306060791, + "learning_rate": 7.897122816789073e-06, + "loss": 0.2931, + "step": 16194 + }, + { + "epoch": 0.3241998848935265, + "grad_norm": 1.2291597127914429, + "learning_rate": 7.896858593163762e-06, + "loss": 0.3157, + "step": 16195 + }, + { + "epoch": 0.3242199034106549, + "grad_norm": 1.8828980922698975, + "learning_rate": 7.89659435736068e-06, + "loss": 0.8025, + "step": 16196 + }, + { + "epoch": 0.3242399219277832, + "grad_norm": 1.2437220811843872, + "learning_rate": 7.896330109380939e-06, + "loss": 0.2967, + "step": 16197 + }, + { + "epoch": 0.3242599404449115, + "grad_norm": 1.1285593509674072, + "learning_rate": 7.89606584922565e-06, + "loss": 0.3455, + "step": 16198 + }, + { + "epoch": 0.32427995896203987, + "grad_norm": 1.0716884136199951, + "learning_rate": 7.895801576895923e-06, + "loss": 0.2963, + "step": 16199 + }, + { + "epoch": 0.3242999774791682, + "grad_norm": 1.1323370933532715, + "learning_rate": 7.895537292392867e-06, + "loss": 0.3133, + "step": 16200 + }, + { + "epoch": 0.3243199959962966, + "grad_norm": 1.141405701637268, + "learning_rate": 7.8952729957176e-06, + "loss": 0.323, + "step": 16201 + }, + { + "epoch": 0.3243400145134249, + "grad_norm": 1.1234959363937378, + "learning_rate": 7.895008686871224e-06, + "loss": 0.3119, + "step": 16202 + }, + { + "epoch": 0.3243600330305533, + "grad_norm": 1.1258184909820557, + "learning_rate": 7.894744365854858e-06, + "loss": 0.3541, + "step": 16203 + }, + { + "epoch": 0.3243800515476816, + "grad_norm": 1.1424942016601562, + "learning_rate": 7.894480032669607e-06, + "loss": 0.2775, + "step": 16204 + }, + { + "epoch": 0.32440007006481, + "grad_norm": 1.0490864515304565, + "learning_rate": 7.894215687316587e-06, + "loss": 0.3108, + "step": 16205 + }, + { + "epoch": 0.32442008858193827, + "grad_norm": 1.0436755418777466, + "learning_rate": 7.893951329796905e-06, + "loss": 0.3455, + "step": 16206 + }, + { + "epoch": 0.3244401070990666, + "grad_norm": 1.903099775314331, + "learning_rate": 7.893686960111675e-06, + "loss": 0.83, + "step": 16207 + }, + { + "epoch": 0.32446012561619497, + "grad_norm": 1.0776020288467407, + "learning_rate": 7.893422578262009e-06, + "loss": 0.331, + "step": 16208 + }, + { + "epoch": 0.3244801441333233, + "grad_norm": 1.1239858865737915, + "learning_rate": 7.893158184249017e-06, + "loss": 0.3015, + "step": 16209 + }, + { + "epoch": 0.3245001626504517, + "grad_norm": 1.0303808450698853, + "learning_rate": 7.892893778073809e-06, + "loss": 0.3194, + "step": 16210 + }, + { + "epoch": 0.32452018116758, + "grad_norm": 1.130673885345459, + "learning_rate": 7.8926293597375e-06, + "loss": 0.2967, + "step": 16211 + }, + { + "epoch": 0.3245401996847084, + "grad_norm": 1.0720995664596558, + "learning_rate": 7.892364929241199e-06, + "loss": 0.3128, + "step": 16212 + }, + { + "epoch": 0.3245602182018367, + "grad_norm": 1.0946418046951294, + "learning_rate": 7.892100486586017e-06, + "loss": 0.3499, + "step": 16213 + }, + { + "epoch": 0.324580236718965, + "grad_norm": 1.7405047416687012, + "learning_rate": 7.891836031773068e-06, + "loss": 0.8061, + "step": 16214 + }, + { + "epoch": 0.32460025523609337, + "grad_norm": 1.0334800481796265, + "learning_rate": 7.891571564803465e-06, + "loss": 0.305, + "step": 16215 + }, + { + "epoch": 0.3246202737532217, + "grad_norm": 1.3210160732269287, + "learning_rate": 7.891307085678312e-06, + "loss": 0.3877, + "step": 16216 + }, + { + "epoch": 0.3246402922703501, + "grad_norm": 1.0606282949447632, + "learning_rate": 7.891042594398731e-06, + "loss": 0.3057, + "step": 16217 + }, + { + "epoch": 0.3246603107874784, + "grad_norm": 1.7373936176300049, + "learning_rate": 7.890778090965827e-06, + "loss": 0.7456, + "step": 16218 + }, + { + "epoch": 0.3246803293046068, + "grad_norm": 1.0730582475662231, + "learning_rate": 7.890513575380713e-06, + "loss": 0.34, + "step": 16219 + }, + { + "epoch": 0.3247003478217351, + "grad_norm": 1.9907119274139404, + "learning_rate": 7.890249047644504e-06, + "loss": 0.8132, + "step": 16220 + }, + { + "epoch": 0.3247203663388635, + "grad_norm": 0.9772821068763733, + "learning_rate": 7.88998450775831e-06, + "loss": 0.2498, + "step": 16221 + }, + { + "epoch": 0.32474038485599177, + "grad_norm": 1.1289479732513428, + "learning_rate": 7.889719955723243e-06, + "loss": 0.3367, + "step": 16222 + }, + { + "epoch": 0.3247604033731201, + "grad_norm": 1.0803419351577759, + "learning_rate": 7.889455391540416e-06, + "loss": 0.3244, + "step": 16223 + }, + { + "epoch": 0.32478042189024847, + "grad_norm": 1.1515501737594604, + "learning_rate": 7.889190815210936e-06, + "loss": 0.3049, + "step": 16224 + }, + { + "epoch": 0.3248004404073768, + "grad_norm": 1.000035047531128, + "learning_rate": 7.888926226735925e-06, + "loss": 0.335, + "step": 16225 + }, + { + "epoch": 0.3248204589245052, + "grad_norm": 1.0792388916015625, + "learning_rate": 7.888661626116486e-06, + "loss": 0.2951, + "step": 16226 + }, + { + "epoch": 0.3248404774416335, + "grad_norm": 1.1511404514312744, + "learning_rate": 7.888397013353738e-06, + "loss": 0.3588, + "step": 16227 + }, + { + "epoch": 0.3248604959587619, + "grad_norm": 1.1236844062805176, + "learning_rate": 7.888132388448789e-06, + "loss": 0.3065, + "step": 16228 + }, + { + "epoch": 0.3248805144758902, + "grad_norm": 1.043344259262085, + "learning_rate": 7.88786775140275e-06, + "loss": 0.3543, + "step": 16229 + }, + { + "epoch": 0.3249005329930185, + "grad_norm": 1.0248831510543823, + "learning_rate": 7.887603102216741e-06, + "loss": 0.3233, + "step": 16230 + }, + { + "epoch": 0.32492055151014687, + "grad_norm": 1.01386559009552, + "learning_rate": 7.88733844089187e-06, + "loss": 0.2726, + "step": 16231 + }, + { + "epoch": 0.3249405700272752, + "grad_norm": 1.3404544591903687, + "learning_rate": 7.887073767429247e-06, + "loss": 0.3117, + "step": 16232 + }, + { + "epoch": 0.3249605885444036, + "grad_norm": 1.1135718822479248, + "learning_rate": 7.886809081829988e-06, + "loss": 0.3315, + "step": 16233 + }, + { + "epoch": 0.3249806070615319, + "grad_norm": 1.1114089488983154, + "learning_rate": 7.886544384095205e-06, + "loss": 0.2718, + "step": 16234 + }, + { + "epoch": 0.3250006255786603, + "grad_norm": 1.0383281707763672, + "learning_rate": 7.886279674226012e-06, + "loss": 0.2939, + "step": 16235 + }, + { + "epoch": 0.3250206440957886, + "grad_norm": 1.1026116609573364, + "learning_rate": 7.886014952223518e-06, + "loss": 0.3028, + "step": 16236 + }, + { + "epoch": 0.325040662612917, + "grad_norm": 1.2163788080215454, + "learning_rate": 7.88575021808884e-06, + "loss": 0.3847, + "step": 16237 + }, + { + "epoch": 0.32506068113004527, + "grad_norm": 1.8205161094665527, + "learning_rate": 7.88548547182309e-06, + "loss": 0.8985, + "step": 16238 + }, + { + "epoch": 0.3250806996471736, + "grad_norm": 1.0178372859954834, + "learning_rate": 7.885220713427378e-06, + "loss": 0.3187, + "step": 16239 + }, + { + "epoch": 0.32510071816430197, + "grad_norm": 1.1998999118804932, + "learning_rate": 7.884955942902818e-06, + "loss": 0.3349, + "step": 16240 + }, + { + "epoch": 0.3251207366814303, + "grad_norm": 1.0777390003204346, + "learning_rate": 7.884691160250525e-06, + "loss": 0.3223, + "step": 16241 + }, + { + "epoch": 0.3251407551985587, + "grad_norm": 1.0746848583221436, + "learning_rate": 7.884426365471611e-06, + "loss": 0.3194, + "step": 16242 + }, + { + "epoch": 0.325160773715687, + "grad_norm": 1.1336582899093628, + "learning_rate": 7.884161558567191e-06, + "loss": 0.3094, + "step": 16243 + }, + { + "epoch": 0.3251807922328154, + "grad_norm": 1.230401873588562, + "learning_rate": 7.883896739538374e-06, + "loss": 0.3486, + "step": 16244 + }, + { + "epoch": 0.3252008107499437, + "grad_norm": 1.0450741052627563, + "learning_rate": 7.88363190838628e-06, + "loss": 0.357, + "step": 16245 + }, + { + "epoch": 0.325220829267072, + "grad_norm": 1.9512885808944702, + "learning_rate": 7.883367065112015e-06, + "loss": 0.8112, + "step": 16246 + }, + { + "epoch": 0.32524084778420037, + "grad_norm": 1.1095143556594849, + "learning_rate": 7.883102209716696e-06, + "loss": 0.3781, + "step": 16247 + }, + { + "epoch": 0.3252608663013287, + "grad_norm": 1.1646603345870972, + "learning_rate": 7.882837342201434e-06, + "loss": 0.3004, + "step": 16248 + }, + { + "epoch": 0.3252808848184571, + "grad_norm": 1.8348444700241089, + "learning_rate": 7.882572462567347e-06, + "loss": 0.8085, + "step": 16249 + }, + { + "epoch": 0.3253009033355854, + "grad_norm": 1.0409164428710938, + "learning_rate": 7.882307570815544e-06, + "loss": 0.2996, + "step": 16250 + }, + { + "epoch": 0.3253209218527138, + "grad_norm": 1.8949124813079834, + "learning_rate": 7.88204266694714e-06, + "loss": 0.7968, + "step": 16251 + }, + { + "epoch": 0.3253409403698421, + "grad_norm": 1.0627635717391968, + "learning_rate": 7.881777750963248e-06, + "loss": 0.294, + "step": 16252 + }, + { + "epoch": 0.3253609588869705, + "grad_norm": 1.0162205696105957, + "learning_rate": 7.881512822864985e-06, + "loss": 0.2971, + "step": 16253 + }, + { + "epoch": 0.32538097740409877, + "grad_norm": 1.0538578033447266, + "learning_rate": 7.881247882653461e-06, + "loss": 0.3526, + "step": 16254 + }, + { + "epoch": 0.3254009959212271, + "grad_norm": 1.2154923677444458, + "learning_rate": 7.88098293032979e-06, + "loss": 0.3574, + "step": 16255 + }, + { + "epoch": 0.32542101443835547, + "grad_norm": 1.053796410560608, + "learning_rate": 7.880717965895089e-06, + "loss": 0.3272, + "step": 16256 + }, + { + "epoch": 0.3254410329554838, + "grad_norm": 1.063008189201355, + "learning_rate": 7.880452989350468e-06, + "loss": 0.3097, + "step": 16257 + }, + { + "epoch": 0.3254610514726122, + "grad_norm": 1.1165740489959717, + "learning_rate": 7.880188000697042e-06, + "loss": 0.3657, + "step": 16258 + }, + { + "epoch": 0.3254810699897405, + "grad_norm": 1.0402792692184448, + "learning_rate": 7.879922999935927e-06, + "loss": 0.3582, + "step": 16259 + }, + { + "epoch": 0.3255010885068689, + "grad_norm": 1.0418949127197266, + "learning_rate": 7.879657987068235e-06, + "loss": 0.3453, + "step": 16260 + }, + { + "epoch": 0.3255211070239972, + "grad_norm": 1.068782925605774, + "learning_rate": 7.879392962095081e-06, + "loss": 0.2984, + "step": 16261 + }, + { + "epoch": 0.3255411255411255, + "grad_norm": 1.1460527181625366, + "learning_rate": 7.879127925017578e-06, + "loss": 0.3131, + "step": 16262 + }, + { + "epoch": 0.32556114405825387, + "grad_norm": 1.0359512567520142, + "learning_rate": 7.87886287583684e-06, + "loss": 0.2725, + "step": 16263 + }, + { + "epoch": 0.3255811625753822, + "grad_norm": 1.0517299175262451, + "learning_rate": 7.878597814553983e-06, + "loss": 0.3195, + "step": 16264 + }, + { + "epoch": 0.32560118109251057, + "grad_norm": 0.9735729098320007, + "learning_rate": 7.87833274117012e-06, + "loss": 0.3637, + "step": 16265 + }, + { + "epoch": 0.3256211996096389, + "grad_norm": 1.3705542087554932, + "learning_rate": 7.878067655686366e-06, + "loss": 0.339, + "step": 16266 + }, + { + "epoch": 0.3256412181267673, + "grad_norm": 1.0932292938232422, + "learning_rate": 7.877802558103836e-06, + "loss": 0.2926, + "step": 16267 + }, + { + "epoch": 0.3256612366438956, + "grad_norm": 1.1085667610168457, + "learning_rate": 7.877537448423642e-06, + "loss": 0.3485, + "step": 16268 + }, + { + "epoch": 0.325681255161024, + "grad_norm": 1.3335590362548828, + "learning_rate": 7.877272326646899e-06, + "loss": 0.3414, + "step": 16269 + }, + { + "epoch": 0.32570127367815227, + "grad_norm": 1.0787559747695923, + "learning_rate": 7.877007192774723e-06, + "loss": 0.3271, + "step": 16270 + }, + { + "epoch": 0.3257212921952806, + "grad_norm": 1.1335126161575317, + "learning_rate": 7.876742046808228e-06, + "loss": 0.2962, + "step": 16271 + }, + { + "epoch": 0.32574131071240897, + "grad_norm": 1.1374694108963013, + "learning_rate": 7.876476888748528e-06, + "loss": 0.3007, + "step": 16272 + }, + { + "epoch": 0.3257613292295373, + "grad_norm": 1.252689003944397, + "learning_rate": 7.87621171859674e-06, + "loss": 0.3441, + "step": 16273 + }, + { + "epoch": 0.3257813477466657, + "grad_norm": 1.259680151939392, + "learning_rate": 7.875946536353974e-06, + "loss": 0.3441, + "step": 16274 + }, + { + "epoch": 0.325801366263794, + "grad_norm": 1.2749520540237427, + "learning_rate": 7.87568134202135e-06, + "loss": 0.3184, + "step": 16275 + }, + { + "epoch": 0.3258213847809224, + "grad_norm": 1.0913171768188477, + "learning_rate": 7.87541613559998e-06, + "loss": 0.3166, + "step": 16276 + }, + { + "epoch": 0.32584140329805067, + "grad_norm": 1.0997620820999146, + "learning_rate": 7.875150917090978e-06, + "loss": 0.344, + "step": 16277 + }, + { + "epoch": 0.325861421815179, + "grad_norm": 0.9710639715194702, + "learning_rate": 7.87488568649546e-06, + "loss": 0.3057, + "step": 16278 + }, + { + "epoch": 0.32588144033230737, + "grad_norm": 1.1195389032363892, + "learning_rate": 7.874620443814546e-06, + "loss": 0.3466, + "step": 16279 + }, + { + "epoch": 0.3259014588494357, + "grad_norm": 1.9816452264785767, + "learning_rate": 7.874355189049342e-06, + "loss": 0.8465, + "step": 16280 + }, + { + "epoch": 0.32592147736656407, + "grad_norm": 1.2091984748840332, + "learning_rate": 7.87408992220097e-06, + "loss": 0.3488, + "step": 16281 + }, + { + "epoch": 0.3259414958836924, + "grad_norm": 1.0953466892242432, + "learning_rate": 7.873824643270539e-06, + "loss": 0.3137, + "step": 16282 + }, + { + "epoch": 0.3259615144008208, + "grad_norm": 1.1082974672317505, + "learning_rate": 7.87355935225917e-06, + "loss": 0.3535, + "step": 16283 + }, + { + "epoch": 0.3259815329179491, + "grad_norm": 1.211747646331787, + "learning_rate": 7.873294049167974e-06, + "loss": 0.3073, + "step": 16284 + }, + { + "epoch": 0.3260015514350774, + "grad_norm": 1.2940548658370972, + "learning_rate": 7.87302873399807e-06, + "loss": 0.3235, + "step": 16285 + }, + { + "epoch": 0.32602156995220577, + "grad_norm": 1.1436469554901123, + "learning_rate": 7.872763406750572e-06, + "loss": 0.3088, + "step": 16286 + }, + { + "epoch": 0.3260415884693341, + "grad_norm": 1.1180087327957153, + "learning_rate": 7.872498067426593e-06, + "loss": 0.3019, + "step": 16287 + }, + { + "epoch": 0.32606160698646247, + "grad_norm": 1.164935827255249, + "learning_rate": 7.872232716027252e-06, + "loss": 0.3044, + "step": 16288 + }, + { + "epoch": 0.3260816255035908, + "grad_norm": 1.0832090377807617, + "learning_rate": 7.871967352553663e-06, + "loss": 0.3099, + "step": 16289 + }, + { + "epoch": 0.3261016440207192, + "grad_norm": 1.051428198814392, + "learning_rate": 7.871701977006938e-06, + "loss": 0.3545, + "step": 16290 + }, + { + "epoch": 0.3261216625378475, + "grad_norm": 1.0064105987548828, + "learning_rate": 7.871436589388199e-06, + "loss": 0.3055, + "step": 16291 + }, + { + "epoch": 0.3261416810549759, + "grad_norm": 1.0593355894088745, + "learning_rate": 7.871171189698557e-06, + "loss": 0.3154, + "step": 16292 + }, + { + "epoch": 0.32616169957210417, + "grad_norm": 1.1908408403396606, + "learning_rate": 7.870905777939131e-06, + "loss": 0.3755, + "step": 16293 + }, + { + "epoch": 0.3261817180892325, + "grad_norm": 1.150409460067749, + "learning_rate": 7.870640354111033e-06, + "loss": 0.3258, + "step": 16294 + }, + { + "epoch": 0.32620173660636087, + "grad_norm": 1.188036322593689, + "learning_rate": 7.870374918215381e-06, + "loss": 0.3408, + "step": 16295 + }, + { + "epoch": 0.3262217551234892, + "grad_norm": 1.0831853151321411, + "learning_rate": 7.870109470253291e-06, + "loss": 0.3306, + "step": 16296 + }, + { + "epoch": 0.32624177364061757, + "grad_norm": 1.0778999328613281, + "learning_rate": 7.869844010225877e-06, + "loss": 0.3037, + "step": 16297 + }, + { + "epoch": 0.3262617921577459, + "grad_norm": 1.0991055965423584, + "learning_rate": 7.86957853813426e-06, + "loss": 0.3627, + "step": 16298 + }, + { + "epoch": 0.3262818106748743, + "grad_norm": 1.149235725402832, + "learning_rate": 7.86931305397955e-06, + "loss": 0.3905, + "step": 16299 + }, + { + "epoch": 0.3263018291920026, + "grad_norm": 1.0753101110458374, + "learning_rate": 7.869047557762865e-06, + "loss": 0.2771, + "step": 16300 + }, + { + "epoch": 0.3263218477091309, + "grad_norm": 1.0471876859664917, + "learning_rate": 7.868782049485324e-06, + "loss": 0.3041, + "step": 16301 + }, + { + "epoch": 0.32634186622625927, + "grad_norm": 1.0701944828033447, + "learning_rate": 7.868516529148038e-06, + "loss": 0.3331, + "step": 16302 + }, + { + "epoch": 0.3263618847433876, + "grad_norm": 1.0504441261291504, + "learning_rate": 7.868250996752127e-06, + "loss": 0.3193, + "step": 16303 + }, + { + "epoch": 0.32638190326051597, + "grad_norm": 0.98722904920578, + "learning_rate": 7.867985452298705e-06, + "loss": 0.2817, + "step": 16304 + }, + { + "epoch": 0.3264019217776443, + "grad_norm": 1.1118245124816895, + "learning_rate": 7.86771989578889e-06, + "loss": 0.3025, + "step": 16305 + }, + { + "epoch": 0.3264219402947727, + "grad_norm": 1.0377600193023682, + "learning_rate": 7.867454327223798e-06, + "loss": 0.3065, + "step": 16306 + }, + { + "epoch": 0.326441958811901, + "grad_norm": 1.8527872562408447, + "learning_rate": 7.867188746604544e-06, + "loss": 0.8647, + "step": 16307 + }, + { + "epoch": 0.3264619773290294, + "grad_norm": 2.572662830352783, + "learning_rate": 7.866923153932247e-06, + "loss": 0.8443, + "step": 16308 + }, + { + "epoch": 0.32648199584615767, + "grad_norm": 1.1663264036178589, + "learning_rate": 7.86665754920802e-06, + "loss": 0.3499, + "step": 16309 + }, + { + "epoch": 0.326502014363286, + "grad_norm": 1.1631314754486084, + "learning_rate": 7.866391932432984e-06, + "loss": 0.3176, + "step": 16310 + }, + { + "epoch": 0.32652203288041437, + "grad_norm": 1.1471668481826782, + "learning_rate": 7.866126303608252e-06, + "loss": 0.3598, + "step": 16311 + }, + { + "epoch": 0.3265420513975427, + "grad_norm": 2.005331516265869, + "learning_rate": 7.865860662734942e-06, + "loss": 0.8151, + "step": 16312 + }, + { + "epoch": 0.32656206991467107, + "grad_norm": 1.0672098398208618, + "learning_rate": 7.86559500981417e-06, + "loss": 0.323, + "step": 16313 + }, + { + "epoch": 0.3265820884317994, + "grad_norm": 1.156332015991211, + "learning_rate": 7.865329344847055e-06, + "loss": 0.3467, + "step": 16314 + }, + { + "epoch": 0.3266021069489278, + "grad_norm": 1.1757773160934448, + "learning_rate": 7.865063667834709e-06, + "loss": 0.3211, + "step": 16315 + }, + { + "epoch": 0.3266221254660561, + "grad_norm": 1.1481614112854004, + "learning_rate": 7.864797978778254e-06, + "loss": 0.3203, + "step": 16316 + }, + { + "epoch": 0.3266421439831844, + "grad_norm": 1.0977085828781128, + "learning_rate": 7.864532277678805e-06, + "loss": 0.3111, + "step": 16317 + }, + { + "epoch": 0.32666216250031277, + "grad_norm": 1.0252481698989868, + "learning_rate": 7.864266564537478e-06, + "loss": 0.3301, + "step": 16318 + }, + { + "epoch": 0.3266821810174411, + "grad_norm": 1.9736329317092896, + "learning_rate": 7.864000839355391e-06, + "loss": 0.8092, + "step": 16319 + }, + { + "epoch": 0.32670219953456947, + "grad_norm": 1.9799546003341675, + "learning_rate": 7.86373510213366e-06, + "loss": 0.7941, + "step": 16320 + }, + { + "epoch": 0.3267222180516978, + "grad_norm": 2.0272791385650635, + "learning_rate": 7.863469352873404e-06, + "loss": 0.8029, + "step": 16321 + }, + { + "epoch": 0.32674223656882617, + "grad_norm": 1.1441833972930908, + "learning_rate": 7.863203591575738e-06, + "loss": 0.3212, + "step": 16322 + }, + { + "epoch": 0.3267622550859545, + "grad_norm": 0.9536663293838501, + "learning_rate": 7.862937818241782e-06, + "loss": 0.2971, + "step": 16323 + }, + { + "epoch": 0.3267822736030829, + "grad_norm": 1.7596007585525513, + "learning_rate": 7.862672032872651e-06, + "loss": 0.9147, + "step": 16324 + }, + { + "epoch": 0.32680229212021117, + "grad_norm": 1.0854623317718506, + "learning_rate": 7.862406235469461e-06, + "loss": 0.3354, + "step": 16325 + }, + { + "epoch": 0.3268223106373395, + "grad_norm": 1.215273380279541, + "learning_rate": 7.862140426033334e-06, + "loss": 0.2977, + "step": 16326 + }, + { + "epoch": 0.32684232915446787, + "grad_norm": 1.4360921382904053, + "learning_rate": 7.861874604565382e-06, + "loss": 0.3271, + "step": 16327 + }, + { + "epoch": 0.3268623476715962, + "grad_norm": 1.9924012422561646, + "learning_rate": 7.861608771066726e-06, + "loss": 0.7524, + "step": 16328 + }, + { + "epoch": 0.32688236618872457, + "grad_norm": 1.0577143430709839, + "learning_rate": 7.861342925538484e-06, + "loss": 0.3012, + "step": 16329 + }, + { + "epoch": 0.3269023847058529, + "grad_norm": 1.023497223854065, + "learning_rate": 7.861077067981772e-06, + "loss": 0.3383, + "step": 16330 + }, + { + "epoch": 0.3269224032229813, + "grad_norm": 1.1564079523086548, + "learning_rate": 7.860811198397706e-06, + "loss": 0.3163, + "step": 16331 + }, + { + "epoch": 0.3269424217401096, + "grad_norm": 1.2446447610855103, + "learning_rate": 7.860545316787405e-06, + "loss": 0.3235, + "step": 16332 + }, + { + "epoch": 0.3269624402572379, + "grad_norm": 1.0773652791976929, + "learning_rate": 7.860279423151988e-06, + "loss": 0.3271, + "step": 16333 + }, + { + "epoch": 0.32698245877436627, + "grad_norm": 2.0301601886749268, + "learning_rate": 7.860013517492573e-06, + "loss": 0.7907, + "step": 16334 + }, + { + "epoch": 0.3270024772914946, + "grad_norm": 1.1247434616088867, + "learning_rate": 7.859747599810275e-06, + "loss": 0.3041, + "step": 16335 + }, + { + "epoch": 0.32702249580862297, + "grad_norm": 1.9958007335662842, + "learning_rate": 7.859481670106214e-06, + "loss": 0.7941, + "step": 16336 + }, + { + "epoch": 0.3270425143257513, + "grad_norm": 1.1376993656158447, + "learning_rate": 7.859215728381507e-06, + "loss": 0.3262, + "step": 16337 + }, + { + "epoch": 0.32706253284287967, + "grad_norm": 1.0964349508285522, + "learning_rate": 7.858949774637273e-06, + "loss": 0.3514, + "step": 16338 + }, + { + "epoch": 0.327082551360008, + "grad_norm": 1.160332202911377, + "learning_rate": 7.85868380887463e-06, + "loss": 0.3036, + "step": 16339 + }, + { + "epoch": 0.3271025698771364, + "grad_norm": 1.0033434629440308, + "learning_rate": 7.858417831094695e-06, + "loss": 0.2963, + "step": 16340 + }, + { + "epoch": 0.32712258839426467, + "grad_norm": 1.1267962455749512, + "learning_rate": 7.858151841298586e-06, + "loss": 0.2601, + "step": 16341 + }, + { + "epoch": 0.327142606911393, + "grad_norm": 1.1233888864517212, + "learning_rate": 7.857885839487422e-06, + "loss": 0.3168, + "step": 16342 + }, + { + "epoch": 0.32716262542852137, + "grad_norm": 1.1175529956817627, + "learning_rate": 7.857619825662322e-06, + "loss": 0.3554, + "step": 16343 + }, + { + "epoch": 0.3271826439456497, + "grad_norm": 1.1193151473999023, + "learning_rate": 7.857353799824401e-06, + "loss": 0.3518, + "step": 16344 + }, + { + "epoch": 0.32720266246277807, + "grad_norm": 1.0969282388687134, + "learning_rate": 7.85708776197478e-06, + "loss": 0.2877, + "step": 16345 + }, + { + "epoch": 0.3272226809799064, + "grad_norm": 1.1293641328811646, + "learning_rate": 7.856821712114578e-06, + "loss": 0.3573, + "step": 16346 + }, + { + "epoch": 0.3272426994970348, + "grad_norm": 1.0704504251480103, + "learning_rate": 7.856555650244912e-06, + "loss": 0.3161, + "step": 16347 + }, + { + "epoch": 0.3272627180141631, + "grad_norm": 1.1462568044662476, + "learning_rate": 7.8562895763669e-06, + "loss": 0.3582, + "step": 16348 + }, + { + "epoch": 0.3272827365312914, + "grad_norm": 1.1986021995544434, + "learning_rate": 7.856023490481664e-06, + "loss": 0.3332, + "step": 16349 + }, + { + "epoch": 0.32730275504841977, + "grad_norm": 1.429365873336792, + "learning_rate": 7.855757392590317e-06, + "loss": 0.3142, + "step": 16350 + }, + { + "epoch": 0.3273227735655481, + "grad_norm": 1.1713186502456665, + "learning_rate": 7.855491282693982e-06, + "loss": 0.3444, + "step": 16351 + }, + { + "epoch": 0.32734279208267647, + "grad_norm": 2.113351583480835, + "learning_rate": 7.855225160793776e-06, + "loss": 0.8196, + "step": 16352 + }, + { + "epoch": 0.3273628105998048, + "grad_norm": 1.213074803352356, + "learning_rate": 7.854959026890816e-06, + "loss": 0.267, + "step": 16353 + }, + { + "epoch": 0.32738282911693317, + "grad_norm": 1.2175997495651245, + "learning_rate": 7.854692880986225e-06, + "loss": 0.3189, + "step": 16354 + }, + { + "epoch": 0.3274028476340615, + "grad_norm": 1.046679973602295, + "learning_rate": 7.854426723081119e-06, + "loss": 0.3218, + "step": 16355 + }, + { + "epoch": 0.3274228661511899, + "grad_norm": 1.083573818206787, + "learning_rate": 7.854160553176617e-06, + "loss": 0.3422, + "step": 16356 + }, + { + "epoch": 0.32744288466831817, + "grad_norm": 1.0505937337875366, + "learning_rate": 7.853894371273838e-06, + "loss": 0.29, + "step": 16357 + }, + { + "epoch": 0.3274629031854465, + "grad_norm": 1.2565271854400635, + "learning_rate": 7.8536281773739e-06, + "loss": 0.3044, + "step": 16358 + }, + { + "epoch": 0.32748292170257487, + "grad_norm": 1.1408751010894775, + "learning_rate": 7.853361971477925e-06, + "loss": 0.3409, + "step": 16359 + }, + { + "epoch": 0.3275029402197032, + "grad_norm": 1.2277518510818481, + "learning_rate": 7.85309575358703e-06, + "loss": 0.3181, + "step": 16360 + }, + { + "epoch": 0.32752295873683157, + "grad_norm": 1.120378851890564, + "learning_rate": 7.852829523702336e-06, + "loss": 0.2701, + "step": 16361 + }, + { + "epoch": 0.3275429772539599, + "grad_norm": 1.0393803119659424, + "learning_rate": 7.852563281824959e-06, + "loss": 0.3117, + "step": 16362 + }, + { + "epoch": 0.3275629957710883, + "grad_norm": 1.094037652015686, + "learning_rate": 7.85229702795602e-06, + "loss": 0.3175, + "step": 16363 + }, + { + "epoch": 0.3275830142882166, + "grad_norm": 1.1682380437850952, + "learning_rate": 7.852030762096636e-06, + "loss": 0.342, + "step": 16364 + }, + { + "epoch": 0.3276030328053449, + "grad_norm": 1.0300439596176147, + "learning_rate": 7.851764484247931e-06, + "loss": 0.3468, + "step": 16365 + }, + { + "epoch": 0.32762305132247327, + "grad_norm": 1.1466619968414307, + "learning_rate": 7.851498194411021e-06, + "loss": 0.3577, + "step": 16366 + }, + { + "epoch": 0.3276430698396016, + "grad_norm": 1.212632417678833, + "learning_rate": 7.851231892587026e-06, + "loss": 0.3281, + "step": 16367 + }, + { + "epoch": 0.32766308835672997, + "grad_norm": 1.1534007787704468, + "learning_rate": 7.850965578777066e-06, + "loss": 0.311, + "step": 16368 + }, + { + "epoch": 0.3276831068738583, + "grad_norm": 1.1145429611206055, + "learning_rate": 7.850699252982261e-06, + "loss": 0.3143, + "step": 16369 + }, + { + "epoch": 0.32770312539098667, + "grad_norm": 1.8858277797698975, + "learning_rate": 7.850432915203729e-06, + "loss": 0.8286, + "step": 16370 + }, + { + "epoch": 0.327723143908115, + "grad_norm": 1.085796594619751, + "learning_rate": 7.850166565442592e-06, + "loss": 0.2885, + "step": 16371 + }, + { + "epoch": 0.3277431624252434, + "grad_norm": 1.1174300909042358, + "learning_rate": 7.849900203699968e-06, + "loss": 0.3151, + "step": 16372 + }, + { + "epoch": 0.32776318094237167, + "grad_norm": 1.1425048112869263, + "learning_rate": 7.849633829976975e-06, + "loss": 0.2809, + "step": 16373 + }, + { + "epoch": 0.3277831994595, + "grad_norm": 1.2749381065368652, + "learning_rate": 7.849367444274735e-06, + "loss": 0.3928, + "step": 16374 + }, + { + "epoch": 0.32780321797662837, + "grad_norm": 1.1057661771774292, + "learning_rate": 7.849101046594368e-06, + "loss": 0.2975, + "step": 16375 + }, + { + "epoch": 0.3278232364937567, + "grad_norm": 1.1989896297454834, + "learning_rate": 7.848834636936992e-06, + "loss": 0.3195, + "step": 16376 + }, + { + "epoch": 0.32784325501088507, + "grad_norm": 1.2290645837783813, + "learning_rate": 7.84856821530373e-06, + "loss": 0.2684, + "step": 16377 + }, + { + "epoch": 0.3278632735280134, + "grad_norm": 1.2177757024765015, + "learning_rate": 7.848301781695699e-06, + "loss": 0.3661, + "step": 16378 + }, + { + "epoch": 0.32788329204514177, + "grad_norm": 0.9885496497154236, + "learning_rate": 7.848035336114021e-06, + "loss": 0.3005, + "step": 16379 + }, + { + "epoch": 0.3279033105622701, + "grad_norm": 0.9636948704719543, + "learning_rate": 7.847768878559816e-06, + "loss": 0.295, + "step": 16380 + }, + { + "epoch": 0.3279233290793984, + "grad_norm": 1.1196436882019043, + "learning_rate": 7.847502409034202e-06, + "loss": 0.3427, + "step": 16381 + }, + { + "epoch": 0.32794334759652677, + "grad_norm": 1.1254048347473145, + "learning_rate": 7.847235927538302e-06, + "loss": 0.3008, + "step": 16382 + }, + { + "epoch": 0.3279633661136551, + "grad_norm": 2.156987190246582, + "learning_rate": 7.846969434073233e-06, + "loss": 0.9423, + "step": 16383 + }, + { + "epoch": 0.32798338463078347, + "grad_norm": 1.019765019416809, + "learning_rate": 7.846702928640119e-06, + "loss": 0.3112, + "step": 16384 + }, + { + "epoch": 0.3280034031479118, + "grad_norm": 1.068599820137024, + "learning_rate": 7.846436411240078e-06, + "loss": 0.2962, + "step": 16385 + }, + { + "epoch": 0.32802342166504017, + "grad_norm": 1.1058588027954102, + "learning_rate": 7.846169881874231e-06, + "loss": 0.2984, + "step": 16386 + }, + { + "epoch": 0.3280434401821685, + "grad_norm": 1.1890645027160645, + "learning_rate": 7.845903340543698e-06, + "loss": 0.3275, + "step": 16387 + }, + { + "epoch": 0.3280634586992969, + "grad_norm": 1.1072537899017334, + "learning_rate": 7.845636787249599e-06, + "loss": 0.295, + "step": 16388 + }, + { + "epoch": 0.32808347721642517, + "grad_norm": 1.0351191759109497, + "learning_rate": 7.845370221993055e-06, + "loss": 0.3386, + "step": 16389 + }, + { + "epoch": 0.3281034957335535, + "grad_norm": 1.104093074798584, + "learning_rate": 7.84510364477519e-06, + "loss": 0.3314, + "step": 16390 + }, + { + "epoch": 0.32812351425068187, + "grad_norm": 1.1883783340454102, + "learning_rate": 7.84483705559712e-06, + "loss": 0.3059, + "step": 16391 + }, + { + "epoch": 0.3281435327678102, + "grad_norm": 1.9145110845565796, + "learning_rate": 7.844570454459967e-06, + "loss": 0.8428, + "step": 16392 + }, + { + "epoch": 0.32816355128493857, + "grad_norm": 1.01971435546875, + "learning_rate": 7.84430384136485e-06, + "loss": 0.299, + "step": 16393 + }, + { + "epoch": 0.3281835698020669, + "grad_norm": 2.0499069690704346, + "learning_rate": 7.844037216312897e-06, + "loss": 0.7918, + "step": 16394 + }, + { + "epoch": 0.32820358831919527, + "grad_norm": 1.0479249954223633, + "learning_rate": 7.843770579305218e-06, + "loss": 0.328, + "step": 16395 + }, + { + "epoch": 0.3282236068363236, + "grad_norm": 1.1063182353973389, + "learning_rate": 7.843503930342942e-06, + "loss": 0.2768, + "step": 16396 + }, + { + "epoch": 0.3282436253534519, + "grad_norm": 1.9741350412368774, + "learning_rate": 7.843237269427187e-06, + "loss": 0.8407, + "step": 16397 + }, + { + "epoch": 0.32826364387058027, + "grad_norm": 1.1542123556137085, + "learning_rate": 7.842970596559075e-06, + "loss": 0.2991, + "step": 16398 + }, + { + "epoch": 0.3282836623877086, + "grad_norm": 1.2333050966262817, + "learning_rate": 7.842703911739726e-06, + "loss": 0.3391, + "step": 16399 + }, + { + "epoch": 0.32830368090483697, + "grad_norm": 1.1211944818496704, + "learning_rate": 7.842437214970263e-06, + "loss": 0.3338, + "step": 16400 + }, + { + "epoch": 0.3283236994219653, + "grad_norm": 1.212584137916565, + "learning_rate": 7.842170506251803e-06, + "loss": 0.3177, + "step": 16401 + }, + { + "epoch": 0.32834371793909367, + "grad_norm": 1.3848462104797363, + "learning_rate": 7.841903785585469e-06, + "loss": 0.3194, + "step": 16402 + }, + { + "epoch": 0.328363736456222, + "grad_norm": 1.0821213722229004, + "learning_rate": 7.841637052972386e-06, + "loss": 0.3424, + "step": 16403 + }, + { + "epoch": 0.3283837549733504, + "grad_norm": 1.8164284229278564, + "learning_rate": 7.841370308413672e-06, + "loss": 0.8105, + "step": 16404 + }, + { + "epoch": 0.32840377349047867, + "grad_norm": 1.1211931705474854, + "learning_rate": 7.841103551910448e-06, + "loss": 0.3567, + "step": 16405 + }, + { + "epoch": 0.328423792007607, + "grad_norm": 1.032504677772522, + "learning_rate": 7.840836783463836e-06, + "loss": 0.3201, + "step": 16406 + }, + { + "epoch": 0.32844381052473537, + "grad_norm": 1.44198739528656, + "learning_rate": 7.840570003074959e-06, + "loss": 0.3337, + "step": 16407 + }, + { + "epoch": 0.3284638290418637, + "grad_norm": 1.1188902854919434, + "learning_rate": 7.840303210744935e-06, + "loss": 0.2966, + "step": 16408 + }, + { + "epoch": 0.32848384755899207, + "grad_norm": 1.1433565616607666, + "learning_rate": 7.840036406474889e-06, + "loss": 0.3281, + "step": 16409 + }, + { + "epoch": 0.3285038660761204, + "grad_norm": 1.0008810758590698, + "learning_rate": 7.839769590265939e-06, + "loss": 0.3164, + "step": 16410 + }, + { + "epoch": 0.32852388459324877, + "grad_norm": 1.9126626253128052, + "learning_rate": 7.839502762119211e-06, + "loss": 0.7665, + "step": 16411 + }, + { + "epoch": 0.3285439031103771, + "grad_norm": 1.1052604913711548, + "learning_rate": 7.839235922035823e-06, + "loss": 0.31, + "step": 16412 + }, + { + "epoch": 0.3285639216275054, + "grad_norm": 1.1020393371582031, + "learning_rate": 7.838969070016898e-06, + "loss": 0.2855, + "step": 16413 + }, + { + "epoch": 0.32858394014463377, + "grad_norm": 1.0799643993377686, + "learning_rate": 7.83870220606356e-06, + "loss": 0.2946, + "step": 16414 + }, + { + "epoch": 0.3286039586617621, + "grad_norm": 1.1387124061584473, + "learning_rate": 7.838435330176926e-06, + "loss": 0.3292, + "step": 16415 + }, + { + "epoch": 0.32862397717889047, + "grad_norm": 0.9441320300102234, + "learning_rate": 7.838168442358123e-06, + "loss": 0.2979, + "step": 16416 + }, + { + "epoch": 0.3286439956960188, + "grad_norm": 1.8185157775878906, + "learning_rate": 7.83790154260827e-06, + "loss": 0.8303, + "step": 16417 + }, + { + "epoch": 0.32866401421314717, + "grad_norm": 1.1248047351837158, + "learning_rate": 7.83763463092849e-06, + "loss": 0.3473, + "step": 16418 + }, + { + "epoch": 0.3286840327302755, + "grad_norm": 1.085930585861206, + "learning_rate": 7.837367707319903e-06, + "loss": 0.2996, + "step": 16419 + }, + { + "epoch": 0.3287040512474039, + "grad_norm": 1.1142008304595947, + "learning_rate": 7.837100771783634e-06, + "loss": 0.3216, + "step": 16420 + }, + { + "epoch": 0.32872406976453217, + "grad_norm": 1.1386045217514038, + "learning_rate": 7.836833824320804e-06, + "loss": 0.2981, + "step": 16421 + }, + { + "epoch": 0.3287440882816605, + "grad_norm": 1.1325438022613525, + "learning_rate": 7.836566864932535e-06, + "loss": 0.2928, + "step": 16422 + }, + { + "epoch": 0.32876410679878887, + "grad_norm": 1.0473915338516235, + "learning_rate": 7.83629989361995e-06, + "loss": 0.3389, + "step": 16423 + }, + { + "epoch": 0.3287841253159172, + "grad_norm": 1.9659764766693115, + "learning_rate": 7.836032910384171e-06, + "loss": 0.7975, + "step": 16424 + }, + { + "epoch": 0.32880414383304557, + "grad_norm": 1.0270472764968872, + "learning_rate": 7.83576591522632e-06, + "loss": 0.2904, + "step": 16425 + }, + { + "epoch": 0.3288241623501739, + "grad_norm": 1.148248314857483, + "learning_rate": 7.835498908147517e-06, + "loss": 0.3448, + "step": 16426 + }, + { + "epoch": 0.32884418086730227, + "grad_norm": 1.0835411548614502, + "learning_rate": 7.835231889148888e-06, + "loss": 0.2998, + "step": 16427 + }, + { + "epoch": 0.3288641993844306, + "grad_norm": 1.1433141231536865, + "learning_rate": 7.834964858231553e-06, + "loss": 0.3054, + "step": 16428 + }, + { + "epoch": 0.3288842179015589, + "grad_norm": 1.1304181814193726, + "learning_rate": 7.834697815396638e-06, + "loss": 0.3606, + "step": 16429 + }, + { + "epoch": 0.32890423641868727, + "grad_norm": 1.2361547946929932, + "learning_rate": 7.834430760645263e-06, + "loss": 0.2949, + "step": 16430 + }, + { + "epoch": 0.3289242549358156, + "grad_norm": 1.102076768875122, + "learning_rate": 7.83416369397855e-06, + "loss": 0.3436, + "step": 16431 + }, + { + "epoch": 0.32894427345294397, + "grad_norm": 1.1656793355941772, + "learning_rate": 7.833896615397622e-06, + "loss": 0.2673, + "step": 16432 + }, + { + "epoch": 0.3289642919700723, + "grad_norm": 1.3697149753570557, + "learning_rate": 7.833629524903605e-06, + "loss": 0.3124, + "step": 16433 + }, + { + "epoch": 0.32898431048720067, + "grad_norm": 1.1245753765106201, + "learning_rate": 7.83336242249762e-06, + "loss": 0.3297, + "step": 16434 + }, + { + "epoch": 0.329004329004329, + "grad_norm": 1.684891700744629, + "learning_rate": 7.833095308180786e-06, + "loss": 0.7486, + "step": 16435 + }, + { + "epoch": 0.32902434752145737, + "grad_norm": 1.091839075088501, + "learning_rate": 7.832828181954231e-06, + "loss": 0.3259, + "step": 16436 + }, + { + "epoch": 0.32904436603858567, + "grad_norm": 1.054312825202942, + "learning_rate": 7.832561043819075e-06, + "loss": 0.3199, + "step": 16437 + }, + { + "epoch": 0.329064384555714, + "grad_norm": 1.9366432428359985, + "learning_rate": 7.832293893776442e-06, + "loss": 0.8165, + "step": 16438 + }, + { + "epoch": 0.32908440307284237, + "grad_norm": 1.0378259420394897, + "learning_rate": 7.832026731827456e-06, + "loss": 0.2881, + "step": 16439 + }, + { + "epoch": 0.3291044215899707, + "grad_norm": 1.0778529644012451, + "learning_rate": 7.831759557973238e-06, + "loss": 0.3399, + "step": 16440 + }, + { + "epoch": 0.32912444010709907, + "grad_norm": 1.0284779071807861, + "learning_rate": 7.831492372214911e-06, + "loss": 0.3143, + "step": 16441 + }, + { + "epoch": 0.3291444586242274, + "grad_norm": 1.0581693649291992, + "learning_rate": 7.831225174553603e-06, + "loss": 0.3135, + "step": 16442 + }, + { + "epoch": 0.32916447714135577, + "grad_norm": 1.0919066667556763, + "learning_rate": 7.830957964990431e-06, + "loss": 0.3307, + "step": 16443 + }, + { + "epoch": 0.3291844956584841, + "grad_norm": 1.121649146080017, + "learning_rate": 7.830690743526521e-06, + "loss": 0.2771, + "step": 16444 + }, + { + "epoch": 0.3292045141756124, + "grad_norm": 1.0142639875411987, + "learning_rate": 7.830423510162996e-06, + "loss": 0.3258, + "step": 16445 + }, + { + "epoch": 0.32922453269274077, + "grad_norm": 0.9823919534683228, + "learning_rate": 7.83015626490098e-06, + "loss": 0.3204, + "step": 16446 + }, + { + "epoch": 0.3292445512098691, + "grad_norm": 1.0677971839904785, + "learning_rate": 7.829889007741597e-06, + "loss": 0.2909, + "step": 16447 + }, + { + "epoch": 0.32926456972699747, + "grad_norm": 1.045840859413147, + "learning_rate": 7.82962173868597e-06, + "loss": 0.2909, + "step": 16448 + }, + { + "epoch": 0.3292845882441258, + "grad_norm": 1.0821588039398193, + "learning_rate": 7.829354457735221e-06, + "loss": 0.348, + "step": 16449 + }, + { + "epoch": 0.32930460676125417, + "grad_norm": 1.0952595472335815, + "learning_rate": 7.829087164890476e-06, + "loss": 0.3425, + "step": 16450 + }, + { + "epoch": 0.3293246252783825, + "grad_norm": 1.049166202545166, + "learning_rate": 7.828819860152856e-06, + "loss": 0.2911, + "step": 16451 + }, + { + "epoch": 0.32934464379551087, + "grad_norm": 1.2022883892059326, + "learning_rate": 7.828552543523487e-06, + "loss": 0.2842, + "step": 16452 + }, + { + "epoch": 0.32936466231263917, + "grad_norm": 2.039991617202759, + "learning_rate": 7.828285215003491e-06, + "loss": 0.7754, + "step": 16453 + }, + { + "epoch": 0.3293846808297675, + "grad_norm": 1.0711610317230225, + "learning_rate": 7.828017874593993e-06, + "loss": 0.3292, + "step": 16454 + }, + { + "epoch": 0.32940469934689587, + "grad_norm": 1.0844136476516724, + "learning_rate": 7.827750522296117e-06, + "loss": 0.3245, + "step": 16455 + }, + { + "epoch": 0.3294247178640242, + "grad_norm": 0.9429582357406616, + "learning_rate": 7.827483158110986e-06, + "loss": 0.3067, + "step": 16456 + }, + { + "epoch": 0.32944473638115257, + "grad_norm": 1.2850390672683716, + "learning_rate": 7.827215782039723e-06, + "loss": 0.3521, + "step": 16457 + }, + { + "epoch": 0.3294647548982809, + "grad_norm": 1.2921208143234253, + "learning_rate": 7.826948394083455e-06, + "loss": 0.2543, + "step": 16458 + }, + { + "epoch": 0.32948477341540927, + "grad_norm": 1.1990190744400024, + "learning_rate": 7.826680994243304e-06, + "loss": 0.2747, + "step": 16459 + }, + { + "epoch": 0.3295047919325376, + "grad_norm": 1.112828254699707, + "learning_rate": 7.826413582520393e-06, + "loss": 0.3168, + "step": 16460 + }, + { + "epoch": 0.3295248104496659, + "grad_norm": 1.1630008220672607, + "learning_rate": 7.82614615891585e-06, + "loss": 0.3253, + "step": 16461 + }, + { + "epoch": 0.32954482896679427, + "grad_norm": 1.0837106704711914, + "learning_rate": 7.825878723430795e-06, + "loss": 0.3266, + "step": 16462 + }, + { + "epoch": 0.3295648474839226, + "grad_norm": 1.1297495365142822, + "learning_rate": 7.825611276066354e-06, + "loss": 0.3398, + "step": 16463 + }, + { + "epoch": 0.32958486600105097, + "grad_norm": 1.0978890657424927, + "learning_rate": 7.825343816823651e-06, + "loss": 0.3594, + "step": 16464 + }, + { + "epoch": 0.3296048845181793, + "grad_norm": 1.2076865434646606, + "learning_rate": 7.825076345703812e-06, + "loss": 0.3466, + "step": 16465 + }, + { + "epoch": 0.32962490303530767, + "grad_norm": 1.1000685691833496, + "learning_rate": 7.82480886270796e-06, + "loss": 0.3019, + "step": 16466 + }, + { + "epoch": 0.329644921552436, + "grad_norm": 1.077755331993103, + "learning_rate": 7.824541367837218e-06, + "loss": 0.3287, + "step": 16467 + }, + { + "epoch": 0.32966494006956437, + "grad_norm": 1.179468035697937, + "learning_rate": 7.824273861092713e-06, + "loss": 0.327, + "step": 16468 + }, + { + "epoch": 0.32968495858669267, + "grad_norm": 1.1013708114624023, + "learning_rate": 7.824006342475568e-06, + "loss": 0.3215, + "step": 16469 + }, + { + "epoch": 0.329704977103821, + "grad_norm": 2.142942190170288, + "learning_rate": 7.823738811986906e-06, + "loss": 0.7854, + "step": 16470 + }, + { + "epoch": 0.32972499562094937, + "grad_norm": 1.157102346420288, + "learning_rate": 7.823471269627858e-06, + "loss": 0.3363, + "step": 16471 + }, + { + "epoch": 0.3297450141380777, + "grad_norm": 0.9927443861961365, + "learning_rate": 7.823203715399543e-06, + "loss": 0.2944, + "step": 16472 + }, + { + "epoch": 0.32976503265520607, + "grad_norm": 1.1536914110183716, + "learning_rate": 7.822936149303085e-06, + "loss": 0.3123, + "step": 16473 + }, + { + "epoch": 0.3297850511723344, + "grad_norm": 1.0232399702072144, + "learning_rate": 7.822668571339613e-06, + "loss": 0.2871, + "step": 16474 + }, + { + "epoch": 0.32980506968946277, + "grad_norm": 1.127366065979004, + "learning_rate": 7.82240098151025e-06, + "loss": 0.312, + "step": 16475 + }, + { + "epoch": 0.3298250882065911, + "grad_norm": 1.159932017326355, + "learning_rate": 7.822133379816119e-06, + "loss": 0.3373, + "step": 16476 + }, + { + "epoch": 0.3298451067237194, + "grad_norm": 1.169713020324707, + "learning_rate": 7.821865766258347e-06, + "loss": 0.3448, + "step": 16477 + }, + { + "epoch": 0.32986512524084777, + "grad_norm": 1.0725252628326416, + "learning_rate": 7.82159814083806e-06, + "loss": 0.3141, + "step": 16478 + }, + { + "epoch": 0.3298851437579761, + "grad_norm": 1.069534420967102, + "learning_rate": 7.82133050355638e-06, + "loss": 0.3444, + "step": 16479 + }, + { + "epoch": 0.32990516227510447, + "grad_norm": 1.8585407733917236, + "learning_rate": 7.821062854414434e-06, + "loss": 0.8049, + "step": 16480 + }, + { + "epoch": 0.3299251807922328, + "grad_norm": 1.0637160539627075, + "learning_rate": 7.820795193413347e-06, + "loss": 0.2812, + "step": 16481 + }, + { + "epoch": 0.32994519930936117, + "grad_norm": 1.0214558839797974, + "learning_rate": 7.820527520554245e-06, + "loss": 0.3248, + "step": 16482 + }, + { + "epoch": 0.3299652178264895, + "grad_norm": 1.0316555500030518, + "learning_rate": 7.820259835838251e-06, + "loss": 0.2937, + "step": 16483 + }, + { + "epoch": 0.32998523634361787, + "grad_norm": 1.109067440032959, + "learning_rate": 7.819992139266492e-06, + "loss": 0.3232, + "step": 16484 + }, + { + "epoch": 0.33000525486074617, + "grad_norm": 1.092687964439392, + "learning_rate": 7.819724430840093e-06, + "loss": 0.3345, + "step": 16485 + }, + { + "epoch": 0.3300252733778745, + "grad_norm": 1.0727367401123047, + "learning_rate": 7.819456710560181e-06, + "loss": 0.3237, + "step": 16486 + }, + { + "epoch": 0.33004529189500287, + "grad_norm": 1.2061009407043457, + "learning_rate": 7.819188978427878e-06, + "loss": 0.3568, + "step": 16487 + }, + { + "epoch": 0.3300653104121312, + "grad_norm": 1.0838100910186768, + "learning_rate": 7.81892123444431e-06, + "loss": 0.3033, + "step": 16488 + }, + { + "epoch": 0.33008532892925957, + "grad_norm": 1.082804560661316, + "learning_rate": 7.818653478610605e-06, + "loss": 0.3422, + "step": 16489 + }, + { + "epoch": 0.3301053474463879, + "grad_norm": 1.1003917455673218, + "learning_rate": 7.818385710927887e-06, + "loss": 0.3377, + "step": 16490 + }, + { + "epoch": 0.33012536596351627, + "grad_norm": 1.2022128105163574, + "learning_rate": 7.818117931397284e-06, + "loss": 0.3272, + "step": 16491 + }, + { + "epoch": 0.3301453844806446, + "grad_norm": 1.1117106676101685, + "learning_rate": 7.817850140019918e-06, + "loss": 0.3315, + "step": 16492 + }, + { + "epoch": 0.3301654029977729, + "grad_norm": 1.1433266401290894, + "learning_rate": 7.817582336796915e-06, + "loss": 0.3359, + "step": 16493 + }, + { + "epoch": 0.33018542151490127, + "grad_norm": 1.1316343545913696, + "learning_rate": 7.817314521729404e-06, + "loss": 0.2758, + "step": 16494 + }, + { + "epoch": 0.3302054400320296, + "grad_norm": 1.1374621391296387, + "learning_rate": 7.817046694818508e-06, + "loss": 0.3222, + "step": 16495 + }, + { + "epoch": 0.33022545854915797, + "grad_norm": 1.162029504776001, + "learning_rate": 7.816778856065355e-06, + "loss": 0.3439, + "step": 16496 + }, + { + "epoch": 0.3302454770662863, + "grad_norm": 1.9960746765136719, + "learning_rate": 7.816511005471071e-06, + "loss": 0.8243, + "step": 16497 + }, + { + "epoch": 0.33026549558341467, + "grad_norm": 1.1333080530166626, + "learning_rate": 7.816243143036779e-06, + "loss": 0.3221, + "step": 16498 + }, + { + "epoch": 0.330285514100543, + "grad_norm": 1.2012995481491089, + "learning_rate": 7.815975268763608e-06, + "loss": 0.3635, + "step": 16499 + }, + { + "epoch": 0.33030553261767137, + "grad_norm": 1.079920768737793, + "learning_rate": 7.815707382652682e-06, + "loss": 0.3548, + "step": 16500 + }, + { + "epoch": 0.33032555113479967, + "grad_norm": 1.0741182565689087, + "learning_rate": 7.815439484705127e-06, + "loss": 0.3425, + "step": 16501 + }, + { + "epoch": 0.330345569651928, + "grad_norm": 1.1152064800262451, + "learning_rate": 7.815171574922073e-06, + "loss": 0.343, + "step": 16502 + }, + { + "epoch": 0.33036558816905637, + "grad_norm": 1.8787895441055298, + "learning_rate": 7.814903653304642e-06, + "loss": 0.8143, + "step": 16503 + }, + { + "epoch": 0.3303856066861847, + "grad_norm": 1.0565474033355713, + "learning_rate": 7.814635719853963e-06, + "loss": 0.2644, + "step": 16504 + }, + { + "epoch": 0.33040562520331307, + "grad_norm": 1.1273113489151, + "learning_rate": 7.814367774571158e-06, + "loss": 0.3166, + "step": 16505 + }, + { + "epoch": 0.3304256437204414, + "grad_norm": 1.1886638402938843, + "learning_rate": 7.81409981745736e-06, + "loss": 0.3332, + "step": 16506 + }, + { + "epoch": 0.33044566223756977, + "grad_norm": 1.133471131324768, + "learning_rate": 7.81383184851369e-06, + "loss": 0.3546, + "step": 16507 + }, + { + "epoch": 0.3304656807546981, + "grad_norm": 1.098556637763977, + "learning_rate": 7.813563867741278e-06, + "loss": 0.3557, + "step": 16508 + }, + { + "epoch": 0.3304856992718264, + "grad_norm": 0.9926977753639221, + "learning_rate": 7.81329587514125e-06, + "loss": 0.2839, + "step": 16509 + }, + { + "epoch": 0.33050571778895477, + "grad_norm": 1.912940502166748, + "learning_rate": 7.813027870714728e-06, + "loss": 0.7676, + "step": 16510 + }, + { + "epoch": 0.3305257363060831, + "grad_norm": 1.2119765281677246, + "learning_rate": 7.812759854462843e-06, + "loss": 0.3584, + "step": 16511 + }, + { + "epoch": 0.33054575482321147, + "grad_norm": 1.1703094244003296, + "learning_rate": 7.812491826386723e-06, + "loss": 0.3416, + "step": 16512 + }, + { + "epoch": 0.3305657733403398, + "grad_norm": 1.0680737495422363, + "learning_rate": 7.812223786487492e-06, + "loss": 0.3311, + "step": 16513 + }, + { + "epoch": 0.33058579185746817, + "grad_norm": 1.0813241004943848, + "learning_rate": 7.811955734766275e-06, + "loss": 0.2865, + "step": 16514 + }, + { + "epoch": 0.3306058103745965, + "grad_norm": 1.171764612197876, + "learning_rate": 7.811687671224204e-06, + "loss": 0.3775, + "step": 16515 + }, + { + "epoch": 0.33062582889172487, + "grad_norm": 1.2670586109161377, + "learning_rate": 7.811419595862401e-06, + "loss": 0.2932, + "step": 16516 + }, + { + "epoch": 0.33064584740885317, + "grad_norm": 0.9665318727493286, + "learning_rate": 7.811151508681998e-06, + "loss": 0.2867, + "step": 16517 + }, + { + "epoch": 0.3306658659259815, + "grad_norm": 1.1808240413665771, + "learning_rate": 7.810883409684116e-06, + "loss": 0.3667, + "step": 16518 + }, + { + "epoch": 0.33068588444310987, + "grad_norm": 1.03617525100708, + "learning_rate": 7.810615298869886e-06, + "loss": 0.301, + "step": 16519 + }, + { + "epoch": 0.3307059029602382, + "grad_norm": 2.0629279613494873, + "learning_rate": 7.810347176240437e-06, + "loss": 0.7747, + "step": 16520 + }, + { + "epoch": 0.33072592147736657, + "grad_norm": 1.3032221794128418, + "learning_rate": 7.810079041796887e-06, + "loss": 0.3192, + "step": 16521 + }, + { + "epoch": 0.3307459399944949, + "grad_norm": 1.0063196420669556, + "learning_rate": 7.809810895540374e-06, + "loss": 0.2932, + "step": 16522 + }, + { + "epoch": 0.33076595851162327, + "grad_norm": 1.1351220607757568, + "learning_rate": 7.80954273747202e-06, + "loss": 0.3169, + "step": 16523 + }, + { + "epoch": 0.3307859770287516, + "grad_norm": 0.996799886226654, + "learning_rate": 7.809274567592953e-06, + "loss": 0.3335, + "step": 16524 + }, + { + "epoch": 0.3308059955458799, + "grad_norm": 1.3212026357650757, + "learning_rate": 7.8090063859043e-06, + "loss": 0.3169, + "step": 16525 + }, + { + "epoch": 0.33082601406300827, + "grad_norm": 1.1926196813583374, + "learning_rate": 7.808738192407189e-06, + "loss": 0.3222, + "step": 16526 + }, + { + "epoch": 0.3308460325801366, + "grad_norm": 1.129921555519104, + "learning_rate": 7.808469987102747e-06, + "loss": 0.3585, + "step": 16527 + }, + { + "epoch": 0.33086605109726497, + "grad_norm": 1.1807411909103394, + "learning_rate": 7.808201769992101e-06, + "loss": 0.3288, + "step": 16528 + }, + { + "epoch": 0.3308860696143933, + "grad_norm": 1.8248391151428223, + "learning_rate": 7.80793354107638e-06, + "loss": 0.8283, + "step": 16529 + }, + { + "epoch": 0.33090608813152167, + "grad_norm": 1.0035878419876099, + "learning_rate": 7.80766530035671e-06, + "loss": 0.3238, + "step": 16530 + }, + { + "epoch": 0.33092610664865, + "grad_norm": 1.2205196619033813, + "learning_rate": 7.80739704783422e-06, + "loss": 0.3435, + "step": 16531 + }, + { + "epoch": 0.33094612516577837, + "grad_norm": 1.0943971872329712, + "learning_rate": 7.807128783510035e-06, + "loss": 0.2917, + "step": 16532 + }, + { + "epoch": 0.33096614368290667, + "grad_norm": 1.106337308883667, + "learning_rate": 7.806860507385287e-06, + "loss": 0.3327, + "step": 16533 + }, + { + "epoch": 0.330986162200035, + "grad_norm": 1.1622192859649658, + "learning_rate": 7.806592219461099e-06, + "loss": 0.3636, + "step": 16534 + }, + { + "epoch": 0.33100618071716337, + "grad_norm": 1.1000515222549438, + "learning_rate": 7.806323919738603e-06, + "loss": 0.3263, + "step": 16535 + }, + { + "epoch": 0.3310261992342917, + "grad_norm": 1.1574903726577759, + "learning_rate": 7.806055608218925e-06, + "loss": 0.3158, + "step": 16536 + }, + { + "epoch": 0.33104621775142007, + "grad_norm": 1.147434949874878, + "learning_rate": 7.805787284903193e-06, + "loss": 0.3113, + "step": 16537 + }, + { + "epoch": 0.3310662362685484, + "grad_norm": 1.10971200466156, + "learning_rate": 7.805518949792536e-06, + "loss": 0.3738, + "step": 16538 + }, + { + "epoch": 0.33108625478567677, + "grad_norm": 1.1019009351730347, + "learning_rate": 7.80525060288808e-06, + "loss": 0.3409, + "step": 16539 + }, + { + "epoch": 0.3311062733028051, + "grad_norm": 0.9957515001296997, + "learning_rate": 7.804982244190955e-06, + "loss": 0.3048, + "step": 16540 + }, + { + "epoch": 0.3311262918199334, + "grad_norm": 1.8682596683502197, + "learning_rate": 7.804713873702288e-06, + "loss": 0.8441, + "step": 16541 + }, + { + "epoch": 0.33114631033706177, + "grad_norm": 1.1106235980987549, + "learning_rate": 7.804445491423207e-06, + "loss": 0.3041, + "step": 16542 + }, + { + "epoch": 0.3311663288541901, + "grad_norm": 1.144891381263733, + "learning_rate": 7.80417709735484e-06, + "loss": 0.3527, + "step": 16543 + }, + { + "epoch": 0.33118634737131847, + "grad_norm": 1.0359135866165161, + "learning_rate": 7.803908691498316e-06, + "loss": 0.2841, + "step": 16544 + }, + { + "epoch": 0.3312063658884468, + "grad_norm": 1.0736076831817627, + "learning_rate": 7.803640273854765e-06, + "loss": 0.3138, + "step": 16545 + }, + { + "epoch": 0.33122638440557517, + "grad_norm": 1.0822020769119263, + "learning_rate": 7.803371844425314e-06, + "loss": 0.3289, + "step": 16546 + }, + { + "epoch": 0.3312464029227035, + "grad_norm": 1.141324758529663, + "learning_rate": 7.803103403211088e-06, + "loss": 0.3359, + "step": 16547 + }, + { + "epoch": 0.33126642143983187, + "grad_norm": 1.155135154724121, + "learning_rate": 7.802834950213221e-06, + "loss": 0.3248, + "step": 16548 + }, + { + "epoch": 0.33128643995696017, + "grad_norm": 1.1098616123199463, + "learning_rate": 7.802566485432838e-06, + "loss": 0.3076, + "step": 16549 + }, + { + "epoch": 0.3313064584740885, + "grad_norm": 1.0426338911056519, + "learning_rate": 7.802298008871069e-06, + "loss": 0.3424, + "step": 16550 + }, + { + "epoch": 0.33132647699121687, + "grad_norm": 1.237351417541504, + "learning_rate": 7.802029520529043e-06, + "loss": 0.3386, + "step": 16551 + }, + { + "epoch": 0.3313464955083452, + "grad_norm": 1.1473733186721802, + "learning_rate": 7.801761020407885e-06, + "loss": 0.332, + "step": 16552 + }, + { + "epoch": 0.33136651402547357, + "grad_norm": 1.8797303438186646, + "learning_rate": 7.80149250850873e-06, + "loss": 0.8367, + "step": 16553 + }, + { + "epoch": 0.3313865325426019, + "grad_norm": 1.1018450260162354, + "learning_rate": 7.801223984832703e-06, + "loss": 0.3471, + "step": 16554 + }, + { + "epoch": 0.33140655105973027, + "grad_norm": 1.2235002517700195, + "learning_rate": 7.800955449380932e-06, + "loss": 0.3676, + "step": 16555 + }, + { + "epoch": 0.3314265695768586, + "grad_norm": 1.0670064687728882, + "learning_rate": 7.800686902154546e-06, + "loss": 0.3288, + "step": 16556 + }, + { + "epoch": 0.3314465880939869, + "grad_norm": 1.1148414611816406, + "learning_rate": 7.800418343154677e-06, + "loss": 0.333, + "step": 16557 + }, + { + "epoch": 0.33146660661111527, + "grad_norm": 1.0030488967895508, + "learning_rate": 7.80014977238245e-06, + "loss": 0.3205, + "step": 16558 + }, + { + "epoch": 0.3314866251282436, + "grad_norm": 1.2011733055114746, + "learning_rate": 7.799881189838999e-06, + "loss": 0.3255, + "step": 16559 + }, + { + "epoch": 0.33150664364537197, + "grad_norm": 1.091828465461731, + "learning_rate": 7.799612595525448e-06, + "loss": 0.3204, + "step": 16560 + }, + { + "epoch": 0.3315266621625003, + "grad_norm": 1.2693240642547607, + "learning_rate": 7.799343989442927e-06, + "loss": 0.3588, + "step": 16561 + }, + { + "epoch": 0.33154668067962867, + "grad_norm": 1.1277008056640625, + "learning_rate": 7.799075371592567e-06, + "loss": 0.2857, + "step": 16562 + }, + { + "epoch": 0.331566699196757, + "grad_norm": 1.8536230325698853, + "learning_rate": 7.798806741975497e-06, + "loss": 0.8021, + "step": 16563 + }, + { + "epoch": 0.33158671771388537, + "grad_norm": 1.1788718700408936, + "learning_rate": 7.798538100592847e-06, + "loss": 0.3057, + "step": 16564 + }, + { + "epoch": 0.33160673623101367, + "grad_norm": 1.0332597494125366, + "learning_rate": 7.798269447445744e-06, + "loss": 0.3566, + "step": 16565 + }, + { + "epoch": 0.331626754748142, + "grad_norm": 0.9993187189102173, + "learning_rate": 7.798000782535317e-06, + "loss": 0.3092, + "step": 16566 + }, + { + "epoch": 0.33164677326527037, + "grad_norm": 1.0009479522705078, + "learning_rate": 7.797732105862699e-06, + "loss": 0.3067, + "step": 16567 + }, + { + "epoch": 0.3316667917823987, + "grad_norm": 1.2154039144515991, + "learning_rate": 7.797463417429017e-06, + "loss": 0.3497, + "step": 16568 + }, + { + "epoch": 0.33168681029952707, + "grad_norm": 1.2068637609481812, + "learning_rate": 7.797194717235401e-06, + "loss": 0.3364, + "step": 16569 + }, + { + "epoch": 0.3317068288166554, + "grad_norm": 1.2400293350219727, + "learning_rate": 7.79692600528298e-06, + "loss": 0.317, + "step": 16570 + }, + { + "epoch": 0.33172684733378377, + "grad_norm": 1.954757809638977, + "learning_rate": 7.796657281572883e-06, + "loss": 0.7932, + "step": 16571 + }, + { + "epoch": 0.3317468658509121, + "grad_norm": 1.909424901008606, + "learning_rate": 7.796388546106242e-06, + "loss": 0.8536, + "step": 16572 + }, + { + "epoch": 0.3317668843680404, + "grad_norm": 1.1123709678649902, + "learning_rate": 7.796119798884184e-06, + "loss": 0.3393, + "step": 16573 + }, + { + "epoch": 0.33178690288516877, + "grad_norm": 1.0673506259918213, + "learning_rate": 7.79585103990784e-06, + "loss": 0.3651, + "step": 16574 + }, + { + "epoch": 0.3318069214022971, + "grad_norm": 1.8115334510803223, + "learning_rate": 7.79558226917834e-06, + "loss": 0.7914, + "step": 16575 + }, + { + "epoch": 0.33182693991942547, + "grad_norm": 1.269957184791565, + "learning_rate": 7.795313486696816e-06, + "loss": 0.3032, + "step": 16576 + }, + { + "epoch": 0.3318469584365538, + "grad_norm": 1.1297011375427246, + "learning_rate": 7.795044692464396e-06, + "loss": 0.3618, + "step": 16577 + }, + { + "epoch": 0.33186697695368217, + "grad_norm": 1.2623722553253174, + "learning_rate": 7.794775886482206e-06, + "loss": 0.3766, + "step": 16578 + }, + { + "epoch": 0.3318869954708105, + "grad_norm": 1.8428877592086792, + "learning_rate": 7.794507068751381e-06, + "loss": 0.771, + "step": 16579 + }, + { + "epoch": 0.33190701398793887, + "grad_norm": 1.084559679031372, + "learning_rate": 7.794238239273051e-06, + "loss": 0.3041, + "step": 16580 + }, + { + "epoch": 0.33192703250506717, + "grad_norm": 1.1755870580673218, + "learning_rate": 7.793969398048343e-06, + "loss": 0.2975, + "step": 16581 + }, + { + "epoch": 0.3319470510221955, + "grad_norm": 0.9826698899269104, + "learning_rate": 7.79370054507839e-06, + "loss": 0.3179, + "step": 16582 + }, + { + "epoch": 0.33196706953932387, + "grad_norm": 1.099118947982788, + "learning_rate": 7.793431680364321e-06, + "loss": 0.317, + "step": 16583 + }, + { + "epoch": 0.3319870880564522, + "grad_norm": 1.9278796911239624, + "learning_rate": 7.793162803907266e-06, + "loss": 0.8092, + "step": 16584 + }, + { + "epoch": 0.33200710657358057, + "grad_norm": 1.1758041381835938, + "learning_rate": 7.792893915708355e-06, + "loss": 0.3456, + "step": 16585 + }, + { + "epoch": 0.3320271250907089, + "grad_norm": 1.0095845460891724, + "learning_rate": 7.79262501576872e-06, + "loss": 0.3184, + "step": 16586 + }, + { + "epoch": 0.33204714360783727, + "grad_norm": 0.9584732055664062, + "learning_rate": 7.792356104089492e-06, + "loss": 0.3065, + "step": 16587 + }, + { + "epoch": 0.3320671621249656, + "grad_norm": 1.1562891006469727, + "learning_rate": 7.792087180671796e-06, + "loss": 0.3162, + "step": 16588 + }, + { + "epoch": 0.3320871806420939, + "grad_norm": 1.8017932176589966, + "learning_rate": 7.791818245516769e-06, + "loss": 0.8593, + "step": 16589 + }, + { + "epoch": 0.33210719915922227, + "grad_norm": 1.269769549369812, + "learning_rate": 7.791549298625539e-06, + "loss": 0.3728, + "step": 16590 + }, + { + "epoch": 0.3321272176763506, + "grad_norm": 1.1546728610992432, + "learning_rate": 7.791280339999236e-06, + "loss": 0.3509, + "step": 16591 + }, + { + "epoch": 0.33214723619347897, + "grad_norm": 1.0625871419906616, + "learning_rate": 7.79101136963899e-06, + "loss": 0.3456, + "step": 16592 + }, + { + "epoch": 0.3321672547106073, + "grad_norm": 1.2309606075286865, + "learning_rate": 7.790742387545935e-06, + "loss": 0.362, + "step": 16593 + }, + { + "epoch": 0.33218727322773567, + "grad_norm": 1.050318956375122, + "learning_rate": 7.790473393721199e-06, + "loss": 0.3304, + "step": 16594 + }, + { + "epoch": 0.332207291744864, + "grad_norm": 1.0231257677078247, + "learning_rate": 7.790204388165912e-06, + "loss": 0.3234, + "step": 16595 + }, + { + "epoch": 0.33222731026199237, + "grad_norm": 1.077652096748352, + "learning_rate": 7.789935370881208e-06, + "loss": 0.3577, + "step": 16596 + }, + { + "epoch": 0.33224732877912067, + "grad_norm": 1.8610931634902954, + "learning_rate": 7.789666341868215e-06, + "loss": 0.8551, + "step": 16597 + }, + { + "epoch": 0.332267347296249, + "grad_norm": 1.7884716987609863, + "learning_rate": 7.789397301128064e-06, + "loss": 0.8151, + "step": 16598 + }, + { + "epoch": 0.33228736581337737, + "grad_norm": 1.083695411682129, + "learning_rate": 7.789128248661888e-06, + "loss": 0.342, + "step": 16599 + }, + { + "epoch": 0.3323073843305057, + "grad_norm": 1.7780296802520752, + "learning_rate": 7.788859184470818e-06, + "loss": 0.8091, + "step": 16600 + }, + { + "epoch": 0.33232740284763407, + "grad_norm": 1.1202619075775146, + "learning_rate": 7.788590108555983e-06, + "loss": 0.3105, + "step": 16601 + }, + { + "epoch": 0.3323474213647624, + "grad_norm": 1.1415470838546753, + "learning_rate": 7.788321020918518e-06, + "loss": 0.3158, + "step": 16602 + }, + { + "epoch": 0.33236743988189077, + "grad_norm": 1.1365817785263062, + "learning_rate": 7.788051921559548e-06, + "loss": 0.3379, + "step": 16603 + }, + { + "epoch": 0.3323874583990191, + "grad_norm": 1.9414583444595337, + "learning_rate": 7.787782810480209e-06, + "loss": 0.8203, + "step": 16604 + }, + { + "epoch": 0.3324074769161474, + "grad_norm": 1.047690510749817, + "learning_rate": 7.78751368768163e-06, + "loss": 0.2777, + "step": 16605 + }, + { + "epoch": 0.33242749543327577, + "grad_norm": 1.0457773208618164, + "learning_rate": 7.787244553164945e-06, + "loss": 0.3073, + "step": 16606 + }, + { + "epoch": 0.3324475139504041, + "grad_norm": 1.102155089378357, + "learning_rate": 7.786975406931282e-06, + "loss": 0.3294, + "step": 16607 + }, + { + "epoch": 0.33246753246753247, + "grad_norm": 1.1490050554275513, + "learning_rate": 7.786706248981777e-06, + "loss": 0.2776, + "step": 16608 + }, + { + "epoch": 0.3324875509846608, + "grad_norm": 1.1283844709396362, + "learning_rate": 7.786437079317554e-06, + "loss": 0.3309, + "step": 16609 + }, + { + "epoch": 0.33250756950178917, + "grad_norm": 1.1010236740112305, + "learning_rate": 7.786167897939754e-06, + "loss": 0.3031, + "step": 16610 + }, + { + "epoch": 0.3325275880189175, + "grad_norm": 1.8309181928634644, + "learning_rate": 7.785898704849501e-06, + "loss": 0.8356, + "step": 16611 + }, + { + "epoch": 0.33254760653604587, + "grad_norm": 1.2344164848327637, + "learning_rate": 7.78562950004793e-06, + "loss": 0.3407, + "step": 16612 + }, + { + "epoch": 0.33256762505317417, + "grad_norm": 1.112562656402588, + "learning_rate": 7.785360283536172e-06, + "loss": 0.2768, + "step": 16613 + }, + { + "epoch": 0.3325876435703025, + "grad_norm": 1.7170637845993042, + "learning_rate": 7.78509105531536e-06, + "loss": 0.8065, + "step": 16614 + }, + { + "epoch": 0.33260766208743087, + "grad_norm": 0.939702570438385, + "learning_rate": 7.784821815386624e-06, + "loss": 0.2731, + "step": 16615 + }, + { + "epoch": 0.3326276806045592, + "grad_norm": 1.1187126636505127, + "learning_rate": 7.784552563751095e-06, + "loss": 0.3294, + "step": 16616 + }, + { + "epoch": 0.33264769912168757, + "grad_norm": 1.1080422401428223, + "learning_rate": 7.784283300409907e-06, + "loss": 0.3073, + "step": 16617 + }, + { + "epoch": 0.3326677176388159, + "grad_norm": 1.0426065921783447, + "learning_rate": 7.78401402536419e-06, + "loss": 0.2777, + "step": 16618 + }, + { + "epoch": 0.33268773615594427, + "grad_norm": 0.9891573190689087, + "learning_rate": 7.78374473861508e-06, + "loss": 0.291, + "step": 16619 + }, + { + "epoch": 0.3327077546730726, + "grad_norm": 1.0346728563308716, + "learning_rate": 7.783475440163704e-06, + "loss": 0.3423, + "step": 16620 + }, + { + "epoch": 0.3327277731902009, + "grad_norm": 1.0733917951583862, + "learning_rate": 7.783206130011196e-06, + "loss": 0.3283, + "step": 16621 + }, + { + "epoch": 0.33274779170732927, + "grad_norm": 1.230769157409668, + "learning_rate": 7.78293680815869e-06, + "loss": 0.3181, + "step": 16622 + }, + { + "epoch": 0.3327678102244576, + "grad_norm": 1.038772702217102, + "learning_rate": 7.782667474607316e-06, + "loss": 0.3323, + "step": 16623 + }, + { + "epoch": 0.33278782874158597, + "grad_norm": 1.019917368888855, + "learning_rate": 7.782398129358206e-06, + "loss": 0.3323, + "step": 16624 + }, + { + "epoch": 0.3328078472587143, + "grad_norm": 0.9847938418388367, + "learning_rate": 7.782128772412493e-06, + "loss": 0.2918, + "step": 16625 + }, + { + "epoch": 0.33282786577584267, + "grad_norm": 0.8848216533660889, + "learning_rate": 7.781859403771313e-06, + "loss": 0.2697, + "step": 16626 + }, + { + "epoch": 0.332847884292971, + "grad_norm": 1.1783159971237183, + "learning_rate": 7.78159002343579e-06, + "loss": 0.3267, + "step": 16627 + }, + { + "epoch": 0.3328679028100993, + "grad_norm": 1.9682377576828003, + "learning_rate": 7.781320631407064e-06, + "loss": 0.7971, + "step": 16628 + }, + { + "epoch": 0.33288792132722766, + "grad_norm": 1.0571067333221436, + "learning_rate": 7.781051227686263e-06, + "loss": 0.3107, + "step": 16629 + }, + { + "epoch": 0.332907939844356, + "grad_norm": 1.0477389097213745, + "learning_rate": 7.78078181227452e-06, + "loss": 0.3174, + "step": 16630 + }, + { + "epoch": 0.33292795836148437, + "grad_norm": 1.2450194358825684, + "learning_rate": 7.780512385172972e-06, + "loss": 0.3664, + "step": 16631 + }, + { + "epoch": 0.3329479768786127, + "grad_norm": 1.0876233577728271, + "learning_rate": 7.780242946382745e-06, + "loss": 0.318, + "step": 16632 + }, + { + "epoch": 0.33296799539574107, + "grad_norm": 1.0765302181243896, + "learning_rate": 7.779973495904978e-06, + "loss": 0.3176, + "step": 16633 + }, + { + "epoch": 0.3329880139128694, + "grad_norm": 1.0319725275039673, + "learning_rate": 7.779704033740799e-06, + "loss": 0.313, + "step": 16634 + }, + { + "epoch": 0.33300803242999777, + "grad_norm": 1.0356261730194092, + "learning_rate": 7.779434559891343e-06, + "loss": 0.3302, + "step": 16635 + }, + { + "epoch": 0.33302805094712606, + "grad_norm": 1.1119247674942017, + "learning_rate": 7.779165074357742e-06, + "loss": 0.3352, + "step": 16636 + }, + { + "epoch": 0.3330480694642544, + "grad_norm": 1.0614713430404663, + "learning_rate": 7.778895577141129e-06, + "loss": 0.3523, + "step": 16637 + }, + { + "epoch": 0.33306808798138277, + "grad_norm": 1.0240671634674072, + "learning_rate": 7.778626068242636e-06, + "loss": 0.2916, + "step": 16638 + }, + { + "epoch": 0.3330881064985111, + "grad_norm": 1.221858024597168, + "learning_rate": 7.778356547663399e-06, + "loss": 0.3429, + "step": 16639 + }, + { + "epoch": 0.33310812501563947, + "grad_norm": 1.0714049339294434, + "learning_rate": 7.778087015404548e-06, + "loss": 0.302, + "step": 16640 + }, + { + "epoch": 0.3331281435327678, + "grad_norm": 1.0148873329162598, + "learning_rate": 7.777817471467217e-06, + "loss": 0.274, + "step": 16641 + }, + { + "epoch": 0.33314816204989617, + "grad_norm": 1.0715523958206177, + "learning_rate": 7.77754791585254e-06, + "loss": 0.3596, + "step": 16642 + }, + { + "epoch": 0.3331681805670245, + "grad_norm": 1.0945873260498047, + "learning_rate": 7.777278348561648e-06, + "loss": 0.3954, + "step": 16643 + }, + { + "epoch": 0.3331881990841528, + "grad_norm": 1.9419652223587036, + "learning_rate": 7.777008769595676e-06, + "loss": 0.8353, + "step": 16644 + }, + { + "epoch": 0.33320821760128116, + "grad_norm": 1.0162285566329956, + "learning_rate": 7.776739178955757e-06, + "loss": 0.2778, + "step": 16645 + }, + { + "epoch": 0.3332282361184095, + "grad_norm": 1.0436922311782837, + "learning_rate": 7.776469576643025e-06, + "loss": 0.3288, + "step": 16646 + }, + { + "epoch": 0.33324825463553787, + "grad_norm": 1.2787959575653076, + "learning_rate": 7.77619996265861e-06, + "loss": 0.3262, + "step": 16647 + }, + { + "epoch": 0.3332682731526662, + "grad_norm": 1.077505350112915, + "learning_rate": 7.77593033700365e-06, + "loss": 0.3236, + "step": 16648 + }, + { + "epoch": 0.33328829166979457, + "grad_norm": 1.1302151679992676, + "learning_rate": 7.775660699679275e-06, + "loss": 0.3769, + "step": 16649 + }, + { + "epoch": 0.3333083101869229, + "grad_norm": 1.0905567407608032, + "learning_rate": 7.77539105068662e-06, + "loss": 0.3316, + "step": 16650 + }, + { + "epoch": 0.33332832870405127, + "grad_norm": 1.189757227897644, + "learning_rate": 7.775121390026817e-06, + "loss": 0.3344, + "step": 16651 + }, + { + "epoch": 0.33334834722117956, + "grad_norm": 1.1210482120513916, + "learning_rate": 7.774851717701004e-06, + "loss": 0.2993, + "step": 16652 + }, + { + "epoch": 0.3333683657383079, + "grad_norm": 1.1560381650924683, + "learning_rate": 7.77458203371031e-06, + "loss": 0.3767, + "step": 16653 + }, + { + "epoch": 0.33338838425543627, + "grad_norm": 1.0071001052856445, + "learning_rate": 7.77431233805587e-06, + "loss": 0.3156, + "step": 16654 + }, + { + "epoch": 0.3334084027725646, + "grad_norm": 1.0364880561828613, + "learning_rate": 7.774042630738817e-06, + "loss": 0.2804, + "step": 16655 + }, + { + "epoch": 0.33342842128969297, + "grad_norm": 1.2910481691360474, + "learning_rate": 7.773772911760286e-06, + "loss": 0.3113, + "step": 16656 + }, + { + "epoch": 0.3334484398068213, + "grad_norm": 1.0338689088821411, + "learning_rate": 7.773503181121412e-06, + "loss": 0.2952, + "step": 16657 + }, + { + "epoch": 0.33346845832394967, + "grad_norm": 1.1714766025543213, + "learning_rate": 7.773233438823327e-06, + "loss": 0.3147, + "step": 16658 + }, + { + "epoch": 0.333488476841078, + "grad_norm": 1.0809500217437744, + "learning_rate": 7.772963684867165e-06, + "loss": 0.326, + "step": 16659 + }, + { + "epoch": 0.3335084953582063, + "grad_norm": 1.0834691524505615, + "learning_rate": 7.772693919254061e-06, + "loss": 0.2879, + "step": 16660 + }, + { + "epoch": 0.33352851387533466, + "grad_norm": 1.1444435119628906, + "learning_rate": 7.772424141985149e-06, + "loss": 0.3216, + "step": 16661 + }, + { + "epoch": 0.333548532392463, + "grad_norm": 1.3309333324432373, + "learning_rate": 7.772154353061561e-06, + "loss": 0.3508, + "step": 16662 + }, + { + "epoch": 0.33356855090959137, + "grad_norm": 1.01826810836792, + "learning_rate": 7.771884552484432e-06, + "loss": 0.2763, + "step": 16663 + }, + { + "epoch": 0.3335885694267197, + "grad_norm": 1.129351258277893, + "learning_rate": 7.7716147402549e-06, + "loss": 0.3579, + "step": 16664 + }, + { + "epoch": 0.33360858794384807, + "grad_norm": 1.0111782550811768, + "learning_rate": 7.771344916374093e-06, + "loss": 0.2729, + "step": 16665 + }, + { + "epoch": 0.3336286064609764, + "grad_norm": 1.3195409774780273, + "learning_rate": 7.771075080843151e-06, + "loss": 0.3159, + "step": 16666 + }, + { + "epoch": 0.33364862497810477, + "grad_norm": 1.0432733297348022, + "learning_rate": 7.770805233663204e-06, + "loss": 0.3554, + "step": 16667 + }, + { + "epoch": 0.33366864349523306, + "grad_norm": 1.1617419719696045, + "learning_rate": 7.770535374835388e-06, + "loss": 0.3016, + "step": 16668 + }, + { + "epoch": 0.3336886620123614, + "grad_norm": 1.0600690841674805, + "learning_rate": 7.770265504360837e-06, + "loss": 0.3081, + "step": 16669 + }, + { + "epoch": 0.33370868052948977, + "grad_norm": 1.1430504322052002, + "learning_rate": 7.769995622240688e-06, + "loss": 0.3311, + "step": 16670 + }, + { + "epoch": 0.3337286990466181, + "grad_norm": 1.1244728565216064, + "learning_rate": 7.769725728476071e-06, + "loss": 0.3052, + "step": 16671 + }, + { + "epoch": 0.33374871756374647, + "grad_norm": 1.0534666776657104, + "learning_rate": 7.769455823068126e-06, + "loss": 0.2965, + "step": 16672 + }, + { + "epoch": 0.3337687360808748, + "grad_norm": 1.1183621883392334, + "learning_rate": 7.769185906017981e-06, + "loss": 0.3236, + "step": 16673 + }, + { + "epoch": 0.33378875459800317, + "grad_norm": 1.3736742734909058, + "learning_rate": 7.768915977326778e-06, + "loss": 0.3322, + "step": 16674 + }, + { + "epoch": 0.3338087731151315, + "grad_norm": 2.1728456020355225, + "learning_rate": 7.76864603699565e-06, + "loss": 0.8842, + "step": 16675 + }, + { + "epoch": 0.3338287916322598, + "grad_norm": 1.1087820529937744, + "learning_rate": 7.768376085025724e-06, + "loss": 0.3211, + "step": 16676 + }, + { + "epoch": 0.33384881014938816, + "grad_norm": 0.9793511033058167, + "learning_rate": 7.768106121418144e-06, + "loss": 0.2735, + "step": 16677 + }, + { + "epoch": 0.3338688286665165, + "grad_norm": 1.1135603189468384, + "learning_rate": 7.76783614617404e-06, + "loss": 0.3091, + "step": 16678 + }, + { + "epoch": 0.33388884718364487, + "grad_norm": 1.461150884628296, + "learning_rate": 7.767566159294551e-06, + "loss": 0.3207, + "step": 16679 + }, + { + "epoch": 0.3339088657007732, + "grad_norm": 1.219273328781128, + "learning_rate": 7.767296160780806e-06, + "loss": 0.3118, + "step": 16680 + }, + { + "epoch": 0.33392888421790157, + "grad_norm": 0.9936797618865967, + "learning_rate": 7.767026150633945e-06, + "loss": 0.3404, + "step": 16681 + }, + { + "epoch": 0.3339489027350299, + "grad_norm": 1.1782902479171753, + "learning_rate": 7.766756128855102e-06, + "loss": 0.3623, + "step": 16682 + }, + { + "epoch": 0.33396892125215827, + "grad_norm": 1.1038880348205566, + "learning_rate": 7.766486095445412e-06, + "loss": 0.3465, + "step": 16683 + }, + { + "epoch": 0.33398893976928656, + "grad_norm": 2.1063313484191895, + "learning_rate": 7.76621605040601e-06, + "loss": 0.8393, + "step": 16684 + }, + { + "epoch": 0.3340089582864149, + "grad_norm": 1.1256680488586426, + "learning_rate": 7.76594599373803e-06, + "loss": 0.3058, + "step": 16685 + }, + { + "epoch": 0.33402897680354326, + "grad_norm": 1.3505061864852905, + "learning_rate": 7.765675925442608e-06, + "loss": 0.3569, + "step": 16686 + }, + { + "epoch": 0.3340489953206716, + "grad_norm": 1.0165445804595947, + "learning_rate": 7.765405845520882e-06, + "loss": 0.2909, + "step": 16687 + }, + { + "epoch": 0.33406901383779997, + "grad_norm": 1.0987415313720703, + "learning_rate": 7.765135753973983e-06, + "loss": 0.318, + "step": 16688 + }, + { + "epoch": 0.3340890323549283, + "grad_norm": 1.1599515676498413, + "learning_rate": 7.764865650803047e-06, + "loss": 0.3332, + "step": 16689 + }, + { + "epoch": 0.33410905087205667, + "grad_norm": 1.1738829612731934, + "learning_rate": 7.764595536009213e-06, + "loss": 0.3077, + "step": 16690 + }, + { + "epoch": 0.334129069389185, + "grad_norm": 1.125105381011963, + "learning_rate": 7.764325409593613e-06, + "loss": 0.3533, + "step": 16691 + }, + { + "epoch": 0.3341490879063133, + "grad_norm": 1.1092618703842163, + "learning_rate": 7.764055271557385e-06, + "loss": 0.3464, + "step": 16692 + }, + { + "epoch": 0.33416910642344166, + "grad_norm": 1.0062016248703003, + "learning_rate": 7.763785121901662e-06, + "loss": 0.3231, + "step": 16693 + }, + { + "epoch": 0.33418912494057, + "grad_norm": 1.2508631944656372, + "learning_rate": 7.763514960627583e-06, + "loss": 0.3496, + "step": 16694 + }, + { + "epoch": 0.33420914345769837, + "grad_norm": 1.3122096061706543, + "learning_rate": 7.763244787736281e-06, + "loss": 0.2848, + "step": 16695 + }, + { + "epoch": 0.3342291619748267, + "grad_norm": 1.0998340845108032, + "learning_rate": 7.762974603228892e-06, + "loss": 0.3306, + "step": 16696 + }, + { + "epoch": 0.33424918049195507, + "grad_norm": 1.009325385093689, + "learning_rate": 7.762704407106554e-06, + "loss": 0.3214, + "step": 16697 + }, + { + "epoch": 0.3342691990090834, + "grad_norm": 1.116133451461792, + "learning_rate": 7.762434199370399e-06, + "loss": 0.3353, + "step": 16698 + }, + { + "epoch": 0.33428921752621177, + "grad_norm": 1.1283423900604248, + "learning_rate": 7.762163980021566e-06, + "loss": 0.36, + "step": 16699 + }, + { + "epoch": 0.33430923604334006, + "grad_norm": 1.130305528640747, + "learning_rate": 7.76189374906119e-06, + "loss": 0.299, + "step": 16700 + }, + { + "epoch": 0.3343292545604684, + "grad_norm": 1.1854865550994873, + "learning_rate": 7.761623506490408e-06, + "loss": 0.3216, + "step": 16701 + }, + { + "epoch": 0.33434927307759676, + "grad_norm": 1.272727370262146, + "learning_rate": 7.761353252310352e-06, + "loss": 0.3682, + "step": 16702 + }, + { + "epoch": 0.3343692915947251, + "grad_norm": 1.013003945350647, + "learning_rate": 7.761082986522165e-06, + "loss": 0.3423, + "step": 16703 + }, + { + "epoch": 0.33438931011185347, + "grad_norm": 1.126361608505249, + "learning_rate": 7.760812709126978e-06, + "loss": 0.3134, + "step": 16704 + }, + { + "epoch": 0.3344093286289818, + "grad_norm": 1.10772705078125, + "learning_rate": 7.760542420125929e-06, + "loss": 0.3237, + "step": 16705 + }, + { + "epoch": 0.33442934714611017, + "grad_norm": 1.0981013774871826, + "learning_rate": 7.760272119520153e-06, + "loss": 0.317, + "step": 16706 + }, + { + "epoch": 0.3344493656632385, + "grad_norm": 0.967054009437561, + "learning_rate": 7.760001807310786e-06, + "loss": 0.2954, + "step": 16707 + }, + { + "epoch": 0.3344693841803668, + "grad_norm": 1.1098660230636597, + "learning_rate": 7.759731483498967e-06, + "loss": 0.3259, + "step": 16708 + }, + { + "epoch": 0.33448940269749516, + "grad_norm": 0.9980252981185913, + "learning_rate": 7.759461148085829e-06, + "loss": 0.2993, + "step": 16709 + }, + { + "epoch": 0.3345094212146235, + "grad_norm": 1.1647018194198608, + "learning_rate": 7.759190801072512e-06, + "loss": 0.3241, + "step": 16710 + }, + { + "epoch": 0.33452943973175187, + "grad_norm": 1.9473443031311035, + "learning_rate": 7.758920442460151e-06, + "loss": 0.7836, + "step": 16711 + }, + { + "epoch": 0.3345494582488802, + "grad_norm": 1.0530115365982056, + "learning_rate": 7.758650072249882e-06, + "loss": 0.328, + "step": 16712 + }, + { + "epoch": 0.33456947676600857, + "grad_norm": 1.0435277223587036, + "learning_rate": 7.75837969044284e-06, + "loss": 0.3231, + "step": 16713 + }, + { + "epoch": 0.3345894952831369, + "grad_norm": 1.2100470066070557, + "learning_rate": 7.758109297040164e-06, + "loss": 0.311, + "step": 16714 + }, + { + "epoch": 0.33460951380026527, + "grad_norm": 1.2843658924102783, + "learning_rate": 7.75783889204299e-06, + "loss": 0.3512, + "step": 16715 + }, + { + "epoch": 0.33462953231739356, + "grad_norm": 1.832028865814209, + "learning_rate": 7.757568475452456e-06, + "loss": 0.8027, + "step": 16716 + }, + { + "epoch": 0.3346495508345219, + "grad_norm": 0.9897558093070984, + "learning_rate": 7.757298047269697e-06, + "loss": 0.2945, + "step": 16717 + }, + { + "epoch": 0.33466956935165026, + "grad_norm": 1.6776174306869507, + "learning_rate": 7.757027607495851e-06, + "loss": 0.8092, + "step": 16718 + }, + { + "epoch": 0.3346895878687786, + "grad_norm": 1.049669623374939, + "learning_rate": 7.756757156132052e-06, + "loss": 0.2998, + "step": 16719 + }, + { + "epoch": 0.33470960638590697, + "grad_norm": 1.7857216596603394, + "learning_rate": 7.75648669317944e-06, + "loss": 0.81, + "step": 16720 + }, + { + "epoch": 0.3347296249030353, + "grad_norm": 1.1014139652252197, + "learning_rate": 7.756216218639154e-06, + "loss": 0.2859, + "step": 16721 + }, + { + "epoch": 0.33474964342016367, + "grad_norm": 1.0906239748001099, + "learning_rate": 7.755945732512324e-06, + "loss": 0.3223, + "step": 16722 + }, + { + "epoch": 0.334769661937292, + "grad_norm": 1.0319534540176392, + "learning_rate": 7.755675234800094e-06, + "loss": 0.2799, + "step": 16723 + }, + { + "epoch": 0.3347896804544203, + "grad_norm": 1.1991074085235596, + "learning_rate": 7.755404725503597e-06, + "loss": 0.3073, + "step": 16724 + }, + { + "epoch": 0.33480969897154866, + "grad_norm": 1.2573051452636719, + "learning_rate": 7.755134204623972e-06, + "loss": 0.3358, + "step": 16725 + }, + { + "epoch": 0.334829717488677, + "grad_norm": 1.2058067321777344, + "learning_rate": 7.754863672162356e-06, + "loss": 0.3564, + "step": 16726 + }, + { + "epoch": 0.33484973600580537, + "grad_norm": 1.1282578706741333, + "learning_rate": 7.754593128119886e-06, + "loss": 0.3033, + "step": 16727 + }, + { + "epoch": 0.3348697545229337, + "grad_norm": 1.857979655265808, + "learning_rate": 7.754322572497699e-06, + "loss": 0.8735, + "step": 16728 + }, + { + "epoch": 0.33488977304006207, + "grad_norm": 1.0564186573028564, + "learning_rate": 7.754052005296932e-06, + "loss": 0.3381, + "step": 16729 + }, + { + "epoch": 0.3349097915571904, + "grad_norm": 1.2071013450622559, + "learning_rate": 7.753781426518724e-06, + "loss": 0.3288, + "step": 16730 + }, + { + "epoch": 0.33492981007431877, + "grad_norm": 1.0337016582489014, + "learning_rate": 7.75351083616421e-06, + "loss": 0.2863, + "step": 16731 + }, + { + "epoch": 0.33494982859144706, + "grad_norm": 1.1843137741088867, + "learning_rate": 7.753240234234532e-06, + "loss": 0.301, + "step": 16732 + }, + { + "epoch": 0.3349698471085754, + "grad_norm": 1.1185827255249023, + "learning_rate": 7.752969620730821e-06, + "loss": 0.3312, + "step": 16733 + }, + { + "epoch": 0.33498986562570376, + "grad_norm": 1.1740940809249878, + "learning_rate": 7.752698995654221e-06, + "loss": 0.3596, + "step": 16734 + }, + { + "epoch": 0.3350098841428321, + "grad_norm": 1.0970265865325928, + "learning_rate": 7.752428359005865e-06, + "loss": 0.3601, + "step": 16735 + }, + { + "epoch": 0.33502990265996047, + "grad_norm": 1.0165584087371826, + "learning_rate": 7.752157710786894e-06, + "loss": 0.2595, + "step": 16736 + }, + { + "epoch": 0.3350499211770888, + "grad_norm": 1.0899039506912231, + "learning_rate": 7.751887050998443e-06, + "loss": 0.3602, + "step": 16737 + }, + { + "epoch": 0.33506993969421717, + "grad_norm": 1.194368839263916, + "learning_rate": 7.751616379641652e-06, + "loss": 0.3628, + "step": 16738 + }, + { + "epoch": 0.3350899582113455, + "grad_norm": 1.1488158702850342, + "learning_rate": 7.751345696717658e-06, + "loss": 0.3158, + "step": 16739 + }, + { + "epoch": 0.3351099767284738, + "grad_norm": 1.1961716413497925, + "learning_rate": 7.751075002227598e-06, + "loss": 0.3201, + "step": 16740 + }, + { + "epoch": 0.33512999524560216, + "grad_norm": 1.1235339641571045, + "learning_rate": 7.750804296172612e-06, + "loss": 0.3196, + "step": 16741 + }, + { + "epoch": 0.3351500137627305, + "grad_norm": 1.1170119047164917, + "learning_rate": 7.750533578553834e-06, + "loss": 0.344, + "step": 16742 + }, + { + "epoch": 0.33517003227985886, + "grad_norm": 0.9676849246025085, + "learning_rate": 7.750262849372407e-06, + "loss": 0.2841, + "step": 16743 + }, + { + "epoch": 0.3351900507969872, + "grad_norm": 1.140250563621521, + "learning_rate": 7.749992108629465e-06, + "loss": 0.3114, + "step": 16744 + }, + { + "epoch": 0.33521006931411557, + "grad_norm": 1.2169431447982788, + "learning_rate": 7.74972135632615e-06, + "loss": 0.3043, + "step": 16745 + }, + { + "epoch": 0.3352300878312439, + "grad_norm": 1.2965550422668457, + "learning_rate": 7.749450592463596e-06, + "loss": 0.3666, + "step": 16746 + }, + { + "epoch": 0.33525010634837227, + "grad_norm": 1.1846179962158203, + "learning_rate": 7.749179817042947e-06, + "loss": 0.3232, + "step": 16747 + }, + { + "epoch": 0.33527012486550056, + "grad_norm": 1.0572761297225952, + "learning_rate": 7.748909030065335e-06, + "loss": 0.3046, + "step": 16748 + }, + { + "epoch": 0.3352901433826289, + "grad_norm": 1.1773823499679565, + "learning_rate": 7.7486382315319e-06, + "loss": 0.2928, + "step": 16749 + }, + { + "epoch": 0.33531016189975726, + "grad_norm": 1.1100616455078125, + "learning_rate": 7.748367421443784e-06, + "loss": 0.2963, + "step": 16750 + }, + { + "epoch": 0.3353301804168856, + "grad_norm": 0.9715559482574463, + "learning_rate": 7.748096599802121e-06, + "loss": 0.2885, + "step": 16751 + }, + { + "epoch": 0.33535019893401397, + "grad_norm": 1.0601660013198853, + "learning_rate": 7.747825766608052e-06, + "loss": 0.3372, + "step": 16752 + }, + { + "epoch": 0.3353702174511423, + "grad_norm": 1.178958535194397, + "learning_rate": 7.747554921862714e-06, + "loss": 0.3178, + "step": 16753 + }, + { + "epoch": 0.33539023596827067, + "grad_norm": 1.8989746570587158, + "learning_rate": 7.747284065567249e-06, + "loss": 0.7725, + "step": 16754 + }, + { + "epoch": 0.335410254485399, + "grad_norm": 1.1450183391571045, + "learning_rate": 7.74701319772279e-06, + "loss": 0.3302, + "step": 16755 + }, + { + "epoch": 0.3354302730025273, + "grad_norm": 1.2745659351348877, + "learning_rate": 7.74674231833048e-06, + "loss": 0.3319, + "step": 16756 + }, + { + "epoch": 0.33545029151965566, + "grad_norm": 1.2480738162994385, + "learning_rate": 7.746471427391458e-06, + "loss": 0.3337, + "step": 16757 + }, + { + "epoch": 0.335470310036784, + "grad_norm": 1.0566561222076416, + "learning_rate": 7.746200524906858e-06, + "loss": 0.3133, + "step": 16758 + }, + { + "epoch": 0.33549032855391236, + "grad_norm": 1.069688081741333, + "learning_rate": 7.745929610877825e-06, + "loss": 0.3473, + "step": 16759 + }, + { + "epoch": 0.3355103470710407, + "grad_norm": 1.1800934076309204, + "learning_rate": 7.745658685305492e-06, + "loss": 0.3131, + "step": 16760 + }, + { + "epoch": 0.33553036558816907, + "grad_norm": 0.9991317987442017, + "learning_rate": 7.745387748191002e-06, + "loss": 0.3367, + "step": 16761 + }, + { + "epoch": 0.3355503841052974, + "grad_norm": 1.2346951961517334, + "learning_rate": 7.745116799535492e-06, + "loss": 0.3355, + "step": 16762 + }, + { + "epoch": 0.33557040262242577, + "grad_norm": 1.9121989011764526, + "learning_rate": 7.744845839340103e-06, + "loss": 0.859, + "step": 16763 + }, + { + "epoch": 0.33559042113955406, + "grad_norm": 1.176020622253418, + "learning_rate": 7.744574867605972e-06, + "loss": 0.3405, + "step": 16764 + }, + { + "epoch": 0.3356104396566824, + "grad_norm": 1.8196276426315308, + "learning_rate": 7.74430388433424e-06, + "loss": 0.8346, + "step": 16765 + }, + { + "epoch": 0.33563045817381076, + "grad_norm": 1.9004358053207397, + "learning_rate": 7.744032889526042e-06, + "loss": 0.7861, + "step": 16766 + }, + { + "epoch": 0.3356504766909391, + "grad_norm": 1.1186950206756592, + "learning_rate": 7.743761883182525e-06, + "loss": 0.2272, + "step": 16767 + }, + { + "epoch": 0.33567049520806747, + "grad_norm": 1.1132843494415283, + "learning_rate": 7.74349086530482e-06, + "loss": 0.2744, + "step": 16768 + }, + { + "epoch": 0.3356905137251958, + "grad_norm": 1.186233401298523, + "learning_rate": 7.743219835894072e-06, + "loss": 0.3561, + "step": 16769 + }, + { + "epoch": 0.33571053224232417, + "grad_norm": 1.1662580966949463, + "learning_rate": 7.742948794951417e-06, + "loss": 0.3356, + "step": 16770 + }, + { + "epoch": 0.3357305507594525, + "grad_norm": 1.1722261905670166, + "learning_rate": 7.742677742477995e-06, + "loss": 0.3656, + "step": 16771 + }, + { + "epoch": 0.3357505692765808, + "grad_norm": 1.0140210390090942, + "learning_rate": 7.742406678474946e-06, + "loss": 0.3599, + "step": 16772 + }, + { + "epoch": 0.33577058779370916, + "grad_norm": 1.0986037254333496, + "learning_rate": 7.742135602943411e-06, + "loss": 0.3336, + "step": 16773 + }, + { + "epoch": 0.3357906063108375, + "grad_norm": 1.1137889623641968, + "learning_rate": 7.741864515884527e-06, + "loss": 0.3352, + "step": 16774 + }, + { + "epoch": 0.33581062482796586, + "grad_norm": 0.9914953708648682, + "learning_rate": 7.741593417299433e-06, + "loss": 0.3172, + "step": 16775 + }, + { + "epoch": 0.3358306433450942, + "grad_norm": 1.1281403303146362, + "learning_rate": 7.741322307189273e-06, + "loss": 0.3165, + "step": 16776 + }, + { + "epoch": 0.33585066186222257, + "grad_norm": 1.1214293241500854, + "learning_rate": 7.741051185555182e-06, + "loss": 0.3191, + "step": 16777 + }, + { + "epoch": 0.3358706803793509, + "grad_norm": 1.003624677658081, + "learning_rate": 7.740780052398303e-06, + "loss": 0.3124, + "step": 16778 + }, + { + "epoch": 0.33589069889647927, + "grad_norm": 1.339093804359436, + "learning_rate": 7.740508907719775e-06, + "loss": 0.2954, + "step": 16779 + }, + { + "epoch": 0.33591071741360756, + "grad_norm": 1.1758499145507812, + "learning_rate": 7.740237751520737e-06, + "loss": 0.3268, + "step": 16780 + }, + { + "epoch": 0.3359307359307359, + "grad_norm": 1.029503345489502, + "learning_rate": 7.739966583802328e-06, + "loss": 0.2927, + "step": 16781 + }, + { + "epoch": 0.33595075444786426, + "grad_norm": 1.1422972679138184, + "learning_rate": 7.739695404565688e-06, + "loss": 0.345, + "step": 16782 + }, + { + "epoch": 0.3359707729649926, + "grad_norm": 1.0195097923278809, + "learning_rate": 7.739424213811961e-06, + "loss": 0.3585, + "step": 16783 + }, + { + "epoch": 0.33599079148212097, + "grad_norm": 0.9640304446220398, + "learning_rate": 7.739153011542283e-06, + "loss": 0.2457, + "step": 16784 + }, + { + "epoch": 0.3360108099992493, + "grad_norm": 1.0877360105514526, + "learning_rate": 7.738881797757796e-06, + "loss": 0.332, + "step": 16785 + }, + { + "epoch": 0.33603082851637767, + "grad_norm": 1.0887998342514038, + "learning_rate": 7.738610572459638e-06, + "loss": 0.3245, + "step": 16786 + }, + { + "epoch": 0.336050847033506, + "grad_norm": 1.1407761573791504, + "learning_rate": 7.73833933564895e-06, + "loss": 0.3034, + "step": 16787 + }, + { + "epoch": 0.3360708655506343, + "grad_norm": 1.1351176500320435, + "learning_rate": 7.738068087326873e-06, + "loss": 0.3058, + "step": 16788 + }, + { + "epoch": 0.33609088406776266, + "grad_norm": 1.9638363122940063, + "learning_rate": 7.73779682749455e-06, + "loss": 0.8146, + "step": 16789 + }, + { + "epoch": 0.336110902584891, + "grad_norm": 1.1661527156829834, + "learning_rate": 7.737525556153116e-06, + "loss": 0.3359, + "step": 16790 + }, + { + "epoch": 0.33613092110201936, + "grad_norm": 1.0746477842330933, + "learning_rate": 7.737254273303712e-06, + "loss": 0.3252, + "step": 16791 + }, + { + "epoch": 0.3361509396191477, + "grad_norm": 1.8764973878860474, + "learning_rate": 7.736982978947483e-06, + "loss": 0.8403, + "step": 16792 + }, + { + "epoch": 0.33617095813627607, + "grad_norm": 1.1752541065216064, + "learning_rate": 7.736711673085565e-06, + "loss": 0.3475, + "step": 16793 + }, + { + "epoch": 0.3361909766534044, + "grad_norm": 1.1239060163497925, + "learning_rate": 7.7364403557191e-06, + "loss": 0.3125, + "step": 16794 + }, + { + "epoch": 0.33621099517053277, + "grad_norm": 1.1010856628417969, + "learning_rate": 7.736169026849229e-06, + "loss": 0.3366, + "step": 16795 + }, + { + "epoch": 0.33623101368766106, + "grad_norm": 1.0389372110366821, + "learning_rate": 7.735897686477093e-06, + "loss": 0.2916, + "step": 16796 + }, + { + "epoch": 0.3362510322047894, + "grad_norm": 1.980023741722107, + "learning_rate": 7.735626334603832e-06, + "loss": 0.883, + "step": 16797 + }, + { + "epoch": 0.33627105072191776, + "grad_norm": 0.9954087138175964, + "learning_rate": 7.735354971230584e-06, + "loss": 0.2775, + "step": 16798 + }, + { + "epoch": 0.3362910692390461, + "grad_norm": 1.1592708826065063, + "learning_rate": 7.735083596358497e-06, + "loss": 0.3398, + "step": 16799 + }, + { + "epoch": 0.33631108775617446, + "grad_norm": 1.179620385169983, + "learning_rate": 7.734812209988702e-06, + "loss": 0.3657, + "step": 16800 + }, + { + "epoch": 0.3363311062733028, + "grad_norm": 1.0959681272506714, + "learning_rate": 7.734540812122348e-06, + "loss": 0.3314, + "step": 16801 + }, + { + "epoch": 0.33635112479043117, + "grad_norm": 1.0790963172912598, + "learning_rate": 7.734269402760573e-06, + "loss": 0.3045, + "step": 16802 + }, + { + "epoch": 0.3363711433075595, + "grad_norm": 1.092642903327942, + "learning_rate": 7.733997981904517e-06, + "loss": 0.3126, + "step": 16803 + }, + { + "epoch": 0.3363911618246878, + "grad_norm": 1.4363889694213867, + "learning_rate": 7.73372654955532e-06, + "loss": 0.3697, + "step": 16804 + }, + { + "epoch": 0.33641118034181616, + "grad_norm": 1.1894738674163818, + "learning_rate": 7.733455105714128e-06, + "loss": 0.3084, + "step": 16805 + }, + { + "epoch": 0.3364311988589445, + "grad_norm": 1.061018466949463, + "learning_rate": 7.733183650382078e-06, + "loss": 0.3209, + "step": 16806 + }, + { + "epoch": 0.33645121737607286, + "grad_norm": 1.8505001068115234, + "learning_rate": 7.732912183560313e-06, + "loss": 0.8137, + "step": 16807 + }, + { + "epoch": 0.3364712358932012, + "grad_norm": 1.1838204860687256, + "learning_rate": 7.732640705249971e-06, + "loss": 0.2974, + "step": 16808 + }, + { + "epoch": 0.33649125441032957, + "grad_norm": 1.1037052869796753, + "learning_rate": 7.732369215452198e-06, + "loss": 0.2901, + "step": 16809 + }, + { + "epoch": 0.3365112729274579, + "grad_norm": 1.205485463142395, + "learning_rate": 7.73209771416813e-06, + "loss": 0.3514, + "step": 16810 + }, + { + "epoch": 0.33653129144458627, + "grad_norm": 1.1066818237304688, + "learning_rate": 7.731826201398913e-06, + "loss": 0.2848, + "step": 16811 + }, + { + "epoch": 0.33655130996171456, + "grad_norm": 0.9938555955886841, + "learning_rate": 7.731554677145687e-06, + "loss": 0.3339, + "step": 16812 + }, + { + "epoch": 0.3365713284788429, + "grad_norm": 1.1829758882522583, + "learning_rate": 7.731283141409591e-06, + "loss": 0.2889, + "step": 16813 + }, + { + "epoch": 0.33659134699597126, + "grad_norm": 1.0174453258514404, + "learning_rate": 7.73101159419177e-06, + "loss": 0.2662, + "step": 16814 + }, + { + "epoch": 0.3366113655130996, + "grad_norm": 1.1680352687835693, + "learning_rate": 7.730740035493365e-06, + "loss": 0.3515, + "step": 16815 + }, + { + "epoch": 0.33663138403022796, + "grad_norm": 1.2622756958007812, + "learning_rate": 7.730468465315514e-06, + "loss": 0.2742, + "step": 16816 + }, + { + "epoch": 0.3366514025473563, + "grad_norm": 1.135702133178711, + "learning_rate": 7.730196883659363e-06, + "loss": 0.3081, + "step": 16817 + }, + { + "epoch": 0.33667142106448467, + "grad_norm": 1.782992959022522, + "learning_rate": 7.729925290526052e-06, + "loss": 0.796, + "step": 16818 + }, + { + "epoch": 0.336691439581613, + "grad_norm": 1.3149349689483643, + "learning_rate": 7.729653685916721e-06, + "loss": 0.3268, + "step": 16819 + }, + { + "epoch": 0.3367114580987413, + "grad_norm": 1.0302765369415283, + "learning_rate": 7.729382069832515e-06, + "loss": 0.282, + "step": 16820 + }, + { + "epoch": 0.33673147661586966, + "grad_norm": 1.877203345298767, + "learning_rate": 7.729110442274575e-06, + "loss": 0.8167, + "step": 16821 + }, + { + "epoch": 0.336751495132998, + "grad_norm": 1.9841371774673462, + "learning_rate": 7.72883880324404e-06, + "loss": 0.8248, + "step": 16822 + }, + { + "epoch": 0.33677151365012636, + "grad_norm": 1.8900794982910156, + "learning_rate": 7.728567152742054e-06, + "loss": 0.7883, + "step": 16823 + }, + { + "epoch": 0.3367915321672547, + "grad_norm": 1.159398078918457, + "learning_rate": 7.728295490769758e-06, + "loss": 0.2879, + "step": 16824 + }, + { + "epoch": 0.33681155068438307, + "grad_norm": 1.1363818645477295, + "learning_rate": 7.728023817328295e-06, + "loss": 0.341, + "step": 16825 + }, + { + "epoch": 0.3368315692015114, + "grad_norm": 1.8630940914154053, + "learning_rate": 7.727752132418808e-06, + "loss": 0.865, + "step": 16826 + }, + { + "epoch": 0.33685158771863977, + "grad_norm": 1.8731393814086914, + "learning_rate": 7.72748043604244e-06, + "loss": 0.8096, + "step": 16827 + }, + { + "epoch": 0.33687160623576806, + "grad_norm": 2.0127995014190674, + "learning_rate": 7.727208728200328e-06, + "loss": 0.8348, + "step": 16828 + }, + { + "epoch": 0.3368916247528964, + "grad_norm": 1.074373483657837, + "learning_rate": 7.72693700889362e-06, + "loss": 0.3026, + "step": 16829 + }, + { + "epoch": 0.33691164327002476, + "grad_norm": 1.023378849029541, + "learning_rate": 7.726665278123456e-06, + "loss": 0.301, + "step": 16830 + }, + { + "epoch": 0.3369316617871531, + "grad_norm": 1.1165080070495605, + "learning_rate": 7.726393535890975e-06, + "loss": 0.3261, + "step": 16831 + }, + { + "epoch": 0.33695168030428146, + "grad_norm": 1.9084213972091675, + "learning_rate": 7.726121782197323e-06, + "loss": 0.749, + "step": 16832 + }, + { + "epoch": 0.3369716988214098, + "grad_norm": 1.066484808921814, + "learning_rate": 7.725850017043643e-06, + "loss": 0.3249, + "step": 16833 + }, + { + "epoch": 0.33699171733853817, + "grad_norm": 1.1008635759353638, + "learning_rate": 7.725578240431075e-06, + "loss": 0.3349, + "step": 16834 + }, + { + "epoch": 0.3370117358556665, + "grad_norm": 1.1255184412002563, + "learning_rate": 7.725306452360763e-06, + "loss": 0.3909, + "step": 16835 + }, + { + "epoch": 0.3370317543727948, + "grad_norm": 1.0863147974014282, + "learning_rate": 7.725034652833849e-06, + "loss": 0.3364, + "step": 16836 + }, + { + "epoch": 0.33705177288992316, + "grad_norm": 1.0757451057434082, + "learning_rate": 7.724762841851476e-06, + "loss": 0.3486, + "step": 16837 + }, + { + "epoch": 0.3370717914070515, + "grad_norm": 1.2014304399490356, + "learning_rate": 7.724491019414787e-06, + "loss": 0.3459, + "step": 16838 + }, + { + "epoch": 0.33709180992417986, + "grad_norm": 1.8174384832382202, + "learning_rate": 7.724219185524923e-06, + "loss": 0.7783, + "step": 16839 + }, + { + "epoch": 0.3371118284413082, + "grad_norm": 1.094500184059143, + "learning_rate": 7.723947340183029e-06, + "loss": 0.3348, + "step": 16840 + }, + { + "epoch": 0.33713184695843657, + "grad_norm": 1.0275201797485352, + "learning_rate": 7.723675483390245e-06, + "loss": 0.3051, + "step": 16841 + }, + { + "epoch": 0.3371518654755649, + "grad_norm": 1.1244893074035645, + "learning_rate": 7.723403615147716e-06, + "loss": 0.3382, + "step": 16842 + }, + { + "epoch": 0.33717188399269327, + "grad_norm": 1.0479426383972168, + "learning_rate": 7.723131735456586e-06, + "loss": 0.3069, + "step": 16843 + }, + { + "epoch": 0.33719190250982156, + "grad_norm": 1.2275419235229492, + "learning_rate": 7.722859844317993e-06, + "loss": 0.3122, + "step": 16844 + }, + { + "epoch": 0.3372119210269499, + "grad_norm": 1.144719123840332, + "learning_rate": 7.722587941733086e-06, + "loss": 0.3538, + "step": 16845 + }, + { + "epoch": 0.33723193954407826, + "grad_norm": 1.1599600315093994, + "learning_rate": 7.722316027703003e-06, + "loss": 0.3039, + "step": 16846 + }, + { + "epoch": 0.3372519580612066, + "grad_norm": 1.1496201753616333, + "learning_rate": 7.722044102228891e-06, + "loss": 0.3181, + "step": 16847 + }, + { + "epoch": 0.33727197657833496, + "grad_norm": 1.080238699913025, + "learning_rate": 7.721772165311891e-06, + "loss": 0.2897, + "step": 16848 + }, + { + "epoch": 0.3372919950954633, + "grad_norm": 0.9805610775947571, + "learning_rate": 7.721500216953146e-06, + "loss": 0.3173, + "step": 16849 + }, + { + "epoch": 0.33731201361259167, + "grad_norm": 1.132169485092163, + "learning_rate": 7.721228257153802e-06, + "loss": 0.2811, + "step": 16850 + }, + { + "epoch": 0.33733203212972, + "grad_norm": 1.1526364088058472, + "learning_rate": 7.720956285914998e-06, + "loss": 0.3562, + "step": 16851 + }, + { + "epoch": 0.3373520506468483, + "grad_norm": 1.119474172592163, + "learning_rate": 7.720684303237878e-06, + "loss": 0.3401, + "step": 16852 + }, + { + "epoch": 0.33737206916397666, + "grad_norm": 1.2044090032577515, + "learning_rate": 7.720412309123587e-06, + "loss": 0.3231, + "step": 16853 + }, + { + "epoch": 0.337392087681105, + "grad_norm": 1.2377818822860718, + "learning_rate": 7.720140303573271e-06, + "loss": 0.3267, + "step": 16854 + }, + { + "epoch": 0.33741210619823336, + "grad_norm": 0.9835225939750671, + "learning_rate": 7.719868286588068e-06, + "loss": 0.293, + "step": 16855 + }, + { + "epoch": 0.3374321247153617, + "grad_norm": 1.8246225118637085, + "learning_rate": 7.719596258169126e-06, + "loss": 0.7775, + "step": 16856 + }, + { + "epoch": 0.33745214323249006, + "grad_norm": 1.0365488529205322, + "learning_rate": 7.719324218317585e-06, + "loss": 0.297, + "step": 16857 + }, + { + "epoch": 0.3374721617496184, + "grad_norm": 1.148809552192688, + "learning_rate": 7.719052167034591e-06, + "loss": 0.3694, + "step": 16858 + }, + { + "epoch": 0.33749218026674677, + "grad_norm": 1.0087721347808838, + "learning_rate": 7.718780104321286e-06, + "loss": 0.33, + "step": 16859 + }, + { + "epoch": 0.33751219878387506, + "grad_norm": 2.1743595600128174, + "learning_rate": 7.718508030178816e-06, + "loss": 0.7869, + "step": 16860 + }, + { + "epoch": 0.3375322173010034, + "grad_norm": 1.8480455875396729, + "learning_rate": 7.718235944608321e-06, + "loss": 0.7894, + "step": 16861 + }, + { + "epoch": 0.33755223581813176, + "grad_norm": 1.0086297988891602, + "learning_rate": 7.717963847610947e-06, + "loss": 0.2892, + "step": 16862 + }, + { + "epoch": 0.3375722543352601, + "grad_norm": 1.0818806886672974, + "learning_rate": 7.717691739187841e-06, + "loss": 0.3181, + "step": 16863 + }, + { + "epoch": 0.33759227285238846, + "grad_norm": 1.035170316696167, + "learning_rate": 7.717419619340142e-06, + "loss": 0.2803, + "step": 16864 + }, + { + "epoch": 0.3376122913695168, + "grad_norm": 1.0401910543441772, + "learning_rate": 7.717147488068996e-06, + "loss": 0.3632, + "step": 16865 + }, + { + "epoch": 0.33763230988664517, + "grad_norm": 2.060835361480713, + "learning_rate": 7.716875345375545e-06, + "loss": 0.7736, + "step": 16866 + }, + { + "epoch": 0.3376523284037735, + "grad_norm": 1.0810786485671997, + "learning_rate": 7.716603191260935e-06, + "loss": 0.3174, + "step": 16867 + }, + { + "epoch": 0.3376723469209018, + "grad_norm": 1.0302902460098267, + "learning_rate": 7.716331025726312e-06, + "loss": 0.2911, + "step": 16868 + }, + { + "epoch": 0.33769236543803016, + "grad_norm": 1.0427488088607788, + "learning_rate": 7.716058848772818e-06, + "loss": 0.3244, + "step": 16869 + }, + { + "epoch": 0.3377123839551585, + "grad_norm": 1.052835464477539, + "learning_rate": 7.715786660401595e-06, + "loss": 0.3259, + "step": 16870 + }, + { + "epoch": 0.33773240247228686, + "grad_norm": 1.1569476127624512, + "learning_rate": 7.715514460613792e-06, + "loss": 0.3511, + "step": 16871 + }, + { + "epoch": 0.3377524209894152, + "grad_norm": 1.9705582857131958, + "learning_rate": 7.715242249410549e-06, + "loss": 0.8203, + "step": 16872 + }, + { + "epoch": 0.33777243950654356, + "grad_norm": 1.2015368938446045, + "learning_rate": 7.71497002679301e-06, + "loss": 0.3233, + "step": 16873 + }, + { + "epoch": 0.3377924580236719, + "grad_norm": 1.1305615901947021, + "learning_rate": 7.714697792762324e-06, + "loss": 0.2832, + "step": 16874 + }, + { + "epoch": 0.33781247654080027, + "grad_norm": 1.2048897743225098, + "learning_rate": 7.714425547319631e-06, + "loss": 0.343, + "step": 16875 + }, + { + "epoch": 0.33783249505792856, + "grad_norm": 1.750131368637085, + "learning_rate": 7.71415329046608e-06, + "loss": 0.7966, + "step": 16876 + }, + { + "epoch": 0.3378525135750569, + "grad_norm": 1.0965940952301025, + "learning_rate": 7.71388102220281e-06, + "loss": 0.2883, + "step": 16877 + }, + { + "epoch": 0.33787253209218526, + "grad_norm": 1.1226553916931152, + "learning_rate": 7.713608742530968e-06, + "loss": 0.3491, + "step": 16878 + }, + { + "epoch": 0.3378925506093136, + "grad_norm": 0.9968367218971252, + "learning_rate": 7.713336451451698e-06, + "loss": 0.3102, + "step": 16879 + }, + { + "epoch": 0.33791256912644196, + "grad_norm": 1.0354350805282593, + "learning_rate": 7.713064148966147e-06, + "loss": 0.2878, + "step": 16880 + }, + { + "epoch": 0.3379325876435703, + "grad_norm": 1.129011631011963, + "learning_rate": 7.71279183507546e-06, + "loss": 0.2838, + "step": 16881 + }, + { + "epoch": 0.33795260616069867, + "grad_norm": 1.14535391330719, + "learning_rate": 7.712519509780779e-06, + "loss": 0.3282, + "step": 16882 + }, + { + "epoch": 0.337972624677827, + "grad_norm": 1.062644600868225, + "learning_rate": 7.712247173083247e-06, + "loss": 0.33, + "step": 16883 + }, + { + "epoch": 0.3379926431949553, + "grad_norm": 2.007577419281006, + "learning_rate": 7.711974824984014e-06, + "loss": 0.8183, + "step": 16884 + }, + { + "epoch": 0.33801266171208366, + "grad_norm": 1.3648990392684937, + "learning_rate": 7.711702465484222e-06, + "loss": 0.351, + "step": 16885 + }, + { + "epoch": 0.338032680229212, + "grad_norm": 1.0359331369400024, + "learning_rate": 7.711430094585016e-06, + "loss": 0.3432, + "step": 16886 + }, + { + "epoch": 0.33805269874634036, + "grad_norm": 2.0450429916381836, + "learning_rate": 7.711157712287543e-06, + "loss": 0.7899, + "step": 16887 + }, + { + "epoch": 0.3380727172634687, + "grad_norm": 0.965910792350769, + "learning_rate": 7.710885318592944e-06, + "loss": 0.2779, + "step": 16888 + }, + { + "epoch": 0.33809273578059706, + "grad_norm": 1.104324221611023, + "learning_rate": 7.710612913502369e-06, + "loss": 0.3469, + "step": 16889 + }, + { + "epoch": 0.3381127542977254, + "grad_norm": 1.1019742488861084, + "learning_rate": 7.710340497016957e-06, + "loss": 0.3698, + "step": 16890 + }, + { + "epoch": 0.33813277281485377, + "grad_norm": 1.3024758100509644, + "learning_rate": 7.71006806913786e-06, + "loss": 0.2941, + "step": 16891 + }, + { + "epoch": 0.33815279133198206, + "grad_norm": 1.0547902584075928, + "learning_rate": 7.709795629866219e-06, + "loss": 0.2991, + "step": 16892 + }, + { + "epoch": 0.3381728098491104, + "grad_norm": 1.0932327508926392, + "learning_rate": 7.70952317920318e-06, + "loss": 0.2955, + "step": 16893 + }, + { + "epoch": 0.33819282836623876, + "grad_norm": 1.0631898641586304, + "learning_rate": 7.70925071714989e-06, + "loss": 0.3255, + "step": 16894 + }, + { + "epoch": 0.3382128468833671, + "grad_norm": 1.1590075492858887, + "learning_rate": 7.708978243707493e-06, + "loss": 0.3031, + "step": 16895 + }, + { + "epoch": 0.33823286540049546, + "grad_norm": 1.1050852537155151, + "learning_rate": 7.708705758877133e-06, + "loss": 0.3029, + "step": 16896 + }, + { + "epoch": 0.3382528839176238, + "grad_norm": 1.1168864965438843, + "learning_rate": 7.708433262659956e-06, + "loss": 0.3538, + "step": 16897 + }, + { + "epoch": 0.33827290243475217, + "grad_norm": 1.0299092531204224, + "learning_rate": 7.708160755057111e-06, + "loss": 0.2837, + "step": 16898 + }, + { + "epoch": 0.3382929209518805, + "grad_norm": 1.1077520847320557, + "learning_rate": 7.707888236069741e-06, + "loss": 0.3143, + "step": 16899 + }, + { + "epoch": 0.3383129394690088, + "grad_norm": 1.1638431549072266, + "learning_rate": 7.70761570569899e-06, + "loss": 0.2974, + "step": 16900 + }, + { + "epoch": 0.33833295798613716, + "grad_norm": 1.9562469720840454, + "learning_rate": 7.707343163946004e-06, + "loss": 0.8439, + "step": 16901 + }, + { + "epoch": 0.3383529765032655, + "grad_norm": 1.1225028038024902, + "learning_rate": 7.707070610811934e-06, + "loss": 0.3015, + "step": 16902 + }, + { + "epoch": 0.33837299502039386, + "grad_norm": 1.0782041549682617, + "learning_rate": 7.706798046297918e-06, + "loss": 0.3279, + "step": 16903 + }, + { + "epoch": 0.3383930135375222, + "grad_norm": 1.0711385011672974, + "learning_rate": 7.706525470405107e-06, + "loss": 0.3245, + "step": 16904 + }, + { + "epoch": 0.33841303205465056, + "grad_norm": 1.0324381589889526, + "learning_rate": 7.706252883134647e-06, + "loss": 0.3053, + "step": 16905 + }, + { + "epoch": 0.3384330505717789, + "grad_norm": 1.301132321357727, + "learning_rate": 7.70598028448768e-06, + "loss": 0.3492, + "step": 16906 + }, + { + "epoch": 0.33845306908890727, + "grad_norm": 1.715523600578308, + "learning_rate": 7.705707674465355e-06, + "loss": 0.3202, + "step": 16907 + }, + { + "epoch": 0.33847308760603556, + "grad_norm": 1.1365457773208618, + "learning_rate": 7.705435053068816e-06, + "loss": 0.3209, + "step": 16908 + }, + { + "epoch": 0.3384931061231639, + "grad_norm": 1.0117672681808472, + "learning_rate": 7.705162420299212e-06, + "loss": 0.2803, + "step": 16909 + }, + { + "epoch": 0.33851312464029226, + "grad_norm": 1.9903026819229126, + "learning_rate": 7.704889776157685e-06, + "loss": 0.8287, + "step": 16910 + }, + { + "epoch": 0.3385331431574206, + "grad_norm": 1.0529942512512207, + "learning_rate": 7.704617120645387e-06, + "loss": 0.3135, + "step": 16911 + }, + { + "epoch": 0.33855316167454896, + "grad_norm": 1.0100575685501099, + "learning_rate": 7.704344453763458e-06, + "loss": 0.3256, + "step": 16912 + }, + { + "epoch": 0.3385731801916773, + "grad_norm": 1.1545109748840332, + "learning_rate": 7.704071775513048e-06, + "loss": 0.3489, + "step": 16913 + }, + { + "epoch": 0.33859319870880566, + "grad_norm": 1.1560386419296265, + "learning_rate": 7.703799085895303e-06, + "loss": 0.3187, + "step": 16914 + }, + { + "epoch": 0.338613217225934, + "grad_norm": 1.8415440320968628, + "learning_rate": 7.703526384911366e-06, + "loss": 0.8069, + "step": 16915 + }, + { + "epoch": 0.3386332357430623, + "grad_norm": 0.9989517331123352, + "learning_rate": 7.703253672562388e-06, + "loss": 0.2919, + "step": 16916 + }, + { + "epoch": 0.33865325426019066, + "grad_norm": 1.208152413368225, + "learning_rate": 7.702980948849512e-06, + "loss": 0.3498, + "step": 16917 + }, + { + "epoch": 0.338673272777319, + "grad_norm": 1.1522296667099, + "learning_rate": 7.702708213773886e-06, + "loss": 0.3674, + "step": 16918 + }, + { + "epoch": 0.33869329129444736, + "grad_norm": 1.0301117897033691, + "learning_rate": 7.702435467336658e-06, + "loss": 0.3083, + "step": 16919 + }, + { + "epoch": 0.3387133098115757, + "grad_norm": 1.8420528173446655, + "learning_rate": 7.702162709538971e-06, + "loss": 0.8152, + "step": 16920 + }, + { + "epoch": 0.33873332832870406, + "grad_norm": 1.0152804851531982, + "learning_rate": 7.701889940381973e-06, + "loss": 0.3385, + "step": 16921 + }, + { + "epoch": 0.3387533468458324, + "grad_norm": 1.9047054052352905, + "learning_rate": 7.701617159866812e-06, + "loss": 0.8375, + "step": 16922 + }, + { + "epoch": 0.33877336536296077, + "grad_norm": 1.1190940141677856, + "learning_rate": 7.701344367994633e-06, + "loss": 0.2718, + "step": 16923 + }, + { + "epoch": 0.33879338388008906, + "grad_norm": 1.2086650133132935, + "learning_rate": 7.701071564766587e-06, + "loss": 0.3631, + "step": 16924 + }, + { + "epoch": 0.3388134023972174, + "grad_norm": 1.1042191982269287, + "learning_rate": 7.700798750183815e-06, + "loss": 0.3098, + "step": 16925 + }, + { + "epoch": 0.33883342091434576, + "grad_norm": 1.0665851831436157, + "learning_rate": 7.700525924247466e-06, + "loss": 0.3044, + "step": 16926 + }, + { + "epoch": 0.3388534394314741, + "grad_norm": 1.1306445598602295, + "learning_rate": 7.700253086958687e-06, + "loss": 0.2924, + "step": 16927 + }, + { + "epoch": 0.33887345794860246, + "grad_norm": 1.1137458086013794, + "learning_rate": 7.699980238318625e-06, + "loss": 0.266, + "step": 16928 + }, + { + "epoch": 0.3388934764657308, + "grad_norm": 1.1068999767303467, + "learning_rate": 7.699707378328429e-06, + "loss": 0.3209, + "step": 16929 + }, + { + "epoch": 0.33891349498285916, + "grad_norm": 1.1301385164260864, + "learning_rate": 7.699434506989242e-06, + "loss": 0.3031, + "step": 16930 + }, + { + "epoch": 0.3389335134999875, + "grad_norm": 1.1157891750335693, + "learning_rate": 7.699161624302214e-06, + "loss": 0.3282, + "step": 16931 + }, + { + "epoch": 0.3389535320171158, + "grad_norm": 1.0490469932556152, + "learning_rate": 7.698888730268492e-06, + "loss": 0.3563, + "step": 16932 + }, + { + "epoch": 0.33897355053424416, + "grad_norm": 1.1217015981674194, + "learning_rate": 7.698615824889221e-06, + "loss": 0.3146, + "step": 16933 + }, + { + "epoch": 0.3389935690513725, + "grad_norm": 1.0702508687973022, + "learning_rate": 7.698342908165552e-06, + "loss": 0.2848, + "step": 16934 + }, + { + "epoch": 0.33901358756850086, + "grad_norm": 1.9988939762115479, + "learning_rate": 7.698069980098629e-06, + "loss": 0.8556, + "step": 16935 + }, + { + "epoch": 0.3390336060856292, + "grad_norm": 1.087845802307129, + "learning_rate": 7.6977970406896e-06, + "loss": 0.2985, + "step": 16936 + }, + { + "epoch": 0.33905362460275756, + "grad_norm": 1.057457685470581, + "learning_rate": 7.697524089939615e-06, + "loss": 0.3456, + "step": 16937 + }, + { + "epoch": 0.3390736431198859, + "grad_norm": 1.0508381128311157, + "learning_rate": 7.697251127849816e-06, + "loss": 0.2914, + "step": 16938 + }, + { + "epoch": 0.33909366163701427, + "grad_norm": 1.1482614278793335, + "learning_rate": 7.696978154421356e-06, + "loss": 0.3055, + "step": 16939 + }, + { + "epoch": 0.33911368015414256, + "grad_norm": 1.0500121116638184, + "learning_rate": 7.696705169655379e-06, + "loss": 0.2857, + "step": 16940 + }, + { + "epoch": 0.3391336986712709, + "grad_norm": 1.0428438186645508, + "learning_rate": 7.696432173553035e-06, + "loss": 0.2787, + "step": 16941 + }, + { + "epoch": 0.33915371718839926, + "grad_norm": 1.124579668045044, + "learning_rate": 7.696159166115469e-06, + "loss": 0.3104, + "step": 16942 + }, + { + "epoch": 0.3391737357055276, + "grad_norm": 1.0958356857299805, + "learning_rate": 7.695886147343831e-06, + "loss": 0.3049, + "step": 16943 + }, + { + "epoch": 0.33919375422265596, + "grad_norm": 1.3258588314056396, + "learning_rate": 7.695613117239267e-06, + "loss": 0.3538, + "step": 16944 + }, + { + "epoch": 0.3392137727397843, + "grad_norm": 1.1218327283859253, + "learning_rate": 7.695340075802927e-06, + "loss": 0.3288, + "step": 16945 + }, + { + "epoch": 0.33923379125691266, + "grad_norm": 1.0422158241271973, + "learning_rate": 7.695067023035955e-06, + "loss": 0.291, + "step": 16946 + }, + { + "epoch": 0.339253809774041, + "grad_norm": 1.8049875497817993, + "learning_rate": 7.694793958939503e-06, + "loss": 0.7915, + "step": 16947 + }, + { + "epoch": 0.3392738282911693, + "grad_norm": 1.0290077924728394, + "learning_rate": 7.694520883514718e-06, + "loss": 0.2923, + "step": 16948 + }, + { + "epoch": 0.33929384680829766, + "grad_norm": 1.3187581300735474, + "learning_rate": 7.694247796762747e-06, + "loss": 0.33, + "step": 16949 + }, + { + "epoch": 0.339313865325426, + "grad_norm": 1.1962990760803223, + "learning_rate": 7.693974698684737e-06, + "loss": 0.3536, + "step": 16950 + }, + { + "epoch": 0.33933388384255436, + "grad_norm": 1.3073499202728271, + "learning_rate": 7.693701589281836e-06, + "loss": 0.3411, + "step": 16951 + }, + { + "epoch": 0.3393539023596827, + "grad_norm": 1.0811394453048706, + "learning_rate": 7.693428468555196e-06, + "loss": 0.2799, + "step": 16952 + }, + { + "epoch": 0.33937392087681106, + "grad_norm": 1.075774073600769, + "learning_rate": 7.693155336505959e-06, + "loss": 0.2885, + "step": 16953 + }, + { + "epoch": 0.3393939393939394, + "grad_norm": 1.7583537101745605, + "learning_rate": 7.692882193135279e-06, + "loss": 0.7931, + "step": 16954 + }, + { + "epoch": 0.33941395791106777, + "grad_norm": 1.2201149463653564, + "learning_rate": 7.6926090384443e-06, + "loss": 0.2807, + "step": 16955 + }, + { + "epoch": 0.33943397642819606, + "grad_norm": 1.8119792938232422, + "learning_rate": 7.692335872434173e-06, + "loss": 0.8836, + "step": 16956 + }, + { + "epoch": 0.3394539949453244, + "grad_norm": 1.1257121562957764, + "learning_rate": 7.692062695106046e-06, + "loss": 0.2755, + "step": 16957 + }, + { + "epoch": 0.33947401346245276, + "grad_norm": 1.1143743991851807, + "learning_rate": 7.691789506461067e-06, + "loss": 0.3504, + "step": 16958 + }, + { + "epoch": 0.3394940319795811, + "grad_norm": 1.0634562969207764, + "learning_rate": 7.691516306500381e-06, + "loss": 0.3059, + "step": 16959 + }, + { + "epoch": 0.33951405049670946, + "grad_norm": 1.1575032472610474, + "learning_rate": 7.691243095225143e-06, + "loss": 0.3009, + "step": 16960 + }, + { + "epoch": 0.3395340690138378, + "grad_norm": 1.1750564575195312, + "learning_rate": 7.690969872636498e-06, + "loss": 0.2794, + "step": 16961 + }, + { + "epoch": 0.33955408753096616, + "grad_norm": 1.2108688354492188, + "learning_rate": 7.690696638735594e-06, + "loss": 0.33, + "step": 16962 + }, + { + "epoch": 0.3395741060480945, + "grad_norm": 1.1995868682861328, + "learning_rate": 7.69042339352358e-06, + "loss": 0.3232, + "step": 16963 + }, + { + "epoch": 0.3395941245652228, + "grad_norm": 1.3585104942321777, + "learning_rate": 7.690150137001605e-06, + "loss": 0.321, + "step": 16964 + }, + { + "epoch": 0.33961414308235116, + "grad_norm": 1.0305771827697754, + "learning_rate": 7.689876869170817e-06, + "loss": 0.3282, + "step": 16965 + }, + { + "epoch": 0.3396341615994795, + "grad_norm": 1.3850616216659546, + "learning_rate": 7.689603590032368e-06, + "loss": 0.3199, + "step": 16966 + }, + { + "epoch": 0.33965418011660786, + "grad_norm": 1.0345836877822876, + "learning_rate": 7.689330299587403e-06, + "loss": 0.352, + "step": 16967 + }, + { + "epoch": 0.3396741986337362, + "grad_norm": 1.1288297176361084, + "learning_rate": 7.689056997837072e-06, + "loss": 0.2979, + "step": 16968 + }, + { + "epoch": 0.33969421715086456, + "grad_norm": 1.3720757961273193, + "learning_rate": 7.688783684782526e-06, + "loss": 0.3714, + "step": 16969 + }, + { + "epoch": 0.3397142356679929, + "grad_norm": 1.1271250247955322, + "learning_rate": 7.68851036042491e-06, + "loss": 0.3266, + "step": 16970 + }, + { + "epoch": 0.3397342541851212, + "grad_norm": 1.2735280990600586, + "learning_rate": 7.688237024765375e-06, + "loss": 0.3346, + "step": 16971 + }, + { + "epoch": 0.33975427270224956, + "grad_norm": 1.062846064567566, + "learning_rate": 7.687963677805069e-06, + "loss": 0.3627, + "step": 16972 + }, + { + "epoch": 0.3397742912193779, + "grad_norm": 1.0746864080429077, + "learning_rate": 7.687690319545145e-06, + "loss": 0.3336, + "step": 16973 + }, + { + "epoch": 0.33979430973650626, + "grad_norm": 1.0635229349136353, + "learning_rate": 7.687416949986747e-06, + "loss": 0.3606, + "step": 16974 + }, + { + "epoch": 0.3398143282536346, + "grad_norm": 0.9906729459762573, + "learning_rate": 7.687143569131028e-06, + "loss": 0.2677, + "step": 16975 + }, + { + "epoch": 0.33983434677076296, + "grad_norm": 1.1366057395935059, + "learning_rate": 7.686870176979136e-06, + "loss": 0.2949, + "step": 16976 + }, + { + "epoch": 0.3398543652878913, + "grad_norm": 1.0379881858825684, + "learning_rate": 7.686596773532218e-06, + "loss": 0.3457, + "step": 16977 + }, + { + "epoch": 0.33987438380501966, + "grad_norm": 1.0399627685546875, + "learning_rate": 7.686323358791426e-06, + "loss": 0.3053, + "step": 16978 + }, + { + "epoch": 0.33989440232214796, + "grad_norm": 1.8822591304779053, + "learning_rate": 7.686049932757909e-06, + "loss": 0.7934, + "step": 16979 + }, + { + "epoch": 0.3399144208392763, + "grad_norm": 1.0269039869308472, + "learning_rate": 7.685776495432816e-06, + "loss": 0.3028, + "step": 16980 + }, + { + "epoch": 0.33993443935640466, + "grad_norm": 1.1086481809616089, + "learning_rate": 7.685503046817296e-06, + "loss": 0.3221, + "step": 16981 + }, + { + "epoch": 0.339954457873533, + "grad_norm": 1.0485135316848755, + "learning_rate": 7.685229586912502e-06, + "loss": 0.3456, + "step": 16982 + }, + { + "epoch": 0.33997447639066136, + "grad_norm": 1.2070603370666504, + "learning_rate": 7.684956115719578e-06, + "loss": 0.3139, + "step": 16983 + }, + { + "epoch": 0.3399944949077897, + "grad_norm": 1.094653606414795, + "learning_rate": 7.684682633239675e-06, + "loss": 0.3158, + "step": 16984 + }, + { + "epoch": 0.34001451342491806, + "grad_norm": 1.1214765310287476, + "learning_rate": 7.684409139473946e-06, + "loss": 0.3195, + "step": 16985 + }, + { + "epoch": 0.3400345319420464, + "grad_norm": 1.0925801992416382, + "learning_rate": 7.68413563442354e-06, + "loss": 0.3305, + "step": 16986 + }, + { + "epoch": 0.3400545504591747, + "grad_norm": 1.1104000806808472, + "learning_rate": 7.683862118089604e-06, + "loss": 0.3121, + "step": 16987 + }, + { + "epoch": 0.34007456897630306, + "grad_norm": 1.094658374786377, + "learning_rate": 7.683588590473287e-06, + "loss": 0.2967, + "step": 16988 + }, + { + "epoch": 0.3400945874934314, + "grad_norm": 1.2348359823226929, + "learning_rate": 7.683315051575746e-06, + "loss": 0.323, + "step": 16989 + }, + { + "epoch": 0.34011460601055976, + "grad_norm": 1.0045112371444702, + "learning_rate": 7.683041501398122e-06, + "loss": 0.3122, + "step": 16990 + }, + { + "epoch": 0.3401346245276881, + "grad_norm": 1.0362309217453003, + "learning_rate": 7.682767939941569e-06, + "loss": 0.2916, + "step": 16991 + }, + { + "epoch": 0.34015464304481646, + "grad_norm": 1.024635910987854, + "learning_rate": 7.682494367207238e-06, + "loss": 0.3528, + "step": 16992 + }, + { + "epoch": 0.3401746615619448, + "grad_norm": 1.10501229763031, + "learning_rate": 7.682220783196279e-06, + "loss": 0.3185, + "step": 16993 + }, + { + "epoch": 0.34019468007907316, + "grad_norm": 1.1099389791488647, + "learning_rate": 7.68194718790984e-06, + "loss": 0.3095, + "step": 16994 + }, + { + "epoch": 0.34021469859620146, + "grad_norm": 1.029943823814392, + "learning_rate": 7.681673581349072e-06, + "loss": 0.3107, + "step": 16995 + }, + { + "epoch": 0.3402347171133298, + "grad_norm": 1.0376207828521729, + "learning_rate": 7.681399963515125e-06, + "loss": 0.3152, + "step": 16996 + }, + { + "epoch": 0.34025473563045816, + "grad_norm": 1.0677603483200073, + "learning_rate": 7.681126334409151e-06, + "loss": 0.3289, + "step": 16997 + }, + { + "epoch": 0.3402747541475865, + "grad_norm": 1.2095346450805664, + "learning_rate": 7.680852694032298e-06, + "loss": 0.3426, + "step": 16998 + }, + { + "epoch": 0.34029477266471486, + "grad_norm": 1.1599171161651611, + "learning_rate": 7.680579042385717e-06, + "loss": 0.3009, + "step": 16999 + }, + { + "epoch": 0.3403147911818432, + "grad_norm": 1.085339903831482, + "learning_rate": 7.680305379470559e-06, + "loss": 0.3395, + "step": 17000 + }, + { + "epoch": 0.34033480969897156, + "grad_norm": 1.0361665487289429, + "learning_rate": 7.680031705287973e-06, + "loss": 0.322, + "step": 17001 + }, + { + "epoch": 0.3403548282160999, + "grad_norm": 1.1202197074890137, + "learning_rate": 7.679758019839112e-06, + "loss": 0.3578, + "step": 17002 + }, + { + "epoch": 0.3403748467332282, + "grad_norm": 1.0771080255508423, + "learning_rate": 7.679484323125125e-06, + "loss": 0.2891, + "step": 17003 + }, + { + "epoch": 0.34039486525035656, + "grad_norm": 1.1732839345932007, + "learning_rate": 7.679210615147161e-06, + "loss": 0.3187, + "step": 17004 + }, + { + "epoch": 0.3404148837674849, + "grad_norm": 1.289425253868103, + "learning_rate": 7.678936895906373e-06, + "loss": 0.3119, + "step": 17005 + }, + { + "epoch": 0.34043490228461326, + "grad_norm": 1.7883535623550415, + "learning_rate": 7.678663165403912e-06, + "loss": 0.7776, + "step": 17006 + }, + { + "epoch": 0.3404549208017416, + "grad_norm": 1.2310614585876465, + "learning_rate": 7.678389423640926e-06, + "loss": 0.3614, + "step": 17007 + }, + { + "epoch": 0.34047493931886996, + "grad_norm": 1.108981966972351, + "learning_rate": 7.678115670618566e-06, + "loss": 0.3434, + "step": 17008 + }, + { + "epoch": 0.3404949578359983, + "grad_norm": 1.0543230772018433, + "learning_rate": 7.677841906337988e-06, + "loss": 0.2782, + "step": 17009 + }, + { + "epoch": 0.34051497635312666, + "grad_norm": 1.8478178977966309, + "learning_rate": 7.677568130800335e-06, + "loss": 0.8181, + "step": 17010 + }, + { + "epoch": 0.34053499487025496, + "grad_norm": 1.1763468980789185, + "learning_rate": 7.677294344006762e-06, + "loss": 0.2918, + "step": 17011 + }, + { + "epoch": 0.3405550133873833, + "grad_norm": 1.1999051570892334, + "learning_rate": 7.677020545958423e-06, + "loss": 0.3614, + "step": 17012 + }, + { + "epoch": 0.34057503190451166, + "grad_norm": 1.9553554058074951, + "learning_rate": 7.676746736656463e-06, + "loss": 0.9005, + "step": 17013 + }, + { + "epoch": 0.34059505042164, + "grad_norm": 1.075517177581787, + "learning_rate": 7.676472916102035e-06, + "loss": 0.3197, + "step": 17014 + }, + { + "epoch": 0.34061506893876836, + "grad_norm": 1.3305110931396484, + "learning_rate": 7.676199084296294e-06, + "loss": 0.3416, + "step": 17015 + }, + { + "epoch": 0.3406350874558967, + "grad_norm": 1.2029823064804077, + "learning_rate": 7.675925241240386e-06, + "loss": 0.3683, + "step": 17016 + }, + { + "epoch": 0.34065510597302506, + "grad_norm": 1.1930153369903564, + "learning_rate": 7.675651386935465e-06, + "loss": 0.3389, + "step": 17017 + }, + { + "epoch": 0.3406751244901534, + "grad_norm": 1.8341561555862427, + "learning_rate": 7.67537752138268e-06, + "loss": 0.7554, + "step": 17018 + }, + { + "epoch": 0.3406951430072817, + "grad_norm": 1.2011595964431763, + "learning_rate": 7.675103644583184e-06, + "loss": 0.3833, + "step": 17019 + }, + { + "epoch": 0.34071516152441006, + "grad_norm": 1.1323370933532715, + "learning_rate": 7.67482975653813e-06, + "loss": 0.2714, + "step": 17020 + }, + { + "epoch": 0.3407351800415384, + "grad_norm": 1.7972357273101807, + "learning_rate": 7.674555857248664e-06, + "loss": 0.7777, + "step": 17021 + }, + { + "epoch": 0.34075519855866676, + "grad_norm": 0.9353742599487305, + "learning_rate": 7.674281946715945e-06, + "loss": 0.3275, + "step": 17022 + }, + { + "epoch": 0.3407752170757951, + "grad_norm": 1.0753566026687622, + "learning_rate": 7.674008024941115e-06, + "loss": 0.3264, + "step": 17023 + }, + { + "epoch": 0.34079523559292346, + "grad_norm": 1.050803542137146, + "learning_rate": 7.673734091925335e-06, + "loss": 0.3257, + "step": 17024 + }, + { + "epoch": 0.3408152541100518, + "grad_norm": 1.1961472034454346, + "learning_rate": 7.67346014766975e-06, + "loss": 0.3358, + "step": 17025 + }, + { + "epoch": 0.34083527262718016, + "grad_norm": 1.089542031288147, + "learning_rate": 7.673186192175514e-06, + "loss": 0.2716, + "step": 17026 + }, + { + "epoch": 0.34085529114430846, + "grad_norm": 1.9771242141723633, + "learning_rate": 7.67291222544378e-06, + "loss": 0.8635, + "step": 17027 + }, + { + "epoch": 0.3408753096614368, + "grad_norm": 1.1335920095443726, + "learning_rate": 7.6726382474757e-06, + "loss": 0.3288, + "step": 17028 + }, + { + "epoch": 0.34089532817856516, + "grad_norm": 1.024871826171875, + "learning_rate": 7.672364258272419e-06, + "loss": 0.288, + "step": 17029 + }, + { + "epoch": 0.3409153466956935, + "grad_norm": 1.0215332508087158, + "learning_rate": 7.672090257835096e-06, + "loss": 0.301, + "step": 17030 + }, + { + "epoch": 0.34093536521282186, + "grad_norm": 2.0436556339263916, + "learning_rate": 7.67181624616488e-06, + "loss": 0.8027, + "step": 17031 + }, + { + "epoch": 0.3409553837299502, + "grad_norm": 0.9940220713615417, + "learning_rate": 7.671542223262924e-06, + "loss": 0.3478, + "step": 17032 + }, + { + "epoch": 0.34097540224707856, + "grad_norm": 1.1802469491958618, + "learning_rate": 7.671268189130382e-06, + "loss": 0.3546, + "step": 17033 + }, + { + "epoch": 0.3409954207642069, + "grad_norm": 1.0615824460983276, + "learning_rate": 7.6709941437684e-06, + "loss": 0.3216, + "step": 17034 + }, + { + "epoch": 0.3410154392813352, + "grad_norm": 1.1164507865905762, + "learning_rate": 7.670720087178134e-06, + "loss": 0.3228, + "step": 17035 + }, + { + "epoch": 0.34103545779846356, + "grad_norm": 1.1462894678115845, + "learning_rate": 7.670446019360735e-06, + "loss": 0.3309, + "step": 17036 + }, + { + "epoch": 0.3410554763155919, + "grad_norm": 1.1157530546188354, + "learning_rate": 7.670171940317357e-06, + "loss": 0.3516, + "step": 17037 + }, + { + "epoch": 0.34107549483272026, + "grad_norm": 1.4562599658966064, + "learning_rate": 7.669897850049151e-06, + "loss": 0.3131, + "step": 17038 + }, + { + "epoch": 0.3410955133498486, + "grad_norm": 1.2310256958007812, + "learning_rate": 7.66962374855727e-06, + "loss": 0.3138, + "step": 17039 + }, + { + "epoch": 0.34111553186697696, + "grad_norm": 1.0874019861221313, + "learning_rate": 7.669349635842861e-06, + "loss": 0.3021, + "step": 17040 + }, + { + "epoch": 0.3411355503841053, + "grad_norm": 1.1608388423919678, + "learning_rate": 7.669075511907085e-06, + "loss": 0.3463, + "step": 17041 + }, + { + "epoch": 0.34115556890123366, + "grad_norm": 1.0424011945724487, + "learning_rate": 7.668801376751088e-06, + "loss": 0.3453, + "step": 17042 + }, + { + "epoch": 0.34117558741836196, + "grad_norm": 1.0084633827209473, + "learning_rate": 7.668527230376023e-06, + "loss": 0.2959, + "step": 17043 + }, + { + "epoch": 0.3411956059354903, + "grad_norm": 1.2518073320388794, + "learning_rate": 7.668253072783045e-06, + "loss": 0.3442, + "step": 17044 + }, + { + "epoch": 0.34121562445261866, + "grad_norm": 1.2156331539154053, + "learning_rate": 7.667978903973306e-06, + "loss": 0.2638, + "step": 17045 + }, + { + "epoch": 0.341235642969747, + "grad_norm": 1.278907060623169, + "learning_rate": 7.667704723947958e-06, + "loss": 0.3091, + "step": 17046 + }, + { + "epoch": 0.34125566148687536, + "grad_norm": 1.0614757537841797, + "learning_rate": 7.667430532708152e-06, + "loss": 0.315, + "step": 17047 + }, + { + "epoch": 0.3412756800040037, + "grad_norm": 1.2600915431976318, + "learning_rate": 7.667156330255044e-06, + "loss": 0.375, + "step": 17048 + }, + { + "epoch": 0.34129569852113206, + "grad_norm": 1.169426679611206, + "learning_rate": 7.666882116589784e-06, + "loss": 0.3459, + "step": 17049 + }, + { + "epoch": 0.3413157170382604, + "grad_norm": 1.0917112827301025, + "learning_rate": 7.666607891713525e-06, + "loss": 0.3331, + "step": 17050 + }, + { + "epoch": 0.3413357355553887, + "grad_norm": 1.1699576377868652, + "learning_rate": 7.66633365562742e-06, + "loss": 0.3321, + "step": 17051 + }, + { + "epoch": 0.34135575407251706, + "grad_norm": 1.0296859741210938, + "learning_rate": 7.666059408332623e-06, + "loss": 0.317, + "step": 17052 + }, + { + "epoch": 0.3413757725896454, + "grad_norm": 1.081792950630188, + "learning_rate": 7.665785149830285e-06, + "loss": 0.3029, + "step": 17053 + }, + { + "epoch": 0.34139579110677376, + "grad_norm": 1.1113131046295166, + "learning_rate": 7.665510880121561e-06, + "loss": 0.3415, + "step": 17054 + }, + { + "epoch": 0.3414158096239021, + "grad_norm": 1.173108458518982, + "learning_rate": 7.665236599207601e-06, + "loss": 0.3538, + "step": 17055 + }, + { + "epoch": 0.34143582814103046, + "grad_norm": 1.237817406654358, + "learning_rate": 7.66496230708956e-06, + "loss": 0.2708, + "step": 17056 + }, + { + "epoch": 0.3414558466581588, + "grad_norm": 1.2140765190124512, + "learning_rate": 7.664688003768594e-06, + "loss": 0.3243, + "step": 17057 + }, + { + "epoch": 0.34147586517528716, + "grad_norm": 1.084557056427002, + "learning_rate": 7.66441368924585e-06, + "loss": 0.3065, + "step": 17058 + }, + { + "epoch": 0.34149588369241546, + "grad_norm": 1.8635187149047852, + "learning_rate": 7.664139363522487e-06, + "loss": 0.7939, + "step": 17059 + }, + { + "epoch": 0.3415159022095438, + "grad_norm": 1.1054601669311523, + "learning_rate": 7.663865026599656e-06, + "loss": 0.3379, + "step": 17060 + }, + { + "epoch": 0.34153592072667216, + "grad_norm": 1.0530056953430176, + "learning_rate": 7.663590678478505e-06, + "loss": 0.3489, + "step": 17061 + }, + { + "epoch": 0.3415559392438005, + "grad_norm": 1.139785647392273, + "learning_rate": 7.663316319160194e-06, + "loss": 0.3123, + "step": 17062 + }, + { + "epoch": 0.34157595776092886, + "grad_norm": 1.0574204921722412, + "learning_rate": 7.663041948645876e-06, + "loss": 0.2935, + "step": 17063 + }, + { + "epoch": 0.3415959762780572, + "grad_norm": 1.0768022537231445, + "learning_rate": 7.662767566936702e-06, + "loss": 0.2961, + "step": 17064 + }, + { + "epoch": 0.34161599479518556, + "grad_norm": 1.267724633216858, + "learning_rate": 7.662493174033826e-06, + "loss": 0.3049, + "step": 17065 + }, + { + "epoch": 0.3416360133123139, + "grad_norm": 1.1148747205734253, + "learning_rate": 7.662218769938402e-06, + "loss": 0.3187, + "step": 17066 + }, + { + "epoch": 0.3416560318294422, + "grad_norm": 1.0873029232025146, + "learning_rate": 7.661944354651583e-06, + "loss": 0.3429, + "step": 17067 + }, + { + "epoch": 0.34167605034657056, + "grad_norm": 1.2617127895355225, + "learning_rate": 7.661669928174523e-06, + "loss": 0.3246, + "step": 17068 + }, + { + "epoch": 0.3416960688636989, + "grad_norm": 1.0032190084457397, + "learning_rate": 7.661395490508374e-06, + "loss": 0.2961, + "step": 17069 + }, + { + "epoch": 0.34171608738082726, + "grad_norm": 1.085824966430664, + "learning_rate": 7.661121041654291e-06, + "loss": 0.34, + "step": 17070 + }, + { + "epoch": 0.3417361058979556, + "grad_norm": 1.1830335855484009, + "learning_rate": 7.66084658161343e-06, + "loss": 0.3333, + "step": 17071 + }, + { + "epoch": 0.34175612441508396, + "grad_norm": 1.1252634525299072, + "learning_rate": 7.66057211038694e-06, + "loss": 0.3361, + "step": 17072 + }, + { + "epoch": 0.3417761429322123, + "grad_norm": 1.0452405214309692, + "learning_rate": 7.66029762797598e-06, + "loss": 0.3521, + "step": 17073 + }, + { + "epoch": 0.34179616144934066, + "grad_norm": 1.989350438117981, + "learning_rate": 7.660023134381702e-06, + "loss": 0.8426, + "step": 17074 + }, + { + "epoch": 0.34181617996646896, + "grad_norm": 1.1367237567901611, + "learning_rate": 7.659748629605257e-06, + "loss": 0.3615, + "step": 17075 + }, + { + "epoch": 0.3418361984835973, + "grad_norm": 1.8358330726623535, + "learning_rate": 7.6594741136478e-06, + "loss": 0.7973, + "step": 17076 + }, + { + "epoch": 0.34185621700072566, + "grad_norm": 1.160880446434021, + "learning_rate": 7.659199586510488e-06, + "loss": 0.2975, + "step": 17077 + }, + { + "epoch": 0.341876235517854, + "grad_norm": 1.879632830619812, + "learning_rate": 7.658925048194473e-06, + "loss": 0.8518, + "step": 17078 + }, + { + "epoch": 0.34189625403498236, + "grad_norm": 1.1915631294250488, + "learning_rate": 7.65865049870091e-06, + "loss": 0.3278, + "step": 17079 + }, + { + "epoch": 0.3419162725521107, + "grad_norm": 1.2218812704086304, + "learning_rate": 7.658375938030951e-06, + "loss": 0.2678, + "step": 17080 + }, + { + "epoch": 0.34193629106923906, + "grad_norm": 1.1600486040115356, + "learning_rate": 7.658101366185753e-06, + "loss": 0.3508, + "step": 17081 + }, + { + "epoch": 0.3419563095863674, + "grad_norm": 1.2178982496261597, + "learning_rate": 7.657826783166469e-06, + "loss": 0.3192, + "step": 17082 + }, + { + "epoch": 0.3419763281034957, + "grad_norm": 1.153551459312439, + "learning_rate": 7.657552188974251e-06, + "loss": 0.3338, + "step": 17083 + }, + { + "epoch": 0.34199634662062406, + "grad_norm": 1.2949939966201782, + "learning_rate": 7.657277583610257e-06, + "loss": 0.3174, + "step": 17084 + }, + { + "epoch": 0.3420163651377524, + "grad_norm": 1.136686086654663, + "learning_rate": 7.65700296707564e-06, + "loss": 0.3531, + "step": 17085 + }, + { + "epoch": 0.34203638365488076, + "grad_norm": 1.1325808763504028, + "learning_rate": 7.656728339371556e-06, + "loss": 0.3292, + "step": 17086 + }, + { + "epoch": 0.3420564021720091, + "grad_norm": 1.165952205657959, + "learning_rate": 7.656453700499154e-06, + "loss": 0.3376, + "step": 17087 + }, + { + "epoch": 0.34207642068913746, + "grad_norm": 1.099982500076294, + "learning_rate": 7.656179050459593e-06, + "loss": 0.3629, + "step": 17088 + }, + { + "epoch": 0.3420964392062658, + "grad_norm": 1.0564701557159424, + "learning_rate": 7.65590438925403e-06, + "loss": 0.3416, + "step": 17089 + }, + { + "epoch": 0.34211645772339416, + "grad_norm": 1.347434163093567, + "learning_rate": 7.655629716883615e-06, + "loss": 0.3269, + "step": 17090 + }, + { + "epoch": 0.34213647624052246, + "grad_norm": 1.15791916847229, + "learning_rate": 7.655355033349503e-06, + "loss": 0.3542, + "step": 17091 + }, + { + "epoch": 0.3421564947576508, + "grad_norm": 1.0544971227645874, + "learning_rate": 7.65508033865285e-06, + "loss": 0.3314, + "step": 17092 + }, + { + "epoch": 0.34217651327477916, + "grad_norm": 1.0960499048233032, + "learning_rate": 7.654805632794812e-06, + "loss": 0.3016, + "step": 17093 + }, + { + "epoch": 0.3421965317919075, + "grad_norm": 1.179219365119934, + "learning_rate": 7.654530915776542e-06, + "loss": 0.3359, + "step": 17094 + }, + { + "epoch": 0.34221655030903586, + "grad_norm": 1.8514684438705444, + "learning_rate": 7.654256187599194e-06, + "loss": 0.8717, + "step": 17095 + }, + { + "epoch": 0.3422365688261642, + "grad_norm": 1.3117389678955078, + "learning_rate": 7.653981448263925e-06, + "loss": 0.3581, + "step": 17096 + }, + { + "epoch": 0.34225658734329256, + "grad_norm": 1.0688284635543823, + "learning_rate": 7.65370669777189e-06, + "loss": 0.3232, + "step": 17097 + }, + { + "epoch": 0.3422766058604209, + "grad_norm": 1.1013201475143433, + "learning_rate": 7.653431936124242e-06, + "loss": 0.2885, + "step": 17098 + }, + { + "epoch": 0.3422966243775492, + "grad_norm": 1.2355520725250244, + "learning_rate": 7.653157163322137e-06, + "loss": 0.3513, + "step": 17099 + }, + { + "epoch": 0.34231664289467756, + "grad_norm": 1.0485731363296509, + "learning_rate": 7.652882379366733e-06, + "loss": 0.2924, + "step": 17100 + }, + { + "epoch": 0.3423366614118059, + "grad_norm": 1.1478060483932495, + "learning_rate": 7.65260758425918e-06, + "loss": 0.3476, + "step": 17101 + }, + { + "epoch": 0.34235667992893426, + "grad_norm": 1.105077862739563, + "learning_rate": 7.652332778000635e-06, + "loss": 0.3497, + "step": 17102 + }, + { + "epoch": 0.3423766984460626, + "grad_norm": 1.0502148866653442, + "learning_rate": 7.652057960592254e-06, + "loss": 0.3502, + "step": 17103 + }, + { + "epoch": 0.34239671696319096, + "grad_norm": 1.1473329067230225, + "learning_rate": 7.651783132035195e-06, + "loss": 0.3201, + "step": 17104 + }, + { + "epoch": 0.3424167354803193, + "grad_norm": 1.0779587030410767, + "learning_rate": 7.651508292330607e-06, + "loss": 0.3931, + "step": 17105 + }, + { + "epoch": 0.34243675399744766, + "grad_norm": 1.180734395980835, + "learning_rate": 7.651233441479648e-06, + "loss": 0.3048, + "step": 17106 + }, + { + "epoch": 0.34245677251457596, + "grad_norm": 1.2645471096038818, + "learning_rate": 7.650958579483476e-06, + "loss": 0.3525, + "step": 17107 + }, + { + "epoch": 0.3424767910317043, + "grad_norm": 1.0444482564926147, + "learning_rate": 7.650683706343246e-06, + "loss": 0.2485, + "step": 17108 + }, + { + "epoch": 0.34249680954883266, + "grad_norm": 1.925358533859253, + "learning_rate": 7.650408822060111e-06, + "loss": 0.8271, + "step": 17109 + }, + { + "epoch": 0.342516828065961, + "grad_norm": 1.1054681539535522, + "learning_rate": 7.650133926635228e-06, + "loss": 0.3204, + "step": 17110 + }, + { + "epoch": 0.34253684658308936, + "grad_norm": 1.0327531099319458, + "learning_rate": 7.649859020069752e-06, + "loss": 0.3045, + "step": 17111 + }, + { + "epoch": 0.3425568651002177, + "grad_norm": 1.1491096019744873, + "learning_rate": 7.649584102364838e-06, + "loss": 0.3275, + "step": 17112 + }, + { + "epoch": 0.34257688361734606, + "grad_norm": 1.1215236186981201, + "learning_rate": 7.649309173521643e-06, + "loss": 0.3853, + "step": 17113 + }, + { + "epoch": 0.3425969021344744, + "grad_norm": 1.1721196174621582, + "learning_rate": 7.649034233541321e-06, + "loss": 0.3035, + "step": 17114 + }, + { + "epoch": 0.3426169206516027, + "grad_norm": 1.0368396043777466, + "learning_rate": 7.648759282425033e-06, + "loss": 0.2952, + "step": 17115 + }, + { + "epoch": 0.34263693916873106, + "grad_norm": 1.0992776155471802, + "learning_rate": 7.648484320173928e-06, + "loss": 0.319, + "step": 17116 + }, + { + "epoch": 0.3426569576858594, + "grad_norm": 1.981188178062439, + "learning_rate": 7.648209346789167e-06, + "loss": 0.8371, + "step": 17117 + }, + { + "epoch": 0.34267697620298776, + "grad_norm": 1.8188931941986084, + "learning_rate": 7.647934362271902e-06, + "loss": 0.7469, + "step": 17118 + }, + { + "epoch": 0.3426969947201161, + "grad_norm": 1.1061424016952515, + "learning_rate": 7.64765936662329e-06, + "loss": 0.2978, + "step": 17119 + }, + { + "epoch": 0.34271701323724446, + "grad_norm": 1.1250910758972168, + "learning_rate": 7.64738435984449e-06, + "loss": 0.3261, + "step": 17120 + }, + { + "epoch": 0.3427370317543728, + "grad_norm": 1.154248595237732, + "learning_rate": 7.647109341936653e-06, + "loss": 0.3209, + "step": 17121 + }, + { + "epoch": 0.34275705027150116, + "grad_norm": 1.1956490278244019, + "learning_rate": 7.64683431290094e-06, + "loss": 0.3036, + "step": 17122 + }, + { + "epoch": 0.34277706878862946, + "grad_norm": 1.512679100036621, + "learning_rate": 7.646559272738507e-06, + "loss": 0.362, + "step": 17123 + }, + { + "epoch": 0.3427970873057578, + "grad_norm": 1.061434268951416, + "learning_rate": 7.646284221450506e-06, + "loss": 0.3084, + "step": 17124 + }, + { + "epoch": 0.34281710582288616, + "grad_norm": 1.2051780223846436, + "learning_rate": 7.646009159038097e-06, + "loss": 0.3499, + "step": 17125 + }, + { + "epoch": 0.3428371243400145, + "grad_norm": 1.300570011138916, + "learning_rate": 7.645734085502433e-06, + "loss": 0.3444, + "step": 17126 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 1.1073182821273804, + "learning_rate": 7.645459000844673e-06, + "loss": 0.3751, + "step": 17127 + }, + { + "epoch": 0.3428771613742712, + "grad_norm": 1.1927893161773682, + "learning_rate": 7.645183905065974e-06, + "loss": 0.3288, + "step": 17128 + }, + { + "epoch": 0.34289717989139956, + "grad_norm": 1.0545552968978882, + "learning_rate": 7.64490879816749e-06, + "loss": 0.2937, + "step": 17129 + }, + { + "epoch": 0.3429171984085279, + "grad_norm": 1.0340372323989868, + "learning_rate": 7.644633680150379e-06, + "loss": 0.2886, + "step": 17130 + }, + { + "epoch": 0.3429372169256562, + "grad_norm": 1.101070761680603, + "learning_rate": 7.644358551015796e-06, + "loss": 0.2906, + "step": 17131 + }, + { + "epoch": 0.34295723544278456, + "grad_norm": 1.0449897050857544, + "learning_rate": 7.6440834107649e-06, + "loss": 0.2971, + "step": 17132 + }, + { + "epoch": 0.3429772539599129, + "grad_norm": 1.953808069229126, + "learning_rate": 7.643808259398846e-06, + "loss": 0.8761, + "step": 17133 + }, + { + "epoch": 0.34299727247704126, + "grad_norm": 1.1791845560073853, + "learning_rate": 7.64353309691879e-06, + "loss": 0.3691, + "step": 17134 + }, + { + "epoch": 0.3430172909941696, + "grad_norm": 1.834427833557129, + "learning_rate": 7.64325792332589e-06, + "loss": 0.7849, + "step": 17135 + }, + { + "epoch": 0.34303730951129796, + "grad_norm": 1.9573503732681274, + "learning_rate": 7.642982738621306e-06, + "loss": 0.7984, + "step": 17136 + }, + { + "epoch": 0.3430573280284263, + "grad_norm": 1.1337332725524902, + "learning_rate": 7.642707542806187e-06, + "loss": 0.345, + "step": 17137 + }, + { + "epoch": 0.34307734654555466, + "grad_norm": 1.2082345485687256, + "learning_rate": 7.642432335881695e-06, + "loss": 0.2999, + "step": 17138 + }, + { + "epoch": 0.34309736506268296, + "grad_norm": 1.2869294881820679, + "learning_rate": 7.642157117848987e-06, + "loss": 0.3179, + "step": 17139 + }, + { + "epoch": 0.3431173835798113, + "grad_norm": 1.024114727973938, + "learning_rate": 7.64188188870922e-06, + "loss": 0.2923, + "step": 17140 + }, + { + "epoch": 0.34313740209693966, + "grad_norm": 1.012681245803833, + "learning_rate": 7.641606648463548e-06, + "loss": 0.3039, + "step": 17141 + }, + { + "epoch": 0.343157420614068, + "grad_norm": 1.2530136108398438, + "learning_rate": 7.641331397113133e-06, + "loss": 0.3345, + "step": 17142 + }, + { + "epoch": 0.34317743913119636, + "grad_norm": 1.1296597719192505, + "learning_rate": 7.641056134659126e-06, + "loss": 0.3543, + "step": 17143 + }, + { + "epoch": 0.3431974576483247, + "grad_norm": 1.1660816669464111, + "learning_rate": 7.640780861102688e-06, + "loss": 0.3444, + "step": 17144 + }, + { + "epoch": 0.34321747616545306, + "grad_norm": 0.9902932047843933, + "learning_rate": 7.640505576444977e-06, + "loss": 0.2812, + "step": 17145 + }, + { + "epoch": 0.3432374946825814, + "grad_norm": 1.0051836967468262, + "learning_rate": 7.64023028068715e-06, + "loss": 0.2914, + "step": 17146 + }, + { + "epoch": 0.3432575131997097, + "grad_norm": 1.2728947401046753, + "learning_rate": 7.63995497383036e-06, + "loss": 0.3317, + "step": 17147 + }, + { + "epoch": 0.34327753171683806, + "grad_norm": 1.0548573732376099, + "learning_rate": 7.639679655875768e-06, + "loss": 0.3435, + "step": 17148 + }, + { + "epoch": 0.3432975502339664, + "grad_norm": 1.0901989936828613, + "learning_rate": 7.639404326824531e-06, + "loss": 0.3738, + "step": 17149 + }, + { + "epoch": 0.34331756875109476, + "grad_norm": 1.2765474319458008, + "learning_rate": 7.639128986677807e-06, + "loss": 0.3262, + "step": 17150 + }, + { + "epoch": 0.3433375872682231, + "grad_norm": 1.1105554103851318, + "learning_rate": 7.638853635436753e-06, + "loss": 0.2942, + "step": 17151 + }, + { + "epoch": 0.34335760578535146, + "grad_norm": 1.1091090440750122, + "learning_rate": 7.638578273102524e-06, + "loss": 0.3191, + "step": 17152 + }, + { + "epoch": 0.3433776243024798, + "grad_norm": 1.1066365242004395, + "learning_rate": 7.638302899676282e-06, + "loss": 0.3213, + "step": 17153 + }, + { + "epoch": 0.34339764281960816, + "grad_norm": 1.1759697198867798, + "learning_rate": 7.638027515159182e-06, + "loss": 0.3002, + "step": 17154 + }, + { + "epoch": 0.34341766133673646, + "grad_norm": 1.1877027750015259, + "learning_rate": 7.637752119552382e-06, + "loss": 0.3375, + "step": 17155 + }, + { + "epoch": 0.3434376798538648, + "grad_norm": 1.8723281621932983, + "learning_rate": 7.637476712857038e-06, + "loss": 0.88, + "step": 17156 + }, + { + "epoch": 0.34345769837099316, + "grad_norm": 1.0343042612075806, + "learning_rate": 7.637201295074313e-06, + "loss": 0.3516, + "step": 17157 + }, + { + "epoch": 0.3434777168881215, + "grad_norm": 1.1135870218276978, + "learning_rate": 7.636925866205358e-06, + "loss": 0.3085, + "step": 17158 + }, + { + "epoch": 0.34349773540524986, + "grad_norm": 1.1576628684997559, + "learning_rate": 7.636650426251335e-06, + "loss": 0.3196, + "step": 17159 + }, + { + "epoch": 0.3435177539223782, + "grad_norm": 1.1272432804107666, + "learning_rate": 7.6363749752134e-06, + "loss": 0.3316, + "step": 17160 + }, + { + "epoch": 0.34353777243950656, + "grad_norm": 1.1507034301757812, + "learning_rate": 7.636099513092715e-06, + "loss": 0.3518, + "step": 17161 + }, + { + "epoch": 0.3435577909566349, + "grad_norm": 1.1065832376480103, + "learning_rate": 7.635824039890434e-06, + "loss": 0.3063, + "step": 17162 + }, + { + "epoch": 0.3435778094737632, + "grad_norm": 1.0008550882339478, + "learning_rate": 7.635548555607714e-06, + "loss": 0.2603, + "step": 17163 + }, + { + "epoch": 0.34359782799089156, + "grad_norm": 1.8330039978027344, + "learning_rate": 7.635273060245717e-06, + "loss": 0.7775, + "step": 17164 + }, + { + "epoch": 0.3436178465080199, + "grad_norm": 1.076926350593567, + "learning_rate": 7.634997553805598e-06, + "loss": 0.304, + "step": 17165 + }, + { + "epoch": 0.34363786502514826, + "grad_norm": 1.1728688478469849, + "learning_rate": 7.634722036288518e-06, + "loss": 0.3536, + "step": 17166 + }, + { + "epoch": 0.3436578835422766, + "grad_norm": 1.0753244161605835, + "learning_rate": 7.634446507695631e-06, + "loss": 0.3466, + "step": 17167 + }, + { + "epoch": 0.34367790205940496, + "grad_norm": 1.1688847541809082, + "learning_rate": 7.6341709680281e-06, + "loss": 0.3122, + "step": 17168 + }, + { + "epoch": 0.3436979205765333, + "grad_norm": 1.0400296449661255, + "learning_rate": 7.633895417287081e-06, + "loss": 0.3515, + "step": 17169 + }, + { + "epoch": 0.34371793909366166, + "grad_norm": 2.0127978324890137, + "learning_rate": 7.63361985547373e-06, + "loss": 0.7524, + "step": 17170 + }, + { + "epoch": 0.34373795761078996, + "grad_norm": 1.0654696226119995, + "learning_rate": 7.63334428258921e-06, + "loss": 0.3747, + "step": 17171 + }, + { + "epoch": 0.3437579761279183, + "grad_norm": 1.1303563117980957, + "learning_rate": 7.633068698634678e-06, + "loss": 0.3209, + "step": 17172 + }, + { + "epoch": 0.34377799464504666, + "grad_norm": 1.063456416130066, + "learning_rate": 7.63279310361129e-06, + "loss": 0.3184, + "step": 17173 + }, + { + "epoch": 0.343798013162175, + "grad_norm": 1.238538146018982, + "learning_rate": 7.632517497520207e-06, + "loss": 0.3324, + "step": 17174 + }, + { + "epoch": 0.34381803167930336, + "grad_norm": 1.8618565797805786, + "learning_rate": 7.63224188036259e-06, + "loss": 0.7595, + "step": 17175 + }, + { + "epoch": 0.3438380501964317, + "grad_norm": 1.2883349657058716, + "learning_rate": 7.63196625213959e-06, + "loss": 0.2811, + "step": 17176 + }, + { + "epoch": 0.34385806871356006, + "grad_norm": 1.062425136566162, + "learning_rate": 7.631690612852372e-06, + "loss": 0.3189, + "step": 17177 + }, + { + "epoch": 0.3438780872306884, + "grad_norm": 1.086266040802002, + "learning_rate": 7.631414962502093e-06, + "loss": 0.3184, + "step": 17178 + }, + { + "epoch": 0.3438981057478167, + "grad_norm": 1.0729047060012817, + "learning_rate": 7.631139301089912e-06, + "loss": 0.3257, + "step": 17179 + }, + { + "epoch": 0.34391812426494506, + "grad_norm": 1.0448271036148071, + "learning_rate": 7.630863628616986e-06, + "loss": 0.3668, + "step": 17180 + }, + { + "epoch": 0.3439381427820734, + "grad_norm": 1.24724543094635, + "learning_rate": 7.630587945084477e-06, + "loss": 0.3163, + "step": 17181 + }, + { + "epoch": 0.34395816129920176, + "grad_norm": 1.1037927865982056, + "learning_rate": 7.630312250493543e-06, + "loss": 0.3203, + "step": 17182 + }, + { + "epoch": 0.3439781798163301, + "grad_norm": 1.1432766914367676, + "learning_rate": 7.630036544845342e-06, + "loss": 0.3104, + "step": 17183 + }, + { + "epoch": 0.34399819833345846, + "grad_norm": 1.1979103088378906, + "learning_rate": 7.629760828141033e-06, + "loss": 0.3226, + "step": 17184 + }, + { + "epoch": 0.3440182168505868, + "grad_norm": 1.1719956398010254, + "learning_rate": 7.629485100381776e-06, + "loss": 0.2936, + "step": 17185 + }, + { + "epoch": 0.34403823536771516, + "grad_norm": 1.6191877126693726, + "learning_rate": 7.629209361568729e-06, + "loss": 0.315, + "step": 17186 + }, + { + "epoch": 0.34405825388484346, + "grad_norm": 1.220545768737793, + "learning_rate": 7.628933611703051e-06, + "loss": 0.3311, + "step": 17187 + }, + { + "epoch": 0.3440782724019718, + "grad_norm": 1.8585063219070435, + "learning_rate": 7.628657850785903e-06, + "loss": 0.8475, + "step": 17188 + }, + { + "epoch": 0.34409829091910016, + "grad_norm": 1.923688292503357, + "learning_rate": 7.6283820788184414e-06, + "loss": 0.8111, + "step": 17189 + }, + { + "epoch": 0.3441183094362285, + "grad_norm": 1.1330536603927612, + "learning_rate": 7.628106295801829e-06, + "loss": 0.3041, + "step": 17190 + }, + { + "epoch": 0.34413832795335686, + "grad_norm": 1.1458351612091064, + "learning_rate": 7.627830501737222e-06, + "loss": 0.3271, + "step": 17191 + }, + { + "epoch": 0.3441583464704852, + "grad_norm": 0.9931438565254211, + "learning_rate": 7.627554696625783e-06, + "loss": 0.2583, + "step": 17192 + }, + { + "epoch": 0.34417836498761356, + "grad_norm": 1.1767126321792603, + "learning_rate": 7.627278880468667e-06, + "loss": 0.3561, + "step": 17193 + }, + { + "epoch": 0.3441983835047419, + "grad_norm": 1.1087387800216675, + "learning_rate": 7.627003053267038e-06, + "loss": 0.3121, + "step": 17194 + }, + { + "epoch": 0.3442184020218702, + "grad_norm": 1.135282278060913, + "learning_rate": 7.626727215022054e-06, + "loss": 0.3385, + "step": 17195 + }, + { + "epoch": 0.34423842053899856, + "grad_norm": 1.115468978881836, + "learning_rate": 7.626451365734871e-06, + "loss": 0.2757, + "step": 17196 + }, + { + "epoch": 0.3442584390561269, + "grad_norm": 1.0695559978485107, + "learning_rate": 7.626175505406654e-06, + "loss": 0.2989, + "step": 17197 + }, + { + "epoch": 0.34427845757325526, + "grad_norm": 1.1904455423355103, + "learning_rate": 7.62589963403856e-06, + "loss": 0.3228, + "step": 17198 + }, + { + "epoch": 0.3442984760903836, + "grad_norm": 1.134340763092041, + "learning_rate": 7.625623751631749e-06, + "loss": 0.3067, + "step": 17199 + }, + { + "epoch": 0.34431849460751196, + "grad_norm": 1.0480339527130127, + "learning_rate": 7.62534785818738e-06, + "loss": 0.3404, + "step": 17200 + }, + { + "epoch": 0.3443385131246403, + "grad_norm": 1.2906614542007446, + "learning_rate": 7.6250719537066155e-06, + "loss": 0.3619, + "step": 17201 + }, + { + "epoch": 0.34435853164176866, + "grad_norm": 1.0381948947906494, + "learning_rate": 7.624796038190612e-06, + "loss": 0.3, + "step": 17202 + }, + { + "epoch": 0.34437855015889696, + "grad_norm": 1.2689272165298462, + "learning_rate": 7.62452011164053e-06, + "loss": 0.3121, + "step": 17203 + }, + { + "epoch": 0.3443985686760253, + "grad_norm": 1.191724181175232, + "learning_rate": 7.62424417405753e-06, + "loss": 0.3598, + "step": 17204 + }, + { + "epoch": 0.34441858719315366, + "grad_norm": 1.1135530471801758, + "learning_rate": 7.623968225442775e-06, + "loss": 0.2788, + "step": 17205 + }, + { + "epoch": 0.344438605710282, + "grad_norm": 1.153592586517334, + "learning_rate": 7.62369226579742e-06, + "loss": 0.3193, + "step": 17206 + }, + { + "epoch": 0.34445862422741036, + "grad_norm": 1.262872338294983, + "learning_rate": 7.6234162951226276e-06, + "loss": 0.4166, + "step": 17207 + }, + { + "epoch": 0.3444786427445387, + "grad_norm": 1.0849634408950806, + "learning_rate": 7.623140313419559e-06, + "loss": 0.285, + "step": 17208 + }, + { + "epoch": 0.34449866126166706, + "grad_norm": 1.097040057182312, + "learning_rate": 7.622864320689372e-06, + "loss": 0.3144, + "step": 17209 + }, + { + "epoch": 0.3445186797787954, + "grad_norm": 1.8160384893417358, + "learning_rate": 7.622588316933228e-06, + "loss": 0.7544, + "step": 17210 + }, + { + "epoch": 0.3445386982959237, + "grad_norm": 1.074729323387146, + "learning_rate": 7.622312302152286e-06, + "loss": 0.2748, + "step": 17211 + }, + { + "epoch": 0.34455871681305206, + "grad_norm": 1.156861662864685, + "learning_rate": 7.6220362763477095e-06, + "loss": 0.3275, + "step": 17212 + }, + { + "epoch": 0.3445787353301804, + "grad_norm": 1.9574061632156372, + "learning_rate": 7.621760239520655e-06, + "loss": 0.7329, + "step": 17213 + }, + { + "epoch": 0.34459875384730876, + "grad_norm": 1.3499215841293335, + "learning_rate": 7.6214841916722855e-06, + "loss": 0.3241, + "step": 17214 + }, + { + "epoch": 0.3446187723644371, + "grad_norm": 1.0334177017211914, + "learning_rate": 7.621208132803762e-06, + "loss": 0.3263, + "step": 17215 + }, + { + "epoch": 0.34463879088156546, + "grad_norm": 1.8829939365386963, + "learning_rate": 7.620932062916241e-06, + "loss": 0.8568, + "step": 17216 + }, + { + "epoch": 0.3446588093986938, + "grad_norm": 1.0685352087020874, + "learning_rate": 7.620655982010887e-06, + "loss": 0.3138, + "step": 17217 + }, + { + "epoch": 0.34467882791582216, + "grad_norm": 1.1423507928848267, + "learning_rate": 7.6203798900888584e-06, + "loss": 0.3514, + "step": 17218 + }, + { + "epoch": 0.34469884643295046, + "grad_norm": 1.1587055921554565, + "learning_rate": 7.620103787151319e-06, + "loss": 0.3083, + "step": 17219 + }, + { + "epoch": 0.3447188649500788, + "grad_norm": 1.187471866607666, + "learning_rate": 7.619827673199425e-06, + "loss": 0.3115, + "step": 17220 + }, + { + "epoch": 0.34473888346720716, + "grad_norm": 1.258128046989441, + "learning_rate": 7.61955154823434e-06, + "loss": 0.348, + "step": 17221 + }, + { + "epoch": 0.3447589019843355, + "grad_norm": 1.2236452102661133, + "learning_rate": 7.619275412257221e-06, + "loss": 0.3305, + "step": 17222 + }, + { + "epoch": 0.34477892050146386, + "grad_norm": 1.2127845287322998, + "learning_rate": 7.618999265269236e-06, + "loss": 0.3465, + "step": 17223 + }, + { + "epoch": 0.3447989390185922, + "grad_norm": 1.179677128791809, + "learning_rate": 7.61872310727154e-06, + "loss": 0.3825, + "step": 17224 + }, + { + "epoch": 0.34481895753572056, + "grad_norm": 1.91777765750885, + "learning_rate": 7.618446938265296e-06, + "loss": 0.7576, + "step": 17225 + }, + { + "epoch": 0.3448389760528489, + "grad_norm": 1.0161566734313965, + "learning_rate": 7.618170758251664e-06, + "loss": 0.3057, + "step": 17226 + }, + { + "epoch": 0.3448589945699772, + "grad_norm": 2.1176159381866455, + "learning_rate": 7.617894567231807e-06, + "loss": 0.8642, + "step": 17227 + }, + { + "epoch": 0.34487901308710556, + "grad_norm": 1.056501030921936, + "learning_rate": 7.617618365206883e-06, + "loss": 0.2857, + "step": 17228 + }, + { + "epoch": 0.3448990316042339, + "grad_norm": 1.1176849603652954, + "learning_rate": 7.617342152178054e-06, + "loss": 0.3013, + "step": 17229 + }, + { + "epoch": 0.34491905012136226, + "grad_norm": 1.0909374952316284, + "learning_rate": 7.6170659281464846e-06, + "loss": 0.2955, + "step": 17230 + }, + { + "epoch": 0.3449390686384906, + "grad_norm": 1.0698007345199585, + "learning_rate": 7.616789693113332e-06, + "loss": 0.3356, + "step": 17231 + }, + { + "epoch": 0.34495908715561896, + "grad_norm": 1.1095372438430786, + "learning_rate": 7.616513447079758e-06, + "loss": 0.3222, + "step": 17232 + }, + { + "epoch": 0.3449791056727473, + "grad_norm": 1.0339734554290771, + "learning_rate": 7.616237190046924e-06, + "loss": 0.2981, + "step": 17233 + }, + { + "epoch": 0.34499912418987566, + "grad_norm": 1.1509100198745728, + "learning_rate": 7.6159609220159944e-06, + "loss": 0.2911, + "step": 17234 + }, + { + "epoch": 0.34501914270700396, + "grad_norm": 1.1032990217208862, + "learning_rate": 7.615684642988125e-06, + "loss": 0.3327, + "step": 17235 + }, + { + "epoch": 0.3450391612241323, + "grad_norm": 1.0858372449874878, + "learning_rate": 7.6154083529644815e-06, + "loss": 0.3205, + "step": 17236 + }, + { + "epoch": 0.34505917974126066, + "grad_norm": 1.9668524265289307, + "learning_rate": 7.615132051946226e-06, + "loss": 0.7546, + "step": 17237 + }, + { + "epoch": 0.345079198258389, + "grad_norm": 1.0531727075576782, + "learning_rate": 7.6148557399345156e-06, + "loss": 0.3086, + "step": 17238 + }, + { + "epoch": 0.34509921677551736, + "grad_norm": 1.0537164211273193, + "learning_rate": 7.614579416930516e-06, + "loss": 0.2715, + "step": 17239 + }, + { + "epoch": 0.3451192352926457, + "grad_norm": 1.1987289190292358, + "learning_rate": 7.614303082935386e-06, + "loss": 0.3159, + "step": 17240 + }, + { + "epoch": 0.34513925380977406, + "grad_norm": 1.336942195892334, + "learning_rate": 7.614026737950289e-06, + "loss": 0.3228, + "step": 17241 + }, + { + "epoch": 0.3451592723269024, + "grad_norm": 1.1704480648040771, + "learning_rate": 7.613750381976386e-06, + "loss": 0.3046, + "step": 17242 + }, + { + "epoch": 0.3451792908440307, + "grad_norm": 1.1157288551330566, + "learning_rate": 7.613474015014839e-06, + "loss": 0.3715, + "step": 17243 + }, + { + "epoch": 0.34519930936115906, + "grad_norm": 1.1717268228530884, + "learning_rate": 7.613197637066809e-06, + "loss": 0.2766, + "step": 17244 + }, + { + "epoch": 0.3452193278782874, + "grad_norm": 1.1427886486053467, + "learning_rate": 7.612921248133461e-06, + "loss": 0.3431, + "step": 17245 + }, + { + "epoch": 0.34523934639541576, + "grad_norm": 1.8499743938446045, + "learning_rate": 7.612644848215951e-06, + "loss": 0.7931, + "step": 17246 + }, + { + "epoch": 0.3452593649125441, + "grad_norm": 1.0165497064590454, + "learning_rate": 7.612368437315447e-06, + "loss": 0.29, + "step": 17247 + }, + { + "epoch": 0.34527938342967246, + "grad_norm": 1.0346763134002686, + "learning_rate": 7.612092015433106e-06, + "loss": 0.2749, + "step": 17248 + }, + { + "epoch": 0.3452994019468008, + "grad_norm": 1.035053014755249, + "learning_rate": 7.6118155825700935e-06, + "loss": 0.3018, + "step": 17249 + }, + { + "epoch": 0.34531942046392916, + "grad_norm": 1.094794750213623, + "learning_rate": 7.61153913872757e-06, + "loss": 0.3474, + "step": 17250 + }, + { + "epoch": 0.34533943898105746, + "grad_norm": 1.0849279165267944, + "learning_rate": 7.611262683906699e-06, + "loss": 0.3055, + "step": 17251 + }, + { + "epoch": 0.3453594574981858, + "grad_norm": 1.79744291305542, + "learning_rate": 7.61098621810864e-06, + "loss": 0.859, + "step": 17252 + }, + { + "epoch": 0.34537947601531416, + "grad_norm": 0.9330624341964722, + "learning_rate": 7.610709741334559e-06, + "loss": 0.2698, + "step": 17253 + }, + { + "epoch": 0.3453994945324425, + "grad_norm": 1.230678915977478, + "learning_rate": 7.610433253585614e-06, + "loss": 0.3373, + "step": 17254 + }, + { + "epoch": 0.34541951304957086, + "grad_norm": 1.1268891096115112, + "learning_rate": 7.61015675486297e-06, + "loss": 0.3472, + "step": 17255 + }, + { + "epoch": 0.3454395315666992, + "grad_norm": 1.2942602634429932, + "learning_rate": 7.609880245167789e-06, + "loss": 0.3247, + "step": 17256 + }, + { + "epoch": 0.34545955008382756, + "grad_norm": 1.0979323387145996, + "learning_rate": 7.609603724501233e-06, + "loss": 0.384, + "step": 17257 + }, + { + "epoch": 0.3454795686009559, + "grad_norm": 0.9844490885734558, + "learning_rate": 7.609327192864464e-06, + "loss": 0.3103, + "step": 17258 + }, + { + "epoch": 0.3454995871180842, + "grad_norm": 1.1196891069412231, + "learning_rate": 7.609050650258647e-06, + "loss": 0.3997, + "step": 17259 + }, + { + "epoch": 0.34551960563521256, + "grad_norm": 1.2098877429962158, + "learning_rate": 7.608774096684941e-06, + "loss": 0.3057, + "step": 17260 + }, + { + "epoch": 0.3455396241523409, + "grad_norm": 1.1133852005004883, + "learning_rate": 7.60849753214451e-06, + "loss": 0.2948, + "step": 17261 + }, + { + "epoch": 0.34555964266946926, + "grad_norm": 1.2527894973754883, + "learning_rate": 7.608220956638516e-06, + "loss": 0.3329, + "step": 17262 + }, + { + "epoch": 0.3455796611865976, + "grad_norm": 1.0810730457305908, + "learning_rate": 7.607944370168124e-06, + "loss": 0.315, + "step": 17263 + }, + { + "epoch": 0.34559967970372596, + "grad_norm": 1.055065393447876, + "learning_rate": 7.607667772734492e-06, + "loss": 0.2812, + "step": 17264 + }, + { + "epoch": 0.3456196982208543, + "grad_norm": 1.9330579042434692, + "learning_rate": 7.607391164338791e-06, + "loss": 0.8544, + "step": 17265 + }, + { + "epoch": 0.34563971673798266, + "grad_norm": 0.9692252278327942, + "learning_rate": 7.607114544982175e-06, + "loss": 0.2584, + "step": 17266 + }, + { + "epoch": 0.34565973525511096, + "grad_norm": 1.0657846927642822, + "learning_rate": 7.60683791466581e-06, + "loss": 0.3119, + "step": 17267 + }, + { + "epoch": 0.3456797537722393, + "grad_norm": 2.005218267440796, + "learning_rate": 7.606561273390862e-06, + "loss": 0.2953, + "step": 17268 + }, + { + "epoch": 0.34569977228936766, + "grad_norm": 1.1135677099227905, + "learning_rate": 7.60628462115849e-06, + "loss": 0.3705, + "step": 17269 + }, + { + "epoch": 0.345719790806496, + "grad_norm": 1.2366276979446411, + "learning_rate": 7.606007957969858e-06, + "loss": 0.3008, + "step": 17270 + }, + { + "epoch": 0.34573980932362436, + "grad_norm": 1.0556575059890747, + "learning_rate": 7.60573128382613e-06, + "loss": 0.3362, + "step": 17271 + }, + { + "epoch": 0.3457598278407527, + "grad_norm": 1.1317024230957031, + "learning_rate": 7.60545459872847e-06, + "loss": 0.293, + "step": 17272 + }, + { + "epoch": 0.34577984635788106, + "grad_norm": 1.0324469804763794, + "learning_rate": 7.605177902678037e-06, + "loss": 0.2878, + "step": 17273 + }, + { + "epoch": 0.3457998648750094, + "grad_norm": 0.9690262675285339, + "learning_rate": 7.6049011956759975e-06, + "loss": 0.3038, + "step": 17274 + }, + { + "epoch": 0.3458198833921377, + "grad_norm": 1.0345600843429565, + "learning_rate": 7.604624477723513e-06, + "loss": 0.3252, + "step": 17275 + }, + { + "epoch": 0.34583990190926606, + "grad_norm": 1.2127313613891602, + "learning_rate": 7.60434774882175e-06, + "loss": 0.2914, + "step": 17276 + }, + { + "epoch": 0.3458599204263944, + "grad_norm": 1.1296346187591553, + "learning_rate": 7.6040710089718674e-06, + "loss": 0.3053, + "step": 17277 + }, + { + "epoch": 0.34587993894352276, + "grad_norm": 1.0819686651229858, + "learning_rate": 7.603794258175033e-06, + "loss": 0.3404, + "step": 17278 + }, + { + "epoch": 0.3458999574606511, + "grad_norm": 1.0685240030288696, + "learning_rate": 7.603517496432405e-06, + "loss": 0.3125, + "step": 17279 + }, + { + "epoch": 0.34591997597777946, + "grad_norm": 1.1790827512741089, + "learning_rate": 7.603240723745151e-06, + "loss": 0.3261, + "step": 17280 + }, + { + "epoch": 0.3459399944949078, + "grad_norm": 1.0380107164382935, + "learning_rate": 7.602963940114434e-06, + "loss": 0.3725, + "step": 17281 + }, + { + "epoch": 0.34596001301203616, + "grad_norm": 0.975614070892334, + "learning_rate": 7.602687145541416e-06, + "loss": 0.3093, + "step": 17282 + }, + { + "epoch": 0.34598003152916446, + "grad_norm": 1.9925916194915771, + "learning_rate": 7.602410340027262e-06, + "loss": 0.8832, + "step": 17283 + }, + { + "epoch": 0.3460000500462928, + "grad_norm": 1.1286725997924805, + "learning_rate": 7.602133523573134e-06, + "loss": 0.3469, + "step": 17284 + }, + { + "epoch": 0.34602006856342116, + "grad_norm": 1.1027989387512207, + "learning_rate": 7.601856696180199e-06, + "loss": 0.3672, + "step": 17285 + }, + { + "epoch": 0.3460400870805495, + "grad_norm": 1.2424520254135132, + "learning_rate": 7.601579857849615e-06, + "loss": 0.3338, + "step": 17286 + }, + { + "epoch": 0.34606010559767786, + "grad_norm": 1.1247718334197998, + "learning_rate": 7.60130300858255e-06, + "loss": 0.3334, + "step": 17287 + }, + { + "epoch": 0.3460801241148062, + "grad_norm": 1.1293586492538452, + "learning_rate": 7.601026148380168e-06, + "loss": 0.2982, + "step": 17288 + }, + { + "epoch": 0.34610014263193456, + "grad_norm": 2.1406822204589844, + "learning_rate": 7.600749277243631e-06, + "loss": 0.8038, + "step": 17289 + }, + { + "epoch": 0.3461201611490629, + "grad_norm": 2.084840774536133, + "learning_rate": 7.600472395174105e-06, + "loss": 0.7941, + "step": 17290 + }, + { + "epoch": 0.3461401796661912, + "grad_norm": 1.0918195247650146, + "learning_rate": 7.600195502172751e-06, + "loss": 0.342, + "step": 17291 + }, + { + "epoch": 0.34616019818331956, + "grad_norm": 1.1914747953414917, + "learning_rate": 7.599918598240737e-06, + "loss": 0.3674, + "step": 17292 + }, + { + "epoch": 0.3461802167004479, + "grad_norm": 1.166519045829773, + "learning_rate": 7.599641683379222e-06, + "loss": 0.2886, + "step": 17293 + }, + { + "epoch": 0.34620023521757626, + "grad_norm": 1.1821644306182861, + "learning_rate": 7.599364757589374e-06, + "loss": 0.3304, + "step": 17294 + }, + { + "epoch": 0.3462202537347046, + "grad_norm": 1.078021764755249, + "learning_rate": 7.599087820872356e-06, + "loss": 0.2909, + "step": 17295 + }, + { + "epoch": 0.34624027225183296, + "grad_norm": 1.1790244579315186, + "learning_rate": 7.598810873229333e-06, + "loss": 0.3894, + "step": 17296 + }, + { + "epoch": 0.3462602907689613, + "grad_norm": 1.2148433923721313, + "learning_rate": 7.598533914661467e-06, + "loss": 0.3747, + "step": 17297 + }, + { + "epoch": 0.34628030928608966, + "grad_norm": 1.0898021459579468, + "learning_rate": 7.598256945169924e-06, + "loss": 0.3187, + "step": 17298 + }, + { + "epoch": 0.34630032780321796, + "grad_norm": 1.1712148189544678, + "learning_rate": 7.597979964755868e-06, + "loss": 0.3188, + "step": 17299 + }, + { + "epoch": 0.3463203463203463, + "grad_norm": 1.1704602241516113, + "learning_rate": 7.5977029734204636e-06, + "loss": 0.3507, + "step": 17300 + }, + { + "epoch": 0.34634036483747466, + "grad_norm": 2.097196340560913, + "learning_rate": 7.5974259711648756e-06, + "loss": 0.925, + "step": 17301 + }, + { + "epoch": 0.346360383354603, + "grad_norm": 1.0486842393875122, + "learning_rate": 7.5971489579902655e-06, + "loss": 0.3499, + "step": 17302 + }, + { + "epoch": 0.34638040187173136, + "grad_norm": 1.0787527561187744, + "learning_rate": 7.596871933897802e-06, + "loss": 0.3089, + "step": 17303 + }, + { + "epoch": 0.3464004203888597, + "grad_norm": 1.2078405618667603, + "learning_rate": 7.596594898888647e-06, + "loss": 0.3634, + "step": 17304 + }, + { + "epoch": 0.34642043890598806, + "grad_norm": 1.2210168838500977, + "learning_rate": 7.596317852963966e-06, + "loss": 0.3014, + "step": 17305 + }, + { + "epoch": 0.3464404574231164, + "grad_norm": 1.2383902072906494, + "learning_rate": 7.596040796124924e-06, + "loss": 0.2654, + "step": 17306 + }, + { + "epoch": 0.3464604759402447, + "grad_norm": 1.0498110055923462, + "learning_rate": 7.595763728372685e-06, + "loss": 0.3181, + "step": 17307 + }, + { + "epoch": 0.34648049445737306, + "grad_norm": 1.0651596784591675, + "learning_rate": 7.595486649708412e-06, + "loss": 0.2648, + "step": 17308 + }, + { + "epoch": 0.3465005129745014, + "grad_norm": 1.0521159172058105, + "learning_rate": 7.595209560133274e-06, + "loss": 0.3189, + "step": 17309 + }, + { + "epoch": 0.34652053149162976, + "grad_norm": 1.086275577545166, + "learning_rate": 7.594932459648434e-06, + "loss": 0.3243, + "step": 17310 + }, + { + "epoch": 0.3465405500087581, + "grad_norm": 1.0268734693527222, + "learning_rate": 7.594655348255056e-06, + "loss": 0.304, + "step": 17311 + }, + { + "epoch": 0.34656056852588646, + "grad_norm": 1.1813822984695435, + "learning_rate": 7.594378225954303e-06, + "loss": 0.3439, + "step": 17312 + }, + { + "epoch": 0.3465805870430148, + "grad_norm": 1.017850399017334, + "learning_rate": 7.594101092747344e-06, + "loss": 0.308, + "step": 17313 + }, + { + "epoch": 0.34660060556014316, + "grad_norm": 1.0625832080841064, + "learning_rate": 7.593823948635342e-06, + "loss": 0.3073, + "step": 17314 + }, + { + "epoch": 0.34662062407727146, + "grad_norm": 2.0175321102142334, + "learning_rate": 7.5935467936194615e-06, + "loss": 0.8533, + "step": 17315 + }, + { + "epoch": 0.3466406425943998, + "grad_norm": 1.9666283130645752, + "learning_rate": 7.59326962770087e-06, + "loss": 0.8144, + "step": 17316 + }, + { + "epoch": 0.34666066111152816, + "grad_norm": 1.2265307903289795, + "learning_rate": 7.592992450880731e-06, + "loss": 0.3189, + "step": 17317 + }, + { + "epoch": 0.3466806796286565, + "grad_norm": 1.0787204504013062, + "learning_rate": 7.592715263160209e-06, + "loss": 0.2702, + "step": 17318 + }, + { + "epoch": 0.34670069814578486, + "grad_norm": 1.1550323963165283, + "learning_rate": 7.592438064540468e-06, + "loss": 0.3327, + "step": 17319 + }, + { + "epoch": 0.3467207166629132, + "grad_norm": 1.7788723707199097, + "learning_rate": 7.592160855022679e-06, + "loss": 0.7959, + "step": 17320 + }, + { + "epoch": 0.34674073518004156, + "grad_norm": 1.312744379043579, + "learning_rate": 7.591883634608002e-06, + "loss": 0.3426, + "step": 17321 + }, + { + "epoch": 0.34676075369716985, + "grad_norm": 1.0915449857711792, + "learning_rate": 7.591606403297604e-06, + "loss": 0.3231, + "step": 17322 + }, + { + "epoch": 0.3467807722142982, + "grad_norm": 1.0457451343536377, + "learning_rate": 7.591329161092651e-06, + "loss": 0.3031, + "step": 17323 + }, + { + "epoch": 0.34680079073142656, + "grad_norm": 1.1947120428085327, + "learning_rate": 7.591051907994307e-06, + "loss": 0.3078, + "step": 17324 + }, + { + "epoch": 0.3468208092485549, + "grad_norm": 1.2467529773712158, + "learning_rate": 7.590774644003738e-06, + "loss": 0.3466, + "step": 17325 + }, + { + "epoch": 0.34684082776568326, + "grad_norm": 1.1674758195877075, + "learning_rate": 7.59049736912211e-06, + "loss": 0.3311, + "step": 17326 + }, + { + "epoch": 0.3468608462828116, + "grad_norm": 1.05015230178833, + "learning_rate": 7.59022008335059e-06, + "loss": 0.3177, + "step": 17327 + }, + { + "epoch": 0.34688086479993996, + "grad_norm": 1.0069438219070435, + "learning_rate": 7.589942786690341e-06, + "loss": 0.3307, + "step": 17328 + }, + { + "epoch": 0.3469008833170683, + "grad_norm": 1.172953724861145, + "learning_rate": 7.5896654791425296e-06, + "loss": 0.3381, + "step": 17329 + }, + { + "epoch": 0.3469209018341966, + "grad_norm": 1.1374201774597168, + "learning_rate": 7.589388160708322e-06, + "loss": 0.342, + "step": 17330 + }, + { + "epoch": 0.34694092035132496, + "grad_norm": 1.1930195093154907, + "learning_rate": 7.589110831388884e-06, + "loss": 0.3166, + "step": 17331 + }, + { + "epoch": 0.3469609388684533, + "grad_norm": 1.13201105594635, + "learning_rate": 7.588833491185382e-06, + "loss": 0.274, + "step": 17332 + }, + { + "epoch": 0.34698095738558166, + "grad_norm": 1.0973811149597168, + "learning_rate": 7.58855614009898e-06, + "loss": 0.3686, + "step": 17333 + }, + { + "epoch": 0.34700097590271, + "grad_norm": 1.0965131521224976, + "learning_rate": 7.588278778130847e-06, + "loss": 0.3174, + "step": 17334 + }, + { + "epoch": 0.34702099441983836, + "grad_norm": 1.076871633529663, + "learning_rate": 7.588001405282145e-06, + "loss": 0.2809, + "step": 17335 + }, + { + "epoch": 0.3470410129369667, + "grad_norm": 1.0444622039794922, + "learning_rate": 7.587724021554043e-06, + "loss": 0.2872, + "step": 17336 + }, + { + "epoch": 0.34706103145409506, + "grad_norm": 1.180808663368225, + "learning_rate": 7.587446626947705e-06, + "loss": 0.3192, + "step": 17337 + }, + { + "epoch": 0.34708104997122335, + "grad_norm": 1.0566155910491943, + "learning_rate": 7.587169221464299e-06, + "loss": 0.3252, + "step": 17338 + }, + { + "epoch": 0.3471010684883517, + "grad_norm": 1.1478983163833618, + "learning_rate": 7.58689180510499e-06, + "loss": 0.3124, + "step": 17339 + }, + { + "epoch": 0.34712108700548006, + "grad_norm": 1.1487619876861572, + "learning_rate": 7.586614377870944e-06, + "loss": 0.3453, + "step": 17340 + }, + { + "epoch": 0.3471411055226084, + "grad_norm": 1.2936521768569946, + "learning_rate": 7.58633693976333e-06, + "loss": 0.3016, + "step": 17341 + }, + { + "epoch": 0.34716112403973676, + "grad_norm": 1.0107266902923584, + "learning_rate": 7.586059490783311e-06, + "loss": 0.2795, + "step": 17342 + }, + { + "epoch": 0.3471811425568651, + "grad_norm": 1.8204354047775269, + "learning_rate": 7.585782030932054e-06, + "loss": 0.7834, + "step": 17343 + }, + { + "epoch": 0.34720116107399346, + "grad_norm": 1.977805733680725, + "learning_rate": 7.585504560210726e-06, + "loss": 0.8212, + "step": 17344 + }, + { + "epoch": 0.3472211795911218, + "grad_norm": 1.0772457122802734, + "learning_rate": 7.585227078620493e-06, + "loss": 0.3375, + "step": 17345 + }, + { + "epoch": 0.3472411981082501, + "grad_norm": 1.0062230825424194, + "learning_rate": 7.584949586162522e-06, + "loss": 0.3178, + "step": 17346 + }, + { + "epoch": 0.34726121662537845, + "grad_norm": 1.0249706506729126, + "learning_rate": 7.58467208283798e-06, + "loss": 0.312, + "step": 17347 + }, + { + "epoch": 0.3472812351425068, + "grad_norm": 1.0837920904159546, + "learning_rate": 7.5843945686480306e-06, + "loss": 0.2963, + "step": 17348 + }, + { + "epoch": 0.34730125365963516, + "grad_norm": 1.1413787603378296, + "learning_rate": 7.584117043593845e-06, + "loss": 0.3377, + "step": 17349 + }, + { + "epoch": 0.3473212721767635, + "grad_norm": 1.025084376335144, + "learning_rate": 7.583839507676586e-06, + "loss": 0.3321, + "step": 17350 + }, + { + "epoch": 0.34734129069389186, + "grad_norm": 1.0379348993301392, + "learning_rate": 7.583561960897421e-06, + "loss": 0.2837, + "step": 17351 + }, + { + "epoch": 0.3473613092110202, + "grad_norm": 1.1715766191482544, + "learning_rate": 7.58328440325752e-06, + "loss": 0.3339, + "step": 17352 + }, + { + "epoch": 0.34738132772814856, + "grad_norm": 1.1419843435287476, + "learning_rate": 7.5830068347580454e-06, + "loss": 0.2726, + "step": 17353 + }, + { + "epoch": 0.34740134624527685, + "grad_norm": 1.0356402397155762, + "learning_rate": 7.582729255400167e-06, + "loss": 0.3377, + "step": 17354 + }, + { + "epoch": 0.3474213647624052, + "grad_norm": 0.9820488095283508, + "learning_rate": 7.58245166518505e-06, + "loss": 0.3208, + "step": 17355 + }, + { + "epoch": 0.34744138327953356, + "grad_norm": 1.052971601486206, + "learning_rate": 7.5821740641138615e-06, + "loss": 0.3339, + "step": 17356 + }, + { + "epoch": 0.3474614017966619, + "grad_norm": 1.0361939668655396, + "learning_rate": 7.581896452187769e-06, + "loss": 0.3141, + "step": 17357 + }, + { + "epoch": 0.34748142031379026, + "grad_norm": 1.0425169467926025, + "learning_rate": 7.58161882940794e-06, + "loss": 0.2792, + "step": 17358 + }, + { + "epoch": 0.3475014388309186, + "grad_norm": 1.1326262950897217, + "learning_rate": 7.581341195775541e-06, + "loss": 0.357, + "step": 17359 + }, + { + "epoch": 0.34752145734804696, + "grad_norm": 1.0373493432998657, + "learning_rate": 7.581063551291741e-06, + "loss": 0.3082, + "step": 17360 + }, + { + "epoch": 0.3475414758651753, + "grad_norm": 1.3565528392791748, + "learning_rate": 7.580785895957702e-06, + "loss": 0.3037, + "step": 17361 + }, + { + "epoch": 0.3475614943823036, + "grad_norm": 1.1360080242156982, + "learning_rate": 7.580508229774597e-06, + "loss": 0.2746, + "step": 17362 + }, + { + "epoch": 0.34758151289943195, + "grad_norm": 1.0348445177078247, + "learning_rate": 7.580230552743588e-06, + "loss": 0.3072, + "step": 17363 + }, + { + "epoch": 0.3476015314165603, + "grad_norm": 1.1330196857452393, + "learning_rate": 7.579952864865847e-06, + "loss": 0.3149, + "step": 17364 + }, + { + "epoch": 0.34762154993368866, + "grad_norm": 1.1511708498001099, + "learning_rate": 7.579675166142539e-06, + "loss": 0.3228, + "step": 17365 + }, + { + "epoch": 0.347641568450817, + "grad_norm": 1.1058324575424194, + "learning_rate": 7.579397456574832e-06, + "loss": 0.3264, + "step": 17366 + }, + { + "epoch": 0.34766158696794536, + "grad_norm": 0.9611104726791382, + "learning_rate": 7.579119736163894e-06, + "loss": 0.2937, + "step": 17367 + }, + { + "epoch": 0.3476816054850737, + "grad_norm": 1.122310757637024, + "learning_rate": 7.578842004910889e-06, + "loss": 0.3596, + "step": 17368 + }, + { + "epoch": 0.34770162400220206, + "grad_norm": 1.061716914176941, + "learning_rate": 7.578564262816989e-06, + "loss": 0.3128, + "step": 17369 + }, + { + "epoch": 0.34772164251933035, + "grad_norm": 1.1689655780792236, + "learning_rate": 7.578286509883359e-06, + "loss": 0.3412, + "step": 17370 + }, + { + "epoch": 0.3477416610364587, + "grad_norm": 1.1721628904342651, + "learning_rate": 7.578008746111168e-06, + "loss": 0.3737, + "step": 17371 + }, + { + "epoch": 0.34776167955358706, + "grad_norm": 1.1953766345977783, + "learning_rate": 7.5777309715015825e-06, + "loss": 0.3407, + "step": 17372 + }, + { + "epoch": 0.3477816980707154, + "grad_norm": 1.0250290632247925, + "learning_rate": 7.577453186055772e-06, + "loss": 0.2728, + "step": 17373 + }, + { + "epoch": 0.34780171658784376, + "grad_norm": 1.1165586709976196, + "learning_rate": 7.577175389774903e-06, + "loss": 0.3307, + "step": 17374 + }, + { + "epoch": 0.3478217351049721, + "grad_norm": 1.0122660398483276, + "learning_rate": 7.576897582660142e-06, + "loss": 0.2829, + "step": 17375 + }, + { + "epoch": 0.34784175362210046, + "grad_norm": 1.0534441471099854, + "learning_rate": 7.5766197647126585e-06, + "loss": 0.3452, + "step": 17376 + }, + { + "epoch": 0.3478617721392288, + "grad_norm": 1.0232510566711426, + "learning_rate": 7.57634193593362e-06, + "loss": 0.3119, + "step": 17377 + }, + { + "epoch": 0.3478817906563571, + "grad_norm": 1.1401009559631348, + "learning_rate": 7.576064096324194e-06, + "loss": 0.3393, + "step": 17378 + }, + { + "epoch": 0.34790180917348545, + "grad_norm": 1.0657789707183838, + "learning_rate": 7.575786245885549e-06, + "loss": 0.2731, + "step": 17379 + }, + { + "epoch": 0.3479218276906138, + "grad_norm": 1.0161612033843994, + "learning_rate": 7.575508384618855e-06, + "loss": 0.2958, + "step": 17380 + }, + { + "epoch": 0.34794184620774216, + "grad_norm": 1.07626211643219, + "learning_rate": 7.575230512525275e-06, + "loss": 0.307, + "step": 17381 + }, + { + "epoch": 0.3479618647248705, + "grad_norm": 1.2623950242996216, + "learning_rate": 7.57495262960598e-06, + "loss": 0.3653, + "step": 17382 + }, + { + "epoch": 0.34798188324199886, + "grad_norm": 1.0165261030197144, + "learning_rate": 7.574674735862142e-06, + "loss": 0.2918, + "step": 17383 + }, + { + "epoch": 0.3480019017591272, + "grad_norm": 1.36518132686615, + "learning_rate": 7.574396831294922e-06, + "loss": 0.3449, + "step": 17384 + }, + { + "epoch": 0.34802192027625556, + "grad_norm": 1.0154519081115723, + "learning_rate": 7.574118915905493e-06, + "loss": 0.2697, + "step": 17385 + }, + { + "epoch": 0.34804193879338385, + "grad_norm": 1.0503522157669067, + "learning_rate": 7.5738409896950225e-06, + "loss": 0.3228, + "step": 17386 + }, + { + "epoch": 0.3480619573105122, + "grad_norm": 0.9871373772621155, + "learning_rate": 7.573563052664679e-06, + "loss": 0.2923, + "step": 17387 + }, + { + "epoch": 0.34808197582764056, + "grad_norm": 1.1107423305511475, + "learning_rate": 7.5732851048156285e-06, + "loss": 0.371, + "step": 17388 + }, + { + "epoch": 0.3481019943447689, + "grad_norm": 1.0596323013305664, + "learning_rate": 7.573007146149041e-06, + "loss": 0.3364, + "step": 17389 + }, + { + "epoch": 0.34812201286189726, + "grad_norm": 0.941699206829071, + "learning_rate": 7.572729176666085e-06, + "loss": 0.3285, + "step": 17390 + }, + { + "epoch": 0.3481420313790256, + "grad_norm": 1.1621965169906616, + "learning_rate": 7.572451196367931e-06, + "loss": 0.3481, + "step": 17391 + }, + { + "epoch": 0.34816204989615396, + "grad_norm": 1.0669804811477661, + "learning_rate": 7.572173205255744e-06, + "loss": 0.3636, + "step": 17392 + }, + { + "epoch": 0.3481820684132823, + "grad_norm": 1.0254783630371094, + "learning_rate": 7.571895203330697e-06, + "loss": 0.3121, + "step": 17393 + }, + { + "epoch": 0.3482020869304106, + "grad_norm": 1.1956719160079956, + "learning_rate": 7.571617190593954e-06, + "loss": 0.3371, + "step": 17394 + }, + { + "epoch": 0.34822210544753895, + "grad_norm": 1.148950219154358, + "learning_rate": 7.571339167046685e-06, + "loss": 0.332, + "step": 17395 + }, + { + "epoch": 0.3482421239646673, + "grad_norm": 1.143344521522522, + "learning_rate": 7.571061132690062e-06, + "loss": 0.307, + "step": 17396 + }, + { + "epoch": 0.34826214248179566, + "grad_norm": 1.2296888828277588, + "learning_rate": 7.570783087525248e-06, + "loss": 0.3197, + "step": 17397 + }, + { + "epoch": 0.348282160998924, + "grad_norm": 1.3329449892044067, + "learning_rate": 7.570505031553419e-06, + "loss": 0.2861, + "step": 17398 + }, + { + "epoch": 0.34830217951605236, + "grad_norm": 1.2036128044128418, + "learning_rate": 7.570226964775737e-06, + "loss": 0.335, + "step": 17399 + }, + { + "epoch": 0.3483221980331807, + "grad_norm": 1.1399251222610474, + "learning_rate": 7.569948887193377e-06, + "loss": 0.3098, + "step": 17400 + }, + { + "epoch": 0.34834221655030906, + "grad_norm": 1.1691701412200928, + "learning_rate": 7.569670798807503e-06, + "loss": 0.374, + "step": 17401 + }, + { + "epoch": 0.34836223506743735, + "grad_norm": 1.0084655284881592, + "learning_rate": 7.569392699619284e-06, + "loss": 0.2809, + "step": 17402 + }, + { + "epoch": 0.3483822535845657, + "grad_norm": 1.0762859582901, + "learning_rate": 7.569114589629893e-06, + "loss": 0.3501, + "step": 17403 + }, + { + "epoch": 0.34840227210169405, + "grad_norm": 1.0857291221618652, + "learning_rate": 7.568836468840497e-06, + "loss": 0.3331, + "step": 17404 + }, + { + "epoch": 0.3484222906188224, + "grad_norm": 1.0484684705734253, + "learning_rate": 7.5685583372522665e-06, + "loss": 0.284, + "step": 17405 + }, + { + "epoch": 0.34844230913595076, + "grad_norm": 1.068011999130249, + "learning_rate": 7.568280194866369e-06, + "loss": 0.2982, + "step": 17406 + }, + { + "epoch": 0.3484623276530791, + "grad_norm": 1.4972704648971558, + "learning_rate": 7.568002041683973e-06, + "loss": 0.3271, + "step": 17407 + }, + { + "epoch": 0.34848234617020746, + "grad_norm": 1.1233693361282349, + "learning_rate": 7.567723877706249e-06, + "loss": 0.344, + "step": 17408 + }, + { + "epoch": 0.3485023646873358, + "grad_norm": 1.3175421953201294, + "learning_rate": 7.567445702934368e-06, + "loss": 0.3138, + "step": 17409 + }, + { + "epoch": 0.3485223832044641, + "grad_norm": 1.1199510097503662, + "learning_rate": 7.567167517369496e-06, + "loss": 0.3241, + "step": 17410 + }, + { + "epoch": 0.34854240172159245, + "grad_norm": 1.1800944805145264, + "learning_rate": 7.5668893210128055e-06, + "loss": 0.3157, + "step": 17411 + }, + { + "epoch": 0.3485624202387208, + "grad_norm": 1.165250301361084, + "learning_rate": 7.566611113865464e-06, + "loss": 0.3103, + "step": 17412 + }, + { + "epoch": 0.34858243875584916, + "grad_norm": 1.8785781860351562, + "learning_rate": 7.566332895928643e-06, + "loss": 0.798, + "step": 17413 + }, + { + "epoch": 0.3486024572729775, + "grad_norm": 1.1167746782302856, + "learning_rate": 7.566054667203509e-06, + "loss": 0.3436, + "step": 17414 + }, + { + "epoch": 0.34862247579010586, + "grad_norm": 1.166551113128662, + "learning_rate": 7.565776427691234e-06, + "loss": 0.2724, + "step": 17415 + }, + { + "epoch": 0.3486424943072342, + "grad_norm": 1.1098021268844604, + "learning_rate": 7.565498177392988e-06, + "loss": 0.3251, + "step": 17416 + }, + { + "epoch": 0.34866251282436256, + "grad_norm": 0.9973650574684143, + "learning_rate": 7.5652199163099385e-06, + "loss": 0.3013, + "step": 17417 + }, + { + "epoch": 0.34868253134149085, + "grad_norm": 1.3411422967910767, + "learning_rate": 7.5649416444432575e-06, + "loss": 0.3647, + "step": 17418 + }, + { + "epoch": 0.3487025498586192, + "grad_norm": 1.1408041715621948, + "learning_rate": 7.564663361794113e-06, + "loss": 0.3512, + "step": 17419 + }, + { + "epoch": 0.34872256837574755, + "grad_norm": 1.0980980396270752, + "learning_rate": 7.564385068363677e-06, + "loss": 0.3443, + "step": 17420 + }, + { + "epoch": 0.3487425868928759, + "grad_norm": 1.0692534446716309, + "learning_rate": 7.564106764153116e-06, + "loss": 0.2973, + "step": 17421 + }, + { + "epoch": 0.34876260541000426, + "grad_norm": 1.2002640962600708, + "learning_rate": 7.563828449163604e-06, + "loss": 0.3379, + "step": 17422 + }, + { + "epoch": 0.3487826239271326, + "grad_norm": 1.1179534196853638, + "learning_rate": 7.563550123396307e-06, + "loss": 0.3683, + "step": 17423 + }, + { + "epoch": 0.34880264244426096, + "grad_norm": 1.1148334741592407, + "learning_rate": 7.563271786852399e-06, + "loss": 0.2818, + "step": 17424 + }, + { + "epoch": 0.3488226609613893, + "grad_norm": 1.2038183212280273, + "learning_rate": 7.562993439533047e-06, + "loss": 0.3828, + "step": 17425 + }, + { + "epoch": 0.3488426794785176, + "grad_norm": 1.1271519660949707, + "learning_rate": 7.5627150814394225e-06, + "loss": 0.3008, + "step": 17426 + }, + { + "epoch": 0.34886269799564595, + "grad_norm": 1.1755702495574951, + "learning_rate": 7.562436712572694e-06, + "loss": 0.3777, + "step": 17427 + }, + { + "epoch": 0.3488827165127743, + "grad_norm": 1.1068896055221558, + "learning_rate": 7.562158332934033e-06, + "loss": 0.3318, + "step": 17428 + }, + { + "epoch": 0.34890273502990266, + "grad_norm": 1.001263976097107, + "learning_rate": 7.561879942524611e-06, + "loss": 0.3361, + "step": 17429 + }, + { + "epoch": 0.348922753547031, + "grad_norm": 1.429344654083252, + "learning_rate": 7.561601541345596e-06, + "loss": 0.316, + "step": 17430 + }, + { + "epoch": 0.34894277206415936, + "grad_norm": 1.056419014930725, + "learning_rate": 7.561323129398162e-06, + "loss": 0.2947, + "step": 17431 + }, + { + "epoch": 0.3489627905812877, + "grad_norm": 1.2036393880844116, + "learning_rate": 7.561044706683474e-06, + "loss": 0.3264, + "step": 17432 + }, + { + "epoch": 0.34898280909841606, + "grad_norm": 1.025086760520935, + "learning_rate": 7.5607662732027065e-06, + "loss": 0.3061, + "step": 17433 + }, + { + "epoch": 0.34900282761554435, + "grad_norm": 1.0295977592468262, + "learning_rate": 7.560487828957027e-06, + "loss": 0.2987, + "step": 17434 + }, + { + "epoch": 0.3490228461326727, + "grad_norm": 1.2860628366470337, + "learning_rate": 7.560209373947609e-06, + "loss": 0.4102, + "step": 17435 + }, + { + "epoch": 0.34904286464980105, + "grad_norm": 1.1053186655044556, + "learning_rate": 7.559930908175623e-06, + "loss": 0.3231, + "step": 17436 + }, + { + "epoch": 0.3490628831669294, + "grad_norm": 1.0755780935287476, + "learning_rate": 7.559652431642237e-06, + "loss": 0.322, + "step": 17437 + }, + { + "epoch": 0.34908290168405776, + "grad_norm": 1.0821421146392822, + "learning_rate": 7.559373944348622e-06, + "loss": 0.2749, + "step": 17438 + }, + { + "epoch": 0.3491029202011861, + "grad_norm": 1.095370888710022, + "learning_rate": 7.5590954462959524e-06, + "loss": 0.318, + "step": 17439 + }, + { + "epoch": 0.34912293871831446, + "grad_norm": 1.0588396787643433, + "learning_rate": 7.558816937485393e-06, + "loss": 0.3627, + "step": 17440 + }, + { + "epoch": 0.3491429572354428, + "grad_norm": 1.209373950958252, + "learning_rate": 7.55853841791812e-06, + "loss": 0.2848, + "step": 17441 + }, + { + "epoch": 0.3491629757525711, + "grad_norm": 1.112347960472107, + "learning_rate": 7.5582598875953015e-06, + "loss": 0.3047, + "step": 17442 + }, + { + "epoch": 0.34918299426969945, + "grad_norm": 1.1382207870483398, + "learning_rate": 7.557981346518108e-06, + "loss": 0.3226, + "step": 17443 + }, + { + "epoch": 0.3492030127868278, + "grad_norm": 1.822035312652588, + "learning_rate": 7.557702794687713e-06, + "loss": 0.7831, + "step": 17444 + }, + { + "epoch": 0.34922303130395616, + "grad_norm": 1.1465096473693848, + "learning_rate": 7.557424232105286e-06, + "loss": 0.2862, + "step": 17445 + }, + { + "epoch": 0.3492430498210845, + "grad_norm": 1.3379265069961548, + "learning_rate": 7.557145658771998e-06, + "loss": 0.3049, + "step": 17446 + }, + { + "epoch": 0.34926306833821286, + "grad_norm": 1.6477923393249512, + "learning_rate": 7.556867074689018e-06, + "loss": 0.8157, + "step": 17447 + }, + { + "epoch": 0.3492830868553412, + "grad_norm": 1.1092066764831543, + "learning_rate": 7.556588479857521e-06, + "loss": 0.3256, + "step": 17448 + }, + { + "epoch": 0.34930310537246956, + "grad_norm": 1.2009990215301514, + "learning_rate": 7.5563098742786755e-06, + "loss": 0.3104, + "step": 17449 + }, + { + "epoch": 0.34932312388959785, + "grad_norm": 1.082364559173584, + "learning_rate": 7.556031257953654e-06, + "loss": 0.3131, + "step": 17450 + }, + { + "epoch": 0.3493431424067262, + "grad_norm": 1.857686161994934, + "learning_rate": 7.555752630883626e-06, + "loss": 0.7729, + "step": 17451 + }, + { + "epoch": 0.34936316092385455, + "grad_norm": 1.146471619606018, + "learning_rate": 7.555473993069764e-06, + "loss": 0.3125, + "step": 17452 + }, + { + "epoch": 0.3493831794409829, + "grad_norm": 1.0521512031555176, + "learning_rate": 7.5551953445132394e-06, + "loss": 0.3353, + "step": 17453 + }, + { + "epoch": 0.34940319795811126, + "grad_norm": 1.1609255075454712, + "learning_rate": 7.554916685215223e-06, + "loss": 0.2883, + "step": 17454 + }, + { + "epoch": 0.3494232164752396, + "grad_norm": 1.0781809091567993, + "learning_rate": 7.554638015176889e-06, + "loss": 0.281, + "step": 17455 + }, + { + "epoch": 0.34944323499236796, + "grad_norm": 0.9936699271202087, + "learning_rate": 7.554359334399406e-06, + "loss": 0.2947, + "step": 17456 + }, + { + "epoch": 0.3494632535094963, + "grad_norm": 1.2071335315704346, + "learning_rate": 7.554080642883944e-06, + "loss": 0.3114, + "step": 17457 + }, + { + "epoch": 0.3494832720266246, + "grad_norm": 1.0453332662582397, + "learning_rate": 7.5538019406316785e-06, + "loss": 0.3137, + "step": 17458 + }, + { + "epoch": 0.34950329054375295, + "grad_norm": 1.0454542636871338, + "learning_rate": 7.5535232276437775e-06, + "loss": 0.3183, + "step": 17459 + }, + { + "epoch": 0.3495233090608813, + "grad_norm": 1.1369986534118652, + "learning_rate": 7.553244503921415e-06, + "loss": 0.3309, + "step": 17460 + }, + { + "epoch": 0.34954332757800965, + "grad_norm": 1.2074698209762573, + "learning_rate": 7.552965769465763e-06, + "loss": 0.324, + "step": 17461 + }, + { + "epoch": 0.349563346095138, + "grad_norm": 1.8014729022979736, + "learning_rate": 7.552687024277991e-06, + "loss": 0.8189, + "step": 17462 + }, + { + "epoch": 0.34958336461226636, + "grad_norm": 1.2143168449401855, + "learning_rate": 7.552408268359274e-06, + "loss": 0.3361, + "step": 17463 + }, + { + "epoch": 0.3496033831293947, + "grad_norm": 1.1588722467422485, + "learning_rate": 7.552129501710781e-06, + "loss": 0.2953, + "step": 17464 + }, + { + "epoch": 0.34962340164652306, + "grad_norm": 1.0568139553070068, + "learning_rate": 7.5518507243336844e-06, + "loss": 0.3255, + "step": 17465 + }, + { + "epoch": 0.34964342016365135, + "grad_norm": 1.1050515174865723, + "learning_rate": 7.551571936229156e-06, + "loss": 0.2915, + "step": 17466 + }, + { + "epoch": 0.3496634386807797, + "grad_norm": 1.0703260898590088, + "learning_rate": 7.55129313739837e-06, + "loss": 0.3636, + "step": 17467 + }, + { + "epoch": 0.34968345719790805, + "grad_norm": 1.0924739837646484, + "learning_rate": 7.551014327842494e-06, + "loss": 0.3043, + "step": 17468 + }, + { + "epoch": 0.3497034757150364, + "grad_norm": 2.1342670917510986, + "learning_rate": 7.550735507562706e-06, + "loss": 0.8432, + "step": 17469 + }, + { + "epoch": 0.34972349423216476, + "grad_norm": 1.1483714580535889, + "learning_rate": 7.550456676560172e-06, + "loss": 0.3491, + "step": 17470 + }, + { + "epoch": 0.3497435127492931, + "grad_norm": 1.9323410987854004, + "learning_rate": 7.55017783483607e-06, + "loss": 0.7713, + "step": 17471 + }, + { + "epoch": 0.34976353126642146, + "grad_norm": 1.1535061597824097, + "learning_rate": 7.549898982391566e-06, + "loss": 0.342, + "step": 17472 + }, + { + "epoch": 0.3497835497835498, + "grad_norm": 1.2527409791946411, + "learning_rate": 7.549620119227836e-06, + "loss": 0.3216, + "step": 17473 + }, + { + "epoch": 0.3498035683006781, + "grad_norm": 1.8191195726394653, + "learning_rate": 7.549341245346054e-06, + "loss": 0.8235, + "step": 17474 + }, + { + "epoch": 0.34982358681780645, + "grad_norm": 1.2433607578277588, + "learning_rate": 7.549062360747388e-06, + "loss": 0.3073, + "step": 17475 + }, + { + "epoch": 0.3498436053349348, + "grad_norm": 1.184632420539856, + "learning_rate": 7.548783465433013e-06, + "loss": 0.3173, + "step": 17476 + }, + { + "epoch": 0.34986362385206315, + "grad_norm": 1.0749444961547852, + "learning_rate": 7.5485045594041016e-06, + "loss": 0.3181, + "step": 17477 + }, + { + "epoch": 0.3498836423691915, + "grad_norm": 1.144972562789917, + "learning_rate": 7.548225642661824e-06, + "loss": 0.3467, + "step": 17478 + }, + { + "epoch": 0.34990366088631986, + "grad_norm": 1.0923820734024048, + "learning_rate": 7.547946715207355e-06, + "loss": 0.2871, + "step": 17479 + }, + { + "epoch": 0.3499236794034482, + "grad_norm": 1.0899523496627808, + "learning_rate": 7.547667777041866e-06, + "loss": 0.4017, + "step": 17480 + }, + { + "epoch": 0.34994369792057656, + "grad_norm": 1.1249606609344482, + "learning_rate": 7.54738882816653e-06, + "loss": 0.3164, + "step": 17481 + }, + { + "epoch": 0.34996371643770485, + "grad_norm": 1.1465303897857666, + "learning_rate": 7.547109868582521e-06, + "loss": 0.3431, + "step": 17482 + }, + { + "epoch": 0.3499837349548332, + "grad_norm": 1.031313419342041, + "learning_rate": 7.546830898291008e-06, + "loss": 0.3173, + "step": 17483 + }, + { + "epoch": 0.35000375347196155, + "grad_norm": 2.024439811706543, + "learning_rate": 7.546551917293168e-06, + "loss": 0.7811, + "step": 17484 + }, + { + "epoch": 0.3500237719890899, + "grad_norm": 1.1958187818527222, + "learning_rate": 7.54627292559017e-06, + "loss": 0.3375, + "step": 17485 + }, + { + "epoch": 0.35004379050621826, + "grad_norm": 1.9213557243347168, + "learning_rate": 7.545993923183188e-06, + "loss": 0.7987, + "step": 17486 + }, + { + "epoch": 0.3500638090233466, + "grad_norm": 0.9995277523994446, + "learning_rate": 7.545714910073398e-06, + "loss": 0.3453, + "step": 17487 + }, + { + "epoch": 0.35008382754047496, + "grad_norm": 1.0543365478515625, + "learning_rate": 7.545435886261968e-06, + "loss": 0.3049, + "step": 17488 + }, + { + "epoch": 0.3501038460576033, + "grad_norm": 1.0263993740081787, + "learning_rate": 7.545156851750075e-06, + "loss": 0.3254, + "step": 17489 + }, + { + "epoch": 0.3501238645747316, + "grad_norm": 1.0437560081481934, + "learning_rate": 7.54487780653889e-06, + "loss": 0.2876, + "step": 17490 + }, + { + "epoch": 0.35014388309185995, + "grad_norm": 1.0815283060073853, + "learning_rate": 7.544598750629586e-06, + "loss": 0.344, + "step": 17491 + }, + { + "epoch": 0.3501639016089883, + "grad_norm": 1.1103228330612183, + "learning_rate": 7.5443196840233355e-06, + "loss": 0.3054, + "step": 17492 + }, + { + "epoch": 0.35018392012611665, + "grad_norm": 1.1392792463302612, + "learning_rate": 7.544040606721315e-06, + "loss": 0.3388, + "step": 17493 + }, + { + "epoch": 0.350203938643245, + "grad_norm": 1.946069598197937, + "learning_rate": 7.543761518724693e-06, + "loss": 0.7775, + "step": 17494 + }, + { + "epoch": 0.35022395716037336, + "grad_norm": 1.1172852516174316, + "learning_rate": 7.543482420034647e-06, + "loss": 0.3188, + "step": 17495 + }, + { + "epoch": 0.3502439756775017, + "grad_norm": 1.8941116333007812, + "learning_rate": 7.543203310652346e-06, + "loss": 0.8233, + "step": 17496 + }, + { + "epoch": 0.35026399419463006, + "grad_norm": 0.9483193159103394, + "learning_rate": 7.5429241905789675e-06, + "loss": 0.2882, + "step": 17497 + }, + { + "epoch": 0.35028401271175835, + "grad_norm": 1.0738555192947388, + "learning_rate": 7.5426450598156805e-06, + "loss": 0.3525, + "step": 17498 + }, + { + "epoch": 0.3503040312288867, + "grad_norm": 1.1305826902389526, + "learning_rate": 7.542365918363664e-06, + "loss": 0.3414, + "step": 17499 + }, + { + "epoch": 0.35032404974601505, + "grad_norm": 1.029841423034668, + "learning_rate": 7.542086766224086e-06, + "loss": 0.3286, + "step": 17500 + }, + { + "epoch": 0.3503440682631434, + "grad_norm": 1.0691648721694946, + "learning_rate": 7.541807603398123e-06, + "loss": 0.3116, + "step": 17501 + }, + { + "epoch": 0.35036408678027176, + "grad_norm": 1.1162232160568237, + "learning_rate": 7.541528429886949e-06, + "loss": 0.3598, + "step": 17502 + }, + { + "epoch": 0.3503841052974001, + "grad_norm": 1.0695027112960815, + "learning_rate": 7.541249245691735e-06, + "loss": 0.3419, + "step": 17503 + }, + { + "epoch": 0.35040412381452846, + "grad_norm": 1.136061668395996, + "learning_rate": 7.540970050813656e-06, + "loss": 0.302, + "step": 17504 + }, + { + "epoch": 0.3504241423316568, + "grad_norm": 1.1611398458480835, + "learning_rate": 7.5406908452538865e-06, + "loss": 0.3136, + "step": 17505 + }, + { + "epoch": 0.3504441608487851, + "grad_norm": 0.9861056208610535, + "learning_rate": 7.5404116290135995e-06, + "loss": 0.3007, + "step": 17506 + }, + { + "epoch": 0.35046417936591345, + "grad_norm": 1.2040953636169434, + "learning_rate": 7.540132402093968e-06, + "loss": 0.4043, + "step": 17507 + }, + { + "epoch": 0.3504841978830418, + "grad_norm": 1.0753564834594727, + "learning_rate": 7.539853164496167e-06, + "loss": 0.3251, + "step": 17508 + }, + { + "epoch": 0.35050421640017015, + "grad_norm": 1.2580736875534058, + "learning_rate": 7.53957391622137e-06, + "loss": 0.3289, + "step": 17509 + }, + { + "epoch": 0.3505242349172985, + "grad_norm": 1.018180012702942, + "learning_rate": 7.53929465727075e-06, + "loss": 0.3075, + "step": 17510 + }, + { + "epoch": 0.35054425343442686, + "grad_norm": 1.9895473718643188, + "learning_rate": 7.539015387645483e-06, + "loss": 0.7584, + "step": 17511 + }, + { + "epoch": 0.3505642719515552, + "grad_norm": 1.2414319515228271, + "learning_rate": 7.53873610734674e-06, + "loss": 0.2964, + "step": 17512 + }, + { + "epoch": 0.35058429046868356, + "grad_norm": 1.9230785369873047, + "learning_rate": 7.538456816375699e-06, + "loss": 0.7815, + "step": 17513 + }, + { + "epoch": 0.35060430898581185, + "grad_norm": 1.1115970611572266, + "learning_rate": 7.53817751473353e-06, + "loss": 0.3215, + "step": 17514 + }, + { + "epoch": 0.3506243275029402, + "grad_norm": 1.1249982118606567, + "learning_rate": 7.537898202421409e-06, + "loss": 0.3215, + "step": 17515 + }, + { + "epoch": 0.35064434602006855, + "grad_norm": 1.9509141445159912, + "learning_rate": 7.537618879440511e-06, + "loss": 0.7853, + "step": 17516 + }, + { + "epoch": 0.3506643645371969, + "grad_norm": 2.1127889156341553, + "learning_rate": 7.5373395457920084e-06, + "loss": 0.7783, + "step": 17517 + }, + { + "epoch": 0.35068438305432525, + "grad_norm": 1.1263659000396729, + "learning_rate": 7.537060201477078e-06, + "loss": 0.3517, + "step": 17518 + }, + { + "epoch": 0.3507044015714536, + "grad_norm": 1.0728845596313477, + "learning_rate": 7.536780846496891e-06, + "loss": 0.2754, + "step": 17519 + }, + { + "epoch": 0.35072442008858196, + "grad_norm": 1.08003568649292, + "learning_rate": 7.536501480852625e-06, + "loss": 0.341, + "step": 17520 + }, + { + "epoch": 0.3507444386057103, + "grad_norm": 1.13763427734375, + "learning_rate": 7.53622210454545e-06, + "loss": 0.3368, + "step": 17521 + }, + { + "epoch": 0.3507644571228386, + "grad_norm": 1.3142813444137573, + "learning_rate": 7.535942717576546e-06, + "loss": 0.3039, + "step": 17522 + }, + { + "epoch": 0.35078447563996695, + "grad_norm": 1.1107922792434692, + "learning_rate": 7.535663319947082e-06, + "loss": 0.3418, + "step": 17523 + }, + { + "epoch": 0.3508044941570953, + "grad_norm": 1.134842038154602, + "learning_rate": 7.535383911658236e-06, + "loss": 0.3444, + "step": 17524 + }, + { + "epoch": 0.35082451267422365, + "grad_norm": 1.0496736764907837, + "learning_rate": 7.5351044927111806e-06, + "loss": 0.3141, + "step": 17525 + }, + { + "epoch": 0.350844531191352, + "grad_norm": 1.0579721927642822, + "learning_rate": 7.534825063107094e-06, + "loss": 0.3288, + "step": 17526 + }, + { + "epoch": 0.35086454970848036, + "grad_norm": 1.8864624500274658, + "learning_rate": 7.534545622847146e-06, + "loss": 0.8749, + "step": 17527 + }, + { + "epoch": 0.3508845682256087, + "grad_norm": 1.1512556076049805, + "learning_rate": 7.534266171932515e-06, + "loss": 0.2794, + "step": 17528 + }, + { + "epoch": 0.35090458674273706, + "grad_norm": 1.2230956554412842, + "learning_rate": 7.533986710364374e-06, + "loss": 0.3178, + "step": 17529 + }, + { + "epoch": 0.35092460525986535, + "grad_norm": 1.2140278816223145, + "learning_rate": 7.533707238143897e-06, + "loss": 0.3011, + "step": 17530 + }, + { + "epoch": 0.3509446237769937, + "grad_norm": 1.9582959413528442, + "learning_rate": 7.533427755272262e-06, + "loss": 0.8021, + "step": 17531 + }, + { + "epoch": 0.35096464229412205, + "grad_norm": 1.13774573802948, + "learning_rate": 7.53314826175064e-06, + "loss": 0.3266, + "step": 17532 + }, + { + "epoch": 0.3509846608112504, + "grad_norm": 1.1170562505722046, + "learning_rate": 7.5328687575802095e-06, + "loss": 0.335, + "step": 17533 + }, + { + "epoch": 0.35100467932837875, + "grad_norm": 1.180586576461792, + "learning_rate": 7.532589242762143e-06, + "loss": 0.3373, + "step": 17534 + }, + { + "epoch": 0.3510246978455071, + "grad_norm": 1.0928010940551758, + "learning_rate": 7.532309717297618e-06, + "loss": 0.3131, + "step": 17535 + }, + { + "epoch": 0.35104471636263546, + "grad_norm": 1.083670735359192, + "learning_rate": 7.532030181187806e-06, + "loss": 0.3051, + "step": 17536 + }, + { + "epoch": 0.3510647348797638, + "grad_norm": 1.969597339630127, + "learning_rate": 7.531750634433884e-06, + "loss": 0.7991, + "step": 17537 + }, + { + "epoch": 0.3510847533968921, + "grad_norm": 1.548280954360962, + "learning_rate": 7.531471077037027e-06, + "loss": 0.3552, + "step": 17538 + }, + { + "epoch": 0.35110477191402045, + "grad_norm": 1.081891417503357, + "learning_rate": 7.531191508998412e-06, + "loss": 0.3514, + "step": 17539 + }, + { + "epoch": 0.3511247904311488, + "grad_norm": 1.1477105617523193, + "learning_rate": 7.530911930319212e-06, + "loss": 0.3564, + "step": 17540 + }, + { + "epoch": 0.35114480894827715, + "grad_norm": 1.15940523147583, + "learning_rate": 7.530632341000602e-06, + "loss": 0.3509, + "step": 17541 + }, + { + "epoch": 0.3511648274654055, + "grad_norm": 1.163061499595642, + "learning_rate": 7.530352741043758e-06, + "loss": 0.3642, + "step": 17542 + }, + { + "epoch": 0.35118484598253386, + "grad_norm": 1.2749847173690796, + "learning_rate": 7.530073130449857e-06, + "loss": 0.3554, + "step": 17543 + }, + { + "epoch": 0.3512048644996622, + "grad_norm": 1.1389784812927246, + "learning_rate": 7.529793509220074e-06, + "loss": 0.3167, + "step": 17544 + }, + { + "epoch": 0.35122488301679056, + "grad_norm": 1.2868667840957642, + "learning_rate": 7.529513877355581e-06, + "loss": 0.3399, + "step": 17545 + }, + { + "epoch": 0.35124490153391885, + "grad_norm": 1.1421302556991577, + "learning_rate": 7.529234234857557e-06, + "loss": 0.3468, + "step": 17546 + }, + { + "epoch": 0.3512649200510472, + "grad_norm": 1.1459518671035767, + "learning_rate": 7.528954581727177e-06, + "loss": 0.3306, + "step": 17547 + }, + { + "epoch": 0.35128493856817555, + "grad_norm": 1.258082389831543, + "learning_rate": 7.528674917965615e-06, + "loss": 0.2924, + "step": 17548 + }, + { + "epoch": 0.3513049570853039, + "grad_norm": 1.0336753129959106, + "learning_rate": 7.528395243574049e-06, + "loss": 0.3026, + "step": 17549 + }, + { + "epoch": 0.35132497560243225, + "grad_norm": 1.2400177717208862, + "learning_rate": 7.528115558553653e-06, + "loss": 0.3154, + "step": 17550 + }, + { + "epoch": 0.3513449941195606, + "grad_norm": 1.0531784296035767, + "learning_rate": 7.527835862905604e-06, + "loss": 0.3254, + "step": 17551 + }, + { + "epoch": 0.35136501263668896, + "grad_norm": 0.9961960911750793, + "learning_rate": 7.5275561566310755e-06, + "loss": 0.2725, + "step": 17552 + }, + { + "epoch": 0.3513850311538173, + "grad_norm": 1.1310728788375854, + "learning_rate": 7.527276439731246e-06, + "loss": 0.35, + "step": 17553 + }, + { + "epoch": 0.3514050496709456, + "grad_norm": 1.1720203161239624, + "learning_rate": 7.526996712207289e-06, + "loss": 0.3269, + "step": 17554 + }, + { + "epoch": 0.35142506818807395, + "grad_norm": 1.7664068937301636, + "learning_rate": 7.526716974060383e-06, + "loss": 0.8354, + "step": 17555 + }, + { + "epoch": 0.3514450867052023, + "grad_norm": 1.0102996826171875, + "learning_rate": 7.5264372252917015e-06, + "loss": 0.327, + "step": 17556 + }, + { + "epoch": 0.35146510522233065, + "grad_norm": 1.654292106628418, + "learning_rate": 7.526157465902423e-06, + "loss": 0.8419, + "step": 17557 + }, + { + "epoch": 0.351485123739459, + "grad_norm": 1.8709973096847534, + "learning_rate": 7.525877695893721e-06, + "loss": 0.8928, + "step": 17558 + }, + { + "epoch": 0.35150514225658736, + "grad_norm": 1.220804214477539, + "learning_rate": 7.525597915266774e-06, + "loss": 0.3281, + "step": 17559 + }, + { + "epoch": 0.3515251607737157, + "grad_norm": 1.1562837362289429, + "learning_rate": 7.525318124022755e-06, + "loss": 0.3177, + "step": 17560 + }, + { + "epoch": 0.35154517929084406, + "grad_norm": 1.1399991512298584, + "learning_rate": 7.525038322162843e-06, + "loss": 0.3509, + "step": 17561 + }, + { + "epoch": 0.35156519780797235, + "grad_norm": 1.1464755535125732, + "learning_rate": 7.524758509688214e-06, + "loss": 0.2983, + "step": 17562 + }, + { + "epoch": 0.3515852163251007, + "grad_norm": 1.398148775100708, + "learning_rate": 7.524478686600043e-06, + "loss": 0.3253, + "step": 17563 + }, + { + "epoch": 0.35160523484222905, + "grad_norm": 0.926957905292511, + "learning_rate": 7.524198852899507e-06, + "loss": 0.2838, + "step": 17564 + }, + { + "epoch": 0.3516252533593574, + "grad_norm": 1.009337067604065, + "learning_rate": 7.523919008587781e-06, + "loss": 0.3086, + "step": 17565 + }, + { + "epoch": 0.35164527187648575, + "grad_norm": 1.1197032928466797, + "learning_rate": 7.523639153666044e-06, + "loss": 0.331, + "step": 17566 + }, + { + "epoch": 0.3516652903936141, + "grad_norm": 1.1490358114242554, + "learning_rate": 7.5233592881354714e-06, + "loss": 0.3536, + "step": 17567 + }, + { + "epoch": 0.35168530891074246, + "grad_norm": 1.000718116760254, + "learning_rate": 7.523079411997238e-06, + "loss": 0.2975, + "step": 17568 + }, + { + "epoch": 0.3517053274278708, + "grad_norm": 1.0288747549057007, + "learning_rate": 7.522799525252522e-06, + "loss": 0.3291, + "step": 17569 + }, + { + "epoch": 0.3517253459449991, + "grad_norm": 1.101529598236084, + "learning_rate": 7.522519627902501e-06, + "loss": 0.318, + "step": 17570 + }, + { + "epoch": 0.35174536446212745, + "grad_norm": 2.0153279304504395, + "learning_rate": 7.52223971994835e-06, + "loss": 0.8101, + "step": 17571 + }, + { + "epoch": 0.3517653829792558, + "grad_norm": 1.037489891052246, + "learning_rate": 7.521959801391246e-06, + "loss": 0.3558, + "step": 17572 + }, + { + "epoch": 0.35178540149638415, + "grad_norm": 1.0161564350128174, + "learning_rate": 7.521679872232366e-06, + "loss": 0.335, + "step": 17573 + }, + { + "epoch": 0.3518054200135125, + "grad_norm": 1.141257643699646, + "learning_rate": 7.521399932472884e-06, + "loss": 0.2885, + "step": 17574 + }, + { + "epoch": 0.35182543853064085, + "grad_norm": 1.038742184638977, + "learning_rate": 7.5211199821139816e-06, + "loss": 0.3228, + "step": 17575 + }, + { + "epoch": 0.3518454570477692, + "grad_norm": 1.0233732461929321, + "learning_rate": 7.520840021156832e-06, + "loss": 0.3093, + "step": 17576 + }, + { + "epoch": 0.35186547556489756, + "grad_norm": 1.2195262908935547, + "learning_rate": 7.520560049602615e-06, + "loss": 0.3324, + "step": 17577 + }, + { + "epoch": 0.35188549408202585, + "grad_norm": 1.1392390727996826, + "learning_rate": 7.520280067452504e-06, + "loss": 0.2961, + "step": 17578 + }, + { + "epoch": 0.3519055125991542, + "grad_norm": 1.1026772260665894, + "learning_rate": 7.520000074707681e-06, + "loss": 0.3032, + "step": 17579 + }, + { + "epoch": 0.35192553111628255, + "grad_norm": 1.8559175729751587, + "learning_rate": 7.519720071369317e-06, + "loss": 0.8175, + "step": 17580 + }, + { + "epoch": 0.3519455496334109, + "grad_norm": 1.1206079721450806, + "learning_rate": 7.519440057438593e-06, + "loss": 0.2998, + "step": 17581 + }, + { + "epoch": 0.35196556815053925, + "grad_norm": 1.795249581336975, + "learning_rate": 7.519160032916686e-06, + "loss": 0.7901, + "step": 17582 + }, + { + "epoch": 0.3519855866676676, + "grad_norm": 1.0894272327423096, + "learning_rate": 7.518879997804771e-06, + "loss": 0.2823, + "step": 17583 + }, + { + "epoch": 0.35200560518479596, + "grad_norm": 1.1619294881820679, + "learning_rate": 7.5185999521040285e-06, + "loss": 0.3196, + "step": 17584 + }, + { + "epoch": 0.3520256237019243, + "grad_norm": 1.2544306516647339, + "learning_rate": 7.518319895815632e-06, + "loss": 0.3172, + "step": 17585 + }, + { + "epoch": 0.3520456422190526, + "grad_norm": 1.1258238554000854, + "learning_rate": 7.518039828940762e-06, + "loss": 0.3248, + "step": 17586 + }, + { + "epoch": 0.35206566073618095, + "grad_norm": 1.1843408346176147, + "learning_rate": 7.517759751480593e-06, + "loss": 0.3064, + "step": 17587 + }, + { + "epoch": 0.3520856792533093, + "grad_norm": 1.2539982795715332, + "learning_rate": 7.517479663436304e-06, + "loss": 0.2887, + "step": 17588 + }, + { + "epoch": 0.35210569777043765, + "grad_norm": 1.972093939781189, + "learning_rate": 7.517199564809073e-06, + "loss": 0.8143, + "step": 17589 + }, + { + "epoch": 0.352125716287566, + "grad_norm": 0.9614388346672058, + "learning_rate": 7.516919455600078e-06, + "loss": 0.3237, + "step": 17590 + }, + { + "epoch": 0.35214573480469435, + "grad_norm": 1.1489965915679932, + "learning_rate": 7.516639335810493e-06, + "loss": 0.3012, + "step": 17591 + }, + { + "epoch": 0.3521657533218227, + "grad_norm": 1.1922008991241455, + "learning_rate": 7.516359205441499e-06, + "loss": 0.3572, + "step": 17592 + }, + { + "epoch": 0.35218577183895106, + "grad_norm": 1.1111723184585571, + "learning_rate": 7.516079064494273e-06, + "loss": 0.3387, + "step": 17593 + }, + { + "epoch": 0.35220579035607935, + "grad_norm": 1.086371660232544, + "learning_rate": 7.5157989129699895e-06, + "loss": 0.3259, + "step": 17594 + }, + { + "epoch": 0.3522258088732077, + "grad_norm": 1.09042227268219, + "learning_rate": 7.51551875086983e-06, + "loss": 0.2777, + "step": 17595 + }, + { + "epoch": 0.35224582739033605, + "grad_norm": 1.177578330039978, + "learning_rate": 7.515238578194972e-06, + "loss": 0.333, + "step": 17596 + }, + { + "epoch": 0.3522658459074644, + "grad_norm": 1.2246036529541016, + "learning_rate": 7.514958394946592e-06, + "loss": 0.3421, + "step": 17597 + }, + { + "epoch": 0.35228586442459275, + "grad_norm": 1.2054107189178467, + "learning_rate": 7.514678201125867e-06, + "loss": 0.323, + "step": 17598 + }, + { + "epoch": 0.3523058829417211, + "grad_norm": 1.049576997756958, + "learning_rate": 7.514397996733977e-06, + "loss": 0.2991, + "step": 17599 + }, + { + "epoch": 0.35232590145884946, + "grad_norm": 1.103285789489746, + "learning_rate": 7.514117781772099e-06, + "loss": 0.3391, + "step": 17600 + }, + { + "epoch": 0.3523459199759778, + "grad_norm": 1.1259827613830566, + "learning_rate": 7.51383755624141e-06, + "loss": 0.3027, + "step": 17601 + }, + { + "epoch": 0.3523659384931061, + "grad_norm": 1.105078101158142, + "learning_rate": 7.51355732014309e-06, + "loss": 0.3455, + "step": 17602 + }, + { + "epoch": 0.35238595701023445, + "grad_norm": 1.8196473121643066, + "learning_rate": 7.513277073478316e-06, + "loss": 0.8109, + "step": 17603 + }, + { + "epoch": 0.3524059755273628, + "grad_norm": 1.0043588876724243, + "learning_rate": 7.5129968162482656e-06, + "loss": 0.2527, + "step": 17604 + }, + { + "epoch": 0.35242599404449115, + "grad_norm": 1.1663851737976074, + "learning_rate": 7.512716548454118e-06, + "loss": 0.2966, + "step": 17605 + }, + { + "epoch": 0.3524460125616195, + "grad_norm": 1.1366565227508545, + "learning_rate": 7.5124362700970496e-06, + "loss": 0.3329, + "step": 17606 + }, + { + "epoch": 0.35246603107874785, + "grad_norm": 1.3580883741378784, + "learning_rate": 7.51215598117824e-06, + "loss": 0.3634, + "step": 17607 + }, + { + "epoch": 0.3524860495958762, + "grad_norm": 1.2173504829406738, + "learning_rate": 7.5118756816988685e-06, + "loss": 0.3037, + "step": 17608 + }, + { + "epoch": 0.35250606811300456, + "grad_norm": 1.313390851020813, + "learning_rate": 7.51159537166011e-06, + "loss": 0.332, + "step": 17609 + }, + { + "epoch": 0.35252608663013285, + "grad_norm": 1.1131824254989624, + "learning_rate": 7.511315051063147e-06, + "loss": 0.3279, + "step": 17610 + }, + { + "epoch": 0.3525461051472612, + "grad_norm": 1.096817970275879, + "learning_rate": 7.511034719909155e-06, + "loss": 0.3269, + "step": 17611 + }, + { + "epoch": 0.35256612366438955, + "grad_norm": 1.0580167770385742, + "learning_rate": 7.510754378199314e-06, + "loss": 0.3191, + "step": 17612 + }, + { + "epoch": 0.3525861421815179, + "grad_norm": 1.074396014213562, + "learning_rate": 7.510474025934802e-06, + "loss": 0.3654, + "step": 17613 + }, + { + "epoch": 0.35260616069864625, + "grad_norm": 1.0847665071487427, + "learning_rate": 7.510193663116796e-06, + "loss": 0.2961, + "step": 17614 + }, + { + "epoch": 0.3526261792157746, + "grad_norm": 1.852533221244812, + "learning_rate": 7.5099132897464775e-06, + "loss": 0.7911, + "step": 17615 + }, + { + "epoch": 0.35264619773290296, + "grad_norm": 1.0822457075119019, + "learning_rate": 7.509632905825024e-06, + "loss": 0.3114, + "step": 17616 + }, + { + "epoch": 0.3526662162500313, + "grad_norm": 1.1545878648757935, + "learning_rate": 7.509352511353614e-06, + "loss": 0.2846, + "step": 17617 + }, + { + "epoch": 0.3526862347671596, + "grad_norm": 1.1089085340499878, + "learning_rate": 7.5090721063334236e-06, + "loss": 0.3399, + "step": 17618 + }, + { + "epoch": 0.35270625328428795, + "grad_norm": 1.8796846866607666, + "learning_rate": 7.5087916907656375e-06, + "loss": 0.8106, + "step": 17619 + }, + { + "epoch": 0.3527262718014163, + "grad_norm": 1.0299179553985596, + "learning_rate": 7.508511264651428e-06, + "loss": 0.3057, + "step": 17620 + }, + { + "epoch": 0.35274629031854465, + "grad_norm": 1.2810784578323364, + "learning_rate": 7.508230827991978e-06, + "loss": 0.2831, + "step": 17621 + }, + { + "epoch": 0.352766308835673, + "grad_norm": 1.0458407402038574, + "learning_rate": 7.507950380788465e-06, + "loss": 0.3161, + "step": 17622 + }, + { + "epoch": 0.35278632735280135, + "grad_norm": 1.0475986003875732, + "learning_rate": 7.507669923042069e-06, + "loss": 0.2958, + "step": 17623 + }, + { + "epoch": 0.3528063458699297, + "grad_norm": 1.0565086603164673, + "learning_rate": 7.507389454753969e-06, + "loss": 0.3201, + "step": 17624 + }, + { + "epoch": 0.35282636438705806, + "grad_norm": 1.035385012626648, + "learning_rate": 7.5071089759253426e-06, + "loss": 0.3081, + "step": 17625 + }, + { + "epoch": 0.35284638290418635, + "grad_norm": 1.1814583539962769, + "learning_rate": 7.506828486557369e-06, + "loss": 0.2954, + "step": 17626 + }, + { + "epoch": 0.3528664014213147, + "grad_norm": 1.090539813041687, + "learning_rate": 7.506547986651228e-06, + "loss": 0.3359, + "step": 17627 + }, + { + "epoch": 0.35288641993844305, + "grad_norm": 1.1398584842681885, + "learning_rate": 7.5062674762081e-06, + "loss": 0.3296, + "step": 17628 + }, + { + "epoch": 0.3529064384555714, + "grad_norm": 1.0492817163467407, + "learning_rate": 7.5059869552291605e-06, + "loss": 0.3151, + "step": 17629 + }, + { + "epoch": 0.35292645697269975, + "grad_norm": 1.0158276557922363, + "learning_rate": 7.505706423715594e-06, + "loss": 0.3171, + "step": 17630 + }, + { + "epoch": 0.3529464754898281, + "grad_norm": 1.1563403606414795, + "learning_rate": 7.505425881668575e-06, + "loss": 0.3133, + "step": 17631 + }, + { + "epoch": 0.35296649400695645, + "grad_norm": 1.0092824697494507, + "learning_rate": 7.5051453290892865e-06, + "loss": 0.2629, + "step": 17632 + }, + { + "epoch": 0.3529865125240848, + "grad_norm": 1.1715089082717896, + "learning_rate": 7.504864765978904e-06, + "loss": 0.2664, + "step": 17633 + }, + { + "epoch": 0.3530065310412131, + "grad_norm": 1.1176139116287231, + "learning_rate": 7.504584192338609e-06, + "loss": 0.3186, + "step": 17634 + }, + { + "epoch": 0.35302654955834145, + "grad_norm": 1.2916003465652466, + "learning_rate": 7.504303608169583e-06, + "loss": 0.2974, + "step": 17635 + }, + { + "epoch": 0.3530465680754698, + "grad_norm": 1.134627103805542, + "learning_rate": 7.504023013473003e-06, + "loss": 0.3308, + "step": 17636 + }, + { + "epoch": 0.35306658659259815, + "grad_norm": 1.094029188156128, + "learning_rate": 7.503742408250049e-06, + "loss": 0.3164, + "step": 17637 + }, + { + "epoch": 0.3530866051097265, + "grad_norm": 1.0747029781341553, + "learning_rate": 7.503461792501901e-06, + "loss": 0.3338, + "step": 17638 + }, + { + "epoch": 0.35310662362685485, + "grad_norm": 1.035370945930481, + "learning_rate": 7.503181166229736e-06, + "loss": 0.3278, + "step": 17639 + }, + { + "epoch": 0.3531266421439832, + "grad_norm": 2.064345359802246, + "learning_rate": 7.502900529434738e-06, + "loss": 0.8348, + "step": 17640 + }, + { + "epoch": 0.35314666066111156, + "grad_norm": 1.0404045581817627, + "learning_rate": 7.502619882118085e-06, + "loss": 0.3453, + "step": 17641 + }, + { + "epoch": 0.35316667917823985, + "grad_norm": 1.0737961530685425, + "learning_rate": 7.502339224280957e-06, + "loss": 0.3142, + "step": 17642 + }, + { + "epoch": 0.3531866976953682, + "grad_norm": 1.1326966285705566, + "learning_rate": 7.502058555924533e-06, + "loss": 0.3519, + "step": 17643 + }, + { + "epoch": 0.35320671621249655, + "grad_norm": 1.1006181240081787, + "learning_rate": 7.501777877049994e-06, + "loss": 0.3389, + "step": 17644 + }, + { + "epoch": 0.3532267347296249, + "grad_norm": 1.3346301317214966, + "learning_rate": 7.5014971876585175e-06, + "loss": 0.3567, + "step": 17645 + }, + { + "epoch": 0.35324675324675325, + "grad_norm": 1.1582585573196411, + "learning_rate": 7.501216487751285e-06, + "loss": 0.3497, + "step": 17646 + }, + { + "epoch": 0.3532667717638816, + "grad_norm": 1.1685370206832886, + "learning_rate": 7.5009357773294765e-06, + "loss": 0.3591, + "step": 17647 + }, + { + "epoch": 0.35328679028100995, + "grad_norm": 1.1656423807144165, + "learning_rate": 7.500655056394275e-06, + "loss": 0.3211, + "step": 17648 + }, + { + "epoch": 0.3533068087981383, + "grad_norm": 2.1051549911499023, + "learning_rate": 7.500374324946856e-06, + "loss": 0.8211, + "step": 17649 + }, + { + "epoch": 0.3533268273152666, + "grad_norm": 1.0773464441299438, + "learning_rate": 7.500093582988401e-06, + "loss": 0.3088, + "step": 17650 + }, + { + "epoch": 0.35334684583239495, + "grad_norm": 1.1644397974014282, + "learning_rate": 7.49981283052009e-06, + "loss": 0.3477, + "step": 17651 + }, + { + "epoch": 0.3533668643495233, + "grad_norm": 1.1138112545013428, + "learning_rate": 7.499532067543105e-06, + "loss": 0.3221, + "step": 17652 + }, + { + "epoch": 0.35338688286665165, + "grad_norm": 0.9652606844902039, + "learning_rate": 7.499251294058625e-06, + "loss": 0.3074, + "step": 17653 + }, + { + "epoch": 0.35340690138378, + "grad_norm": 1.229627251625061, + "learning_rate": 7.498970510067831e-06, + "loss": 0.3553, + "step": 17654 + }, + { + "epoch": 0.35342691990090835, + "grad_norm": 1.087926983833313, + "learning_rate": 7.498689715571903e-06, + "loss": 0.2807, + "step": 17655 + }, + { + "epoch": 0.3534469384180367, + "grad_norm": 1.1667858362197876, + "learning_rate": 7.4984089105720196e-06, + "loss": 0.309, + "step": 17656 + }, + { + "epoch": 0.35346695693516506, + "grad_norm": 1.0547370910644531, + "learning_rate": 7.498128095069364e-06, + "loss": 0.2764, + "step": 17657 + }, + { + "epoch": 0.35348697545229335, + "grad_norm": 1.1535731554031372, + "learning_rate": 7.497847269065115e-06, + "loss": 0.3577, + "step": 17658 + }, + { + "epoch": 0.3535069939694217, + "grad_norm": 1.0657750368118286, + "learning_rate": 7.497566432560452e-06, + "loss": 0.3233, + "step": 17659 + }, + { + "epoch": 0.35352701248655005, + "grad_norm": 2.0298750400543213, + "learning_rate": 7.49728558555656e-06, + "loss": 0.8614, + "step": 17660 + }, + { + "epoch": 0.3535470310036784, + "grad_norm": 1.1778676509857178, + "learning_rate": 7.497004728054616e-06, + "loss": 0.317, + "step": 17661 + }, + { + "epoch": 0.35356704952080675, + "grad_norm": 1.7826811075210571, + "learning_rate": 7.4967238600558e-06, + "loss": 0.7998, + "step": 17662 + }, + { + "epoch": 0.3535870680379351, + "grad_norm": 1.0972926616668701, + "learning_rate": 7.496442981561296e-06, + "loss": 0.3152, + "step": 17663 + }, + { + "epoch": 0.35360708655506345, + "grad_norm": 1.0960617065429688, + "learning_rate": 7.496162092572281e-06, + "loss": 0.351, + "step": 17664 + }, + { + "epoch": 0.35362710507219175, + "grad_norm": 1.1132200956344604, + "learning_rate": 7.495881193089939e-06, + "loss": 0.3335, + "step": 17665 + }, + { + "epoch": 0.3536471235893201, + "grad_norm": 1.255472183227539, + "learning_rate": 7.495600283115451e-06, + "loss": 0.2905, + "step": 17666 + }, + { + "epoch": 0.35366714210644845, + "grad_norm": 1.0681978464126587, + "learning_rate": 7.495319362649994e-06, + "loss": 0.3528, + "step": 17667 + }, + { + "epoch": 0.3536871606235768, + "grad_norm": 1.1705347299575806, + "learning_rate": 7.495038431694753e-06, + "loss": 0.3666, + "step": 17668 + }, + { + "epoch": 0.35370717914070515, + "grad_norm": 1.2735109329223633, + "learning_rate": 7.494757490250907e-06, + "loss": 0.2976, + "step": 17669 + }, + { + "epoch": 0.3537271976578335, + "grad_norm": 1.3034507036209106, + "learning_rate": 7.494476538319637e-06, + "loss": 0.3484, + "step": 17670 + }, + { + "epoch": 0.35374721617496185, + "grad_norm": 1.2266128063201904, + "learning_rate": 7.494195575902125e-06, + "loss": 0.3124, + "step": 17671 + }, + { + "epoch": 0.3537672346920902, + "grad_norm": 1.1966869831085205, + "learning_rate": 7.49391460299955e-06, + "loss": 0.3113, + "step": 17672 + }, + { + "epoch": 0.3537872532092185, + "grad_norm": 1.075140357017517, + "learning_rate": 7.493633619613096e-06, + "loss": 0.3278, + "step": 17673 + }, + { + "epoch": 0.35380727172634685, + "grad_norm": 1.8902026414871216, + "learning_rate": 7.493352625743944e-06, + "loss": 0.787, + "step": 17674 + }, + { + "epoch": 0.3538272902434752, + "grad_norm": 0.9990206360816956, + "learning_rate": 7.493071621393273e-06, + "loss": 0.3042, + "step": 17675 + }, + { + "epoch": 0.35384730876060355, + "grad_norm": 1.1196926832199097, + "learning_rate": 7.492790606562264e-06, + "loss": 0.2978, + "step": 17676 + }, + { + "epoch": 0.3538673272777319, + "grad_norm": 1.0046371221542358, + "learning_rate": 7.492509581252101e-06, + "loss": 0.3107, + "step": 17677 + }, + { + "epoch": 0.35388734579486025, + "grad_norm": 1.1632554531097412, + "learning_rate": 7.4922285454639645e-06, + "loss": 0.336, + "step": 17678 + }, + { + "epoch": 0.3539073643119886, + "grad_norm": 1.1959238052368164, + "learning_rate": 7.491947499199035e-06, + "loss": 0.3259, + "step": 17679 + }, + { + "epoch": 0.35392738282911695, + "grad_norm": 1.1335031986236572, + "learning_rate": 7.491666442458495e-06, + "loss": 0.3848, + "step": 17680 + }, + { + "epoch": 0.35394740134624525, + "grad_norm": 1.9329899549484253, + "learning_rate": 7.491385375243525e-06, + "loss": 0.7684, + "step": 17681 + }, + { + "epoch": 0.3539674198633736, + "grad_norm": 1.6598747968673706, + "learning_rate": 7.491104297555307e-06, + "loss": 0.788, + "step": 17682 + }, + { + "epoch": 0.35398743838050195, + "grad_norm": 1.0717825889587402, + "learning_rate": 7.490823209395024e-06, + "loss": 0.3165, + "step": 17683 + }, + { + "epoch": 0.3540074568976303, + "grad_norm": 1.0599511861801147, + "learning_rate": 7.4905421107638545e-06, + "loss": 0.3237, + "step": 17684 + }, + { + "epoch": 0.35402747541475865, + "grad_norm": 1.1413413286209106, + "learning_rate": 7.490261001662983e-06, + "loss": 0.292, + "step": 17685 + }, + { + "epoch": 0.354047493931887, + "grad_norm": 1.124417781829834, + "learning_rate": 7.4899798820935895e-06, + "loss": 0.3768, + "step": 17686 + }, + { + "epoch": 0.35406751244901535, + "grad_norm": 2.0000367164611816, + "learning_rate": 7.489698752056856e-06, + "loss": 0.8101, + "step": 17687 + }, + { + "epoch": 0.3540875309661437, + "grad_norm": 1.253478765487671, + "learning_rate": 7.489417611553967e-06, + "loss": 0.3274, + "step": 17688 + }, + { + "epoch": 0.354107549483272, + "grad_norm": 1.2746496200561523, + "learning_rate": 7.489136460586099e-06, + "loss": 0.3027, + "step": 17689 + }, + { + "epoch": 0.35412756800040035, + "grad_norm": 1.1877529621124268, + "learning_rate": 7.488855299154438e-06, + "loss": 0.3503, + "step": 17690 + }, + { + "epoch": 0.3541475865175287, + "grad_norm": 1.109334111213684, + "learning_rate": 7.488574127260167e-06, + "loss": 0.3092, + "step": 17691 + }, + { + "epoch": 0.35416760503465705, + "grad_norm": 1.0581083297729492, + "learning_rate": 7.488292944904464e-06, + "loss": 0.3384, + "step": 17692 + }, + { + "epoch": 0.3541876235517854, + "grad_norm": 1.1313589811325073, + "learning_rate": 7.488011752088513e-06, + "loss": 0.3416, + "step": 17693 + }, + { + "epoch": 0.35420764206891375, + "grad_norm": 1.2328250408172607, + "learning_rate": 7.487730548813497e-06, + "loss": 0.3331, + "step": 17694 + }, + { + "epoch": 0.3542276605860421, + "grad_norm": 1.0242605209350586, + "learning_rate": 7.487449335080596e-06, + "loss": 0.3145, + "step": 17695 + }, + { + "epoch": 0.35424767910317045, + "grad_norm": 1.1386688947677612, + "learning_rate": 7.487168110890995e-06, + "loss": 0.2998, + "step": 17696 + }, + { + "epoch": 0.35426769762029875, + "grad_norm": 1.7813810110092163, + "learning_rate": 7.486886876245871e-06, + "loss": 0.8119, + "step": 17697 + }, + { + "epoch": 0.3542877161374271, + "grad_norm": 1.152675747871399, + "learning_rate": 7.4866056311464116e-06, + "loss": 0.3282, + "step": 17698 + }, + { + "epoch": 0.35430773465455545, + "grad_norm": 1.141677737236023, + "learning_rate": 7.486324375593799e-06, + "loss": 0.2821, + "step": 17699 + }, + { + "epoch": 0.3543277531716838, + "grad_norm": 1.30739164352417, + "learning_rate": 7.486043109589212e-06, + "loss": 0.3602, + "step": 17700 + }, + { + "epoch": 0.35434777168881215, + "grad_norm": 1.1843022108078003, + "learning_rate": 7.485761833133836e-06, + "loss": 0.3247, + "step": 17701 + }, + { + "epoch": 0.3543677902059405, + "grad_norm": 1.2456581592559814, + "learning_rate": 7.485480546228851e-06, + "loss": 0.3483, + "step": 17702 + }, + { + "epoch": 0.35438780872306885, + "grad_norm": 1.1035306453704834, + "learning_rate": 7.48519924887544e-06, + "loss": 0.3422, + "step": 17703 + }, + { + "epoch": 0.3544078272401972, + "grad_norm": 1.0809820890426636, + "learning_rate": 7.484917941074788e-06, + "loss": 0.3203, + "step": 17704 + }, + { + "epoch": 0.3544278457573255, + "grad_norm": 1.1283689737319946, + "learning_rate": 7.484636622828074e-06, + "loss": 0.3166, + "step": 17705 + }, + { + "epoch": 0.35444786427445385, + "grad_norm": 1.057979702949524, + "learning_rate": 7.484355294136485e-06, + "loss": 0.3209, + "step": 17706 + }, + { + "epoch": 0.3544678827915822, + "grad_norm": 1.0539549589157104, + "learning_rate": 7.484073955001199e-06, + "loss": 0.2996, + "step": 17707 + }, + { + "epoch": 0.35448790130871055, + "grad_norm": 1.0901576280593872, + "learning_rate": 7.483792605423401e-06, + "loss": 0.3204, + "step": 17708 + }, + { + "epoch": 0.3545079198258389, + "grad_norm": 1.145430326461792, + "learning_rate": 7.483511245404273e-06, + "loss": 0.328, + "step": 17709 + }, + { + "epoch": 0.35452793834296725, + "grad_norm": 1.018987774848938, + "learning_rate": 7.483229874944999e-06, + "loss": 0.2997, + "step": 17710 + }, + { + "epoch": 0.3545479568600956, + "grad_norm": 1.0411895513534546, + "learning_rate": 7.482948494046759e-06, + "loss": 0.3432, + "step": 17711 + }, + { + "epoch": 0.35456797537722395, + "grad_norm": 1.1974941492080688, + "learning_rate": 7.48266710271074e-06, + "loss": 0.3586, + "step": 17712 + }, + { + "epoch": 0.35458799389435225, + "grad_norm": 1.1254366636276245, + "learning_rate": 7.482385700938122e-06, + "loss": 0.3524, + "step": 17713 + }, + { + "epoch": 0.3546080124114806, + "grad_norm": 1.1598700284957886, + "learning_rate": 7.48210428873009e-06, + "loss": 0.271, + "step": 17714 + }, + { + "epoch": 0.35462803092860895, + "grad_norm": 1.080437421798706, + "learning_rate": 7.4818228660878244e-06, + "loss": 0.339, + "step": 17715 + }, + { + "epoch": 0.3546480494457373, + "grad_norm": 1.0652838945388794, + "learning_rate": 7.481541433012509e-06, + "loss": 0.3013, + "step": 17716 + }, + { + "epoch": 0.35466806796286565, + "grad_norm": 1.2767869234085083, + "learning_rate": 7.481259989505328e-06, + "loss": 0.3322, + "step": 17717 + }, + { + "epoch": 0.354688086479994, + "grad_norm": 1.2186379432678223, + "learning_rate": 7.480978535567464e-06, + "loss": 0.3286, + "step": 17718 + }, + { + "epoch": 0.35470810499712235, + "grad_norm": 1.200562596321106, + "learning_rate": 7.480697071200102e-06, + "loss": 0.3217, + "step": 17719 + }, + { + "epoch": 0.3547281235142507, + "grad_norm": 1.880265712738037, + "learning_rate": 7.480415596404421e-06, + "loss": 0.8102, + "step": 17720 + }, + { + "epoch": 0.354748142031379, + "grad_norm": 1.780937671661377, + "learning_rate": 7.480134111181608e-06, + "loss": 0.8045, + "step": 17721 + }, + { + "epoch": 0.35476816054850735, + "grad_norm": 1.0881812572479248, + "learning_rate": 7.479852615532843e-06, + "loss": 0.3403, + "step": 17722 + }, + { + "epoch": 0.3547881790656357, + "grad_norm": 1.0625935792922974, + "learning_rate": 7.4795711094593126e-06, + "loss": 0.3244, + "step": 17723 + }, + { + "epoch": 0.35480819758276405, + "grad_norm": 1.1412402391433716, + "learning_rate": 7.479289592962199e-06, + "loss": 0.2888, + "step": 17724 + }, + { + "epoch": 0.3548282160998924, + "grad_norm": 1.067628026008606, + "learning_rate": 7.4790080660426855e-06, + "loss": 0.3726, + "step": 17725 + }, + { + "epoch": 0.35484823461702075, + "grad_norm": 1.0762659311294556, + "learning_rate": 7.4787265287019554e-06, + "loss": 0.3374, + "step": 17726 + }, + { + "epoch": 0.3548682531341491, + "grad_norm": 1.0867747068405151, + "learning_rate": 7.478444980941193e-06, + "loss": 0.3272, + "step": 17727 + }, + { + "epoch": 0.35488827165127745, + "grad_norm": 1.0911240577697754, + "learning_rate": 7.4781634227615795e-06, + "loss": 0.3393, + "step": 17728 + }, + { + "epoch": 0.35490829016840575, + "grad_norm": 1.096956729888916, + "learning_rate": 7.477881854164302e-06, + "loss": 0.3519, + "step": 17729 + }, + { + "epoch": 0.3549283086855341, + "grad_norm": 1.096380352973938, + "learning_rate": 7.477600275150543e-06, + "loss": 0.2902, + "step": 17730 + }, + { + "epoch": 0.35494832720266245, + "grad_norm": 1.0551855564117432, + "learning_rate": 7.477318685721483e-06, + "loss": 0.3344, + "step": 17731 + }, + { + "epoch": 0.3549683457197908, + "grad_norm": 0.9920300841331482, + "learning_rate": 7.477037085878312e-06, + "loss": 0.2469, + "step": 17732 + }, + { + "epoch": 0.35498836423691915, + "grad_norm": 1.1458470821380615, + "learning_rate": 7.476755475622206e-06, + "loss": 0.3436, + "step": 17733 + }, + { + "epoch": 0.3550083827540475, + "grad_norm": 1.1288752555847168, + "learning_rate": 7.476473854954356e-06, + "loss": 0.3122, + "step": 17734 + }, + { + "epoch": 0.35502840127117585, + "grad_norm": 1.8949562311172485, + "learning_rate": 7.476192223875942e-06, + "loss": 0.7942, + "step": 17735 + }, + { + "epoch": 0.3550484197883042, + "grad_norm": 1.042397379875183, + "learning_rate": 7.475910582388148e-06, + "loss": 0.2795, + "step": 17736 + }, + { + "epoch": 0.3550684383054325, + "grad_norm": 1.2119793891906738, + "learning_rate": 7.475628930492159e-06, + "loss": 0.3409, + "step": 17737 + }, + { + "epoch": 0.35508845682256085, + "grad_norm": 1.0637301206588745, + "learning_rate": 7.47534726818916e-06, + "loss": 0.3123, + "step": 17738 + }, + { + "epoch": 0.3551084753396892, + "grad_norm": 1.1151905059814453, + "learning_rate": 7.475065595480333e-06, + "loss": 0.3098, + "step": 17739 + }, + { + "epoch": 0.35512849385681755, + "grad_norm": 2.054274320602417, + "learning_rate": 7.474783912366863e-06, + "loss": 0.8539, + "step": 17740 + }, + { + "epoch": 0.3551485123739459, + "grad_norm": 1.1502057313919067, + "learning_rate": 7.474502218849933e-06, + "loss": 0.2923, + "step": 17741 + }, + { + "epoch": 0.35516853089107425, + "grad_norm": 1.119703769683838, + "learning_rate": 7.474220514930729e-06, + "loss": 0.3204, + "step": 17742 + }, + { + "epoch": 0.3551885494082026, + "grad_norm": 1.158436894416809, + "learning_rate": 7.4739388006104345e-06, + "loss": 0.3503, + "step": 17743 + }, + { + "epoch": 0.35520856792533095, + "grad_norm": 1.2006393671035767, + "learning_rate": 7.473657075890233e-06, + "loss": 0.3308, + "step": 17744 + }, + { + "epoch": 0.35522858644245925, + "grad_norm": 1.0304044485092163, + "learning_rate": 7.473375340771312e-06, + "loss": 0.3421, + "step": 17745 + }, + { + "epoch": 0.3552486049595876, + "grad_norm": 1.060278058052063, + "learning_rate": 7.4730935952548504e-06, + "loss": 0.2888, + "step": 17746 + }, + { + "epoch": 0.35526862347671595, + "grad_norm": 1.0894474983215332, + "learning_rate": 7.472811839342037e-06, + "loss": 0.3147, + "step": 17747 + }, + { + "epoch": 0.3552886419938443, + "grad_norm": 1.055413007736206, + "learning_rate": 7.472530073034054e-06, + "loss": 0.2943, + "step": 17748 + }, + { + "epoch": 0.35530866051097265, + "grad_norm": 1.1332789659500122, + "learning_rate": 7.472248296332086e-06, + "loss": 0.2918, + "step": 17749 + }, + { + "epoch": 0.355328679028101, + "grad_norm": 1.1339597702026367, + "learning_rate": 7.471966509237321e-06, + "loss": 0.3462, + "step": 17750 + }, + { + "epoch": 0.35534869754522935, + "grad_norm": 1.0810306072235107, + "learning_rate": 7.471684711750937e-06, + "loss": 0.2961, + "step": 17751 + }, + { + "epoch": 0.3553687160623577, + "grad_norm": 1.0100635290145874, + "learning_rate": 7.4714029038741255e-06, + "loss": 0.3199, + "step": 17752 + }, + { + "epoch": 0.355388734579486, + "grad_norm": 1.266361951828003, + "learning_rate": 7.471121085608066e-06, + "loss": 0.325, + "step": 17753 + }, + { + "epoch": 0.35540875309661435, + "grad_norm": 1.069810390472412, + "learning_rate": 7.470839256953947e-06, + "loss": 0.2871, + "step": 17754 + }, + { + "epoch": 0.3554287716137427, + "grad_norm": 1.0597704648971558, + "learning_rate": 7.470557417912949e-06, + "loss": 0.3181, + "step": 17755 + }, + { + "epoch": 0.35544879013087105, + "grad_norm": 1.050857663154602, + "learning_rate": 7.470275568486263e-06, + "loss": 0.3225, + "step": 17756 + }, + { + "epoch": 0.3554688086479994, + "grad_norm": 1.1952056884765625, + "learning_rate": 7.4699937086750676e-06, + "loss": 0.2966, + "step": 17757 + }, + { + "epoch": 0.35548882716512775, + "grad_norm": 1.1243624687194824, + "learning_rate": 7.469711838480551e-06, + "loss": 0.3364, + "step": 17758 + }, + { + "epoch": 0.3555088456822561, + "grad_norm": 0.972918689250946, + "learning_rate": 7.469429957903898e-06, + "loss": 0.331, + "step": 17759 + }, + { + "epoch": 0.35552886419938445, + "grad_norm": 1.0310667753219604, + "learning_rate": 7.46914806694629e-06, + "loss": 0.2844, + "step": 17760 + }, + { + "epoch": 0.35554888271651275, + "grad_norm": 1.052799105644226, + "learning_rate": 7.4688661656089165e-06, + "loss": 0.2575, + "step": 17761 + }, + { + "epoch": 0.3555689012336411, + "grad_norm": 1.0291848182678223, + "learning_rate": 7.468584253892961e-06, + "loss": 0.3208, + "step": 17762 + }, + { + "epoch": 0.35558891975076945, + "grad_norm": 1.357987880706787, + "learning_rate": 7.4683023317996095e-06, + "loss": 0.3055, + "step": 17763 + }, + { + "epoch": 0.3556089382678978, + "grad_norm": 1.0885107517242432, + "learning_rate": 7.468020399330045e-06, + "loss": 0.3501, + "step": 17764 + }, + { + "epoch": 0.35562895678502615, + "grad_norm": 1.8243318796157837, + "learning_rate": 7.467738456485455e-06, + "loss": 0.8511, + "step": 17765 + }, + { + "epoch": 0.3556489753021545, + "grad_norm": 1.0754326581954956, + "learning_rate": 7.467456503267023e-06, + "loss": 0.3334, + "step": 17766 + }, + { + "epoch": 0.35566899381928285, + "grad_norm": 1.0998972654342651, + "learning_rate": 7.467174539675934e-06, + "loss": 0.3393, + "step": 17767 + }, + { + "epoch": 0.3556890123364112, + "grad_norm": 1.9483424425125122, + "learning_rate": 7.466892565713375e-06, + "loss": 0.8236, + "step": 17768 + }, + { + "epoch": 0.3557090308535395, + "grad_norm": 1.1665078401565552, + "learning_rate": 7.466610581380531e-06, + "loss": 0.3831, + "step": 17769 + }, + { + "epoch": 0.35572904937066785, + "grad_norm": 0.9657038450241089, + "learning_rate": 7.466328586678586e-06, + "loss": 0.3275, + "step": 17770 + }, + { + "epoch": 0.3557490678877962, + "grad_norm": 1.7835122346878052, + "learning_rate": 7.466046581608727e-06, + "loss": 0.8125, + "step": 17771 + }, + { + "epoch": 0.35576908640492455, + "grad_norm": 0.9910821914672852, + "learning_rate": 7.465764566172139e-06, + "loss": 0.2809, + "step": 17772 + }, + { + "epoch": 0.3557891049220529, + "grad_norm": 1.0745476484298706, + "learning_rate": 7.465482540370007e-06, + "loss": 0.32, + "step": 17773 + }, + { + "epoch": 0.35580912343918125, + "grad_norm": 1.0522180795669556, + "learning_rate": 7.465200504203516e-06, + "loss": 0.3152, + "step": 17774 + }, + { + "epoch": 0.3558291419563096, + "grad_norm": 1.1114060878753662, + "learning_rate": 7.4649184576738535e-06, + "loss": 0.3051, + "step": 17775 + }, + { + "epoch": 0.35584916047343795, + "grad_norm": 1.853090524673462, + "learning_rate": 7.464636400782205e-06, + "loss": 0.8213, + "step": 17776 + }, + { + "epoch": 0.35586917899056625, + "grad_norm": 1.1102873086929321, + "learning_rate": 7.464354333529754e-06, + "loss": 0.3352, + "step": 17777 + }, + { + "epoch": 0.3558891975076946, + "grad_norm": 1.043444037437439, + "learning_rate": 7.464072255917689e-06, + "loss": 0.3386, + "step": 17778 + }, + { + "epoch": 0.35590921602482295, + "grad_norm": 1.1015639305114746, + "learning_rate": 7.4637901679471955e-06, + "loss": 0.3145, + "step": 17779 + }, + { + "epoch": 0.3559292345419513, + "grad_norm": 1.080406904220581, + "learning_rate": 7.463508069619455e-06, + "loss": 0.3232, + "step": 17780 + }, + { + "epoch": 0.35594925305907965, + "grad_norm": 1.040966272354126, + "learning_rate": 7.46322596093566e-06, + "loss": 0.2624, + "step": 17781 + }, + { + "epoch": 0.355969271576208, + "grad_norm": 1.182576060295105, + "learning_rate": 7.462943841896991e-06, + "loss": 0.306, + "step": 17782 + }, + { + "epoch": 0.35598929009333635, + "grad_norm": 1.063396692276001, + "learning_rate": 7.462661712504637e-06, + "loss": 0.2818, + "step": 17783 + }, + { + "epoch": 0.3560093086104647, + "grad_norm": 1.0664958953857422, + "learning_rate": 7.462379572759783e-06, + "loss": 0.3301, + "step": 17784 + }, + { + "epoch": 0.356029327127593, + "grad_norm": 1.0757933855056763, + "learning_rate": 7.462097422663617e-06, + "loss": 0.3292, + "step": 17785 + }, + { + "epoch": 0.35604934564472135, + "grad_norm": 1.14156174659729, + "learning_rate": 7.46181526221732e-06, + "loss": 0.358, + "step": 17786 + }, + { + "epoch": 0.3560693641618497, + "grad_norm": 1.1261318922042847, + "learning_rate": 7.461533091422083e-06, + "loss": 0.3147, + "step": 17787 + }, + { + "epoch": 0.35608938267897805, + "grad_norm": 1.2019896507263184, + "learning_rate": 7.46125091027909e-06, + "loss": 0.2673, + "step": 17788 + }, + { + "epoch": 0.3561094011961064, + "grad_norm": 0.9318165183067322, + "learning_rate": 7.460968718789528e-06, + "loss": 0.2491, + "step": 17789 + }, + { + "epoch": 0.35612941971323475, + "grad_norm": 1.2154450416564941, + "learning_rate": 7.460686516954585e-06, + "loss": 0.372, + "step": 17790 + }, + { + "epoch": 0.3561494382303631, + "grad_norm": 1.9744561910629272, + "learning_rate": 7.4604043047754435e-06, + "loss": 0.7931, + "step": 17791 + }, + { + "epoch": 0.35616945674749145, + "grad_norm": 1.0435402393341064, + "learning_rate": 7.460122082253293e-06, + "loss": 0.3063, + "step": 17792 + }, + { + "epoch": 0.35618947526461975, + "grad_norm": 1.0515167713165283, + "learning_rate": 7.459839849389317e-06, + "loss": 0.3066, + "step": 17793 + }, + { + "epoch": 0.3562094937817481, + "grad_norm": 1.2707221508026123, + "learning_rate": 7.459557606184704e-06, + "loss": 0.3598, + "step": 17794 + }, + { + "epoch": 0.35622951229887645, + "grad_norm": 1.0549370050430298, + "learning_rate": 7.45927535264064e-06, + "loss": 0.271, + "step": 17795 + }, + { + "epoch": 0.3562495308160048, + "grad_norm": 1.109557867050171, + "learning_rate": 7.458993088758314e-06, + "loss": 0.3391, + "step": 17796 + }, + { + "epoch": 0.35626954933313315, + "grad_norm": 1.1696810722351074, + "learning_rate": 7.458710814538908e-06, + "loss": 0.3346, + "step": 17797 + }, + { + "epoch": 0.3562895678502615, + "grad_norm": 1.1402268409729004, + "learning_rate": 7.458428529983612e-06, + "loss": 0.3342, + "step": 17798 + }, + { + "epoch": 0.35630958636738985, + "grad_norm": 1.8722912073135376, + "learning_rate": 7.458146235093611e-06, + "loss": 0.8332, + "step": 17799 + }, + { + "epoch": 0.3563296048845182, + "grad_norm": 1.2000229358673096, + "learning_rate": 7.457863929870092e-06, + "loss": 0.3949, + "step": 17800 + }, + { + "epoch": 0.3563496234016465, + "grad_norm": 1.1892242431640625, + "learning_rate": 7.4575816143142424e-06, + "loss": 0.3552, + "step": 17801 + }, + { + "epoch": 0.35636964191877485, + "grad_norm": 1.0065585374832153, + "learning_rate": 7.457299288427247e-06, + "loss": 0.3039, + "step": 17802 + }, + { + "epoch": 0.3563896604359032, + "grad_norm": 1.2281900644302368, + "learning_rate": 7.457016952210297e-06, + "loss": 0.3412, + "step": 17803 + }, + { + "epoch": 0.35640967895303155, + "grad_norm": 1.270153522491455, + "learning_rate": 7.456734605664574e-06, + "loss": 0.3533, + "step": 17804 + }, + { + "epoch": 0.3564296974701599, + "grad_norm": 1.0264986753463745, + "learning_rate": 7.4564522487912695e-06, + "loss": 0.306, + "step": 17805 + }, + { + "epoch": 0.35644971598728825, + "grad_norm": 1.1890745162963867, + "learning_rate": 7.456169881591566e-06, + "loss": 0.3119, + "step": 17806 + }, + { + "epoch": 0.3564697345044166, + "grad_norm": 1.8078336715698242, + "learning_rate": 7.455887504066654e-06, + "loss": 0.9162, + "step": 17807 + }, + { + "epoch": 0.35648975302154495, + "grad_norm": 1.1677112579345703, + "learning_rate": 7.45560511621772e-06, + "loss": 0.2651, + "step": 17808 + }, + { + "epoch": 0.35650977153867325, + "grad_norm": 1.1952478885650635, + "learning_rate": 7.455322718045951e-06, + "loss": 0.3549, + "step": 17809 + }, + { + "epoch": 0.3565297900558016, + "grad_norm": 1.1513413190841675, + "learning_rate": 7.4550403095525325e-06, + "loss": 0.3003, + "step": 17810 + }, + { + "epoch": 0.35654980857292995, + "grad_norm": 1.193471074104309, + "learning_rate": 7.454757890738653e-06, + "loss": 0.3156, + "step": 17811 + }, + { + "epoch": 0.3565698270900583, + "grad_norm": 1.8544467687606812, + "learning_rate": 7.4544754616055e-06, + "loss": 0.7904, + "step": 17812 + }, + { + "epoch": 0.35658984560718665, + "grad_norm": 1.2871947288513184, + "learning_rate": 7.45419302215426e-06, + "loss": 0.3041, + "step": 17813 + }, + { + "epoch": 0.356609864124315, + "grad_norm": 1.017773985862732, + "learning_rate": 7.453910572386121e-06, + "loss": 0.3001, + "step": 17814 + }, + { + "epoch": 0.35662988264144335, + "grad_norm": 1.028083086013794, + "learning_rate": 7.45362811230227e-06, + "loss": 0.3092, + "step": 17815 + }, + { + "epoch": 0.3566499011585717, + "grad_norm": 1.8667134046554565, + "learning_rate": 7.453345641903895e-06, + "loss": 0.8817, + "step": 17816 + }, + { + "epoch": 0.3566699196757, + "grad_norm": 1.1745784282684326, + "learning_rate": 7.453063161192182e-06, + "loss": 0.3405, + "step": 17817 + }, + { + "epoch": 0.35668993819282835, + "grad_norm": 1.1392930746078491, + "learning_rate": 7.45278067016832e-06, + "loss": 0.3292, + "step": 17818 + }, + { + "epoch": 0.3567099567099567, + "grad_norm": 1.4153811931610107, + "learning_rate": 7.452498168833494e-06, + "loss": 0.3014, + "step": 17819 + }, + { + "epoch": 0.35672997522708505, + "grad_norm": 1.2140955924987793, + "learning_rate": 7.452215657188896e-06, + "loss": 0.3334, + "step": 17820 + }, + { + "epoch": 0.3567499937442134, + "grad_norm": 1.144392490386963, + "learning_rate": 7.451933135235711e-06, + "loss": 0.318, + "step": 17821 + }, + { + "epoch": 0.35677001226134175, + "grad_norm": 1.0862175226211548, + "learning_rate": 7.451650602975124e-06, + "loss": 0.3141, + "step": 17822 + }, + { + "epoch": 0.3567900307784701, + "grad_norm": 1.083030104637146, + "learning_rate": 7.4513680604083284e-06, + "loss": 0.3572, + "step": 17823 + }, + { + "epoch": 0.35681004929559845, + "grad_norm": 1.1491553783416748, + "learning_rate": 7.451085507536509e-06, + "loss": 0.3715, + "step": 17824 + }, + { + "epoch": 0.35683006781272675, + "grad_norm": 1.0385822057724, + "learning_rate": 7.450802944360851e-06, + "loss": 0.2802, + "step": 17825 + }, + { + "epoch": 0.3568500863298551, + "grad_norm": 1.122054100036621, + "learning_rate": 7.450520370882546e-06, + "loss": 0.292, + "step": 17826 + }, + { + "epoch": 0.35687010484698345, + "grad_norm": 0.9940608143806458, + "learning_rate": 7.450237787102782e-06, + "loss": 0.26, + "step": 17827 + }, + { + "epoch": 0.3568901233641118, + "grad_norm": 1.0972095727920532, + "learning_rate": 7.449955193022744e-06, + "loss": 0.3501, + "step": 17828 + }, + { + "epoch": 0.35691014188124015, + "grad_norm": 1.111989974975586, + "learning_rate": 7.449672588643624e-06, + "loss": 0.3107, + "step": 17829 + }, + { + "epoch": 0.3569301603983685, + "grad_norm": 1.086705207824707, + "learning_rate": 7.449389973966605e-06, + "loss": 0.3415, + "step": 17830 + }, + { + "epoch": 0.35695017891549685, + "grad_norm": 1.0805333852767944, + "learning_rate": 7.449107348992879e-06, + "loss": 0.3065, + "step": 17831 + }, + { + "epoch": 0.3569701974326252, + "grad_norm": 1.1843235492706299, + "learning_rate": 7.448824713723633e-06, + "loss": 0.3575, + "step": 17832 + }, + { + "epoch": 0.3569902159497535, + "grad_norm": 1.9811533689498901, + "learning_rate": 7.448542068160053e-06, + "loss": 0.7865, + "step": 17833 + }, + { + "epoch": 0.35701023446688185, + "grad_norm": 1.8699790239334106, + "learning_rate": 7.448259412303331e-06, + "loss": 0.8806, + "step": 17834 + }, + { + "epoch": 0.3570302529840102, + "grad_norm": 1.1053316593170166, + "learning_rate": 7.447976746154653e-06, + "loss": 0.3263, + "step": 17835 + }, + { + "epoch": 0.35705027150113855, + "grad_norm": 1.2407548427581787, + "learning_rate": 7.447694069715208e-06, + "loss": 0.3212, + "step": 17836 + }, + { + "epoch": 0.3570702900182669, + "grad_norm": 1.1296130418777466, + "learning_rate": 7.447411382986183e-06, + "loss": 0.3376, + "step": 17837 + }, + { + "epoch": 0.35709030853539525, + "grad_norm": 1.0098321437835693, + "learning_rate": 7.447128685968766e-06, + "loss": 0.3053, + "step": 17838 + }, + { + "epoch": 0.3571103270525236, + "grad_norm": 1.018386721611023, + "learning_rate": 7.446845978664148e-06, + "loss": 0.3489, + "step": 17839 + }, + { + "epoch": 0.35713034556965195, + "grad_norm": 1.1048073768615723, + "learning_rate": 7.446563261073516e-06, + "loss": 0.3453, + "step": 17840 + }, + { + "epoch": 0.35715036408678025, + "grad_norm": 1.0426442623138428, + "learning_rate": 7.446280533198058e-06, + "loss": 0.3149, + "step": 17841 + }, + { + "epoch": 0.3571703826039086, + "grad_norm": 1.212027907371521, + "learning_rate": 7.445997795038965e-06, + "loss": 0.3162, + "step": 17842 + }, + { + "epoch": 0.35719040112103695, + "grad_norm": 1.090925931930542, + "learning_rate": 7.445715046597423e-06, + "loss": 0.3566, + "step": 17843 + }, + { + "epoch": 0.3572104196381653, + "grad_norm": 1.1894232034683228, + "learning_rate": 7.44543228787462e-06, + "loss": 0.3099, + "step": 17844 + }, + { + "epoch": 0.35723043815529365, + "grad_norm": 1.1709718704223633, + "learning_rate": 7.445149518871746e-06, + "loss": 0.3749, + "step": 17845 + }, + { + "epoch": 0.357250456672422, + "grad_norm": 1.8327544927597046, + "learning_rate": 7.444866739589991e-06, + "loss": 0.8646, + "step": 17846 + }, + { + "epoch": 0.35727047518955035, + "grad_norm": 1.7858905792236328, + "learning_rate": 7.444583950030542e-06, + "loss": 0.7707, + "step": 17847 + }, + { + "epoch": 0.3572904937066787, + "grad_norm": 1.0454872846603394, + "learning_rate": 7.4443011501945865e-06, + "loss": 0.3508, + "step": 17848 + }, + { + "epoch": 0.357310512223807, + "grad_norm": 1.1552037000656128, + "learning_rate": 7.444018340083317e-06, + "loss": 0.3153, + "step": 17849 + }, + { + "epoch": 0.35733053074093535, + "grad_norm": 1.8316694498062134, + "learning_rate": 7.443735519697919e-06, + "loss": 0.821, + "step": 17850 + }, + { + "epoch": 0.3573505492580637, + "grad_norm": 1.0568598508834839, + "learning_rate": 7.443452689039583e-06, + "loss": 0.3371, + "step": 17851 + }, + { + "epoch": 0.35737056777519205, + "grad_norm": 1.1668457984924316, + "learning_rate": 7.443169848109499e-06, + "loss": 0.3188, + "step": 17852 + }, + { + "epoch": 0.3573905862923204, + "grad_norm": 1.070156455039978, + "learning_rate": 7.442886996908854e-06, + "loss": 0.3107, + "step": 17853 + }, + { + "epoch": 0.35741060480944875, + "grad_norm": 1.053244709968567, + "learning_rate": 7.442604135438838e-06, + "loss": 0.2515, + "step": 17854 + }, + { + "epoch": 0.3574306233265771, + "grad_norm": 1.046942949295044, + "learning_rate": 7.442321263700638e-06, + "loss": 0.3457, + "step": 17855 + }, + { + "epoch": 0.35745064184370545, + "grad_norm": 1.2021493911743164, + "learning_rate": 7.442038381695448e-06, + "loss": 0.4151, + "step": 17856 + }, + { + "epoch": 0.35747066036083375, + "grad_norm": 1.8826087713241577, + "learning_rate": 7.441755489424452e-06, + "loss": 0.798, + "step": 17857 + }, + { + "epoch": 0.3574906788779621, + "grad_norm": 1.2120531797409058, + "learning_rate": 7.441472586888842e-06, + "loss": 0.3239, + "step": 17858 + }, + { + "epoch": 0.35751069739509045, + "grad_norm": 1.0480408668518066, + "learning_rate": 7.441189674089806e-06, + "loss": 0.3217, + "step": 17859 + }, + { + "epoch": 0.3575307159122188, + "grad_norm": 1.212843656539917, + "learning_rate": 7.440906751028535e-06, + "loss": 0.3264, + "step": 17860 + }, + { + "epoch": 0.35755073442934715, + "grad_norm": 1.179987907409668, + "learning_rate": 7.440623817706216e-06, + "loss": 0.3496, + "step": 17861 + }, + { + "epoch": 0.3575707529464755, + "grad_norm": 1.2091469764709473, + "learning_rate": 7.440340874124042e-06, + "loss": 0.3198, + "step": 17862 + }, + { + "epoch": 0.35759077146360385, + "grad_norm": 1.1339998245239258, + "learning_rate": 7.440057920283198e-06, + "loss": 0.2738, + "step": 17863 + }, + { + "epoch": 0.3576107899807322, + "grad_norm": 1.113243818283081, + "learning_rate": 7.4397749561848765e-06, + "loss": 0.2846, + "step": 17864 + }, + { + "epoch": 0.3576308084978605, + "grad_norm": 1.2135436534881592, + "learning_rate": 7.439491981830266e-06, + "loss": 0.3376, + "step": 17865 + }, + { + "epoch": 0.35765082701498885, + "grad_norm": 1.0278977155685425, + "learning_rate": 7.439208997220556e-06, + "loss": 0.3438, + "step": 17866 + }, + { + "epoch": 0.3576708455321172, + "grad_norm": 1.0428763628005981, + "learning_rate": 7.4389260023569364e-06, + "loss": 0.2857, + "step": 17867 + }, + { + "epoch": 0.35769086404924555, + "grad_norm": 1.1366044282913208, + "learning_rate": 7.438642997240597e-06, + "loss": 0.328, + "step": 17868 + }, + { + "epoch": 0.3577108825663739, + "grad_norm": 1.1053410768508911, + "learning_rate": 7.438359981872728e-06, + "loss": 0.2755, + "step": 17869 + }, + { + "epoch": 0.35773090108350225, + "grad_norm": 1.122576355934143, + "learning_rate": 7.438076956254516e-06, + "loss": 0.3472, + "step": 17870 + }, + { + "epoch": 0.3577509196006306, + "grad_norm": 1.9953854084014893, + "learning_rate": 7.437793920387155e-06, + "loss": 0.7745, + "step": 17871 + }, + { + "epoch": 0.35777093811775895, + "grad_norm": 1.0848326683044434, + "learning_rate": 7.437510874271833e-06, + "loss": 0.2794, + "step": 17872 + }, + { + "epoch": 0.35779095663488725, + "grad_norm": 1.0330744981765747, + "learning_rate": 7.437227817909739e-06, + "loss": 0.2565, + "step": 17873 + }, + { + "epoch": 0.3578109751520156, + "grad_norm": 1.282541275024414, + "learning_rate": 7.436944751302066e-06, + "loss": 0.2868, + "step": 17874 + }, + { + "epoch": 0.35783099366914395, + "grad_norm": 1.0794107913970947, + "learning_rate": 7.436661674449999e-06, + "loss": 0.3005, + "step": 17875 + }, + { + "epoch": 0.3578510121862723, + "grad_norm": 1.394546627998352, + "learning_rate": 7.4363785873547315e-06, + "loss": 0.3665, + "step": 17876 + }, + { + "epoch": 0.35787103070340065, + "grad_norm": 1.0916955471038818, + "learning_rate": 7.436095490017452e-06, + "loss": 0.2659, + "step": 17877 + }, + { + "epoch": 0.357891049220529, + "grad_norm": 1.042820692062378, + "learning_rate": 7.435812382439353e-06, + "loss": 0.2819, + "step": 17878 + }, + { + "epoch": 0.35791106773765735, + "grad_norm": 1.3438141345977783, + "learning_rate": 7.435529264621622e-06, + "loss": 0.3672, + "step": 17879 + }, + { + "epoch": 0.3579310862547857, + "grad_norm": 1.074084997177124, + "learning_rate": 7.4352461365654504e-06, + "loss": 0.3281, + "step": 17880 + }, + { + "epoch": 0.357951104771914, + "grad_norm": 1.1532238721847534, + "learning_rate": 7.434962998272027e-06, + "loss": 0.3261, + "step": 17881 + }, + { + "epoch": 0.35797112328904235, + "grad_norm": 1.1374800205230713, + "learning_rate": 7.434679849742546e-06, + "loss": 0.3446, + "step": 17882 + }, + { + "epoch": 0.3579911418061707, + "grad_norm": 1.0797983407974243, + "learning_rate": 7.434396690978192e-06, + "loss": 0.304, + "step": 17883 + }, + { + "epoch": 0.35801116032329905, + "grad_norm": 1.334466576576233, + "learning_rate": 7.434113521980158e-06, + "loss": 0.3529, + "step": 17884 + }, + { + "epoch": 0.3580311788404274, + "grad_norm": 1.125158667564392, + "learning_rate": 7.433830342749636e-06, + "loss": 0.3055, + "step": 17885 + }, + { + "epoch": 0.35805119735755575, + "grad_norm": 1.0666613578796387, + "learning_rate": 7.4335471532878145e-06, + "loss": 0.2818, + "step": 17886 + }, + { + "epoch": 0.3580712158746841, + "grad_norm": 1.1295286417007446, + "learning_rate": 7.433263953595885e-06, + "loss": 0.3345, + "step": 17887 + }, + { + "epoch": 0.35809123439181245, + "grad_norm": 1.0614045858383179, + "learning_rate": 7.432980743675038e-06, + "loss": 0.2789, + "step": 17888 + }, + { + "epoch": 0.35811125290894075, + "grad_norm": 1.1191624402999878, + "learning_rate": 7.432697523526462e-06, + "loss": 0.3333, + "step": 17889 + }, + { + "epoch": 0.3581312714260691, + "grad_norm": 1.105902075767517, + "learning_rate": 7.432414293151349e-06, + "loss": 0.3172, + "step": 17890 + }, + { + "epoch": 0.35815128994319745, + "grad_norm": 1.1017125844955444, + "learning_rate": 7.432131052550891e-06, + "loss": 0.3234, + "step": 17891 + }, + { + "epoch": 0.3581713084603258, + "grad_norm": 1.0700337886810303, + "learning_rate": 7.431847801726277e-06, + "loss": 0.3046, + "step": 17892 + }, + { + "epoch": 0.35819132697745415, + "grad_norm": 1.1107103824615479, + "learning_rate": 7.4315645406786985e-06, + "loss": 0.3633, + "step": 17893 + }, + { + "epoch": 0.3582113454945825, + "grad_norm": 1.0424420833587646, + "learning_rate": 7.4312812694093465e-06, + "loss": 0.3328, + "step": 17894 + }, + { + "epoch": 0.35823136401171085, + "grad_norm": 1.7193278074264526, + "learning_rate": 7.43099798791941e-06, + "loss": 0.7941, + "step": 17895 + }, + { + "epoch": 0.3582513825288392, + "grad_norm": 1.213934302330017, + "learning_rate": 7.430714696210082e-06, + "loss": 0.2907, + "step": 17896 + }, + { + "epoch": 0.3582714010459675, + "grad_norm": 1.1677426099777222, + "learning_rate": 7.430431394282551e-06, + "loss": 0.329, + "step": 17897 + }, + { + "epoch": 0.35829141956309585, + "grad_norm": 1.8870841264724731, + "learning_rate": 7.430148082138011e-06, + "loss": 0.7684, + "step": 17898 + }, + { + "epoch": 0.3583114380802242, + "grad_norm": 1.2186992168426514, + "learning_rate": 7.429864759777651e-06, + "loss": 0.3362, + "step": 17899 + }, + { + "epoch": 0.35833145659735255, + "grad_norm": 1.1863659620285034, + "learning_rate": 7.4295814272026624e-06, + "loss": 0.3371, + "step": 17900 + }, + { + "epoch": 0.3583514751144809, + "grad_norm": 1.0857924222946167, + "learning_rate": 7.429298084414236e-06, + "loss": 0.3277, + "step": 17901 + }, + { + "epoch": 0.35837149363160925, + "grad_norm": 1.1266320943832397, + "learning_rate": 7.429014731413564e-06, + "loss": 0.3949, + "step": 17902 + }, + { + "epoch": 0.3583915121487376, + "grad_norm": 1.042630910873413, + "learning_rate": 7.428731368201836e-06, + "loss": 0.2935, + "step": 17903 + }, + { + "epoch": 0.35841153066586595, + "grad_norm": 1.1230398416519165, + "learning_rate": 7.4284479947802445e-06, + "loss": 0.3021, + "step": 17904 + }, + { + "epoch": 0.35843154918299425, + "grad_norm": 1.1716408729553223, + "learning_rate": 7.428164611149981e-06, + "loss": 0.3385, + "step": 17905 + }, + { + "epoch": 0.3584515677001226, + "grad_norm": 1.167903184890747, + "learning_rate": 7.427881217312234e-06, + "loss": 0.2623, + "step": 17906 + }, + { + "epoch": 0.35847158621725095, + "grad_norm": 1.1724830865859985, + "learning_rate": 7.427597813268199e-06, + "loss": 0.329, + "step": 17907 + }, + { + "epoch": 0.3584916047343793, + "grad_norm": 1.171569585800171, + "learning_rate": 7.427314399019064e-06, + "loss": 0.3033, + "step": 17908 + }, + { + "epoch": 0.35851162325150765, + "grad_norm": 1.0918999910354614, + "learning_rate": 7.4270309745660206e-06, + "loss": 0.2935, + "step": 17909 + }, + { + "epoch": 0.358531641768636, + "grad_norm": 1.1069893836975098, + "learning_rate": 7.426747539910263e-06, + "loss": 0.3181, + "step": 17910 + }, + { + "epoch": 0.35855166028576435, + "grad_norm": 1.1008108854293823, + "learning_rate": 7.426464095052981e-06, + "loss": 0.3358, + "step": 17911 + }, + { + "epoch": 0.3585716788028927, + "grad_norm": 1.1919505596160889, + "learning_rate": 7.426180639995365e-06, + "loss": 0.3162, + "step": 17912 + }, + { + "epoch": 0.358591697320021, + "grad_norm": 1.1466959714889526, + "learning_rate": 7.42589717473861e-06, + "loss": 0.3319, + "step": 17913 + }, + { + "epoch": 0.35861171583714935, + "grad_norm": 1.050506830215454, + "learning_rate": 7.425613699283902e-06, + "loss": 0.2961, + "step": 17914 + }, + { + "epoch": 0.3586317343542777, + "grad_norm": 1.1329728364944458, + "learning_rate": 7.425330213632438e-06, + "loss": 0.3307, + "step": 17915 + }, + { + "epoch": 0.35865175287140605, + "grad_norm": 1.0793845653533936, + "learning_rate": 7.425046717785408e-06, + "loss": 0.3411, + "step": 17916 + }, + { + "epoch": 0.3586717713885344, + "grad_norm": 1.1476821899414062, + "learning_rate": 7.4247632117440025e-06, + "loss": 0.2924, + "step": 17917 + }, + { + "epoch": 0.35869178990566275, + "grad_norm": 1.221131682395935, + "learning_rate": 7.424479695509415e-06, + "loss": 0.33, + "step": 17918 + }, + { + "epoch": 0.3587118084227911, + "grad_norm": 1.1099759340286255, + "learning_rate": 7.4241961690828356e-06, + "loss": 0.3211, + "step": 17919 + }, + { + "epoch": 0.35873182693991945, + "grad_norm": 1.1307159662246704, + "learning_rate": 7.4239126324654584e-06, + "loss": 0.3089, + "step": 17920 + }, + { + "epoch": 0.35875184545704775, + "grad_norm": 1.2384402751922607, + "learning_rate": 7.423629085658474e-06, + "loss": 0.3759, + "step": 17921 + }, + { + "epoch": 0.3587718639741761, + "grad_norm": 1.95403254032135, + "learning_rate": 7.423345528663073e-06, + "loss": 0.8549, + "step": 17922 + }, + { + "epoch": 0.35879188249130445, + "grad_norm": 1.1464273929595947, + "learning_rate": 7.42306196148045e-06, + "loss": 0.3505, + "step": 17923 + }, + { + "epoch": 0.3588119010084328, + "grad_norm": 1.1573940515518188, + "learning_rate": 7.422778384111797e-06, + "loss": 0.3104, + "step": 17924 + }, + { + "epoch": 0.35883191952556115, + "grad_norm": 1.1638610363006592, + "learning_rate": 7.422494796558304e-06, + "loss": 0.3141, + "step": 17925 + }, + { + "epoch": 0.3588519380426895, + "grad_norm": 1.2190827131271362, + "learning_rate": 7.422211198821165e-06, + "loss": 0.2893, + "step": 17926 + }, + { + "epoch": 0.35887195655981785, + "grad_norm": 0.9968129396438599, + "learning_rate": 7.4219275909015695e-06, + "loss": 0.3173, + "step": 17927 + }, + { + "epoch": 0.3588919750769462, + "grad_norm": 0.9638173580169678, + "learning_rate": 7.421643972800711e-06, + "loss": 0.3005, + "step": 17928 + }, + { + "epoch": 0.3589119935940745, + "grad_norm": 1.0312203168869019, + "learning_rate": 7.421360344519786e-06, + "loss": 0.2622, + "step": 17929 + }, + { + "epoch": 0.35893201211120285, + "grad_norm": 1.1345252990722656, + "learning_rate": 7.421076706059981e-06, + "loss": 0.3786, + "step": 17930 + }, + { + "epoch": 0.3589520306283312, + "grad_norm": 1.2136714458465576, + "learning_rate": 7.420793057422491e-06, + "loss": 0.3079, + "step": 17931 + }, + { + "epoch": 0.35897204914545955, + "grad_norm": 1.1666284799575806, + "learning_rate": 7.4205093986085076e-06, + "loss": 0.3468, + "step": 17932 + }, + { + "epoch": 0.3589920676625879, + "grad_norm": 1.1604896783828735, + "learning_rate": 7.4202257296192245e-06, + "loss": 0.2658, + "step": 17933 + }, + { + "epoch": 0.35901208617971625, + "grad_norm": 0.99080491065979, + "learning_rate": 7.419942050455833e-06, + "loss": 0.3686, + "step": 17934 + }, + { + "epoch": 0.3590321046968446, + "grad_norm": 1.062795639038086, + "learning_rate": 7.4196583611195235e-06, + "loss": 0.3167, + "step": 17935 + }, + { + "epoch": 0.35905212321397295, + "grad_norm": 1.1196759939193726, + "learning_rate": 7.419374661611493e-06, + "loss": 0.3476, + "step": 17936 + }, + { + "epoch": 0.35907214173110125, + "grad_norm": 1.158633828163147, + "learning_rate": 7.419090951932932e-06, + "loss": 0.3121, + "step": 17937 + }, + { + "epoch": 0.3590921602482296, + "grad_norm": 1.1432533264160156, + "learning_rate": 7.4188072320850345e-06, + "loss": 0.32, + "step": 17938 + }, + { + "epoch": 0.35911217876535795, + "grad_norm": 1.0157684087753296, + "learning_rate": 7.41852350206899e-06, + "loss": 0.286, + "step": 17939 + }, + { + "epoch": 0.3591321972824863, + "grad_norm": 1.0160244703292847, + "learning_rate": 7.418239761885995e-06, + "loss": 0.3015, + "step": 17940 + }, + { + "epoch": 0.35915221579961465, + "grad_norm": 1.0680632591247559, + "learning_rate": 7.417956011537237e-06, + "loss": 0.3039, + "step": 17941 + }, + { + "epoch": 0.359172234316743, + "grad_norm": 1.1595417261123657, + "learning_rate": 7.417672251023916e-06, + "loss": 0.305, + "step": 17942 + }, + { + "epoch": 0.35919225283387135, + "grad_norm": 1.159644603729248, + "learning_rate": 7.417388480347219e-06, + "loss": 0.3126, + "step": 17943 + }, + { + "epoch": 0.3592122713509997, + "grad_norm": 1.2186554670333862, + "learning_rate": 7.4171046995083425e-06, + "loss": 0.3454, + "step": 17944 + }, + { + "epoch": 0.359232289868128, + "grad_norm": 1.1026592254638672, + "learning_rate": 7.416820908508477e-06, + "loss": 0.345, + "step": 17945 + }, + { + "epoch": 0.35925230838525635, + "grad_norm": 1.0005497932434082, + "learning_rate": 7.4165371073488176e-06, + "loss": 0.2733, + "step": 17946 + }, + { + "epoch": 0.3592723269023847, + "grad_norm": 1.8657771348953247, + "learning_rate": 7.4162532960305555e-06, + "loss": 0.8102, + "step": 17947 + }, + { + "epoch": 0.35929234541951305, + "grad_norm": 1.0326846837997437, + "learning_rate": 7.4159694745548835e-06, + "loss": 0.3223, + "step": 17948 + }, + { + "epoch": 0.3593123639366414, + "grad_norm": 1.0259861946105957, + "learning_rate": 7.415685642922998e-06, + "loss": 0.3198, + "step": 17949 + }, + { + "epoch": 0.35933238245376975, + "grad_norm": 2.0508787631988525, + "learning_rate": 7.415401801136087e-06, + "loss": 0.8121, + "step": 17950 + }, + { + "epoch": 0.3593524009708981, + "grad_norm": 1.2444243431091309, + "learning_rate": 7.41511794919535e-06, + "loss": 0.3221, + "step": 17951 + }, + { + "epoch": 0.35937241948802645, + "grad_norm": 1.0380864143371582, + "learning_rate": 7.4148340871019745e-06, + "loss": 0.3352, + "step": 17952 + }, + { + "epoch": 0.35939243800515475, + "grad_norm": 1.052017092704773, + "learning_rate": 7.414550214857156e-06, + "loss": 0.3005, + "step": 17953 + }, + { + "epoch": 0.3594124565222831, + "grad_norm": 1.1757947206497192, + "learning_rate": 7.414266332462089e-06, + "loss": 0.3351, + "step": 17954 + }, + { + "epoch": 0.35943247503941145, + "grad_norm": 1.1390327215194702, + "learning_rate": 7.413982439917966e-06, + "loss": 0.2948, + "step": 17955 + }, + { + "epoch": 0.3594524935565398, + "grad_norm": 1.1745156049728394, + "learning_rate": 7.41369853722598e-06, + "loss": 0.3217, + "step": 17956 + }, + { + "epoch": 0.35947251207366815, + "grad_norm": 1.0826194286346436, + "learning_rate": 7.413414624387325e-06, + "loss": 0.2757, + "step": 17957 + }, + { + "epoch": 0.3594925305907965, + "grad_norm": 1.146053671836853, + "learning_rate": 7.413130701403195e-06, + "loss": 0.3255, + "step": 17958 + }, + { + "epoch": 0.35951254910792485, + "grad_norm": 2.1251885890960693, + "learning_rate": 7.4128467682747815e-06, + "loss": 0.7598, + "step": 17959 + }, + { + "epoch": 0.3595325676250532, + "grad_norm": 1.0709103345870972, + "learning_rate": 7.412562825003279e-06, + "loss": 0.3057, + "step": 17960 + }, + { + "epoch": 0.3595525861421815, + "grad_norm": 1.0639857053756714, + "learning_rate": 7.412278871589882e-06, + "loss": 0.3591, + "step": 17961 + }, + { + "epoch": 0.35957260465930985, + "grad_norm": 1.0876545906066895, + "learning_rate": 7.411994908035785e-06, + "loss": 0.3191, + "step": 17962 + }, + { + "epoch": 0.3595926231764382, + "grad_norm": 1.0105922222137451, + "learning_rate": 7.41171093434218e-06, + "loss": 0.3043, + "step": 17963 + }, + { + "epoch": 0.35961264169356655, + "grad_norm": 1.1674773693084717, + "learning_rate": 7.411426950510261e-06, + "loss": 0.3246, + "step": 17964 + }, + { + "epoch": 0.3596326602106949, + "grad_norm": 1.1451876163482666, + "learning_rate": 7.4111429565412215e-06, + "loss": 0.3035, + "step": 17965 + }, + { + "epoch": 0.35965267872782325, + "grad_norm": 1.8357568979263306, + "learning_rate": 7.4108589524362565e-06, + "loss": 0.7796, + "step": 17966 + }, + { + "epoch": 0.3596726972449516, + "grad_norm": 1.0582748651504517, + "learning_rate": 7.41057493819656e-06, + "loss": 0.305, + "step": 17967 + }, + { + "epoch": 0.35969271576207995, + "grad_norm": 1.3488585948944092, + "learning_rate": 7.410290913823324e-06, + "loss": 0.3042, + "step": 17968 + }, + { + "epoch": 0.35971273427920825, + "grad_norm": 1.0412625074386597, + "learning_rate": 7.4100068793177435e-06, + "loss": 0.3028, + "step": 17969 + }, + { + "epoch": 0.3597327527963366, + "grad_norm": 1.1382226943969727, + "learning_rate": 7.409722834681014e-06, + "loss": 0.3436, + "step": 17970 + }, + { + "epoch": 0.35975277131346495, + "grad_norm": 1.738281488418579, + "learning_rate": 7.409438779914327e-06, + "loss": 0.8651, + "step": 17971 + }, + { + "epoch": 0.3597727898305933, + "grad_norm": 1.1299060583114624, + "learning_rate": 7.409154715018878e-06, + "loss": 0.3028, + "step": 17972 + }, + { + "epoch": 0.35979280834772165, + "grad_norm": 1.1039760112762451, + "learning_rate": 7.408870639995862e-06, + "loss": 0.3345, + "step": 17973 + }, + { + "epoch": 0.35981282686485, + "grad_norm": 1.1853079795837402, + "learning_rate": 7.408586554846471e-06, + "loss": 0.3277, + "step": 17974 + }, + { + "epoch": 0.35983284538197835, + "grad_norm": 1.1367851495742798, + "learning_rate": 7.408302459571901e-06, + "loss": 0.3246, + "step": 17975 + }, + { + "epoch": 0.3598528638991067, + "grad_norm": 1.1383371353149414, + "learning_rate": 7.4080183541733465e-06, + "loss": 0.2987, + "step": 17976 + }, + { + "epoch": 0.359872882416235, + "grad_norm": 1.0532373189926147, + "learning_rate": 7.407734238652e-06, + "loss": 0.3299, + "step": 17977 + }, + { + "epoch": 0.35989290093336335, + "grad_norm": 1.1609550714492798, + "learning_rate": 7.407450113009058e-06, + "loss": 0.2763, + "step": 17978 + }, + { + "epoch": 0.3599129194504917, + "grad_norm": 1.188841462135315, + "learning_rate": 7.407165977245713e-06, + "loss": 0.3344, + "step": 17979 + }, + { + "epoch": 0.35993293796762005, + "grad_norm": 1.1235547065734863, + "learning_rate": 7.406881831363161e-06, + "loss": 0.3011, + "step": 17980 + }, + { + "epoch": 0.3599529564847484, + "grad_norm": 1.8837631940841675, + "learning_rate": 7.406597675362594e-06, + "loss": 0.9133, + "step": 17981 + }, + { + "epoch": 0.35997297500187675, + "grad_norm": 1.1299633979797363, + "learning_rate": 7.406313509245211e-06, + "loss": 0.33, + "step": 17982 + }, + { + "epoch": 0.3599929935190051, + "grad_norm": 1.0952495336532593, + "learning_rate": 7.406029333012201e-06, + "loss": 0.3445, + "step": 17983 + }, + { + "epoch": 0.36001301203613345, + "grad_norm": 1.135076642036438, + "learning_rate": 7.4057451466647635e-06, + "loss": 0.3079, + "step": 17984 + }, + { + "epoch": 0.36003303055326175, + "grad_norm": 1.134229302406311, + "learning_rate": 7.4054609502040906e-06, + "loss": 0.3246, + "step": 17985 + }, + { + "epoch": 0.3600530490703901, + "grad_norm": 1.8773012161254883, + "learning_rate": 7.405176743631377e-06, + "loss": 0.748, + "step": 17986 + }, + { + "epoch": 0.36007306758751845, + "grad_norm": 1.0058537721633911, + "learning_rate": 7.404892526947817e-06, + "loss": 0.2999, + "step": 17987 + }, + { + "epoch": 0.3600930861046468, + "grad_norm": 1.167962670326233, + "learning_rate": 7.404608300154608e-06, + "loss": 0.3143, + "step": 17988 + }, + { + "epoch": 0.36011310462177515, + "grad_norm": 1.136047124862671, + "learning_rate": 7.4043240632529445e-06, + "loss": 0.2955, + "step": 17989 + }, + { + "epoch": 0.3601331231389035, + "grad_norm": 1.1041136980056763, + "learning_rate": 7.404039816244018e-06, + "loss": 0.2559, + "step": 17990 + }, + { + "epoch": 0.36015314165603185, + "grad_norm": 1.0702128410339355, + "learning_rate": 7.4037555591290256e-06, + "loss": 0.3288, + "step": 17991 + }, + { + "epoch": 0.3601731601731602, + "grad_norm": 1.1004571914672852, + "learning_rate": 7.403471291909161e-06, + "loss": 0.34, + "step": 17992 + }, + { + "epoch": 0.3601931786902885, + "grad_norm": 1.1048738956451416, + "learning_rate": 7.4031870145856236e-06, + "loss": 0.2766, + "step": 17993 + }, + { + "epoch": 0.36021319720741685, + "grad_norm": 1.088767647743225, + "learning_rate": 7.402902727159602e-06, + "loss": 0.3463, + "step": 17994 + }, + { + "epoch": 0.3602332157245452, + "grad_norm": 1.0296049118041992, + "learning_rate": 7.4026184296322964e-06, + "loss": 0.2757, + "step": 17995 + }, + { + "epoch": 0.36025323424167355, + "grad_norm": 0.9353311657905579, + "learning_rate": 7.402334122004899e-06, + "loss": 0.2856, + "step": 17996 + }, + { + "epoch": 0.3602732527588019, + "grad_norm": 1.1218358278274536, + "learning_rate": 7.402049804278607e-06, + "loss": 0.3189, + "step": 17997 + }, + { + "epoch": 0.36029327127593025, + "grad_norm": 1.168957233428955, + "learning_rate": 7.401765476454613e-06, + "loss": 0.3233, + "step": 17998 + }, + { + "epoch": 0.3603132897930586, + "grad_norm": 1.1541515588760376, + "learning_rate": 7.401481138534115e-06, + "loss": 0.3435, + "step": 17999 + }, + { + "epoch": 0.36033330831018695, + "grad_norm": 1.0732518434524536, + "learning_rate": 7.401196790518307e-06, + "loss": 0.3122, + "step": 18000 + }, + { + "epoch": 0.36035332682731525, + "grad_norm": 1.2518895864486694, + "learning_rate": 7.400912432408383e-06, + "loss": 0.3323, + "step": 18001 + }, + { + "epoch": 0.3603733453444436, + "grad_norm": 2.007291316986084, + "learning_rate": 7.400628064205542e-06, + "loss": 0.7922, + "step": 18002 + }, + { + "epoch": 0.36039336386157195, + "grad_norm": 1.1215856075286865, + "learning_rate": 7.4003436859109755e-06, + "loss": 0.3078, + "step": 18003 + }, + { + "epoch": 0.3604133823787003, + "grad_norm": 1.0674563646316528, + "learning_rate": 7.400059297525881e-06, + "loss": 0.3535, + "step": 18004 + }, + { + "epoch": 0.36043340089582865, + "grad_norm": 1.2661296129226685, + "learning_rate": 7.399774899051454e-06, + "loss": 0.3751, + "step": 18005 + }, + { + "epoch": 0.360453419412957, + "grad_norm": 1.2334243059158325, + "learning_rate": 7.39949049048889e-06, + "loss": 0.3098, + "step": 18006 + }, + { + "epoch": 0.36047343793008535, + "grad_norm": 1.1533294916152954, + "learning_rate": 7.399206071839384e-06, + "loss": 0.3093, + "step": 18007 + }, + { + "epoch": 0.36049345644721364, + "grad_norm": 1.1452116966247559, + "learning_rate": 7.398921643104134e-06, + "loss": 0.3459, + "step": 18008 + }, + { + "epoch": 0.360513474964342, + "grad_norm": 0.9845949411392212, + "learning_rate": 7.398637204284332e-06, + "loss": 0.2791, + "step": 18009 + }, + { + "epoch": 0.36053349348147035, + "grad_norm": 2.2463223934173584, + "learning_rate": 7.398352755381174e-06, + "loss": 0.8307, + "step": 18010 + }, + { + "epoch": 0.3605535119985987, + "grad_norm": 1.1176910400390625, + "learning_rate": 7.398068296395858e-06, + "loss": 0.3126, + "step": 18011 + }, + { + "epoch": 0.36057353051572705, + "grad_norm": 1.1977179050445557, + "learning_rate": 7.397783827329578e-06, + "loss": 0.2803, + "step": 18012 + }, + { + "epoch": 0.3605935490328554, + "grad_norm": 1.2570048570632935, + "learning_rate": 7.397499348183535e-06, + "loss": 0.362, + "step": 18013 + }, + { + "epoch": 0.36061356754998375, + "grad_norm": 1.0965120792388916, + "learning_rate": 7.397214858958916e-06, + "loss": 0.3054, + "step": 18014 + }, + { + "epoch": 0.3606335860671121, + "grad_norm": 1.1978867053985596, + "learning_rate": 7.396930359656924e-06, + "loss": 0.3363, + "step": 18015 + }, + { + "epoch": 0.3606536045842404, + "grad_norm": 1.2526582479476929, + "learning_rate": 7.396645850278752e-06, + "loss": 0.3127, + "step": 18016 + }, + { + "epoch": 0.36067362310136875, + "grad_norm": 1.0529111623764038, + "learning_rate": 7.396361330825596e-06, + "loss": 0.3169, + "step": 18017 + }, + { + "epoch": 0.3606936416184971, + "grad_norm": 1.8142262697219849, + "learning_rate": 7.396076801298653e-06, + "loss": 0.8072, + "step": 18018 + }, + { + "epoch": 0.36071366013562545, + "grad_norm": 1.2308354377746582, + "learning_rate": 7.395792261699119e-06, + "loss": 0.299, + "step": 18019 + }, + { + "epoch": 0.3607336786527538, + "grad_norm": 1.0811638832092285, + "learning_rate": 7.3955077120281916e-06, + "loss": 0.3072, + "step": 18020 + }, + { + "epoch": 0.36075369716988215, + "grad_norm": 1.1314595937728882, + "learning_rate": 7.395223152287063e-06, + "loss": 0.3863, + "step": 18021 + }, + { + "epoch": 0.3607737156870105, + "grad_norm": 1.1190176010131836, + "learning_rate": 7.3949385824769334e-06, + "loss": 0.2451, + "step": 18022 + }, + { + "epoch": 0.36079373420413885, + "grad_norm": 1.0800434350967407, + "learning_rate": 7.394654002598996e-06, + "loss": 0.2998, + "step": 18023 + }, + { + "epoch": 0.36081375272126714, + "grad_norm": 1.2098273038864136, + "learning_rate": 7.3943694126544485e-06, + "loss": 0.3894, + "step": 18024 + }, + { + "epoch": 0.3608337712383955, + "grad_norm": 1.1052409410476685, + "learning_rate": 7.3940848126444875e-06, + "loss": 0.2823, + "step": 18025 + }, + { + "epoch": 0.36085378975552385, + "grad_norm": 1.0204846858978271, + "learning_rate": 7.393800202570309e-06, + "loss": 0.3394, + "step": 18026 + }, + { + "epoch": 0.3608738082726522, + "grad_norm": 1.0360902547836304, + "learning_rate": 7.39351558243311e-06, + "loss": 0.2838, + "step": 18027 + }, + { + "epoch": 0.36089382678978055, + "grad_norm": 1.168683648109436, + "learning_rate": 7.393230952234087e-06, + "loss": 0.3438, + "step": 18028 + }, + { + "epoch": 0.3609138453069089, + "grad_norm": 1.1350102424621582, + "learning_rate": 7.392946311974436e-06, + "loss": 0.3251, + "step": 18029 + }, + { + "epoch": 0.36093386382403725, + "grad_norm": 1.0831176042556763, + "learning_rate": 7.392661661655353e-06, + "loss": 0.2808, + "step": 18030 + }, + { + "epoch": 0.3609538823411656, + "grad_norm": 1.4059722423553467, + "learning_rate": 7.392377001278036e-06, + "loss": 0.3489, + "step": 18031 + }, + { + "epoch": 0.3609739008582939, + "grad_norm": 1.1152597665786743, + "learning_rate": 7.39209233084368e-06, + "loss": 0.3363, + "step": 18032 + }, + { + "epoch": 0.36099391937542225, + "grad_norm": 0.9841442704200745, + "learning_rate": 7.391807650353484e-06, + "loss": 0.2902, + "step": 18033 + }, + { + "epoch": 0.3610139378925506, + "grad_norm": 1.5418105125427246, + "learning_rate": 7.3915229598086414e-06, + "loss": 0.3639, + "step": 18034 + }, + { + "epoch": 0.36103395640967895, + "grad_norm": 1.0904954671859741, + "learning_rate": 7.3912382592103535e-06, + "loss": 0.3002, + "step": 18035 + }, + { + "epoch": 0.3610539749268073, + "grad_norm": 1.066812515258789, + "learning_rate": 7.390953548559811e-06, + "loss": 0.3259, + "step": 18036 + }, + { + "epoch": 0.36107399344393565, + "grad_norm": 1.1076369285583496, + "learning_rate": 7.390668827858217e-06, + "loss": 0.343, + "step": 18037 + }, + { + "epoch": 0.361094011961064, + "grad_norm": 1.1309266090393066, + "learning_rate": 7.390384097106765e-06, + "loss": 0.3543, + "step": 18038 + }, + { + "epoch": 0.36111403047819235, + "grad_norm": 1.482335090637207, + "learning_rate": 7.3900993563066534e-06, + "loss": 0.3575, + "step": 18039 + }, + { + "epoch": 0.36113404899532064, + "grad_norm": 1.058836817741394, + "learning_rate": 7.389814605459078e-06, + "loss": 0.3155, + "step": 18040 + }, + { + "epoch": 0.361154067512449, + "grad_norm": 1.1009145975112915, + "learning_rate": 7.3895298445652365e-06, + "loss": 0.3417, + "step": 18041 + }, + { + "epoch": 0.36117408602957735, + "grad_norm": 1.0882447957992554, + "learning_rate": 7.389245073626326e-06, + "loss": 0.3594, + "step": 18042 + }, + { + "epoch": 0.3611941045467057, + "grad_norm": 1.1336466073989868, + "learning_rate": 7.388960292643542e-06, + "loss": 0.3337, + "step": 18043 + }, + { + "epoch": 0.36121412306383405, + "grad_norm": 1.128285527229309, + "learning_rate": 7.388675501618083e-06, + "loss": 0.3382, + "step": 18044 + }, + { + "epoch": 0.3612341415809624, + "grad_norm": 1.896070957183838, + "learning_rate": 7.388390700551148e-06, + "loss": 0.7755, + "step": 18045 + }, + { + "epoch": 0.36125416009809075, + "grad_norm": 1.0918558835983276, + "learning_rate": 7.388105889443932e-06, + "loss": 0.2864, + "step": 18046 + }, + { + "epoch": 0.3612741786152191, + "grad_norm": 1.872260570526123, + "learning_rate": 7.387821068297632e-06, + "loss": 0.8222, + "step": 18047 + }, + { + "epoch": 0.3612941971323474, + "grad_norm": 1.153660535812378, + "learning_rate": 7.3875362371134466e-06, + "loss": 0.3699, + "step": 18048 + }, + { + "epoch": 0.36131421564947575, + "grad_norm": 1.2164685726165771, + "learning_rate": 7.3872513958925715e-06, + "loss": 0.2945, + "step": 18049 + }, + { + "epoch": 0.3613342341666041, + "grad_norm": 1.0497766733169556, + "learning_rate": 7.386966544636207e-06, + "loss": 0.3229, + "step": 18050 + }, + { + "epoch": 0.36135425268373245, + "grad_norm": 1.042724609375, + "learning_rate": 7.386681683345549e-06, + "loss": 0.3029, + "step": 18051 + }, + { + "epoch": 0.3613742712008608, + "grad_norm": 1.0064688920974731, + "learning_rate": 7.386396812021793e-06, + "loss": 0.3351, + "step": 18052 + }, + { + "epoch": 0.36139428971798915, + "grad_norm": 1.880621075630188, + "learning_rate": 7.38611193066614e-06, + "loss": 0.8479, + "step": 18053 + }, + { + "epoch": 0.3614143082351175, + "grad_norm": 1.0216898918151855, + "learning_rate": 7.385827039279785e-06, + "loss": 0.2458, + "step": 18054 + }, + { + "epoch": 0.36143432675224585, + "grad_norm": 1.1142027378082275, + "learning_rate": 7.385542137863927e-06, + "loss": 0.3326, + "step": 18055 + }, + { + "epoch": 0.36145434526937414, + "grad_norm": 1.1745271682739258, + "learning_rate": 7.385257226419763e-06, + "loss": 0.2961, + "step": 18056 + }, + { + "epoch": 0.3614743637865025, + "grad_norm": 1.0459022521972656, + "learning_rate": 7.38497230494849e-06, + "loss": 0.3205, + "step": 18057 + }, + { + "epoch": 0.36149438230363085, + "grad_norm": 1.1224418878555298, + "learning_rate": 7.384687373451307e-06, + "loss": 0.359, + "step": 18058 + }, + { + "epoch": 0.3615144008207592, + "grad_norm": 1.0459272861480713, + "learning_rate": 7.384402431929414e-06, + "loss": 0.2799, + "step": 18059 + }, + { + "epoch": 0.36153441933788755, + "grad_norm": 1.1072595119476318, + "learning_rate": 7.384117480384003e-06, + "loss": 0.3149, + "step": 18060 + }, + { + "epoch": 0.3615544378550159, + "grad_norm": 1.0770155191421509, + "learning_rate": 7.383832518816277e-06, + "loss": 0.3109, + "step": 18061 + }, + { + "epoch": 0.36157445637214425, + "grad_norm": 1.1876552104949951, + "learning_rate": 7.383547547227432e-06, + "loss": 0.3203, + "step": 18062 + }, + { + "epoch": 0.3615944748892726, + "grad_norm": 1.0702416896820068, + "learning_rate": 7.383262565618665e-06, + "loss": 0.3092, + "step": 18063 + }, + { + "epoch": 0.3616144934064009, + "grad_norm": 0.9917477965354919, + "learning_rate": 7.382977573991177e-06, + "loss": 0.3012, + "step": 18064 + }, + { + "epoch": 0.36163451192352924, + "grad_norm": 1.0583621263504028, + "learning_rate": 7.382692572346161e-06, + "loss": 0.315, + "step": 18065 + }, + { + "epoch": 0.3616545304406576, + "grad_norm": 1.113438367843628, + "learning_rate": 7.382407560684821e-06, + "loss": 0.2931, + "step": 18066 + }, + { + "epoch": 0.36167454895778595, + "grad_norm": 1.2078874111175537, + "learning_rate": 7.3821225390083515e-06, + "loss": 0.3643, + "step": 18067 + }, + { + "epoch": 0.3616945674749143, + "grad_norm": 1.2486157417297363, + "learning_rate": 7.3818375073179524e-06, + "loss": 0.3115, + "step": 18068 + }, + { + "epoch": 0.36171458599204265, + "grad_norm": 1.0987807512283325, + "learning_rate": 7.381552465614819e-06, + "loss": 0.3174, + "step": 18069 + }, + { + "epoch": 0.361734604509171, + "grad_norm": 1.8415703773498535, + "learning_rate": 7.381267413900153e-06, + "loss": 0.7774, + "step": 18070 + }, + { + "epoch": 0.36175462302629935, + "grad_norm": 1.1649483442306519, + "learning_rate": 7.380982352175151e-06, + "loss": 0.3374, + "step": 18071 + }, + { + "epoch": 0.36177464154342764, + "grad_norm": 1.3142850399017334, + "learning_rate": 7.380697280441012e-06, + "loss": 0.308, + "step": 18072 + }, + { + "epoch": 0.361794660060556, + "grad_norm": 2.158346176147461, + "learning_rate": 7.380412198698933e-06, + "loss": 0.803, + "step": 18073 + }, + { + "epoch": 0.36181467857768435, + "grad_norm": 1.0701943635940552, + "learning_rate": 7.380127106950113e-06, + "loss": 0.3255, + "step": 18074 + }, + { + "epoch": 0.3618346970948127, + "grad_norm": 1.005531907081604, + "learning_rate": 7.379842005195752e-06, + "loss": 0.3419, + "step": 18075 + }, + { + "epoch": 0.36185471561194105, + "grad_norm": 1.1108425855636597, + "learning_rate": 7.379556893437045e-06, + "loss": 0.313, + "step": 18076 + }, + { + "epoch": 0.3618747341290694, + "grad_norm": 1.093076229095459, + "learning_rate": 7.379271771675196e-06, + "loss": 0.3244, + "step": 18077 + }, + { + "epoch": 0.36189475264619775, + "grad_norm": 1.1180787086486816, + "learning_rate": 7.378986639911399e-06, + "loss": 0.3173, + "step": 18078 + }, + { + "epoch": 0.3619147711633261, + "grad_norm": 1.2040066719055176, + "learning_rate": 7.3787014981468555e-06, + "loss": 0.3444, + "step": 18079 + }, + { + "epoch": 0.3619347896804544, + "grad_norm": 1.125185251235962, + "learning_rate": 7.378416346382762e-06, + "loss": 0.3313, + "step": 18080 + }, + { + "epoch": 0.36195480819758274, + "grad_norm": 1.0378974676132202, + "learning_rate": 7.378131184620317e-06, + "loss": 0.3343, + "step": 18081 + }, + { + "epoch": 0.3619748267147111, + "grad_norm": 1.0533225536346436, + "learning_rate": 7.377846012860721e-06, + "loss": 0.3362, + "step": 18082 + }, + { + "epoch": 0.36199484523183945, + "grad_norm": 1.1015032529830933, + "learning_rate": 7.377560831105173e-06, + "loss": 0.2868, + "step": 18083 + }, + { + "epoch": 0.3620148637489678, + "grad_norm": 2.0813353061676025, + "learning_rate": 7.3772756393548694e-06, + "loss": 0.8647, + "step": 18084 + }, + { + "epoch": 0.36203488226609615, + "grad_norm": 1.077597975730896, + "learning_rate": 7.376990437611011e-06, + "loss": 0.296, + "step": 18085 + }, + { + "epoch": 0.3620549007832245, + "grad_norm": 1.373350739479065, + "learning_rate": 7.376705225874798e-06, + "loss": 0.3594, + "step": 18086 + }, + { + "epoch": 0.36207491930035285, + "grad_norm": 1.0882643461227417, + "learning_rate": 7.376420004147426e-06, + "loss": 0.3442, + "step": 18087 + }, + { + "epoch": 0.36209493781748114, + "grad_norm": 1.9996520280838013, + "learning_rate": 7.3761347724300944e-06, + "loss": 0.8046, + "step": 18088 + }, + { + "epoch": 0.3621149563346095, + "grad_norm": 1.3138631582260132, + "learning_rate": 7.375849530724005e-06, + "loss": 0.3665, + "step": 18089 + }, + { + "epoch": 0.36213497485173785, + "grad_norm": 1.366560459136963, + "learning_rate": 7.3755642790303575e-06, + "loss": 0.2922, + "step": 18090 + }, + { + "epoch": 0.3621549933688662, + "grad_norm": 1.0882773399353027, + "learning_rate": 7.375279017350346e-06, + "loss": 0.3081, + "step": 18091 + }, + { + "epoch": 0.36217501188599455, + "grad_norm": 1.114338994026184, + "learning_rate": 7.374993745685175e-06, + "loss": 0.302, + "step": 18092 + }, + { + "epoch": 0.3621950304031229, + "grad_norm": 1.1477446556091309, + "learning_rate": 7.37470846403604e-06, + "loss": 0.3386, + "step": 18093 + }, + { + "epoch": 0.36221504892025125, + "grad_norm": 1.2485779523849487, + "learning_rate": 7.374423172404141e-06, + "loss": 0.3674, + "step": 18094 + }, + { + "epoch": 0.3622350674373796, + "grad_norm": 1.1032017469406128, + "learning_rate": 7.37413787079068e-06, + "loss": 0.3168, + "step": 18095 + }, + { + "epoch": 0.3622550859545079, + "grad_norm": 1.1414546966552734, + "learning_rate": 7.373852559196852e-06, + "loss": 0.3325, + "step": 18096 + }, + { + "epoch": 0.36227510447163624, + "grad_norm": 1.085004448890686, + "learning_rate": 7.373567237623861e-06, + "loss": 0.3761, + "step": 18097 + }, + { + "epoch": 0.3622951229887646, + "grad_norm": 1.2514758110046387, + "learning_rate": 7.373281906072902e-06, + "loss": 0.4036, + "step": 18098 + }, + { + "epoch": 0.36231514150589295, + "grad_norm": 1.5145403146743774, + "learning_rate": 7.3729965645451786e-06, + "loss": 0.36, + "step": 18099 + }, + { + "epoch": 0.3623351600230213, + "grad_norm": 1.0732007026672363, + "learning_rate": 7.372711213041886e-06, + "loss": 0.3157, + "step": 18100 + }, + { + "epoch": 0.36235517854014965, + "grad_norm": 1.0604230165481567, + "learning_rate": 7.372425851564228e-06, + "loss": 0.29, + "step": 18101 + }, + { + "epoch": 0.362375197057278, + "grad_norm": 1.8416743278503418, + "learning_rate": 7.372140480113401e-06, + "loss": 0.8389, + "step": 18102 + }, + { + "epoch": 0.36239521557440635, + "grad_norm": 2.2209839820861816, + "learning_rate": 7.371855098690607e-06, + "loss": 0.769, + "step": 18103 + }, + { + "epoch": 0.36241523409153464, + "grad_norm": 1.107470989227295, + "learning_rate": 7.371569707297045e-06, + "loss": 0.2964, + "step": 18104 + }, + { + "epoch": 0.362435252608663, + "grad_norm": 1.0854852199554443, + "learning_rate": 7.371284305933913e-06, + "loss": 0.3306, + "step": 18105 + }, + { + "epoch": 0.36245527112579135, + "grad_norm": 1.0802935361862183, + "learning_rate": 7.370998894602414e-06, + "loss": 0.3069, + "step": 18106 + }, + { + "epoch": 0.3624752896429197, + "grad_norm": 1.16739821434021, + "learning_rate": 7.370713473303743e-06, + "loss": 0.3639, + "step": 18107 + }, + { + "epoch": 0.36249530816004805, + "grad_norm": 1.0111440420150757, + "learning_rate": 7.370428042039104e-06, + "loss": 0.3101, + "step": 18108 + }, + { + "epoch": 0.3625153266771764, + "grad_norm": 0.9455140829086304, + "learning_rate": 7.370142600809695e-06, + "loss": 0.2866, + "step": 18109 + }, + { + "epoch": 0.36253534519430475, + "grad_norm": 1.088233470916748, + "learning_rate": 7.369857149616719e-06, + "loss": 0.3218, + "step": 18110 + }, + { + "epoch": 0.3625553637114331, + "grad_norm": 1.0824658870697021, + "learning_rate": 7.369571688461371e-06, + "loss": 0.3215, + "step": 18111 + }, + { + "epoch": 0.3625753822285614, + "grad_norm": 1.0162369012832642, + "learning_rate": 7.369286217344855e-06, + "loss": 0.2845, + "step": 18112 + }, + { + "epoch": 0.36259540074568974, + "grad_norm": 1.1971544027328491, + "learning_rate": 7.369000736268369e-06, + "loss": 0.3177, + "step": 18113 + }, + { + "epoch": 0.3626154192628181, + "grad_norm": 1.1369774341583252, + "learning_rate": 7.368715245233113e-06, + "loss": 0.3263, + "step": 18114 + }, + { + "epoch": 0.36263543777994645, + "grad_norm": 1.1968307495117188, + "learning_rate": 7.36842974424029e-06, + "loss": 0.3163, + "step": 18115 + }, + { + "epoch": 0.3626554562970748, + "grad_norm": 1.993624210357666, + "learning_rate": 7.368144233291096e-06, + "loss": 0.8672, + "step": 18116 + }, + { + "epoch": 0.36267547481420315, + "grad_norm": 1.2156215906143188, + "learning_rate": 7.367858712386735e-06, + "loss": 0.3281, + "step": 18117 + }, + { + "epoch": 0.3626954933313315, + "grad_norm": 1.1525516510009766, + "learning_rate": 7.367573181528403e-06, + "loss": 0.3463, + "step": 18118 + }, + { + "epoch": 0.36271551184845985, + "grad_norm": 1.0892205238342285, + "learning_rate": 7.367287640717305e-06, + "loss": 0.2625, + "step": 18119 + }, + { + "epoch": 0.36273553036558814, + "grad_norm": 1.149718999862671, + "learning_rate": 7.367002089954639e-06, + "loss": 0.3476, + "step": 18120 + }, + { + "epoch": 0.3627555488827165, + "grad_norm": 1.1022107601165771, + "learning_rate": 7.3667165292416046e-06, + "loss": 0.2838, + "step": 18121 + }, + { + "epoch": 0.36277556739984484, + "grad_norm": 1.0941526889801025, + "learning_rate": 7.366430958579404e-06, + "loss": 0.3387, + "step": 18122 + }, + { + "epoch": 0.3627955859169732, + "grad_norm": 1.1095829010009766, + "learning_rate": 7.366145377969237e-06, + "loss": 0.3357, + "step": 18123 + }, + { + "epoch": 0.36281560443410155, + "grad_norm": 1.0969878435134888, + "learning_rate": 7.365859787412305e-06, + "loss": 0.2872, + "step": 18124 + }, + { + "epoch": 0.3628356229512299, + "grad_norm": 1.3006218671798706, + "learning_rate": 7.365574186909805e-06, + "loss": 0.3428, + "step": 18125 + }, + { + "epoch": 0.36285564146835825, + "grad_norm": 1.0438613891601562, + "learning_rate": 7.365288576462941e-06, + "loss": 0.3608, + "step": 18126 + }, + { + "epoch": 0.3628756599854866, + "grad_norm": 1.10908043384552, + "learning_rate": 7.365002956072914e-06, + "loss": 0.3005, + "step": 18127 + }, + { + "epoch": 0.3628956785026149, + "grad_norm": 1.188528060913086, + "learning_rate": 7.3647173257409235e-06, + "loss": 0.3261, + "step": 18128 + }, + { + "epoch": 0.36291569701974324, + "grad_norm": 1.1138668060302734, + "learning_rate": 7.364431685468169e-06, + "loss": 0.342, + "step": 18129 + }, + { + "epoch": 0.3629357155368716, + "grad_norm": 1.0972002744674683, + "learning_rate": 7.3641460352558545e-06, + "loss": 0.3158, + "step": 18130 + }, + { + "epoch": 0.36295573405399995, + "grad_norm": 1.1374599933624268, + "learning_rate": 7.363860375105176e-06, + "loss": 0.2858, + "step": 18131 + }, + { + "epoch": 0.3629757525711283, + "grad_norm": 1.2111544609069824, + "learning_rate": 7.36357470501734e-06, + "loss": 0.3606, + "step": 18132 + }, + { + "epoch": 0.36299577108825665, + "grad_norm": 1.847899317741394, + "learning_rate": 7.363289024993543e-06, + "loss": 0.7978, + "step": 18133 + }, + { + "epoch": 0.363015789605385, + "grad_norm": 1.093537449836731, + "learning_rate": 7.363003335034987e-06, + "loss": 0.3072, + "step": 18134 + }, + { + "epoch": 0.36303580812251335, + "grad_norm": 1.079006314277649, + "learning_rate": 7.362717635142875e-06, + "loss": 0.2836, + "step": 18135 + }, + { + "epoch": 0.36305582663964164, + "grad_norm": 1.1702909469604492, + "learning_rate": 7.362431925318406e-06, + "loss": 0.2945, + "step": 18136 + }, + { + "epoch": 0.36307584515677, + "grad_norm": 1.0608500242233276, + "learning_rate": 7.362146205562782e-06, + "loss": 0.3348, + "step": 18137 + }, + { + "epoch": 0.36309586367389834, + "grad_norm": 1.004477858543396, + "learning_rate": 7.361860475877203e-06, + "loss": 0.3431, + "step": 18138 + }, + { + "epoch": 0.3631158821910267, + "grad_norm": 1.227320909500122, + "learning_rate": 7.3615747362628705e-06, + "loss": 0.3423, + "step": 18139 + }, + { + "epoch": 0.36313590070815505, + "grad_norm": 1.2371701002120972, + "learning_rate": 7.361288986720987e-06, + "loss": 0.3326, + "step": 18140 + }, + { + "epoch": 0.3631559192252834, + "grad_norm": 1.2842680215835571, + "learning_rate": 7.361003227252752e-06, + "loss": 0.3266, + "step": 18141 + }, + { + "epoch": 0.36317593774241175, + "grad_norm": 1.1136776208877563, + "learning_rate": 7.360717457859367e-06, + "loss": 0.2883, + "step": 18142 + }, + { + "epoch": 0.3631959562595401, + "grad_norm": 1.1554855108261108, + "learning_rate": 7.360431678542036e-06, + "loss": 0.2938, + "step": 18143 + }, + { + "epoch": 0.3632159747766684, + "grad_norm": 1.1538218259811401, + "learning_rate": 7.360145889301957e-06, + "loss": 0.2692, + "step": 18144 + }, + { + "epoch": 0.36323599329379674, + "grad_norm": 1.1585928201675415, + "learning_rate": 7.3598600901403325e-06, + "loss": 0.3016, + "step": 18145 + }, + { + "epoch": 0.3632560118109251, + "grad_norm": 1.0957629680633545, + "learning_rate": 7.359574281058364e-06, + "loss": 0.3302, + "step": 18146 + }, + { + "epoch": 0.36327603032805345, + "grad_norm": 1.2114180326461792, + "learning_rate": 7.3592884620572525e-06, + "loss": 0.3241, + "step": 18147 + }, + { + "epoch": 0.3632960488451818, + "grad_norm": 1.0789114236831665, + "learning_rate": 7.3590026331382e-06, + "loss": 0.3046, + "step": 18148 + }, + { + "epoch": 0.36331606736231015, + "grad_norm": 1.16542387008667, + "learning_rate": 7.358716794302409e-06, + "loss": 0.293, + "step": 18149 + }, + { + "epoch": 0.3633360858794385, + "grad_norm": 1.0725622177124023, + "learning_rate": 7.35843094555108e-06, + "loss": 0.3099, + "step": 18150 + }, + { + "epoch": 0.36335610439656685, + "grad_norm": 1.213597059249878, + "learning_rate": 7.358145086885414e-06, + "loss": 0.329, + "step": 18151 + }, + { + "epoch": 0.36337612291369514, + "grad_norm": 1.7389675378799438, + "learning_rate": 7.3578592183066145e-06, + "loss": 0.8496, + "step": 18152 + }, + { + "epoch": 0.3633961414308235, + "grad_norm": 1.007638931274414, + "learning_rate": 7.357573339815881e-06, + "loss": 0.3108, + "step": 18153 + }, + { + "epoch": 0.36341615994795184, + "grad_norm": 1.0045830011367798, + "learning_rate": 7.357287451414419e-06, + "loss": 0.3166, + "step": 18154 + }, + { + "epoch": 0.3634361784650802, + "grad_norm": 1.1025863885879517, + "learning_rate": 7.357001553103427e-06, + "loss": 0.3837, + "step": 18155 + }, + { + "epoch": 0.36345619698220855, + "grad_norm": 1.22813081741333, + "learning_rate": 7.356715644884106e-06, + "loss": 0.3008, + "step": 18156 + }, + { + "epoch": 0.3634762154993369, + "grad_norm": 0.9927284121513367, + "learning_rate": 7.356429726757661e-06, + "loss": 0.3593, + "step": 18157 + }, + { + "epoch": 0.36349623401646525, + "grad_norm": 1.6068323850631714, + "learning_rate": 7.356143798725293e-06, + "loss": 0.3652, + "step": 18158 + }, + { + "epoch": 0.3635162525335936, + "grad_norm": 1.11928391456604, + "learning_rate": 7.355857860788202e-06, + "loss": 0.3308, + "step": 18159 + }, + { + "epoch": 0.3635362710507219, + "grad_norm": 1.0948253870010376, + "learning_rate": 7.355571912947591e-06, + "loss": 0.3569, + "step": 18160 + }, + { + "epoch": 0.36355628956785024, + "grad_norm": 1.1101391315460205, + "learning_rate": 7.355285955204665e-06, + "loss": 0.3732, + "step": 18161 + }, + { + "epoch": 0.3635763080849786, + "grad_norm": 1.2254064083099365, + "learning_rate": 7.354999987560621e-06, + "loss": 0.2806, + "step": 18162 + }, + { + "epoch": 0.36359632660210695, + "grad_norm": 1.1315587759017944, + "learning_rate": 7.354714010016667e-06, + "loss": 0.314, + "step": 18163 + }, + { + "epoch": 0.3636163451192353, + "grad_norm": 1.0081512928009033, + "learning_rate": 7.354428022573999e-06, + "loss": 0.2981, + "step": 18164 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 1.0428218841552734, + "learning_rate": 7.354142025233823e-06, + "loss": 0.2892, + "step": 18165 + }, + { + "epoch": 0.363656382153492, + "grad_norm": 1.1552373170852661, + "learning_rate": 7.353856017997343e-06, + "loss": 0.309, + "step": 18166 + }, + { + "epoch": 0.36367640067062035, + "grad_norm": 1.7861839532852173, + "learning_rate": 7.353570000865757e-06, + "loss": 0.8929, + "step": 18167 + }, + { + "epoch": 0.36369641918774864, + "grad_norm": 1.018799066543579, + "learning_rate": 7.353283973840269e-06, + "loss": 0.3202, + "step": 18168 + }, + { + "epoch": 0.363716437704877, + "grad_norm": 1.147413730621338, + "learning_rate": 7.352997936922082e-06, + "loss": 0.2654, + "step": 18169 + }, + { + "epoch": 0.36373645622200534, + "grad_norm": 0.8711066246032715, + "learning_rate": 7.352711890112398e-06, + "loss": 0.2756, + "step": 18170 + }, + { + "epoch": 0.3637564747391337, + "grad_norm": 1.9202042818069458, + "learning_rate": 7.35242583341242e-06, + "loss": 0.7929, + "step": 18171 + }, + { + "epoch": 0.36377649325626205, + "grad_norm": 1.7333379983901978, + "learning_rate": 7.35213976682335e-06, + "loss": 0.8604, + "step": 18172 + }, + { + "epoch": 0.3637965117733904, + "grad_norm": 1.9725593328475952, + "learning_rate": 7.351853690346389e-06, + "loss": 0.8983, + "step": 18173 + }, + { + "epoch": 0.36381653029051875, + "grad_norm": 1.0550605058670044, + "learning_rate": 7.351567603982743e-06, + "loss": 0.3435, + "step": 18174 + }, + { + "epoch": 0.3638365488076471, + "grad_norm": 1.1246364116668701, + "learning_rate": 7.351281507733612e-06, + "loss": 0.287, + "step": 18175 + }, + { + "epoch": 0.3638565673247754, + "grad_norm": 1.066636085510254, + "learning_rate": 7.350995401600201e-06, + "loss": 0.3432, + "step": 18176 + }, + { + "epoch": 0.36387658584190374, + "grad_norm": 1.781308889389038, + "learning_rate": 7.3507092855837085e-06, + "loss": 0.7241, + "step": 18177 + }, + { + "epoch": 0.3638966043590321, + "grad_norm": 1.9569400548934937, + "learning_rate": 7.350423159685342e-06, + "loss": 0.8347, + "step": 18178 + }, + { + "epoch": 0.36391662287616044, + "grad_norm": 1.1192697286605835, + "learning_rate": 7.350137023906302e-06, + "loss": 0.3528, + "step": 18179 + }, + { + "epoch": 0.3639366413932888, + "grad_norm": 1.164525032043457, + "learning_rate": 7.349850878247791e-06, + "loss": 0.3669, + "step": 18180 + }, + { + "epoch": 0.36395665991041715, + "grad_norm": 1.1946262121200562, + "learning_rate": 7.3495647227110135e-06, + "loss": 0.3613, + "step": 18181 + }, + { + "epoch": 0.3639766784275455, + "grad_norm": 1.8405075073242188, + "learning_rate": 7.3492785572971715e-06, + "loss": 0.8037, + "step": 18182 + }, + { + "epoch": 0.36399669694467385, + "grad_norm": 1.1162413358688354, + "learning_rate": 7.348992382007468e-06, + "loss": 0.3121, + "step": 18183 + }, + { + "epoch": 0.36401671546180214, + "grad_norm": 1.0500497817993164, + "learning_rate": 7.348706196843105e-06, + "loss": 0.3033, + "step": 18184 + }, + { + "epoch": 0.3640367339789305, + "grad_norm": 1.1255619525909424, + "learning_rate": 7.348420001805288e-06, + "loss": 0.3362, + "step": 18185 + }, + { + "epoch": 0.36405675249605884, + "grad_norm": 1.0817536115646362, + "learning_rate": 7.348133796895218e-06, + "loss": 0.3266, + "step": 18186 + }, + { + "epoch": 0.3640767710131872, + "grad_norm": 1.0276869535446167, + "learning_rate": 7.347847582114098e-06, + "loss": 0.3071, + "step": 18187 + }, + { + "epoch": 0.36409678953031555, + "grad_norm": 1.010373830795288, + "learning_rate": 7.347561357463134e-06, + "loss": 0.2961, + "step": 18188 + }, + { + "epoch": 0.3641168080474439, + "grad_norm": 1.0937373638153076, + "learning_rate": 7.347275122943527e-06, + "loss": 0.2804, + "step": 18189 + }, + { + "epoch": 0.36413682656457225, + "grad_norm": 1.95185124874115, + "learning_rate": 7.3469888785564784e-06, + "loss": 0.7995, + "step": 18190 + }, + { + "epoch": 0.3641568450817006, + "grad_norm": 1.0916998386383057, + "learning_rate": 7.346702624303195e-06, + "loss": 0.3211, + "step": 18191 + }, + { + "epoch": 0.3641768635988289, + "grad_norm": 1.0754003524780273, + "learning_rate": 7.346416360184879e-06, + "loss": 0.3403, + "step": 18192 + }, + { + "epoch": 0.36419688211595724, + "grad_norm": 1.1169973611831665, + "learning_rate": 7.346130086202733e-06, + "loss": 0.326, + "step": 18193 + }, + { + "epoch": 0.3642169006330856, + "grad_norm": 1.1625810861587524, + "learning_rate": 7.3458438023579615e-06, + "loss": 0.341, + "step": 18194 + }, + { + "epoch": 0.36423691915021394, + "grad_norm": 1.7261546850204468, + "learning_rate": 7.345557508651767e-06, + "loss": 0.806, + "step": 18195 + }, + { + "epoch": 0.3642569376673423, + "grad_norm": 1.0207312107086182, + "learning_rate": 7.345271205085355e-06, + "loss": 0.3107, + "step": 18196 + }, + { + "epoch": 0.36427695618447065, + "grad_norm": 1.9241557121276855, + "learning_rate": 7.344984891659925e-06, + "loss": 0.7806, + "step": 18197 + }, + { + "epoch": 0.364296974701599, + "grad_norm": 0.989601731300354, + "learning_rate": 7.344698568376685e-06, + "loss": 0.3469, + "step": 18198 + }, + { + "epoch": 0.36431699321872735, + "grad_norm": 1.0858765840530396, + "learning_rate": 7.344412235236837e-06, + "loss": 0.2776, + "step": 18199 + }, + { + "epoch": 0.36433701173585564, + "grad_norm": 1.3312413692474365, + "learning_rate": 7.344125892241584e-06, + "loss": 0.3152, + "step": 18200 + }, + { + "epoch": 0.364357030252984, + "grad_norm": 1.1142206192016602, + "learning_rate": 7.34383953939213e-06, + "loss": 0.3528, + "step": 18201 + }, + { + "epoch": 0.36437704877011234, + "grad_norm": 1.0017975568771362, + "learning_rate": 7.3435531766896795e-06, + "loss": 0.3164, + "step": 18202 + }, + { + "epoch": 0.3643970672872407, + "grad_norm": 1.172656774520874, + "learning_rate": 7.343266804135434e-06, + "loss": 0.3421, + "step": 18203 + }, + { + "epoch": 0.36441708580436905, + "grad_norm": 1.0907478332519531, + "learning_rate": 7.3429804217306e-06, + "loss": 0.3233, + "step": 18204 + }, + { + "epoch": 0.3644371043214974, + "grad_norm": 1.0397040843963623, + "learning_rate": 7.342694029476382e-06, + "loss": 0.2774, + "step": 18205 + }, + { + "epoch": 0.36445712283862575, + "grad_norm": 1.9656339883804321, + "learning_rate": 7.342407627373981e-06, + "loss": 0.8803, + "step": 18206 + }, + { + "epoch": 0.3644771413557541, + "grad_norm": 1.319569706916809, + "learning_rate": 7.342121215424603e-06, + "loss": 0.325, + "step": 18207 + }, + { + "epoch": 0.3644971598728824, + "grad_norm": 1.1920579671859741, + "learning_rate": 7.341834793629452e-06, + "loss": 0.3344, + "step": 18208 + }, + { + "epoch": 0.36451717839001074, + "grad_norm": 2.1163668632507324, + "learning_rate": 7.341548361989731e-06, + "loss": 0.8615, + "step": 18209 + }, + { + "epoch": 0.3645371969071391, + "grad_norm": 1.1298227310180664, + "learning_rate": 7.341261920506644e-06, + "loss": 0.3143, + "step": 18210 + }, + { + "epoch": 0.36455721542426744, + "grad_norm": 1.0760586261749268, + "learning_rate": 7.340975469181395e-06, + "loss": 0.3277, + "step": 18211 + }, + { + "epoch": 0.3645772339413958, + "grad_norm": 1.0732054710388184, + "learning_rate": 7.340689008015191e-06, + "loss": 0.2816, + "step": 18212 + }, + { + "epoch": 0.36459725245852415, + "grad_norm": 1.0818082094192505, + "learning_rate": 7.340402537009232e-06, + "loss": 0.2983, + "step": 18213 + }, + { + "epoch": 0.3646172709756525, + "grad_norm": 1.1368359327316284, + "learning_rate": 7.340116056164727e-06, + "loss": 0.3643, + "step": 18214 + }, + { + "epoch": 0.36463728949278085, + "grad_norm": 1.2037370204925537, + "learning_rate": 7.339829565482876e-06, + "loss": 0.3251, + "step": 18215 + }, + { + "epoch": 0.36465730800990914, + "grad_norm": 1.8168965578079224, + "learning_rate": 7.339543064964885e-06, + "loss": 0.822, + "step": 18216 + }, + { + "epoch": 0.3646773265270375, + "grad_norm": 1.2056169509887695, + "learning_rate": 7.339256554611958e-06, + "loss": 0.3544, + "step": 18217 + }, + { + "epoch": 0.36469734504416584, + "grad_norm": 1.1800177097320557, + "learning_rate": 7.338970034425301e-06, + "loss": 0.3107, + "step": 18218 + }, + { + "epoch": 0.3647173635612942, + "grad_norm": 1.9136911630630493, + "learning_rate": 7.338683504406118e-06, + "loss": 0.7906, + "step": 18219 + }, + { + "epoch": 0.36473738207842255, + "grad_norm": 1.035840392112732, + "learning_rate": 7.338396964555612e-06, + "loss": 0.3082, + "step": 18220 + }, + { + "epoch": 0.3647574005955509, + "grad_norm": 1.0674108266830444, + "learning_rate": 7.338110414874988e-06, + "loss": 0.2969, + "step": 18221 + }, + { + "epoch": 0.36477741911267925, + "grad_norm": 1.7823511362075806, + "learning_rate": 7.337823855365452e-06, + "loss": 0.7811, + "step": 18222 + }, + { + "epoch": 0.3647974376298076, + "grad_norm": 1.5317275524139404, + "learning_rate": 7.337537286028205e-06, + "loss": 0.3371, + "step": 18223 + }, + { + "epoch": 0.3648174561469359, + "grad_norm": 1.0851658582687378, + "learning_rate": 7.337250706864456e-06, + "loss": 0.3641, + "step": 18224 + }, + { + "epoch": 0.36483747466406424, + "grad_norm": 1.8663744926452637, + "learning_rate": 7.336964117875408e-06, + "loss": 0.7603, + "step": 18225 + }, + { + "epoch": 0.3648574931811926, + "grad_norm": 1.128214716911316, + "learning_rate": 7.336677519062265e-06, + "loss": 0.3458, + "step": 18226 + }, + { + "epoch": 0.36487751169832094, + "grad_norm": 1.8183156251907349, + "learning_rate": 7.336390910426235e-06, + "loss": 0.8334, + "step": 18227 + }, + { + "epoch": 0.3648975302154493, + "grad_norm": 1.1758034229278564, + "learning_rate": 7.336104291968519e-06, + "loss": 0.3278, + "step": 18228 + }, + { + "epoch": 0.36491754873257765, + "grad_norm": 1.907027006149292, + "learning_rate": 7.335817663690322e-06, + "loss": 0.7533, + "step": 18229 + }, + { + "epoch": 0.364937567249706, + "grad_norm": 1.9363256692886353, + "learning_rate": 7.3355310255928524e-06, + "loss": 0.7922, + "step": 18230 + }, + { + "epoch": 0.36495758576683435, + "grad_norm": 1.1420698165893555, + "learning_rate": 7.3352443776773104e-06, + "loss": 0.3355, + "step": 18231 + }, + { + "epoch": 0.36497760428396264, + "grad_norm": 1.9842240810394287, + "learning_rate": 7.3349577199449065e-06, + "loss": 0.8351, + "step": 18232 + }, + { + "epoch": 0.364997622801091, + "grad_norm": 1.167051911354065, + "learning_rate": 7.33467105239684e-06, + "loss": 0.3478, + "step": 18233 + }, + { + "epoch": 0.36501764131821934, + "grad_norm": 1.1448687314987183, + "learning_rate": 7.334384375034321e-06, + "loss": 0.3535, + "step": 18234 + }, + { + "epoch": 0.3650376598353477, + "grad_norm": 0.9831673502922058, + "learning_rate": 7.334097687858552e-06, + "loss": 0.2716, + "step": 18235 + }, + { + "epoch": 0.36505767835247604, + "grad_norm": 1.041253685951233, + "learning_rate": 7.333810990870737e-06, + "loss": 0.2987, + "step": 18236 + }, + { + "epoch": 0.3650776968696044, + "grad_norm": 1.189022421836853, + "learning_rate": 7.333524284072085e-06, + "loss": 0.3677, + "step": 18237 + }, + { + "epoch": 0.36509771538673275, + "grad_norm": 1.0914963483810425, + "learning_rate": 7.333237567463798e-06, + "loss": 0.3069, + "step": 18238 + }, + { + "epoch": 0.3651177339038611, + "grad_norm": 1.3613930940628052, + "learning_rate": 7.332950841047084e-06, + "loss": 0.3317, + "step": 18239 + }, + { + "epoch": 0.3651377524209894, + "grad_norm": 1.1455011367797852, + "learning_rate": 7.3326641048231436e-06, + "loss": 0.3904, + "step": 18240 + }, + { + "epoch": 0.36515777093811774, + "grad_norm": 1.1382676362991333, + "learning_rate": 7.332377358793188e-06, + "loss": 0.302, + "step": 18241 + }, + { + "epoch": 0.3651777894552461, + "grad_norm": 1.3106396198272705, + "learning_rate": 7.332090602958418e-06, + "loss": 0.3042, + "step": 18242 + }, + { + "epoch": 0.36519780797237444, + "grad_norm": 0.9858645796775818, + "learning_rate": 7.331803837320043e-06, + "loss": 0.2971, + "step": 18243 + }, + { + "epoch": 0.3652178264895028, + "grad_norm": 1.0708318948745728, + "learning_rate": 7.331517061879264e-06, + "loss": 0.3197, + "step": 18244 + }, + { + "epoch": 0.36523784500663115, + "grad_norm": 1.2347934246063232, + "learning_rate": 7.331230276637291e-06, + "loss": 0.2505, + "step": 18245 + }, + { + "epoch": 0.3652578635237595, + "grad_norm": 1.0771290063858032, + "learning_rate": 7.330943481595327e-06, + "loss": 0.3008, + "step": 18246 + }, + { + "epoch": 0.36527788204088785, + "grad_norm": 1.1028920412063599, + "learning_rate": 7.330656676754579e-06, + "loss": 0.3031, + "step": 18247 + }, + { + "epoch": 0.36529790055801614, + "grad_norm": 2.211606025695801, + "learning_rate": 7.330369862116251e-06, + "loss": 0.8208, + "step": 18248 + }, + { + "epoch": 0.3653179190751445, + "grad_norm": 1.209468126296997, + "learning_rate": 7.330083037681549e-06, + "loss": 0.3573, + "step": 18249 + }, + { + "epoch": 0.36533793759227284, + "grad_norm": 1.1086302995681763, + "learning_rate": 7.329796203451682e-06, + "loss": 0.3388, + "step": 18250 + }, + { + "epoch": 0.3653579561094012, + "grad_norm": 1.9546846151351929, + "learning_rate": 7.3295093594278495e-06, + "loss": 0.7338, + "step": 18251 + }, + { + "epoch": 0.36537797462652954, + "grad_norm": 1.0444618463516235, + "learning_rate": 7.329222505611263e-06, + "loss": 0.3282, + "step": 18252 + }, + { + "epoch": 0.3653979931436579, + "grad_norm": 1.0681350231170654, + "learning_rate": 7.328935642003125e-06, + "loss": 0.3439, + "step": 18253 + }, + { + "epoch": 0.36541801166078625, + "grad_norm": 1.1506755352020264, + "learning_rate": 7.328648768604646e-06, + "loss": 0.3391, + "step": 18254 + }, + { + "epoch": 0.3654380301779146, + "grad_norm": 1.1034390926361084, + "learning_rate": 7.3283618854170245e-06, + "loss": 0.3096, + "step": 18255 + }, + { + "epoch": 0.3654580486950429, + "grad_norm": 1.0289430618286133, + "learning_rate": 7.328074992441474e-06, + "loss": 0.2383, + "step": 18256 + }, + { + "epoch": 0.36547806721217124, + "grad_norm": 1.6533015966415405, + "learning_rate": 7.327788089679195e-06, + "loss": 0.7983, + "step": 18257 + }, + { + "epoch": 0.3654980857292996, + "grad_norm": 0.9904930591583252, + "learning_rate": 7.327501177131398e-06, + "loss": 0.2915, + "step": 18258 + }, + { + "epoch": 0.36551810424642794, + "grad_norm": 1.071422815322876, + "learning_rate": 7.327214254799285e-06, + "loss": 0.3308, + "step": 18259 + }, + { + "epoch": 0.3655381227635563, + "grad_norm": 1.245644450187683, + "learning_rate": 7.326927322684065e-06, + "loss": 0.3378, + "step": 18260 + }, + { + "epoch": 0.36555814128068465, + "grad_norm": 1.1942788362503052, + "learning_rate": 7.3266403807869425e-06, + "loss": 0.3347, + "step": 18261 + }, + { + "epoch": 0.365578159797813, + "grad_norm": 1.191911220550537, + "learning_rate": 7.326353429109125e-06, + "loss": 0.3034, + "step": 18262 + }, + { + "epoch": 0.36559817831494135, + "grad_norm": 1.042319416999817, + "learning_rate": 7.326066467651819e-06, + "loss": 0.3189, + "step": 18263 + }, + { + "epoch": 0.36561819683206964, + "grad_norm": 1.2382789850234985, + "learning_rate": 7.32577949641623e-06, + "loss": 0.3135, + "step": 18264 + }, + { + "epoch": 0.365638215349198, + "grad_norm": 1.2406232357025146, + "learning_rate": 7.325492515403564e-06, + "loss": 0.3179, + "step": 18265 + }, + { + "epoch": 0.36565823386632634, + "grad_norm": 1.1097116470336914, + "learning_rate": 7.325205524615028e-06, + "loss": 0.3338, + "step": 18266 + }, + { + "epoch": 0.3656782523834547, + "grad_norm": 1.0749591588974, + "learning_rate": 7.324918524051828e-06, + "loss": 0.3337, + "step": 18267 + }, + { + "epoch": 0.36569827090058304, + "grad_norm": 1.1525352001190186, + "learning_rate": 7.32463151371517e-06, + "loss": 0.3208, + "step": 18268 + }, + { + "epoch": 0.3657182894177114, + "grad_norm": 1.103295087814331, + "learning_rate": 7.324344493606264e-06, + "loss": 0.3592, + "step": 18269 + }, + { + "epoch": 0.36573830793483975, + "grad_norm": 1.0328240394592285, + "learning_rate": 7.324057463726313e-06, + "loss": 0.2862, + "step": 18270 + }, + { + "epoch": 0.3657583264519681, + "grad_norm": 1.1114027500152588, + "learning_rate": 7.323770424076524e-06, + "loss": 0.301, + "step": 18271 + }, + { + "epoch": 0.3657783449690964, + "grad_norm": 1.1392537355422974, + "learning_rate": 7.323483374658104e-06, + "loss": 0.3222, + "step": 18272 + }, + { + "epoch": 0.36579836348622474, + "grad_norm": 1.8730307817459106, + "learning_rate": 7.323196315472261e-06, + "loss": 0.8242, + "step": 18273 + }, + { + "epoch": 0.3658183820033531, + "grad_norm": 1.04390287399292, + "learning_rate": 7.322909246520199e-06, + "loss": 0.3128, + "step": 18274 + }, + { + "epoch": 0.36583840052048144, + "grad_norm": 1.0682886838912964, + "learning_rate": 7.3226221678031275e-06, + "loss": 0.236, + "step": 18275 + }, + { + "epoch": 0.3658584190376098, + "grad_norm": 1.1679250001907349, + "learning_rate": 7.322335079322251e-06, + "loss": 0.377, + "step": 18276 + }, + { + "epoch": 0.36587843755473815, + "grad_norm": 1.1336561441421509, + "learning_rate": 7.322047981078779e-06, + "loss": 0.3107, + "step": 18277 + }, + { + "epoch": 0.3658984560718665, + "grad_norm": 1.1381113529205322, + "learning_rate": 7.321760873073918e-06, + "loss": 0.3302, + "step": 18278 + }, + { + "epoch": 0.36591847458899485, + "grad_norm": 1.2083666324615479, + "learning_rate": 7.321473755308872e-06, + "loss": 0.3075, + "step": 18279 + }, + { + "epoch": 0.36593849310612314, + "grad_norm": 1.0371789932250977, + "learning_rate": 7.321186627784851e-06, + "loss": 0.2887, + "step": 18280 + }, + { + "epoch": 0.3659585116232515, + "grad_norm": 1.0269910097122192, + "learning_rate": 7.32089949050306e-06, + "loss": 0.2728, + "step": 18281 + }, + { + "epoch": 0.36597853014037984, + "grad_norm": 1.1522363424301147, + "learning_rate": 7.320612343464707e-06, + "loss": 0.3037, + "step": 18282 + }, + { + "epoch": 0.3659985486575082, + "grad_norm": 1.077867865562439, + "learning_rate": 7.320325186671e-06, + "loss": 0.3674, + "step": 18283 + }, + { + "epoch": 0.36601856717463654, + "grad_norm": 0.9787023663520813, + "learning_rate": 7.320038020123145e-06, + "loss": 0.2858, + "step": 18284 + }, + { + "epoch": 0.3660385856917649, + "grad_norm": 1.1079448461532593, + "learning_rate": 7.31975084382235e-06, + "loss": 0.3094, + "step": 18285 + }, + { + "epoch": 0.36605860420889325, + "grad_norm": 1.1115434169769287, + "learning_rate": 7.3194636577698206e-06, + "loss": 0.2743, + "step": 18286 + }, + { + "epoch": 0.3660786227260216, + "grad_norm": 1.1022807359695435, + "learning_rate": 7.319176461966765e-06, + "loss": 0.3092, + "step": 18287 + }, + { + "epoch": 0.3660986412431499, + "grad_norm": 1.0326857566833496, + "learning_rate": 7.31888925641439e-06, + "loss": 0.35, + "step": 18288 + }, + { + "epoch": 0.36611865976027824, + "grad_norm": 1.0857207775115967, + "learning_rate": 7.318602041113906e-06, + "loss": 0.3232, + "step": 18289 + }, + { + "epoch": 0.3661386782774066, + "grad_norm": 1.0852752923965454, + "learning_rate": 7.318314816066518e-06, + "loss": 0.3045, + "step": 18290 + }, + { + "epoch": 0.36615869679453494, + "grad_norm": 1.0379592180252075, + "learning_rate": 7.31802758127343e-06, + "loss": 0.2936, + "step": 18291 + }, + { + "epoch": 0.3661787153116633, + "grad_norm": 1.1239007711410522, + "learning_rate": 7.3177403367358565e-06, + "loss": 0.3111, + "step": 18292 + }, + { + "epoch": 0.36619873382879164, + "grad_norm": 1.966953992843628, + "learning_rate": 7.3174530824549995e-06, + "loss": 0.7848, + "step": 18293 + }, + { + "epoch": 0.36621875234592, + "grad_norm": 1.1642671823501587, + "learning_rate": 7.3171658184320684e-06, + "loss": 0.3435, + "step": 18294 + }, + { + "epoch": 0.36623877086304835, + "grad_norm": 1.0273844003677368, + "learning_rate": 7.316878544668271e-06, + "loss": 0.3015, + "step": 18295 + }, + { + "epoch": 0.36625878938017664, + "grad_norm": 1.05947744846344, + "learning_rate": 7.316591261164816e-06, + "loss": 0.319, + "step": 18296 + }, + { + "epoch": 0.366278807897305, + "grad_norm": 1.9662262201309204, + "learning_rate": 7.316303967922909e-06, + "loss": 0.7556, + "step": 18297 + }, + { + "epoch": 0.36629882641443334, + "grad_norm": 1.156232476234436, + "learning_rate": 7.316016664943759e-06, + "loss": 0.2983, + "step": 18298 + }, + { + "epoch": 0.3663188449315617, + "grad_norm": 1.0533264875411987, + "learning_rate": 7.3157293522285725e-06, + "loss": 0.3205, + "step": 18299 + }, + { + "epoch": 0.36633886344869004, + "grad_norm": 1.2139015197753906, + "learning_rate": 7.315442029778558e-06, + "loss": 0.3415, + "step": 18300 + }, + { + "epoch": 0.3663588819658184, + "grad_norm": 1.105224847793579, + "learning_rate": 7.315154697594925e-06, + "loss": 0.3095, + "step": 18301 + }, + { + "epoch": 0.36637890048294675, + "grad_norm": 0.990619957447052, + "learning_rate": 7.314867355678879e-06, + "loss": 0.3148, + "step": 18302 + }, + { + "epoch": 0.3663989190000751, + "grad_norm": 1.1279438734054565, + "learning_rate": 7.314580004031628e-06, + "loss": 0.3333, + "step": 18303 + }, + { + "epoch": 0.3664189375172034, + "grad_norm": 1.0487195253372192, + "learning_rate": 7.314292642654381e-06, + "loss": 0.2925, + "step": 18304 + }, + { + "epoch": 0.36643895603433174, + "grad_norm": 1.011211633682251, + "learning_rate": 7.314005271548348e-06, + "loss": 0.259, + "step": 18305 + }, + { + "epoch": 0.3664589745514601, + "grad_norm": 1.1436468362808228, + "learning_rate": 7.3137178907147334e-06, + "loss": 0.2863, + "step": 18306 + }, + { + "epoch": 0.36647899306858844, + "grad_norm": 1.185302734375, + "learning_rate": 7.3134305001547455e-06, + "loss": 0.3455, + "step": 18307 + }, + { + "epoch": 0.3664990115857168, + "grad_norm": 1.0778552293777466, + "learning_rate": 7.313143099869594e-06, + "loss": 0.2937, + "step": 18308 + }, + { + "epoch": 0.36651903010284514, + "grad_norm": 1.0185315608978271, + "learning_rate": 7.312855689860488e-06, + "loss": 0.3087, + "step": 18309 + }, + { + "epoch": 0.3665390486199735, + "grad_norm": 1.1534578800201416, + "learning_rate": 7.312568270128634e-06, + "loss": 0.3613, + "step": 18310 + }, + { + "epoch": 0.36655906713710185, + "grad_norm": 0.99592125415802, + "learning_rate": 7.312280840675241e-06, + "loss": 0.3161, + "step": 18311 + }, + { + "epoch": 0.36657908565423014, + "grad_norm": 0.9937283396720886, + "learning_rate": 7.311993401501516e-06, + "loss": 0.2983, + "step": 18312 + }, + { + "epoch": 0.3665991041713585, + "grad_norm": 1.0474454164505005, + "learning_rate": 7.311705952608668e-06, + "loss": 0.2871, + "step": 18313 + }, + { + "epoch": 0.36661912268848684, + "grad_norm": 1.3474304676055908, + "learning_rate": 7.311418493997906e-06, + "loss": 0.3562, + "step": 18314 + }, + { + "epoch": 0.3666391412056152, + "grad_norm": 1.1215709447860718, + "learning_rate": 7.311131025670438e-06, + "loss": 0.3487, + "step": 18315 + }, + { + "epoch": 0.36665915972274354, + "grad_norm": 1.9948458671569824, + "learning_rate": 7.3108435476274744e-06, + "loss": 0.8128, + "step": 18316 + }, + { + "epoch": 0.3666791782398719, + "grad_norm": 1.7638334035873413, + "learning_rate": 7.3105560598702195e-06, + "loss": 0.8341, + "step": 18317 + }, + { + "epoch": 0.36669919675700025, + "grad_norm": 0.9195970892906189, + "learning_rate": 7.3102685623998845e-06, + "loss": 0.2895, + "step": 18318 + }, + { + "epoch": 0.3667192152741286, + "grad_norm": 1.138990044593811, + "learning_rate": 7.309981055217679e-06, + "loss": 0.3487, + "step": 18319 + }, + { + "epoch": 0.3667392337912569, + "grad_norm": 1.2186188697814941, + "learning_rate": 7.309693538324809e-06, + "loss": 0.3045, + "step": 18320 + }, + { + "epoch": 0.36675925230838524, + "grad_norm": 1.1081759929656982, + "learning_rate": 7.309406011722484e-06, + "loss": 0.2792, + "step": 18321 + }, + { + "epoch": 0.3667792708255136, + "grad_norm": 1.159381628036499, + "learning_rate": 7.309118475411915e-06, + "loss": 0.3544, + "step": 18322 + }, + { + "epoch": 0.36679928934264194, + "grad_norm": 1.8860071897506714, + "learning_rate": 7.308830929394308e-06, + "loss": 0.7435, + "step": 18323 + }, + { + "epoch": 0.3668193078597703, + "grad_norm": 0.9948919415473938, + "learning_rate": 7.308543373670872e-06, + "loss": 0.2771, + "step": 18324 + }, + { + "epoch": 0.36683932637689864, + "grad_norm": 1.0082863569259644, + "learning_rate": 7.308255808242817e-06, + "loss": 0.334, + "step": 18325 + }, + { + "epoch": 0.366859344894027, + "grad_norm": 1.895809292793274, + "learning_rate": 7.30796823311135e-06, + "loss": 0.8122, + "step": 18326 + }, + { + "epoch": 0.36687936341115535, + "grad_norm": 1.084617018699646, + "learning_rate": 7.307680648277683e-06, + "loss": 0.3418, + "step": 18327 + }, + { + "epoch": 0.36689938192828364, + "grad_norm": 1.2522534132003784, + "learning_rate": 7.307393053743023e-06, + "loss": 0.3373, + "step": 18328 + }, + { + "epoch": 0.366919400445412, + "grad_norm": 1.1730865240097046, + "learning_rate": 7.307105449508579e-06, + "loss": 0.3328, + "step": 18329 + }, + { + "epoch": 0.36693941896254034, + "grad_norm": 1.1805436611175537, + "learning_rate": 7.306817835575559e-06, + "loss": 0.3529, + "step": 18330 + }, + { + "epoch": 0.3669594374796687, + "grad_norm": 1.2363563776016235, + "learning_rate": 7.306530211945175e-06, + "loss": 0.3075, + "step": 18331 + }, + { + "epoch": 0.36697945599679704, + "grad_norm": 1.1025853157043457, + "learning_rate": 7.306242578618633e-06, + "loss": 0.3057, + "step": 18332 + }, + { + "epoch": 0.3669994745139254, + "grad_norm": 1.1703296899795532, + "learning_rate": 7.3059549355971435e-06, + "loss": 0.2517, + "step": 18333 + }, + { + "epoch": 0.36701949303105375, + "grad_norm": 1.1775457859039307, + "learning_rate": 7.305667282881918e-06, + "loss": 0.3046, + "step": 18334 + }, + { + "epoch": 0.3670395115481821, + "grad_norm": 1.050197958946228, + "learning_rate": 7.30537962047416e-06, + "loss": 0.3227, + "step": 18335 + }, + { + "epoch": 0.3670595300653104, + "grad_norm": 1.141899824142456, + "learning_rate": 7.3050919483750846e-06, + "loss": 0.3324, + "step": 18336 + }, + { + "epoch": 0.36707954858243874, + "grad_norm": 1.0859248638153076, + "learning_rate": 7.304804266585898e-06, + "loss": 0.3173, + "step": 18337 + }, + { + "epoch": 0.3670995670995671, + "grad_norm": 1.2002936601638794, + "learning_rate": 7.304516575107809e-06, + "loss": 0.3038, + "step": 18338 + }, + { + "epoch": 0.36711958561669544, + "grad_norm": 1.2031561136245728, + "learning_rate": 7.30422887394203e-06, + "loss": 0.3569, + "step": 18339 + }, + { + "epoch": 0.3671396041338238, + "grad_norm": 1.195163369178772, + "learning_rate": 7.3039411630897685e-06, + "loss": 0.3175, + "step": 18340 + }, + { + "epoch": 0.36715962265095214, + "grad_norm": 1.184334397315979, + "learning_rate": 7.3036534425522345e-06, + "loss": 0.3417, + "step": 18341 + }, + { + "epoch": 0.3671796411680805, + "grad_norm": 1.072205662727356, + "learning_rate": 7.303365712330637e-06, + "loss": 0.3125, + "step": 18342 + }, + { + "epoch": 0.36719965968520885, + "grad_norm": 1.2521361112594604, + "learning_rate": 7.303077972426184e-06, + "loss": 0.3445, + "step": 18343 + }, + { + "epoch": 0.36721967820233714, + "grad_norm": 0.9995176792144775, + "learning_rate": 7.302790222840088e-06, + "loss": 0.2805, + "step": 18344 + }, + { + "epoch": 0.3672396967194655, + "grad_norm": 1.057529330253601, + "learning_rate": 7.3025024635735575e-06, + "loss": 0.2894, + "step": 18345 + }, + { + "epoch": 0.36725971523659384, + "grad_norm": 1.9326661825180054, + "learning_rate": 7.3022146946278025e-06, + "loss": 0.7888, + "step": 18346 + }, + { + "epoch": 0.3672797337537222, + "grad_norm": 0.9701617360115051, + "learning_rate": 7.301926916004032e-06, + "loss": 0.2948, + "step": 18347 + }, + { + "epoch": 0.36729975227085054, + "grad_norm": 1.016114354133606, + "learning_rate": 7.301639127703456e-06, + "loss": 0.3335, + "step": 18348 + }, + { + "epoch": 0.3673197707879789, + "grad_norm": 1.1043328046798706, + "learning_rate": 7.301351329727285e-06, + "loss": 0.324, + "step": 18349 + }, + { + "epoch": 0.36733978930510724, + "grad_norm": 1.0053050518035889, + "learning_rate": 7.3010635220767275e-06, + "loss": 0.2519, + "step": 18350 + }, + { + "epoch": 0.3673598078222356, + "grad_norm": 1.0892244577407837, + "learning_rate": 7.300775704752995e-06, + "loss": 0.3162, + "step": 18351 + }, + { + "epoch": 0.3673798263393639, + "grad_norm": 1.128010869026184, + "learning_rate": 7.3004878777572955e-06, + "loss": 0.4091, + "step": 18352 + }, + { + "epoch": 0.36739984485649224, + "grad_norm": 1.09913170337677, + "learning_rate": 7.300200041090841e-06, + "loss": 0.3156, + "step": 18353 + }, + { + "epoch": 0.3674198633736206, + "grad_norm": 0.9391640424728394, + "learning_rate": 7.299912194754841e-06, + "loss": 0.2868, + "step": 18354 + }, + { + "epoch": 0.36743988189074894, + "grad_norm": 1.1166050434112549, + "learning_rate": 7.299624338750504e-06, + "loss": 0.3037, + "step": 18355 + }, + { + "epoch": 0.3674599004078773, + "grad_norm": 0.9610292315483093, + "learning_rate": 7.299336473079042e-06, + "loss": 0.3173, + "step": 18356 + }, + { + "epoch": 0.36747991892500564, + "grad_norm": 1.1434698104858398, + "learning_rate": 7.299048597741663e-06, + "loss": 0.3139, + "step": 18357 + }, + { + "epoch": 0.367499937442134, + "grad_norm": 1.1323901414871216, + "learning_rate": 7.2987607127395785e-06, + "loss": 0.3075, + "step": 18358 + }, + { + "epoch": 0.3675199559592623, + "grad_norm": 1.1086475849151611, + "learning_rate": 7.298472818073999e-06, + "loss": 0.2984, + "step": 18359 + }, + { + "epoch": 0.36753997447639064, + "grad_norm": 1.144726037979126, + "learning_rate": 7.298184913746136e-06, + "loss": 0.3279, + "step": 18360 + }, + { + "epoch": 0.367559992993519, + "grad_norm": 1.1554278135299683, + "learning_rate": 7.2978969997571965e-06, + "loss": 0.3721, + "step": 18361 + }, + { + "epoch": 0.36758001151064734, + "grad_norm": 1.2393112182617188, + "learning_rate": 7.297609076108394e-06, + "loss": 0.339, + "step": 18362 + }, + { + "epoch": 0.3676000300277757, + "grad_norm": 1.0245290994644165, + "learning_rate": 7.297321142800935e-06, + "loss": 0.2767, + "step": 18363 + }, + { + "epoch": 0.36762004854490404, + "grad_norm": 1.2576286792755127, + "learning_rate": 7.297033199836033e-06, + "loss": 0.3294, + "step": 18364 + }, + { + "epoch": 0.3676400670620324, + "grad_norm": 1.1834981441497803, + "learning_rate": 7.2967452472149e-06, + "loss": 0.2947, + "step": 18365 + }, + { + "epoch": 0.36766008557916074, + "grad_norm": 1.1572917699813843, + "learning_rate": 7.296457284938741e-06, + "loss": 0.3405, + "step": 18366 + }, + { + "epoch": 0.36768010409628904, + "grad_norm": 1.1383161544799805, + "learning_rate": 7.296169313008773e-06, + "loss": 0.3127, + "step": 18367 + }, + { + "epoch": 0.3677001226134174, + "grad_norm": 1.1073428392410278, + "learning_rate": 7.295881331426201e-06, + "loss": 0.3139, + "step": 18368 + }, + { + "epoch": 0.36772014113054574, + "grad_norm": 1.0397329330444336, + "learning_rate": 7.295593340192239e-06, + "loss": 0.3216, + "step": 18369 + }, + { + "epoch": 0.3677401596476741, + "grad_norm": 1.1756939888000488, + "learning_rate": 7.295305339308097e-06, + "loss": 0.342, + "step": 18370 + }, + { + "epoch": 0.36776017816480244, + "grad_norm": 1.1385010480880737, + "learning_rate": 7.295017328774984e-06, + "loss": 0.3229, + "step": 18371 + }, + { + "epoch": 0.3677801966819308, + "grad_norm": 1.1440623998641968, + "learning_rate": 7.294729308594113e-06, + "loss": 0.3275, + "step": 18372 + }, + { + "epoch": 0.36780021519905914, + "grad_norm": 1.1142659187316895, + "learning_rate": 7.294441278766693e-06, + "loss": 0.3152, + "step": 18373 + }, + { + "epoch": 0.3678202337161875, + "grad_norm": 1.107266902923584, + "learning_rate": 7.294153239293937e-06, + "loss": 0.2631, + "step": 18374 + }, + { + "epoch": 0.3678402522333158, + "grad_norm": 1.1210112571716309, + "learning_rate": 7.293865190177054e-06, + "loss": 0.3507, + "step": 18375 + }, + { + "epoch": 0.36786027075044414, + "grad_norm": 1.3870799541473389, + "learning_rate": 7.2935771314172544e-06, + "loss": 0.3768, + "step": 18376 + }, + { + "epoch": 0.3678802892675725, + "grad_norm": 1.1039128303527832, + "learning_rate": 7.2932890630157505e-06, + "loss": 0.3533, + "step": 18377 + }, + { + "epoch": 0.36790030778470084, + "grad_norm": 1.060862421989441, + "learning_rate": 7.293000984973754e-06, + "loss": 0.3143, + "step": 18378 + }, + { + "epoch": 0.3679203263018292, + "grad_norm": 1.134964942932129, + "learning_rate": 7.292712897292473e-06, + "loss": 0.3223, + "step": 18379 + }, + { + "epoch": 0.36794034481895754, + "grad_norm": 1.8208742141723633, + "learning_rate": 7.292424799973122e-06, + "loss": 0.8662, + "step": 18380 + }, + { + "epoch": 0.3679603633360859, + "grad_norm": 1.94281005859375, + "learning_rate": 7.292136693016909e-06, + "loss": 0.7652, + "step": 18381 + }, + { + "epoch": 0.36798038185321424, + "grad_norm": 1.1214202642440796, + "learning_rate": 7.291848576425049e-06, + "loss": 0.3059, + "step": 18382 + }, + { + "epoch": 0.36800040037034254, + "grad_norm": 1.2657607793807983, + "learning_rate": 7.291560450198748e-06, + "loss": 0.3174, + "step": 18383 + }, + { + "epoch": 0.3680204188874709, + "grad_norm": 1.2701655626296997, + "learning_rate": 7.2912723143392216e-06, + "loss": 0.3028, + "step": 18384 + }, + { + "epoch": 0.36804043740459924, + "grad_norm": 1.143440842628479, + "learning_rate": 7.290984168847679e-06, + "loss": 0.3061, + "step": 18385 + }, + { + "epoch": 0.3680604559217276, + "grad_norm": 1.0607339143753052, + "learning_rate": 7.290696013725332e-06, + "loss": 0.3089, + "step": 18386 + }, + { + "epoch": 0.36808047443885594, + "grad_norm": 1.8762918710708618, + "learning_rate": 7.2904078489733935e-06, + "loss": 0.7842, + "step": 18387 + }, + { + "epoch": 0.3681004929559843, + "grad_norm": 1.023870587348938, + "learning_rate": 7.29011967459307e-06, + "loss": 0.3085, + "step": 18388 + }, + { + "epoch": 0.36812051147311264, + "grad_norm": 1.1221050024032593, + "learning_rate": 7.289831490585578e-06, + "loss": 0.3258, + "step": 18389 + }, + { + "epoch": 0.368140529990241, + "grad_norm": 1.02582585811615, + "learning_rate": 7.2895432969521275e-06, + "loss": 0.2901, + "step": 18390 + }, + { + "epoch": 0.3681605485073693, + "grad_norm": 1.1679368019104004, + "learning_rate": 7.28925509369393e-06, + "loss": 0.3574, + "step": 18391 + }, + { + "epoch": 0.36818056702449764, + "grad_norm": 1.2364311218261719, + "learning_rate": 7.288966880812196e-06, + "loss": 0.304, + "step": 18392 + }, + { + "epoch": 0.368200585541626, + "grad_norm": 1.0199358463287354, + "learning_rate": 7.288678658308138e-06, + "loss": 0.3075, + "step": 18393 + }, + { + "epoch": 0.36822060405875434, + "grad_norm": 1.215291142463684, + "learning_rate": 7.288390426182968e-06, + "loss": 0.3341, + "step": 18394 + }, + { + "epoch": 0.3682406225758827, + "grad_norm": 1.1540732383728027, + "learning_rate": 7.288102184437897e-06, + "loss": 0.3202, + "step": 18395 + }, + { + "epoch": 0.36826064109301104, + "grad_norm": 1.0683144330978394, + "learning_rate": 7.287813933074136e-06, + "loss": 0.3327, + "step": 18396 + }, + { + "epoch": 0.3682806596101394, + "grad_norm": 1.1586514711380005, + "learning_rate": 7.287525672092897e-06, + "loss": 0.3564, + "step": 18397 + }, + { + "epoch": 0.36830067812726774, + "grad_norm": 1.0178229808807373, + "learning_rate": 7.287237401495394e-06, + "loss": 0.287, + "step": 18398 + }, + { + "epoch": 0.36832069664439604, + "grad_norm": 1.0938314199447632, + "learning_rate": 7.286949121282836e-06, + "loss": 0.3332, + "step": 18399 + }, + { + "epoch": 0.3683407151615244, + "grad_norm": 1.2411627769470215, + "learning_rate": 7.286660831456438e-06, + "loss": 0.2999, + "step": 18400 + }, + { + "epoch": 0.36836073367865274, + "grad_norm": 1.0689524412155151, + "learning_rate": 7.2863725320174075e-06, + "loss": 0.3004, + "step": 18401 + }, + { + "epoch": 0.3683807521957811, + "grad_norm": 1.0713484287261963, + "learning_rate": 7.286084222966959e-06, + "loss": 0.3395, + "step": 18402 + }, + { + "epoch": 0.36840077071290944, + "grad_norm": 1.069909930229187, + "learning_rate": 7.285795904306305e-06, + "loss": 0.2994, + "step": 18403 + }, + { + "epoch": 0.3684207892300378, + "grad_norm": 1.1859893798828125, + "learning_rate": 7.285507576036659e-06, + "loss": 0.303, + "step": 18404 + }, + { + "epoch": 0.36844080774716614, + "grad_norm": 1.164953351020813, + "learning_rate": 7.285219238159229e-06, + "loss": 0.333, + "step": 18405 + }, + { + "epoch": 0.3684608262642945, + "grad_norm": 1.1439310312271118, + "learning_rate": 7.284930890675229e-06, + "loss": 0.3251, + "step": 18406 + }, + { + "epoch": 0.3684808447814228, + "grad_norm": 1.1096386909484863, + "learning_rate": 7.284642533585873e-06, + "loss": 0.3192, + "step": 18407 + }, + { + "epoch": 0.36850086329855114, + "grad_norm": 1.1030420064926147, + "learning_rate": 7.284354166892369e-06, + "loss": 0.3347, + "step": 18408 + }, + { + "epoch": 0.3685208818156795, + "grad_norm": 1.0941110849380493, + "learning_rate": 7.284065790595933e-06, + "loss": 0.296, + "step": 18409 + }, + { + "epoch": 0.36854090033280784, + "grad_norm": 1.02298903465271, + "learning_rate": 7.283777404697775e-06, + "loss": 0.301, + "step": 18410 + }, + { + "epoch": 0.3685609188499362, + "grad_norm": 1.1616904735565186, + "learning_rate": 7.2834890091991096e-06, + "loss": 0.3066, + "step": 18411 + }, + { + "epoch": 0.36858093736706454, + "grad_norm": 1.104145884513855, + "learning_rate": 7.283200604101146e-06, + "loss": 0.3107, + "step": 18412 + }, + { + "epoch": 0.3686009558841929, + "grad_norm": 1.1311358213424683, + "learning_rate": 7.2829121894051005e-06, + "loss": 0.3532, + "step": 18413 + }, + { + "epoch": 0.36862097440132124, + "grad_norm": 1.0004138946533203, + "learning_rate": 7.282623765112182e-06, + "loss": 0.2676, + "step": 18414 + }, + { + "epoch": 0.36864099291844954, + "grad_norm": 1.0490247011184692, + "learning_rate": 7.282335331223606e-06, + "loss": 0.334, + "step": 18415 + }, + { + "epoch": 0.3686610114355779, + "grad_norm": 1.0188374519348145, + "learning_rate": 7.282046887740582e-06, + "loss": 0.2825, + "step": 18416 + }, + { + "epoch": 0.36868102995270624, + "grad_norm": 1.2077099084854126, + "learning_rate": 7.281758434664325e-06, + "loss": 0.3049, + "step": 18417 + }, + { + "epoch": 0.3687010484698346, + "grad_norm": 1.0520771741867065, + "learning_rate": 7.281469971996046e-06, + "loss": 0.3122, + "step": 18418 + }, + { + "epoch": 0.36872106698696294, + "grad_norm": 1.0646038055419922, + "learning_rate": 7.281181499736958e-06, + "loss": 0.3146, + "step": 18419 + }, + { + "epoch": 0.3687410855040913, + "grad_norm": 1.099888563156128, + "learning_rate": 7.2808930178882746e-06, + "loss": 0.3059, + "step": 18420 + }, + { + "epoch": 0.36876110402121964, + "grad_norm": 1.106953740119934, + "learning_rate": 7.280604526451207e-06, + "loss": 0.3199, + "step": 18421 + }, + { + "epoch": 0.368781122538348, + "grad_norm": 1.1800698041915894, + "learning_rate": 7.280316025426968e-06, + "loss": 0.375, + "step": 18422 + }, + { + "epoch": 0.3688011410554763, + "grad_norm": 1.1169350147247314, + "learning_rate": 7.280027514816772e-06, + "loss": 0.3456, + "step": 18423 + }, + { + "epoch": 0.36882115957260464, + "grad_norm": 1.0698435306549072, + "learning_rate": 7.279738994621832e-06, + "loss": 0.2962, + "step": 18424 + }, + { + "epoch": 0.368841178089733, + "grad_norm": 1.772215485572815, + "learning_rate": 7.279450464843359e-06, + "loss": 0.8419, + "step": 18425 + }, + { + "epoch": 0.36886119660686134, + "grad_norm": 1.096631407737732, + "learning_rate": 7.279161925482568e-06, + "loss": 0.3044, + "step": 18426 + }, + { + "epoch": 0.3688812151239897, + "grad_norm": 1.0981040000915527, + "learning_rate": 7.2788733765406695e-06, + "loss": 0.313, + "step": 18427 + }, + { + "epoch": 0.36890123364111804, + "grad_norm": 1.8750494718551636, + "learning_rate": 7.278584818018877e-06, + "loss": 0.7802, + "step": 18428 + }, + { + "epoch": 0.3689212521582464, + "grad_norm": 1.040302038192749, + "learning_rate": 7.278296249918405e-06, + "loss": 0.3244, + "step": 18429 + }, + { + "epoch": 0.36894127067537474, + "grad_norm": 1.112635850906372, + "learning_rate": 7.278007672240465e-06, + "loss": 0.2918, + "step": 18430 + }, + { + "epoch": 0.36896128919250304, + "grad_norm": 1.0991531610488892, + "learning_rate": 7.277719084986273e-06, + "loss": 0.2932, + "step": 18431 + }, + { + "epoch": 0.3689813077096314, + "grad_norm": 1.8482842445373535, + "learning_rate": 7.277430488157039e-06, + "loss": 0.7964, + "step": 18432 + }, + { + "epoch": 0.36900132622675974, + "grad_norm": 1.05875563621521, + "learning_rate": 7.277141881753977e-06, + "loss": 0.2776, + "step": 18433 + }, + { + "epoch": 0.3690213447438881, + "grad_norm": 1.2169337272644043, + "learning_rate": 7.276853265778301e-06, + "loss": 0.3534, + "step": 18434 + }, + { + "epoch": 0.36904136326101644, + "grad_norm": 1.1279354095458984, + "learning_rate": 7.276564640231224e-06, + "loss": 0.3318, + "step": 18435 + }, + { + "epoch": 0.3690613817781448, + "grad_norm": 1.9847731590270996, + "learning_rate": 7.276276005113959e-06, + "loss": 0.8115, + "step": 18436 + }, + { + "epoch": 0.36908140029527314, + "grad_norm": 0.9643277525901794, + "learning_rate": 7.27598736042772e-06, + "loss": 0.2787, + "step": 18437 + }, + { + "epoch": 0.3691014188124015, + "grad_norm": 1.0311932563781738, + "learning_rate": 7.2756987061737195e-06, + "loss": 0.3158, + "step": 18438 + }, + { + "epoch": 0.3691214373295298, + "grad_norm": 0.9895012378692627, + "learning_rate": 7.275410042353172e-06, + "loss": 0.2875, + "step": 18439 + }, + { + "epoch": 0.36914145584665814, + "grad_norm": 1.1443005800247192, + "learning_rate": 7.275121368967289e-06, + "loss": 0.3539, + "step": 18440 + }, + { + "epoch": 0.3691614743637865, + "grad_norm": 1.2502328157424927, + "learning_rate": 7.274832686017286e-06, + "loss": 0.3884, + "step": 18441 + }, + { + "epoch": 0.36918149288091484, + "grad_norm": 1.1978950500488281, + "learning_rate": 7.274543993504376e-06, + "loss": 0.3466, + "step": 18442 + }, + { + "epoch": 0.3692015113980432, + "grad_norm": 1.156813621520996, + "learning_rate": 7.274255291429772e-06, + "loss": 0.2665, + "step": 18443 + }, + { + "epoch": 0.36922152991517154, + "grad_norm": 1.2435193061828613, + "learning_rate": 7.273966579794689e-06, + "loss": 0.3407, + "step": 18444 + }, + { + "epoch": 0.3692415484322999, + "grad_norm": 1.058354377746582, + "learning_rate": 7.27367785860034e-06, + "loss": 0.3081, + "step": 18445 + }, + { + "epoch": 0.36926156694942824, + "grad_norm": 0.9828478097915649, + "learning_rate": 7.273389127847938e-06, + "loss": 0.3244, + "step": 18446 + }, + { + "epoch": 0.36928158546655654, + "grad_norm": 1.0605530738830566, + "learning_rate": 7.273100387538697e-06, + "loss": 0.2936, + "step": 18447 + }, + { + "epoch": 0.3693016039836849, + "grad_norm": 1.1996573209762573, + "learning_rate": 7.272811637673832e-06, + "loss": 0.3249, + "step": 18448 + }, + { + "epoch": 0.36932162250081324, + "grad_norm": 1.2197188138961792, + "learning_rate": 7.272522878254557e-06, + "loss": 0.3591, + "step": 18449 + }, + { + "epoch": 0.3693416410179416, + "grad_norm": 1.8453444242477417, + "learning_rate": 7.272234109282083e-06, + "loss": 0.8641, + "step": 18450 + }, + { + "epoch": 0.36936165953506994, + "grad_norm": 1.00059974193573, + "learning_rate": 7.271945330757626e-06, + "loss": 0.3195, + "step": 18451 + }, + { + "epoch": 0.3693816780521983, + "grad_norm": 1.1318119764328003, + "learning_rate": 7.2716565426824005e-06, + "loss": 0.2873, + "step": 18452 + }, + { + "epoch": 0.36940169656932664, + "grad_norm": 1.357417345046997, + "learning_rate": 7.27136774505762e-06, + "loss": 0.3405, + "step": 18453 + }, + { + "epoch": 0.369421715086455, + "grad_norm": 1.1168897151947021, + "learning_rate": 7.271078937884496e-06, + "loss": 0.328, + "step": 18454 + }, + { + "epoch": 0.3694417336035833, + "grad_norm": 1.143300175666809, + "learning_rate": 7.270790121164247e-06, + "loss": 0.3536, + "step": 18455 + }, + { + "epoch": 0.36946175212071164, + "grad_norm": 1.2605386972427368, + "learning_rate": 7.270501294898084e-06, + "loss": 0.315, + "step": 18456 + }, + { + "epoch": 0.36948177063784, + "grad_norm": 1.7635247707366943, + "learning_rate": 7.270212459087224e-06, + "loss": 0.8577, + "step": 18457 + }, + { + "epoch": 0.36950178915496834, + "grad_norm": 1.1163488626480103, + "learning_rate": 7.269923613732879e-06, + "loss": 0.3224, + "step": 18458 + }, + { + "epoch": 0.3695218076720967, + "grad_norm": 1.202388048171997, + "learning_rate": 7.269634758836261e-06, + "loss": 0.2943, + "step": 18459 + }, + { + "epoch": 0.36954182618922504, + "grad_norm": 1.1909536123275757, + "learning_rate": 7.26934589439859e-06, + "loss": 0.3109, + "step": 18460 + }, + { + "epoch": 0.3695618447063534, + "grad_norm": 1.1224582195281982, + "learning_rate": 7.269057020421075e-06, + "loss": 0.3021, + "step": 18461 + }, + { + "epoch": 0.36958186322348174, + "grad_norm": 1.1700772047042847, + "learning_rate": 7.268768136904934e-06, + "loss": 0.3167, + "step": 18462 + }, + { + "epoch": 0.36960188174061004, + "grad_norm": 1.0810575485229492, + "learning_rate": 7.268479243851378e-06, + "loss": 0.2997, + "step": 18463 + }, + { + "epoch": 0.3696219002577384, + "grad_norm": 1.1189460754394531, + "learning_rate": 7.268190341261626e-06, + "loss": 0.3421, + "step": 18464 + }, + { + "epoch": 0.36964191877486674, + "grad_norm": 1.3088363409042358, + "learning_rate": 7.2679014291368885e-06, + "loss": 0.3859, + "step": 18465 + }, + { + "epoch": 0.3696619372919951, + "grad_norm": 1.0541852712631226, + "learning_rate": 7.267612507478383e-06, + "loss": 0.3427, + "step": 18466 + }, + { + "epoch": 0.36968195580912344, + "grad_norm": 1.0472718477249146, + "learning_rate": 7.267323576287319e-06, + "loss": 0.3056, + "step": 18467 + }, + { + "epoch": 0.3697019743262518, + "grad_norm": 0.9455018043518066, + "learning_rate": 7.267034635564918e-06, + "loss": 0.3164, + "step": 18468 + }, + { + "epoch": 0.36972199284338014, + "grad_norm": 1.1603621244430542, + "learning_rate": 7.266745685312391e-06, + "loss": 0.3192, + "step": 18469 + }, + { + "epoch": 0.3697420113605085, + "grad_norm": 1.1522603034973145, + "learning_rate": 7.266456725530952e-06, + "loss": 0.3526, + "step": 18470 + }, + { + "epoch": 0.3697620298776368, + "grad_norm": 1.2995933294296265, + "learning_rate": 7.266167756221817e-06, + "loss": 0.3225, + "step": 18471 + }, + { + "epoch": 0.36978204839476514, + "grad_norm": 1.0541378259658813, + "learning_rate": 7.2658787773862e-06, + "loss": 0.3103, + "step": 18472 + }, + { + "epoch": 0.3698020669118935, + "grad_norm": 1.0933433771133423, + "learning_rate": 7.265589789025316e-06, + "loss": 0.3302, + "step": 18473 + }, + { + "epoch": 0.36982208542902184, + "grad_norm": 1.2328959703445435, + "learning_rate": 7.26530079114038e-06, + "loss": 0.3565, + "step": 18474 + }, + { + "epoch": 0.3698421039461502, + "grad_norm": 1.7884118556976318, + "learning_rate": 7.265011783732608e-06, + "loss": 0.8076, + "step": 18475 + }, + { + "epoch": 0.36986212246327854, + "grad_norm": 1.0663832426071167, + "learning_rate": 7.264722766803214e-06, + "loss": 0.3272, + "step": 18476 + }, + { + "epoch": 0.3698821409804069, + "grad_norm": 1.0101028680801392, + "learning_rate": 7.264433740353413e-06, + "loss": 0.304, + "step": 18477 + }, + { + "epoch": 0.36990215949753524, + "grad_norm": 1.0598499774932861, + "learning_rate": 7.264144704384419e-06, + "loss": 0.331, + "step": 18478 + }, + { + "epoch": 0.36992217801466354, + "grad_norm": 1.0179507732391357, + "learning_rate": 7.263855658897447e-06, + "loss": 0.2929, + "step": 18479 + }, + { + "epoch": 0.3699421965317919, + "grad_norm": 1.0626342296600342, + "learning_rate": 7.263566603893715e-06, + "loss": 0.3645, + "step": 18480 + }, + { + "epoch": 0.36996221504892024, + "grad_norm": 1.2603459358215332, + "learning_rate": 7.263277539374435e-06, + "loss": 0.3395, + "step": 18481 + }, + { + "epoch": 0.3699822335660486, + "grad_norm": 1.121787667274475, + "learning_rate": 7.262988465340824e-06, + "loss": 0.3321, + "step": 18482 + }, + { + "epoch": 0.37000225208317694, + "grad_norm": 1.069244146347046, + "learning_rate": 7.262699381794095e-06, + "loss": 0.3106, + "step": 18483 + }, + { + "epoch": 0.3700222706003053, + "grad_norm": 1.1214569807052612, + "learning_rate": 7.262410288735466e-06, + "loss": 0.2906, + "step": 18484 + }, + { + "epoch": 0.37004228911743364, + "grad_norm": 2.0987792015075684, + "learning_rate": 7.262121186166151e-06, + "loss": 0.8404, + "step": 18485 + }, + { + "epoch": 0.370062307634562, + "grad_norm": 1.0402069091796875, + "learning_rate": 7.261832074087365e-06, + "loss": 0.3127, + "step": 18486 + }, + { + "epoch": 0.3700823261516903, + "grad_norm": 1.0282931327819824, + "learning_rate": 7.2615429525003235e-06, + "loss": 0.2966, + "step": 18487 + }, + { + "epoch": 0.37010234466881864, + "grad_norm": 1.1656901836395264, + "learning_rate": 7.2612538214062424e-06, + "loss": 0.2868, + "step": 18488 + }, + { + "epoch": 0.370122363185947, + "grad_norm": 1.0738164186477661, + "learning_rate": 7.2609646808063375e-06, + "loss": 0.3036, + "step": 18489 + }, + { + "epoch": 0.37014238170307534, + "grad_norm": 1.0548710823059082, + "learning_rate": 7.2606755307018225e-06, + "loss": 0.3138, + "step": 18490 + }, + { + "epoch": 0.3701624002202037, + "grad_norm": 1.1581677198410034, + "learning_rate": 7.260386371093916e-06, + "loss": 0.2726, + "step": 18491 + }, + { + "epoch": 0.37018241873733204, + "grad_norm": 0.9963937401771545, + "learning_rate": 7.260097201983829e-06, + "loss": 0.3129, + "step": 18492 + }, + { + "epoch": 0.3702024372544604, + "grad_norm": 1.0218086242675781, + "learning_rate": 7.25980802337278e-06, + "loss": 0.3364, + "step": 18493 + }, + { + "epoch": 0.37022245577158874, + "grad_norm": 1.1460233926773071, + "learning_rate": 7.259518835261986e-06, + "loss": 0.3416, + "step": 18494 + }, + { + "epoch": 0.37024247428871704, + "grad_norm": 1.777039885520935, + "learning_rate": 7.259229637652661e-06, + "loss": 0.801, + "step": 18495 + }, + { + "epoch": 0.3702624928058454, + "grad_norm": 1.0619816780090332, + "learning_rate": 7.25894043054602e-06, + "loss": 0.3136, + "step": 18496 + }, + { + "epoch": 0.37028251132297374, + "grad_norm": 1.1480302810668945, + "learning_rate": 7.25865121394328e-06, + "loss": 0.3385, + "step": 18497 + }, + { + "epoch": 0.3703025298401021, + "grad_norm": 1.12679123878479, + "learning_rate": 7.258361987845656e-06, + "loss": 0.3637, + "step": 18498 + }, + { + "epoch": 0.37032254835723044, + "grad_norm": 0.98752760887146, + "learning_rate": 7.2580727522543634e-06, + "loss": 0.266, + "step": 18499 + }, + { + "epoch": 0.3703425668743588, + "grad_norm": 1.1280195713043213, + "learning_rate": 7.257783507170622e-06, + "loss": 0.3076, + "step": 18500 + }, + { + "epoch": 0.37036258539148714, + "grad_norm": 2.1785480976104736, + "learning_rate": 7.2574942525956415e-06, + "loss": 0.7431, + "step": 18501 + }, + { + "epoch": 0.3703826039086155, + "grad_norm": 1.0049796104431152, + "learning_rate": 7.257204988530642e-06, + "loss": 0.3133, + "step": 18502 + }, + { + "epoch": 0.3704026224257438, + "grad_norm": 1.1723843812942505, + "learning_rate": 7.256915714976837e-06, + "loss": 0.2961, + "step": 18503 + }, + { + "epoch": 0.37042264094287214, + "grad_norm": 0.9882611632347107, + "learning_rate": 7.256626431935447e-06, + "loss": 0.2572, + "step": 18504 + }, + { + "epoch": 0.3704426594600005, + "grad_norm": 1.2889726161956787, + "learning_rate": 7.256337139407683e-06, + "loss": 0.3039, + "step": 18505 + }, + { + "epoch": 0.37046267797712884, + "grad_norm": 1.025395154953003, + "learning_rate": 7.256047837394764e-06, + "loss": 0.3266, + "step": 18506 + }, + { + "epoch": 0.3704826964942572, + "grad_norm": 1.9114723205566406, + "learning_rate": 7.255758525897904e-06, + "loss": 0.7959, + "step": 18507 + }, + { + "epoch": 0.37050271501138554, + "grad_norm": 1.1052114963531494, + "learning_rate": 7.2554692049183225e-06, + "loss": 0.3486, + "step": 18508 + }, + { + "epoch": 0.3705227335285139, + "grad_norm": 1.0930230617523193, + "learning_rate": 7.2551798744572335e-06, + "loss": 0.3201, + "step": 18509 + }, + { + "epoch": 0.37054275204564224, + "grad_norm": 1.1952059268951416, + "learning_rate": 7.254890534515854e-06, + "loss": 0.3327, + "step": 18510 + }, + { + "epoch": 0.37056277056277054, + "grad_norm": 1.0785892009735107, + "learning_rate": 7.254601185095399e-06, + "loss": 0.2647, + "step": 18511 + }, + { + "epoch": 0.3705827890798989, + "grad_norm": 1.1934552192687988, + "learning_rate": 7.2543118261970856e-06, + "loss": 0.3754, + "step": 18512 + }, + { + "epoch": 0.37060280759702724, + "grad_norm": 1.1075598001480103, + "learning_rate": 7.254022457822132e-06, + "loss": 0.3169, + "step": 18513 + }, + { + "epoch": 0.3706228261141556, + "grad_norm": 1.9366326332092285, + "learning_rate": 7.253733079971751e-06, + "loss": 0.8479, + "step": 18514 + }, + { + "epoch": 0.37064284463128394, + "grad_norm": 1.072584629058838, + "learning_rate": 7.2534436926471615e-06, + "loss": 0.3357, + "step": 18515 + }, + { + "epoch": 0.3706628631484123, + "grad_norm": 1.083106279373169, + "learning_rate": 7.25315429584958e-06, + "loss": 0.2795, + "step": 18516 + }, + { + "epoch": 0.37068288166554064, + "grad_norm": 1.967091679573059, + "learning_rate": 7.252864889580225e-06, + "loss": 0.8063, + "step": 18517 + }, + { + "epoch": 0.370702900182669, + "grad_norm": 1.0786954164505005, + "learning_rate": 7.252575473840308e-06, + "loss": 0.2881, + "step": 18518 + }, + { + "epoch": 0.3707229186997973, + "grad_norm": 1.223592758178711, + "learning_rate": 7.252286048631049e-06, + "loss": 0.3406, + "step": 18519 + }, + { + "epoch": 0.37074293721692564, + "grad_norm": 1.2446590662002563, + "learning_rate": 7.251996613953665e-06, + "loss": 0.3571, + "step": 18520 + }, + { + "epoch": 0.370762955734054, + "grad_norm": 1.2290281057357788, + "learning_rate": 7.25170716980937e-06, + "loss": 0.3286, + "step": 18521 + }, + { + "epoch": 0.37078297425118234, + "grad_norm": 0.9871399402618408, + "learning_rate": 7.251417716199385e-06, + "loss": 0.3182, + "step": 18522 + }, + { + "epoch": 0.3708029927683107, + "grad_norm": 1.0829304456710815, + "learning_rate": 7.251128253124923e-06, + "loss": 0.3096, + "step": 18523 + }, + { + "epoch": 0.37082301128543904, + "grad_norm": 0.9784489870071411, + "learning_rate": 7.250838780587202e-06, + "loss": 0.2422, + "step": 18524 + }, + { + "epoch": 0.3708430298025674, + "grad_norm": 1.097934603691101, + "learning_rate": 7.2505492985874396e-06, + "loss": 0.3341, + "step": 18525 + }, + { + "epoch": 0.37086304831969574, + "grad_norm": 1.0962861776351929, + "learning_rate": 7.250259807126853e-06, + "loss": 0.353, + "step": 18526 + }, + { + "epoch": 0.37088306683682404, + "grad_norm": 1.1058464050292969, + "learning_rate": 7.249970306206657e-06, + "loss": 0.3135, + "step": 18527 + }, + { + "epoch": 0.3709030853539524, + "grad_norm": 1.2668524980545044, + "learning_rate": 7.249680795828071e-06, + "loss": 0.3198, + "step": 18528 + }, + { + "epoch": 0.37092310387108074, + "grad_norm": 1.1267197132110596, + "learning_rate": 7.249391275992311e-06, + "loss": 0.3291, + "step": 18529 + }, + { + "epoch": 0.3709431223882091, + "grad_norm": 1.1771730184555054, + "learning_rate": 7.249101746700595e-06, + "loss": 0.3544, + "step": 18530 + }, + { + "epoch": 0.37096314090533744, + "grad_norm": 1.0263375043869019, + "learning_rate": 7.248812207954138e-06, + "loss": 0.339, + "step": 18531 + }, + { + "epoch": 0.3709831594224658, + "grad_norm": 1.8075408935546875, + "learning_rate": 7.248522659754158e-06, + "loss": 0.8251, + "step": 18532 + }, + { + "epoch": 0.37100317793959414, + "grad_norm": 1.0283650159835815, + "learning_rate": 7.248233102101875e-06, + "loss": 0.2789, + "step": 18533 + }, + { + "epoch": 0.3710231964567225, + "grad_norm": 1.2230092287063599, + "learning_rate": 7.247943534998501e-06, + "loss": 0.3375, + "step": 18534 + }, + { + "epoch": 0.3710432149738508, + "grad_norm": 1.8988105058670044, + "learning_rate": 7.247653958445259e-06, + "loss": 0.8451, + "step": 18535 + }, + { + "epoch": 0.37106323349097914, + "grad_norm": 1.0315332412719727, + "learning_rate": 7.247364372443362e-06, + "loss": 0.3105, + "step": 18536 + }, + { + "epoch": 0.3710832520081075, + "grad_norm": 1.186339020729065, + "learning_rate": 7.2470747769940275e-06, + "loss": 0.3178, + "step": 18537 + }, + { + "epoch": 0.37110327052523584, + "grad_norm": 1.095583438873291, + "learning_rate": 7.246785172098475e-06, + "loss": 0.3636, + "step": 18538 + }, + { + "epoch": 0.3711232890423642, + "grad_norm": 1.3490180969238281, + "learning_rate": 7.246495557757922e-06, + "loss": 0.3494, + "step": 18539 + }, + { + "epoch": 0.37114330755949254, + "grad_norm": 1.0874441862106323, + "learning_rate": 7.246205933973584e-06, + "loss": 0.2914, + "step": 18540 + }, + { + "epoch": 0.3711633260766209, + "grad_norm": 1.1712956428527832, + "learning_rate": 7.245916300746681e-06, + "loss": 0.2897, + "step": 18541 + }, + { + "epoch": 0.37118334459374924, + "grad_norm": 1.1381624937057495, + "learning_rate": 7.24562665807843e-06, + "loss": 0.2867, + "step": 18542 + }, + { + "epoch": 0.37120336311087754, + "grad_norm": 1.0346760749816895, + "learning_rate": 7.245337005970046e-06, + "loss": 0.3027, + "step": 18543 + }, + { + "epoch": 0.3712233816280059, + "grad_norm": 1.080236554145813, + "learning_rate": 7.245047344422747e-06, + "loss": 0.3082, + "step": 18544 + }, + { + "epoch": 0.37124340014513424, + "grad_norm": 1.008270502090454, + "learning_rate": 7.244757673437755e-06, + "loss": 0.2796, + "step": 18545 + }, + { + "epoch": 0.3712634186622626, + "grad_norm": 1.012452244758606, + "learning_rate": 7.244467993016284e-06, + "loss": 0.2712, + "step": 18546 + }, + { + "epoch": 0.37128343717939094, + "grad_norm": 1.873136281967163, + "learning_rate": 7.244178303159551e-06, + "loss": 0.8053, + "step": 18547 + }, + { + "epoch": 0.3713034556965193, + "grad_norm": 1.1025571823120117, + "learning_rate": 7.2438886038687785e-06, + "loss": 0.323, + "step": 18548 + }, + { + "epoch": 0.37132347421364764, + "grad_norm": 1.1491061449050903, + "learning_rate": 7.243598895145178e-06, + "loss": 0.3851, + "step": 18549 + }, + { + "epoch": 0.371343492730776, + "grad_norm": 1.0804930925369263, + "learning_rate": 7.243309176989972e-06, + "loss": 0.3109, + "step": 18550 + }, + { + "epoch": 0.3713635112479043, + "grad_norm": 1.0295742750167847, + "learning_rate": 7.243019449404377e-06, + "loss": 0.2593, + "step": 18551 + }, + { + "epoch": 0.37138352976503264, + "grad_norm": 1.1343104839324951, + "learning_rate": 7.242729712389611e-06, + "loss": 0.3144, + "step": 18552 + }, + { + "epoch": 0.371403548282161, + "grad_norm": 1.3193073272705078, + "learning_rate": 7.2424399659468925e-06, + "loss": 0.3256, + "step": 18553 + }, + { + "epoch": 0.37142356679928934, + "grad_norm": 1.3278541564941406, + "learning_rate": 7.242150210077438e-06, + "loss": 0.3504, + "step": 18554 + }, + { + "epoch": 0.3714435853164177, + "grad_norm": 1.2583633661270142, + "learning_rate": 7.241860444782467e-06, + "loss": 0.3241, + "step": 18555 + }, + { + "epoch": 0.37146360383354604, + "grad_norm": 1.1024984121322632, + "learning_rate": 7.241570670063196e-06, + "loss": 0.2912, + "step": 18556 + }, + { + "epoch": 0.3714836223506744, + "grad_norm": 1.2140462398529053, + "learning_rate": 7.241280885920845e-06, + "loss": 0.3371, + "step": 18557 + }, + { + "epoch": 0.37150364086780274, + "grad_norm": 1.1559735536575317, + "learning_rate": 7.240991092356631e-06, + "loss": 0.3172, + "step": 18558 + }, + { + "epoch": 0.37152365938493104, + "grad_norm": 1.059779167175293, + "learning_rate": 7.2407012893717745e-06, + "loss": 0.2636, + "step": 18559 + }, + { + "epoch": 0.3715436779020594, + "grad_norm": 1.1090830564498901, + "learning_rate": 7.24041147696749e-06, + "loss": 0.329, + "step": 18560 + }, + { + "epoch": 0.37156369641918774, + "grad_norm": 1.288123369216919, + "learning_rate": 7.240121655144999e-06, + "loss": 0.3274, + "step": 18561 + }, + { + "epoch": 0.3715837149363161, + "grad_norm": 1.08261239528656, + "learning_rate": 7.2398318239055185e-06, + "loss": 0.3309, + "step": 18562 + }, + { + "epoch": 0.37160373345344444, + "grad_norm": 0.9740999341011047, + "learning_rate": 7.239541983250266e-06, + "loss": 0.2942, + "step": 18563 + }, + { + "epoch": 0.3716237519705728, + "grad_norm": 1.1499449014663696, + "learning_rate": 7.239252133180463e-06, + "loss": 0.3529, + "step": 18564 + }, + { + "epoch": 0.37164377048770114, + "grad_norm": 1.1160095930099487, + "learning_rate": 7.2389622736973244e-06, + "loss": 0.3138, + "step": 18565 + }, + { + "epoch": 0.3716637890048295, + "grad_norm": 1.8044449090957642, + "learning_rate": 7.238672404802071e-06, + "loss": 0.8165, + "step": 18566 + }, + { + "epoch": 0.3716838075219578, + "grad_norm": 1.9400986433029175, + "learning_rate": 7.2383825264959195e-06, + "loss": 0.8393, + "step": 18567 + }, + { + "epoch": 0.37170382603908614, + "grad_norm": 1.1861335039138794, + "learning_rate": 7.238092638780091e-06, + "loss": 0.3068, + "step": 18568 + }, + { + "epoch": 0.3717238445562145, + "grad_norm": 1.813212275505066, + "learning_rate": 7.237802741655802e-06, + "loss": 0.8226, + "step": 18569 + }, + { + "epoch": 0.37174386307334284, + "grad_norm": 1.160226821899414, + "learning_rate": 7.2375128351242706e-06, + "loss": 0.3214, + "step": 18570 + }, + { + "epoch": 0.3717638815904712, + "grad_norm": 1.2123801708221436, + "learning_rate": 7.237222919186717e-06, + "loss": 0.2931, + "step": 18571 + }, + { + "epoch": 0.37178390010759954, + "grad_norm": 1.103165626525879, + "learning_rate": 7.236932993844361e-06, + "loss": 0.3134, + "step": 18572 + }, + { + "epoch": 0.3718039186247279, + "grad_norm": 1.1309868097305298, + "learning_rate": 7.23664305909842e-06, + "loss": 0.3476, + "step": 18573 + }, + { + "epoch": 0.37182393714185624, + "grad_norm": 1.0627309083938599, + "learning_rate": 7.236353114950113e-06, + "loss": 0.3111, + "step": 18574 + }, + { + "epoch": 0.37184395565898454, + "grad_norm": 1.089626431465149, + "learning_rate": 7.236063161400658e-06, + "loss": 0.3133, + "step": 18575 + }, + { + "epoch": 0.3718639741761129, + "grad_norm": 1.1484851837158203, + "learning_rate": 7.235773198451274e-06, + "loss": 0.3375, + "step": 18576 + }, + { + "epoch": 0.37188399269324124, + "grad_norm": 1.0903968811035156, + "learning_rate": 7.235483226103183e-06, + "loss": 0.3176, + "step": 18577 + }, + { + "epoch": 0.3719040112103696, + "grad_norm": 1.07864248752594, + "learning_rate": 7.2351932443576e-06, + "loss": 0.2903, + "step": 18578 + }, + { + "epoch": 0.37192402972749794, + "grad_norm": 1.119491696357727, + "learning_rate": 7.234903253215746e-06, + "loss": 0.3281, + "step": 18579 + }, + { + "epoch": 0.3719440482446263, + "grad_norm": 1.2088818550109863, + "learning_rate": 7.23461325267884e-06, + "loss": 0.3007, + "step": 18580 + }, + { + "epoch": 0.37196406676175464, + "grad_norm": 1.1142103672027588, + "learning_rate": 7.2343232427481e-06, + "loss": 0.3314, + "step": 18581 + }, + { + "epoch": 0.371984085278883, + "grad_norm": 1.2561006546020508, + "learning_rate": 7.234033223424747e-06, + "loss": 0.3261, + "step": 18582 + }, + { + "epoch": 0.3720041037960113, + "grad_norm": 0.9498857259750366, + "learning_rate": 7.233743194709997e-06, + "loss": 0.2918, + "step": 18583 + }, + { + "epoch": 0.37202412231313964, + "grad_norm": 1.2035003900527954, + "learning_rate": 7.233453156605074e-06, + "loss": 0.3239, + "step": 18584 + }, + { + "epoch": 0.372044140830268, + "grad_norm": 1.0859946012496948, + "learning_rate": 7.233163109111192e-06, + "loss": 0.3076, + "step": 18585 + }, + { + "epoch": 0.37206415934739634, + "grad_norm": 1.2838152647018433, + "learning_rate": 7.2328730522295755e-06, + "loss": 0.3309, + "step": 18586 + }, + { + "epoch": 0.3720841778645247, + "grad_norm": 0.9566455483436584, + "learning_rate": 7.23258298596144e-06, + "loss": 0.2994, + "step": 18587 + }, + { + "epoch": 0.37210419638165304, + "grad_norm": 1.9091709852218628, + "learning_rate": 7.232292910308005e-06, + "loss": 0.8425, + "step": 18588 + }, + { + "epoch": 0.3721242148987814, + "grad_norm": 1.1881558895111084, + "learning_rate": 7.2320028252704914e-06, + "loss": 0.3815, + "step": 18589 + }, + { + "epoch": 0.37214423341590974, + "grad_norm": 2.0944392681121826, + "learning_rate": 7.231712730850119e-06, + "loss": 0.8107, + "step": 18590 + }, + { + "epoch": 0.37216425193303804, + "grad_norm": 1.2551521062850952, + "learning_rate": 7.2314226270481055e-06, + "loss": 0.3895, + "step": 18591 + }, + { + "epoch": 0.3721842704501664, + "grad_norm": 1.0475943088531494, + "learning_rate": 7.2311325138656725e-06, + "loss": 0.2997, + "step": 18592 + }, + { + "epoch": 0.37220428896729474, + "grad_norm": 1.0771121978759766, + "learning_rate": 7.23084239130404e-06, + "loss": 0.2953, + "step": 18593 + }, + { + "epoch": 0.3722243074844231, + "grad_norm": 1.2410846948623657, + "learning_rate": 7.230552259364424e-06, + "loss": 0.3191, + "step": 18594 + }, + { + "epoch": 0.37224432600155144, + "grad_norm": 1.727806806564331, + "learning_rate": 7.230262118048046e-06, + "loss": 0.7986, + "step": 18595 + }, + { + "epoch": 0.3722643445186798, + "grad_norm": 1.8220555782318115, + "learning_rate": 7.229971967356126e-06, + "loss": 0.8257, + "step": 18596 + }, + { + "epoch": 0.37228436303580814, + "grad_norm": 1.0461795330047607, + "learning_rate": 7.229681807289885e-06, + "loss": 0.2727, + "step": 18597 + }, + { + "epoch": 0.3723043815529365, + "grad_norm": 1.8930692672729492, + "learning_rate": 7.229391637850542e-06, + "loss": 0.9078, + "step": 18598 + }, + { + "epoch": 0.3723244000700648, + "grad_norm": 1.0852210521697998, + "learning_rate": 7.229101459039316e-06, + "loss": 0.3138, + "step": 18599 + }, + { + "epoch": 0.37234441858719314, + "grad_norm": 2.138796806335449, + "learning_rate": 7.228811270857427e-06, + "loss": 0.8156, + "step": 18600 + }, + { + "epoch": 0.3723644371043215, + "grad_norm": 1.1168650388717651, + "learning_rate": 7.228521073306094e-06, + "loss": 0.3309, + "step": 18601 + }, + { + "epoch": 0.37238445562144984, + "grad_norm": 1.181492567062378, + "learning_rate": 7.228230866386537e-06, + "loss": 0.3056, + "step": 18602 + }, + { + "epoch": 0.3724044741385782, + "grad_norm": 1.056601643562317, + "learning_rate": 7.22794065009998e-06, + "loss": 0.3496, + "step": 18603 + }, + { + "epoch": 0.37242449265570654, + "grad_norm": 1.0628772974014282, + "learning_rate": 7.227650424447638e-06, + "loss": 0.3423, + "step": 18604 + }, + { + "epoch": 0.3724445111728349, + "grad_norm": 1.041763424873352, + "learning_rate": 7.227360189430734e-06, + "loss": 0.3009, + "step": 18605 + }, + { + "epoch": 0.37246452968996324, + "grad_norm": 1.145495891571045, + "learning_rate": 7.227069945050488e-06, + "loss": 0.3085, + "step": 18606 + }, + { + "epoch": 0.37248454820709154, + "grad_norm": 1.0756111145019531, + "learning_rate": 7.226779691308117e-06, + "loss": 0.3225, + "step": 18607 + }, + { + "epoch": 0.3725045667242199, + "grad_norm": 2.03428053855896, + "learning_rate": 7.2264894282048434e-06, + "loss": 0.7736, + "step": 18608 + }, + { + "epoch": 0.37252458524134824, + "grad_norm": 1.1581398248672485, + "learning_rate": 7.226199155741889e-06, + "loss": 0.2883, + "step": 18609 + }, + { + "epoch": 0.3725446037584766, + "grad_norm": 1.7858877182006836, + "learning_rate": 7.225908873920472e-06, + "loss": 0.7854, + "step": 18610 + }, + { + "epoch": 0.37256462227560494, + "grad_norm": 0.995535671710968, + "learning_rate": 7.225618582741813e-06, + "loss": 0.2737, + "step": 18611 + }, + { + "epoch": 0.3725846407927333, + "grad_norm": 1.1399004459381104, + "learning_rate": 7.2253282822071326e-06, + "loss": 0.2896, + "step": 18612 + }, + { + "epoch": 0.37260465930986164, + "grad_norm": 1.3135634660720825, + "learning_rate": 7.2250379723176504e-06, + "loss": 0.2716, + "step": 18613 + }, + { + "epoch": 0.37262467782699, + "grad_norm": 1.1568483114242554, + "learning_rate": 7.224747653074587e-06, + "loss": 0.3356, + "step": 18614 + }, + { + "epoch": 0.3726446963441183, + "grad_norm": 1.2066624164581299, + "learning_rate": 7.224457324479164e-06, + "loss": 0.3044, + "step": 18615 + }, + { + "epoch": 0.37266471486124664, + "grad_norm": 1.0285671949386597, + "learning_rate": 7.224166986532601e-06, + "loss": 0.3418, + "step": 18616 + }, + { + "epoch": 0.372684733378375, + "grad_norm": 1.1017096042633057, + "learning_rate": 7.223876639236119e-06, + "loss": 0.2997, + "step": 18617 + }, + { + "epoch": 0.37270475189550334, + "grad_norm": 1.1889582872390747, + "learning_rate": 7.223586282590937e-06, + "loss": 0.3216, + "step": 18618 + }, + { + "epoch": 0.3727247704126317, + "grad_norm": 1.0926322937011719, + "learning_rate": 7.223295916598279e-06, + "loss": 0.3351, + "step": 18619 + }, + { + "epoch": 0.37274478892976004, + "grad_norm": 1.107704997062683, + "learning_rate": 7.223005541259361e-06, + "loss": 0.3391, + "step": 18620 + }, + { + "epoch": 0.3727648074468884, + "grad_norm": 1.0976922512054443, + "learning_rate": 7.222715156575407e-06, + "loss": 0.3188, + "step": 18621 + }, + { + "epoch": 0.37278482596401674, + "grad_norm": 2.1087708473205566, + "learning_rate": 7.222424762547638e-06, + "loss": 0.8007, + "step": 18622 + }, + { + "epoch": 0.37280484448114504, + "grad_norm": 1.1566814184188843, + "learning_rate": 7.222134359177272e-06, + "loss": 0.2613, + "step": 18623 + }, + { + "epoch": 0.3728248629982734, + "grad_norm": 1.1739177703857422, + "learning_rate": 7.221843946465532e-06, + "loss": 0.3824, + "step": 18624 + }, + { + "epoch": 0.37284488151540174, + "grad_norm": 1.0532149076461792, + "learning_rate": 7.221553524413638e-06, + "loss": 0.3154, + "step": 18625 + }, + { + "epoch": 0.3728649000325301, + "grad_norm": 1.8164434432983398, + "learning_rate": 7.221263093022812e-06, + "loss": 0.8512, + "step": 18626 + }, + { + "epoch": 0.37288491854965844, + "grad_norm": 1.1542261838912964, + "learning_rate": 7.220972652294271e-06, + "loss": 0.2867, + "step": 18627 + }, + { + "epoch": 0.3729049370667868, + "grad_norm": 1.8226426839828491, + "learning_rate": 7.220682202229243e-06, + "loss": 0.8338, + "step": 18628 + }, + { + "epoch": 0.37292495558391514, + "grad_norm": 1.1200555562973022, + "learning_rate": 7.220391742828943e-06, + "loss": 0.3511, + "step": 18629 + }, + { + "epoch": 0.3729449741010435, + "grad_norm": 1.0012012720108032, + "learning_rate": 7.220101274094594e-06, + "loss": 0.2837, + "step": 18630 + }, + { + "epoch": 0.3729649926181718, + "grad_norm": 1.2073866128921509, + "learning_rate": 7.2198107960274164e-06, + "loss": 0.3416, + "step": 18631 + }, + { + "epoch": 0.37298501113530014, + "grad_norm": 1.1865633726119995, + "learning_rate": 7.219520308628634e-06, + "loss": 0.3181, + "step": 18632 + }, + { + "epoch": 0.3730050296524285, + "grad_norm": 1.0177980661392212, + "learning_rate": 7.219229811899463e-06, + "loss": 0.3232, + "step": 18633 + }, + { + "epoch": 0.37302504816955684, + "grad_norm": 1.0636504888534546, + "learning_rate": 7.2189393058411285e-06, + "loss": 0.2815, + "step": 18634 + }, + { + "epoch": 0.3730450666866852, + "grad_norm": 1.9133213758468628, + "learning_rate": 7.218648790454852e-06, + "loss": 0.8088, + "step": 18635 + }, + { + "epoch": 0.37306508520381354, + "grad_norm": 1.092128872871399, + "learning_rate": 7.218358265741852e-06, + "loss": 0.3088, + "step": 18636 + }, + { + "epoch": 0.3730851037209419, + "grad_norm": 1.1211185455322266, + "learning_rate": 7.218067731703352e-06, + "loss": 0.3148, + "step": 18637 + }, + { + "epoch": 0.37310512223807024, + "grad_norm": 1.1327555179595947, + "learning_rate": 7.217777188340571e-06, + "loss": 0.309, + "step": 18638 + }, + { + "epoch": 0.37312514075519854, + "grad_norm": 1.2240087985992432, + "learning_rate": 7.217486635654734e-06, + "loss": 0.3046, + "step": 18639 + }, + { + "epoch": 0.3731451592723269, + "grad_norm": 1.2072932720184326, + "learning_rate": 7.217196073647059e-06, + "loss": 0.3691, + "step": 18640 + }, + { + "epoch": 0.37316517778945524, + "grad_norm": 1.3446977138519287, + "learning_rate": 7.216905502318769e-06, + "loss": 0.3693, + "step": 18641 + }, + { + "epoch": 0.3731851963065836, + "grad_norm": 1.126438021659851, + "learning_rate": 7.216614921671087e-06, + "loss": 0.3132, + "step": 18642 + }, + { + "epoch": 0.37320521482371194, + "grad_norm": 1.048762321472168, + "learning_rate": 7.216324331705231e-06, + "loss": 0.2977, + "step": 18643 + }, + { + "epoch": 0.3732252333408403, + "grad_norm": 1.2184430360794067, + "learning_rate": 7.216033732422425e-06, + "loss": 0.3002, + "step": 18644 + }, + { + "epoch": 0.37324525185796864, + "grad_norm": 1.18128502368927, + "learning_rate": 7.215743123823892e-06, + "loss": 0.3528, + "step": 18645 + }, + { + "epoch": 0.373265270375097, + "grad_norm": 1.1031795740127563, + "learning_rate": 7.215452505910849e-06, + "loss": 0.3385, + "step": 18646 + }, + { + "epoch": 0.3732852888922253, + "grad_norm": 1.1937024593353271, + "learning_rate": 7.215161878684521e-06, + "loss": 0.3468, + "step": 18647 + }, + { + "epoch": 0.37330530740935364, + "grad_norm": 1.1282607316970825, + "learning_rate": 7.214871242146131e-06, + "loss": 0.3363, + "step": 18648 + }, + { + "epoch": 0.373325325926482, + "grad_norm": 1.048556923866272, + "learning_rate": 7.214580596296896e-06, + "loss": 0.3441, + "step": 18649 + }, + { + "epoch": 0.37334534444361034, + "grad_norm": 1.2129595279693604, + "learning_rate": 7.214289941138044e-06, + "loss": 0.3545, + "step": 18650 + }, + { + "epoch": 0.3733653629607387, + "grad_norm": 1.6954724788665771, + "learning_rate": 7.213999276670791e-06, + "loss": 0.8195, + "step": 18651 + }, + { + "epoch": 0.37338538147786704, + "grad_norm": 1.0742603540420532, + "learning_rate": 7.213708602896363e-06, + "loss": 0.3245, + "step": 18652 + }, + { + "epoch": 0.3734053999949954, + "grad_norm": 2.0036025047302246, + "learning_rate": 7.21341791981598e-06, + "loss": 0.8052, + "step": 18653 + }, + { + "epoch": 0.37342541851212374, + "grad_norm": 1.1176694631576538, + "learning_rate": 7.213127227430863e-06, + "loss": 0.303, + "step": 18654 + }, + { + "epoch": 0.37344543702925204, + "grad_norm": 1.131803274154663, + "learning_rate": 7.212836525742237e-06, + "loss": 0.3715, + "step": 18655 + }, + { + "epoch": 0.3734654555463804, + "grad_norm": 1.175322413444519, + "learning_rate": 7.212545814751323e-06, + "loss": 0.3266, + "step": 18656 + }, + { + "epoch": 0.37348547406350874, + "grad_norm": 1.1193441152572632, + "learning_rate": 7.212255094459342e-06, + "loss": 0.363, + "step": 18657 + }, + { + "epoch": 0.3735054925806371, + "grad_norm": 1.186413288116455, + "learning_rate": 7.2119643648675155e-06, + "loss": 0.2996, + "step": 18658 + }, + { + "epoch": 0.37352551109776544, + "grad_norm": 1.115068793296814, + "learning_rate": 7.211673625977067e-06, + "loss": 0.3164, + "step": 18659 + }, + { + "epoch": 0.3735455296148938, + "grad_norm": 1.1816961765289307, + "learning_rate": 7.211382877789219e-06, + "loss": 0.3304, + "step": 18660 + }, + { + "epoch": 0.37356554813202214, + "grad_norm": 1.0630450248718262, + "learning_rate": 7.211092120305194e-06, + "loss": 0.3047, + "step": 18661 + }, + { + "epoch": 0.3735855666491505, + "grad_norm": 1.0690817832946777, + "learning_rate": 7.210801353526213e-06, + "loss": 0.3247, + "step": 18662 + }, + { + "epoch": 0.3736055851662788, + "grad_norm": 1.044852614402771, + "learning_rate": 7.2105105774535e-06, + "loss": 0.2768, + "step": 18663 + }, + { + "epoch": 0.37362560368340714, + "grad_norm": 1.1651084423065186, + "learning_rate": 7.210219792088274e-06, + "loss": 0.3567, + "step": 18664 + }, + { + "epoch": 0.3736456222005355, + "grad_norm": 1.2316569089889526, + "learning_rate": 7.209928997431761e-06, + "loss": 0.3167, + "step": 18665 + }, + { + "epoch": 0.37366564071766384, + "grad_norm": 1.1183360815048218, + "learning_rate": 7.2096381934851825e-06, + "loss": 0.3094, + "step": 18666 + }, + { + "epoch": 0.3736856592347922, + "grad_norm": 1.0719258785247803, + "learning_rate": 7.20934738024976e-06, + "loss": 0.3215, + "step": 18667 + }, + { + "epoch": 0.37370567775192054, + "grad_norm": 1.1681625843048096, + "learning_rate": 7.209056557726716e-06, + "loss": 0.3022, + "step": 18668 + }, + { + "epoch": 0.3737256962690489, + "grad_norm": 1.2025253772735596, + "learning_rate": 7.2087657259172736e-06, + "loss": 0.3086, + "step": 18669 + }, + { + "epoch": 0.37374571478617724, + "grad_norm": 1.1489828824996948, + "learning_rate": 7.2084748848226575e-06, + "loss": 0.3479, + "step": 18670 + }, + { + "epoch": 0.37376573330330554, + "grad_norm": 1.1210684776306152, + "learning_rate": 7.208184034444086e-06, + "loss": 0.3655, + "step": 18671 + }, + { + "epoch": 0.3737857518204339, + "grad_norm": 1.1427572965621948, + "learning_rate": 7.207893174782784e-06, + "loss": 0.3203, + "step": 18672 + }, + { + "epoch": 0.37380577033756224, + "grad_norm": 1.0888675451278687, + "learning_rate": 7.207602305839975e-06, + "loss": 0.2922, + "step": 18673 + }, + { + "epoch": 0.3738257888546906, + "grad_norm": 1.14631187915802, + "learning_rate": 7.207311427616881e-06, + "loss": 0.3159, + "step": 18674 + }, + { + "epoch": 0.37384580737181894, + "grad_norm": 1.0920330286026, + "learning_rate": 7.207020540114724e-06, + "loss": 0.3194, + "step": 18675 + }, + { + "epoch": 0.3738658258889473, + "grad_norm": 1.0679854154586792, + "learning_rate": 7.206729643334729e-06, + "loss": 0.3607, + "step": 18676 + }, + { + "epoch": 0.37388584440607564, + "grad_norm": 1.0119553804397583, + "learning_rate": 7.206438737278118e-06, + "loss": 0.2912, + "step": 18677 + }, + { + "epoch": 0.373905862923204, + "grad_norm": 1.1748517751693726, + "learning_rate": 7.206147821946111e-06, + "loss": 0.3542, + "step": 18678 + }, + { + "epoch": 0.3739258814403323, + "grad_norm": 1.0355238914489746, + "learning_rate": 7.205856897339934e-06, + "loss": 0.2702, + "step": 18679 + }, + { + "epoch": 0.37394589995746064, + "grad_norm": 1.1372106075286865, + "learning_rate": 7.20556596346081e-06, + "loss": 0.3312, + "step": 18680 + }, + { + "epoch": 0.373965918474589, + "grad_norm": 1.9680685997009277, + "learning_rate": 7.205275020309962e-06, + "loss": 0.7944, + "step": 18681 + }, + { + "epoch": 0.37398593699171734, + "grad_norm": 1.132920742034912, + "learning_rate": 7.204984067888611e-06, + "loss": 0.3333, + "step": 18682 + }, + { + "epoch": 0.3740059555088457, + "grad_norm": 1.0065199136734009, + "learning_rate": 7.204693106197982e-06, + "loss": 0.2773, + "step": 18683 + }, + { + "epoch": 0.37402597402597404, + "grad_norm": 1.3265106678009033, + "learning_rate": 7.204402135239298e-06, + "loss": 0.3151, + "step": 18684 + }, + { + "epoch": 0.3740459925431024, + "grad_norm": 1.1599135398864746, + "learning_rate": 7.204111155013781e-06, + "loss": 0.3185, + "step": 18685 + }, + { + "epoch": 0.37406601106023074, + "grad_norm": 1.0145362615585327, + "learning_rate": 7.203820165522655e-06, + "loss": 0.3076, + "step": 18686 + }, + { + "epoch": 0.37408602957735904, + "grad_norm": 1.1078581809997559, + "learning_rate": 7.203529166767145e-06, + "loss": 0.3, + "step": 18687 + }, + { + "epoch": 0.3741060480944874, + "grad_norm": 1.0994811058044434, + "learning_rate": 7.203238158748471e-06, + "loss": 0.3098, + "step": 18688 + }, + { + "epoch": 0.37412606661161574, + "grad_norm": 1.1804009675979614, + "learning_rate": 7.202947141467858e-06, + "loss": 0.3497, + "step": 18689 + }, + { + "epoch": 0.3741460851287441, + "grad_norm": 1.1003669500350952, + "learning_rate": 7.202656114926531e-06, + "loss": 0.3494, + "step": 18690 + }, + { + "epoch": 0.37416610364587244, + "grad_norm": 1.1167150735855103, + "learning_rate": 7.20236507912571e-06, + "loss": 0.3236, + "step": 18691 + }, + { + "epoch": 0.3741861221630008, + "grad_norm": 1.1936522722244263, + "learning_rate": 7.20207403406662e-06, + "loss": 0.3219, + "step": 18692 + }, + { + "epoch": 0.37420614068012914, + "grad_norm": 2.1997530460357666, + "learning_rate": 7.201782979750484e-06, + "loss": 0.8251, + "step": 18693 + }, + { + "epoch": 0.3742261591972575, + "grad_norm": 1.0441333055496216, + "learning_rate": 7.201491916178528e-06, + "loss": 0.3195, + "step": 18694 + }, + { + "epoch": 0.3742461777143858, + "grad_norm": 1.0772980451583862, + "learning_rate": 7.201200843351973e-06, + "loss": 0.3872, + "step": 18695 + }, + { + "epoch": 0.37426619623151414, + "grad_norm": 1.8888555765151978, + "learning_rate": 7.200909761272043e-06, + "loss": 0.8419, + "step": 18696 + }, + { + "epoch": 0.3742862147486425, + "grad_norm": 1.054126262664795, + "learning_rate": 7.200618669939963e-06, + "loss": 0.3141, + "step": 18697 + }, + { + "epoch": 0.37430623326577084, + "grad_norm": 1.0965238809585571, + "learning_rate": 7.200327569356955e-06, + "loss": 0.3183, + "step": 18698 + }, + { + "epoch": 0.3743262517828992, + "grad_norm": 1.1347625255584717, + "learning_rate": 7.200036459524243e-06, + "loss": 0.3274, + "step": 18699 + }, + { + "epoch": 0.37434627030002754, + "grad_norm": 1.070167064666748, + "learning_rate": 7.199745340443052e-06, + "loss": 0.3339, + "step": 18700 + }, + { + "epoch": 0.3743662888171559, + "grad_norm": 1.0896319150924683, + "learning_rate": 7.199454212114605e-06, + "loss": 0.366, + "step": 18701 + }, + { + "epoch": 0.3743863073342842, + "grad_norm": 1.036077857017517, + "learning_rate": 7.199163074540125e-06, + "loss": 0.3181, + "step": 18702 + }, + { + "epoch": 0.37440632585141254, + "grad_norm": 1.0603450536727905, + "learning_rate": 7.198871927720839e-06, + "loss": 0.3467, + "step": 18703 + }, + { + "epoch": 0.3744263443685409, + "grad_norm": 1.8404076099395752, + "learning_rate": 7.198580771657966e-06, + "loss": 0.7703, + "step": 18704 + }, + { + "epoch": 0.37444636288566924, + "grad_norm": 1.2039209604263306, + "learning_rate": 7.198289606352733e-06, + "loss": 0.3469, + "step": 18705 + }, + { + "epoch": 0.3744663814027976, + "grad_norm": 1.0865600109100342, + "learning_rate": 7.197998431806363e-06, + "loss": 0.317, + "step": 18706 + }, + { + "epoch": 0.37448639991992594, + "grad_norm": 1.1593109369277954, + "learning_rate": 7.197707248020083e-06, + "loss": 0.3877, + "step": 18707 + }, + { + "epoch": 0.3745064184370543, + "grad_norm": 1.1845041513442993, + "learning_rate": 7.197416054995114e-06, + "loss": 0.2915, + "step": 18708 + }, + { + "epoch": 0.37452643695418264, + "grad_norm": 1.0570359230041504, + "learning_rate": 7.1971248527326785e-06, + "loss": 0.3533, + "step": 18709 + }, + { + "epoch": 0.37454645547131094, + "grad_norm": 1.83025324344635, + "learning_rate": 7.1968336412340045e-06, + "loss": 0.8367, + "step": 18710 + }, + { + "epoch": 0.3745664739884393, + "grad_norm": 1.0863587856292725, + "learning_rate": 7.196542420500314e-06, + "loss": 0.3361, + "step": 18711 + }, + { + "epoch": 0.37458649250556764, + "grad_norm": 1.0871104001998901, + "learning_rate": 7.196251190532834e-06, + "loss": 0.3059, + "step": 18712 + }, + { + "epoch": 0.374606511022696, + "grad_norm": 1.0888820886611938, + "learning_rate": 7.195959951332785e-06, + "loss": 0.3862, + "step": 18713 + }, + { + "epoch": 0.37462652953982434, + "grad_norm": 1.045621633529663, + "learning_rate": 7.195668702901393e-06, + "loss": 0.2984, + "step": 18714 + }, + { + "epoch": 0.3746465480569527, + "grad_norm": 1.111183762550354, + "learning_rate": 7.195377445239881e-06, + "loss": 0.3254, + "step": 18715 + }, + { + "epoch": 0.37466656657408104, + "grad_norm": 1.1839897632598877, + "learning_rate": 7.195086178349478e-06, + "loss": 0.365, + "step": 18716 + }, + { + "epoch": 0.3746865850912094, + "grad_norm": 1.0774762630462646, + "learning_rate": 7.194794902231402e-06, + "loss": 0.3323, + "step": 18717 + }, + { + "epoch": 0.3747066036083377, + "grad_norm": 1.9246081113815308, + "learning_rate": 7.194503616886882e-06, + "loss": 0.7832, + "step": 18718 + }, + { + "epoch": 0.37472662212546604, + "grad_norm": 1.0889253616333008, + "learning_rate": 7.194212322317141e-06, + "loss": 0.3702, + "step": 18719 + }, + { + "epoch": 0.3747466406425944, + "grad_norm": 1.1107149124145508, + "learning_rate": 7.193921018523403e-06, + "loss": 0.3286, + "step": 18720 + }, + { + "epoch": 0.37476665915972274, + "grad_norm": 1.0656678676605225, + "learning_rate": 7.193629705506894e-06, + "loss": 0.2481, + "step": 18721 + }, + { + "epoch": 0.3747866776768511, + "grad_norm": 1.139976143836975, + "learning_rate": 7.1933383832688375e-06, + "loss": 0.2849, + "step": 18722 + }, + { + "epoch": 0.37480669619397944, + "grad_norm": 1.0365506410598755, + "learning_rate": 7.193047051810458e-06, + "loss": 0.321, + "step": 18723 + }, + { + "epoch": 0.3748267147111078, + "grad_norm": 1.195976972579956, + "learning_rate": 7.192755711132979e-06, + "loss": 0.3551, + "step": 18724 + }, + { + "epoch": 0.37484673322823614, + "grad_norm": 1.045087456703186, + "learning_rate": 7.19246436123763e-06, + "loss": 0.3411, + "step": 18725 + }, + { + "epoch": 0.37486675174536443, + "grad_norm": 1.0821653604507446, + "learning_rate": 7.192173002125631e-06, + "loss": 0.3098, + "step": 18726 + }, + { + "epoch": 0.3748867702624928, + "grad_norm": 1.1099908351898193, + "learning_rate": 7.19188163379821e-06, + "loss": 0.4084, + "step": 18727 + }, + { + "epoch": 0.37490678877962114, + "grad_norm": 1.1394946575164795, + "learning_rate": 7.191590256256588e-06, + "loss": 0.307, + "step": 18728 + }, + { + "epoch": 0.3749268072967495, + "grad_norm": 1.1265212297439575, + "learning_rate": 7.191298869501995e-06, + "loss": 0.3242, + "step": 18729 + }, + { + "epoch": 0.37494682581387784, + "grad_norm": 1.2654205560684204, + "learning_rate": 7.191007473535651e-06, + "loss": 0.3177, + "step": 18730 + }, + { + "epoch": 0.3749668443310062, + "grad_norm": 1.2007535696029663, + "learning_rate": 7.190716068358783e-06, + "loss": 0.2807, + "step": 18731 + }, + { + "epoch": 0.37498686284813454, + "grad_norm": 1.1312662363052368, + "learning_rate": 7.190424653972618e-06, + "loss": 0.394, + "step": 18732 + }, + { + "epoch": 0.3750068813652629, + "grad_norm": 1.2146447896957397, + "learning_rate": 7.190133230378377e-06, + "loss": 0.3117, + "step": 18733 + }, + { + "epoch": 0.3750268998823912, + "grad_norm": 1.1952825784683228, + "learning_rate": 7.189841797577289e-06, + "loss": 0.3118, + "step": 18734 + }, + { + "epoch": 0.37504691839951954, + "grad_norm": 1.281340479850769, + "learning_rate": 7.189550355570576e-06, + "loss": 0.3034, + "step": 18735 + }, + { + "epoch": 0.3750669369166479, + "grad_norm": 1.0994164943695068, + "learning_rate": 7.189258904359464e-06, + "loss": 0.3012, + "step": 18736 + }, + { + "epoch": 0.37508695543377624, + "grad_norm": 1.9093542098999023, + "learning_rate": 7.1889674439451785e-06, + "loss": 0.8473, + "step": 18737 + }, + { + "epoch": 0.3751069739509046, + "grad_norm": 1.2851886749267578, + "learning_rate": 7.188675974328947e-06, + "loss": 0.3397, + "step": 18738 + }, + { + "epoch": 0.37512699246803294, + "grad_norm": 1.1029691696166992, + "learning_rate": 7.188384495511993e-06, + "loss": 0.325, + "step": 18739 + }, + { + "epoch": 0.3751470109851613, + "grad_norm": 1.1032568216323853, + "learning_rate": 7.18809300749554e-06, + "loss": 0.3129, + "step": 18740 + }, + { + "epoch": 0.37516702950228964, + "grad_norm": 1.0189039707183838, + "learning_rate": 7.187801510280815e-06, + "loss": 0.3251, + "step": 18741 + }, + { + "epoch": 0.37518704801941793, + "grad_norm": 1.1715657711029053, + "learning_rate": 7.1875100038690424e-06, + "loss": 0.3564, + "step": 18742 + }, + { + "epoch": 0.3752070665365463, + "grad_norm": 1.03425931930542, + "learning_rate": 7.187218488261451e-06, + "loss": 0.3213, + "step": 18743 + }, + { + "epoch": 0.37522708505367464, + "grad_norm": 1.9559053182601929, + "learning_rate": 7.186926963459261e-06, + "loss": 0.7853, + "step": 18744 + }, + { + "epoch": 0.375247103570803, + "grad_norm": 1.0480695962905884, + "learning_rate": 7.186635429463702e-06, + "loss": 0.3384, + "step": 18745 + }, + { + "epoch": 0.37526712208793134, + "grad_norm": 0.9633887410163879, + "learning_rate": 7.186343886275998e-06, + "loss": 0.3323, + "step": 18746 + }, + { + "epoch": 0.3752871406050597, + "grad_norm": 1.2463326454162598, + "learning_rate": 7.186052333897375e-06, + "loss": 0.321, + "step": 18747 + }, + { + "epoch": 0.37530715912218804, + "grad_norm": 1.053830623626709, + "learning_rate": 7.185760772329059e-06, + "loss": 0.3107, + "step": 18748 + }, + { + "epoch": 0.3753271776393164, + "grad_norm": 1.7807745933532715, + "learning_rate": 7.185469201572274e-06, + "loss": 0.8238, + "step": 18749 + }, + { + "epoch": 0.3753471961564447, + "grad_norm": 1.1100581884384155, + "learning_rate": 7.185177621628248e-06, + "loss": 0.2977, + "step": 18750 + }, + { + "epoch": 0.37536721467357304, + "grad_norm": 1.9106063842773438, + "learning_rate": 7.184886032498205e-06, + "loss": 0.8534, + "step": 18751 + }, + { + "epoch": 0.3753872331907014, + "grad_norm": 1.1742531061172485, + "learning_rate": 7.184594434183372e-06, + "loss": 0.3552, + "step": 18752 + }, + { + "epoch": 0.37540725170782974, + "grad_norm": 1.372817873954773, + "learning_rate": 7.1843028266849725e-06, + "loss": 0.3515, + "step": 18753 + }, + { + "epoch": 0.3754272702249581, + "grad_norm": 1.1675704717636108, + "learning_rate": 7.184011210004236e-06, + "loss": 0.3061, + "step": 18754 + }, + { + "epoch": 0.37544728874208644, + "grad_norm": 1.1307313442230225, + "learning_rate": 7.183719584142383e-06, + "loss": 0.2927, + "step": 18755 + }, + { + "epoch": 0.3754673072592148, + "grad_norm": 1.11738121509552, + "learning_rate": 7.1834279491006455e-06, + "loss": 0.3292, + "step": 18756 + }, + { + "epoch": 0.37548732577634314, + "grad_norm": 1.053907871246338, + "learning_rate": 7.183136304880246e-06, + "loss": 0.3217, + "step": 18757 + }, + { + "epoch": 0.37550734429347143, + "grad_norm": 1.164156436920166, + "learning_rate": 7.182844651482413e-06, + "loss": 0.3177, + "step": 18758 + }, + { + "epoch": 0.3755273628105998, + "grad_norm": 1.149125099182129, + "learning_rate": 7.182552988908368e-06, + "loss": 0.3529, + "step": 18759 + }, + { + "epoch": 0.37554738132772814, + "grad_norm": 1.061036467552185, + "learning_rate": 7.182261317159342e-06, + "loss": 0.2763, + "step": 18760 + }, + { + "epoch": 0.3755673998448565, + "grad_norm": 1.1411628723144531, + "learning_rate": 7.181969636236559e-06, + "loss": 0.3439, + "step": 18761 + }, + { + "epoch": 0.37558741836198484, + "grad_norm": 1.1892820596694946, + "learning_rate": 7.181677946141243e-06, + "loss": 0.3139, + "step": 18762 + }, + { + "epoch": 0.3756074368791132, + "grad_norm": 2.0173394680023193, + "learning_rate": 7.181386246874624e-06, + "loss": 0.8132, + "step": 18763 + }, + { + "epoch": 0.37562745539624154, + "grad_norm": 1.1292511224746704, + "learning_rate": 7.181094538437927e-06, + "loss": 0.2757, + "step": 18764 + }, + { + "epoch": 0.3756474739133699, + "grad_norm": 1.10299551486969, + "learning_rate": 7.180802820832378e-06, + "loss": 0.3626, + "step": 18765 + }, + { + "epoch": 0.3756674924304982, + "grad_norm": 1.120055913925171, + "learning_rate": 7.1805110940592024e-06, + "loss": 0.312, + "step": 18766 + }, + { + "epoch": 0.37568751094762654, + "grad_norm": 1.1859527826309204, + "learning_rate": 7.180219358119629e-06, + "loss": 0.3222, + "step": 18767 + }, + { + "epoch": 0.3757075294647549, + "grad_norm": 1.1402571201324463, + "learning_rate": 7.179927613014881e-06, + "loss": 0.3475, + "step": 18768 + }, + { + "epoch": 0.37572754798188324, + "grad_norm": 1.1135361194610596, + "learning_rate": 7.179635858746187e-06, + "loss": 0.2871, + "step": 18769 + }, + { + "epoch": 0.3757475664990116, + "grad_norm": 1.0472934246063232, + "learning_rate": 7.179344095314773e-06, + "loss": 0.3246, + "step": 18770 + }, + { + "epoch": 0.37576758501613994, + "grad_norm": 1.0522106885910034, + "learning_rate": 7.179052322721864e-06, + "loss": 0.289, + "step": 18771 + }, + { + "epoch": 0.3757876035332683, + "grad_norm": 1.022824764251709, + "learning_rate": 7.17876054096869e-06, + "loss": 0.2923, + "step": 18772 + }, + { + "epoch": 0.37580762205039664, + "grad_norm": 1.3909237384796143, + "learning_rate": 7.178468750056475e-06, + "loss": 0.3237, + "step": 18773 + }, + { + "epoch": 0.37582764056752493, + "grad_norm": 1.0814003944396973, + "learning_rate": 7.178176949986445e-06, + "loss": 0.297, + "step": 18774 + }, + { + "epoch": 0.3758476590846533, + "grad_norm": 1.066959023475647, + "learning_rate": 7.177885140759829e-06, + "loss": 0.3364, + "step": 18775 + }, + { + "epoch": 0.37586767760178164, + "grad_norm": 1.13190495967865, + "learning_rate": 7.177593322377853e-06, + "loss": 0.3413, + "step": 18776 + }, + { + "epoch": 0.37588769611891, + "grad_norm": 1.0212323665618896, + "learning_rate": 7.177301494841742e-06, + "loss": 0.2929, + "step": 18777 + }, + { + "epoch": 0.37590771463603834, + "grad_norm": 1.2098424434661865, + "learning_rate": 7.177009658152725e-06, + "loss": 0.3157, + "step": 18778 + }, + { + "epoch": 0.3759277331531667, + "grad_norm": 1.1326638460159302, + "learning_rate": 7.1767178123120275e-06, + "loss": 0.3162, + "step": 18779 + }, + { + "epoch": 0.37594775167029504, + "grad_norm": 1.1055766344070435, + "learning_rate": 7.176425957320879e-06, + "loss": 0.2977, + "step": 18780 + }, + { + "epoch": 0.3759677701874234, + "grad_norm": 1.0533170700073242, + "learning_rate": 7.1761340931805e-06, + "loss": 0.2908, + "step": 18781 + }, + { + "epoch": 0.3759877887045517, + "grad_norm": 2.204113006591797, + "learning_rate": 7.175842219892124e-06, + "loss": 0.8647, + "step": 18782 + }, + { + "epoch": 0.37600780722168003, + "grad_norm": 1.0818867683410645, + "learning_rate": 7.175550337456976e-06, + "loss": 0.3038, + "step": 18783 + }, + { + "epoch": 0.3760278257388084, + "grad_norm": 1.0174821615219116, + "learning_rate": 7.175258445876281e-06, + "loss": 0.3244, + "step": 18784 + }, + { + "epoch": 0.37604784425593674, + "grad_norm": 1.208085536956787, + "learning_rate": 7.174966545151268e-06, + "loss": 0.321, + "step": 18785 + }, + { + "epoch": 0.3760678627730651, + "grad_norm": 1.8457738161087036, + "learning_rate": 7.1746746352831645e-06, + "loss": 0.859, + "step": 18786 + }, + { + "epoch": 0.37608788129019344, + "grad_norm": 1.0524927377700806, + "learning_rate": 7.174382716273196e-06, + "loss": 0.2329, + "step": 18787 + }, + { + "epoch": 0.3761078998073218, + "grad_norm": 1.1755644083023071, + "learning_rate": 7.174090788122591e-06, + "loss": 0.3283, + "step": 18788 + }, + { + "epoch": 0.37612791832445014, + "grad_norm": 1.0579952001571655, + "learning_rate": 7.173798850832576e-06, + "loss": 0.2975, + "step": 18789 + }, + { + "epoch": 0.37614793684157843, + "grad_norm": 1.8981117010116577, + "learning_rate": 7.173506904404378e-06, + "loss": 0.7695, + "step": 18790 + }, + { + "epoch": 0.3761679553587068, + "grad_norm": 1.0066533088684082, + "learning_rate": 7.1732149488392265e-06, + "loss": 0.2932, + "step": 18791 + }, + { + "epoch": 0.37618797387583514, + "grad_norm": 1.1488189697265625, + "learning_rate": 7.172922984138347e-06, + "loss": 0.3268, + "step": 18792 + }, + { + "epoch": 0.3762079923929635, + "grad_norm": 1.0095676183700562, + "learning_rate": 7.1726310103029636e-06, + "loss": 0.2761, + "step": 18793 + }, + { + "epoch": 0.37622801091009184, + "grad_norm": 1.6381558179855347, + "learning_rate": 7.17233902733431e-06, + "loss": 0.7674, + "step": 18794 + }, + { + "epoch": 0.3762480294272202, + "grad_norm": 2.2006211280822754, + "learning_rate": 7.172047035233608e-06, + "loss": 0.783, + "step": 18795 + }, + { + "epoch": 0.37626804794434854, + "grad_norm": 1.2285329103469849, + "learning_rate": 7.17175503400209e-06, + "loss": 0.328, + "step": 18796 + }, + { + "epoch": 0.3762880664614769, + "grad_norm": 1.1400699615478516, + "learning_rate": 7.171463023640981e-06, + "loss": 0.3017, + "step": 18797 + }, + { + "epoch": 0.3763080849786052, + "grad_norm": 1.1773751974105835, + "learning_rate": 7.171171004151508e-06, + "loss": 0.278, + "step": 18798 + }, + { + "epoch": 0.37632810349573353, + "grad_norm": 1.2008721828460693, + "learning_rate": 7.1708789755348995e-06, + "loss": 0.3015, + "step": 18799 + }, + { + "epoch": 0.3763481220128619, + "grad_norm": 1.0423362255096436, + "learning_rate": 7.170586937792383e-06, + "loss": 0.3522, + "step": 18800 + }, + { + "epoch": 0.37636814052999024, + "grad_norm": 1.2954045534133911, + "learning_rate": 7.170294890925186e-06, + "loss": 0.3253, + "step": 18801 + }, + { + "epoch": 0.3763881590471186, + "grad_norm": 1.091368556022644, + "learning_rate": 7.170002834934537e-06, + "loss": 0.3235, + "step": 18802 + }, + { + "epoch": 0.37640817756424694, + "grad_norm": 1.0914909839630127, + "learning_rate": 7.169710769821663e-06, + "loss": 0.323, + "step": 18803 + }, + { + "epoch": 0.3764281960813753, + "grad_norm": 1.8346222639083862, + "learning_rate": 7.169418695587791e-06, + "loss": 0.7944, + "step": 18804 + }, + { + "epoch": 0.37644821459850364, + "grad_norm": 1.0145751237869263, + "learning_rate": 7.169126612234152e-06, + "loss": 0.304, + "step": 18805 + }, + { + "epoch": 0.37646823311563193, + "grad_norm": 1.1745861768722534, + "learning_rate": 7.168834519761969e-06, + "loss": 0.3412, + "step": 18806 + }, + { + "epoch": 0.3764882516327603, + "grad_norm": 1.1480822563171387, + "learning_rate": 7.168542418172472e-06, + "loss": 0.2779, + "step": 18807 + }, + { + "epoch": 0.37650827014988864, + "grad_norm": 1.1340829133987427, + "learning_rate": 7.16825030746689e-06, + "loss": 0.2943, + "step": 18808 + }, + { + "epoch": 0.376528288667017, + "grad_norm": 1.1216884851455688, + "learning_rate": 7.167958187646451e-06, + "loss": 0.3598, + "step": 18809 + }, + { + "epoch": 0.37654830718414534, + "grad_norm": 1.1504663228988647, + "learning_rate": 7.167666058712382e-06, + "loss": 0.307, + "step": 18810 + }, + { + "epoch": 0.3765683257012737, + "grad_norm": 1.1638206243515015, + "learning_rate": 7.167373920665912e-06, + "loss": 0.304, + "step": 18811 + }, + { + "epoch": 0.37658834421840204, + "grad_norm": 1.2848875522613525, + "learning_rate": 7.167081773508269e-06, + "loss": 0.3184, + "step": 18812 + }, + { + "epoch": 0.3766083627355304, + "grad_norm": 1.2190078496932983, + "learning_rate": 7.166789617240678e-06, + "loss": 0.4125, + "step": 18813 + }, + { + "epoch": 0.3766283812526587, + "grad_norm": 1.0551018714904785, + "learning_rate": 7.166497451864372e-06, + "loss": 0.3079, + "step": 18814 + }, + { + "epoch": 0.37664839976978703, + "grad_norm": 1.1394590139389038, + "learning_rate": 7.166205277380576e-06, + "loss": 0.3346, + "step": 18815 + }, + { + "epoch": 0.3766684182869154, + "grad_norm": 1.8254690170288086, + "learning_rate": 7.16591309379052e-06, + "loss": 0.791, + "step": 18816 + }, + { + "epoch": 0.37668843680404374, + "grad_norm": 1.0098239183425903, + "learning_rate": 7.16562090109543e-06, + "loss": 0.3063, + "step": 18817 + }, + { + "epoch": 0.3767084553211721, + "grad_norm": 1.4324244260787964, + "learning_rate": 7.165328699296538e-06, + "loss": 0.3123, + "step": 18818 + }, + { + "epoch": 0.37672847383830044, + "grad_norm": 1.2887563705444336, + "learning_rate": 7.165036488395067e-06, + "loss": 0.2798, + "step": 18819 + }, + { + "epoch": 0.3767484923554288, + "grad_norm": 1.0981249809265137, + "learning_rate": 7.164744268392251e-06, + "loss": 0.2731, + "step": 18820 + }, + { + "epoch": 0.37676851087255714, + "grad_norm": 1.1148672103881836, + "learning_rate": 7.164452039289315e-06, + "loss": 0.3038, + "step": 18821 + }, + { + "epoch": 0.37678852938968543, + "grad_norm": 1.7794197797775269, + "learning_rate": 7.164159801087489e-06, + "loss": 0.8154, + "step": 18822 + }, + { + "epoch": 0.3768085479068138, + "grad_norm": 1.834168553352356, + "learning_rate": 7.163867553788002e-06, + "loss": 0.8333, + "step": 18823 + }, + { + "epoch": 0.37682856642394214, + "grad_norm": 1.1183388233184814, + "learning_rate": 7.163575297392079e-06, + "loss": 0.3119, + "step": 18824 + }, + { + "epoch": 0.3768485849410705, + "grad_norm": 1.6258461475372314, + "learning_rate": 7.163283031900953e-06, + "loss": 0.8952, + "step": 18825 + }, + { + "epoch": 0.37686860345819884, + "grad_norm": 1.089200735092163, + "learning_rate": 7.1629907573158485e-06, + "loss": 0.3014, + "step": 18826 + }, + { + "epoch": 0.3768886219753272, + "grad_norm": 1.251217007637024, + "learning_rate": 7.162698473637998e-06, + "loss": 0.356, + "step": 18827 + }, + { + "epoch": 0.37690864049245554, + "grad_norm": 1.0898045301437378, + "learning_rate": 7.162406180868628e-06, + "loss": 0.2895, + "step": 18828 + }, + { + "epoch": 0.3769286590095839, + "grad_norm": 1.1302131414413452, + "learning_rate": 7.162113879008968e-06, + "loss": 0.3326, + "step": 18829 + }, + { + "epoch": 0.3769486775267122, + "grad_norm": 2.0243148803710938, + "learning_rate": 7.161821568060246e-06, + "loss": 0.8027, + "step": 18830 + }, + { + "epoch": 0.37696869604384053, + "grad_norm": 1.9710155725479126, + "learning_rate": 7.161529248023693e-06, + "loss": 0.8811, + "step": 18831 + }, + { + "epoch": 0.3769887145609689, + "grad_norm": 1.0607573986053467, + "learning_rate": 7.1612369189005345e-06, + "loss": 0.2731, + "step": 18832 + }, + { + "epoch": 0.37700873307809724, + "grad_norm": 1.8742789030075073, + "learning_rate": 7.160944580692002e-06, + "loss": 0.7705, + "step": 18833 + }, + { + "epoch": 0.3770287515952256, + "grad_norm": 1.2583545446395874, + "learning_rate": 7.160652233399323e-06, + "loss": 0.3556, + "step": 18834 + }, + { + "epoch": 0.37704877011235394, + "grad_norm": 1.0667132139205933, + "learning_rate": 7.160359877023727e-06, + "loss": 0.29, + "step": 18835 + }, + { + "epoch": 0.3770687886294823, + "grad_norm": 1.1671512126922607, + "learning_rate": 7.160067511566444e-06, + "loss": 0.3641, + "step": 18836 + }, + { + "epoch": 0.37708880714661064, + "grad_norm": 1.0963153839111328, + "learning_rate": 7.1597751370287e-06, + "loss": 0.2994, + "step": 18837 + }, + { + "epoch": 0.37710882566373893, + "grad_norm": 1.0107896327972412, + "learning_rate": 7.1594827534117286e-06, + "loss": 0.2851, + "step": 18838 + }, + { + "epoch": 0.3771288441808673, + "grad_norm": 1.1885156631469727, + "learning_rate": 7.159190360716753e-06, + "loss": 0.3192, + "step": 18839 + }, + { + "epoch": 0.37714886269799563, + "grad_norm": 1.097020149230957, + "learning_rate": 7.158897958945009e-06, + "loss": 0.352, + "step": 18840 + }, + { + "epoch": 0.377168881215124, + "grad_norm": 1.0653302669525146, + "learning_rate": 7.158605548097721e-06, + "loss": 0.2841, + "step": 18841 + }, + { + "epoch": 0.37718889973225234, + "grad_norm": 0.9998888969421387, + "learning_rate": 7.158313128176121e-06, + "loss": 0.2623, + "step": 18842 + }, + { + "epoch": 0.3772089182493807, + "grad_norm": 1.1708251237869263, + "learning_rate": 7.1580206991814376e-06, + "loss": 0.2796, + "step": 18843 + }, + { + "epoch": 0.37722893676650904, + "grad_norm": 1.1910827159881592, + "learning_rate": 7.157728261114898e-06, + "loss": 0.3086, + "step": 18844 + }, + { + "epoch": 0.3772489552836374, + "grad_norm": 1.0182322263717651, + "learning_rate": 7.157435813977733e-06, + "loss": 0.3153, + "step": 18845 + }, + { + "epoch": 0.3772689738007657, + "grad_norm": 1.206861138343811, + "learning_rate": 7.157143357771173e-06, + "loss": 0.2871, + "step": 18846 + }, + { + "epoch": 0.37728899231789403, + "grad_norm": 1.1244016885757446, + "learning_rate": 7.156850892496446e-06, + "loss": 0.2709, + "step": 18847 + }, + { + "epoch": 0.3773090108350224, + "grad_norm": 1.1180437803268433, + "learning_rate": 7.156558418154782e-06, + "loss": 0.324, + "step": 18848 + }, + { + "epoch": 0.37732902935215074, + "grad_norm": 1.9469655752182007, + "learning_rate": 7.156265934747411e-06, + "loss": 0.7875, + "step": 18849 + }, + { + "epoch": 0.3773490478692791, + "grad_norm": 1.0225306749343872, + "learning_rate": 7.155973442275561e-06, + "loss": 0.3574, + "step": 18850 + }, + { + "epoch": 0.37736906638640744, + "grad_norm": 1.9039087295532227, + "learning_rate": 7.155680940740465e-06, + "loss": 0.7879, + "step": 18851 + }, + { + "epoch": 0.3773890849035358, + "grad_norm": 1.0811415910720825, + "learning_rate": 7.155388430143347e-06, + "loss": 0.3356, + "step": 18852 + }, + { + "epoch": 0.37740910342066414, + "grad_norm": 1.0647826194763184, + "learning_rate": 7.155095910485442e-06, + "loss": 0.3436, + "step": 18853 + }, + { + "epoch": 0.37742912193779243, + "grad_norm": 1.0424003601074219, + "learning_rate": 7.154803381767978e-06, + "loss": 0.299, + "step": 18854 + }, + { + "epoch": 0.3774491404549208, + "grad_norm": 1.0496327877044678, + "learning_rate": 7.1545108439921825e-06, + "loss": 0.3569, + "step": 18855 + }, + { + "epoch": 0.37746915897204913, + "grad_norm": 1.0800610780715942, + "learning_rate": 7.154218297159289e-06, + "loss": 0.3536, + "step": 18856 + }, + { + "epoch": 0.3774891774891775, + "grad_norm": 1.1043941974639893, + "learning_rate": 7.153925741270523e-06, + "loss": 0.3352, + "step": 18857 + }, + { + "epoch": 0.37750919600630584, + "grad_norm": 0.9880721569061279, + "learning_rate": 7.153633176327119e-06, + "loss": 0.3125, + "step": 18858 + }, + { + "epoch": 0.3775292145234342, + "grad_norm": 1.0850945711135864, + "learning_rate": 7.153340602330302e-06, + "loss": 0.3, + "step": 18859 + }, + { + "epoch": 0.37754923304056254, + "grad_norm": 1.8603109121322632, + "learning_rate": 7.153048019281308e-06, + "loss": 0.7984, + "step": 18860 + }, + { + "epoch": 0.3775692515576909, + "grad_norm": 1.3051083087921143, + "learning_rate": 7.1527554271813605e-06, + "loss": 0.3353, + "step": 18861 + }, + { + "epoch": 0.3775892700748192, + "grad_norm": 1.0258262157440186, + "learning_rate": 7.1524628260316944e-06, + "loss": 0.3161, + "step": 18862 + }, + { + "epoch": 0.37760928859194753, + "grad_norm": 0.9907150864601135, + "learning_rate": 7.152170215833536e-06, + "loss": 0.2584, + "step": 18863 + }, + { + "epoch": 0.3776293071090759, + "grad_norm": 1.1428070068359375, + "learning_rate": 7.151877596588119e-06, + "loss": 0.3036, + "step": 18864 + }, + { + "epoch": 0.37764932562620424, + "grad_norm": 1.937764286994934, + "learning_rate": 7.15158496829667e-06, + "loss": 0.8397, + "step": 18865 + }, + { + "epoch": 0.3776693441433326, + "grad_norm": 1.9744764566421509, + "learning_rate": 7.151292330960422e-06, + "loss": 0.7876, + "step": 18866 + }, + { + "epoch": 0.37768936266046094, + "grad_norm": 1.128643274307251, + "learning_rate": 7.150999684580604e-06, + "loss": 0.3038, + "step": 18867 + }, + { + "epoch": 0.3777093811775893, + "grad_norm": 1.0991413593292236, + "learning_rate": 7.150707029158446e-06, + "loss": 0.3391, + "step": 18868 + }, + { + "epoch": 0.37772939969471764, + "grad_norm": 1.2042498588562012, + "learning_rate": 7.150414364695179e-06, + "loss": 0.3464, + "step": 18869 + }, + { + "epoch": 0.37774941821184593, + "grad_norm": 1.0531418323516846, + "learning_rate": 7.150121691192031e-06, + "loss": 0.3064, + "step": 18870 + }, + { + "epoch": 0.3777694367289743, + "grad_norm": 1.3324670791625977, + "learning_rate": 7.149829008650235e-06, + "loss": 0.2836, + "step": 18871 + }, + { + "epoch": 0.37778945524610263, + "grad_norm": 1.1002594232559204, + "learning_rate": 7.14953631707102e-06, + "loss": 0.2938, + "step": 18872 + }, + { + "epoch": 0.377809473763231, + "grad_norm": 1.969693660736084, + "learning_rate": 7.149243616455619e-06, + "loss": 0.844, + "step": 18873 + }, + { + "epoch": 0.37782949228035934, + "grad_norm": 1.0781487226486206, + "learning_rate": 7.148950906805259e-06, + "loss": 0.3331, + "step": 18874 + }, + { + "epoch": 0.3778495107974877, + "grad_norm": 1.0469081401824951, + "learning_rate": 7.148658188121172e-06, + "loss": 0.3269, + "step": 18875 + }, + { + "epoch": 0.37786952931461604, + "grad_norm": 1.0624958276748657, + "learning_rate": 7.148365460404588e-06, + "loss": 0.3094, + "step": 18876 + }, + { + "epoch": 0.3778895478317444, + "grad_norm": 1.099165916442871, + "learning_rate": 7.148072723656738e-06, + "loss": 0.3145, + "step": 18877 + }, + { + "epoch": 0.3779095663488727, + "grad_norm": 1.088923454284668, + "learning_rate": 7.147779977878853e-06, + "loss": 0.2747, + "step": 18878 + }, + { + "epoch": 0.37792958486600103, + "grad_norm": 1.1236376762390137, + "learning_rate": 7.147487223072163e-06, + "loss": 0.2812, + "step": 18879 + }, + { + "epoch": 0.3779496033831294, + "grad_norm": 1.1366190910339355, + "learning_rate": 7.147194459237898e-06, + "loss": 0.379, + "step": 18880 + }, + { + "epoch": 0.37796962190025774, + "grad_norm": 1.0480659008026123, + "learning_rate": 7.14690168637729e-06, + "loss": 0.3136, + "step": 18881 + }, + { + "epoch": 0.3779896404173861, + "grad_norm": 1.0534027814865112, + "learning_rate": 7.146608904491571e-06, + "loss": 0.3393, + "step": 18882 + }, + { + "epoch": 0.37800965893451444, + "grad_norm": 1.1289666891098022, + "learning_rate": 7.146316113581968e-06, + "loss": 0.3704, + "step": 18883 + }, + { + "epoch": 0.3780296774516428, + "grad_norm": 1.1111829280853271, + "learning_rate": 7.146023313649715e-06, + "loss": 0.319, + "step": 18884 + }, + { + "epoch": 0.37804969596877114, + "grad_norm": 1.1189037561416626, + "learning_rate": 7.1457305046960414e-06, + "loss": 0.3074, + "step": 18885 + }, + { + "epoch": 0.37806971448589943, + "grad_norm": 1.0944418907165527, + "learning_rate": 7.1454376867221785e-06, + "loss": 0.3144, + "step": 18886 + }, + { + "epoch": 0.3780897330030278, + "grad_norm": 1.2011985778808594, + "learning_rate": 7.145144859729358e-06, + "loss": 0.3798, + "step": 18887 + }, + { + "epoch": 0.37810975152015613, + "grad_norm": 1.142124891281128, + "learning_rate": 7.144852023718809e-06, + "loss": 0.3511, + "step": 18888 + }, + { + "epoch": 0.3781297700372845, + "grad_norm": 1.0940980911254883, + "learning_rate": 7.144559178691764e-06, + "loss": 0.3071, + "step": 18889 + }, + { + "epoch": 0.37814978855441284, + "grad_norm": 1.0740216970443726, + "learning_rate": 7.144266324649455e-06, + "loss": 0.2817, + "step": 18890 + }, + { + "epoch": 0.3781698070715412, + "grad_norm": 1.0534369945526123, + "learning_rate": 7.14397346159311e-06, + "loss": 0.3182, + "step": 18891 + }, + { + "epoch": 0.37818982558866954, + "grad_norm": 1.126293659210205, + "learning_rate": 7.143680589523962e-06, + "loss": 0.3216, + "step": 18892 + }, + { + "epoch": 0.3782098441057979, + "grad_norm": 1.0737005472183228, + "learning_rate": 7.143387708443244e-06, + "loss": 0.2657, + "step": 18893 + }, + { + "epoch": 0.3782298626229262, + "grad_norm": 1.036987543106079, + "learning_rate": 7.1430948183521835e-06, + "loss": 0.3081, + "step": 18894 + }, + { + "epoch": 0.37824988114005453, + "grad_norm": 1.0881998538970947, + "learning_rate": 7.142801919252015e-06, + "loss": 0.3312, + "step": 18895 + }, + { + "epoch": 0.3782698996571829, + "grad_norm": 1.1612573862075806, + "learning_rate": 7.142509011143968e-06, + "loss": 0.304, + "step": 18896 + }, + { + "epoch": 0.37828991817431123, + "grad_norm": 1.653632402420044, + "learning_rate": 7.142216094029273e-06, + "loss": 0.309, + "step": 18897 + }, + { + "epoch": 0.3783099366914396, + "grad_norm": 1.1324596405029297, + "learning_rate": 7.141923167909164e-06, + "loss": 0.3059, + "step": 18898 + }, + { + "epoch": 0.37832995520856794, + "grad_norm": 1.003973364830017, + "learning_rate": 7.14163023278487e-06, + "loss": 0.3326, + "step": 18899 + }, + { + "epoch": 0.3783499737256963, + "grad_norm": 1.0748862028121948, + "learning_rate": 7.141337288657624e-06, + "loss": 0.3201, + "step": 18900 + }, + { + "epoch": 0.37836999224282464, + "grad_norm": 1.1117669343948364, + "learning_rate": 7.1410443355286565e-06, + "loss": 0.2853, + "step": 18901 + }, + { + "epoch": 0.37839001075995293, + "grad_norm": 1.0945204496383667, + "learning_rate": 7.1407513733992e-06, + "loss": 0.287, + "step": 18902 + }, + { + "epoch": 0.3784100292770813, + "grad_norm": 1.0480096340179443, + "learning_rate": 7.140458402270485e-06, + "loss": 0.2924, + "step": 18903 + }, + { + "epoch": 0.37843004779420963, + "grad_norm": 0.9315944314002991, + "learning_rate": 7.140165422143742e-06, + "loss": 0.306, + "step": 18904 + }, + { + "epoch": 0.378450066311338, + "grad_norm": 1.2177525758743286, + "learning_rate": 7.139872433020205e-06, + "loss": 0.3225, + "step": 18905 + }, + { + "epoch": 0.37847008482846634, + "grad_norm": 1.210731029510498, + "learning_rate": 7.1395794349011075e-06, + "loss": 0.3336, + "step": 18906 + }, + { + "epoch": 0.3784901033455947, + "grad_norm": 1.170844554901123, + "learning_rate": 7.139286427787676e-06, + "loss": 0.3025, + "step": 18907 + }, + { + "epoch": 0.37851012186272304, + "grad_norm": 1.0491042137145996, + "learning_rate": 7.138993411681145e-06, + "loss": 0.3146, + "step": 18908 + }, + { + "epoch": 0.3785301403798514, + "grad_norm": 1.3127305507659912, + "learning_rate": 7.138700386582745e-06, + "loss": 0.3182, + "step": 18909 + }, + { + "epoch": 0.3785501588969797, + "grad_norm": 1.153419017791748, + "learning_rate": 7.138407352493711e-06, + "loss": 0.4157, + "step": 18910 + }, + { + "epoch": 0.37857017741410803, + "grad_norm": 1.0588765144348145, + "learning_rate": 7.138114309415272e-06, + "loss": 0.3607, + "step": 18911 + }, + { + "epoch": 0.3785901959312364, + "grad_norm": 2.21172833442688, + "learning_rate": 7.137821257348658e-06, + "loss": 0.8214, + "step": 18912 + }, + { + "epoch": 0.37861021444836473, + "grad_norm": 1.0592831373214722, + "learning_rate": 7.137528196295107e-06, + "loss": 0.3032, + "step": 18913 + }, + { + "epoch": 0.3786302329654931, + "grad_norm": 1.0122992992401123, + "learning_rate": 7.137235126255846e-06, + "loss": 0.2671, + "step": 18914 + }, + { + "epoch": 0.37865025148262144, + "grad_norm": 1.2192213535308838, + "learning_rate": 7.136942047232109e-06, + "loss": 0.3302, + "step": 18915 + }, + { + "epoch": 0.3786702699997498, + "grad_norm": 1.2133342027664185, + "learning_rate": 7.136648959225127e-06, + "loss": 0.3265, + "step": 18916 + }, + { + "epoch": 0.37869028851687814, + "grad_norm": 1.11117422580719, + "learning_rate": 7.136355862236132e-06, + "loss": 0.3221, + "step": 18917 + }, + { + "epoch": 0.37871030703400643, + "grad_norm": 1.2215172052383423, + "learning_rate": 7.136062756266357e-06, + "loss": 0.3515, + "step": 18918 + }, + { + "epoch": 0.3787303255511348, + "grad_norm": 1.1935473680496216, + "learning_rate": 7.135769641317035e-06, + "loss": 0.3096, + "step": 18919 + }, + { + "epoch": 0.37875034406826313, + "grad_norm": 1.1959222555160522, + "learning_rate": 7.135476517389397e-06, + "loss": 0.3537, + "step": 18920 + }, + { + "epoch": 0.3787703625853915, + "grad_norm": 1.8280562162399292, + "learning_rate": 7.135183384484674e-06, + "loss": 0.7642, + "step": 18921 + }, + { + "epoch": 0.37879038110251984, + "grad_norm": 1.029590368270874, + "learning_rate": 7.1348902426041e-06, + "loss": 0.3394, + "step": 18922 + }, + { + "epoch": 0.3788103996196482, + "grad_norm": 1.0896297693252563, + "learning_rate": 7.1345970917489076e-06, + "loss": 0.3037, + "step": 18923 + }, + { + "epoch": 0.37883041813677654, + "grad_norm": 1.131085991859436, + "learning_rate": 7.134303931920328e-06, + "loss": 0.3227, + "step": 18924 + }, + { + "epoch": 0.3788504366539049, + "grad_norm": 1.0906541347503662, + "learning_rate": 7.134010763119592e-06, + "loss": 0.3485, + "step": 18925 + }, + { + "epoch": 0.3788704551710332, + "grad_norm": 1.8457609415054321, + "learning_rate": 7.133717585347937e-06, + "loss": 0.8562, + "step": 18926 + }, + { + "epoch": 0.37889047368816153, + "grad_norm": 1.062089204788208, + "learning_rate": 7.1334243986065924e-06, + "loss": 0.3113, + "step": 18927 + }, + { + "epoch": 0.3789104922052899, + "grad_norm": 1.2621946334838867, + "learning_rate": 7.133131202896788e-06, + "loss": 0.3272, + "step": 18928 + }, + { + "epoch": 0.37893051072241823, + "grad_norm": 1.084285855293274, + "learning_rate": 7.132837998219761e-06, + "loss": 0.3255, + "step": 18929 + }, + { + "epoch": 0.3789505292395466, + "grad_norm": 1.1282410621643066, + "learning_rate": 7.132544784576741e-06, + "loss": 0.3153, + "step": 18930 + }, + { + "epoch": 0.37897054775667494, + "grad_norm": 1.231806993484497, + "learning_rate": 7.132251561968964e-06, + "loss": 0.3695, + "step": 18931 + }, + { + "epoch": 0.3789905662738033, + "grad_norm": 1.190415620803833, + "learning_rate": 7.131958330397657e-06, + "loss": 0.3114, + "step": 18932 + }, + { + "epoch": 0.37901058479093164, + "grad_norm": 2.0039658546447754, + "learning_rate": 7.131665089864058e-06, + "loss": 0.8348, + "step": 18933 + }, + { + "epoch": 0.37903060330805993, + "grad_norm": 1.1072015762329102, + "learning_rate": 7.131371840369397e-06, + "loss": 0.2701, + "step": 18934 + }, + { + "epoch": 0.3790506218251883, + "grad_norm": 1.0881954431533813, + "learning_rate": 7.131078581914908e-06, + "loss": 0.3245, + "step": 18935 + }, + { + "epoch": 0.37907064034231663, + "grad_norm": 1.1477351188659668, + "learning_rate": 7.130785314501822e-06, + "loss": 0.3248, + "step": 18936 + }, + { + "epoch": 0.379090658859445, + "grad_norm": 1.130568027496338, + "learning_rate": 7.1304920381313755e-06, + "loss": 0.3011, + "step": 18937 + }, + { + "epoch": 0.37911067737657334, + "grad_norm": 1.133527159690857, + "learning_rate": 7.130198752804798e-06, + "loss": 0.313, + "step": 18938 + }, + { + "epoch": 0.3791306958937017, + "grad_norm": 1.0429729223251343, + "learning_rate": 7.129905458523323e-06, + "loss": 0.3397, + "step": 18939 + }, + { + "epoch": 0.37915071441083004, + "grad_norm": 1.1517804861068726, + "learning_rate": 7.129612155288184e-06, + "loss": 0.3107, + "step": 18940 + }, + { + "epoch": 0.3791707329279584, + "grad_norm": 1.0123822689056396, + "learning_rate": 7.1293188431006135e-06, + "loss": 0.2994, + "step": 18941 + }, + { + "epoch": 0.3791907514450867, + "grad_norm": 1.021828293800354, + "learning_rate": 7.1290255219618444e-06, + "loss": 0.3408, + "step": 18942 + }, + { + "epoch": 0.37921076996221503, + "grad_norm": 1.0117158889770508, + "learning_rate": 7.12873219187311e-06, + "loss": 0.2714, + "step": 18943 + }, + { + "epoch": 0.3792307884793434, + "grad_norm": 1.7574044466018677, + "learning_rate": 7.128438852835644e-06, + "loss": 0.7942, + "step": 18944 + }, + { + "epoch": 0.37925080699647173, + "grad_norm": 1.245305061340332, + "learning_rate": 7.12814550485068e-06, + "loss": 0.3184, + "step": 18945 + }, + { + "epoch": 0.3792708255136001, + "grad_norm": 1.260765790939331, + "learning_rate": 7.127852147919449e-06, + "loss": 0.3539, + "step": 18946 + }, + { + "epoch": 0.37929084403072844, + "grad_norm": 1.048928141593933, + "learning_rate": 7.127558782043187e-06, + "loss": 0.2986, + "step": 18947 + }, + { + "epoch": 0.3793108625478568, + "grad_norm": 1.1216659545898438, + "learning_rate": 7.127265407223124e-06, + "loss": 0.3175, + "step": 18948 + }, + { + "epoch": 0.37933088106498514, + "grad_norm": 1.1612499952316284, + "learning_rate": 7.126972023460497e-06, + "loss": 0.3432, + "step": 18949 + }, + { + "epoch": 0.37935089958211343, + "grad_norm": 1.1090009212493896, + "learning_rate": 7.126678630756535e-06, + "loss": 0.3543, + "step": 18950 + }, + { + "epoch": 0.3793709180992418, + "grad_norm": 1.080988883972168, + "learning_rate": 7.1263852291124756e-06, + "loss": 0.3495, + "step": 18951 + }, + { + "epoch": 0.37939093661637013, + "grad_norm": 1.0649882555007935, + "learning_rate": 7.126091818529549e-06, + "loss": 0.3107, + "step": 18952 + }, + { + "epoch": 0.3794109551334985, + "grad_norm": 1.1777287721633911, + "learning_rate": 7.1257983990089915e-06, + "loss": 0.2781, + "step": 18953 + }, + { + "epoch": 0.37943097365062683, + "grad_norm": 1.126962661743164, + "learning_rate": 7.125504970552034e-06, + "loss": 0.3072, + "step": 18954 + }, + { + "epoch": 0.3794509921677552, + "grad_norm": 1.1311949491500854, + "learning_rate": 7.125211533159911e-06, + "loss": 0.3381, + "step": 18955 + }, + { + "epoch": 0.37947101068488354, + "grad_norm": 1.1718813180923462, + "learning_rate": 7.124918086833856e-06, + "loss": 0.3248, + "step": 18956 + }, + { + "epoch": 0.3794910292020119, + "grad_norm": 1.1297940015792847, + "learning_rate": 7.124624631575102e-06, + "loss": 0.3355, + "step": 18957 + }, + { + "epoch": 0.3795110477191402, + "grad_norm": 1.2666972875595093, + "learning_rate": 7.124331167384886e-06, + "loss": 0.3076, + "step": 18958 + }, + { + "epoch": 0.37953106623626853, + "grad_norm": 1.9518216848373413, + "learning_rate": 7.1240376942644365e-06, + "loss": 0.8464, + "step": 18959 + }, + { + "epoch": 0.3795510847533969, + "grad_norm": 1.0554753541946411, + "learning_rate": 7.123744212214989e-06, + "loss": 0.3325, + "step": 18960 + }, + { + "epoch": 0.37957110327052523, + "grad_norm": 1.825384497642517, + "learning_rate": 7.123450721237779e-06, + "loss": 0.7822, + "step": 18961 + }, + { + "epoch": 0.3795911217876536, + "grad_norm": 1.2308541536331177, + "learning_rate": 7.12315722133404e-06, + "loss": 0.321, + "step": 18962 + }, + { + "epoch": 0.37961114030478194, + "grad_norm": 1.12363862991333, + "learning_rate": 7.122863712505002e-06, + "loss": 0.3459, + "step": 18963 + }, + { + "epoch": 0.3796311588219103, + "grad_norm": 1.452527403831482, + "learning_rate": 7.1225701947519055e-06, + "loss": 0.3331, + "step": 18964 + }, + { + "epoch": 0.37965117733903864, + "grad_norm": 1.1794856786727905, + "learning_rate": 7.122276668075978e-06, + "loss": 0.2934, + "step": 18965 + }, + { + "epoch": 0.37967119585616693, + "grad_norm": 1.2014273405075073, + "learning_rate": 7.121983132478458e-06, + "loss": 0.2641, + "step": 18966 + }, + { + "epoch": 0.3796912143732953, + "grad_norm": 1.06461763381958, + "learning_rate": 7.121689587960576e-06, + "loss": 0.3281, + "step": 18967 + }, + { + "epoch": 0.37971123289042363, + "grad_norm": 1.158237099647522, + "learning_rate": 7.121396034523569e-06, + "loss": 0.3181, + "step": 18968 + }, + { + "epoch": 0.379731251407552, + "grad_norm": 1.042723536491394, + "learning_rate": 7.12110247216867e-06, + "loss": 0.3292, + "step": 18969 + }, + { + "epoch": 0.37975126992468033, + "grad_norm": 1.0358150005340576, + "learning_rate": 7.1208089008971106e-06, + "loss": 0.336, + "step": 18970 + }, + { + "epoch": 0.3797712884418087, + "grad_norm": 1.0867183208465576, + "learning_rate": 7.120515320710128e-06, + "loss": 0.306, + "step": 18971 + }, + { + "epoch": 0.37979130695893704, + "grad_norm": 1.1716878414154053, + "learning_rate": 7.120221731608955e-06, + "loss": 0.3, + "step": 18972 + }, + { + "epoch": 0.3798113254760654, + "grad_norm": 1.277876615524292, + "learning_rate": 7.119928133594827e-06, + "loss": 0.3041, + "step": 18973 + }, + { + "epoch": 0.3798313439931937, + "grad_norm": 1.2124696969985962, + "learning_rate": 7.119634526668976e-06, + "loss": 0.3307, + "step": 18974 + }, + { + "epoch": 0.37985136251032203, + "grad_norm": 1.034635066986084, + "learning_rate": 7.119340910832639e-06, + "loss": 0.3471, + "step": 18975 + }, + { + "epoch": 0.3798713810274504, + "grad_norm": 1.9057813882827759, + "learning_rate": 7.119047286087048e-06, + "loss": 0.7325, + "step": 18976 + }, + { + "epoch": 0.37989139954457873, + "grad_norm": 1.9145113229751587, + "learning_rate": 7.118753652433439e-06, + "loss": 0.8557, + "step": 18977 + }, + { + "epoch": 0.3799114180617071, + "grad_norm": 1.1457939147949219, + "learning_rate": 7.118460009873044e-06, + "loss": 0.3217, + "step": 18978 + }, + { + "epoch": 0.37993143657883544, + "grad_norm": 1.0844767093658447, + "learning_rate": 7.118166358407102e-06, + "loss": 0.2991, + "step": 18979 + }, + { + "epoch": 0.3799514550959638, + "grad_norm": 1.0444130897521973, + "learning_rate": 7.117872698036841e-06, + "loss": 0.3042, + "step": 18980 + }, + { + "epoch": 0.37997147361309214, + "grad_norm": 1.1597049236297607, + "learning_rate": 7.1175790287635e-06, + "loss": 0.3094, + "step": 18981 + }, + { + "epoch": 0.37999149213022043, + "grad_norm": 1.193362832069397, + "learning_rate": 7.1172853505883135e-06, + "loss": 0.2795, + "step": 18982 + }, + { + "epoch": 0.3800115106473488, + "grad_norm": 1.0711917877197266, + "learning_rate": 7.116991663512515e-06, + "loss": 0.3064, + "step": 18983 + }, + { + "epoch": 0.38003152916447713, + "grad_norm": 1.0258594751358032, + "learning_rate": 7.116697967537338e-06, + "loss": 0.317, + "step": 18984 + }, + { + "epoch": 0.3800515476816055, + "grad_norm": 1.1008131504058838, + "learning_rate": 7.1164042626640185e-06, + "loss": 0.3588, + "step": 18985 + }, + { + "epoch": 0.38007156619873383, + "grad_norm": 1.1064990758895874, + "learning_rate": 7.116110548893791e-06, + "loss": 0.3193, + "step": 18986 + }, + { + "epoch": 0.3800915847158622, + "grad_norm": 1.0436055660247803, + "learning_rate": 7.11581682622789e-06, + "loss": 0.2652, + "step": 18987 + }, + { + "epoch": 0.38011160323299054, + "grad_norm": 1.1457293033599854, + "learning_rate": 7.115523094667553e-06, + "loss": 0.3193, + "step": 18988 + }, + { + "epoch": 0.3801316217501189, + "grad_norm": 1.1893634796142578, + "learning_rate": 7.115229354214009e-06, + "loss": 0.3733, + "step": 18989 + }, + { + "epoch": 0.3801516402672472, + "grad_norm": 1.196316123008728, + "learning_rate": 7.114935604868497e-06, + "loss": 0.3022, + "step": 18990 + }, + { + "epoch": 0.38017165878437553, + "grad_norm": 1.9260363578796387, + "learning_rate": 7.114641846632252e-06, + "loss": 0.7679, + "step": 18991 + }, + { + "epoch": 0.3801916773015039, + "grad_norm": 1.0239001512527466, + "learning_rate": 7.114348079506506e-06, + "loss": 0.3033, + "step": 18992 + }, + { + "epoch": 0.38021169581863223, + "grad_norm": 1.3596326112747192, + "learning_rate": 7.114054303492496e-06, + "loss": 0.2918, + "step": 18993 + }, + { + "epoch": 0.3802317143357606, + "grad_norm": 1.1469477415084839, + "learning_rate": 7.113760518591457e-06, + "loss": 0.278, + "step": 18994 + }, + { + "epoch": 0.38025173285288894, + "grad_norm": 1.9767190217971802, + "learning_rate": 7.113466724804624e-06, + "loss": 0.8409, + "step": 18995 + }, + { + "epoch": 0.3802717513700173, + "grad_norm": 1.2219672203063965, + "learning_rate": 7.113172922133231e-06, + "loss": 0.3151, + "step": 18996 + }, + { + "epoch": 0.38029176988714564, + "grad_norm": 1.0824685096740723, + "learning_rate": 7.112879110578514e-06, + "loss": 0.3119, + "step": 18997 + }, + { + "epoch": 0.38031178840427393, + "grad_norm": 1.0531895160675049, + "learning_rate": 7.112585290141708e-06, + "loss": 0.2953, + "step": 18998 + }, + { + "epoch": 0.3803318069214023, + "grad_norm": 1.143707036972046, + "learning_rate": 7.1122914608240474e-06, + "loss": 0.3413, + "step": 18999 + }, + { + "epoch": 0.38035182543853063, + "grad_norm": 1.9217875003814697, + "learning_rate": 7.11199762262677e-06, + "loss": 0.7708, + "step": 19000 + }, + { + "epoch": 0.380371843955659, + "grad_norm": 1.184076189994812, + "learning_rate": 7.1117037755511074e-06, + "loss": 0.2984, + "step": 19001 + }, + { + "epoch": 0.38039186247278733, + "grad_norm": 1.8745585680007935, + "learning_rate": 7.111409919598298e-06, + "loss": 0.7938, + "step": 19002 + }, + { + "epoch": 0.3804118809899157, + "grad_norm": 1.1719081401824951, + "learning_rate": 7.111116054769575e-06, + "loss": 0.3293, + "step": 19003 + }, + { + "epoch": 0.38043189950704404, + "grad_norm": 1.1599704027175903, + "learning_rate": 7.110822181066175e-06, + "loss": 0.2756, + "step": 19004 + }, + { + "epoch": 0.3804519180241724, + "grad_norm": 1.0623879432678223, + "learning_rate": 7.110528298489331e-06, + "loss": 0.3546, + "step": 19005 + }, + { + "epoch": 0.3804719365413007, + "grad_norm": 1.1314582824707031, + "learning_rate": 7.110234407040281e-06, + "loss": 0.3099, + "step": 19006 + }, + { + "epoch": 0.38049195505842903, + "grad_norm": 1.091043472290039, + "learning_rate": 7.109940506720259e-06, + "loss": 0.3355, + "step": 19007 + }, + { + "epoch": 0.3805119735755574, + "grad_norm": 1.1010822057724, + "learning_rate": 7.109646597530503e-06, + "loss": 0.2963, + "step": 19008 + }, + { + "epoch": 0.38053199209268573, + "grad_norm": 1.0477858781814575, + "learning_rate": 7.1093526794722455e-06, + "loss": 0.324, + "step": 19009 + }, + { + "epoch": 0.3805520106098141, + "grad_norm": 1.084473967552185, + "learning_rate": 7.109058752546724e-06, + "loss": 0.3062, + "step": 19010 + }, + { + "epoch": 0.38057202912694243, + "grad_norm": 1.0056544542312622, + "learning_rate": 7.108764816755174e-06, + "loss": 0.2681, + "step": 19011 + }, + { + "epoch": 0.3805920476440708, + "grad_norm": 1.9474453926086426, + "learning_rate": 7.108470872098827e-06, + "loss": 0.7533, + "step": 19012 + }, + { + "epoch": 0.38061206616119914, + "grad_norm": 1.0923393964767456, + "learning_rate": 7.108176918578926e-06, + "loss": 0.3629, + "step": 19013 + }, + { + "epoch": 0.38063208467832743, + "grad_norm": 1.0056538581848145, + "learning_rate": 7.107882956196701e-06, + "loss": 0.3048, + "step": 19014 + }, + { + "epoch": 0.3806521031954558, + "grad_norm": 1.84188973903656, + "learning_rate": 7.107588984953391e-06, + "loss": 0.797, + "step": 19015 + }, + { + "epoch": 0.38067212171258413, + "grad_norm": 1.1758475303649902, + "learning_rate": 7.1072950048502294e-06, + "loss": 0.3436, + "step": 19016 + }, + { + "epoch": 0.3806921402297125, + "grad_norm": 1.0139387845993042, + "learning_rate": 7.1070010158884536e-06, + "loss": 0.3046, + "step": 19017 + }, + { + "epoch": 0.38071215874684083, + "grad_norm": 1.1195764541625977, + "learning_rate": 7.106707018069299e-06, + "loss": 0.3553, + "step": 19018 + }, + { + "epoch": 0.3807321772639692, + "grad_norm": 1.3057111501693726, + "learning_rate": 7.106413011394e-06, + "loss": 0.3455, + "step": 19019 + }, + { + "epoch": 0.38075219578109754, + "grad_norm": 1.184735894203186, + "learning_rate": 7.106118995863796e-06, + "loss": 0.3381, + "step": 19020 + }, + { + "epoch": 0.3807722142982259, + "grad_norm": 1.2016340494155884, + "learning_rate": 7.10582497147992e-06, + "loss": 0.3448, + "step": 19021 + }, + { + "epoch": 0.3807922328153542, + "grad_norm": 1.1127809286117554, + "learning_rate": 7.10553093824361e-06, + "loss": 0.2788, + "step": 19022 + }, + { + "epoch": 0.38081225133248253, + "grad_norm": 1.7579718828201294, + "learning_rate": 7.1052368961561e-06, + "loss": 0.7453, + "step": 19023 + }, + { + "epoch": 0.3808322698496109, + "grad_norm": 1.0507158041000366, + "learning_rate": 7.1049428452186275e-06, + "loss": 0.2772, + "step": 19024 + }, + { + "epoch": 0.38085228836673923, + "grad_norm": 1.063391923904419, + "learning_rate": 7.104648785432427e-06, + "loss": 0.2956, + "step": 19025 + }, + { + "epoch": 0.3808723068838676, + "grad_norm": 1.0932308435440063, + "learning_rate": 7.104354716798738e-06, + "loss": 0.2881, + "step": 19026 + }, + { + "epoch": 0.38089232540099593, + "grad_norm": 1.1111843585968018, + "learning_rate": 7.104060639318793e-06, + "loss": 0.3368, + "step": 19027 + }, + { + "epoch": 0.3809123439181243, + "grad_norm": 1.084672451019287, + "learning_rate": 7.103766552993832e-06, + "loss": 0.323, + "step": 19028 + }, + { + "epoch": 0.38093236243525264, + "grad_norm": 2.0524344444274902, + "learning_rate": 7.1034724578250865e-06, + "loss": 0.7471, + "step": 19029 + }, + { + "epoch": 0.38095238095238093, + "grad_norm": 2.0549793243408203, + "learning_rate": 7.103178353813798e-06, + "loss": 0.8235, + "step": 19030 + }, + { + "epoch": 0.3809723994695093, + "grad_norm": 1.0400941371917725, + "learning_rate": 7.102884240961198e-06, + "loss": 0.2933, + "step": 19031 + }, + { + "epoch": 0.38099241798663763, + "grad_norm": 1.177873969078064, + "learning_rate": 7.102590119268526e-06, + "loss": 0.3104, + "step": 19032 + }, + { + "epoch": 0.381012436503766, + "grad_norm": 1.7515759468078613, + "learning_rate": 7.1022959887370195e-06, + "loss": 0.8091, + "step": 19033 + }, + { + "epoch": 0.38103245502089433, + "grad_norm": 1.2326830625534058, + "learning_rate": 7.10200184936791e-06, + "loss": 0.2725, + "step": 19034 + }, + { + "epoch": 0.3810524735380227, + "grad_norm": 1.1064904928207397, + "learning_rate": 7.101707701162439e-06, + "loss": 0.3436, + "step": 19035 + }, + { + "epoch": 0.38107249205515104, + "grad_norm": 1.061293363571167, + "learning_rate": 7.10141354412184e-06, + "loss": 0.297, + "step": 19036 + }, + { + "epoch": 0.3810925105722794, + "grad_norm": 1.2361277341842651, + "learning_rate": 7.101119378247352e-06, + "loss": 0.3689, + "step": 19037 + }, + { + "epoch": 0.3811125290894077, + "grad_norm": 1.9638880491256714, + "learning_rate": 7.1008252035402096e-06, + "loss": 0.7854, + "step": 19038 + }, + { + "epoch": 0.38113254760653603, + "grad_norm": 1.2011899948120117, + "learning_rate": 7.1005310200016495e-06, + "loss": 0.3209, + "step": 19039 + }, + { + "epoch": 0.3811525661236644, + "grad_norm": 1.0791701078414917, + "learning_rate": 7.100236827632909e-06, + "loss": 0.3395, + "step": 19040 + }, + { + "epoch": 0.38117258464079273, + "grad_norm": 1.0436843633651733, + "learning_rate": 7.0999426264352265e-06, + "loss": 0.295, + "step": 19041 + }, + { + "epoch": 0.3811926031579211, + "grad_norm": 1.1393564939498901, + "learning_rate": 7.099648416409837e-06, + "loss": 0.3359, + "step": 19042 + }, + { + "epoch": 0.38121262167504943, + "grad_norm": 1.2107064723968506, + "learning_rate": 7.099354197557977e-06, + "loss": 0.3427, + "step": 19043 + }, + { + "epoch": 0.3812326401921778, + "grad_norm": 1.102489709854126, + "learning_rate": 7.099059969880882e-06, + "loss": 0.3058, + "step": 19044 + }, + { + "epoch": 0.38125265870930614, + "grad_norm": 1.09089195728302, + "learning_rate": 7.0987657333797924e-06, + "loss": 0.3858, + "step": 19045 + }, + { + "epoch": 0.38127267722643443, + "grad_norm": 1.143591046333313, + "learning_rate": 7.098471488055942e-06, + "loss": 0.2829, + "step": 19046 + }, + { + "epoch": 0.3812926957435628, + "grad_norm": 1.0264027118682861, + "learning_rate": 7.0981772339105705e-06, + "loss": 0.2915, + "step": 19047 + }, + { + "epoch": 0.38131271426069113, + "grad_norm": 1.168965458869934, + "learning_rate": 7.097882970944914e-06, + "loss": 0.3254, + "step": 19048 + }, + { + "epoch": 0.3813327327778195, + "grad_norm": 1.154284954071045, + "learning_rate": 7.097588699160207e-06, + "loss": 0.3156, + "step": 19049 + }, + { + "epoch": 0.38135275129494783, + "grad_norm": 1.7343032360076904, + "learning_rate": 7.0972944185576895e-06, + "loss": 0.8476, + "step": 19050 + }, + { + "epoch": 0.3813727698120762, + "grad_norm": 1.0616061687469482, + "learning_rate": 7.097000129138596e-06, + "loss": 0.3398, + "step": 19051 + }, + { + "epoch": 0.38139278832920454, + "grad_norm": 1.2451955080032349, + "learning_rate": 7.096705830904167e-06, + "loss": 0.2998, + "step": 19052 + }, + { + "epoch": 0.38141280684633283, + "grad_norm": 1.0863524675369263, + "learning_rate": 7.096411523855639e-06, + "loss": 0.3485, + "step": 19053 + }, + { + "epoch": 0.3814328253634612, + "grad_norm": 1.1029692888259888, + "learning_rate": 7.096117207994246e-06, + "loss": 0.2794, + "step": 19054 + }, + { + "epoch": 0.38145284388058953, + "grad_norm": 0.9986124634742737, + "learning_rate": 7.095822883321229e-06, + "loss": 0.3185, + "step": 19055 + }, + { + "epoch": 0.3814728623977179, + "grad_norm": 1.0214024782180786, + "learning_rate": 7.0955285498378225e-06, + "loss": 0.3285, + "step": 19056 + }, + { + "epoch": 0.38149288091484623, + "grad_norm": 1.0769222974777222, + "learning_rate": 7.095234207545264e-06, + "loss": 0.3139, + "step": 19057 + }, + { + "epoch": 0.3815128994319746, + "grad_norm": 1.1071643829345703, + "learning_rate": 7.0949398564447924e-06, + "loss": 0.3469, + "step": 19058 + }, + { + "epoch": 0.38153291794910293, + "grad_norm": 1.0960017442703247, + "learning_rate": 7.094645496537645e-06, + "loss": 0.3327, + "step": 19059 + }, + { + "epoch": 0.3815529364662313, + "grad_norm": 1.2154440879821777, + "learning_rate": 7.094351127825059e-06, + "loss": 0.2929, + "step": 19060 + }, + { + "epoch": 0.3815729549833596, + "grad_norm": 1.1400941610336304, + "learning_rate": 7.094056750308271e-06, + "loss": 0.3022, + "step": 19061 + }, + { + "epoch": 0.38159297350048793, + "grad_norm": 1.021019697189331, + "learning_rate": 7.093762363988521e-06, + "loss": 0.2641, + "step": 19062 + }, + { + "epoch": 0.3816129920176163, + "grad_norm": 1.2103047370910645, + "learning_rate": 7.093467968867043e-06, + "loss": 0.3571, + "step": 19063 + }, + { + "epoch": 0.38163301053474463, + "grad_norm": 1.200649380683899, + "learning_rate": 7.093173564945075e-06, + "loss": 0.3337, + "step": 19064 + }, + { + "epoch": 0.381653029051873, + "grad_norm": 1.0489745140075684, + "learning_rate": 7.092879152223857e-06, + "loss": 0.3462, + "step": 19065 + }, + { + "epoch": 0.38167304756900133, + "grad_norm": 1.1459360122680664, + "learning_rate": 7.0925847307046255e-06, + "loss": 0.353, + "step": 19066 + }, + { + "epoch": 0.3816930660861297, + "grad_norm": 1.7463139295578003, + "learning_rate": 7.092290300388618e-06, + "loss": 0.8382, + "step": 19067 + }, + { + "epoch": 0.38171308460325803, + "grad_norm": 1.057899832725525, + "learning_rate": 7.091995861277072e-06, + "loss": 0.3148, + "step": 19068 + }, + { + "epoch": 0.38173310312038633, + "grad_norm": 1.1404026746749878, + "learning_rate": 7.091701413371226e-06, + "loss": 0.2801, + "step": 19069 + }, + { + "epoch": 0.3817531216375147, + "grad_norm": 1.0694844722747803, + "learning_rate": 7.091406956672316e-06, + "loss": 0.354, + "step": 19070 + }, + { + "epoch": 0.38177314015464303, + "grad_norm": 1.0530437231063843, + "learning_rate": 7.091112491181582e-06, + "loss": 0.2751, + "step": 19071 + }, + { + "epoch": 0.3817931586717714, + "grad_norm": 1.1498185396194458, + "learning_rate": 7.090818016900263e-06, + "loss": 0.2943, + "step": 19072 + }, + { + "epoch": 0.38181317718889973, + "grad_norm": 1.0695841312408447, + "learning_rate": 7.090523533829594e-06, + "loss": 0.3302, + "step": 19073 + }, + { + "epoch": 0.3818331957060281, + "grad_norm": 1.0340760946273804, + "learning_rate": 7.090229041970813e-06, + "loss": 0.3253, + "step": 19074 + }, + { + "epoch": 0.38185321422315643, + "grad_norm": 1.2554434537887573, + "learning_rate": 7.089934541325161e-06, + "loss": 0.3421, + "step": 19075 + }, + { + "epoch": 0.3818732327402848, + "grad_norm": 1.0576450824737549, + "learning_rate": 7.089640031893872e-06, + "loss": 0.2849, + "step": 19076 + }, + { + "epoch": 0.3818932512574131, + "grad_norm": 1.0176324844360352, + "learning_rate": 7.089345513678185e-06, + "loss": 0.322, + "step": 19077 + }, + { + "epoch": 0.38191326977454143, + "grad_norm": 1.0137909650802612, + "learning_rate": 7.08905098667934e-06, + "loss": 0.3061, + "step": 19078 + }, + { + "epoch": 0.3819332882916698, + "grad_norm": 1.0547046661376953, + "learning_rate": 7.088756450898577e-06, + "loss": 0.3579, + "step": 19079 + }, + { + "epoch": 0.38195330680879813, + "grad_norm": 1.093420147895813, + "learning_rate": 7.088461906337127e-06, + "loss": 0.3308, + "step": 19080 + }, + { + "epoch": 0.3819733253259265, + "grad_norm": 1.1848939657211304, + "learning_rate": 7.088167352996236e-06, + "loss": 0.3664, + "step": 19081 + }, + { + "epoch": 0.38199334384305483, + "grad_norm": 1.180018424987793, + "learning_rate": 7.0878727908771375e-06, + "loss": 0.2965, + "step": 19082 + }, + { + "epoch": 0.3820133623601832, + "grad_norm": 1.0863584280014038, + "learning_rate": 7.08757821998107e-06, + "loss": 0.3357, + "step": 19083 + }, + { + "epoch": 0.38203338087731153, + "grad_norm": 1.1444629430770874, + "learning_rate": 7.0872836403092744e-06, + "loss": 0.3305, + "step": 19084 + }, + { + "epoch": 0.38205339939443983, + "grad_norm": 1.0983121395111084, + "learning_rate": 7.086989051862987e-06, + "loss": 0.3256, + "step": 19085 + }, + { + "epoch": 0.3820734179115682, + "grad_norm": 1.1583294868469238, + "learning_rate": 7.0866944546434476e-06, + "loss": 0.3258, + "step": 19086 + }, + { + "epoch": 0.38209343642869653, + "grad_norm": 1.0357478857040405, + "learning_rate": 7.0863998486518926e-06, + "loss": 0.3601, + "step": 19087 + }, + { + "epoch": 0.3821134549458249, + "grad_norm": 0.9776657223701477, + "learning_rate": 7.086105233889563e-06, + "loss": 0.2837, + "step": 19088 + }, + { + "epoch": 0.38213347346295323, + "grad_norm": 1.2480745315551758, + "learning_rate": 7.085810610357695e-06, + "loss": 0.304, + "step": 19089 + }, + { + "epoch": 0.3821534919800816, + "grad_norm": 1.1281315088272095, + "learning_rate": 7.085515978057527e-06, + "loss": 0.3, + "step": 19090 + }, + { + "epoch": 0.38217351049720993, + "grad_norm": 0.9590210914611816, + "learning_rate": 7.0852213369903e-06, + "loss": 0.2811, + "step": 19091 + }, + { + "epoch": 0.3821935290143383, + "grad_norm": 1.016483187675476, + "learning_rate": 7.084926687157252e-06, + "loss": 0.3521, + "step": 19092 + }, + { + "epoch": 0.3822135475314666, + "grad_norm": 1.1895880699157715, + "learning_rate": 7.084632028559619e-06, + "loss": 0.3746, + "step": 19093 + }, + { + "epoch": 0.38223356604859493, + "grad_norm": 1.1883686780929565, + "learning_rate": 7.084337361198643e-06, + "loss": 0.3516, + "step": 19094 + }, + { + "epoch": 0.3822535845657233, + "grad_norm": 0.9824044704437256, + "learning_rate": 7.0840426850755604e-06, + "loss": 0.2866, + "step": 19095 + }, + { + "epoch": 0.38227360308285163, + "grad_norm": 1.2400543689727783, + "learning_rate": 7.083748000191612e-06, + "loss": 0.3175, + "step": 19096 + }, + { + "epoch": 0.38229362159998, + "grad_norm": 0.9888293743133545, + "learning_rate": 7.083453306548035e-06, + "loss": 0.3249, + "step": 19097 + }, + { + "epoch": 0.38231364011710833, + "grad_norm": 1.1112215518951416, + "learning_rate": 7.083158604146068e-06, + "loss": 0.2966, + "step": 19098 + }, + { + "epoch": 0.3823336586342367, + "grad_norm": 1.1910690069198608, + "learning_rate": 7.082863892986951e-06, + "loss": 0.3205, + "step": 19099 + }, + { + "epoch": 0.38235367715136503, + "grad_norm": 1.1000837087631226, + "learning_rate": 7.082569173071923e-06, + "loss": 0.2989, + "step": 19100 + }, + { + "epoch": 0.38237369566849333, + "grad_norm": 0.9971103668212891, + "learning_rate": 7.082274444402222e-06, + "loss": 0.3057, + "step": 19101 + }, + { + "epoch": 0.3823937141856217, + "grad_norm": 1.0460193157196045, + "learning_rate": 7.081979706979086e-06, + "loss": 0.3002, + "step": 19102 + }, + { + "epoch": 0.38241373270275003, + "grad_norm": 1.1598265171051025, + "learning_rate": 7.081684960803756e-06, + "loss": 0.2975, + "step": 19103 + }, + { + "epoch": 0.3824337512198784, + "grad_norm": 1.8887041807174683, + "learning_rate": 7.081390205877472e-06, + "loss": 0.8124, + "step": 19104 + }, + { + "epoch": 0.38245376973700673, + "grad_norm": 1.121228575706482, + "learning_rate": 7.0810954422014685e-06, + "loss": 0.3243, + "step": 19105 + }, + { + "epoch": 0.3824737882541351, + "grad_norm": 0.9931028485298157, + "learning_rate": 7.080800669776991e-06, + "loss": 0.306, + "step": 19106 + }, + { + "epoch": 0.38249380677126343, + "grad_norm": 1.2342475652694702, + "learning_rate": 7.080505888605273e-06, + "loss": 0.3403, + "step": 19107 + }, + { + "epoch": 0.3825138252883918, + "grad_norm": 0.9970996379852295, + "learning_rate": 7.080211098687557e-06, + "loss": 0.3197, + "step": 19108 + }, + { + "epoch": 0.3825338438055201, + "grad_norm": 0.9624868035316467, + "learning_rate": 7.079916300025081e-06, + "loss": 0.2489, + "step": 19109 + }, + { + "epoch": 0.38255386232264843, + "grad_norm": 1.1293150186538696, + "learning_rate": 7.079621492619084e-06, + "loss": 0.3152, + "step": 19110 + }, + { + "epoch": 0.3825738808397768, + "grad_norm": 1.1318035125732422, + "learning_rate": 7.079326676470805e-06, + "loss": 0.3428, + "step": 19111 + }, + { + "epoch": 0.38259389935690513, + "grad_norm": 1.3828630447387695, + "learning_rate": 7.079031851581486e-06, + "loss": 0.3292, + "step": 19112 + }, + { + "epoch": 0.3826139178740335, + "grad_norm": 1.6679240465164185, + "learning_rate": 7.078737017952364e-06, + "loss": 0.8147, + "step": 19113 + }, + { + "epoch": 0.38263393639116183, + "grad_norm": 1.0658619403839111, + "learning_rate": 7.078442175584679e-06, + "loss": 0.3351, + "step": 19114 + }, + { + "epoch": 0.3826539549082902, + "grad_norm": 1.7696306705474854, + "learning_rate": 7.07814732447967e-06, + "loss": 0.7906, + "step": 19115 + }, + { + "epoch": 0.38267397342541853, + "grad_norm": 1.194929599761963, + "learning_rate": 7.077852464638576e-06, + "loss": 0.3697, + "step": 19116 + }, + { + "epoch": 0.38269399194254683, + "grad_norm": 1.1749862432479858, + "learning_rate": 7.0775575960626395e-06, + "loss": 0.3315, + "step": 19117 + }, + { + "epoch": 0.3827140104596752, + "grad_norm": 1.0828619003295898, + "learning_rate": 7.077262718753097e-06, + "loss": 0.3168, + "step": 19118 + }, + { + "epoch": 0.38273402897680353, + "grad_norm": 1.2686916589736938, + "learning_rate": 7.076967832711189e-06, + "loss": 0.3024, + "step": 19119 + }, + { + "epoch": 0.3827540474939319, + "grad_norm": 1.129366159439087, + "learning_rate": 7.076672937938154e-06, + "loss": 0.3499, + "step": 19120 + }, + { + "epoch": 0.38277406601106023, + "grad_norm": 1.7840276956558228, + "learning_rate": 7.076378034435234e-06, + "loss": 0.7989, + "step": 19121 + }, + { + "epoch": 0.3827940845281886, + "grad_norm": 1.057487964630127, + "learning_rate": 7.076083122203668e-06, + "loss": 0.321, + "step": 19122 + }, + { + "epoch": 0.38281410304531693, + "grad_norm": 1.154437780380249, + "learning_rate": 7.075788201244694e-06, + "loss": 0.3204, + "step": 19123 + }, + { + "epoch": 0.3828341215624453, + "grad_norm": 1.1286498308181763, + "learning_rate": 7.0754932715595535e-06, + "loss": 0.4208, + "step": 19124 + }, + { + "epoch": 0.3828541400795736, + "grad_norm": 1.1008799076080322, + "learning_rate": 7.075198333149488e-06, + "loss": 0.3062, + "step": 19125 + }, + { + "epoch": 0.38287415859670193, + "grad_norm": 1.767042636871338, + "learning_rate": 7.0749033860157345e-06, + "loss": 0.8553, + "step": 19126 + }, + { + "epoch": 0.3828941771138303, + "grad_norm": 1.0673187971115112, + "learning_rate": 7.074608430159532e-06, + "loss": 0.3193, + "step": 19127 + }, + { + "epoch": 0.38291419563095863, + "grad_norm": 1.4223750829696655, + "learning_rate": 7.074313465582122e-06, + "loss": 0.3238, + "step": 19128 + }, + { + "epoch": 0.382934214148087, + "grad_norm": 1.045844316482544, + "learning_rate": 7.0740184922847455e-06, + "loss": 0.2559, + "step": 19129 + }, + { + "epoch": 0.38295423266521533, + "grad_norm": 1.825040340423584, + "learning_rate": 7.073723510268642e-06, + "loss": 0.8473, + "step": 19130 + }, + { + "epoch": 0.3829742511823437, + "grad_norm": 1.1821515560150146, + "learning_rate": 7.07342851953505e-06, + "loss": 0.3069, + "step": 19131 + }, + { + "epoch": 0.38299426969947203, + "grad_norm": 1.0763609409332275, + "learning_rate": 7.073133520085214e-06, + "loss": 0.3229, + "step": 19132 + }, + { + "epoch": 0.38301428821660033, + "grad_norm": 1.1287435293197632, + "learning_rate": 7.072838511920367e-06, + "loss": 0.3458, + "step": 19133 + }, + { + "epoch": 0.3830343067337287, + "grad_norm": 1.1442376375198364, + "learning_rate": 7.072543495041754e-06, + "loss": 0.318, + "step": 19134 + }, + { + "epoch": 0.38305432525085703, + "grad_norm": 1.2704757452011108, + "learning_rate": 7.072248469450616e-06, + "loss": 0.3091, + "step": 19135 + }, + { + "epoch": 0.3830743437679854, + "grad_norm": 1.074561357498169, + "learning_rate": 7.071953435148189e-06, + "loss": 0.336, + "step": 19136 + }, + { + "epoch": 0.38309436228511373, + "grad_norm": 1.1309956312179565, + "learning_rate": 7.071658392135716e-06, + "loss": 0.2976, + "step": 19137 + }, + { + "epoch": 0.3831143808022421, + "grad_norm": 1.1430481672286987, + "learning_rate": 7.071363340414437e-06, + "loss": 0.3472, + "step": 19138 + }, + { + "epoch": 0.38313439931937043, + "grad_norm": 1.0203238725662231, + "learning_rate": 7.071068279985595e-06, + "loss": 0.3024, + "step": 19139 + }, + { + "epoch": 0.3831544178364988, + "grad_norm": 0.9803671836853027, + "learning_rate": 7.070773210850425e-06, + "loss": 0.308, + "step": 19140 + }, + { + "epoch": 0.3831744363536271, + "grad_norm": 1.174953818321228, + "learning_rate": 7.070478133010169e-06, + "loss": 0.3644, + "step": 19141 + }, + { + "epoch": 0.38319445487075543, + "grad_norm": 1.0966074466705322, + "learning_rate": 7.07018304646607e-06, + "loss": 0.3127, + "step": 19142 + }, + { + "epoch": 0.3832144733878838, + "grad_norm": 1.0573856830596924, + "learning_rate": 7.069887951219368e-06, + "loss": 0.304, + "step": 19143 + }, + { + "epoch": 0.38323449190501213, + "grad_norm": 1.149931788444519, + "learning_rate": 7.0695928472713005e-06, + "loss": 0.3194, + "step": 19144 + }, + { + "epoch": 0.3832545104221405, + "grad_norm": 1.0830936431884766, + "learning_rate": 7.069297734623112e-06, + "loss": 0.3419, + "step": 19145 + }, + { + "epoch": 0.38327452893926883, + "grad_norm": 1.1279138326644897, + "learning_rate": 7.069002613276039e-06, + "loss": 0.3103, + "step": 19146 + }, + { + "epoch": 0.3832945474563972, + "grad_norm": 1.0564509630203247, + "learning_rate": 7.0687074832313255e-06, + "loss": 0.3456, + "step": 19147 + }, + { + "epoch": 0.38331456597352553, + "grad_norm": 1.1814817190170288, + "learning_rate": 7.068412344490211e-06, + "loss": 0.3429, + "step": 19148 + }, + { + "epoch": 0.38333458449065383, + "grad_norm": 1.075233817100525, + "learning_rate": 7.068117197053936e-06, + "loss": 0.3585, + "step": 19149 + }, + { + "epoch": 0.3833546030077822, + "grad_norm": 1.741805911064148, + "learning_rate": 7.067822040923742e-06, + "loss": 0.7653, + "step": 19150 + }, + { + "epoch": 0.38337462152491053, + "grad_norm": 1.079451560974121, + "learning_rate": 7.067526876100867e-06, + "loss": 0.338, + "step": 19151 + }, + { + "epoch": 0.3833946400420389, + "grad_norm": 1.0229190587997437, + "learning_rate": 7.067231702586557e-06, + "loss": 0.2706, + "step": 19152 + }, + { + "epoch": 0.38341465855916723, + "grad_norm": 1.2854670286178589, + "learning_rate": 7.066936520382047e-06, + "loss": 0.2946, + "step": 19153 + }, + { + "epoch": 0.3834346770762956, + "grad_norm": 1.188551425933838, + "learning_rate": 7.066641329488582e-06, + "loss": 0.3308, + "step": 19154 + }, + { + "epoch": 0.38345469559342393, + "grad_norm": 1.0510278940200806, + "learning_rate": 7.066346129907401e-06, + "loss": 0.2971, + "step": 19155 + }, + { + "epoch": 0.3834747141105523, + "grad_norm": 1.1301466226577759, + "learning_rate": 7.066050921639746e-06, + "loss": 0.31, + "step": 19156 + }, + { + "epoch": 0.3834947326276806, + "grad_norm": 1.2059839963912964, + "learning_rate": 7.065755704686859e-06, + "loss": 0.3444, + "step": 19157 + }, + { + "epoch": 0.38351475114480893, + "grad_norm": 1.128303050994873, + "learning_rate": 7.065460479049978e-06, + "loss": 0.2727, + "step": 19158 + }, + { + "epoch": 0.3835347696619373, + "grad_norm": 1.1647872924804688, + "learning_rate": 7.065165244730345e-06, + "loss": 0.3413, + "step": 19159 + }, + { + "epoch": 0.38355478817906563, + "grad_norm": 1.119312047958374, + "learning_rate": 7.064870001729203e-06, + "loss": 0.3136, + "step": 19160 + }, + { + "epoch": 0.383574806696194, + "grad_norm": 1.1965718269348145, + "learning_rate": 7.0645747500477915e-06, + "loss": 0.3299, + "step": 19161 + }, + { + "epoch": 0.38359482521332233, + "grad_norm": 1.0877106189727783, + "learning_rate": 7.064279489687351e-06, + "loss": 0.3062, + "step": 19162 + }, + { + "epoch": 0.3836148437304507, + "grad_norm": 1.0543487071990967, + "learning_rate": 7.063984220649126e-06, + "loss": 0.2987, + "step": 19163 + }, + { + "epoch": 0.38363486224757903, + "grad_norm": 1.0898480415344238, + "learning_rate": 7.063688942934353e-06, + "loss": 0.3073, + "step": 19164 + }, + { + "epoch": 0.38365488076470733, + "grad_norm": 1.2041600942611694, + "learning_rate": 7.063393656544279e-06, + "loss": 0.3251, + "step": 19165 + }, + { + "epoch": 0.3836748992818357, + "grad_norm": 1.0905431509017944, + "learning_rate": 7.0630983614801386e-06, + "loss": 0.2687, + "step": 19166 + }, + { + "epoch": 0.38369491779896403, + "grad_norm": 1.0958001613616943, + "learning_rate": 7.062803057743178e-06, + "loss": 0.2996, + "step": 19167 + }, + { + "epoch": 0.3837149363160924, + "grad_norm": 1.1372934579849243, + "learning_rate": 7.062507745334639e-06, + "loss": 0.3289, + "step": 19168 + }, + { + "epoch": 0.38373495483322073, + "grad_norm": 1.0304378271102905, + "learning_rate": 7.062212424255759e-06, + "loss": 0.3088, + "step": 19169 + }, + { + "epoch": 0.3837549733503491, + "grad_norm": 1.0475469827651978, + "learning_rate": 7.061917094507783e-06, + "loss": 0.3168, + "step": 19170 + }, + { + "epoch": 0.38377499186747743, + "grad_norm": 1.9752448797225952, + "learning_rate": 7.06162175609195e-06, + "loss": 0.7676, + "step": 19171 + }, + { + "epoch": 0.3837950103846058, + "grad_norm": 1.227381944656372, + "learning_rate": 7.061326409009504e-06, + "loss": 0.3081, + "step": 19172 + }, + { + "epoch": 0.3838150289017341, + "grad_norm": 1.8025727272033691, + "learning_rate": 7.061031053261685e-06, + "loss": 0.8678, + "step": 19173 + }, + { + "epoch": 0.38383504741886243, + "grad_norm": 1.0589741468429565, + "learning_rate": 7.060735688849735e-06, + "loss": 0.3145, + "step": 19174 + }, + { + "epoch": 0.3838550659359908, + "grad_norm": 1.1131900548934937, + "learning_rate": 7.0604403157748945e-06, + "loss": 0.3495, + "step": 19175 + }, + { + "epoch": 0.38387508445311913, + "grad_norm": 1.1850993633270264, + "learning_rate": 7.060144934038409e-06, + "loss": 0.3263, + "step": 19176 + }, + { + "epoch": 0.3838951029702475, + "grad_norm": 1.1591206789016724, + "learning_rate": 7.059849543641516e-06, + "loss": 0.321, + "step": 19177 + }, + { + "epoch": 0.38391512148737583, + "grad_norm": 1.1346837282180786, + "learning_rate": 7.059554144585459e-06, + "loss": 0.3308, + "step": 19178 + }, + { + "epoch": 0.3839351400045042, + "grad_norm": 1.0342620611190796, + "learning_rate": 7.059258736871478e-06, + "loss": 0.2941, + "step": 19179 + }, + { + "epoch": 0.38395515852163253, + "grad_norm": 1.8952624797821045, + "learning_rate": 7.058963320500818e-06, + "loss": 0.8074, + "step": 19180 + }, + { + "epoch": 0.38397517703876083, + "grad_norm": 1.0107306241989136, + "learning_rate": 7.0586678954747204e-06, + "loss": 0.315, + "step": 19181 + }, + { + "epoch": 0.3839951955558892, + "grad_norm": 1.1456947326660156, + "learning_rate": 7.058372461794424e-06, + "loss": 0.291, + "step": 19182 + }, + { + "epoch": 0.38401521407301753, + "grad_norm": 1.2149094343185425, + "learning_rate": 7.058077019461174e-06, + "loss": 0.3037, + "step": 19183 + }, + { + "epoch": 0.3840352325901459, + "grad_norm": 1.0680092573165894, + "learning_rate": 7.05778156847621e-06, + "loss": 0.3287, + "step": 19184 + }, + { + "epoch": 0.38405525110727423, + "grad_norm": 1.8970954418182373, + "learning_rate": 7.057486108840776e-06, + "loss": 0.7839, + "step": 19185 + }, + { + "epoch": 0.3840752696244026, + "grad_norm": 1.0991936922073364, + "learning_rate": 7.057190640556112e-06, + "loss": 0.275, + "step": 19186 + }, + { + "epoch": 0.38409528814153093, + "grad_norm": 1.0105029344558716, + "learning_rate": 7.0568951636234625e-06, + "loss": 0.3641, + "step": 19187 + }, + { + "epoch": 0.3841153066586593, + "grad_norm": 0.9993019700050354, + "learning_rate": 7.056599678044068e-06, + "loss": 0.3317, + "step": 19188 + }, + { + "epoch": 0.3841353251757876, + "grad_norm": 1.0902988910675049, + "learning_rate": 7.056304183819172e-06, + "loss": 0.2775, + "step": 19189 + }, + { + "epoch": 0.38415534369291593, + "grad_norm": 1.0890275239944458, + "learning_rate": 7.056008680950015e-06, + "loss": 0.341, + "step": 19190 + }, + { + "epoch": 0.3841753622100443, + "grad_norm": 1.2925312519073486, + "learning_rate": 7.055713169437839e-06, + "loss": 0.3409, + "step": 19191 + }, + { + "epoch": 0.38419538072717263, + "grad_norm": 1.0708248615264893, + "learning_rate": 7.0554176492838875e-06, + "loss": 0.2904, + "step": 19192 + }, + { + "epoch": 0.384215399244301, + "grad_norm": 1.1332188844680786, + "learning_rate": 7.055122120489402e-06, + "loss": 0.2749, + "step": 19193 + }, + { + "epoch": 0.38423541776142933, + "grad_norm": 1.0720020532608032, + "learning_rate": 7.054826583055627e-06, + "loss": 0.3125, + "step": 19194 + }, + { + "epoch": 0.3842554362785577, + "grad_norm": 1.1452760696411133, + "learning_rate": 7.054531036983803e-06, + "loss": 0.3628, + "step": 19195 + }, + { + "epoch": 0.38427545479568603, + "grad_norm": 1.0825648307800293, + "learning_rate": 7.054235482275172e-06, + "loss": 0.3194, + "step": 19196 + }, + { + "epoch": 0.38429547331281433, + "grad_norm": 1.0536714792251587, + "learning_rate": 7.053939918930978e-06, + "loss": 0.2779, + "step": 19197 + }, + { + "epoch": 0.3843154918299427, + "grad_norm": 1.0877976417541504, + "learning_rate": 7.053644346952462e-06, + "loss": 0.3023, + "step": 19198 + }, + { + "epoch": 0.38433551034707103, + "grad_norm": 1.8889422416687012, + "learning_rate": 7.053348766340867e-06, + "loss": 0.8489, + "step": 19199 + }, + { + "epoch": 0.3843555288641994, + "grad_norm": 1.1082565784454346, + "learning_rate": 7.053053177097435e-06, + "loss": 0.3575, + "step": 19200 + }, + { + "epoch": 0.38437554738132773, + "grad_norm": 1.1496951580047607, + "learning_rate": 7.05275757922341e-06, + "loss": 0.3699, + "step": 19201 + }, + { + "epoch": 0.3843955658984561, + "grad_norm": 1.0608868598937988, + "learning_rate": 7.052461972720035e-06, + "loss": 0.3114, + "step": 19202 + }, + { + "epoch": 0.38441558441558443, + "grad_norm": 1.1893088817596436, + "learning_rate": 7.0521663575885505e-06, + "loss": 0.3052, + "step": 19203 + }, + { + "epoch": 0.3844356029327128, + "grad_norm": 2.0439164638519287, + "learning_rate": 7.0518707338302e-06, + "loss": 0.7687, + "step": 19204 + }, + { + "epoch": 0.3844556214498411, + "grad_norm": 1.0934675931930542, + "learning_rate": 7.051575101446226e-06, + "loss": 0.3086, + "step": 19205 + }, + { + "epoch": 0.38447563996696943, + "grad_norm": 1.07065749168396, + "learning_rate": 7.051279460437872e-06, + "loss": 0.3278, + "step": 19206 + }, + { + "epoch": 0.3844956584840978, + "grad_norm": 1.2014936208724976, + "learning_rate": 7.050983810806382e-06, + "loss": 0.3469, + "step": 19207 + }, + { + "epoch": 0.38451567700122613, + "grad_norm": 1.1548796892166138, + "learning_rate": 7.050688152552998e-06, + "loss": 0.3107, + "step": 19208 + }, + { + "epoch": 0.3845356955183545, + "grad_norm": 1.1262718439102173, + "learning_rate": 7.05039248567896e-06, + "loss": 0.3282, + "step": 19209 + }, + { + "epoch": 0.38455571403548283, + "grad_norm": 1.1423858404159546, + "learning_rate": 7.0500968101855135e-06, + "loss": 0.2924, + "step": 19210 + }, + { + "epoch": 0.3845757325526112, + "grad_norm": 1.0043513774871826, + "learning_rate": 7.049801126073902e-06, + "loss": 0.273, + "step": 19211 + }, + { + "epoch": 0.38459575106973953, + "grad_norm": 1.0583915710449219, + "learning_rate": 7.049505433345367e-06, + "loss": 0.3537, + "step": 19212 + }, + { + "epoch": 0.38461576958686783, + "grad_norm": 0.9837968349456787, + "learning_rate": 7.049209732001153e-06, + "loss": 0.2965, + "step": 19213 + }, + { + "epoch": 0.3846357881039962, + "grad_norm": 1.0143324136734009, + "learning_rate": 7.048914022042502e-06, + "loss": 0.3277, + "step": 19214 + }, + { + "epoch": 0.38465580662112453, + "grad_norm": 1.142221212387085, + "learning_rate": 7.048618303470658e-06, + "loss": 0.3468, + "step": 19215 + }, + { + "epoch": 0.3846758251382529, + "grad_norm": 1.2055392265319824, + "learning_rate": 7.0483225762868635e-06, + "loss": 0.279, + "step": 19216 + }, + { + "epoch": 0.38469584365538123, + "grad_norm": 1.18162202835083, + "learning_rate": 7.04802684049236e-06, + "loss": 0.3179, + "step": 19217 + }, + { + "epoch": 0.3847158621725096, + "grad_norm": 1.0476832389831543, + "learning_rate": 7.047731096088393e-06, + "loss": 0.3066, + "step": 19218 + }, + { + "epoch": 0.38473588068963793, + "grad_norm": 1.819819450378418, + "learning_rate": 7.047435343076206e-06, + "loss": 0.8306, + "step": 19219 + }, + { + "epoch": 0.3847558992067663, + "grad_norm": 1.053998589515686, + "learning_rate": 7.0471395814570405e-06, + "loss": 0.2741, + "step": 19220 + }, + { + "epoch": 0.3847759177238946, + "grad_norm": 1.0970913171768188, + "learning_rate": 7.046843811232143e-06, + "loss": 0.35, + "step": 19221 + }, + { + "epoch": 0.38479593624102293, + "grad_norm": 1.197105884552002, + "learning_rate": 7.046548032402752e-06, + "loss": 0.2938, + "step": 19222 + }, + { + "epoch": 0.3848159547581513, + "grad_norm": 1.1822900772094727, + "learning_rate": 7.046252244970114e-06, + "loss": 0.3603, + "step": 19223 + }, + { + "epoch": 0.38483597327527963, + "grad_norm": 1.0278575420379639, + "learning_rate": 7.045956448935473e-06, + "loss": 0.2993, + "step": 19224 + }, + { + "epoch": 0.384855991792408, + "grad_norm": 1.0615990161895752, + "learning_rate": 7.045660644300071e-06, + "loss": 0.3194, + "step": 19225 + }, + { + "epoch": 0.38487601030953633, + "grad_norm": 1.0482964515686035, + "learning_rate": 7.045364831065152e-06, + "loss": 0.3303, + "step": 19226 + }, + { + "epoch": 0.3848960288266647, + "grad_norm": 1.0489928722381592, + "learning_rate": 7.0450690092319585e-06, + "loss": 0.3305, + "step": 19227 + }, + { + "epoch": 0.38491604734379303, + "grad_norm": 1.1776608228683472, + "learning_rate": 7.044773178801735e-06, + "loss": 0.325, + "step": 19228 + }, + { + "epoch": 0.38493606586092133, + "grad_norm": 1.056270718574524, + "learning_rate": 7.044477339775726e-06, + "loss": 0.3226, + "step": 19229 + }, + { + "epoch": 0.3849560843780497, + "grad_norm": 1.1966705322265625, + "learning_rate": 7.044181492155174e-06, + "loss": 0.3418, + "step": 19230 + }, + { + "epoch": 0.38497610289517803, + "grad_norm": 1.0997483730316162, + "learning_rate": 7.043885635941322e-06, + "loss": 0.3022, + "step": 19231 + }, + { + "epoch": 0.3849961214123064, + "grad_norm": 1.1554583311080933, + "learning_rate": 7.043589771135416e-06, + "loss": 0.3363, + "step": 19232 + }, + { + "epoch": 0.38501613992943473, + "grad_norm": 1.8172228336334229, + "learning_rate": 7.043293897738698e-06, + "loss": 0.8407, + "step": 19233 + }, + { + "epoch": 0.3850361584465631, + "grad_norm": 1.1373566389083862, + "learning_rate": 7.042998015752413e-06, + "loss": 0.3512, + "step": 19234 + }, + { + "epoch": 0.38505617696369143, + "grad_norm": 1.0399497747421265, + "learning_rate": 7.042702125177802e-06, + "loss": 0.3082, + "step": 19235 + }, + { + "epoch": 0.3850761954808198, + "grad_norm": 1.0058057308197021, + "learning_rate": 7.042406226016112e-06, + "loss": 0.2645, + "step": 19236 + }, + { + "epoch": 0.3850962139979481, + "grad_norm": 1.2232393026351929, + "learning_rate": 7.042110318268584e-06, + "loss": 0.308, + "step": 19237 + }, + { + "epoch": 0.38511623251507643, + "grad_norm": 1.1257492303848267, + "learning_rate": 7.041814401936466e-06, + "loss": 0.3393, + "step": 19238 + }, + { + "epoch": 0.3851362510322048, + "grad_norm": 1.0609185695648193, + "learning_rate": 7.041518477020999e-06, + "loss": 0.3136, + "step": 19239 + }, + { + "epoch": 0.38515626954933313, + "grad_norm": 1.0615991353988647, + "learning_rate": 7.041222543523428e-06, + "loss": 0.2854, + "step": 19240 + }, + { + "epoch": 0.3851762880664615, + "grad_norm": 1.8564168214797974, + "learning_rate": 7.040926601444997e-06, + "loss": 0.8101, + "step": 19241 + }, + { + "epoch": 0.38519630658358983, + "grad_norm": 1.2024210691452026, + "learning_rate": 7.040630650786948e-06, + "loss": 0.2762, + "step": 19242 + }, + { + "epoch": 0.3852163251007182, + "grad_norm": 0.9877988696098328, + "learning_rate": 7.040334691550528e-06, + "loss": 0.3167, + "step": 19243 + }, + { + "epoch": 0.38523634361784653, + "grad_norm": 1.0550720691680908, + "learning_rate": 7.040038723736979e-06, + "loss": 0.2948, + "step": 19244 + }, + { + "epoch": 0.38525636213497483, + "grad_norm": 1.034523367881775, + "learning_rate": 7.0397427473475485e-06, + "loss": 0.3152, + "step": 19245 + }, + { + "epoch": 0.3852763806521032, + "grad_norm": 1.1704864501953125, + "learning_rate": 7.039446762383477e-06, + "loss": 0.2909, + "step": 19246 + }, + { + "epoch": 0.38529639916923153, + "grad_norm": 2.011037588119507, + "learning_rate": 7.039150768846012e-06, + "loss": 0.8312, + "step": 19247 + }, + { + "epoch": 0.3853164176863599, + "grad_norm": 1.0811564922332764, + "learning_rate": 7.038854766736394e-06, + "loss": 0.3457, + "step": 19248 + }, + { + "epoch": 0.38533643620348823, + "grad_norm": 1.3058174848556519, + "learning_rate": 7.038558756055869e-06, + "loss": 0.2986, + "step": 19249 + }, + { + "epoch": 0.3853564547206166, + "grad_norm": 1.0318526029586792, + "learning_rate": 7.038262736805682e-06, + "loss": 0.3029, + "step": 19250 + }, + { + "epoch": 0.38537647323774493, + "grad_norm": 1.001118540763855, + "learning_rate": 7.037966708987077e-06, + "loss": 0.2726, + "step": 19251 + }, + { + "epoch": 0.3853964917548733, + "grad_norm": 1.2786182165145874, + "learning_rate": 7.0376706726013e-06, + "loss": 0.3371, + "step": 19252 + }, + { + "epoch": 0.3854165102720016, + "grad_norm": 1.137142539024353, + "learning_rate": 7.037374627649594e-06, + "loss": 0.3007, + "step": 19253 + }, + { + "epoch": 0.38543652878912993, + "grad_norm": 1.1901044845581055, + "learning_rate": 7.037078574133202e-06, + "loss": 0.3281, + "step": 19254 + }, + { + "epoch": 0.3854565473062583, + "grad_norm": 1.039764404296875, + "learning_rate": 7.036782512053371e-06, + "loss": 0.3204, + "step": 19255 + }, + { + "epoch": 0.38547656582338663, + "grad_norm": 1.1872549057006836, + "learning_rate": 7.0364864414113435e-06, + "loss": 0.3142, + "step": 19256 + }, + { + "epoch": 0.385496584340515, + "grad_norm": 1.0725183486938477, + "learning_rate": 7.036190362208365e-06, + "loss": 0.2507, + "step": 19257 + }, + { + "epoch": 0.38551660285764333, + "grad_norm": 1.0541530847549438, + "learning_rate": 7.0358942744456825e-06, + "loss": 0.3265, + "step": 19258 + }, + { + "epoch": 0.3855366213747717, + "grad_norm": 1.0918728113174438, + "learning_rate": 7.035598178124537e-06, + "loss": 0.3438, + "step": 19259 + }, + { + "epoch": 0.38555663989190003, + "grad_norm": 1.1031837463378906, + "learning_rate": 7.035302073246176e-06, + "loss": 0.3266, + "step": 19260 + }, + { + "epoch": 0.38557665840902833, + "grad_norm": 1.1266038417816162, + "learning_rate": 7.035005959811843e-06, + "loss": 0.3395, + "step": 19261 + }, + { + "epoch": 0.3855966769261567, + "grad_norm": 1.1440786123275757, + "learning_rate": 7.0347098378227805e-06, + "loss": 0.3211, + "step": 19262 + }, + { + "epoch": 0.38561669544328503, + "grad_norm": 1.122663974761963, + "learning_rate": 7.034413707280239e-06, + "loss": 0.2921, + "step": 19263 + }, + { + "epoch": 0.3856367139604134, + "grad_norm": 1.1247152090072632, + "learning_rate": 7.034117568185458e-06, + "loss": 0.3043, + "step": 19264 + }, + { + "epoch": 0.38565673247754173, + "grad_norm": 1.2020543813705444, + "learning_rate": 7.033821420539686e-06, + "loss": 0.2915, + "step": 19265 + }, + { + "epoch": 0.3856767509946701, + "grad_norm": 1.8150300979614258, + "learning_rate": 7.0335252643441646e-06, + "loss": 0.8175, + "step": 19266 + }, + { + "epoch": 0.38569676951179843, + "grad_norm": 1.3889212608337402, + "learning_rate": 7.033229099600142e-06, + "loss": 0.3377, + "step": 19267 + }, + { + "epoch": 0.3857167880289268, + "grad_norm": 1.227081060409546, + "learning_rate": 7.032932926308861e-06, + "loss": 0.3091, + "step": 19268 + }, + { + "epoch": 0.3857368065460551, + "grad_norm": 1.1065211296081543, + "learning_rate": 7.032636744471568e-06, + "loss": 0.3578, + "step": 19269 + }, + { + "epoch": 0.38575682506318343, + "grad_norm": 1.8010050058364868, + "learning_rate": 7.032340554089506e-06, + "loss": 0.7925, + "step": 19270 + }, + { + "epoch": 0.3857768435803118, + "grad_norm": 1.071480393409729, + "learning_rate": 7.0320443551639236e-06, + "loss": 0.3102, + "step": 19271 + }, + { + "epoch": 0.38579686209744013, + "grad_norm": 1.101635456085205, + "learning_rate": 7.031748147696065e-06, + "loss": 0.333, + "step": 19272 + }, + { + "epoch": 0.3858168806145685, + "grad_norm": 1.160535454750061, + "learning_rate": 7.031451931687172e-06, + "loss": 0.3162, + "step": 19273 + }, + { + "epoch": 0.38583689913169683, + "grad_norm": 1.138846516609192, + "learning_rate": 7.031155707138494e-06, + "loss": 0.3179, + "step": 19274 + }, + { + "epoch": 0.3858569176488252, + "grad_norm": 1.1204031705856323, + "learning_rate": 7.030859474051273e-06, + "loss": 0.3268, + "step": 19275 + }, + { + "epoch": 0.38587693616595353, + "grad_norm": 1.105225682258606, + "learning_rate": 7.030563232426757e-06, + "loss": 0.311, + "step": 19276 + }, + { + "epoch": 0.3858969546830818, + "grad_norm": 1.915505290031433, + "learning_rate": 7.030266982266189e-06, + "loss": 0.8461, + "step": 19277 + }, + { + "epoch": 0.3859169732002102, + "grad_norm": 1.199888825416565, + "learning_rate": 7.029970723570816e-06, + "loss": 0.3001, + "step": 19278 + }, + { + "epoch": 0.38593699171733853, + "grad_norm": 1.2001450061798096, + "learning_rate": 7.029674456341883e-06, + "loss": 0.3321, + "step": 19279 + }, + { + "epoch": 0.3859570102344669, + "grad_norm": 1.1012078523635864, + "learning_rate": 7.029378180580636e-06, + "loss": 0.2944, + "step": 19280 + }, + { + "epoch": 0.38597702875159523, + "grad_norm": 1.3520150184631348, + "learning_rate": 7.029081896288319e-06, + "loss": 0.3058, + "step": 19281 + }, + { + "epoch": 0.3859970472687236, + "grad_norm": 1.1115081310272217, + "learning_rate": 7.028785603466178e-06, + "loss": 0.3314, + "step": 19282 + }, + { + "epoch": 0.38601706578585193, + "grad_norm": 1.0532100200653076, + "learning_rate": 7.028489302115461e-06, + "loss": 0.311, + "step": 19283 + }, + { + "epoch": 0.3860370843029803, + "grad_norm": 1.1762011051177979, + "learning_rate": 7.02819299223741e-06, + "loss": 0.3479, + "step": 19284 + }, + { + "epoch": 0.3860571028201086, + "grad_norm": 1.0341404676437378, + "learning_rate": 7.027896673833272e-06, + "loss": 0.2977, + "step": 19285 + }, + { + "epoch": 0.38607712133723693, + "grad_norm": 1.0234684944152832, + "learning_rate": 7.027600346904293e-06, + "loss": 0.3674, + "step": 19286 + }, + { + "epoch": 0.3860971398543653, + "grad_norm": 1.0423153638839722, + "learning_rate": 7.027304011451719e-06, + "loss": 0.2518, + "step": 19287 + }, + { + "epoch": 0.38611715837149363, + "grad_norm": 1.0838158130645752, + "learning_rate": 7.027007667476794e-06, + "loss": 0.3368, + "step": 19288 + }, + { + "epoch": 0.386137176888622, + "grad_norm": 1.1088001728057861, + "learning_rate": 7.026711314980766e-06, + "loss": 0.3029, + "step": 19289 + }, + { + "epoch": 0.38615719540575033, + "grad_norm": 1.0650215148925781, + "learning_rate": 7.0264149539648795e-06, + "loss": 0.3035, + "step": 19290 + }, + { + "epoch": 0.3861772139228787, + "grad_norm": 0.9311749935150146, + "learning_rate": 7.026118584430382e-06, + "loss": 0.2846, + "step": 19291 + }, + { + "epoch": 0.38619723244000703, + "grad_norm": 1.0956796407699585, + "learning_rate": 7.025822206378516e-06, + "loss": 0.3249, + "step": 19292 + }, + { + "epoch": 0.3862172509571353, + "grad_norm": 1.8948028087615967, + "learning_rate": 7.0255258198105306e-06, + "loss": 0.83, + "step": 19293 + }, + { + "epoch": 0.3862372694742637, + "grad_norm": 1.16233229637146, + "learning_rate": 7.025229424727668e-06, + "loss": 0.3175, + "step": 19294 + }, + { + "epoch": 0.38625728799139203, + "grad_norm": 1.153929591178894, + "learning_rate": 7.02493302113118e-06, + "loss": 0.3367, + "step": 19295 + }, + { + "epoch": 0.3862773065085204, + "grad_norm": 1.0971847772598267, + "learning_rate": 7.024636609022308e-06, + "loss": 0.3061, + "step": 19296 + }, + { + "epoch": 0.38629732502564873, + "grad_norm": 1.3214095830917358, + "learning_rate": 7.024340188402299e-06, + "loss": 0.2986, + "step": 19297 + }, + { + "epoch": 0.3863173435427771, + "grad_norm": 1.1583844423294067, + "learning_rate": 7.024043759272401e-06, + "loss": 0.3351, + "step": 19298 + }, + { + "epoch": 0.38633736205990543, + "grad_norm": 1.1092007160186768, + "learning_rate": 7.023747321633857e-06, + "loss": 0.3375, + "step": 19299 + }, + { + "epoch": 0.3863573805770338, + "grad_norm": 1.1059378385543823, + "learning_rate": 7.023450875487916e-06, + "loss": 0.3195, + "step": 19300 + }, + { + "epoch": 0.3863773990941621, + "grad_norm": 1.1952816247940063, + "learning_rate": 7.02315442083582e-06, + "loss": 0.3276, + "step": 19301 + }, + { + "epoch": 0.38639741761129043, + "grad_norm": 1.2695707082748413, + "learning_rate": 7.022857957678822e-06, + "loss": 0.3459, + "step": 19302 + }, + { + "epoch": 0.3864174361284188, + "grad_norm": 1.0588288307189941, + "learning_rate": 7.022561486018164e-06, + "loss": 0.2764, + "step": 19303 + }, + { + "epoch": 0.38643745464554713, + "grad_norm": 2.1279382705688477, + "learning_rate": 7.022265005855091e-06, + "loss": 0.9005, + "step": 19304 + }, + { + "epoch": 0.3864574731626755, + "grad_norm": 1.1104379892349243, + "learning_rate": 7.0219685171908525e-06, + "loss": 0.2841, + "step": 19305 + }, + { + "epoch": 0.38647749167980383, + "grad_norm": 1.0101829767227173, + "learning_rate": 7.021672020026693e-06, + "loss": 0.2816, + "step": 19306 + }, + { + "epoch": 0.3864975101969322, + "grad_norm": 1.7291522026062012, + "learning_rate": 7.021375514363858e-06, + "loss": 0.7747, + "step": 19307 + }, + { + "epoch": 0.38651752871406053, + "grad_norm": 1.101406216621399, + "learning_rate": 7.021079000203597e-06, + "loss": 0.3222, + "step": 19308 + }, + { + "epoch": 0.3865375472311888, + "grad_norm": 1.8000965118408203, + "learning_rate": 7.020782477547154e-06, + "loss": 0.8232, + "step": 19309 + }, + { + "epoch": 0.3865575657483172, + "grad_norm": 1.070910930633545, + "learning_rate": 7.020485946395777e-06, + "loss": 0.3051, + "step": 19310 + }, + { + "epoch": 0.38657758426544553, + "grad_norm": 1.3233195543289185, + "learning_rate": 7.020189406750713e-06, + "loss": 0.3452, + "step": 19311 + }, + { + "epoch": 0.3865976027825739, + "grad_norm": 1.9303256273269653, + "learning_rate": 7.019892858613206e-06, + "loss": 0.8644, + "step": 19312 + }, + { + "epoch": 0.38661762129970223, + "grad_norm": 1.2046700716018677, + "learning_rate": 7.019596301984504e-06, + "loss": 0.3298, + "step": 19313 + }, + { + "epoch": 0.3866376398168306, + "grad_norm": 1.9829227924346924, + "learning_rate": 7.019299736865855e-06, + "loss": 0.7833, + "step": 19314 + }, + { + "epoch": 0.38665765833395893, + "grad_norm": 1.0627058744430542, + "learning_rate": 7.019003163258502e-06, + "loss": 0.3241, + "step": 19315 + }, + { + "epoch": 0.3866776768510873, + "grad_norm": 1.8188647031784058, + "learning_rate": 7.018706581163697e-06, + "loss": 0.8347, + "step": 19316 + }, + { + "epoch": 0.3866976953682156, + "grad_norm": 1.1159051656723022, + "learning_rate": 7.018409990582682e-06, + "loss": 0.324, + "step": 19317 + }, + { + "epoch": 0.38671771388534393, + "grad_norm": 1.1558845043182373, + "learning_rate": 7.018113391516708e-06, + "loss": 0.3415, + "step": 19318 + }, + { + "epoch": 0.3867377324024723, + "grad_norm": 1.075718879699707, + "learning_rate": 7.017816783967016e-06, + "loss": 0.3006, + "step": 19319 + }, + { + "epoch": 0.38675775091960063, + "grad_norm": 1.0772337913513184, + "learning_rate": 7.017520167934859e-06, + "loss": 0.3428, + "step": 19320 + }, + { + "epoch": 0.386777769436729, + "grad_norm": 0.9226540923118591, + "learning_rate": 7.017223543421481e-06, + "loss": 0.2509, + "step": 19321 + }, + { + "epoch": 0.38679778795385733, + "grad_norm": 1.0646461248397827, + "learning_rate": 7.016926910428129e-06, + "loss": 0.3172, + "step": 19322 + }, + { + "epoch": 0.3868178064709857, + "grad_norm": 1.0809500217437744, + "learning_rate": 7.016630268956052e-06, + "loss": 0.3306, + "step": 19323 + }, + { + "epoch": 0.38683782498811403, + "grad_norm": 1.1724332571029663, + "learning_rate": 7.016333619006493e-06, + "loss": 0.3512, + "step": 19324 + }, + { + "epoch": 0.3868578435052423, + "grad_norm": 1.1079407930374146, + "learning_rate": 7.016036960580703e-06, + "loss": 0.3441, + "step": 19325 + }, + { + "epoch": 0.3868778620223707, + "grad_norm": 1.776360034942627, + "learning_rate": 7.015740293679927e-06, + "loss": 0.7664, + "step": 19326 + }, + { + "epoch": 0.38689788053949903, + "grad_norm": 1.1004596948623657, + "learning_rate": 7.015443618305411e-06, + "loss": 0.2949, + "step": 19327 + }, + { + "epoch": 0.3869178990566274, + "grad_norm": 1.1047277450561523, + "learning_rate": 7.015146934458406e-06, + "loss": 0.2538, + "step": 19328 + }, + { + "epoch": 0.38693791757375573, + "grad_norm": 1.1830487251281738, + "learning_rate": 7.014850242140155e-06, + "loss": 0.2569, + "step": 19329 + }, + { + "epoch": 0.3869579360908841, + "grad_norm": 1.0781570672988892, + "learning_rate": 7.014553541351908e-06, + "loss": 0.2789, + "step": 19330 + }, + { + "epoch": 0.38697795460801243, + "grad_norm": 1.2328084707260132, + "learning_rate": 7.014256832094913e-06, + "loss": 0.278, + "step": 19331 + }, + { + "epoch": 0.3869979731251408, + "grad_norm": 1.2394999265670776, + "learning_rate": 7.013960114370414e-06, + "loss": 0.3341, + "step": 19332 + }, + { + "epoch": 0.3870179916422691, + "grad_norm": 1.0701966285705566, + "learning_rate": 7.01366338817966e-06, + "loss": 0.3282, + "step": 19333 + }, + { + "epoch": 0.3870380101593974, + "grad_norm": 1.1576968431472778, + "learning_rate": 7.0133666535239e-06, + "loss": 0.3197, + "step": 19334 + }, + { + "epoch": 0.3870580286765258, + "grad_norm": 1.104983925819397, + "learning_rate": 7.013069910404378e-06, + "loss": 0.3216, + "step": 19335 + }, + { + "epoch": 0.38707804719365413, + "grad_norm": 1.0259714126586914, + "learning_rate": 7.012773158822345e-06, + "loss": 0.3183, + "step": 19336 + }, + { + "epoch": 0.3870980657107825, + "grad_norm": 1.0843298435211182, + "learning_rate": 7.012476398779045e-06, + "loss": 0.3282, + "step": 19337 + }, + { + "epoch": 0.38711808422791083, + "grad_norm": 1.8910335302352905, + "learning_rate": 7.012179630275729e-06, + "loss": 0.7693, + "step": 19338 + }, + { + "epoch": 0.3871381027450392, + "grad_norm": 1.1192508935928345, + "learning_rate": 7.011882853313642e-06, + "loss": 0.3125, + "step": 19339 + }, + { + "epoch": 0.38715812126216753, + "grad_norm": 1.0227516889572144, + "learning_rate": 7.011586067894032e-06, + "loss": 0.2461, + "step": 19340 + }, + { + "epoch": 0.3871781397792958, + "grad_norm": 1.1067943572998047, + "learning_rate": 7.011289274018147e-06, + "loss": 0.3096, + "step": 19341 + }, + { + "epoch": 0.3871981582964242, + "grad_norm": 1.085221529006958, + "learning_rate": 7.0109924716872366e-06, + "loss": 0.3215, + "step": 19342 + }, + { + "epoch": 0.38721817681355253, + "grad_norm": 1.1110337972640991, + "learning_rate": 7.0106956609025445e-06, + "loss": 0.3245, + "step": 19343 + }, + { + "epoch": 0.3872381953306809, + "grad_norm": 1.098831057548523, + "learning_rate": 7.0103988416653225e-06, + "loss": 0.3246, + "step": 19344 + }, + { + "epoch": 0.38725821384780923, + "grad_norm": 1.043535828590393, + "learning_rate": 7.010102013976815e-06, + "loss": 0.2973, + "step": 19345 + }, + { + "epoch": 0.3872782323649376, + "grad_norm": 1.0408045053482056, + "learning_rate": 7.009805177838271e-06, + "loss": 0.298, + "step": 19346 + }, + { + "epoch": 0.38729825088206593, + "grad_norm": 1.9243887662887573, + "learning_rate": 7.00950833325094e-06, + "loss": 0.8256, + "step": 19347 + }, + { + "epoch": 0.3873182693991943, + "grad_norm": 1.1112934350967407, + "learning_rate": 7.009211480216067e-06, + "loss": 0.3447, + "step": 19348 + }, + { + "epoch": 0.3873382879163226, + "grad_norm": 1.1402686834335327, + "learning_rate": 7.008914618734903e-06, + "loss": 0.3061, + "step": 19349 + }, + { + "epoch": 0.3873583064334509, + "grad_norm": 1.1070502996444702, + "learning_rate": 7.0086177488086925e-06, + "loss": 0.3094, + "step": 19350 + }, + { + "epoch": 0.3873783249505793, + "grad_norm": 1.4234066009521484, + "learning_rate": 7.008320870438686e-06, + "loss": 0.319, + "step": 19351 + }, + { + "epoch": 0.38739834346770763, + "grad_norm": 2.084665298461914, + "learning_rate": 7.008023983626131e-06, + "loss": 0.7811, + "step": 19352 + }, + { + "epoch": 0.387418361984836, + "grad_norm": 1.3661489486694336, + "learning_rate": 7.007727088372273e-06, + "loss": 0.3129, + "step": 19353 + }, + { + "epoch": 0.38743838050196433, + "grad_norm": 1.142111897468567, + "learning_rate": 7.007430184678367e-06, + "loss": 0.3261, + "step": 19354 + }, + { + "epoch": 0.3874583990190927, + "grad_norm": 1.0728257894515991, + "learning_rate": 7.007133272545652e-06, + "loss": 0.2629, + "step": 19355 + }, + { + "epoch": 0.38747841753622103, + "grad_norm": 1.0633670091629028, + "learning_rate": 7.006836351975384e-06, + "loss": 0.3191, + "step": 19356 + }, + { + "epoch": 0.3874984360533493, + "grad_norm": 1.0217440128326416, + "learning_rate": 7.006539422968806e-06, + "loss": 0.3106, + "step": 19357 + }, + { + "epoch": 0.3875184545704777, + "grad_norm": 1.268466830253601, + "learning_rate": 7.0062424855271685e-06, + "loss": 0.3526, + "step": 19358 + }, + { + "epoch": 0.38753847308760603, + "grad_norm": 1.0265182256698608, + "learning_rate": 7.005945539651718e-06, + "loss": 0.3221, + "step": 19359 + }, + { + "epoch": 0.3875584916047344, + "grad_norm": 2.0097110271453857, + "learning_rate": 7.005648585343706e-06, + "loss": 0.8761, + "step": 19360 + }, + { + "epoch": 0.38757851012186273, + "grad_norm": 1.1189593076705933, + "learning_rate": 7.005351622604378e-06, + "loss": 0.3346, + "step": 19361 + }, + { + "epoch": 0.3875985286389911, + "grad_norm": 1.0007483959197998, + "learning_rate": 7.0050546514349844e-06, + "loss": 0.331, + "step": 19362 + }, + { + "epoch": 0.38761854715611943, + "grad_norm": 1.118728756904602, + "learning_rate": 7.004757671836772e-06, + "loss": 0.3144, + "step": 19363 + }, + { + "epoch": 0.3876385656732478, + "grad_norm": 1.2492473125457764, + "learning_rate": 7.004460683810989e-06, + "loss": 0.3109, + "step": 19364 + }, + { + "epoch": 0.3876585841903761, + "grad_norm": 1.128411054611206, + "learning_rate": 7.004163687358886e-06, + "loss": 0.3231, + "step": 19365 + }, + { + "epoch": 0.3876786027075044, + "grad_norm": 1.1520479917526245, + "learning_rate": 7.0038666824817085e-06, + "loss": 0.307, + "step": 19366 + }, + { + "epoch": 0.3876986212246328, + "grad_norm": 1.774306058883667, + "learning_rate": 7.003569669180709e-06, + "loss": 0.8186, + "step": 19367 + }, + { + "epoch": 0.38771863974176113, + "grad_norm": 1.0902827978134155, + "learning_rate": 7.003272647457131e-06, + "loss": 0.2921, + "step": 19368 + }, + { + "epoch": 0.3877386582588895, + "grad_norm": 1.1841635704040527, + "learning_rate": 7.002975617312229e-06, + "loss": 0.3617, + "step": 19369 + }, + { + "epoch": 0.38775867677601783, + "grad_norm": 1.0859241485595703, + "learning_rate": 7.0026785787472464e-06, + "loss": 0.2966, + "step": 19370 + }, + { + "epoch": 0.3877786952931462, + "grad_norm": 1.195873737335205, + "learning_rate": 7.0023815317634334e-06, + "loss": 0.3104, + "step": 19371 + }, + { + "epoch": 0.38779871381027453, + "grad_norm": 1.1123398542404175, + "learning_rate": 7.00208447636204e-06, + "loss": 0.3548, + "step": 19372 + }, + { + "epoch": 0.3878187323274028, + "grad_norm": 1.1195945739746094, + "learning_rate": 7.001787412544316e-06, + "loss": 0.4047, + "step": 19373 + }, + { + "epoch": 0.3878387508445312, + "grad_norm": 1.3551216125488281, + "learning_rate": 7.0014903403115064e-06, + "loss": 0.3563, + "step": 19374 + }, + { + "epoch": 0.38785876936165953, + "grad_norm": 1.7888023853302002, + "learning_rate": 7.0011932596648635e-06, + "loss": 0.7798, + "step": 19375 + }, + { + "epoch": 0.3878787878787879, + "grad_norm": 1.195428729057312, + "learning_rate": 7.000896170605636e-06, + "loss": 0.3333, + "step": 19376 + }, + { + "epoch": 0.38789880639591623, + "grad_norm": 1.1560566425323486, + "learning_rate": 7.000599073135068e-06, + "loss": 0.2727, + "step": 19377 + }, + { + "epoch": 0.3879188249130446, + "grad_norm": 1.1025464534759521, + "learning_rate": 7.0003019672544146e-06, + "loss": 0.3546, + "step": 19378 + }, + { + "epoch": 0.38793884343017293, + "grad_norm": 1.0749075412750244, + "learning_rate": 7.0000048529649214e-06, + "loss": 0.3111, + "step": 19379 + }, + { + "epoch": 0.3879588619473013, + "grad_norm": 1.0680099725723267, + "learning_rate": 6.9997077302678386e-06, + "loss": 0.3646, + "step": 19380 + }, + { + "epoch": 0.3879788804644296, + "grad_norm": 1.1019744873046875, + "learning_rate": 6.999410599164414e-06, + "loss": 0.3247, + "step": 19381 + }, + { + "epoch": 0.3879988989815579, + "grad_norm": 1.04410719871521, + "learning_rate": 6.9991134596558985e-06, + "loss": 0.2922, + "step": 19382 + }, + { + "epoch": 0.3880189174986863, + "grad_norm": 1.1478811502456665, + "learning_rate": 6.998816311743539e-06, + "loss": 0.356, + "step": 19383 + }, + { + "epoch": 0.38803893601581463, + "grad_norm": 1.3131194114685059, + "learning_rate": 6.998519155428587e-06, + "loss": 0.3389, + "step": 19384 + }, + { + "epoch": 0.388058954532943, + "grad_norm": 1.0589252710342407, + "learning_rate": 6.998221990712291e-06, + "loss": 0.3032, + "step": 19385 + }, + { + "epoch": 0.38807897305007133, + "grad_norm": 1.7980289459228516, + "learning_rate": 6.997924817595898e-06, + "loss": 0.8788, + "step": 19386 + }, + { + "epoch": 0.3880989915671997, + "grad_norm": 1.1158902645111084, + "learning_rate": 6.997627636080662e-06, + "loss": 0.3059, + "step": 19387 + }, + { + "epoch": 0.38811901008432803, + "grad_norm": 1.0299707651138306, + "learning_rate": 6.997330446167826e-06, + "loss": 0.3001, + "step": 19388 + }, + { + "epoch": 0.3881390286014563, + "grad_norm": 1.1835641860961914, + "learning_rate": 6.997033247858644e-06, + "loss": 0.3963, + "step": 19389 + }, + { + "epoch": 0.3881590471185847, + "grad_norm": 1.146996259689331, + "learning_rate": 6.996736041154364e-06, + "loss": 0.3131, + "step": 19390 + }, + { + "epoch": 0.388179065635713, + "grad_norm": 1.107278823852539, + "learning_rate": 6.996438826056236e-06, + "loss": 0.3231, + "step": 19391 + }, + { + "epoch": 0.3881990841528414, + "grad_norm": 1.110811471939087, + "learning_rate": 6.996141602565507e-06, + "loss": 0.2954, + "step": 19392 + }, + { + "epoch": 0.38821910266996973, + "grad_norm": 1.0300400257110596, + "learning_rate": 6.995844370683431e-06, + "loss": 0.3316, + "step": 19393 + }, + { + "epoch": 0.3882391211870981, + "grad_norm": 1.1539198160171509, + "learning_rate": 6.995547130411252e-06, + "loss": 0.3606, + "step": 19394 + }, + { + "epoch": 0.38825913970422643, + "grad_norm": 1.1044288873672485, + "learning_rate": 6.995249881750224e-06, + "loss": 0.2725, + "step": 19395 + }, + { + "epoch": 0.3882791582213547, + "grad_norm": 1.017406940460205, + "learning_rate": 6.994952624701594e-06, + "loss": 0.3084, + "step": 19396 + }, + { + "epoch": 0.3882991767384831, + "grad_norm": 1.1060118675231934, + "learning_rate": 6.9946553592666134e-06, + "loss": 0.2815, + "step": 19397 + }, + { + "epoch": 0.3883191952556114, + "grad_norm": 1.1207789182662964, + "learning_rate": 6.994358085446531e-06, + "loss": 0.3431, + "step": 19398 + }, + { + "epoch": 0.3883392137727398, + "grad_norm": 1.2061758041381836, + "learning_rate": 6.994060803242595e-06, + "loss": 0.3559, + "step": 19399 + }, + { + "epoch": 0.38835923228986813, + "grad_norm": 1.2681680917739868, + "learning_rate": 6.993763512656058e-06, + "loss": 0.3172, + "step": 19400 + }, + { + "epoch": 0.3883792508069965, + "grad_norm": 1.1823209524154663, + "learning_rate": 6.993466213688167e-06, + "loss": 0.3808, + "step": 19401 + }, + { + "epoch": 0.38839926932412483, + "grad_norm": 1.8348904848098755, + "learning_rate": 6.993168906340175e-06, + "loss": 0.8394, + "step": 19402 + }, + { + "epoch": 0.3884192878412532, + "grad_norm": 1.2290023565292358, + "learning_rate": 6.992871590613328e-06, + "loss": 0.3405, + "step": 19403 + }, + { + "epoch": 0.3884393063583815, + "grad_norm": 1.2313196659088135, + "learning_rate": 6.992574266508878e-06, + "loss": 0.3659, + "step": 19404 + }, + { + "epoch": 0.3884593248755098, + "grad_norm": 1.1334726810455322, + "learning_rate": 6.9922769340280736e-06, + "loss": 0.3519, + "step": 19405 + }, + { + "epoch": 0.3884793433926382, + "grad_norm": 1.0623443126678467, + "learning_rate": 6.991979593172168e-06, + "loss": 0.3324, + "step": 19406 + }, + { + "epoch": 0.3884993619097665, + "grad_norm": 1.1390889883041382, + "learning_rate": 6.991682243942408e-06, + "loss": 0.3278, + "step": 19407 + }, + { + "epoch": 0.3885193804268949, + "grad_norm": 1.0845192670822144, + "learning_rate": 6.991384886340045e-06, + "loss": 0.3522, + "step": 19408 + }, + { + "epoch": 0.38853939894402323, + "grad_norm": 1.3535724878311157, + "learning_rate": 6.991087520366327e-06, + "loss": 0.3283, + "step": 19409 + }, + { + "epoch": 0.3885594174611516, + "grad_norm": 1.061133861541748, + "learning_rate": 6.990790146022506e-06, + "loss": 0.2949, + "step": 19410 + }, + { + "epoch": 0.38857943597827993, + "grad_norm": 1.1769975423812866, + "learning_rate": 6.9904927633098315e-06, + "loss": 0.4052, + "step": 19411 + }, + { + "epoch": 0.3885994544954082, + "grad_norm": 1.0773013830184937, + "learning_rate": 6.9901953722295546e-06, + "loss": 0.3163, + "step": 19412 + }, + { + "epoch": 0.3886194730125366, + "grad_norm": 1.0853995084762573, + "learning_rate": 6.989897972782924e-06, + "loss": 0.3396, + "step": 19413 + }, + { + "epoch": 0.3886394915296649, + "grad_norm": 1.05194091796875, + "learning_rate": 6.989600564971191e-06, + "loss": 0.3265, + "step": 19414 + }, + { + "epoch": 0.3886595100467933, + "grad_norm": 1.0259361267089844, + "learning_rate": 6.9893031487956055e-06, + "loss": 0.2869, + "step": 19415 + }, + { + "epoch": 0.38867952856392163, + "grad_norm": 1.1222699880599976, + "learning_rate": 6.989005724257417e-06, + "loss": 0.3232, + "step": 19416 + }, + { + "epoch": 0.38869954708105, + "grad_norm": 1.1133569478988647, + "learning_rate": 6.9887082913578776e-06, + "loss": 0.3182, + "step": 19417 + }, + { + "epoch": 0.38871956559817833, + "grad_norm": 1.0208561420440674, + "learning_rate": 6.988410850098236e-06, + "loss": 0.3042, + "step": 19418 + }, + { + "epoch": 0.3887395841153067, + "grad_norm": 1.9161810874938965, + "learning_rate": 6.988113400479742e-06, + "loss": 0.7498, + "step": 19419 + }, + { + "epoch": 0.388759602632435, + "grad_norm": 0.9564628005027771, + "learning_rate": 6.987815942503648e-06, + "loss": 0.2781, + "step": 19420 + }, + { + "epoch": 0.3887796211495633, + "grad_norm": 1.0886729955673218, + "learning_rate": 6.987518476171204e-06, + "loss": 0.3437, + "step": 19421 + }, + { + "epoch": 0.3887996396666917, + "grad_norm": 1.251123309135437, + "learning_rate": 6.987221001483659e-06, + "loss": 0.3888, + "step": 19422 + }, + { + "epoch": 0.38881965818382, + "grad_norm": 1.199800968170166, + "learning_rate": 6.986923518442265e-06, + "loss": 0.3202, + "step": 19423 + }, + { + "epoch": 0.3888396767009484, + "grad_norm": 1.1635576486587524, + "learning_rate": 6.986626027048273e-06, + "loss": 0.2887, + "step": 19424 + }, + { + "epoch": 0.38885969521807673, + "grad_norm": 2.065829277038574, + "learning_rate": 6.986328527302931e-06, + "loss": 0.7679, + "step": 19425 + }, + { + "epoch": 0.3888797137352051, + "grad_norm": 1.9454318284988403, + "learning_rate": 6.986031019207493e-06, + "loss": 0.7722, + "step": 19426 + }, + { + "epoch": 0.38889973225233343, + "grad_norm": 1.1567165851593018, + "learning_rate": 6.985733502763208e-06, + "loss": 0.2916, + "step": 19427 + }, + { + "epoch": 0.3889197507694617, + "grad_norm": 1.8272082805633545, + "learning_rate": 6.985435977971326e-06, + "loss": 0.7804, + "step": 19428 + }, + { + "epoch": 0.3889397692865901, + "grad_norm": 1.190986156463623, + "learning_rate": 6.985138444833097e-06, + "loss": 0.2978, + "step": 19429 + }, + { + "epoch": 0.3889597878037184, + "grad_norm": 1.1306216716766357, + "learning_rate": 6.984840903349776e-06, + "loss": 0.3089, + "step": 19430 + }, + { + "epoch": 0.3889798063208468, + "grad_norm": 1.1606512069702148, + "learning_rate": 6.984543353522609e-06, + "loss": 0.3413, + "step": 19431 + }, + { + "epoch": 0.38899982483797513, + "grad_norm": 1.1715874671936035, + "learning_rate": 6.984245795352849e-06, + "loss": 0.3499, + "step": 19432 + }, + { + "epoch": 0.3890198433551035, + "grad_norm": 1.1810067892074585, + "learning_rate": 6.9839482288417485e-06, + "loss": 0.3106, + "step": 19433 + }, + { + "epoch": 0.38903986187223183, + "grad_norm": 1.1573989391326904, + "learning_rate": 6.983650653990554e-06, + "loss": 0.336, + "step": 19434 + }, + { + "epoch": 0.3890598803893602, + "grad_norm": 1.0988192558288574, + "learning_rate": 6.98335307080052e-06, + "loss": 0.2584, + "step": 19435 + }, + { + "epoch": 0.3890798989064885, + "grad_norm": 1.0139636993408203, + "learning_rate": 6.983055479272896e-06, + "loss": 0.2813, + "step": 19436 + }, + { + "epoch": 0.3890999174236168, + "grad_norm": 1.1956415176391602, + "learning_rate": 6.982757879408935e-06, + "loss": 0.3232, + "step": 19437 + }, + { + "epoch": 0.3891199359407452, + "grad_norm": 1.2828572988510132, + "learning_rate": 6.982460271209886e-06, + "loss": 0.3263, + "step": 19438 + }, + { + "epoch": 0.3891399544578735, + "grad_norm": 1.0686436891555786, + "learning_rate": 6.9821626546770005e-06, + "loss": 0.3216, + "step": 19439 + }, + { + "epoch": 0.3891599729750019, + "grad_norm": 1.2068464756011963, + "learning_rate": 6.9818650298115305e-06, + "loss": 0.3886, + "step": 19440 + }, + { + "epoch": 0.38917999149213023, + "grad_norm": 1.011230230331421, + "learning_rate": 6.981567396614726e-06, + "loss": 0.309, + "step": 19441 + }, + { + "epoch": 0.3892000100092586, + "grad_norm": 1.1154940128326416, + "learning_rate": 6.981269755087837e-06, + "loss": 0.3119, + "step": 19442 + }, + { + "epoch": 0.38922002852638693, + "grad_norm": 1.1127564907073975, + "learning_rate": 6.980972105232118e-06, + "loss": 0.2757, + "step": 19443 + }, + { + "epoch": 0.3892400470435152, + "grad_norm": 1.0826013088226318, + "learning_rate": 6.980674447048819e-06, + "loss": 0.3227, + "step": 19444 + }, + { + "epoch": 0.3892600655606436, + "grad_norm": 1.1893121004104614, + "learning_rate": 6.980376780539188e-06, + "loss": 0.3492, + "step": 19445 + }, + { + "epoch": 0.3892800840777719, + "grad_norm": 1.051304817199707, + "learning_rate": 6.980079105704483e-06, + "loss": 0.3027, + "step": 19446 + }, + { + "epoch": 0.3893001025949003, + "grad_norm": 1.1257878541946411, + "learning_rate": 6.979781422545949e-06, + "loss": 0.3406, + "step": 19447 + }, + { + "epoch": 0.3893201211120286, + "grad_norm": 1.2965106964111328, + "learning_rate": 6.97948373106484e-06, + "loss": 0.2997, + "step": 19448 + }, + { + "epoch": 0.389340139629157, + "grad_norm": 1.0291119813919067, + "learning_rate": 6.979186031262409e-06, + "loss": 0.2494, + "step": 19449 + }, + { + "epoch": 0.38936015814628533, + "grad_norm": 1.0643502473831177, + "learning_rate": 6.978888323139905e-06, + "loss": 0.3415, + "step": 19450 + }, + { + "epoch": 0.3893801766634137, + "grad_norm": 1.108544111251831, + "learning_rate": 6.978590606698581e-06, + "loss": 0.3268, + "step": 19451 + }, + { + "epoch": 0.389400195180542, + "grad_norm": 1.1523233652114868, + "learning_rate": 6.978292881939687e-06, + "loss": 0.3005, + "step": 19452 + }, + { + "epoch": 0.3894202136976703, + "grad_norm": 1.014710545539856, + "learning_rate": 6.977995148864476e-06, + "loss": 0.2925, + "step": 19453 + }, + { + "epoch": 0.3894402322147987, + "grad_norm": 1.136771559715271, + "learning_rate": 6.977697407474198e-06, + "loss": 0.3461, + "step": 19454 + }, + { + "epoch": 0.389460250731927, + "grad_norm": 1.1951336860656738, + "learning_rate": 6.9773996577701065e-06, + "loss": 0.3177, + "step": 19455 + }, + { + "epoch": 0.3894802692490554, + "grad_norm": 2.0091488361358643, + "learning_rate": 6.977101899753453e-06, + "loss": 0.7757, + "step": 19456 + }, + { + "epoch": 0.38950028776618373, + "grad_norm": 1.237280011177063, + "learning_rate": 6.976804133425488e-06, + "loss": 0.356, + "step": 19457 + }, + { + "epoch": 0.3895203062833121, + "grad_norm": 1.1891212463378906, + "learning_rate": 6.976506358787462e-06, + "loss": 0.3263, + "step": 19458 + }, + { + "epoch": 0.38954032480044043, + "grad_norm": 1.0692278146743774, + "learning_rate": 6.976208575840632e-06, + "loss": 0.3112, + "step": 19459 + }, + { + "epoch": 0.3895603433175687, + "grad_norm": 1.1870970726013184, + "learning_rate": 6.975910784586245e-06, + "loss": 0.3243, + "step": 19460 + }, + { + "epoch": 0.3895803618346971, + "grad_norm": 1.1370341777801514, + "learning_rate": 6.975612985025553e-06, + "loss": 0.3169, + "step": 19461 + }, + { + "epoch": 0.3896003803518254, + "grad_norm": 1.2897820472717285, + "learning_rate": 6.975315177159811e-06, + "loss": 0.3612, + "step": 19462 + }, + { + "epoch": 0.3896203988689538, + "grad_norm": 1.0313609838485718, + "learning_rate": 6.975017360990268e-06, + "loss": 0.3226, + "step": 19463 + }, + { + "epoch": 0.3896404173860821, + "grad_norm": 1.0916887521743774, + "learning_rate": 6.9747195365181775e-06, + "loss": 0.3335, + "step": 19464 + }, + { + "epoch": 0.3896604359032105, + "grad_norm": 1.0596257448196411, + "learning_rate": 6.974421703744791e-06, + "loss": 0.3564, + "step": 19465 + }, + { + "epoch": 0.38968045442033883, + "grad_norm": 1.1137938499450684, + "learning_rate": 6.97412386267136e-06, + "loss": 0.3189, + "step": 19466 + }, + { + "epoch": 0.3897004729374672, + "grad_norm": 1.1281185150146484, + "learning_rate": 6.9738260132991384e-06, + "loss": 0.3281, + "step": 19467 + }, + { + "epoch": 0.3897204914545955, + "grad_norm": 1.9425104856491089, + "learning_rate": 6.973528155629375e-06, + "loss": 0.8272, + "step": 19468 + }, + { + "epoch": 0.3897405099717238, + "grad_norm": 1.100528597831726, + "learning_rate": 6.973230289663326e-06, + "loss": 0.2996, + "step": 19469 + }, + { + "epoch": 0.3897605284888522, + "grad_norm": 1.1328332424163818, + "learning_rate": 6.97293241540224e-06, + "loss": 0.3438, + "step": 19470 + }, + { + "epoch": 0.3897805470059805, + "grad_norm": 1.0851972103118896, + "learning_rate": 6.972634532847372e-06, + "loss": 0.3252, + "step": 19471 + }, + { + "epoch": 0.3898005655231089, + "grad_norm": 1.0964808464050293, + "learning_rate": 6.972336641999973e-06, + "loss": 0.3672, + "step": 19472 + }, + { + "epoch": 0.38982058404023723, + "grad_norm": 1.2013170719146729, + "learning_rate": 6.972038742861295e-06, + "loss": 0.3503, + "step": 19473 + }, + { + "epoch": 0.3898406025573656, + "grad_norm": 1.1990293264389038, + "learning_rate": 6.971740835432588e-06, + "loss": 0.33, + "step": 19474 + }, + { + "epoch": 0.38986062107449393, + "grad_norm": 1.0993139743804932, + "learning_rate": 6.97144291971511e-06, + "loss": 0.315, + "step": 19475 + }, + { + "epoch": 0.3898806395916222, + "grad_norm": 1.1923187971115112, + "learning_rate": 6.971144995710108e-06, + "loss": 0.3076, + "step": 19476 + }, + { + "epoch": 0.3899006581087506, + "grad_norm": 1.0824663639068604, + "learning_rate": 6.970847063418839e-06, + "loss": 0.3008, + "step": 19477 + }, + { + "epoch": 0.3899206766258789, + "grad_norm": 1.9564578533172607, + "learning_rate": 6.970549122842552e-06, + "loss": 0.782, + "step": 19478 + }, + { + "epoch": 0.3899406951430073, + "grad_norm": 1.3572889566421509, + "learning_rate": 6.9702511739824996e-06, + "loss": 0.3645, + "step": 19479 + }, + { + "epoch": 0.3899607136601356, + "grad_norm": 1.0457326173782349, + "learning_rate": 6.969953216839936e-06, + "loss": 0.3333, + "step": 19480 + }, + { + "epoch": 0.389980732177264, + "grad_norm": 1.0186352729797363, + "learning_rate": 6.969655251416113e-06, + "loss": 0.2972, + "step": 19481 + }, + { + "epoch": 0.39000075069439233, + "grad_norm": 1.0923128128051758, + "learning_rate": 6.969357277712283e-06, + "loss": 0.3374, + "step": 19482 + }, + { + "epoch": 0.3900207692115207, + "grad_norm": 1.1811208724975586, + "learning_rate": 6.969059295729699e-06, + "loss": 0.3106, + "step": 19483 + }, + { + "epoch": 0.390040787728649, + "grad_norm": 1.068338394165039, + "learning_rate": 6.968761305469614e-06, + "loss": 0.2924, + "step": 19484 + }, + { + "epoch": 0.3900608062457773, + "grad_norm": 1.1692559719085693, + "learning_rate": 6.9684633069332795e-06, + "loss": 0.3328, + "step": 19485 + }, + { + "epoch": 0.3900808247629057, + "grad_norm": 1.7850940227508545, + "learning_rate": 6.968165300121949e-06, + "loss": 0.7764, + "step": 19486 + }, + { + "epoch": 0.390100843280034, + "grad_norm": 1.0568466186523438, + "learning_rate": 6.967867285036874e-06, + "loss": 0.3084, + "step": 19487 + }, + { + "epoch": 0.3901208617971624, + "grad_norm": 1.1765353679656982, + "learning_rate": 6.967569261679311e-06, + "loss": 0.3299, + "step": 19488 + }, + { + "epoch": 0.39014088031429073, + "grad_norm": 1.1300814151763916, + "learning_rate": 6.967271230050509e-06, + "loss": 0.3126, + "step": 19489 + }, + { + "epoch": 0.3901608988314191, + "grad_norm": 1.1712700128555298, + "learning_rate": 6.966973190151722e-06, + "loss": 0.2846, + "step": 19490 + }, + { + "epoch": 0.39018091734854743, + "grad_norm": 1.2938400506973267, + "learning_rate": 6.966675141984204e-06, + "loss": 0.3307, + "step": 19491 + }, + { + "epoch": 0.3902009358656757, + "grad_norm": 1.1396855115890503, + "learning_rate": 6.966377085549205e-06, + "loss": 0.3258, + "step": 19492 + }, + { + "epoch": 0.3902209543828041, + "grad_norm": 1.001610279083252, + "learning_rate": 6.96607902084798e-06, + "loss": 0.3011, + "step": 19493 + }, + { + "epoch": 0.3902409728999324, + "grad_norm": 1.0512219667434692, + "learning_rate": 6.965780947881783e-06, + "loss": 0.3398, + "step": 19494 + }, + { + "epoch": 0.3902609914170608, + "grad_norm": 0.9985180497169495, + "learning_rate": 6.9654828666518655e-06, + "loss": 0.3175, + "step": 19495 + }, + { + "epoch": 0.3902810099341891, + "grad_norm": 1.2343345880508423, + "learning_rate": 6.965184777159482e-06, + "loss": 0.3685, + "step": 19496 + }, + { + "epoch": 0.3903010284513175, + "grad_norm": 1.1674915552139282, + "learning_rate": 6.9648866794058834e-06, + "loss": 0.3369, + "step": 19497 + }, + { + "epoch": 0.39032104696844583, + "grad_norm": 1.8127717971801758, + "learning_rate": 6.964588573392324e-06, + "loss": 0.7722, + "step": 19498 + }, + { + "epoch": 0.3903410654855742, + "grad_norm": 1.2339091300964355, + "learning_rate": 6.964290459120057e-06, + "loss": 0.3747, + "step": 19499 + }, + { + "epoch": 0.3903610840027025, + "grad_norm": 0.9354884028434753, + "learning_rate": 6.963992336590337e-06, + "loss": 0.2899, + "step": 19500 + }, + { + "epoch": 0.3903811025198308, + "grad_norm": 1.1222343444824219, + "learning_rate": 6.9636942058044145e-06, + "loss": 0.2997, + "step": 19501 + }, + { + "epoch": 0.3904011210369592, + "grad_norm": 1.0110878944396973, + "learning_rate": 6.963396066763544e-06, + "loss": 0.266, + "step": 19502 + }, + { + "epoch": 0.3904211395540875, + "grad_norm": 1.1175475120544434, + "learning_rate": 6.963097919468979e-06, + "loss": 0.3735, + "step": 19503 + }, + { + "epoch": 0.3904411580712159, + "grad_norm": 1.2005637884140015, + "learning_rate": 6.962799763921975e-06, + "loss": 0.3472, + "step": 19504 + }, + { + "epoch": 0.3904611765883442, + "grad_norm": 1.41048002243042, + "learning_rate": 6.962501600123781e-06, + "loss": 0.3238, + "step": 19505 + }, + { + "epoch": 0.3904811951054726, + "grad_norm": 1.1284457445144653, + "learning_rate": 6.962203428075654e-06, + "loss": 0.3496, + "step": 19506 + }, + { + "epoch": 0.39050121362260093, + "grad_norm": 1.0192620754241943, + "learning_rate": 6.961905247778844e-06, + "loss": 0.2961, + "step": 19507 + }, + { + "epoch": 0.3905212321397292, + "grad_norm": 1.0509153604507446, + "learning_rate": 6.9616070592346085e-06, + "loss": 0.3618, + "step": 19508 + }, + { + "epoch": 0.3905412506568576, + "grad_norm": 1.081161618232727, + "learning_rate": 6.961308862444198e-06, + "loss": 0.3146, + "step": 19509 + }, + { + "epoch": 0.3905612691739859, + "grad_norm": 1.1087970733642578, + "learning_rate": 6.961010657408868e-06, + "loss": 0.3062, + "step": 19510 + }, + { + "epoch": 0.3905812876911143, + "grad_norm": 1.0205538272857666, + "learning_rate": 6.960712444129871e-06, + "loss": 0.2994, + "step": 19511 + }, + { + "epoch": 0.3906013062082426, + "grad_norm": 0.9963564872741699, + "learning_rate": 6.960414222608461e-06, + "loss": 0.2779, + "step": 19512 + }, + { + "epoch": 0.390621324725371, + "grad_norm": 1.060867190361023, + "learning_rate": 6.9601159928458895e-06, + "loss": 0.2869, + "step": 19513 + }, + { + "epoch": 0.39064134324249933, + "grad_norm": 1.1241488456726074, + "learning_rate": 6.959817754843414e-06, + "loss": 0.3384, + "step": 19514 + }, + { + "epoch": 0.3906613617596277, + "grad_norm": 1.091005802154541, + "learning_rate": 6.959519508602286e-06, + "loss": 0.318, + "step": 19515 + }, + { + "epoch": 0.390681380276756, + "grad_norm": 1.0933287143707275, + "learning_rate": 6.9592212541237604e-06, + "loss": 0.3596, + "step": 19516 + }, + { + "epoch": 0.3907013987938843, + "grad_norm": 1.1910486221313477, + "learning_rate": 6.9589229914090894e-06, + "loss": 0.3433, + "step": 19517 + }, + { + "epoch": 0.3907214173110127, + "grad_norm": 1.079140543937683, + "learning_rate": 6.958624720459528e-06, + "loss": 0.3123, + "step": 19518 + }, + { + "epoch": 0.390741435828141, + "grad_norm": 1.1604496240615845, + "learning_rate": 6.958326441276329e-06, + "loss": 0.3252, + "step": 19519 + }, + { + "epoch": 0.3907614543452694, + "grad_norm": 1.123535394668579, + "learning_rate": 6.958028153860747e-06, + "loss": 0.2914, + "step": 19520 + }, + { + "epoch": 0.3907814728623977, + "grad_norm": 1.0319730043411255, + "learning_rate": 6.957729858214038e-06, + "loss": 0.3411, + "step": 19521 + }, + { + "epoch": 0.3908014913795261, + "grad_norm": 1.1250101327896118, + "learning_rate": 6.957431554337452e-06, + "loss": 0.3203, + "step": 19522 + }, + { + "epoch": 0.39082150989665443, + "grad_norm": 1.1331075429916382, + "learning_rate": 6.957133242232247e-06, + "loss": 0.2802, + "step": 19523 + }, + { + "epoch": 0.3908415284137827, + "grad_norm": 0.9925497174263, + "learning_rate": 6.956834921899674e-06, + "loss": 0.2787, + "step": 19524 + }, + { + "epoch": 0.3908615469309111, + "grad_norm": 1.0351133346557617, + "learning_rate": 6.956536593340988e-06, + "loss": 0.3425, + "step": 19525 + }, + { + "epoch": 0.3908815654480394, + "grad_norm": 1.2669185400009155, + "learning_rate": 6.956238256557443e-06, + "loss": 0.3226, + "step": 19526 + }, + { + "epoch": 0.3909015839651678, + "grad_norm": 0.9662792682647705, + "learning_rate": 6.955939911550292e-06, + "loss": 0.3151, + "step": 19527 + }, + { + "epoch": 0.3909216024822961, + "grad_norm": 1.1432493925094604, + "learning_rate": 6.9556415583207935e-06, + "loss": 0.2984, + "step": 19528 + }, + { + "epoch": 0.3909416209994245, + "grad_norm": 1.0744653940200806, + "learning_rate": 6.9553431968701965e-06, + "loss": 0.3007, + "step": 19529 + }, + { + "epoch": 0.39096163951655283, + "grad_norm": 0.9814329147338867, + "learning_rate": 6.95504482719976e-06, + "loss": 0.2964, + "step": 19530 + }, + { + "epoch": 0.3909816580336812, + "grad_norm": 1.142813801765442, + "learning_rate": 6.954746449310732e-06, + "loss": 0.2662, + "step": 19531 + }, + { + "epoch": 0.3910016765508095, + "grad_norm": 1.0256787538528442, + "learning_rate": 6.954448063204373e-06, + "loss": 0.3368, + "step": 19532 + }, + { + "epoch": 0.3910216950679378, + "grad_norm": 2.033734083175659, + "learning_rate": 6.954149668881935e-06, + "loss": 0.7596, + "step": 19533 + }, + { + "epoch": 0.3910417135850662, + "grad_norm": 1.1116132736206055, + "learning_rate": 6.953851266344671e-06, + "loss": 0.3015, + "step": 19534 + }, + { + "epoch": 0.3910617321021945, + "grad_norm": 1.2930351495742798, + "learning_rate": 6.953552855593839e-06, + "loss": 0.3385, + "step": 19535 + }, + { + "epoch": 0.3910817506193229, + "grad_norm": 0.9944398403167725, + "learning_rate": 6.95325443663069e-06, + "loss": 0.286, + "step": 19536 + }, + { + "epoch": 0.3911017691364512, + "grad_norm": 1.9166674613952637, + "learning_rate": 6.95295600945648e-06, + "loss": 0.8182, + "step": 19537 + }, + { + "epoch": 0.3911217876535796, + "grad_norm": 1.1040050983428955, + "learning_rate": 6.952657574072462e-06, + "loss": 0.3629, + "step": 19538 + }, + { + "epoch": 0.39114180617070793, + "grad_norm": 1.2582521438598633, + "learning_rate": 6.952359130479892e-06, + "loss": 0.3094, + "step": 19539 + }, + { + "epoch": 0.3911618246878362, + "grad_norm": 1.1716874837875366, + "learning_rate": 6.952060678680025e-06, + "loss": 0.3239, + "step": 19540 + }, + { + "epoch": 0.3911818432049646, + "grad_norm": 1.042592167854309, + "learning_rate": 6.9517622186741154e-06, + "loss": 0.3079, + "step": 19541 + }, + { + "epoch": 0.3912018617220929, + "grad_norm": 1.016843318939209, + "learning_rate": 6.951463750463417e-06, + "loss": 0.2936, + "step": 19542 + }, + { + "epoch": 0.3912218802392213, + "grad_norm": 1.1206480264663696, + "learning_rate": 6.951165274049185e-06, + "loss": 0.3251, + "step": 19543 + }, + { + "epoch": 0.3912418987563496, + "grad_norm": 1.903408169746399, + "learning_rate": 6.950866789432673e-06, + "loss": 0.8258, + "step": 19544 + }, + { + "epoch": 0.391261917273478, + "grad_norm": 1.4971847534179688, + "learning_rate": 6.9505682966151375e-06, + "loss": 0.3449, + "step": 19545 + }, + { + "epoch": 0.39128193579060633, + "grad_norm": 0.9851130843162537, + "learning_rate": 6.950269795597834e-06, + "loss": 0.3072, + "step": 19546 + }, + { + "epoch": 0.3913019543077347, + "grad_norm": 1.0572274923324585, + "learning_rate": 6.949971286382014e-06, + "loss": 0.2874, + "step": 19547 + }, + { + "epoch": 0.391321972824863, + "grad_norm": 1.8845518827438354, + "learning_rate": 6.949672768968937e-06, + "loss": 0.839, + "step": 19548 + }, + { + "epoch": 0.3913419913419913, + "grad_norm": 1.8217331171035767, + "learning_rate": 6.949374243359853e-06, + "loss": 0.7915, + "step": 19549 + }, + { + "epoch": 0.3913620098591197, + "grad_norm": 1.1616402864456177, + "learning_rate": 6.94907570955602e-06, + "loss": 0.3562, + "step": 19550 + }, + { + "epoch": 0.391382028376248, + "grad_norm": 1.029410481452942, + "learning_rate": 6.948777167558691e-06, + "loss": 0.3252, + "step": 19551 + }, + { + "epoch": 0.3914020468933764, + "grad_norm": 1.8925414085388184, + "learning_rate": 6.948478617369123e-06, + "loss": 0.8197, + "step": 19552 + }, + { + "epoch": 0.3914220654105047, + "grad_norm": 1.079858660697937, + "learning_rate": 6.948180058988571e-06, + "loss": 0.3048, + "step": 19553 + }, + { + "epoch": 0.3914420839276331, + "grad_norm": 1.0749632120132446, + "learning_rate": 6.947881492418289e-06, + "loss": 0.3513, + "step": 19554 + }, + { + "epoch": 0.39146210244476143, + "grad_norm": 1.1391494274139404, + "learning_rate": 6.947582917659532e-06, + "loss": 0.3126, + "step": 19555 + }, + { + "epoch": 0.3914821209618897, + "grad_norm": 1.0734574794769287, + "learning_rate": 6.9472843347135555e-06, + "loss": 0.3122, + "step": 19556 + }, + { + "epoch": 0.3915021394790181, + "grad_norm": 1.1157572269439697, + "learning_rate": 6.9469857435816145e-06, + "loss": 0.348, + "step": 19557 + }, + { + "epoch": 0.3915221579961464, + "grad_norm": 1.010388731956482, + "learning_rate": 6.9466871442649645e-06, + "loss": 0.2734, + "step": 19558 + }, + { + "epoch": 0.3915421765132748, + "grad_norm": 1.1500377655029297, + "learning_rate": 6.946388536764861e-06, + "loss": 0.2952, + "step": 19559 + }, + { + "epoch": 0.3915621950304031, + "grad_norm": 1.0514070987701416, + "learning_rate": 6.946089921082558e-06, + "loss": 0.3346, + "step": 19560 + }, + { + "epoch": 0.3915822135475315, + "grad_norm": 1.1220998764038086, + "learning_rate": 6.9457912972193135e-06, + "loss": 0.284, + "step": 19561 + }, + { + "epoch": 0.3916022320646598, + "grad_norm": 1.1447725296020508, + "learning_rate": 6.94549266517638e-06, + "loss": 0.3531, + "step": 19562 + }, + { + "epoch": 0.3916222505817882, + "grad_norm": 1.0908710956573486, + "learning_rate": 6.945194024955016e-06, + "loss": 0.3088, + "step": 19563 + }, + { + "epoch": 0.3916422690989165, + "grad_norm": 1.0291842222213745, + "learning_rate": 6.944895376556472e-06, + "loss": 0.307, + "step": 19564 + }, + { + "epoch": 0.3916622876160448, + "grad_norm": 1.119960069656372, + "learning_rate": 6.944596719982006e-06, + "loss": 0.2966, + "step": 19565 + }, + { + "epoch": 0.3916823061331732, + "grad_norm": 1.0488593578338623, + "learning_rate": 6.9442980552328764e-06, + "loss": 0.2774, + "step": 19566 + }, + { + "epoch": 0.3917023246503015, + "grad_norm": 1.0188837051391602, + "learning_rate": 6.9439993823103355e-06, + "loss": 0.308, + "step": 19567 + }, + { + "epoch": 0.3917223431674299, + "grad_norm": 1.1966803073883057, + "learning_rate": 6.94370070121564e-06, + "loss": 0.3082, + "step": 19568 + }, + { + "epoch": 0.3917423616845582, + "grad_norm": 1.249738335609436, + "learning_rate": 6.943402011950043e-06, + "loss": 0.3011, + "step": 19569 + }, + { + "epoch": 0.3917623802016866, + "grad_norm": 2.0562314987182617, + "learning_rate": 6.943103314514803e-06, + "loss": 0.8377, + "step": 19570 + }, + { + "epoch": 0.39178239871881493, + "grad_norm": 1.0715346336364746, + "learning_rate": 6.942804608911176e-06, + "loss": 0.3011, + "step": 19571 + }, + { + "epoch": 0.3918024172359432, + "grad_norm": 1.0509740114212036, + "learning_rate": 6.942505895140417e-06, + "loss": 0.2887, + "step": 19572 + }, + { + "epoch": 0.3918224357530716, + "grad_norm": 1.8853645324707031, + "learning_rate": 6.94220717320378e-06, + "loss": 0.7696, + "step": 19573 + }, + { + "epoch": 0.3918424542701999, + "grad_norm": 1.1921831369400024, + "learning_rate": 6.9419084431025204e-06, + "loss": 0.2898, + "step": 19574 + }, + { + "epoch": 0.3918624727873283, + "grad_norm": 1.092344880104065, + "learning_rate": 6.9416097048379e-06, + "loss": 0.292, + "step": 19575 + }, + { + "epoch": 0.3918824913044566, + "grad_norm": 1.0876597166061401, + "learning_rate": 6.941310958411166e-06, + "loss": 0.3581, + "step": 19576 + }, + { + "epoch": 0.391902509821585, + "grad_norm": 0.984100341796875, + "learning_rate": 6.941012203823579e-06, + "loss": 0.324, + "step": 19577 + }, + { + "epoch": 0.3919225283387133, + "grad_norm": 1.188546895980835, + "learning_rate": 6.940713441076396e-06, + "loss": 0.3364, + "step": 19578 + }, + { + "epoch": 0.3919425468558417, + "grad_norm": 2.069725513458252, + "learning_rate": 6.940414670170871e-06, + "loss": 0.7723, + "step": 19579 + }, + { + "epoch": 0.39196256537297, + "grad_norm": 1.090036392211914, + "learning_rate": 6.94011589110826e-06, + "loss": 0.274, + "step": 19580 + }, + { + "epoch": 0.3919825838900983, + "grad_norm": 1.0275969505310059, + "learning_rate": 6.93981710388982e-06, + "loss": 0.2982, + "step": 19581 + }, + { + "epoch": 0.3920026024072267, + "grad_norm": 1.1068586111068726, + "learning_rate": 6.939518308516805e-06, + "loss": 0.2815, + "step": 19582 + }, + { + "epoch": 0.392022620924355, + "grad_norm": 1.112055778503418, + "learning_rate": 6.939219504990473e-06, + "loss": 0.327, + "step": 19583 + }, + { + "epoch": 0.3920426394414834, + "grad_norm": 0.9698256254196167, + "learning_rate": 6.93892069331208e-06, + "loss": 0.2907, + "step": 19584 + }, + { + "epoch": 0.3920626579586117, + "grad_norm": 1.122518539428711, + "learning_rate": 6.938621873482882e-06, + "loss": 0.2855, + "step": 19585 + }, + { + "epoch": 0.3920826764757401, + "grad_norm": 1.1144307851791382, + "learning_rate": 6.938323045504134e-06, + "loss": 0.2861, + "step": 19586 + }, + { + "epoch": 0.39210269499286843, + "grad_norm": 1.1339627504348755, + "learning_rate": 6.9380242093770935e-06, + "loss": 0.2731, + "step": 19587 + }, + { + "epoch": 0.3921227135099967, + "grad_norm": 1.0846823453903198, + "learning_rate": 6.937725365103016e-06, + "loss": 0.3637, + "step": 19588 + }, + { + "epoch": 0.3921427320271251, + "grad_norm": 1.093031406402588, + "learning_rate": 6.937426512683158e-06, + "loss": 0.3173, + "step": 19589 + }, + { + "epoch": 0.3921627505442534, + "grad_norm": 1.0225319862365723, + "learning_rate": 6.937127652118776e-06, + "loss": 0.3145, + "step": 19590 + }, + { + "epoch": 0.3921827690613818, + "grad_norm": 1.8220641613006592, + "learning_rate": 6.9368287834111245e-06, + "loss": 0.8248, + "step": 19591 + }, + { + "epoch": 0.3922027875785101, + "grad_norm": 1.105793833732605, + "learning_rate": 6.9365299065614645e-06, + "loss": 0.3221, + "step": 19592 + }, + { + "epoch": 0.3922228060956385, + "grad_norm": 1.2521952390670776, + "learning_rate": 6.936231021571048e-06, + "loss": 0.2815, + "step": 19593 + }, + { + "epoch": 0.3922428246127668, + "grad_norm": 1.041105031967163, + "learning_rate": 6.935932128441134e-06, + "loss": 0.324, + "step": 19594 + }, + { + "epoch": 0.3922628431298952, + "grad_norm": 1.0994601249694824, + "learning_rate": 6.9356332271729765e-06, + "loss": 0.3193, + "step": 19595 + }, + { + "epoch": 0.3922828616470235, + "grad_norm": 1.154427409172058, + "learning_rate": 6.935334317767835e-06, + "loss": 0.2856, + "step": 19596 + }, + { + "epoch": 0.3923028801641518, + "grad_norm": 1.2021085023880005, + "learning_rate": 6.9350354002269635e-06, + "loss": 0.3608, + "step": 19597 + }, + { + "epoch": 0.3923228986812802, + "grad_norm": 1.3129764795303345, + "learning_rate": 6.93473647455162e-06, + "loss": 0.2591, + "step": 19598 + }, + { + "epoch": 0.3923429171984085, + "grad_norm": 1.901835560798645, + "learning_rate": 6.934437540743061e-06, + "loss": 0.8018, + "step": 19599 + }, + { + "epoch": 0.3923629357155369, + "grad_norm": 1.1701748371124268, + "learning_rate": 6.934138598802542e-06, + "loss": 0.3666, + "step": 19600 + }, + { + "epoch": 0.3923829542326652, + "grad_norm": 1.088354229927063, + "learning_rate": 6.933839648731322e-06, + "loss": 0.3071, + "step": 19601 + }, + { + "epoch": 0.3924029727497936, + "grad_norm": 1.1714221239089966, + "learning_rate": 6.933540690530655e-06, + "loss": 0.3283, + "step": 19602 + }, + { + "epoch": 0.39242299126692193, + "grad_norm": 1.1118661165237427, + "learning_rate": 6.933241724201798e-06, + "loss": 0.3467, + "step": 19603 + }, + { + "epoch": 0.3924430097840502, + "grad_norm": 1.0962456464767456, + "learning_rate": 6.93294274974601e-06, + "loss": 0.3516, + "step": 19604 + }, + { + "epoch": 0.3924630283011786, + "grad_norm": 1.1932899951934814, + "learning_rate": 6.9326437671645476e-06, + "loss": 0.3245, + "step": 19605 + }, + { + "epoch": 0.3924830468183069, + "grad_norm": 1.0810014009475708, + "learning_rate": 6.932344776458666e-06, + "loss": 0.3063, + "step": 19606 + }, + { + "epoch": 0.3925030653354353, + "grad_norm": 1.2403013706207275, + "learning_rate": 6.932045777629623e-06, + "loss": 0.3165, + "step": 19607 + }, + { + "epoch": 0.3925230838525636, + "grad_norm": 1.1249016523361206, + "learning_rate": 6.931746770678674e-06, + "loss": 0.2926, + "step": 19608 + }, + { + "epoch": 0.392543102369692, + "grad_norm": 1.136584758758545, + "learning_rate": 6.931447755607078e-06, + "loss": 0.3286, + "step": 19609 + }, + { + "epoch": 0.3925631208868203, + "grad_norm": 1.0711908340454102, + "learning_rate": 6.931148732416092e-06, + "loss": 0.3096, + "step": 19610 + }, + { + "epoch": 0.3925831394039487, + "grad_norm": 1.0045804977416992, + "learning_rate": 6.930849701106972e-06, + "loss": 0.3124, + "step": 19611 + }, + { + "epoch": 0.392603157921077, + "grad_norm": 1.157459020614624, + "learning_rate": 6.930550661680976e-06, + "loss": 0.3058, + "step": 19612 + }, + { + "epoch": 0.3926231764382053, + "grad_norm": 1.1238123178482056, + "learning_rate": 6.930251614139359e-06, + "loss": 0.3279, + "step": 19613 + }, + { + "epoch": 0.3926431949553337, + "grad_norm": 1.098496913909912, + "learning_rate": 6.929952558483381e-06, + "loss": 0.3301, + "step": 19614 + }, + { + "epoch": 0.392663213472462, + "grad_norm": 1.1709065437316895, + "learning_rate": 6.929653494714297e-06, + "loss": 0.3245, + "step": 19615 + }, + { + "epoch": 0.3926832319895904, + "grad_norm": 1.2502681016921997, + "learning_rate": 6.929354422833365e-06, + "loss": 0.3145, + "step": 19616 + }, + { + "epoch": 0.3927032505067187, + "grad_norm": 1.14031183719635, + "learning_rate": 6.929055342841843e-06, + "loss": 0.2799, + "step": 19617 + }, + { + "epoch": 0.3927232690238471, + "grad_norm": 1.1951653957366943, + "learning_rate": 6.928756254740987e-06, + "loss": 0.3385, + "step": 19618 + }, + { + "epoch": 0.3927432875409754, + "grad_norm": 1.143934965133667, + "learning_rate": 6.928457158532055e-06, + "loss": 0.3248, + "step": 19619 + }, + { + "epoch": 0.3927633060581037, + "grad_norm": 1.262733817100525, + "learning_rate": 6.928158054216303e-06, + "loss": 0.3615, + "step": 19620 + }, + { + "epoch": 0.3927833245752321, + "grad_norm": 1.0553029775619507, + "learning_rate": 6.927858941794989e-06, + "loss": 0.2876, + "step": 19621 + }, + { + "epoch": 0.3928033430923604, + "grad_norm": 1.1187723875045776, + "learning_rate": 6.927559821269372e-06, + "loss": 0.3044, + "step": 19622 + }, + { + "epoch": 0.3928233616094888, + "grad_norm": 1.074574589729309, + "learning_rate": 6.927260692640709e-06, + "loss": 0.3375, + "step": 19623 + }, + { + "epoch": 0.3928433801266171, + "grad_norm": 1.147729516029358, + "learning_rate": 6.926961555910257e-06, + "loss": 0.2994, + "step": 19624 + }, + { + "epoch": 0.3928633986437455, + "grad_norm": 1.1843675374984741, + "learning_rate": 6.926662411079272e-06, + "loss": 0.3244, + "step": 19625 + }, + { + "epoch": 0.3928834171608738, + "grad_norm": 1.19126296043396, + "learning_rate": 6.926363258149015e-06, + "loss": 0.3498, + "step": 19626 + }, + { + "epoch": 0.3929034356780022, + "grad_norm": 1.2181507349014282, + "learning_rate": 6.92606409712074e-06, + "loss": 0.3311, + "step": 19627 + }, + { + "epoch": 0.3929234541951305, + "grad_norm": 1.1191716194152832, + "learning_rate": 6.925764927995704e-06, + "loss": 0.287, + "step": 19628 + }, + { + "epoch": 0.3929434727122588, + "grad_norm": 1.085625171661377, + "learning_rate": 6.925465750775169e-06, + "loss": 0.2972, + "step": 19629 + }, + { + "epoch": 0.3929634912293872, + "grad_norm": 1.8873125314712524, + "learning_rate": 6.92516656546039e-06, + "loss": 0.8516, + "step": 19630 + }, + { + "epoch": 0.3929835097465155, + "grad_norm": 1.1995906829833984, + "learning_rate": 6.924867372052624e-06, + "loss": 0.3373, + "step": 19631 + }, + { + "epoch": 0.3930035282636439, + "grad_norm": 1.142439365386963, + "learning_rate": 6.924568170553132e-06, + "loss": 0.3521, + "step": 19632 + }, + { + "epoch": 0.3930235467807722, + "grad_norm": 1.2497237920761108, + "learning_rate": 6.924268960963167e-06, + "loss": 0.4189, + "step": 19633 + }, + { + "epoch": 0.3930435652979006, + "grad_norm": 1.1418015956878662, + "learning_rate": 6.923969743283992e-06, + "loss": 0.2561, + "step": 19634 + }, + { + "epoch": 0.3930635838150289, + "grad_norm": 1.2785032987594604, + "learning_rate": 6.9236705175168595e-06, + "loss": 0.3896, + "step": 19635 + }, + { + "epoch": 0.3930836023321572, + "grad_norm": 1.0618444681167603, + "learning_rate": 6.923371283663032e-06, + "loss": 0.3138, + "step": 19636 + }, + { + "epoch": 0.3931036208492856, + "grad_norm": 1.960352897644043, + "learning_rate": 6.923072041723766e-06, + "loss": 0.7742, + "step": 19637 + }, + { + "epoch": 0.3931236393664139, + "grad_norm": 1.0579785108566284, + "learning_rate": 6.922772791700318e-06, + "loss": 0.322, + "step": 19638 + }, + { + "epoch": 0.3931436578835423, + "grad_norm": 1.1381572484970093, + "learning_rate": 6.922473533593948e-06, + "loss": 0.3165, + "step": 19639 + }, + { + "epoch": 0.3931636764006706, + "grad_norm": 1.136146068572998, + "learning_rate": 6.922174267405911e-06, + "loss": 0.2885, + "step": 19640 + }, + { + "epoch": 0.393183694917799, + "grad_norm": 1.3084465265274048, + "learning_rate": 6.9218749931374675e-06, + "loss": 0.3309, + "step": 19641 + }, + { + "epoch": 0.3932037134349273, + "grad_norm": 1.1064884662628174, + "learning_rate": 6.921575710789875e-06, + "loss": 0.3219, + "step": 19642 + }, + { + "epoch": 0.3932237319520557, + "grad_norm": 1.0148179531097412, + "learning_rate": 6.921276420364393e-06, + "loss": 0.328, + "step": 19643 + }, + { + "epoch": 0.393243750469184, + "grad_norm": 1.3454015254974365, + "learning_rate": 6.920977121862278e-06, + "loss": 0.3388, + "step": 19644 + }, + { + "epoch": 0.3932637689863123, + "grad_norm": 1.0884634256362915, + "learning_rate": 6.920677815284789e-06, + "loss": 0.3233, + "step": 19645 + }, + { + "epoch": 0.3932837875034407, + "grad_norm": 1.8522446155548096, + "learning_rate": 6.920378500633184e-06, + "loss": 0.7503, + "step": 19646 + }, + { + "epoch": 0.393303806020569, + "grad_norm": 1.115268349647522, + "learning_rate": 6.920079177908718e-06, + "loss": 0.332, + "step": 19647 + }, + { + "epoch": 0.3933238245376974, + "grad_norm": 1.2082817554473877, + "learning_rate": 6.919779847112655e-06, + "loss": 0.3096, + "step": 19648 + }, + { + "epoch": 0.3933438430548257, + "grad_norm": 1.1139171123504639, + "learning_rate": 6.91948050824625e-06, + "loss": 0.2932, + "step": 19649 + }, + { + "epoch": 0.3933638615719541, + "grad_norm": 1.1638892889022827, + "learning_rate": 6.919181161310764e-06, + "loss": 0.3764, + "step": 19650 + }, + { + "epoch": 0.3933838800890824, + "grad_norm": 1.8794347047805786, + "learning_rate": 6.918881806307451e-06, + "loss": 0.8442, + "step": 19651 + }, + { + "epoch": 0.3934038986062107, + "grad_norm": 1.8807201385498047, + "learning_rate": 6.918582443237573e-06, + "loss": 0.8619, + "step": 19652 + }, + { + "epoch": 0.3934239171233391, + "grad_norm": 1.5369595289230347, + "learning_rate": 6.918283072102387e-06, + "loss": 0.356, + "step": 19653 + }, + { + "epoch": 0.3934439356404674, + "grad_norm": 1.0859532356262207, + "learning_rate": 6.917983692903151e-06, + "loss": 0.3381, + "step": 19654 + }, + { + "epoch": 0.3934639541575958, + "grad_norm": 1.0941556692123413, + "learning_rate": 6.917684305641124e-06, + "loss": 0.3402, + "step": 19655 + }, + { + "epoch": 0.3934839726747241, + "grad_norm": 1.169568657875061, + "learning_rate": 6.917384910317566e-06, + "loss": 0.3226, + "step": 19656 + }, + { + "epoch": 0.3935039911918525, + "grad_norm": 1.202760934829712, + "learning_rate": 6.917085506933736e-06, + "loss": 0.3144, + "step": 19657 + }, + { + "epoch": 0.3935240097089808, + "grad_norm": 1.2942825555801392, + "learning_rate": 6.916786095490888e-06, + "loss": 0.3205, + "step": 19658 + }, + { + "epoch": 0.3935440282261092, + "grad_norm": 1.9594476222991943, + "learning_rate": 6.916486675990286e-06, + "loss": 0.7929, + "step": 19659 + }, + { + "epoch": 0.3935640467432375, + "grad_norm": 1.121633529663086, + "learning_rate": 6.916187248433184e-06, + "loss": 0.302, + "step": 19660 + }, + { + "epoch": 0.3935840652603658, + "grad_norm": 1.8144606351852417, + "learning_rate": 6.915887812820845e-06, + "loss": 0.7978, + "step": 19661 + }, + { + "epoch": 0.3936040837774942, + "grad_norm": 1.1212151050567627, + "learning_rate": 6.915588369154526e-06, + "loss": 0.3796, + "step": 19662 + }, + { + "epoch": 0.3936241022946225, + "grad_norm": 1.2658591270446777, + "learning_rate": 6.915288917435485e-06, + "loss": 0.3648, + "step": 19663 + }, + { + "epoch": 0.3936441208117509, + "grad_norm": 1.0394915342330933, + "learning_rate": 6.914989457664982e-06, + "loss": 0.3164, + "step": 19664 + }, + { + "epoch": 0.3936641393288792, + "grad_norm": 1.1920462846755981, + "learning_rate": 6.914689989844277e-06, + "loss": 0.3158, + "step": 19665 + }, + { + "epoch": 0.3936841578460076, + "grad_norm": 1.0129034519195557, + "learning_rate": 6.914390513974625e-06, + "loss": 0.2988, + "step": 19666 + }, + { + "epoch": 0.3937041763631359, + "grad_norm": 1.1073269844055176, + "learning_rate": 6.914091030057287e-06, + "loss": 0.3221, + "step": 19667 + }, + { + "epoch": 0.3937241948802642, + "grad_norm": 1.0854130983352661, + "learning_rate": 6.913791538093524e-06, + "loss": 0.3054, + "step": 19668 + }, + { + "epoch": 0.3937442133973926, + "grad_norm": 1.1363154649734497, + "learning_rate": 6.913492038084592e-06, + "loss": 0.3037, + "step": 19669 + }, + { + "epoch": 0.3937642319145209, + "grad_norm": 1.7294341325759888, + "learning_rate": 6.913192530031753e-06, + "loss": 0.8107, + "step": 19670 + }, + { + "epoch": 0.3937842504316493, + "grad_norm": 1.2002873420715332, + "learning_rate": 6.912893013936261e-06, + "loss": 0.2906, + "step": 19671 + }, + { + "epoch": 0.3938042689487776, + "grad_norm": 1.2467137575149536, + "learning_rate": 6.9125934897993815e-06, + "loss": 0.3145, + "step": 19672 + }, + { + "epoch": 0.393824287465906, + "grad_norm": 1.0873360633850098, + "learning_rate": 6.912293957622369e-06, + "loss": 0.348, + "step": 19673 + }, + { + "epoch": 0.3938443059830343, + "grad_norm": 1.0718625783920288, + "learning_rate": 6.911994417406485e-06, + "loss": 0.3325, + "step": 19674 + }, + { + "epoch": 0.3938643245001627, + "grad_norm": 1.0075509548187256, + "learning_rate": 6.911694869152987e-06, + "loss": 0.3316, + "step": 19675 + }, + { + "epoch": 0.39388434301729097, + "grad_norm": 1.1742247343063354, + "learning_rate": 6.911395312863137e-06, + "loss": 0.3224, + "step": 19676 + }, + { + "epoch": 0.3939043615344193, + "grad_norm": 1.7770251035690308, + "learning_rate": 6.91109574853819e-06, + "loss": 0.9118, + "step": 19677 + }, + { + "epoch": 0.3939243800515477, + "grad_norm": 1.2286756038665771, + "learning_rate": 6.9107961761794105e-06, + "loss": 0.3321, + "step": 19678 + }, + { + "epoch": 0.393944398568676, + "grad_norm": 1.84513521194458, + "learning_rate": 6.910496595788053e-06, + "loss": 0.7257, + "step": 19679 + }, + { + "epoch": 0.3939644170858044, + "grad_norm": 1.1519675254821777, + "learning_rate": 6.910197007365379e-06, + "loss": 0.3218, + "step": 19680 + }, + { + "epoch": 0.3939844356029327, + "grad_norm": 1.1083301305770874, + "learning_rate": 6.9098974109126494e-06, + "loss": 0.3325, + "step": 19681 + }, + { + "epoch": 0.3940044541200611, + "grad_norm": 1.2154515981674194, + "learning_rate": 6.909597806431121e-06, + "loss": 0.2903, + "step": 19682 + }, + { + "epoch": 0.3940244726371894, + "grad_norm": 1.1636037826538086, + "learning_rate": 6.909298193922055e-06, + "loss": 0.3166, + "step": 19683 + }, + { + "epoch": 0.3940444911543177, + "grad_norm": 1.1549547910690308, + "learning_rate": 6.9089985733867096e-06, + "loss": 0.3257, + "step": 19684 + }, + { + "epoch": 0.3940645096714461, + "grad_norm": 1.0702098608016968, + "learning_rate": 6.908698944826345e-06, + "loss": 0.3355, + "step": 19685 + }, + { + "epoch": 0.3940845281885744, + "grad_norm": 1.2310562133789062, + "learning_rate": 6.9083993082422195e-06, + "loss": 0.2995, + "step": 19686 + }, + { + "epoch": 0.3941045467057028, + "grad_norm": 0.971752941608429, + "learning_rate": 6.908099663635597e-06, + "loss": 0.3068, + "step": 19687 + }, + { + "epoch": 0.3941245652228311, + "grad_norm": 1.0868712663650513, + "learning_rate": 6.907800011007733e-06, + "loss": 0.3262, + "step": 19688 + }, + { + "epoch": 0.3941445837399595, + "grad_norm": 1.2105534076690674, + "learning_rate": 6.907500350359888e-06, + "loss": 0.3448, + "step": 19689 + }, + { + "epoch": 0.3941646022570878, + "grad_norm": 1.149160385131836, + "learning_rate": 6.907200681693323e-06, + "loss": 0.3086, + "step": 19690 + }, + { + "epoch": 0.3941846207742162, + "grad_norm": 1.2402212619781494, + "learning_rate": 6.9069010050092945e-06, + "loss": 0.305, + "step": 19691 + }, + { + "epoch": 0.39420463929134447, + "grad_norm": 1.0544217824935913, + "learning_rate": 6.906601320309065e-06, + "loss": 0.3257, + "step": 19692 + }, + { + "epoch": 0.3942246578084728, + "grad_norm": 1.0085344314575195, + "learning_rate": 6.906301627593894e-06, + "loss": 0.3082, + "step": 19693 + }, + { + "epoch": 0.3942446763256012, + "grad_norm": 1.2067081928253174, + "learning_rate": 6.906001926865043e-06, + "loss": 0.327, + "step": 19694 + }, + { + "epoch": 0.3942646948427295, + "grad_norm": 1.2910298109054565, + "learning_rate": 6.9057022181237685e-06, + "loss": 0.2915, + "step": 19695 + }, + { + "epoch": 0.3942847133598579, + "grad_norm": 1.0685192346572876, + "learning_rate": 6.905402501371333e-06, + "loss": 0.3352, + "step": 19696 + }, + { + "epoch": 0.3943047318769862, + "grad_norm": 1.0498454570770264, + "learning_rate": 6.905102776608994e-06, + "loss": 0.2833, + "step": 19697 + }, + { + "epoch": 0.3943247503941146, + "grad_norm": 1.1876474618911743, + "learning_rate": 6.904803043838015e-06, + "loss": 0.3425, + "step": 19698 + }, + { + "epoch": 0.3943447689112429, + "grad_norm": 1.2213853597640991, + "learning_rate": 6.904503303059652e-06, + "loss": 0.3299, + "step": 19699 + }, + { + "epoch": 0.3943647874283712, + "grad_norm": 1.0060797929763794, + "learning_rate": 6.904203554275167e-06, + "loss": 0.3086, + "step": 19700 + }, + { + "epoch": 0.3943848059454996, + "grad_norm": 1.7526564598083496, + "learning_rate": 6.903903797485821e-06, + "loss": 0.825, + "step": 19701 + }, + { + "epoch": 0.3944048244626279, + "grad_norm": 1.9905645847320557, + "learning_rate": 6.9036040326928735e-06, + "loss": 0.7469, + "step": 19702 + }, + { + "epoch": 0.3944248429797563, + "grad_norm": 1.1588650941848755, + "learning_rate": 6.903304259897584e-06, + "loss": 0.372, + "step": 19703 + }, + { + "epoch": 0.3944448614968846, + "grad_norm": 1.7469089031219482, + "learning_rate": 6.903004479101212e-06, + "loss": 0.8483, + "step": 19704 + }, + { + "epoch": 0.394464880014013, + "grad_norm": 1.9291630983352661, + "learning_rate": 6.902704690305019e-06, + "loss": 0.8777, + "step": 19705 + }, + { + "epoch": 0.3944848985311413, + "grad_norm": 1.1788266897201538, + "learning_rate": 6.902404893510265e-06, + "loss": 0.3165, + "step": 19706 + }, + { + "epoch": 0.3945049170482697, + "grad_norm": 1.1410804986953735, + "learning_rate": 6.902105088718212e-06, + "loss": 0.3147, + "step": 19707 + }, + { + "epoch": 0.39452493556539797, + "grad_norm": 1.4001741409301758, + "learning_rate": 6.901805275930115e-06, + "loss": 0.3534, + "step": 19708 + }, + { + "epoch": 0.3945449540825263, + "grad_norm": 1.1937702894210815, + "learning_rate": 6.901505455147241e-06, + "loss": 0.3374, + "step": 19709 + }, + { + "epoch": 0.3945649725996547, + "grad_norm": 1.0706669092178345, + "learning_rate": 6.901205626370847e-06, + "loss": 0.3417, + "step": 19710 + }, + { + "epoch": 0.394584991116783, + "grad_norm": 1.1533751487731934, + "learning_rate": 6.900905789602192e-06, + "loss": 0.3517, + "step": 19711 + }, + { + "epoch": 0.3946050096339114, + "grad_norm": 1.8250468969345093, + "learning_rate": 6.90060594484254e-06, + "loss": 0.8333, + "step": 19712 + }, + { + "epoch": 0.3946250281510397, + "grad_norm": 1.1748759746551514, + "learning_rate": 6.900306092093148e-06, + "loss": 0.3334, + "step": 19713 + }, + { + "epoch": 0.3946450466681681, + "grad_norm": 0.9552600979804993, + "learning_rate": 6.900006231355278e-06, + "loss": 0.3087, + "step": 19714 + }, + { + "epoch": 0.3946650651852964, + "grad_norm": 1.3422707319259644, + "learning_rate": 6.899706362630192e-06, + "loss": 0.3093, + "step": 19715 + }, + { + "epoch": 0.3946850837024247, + "grad_norm": 1.039380669593811, + "learning_rate": 6.8994064859191486e-06, + "loss": 0.2926, + "step": 19716 + }, + { + "epoch": 0.3947051022195531, + "grad_norm": 2.0156328678131104, + "learning_rate": 6.8991066012234094e-06, + "loss": 0.912, + "step": 19717 + }, + { + "epoch": 0.3947251207366814, + "grad_norm": 1.0998473167419434, + "learning_rate": 6.898806708544235e-06, + "loss": 0.3608, + "step": 19718 + }, + { + "epoch": 0.3947451392538098, + "grad_norm": 1.1483715772628784, + "learning_rate": 6.898506807882885e-06, + "loss": 0.3193, + "step": 19719 + }, + { + "epoch": 0.3947651577709381, + "grad_norm": 1.1854671239852905, + "learning_rate": 6.898206899240622e-06, + "loss": 0.2966, + "step": 19720 + }, + { + "epoch": 0.3947851762880665, + "grad_norm": 0.9948230981826782, + "learning_rate": 6.897906982618705e-06, + "loss": 0.2759, + "step": 19721 + }, + { + "epoch": 0.3948051948051948, + "grad_norm": 1.1662172079086304, + "learning_rate": 6.8976070580183965e-06, + "loss": 0.276, + "step": 19722 + }, + { + "epoch": 0.3948252133223232, + "grad_norm": 1.1914507150650024, + "learning_rate": 6.8973071254409555e-06, + "loss": 0.3154, + "step": 19723 + }, + { + "epoch": 0.39484523183945147, + "grad_norm": 1.7943185567855835, + "learning_rate": 6.897007184887643e-06, + "loss": 0.838, + "step": 19724 + }, + { + "epoch": 0.3948652503565798, + "grad_norm": 1.2905277013778687, + "learning_rate": 6.896707236359721e-06, + "loss": 0.3672, + "step": 19725 + }, + { + "epoch": 0.3948852688737082, + "grad_norm": 1.9388351440429688, + "learning_rate": 6.896407279858449e-06, + "loss": 0.8107, + "step": 19726 + }, + { + "epoch": 0.3949052873908365, + "grad_norm": 1.1472808122634888, + "learning_rate": 6.89610731538509e-06, + "loss": 0.2608, + "step": 19727 + }, + { + "epoch": 0.3949253059079649, + "grad_norm": 1.1167982816696167, + "learning_rate": 6.8958073429409035e-06, + "loss": 0.3337, + "step": 19728 + }, + { + "epoch": 0.3949453244250932, + "grad_norm": 1.176785945892334, + "learning_rate": 6.8955073625271515e-06, + "loss": 0.3466, + "step": 19729 + }, + { + "epoch": 0.3949653429422216, + "grad_norm": 1.122523546218872, + "learning_rate": 6.8952073741450946e-06, + "loss": 0.3048, + "step": 19730 + }, + { + "epoch": 0.3949853614593499, + "grad_norm": 1.1775903701782227, + "learning_rate": 6.894907377795993e-06, + "loss": 0.3279, + "step": 19731 + }, + { + "epoch": 0.3950053799764782, + "grad_norm": 1.0883064270019531, + "learning_rate": 6.894607373481109e-06, + "loss": 0.3513, + "step": 19732 + }, + { + "epoch": 0.39502539849360657, + "grad_norm": 1.0595643520355225, + "learning_rate": 6.894307361201703e-06, + "loss": 0.304, + "step": 19733 + }, + { + "epoch": 0.3950454170107349, + "grad_norm": 0.9577150940895081, + "learning_rate": 6.894007340959037e-06, + "loss": 0.3014, + "step": 19734 + }, + { + "epoch": 0.3950654355278633, + "grad_norm": 1.1989139318466187, + "learning_rate": 6.893707312754372e-06, + "loss": 0.2634, + "step": 19735 + }, + { + "epoch": 0.3950854540449916, + "grad_norm": 1.1137757301330566, + "learning_rate": 6.893407276588968e-06, + "loss": 0.3062, + "step": 19736 + }, + { + "epoch": 0.39510547256212, + "grad_norm": 1.1298381090164185, + "learning_rate": 6.893107232464088e-06, + "loss": 0.3038, + "step": 19737 + }, + { + "epoch": 0.3951254910792483, + "grad_norm": 1.0543485879898071, + "learning_rate": 6.892807180380992e-06, + "loss": 0.3329, + "step": 19738 + }, + { + "epoch": 0.3951455095963767, + "grad_norm": 1.4844297170639038, + "learning_rate": 6.892507120340941e-06, + "loss": 0.3548, + "step": 19739 + }, + { + "epoch": 0.39516552811350497, + "grad_norm": 1.015032410621643, + "learning_rate": 6.8922070523452e-06, + "loss": 0.3439, + "step": 19740 + }, + { + "epoch": 0.3951855466306333, + "grad_norm": 1.2054064273834229, + "learning_rate": 6.8919069763950264e-06, + "loss": 0.3104, + "step": 19741 + }, + { + "epoch": 0.3952055651477617, + "grad_norm": 1.1880542039871216, + "learning_rate": 6.891606892491683e-06, + "loss": 0.2753, + "step": 19742 + }, + { + "epoch": 0.39522558366489, + "grad_norm": 1.1573551893234253, + "learning_rate": 6.891306800636431e-06, + "loss": 0.3196, + "step": 19743 + }, + { + "epoch": 0.3952456021820184, + "grad_norm": 1.0995848178863525, + "learning_rate": 6.891006700830532e-06, + "loss": 0.2983, + "step": 19744 + }, + { + "epoch": 0.3952656206991467, + "grad_norm": 1.065025806427002, + "learning_rate": 6.890706593075249e-06, + "loss": 0.3017, + "step": 19745 + }, + { + "epoch": 0.3952856392162751, + "grad_norm": 1.8331547975540161, + "learning_rate": 6.890406477371841e-06, + "loss": 0.8232, + "step": 19746 + }, + { + "epoch": 0.39530565773340337, + "grad_norm": 1.122161626815796, + "learning_rate": 6.890106353721573e-06, + "loss": 0.3197, + "step": 19747 + }, + { + "epoch": 0.3953256762505317, + "grad_norm": 1.074199914932251, + "learning_rate": 6.889806222125703e-06, + "loss": 0.3218, + "step": 19748 + }, + { + "epoch": 0.39534569476766007, + "grad_norm": 1.1147551536560059, + "learning_rate": 6.889506082585495e-06, + "loss": 0.3091, + "step": 19749 + }, + { + "epoch": 0.3953657132847884, + "grad_norm": 1.0185743570327759, + "learning_rate": 6.889205935102209e-06, + "loss": 0.3129, + "step": 19750 + }, + { + "epoch": 0.3953857318019168, + "grad_norm": 1.0831358432769775, + "learning_rate": 6.888905779677109e-06, + "loss": 0.3078, + "step": 19751 + }, + { + "epoch": 0.3954057503190451, + "grad_norm": 1.8633183240890503, + "learning_rate": 6.8886056163114545e-06, + "loss": 0.7922, + "step": 19752 + }, + { + "epoch": 0.3954257688361735, + "grad_norm": 1.105380654335022, + "learning_rate": 6.888305445006509e-06, + "loss": 0.3454, + "step": 19753 + }, + { + "epoch": 0.3954457873533018, + "grad_norm": 1.149234652519226, + "learning_rate": 6.888005265763534e-06, + "loss": 0.3491, + "step": 19754 + }, + { + "epoch": 0.3954658058704301, + "grad_norm": 1.138728141784668, + "learning_rate": 6.8877050785837905e-06, + "loss": 0.2904, + "step": 19755 + }, + { + "epoch": 0.39548582438755847, + "grad_norm": 1.2183959484100342, + "learning_rate": 6.887404883468542e-06, + "loss": 0.2955, + "step": 19756 + }, + { + "epoch": 0.3955058429046868, + "grad_norm": 2.031254529953003, + "learning_rate": 6.887104680419048e-06, + "loss": 0.8284, + "step": 19757 + }, + { + "epoch": 0.3955258614218152, + "grad_norm": 1.041298270225525, + "learning_rate": 6.886804469436574e-06, + "loss": 0.2948, + "step": 19758 + }, + { + "epoch": 0.3955458799389435, + "grad_norm": 1.162387490272522, + "learning_rate": 6.886504250522379e-06, + "loss": 0.3319, + "step": 19759 + }, + { + "epoch": 0.3955658984560719, + "grad_norm": 1.1828429698944092, + "learning_rate": 6.886204023677727e-06, + "loss": 0.2842, + "step": 19760 + }, + { + "epoch": 0.3955859169732002, + "grad_norm": 1.1972484588623047, + "learning_rate": 6.885903788903878e-06, + "loss": 0.3048, + "step": 19761 + }, + { + "epoch": 0.3956059354903286, + "grad_norm": 1.0700124502182007, + "learning_rate": 6.885603546202097e-06, + "loss": 0.3466, + "step": 19762 + }, + { + "epoch": 0.39562595400745687, + "grad_norm": 1.099552035331726, + "learning_rate": 6.885303295573642e-06, + "loss": 0.287, + "step": 19763 + }, + { + "epoch": 0.3956459725245852, + "grad_norm": 1.1538729667663574, + "learning_rate": 6.885003037019778e-06, + "loss": 0.3472, + "step": 19764 + }, + { + "epoch": 0.39566599104171357, + "grad_norm": 1.2622276544570923, + "learning_rate": 6.884702770541769e-06, + "loss": 0.3343, + "step": 19765 + }, + { + "epoch": 0.3956860095588419, + "grad_norm": 1.0695581436157227, + "learning_rate": 6.884402496140874e-06, + "loss": 0.3195, + "step": 19766 + }, + { + "epoch": 0.3957060280759703, + "grad_norm": 1.0798555612564087, + "learning_rate": 6.884102213818357e-06, + "loss": 0.2894, + "step": 19767 + }, + { + "epoch": 0.3957260465930986, + "grad_norm": 0.9991192817687988, + "learning_rate": 6.883801923575478e-06, + "loss": 0.3007, + "step": 19768 + }, + { + "epoch": 0.395746065110227, + "grad_norm": 1.1812825202941895, + "learning_rate": 6.8835016254135025e-06, + "loss": 0.3, + "step": 19769 + }, + { + "epoch": 0.3957660836273553, + "grad_norm": 1.2169554233551025, + "learning_rate": 6.883201319333691e-06, + "loss": 0.3132, + "step": 19770 + }, + { + "epoch": 0.3957861021444836, + "grad_norm": 1.1695289611816406, + "learning_rate": 6.882901005337307e-06, + "loss": 0.324, + "step": 19771 + }, + { + "epoch": 0.39580612066161197, + "grad_norm": 1.151176929473877, + "learning_rate": 6.882600683425613e-06, + "loss": 0.3827, + "step": 19772 + }, + { + "epoch": 0.3958261391787403, + "grad_norm": 1.0573177337646484, + "learning_rate": 6.882300353599869e-06, + "loss": 0.2961, + "step": 19773 + }, + { + "epoch": 0.3958461576958687, + "grad_norm": 1.0502547025680542, + "learning_rate": 6.882000015861341e-06, + "loss": 0.2726, + "step": 19774 + }, + { + "epoch": 0.395866176212997, + "grad_norm": 1.1403626203536987, + "learning_rate": 6.881699670211289e-06, + "loss": 0.3029, + "step": 19775 + }, + { + "epoch": 0.3958861947301254, + "grad_norm": 1.8242837190628052, + "learning_rate": 6.8813993166509765e-06, + "loss": 0.7988, + "step": 19776 + }, + { + "epoch": 0.3959062132472537, + "grad_norm": 1.1047838926315308, + "learning_rate": 6.881098955181666e-06, + "loss": 0.3771, + "step": 19777 + }, + { + "epoch": 0.3959262317643821, + "grad_norm": 1.295202374458313, + "learning_rate": 6.8807985858046225e-06, + "loss": 0.3253, + "step": 19778 + }, + { + "epoch": 0.39594625028151037, + "grad_norm": 1.1899919509887695, + "learning_rate": 6.880498208521104e-06, + "loss": 0.2996, + "step": 19779 + }, + { + "epoch": 0.3959662687986387, + "grad_norm": 1.0540544986724854, + "learning_rate": 6.880197823332377e-06, + "loss": 0.257, + "step": 19780 + }, + { + "epoch": 0.39598628731576707, + "grad_norm": 1.2600613832473755, + "learning_rate": 6.879897430239701e-06, + "loss": 0.3351, + "step": 19781 + }, + { + "epoch": 0.3960063058328954, + "grad_norm": 1.1597548723220825, + "learning_rate": 6.879597029244342e-06, + "loss": 0.2902, + "step": 19782 + }, + { + "epoch": 0.3960263243500238, + "grad_norm": 1.1506717205047607, + "learning_rate": 6.879296620347563e-06, + "loss": 0.3319, + "step": 19783 + }, + { + "epoch": 0.3960463428671521, + "grad_norm": 1.0355616807937622, + "learning_rate": 6.878996203550624e-06, + "loss": 0.3199, + "step": 19784 + }, + { + "epoch": 0.3960663613842805, + "grad_norm": 1.1355578899383545, + "learning_rate": 6.8786957788547905e-06, + "loss": 0.3096, + "step": 19785 + }, + { + "epoch": 0.3960863799014088, + "grad_norm": 1.0880634784698486, + "learning_rate": 6.878395346261323e-06, + "loss": 0.3185, + "step": 19786 + }, + { + "epoch": 0.3961063984185371, + "grad_norm": 1.9625942707061768, + "learning_rate": 6.878094905771486e-06, + "loss": 0.7776, + "step": 19787 + }, + { + "epoch": 0.39612641693566547, + "grad_norm": 1.0416055917739868, + "learning_rate": 6.877794457386542e-06, + "loss": 0.3192, + "step": 19788 + }, + { + "epoch": 0.3961464354527938, + "grad_norm": 1.0437613725662231, + "learning_rate": 6.877494001107754e-06, + "loss": 0.286, + "step": 19789 + }, + { + "epoch": 0.39616645396992217, + "grad_norm": 1.227861762046814, + "learning_rate": 6.877193536936385e-06, + "loss": 0.354, + "step": 19790 + }, + { + "epoch": 0.3961864724870505, + "grad_norm": 1.2347904443740845, + "learning_rate": 6.8768930648737e-06, + "loss": 0.3011, + "step": 19791 + }, + { + "epoch": 0.3962064910041789, + "grad_norm": 1.1135814189910889, + "learning_rate": 6.87659258492096e-06, + "loss": 0.3241, + "step": 19792 + }, + { + "epoch": 0.3962265095213072, + "grad_norm": 1.4005521535873413, + "learning_rate": 6.876292097079428e-06, + "loss": 0.3167, + "step": 19793 + }, + { + "epoch": 0.3962465280384356, + "grad_norm": 1.8124557733535767, + "learning_rate": 6.875991601350367e-06, + "loss": 0.7495, + "step": 19794 + }, + { + "epoch": 0.39626654655556387, + "grad_norm": 1.0658308267593384, + "learning_rate": 6.875691097735042e-06, + "loss": 0.3153, + "step": 19795 + }, + { + "epoch": 0.3962865650726922, + "grad_norm": 1.9241552352905273, + "learning_rate": 6.875390586234716e-06, + "loss": 0.8786, + "step": 19796 + }, + { + "epoch": 0.39630658358982057, + "grad_norm": 1.0121632814407349, + "learning_rate": 6.87509006685065e-06, + "loss": 0.2736, + "step": 19797 + }, + { + "epoch": 0.3963266021069489, + "grad_norm": 1.074116826057434, + "learning_rate": 6.874789539584111e-06, + "loss": 0.2888, + "step": 19798 + }, + { + "epoch": 0.3963466206240773, + "grad_norm": 1.029655933380127, + "learning_rate": 6.874489004436358e-06, + "loss": 0.3108, + "step": 19799 + }, + { + "epoch": 0.3963666391412056, + "grad_norm": 1.5502928495407104, + "learning_rate": 6.874188461408658e-06, + "loss": 0.7813, + "step": 19800 + }, + { + "epoch": 0.396386657658334, + "grad_norm": 1.1325907707214355, + "learning_rate": 6.873887910502273e-06, + "loss": 0.2833, + "step": 19801 + }, + { + "epoch": 0.3964066761754623, + "grad_norm": 1.81352698802948, + "learning_rate": 6.8735873517184645e-06, + "loss": 0.829, + "step": 19802 + }, + { + "epoch": 0.3964266946925906, + "grad_norm": 1.0652369260787964, + "learning_rate": 6.873286785058501e-06, + "loss": 0.325, + "step": 19803 + }, + { + "epoch": 0.39644671320971897, + "grad_norm": 1.7839144468307495, + "learning_rate": 6.87298621052364e-06, + "loss": 0.8013, + "step": 19804 + }, + { + "epoch": 0.3964667317268473, + "grad_norm": 1.980376124382019, + "learning_rate": 6.872685628115151e-06, + "loss": 0.7639, + "step": 19805 + }, + { + "epoch": 0.39648675024397567, + "grad_norm": 1.3902814388275146, + "learning_rate": 6.872385037834293e-06, + "loss": 0.2948, + "step": 19806 + }, + { + "epoch": 0.396506768761104, + "grad_norm": 1.1186506748199463, + "learning_rate": 6.872084439682331e-06, + "loss": 0.3116, + "step": 19807 + }, + { + "epoch": 0.3965267872782324, + "grad_norm": 1.332335114479065, + "learning_rate": 6.871783833660528e-06, + "loss": 0.3615, + "step": 19808 + }, + { + "epoch": 0.3965468057953607, + "grad_norm": 1.0798391103744507, + "learning_rate": 6.871483219770151e-06, + "loss": 0.3336, + "step": 19809 + }, + { + "epoch": 0.3965668243124891, + "grad_norm": 1.1102871894836426, + "learning_rate": 6.87118259801246e-06, + "loss": 0.3136, + "step": 19810 + }, + { + "epoch": 0.39658684282961737, + "grad_norm": 1.042296290397644, + "learning_rate": 6.870881968388721e-06, + "loss": 0.3036, + "step": 19811 + }, + { + "epoch": 0.3966068613467457, + "grad_norm": 1.2007412910461426, + "learning_rate": 6.870581330900196e-06, + "loss": 0.2461, + "step": 19812 + }, + { + "epoch": 0.39662687986387407, + "grad_norm": 1.1645718812942505, + "learning_rate": 6.87028068554815e-06, + "loss": 0.3446, + "step": 19813 + }, + { + "epoch": 0.3966468983810024, + "grad_norm": 1.1192574501037598, + "learning_rate": 6.869980032333845e-06, + "loss": 0.3165, + "step": 19814 + }, + { + "epoch": 0.3966669168981308, + "grad_norm": 1.0739614963531494, + "learning_rate": 6.869679371258548e-06, + "loss": 0.2782, + "step": 19815 + }, + { + "epoch": 0.3966869354152591, + "grad_norm": 1.2472176551818848, + "learning_rate": 6.8693787023235216e-06, + "loss": 0.3338, + "step": 19816 + }, + { + "epoch": 0.3967069539323875, + "grad_norm": 1.1901005506515503, + "learning_rate": 6.8690780255300286e-06, + "loss": 0.335, + "step": 19817 + }, + { + "epoch": 0.3967269724495158, + "grad_norm": 1.8896517753601074, + "learning_rate": 6.8687773408793334e-06, + "loss": 0.8024, + "step": 19818 + }, + { + "epoch": 0.3967469909666441, + "grad_norm": 1.1266276836395264, + "learning_rate": 6.868476648372701e-06, + "loss": 0.3301, + "step": 19819 + }, + { + "epoch": 0.39676700948377247, + "grad_norm": 1.2391505241394043, + "learning_rate": 6.8681759480113944e-06, + "loss": 0.376, + "step": 19820 + }, + { + "epoch": 0.3967870280009008, + "grad_norm": 1.1066521406173706, + "learning_rate": 6.867875239796677e-06, + "loss": 0.3368, + "step": 19821 + }, + { + "epoch": 0.39680704651802917, + "grad_norm": 1.1109516620635986, + "learning_rate": 6.867574523729817e-06, + "loss": 0.3166, + "step": 19822 + }, + { + "epoch": 0.3968270650351575, + "grad_norm": 1.043352723121643, + "learning_rate": 6.867273799812074e-06, + "loss": 0.3098, + "step": 19823 + }, + { + "epoch": 0.3968470835522859, + "grad_norm": 1.1683077812194824, + "learning_rate": 6.866973068044715e-06, + "loss": 0.3354, + "step": 19824 + }, + { + "epoch": 0.3968671020694142, + "grad_norm": 1.1918721199035645, + "learning_rate": 6.866672328429003e-06, + "loss": 0.3732, + "step": 19825 + }, + { + "epoch": 0.3968871205865426, + "grad_norm": 1.1986347436904907, + "learning_rate": 6.8663715809662e-06, + "loss": 0.2808, + "step": 19826 + }, + { + "epoch": 0.39690713910367087, + "grad_norm": 1.0587793588638306, + "learning_rate": 6.8660708256575734e-06, + "loss": 0.2602, + "step": 19827 + }, + { + "epoch": 0.3969271576207992, + "grad_norm": 1.097930908203125, + "learning_rate": 6.8657700625043865e-06, + "loss": 0.3535, + "step": 19828 + }, + { + "epoch": 0.39694717613792757, + "grad_norm": 1.086129069328308, + "learning_rate": 6.865469291507905e-06, + "loss": 0.2945, + "step": 19829 + }, + { + "epoch": 0.3969671946550559, + "grad_norm": 1.172609567642212, + "learning_rate": 6.8651685126693916e-06, + "loss": 0.3059, + "step": 19830 + }, + { + "epoch": 0.3969872131721843, + "grad_norm": 1.1123416423797607, + "learning_rate": 6.864867725990111e-06, + "loss": 0.3315, + "step": 19831 + }, + { + "epoch": 0.3970072316893126, + "grad_norm": 1.0613354444503784, + "learning_rate": 6.864566931471327e-06, + "loss": 0.3183, + "step": 19832 + }, + { + "epoch": 0.397027250206441, + "grad_norm": 0.9896817803382874, + "learning_rate": 6.864266129114305e-06, + "loss": 0.2888, + "step": 19833 + }, + { + "epoch": 0.3970472687235693, + "grad_norm": 1.0667561292648315, + "learning_rate": 6.86396531892031e-06, + "loss": 0.3007, + "step": 19834 + }, + { + "epoch": 0.3970672872406976, + "grad_norm": 1.0998374223709106, + "learning_rate": 6.863664500890606e-06, + "loss": 0.3564, + "step": 19835 + }, + { + "epoch": 0.39708730575782597, + "grad_norm": 1.1701364517211914, + "learning_rate": 6.863363675026458e-06, + "loss": 0.3418, + "step": 19836 + }, + { + "epoch": 0.3971073242749543, + "grad_norm": 1.1539838314056396, + "learning_rate": 6.863062841329129e-06, + "loss": 0.336, + "step": 19837 + }, + { + "epoch": 0.39712734279208267, + "grad_norm": 1.1444084644317627, + "learning_rate": 6.862761999799886e-06, + "loss": 0.3523, + "step": 19838 + }, + { + "epoch": 0.397147361309211, + "grad_norm": 1.0188924074172974, + "learning_rate": 6.862461150439992e-06, + "loss": 0.325, + "step": 19839 + }, + { + "epoch": 0.3971673798263394, + "grad_norm": 1.0821425914764404, + "learning_rate": 6.862160293250711e-06, + "loss": 0.3527, + "step": 19840 + }, + { + "epoch": 0.3971873983434677, + "grad_norm": 1.3526396751403809, + "learning_rate": 6.8618594282333086e-06, + "loss": 0.3362, + "step": 19841 + }, + { + "epoch": 0.3972074168605961, + "grad_norm": 1.0852161645889282, + "learning_rate": 6.861558555389052e-06, + "loss": 0.3181, + "step": 19842 + }, + { + "epoch": 0.39722743537772437, + "grad_norm": 1.942855954170227, + "learning_rate": 6.8612576747192015e-06, + "loss": 0.7675, + "step": 19843 + }, + { + "epoch": 0.3972474538948527, + "grad_norm": 1.2211834192276, + "learning_rate": 6.860956786225027e-06, + "loss": 0.3262, + "step": 19844 + }, + { + "epoch": 0.39726747241198107, + "grad_norm": 1.9725271463394165, + "learning_rate": 6.86065588990779e-06, + "loss": 0.8086, + "step": 19845 + }, + { + "epoch": 0.3972874909291094, + "grad_norm": 1.0553234815597534, + "learning_rate": 6.860354985768754e-06, + "loss": 0.3288, + "step": 19846 + }, + { + "epoch": 0.39730750944623777, + "grad_norm": 1.0709445476531982, + "learning_rate": 6.860054073809187e-06, + "loss": 0.2941, + "step": 19847 + }, + { + "epoch": 0.3973275279633661, + "grad_norm": 1.1380950212478638, + "learning_rate": 6.859753154030353e-06, + "loss": 0.3252, + "step": 19848 + }, + { + "epoch": 0.3973475464804945, + "grad_norm": 1.1932991743087769, + "learning_rate": 6.8594522264335185e-06, + "loss": 0.3064, + "step": 19849 + }, + { + "epoch": 0.3973675649976228, + "grad_norm": 1.2308889627456665, + "learning_rate": 6.859151291019944e-06, + "loss": 0.3462, + "step": 19850 + }, + { + "epoch": 0.3973875835147511, + "grad_norm": 1.184731364250183, + "learning_rate": 6.858850347790901e-06, + "loss": 0.3146, + "step": 19851 + }, + { + "epoch": 0.39740760203187947, + "grad_norm": 1.036783218383789, + "learning_rate": 6.858549396747648e-06, + "loss": 0.3211, + "step": 19852 + }, + { + "epoch": 0.3974276205490078, + "grad_norm": 1.0238133668899536, + "learning_rate": 6.858248437891455e-06, + "loss": 0.3221, + "step": 19853 + }, + { + "epoch": 0.39744763906613617, + "grad_norm": 1.8104069232940674, + "learning_rate": 6.857947471223585e-06, + "loss": 0.7732, + "step": 19854 + }, + { + "epoch": 0.3974676575832645, + "grad_norm": 2.014636516571045, + "learning_rate": 6.857646496745304e-06, + "loss": 0.7797, + "step": 19855 + }, + { + "epoch": 0.3974876761003929, + "grad_norm": 1.3110682964324951, + "learning_rate": 6.857345514457877e-06, + "loss": 0.3016, + "step": 19856 + }, + { + "epoch": 0.3975076946175212, + "grad_norm": 1.0909234285354614, + "learning_rate": 6.857044524362568e-06, + "loss": 0.2941, + "step": 19857 + }, + { + "epoch": 0.3975277131346496, + "grad_norm": 1.100057601928711, + "learning_rate": 6.856743526460645e-06, + "loss": 0.3017, + "step": 19858 + }, + { + "epoch": 0.39754773165177787, + "grad_norm": 1.0877610445022583, + "learning_rate": 6.85644252075337e-06, + "loss": 0.3055, + "step": 19859 + }, + { + "epoch": 0.3975677501689062, + "grad_norm": 1.0743297338485718, + "learning_rate": 6.856141507242012e-06, + "loss": 0.3721, + "step": 19860 + }, + { + "epoch": 0.39758776868603457, + "grad_norm": 1.0569400787353516, + "learning_rate": 6.855840485927834e-06, + "loss": 0.2761, + "step": 19861 + }, + { + "epoch": 0.3976077872031629, + "grad_norm": 1.1480422019958496, + "learning_rate": 6.855539456812101e-06, + "loss": 0.3577, + "step": 19862 + }, + { + "epoch": 0.39762780572029127, + "grad_norm": 1.0514825582504272, + "learning_rate": 6.855238419896081e-06, + "loss": 0.2516, + "step": 19863 + }, + { + "epoch": 0.3976478242374196, + "grad_norm": 1.1193888187408447, + "learning_rate": 6.854937375181038e-06, + "loss": 0.2748, + "step": 19864 + }, + { + "epoch": 0.397667842754548, + "grad_norm": 1.1179234981536865, + "learning_rate": 6.854636322668235e-06, + "loss": 0.3243, + "step": 19865 + }, + { + "epoch": 0.3976878612716763, + "grad_norm": 1.159044861793518, + "learning_rate": 6.854335262358942e-06, + "loss": 0.3464, + "step": 19866 + }, + { + "epoch": 0.3977078797888046, + "grad_norm": 1.1220608949661255, + "learning_rate": 6.854034194254423e-06, + "loss": 0.3462, + "step": 19867 + }, + { + "epoch": 0.39772789830593297, + "grad_norm": 1.2283438444137573, + "learning_rate": 6.853733118355942e-06, + "loss": 0.3752, + "step": 19868 + }, + { + "epoch": 0.3977479168230613, + "grad_norm": 1.9398733377456665, + "learning_rate": 6.853432034664768e-06, + "loss": 0.8742, + "step": 19869 + }, + { + "epoch": 0.39776793534018967, + "grad_norm": 1.160434603691101, + "learning_rate": 6.853130943182163e-06, + "loss": 0.3159, + "step": 19870 + }, + { + "epoch": 0.397787953857318, + "grad_norm": 1.167110800743103, + "learning_rate": 6.852829843909395e-06, + "loss": 0.3465, + "step": 19871 + }, + { + "epoch": 0.3978079723744464, + "grad_norm": 1.1745961904525757, + "learning_rate": 6.852528736847726e-06, + "loss": 0.3089, + "step": 19872 + }, + { + "epoch": 0.3978279908915747, + "grad_norm": 1.1221460103988647, + "learning_rate": 6.852227621998428e-06, + "loss": 0.2515, + "step": 19873 + }, + { + "epoch": 0.3978480094087031, + "grad_norm": 1.0646824836730957, + "learning_rate": 6.851926499362762e-06, + "loss": 0.3353, + "step": 19874 + }, + { + "epoch": 0.39786802792583137, + "grad_norm": 1.1250649690628052, + "learning_rate": 6.8516253689419975e-06, + "loss": 0.3221, + "step": 19875 + }, + { + "epoch": 0.3978880464429597, + "grad_norm": 1.1096621751785278, + "learning_rate": 6.851324230737397e-06, + "loss": 0.354, + "step": 19876 + }, + { + "epoch": 0.39790806496008807, + "grad_norm": 1.2487773895263672, + "learning_rate": 6.851023084750228e-06, + "loss": 0.3381, + "step": 19877 + }, + { + "epoch": 0.3979280834772164, + "grad_norm": 1.0997356176376343, + "learning_rate": 6.850721930981756e-06, + "loss": 0.2957, + "step": 19878 + }, + { + "epoch": 0.39794810199434477, + "grad_norm": 1.1554038524627686, + "learning_rate": 6.850420769433248e-06, + "loss": 0.2784, + "step": 19879 + }, + { + "epoch": 0.3979681205114731, + "grad_norm": 1.1622620820999146, + "learning_rate": 6.85011960010597e-06, + "loss": 0.3001, + "step": 19880 + }, + { + "epoch": 0.3979881390286015, + "grad_norm": 1.0714633464813232, + "learning_rate": 6.8498184230011855e-06, + "loss": 0.2928, + "step": 19881 + }, + { + "epoch": 0.3980081575457298, + "grad_norm": 1.1757272481918335, + "learning_rate": 6.849517238120165e-06, + "loss": 0.2939, + "step": 19882 + }, + { + "epoch": 0.3980281760628581, + "grad_norm": 1.1808823347091675, + "learning_rate": 6.84921604546417e-06, + "loss": 0.3242, + "step": 19883 + }, + { + "epoch": 0.39804819457998647, + "grad_norm": 1.1467067003250122, + "learning_rate": 6.848914845034469e-06, + "loss": 0.3451, + "step": 19884 + }, + { + "epoch": 0.3980682130971148, + "grad_norm": 1.0199956893920898, + "learning_rate": 6.848613636832327e-06, + "loss": 0.3307, + "step": 19885 + }, + { + "epoch": 0.39808823161424317, + "grad_norm": 1.107250452041626, + "learning_rate": 6.848312420859012e-06, + "loss": 0.2696, + "step": 19886 + }, + { + "epoch": 0.3981082501313715, + "grad_norm": 1.2235047817230225, + "learning_rate": 6.8480111971157904e-06, + "loss": 0.3064, + "step": 19887 + }, + { + "epoch": 0.3981282686484999, + "grad_norm": 1.1228132247924805, + "learning_rate": 6.847709965603926e-06, + "loss": 0.2767, + "step": 19888 + }, + { + "epoch": 0.3981482871656282, + "grad_norm": 1.9639354944229126, + "learning_rate": 6.847408726324688e-06, + "loss": 0.8222, + "step": 19889 + }, + { + "epoch": 0.3981683056827566, + "grad_norm": 1.1030889749526978, + "learning_rate": 6.84710747927934e-06, + "loss": 0.3436, + "step": 19890 + }, + { + "epoch": 0.39818832419988487, + "grad_norm": 1.0279594659805298, + "learning_rate": 6.8468062244691495e-06, + "loss": 0.295, + "step": 19891 + }, + { + "epoch": 0.3982083427170132, + "grad_norm": 1.1141103506088257, + "learning_rate": 6.8465049618953825e-06, + "loss": 0.324, + "step": 19892 + }, + { + "epoch": 0.39822836123414157, + "grad_norm": 1.2289682626724243, + "learning_rate": 6.8462036915593074e-06, + "loss": 0.3581, + "step": 19893 + }, + { + "epoch": 0.3982483797512699, + "grad_norm": 1.1184862852096558, + "learning_rate": 6.845902413462189e-06, + "loss": 0.2881, + "step": 19894 + }, + { + "epoch": 0.39826839826839827, + "grad_norm": 2.0701353549957275, + "learning_rate": 6.8456011276052945e-06, + "loss": 0.7901, + "step": 19895 + }, + { + "epoch": 0.3982884167855266, + "grad_norm": 1.1067014932632446, + "learning_rate": 6.84529983398989e-06, + "loss": 0.3577, + "step": 19896 + }, + { + "epoch": 0.398308435302655, + "grad_norm": 1.1587365865707397, + "learning_rate": 6.844998532617242e-06, + "loss": 0.3196, + "step": 19897 + }, + { + "epoch": 0.3983284538197833, + "grad_norm": 1.146838665008545, + "learning_rate": 6.8446972234886165e-06, + "loss": 0.3246, + "step": 19898 + }, + { + "epoch": 0.3983484723369116, + "grad_norm": 1.1051894426345825, + "learning_rate": 6.8443959066052815e-06, + "loss": 0.316, + "step": 19899 + }, + { + "epoch": 0.39836849085403997, + "grad_norm": 1.2102338075637817, + "learning_rate": 6.844094581968503e-06, + "loss": 0.3207, + "step": 19900 + }, + { + "epoch": 0.3983885093711683, + "grad_norm": 1.917504072189331, + "learning_rate": 6.843793249579547e-06, + "loss": 0.8087, + "step": 19901 + }, + { + "epoch": 0.39840852788829667, + "grad_norm": 1.0634924173355103, + "learning_rate": 6.843491909439682e-06, + "loss": 0.3181, + "step": 19902 + }, + { + "epoch": 0.398428546405425, + "grad_norm": 1.1897660493850708, + "learning_rate": 6.843190561550172e-06, + "loss": 0.3161, + "step": 19903 + }, + { + "epoch": 0.39844856492255337, + "grad_norm": 1.1448585987091064, + "learning_rate": 6.842889205912287e-06, + "loss": 0.3387, + "step": 19904 + }, + { + "epoch": 0.3984685834396817, + "grad_norm": 1.1038273572921753, + "learning_rate": 6.842587842527292e-06, + "loss": 0.3063, + "step": 19905 + }, + { + "epoch": 0.3984886019568101, + "grad_norm": 1.1190491914749146, + "learning_rate": 6.842286471396456e-06, + "loss": 0.3403, + "step": 19906 + }, + { + "epoch": 0.39850862047393837, + "grad_norm": 1.1909525394439697, + "learning_rate": 6.841985092521042e-06, + "loss": 0.3827, + "step": 19907 + }, + { + "epoch": 0.3985286389910667, + "grad_norm": 1.1530299186706543, + "learning_rate": 6.8416837059023185e-06, + "loss": 0.3535, + "step": 19908 + }, + { + "epoch": 0.39854865750819507, + "grad_norm": 1.1668676137924194, + "learning_rate": 6.841382311541556e-06, + "loss": 0.362, + "step": 19909 + }, + { + "epoch": 0.3985686760253234, + "grad_norm": 1.138616919517517, + "learning_rate": 6.8410809094400155e-06, + "loss": 0.2602, + "step": 19910 + }, + { + "epoch": 0.39858869454245177, + "grad_norm": 1.1448216438293457, + "learning_rate": 6.840779499598968e-06, + "loss": 0.3647, + "step": 19911 + }, + { + "epoch": 0.3986087130595801, + "grad_norm": 1.0968281030654907, + "learning_rate": 6.840478082019679e-06, + "loss": 0.3365, + "step": 19912 + }, + { + "epoch": 0.3986287315767085, + "grad_norm": 1.1348514556884766, + "learning_rate": 6.840176656703418e-06, + "loss": 0.2937, + "step": 19913 + }, + { + "epoch": 0.3986487500938368, + "grad_norm": 0.9919813275337219, + "learning_rate": 6.839875223651449e-06, + "loss": 0.3059, + "step": 19914 + }, + { + "epoch": 0.3986687686109651, + "grad_norm": 1.087475299835205, + "learning_rate": 6.839573782865041e-06, + "loss": 0.3257, + "step": 19915 + }, + { + "epoch": 0.39868878712809347, + "grad_norm": 1.1056513786315918, + "learning_rate": 6.839272334345461e-06, + "loss": 0.3675, + "step": 19916 + }, + { + "epoch": 0.3987088056452218, + "grad_norm": 1.0494171380996704, + "learning_rate": 6.8389708780939754e-06, + "loss": 0.2949, + "step": 19917 + }, + { + "epoch": 0.39872882416235017, + "grad_norm": 1.2942625284194946, + "learning_rate": 6.838669414111853e-06, + "loss": 0.3633, + "step": 19918 + }, + { + "epoch": 0.3987488426794785, + "grad_norm": 1.0276165008544922, + "learning_rate": 6.838367942400358e-06, + "loss": 0.3397, + "step": 19919 + }, + { + "epoch": 0.39876886119660687, + "grad_norm": 1.1634442806243896, + "learning_rate": 6.838066462960762e-06, + "loss": 0.3369, + "step": 19920 + }, + { + "epoch": 0.3987888797137352, + "grad_norm": 1.1089345216751099, + "learning_rate": 6.8377649757943285e-06, + "loss": 0.3177, + "step": 19921 + }, + { + "epoch": 0.3988088982308636, + "grad_norm": 1.1321799755096436, + "learning_rate": 6.837463480902328e-06, + "loss": 0.3175, + "step": 19922 + }, + { + "epoch": 0.39882891674799187, + "grad_norm": 1.1300036907196045, + "learning_rate": 6.837161978286025e-06, + "loss": 0.2956, + "step": 19923 + }, + { + "epoch": 0.3988489352651202, + "grad_norm": 1.1989766359329224, + "learning_rate": 6.8368604679466885e-06, + "loss": 0.3158, + "step": 19924 + }, + { + "epoch": 0.39886895378224857, + "grad_norm": 1.1266916990280151, + "learning_rate": 6.836558949885586e-06, + "loss": 0.2715, + "step": 19925 + }, + { + "epoch": 0.3988889722993769, + "grad_norm": 1.1432474851608276, + "learning_rate": 6.836257424103986e-06, + "loss": 0.3489, + "step": 19926 + }, + { + "epoch": 0.39890899081650527, + "grad_norm": 1.0744044780731201, + "learning_rate": 6.8359558906031545e-06, + "loss": 0.3141, + "step": 19927 + }, + { + "epoch": 0.3989290093336336, + "grad_norm": 1.0720549821853638, + "learning_rate": 6.83565434938436e-06, + "loss": 0.3367, + "step": 19928 + }, + { + "epoch": 0.398949027850762, + "grad_norm": 1.0588535070419312, + "learning_rate": 6.835352800448868e-06, + "loss": 0.2891, + "step": 19929 + }, + { + "epoch": 0.3989690463678903, + "grad_norm": 1.1167739629745483, + "learning_rate": 6.835051243797948e-06, + "loss": 0.3705, + "step": 19930 + }, + { + "epoch": 0.3989890648850186, + "grad_norm": 1.009742259979248, + "learning_rate": 6.834749679432868e-06, + "loss": 0.3181, + "step": 19931 + }, + { + "epoch": 0.39900908340214697, + "grad_norm": 1.135268211364746, + "learning_rate": 6.834448107354896e-06, + "loss": 0.3237, + "step": 19932 + }, + { + "epoch": 0.3990291019192753, + "grad_norm": 1.1001636981964111, + "learning_rate": 6.834146527565298e-06, + "loss": 0.2922, + "step": 19933 + }, + { + "epoch": 0.39904912043640367, + "grad_norm": 1.1610691547393799, + "learning_rate": 6.833844940065344e-06, + "loss": 0.3905, + "step": 19934 + }, + { + "epoch": 0.399069138953532, + "grad_norm": 0.9790513515472412, + "learning_rate": 6.833543344856299e-06, + "loss": 0.2835, + "step": 19935 + }, + { + "epoch": 0.39908915747066037, + "grad_norm": 1.1318610906600952, + "learning_rate": 6.833241741939434e-06, + "loss": 0.2885, + "step": 19936 + }, + { + "epoch": 0.3991091759877887, + "grad_norm": 1.1896716356277466, + "learning_rate": 6.832940131316013e-06, + "loss": 0.3054, + "step": 19937 + }, + { + "epoch": 0.3991291945049171, + "grad_norm": 1.0714517831802368, + "learning_rate": 6.832638512987309e-06, + "loss": 0.301, + "step": 19938 + }, + { + "epoch": 0.39914921302204537, + "grad_norm": 1.1409912109375, + "learning_rate": 6.832336886954583e-06, + "loss": 0.2883, + "step": 19939 + }, + { + "epoch": 0.3991692315391737, + "grad_norm": 1.108805537223816, + "learning_rate": 6.832035253219111e-06, + "loss": 0.4004, + "step": 19940 + }, + { + "epoch": 0.39918925005630207, + "grad_norm": 1.0614463090896606, + "learning_rate": 6.8317336117821555e-06, + "loss": 0.3316, + "step": 19941 + }, + { + "epoch": 0.3992092685734304, + "grad_norm": 1.2051445245742798, + "learning_rate": 6.831431962644985e-06, + "loss": 0.3824, + "step": 19942 + }, + { + "epoch": 0.39922928709055877, + "grad_norm": 1.1196049451828003, + "learning_rate": 6.83113030580887e-06, + "loss": 0.3168, + "step": 19943 + }, + { + "epoch": 0.3992493056076871, + "grad_norm": 1.1100355386734009, + "learning_rate": 6.830828641275077e-06, + "loss": 0.3313, + "step": 19944 + }, + { + "epoch": 0.3992693241248155, + "grad_norm": 1.2183468341827393, + "learning_rate": 6.830526969044874e-06, + "loss": 0.3259, + "step": 19945 + }, + { + "epoch": 0.3992893426419438, + "grad_norm": 1.1046204566955566, + "learning_rate": 6.830225289119531e-06, + "loss": 0.2691, + "step": 19946 + }, + { + "epoch": 0.3993093611590721, + "grad_norm": 1.0540947914123535, + "learning_rate": 6.829923601500313e-06, + "loss": 0.3438, + "step": 19947 + }, + { + "epoch": 0.39932937967620047, + "grad_norm": 1.9165101051330566, + "learning_rate": 6.829621906188491e-06, + "loss": 0.8355, + "step": 19948 + }, + { + "epoch": 0.3993493981933288, + "grad_norm": 1.0413211584091187, + "learning_rate": 6.829320203185332e-06, + "loss": 0.3348, + "step": 19949 + }, + { + "epoch": 0.39936941671045717, + "grad_norm": 0.97553551197052, + "learning_rate": 6.829018492492103e-06, + "loss": 0.3024, + "step": 19950 + }, + { + "epoch": 0.3993894352275855, + "grad_norm": 1.0562306642532349, + "learning_rate": 6.828716774110077e-06, + "loss": 0.3497, + "step": 19951 + }, + { + "epoch": 0.39940945374471387, + "grad_norm": 0.9899274110794067, + "learning_rate": 6.828415048040518e-06, + "loss": 0.2834, + "step": 19952 + }, + { + "epoch": 0.3994294722618422, + "grad_norm": 1.1378273963928223, + "learning_rate": 6.828113314284694e-06, + "loss": 0.3631, + "step": 19953 + }, + { + "epoch": 0.3994494907789706, + "grad_norm": 1.0506296157836914, + "learning_rate": 6.827811572843876e-06, + "loss": 0.3154, + "step": 19954 + }, + { + "epoch": 0.39946950929609887, + "grad_norm": 1.1492035388946533, + "learning_rate": 6.827509823719332e-06, + "loss": 0.2967, + "step": 19955 + }, + { + "epoch": 0.3994895278132272, + "grad_norm": 1.0757757425308228, + "learning_rate": 6.82720806691233e-06, + "loss": 0.3595, + "step": 19956 + }, + { + "epoch": 0.39950954633035557, + "grad_norm": 1.1138569116592407, + "learning_rate": 6.826906302424137e-06, + "loss": 0.2944, + "step": 19957 + }, + { + "epoch": 0.3995295648474839, + "grad_norm": 1.215192198753357, + "learning_rate": 6.826604530256024e-06, + "loss": 0.3436, + "step": 19958 + }, + { + "epoch": 0.39954958336461227, + "grad_norm": 1.1379178762435913, + "learning_rate": 6.82630275040926e-06, + "loss": 0.286, + "step": 19959 + }, + { + "epoch": 0.3995696018817406, + "grad_norm": 1.0943318605422974, + "learning_rate": 6.8260009628851124e-06, + "loss": 0.2875, + "step": 19960 + }, + { + "epoch": 0.39958962039886897, + "grad_norm": 1.126044511795044, + "learning_rate": 6.825699167684848e-06, + "loss": 0.3624, + "step": 19961 + }, + { + "epoch": 0.3996096389159973, + "grad_norm": 1.119569182395935, + "learning_rate": 6.825397364809737e-06, + "loss": 0.3135, + "step": 19962 + }, + { + "epoch": 0.3996296574331256, + "grad_norm": 1.2070165872573853, + "learning_rate": 6.825095554261049e-06, + "loss": 0.254, + "step": 19963 + }, + { + "epoch": 0.39964967595025397, + "grad_norm": 1.065992832183838, + "learning_rate": 6.8247937360400515e-06, + "loss": 0.2914, + "step": 19964 + }, + { + "epoch": 0.3996696944673823, + "grad_norm": 1.1916754245758057, + "learning_rate": 6.824491910148014e-06, + "loss": 0.3238, + "step": 19965 + }, + { + "epoch": 0.39968971298451067, + "grad_norm": 1.229016661643982, + "learning_rate": 6.824190076586207e-06, + "loss": 0.3493, + "step": 19966 + }, + { + "epoch": 0.399709731501639, + "grad_norm": 1.094419002532959, + "learning_rate": 6.823888235355896e-06, + "loss": 0.3496, + "step": 19967 + }, + { + "epoch": 0.39972975001876737, + "grad_norm": 1.0354197025299072, + "learning_rate": 6.8235863864583504e-06, + "loss": 0.3568, + "step": 19968 + }, + { + "epoch": 0.3997497685358957, + "grad_norm": 1.048214077949524, + "learning_rate": 6.82328452989484e-06, + "loss": 0.2866, + "step": 19969 + }, + { + "epoch": 0.3997697870530241, + "grad_norm": 0.9666504859924316, + "learning_rate": 6.822982665666637e-06, + "loss": 0.3492, + "step": 19970 + }, + { + "epoch": 0.39978980557015237, + "grad_norm": 1.9991686344146729, + "learning_rate": 6.822680793775005e-06, + "loss": 0.7779, + "step": 19971 + }, + { + "epoch": 0.3998098240872807, + "grad_norm": 1.1915440559387207, + "learning_rate": 6.822378914221215e-06, + "loss": 0.3256, + "step": 19972 + }, + { + "epoch": 0.39982984260440907, + "grad_norm": 1.1030006408691406, + "learning_rate": 6.8220770270065375e-06, + "loss": 0.3558, + "step": 19973 + }, + { + "epoch": 0.3998498611215374, + "grad_norm": 1.0420544147491455, + "learning_rate": 6.8217751321322376e-06, + "loss": 0.3011, + "step": 19974 + }, + { + "epoch": 0.39986987963866577, + "grad_norm": 1.0185235738754272, + "learning_rate": 6.821473229599589e-06, + "loss": 0.3081, + "step": 19975 + }, + { + "epoch": 0.3998898981557941, + "grad_norm": 2.011983871459961, + "learning_rate": 6.821171319409858e-06, + "loss": 0.8178, + "step": 19976 + }, + { + "epoch": 0.39990991667292247, + "grad_norm": 1.0703074932098389, + "learning_rate": 6.820869401564316e-06, + "loss": 0.326, + "step": 19977 + }, + { + "epoch": 0.3999299351900508, + "grad_norm": 1.2673050165176392, + "learning_rate": 6.82056747606423e-06, + "loss": 0.3255, + "step": 19978 + }, + { + "epoch": 0.3999499537071791, + "grad_norm": 1.0465995073318481, + "learning_rate": 6.820265542910871e-06, + "loss": 0.2804, + "step": 19979 + }, + { + "epoch": 0.39996997222430747, + "grad_norm": 1.0561379194259644, + "learning_rate": 6.819963602105506e-06, + "loss": 0.3253, + "step": 19980 + }, + { + "epoch": 0.3999899907414358, + "grad_norm": 1.1481058597564697, + "learning_rate": 6.8196616536494064e-06, + "loss": 0.3355, + "step": 19981 + }, + { + "epoch": 0.40001000925856417, + "grad_norm": 1.171406865119934, + "learning_rate": 6.819359697543841e-06, + "loss": 0.3113, + "step": 19982 + }, + { + "epoch": 0.4000300277756925, + "grad_norm": 1.1040737628936768, + "learning_rate": 6.819057733790079e-06, + "loss": 0.3248, + "step": 19983 + }, + { + "epoch": 0.40005004629282087, + "grad_norm": 1.0044080018997192, + "learning_rate": 6.8187557623893904e-06, + "loss": 0.3005, + "step": 19984 + }, + { + "epoch": 0.4000700648099492, + "grad_norm": 1.1518439054489136, + "learning_rate": 6.818453783343042e-06, + "loss": 0.2533, + "step": 19985 + }, + { + "epoch": 0.4000900833270776, + "grad_norm": 1.7566215991973877, + "learning_rate": 6.818151796652308e-06, + "loss": 0.7872, + "step": 19986 + }, + { + "epoch": 0.40011010184420587, + "grad_norm": 2.2178664207458496, + "learning_rate": 6.817849802318452e-06, + "loss": 0.7604, + "step": 19987 + }, + { + "epoch": 0.4001301203613342, + "grad_norm": 1.082005500793457, + "learning_rate": 6.817547800342748e-06, + "loss": 0.3373, + "step": 19988 + }, + { + "epoch": 0.40015013887846257, + "grad_norm": 1.0173979997634888, + "learning_rate": 6.817245790726465e-06, + "loss": 0.3169, + "step": 19989 + }, + { + "epoch": 0.4001701573955909, + "grad_norm": 1.0541859865188599, + "learning_rate": 6.816943773470871e-06, + "loss": 0.3426, + "step": 19990 + }, + { + "epoch": 0.40019017591271927, + "grad_norm": 1.2594703435897827, + "learning_rate": 6.816641748577237e-06, + "loss": 0.2929, + "step": 19991 + }, + { + "epoch": 0.4002101944298476, + "grad_norm": 1.2583231925964355, + "learning_rate": 6.8163397160468315e-06, + "loss": 0.3136, + "step": 19992 + }, + { + "epoch": 0.40023021294697597, + "grad_norm": 1.9704524278640747, + "learning_rate": 6.8160376758809245e-06, + "loss": 0.8984, + "step": 19993 + }, + { + "epoch": 0.4002502314641043, + "grad_norm": 1.2351570129394531, + "learning_rate": 6.815735628080785e-06, + "loss": 0.3271, + "step": 19994 + }, + { + "epoch": 0.4002702499812326, + "grad_norm": 1.091267466545105, + "learning_rate": 6.815433572647685e-06, + "loss": 0.3114, + "step": 19995 + }, + { + "epoch": 0.40029026849836097, + "grad_norm": 1.1828078031539917, + "learning_rate": 6.8151315095828925e-06, + "loss": 0.33, + "step": 19996 + }, + { + "epoch": 0.4003102870154893, + "grad_norm": 1.1673554182052612, + "learning_rate": 6.814829438887679e-06, + "loss": 0.3086, + "step": 19997 + }, + { + "epoch": 0.40033030553261767, + "grad_norm": 1.2902076244354248, + "learning_rate": 6.814527360563312e-06, + "loss": 0.3799, + "step": 19998 + }, + { + "epoch": 0.400350324049746, + "grad_norm": 1.0185341835021973, + "learning_rate": 6.814225274611063e-06, + "loss": 0.3237, + "step": 19999 + }, + { + "epoch": 0.40037034256687437, + "grad_norm": 0.9337800145149231, + "learning_rate": 6.8139231810322e-06, + "loss": 0.2629, + "step": 20000 + }, + { + "epoch": 0.4003903610840027, + "grad_norm": 1.0313712358474731, + "learning_rate": 6.813621079827994e-06, + "loss": 0.3217, + "step": 20001 + }, + { + "epoch": 0.4004103796011311, + "grad_norm": 1.236121416091919, + "learning_rate": 6.813318970999718e-06, + "loss": 0.2834, + "step": 20002 + }, + { + "epoch": 0.40043039811825937, + "grad_norm": 1.1738213300704956, + "learning_rate": 6.813016854548638e-06, + "loss": 0.2863, + "step": 20003 + }, + { + "epoch": 0.4004504166353877, + "grad_norm": 1.1011650562286377, + "learning_rate": 6.812714730476026e-06, + "loss": 0.3766, + "step": 20004 + }, + { + "epoch": 0.40047043515251607, + "grad_norm": 1.123535394668579, + "learning_rate": 6.812412598783149e-06, + "loss": 0.3236, + "step": 20005 + }, + { + "epoch": 0.4004904536696444, + "grad_norm": 1.1161935329437256, + "learning_rate": 6.812110459471282e-06, + "loss": 0.3094, + "step": 20006 + }, + { + "epoch": 0.40051047218677277, + "grad_norm": 1.2453179359436035, + "learning_rate": 6.81180831254169e-06, + "loss": 0.2719, + "step": 20007 + }, + { + "epoch": 0.4005304907039011, + "grad_norm": 1.1036885976791382, + "learning_rate": 6.811506157995648e-06, + "loss": 0.3082, + "step": 20008 + }, + { + "epoch": 0.40055050922102947, + "grad_norm": 1.8122186660766602, + "learning_rate": 6.8112039958344235e-06, + "loss": 0.777, + "step": 20009 + }, + { + "epoch": 0.4005705277381578, + "grad_norm": 1.09885835647583, + "learning_rate": 6.810901826059287e-06, + "loss": 0.3361, + "step": 20010 + }, + { + "epoch": 0.4005905462552861, + "grad_norm": 1.8608779907226562, + "learning_rate": 6.81059964867151e-06, + "loss": 0.8183, + "step": 20011 + }, + { + "epoch": 0.40061056477241447, + "grad_norm": 1.1188671588897705, + "learning_rate": 6.810297463672359e-06, + "loss": 0.359, + "step": 20012 + }, + { + "epoch": 0.4006305832895428, + "grad_norm": 1.1696373224258423, + "learning_rate": 6.809995271063109e-06, + "loss": 0.3701, + "step": 20013 + }, + { + "epoch": 0.40065060180667117, + "grad_norm": 1.1706937551498413, + "learning_rate": 6.809693070845027e-06, + "loss": 0.3396, + "step": 20014 + }, + { + "epoch": 0.4006706203237995, + "grad_norm": 1.1689107418060303, + "learning_rate": 6.809390863019387e-06, + "loss": 0.3339, + "step": 20015 + }, + { + "epoch": 0.40069063884092787, + "grad_norm": 1.0449601411819458, + "learning_rate": 6.809088647587456e-06, + "loss": 0.3119, + "step": 20016 + }, + { + "epoch": 0.4007106573580562, + "grad_norm": 1.0690281391143799, + "learning_rate": 6.808786424550506e-06, + "loss": 0.2859, + "step": 20017 + }, + { + "epoch": 0.40073067587518457, + "grad_norm": 1.8812705278396606, + "learning_rate": 6.8084841939098056e-06, + "loss": 0.7393, + "step": 20018 + }, + { + "epoch": 0.40075069439231287, + "grad_norm": 1.0585482120513916, + "learning_rate": 6.808181955666629e-06, + "loss": 0.2765, + "step": 20019 + }, + { + "epoch": 0.4007707129094412, + "grad_norm": 1.0585157871246338, + "learning_rate": 6.8078797098222425e-06, + "loss": 0.3157, + "step": 20020 + }, + { + "epoch": 0.40079073142656957, + "grad_norm": 1.0370502471923828, + "learning_rate": 6.807577456377921e-06, + "loss": 0.3136, + "step": 20021 + }, + { + "epoch": 0.4008107499436979, + "grad_norm": 1.2001734972000122, + "learning_rate": 6.807275195334933e-06, + "loss": 0.3526, + "step": 20022 + }, + { + "epoch": 0.40083076846082627, + "grad_norm": 1.1693305969238281, + "learning_rate": 6.806972926694546e-06, + "loss": 0.3491, + "step": 20023 + }, + { + "epoch": 0.4008507869779546, + "grad_norm": 1.1437911987304688, + "learning_rate": 6.806670650458036e-06, + "loss": 0.331, + "step": 20024 + }, + { + "epoch": 0.40087080549508297, + "grad_norm": 1.0471243858337402, + "learning_rate": 6.80636836662667e-06, + "loss": 0.3084, + "step": 20025 + }, + { + "epoch": 0.4008908240122113, + "grad_norm": 1.1218518018722534, + "learning_rate": 6.8060660752017215e-06, + "loss": 0.3183, + "step": 20026 + }, + { + "epoch": 0.4009108425293396, + "grad_norm": 1.3520230054855347, + "learning_rate": 6.805763776184458e-06, + "loss": 0.3268, + "step": 20027 + }, + { + "epoch": 0.40093086104646797, + "grad_norm": 1.0471633672714233, + "learning_rate": 6.8054614695761546e-06, + "loss": 0.32, + "step": 20028 + }, + { + "epoch": 0.4009508795635963, + "grad_norm": 0.9750092029571533, + "learning_rate": 6.805159155378078e-06, + "loss": 0.2589, + "step": 20029 + }, + { + "epoch": 0.40097089808072467, + "grad_norm": 1.1142809391021729, + "learning_rate": 6.804856833591502e-06, + "loss": 0.3437, + "step": 20030 + }, + { + "epoch": 0.400990916597853, + "grad_norm": 1.835602045059204, + "learning_rate": 6.804554504217695e-06, + "loss": 0.7381, + "step": 20031 + }, + { + "epoch": 0.40101093511498137, + "grad_norm": 1.0538418292999268, + "learning_rate": 6.80425216725793e-06, + "loss": 0.2757, + "step": 20032 + }, + { + "epoch": 0.4010309536321097, + "grad_norm": 1.1571881771087646, + "learning_rate": 6.803949822713478e-06, + "loss": 0.3014, + "step": 20033 + }, + { + "epoch": 0.40105097214923807, + "grad_norm": 1.8822194337844849, + "learning_rate": 6.803647470585608e-06, + "loss": 0.7562, + "step": 20034 + }, + { + "epoch": 0.40107099066636637, + "grad_norm": 1.3115509748458862, + "learning_rate": 6.8033451108755925e-06, + "loss": 0.327, + "step": 20035 + }, + { + "epoch": 0.4010910091834947, + "grad_norm": 0.9763419032096863, + "learning_rate": 6.803042743584702e-06, + "loss": 0.2815, + "step": 20036 + }, + { + "epoch": 0.40111102770062307, + "grad_norm": 1.1555989980697632, + "learning_rate": 6.802740368714209e-06, + "loss": 0.3617, + "step": 20037 + }, + { + "epoch": 0.4011310462177514, + "grad_norm": 1.225885033607483, + "learning_rate": 6.802437986265382e-06, + "loss": 0.3523, + "step": 20038 + }, + { + "epoch": 0.40115106473487977, + "grad_norm": 1.085558295249939, + "learning_rate": 6.802135596239493e-06, + "loss": 0.3662, + "step": 20039 + }, + { + "epoch": 0.4011710832520081, + "grad_norm": 1.058727741241455, + "learning_rate": 6.801833198637815e-06, + "loss": 0.3084, + "step": 20040 + }, + { + "epoch": 0.40119110176913647, + "grad_norm": 1.087077260017395, + "learning_rate": 6.801530793461618e-06, + "loss": 0.3567, + "step": 20041 + }, + { + "epoch": 0.4012111202862648, + "grad_norm": 1.0798625946044922, + "learning_rate": 6.801228380712173e-06, + "loss": 0.3212, + "step": 20042 + }, + { + "epoch": 0.4012311388033931, + "grad_norm": 1.0602620840072632, + "learning_rate": 6.800925960390752e-06, + "loss": 0.2707, + "step": 20043 + }, + { + "epoch": 0.40125115732052147, + "grad_norm": 1.1078524589538574, + "learning_rate": 6.800623532498626e-06, + "loss": 0.3223, + "step": 20044 + }, + { + "epoch": 0.4012711758376498, + "grad_norm": 1.1366664171218872, + "learning_rate": 6.800321097037063e-06, + "loss": 0.3089, + "step": 20045 + }, + { + "epoch": 0.40129119435477817, + "grad_norm": 1.0734381675720215, + "learning_rate": 6.800018654007342e-06, + "loss": 0.3264, + "step": 20046 + }, + { + "epoch": 0.4013112128719065, + "grad_norm": 1.1422914266586304, + "learning_rate": 6.799716203410727e-06, + "loss": 0.3358, + "step": 20047 + }, + { + "epoch": 0.40133123138903487, + "grad_norm": 1.1851246356964111, + "learning_rate": 6.799413745248494e-06, + "loss": 0.3289, + "step": 20048 + }, + { + "epoch": 0.4013512499061632, + "grad_norm": 1.0903549194335938, + "learning_rate": 6.799111279521913e-06, + "loss": 0.2887, + "step": 20049 + }, + { + "epoch": 0.40137126842329157, + "grad_norm": 1.9507962465286255, + "learning_rate": 6.798808806232255e-06, + "loss": 0.8848, + "step": 20050 + }, + { + "epoch": 0.40139128694041987, + "grad_norm": 1.067079782485962, + "learning_rate": 6.798506325380791e-06, + "loss": 0.3168, + "step": 20051 + }, + { + "epoch": 0.4014113054575482, + "grad_norm": 1.0974375009536743, + "learning_rate": 6.7982038369687934e-06, + "loss": 0.3043, + "step": 20052 + }, + { + "epoch": 0.40143132397467657, + "grad_norm": 0.9688325524330139, + "learning_rate": 6.7979013409975346e-06, + "loss": 0.3024, + "step": 20053 + }, + { + "epoch": 0.4014513424918049, + "grad_norm": 1.1451843976974487, + "learning_rate": 6.7975988374682846e-06, + "loss": 0.3227, + "step": 20054 + }, + { + "epoch": 0.40147136100893327, + "grad_norm": 1.204134464263916, + "learning_rate": 6.797296326382317e-06, + "loss": 0.3228, + "step": 20055 + }, + { + "epoch": 0.4014913795260616, + "grad_norm": 1.0895421504974365, + "learning_rate": 6.796993807740903e-06, + "loss": 0.3104, + "step": 20056 + }, + { + "epoch": 0.40151139804318997, + "grad_norm": 1.33590567111969, + "learning_rate": 6.796691281545313e-06, + "loss": 0.3046, + "step": 20057 + }, + { + "epoch": 0.4015314165603183, + "grad_norm": 1.0144106149673462, + "learning_rate": 6.796388747796817e-06, + "loss": 0.2439, + "step": 20058 + }, + { + "epoch": 0.4015514350774466, + "grad_norm": 1.8948878049850464, + "learning_rate": 6.796086206496693e-06, + "loss": 0.8027, + "step": 20059 + }, + { + "epoch": 0.40157145359457497, + "grad_norm": 1.0844651460647583, + "learning_rate": 6.795783657646206e-06, + "loss": 0.3839, + "step": 20060 + }, + { + "epoch": 0.4015914721117033, + "grad_norm": 1.1369900703430176, + "learning_rate": 6.795481101246635e-06, + "loss": 0.2756, + "step": 20061 + }, + { + "epoch": 0.40161149062883167, + "grad_norm": 1.1602509021759033, + "learning_rate": 6.795178537299244e-06, + "loss": 0.3413, + "step": 20062 + }, + { + "epoch": 0.40163150914596, + "grad_norm": 1.1954424381256104, + "learning_rate": 6.7948759658053105e-06, + "loss": 0.3274, + "step": 20063 + }, + { + "epoch": 0.40165152766308837, + "grad_norm": 1.0300564765930176, + "learning_rate": 6.794573386766105e-06, + "loss": 0.319, + "step": 20064 + }, + { + "epoch": 0.4016715461802167, + "grad_norm": 1.0927468538284302, + "learning_rate": 6.794270800182898e-06, + "loss": 0.3131, + "step": 20065 + }, + { + "epoch": 0.40169156469734507, + "grad_norm": 1.23080313205719, + "learning_rate": 6.793968206056964e-06, + "loss": 0.3138, + "step": 20066 + }, + { + "epoch": 0.40171158321447337, + "grad_norm": 1.0674123764038086, + "learning_rate": 6.7936656043895725e-06, + "loss": 0.3006, + "step": 20067 + }, + { + "epoch": 0.4017316017316017, + "grad_norm": 1.241860270500183, + "learning_rate": 6.793362995181998e-06, + "loss": 0.3104, + "step": 20068 + }, + { + "epoch": 0.40175162024873007, + "grad_norm": 1.1151217222213745, + "learning_rate": 6.79306037843551e-06, + "loss": 0.3073, + "step": 20069 + }, + { + "epoch": 0.4017716387658584, + "grad_norm": 1.7048077583312988, + "learning_rate": 6.7927577541513845e-06, + "loss": 0.7545, + "step": 20070 + }, + { + "epoch": 0.40179165728298677, + "grad_norm": 1.357069969177246, + "learning_rate": 6.792455122330888e-06, + "loss": 0.2606, + "step": 20071 + }, + { + "epoch": 0.4018116758001151, + "grad_norm": 1.1001204252243042, + "learning_rate": 6.792152482975297e-06, + "loss": 0.3532, + "step": 20072 + }, + { + "epoch": 0.40183169431724347, + "grad_norm": 1.0743498802185059, + "learning_rate": 6.791849836085883e-06, + "loss": 0.3334, + "step": 20073 + }, + { + "epoch": 0.4018517128343718, + "grad_norm": 1.2594271898269653, + "learning_rate": 6.791547181663919e-06, + "loss": 0.326, + "step": 20074 + }, + { + "epoch": 0.4018717313515001, + "grad_norm": 1.0596466064453125, + "learning_rate": 6.791244519710676e-06, + "loss": 0.3231, + "step": 20075 + }, + { + "epoch": 0.40189174986862847, + "grad_norm": 1.0412466526031494, + "learning_rate": 6.790941850227425e-06, + "loss": 0.3181, + "step": 20076 + }, + { + "epoch": 0.4019117683857568, + "grad_norm": 1.0249576568603516, + "learning_rate": 6.79063917321544e-06, + "loss": 0.2906, + "step": 20077 + }, + { + "epoch": 0.40193178690288517, + "grad_norm": 1.1745752096176147, + "learning_rate": 6.790336488675993e-06, + "loss": 0.2793, + "step": 20078 + }, + { + "epoch": 0.4019518054200135, + "grad_norm": 1.166029691696167, + "learning_rate": 6.790033796610358e-06, + "loss": 0.3327, + "step": 20079 + }, + { + "epoch": 0.40197182393714187, + "grad_norm": 1.1781963109970093, + "learning_rate": 6.789731097019805e-06, + "loss": 0.2863, + "step": 20080 + }, + { + "epoch": 0.4019918424542702, + "grad_norm": 1.058457851409912, + "learning_rate": 6.789428389905609e-06, + "loss": 0.3157, + "step": 20081 + }, + { + "epoch": 0.40201186097139857, + "grad_norm": 1.0658875703811646, + "learning_rate": 6.7891256752690405e-06, + "loss": 0.2851, + "step": 20082 + }, + { + "epoch": 0.40203187948852687, + "grad_norm": 1.081756830215454, + "learning_rate": 6.788822953111374e-06, + "loss": 0.3189, + "step": 20083 + }, + { + "epoch": 0.4020518980056552, + "grad_norm": 1.061232089996338, + "learning_rate": 6.788520223433878e-06, + "loss": 0.288, + "step": 20084 + }, + { + "epoch": 0.40207191652278357, + "grad_norm": 1.232908010482788, + "learning_rate": 6.788217486237829e-06, + "loss": 0.301, + "step": 20085 + }, + { + "epoch": 0.4020919350399119, + "grad_norm": 1.1021978855133057, + "learning_rate": 6.7879147415245e-06, + "loss": 0.332, + "step": 20086 + }, + { + "epoch": 0.40211195355704027, + "grad_norm": 1.1486774682998657, + "learning_rate": 6.78761198929516e-06, + "loss": 0.2844, + "step": 20087 + }, + { + "epoch": 0.4021319720741686, + "grad_norm": 1.0212384462356567, + "learning_rate": 6.7873092295510844e-06, + "loss": 0.3373, + "step": 20088 + }, + { + "epoch": 0.40215199059129697, + "grad_norm": 1.094058632850647, + "learning_rate": 6.787006462293546e-06, + "loss": 0.3217, + "step": 20089 + }, + { + "epoch": 0.40217200910842527, + "grad_norm": 1.2358191013336182, + "learning_rate": 6.786703687523817e-06, + "loss": 0.2979, + "step": 20090 + }, + { + "epoch": 0.4021920276255536, + "grad_norm": 1.002882480621338, + "learning_rate": 6.786400905243169e-06, + "loss": 0.2403, + "step": 20091 + }, + { + "epoch": 0.40221204614268197, + "grad_norm": 1.0887409448623657, + "learning_rate": 6.786098115452877e-06, + "loss": 0.3045, + "step": 20092 + }, + { + "epoch": 0.4022320646598103, + "grad_norm": 1.0627740621566772, + "learning_rate": 6.785795318154214e-06, + "loss": 0.3213, + "step": 20093 + }, + { + "epoch": 0.40225208317693867, + "grad_norm": 1.074700951576233, + "learning_rate": 6.785492513348451e-06, + "loss": 0.292, + "step": 20094 + }, + { + "epoch": 0.402272101694067, + "grad_norm": 1.1472349166870117, + "learning_rate": 6.785189701036862e-06, + "loss": 0.3506, + "step": 20095 + }, + { + "epoch": 0.40229212021119537, + "grad_norm": 1.085155725479126, + "learning_rate": 6.784886881220718e-06, + "loss": 0.3098, + "step": 20096 + }, + { + "epoch": 0.4023121387283237, + "grad_norm": 1.0288777351379395, + "learning_rate": 6.7845840539012954e-06, + "loss": 0.2655, + "step": 20097 + }, + { + "epoch": 0.402332157245452, + "grad_norm": 1.0880290269851685, + "learning_rate": 6.784281219079864e-06, + "loss": 0.3031, + "step": 20098 + }, + { + "epoch": 0.40235217576258037, + "grad_norm": 1.8587000370025635, + "learning_rate": 6.783978376757701e-06, + "loss": 0.7776, + "step": 20099 + }, + { + "epoch": 0.4023721942797087, + "grad_norm": 1.9364724159240723, + "learning_rate": 6.783675526936073e-06, + "loss": 0.8356, + "step": 20100 + }, + { + "epoch": 0.40239221279683707, + "grad_norm": 1.081954002380371, + "learning_rate": 6.7833726696162605e-06, + "loss": 0.3036, + "step": 20101 + }, + { + "epoch": 0.4024122313139654, + "grad_norm": 1.1808502674102783, + "learning_rate": 6.783069804799531e-06, + "loss": 0.3094, + "step": 20102 + }, + { + "epoch": 0.40243224983109377, + "grad_norm": 1.1223286390304565, + "learning_rate": 6.7827669324871596e-06, + "loss": 0.2912, + "step": 20103 + }, + { + "epoch": 0.4024522683482221, + "grad_norm": 1.1044856309890747, + "learning_rate": 6.782464052680419e-06, + "loss": 0.3608, + "step": 20104 + }, + { + "epoch": 0.40247228686535047, + "grad_norm": 1.2716763019561768, + "learning_rate": 6.782161165380586e-06, + "loss": 0.3168, + "step": 20105 + }, + { + "epoch": 0.40249230538247877, + "grad_norm": 1.038558006286621, + "learning_rate": 6.78185827058893e-06, + "loss": 0.2683, + "step": 20106 + }, + { + "epoch": 0.4025123238996071, + "grad_norm": 1.1061015129089355, + "learning_rate": 6.7815553683067245e-06, + "loss": 0.3381, + "step": 20107 + }, + { + "epoch": 0.40253234241673547, + "grad_norm": 1.1897962093353271, + "learning_rate": 6.781252458535244e-06, + "loss": 0.3694, + "step": 20108 + }, + { + "epoch": 0.4025523609338638, + "grad_norm": 1.066164493560791, + "learning_rate": 6.780949541275762e-06, + "loss": 0.3243, + "step": 20109 + }, + { + "epoch": 0.40257237945099217, + "grad_norm": 1.0789393186569214, + "learning_rate": 6.78064661652955e-06, + "loss": 0.3167, + "step": 20110 + }, + { + "epoch": 0.4025923979681205, + "grad_norm": 1.1779569387435913, + "learning_rate": 6.7803436842978835e-06, + "loss": 0.3341, + "step": 20111 + }, + { + "epoch": 0.40261241648524887, + "grad_norm": 1.0998564958572388, + "learning_rate": 6.780040744582036e-06, + "loss": 0.3306, + "step": 20112 + }, + { + "epoch": 0.4026324350023772, + "grad_norm": 1.1108051538467407, + "learning_rate": 6.779737797383279e-06, + "loss": 0.2968, + "step": 20113 + }, + { + "epoch": 0.4026524535195055, + "grad_norm": 1.1029291152954102, + "learning_rate": 6.779434842702889e-06, + "loss": 0.3005, + "step": 20114 + }, + { + "epoch": 0.40267247203663387, + "grad_norm": 1.1182341575622559, + "learning_rate": 6.7791318805421354e-06, + "loss": 0.3806, + "step": 20115 + }, + { + "epoch": 0.4026924905537622, + "grad_norm": 1.3395049571990967, + "learning_rate": 6.778828910902296e-06, + "loss": 0.3097, + "step": 20116 + }, + { + "epoch": 0.40271250907089057, + "grad_norm": 1.101199746131897, + "learning_rate": 6.778525933784643e-06, + "loss": 0.2982, + "step": 20117 + }, + { + "epoch": 0.4027325275880189, + "grad_norm": 1.0324383974075317, + "learning_rate": 6.778222949190449e-06, + "loss": 0.3047, + "step": 20118 + }, + { + "epoch": 0.40275254610514727, + "grad_norm": 1.1142404079437256, + "learning_rate": 6.777919957120989e-06, + "loss": 0.2899, + "step": 20119 + }, + { + "epoch": 0.4027725646222756, + "grad_norm": 1.105521321296692, + "learning_rate": 6.7776169575775356e-06, + "loss": 0.3515, + "step": 20120 + }, + { + "epoch": 0.40279258313940397, + "grad_norm": 1.0272352695465088, + "learning_rate": 6.777313950561364e-06, + "loss": 0.3271, + "step": 20121 + }, + { + "epoch": 0.40281260165653227, + "grad_norm": 1.016469120979309, + "learning_rate": 6.777010936073745e-06, + "loss": 0.3043, + "step": 20122 + }, + { + "epoch": 0.4028326201736606, + "grad_norm": 0.9923204779624939, + "learning_rate": 6.776707914115955e-06, + "loss": 0.284, + "step": 20123 + }, + { + "epoch": 0.40285263869078897, + "grad_norm": 1.7830814123153687, + "learning_rate": 6.7764048846892675e-06, + "loss": 0.8225, + "step": 20124 + }, + { + "epoch": 0.4028726572079173, + "grad_norm": 1.0683605670928955, + "learning_rate": 6.776101847794957e-06, + "loss": 0.3168, + "step": 20125 + }, + { + "epoch": 0.40289267572504567, + "grad_norm": 1.3777329921722412, + "learning_rate": 6.775798803434297e-06, + "loss": 0.303, + "step": 20126 + }, + { + "epoch": 0.402912694242174, + "grad_norm": 1.043027639389038, + "learning_rate": 6.775495751608559e-06, + "loss": 0.2993, + "step": 20127 + }, + { + "epoch": 0.40293271275930237, + "grad_norm": 1.8105133771896362, + "learning_rate": 6.775192692319021e-06, + "loss": 0.8061, + "step": 20128 + }, + { + "epoch": 0.4029527312764307, + "grad_norm": 1.144065499305725, + "learning_rate": 6.774889625566953e-06, + "loss": 0.2793, + "step": 20129 + }, + { + "epoch": 0.402972749793559, + "grad_norm": 1.2850096225738525, + "learning_rate": 6.774586551353632e-06, + "loss": 0.2743, + "step": 20130 + }, + { + "epoch": 0.40299276831068737, + "grad_norm": 1.0176945924758911, + "learning_rate": 6.774283469680331e-06, + "loss": 0.2905, + "step": 20131 + }, + { + "epoch": 0.4030127868278157, + "grad_norm": 1.2330551147460938, + "learning_rate": 6.773980380548324e-06, + "loss": 0.3973, + "step": 20132 + }, + { + "epoch": 0.40303280534494407, + "grad_norm": 1.059989333152771, + "learning_rate": 6.773677283958885e-06, + "loss": 0.3013, + "step": 20133 + }, + { + "epoch": 0.4030528238620724, + "grad_norm": 1.0611392259597778, + "learning_rate": 6.773374179913289e-06, + "loss": 0.3193, + "step": 20134 + }, + { + "epoch": 0.40307284237920077, + "grad_norm": 1.2367138862609863, + "learning_rate": 6.773071068412808e-06, + "loss": 0.3599, + "step": 20135 + }, + { + "epoch": 0.4030928608963291, + "grad_norm": 1.0271645784378052, + "learning_rate": 6.7727679494587194e-06, + "loss": 0.3591, + "step": 20136 + }, + { + "epoch": 0.40311287941345747, + "grad_norm": 1.73822820186615, + "learning_rate": 6.772464823052296e-06, + "loss": 0.7904, + "step": 20137 + }, + { + "epoch": 0.40313289793058577, + "grad_norm": 1.055800199508667, + "learning_rate": 6.772161689194812e-06, + "loss": 0.3371, + "step": 20138 + }, + { + "epoch": 0.4031529164477141, + "grad_norm": 1.1133995056152344, + "learning_rate": 6.771858547887541e-06, + "loss": 0.3492, + "step": 20139 + }, + { + "epoch": 0.40317293496484247, + "grad_norm": 1.7902618646621704, + "learning_rate": 6.771555399131757e-06, + "loss": 0.7869, + "step": 20140 + }, + { + "epoch": 0.4031929534819708, + "grad_norm": 1.2219854593276978, + "learning_rate": 6.771252242928736e-06, + "loss": 0.3129, + "step": 20141 + }, + { + "epoch": 0.40321297199909917, + "grad_norm": 1.1743273735046387, + "learning_rate": 6.770949079279753e-06, + "loss": 0.3193, + "step": 20142 + }, + { + "epoch": 0.4032329905162275, + "grad_norm": 0.9622101783752441, + "learning_rate": 6.7706459081860805e-06, + "loss": 0.3094, + "step": 20143 + }, + { + "epoch": 0.40325300903335587, + "grad_norm": 1.1451534032821655, + "learning_rate": 6.770342729648993e-06, + "loss": 0.3077, + "step": 20144 + }, + { + "epoch": 0.4032730275504842, + "grad_norm": 1.5612035989761353, + "learning_rate": 6.770039543669766e-06, + "loss": 0.3487, + "step": 20145 + }, + { + "epoch": 0.4032930460676125, + "grad_norm": 1.1916719675064087, + "learning_rate": 6.769736350249674e-06, + "loss": 0.3386, + "step": 20146 + }, + { + "epoch": 0.40331306458474087, + "grad_norm": 1.0782990455627441, + "learning_rate": 6.769433149389992e-06, + "loss": 0.3546, + "step": 20147 + }, + { + "epoch": 0.4033330831018692, + "grad_norm": 1.1072628498077393, + "learning_rate": 6.769129941091992e-06, + "loss": 0.2965, + "step": 20148 + }, + { + "epoch": 0.40335310161899757, + "grad_norm": 1.1802881956100464, + "learning_rate": 6.768826725356952e-06, + "loss": 0.2916, + "step": 20149 + }, + { + "epoch": 0.4033731201361259, + "grad_norm": 1.152841329574585, + "learning_rate": 6.768523502186144e-06, + "loss": 0.3522, + "step": 20150 + }, + { + "epoch": 0.40339313865325427, + "grad_norm": 1.1120487451553345, + "learning_rate": 6.768220271580845e-06, + "loss": 0.3025, + "step": 20151 + }, + { + "epoch": 0.4034131571703826, + "grad_norm": 1.04314124584198, + "learning_rate": 6.7679170335423275e-06, + "loss": 0.3078, + "step": 20152 + }, + { + "epoch": 0.40343317568751097, + "grad_norm": 1.0244386196136475, + "learning_rate": 6.767613788071867e-06, + "loss": 0.3261, + "step": 20153 + }, + { + "epoch": 0.40345319420463926, + "grad_norm": 1.4029911756515503, + "learning_rate": 6.767310535170738e-06, + "loss": 0.3639, + "step": 20154 + }, + { + "epoch": 0.4034732127217676, + "grad_norm": 1.2894929647445679, + "learning_rate": 6.767007274840217e-06, + "loss": 0.3533, + "step": 20155 + }, + { + "epoch": 0.40349323123889597, + "grad_norm": 1.0528255701065063, + "learning_rate": 6.766704007081578e-06, + "loss": 0.3408, + "step": 20156 + }, + { + "epoch": 0.4035132497560243, + "grad_norm": 1.011368989944458, + "learning_rate": 6.766400731896096e-06, + "loss": 0.3265, + "step": 20157 + }, + { + "epoch": 0.40353326827315267, + "grad_norm": 1.3347392082214355, + "learning_rate": 6.766097449285044e-06, + "loss": 0.3261, + "step": 20158 + }, + { + "epoch": 0.403553286790281, + "grad_norm": 1.1120736598968506, + "learning_rate": 6.765794159249699e-06, + "loss": 0.3718, + "step": 20159 + }, + { + "epoch": 0.40357330530740937, + "grad_norm": 1.8110777139663696, + "learning_rate": 6.765490861791335e-06, + "loss": 0.8285, + "step": 20160 + }, + { + "epoch": 0.4035933238245377, + "grad_norm": 1.3113691806793213, + "learning_rate": 6.765187556911227e-06, + "loss": 0.3314, + "step": 20161 + }, + { + "epoch": 0.403613342341666, + "grad_norm": 1.252293586730957, + "learning_rate": 6.764884244610651e-06, + "loss": 0.2972, + "step": 20162 + }, + { + "epoch": 0.40363336085879437, + "grad_norm": 1.709787368774414, + "learning_rate": 6.764580924890882e-06, + "loss": 0.828, + "step": 20163 + }, + { + "epoch": 0.4036533793759227, + "grad_norm": 1.0562255382537842, + "learning_rate": 6.764277597753192e-06, + "loss": 0.3141, + "step": 20164 + }, + { + "epoch": 0.40367339789305107, + "grad_norm": 1.1033858060836792, + "learning_rate": 6.7639742631988625e-06, + "loss": 0.3178, + "step": 20165 + }, + { + "epoch": 0.4036934164101794, + "grad_norm": 1.1392881870269775, + "learning_rate": 6.763670921229163e-06, + "loss": 0.3239, + "step": 20166 + }, + { + "epoch": 0.40371343492730777, + "grad_norm": 1.0625370740890503, + "learning_rate": 6.76336757184537e-06, + "loss": 0.2928, + "step": 20167 + }, + { + "epoch": 0.4037334534444361, + "grad_norm": 1.0415270328521729, + "learning_rate": 6.76306421504876e-06, + "loss": 0.2903, + "step": 20168 + }, + { + "epoch": 0.40375347196156447, + "grad_norm": 1.2085485458374023, + "learning_rate": 6.7627608508406064e-06, + "loss": 0.3128, + "step": 20169 + }, + { + "epoch": 0.40377349047869276, + "grad_norm": 0.9583704471588135, + "learning_rate": 6.7624574792221866e-06, + "loss": 0.306, + "step": 20170 + }, + { + "epoch": 0.4037935089958211, + "grad_norm": 1.0484267473220825, + "learning_rate": 6.7621541001947746e-06, + "loss": 0.2887, + "step": 20171 + }, + { + "epoch": 0.40381352751294947, + "grad_norm": 1.0678538084030151, + "learning_rate": 6.761850713759645e-06, + "loss": 0.3194, + "step": 20172 + }, + { + "epoch": 0.4038335460300778, + "grad_norm": 1.0491141080856323, + "learning_rate": 6.761547319918076e-06, + "loss": 0.3219, + "step": 20173 + }, + { + "epoch": 0.40385356454720617, + "grad_norm": 1.0946606397628784, + "learning_rate": 6.76124391867134e-06, + "loss": 0.3221, + "step": 20174 + }, + { + "epoch": 0.4038735830643345, + "grad_norm": 1.09459388256073, + "learning_rate": 6.760940510020712e-06, + "loss": 0.27, + "step": 20175 + }, + { + "epoch": 0.40389360158146287, + "grad_norm": 1.2272803783416748, + "learning_rate": 6.7606370939674725e-06, + "loss": 0.2829, + "step": 20176 + }, + { + "epoch": 0.4039136200985912, + "grad_norm": 1.175908088684082, + "learning_rate": 6.760333670512891e-06, + "loss": 0.3537, + "step": 20177 + }, + { + "epoch": 0.4039336386157195, + "grad_norm": 1.0541492700576782, + "learning_rate": 6.760030239658248e-06, + "loss": 0.2916, + "step": 20178 + }, + { + "epoch": 0.40395365713284787, + "grad_norm": 1.1390471458435059, + "learning_rate": 6.759726801404813e-06, + "loss": 0.3087, + "step": 20179 + }, + { + "epoch": 0.4039736756499762, + "grad_norm": 1.230419635772705, + "learning_rate": 6.759423355753868e-06, + "loss": 0.3309, + "step": 20180 + }, + { + "epoch": 0.40399369416710457, + "grad_norm": 1.9109363555908203, + "learning_rate": 6.759119902706685e-06, + "loss": 0.809, + "step": 20181 + }, + { + "epoch": 0.4040137126842329, + "grad_norm": 1.9307066202163696, + "learning_rate": 6.758816442264541e-06, + "loss": 0.7771, + "step": 20182 + }, + { + "epoch": 0.40403373120136127, + "grad_norm": 1.2260311841964722, + "learning_rate": 6.758512974428712e-06, + "loss": 0.3124, + "step": 20183 + }, + { + "epoch": 0.4040537497184896, + "grad_norm": 1.2095345258712769, + "learning_rate": 6.75820949920047e-06, + "loss": 0.2925, + "step": 20184 + }, + { + "epoch": 0.40407376823561797, + "grad_norm": 1.0618815422058105, + "learning_rate": 6.757906016581096e-06, + "loss": 0.3343, + "step": 20185 + }, + { + "epoch": 0.40409378675274626, + "grad_norm": 1.0485953092575073, + "learning_rate": 6.757602526571863e-06, + "loss": 0.362, + "step": 20186 + }, + { + "epoch": 0.4041138052698746, + "grad_norm": 1.0603660345077515, + "learning_rate": 6.757299029174047e-06, + "loss": 0.2988, + "step": 20187 + }, + { + "epoch": 0.40413382378700297, + "grad_norm": 1.1600492000579834, + "learning_rate": 6.756995524388923e-06, + "loss": 0.3066, + "step": 20188 + }, + { + "epoch": 0.4041538423041313, + "grad_norm": 0.9687890410423279, + "learning_rate": 6.756692012217769e-06, + "loss": 0.2697, + "step": 20189 + }, + { + "epoch": 0.40417386082125967, + "grad_norm": 1.1338173151016235, + "learning_rate": 6.756388492661861e-06, + "loss": 0.2755, + "step": 20190 + }, + { + "epoch": 0.404193879338388, + "grad_norm": 1.046234130859375, + "learning_rate": 6.756084965722472e-06, + "loss": 0.3329, + "step": 20191 + }, + { + "epoch": 0.40421389785551637, + "grad_norm": 1.093930959701538, + "learning_rate": 6.755781431400879e-06, + "loss": 0.3299, + "step": 20192 + }, + { + "epoch": 0.4042339163726447, + "grad_norm": 1.0883755683898926, + "learning_rate": 6.7554778896983606e-06, + "loss": 0.3736, + "step": 20193 + }, + { + "epoch": 0.404253934889773, + "grad_norm": 1.080523133277893, + "learning_rate": 6.755174340616191e-06, + "loss": 0.3311, + "step": 20194 + }, + { + "epoch": 0.40427395340690137, + "grad_norm": 1.0087966918945312, + "learning_rate": 6.754870784155645e-06, + "loss": 0.3138, + "step": 20195 + }, + { + "epoch": 0.4042939719240297, + "grad_norm": 1.9990891218185425, + "learning_rate": 6.7545672203180005e-06, + "loss": 0.8615, + "step": 20196 + }, + { + "epoch": 0.40431399044115807, + "grad_norm": 1.0490695238113403, + "learning_rate": 6.754263649104534e-06, + "loss": 0.3224, + "step": 20197 + }, + { + "epoch": 0.4043340089582864, + "grad_norm": 1.1367883682250977, + "learning_rate": 6.753960070516518e-06, + "loss": 0.3347, + "step": 20198 + }, + { + "epoch": 0.40435402747541477, + "grad_norm": 1.2140583992004395, + "learning_rate": 6.753656484555233e-06, + "loss": 0.3698, + "step": 20199 + }, + { + "epoch": 0.4043740459925431, + "grad_norm": 1.1438169479370117, + "learning_rate": 6.753352891221954e-06, + "loss": 0.2991, + "step": 20200 + }, + { + "epoch": 0.40439406450967147, + "grad_norm": 1.0878103971481323, + "learning_rate": 6.753049290517956e-06, + "loss": 0.3227, + "step": 20201 + }, + { + "epoch": 0.40441408302679976, + "grad_norm": 1.1186480522155762, + "learning_rate": 6.752745682444516e-06, + "loss": 0.3489, + "step": 20202 + }, + { + "epoch": 0.4044341015439281, + "grad_norm": 1.2221050262451172, + "learning_rate": 6.752442067002911e-06, + "loss": 0.3758, + "step": 20203 + }, + { + "epoch": 0.40445412006105647, + "grad_norm": 1.005954384803772, + "learning_rate": 6.752138444194416e-06, + "loss": 0.3085, + "step": 20204 + }, + { + "epoch": 0.4044741385781848, + "grad_norm": 1.2970209121704102, + "learning_rate": 6.751834814020307e-06, + "loss": 0.3082, + "step": 20205 + }, + { + "epoch": 0.40449415709531317, + "grad_norm": 0.9867056608200073, + "learning_rate": 6.751531176481863e-06, + "loss": 0.2987, + "step": 20206 + }, + { + "epoch": 0.4045141756124415, + "grad_norm": 1.8820058107376099, + "learning_rate": 6.751227531580359e-06, + "loss": 0.7445, + "step": 20207 + }, + { + "epoch": 0.40453419412956987, + "grad_norm": 1.2104965448379517, + "learning_rate": 6.750923879317071e-06, + "loss": 0.3541, + "step": 20208 + }, + { + "epoch": 0.4045542126466982, + "grad_norm": 1.0882728099822998, + "learning_rate": 6.750620219693276e-06, + "loss": 0.3143, + "step": 20209 + }, + { + "epoch": 0.4045742311638265, + "grad_norm": 0.9838890433311462, + "learning_rate": 6.75031655271025e-06, + "loss": 0.3152, + "step": 20210 + }, + { + "epoch": 0.40459424968095486, + "grad_norm": 1.198917269706726, + "learning_rate": 6.75001287836927e-06, + "loss": 0.3639, + "step": 20211 + }, + { + "epoch": 0.4046142681980832, + "grad_norm": 1.1191633939743042, + "learning_rate": 6.749709196671611e-06, + "loss": 0.3252, + "step": 20212 + }, + { + "epoch": 0.40463428671521157, + "grad_norm": 1.3364065885543823, + "learning_rate": 6.7494055076185525e-06, + "loss": 0.3205, + "step": 20213 + }, + { + "epoch": 0.4046543052323399, + "grad_norm": 1.0645078420639038, + "learning_rate": 6.7491018112113705e-06, + "loss": 0.3012, + "step": 20214 + }, + { + "epoch": 0.40467432374946827, + "grad_norm": 1.1563239097595215, + "learning_rate": 6.7487981074513395e-06, + "loss": 0.2948, + "step": 20215 + }, + { + "epoch": 0.4046943422665966, + "grad_norm": 1.0804457664489746, + "learning_rate": 6.748494396339738e-06, + "loss": 0.322, + "step": 20216 + }, + { + "epoch": 0.40471436078372497, + "grad_norm": 1.1043576002120972, + "learning_rate": 6.748190677877843e-06, + "loss": 0.3283, + "step": 20217 + }, + { + "epoch": 0.40473437930085326, + "grad_norm": 1.0738002061843872, + "learning_rate": 6.747886952066929e-06, + "loss": 0.3001, + "step": 20218 + }, + { + "epoch": 0.4047543978179816, + "grad_norm": 1.139197826385498, + "learning_rate": 6.7475832189082754e-06, + "loss": 0.3766, + "step": 20219 + }, + { + "epoch": 0.40477441633510997, + "grad_norm": 1.8616987466812134, + "learning_rate": 6.747279478403159e-06, + "loss": 0.7921, + "step": 20220 + }, + { + "epoch": 0.4047944348522383, + "grad_norm": 1.0060852766036987, + "learning_rate": 6.746975730552855e-06, + "loss": 0.3012, + "step": 20221 + }, + { + "epoch": 0.40481445336936667, + "grad_norm": 1.9266177415847778, + "learning_rate": 6.74667197535864e-06, + "loss": 0.7805, + "step": 20222 + }, + { + "epoch": 0.404834471886495, + "grad_norm": 1.0597625970840454, + "learning_rate": 6.7463682128217945e-06, + "loss": 0.2977, + "step": 20223 + }, + { + "epoch": 0.40485449040362337, + "grad_norm": 1.2276726961135864, + "learning_rate": 6.74606444294359e-06, + "loss": 0.3251, + "step": 20224 + }, + { + "epoch": 0.4048745089207517, + "grad_norm": 1.3103115558624268, + "learning_rate": 6.745760665725307e-06, + "loss": 0.361, + "step": 20225 + }, + { + "epoch": 0.40489452743788, + "grad_norm": 1.0929948091506958, + "learning_rate": 6.745456881168222e-06, + "loss": 0.2948, + "step": 20226 + }, + { + "epoch": 0.40491454595500836, + "grad_norm": 1.3263081312179565, + "learning_rate": 6.745153089273613e-06, + "loss": 0.4044, + "step": 20227 + }, + { + "epoch": 0.4049345644721367, + "grad_norm": 1.0508768558502197, + "learning_rate": 6.744849290042754e-06, + "loss": 0.3497, + "step": 20228 + }, + { + "epoch": 0.40495458298926507, + "grad_norm": 1.0736085176467896, + "learning_rate": 6.7445454834769265e-06, + "loss": 0.3024, + "step": 20229 + }, + { + "epoch": 0.4049746015063934, + "grad_norm": 1.0395617485046387, + "learning_rate": 6.744241669577404e-06, + "loss": 0.2909, + "step": 20230 + }, + { + "epoch": 0.40499462002352177, + "grad_norm": 1.1370948553085327, + "learning_rate": 6.743937848345463e-06, + "loss": 0.2893, + "step": 20231 + }, + { + "epoch": 0.4050146385406501, + "grad_norm": 1.2434815168380737, + "learning_rate": 6.743634019782386e-06, + "loss": 0.3305, + "step": 20232 + }, + { + "epoch": 0.40503465705777847, + "grad_norm": 0.9890876412391663, + "learning_rate": 6.743330183889445e-06, + "loss": 0.3063, + "step": 20233 + }, + { + "epoch": 0.40505467557490676, + "grad_norm": 1.181514859199524, + "learning_rate": 6.74302634066792e-06, + "loss": 0.351, + "step": 20234 + }, + { + "epoch": 0.4050746940920351, + "grad_norm": 1.0370845794677734, + "learning_rate": 6.742722490119086e-06, + "loss": 0.2863, + "step": 20235 + }, + { + "epoch": 0.40509471260916347, + "grad_norm": 1.7992589473724365, + "learning_rate": 6.742418632244223e-06, + "loss": 0.8856, + "step": 20236 + }, + { + "epoch": 0.4051147311262918, + "grad_norm": 1.1505273580551147, + "learning_rate": 6.742114767044605e-06, + "loss": 0.3053, + "step": 20237 + }, + { + "epoch": 0.40513474964342017, + "grad_norm": 1.1152859926223755, + "learning_rate": 6.7418108945215134e-06, + "loss": 0.3259, + "step": 20238 + }, + { + "epoch": 0.4051547681605485, + "grad_norm": 1.859289526939392, + "learning_rate": 6.741507014676223e-06, + "loss": 0.7678, + "step": 20239 + }, + { + "epoch": 0.40517478667767687, + "grad_norm": 1.2208528518676758, + "learning_rate": 6.741203127510012e-06, + "loss": 0.332, + "step": 20240 + }, + { + "epoch": 0.4051948051948052, + "grad_norm": 1.0983850955963135, + "learning_rate": 6.7408992330241584e-06, + "loss": 0.3159, + "step": 20241 + }, + { + "epoch": 0.4052148237119335, + "grad_norm": 1.0874316692352295, + "learning_rate": 6.740595331219938e-06, + "loss": 0.3291, + "step": 20242 + }, + { + "epoch": 0.40523484222906186, + "grad_norm": 1.9821447134017944, + "learning_rate": 6.7402914220986295e-06, + "loss": 0.7588, + "step": 20243 + }, + { + "epoch": 0.4052548607461902, + "grad_norm": 1.192830204963684, + "learning_rate": 6.73998750566151e-06, + "loss": 0.3321, + "step": 20244 + }, + { + "epoch": 0.40527487926331857, + "grad_norm": 0.9607119560241699, + "learning_rate": 6.73968358190986e-06, + "loss": 0.2824, + "step": 20245 + }, + { + "epoch": 0.4052948977804469, + "grad_norm": 1.2139025926589966, + "learning_rate": 6.739379650844951e-06, + "loss": 0.3375, + "step": 20246 + }, + { + "epoch": 0.40531491629757527, + "grad_norm": 1.170005440711975, + "learning_rate": 6.739075712468066e-06, + "loss": 0.3301, + "step": 20247 + }, + { + "epoch": 0.4053349348147036, + "grad_norm": 1.9799100160598755, + "learning_rate": 6.738771766780481e-06, + "loss": 0.8725, + "step": 20248 + }, + { + "epoch": 0.40535495333183197, + "grad_norm": 1.1028544902801514, + "learning_rate": 6.738467813783475e-06, + "loss": 0.3071, + "step": 20249 + }, + { + "epoch": 0.40537497184896026, + "grad_norm": 1.0652666091918945, + "learning_rate": 6.738163853478322e-06, + "loss": 0.309, + "step": 20250 + }, + { + "epoch": 0.4053949903660886, + "grad_norm": 1.2106062173843384, + "learning_rate": 6.7378598858663025e-06, + "loss": 0.3062, + "step": 20251 + }, + { + "epoch": 0.40541500888321697, + "grad_norm": 1.2484323978424072, + "learning_rate": 6.737555910948696e-06, + "loss": 0.3408, + "step": 20252 + }, + { + "epoch": 0.4054350274003453, + "grad_norm": 1.118355393409729, + "learning_rate": 6.737251928726777e-06, + "loss": 0.2802, + "step": 20253 + }, + { + "epoch": 0.40545504591747367, + "grad_norm": 1.8144137859344482, + "learning_rate": 6.736947939201825e-06, + "loss": 0.7744, + "step": 20254 + }, + { + "epoch": 0.405475064434602, + "grad_norm": 1.147241234779358, + "learning_rate": 6.736643942375117e-06, + "loss": 0.3805, + "step": 20255 + }, + { + "epoch": 0.40549508295173037, + "grad_norm": 1.0070682764053345, + "learning_rate": 6.7363399382479325e-06, + "loss": 0.2798, + "step": 20256 + }, + { + "epoch": 0.4055151014688587, + "grad_norm": 1.1843845844268799, + "learning_rate": 6.736035926821548e-06, + "loss": 0.3174, + "step": 20257 + }, + { + "epoch": 0.405535119985987, + "grad_norm": 1.0743193626403809, + "learning_rate": 6.735731908097243e-06, + "loss": 0.2845, + "step": 20258 + }, + { + "epoch": 0.40555513850311536, + "grad_norm": 1.1511669158935547, + "learning_rate": 6.735427882076293e-06, + "loss": 0.3067, + "step": 20259 + }, + { + "epoch": 0.4055751570202437, + "grad_norm": 1.0164902210235596, + "learning_rate": 6.735123848759979e-06, + "loss": 0.3143, + "step": 20260 + }, + { + "epoch": 0.40559517553737207, + "grad_norm": 1.1213420629501343, + "learning_rate": 6.734819808149578e-06, + "loss": 0.3305, + "step": 20261 + }, + { + "epoch": 0.4056151940545004, + "grad_norm": 1.0991843938827515, + "learning_rate": 6.734515760246367e-06, + "loss": 0.3231, + "step": 20262 + }, + { + "epoch": 0.40563521257162877, + "grad_norm": 1.32347571849823, + "learning_rate": 6.734211705051625e-06, + "loss": 0.3105, + "step": 20263 + }, + { + "epoch": 0.4056552310887571, + "grad_norm": 1.1458081007003784, + "learning_rate": 6.733907642566631e-06, + "loss": 0.2939, + "step": 20264 + }, + { + "epoch": 0.40567524960588547, + "grad_norm": 0.989929735660553, + "learning_rate": 6.733603572792662e-06, + "loss": 0.2875, + "step": 20265 + }, + { + "epoch": 0.40569526812301376, + "grad_norm": 1.0902864933013916, + "learning_rate": 6.733299495730996e-06, + "loss": 0.3284, + "step": 20266 + }, + { + "epoch": 0.4057152866401421, + "grad_norm": 1.0562025308609009, + "learning_rate": 6.732995411382912e-06, + "loss": 0.2845, + "step": 20267 + }, + { + "epoch": 0.40573530515727046, + "grad_norm": 1.0969665050506592, + "learning_rate": 6.732691319749689e-06, + "loss": 0.301, + "step": 20268 + }, + { + "epoch": 0.4057553236743988, + "grad_norm": 1.224936842918396, + "learning_rate": 6.732387220832604e-06, + "loss": 0.2913, + "step": 20269 + }, + { + "epoch": 0.40577534219152717, + "grad_norm": 1.2378454208374023, + "learning_rate": 6.732083114632935e-06, + "loss": 0.3001, + "step": 20270 + }, + { + "epoch": 0.4057953607086555, + "grad_norm": 1.1979457139968872, + "learning_rate": 6.731779001151964e-06, + "loss": 0.3012, + "step": 20271 + }, + { + "epoch": 0.40581537922578387, + "grad_norm": 1.032395839691162, + "learning_rate": 6.731474880390964e-06, + "loss": 0.3014, + "step": 20272 + }, + { + "epoch": 0.4058353977429122, + "grad_norm": 1.101804256439209, + "learning_rate": 6.731170752351218e-06, + "loss": 0.3708, + "step": 20273 + }, + { + "epoch": 0.4058554162600405, + "grad_norm": 1.043851375579834, + "learning_rate": 6.730866617034003e-06, + "loss": 0.2714, + "step": 20274 + }, + { + "epoch": 0.40587543477716886, + "grad_norm": 1.0291826725006104, + "learning_rate": 6.7305624744405965e-06, + "loss": 0.2866, + "step": 20275 + }, + { + "epoch": 0.4058954532942972, + "grad_norm": 1.1157763004302979, + "learning_rate": 6.730258324572276e-06, + "loss": 0.3107, + "step": 20276 + }, + { + "epoch": 0.40591547181142557, + "grad_norm": 1.053515076637268, + "learning_rate": 6.7299541674303235e-06, + "loss": 0.3452, + "step": 20277 + }, + { + "epoch": 0.4059354903285539, + "grad_norm": 1.1290820837020874, + "learning_rate": 6.7296500030160175e-06, + "loss": 0.3478, + "step": 20278 + }, + { + "epoch": 0.40595550884568227, + "grad_norm": 0.9900777339935303, + "learning_rate": 6.7293458313306325e-06, + "loss": 0.3068, + "step": 20279 + }, + { + "epoch": 0.4059755273628106, + "grad_norm": 1.0214085578918457, + "learning_rate": 6.729041652375452e-06, + "loss": 0.292, + "step": 20280 + }, + { + "epoch": 0.40599554587993897, + "grad_norm": 1.1032518148422241, + "learning_rate": 6.728737466151752e-06, + "loss": 0.2855, + "step": 20281 + }, + { + "epoch": 0.40601556439706726, + "grad_norm": 1.2710407972335815, + "learning_rate": 6.72843327266081e-06, + "loss": 0.3515, + "step": 20282 + }, + { + "epoch": 0.4060355829141956, + "grad_norm": 0.9988778233528137, + "learning_rate": 6.728129071903908e-06, + "loss": 0.2936, + "step": 20283 + }, + { + "epoch": 0.40605560143132396, + "grad_norm": 1.0412516593933105, + "learning_rate": 6.7278248638823215e-06, + "loss": 0.2891, + "step": 20284 + }, + { + "epoch": 0.4060756199484523, + "grad_norm": 1.1325186491012573, + "learning_rate": 6.727520648597334e-06, + "loss": 0.3288, + "step": 20285 + }, + { + "epoch": 0.40609563846558067, + "grad_norm": 1.126804232597351, + "learning_rate": 6.7272164260502195e-06, + "loss": 0.3007, + "step": 20286 + }, + { + "epoch": 0.406115656982709, + "grad_norm": 1.046972632408142, + "learning_rate": 6.726912196242259e-06, + "loss": 0.2952, + "step": 20287 + }, + { + "epoch": 0.40613567549983737, + "grad_norm": 1.0906215906143188, + "learning_rate": 6.726607959174731e-06, + "loss": 0.3225, + "step": 20288 + }, + { + "epoch": 0.4061556940169657, + "grad_norm": 1.0381829738616943, + "learning_rate": 6.726303714848916e-06, + "loss": 0.3169, + "step": 20289 + }, + { + "epoch": 0.406175712534094, + "grad_norm": 2.073420286178589, + "learning_rate": 6.725999463266091e-06, + "loss": 0.8288, + "step": 20290 + }, + { + "epoch": 0.40619573105122236, + "grad_norm": 1.0875645875930786, + "learning_rate": 6.725695204427536e-06, + "loss": 0.3145, + "step": 20291 + }, + { + "epoch": 0.4062157495683507, + "grad_norm": 0.9973466396331787, + "learning_rate": 6.725390938334528e-06, + "loss": 0.2996, + "step": 20292 + }, + { + "epoch": 0.40623576808547907, + "grad_norm": 1.0962083339691162, + "learning_rate": 6.72508666498835e-06, + "loss": 0.3303, + "step": 20293 + }, + { + "epoch": 0.4062557866026074, + "grad_norm": 1.1106257438659668, + "learning_rate": 6.724782384390279e-06, + "loss": 0.3246, + "step": 20294 + }, + { + "epoch": 0.40627580511973577, + "grad_norm": 1.8241541385650635, + "learning_rate": 6.724478096541591e-06, + "loss": 0.8416, + "step": 20295 + }, + { + "epoch": 0.4062958236368641, + "grad_norm": 1.0986223220825195, + "learning_rate": 6.724173801443572e-06, + "loss": 0.2886, + "step": 20296 + }, + { + "epoch": 0.40631584215399247, + "grad_norm": 1.0230391025543213, + "learning_rate": 6.723869499097495e-06, + "loss": 0.2894, + "step": 20297 + }, + { + "epoch": 0.40633586067112076, + "grad_norm": 1.08726966381073, + "learning_rate": 6.723565189504643e-06, + "loss": 0.2797, + "step": 20298 + }, + { + "epoch": 0.4063558791882491, + "grad_norm": 1.9373666048049927, + "learning_rate": 6.723260872666293e-06, + "loss": 0.7892, + "step": 20299 + }, + { + "epoch": 0.40637589770537746, + "grad_norm": 1.1057052612304688, + "learning_rate": 6.722956548583725e-06, + "loss": 0.3365, + "step": 20300 + }, + { + "epoch": 0.4063959162225058, + "grad_norm": 1.2191966772079468, + "learning_rate": 6.722652217258219e-06, + "loss": 0.3477, + "step": 20301 + }, + { + "epoch": 0.40641593473963417, + "grad_norm": 1.1002784967422485, + "learning_rate": 6.722347878691054e-06, + "loss": 0.329, + "step": 20302 + }, + { + "epoch": 0.4064359532567625, + "grad_norm": 1.0831776857376099, + "learning_rate": 6.722043532883509e-06, + "loss": 0.2863, + "step": 20303 + }, + { + "epoch": 0.40645597177389087, + "grad_norm": 1.1788356304168701, + "learning_rate": 6.721739179836863e-06, + "loss": 0.3152, + "step": 20304 + }, + { + "epoch": 0.4064759902910192, + "grad_norm": 1.0391275882720947, + "learning_rate": 6.721434819552396e-06, + "loss": 0.3023, + "step": 20305 + }, + { + "epoch": 0.4064960088081475, + "grad_norm": 1.1609752178192139, + "learning_rate": 6.721130452031388e-06, + "loss": 0.3156, + "step": 20306 + }, + { + "epoch": 0.40651602732527586, + "grad_norm": 1.0923067331314087, + "learning_rate": 6.720826077275118e-06, + "loss": 0.2973, + "step": 20307 + }, + { + "epoch": 0.4065360458424042, + "grad_norm": 1.28690767288208, + "learning_rate": 6.720521695284864e-06, + "loss": 0.3257, + "step": 20308 + }, + { + "epoch": 0.40655606435953257, + "grad_norm": 1.141697645187378, + "learning_rate": 6.720217306061909e-06, + "loss": 0.3208, + "step": 20309 + }, + { + "epoch": 0.4065760828766609, + "grad_norm": 1.3134459257125854, + "learning_rate": 6.719912909607529e-06, + "loss": 0.3698, + "step": 20310 + }, + { + "epoch": 0.40659610139378927, + "grad_norm": 2.021026134490967, + "learning_rate": 6.719608505923007e-06, + "loss": 0.817, + "step": 20311 + }, + { + "epoch": 0.4066161199109176, + "grad_norm": 1.0849162340164185, + "learning_rate": 6.71930409500962e-06, + "loss": 0.2965, + "step": 20312 + }, + { + "epoch": 0.40663613842804597, + "grad_norm": 1.0952223539352417, + "learning_rate": 6.71899967686865e-06, + "loss": 0.3083, + "step": 20313 + }, + { + "epoch": 0.40665615694517426, + "grad_norm": 1.9101415872573853, + "learning_rate": 6.718695251501373e-06, + "loss": 0.8067, + "step": 20314 + }, + { + "epoch": 0.4066761754623026, + "grad_norm": 1.1210212707519531, + "learning_rate": 6.718390818909071e-06, + "loss": 0.3393, + "step": 20315 + }, + { + "epoch": 0.40669619397943096, + "grad_norm": 1.302836537361145, + "learning_rate": 6.718086379093025e-06, + "loss": 0.3171, + "step": 20316 + }, + { + "epoch": 0.4067162124965593, + "grad_norm": 1.112905502319336, + "learning_rate": 6.717781932054514e-06, + "loss": 0.3288, + "step": 20317 + }, + { + "epoch": 0.40673623101368767, + "grad_norm": 1.1385622024536133, + "learning_rate": 6.717477477794817e-06, + "loss": 0.3052, + "step": 20318 + }, + { + "epoch": 0.406756249530816, + "grad_norm": 1.063995599746704, + "learning_rate": 6.717173016315214e-06, + "loss": 0.2708, + "step": 20319 + }, + { + "epoch": 0.40677626804794437, + "grad_norm": 0.9908729791641235, + "learning_rate": 6.716868547616986e-06, + "loss": 0.2981, + "step": 20320 + }, + { + "epoch": 0.4067962865650727, + "grad_norm": 1.0614464282989502, + "learning_rate": 6.71656407170141e-06, + "loss": 0.3174, + "step": 20321 + }, + { + "epoch": 0.406816305082201, + "grad_norm": 1.1052525043487549, + "learning_rate": 6.71625958856977e-06, + "loss": 0.3128, + "step": 20322 + }, + { + "epoch": 0.40683632359932936, + "grad_norm": 1.101617455482483, + "learning_rate": 6.715955098223343e-06, + "loss": 0.3168, + "step": 20323 + }, + { + "epoch": 0.4068563421164577, + "grad_norm": 1.0962930917739868, + "learning_rate": 6.71565060066341e-06, + "loss": 0.3187, + "step": 20324 + }, + { + "epoch": 0.40687636063358606, + "grad_norm": 1.0281041860580444, + "learning_rate": 6.715346095891253e-06, + "loss": 0.3137, + "step": 20325 + }, + { + "epoch": 0.4068963791507144, + "grad_norm": 1.1003484725952148, + "learning_rate": 6.715041583908149e-06, + "loss": 0.2943, + "step": 20326 + }, + { + "epoch": 0.40691639766784277, + "grad_norm": 1.1441850662231445, + "learning_rate": 6.714737064715378e-06, + "loss": 0.2978, + "step": 20327 + }, + { + "epoch": 0.4069364161849711, + "grad_norm": 1.0599300861358643, + "learning_rate": 6.714432538314222e-06, + "loss": 0.3747, + "step": 20328 + }, + { + "epoch": 0.40695643470209947, + "grad_norm": 1.040293574333191, + "learning_rate": 6.714128004705962e-06, + "loss": 0.3445, + "step": 20329 + }, + { + "epoch": 0.40697645321922776, + "grad_norm": 1.0463191270828247, + "learning_rate": 6.713823463891875e-06, + "loss": 0.2843, + "step": 20330 + }, + { + "epoch": 0.4069964717363561, + "grad_norm": 1.1006829738616943, + "learning_rate": 6.713518915873245e-06, + "loss": 0.3103, + "step": 20331 + }, + { + "epoch": 0.40701649025348446, + "grad_norm": 1.2002252340316772, + "learning_rate": 6.713214360651348e-06, + "loss": 0.306, + "step": 20332 + }, + { + "epoch": 0.4070365087706128, + "grad_norm": 0.9893474578857422, + "learning_rate": 6.712909798227468e-06, + "loss": 0.3173, + "step": 20333 + }, + { + "epoch": 0.40705652728774117, + "grad_norm": 1.076568603515625, + "learning_rate": 6.712605228602882e-06, + "loss": 0.3028, + "step": 20334 + }, + { + "epoch": 0.4070765458048695, + "grad_norm": 1.1844897270202637, + "learning_rate": 6.712300651778874e-06, + "loss": 0.3465, + "step": 20335 + }, + { + "epoch": 0.40709656432199787, + "grad_norm": 1.1444987058639526, + "learning_rate": 6.711996067756723e-06, + "loss": 0.3099, + "step": 20336 + }, + { + "epoch": 0.4071165828391262, + "grad_norm": 1.1713097095489502, + "learning_rate": 6.711691476537707e-06, + "loss": 0.3201, + "step": 20337 + }, + { + "epoch": 0.4071366013562545, + "grad_norm": 1.0942541360855103, + "learning_rate": 6.711386878123111e-06, + "loss": 0.3191, + "step": 20338 + }, + { + "epoch": 0.40715661987338286, + "grad_norm": 1.8790169954299927, + "learning_rate": 6.71108227251421e-06, + "loss": 0.8315, + "step": 20339 + }, + { + "epoch": 0.4071766383905112, + "grad_norm": 1.1769863367080688, + "learning_rate": 6.710777659712288e-06, + "loss": 0.3272, + "step": 20340 + }, + { + "epoch": 0.40719665690763956, + "grad_norm": 1.124242901802063, + "learning_rate": 6.710473039718626e-06, + "loss": 0.3324, + "step": 20341 + }, + { + "epoch": 0.4072166754247679, + "grad_norm": 1.2290775775909424, + "learning_rate": 6.710168412534503e-06, + "loss": 0.3214, + "step": 20342 + }, + { + "epoch": 0.40723669394189627, + "grad_norm": 1.2140880823135376, + "learning_rate": 6.7098637781612e-06, + "loss": 0.3041, + "step": 20343 + }, + { + "epoch": 0.4072567124590246, + "grad_norm": 1.126403570175171, + "learning_rate": 6.709559136599998e-06, + "loss": 0.3476, + "step": 20344 + }, + { + "epoch": 0.40727673097615297, + "grad_norm": 1.08104407787323, + "learning_rate": 6.709254487852177e-06, + "loss": 0.3428, + "step": 20345 + }, + { + "epoch": 0.40729674949328126, + "grad_norm": 1.0767767429351807, + "learning_rate": 6.708949831919018e-06, + "loss": 0.3904, + "step": 20346 + }, + { + "epoch": 0.4073167680104096, + "grad_norm": 1.9057570695877075, + "learning_rate": 6.7086451688018005e-06, + "loss": 0.838, + "step": 20347 + }, + { + "epoch": 0.40733678652753796, + "grad_norm": 1.0962799787521362, + "learning_rate": 6.708340498501808e-06, + "loss": 0.3045, + "step": 20348 + }, + { + "epoch": 0.4073568050446663, + "grad_norm": 1.0681376457214355, + "learning_rate": 6.708035821020319e-06, + "loss": 0.3099, + "step": 20349 + }, + { + "epoch": 0.40737682356179467, + "grad_norm": 1.0210076570510864, + "learning_rate": 6.707731136358615e-06, + "loss": 0.3144, + "step": 20350 + }, + { + "epoch": 0.407396842078923, + "grad_norm": 1.2770920991897583, + "learning_rate": 6.707426444517977e-06, + "loss": 0.345, + "step": 20351 + }, + { + "epoch": 0.40741686059605137, + "grad_norm": 1.1365175247192383, + "learning_rate": 6.707121745499685e-06, + "loss": 0.2983, + "step": 20352 + }, + { + "epoch": 0.4074368791131797, + "grad_norm": 1.3428046703338623, + "learning_rate": 6.706817039305021e-06, + "loss": 0.3516, + "step": 20353 + }, + { + "epoch": 0.407456897630308, + "grad_norm": 1.020916223526001, + "learning_rate": 6.706512325935266e-06, + "loss": 0.3222, + "step": 20354 + }, + { + "epoch": 0.40747691614743636, + "grad_norm": 0.9974054098129272, + "learning_rate": 6.7062076053917e-06, + "loss": 0.247, + "step": 20355 + }, + { + "epoch": 0.4074969346645647, + "grad_norm": 1.1695560216903687, + "learning_rate": 6.705902877675605e-06, + "loss": 0.3131, + "step": 20356 + }, + { + "epoch": 0.40751695318169306, + "grad_norm": 1.0744823217391968, + "learning_rate": 6.705598142788261e-06, + "loss": 0.319, + "step": 20357 + }, + { + "epoch": 0.4075369716988214, + "grad_norm": 1.1042791604995728, + "learning_rate": 6.70529340073095e-06, + "loss": 0.3137, + "step": 20358 + }, + { + "epoch": 0.40755699021594977, + "grad_norm": 1.1601972579956055, + "learning_rate": 6.704988651504951e-06, + "loss": 0.3004, + "step": 20359 + }, + { + "epoch": 0.4075770087330781, + "grad_norm": 1.1849924325942993, + "learning_rate": 6.704683895111547e-06, + "loss": 0.3016, + "step": 20360 + }, + { + "epoch": 0.40759702725020647, + "grad_norm": 2.0910439491271973, + "learning_rate": 6.704379131552019e-06, + "loss": 0.7789, + "step": 20361 + }, + { + "epoch": 0.40761704576733476, + "grad_norm": 1.1383200883865356, + "learning_rate": 6.704074360827648e-06, + "loss": 0.3382, + "step": 20362 + }, + { + "epoch": 0.4076370642844631, + "grad_norm": 1.7760554552078247, + "learning_rate": 6.703769582939715e-06, + "loss": 0.7844, + "step": 20363 + }, + { + "epoch": 0.40765708280159146, + "grad_norm": 1.0356141328811646, + "learning_rate": 6.703464797889503e-06, + "loss": 0.325, + "step": 20364 + }, + { + "epoch": 0.4076771013187198, + "grad_norm": 1.2753530740737915, + "learning_rate": 6.70316000567829e-06, + "loss": 0.3062, + "step": 20365 + }, + { + "epoch": 0.40769711983584817, + "grad_norm": 1.2276843786239624, + "learning_rate": 6.702855206307358e-06, + "loss": 0.3025, + "step": 20366 + }, + { + "epoch": 0.4077171383529765, + "grad_norm": 1.0861454010009766, + "learning_rate": 6.702550399777991e-06, + "loss": 0.3277, + "step": 20367 + }, + { + "epoch": 0.40773715687010487, + "grad_norm": 0.9737551212310791, + "learning_rate": 6.702245586091467e-06, + "loss": 0.2846, + "step": 20368 + }, + { + "epoch": 0.4077571753872332, + "grad_norm": 1.2285571098327637, + "learning_rate": 6.701940765249071e-06, + "loss": 0.3811, + "step": 20369 + }, + { + "epoch": 0.4077771939043615, + "grad_norm": 1.2178215980529785, + "learning_rate": 6.70163593725208e-06, + "loss": 0.3349, + "step": 20370 + }, + { + "epoch": 0.40779721242148986, + "grad_norm": 1.0081721544265747, + "learning_rate": 6.701331102101779e-06, + "loss": 0.2715, + "step": 20371 + }, + { + "epoch": 0.4078172309386182, + "grad_norm": 1.085097074508667, + "learning_rate": 6.701026259799448e-06, + "loss": 0.329, + "step": 20372 + }, + { + "epoch": 0.40783724945574656, + "grad_norm": 1.1395460367202759, + "learning_rate": 6.700721410346367e-06, + "loss": 0.3073, + "step": 20373 + }, + { + "epoch": 0.4078572679728749, + "grad_norm": 1.0966945886611938, + "learning_rate": 6.700416553743821e-06, + "loss": 0.2768, + "step": 20374 + }, + { + "epoch": 0.40787728649000327, + "grad_norm": 1.1973257064819336, + "learning_rate": 6.700111689993089e-06, + "loss": 0.2993, + "step": 20375 + }, + { + "epoch": 0.4078973050071316, + "grad_norm": 1.1440608501434326, + "learning_rate": 6.699806819095456e-06, + "loss": 0.3256, + "step": 20376 + }, + { + "epoch": 0.40791732352425997, + "grad_norm": 1.0750007629394531, + "learning_rate": 6.699501941052198e-06, + "loss": 0.3088, + "step": 20377 + }, + { + "epoch": 0.40793734204138826, + "grad_norm": 1.0574573278427124, + "learning_rate": 6.699197055864599e-06, + "loss": 0.287, + "step": 20378 + }, + { + "epoch": 0.4079573605585166, + "grad_norm": 1.3240249156951904, + "learning_rate": 6.6988921635339424e-06, + "loss": 0.3537, + "step": 20379 + }, + { + "epoch": 0.40797737907564496, + "grad_norm": 1.074918270111084, + "learning_rate": 6.698587264061509e-06, + "loss": 0.3367, + "step": 20380 + }, + { + "epoch": 0.4079973975927733, + "grad_norm": 1.1093286275863647, + "learning_rate": 6.69828235744858e-06, + "loss": 0.3463, + "step": 20381 + }, + { + "epoch": 0.40801741610990166, + "grad_norm": 1.2331925630569458, + "learning_rate": 6.697977443696439e-06, + "loss": 0.3085, + "step": 20382 + }, + { + "epoch": 0.40803743462703, + "grad_norm": 1.1260876655578613, + "learning_rate": 6.697672522806363e-06, + "loss": 0.3819, + "step": 20383 + }, + { + "epoch": 0.40805745314415837, + "grad_norm": 1.8866082429885864, + "learning_rate": 6.69736759477964e-06, + "loss": 0.8232, + "step": 20384 + }, + { + "epoch": 0.4080774716612867, + "grad_norm": 1.1617684364318848, + "learning_rate": 6.697062659617547e-06, + "loss": 0.3652, + "step": 20385 + }, + { + "epoch": 0.408097490178415, + "grad_norm": 1.2185255289077759, + "learning_rate": 6.696757717321368e-06, + "loss": 0.293, + "step": 20386 + }, + { + "epoch": 0.40811750869554336, + "grad_norm": 1.1572831869125366, + "learning_rate": 6.696452767892386e-06, + "loss": 0.3313, + "step": 20387 + }, + { + "epoch": 0.4081375272126717, + "grad_norm": 1.040955901145935, + "learning_rate": 6.696147811331881e-06, + "loss": 0.3655, + "step": 20388 + }, + { + "epoch": 0.40815754572980006, + "grad_norm": 1.059291124343872, + "learning_rate": 6.695842847641137e-06, + "loss": 0.3287, + "step": 20389 + }, + { + "epoch": 0.4081775642469284, + "grad_norm": 1.897955298423767, + "learning_rate": 6.695537876821432e-06, + "loss": 0.8683, + "step": 20390 + }, + { + "epoch": 0.40819758276405677, + "grad_norm": 1.8403712511062622, + "learning_rate": 6.695232898874051e-06, + "loss": 0.818, + "step": 20391 + }, + { + "epoch": 0.4082176012811851, + "grad_norm": 1.1247045993804932, + "learning_rate": 6.694927913800275e-06, + "loss": 0.3187, + "step": 20392 + }, + { + "epoch": 0.40823761979831347, + "grad_norm": 1.0914605855941772, + "learning_rate": 6.69462292160139e-06, + "loss": 0.2953, + "step": 20393 + }, + { + "epoch": 0.40825763831544176, + "grad_norm": 1.2945398092269897, + "learning_rate": 6.694317922278672e-06, + "loss": 0.3327, + "step": 20394 + }, + { + "epoch": 0.4082776568325701, + "grad_norm": 1.0667266845703125, + "learning_rate": 6.6940129158334086e-06, + "loss": 0.3542, + "step": 20395 + }, + { + "epoch": 0.40829767534969846, + "grad_norm": 1.2383605241775513, + "learning_rate": 6.693707902266877e-06, + "loss": 0.2907, + "step": 20396 + }, + { + "epoch": 0.4083176938668268, + "grad_norm": 1.1274555921554565, + "learning_rate": 6.693402881580364e-06, + "loss": 0.3292, + "step": 20397 + }, + { + "epoch": 0.40833771238395516, + "grad_norm": 1.100846767425537, + "learning_rate": 6.693097853775148e-06, + "loss": 0.3209, + "step": 20398 + }, + { + "epoch": 0.4083577309010835, + "grad_norm": 1.056353211402893, + "learning_rate": 6.692792818852514e-06, + "loss": 0.3046, + "step": 20399 + }, + { + "epoch": 0.40837774941821187, + "grad_norm": 1.0754793882369995, + "learning_rate": 6.692487776813743e-06, + "loss": 0.3149, + "step": 20400 + }, + { + "epoch": 0.4083977679353402, + "grad_norm": 1.0904991626739502, + "learning_rate": 6.692182727660117e-06, + "loss": 0.3092, + "step": 20401 + }, + { + "epoch": 0.4084177864524685, + "grad_norm": 1.0081909894943237, + "learning_rate": 6.691877671392921e-06, + "loss": 0.2865, + "step": 20402 + }, + { + "epoch": 0.40843780496959686, + "grad_norm": 1.0925872325897217, + "learning_rate": 6.691572608013433e-06, + "loss": 0.328, + "step": 20403 + }, + { + "epoch": 0.4084578234867252, + "grad_norm": 1.2395986318588257, + "learning_rate": 6.691267537522939e-06, + "loss": 0.3015, + "step": 20404 + }, + { + "epoch": 0.40847784200385356, + "grad_norm": 1.128341794013977, + "learning_rate": 6.690962459922718e-06, + "loss": 0.327, + "step": 20405 + }, + { + "epoch": 0.4084978605209819, + "grad_norm": 1.0728578567504883, + "learning_rate": 6.690657375214058e-06, + "loss": 0.3444, + "step": 20406 + }, + { + "epoch": 0.40851787903811027, + "grad_norm": 1.089537262916565, + "learning_rate": 6.690352283398237e-06, + "loss": 0.353, + "step": 20407 + }, + { + "epoch": 0.4085378975552386, + "grad_norm": 1.1983269453048706, + "learning_rate": 6.690047184476539e-06, + "loss": 0.3601, + "step": 20408 + }, + { + "epoch": 0.40855791607236697, + "grad_norm": 1.0938527584075928, + "learning_rate": 6.689742078450246e-06, + "loss": 0.3075, + "step": 20409 + }, + { + "epoch": 0.40857793458949526, + "grad_norm": 1.133445382118225, + "learning_rate": 6.689436965320642e-06, + "loss": 0.3362, + "step": 20410 + }, + { + "epoch": 0.4085979531066236, + "grad_norm": 1.2326570749282837, + "learning_rate": 6.6891318450890065e-06, + "loss": 0.313, + "step": 20411 + }, + { + "epoch": 0.40861797162375196, + "grad_norm": 1.2897623777389526, + "learning_rate": 6.6888267177566245e-06, + "loss": 0.3099, + "step": 20412 + }, + { + "epoch": 0.4086379901408803, + "grad_norm": 0.992211639881134, + "learning_rate": 6.68852158332478e-06, + "loss": 0.2919, + "step": 20413 + }, + { + "epoch": 0.40865800865800866, + "grad_norm": 1.0593488216400146, + "learning_rate": 6.688216441794753e-06, + "loss": 0.3143, + "step": 20414 + }, + { + "epoch": 0.408678027175137, + "grad_norm": 1.1548752784729004, + "learning_rate": 6.687911293167828e-06, + "loss": 0.3465, + "step": 20415 + }, + { + "epoch": 0.40869804569226537, + "grad_norm": 1.0396835803985596, + "learning_rate": 6.687606137445286e-06, + "loss": 0.2975, + "step": 20416 + }, + { + "epoch": 0.4087180642093937, + "grad_norm": 1.1057746410369873, + "learning_rate": 6.687300974628413e-06, + "loss": 0.3333, + "step": 20417 + }, + { + "epoch": 0.408738082726522, + "grad_norm": 1.188122034072876, + "learning_rate": 6.686995804718488e-06, + "loss": 0.3208, + "step": 20418 + }, + { + "epoch": 0.40875810124365036, + "grad_norm": 1.101308822631836, + "learning_rate": 6.686690627716796e-06, + "loss": 0.3074, + "step": 20419 + }, + { + "epoch": 0.4087781197607787, + "grad_norm": 1.2575350999832153, + "learning_rate": 6.6863854436246214e-06, + "loss": 0.3582, + "step": 20420 + }, + { + "epoch": 0.40879813827790706, + "grad_norm": 1.0344021320343018, + "learning_rate": 6.686080252443243e-06, + "loss": 0.3345, + "step": 20421 + }, + { + "epoch": 0.4088181567950354, + "grad_norm": 1.1779170036315918, + "learning_rate": 6.6857750541739475e-06, + "loss": 0.3202, + "step": 20422 + }, + { + "epoch": 0.40883817531216377, + "grad_norm": 1.0631096363067627, + "learning_rate": 6.685469848818016e-06, + "loss": 0.2921, + "step": 20423 + }, + { + "epoch": 0.4088581938292921, + "grad_norm": 1.0962682962417603, + "learning_rate": 6.685164636376731e-06, + "loss": 0.3125, + "step": 20424 + }, + { + "epoch": 0.40887821234642047, + "grad_norm": 1.092640995979309, + "learning_rate": 6.684859416851377e-06, + "loss": 0.3168, + "step": 20425 + }, + { + "epoch": 0.40889823086354876, + "grad_norm": 1.094921350479126, + "learning_rate": 6.684554190243238e-06, + "loss": 0.2891, + "step": 20426 + }, + { + "epoch": 0.4089182493806771, + "grad_norm": 1.118427038192749, + "learning_rate": 6.684248956553593e-06, + "loss": 0.2617, + "step": 20427 + }, + { + "epoch": 0.40893826789780546, + "grad_norm": 1.8316129446029663, + "learning_rate": 6.683943715783731e-06, + "loss": 0.7873, + "step": 20428 + }, + { + "epoch": 0.4089582864149338, + "grad_norm": 1.2604570388793945, + "learning_rate": 6.68363846793493e-06, + "loss": 0.3344, + "step": 20429 + }, + { + "epoch": 0.40897830493206216, + "grad_norm": 1.989182710647583, + "learning_rate": 6.683333213008475e-06, + "loss": 0.8026, + "step": 20430 + }, + { + "epoch": 0.4089983234491905, + "grad_norm": 0.9993500113487244, + "learning_rate": 6.68302795100565e-06, + "loss": 0.3095, + "step": 20431 + }, + { + "epoch": 0.40901834196631887, + "grad_norm": 1.104885458946228, + "learning_rate": 6.682722681927738e-06, + "loss": 0.2857, + "step": 20432 + }, + { + "epoch": 0.40903836048344716, + "grad_norm": 1.2028415203094482, + "learning_rate": 6.682417405776023e-06, + "loss": 0.3318, + "step": 20433 + }, + { + "epoch": 0.4090583790005755, + "grad_norm": 1.1087491512298584, + "learning_rate": 6.682112122551785e-06, + "loss": 0.2978, + "step": 20434 + }, + { + "epoch": 0.40907839751770386, + "grad_norm": 1.2069002389907837, + "learning_rate": 6.681806832256311e-06, + "loss": 0.3063, + "step": 20435 + }, + { + "epoch": 0.4090984160348322, + "grad_norm": 1.8360540866851807, + "learning_rate": 6.6815015348908815e-06, + "loss": 0.7489, + "step": 20436 + }, + { + "epoch": 0.40911843455196056, + "grad_norm": 1.1243430376052856, + "learning_rate": 6.6811962304567834e-06, + "loss": 0.3653, + "step": 20437 + }, + { + "epoch": 0.4091384530690889, + "grad_norm": 1.0553642511367798, + "learning_rate": 6.680890918955296e-06, + "loss": 0.2908, + "step": 20438 + }, + { + "epoch": 0.40915847158621726, + "grad_norm": 1.0501593351364136, + "learning_rate": 6.680585600387708e-06, + "loss": 0.3248, + "step": 20439 + }, + { + "epoch": 0.4091784901033456, + "grad_norm": 1.1158068180084229, + "learning_rate": 6.680280274755297e-06, + "loss": 0.2952, + "step": 20440 + }, + { + "epoch": 0.4091985086204739, + "grad_norm": 1.1691080331802368, + "learning_rate": 6.679974942059351e-06, + "loss": 0.3152, + "step": 20441 + }, + { + "epoch": 0.40921852713760226, + "grad_norm": 1.1524605751037598, + "learning_rate": 6.679669602301151e-06, + "loss": 0.3444, + "step": 20442 + }, + { + "epoch": 0.4092385456547306, + "grad_norm": 1.093940019607544, + "learning_rate": 6.679364255481982e-06, + "loss": 0.3269, + "step": 20443 + }, + { + "epoch": 0.40925856417185896, + "grad_norm": 1.184554100036621, + "learning_rate": 6.679058901603127e-06, + "loss": 0.3338, + "step": 20444 + }, + { + "epoch": 0.4092785826889873, + "grad_norm": 1.1759389638900757, + "learning_rate": 6.678753540665869e-06, + "loss": 0.3274, + "step": 20445 + }, + { + "epoch": 0.40929860120611566, + "grad_norm": 1.1738332509994507, + "learning_rate": 6.678448172671493e-06, + "loss": 0.2733, + "step": 20446 + }, + { + "epoch": 0.409318619723244, + "grad_norm": 1.0294013023376465, + "learning_rate": 6.678142797621282e-06, + "loss": 0.2968, + "step": 20447 + }, + { + "epoch": 0.40933863824037237, + "grad_norm": 1.1909130811691284, + "learning_rate": 6.67783741551652e-06, + "loss": 0.2937, + "step": 20448 + }, + { + "epoch": 0.40935865675750066, + "grad_norm": 1.9293283224105835, + "learning_rate": 6.6775320263584905e-06, + "loss": 0.8649, + "step": 20449 + }, + { + "epoch": 0.409378675274629, + "grad_norm": 0.9648942351341248, + "learning_rate": 6.677226630148477e-06, + "loss": 0.2928, + "step": 20450 + }, + { + "epoch": 0.40939869379175736, + "grad_norm": 1.4597492218017578, + "learning_rate": 6.676921226887765e-06, + "loss": 0.2885, + "step": 20451 + }, + { + "epoch": 0.4094187123088857, + "grad_norm": 1.2025821208953857, + "learning_rate": 6.676615816577635e-06, + "loss": 0.3048, + "step": 20452 + }, + { + "epoch": 0.40943873082601406, + "grad_norm": 1.263292670249939, + "learning_rate": 6.676310399219375e-06, + "loss": 0.3154, + "step": 20453 + }, + { + "epoch": 0.4094587493431424, + "grad_norm": 1.1485698223114014, + "learning_rate": 6.676004974814265e-06, + "loss": 0.3218, + "step": 20454 + }, + { + "epoch": 0.40947876786027076, + "grad_norm": 1.1177023649215698, + "learning_rate": 6.675699543363591e-06, + "loss": 0.304, + "step": 20455 + }, + { + "epoch": 0.4094987863773991, + "grad_norm": 1.0354087352752686, + "learning_rate": 6.675394104868637e-06, + "loss": 0.25, + "step": 20456 + }, + { + "epoch": 0.4095188048945274, + "grad_norm": 1.0586323738098145, + "learning_rate": 6.675088659330687e-06, + "loss": 0.3016, + "step": 20457 + }, + { + "epoch": 0.40953882341165576, + "grad_norm": 1.0225660800933838, + "learning_rate": 6.674783206751025e-06, + "loss": 0.2794, + "step": 20458 + }, + { + "epoch": 0.4095588419287841, + "grad_norm": 1.0464774370193481, + "learning_rate": 6.674477747130934e-06, + "loss": 0.2773, + "step": 20459 + }, + { + "epoch": 0.40957886044591246, + "grad_norm": 1.3317737579345703, + "learning_rate": 6.674172280471701e-06, + "loss": 0.3381, + "step": 20460 + }, + { + "epoch": 0.4095988789630408, + "grad_norm": 1.0516760349273682, + "learning_rate": 6.673866806774606e-06, + "loss": 0.313, + "step": 20461 + }, + { + "epoch": 0.40961889748016916, + "grad_norm": 1.0816013813018799, + "learning_rate": 6.673561326040935e-06, + "loss": 0.3053, + "step": 20462 + }, + { + "epoch": 0.4096389159972975, + "grad_norm": 1.0140105485916138, + "learning_rate": 6.673255838271972e-06, + "loss": 0.2706, + "step": 20463 + }, + { + "epoch": 0.40965893451442587, + "grad_norm": 1.0351276397705078, + "learning_rate": 6.672950343469003e-06, + "loss": 0.3035, + "step": 20464 + }, + { + "epoch": 0.40967895303155416, + "grad_norm": 1.154334306716919, + "learning_rate": 6.67264484163331e-06, + "loss": 0.306, + "step": 20465 + }, + { + "epoch": 0.4096989715486825, + "grad_norm": 1.2890607118606567, + "learning_rate": 6.67233933276618e-06, + "loss": 0.3106, + "step": 20466 + }, + { + "epoch": 0.40971899006581086, + "grad_norm": 1.151418924331665, + "learning_rate": 6.672033816868893e-06, + "loss": 0.3356, + "step": 20467 + }, + { + "epoch": 0.4097390085829392, + "grad_norm": 1.1046836376190186, + "learning_rate": 6.671728293942735e-06, + "loss": 0.3192, + "step": 20468 + }, + { + "epoch": 0.40975902710006756, + "grad_norm": 1.0726596117019653, + "learning_rate": 6.671422763988993e-06, + "loss": 0.3125, + "step": 20469 + }, + { + "epoch": 0.4097790456171959, + "grad_norm": 0.9670843482017517, + "learning_rate": 6.671117227008948e-06, + "loss": 0.2922, + "step": 20470 + }, + { + "epoch": 0.40979906413432426, + "grad_norm": 1.0931389331817627, + "learning_rate": 6.670811683003889e-06, + "loss": 0.2877, + "step": 20471 + }, + { + "epoch": 0.4098190826514526, + "grad_norm": 1.9846588373184204, + "learning_rate": 6.670506131975093e-06, + "loss": 0.7719, + "step": 20472 + }, + { + "epoch": 0.4098391011685809, + "grad_norm": 1.1411542892456055, + "learning_rate": 6.670200573923852e-06, + "loss": 0.2853, + "step": 20473 + }, + { + "epoch": 0.40985911968570926, + "grad_norm": 1.1575604677200317, + "learning_rate": 6.669895008851445e-06, + "loss": 0.3379, + "step": 20474 + }, + { + "epoch": 0.4098791382028376, + "grad_norm": 1.0363519191741943, + "learning_rate": 6.669589436759158e-06, + "loss": 0.2959, + "step": 20475 + }, + { + "epoch": 0.40989915671996596, + "grad_norm": 1.1582896709442139, + "learning_rate": 6.669283857648278e-06, + "loss": 0.315, + "step": 20476 + }, + { + "epoch": 0.4099191752370943, + "grad_norm": 1.0777277946472168, + "learning_rate": 6.668978271520088e-06, + "loss": 0.3423, + "step": 20477 + }, + { + "epoch": 0.40993919375422266, + "grad_norm": 1.187971830368042, + "learning_rate": 6.668672678375873e-06, + "loss": 0.3119, + "step": 20478 + }, + { + "epoch": 0.409959212271351, + "grad_norm": 1.8491929769515991, + "learning_rate": 6.6683670782169164e-06, + "loss": 0.8066, + "step": 20479 + }, + { + "epoch": 0.40997923078847937, + "grad_norm": 1.1568588018417358, + "learning_rate": 6.6680614710445025e-06, + "loss": 0.3122, + "step": 20480 + }, + { + "epoch": 0.40999924930560766, + "grad_norm": 1.1806789636611938, + "learning_rate": 6.667755856859919e-06, + "loss": 0.3616, + "step": 20481 + }, + { + "epoch": 0.410019267822736, + "grad_norm": 1.0727607011795044, + "learning_rate": 6.6674502356644475e-06, + "loss": 0.3011, + "step": 20482 + }, + { + "epoch": 0.41003928633986436, + "grad_norm": 1.1242127418518066, + "learning_rate": 6.6671446074593746e-06, + "loss": 0.3438, + "step": 20483 + }, + { + "epoch": 0.4100593048569927, + "grad_norm": 1.0771815776824951, + "learning_rate": 6.666838972245986e-06, + "loss": 0.318, + "step": 20484 + }, + { + "epoch": 0.41007932337412106, + "grad_norm": 1.1173877716064453, + "learning_rate": 6.666533330025562e-06, + "loss": 0.2958, + "step": 20485 + }, + { + "epoch": 0.4100993418912494, + "grad_norm": 1.1014636754989624, + "learning_rate": 6.666227680799393e-06, + "loss": 0.3464, + "step": 20486 + }, + { + "epoch": 0.41011936040837776, + "grad_norm": 0.9590832591056824, + "learning_rate": 6.665922024568761e-06, + "loss": 0.2905, + "step": 20487 + }, + { + "epoch": 0.4101393789255061, + "grad_norm": 1.1066676378250122, + "learning_rate": 6.66561636133495e-06, + "loss": 0.2749, + "step": 20488 + }, + { + "epoch": 0.4101593974426344, + "grad_norm": 1.1705042123794556, + "learning_rate": 6.665310691099246e-06, + "loss": 0.3919, + "step": 20489 + }, + { + "epoch": 0.41017941595976276, + "grad_norm": 1.0833429098129272, + "learning_rate": 6.665005013862936e-06, + "loss": 0.3153, + "step": 20490 + }, + { + "epoch": 0.4101994344768911, + "grad_norm": 1.1590410470962524, + "learning_rate": 6.664699329627304e-06, + "loss": 0.307, + "step": 20491 + }, + { + "epoch": 0.41021945299401946, + "grad_norm": 1.0796226263046265, + "learning_rate": 6.6643936383936316e-06, + "loss": 0.3617, + "step": 20492 + }, + { + "epoch": 0.4102394715111478, + "grad_norm": 1.1280343532562256, + "learning_rate": 6.664087940163209e-06, + "loss": 0.2869, + "step": 20493 + }, + { + "epoch": 0.41025949002827616, + "grad_norm": 1.0478694438934326, + "learning_rate": 6.663782234937316e-06, + "loss": 0.3247, + "step": 20494 + }, + { + "epoch": 0.4102795085454045, + "grad_norm": 1.2078678607940674, + "learning_rate": 6.663476522717243e-06, + "loss": 0.3142, + "step": 20495 + }, + { + "epoch": 0.41029952706253286, + "grad_norm": 1.049530029296875, + "learning_rate": 6.663170803504271e-06, + "loss": 0.3391, + "step": 20496 + }, + { + "epoch": 0.41031954557966116, + "grad_norm": 1.244178295135498, + "learning_rate": 6.662865077299688e-06, + "loss": 0.3141, + "step": 20497 + }, + { + "epoch": 0.4103395640967895, + "grad_norm": 1.055254578590393, + "learning_rate": 6.662559344104777e-06, + "loss": 0.3459, + "step": 20498 + }, + { + "epoch": 0.41035958261391786, + "grad_norm": 1.074521780014038, + "learning_rate": 6.6622536039208265e-06, + "loss": 0.2915, + "step": 20499 + }, + { + "epoch": 0.4103796011310462, + "grad_norm": 1.842885136604309, + "learning_rate": 6.661947856749117e-06, + "loss": 0.7436, + "step": 20500 + }, + { + "epoch": 0.41039961964817456, + "grad_norm": 0.9776528477668762, + "learning_rate": 6.661642102590937e-06, + "loss": 0.2772, + "step": 20501 + }, + { + "epoch": 0.4104196381653029, + "grad_norm": 1.3031193017959595, + "learning_rate": 6.661336341447573e-06, + "loss": 0.3334, + "step": 20502 + }, + { + "epoch": 0.41043965668243126, + "grad_norm": 1.1669243574142456, + "learning_rate": 6.6610305733203065e-06, + "loss": 0.3219, + "step": 20503 + }, + { + "epoch": 0.4104596751995596, + "grad_norm": 1.0655138492584229, + "learning_rate": 6.660724798210426e-06, + "loss": 0.2824, + "step": 20504 + }, + { + "epoch": 0.4104796937166879, + "grad_norm": 1.144738793373108, + "learning_rate": 6.660419016119215e-06, + "loss": 0.2972, + "step": 20505 + }, + { + "epoch": 0.41049971223381626, + "grad_norm": 1.0822906494140625, + "learning_rate": 6.6601132270479605e-06, + "loss": 0.3501, + "step": 20506 + }, + { + "epoch": 0.4105197307509446, + "grad_norm": 1.0735410451889038, + "learning_rate": 6.659807430997946e-06, + "loss": 0.365, + "step": 20507 + }, + { + "epoch": 0.41053974926807296, + "grad_norm": 2.1119332313537598, + "learning_rate": 6.65950162797046e-06, + "loss": 0.8362, + "step": 20508 + }, + { + "epoch": 0.4105597677852013, + "grad_norm": 1.2649282217025757, + "learning_rate": 6.659195817966785e-06, + "loss": 0.3089, + "step": 20509 + }, + { + "epoch": 0.41057978630232966, + "grad_norm": 1.166404366493225, + "learning_rate": 6.658890000988209e-06, + "loss": 0.3264, + "step": 20510 + }, + { + "epoch": 0.410599804819458, + "grad_norm": 0.934481143951416, + "learning_rate": 6.658584177036015e-06, + "loss": 0.2966, + "step": 20511 + }, + { + "epoch": 0.41061982333658636, + "grad_norm": 1.1647984981536865, + "learning_rate": 6.658278346111492e-06, + "loss": 0.3314, + "step": 20512 + }, + { + "epoch": 0.41063984185371466, + "grad_norm": 1.0743408203125, + "learning_rate": 6.657972508215922e-06, + "loss": 0.3087, + "step": 20513 + }, + { + "epoch": 0.410659860370843, + "grad_norm": 1.0847930908203125, + "learning_rate": 6.6576666633505925e-06, + "loss": 0.3502, + "step": 20514 + }, + { + "epoch": 0.41067987888797136, + "grad_norm": 1.1575149297714233, + "learning_rate": 6.6573608115167894e-06, + "loss": 0.3386, + "step": 20515 + }, + { + "epoch": 0.4106998974050997, + "grad_norm": 1.802519679069519, + "learning_rate": 6.657054952715798e-06, + "loss": 0.8818, + "step": 20516 + }, + { + "epoch": 0.41071991592222806, + "grad_norm": 1.164729356765747, + "learning_rate": 6.656749086948906e-06, + "loss": 0.2874, + "step": 20517 + }, + { + "epoch": 0.4107399344393564, + "grad_norm": 1.1425756216049194, + "learning_rate": 6.656443214217395e-06, + "loss": 0.3287, + "step": 20518 + }, + { + "epoch": 0.41075995295648476, + "grad_norm": 1.078777551651001, + "learning_rate": 6.656137334522555e-06, + "loss": 0.2873, + "step": 20519 + }, + { + "epoch": 0.4107799714736131, + "grad_norm": 0.9811016321182251, + "learning_rate": 6.655831447865667e-06, + "loss": 0.3129, + "step": 20520 + }, + { + "epoch": 0.4107999899907414, + "grad_norm": 1.1037750244140625, + "learning_rate": 6.6555255542480225e-06, + "loss": 0.3177, + "step": 20521 + }, + { + "epoch": 0.41082000850786976, + "grad_norm": 1.0249992609024048, + "learning_rate": 6.655219653670905e-06, + "loss": 0.3011, + "step": 20522 + }, + { + "epoch": 0.4108400270249981, + "grad_norm": 1.300681471824646, + "learning_rate": 6.654913746135599e-06, + "loss": 0.2871, + "step": 20523 + }, + { + "epoch": 0.41086004554212646, + "grad_norm": 1.0967741012573242, + "learning_rate": 6.6546078316433934e-06, + "loss": 0.2986, + "step": 20524 + }, + { + "epoch": 0.4108800640592548, + "grad_norm": 1.1819056272506714, + "learning_rate": 6.654301910195571e-06, + "loss": 0.2814, + "step": 20525 + }, + { + "epoch": 0.41090008257638316, + "grad_norm": 1.0823743343353271, + "learning_rate": 6.65399598179342e-06, + "loss": 0.33, + "step": 20526 + }, + { + "epoch": 0.4109201010935115, + "grad_norm": 1.1065495014190674, + "learning_rate": 6.653690046438224e-06, + "loss": 0.3379, + "step": 20527 + }, + { + "epoch": 0.41094011961063986, + "grad_norm": 1.2274690866470337, + "learning_rate": 6.6533841041312745e-06, + "loss": 0.3356, + "step": 20528 + }, + { + "epoch": 0.41096013812776816, + "grad_norm": 1.1367756128311157, + "learning_rate": 6.653078154873851e-06, + "loss": 0.3134, + "step": 20529 + }, + { + "epoch": 0.4109801566448965, + "grad_norm": 1.1240825653076172, + "learning_rate": 6.6527721986672445e-06, + "loss": 0.3559, + "step": 20530 + }, + { + "epoch": 0.41100017516202486, + "grad_norm": 1.0839385986328125, + "learning_rate": 6.652466235512738e-06, + "loss": 0.3051, + "step": 20531 + }, + { + "epoch": 0.4110201936791532, + "grad_norm": 1.109150767326355, + "learning_rate": 6.6521602654116205e-06, + "loss": 0.3371, + "step": 20532 + }, + { + "epoch": 0.41104021219628156, + "grad_norm": 1.1247974634170532, + "learning_rate": 6.651854288365176e-06, + "loss": 0.3077, + "step": 20533 + }, + { + "epoch": 0.4110602307134099, + "grad_norm": 1.7633596658706665, + "learning_rate": 6.651548304374691e-06, + "loss": 0.8154, + "step": 20534 + }, + { + "epoch": 0.41108024923053826, + "grad_norm": 1.1152294874191284, + "learning_rate": 6.651242313441454e-06, + "loss": 0.3082, + "step": 20535 + }, + { + "epoch": 0.4111002677476666, + "grad_norm": 1.0183501243591309, + "learning_rate": 6.650936315566747e-06, + "loss": 0.3224, + "step": 20536 + }, + { + "epoch": 0.4111202862647949, + "grad_norm": 1.067954421043396, + "learning_rate": 6.650630310751862e-06, + "loss": 0.2628, + "step": 20537 + }, + { + "epoch": 0.41114030478192326, + "grad_norm": 1.052337408065796, + "learning_rate": 6.650324298998081e-06, + "loss": 0.2913, + "step": 20538 + }, + { + "epoch": 0.4111603232990516, + "grad_norm": 0.9887195825576782, + "learning_rate": 6.650018280306691e-06, + "loss": 0.2993, + "step": 20539 + }, + { + "epoch": 0.41118034181617996, + "grad_norm": 1.2910994291305542, + "learning_rate": 6.64971225467898e-06, + "loss": 0.3372, + "step": 20540 + }, + { + "epoch": 0.4112003603333083, + "grad_norm": 1.8543211221694946, + "learning_rate": 6.649406222116235e-06, + "loss": 0.8771, + "step": 20541 + }, + { + "epoch": 0.41122037885043666, + "grad_norm": 1.2537548542022705, + "learning_rate": 6.649100182619739e-06, + "loss": 0.3167, + "step": 20542 + }, + { + "epoch": 0.411240397367565, + "grad_norm": 1.128232717514038, + "learning_rate": 6.648794136190781e-06, + "loss": 0.3013, + "step": 20543 + }, + { + "epoch": 0.41126041588469336, + "grad_norm": 1.1636284589767456, + "learning_rate": 6.648488082830649e-06, + "loss": 0.3204, + "step": 20544 + }, + { + "epoch": 0.41128043440182166, + "grad_norm": 1.0944993495941162, + "learning_rate": 6.648182022540627e-06, + "loss": 0.3042, + "step": 20545 + }, + { + "epoch": 0.41130045291895, + "grad_norm": 1.2026524543762207, + "learning_rate": 6.647875955322002e-06, + "loss": 0.2896, + "step": 20546 + }, + { + "epoch": 0.41132047143607836, + "grad_norm": 1.018270492553711, + "learning_rate": 6.647569881176062e-06, + "loss": 0.3245, + "step": 20547 + }, + { + "epoch": 0.4113404899532067, + "grad_norm": 1.2236418724060059, + "learning_rate": 6.647263800104091e-06, + "loss": 0.3196, + "step": 20548 + }, + { + "epoch": 0.41136050847033506, + "grad_norm": 1.0589128732681274, + "learning_rate": 6.646957712107379e-06, + "loss": 0.3431, + "step": 20549 + }, + { + "epoch": 0.4113805269874634, + "grad_norm": 1.129240870475769, + "learning_rate": 6.646651617187212e-06, + "loss": 0.3753, + "step": 20550 + }, + { + "epoch": 0.41140054550459176, + "grad_norm": 1.1519429683685303, + "learning_rate": 6.646345515344874e-06, + "loss": 0.3316, + "step": 20551 + }, + { + "epoch": 0.4114205640217201, + "grad_norm": 1.1108372211456299, + "learning_rate": 6.6460394065816545e-06, + "loss": 0.2978, + "step": 20552 + }, + { + "epoch": 0.4114405825388484, + "grad_norm": 1.1701470613479614, + "learning_rate": 6.645733290898839e-06, + "loss": 0.3324, + "step": 20553 + }, + { + "epoch": 0.41146060105597676, + "grad_norm": 1.7502888441085815, + "learning_rate": 6.645427168297715e-06, + "loss": 0.7639, + "step": 20554 + }, + { + "epoch": 0.4114806195731051, + "grad_norm": 1.0863560438156128, + "learning_rate": 6.6451210387795714e-06, + "loss": 0.3106, + "step": 20555 + }, + { + "epoch": 0.41150063809023346, + "grad_norm": 1.2273330688476562, + "learning_rate": 6.644814902345691e-06, + "loss": 0.3273, + "step": 20556 + }, + { + "epoch": 0.4115206566073618, + "grad_norm": 1.114011287689209, + "learning_rate": 6.644508758997365e-06, + "loss": 0.3221, + "step": 20557 + }, + { + "epoch": 0.41154067512449016, + "grad_norm": 1.1742384433746338, + "learning_rate": 6.644202608735874e-06, + "loss": 0.3134, + "step": 20558 + }, + { + "epoch": 0.4115606936416185, + "grad_norm": 1.2049696445465088, + "learning_rate": 6.643896451562511e-06, + "loss": 0.335, + "step": 20559 + }, + { + "epoch": 0.41158071215874686, + "grad_norm": 0.9827166795730591, + "learning_rate": 6.643590287478562e-06, + "loss": 0.2923, + "step": 20560 + }, + { + "epoch": 0.41160073067587516, + "grad_norm": 1.0580862760543823, + "learning_rate": 6.6432841164853134e-06, + "loss": 0.3054, + "step": 20561 + }, + { + "epoch": 0.4116207491930035, + "grad_norm": 1.021668791770935, + "learning_rate": 6.6429779385840495e-06, + "loss": 0.2986, + "step": 20562 + }, + { + "epoch": 0.41164076771013186, + "grad_norm": 1.0863792896270752, + "learning_rate": 6.642671753776063e-06, + "loss": 0.2856, + "step": 20563 + }, + { + "epoch": 0.4116607862272602, + "grad_norm": 1.0897228717803955, + "learning_rate": 6.642365562062636e-06, + "loss": 0.3525, + "step": 20564 + }, + { + "epoch": 0.41168080474438856, + "grad_norm": 1.145656943321228, + "learning_rate": 6.642059363445058e-06, + "loss": 0.3516, + "step": 20565 + }, + { + "epoch": 0.4117008232615169, + "grad_norm": 0.9835890531539917, + "learning_rate": 6.641753157924616e-06, + "loss": 0.308, + "step": 20566 + }, + { + "epoch": 0.41172084177864526, + "grad_norm": 1.0759673118591309, + "learning_rate": 6.641446945502596e-06, + "loss": 0.3075, + "step": 20567 + }, + { + "epoch": 0.4117408602957736, + "grad_norm": 1.13641357421875, + "learning_rate": 6.641140726180289e-06, + "loss": 0.2779, + "step": 20568 + }, + { + "epoch": 0.4117608788129019, + "grad_norm": 1.1028374433517456, + "learning_rate": 6.640834499958977e-06, + "loss": 0.3312, + "step": 20569 + }, + { + "epoch": 0.41178089733003026, + "grad_norm": 1.1476699113845825, + "learning_rate": 6.640528266839952e-06, + "loss": 0.3646, + "step": 20570 + }, + { + "epoch": 0.4118009158471586, + "grad_norm": 1.057440996170044, + "learning_rate": 6.640222026824496e-06, + "loss": 0.2875, + "step": 20571 + }, + { + "epoch": 0.41182093436428696, + "grad_norm": 1.043630838394165, + "learning_rate": 6.639915779913902e-06, + "loss": 0.2841, + "step": 20572 + }, + { + "epoch": 0.4118409528814153, + "grad_norm": 1.1739808320999146, + "learning_rate": 6.6396095261094536e-06, + "loss": 0.3541, + "step": 20573 + }, + { + "epoch": 0.41186097139854366, + "grad_norm": 1.0413459539413452, + "learning_rate": 6.639303265412442e-06, + "loss": 0.3238, + "step": 20574 + }, + { + "epoch": 0.411880989915672, + "grad_norm": 1.1213648319244385, + "learning_rate": 6.638996997824151e-06, + "loss": 0.3324, + "step": 20575 + }, + { + "epoch": 0.41190100843280036, + "grad_norm": 1.1434435844421387, + "learning_rate": 6.638690723345869e-06, + "loss": 0.362, + "step": 20576 + }, + { + "epoch": 0.41192102694992866, + "grad_norm": 1.1741310358047485, + "learning_rate": 6.638384441978883e-06, + "loss": 0.3513, + "step": 20577 + }, + { + "epoch": 0.411941045467057, + "grad_norm": 1.8823143243789673, + "learning_rate": 6.638078153724482e-06, + "loss": 0.8565, + "step": 20578 + }, + { + "epoch": 0.41196106398418536, + "grad_norm": 1.021926760673523, + "learning_rate": 6.637771858583954e-06, + "loss": 0.288, + "step": 20579 + }, + { + "epoch": 0.4119810825013137, + "grad_norm": 0.9635242223739624, + "learning_rate": 6.637465556558585e-06, + "loss": 0.3245, + "step": 20580 + }, + { + "epoch": 0.41200110101844206, + "grad_norm": 1.1625593900680542, + "learning_rate": 6.637159247649663e-06, + "loss": 0.3178, + "step": 20581 + }, + { + "epoch": 0.4120211195355704, + "grad_norm": 1.0831496715545654, + "learning_rate": 6.636852931858475e-06, + "loss": 0.3237, + "step": 20582 + }, + { + "epoch": 0.41204113805269876, + "grad_norm": 1.1852906942367554, + "learning_rate": 6.6365466091863116e-06, + "loss": 0.3008, + "step": 20583 + }, + { + "epoch": 0.4120611565698271, + "grad_norm": 1.058032751083374, + "learning_rate": 6.636240279634456e-06, + "loss": 0.2818, + "step": 20584 + }, + { + "epoch": 0.4120811750869554, + "grad_norm": 1.1142942905426025, + "learning_rate": 6.6359339432042004e-06, + "loss": 0.3149, + "step": 20585 + }, + { + "epoch": 0.41210119360408376, + "grad_norm": 1.0748906135559082, + "learning_rate": 6.63562759989683e-06, + "loss": 0.3002, + "step": 20586 + }, + { + "epoch": 0.4121212121212121, + "grad_norm": 1.108162760734558, + "learning_rate": 6.635321249713632e-06, + "loss": 0.3053, + "step": 20587 + }, + { + "epoch": 0.41214123063834046, + "grad_norm": 1.2705966234207153, + "learning_rate": 6.635014892655898e-06, + "loss": 0.4017, + "step": 20588 + }, + { + "epoch": 0.4121612491554688, + "grad_norm": 1.095794916152954, + "learning_rate": 6.63470852872491e-06, + "loss": 0.2775, + "step": 20589 + }, + { + "epoch": 0.41218126767259716, + "grad_norm": 1.0553559064865112, + "learning_rate": 6.634402157921961e-06, + "loss": 0.3281, + "step": 20590 + }, + { + "epoch": 0.4122012861897255, + "grad_norm": 1.100097417831421, + "learning_rate": 6.634095780248337e-06, + "loss": 0.2725, + "step": 20591 + }, + { + "epoch": 0.41222130470685386, + "grad_norm": 1.0812398195266724, + "learning_rate": 6.633789395705326e-06, + "loss": 0.3423, + "step": 20592 + }, + { + "epoch": 0.41224132322398216, + "grad_norm": 1.2325881719589233, + "learning_rate": 6.633483004294214e-06, + "loss": 0.3602, + "step": 20593 + }, + { + "epoch": 0.4122613417411105, + "grad_norm": 1.393776774406433, + "learning_rate": 6.633176606016294e-06, + "loss": 0.3475, + "step": 20594 + }, + { + "epoch": 0.41228136025823886, + "grad_norm": 1.1968878507614136, + "learning_rate": 6.6328702008728505e-06, + "loss": 0.3239, + "step": 20595 + }, + { + "epoch": 0.4123013787753672, + "grad_norm": 1.8852535486221313, + "learning_rate": 6.6325637888651715e-06, + "loss": 0.7469, + "step": 20596 + }, + { + "epoch": 0.41232139729249556, + "grad_norm": 1.1885648965835571, + "learning_rate": 6.6322573699945444e-06, + "loss": 0.285, + "step": 20597 + }, + { + "epoch": 0.4123414158096239, + "grad_norm": 1.0925800800323486, + "learning_rate": 6.63195094426226e-06, + "loss": 0.2912, + "step": 20598 + }, + { + "epoch": 0.41236143432675226, + "grad_norm": 1.1370081901550293, + "learning_rate": 6.6316445116696045e-06, + "loss": 0.3348, + "step": 20599 + }, + { + "epoch": 0.4123814528438806, + "grad_norm": 1.0432679653167725, + "learning_rate": 6.631338072217867e-06, + "loss": 0.3225, + "step": 20600 + }, + { + "epoch": 0.4124014713610089, + "grad_norm": 1.1008741855621338, + "learning_rate": 6.631031625908335e-06, + "loss": 0.3142, + "step": 20601 + }, + { + "epoch": 0.41242148987813726, + "grad_norm": 2.0590097904205322, + "learning_rate": 6.6307251727422975e-06, + "loss": 0.7906, + "step": 20602 + }, + { + "epoch": 0.4124415083952656, + "grad_norm": 1.0144363641738892, + "learning_rate": 6.63041871272104e-06, + "loss": 0.3226, + "step": 20603 + }, + { + "epoch": 0.41246152691239396, + "grad_norm": 1.190917730331421, + "learning_rate": 6.630112245845855e-06, + "loss": 0.3001, + "step": 20604 + }, + { + "epoch": 0.4124815454295223, + "grad_norm": 1.0615458488464355, + "learning_rate": 6.62980577211803e-06, + "loss": 0.3287, + "step": 20605 + }, + { + "epoch": 0.41250156394665066, + "grad_norm": 1.0728223323822021, + "learning_rate": 6.629499291538853e-06, + "loss": 0.325, + "step": 20606 + }, + { + "epoch": 0.412521582463779, + "grad_norm": 1.0555297136306763, + "learning_rate": 6.629192804109608e-06, + "loss": 0.3085, + "step": 20607 + }, + { + "epoch": 0.41254160098090736, + "grad_norm": 1.8316181898117065, + "learning_rate": 6.62888630983159e-06, + "loss": 0.7966, + "step": 20608 + }, + { + "epoch": 0.41256161949803566, + "grad_norm": 1.152024507522583, + "learning_rate": 6.628579808706084e-06, + "loss": 0.3092, + "step": 20609 + }, + { + "epoch": 0.412581638015164, + "grad_norm": 1.100285530090332, + "learning_rate": 6.628273300734378e-06, + "loss": 0.3151, + "step": 20610 + }, + { + "epoch": 0.41260165653229236, + "grad_norm": 1.0749168395996094, + "learning_rate": 6.627966785917761e-06, + "loss": 0.2693, + "step": 20611 + }, + { + "epoch": 0.4126216750494207, + "grad_norm": 1.1649532318115234, + "learning_rate": 6.6276602642575225e-06, + "loss": 0.3126, + "step": 20612 + }, + { + "epoch": 0.41264169356654906, + "grad_norm": 1.1236963272094727, + "learning_rate": 6.627353735754951e-06, + "loss": 0.3736, + "step": 20613 + }, + { + "epoch": 0.4126617120836774, + "grad_norm": 1.1458414793014526, + "learning_rate": 6.627047200411335e-06, + "loss": 0.2855, + "step": 20614 + }, + { + "epoch": 0.41268173060080576, + "grad_norm": 1.0185446739196777, + "learning_rate": 6.626740658227962e-06, + "loss": 0.3475, + "step": 20615 + }, + { + "epoch": 0.4127017491179341, + "grad_norm": 1.0559378862380981, + "learning_rate": 6.626434109206121e-06, + "loss": 0.3132, + "step": 20616 + }, + { + "epoch": 0.4127217676350624, + "grad_norm": 1.0912874937057495, + "learning_rate": 6.6261275533471025e-06, + "loss": 0.3279, + "step": 20617 + }, + { + "epoch": 0.41274178615219076, + "grad_norm": 1.080432415008545, + "learning_rate": 6.625820990652192e-06, + "loss": 0.3567, + "step": 20618 + }, + { + "epoch": 0.4127618046693191, + "grad_norm": 1.2358742952346802, + "learning_rate": 6.625514421122682e-06, + "loss": 0.3287, + "step": 20619 + }, + { + "epoch": 0.41278182318644746, + "grad_norm": 1.082903504371643, + "learning_rate": 6.625207844759858e-06, + "loss": 0.2873, + "step": 20620 + }, + { + "epoch": 0.4128018417035758, + "grad_norm": 1.2250187397003174, + "learning_rate": 6.62490126156501e-06, + "loss": 0.3099, + "step": 20621 + }, + { + "epoch": 0.41282186022070416, + "grad_norm": 1.0948338508605957, + "learning_rate": 6.624594671539426e-06, + "loss": 0.3225, + "step": 20622 + }, + { + "epoch": 0.4128418787378325, + "grad_norm": 1.0195590257644653, + "learning_rate": 6.624288074684395e-06, + "loss": 0.3036, + "step": 20623 + }, + { + "epoch": 0.41286189725496086, + "grad_norm": 1.4845752716064453, + "learning_rate": 6.6239814710012085e-06, + "loss": 0.3029, + "step": 20624 + }, + { + "epoch": 0.41288191577208916, + "grad_norm": 1.062045693397522, + "learning_rate": 6.6236748604911526e-06, + "loss": 0.3, + "step": 20625 + }, + { + "epoch": 0.4129019342892175, + "grad_norm": 1.0325567722320557, + "learning_rate": 6.623368243155517e-06, + "loss": 0.3051, + "step": 20626 + }, + { + "epoch": 0.41292195280634586, + "grad_norm": 1.0604336261749268, + "learning_rate": 6.623061618995591e-06, + "loss": 0.3304, + "step": 20627 + }, + { + "epoch": 0.4129419713234742, + "grad_norm": 2.0681345462799072, + "learning_rate": 6.622754988012663e-06, + "loss": 0.8272, + "step": 20628 + }, + { + "epoch": 0.41296198984060256, + "grad_norm": 1.2470078468322754, + "learning_rate": 6.622448350208022e-06, + "loss": 0.3249, + "step": 20629 + }, + { + "epoch": 0.4129820083577309, + "grad_norm": 1.0393391847610474, + "learning_rate": 6.622141705582957e-06, + "loss": 0.2873, + "step": 20630 + }, + { + "epoch": 0.41300202687485926, + "grad_norm": 1.082250952720642, + "learning_rate": 6.621835054138758e-06, + "loss": 0.265, + "step": 20631 + }, + { + "epoch": 0.4130220453919876, + "grad_norm": 1.0679869651794434, + "learning_rate": 6.621528395876714e-06, + "loss": 0.4, + "step": 20632 + }, + { + "epoch": 0.4130420639091159, + "grad_norm": 1.8010120391845703, + "learning_rate": 6.621221730798112e-06, + "loss": 0.8036, + "step": 20633 + }, + { + "epoch": 0.41306208242624426, + "grad_norm": 0.9925661683082581, + "learning_rate": 6.620915058904245e-06, + "loss": 0.3201, + "step": 20634 + }, + { + "epoch": 0.4130821009433726, + "grad_norm": 1.1330277919769287, + "learning_rate": 6.620608380196398e-06, + "loss": 0.3256, + "step": 20635 + }, + { + "epoch": 0.41310211946050096, + "grad_norm": 1.0285873413085938, + "learning_rate": 6.6203016946758615e-06, + "loss": 0.3028, + "step": 20636 + }, + { + "epoch": 0.4131221379776293, + "grad_norm": 1.0567708015441895, + "learning_rate": 6.619995002343927e-06, + "loss": 0.3385, + "step": 20637 + }, + { + "epoch": 0.41314215649475766, + "grad_norm": 1.825225830078125, + "learning_rate": 6.619688303201881e-06, + "loss": 0.8102, + "step": 20638 + }, + { + "epoch": 0.413162175011886, + "grad_norm": 1.1855504512786865, + "learning_rate": 6.619381597251015e-06, + "loss": 0.3072, + "step": 20639 + }, + { + "epoch": 0.41318219352901436, + "grad_norm": 1.1897687911987305, + "learning_rate": 6.619074884492618e-06, + "loss": 0.3425, + "step": 20640 + }, + { + "epoch": 0.41320221204614266, + "grad_norm": 1.8860074281692505, + "learning_rate": 6.6187681649279765e-06, + "loss": 0.7796, + "step": 20641 + }, + { + "epoch": 0.413222230563271, + "grad_norm": 1.138933539390564, + "learning_rate": 6.618461438558382e-06, + "loss": 0.2727, + "step": 20642 + }, + { + "epoch": 0.41324224908039936, + "grad_norm": 1.0312553644180298, + "learning_rate": 6.6181547053851255e-06, + "loss": 0.3012, + "step": 20643 + }, + { + "epoch": 0.4132622675975277, + "grad_norm": 1.052506923675537, + "learning_rate": 6.617847965409494e-06, + "loss": 0.2865, + "step": 20644 + }, + { + "epoch": 0.41328228611465606, + "grad_norm": 1.1085232496261597, + "learning_rate": 6.617541218632779e-06, + "loss": 0.337, + "step": 20645 + }, + { + "epoch": 0.4133023046317844, + "grad_norm": 1.176244854927063, + "learning_rate": 6.617234465056267e-06, + "loss": 0.3442, + "step": 20646 + }, + { + "epoch": 0.41332232314891276, + "grad_norm": 1.0877755880355835, + "learning_rate": 6.616927704681251e-06, + "loss": 0.2988, + "step": 20647 + }, + { + "epoch": 0.4133423416660411, + "grad_norm": 1.198339819908142, + "learning_rate": 6.616620937509017e-06, + "loss": 0.3353, + "step": 20648 + }, + { + "epoch": 0.4133623601831694, + "grad_norm": 1.0874381065368652, + "learning_rate": 6.616314163540858e-06, + "loss": 0.3077, + "step": 20649 + }, + { + "epoch": 0.41338237870029776, + "grad_norm": 1.2461013793945312, + "learning_rate": 6.616007382778062e-06, + "loss": 0.359, + "step": 20650 + }, + { + "epoch": 0.4134023972174261, + "grad_norm": 1.0841041803359985, + "learning_rate": 6.615700595221919e-06, + "loss": 0.2871, + "step": 20651 + }, + { + "epoch": 0.41342241573455446, + "grad_norm": 1.0762124061584473, + "learning_rate": 6.615393800873718e-06, + "loss": 0.3423, + "step": 20652 + }, + { + "epoch": 0.4134424342516828, + "grad_norm": 1.214251160621643, + "learning_rate": 6.615086999734748e-06, + "loss": 0.2844, + "step": 20653 + }, + { + "epoch": 0.41346245276881116, + "grad_norm": 1.1178585290908813, + "learning_rate": 6.6147801918063e-06, + "loss": 0.2891, + "step": 20654 + }, + { + "epoch": 0.4134824712859395, + "grad_norm": 1.2031525373458862, + "learning_rate": 6.614473377089664e-06, + "loss": 0.2929, + "step": 20655 + }, + { + "epoch": 0.41350248980306786, + "grad_norm": 1.9676389694213867, + "learning_rate": 6.61416655558613e-06, + "loss": 0.77, + "step": 20656 + }, + { + "epoch": 0.41352250832019616, + "grad_norm": 1.1173175573349, + "learning_rate": 6.613859727296987e-06, + "loss": 0.3393, + "step": 20657 + }, + { + "epoch": 0.4135425268373245, + "grad_norm": 1.1427463293075562, + "learning_rate": 6.613552892223525e-06, + "loss": 0.3015, + "step": 20658 + }, + { + "epoch": 0.41356254535445286, + "grad_norm": 1.2184256315231323, + "learning_rate": 6.613246050367035e-06, + "loss": 0.3356, + "step": 20659 + }, + { + "epoch": 0.4135825638715812, + "grad_norm": 1.2924138307571411, + "learning_rate": 6.612939201728804e-06, + "loss": 0.3625, + "step": 20660 + }, + { + "epoch": 0.41360258238870956, + "grad_norm": 1.1679770946502686, + "learning_rate": 6.6126323463101236e-06, + "loss": 0.3248, + "step": 20661 + }, + { + "epoch": 0.4136226009058379, + "grad_norm": 1.0392884016036987, + "learning_rate": 6.612325484112284e-06, + "loss": 0.2788, + "step": 20662 + }, + { + "epoch": 0.41364261942296626, + "grad_norm": 1.023164987564087, + "learning_rate": 6.612018615136576e-06, + "loss": 0.2902, + "step": 20663 + }, + { + "epoch": 0.4136626379400946, + "grad_norm": 1.7226568460464478, + "learning_rate": 6.611711739384288e-06, + "loss": 0.7856, + "step": 20664 + }, + { + "epoch": 0.4136826564572229, + "grad_norm": 1.1749210357666016, + "learning_rate": 6.611404856856711e-06, + "loss": 0.3202, + "step": 20665 + }, + { + "epoch": 0.41370267497435126, + "grad_norm": 1.061653733253479, + "learning_rate": 6.6110979675551344e-06, + "loss": 0.326, + "step": 20666 + }, + { + "epoch": 0.4137226934914796, + "grad_norm": 1.0368750095367432, + "learning_rate": 6.6107910714808485e-06, + "loss": 0.3077, + "step": 20667 + }, + { + "epoch": 0.41374271200860796, + "grad_norm": 1.0997430086135864, + "learning_rate": 6.610484168635145e-06, + "loss": 0.3191, + "step": 20668 + }, + { + "epoch": 0.4137627305257363, + "grad_norm": 1.102817177772522, + "learning_rate": 6.61017725901931e-06, + "loss": 0.3074, + "step": 20669 + }, + { + "epoch": 0.41378274904286466, + "grad_norm": 1.2115451097488403, + "learning_rate": 6.609870342634639e-06, + "loss": 0.3387, + "step": 20670 + }, + { + "epoch": 0.413802767559993, + "grad_norm": 0.9938542246818542, + "learning_rate": 6.609563419482419e-06, + "loss": 0.2616, + "step": 20671 + }, + { + "epoch": 0.41382278607712136, + "grad_norm": 1.1255558729171753, + "learning_rate": 6.60925648956394e-06, + "loss": 0.3197, + "step": 20672 + }, + { + "epoch": 0.41384280459424966, + "grad_norm": 1.3310186862945557, + "learning_rate": 6.608949552880492e-06, + "loss": 0.3623, + "step": 20673 + }, + { + "epoch": 0.413862823111378, + "grad_norm": 1.1954669952392578, + "learning_rate": 6.608642609433368e-06, + "loss": 0.3494, + "step": 20674 + }, + { + "epoch": 0.41388284162850636, + "grad_norm": 1.1295514106750488, + "learning_rate": 6.608335659223855e-06, + "loss": 0.3124, + "step": 20675 + }, + { + "epoch": 0.4139028601456347, + "grad_norm": 1.9890117645263672, + "learning_rate": 6.608028702253248e-06, + "loss": 0.775, + "step": 20676 + }, + { + "epoch": 0.41392287866276306, + "grad_norm": 1.0625708103179932, + "learning_rate": 6.607721738522832e-06, + "loss": 0.2927, + "step": 20677 + }, + { + "epoch": 0.4139428971798914, + "grad_norm": 1.045030117034912, + "learning_rate": 6.607414768033901e-06, + "loss": 0.328, + "step": 20678 + }, + { + "epoch": 0.41396291569701976, + "grad_norm": 1.1427433490753174, + "learning_rate": 6.607107790787744e-06, + "loss": 0.3226, + "step": 20679 + }, + { + "epoch": 0.4139829342141481, + "grad_norm": 1.8480452299118042, + "learning_rate": 6.606800806785651e-06, + "loss": 0.8264, + "step": 20680 + }, + { + "epoch": 0.4140029527312764, + "grad_norm": 1.0841037034988403, + "learning_rate": 6.606493816028914e-06, + "loss": 0.2348, + "step": 20681 + }, + { + "epoch": 0.41402297124840476, + "grad_norm": 1.0258952379226685, + "learning_rate": 6.60618681851882e-06, + "loss": 0.2846, + "step": 20682 + }, + { + "epoch": 0.4140429897655331, + "grad_norm": 1.873455286026001, + "learning_rate": 6.605879814256666e-06, + "loss": 0.7998, + "step": 20683 + }, + { + "epoch": 0.41406300828266146, + "grad_norm": 1.2659298181533813, + "learning_rate": 6.605572803243738e-06, + "loss": 0.3082, + "step": 20684 + }, + { + "epoch": 0.4140830267997898, + "grad_norm": 1.1983182430267334, + "learning_rate": 6.6052657854813264e-06, + "loss": 0.3127, + "step": 20685 + }, + { + "epoch": 0.41410304531691816, + "grad_norm": 1.115139126777649, + "learning_rate": 6.604958760970723e-06, + "loss": 0.3275, + "step": 20686 + }, + { + "epoch": 0.4141230638340465, + "grad_norm": 1.0351537466049194, + "learning_rate": 6.604651729713219e-06, + "loss": 0.3313, + "step": 20687 + }, + { + "epoch": 0.41414308235117486, + "grad_norm": 1.074131727218628, + "learning_rate": 6.6043446917101025e-06, + "loss": 0.3129, + "step": 20688 + }, + { + "epoch": 0.41416310086830316, + "grad_norm": 1.0933563709259033, + "learning_rate": 6.604037646962668e-06, + "loss": 0.3405, + "step": 20689 + }, + { + "epoch": 0.4141831193854315, + "grad_norm": 1.9905221462249756, + "learning_rate": 6.6037305954722066e-06, + "loss": 0.8083, + "step": 20690 + }, + { + "epoch": 0.41420313790255986, + "grad_norm": 1.1638755798339844, + "learning_rate": 6.603423537240004e-06, + "loss": 0.3151, + "step": 20691 + }, + { + "epoch": 0.4142231564196882, + "grad_norm": 1.0337862968444824, + "learning_rate": 6.603116472267355e-06, + "loss": 0.2811, + "step": 20692 + }, + { + "epoch": 0.41424317493681656, + "grad_norm": 1.0884572267532349, + "learning_rate": 6.602809400555547e-06, + "loss": 0.308, + "step": 20693 + }, + { + "epoch": 0.4142631934539449, + "grad_norm": 1.137969970703125, + "learning_rate": 6.6025023221058745e-06, + "loss": 0.3311, + "step": 20694 + }, + { + "epoch": 0.41428321197107326, + "grad_norm": 1.0688806772232056, + "learning_rate": 6.602195236919629e-06, + "loss": 0.3085, + "step": 20695 + }, + { + "epoch": 0.4143032304882016, + "grad_norm": 0.9737642407417297, + "learning_rate": 6.6018881449980975e-06, + "loss": 0.2876, + "step": 20696 + }, + { + "epoch": 0.4143232490053299, + "grad_norm": 1.1527060270309448, + "learning_rate": 6.601581046342574e-06, + "loss": 0.3823, + "step": 20697 + }, + { + "epoch": 0.41434326752245826, + "grad_norm": 1.2486473321914673, + "learning_rate": 6.601273940954348e-06, + "loss": 0.3255, + "step": 20698 + }, + { + "epoch": 0.4143632860395866, + "grad_norm": 1.1278338432312012, + "learning_rate": 6.600966828834711e-06, + "loss": 0.3075, + "step": 20699 + }, + { + "epoch": 0.41438330455671496, + "grad_norm": 1.0414423942565918, + "learning_rate": 6.600659709984953e-06, + "loss": 0.3134, + "step": 20700 + }, + { + "epoch": 0.4144033230738433, + "grad_norm": 2.109200954437256, + "learning_rate": 6.600352584406368e-06, + "loss": 0.8585, + "step": 20701 + }, + { + "epoch": 0.41442334159097166, + "grad_norm": 1.1220779418945312, + "learning_rate": 6.6000454521002434e-06, + "loss": 0.3355, + "step": 20702 + }, + { + "epoch": 0.4144433601081, + "grad_norm": 1.0433247089385986, + "learning_rate": 6.599738313067874e-06, + "loss": 0.3019, + "step": 20703 + }, + { + "epoch": 0.41446337862522836, + "grad_norm": 2.062011241912842, + "learning_rate": 6.599431167310547e-06, + "loss": 0.7483, + "step": 20704 + }, + { + "epoch": 0.41448339714235666, + "grad_norm": 1.941503882408142, + "learning_rate": 6.599124014829556e-06, + "loss": 0.7104, + "step": 20705 + }, + { + "epoch": 0.414503415659485, + "grad_norm": 1.2645761966705322, + "learning_rate": 6.598816855626191e-06, + "loss": 0.3016, + "step": 20706 + }, + { + "epoch": 0.41452343417661336, + "grad_norm": 1.123175859451294, + "learning_rate": 6.598509689701747e-06, + "loss": 0.2965, + "step": 20707 + }, + { + "epoch": 0.4145434526937417, + "grad_norm": 1.3594028949737549, + "learning_rate": 6.59820251705751e-06, + "loss": 0.3496, + "step": 20708 + }, + { + "epoch": 0.41456347121087006, + "grad_norm": 1.1212655305862427, + "learning_rate": 6.597895337694774e-06, + "loss": 0.3434, + "step": 20709 + }, + { + "epoch": 0.4145834897279984, + "grad_norm": 1.2089043855667114, + "learning_rate": 6.597588151614831e-06, + "loss": 0.2846, + "step": 20710 + }, + { + "epoch": 0.41460350824512676, + "grad_norm": 1.1130584478378296, + "learning_rate": 6.597280958818969e-06, + "loss": 0.3286, + "step": 20711 + }, + { + "epoch": 0.4146235267622551, + "grad_norm": 1.1140272617340088, + "learning_rate": 6.5969737593084825e-06, + "loss": 0.3412, + "step": 20712 + }, + { + "epoch": 0.4146435452793834, + "grad_norm": 0.9753885269165039, + "learning_rate": 6.596666553084662e-06, + "loss": 0.3098, + "step": 20713 + }, + { + "epoch": 0.41466356379651176, + "grad_norm": 1.9478520154953003, + "learning_rate": 6.5963593401487995e-06, + "loss": 0.7608, + "step": 20714 + }, + { + "epoch": 0.4146835823136401, + "grad_norm": 1.0858550071716309, + "learning_rate": 6.596052120502186e-06, + "loss": 0.3124, + "step": 20715 + }, + { + "epoch": 0.41470360083076846, + "grad_norm": 1.1106715202331543, + "learning_rate": 6.595744894146112e-06, + "loss": 0.3607, + "step": 20716 + }, + { + "epoch": 0.4147236193478968, + "grad_norm": 1.164435863494873, + "learning_rate": 6.59543766108187e-06, + "loss": 0.3559, + "step": 20717 + }, + { + "epoch": 0.41474363786502516, + "grad_norm": 1.9327243566513062, + "learning_rate": 6.595130421310752e-06, + "loss": 0.8069, + "step": 20718 + }, + { + "epoch": 0.4147636563821535, + "grad_norm": 2.006722927093506, + "learning_rate": 6.594823174834047e-06, + "loss": 0.8632, + "step": 20719 + }, + { + "epoch": 0.41478367489928186, + "grad_norm": 1.151660680770874, + "learning_rate": 6.594515921653051e-06, + "loss": 0.3081, + "step": 20720 + }, + { + "epoch": 0.41480369341641016, + "grad_norm": 2.072300434112549, + "learning_rate": 6.594208661769053e-06, + "loss": 0.904, + "step": 20721 + }, + { + "epoch": 0.4148237119335385, + "grad_norm": 1.0246747732162476, + "learning_rate": 6.593901395183344e-06, + "loss": 0.2821, + "step": 20722 + }, + { + "epoch": 0.41484373045066686, + "grad_norm": 1.1803159713745117, + "learning_rate": 6.593594121897216e-06, + "loss": 0.3403, + "step": 20723 + }, + { + "epoch": 0.4148637489677952, + "grad_norm": 1.0717803239822388, + "learning_rate": 6.593286841911962e-06, + "loss": 0.2988, + "step": 20724 + }, + { + "epoch": 0.41488376748492356, + "grad_norm": 1.1034739017486572, + "learning_rate": 6.592979555228873e-06, + "loss": 0.2984, + "step": 20725 + }, + { + "epoch": 0.4149037860020519, + "grad_norm": 2.0387282371520996, + "learning_rate": 6.59267226184924e-06, + "loss": 0.7444, + "step": 20726 + }, + { + "epoch": 0.41492380451918026, + "grad_norm": 1.1684598922729492, + "learning_rate": 6.5923649617743576e-06, + "loss": 0.3174, + "step": 20727 + }, + { + "epoch": 0.4149438230363086, + "grad_norm": 1.0445375442504883, + "learning_rate": 6.5920576550055135e-06, + "loss": 0.2676, + "step": 20728 + }, + { + "epoch": 0.4149638415534369, + "grad_norm": 1.083380103111267, + "learning_rate": 6.591750341544003e-06, + "loss": 0.3437, + "step": 20729 + }, + { + "epoch": 0.41498386007056526, + "grad_norm": 1.016215205192566, + "learning_rate": 6.591443021391116e-06, + "loss": 0.2688, + "step": 20730 + }, + { + "epoch": 0.4150038785876936, + "grad_norm": 1.005715250968933, + "learning_rate": 6.591135694548145e-06, + "loss": 0.2986, + "step": 20731 + }, + { + "epoch": 0.41502389710482196, + "grad_norm": 1.0458170175552368, + "learning_rate": 6.590828361016383e-06, + "loss": 0.3399, + "step": 20732 + }, + { + "epoch": 0.4150439156219503, + "grad_norm": 1.2187563180923462, + "learning_rate": 6.590521020797118e-06, + "loss": 0.3018, + "step": 20733 + }, + { + "epoch": 0.41506393413907866, + "grad_norm": 1.1206879615783691, + "learning_rate": 6.590213673891648e-06, + "loss": 0.3504, + "step": 20734 + }, + { + "epoch": 0.415083952656207, + "grad_norm": 0.957551896572113, + "learning_rate": 6.58990632030126e-06, + "loss": 0.2847, + "step": 20735 + }, + { + "epoch": 0.41510397117333536, + "grad_norm": 1.1043891906738281, + "learning_rate": 6.58959896002725e-06, + "loss": 0.3096, + "step": 20736 + }, + { + "epoch": 0.41512398969046366, + "grad_norm": 1.2212588787078857, + "learning_rate": 6.589291593070907e-06, + "loss": 0.2907, + "step": 20737 + }, + { + "epoch": 0.415144008207592, + "grad_norm": 1.2102460861206055, + "learning_rate": 6.588984219433523e-06, + "loss": 0.3451, + "step": 20738 + }, + { + "epoch": 0.41516402672472036, + "grad_norm": 1.7878600358963013, + "learning_rate": 6.588676839116392e-06, + "loss": 0.799, + "step": 20739 + }, + { + "epoch": 0.4151840452418487, + "grad_norm": 1.3047205209732056, + "learning_rate": 6.588369452120806e-06, + "loss": 0.2931, + "step": 20740 + }, + { + "epoch": 0.41520406375897706, + "grad_norm": 1.0280567407608032, + "learning_rate": 6.5880620584480574e-06, + "loss": 0.2487, + "step": 20741 + }, + { + "epoch": 0.4152240822761054, + "grad_norm": 1.0876301527023315, + "learning_rate": 6.587754658099437e-06, + "loss": 0.306, + "step": 20742 + }, + { + "epoch": 0.41524410079323376, + "grad_norm": 1.0821491479873657, + "learning_rate": 6.587447251076237e-06, + "loss": 0.2987, + "step": 20743 + }, + { + "epoch": 0.4152641193103621, + "grad_norm": 1.0130332708358765, + "learning_rate": 6.58713983737975e-06, + "loss": 0.3099, + "step": 20744 + }, + { + "epoch": 0.4152841378274904, + "grad_norm": 1.123857021331787, + "learning_rate": 6.586832417011269e-06, + "loss": 0.3185, + "step": 20745 + }, + { + "epoch": 0.41530415634461876, + "grad_norm": 1.1402981281280518, + "learning_rate": 6.5865249899720875e-06, + "loss": 0.3043, + "step": 20746 + }, + { + "epoch": 0.4153241748617471, + "grad_norm": 1.1574606895446777, + "learning_rate": 6.5862175562634945e-06, + "loss": 0.338, + "step": 20747 + }, + { + "epoch": 0.41534419337887546, + "grad_norm": 1.1507335901260376, + "learning_rate": 6.585910115886785e-06, + "loss": 0.3396, + "step": 20748 + }, + { + "epoch": 0.4153642118960038, + "grad_norm": 1.1886086463928223, + "learning_rate": 6.585602668843252e-06, + "loss": 0.3318, + "step": 20749 + }, + { + "epoch": 0.41538423041313216, + "grad_norm": 1.194498062133789, + "learning_rate": 6.585295215134184e-06, + "loss": 0.3203, + "step": 20750 + }, + { + "epoch": 0.4154042489302605, + "grad_norm": 1.060951590538025, + "learning_rate": 6.584987754760878e-06, + "loss": 0.3195, + "step": 20751 + }, + { + "epoch": 0.41542426744738886, + "grad_norm": 1.8965808153152466, + "learning_rate": 6.584680287724624e-06, + "loss": 0.8018, + "step": 20752 + }, + { + "epoch": 0.41544428596451716, + "grad_norm": 1.1062517166137695, + "learning_rate": 6.584372814026715e-06, + "loss": 0.3016, + "step": 20753 + }, + { + "epoch": 0.4154643044816455, + "grad_norm": 1.0498007535934448, + "learning_rate": 6.584065333668443e-06, + "loss": 0.3315, + "step": 20754 + }, + { + "epoch": 0.41548432299877386, + "grad_norm": 1.2121433019638062, + "learning_rate": 6.583757846651102e-06, + "loss": 0.3375, + "step": 20755 + }, + { + "epoch": 0.4155043415159022, + "grad_norm": 1.1418880224227905, + "learning_rate": 6.583450352975985e-06, + "loss": 0.307, + "step": 20756 + }, + { + "epoch": 0.41552436003303056, + "grad_norm": 1.1680387258529663, + "learning_rate": 6.583142852644382e-06, + "loss": 0.3457, + "step": 20757 + }, + { + "epoch": 0.4155443785501589, + "grad_norm": 1.1269042491912842, + "learning_rate": 6.582835345657587e-06, + "loss": 0.3374, + "step": 20758 + }, + { + "epoch": 0.41556439706728726, + "grad_norm": 1.1806769371032715, + "learning_rate": 6.582527832016893e-06, + "loss": 0.3586, + "step": 20759 + }, + { + "epoch": 0.4155844155844156, + "grad_norm": 1.1552748680114746, + "learning_rate": 6.582220311723593e-06, + "loss": 0.351, + "step": 20760 + }, + { + "epoch": 0.4156044341015439, + "grad_norm": 1.2118290662765503, + "learning_rate": 6.581912784778979e-06, + "loss": 0.3596, + "step": 20761 + }, + { + "epoch": 0.41562445261867226, + "grad_norm": 1.1111674308776855, + "learning_rate": 6.581605251184344e-06, + "loss": 0.2986, + "step": 20762 + }, + { + "epoch": 0.4156444711358006, + "grad_norm": 1.1085474491119385, + "learning_rate": 6.581297710940981e-06, + "loss": 0.3169, + "step": 20763 + }, + { + "epoch": 0.41566448965292896, + "grad_norm": 1.0419412851333618, + "learning_rate": 6.580990164050183e-06, + "loss": 0.3308, + "step": 20764 + }, + { + "epoch": 0.4156845081700573, + "grad_norm": 1.1675355434417725, + "learning_rate": 6.5806826105132424e-06, + "loss": 0.3363, + "step": 20765 + }, + { + "epoch": 0.41570452668718566, + "grad_norm": 1.0807437896728516, + "learning_rate": 6.580375050331452e-06, + "loss": 0.2915, + "step": 20766 + }, + { + "epoch": 0.415724545204314, + "grad_norm": 1.917924404144287, + "learning_rate": 6.580067483506106e-06, + "loss": 0.8185, + "step": 20767 + }, + { + "epoch": 0.41574456372144236, + "grad_norm": 1.1010668277740479, + "learning_rate": 6.579759910038496e-06, + "loss": 0.3561, + "step": 20768 + }, + { + "epoch": 0.41576458223857066, + "grad_norm": 1.0704357624053955, + "learning_rate": 6.579452329929915e-06, + "loss": 0.2951, + "step": 20769 + }, + { + "epoch": 0.415784600755699, + "grad_norm": 1.0801278352737427, + "learning_rate": 6.579144743181656e-06, + "loss": 0.307, + "step": 20770 + }, + { + "epoch": 0.41580461927282736, + "grad_norm": 1.1291108131408691, + "learning_rate": 6.578837149795012e-06, + "loss": 0.306, + "step": 20771 + }, + { + "epoch": 0.4158246377899557, + "grad_norm": 1.1373794078826904, + "learning_rate": 6.578529549771276e-06, + "loss": 0.3221, + "step": 20772 + }, + { + "epoch": 0.41584465630708406, + "grad_norm": 1.1220670938491821, + "learning_rate": 6.578221943111744e-06, + "loss": 0.3053, + "step": 20773 + }, + { + "epoch": 0.4158646748242124, + "grad_norm": 1.0988578796386719, + "learning_rate": 6.577914329817705e-06, + "loss": 0.3402, + "step": 20774 + }, + { + "epoch": 0.41588469334134076, + "grad_norm": 0.9206014275550842, + "learning_rate": 6.5776067098904536e-06, + "loss": 0.2987, + "step": 20775 + }, + { + "epoch": 0.4159047118584691, + "grad_norm": 1.0380115509033203, + "learning_rate": 6.577299083331282e-06, + "loss": 0.2844, + "step": 20776 + }, + { + "epoch": 0.4159247303755974, + "grad_norm": 1.1398663520812988, + "learning_rate": 6.576991450141486e-06, + "loss": 0.3203, + "step": 20777 + }, + { + "epoch": 0.41594474889272576, + "grad_norm": 1.1204519271850586, + "learning_rate": 6.5766838103223574e-06, + "loss": 0.3173, + "step": 20778 + }, + { + "epoch": 0.4159647674098541, + "grad_norm": 1.2404801845550537, + "learning_rate": 6.576376163875189e-06, + "loss": 0.3882, + "step": 20779 + }, + { + "epoch": 0.41598478592698246, + "grad_norm": 1.1132938861846924, + "learning_rate": 6.5760685108012745e-06, + "loss": 0.3344, + "step": 20780 + }, + { + "epoch": 0.4160048044441108, + "grad_norm": 1.1985304355621338, + "learning_rate": 6.575760851101907e-06, + "loss": 0.2774, + "step": 20781 + }, + { + "epoch": 0.41602482296123916, + "grad_norm": 1.2475441694259644, + "learning_rate": 6.5754531847783805e-06, + "loss": 0.3453, + "step": 20782 + }, + { + "epoch": 0.4160448414783675, + "grad_norm": 1.0966651439666748, + "learning_rate": 6.5751455118319875e-06, + "loss": 0.2915, + "step": 20783 + }, + { + "epoch": 0.4160648599954958, + "grad_norm": 1.0872383117675781, + "learning_rate": 6.57483783226402e-06, + "loss": 0.2861, + "step": 20784 + }, + { + "epoch": 0.41608487851262416, + "grad_norm": 1.1971169710159302, + "learning_rate": 6.574530146075776e-06, + "loss": 0.3014, + "step": 20785 + }, + { + "epoch": 0.4161048970297525, + "grad_norm": 1.140273928642273, + "learning_rate": 6.574222453268545e-06, + "loss": 0.331, + "step": 20786 + }, + { + "epoch": 0.41612491554688086, + "grad_norm": 1.111523985862732, + "learning_rate": 6.573914753843622e-06, + "loss": 0.3595, + "step": 20787 + }, + { + "epoch": 0.4161449340640092, + "grad_norm": 1.1556987762451172, + "learning_rate": 6.573607047802299e-06, + "loss": 0.3246, + "step": 20788 + }, + { + "epoch": 0.41616495258113756, + "grad_norm": 1.882944107055664, + "learning_rate": 6.573299335145871e-06, + "loss": 0.8451, + "step": 20789 + }, + { + "epoch": 0.4161849710982659, + "grad_norm": 0.9994077086448669, + "learning_rate": 6.5729916158756315e-06, + "loss": 0.3443, + "step": 20790 + }, + { + "epoch": 0.41620498961539426, + "grad_norm": 1.0238028764724731, + "learning_rate": 6.5726838899928735e-06, + "loss": 0.3367, + "step": 20791 + }, + { + "epoch": 0.41622500813252256, + "grad_norm": 1.192835807800293, + "learning_rate": 6.572376157498891e-06, + "loss": 0.3389, + "step": 20792 + }, + { + "epoch": 0.4162450266496509, + "grad_norm": 1.1399844884872437, + "learning_rate": 6.572068418394978e-06, + "loss": 0.3171, + "step": 20793 + }, + { + "epoch": 0.41626504516677926, + "grad_norm": 1.10110342502594, + "learning_rate": 6.571760672682428e-06, + "loss": 0.356, + "step": 20794 + }, + { + "epoch": 0.4162850636839076, + "grad_norm": 1.7632776498794556, + "learning_rate": 6.5714529203625335e-06, + "loss": 0.8542, + "step": 20795 + }, + { + "epoch": 0.41630508220103596, + "grad_norm": 1.091758131980896, + "learning_rate": 6.571145161436588e-06, + "loss": 0.2907, + "step": 20796 + }, + { + "epoch": 0.4163251007181643, + "grad_norm": 1.0079052448272705, + "learning_rate": 6.570837395905888e-06, + "loss": 0.2606, + "step": 20797 + }, + { + "epoch": 0.41634511923529266, + "grad_norm": 1.0393918752670288, + "learning_rate": 6.570529623771725e-06, + "loss": 0.3076, + "step": 20798 + }, + { + "epoch": 0.416365137752421, + "grad_norm": 1.0669723749160767, + "learning_rate": 6.570221845035394e-06, + "loss": 0.3327, + "step": 20799 + }, + { + "epoch": 0.4163851562695493, + "grad_norm": 1.1578398942947388, + "learning_rate": 6.569914059698188e-06, + "loss": 0.3323, + "step": 20800 + }, + { + "epoch": 0.41640517478667766, + "grad_norm": 1.0680763721466064, + "learning_rate": 6.569606267761401e-06, + "loss": 0.3067, + "step": 20801 + }, + { + "epoch": 0.416425193303806, + "grad_norm": 1.1177678108215332, + "learning_rate": 6.5692984692263266e-06, + "loss": 0.3355, + "step": 20802 + }, + { + "epoch": 0.41644521182093436, + "grad_norm": 1.1525908708572388, + "learning_rate": 6.568990664094259e-06, + "loss": 0.3106, + "step": 20803 + }, + { + "epoch": 0.4164652303380627, + "grad_norm": 1.475157380104065, + "learning_rate": 6.568682852366494e-06, + "loss": 0.3037, + "step": 20804 + }, + { + "epoch": 0.41648524885519106, + "grad_norm": 1.2153682708740234, + "learning_rate": 6.568375034044323e-06, + "loss": 0.313, + "step": 20805 + }, + { + "epoch": 0.4165052673723194, + "grad_norm": 1.0877379179000854, + "learning_rate": 6.568067209129041e-06, + "loss": 0.306, + "step": 20806 + }, + { + "epoch": 0.41652528588944776, + "grad_norm": 1.0944676399230957, + "learning_rate": 6.567759377621942e-06, + "loss": 0.3356, + "step": 20807 + }, + { + "epoch": 0.41654530440657606, + "grad_norm": 1.1198604106903076, + "learning_rate": 6.567451539524319e-06, + "loss": 0.295, + "step": 20808 + }, + { + "epoch": 0.4165653229237044, + "grad_norm": 1.8084428310394287, + "learning_rate": 6.567143694837468e-06, + "loss": 0.8056, + "step": 20809 + }, + { + "epoch": 0.41658534144083276, + "grad_norm": 1.0149059295654297, + "learning_rate": 6.56683584356268e-06, + "loss": 0.3304, + "step": 20810 + }, + { + "epoch": 0.4166053599579611, + "grad_norm": 1.9854172468185425, + "learning_rate": 6.566527985701254e-06, + "loss": 0.7693, + "step": 20811 + }, + { + "epoch": 0.41662537847508946, + "grad_norm": 1.2562254667282104, + "learning_rate": 6.56622012125448e-06, + "loss": 0.3262, + "step": 20812 + }, + { + "epoch": 0.4166453969922178, + "grad_norm": 1.0507310628890991, + "learning_rate": 6.565912250223655e-06, + "loss": 0.2935, + "step": 20813 + }, + { + "epoch": 0.41666541550934616, + "grad_norm": 1.1397427320480347, + "learning_rate": 6.56560437261007e-06, + "loss": 0.356, + "step": 20814 + }, + { + "epoch": 0.4166854340264745, + "grad_norm": 1.1152451038360596, + "learning_rate": 6.565296488415023e-06, + "loss": 0.2846, + "step": 20815 + }, + { + "epoch": 0.4167054525436028, + "grad_norm": 1.1528838872909546, + "learning_rate": 6.564988597639806e-06, + "loss": 0.3261, + "step": 20816 + }, + { + "epoch": 0.41672547106073116, + "grad_norm": 1.0345128774642944, + "learning_rate": 6.5646807002857126e-06, + "loss": 0.3017, + "step": 20817 + }, + { + "epoch": 0.4167454895778595, + "grad_norm": 1.146034598350525, + "learning_rate": 6.5643727963540405e-06, + "loss": 0.3549, + "step": 20818 + }, + { + "epoch": 0.41676550809498786, + "grad_norm": 1.1668179035186768, + "learning_rate": 6.56406488584608e-06, + "loss": 0.3057, + "step": 20819 + }, + { + "epoch": 0.4167855266121162, + "grad_norm": 1.1818492412567139, + "learning_rate": 6.563756968763128e-06, + "loss": 0.3043, + "step": 20820 + }, + { + "epoch": 0.41680554512924456, + "grad_norm": 1.1866331100463867, + "learning_rate": 6.5634490451064784e-06, + "loss": 0.3472, + "step": 20821 + }, + { + "epoch": 0.4168255636463729, + "grad_norm": 1.0987569093704224, + "learning_rate": 6.563141114877423e-06, + "loss": 0.3433, + "step": 20822 + }, + { + "epoch": 0.41684558216350126, + "grad_norm": 1.0506945848464966, + "learning_rate": 6.562833178077261e-06, + "loss": 0.3034, + "step": 20823 + }, + { + "epoch": 0.41686560068062956, + "grad_norm": 1.1891158819198608, + "learning_rate": 6.5625252347072845e-06, + "loss": 0.2956, + "step": 20824 + }, + { + "epoch": 0.4168856191977579, + "grad_norm": 1.336018681526184, + "learning_rate": 6.562217284768789e-06, + "loss": 0.3348, + "step": 20825 + }, + { + "epoch": 0.41690563771488626, + "grad_norm": 1.1026251316070557, + "learning_rate": 6.561909328263068e-06, + "loss": 0.3739, + "step": 20826 + }, + { + "epoch": 0.4169256562320146, + "grad_norm": 1.4659463167190552, + "learning_rate": 6.561601365191415e-06, + "loss": 0.3658, + "step": 20827 + }, + { + "epoch": 0.41694567474914296, + "grad_norm": 1.3562736511230469, + "learning_rate": 6.561293395555125e-06, + "loss": 0.3126, + "step": 20828 + }, + { + "epoch": 0.4169656932662713, + "grad_norm": 1.1531599760055542, + "learning_rate": 6.560985419355496e-06, + "loss": 0.3117, + "step": 20829 + }, + { + "epoch": 0.41698571178339966, + "grad_norm": 1.081055998802185, + "learning_rate": 6.560677436593819e-06, + "loss": 0.2983, + "step": 20830 + }, + { + "epoch": 0.417005730300528, + "grad_norm": 1.0230222940444946, + "learning_rate": 6.560369447271391e-06, + "loss": 0.2662, + "step": 20831 + }, + { + "epoch": 0.4170257488176563, + "grad_norm": 0.9887229800224304, + "learning_rate": 6.560061451389503e-06, + "loss": 0.2991, + "step": 20832 + }, + { + "epoch": 0.41704576733478466, + "grad_norm": 1.171218752861023, + "learning_rate": 6.559753448949455e-06, + "loss": 0.3423, + "step": 20833 + }, + { + "epoch": 0.417065785851913, + "grad_norm": 0.9954811334609985, + "learning_rate": 6.559445439952539e-06, + "loss": 0.2998, + "step": 20834 + }, + { + "epoch": 0.41708580436904136, + "grad_norm": 1.0861375331878662, + "learning_rate": 6.559137424400048e-06, + "loss": 0.3467, + "step": 20835 + }, + { + "epoch": 0.4171058228861697, + "grad_norm": 1.0550850629806519, + "learning_rate": 6.55882940229328e-06, + "loss": 0.3186, + "step": 20836 + }, + { + "epoch": 0.41712584140329806, + "grad_norm": 1.1353957653045654, + "learning_rate": 6.558521373633529e-06, + "loss": 0.3134, + "step": 20837 + }, + { + "epoch": 0.4171458599204264, + "grad_norm": 1.0495257377624512, + "learning_rate": 6.55821333842209e-06, + "loss": 0.3122, + "step": 20838 + }, + { + "epoch": 0.41716587843755476, + "grad_norm": 1.2412337064743042, + "learning_rate": 6.557905296660257e-06, + "loss": 0.3445, + "step": 20839 + }, + { + "epoch": 0.41718589695468306, + "grad_norm": 1.800039529800415, + "learning_rate": 6.557597248349326e-06, + "loss": 0.8506, + "step": 20840 + }, + { + "epoch": 0.4172059154718114, + "grad_norm": 1.178924560546875, + "learning_rate": 6.557289193490589e-06, + "loss": 0.3388, + "step": 20841 + }, + { + "epoch": 0.41722593398893976, + "grad_norm": 1.1548047065734863, + "learning_rate": 6.556981132085346e-06, + "loss": 0.3284, + "step": 20842 + }, + { + "epoch": 0.4172459525060681, + "grad_norm": 1.2663809061050415, + "learning_rate": 6.556673064134887e-06, + "loss": 0.3528, + "step": 20843 + }, + { + "epoch": 0.41726597102319646, + "grad_norm": 1.1391279697418213, + "learning_rate": 6.556364989640512e-06, + "loss": 0.3449, + "step": 20844 + }, + { + "epoch": 0.4172859895403248, + "grad_norm": 1.1320627927780151, + "learning_rate": 6.5560569086035116e-06, + "loss": 0.2886, + "step": 20845 + }, + { + "epoch": 0.41730600805745316, + "grad_norm": 1.1765391826629639, + "learning_rate": 6.555748821025184e-06, + "loss": 0.3269, + "step": 20846 + }, + { + "epoch": 0.4173260265745815, + "grad_norm": 1.0412242412567139, + "learning_rate": 6.555440726906823e-06, + "loss": 0.3072, + "step": 20847 + }, + { + "epoch": 0.4173460450917098, + "grad_norm": 1.1330115795135498, + "learning_rate": 6.555132626249723e-06, + "loss": 0.3264, + "step": 20848 + }, + { + "epoch": 0.41736606360883816, + "grad_norm": 1.0106972455978394, + "learning_rate": 6.554824519055181e-06, + "loss": 0.3142, + "step": 20849 + }, + { + "epoch": 0.4173860821259665, + "grad_norm": 1.1646075248718262, + "learning_rate": 6.5545164053244905e-06, + "loss": 0.3248, + "step": 20850 + }, + { + "epoch": 0.41740610064309486, + "grad_norm": 1.8687444925308228, + "learning_rate": 6.554208285058949e-06, + "loss": 0.8311, + "step": 20851 + }, + { + "epoch": 0.4174261191602232, + "grad_norm": 1.0939996242523193, + "learning_rate": 6.553900158259849e-06, + "loss": 0.2786, + "step": 20852 + }, + { + "epoch": 0.41744613767735156, + "grad_norm": 1.3218142986297607, + "learning_rate": 6.553592024928487e-06, + "loss": 0.3578, + "step": 20853 + }, + { + "epoch": 0.4174661561944799, + "grad_norm": 2.0524582862854004, + "learning_rate": 6.553283885066159e-06, + "loss": 0.8121, + "step": 20854 + }, + { + "epoch": 0.41748617471160826, + "grad_norm": 1.076926589012146, + "learning_rate": 6.55297573867416e-06, + "loss": 0.375, + "step": 20855 + }, + { + "epoch": 0.41750619322873656, + "grad_norm": 1.89002525806427, + "learning_rate": 6.552667585753786e-06, + "loss": 0.7863, + "step": 20856 + }, + { + "epoch": 0.4175262117458649, + "grad_norm": 1.0826292037963867, + "learning_rate": 6.55235942630633e-06, + "loss": 0.3011, + "step": 20857 + }, + { + "epoch": 0.41754623026299326, + "grad_norm": 1.2296510934829712, + "learning_rate": 6.55205126033309e-06, + "loss": 0.3268, + "step": 20858 + }, + { + "epoch": 0.4175662487801216, + "grad_norm": 1.1541827917099, + "learning_rate": 6.55174308783536e-06, + "loss": 0.3325, + "step": 20859 + }, + { + "epoch": 0.41758626729724996, + "grad_norm": 1.1570227146148682, + "learning_rate": 6.551434908814435e-06, + "loss": 0.2873, + "step": 20860 + }, + { + "epoch": 0.4176062858143783, + "grad_norm": 1.1731566190719604, + "learning_rate": 6.5511267232716124e-06, + "loss": 0.2806, + "step": 20861 + }, + { + "epoch": 0.41762630433150666, + "grad_norm": 1.1198909282684326, + "learning_rate": 6.5508185312081884e-06, + "loss": 0.3334, + "step": 20862 + }, + { + "epoch": 0.417646322848635, + "grad_norm": 1.0770214796066284, + "learning_rate": 6.550510332625455e-06, + "loss": 0.2679, + "step": 20863 + }, + { + "epoch": 0.4176663413657633, + "grad_norm": 2.134288787841797, + "learning_rate": 6.550202127524712e-06, + "loss": 0.9066, + "step": 20864 + }, + { + "epoch": 0.41768635988289166, + "grad_norm": 1.0447497367858887, + "learning_rate": 6.549893915907251e-06, + "loss": 0.3263, + "step": 20865 + }, + { + "epoch": 0.41770637840002, + "grad_norm": 1.063062310218811, + "learning_rate": 6.54958569777437e-06, + "loss": 0.3206, + "step": 20866 + }, + { + "epoch": 0.41772639691714836, + "grad_norm": 1.0812981128692627, + "learning_rate": 6.549277473127364e-06, + "loss": 0.3436, + "step": 20867 + }, + { + "epoch": 0.4177464154342767, + "grad_norm": 1.2085338830947876, + "learning_rate": 6.5489692419675285e-06, + "loss": 0.3679, + "step": 20868 + }, + { + "epoch": 0.41776643395140506, + "grad_norm": 1.0429174900054932, + "learning_rate": 6.548661004296162e-06, + "loss": 0.3248, + "step": 20869 + }, + { + "epoch": 0.4177864524685334, + "grad_norm": 1.1039131879806519, + "learning_rate": 6.548352760114556e-06, + "loss": 0.273, + "step": 20870 + }, + { + "epoch": 0.41780647098566176, + "grad_norm": 1.1813019514083862, + "learning_rate": 6.548044509424007e-06, + "loss": 0.3744, + "step": 20871 + }, + { + "epoch": 0.41782648950279005, + "grad_norm": 1.193861722946167, + "learning_rate": 6.547736252225814e-06, + "loss": 0.2768, + "step": 20872 + }, + { + "epoch": 0.4178465080199184, + "grad_norm": 1.2639954090118408, + "learning_rate": 6.54742798852127e-06, + "loss": 0.3021, + "step": 20873 + }, + { + "epoch": 0.41786652653704676, + "grad_norm": 1.0994253158569336, + "learning_rate": 6.547119718311672e-06, + "loss": 0.3662, + "step": 20874 + }, + { + "epoch": 0.4178865450541751, + "grad_norm": 1.207269310951233, + "learning_rate": 6.546811441598315e-06, + "loss": 0.2901, + "step": 20875 + }, + { + "epoch": 0.41790656357130346, + "grad_norm": 1.0074595212936401, + "learning_rate": 6.546503158382497e-06, + "loss": 0.3455, + "step": 20876 + }, + { + "epoch": 0.4179265820884318, + "grad_norm": 1.167709469795227, + "learning_rate": 6.546194868665512e-06, + "loss": 0.2964, + "step": 20877 + }, + { + "epoch": 0.41794660060556016, + "grad_norm": 1.1473582983016968, + "learning_rate": 6.5458865724486575e-06, + "loss": 0.3044, + "step": 20878 + }, + { + "epoch": 0.4179666191226885, + "grad_norm": 1.198248267173767, + "learning_rate": 6.545578269733226e-06, + "loss": 0.3096, + "step": 20879 + }, + { + "epoch": 0.4179866376398168, + "grad_norm": 1.9140766859054565, + "learning_rate": 6.545269960520517e-06, + "loss": 0.8184, + "step": 20880 + }, + { + "epoch": 0.41800665615694516, + "grad_norm": 1.0444982051849365, + "learning_rate": 6.544961644811826e-06, + "loss": 0.3093, + "step": 20881 + }, + { + "epoch": 0.4180266746740735, + "grad_norm": 1.4881625175476074, + "learning_rate": 6.544653322608449e-06, + "loss": 0.2909, + "step": 20882 + }, + { + "epoch": 0.41804669319120186, + "grad_norm": 1.0139293670654297, + "learning_rate": 6.544344993911682e-06, + "loss": 0.2852, + "step": 20883 + }, + { + "epoch": 0.4180667117083302, + "grad_norm": 1.0911685228347778, + "learning_rate": 6.544036658722821e-06, + "loss": 0.3873, + "step": 20884 + }, + { + "epoch": 0.41808673022545856, + "grad_norm": 1.056626796722412, + "learning_rate": 6.543728317043161e-06, + "loss": 0.3477, + "step": 20885 + }, + { + "epoch": 0.4181067487425869, + "grad_norm": 1.039591908454895, + "learning_rate": 6.543419968874001e-06, + "loss": 0.2896, + "step": 20886 + }, + { + "epoch": 0.41812676725971526, + "grad_norm": 1.117364525794983, + "learning_rate": 6.543111614216635e-06, + "loss": 0.3066, + "step": 20887 + }, + { + "epoch": 0.41814678577684355, + "grad_norm": 1.1963837146759033, + "learning_rate": 6.542803253072359e-06, + "loss": 0.3601, + "step": 20888 + }, + { + "epoch": 0.4181668042939719, + "grad_norm": 1.1190439462661743, + "learning_rate": 6.542494885442471e-06, + "loss": 0.3336, + "step": 20889 + }, + { + "epoch": 0.41818682281110026, + "grad_norm": 1.0980455875396729, + "learning_rate": 6.542186511328266e-06, + "loss": 0.3602, + "step": 20890 + }, + { + "epoch": 0.4182068413282286, + "grad_norm": 1.2129173278808594, + "learning_rate": 6.541878130731041e-06, + "loss": 0.36, + "step": 20891 + }, + { + "epoch": 0.41822685984535696, + "grad_norm": 1.0562801361083984, + "learning_rate": 6.541569743652091e-06, + "loss": 0.3077, + "step": 20892 + }, + { + "epoch": 0.4182468783624853, + "grad_norm": 0.9558702111244202, + "learning_rate": 6.5412613500927155e-06, + "loss": 0.2497, + "step": 20893 + }, + { + "epoch": 0.41826689687961366, + "grad_norm": 1.0984948873519897, + "learning_rate": 6.540952950054207e-06, + "loss": 0.323, + "step": 20894 + }, + { + "epoch": 0.418286915396742, + "grad_norm": 1.1378748416900635, + "learning_rate": 6.540644543537866e-06, + "loss": 0.3233, + "step": 20895 + }, + { + "epoch": 0.4183069339138703, + "grad_norm": 1.818744421005249, + "learning_rate": 6.540336130544986e-06, + "loss": 0.8161, + "step": 20896 + }, + { + "epoch": 0.41832695243099866, + "grad_norm": 1.1069300174713135, + "learning_rate": 6.540027711076865e-06, + "loss": 0.3082, + "step": 20897 + }, + { + "epoch": 0.418346970948127, + "grad_norm": 1.2052537202835083, + "learning_rate": 6.539719285134798e-06, + "loss": 0.3191, + "step": 20898 + }, + { + "epoch": 0.41836698946525536, + "grad_norm": 1.069012999534607, + "learning_rate": 6.539410852720082e-06, + "loss": 0.3083, + "step": 20899 + }, + { + "epoch": 0.4183870079823837, + "grad_norm": 1.2015246152877808, + "learning_rate": 6.539102413834016e-06, + "loss": 0.3068, + "step": 20900 + }, + { + "epoch": 0.41840702649951206, + "grad_norm": 1.1262117624282837, + "learning_rate": 6.538793968477892e-06, + "loss": 0.3, + "step": 20901 + }, + { + "epoch": 0.4184270450166404, + "grad_norm": 1.7840850353240967, + "learning_rate": 6.538485516653011e-06, + "loss": 0.8286, + "step": 20902 + }, + { + "epoch": 0.41844706353376876, + "grad_norm": 1.1987426280975342, + "learning_rate": 6.538177058360668e-06, + "loss": 0.3765, + "step": 20903 + }, + { + "epoch": 0.41846708205089705, + "grad_norm": 1.1084797382354736, + "learning_rate": 6.53786859360216e-06, + "loss": 0.3431, + "step": 20904 + }, + { + "epoch": 0.4184871005680254, + "grad_norm": 1.8523130416870117, + "learning_rate": 6.537560122378781e-06, + "loss": 0.8501, + "step": 20905 + }, + { + "epoch": 0.41850711908515376, + "grad_norm": 1.1623036861419678, + "learning_rate": 6.537251644691833e-06, + "loss": 0.3293, + "step": 20906 + }, + { + "epoch": 0.4185271376022821, + "grad_norm": 1.0752615928649902, + "learning_rate": 6.536943160542608e-06, + "loss": 0.2804, + "step": 20907 + }, + { + "epoch": 0.41854715611941046, + "grad_norm": 1.058870792388916, + "learning_rate": 6.536634669932407e-06, + "loss": 0.2597, + "step": 20908 + }, + { + "epoch": 0.4185671746365388, + "grad_norm": 1.0542311668395996, + "learning_rate": 6.5363261728625236e-06, + "loss": 0.307, + "step": 20909 + }, + { + "epoch": 0.41858719315366716, + "grad_norm": 1.1155656576156616, + "learning_rate": 6.536017669334254e-06, + "loss": 0.3034, + "step": 20910 + }, + { + "epoch": 0.4186072116707955, + "grad_norm": 1.061899185180664, + "learning_rate": 6.535709159348898e-06, + "loss": 0.3021, + "step": 20911 + }, + { + "epoch": 0.4186272301879238, + "grad_norm": 1.1957136392593384, + "learning_rate": 6.53540064290775e-06, + "loss": 0.3073, + "step": 20912 + }, + { + "epoch": 0.41864724870505216, + "grad_norm": 0.9874668717384338, + "learning_rate": 6.53509212001211e-06, + "loss": 0.2633, + "step": 20913 + }, + { + "epoch": 0.4186672672221805, + "grad_norm": 1.0573679208755493, + "learning_rate": 6.534783590663271e-06, + "loss": 0.2996, + "step": 20914 + }, + { + "epoch": 0.41868728573930886, + "grad_norm": 1.1948045492172241, + "learning_rate": 6.534475054862535e-06, + "loss": 0.3611, + "step": 20915 + }, + { + "epoch": 0.4187073042564372, + "grad_norm": 2.015850782394409, + "learning_rate": 6.534166512611193e-06, + "loss": 0.7674, + "step": 20916 + }, + { + "epoch": 0.41872732277356556, + "grad_norm": 1.1063786745071411, + "learning_rate": 6.533857963910547e-06, + "loss": 0.272, + "step": 20917 + }, + { + "epoch": 0.4187473412906939, + "grad_norm": 1.242722511291504, + "learning_rate": 6.53354940876189e-06, + "loss": 0.2456, + "step": 20918 + }, + { + "epoch": 0.41876735980782226, + "grad_norm": 1.285360336303711, + "learning_rate": 6.533240847166524e-06, + "loss": 0.319, + "step": 20919 + }, + { + "epoch": 0.41878737832495055, + "grad_norm": 1.1154322624206543, + "learning_rate": 6.532932279125743e-06, + "loss": 0.3699, + "step": 20920 + }, + { + "epoch": 0.4188073968420789, + "grad_norm": 1.041009545326233, + "learning_rate": 6.5326237046408435e-06, + "loss": 0.2996, + "step": 20921 + }, + { + "epoch": 0.41882741535920726, + "grad_norm": 1.0244089365005493, + "learning_rate": 6.532315123713125e-06, + "loss": 0.2913, + "step": 20922 + }, + { + "epoch": 0.4188474338763356, + "grad_norm": 1.051214575767517, + "learning_rate": 6.532006536343882e-06, + "loss": 0.2936, + "step": 20923 + }, + { + "epoch": 0.41886745239346396, + "grad_norm": 1.1641159057617188, + "learning_rate": 6.531697942534414e-06, + "loss": 0.3327, + "step": 20924 + }, + { + "epoch": 0.4188874709105923, + "grad_norm": 2.098973274230957, + "learning_rate": 6.531389342286016e-06, + "loss": 0.8426, + "step": 20925 + }, + { + "epoch": 0.41890748942772066, + "grad_norm": 1.2655781507492065, + "learning_rate": 6.531080735599989e-06, + "loss": 0.2988, + "step": 20926 + }, + { + "epoch": 0.418927507944849, + "grad_norm": 1.168069839477539, + "learning_rate": 6.530772122477628e-06, + "loss": 0.3237, + "step": 20927 + }, + { + "epoch": 0.4189475264619773, + "grad_norm": 1.8578640222549438, + "learning_rate": 6.5304635029202305e-06, + "loss": 0.8048, + "step": 20928 + }, + { + "epoch": 0.41896754497910565, + "grad_norm": 1.100696086883545, + "learning_rate": 6.530154876929093e-06, + "loss": 0.3176, + "step": 20929 + }, + { + "epoch": 0.418987563496234, + "grad_norm": 0.9929807782173157, + "learning_rate": 6.529846244505514e-06, + "loss": 0.3053, + "step": 20930 + }, + { + "epoch": 0.41900758201336236, + "grad_norm": 1.1099662780761719, + "learning_rate": 6.529537605650789e-06, + "loss": 0.2922, + "step": 20931 + }, + { + "epoch": 0.4190276005304907, + "grad_norm": 1.1124168634414673, + "learning_rate": 6.529228960366217e-06, + "loss": 0.3386, + "step": 20932 + }, + { + "epoch": 0.41904761904761906, + "grad_norm": 1.1305265426635742, + "learning_rate": 6.528920308653099e-06, + "loss": 0.3139, + "step": 20933 + }, + { + "epoch": 0.4190676375647474, + "grad_norm": 1.087395429611206, + "learning_rate": 6.528611650512726e-06, + "loss": 0.3039, + "step": 20934 + }, + { + "epoch": 0.41908765608187576, + "grad_norm": 1.015376329421997, + "learning_rate": 6.5283029859464e-06, + "loss": 0.3203, + "step": 20935 + }, + { + "epoch": 0.41910767459900405, + "grad_norm": 1.096664309501648, + "learning_rate": 6.527994314955416e-06, + "loss": 0.3102, + "step": 20936 + }, + { + "epoch": 0.4191276931161324, + "grad_norm": 1.2249256372451782, + "learning_rate": 6.5276856375410724e-06, + "loss": 0.2998, + "step": 20937 + }, + { + "epoch": 0.41914771163326076, + "grad_norm": 1.1902856826782227, + "learning_rate": 6.527376953704668e-06, + "loss": 0.3345, + "step": 20938 + }, + { + "epoch": 0.4191677301503891, + "grad_norm": 1.0985219478607178, + "learning_rate": 6.5270682634475005e-06, + "loss": 0.2983, + "step": 20939 + }, + { + "epoch": 0.41918774866751746, + "grad_norm": 1.1088379621505737, + "learning_rate": 6.526759566770866e-06, + "loss": 0.3139, + "step": 20940 + }, + { + "epoch": 0.4192077671846458, + "grad_norm": 1.1747651100158691, + "learning_rate": 6.526450863676063e-06, + "loss": 0.292, + "step": 20941 + }, + { + "epoch": 0.41922778570177416, + "grad_norm": 1.096779227256775, + "learning_rate": 6.52614215416439e-06, + "loss": 0.3682, + "step": 20942 + }, + { + "epoch": 0.4192478042189025, + "grad_norm": 1.000592827796936, + "learning_rate": 6.525833438237141e-06, + "loss": 0.3432, + "step": 20943 + }, + { + "epoch": 0.4192678227360308, + "grad_norm": 1.108679175376892, + "learning_rate": 6.525524715895618e-06, + "loss": 0.3047, + "step": 20944 + }, + { + "epoch": 0.41928784125315915, + "grad_norm": 1.150536060333252, + "learning_rate": 6.525215987141118e-06, + "loss": 0.3478, + "step": 20945 + }, + { + "epoch": 0.4193078597702875, + "grad_norm": 0.9932423830032349, + "learning_rate": 6.524907251974938e-06, + "loss": 0.2867, + "step": 20946 + }, + { + "epoch": 0.41932787828741586, + "grad_norm": 1.1176549196243286, + "learning_rate": 6.524598510398376e-06, + "loss": 0.3321, + "step": 20947 + }, + { + "epoch": 0.4193478968045442, + "grad_norm": 1.118972659111023, + "learning_rate": 6.52428976241273e-06, + "loss": 0.3258, + "step": 20948 + }, + { + "epoch": 0.41936791532167256, + "grad_norm": 0.9814110398292542, + "learning_rate": 6.5239810080192965e-06, + "loss": 0.2844, + "step": 20949 + }, + { + "epoch": 0.4193879338388009, + "grad_norm": 1.0652875900268555, + "learning_rate": 6.523672247219376e-06, + "loss": 0.3368, + "step": 20950 + }, + { + "epoch": 0.41940795235592926, + "grad_norm": 0.9855347871780396, + "learning_rate": 6.523363480014266e-06, + "loss": 0.3036, + "step": 20951 + }, + { + "epoch": 0.41942797087305755, + "grad_norm": 1.0987870693206787, + "learning_rate": 6.523054706405263e-06, + "loss": 0.3059, + "step": 20952 + }, + { + "epoch": 0.4194479893901859, + "grad_norm": 1.2375298738479614, + "learning_rate": 6.5227459263936655e-06, + "loss": 0.3584, + "step": 20953 + }, + { + "epoch": 0.41946800790731426, + "grad_norm": 1.8897234201431274, + "learning_rate": 6.5224371399807716e-06, + "loss": 0.8393, + "step": 20954 + }, + { + "epoch": 0.4194880264244426, + "grad_norm": 1.0398088693618774, + "learning_rate": 6.522128347167881e-06, + "loss": 0.2961, + "step": 20955 + }, + { + "epoch": 0.41950804494157096, + "grad_norm": 1.2392886877059937, + "learning_rate": 6.52181954795629e-06, + "loss": 0.2961, + "step": 20956 + }, + { + "epoch": 0.4195280634586993, + "grad_norm": 1.0667108297348022, + "learning_rate": 6.521510742347296e-06, + "loss": 0.2924, + "step": 20957 + }, + { + "epoch": 0.41954808197582766, + "grad_norm": 1.9979110956192017, + "learning_rate": 6.521201930342198e-06, + "loss": 0.764, + "step": 20958 + }, + { + "epoch": 0.419568100492956, + "grad_norm": 1.1719640493392944, + "learning_rate": 6.520893111942296e-06, + "loss": 0.3363, + "step": 20959 + }, + { + "epoch": 0.4195881190100843, + "grad_norm": 1.1069363355636597, + "learning_rate": 6.5205842871488865e-06, + "loss": 0.282, + "step": 20960 + }, + { + "epoch": 0.41960813752721265, + "grad_norm": 1.239327311515808, + "learning_rate": 6.5202754559632675e-06, + "loss": 0.3313, + "step": 20961 + }, + { + "epoch": 0.419628156044341, + "grad_norm": 1.2438743114471436, + "learning_rate": 6.5199666183867375e-06, + "loss": 0.3134, + "step": 20962 + }, + { + "epoch": 0.41964817456146936, + "grad_norm": 1.173071265220642, + "learning_rate": 6.519657774420593e-06, + "loss": 0.2975, + "step": 20963 + }, + { + "epoch": 0.4196681930785977, + "grad_norm": 1.117101788520813, + "learning_rate": 6.519348924066138e-06, + "loss": 0.3064, + "step": 20964 + }, + { + "epoch": 0.41968821159572606, + "grad_norm": 1.1265895366668701, + "learning_rate": 6.519040067324665e-06, + "loss": 0.3341, + "step": 20965 + }, + { + "epoch": 0.4197082301128544, + "grad_norm": 1.07606041431427, + "learning_rate": 6.518731204197475e-06, + "loss": 0.3217, + "step": 20966 + }, + { + "epoch": 0.41972824862998276, + "grad_norm": 1.1328295469284058, + "learning_rate": 6.5184223346858655e-06, + "loss": 0.3602, + "step": 20967 + }, + { + "epoch": 0.41974826714711105, + "grad_norm": 1.0745073556900024, + "learning_rate": 6.518113458791135e-06, + "loss": 0.3547, + "step": 20968 + }, + { + "epoch": 0.4197682856642394, + "grad_norm": 1.1232397556304932, + "learning_rate": 6.517804576514584e-06, + "loss": 0.2954, + "step": 20969 + }, + { + "epoch": 0.41978830418136776, + "grad_norm": 1.2493438720703125, + "learning_rate": 6.5174956878575065e-06, + "loss": 0.3627, + "step": 20970 + }, + { + "epoch": 0.4198083226984961, + "grad_norm": 1.114661693572998, + "learning_rate": 6.5171867928212065e-06, + "loss": 0.3534, + "step": 20971 + }, + { + "epoch": 0.41982834121562446, + "grad_norm": 1.8499343395233154, + "learning_rate": 6.516877891406976e-06, + "loss": 0.8047, + "step": 20972 + }, + { + "epoch": 0.4198483597327528, + "grad_norm": 1.939232349395752, + "learning_rate": 6.51656898361612e-06, + "loss": 0.793, + "step": 20973 + }, + { + "epoch": 0.41986837824988116, + "grad_norm": 1.043881893157959, + "learning_rate": 6.516260069449934e-06, + "loss": 0.2903, + "step": 20974 + }, + { + "epoch": 0.4198883967670095, + "grad_norm": 1.2512792348861694, + "learning_rate": 6.515951148909717e-06, + "loss": 0.3414, + "step": 20975 + }, + { + "epoch": 0.4199084152841378, + "grad_norm": 1.1470240354537964, + "learning_rate": 6.515642221996768e-06, + "loss": 0.288, + "step": 20976 + }, + { + "epoch": 0.41992843380126615, + "grad_norm": 1.2764766216278076, + "learning_rate": 6.515333288712384e-06, + "loss": 0.343, + "step": 20977 + }, + { + "epoch": 0.4199484523183945, + "grad_norm": 1.0833919048309326, + "learning_rate": 6.5150243490578655e-06, + "loss": 0.2958, + "step": 20978 + }, + { + "epoch": 0.41996847083552286, + "grad_norm": 1.118004560470581, + "learning_rate": 6.514715403034512e-06, + "loss": 0.3155, + "step": 20979 + }, + { + "epoch": 0.4199884893526512, + "grad_norm": 1.1531646251678467, + "learning_rate": 6.514406450643619e-06, + "loss": 0.3364, + "step": 20980 + }, + { + "epoch": 0.42000850786977956, + "grad_norm": 1.2231051921844482, + "learning_rate": 6.514097491886489e-06, + "loss": 0.3734, + "step": 20981 + }, + { + "epoch": 0.4200285263869079, + "grad_norm": 1.9703713655471802, + "learning_rate": 6.513788526764417e-06, + "loss": 0.8102, + "step": 20982 + }, + { + "epoch": 0.42004854490403626, + "grad_norm": 1.104737401008606, + "learning_rate": 6.513479555278704e-06, + "loss": 0.2781, + "step": 20983 + }, + { + "epoch": 0.42006856342116455, + "grad_norm": 1.092758059501648, + "learning_rate": 6.513170577430651e-06, + "loss": 0.2953, + "step": 20984 + }, + { + "epoch": 0.4200885819382929, + "grad_norm": 1.1087533235549927, + "learning_rate": 6.512861593221552e-06, + "loss": 0.3113, + "step": 20985 + }, + { + "epoch": 0.42010860045542125, + "grad_norm": 1.029105544090271, + "learning_rate": 6.512552602652711e-06, + "loss": 0.3288, + "step": 20986 + }, + { + "epoch": 0.4201286189725496, + "grad_norm": 1.8420264720916748, + "learning_rate": 6.512243605725422e-06, + "loss": 0.7822, + "step": 20987 + }, + { + "epoch": 0.42014863748967796, + "grad_norm": 1.2908340692520142, + "learning_rate": 6.511934602440986e-06, + "loss": 0.3574, + "step": 20988 + }, + { + "epoch": 0.4201686560068063, + "grad_norm": 1.1751511096954346, + "learning_rate": 6.511625592800703e-06, + "loss": 0.3042, + "step": 20989 + }, + { + "epoch": 0.42018867452393466, + "grad_norm": 1.0047754049301147, + "learning_rate": 6.511316576805873e-06, + "loss": 0.3117, + "step": 20990 + }, + { + "epoch": 0.420208693041063, + "grad_norm": 1.2221081256866455, + "learning_rate": 6.511007554457791e-06, + "loss": 0.387, + "step": 20991 + }, + { + "epoch": 0.4202287115581913, + "grad_norm": 1.1450244188308716, + "learning_rate": 6.5106985257577606e-06, + "loss": 0.3251, + "step": 20992 + }, + { + "epoch": 0.42024873007531965, + "grad_norm": 1.1494399309158325, + "learning_rate": 6.510389490707078e-06, + "loss": 0.3036, + "step": 20993 + }, + { + "epoch": 0.420268748592448, + "grad_norm": 1.2268974781036377, + "learning_rate": 6.510080449307041e-06, + "loss": 0.3478, + "step": 20994 + }, + { + "epoch": 0.42028876710957636, + "grad_norm": 1.1080447435379028, + "learning_rate": 6.509771401558953e-06, + "loss": 0.2676, + "step": 20995 + }, + { + "epoch": 0.4203087856267047, + "grad_norm": 1.0712487697601318, + "learning_rate": 6.50946234746411e-06, + "loss": 0.33, + "step": 20996 + }, + { + "epoch": 0.42032880414383306, + "grad_norm": 1.020742416381836, + "learning_rate": 6.5091532870238126e-06, + "loss": 0.326, + "step": 20997 + }, + { + "epoch": 0.4203488226609614, + "grad_norm": 1.1560084819793701, + "learning_rate": 6.508844220239359e-06, + "loss": 0.3032, + "step": 20998 + }, + { + "epoch": 0.42036884117808976, + "grad_norm": 1.9557998180389404, + "learning_rate": 6.508535147112049e-06, + "loss": 0.7585, + "step": 20999 + }, + { + "epoch": 0.42038885969521805, + "grad_norm": 1.0390876531600952, + "learning_rate": 6.508226067643183e-06, + "loss": 0.3307, + "step": 21000 + }, + { + "epoch": 0.4204088782123464, + "grad_norm": 1.1970043182373047, + "learning_rate": 6.507916981834057e-06, + "loss": 0.3408, + "step": 21001 + }, + { + "epoch": 0.42042889672947475, + "grad_norm": 1.1333645582199097, + "learning_rate": 6.507607889685975e-06, + "loss": 0.3078, + "step": 21002 + }, + { + "epoch": 0.4204489152466031, + "grad_norm": 1.144403338432312, + "learning_rate": 6.5072987912002326e-06, + "loss": 0.2865, + "step": 21003 + }, + { + "epoch": 0.42046893376373146, + "grad_norm": 1.1440515518188477, + "learning_rate": 6.506989686378131e-06, + "loss": 0.29, + "step": 21004 + }, + { + "epoch": 0.4204889522808598, + "grad_norm": 1.2035014629364014, + "learning_rate": 6.506680575220968e-06, + "loss": 0.2944, + "step": 21005 + }, + { + "epoch": 0.42050897079798816, + "grad_norm": 1.2278027534484863, + "learning_rate": 6.506371457730046e-06, + "loss": 0.2896, + "step": 21006 + }, + { + "epoch": 0.4205289893151165, + "grad_norm": 1.0872961282730103, + "learning_rate": 6.506062333906661e-06, + "loss": 0.3033, + "step": 21007 + }, + { + "epoch": 0.4205490078322448, + "grad_norm": 1.1203910112380981, + "learning_rate": 6.505753203752115e-06, + "loss": 0.3282, + "step": 21008 + }, + { + "epoch": 0.42056902634937315, + "grad_norm": 1.1118004322052002, + "learning_rate": 6.505444067267704e-06, + "loss": 0.307, + "step": 21009 + }, + { + "epoch": 0.4205890448665015, + "grad_norm": 1.0015320777893066, + "learning_rate": 6.505134924454734e-06, + "loss": 0.3219, + "step": 21010 + }, + { + "epoch": 0.42060906338362986, + "grad_norm": 1.0806993246078491, + "learning_rate": 6.504825775314498e-06, + "loss": 0.2825, + "step": 21011 + }, + { + "epoch": 0.4206290819007582, + "grad_norm": 1.0551912784576416, + "learning_rate": 6.5045166198483e-06, + "loss": 0.3333, + "step": 21012 + }, + { + "epoch": 0.42064910041788656, + "grad_norm": 1.004157304763794, + "learning_rate": 6.5042074580574365e-06, + "loss": 0.2726, + "step": 21013 + }, + { + "epoch": 0.4206691189350149, + "grad_norm": 1.0637425184249878, + "learning_rate": 6.503898289943209e-06, + "loss": 0.2692, + "step": 21014 + }, + { + "epoch": 0.42068913745214326, + "grad_norm": 1.2121062278747559, + "learning_rate": 6.5035891155069165e-06, + "loss": 0.3684, + "step": 21015 + }, + { + "epoch": 0.42070915596927155, + "grad_norm": 1.2128090858459473, + "learning_rate": 6.50327993474986e-06, + "loss": 0.3225, + "step": 21016 + }, + { + "epoch": 0.4207291744863999, + "grad_norm": 1.1188509464263916, + "learning_rate": 6.502970747673337e-06, + "loss": 0.2695, + "step": 21017 + }, + { + "epoch": 0.42074919300352825, + "grad_norm": 1.0542832612991333, + "learning_rate": 6.502661554278649e-06, + "loss": 0.3314, + "step": 21018 + }, + { + "epoch": 0.4207692115206566, + "grad_norm": 1.1455166339874268, + "learning_rate": 6.502352354567095e-06, + "loss": 0.3043, + "step": 21019 + }, + { + "epoch": 0.42078923003778496, + "grad_norm": 1.3325852155685425, + "learning_rate": 6.502043148539975e-06, + "loss": 0.3258, + "step": 21020 + }, + { + "epoch": 0.4208092485549133, + "grad_norm": 1.1273471117019653, + "learning_rate": 6.501733936198589e-06, + "loss": 0.3469, + "step": 21021 + }, + { + "epoch": 0.42082926707204166, + "grad_norm": 1.0684887170791626, + "learning_rate": 6.5014247175442355e-06, + "loss": 0.336, + "step": 21022 + }, + { + "epoch": 0.42084928558917, + "grad_norm": 1.0674147605895996, + "learning_rate": 6.501115492578217e-06, + "loss": 0.2904, + "step": 21023 + }, + { + "epoch": 0.4208693041062983, + "grad_norm": 1.111859917640686, + "learning_rate": 6.500806261301833e-06, + "loss": 0.2874, + "step": 21024 + }, + { + "epoch": 0.42088932262342665, + "grad_norm": 1.1429158449172974, + "learning_rate": 6.500497023716381e-06, + "loss": 0.2986, + "step": 21025 + }, + { + "epoch": 0.420909341140555, + "grad_norm": 1.1643192768096924, + "learning_rate": 6.500187779823162e-06, + "loss": 0.3808, + "step": 21026 + }, + { + "epoch": 0.42092935965768336, + "grad_norm": 1.0793464183807373, + "learning_rate": 6.499878529623475e-06, + "loss": 0.3061, + "step": 21027 + }, + { + "epoch": 0.4209493781748117, + "grad_norm": 1.0566948652267456, + "learning_rate": 6.4995692731186255e-06, + "loss": 0.3314, + "step": 21028 + }, + { + "epoch": 0.42096939669194006, + "grad_norm": 1.2296173572540283, + "learning_rate": 6.499260010309907e-06, + "loss": 0.3437, + "step": 21029 + }, + { + "epoch": 0.4209894152090684, + "grad_norm": 1.0658519268035889, + "learning_rate": 6.498950741198623e-06, + "loss": 0.3118, + "step": 21030 + }, + { + "epoch": 0.42100943372619676, + "grad_norm": 1.0503709316253662, + "learning_rate": 6.498641465786072e-06, + "loss": 0.3308, + "step": 21031 + }, + { + "epoch": 0.42102945224332505, + "grad_norm": 1.0687469244003296, + "learning_rate": 6.498332184073555e-06, + "loss": 0.3692, + "step": 21032 + }, + { + "epoch": 0.4210494707604534, + "grad_norm": 1.035410761833191, + "learning_rate": 6.498022896062372e-06, + "loss": 0.2855, + "step": 21033 + }, + { + "epoch": 0.42106948927758175, + "grad_norm": 1.1028077602386475, + "learning_rate": 6.497713601753822e-06, + "loss": 0.3845, + "step": 21034 + }, + { + "epoch": 0.4210895077947101, + "grad_norm": 0.9148303270339966, + "learning_rate": 6.497404301149208e-06, + "loss": 0.2729, + "step": 21035 + }, + { + "epoch": 0.42110952631183846, + "grad_norm": 0.9749636650085449, + "learning_rate": 6.4970949942498276e-06, + "loss": 0.2955, + "step": 21036 + }, + { + "epoch": 0.4211295448289668, + "grad_norm": 1.1791319847106934, + "learning_rate": 6.496785681056983e-06, + "loss": 0.3276, + "step": 21037 + }, + { + "epoch": 0.42114956334609516, + "grad_norm": 1.0535930395126343, + "learning_rate": 6.496476361571973e-06, + "loss": 0.2965, + "step": 21038 + }, + { + "epoch": 0.4211695818632235, + "grad_norm": 1.0302963256835938, + "learning_rate": 6.496167035796097e-06, + "loss": 0.279, + "step": 21039 + }, + { + "epoch": 0.4211896003803518, + "grad_norm": 1.1272915601730347, + "learning_rate": 6.495857703730658e-06, + "loss": 0.3021, + "step": 21040 + }, + { + "epoch": 0.42120961889748015, + "grad_norm": 1.2199453115463257, + "learning_rate": 6.495548365376956e-06, + "loss": 0.3841, + "step": 21041 + }, + { + "epoch": 0.4212296374146085, + "grad_norm": 1.1213676929473877, + "learning_rate": 6.495239020736289e-06, + "loss": 0.3025, + "step": 21042 + }, + { + "epoch": 0.42124965593173685, + "grad_norm": 0.9857500195503235, + "learning_rate": 6.494929669809959e-06, + "loss": 0.3018, + "step": 21043 + }, + { + "epoch": 0.4212696744488652, + "grad_norm": 1.266946792602539, + "learning_rate": 6.494620312599269e-06, + "loss": 0.3114, + "step": 21044 + }, + { + "epoch": 0.42128969296599356, + "grad_norm": 1.193486213684082, + "learning_rate": 6.494310949105514e-06, + "loss": 0.3136, + "step": 21045 + }, + { + "epoch": 0.4213097114831219, + "grad_norm": 1.122215986251831, + "learning_rate": 6.494001579329998e-06, + "loss": 0.3071, + "step": 21046 + }, + { + "epoch": 0.42132973000025026, + "grad_norm": 1.745104193687439, + "learning_rate": 6.49369220327402e-06, + "loss": 0.7946, + "step": 21047 + }, + { + "epoch": 0.42134974851737855, + "grad_norm": 1.1275075674057007, + "learning_rate": 6.493382820938884e-06, + "loss": 0.3272, + "step": 21048 + }, + { + "epoch": 0.4213697670345069, + "grad_norm": 1.2856022119522095, + "learning_rate": 6.493073432325886e-06, + "loss": 0.3642, + "step": 21049 + }, + { + "epoch": 0.42138978555163525, + "grad_norm": 0.9704827666282654, + "learning_rate": 6.49276403743633e-06, + "loss": 0.3259, + "step": 21050 + }, + { + "epoch": 0.4214098040687636, + "grad_norm": 1.1928718090057373, + "learning_rate": 6.492454636271513e-06, + "loss": 0.3318, + "step": 21051 + }, + { + "epoch": 0.42142982258589196, + "grad_norm": 1.3164483308792114, + "learning_rate": 6.4921452288327395e-06, + "loss": 0.3067, + "step": 21052 + }, + { + "epoch": 0.4214498411030203, + "grad_norm": 1.1510939598083496, + "learning_rate": 6.491835815121308e-06, + "loss": 0.3164, + "step": 21053 + }, + { + "epoch": 0.42146985962014866, + "grad_norm": 1.096990942955017, + "learning_rate": 6.49152639513852e-06, + "loss": 0.2934, + "step": 21054 + }, + { + "epoch": 0.421489878137277, + "grad_norm": 1.180698037147522, + "learning_rate": 6.491216968885677e-06, + "loss": 0.3299, + "step": 21055 + }, + { + "epoch": 0.4215098966544053, + "grad_norm": 1.1893659830093384, + "learning_rate": 6.490907536364077e-06, + "loss": 0.3425, + "step": 21056 + }, + { + "epoch": 0.42152991517153365, + "grad_norm": 1.1636309623718262, + "learning_rate": 6.490598097575024e-06, + "loss": 0.2683, + "step": 21057 + }, + { + "epoch": 0.421549933688662, + "grad_norm": 1.1226412057876587, + "learning_rate": 6.490288652519816e-06, + "loss": 0.3012, + "step": 21058 + }, + { + "epoch": 0.42156995220579035, + "grad_norm": 1.1533228158950806, + "learning_rate": 6.489979201199757e-06, + "loss": 0.3084, + "step": 21059 + }, + { + "epoch": 0.4215899707229187, + "grad_norm": 1.178276538848877, + "learning_rate": 6.4896697436161435e-06, + "loss": 0.3337, + "step": 21060 + }, + { + "epoch": 0.42160998924004706, + "grad_norm": 1.6764438152313232, + "learning_rate": 6.489360279770281e-06, + "loss": 0.8564, + "step": 21061 + }, + { + "epoch": 0.4216300077571754, + "grad_norm": 1.0921317338943481, + "learning_rate": 6.489050809663468e-06, + "loss": 0.3283, + "step": 21062 + }, + { + "epoch": 0.42165002627430376, + "grad_norm": 1.0970962047576904, + "learning_rate": 6.488741333297006e-06, + "loss": 0.3143, + "step": 21063 + }, + { + "epoch": 0.42167004479143205, + "grad_norm": 1.1823595762252808, + "learning_rate": 6.488431850672196e-06, + "loss": 0.3039, + "step": 21064 + }, + { + "epoch": 0.4216900633085604, + "grad_norm": 1.088172435760498, + "learning_rate": 6.4881223617903375e-06, + "loss": 0.3436, + "step": 21065 + }, + { + "epoch": 0.42171008182568875, + "grad_norm": 1.0779858827590942, + "learning_rate": 6.4878128666527344e-06, + "loss": 0.2927, + "step": 21066 + }, + { + "epoch": 0.4217301003428171, + "grad_norm": 1.887524962425232, + "learning_rate": 6.487503365260686e-06, + "loss": 0.7664, + "step": 21067 + }, + { + "epoch": 0.42175011885994546, + "grad_norm": 0.9845516085624695, + "learning_rate": 6.487193857615493e-06, + "loss": 0.2542, + "step": 21068 + }, + { + "epoch": 0.4217701373770738, + "grad_norm": 1.152888536453247, + "learning_rate": 6.486884343718457e-06, + "loss": 0.3011, + "step": 21069 + }, + { + "epoch": 0.42179015589420216, + "grad_norm": 1.0664535760879517, + "learning_rate": 6.4865748235708796e-06, + "loss": 0.273, + "step": 21070 + }, + { + "epoch": 0.4218101744113305, + "grad_norm": 1.9036195278167725, + "learning_rate": 6.486265297174062e-06, + "loss": 0.8947, + "step": 21071 + }, + { + "epoch": 0.4218301929284588, + "grad_norm": 1.9561735391616821, + "learning_rate": 6.485955764529303e-06, + "loss": 0.7893, + "step": 21072 + }, + { + "epoch": 0.42185021144558715, + "grad_norm": 1.2591769695281982, + "learning_rate": 6.485646225637907e-06, + "loss": 0.3376, + "step": 21073 + }, + { + "epoch": 0.4218702299627155, + "grad_norm": 1.1530649662017822, + "learning_rate": 6.485336680501174e-06, + "loss": 0.3237, + "step": 21074 + }, + { + "epoch": 0.42189024847984385, + "grad_norm": 1.2257895469665527, + "learning_rate": 6.485027129120406e-06, + "loss": 0.322, + "step": 21075 + }, + { + "epoch": 0.4219102669969722, + "grad_norm": 1.0144648551940918, + "learning_rate": 6.484717571496903e-06, + "loss": 0.3123, + "step": 21076 + }, + { + "epoch": 0.42193028551410056, + "grad_norm": 1.2223050594329834, + "learning_rate": 6.484408007631967e-06, + "loss": 0.3283, + "step": 21077 + }, + { + "epoch": 0.4219503040312289, + "grad_norm": 1.1984790563583374, + "learning_rate": 6.4840984375268965e-06, + "loss": 0.3108, + "step": 21078 + }, + { + "epoch": 0.42197032254835726, + "grad_norm": 1.1714431047439575, + "learning_rate": 6.483788861182999e-06, + "loss": 0.3475, + "step": 21079 + }, + { + "epoch": 0.42199034106548555, + "grad_norm": 1.0822380781173706, + "learning_rate": 6.483479278601571e-06, + "loss": 0.3028, + "step": 21080 + }, + { + "epoch": 0.4220103595826139, + "grad_norm": 1.2784327268600464, + "learning_rate": 6.483169689783917e-06, + "loss": 0.3253, + "step": 21081 + }, + { + "epoch": 0.42203037809974225, + "grad_norm": 0.975683331489563, + "learning_rate": 6.482860094731335e-06, + "loss": 0.294, + "step": 21082 + }, + { + "epoch": 0.4220503966168706, + "grad_norm": 1.1793197393417358, + "learning_rate": 6.482550493445129e-06, + "loss": 0.3191, + "step": 21083 + }, + { + "epoch": 0.42207041513399896, + "grad_norm": 1.1589348316192627, + "learning_rate": 6.4822408859265985e-06, + "loss": 0.3216, + "step": 21084 + }, + { + "epoch": 0.4220904336511273, + "grad_norm": 1.8628618717193604, + "learning_rate": 6.4819312721770465e-06, + "loss": 0.8147, + "step": 21085 + }, + { + "epoch": 0.42211045216825566, + "grad_norm": 1.1716433763504028, + "learning_rate": 6.481621652197776e-06, + "loss": 0.3648, + "step": 21086 + }, + { + "epoch": 0.422130470685384, + "grad_norm": 1.202048897743225, + "learning_rate": 6.481312025990087e-06, + "loss": 0.2644, + "step": 21087 + }, + { + "epoch": 0.4221504892025123, + "grad_norm": 1.0724579095840454, + "learning_rate": 6.48100239355528e-06, + "loss": 0.2806, + "step": 21088 + }, + { + "epoch": 0.42217050771964065, + "grad_norm": 1.1355851888656616, + "learning_rate": 6.480692754894658e-06, + "loss": 0.3424, + "step": 21089 + }, + { + "epoch": 0.422190526236769, + "grad_norm": 1.0092344284057617, + "learning_rate": 6.480383110009523e-06, + "loss": 0.272, + "step": 21090 + }, + { + "epoch": 0.42221054475389735, + "grad_norm": 1.0213865041732788, + "learning_rate": 6.480073458901173e-06, + "loss": 0.2683, + "step": 21091 + }, + { + "epoch": 0.4222305632710257, + "grad_norm": 1.0575957298278809, + "learning_rate": 6.479763801570915e-06, + "loss": 0.3277, + "step": 21092 + }, + { + "epoch": 0.42225058178815406, + "grad_norm": 1.049744963645935, + "learning_rate": 6.479454138020048e-06, + "loss": 0.3088, + "step": 21093 + }, + { + "epoch": 0.4222706003052824, + "grad_norm": 1.134218454360962, + "learning_rate": 6.479144468249875e-06, + "loss": 0.338, + "step": 21094 + }, + { + "epoch": 0.42229061882241076, + "grad_norm": 1.0186902284622192, + "learning_rate": 6.478834792261696e-06, + "loss": 0.3017, + "step": 21095 + }, + { + "epoch": 0.42231063733953905, + "grad_norm": 1.0888174772262573, + "learning_rate": 6.4785251100568144e-06, + "loss": 0.3125, + "step": 21096 + }, + { + "epoch": 0.4223306558566674, + "grad_norm": 1.1091660261154175, + "learning_rate": 6.478215421636531e-06, + "loss": 0.3402, + "step": 21097 + }, + { + "epoch": 0.42235067437379575, + "grad_norm": 1.0519063472747803, + "learning_rate": 6.477905727002148e-06, + "loss": 0.3153, + "step": 21098 + }, + { + "epoch": 0.4223706928909241, + "grad_norm": 1.1591688394546509, + "learning_rate": 6.4775960261549674e-06, + "loss": 0.311, + "step": 21099 + }, + { + "epoch": 0.42239071140805245, + "grad_norm": 1.0319572687149048, + "learning_rate": 6.477286319096292e-06, + "loss": 0.3095, + "step": 21100 + }, + { + "epoch": 0.4224107299251808, + "grad_norm": 1.1063686609268188, + "learning_rate": 6.476976605827422e-06, + "loss": 0.3084, + "step": 21101 + }, + { + "epoch": 0.42243074844230916, + "grad_norm": 1.286604881286621, + "learning_rate": 6.47666688634966e-06, + "loss": 0.3369, + "step": 21102 + }, + { + "epoch": 0.4224507669594375, + "grad_norm": 1.1193385124206543, + "learning_rate": 6.476357160664309e-06, + "loss": 0.3265, + "step": 21103 + }, + { + "epoch": 0.4224707854765658, + "grad_norm": 1.1735739707946777, + "learning_rate": 6.4760474287726685e-06, + "loss": 0.3128, + "step": 21104 + }, + { + "epoch": 0.42249080399369415, + "grad_norm": 1.1711280345916748, + "learning_rate": 6.475737690676044e-06, + "loss": 0.3342, + "step": 21105 + }, + { + "epoch": 0.4225108225108225, + "grad_norm": 1.1651924848556519, + "learning_rate": 6.4754279463757364e-06, + "loss": 0.3553, + "step": 21106 + }, + { + "epoch": 0.42253084102795085, + "grad_norm": 1.0494798421859741, + "learning_rate": 6.475118195873045e-06, + "loss": 0.3293, + "step": 21107 + }, + { + "epoch": 0.4225508595450792, + "grad_norm": 1.1569726467132568, + "learning_rate": 6.474808439169275e-06, + "loss": 0.3317, + "step": 21108 + }, + { + "epoch": 0.42257087806220756, + "grad_norm": 1.0170754194259644, + "learning_rate": 6.474498676265729e-06, + "loss": 0.2843, + "step": 21109 + }, + { + "epoch": 0.4225908965793359, + "grad_norm": 1.068845510482788, + "learning_rate": 6.474188907163706e-06, + "loss": 0.3132, + "step": 21110 + }, + { + "epoch": 0.42261091509646426, + "grad_norm": 1.9802595376968384, + "learning_rate": 6.473879131864511e-06, + "loss": 0.7774, + "step": 21111 + }, + { + "epoch": 0.42263093361359255, + "grad_norm": 1.0164726972579956, + "learning_rate": 6.473569350369446e-06, + "loss": 0.3012, + "step": 21112 + }, + { + "epoch": 0.4226509521307209, + "grad_norm": 1.1767007112503052, + "learning_rate": 6.473259562679811e-06, + "loss": 0.3397, + "step": 21113 + }, + { + "epoch": 0.42267097064784925, + "grad_norm": 1.1382977962493896, + "learning_rate": 6.472949768796911e-06, + "loss": 0.3656, + "step": 21114 + }, + { + "epoch": 0.4226909891649776, + "grad_norm": 1.0349990129470825, + "learning_rate": 6.472639968722046e-06, + "loss": 0.3334, + "step": 21115 + }, + { + "epoch": 0.42271100768210595, + "grad_norm": 1.200740098953247, + "learning_rate": 6.472330162456522e-06, + "loss": 0.2888, + "step": 21116 + }, + { + "epoch": 0.4227310261992343, + "grad_norm": 1.046169638633728, + "learning_rate": 6.472020350001637e-06, + "loss": 0.3055, + "step": 21117 + }, + { + "epoch": 0.42275104471636266, + "grad_norm": 1.7929470539093018, + "learning_rate": 6.471710531358695e-06, + "loss": 0.7499, + "step": 21118 + }, + { + "epoch": 0.422771063233491, + "grad_norm": 1.0903122425079346, + "learning_rate": 6.4714007065289986e-06, + "loss": 0.3408, + "step": 21119 + }, + { + "epoch": 0.4227910817506193, + "grad_norm": 1.0550323724746704, + "learning_rate": 6.4710908755138515e-06, + "loss": 0.3147, + "step": 21120 + }, + { + "epoch": 0.42281110026774765, + "grad_norm": 1.2329217195510864, + "learning_rate": 6.470781038314554e-06, + "loss": 0.3467, + "step": 21121 + }, + { + "epoch": 0.422831118784876, + "grad_norm": 1.0813757181167603, + "learning_rate": 6.470471194932409e-06, + "loss": 0.3474, + "step": 21122 + }, + { + "epoch": 0.42285113730200435, + "grad_norm": 1.9363157749176025, + "learning_rate": 6.47016134536872e-06, + "loss": 0.8994, + "step": 21123 + }, + { + "epoch": 0.4228711558191327, + "grad_norm": 1.0856966972351074, + "learning_rate": 6.46985148962479e-06, + "loss": 0.3596, + "step": 21124 + }, + { + "epoch": 0.42289117433626106, + "grad_norm": 1.2399381399154663, + "learning_rate": 6.469541627701919e-06, + "loss": 0.3388, + "step": 21125 + }, + { + "epoch": 0.4229111928533894, + "grad_norm": 1.231911301612854, + "learning_rate": 6.469231759601413e-06, + "loss": 0.3105, + "step": 21126 + }, + { + "epoch": 0.4229312113705177, + "grad_norm": 1.0874288082122803, + "learning_rate": 6.4689218853245725e-06, + "loss": 0.299, + "step": 21127 + }, + { + "epoch": 0.42295122988764605, + "grad_norm": 1.1611027717590332, + "learning_rate": 6.468612004872701e-06, + "loss": 0.3205, + "step": 21128 + }, + { + "epoch": 0.4229712484047744, + "grad_norm": 1.1380980014801025, + "learning_rate": 6.468302118247099e-06, + "loss": 0.2768, + "step": 21129 + }, + { + "epoch": 0.42299126692190275, + "grad_norm": 1.7982590198516846, + "learning_rate": 6.467992225449071e-06, + "loss": 0.8306, + "step": 21130 + }, + { + "epoch": 0.4230112854390311, + "grad_norm": 1.0478813648223877, + "learning_rate": 6.467682326479921e-06, + "loss": 0.3039, + "step": 21131 + }, + { + "epoch": 0.42303130395615945, + "grad_norm": 1.0465844869613647, + "learning_rate": 6.467372421340949e-06, + "loss": 0.2756, + "step": 21132 + }, + { + "epoch": 0.4230513224732878, + "grad_norm": 1.7278980016708374, + "learning_rate": 6.4670625100334595e-06, + "loss": 0.8102, + "step": 21133 + }, + { + "epoch": 0.42307134099041616, + "grad_norm": 1.785048246383667, + "learning_rate": 6.466752592558756e-06, + "loss": 0.7959, + "step": 21134 + }, + { + "epoch": 0.42309135950754445, + "grad_norm": 1.1367716789245605, + "learning_rate": 6.4664426689181395e-06, + "loss": 0.3113, + "step": 21135 + }, + { + "epoch": 0.4231113780246728, + "grad_norm": 1.2305833101272583, + "learning_rate": 6.466132739112912e-06, + "loss": 0.2746, + "step": 21136 + }, + { + "epoch": 0.42313139654180115, + "grad_norm": 1.8751025199890137, + "learning_rate": 6.465822803144379e-06, + "loss": 0.7791, + "step": 21137 + }, + { + "epoch": 0.4231514150589295, + "grad_norm": 1.1983903646469116, + "learning_rate": 6.465512861013844e-06, + "loss": 0.2998, + "step": 21138 + }, + { + "epoch": 0.42317143357605785, + "grad_norm": 1.0680160522460938, + "learning_rate": 6.465202912722608e-06, + "loss": 0.3256, + "step": 21139 + }, + { + "epoch": 0.4231914520931862, + "grad_norm": 1.0628819465637207, + "learning_rate": 6.464892958271974e-06, + "loss": 0.3528, + "step": 21140 + }, + { + "epoch": 0.42321147061031456, + "grad_norm": 1.002733826637268, + "learning_rate": 6.4645829976632445e-06, + "loss": 0.2933, + "step": 21141 + }, + { + "epoch": 0.4232314891274429, + "grad_norm": 1.1414047479629517, + "learning_rate": 6.464273030897723e-06, + "loss": 0.2934, + "step": 21142 + }, + { + "epoch": 0.4232515076445712, + "grad_norm": 1.0991414785385132, + "learning_rate": 6.463963057976712e-06, + "loss": 0.3399, + "step": 21143 + }, + { + "epoch": 0.42327152616169955, + "grad_norm": 1.1258505582809448, + "learning_rate": 6.463653078901517e-06, + "loss": 0.3171, + "step": 21144 + }, + { + "epoch": 0.4232915446788279, + "grad_norm": 1.1356971263885498, + "learning_rate": 6.4633430936734394e-06, + "loss": 0.3732, + "step": 21145 + }, + { + "epoch": 0.42331156319595625, + "grad_norm": 1.0737707614898682, + "learning_rate": 6.463033102293782e-06, + "loss": 0.3171, + "step": 21146 + }, + { + "epoch": 0.4233315817130846, + "grad_norm": 1.263035535812378, + "learning_rate": 6.462723104763848e-06, + "loss": 0.3238, + "step": 21147 + }, + { + "epoch": 0.42335160023021295, + "grad_norm": 1.0574222803115845, + "learning_rate": 6.4624131010849415e-06, + "loss": 0.2855, + "step": 21148 + }, + { + "epoch": 0.4233716187473413, + "grad_norm": 1.0513768196105957, + "learning_rate": 6.4621030912583635e-06, + "loss": 0.3517, + "step": 21149 + }, + { + "epoch": 0.42339163726446966, + "grad_norm": 2.0729472637176514, + "learning_rate": 6.4617930752854205e-06, + "loss": 0.8401, + "step": 21150 + }, + { + "epoch": 0.42341165578159795, + "grad_norm": 0.9765242338180542, + "learning_rate": 6.461483053167414e-06, + "loss": 0.2912, + "step": 21151 + }, + { + "epoch": 0.4234316742987263, + "grad_norm": 1.2341464757919312, + "learning_rate": 6.461173024905647e-06, + "loss": 0.3368, + "step": 21152 + }, + { + "epoch": 0.42345169281585465, + "grad_norm": 1.8173189163208008, + "learning_rate": 6.460862990501422e-06, + "loss": 0.8312, + "step": 21153 + }, + { + "epoch": 0.423471711332983, + "grad_norm": 1.141278624534607, + "learning_rate": 6.460552949956045e-06, + "loss": 0.2971, + "step": 21154 + }, + { + "epoch": 0.42349172985011135, + "grad_norm": 1.1333070993423462, + "learning_rate": 6.460242903270817e-06, + "loss": 0.2997, + "step": 21155 + }, + { + "epoch": 0.4235117483672397, + "grad_norm": 1.0063213109970093, + "learning_rate": 6.459932850447041e-06, + "loss": 0.3279, + "step": 21156 + }, + { + "epoch": 0.42353176688436805, + "grad_norm": 1.8987950086593628, + "learning_rate": 6.459622791486022e-06, + "loss": 0.8456, + "step": 21157 + }, + { + "epoch": 0.4235517854014964, + "grad_norm": 1.1494899988174438, + "learning_rate": 6.459312726389064e-06, + "loss": 0.2996, + "step": 21158 + }, + { + "epoch": 0.4235718039186247, + "grad_norm": 1.2104185819625854, + "learning_rate": 6.459002655157469e-06, + "loss": 0.3251, + "step": 21159 + }, + { + "epoch": 0.42359182243575305, + "grad_norm": 1.111209750175476, + "learning_rate": 6.45869257779254e-06, + "loss": 0.3078, + "step": 21160 + }, + { + "epoch": 0.4236118409528814, + "grad_norm": 1.2785842418670654, + "learning_rate": 6.458382494295581e-06, + "loss": 0.3353, + "step": 21161 + }, + { + "epoch": 0.42363185947000975, + "grad_norm": 1.1554288864135742, + "learning_rate": 6.458072404667897e-06, + "loss": 0.3131, + "step": 21162 + }, + { + "epoch": 0.4236518779871381, + "grad_norm": 1.0917292833328247, + "learning_rate": 6.45776230891079e-06, + "loss": 0.3323, + "step": 21163 + }, + { + "epoch": 0.42367189650426645, + "grad_norm": 1.351585865020752, + "learning_rate": 6.457452207025563e-06, + "loss": 0.2458, + "step": 21164 + }, + { + "epoch": 0.4236919150213948, + "grad_norm": 1.055391550064087, + "learning_rate": 6.457142099013523e-06, + "loss": 0.309, + "step": 21165 + }, + { + "epoch": 0.42371193353852316, + "grad_norm": 1.152163028717041, + "learning_rate": 6.456831984875969e-06, + "loss": 0.313, + "step": 21166 + }, + { + "epoch": 0.42373195205565145, + "grad_norm": 1.2793399095535278, + "learning_rate": 6.456521864614208e-06, + "loss": 0.3047, + "step": 21167 + }, + { + "epoch": 0.4237519705727798, + "grad_norm": 1.0879255533218384, + "learning_rate": 6.456211738229542e-06, + "loss": 0.356, + "step": 21168 + }, + { + "epoch": 0.42377198908990815, + "grad_norm": 1.1055020093917847, + "learning_rate": 6.455901605723275e-06, + "loss": 0.2973, + "step": 21169 + }, + { + "epoch": 0.4237920076070365, + "grad_norm": 1.1489640474319458, + "learning_rate": 6.455591467096711e-06, + "loss": 0.3306, + "step": 21170 + }, + { + "epoch": 0.42381202612416485, + "grad_norm": 1.3191616535186768, + "learning_rate": 6.455281322351155e-06, + "loss": 0.3369, + "step": 21171 + }, + { + "epoch": 0.4238320446412932, + "grad_norm": 1.1265161037445068, + "learning_rate": 6.4549711714879085e-06, + "loss": 0.3293, + "step": 21172 + }, + { + "epoch": 0.42385206315842155, + "grad_norm": 1.0859565734863281, + "learning_rate": 6.454661014508275e-06, + "loss": 0.2852, + "step": 21173 + }, + { + "epoch": 0.4238720816755499, + "grad_norm": 1.2346036434173584, + "learning_rate": 6.454350851413561e-06, + "loss": 0.3064, + "step": 21174 + }, + { + "epoch": 0.4238921001926782, + "grad_norm": 1.093263864517212, + "learning_rate": 6.4540406822050696e-06, + "loss": 0.3129, + "step": 21175 + }, + { + "epoch": 0.42391211870980655, + "grad_norm": 1.0962917804718018, + "learning_rate": 6.453730506884104e-06, + "loss": 0.2904, + "step": 21176 + }, + { + "epoch": 0.4239321372269349, + "grad_norm": 1.1751302480697632, + "learning_rate": 6.4534203254519665e-06, + "loss": 0.297, + "step": 21177 + }, + { + "epoch": 0.42395215574406325, + "grad_norm": 1.1841952800750732, + "learning_rate": 6.453110137909965e-06, + "loss": 0.3177, + "step": 21178 + }, + { + "epoch": 0.4239721742611916, + "grad_norm": 1.1888591051101685, + "learning_rate": 6.4527999442594005e-06, + "loss": 0.3512, + "step": 21179 + }, + { + "epoch": 0.42399219277831995, + "grad_norm": 1.1092426776885986, + "learning_rate": 6.452489744501577e-06, + "loss": 0.3226, + "step": 21180 + }, + { + "epoch": 0.4240122112954483, + "grad_norm": 0.9974663257598877, + "learning_rate": 6.452179538637798e-06, + "loss": 0.2983, + "step": 21181 + }, + { + "epoch": 0.42403222981257666, + "grad_norm": 1.7810477018356323, + "learning_rate": 6.451869326669371e-06, + "loss": 0.823, + "step": 21182 + }, + { + "epoch": 0.42405224832970495, + "grad_norm": 1.1516324281692505, + "learning_rate": 6.451559108597597e-06, + "loss": 0.3283, + "step": 21183 + }, + { + "epoch": 0.4240722668468333, + "grad_norm": 1.1299586296081543, + "learning_rate": 6.451248884423781e-06, + "loss": 0.3212, + "step": 21184 + }, + { + "epoch": 0.42409228536396165, + "grad_norm": 1.3604142665863037, + "learning_rate": 6.450938654149227e-06, + "loss": 0.3789, + "step": 21185 + }, + { + "epoch": 0.42411230388109, + "grad_norm": 1.2969837188720703, + "learning_rate": 6.450628417775239e-06, + "loss": 0.334, + "step": 21186 + }, + { + "epoch": 0.42413232239821835, + "grad_norm": 1.0960503816604614, + "learning_rate": 6.450318175303121e-06, + "loss": 0.313, + "step": 21187 + }, + { + "epoch": 0.4241523409153467, + "grad_norm": 1.0084924697875977, + "learning_rate": 6.450007926734176e-06, + "loss": 0.3128, + "step": 21188 + }, + { + "epoch": 0.42417235943247505, + "grad_norm": 1.0316897630691528, + "learning_rate": 6.449697672069713e-06, + "loss": 0.2771, + "step": 21189 + }, + { + "epoch": 0.4241923779496034, + "grad_norm": 1.2146978378295898, + "learning_rate": 6.449387411311032e-06, + "loss": 0.3429, + "step": 21190 + }, + { + "epoch": 0.4242123964667317, + "grad_norm": 1.0825188159942627, + "learning_rate": 6.4490771444594365e-06, + "loss": 0.2861, + "step": 21191 + }, + { + "epoch": 0.42423241498386005, + "grad_norm": 1.1020311117172241, + "learning_rate": 6.448766871516236e-06, + "loss": 0.3127, + "step": 21192 + }, + { + "epoch": 0.4242524335009884, + "grad_norm": 1.4886474609375, + "learning_rate": 6.448456592482728e-06, + "loss": 0.3142, + "step": 21193 + }, + { + "epoch": 0.42427245201811675, + "grad_norm": 1.107073426246643, + "learning_rate": 6.448146307360222e-06, + "loss": 0.2951, + "step": 21194 + }, + { + "epoch": 0.4242924705352451, + "grad_norm": 1.0437495708465576, + "learning_rate": 6.4478360161500185e-06, + "loss": 0.2958, + "step": 21195 + }, + { + "epoch": 0.42431248905237345, + "grad_norm": 1.1730380058288574, + "learning_rate": 6.4475257188534255e-06, + "loss": 0.327, + "step": 21196 + }, + { + "epoch": 0.4243325075695018, + "grad_norm": 1.1247379779815674, + "learning_rate": 6.447215415471746e-06, + "loss": 0.3251, + "step": 21197 + }, + { + "epoch": 0.42435252608663016, + "grad_norm": 1.0160229206085205, + "learning_rate": 6.446905106006285e-06, + "loss": 0.2968, + "step": 21198 + }, + { + "epoch": 0.42437254460375845, + "grad_norm": 1.1035629510879517, + "learning_rate": 6.446594790458345e-06, + "loss": 0.3173, + "step": 21199 + }, + { + "epoch": 0.4243925631208868, + "grad_norm": 1.111547589302063, + "learning_rate": 6.446284468829231e-06, + "loss": 0.3251, + "step": 21200 + }, + { + "epoch": 0.42441258163801515, + "grad_norm": 1.910807728767395, + "learning_rate": 6.445974141120251e-06, + "loss": 0.8051, + "step": 21201 + }, + { + "epoch": 0.4244326001551435, + "grad_norm": 2.148526668548584, + "learning_rate": 6.445663807332705e-06, + "loss": 0.3233, + "step": 21202 + }, + { + "epoch": 0.42445261867227185, + "grad_norm": 1.1631685495376587, + "learning_rate": 6.445353467467899e-06, + "loss": 0.2819, + "step": 21203 + }, + { + "epoch": 0.4244726371894002, + "grad_norm": 1.1605795621871948, + "learning_rate": 6.445043121527139e-06, + "loss": 0.2849, + "step": 21204 + }, + { + "epoch": 0.42449265570652855, + "grad_norm": 1.0956368446350098, + "learning_rate": 6.44473276951173e-06, + "loss": 0.3067, + "step": 21205 + }, + { + "epoch": 0.4245126742236569, + "grad_norm": 1.030018925666809, + "learning_rate": 6.444422411422972e-06, + "loss": 0.3258, + "step": 21206 + }, + { + "epoch": 0.4245326927407852, + "grad_norm": 1.1604560613632202, + "learning_rate": 6.444112047262175e-06, + "loss": 0.3022, + "step": 21207 + }, + { + "epoch": 0.42455271125791355, + "grad_norm": 1.8395335674285889, + "learning_rate": 6.443801677030642e-06, + "loss": 0.7936, + "step": 21208 + }, + { + "epoch": 0.4245727297750419, + "grad_norm": 1.1157097816467285, + "learning_rate": 6.443491300729679e-06, + "loss": 0.3219, + "step": 21209 + }, + { + "epoch": 0.42459274829217025, + "grad_norm": 1.0464966297149658, + "learning_rate": 6.443180918360585e-06, + "loss": 0.3269, + "step": 21210 + }, + { + "epoch": 0.4246127668092986, + "grad_norm": 1.127219319343567, + "learning_rate": 6.442870529924673e-06, + "loss": 0.3183, + "step": 21211 + }, + { + "epoch": 0.42463278532642695, + "grad_norm": 1.0489883422851562, + "learning_rate": 6.442560135423241e-06, + "loss": 0.2921, + "step": 21212 + }, + { + "epoch": 0.4246528038435553, + "grad_norm": 1.095086693763733, + "learning_rate": 6.442249734857598e-06, + "loss": 0.2986, + "step": 21213 + }, + { + "epoch": 0.42467282236068365, + "grad_norm": 1.228276252746582, + "learning_rate": 6.4419393282290475e-06, + "loss": 0.2939, + "step": 21214 + }, + { + "epoch": 0.42469284087781195, + "grad_norm": 0.9846141934394836, + "learning_rate": 6.4416289155388945e-06, + "loss": 0.2488, + "step": 21215 + }, + { + "epoch": 0.4247128593949403, + "grad_norm": 1.1139417886734009, + "learning_rate": 6.441318496788444e-06, + "loss": 0.3244, + "step": 21216 + }, + { + "epoch": 0.42473287791206865, + "grad_norm": 1.2127621173858643, + "learning_rate": 6.441008071979e-06, + "loss": 0.2985, + "step": 21217 + }, + { + "epoch": 0.424752896429197, + "grad_norm": 1.1141722202301025, + "learning_rate": 6.440697641111868e-06, + "loss": 0.3032, + "step": 21218 + }, + { + "epoch": 0.42477291494632535, + "grad_norm": 1.158888816833496, + "learning_rate": 6.440387204188354e-06, + "loss": 0.2842, + "step": 21219 + }, + { + "epoch": 0.4247929334634537, + "grad_norm": 1.1454271078109741, + "learning_rate": 6.440076761209761e-06, + "loss": 0.3453, + "step": 21220 + }, + { + "epoch": 0.42481295198058205, + "grad_norm": 1.2193961143493652, + "learning_rate": 6.439766312177397e-06, + "loss": 0.3221, + "step": 21221 + }, + { + "epoch": 0.4248329704977104, + "grad_norm": 1.0856207609176636, + "learning_rate": 6.439455857092564e-06, + "loss": 0.3311, + "step": 21222 + }, + { + "epoch": 0.4248529890148387, + "grad_norm": 1.0325524806976318, + "learning_rate": 6.439145395956568e-06, + "loss": 0.2988, + "step": 21223 + }, + { + "epoch": 0.42487300753196705, + "grad_norm": 1.3567659854888916, + "learning_rate": 6.438834928770715e-06, + "loss": 0.3389, + "step": 21224 + }, + { + "epoch": 0.4248930260490954, + "grad_norm": 1.0343315601348877, + "learning_rate": 6.438524455536309e-06, + "loss": 0.2963, + "step": 21225 + }, + { + "epoch": 0.42491304456622375, + "grad_norm": 1.9132754802703857, + "learning_rate": 6.4382139762546566e-06, + "loss": 0.7794, + "step": 21226 + }, + { + "epoch": 0.4249330630833521, + "grad_norm": 1.125443458557129, + "learning_rate": 6.437903490927062e-06, + "loss": 0.3286, + "step": 21227 + }, + { + "epoch": 0.42495308160048045, + "grad_norm": 1.2759524583816528, + "learning_rate": 6.43759299955483e-06, + "loss": 0.3014, + "step": 21228 + }, + { + "epoch": 0.4249731001176088, + "grad_norm": 1.131610631942749, + "learning_rate": 6.437282502139268e-06, + "loss": 0.3365, + "step": 21229 + }, + { + "epoch": 0.42499311863473715, + "grad_norm": 2.075228452682495, + "learning_rate": 6.436971998681678e-06, + "loss": 0.757, + "step": 21230 + }, + { + "epoch": 0.42501313715186545, + "grad_norm": 1.1791374683380127, + "learning_rate": 6.4366614891833675e-06, + "loss": 0.3425, + "step": 21231 + }, + { + "epoch": 0.4250331556689938, + "grad_norm": 1.8342487812042236, + "learning_rate": 6.436350973645642e-06, + "loss": 0.8452, + "step": 21232 + }, + { + "epoch": 0.42505317418612215, + "grad_norm": 1.1135824918746948, + "learning_rate": 6.436040452069805e-06, + "loss": 0.3772, + "step": 21233 + }, + { + "epoch": 0.4250731927032505, + "grad_norm": 1.1206879615783691, + "learning_rate": 6.435729924457164e-06, + "loss": 0.3272, + "step": 21234 + }, + { + "epoch": 0.42509321122037885, + "grad_norm": 1.820576548576355, + "learning_rate": 6.435419390809021e-06, + "loss": 0.741, + "step": 21235 + }, + { + "epoch": 0.4251132297375072, + "grad_norm": 1.096938133239746, + "learning_rate": 6.435108851126687e-06, + "loss": 0.3442, + "step": 21236 + }, + { + "epoch": 0.42513324825463555, + "grad_norm": 1.0102063417434692, + "learning_rate": 6.434798305411463e-06, + "loss": 0.2842, + "step": 21237 + }, + { + "epoch": 0.4251532667717639, + "grad_norm": 1.0613187551498413, + "learning_rate": 6.434487753664655e-06, + "loss": 0.3153, + "step": 21238 + }, + { + "epoch": 0.4251732852888922, + "grad_norm": 1.2204322814941406, + "learning_rate": 6.434177195887569e-06, + "loss": 0.3468, + "step": 21239 + }, + { + "epoch": 0.42519330380602055, + "grad_norm": 1.0422377586364746, + "learning_rate": 6.433866632081511e-06, + "loss": 0.2977, + "step": 21240 + }, + { + "epoch": 0.4252133223231489, + "grad_norm": 1.2190170288085938, + "learning_rate": 6.4335560622477865e-06, + "loss": 0.3042, + "step": 21241 + }, + { + "epoch": 0.42523334084027725, + "grad_norm": 1.1561826467514038, + "learning_rate": 6.433245486387701e-06, + "loss": 0.3202, + "step": 21242 + }, + { + "epoch": 0.4252533593574056, + "grad_norm": 1.2105823755264282, + "learning_rate": 6.432934904502561e-06, + "loss": 0.3125, + "step": 21243 + }, + { + "epoch": 0.42527337787453395, + "grad_norm": 1.1704872846603394, + "learning_rate": 6.43262431659367e-06, + "loss": 0.3332, + "step": 21244 + }, + { + "epoch": 0.4252933963916623, + "grad_norm": 1.1516776084899902, + "learning_rate": 6.432313722662333e-06, + "loss": 0.3406, + "step": 21245 + }, + { + "epoch": 0.42531341490879065, + "grad_norm": 1.2396060228347778, + "learning_rate": 6.432003122709859e-06, + "loss": 0.3012, + "step": 21246 + }, + { + "epoch": 0.42533343342591895, + "grad_norm": 1.031488299369812, + "learning_rate": 6.431692516737553e-06, + "loss": 0.3021, + "step": 21247 + }, + { + "epoch": 0.4253534519430473, + "grad_norm": 1.0838756561279297, + "learning_rate": 6.431381904746718e-06, + "loss": 0.2667, + "step": 21248 + }, + { + "epoch": 0.42537347046017565, + "grad_norm": 1.139918327331543, + "learning_rate": 6.431071286738662e-06, + "loss": 0.3425, + "step": 21249 + }, + { + "epoch": 0.425393488977304, + "grad_norm": 1.0780458450317383, + "learning_rate": 6.43076066271469e-06, + "loss": 0.3537, + "step": 21250 + }, + { + "epoch": 0.42541350749443235, + "grad_norm": 1.0438313484191895, + "learning_rate": 6.430450032676109e-06, + "loss": 0.3109, + "step": 21251 + }, + { + "epoch": 0.4254335260115607, + "grad_norm": 1.051115870475769, + "learning_rate": 6.430139396624224e-06, + "loss": 0.3022, + "step": 21252 + }, + { + "epoch": 0.42545354452868905, + "grad_norm": 1.2098314762115479, + "learning_rate": 6.42982875456034e-06, + "loss": 0.3013, + "step": 21253 + }, + { + "epoch": 0.4254735630458174, + "grad_norm": 1.9505785703659058, + "learning_rate": 6.4295181064857635e-06, + "loss": 0.7778, + "step": 21254 + }, + { + "epoch": 0.4254935815629457, + "grad_norm": 1.9098225831985474, + "learning_rate": 6.429207452401801e-06, + "loss": 0.8452, + "step": 21255 + }, + { + "epoch": 0.42551360008007405, + "grad_norm": 1.0510334968566895, + "learning_rate": 6.428896792309759e-06, + "loss": 0.2499, + "step": 21256 + }, + { + "epoch": 0.4255336185972024, + "grad_norm": 1.230992078781128, + "learning_rate": 6.4285861262109405e-06, + "loss": 0.3504, + "step": 21257 + }, + { + "epoch": 0.42555363711433075, + "grad_norm": 1.086152195930481, + "learning_rate": 6.428275454106654e-06, + "loss": 0.3, + "step": 21258 + }, + { + "epoch": 0.4255736556314591, + "grad_norm": 1.0178916454315186, + "learning_rate": 6.427964775998204e-06, + "loss": 0.3096, + "step": 21259 + }, + { + "epoch": 0.42559367414858745, + "grad_norm": 1.014047384262085, + "learning_rate": 6.427654091886899e-06, + "loss": 0.2715, + "step": 21260 + }, + { + "epoch": 0.4256136926657158, + "grad_norm": 1.8699581623077393, + "learning_rate": 6.427343401774042e-06, + "loss": 0.8347, + "step": 21261 + }, + { + "epoch": 0.42563371118284415, + "grad_norm": 1.0179542303085327, + "learning_rate": 6.4270327056609415e-06, + "loss": 0.2869, + "step": 21262 + }, + { + "epoch": 0.42565372969997245, + "grad_norm": 1.0881271362304688, + "learning_rate": 6.426722003548902e-06, + "loss": 0.3156, + "step": 21263 + }, + { + "epoch": 0.4256737482171008, + "grad_norm": 1.878764033317566, + "learning_rate": 6.42641129543923e-06, + "loss": 0.817, + "step": 21264 + }, + { + "epoch": 0.42569376673422915, + "grad_norm": 1.0572024583816528, + "learning_rate": 6.426100581333233e-06, + "loss": 0.3191, + "step": 21265 + }, + { + "epoch": 0.4257137852513575, + "grad_norm": 1.1198183298110962, + "learning_rate": 6.425789861232216e-06, + "loss": 0.3116, + "step": 21266 + }, + { + "epoch": 0.42573380376848585, + "grad_norm": 1.0342870950698853, + "learning_rate": 6.425479135137484e-06, + "loss": 0.3003, + "step": 21267 + }, + { + "epoch": 0.4257538222856142, + "grad_norm": 1.1480094194412231, + "learning_rate": 6.425168403050346e-06, + "loss": 0.3158, + "step": 21268 + }, + { + "epoch": 0.42577384080274255, + "grad_norm": 1.0014482736587524, + "learning_rate": 6.4248576649721065e-06, + "loss": 0.2963, + "step": 21269 + }, + { + "epoch": 0.4257938593198709, + "grad_norm": 1.2532378435134888, + "learning_rate": 6.424546920904071e-06, + "loss": 0.3324, + "step": 21270 + }, + { + "epoch": 0.4258138778369992, + "grad_norm": 1.0948669910430908, + "learning_rate": 6.424236170847546e-06, + "loss": 0.2926, + "step": 21271 + }, + { + "epoch": 0.42583389635412755, + "grad_norm": 1.0567468404769897, + "learning_rate": 6.42392541480384e-06, + "loss": 0.327, + "step": 21272 + }, + { + "epoch": 0.4258539148712559, + "grad_norm": 1.0347288846969604, + "learning_rate": 6.423614652774258e-06, + "loss": 0.3343, + "step": 21273 + }, + { + "epoch": 0.42587393338838425, + "grad_norm": 1.1030246019363403, + "learning_rate": 6.423303884760107e-06, + "loss": 0.267, + "step": 21274 + }, + { + "epoch": 0.4258939519055126, + "grad_norm": 1.1390678882598877, + "learning_rate": 6.422993110762692e-06, + "loss": 0.3308, + "step": 21275 + }, + { + "epoch": 0.42591397042264095, + "grad_norm": 1.0880258083343506, + "learning_rate": 6.4226823307833196e-06, + "loss": 0.3315, + "step": 21276 + }, + { + "epoch": 0.4259339889397693, + "grad_norm": 1.1228950023651123, + "learning_rate": 6.422371544823297e-06, + "loss": 0.2908, + "step": 21277 + }, + { + "epoch": 0.42595400745689765, + "grad_norm": 1.103452444076538, + "learning_rate": 6.422060752883932e-06, + "loss": 0.2951, + "step": 21278 + }, + { + "epoch": 0.42597402597402595, + "grad_norm": 1.0575156211853027, + "learning_rate": 6.421749954966528e-06, + "loss": 0.2942, + "step": 21279 + }, + { + "epoch": 0.4259940444911543, + "grad_norm": 1.1112005710601807, + "learning_rate": 6.421439151072395e-06, + "loss": 0.2755, + "step": 21280 + }, + { + "epoch": 0.42601406300828265, + "grad_norm": 1.0933868885040283, + "learning_rate": 6.421128341202836e-06, + "loss": 0.3033, + "step": 21281 + }, + { + "epoch": 0.426034081525411, + "grad_norm": 1.092143177986145, + "learning_rate": 6.42081752535916e-06, + "loss": 0.3469, + "step": 21282 + }, + { + "epoch": 0.42605410004253935, + "grad_norm": 1.0693575143814087, + "learning_rate": 6.420506703542673e-06, + "loss": 0.3138, + "step": 21283 + }, + { + "epoch": 0.4260741185596677, + "grad_norm": 1.162542462348938, + "learning_rate": 6.4201958757546804e-06, + "loss": 0.3012, + "step": 21284 + }, + { + "epoch": 0.42609413707679605, + "grad_norm": 1.2112408876419067, + "learning_rate": 6.419885041996491e-06, + "loss": 0.2811, + "step": 21285 + }, + { + "epoch": 0.4261141555939244, + "grad_norm": 1.1998690366744995, + "learning_rate": 6.4195742022694096e-06, + "loss": 0.2948, + "step": 21286 + }, + { + "epoch": 0.4261341741110527, + "grad_norm": 1.1806789636611938, + "learning_rate": 6.419263356574745e-06, + "loss": 0.3286, + "step": 21287 + }, + { + "epoch": 0.42615419262818105, + "grad_norm": 1.321007490158081, + "learning_rate": 6.4189525049138015e-06, + "loss": 0.3308, + "step": 21288 + }, + { + "epoch": 0.4261742111453094, + "grad_norm": 1.3252034187316895, + "learning_rate": 6.418641647287887e-06, + "loss": 0.2963, + "step": 21289 + }, + { + "epoch": 0.42619422966243775, + "grad_norm": 1.1817492246627808, + "learning_rate": 6.418330783698309e-06, + "loss": 0.382, + "step": 21290 + }, + { + "epoch": 0.4262142481795661, + "grad_norm": 1.8942855596542358, + "learning_rate": 6.418019914146374e-06, + "loss": 0.8713, + "step": 21291 + }, + { + "epoch": 0.42623426669669445, + "grad_norm": 0.9576817750930786, + "learning_rate": 6.417709038633387e-06, + "loss": 0.2888, + "step": 21292 + }, + { + "epoch": 0.4262542852138228, + "grad_norm": 1.0951915979385376, + "learning_rate": 6.417398157160658e-06, + "loss": 0.2939, + "step": 21293 + }, + { + "epoch": 0.42627430373095115, + "grad_norm": 1.1716362237930298, + "learning_rate": 6.417087269729492e-06, + "loss": 0.3132, + "step": 21294 + }, + { + "epoch": 0.42629432224807945, + "grad_norm": 1.079717755317688, + "learning_rate": 6.416776376341195e-06, + "loss": 0.3177, + "step": 21295 + }, + { + "epoch": 0.4263143407652078, + "grad_norm": 1.9788610935211182, + "learning_rate": 6.416465476997075e-06, + "loss": 0.8407, + "step": 21296 + }, + { + "epoch": 0.42633435928233615, + "grad_norm": 1.2031443119049072, + "learning_rate": 6.416154571698439e-06, + "loss": 0.3058, + "step": 21297 + }, + { + "epoch": 0.4263543777994645, + "grad_norm": 1.268513560295105, + "learning_rate": 6.415843660446595e-06, + "loss": 0.3301, + "step": 21298 + }, + { + "epoch": 0.42637439631659285, + "grad_norm": 1.2217090129852295, + "learning_rate": 6.415532743242847e-06, + "loss": 0.3465, + "step": 21299 + }, + { + "epoch": 0.4263944148337212, + "grad_norm": 1.1031267642974854, + "learning_rate": 6.4152218200885064e-06, + "loss": 0.3231, + "step": 21300 + }, + { + "epoch": 0.42641443335084955, + "grad_norm": 1.0687025785446167, + "learning_rate": 6.414910890984877e-06, + "loss": 0.3373, + "step": 21301 + }, + { + "epoch": 0.4264344518679779, + "grad_norm": 1.0847446918487549, + "learning_rate": 6.414599955933267e-06, + "loss": 0.3067, + "step": 21302 + }, + { + "epoch": 0.4264544703851062, + "grad_norm": 1.1360814571380615, + "learning_rate": 6.41428901493498e-06, + "loss": 0.2995, + "step": 21303 + }, + { + "epoch": 0.42647448890223455, + "grad_norm": 1.264813780784607, + "learning_rate": 6.41397806799133e-06, + "loss": 0.3303, + "step": 21304 + }, + { + "epoch": 0.4264945074193629, + "grad_norm": 1.1552451848983765, + "learning_rate": 6.4136671151036204e-06, + "loss": 0.2838, + "step": 21305 + }, + { + "epoch": 0.42651452593649125, + "grad_norm": 1.891444206237793, + "learning_rate": 6.413356156273157e-06, + "loss": 0.8044, + "step": 21306 + }, + { + "epoch": 0.4265345444536196, + "grad_norm": 1.188166618347168, + "learning_rate": 6.413045191501251e-06, + "loss": 0.2693, + "step": 21307 + }, + { + "epoch": 0.42655456297074795, + "grad_norm": 1.0682190656661987, + "learning_rate": 6.412734220789203e-06, + "loss": 0.3674, + "step": 21308 + }, + { + "epoch": 0.4265745814878763, + "grad_norm": 1.0063608884811401, + "learning_rate": 6.412423244138326e-06, + "loss": 0.2735, + "step": 21309 + }, + { + "epoch": 0.42659460000500465, + "grad_norm": 1.074534296989441, + "learning_rate": 6.412112261549926e-06, + "loss": 0.2733, + "step": 21310 + }, + { + "epoch": 0.42661461852213295, + "grad_norm": 1.1212955713272095, + "learning_rate": 6.411801273025311e-06, + "loss": 0.3596, + "step": 21311 + }, + { + "epoch": 0.4266346370392613, + "grad_norm": 1.7853530645370483, + "learning_rate": 6.411490278565786e-06, + "loss": 0.8218, + "step": 21312 + }, + { + "epoch": 0.42665465555638965, + "grad_norm": 1.108517050743103, + "learning_rate": 6.41117927817266e-06, + "loss": 0.2827, + "step": 21313 + }, + { + "epoch": 0.426674674073518, + "grad_norm": 1.1225996017456055, + "learning_rate": 6.410868271847239e-06, + "loss": 0.3229, + "step": 21314 + }, + { + "epoch": 0.42669469259064635, + "grad_norm": 1.1438517570495605, + "learning_rate": 6.410557259590834e-06, + "loss": 0.3665, + "step": 21315 + }, + { + "epoch": 0.4267147111077747, + "grad_norm": 1.091397762298584, + "learning_rate": 6.4102462414047475e-06, + "loss": 0.3224, + "step": 21316 + }, + { + "epoch": 0.42673472962490305, + "grad_norm": 1.040639042854309, + "learning_rate": 6.40993521729029e-06, + "loss": 0.2931, + "step": 21317 + }, + { + "epoch": 0.4267547481420314, + "grad_norm": 1.1019660234451294, + "learning_rate": 6.409624187248769e-06, + "loss": 0.327, + "step": 21318 + }, + { + "epoch": 0.4267747666591597, + "grad_norm": 1.103283166885376, + "learning_rate": 6.40931315128149e-06, + "loss": 0.3361, + "step": 21319 + }, + { + "epoch": 0.42679478517628805, + "grad_norm": 1.1608327627182007, + "learning_rate": 6.409002109389763e-06, + "loss": 0.3165, + "step": 21320 + }, + { + "epoch": 0.4268148036934164, + "grad_norm": 1.0092271566390991, + "learning_rate": 6.408691061574892e-06, + "loss": 0.2679, + "step": 21321 + }, + { + "epoch": 0.42683482221054475, + "grad_norm": 1.1341882944107056, + "learning_rate": 6.4083800078381886e-06, + "loss": 0.3202, + "step": 21322 + }, + { + "epoch": 0.4268548407276731, + "grad_norm": 1.0871992111206055, + "learning_rate": 6.4080689481809585e-06, + "loss": 0.3538, + "step": 21323 + }, + { + "epoch": 0.42687485924480145, + "grad_norm": 1.1128145456314087, + "learning_rate": 6.407757882604511e-06, + "loss": 0.3237, + "step": 21324 + }, + { + "epoch": 0.4268948777619298, + "grad_norm": 1.1050320863723755, + "learning_rate": 6.407446811110152e-06, + "loss": 0.26, + "step": 21325 + }, + { + "epoch": 0.42691489627905815, + "grad_norm": 1.1745498180389404, + "learning_rate": 6.4071357336991875e-06, + "loss": 0.352, + "step": 21326 + }, + { + "epoch": 0.42693491479618645, + "grad_norm": 1.144839882850647, + "learning_rate": 6.40682465037293e-06, + "loss": 0.3114, + "step": 21327 + }, + { + "epoch": 0.4269549333133148, + "grad_norm": 1.1508268117904663, + "learning_rate": 6.406513561132681e-06, + "loss": 0.3304, + "step": 21328 + }, + { + "epoch": 0.42697495183044315, + "grad_norm": 1.1256169080734253, + "learning_rate": 6.4062024659797525e-06, + "loss": 0.3185, + "step": 21329 + }, + { + "epoch": 0.4269949703475715, + "grad_norm": 1.0329824686050415, + "learning_rate": 6.405891364915453e-06, + "loss": 0.2805, + "step": 21330 + }, + { + "epoch": 0.42701498886469985, + "grad_norm": 1.1753309965133667, + "learning_rate": 6.405580257941088e-06, + "loss": 0.2908, + "step": 21331 + }, + { + "epoch": 0.4270350073818282, + "grad_norm": 1.1295579671859741, + "learning_rate": 6.405269145057966e-06, + "loss": 0.3273, + "step": 21332 + }, + { + "epoch": 0.42705502589895655, + "grad_norm": 1.4303374290466309, + "learning_rate": 6.404958026267396e-06, + "loss": 0.3214, + "step": 21333 + }, + { + "epoch": 0.4270750444160849, + "grad_norm": 1.170803189277649, + "learning_rate": 6.404646901570684e-06, + "loss": 0.3253, + "step": 21334 + }, + { + "epoch": 0.4270950629332132, + "grad_norm": 1.0471912622451782, + "learning_rate": 6.404335770969138e-06, + "loss": 0.313, + "step": 21335 + }, + { + "epoch": 0.42711508145034155, + "grad_norm": 0.9960187673568726, + "learning_rate": 6.4040246344640675e-06, + "loss": 0.273, + "step": 21336 + }, + { + "epoch": 0.4271350999674699, + "grad_norm": 1.8066574335098267, + "learning_rate": 6.403713492056779e-06, + "loss": 0.8022, + "step": 21337 + }, + { + "epoch": 0.42715511848459825, + "grad_norm": 1.0611780881881714, + "learning_rate": 6.4034023437485816e-06, + "loss": 0.3469, + "step": 21338 + }, + { + "epoch": 0.4271751370017266, + "grad_norm": 1.1215510368347168, + "learning_rate": 6.403091189540782e-06, + "loss": 0.303, + "step": 21339 + }, + { + "epoch": 0.42719515551885495, + "grad_norm": 1.0166784524917603, + "learning_rate": 6.402780029434691e-06, + "loss": 0.3326, + "step": 21340 + }, + { + "epoch": 0.4272151740359833, + "grad_norm": 0.9457951784133911, + "learning_rate": 6.402468863431612e-06, + "loss": 0.3082, + "step": 21341 + }, + { + "epoch": 0.42723519255311165, + "grad_norm": 1.0835100412368774, + "learning_rate": 6.402157691532857e-06, + "loss": 0.2664, + "step": 21342 + }, + { + "epoch": 0.42725521107023995, + "grad_norm": 1.188823938369751, + "learning_rate": 6.401846513739732e-06, + "loss": 0.3262, + "step": 21343 + }, + { + "epoch": 0.4272752295873683, + "grad_norm": 1.210587739944458, + "learning_rate": 6.401535330053547e-06, + "loss": 0.2839, + "step": 21344 + }, + { + "epoch": 0.42729524810449665, + "grad_norm": 1.8036468029022217, + "learning_rate": 6.401224140475608e-06, + "loss": 0.7836, + "step": 21345 + }, + { + "epoch": 0.427315266621625, + "grad_norm": 1.1574314832687378, + "learning_rate": 6.400912945007226e-06, + "loss": 0.3146, + "step": 21346 + }, + { + "epoch": 0.42733528513875335, + "grad_norm": 1.1360530853271484, + "learning_rate": 6.400601743649706e-06, + "loss": 0.3013, + "step": 21347 + }, + { + "epoch": 0.4273553036558817, + "grad_norm": 1.0370612144470215, + "learning_rate": 6.400290536404357e-06, + "loss": 0.3006, + "step": 21348 + }, + { + "epoch": 0.42737532217301005, + "grad_norm": 1.9467743635177612, + "learning_rate": 6.399979323272491e-06, + "loss": 0.7983, + "step": 21349 + }, + { + "epoch": 0.4273953406901384, + "grad_norm": 1.0095252990722656, + "learning_rate": 6.399668104255411e-06, + "loss": 0.2711, + "step": 21350 + }, + { + "epoch": 0.4274153592072667, + "grad_norm": 1.1629719734191895, + "learning_rate": 6.399356879354428e-06, + "loss": 0.282, + "step": 21351 + }, + { + "epoch": 0.42743537772439505, + "grad_norm": 1.1531288623809814, + "learning_rate": 6.39904564857085e-06, + "loss": 0.3016, + "step": 21352 + }, + { + "epoch": 0.4274553962415234, + "grad_norm": 1.0989023447036743, + "learning_rate": 6.3987344119059846e-06, + "loss": 0.3235, + "step": 21353 + }, + { + "epoch": 0.42747541475865175, + "grad_norm": 1.1776695251464844, + "learning_rate": 6.398423169361141e-06, + "loss": 0.3306, + "step": 21354 + }, + { + "epoch": 0.4274954332757801, + "grad_norm": 1.8599365949630737, + "learning_rate": 6.398111920937626e-06, + "loss": 0.863, + "step": 21355 + }, + { + "epoch": 0.42751545179290845, + "grad_norm": 1.258906602859497, + "learning_rate": 6.397800666636751e-06, + "loss": 0.2867, + "step": 21356 + }, + { + "epoch": 0.4275354703100368, + "grad_norm": 1.042422890663147, + "learning_rate": 6.397489406459823e-06, + "loss": 0.2837, + "step": 21357 + }, + { + "epoch": 0.42755548882716515, + "grad_norm": 1.0719106197357178, + "learning_rate": 6.39717814040815e-06, + "loss": 0.3067, + "step": 21358 + }, + { + "epoch": 0.42757550734429345, + "grad_norm": 1.4462394714355469, + "learning_rate": 6.39686686848304e-06, + "loss": 0.311, + "step": 21359 + }, + { + "epoch": 0.4275955258614218, + "grad_norm": 1.0961936712265015, + "learning_rate": 6.396555590685803e-06, + "loss": 0.3592, + "step": 21360 + }, + { + "epoch": 0.42761554437855015, + "grad_norm": 1.1450471878051758, + "learning_rate": 6.396244307017746e-06, + "loss": 0.3223, + "step": 21361 + }, + { + "epoch": 0.4276355628956785, + "grad_norm": 1.259755253791809, + "learning_rate": 6.3959330174801794e-06, + "loss": 0.3241, + "step": 21362 + }, + { + "epoch": 0.42765558141280685, + "grad_norm": 1.2060624361038208, + "learning_rate": 6.39562172207441e-06, + "loss": 0.3226, + "step": 21363 + }, + { + "epoch": 0.4276755999299352, + "grad_norm": 0.9952327609062195, + "learning_rate": 6.395310420801748e-06, + "loss": 0.2774, + "step": 21364 + }, + { + "epoch": 0.42769561844706355, + "grad_norm": 1.2044677734375, + "learning_rate": 6.394999113663501e-06, + "loss": 0.3265, + "step": 21365 + }, + { + "epoch": 0.4277156369641919, + "grad_norm": 1.8866740465164185, + "learning_rate": 6.394687800660978e-06, + "loss": 0.8162, + "step": 21366 + }, + { + "epoch": 0.4277356554813202, + "grad_norm": 1.1593669652938843, + "learning_rate": 6.3943764817954874e-06, + "loss": 0.2998, + "step": 21367 + }, + { + "epoch": 0.42775567399844855, + "grad_norm": 1.1569384336471558, + "learning_rate": 6.394065157068338e-06, + "loss": 0.3755, + "step": 21368 + }, + { + "epoch": 0.4277756925155769, + "grad_norm": 1.1213016510009766, + "learning_rate": 6.39375382648084e-06, + "loss": 0.3578, + "step": 21369 + }, + { + "epoch": 0.42779571103270525, + "grad_norm": 1.1025407314300537, + "learning_rate": 6.393442490034298e-06, + "loss": 0.3268, + "step": 21370 + }, + { + "epoch": 0.4278157295498336, + "grad_norm": 1.1384352445602417, + "learning_rate": 6.393131147730026e-06, + "loss": 0.3264, + "step": 21371 + }, + { + "epoch": 0.42783574806696195, + "grad_norm": 1.0338526964187622, + "learning_rate": 6.392819799569329e-06, + "loss": 0.3179, + "step": 21372 + }, + { + "epoch": 0.4278557665840903, + "grad_norm": 1.2070621252059937, + "learning_rate": 6.392508445553518e-06, + "loss": 0.3246, + "step": 21373 + }, + { + "epoch": 0.42787578510121865, + "grad_norm": 1.0924310684204102, + "learning_rate": 6.392197085683902e-06, + "loss": 0.3212, + "step": 21374 + }, + { + "epoch": 0.42789580361834695, + "grad_norm": 1.070810079574585, + "learning_rate": 6.391885719961788e-06, + "loss": 0.3343, + "step": 21375 + }, + { + "epoch": 0.4279158221354753, + "grad_norm": 1.1202651262283325, + "learning_rate": 6.391574348388486e-06, + "loss": 0.2563, + "step": 21376 + }, + { + "epoch": 0.42793584065260365, + "grad_norm": 1.0764302015304565, + "learning_rate": 6.391262970965306e-06, + "loss": 0.3195, + "step": 21377 + }, + { + "epoch": 0.427955859169732, + "grad_norm": 1.396233320236206, + "learning_rate": 6.390951587693556e-06, + "loss": 0.3446, + "step": 21378 + }, + { + "epoch": 0.42797587768686035, + "grad_norm": 1.0463204383850098, + "learning_rate": 6.390640198574544e-06, + "loss": 0.3031, + "step": 21379 + }, + { + "epoch": 0.4279958962039887, + "grad_norm": 1.1131590604782104, + "learning_rate": 6.390328803609579e-06, + "loss": 0.2979, + "step": 21380 + }, + { + "epoch": 0.42801591472111705, + "grad_norm": 1.083863615989685, + "learning_rate": 6.390017402799972e-06, + "loss": 0.318, + "step": 21381 + }, + { + "epoch": 0.4280359332382454, + "grad_norm": 1.082194209098816, + "learning_rate": 6.389705996147031e-06, + "loss": 0.3022, + "step": 21382 + }, + { + "epoch": 0.4280559517553737, + "grad_norm": 1.0905284881591797, + "learning_rate": 6.389394583652064e-06, + "loss": 0.3204, + "step": 21383 + }, + { + "epoch": 0.42807597027250205, + "grad_norm": 1.0347492694854736, + "learning_rate": 6.389083165316383e-06, + "loss": 0.2887, + "step": 21384 + }, + { + "epoch": 0.4280959887896304, + "grad_norm": 1.0600333213806152, + "learning_rate": 6.388771741141294e-06, + "loss": 0.3097, + "step": 21385 + }, + { + "epoch": 0.42811600730675875, + "grad_norm": 1.0549728870391846, + "learning_rate": 6.388460311128107e-06, + "loss": 0.3107, + "step": 21386 + }, + { + "epoch": 0.4281360258238871, + "grad_norm": 1.2864625453948975, + "learning_rate": 6.388148875278132e-06, + "loss": 0.2921, + "step": 21387 + }, + { + "epoch": 0.42815604434101545, + "grad_norm": 1.1498438119888306, + "learning_rate": 6.387837433592679e-06, + "loss": 0.3353, + "step": 21388 + }, + { + "epoch": 0.4281760628581438, + "grad_norm": 1.149220585823059, + "learning_rate": 6.387525986073055e-06, + "loss": 0.2843, + "step": 21389 + }, + { + "epoch": 0.42819608137527215, + "grad_norm": 1.1276135444641113, + "learning_rate": 6.387214532720571e-06, + "loss": 0.3309, + "step": 21390 + }, + { + "epoch": 0.42821609989240045, + "grad_norm": 1.0854907035827637, + "learning_rate": 6.386903073536536e-06, + "loss": 0.3119, + "step": 21391 + }, + { + "epoch": 0.4282361184095288, + "grad_norm": 1.0679304599761963, + "learning_rate": 6.386591608522258e-06, + "loss": 0.3421, + "step": 21392 + }, + { + "epoch": 0.42825613692665715, + "grad_norm": 1.0532243251800537, + "learning_rate": 6.386280137679047e-06, + "loss": 0.2978, + "step": 21393 + }, + { + "epoch": 0.4282761554437855, + "grad_norm": 1.1842176914215088, + "learning_rate": 6.385968661008213e-06, + "loss": 0.2842, + "step": 21394 + }, + { + "epoch": 0.42829617396091385, + "grad_norm": 1.0258620977401733, + "learning_rate": 6.385657178511067e-06, + "loss": 0.3113, + "step": 21395 + }, + { + "epoch": 0.4283161924780422, + "grad_norm": 1.0922592878341675, + "learning_rate": 6.385345690188913e-06, + "loss": 0.3469, + "step": 21396 + }, + { + "epoch": 0.42833621099517055, + "grad_norm": 1.060356855392456, + "learning_rate": 6.385034196043066e-06, + "loss": 0.3404, + "step": 21397 + }, + { + "epoch": 0.4283562295122989, + "grad_norm": 1.1005645990371704, + "learning_rate": 6.384722696074832e-06, + "loss": 0.3244, + "step": 21398 + }, + { + "epoch": 0.4283762480294272, + "grad_norm": 1.154020071029663, + "learning_rate": 6.3844111902855225e-06, + "loss": 0.281, + "step": 21399 + }, + { + "epoch": 0.42839626654655555, + "grad_norm": 1.8897706270217896, + "learning_rate": 6.384099678676448e-06, + "loss": 0.7995, + "step": 21400 + }, + { + "epoch": 0.4284162850636839, + "grad_norm": 1.0225878953933716, + "learning_rate": 6.383788161248914e-06, + "loss": 0.2739, + "step": 21401 + }, + { + "epoch": 0.42843630358081225, + "grad_norm": 0.9970770478248596, + "learning_rate": 6.383476638004234e-06, + "loss": 0.2796, + "step": 21402 + }, + { + "epoch": 0.4284563220979406, + "grad_norm": 0.9967604875564575, + "learning_rate": 6.383165108943714e-06, + "loss": 0.294, + "step": 21403 + }, + { + "epoch": 0.42847634061506895, + "grad_norm": 1.8927583694458008, + "learning_rate": 6.382853574068667e-06, + "loss": 0.8119, + "step": 21404 + }, + { + "epoch": 0.4284963591321973, + "grad_norm": 1.0742148160934448, + "learning_rate": 6.3825420333804e-06, + "loss": 0.3289, + "step": 21405 + }, + { + "epoch": 0.42851637764932565, + "grad_norm": 1.025063395500183, + "learning_rate": 6.382230486880224e-06, + "loss": 0.3591, + "step": 21406 + }, + { + "epoch": 0.42853639616645395, + "grad_norm": 1.1335407495498657, + "learning_rate": 6.3819189345694485e-06, + "loss": 0.283, + "step": 21407 + }, + { + "epoch": 0.4285564146835823, + "grad_norm": 1.0885347127914429, + "learning_rate": 6.3816073764493845e-06, + "loss": 0.3009, + "step": 21408 + }, + { + "epoch": 0.42857643320071065, + "grad_norm": 1.0872682332992554, + "learning_rate": 6.38129581252134e-06, + "loss": 0.3436, + "step": 21409 + }, + { + "epoch": 0.428596451717839, + "grad_norm": 1.2078462839126587, + "learning_rate": 6.380984242786626e-06, + "loss": 0.3455, + "step": 21410 + }, + { + "epoch": 0.42861647023496735, + "grad_norm": 1.2034095525741577, + "learning_rate": 6.380672667246548e-06, + "loss": 0.3628, + "step": 21411 + }, + { + "epoch": 0.4286364887520957, + "grad_norm": 0.9313145279884338, + "learning_rate": 6.380361085902421e-06, + "loss": 0.3051, + "step": 21412 + }, + { + "epoch": 0.42865650726922405, + "grad_norm": 1.1765365600585938, + "learning_rate": 6.380049498755555e-06, + "loss": 0.3068, + "step": 21413 + }, + { + "epoch": 0.4286765257863524, + "grad_norm": 1.0806964635849, + "learning_rate": 6.379737905807255e-06, + "loss": 0.3077, + "step": 21414 + }, + { + "epoch": 0.4286965443034807, + "grad_norm": 1.1107494831085205, + "learning_rate": 6.379426307058836e-06, + "loss": 0.3704, + "step": 21415 + }, + { + "epoch": 0.42871656282060905, + "grad_norm": 1.1752647161483765, + "learning_rate": 6.379114702511605e-06, + "loss": 0.3058, + "step": 21416 + }, + { + "epoch": 0.4287365813377374, + "grad_norm": 1.177615761756897, + "learning_rate": 6.378803092166873e-06, + "loss": 0.3666, + "step": 21417 + }, + { + "epoch": 0.42875659985486575, + "grad_norm": 1.1241180896759033, + "learning_rate": 6.378491476025949e-06, + "loss": 0.3065, + "step": 21418 + }, + { + "epoch": 0.4287766183719941, + "grad_norm": 1.0592753887176514, + "learning_rate": 6.378179854090142e-06, + "loss": 0.3126, + "step": 21419 + }, + { + "epoch": 0.42879663688912245, + "grad_norm": 1.24774968624115, + "learning_rate": 6.377868226360766e-06, + "loss": 0.2978, + "step": 21420 + }, + { + "epoch": 0.4288166554062508, + "grad_norm": 1.1047548055648804, + "learning_rate": 6.377556592839127e-06, + "loss": 0.3665, + "step": 21421 + }, + { + "epoch": 0.42883667392337915, + "grad_norm": 1.1143944263458252, + "learning_rate": 6.377244953526538e-06, + "loss": 0.3455, + "step": 21422 + }, + { + "epoch": 0.42885669244050745, + "grad_norm": 1.1562541723251343, + "learning_rate": 6.376933308424306e-06, + "loss": 0.3583, + "step": 21423 + }, + { + "epoch": 0.4288767109576358, + "grad_norm": 1.035752296447754, + "learning_rate": 6.376621657533743e-06, + "loss": 0.2861, + "step": 21424 + }, + { + "epoch": 0.42889672947476415, + "grad_norm": 1.0630478858947754, + "learning_rate": 6.376310000856157e-06, + "loss": 0.3178, + "step": 21425 + }, + { + "epoch": 0.4289167479918925, + "grad_norm": 1.055575966835022, + "learning_rate": 6.375998338392863e-06, + "loss": 0.3064, + "step": 21426 + }, + { + "epoch": 0.42893676650902085, + "grad_norm": 1.0181400775909424, + "learning_rate": 6.375686670145167e-06, + "loss": 0.3127, + "step": 21427 + }, + { + "epoch": 0.4289567850261492, + "grad_norm": 1.1486009359359741, + "learning_rate": 6.375374996114381e-06, + "loss": 0.2821, + "step": 21428 + }, + { + "epoch": 0.42897680354327755, + "grad_norm": 1.040955901145935, + "learning_rate": 6.375063316301813e-06, + "loss": 0.3587, + "step": 21429 + }, + { + "epoch": 0.4289968220604059, + "grad_norm": 1.1744070053100586, + "learning_rate": 6.3747516307087764e-06, + "loss": 0.3289, + "step": 21430 + }, + { + "epoch": 0.4290168405775342, + "grad_norm": 1.1116950511932373, + "learning_rate": 6.374439939336578e-06, + "loss": 0.2849, + "step": 21431 + }, + { + "epoch": 0.42903685909466255, + "grad_norm": 1.1226046085357666, + "learning_rate": 6.374128242186531e-06, + "loss": 0.3091, + "step": 21432 + }, + { + "epoch": 0.4290568776117909, + "grad_norm": 1.0908143520355225, + "learning_rate": 6.373816539259945e-06, + "loss": 0.2753, + "step": 21433 + }, + { + "epoch": 0.42907689612891925, + "grad_norm": 1.0339083671569824, + "learning_rate": 6.3735048305581295e-06, + "loss": 0.3032, + "step": 21434 + }, + { + "epoch": 0.4290969146460476, + "grad_norm": 1.124954104423523, + "learning_rate": 6.373193116082395e-06, + "loss": 0.3075, + "step": 21435 + }, + { + "epoch": 0.42911693316317595, + "grad_norm": 1.8395627737045288, + "learning_rate": 6.372881395834052e-06, + "loss": 0.8628, + "step": 21436 + }, + { + "epoch": 0.4291369516803043, + "grad_norm": 1.214776635169983, + "learning_rate": 6.372569669814411e-06, + "loss": 0.2983, + "step": 21437 + }, + { + "epoch": 0.42915697019743265, + "grad_norm": 1.03292715549469, + "learning_rate": 6.372257938024783e-06, + "loss": 0.266, + "step": 21438 + }, + { + "epoch": 0.42917698871456095, + "grad_norm": 1.1797399520874023, + "learning_rate": 6.371946200466478e-06, + "loss": 0.2626, + "step": 21439 + }, + { + "epoch": 0.4291970072316893, + "grad_norm": 1.0395050048828125, + "learning_rate": 6.3716344571408074e-06, + "loss": 0.2905, + "step": 21440 + }, + { + "epoch": 0.42921702574881765, + "grad_norm": 1.156644344329834, + "learning_rate": 6.371322708049079e-06, + "loss": 0.3081, + "step": 21441 + }, + { + "epoch": 0.429237044265946, + "grad_norm": 1.1294909715652466, + "learning_rate": 6.371010953192606e-06, + "loss": 0.3107, + "step": 21442 + }, + { + "epoch": 0.42925706278307435, + "grad_norm": 1.1471930742263794, + "learning_rate": 6.370699192572697e-06, + "loss": 0.3043, + "step": 21443 + }, + { + "epoch": 0.4292770813002027, + "grad_norm": 0.9801986217498779, + "learning_rate": 6.370387426190663e-06, + "loss": 0.2811, + "step": 21444 + }, + { + "epoch": 0.42929709981733105, + "grad_norm": 1.9755289554595947, + "learning_rate": 6.370075654047816e-06, + "loss": 0.732, + "step": 21445 + }, + { + "epoch": 0.4293171183344594, + "grad_norm": 1.0614628791809082, + "learning_rate": 6.369763876145467e-06, + "loss": 0.3074, + "step": 21446 + }, + { + "epoch": 0.4293371368515877, + "grad_norm": 1.2061296701431274, + "learning_rate": 6.369452092484925e-06, + "loss": 0.3041, + "step": 21447 + }, + { + "epoch": 0.42935715536871605, + "grad_norm": 1.0579921007156372, + "learning_rate": 6.3691403030675e-06, + "loss": 0.3454, + "step": 21448 + }, + { + "epoch": 0.4293771738858444, + "grad_norm": 1.2352216243743896, + "learning_rate": 6.368828507894504e-06, + "loss": 0.3318, + "step": 21449 + }, + { + "epoch": 0.42939719240297275, + "grad_norm": 1.0815664529800415, + "learning_rate": 6.3685167069672484e-06, + "loss": 0.3082, + "step": 21450 + }, + { + "epoch": 0.4294172109201011, + "grad_norm": 1.0187140703201294, + "learning_rate": 6.368204900287042e-06, + "loss": 0.3154, + "step": 21451 + }, + { + "epoch": 0.42943722943722945, + "grad_norm": 1.165163516998291, + "learning_rate": 6.367893087855198e-06, + "loss": 0.2995, + "step": 21452 + }, + { + "epoch": 0.4294572479543578, + "grad_norm": 1.1338739395141602, + "learning_rate": 6.367581269673026e-06, + "loss": 0.312, + "step": 21453 + }, + { + "epoch": 0.42947726647148615, + "grad_norm": 2.004767894744873, + "learning_rate": 6.367269445741835e-06, + "loss": 0.7868, + "step": 21454 + }, + { + "epoch": 0.42949728498861445, + "grad_norm": 1.1263524293899536, + "learning_rate": 6.3669576160629385e-06, + "loss": 0.3271, + "step": 21455 + }, + { + "epoch": 0.4295173035057428, + "grad_norm": 1.1304863691329956, + "learning_rate": 6.366645780637646e-06, + "loss": 0.3051, + "step": 21456 + }, + { + "epoch": 0.42953732202287115, + "grad_norm": 1.0264567136764526, + "learning_rate": 6.366333939467268e-06, + "loss": 0.311, + "step": 21457 + }, + { + "epoch": 0.4295573405399995, + "grad_norm": 1.0696719884872437, + "learning_rate": 6.3660220925531165e-06, + "loss": 0.3117, + "step": 21458 + }, + { + "epoch": 0.42957735905712785, + "grad_norm": 1.119182825088501, + "learning_rate": 6.365710239896504e-06, + "loss": 0.3101, + "step": 21459 + }, + { + "epoch": 0.4295973775742562, + "grad_norm": 1.205404281616211, + "learning_rate": 6.365398381498737e-06, + "loss": 0.3559, + "step": 21460 + }, + { + "epoch": 0.42961739609138455, + "grad_norm": 1.1943222284317017, + "learning_rate": 6.365086517361132e-06, + "loss": 0.334, + "step": 21461 + }, + { + "epoch": 0.4296374146085129, + "grad_norm": 0.9821703433990479, + "learning_rate": 6.364774647484995e-06, + "loss": 0.329, + "step": 21462 + }, + { + "epoch": 0.4296574331256412, + "grad_norm": 1.136164665222168, + "learning_rate": 6.364462771871638e-06, + "loss": 0.2919, + "step": 21463 + }, + { + "epoch": 0.42967745164276955, + "grad_norm": 1.0955359935760498, + "learning_rate": 6.364150890522376e-06, + "loss": 0.2989, + "step": 21464 + }, + { + "epoch": 0.4296974701598979, + "grad_norm": 1.0915379524230957, + "learning_rate": 6.363839003438514e-06, + "loss": 0.3306, + "step": 21465 + }, + { + "epoch": 0.42971748867702625, + "grad_norm": 1.0860284566879272, + "learning_rate": 6.363527110621369e-06, + "loss": 0.2688, + "step": 21466 + }, + { + "epoch": 0.4297375071941546, + "grad_norm": 1.1514548063278198, + "learning_rate": 6.363215212072249e-06, + "loss": 0.329, + "step": 21467 + }, + { + "epoch": 0.42975752571128295, + "grad_norm": 1.282652497291565, + "learning_rate": 6.362903307792465e-06, + "loss": 0.3437, + "step": 21468 + }, + { + "epoch": 0.4297775442284113, + "grad_norm": 1.0193618535995483, + "learning_rate": 6.362591397783328e-06, + "loss": 0.3017, + "step": 21469 + }, + { + "epoch": 0.42979756274553965, + "grad_norm": 1.000245451927185, + "learning_rate": 6.3622794820461505e-06, + "loss": 0.2995, + "step": 21470 + }, + { + "epoch": 0.42981758126266795, + "grad_norm": 1.1969568729400635, + "learning_rate": 6.361967560582245e-06, + "loss": 0.3477, + "step": 21471 + }, + { + "epoch": 0.4298375997797963, + "grad_norm": 1.2234175205230713, + "learning_rate": 6.361655633392919e-06, + "loss": 0.2739, + "step": 21472 + }, + { + "epoch": 0.42985761829692465, + "grad_norm": 1.209937572479248, + "learning_rate": 6.361343700479488e-06, + "loss": 0.2797, + "step": 21473 + }, + { + "epoch": 0.429877636814053, + "grad_norm": 1.863532304763794, + "learning_rate": 6.361031761843258e-06, + "loss": 0.7955, + "step": 21474 + }, + { + "epoch": 0.42989765533118135, + "grad_norm": 2.1184957027435303, + "learning_rate": 6.360719817485545e-06, + "loss": 0.8417, + "step": 21475 + }, + { + "epoch": 0.4299176738483097, + "grad_norm": 1.0484963655471802, + "learning_rate": 6.3604078674076574e-06, + "loss": 0.317, + "step": 21476 + }, + { + "epoch": 0.42993769236543805, + "grad_norm": 1.2235819101333618, + "learning_rate": 6.360095911610909e-06, + "loss": 0.3114, + "step": 21477 + }, + { + "epoch": 0.42995771088256635, + "grad_norm": 1.1271229982376099, + "learning_rate": 6.359783950096611e-06, + "loss": 0.2964, + "step": 21478 + }, + { + "epoch": 0.4299777293996947, + "grad_norm": 1.0353341102600098, + "learning_rate": 6.3594719828660735e-06, + "loss": 0.2895, + "step": 21479 + }, + { + "epoch": 0.42999774791682305, + "grad_norm": 1.0330337285995483, + "learning_rate": 6.359160009920606e-06, + "loss": 0.3375, + "step": 21480 + }, + { + "epoch": 0.4300177664339514, + "grad_norm": 1.150153398513794, + "learning_rate": 6.358848031261525e-06, + "loss": 0.3271, + "step": 21481 + }, + { + "epoch": 0.43003778495107975, + "grad_norm": 1.21494460105896, + "learning_rate": 6.358536046890138e-06, + "loss": 0.3007, + "step": 21482 + }, + { + "epoch": 0.4300578034682081, + "grad_norm": 1.1130095720291138, + "learning_rate": 6.358224056807758e-06, + "loss": 0.3382, + "step": 21483 + }, + { + "epoch": 0.43007782198533645, + "grad_norm": 1.1608480215072632, + "learning_rate": 6.357912061015697e-06, + "loss": 0.3409, + "step": 21484 + }, + { + "epoch": 0.4300978405024648, + "grad_norm": 1.1878502368927002, + "learning_rate": 6.357600059515266e-06, + "loss": 0.3119, + "step": 21485 + }, + { + "epoch": 0.4301178590195931, + "grad_norm": 1.822392225265503, + "learning_rate": 6.357288052307777e-06, + "loss": 0.7897, + "step": 21486 + }, + { + "epoch": 0.43013787753672145, + "grad_norm": 1.0720256567001343, + "learning_rate": 6.356976039394539e-06, + "loss": 0.2842, + "step": 21487 + }, + { + "epoch": 0.4301578960538498, + "grad_norm": 1.278067708015442, + "learning_rate": 6.356664020776867e-06, + "loss": 0.2525, + "step": 21488 + }, + { + "epoch": 0.43017791457097815, + "grad_norm": 1.1406794786453247, + "learning_rate": 6.35635199645607e-06, + "loss": 0.3142, + "step": 21489 + }, + { + "epoch": 0.4301979330881065, + "grad_norm": 1.1200557947158813, + "learning_rate": 6.356039966433464e-06, + "loss": 0.2937, + "step": 21490 + }, + { + "epoch": 0.43021795160523485, + "grad_norm": 1.0381653308868408, + "learning_rate": 6.355727930710356e-06, + "loss": 0.279, + "step": 21491 + }, + { + "epoch": 0.4302379701223632, + "grad_norm": 1.1971588134765625, + "learning_rate": 6.35541588928806e-06, + "loss": 0.3648, + "step": 21492 + }, + { + "epoch": 0.43025798863949155, + "grad_norm": 1.0816216468811035, + "learning_rate": 6.3551038421678875e-06, + "loss": 0.2744, + "step": 21493 + }, + { + "epoch": 0.43027800715661985, + "grad_norm": 1.2318644523620605, + "learning_rate": 6.354791789351149e-06, + "loss": 0.3248, + "step": 21494 + }, + { + "epoch": 0.4302980256737482, + "grad_norm": 1.1221057176589966, + "learning_rate": 6.354479730839158e-06, + "loss": 0.3091, + "step": 21495 + }, + { + "epoch": 0.43031804419087655, + "grad_norm": 1.021511197090149, + "learning_rate": 6.354167666633225e-06, + "loss": 0.2912, + "step": 21496 + }, + { + "epoch": 0.4303380627080049, + "grad_norm": 1.1359928846359253, + "learning_rate": 6.3538555967346636e-06, + "loss": 0.3287, + "step": 21497 + }, + { + "epoch": 0.43035808122513325, + "grad_norm": 1.1594117879867554, + "learning_rate": 6.353543521144785e-06, + "loss": 0.3831, + "step": 21498 + }, + { + "epoch": 0.4303780997422616, + "grad_norm": 1.1297932863235474, + "learning_rate": 6.3532314398649e-06, + "loss": 0.3299, + "step": 21499 + }, + { + "epoch": 0.43039811825938995, + "grad_norm": 1.1844291687011719, + "learning_rate": 6.352919352896321e-06, + "loss": 0.3538, + "step": 21500 + }, + { + "epoch": 0.4304181367765183, + "grad_norm": 1.1616524457931519, + "learning_rate": 6.35260726024036e-06, + "loss": 0.3254, + "step": 21501 + }, + { + "epoch": 0.4304381552936466, + "grad_norm": 1.1448465585708618, + "learning_rate": 6.352295161898328e-06, + "loss": 0.2823, + "step": 21502 + }, + { + "epoch": 0.43045817381077495, + "grad_norm": 1.1191853284835815, + "learning_rate": 6.351983057871541e-06, + "loss": 0.3673, + "step": 21503 + }, + { + "epoch": 0.4304781923279033, + "grad_norm": 1.1746207475662231, + "learning_rate": 6.351670948161308e-06, + "loss": 0.3856, + "step": 21504 + }, + { + "epoch": 0.43049821084503165, + "grad_norm": 1.0493220090866089, + "learning_rate": 6.351358832768939e-06, + "loss": 0.3348, + "step": 21505 + }, + { + "epoch": 0.43051822936216, + "grad_norm": 1.004706859588623, + "learning_rate": 6.351046711695751e-06, + "loss": 0.3104, + "step": 21506 + }, + { + "epoch": 0.43053824787928835, + "grad_norm": 1.1197763681411743, + "learning_rate": 6.350734584943051e-06, + "loss": 0.298, + "step": 21507 + }, + { + "epoch": 0.4305582663964167, + "grad_norm": 1.046695590019226, + "learning_rate": 6.350422452512154e-06, + "loss": 0.2765, + "step": 21508 + }, + { + "epoch": 0.43057828491354505, + "grad_norm": 1.0972144603729248, + "learning_rate": 6.350110314404372e-06, + "loss": 0.2848, + "step": 21509 + }, + { + "epoch": 0.43059830343067335, + "grad_norm": 0.9946622252464294, + "learning_rate": 6.349798170621016e-06, + "loss": 0.335, + "step": 21510 + }, + { + "epoch": 0.4306183219478017, + "grad_norm": 1.0942188501358032, + "learning_rate": 6.3494860211634e-06, + "loss": 0.3292, + "step": 21511 + }, + { + "epoch": 0.43063834046493005, + "grad_norm": 1.8207472562789917, + "learning_rate": 6.349173866032835e-06, + "loss": 0.84, + "step": 21512 + }, + { + "epoch": 0.4306583589820584, + "grad_norm": 1.0626108646392822, + "learning_rate": 6.348861705230634e-06, + "loss": 0.3024, + "step": 21513 + }, + { + "epoch": 0.43067837749918675, + "grad_norm": 1.091618537902832, + "learning_rate": 6.3485495387581066e-06, + "loss": 0.3154, + "step": 21514 + }, + { + "epoch": 0.4306983960163151, + "grad_norm": 1.889642357826233, + "learning_rate": 6.3482373666165695e-06, + "loss": 0.8357, + "step": 21515 + }, + { + "epoch": 0.43071841453344345, + "grad_norm": 1.3118678331375122, + "learning_rate": 6.347925188807331e-06, + "loss": 0.3239, + "step": 21516 + }, + { + "epoch": 0.4307384330505718, + "grad_norm": 1.2038179636001587, + "learning_rate": 6.347613005331706e-06, + "loss": 0.3364, + "step": 21517 + }, + { + "epoch": 0.4307584515677001, + "grad_norm": 1.1646015644073486, + "learning_rate": 6.347300816191006e-06, + "loss": 0.2946, + "step": 21518 + }, + { + "epoch": 0.43077847008482845, + "grad_norm": 1.1928437948226929, + "learning_rate": 6.346988621386543e-06, + "loss": 0.3416, + "step": 21519 + }, + { + "epoch": 0.4307984886019568, + "grad_norm": 1.1974488496780396, + "learning_rate": 6.34667642091963e-06, + "loss": 0.3208, + "step": 21520 + }, + { + "epoch": 0.43081850711908515, + "grad_norm": 1.1294044256210327, + "learning_rate": 6.3463642147915785e-06, + "loss": 0.3211, + "step": 21521 + }, + { + "epoch": 0.4308385256362135, + "grad_norm": 1.1406679153442383, + "learning_rate": 6.346052003003701e-06, + "loss": 0.3314, + "step": 21522 + }, + { + "epoch": 0.43085854415334185, + "grad_norm": 1.1898562908172607, + "learning_rate": 6.345739785557312e-06, + "loss": 0.3424, + "step": 21523 + }, + { + "epoch": 0.4308785626704702, + "grad_norm": 1.2155988216400146, + "learning_rate": 6.345427562453722e-06, + "loss": 0.3672, + "step": 21524 + }, + { + "epoch": 0.43089858118759855, + "grad_norm": 2.0410263538360596, + "learning_rate": 6.3451153336942435e-06, + "loss": 0.7971, + "step": 21525 + }, + { + "epoch": 0.43091859970472685, + "grad_norm": 1.1170177459716797, + "learning_rate": 6.344803099280191e-06, + "loss": 0.3613, + "step": 21526 + }, + { + "epoch": 0.4309386182218552, + "grad_norm": 1.071649193763733, + "learning_rate": 6.344490859212874e-06, + "loss": 0.3147, + "step": 21527 + }, + { + "epoch": 0.43095863673898355, + "grad_norm": 1.1818443536758423, + "learning_rate": 6.344178613493608e-06, + "loss": 0.3191, + "step": 21528 + }, + { + "epoch": 0.4309786552561119, + "grad_norm": 1.1394513845443726, + "learning_rate": 6.3438663621237025e-06, + "loss": 0.3193, + "step": 21529 + }, + { + "epoch": 0.43099867377324025, + "grad_norm": 1.1455708742141724, + "learning_rate": 6.343554105104474e-06, + "loss": 0.3426, + "step": 21530 + }, + { + "epoch": 0.4310186922903686, + "grad_norm": 1.1772327423095703, + "learning_rate": 6.343241842437233e-06, + "loss": 0.323, + "step": 21531 + }, + { + "epoch": 0.43103871080749695, + "grad_norm": 1.157681941986084, + "learning_rate": 6.342929574123293e-06, + "loss": 0.3358, + "step": 21532 + }, + { + "epoch": 0.4310587293246253, + "grad_norm": 1.1854623556137085, + "learning_rate": 6.342617300163965e-06, + "loss": 0.3151, + "step": 21533 + }, + { + "epoch": 0.4310787478417536, + "grad_norm": 1.1290059089660645, + "learning_rate": 6.3423050205605616e-06, + "loss": 0.3074, + "step": 21534 + }, + { + "epoch": 0.43109876635888195, + "grad_norm": 1.0624358654022217, + "learning_rate": 6.341992735314399e-06, + "loss": 0.3195, + "step": 21535 + }, + { + "epoch": 0.4311187848760103, + "grad_norm": 1.157403588294983, + "learning_rate": 6.3416804444267864e-06, + "loss": 0.3583, + "step": 21536 + }, + { + "epoch": 0.43113880339313865, + "grad_norm": 1.0615485906600952, + "learning_rate": 6.34136814789904e-06, + "loss": 0.2887, + "step": 21537 + }, + { + "epoch": 0.431158821910267, + "grad_norm": 1.820900321006775, + "learning_rate": 6.3410558457324675e-06, + "loss": 0.8945, + "step": 21538 + }, + { + "epoch": 0.43117884042739535, + "grad_norm": 1.2347021102905273, + "learning_rate": 6.340743537928386e-06, + "loss": 0.3349, + "step": 21539 + }, + { + "epoch": 0.4311988589445237, + "grad_norm": 1.8422739505767822, + "learning_rate": 6.340431224488107e-06, + "loss": 0.8596, + "step": 21540 + }, + { + "epoch": 0.43121887746165205, + "grad_norm": 2.2061197757720947, + "learning_rate": 6.340118905412944e-06, + "loss": 0.7169, + "step": 21541 + }, + { + "epoch": 0.43123889597878035, + "grad_norm": 2.0584065914154053, + "learning_rate": 6.33980658070421e-06, + "loss": 0.7731, + "step": 21542 + }, + { + "epoch": 0.4312589144959087, + "grad_norm": 2.013645887374878, + "learning_rate": 6.339494250363217e-06, + "loss": 0.8558, + "step": 21543 + }, + { + "epoch": 0.43127893301303705, + "grad_norm": 1.1529446840286255, + "learning_rate": 6.339181914391279e-06, + "loss": 0.2851, + "step": 21544 + }, + { + "epoch": 0.4312989515301654, + "grad_norm": 1.147767424583435, + "learning_rate": 6.338869572789708e-06, + "loss": 0.3137, + "step": 21545 + }, + { + "epoch": 0.43131897004729375, + "grad_norm": 1.0603340864181519, + "learning_rate": 6.338557225559817e-06, + "loss": 0.323, + "step": 21546 + }, + { + "epoch": 0.4313389885644221, + "grad_norm": 1.0183881521224976, + "learning_rate": 6.338244872702919e-06, + "loss": 0.3153, + "step": 21547 + }, + { + "epoch": 0.43135900708155045, + "grad_norm": 1.0877200365066528, + "learning_rate": 6.337932514220329e-06, + "loss": 0.2789, + "step": 21548 + }, + { + "epoch": 0.4313790255986788, + "grad_norm": 1.0849661827087402, + "learning_rate": 6.337620150113358e-06, + "loss": 0.2912, + "step": 21549 + }, + { + "epoch": 0.4313990441158071, + "grad_norm": 1.2698166370391846, + "learning_rate": 6.337307780383321e-06, + "loss": 0.3213, + "step": 21550 + }, + { + "epoch": 0.43141906263293545, + "grad_norm": 1.1720819473266602, + "learning_rate": 6.336995405031527e-06, + "loss": 0.3249, + "step": 21551 + }, + { + "epoch": 0.4314390811500638, + "grad_norm": 2.0750956535339355, + "learning_rate": 6.336683024059295e-06, + "loss": 0.7839, + "step": 21552 + }, + { + "epoch": 0.43145909966719215, + "grad_norm": 2.002166509628296, + "learning_rate": 6.3363706374679325e-06, + "loss": 0.8673, + "step": 21553 + }, + { + "epoch": 0.4314791181843205, + "grad_norm": 1.0904892683029175, + "learning_rate": 6.336058245258757e-06, + "loss": 0.3168, + "step": 21554 + }, + { + "epoch": 0.43149913670144885, + "grad_norm": 1.0778107643127441, + "learning_rate": 6.335745847433081e-06, + "loss": 0.2866, + "step": 21555 + }, + { + "epoch": 0.4315191552185772, + "grad_norm": 1.0348379611968994, + "learning_rate": 6.335433443992216e-06, + "loss": 0.3133, + "step": 21556 + }, + { + "epoch": 0.43153917373570555, + "grad_norm": 1.104428768157959, + "learning_rate": 6.335121034937477e-06, + "loss": 0.314, + "step": 21557 + }, + { + "epoch": 0.43155919225283385, + "grad_norm": 1.901627779006958, + "learning_rate": 6.334808620270174e-06, + "loss": 0.7921, + "step": 21558 + }, + { + "epoch": 0.4315792107699622, + "grad_norm": 1.162976861000061, + "learning_rate": 6.334496199991624e-06, + "loss": 0.3, + "step": 21559 + }, + { + "epoch": 0.43159922928709055, + "grad_norm": 1.0400842428207397, + "learning_rate": 6.33418377410314e-06, + "loss": 0.3349, + "step": 21560 + }, + { + "epoch": 0.4316192478042189, + "grad_norm": 1.0761737823486328, + "learning_rate": 6.333871342606035e-06, + "loss": 0.2987, + "step": 21561 + }, + { + "epoch": 0.43163926632134725, + "grad_norm": 1.889870524406433, + "learning_rate": 6.33355890550162e-06, + "loss": 0.7874, + "step": 21562 + }, + { + "epoch": 0.4316592848384756, + "grad_norm": 1.0209729671478271, + "learning_rate": 6.333246462791212e-06, + "loss": 0.2948, + "step": 21563 + }, + { + "epoch": 0.43167930335560395, + "grad_norm": 1.1336990594863892, + "learning_rate": 6.332934014476122e-06, + "loss": 0.3583, + "step": 21564 + }, + { + "epoch": 0.4316993218727323, + "grad_norm": 1.0081592798233032, + "learning_rate": 6.332621560557665e-06, + "loss": 0.291, + "step": 21565 + }, + { + "epoch": 0.4317193403898606, + "grad_norm": 1.216282606124878, + "learning_rate": 6.332309101037153e-06, + "loss": 0.3248, + "step": 21566 + }, + { + "epoch": 0.43173935890698895, + "grad_norm": 1.1553325653076172, + "learning_rate": 6.331996635915899e-06, + "loss": 0.3203, + "step": 21567 + }, + { + "epoch": 0.4317593774241173, + "grad_norm": 1.1835567951202393, + "learning_rate": 6.33168416519522e-06, + "loss": 0.3273, + "step": 21568 + }, + { + "epoch": 0.43177939594124565, + "grad_norm": 1.096400260925293, + "learning_rate": 6.3313716888764265e-06, + "loss": 0.3401, + "step": 21569 + }, + { + "epoch": 0.431799414458374, + "grad_norm": 1.3023462295532227, + "learning_rate": 6.331059206960833e-06, + "loss": 0.349, + "step": 21570 + }, + { + "epoch": 0.43181943297550235, + "grad_norm": 1.0858832597732544, + "learning_rate": 6.330746719449753e-06, + "loss": 0.3415, + "step": 21571 + }, + { + "epoch": 0.4318394514926307, + "grad_norm": 1.0984677076339722, + "learning_rate": 6.330434226344499e-06, + "loss": 0.3398, + "step": 21572 + }, + { + "epoch": 0.43185947000975905, + "grad_norm": 1.9551653861999512, + "learning_rate": 6.330121727646388e-06, + "loss": 0.7872, + "step": 21573 + }, + { + "epoch": 0.43187948852688735, + "grad_norm": 1.07722008228302, + "learning_rate": 6.329809223356731e-06, + "loss": 0.2908, + "step": 21574 + }, + { + "epoch": 0.4318995070440157, + "grad_norm": 1.0118836164474487, + "learning_rate": 6.329496713476842e-06, + "loss": 0.3034, + "step": 21575 + }, + { + "epoch": 0.43191952556114405, + "grad_norm": 1.1331366300582886, + "learning_rate": 6.329184198008034e-06, + "loss": 0.3199, + "step": 21576 + }, + { + "epoch": 0.4319395440782724, + "grad_norm": 1.0308291912078857, + "learning_rate": 6.3288716769516236e-06, + "loss": 0.2847, + "step": 21577 + }, + { + "epoch": 0.43195956259540075, + "grad_norm": 1.1446765661239624, + "learning_rate": 6.328559150308921e-06, + "loss": 0.3103, + "step": 21578 + }, + { + "epoch": 0.4319795811125291, + "grad_norm": 1.0658013820648193, + "learning_rate": 6.328246618081242e-06, + "loss": 0.254, + "step": 21579 + }, + { + "epoch": 0.43199959962965745, + "grad_norm": 1.9194080829620361, + "learning_rate": 6.3279340802698985e-06, + "loss": 0.8335, + "step": 21580 + }, + { + "epoch": 0.4320196181467858, + "grad_norm": 1.161032795906067, + "learning_rate": 6.3276215368762085e-06, + "loss": 0.3454, + "step": 21581 + }, + { + "epoch": 0.4320396366639141, + "grad_norm": 1.2213331460952759, + "learning_rate": 6.327308987901481e-06, + "loss": 0.3123, + "step": 21582 + }, + { + "epoch": 0.43205965518104245, + "grad_norm": 1.0916942358016968, + "learning_rate": 6.326996433347034e-06, + "loss": 0.2969, + "step": 21583 + }, + { + "epoch": 0.4320796736981708, + "grad_norm": 1.245987892150879, + "learning_rate": 6.326683873214179e-06, + "loss": 0.3552, + "step": 21584 + }, + { + "epoch": 0.43209969221529915, + "grad_norm": 1.1317514181137085, + "learning_rate": 6.326371307504231e-06, + "loss": 0.3686, + "step": 21585 + }, + { + "epoch": 0.4321197107324275, + "grad_norm": 1.448886513710022, + "learning_rate": 6.326058736218504e-06, + "loss": 0.2955, + "step": 21586 + }, + { + "epoch": 0.43213972924955585, + "grad_norm": 1.0780878067016602, + "learning_rate": 6.325746159358309e-06, + "loss": 0.3313, + "step": 21587 + }, + { + "epoch": 0.4321597477666842, + "grad_norm": 1.130001425743103, + "learning_rate": 6.3254335769249645e-06, + "loss": 0.3184, + "step": 21588 + }, + { + "epoch": 0.43217976628381255, + "grad_norm": 1.0730187892913818, + "learning_rate": 6.325120988919782e-06, + "loss": 0.276, + "step": 21589 + }, + { + "epoch": 0.43219978480094084, + "grad_norm": 1.0976581573486328, + "learning_rate": 6.324808395344075e-06, + "loss": 0.317, + "step": 21590 + }, + { + "epoch": 0.4322198033180692, + "grad_norm": 1.183289885520935, + "learning_rate": 6.324495796199159e-06, + "loss": 0.3718, + "step": 21591 + }, + { + "epoch": 0.43223982183519755, + "grad_norm": 1.1899464130401611, + "learning_rate": 6.324183191486349e-06, + "loss": 0.3097, + "step": 21592 + }, + { + "epoch": 0.4322598403523259, + "grad_norm": 1.0568747520446777, + "learning_rate": 6.323870581206957e-06, + "loss": 0.3171, + "step": 21593 + }, + { + "epoch": 0.43227985886945425, + "grad_norm": 1.1769181489944458, + "learning_rate": 6.323557965362298e-06, + "loss": 0.2979, + "step": 21594 + }, + { + "epoch": 0.4322998773865826, + "grad_norm": 1.0319191217422485, + "learning_rate": 6.323245343953686e-06, + "loss": 0.3076, + "step": 21595 + }, + { + "epoch": 0.43231989590371095, + "grad_norm": 1.0755951404571533, + "learning_rate": 6.322932716982435e-06, + "loss": 0.2984, + "step": 21596 + }, + { + "epoch": 0.4323399144208393, + "grad_norm": 2.0184614658355713, + "learning_rate": 6.32262008444986e-06, + "loss": 0.7952, + "step": 21597 + }, + { + "epoch": 0.4323599329379676, + "grad_norm": 1.8652515411376953, + "learning_rate": 6.322307446357274e-06, + "loss": 0.8743, + "step": 21598 + }, + { + "epoch": 0.43237995145509595, + "grad_norm": 1.0755829811096191, + "learning_rate": 6.321994802705994e-06, + "loss": 0.3038, + "step": 21599 + }, + { + "epoch": 0.4323999699722243, + "grad_norm": 1.1268013715744019, + "learning_rate": 6.32168215349733e-06, + "loss": 0.3367, + "step": 21600 + }, + { + "epoch": 0.43241998848935265, + "grad_norm": 1.1641916036605835, + "learning_rate": 6.321369498732602e-06, + "loss": 0.2533, + "step": 21601 + }, + { + "epoch": 0.432440007006481, + "grad_norm": 1.0741392374038696, + "learning_rate": 6.321056838413117e-06, + "loss": 0.2732, + "step": 21602 + }, + { + "epoch": 0.43246002552360935, + "grad_norm": 0.9803313612937927, + "learning_rate": 6.320744172540196e-06, + "loss": 0.2294, + "step": 21603 + }, + { + "epoch": 0.4324800440407377, + "grad_norm": 1.1145397424697876, + "learning_rate": 6.320431501115149e-06, + "loss": 0.3299, + "step": 21604 + }, + { + "epoch": 0.43250006255786605, + "grad_norm": 1.1208641529083252, + "learning_rate": 6.320118824139294e-06, + "loss": 0.274, + "step": 21605 + }, + { + "epoch": 0.43252008107499434, + "grad_norm": 1.1349265575408936, + "learning_rate": 6.319806141613942e-06, + "loss": 0.3012, + "step": 21606 + }, + { + "epoch": 0.4325400995921227, + "grad_norm": 1.0490636825561523, + "learning_rate": 6.3194934535404105e-06, + "loss": 0.3404, + "step": 21607 + }, + { + "epoch": 0.43256011810925105, + "grad_norm": 1.145419955253601, + "learning_rate": 6.319180759920011e-06, + "loss": 0.2808, + "step": 21608 + }, + { + "epoch": 0.4325801366263794, + "grad_norm": 1.9500371217727661, + "learning_rate": 6.31886806075406e-06, + "loss": 0.8219, + "step": 21609 + }, + { + "epoch": 0.43260015514350775, + "grad_norm": 1.0949455499649048, + "learning_rate": 6.3185553560438714e-06, + "loss": 0.2813, + "step": 21610 + }, + { + "epoch": 0.4326201736606361, + "grad_norm": 1.2718828916549683, + "learning_rate": 6.31824264579076e-06, + "loss": 0.3079, + "step": 21611 + }, + { + "epoch": 0.43264019217776445, + "grad_norm": 1.3297712802886963, + "learning_rate": 6.317929929996041e-06, + "loss": 0.3408, + "step": 21612 + }, + { + "epoch": 0.4326602106948928, + "grad_norm": 1.2194768190383911, + "learning_rate": 6.317617208661026e-06, + "loss": 0.3206, + "step": 21613 + }, + { + "epoch": 0.4326802292120211, + "grad_norm": 1.007224440574646, + "learning_rate": 6.317304481787034e-06, + "loss": 0.2895, + "step": 21614 + }, + { + "epoch": 0.43270024772914945, + "grad_norm": 1.896565318107605, + "learning_rate": 6.3169917493753765e-06, + "loss": 0.8245, + "step": 21615 + }, + { + "epoch": 0.4327202662462778, + "grad_norm": 1.203181505203247, + "learning_rate": 6.31667901142737e-06, + "loss": 0.3257, + "step": 21616 + }, + { + "epoch": 0.43274028476340615, + "grad_norm": 1.069155216217041, + "learning_rate": 6.316366267944327e-06, + "loss": 0.3244, + "step": 21617 + }, + { + "epoch": 0.4327603032805345, + "grad_norm": 0.9955441355705261, + "learning_rate": 6.316053518927564e-06, + "loss": 0.3082, + "step": 21618 + }, + { + "epoch": 0.43278032179766285, + "grad_norm": 1.4402382373809814, + "learning_rate": 6.3157407643783955e-06, + "loss": 0.357, + "step": 21619 + }, + { + "epoch": 0.4328003403147912, + "grad_norm": 1.105544924736023, + "learning_rate": 6.315428004298135e-06, + "loss": 0.3232, + "step": 21620 + }, + { + "epoch": 0.43282035883191955, + "grad_norm": 1.176730751991272, + "learning_rate": 6.3151152386881e-06, + "loss": 0.3019, + "step": 21621 + }, + { + "epoch": 0.43284037734904784, + "grad_norm": 1.3201746940612793, + "learning_rate": 6.314802467549602e-06, + "loss": 0.2969, + "step": 21622 + }, + { + "epoch": 0.4328603958661762, + "grad_norm": 1.097487211227417, + "learning_rate": 6.3144896908839584e-06, + "loss": 0.3138, + "step": 21623 + }, + { + "epoch": 0.43288041438330455, + "grad_norm": 0.9717741012573242, + "learning_rate": 6.314176908692482e-06, + "loss": 0.3255, + "step": 21624 + }, + { + "epoch": 0.4329004329004329, + "grad_norm": 1.078083872795105, + "learning_rate": 6.313864120976489e-06, + "loss": 0.3014, + "step": 21625 + }, + { + "epoch": 0.43292045141756125, + "grad_norm": 1.8998420238494873, + "learning_rate": 6.313551327737294e-06, + "loss": 0.8239, + "step": 21626 + }, + { + "epoch": 0.4329404699346896, + "grad_norm": 0.9949568510055542, + "learning_rate": 6.313238528976213e-06, + "loss": 0.3106, + "step": 21627 + }, + { + "epoch": 0.43296048845181795, + "grad_norm": 1.7981741428375244, + "learning_rate": 6.312925724694559e-06, + "loss": 0.8177, + "step": 21628 + }, + { + "epoch": 0.4329805069689463, + "grad_norm": 1.1798127889633179, + "learning_rate": 6.312612914893648e-06, + "loss": 0.3235, + "step": 21629 + }, + { + "epoch": 0.4330005254860746, + "grad_norm": 1.0937455892562866, + "learning_rate": 6.312300099574794e-06, + "loss": 0.2948, + "step": 21630 + }, + { + "epoch": 0.43302054400320295, + "grad_norm": 1.0751737356185913, + "learning_rate": 6.311987278739313e-06, + "loss": 0.2897, + "step": 21631 + }, + { + "epoch": 0.4330405625203313, + "grad_norm": 1.9202806949615479, + "learning_rate": 6.311674452388521e-06, + "loss": 0.7356, + "step": 21632 + }, + { + "epoch": 0.43306058103745965, + "grad_norm": 1.1495070457458496, + "learning_rate": 6.31136162052373e-06, + "loss": 0.3133, + "step": 21633 + }, + { + "epoch": 0.433080599554588, + "grad_norm": 0.9981727600097656, + "learning_rate": 6.311048783146259e-06, + "loss": 0.2658, + "step": 21634 + }, + { + "epoch": 0.43310061807171635, + "grad_norm": 1.1986932754516602, + "learning_rate": 6.31073594025742e-06, + "loss": 0.3913, + "step": 21635 + }, + { + "epoch": 0.4331206365888447, + "grad_norm": 1.1791653633117676, + "learning_rate": 6.310423091858529e-06, + "loss": 0.3391, + "step": 21636 + }, + { + "epoch": 0.43314065510597305, + "grad_norm": 0.9871127605438232, + "learning_rate": 6.310110237950901e-06, + "loss": 0.2473, + "step": 21637 + }, + { + "epoch": 0.43316067362310134, + "grad_norm": 0.9541604518890381, + "learning_rate": 6.309797378535852e-06, + "loss": 0.252, + "step": 21638 + }, + { + "epoch": 0.4331806921402297, + "grad_norm": 1.1194525957107544, + "learning_rate": 6.309484513614697e-06, + "loss": 0.3433, + "step": 21639 + }, + { + "epoch": 0.43320071065735805, + "grad_norm": 1.1285176277160645, + "learning_rate": 6.3091716431887505e-06, + "loss": 0.258, + "step": 21640 + }, + { + "epoch": 0.4332207291744864, + "grad_norm": 1.2968847751617432, + "learning_rate": 6.308858767259329e-06, + "loss": 0.3384, + "step": 21641 + }, + { + "epoch": 0.43324074769161475, + "grad_norm": 1.234476923942566, + "learning_rate": 6.308545885827746e-06, + "loss": 0.2823, + "step": 21642 + }, + { + "epoch": 0.4332607662087431, + "grad_norm": 1.1404081583023071, + "learning_rate": 6.308232998895317e-06, + "loss": 0.3552, + "step": 21643 + }, + { + "epoch": 0.43328078472587145, + "grad_norm": 1.064342975616455, + "learning_rate": 6.3079201064633584e-06, + "loss": 0.3256, + "step": 21644 + }, + { + "epoch": 0.4333008032429998, + "grad_norm": 1.1673190593719482, + "learning_rate": 6.307607208533186e-06, + "loss": 0.331, + "step": 21645 + }, + { + "epoch": 0.4333208217601281, + "grad_norm": 1.1058523654937744, + "learning_rate": 6.307294305106114e-06, + "loss": 0.3468, + "step": 21646 + }, + { + "epoch": 0.43334084027725644, + "grad_norm": 1.0524564981460571, + "learning_rate": 6.3069813961834595e-06, + "loss": 0.3615, + "step": 21647 + }, + { + "epoch": 0.4333608587943848, + "grad_norm": 1.1421095132827759, + "learning_rate": 6.306668481766534e-06, + "loss": 0.265, + "step": 21648 + }, + { + "epoch": 0.43338087731151315, + "grad_norm": 1.9713045358657837, + "learning_rate": 6.3063555618566564e-06, + "loss": 0.7914, + "step": 21649 + }, + { + "epoch": 0.4334008958286415, + "grad_norm": 1.0321424007415771, + "learning_rate": 6.306042636455142e-06, + "loss": 0.3219, + "step": 21650 + }, + { + "epoch": 0.43342091434576985, + "grad_norm": 1.0931545495986938, + "learning_rate": 6.305729705563304e-06, + "loss": 0.3564, + "step": 21651 + }, + { + "epoch": 0.4334409328628982, + "grad_norm": 1.0434213876724243, + "learning_rate": 6.305416769182459e-06, + "loss": 0.3299, + "step": 21652 + }, + { + "epoch": 0.43346095138002655, + "grad_norm": 1.8757226467132568, + "learning_rate": 6.305103827313924e-06, + "loss": 0.8037, + "step": 21653 + }, + { + "epoch": 0.43348096989715484, + "grad_norm": 1.6609619855880737, + "learning_rate": 6.304790879959014e-06, + "loss": 0.7696, + "step": 21654 + }, + { + "epoch": 0.4335009884142832, + "grad_norm": 1.0745688676834106, + "learning_rate": 6.3044779271190425e-06, + "loss": 0.3509, + "step": 21655 + }, + { + "epoch": 0.43352100693141155, + "grad_norm": 1.0108461380004883, + "learning_rate": 6.304164968795326e-06, + "loss": 0.2862, + "step": 21656 + }, + { + "epoch": 0.4335410254485399, + "grad_norm": 1.2319029569625854, + "learning_rate": 6.303852004989182e-06, + "loss": 0.3009, + "step": 21657 + }, + { + "epoch": 0.43356104396566825, + "grad_norm": 1.1540005207061768, + "learning_rate": 6.303539035701925e-06, + "loss": 0.2813, + "step": 21658 + }, + { + "epoch": 0.4335810624827966, + "grad_norm": 0.9708606600761414, + "learning_rate": 6.30322606093487e-06, + "loss": 0.2445, + "step": 21659 + }, + { + "epoch": 0.43360108099992495, + "grad_norm": 1.0237001180648804, + "learning_rate": 6.3029130806893335e-06, + "loss": 0.2361, + "step": 21660 + }, + { + "epoch": 0.4336210995170533, + "grad_norm": 1.8485547304153442, + "learning_rate": 6.30260009496663e-06, + "loss": 0.7093, + "step": 21661 + }, + { + "epoch": 0.4336411180341816, + "grad_norm": 1.0958651304244995, + "learning_rate": 6.302287103768075e-06, + "loss": 0.2706, + "step": 21662 + }, + { + "epoch": 0.43366113655130994, + "grad_norm": 1.1381590366363525, + "learning_rate": 6.301974107094988e-06, + "loss": 0.3097, + "step": 21663 + }, + { + "epoch": 0.4336811550684383, + "grad_norm": 1.074717402458191, + "learning_rate": 6.30166110494868e-06, + "loss": 0.3106, + "step": 21664 + }, + { + "epoch": 0.43370117358556665, + "grad_norm": 1.0447280406951904, + "learning_rate": 6.3013480973304695e-06, + "loss": 0.2744, + "step": 21665 + }, + { + "epoch": 0.433721192102695, + "grad_norm": 1.109208345413208, + "learning_rate": 6.301035084241671e-06, + "loss": 0.2896, + "step": 21666 + }, + { + "epoch": 0.43374121061982335, + "grad_norm": 1.7385926246643066, + "learning_rate": 6.300722065683602e-06, + "loss": 0.7512, + "step": 21667 + }, + { + "epoch": 0.4337612291369517, + "grad_norm": 1.1646983623504639, + "learning_rate": 6.300409041657577e-06, + "loss": 0.343, + "step": 21668 + }, + { + "epoch": 0.43378124765408005, + "grad_norm": 1.857241153717041, + "learning_rate": 6.300096012164913e-06, + "loss": 0.7507, + "step": 21669 + }, + { + "epoch": 0.43380126617120834, + "grad_norm": 1.0662918090820312, + "learning_rate": 6.299782977206924e-06, + "loss": 0.3085, + "step": 21670 + }, + { + "epoch": 0.4338212846883367, + "grad_norm": 1.018295407295227, + "learning_rate": 6.299469936784927e-06, + "loss": 0.3256, + "step": 21671 + }, + { + "epoch": 0.43384130320546505, + "grad_norm": 1.1123751401901245, + "learning_rate": 6.299156890900238e-06, + "loss": 0.3016, + "step": 21672 + }, + { + "epoch": 0.4338613217225934, + "grad_norm": 1.0716711282730103, + "learning_rate": 6.298843839554174e-06, + "loss": 0.3193, + "step": 21673 + }, + { + "epoch": 0.43388134023972175, + "grad_norm": 0.9656370878219604, + "learning_rate": 6.298530782748049e-06, + "loss": 0.3114, + "step": 21674 + }, + { + "epoch": 0.4339013587568501, + "grad_norm": 0.9823309779167175, + "learning_rate": 6.29821772048318e-06, + "loss": 0.3341, + "step": 21675 + }, + { + "epoch": 0.43392137727397845, + "grad_norm": 1.0741382837295532, + "learning_rate": 6.2979046527608844e-06, + "loss": 0.3129, + "step": 21676 + }, + { + "epoch": 0.4339413957911068, + "grad_norm": 1.9440292119979858, + "learning_rate": 6.297591579582476e-06, + "loss": 0.8173, + "step": 21677 + }, + { + "epoch": 0.4339614143082351, + "grad_norm": 1.074712872505188, + "learning_rate": 6.297278500949272e-06, + "loss": 0.3275, + "step": 21678 + }, + { + "epoch": 0.43398143282536344, + "grad_norm": 1.0332757234573364, + "learning_rate": 6.296965416862589e-06, + "loss": 0.2964, + "step": 21679 + }, + { + "epoch": 0.4340014513424918, + "grad_norm": 1.276151418685913, + "learning_rate": 6.296652327323741e-06, + "loss": 0.3176, + "step": 21680 + }, + { + "epoch": 0.43402146985962015, + "grad_norm": 1.205804705619812, + "learning_rate": 6.296339232334046e-06, + "loss": 0.3102, + "step": 21681 + }, + { + "epoch": 0.4340414883767485, + "grad_norm": 0.9815117120742798, + "learning_rate": 6.29602613189482e-06, + "loss": 0.2449, + "step": 21682 + }, + { + "epoch": 0.43406150689387685, + "grad_norm": 1.1134579181671143, + "learning_rate": 6.29571302600738e-06, + "loss": 0.3157, + "step": 21683 + }, + { + "epoch": 0.4340815254110052, + "grad_norm": 1.165272831916809, + "learning_rate": 6.295399914673041e-06, + "loss": 0.3219, + "step": 21684 + }, + { + "epoch": 0.43410154392813355, + "grad_norm": 1.1846824884414673, + "learning_rate": 6.2950867978931195e-06, + "loss": 0.293, + "step": 21685 + }, + { + "epoch": 0.43412156244526184, + "grad_norm": 1.0645899772644043, + "learning_rate": 6.294773675668931e-06, + "loss": 0.336, + "step": 21686 + }, + { + "epoch": 0.4341415809623902, + "grad_norm": 1.9412544965744019, + "learning_rate": 6.294460548001793e-06, + "loss": 0.8052, + "step": 21687 + }, + { + "epoch": 0.43416159947951855, + "grad_norm": 1.1785180568695068, + "learning_rate": 6.294147414893022e-06, + "loss": 0.2563, + "step": 21688 + }, + { + "epoch": 0.4341816179966469, + "grad_norm": 1.1029590368270874, + "learning_rate": 6.293834276343933e-06, + "loss": 0.3264, + "step": 21689 + }, + { + "epoch": 0.43420163651377525, + "grad_norm": 1.1050982475280762, + "learning_rate": 6.2935211323558444e-06, + "loss": 0.3327, + "step": 21690 + }, + { + "epoch": 0.4342216550309036, + "grad_norm": 1.1219267845153809, + "learning_rate": 6.29320798293007e-06, + "loss": 0.334, + "step": 21691 + }, + { + "epoch": 0.43424167354803195, + "grad_norm": 1.2441240549087524, + "learning_rate": 6.2928948280679285e-06, + "loss": 0.2675, + "step": 21692 + }, + { + "epoch": 0.4342616920651603, + "grad_norm": 1.134596347808838, + "learning_rate": 6.292581667770734e-06, + "loss": 0.3034, + "step": 21693 + }, + { + "epoch": 0.4342817105822886, + "grad_norm": 1.1034207344055176, + "learning_rate": 6.292268502039805e-06, + "loss": 0.3047, + "step": 21694 + }, + { + "epoch": 0.43430172909941694, + "grad_norm": 1.8227298259735107, + "learning_rate": 6.291955330876457e-06, + "loss": 0.8231, + "step": 21695 + }, + { + "epoch": 0.4343217476165453, + "grad_norm": 1.823722004890442, + "learning_rate": 6.291642154282007e-06, + "loss": 0.7932, + "step": 21696 + }, + { + "epoch": 0.43434176613367365, + "grad_norm": 1.3207621574401855, + "learning_rate": 6.291328972257772e-06, + "loss": 0.3264, + "step": 21697 + }, + { + "epoch": 0.434361784650802, + "grad_norm": 1.912292242050171, + "learning_rate": 6.291015784805068e-06, + "loss": 0.8735, + "step": 21698 + }, + { + "epoch": 0.43438180316793035, + "grad_norm": 1.161981225013733, + "learning_rate": 6.290702591925211e-06, + "loss": 0.2699, + "step": 21699 + }, + { + "epoch": 0.4344018216850587, + "grad_norm": 1.1078351736068726, + "learning_rate": 6.290389393619517e-06, + "loss": 0.2657, + "step": 21700 + }, + { + "epoch": 0.43442184020218705, + "grad_norm": 1.949592113494873, + "learning_rate": 6.290076189889305e-06, + "loss": 0.7844, + "step": 21701 + }, + { + "epoch": 0.43444185871931534, + "grad_norm": 1.002371072769165, + "learning_rate": 6.28976298073589e-06, + "loss": 0.2642, + "step": 21702 + }, + { + "epoch": 0.4344618772364437, + "grad_norm": 1.097665786743164, + "learning_rate": 6.289449766160589e-06, + "loss": 0.3249, + "step": 21703 + }, + { + "epoch": 0.43448189575357204, + "grad_norm": 0.9490498900413513, + "learning_rate": 6.289136546164719e-06, + "loss": 0.3162, + "step": 21704 + }, + { + "epoch": 0.4345019142707004, + "grad_norm": 1.0951930284500122, + "learning_rate": 6.288823320749596e-06, + "loss": 0.3462, + "step": 21705 + }, + { + "epoch": 0.43452193278782875, + "grad_norm": 1.0586154460906982, + "learning_rate": 6.288510089916538e-06, + "loss": 0.273, + "step": 21706 + }, + { + "epoch": 0.4345419513049571, + "grad_norm": 1.1949716806411743, + "learning_rate": 6.288196853666859e-06, + "loss": 0.3149, + "step": 21707 + }, + { + "epoch": 0.43456196982208545, + "grad_norm": 2.0406575202941895, + "learning_rate": 6.2878836120018785e-06, + "loss": 0.7532, + "step": 21708 + }, + { + "epoch": 0.4345819883392138, + "grad_norm": 1.120281457901001, + "learning_rate": 6.287570364922914e-06, + "loss": 0.3172, + "step": 21709 + }, + { + "epoch": 0.4346020068563421, + "grad_norm": 1.051855206489563, + "learning_rate": 6.287257112431279e-06, + "loss": 0.2875, + "step": 21710 + }, + { + "epoch": 0.43462202537347044, + "grad_norm": 1.1754438877105713, + "learning_rate": 6.286943854528293e-06, + "loss": 0.3532, + "step": 21711 + }, + { + "epoch": 0.4346420438905988, + "grad_norm": 1.0975916385650635, + "learning_rate": 6.2866305912152715e-06, + "loss": 0.2939, + "step": 21712 + }, + { + "epoch": 0.43466206240772715, + "grad_norm": 1.034429907798767, + "learning_rate": 6.286317322493531e-06, + "loss": 0.3084, + "step": 21713 + }, + { + "epoch": 0.4346820809248555, + "grad_norm": 1.066341757774353, + "learning_rate": 6.28600404836439e-06, + "loss": 0.3152, + "step": 21714 + }, + { + "epoch": 0.43470209944198385, + "grad_norm": 1.1264621019363403, + "learning_rate": 6.285690768829165e-06, + "loss": 0.3489, + "step": 21715 + }, + { + "epoch": 0.4347221179591122, + "grad_norm": 1.897503137588501, + "learning_rate": 6.285377483889173e-06, + "loss": 0.781, + "step": 21716 + }, + { + "epoch": 0.43474213647624055, + "grad_norm": 1.7831203937530518, + "learning_rate": 6.28506419354573e-06, + "loss": 0.8145, + "step": 21717 + }, + { + "epoch": 0.43476215499336884, + "grad_norm": 1.110645055770874, + "learning_rate": 6.284750897800155e-06, + "loss": 0.3418, + "step": 21718 + }, + { + "epoch": 0.4347821735104972, + "grad_norm": 1.0902239084243774, + "learning_rate": 6.284437596653763e-06, + "loss": 0.3095, + "step": 21719 + }, + { + "epoch": 0.43480219202762554, + "grad_norm": 1.0691547393798828, + "learning_rate": 6.28412429010787e-06, + "loss": 0.3061, + "step": 21720 + }, + { + "epoch": 0.4348222105447539, + "grad_norm": 1.0954455137252808, + "learning_rate": 6.2838109781637955e-06, + "loss": 0.3065, + "step": 21721 + }, + { + "epoch": 0.43484222906188225, + "grad_norm": 1.1477081775665283, + "learning_rate": 6.283497660822858e-06, + "loss": 0.3394, + "step": 21722 + }, + { + "epoch": 0.4348622475790106, + "grad_norm": 1.1498061418533325, + "learning_rate": 6.283184338086372e-06, + "loss": 0.2915, + "step": 21723 + }, + { + "epoch": 0.43488226609613895, + "grad_norm": 1.2524737119674683, + "learning_rate": 6.2828710099556545e-06, + "loss": 0.3578, + "step": 21724 + }, + { + "epoch": 0.4349022846132673, + "grad_norm": 1.071873426437378, + "learning_rate": 6.282557676432024e-06, + "loss": 0.3037, + "step": 21725 + }, + { + "epoch": 0.4349223031303956, + "grad_norm": 1.2472004890441895, + "learning_rate": 6.282244337516794e-06, + "loss": 0.3017, + "step": 21726 + }, + { + "epoch": 0.43494232164752394, + "grad_norm": 0.9977607131004333, + "learning_rate": 6.281930993211288e-06, + "loss": 0.3052, + "step": 21727 + }, + { + "epoch": 0.4349623401646523, + "grad_norm": 1.0432738065719604, + "learning_rate": 6.281617643516819e-06, + "loss": 0.3049, + "step": 21728 + }, + { + "epoch": 0.43498235868178065, + "grad_norm": 1.116506576538086, + "learning_rate": 6.281304288434706e-06, + "loss": 0.3177, + "step": 21729 + }, + { + "epoch": 0.435002377198909, + "grad_norm": 1.1439300775527954, + "learning_rate": 6.280990927966264e-06, + "loss": 0.353, + "step": 21730 + }, + { + "epoch": 0.43502239571603735, + "grad_norm": 1.3541975021362305, + "learning_rate": 6.280677562112813e-06, + "loss": 0.2998, + "step": 21731 + }, + { + "epoch": 0.4350424142331657, + "grad_norm": 1.0633933544158936, + "learning_rate": 6.280364190875669e-06, + "loss": 0.3356, + "step": 21732 + }, + { + "epoch": 0.43506243275029405, + "grad_norm": 1.1008297204971313, + "learning_rate": 6.280050814256149e-06, + "loss": 0.2906, + "step": 21733 + }, + { + "epoch": 0.43508245126742234, + "grad_norm": 1.2580115795135498, + "learning_rate": 6.2797374322555715e-06, + "loss": 0.2832, + "step": 21734 + }, + { + "epoch": 0.4351024697845507, + "grad_norm": 1.1978861093521118, + "learning_rate": 6.279424044875253e-06, + "loss": 0.3366, + "step": 21735 + }, + { + "epoch": 0.43512248830167904, + "grad_norm": 1.8116661310195923, + "learning_rate": 6.279110652116511e-06, + "loss": 0.81, + "step": 21736 + }, + { + "epoch": 0.4351425068188074, + "grad_norm": 1.0555528402328491, + "learning_rate": 6.2787972539806626e-06, + "loss": 0.3159, + "step": 21737 + }, + { + "epoch": 0.43516252533593575, + "grad_norm": 1.1166942119598389, + "learning_rate": 6.278483850469027e-06, + "loss": 0.3393, + "step": 21738 + }, + { + "epoch": 0.4351825438530641, + "grad_norm": 1.0517714023590088, + "learning_rate": 6.278170441582918e-06, + "loss": 0.2965, + "step": 21739 + }, + { + "epoch": 0.43520256237019245, + "grad_norm": 1.0984070301055908, + "learning_rate": 6.277857027323659e-06, + "loss": 0.3424, + "step": 21740 + }, + { + "epoch": 0.4352225808873208, + "grad_norm": 1.3063596487045288, + "learning_rate": 6.277543607692561e-06, + "loss": 0.3713, + "step": 21741 + }, + { + "epoch": 0.4352425994044491, + "grad_norm": 1.0236389636993408, + "learning_rate": 6.277230182690946e-06, + "loss": 0.333, + "step": 21742 + }, + { + "epoch": 0.43526261792157744, + "grad_norm": 1.1852043867111206, + "learning_rate": 6.276916752320132e-06, + "loss": 0.2923, + "step": 21743 + }, + { + "epoch": 0.4352826364387058, + "grad_norm": 0.9851787686347961, + "learning_rate": 6.276603316581431e-06, + "loss": 0.3397, + "step": 21744 + }, + { + "epoch": 0.43530265495583415, + "grad_norm": 1.9015023708343506, + "learning_rate": 6.276289875476166e-06, + "loss": 0.7944, + "step": 21745 + }, + { + "epoch": 0.4353226734729625, + "grad_norm": 1.0108574628829956, + "learning_rate": 6.275976429005653e-06, + "loss": 0.2797, + "step": 21746 + }, + { + "epoch": 0.43534269199009085, + "grad_norm": 1.1908066272735596, + "learning_rate": 6.2756629771712105e-06, + "loss": 0.3264, + "step": 21747 + }, + { + "epoch": 0.4353627105072192, + "grad_norm": 0.9683756232261658, + "learning_rate": 6.275349519974154e-06, + "loss": 0.3066, + "step": 21748 + }, + { + "epoch": 0.43538272902434755, + "grad_norm": 0.9605647325515747, + "learning_rate": 6.275036057415804e-06, + "loss": 0.2805, + "step": 21749 + }, + { + "epoch": 0.43540274754147584, + "grad_norm": 1.0113426446914673, + "learning_rate": 6.274722589497476e-06, + "loss": 0.3557, + "step": 21750 + }, + { + "epoch": 0.4354227660586042, + "grad_norm": 1.0394706726074219, + "learning_rate": 6.274409116220489e-06, + "loss": 0.31, + "step": 21751 + }, + { + "epoch": 0.43544278457573254, + "grad_norm": 1.874780297279358, + "learning_rate": 6.274095637586159e-06, + "loss": 0.7996, + "step": 21752 + }, + { + "epoch": 0.4354628030928609, + "grad_norm": 1.0855891704559326, + "learning_rate": 6.273782153595806e-06, + "loss": 0.2995, + "step": 21753 + }, + { + "epoch": 0.43548282160998925, + "grad_norm": 1.1549065113067627, + "learning_rate": 6.273468664250748e-06, + "loss": 0.2905, + "step": 21754 + }, + { + "epoch": 0.4355028401271176, + "grad_norm": 0.978138267993927, + "learning_rate": 6.273155169552301e-06, + "loss": 0.242, + "step": 21755 + }, + { + "epoch": 0.43552285864424595, + "grad_norm": 1.989792823791504, + "learning_rate": 6.272841669501784e-06, + "loss": 0.8075, + "step": 21756 + }, + { + "epoch": 0.4355428771613743, + "grad_norm": 1.216888189315796, + "learning_rate": 6.272528164100513e-06, + "loss": 0.3565, + "step": 21757 + }, + { + "epoch": 0.4355628956785026, + "grad_norm": 1.890508770942688, + "learning_rate": 6.272214653349809e-06, + "loss": 0.8058, + "step": 21758 + }, + { + "epoch": 0.43558291419563094, + "grad_norm": 1.0443003177642822, + "learning_rate": 6.271901137250986e-06, + "loss": 0.2983, + "step": 21759 + }, + { + "epoch": 0.4356029327127593, + "grad_norm": 1.0321141481399536, + "learning_rate": 6.271587615805368e-06, + "loss": 0.3147, + "step": 21760 + }, + { + "epoch": 0.43562295122988764, + "grad_norm": 1.9690078496932983, + "learning_rate": 6.271274089014267e-06, + "loss": 0.8021, + "step": 21761 + }, + { + "epoch": 0.435642969747016, + "grad_norm": 1.0437911748886108, + "learning_rate": 6.2709605568790045e-06, + "loss": 0.3372, + "step": 21762 + }, + { + "epoch": 0.43566298826414435, + "grad_norm": 1.1218947172164917, + "learning_rate": 6.2706470194008974e-06, + "loss": 0.305, + "step": 21763 + }, + { + "epoch": 0.4356830067812727, + "grad_norm": 1.0889071226119995, + "learning_rate": 6.270333476581263e-06, + "loss": 0.3191, + "step": 21764 + }, + { + "epoch": 0.43570302529840105, + "grad_norm": 1.0010350942611694, + "learning_rate": 6.270019928421419e-06, + "loss": 0.2665, + "step": 21765 + }, + { + "epoch": 0.43572304381552934, + "grad_norm": 1.9412657022476196, + "learning_rate": 6.269706374922685e-06, + "loss": 0.8207, + "step": 21766 + }, + { + "epoch": 0.4357430623326577, + "grad_norm": 1.0329140424728394, + "learning_rate": 6.26939281608638e-06, + "loss": 0.314, + "step": 21767 + }, + { + "epoch": 0.43576308084978604, + "grad_norm": 1.1937248706817627, + "learning_rate": 6.26907925191382e-06, + "loss": 0.2946, + "step": 21768 + }, + { + "epoch": 0.4357830993669144, + "grad_norm": 1.0155138969421387, + "learning_rate": 6.268765682406324e-06, + "loss": 0.321, + "step": 21769 + }, + { + "epoch": 0.43580311788404275, + "grad_norm": 1.069415807723999, + "learning_rate": 6.268452107565209e-06, + "loss": 0.2827, + "step": 21770 + }, + { + "epoch": 0.4358231364011711, + "grad_norm": 1.2143023014068604, + "learning_rate": 6.268138527391796e-06, + "loss": 0.2772, + "step": 21771 + }, + { + "epoch": 0.43584315491829945, + "grad_norm": 1.332303524017334, + "learning_rate": 6.2678249418874e-06, + "loss": 0.302, + "step": 21772 + }, + { + "epoch": 0.4358631734354278, + "grad_norm": 1.1538234949111938, + "learning_rate": 6.267511351053343e-06, + "loss": 0.2982, + "step": 21773 + }, + { + "epoch": 0.4358831919525561, + "grad_norm": 1.1507407426834106, + "learning_rate": 6.26719775489094e-06, + "loss": 0.3403, + "step": 21774 + }, + { + "epoch": 0.43590321046968444, + "grad_norm": 1.0986342430114746, + "learning_rate": 6.266884153401509e-06, + "loss": 0.3497, + "step": 21775 + }, + { + "epoch": 0.4359232289868128, + "grad_norm": 0.9518199563026428, + "learning_rate": 6.266570546586371e-06, + "loss": 0.2572, + "step": 21776 + }, + { + "epoch": 0.43594324750394114, + "grad_norm": 1.0373622179031372, + "learning_rate": 6.266256934446842e-06, + "loss": 0.281, + "step": 21777 + }, + { + "epoch": 0.4359632660210695, + "grad_norm": 1.0739644765853882, + "learning_rate": 6.265943316984242e-06, + "loss": 0.2999, + "step": 21778 + }, + { + "epoch": 0.43598328453819785, + "grad_norm": 1.0558720827102661, + "learning_rate": 6.265629694199888e-06, + "loss": 0.276, + "step": 21779 + }, + { + "epoch": 0.4360033030553262, + "grad_norm": 1.1679326295852661, + "learning_rate": 6.265316066095102e-06, + "loss": 0.3874, + "step": 21780 + }, + { + "epoch": 0.43602332157245455, + "grad_norm": 1.0785112380981445, + "learning_rate": 6.265002432671197e-06, + "loss": 0.3434, + "step": 21781 + }, + { + "epoch": 0.43604334008958284, + "grad_norm": 1.1253949403762817, + "learning_rate": 6.264688793929494e-06, + "loss": 0.3471, + "step": 21782 + }, + { + "epoch": 0.4360633586067112, + "grad_norm": 1.0443097352981567, + "learning_rate": 6.264375149871312e-06, + "loss": 0.3089, + "step": 21783 + }, + { + "epoch": 0.43608337712383954, + "grad_norm": 1.0509225130081177, + "learning_rate": 6.2640615004979685e-06, + "loss": 0.3131, + "step": 21784 + }, + { + "epoch": 0.4361033956409679, + "grad_norm": 1.3184733390808105, + "learning_rate": 6.263747845810784e-06, + "loss": 0.358, + "step": 21785 + }, + { + "epoch": 0.43612341415809625, + "grad_norm": 1.073900580406189, + "learning_rate": 6.263434185811074e-06, + "loss": 0.3192, + "step": 21786 + }, + { + "epoch": 0.4361434326752246, + "grad_norm": 1.156264066696167, + "learning_rate": 6.263120520500159e-06, + "loss": 0.3223, + "step": 21787 + }, + { + "epoch": 0.43616345119235295, + "grad_norm": 1.074393391609192, + "learning_rate": 6.262806849879357e-06, + "loss": 0.3102, + "step": 21788 + }, + { + "epoch": 0.4361834697094813, + "grad_norm": 1.9230769872665405, + "learning_rate": 6.2624931739499876e-06, + "loss": 0.7853, + "step": 21789 + }, + { + "epoch": 0.4362034882266096, + "grad_norm": 1.1762800216674805, + "learning_rate": 6.2621794927133674e-06, + "loss": 0.3277, + "step": 21790 + }, + { + "epoch": 0.43622350674373794, + "grad_norm": 1.0596550703048706, + "learning_rate": 6.261865806170816e-06, + "loss": 0.3341, + "step": 21791 + }, + { + "epoch": 0.4362435252608663, + "grad_norm": 1.0967397689819336, + "learning_rate": 6.261552114323652e-06, + "loss": 0.354, + "step": 21792 + }, + { + "epoch": 0.43626354377799464, + "grad_norm": 1.1953593492507935, + "learning_rate": 6.261238417173196e-06, + "loss": 0.2666, + "step": 21793 + }, + { + "epoch": 0.436283562295123, + "grad_norm": 1.0640350580215454, + "learning_rate": 6.260924714720765e-06, + "loss": 0.3226, + "step": 21794 + }, + { + "epoch": 0.43630358081225135, + "grad_norm": 1.2594977617263794, + "learning_rate": 6.2606110069676785e-06, + "loss": 0.2853, + "step": 21795 + }, + { + "epoch": 0.4363235993293797, + "grad_norm": 1.1403253078460693, + "learning_rate": 6.260297293915253e-06, + "loss": 0.2598, + "step": 21796 + }, + { + "epoch": 0.43634361784650805, + "grad_norm": 1.1271066665649414, + "learning_rate": 6.259983575564809e-06, + "loss": 0.3539, + "step": 21797 + }, + { + "epoch": 0.43636363636363634, + "grad_norm": 1.0761958360671997, + "learning_rate": 6.259669851917666e-06, + "loss": 0.3093, + "step": 21798 + }, + { + "epoch": 0.4363836548807647, + "grad_norm": 1.154129981994629, + "learning_rate": 6.259356122975141e-06, + "loss": 0.3232, + "step": 21799 + }, + { + "epoch": 0.43640367339789304, + "grad_norm": 2.119957208633423, + "learning_rate": 6.259042388738556e-06, + "loss": 0.782, + "step": 21800 + }, + { + "epoch": 0.4364236919150214, + "grad_norm": 1.0721040964126587, + "learning_rate": 6.258728649209225e-06, + "loss": 0.3124, + "step": 21801 + }, + { + "epoch": 0.43644371043214975, + "grad_norm": 1.8948564529418945, + "learning_rate": 6.258414904388472e-06, + "loss": 0.815, + "step": 21802 + }, + { + "epoch": 0.4364637289492781, + "grad_norm": 1.2332544326782227, + "learning_rate": 6.258101154277612e-06, + "loss": 0.3381, + "step": 21803 + }, + { + "epoch": 0.43648374746640645, + "grad_norm": 1.2137597799301147, + "learning_rate": 6.257787398877966e-06, + "loss": 0.3385, + "step": 21804 + }, + { + "epoch": 0.4365037659835348, + "grad_norm": 1.076693058013916, + "learning_rate": 6.257473638190852e-06, + "loss": 0.2898, + "step": 21805 + }, + { + "epoch": 0.4365237845006631, + "grad_norm": 1.747979998588562, + "learning_rate": 6.257159872217589e-06, + "loss": 0.7722, + "step": 21806 + }, + { + "epoch": 0.43654380301779144, + "grad_norm": 1.149695634841919, + "learning_rate": 6.2568461009594985e-06, + "loss": 0.3205, + "step": 21807 + }, + { + "epoch": 0.4365638215349198, + "grad_norm": 1.1911745071411133, + "learning_rate": 6.2565323244178965e-06, + "loss": 0.2992, + "step": 21808 + }, + { + "epoch": 0.43658384005204814, + "grad_norm": 1.0626860857009888, + "learning_rate": 6.256218542594101e-06, + "loss": 0.3103, + "step": 21809 + }, + { + "epoch": 0.4366038585691765, + "grad_norm": 1.1178967952728271, + "learning_rate": 6.255904755489434e-06, + "loss": 0.3267, + "step": 21810 + }, + { + "epoch": 0.43662387708630485, + "grad_norm": 1.4338507652282715, + "learning_rate": 6.255590963105216e-06, + "loss": 0.3401, + "step": 21811 + }, + { + "epoch": 0.4366438956034332, + "grad_norm": 1.1480705738067627, + "learning_rate": 6.255277165442762e-06, + "loss": 0.3163, + "step": 21812 + }, + { + "epoch": 0.43666391412056155, + "grad_norm": 1.161698818206787, + "learning_rate": 6.254963362503394e-06, + "loss": 0.3787, + "step": 21813 + }, + { + "epoch": 0.43668393263768984, + "grad_norm": 1.1567150354385376, + "learning_rate": 6.254649554288429e-06, + "loss": 0.341, + "step": 21814 + }, + { + "epoch": 0.4367039511548182, + "grad_norm": 1.058635950088501, + "learning_rate": 6.254335740799189e-06, + "loss": 0.3503, + "step": 21815 + }, + { + "epoch": 0.43672396967194654, + "grad_norm": 1.0225520133972168, + "learning_rate": 6.254021922036989e-06, + "loss": 0.282, + "step": 21816 + }, + { + "epoch": 0.4367439881890749, + "grad_norm": 1.0546138286590576, + "learning_rate": 6.253708098003152e-06, + "loss": 0.3203, + "step": 21817 + }, + { + "epoch": 0.43676400670620324, + "grad_norm": 1.2254527807235718, + "learning_rate": 6.253394268698995e-06, + "loss": 0.3298, + "step": 21818 + }, + { + "epoch": 0.4367840252233316, + "grad_norm": 1.0834466218948364, + "learning_rate": 6.25308043412584e-06, + "loss": 0.3158, + "step": 21819 + }, + { + "epoch": 0.43680404374045995, + "grad_norm": 1.0394251346588135, + "learning_rate": 6.252766594285004e-06, + "loss": 0.3224, + "step": 21820 + }, + { + "epoch": 0.43682406225758824, + "grad_norm": 1.152345895767212, + "learning_rate": 6.252452749177805e-06, + "loss": 0.3184, + "step": 21821 + }, + { + "epoch": 0.4368440807747166, + "grad_norm": 1.0504753589630127, + "learning_rate": 6.2521388988055666e-06, + "loss": 0.2699, + "step": 21822 + }, + { + "epoch": 0.43686409929184494, + "grad_norm": 1.0920956134796143, + "learning_rate": 6.2518250431696034e-06, + "loss": 0.3455, + "step": 21823 + }, + { + "epoch": 0.4368841178089733, + "grad_norm": 1.1250072717666626, + "learning_rate": 6.251511182271239e-06, + "loss": 0.3505, + "step": 21824 + }, + { + "epoch": 0.43690413632610164, + "grad_norm": 1.1205012798309326, + "learning_rate": 6.25119731611179e-06, + "loss": 0.3073, + "step": 21825 + }, + { + "epoch": 0.43692415484323, + "grad_norm": 1.0520695447921753, + "learning_rate": 6.250883444692577e-06, + "loss": 0.2858, + "step": 21826 + }, + { + "epoch": 0.43694417336035835, + "grad_norm": 1.7489005327224731, + "learning_rate": 6.25056956801492e-06, + "loss": 0.7629, + "step": 21827 + }, + { + "epoch": 0.4369641918774867, + "grad_norm": 1.0582720041275024, + "learning_rate": 6.250255686080137e-06, + "loss": 0.3166, + "step": 21828 + }, + { + "epoch": 0.436984210394615, + "grad_norm": 1.023468017578125, + "learning_rate": 6.2499417988895475e-06, + "loss": 0.3462, + "step": 21829 + }, + { + "epoch": 0.43700422891174334, + "grad_norm": 1.2518771886825562, + "learning_rate": 6.249627906444472e-06, + "loss": 0.2686, + "step": 21830 + }, + { + "epoch": 0.4370242474288717, + "grad_norm": 1.2034282684326172, + "learning_rate": 6.24931400874623e-06, + "loss": 0.2582, + "step": 21831 + }, + { + "epoch": 0.43704426594600004, + "grad_norm": 0.9968280792236328, + "learning_rate": 6.249000105796141e-06, + "loss": 0.3433, + "step": 21832 + }, + { + "epoch": 0.4370642844631284, + "grad_norm": 1.0866811275482178, + "learning_rate": 6.248686197595525e-06, + "loss": 0.3149, + "step": 21833 + }, + { + "epoch": 0.43708430298025674, + "grad_norm": 1.1786643266677856, + "learning_rate": 6.2483722841457e-06, + "loss": 0.3292, + "step": 21834 + }, + { + "epoch": 0.4371043214973851, + "grad_norm": 1.1717623472213745, + "learning_rate": 6.2480583654479856e-06, + "loss": 0.3404, + "step": 21835 + }, + { + "epoch": 0.43712434001451345, + "grad_norm": 1.0316681861877441, + "learning_rate": 6.2477444415037035e-06, + "loss": 0.3045, + "step": 21836 + }, + { + "epoch": 0.43714435853164174, + "grad_norm": 1.0546804666519165, + "learning_rate": 6.247430512314172e-06, + "loss": 0.2782, + "step": 21837 + }, + { + "epoch": 0.4371643770487701, + "grad_norm": 1.0573477745056152, + "learning_rate": 6.247116577880712e-06, + "loss": 0.2872, + "step": 21838 + }, + { + "epoch": 0.43718439556589844, + "grad_norm": 1.2404135465621948, + "learning_rate": 6.246802638204641e-06, + "loss": 0.3596, + "step": 21839 + }, + { + "epoch": 0.4372044140830268, + "grad_norm": 1.037121057510376, + "learning_rate": 6.24648869328728e-06, + "loss": 0.3164, + "step": 21840 + }, + { + "epoch": 0.43722443260015514, + "grad_norm": 1.169166088104248, + "learning_rate": 6.24617474312995e-06, + "loss": 0.3133, + "step": 21841 + }, + { + "epoch": 0.4372444511172835, + "grad_norm": 1.0992395877838135, + "learning_rate": 6.245860787733968e-06, + "loss": 0.315, + "step": 21842 + }, + { + "epoch": 0.43726446963441185, + "grad_norm": 1.0475388765335083, + "learning_rate": 6.245546827100656e-06, + "loss": 0.2961, + "step": 21843 + }, + { + "epoch": 0.4372844881515402, + "grad_norm": 1.218221664428711, + "learning_rate": 6.245232861231334e-06, + "loss": 0.2686, + "step": 21844 + }, + { + "epoch": 0.4373045066686685, + "grad_norm": 1.0604801177978516, + "learning_rate": 6.244918890127321e-06, + "loss": 0.3308, + "step": 21845 + }, + { + "epoch": 0.43732452518579684, + "grad_norm": 1.0387670993804932, + "learning_rate": 6.244604913789937e-06, + "loss": 0.3252, + "step": 21846 + }, + { + "epoch": 0.4373445437029252, + "grad_norm": 1.0988892316818237, + "learning_rate": 6.244290932220501e-06, + "loss": 0.2929, + "step": 21847 + }, + { + "epoch": 0.43736456222005354, + "grad_norm": 1.9059759378433228, + "learning_rate": 6.243976945420333e-06, + "loss": 0.7877, + "step": 21848 + }, + { + "epoch": 0.4373845807371819, + "grad_norm": 1.2853846549987793, + "learning_rate": 6.243662953390755e-06, + "loss": 0.3351, + "step": 21849 + }, + { + "epoch": 0.43740459925431024, + "grad_norm": 1.0712429285049438, + "learning_rate": 6.243348956133084e-06, + "loss": 0.3084, + "step": 21850 + }, + { + "epoch": 0.4374246177714386, + "grad_norm": 1.169498085975647, + "learning_rate": 6.243034953648644e-06, + "loss": 0.3005, + "step": 21851 + }, + { + "epoch": 0.43744463628856695, + "grad_norm": 1.1407310962677002, + "learning_rate": 6.242720945938751e-06, + "loss": 0.2971, + "step": 21852 + }, + { + "epoch": 0.43746465480569524, + "grad_norm": 1.0292521715164185, + "learning_rate": 6.242406933004727e-06, + "loss": 0.293, + "step": 21853 + }, + { + "epoch": 0.4374846733228236, + "grad_norm": 1.9499634504318237, + "learning_rate": 6.24209291484789e-06, + "loss": 0.7918, + "step": 21854 + }, + { + "epoch": 0.43750469183995194, + "grad_norm": 2.1221323013305664, + "learning_rate": 6.241778891469563e-06, + "loss": 0.842, + "step": 21855 + }, + { + "epoch": 0.4375247103570803, + "grad_norm": 1.152616262435913, + "learning_rate": 6.241464862871065e-06, + "loss": 0.3488, + "step": 21856 + }, + { + "epoch": 0.43754472887420864, + "grad_norm": 2.0037851333618164, + "learning_rate": 6.241150829053716e-06, + "loss": 0.8042, + "step": 21857 + }, + { + "epoch": 0.437564747391337, + "grad_norm": 1.1818653345108032, + "learning_rate": 6.240836790018836e-06, + "loss": 0.3368, + "step": 21858 + }, + { + "epoch": 0.43758476590846535, + "grad_norm": 1.3623809814453125, + "learning_rate": 6.240522745767744e-06, + "loss": 0.3014, + "step": 21859 + }, + { + "epoch": 0.4376047844255937, + "grad_norm": 1.1110295057296753, + "learning_rate": 6.2402086963017625e-06, + "loss": 0.3151, + "step": 21860 + }, + { + "epoch": 0.437624802942722, + "grad_norm": 1.1174761056900024, + "learning_rate": 6.23989464162221e-06, + "loss": 0.3364, + "step": 21861 + }, + { + "epoch": 0.43764482145985034, + "grad_norm": 1.0086909532546997, + "learning_rate": 6.239580581730408e-06, + "loss": 0.3008, + "step": 21862 + }, + { + "epoch": 0.4376648399769787, + "grad_norm": 1.0316418409347534, + "learning_rate": 6.239266516627676e-06, + "loss": 0.3248, + "step": 21863 + }, + { + "epoch": 0.43768485849410704, + "grad_norm": 1.2267082929611206, + "learning_rate": 6.238952446315334e-06, + "loss": 0.3258, + "step": 21864 + }, + { + "epoch": 0.4377048770112354, + "grad_norm": 1.0993880033493042, + "learning_rate": 6.238638370794701e-06, + "loss": 0.3406, + "step": 21865 + }, + { + "epoch": 0.43772489552836374, + "grad_norm": 1.0379841327667236, + "learning_rate": 6.238324290067102e-06, + "loss": 0.3171, + "step": 21866 + }, + { + "epoch": 0.4377449140454921, + "grad_norm": 1.1249905824661255, + "learning_rate": 6.238010204133851e-06, + "loss": 0.3104, + "step": 21867 + }, + { + "epoch": 0.43776493256262045, + "grad_norm": 0.9897502660751343, + "learning_rate": 6.237696112996272e-06, + "loss": 0.2717, + "step": 21868 + }, + { + "epoch": 0.43778495107974874, + "grad_norm": 1.0263231992721558, + "learning_rate": 6.237382016655686e-06, + "loss": 0.3098, + "step": 21869 + }, + { + "epoch": 0.4378049695968771, + "grad_norm": 1.1437973976135254, + "learning_rate": 6.237067915113413e-06, + "loss": 0.3063, + "step": 21870 + }, + { + "epoch": 0.43782498811400544, + "grad_norm": 1.1341511011123657, + "learning_rate": 6.236753808370772e-06, + "loss": 0.3239, + "step": 21871 + }, + { + "epoch": 0.4378450066311338, + "grad_norm": 0.9704879522323608, + "learning_rate": 6.236439696429083e-06, + "loss": 0.2603, + "step": 21872 + }, + { + "epoch": 0.43786502514826214, + "grad_norm": 1.0743359327316284, + "learning_rate": 6.236125579289668e-06, + "loss": 0.3242, + "step": 21873 + }, + { + "epoch": 0.4378850436653905, + "grad_norm": 1.2247956991195679, + "learning_rate": 6.235811456953848e-06, + "loss": 0.3572, + "step": 21874 + }, + { + "epoch": 0.43790506218251884, + "grad_norm": 1.187949538230896, + "learning_rate": 6.235497329422943e-06, + "loss": 0.3327, + "step": 21875 + }, + { + "epoch": 0.4379250806996472, + "grad_norm": 1.0399843454360962, + "learning_rate": 6.235183196698272e-06, + "loss": 0.3191, + "step": 21876 + }, + { + "epoch": 0.4379450992167755, + "grad_norm": 1.090177059173584, + "learning_rate": 6.234869058781157e-06, + "loss": 0.3492, + "step": 21877 + }, + { + "epoch": 0.43796511773390384, + "grad_norm": 1.1331571340560913, + "learning_rate": 6.234554915672919e-06, + "loss": 0.2929, + "step": 21878 + }, + { + "epoch": 0.4379851362510322, + "grad_norm": 0.9569219350814819, + "learning_rate": 6.234240767374876e-06, + "loss": 0.321, + "step": 21879 + }, + { + "epoch": 0.43800515476816054, + "grad_norm": 1.2071257829666138, + "learning_rate": 6.233926613888352e-06, + "loss": 0.3053, + "step": 21880 + }, + { + "epoch": 0.4380251732852889, + "grad_norm": 1.3115452527999878, + "learning_rate": 6.2336124552146634e-06, + "loss": 0.3057, + "step": 21881 + }, + { + "epoch": 0.43804519180241724, + "grad_norm": 1.9621527194976807, + "learning_rate": 6.233298291355136e-06, + "loss": 0.7609, + "step": 21882 + }, + { + "epoch": 0.4380652103195456, + "grad_norm": 1.0724968910217285, + "learning_rate": 6.232984122311087e-06, + "loss": 0.2599, + "step": 21883 + }, + { + "epoch": 0.43808522883667395, + "grad_norm": 1.15059494972229, + "learning_rate": 6.2326699480838405e-06, + "loss": 0.3018, + "step": 21884 + }, + { + "epoch": 0.43810524735380224, + "grad_norm": 1.134423017501831, + "learning_rate": 6.232355768674713e-06, + "loss": 0.3454, + "step": 21885 + }, + { + "epoch": 0.4381252658709306, + "grad_norm": 0.9684606194496155, + "learning_rate": 6.232041584085026e-06, + "loss": 0.2826, + "step": 21886 + }, + { + "epoch": 0.43814528438805894, + "grad_norm": 1.1646571159362793, + "learning_rate": 6.231727394316102e-06, + "loss": 0.3088, + "step": 21887 + }, + { + "epoch": 0.4381653029051873, + "grad_norm": 1.1115772724151611, + "learning_rate": 6.231413199369262e-06, + "loss": 0.3029, + "step": 21888 + }, + { + "epoch": 0.43818532142231564, + "grad_norm": 1.1533007621765137, + "learning_rate": 6.231098999245826e-06, + "loss": 0.3288, + "step": 21889 + }, + { + "epoch": 0.438205339939444, + "grad_norm": 1.100042462348938, + "learning_rate": 6.230784793947113e-06, + "loss": 0.3354, + "step": 21890 + }, + { + "epoch": 0.43822535845657234, + "grad_norm": 1.1384625434875488, + "learning_rate": 6.230470583474448e-06, + "loss": 0.3143, + "step": 21891 + }, + { + "epoch": 0.4382453769737007, + "grad_norm": 1.1106419563293457, + "learning_rate": 6.230156367829148e-06, + "loss": 0.2777, + "step": 21892 + }, + { + "epoch": 0.438265395490829, + "grad_norm": 1.0918513536453247, + "learning_rate": 6.229842147012535e-06, + "loss": 0.33, + "step": 21893 + }, + { + "epoch": 0.43828541400795734, + "grad_norm": 1.1393804550170898, + "learning_rate": 6.2295279210259296e-06, + "loss": 0.3171, + "step": 21894 + }, + { + "epoch": 0.4383054325250857, + "grad_norm": 1.0457735061645508, + "learning_rate": 6.2292136898706554e-06, + "loss": 0.3483, + "step": 21895 + }, + { + "epoch": 0.43832545104221404, + "grad_norm": 1.0140100717544556, + "learning_rate": 6.2288994535480305e-06, + "loss": 0.334, + "step": 21896 + }, + { + "epoch": 0.4383454695593424, + "grad_norm": 1.069663643836975, + "learning_rate": 6.228585212059377e-06, + "loss": 0.2952, + "step": 21897 + }, + { + "epoch": 0.43836548807647074, + "grad_norm": 1.062785267829895, + "learning_rate": 6.228270965406016e-06, + "loss": 0.2993, + "step": 21898 + }, + { + "epoch": 0.4383855065935991, + "grad_norm": 1.856436014175415, + "learning_rate": 6.227956713589267e-06, + "loss": 0.8917, + "step": 21899 + }, + { + "epoch": 0.43840552511072745, + "grad_norm": 0.972212016582489, + "learning_rate": 6.227642456610453e-06, + "loss": 0.2816, + "step": 21900 + }, + { + "epoch": 0.43842554362785574, + "grad_norm": 1.002415657043457, + "learning_rate": 6.2273281944708945e-06, + "loss": 0.2404, + "step": 21901 + }, + { + "epoch": 0.4384455621449841, + "grad_norm": 1.10066819190979, + "learning_rate": 6.227013927171912e-06, + "loss": 0.3437, + "step": 21902 + }, + { + "epoch": 0.43846558066211244, + "grad_norm": 1.1566513776779175, + "learning_rate": 6.226699654714827e-06, + "loss": 0.3435, + "step": 21903 + }, + { + "epoch": 0.4384855991792408, + "grad_norm": 1.140537977218628, + "learning_rate": 6.226385377100962e-06, + "loss": 0.3078, + "step": 21904 + }, + { + "epoch": 0.43850561769636914, + "grad_norm": 1.148288607597351, + "learning_rate": 6.2260710943316345e-06, + "loss": 0.308, + "step": 21905 + }, + { + "epoch": 0.4385256362134975, + "grad_norm": 1.1031548976898193, + "learning_rate": 6.225756806408168e-06, + "loss": 0.3264, + "step": 21906 + }, + { + "epoch": 0.43854565473062584, + "grad_norm": 1.174123764038086, + "learning_rate": 6.225442513331885e-06, + "loss": 0.2417, + "step": 21907 + }, + { + "epoch": 0.4385656732477542, + "grad_norm": 1.099983811378479, + "learning_rate": 6.225128215104106e-06, + "loss": 0.3239, + "step": 21908 + }, + { + "epoch": 0.4385856917648825, + "grad_norm": 1.8908342123031616, + "learning_rate": 6.224813911726151e-06, + "loss": 0.8516, + "step": 21909 + }, + { + "epoch": 0.43860571028201084, + "grad_norm": 0.9737906455993652, + "learning_rate": 6.224499603199341e-06, + "loss": 0.3035, + "step": 21910 + }, + { + "epoch": 0.4386257287991392, + "grad_norm": 1.2056316137313843, + "learning_rate": 6.224185289525e-06, + "loss": 0.2924, + "step": 21911 + }, + { + "epoch": 0.43864574731626754, + "grad_norm": 1.11431086063385, + "learning_rate": 6.2238709707044445e-06, + "loss": 0.3265, + "step": 21912 + }, + { + "epoch": 0.4386657658333959, + "grad_norm": 1.1657395362854004, + "learning_rate": 6.223556646739002e-06, + "loss": 0.3309, + "step": 21913 + }, + { + "epoch": 0.43868578435052424, + "grad_norm": 1.2404370307922363, + "learning_rate": 6.2232423176299884e-06, + "loss": 0.3557, + "step": 21914 + }, + { + "epoch": 0.4387058028676526, + "grad_norm": 1.171296238899231, + "learning_rate": 6.222927983378729e-06, + "loss": 0.3123, + "step": 21915 + }, + { + "epoch": 0.43872582138478095, + "grad_norm": 1.1010791063308716, + "learning_rate": 6.222613643986543e-06, + "loss": 0.3089, + "step": 21916 + }, + { + "epoch": 0.43874583990190924, + "grad_norm": 1.0736782550811768, + "learning_rate": 6.222299299454752e-06, + "loss": 0.286, + "step": 21917 + }, + { + "epoch": 0.4387658584190376, + "grad_norm": 1.2341458797454834, + "learning_rate": 6.221984949784677e-06, + "loss": 0.3076, + "step": 21918 + }, + { + "epoch": 0.43878587693616594, + "grad_norm": 1.5133910179138184, + "learning_rate": 6.221670594977641e-06, + "loss": 0.3151, + "step": 21919 + }, + { + "epoch": 0.4388058954532943, + "grad_norm": 1.1895980834960938, + "learning_rate": 6.2213562350349655e-06, + "loss": 0.2936, + "step": 21920 + }, + { + "epoch": 0.43882591397042264, + "grad_norm": 1.122004747390747, + "learning_rate": 6.221041869957971e-06, + "loss": 0.327, + "step": 21921 + }, + { + "epoch": 0.438845932487551, + "grad_norm": 1.2121881246566772, + "learning_rate": 6.2207274997479796e-06, + "loss": 0.3318, + "step": 21922 + }, + { + "epoch": 0.43886595100467934, + "grad_norm": 1.112403392791748, + "learning_rate": 6.22041312440631e-06, + "loss": 0.2972, + "step": 21923 + }, + { + "epoch": 0.4388859695218077, + "grad_norm": 1.0457243919372559, + "learning_rate": 6.220098743934289e-06, + "loss": 0.321, + "step": 21924 + }, + { + "epoch": 0.438905988038936, + "grad_norm": 1.2962950468063354, + "learning_rate": 6.219784358333234e-06, + "loss": 0.3427, + "step": 21925 + }, + { + "epoch": 0.43892600655606434, + "grad_norm": 1.0094159841537476, + "learning_rate": 6.219469967604469e-06, + "loss": 0.2856, + "step": 21926 + }, + { + "epoch": 0.4389460250731927, + "grad_norm": 1.2227656841278076, + "learning_rate": 6.219155571749314e-06, + "loss": 0.3525, + "step": 21927 + }, + { + "epoch": 0.43896604359032104, + "grad_norm": 0.9710201025009155, + "learning_rate": 6.218841170769092e-06, + "loss": 0.286, + "step": 21928 + }, + { + "epoch": 0.4389860621074494, + "grad_norm": 1.1861684322357178, + "learning_rate": 6.218526764665122e-06, + "loss": 0.3712, + "step": 21929 + }, + { + "epoch": 0.43900608062457774, + "grad_norm": 1.1556998491287231, + "learning_rate": 6.21821235343873e-06, + "loss": 0.2819, + "step": 21930 + }, + { + "epoch": 0.4390260991417061, + "grad_norm": 1.240246295928955, + "learning_rate": 6.2178979370912354e-06, + "loss": 0.3395, + "step": 21931 + }, + { + "epoch": 0.43904611765883444, + "grad_norm": 1.2020736932754517, + "learning_rate": 6.217583515623958e-06, + "loss": 0.311, + "step": 21932 + }, + { + "epoch": 0.43906613617596274, + "grad_norm": 1.103920817375183, + "learning_rate": 6.217269089038224e-06, + "loss": 0.2983, + "step": 21933 + }, + { + "epoch": 0.4390861546930911, + "grad_norm": 0.9763355851173401, + "learning_rate": 6.2169546573353515e-06, + "loss": 0.3033, + "step": 21934 + }, + { + "epoch": 0.43910617321021944, + "grad_norm": 0.9860326051712036, + "learning_rate": 6.2166402205166635e-06, + "loss": 0.2615, + "step": 21935 + }, + { + "epoch": 0.4391261917273478, + "grad_norm": 1.0963002443313599, + "learning_rate": 6.216325778583482e-06, + "loss": 0.3226, + "step": 21936 + }, + { + "epoch": 0.43914621024447614, + "grad_norm": 1.10197114944458, + "learning_rate": 6.216011331537128e-06, + "loss": 0.292, + "step": 21937 + }, + { + "epoch": 0.4391662287616045, + "grad_norm": 1.137464165687561, + "learning_rate": 6.215696879378924e-06, + "loss": 0.281, + "step": 21938 + }, + { + "epoch": 0.43918624727873284, + "grad_norm": 1.0775837898254395, + "learning_rate": 6.215382422110193e-06, + "loss": 0.2872, + "step": 21939 + }, + { + "epoch": 0.4392062657958612, + "grad_norm": 1.1037209033966064, + "learning_rate": 6.215067959732255e-06, + "loss": 0.3054, + "step": 21940 + }, + { + "epoch": 0.4392262843129895, + "grad_norm": 1.0968893766403198, + "learning_rate": 6.214753492246434e-06, + "loss": 0.2691, + "step": 21941 + }, + { + "epoch": 0.43924630283011784, + "grad_norm": 1.913110375404358, + "learning_rate": 6.21443901965405e-06, + "loss": 0.7613, + "step": 21942 + }, + { + "epoch": 0.4392663213472462, + "grad_norm": 1.1512762308120728, + "learning_rate": 6.214124541956426e-06, + "loss": 0.3239, + "step": 21943 + }, + { + "epoch": 0.43928633986437454, + "grad_norm": 1.0598978996276855, + "learning_rate": 6.213810059154882e-06, + "loss": 0.3147, + "step": 21944 + }, + { + "epoch": 0.4393063583815029, + "grad_norm": 1.0103747844696045, + "learning_rate": 6.213495571250743e-06, + "loss": 0.3361, + "step": 21945 + }, + { + "epoch": 0.43932637689863124, + "grad_norm": 1.9937822818756104, + "learning_rate": 6.213181078245331e-06, + "loss": 0.7752, + "step": 21946 + }, + { + "epoch": 0.4393463954157596, + "grad_norm": 1.1140152215957642, + "learning_rate": 6.212866580139964e-06, + "loss": 0.3779, + "step": 21947 + }, + { + "epoch": 0.43936641393288794, + "grad_norm": 1.1078840494155884, + "learning_rate": 6.21255207693597e-06, + "loss": 0.3235, + "step": 21948 + }, + { + "epoch": 0.43938643245001624, + "grad_norm": 1.2138482332229614, + "learning_rate": 6.212237568634666e-06, + "loss": 0.3545, + "step": 21949 + }, + { + "epoch": 0.4394064509671446, + "grad_norm": 0.9541086554527283, + "learning_rate": 6.211923055237378e-06, + "loss": 0.313, + "step": 21950 + }, + { + "epoch": 0.43942646948427294, + "grad_norm": 1.968567132949829, + "learning_rate": 6.211608536745424e-06, + "loss": 0.8133, + "step": 21951 + }, + { + "epoch": 0.4394464880014013, + "grad_norm": 1.2756714820861816, + "learning_rate": 6.211294013160129e-06, + "loss": 0.3208, + "step": 21952 + }, + { + "epoch": 0.43946650651852964, + "grad_norm": 1.0340341329574585, + "learning_rate": 6.210979484482816e-06, + "loss": 0.3205, + "step": 21953 + }, + { + "epoch": 0.439486525035658, + "grad_norm": 1.0699084997177124, + "learning_rate": 6.210664950714804e-06, + "loss": 0.2992, + "step": 21954 + }, + { + "epoch": 0.43950654355278634, + "grad_norm": 1.0528241395950317, + "learning_rate": 6.210350411857419e-06, + "loss": 0.3207, + "step": 21955 + }, + { + "epoch": 0.4395265620699147, + "grad_norm": 1.1447640657424927, + "learning_rate": 6.21003586791198e-06, + "loss": 0.2999, + "step": 21956 + }, + { + "epoch": 0.439546580587043, + "grad_norm": 1.1000033617019653, + "learning_rate": 6.20972131887981e-06, + "loss": 0.3239, + "step": 21957 + }, + { + "epoch": 0.43956659910417134, + "grad_norm": 1.2371758222579956, + "learning_rate": 6.209406764762233e-06, + "loss": 0.353, + "step": 21958 + }, + { + "epoch": 0.4395866176212997, + "grad_norm": 1.1612648963928223, + "learning_rate": 6.209092205560571e-06, + "loss": 0.3101, + "step": 21959 + }, + { + "epoch": 0.43960663613842804, + "grad_norm": 0.9660787582397461, + "learning_rate": 6.208777641276143e-06, + "loss": 0.269, + "step": 21960 + }, + { + "epoch": 0.4396266546555564, + "grad_norm": 1.1856645345687866, + "learning_rate": 6.208463071910277e-06, + "loss": 0.2601, + "step": 21961 + }, + { + "epoch": 0.43964667317268474, + "grad_norm": 1.0866122245788574, + "learning_rate": 6.208148497464292e-06, + "loss": 0.3525, + "step": 21962 + }, + { + "epoch": 0.4396666916898131, + "grad_norm": 1.1944111585617065, + "learning_rate": 6.207833917939509e-06, + "loss": 0.2837, + "step": 21963 + }, + { + "epoch": 0.43968671020694144, + "grad_norm": 1.0712273120880127, + "learning_rate": 6.207519333337252e-06, + "loss": 0.3267, + "step": 21964 + }, + { + "epoch": 0.43970672872406974, + "grad_norm": 1.046280026435852, + "learning_rate": 6.207204743658845e-06, + "loss": 0.2944, + "step": 21965 + }, + { + "epoch": 0.4397267472411981, + "grad_norm": 1.9545221328735352, + "learning_rate": 6.206890148905609e-06, + "loss": 0.783, + "step": 21966 + }, + { + "epoch": 0.43974676575832644, + "grad_norm": 1.048879623413086, + "learning_rate": 6.206575549078865e-06, + "loss": 0.3078, + "step": 21967 + }, + { + "epoch": 0.4397667842754548, + "grad_norm": 1.0363540649414062, + "learning_rate": 6.2062609441799386e-06, + "loss": 0.2811, + "step": 21968 + }, + { + "epoch": 0.43978680279258314, + "grad_norm": 1.2213191986083984, + "learning_rate": 6.20594633421015e-06, + "loss": 0.3481, + "step": 21969 + }, + { + "epoch": 0.4398068213097115, + "grad_norm": 1.0348596572875977, + "learning_rate": 6.205631719170822e-06, + "loss": 0.3645, + "step": 21970 + }, + { + "epoch": 0.43982683982683984, + "grad_norm": 2.0097029209136963, + "learning_rate": 6.2053170990632784e-06, + "loss": 0.8035, + "step": 21971 + }, + { + "epoch": 0.4398468583439682, + "grad_norm": 1.3463667631149292, + "learning_rate": 6.2050024738888415e-06, + "loss": 0.3062, + "step": 21972 + }, + { + "epoch": 0.4398668768610965, + "grad_norm": 1.0574017763137817, + "learning_rate": 6.204687843648833e-06, + "loss": 0.2954, + "step": 21973 + }, + { + "epoch": 0.43988689537822484, + "grad_norm": 1.05942702293396, + "learning_rate": 6.204373208344576e-06, + "loss": 0.2332, + "step": 21974 + }, + { + "epoch": 0.4399069138953532, + "grad_norm": 1.0798476934432983, + "learning_rate": 6.2040585679773945e-06, + "loss": 0.3148, + "step": 21975 + }, + { + "epoch": 0.43992693241248154, + "grad_norm": 1.165157675743103, + "learning_rate": 6.203743922548608e-06, + "loss": 0.3496, + "step": 21976 + }, + { + "epoch": 0.4399469509296099, + "grad_norm": 1.0318771600723267, + "learning_rate": 6.203429272059541e-06, + "loss": 0.334, + "step": 21977 + }, + { + "epoch": 0.43996696944673824, + "grad_norm": 1.0327788591384888, + "learning_rate": 6.203114616511518e-06, + "loss": 0.2748, + "step": 21978 + }, + { + "epoch": 0.4399869879638666, + "grad_norm": 1.2597273588180542, + "learning_rate": 6.2027999559058595e-06, + "loss": 0.3741, + "step": 21979 + }, + { + "epoch": 0.44000700648099494, + "grad_norm": 1.0601235628128052, + "learning_rate": 6.202485290243888e-06, + "loss": 0.3254, + "step": 21980 + }, + { + "epoch": 0.44002702499812324, + "grad_norm": 0.9647213816642761, + "learning_rate": 6.202170619526928e-06, + "loss": 0.2812, + "step": 21981 + }, + { + "epoch": 0.4400470435152516, + "grad_norm": 1.9005942344665527, + "learning_rate": 6.201855943756301e-06, + "loss": 0.8042, + "step": 21982 + }, + { + "epoch": 0.44006706203237994, + "grad_norm": 1.1712054014205933, + "learning_rate": 6.20154126293333e-06, + "loss": 0.3433, + "step": 21983 + }, + { + "epoch": 0.4400870805495083, + "grad_norm": 0.9962467551231384, + "learning_rate": 6.201226577059339e-06, + "loss": 0.3091, + "step": 21984 + }, + { + "epoch": 0.44010709906663664, + "grad_norm": 1.1143183708190918, + "learning_rate": 6.20091188613565e-06, + "loss": 0.3074, + "step": 21985 + }, + { + "epoch": 0.440127117583765, + "grad_norm": 1.0613622665405273, + "learning_rate": 6.2005971901635855e-06, + "loss": 0.3364, + "step": 21986 + }, + { + "epoch": 0.44014713610089334, + "grad_norm": 1.2226057052612305, + "learning_rate": 6.200282489144468e-06, + "loss": 0.328, + "step": 21987 + }, + { + "epoch": 0.4401671546180217, + "grad_norm": 1.14398992061615, + "learning_rate": 6.1999677830796224e-06, + "loss": 0.3625, + "step": 21988 + }, + { + "epoch": 0.44018717313515, + "grad_norm": 1.1260677576065063, + "learning_rate": 6.199653071970369e-06, + "loss": 0.3177, + "step": 21989 + }, + { + "epoch": 0.44020719165227834, + "grad_norm": 1.1742780208587646, + "learning_rate": 6.199338355818034e-06, + "loss": 0.3197, + "step": 21990 + }, + { + "epoch": 0.4402272101694067, + "grad_norm": 0.944765567779541, + "learning_rate": 6.1990236346239374e-06, + "loss": 0.2677, + "step": 21991 + }, + { + "epoch": 0.44024722868653504, + "grad_norm": 1.063722014427185, + "learning_rate": 6.198708908389403e-06, + "loss": 0.3232, + "step": 21992 + }, + { + "epoch": 0.4402672472036634, + "grad_norm": 1.251274824142456, + "learning_rate": 6.198394177115757e-06, + "loss": 0.3163, + "step": 21993 + }, + { + "epoch": 0.44028726572079174, + "grad_norm": 2.068634033203125, + "learning_rate": 6.198079440804318e-06, + "loss": 0.7623, + "step": 21994 + }, + { + "epoch": 0.4403072842379201, + "grad_norm": 1.243730902671814, + "learning_rate": 6.19776469945641e-06, + "loss": 0.3229, + "step": 21995 + }, + { + "epoch": 0.44032730275504844, + "grad_norm": 1.1262052059173584, + "learning_rate": 6.197449953073358e-06, + "loss": 0.2957, + "step": 21996 + }, + { + "epoch": 0.44034732127217674, + "grad_norm": 1.2296531200408936, + "learning_rate": 6.197135201656485e-06, + "loss": 0.3075, + "step": 21997 + }, + { + "epoch": 0.4403673397893051, + "grad_norm": 1.0933595895767212, + "learning_rate": 6.1968204452071115e-06, + "loss": 0.3497, + "step": 21998 + }, + { + "epoch": 0.44038735830643344, + "grad_norm": 1.1464146375656128, + "learning_rate": 6.196505683726564e-06, + "loss": 0.3535, + "step": 21999 + }, + { + "epoch": 0.4404073768235618, + "grad_norm": 1.1518745422363281, + "learning_rate": 6.196190917216162e-06, + "loss": 0.3051, + "step": 22000 + }, + { + "epoch": 0.44042739534069014, + "grad_norm": 1.2223474979400635, + "learning_rate": 6.195876145677233e-06, + "loss": 0.3337, + "step": 22001 + }, + { + "epoch": 0.4404474138578185, + "grad_norm": 1.1846593618392944, + "learning_rate": 6.195561369111096e-06, + "loss": 0.3311, + "step": 22002 + }, + { + "epoch": 0.44046743237494684, + "grad_norm": 1.1624804735183716, + "learning_rate": 6.195246587519077e-06, + "loss": 0.3173, + "step": 22003 + }, + { + "epoch": 0.4404874508920752, + "grad_norm": 0.99287348985672, + "learning_rate": 6.194931800902499e-06, + "loss": 0.3128, + "step": 22004 + }, + { + "epoch": 0.4405074694092035, + "grad_norm": 1.0723499059677124, + "learning_rate": 6.1946170092626845e-06, + "loss": 0.2904, + "step": 22005 + }, + { + "epoch": 0.44052748792633184, + "grad_norm": 1.0903842449188232, + "learning_rate": 6.194302212600959e-06, + "loss": 0.3237, + "step": 22006 + }, + { + "epoch": 0.4405475064434602, + "grad_norm": 1.0900253057479858, + "learning_rate": 6.193987410918641e-06, + "loss": 0.3184, + "step": 22007 + }, + { + "epoch": 0.44056752496058854, + "grad_norm": 1.1657130718231201, + "learning_rate": 6.193672604217058e-06, + "loss": 0.3054, + "step": 22008 + }, + { + "epoch": 0.4405875434777169, + "grad_norm": 1.819797396659851, + "learning_rate": 6.193357792497532e-06, + "loss": 0.8036, + "step": 22009 + }, + { + "epoch": 0.44060756199484524, + "grad_norm": 1.0890567302703857, + "learning_rate": 6.193042975761389e-06, + "loss": 0.3188, + "step": 22010 + }, + { + "epoch": 0.4406275805119736, + "grad_norm": 1.3081995248794556, + "learning_rate": 6.192728154009947e-06, + "loss": 0.3364, + "step": 22011 + }, + { + "epoch": 0.44064759902910194, + "grad_norm": 1.0811209678649902, + "learning_rate": 6.192413327244535e-06, + "loss": 0.3026, + "step": 22012 + }, + { + "epoch": 0.44066761754623024, + "grad_norm": 1.1274175643920898, + "learning_rate": 6.192098495466472e-06, + "loss": 0.3483, + "step": 22013 + }, + { + "epoch": 0.4406876360633586, + "grad_norm": 1.205754041671753, + "learning_rate": 6.1917836586770855e-06, + "loss": 0.2748, + "step": 22014 + }, + { + "epoch": 0.44070765458048694, + "grad_norm": 1.804739236831665, + "learning_rate": 6.191468816877695e-06, + "loss": 0.737, + "step": 22015 + }, + { + "epoch": 0.4407276730976153, + "grad_norm": 1.09694242477417, + "learning_rate": 6.191153970069626e-06, + "loss": 0.3224, + "step": 22016 + }, + { + "epoch": 0.44074769161474364, + "grad_norm": 1.7404885292053223, + "learning_rate": 6.190839118254204e-06, + "loss": 0.8755, + "step": 22017 + }, + { + "epoch": 0.440767710131872, + "grad_norm": 1.0548653602600098, + "learning_rate": 6.190524261432748e-06, + "loss": 0.3173, + "step": 22018 + }, + { + "epoch": 0.44078772864900034, + "grad_norm": 1.2556265592575073, + "learning_rate": 6.190209399606587e-06, + "loss": 0.3153, + "step": 22019 + }, + { + "epoch": 0.4408077471661287, + "grad_norm": 1.9155116081237793, + "learning_rate": 6.18989453277704e-06, + "loss": 0.7855, + "step": 22020 + }, + { + "epoch": 0.440827765683257, + "grad_norm": 1.7707641124725342, + "learning_rate": 6.189579660945433e-06, + "loss": 0.7564, + "step": 22021 + }, + { + "epoch": 0.44084778420038534, + "grad_norm": 1.3682992458343506, + "learning_rate": 6.1892647841130886e-06, + "loss": 0.2904, + "step": 22022 + }, + { + "epoch": 0.4408678027175137, + "grad_norm": 1.1771785020828247, + "learning_rate": 6.188949902281331e-06, + "loss": 0.335, + "step": 22023 + }, + { + "epoch": 0.44088782123464204, + "grad_norm": 1.1308262348175049, + "learning_rate": 6.1886350154514855e-06, + "loss": 0.3347, + "step": 22024 + }, + { + "epoch": 0.4409078397517704, + "grad_norm": 1.1281769275665283, + "learning_rate": 6.188320123624872e-06, + "loss": 0.2636, + "step": 22025 + }, + { + "epoch": 0.44092785826889874, + "grad_norm": 1.1043468713760376, + "learning_rate": 6.188005226802818e-06, + "loss": 0.3111, + "step": 22026 + }, + { + "epoch": 0.4409478767860271, + "grad_norm": 1.1284713745117188, + "learning_rate": 6.187690324986645e-06, + "loss": 0.3036, + "step": 22027 + }, + { + "epoch": 0.44096789530315544, + "grad_norm": 1.0988682508468628, + "learning_rate": 6.187375418177677e-06, + "loss": 0.3088, + "step": 22028 + }, + { + "epoch": 0.44098791382028374, + "grad_norm": 1.0878416299819946, + "learning_rate": 6.187060506377238e-06, + "loss": 0.337, + "step": 22029 + }, + { + "epoch": 0.4410079323374121, + "grad_norm": 1.245529055595398, + "learning_rate": 6.186745589586654e-06, + "loss": 0.2958, + "step": 22030 + }, + { + "epoch": 0.44102795085454044, + "grad_norm": 1.0149760246276855, + "learning_rate": 6.186430667807245e-06, + "loss": 0.2739, + "step": 22031 + }, + { + "epoch": 0.4410479693716688, + "grad_norm": 1.0974953174591064, + "learning_rate": 6.1861157410403384e-06, + "loss": 0.3538, + "step": 22032 + }, + { + "epoch": 0.44106798788879714, + "grad_norm": 1.1385904550552368, + "learning_rate": 6.185800809287256e-06, + "loss": 0.3258, + "step": 22033 + }, + { + "epoch": 0.4410880064059255, + "grad_norm": 1.247847557067871, + "learning_rate": 6.18548587254932e-06, + "loss": 0.3111, + "step": 22034 + }, + { + "epoch": 0.44110802492305384, + "grad_norm": 1.148219108581543, + "learning_rate": 6.185170930827859e-06, + "loss": 0.3529, + "step": 22035 + }, + { + "epoch": 0.4411280434401822, + "grad_norm": 1.0742287635803223, + "learning_rate": 6.184855984124194e-06, + "loss": 0.3699, + "step": 22036 + }, + { + "epoch": 0.4411480619573105, + "grad_norm": 1.1301915645599365, + "learning_rate": 6.18454103243965e-06, + "loss": 0.3229, + "step": 22037 + }, + { + "epoch": 0.44116808047443884, + "grad_norm": 1.0690947771072388, + "learning_rate": 6.184226075775549e-06, + "loss": 0.2866, + "step": 22038 + }, + { + "epoch": 0.4411880989915672, + "grad_norm": 1.8962069749832153, + "learning_rate": 6.1839111141332185e-06, + "loss": 0.8025, + "step": 22039 + }, + { + "epoch": 0.44120811750869554, + "grad_norm": 1.2218223810195923, + "learning_rate": 6.183596147513978e-06, + "loss": 0.2889, + "step": 22040 + }, + { + "epoch": 0.4412281360258239, + "grad_norm": 1.0994657278060913, + "learning_rate": 6.183281175919155e-06, + "loss": 0.3297, + "step": 22041 + }, + { + "epoch": 0.44124815454295224, + "grad_norm": 1.8192647695541382, + "learning_rate": 6.182966199350073e-06, + "loss": 0.7709, + "step": 22042 + }, + { + "epoch": 0.4412681730600806, + "grad_norm": 1.4266173839569092, + "learning_rate": 6.1826512178080555e-06, + "loss": 0.3083, + "step": 22043 + }, + { + "epoch": 0.44128819157720894, + "grad_norm": 1.272185206413269, + "learning_rate": 6.182336231294426e-06, + "loss": 0.3402, + "step": 22044 + }, + { + "epoch": 0.44130821009433724, + "grad_norm": 1.2183105945587158, + "learning_rate": 6.1820212398105105e-06, + "loss": 0.2899, + "step": 22045 + }, + { + "epoch": 0.4413282286114656, + "grad_norm": 1.1222585439682007, + "learning_rate": 6.181706243357631e-06, + "loss": 0.3666, + "step": 22046 + }, + { + "epoch": 0.44134824712859394, + "grad_norm": 1.100353717803955, + "learning_rate": 6.181391241937114e-06, + "loss": 0.2878, + "step": 22047 + }, + { + "epoch": 0.4413682656457223, + "grad_norm": 1.037353754043579, + "learning_rate": 6.181076235550282e-06, + "loss": 0.281, + "step": 22048 + }, + { + "epoch": 0.44138828416285064, + "grad_norm": 1.0524805784225464, + "learning_rate": 6.180761224198458e-06, + "loss": 0.3108, + "step": 22049 + }, + { + "epoch": 0.441408302679979, + "grad_norm": 1.1004772186279297, + "learning_rate": 6.180446207882971e-06, + "loss": 0.3113, + "step": 22050 + }, + { + "epoch": 0.44142832119710734, + "grad_norm": 2.075153112411499, + "learning_rate": 6.18013118660514e-06, + "loss": 0.9132, + "step": 22051 + }, + { + "epoch": 0.4414483397142357, + "grad_norm": 1.0778957605361938, + "learning_rate": 6.1798161603662925e-06, + "loss": 0.3416, + "step": 22052 + }, + { + "epoch": 0.441468358231364, + "grad_norm": 1.2192270755767822, + "learning_rate": 6.179501129167751e-06, + "loss": 0.3402, + "step": 22053 + }, + { + "epoch": 0.44148837674849234, + "grad_norm": 1.2324029207229614, + "learning_rate": 6.179186093010841e-06, + "loss": 0.3377, + "step": 22054 + }, + { + "epoch": 0.4415083952656207, + "grad_norm": 0.9403894543647766, + "learning_rate": 6.178871051896886e-06, + "loss": 0.2689, + "step": 22055 + }, + { + "epoch": 0.44152841378274904, + "grad_norm": 1.1173242330551147, + "learning_rate": 6.178556005827211e-06, + "loss": 0.3004, + "step": 22056 + }, + { + "epoch": 0.4415484322998774, + "grad_norm": 1.1454747915267944, + "learning_rate": 6.178240954803141e-06, + "loss": 0.3066, + "step": 22057 + }, + { + "epoch": 0.44156845081700574, + "grad_norm": 1.0748542547225952, + "learning_rate": 6.177925898825998e-06, + "loss": 0.291, + "step": 22058 + }, + { + "epoch": 0.4415884693341341, + "grad_norm": 1.0232477188110352, + "learning_rate": 6.177610837897109e-06, + "loss": 0.3152, + "step": 22059 + }, + { + "epoch": 0.44160848785126244, + "grad_norm": 1.1008490324020386, + "learning_rate": 6.1772957720177975e-06, + "loss": 0.3245, + "step": 22060 + }, + { + "epoch": 0.44162850636839074, + "grad_norm": 1.0473370552062988, + "learning_rate": 6.176980701189388e-06, + "loss": 0.2928, + "step": 22061 + }, + { + "epoch": 0.4416485248855191, + "grad_norm": 1.0596450567245483, + "learning_rate": 6.176665625413204e-06, + "loss": 0.3322, + "step": 22062 + }, + { + "epoch": 0.44166854340264744, + "grad_norm": 1.2765727043151855, + "learning_rate": 6.1763505446905724e-06, + "loss": 0.3919, + "step": 22063 + }, + { + "epoch": 0.4416885619197758, + "grad_norm": 1.0707628726959229, + "learning_rate": 6.1760354590228155e-06, + "loss": 0.339, + "step": 22064 + }, + { + "epoch": 0.44170858043690414, + "grad_norm": 1.925062656402588, + "learning_rate": 6.175720368411258e-06, + "loss": 0.8651, + "step": 22065 + }, + { + "epoch": 0.4417285989540325, + "grad_norm": 1.095313310623169, + "learning_rate": 6.175405272857227e-06, + "loss": 0.2571, + "step": 22066 + }, + { + "epoch": 0.44174861747116084, + "grad_norm": 1.1014569997787476, + "learning_rate": 6.175090172362043e-06, + "loss": 0.3296, + "step": 22067 + }, + { + "epoch": 0.4417686359882892, + "grad_norm": 1.292405366897583, + "learning_rate": 6.174775066927035e-06, + "loss": 0.3181, + "step": 22068 + }, + { + "epoch": 0.4417886545054175, + "grad_norm": 1.0524979829788208, + "learning_rate": 6.174459956553523e-06, + "loss": 0.301, + "step": 22069 + }, + { + "epoch": 0.44180867302254584, + "grad_norm": 1.144057273864746, + "learning_rate": 6.174144841242836e-06, + "loss": 0.3322, + "step": 22070 + }, + { + "epoch": 0.4418286915396742, + "grad_norm": 1.0949344635009766, + "learning_rate": 6.173829720996296e-06, + "loss": 0.3174, + "step": 22071 + }, + { + "epoch": 0.44184871005680254, + "grad_norm": 1.1800808906555176, + "learning_rate": 6.173514595815228e-06, + "loss": 0.346, + "step": 22072 + }, + { + "epoch": 0.4418687285739309, + "grad_norm": 1.911252737045288, + "learning_rate": 6.173199465700957e-06, + "loss": 0.7676, + "step": 22073 + }, + { + "epoch": 0.44188874709105924, + "grad_norm": 1.8952341079711914, + "learning_rate": 6.172884330654809e-06, + "loss": 0.802, + "step": 22074 + }, + { + "epoch": 0.4419087656081876, + "grad_norm": 1.1099021434783936, + "learning_rate": 6.172569190678107e-06, + "loss": 0.2669, + "step": 22075 + }, + { + "epoch": 0.44192878412531594, + "grad_norm": 0.9693436622619629, + "learning_rate": 6.172254045772177e-06, + "loss": 0.2789, + "step": 22076 + }, + { + "epoch": 0.44194880264244424, + "grad_norm": 1.1247025728225708, + "learning_rate": 6.171938895938344e-06, + "loss": 0.3149, + "step": 22077 + }, + { + "epoch": 0.4419688211595726, + "grad_norm": 1.148290753364563, + "learning_rate": 6.17162374117793e-06, + "loss": 0.3486, + "step": 22078 + }, + { + "epoch": 0.44198883967670094, + "grad_norm": 1.147983431816101, + "learning_rate": 6.1713085814922636e-06, + "loss": 0.2982, + "step": 22079 + }, + { + "epoch": 0.4420088581938293, + "grad_norm": 1.0024096965789795, + "learning_rate": 6.1709934168826665e-06, + "loss": 0.3055, + "step": 22080 + }, + { + "epoch": 0.44202887671095764, + "grad_norm": 1.9572312831878662, + "learning_rate": 6.170678247350467e-06, + "loss": 0.7415, + "step": 22081 + }, + { + "epoch": 0.442048895228086, + "grad_norm": 1.249212622642517, + "learning_rate": 6.170363072896987e-06, + "loss": 0.3393, + "step": 22082 + }, + { + "epoch": 0.44206891374521434, + "grad_norm": 1.2050952911376953, + "learning_rate": 6.170047893523552e-06, + "loss": 0.372, + "step": 22083 + }, + { + "epoch": 0.4420889322623427, + "grad_norm": 0.9269788265228271, + "learning_rate": 6.1697327092314885e-06, + "loss": 0.2753, + "step": 22084 + }, + { + "epoch": 0.442108950779471, + "grad_norm": 1.1995211839675903, + "learning_rate": 6.169417520022118e-06, + "loss": 0.3303, + "step": 22085 + }, + { + "epoch": 0.44212896929659934, + "grad_norm": 1.1171232461929321, + "learning_rate": 6.16910232589677e-06, + "loss": 0.3429, + "step": 22086 + }, + { + "epoch": 0.4421489878137277, + "grad_norm": 1.3292486667633057, + "learning_rate": 6.168787126856768e-06, + "loss": 0.3202, + "step": 22087 + }, + { + "epoch": 0.44216900633085604, + "grad_norm": 1.0817320346832275, + "learning_rate": 6.168471922903436e-06, + "loss": 0.2988, + "step": 22088 + }, + { + "epoch": 0.4421890248479844, + "grad_norm": 2.023745059967041, + "learning_rate": 6.1681567140380995e-06, + "loss": 0.8144, + "step": 22089 + }, + { + "epoch": 0.44220904336511274, + "grad_norm": 1.049141764640808, + "learning_rate": 6.167841500262084e-06, + "loss": 0.2868, + "step": 22090 + }, + { + "epoch": 0.4422290618822411, + "grad_norm": 1.2400283813476562, + "learning_rate": 6.167526281576712e-06, + "loss": 0.2837, + "step": 22091 + }, + { + "epoch": 0.44224908039936944, + "grad_norm": 1.1157581806182861, + "learning_rate": 6.167211057983312e-06, + "loss": 0.336, + "step": 22092 + }, + { + "epoch": 0.44226909891649774, + "grad_norm": 1.0029323101043701, + "learning_rate": 6.166895829483208e-06, + "loss": 0.3201, + "step": 22093 + }, + { + "epoch": 0.4422891174336261, + "grad_norm": 1.0520811080932617, + "learning_rate": 6.166580596077726e-06, + "loss": 0.3588, + "step": 22094 + }, + { + "epoch": 0.44230913595075444, + "grad_norm": 1.1284805536270142, + "learning_rate": 6.166265357768189e-06, + "loss": 0.3376, + "step": 22095 + }, + { + "epoch": 0.4423291544678828, + "grad_norm": 1.1810721158981323, + "learning_rate": 6.165950114555924e-06, + "loss": 0.3303, + "step": 22096 + }, + { + "epoch": 0.44234917298501114, + "grad_norm": 1.9947564601898193, + "learning_rate": 6.165634866442256e-06, + "loss": 0.7718, + "step": 22097 + }, + { + "epoch": 0.4423691915021395, + "grad_norm": 1.178381323814392, + "learning_rate": 6.165319613428508e-06, + "loss": 0.3234, + "step": 22098 + }, + { + "epoch": 0.44238921001926784, + "grad_norm": 1.1643577814102173, + "learning_rate": 6.165004355516009e-06, + "loss": 0.2762, + "step": 22099 + }, + { + "epoch": 0.4424092285363962, + "grad_norm": 0.9817674160003662, + "learning_rate": 6.164689092706081e-06, + "loss": 0.2705, + "step": 22100 + }, + { + "epoch": 0.4424292470535245, + "grad_norm": 1.1569348573684692, + "learning_rate": 6.1643738250000525e-06, + "loss": 0.2911, + "step": 22101 + }, + { + "epoch": 0.44244926557065284, + "grad_norm": 1.3475751876831055, + "learning_rate": 6.164058552399246e-06, + "loss": 0.3382, + "step": 22102 + }, + { + "epoch": 0.4424692840877812, + "grad_norm": 1.1347578763961792, + "learning_rate": 6.163743274904988e-06, + "loss": 0.2787, + "step": 22103 + }, + { + "epoch": 0.44248930260490954, + "grad_norm": 1.859549880027771, + "learning_rate": 6.163427992518604e-06, + "loss": 0.722, + "step": 22104 + }, + { + "epoch": 0.4425093211220379, + "grad_norm": 1.010164737701416, + "learning_rate": 6.163112705241417e-06, + "loss": 0.2862, + "step": 22105 + }, + { + "epoch": 0.44252933963916624, + "grad_norm": 1.025499701499939, + "learning_rate": 6.162797413074756e-06, + "loss": 0.2845, + "step": 22106 + }, + { + "epoch": 0.4425493581562946, + "grad_norm": 1.2783979177474976, + "learning_rate": 6.162482116019946e-06, + "loss": 0.3376, + "step": 22107 + }, + { + "epoch": 0.44256937667342294, + "grad_norm": 1.1932950019836426, + "learning_rate": 6.162166814078311e-06, + "loss": 0.3165, + "step": 22108 + }, + { + "epoch": 0.44258939519055124, + "grad_norm": 1.0523333549499512, + "learning_rate": 6.1618515072511755e-06, + "loss": 0.3113, + "step": 22109 + }, + { + "epoch": 0.4426094137076796, + "grad_norm": 1.067224383354187, + "learning_rate": 6.161536195539868e-06, + "loss": 0.3526, + "step": 22110 + }, + { + "epoch": 0.44262943222480794, + "grad_norm": 1.0837695598602295, + "learning_rate": 6.16122087894571e-06, + "loss": 0.3344, + "step": 22111 + }, + { + "epoch": 0.4426494507419363, + "grad_norm": 1.0379149913787842, + "learning_rate": 6.160905557470032e-06, + "loss": 0.3409, + "step": 22112 + }, + { + "epoch": 0.44266946925906464, + "grad_norm": 1.0033750534057617, + "learning_rate": 6.160590231114154e-06, + "loss": 0.3014, + "step": 22113 + }, + { + "epoch": 0.442689487776193, + "grad_norm": 1.0368632078170776, + "learning_rate": 6.160274899879407e-06, + "loss": 0.2874, + "step": 22114 + }, + { + "epoch": 0.44270950629332134, + "grad_norm": 1.0635651350021362, + "learning_rate": 6.159959563767113e-06, + "loss": 0.3444, + "step": 22115 + }, + { + "epoch": 0.4427295248104497, + "grad_norm": 1.1857757568359375, + "learning_rate": 6.159644222778598e-06, + "loss": 0.2849, + "step": 22116 + }, + { + "epoch": 0.442749543327578, + "grad_norm": 0.9652465581893921, + "learning_rate": 6.159328876915189e-06, + "loss": 0.3047, + "step": 22117 + }, + { + "epoch": 0.44276956184470634, + "grad_norm": 1.0258688926696777, + "learning_rate": 6.159013526178211e-06, + "loss": 0.3019, + "step": 22118 + }, + { + "epoch": 0.4427895803618347, + "grad_norm": 1.0610506534576416, + "learning_rate": 6.158698170568988e-06, + "loss": 0.3088, + "step": 22119 + }, + { + "epoch": 0.44280959887896304, + "grad_norm": 1.1149547100067139, + "learning_rate": 6.158382810088849e-06, + "loss": 0.3306, + "step": 22120 + }, + { + "epoch": 0.4428296173960914, + "grad_norm": 1.0639357566833496, + "learning_rate": 6.158067444739119e-06, + "loss": 0.3154, + "step": 22121 + }, + { + "epoch": 0.44284963591321974, + "grad_norm": 1.1450035572052002, + "learning_rate": 6.15775207452112e-06, + "loss": 0.275, + "step": 22122 + }, + { + "epoch": 0.4428696544303481, + "grad_norm": 1.068664789199829, + "learning_rate": 6.157436699436182e-06, + "loss": 0.3199, + "step": 22123 + }, + { + "epoch": 0.44288967294747644, + "grad_norm": 0.9907131195068359, + "learning_rate": 6.1571213194856275e-06, + "loss": 0.2618, + "step": 22124 + }, + { + "epoch": 0.44290969146460474, + "grad_norm": 1.0521214008331299, + "learning_rate": 6.156805934670786e-06, + "loss": 0.3425, + "step": 22125 + }, + { + "epoch": 0.4429297099817331, + "grad_norm": 1.0211718082427979, + "learning_rate": 6.15649054499298e-06, + "loss": 0.3411, + "step": 22126 + }, + { + "epoch": 0.44294972849886144, + "grad_norm": 1.2183527946472168, + "learning_rate": 6.156175150453538e-06, + "loss": 0.3114, + "step": 22127 + }, + { + "epoch": 0.4429697470159898, + "grad_norm": 1.0925023555755615, + "learning_rate": 6.155859751053784e-06, + "loss": 0.3086, + "step": 22128 + }, + { + "epoch": 0.44298976553311814, + "grad_norm": 1.884063959121704, + "learning_rate": 6.155544346795044e-06, + "loss": 0.8163, + "step": 22129 + }, + { + "epoch": 0.4430097840502465, + "grad_norm": 1.024649739265442, + "learning_rate": 6.155228937678643e-06, + "loss": 0.3071, + "step": 22130 + }, + { + "epoch": 0.44302980256737484, + "grad_norm": 1.0450913906097412, + "learning_rate": 6.15491352370591e-06, + "loss": 0.3141, + "step": 22131 + }, + { + "epoch": 0.4430498210845032, + "grad_norm": 1.227841854095459, + "learning_rate": 6.154598104878169e-06, + "loss": 0.3482, + "step": 22132 + }, + { + "epoch": 0.4430698396016315, + "grad_norm": 1.1393558979034424, + "learning_rate": 6.154282681196745e-06, + "loss": 0.3095, + "step": 22133 + }, + { + "epoch": 0.44308985811875984, + "grad_norm": 0.9996827244758606, + "learning_rate": 6.153967252662967e-06, + "loss": 0.2935, + "step": 22134 + }, + { + "epoch": 0.4431098766358882, + "grad_norm": 1.0166592597961426, + "learning_rate": 6.153651819278157e-06, + "loss": 0.2309, + "step": 22135 + }, + { + "epoch": 0.44312989515301654, + "grad_norm": 1.0289148092269897, + "learning_rate": 6.153336381043644e-06, + "loss": 0.3094, + "step": 22136 + }, + { + "epoch": 0.4431499136701449, + "grad_norm": 1.0381059646606445, + "learning_rate": 6.153020937960751e-06, + "loss": 0.2997, + "step": 22137 + }, + { + "epoch": 0.44316993218727324, + "grad_norm": 0.9949555397033691, + "learning_rate": 6.152705490030808e-06, + "loss": 0.3174, + "step": 22138 + }, + { + "epoch": 0.4431899507044016, + "grad_norm": 1.1424628496170044, + "learning_rate": 6.152390037255141e-06, + "loss": 0.3416, + "step": 22139 + }, + { + "epoch": 0.44320996922152994, + "grad_norm": 0.9971774816513062, + "learning_rate": 6.15207457963507e-06, + "loss": 0.3048, + "step": 22140 + }, + { + "epoch": 0.44322998773865824, + "grad_norm": 1.0283303260803223, + "learning_rate": 6.151759117171929e-06, + "loss": 0.3445, + "step": 22141 + }, + { + "epoch": 0.4432500062557866, + "grad_norm": 1.0373589992523193, + "learning_rate": 6.1514436498670385e-06, + "loss": 0.2784, + "step": 22142 + }, + { + "epoch": 0.44327002477291494, + "grad_norm": 1.8963947296142578, + "learning_rate": 6.151128177721727e-06, + "loss": 0.7766, + "step": 22143 + }, + { + "epoch": 0.4432900432900433, + "grad_norm": 1.8191899061203003, + "learning_rate": 6.1508127007373205e-06, + "loss": 0.8099, + "step": 22144 + }, + { + "epoch": 0.44331006180717164, + "grad_norm": 1.055321455001831, + "learning_rate": 6.150497218915146e-06, + "loss": 0.3053, + "step": 22145 + }, + { + "epoch": 0.4433300803243, + "grad_norm": 1.2390292882919312, + "learning_rate": 6.1501817322565275e-06, + "loss": 0.2906, + "step": 22146 + }, + { + "epoch": 0.44335009884142834, + "grad_norm": 1.8711098432540894, + "learning_rate": 6.149866240762794e-06, + "loss": 0.8209, + "step": 22147 + }, + { + "epoch": 0.4433701173585567, + "grad_norm": 1.0728622674942017, + "learning_rate": 6.149550744435269e-06, + "loss": 0.3045, + "step": 22148 + }, + { + "epoch": 0.443390135875685, + "grad_norm": 1.2659263610839844, + "learning_rate": 6.14923524327528e-06, + "loss": 0.3749, + "step": 22149 + }, + { + "epoch": 0.44341015439281334, + "grad_norm": 1.8785068988800049, + "learning_rate": 6.148919737284154e-06, + "loss": 0.8161, + "step": 22150 + }, + { + "epoch": 0.4434301729099417, + "grad_norm": 1.1113662719726562, + "learning_rate": 6.148604226463216e-06, + "loss": 0.3169, + "step": 22151 + }, + { + "epoch": 0.44345019142707004, + "grad_norm": 1.0682286024093628, + "learning_rate": 6.1482887108137945e-06, + "loss": 0.321, + "step": 22152 + }, + { + "epoch": 0.4434702099441984, + "grad_norm": 0.9699750542640686, + "learning_rate": 6.147973190337212e-06, + "loss": 0.2986, + "step": 22153 + }, + { + "epoch": 0.44349022846132674, + "grad_norm": 1.135370135307312, + "learning_rate": 6.1476576650348e-06, + "loss": 0.3495, + "step": 22154 + }, + { + "epoch": 0.4435102469784551, + "grad_norm": 1.153674840927124, + "learning_rate": 6.14734213490788e-06, + "loss": 0.2701, + "step": 22155 + }, + { + "epoch": 0.44353026549558344, + "grad_norm": 1.2665729522705078, + "learning_rate": 6.14702659995778e-06, + "loss": 0.3159, + "step": 22156 + }, + { + "epoch": 0.44355028401271174, + "grad_norm": 1.0069396495819092, + "learning_rate": 6.146711060185828e-06, + "loss": 0.3006, + "step": 22157 + }, + { + "epoch": 0.4435703025298401, + "grad_norm": 1.043033480644226, + "learning_rate": 6.146395515593351e-06, + "loss": 0.2951, + "step": 22158 + }, + { + "epoch": 0.44359032104696844, + "grad_norm": 1.1477984189987183, + "learning_rate": 6.146079966181673e-06, + "loss": 0.3576, + "step": 22159 + }, + { + "epoch": 0.4436103395640968, + "grad_norm": 1.9013729095458984, + "learning_rate": 6.145764411952121e-06, + "loss": 0.8523, + "step": 22160 + }, + { + "epoch": 0.44363035808122514, + "grad_norm": 1.2283438444137573, + "learning_rate": 6.145448852906023e-06, + "loss": 0.2585, + "step": 22161 + }, + { + "epoch": 0.4436503765983535, + "grad_norm": 1.1075568199157715, + "learning_rate": 6.145133289044704e-06, + "loss": 0.3616, + "step": 22162 + }, + { + "epoch": 0.44367039511548184, + "grad_norm": 1.0421273708343506, + "learning_rate": 6.14481772036949e-06, + "loss": 0.3189, + "step": 22163 + }, + { + "epoch": 0.44369041363261014, + "grad_norm": 1.1967304944992065, + "learning_rate": 6.144502146881709e-06, + "loss": 0.3455, + "step": 22164 + }, + { + "epoch": 0.4437104321497385, + "grad_norm": 1.0790072679519653, + "learning_rate": 6.144186568582688e-06, + "loss": 0.282, + "step": 22165 + }, + { + "epoch": 0.44373045066686684, + "grad_norm": 0.8811100721359253, + "learning_rate": 6.143870985473752e-06, + "loss": 0.2678, + "step": 22166 + }, + { + "epoch": 0.4437504691839952, + "grad_norm": 1.151311993598938, + "learning_rate": 6.14355539755623e-06, + "loss": 0.3637, + "step": 22167 + }, + { + "epoch": 0.44377048770112354, + "grad_norm": 1.2968674898147583, + "learning_rate": 6.1432398048314466e-06, + "loss": 0.3172, + "step": 22168 + }, + { + "epoch": 0.4437905062182519, + "grad_norm": 1.7765434980392456, + "learning_rate": 6.142924207300729e-06, + "loss": 0.836, + "step": 22169 + }, + { + "epoch": 0.44381052473538024, + "grad_norm": 1.0753110647201538, + "learning_rate": 6.142608604965405e-06, + "loss": 0.312, + "step": 22170 + }, + { + "epoch": 0.4438305432525086, + "grad_norm": 1.038333773612976, + "learning_rate": 6.142292997826798e-06, + "loss": 0.3117, + "step": 22171 + }, + { + "epoch": 0.4438505617696369, + "grad_norm": 1.907025694847107, + "learning_rate": 6.141977385886239e-06, + "loss": 0.8177, + "step": 22172 + }, + { + "epoch": 0.44387058028676524, + "grad_norm": 1.033355712890625, + "learning_rate": 6.141661769145051e-06, + "loss": 0.3089, + "step": 22173 + }, + { + "epoch": 0.4438905988038936, + "grad_norm": 1.021260380744934, + "learning_rate": 6.1413461476045644e-06, + "loss": 0.3193, + "step": 22174 + }, + { + "epoch": 0.44391061732102194, + "grad_norm": 1.0253040790557861, + "learning_rate": 6.141030521266103e-06, + "loss": 0.2828, + "step": 22175 + }, + { + "epoch": 0.4439306358381503, + "grad_norm": 1.0769832134246826, + "learning_rate": 6.140714890130995e-06, + "loss": 0.2935, + "step": 22176 + }, + { + "epoch": 0.44395065435527864, + "grad_norm": 1.0655860900878906, + "learning_rate": 6.140399254200568e-06, + "loss": 0.3067, + "step": 22177 + }, + { + "epoch": 0.443970672872407, + "grad_norm": 1.1188465356826782, + "learning_rate": 6.140083613476147e-06, + "loss": 0.3021, + "step": 22178 + }, + { + "epoch": 0.44399069138953534, + "grad_norm": 1.1249644756317139, + "learning_rate": 6.13976796795906e-06, + "loss": 0.3229, + "step": 22179 + }, + { + "epoch": 0.44401070990666364, + "grad_norm": 1.224958062171936, + "learning_rate": 6.139452317650635e-06, + "loss": 0.3257, + "step": 22180 + }, + { + "epoch": 0.444030728423792, + "grad_norm": 1.0820300579071045, + "learning_rate": 6.1391366625521965e-06, + "loss": 0.3434, + "step": 22181 + }, + { + "epoch": 0.44405074694092034, + "grad_norm": 1.04718017578125, + "learning_rate": 6.138821002665072e-06, + "loss": 0.2768, + "step": 22182 + }, + { + "epoch": 0.4440707654580487, + "grad_norm": 1.1117476224899292, + "learning_rate": 6.13850533799059e-06, + "loss": 0.3056, + "step": 22183 + }, + { + "epoch": 0.44409078397517704, + "grad_norm": 1.1332532167434692, + "learning_rate": 6.138189668530077e-06, + "loss": 0.318, + "step": 22184 + }, + { + "epoch": 0.4441108024923054, + "grad_norm": 1.0800367593765259, + "learning_rate": 6.1378739942848585e-06, + "loss": 0.3455, + "step": 22185 + }, + { + "epoch": 0.44413082100943374, + "grad_norm": 1.0476030111312866, + "learning_rate": 6.137558315256263e-06, + "loss": 0.3146, + "step": 22186 + }, + { + "epoch": 0.4441508395265621, + "grad_norm": 1.1258336305618286, + "learning_rate": 6.137242631445618e-06, + "loss": 0.2935, + "step": 22187 + }, + { + "epoch": 0.4441708580436904, + "grad_norm": 1.084876298904419, + "learning_rate": 6.136926942854249e-06, + "loss": 0.2953, + "step": 22188 + }, + { + "epoch": 0.44419087656081874, + "grad_norm": 1.101401686668396, + "learning_rate": 6.1366112494834825e-06, + "loss": 0.3069, + "step": 22189 + }, + { + "epoch": 0.4442108950779471, + "grad_norm": 1.0525981187820435, + "learning_rate": 6.136295551334648e-06, + "loss": 0.3171, + "step": 22190 + }, + { + "epoch": 0.44423091359507544, + "grad_norm": 1.0397642850875854, + "learning_rate": 6.135979848409073e-06, + "loss": 0.3064, + "step": 22191 + }, + { + "epoch": 0.4442509321122038, + "grad_norm": 1.0377832651138306, + "learning_rate": 6.135664140708082e-06, + "loss": 0.3059, + "step": 22192 + }, + { + "epoch": 0.44427095062933214, + "grad_norm": 1.108699917793274, + "learning_rate": 6.135348428233003e-06, + "loss": 0.3101, + "step": 22193 + }, + { + "epoch": 0.4442909691464605, + "grad_norm": 1.1335045099258423, + "learning_rate": 6.135032710985163e-06, + "loss": 0.2838, + "step": 22194 + }, + { + "epoch": 0.44431098766358884, + "grad_norm": 1.04262113571167, + "learning_rate": 6.1347169889658896e-06, + "loss": 0.2947, + "step": 22195 + }, + { + "epoch": 0.44433100618071714, + "grad_norm": 1.0839850902557373, + "learning_rate": 6.134401262176512e-06, + "loss": 0.3532, + "step": 22196 + }, + { + "epoch": 0.4443510246978455, + "grad_norm": 1.029005765914917, + "learning_rate": 6.1340855306183545e-06, + "loss": 0.3362, + "step": 22197 + }, + { + "epoch": 0.44437104321497384, + "grad_norm": 1.1239162683486938, + "learning_rate": 6.133769794292746e-06, + "loss": 0.3138, + "step": 22198 + }, + { + "epoch": 0.4443910617321022, + "grad_norm": 1.4655817747116089, + "learning_rate": 6.1334540532010135e-06, + "loss": 0.2917, + "step": 22199 + }, + { + "epoch": 0.44441108024923054, + "grad_norm": 1.1432433128356934, + "learning_rate": 6.133138307344484e-06, + "loss": 0.2871, + "step": 22200 + }, + { + "epoch": 0.4444310987663589, + "grad_norm": 1.1501566171646118, + "learning_rate": 6.132822556724483e-06, + "loss": 0.3037, + "step": 22201 + }, + { + "epoch": 0.44445111728348724, + "grad_norm": 1.1757742166519165, + "learning_rate": 6.132506801342342e-06, + "loss": 0.2768, + "step": 22202 + }, + { + "epoch": 0.4444711358006156, + "grad_norm": 1.9138990640640259, + "learning_rate": 6.132191041199386e-06, + "loss": 0.8329, + "step": 22203 + }, + { + "epoch": 0.4444911543177439, + "grad_norm": 1.123773217201233, + "learning_rate": 6.131875276296941e-06, + "loss": 0.279, + "step": 22204 + }, + { + "epoch": 0.44451117283487224, + "grad_norm": 1.3437992334365845, + "learning_rate": 6.131559506636338e-06, + "loss": 0.3229, + "step": 22205 + }, + { + "epoch": 0.4445311913520006, + "grad_norm": 1.2281101942062378, + "learning_rate": 6.1312437322189e-06, + "loss": 0.3699, + "step": 22206 + }, + { + "epoch": 0.44455120986912894, + "grad_norm": 1.082511067390442, + "learning_rate": 6.1309279530459576e-06, + "loss": 0.3368, + "step": 22207 + }, + { + "epoch": 0.4445712283862573, + "grad_norm": 1.2536065578460693, + "learning_rate": 6.130612169118837e-06, + "loss": 0.3071, + "step": 22208 + }, + { + "epoch": 0.44459124690338564, + "grad_norm": 1.103726863861084, + "learning_rate": 6.1302963804388685e-06, + "loss": 0.2815, + "step": 22209 + }, + { + "epoch": 0.444611265420514, + "grad_norm": 1.209073781967163, + "learning_rate": 6.129980587007375e-06, + "loss": 0.3188, + "step": 22210 + }, + { + "epoch": 0.44463128393764234, + "grad_norm": 1.026024580001831, + "learning_rate": 6.129664788825687e-06, + "loss": 0.3113, + "step": 22211 + }, + { + "epoch": 0.44465130245477064, + "grad_norm": 1.1202865839004517, + "learning_rate": 6.129348985895133e-06, + "loss": 0.3339, + "step": 22212 + }, + { + "epoch": 0.444671320971899, + "grad_norm": 1.1787171363830566, + "learning_rate": 6.129033178217036e-06, + "loss": 0.3423, + "step": 22213 + }, + { + "epoch": 0.44469133948902734, + "grad_norm": 1.3113476037979126, + "learning_rate": 6.128717365792728e-06, + "loss": 0.3005, + "step": 22214 + }, + { + "epoch": 0.4447113580061557, + "grad_norm": 1.819756031036377, + "learning_rate": 6.128401548623534e-06, + "loss": 0.7626, + "step": 22215 + }, + { + "epoch": 0.44473137652328404, + "grad_norm": 1.085897445678711, + "learning_rate": 6.128085726710784e-06, + "loss": 0.2949, + "step": 22216 + }, + { + "epoch": 0.4447513950404124, + "grad_norm": 1.0990209579467773, + "learning_rate": 6.127769900055803e-06, + "loss": 0.3075, + "step": 22217 + }, + { + "epoch": 0.44477141355754074, + "grad_norm": 1.2320892810821533, + "learning_rate": 6.127454068659922e-06, + "loss": 0.324, + "step": 22218 + }, + { + "epoch": 0.4447914320746691, + "grad_norm": 1.2006179094314575, + "learning_rate": 6.127138232524466e-06, + "loss": 0.3452, + "step": 22219 + }, + { + "epoch": 0.4448114505917974, + "grad_norm": 1.0583038330078125, + "learning_rate": 6.126822391650762e-06, + "loss": 0.3063, + "step": 22220 + }, + { + "epoch": 0.44483146910892574, + "grad_norm": 1.1720037460327148, + "learning_rate": 6.1265065460401396e-06, + "loss": 0.3517, + "step": 22221 + }, + { + "epoch": 0.4448514876260541, + "grad_norm": 1.0330803394317627, + "learning_rate": 6.126190695693928e-06, + "loss": 0.3091, + "step": 22222 + }, + { + "epoch": 0.44487150614318244, + "grad_norm": 1.1266663074493408, + "learning_rate": 6.125874840613453e-06, + "loss": 0.3656, + "step": 22223 + }, + { + "epoch": 0.4448915246603108, + "grad_norm": 1.1554012298583984, + "learning_rate": 6.1255589808000405e-06, + "loss": 0.2946, + "step": 22224 + }, + { + "epoch": 0.44491154317743914, + "grad_norm": 1.2119991779327393, + "learning_rate": 6.125243116255021e-06, + "loss": 0.3499, + "step": 22225 + }, + { + "epoch": 0.4449315616945675, + "grad_norm": 1.104333519935608, + "learning_rate": 6.124927246979722e-06, + "loss": 0.32, + "step": 22226 + }, + { + "epoch": 0.44495158021169584, + "grad_norm": 2.0321240425109863, + "learning_rate": 6.124611372975469e-06, + "loss": 0.8215, + "step": 22227 + }, + { + "epoch": 0.44497159872882414, + "grad_norm": 1.0399810075759888, + "learning_rate": 6.124295494243594e-06, + "loss": 0.3066, + "step": 22228 + }, + { + "epoch": 0.4449916172459525, + "grad_norm": 0.9832967519760132, + "learning_rate": 6.123979610785422e-06, + "loss": 0.2752, + "step": 22229 + }, + { + "epoch": 0.44501163576308084, + "grad_norm": 1.1772202253341675, + "learning_rate": 6.123663722602282e-06, + "loss": 0.2935, + "step": 22230 + }, + { + "epoch": 0.4450316542802092, + "grad_norm": 1.1513749361038208, + "learning_rate": 6.123347829695502e-06, + "loss": 0.3083, + "step": 22231 + }, + { + "epoch": 0.44505167279733754, + "grad_norm": 1.1683037281036377, + "learning_rate": 6.123031932066408e-06, + "loss": 0.356, + "step": 22232 + }, + { + "epoch": 0.4450716913144659, + "grad_norm": 1.0275886058807373, + "learning_rate": 6.122716029716329e-06, + "loss": 0.3018, + "step": 22233 + }, + { + "epoch": 0.44509170983159424, + "grad_norm": 1.1272029876708984, + "learning_rate": 6.1224001226465956e-06, + "loss": 0.295, + "step": 22234 + }, + { + "epoch": 0.4451117283487226, + "grad_norm": 1.949512243270874, + "learning_rate": 6.122084210858532e-06, + "loss": 0.8127, + "step": 22235 + }, + { + "epoch": 0.4451317468658509, + "grad_norm": 1.118349313735962, + "learning_rate": 6.121768294353469e-06, + "loss": 0.326, + "step": 22236 + }, + { + "epoch": 0.44515176538297924, + "grad_norm": 1.1537226438522339, + "learning_rate": 6.121452373132732e-06, + "loss": 0.3736, + "step": 22237 + }, + { + "epoch": 0.4451717839001076, + "grad_norm": 1.1825931072235107, + "learning_rate": 6.121136447197653e-06, + "loss": 0.3461, + "step": 22238 + }, + { + "epoch": 0.44519180241723594, + "grad_norm": 1.0379741191864014, + "learning_rate": 6.120820516549556e-06, + "loss": 0.3077, + "step": 22239 + }, + { + "epoch": 0.4452118209343643, + "grad_norm": 1.0667874813079834, + "learning_rate": 6.12050458118977e-06, + "loss": 0.3283, + "step": 22240 + }, + { + "epoch": 0.44523183945149264, + "grad_norm": 1.1893525123596191, + "learning_rate": 6.120188641119624e-06, + "loss": 0.3016, + "step": 22241 + }, + { + "epoch": 0.445251857968621, + "grad_norm": 1.1940525770187378, + "learning_rate": 6.119872696340448e-06, + "loss": 0.3673, + "step": 22242 + }, + { + "epoch": 0.44527187648574934, + "grad_norm": 1.0578205585479736, + "learning_rate": 6.119556746853568e-06, + "loss": 0.3173, + "step": 22243 + }, + { + "epoch": 0.44529189500287764, + "grad_norm": 1.1749324798583984, + "learning_rate": 6.11924079266031e-06, + "loss": 0.3345, + "step": 22244 + }, + { + "epoch": 0.445311913520006, + "grad_norm": 1.1205427646636963, + "learning_rate": 6.118924833762007e-06, + "loss": 0.3147, + "step": 22245 + }, + { + "epoch": 0.44533193203713434, + "grad_norm": 1.1954102516174316, + "learning_rate": 6.118608870159982e-06, + "loss": 0.3438, + "step": 22246 + }, + { + "epoch": 0.4453519505542627, + "grad_norm": 1.1638814210891724, + "learning_rate": 6.118292901855569e-06, + "loss": 0.3091, + "step": 22247 + }, + { + "epoch": 0.44537196907139104, + "grad_norm": 1.2250808477401733, + "learning_rate": 6.1179769288500914e-06, + "loss": 0.3654, + "step": 22248 + }, + { + "epoch": 0.4453919875885194, + "grad_norm": 0.9838035106658936, + "learning_rate": 6.11766095114488e-06, + "loss": 0.3294, + "step": 22249 + }, + { + "epoch": 0.44541200610564774, + "grad_norm": 0.9840442538261414, + "learning_rate": 6.1173449687412634e-06, + "loss": 0.299, + "step": 22250 + }, + { + "epoch": 0.4454320246227761, + "grad_norm": 1.105014443397522, + "learning_rate": 6.117028981640568e-06, + "loss": 0.3464, + "step": 22251 + }, + { + "epoch": 0.4454520431399044, + "grad_norm": 0.9963194131851196, + "learning_rate": 6.116712989844123e-06, + "loss": 0.283, + "step": 22252 + }, + { + "epoch": 0.44547206165703274, + "grad_norm": 1.06675386428833, + "learning_rate": 6.116396993353258e-06, + "loss": 0.3106, + "step": 22253 + }, + { + "epoch": 0.4454920801741611, + "grad_norm": 1.1060527563095093, + "learning_rate": 6.1160809921692984e-06, + "loss": 0.3838, + "step": 22254 + }, + { + "epoch": 0.44551209869128944, + "grad_norm": 1.0762200355529785, + "learning_rate": 6.1157649862935755e-06, + "loss": 0.2994, + "step": 22255 + }, + { + "epoch": 0.4455321172084178, + "grad_norm": 1.1014926433563232, + "learning_rate": 6.115448975727417e-06, + "loss": 0.2971, + "step": 22256 + }, + { + "epoch": 0.44555213572554614, + "grad_norm": 1.1507906913757324, + "learning_rate": 6.115132960472151e-06, + "loss": 0.2778, + "step": 22257 + }, + { + "epoch": 0.4455721542426745, + "grad_norm": 1.1471836566925049, + "learning_rate": 6.114816940529105e-06, + "loss": 0.2957, + "step": 22258 + }, + { + "epoch": 0.44559217275980284, + "grad_norm": 1.2646815776824951, + "learning_rate": 6.11450091589961e-06, + "loss": 0.2896, + "step": 22259 + }, + { + "epoch": 0.44561219127693114, + "grad_norm": 1.129817008972168, + "learning_rate": 6.1141848865849926e-06, + "loss": 0.3033, + "step": 22260 + }, + { + "epoch": 0.4456322097940595, + "grad_norm": 1.2052408456802368, + "learning_rate": 6.113868852586581e-06, + "loss": 0.3222, + "step": 22261 + }, + { + "epoch": 0.44565222831118784, + "grad_norm": 1.1401773691177368, + "learning_rate": 6.113552813905705e-06, + "loss": 0.3148, + "step": 22262 + }, + { + "epoch": 0.4456722468283162, + "grad_norm": 1.8524529933929443, + "learning_rate": 6.113236770543692e-06, + "loss": 0.7526, + "step": 22263 + }, + { + "epoch": 0.44569226534544454, + "grad_norm": 1.0789530277252197, + "learning_rate": 6.112920722501873e-06, + "loss": 0.3057, + "step": 22264 + }, + { + "epoch": 0.4457122838625729, + "grad_norm": 1.137425184249878, + "learning_rate": 6.112604669781572e-06, + "loss": 0.3162, + "step": 22265 + }, + { + "epoch": 0.44573230237970124, + "grad_norm": 1.0769778490066528, + "learning_rate": 6.1122886123841215e-06, + "loss": 0.3232, + "step": 22266 + }, + { + "epoch": 0.4457523208968296, + "grad_norm": 1.0957636833190918, + "learning_rate": 6.111972550310851e-06, + "loss": 0.3069, + "step": 22267 + }, + { + "epoch": 0.4457723394139579, + "grad_norm": 1.107755184173584, + "learning_rate": 6.1116564835630856e-06, + "loss": 0.3358, + "step": 22268 + }, + { + "epoch": 0.44579235793108624, + "grad_norm": 1.0376276969909668, + "learning_rate": 6.111340412142156e-06, + "loss": 0.2646, + "step": 22269 + }, + { + "epoch": 0.4458123764482146, + "grad_norm": 1.3744302988052368, + "learning_rate": 6.1110243360493894e-06, + "loss": 0.3729, + "step": 22270 + }, + { + "epoch": 0.44583239496534294, + "grad_norm": 1.0642063617706299, + "learning_rate": 6.110708255286116e-06, + "loss": 0.3477, + "step": 22271 + }, + { + "epoch": 0.4458524134824713, + "grad_norm": 1.6918141841888428, + "learning_rate": 6.110392169853664e-06, + "loss": 0.7859, + "step": 22272 + }, + { + "epoch": 0.44587243199959964, + "grad_norm": 1.1176378726959229, + "learning_rate": 6.110076079753363e-06, + "loss": 0.2815, + "step": 22273 + }, + { + "epoch": 0.445892450516728, + "grad_norm": 1.0121053457260132, + "learning_rate": 6.109759984986541e-06, + "loss": 0.2923, + "step": 22274 + }, + { + "epoch": 0.44591246903385634, + "grad_norm": 1.1580724716186523, + "learning_rate": 6.109443885554527e-06, + "loss": 0.3671, + "step": 22275 + }, + { + "epoch": 0.44593248755098464, + "grad_norm": 1.080450415611267, + "learning_rate": 6.109127781458649e-06, + "loss": 0.3123, + "step": 22276 + }, + { + "epoch": 0.445952506068113, + "grad_norm": 1.0974359512329102, + "learning_rate": 6.108811672700237e-06, + "loss": 0.3006, + "step": 22277 + }, + { + "epoch": 0.44597252458524134, + "grad_norm": 1.2223371267318726, + "learning_rate": 6.108495559280618e-06, + "loss": 0.3194, + "step": 22278 + }, + { + "epoch": 0.4459925431023697, + "grad_norm": 1.1476283073425293, + "learning_rate": 6.108179441201123e-06, + "loss": 0.3204, + "step": 22279 + }, + { + "epoch": 0.44601256161949804, + "grad_norm": 1.9724090099334717, + "learning_rate": 6.107863318463081e-06, + "loss": 0.7535, + "step": 22280 + }, + { + "epoch": 0.4460325801366264, + "grad_norm": 1.1244014501571655, + "learning_rate": 6.107547191067819e-06, + "loss": 0.3177, + "step": 22281 + }, + { + "epoch": 0.44605259865375474, + "grad_norm": 1.7631224393844604, + "learning_rate": 6.107231059016669e-06, + "loss": 0.7896, + "step": 22282 + }, + { + "epoch": 0.4460726171708831, + "grad_norm": 1.031160831451416, + "learning_rate": 6.106914922310955e-06, + "loss": 0.3055, + "step": 22283 + }, + { + "epoch": 0.4460926356880114, + "grad_norm": 1.0911691188812256, + "learning_rate": 6.10659878095201e-06, + "loss": 0.3161, + "step": 22284 + }, + { + "epoch": 0.44611265420513974, + "grad_norm": 1.0675731897354126, + "learning_rate": 6.106282634941162e-06, + "loss": 0.3031, + "step": 22285 + }, + { + "epoch": 0.4461326727222681, + "grad_norm": 1.09312903881073, + "learning_rate": 6.10596648427974e-06, + "loss": 0.2986, + "step": 22286 + }, + { + "epoch": 0.44615269123939644, + "grad_norm": 1.0905258655548096, + "learning_rate": 6.105650328969073e-06, + "loss": 0.3283, + "step": 22287 + }, + { + "epoch": 0.4461727097565248, + "grad_norm": 1.1436774730682373, + "learning_rate": 6.105334169010489e-06, + "loss": 0.3279, + "step": 22288 + }, + { + "epoch": 0.44619272827365314, + "grad_norm": 1.86106276512146, + "learning_rate": 6.10501800440532e-06, + "loss": 0.7819, + "step": 22289 + }, + { + "epoch": 0.4462127467907815, + "grad_norm": 1.1107968091964722, + "learning_rate": 6.104701835154891e-06, + "loss": 0.332, + "step": 22290 + }, + { + "epoch": 0.44623276530790984, + "grad_norm": 1.0482728481292725, + "learning_rate": 6.1043856612605345e-06, + "loss": 0.2923, + "step": 22291 + }, + { + "epoch": 0.44625278382503814, + "grad_norm": 1.2587003707885742, + "learning_rate": 6.104069482723577e-06, + "loss": 0.3477, + "step": 22292 + }, + { + "epoch": 0.4462728023421665, + "grad_norm": 1.204176664352417, + "learning_rate": 6.103753299545351e-06, + "loss": 0.329, + "step": 22293 + }, + { + "epoch": 0.44629282085929484, + "grad_norm": 1.035607099533081, + "learning_rate": 6.1034371117271815e-06, + "loss": 0.2832, + "step": 22294 + }, + { + "epoch": 0.4463128393764232, + "grad_norm": 1.0753809213638306, + "learning_rate": 6.103120919270402e-06, + "loss": 0.2948, + "step": 22295 + }, + { + "epoch": 0.44633285789355154, + "grad_norm": 1.0706177949905396, + "learning_rate": 6.102804722176339e-06, + "loss": 0.302, + "step": 22296 + }, + { + "epoch": 0.4463528764106799, + "grad_norm": 1.0764573812484741, + "learning_rate": 6.102488520446321e-06, + "loss": 0.284, + "step": 22297 + }, + { + "epoch": 0.44637289492780824, + "grad_norm": 1.1680210828781128, + "learning_rate": 6.102172314081679e-06, + "loss": 0.3535, + "step": 22298 + }, + { + "epoch": 0.4463929134449366, + "grad_norm": 0.9758449792861938, + "learning_rate": 6.1018561030837416e-06, + "loss": 0.2939, + "step": 22299 + }, + { + "epoch": 0.4464129319620649, + "grad_norm": 1.083539605140686, + "learning_rate": 6.10153988745384e-06, + "loss": 0.3089, + "step": 22300 + }, + { + "epoch": 0.44643295047919324, + "grad_norm": 1.8571089506149292, + "learning_rate": 6.1012236671933e-06, + "loss": 0.9004, + "step": 22301 + }, + { + "epoch": 0.4464529689963216, + "grad_norm": 1.3173843622207642, + "learning_rate": 6.100907442303455e-06, + "loss": 0.3415, + "step": 22302 + }, + { + "epoch": 0.44647298751344994, + "grad_norm": 1.2703732252120972, + "learning_rate": 6.1005912127856285e-06, + "loss": 0.3279, + "step": 22303 + }, + { + "epoch": 0.4464930060305783, + "grad_norm": 0.9998849630355835, + "learning_rate": 6.100274978641154e-06, + "loss": 0.2677, + "step": 22304 + }, + { + "epoch": 0.44651302454770664, + "grad_norm": 1.1135563850402832, + "learning_rate": 6.099958739871361e-06, + "loss": 0.3353, + "step": 22305 + }, + { + "epoch": 0.446533043064835, + "grad_norm": 1.2886401414871216, + "learning_rate": 6.09964249647758e-06, + "loss": 0.3287, + "step": 22306 + }, + { + "epoch": 0.44655306158196334, + "grad_norm": 1.9581207036972046, + "learning_rate": 6.099326248461137e-06, + "loss": 0.7855, + "step": 22307 + }, + { + "epoch": 0.44657308009909163, + "grad_norm": 1.062142252922058, + "learning_rate": 6.099009995823364e-06, + "loss": 0.2945, + "step": 22308 + }, + { + "epoch": 0.44659309861622, + "grad_norm": 1.2982723712921143, + "learning_rate": 6.098693738565589e-06, + "loss": 0.3137, + "step": 22309 + }, + { + "epoch": 0.44661311713334834, + "grad_norm": 1.162372350692749, + "learning_rate": 6.09837747668914e-06, + "loss": 0.354, + "step": 22310 + }, + { + "epoch": 0.4466331356504767, + "grad_norm": 1.9147005081176758, + "learning_rate": 6.098061210195351e-06, + "loss": 0.7773, + "step": 22311 + }, + { + "epoch": 0.44665315416760504, + "grad_norm": 1.114113450050354, + "learning_rate": 6.097744939085547e-06, + "loss": 0.3023, + "step": 22312 + }, + { + "epoch": 0.4466731726847334, + "grad_norm": 2.1617894172668457, + "learning_rate": 6.097428663361061e-06, + "loss": 0.8375, + "step": 22313 + }, + { + "epoch": 0.44669319120186174, + "grad_norm": 1.0686256885528564, + "learning_rate": 6.097112383023221e-06, + "loss": 0.3463, + "step": 22314 + }, + { + "epoch": 0.4467132097189901, + "grad_norm": 1.1773643493652344, + "learning_rate": 6.096796098073356e-06, + "loss": 0.304, + "step": 22315 + }, + { + "epoch": 0.4467332282361184, + "grad_norm": 1.1851167678833008, + "learning_rate": 6.096479808512797e-06, + "loss": 0.3215, + "step": 22316 + }, + { + "epoch": 0.44675324675324674, + "grad_norm": 1.0922000408172607, + "learning_rate": 6.096163514342872e-06, + "loss": 0.2884, + "step": 22317 + }, + { + "epoch": 0.4467732652703751, + "grad_norm": 1.169136881828308, + "learning_rate": 6.095847215564912e-06, + "loss": 0.3684, + "step": 22318 + }, + { + "epoch": 0.44679328378750344, + "grad_norm": 1.2885215282440186, + "learning_rate": 6.095530912180245e-06, + "loss": 0.3324, + "step": 22319 + }, + { + "epoch": 0.4468133023046318, + "grad_norm": 0.9619719982147217, + "learning_rate": 6.0952146041902035e-06, + "loss": 0.2966, + "step": 22320 + }, + { + "epoch": 0.44683332082176014, + "grad_norm": 1.0369054079055786, + "learning_rate": 6.094898291596115e-06, + "loss": 0.2891, + "step": 22321 + }, + { + "epoch": 0.4468533393388885, + "grad_norm": 1.2027381658554077, + "learning_rate": 6.094581974399311e-06, + "loss": 0.3269, + "step": 22322 + }, + { + "epoch": 0.44687335785601684, + "grad_norm": 1.0003018379211426, + "learning_rate": 6.094265652601117e-06, + "loss": 0.3381, + "step": 22323 + }, + { + "epoch": 0.44689337637314513, + "grad_norm": 1.1388789415359497, + "learning_rate": 6.093949326202867e-06, + "loss": 0.2972, + "step": 22324 + }, + { + "epoch": 0.4469133948902735, + "grad_norm": 1.0343327522277832, + "learning_rate": 6.093632995205889e-06, + "loss": 0.3108, + "step": 22325 + }, + { + "epoch": 0.44693341340740184, + "grad_norm": 1.0051240921020508, + "learning_rate": 6.093316659611515e-06, + "loss": 0.3121, + "step": 22326 + }, + { + "epoch": 0.4469534319245302, + "grad_norm": 1.1081598997116089, + "learning_rate": 6.093000319421072e-06, + "loss": 0.3023, + "step": 22327 + }, + { + "epoch": 0.44697345044165854, + "grad_norm": 1.1174606084823608, + "learning_rate": 6.09268397463589e-06, + "loss": 0.3254, + "step": 22328 + }, + { + "epoch": 0.4469934689587869, + "grad_norm": 1.2825253009796143, + "learning_rate": 6.092367625257301e-06, + "loss": 0.3352, + "step": 22329 + }, + { + "epoch": 0.44701348747591524, + "grad_norm": 1.223207950592041, + "learning_rate": 6.092051271286632e-06, + "loss": 0.3103, + "step": 22330 + }, + { + "epoch": 0.4470335059930436, + "grad_norm": 1.062013864517212, + "learning_rate": 6.091734912725217e-06, + "loss": 0.3313, + "step": 22331 + }, + { + "epoch": 0.4470535245101719, + "grad_norm": 1.138838529586792, + "learning_rate": 6.091418549574381e-06, + "loss": 0.3615, + "step": 22332 + }, + { + "epoch": 0.44707354302730024, + "grad_norm": 1.0626779794692993, + "learning_rate": 6.091102181835458e-06, + "loss": 0.331, + "step": 22333 + }, + { + "epoch": 0.4470935615444286, + "grad_norm": 1.1616071462631226, + "learning_rate": 6.090785809509775e-06, + "loss": 0.2979, + "step": 22334 + }, + { + "epoch": 0.44711358006155694, + "grad_norm": 1.094664454460144, + "learning_rate": 6.090469432598664e-06, + "loss": 0.3739, + "step": 22335 + }, + { + "epoch": 0.4471335985786853, + "grad_norm": 1.1423165798187256, + "learning_rate": 6.090153051103452e-06, + "loss": 0.3391, + "step": 22336 + }, + { + "epoch": 0.44715361709581364, + "grad_norm": 1.1516733169555664, + "learning_rate": 6.089836665025474e-06, + "loss": 0.3464, + "step": 22337 + }, + { + "epoch": 0.447173635612942, + "grad_norm": 1.0833545923233032, + "learning_rate": 6.089520274366057e-06, + "loss": 0.3445, + "step": 22338 + }, + { + "epoch": 0.44719365413007034, + "grad_norm": 1.0893534421920776, + "learning_rate": 6.089203879126529e-06, + "loss": 0.3145, + "step": 22339 + }, + { + "epoch": 0.44721367264719863, + "grad_norm": 1.0938199758529663, + "learning_rate": 6.088887479308225e-06, + "loss": 0.3039, + "step": 22340 + }, + { + "epoch": 0.447233691164327, + "grad_norm": 1.186503529548645, + "learning_rate": 6.088571074912471e-06, + "loss": 0.3091, + "step": 22341 + }, + { + "epoch": 0.44725370968145534, + "grad_norm": 1.1475625038146973, + "learning_rate": 6.088254665940597e-06, + "loss": 0.3113, + "step": 22342 + }, + { + "epoch": 0.4472737281985837, + "grad_norm": 1.7618376016616821, + "learning_rate": 6.0879382523939364e-06, + "loss": 0.8287, + "step": 22343 + }, + { + "epoch": 0.44729374671571204, + "grad_norm": 1.0682750940322876, + "learning_rate": 6.087621834273818e-06, + "loss": 0.319, + "step": 22344 + }, + { + "epoch": 0.4473137652328404, + "grad_norm": 1.1031192541122437, + "learning_rate": 6.08730541158157e-06, + "loss": 0.3717, + "step": 22345 + }, + { + "epoch": 0.44733378374996874, + "grad_norm": 1.1917630434036255, + "learning_rate": 6.086988984318524e-06, + "loss": 0.3329, + "step": 22346 + }, + { + "epoch": 0.4473538022670971, + "grad_norm": 0.9624899625778198, + "learning_rate": 6.086672552486012e-06, + "loss": 0.256, + "step": 22347 + }, + { + "epoch": 0.4473738207842254, + "grad_norm": 1.121698021888733, + "learning_rate": 6.086356116085363e-06, + "loss": 0.3065, + "step": 22348 + }, + { + "epoch": 0.44739383930135374, + "grad_norm": 1.87131929397583, + "learning_rate": 6.086039675117903e-06, + "loss": 0.7352, + "step": 22349 + }, + { + "epoch": 0.4474138578184821, + "grad_norm": 1.1204863786697388, + "learning_rate": 6.085723229584968e-06, + "loss": 0.3492, + "step": 22350 + }, + { + "epoch": 0.44743387633561044, + "grad_norm": 1.0606694221496582, + "learning_rate": 6.085406779487887e-06, + "loss": 0.3239, + "step": 22351 + }, + { + "epoch": 0.4474538948527388, + "grad_norm": 1.1219888925552368, + "learning_rate": 6.085090324827988e-06, + "loss": 0.2998, + "step": 22352 + }, + { + "epoch": 0.44747391336986714, + "grad_norm": 1.128798007965088, + "learning_rate": 6.0847738656066045e-06, + "loss": 0.3254, + "step": 22353 + }, + { + "epoch": 0.4474939318869955, + "grad_norm": 1.091981291770935, + "learning_rate": 6.084457401825063e-06, + "loss": 0.3418, + "step": 22354 + }, + { + "epoch": 0.44751395040412384, + "grad_norm": 1.0367538928985596, + "learning_rate": 6.084140933484697e-06, + "loss": 0.3155, + "step": 22355 + }, + { + "epoch": 0.44753396892125213, + "grad_norm": 1.1749608516693115, + "learning_rate": 6.083824460586836e-06, + "loss": 0.3087, + "step": 22356 + }, + { + "epoch": 0.4475539874383805, + "grad_norm": 1.0028810501098633, + "learning_rate": 6.083507983132811e-06, + "loss": 0.3071, + "step": 22357 + }, + { + "epoch": 0.44757400595550884, + "grad_norm": 1.0791600942611694, + "learning_rate": 6.083191501123951e-06, + "loss": 0.3585, + "step": 22358 + }, + { + "epoch": 0.4475940244726372, + "grad_norm": 1.1106082201004028, + "learning_rate": 6.082875014561585e-06, + "loss": 0.2905, + "step": 22359 + }, + { + "epoch": 0.44761404298976554, + "grad_norm": 1.034993052482605, + "learning_rate": 6.082558523447048e-06, + "loss": 0.3161, + "step": 22360 + }, + { + "epoch": 0.4476340615068939, + "grad_norm": 1.2328370809555054, + "learning_rate": 6.0822420277816675e-06, + "loss": 0.3212, + "step": 22361 + }, + { + "epoch": 0.44765408002402224, + "grad_norm": 1.0141111612319946, + "learning_rate": 6.081925527566774e-06, + "loss": 0.3472, + "step": 22362 + }, + { + "epoch": 0.4476740985411506, + "grad_norm": 1.8412622213363647, + "learning_rate": 6.081609022803697e-06, + "loss": 0.7602, + "step": 22363 + }, + { + "epoch": 0.4476941170582789, + "grad_norm": 1.0276435613632202, + "learning_rate": 6.08129251349377e-06, + "loss": 0.2818, + "step": 22364 + }, + { + "epoch": 0.44771413557540723, + "grad_norm": 1.0867300033569336, + "learning_rate": 6.080975999638322e-06, + "loss": 0.3164, + "step": 22365 + }, + { + "epoch": 0.4477341540925356, + "grad_norm": 1.0840030908584595, + "learning_rate": 6.0806594812386845e-06, + "loss": 0.2981, + "step": 22366 + }, + { + "epoch": 0.44775417260966394, + "grad_norm": 1.0994415283203125, + "learning_rate": 6.080342958296186e-06, + "loss": 0.2966, + "step": 22367 + }, + { + "epoch": 0.4477741911267923, + "grad_norm": 1.1065833568572998, + "learning_rate": 6.080026430812157e-06, + "loss": 0.3479, + "step": 22368 + }, + { + "epoch": 0.44779420964392064, + "grad_norm": 1.8532147407531738, + "learning_rate": 6.079709898787931e-06, + "loss": 0.8684, + "step": 22369 + }, + { + "epoch": 0.447814228161049, + "grad_norm": 1.0429083108901978, + "learning_rate": 6.079393362224837e-06, + "loss": 0.3079, + "step": 22370 + }, + { + "epoch": 0.44783424667817734, + "grad_norm": 1.1838114261627197, + "learning_rate": 6.079076821124206e-06, + "loss": 0.378, + "step": 22371 + }, + { + "epoch": 0.44785426519530563, + "grad_norm": 1.0935232639312744, + "learning_rate": 6.078760275487367e-06, + "loss": 0.3177, + "step": 22372 + }, + { + "epoch": 0.447874283712434, + "grad_norm": 1.2166128158569336, + "learning_rate": 6.078443725315652e-06, + "loss": 0.3068, + "step": 22373 + }, + { + "epoch": 0.44789430222956234, + "grad_norm": 1.9047470092773438, + "learning_rate": 6.078127170610392e-06, + "loss": 0.8302, + "step": 22374 + }, + { + "epoch": 0.4479143207466907, + "grad_norm": 1.1958260536193848, + "learning_rate": 6.0778106113729165e-06, + "loss": 0.3303, + "step": 22375 + }, + { + "epoch": 0.44793433926381904, + "grad_norm": 0.9969249367713928, + "learning_rate": 6.077494047604557e-06, + "loss": 0.283, + "step": 22376 + }, + { + "epoch": 0.4479543577809474, + "grad_norm": 1.055462121963501, + "learning_rate": 6.0771774793066464e-06, + "loss": 0.3202, + "step": 22377 + }, + { + "epoch": 0.44797437629807574, + "grad_norm": 1.1270242929458618, + "learning_rate": 6.076860906480512e-06, + "loss": 0.3426, + "step": 22378 + }, + { + "epoch": 0.4479943948152041, + "grad_norm": 1.055063247680664, + "learning_rate": 6.076544329127487e-06, + "loss": 0.2749, + "step": 22379 + }, + { + "epoch": 0.4480144133323324, + "grad_norm": 1.2043110132217407, + "learning_rate": 6.076227747248901e-06, + "loss": 0.3463, + "step": 22380 + }, + { + "epoch": 0.44803443184946073, + "grad_norm": 1.0387505292892456, + "learning_rate": 6.075911160846084e-06, + "loss": 0.2994, + "step": 22381 + }, + { + "epoch": 0.4480544503665891, + "grad_norm": 1.1003977060317993, + "learning_rate": 6.07559456992037e-06, + "loss": 0.2737, + "step": 22382 + }, + { + "epoch": 0.44807446888371744, + "grad_norm": 1.0458818674087524, + "learning_rate": 6.075277974473086e-06, + "loss": 0.296, + "step": 22383 + }, + { + "epoch": 0.4480944874008458, + "grad_norm": 1.0230435132980347, + "learning_rate": 6.074961374505567e-06, + "loss": 0.2989, + "step": 22384 + }, + { + "epoch": 0.44811450591797414, + "grad_norm": 1.1367771625518799, + "learning_rate": 6.074644770019139e-06, + "loss": 0.3029, + "step": 22385 + }, + { + "epoch": 0.4481345244351025, + "grad_norm": 1.1019798517227173, + "learning_rate": 6.074328161015139e-06, + "loss": 0.308, + "step": 22386 + }, + { + "epoch": 0.44815454295223084, + "grad_norm": 1.0557641983032227, + "learning_rate": 6.074011547494891e-06, + "loss": 0.2882, + "step": 22387 + }, + { + "epoch": 0.44817456146935913, + "grad_norm": 1.2959649562835693, + "learning_rate": 6.073694929459731e-06, + "loss": 0.2691, + "step": 22388 + }, + { + "epoch": 0.4481945799864875, + "grad_norm": 1.1446925401687622, + "learning_rate": 6.07337830691099e-06, + "loss": 0.3277, + "step": 22389 + }, + { + "epoch": 0.44821459850361584, + "grad_norm": 1.182990312576294, + "learning_rate": 6.073061679849997e-06, + "loss": 0.3537, + "step": 22390 + }, + { + "epoch": 0.4482346170207442, + "grad_norm": 1.1712825298309326, + "learning_rate": 6.072745048278083e-06, + "loss": 0.3369, + "step": 22391 + }, + { + "epoch": 0.44825463553787254, + "grad_norm": 0.9913872480392456, + "learning_rate": 6.072428412196579e-06, + "loss": 0.3114, + "step": 22392 + }, + { + "epoch": 0.4482746540550009, + "grad_norm": 1.1523102521896362, + "learning_rate": 6.072111771606816e-06, + "loss": 0.3651, + "step": 22393 + }, + { + "epoch": 0.44829467257212924, + "grad_norm": 1.2645272016525269, + "learning_rate": 6.0717951265101285e-06, + "loss": 0.3241, + "step": 22394 + }, + { + "epoch": 0.4483146910892576, + "grad_norm": 1.0498735904693604, + "learning_rate": 6.071478476907844e-06, + "loss": 0.3142, + "step": 22395 + }, + { + "epoch": 0.4483347096063859, + "grad_norm": 1.3338415622711182, + "learning_rate": 6.071161822801293e-06, + "loss": 0.3275, + "step": 22396 + }, + { + "epoch": 0.44835472812351423, + "grad_norm": 1.0411876440048218, + "learning_rate": 6.070845164191811e-06, + "loss": 0.2796, + "step": 22397 + }, + { + "epoch": 0.4483747466406426, + "grad_norm": 1.0837242603302002, + "learning_rate": 6.0705285010807245e-06, + "loss": 0.2967, + "step": 22398 + }, + { + "epoch": 0.44839476515777094, + "grad_norm": 1.0063449144363403, + "learning_rate": 6.070211833469367e-06, + "loss": 0.3287, + "step": 22399 + }, + { + "epoch": 0.4484147836748993, + "grad_norm": 1.0442349910736084, + "learning_rate": 6.06989516135907e-06, + "loss": 0.2655, + "step": 22400 + }, + { + "epoch": 0.44843480219202764, + "grad_norm": 1.1640204191207886, + "learning_rate": 6.069578484751162e-06, + "loss": 0.3344, + "step": 22401 + }, + { + "epoch": 0.448454820709156, + "grad_norm": 1.1307703256607056, + "learning_rate": 6.069261803646978e-06, + "loss": 0.3147, + "step": 22402 + }, + { + "epoch": 0.44847483922628434, + "grad_norm": 1.0346791744232178, + "learning_rate": 6.068945118047846e-06, + "loss": 0.336, + "step": 22403 + }, + { + "epoch": 0.44849485774341263, + "grad_norm": 1.062182903289795, + "learning_rate": 6.068628427955101e-06, + "loss": 0.3357, + "step": 22404 + }, + { + "epoch": 0.448514876260541, + "grad_norm": 1.0776060819625854, + "learning_rate": 6.0683117333700705e-06, + "loss": 0.292, + "step": 22405 + }, + { + "epoch": 0.44853489477766934, + "grad_norm": 1.1667518615722656, + "learning_rate": 6.067995034294087e-06, + "loss": 0.3025, + "step": 22406 + }, + { + "epoch": 0.4485549132947977, + "grad_norm": 1.126808762550354, + "learning_rate": 6.0676783307284825e-06, + "loss": 0.3311, + "step": 22407 + }, + { + "epoch": 0.44857493181192604, + "grad_norm": 1.340714454650879, + "learning_rate": 6.067361622674589e-06, + "loss": 0.3239, + "step": 22408 + }, + { + "epoch": 0.4485949503290544, + "grad_norm": 1.0815370082855225, + "learning_rate": 6.067044910133736e-06, + "loss": 0.2884, + "step": 22409 + }, + { + "epoch": 0.44861496884618274, + "grad_norm": 1.082919716835022, + "learning_rate": 6.066728193107256e-06, + "loss": 0.2897, + "step": 22410 + }, + { + "epoch": 0.4486349873633111, + "grad_norm": 1.911756992340088, + "learning_rate": 6.066411471596481e-06, + "loss": 0.8947, + "step": 22411 + }, + { + "epoch": 0.4486550058804394, + "grad_norm": 1.0341291427612305, + "learning_rate": 6.066094745602742e-06, + "loss": 0.3395, + "step": 22412 + }, + { + "epoch": 0.44867502439756773, + "grad_norm": 1.1409331560134888, + "learning_rate": 6.065778015127368e-06, + "loss": 0.318, + "step": 22413 + }, + { + "epoch": 0.4486950429146961, + "grad_norm": 1.9442938566207886, + "learning_rate": 6.0654612801716935e-06, + "loss": 0.8272, + "step": 22414 + }, + { + "epoch": 0.44871506143182444, + "grad_norm": 1.043050765991211, + "learning_rate": 6.06514454073705e-06, + "loss": 0.3102, + "step": 22415 + }, + { + "epoch": 0.4487350799489528, + "grad_norm": 1.2846543788909912, + "learning_rate": 6.064827796824767e-06, + "loss": 0.3467, + "step": 22416 + }, + { + "epoch": 0.44875509846608114, + "grad_norm": 1.105373740196228, + "learning_rate": 6.064511048436178e-06, + "loss": 0.2891, + "step": 22417 + }, + { + "epoch": 0.4487751169832095, + "grad_norm": 1.2539206743240356, + "learning_rate": 6.064194295572613e-06, + "loss": 0.302, + "step": 22418 + }, + { + "epoch": 0.44879513550033784, + "grad_norm": 1.0709956884384155, + "learning_rate": 6.063877538235404e-06, + "loss": 0.3154, + "step": 22419 + }, + { + "epoch": 0.44881515401746613, + "grad_norm": 1.1406148672103882, + "learning_rate": 6.063560776425884e-06, + "loss": 0.2832, + "step": 22420 + }, + { + "epoch": 0.4488351725345945, + "grad_norm": 1.0839098691940308, + "learning_rate": 6.063244010145381e-06, + "loss": 0.3183, + "step": 22421 + }, + { + "epoch": 0.44885519105172283, + "grad_norm": 1.2624748945236206, + "learning_rate": 6.062927239395232e-06, + "loss": 0.3069, + "step": 22422 + }, + { + "epoch": 0.4488752095688512, + "grad_norm": 0.9847859144210815, + "learning_rate": 6.062610464176764e-06, + "loss": 0.2849, + "step": 22423 + }, + { + "epoch": 0.44889522808597954, + "grad_norm": 1.0655919313430786, + "learning_rate": 6.06229368449131e-06, + "loss": 0.2806, + "step": 22424 + }, + { + "epoch": 0.4489152466031079, + "grad_norm": 1.1316760778427124, + "learning_rate": 6.0619769003402025e-06, + "loss": 0.3356, + "step": 22425 + }, + { + "epoch": 0.44893526512023624, + "grad_norm": 1.1906750202178955, + "learning_rate": 6.061660111724772e-06, + "loss": 0.3419, + "step": 22426 + }, + { + "epoch": 0.4489552836373646, + "grad_norm": 1.216407299041748, + "learning_rate": 6.061343318646351e-06, + "loss": 0.3406, + "step": 22427 + }, + { + "epoch": 0.4489753021544929, + "grad_norm": 1.0925770998001099, + "learning_rate": 6.061026521106272e-06, + "loss": 0.3103, + "step": 22428 + }, + { + "epoch": 0.44899532067162123, + "grad_norm": 1.2450823783874512, + "learning_rate": 6.060709719105865e-06, + "loss": 0.3525, + "step": 22429 + }, + { + "epoch": 0.4490153391887496, + "grad_norm": 0.9749270677566528, + "learning_rate": 6.060392912646463e-06, + "loss": 0.343, + "step": 22430 + }, + { + "epoch": 0.44903535770587794, + "grad_norm": 1.0980619192123413, + "learning_rate": 6.060076101729397e-06, + "loss": 0.3218, + "step": 22431 + }, + { + "epoch": 0.4490553762230063, + "grad_norm": 1.0518152713775635, + "learning_rate": 6.059759286355999e-06, + "loss": 0.2906, + "step": 22432 + }, + { + "epoch": 0.44907539474013464, + "grad_norm": 1.1190299987792969, + "learning_rate": 6.059442466527603e-06, + "loss": 0.3393, + "step": 22433 + }, + { + "epoch": 0.449095413257263, + "grad_norm": 1.0647499561309814, + "learning_rate": 6.059125642245537e-06, + "loss": 0.2885, + "step": 22434 + }, + { + "epoch": 0.44911543177439134, + "grad_norm": 1.0235754251480103, + "learning_rate": 6.058808813511135e-06, + "loss": 0.3259, + "step": 22435 + }, + { + "epoch": 0.44913545029151963, + "grad_norm": 1.2603827714920044, + "learning_rate": 6.058491980325729e-06, + "loss": 0.2876, + "step": 22436 + }, + { + "epoch": 0.449155468808648, + "grad_norm": 1.2123925685882568, + "learning_rate": 6.058175142690651e-06, + "loss": 0.3197, + "step": 22437 + }, + { + "epoch": 0.44917548732577633, + "grad_norm": 1.0965687036514282, + "learning_rate": 6.057858300607231e-06, + "loss": 0.2805, + "step": 22438 + }, + { + "epoch": 0.4491955058429047, + "grad_norm": 1.0817780494689941, + "learning_rate": 6.057541454076803e-06, + "loss": 0.3251, + "step": 22439 + }, + { + "epoch": 0.44921552436003304, + "grad_norm": 1.0466331243515015, + "learning_rate": 6.057224603100698e-06, + "loss": 0.2995, + "step": 22440 + }, + { + "epoch": 0.4492355428771614, + "grad_norm": 1.0865825414657593, + "learning_rate": 6.0569077476802494e-06, + "loss": 0.2492, + "step": 22441 + }, + { + "epoch": 0.44925556139428974, + "grad_norm": 1.0221670866012573, + "learning_rate": 6.056590887816788e-06, + "loss": 0.3145, + "step": 22442 + }, + { + "epoch": 0.4492755799114181, + "grad_norm": 1.101575255393982, + "learning_rate": 6.0562740235116446e-06, + "loss": 0.3092, + "step": 22443 + }, + { + "epoch": 0.4492955984285464, + "grad_norm": 1.210998773574829, + "learning_rate": 6.055957154766152e-06, + "loss": 0.2809, + "step": 22444 + }, + { + "epoch": 0.44931561694567473, + "grad_norm": 1.1094157695770264, + "learning_rate": 6.055640281581644e-06, + "loss": 0.3353, + "step": 22445 + }, + { + "epoch": 0.4493356354628031, + "grad_norm": 1.1116505861282349, + "learning_rate": 6.055323403959453e-06, + "loss": 0.2768, + "step": 22446 + }, + { + "epoch": 0.44935565397993144, + "grad_norm": 2.034712314605713, + "learning_rate": 6.055006521900906e-06, + "loss": 0.7991, + "step": 22447 + }, + { + "epoch": 0.4493756724970598, + "grad_norm": 1.0547276735305786, + "learning_rate": 6.054689635407341e-06, + "loss": 0.2936, + "step": 22448 + }, + { + "epoch": 0.44939569101418814, + "grad_norm": 1.0741623640060425, + "learning_rate": 6.054372744480087e-06, + "loss": 0.2542, + "step": 22449 + }, + { + "epoch": 0.4494157095313165, + "grad_norm": 1.1444449424743652, + "learning_rate": 6.054055849120478e-06, + "loss": 0.3106, + "step": 22450 + }, + { + "epoch": 0.44943572804844484, + "grad_norm": 1.0894153118133545, + "learning_rate": 6.053738949329844e-06, + "loss": 0.3222, + "step": 22451 + }, + { + "epoch": 0.44945574656557313, + "grad_norm": 1.2360916137695312, + "learning_rate": 6.053422045109517e-06, + "loss": 0.2904, + "step": 22452 + }, + { + "epoch": 0.4494757650827015, + "grad_norm": 1.102912187576294, + "learning_rate": 6.053105136460833e-06, + "loss": 0.2592, + "step": 22453 + }, + { + "epoch": 0.44949578359982983, + "grad_norm": 1.1716678142547607, + "learning_rate": 6.05278822338512e-06, + "loss": 0.3216, + "step": 22454 + }, + { + "epoch": 0.4495158021169582, + "grad_norm": 1.091932773590088, + "learning_rate": 6.0524713058837125e-06, + "loss": 0.2951, + "step": 22455 + }, + { + "epoch": 0.44953582063408654, + "grad_norm": 1.1780683994293213, + "learning_rate": 6.052154383957942e-06, + "loss": 0.3099, + "step": 22456 + }, + { + "epoch": 0.4495558391512149, + "grad_norm": 1.1256030797958374, + "learning_rate": 6.0518374576091396e-06, + "loss": 0.3833, + "step": 22457 + }, + { + "epoch": 0.44957585766834324, + "grad_norm": 1.0810747146606445, + "learning_rate": 6.05152052683864e-06, + "loss": 0.3089, + "step": 22458 + }, + { + "epoch": 0.4495958761854716, + "grad_norm": 1.0349924564361572, + "learning_rate": 6.051203591647775e-06, + "loss": 0.3373, + "step": 22459 + }, + { + "epoch": 0.4496158947025999, + "grad_norm": 1.1712677478790283, + "learning_rate": 6.050886652037876e-06, + "loss": 0.3618, + "step": 22460 + }, + { + "epoch": 0.44963591321972823, + "grad_norm": 1.2890855073928833, + "learning_rate": 6.050569708010275e-06, + "loss": 0.3292, + "step": 22461 + }, + { + "epoch": 0.4496559317368566, + "grad_norm": 1.9523013830184937, + "learning_rate": 6.050252759566307e-06, + "loss": 0.7797, + "step": 22462 + }, + { + "epoch": 0.44967595025398494, + "grad_norm": 1.2465485334396362, + "learning_rate": 6.049935806707301e-06, + "loss": 0.3149, + "step": 22463 + }, + { + "epoch": 0.4496959687711133, + "grad_norm": 1.0826302766799927, + "learning_rate": 6.0496188494345895e-06, + "loss": 0.3111, + "step": 22464 + }, + { + "epoch": 0.44971598728824164, + "grad_norm": 1.0821315050125122, + "learning_rate": 6.049301887749507e-06, + "loss": 0.2908, + "step": 22465 + }, + { + "epoch": 0.44973600580537, + "grad_norm": 1.2177761793136597, + "learning_rate": 6.048984921653387e-06, + "loss": 0.3673, + "step": 22466 + }, + { + "epoch": 0.44975602432249834, + "grad_norm": 1.0721176862716675, + "learning_rate": 6.048667951147558e-06, + "loss": 0.3484, + "step": 22467 + }, + { + "epoch": 0.44977604283962663, + "grad_norm": 1.0995553731918335, + "learning_rate": 6.0483509762333555e-06, + "loss": 0.3601, + "step": 22468 + }, + { + "epoch": 0.449796061356755, + "grad_norm": 1.174930453300476, + "learning_rate": 6.048033996912112e-06, + "loss": 0.3128, + "step": 22469 + }, + { + "epoch": 0.44981607987388333, + "grad_norm": 1.823129415512085, + "learning_rate": 6.047717013185157e-06, + "loss": 0.8102, + "step": 22470 + }, + { + "epoch": 0.4498360983910117, + "grad_norm": 1.1596537828445435, + "learning_rate": 6.047400025053826e-06, + "loss": 0.3002, + "step": 22471 + }, + { + "epoch": 0.44985611690814004, + "grad_norm": 1.209168791770935, + "learning_rate": 6.047083032519452e-06, + "loss": 0.2943, + "step": 22472 + }, + { + "epoch": 0.4498761354252684, + "grad_norm": 1.0821894407272339, + "learning_rate": 6.046766035583364e-06, + "loss": 0.322, + "step": 22473 + }, + { + "epoch": 0.44989615394239674, + "grad_norm": 0.9958720207214355, + "learning_rate": 6.046449034246898e-06, + "loss": 0.2916, + "step": 22474 + }, + { + "epoch": 0.4499161724595251, + "grad_norm": 1.0963356494903564, + "learning_rate": 6.046132028511387e-06, + "loss": 0.2749, + "step": 22475 + }, + { + "epoch": 0.4499361909766534, + "grad_norm": 1.898795485496521, + "learning_rate": 6.04581501837816e-06, + "loss": 0.817, + "step": 22476 + }, + { + "epoch": 0.44995620949378173, + "grad_norm": 1.1580978631973267, + "learning_rate": 6.045498003848552e-06, + "loss": 0.2836, + "step": 22477 + }, + { + "epoch": 0.4499762280109101, + "grad_norm": 1.2134782075881958, + "learning_rate": 6.045180984923895e-06, + "loss": 0.3707, + "step": 22478 + }, + { + "epoch": 0.44999624652803843, + "grad_norm": 1.1270966529846191, + "learning_rate": 6.044863961605524e-06, + "loss": 0.2982, + "step": 22479 + }, + { + "epoch": 0.4500162650451668, + "grad_norm": 1.158355474472046, + "learning_rate": 6.0445469338947685e-06, + "loss": 0.3681, + "step": 22480 + }, + { + "epoch": 0.45003628356229514, + "grad_norm": 1.7585599422454834, + "learning_rate": 6.044229901792963e-06, + "loss": 0.7974, + "step": 22481 + }, + { + "epoch": 0.4500563020794235, + "grad_norm": 1.0423330068588257, + "learning_rate": 6.043912865301438e-06, + "loss": 0.3111, + "step": 22482 + }, + { + "epoch": 0.45007632059655184, + "grad_norm": 1.3239003419876099, + "learning_rate": 6.043595824421529e-06, + "loss": 0.3391, + "step": 22483 + }, + { + "epoch": 0.45009633911368013, + "grad_norm": 1.058052659034729, + "learning_rate": 6.043278779154569e-06, + "loss": 0.2638, + "step": 22484 + }, + { + "epoch": 0.4501163576308085, + "grad_norm": 1.1171144247055054, + "learning_rate": 6.042961729501888e-06, + "loss": 0.2887, + "step": 22485 + }, + { + "epoch": 0.45013637614793683, + "grad_norm": 1.2548145055770874, + "learning_rate": 6.042644675464822e-06, + "loss": 0.2717, + "step": 22486 + }, + { + "epoch": 0.4501563946650652, + "grad_norm": 1.1306229829788208, + "learning_rate": 6.0423276170447e-06, + "loss": 0.3233, + "step": 22487 + }, + { + "epoch": 0.45017641318219354, + "grad_norm": 2.1408278942108154, + "learning_rate": 6.042010554242859e-06, + "loss": 0.7641, + "step": 22488 + }, + { + "epoch": 0.4501964316993219, + "grad_norm": 1.0700935125350952, + "learning_rate": 6.041693487060628e-06, + "loss": 0.2941, + "step": 22489 + }, + { + "epoch": 0.45021645021645024, + "grad_norm": 1.1420913934707642, + "learning_rate": 6.041376415499343e-06, + "loss": 0.3239, + "step": 22490 + }, + { + "epoch": 0.4502364687335786, + "grad_norm": 1.1152466535568237, + "learning_rate": 6.041059339560333e-06, + "loss": 0.2937, + "step": 22491 + }, + { + "epoch": 0.4502564872507069, + "grad_norm": 1.1329760551452637, + "learning_rate": 6.040742259244937e-06, + "loss": 0.2605, + "step": 22492 + }, + { + "epoch": 0.45027650576783523, + "grad_norm": 1.0548878908157349, + "learning_rate": 6.040425174554485e-06, + "loss": 0.2628, + "step": 22493 + }, + { + "epoch": 0.4502965242849636, + "grad_norm": 1.4294648170471191, + "learning_rate": 6.040108085490307e-06, + "loss": 0.3472, + "step": 22494 + }, + { + "epoch": 0.45031654280209193, + "grad_norm": 1.1179827451705933, + "learning_rate": 6.039790992053739e-06, + "loss": 0.325, + "step": 22495 + }, + { + "epoch": 0.4503365613192203, + "grad_norm": 1.0194711685180664, + "learning_rate": 6.039473894246113e-06, + "loss": 0.2845, + "step": 22496 + }, + { + "epoch": 0.45035657983634864, + "grad_norm": 1.173492193222046, + "learning_rate": 6.039156792068764e-06, + "loss": 0.2976, + "step": 22497 + }, + { + "epoch": 0.450376598353477, + "grad_norm": 1.1338636875152588, + "learning_rate": 6.038839685523022e-06, + "loss": 0.2986, + "step": 22498 + }, + { + "epoch": 0.45039661687060534, + "grad_norm": 1.2278516292572021, + "learning_rate": 6.038522574610223e-06, + "loss": 0.3524, + "step": 22499 + }, + { + "epoch": 0.45041663538773363, + "grad_norm": 1.1930471658706665, + "learning_rate": 6.038205459331698e-06, + "loss": 0.3113, + "step": 22500 + }, + { + "epoch": 0.450436653904862, + "grad_norm": 1.1167913675308228, + "learning_rate": 6.03788833968878e-06, + "loss": 0.2958, + "step": 22501 + }, + { + "epoch": 0.45045667242199033, + "grad_norm": 1.1738426685333252, + "learning_rate": 6.0375712156828025e-06, + "loss": 0.3075, + "step": 22502 + }, + { + "epoch": 0.4504766909391187, + "grad_norm": 1.0837057828903198, + "learning_rate": 6.037254087315099e-06, + "loss": 0.2901, + "step": 22503 + }, + { + "epoch": 0.45049670945624704, + "grad_norm": 1.0349482297897339, + "learning_rate": 6.036936954587005e-06, + "loss": 0.3072, + "step": 22504 + }, + { + "epoch": 0.4505167279733754, + "grad_norm": 1.1732932329177856, + "learning_rate": 6.036619817499848e-06, + "loss": 0.2942, + "step": 22505 + }, + { + "epoch": 0.45053674649050374, + "grad_norm": 1.893498420715332, + "learning_rate": 6.036302676054967e-06, + "loss": 0.8196, + "step": 22506 + }, + { + "epoch": 0.4505567650076321, + "grad_norm": 1.9661996364593506, + "learning_rate": 6.035985530253691e-06, + "loss": 0.8335, + "step": 22507 + }, + { + "epoch": 0.4505767835247604, + "grad_norm": 1.2783203125, + "learning_rate": 6.0356683800973535e-06, + "loss": 0.3075, + "step": 22508 + }, + { + "epoch": 0.45059680204188873, + "grad_norm": 1.2155381441116333, + "learning_rate": 6.0353512255872906e-06, + "loss": 0.2868, + "step": 22509 + }, + { + "epoch": 0.4506168205590171, + "grad_norm": 1.250253677368164, + "learning_rate": 6.035034066724834e-06, + "loss": 0.3189, + "step": 22510 + }, + { + "epoch": 0.45063683907614543, + "grad_norm": 1.0015814304351807, + "learning_rate": 6.034716903511317e-06, + "loss": 0.3097, + "step": 22511 + }, + { + "epoch": 0.4506568575932738, + "grad_norm": 1.050694465637207, + "learning_rate": 6.034399735948074e-06, + "loss": 0.3045, + "step": 22512 + }, + { + "epoch": 0.45067687611040214, + "grad_norm": 1.0384271144866943, + "learning_rate": 6.0340825640364345e-06, + "loss": 0.3256, + "step": 22513 + }, + { + "epoch": 0.4506968946275305, + "grad_norm": 1.912233829498291, + "learning_rate": 6.033765387777737e-06, + "loss": 0.7953, + "step": 22514 + }, + { + "epoch": 0.4507169131446588, + "grad_norm": 0.9844310283660889, + "learning_rate": 6.03344820717331e-06, + "loss": 0.2764, + "step": 22515 + }, + { + "epoch": 0.45073693166178713, + "grad_norm": 1.1352142095565796, + "learning_rate": 6.03313102222449e-06, + "loss": 0.3614, + "step": 22516 + }, + { + "epoch": 0.4507569501789155, + "grad_norm": 1.0622812509536743, + "learning_rate": 6.032813832932611e-06, + "loss": 0.3155, + "step": 22517 + }, + { + "epoch": 0.45077696869604383, + "grad_norm": 1.1932095289230347, + "learning_rate": 6.032496639299002e-06, + "loss": 0.3656, + "step": 22518 + }, + { + "epoch": 0.4507969872131722, + "grad_norm": 1.1153695583343506, + "learning_rate": 6.032179441325001e-06, + "loss": 0.2813, + "step": 22519 + }, + { + "epoch": 0.45081700573030054, + "grad_norm": 1.125672459602356, + "learning_rate": 6.0318622390119395e-06, + "loss": 0.3133, + "step": 22520 + }, + { + "epoch": 0.4508370242474289, + "grad_norm": 1.1445560455322266, + "learning_rate": 6.031545032361152e-06, + "loss": 0.3248, + "step": 22521 + }, + { + "epoch": 0.45085704276455724, + "grad_norm": 1.8604929447174072, + "learning_rate": 6.031227821373969e-06, + "loss": 0.8181, + "step": 22522 + }, + { + "epoch": 0.45087706128168553, + "grad_norm": 1.1601638793945312, + "learning_rate": 6.030910606051729e-06, + "loss": 0.3169, + "step": 22523 + }, + { + "epoch": 0.4508970797988139, + "grad_norm": 1.0428202152252197, + "learning_rate": 6.03059338639576e-06, + "loss": 0.3094, + "step": 22524 + }, + { + "epoch": 0.45091709831594223, + "grad_norm": 0.9522122740745544, + "learning_rate": 6.0302761624074e-06, + "loss": 0.2611, + "step": 22525 + }, + { + "epoch": 0.4509371168330706, + "grad_norm": 1.132990837097168, + "learning_rate": 6.0299589340879815e-06, + "loss": 0.385, + "step": 22526 + }, + { + "epoch": 0.45095713535019893, + "grad_norm": 1.4537227153778076, + "learning_rate": 6.029641701438835e-06, + "loss": 0.3363, + "step": 22527 + }, + { + "epoch": 0.4509771538673273, + "grad_norm": 1.0345600843429565, + "learning_rate": 6.029324464461298e-06, + "loss": 0.3423, + "step": 22528 + }, + { + "epoch": 0.45099717238445564, + "grad_norm": 1.1391682624816895, + "learning_rate": 6.029007223156701e-06, + "loss": 0.3206, + "step": 22529 + }, + { + "epoch": 0.451017190901584, + "grad_norm": 1.0724595785140991, + "learning_rate": 6.028689977526381e-06, + "loss": 0.2892, + "step": 22530 + }, + { + "epoch": 0.4510372094187123, + "grad_norm": 1.0499677658081055, + "learning_rate": 6.028372727571667e-06, + "loss": 0.2899, + "step": 22531 + }, + { + "epoch": 0.45105722793584063, + "grad_norm": 1.1665936708450317, + "learning_rate": 6.028055473293899e-06, + "loss": 0.2909, + "step": 22532 + }, + { + "epoch": 0.451077246452969, + "grad_norm": 1.0837346315383911, + "learning_rate": 6.027738214694404e-06, + "loss": 0.2904, + "step": 22533 + }, + { + "epoch": 0.45109726497009733, + "grad_norm": 1.2067246437072754, + "learning_rate": 6.02742095177452e-06, + "loss": 0.3553, + "step": 22534 + }, + { + "epoch": 0.4511172834872257, + "grad_norm": 1.0953165292739868, + "learning_rate": 6.0271036845355794e-06, + "loss": 0.3166, + "step": 22535 + }, + { + "epoch": 0.45113730200435403, + "grad_norm": 1.100868821144104, + "learning_rate": 6.026786412978916e-06, + "loss": 0.3231, + "step": 22536 + }, + { + "epoch": 0.4511573205214824, + "grad_norm": 1.0418235063552856, + "learning_rate": 6.026469137105862e-06, + "loss": 0.2961, + "step": 22537 + }, + { + "epoch": 0.45117733903861074, + "grad_norm": 1.1157437562942505, + "learning_rate": 6.026151856917755e-06, + "loss": 0.3053, + "step": 22538 + }, + { + "epoch": 0.45119735755573903, + "grad_norm": 1.0842692852020264, + "learning_rate": 6.025834572415925e-06, + "loss": 0.3531, + "step": 22539 + }, + { + "epoch": 0.4512173760728674, + "grad_norm": 1.2846317291259766, + "learning_rate": 6.025517283601707e-06, + "loss": 0.291, + "step": 22540 + }, + { + "epoch": 0.45123739458999573, + "grad_norm": 1.1689785718917847, + "learning_rate": 6.025199990476435e-06, + "loss": 0.3449, + "step": 22541 + }, + { + "epoch": 0.4512574131071241, + "grad_norm": 1.0602561235427856, + "learning_rate": 6.024882693041444e-06, + "loss": 0.3013, + "step": 22542 + }, + { + "epoch": 0.45127743162425243, + "grad_norm": 1.1394239664077759, + "learning_rate": 6.024565391298066e-06, + "loss": 0.3304, + "step": 22543 + }, + { + "epoch": 0.4512974501413808, + "grad_norm": 1.0849461555480957, + "learning_rate": 6.024248085247636e-06, + "loss": 0.3449, + "step": 22544 + }, + { + "epoch": 0.45131746865850914, + "grad_norm": 1.0935229063034058, + "learning_rate": 6.023930774891488e-06, + "loss": 0.3016, + "step": 22545 + }, + { + "epoch": 0.4513374871756375, + "grad_norm": 1.0257935523986816, + "learning_rate": 6.023613460230955e-06, + "loss": 0.2805, + "step": 22546 + }, + { + "epoch": 0.4513575056927658, + "grad_norm": 1.1440708637237549, + "learning_rate": 6.023296141267369e-06, + "loss": 0.36, + "step": 22547 + }, + { + "epoch": 0.45137752420989413, + "grad_norm": 1.0947564840316772, + "learning_rate": 6.022978818002069e-06, + "loss": 0.3022, + "step": 22548 + }, + { + "epoch": 0.4513975427270225, + "grad_norm": 1.7976431846618652, + "learning_rate": 6.022661490436384e-06, + "loss": 0.7485, + "step": 22549 + }, + { + "epoch": 0.45141756124415083, + "grad_norm": 1.1695650815963745, + "learning_rate": 6.022344158571653e-06, + "loss": 0.2956, + "step": 22550 + }, + { + "epoch": 0.4514375797612792, + "grad_norm": 1.0662912130355835, + "learning_rate": 6.022026822409205e-06, + "loss": 0.3203, + "step": 22551 + }, + { + "epoch": 0.45145759827840753, + "grad_norm": 1.3203171491622925, + "learning_rate": 6.021709481950377e-06, + "loss": 0.3516, + "step": 22552 + }, + { + "epoch": 0.4514776167955359, + "grad_norm": 1.7611922025680542, + "learning_rate": 6.0213921371965026e-06, + "loss": 0.8135, + "step": 22553 + }, + { + "epoch": 0.45149763531266424, + "grad_norm": 1.0871858596801758, + "learning_rate": 6.021074788148914e-06, + "loss": 0.2708, + "step": 22554 + }, + { + "epoch": 0.45151765382979253, + "grad_norm": 1.1085277795791626, + "learning_rate": 6.020757434808947e-06, + "loss": 0.3392, + "step": 22555 + }, + { + "epoch": 0.4515376723469209, + "grad_norm": 1.2912330627441406, + "learning_rate": 6.020440077177937e-06, + "loss": 0.3347, + "step": 22556 + }, + { + "epoch": 0.45155769086404923, + "grad_norm": 1.2322981357574463, + "learning_rate": 6.0201227152572165e-06, + "loss": 0.3347, + "step": 22557 + }, + { + "epoch": 0.4515777093811776, + "grad_norm": 1.1050941944122314, + "learning_rate": 6.019805349048118e-06, + "loss": 0.3035, + "step": 22558 + }, + { + "epoch": 0.45159772789830593, + "grad_norm": 1.086206316947937, + "learning_rate": 6.0194879785519774e-06, + "loss": 0.327, + "step": 22559 + }, + { + "epoch": 0.4516177464154343, + "grad_norm": 1.1220613718032837, + "learning_rate": 6.019170603770128e-06, + "loss": 0.2693, + "step": 22560 + }, + { + "epoch": 0.45163776493256264, + "grad_norm": 1.238444209098816, + "learning_rate": 6.018853224703907e-06, + "loss": 0.314, + "step": 22561 + }, + { + "epoch": 0.451657783449691, + "grad_norm": 1.0867360830307007, + "learning_rate": 6.018535841354645e-06, + "loss": 0.3513, + "step": 22562 + }, + { + "epoch": 0.4516778019668193, + "grad_norm": 1.0774359703063965, + "learning_rate": 6.018218453723679e-06, + "loss": 0.3184, + "step": 22563 + }, + { + "epoch": 0.45169782048394763, + "grad_norm": 1.8206090927124023, + "learning_rate": 6.017901061812339e-06, + "loss": 0.7935, + "step": 22564 + }, + { + "epoch": 0.451717839001076, + "grad_norm": 1.0334961414337158, + "learning_rate": 6.0175836656219646e-06, + "loss": 0.2792, + "step": 22565 + }, + { + "epoch": 0.45173785751820433, + "grad_norm": 0.9877731204032898, + "learning_rate": 6.017266265153886e-06, + "loss": 0.2782, + "step": 22566 + }, + { + "epoch": 0.4517578760353327, + "grad_norm": 1.1240922212600708, + "learning_rate": 6.016948860409439e-06, + "loss": 0.3385, + "step": 22567 + }, + { + "epoch": 0.45177789455246103, + "grad_norm": 1.3417613506317139, + "learning_rate": 6.0166314513899595e-06, + "loss": 0.3264, + "step": 22568 + }, + { + "epoch": 0.4517979130695894, + "grad_norm": 1.1344630718231201, + "learning_rate": 6.0163140380967775e-06, + "loss": 0.3703, + "step": 22569 + }, + { + "epoch": 0.45181793158671774, + "grad_norm": 1.180842399597168, + "learning_rate": 6.0159966205312325e-06, + "loss": 0.3423, + "step": 22570 + }, + { + "epoch": 0.45183795010384603, + "grad_norm": 1.2772682905197144, + "learning_rate": 6.015679198694655e-06, + "loss": 0.3474, + "step": 22571 + }, + { + "epoch": 0.4518579686209744, + "grad_norm": 1.2745038270950317, + "learning_rate": 6.015361772588382e-06, + "loss": 0.2935, + "step": 22572 + }, + { + "epoch": 0.45187798713810273, + "grad_norm": 1.2416696548461914, + "learning_rate": 6.015044342213743e-06, + "loss": 0.3001, + "step": 22573 + }, + { + "epoch": 0.4518980056552311, + "grad_norm": 1.0154789686203003, + "learning_rate": 6.014726907572079e-06, + "loss": 0.2901, + "step": 22574 + }, + { + "epoch": 0.45191802417235943, + "grad_norm": 1.0938864946365356, + "learning_rate": 6.014409468664721e-06, + "loss": 0.2657, + "step": 22575 + }, + { + "epoch": 0.4519380426894878, + "grad_norm": 1.8521960973739624, + "learning_rate": 6.0140920254930044e-06, + "loss": 0.8621, + "step": 22576 + }, + { + "epoch": 0.45195806120661614, + "grad_norm": 1.1227943897247314, + "learning_rate": 6.013774578058264e-06, + "loss": 0.2748, + "step": 22577 + }, + { + "epoch": 0.4519780797237445, + "grad_norm": 1.0349526405334473, + "learning_rate": 6.013457126361831e-06, + "loss": 0.3081, + "step": 22578 + }, + { + "epoch": 0.4519980982408728, + "grad_norm": 1.1438859701156616, + "learning_rate": 6.013139670405044e-06, + "loss": 0.2759, + "step": 22579 + }, + { + "epoch": 0.45201811675800113, + "grad_norm": 1.1194088459014893, + "learning_rate": 6.012822210189234e-06, + "loss": 0.3117, + "step": 22580 + }, + { + "epoch": 0.4520381352751295, + "grad_norm": 1.2372767925262451, + "learning_rate": 6.01250474571574e-06, + "loss": 0.3201, + "step": 22581 + }, + { + "epoch": 0.45205815379225783, + "grad_norm": 1.096179723739624, + "learning_rate": 6.012187276985891e-06, + "loss": 0.2655, + "step": 22582 + }, + { + "epoch": 0.4520781723093862, + "grad_norm": 1.0912761688232422, + "learning_rate": 6.011869804001027e-06, + "loss": 0.3115, + "step": 22583 + }, + { + "epoch": 0.45209819082651453, + "grad_norm": 1.1392570734024048, + "learning_rate": 6.01155232676248e-06, + "loss": 0.3592, + "step": 22584 + }, + { + "epoch": 0.4521182093436429, + "grad_norm": 1.8388967514038086, + "learning_rate": 6.011234845271584e-06, + "loss": 0.7677, + "step": 22585 + }, + { + "epoch": 0.45213822786077124, + "grad_norm": 0.9904199838638306, + "learning_rate": 6.010917359529672e-06, + "loss": 0.284, + "step": 22586 + }, + { + "epoch": 0.45215824637789953, + "grad_norm": 1.13094961643219, + "learning_rate": 6.010599869538084e-06, + "loss": 0.313, + "step": 22587 + }, + { + "epoch": 0.4521782648950279, + "grad_norm": 1.1071943044662476, + "learning_rate": 6.010282375298152e-06, + "loss": 0.3173, + "step": 22588 + }, + { + "epoch": 0.45219828341215623, + "grad_norm": 1.0646731853485107, + "learning_rate": 6.009964876811209e-06, + "loss": 0.2968, + "step": 22589 + }, + { + "epoch": 0.4522183019292846, + "grad_norm": 0.9956527352333069, + "learning_rate": 6.009647374078592e-06, + "loss": 0.2865, + "step": 22590 + }, + { + "epoch": 0.45223832044641293, + "grad_norm": 1.1417145729064941, + "learning_rate": 6.009329867101633e-06, + "loss": 0.311, + "step": 22591 + }, + { + "epoch": 0.4522583389635413, + "grad_norm": 1.0486959218978882, + "learning_rate": 6.009012355881669e-06, + "loss": 0.2819, + "step": 22592 + }, + { + "epoch": 0.45227835748066963, + "grad_norm": 0.987669050693512, + "learning_rate": 6.008694840420034e-06, + "loss": 0.2783, + "step": 22593 + }, + { + "epoch": 0.452298375997798, + "grad_norm": 1.1370265483856201, + "learning_rate": 6.008377320718064e-06, + "loss": 0.3238, + "step": 22594 + }, + { + "epoch": 0.4523183945149263, + "grad_norm": 1.1378532648086548, + "learning_rate": 6.008059796777093e-06, + "loss": 0.3142, + "step": 22595 + }, + { + "epoch": 0.45233841303205463, + "grad_norm": 1.7798882722854614, + "learning_rate": 6.0077422685984556e-06, + "loss": 0.8292, + "step": 22596 + }, + { + "epoch": 0.452358431549183, + "grad_norm": 1.0900577306747437, + "learning_rate": 6.0074247361834845e-06, + "loss": 0.3049, + "step": 22597 + }, + { + "epoch": 0.45237845006631133, + "grad_norm": 1.105094313621521, + "learning_rate": 6.007107199533518e-06, + "loss": 0.335, + "step": 22598 + }, + { + "epoch": 0.4523984685834397, + "grad_norm": 1.0258539915084839, + "learning_rate": 6.00678965864989e-06, + "loss": 0.2894, + "step": 22599 + }, + { + "epoch": 0.45241848710056803, + "grad_norm": 1.1242611408233643, + "learning_rate": 6.006472113533933e-06, + "loss": 0.353, + "step": 22600 + }, + { + "epoch": 0.4524385056176964, + "grad_norm": 1.2380298376083374, + "learning_rate": 6.006154564186987e-06, + "loss": 0.3051, + "step": 22601 + }, + { + "epoch": 0.45245852413482474, + "grad_norm": 1.051439642906189, + "learning_rate": 6.005837010610382e-06, + "loss": 0.295, + "step": 22602 + }, + { + "epoch": 0.45247854265195303, + "grad_norm": 1.0719307661056519, + "learning_rate": 6.005519452805455e-06, + "loss": 0.301, + "step": 22603 + }, + { + "epoch": 0.4524985611690814, + "grad_norm": 1.077378511428833, + "learning_rate": 6.00520189077354e-06, + "loss": 0.2564, + "step": 22604 + }, + { + "epoch": 0.45251857968620973, + "grad_norm": 1.1681411266326904, + "learning_rate": 6.004884324515972e-06, + "loss": 0.3033, + "step": 22605 + }, + { + "epoch": 0.4525385982033381, + "grad_norm": 1.072779655456543, + "learning_rate": 6.004566754034087e-06, + "loss": 0.2854, + "step": 22606 + }, + { + "epoch": 0.45255861672046643, + "grad_norm": 1.1281741857528687, + "learning_rate": 6.00424917932922e-06, + "loss": 0.3678, + "step": 22607 + }, + { + "epoch": 0.4525786352375948, + "grad_norm": 1.8009620904922485, + "learning_rate": 6.003931600402706e-06, + "loss": 0.7633, + "step": 22608 + }, + { + "epoch": 0.45259865375472313, + "grad_norm": 1.048658013343811, + "learning_rate": 6.003614017255878e-06, + "loss": 0.3273, + "step": 22609 + }, + { + "epoch": 0.4526186722718515, + "grad_norm": 1.0826778411865234, + "learning_rate": 6.003296429890075e-06, + "loss": 0.3091, + "step": 22610 + }, + { + "epoch": 0.4526386907889798, + "grad_norm": 1.9462859630584717, + "learning_rate": 6.002978838306629e-06, + "loss": 0.8152, + "step": 22611 + }, + { + "epoch": 0.45265870930610813, + "grad_norm": 1.117783546447754, + "learning_rate": 6.002661242506875e-06, + "loss": 0.328, + "step": 22612 + }, + { + "epoch": 0.4526787278232365, + "grad_norm": 1.056952714920044, + "learning_rate": 6.002343642492148e-06, + "loss": 0.276, + "step": 22613 + }, + { + "epoch": 0.45269874634036483, + "grad_norm": 1.1192989349365234, + "learning_rate": 6.0020260382637865e-06, + "loss": 0.2912, + "step": 22614 + }, + { + "epoch": 0.4527187648574932, + "grad_norm": 1.1038215160369873, + "learning_rate": 6.001708429823121e-06, + "loss": 0.3364, + "step": 22615 + }, + { + "epoch": 0.45273878337462153, + "grad_norm": 1.169181227684021, + "learning_rate": 6.001390817171491e-06, + "loss": 0.299, + "step": 22616 + }, + { + "epoch": 0.4527588018917499, + "grad_norm": 0.9875466227531433, + "learning_rate": 6.001073200310228e-06, + "loss": 0.3046, + "step": 22617 + }, + { + "epoch": 0.45277882040887824, + "grad_norm": 1.910325050354004, + "learning_rate": 6.000755579240668e-06, + "loss": 0.8176, + "step": 22618 + }, + { + "epoch": 0.45279883892600653, + "grad_norm": 1.0453448295593262, + "learning_rate": 6.000437953964149e-06, + "loss": 0.3156, + "step": 22619 + }, + { + "epoch": 0.4528188574431349, + "grad_norm": 1.1119683980941772, + "learning_rate": 6.0001203244820015e-06, + "loss": 0.3498, + "step": 22620 + }, + { + "epoch": 0.45283887596026323, + "grad_norm": 1.8515052795410156, + "learning_rate": 5.999802690795566e-06, + "loss": 0.8106, + "step": 22621 + }, + { + "epoch": 0.4528588944773916, + "grad_norm": 1.0599302053451538, + "learning_rate": 5.999485052906173e-06, + "loss": 0.3424, + "step": 22622 + }, + { + "epoch": 0.45287891299451993, + "grad_norm": 1.045046091079712, + "learning_rate": 5.999167410815162e-06, + "loss": 0.3063, + "step": 22623 + }, + { + "epoch": 0.4528989315116483, + "grad_norm": 1.111962914466858, + "learning_rate": 5.998849764523865e-06, + "loss": 0.3295, + "step": 22624 + }, + { + "epoch": 0.45291895002877663, + "grad_norm": 1.1733025312423706, + "learning_rate": 5.998532114033618e-06, + "loss": 0.2976, + "step": 22625 + }, + { + "epoch": 0.452938968545905, + "grad_norm": 1.2072653770446777, + "learning_rate": 5.998214459345757e-06, + "loss": 0.3236, + "step": 22626 + }, + { + "epoch": 0.4529589870630333, + "grad_norm": 1.0881258249282837, + "learning_rate": 5.997896800461619e-06, + "loss": 0.2972, + "step": 22627 + }, + { + "epoch": 0.45297900558016163, + "grad_norm": 1.0244659185409546, + "learning_rate": 5.997579137382535e-06, + "loss": 0.3022, + "step": 22628 + }, + { + "epoch": 0.45299902409729, + "grad_norm": 1.1246367692947388, + "learning_rate": 5.997261470109845e-06, + "loss": 0.3127, + "step": 22629 + }, + { + "epoch": 0.45301904261441833, + "grad_norm": 1.1288375854492188, + "learning_rate": 5.996943798644882e-06, + "loss": 0.3613, + "step": 22630 + }, + { + "epoch": 0.4530390611315467, + "grad_norm": 1.0898293256759644, + "learning_rate": 5.996626122988981e-06, + "loss": 0.2627, + "step": 22631 + }, + { + "epoch": 0.45305907964867503, + "grad_norm": 1.2871707677841187, + "learning_rate": 5.9963084431434805e-06, + "loss": 0.3247, + "step": 22632 + }, + { + "epoch": 0.4530790981658034, + "grad_norm": 1.2209278345108032, + "learning_rate": 5.995990759109711e-06, + "loss": 0.3019, + "step": 22633 + }, + { + "epoch": 0.45309911668293174, + "grad_norm": 1.1979782581329346, + "learning_rate": 5.995673070889012e-06, + "loss": 0.2915, + "step": 22634 + }, + { + "epoch": 0.45311913520006003, + "grad_norm": 1.85086989402771, + "learning_rate": 5.995355378482718e-06, + "loss": 0.749, + "step": 22635 + }, + { + "epoch": 0.4531391537171884, + "grad_norm": 1.274348497390747, + "learning_rate": 5.995037681892164e-06, + "loss": 0.323, + "step": 22636 + }, + { + "epoch": 0.45315917223431673, + "grad_norm": 1.110830307006836, + "learning_rate": 5.994719981118685e-06, + "loss": 0.3412, + "step": 22637 + }, + { + "epoch": 0.4531791907514451, + "grad_norm": 1.4453364610671997, + "learning_rate": 5.994402276163617e-06, + "loss": 0.3438, + "step": 22638 + }, + { + "epoch": 0.45319920926857343, + "grad_norm": 1.068403720855713, + "learning_rate": 5.994084567028298e-06, + "loss": 0.3502, + "step": 22639 + }, + { + "epoch": 0.4532192277857018, + "grad_norm": 1.123849868774414, + "learning_rate": 5.993766853714059e-06, + "loss": 0.3046, + "step": 22640 + }, + { + "epoch": 0.45323924630283013, + "grad_norm": 0.9651669859886169, + "learning_rate": 5.993449136222239e-06, + "loss": 0.2529, + "step": 22641 + }, + { + "epoch": 0.4532592648199585, + "grad_norm": 1.1012834310531616, + "learning_rate": 5.993131414554172e-06, + "loss": 0.2878, + "step": 22642 + }, + { + "epoch": 0.4532792833370868, + "grad_norm": 1.0757622718811035, + "learning_rate": 5.992813688711194e-06, + "loss": 0.2864, + "step": 22643 + }, + { + "epoch": 0.45329930185421513, + "grad_norm": 1.1137365102767944, + "learning_rate": 5.992495958694642e-06, + "loss": 0.3273, + "step": 22644 + }, + { + "epoch": 0.4533193203713435, + "grad_norm": 1.2424721717834473, + "learning_rate": 5.992178224505851e-06, + "loss": 0.3351, + "step": 22645 + }, + { + "epoch": 0.45333933888847183, + "grad_norm": 1.0843745470046997, + "learning_rate": 5.991860486146155e-06, + "loss": 0.367, + "step": 22646 + }, + { + "epoch": 0.4533593574056002, + "grad_norm": 1.1406627893447876, + "learning_rate": 5.9915427436168914e-06, + "loss": 0.2914, + "step": 22647 + }, + { + "epoch": 0.45337937592272853, + "grad_norm": 1.085763931274414, + "learning_rate": 5.991224996919396e-06, + "loss": 0.3579, + "step": 22648 + }, + { + "epoch": 0.4533993944398569, + "grad_norm": 1.0785155296325684, + "learning_rate": 5.9909072460550045e-06, + "loss": 0.2582, + "step": 22649 + }, + { + "epoch": 0.45341941295698523, + "grad_norm": 1.0777568817138672, + "learning_rate": 5.990589491025051e-06, + "loss": 0.3168, + "step": 22650 + }, + { + "epoch": 0.45343943147411353, + "grad_norm": 1.3811800479888916, + "learning_rate": 5.9902717318308725e-06, + "loss": 0.3341, + "step": 22651 + }, + { + "epoch": 0.4534594499912419, + "grad_norm": 1.1785657405853271, + "learning_rate": 5.989953968473805e-06, + "loss": 0.3361, + "step": 22652 + }, + { + "epoch": 0.45347946850837023, + "grad_norm": 1.156129002571106, + "learning_rate": 5.989636200955183e-06, + "loss": 0.3049, + "step": 22653 + }, + { + "epoch": 0.4534994870254986, + "grad_norm": 1.0816890001296997, + "learning_rate": 5.989318429276345e-06, + "loss": 0.3059, + "step": 22654 + }, + { + "epoch": 0.45351950554262693, + "grad_norm": 1.0740097761154175, + "learning_rate": 5.9890006534386245e-06, + "loss": 0.3165, + "step": 22655 + }, + { + "epoch": 0.4535395240597553, + "grad_norm": 1.101196527481079, + "learning_rate": 5.988682873443357e-06, + "loss": 0.3534, + "step": 22656 + }, + { + "epoch": 0.45355954257688363, + "grad_norm": 1.317232370376587, + "learning_rate": 5.988365089291881e-06, + "loss": 0.2904, + "step": 22657 + }, + { + "epoch": 0.453579561094012, + "grad_norm": 1.175711750984192, + "learning_rate": 5.9880473009855315e-06, + "loss": 0.3508, + "step": 22658 + }, + { + "epoch": 0.4535995796111403, + "grad_norm": 2.034292459487915, + "learning_rate": 5.9877295085256426e-06, + "loss": 0.7689, + "step": 22659 + }, + { + "epoch": 0.45361959812826863, + "grad_norm": 1.1569360494613647, + "learning_rate": 5.9874117119135524e-06, + "loss": 0.3183, + "step": 22660 + }, + { + "epoch": 0.453639616645397, + "grad_norm": 2.0671584606170654, + "learning_rate": 5.987093911150595e-06, + "loss": 0.7516, + "step": 22661 + }, + { + "epoch": 0.45365963516252533, + "grad_norm": 1.0609105825424194, + "learning_rate": 5.986776106238108e-06, + "loss": 0.3681, + "step": 22662 + }, + { + "epoch": 0.4536796536796537, + "grad_norm": 1.192260980606079, + "learning_rate": 5.9864582971774255e-06, + "loss": 0.2835, + "step": 22663 + }, + { + "epoch": 0.45369967219678203, + "grad_norm": 1.4929139614105225, + "learning_rate": 5.986140483969885e-06, + "loss": 0.3309, + "step": 22664 + }, + { + "epoch": 0.4537196907139104, + "grad_norm": 1.069989800453186, + "learning_rate": 5.9858226666168215e-06, + "loss": 0.3141, + "step": 22665 + }, + { + "epoch": 0.45373970923103873, + "grad_norm": 1.2529053688049316, + "learning_rate": 5.985504845119573e-06, + "loss": 0.3044, + "step": 22666 + }, + { + "epoch": 0.45375972774816703, + "grad_norm": 1.0723063945770264, + "learning_rate": 5.9851870194794735e-06, + "loss": 0.3293, + "step": 22667 + }, + { + "epoch": 0.4537797462652954, + "grad_norm": 1.7987834215164185, + "learning_rate": 5.9848691896978594e-06, + "loss": 0.8411, + "step": 22668 + }, + { + "epoch": 0.45379976478242373, + "grad_norm": 1.0755014419555664, + "learning_rate": 5.984551355776067e-06, + "loss": 0.3242, + "step": 22669 + }, + { + "epoch": 0.4538197832995521, + "grad_norm": 1.1454482078552246, + "learning_rate": 5.984233517715433e-06, + "loss": 0.3264, + "step": 22670 + }, + { + "epoch": 0.45383980181668043, + "grad_norm": 1.1635143756866455, + "learning_rate": 5.983915675517295e-06, + "loss": 0.3132, + "step": 22671 + }, + { + "epoch": 0.4538598203338088, + "grad_norm": 1.1523654460906982, + "learning_rate": 5.983597829182986e-06, + "loss": 0.3612, + "step": 22672 + }, + { + "epoch": 0.45387983885093713, + "grad_norm": 1.0915356874465942, + "learning_rate": 5.983279978713842e-06, + "loss": 0.3005, + "step": 22673 + }, + { + "epoch": 0.4538998573680655, + "grad_norm": 1.9796137809753418, + "learning_rate": 5.982962124111202e-06, + "loss": 0.8745, + "step": 22674 + }, + { + "epoch": 0.4539198758851938, + "grad_norm": 1.168273687362671, + "learning_rate": 5.9826442653763995e-06, + "loss": 0.3221, + "step": 22675 + }, + { + "epoch": 0.45393989440232213, + "grad_norm": 1.0375581979751587, + "learning_rate": 5.982326402510773e-06, + "loss": 0.2832, + "step": 22676 + }, + { + "epoch": 0.4539599129194505, + "grad_norm": 1.0890483856201172, + "learning_rate": 5.9820085355156565e-06, + "loss": 0.3098, + "step": 22677 + }, + { + "epoch": 0.45397993143657883, + "grad_norm": 1.3044532537460327, + "learning_rate": 5.981690664392389e-06, + "loss": 0.3369, + "step": 22678 + }, + { + "epoch": 0.4539999499537072, + "grad_norm": 1.5127447843551636, + "learning_rate": 5.981372789142304e-06, + "loss": 0.3059, + "step": 22679 + }, + { + "epoch": 0.45401996847083553, + "grad_norm": 0.9415027499198914, + "learning_rate": 5.98105490976674e-06, + "loss": 0.3257, + "step": 22680 + }, + { + "epoch": 0.4540399869879639, + "grad_norm": 1.6647871732711792, + "learning_rate": 5.980737026267032e-06, + "loss": 0.8218, + "step": 22681 + }, + { + "epoch": 0.45406000550509223, + "grad_norm": 1.0962923765182495, + "learning_rate": 5.980419138644517e-06, + "loss": 0.2853, + "step": 22682 + }, + { + "epoch": 0.45408002402222053, + "grad_norm": 0.9767177700996399, + "learning_rate": 5.98010124690053e-06, + "loss": 0.2975, + "step": 22683 + }, + { + "epoch": 0.4541000425393489, + "grad_norm": 1.023712158203125, + "learning_rate": 5.979783351036409e-06, + "loss": 0.2693, + "step": 22684 + }, + { + "epoch": 0.45412006105647723, + "grad_norm": 1.1010276079177856, + "learning_rate": 5.97946545105349e-06, + "loss": 0.3115, + "step": 22685 + }, + { + "epoch": 0.4541400795736056, + "grad_norm": 1.0855604410171509, + "learning_rate": 5.979147546953108e-06, + "loss": 0.3262, + "step": 22686 + }, + { + "epoch": 0.45416009809073393, + "grad_norm": 1.194775104522705, + "learning_rate": 5.978829638736601e-06, + "loss": 0.3074, + "step": 22687 + }, + { + "epoch": 0.4541801166078623, + "grad_norm": 1.0317763090133667, + "learning_rate": 5.9785117264053036e-06, + "loss": 0.3126, + "step": 22688 + }, + { + "epoch": 0.45420013512499063, + "grad_norm": 1.0197880268096924, + "learning_rate": 5.9781938099605545e-06, + "loss": 0.2791, + "step": 22689 + }, + { + "epoch": 0.454220153642119, + "grad_norm": 1.0057239532470703, + "learning_rate": 5.977875889403688e-06, + "loss": 0.325, + "step": 22690 + }, + { + "epoch": 0.4542401721592473, + "grad_norm": 1.1500552892684937, + "learning_rate": 5.977557964736043e-06, + "loss": 0.3232, + "step": 22691 + }, + { + "epoch": 0.45426019067637563, + "grad_norm": 1.0111651420593262, + "learning_rate": 5.977240035958956e-06, + "loss": 0.351, + "step": 22692 + }, + { + "epoch": 0.454280209193504, + "grad_norm": 1.9111231565475464, + "learning_rate": 5.976922103073761e-06, + "loss": 0.7581, + "step": 22693 + }, + { + "epoch": 0.45430022771063233, + "grad_norm": 1.0864357948303223, + "learning_rate": 5.976604166081794e-06, + "loss": 0.2921, + "step": 22694 + }, + { + "epoch": 0.4543202462277607, + "grad_norm": 1.073167085647583, + "learning_rate": 5.976286224984395e-06, + "loss": 0.2881, + "step": 22695 + }, + { + "epoch": 0.45434026474488903, + "grad_norm": 1.143730878829956, + "learning_rate": 5.975968279782899e-06, + "loss": 0.3133, + "step": 22696 + }, + { + "epoch": 0.4543602832620174, + "grad_norm": 1.0912827253341675, + "learning_rate": 5.975650330478642e-06, + "loss": 0.2883, + "step": 22697 + }, + { + "epoch": 0.45438030177914573, + "grad_norm": 1.3516789674758911, + "learning_rate": 5.975332377072961e-06, + "loss": 0.3162, + "step": 22698 + }, + { + "epoch": 0.45440032029627403, + "grad_norm": 1.20415461063385, + "learning_rate": 5.975014419567192e-06, + "loss": 0.2897, + "step": 22699 + }, + { + "epoch": 0.4544203388134024, + "grad_norm": 1.8786925077438354, + "learning_rate": 5.974696457962673e-06, + "loss": 0.7442, + "step": 22700 + }, + { + "epoch": 0.45444035733053073, + "grad_norm": 1.053457260131836, + "learning_rate": 5.97437849226074e-06, + "loss": 0.3035, + "step": 22701 + }, + { + "epoch": 0.4544603758476591, + "grad_norm": 1.78705894947052, + "learning_rate": 5.974060522462728e-06, + "loss": 0.8088, + "step": 22702 + }, + { + "epoch": 0.45448039436478743, + "grad_norm": 2.1011786460876465, + "learning_rate": 5.973742548569978e-06, + "loss": 0.8401, + "step": 22703 + }, + { + "epoch": 0.4545004128819158, + "grad_norm": 1.8180311918258667, + "learning_rate": 5.97342457058382e-06, + "loss": 0.749, + "step": 22704 + }, + { + "epoch": 0.45452043139904413, + "grad_norm": 1.0493618249893188, + "learning_rate": 5.973106588505598e-06, + "loss": 0.2958, + "step": 22705 + }, + { + "epoch": 0.4545404499161725, + "grad_norm": 0.9655908346176147, + "learning_rate": 5.972788602336644e-06, + "loss": 0.2625, + "step": 22706 + }, + { + "epoch": 0.4545604684333008, + "grad_norm": 1.0204455852508545, + "learning_rate": 5.972470612078295e-06, + "loss": 0.3224, + "step": 22707 + }, + { + "epoch": 0.45458048695042913, + "grad_norm": 1.1506116390228271, + "learning_rate": 5.972152617731889e-06, + "loss": 0.3061, + "step": 22708 + }, + { + "epoch": 0.4546005054675575, + "grad_norm": 1.121884822845459, + "learning_rate": 5.971834619298764e-06, + "loss": 0.3235, + "step": 22709 + }, + { + "epoch": 0.45462052398468583, + "grad_norm": 1.0720583200454712, + "learning_rate": 5.971516616780255e-06, + "loss": 0.3, + "step": 22710 + }, + { + "epoch": 0.4546405425018142, + "grad_norm": 1.1097795963287354, + "learning_rate": 5.971198610177699e-06, + "loss": 0.2877, + "step": 22711 + }, + { + "epoch": 0.45466056101894253, + "grad_norm": 1.0855242013931274, + "learning_rate": 5.970880599492435e-06, + "loss": 0.2897, + "step": 22712 + }, + { + "epoch": 0.4546805795360709, + "grad_norm": 1.1168947219848633, + "learning_rate": 5.970562584725795e-06, + "loss": 0.3067, + "step": 22713 + }, + { + "epoch": 0.45470059805319923, + "grad_norm": 1.152304768562317, + "learning_rate": 5.97024456587912e-06, + "loss": 0.3168, + "step": 22714 + }, + { + "epoch": 0.45472061657032753, + "grad_norm": 1.4871835708618164, + "learning_rate": 5.969926542953745e-06, + "loss": 0.3791, + "step": 22715 + }, + { + "epoch": 0.4547406350874559, + "grad_norm": 1.0437946319580078, + "learning_rate": 5.969608515951007e-06, + "loss": 0.3488, + "step": 22716 + }, + { + "epoch": 0.45476065360458423, + "grad_norm": 1.0678846836090088, + "learning_rate": 5.969290484872245e-06, + "loss": 0.3152, + "step": 22717 + }, + { + "epoch": 0.4547806721217126, + "grad_norm": 0.9435097575187683, + "learning_rate": 5.968972449718795e-06, + "loss": 0.2944, + "step": 22718 + }, + { + "epoch": 0.45480069063884093, + "grad_norm": 1.08310866355896, + "learning_rate": 5.968654410491991e-06, + "loss": 0.3012, + "step": 22719 + }, + { + "epoch": 0.4548207091559693, + "grad_norm": 1.9067999124526978, + "learning_rate": 5.968336367193174e-06, + "loss": 0.7715, + "step": 22720 + }, + { + "epoch": 0.45484072767309763, + "grad_norm": 1.1543421745300293, + "learning_rate": 5.968018319823677e-06, + "loss": 0.3468, + "step": 22721 + }, + { + "epoch": 0.454860746190226, + "grad_norm": 1.1637439727783203, + "learning_rate": 5.967700268384843e-06, + "loss": 0.3365, + "step": 22722 + }, + { + "epoch": 0.4548807647073543, + "grad_norm": 1.1091704368591309, + "learning_rate": 5.967382212878003e-06, + "loss": 0.3155, + "step": 22723 + }, + { + "epoch": 0.45490078322448263, + "grad_norm": 2.045653820037842, + "learning_rate": 5.967064153304497e-06, + "loss": 0.8732, + "step": 22724 + }, + { + "epoch": 0.454920801741611, + "grad_norm": 1.2023128271102905, + "learning_rate": 5.966746089665662e-06, + "loss": 0.3005, + "step": 22725 + }, + { + "epoch": 0.45494082025873933, + "grad_norm": 1.0406471490859985, + "learning_rate": 5.9664280219628334e-06, + "loss": 0.2873, + "step": 22726 + }, + { + "epoch": 0.4549608387758677, + "grad_norm": 1.0396767854690552, + "learning_rate": 5.966109950197349e-06, + "loss": 0.3435, + "step": 22727 + }, + { + "epoch": 0.45498085729299603, + "grad_norm": 1.2155708074569702, + "learning_rate": 5.965791874370545e-06, + "loss": 0.3467, + "step": 22728 + }, + { + "epoch": 0.4550008758101244, + "grad_norm": 1.005342721939087, + "learning_rate": 5.965473794483764e-06, + "loss": 0.273, + "step": 22729 + }, + { + "epoch": 0.45502089432725273, + "grad_norm": 1.0068031549453735, + "learning_rate": 5.965155710538337e-06, + "loss": 0.3086, + "step": 22730 + }, + { + "epoch": 0.45504091284438103, + "grad_norm": 1.0733108520507812, + "learning_rate": 5.964837622535602e-06, + "loss": 0.3071, + "step": 22731 + }, + { + "epoch": 0.4550609313615094, + "grad_norm": 0.969218373298645, + "learning_rate": 5.964519530476898e-06, + "loss": 0.2708, + "step": 22732 + }, + { + "epoch": 0.45508094987863773, + "grad_norm": 1.1348882913589478, + "learning_rate": 5.964201434363561e-06, + "loss": 0.3232, + "step": 22733 + }, + { + "epoch": 0.4551009683957661, + "grad_norm": 1.6857118606567383, + "learning_rate": 5.96388333419693e-06, + "loss": 0.9242, + "step": 22734 + }, + { + "epoch": 0.45512098691289443, + "grad_norm": 1.0697157382965088, + "learning_rate": 5.96356522997834e-06, + "loss": 0.307, + "step": 22735 + }, + { + "epoch": 0.4551410054300228, + "grad_norm": 1.1438992023468018, + "learning_rate": 5.963247121709129e-06, + "loss": 0.2809, + "step": 22736 + }, + { + "epoch": 0.45516102394715113, + "grad_norm": 1.0155484676361084, + "learning_rate": 5.9629290093906344e-06, + "loss": 0.3114, + "step": 22737 + }, + { + "epoch": 0.4551810424642795, + "grad_norm": 1.233489990234375, + "learning_rate": 5.9626108930241946e-06, + "loss": 0.2838, + "step": 22738 + }, + { + "epoch": 0.4552010609814078, + "grad_norm": 1.0895854234695435, + "learning_rate": 5.9622927726111445e-06, + "loss": 0.2998, + "step": 22739 + }, + { + "epoch": 0.45522107949853613, + "grad_norm": 1.1165368556976318, + "learning_rate": 5.961974648152822e-06, + "loss": 0.3027, + "step": 22740 + }, + { + "epoch": 0.4552410980156645, + "grad_norm": 1.2382971048355103, + "learning_rate": 5.961656519650566e-06, + "loss": 0.2924, + "step": 22741 + }, + { + "epoch": 0.45526111653279283, + "grad_norm": 1.0125149488449097, + "learning_rate": 5.961338387105713e-06, + "loss": 0.3088, + "step": 22742 + }, + { + "epoch": 0.4552811350499212, + "grad_norm": 1.8820674419403076, + "learning_rate": 5.9610202505196e-06, + "loss": 0.8076, + "step": 22743 + }, + { + "epoch": 0.45530115356704953, + "grad_norm": 1.033864974975586, + "learning_rate": 5.960702109893566e-06, + "loss": 0.3047, + "step": 22744 + }, + { + "epoch": 0.4553211720841779, + "grad_norm": 1.0416386127471924, + "learning_rate": 5.960383965228946e-06, + "loss": 0.2851, + "step": 22745 + }, + { + "epoch": 0.45534119060130623, + "grad_norm": 1.1234315633773804, + "learning_rate": 5.960065816527078e-06, + "loss": 0.2948, + "step": 22746 + }, + { + "epoch": 0.45536120911843453, + "grad_norm": 1.2621052265167236, + "learning_rate": 5.959747663789301e-06, + "loss": 0.3025, + "step": 22747 + }, + { + "epoch": 0.4553812276355629, + "grad_norm": 1.8736467361450195, + "learning_rate": 5.9594295070169505e-06, + "loss": 0.7981, + "step": 22748 + }, + { + "epoch": 0.45540124615269123, + "grad_norm": 1.263344645500183, + "learning_rate": 5.959111346211366e-06, + "loss": 0.3079, + "step": 22749 + }, + { + "epoch": 0.4554212646698196, + "grad_norm": 1.101439118385315, + "learning_rate": 5.958793181373882e-06, + "loss": 0.3149, + "step": 22750 + }, + { + "epoch": 0.45544128318694793, + "grad_norm": 1.2607474327087402, + "learning_rate": 5.95847501250584e-06, + "loss": 0.29, + "step": 22751 + }, + { + "epoch": 0.4554613017040763, + "grad_norm": 1.1571481227874756, + "learning_rate": 5.9581568396085735e-06, + "loss": 0.3086, + "step": 22752 + }, + { + "epoch": 0.45548132022120463, + "grad_norm": 1.1975473165512085, + "learning_rate": 5.9578386626834216e-06, + "loss": 0.3056, + "step": 22753 + }, + { + "epoch": 0.455501338738333, + "grad_norm": 1.0968351364135742, + "learning_rate": 5.957520481731723e-06, + "loss": 0.3141, + "step": 22754 + }, + { + "epoch": 0.4555213572554613, + "grad_norm": 1.8501498699188232, + "learning_rate": 5.957202296754815e-06, + "loss": 0.7818, + "step": 22755 + }, + { + "epoch": 0.45554137577258963, + "grad_norm": 1.0756291151046753, + "learning_rate": 5.956884107754034e-06, + "loss": 0.3357, + "step": 22756 + }, + { + "epoch": 0.455561394289718, + "grad_norm": 1.1182810068130493, + "learning_rate": 5.956565914730717e-06, + "loss": 0.3015, + "step": 22757 + }, + { + "epoch": 0.45558141280684633, + "grad_norm": 1.7955877780914307, + "learning_rate": 5.956247717686205e-06, + "loss": 0.7942, + "step": 22758 + }, + { + "epoch": 0.4556014313239747, + "grad_norm": 1.0577514171600342, + "learning_rate": 5.95592951662183e-06, + "loss": 0.2791, + "step": 22759 + }, + { + "epoch": 0.45562144984110303, + "grad_norm": 1.200180172920227, + "learning_rate": 5.955611311538936e-06, + "loss": 0.3524, + "step": 22760 + }, + { + "epoch": 0.4556414683582314, + "grad_norm": 1.1146727800369263, + "learning_rate": 5.955293102438857e-06, + "loss": 0.3069, + "step": 22761 + }, + { + "epoch": 0.45566148687535973, + "grad_norm": 1.0560227632522583, + "learning_rate": 5.954974889322932e-06, + "loss": 0.318, + "step": 22762 + }, + { + "epoch": 0.45568150539248803, + "grad_norm": 0.9866451025009155, + "learning_rate": 5.954656672192496e-06, + "loss": 0.2857, + "step": 22763 + }, + { + "epoch": 0.4557015239096164, + "grad_norm": 1.1731922626495361, + "learning_rate": 5.954338451048891e-06, + "loss": 0.3485, + "step": 22764 + }, + { + "epoch": 0.45572154242674473, + "grad_norm": 1.1197792291641235, + "learning_rate": 5.954020225893451e-06, + "loss": 0.3489, + "step": 22765 + }, + { + "epoch": 0.4557415609438731, + "grad_norm": 1.0737781524658203, + "learning_rate": 5.953701996727515e-06, + "loss": 0.2719, + "step": 22766 + }, + { + "epoch": 0.45576157946100143, + "grad_norm": 1.0985305309295654, + "learning_rate": 5.953383763552423e-06, + "loss": 0.2584, + "step": 22767 + }, + { + "epoch": 0.4557815979781298, + "grad_norm": 1.0931581258773804, + "learning_rate": 5.953065526369509e-06, + "loss": 0.3364, + "step": 22768 + }, + { + "epoch": 0.45580161649525813, + "grad_norm": 1.1194533109664917, + "learning_rate": 5.952747285180115e-06, + "loss": 0.307, + "step": 22769 + }, + { + "epoch": 0.4558216350123865, + "grad_norm": 1.0627347230911255, + "learning_rate": 5.9524290399855745e-06, + "loss": 0.2977, + "step": 22770 + }, + { + "epoch": 0.4558416535295148, + "grad_norm": 1.0775079727172852, + "learning_rate": 5.952110790787228e-06, + "loss": 0.3003, + "step": 22771 + }, + { + "epoch": 0.45586167204664313, + "grad_norm": 1.055720329284668, + "learning_rate": 5.951792537586411e-06, + "loss": 0.306, + "step": 22772 + }, + { + "epoch": 0.4558816905637715, + "grad_norm": 1.2697175741195679, + "learning_rate": 5.951474280384464e-06, + "loss": 0.3607, + "step": 22773 + }, + { + "epoch": 0.45590170908089983, + "grad_norm": 1.1648766994476318, + "learning_rate": 5.951156019182724e-06, + "loss": 0.3126, + "step": 22774 + }, + { + "epoch": 0.4559217275980282, + "grad_norm": 1.8424079418182373, + "learning_rate": 5.95083775398253e-06, + "loss": 0.8403, + "step": 22775 + }, + { + "epoch": 0.45594174611515653, + "grad_norm": 0.9841166734695435, + "learning_rate": 5.950519484785217e-06, + "loss": 0.2873, + "step": 22776 + }, + { + "epoch": 0.4559617646322849, + "grad_norm": 1.385054588317871, + "learning_rate": 5.950201211592126e-06, + "loss": 0.345, + "step": 22777 + }, + { + "epoch": 0.45598178314941323, + "grad_norm": 1.1270102262496948, + "learning_rate": 5.949882934404592e-06, + "loss": 0.3313, + "step": 22778 + }, + { + "epoch": 0.45600180166654153, + "grad_norm": 1.0999614000320435, + "learning_rate": 5.949564653223955e-06, + "loss": 0.3174, + "step": 22779 + }, + { + "epoch": 0.4560218201836699, + "grad_norm": 1.0884966850280762, + "learning_rate": 5.949246368051553e-06, + "loss": 0.3246, + "step": 22780 + }, + { + "epoch": 0.45604183870079823, + "grad_norm": 1.9843745231628418, + "learning_rate": 5.948928078888723e-06, + "loss": 0.8071, + "step": 22781 + }, + { + "epoch": 0.4560618572179266, + "grad_norm": 1.0061099529266357, + "learning_rate": 5.9486097857368055e-06, + "loss": 0.249, + "step": 22782 + }, + { + "epoch": 0.45608187573505493, + "grad_norm": 1.078548550605774, + "learning_rate": 5.948291488597134e-06, + "loss": 0.3078, + "step": 22783 + }, + { + "epoch": 0.4561018942521833, + "grad_norm": 1.2091681957244873, + "learning_rate": 5.947973187471051e-06, + "loss": 0.3466, + "step": 22784 + }, + { + "epoch": 0.45612191276931163, + "grad_norm": 1.996045708656311, + "learning_rate": 5.947654882359893e-06, + "loss": 0.7278, + "step": 22785 + }, + { + "epoch": 0.45614193128644, + "grad_norm": 1.1390279531478882, + "learning_rate": 5.947336573264995e-06, + "loss": 0.3189, + "step": 22786 + }, + { + "epoch": 0.4561619498035683, + "grad_norm": 1.1571613550186157, + "learning_rate": 5.947018260187701e-06, + "loss": 0.3394, + "step": 22787 + }, + { + "epoch": 0.45618196832069663, + "grad_norm": 0.9938228130340576, + "learning_rate": 5.946699943129345e-06, + "loss": 0.3344, + "step": 22788 + }, + { + "epoch": 0.456201986837825, + "grad_norm": 1.1903789043426514, + "learning_rate": 5.946381622091266e-06, + "loss": 0.3766, + "step": 22789 + }, + { + "epoch": 0.45622200535495333, + "grad_norm": 1.1743015050888062, + "learning_rate": 5.946063297074802e-06, + "loss": 0.3263, + "step": 22790 + }, + { + "epoch": 0.4562420238720817, + "grad_norm": 1.7752861976623535, + "learning_rate": 5.945744968081292e-06, + "loss": 0.7555, + "step": 22791 + }, + { + "epoch": 0.45626204238921003, + "grad_norm": 1.1464554071426392, + "learning_rate": 5.945426635112073e-06, + "loss": 0.3083, + "step": 22792 + }, + { + "epoch": 0.4562820609063384, + "grad_norm": 0.9947004914283752, + "learning_rate": 5.945108298168485e-06, + "loss": 0.3017, + "step": 22793 + }, + { + "epoch": 0.45630207942346673, + "grad_norm": 1.035685420036316, + "learning_rate": 5.944789957251863e-06, + "loss": 0.2786, + "step": 22794 + }, + { + "epoch": 0.45632209794059503, + "grad_norm": 1.006425142288208, + "learning_rate": 5.9444716123635494e-06, + "loss": 0.2831, + "step": 22795 + }, + { + "epoch": 0.4563421164577234, + "grad_norm": 1.0421870946884155, + "learning_rate": 5.944153263504882e-06, + "loss": 0.2619, + "step": 22796 + }, + { + "epoch": 0.45636213497485173, + "grad_norm": 1.8285787105560303, + "learning_rate": 5.943834910677194e-06, + "loss": 0.789, + "step": 22797 + }, + { + "epoch": 0.4563821534919801, + "grad_norm": 1.194265604019165, + "learning_rate": 5.9435165538818275e-06, + "loss": 0.3279, + "step": 22798 + }, + { + "epoch": 0.45640217200910843, + "grad_norm": 1.0578454732894897, + "learning_rate": 5.943198193120121e-06, + "loss": 0.2873, + "step": 22799 + }, + { + "epoch": 0.4564221905262368, + "grad_norm": 1.0544838905334473, + "learning_rate": 5.942879828393413e-06, + "loss": 0.3705, + "step": 22800 + }, + { + "epoch": 0.45644220904336513, + "grad_norm": 1.242396354675293, + "learning_rate": 5.942561459703041e-06, + "loss": 0.3318, + "step": 22801 + }, + { + "epoch": 0.4564622275604935, + "grad_norm": 1.0512032508850098, + "learning_rate": 5.942243087050343e-06, + "loss": 0.2993, + "step": 22802 + }, + { + "epoch": 0.4564822460776218, + "grad_norm": 1.0990384817123413, + "learning_rate": 5.941924710436658e-06, + "loss": 0.309, + "step": 22803 + }, + { + "epoch": 0.45650226459475013, + "grad_norm": 1.0082985162734985, + "learning_rate": 5.941606329863324e-06, + "loss": 0.301, + "step": 22804 + }, + { + "epoch": 0.4565222831118785, + "grad_norm": 1.3114062547683716, + "learning_rate": 5.9412879453316794e-06, + "loss": 0.3664, + "step": 22805 + }, + { + "epoch": 0.45654230162900683, + "grad_norm": 1.1282753944396973, + "learning_rate": 5.940969556843065e-06, + "loss": 0.3581, + "step": 22806 + }, + { + "epoch": 0.4565623201461352, + "grad_norm": 1.3681721687316895, + "learning_rate": 5.940651164398815e-06, + "loss": 0.3035, + "step": 22807 + }, + { + "epoch": 0.45658233866326353, + "grad_norm": 1.271093726158142, + "learning_rate": 5.94033276800027e-06, + "loss": 0.3637, + "step": 22808 + }, + { + "epoch": 0.4566023571803919, + "grad_norm": 1.0603697299957275, + "learning_rate": 5.94001436764877e-06, + "loss": 0.2881, + "step": 22809 + }, + { + "epoch": 0.45662237569752023, + "grad_norm": 1.1159141063690186, + "learning_rate": 5.9396959633456495e-06, + "loss": 0.3296, + "step": 22810 + }, + { + "epoch": 0.45664239421464853, + "grad_norm": 1.0439677238464355, + "learning_rate": 5.939377555092251e-06, + "loss": 0.2694, + "step": 22811 + }, + { + "epoch": 0.4566624127317769, + "grad_norm": 1.0002052783966064, + "learning_rate": 5.9390591428899104e-06, + "loss": 0.3077, + "step": 22812 + }, + { + "epoch": 0.45668243124890523, + "grad_norm": 1.1571882963180542, + "learning_rate": 5.938740726739969e-06, + "loss": 0.3134, + "step": 22813 + }, + { + "epoch": 0.4567024497660336, + "grad_norm": 1.0741969347000122, + "learning_rate": 5.938422306643761e-06, + "loss": 0.309, + "step": 22814 + }, + { + "epoch": 0.45672246828316193, + "grad_norm": 1.0966049432754517, + "learning_rate": 5.93810388260263e-06, + "loss": 0.3151, + "step": 22815 + }, + { + "epoch": 0.4567424868002903, + "grad_norm": 1.756569743156433, + "learning_rate": 5.937785454617911e-06, + "loss": 0.7405, + "step": 22816 + }, + { + "epoch": 0.45676250531741863, + "grad_norm": 1.1399474143981934, + "learning_rate": 5.937467022690944e-06, + "loss": 0.314, + "step": 22817 + }, + { + "epoch": 0.456782523834547, + "grad_norm": 1.1162480115890503, + "learning_rate": 5.937148586823067e-06, + "loss": 0.3125, + "step": 22818 + }, + { + "epoch": 0.4568025423516753, + "grad_norm": 1.190215826034546, + "learning_rate": 5.936830147015618e-06, + "loss": 0.3523, + "step": 22819 + }, + { + "epoch": 0.45682256086880363, + "grad_norm": 1.1876810789108276, + "learning_rate": 5.93651170326994e-06, + "loss": 0.3236, + "step": 22820 + }, + { + "epoch": 0.456842579385932, + "grad_norm": 1.0961432456970215, + "learning_rate": 5.9361932555873655e-06, + "loss": 0.3474, + "step": 22821 + }, + { + "epoch": 0.45686259790306033, + "grad_norm": 1.129486322402954, + "learning_rate": 5.935874803969237e-06, + "loss": 0.3226, + "step": 22822 + }, + { + "epoch": 0.4568826164201887, + "grad_norm": 1.1265285015106201, + "learning_rate": 5.935556348416892e-06, + "loss": 0.3029, + "step": 22823 + }, + { + "epoch": 0.45690263493731703, + "grad_norm": 0.9035801887512207, + "learning_rate": 5.93523788893167e-06, + "loss": 0.3085, + "step": 22824 + }, + { + "epoch": 0.4569226534544454, + "grad_norm": 1.314958930015564, + "learning_rate": 5.934919425514908e-06, + "loss": 0.3279, + "step": 22825 + }, + { + "epoch": 0.45694267197157373, + "grad_norm": 1.1304807662963867, + "learning_rate": 5.9346009581679465e-06, + "loss": 0.3108, + "step": 22826 + }, + { + "epoch": 0.45696269048870203, + "grad_norm": 1.1600581407546997, + "learning_rate": 5.934282486892124e-06, + "loss": 0.3663, + "step": 22827 + }, + { + "epoch": 0.4569827090058304, + "grad_norm": 1.0586897134780884, + "learning_rate": 5.9339640116887795e-06, + "loss": 0.3685, + "step": 22828 + }, + { + "epoch": 0.45700272752295873, + "grad_norm": 1.2288874387741089, + "learning_rate": 5.93364553255925e-06, + "loss": 0.3518, + "step": 22829 + }, + { + "epoch": 0.4570227460400871, + "grad_norm": 1.0375924110412598, + "learning_rate": 5.9333270495048755e-06, + "loss": 0.2713, + "step": 22830 + }, + { + "epoch": 0.45704276455721543, + "grad_norm": 1.1187342405319214, + "learning_rate": 5.933008562526996e-06, + "loss": 0.2818, + "step": 22831 + }, + { + "epoch": 0.4570627830743438, + "grad_norm": 1.0794731378555298, + "learning_rate": 5.9326900716269484e-06, + "loss": 0.3406, + "step": 22832 + }, + { + "epoch": 0.45708280159147213, + "grad_norm": 1.2033947706222534, + "learning_rate": 5.932371576806074e-06, + "loss": 0.3262, + "step": 22833 + }, + { + "epoch": 0.4571028201086005, + "grad_norm": 1.9454138278961182, + "learning_rate": 5.932053078065709e-06, + "loss": 0.778, + "step": 22834 + }, + { + "epoch": 0.4571228386257288, + "grad_norm": 1.0787876844406128, + "learning_rate": 5.931734575407194e-06, + "loss": 0.2894, + "step": 22835 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 1.1275323629379272, + "learning_rate": 5.931416068831867e-06, + "loss": 0.3413, + "step": 22836 + }, + { + "epoch": 0.4571628756599855, + "grad_norm": 1.0358023643493652, + "learning_rate": 5.931097558341067e-06, + "loss": 0.3225, + "step": 22837 + }, + { + "epoch": 0.45718289417711383, + "grad_norm": 1.0745118856430054, + "learning_rate": 5.9307790439361335e-06, + "loss": 0.3454, + "step": 22838 + }, + { + "epoch": 0.4572029126942422, + "grad_norm": 1.1598048210144043, + "learning_rate": 5.9304605256184044e-06, + "loss": 0.3423, + "step": 22839 + }, + { + "epoch": 0.45722293121137053, + "grad_norm": 1.1365705728530884, + "learning_rate": 5.930142003389221e-06, + "loss": 0.3173, + "step": 22840 + }, + { + "epoch": 0.4572429497284989, + "grad_norm": 1.1687501668930054, + "learning_rate": 5.9298234772499195e-06, + "loss": 0.3234, + "step": 22841 + }, + { + "epoch": 0.45726296824562723, + "grad_norm": 1.1416410207748413, + "learning_rate": 5.92950494720184e-06, + "loss": 0.3416, + "step": 22842 + }, + { + "epoch": 0.45728298676275553, + "grad_norm": 0.9813659191131592, + "learning_rate": 5.929186413246323e-06, + "loss": 0.321, + "step": 22843 + }, + { + "epoch": 0.4573030052798839, + "grad_norm": 1.0414201021194458, + "learning_rate": 5.928867875384706e-06, + "loss": 0.3032, + "step": 22844 + }, + { + "epoch": 0.45732302379701223, + "grad_norm": 1.2292602062225342, + "learning_rate": 5.9285493336183275e-06, + "loss": 0.3293, + "step": 22845 + }, + { + "epoch": 0.4573430423141406, + "grad_norm": 1.9728021621704102, + "learning_rate": 5.928230787948529e-06, + "loss": 0.8637, + "step": 22846 + }, + { + "epoch": 0.45736306083126893, + "grad_norm": 1.1340105533599854, + "learning_rate": 5.927912238376648e-06, + "loss": 0.2989, + "step": 22847 + }, + { + "epoch": 0.4573830793483973, + "grad_norm": 1.0362392663955688, + "learning_rate": 5.927593684904022e-06, + "loss": 0.3254, + "step": 22848 + }, + { + "epoch": 0.45740309786552563, + "grad_norm": 1.0981284379959106, + "learning_rate": 5.927275127531992e-06, + "loss": 0.2975, + "step": 22849 + }, + { + "epoch": 0.457423116382654, + "grad_norm": 1.0564829111099243, + "learning_rate": 5.926956566261897e-06, + "loss": 0.2928, + "step": 22850 + }, + { + "epoch": 0.4574431348997823, + "grad_norm": 1.0689517259597778, + "learning_rate": 5.926638001095077e-06, + "loss": 0.2766, + "step": 22851 + }, + { + "epoch": 0.45746315341691063, + "grad_norm": 1.022931694984436, + "learning_rate": 5.92631943203287e-06, + "loss": 0.2979, + "step": 22852 + }, + { + "epoch": 0.457483171934039, + "grad_norm": 1.0545543432235718, + "learning_rate": 5.926000859076615e-06, + "loss": 0.3092, + "step": 22853 + }, + { + "epoch": 0.45750319045116733, + "grad_norm": 0.9749571681022644, + "learning_rate": 5.925682282227652e-06, + "loss": 0.2812, + "step": 22854 + }, + { + "epoch": 0.4575232089682957, + "grad_norm": 1.0763161182403564, + "learning_rate": 5.925363701487319e-06, + "loss": 0.315, + "step": 22855 + }, + { + "epoch": 0.45754322748542403, + "grad_norm": 1.0659037828445435, + "learning_rate": 5.9250451168569566e-06, + "loss": 0.2831, + "step": 22856 + }, + { + "epoch": 0.4575632460025524, + "grad_norm": 1.0963656902313232, + "learning_rate": 5.924726528337904e-06, + "loss": 0.285, + "step": 22857 + }, + { + "epoch": 0.4575832645196807, + "grad_norm": 1.0737533569335938, + "learning_rate": 5.924407935931501e-06, + "loss": 0.3629, + "step": 22858 + }, + { + "epoch": 0.457603283036809, + "grad_norm": 1.1262156963348389, + "learning_rate": 5.924089339639084e-06, + "loss": 0.2832, + "step": 22859 + }, + { + "epoch": 0.4576233015539374, + "grad_norm": 1.115478754043579, + "learning_rate": 5.923770739461996e-06, + "loss": 0.3358, + "step": 22860 + }, + { + "epoch": 0.45764332007106573, + "grad_norm": 1.1304901838302612, + "learning_rate": 5.923452135401574e-06, + "loss": 0.2845, + "step": 22861 + }, + { + "epoch": 0.4576633385881941, + "grad_norm": 1.9147517681121826, + "learning_rate": 5.923133527459157e-06, + "loss": 0.8085, + "step": 22862 + }, + { + "epoch": 0.45768335710532243, + "grad_norm": 1.1267266273498535, + "learning_rate": 5.922814915636086e-06, + "loss": 0.3106, + "step": 22863 + }, + { + "epoch": 0.4577033756224508, + "grad_norm": 1.335405707359314, + "learning_rate": 5.9224962999336995e-06, + "loss": 0.3522, + "step": 22864 + }, + { + "epoch": 0.45772339413957913, + "grad_norm": 1.110896110534668, + "learning_rate": 5.922177680353338e-06, + "loss": 0.3187, + "step": 22865 + }, + { + "epoch": 0.4577434126567074, + "grad_norm": 1.1697356700897217, + "learning_rate": 5.921859056896339e-06, + "loss": 0.3368, + "step": 22866 + }, + { + "epoch": 0.4577634311738358, + "grad_norm": 1.0836316347122192, + "learning_rate": 5.9215404295640435e-06, + "loss": 0.3661, + "step": 22867 + }, + { + "epoch": 0.45778344969096413, + "grad_norm": 1.3378986120224, + "learning_rate": 5.92122179835779e-06, + "loss": 0.3353, + "step": 22868 + }, + { + "epoch": 0.4578034682080925, + "grad_norm": 1.0949857234954834, + "learning_rate": 5.920903163278918e-06, + "loss": 0.3307, + "step": 22869 + }, + { + "epoch": 0.45782348672522083, + "grad_norm": 1.200736403465271, + "learning_rate": 5.920584524328767e-06, + "loss": 0.3127, + "step": 22870 + }, + { + "epoch": 0.4578435052423492, + "grad_norm": 1.1738368272781372, + "learning_rate": 5.920265881508679e-06, + "loss": 0.3223, + "step": 22871 + }, + { + "epoch": 0.45786352375947753, + "grad_norm": 1.1282565593719482, + "learning_rate": 5.91994723481999e-06, + "loss": 0.3175, + "step": 22872 + }, + { + "epoch": 0.4578835422766059, + "grad_norm": 1.0301200151443481, + "learning_rate": 5.919628584264041e-06, + "loss": 0.3131, + "step": 22873 + }, + { + "epoch": 0.4579035607937342, + "grad_norm": 1.1064175367355347, + "learning_rate": 5.9193099298421694e-06, + "loss": 0.2913, + "step": 22874 + }, + { + "epoch": 0.4579235793108625, + "grad_norm": 1.1842913627624512, + "learning_rate": 5.918991271555719e-06, + "loss": 0.3429, + "step": 22875 + }, + { + "epoch": 0.4579435978279909, + "grad_norm": 1.2345114946365356, + "learning_rate": 5.918672609406025e-06, + "loss": 0.3217, + "step": 22876 + }, + { + "epoch": 0.45796361634511923, + "grad_norm": 1.1504695415496826, + "learning_rate": 5.918353943394431e-06, + "loss": 0.388, + "step": 22877 + }, + { + "epoch": 0.4579836348622476, + "grad_norm": 1.0339994430541992, + "learning_rate": 5.918035273522273e-06, + "loss": 0.2942, + "step": 22878 + }, + { + "epoch": 0.45800365337937593, + "grad_norm": 1.1254867315292358, + "learning_rate": 5.917716599790895e-06, + "loss": 0.306, + "step": 22879 + }, + { + "epoch": 0.4580236718965043, + "grad_norm": 0.9710211157798767, + "learning_rate": 5.917397922201631e-06, + "loss": 0.2927, + "step": 22880 + }, + { + "epoch": 0.45804369041363263, + "grad_norm": 1.094146966934204, + "learning_rate": 5.917079240755824e-06, + "loss": 0.3282, + "step": 22881 + }, + { + "epoch": 0.4580637089307609, + "grad_norm": 1.216884732246399, + "learning_rate": 5.916760555454814e-06, + "loss": 0.3523, + "step": 22882 + }, + { + "epoch": 0.4580837274478893, + "grad_norm": 1.0243550539016724, + "learning_rate": 5.91644186629994e-06, + "loss": 0.263, + "step": 22883 + }, + { + "epoch": 0.45810374596501763, + "grad_norm": 1.0973106622695923, + "learning_rate": 5.916123173292542e-06, + "loss": 0.3101, + "step": 22884 + }, + { + "epoch": 0.458123764482146, + "grad_norm": 1.1058253049850464, + "learning_rate": 5.915804476433959e-06, + "loss": 0.3485, + "step": 22885 + }, + { + "epoch": 0.45814378299927433, + "grad_norm": 1.0597695112228394, + "learning_rate": 5.915485775725531e-06, + "loss": 0.2961, + "step": 22886 + }, + { + "epoch": 0.4581638015164027, + "grad_norm": 1.0845634937286377, + "learning_rate": 5.915167071168597e-06, + "loss": 0.3525, + "step": 22887 + }, + { + "epoch": 0.45818382003353103, + "grad_norm": 1.0280925035476685, + "learning_rate": 5.9148483627644985e-06, + "loss": 0.2904, + "step": 22888 + }, + { + "epoch": 0.4582038385506594, + "grad_norm": 0.9938081502914429, + "learning_rate": 5.914529650514573e-06, + "loss": 0.2787, + "step": 22889 + }, + { + "epoch": 0.4582238570677877, + "grad_norm": 1.1970717906951904, + "learning_rate": 5.914210934420164e-06, + "loss": 0.3332, + "step": 22890 + }, + { + "epoch": 0.458243875584916, + "grad_norm": 1.1658356189727783, + "learning_rate": 5.913892214482608e-06, + "loss": 0.3549, + "step": 22891 + }, + { + "epoch": 0.4582638941020444, + "grad_norm": 1.1315513849258423, + "learning_rate": 5.913573490703245e-06, + "loss": 0.3054, + "step": 22892 + }, + { + "epoch": 0.45828391261917273, + "grad_norm": 1.1362719535827637, + "learning_rate": 5.913254763083416e-06, + "loss": 0.3531, + "step": 22893 + }, + { + "epoch": 0.4583039311363011, + "grad_norm": 1.260912537574768, + "learning_rate": 5.91293603162446e-06, + "loss": 0.3712, + "step": 22894 + }, + { + "epoch": 0.45832394965342943, + "grad_norm": 1.83747398853302, + "learning_rate": 5.912617296327719e-06, + "loss": 0.8115, + "step": 22895 + }, + { + "epoch": 0.4583439681705578, + "grad_norm": 1.1623016595840454, + "learning_rate": 5.91229855719453e-06, + "loss": 0.3512, + "step": 22896 + }, + { + "epoch": 0.45836398668768613, + "grad_norm": 1.145203948020935, + "learning_rate": 5.911979814226235e-06, + "loss": 0.3333, + "step": 22897 + }, + { + "epoch": 0.4583840052048144, + "grad_norm": 1.8901357650756836, + "learning_rate": 5.911661067424173e-06, + "loss": 0.7797, + "step": 22898 + }, + { + "epoch": 0.4584040237219428, + "grad_norm": 1.2896242141723633, + "learning_rate": 5.911342316789683e-06, + "loss": 0.3106, + "step": 22899 + }, + { + "epoch": 0.45842404223907113, + "grad_norm": 1.1411056518554688, + "learning_rate": 5.911023562324107e-06, + "loss": 0.3182, + "step": 22900 + }, + { + "epoch": 0.4584440607561995, + "grad_norm": 1.077102541923523, + "learning_rate": 5.910704804028783e-06, + "loss": 0.3594, + "step": 22901 + }, + { + "epoch": 0.45846407927332783, + "grad_norm": 1.0078123807907104, + "learning_rate": 5.910386041905054e-06, + "loss": 0.295, + "step": 22902 + }, + { + "epoch": 0.4584840977904562, + "grad_norm": 1.1395617723464966, + "learning_rate": 5.910067275954257e-06, + "loss": 0.3428, + "step": 22903 + }, + { + "epoch": 0.45850411630758453, + "grad_norm": 1.135269284248352, + "learning_rate": 5.909748506177733e-06, + "loss": 0.3075, + "step": 22904 + }, + { + "epoch": 0.4585241348247129, + "grad_norm": 1.8387151956558228, + "learning_rate": 5.909429732576821e-06, + "loss": 0.7667, + "step": 22905 + }, + { + "epoch": 0.4585441533418412, + "grad_norm": 0.9810805320739746, + "learning_rate": 5.909110955152862e-06, + "loss": 0.3489, + "step": 22906 + }, + { + "epoch": 0.4585641718589695, + "grad_norm": 1.0853923559188843, + "learning_rate": 5.908792173907197e-06, + "loss": 0.3693, + "step": 22907 + }, + { + "epoch": 0.4585841903760979, + "grad_norm": 1.1325639486312866, + "learning_rate": 5.908473388841166e-06, + "loss": 0.3078, + "step": 22908 + }, + { + "epoch": 0.45860420889322623, + "grad_norm": 1.8790637254714966, + "learning_rate": 5.908154599956108e-06, + "loss": 0.7693, + "step": 22909 + }, + { + "epoch": 0.4586242274103546, + "grad_norm": 1.0882776975631714, + "learning_rate": 5.907835807253364e-06, + "loss": 0.3238, + "step": 22910 + }, + { + "epoch": 0.45864424592748293, + "grad_norm": 1.236618161201477, + "learning_rate": 5.907517010734273e-06, + "loss": 0.3093, + "step": 22911 + }, + { + "epoch": 0.4586642644446113, + "grad_norm": 1.0306051969528198, + "learning_rate": 5.907198210400176e-06, + "loss": 0.2754, + "step": 22912 + }, + { + "epoch": 0.45868428296173963, + "grad_norm": 1.2044936418533325, + "learning_rate": 5.906879406252412e-06, + "loss": 0.2831, + "step": 22913 + }, + { + "epoch": 0.4587043014788679, + "grad_norm": 1.1633284091949463, + "learning_rate": 5.906560598292321e-06, + "loss": 0.3841, + "step": 22914 + }, + { + "epoch": 0.4587243199959963, + "grad_norm": 1.1229227781295776, + "learning_rate": 5.906241786521247e-06, + "loss": 0.2994, + "step": 22915 + }, + { + "epoch": 0.4587443385131246, + "grad_norm": 1.0771050453186035, + "learning_rate": 5.9059229709405265e-06, + "loss": 0.2767, + "step": 22916 + }, + { + "epoch": 0.458764357030253, + "grad_norm": 0.9757498502731323, + "learning_rate": 5.905604151551502e-06, + "loss": 0.3495, + "step": 22917 + }, + { + "epoch": 0.45878437554738133, + "grad_norm": 1.8769729137420654, + "learning_rate": 5.905285328355511e-06, + "loss": 0.7396, + "step": 22918 + }, + { + "epoch": 0.4588043940645097, + "grad_norm": 1.0939602851867676, + "learning_rate": 5.904966501353895e-06, + "loss": 0.2843, + "step": 22919 + }, + { + "epoch": 0.45882441258163803, + "grad_norm": 1.4317394495010376, + "learning_rate": 5.904647670547995e-06, + "loss": 0.3392, + "step": 22920 + }, + { + "epoch": 0.4588444310987664, + "grad_norm": 1.0710476636886597, + "learning_rate": 5.904328835939153e-06, + "loss": 0.3645, + "step": 22921 + }, + { + "epoch": 0.4588644496158947, + "grad_norm": 1.0978734493255615, + "learning_rate": 5.9040099975287056e-06, + "loss": 0.3003, + "step": 22922 + }, + { + "epoch": 0.458884468133023, + "grad_norm": 1.0389434099197388, + "learning_rate": 5.903691155317994e-06, + "loss": 0.3146, + "step": 22923 + }, + { + "epoch": 0.4589044866501514, + "grad_norm": 1.1922870874404907, + "learning_rate": 5.903372309308361e-06, + "loss": 0.302, + "step": 22924 + }, + { + "epoch": 0.45892450516727973, + "grad_norm": 1.0938091278076172, + "learning_rate": 5.903053459501142e-06, + "loss": 0.3104, + "step": 22925 + }, + { + "epoch": 0.4589445236844081, + "grad_norm": 1.1792383193969727, + "learning_rate": 5.902734605897683e-06, + "loss": 0.2799, + "step": 22926 + }, + { + "epoch": 0.45896454220153643, + "grad_norm": 1.044348955154419, + "learning_rate": 5.902415748499321e-06, + "loss": 0.3157, + "step": 22927 + }, + { + "epoch": 0.4589845607186648, + "grad_norm": 1.1690800189971924, + "learning_rate": 5.902096887307399e-06, + "loss": 0.3346, + "step": 22928 + }, + { + "epoch": 0.45900457923579313, + "grad_norm": 1.105432152748108, + "learning_rate": 5.901778022323255e-06, + "loss": 0.2945, + "step": 22929 + }, + { + "epoch": 0.4590245977529214, + "grad_norm": 1.1685923337936401, + "learning_rate": 5.90145915354823e-06, + "loss": 0.3546, + "step": 22930 + }, + { + "epoch": 0.4590446162700498, + "grad_norm": 1.1370657682418823, + "learning_rate": 5.901140280983666e-06, + "loss": 0.3597, + "step": 22931 + }, + { + "epoch": 0.4590646347871781, + "grad_norm": 1.050961971282959, + "learning_rate": 5.9008214046308986e-06, + "loss": 0.278, + "step": 22932 + }, + { + "epoch": 0.4590846533043065, + "grad_norm": 1.9928112030029297, + "learning_rate": 5.900502524491275e-06, + "loss": 0.7825, + "step": 22933 + }, + { + "epoch": 0.45910467182143483, + "grad_norm": 1.0056605339050293, + "learning_rate": 5.900183640566132e-06, + "loss": 0.2584, + "step": 22934 + }, + { + "epoch": 0.4591246903385632, + "grad_norm": 1.005127191543579, + "learning_rate": 5.899864752856811e-06, + "loss": 0.3227, + "step": 22935 + }, + { + "epoch": 0.45914470885569153, + "grad_norm": 1.1173601150512695, + "learning_rate": 5.899545861364651e-06, + "loss": 0.3738, + "step": 22936 + }, + { + "epoch": 0.4591647273728199, + "grad_norm": 1.0137072801589966, + "learning_rate": 5.899226966090996e-06, + "loss": 0.2949, + "step": 22937 + }, + { + "epoch": 0.4591847458899482, + "grad_norm": 1.0863709449768066, + "learning_rate": 5.898908067037182e-06, + "loss": 0.341, + "step": 22938 + }, + { + "epoch": 0.4592047644070765, + "grad_norm": 1.0737919807434082, + "learning_rate": 5.898589164204552e-06, + "loss": 0.2999, + "step": 22939 + }, + { + "epoch": 0.4592247829242049, + "grad_norm": 1.1349902153015137, + "learning_rate": 5.898270257594448e-06, + "loss": 0.3145, + "step": 22940 + }, + { + "epoch": 0.45924480144133323, + "grad_norm": 1.014173150062561, + "learning_rate": 5.897951347208208e-06, + "loss": 0.2835, + "step": 22941 + }, + { + "epoch": 0.4592648199584616, + "grad_norm": 1.1097495555877686, + "learning_rate": 5.897632433047176e-06, + "loss": 0.2852, + "step": 22942 + }, + { + "epoch": 0.45928483847558993, + "grad_norm": 1.1121598482131958, + "learning_rate": 5.897313515112688e-06, + "loss": 0.2998, + "step": 22943 + }, + { + "epoch": 0.4593048569927183, + "grad_norm": 0.9814164042472839, + "learning_rate": 5.896994593406088e-06, + "loss": 0.3049, + "step": 22944 + }, + { + "epoch": 0.45932487550984663, + "grad_norm": 0.9643862843513489, + "learning_rate": 5.896675667928714e-06, + "loss": 0.2774, + "step": 22945 + }, + { + "epoch": 0.4593448940269749, + "grad_norm": 2.060751438140869, + "learning_rate": 5.896356738681911e-06, + "loss": 0.7793, + "step": 22946 + }, + { + "epoch": 0.4593649125441033, + "grad_norm": 1.0979969501495361, + "learning_rate": 5.896037805667016e-06, + "loss": 0.3383, + "step": 22947 + }, + { + "epoch": 0.4593849310612316, + "grad_norm": 1.083665370941162, + "learning_rate": 5.895718868885372e-06, + "loss": 0.3117, + "step": 22948 + }, + { + "epoch": 0.45940494957836, + "grad_norm": 1.1848700046539307, + "learning_rate": 5.895399928338317e-06, + "loss": 0.3404, + "step": 22949 + }, + { + "epoch": 0.45942496809548833, + "grad_norm": 1.0730066299438477, + "learning_rate": 5.895080984027195e-06, + "loss": 0.3158, + "step": 22950 + }, + { + "epoch": 0.4594449866126167, + "grad_norm": 1.0597440004348755, + "learning_rate": 5.894762035953343e-06, + "loss": 0.2999, + "step": 22951 + }, + { + "epoch": 0.45946500512974503, + "grad_norm": 1.2403706312179565, + "learning_rate": 5.894443084118106e-06, + "loss": 0.3603, + "step": 22952 + }, + { + "epoch": 0.4594850236468734, + "grad_norm": 1.2553004026412964, + "learning_rate": 5.894124128522822e-06, + "loss": 0.3446, + "step": 22953 + }, + { + "epoch": 0.4595050421640017, + "grad_norm": 1.1688153743743896, + "learning_rate": 5.893805169168832e-06, + "loss": 0.2877, + "step": 22954 + }, + { + "epoch": 0.45952506068113, + "grad_norm": 1.0277149677276611, + "learning_rate": 5.893486206057479e-06, + "loss": 0.2903, + "step": 22955 + }, + { + "epoch": 0.4595450791982584, + "grad_norm": 1.087256908416748, + "learning_rate": 5.8931672391901005e-06, + "loss": 0.2999, + "step": 22956 + }, + { + "epoch": 0.45956509771538673, + "grad_norm": 1.8045436143875122, + "learning_rate": 5.892848268568041e-06, + "loss": 0.8016, + "step": 22957 + }, + { + "epoch": 0.4595851162325151, + "grad_norm": 1.7559188604354858, + "learning_rate": 5.892529294192637e-06, + "loss": 0.8201, + "step": 22958 + }, + { + "epoch": 0.45960513474964343, + "grad_norm": 1.0521246194839478, + "learning_rate": 5.892210316065233e-06, + "loss": 0.2898, + "step": 22959 + }, + { + "epoch": 0.4596251532667718, + "grad_norm": 1.0664774179458618, + "learning_rate": 5.89189133418717e-06, + "loss": 0.3187, + "step": 22960 + }, + { + "epoch": 0.45964517178390013, + "grad_norm": 1.3342654705047607, + "learning_rate": 5.891572348559788e-06, + "loss": 0.3481, + "step": 22961 + }, + { + "epoch": 0.4596651903010284, + "grad_norm": 1.2880048751831055, + "learning_rate": 5.891253359184426e-06, + "loss": 0.3136, + "step": 22962 + }, + { + "epoch": 0.4596852088181568, + "grad_norm": 1.283217191696167, + "learning_rate": 5.890934366062427e-06, + "loss": 0.2695, + "step": 22963 + }, + { + "epoch": 0.4597052273352851, + "grad_norm": 1.0180944204330444, + "learning_rate": 5.890615369195132e-06, + "loss": 0.3141, + "step": 22964 + }, + { + "epoch": 0.4597252458524135, + "grad_norm": 1.0569210052490234, + "learning_rate": 5.890296368583881e-06, + "loss": 0.3324, + "step": 22965 + }, + { + "epoch": 0.45974526436954183, + "grad_norm": 1.0909379720687866, + "learning_rate": 5.889977364230018e-06, + "loss": 0.2963, + "step": 22966 + }, + { + "epoch": 0.4597652828866702, + "grad_norm": 1.1182557344436646, + "learning_rate": 5.889658356134879e-06, + "loss": 0.3617, + "step": 22967 + }, + { + "epoch": 0.45978530140379853, + "grad_norm": 1.1560174226760864, + "learning_rate": 5.889339344299809e-06, + "loss": 0.3319, + "step": 22968 + }, + { + "epoch": 0.4598053199209269, + "grad_norm": 1.091347098350525, + "learning_rate": 5.889020328726147e-06, + "loss": 0.3153, + "step": 22969 + }, + { + "epoch": 0.4598253384380552, + "grad_norm": 1.1300289630889893, + "learning_rate": 5.888701309415237e-06, + "loss": 0.2907, + "step": 22970 + }, + { + "epoch": 0.4598453569551835, + "grad_norm": 1.1615442037582397, + "learning_rate": 5.888382286368413e-06, + "loss": 0.2863, + "step": 22971 + }, + { + "epoch": 0.4598653754723119, + "grad_norm": 1.0234631299972534, + "learning_rate": 5.888063259587025e-06, + "loss": 0.3248, + "step": 22972 + }, + { + "epoch": 0.4598853939894402, + "grad_norm": 1.251186490058899, + "learning_rate": 5.887744229072412e-06, + "loss": 0.273, + "step": 22973 + }, + { + "epoch": 0.4599054125065686, + "grad_norm": 1.137479543685913, + "learning_rate": 5.88742519482591e-06, + "loss": 0.31, + "step": 22974 + }, + { + "epoch": 0.45992543102369693, + "grad_norm": 1.3135617971420288, + "learning_rate": 5.887106156848865e-06, + "loss": 0.3081, + "step": 22975 + }, + { + "epoch": 0.4599454495408253, + "grad_norm": 1.1683173179626465, + "learning_rate": 5.886787115142615e-06, + "loss": 0.2869, + "step": 22976 + }, + { + "epoch": 0.45996546805795363, + "grad_norm": 1.8555535078048706, + "learning_rate": 5.8864680697085045e-06, + "loss": 0.6994, + "step": 22977 + }, + { + "epoch": 0.4599854865750819, + "grad_norm": 1.127151608467102, + "learning_rate": 5.886149020547871e-06, + "loss": 0.2853, + "step": 22978 + }, + { + "epoch": 0.4600055050922103, + "grad_norm": 1.0776311159133911, + "learning_rate": 5.88582996766206e-06, + "loss": 0.3146, + "step": 22979 + }, + { + "epoch": 0.4600255236093386, + "grad_norm": 1.1049951314926147, + "learning_rate": 5.8855109110524086e-06, + "loss": 0.3179, + "step": 22980 + }, + { + "epoch": 0.460045542126467, + "grad_norm": 1.2312767505645752, + "learning_rate": 5.885191850720261e-06, + "loss": 0.33, + "step": 22981 + }, + { + "epoch": 0.46006556064359533, + "grad_norm": 1.1638362407684326, + "learning_rate": 5.884872786666957e-06, + "loss": 0.3195, + "step": 22982 + }, + { + "epoch": 0.4600855791607237, + "grad_norm": 1.8836654424667358, + "learning_rate": 5.884553718893839e-06, + "loss": 0.802, + "step": 22983 + }, + { + "epoch": 0.46010559767785203, + "grad_norm": 1.135654091835022, + "learning_rate": 5.884234647402246e-06, + "loss": 0.3402, + "step": 22984 + }, + { + "epoch": 0.4601256161949804, + "grad_norm": 1.1618331670761108, + "learning_rate": 5.8839155721935215e-06, + "loss": 0.3405, + "step": 22985 + }, + { + "epoch": 0.4601456347121087, + "grad_norm": 1.028633713722229, + "learning_rate": 5.883596493269007e-06, + "loss": 0.3238, + "step": 22986 + }, + { + "epoch": 0.460165653229237, + "grad_norm": 1.194174885749817, + "learning_rate": 5.883277410630042e-06, + "loss": 0.3457, + "step": 22987 + }, + { + "epoch": 0.4601856717463654, + "grad_norm": 1.204182744026184, + "learning_rate": 5.882958324277969e-06, + "loss": 0.3353, + "step": 22988 + }, + { + "epoch": 0.4602056902634937, + "grad_norm": 1.3900493383407593, + "learning_rate": 5.882639234214129e-06, + "loss": 0.346, + "step": 22989 + }, + { + "epoch": 0.4602257087806221, + "grad_norm": 1.312715768814087, + "learning_rate": 5.882320140439864e-06, + "loss": 0.2771, + "step": 22990 + }, + { + "epoch": 0.46024572729775043, + "grad_norm": 1.0017015933990479, + "learning_rate": 5.8820010429565144e-06, + "loss": 0.3103, + "step": 22991 + }, + { + "epoch": 0.4602657458148788, + "grad_norm": 1.0640827417373657, + "learning_rate": 5.881681941765423e-06, + "loss": 0.3001, + "step": 22992 + }, + { + "epoch": 0.46028576433200713, + "grad_norm": 1.0095617771148682, + "learning_rate": 5.88136283686793e-06, + "loss": 0.2887, + "step": 22993 + }, + { + "epoch": 0.4603057828491354, + "grad_norm": 1.1302114725112915, + "learning_rate": 5.881043728265379e-06, + "loss": 0.3066, + "step": 22994 + }, + { + "epoch": 0.4603258013662638, + "grad_norm": 1.0418205261230469, + "learning_rate": 5.880724615959109e-06, + "loss": 0.2942, + "step": 22995 + }, + { + "epoch": 0.4603458198833921, + "grad_norm": 0.9955132603645325, + "learning_rate": 5.880405499950461e-06, + "loss": 0.2537, + "step": 22996 + }, + { + "epoch": 0.4603658384005205, + "grad_norm": 1.1620577573776245, + "learning_rate": 5.880086380240778e-06, + "loss": 0.3163, + "step": 22997 + }, + { + "epoch": 0.46038585691764883, + "grad_norm": 1.090300440788269, + "learning_rate": 5.879767256831402e-06, + "loss": 0.3359, + "step": 22998 + }, + { + "epoch": 0.4604058754347772, + "grad_norm": 0.9532424211502075, + "learning_rate": 5.879448129723672e-06, + "loss": 0.2946, + "step": 22999 + }, + { + "epoch": 0.46042589395190553, + "grad_norm": 1.0415880680084229, + "learning_rate": 5.879128998918933e-06, + "loss": 0.3197, + "step": 23000 + }, + { + "epoch": 0.4604459124690339, + "grad_norm": 1.1425141096115112, + "learning_rate": 5.8788098644185255e-06, + "loss": 0.2962, + "step": 23001 + }, + { + "epoch": 0.4604659309861622, + "grad_norm": 1.0893526077270508, + "learning_rate": 5.878490726223789e-06, + "loss": 0.3023, + "step": 23002 + }, + { + "epoch": 0.4604859495032905, + "grad_norm": 1.150582194328308, + "learning_rate": 5.878171584336066e-06, + "loss": 0.2962, + "step": 23003 + }, + { + "epoch": 0.4605059680204189, + "grad_norm": 1.1353968381881714, + "learning_rate": 5.877852438756702e-06, + "loss": 0.3062, + "step": 23004 + }, + { + "epoch": 0.4605259865375472, + "grad_norm": 1.0590095520019531, + "learning_rate": 5.877533289487032e-06, + "loss": 0.3156, + "step": 23005 + }, + { + "epoch": 0.4605460050546756, + "grad_norm": 1.2397342920303345, + "learning_rate": 5.877214136528402e-06, + "loss": 0.3559, + "step": 23006 + }, + { + "epoch": 0.46056602357180393, + "grad_norm": 1.0898200273513794, + "learning_rate": 5.876894979882152e-06, + "loss": 0.3478, + "step": 23007 + }, + { + "epoch": 0.4605860420889323, + "grad_norm": 1.0920456647872925, + "learning_rate": 5.8765758195496255e-06, + "loss": 0.2747, + "step": 23008 + }, + { + "epoch": 0.46060606060606063, + "grad_norm": 1.9561294317245483, + "learning_rate": 5.876256655532161e-06, + "loss": 0.8249, + "step": 23009 + }, + { + "epoch": 0.4606260791231889, + "grad_norm": 1.129278302192688, + "learning_rate": 5.875937487831103e-06, + "loss": 0.3002, + "step": 23010 + }, + { + "epoch": 0.4606460976403173, + "grad_norm": 1.1716560125350952, + "learning_rate": 5.875618316447792e-06, + "loss": 0.3448, + "step": 23011 + }, + { + "epoch": 0.4606661161574456, + "grad_norm": 1.0567868947982788, + "learning_rate": 5.875299141383572e-06, + "loss": 0.2745, + "step": 23012 + }, + { + "epoch": 0.460686134674574, + "grad_norm": 1.9916467666625977, + "learning_rate": 5.87497996263978e-06, + "loss": 0.7668, + "step": 23013 + }, + { + "epoch": 0.46070615319170233, + "grad_norm": 1.3208131790161133, + "learning_rate": 5.874660780217762e-06, + "loss": 0.336, + "step": 23014 + }, + { + "epoch": 0.4607261717088307, + "grad_norm": 1.1533737182617188, + "learning_rate": 5.874341594118858e-06, + "loss": 0.2819, + "step": 23015 + }, + { + "epoch": 0.46074619022595903, + "grad_norm": 1.314598560333252, + "learning_rate": 5.874022404344409e-06, + "loss": 0.3769, + "step": 23016 + }, + { + "epoch": 0.4607662087430874, + "grad_norm": 1.052823781967163, + "learning_rate": 5.87370321089576e-06, + "loss": 0.3532, + "step": 23017 + }, + { + "epoch": 0.4607862272602157, + "grad_norm": 1.0980390310287476, + "learning_rate": 5.873384013774251e-06, + "loss": 0.2835, + "step": 23018 + }, + { + "epoch": 0.460806245777344, + "grad_norm": 1.2768620252609253, + "learning_rate": 5.873064812981222e-06, + "loss": 0.3194, + "step": 23019 + }, + { + "epoch": 0.4608262642944724, + "grad_norm": 1.0538372993469238, + "learning_rate": 5.872745608518016e-06, + "loss": 0.2663, + "step": 23020 + }, + { + "epoch": 0.4608462828116007, + "grad_norm": 1.1033521890640259, + "learning_rate": 5.872426400385977e-06, + "loss": 0.3501, + "step": 23021 + }, + { + "epoch": 0.4608663013287291, + "grad_norm": 1.0898948907852173, + "learning_rate": 5.8721071885864436e-06, + "loss": 0.298, + "step": 23022 + }, + { + "epoch": 0.46088631984585743, + "grad_norm": 1.077329397201538, + "learning_rate": 5.8717879731207595e-06, + "loss": 0.3383, + "step": 23023 + }, + { + "epoch": 0.4609063383629858, + "grad_norm": 1.1866847276687622, + "learning_rate": 5.871468753990266e-06, + "loss": 0.3407, + "step": 23024 + }, + { + "epoch": 0.46092635688011413, + "grad_norm": 1.7250033617019653, + "learning_rate": 5.871149531196307e-06, + "loss": 0.7672, + "step": 23025 + }, + { + "epoch": 0.4609463753972424, + "grad_norm": 1.013595461845398, + "learning_rate": 5.8708303047402225e-06, + "loss": 0.3008, + "step": 23026 + }, + { + "epoch": 0.4609663939143708, + "grad_norm": 1.8846336603164673, + "learning_rate": 5.870511074623354e-06, + "loss": 0.7463, + "step": 23027 + }, + { + "epoch": 0.4609864124314991, + "grad_norm": 1.038698673248291, + "learning_rate": 5.870191840847044e-06, + "loss": 0.3421, + "step": 23028 + }, + { + "epoch": 0.4610064309486275, + "grad_norm": 1.0533870458602905, + "learning_rate": 5.869872603412635e-06, + "loss": 0.3263, + "step": 23029 + }, + { + "epoch": 0.4610264494657558, + "grad_norm": 1.0769939422607422, + "learning_rate": 5.86955336232147e-06, + "loss": 0.3101, + "step": 23030 + }, + { + "epoch": 0.4610464679828842, + "grad_norm": 1.1063989400863647, + "learning_rate": 5.869234117574888e-06, + "loss": 0.2984, + "step": 23031 + }, + { + "epoch": 0.46106648650001253, + "grad_norm": 1.0601086616516113, + "learning_rate": 5.868914869174235e-06, + "loss": 0.3043, + "step": 23032 + }, + { + "epoch": 0.4610865050171409, + "grad_norm": 1.074410319328308, + "learning_rate": 5.868595617120849e-06, + "loss": 0.3256, + "step": 23033 + }, + { + "epoch": 0.4611065235342692, + "grad_norm": 1.0879085063934326, + "learning_rate": 5.868276361416076e-06, + "loss": 0.3669, + "step": 23034 + }, + { + "epoch": 0.4611265420513975, + "grad_norm": 1.926761269569397, + "learning_rate": 5.867957102061253e-06, + "loss": 0.7226, + "step": 23035 + }, + { + "epoch": 0.4611465605685259, + "grad_norm": 1.0973855257034302, + "learning_rate": 5.867637839057727e-06, + "loss": 0.3267, + "step": 23036 + }, + { + "epoch": 0.4611665790856542, + "grad_norm": 2.024087905883789, + "learning_rate": 5.867318572406838e-06, + "loss": 0.8148, + "step": 23037 + }, + { + "epoch": 0.4611865976027826, + "grad_norm": 0.9708749055862427, + "learning_rate": 5.866999302109928e-06, + "loss": 0.2927, + "step": 23038 + }, + { + "epoch": 0.46120661611991093, + "grad_norm": 1.822818636894226, + "learning_rate": 5.866680028168341e-06, + "loss": 0.7846, + "step": 23039 + }, + { + "epoch": 0.4612266346370393, + "grad_norm": 1.1705315113067627, + "learning_rate": 5.8663607505834155e-06, + "loss": 0.301, + "step": 23040 + }, + { + "epoch": 0.46124665315416763, + "grad_norm": 1.119063377380371, + "learning_rate": 5.866041469356497e-06, + "loss": 0.3164, + "step": 23041 + }, + { + "epoch": 0.4612666716712959, + "grad_norm": 1.056674838066101, + "learning_rate": 5.865722184488926e-06, + "loss": 0.3231, + "step": 23042 + }, + { + "epoch": 0.4612866901884243, + "grad_norm": 1.068874716758728, + "learning_rate": 5.865402895982046e-06, + "loss": 0.2834, + "step": 23043 + }, + { + "epoch": 0.4613067087055526, + "grad_norm": 1.1120628118515015, + "learning_rate": 5.865083603837197e-06, + "loss": 0.3132, + "step": 23044 + }, + { + "epoch": 0.461326727222681, + "grad_norm": 1.1569736003875732, + "learning_rate": 5.8647643080557245e-06, + "loss": 0.3333, + "step": 23045 + }, + { + "epoch": 0.4613467457398093, + "grad_norm": 1.8809806108474731, + "learning_rate": 5.864445008638969e-06, + "loss": 0.8146, + "step": 23046 + }, + { + "epoch": 0.4613667642569377, + "grad_norm": 1.0225694179534912, + "learning_rate": 5.864125705588272e-06, + "loss": 0.3012, + "step": 23047 + }, + { + "epoch": 0.46138678277406603, + "grad_norm": 1.0755951404571533, + "learning_rate": 5.863806398904976e-06, + "loss": 0.2833, + "step": 23048 + }, + { + "epoch": 0.4614068012911944, + "grad_norm": 1.1740089654922485, + "learning_rate": 5.863487088590424e-06, + "loss": 0.3913, + "step": 23049 + }, + { + "epoch": 0.4614268198083227, + "grad_norm": 1.0839130878448486, + "learning_rate": 5.8631677746459594e-06, + "loss": 0.2461, + "step": 23050 + }, + { + "epoch": 0.461446838325451, + "grad_norm": 1.1758838891983032, + "learning_rate": 5.862848457072922e-06, + "loss": 0.3484, + "step": 23051 + }, + { + "epoch": 0.4614668568425794, + "grad_norm": 1.0852382183074951, + "learning_rate": 5.862529135872655e-06, + "loss": 0.312, + "step": 23052 + }, + { + "epoch": 0.4614868753597077, + "grad_norm": 1.2254438400268555, + "learning_rate": 5.862209811046502e-06, + "loss": 0.3486, + "step": 23053 + }, + { + "epoch": 0.4615068938768361, + "grad_norm": 1.3423259258270264, + "learning_rate": 5.8618904825958035e-06, + "loss": 0.3077, + "step": 23054 + }, + { + "epoch": 0.46152691239396443, + "grad_norm": 1.088239312171936, + "learning_rate": 5.861571150521903e-06, + "loss": 0.312, + "step": 23055 + }, + { + "epoch": 0.4615469309110928, + "grad_norm": 1.0467147827148438, + "learning_rate": 5.861251814826144e-06, + "loss": 0.2964, + "step": 23056 + }, + { + "epoch": 0.46156694942822113, + "grad_norm": 1.0849623680114746, + "learning_rate": 5.860932475509868e-06, + "loss": 0.2877, + "step": 23057 + }, + { + "epoch": 0.4615869679453494, + "grad_norm": 1.7850275039672852, + "learning_rate": 5.860613132574416e-06, + "loss": 0.7671, + "step": 23058 + }, + { + "epoch": 0.4616069864624778, + "grad_norm": 1.1915161609649658, + "learning_rate": 5.860293786021133e-06, + "loss": 0.3012, + "step": 23059 + }, + { + "epoch": 0.4616270049796061, + "grad_norm": 1.1505318880081177, + "learning_rate": 5.859974435851357e-06, + "loss": 0.3602, + "step": 23060 + }, + { + "epoch": 0.4616470234967345, + "grad_norm": 1.1938470602035522, + "learning_rate": 5.859655082066436e-06, + "loss": 0.3232, + "step": 23061 + }, + { + "epoch": 0.4616670420138628, + "grad_norm": 1.0806070566177368, + "learning_rate": 5.85933572466771e-06, + "loss": 0.3069, + "step": 23062 + }, + { + "epoch": 0.4616870605309912, + "grad_norm": 1.0512547492980957, + "learning_rate": 5.859016363656521e-06, + "loss": 0.3463, + "step": 23063 + }, + { + "epoch": 0.46170707904811953, + "grad_norm": 1.1671792268753052, + "learning_rate": 5.858696999034211e-06, + "loss": 0.3028, + "step": 23064 + }, + { + "epoch": 0.4617270975652479, + "grad_norm": 1.1290123462677002, + "learning_rate": 5.858377630802126e-06, + "loss": 0.3012, + "step": 23065 + }, + { + "epoch": 0.4617471160823762, + "grad_norm": 1.1411577463150024, + "learning_rate": 5.858058258961604e-06, + "loss": 0.3347, + "step": 23066 + }, + { + "epoch": 0.4617671345995045, + "grad_norm": 1.0540180206298828, + "learning_rate": 5.85773888351399e-06, + "loss": 0.304, + "step": 23067 + }, + { + "epoch": 0.4617871531166329, + "grad_norm": 1.230334997177124, + "learning_rate": 5.857419504460629e-06, + "loss": 0.326, + "step": 23068 + }, + { + "epoch": 0.4618071716337612, + "grad_norm": 1.31092369556427, + "learning_rate": 5.857100121802858e-06, + "loss": 0.3026, + "step": 23069 + }, + { + "epoch": 0.4618271901508896, + "grad_norm": 1.089390754699707, + "learning_rate": 5.856780735542024e-06, + "loss": 0.2867, + "step": 23070 + }, + { + "epoch": 0.46184720866801793, + "grad_norm": 1.2409586906433105, + "learning_rate": 5.856461345679467e-06, + "loss": 0.3616, + "step": 23071 + }, + { + "epoch": 0.4618672271851463, + "grad_norm": 1.1093449592590332, + "learning_rate": 5.8561419522165315e-06, + "loss": 0.3045, + "step": 23072 + }, + { + "epoch": 0.46188724570227463, + "grad_norm": 1.1667368412017822, + "learning_rate": 5.855822555154559e-06, + "loss": 0.3859, + "step": 23073 + }, + { + "epoch": 0.4619072642194029, + "grad_norm": 1.2768687009811401, + "learning_rate": 5.855503154494893e-06, + "loss": 0.3165, + "step": 23074 + }, + { + "epoch": 0.4619272827365313, + "grad_norm": 1.0966286659240723, + "learning_rate": 5.855183750238875e-06, + "loss": 0.3148, + "step": 23075 + }, + { + "epoch": 0.4619473012536596, + "grad_norm": 1.050368309020996, + "learning_rate": 5.8548643423878505e-06, + "loss": 0.3041, + "step": 23076 + }, + { + "epoch": 0.461967319770788, + "grad_norm": 1.2055699825286865, + "learning_rate": 5.85454493094316e-06, + "loss": 0.3024, + "step": 23077 + }, + { + "epoch": 0.4619873382879163, + "grad_norm": 1.0837671756744385, + "learning_rate": 5.854225515906144e-06, + "loss": 0.2461, + "step": 23078 + }, + { + "epoch": 0.4620073568050447, + "grad_norm": 1.1713542938232422, + "learning_rate": 5.85390609727815e-06, + "loss": 0.3069, + "step": 23079 + }, + { + "epoch": 0.46202737532217303, + "grad_norm": 1.153093695640564, + "learning_rate": 5.853586675060517e-06, + "loss": 0.3337, + "step": 23080 + }, + { + "epoch": 0.4620473938393014, + "grad_norm": 1.1177589893341064, + "learning_rate": 5.8532672492545915e-06, + "loss": 0.3082, + "step": 23081 + }, + { + "epoch": 0.4620674123564297, + "grad_norm": 1.8513171672821045, + "learning_rate": 5.8529478198617114e-06, + "loss": 0.8075, + "step": 23082 + }, + { + "epoch": 0.462087430873558, + "grad_norm": 1.5781114101409912, + "learning_rate": 5.852628386883226e-06, + "loss": 0.2674, + "step": 23083 + }, + { + "epoch": 0.4621074493906864, + "grad_norm": 1.0684667825698853, + "learning_rate": 5.852308950320471e-06, + "loss": 0.3654, + "step": 23084 + }, + { + "epoch": 0.4621274679078147, + "grad_norm": 1.257771372795105, + "learning_rate": 5.851989510174794e-06, + "loss": 0.3346, + "step": 23085 + }, + { + "epoch": 0.4621474864249431, + "grad_norm": 1.1037105321884155, + "learning_rate": 5.851670066447536e-06, + "loss": 0.3456, + "step": 23086 + }, + { + "epoch": 0.4621675049420714, + "grad_norm": 1.099034309387207, + "learning_rate": 5.851350619140039e-06, + "loss": 0.3165, + "step": 23087 + }, + { + "epoch": 0.4621875234591998, + "grad_norm": 1.2245962619781494, + "learning_rate": 5.85103116825365e-06, + "loss": 0.3395, + "step": 23088 + }, + { + "epoch": 0.46220754197632813, + "grad_norm": 1.0736714601516724, + "learning_rate": 5.850711713789708e-06, + "loss": 0.3098, + "step": 23089 + }, + { + "epoch": 0.4622275604934564, + "grad_norm": 1.8480676412582397, + "learning_rate": 5.8503922557495576e-06, + "loss": 0.8073, + "step": 23090 + }, + { + "epoch": 0.4622475790105848, + "grad_norm": 1.2720059156417847, + "learning_rate": 5.850072794134539e-06, + "loss": 0.2718, + "step": 23091 + }, + { + "epoch": 0.4622675975277131, + "grad_norm": 1.0661371946334839, + "learning_rate": 5.849753328945998e-06, + "loss": 0.2712, + "step": 23092 + }, + { + "epoch": 0.4622876160448415, + "grad_norm": 1.2460613250732422, + "learning_rate": 5.849433860185277e-06, + "loss": 0.3432, + "step": 23093 + }, + { + "epoch": 0.4623076345619698, + "grad_norm": 1.043837070465088, + "learning_rate": 5.8491143878537205e-06, + "loss": 0.2915, + "step": 23094 + }, + { + "epoch": 0.4623276530790982, + "grad_norm": 1.2228227853775024, + "learning_rate": 5.848794911952669e-06, + "loss": 0.3081, + "step": 23095 + }, + { + "epoch": 0.46234767159622653, + "grad_norm": 1.1961464881896973, + "learning_rate": 5.848475432483466e-06, + "loss": 0.3626, + "step": 23096 + }, + { + "epoch": 0.4623676901133549, + "grad_norm": 1.0258227586746216, + "learning_rate": 5.848155949447455e-06, + "loss": 0.334, + "step": 23097 + }, + { + "epoch": 0.4623877086304832, + "grad_norm": 1.0906232595443726, + "learning_rate": 5.84783646284598e-06, + "loss": 0.3068, + "step": 23098 + }, + { + "epoch": 0.4624077271476115, + "grad_norm": 1.2048770189285278, + "learning_rate": 5.8475169726803806e-06, + "loss": 0.3609, + "step": 23099 + }, + { + "epoch": 0.4624277456647399, + "grad_norm": 1.2473766803741455, + "learning_rate": 5.847197478952003e-06, + "loss": 0.3384, + "step": 23100 + }, + { + "epoch": 0.4624477641818682, + "grad_norm": 1.1794573068618774, + "learning_rate": 5.846877981662191e-06, + "loss": 0.3347, + "step": 23101 + }, + { + "epoch": 0.4624677826989966, + "grad_norm": 1.0256125926971436, + "learning_rate": 5.8465584808122845e-06, + "loss": 0.2922, + "step": 23102 + }, + { + "epoch": 0.4624878012161249, + "grad_norm": 1.0384852886199951, + "learning_rate": 5.84623897640363e-06, + "loss": 0.3112, + "step": 23103 + }, + { + "epoch": 0.4625078197332533, + "grad_norm": 1.1664663553237915, + "learning_rate": 5.845919468437567e-06, + "loss": 0.2969, + "step": 23104 + }, + { + "epoch": 0.46252783825038163, + "grad_norm": 0.9728915691375732, + "learning_rate": 5.845599956915442e-06, + "loss": 0.2632, + "step": 23105 + }, + { + "epoch": 0.4625478567675099, + "grad_norm": 1.0187541246414185, + "learning_rate": 5.845280441838596e-06, + "loss": 0.3129, + "step": 23106 + }, + { + "epoch": 0.4625678752846383, + "grad_norm": 1.0693432092666626, + "learning_rate": 5.844960923208374e-06, + "loss": 0.278, + "step": 23107 + }, + { + "epoch": 0.4625878938017666, + "grad_norm": 1.086168646812439, + "learning_rate": 5.844641401026116e-06, + "loss": 0.2668, + "step": 23108 + }, + { + "epoch": 0.462607912318895, + "grad_norm": 1.179306149482727, + "learning_rate": 5.84432187529317e-06, + "loss": 0.3407, + "step": 23109 + }, + { + "epoch": 0.4626279308360233, + "grad_norm": 1.1442291736602783, + "learning_rate": 5.844002346010875e-06, + "loss": 0.3293, + "step": 23110 + }, + { + "epoch": 0.4626479493531517, + "grad_norm": 1.7575682401657104, + "learning_rate": 5.8436828131805754e-06, + "loss": 0.7754, + "step": 23111 + }, + { + "epoch": 0.46266796787028003, + "grad_norm": 1.3556525707244873, + "learning_rate": 5.843363276803615e-06, + "loss": 0.3704, + "step": 23112 + }, + { + "epoch": 0.4626879863874084, + "grad_norm": 1.0349029302597046, + "learning_rate": 5.843043736881338e-06, + "loss": 0.3066, + "step": 23113 + }, + { + "epoch": 0.4627080049045367, + "grad_norm": 1.0323147773742676, + "learning_rate": 5.842724193415086e-06, + "loss": 0.2703, + "step": 23114 + }, + { + "epoch": 0.462728023421665, + "grad_norm": 1.2360823154449463, + "learning_rate": 5.8424046464062026e-06, + "loss": 0.3225, + "step": 23115 + }, + { + "epoch": 0.4627480419387934, + "grad_norm": 1.0743837356567383, + "learning_rate": 5.842085095856031e-06, + "loss": 0.2993, + "step": 23116 + }, + { + "epoch": 0.4627680604559217, + "grad_norm": 1.8771942853927612, + "learning_rate": 5.841765541765916e-06, + "loss": 0.7511, + "step": 23117 + }, + { + "epoch": 0.4627880789730501, + "grad_norm": 1.1479634046554565, + "learning_rate": 5.841445984137199e-06, + "loss": 0.3105, + "step": 23118 + }, + { + "epoch": 0.4628080974901784, + "grad_norm": 1.227831482887268, + "learning_rate": 5.841126422971225e-06, + "loss": 0.298, + "step": 23119 + }, + { + "epoch": 0.4628281160073068, + "grad_norm": 1.1307215690612793, + "learning_rate": 5.840806858269335e-06, + "loss": 0.3608, + "step": 23120 + }, + { + "epoch": 0.46284813452443513, + "grad_norm": 1.108432650566101, + "learning_rate": 5.8404872900328756e-06, + "loss": 0.3378, + "step": 23121 + }, + { + "epoch": 0.4628681530415634, + "grad_norm": 1.2036638259887695, + "learning_rate": 5.840167718263188e-06, + "loss": 0.3508, + "step": 23122 + }, + { + "epoch": 0.4628881715586918, + "grad_norm": 0.9539669156074524, + "learning_rate": 5.839848142961616e-06, + "loss": 0.2823, + "step": 23123 + }, + { + "epoch": 0.4629081900758201, + "grad_norm": 1.1681954860687256, + "learning_rate": 5.839528564129502e-06, + "loss": 0.2834, + "step": 23124 + }, + { + "epoch": 0.4629282085929485, + "grad_norm": 1.079404354095459, + "learning_rate": 5.8392089817681905e-06, + "loss": 0.3137, + "step": 23125 + }, + { + "epoch": 0.4629482271100768, + "grad_norm": 1.2223401069641113, + "learning_rate": 5.838889395879026e-06, + "loss": 0.315, + "step": 23126 + }, + { + "epoch": 0.4629682456272052, + "grad_norm": 0.9539557695388794, + "learning_rate": 5.838569806463352e-06, + "loss": 0.3143, + "step": 23127 + }, + { + "epoch": 0.46298826414433353, + "grad_norm": 1.1860971450805664, + "learning_rate": 5.838250213522509e-06, + "loss": 0.32, + "step": 23128 + }, + { + "epoch": 0.4630082826614619, + "grad_norm": 1.0701290369033813, + "learning_rate": 5.837930617057846e-06, + "loss": 0.289, + "step": 23129 + }, + { + "epoch": 0.4630283011785902, + "grad_norm": 1.8819077014923096, + "learning_rate": 5.8376110170707004e-06, + "loss": 0.8385, + "step": 23130 + }, + { + "epoch": 0.4630483196957185, + "grad_norm": 1.1532630920410156, + "learning_rate": 5.837291413562417e-06, + "loss": 0.2762, + "step": 23131 + }, + { + "epoch": 0.4630683382128469, + "grad_norm": 1.0613371133804321, + "learning_rate": 5.836971806534343e-06, + "loss": 0.3275, + "step": 23132 + }, + { + "epoch": 0.4630883567299752, + "grad_norm": 1.869654655456543, + "learning_rate": 5.836652195987817e-06, + "loss": 0.8647, + "step": 23133 + }, + { + "epoch": 0.4631083752471036, + "grad_norm": 1.8062866926193237, + "learning_rate": 5.836332581924189e-06, + "loss": 0.743, + "step": 23134 + }, + { + "epoch": 0.4631283937642319, + "grad_norm": 1.0243480205535889, + "learning_rate": 5.836012964344796e-06, + "loss": 0.3419, + "step": 23135 + }, + { + "epoch": 0.4631484122813603, + "grad_norm": 1.049241065979004, + "learning_rate": 5.835693343250985e-06, + "loss": 0.2653, + "step": 23136 + }, + { + "epoch": 0.46316843079848863, + "grad_norm": 1.11899995803833, + "learning_rate": 5.8353737186441e-06, + "loss": 0.3379, + "step": 23137 + }, + { + "epoch": 0.4631884493156169, + "grad_norm": 1.2024542093276978, + "learning_rate": 5.835054090525483e-06, + "loss": 0.4138, + "step": 23138 + }, + { + "epoch": 0.4632084678327453, + "grad_norm": 1.1181379556655884, + "learning_rate": 5.834734458896477e-06, + "loss": 0.2934, + "step": 23139 + }, + { + "epoch": 0.4632284863498736, + "grad_norm": 2.0160107612609863, + "learning_rate": 5.8344148237584296e-06, + "loss": 0.8837, + "step": 23140 + }, + { + "epoch": 0.463248504867002, + "grad_norm": 1.0641120672225952, + "learning_rate": 5.83409518511268e-06, + "loss": 0.2678, + "step": 23141 + }, + { + "epoch": 0.4632685233841303, + "grad_norm": 1.0757664442062378, + "learning_rate": 5.833775542960575e-06, + "loss": 0.2716, + "step": 23142 + }, + { + "epoch": 0.4632885419012587, + "grad_norm": 1.060333490371704, + "learning_rate": 5.833455897303456e-06, + "loss": 0.3087, + "step": 23143 + }, + { + "epoch": 0.463308560418387, + "grad_norm": 1.235695481300354, + "learning_rate": 5.833136248142667e-06, + "loss": 0.3407, + "step": 23144 + }, + { + "epoch": 0.4633285789355154, + "grad_norm": 1.1917095184326172, + "learning_rate": 5.832816595479555e-06, + "loss": 0.3355, + "step": 23145 + }, + { + "epoch": 0.4633485974526437, + "grad_norm": 1.0541139841079712, + "learning_rate": 5.8324969393154584e-06, + "loss": 0.3314, + "step": 23146 + }, + { + "epoch": 0.463368615969772, + "grad_norm": 1.01425039768219, + "learning_rate": 5.832177279651726e-06, + "loss": 0.2999, + "step": 23147 + }, + { + "epoch": 0.4633886344869004, + "grad_norm": 1.1022281646728516, + "learning_rate": 5.831857616489698e-06, + "loss": 0.3148, + "step": 23148 + }, + { + "epoch": 0.4634086530040287, + "grad_norm": 1.8134068250656128, + "learning_rate": 5.831537949830721e-06, + "loss": 0.795, + "step": 23149 + }, + { + "epoch": 0.4634286715211571, + "grad_norm": 1.1152565479278564, + "learning_rate": 5.8312182796761365e-06, + "loss": 0.2629, + "step": 23150 + }, + { + "epoch": 0.4634486900382854, + "grad_norm": 1.1223453283309937, + "learning_rate": 5.830898606027289e-06, + "loss": 0.2848, + "step": 23151 + }, + { + "epoch": 0.4634687085554138, + "grad_norm": 1.0778170824050903, + "learning_rate": 5.830578928885524e-06, + "loss": 0.3469, + "step": 23152 + }, + { + "epoch": 0.46348872707254213, + "grad_norm": 1.0832537412643433, + "learning_rate": 5.8302592482521815e-06, + "loss": 0.3502, + "step": 23153 + }, + { + "epoch": 0.4635087455896704, + "grad_norm": 0.9293630123138428, + "learning_rate": 5.829939564128611e-06, + "loss": 0.2997, + "step": 23154 + }, + { + "epoch": 0.4635287641067988, + "grad_norm": 1.0618724822998047, + "learning_rate": 5.82961987651615e-06, + "loss": 0.294, + "step": 23155 + }, + { + "epoch": 0.4635487826239271, + "grad_norm": 1.1651012897491455, + "learning_rate": 5.829300185416148e-06, + "loss": 0.3142, + "step": 23156 + }, + { + "epoch": 0.4635688011410555, + "grad_norm": 1.0726678371429443, + "learning_rate": 5.828980490829944e-06, + "loss": 0.3205, + "step": 23157 + }, + { + "epoch": 0.4635888196581838, + "grad_norm": 1.0893278121948242, + "learning_rate": 5.828660792758887e-06, + "loss": 0.3007, + "step": 23158 + }, + { + "epoch": 0.4636088381753122, + "grad_norm": 1.1683317422866821, + "learning_rate": 5.828341091204316e-06, + "loss": 0.2929, + "step": 23159 + }, + { + "epoch": 0.4636288566924405, + "grad_norm": 1.2266738414764404, + "learning_rate": 5.82802138616758e-06, + "loss": 0.3222, + "step": 23160 + }, + { + "epoch": 0.4636488752095689, + "grad_norm": 1.0652801990509033, + "learning_rate": 5.827701677650018e-06, + "loss": 0.3544, + "step": 23161 + }, + { + "epoch": 0.4636688937266972, + "grad_norm": 1.0476861000061035, + "learning_rate": 5.827381965652978e-06, + "loss": 0.2918, + "step": 23162 + }, + { + "epoch": 0.4636889122438255, + "grad_norm": 1.0855698585510254, + "learning_rate": 5.8270622501777996e-06, + "loss": 0.3103, + "step": 23163 + }, + { + "epoch": 0.4637089307609539, + "grad_norm": 1.2168549299240112, + "learning_rate": 5.826742531225831e-06, + "loss": 0.2794, + "step": 23164 + }, + { + "epoch": 0.4637289492780822, + "grad_norm": 1.1738466024398804, + "learning_rate": 5.826422808798415e-06, + "loss": 0.3022, + "step": 23165 + }, + { + "epoch": 0.4637489677952106, + "grad_norm": 1.759250283241272, + "learning_rate": 5.826103082896896e-06, + "loss": 0.8537, + "step": 23166 + }, + { + "epoch": 0.4637689863123389, + "grad_norm": 1.332129716873169, + "learning_rate": 5.825783353522617e-06, + "loss": 0.2758, + "step": 23167 + }, + { + "epoch": 0.4637890048294673, + "grad_norm": 1.0778168439865112, + "learning_rate": 5.825463620676922e-06, + "loss": 0.2887, + "step": 23168 + }, + { + "epoch": 0.46380902334659563, + "grad_norm": 1.9290452003479004, + "learning_rate": 5.825143884361156e-06, + "loss": 0.8049, + "step": 23169 + }, + { + "epoch": 0.4638290418637239, + "grad_norm": 1.0096715688705444, + "learning_rate": 5.824824144576661e-06, + "loss": 0.2827, + "step": 23170 + }, + { + "epoch": 0.4638490603808523, + "grad_norm": 1.0816943645477295, + "learning_rate": 5.8245044013247855e-06, + "loss": 0.3323, + "step": 23171 + }, + { + "epoch": 0.4638690788979806, + "grad_norm": 1.18955659866333, + "learning_rate": 5.82418465460687e-06, + "loss": 0.3156, + "step": 23172 + }, + { + "epoch": 0.463889097415109, + "grad_norm": 1.086134910583496, + "learning_rate": 5.823864904424259e-06, + "loss": 0.3092, + "step": 23173 + }, + { + "epoch": 0.4639091159322373, + "grad_norm": 1.093558430671692, + "learning_rate": 5.823545150778298e-06, + "loss": 0.3005, + "step": 23174 + }, + { + "epoch": 0.4639291344493657, + "grad_norm": 1.0959243774414062, + "learning_rate": 5.82322539367033e-06, + "loss": 0.3042, + "step": 23175 + }, + { + "epoch": 0.463949152966494, + "grad_norm": 1.0612670183181763, + "learning_rate": 5.822905633101698e-06, + "loss": 0.3116, + "step": 23176 + }, + { + "epoch": 0.4639691714836224, + "grad_norm": 1.9729231595993042, + "learning_rate": 5.822585869073749e-06, + "loss": 0.7278, + "step": 23177 + }, + { + "epoch": 0.4639891900007507, + "grad_norm": 0.9973174929618835, + "learning_rate": 5.822266101587828e-06, + "loss": 0.2722, + "step": 23178 + }, + { + "epoch": 0.464009208517879, + "grad_norm": 1.1333574056625366, + "learning_rate": 5.821946330645275e-06, + "loss": 0.2948, + "step": 23179 + }, + { + "epoch": 0.4640292270350074, + "grad_norm": 1.0137344598770142, + "learning_rate": 5.8216265562474375e-06, + "loss": 0.2886, + "step": 23180 + }, + { + "epoch": 0.4640492455521357, + "grad_norm": 1.0646547079086304, + "learning_rate": 5.821306778395658e-06, + "loss": 0.3082, + "step": 23181 + }, + { + "epoch": 0.4640692640692641, + "grad_norm": 1.3682535886764526, + "learning_rate": 5.820986997091282e-06, + "loss": 0.3634, + "step": 23182 + }, + { + "epoch": 0.4640892825863924, + "grad_norm": 1.1955159902572632, + "learning_rate": 5.820667212335653e-06, + "loss": 0.3284, + "step": 23183 + }, + { + "epoch": 0.4641093011035208, + "grad_norm": 1.3163175582885742, + "learning_rate": 5.820347424130116e-06, + "loss": 0.3313, + "step": 23184 + }, + { + "epoch": 0.46412931962064913, + "grad_norm": 1.9489376544952393, + "learning_rate": 5.820027632476015e-06, + "loss": 0.7668, + "step": 23185 + }, + { + "epoch": 0.4641493381377774, + "grad_norm": 1.1361169815063477, + "learning_rate": 5.819707837374694e-06, + "loss": 0.338, + "step": 23186 + }, + { + "epoch": 0.4641693566549058, + "grad_norm": 1.178424596786499, + "learning_rate": 5.819388038827499e-06, + "loss": 0.3492, + "step": 23187 + }, + { + "epoch": 0.4641893751720341, + "grad_norm": 1.2110270261764526, + "learning_rate": 5.819068236835771e-06, + "loss": 0.3813, + "step": 23188 + }, + { + "epoch": 0.4642093936891625, + "grad_norm": 1.07796049118042, + "learning_rate": 5.8187484314008565e-06, + "loss": 0.3253, + "step": 23189 + }, + { + "epoch": 0.4642294122062908, + "grad_norm": 1.7324986457824707, + "learning_rate": 5.818428622524101e-06, + "loss": 0.77, + "step": 23190 + }, + { + "epoch": 0.4642494307234192, + "grad_norm": 1.084601640701294, + "learning_rate": 5.818108810206849e-06, + "loss": 0.304, + "step": 23191 + }, + { + "epoch": 0.4642694492405475, + "grad_norm": 1.06199049949646, + "learning_rate": 5.817788994450442e-06, + "loss": 0.3226, + "step": 23192 + }, + { + "epoch": 0.4642894677576759, + "grad_norm": 1.1431463956832886, + "learning_rate": 5.817469175256225e-06, + "loss": 0.3036, + "step": 23193 + }, + { + "epoch": 0.4643094862748042, + "grad_norm": 1.2503753900527954, + "learning_rate": 5.817149352625545e-06, + "loss": 0.296, + "step": 23194 + }, + { + "epoch": 0.4643295047919325, + "grad_norm": 1.1527382135391235, + "learning_rate": 5.8168295265597445e-06, + "loss": 0.3049, + "step": 23195 + }, + { + "epoch": 0.4643495233090609, + "grad_norm": 1.0569225549697876, + "learning_rate": 5.816509697060167e-06, + "loss": 0.3084, + "step": 23196 + }, + { + "epoch": 0.4643695418261892, + "grad_norm": 1.0554474592208862, + "learning_rate": 5.81618986412816e-06, + "loss": 0.3081, + "step": 23197 + }, + { + "epoch": 0.4643895603433176, + "grad_norm": 1.115736722946167, + "learning_rate": 5.815870027765067e-06, + "loss": 0.3739, + "step": 23198 + }, + { + "epoch": 0.4644095788604459, + "grad_norm": 1.2769289016723633, + "learning_rate": 5.815550187972231e-06, + "loss": 0.3123, + "step": 23199 + }, + { + "epoch": 0.4644295973775743, + "grad_norm": 1.0336288213729858, + "learning_rate": 5.815230344750999e-06, + "loss": 0.316, + "step": 23200 + }, + { + "epoch": 0.4644496158947026, + "grad_norm": 2.0135154724121094, + "learning_rate": 5.814910498102712e-06, + "loss": 0.806, + "step": 23201 + }, + { + "epoch": 0.4644696344118309, + "grad_norm": 1.0059912204742432, + "learning_rate": 5.814590648028717e-06, + "loss": 0.3174, + "step": 23202 + }, + { + "epoch": 0.4644896529289593, + "grad_norm": 1.208020567893982, + "learning_rate": 5.814270794530359e-06, + "loss": 0.3065, + "step": 23203 + }, + { + "epoch": 0.4645096714460876, + "grad_norm": 1.0575084686279297, + "learning_rate": 5.813950937608981e-06, + "loss": 0.3166, + "step": 23204 + }, + { + "epoch": 0.464529689963216, + "grad_norm": 1.1615673303604126, + "learning_rate": 5.81363107726593e-06, + "loss": 0.349, + "step": 23205 + }, + { + "epoch": 0.4645497084803443, + "grad_norm": 1.137973427772522, + "learning_rate": 5.813311213502547e-06, + "loss": 0.3021, + "step": 23206 + }, + { + "epoch": 0.4645697269974727, + "grad_norm": 1.1055545806884766, + "learning_rate": 5.81299134632018e-06, + "loss": 0.3429, + "step": 23207 + }, + { + "epoch": 0.464589745514601, + "grad_norm": 1.0934950113296509, + "learning_rate": 5.812671475720173e-06, + "loss": 0.3527, + "step": 23208 + }, + { + "epoch": 0.4646097640317293, + "grad_norm": 1.9703480005264282, + "learning_rate": 5.812351601703868e-06, + "loss": 0.815, + "step": 23209 + }, + { + "epoch": 0.4646297825488577, + "grad_norm": 1.0744690895080566, + "learning_rate": 5.812031724272612e-06, + "loss": 0.3045, + "step": 23210 + }, + { + "epoch": 0.464649801065986, + "grad_norm": 1.2488117218017578, + "learning_rate": 5.811711843427751e-06, + "loss": 0.3656, + "step": 23211 + }, + { + "epoch": 0.4646698195831144, + "grad_norm": 1.0793336629867554, + "learning_rate": 5.811391959170627e-06, + "loss": 0.3187, + "step": 23212 + }, + { + "epoch": 0.4646898381002427, + "grad_norm": 1.1495797634124756, + "learning_rate": 5.811072071502587e-06, + "loss": 0.3304, + "step": 23213 + }, + { + "epoch": 0.4647098566173711, + "grad_norm": 1.0121606588363647, + "learning_rate": 5.810752180424973e-06, + "loss": 0.2519, + "step": 23214 + }, + { + "epoch": 0.4647298751344994, + "grad_norm": 1.903461217880249, + "learning_rate": 5.810432285939131e-06, + "loss": 0.8741, + "step": 23215 + }, + { + "epoch": 0.4647498936516278, + "grad_norm": 1.092805027961731, + "learning_rate": 5.810112388046408e-06, + "loss": 0.3173, + "step": 23216 + }, + { + "epoch": 0.46476991216875607, + "grad_norm": 0.9360358119010925, + "learning_rate": 5.809792486748145e-06, + "loss": 0.2487, + "step": 23217 + }, + { + "epoch": 0.4647899306858844, + "grad_norm": 0.9727925062179565, + "learning_rate": 5.80947258204569e-06, + "loss": 0.2769, + "step": 23218 + }, + { + "epoch": 0.4648099492030128, + "grad_norm": 1.129318356513977, + "learning_rate": 5.809152673940386e-06, + "loss": 0.2889, + "step": 23219 + }, + { + "epoch": 0.4648299677201411, + "grad_norm": 1.1992342472076416, + "learning_rate": 5.808832762433579e-06, + "loss": 0.3012, + "step": 23220 + }, + { + "epoch": 0.4648499862372695, + "grad_norm": 1.0504894256591797, + "learning_rate": 5.8085128475266115e-06, + "loss": 0.3188, + "step": 23221 + }, + { + "epoch": 0.4648700047543978, + "grad_norm": 1.223958134651184, + "learning_rate": 5.808192929220831e-06, + "loss": 0.3204, + "step": 23222 + }, + { + "epoch": 0.4648900232715262, + "grad_norm": 1.1373586654663086, + "learning_rate": 5.807873007517582e-06, + "loss": 0.3399, + "step": 23223 + }, + { + "epoch": 0.4649100417886545, + "grad_norm": 1.1472971439361572, + "learning_rate": 5.807553082418207e-06, + "loss": 0.3323, + "step": 23224 + }, + { + "epoch": 0.4649300603057828, + "grad_norm": 1.9515013694763184, + "learning_rate": 5.807233153924055e-06, + "loss": 0.7692, + "step": 23225 + }, + { + "epoch": 0.4649500788229112, + "grad_norm": 1.1682476997375488, + "learning_rate": 5.806913222036466e-06, + "loss": 0.3076, + "step": 23226 + }, + { + "epoch": 0.4649700973400395, + "grad_norm": 1.1687999963760376, + "learning_rate": 5.8065932867567875e-06, + "loss": 0.3034, + "step": 23227 + }, + { + "epoch": 0.4649901158571679, + "grad_norm": 1.1053684949874878, + "learning_rate": 5.806273348086365e-06, + "loss": 0.2625, + "step": 23228 + }, + { + "epoch": 0.4650101343742962, + "grad_norm": 1.2967876195907593, + "learning_rate": 5.805953406026544e-06, + "loss": 0.3524, + "step": 23229 + }, + { + "epoch": 0.4650301528914246, + "grad_norm": 1.2484025955200195, + "learning_rate": 5.805633460578668e-06, + "loss": 0.2956, + "step": 23230 + }, + { + "epoch": 0.4650501714085529, + "grad_norm": 1.1159782409667969, + "learning_rate": 5.805313511744082e-06, + "loss": 0.3058, + "step": 23231 + }, + { + "epoch": 0.4650701899256813, + "grad_norm": 1.0664515495300293, + "learning_rate": 5.804993559524132e-06, + "loss": 0.2857, + "step": 23232 + }, + { + "epoch": 0.46509020844280957, + "grad_norm": 1.1980230808258057, + "learning_rate": 5.804673603920161e-06, + "loss": 0.3432, + "step": 23233 + }, + { + "epoch": 0.4651102269599379, + "grad_norm": 1.2616254091262817, + "learning_rate": 5.804353644933516e-06, + "loss": 0.3198, + "step": 23234 + }, + { + "epoch": 0.4651302454770663, + "grad_norm": 0.9730897545814514, + "learning_rate": 5.804033682565541e-06, + "loss": 0.2804, + "step": 23235 + }, + { + "epoch": 0.4651502639941946, + "grad_norm": 1.0157407522201538, + "learning_rate": 5.803713716817582e-06, + "loss": 0.3122, + "step": 23236 + }, + { + "epoch": 0.465170282511323, + "grad_norm": 1.217918038368225, + "learning_rate": 5.803393747690983e-06, + "loss": 0.2961, + "step": 23237 + }, + { + "epoch": 0.4651903010284513, + "grad_norm": 1.0305184125900269, + "learning_rate": 5.80307377518709e-06, + "loss": 0.3302, + "step": 23238 + }, + { + "epoch": 0.4652103195455797, + "grad_norm": 1.1112526655197144, + "learning_rate": 5.802753799307247e-06, + "loss": 0.3475, + "step": 23239 + }, + { + "epoch": 0.465230338062708, + "grad_norm": 1.0617189407348633, + "learning_rate": 5.8024338200528e-06, + "loss": 0.2998, + "step": 23240 + }, + { + "epoch": 0.4652503565798363, + "grad_norm": 1.1990845203399658, + "learning_rate": 5.802113837425094e-06, + "loss": 0.3456, + "step": 23241 + }, + { + "epoch": 0.4652703750969647, + "grad_norm": 1.073839545249939, + "learning_rate": 5.801793851425475e-06, + "loss": 0.3278, + "step": 23242 + }, + { + "epoch": 0.465290393614093, + "grad_norm": 1.274543285369873, + "learning_rate": 5.8014738620552855e-06, + "loss": 0.3482, + "step": 23243 + }, + { + "epoch": 0.4653104121312214, + "grad_norm": 1.6779171228408813, + "learning_rate": 5.8011538693158746e-06, + "loss": 0.8285, + "step": 23244 + }, + { + "epoch": 0.4653304306483497, + "grad_norm": 1.1176719665527344, + "learning_rate": 5.800833873208584e-06, + "loss": 0.3458, + "step": 23245 + }, + { + "epoch": 0.4653504491654781, + "grad_norm": 1.1720653772354126, + "learning_rate": 5.80051387373476e-06, + "loss": 0.3074, + "step": 23246 + }, + { + "epoch": 0.4653704676826064, + "grad_norm": 1.2392797470092773, + "learning_rate": 5.8001938708957475e-06, + "loss": 0.2905, + "step": 23247 + }, + { + "epoch": 0.4653904861997348, + "grad_norm": 1.1908208131790161, + "learning_rate": 5.799873864692892e-06, + "loss": 0.3063, + "step": 23248 + }, + { + "epoch": 0.46541050471686307, + "grad_norm": 1.1903480291366577, + "learning_rate": 5.79955385512754e-06, + "loss": 0.3531, + "step": 23249 + }, + { + "epoch": 0.4654305232339914, + "grad_norm": 1.1470208168029785, + "learning_rate": 5.799233842201035e-06, + "loss": 0.3327, + "step": 23250 + }, + { + "epoch": 0.4654505417511198, + "grad_norm": 1.0073041915893555, + "learning_rate": 5.798913825914724e-06, + "loss": 0.3097, + "step": 23251 + }, + { + "epoch": 0.4654705602682481, + "grad_norm": 1.2474708557128906, + "learning_rate": 5.79859380626995e-06, + "loss": 0.3333, + "step": 23252 + }, + { + "epoch": 0.4654905787853765, + "grad_norm": 1.1306711435317993, + "learning_rate": 5.798273783268059e-06, + "loss": 0.2863, + "step": 23253 + }, + { + "epoch": 0.4655105973025048, + "grad_norm": 1.067553162574768, + "learning_rate": 5.7979537569103975e-06, + "loss": 0.3155, + "step": 23254 + }, + { + "epoch": 0.4655306158196332, + "grad_norm": 1.1329556703567505, + "learning_rate": 5.797633727198311e-06, + "loss": 0.3178, + "step": 23255 + }, + { + "epoch": 0.4655506343367615, + "grad_norm": 1.990044116973877, + "learning_rate": 5.797313694133143e-06, + "loss": 0.7756, + "step": 23256 + }, + { + "epoch": 0.4655706528538898, + "grad_norm": 1.2071723937988281, + "learning_rate": 5.79699365771624e-06, + "loss": 0.2967, + "step": 23257 + }, + { + "epoch": 0.46559067137101817, + "grad_norm": 1.0495038032531738, + "learning_rate": 5.796673617948948e-06, + "loss": 0.3164, + "step": 23258 + }, + { + "epoch": 0.4656106898881465, + "grad_norm": 1.1143367290496826, + "learning_rate": 5.79635357483261e-06, + "loss": 0.3143, + "step": 23259 + }, + { + "epoch": 0.4656307084052749, + "grad_norm": 1.1927155256271362, + "learning_rate": 5.796033528368574e-06, + "loss": 0.346, + "step": 23260 + }, + { + "epoch": 0.4656507269224032, + "grad_norm": 1.1191715002059937, + "learning_rate": 5.795713478558184e-06, + "loss": 0.3752, + "step": 23261 + }, + { + "epoch": 0.4656707454395316, + "grad_norm": 1.132498860359192, + "learning_rate": 5.795393425402785e-06, + "loss": 0.3277, + "step": 23262 + }, + { + "epoch": 0.4656907639566599, + "grad_norm": 1.0163987874984741, + "learning_rate": 5.795073368903724e-06, + "loss": 0.2828, + "step": 23263 + }, + { + "epoch": 0.4657107824737883, + "grad_norm": 1.0970464944839478, + "learning_rate": 5.794753309062347e-06, + "loss": 0.3038, + "step": 23264 + }, + { + "epoch": 0.46573080099091657, + "grad_norm": 1.1110669374465942, + "learning_rate": 5.794433245879995e-06, + "loss": 0.296, + "step": 23265 + }, + { + "epoch": 0.4657508195080449, + "grad_norm": 1.0222432613372803, + "learning_rate": 5.7941131793580194e-06, + "loss": 0.2997, + "step": 23266 + }, + { + "epoch": 0.4657708380251733, + "grad_norm": 1.084036946296692, + "learning_rate": 5.793793109497763e-06, + "loss": 0.3399, + "step": 23267 + }, + { + "epoch": 0.4657908565423016, + "grad_norm": 1.376235008239746, + "learning_rate": 5.79347303630057e-06, + "loss": 0.2739, + "step": 23268 + }, + { + "epoch": 0.46581087505943, + "grad_norm": 1.095918893814087, + "learning_rate": 5.793152959767787e-06, + "loss": 0.3918, + "step": 23269 + }, + { + "epoch": 0.4658308935765583, + "grad_norm": 1.194604516029358, + "learning_rate": 5.79283287990076e-06, + "loss": 0.294, + "step": 23270 + }, + { + "epoch": 0.4658509120936867, + "grad_norm": 1.062029242515564, + "learning_rate": 5.7925127967008345e-06, + "loss": 0.3427, + "step": 23271 + }, + { + "epoch": 0.465870930610815, + "grad_norm": 1.1803772449493408, + "learning_rate": 5.792192710169354e-06, + "loss": 0.3423, + "step": 23272 + }, + { + "epoch": 0.4658909491279433, + "grad_norm": 1.1260775327682495, + "learning_rate": 5.791872620307667e-06, + "loss": 0.2883, + "step": 23273 + }, + { + "epoch": 0.46591096764507167, + "grad_norm": 1.1257327795028687, + "learning_rate": 5.791552527117118e-06, + "loss": 0.288, + "step": 23274 + }, + { + "epoch": 0.4659309861622, + "grad_norm": 1.086037039756775, + "learning_rate": 5.791232430599053e-06, + "loss": 0.2969, + "step": 23275 + }, + { + "epoch": 0.4659510046793284, + "grad_norm": 1.141181230545044, + "learning_rate": 5.790912330754818e-06, + "loss": 0.3305, + "step": 23276 + }, + { + "epoch": 0.4659710231964567, + "grad_norm": 1.2322242259979248, + "learning_rate": 5.790592227585756e-06, + "loss": 0.3476, + "step": 23277 + }, + { + "epoch": 0.4659910417135851, + "grad_norm": 1.0837523937225342, + "learning_rate": 5.7902721210932145e-06, + "loss": 0.3077, + "step": 23278 + }, + { + "epoch": 0.4660110602307134, + "grad_norm": 1.898347020149231, + "learning_rate": 5.789952011278539e-06, + "loss": 0.8562, + "step": 23279 + }, + { + "epoch": 0.4660310787478418, + "grad_norm": 1.245439052581787, + "learning_rate": 5.7896318981430765e-06, + "loss": 0.31, + "step": 23280 + }, + { + "epoch": 0.46605109726497007, + "grad_norm": 1.1780014038085938, + "learning_rate": 5.7893117816881694e-06, + "loss": 0.3736, + "step": 23281 + }, + { + "epoch": 0.4660711157820984, + "grad_norm": 1.308722972869873, + "learning_rate": 5.7889916619151675e-06, + "loss": 0.3915, + "step": 23282 + }, + { + "epoch": 0.4660911342992268, + "grad_norm": 1.0174115896224976, + "learning_rate": 5.788671538825413e-06, + "loss": 0.3014, + "step": 23283 + }, + { + "epoch": 0.4661111528163551, + "grad_norm": 1.0225462913513184, + "learning_rate": 5.788351412420254e-06, + "loss": 0.2792, + "step": 23284 + }, + { + "epoch": 0.4661311713334835, + "grad_norm": 1.1023180484771729, + "learning_rate": 5.788031282701035e-06, + "loss": 0.307, + "step": 23285 + }, + { + "epoch": 0.4661511898506118, + "grad_norm": 1.1195775270462036, + "learning_rate": 5.787711149669102e-06, + "loss": 0.3202, + "step": 23286 + }, + { + "epoch": 0.4661712083677402, + "grad_norm": 1.160038948059082, + "learning_rate": 5.787391013325801e-06, + "loss": 0.2837, + "step": 23287 + }, + { + "epoch": 0.4661912268848685, + "grad_norm": 1.7478866577148438, + "learning_rate": 5.787070873672478e-06, + "loss": 0.7449, + "step": 23288 + }, + { + "epoch": 0.4662112454019968, + "grad_norm": 1.1597511768341064, + "learning_rate": 5.7867507307104785e-06, + "loss": 0.3291, + "step": 23289 + }, + { + "epoch": 0.46623126391912517, + "grad_norm": 1.138319730758667, + "learning_rate": 5.786430584441148e-06, + "loss": 0.3182, + "step": 23290 + }, + { + "epoch": 0.4662512824362535, + "grad_norm": 1.1428643465042114, + "learning_rate": 5.786110434865831e-06, + "loss": 0.3315, + "step": 23291 + }, + { + "epoch": 0.4662713009533819, + "grad_norm": 1.1276074647903442, + "learning_rate": 5.7857902819858756e-06, + "loss": 0.2936, + "step": 23292 + }, + { + "epoch": 0.4662913194705102, + "grad_norm": 1.102318525314331, + "learning_rate": 5.785470125802629e-06, + "loss": 0.3204, + "step": 23293 + }, + { + "epoch": 0.4663113379876386, + "grad_norm": 1.0376046895980835, + "learning_rate": 5.7851499663174325e-06, + "loss": 0.292, + "step": 23294 + }, + { + "epoch": 0.4663313565047669, + "grad_norm": 1.3296804428100586, + "learning_rate": 5.784829803531637e-06, + "loss": 0.323, + "step": 23295 + }, + { + "epoch": 0.4663513750218953, + "grad_norm": 1.0844969749450684, + "learning_rate": 5.784509637446585e-06, + "loss": 0.3084, + "step": 23296 + }, + { + "epoch": 0.46637139353902357, + "grad_norm": 1.1400827169418335, + "learning_rate": 5.784189468063622e-06, + "loss": 0.2925, + "step": 23297 + }, + { + "epoch": 0.4663914120561519, + "grad_norm": 0.9884691834449768, + "learning_rate": 5.7838692953840955e-06, + "loss": 0.3069, + "step": 23298 + }, + { + "epoch": 0.4664114305732803, + "grad_norm": 1.0290757417678833, + "learning_rate": 5.783549119409352e-06, + "loss": 0.3307, + "step": 23299 + }, + { + "epoch": 0.4664314490904086, + "grad_norm": 1.0454730987548828, + "learning_rate": 5.783228940140737e-06, + "loss": 0.3348, + "step": 23300 + }, + { + "epoch": 0.466451467607537, + "grad_norm": 1.351020097732544, + "learning_rate": 5.782908757579595e-06, + "loss": 0.3178, + "step": 23301 + }, + { + "epoch": 0.4664714861246653, + "grad_norm": 1.2180315256118774, + "learning_rate": 5.782588571727275e-06, + "loss": 0.3107, + "step": 23302 + }, + { + "epoch": 0.4664915046417937, + "grad_norm": 1.2610023021697998, + "learning_rate": 5.782268382585118e-06, + "loss": 0.3481, + "step": 23303 + }, + { + "epoch": 0.466511523158922, + "grad_norm": 1.1563297510147095, + "learning_rate": 5.781948190154474e-06, + "loss": 0.2841, + "step": 23304 + }, + { + "epoch": 0.4665315416760503, + "grad_norm": 1.0404795408248901, + "learning_rate": 5.781627994436688e-06, + "loss": 0.2965, + "step": 23305 + }, + { + "epoch": 0.46655156019317867, + "grad_norm": 1.7754713296890259, + "learning_rate": 5.781307795433109e-06, + "loss": 0.769, + "step": 23306 + }, + { + "epoch": 0.466571578710307, + "grad_norm": 1.3199408054351807, + "learning_rate": 5.780987593145078e-06, + "loss": 0.293, + "step": 23307 + }, + { + "epoch": 0.4665915972274354, + "grad_norm": 1.1017417907714844, + "learning_rate": 5.780667387573943e-06, + "loss": 0.2775, + "step": 23308 + }, + { + "epoch": 0.4666116157445637, + "grad_norm": 1.1025842428207397, + "learning_rate": 5.7803471787210505e-06, + "loss": 0.3309, + "step": 23309 + }, + { + "epoch": 0.4666316342616921, + "grad_norm": 1.0615911483764648, + "learning_rate": 5.780026966587746e-06, + "loss": 0.3266, + "step": 23310 + }, + { + "epoch": 0.4666516527788204, + "grad_norm": 1.1345475912094116, + "learning_rate": 5.779706751175376e-06, + "loss": 0.3139, + "step": 23311 + }, + { + "epoch": 0.4666716712959488, + "grad_norm": 1.0459991693496704, + "learning_rate": 5.779386532485287e-06, + "loss": 0.2921, + "step": 23312 + }, + { + "epoch": 0.46669168981307707, + "grad_norm": 1.1884255409240723, + "learning_rate": 5.779066310518825e-06, + "loss": 0.3793, + "step": 23313 + }, + { + "epoch": 0.4667117083302054, + "grad_norm": 1.1155791282653809, + "learning_rate": 5.778746085277336e-06, + "loss": 0.3489, + "step": 23314 + }, + { + "epoch": 0.46673172684733377, + "grad_norm": 1.1250109672546387, + "learning_rate": 5.778425856762166e-06, + "loss": 0.2921, + "step": 23315 + }, + { + "epoch": 0.4667517453644621, + "grad_norm": 1.1072098016738892, + "learning_rate": 5.77810562497466e-06, + "loss": 0.309, + "step": 23316 + }, + { + "epoch": 0.4667717638815905, + "grad_norm": 1.0730311870574951, + "learning_rate": 5.777785389916166e-06, + "loss": 0.3128, + "step": 23317 + }, + { + "epoch": 0.4667917823987188, + "grad_norm": 2.0227341651916504, + "learning_rate": 5.777465151588031e-06, + "loss": 0.7375, + "step": 23318 + }, + { + "epoch": 0.4668118009158472, + "grad_norm": 1.191936731338501, + "learning_rate": 5.777144909991599e-06, + "loss": 0.3072, + "step": 23319 + }, + { + "epoch": 0.4668318194329755, + "grad_norm": 1.0075124502182007, + "learning_rate": 5.7768246651282166e-06, + "loss": 0.3036, + "step": 23320 + }, + { + "epoch": 0.4668518379501038, + "grad_norm": 1.2197867631912231, + "learning_rate": 5.77650441699923e-06, + "loss": 0.3206, + "step": 23321 + }, + { + "epoch": 0.46687185646723217, + "grad_norm": 1.0445466041564941, + "learning_rate": 5.7761841656059885e-06, + "loss": 0.2981, + "step": 23322 + }, + { + "epoch": 0.4668918749843605, + "grad_norm": 1.037454605102539, + "learning_rate": 5.775863910949833e-06, + "loss": 0.2965, + "step": 23323 + }, + { + "epoch": 0.4669118935014889, + "grad_norm": 1.0812032222747803, + "learning_rate": 5.775543653032113e-06, + "loss": 0.3152, + "step": 23324 + }, + { + "epoch": 0.4669319120186172, + "grad_norm": 0.9898414015769958, + "learning_rate": 5.775223391854174e-06, + "loss": 0.3153, + "step": 23325 + }, + { + "epoch": 0.4669519305357456, + "grad_norm": 1.1739450693130493, + "learning_rate": 5.774903127417364e-06, + "loss": 0.2979, + "step": 23326 + }, + { + "epoch": 0.4669719490528739, + "grad_norm": 1.1406643390655518, + "learning_rate": 5.774582859723028e-06, + "loss": 0.2925, + "step": 23327 + }, + { + "epoch": 0.4669919675700023, + "grad_norm": 0.9311584830284119, + "learning_rate": 5.774262588772512e-06, + "loss": 0.25, + "step": 23328 + }, + { + "epoch": 0.46701198608713057, + "grad_norm": 1.0970546007156372, + "learning_rate": 5.773942314567164e-06, + "loss": 0.3045, + "step": 23329 + }, + { + "epoch": 0.4670320046042589, + "grad_norm": 1.2381253242492676, + "learning_rate": 5.7736220371083265e-06, + "loss": 0.2704, + "step": 23330 + }, + { + "epoch": 0.46705202312138727, + "grad_norm": 1.2695293426513672, + "learning_rate": 5.773301756397349e-06, + "loss": 0.3351, + "step": 23331 + }, + { + "epoch": 0.4670720416385156, + "grad_norm": 1.978712558746338, + "learning_rate": 5.772981472435578e-06, + "loss": 0.8139, + "step": 23332 + }, + { + "epoch": 0.467092060155644, + "grad_norm": 1.018300175666809, + "learning_rate": 5.7726611852243595e-06, + "loss": 0.3258, + "step": 23333 + }, + { + "epoch": 0.4671120786727723, + "grad_norm": 1.2194664478302002, + "learning_rate": 5.772340894765039e-06, + "loss": 0.3201, + "step": 23334 + }, + { + "epoch": 0.4671320971899007, + "grad_norm": 1.2089675664901733, + "learning_rate": 5.772020601058965e-06, + "loss": 0.2905, + "step": 23335 + }, + { + "epoch": 0.467152115707029, + "grad_norm": 0.9843608736991882, + "learning_rate": 5.771700304107481e-06, + "loss": 0.3228, + "step": 23336 + }, + { + "epoch": 0.4671721342241573, + "grad_norm": 1.0891555547714233, + "learning_rate": 5.771380003911935e-06, + "loss": 0.3251, + "step": 23337 + }, + { + "epoch": 0.46719215274128567, + "grad_norm": 1.0602935552597046, + "learning_rate": 5.771059700473675e-06, + "loss": 0.2711, + "step": 23338 + }, + { + "epoch": 0.467212171258414, + "grad_norm": 1.2168188095092773, + "learning_rate": 5.770739393794045e-06, + "loss": 0.3759, + "step": 23339 + }, + { + "epoch": 0.4672321897755424, + "grad_norm": 1.1071776151657104, + "learning_rate": 5.770419083874393e-06, + "loss": 0.3199, + "step": 23340 + }, + { + "epoch": 0.4672522082926707, + "grad_norm": 1.2105921506881714, + "learning_rate": 5.7700987707160635e-06, + "loss": 0.298, + "step": 23341 + }, + { + "epoch": 0.4672722268097991, + "grad_norm": 0.9274188876152039, + "learning_rate": 5.769778454320406e-06, + "loss": 0.2853, + "step": 23342 + }, + { + "epoch": 0.4672922453269274, + "grad_norm": 1.094983696937561, + "learning_rate": 5.7694581346887635e-06, + "loss": 0.3241, + "step": 23343 + }, + { + "epoch": 0.4673122638440558, + "grad_norm": 1.007943034172058, + "learning_rate": 5.769137811822486e-06, + "loss": 0.3092, + "step": 23344 + }, + { + "epoch": 0.46733228236118407, + "grad_norm": 1.1008847951889038, + "learning_rate": 5.768817485722918e-06, + "loss": 0.3102, + "step": 23345 + }, + { + "epoch": 0.4673523008783124, + "grad_norm": 0.9883277416229248, + "learning_rate": 5.768497156391408e-06, + "loss": 0.3335, + "step": 23346 + }, + { + "epoch": 0.46737231939544077, + "grad_norm": 1.8340104818344116, + "learning_rate": 5.7681768238293016e-06, + "loss": 0.7524, + "step": 23347 + }, + { + "epoch": 0.4673923379125691, + "grad_norm": 1.4698548316955566, + "learning_rate": 5.767856488037944e-06, + "loss": 0.3684, + "step": 23348 + }, + { + "epoch": 0.4674123564296975, + "grad_norm": 1.1283044815063477, + "learning_rate": 5.767536149018682e-06, + "loss": 0.356, + "step": 23349 + }, + { + "epoch": 0.4674323749468258, + "grad_norm": 1.2139708995819092, + "learning_rate": 5.767215806772865e-06, + "loss": 0.3439, + "step": 23350 + }, + { + "epoch": 0.4674523934639542, + "grad_norm": 1.1495426893234253, + "learning_rate": 5.766895461301838e-06, + "loss": 0.3687, + "step": 23351 + }, + { + "epoch": 0.4674724119810825, + "grad_norm": 0.9876556992530823, + "learning_rate": 5.766575112606946e-06, + "loss": 0.3127, + "step": 23352 + }, + { + "epoch": 0.4674924304982108, + "grad_norm": 0.9832318425178528, + "learning_rate": 5.766254760689538e-06, + "loss": 0.3453, + "step": 23353 + }, + { + "epoch": 0.46751244901533917, + "grad_norm": 1.1121554374694824, + "learning_rate": 5.7659344055509605e-06, + "loss": 0.3238, + "step": 23354 + }, + { + "epoch": 0.4675324675324675, + "grad_norm": 1.0996042490005493, + "learning_rate": 5.7656140471925595e-06, + "loss": 0.3034, + "step": 23355 + }, + { + "epoch": 0.4675524860495959, + "grad_norm": 1.7429033517837524, + "learning_rate": 5.765293685615679e-06, + "loss": 0.7529, + "step": 23356 + }, + { + "epoch": 0.4675725045667242, + "grad_norm": 1.1457147598266602, + "learning_rate": 5.7649733208216716e-06, + "loss": 0.3011, + "step": 23357 + }, + { + "epoch": 0.4675925230838526, + "grad_norm": 1.06586754322052, + "learning_rate": 5.76465295281188e-06, + "loss": 0.29, + "step": 23358 + }, + { + "epoch": 0.4676125416009809, + "grad_norm": 1.0461339950561523, + "learning_rate": 5.764332581587652e-06, + "loss": 0.3125, + "step": 23359 + }, + { + "epoch": 0.4676325601181093, + "grad_norm": 1.0559022426605225, + "learning_rate": 5.764012207150336e-06, + "loss": 0.3222, + "step": 23360 + }, + { + "epoch": 0.46765257863523757, + "grad_norm": 1.1689951419830322, + "learning_rate": 5.763691829501274e-06, + "loss": 0.327, + "step": 23361 + }, + { + "epoch": 0.4676725971523659, + "grad_norm": 1.181067705154419, + "learning_rate": 5.7633714486418175e-06, + "loss": 0.3127, + "step": 23362 + }, + { + "epoch": 0.46769261566949427, + "grad_norm": 2.1486308574676514, + "learning_rate": 5.763051064573312e-06, + "loss": 0.8065, + "step": 23363 + }, + { + "epoch": 0.4677126341866226, + "grad_norm": 1.2459920644760132, + "learning_rate": 5.762730677297104e-06, + "loss": 0.3217, + "step": 23364 + }, + { + "epoch": 0.467732652703751, + "grad_norm": 1.1906516551971436, + "learning_rate": 5.762410286814539e-06, + "loss": 0.328, + "step": 23365 + }, + { + "epoch": 0.4677526712208793, + "grad_norm": 1.0742765665054321, + "learning_rate": 5.762089893126968e-06, + "loss": 0.2589, + "step": 23366 + }, + { + "epoch": 0.4677726897380077, + "grad_norm": 0.9542055130004883, + "learning_rate": 5.761769496235733e-06, + "loss": 0.3201, + "step": 23367 + }, + { + "epoch": 0.467792708255136, + "grad_norm": 1.2698098421096802, + "learning_rate": 5.761449096142183e-06, + "loss": 0.3284, + "step": 23368 + }, + { + "epoch": 0.4678127267722643, + "grad_norm": 1.1603811979293823, + "learning_rate": 5.761128692847664e-06, + "loss": 0.3685, + "step": 23369 + }, + { + "epoch": 0.46783274528939267, + "grad_norm": 1.1004804372787476, + "learning_rate": 5.760808286353525e-06, + "loss": 0.312, + "step": 23370 + }, + { + "epoch": 0.467852763806521, + "grad_norm": 1.1365407705307007, + "learning_rate": 5.760487876661112e-06, + "loss": 0.274, + "step": 23371 + }, + { + "epoch": 0.46787278232364937, + "grad_norm": 1.1475774049758911, + "learning_rate": 5.760167463771772e-06, + "loss": 0.3189, + "step": 23372 + }, + { + "epoch": 0.4678928008407777, + "grad_norm": 1.9635989665985107, + "learning_rate": 5.759847047686852e-06, + "loss": 0.8139, + "step": 23373 + }, + { + "epoch": 0.4679128193579061, + "grad_norm": 1.1414023637771606, + "learning_rate": 5.7595266284076966e-06, + "loss": 0.2866, + "step": 23374 + }, + { + "epoch": 0.4679328378750344, + "grad_norm": 1.1395204067230225, + "learning_rate": 5.759206205935655e-06, + "loss": 0.3319, + "step": 23375 + }, + { + "epoch": 0.4679528563921628, + "grad_norm": 2.029799461364746, + "learning_rate": 5.758885780272074e-06, + "loss": 0.7749, + "step": 23376 + }, + { + "epoch": 0.46797287490929107, + "grad_norm": 1.0574555397033691, + "learning_rate": 5.758565351418302e-06, + "loss": 0.3179, + "step": 23377 + }, + { + "epoch": 0.4679928934264194, + "grad_norm": 1.9408080577850342, + "learning_rate": 5.758244919375683e-06, + "loss": 0.8036, + "step": 23378 + }, + { + "epoch": 0.46801291194354777, + "grad_norm": 1.1115132570266724, + "learning_rate": 5.757924484145566e-06, + "loss": 0.2875, + "step": 23379 + }, + { + "epoch": 0.4680329304606761, + "grad_norm": 1.046966791152954, + "learning_rate": 5.757604045729298e-06, + "loss": 0.2758, + "step": 23380 + }, + { + "epoch": 0.4680529489778045, + "grad_norm": 1.0493052005767822, + "learning_rate": 5.757283604128226e-06, + "loss": 0.2867, + "step": 23381 + }, + { + "epoch": 0.4680729674949328, + "grad_norm": 1.294960379600525, + "learning_rate": 5.756963159343695e-06, + "loss": 0.3435, + "step": 23382 + }, + { + "epoch": 0.4680929860120612, + "grad_norm": 1.30045485496521, + "learning_rate": 5.756642711377054e-06, + "loss": 0.2982, + "step": 23383 + }, + { + "epoch": 0.4681130045291895, + "grad_norm": 1.1297454833984375, + "learning_rate": 5.756322260229651e-06, + "loss": 0.3087, + "step": 23384 + }, + { + "epoch": 0.4681330230463178, + "grad_norm": 1.0917881727218628, + "learning_rate": 5.756001805902831e-06, + "loss": 0.2842, + "step": 23385 + }, + { + "epoch": 0.46815304156344617, + "grad_norm": 1.0877379179000854, + "learning_rate": 5.755681348397943e-06, + "loss": 0.3316, + "step": 23386 + }, + { + "epoch": 0.4681730600805745, + "grad_norm": 1.202020287513733, + "learning_rate": 5.7553608877163325e-06, + "loss": 0.312, + "step": 23387 + }, + { + "epoch": 0.46819307859770287, + "grad_norm": 1.131371259689331, + "learning_rate": 5.755040423859347e-06, + "loss": 0.3347, + "step": 23388 + }, + { + "epoch": 0.4682130971148312, + "grad_norm": 1.0289583206176758, + "learning_rate": 5.754719956828334e-06, + "loss": 0.3559, + "step": 23389 + }, + { + "epoch": 0.4682331156319596, + "grad_norm": 1.0054361820220947, + "learning_rate": 5.754399486624642e-06, + "loss": 0.3091, + "step": 23390 + }, + { + "epoch": 0.4682531341490879, + "grad_norm": 1.0582177639007568, + "learning_rate": 5.754079013249619e-06, + "loss": 0.2997, + "step": 23391 + }, + { + "epoch": 0.4682731526662163, + "grad_norm": 1.1583691835403442, + "learning_rate": 5.7537585367046054e-06, + "loss": 0.3229, + "step": 23392 + }, + { + "epoch": 0.46829317118334457, + "grad_norm": 1.0617882013320923, + "learning_rate": 5.753438056990956e-06, + "loss": 0.3034, + "step": 23393 + }, + { + "epoch": 0.4683131897004729, + "grad_norm": 1.788912296295166, + "learning_rate": 5.753117574110014e-06, + "loss": 0.7668, + "step": 23394 + }, + { + "epoch": 0.46833320821760127, + "grad_norm": 1.8843697309494019, + "learning_rate": 5.7527970880631275e-06, + "loss": 0.782, + "step": 23395 + }, + { + "epoch": 0.4683532267347296, + "grad_norm": 1.0017186403274536, + "learning_rate": 5.752476598851644e-06, + "loss": 0.3282, + "step": 23396 + }, + { + "epoch": 0.468373245251858, + "grad_norm": 0.9894954562187195, + "learning_rate": 5.752156106476913e-06, + "loss": 0.2622, + "step": 23397 + }, + { + "epoch": 0.4683932637689863, + "grad_norm": 1.0061514377593994, + "learning_rate": 5.751835610940278e-06, + "loss": 0.3003, + "step": 23398 + }, + { + "epoch": 0.4684132822861147, + "grad_norm": 1.0607179403305054, + "learning_rate": 5.751515112243088e-06, + "loss": 0.3612, + "step": 23399 + }, + { + "epoch": 0.468433300803243, + "grad_norm": 2.0192434787750244, + "learning_rate": 5.75119461038669e-06, + "loss": 0.7929, + "step": 23400 + }, + { + "epoch": 0.4684533193203713, + "grad_norm": 1.029894471168518, + "learning_rate": 5.75087410537243e-06, + "loss": 0.3297, + "step": 23401 + }, + { + "epoch": 0.46847333783749967, + "grad_norm": 1.1654465198516846, + "learning_rate": 5.7505535972016605e-06, + "loss": 0.3163, + "step": 23402 + }, + { + "epoch": 0.468493356354628, + "grad_norm": 0.9800211787223816, + "learning_rate": 5.750233085875723e-06, + "loss": 0.3142, + "step": 23403 + }, + { + "epoch": 0.46851337487175637, + "grad_norm": 1.3046596050262451, + "learning_rate": 5.749912571395968e-06, + "loss": 0.3614, + "step": 23404 + }, + { + "epoch": 0.4685333933888847, + "grad_norm": 1.135158896446228, + "learning_rate": 5.749592053763741e-06, + "loss": 0.3133, + "step": 23405 + }, + { + "epoch": 0.4685534119060131, + "grad_norm": 1.107464075088501, + "learning_rate": 5.7492715329803905e-06, + "loss": 0.304, + "step": 23406 + }, + { + "epoch": 0.4685734304231414, + "grad_norm": 1.1019606590270996, + "learning_rate": 5.748951009047264e-06, + "loss": 0.2788, + "step": 23407 + }, + { + "epoch": 0.4685934489402698, + "grad_norm": 1.1193058490753174, + "learning_rate": 5.748630481965707e-06, + "loss": 0.3448, + "step": 23408 + }, + { + "epoch": 0.46861346745739807, + "grad_norm": 1.299477458000183, + "learning_rate": 5.74830995173707e-06, + "loss": 0.3214, + "step": 23409 + }, + { + "epoch": 0.4686334859745264, + "grad_norm": 1.9307392835617065, + "learning_rate": 5.7479894183627e-06, + "loss": 0.8106, + "step": 23410 + }, + { + "epoch": 0.46865350449165477, + "grad_norm": 1.0290913581848145, + "learning_rate": 5.747668881843943e-06, + "loss": 0.2927, + "step": 23411 + }, + { + "epoch": 0.4686735230087831, + "grad_norm": 1.1541510820388794, + "learning_rate": 5.747348342182145e-06, + "loss": 0.3376, + "step": 23412 + }, + { + "epoch": 0.4686935415259115, + "grad_norm": 1.9583895206451416, + "learning_rate": 5.7470277993786575e-06, + "loss": 0.8824, + "step": 23413 + }, + { + "epoch": 0.4687135600430398, + "grad_norm": 1.039235234260559, + "learning_rate": 5.746707253434825e-06, + "loss": 0.3051, + "step": 23414 + }, + { + "epoch": 0.4687335785601682, + "grad_norm": 1.1119345426559448, + "learning_rate": 5.746386704351996e-06, + "loss": 0.3197, + "step": 23415 + }, + { + "epoch": 0.4687535970772965, + "grad_norm": 1.1204062700271606, + "learning_rate": 5.746066152131518e-06, + "loss": 0.2775, + "step": 23416 + }, + { + "epoch": 0.4687736155944248, + "grad_norm": 1.3320863246917725, + "learning_rate": 5.745745596774739e-06, + "loss": 0.3434, + "step": 23417 + }, + { + "epoch": 0.46879363411155317, + "grad_norm": 1.0807245969772339, + "learning_rate": 5.745425038283004e-06, + "loss": 0.2778, + "step": 23418 + }, + { + "epoch": 0.4688136526286815, + "grad_norm": 1.1136113405227661, + "learning_rate": 5.745104476657664e-06, + "loss": 0.3543, + "step": 23419 + }, + { + "epoch": 0.46883367114580987, + "grad_norm": 1.0630204677581787, + "learning_rate": 5.744783911900066e-06, + "loss": 0.2902, + "step": 23420 + }, + { + "epoch": 0.4688536896629382, + "grad_norm": 1.0215262174606323, + "learning_rate": 5.744463344011555e-06, + "loss": 0.322, + "step": 23421 + }, + { + "epoch": 0.4688737081800666, + "grad_norm": 1.1774665117263794, + "learning_rate": 5.744142772993482e-06, + "loss": 0.2986, + "step": 23422 + }, + { + "epoch": 0.4688937266971949, + "grad_norm": 1.1989772319793701, + "learning_rate": 5.743822198847191e-06, + "loss": 0.3763, + "step": 23423 + }, + { + "epoch": 0.4689137452143233, + "grad_norm": 1.1077059507369995, + "learning_rate": 5.743501621574033e-06, + "loss": 0.3427, + "step": 23424 + }, + { + "epoch": 0.46893376373145157, + "grad_norm": 1.0582103729248047, + "learning_rate": 5.743181041175354e-06, + "loss": 0.3002, + "step": 23425 + }, + { + "epoch": 0.4689537822485799, + "grad_norm": 1.1655486822128296, + "learning_rate": 5.742860457652501e-06, + "loss": 0.2391, + "step": 23426 + }, + { + "epoch": 0.46897380076570827, + "grad_norm": 1.1301993131637573, + "learning_rate": 5.742539871006823e-06, + "loss": 0.3019, + "step": 23427 + }, + { + "epoch": 0.4689938192828366, + "grad_norm": 1.1605937480926514, + "learning_rate": 5.742219281239669e-06, + "loss": 0.3587, + "step": 23428 + }, + { + "epoch": 0.46901383779996497, + "grad_norm": 1.1105917692184448, + "learning_rate": 5.7418986883523805e-06, + "loss": 0.3272, + "step": 23429 + }, + { + "epoch": 0.4690338563170933, + "grad_norm": 1.1374306678771973, + "learning_rate": 5.741578092346314e-06, + "loss": 0.285, + "step": 23430 + }, + { + "epoch": 0.4690538748342217, + "grad_norm": 1.0101031064987183, + "learning_rate": 5.741257493222811e-06, + "loss": 0.315, + "step": 23431 + }, + { + "epoch": 0.46907389335135, + "grad_norm": 1.8337148427963257, + "learning_rate": 5.740936890983221e-06, + "loss": 0.787, + "step": 23432 + }, + { + "epoch": 0.4690939118684783, + "grad_norm": 1.0940572023391724, + "learning_rate": 5.740616285628892e-06, + "loss": 0.3322, + "step": 23433 + }, + { + "epoch": 0.46911393038560667, + "grad_norm": 1.1795294284820557, + "learning_rate": 5.74029567716117e-06, + "loss": 0.3402, + "step": 23434 + }, + { + "epoch": 0.469133948902735, + "grad_norm": 1.0665843486785889, + "learning_rate": 5.7399750655814066e-06, + "loss": 0.3208, + "step": 23435 + }, + { + "epoch": 0.46915396741986337, + "grad_norm": 1.1817631721496582, + "learning_rate": 5.739654450890946e-06, + "loss": 0.3479, + "step": 23436 + }, + { + "epoch": 0.4691739859369917, + "grad_norm": 1.9208042621612549, + "learning_rate": 5.739333833091139e-06, + "loss": 0.843, + "step": 23437 + }, + { + "epoch": 0.4691940044541201, + "grad_norm": 1.0640710592269897, + "learning_rate": 5.739013212183329e-06, + "loss": 0.3181, + "step": 23438 + }, + { + "epoch": 0.4692140229712484, + "grad_norm": 1.057215929031372, + "learning_rate": 5.7386925881688675e-06, + "loss": 0.2569, + "step": 23439 + }, + { + "epoch": 0.4692340414883768, + "grad_norm": 1.120100975036621, + "learning_rate": 5.738371961049101e-06, + "loss": 0.3294, + "step": 23440 + }, + { + "epoch": 0.46925406000550507, + "grad_norm": 0.9739536643028259, + "learning_rate": 5.738051330825378e-06, + "loss": 0.2992, + "step": 23441 + }, + { + "epoch": 0.4692740785226334, + "grad_norm": 1.066448450088501, + "learning_rate": 5.737730697499046e-06, + "loss": 0.3233, + "step": 23442 + }, + { + "epoch": 0.46929409703976177, + "grad_norm": 1.2192773818969727, + "learning_rate": 5.737410061071454e-06, + "loss": 0.296, + "step": 23443 + }, + { + "epoch": 0.4693141155568901, + "grad_norm": 1.013970136642456, + "learning_rate": 5.737089421543949e-06, + "loss": 0.3154, + "step": 23444 + }, + { + "epoch": 0.46933413407401847, + "grad_norm": 1.29439377784729, + "learning_rate": 5.736768778917877e-06, + "loss": 0.3066, + "step": 23445 + }, + { + "epoch": 0.4693541525911468, + "grad_norm": 1.1104718446731567, + "learning_rate": 5.736448133194588e-06, + "loss": 0.3201, + "step": 23446 + }, + { + "epoch": 0.4693741711082752, + "grad_norm": 1.184734582901001, + "learning_rate": 5.736127484375429e-06, + "loss": 0.3178, + "step": 23447 + }, + { + "epoch": 0.4693941896254035, + "grad_norm": 1.1074897050857544, + "learning_rate": 5.73580683246175e-06, + "loss": 0.2997, + "step": 23448 + }, + { + "epoch": 0.4694142081425318, + "grad_norm": 1.0589618682861328, + "learning_rate": 5.735486177454896e-06, + "loss": 0.2768, + "step": 23449 + }, + { + "epoch": 0.46943422665966017, + "grad_norm": 1.1747649908065796, + "learning_rate": 5.7351655193562186e-06, + "loss": 0.29, + "step": 23450 + }, + { + "epoch": 0.4694542451767885, + "grad_norm": 1.940181851387024, + "learning_rate": 5.734844858167062e-06, + "loss": 0.7985, + "step": 23451 + }, + { + "epoch": 0.46947426369391687, + "grad_norm": 1.1132320165634155, + "learning_rate": 5.734524193888774e-06, + "loss": 0.3315, + "step": 23452 + }, + { + "epoch": 0.4694942822110452, + "grad_norm": 2.0106632709503174, + "learning_rate": 5.734203526522707e-06, + "loss": 0.8114, + "step": 23453 + }, + { + "epoch": 0.4695143007281736, + "grad_norm": 0.9997418522834778, + "learning_rate": 5.733882856070205e-06, + "loss": 0.2938, + "step": 23454 + }, + { + "epoch": 0.4695343192453019, + "grad_norm": 1.0930532217025757, + "learning_rate": 5.733562182532619e-06, + "loss": 0.3345, + "step": 23455 + }, + { + "epoch": 0.4695543377624303, + "grad_norm": 1.8771581649780273, + "learning_rate": 5.7332415059112945e-06, + "loss": 0.7669, + "step": 23456 + }, + { + "epoch": 0.46957435627955857, + "grad_norm": 1.0477598905563354, + "learning_rate": 5.732920826207581e-06, + "loss": 0.2875, + "step": 23457 + }, + { + "epoch": 0.4695943747966869, + "grad_norm": 1.051026463508606, + "learning_rate": 5.732600143422824e-06, + "loss": 0.2966, + "step": 23458 + }, + { + "epoch": 0.46961439331381527, + "grad_norm": 1.0930125713348389, + "learning_rate": 5.732279457558375e-06, + "loss": 0.343, + "step": 23459 + }, + { + "epoch": 0.4696344118309436, + "grad_norm": 1.080759882926941, + "learning_rate": 5.731958768615581e-06, + "loss": 0.3094, + "step": 23460 + }, + { + "epoch": 0.46965443034807197, + "grad_norm": 1.2044124603271484, + "learning_rate": 5.73163807659579e-06, + "loss": 0.3322, + "step": 23461 + }, + { + "epoch": 0.4696744488652003, + "grad_norm": 1.2226513624191284, + "learning_rate": 5.73131738150035e-06, + "loss": 0.3884, + "step": 23462 + }, + { + "epoch": 0.4696944673823287, + "grad_norm": 1.195845365524292, + "learning_rate": 5.730996683330609e-06, + "loss": 0.3023, + "step": 23463 + }, + { + "epoch": 0.469714485899457, + "grad_norm": 1.2166595458984375, + "learning_rate": 5.730675982087914e-06, + "loss": 0.3263, + "step": 23464 + }, + { + "epoch": 0.4697345044165853, + "grad_norm": 1.3765654563903809, + "learning_rate": 5.730355277773616e-06, + "loss": 0.2794, + "step": 23465 + }, + { + "epoch": 0.46975452293371367, + "grad_norm": 1.1189229488372803, + "learning_rate": 5.730034570389062e-06, + "loss": 0.3105, + "step": 23466 + }, + { + "epoch": 0.469774541450842, + "grad_norm": 1.0527960062026978, + "learning_rate": 5.729713859935597e-06, + "loss": 0.2688, + "step": 23467 + }, + { + "epoch": 0.46979455996797037, + "grad_norm": 1.1228910684585571, + "learning_rate": 5.729393146414574e-06, + "loss": 0.3326, + "step": 23468 + }, + { + "epoch": 0.4698145784850987, + "grad_norm": 1.1423786878585815, + "learning_rate": 5.729072429827339e-06, + "loss": 0.3256, + "step": 23469 + }, + { + "epoch": 0.4698345970022271, + "grad_norm": 1.183825135231018, + "learning_rate": 5.728751710175242e-06, + "loss": 0.3265, + "step": 23470 + }, + { + "epoch": 0.4698546155193554, + "grad_norm": 1.8881994485855103, + "learning_rate": 5.728430987459628e-06, + "loss": 0.7452, + "step": 23471 + }, + { + "epoch": 0.4698746340364838, + "grad_norm": 1.122464656829834, + "learning_rate": 5.7281102616818455e-06, + "loss": 0.3723, + "step": 23472 + }, + { + "epoch": 0.46989465255361207, + "grad_norm": 1.2221925258636475, + "learning_rate": 5.727789532843245e-06, + "loss": 0.3097, + "step": 23473 + }, + { + "epoch": 0.4699146710707404, + "grad_norm": 1.232622742652893, + "learning_rate": 5.727468800945175e-06, + "loss": 0.3366, + "step": 23474 + }, + { + "epoch": 0.46993468958786877, + "grad_norm": 1.0310189723968506, + "learning_rate": 5.727148065988983e-06, + "loss": 0.2974, + "step": 23475 + }, + { + "epoch": 0.4699547081049971, + "grad_norm": 1.110596776008606, + "learning_rate": 5.726827327976017e-06, + "loss": 0.3434, + "step": 23476 + }, + { + "epoch": 0.46997472662212547, + "grad_norm": 1.0680903196334839, + "learning_rate": 5.726506586907623e-06, + "loss": 0.3061, + "step": 23477 + }, + { + "epoch": 0.4699947451392538, + "grad_norm": 1.0851988792419434, + "learning_rate": 5.726185842785153e-06, + "loss": 0.289, + "step": 23478 + }, + { + "epoch": 0.4700147636563822, + "grad_norm": 1.2028099298477173, + "learning_rate": 5.725865095609955e-06, + "loss": 0.3138, + "step": 23479 + }, + { + "epoch": 0.4700347821735105, + "grad_norm": 1.1310889720916748, + "learning_rate": 5.725544345383375e-06, + "loss": 0.3242, + "step": 23480 + }, + { + "epoch": 0.4700548006906388, + "grad_norm": 1.3109996318817139, + "learning_rate": 5.725223592106763e-06, + "loss": 0.3373, + "step": 23481 + }, + { + "epoch": 0.47007481920776717, + "grad_norm": 2.059629201889038, + "learning_rate": 5.724902835781467e-06, + "loss": 0.8236, + "step": 23482 + }, + { + "epoch": 0.4700948377248955, + "grad_norm": 1.2112458944320679, + "learning_rate": 5.724582076408837e-06, + "loss": 0.2994, + "step": 23483 + }, + { + "epoch": 0.47011485624202387, + "grad_norm": 1.0434404611587524, + "learning_rate": 5.724261313990218e-06, + "loss": 0.2726, + "step": 23484 + }, + { + "epoch": 0.4701348747591522, + "grad_norm": 1.119247317314148, + "learning_rate": 5.723940548526962e-06, + "loss": 0.3465, + "step": 23485 + }, + { + "epoch": 0.47015489327628057, + "grad_norm": 1.1023168563842773, + "learning_rate": 5.723619780020415e-06, + "loss": 0.322, + "step": 23486 + }, + { + "epoch": 0.4701749117934089, + "grad_norm": 1.8885753154754639, + "learning_rate": 5.723299008471925e-06, + "loss": 0.7807, + "step": 23487 + }, + { + "epoch": 0.4701949303105373, + "grad_norm": 1.1337110996246338, + "learning_rate": 5.722978233882843e-06, + "loss": 0.3043, + "step": 23488 + }, + { + "epoch": 0.47021494882766557, + "grad_norm": 1.8761615753173828, + "learning_rate": 5.722657456254516e-06, + "loss": 0.838, + "step": 23489 + }, + { + "epoch": 0.4702349673447939, + "grad_norm": 1.0246508121490479, + "learning_rate": 5.722336675588291e-06, + "loss": 0.3071, + "step": 23490 + }, + { + "epoch": 0.47025498586192227, + "grad_norm": 1.1366363763809204, + "learning_rate": 5.722015891885519e-06, + "loss": 0.2858, + "step": 23491 + }, + { + "epoch": 0.4702750043790506, + "grad_norm": 1.0039258003234863, + "learning_rate": 5.721695105147549e-06, + "loss": 0.251, + "step": 23492 + }, + { + "epoch": 0.47029502289617897, + "grad_norm": 1.07828688621521, + "learning_rate": 5.721374315375726e-06, + "loss": 0.3047, + "step": 23493 + }, + { + "epoch": 0.4703150414133073, + "grad_norm": 1.121762990951538, + "learning_rate": 5.721053522571402e-06, + "loss": 0.265, + "step": 23494 + }, + { + "epoch": 0.4703350599304357, + "grad_norm": 1.0829806327819824, + "learning_rate": 5.720732726735925e-06, + "loss": 0.3294, + "step": 23495 + }, + { + "epoch": 0.470355078447564, + "grad_norm": 1.1118159294128418, + "learning_rate": 5.720411927870641e-06, + "loss": 0.3059, + "step": 23496 + }, + { + "epoch": 0.4703750969646923, + "grad_norm": 1.1793242692947388, + "learning_rate": 5.7200911259769e-06, + "loss": 0.2808, + "step": 23497 + }, + { + "epoch": 0.47039511548182067, + "grad_norm": 1.0182740688323975, + "learning_rate": 5.719770321056051e-06, + "loss": 0.3061, + "step": 23498 + }, + { + "epoch": 0.470415133998949, + "grad_norm": 1.7944658994674683, + "learning_rate": 5.7194495131094445e-06, + "loss": 0.8432, + "step": 23499 + }, + { + "epoch": 0.47043515251607737, + "grad_norm": 1.0237149000167847, + "learning_rate": 5.719128702138425e-06, + "loss": 0.2741, + "step": 23500 + }, + { + "epoch": 0.4704551710332057, + "grad_norm": 1.0221003293991089, + "learning_rate": 5.718807888144345e-06, + "loss": 0.304, + "step": 23501 + }, + { + "epoch": 0.47047518955033407, + "grad_norm": 1.1458581686019897, + "learning_rate": 5.718487071128549e-06, + "loss": 0.3149, + "step": 23502 + }, + { + "epoch": 0.4704952080674624, + "grad_norm": 1.0166075229644775, + "learning_rate": 5.718166251092389e-06, + "loss": 0.3273, + "step": 23503 + }, + { + "epoch": 0.4705152265845908, + "grad_norm": 1.013665795326233, + "learning_rate": 5.717845428037212e-06, + "loss": 0.3191, + "step": 23504 + }, + { + "epoch": 0.47053524510171907, + "grad_norm": 1.124579668045044, + "learning_rate": 5.717524601964368e-06, + "loss": 0.2827, + "step": 23505 + }, + { + "epoch": 0.4705552636188474, + "grad_norm": 1.0694055557250977, + "learning_rate": 5.717203772875206e-06, + "loss": 0.2738, + "step": 23506 + }, + { + "epoch": 0.47057528213597577, + "grad_norm": 1.0787016153335571, + "learning_rate": 5.7168829407710725e-06, + "loss": 0.2834, + "step": 23507 + }, + { + "epoch": 0.4705953006531041, + "grad_norm": 1.057793140411377, + "learning_rate": 5.716562105653318e-06, + "loss": 0.2878, + "step": 23508 + }, + { + "epoch": 0.47061531917023247, + "grad_norm": 1.0809237957000732, + "learning_rate": 5.71624126752329e-06, + "loss": 0.3388, + "step": 23509 + }, + { + "epoch": 0.4706353376873608, + "grad_norm": 1.018693447113037, + "learning_rate": 5.715920426382338e-06, + "loss": 0.3156, + "step": 23510 + }, + { + "epoch": 0.4706553562044892, + "grad_norm": 1.883126139640808, + "learning_rate": 5.71559958223181e-06, + "loss": 0.8315, + "step": 23511 + }, + { + "epoch": 0.4706753747216175, + "grad_norm": 1.0722681283950806, + "learning_rate": 5.715278735073056e-06, + "loss": 0.3336, + "step": 23512 + }, + { + "epoch": 0.4706953932387458, + "grad_norm": 1.124031662940979, + "learning_rate": 5.7149578849074235e-06, + "loss": 0.2705, + "step": 23513 + }, + { + "epoch": 0.47071541175587417, + "grad_norm": 1.0309656858444214, + "learning_rate": 5.7146370317362636e-06, + "loss": 0.3656, + "step": 23514 + }, + { + "epoch": 0.4707354302730025, + "grad_norm": 1.24553644657135, + "learning_rate": 5.7143161755609226e-06, + "loss": 0.3625, + "step": 23515 + }, + { + "epoch": 0.47075544879013087, + "grad_norm": 1.0942914485931396, + "learning_rate": 5.713995316382749e-06, + "loss": 0.3048, + "step": 23516 + }, + { + "epoch": 0.4707754673072592, + "grad_norm": 1.1692770719528198, + "learning_rate": 5.713674454203094e-06, + "loss": 0.3835, + "step": 23517 + }, + { + "epoch": 0.47079548582438757, + "grad_norm": 1.0990591049194336, + "learning_rate": 5.713353589023305e-06, + "loss": 0.311, + "step": 23518 + }, + { + "epoch": 0.4708155043415159, + "grad_norm": 1.0274991989135742, + "learning_rate": 5.713032720844731e-06, + "loss": 0.3181, + "step": 23519 + }, + { + "epoch": 0.4708355228586443, + "grad_norm": 0.9784660339355469, + "learning_rate": 5.71271184966872e-06, + "loss": 0.2897, + "step": 23520 + }, + { + "epoch": 0.47085554137577257, + "grad_norm": 1.1309735774993896, + "learning_rate": 5.712390975496624e-06, + "loss": 0.303, + "step": 23521 + }, + { + "epoch": 0.4708755598929009, + "grad_norm": 1.128298282623291, + "learning_rate": 5.712070098329787e-06, + "loss": 0.3581, + "step": 23522 + }, + { + "epoch": 0.47089557841002927, + "grad_norm": 1.0555475950241089, + "learning_rate": 5.711749218169561e-06, + "loss": 0.2473, + "step": 23523 + }, + { + "epoch": 0.4709155969271576, + "grad_norm": 1.0363755226135254, + "learning_rate": 5.711428335017295e-06, + "loss": 0.2854, + "step": 23524 + }, + { + "epoch": 0.47093561544428597, + "grad_norm": 1.031485915184021, + "learning_rate": 5.711107448874338e-06, + "loss": 0.3198, + "step": 23525 + }, + { + "epoch": 0.4709556339614143, + "grad_norm": 1.1230497360229492, + "learning_rate": 5.710786559742039e-06, + "loss": 0.3468, + "step": 23526 + }, + { + "epoch": 0.4709756524785427, + "grad_norm": 1.0597060918807983, + "learning_rate": 5.710465667621744e-06, + "loss": 0.3157, + "step": 23527 + }, + { + "epoch": 0.470995670995671, + "grad_norm": 1.1870200634002686, + "learning_rate": 5.710144772514805e-06, + "loss": 0.336, + "step": 23528 + }, + { + "epoch": 0.4710156895127993, + "grad_norm": 1.0756111145019531, + "learning_rate": 5.709823874422571e-06, + "loss": 0.3241, + "step": 23529 + }, + { + "epoch": 0.47103570802992767, + "grad_norm": 1.0843837261199951, + "learning_rate": 5.7095029733463905e-06, + "loss": 0.3179, + "step": 23530 + }, + { + "epoch": 0.471055726547056, + "grad_norm": 1.2388843297958374, + "learning_rate": 5.70918206928761e-06, + "loss": 0.2922, + "step": 23531 + }, + { + "epoch": 0.47107574506418437, + "grad_norm": 2.1474366188049316, + "learning_rate": 5.7088611622475835e-06, + "loss": 0.7462, + "step": 23532 + }, + { + "epoch": 0.4710957635813127, + "grad_norm": 1.1193145513534546, + "learning_rate": 5.708540252227655e-06, + "loss": 0.3115, + "step": 23533 + }, + { + "epoch": 0.47111578209844107, + "grad_norm": 2.2468504905700684, + "learning_rate": 5.708219339229177e-06, + "loss": 0.7812, + "step": 23534 + }, + { + "epoch": 0.4711358006155694, + "grad_norm": 1.8583852052688599, + "learning_rate": 5.7078984232534965e-06, + "loss": 0.7845, + "step": 23535 + }, + { + "epoch": 0.4711558191326978, + "grad_norm": 1.2093249559402466, + "learning_rate": 5.707577504301963e-06, + "loss": 0.3459, + "step": 23536 + }, + { + "epoch": 0.47117583764982607, + "grad_norm": 1.1936126947402954, + "learning_rate": 5.7072565823759274e-06, + "loss": 0.3508, + "step": 23537 + }, + { + "epoch": 0.4711958561669544, + "grad_norm": 1.2863949537277222, + "learning_rate": 5.706935657476737e-06, + "loss": 0.3578, + "step": 23538 + }, + { + "epoch": 0.47121587468408277, + "grad_norm": 1.1801056861877441, + "learning_rate": 5.706614729605741e-06, + "loss": 0.2761, + "step": 23539 + }, + { + "epoch": 0.4712358932012111, + "grad_norm": 1.2492433786392212, + "learning_rate": 5.7062937987642885e-06, + "loss": 0.3306, + "step": 23540 + }, + { + "epoch": 0.47125591171833947, + "grad_norm": 1.1900330781936646, + "learning_rate": 5.7059728649537285e-06, + "loss": 0.2658, + "step": 23541 + }, + { + "epoch": 0.4712759302354678, + "grad_norm": 1.2076082229614258, + "learning_rate": 5.705651928175411e-06, + "loss": 0.2881, + "step": 23542 + }, + { + "epoch": 0.47129594875259617, + "grad_norm": 1.1719046831130981, + "learning_rate": 5.705330988430685e-06, + "loss": 0.3257, + "step": 23543 + }, + { + "epoch": 0.4713159672697245, + "grad_norm": 1.2494257688522339, + "learning_rate": 5.705010045720898e-06, + "loss": 0.3118, + "step": 23544 + }, + { + "epoch": 0.4713359857868528, + "grad_norm": 1.2488508224487305, + "learning_rate": 5.704689100047402e-06, + "loss": 0.2997, + "step": 23545 + }, + { + "epoch": 0.47135600430398117, + "grad_norm": 1.0500000715255737, + "learning_rate": 5.704368151411544e-06, + "loss": 0.2904, + "step": 23546 + }, + { + "epoch": 0.4713760228211095, + "grad_norm": 1.09873628616333, + "learning_rate": 5.704047199814675e-06, + "loss": 0.3361, + "step": 23547 + }, + { + "epoch": 0.47139604133823787, + "grad_norm": 1.9028644561767578, + "learning_rate": 5.703726245258141e-06, + "loss": 0.8206, + "step": 23548 + }, + { + "epoch": 0.4714160598553662, + "grad_norm": 1.8887330293655396, + "learning_rate": 5.703405287743294e-06, + "loss": 0.8226, + "step": 23549 + }, + { + "epoch": 0.47143607837249457, + "grad_norm": 1.0931395292282104, + "learning_rate": 5.703084327271484e-06, + "loss": 0.2718, + "step": 23550 + }, + { + "epoch": 0.4714560968896229, + "grad_norm": 1.0475540161132812, + "learning_rate": 5.702763363844057e-06, + "loss": 0.3271, + "step": 23551 + }, + { + "epoch": 0.4714761154067512, + "grad_norm": 1.0870602130889893, + "learning_rate": 5.702442397462366e-06, + "loss": 0.2729, + "step": 23552 + }, + { + "epoch": 0.47149613392387957, + "grad_norm": 1.0958653688430786, + "learning_rate": 5.702121428127756e-06, + "loss": 0.2802, + "step": 23553 + }, + { + "epoch": 0.4715161524410079, + "grad_norm": 1.0524771213531494, + "learning_rate": 5.70180045584158e-06, + "loss": 0.3085, + "step": 23554 + }, + { + "epoch": 0.47153617095813627, + "grad_norm": 1.155073642730713, + "learning_rate": 5.701479480605184e-06, + "loss": 0.3331, + "step": 23555 + }, + { + "epoch": 0.4715561894752646, + "grad_norm": 1.1671181917190552, + "learning_rate": 5.701158502419923e-06, + "loss": 0.3169, + "step": 23556 + }, + { + "epoch": 0.47157620799239297, + "grad_norm": 1.091715931892395, + "learning_rate": 5.7008375212871405e-06, + "loss": 0.3127, + "step": 23557 + }, + { + "epoch": 0.4715962265095213, + "grad_norm": 1.1367454528808594, + "learning_rate": 5.700516537208187e-06, + "loss": 0.3243, + "step": 23558 + }, + { + "epoch": 0.47161624502664967, + "grad_norm": 1.06206214427948, + "learning_rate": 5.7001955501844146e-06, + "loss": 0.2878, + "step": 23559 + }, + { + "epoch": 0.47163626354377797, + "grad_norm": 1.250161051750183, + "learning_rate": 5.699874560217169e-06, + "loss": 0.3304, + "step": 23560 + }, + { + "epoch": 0.4716562820609063, + "grad_norm": 1.2528691291809082, + "learning_rate": 5.699553567307802e-06, + "loss": 0.3484, + "step": 23561 + }, + { + "epoch": 0.47167630057803467, + "grad_norm": 1.7341032028198242, + "learning_rate": 5.699232571457661e-06, + "loss": 0.8153, + "step": 23562 + }, + { + "epoch": 0.471696319095163, + "grad_norm": 1.0774402618408203, + "learning_rate": 5.698911572668099e-06, + "loss": 0.3139, + "step": 23563 + }, + { + "epoch": 0.47171633761229137, + "grad_norm": 1.0202751159667969, + "learning_rate": 5.698590570940462e-06, + "loss": 0.2599, + "step": 23564 + }, + { + "epoch": 0.4717363561294197, + "grad_norm": 1.1508028507232666, + "learning_rate": 5.698269566276102e-06, + "loss": 0.286, + "step": 23565 + }, + { + "epoch": 0.47175637464654807, + "grad_norm": 1.8370239734649658, + "learning_rate": 5.6979485586763646e-06, + "loss": 0.7718, + "step": 23566 + }, + { + "epoch": 0.4717763931636764, + "grad_norm": 1.2942328453063965, + "learning_rate": 5.697627548142604e-06, + "loss": 0.3215, + "step": 23567 + }, + { + "epoch": 0.4717964116808047, + "grad_norm": 1.080404281616211, + "learning_rate": 5.697306534676166e-06, + "loss": 0.3115, + "step": 23568 + }, + { + "epoch": 0.47181643019793307, + "grad_norm": 2.050990104675293, + "learning_rate": 5.696985518278402e-06, + "loss": 0.7867, + "step": 23569 + }, + { + "epoch": 0.4718364487150614, + "grad_norm": 1.1733015775680542, + "learning_rate": 5.6966644989506604e-06, + "loss": 0.2959, + "step": 23570 + }, + { + "epoch": 0.47185646723218977, + "grad_norm": 1.2490195035934448, + "learning_rate": 5.696343476694291e-06, + "loss": 0.3197, + "step": 23571 + }, + { + "epoch": 0.4718764857493181, + "grad_norm": 1.9749621152877808, + "learning_rate": 5.696022451510644e-06, + "loss": 0.7187, + "step": 23572 + }, + { + "epoch": 0.47189650426644647, + "grad_norm": 1.1811870336532593, + "learning_rate": 5.695701423401068e-06, + "loss": 0.3286, + "step": 23573 + }, + { + "epoch": 0.4719165227835748, + "grad_norm": 0.945452094078064, + "learning_rate": 5.695380392366912e-06, + "loss": 0.3036, + "step": 23574 + }, + { + "epoch": 0.47193654130070317, + "grad_norm": 0.9997972249984741, + "learning_rate": 5.695059358409528e-06, + "loss": 0.3422, + "step": 23575 + }, + { + "epoch": 0.47195655981783147, + "grad_norm": 0.9523839354515076, + "learning_rate": 5.694738321530264e-06, + "loss": 0.2895, + "step": 23576 + }, + { + "epoch": 0.4719765783349598, + "grad_norm": 1.8286709785461426, + "learning_rate": 5.694417281730469e-06, + "loss": 0.7028, + "step": 23577 + }, + { + "epoch": 0.47199659685208817, + "grad_norm": 1.0946931838989258, + "learning_rate": 5.694096239011494e-06, + "loss": 0.2941, + "step": 23578 + }, + { + "epoch": 0.4720166153692165, + "grad_norm": 1.0599840879440308, + "learning_rate": 5.693775193374687e-06, + "loss": 0.3142, + "step": 23579 + }, + { + "epoch": 0.47203663388634487, + "grad_norm": 1.07466721534729, + "learning_rate": 5.693454144821398e-06, + "loss": 0.3161, + "step": 23580 + }, + { + "epoch": 0.4720566524034732, + "grad_norm": 1.1205984354019165, + "learning_rate": 5.693133093352978e-06, + "loss": 0.3071, + "step": 23581 + }, + { + "epoch": 0.47207667092060157, + "grad_norm": 1.8170546293258667, + "learning_rate": 5.692812038970775e-06, + "loss": 0.7681, + "step": 23582 + }, + { + "epoch": 0.4720966894377299, + "grad_norm": 1.0123298168182373, + "learning_rate": 5.692490981676141e-06, + "loss": 0.2992, + "step": 23583 + }, + { + "epoch": 0.4721167079548582, + "grad_norm": 1.219359278678894, + "learning_rate": 5.692169921470422e-06, + "loss": 0.3163, + "step": 23584 + }, + { + "epoch": 0.47213672647198657, + "grad_norm": 1.3425097465515137, + "learning_rate": 5.691848858354971e-06, + "loss": 0.3176, + "step": 23585 + }, + { + "epoch": 0.4721567449891149, + "grad_norm": 1.118543267250061, + "learning_rate": 5.6915277923311345e-06, + "loss": 0.3175, + "step": 23586 + }, + { + "epoch": 0.47217676350624327, + "grad_norm": 1.0570255517959595, + "learning_rate": 5.691206723400264e-06, + "loss": 0.2882, + "step": 23587 + }, + { + "epoch": 0.4721967820233716, + "grad_norm": 1.1851750612258911, + "learning_rate": 5.69088565156371e-06, + "loss": 0.2981, + "step": 23588 + }, + { + "epoch": 0.47221680054049997, + "grad_norm": 1.0316311120986938, + "learning_rate": 5.690564576822821e-06, + "loss": 0.2834, + "step": 23589 + }, + { + "epoch": 0.4722368190576283, + "grad_norm": 1.138088583946228, + "learning_rate": 5.690243499178949e-06, + "loss": 0.2882, + "step": 23590 + }, + { + "epoch": 0.47225683757475667, + "grad_norm": 1.1455004215240479, + "learning_rate": 5.68992241863344e-06, + "loss": 0.331, + "step": 23591 + }, + { + "epoch": 0.47227685609188497, + "grad_norm": 1.3012925386428833, + "learning_rate": 5.689601335187647e-06, + "loss": 0.3329, + "step": 23592 + }, + { + "epoch": 0.4722968746090133, + "grad_norm": 1.021591305732727, + "learning_rate": 5.689280248842916e-06, + "loss": 0.2958, + "step": 23593 + }, + { + "epoch": 0.47231689312614167, + "grad_norm": 1.0445277690887451, + "learning_rate": 5.688959159600601e-06, + "loss": 0.2976, + "step": 23594 + }, + { + "epoch": 0.47233691164327, + "grad_norm": 1.0381672382354736, + "learning_rate": 5.688638067462049e-06, + "loss": 0.3426, + "step": 23595 + }, + { + "epoch": 0.47235693016039837, + "grad_norm": 1.132472276687622, + "learning_rate": 5.688316972428613e-06, + "loss": 0.311, + "step": 23596 + }, + { + "epoch": 0.4723769486775267, + "grad_norm": 1.852002739906311, + "learning_rate": 5.687995874501637e-06, + "loss": 0.8276, + "step": 23597 + }, + { + "epoch": 0.47239696719465507, + "grad_norm": 0.938774585723877, + "learning_rate": 5.687674773682478e-06, + "loss": 0.2577, + "step": 23598 + }, + { + "epoch": 0.4724169857117834, + "grad_norm": 1.079984188079834, + "learning_rate": 5.68735366997248e-06, + "loss": 0.3156, + "step": 23599 + }, + { + "epoch": 0.4724370042289117, + "grad_norm": 1.1356621980667114, + "learning_rate": 5.687032563372995e-06, + "loss": 0.3202, + "step": 23600 + }, + { + "epoch": 0.47245702274604007, + "grad_norm": 1.2844574451446533, + "learning_rate": 5.686711453885374e-06, + "loss": 0.2955, + "step": 23601 + }, + { + "epoch": 0.4724770412631684, + "grad_norm": 1.9697980880737305, + "learning_rate": 5.686390341510966e-06, + "loss": 0.7909, + "step": 23602 + }, + { + "epoch": 0.47249705978029677, + "grad_norm": 1.8189947605133057, + "learning_rate": 5.6860692262511195e-06, + "loss": 0.7807, + "step": 23603 + }, + { + "epoch": 0.4725170782974251, + "grad_norm": 1.1705530881881714, + "learning_rate": 5.685748108107186e-06, + "loss": 0.3144, + "step": 23604 + }, + { + "epoch": 0.47253709681455347, + "grad_norm": 1.1910006999969482, + "learning_rate": 5.685426987080516e-06, + "loss": 0.2798, + "step": 23605 + }, + { + "epoch": 0.4725571153316818, + "grad_norm": 1.3149811029434204, + "learning_rate": 5.6851058631724565e-06, + "loss": 0.3509, + "step": 23606 + }, + { + "epoch": 0.47257713384881017, + "grad_norm": 1.0968143939971924, + "learning_rate": 5.684784736384361e-06, + "loss": 0.3029, + "step": 23607 + }, + { + "epoch": 0.47259715236593847, + "grad_norm": 1.0803847312927246, + "learning_rate": 5.684463606717577e-06, + "loss": 0.3027, + "step": 23608 + }, + { + "epoch": 0.4726171708830668, + "grad_norm": 1.076478362083435, + "learning_rate": 5.684142474173456e-06, + "loss": 0.3178, + "step": 23609 + }, + { + "epoch": 0.47263718940019517, + "grad_norm": 1.208038091659546, + "learning_rate": 5.683821338753348e-06, + "loss": 0.3862, + "step": 23610 + }, + { + "epoch": 0.4726572079173235, + "grad_norm": 1.0689642429351807, + "learning_rate": 5.683500200458601e-06, + "loss": 0.3247, + "step": 23611 + }, + { + "epoch": 0.47267722643445187, + "grad_norm": 1.0506569147109985, + "learning_rate": 5.683179059290567e-06, + "loss": 0.2651, + "step": 23612 + }, + { + "epoch": 0.4726972449515802, + "grad_norm": 1.213245153427124, + "learning_rate": 5.682857915250594e-06, + "loss": 0.342, + "step": 23613 + }, + { + "epoch": 0.47271726346870857, + "grad_norm": 1.3175325393676758, + "learning_rate": 5.682536768340034e-06, + "loss": 0.2678, + "step": 23614 + }, + { + "epoch": 0.4727372819858369, + "grad_norm": 1.9242908954620361, + "learning_rate": 5.682215618560236e-06, + "loss": 0.7443, + "step": 23615 + }, + { + "epoch": 0.4727573005029652, + "grad_norm": 1.1490607261657715, + "learning_rate": 5.681894465912552e-06, + "loss": 0.2937, + "step": 23616 + }, + { + "epoch": 0.47277731902009357, + "grad_norm": 1.096617341041565, + "learning_rate": 5.681573310398328e-06, + "loss": 0.2919, + "step": 23617 + }, + { + "epoch": 0.4727973375372219, + "grad_norm": 1.1566572189331055, + "learning_rate": 5.681252152018919e-06, + "loss": 0.2676, + "step": 23618 + }, + { + "epoch": 0.47281735605435027, + "grad_norm": 1.334999680519104, + "learning_rate": 5.68093099077567e-06, + "loss": 0.2852, + "step": 23619 + }, + { + "epoch": 0.4728373745714786, + "grad_norm": 1.068695068359375, + "learning_rate": 5.680609826669935e-06, + "loss": 0.3044, + "step": 23620 + }, + { + "epoch": 0.47285739308860697, + "grad_norm": 1.0661537647247314, + "learning_rate": 5.680288659703063e-06, + "loss": 0.3162, + "step": 23621 + }, + { + "epoch": 0.4728774116057353, + "grad_norm": 1.2071051597595215, + "learning_rate": 5.679967489876404e-06, + "loss": 0.3042, + "step": 23622 + }, + { + "epoch": 0.47289743012286367, + "grad_norm": 1.1694282293319702, + "learning_rate": 5.6796463171913075e-06, + "loss": 0.3306, + "step": 23623 + }, + { + "epoch": 0.47291744863999197, + "grad_norm": 1.0473028421401978, + "learning_rate": 5.679325141649123e-06, + "loss": 0.2933, + "step": 23624 + }, + { + "epoch": 0.4729374671571203, + "grad_norm": 1.2291327714920044, + "learning_rate": 5.679003963251203e-06, + "loss": 0.3351, + "step": 23625 + }, + { + "epoch": 0.47295748567424867, + "grad_norm": 1.133785605430603, + "learning_rate": 5.678682781998895e-06, + "loss": 0.2562, + "step": 23626 + }, + { + "epoch": 0.472977504191377, + "grad_norm": 1.147997498512268, + "learning_rate": 5.678361597893554e-06, + "loss": 0.3244, + "step": 23627 + }, + { + "epoch": 0.47299752270850537, + "grad_norm": 1.0338258743286133, + "learning_rate": 5.678040410936524e-06, + "loss": 0.287, + "step": 23628 + }, + { + "epoch": 0.4730175412256337, + "grad_norm": 1.170802354812622, + "learning_rate": 5.677719221129159e-06, + "loss": 0.2946, + "step": 23629 + }, + { + "epoch": 0.47303755974276207, + "grad_norm": 0.9914054870605469, + "learning_rate": 5.677398028472809e-06, + "loss": 0.2928, + "step": 23630 + }, + { + "epoch": 0.4730575782598904, + "grad_norm": 1.241418480873108, + "learning_rate": 5.677076832968822e-06, + "loss": 0.2716, + "step": 23631 + }, + { + "epoch": 0.4730775967770187, + "grad_norm": 1.281204342842102, + "learning_rate": 5.67675563461855e-06, + "loss": 0.3297, + "step": 23632 + }, + { + "epoch": 0.47309761529414707, + "grad_norm": 1.1430851221084595, + "learning_rate": 5.676434433423343e-06, + "loss": 0.3196, + "step": 23633 + }, + { + "epoch": 0.4731176338112754, + "grad_norm": 1.0838228464126587, + "learning_rate": 5.676113229384552e-06, + "loss": 0.325, + "step": 23634 + }, + { + "epoch": 0.47313765232840377, + "grad_norm": 1.0711851119995117, + "learning_rate": 5.6757920225035255e-06, + "loss": 0.2964, + "step": 23635 + }, + { + "epoch": 0.4731576708455321, + "grad_norm": 1.0383046865463257, + "learning_rate": 5.675470812781616e-06, + "loss": 0.2987, + "step": 23636 + }, + { + "epoch": 0.47317768936266047, + "grad_norm": 1.2913219928741455, + "learning_rate": 5.675149600220171e-06, + "loss": 0.3764, + "step": 23637 + }, + { + "epoch": 0.4731977078797888, + "grad_norm": 1.1239203214645386, + "learning_rate": 5.674828384820544e-06, + "loss": 0.3105, + "step": 23638 + }, + { + "epoch": 0.47321772639691717, + "grad_norm": 1.0218734741210938, + "learning_rate": 5.674507166584081e-06, + "loss": 0.3147, + "step": 23639 + }, + { + "epoch": 0.47323774491404547, + "grad_norm": 1.2735066413879395, + "learning_rate": 5.674185945512137e-06, + "loss": 0.3252, + "step": 23640 + }, + { + "epoch": 0.4732577634311738, + "grad_norm": 1.1341816186904907, + "learning_rate": 5.673864721606062e-06, + "loss": 0.2877, + "step": 23641 + }, + { + "epoch": 0.47327778194830217, + "grad_norm": 1.143043875694275, + "learning_rate": 5.673543494867202e-06, + "loss": 0.3271, + "step": 23642 + }, + { + "epoch": 0.4732978004654305, + "grad_norm": 1.1128981113433838, + "learning_rate": 5.673222265296911e-06, + "loss": 0.2822, + "step": 23643 + }, + { + "epoch": 0.47331781898255887, + "grad_norm": 1.0259790420532227, + "learning_rate": 5.6729010328965385e-06, + "loss": 0.2944, + "step": 23644 + }, + { + "epoch": 0.4733378374996872, + "grad_norm": 1.1429589986801147, + "learning_rate": 5.672579797667435e-06, + "loss": 0.3286, + "step": 23645 + }, + { + "epoch": 0.47335785601681557, + "grad_norm": 1.085115671157837, + "learning_rate": 5.6722585596109505e-06, + "loss": 0.3135, + "step": 23646 + }, + { + "epoch": 0.4733778745339439, + "grad_norm": 1.0551224946975708, + "learning_rate": 5.671937318728436e-06, + "loss": 0.2728, + "step": 23647 + }, + { + "epoch": 0.4733978930510722, + "grad_norm": 1.225466251373291, + "learning_rate": 5.671616075021241e-06, + "loss": 0.3, + "step": 23648 + }, + { + "epoch": 0.47341791156820057, + "grad_norm": 1.2823495864868164, + "learning_rate": 5.671294828490718e-06, + "loss": 0.3258, + "step": 23649 + }, + { + "epoch": 0.4734379300853289, + "grad_norm": 1.165533423423767, + "learning_rate": 5.670973579138215e-06, + "loss": 0.3025, + "step": 23650 + }, + { + "epoch": 0.47345794860245727, + "grad_norm": 1.8577626943588257, + "learning_rate": 5.670652326965083e-06, + "loss": 0.284, + "step": 23651 + }, + { + "epoch": 0.4734779671195856, + "grad_norm": 0.9959478974342346, + "learning_rate": 5.670331071972674e-06, + "loss": 0.2777, + "step": 23652 + }, + { + "epoch": 0.47349798563671397, + "grad_norm": 1.2415419816970825, + "learning_rate": 5.670009814162336e-06, + "loss": 0.3246, + "step": 23653 + }, + { + "epoch": 0.4735180041538423, + "grad_norm": 1.0812569856643677, + "learning_rate": 5.669688553535423e-06, + "loss": 0.3168, + "step": 23654 + }, + { + "epoch": 0.47353802267097067, + "grad_norm": 1.0836650133132935, + "learning_rate": 5.669367290093282e-06, + "loss": 0.2638, + "step": 23655 + }, + { + "epoch": 0.47355804118809897, + "grad_norm": 1.1176880598068237, + "learning_rate": 5.669046023837265e-06, + "loss": 0.3185, + "step": 23656 + }, + { + "epoch": 0.4735780597052273, + "grad_norm": 1.1125354766845703, + "learning_rate": 5.668724754768722e-06, + "loss": 0.2822, + "step": 23657 + }, + { + "epoch": 0.47359807822235567, + "grad_norm": 1.0922211408615112, + "learning_rate": 5.668403482889005e-06, + "loss": 0.3066, + "step": 23658 + }, + { + "epoch": 0.473618096739484, + "grad_norm": 1.1020381450653076, + "learning_rate": 5.6680822081994625e-06, + "loss": 0.3115, + "step": 23659 + }, + { + "epoch": 0.47363811525661237, + "grad_norm": 1.726728081703186, + "learning_rate": 5.667760930701448e-06, + "loss": 0.8384, + "step": 23660 + }, + { + "epoch": 0.4736581337737407, + "grad_norm": 1.8810453414916992, + "learning_rate": 5.6674396503963095e-06, + "loss": 0.7921, + "step": 23661 + }, + { + "epoch": 0.47367815229086907, + "grad_norm": 0.9893934726715088, + "learning_rate": 5.667118367285398e-06, + "loss": 0.2777, + "step": 23662 + }, + { + "epoch": 0.4736981708079974, + "grad_norm": 1.0924941301345825, + "learning_rate": 5.666797081370063e-06, + "loss": 0.2721, + "step": 23663 + }, + { + "epoch": 0.4737181893251257, + "grad_norm": 1.082385540008545, + "learning_rate": 5.666475792651659e-06, + "loss": 0.2822, + "step": 23664 + }, + { + "epoch": 0.47373820784225407, + "grad_norm": 1.27057945728302, + "learning_rate": 5.666154501131533e-06, + "loss": 0.3101, + "step": 23665 + }, + { + "epoch": 0.4737582263593824, + "grad_norm": 1.2209725379943848, + "learning_rate": 5.665833206811038e-06, + "loss": 0.3317, + "step": 23666 + }, + { + "epoch": 0.47377824487651077, + "grad_norm": 1.0435949563980103, + "learning_rate": 5.665511909691522e-06, + "loss": 0.3048, + "step": 23667 + }, + { + "epoch": 0.4737982633936391, + "grad_norm": 1.0529154539108276, + "learning_rate": 5.665190609774338e-06, + "loss": 0.2849, + "step": 23668 + }, + { + "epoch": 0.47381828191076747, + "grad_norm": 1.1213141679763794, + "learning_rate": 5.664869307060836e-06, + "loss": 0.2951, + "step": 23669 + }, + { + "epoch": 0.4738383004278958, + "grad_norm": 1.2081371545791626, + "learning_rate": 5.664548001552366e-06, + "loss": 0.2889, + "step": 23670 + }, + { + "epoch": 0.47385831894502417, + "grad_norm": 1.0519911050796509, + "learning_rate": 5.664226693250279e-06, + "loss": 0.3386, + "step": 23671 + }, + { + "epoch": 0.47387833746215247, + "grad_norm": 1.0688515901565552, + "learning_rate": 5.663905382155926e-06, + "loss": 0.3105, + "step": 23672 + }, + { + "epoch": 0.4738983559792808, + "grad_norm": 1.16093111038208, + "learning_rate": 5.663584068270658e-06, + "loss": 0.3129, + "step": 23673 + }, + { + "epoch": 0.47391837449640917, + "grad_norm": 1.1207354068756104, + "learning_rate": 5.663262751595826e-06, + "loss": 0.3153, + "step": 23674 + }, + { + "epoch": 0.4739383930135375, + "grad_norm": 1.1985243558883667, + "learning_rate": 5.662941432132779e-06, + "loss": 0.3344, + "step": 23675 + }, + { + "epoch": 0.47395841153066587, + "grad_norm": 2.064513921737671, + "learning_rate": 5.662620109882869e-06, + "loss": 0.7323, + "step": 23676 + }, + { + "epoch": 0.4739784300477942, + "grad_norm": 1.0973056554794312, + "learning_rate": 5.662298784847446e-06, + "loss": 0.3181, + "step": 23677 + }, + { + "epoch": 0.47399844856492257, + "grad_norm": 1.729833722114563, + "learning_rate": 5.661977457027864e-06, + "loss": 0.8368, + "step": 23678 + }, + { + "epoch": 0.4740184670820509, + "grad_norm": 1.4405348300933838, + "learning_rate": 5.661656126425469e-06, + "loss": 0.3173, + "step": 23679 + }, + { + "epoch": 0.4740384855991792, + "grad_norm": 1.016705870628357, + "learning_rate": 5.661334793041615e-06, + "loss": 0.2875, + "step": 23680 + }, + { + "epoch": 0.47405850411630757, + "grad_norm": 1.9067957401275635, + "learning_rate": 5.661013456877651e-06, + "loss": 0.7312, + "step": 23681 + }, + { + "epoch": 0.4740785226334359, + "grad_norm": 1.1148147583007812, + "learning_rate": 5.66069211793493e-06, + "loss": 0.3297, + "step": 23682 + }, + { + "epoch": 0.47409854115056427, + "grad_norm": 1.242600917816162, + "learning_rate": 5.660370776214801e-06, + "loss": 0.3054, + "step": 23683 + }, + { + "epoch": 0.4741185596676926, + "grad_norm": 0.941687822341919, + "learning_rate": 5.660049431718614e-06, + "loss": 0.304, + "step": 23684 + }, + { + "epoch": 0.47413857818482097, + "grad_norm": 1.0327907800674438, + "learning_rate": 5.659728084447723e-06, + "loss": 0.3264, + "step": 23685 + }, + { + "epoch": 0.4741585967019493, + "grad_norm": 1.1379992961883545, + "learning_rate": 5.659406734403476e-06, + "loss": 0.3089, + "step": 23686 + }, + { + "epoch": 0.47417861521907767, + "grad_norm": 1.0641191005706787, + "learning_rate": 5.659085381587226e-06, + "loss": 0.2822, + "step": 23687 + }, + { + "epoch": 0.47419863373620597, + "grad_norm": 1.0196536779403687, + "learning_rate": 5.6587640260003215e-06, + "loss": 0.3021, + "step": 23688 + }, + { + "epoch": 0.4742186522533343, + "grad_norm": 1.0052504539489746, + "learning_rate": 5.658442667644115e-06, + "loss": 0.3376, + "step": 23689 + }, + { + "epoch": 0.47423867077046267, + "grad_norm": 1.1078546047210693, + "learning_rate": 5.658121306519958e-06, + "loss": 0.2966, + "step": 23690 + }, + { + "epoch": 0.474258689287591, + "grad_norm": 1.106834053993225, + "learning_rate": 5.657799942629201e-06, + "loss": 0.3559, + "step": 23691 + }, + { + "epoch": 0.47427870780471937, + "grad_norm": 1.124693751335144, + "learning_rate": 5.657478575973195e-06, + "loss": 0.311, + "step": 23692 + }, + { + "epoch": 0.4742987263218477, + "grad_norm": 1.0385078191757202, + "learning_rate": 5.6571572065532885e-06, + "loss": 0.3428, + "step": 23693 + }, + { + "epoch": 0.47431874483897607, + "grad_norm": 1.098816156387329, + "learning_rate": 5.656835834370838e-06, + "loss": 0.3042, + "step": 23694 + }, + { + "epoch": 0.4743387633561044, + "grad_norm": 1.7489433288574219, + "learning_rate": 5.656514459427187e-06, + "loss": 0.7757, + "step": 23695 + }, + { + "epoch": 0.4743587818732327, + "grad_norm": 1.1696743965148926, + "learning_rate": 5.656193081723692e-06, + "loss": 0.2785, + "step": 23696 + }, + { + "epoch": 0.47437880039036107, + "grad_norm": 1.1770650148391724, + "learning_rate": 5.655871701261703e-06, + "loss": 0.2922, + "step": 23697 + }, + { + "epoch": 0.4743988189074894, + "grad_norm": 1.145029902458191, + "learning_rate": 5.6555503180425694e-06, + "loss": 0.3098, + "step": 23698 + }, + { + "epoch": 0.47441883742461777, + "grad_norm": 1.952848196029663, + "learning_rate": 5.655228932067645e-06, + "loss": 0.823, + "step": 23699 + }, + { + "epoch": 0.4744388559417461, + "grad_norm": 1.094831943511963, + "learning_rate": 5.6549075433382785e-06, + "loss": 0.2913, + "step": 23700 + }, + { + "epoch": 0.47445887445887447, + "grad_norm": 1.0637890100479126, + "learning_rate": 5.654586151855821e-06, + "loss": 0.2751, + "step": 23701 + }, + { + "epoch": 0.4744788929760028, + "grad_norm": 1.0073479413986206, + "learning_rate": 5.654264757621623e-06, + "loss": 0.3112, + "step": 23702 + }, + { + "epoch": 0.47449891149313117, + "grad_norm": 1.1956799030303955, + "learning_rate": 5.65394336063704e-06, + "loss": 0.3005, + "step": 23703 + }, + { + "epoch": 0.47451893001025947, + "grad_norm": 1.119081974029541, + "learning_rate": 5.653621960903418e-06, + "loss": 0.3119, + "step": 23704 + }, + { + "epoch": 0.4745389485273878, + "grad_norm": 1.0761085748672485, + "learning_rate": 5.65330055842211e-06, + "loss": 0.3269, + "step": 23705 + }, + { + "epoch": 0.47455896704451617, + "grad_norm": 1.0942356586456299, + "learning_rate": 5.652979153194468e-06, + "loss": 0.2964, + "step": 23706 + }, + { + "epoch": 0.4745789855616445, + "grad_norm": 1.181064248085022, + "learning_rate": 5.652657745221841e-06, + "loss": 0.3317, + "step": 23707 + }, + { + "epoch": 0.47459900407877287, + "grad_norm": 1.1097025871276855, + "learning_rate": 5.652336334505581e-06, + "loss": 0.368, + "step": 23708 + }, + { + "epoch": 0.4746190225959012, + "grad_norm": 1.0023661851882935, + "learning_rate": 5.65201492104704e-06, + "loss": 0.3166, + "step": 23709 + }, + { + "epoch": 0.47463904111302957, + "grad_norm": 1.1492843627929688, + "learning_rate": 5.651693504847569e-06, + "loss": 0.3306, + "step": 23710 + }, + { + "epoch": 0.4746590596301579, + "grad_norm": 1.1583024263381958, + "learning_rate": 5.651372085908518e-06, + "loss": 0.3407, + "step": 23711 + }, + { + "epoch": 0.4746790781472862, + "grad_norm": 1.1422414779663086, + "learning_rate": 5.651050664231239e-06, + "loss": 0.3007, + "step": 23712 + }, + { + "epoch": 0.47469909666441457, + "grad_norm": 1.0850239992141724, + "learning_rate": 5.650729239817084e-06, + "loss": 0.2637, + "step": 23713 + }, + { + "epoch": 0.4747191151815429, + "grad_norm": 1.0462517738342285, + "learning_rate": 5.650407812667404e-06, + "loss": 0.302, + "step": 23714 + }, + { + "epoch": 0.47473913369867127, + "grad_norm": 1.0820246934890747, + "learning_rate": 5.650086382783547e-06, + "loss": 0.3525, + "step": 23715 + }, + { + "epoch": 0.4747591522157996, + "grad_norm": 1.0226459503173828, + "learning_rate": 5.649764950166869e-06, + "loss": 0.3341, + "step": 23716 + }, + { + "epoch": 0.47477917073292797, + "grad_norm": 0.9986994862556458, + "learning_rate": 5.649443514818718e-06, + "loss": 0.3239, + "step": 23717 + }, + { + "epoch": 0.4747991892500563, + "grad_norm": 1.0654361248016357, + "learning_rate": 5.6491220767404464e-06, + "loss": 0.315, + "step": 23718 + }, + { + "epoch": 0.47481920776718467, + "grad_norm": 1.091199517250061, + "learning_rate": 5.648800635933405e-06, + "loss": 0.312, + "step": 23719 + }, + { + "epoch": 0.47483922628431297, + "grad_norm": 1.1441938877105713, + "learning_rate": 5.648479192398946e-06, + "loss": 0.3111, + "step": 23720 + }, + { + "epoch": 0.4748592448014413, + "grad_norm": 1.1165419816970825, + "learning_rate": 5.648157746138418e-06, + "loss": 0.3116, + "step": 23721 + }, + { + "epoch": 0.47487926331856967, + "grad_norm": 1.1842243671417236, + "learning_rate": 5.647836297153176e-06, + "loss": 0.3595, + "step": 23722 + }, + { + "epoch": 0.474899281835698, + "grad_norm": 1.0599160194396973, + "learning_rate": 5.6475148454445696e-06, + "loss": 0.3088, + "step": 23723 + }, + { + "epoch": 0.47491930035282637, + "grad_norm": 1.0668737888336182, + "learning_rate": 5.64719339101395e-06, + "loss": 0.3198, + "step": 23724 + }, + { + "epoch": 0.4749393188699547, + "grad_norm": 1.36751127243042, + "learning_rate": 5.64687193386267e-06, + "loss": 0.3, + "step": 23725 + }, + { + "epoch": 0.47495933738708307, + "grad_norm": 1.0827056169509888, + "learning_rate": 5.646550473992078e-06, + "loss": 0.2786, + "step": 23726 + }, + { + "epoch": 0.4749793559042114, + "grad_norm": 1.095398187637329, + "learning_rate": 5.646229011403525e-06, + "loss": 0.3217, + "step": 23727 + }, + { + "epoch": 0.4749993744213397, + "grad_norm": 1.1982622146606445, + "learning_rate": 5.645907546098367e-06, + "loss": 0.3271, + "step": 23728 + }, + { + "epoch": 0.47501939293846807, + "grad_norm": 1.0455926656723022, + "learning_rate": 5.645586078077954e-06, + "loss": 0.3375, + "step": 23729 + }, + { + "epoch": 0.4750394114555964, + "grad_norm": 1.022400975227356, + "learning_rate": 5.645264607343632e-06, + "loss": 0.3105, + "step": 23730 + }, + { + "epoch": 0.47505942997272477, + "grad_norm": 1.024061679840088, + "learning_rate": 5.644943133896761e-06, + "loss": 0.329, + "step": 23731 + }, + { + "epoch": 0.4750794484898531, + "grad_norm": 1.1804009675979614, + "learning_rate": 5.644621657738685e-06, + "loss": 0.3646, + "step": 23732 + }, + { + "epoch": 0.47509946700698147, + "grad_norm": 1.1024770736694336, + "learning_rate": 5.644300178870758e-06, + "loss": 0.3101, + "step": 23733 + }, + { + "epoch": 0.4751194855241098, + "grad_norm": 1.1630074977874756, + "learning_rate": 5.6439786972943336e-06, + "loss": 0.2892, + "step": 23734 + }, + { + "epoch": 0.47513950404123817, + "grad_norm": 1.0458616018295288, + "learning_rate": 5.643657213010759e-06, + "loss": 0.292, + "step": 23735 + }, + { + "epoch": 0.47515952255836647, + "grad_norm": 1.0216728448867798, + "learning_rate": 5.64333572602139e-06, + "loss": 0.3142, + "step": 23736 + }, + { + "epoch": 0.4751795410754948, + "grad_norm": 1.0405409336090088, + "learning_rate": 5.643014236327575e-06, + "loss": 0.3477, + "step": 23737 + }, + { + "epoch": 0.47519955959262317, + "grad_norm": 1.1412553787231445, + "learning_rate": 5.642692743930668e-06, + "loss": 0.3487, + "step": 23738 + }, + { + "epoch": 0.4752195781097515, + "grad_norm": 1.0645643472671509, + "learning_rate": 5.642371248832017e-06, + "loss": 0.3368, + "step": 23739 + }, + { + "epoch": 0.47523959662687987, + "grad_norm": 1.1516716480255127, + "learning_rate": 5.642049751032977e-06, + "loss": 0.3515, + "step": 23740 + }, + { + "epoch": 0.4752596151440082, + "grad_norm": 1.0328142642974854, + "learning_rate": 5.641728250534896e-06, + "loss": 0.3083, + "step": 23741 + }, + { + "epoch": 0.47527963366113657, + "grad_norm": 1.0180052518844604, + "learning_rate": 5.64140674733913e-06, + "loss": 0.297, + "step": 23742 + }, + { + "epoch": 0.4752996521782649, + "grad_norm": 1.0288809537887573, + "learning_rate": 5.641085241447026e-06, + "loss": 0.3007, + "step": 23743 + }, + { + "epoch": 0.4753196706953932, + "grad_norm": 1.0599194765090942, + "learning_rate": 5.64076373285994e-06, + "loss": 0.2925, + "step": 23744 + }, + { + "epoch": 0.47533968921252157, + "grad_norm": 0.9301084876060486, + "learning_rate": 5.64044222157922e-06, + "loss": 0.2972, + "step": 23745 + }, + { + "epoch": 0.4753597077296499, + "grad_norm": 1.0345202684402466, + "learning_rate": 5.640120707606218e-06, + "loss": 0.3199, + "step": 23746 + }, + { + "epoch": 0.47537972624677827, + "grad_norm": 1.2687135934829712, + "learning_rate": 5.6397991909422865e-06, + "loss": 0.2764, + "step": 23747 + }, + { + "epoch": 0.4753997447639066, + "grad_norm": 1.0006020069122314, + "learning_rate": 5.639477671588777e-06, + "loss": 0.2855, + "step": 23748 + }, + { + "epoch": 0.47541976328103497, + "grad_norm": 1.2861896753311157, + "learning_rate": 5.639156149547041e-06, + "loss": 0.2901, + "step": 23749 + }, + { + "epoch": 0.4754397817981633, + "grad_norm": 0.9794865250587463, + "learning_rate": 5.63883462481843e-06, + "loss": 0.277, + "step": 23750 + }, + { + "epoch": 0.47545980031529167, + "grad_norm": 1.11056649684906, + "learning_rate": 5.638513097404297e-06, + "loss": 0.3461, + "step": 23751 + }, + { + "epoch": 0.47547981883241996, + "grad_norm": 1.1323305368423462, + "learning_rate": 5.638191567305991e-06, + "loss": 0.3121, + "step": 23752 + }, + { + "epoch": 0.4754998373495483, + "grad_norm": 1.9408087730407715, + "learning_rate": 5.637870034524865e-06, + "loss": 0.8012, + "step": 23753 + }, + { + "epoch": 0.47551985586667667, + "grad_norm": 1.0745439529418945, + "learning_rate": 5.63754849906227e-06, + "loss": 0.2889, + "step": 23754 + }, + { + "epoch": 0.475539874383805, + "grad_norm": 1.1675950288772583, + "learning_rate": 5.63722696091956e-06, + "loss": 0.3314, + "step": 23755 + }, + { + "epoch": 0.47555989290093337, + "grad_norm": 1.1536602973937988, + "learning_rate": 5.6369054200980855e-06, + "loss": 0.3535, + "step": 23756 + }, + { + "epoch": 0.4755799114180617, + "grad_norm": 1.142162561416626, + "learning_rate": 5.636583876599195e-06, + "loss": 0.3289, + "step": 23757 + }, + { + "epoch": 0.47559992993519007, + "grad_norm": 1.0654501914978027, + "learning_rate": 5.636262330424246e-06, + "loss": 0.3348, + "step": 23758 + }, + { + "epoch": 0.4756199484523184, + "grad_norm": 1.0562304258346558, + "learning_rate": 5.635940781574585e-06, + "loss": 0.3093, + "step": 23759 + }, + { + "epoch": 0.4756399669694467, + "grad_norm": 1.0686286687850952, + "learning_rate": 5.635619230051565e-06, + "loss": 0.3479, + "step": 23760 + }, + { + "epoch": 0.47565998548657507, + "grad_norm": 1.0536763668060303, + "learning_rate": 5.635297675856539e-06, + "loss": 0.3022, + "step": 23761 + }, + { + "epoch": 0.4756800040037034, + "grad_norm": 1.0791258811950684, + "learning_rate": 5.63497611899086e-06, + "loss": 0.3128, + "step": 23762 + }, + { + "epoch": 0.47570002252083177, + "grad_norm": 0.9995158910751343, + "learning_rate": 5.634654559455877e-06, + "loss": 0.2943, + "step": 23763 + }, + { + "epoch": 0.4757200410379601, + "grad_norm": 1.1004968881607056, + "learning_rate": 5.634332997252944e-06, + "loss": 0.323, + "step": 23764 + }, + { + "epoch": 0.47574005955508847, + "grad_norm": 2.0504348278045654, + "learning_rate": 5.634011432383409e-06, + "loss": 0.7818, + "step": 23765 + }, + { + "epoch": 0.4757600780722168, + "grad_norm": 1.1764498949050903, + "learning_rate": 5.633689864848628e-06, + "loss": 0.3354, + "step": 23766 + }, + { + "epoch": 0.47578009658934517, + "grad_norm": 1.1265590190887451, + "learning_rate": 5.633368294649953e-06, + "loss": 0.2571, + "step": 23767 + }, + { + "epoch": 0.47580011510647346, + "grad_norm": 1.1848665475845337, + "learning_rate": 5.6330467217887305e-06, + "loss": 0.3004, + "step": 23768 + }, + { + "epoch": 0.4758201336236018, + "grad_norm": 1.087204933166504, + "learning_rate": 5.632725146266318e-06, + "loss": 0.3262, + "step": 23769 + }, + { + "epoch": 0.47584015214073017, + "grad_norm": 1.3023263216018677, + "learning_rate": 5.632403568084064e-06, + "loss": 0.2999, + "step": 23770 + }, + { + "epoch": 0.4758601706578585, + "grad_norm": 1.1182669401168823, + "learning_rate": 5.632081987243323e-06, + "loss": 0.3299, + "step": 23771 + }, + { + "epoch": 0.47588018917498687, + "grad_norm": 1.9190053939819336, + "learning_rate": 5.631760403745443e-06, + "loss": 0.757, + "step": 23772 + }, + { + "epoch": 0.4759002076921152, + "grad_norm": 1.0526586771011353, + "learning_rate": 5.63143881759178e-06, + "loss": 0.2882, + "step": 23773 + }, + { + "epoch": 0.47592022620924357, + "grad_norm": 1.2010945081710815, + "learning_rate": 5.631117228783682e-06, + "loss": 0.3294, + "step": 23774 + }, + { + "epoch": 0.4759402447263719, + "grad_norm": 1.1339081525802612, + "learning_rate": 5.630795637322506e-06, + "loss": 0.2732, + "step": 23775 + }, + { + "epoch": 0.4759602632435002, + "grad_norm": 1.1357730627059937, + "learning_rate": 5.6304740432096005e-06, + "loss": 0.311, + "step": 23776 + }, + { + "epoch": 0.47598028176062857, + "grad_norm": 1.052358627319336, + "learning_rate": 5.6301524464463165e-06, + "loss": 0.3104, + "step": 23777 + }, + { + "epoch": 0.4760003002777569, + "grad_norm": 1.019187092781067, + "learning_rate": 5.629830847034008e-06, + "loss": 0.2996, + "step": 23778 + }, + { + "epoch": 0.47602031879488527, + "grad_norm": 0.98822420835495, + "learning_rate": 5.629509244974024e-06, + "loss": 0.2987, + "step": 23779 + }, + { + "epoch": 0.4760403373120136, + "grad_norm": 1.0601686239242554, + "learning_rate": 5.629187640267721e-06, + "loss": 0.2992, + "step": 23780 + }, + { + "epoch": 0.47606035582914197, + "grad_norm": 1.0966495275497437, + "learning_rate": 5.628866032916448e-06, + "loss": 0.3084, + "step": 23781 + }, + { + "epoch": 0.4760803743462703, + "grad_norm": 1.2858219146728516, + "learning_rate": 5.628544422921558e-06, + "loss": 0.3017, + "step": 23782 + }, + { + "epoch": 0.47610039286339867, + "grad_norm": 1.2063243389129639, + "learning_rate": 5.628222810284402e-06, + "loss": 0.306, + "step": 23783 + }, + { + "epoch": 0.47612041138052696, + "grad_norm": 1.209045648574829, + "learning_rate": 5.6279011950063326e-06, + "loss": 0.3668, + "step": 23784 + }, + { + "epoch": 0.4761404298976553, + "grad_norm": 1.0845978260040283, + "learning_rate": 5.627579577088701e-06, + "loss": 0.3285, + "step": 23785 + }, + { + "epoch": 0.47616044841478367, + "grad_norm": 1.044815182685852, + "learning_rate": 5.62725795653286e-06, + "loss": 0.3231, + "step": 23786 + }, + { + "epoch": 0.476180466931912, + "grad_norm": 1.1314243078231812, + "learning_rate": 5.626936333340162e-06, + "loss": 0.3276, + "step": 23787 + }, + { + "epoch": 0.47620048544904037, + "grad_norm": 1.9824984073638916, + "learning_rate": 5.626614707511959e-06, + "loss": 0.8111, + "step": 23788 + }, + { + "epoch": 0.4762205039661687, + "grad_norm": 1.1469435691833496, + "learning_rate": 5.626293079049603e-06, + "loss": 0.2688, + "step": 23789 + }, + { + "epoch": 0.47624052248329707, + "grad_norm": 1.271574854850769, + "learning_rate": 5.625971447954445e-06, + "loss": 0.3465, + "step": 23790 + }, + { + "epoch": 0.4762605410004254, + "grad_norm": 1.0542134046554565, + "learning_rate": 5.625649814227838e-06, + "loss": 0.2883, + "step": 23791 + }, + { + "epoch": 0.4762805595175537, + "grad_norm": 1.1596705913543701, + "learning_rate": 5.625328177871131e-06, + "loss": 0.3288, + "step": 23792 + }, + { + "epoch": 0.47630057803468207, + "grad_norm": 1.253411054611206, + "learning_rate": 5.625006538885682e-06, + "loss": 0.3475, + "step": 23793 + }, + { + "epoch": 0.4763205965518104, + "grad_norm": 1.8407217264175415, + "learning_rate": 5.62468489727284e-06, + "loss": 0.8262, + "step": 23794 + }, + { + "epoch": 0.47634061506893877, + "grad_norm": 1.0015367269515991, + "learning_rate": 5.624363253033956e-06, + "loss": 0.3306, + "step": 23795 + }, + { + "epoch": 0.4763606335860671, + "grad_norm": 1.1986626386642456, + "learning_rate": 5.624041606170384e-06, + "loss": 0.3103, + "step": 23796 + }, + { + "epoch": 0.47638065210319547, + "grad_norm": 1.1490463018417358, + "learning_rate": 5.623719956683475e-06, + "loss": 0.3364, + "step": 23797 + }, + { + "epoch": 0.4764006706203238, + "grad_norm": 1.1809762716293335, + "learning_rate": 5.623398304574582e-06, + "loss": 0.268, + "step": 23798 + }, + { + "epoch": 0.47642068913745217, + "grad_norm": 1.0128164291381836, + "learning_rate": 5.623076649845056e-06, + "loss": 0.3075, + "step": 23799 + }, + { + "epoch": 0.47644070765458046, + "grad_norm": 1.9646954536437988, + "learning_rate": 5.622754992496251e-06, + "loss": 0.7998, + "step": 23800 + }, + { + "epoch": 0.4764607261717088, + "grad_norm": 1.0010758638381958, + "learning_rate": 5.622433332529517e-06, + "loss": 0.3246, + "step": 23801 + }, + { + "epoch": 0.47648074468883717, + "grad_norm": 1.294542908668518, + "learning_rate": 5.622111669946208e-06, + "loss": 0.3359, + "step": 23802 + }, + { + "epoch": 0.4765007632059655, + "grad_norm": 1.106799602508545, + "learning_rate": 5.621790004747673e-06, + "loss": 0.2785, + "step": 23803 + }, + { + "epoch": 0.47652078172309387, + "grad_norm": 1.131116271018982, + "learning_rate": 5.62146833693527e-06, + "loss": 0.3279, + "step": 23804 + }, + { + "epoch": 0.4765408002402222, + "grad_norm": 1.0569937229156494, + "learning_rate": 5.621146666510346e-06, + "loss": 0.2659, + "step": 23805 + }, + { + "epoch": 0.47656081875735057, + "grad_norm": 1.102794885635376, + "learning_rate": 5.620824993474255e-06, + "loss": 0.3067, + "step": 23806 + }, + { + "epoch": 0.4765808372744789, + "grad_norm": 1.0929951667785645, + "learning_rate": 5.62050331782835e-06, + "loss": 0.2876, + "step": 23807 + }, + { + "epoch": 0.4766008557916072, + "grad_norm": 1.1206245422363281, + "learning_rate": 5.6201816395739825e-06, + "loss": 0.3443, + "step": 23808 + }, + { + "epoch": 0.47662087430873556, + "grad_norm": 1.0982893705368042, + "learning_rate": 5.6198599587125055e-06, + "loss": 0.2552, + "step": 23809 + }, + { + "epoch": 0.4766408928258639, + "grad_norm": 1.1027617454528809, + "learning_rate": 5.619538275245269e-06, + "loss": 0.294, + "step": 23810 + }, + { + "epoch": 0.47666091134299227, + "grad_norm": 1.8296171426773071, + "learning_rate": 5.619216589173628e-06, + "loss": 0.7387, + "step": 23811 + }, + { + "epoch": 0.4766809298601206, + "grad_norm": 1.0965328216552734, + "learning_rate": 5.618894900498932e-06, + "loss": 0.2956, + "step": 23812 + }, + { + "epoch": 0.47670094837724897, + "grad_norm": 1.2528125047683716, + "learning_rate": 5.618573209222538e-06, + "loss": 0.3487, + "step": 23813 + }, + { + "epoch": 0.4767209668943773, + "grad_norm": 1.1261061429977417, + "learning_rate": 5.618251515345793e-06, + "loss": 0.3277, + "step": 23814 + }, + { + "epoch": 0.47674098541150567, + "grad_norm": 1.0875002145767212, + "learning_rate": 5.617929818870052e-06, + "loss": 0.3024, + "step": 23815 + }, + { + "epoch": 0.47676100392863396, + "grad_norm": 1.0179258584976196, + "learning_rate": 5.617608119796668e-06, + "loss": 0.287, + "step": 23816 + }, + { + "epoch": 0.4767810224457623, + "grad_norm": 1.0671788454055786, + "learning_rate": 5.617286418126992e-06, + "loss": 0.2987, + "step": 23817 + }, + { + "epoch": 0.47680104096289067, + "grad_norm": 1.005807876586914, + "learning_rate": 5.616964713862377e-06, + "loss": 0.2745, + "step": 23818 + }, + { + "epoch": 0.476821059480019, + "grad_norm": 1.0440325736999512, + "learning_rate": 5.616643007004175e-06, + "loss": 0.3337, + "step": 23819 + }, + { + "epoch": 0.47684107799714737, + "grad_norm": 1.2269424200057983, + "learning_rate": 5.616321297553738e-06, + "loss": 0.2816, + "step": 23820 + }, + { + "epoch": 0.4768610965142757, + "grad_norm": 1.9300456047058105, + "learning_rate": 5.615999585512419e-06, + "loss": 0.8311, + "step": 23821 + }, + { + "epoch": 0.47688111503140407, + "grad_norm": 1.0317702293395996, + "learning_rate": 5.6156778708815706e-06, + "loss": 0.3153, + "step": 23822 + }, + { + "epoch": 0.4769011335485324, + "grad_norm": 1.0847229957580566, + "learning_rate": 5.615356153662544e-06, + "loss": 0.3125, + "step": 23823 + }, + { + "epoch": 0.4769211520656607, + "grad_norm": 1.1513702869415283, + "learning_rate": 5.6150344338566945e-06, + "loss": 0.3303, + "step": 23824 + }, + { + "epoch": 0.47694117058278906, + "grad_norm": 1.073283076286316, + "learning_rate": 5.61471271146537e-06, + "loss": 0.3235, + "step": 23825 + }, + { + "epoch": 0.4769611890999174, + "grad_norm": 1.0884207487106323, + "learning_rate": 5.614390986489928e-06, + "loss": 0.2922, + "step": 23826 + }, + { + "epoch": 0.47698120761704577, + "grad_norm": 1.0409742593765259, + "learning_rate": 5.614069258931717e-06, + "loss": 0.3227, + "step": 23827 + }, + { + "epoch": 0.4770012261341741, + "grad_norm": 1.077219843864441, + "learning_rate": 5.613747528792092e-06, + "loss": 0.2844, + "step": 23828 + }, + { + "epoch": 0.47702124465130247, + "grad_norm": 1.183266282081604, + "learning_rate": 5.613425796072405e-06, + "loss": 0.3377, + "step": 23829 + }, + { + "epoch": 0.4770412631684308, + "grad_norm": 1.1184855699539185, + "learning_rate": 5.6131040607740064e-06, + "loss": 0.3784, + "step": 23830 + }, + { + "epoch": 0.47706128168555917, + "grad_norm": 0.9936798810958862, + "learning_rate": 5.61278232289825e-06, + "loss": 0.3103, + "step": 23831 + }, + { + "epoch": 0.47708130020268746, + "grad_norm": 1.0485386848449707, + "learning_rate": 5.612460582446489e-06, + "loss": 0.336, + "step": 23832 + }, + { + "epoch": 0.4771013187198158, + "grad_norm": 0.9725555777549744, + "learning_rate": 5.6121388394200775e-06, + "loss": 0.2965, + "step": 23833 + }, + { + "epoch": 0.47712133723694417, + "grad_norm": 1.0522475242614746, + "learning_rate": 5.611817093820363e-06, + "loss": 0.328, + "step": 23834 + }, + { + "epoch": 0.4771413557540725, + "grad_norm": 1.0499420166015625, + "learning_rate": 5.6114953456487035e-06, + "loss": 0.2856, + "step": 23835 + }, + { + "epoch": 0.47716137427120087, + "grad_norm": 1.1142714023590088, + "learning_rate": 5.6111735949064475e-06, + "loss": 0.3308, + "step": 23836 + }, + { + "epoch": 0.4771813927883292, + "grad_norm": 1.175097942352295, + "learning_rate": 5.61085184159495e-06, + "loss": 0.3269, + "step": 23837 + }, + { + "epoch": 0.47720141130545757, + "grad_norm": 1.0977113246917725, + "learning_rate": 5.610530085715562e-06, + "loss": 0.2839, + "step": 23838 + }, + { + "epoch": 0.4772214298225859, + "grad_norm": 1.2697341442108154, + "learning_rate": 5.610208327269638e-06, + "loss": 0.3087, + "step": 23839 + }, + { + "epoch": 0.4772414483397142, + "grad_norm": 1.1260027885437012, + "learning_rate": 5.60988656625853e-06, + "loss": 0.3026, + "step": 23840 + }, + { + "epoch": 0.47726146685684256, + "grad_norm": 1.1681418418884277, + "learning_rate": 5.609564802683589e-06, + "loss": 0.3682, + "step": 23841 + }, + { + "epoch": 0.4772814853739709, + "grad_norm": 1.3162221908569336, + "learning_rate": 5.6092430365461705e-06, + "loss": 0.3206, + "step": 23842 + }, + { + "epoch": 0.47730150389109927, + "grad_norm": 1.1892071962356567, + "learning_rate": 5.6089212678476224e-06, + "loss": 0.3132, + "step": 23843 + }, + { + "epoch": 0.4773215224082276, + "grad_norm": 1.0792893171310425, + "learning_rate": 5.608599496589302e-06, + "loss": 0.3236, + "step": 23844 + }, + { + "epoch": 0.47734154092535597, + "grad_norm": 1.0079823732376099, + "learning_rate": 5.60827772277256e-06, + "loss": 0.3272, + "step": 23845 + }, + { + "epoch": 0.4773615594424843, + "grad_norm": 1.0730218887329102, + "learning_rate": 5.60795594639875e-06, + "loss": 0.3452, + "step": 23846 + }, + { + "epoch": 0.47738157795961267, + "grad_norm": 1.084453821182251, + "learning_rate": 5.607634167469223e-06, + "loss": 0.311, + "step": 23847 + }, + { + "epoch": 0.47740159647674096, + "grad_norm": 1.0659865140914917, + "learning_rate": 5.607312385985334e-06, + "loss": 0.3172, + "step": 23848 + }, + { + "epoch": 0.4774216149938693, + "grad_norm": 1.114027500152588, + "learning_rate": 5.606990601948433e-06, + "loss": 0.3001, + "step": 23849 + }, + { + "epoch": 0.47744163351099767, + "grad_norm": 1.2558084726333618, + "learning_rate": 5.606668815359875e-06, + "loss": 0.317, + "step": 23850 + }, + { + "epoch": 0.477461652028126, + "grad_norm": 0.9846954941749573, + "learning_rate": 5.606347026221012e-06, + "loss": 0.335, + "step": 23851 + }, + { + "epoch": 0.47748167054525437, + "grad_norm": 1.0621869564056396, + "learning_rate": 5.6060252345331954e-06, + "loss": 0.3289, + "step": 23852 + }, + { + "epoch": 0.4775016890623827, + "grad_norm": 1.072566270828247, + "learning_rate": 5.6057034402977805e-06, + "loss": 0.3219, + "step": 23853 + }, + { + "epoch": 0.47752170757951107, + "grad_norm": 1.1035807132720947, + "learning_rate": 5.605381643516118e-06, + "loss": 0.3516, + "step": 23854 + }, + { + "epoch": 0.4775417260966394, + "grad_norm": 1.0609116554260254, + "learning_rate": 5.605059844189562e-06, + "loss": 0.3258, + "step": 23855 + }, + { + "epoch": 0.4775617446137677, + "grad_norm": 1.1935713291168213, + "learning_rate": 5.604738042319464e-06, + "loss": 0.3555, + "step": 23856 + }, + { + "epoch": 0.47758176313089606, + "grad_norm": 1.2223124504089355, + "learning_rate": 5.604416237907178e-06, + "loss": 0.3129, + "step": 23857 + }, + { + "epoch": 0.4776017816480244, + "grad_norm": 1.892460823059082, + "learning_rate": 5.6040944309540545e-06, + "loss": 0.8238, + "step": 23858 + }, + { + "epoch": 0.47762180016515277, + "grad_norm": 1.242577314376831, + "learning_rate": 5.603772621461449e-06, + "loss": 0.3529, + "step": 23859 + }, + { + "epoch": 0.4776418186822811, + "grad_norm": 1.1535894870758057, + "learning_rate": 5.603450809430715e-06, + "loss": 0.3237, + "step": 23860 + }, + { + "epoch": 0.47766183719940947, + "grad_norm": 1.8446903228759766, + "learning_rate": 5.603128994863202e-06, + "loss": 0.779, + "step": 23861 + }, + { + "epoch": 0.4776818557165378, + "grad_norm": 1.0482721328735352, + "learning_rate": 5.602807177760265e-06, + "loss": 0.2643, + "step": 23862 + }, + { + "epoch": 0.47770187423366617, + "grad_norm": 1.0582810640335083, + "learning_rate": 5.602485358123255e-06, + "loss": 0.3134, + "step": 23863 + }, + { + "epoch": 0.47772189275079446, + "grad_norm": 1.167083501815796, + "learning_rate": 5.602163535953529e-06, + "loss": 0.3298, + "step": 23864 + }, + { + "epoch": 0.4777419112679228, + "grad_norm": 2.1560847759246826, + "learning_rate": 5.601841711252433e-06, + "loss": 0.7658, + "step": 23865 + }, + { + "epoch": 0.47776192978505116, + "grad_norm": 1.0821453332901, + "learning_rate": 5.6015198840213285e-06, + "loss": 0.3449, + "step": 23866 + }, + { + "epoch": 0.4777819483021795, + "grad_norm": 1.2288432121276855, + "learning_rate": 5.601198054261561e-06, + "loss": 0.3197, + "step": 23867 + }, + { + "epoch": 0.47780196681930787, + "grad_norm": 1.2287633419036865, + "learning_rate": 5.600876221974489e-06, + "loss": 0.2702, + "step": 23868 + }, + { + "epoch": 0.4778219853364362, + "grad_norm": 1.1389191150665283, + "learning_rate": 5.600554387161459e-06, + "loss": 0.2927, + "step": 23869 + }, + { + "epoch": 0.47784200385356457, + "grad_norm": 1.0519858598709106, + "learning_rate": 5.600232549823829e-06, + "loss": 0.2752, + "step": 23870 + }, + { + "epoch": 0.4778620223706929, + "grad_norm": 1.943691372871399, + "learning_rate": 5.599910709962951e-06, + "loss": 0.7816, + "step": 23871 + }, + { + "epoch": 0.4778820408878212, + "grad_norm": 1.2592699527740479, + "learning_rate": 5.599588867580177e-06, + "loss": 0.3199, + "step": 23872 + }, + { + "epoch": 0.47790205940494956, + "grad_norm": 1.2682586908340454, + "learning_rate": 5.599267022676862e-06, + "loss": 0.343, + "step": 23873 + }, + { + "epoch": 0.4779220779220779, + "grad_norm": 1.9576054811477661, + "learning_rate": 5.5989451752543555e-06, + "loss": 0.7746, + "step": 23874 + }, + { + "epoch": 0.47794209643920627, + "grad_norm": 1.1211183071136475, + "learning_rate": 5.598623325314012e-06, + "loss": 0.3095, + "step": 23875 + }, + { + "epoch": 0.4779621149563346, + "grad_norm": 1.1615062952041626, + "learning_rate": 5.598301472857186e-06, + "loss": 0.3163, + "step": 23876 + }, + { + "epoch": 0.47798213347346297, + "grad_norm": 1.0483019351959229, + "learning_rate": 5.5979796178852296e-06, + "loss": 0.3326, + "step": 23877 + }, + { + "epoch": 0.4780021519905913, + "grad_norm": 1.0590362548828125, + "learning_rate": 5.597657760399495e-06, + "loss": 0.3203, + "step": 23878 + }, + { + "epoch": 0.47802217050771967, + "grad_norm": 1.2839020490646362, + "learning_rate": 5.597335900401337e-06, + "loss": 0.3035, + "step": 23879 + }, + { + "epoch": 0.47804218902484796, + "grad_norm": 1.5827516317367554, + "learning_rate": 5.597014037892105e-06, + "loss": 0.3127, + "step": 23880 + }, + { + "epoch": 0.4780622075419763, + "grad_norm": 1.2102479934692383, + "learning_rate": 5.596692172873157e-06, + "loss": 0.333, + "step": 23881 + }, + { + "epoch": 0.47808222605910466, + "grad_norm": 1.0974955558776855, + "learning_rate": 5.5963703053458415e-06, + "loss": 0.3075, + "step": 23882 + }, + { + "epoch": 0.478102244576233, + "grad_norm": 1.1165971755981445, + "learning_rate": 5.596048435311515e-06, + "loss": 0.3173, + "step": 23883 + }, + { + "epoch": 0.47812226309336137, + "grad_norm": 1.2149423360824585, + "learning_rate": 5.5957265627715285e-06, + "loss": 0.3115, + "step": 23884 + }, + { + "epoch": 0.4781422816104897, + "grad_norm": 1.054449439048767, + "learning_rate": 5.595404687727236e-06, + "loss": 0.2815, + "step": 23885 + }, + { + "epoch": 0.47816230012761807, + "grad_norm": 1.0396422147750854, + "learning_rate": 5.59508281017999e-06, + "loss": 0.2918, + "step": 23886 + }, + { + "epoch": 0.4781823186447464, + "grad_norm": 1.0841366052627563, + "learning_rate": 5.594760930131144e-06, + "loss": 0.2909, + "step": 23887 + }, + { + "epoch": 0.4782023371618747, + "grad_norm": 1.1476573944091797, + "learning_rate": 5.594439047582051e-06, + "loss": 0.3286, + "step": 23888 + }, + { + "epoch": 0.47822235567900306, + "grad_norm": 1.0413727760314941, + "learning_rate": 5.594117162534064e-06, + "loss": 0.3282, + "step": 23889 + }, + { + "epoch": 0.4782423741961314, + "grad_norm": 1.0426021814346313, + "learning_rate": 5.5937952749885376e-06, + "loss": 0.2981, + "step": 23890 + }, + { + "epoch": 0.47826239271325977, + "grad_norm": 1.0572466850280762, + "learning_rate": 5.5934733849468235e-06, + "loss": 0.3023, + "step": 23891 + }, + { + "epoch": 0.4782824112303881, + "grad_norm": 0.9855287075042725, + "learning_rate": 5.593151492410274e-06, + "loss": 0.3102, + "step": 23892 + }, + { + "epoch": 0.47830242974751647, + "grad_norm": 1.8665640354156494, + "learning_rate": 5.592829597380244e-06, + "loss": 0.7716, + "step": 23893 + }, + { + "epoch": 0.4783224482646448, + "grad_norm": 1.085983157157898, + "learning_rate": 5.592507699858084e-06, + "loss": 0.3025, + "step": 23894 + }, + { + "epoch": 0.4783424667817731, + "grad_norm": 1.1708893775939941, + "learning_rate": 5.592185799845151e-06, + "loss": 0.3021, + "step": 23895 + }, + { + "epoch": 0.47836248529890146, + "grad_norm": 1.141439437866211, + "learning_rate": 5.5918638973427955e-06, + "loss": 0.3166, + "step": 23896 + }, + { + "epoch": 0.4783825038160298, + "grad_norm": 1.0689256191253662, + "learning_rate": 5.591541992352372e-06, + "loss": 0.3217, + "step": 23897 + }, + { + "epoch": 0.47840252233315816, + "grad_norm": 1.3002612590789795, + "learning_rate": 5.591220084875233e-06, + "loss": 0.2969, + "step": 23898 + }, + { + "epoch": 0.4784225408502865, + "grad_norm": 1.20182204246521, + "learning_rate": 5.5908981749127335e-06, + "loss": 0.2858, + "step": 23899 + }, + { + "epoch": 0.47844255936741487, + "grad_norm": 1.808404564857483, + "learning_rate": 5.590576262466223e-06, + "loss": 0.8043, + "step": 23900 + }, + { + "epoch": 0.4784625778845432, + "grad_norm": 1.0961540937423706, + "learning_rate": 5.590254347537057e-06, + "loss": 0.2829, + "step": 23901 + }, + { + "epoch": 0.47848259640167157, + "grad_norm": 1.1772946119308472, + "learning_rate": 5.589932430126591e-06, + "loss": 0.3244, + "step": 23902 + }, + { + "epoch": 0.47850261491879986, + "grad_norm": 1.2044986486434937, + "learning_rate": 5.589610510236174e-06, + "loss": 0.3424, + "step": 23903 + }, + { + "epoch": 0.4785226334359282, + "grad_norm": 1.1134368181228638, + "learning_rate": 5.589288587867162e-06, + "loss": 0.2709, + "step": 23904 + }, + { + "epoch": 0.47854265195305656, + "grad_norm": 1.071961760520935, + "learning_rate": 5.588966663020907e-06, + "loss": 0.3189, + "step": 23905 + }, + { + "epoch": 0.4785626704701849, + "grad_norm": 1.082645058631897, + "learning_rate": 5.588644735698765e-06, + "loss": 0.3012, + "step": 23906 + }, + { + "epoch": 0.47858268898731327, + "grad_norm": 0.9892553091049194, + "learning_rate": 5.588322805902086e-06, + "loss": 0.2989, + "step": 23907 + }, + { + "epoch": 0.4786027075044416, + "grad_norm": 1.1306105852127075, + "learning_rate": 5.5880008736322235e-06, + "loss": 0.2838, + "step": 23908 + }, + { + "epoch": 0.47862272602156997, + "grad_norm": 0.9846380949020386, + "learning_rate": 5.587678938890532e-06, + "loss": 0.3015, + "step": 23909 + }, + { + "epoch": 0.4786427445386983, + "grad_norm": 1.0896333456039429, + "learning_rate": 5.587357001678365e-06, + "loss": 0.3182, + "step": 23910 + }, + { + "epoch": 0.4786627630558266, + "grad_norm": 1.149176001548767, + "learning_rate": 5.587035061997076e-06, + "loss": 0.3021, + "step": 23911 + }, + { + "epoch": 0.47868278157295496, + "grad_norm": 1.154372215270996, + "learning_rate": 5.586713119848018e-06, + "loss": 0.3008, + "step": 23912 + }, + { + "epoch": 0.4787028000900833, + "grad_norm": 1.1629782915115356, + "learning_rate": 5.586391175232544e-06, + "loss": 0.28, + "step": 23913 + }, + { + "epoch": 0.47872281860721166, + "grad_norm": 1.0778542757034302, + "learning_rate": 5.586069228152007e-06, + "loss": 0.3011, + "step": 23914 + }, + { + "epoch": 0.47874283712434, + "grad_norm": 1.1513595581054688, + "learning_rate": 5.5857472786077626e-06, + "loss": 0.3151, + "step": 23915 + }, + { + "epoch": 0.47876285564146837, + "grad_norm": 1.1385278701782227, + "learning_rate": 5.585425326601162e-06, + "loss": 0.3137, + "step": 23916 + }, + { + "epoch": 0.4787828741585967, + "grad_norm": 1.0117093324661255, + "learning_rate": 5.58510337213356e-06, + "loss": 0.2933, + "step": 23917 + }, + { + "epoch": 0.47880289267572507, + "grad_norm": 1.0826314687728882, + "learning_rate": 5.584781415206309e-06, + "loss": 0.2952, + "step": 23918 + }, + { + "epoch": 0.47882291119285336, + "grad_norm": 1.1864861249923706, + "learning_rate": 5.584459455820762e-06, + "loss": 0.3124, + "step": 23919 + }, + { + "epoch": 0.4788429297099817, + "grad_norm": 1.0764093399047852, + "learning_rate": 5.584137493978273e-06, + "loss": 0.3047, + "step": 23920 + }, + { + "epoch": 0.47886294822711006, + "grad_norm": 1.1599273681640625, + "learning_rate": 5.583815529680197e-06, + "loss": 0.2982, + "step": 23921 + }, + { + "epoch": 0.4788829667442384, + "grad_norm": 1.1622754335403442, + "learning_rate": 5.583493562927885e-06, + "loss": 0.3206, + "step": 23922 + }, + { + "epoch": 0.47890298526136676, + "grad_norm": 1.0624430179595947, + "learning_rate": 5.583171593722692e-06, + "loss": 0.2849, + "step": 23923 + }, + { + "epoch": 0.4789230037784951, + "grad_norm": 1.2168406248092651, + "learning_rate": 5.582849622065971e-06, + "loss": 0.3169, + "step": 23924 + }, + { + "epoch": 0.47894302229562347, + "grad_norm": 1.121859073638916, + "learning_rate": 5.582527647959076e-06, + "loss": 0.3038, + "step": 23925 + }, + { + "epoch": 0.4789630408127518, + "grad_norm": 1.0240942239761353, + "learning_rate": 5.58220567140336e-06, + "loss": 0.3217, + "step": 23926 + }, + { + "epoch": 0.4789830593298801, + "grad_norm": 1.164432406425476, + "learning_rate": 5.581883692400176e-06, + "loss": 0.3121, + "step": 23927 + }, + { + "epoch": 0.47900307784700846, + "grad_norm": 1.2643883228302002, + "learning_rate": 5.5815617109508795e-06, + "loss": 0.3409, + "step": 23928 + }, + { + "epoch": 0.4790230963641368, + "grad_norm": 1.0866700410842896, + "learning_rate": 5.5812397270568205e-06, + "loss": 0.2931, + "step": 23929 + }, + { + "epoch": 0.47904311488126516, + "grad_norm": 1.133170485496521, + "learning_rate": 5.580917740719359e-06, + "loss": 0.2793, + "step": 23930 + }, + { + "epoch": 0.4790631333983935, + "grad_norm": 1.1513378620147705, + "learning_rate": 5.5805957519398406e-06, + "loss": 0.3724, + "step": 23931 + }, + { + "epoch": 0.47908315191552187, + "grad_norm": 1.1546210050582886, + "learning_rate": 5.580273760719623e-06, + "loss": 0.3165, + "step": 23932 + }, + { + "epoch": 0.4791031704326502, + "grad_norm": 1.1227591037750244, + "learning_rate": 5.57995176706006e-06, + "loss": 0.2951, + "step": 23933 + }, + { + "epoch": 0.47912318894977857, + "grad_norm": 1.0285124778747559, + "learning_rate": 5.579629770962505e-06, + "loss": 0.2922, + "step": 23934 + }, + { + "epoch": 0.47914320746690686, + "grad_norm": 1.1314136981964111, + "learning_rate": 5.579307772428311e-06, + "loss": 0.3314, + "step": 23935 + }, + { + "epoch": 0.4791632259840352, + "grad_norm": 1.0674740076065063, + "learning_rate": 5.578985771458831e-06, + "loss": 0.3066, + "step": 23936 + }, + { + "epoch": 0.47918324450116356, + "grad_norm": 1.0802019834518433, + "learning_rate": 5.57866376805542e-06, + "loss": 0.2972, + "step": 23937 + }, + { + "epoch": 0.4792032630182919, + "grad_norm": 1.1305510997772217, + "learning_rate": 5.578341762219432e-06, + "loss": 0.317, + "step": 23938 + }, + { + "epoch": 0.47922328153542026, + "grad_norm": 1.8279879093170166, + "learning_rate": 5.578019753952217e-06, + "loss": 0.7823, + "step": 23939 + }, + { + "epoch": 0.4792433000525486, + "grad_norm": 1.2574462890625, + "learning_rate": 5.577697743255133e-06, + "loss": 0.2952, + "step": 23940 + }, + { + "epoch": 0.47926331856967697, + "grad_norm": 1.2224489450454712, + "learning_rate": 5.5773757301295325e-06, + "loss": 0.2855, + "step": 23941 + }, + { + "epoch": 0.4792833370868053, + "grad_norm": 1.0416613817214966, + "learning_rate": 5.5770537145767676e-06, + "loss": 0.3154, + "step": 23942 + }, + { + "epoch": 0.4793033556039336, + "grad_norm": 1.0750269889831543, + "learning_rate": 5.576731696598194e-06, + "loss": 0.294, + "step": 23943 + }, + { + "epoch": 0.47932337412106196, + "grad_norm": 1.2378528118133545, + "learning_rate": 5.576409676195166e-06, + "loss": 0.3075, + "step": 23944 + }, + { + "epoch": 0.4793433926381903, + "grad_norm": 1.1306123733520508, + "learning_rate": 5.5760876533690335e-06, + "loss": 0.3706, + "step": 23945 + }, + { + "epoch": 0.47936341115531866, + "grad_norm": 1.1282356977462769, + "learning_rate": 5.575765628121152e-06, + "loss": 0.3391, + "step": 23946 + }, + { + "epoch": 0.479383429672447, + "grad_norm": 1.1297661066055298, + "learning_rate": 5.575443600452877e-06, + "loss": 0.2938, + "step": 23947 + }, + { + "epoch": 0.47940344818957537, + "grad_norm": 1.2050482034683228, + "learning_rate": 5.57512157036556e-06, + "loss": 0.3378, + "step": 23948 + }, + { + "epoch": 0.4794234667067037, + "grad_norm": 1.7896101474761963, + "learning_rate": 5.5747995378605566e-06, + "loss": 0.7709, + "step": 23949 + }, + { + "epoch": 0.47944348522383207, + "grad_norm": 1.057390809059143, + "learning_rate": 5.5744775029392194e-06, + "loss": 0.3066, + "step": 23950 + }, + { + "epoch": 0.47946350374096036, + "grad_norm": 1.0956683158874512, + "learning_rate": 5.574155465602902e-06, + "loss": 0.3096, + "step": 23951 + }, + { + "epoch": 0.4794835222580887, + "grad_norm": 1.1921536922454834, + "learning_rate": 5.573833425852959e-06, + "loss": 0.382, + "step": 23952 + }, + { + "epoch": 0.47950354077521706, + "grad_norm": 1.1962110996246338, + "learning_rate": 5.573511383690743e-06, + "loss": 0.266, + "step": 23953 + }, + { + "epoch": 0.4795235592923454, + "grad_norm": 1.1618403196334839, + "learning_rate": 5.5731893391176095e-06, + "loss": 0.3317, + "step": 23954 + }, + { + "epoch": 0.47954357780947376, + "grad_norm": 1.0859073400497437, + "learning_rate": 5.572867292134913e-06, + "loss": 0.3237, + "step": 23955 + }, + { + "epoch": 0.4795635963266021, + "grad_norm": 1.1803339719772339, + "learning_rate": 5.572545242744003e-06, + "loss": 0.3101, + "step": 23956 + }, + { + "epoch": 0.47958361484373047, + "grad_norm": 1.0481144189834595, + "learning_rate": 5.572223190946237e-06, + "loss": 0.3043, + "step": 23957 + }, + { + "epoch": 0.4796036333608588, + "grad_norm": 1.1699585914611816, + "learning_rate": 5.571901136742968e-06, + "loss": 0.3527, + "step": 23958 + }, + { + "epoch": 0.4796236518779871, + "grad_norm": 1.239548683166504, + "learning_rate": 5.571579080135548e-06, + "loss": 0.3364, + "step": 23959 + }, + { + "epoch": 0.47964367039511546, + "grad_norm": 1.068666934967041, + "learning_rate": 5.571257021125334e-06, + "loss": 0.2836, + "step": 23960 + }, + { + "epoch": 0.4796636889122438, + "grad_norm": 1.1636974811553955, + "learning_rate": 5.570934959713678e-06, + "loss": 0.3117, + "step": 23961 + }, + { + "epoch": 0.47968370742937216, + "grad_norm": 1.0154740810394287, + "learning_rate": 5.570612895901935e-06, + "loss": 0.2924, + "step": 23962 + }, + { + "epoch": 0.4797037259465005, + "grad_norm": 1.0987647771835327, + "learning_rate": 5.570290829691458e-06, + "loss": 0.2929, + "step": 23963 + }, + { + "epoch": 0.47972374446362887, + "grad_norm": 1.1541422605514526, + "learning_rate": 5.569968761083602e-06, + "loss": 0.2971, + "step": 23964 + }, + { + "epoch": 0.4797437629807572, + "grad_norm": 1.0430099964141846, + "learning_rate": 5.5696466900797166e-06, + "loss": 0.2712, + "step": 23965 + }, + { + "epoch": 0.47976378149788557, + "grad_norm": 1.1725143194198608, + "learning_rate": 5.569324616681162e-06, + "loss": 0.3561, + "step": 23966 + }, + { + "epoch": 0.47978380001501386, + "grad_norm": 1.0768567323684692, + "learning_rate": 5.569002540889288e-06, + "loss": 0.3386, + "step": 23967 + }, + { + "epoch": 0.4798038185321422, + "grad_norm": 1.0171287059783936, + "learning_rate": 5.568680462705452e-06, + "loss": 0.3196, + "step": 23968 + }, + { + "epoch": 0.47982383704927056, + "grad_norm": 1.0840307474136353, + "learning_rate": 5.568358382131003e-06, + "loss": 0.328, + "step": 23969 + }, + { + "epoch": 0.4798438555663989, + "grad_norm": 1.056701421737671, + "learning_rate": 5.5680362991672995e-06, + "loss": 0.2619, + "step": 23970 + }, + { + "epoch": 0.47986387408352726, + "grad_norm": 2.035186767578125, + "learning_rate": 5.567714213815693e-06, + "loss": 0.7727, + "step": 23971 + }, + { + "epoch": 0.4798838926006556, + "grad_norm": 1.1107075214385986, + "learning_rate": 5.567392126077536e-06, + "loss": 0.3409, + "step": 23972 + }, + { + "epoch": 0.47990391111778397, + "grad_norm": 1.2167019844055176, + "learning_rate": 5.567070035954187e-06, + "loss": 0.2713, + "step": 23973 + }, + { + "epoch": 0.4799239296349123, + "grad_norm": 1.062291145324707, + "learning_rate": 5.566747943446998e-06, + "loss": 0.286, + "step": 23974 + }, + { + "epoch": 0.4799439481520406, + "grad_norm": 1.861918330192566, + "learning_rate": 5.566425848557322e-06, + "loss": 0.83, + "step": 23975 + }, + { + "epoch": 0.47996396666916896, + "grad_norm": 1.010806918144226, + "learning_rate": 5.566103751286514e-06, + "loss": 0.2916, + "step": 23976 + }, + { + "epoch": 0.4799839851862973, + "grad_norm": 1.0592091083526611, + "learning_rate": 5.5657816516359275e-06, + "loss": 0.319, + "step": 23977 + }, + { + "epoch": 0.48000400370342566, + "grad_norm": 1.046890139579773, + "learning_rate": 5.565459549606914e-06, + "loss": 0.2738, + "step": 23978 + }, + { + "epoch": 0.480024022220554, + "grad_norm": 1.0091122388839722, + "learning_rate": 5.565137445200834e-06, + "loss": 0.3277, + "step": 23979 + }, + { + "epoch": 0.48004404073768236, + "grad_norm": 2.0159666538238525, + "learning_rate": 5.5648153384190365e-06, + "loss": 0.7796, + "step": 23980 + }, + { + "epoch": 0.4800640592548107, + "grad_norm": 1.0377857685089111, + "learning_rate": 5.564493229262876e-06, + "loss": 0.2985, + "step": 23981 + }, + { + "epoch": 0.48008407777193907, + "grad_norm": 1.158448338508606, + "learning_rate": 5.564171117733709e-06, + "loss": 0.3251, + "step": 23982 + }, + { + "epoch": 0.48010409628906736, + "grad_norm": 1.2469359636306763, + "learning_rate": 5.5638490038328875e-06, + "loss": 0.3124, + "step": 23983 + }, + { + "epoch": 0.4801241148061957, + "grad_norm": 1.0402332544326782, + "learning_rate": 5.563526887561766e-06, + "loss": 0.3859, + "step": 23984 + }, + { + "epoch": 0.48014413332332406, + "grad_norm": 1.0278222560882568, + "learning_rate": 5.563204768921697e-06, + "loss": 0.2421, + "step": 23985 + }, + { + "epoch": 0.4801641518404524, + "grad_norm": 1.7881381511688232, + "learning_rate": 5.562882647914038e-06, + "loss": 0.8261, + "step": 23986 + }, + { + "epoch": 0.48018417035758076, + "grad_norm": 1.0895198583602905, + "learning_rate": 5.562560524540141e-06, + "loss": 0.343, + "step": 23987 + }, + { + "epoch": 0.4802041888747091, + "grad_norm": 1.0499818325042725, + "learning_rate": 5.562238398801363e-06, + "loss": 0.296, + "step": 23988 + }, + { + "epoch": 0.48022420739183747, + "grad_norm": 1.1174960136413574, + "learning_rate": 5.561916270699053e-06, + "loss": 0.3398, + "step": 23989 + }, + { + "epoch": 0.4802442259089658, + "grad_norm": 1.2623199224472046, + "learning_rate": 5.561594140234569e-06, + "loss": 0.3722, + "step": 23990 + }, + { + "epoch": 0.4802642444260941, + "grad_norm": 1.0980165004730225, + "learning_rate": 5.561272007409262e-06, + "loss": 0.3724, + "step": 23991 + }, + { + "epoch": 0.48028426294322246, + "grad_norm": 1.933006763458252, + "learning_rate": 5.560949872224492e-06, + "loss": 0.8426, + "step": 23992 + }, + { + "epoch": 0.4803042814603508, + "grad_norm": 1.0612030029296875, + "learning_rate": 5.560627734681606e-06, + "loss": 0.3085, + "step": 23993 + }, + { + "epoch": 0.48032429997747916, + "grad_norm": 1.0771458148956299, + "learning_rate": 5.5603055947819636e-06, + "loss": 0.3214, + "step": 23994 + }, + { + "epoch": 0.4803443184946075, + "grad_norm": 1.302870273590088, + "learning_rate": 5.559983452526918e-06, + "loss": 0.3222, + "step": 23995 + }, + { + "epoch": 0.48036433701173586, + "grad_norm": 1.1092939376831055, + "learning_rate": 5.559661307917821e-06, + "loss": 0.2934, + "step": 23996 + }, + { + "epoch": 0.4803843555288642, + "grad_norm": 1.1501221656799316, + "learning_rate": 5.5593391609560275e-06, + "loss": 0.3055, + "step": 23997 + }, + { + "epoch": 0.48040437404599257, + "grad_norm": 1.1210517883300781, + "learning_rate": 5.559017011642893e-06, + "loss": 0.3169, + "step": 23998 + }, + { + "epoch": 0.48042439256312086, + "grad_norm": 1.352862000465393, + "learning_rate": 5.558694859979772e-06, + "loss": 0.3168, + "step": 23999 + }, + { + "epoch": 0.4804444110802492, + "grad_norm": 1.1470816135406494, + "learning_rate": 5.558372705968017e-06, + "loss": 0.3057, + "step": 24000 + }, + { + "epoch": 0.48046442959737756, + "grad_norm": 1.0481956005096436, + "learning_rate": 5.558050549608986e-06, + "loss": 0.3137, + "step": 24001 + }, + { + "epoch": 0.4804844481145059, + "grad_norm": 1.1913286447525024, + "learning_rate": 5.557728390904027e-06, + "loss": 0.2986, + "step": 24002 + }, + { + "epoch": 0.48050446663163426, + "grad_norm": 1.0809043645858765, + "learning_rate": 5.5574062298545e-06, + "loss": 0.2545, + "step": 24003 + }, + { + "epoch": 0.4805244851487626, + "grad_norm": 1.904180645942688, + "learning_rate": 5.557084066461755e-06, + "loss": 0.7857, + "step": 24004 + }, + { + "epoch": 0.48054450366589097, + "grad_norm": 1.0858949422836304, + "learning_rate": 5.556761900727151e-06, + "loss": 0.2899, + "step": 24005 + }, + { + "epoch": 0.4805645221830193, + "grad_norm": 1.0907864570617676, + "learning_rate": 5.5564397326520404e-06, + "loss": 0.3183, + "step": 24006 + }, + { + "epoch": 0.4805845407001476, + "grad_norm": 1.7502529621124268, + "learning_rate": 5.556117562237775e-06, + "loss": 0.777, + "step": 24007 + }, + { + "epoch": 0.48060455921727596, + "grad_norm": 1.0976866483688354, + "learning_rate": 5.555795389485712e-06, + "loss": 0.3023, + "step": 24008 + }, + { + "epoch": 0.4806245777344043, + "grad_norm": 1.0594298839569092, + "learning_rate": 5.5554732143972034e-06, + "loss": 0.3166, + "step": 24009 + }, + { + "epoch": 0.48064459625153266, + "grad_norm": 1.1738150119781494, + "learning_rate": 5.555151036973606e-06, + "loss": 0.3094, + "step": 24010 + }, + { + "epoch": 0.480664614768661, + "grad_norm": 1.181373119354248, + "learning_rate": 5.554828857216273e-06, + "loss": 0.258, + "step": 24011 + }, + { + "epoch": 0.48068463328578936, + "grad_norm": 1.150923252105713, + "learning_rate": 5.554506675126558e-06, + "loss": 0.3772, + "step": 24012 + }, + { + "epoch": 0.4807046518029177, + "grad_norm": 1.0620803833007812, + "learning_rate": 5.554184490705818e-06, + "loss": 0.2768, + "step": 24013 + }, + { + "epoch": 0.48072467032004607, + "grad_norm": 1.0851924419403076, + "learning_rate": 5.553862303955405e-06, + "loss": 0.3189, + "step": 24014 + }, + { + "epoch": 0.48074468883717436, + "grad_norm": 1.1110845804214478, + "learning_rate": 5.553540114876673e-06, + "loss": 0.2972, + "step": 24015 + }, + { + "epoch": 0.4807647073543027, + "grad_norm": 1.253718376159668, + "learning_rate": 5.5532179234709795e-06, + "loss": 0.3123, + "step": 24016 + }, + { + "epoch": 0.48078472587143106, + "grad_norm": 1.0538052320480347, + "learning_rate": 5.552895729739676e-06, + "loss": 0.3062, + "step": 24017 + }, + { + "epoch": 0.4808047443885594, + "grad_norm": 1.0760198831558228, + "learning_rate": 5.552573533684116e-06, + "loss": 0.3025, + "step": 24018 + }, + { + "epoch": 0.48082476290568776, + "grad_norm": 1.1790828704833984, + "learning_rate": 5.552251335305658e-06, + "loss": 0.2918, + "step": 24019 + }, + { + "epoch": 0.4808447814228161, + "grad_norm": 1.2509523630142212, + "learning_rate": 5.551929134605653e-06, + "loss": 0.366, + "step": 24020 + }, + { + "epoch": 0.48086479993994447, + "grad_norm": 1.0437953472137451, + "learning_rate": 5.551606931585458e-06, + "loss": 0.2771, + "step": 24021 + }, + { + "epoch": 0.4808848184570728, + "grad_norm": 0.9795624017715454, + "learning_rate": 5.551284726246425e-06, + "loss": 0.2796, + "step": 24022 + }, + { + "epoch": 0.4809048369742011, + "grad_norm": 1.1209050416946411, + "learning_rate": 5.55096251858991e-06, + "loss": 0.2963, + "step": 24023 + }, + { + "epoch": 0.48092485549132946, + "grad_norm": 1.8750027418136597, + "learning_rate": 5.550640308617267e-06, + "loss": 0.7435, + "step": 24024 + }, + { + "epoch": 0.4809448740084578, + "grad_norm": 1.116014838218689, + "learning_rate": 5.550318096329851e-06, + "loss": 0.3381, + "step": 24025 + }, + { + "epoch": 0.48096489252558616, + "grad_norm": 1.018378496170044, + "learning_rate": 5.549995881729016e-06, + "loss": 0.2996, + "step": 24026 + }, + { + "epoch": 0.4809849110427145, + "grad_norm": 1.2363712787628174, + "learning_rate": 5.549673664816118e-06, + "loss": 0.368, + "step": 24027 + }, + { + "epoch": 0.48100492955984286, + "grad_norm": 1.027189016342163, + "learning_rate": 5.549351445592509e-06, + "loss": 0.3151, + "step": 24028 + }, + { + "epoch": 0.4810249480769712, + "grad_norm": 2.357616662979126, + "learning_rate": 5.549029224059545e-06, + "loss": 0.8171, + "step": 24029 + }, + { + "epoch": 0.48104496659409957, + "grad_norm": 1.083752989768982, + "learning_rate": 5.5487070002185784e-06, + "loss": 0.2899, + "step": 24030 + }, + { + "epoch": 0.48106498511122786, + "grad_norm": 1.6533178091049194, + "learning_rate": 5.548384774070967e-06, + "loss": 0.7713, + "step": 24031 + }, + { + "epoch": 0.4810850036283562, + "grad_norm": 1.7478421926498413, + "learning_rate": 5.548062545618065e-06, + "loss": 0.7822, + "step": 24032 + }, + { + "epoch": 0.48110502214548456, + "grad_norm": 1.1764874458312988, + "learning_rate": 5.547740314861224e-06, + "loss": 0.26, + "step": 24033 + }, + { + "epoch": 0.4811250406626129, + "grad_norm": 1.1327407360076904, + "learning_rate": 5.547418081801803e-06, + "loss": 0.2937, + "step": 24034 + }, + { + "epoch": 0.48114505917974126, + "grad_norm": 1.1102449893951416, + "learning_rate": 5.547095846441153e-06, + "loss": 0.2954, + "step": 24035 + }, + { + "epoch": 0.4811650776968696, + "grad_norm": 1.0826334953308105, + "learning_rate": 5.546773608780629e-06, + "loss": 0.2976, + "step": 24036 + }, + { + "epoch": 0.48118509621399796, + "grad_norm": 1.0641298294067383, + "learning_rate": 5.546451368821588e-06, + "loss": 0.2507, + "step": 24037 + }, + { + "epoch": 0.4812051147311263, + "grad_norm": 1.0803887844085693, + "learning_rate": 5.546129126565382e-06, + "loss": 0.3379, + "step": 24038 + }, + { + "epoch": 0.4812251332482546, + "grad_norm": 1.2136645317077637, + "learning_rate": 5.545806882013367e-06, + "loss": 0.3281, + "step": 24039 + }, + { + "epoch": 0.48124515176538296, + "grad_norm": 1.102692723274231, + "learning_rate": 5.545484635166897e-06, + "loss": 0.3178, + "step": 24040 + }, + { + "epoch": 0.4812651702825113, + "grad_norm": 1.091778039932251, + "learning_rate": 5.545162386027327e-06, + "loss": 0.3045, + "step": 24041 + }, + { + "epoch": 0.48128518879963966, + "grad_norm": 1.130142092704773, + "learning_rate": 5.544840134596012e-06, + "loss": 0.3203, + "step": 24042 + }, + { + "epoch": 0.481305207316768, + "grad_norm": 1.1653776168823242, + "learning_rate": 5.544517880874306e-06, + "loss": 0.3126, + "step": 24043 + }, + { + "epoch": 0.48132522583389636, + "grad_norm": 1.013689398765564, + "learning_rate": 5.544195624863564e-06, + "loss": 0.2567, + "step": 24044 + }, + { + "epoch": 0.4813452443510247, + "grad_norm": 1.1714403629302979, + "learning_rate": 5.543873366565141e-06, + "loss": 0.2944, + "step": 24045 + }, + { + "epoch": 0.48136526286815307, + "grad_norm": 1.146239995956421, + "learning_rate": 5.543551105980391e-06, + "loss": 0.2648, + "step": 24046 + }, + { + "epoch": 0.48138528138528136, + "grad_norm": 1.0529054403305054, + "learning_rate": 5.543228843110671e-06, + "loss": 0.3216, + "step": 24047 + }, + { + "epoch": 0.4814052999024097, + "grad_norm": 1.130109429359436, + "learning_rate": 5.542906577957331e-06, + "loss": 0.2865, + "step": 24048 + }, + { + "epoch": 0.48142531841953806, + "grad_norm": 1.8487467765808105, + "learning_rate": 5.54258431052173e-06, + "loss": 0.8077, + "step": 24049 + }, + { + "epoch": 0.4814453369366664, + "grad_norm": 2.143886089324951, + "learning_rate": 5.5422620408052215e-06, + "loss": 0.8231, + "step": 24050 + }, + { + "epoch": 0.48146535545379476, + "grad_norm": 1.075559377670288, + "learning_rate": 5.541939768809159e-06, + "loss": 0.3366, + "step": 24051 + }, + { + "epoch": 0.4814853739709231, + "grad_norm": 1.2421969175338745, + "learning_rate": 5.5416174945349e-06, + "loss": 0.3472, + "step": 24052 + }, + { + "epoch": 0.48150539248805146, + "grad_norm": 1.1777867078781128, + "learning_rate": 5.541295217983796e-06, + "loss": 0.2719, + "step": 24053 + }, + { + "epoch": 0.4815254110051798, + "grad_norm": 1.1012145280838013, + "learning_rate": 5.540972939157205e-06, + "loss": 0.3323, + "step": 24054 + }, + { + "epoch": 0.4815454295223081, + "grad_norm": 1.9543311595916748, + "learning_rate": 5.540650658056479e-06, + "loss": 0.7426, + "step": 24055 + }, + { + "epoch": 0.48156544803943646, + "grad_norm": 1.0825518369674683, + "learning_rate": 5.540328374682974e-06, + "loss": 0.3084, + "step": 24056 + }, + { + "epoch": 0.4815854665565648, + "grad_norm": 1.1230714321136475, + "learning_rate": 5.540006089038044e-06, + "loss": 0.2689, + "step": 24057 + }, + { + "epoch": 0.48160548507369316, + "grad_norm": 1.1383737325668335, + "learning_rate": 5.539683801123047e-06, + "loss": 0.3126, + "step": 24058 + }, + { + "epoch": 0.4816255035908215, + "grad_norm": 1.129340648651123, + "learning_rate": 5.539361510939335e-06, + "loss": 0.3202, + "step": 24059 + }, + { + "epoch": 0.48164552210794986, + "grad_norm": 1.7303889989852905, + "learning_rate": 5.539039218488262e-06, + "loss": 0.7671, + "step": 24060 + }, + { + "epoch": 0.4816655406250782, + "grad_norm": 1.133746862411499, + "learning_rate": 5.538716923771183e-06, + "loss": 0.3087, + "step": 24061 + }, + { + "epoch": 0.48168555914220657, + "grad_norm": 1.185847520828247, + "learning_rate": 5.538394626789456e-06, + "loss": 0.3436, + "step": 24062 + }, + { + "epoch": 0.48170557765933486, + "grad_norm": 1.0893521308898926, + "learning_rate": 5.538072327544435e-06, + "loss": 0.2987, + "step": 24063 + }, + { + "epoch": 0.4817255961764632, + "grad_norm": 1.1649434566497803, + "learning_rate": 5.5377500260374705e-06, + "loss": 0.3076, + "step": 24064 + }, + { + "epoch": 0.48174561469359156, + "grad_norm": 1.0223393440246582, + "learning_rate": 5.537427722269923e-06, + "loss": 0.2839, + "step": 24065 + }, + { + "epoch": 0.4817656332107199, + "grad_norm": 1.0818156003952026, + "learning_rate": 5.537105416243143e-06, + "loss": 0.3036, + "step": 24066 + }, + { + "epoch": 0.48178565172784826, + "grad_norm": 1.1093517541885376, + "learning_rate": 5.53678310795849e-06, + "loss": 0.3112, + "step": 24067 + }, + { + "epoch": 0.4818056702449766, + "grad_norm": 1.0153475999832153, + "learning_rate": 5.536460797417315e-06, + "loss": 0.3105, + "step": 24068 + }, + { + "epoch": 0.48182568876210496, + "grad_norm": 1.1237220764160156, + "learning_rate": 5.536138484620974e-06, + "loss": 0.2923, + "step": 24069 + }, + { + "epoch": 0.4818457072792333, + "grad_norm": 1.0617892742156982, + "learning_rate": 5.535816169570823e-06, + "loss": 0.3369, + "step": 24070 + }, + { + "epoch": 0.4818657257963616, + "grad_norm": 1.2827575206756592, + "learning_rate": 5.5354938522682154e-06, + "loss": 0.3472, + "step": 24071 + }, + { + "epoch": 0.48188574431348996, + "grad_norm": 1.9423096179962158, + "learning_rate": 5.5351715327145075e-06, + "loss": 0.7955, + "step": 24072 + }, + { + "epoch": 0.4819057628306183, + "grad_norm": 1.3091704845428467, + "learning_rate": 5.534849210911053e-06, + "loss": 0.3389, + "step": 24073 + }, + { + "epoch": 0.48192578134774666, + "grad_norm": 1.162613034248352, + "learning_rate": 5.534526886859207e-06, + "loss": 0.3455, + "step": 24074 + }, + { + "epoch": 0.481945799864875, + "grad_norm": 1.1695188283920288, + "learning_rate": 5.534204560560326e-06, + "loss": 0.2973, + "step": 24075 + }, + { + "epoch": 0.48196581838200336, + "grad_norm": 1.0688724517822266, + "learning_rate": 5.533882232015764e-06, + "loss": 0.3121, + "step": 24076 + }, + { + "epoch": 0.4819858368991317, + "grad_norm": 1.0483001470565796, + "learning_rate": 5.533559901226875e-06, + "loss": 0.3196, + "step": 24077 + }, + { + "epoch": 0.48200585541626007, + "grad_norm": 1.492842674255371, + "learning_rate": 5.5332375681950155e-06, + "loss": 0.3277, + "step": 24078 + }, + { + "epoch": 0.48202587393338836, + "grad_norm": 1.0174468755722046, + "learning_rate": 5.5329152329215415e-06, + "loss": 0.3089, + "step": 24079 + }, + { + "epoch": 0.4820458924505167, + "grad_norm": 1.2388616800308228, + "learning_rate": 5.532592895407804e-06, + "loss": 0.3399, + "step": 24080 + }, + { + "epoch": 0.48206591096764506, + "grad_norm": 1.1200720071792603, + "learning_rate": 5.532270555655163e-06, + "loss": 0.3405, + "step": 24081 + }, + { + "epoch": 0.4820859294847734, + "grad_norm": 1.1558914184570312, + "learning_rate": 5.53194821366497e-06, + "loss": 0.3812, + "step": 24082 + }, + { + "epoch": 0.48210594800190176, + "grad_norm": 1.146041989326477, + "learning_rate": 5.53162586943858e-06, + "loss": 0.3266, + "step": 24083 + }, + { + "epoch": 0.4821259665190301, + "grad_norm": 1.1425437927246094, + "learning_rate": 5.53130352297735e-06, + "loss": 0.2951, + "step": 24084 + }, + { + "epoch": 0.48214598503615846, + "grad_norm": 1.1125239133834839, + "learning_rate": 5.530981174282636e-06, + "loss": 0.3009, + "step": 24085 + }, + { + "epoch": 0.4821660035532868, + "grad_norm": 1.0624229907989502, + "learning_rate": 5.53065882335579e-06, + "loss": 0.3132, + "step": 24086 + }, + { + "epoch": 0.4821860220704151, + "grad_norm": 1.1242538690567017, + "learning_rate": 5.530336470198167e-06, + "loss": 0.2978, + "step": 24087 + }, + { + "epoch": 0.48220604058754346, + "grad_norm": 1.3069840669631958, + "learning_rate": 5.530014114811126e-06, + "loss": 0.3228, + "step": 24088 + }, + { + "epoch": 0.4822260591046718, + "grad_norm": 0.9557837843894958, + "learning_rate": 5.52969175719602e-06, + "loss": 0.2783, + "step": 24089 + }, + { + "epoch": 0.48224607762180016, + "grad_norm": 1.4481608867645264, + "learning_rate": 5.529369397354203e-06, + "loss": 0.3374, + "step": 24090 + }, + { + "epoch": 0.4822660961389285, + "grad_norm": 1.107383131980896, + "learning_rate": 5.529047035287031e-06, + "loss": 0.3071, + "step": 24091 + }, + { + "epoch": 0.48228611465605686, + "grad_norm": 1.1711660623550415, + "learning_rate": 5.528724670995858e-06, + "loss": 0.3029, + "step": 24092 + }, + { + "epoch": 0.4823061331731852, + "grad_norm": 1.261709213256836, + "learning_rate": 5.5284023044820416e-06, + "loss": 0.2959, + "step": 24093 + }, + { + "epoch": 0.48232615169031356, + "grad_norm": 0.9810002446174622, + "learning_rate": 5.528079935746935e-06, + "loss": 0.2695, + "step": 24094 + }, + { + "epoch": 0.48234617020744186, + "grad_norm": 1.2352665662765503, + "learning_rate": 5.527757564791893e-06, + "loss": 0.3275, + "step": 24095 + }, + { + "epoch": 0.4823661887245702, + "grad_norm": 1.1281412839889526, + "learning_rate": 5.527435191618275e-06, + "loss": 0.3423, + "step": 24096 + }, + { + "epoch": 0.48238620724169856, + "grad_norm": 1.1780894994735718, + "learning_rate": 5.52711281622743e-06, + "loss": 0.3321, + "step": 24097 + }, + { + "epoch": 0.4824062257588269, + "grad_norm": 1.0377075672149658, + "learning_rate": 5.526790438620717e-06, + "loss": 0.3178, + "step": 24098 + }, + { + "epoch": 0.48242624427595526, + "grad_norm": 1.1044762134552002, + "learning_rate": 5.52646805879949e-06, + "loss": 0.2975, + "step": 24099 + }, + { + "epoch": 0.4824462627930836, + "grad_norm": 1.0062042474746704, + "learning_rate": 5.526145676765106e-06, + "loss": 0.3077, + "step": 24100 + }, + { + "epoch": 0.48246628131021196, + "grad_norm": 1.0778536796569824, + "learning_rate": 5.5258232925189185e-06, + "loss": 0.3139, + "step": 24101 + }, + { + "epoch": 0.4824862998273403, + "grad_norm": 1.543141484260559, + "learning_rate": 5.525500906062281e-06, + "loss": 0.3139, + "step": 24102 + }, + { + "epoch": 0.4825063183444686, + "grad_norm": 1.330357551574707, + "learning_rate": 5.525178517396554e-06, + "loss": 0.3521, + "step": 24103 + }, + { + "epoch": 0.48252633686159696, + "grad_norm": 1.0080376863479614, + "learning_rate": 5.524856126523086e-06, + "loss": 0.2679, + "step": 24104 + }, + { + "epoch": 0.4825463553787253, + "grad_norm": 1.0884833335876465, + "learning_rate": 5.5245337334432384e-06, + "loss": 0.3256, + "step": 24105 + }, + { + "epoch": 0.48256637389585366, + "grad_norm": 1.0936754941940308, + "learning_rate": 5.524211338158363e-06, + "loss": 0.3373, + "step": 24106 + }, + { + "epoch": 0.482586392412982, + "grad_norm": 1.156461477279663, + "learning_rate": 5.523888940669815e-06, + "loss": 0.3114, + "step": 24107 + }, + { + "epoch": 0.48260641093011036, + "grad_norm": 1.0301069021224976, + "learning_rate": 5.523566540978951e-06, + "loss": 0.3173, + "step": 24108 + }, + { + "epoch": 0.4826264294472387, + "grad_norm": 1.0880485773086548, + "learning_rate": 5.5232441390871275e-06, + "loss": 0.3146, + "step": 24109 + }, + { + "epoch": 0.48264644796436706, + "grad_norm": 1.1581107378005981, + "learning_rate": 5.522921734995698e-06, + "loss": 0.3613, + "step": 24110 + }, + { + "epoch": 0.48266646648149536, + "grad_norm": 1.19724440574646, + "learning_rate": 5.522599328706017e-06, + "loss": 0.3258, + "step": 24111 + }, + { + "epoch": 0.4826864849986237, + "grad_norm": 1.0284239053726196, + "learning_rate": 5.522276920219441e-06, + "loss": 0.3078, + "step": 24112 + }, + { + "epoch": 0.48270650351575206, + "grad_norm": 1.208120584487915, + "learning_rate": 5.521954509537324e-06, + "loss": 0.2888, + "step": 24113 + }, + { + "epoch": 0.4827265220328804, + "grad_norm": 1.1604915857315063, + "learning_rate": 5.521632096661025e-06, + "loss": 0.3385, + "step": 24114 + }, + { + "epoch": 0.48274654055000876, + "grad_norm": 1.1433898210525513, + "learning_rate": 5.5213096815918954e-06, + "loss": 0.3134, + "step": 24115 + }, + { + "epoch": 0.4827665590671371, + "grad_norm": 1.2062467336654663, + "learning_rate": 5.520987264331292e-06, + "loss": 0.3226, + "step": 24116 + }, + { + "epoch": 0.48278657758426546, + "grad_norm": 1.0511326789855957, + "learning_rate": 5.520664844880571e-06, + "loss": 0.2804, + "step": 24117 + }, + { + "epoch": 0.4828065961013938, + "grad_norm": 1.0338025093078613, + "learning_rate": 5.520342423241087e-06, + "loss": 0.2863, + "step": 24118 + }, + { + "epoch": 0.4828266146185221, + "grad_norm": 1.0959277153015137, + "learning_rate": 5.520019999414196e-06, + "loss": 0.3185, + "step": 24119 + }, + { + "epoch": 0.48284663313565046, + "grad_norm": 1.1092090606689453, + "learning_rate": 5.519697573401252e-06, + "loss": 0.3221, + "step": 24120 + }, + { + "epoch": 0.4828666516527788, + "grad_norm": 1.1150665283203125, + "learning_rate": 5.5193751452036115e-06, + "loss": 0.3146, + "step": 24121 + }, + { + "epoch": 0.48288667016990716, + "grad_norm": 1.2208607196807861, + "learning_rate": 5.5190527148226295e-06, + "loss": 0.3115, + "step": 24122 + }, + { + "epoch": 0.4829066886870355, + "grad_norm": 1.0535744428634644, + "learning_rate": 5.518730282259661e-06, + "loss": 0.3172, + "step": 24123 + }, + { + "epoch": 0.48292670720416386, + "grad_norm": 2.03550124168396, + "learning_rate": 5.518407847516063e-06, + "loss": 0.8093, + "step": 24124 + }, + { + "epoch": 0.4829467257212922, + "grad_norm": 1.1179388761520386, + "learning_rate": 5.518085410593189e-06, + "loss": 0.3133, + "step": 24125 + }, + { + "epoch": 0.48296674423842056, + "grad_norm": 1.111659049987793, + "learning_rate": 5.517762971492395e-06, + "loss": 0.3196, + "step": 24126 + }, + { + "epoch": 0.48298676275554886, + "grad_norm": 1.1094964742660522, + "learning_rate": 5.517440530215039e-06, + "loss": 0.3078, + "step": 24127 + }, + { + "epoch": 0.4830067812726772, + "grad_norm": 1.0541903972625732, + "learning_rate": 5.517118086762473e-06, + "loss": 0.3366, + "step": 24128 + }, + { + "epoch": 0.48302679978980556, + "grad_norm": 1.0731329917907715, + "learning_rate": 5.516795641136055e-06, + "loss": 0.2968, + "step": 24129 + }, + { + "epoch": 0.4830468183069339, + "grad_norm": 1.3061593770980835, + "learning_rate": 5.516473193337137e-06, + "loss": 0.3113, + "step": 24130 + }, + { + "epoch": 0.48306683682406226, + "grad_norm": 1.1275562047958374, + "learning_rate": 5.51615074336708e-06, + "loss": 0.3314, + "step": 24131 + }, + { + "epoch": 0.4830868553411906, + "grad_norm": 1.2508409023284912, + "learning_rate": 5.515828291227234e-06, + "loss": 0.3467, + "step": 24132 + }, + { + "epoch": 0.48310687385831896, + "grad_norm": 1.022578239440918, + "learning_rate": 5.5155058369189574e-06, + "loss": 0.2578, + "step": 24133 + }, + { + "epoch": 0.4831268923754473, + "grad_norm": 1.021889328956604, + "learning_rate": 5.515183380443606e-06, + "loss": 0.3481, + "step": 24134 + }, + { + "epoch": 0.4831469108925756, + "grad_norm": 1.8764442205429077, + "learning_rate": 5.514860921802533e-06, + "loss": 0.782, + "step": 24135 + }, + { + "epoch": 0.48316692940970396, + "grad_norm": 1.1741608381271362, + "learning_rate": 5.514538460997097e-06, + "loss": 0.3011, + "step": 24136 + }, + { + "epoch": 0.4831869479268323, + "grad_norm": 1.0679733753204346, + "learning_rate": 5.514215998028651e-06, + "loss": 0.3149, + "step": 24137 + }, + { + "epoch": 0.48320696644396066, + "grad_norm": 1.1634958982467651, + "learning_rate": 5.513893532898552e-06, + "loss": 0.3494, + "step": 24138 + }, + { + "epoch": 0.483226984961089, + "grad_norm": 1.7382965087890625, + "learning_rate": 5.513571065608154e-06, + "loss": 0.8606, + "step": 24139 + }, + { + "epoch": 0.48324700347821736, + "grad_norm": 1.095848798751831, + "learning_rate": 5.513248596158816e-06, + "loss": 0.3757, + "step": 24140 + }, + { + "epoch": 0.4832670219953457, + "grad_norm": 1.5841574668884277, + "learning_rate": 5.512926124551891e-06, + "loss": 0.3089, + "step": 24141 + }, + { + "epoch": 0.48328704051247406, + "grad_norm": 0.9673086404800415, + "learning_rate": 5.512603650788734e-06, + "loss": 0.2845, + "step": 24142 + }, + { + "epoch": 0.48330705902960236, + "grad_norm": 1.3366206884384155, + "learning_rate": 5.512281174870702e-06, + "loss": 0.3032, + "step": 24143 + }, + { + "epoch": 0.4833270775467307, + "grad_norm": 1.0718084573745728, + "learning_rate": 5.51195869679915e-06, + "loss": 0.2869, + "step": 24144 + }, + { + "epoch": 0.48334709606385906, + "grad_norm": 1.0206738710403442, + "learning_rate": 5.5116362165754335e-06, + "loss": 0.3422, + "step": 24145 + }, + { + "epoch": 0.4833671145809874, + "grad_norm": 1.0701850652694702, + "learning_rate": 5.511313734200908e-06, + "loss": 0.2968, + "step": 24146 + }, + { + "epoch": 0.48338713309811576, + "grad_norm": 1.0643976926803589, + "learning_rate": 5.510991249676931e-06, + "loss": 0.3027, + "step": 24147 + }, + { + "epoch": 0.4834071516152441, + "grad_norm": 0.992783784866333, + "learning_rate": 5.510668763004856e-06, + "loss": 0.2957, + "step": 24148 + }, + { + "epoch": 0.48342717013237246, + "grad_norm": 1.1245017051696777, + "learning_rate": 5.51034627418604e-06, + "loss": 0.2889, + "step": 24149 + }, + { + "epoch": 0.4834471886495008, + "grad_norm": 2.010936737060547, + "learning_rate": 5.510023783221837e-06, + "loss": 0.7812, + "step": 24150 + }, + { + "epoch": 0.4834672071666291, + "grad_norm": 1.0797799825668335, + "learning_rate": 5.509701290113603e-06, + "loss": 0.2941, + "step": 24151 + }, + { + "epoch": 0.48348722568375746, + "grad_norm": 1.0862678289413452, + "learning_rate": 5.509378794862697e-06, + "loss": 0.3395, + "step": 24152 + }, + { + "epoch": 0.4835072442008858, + "grad_norm": 1.0883713960647583, + "learning_rate": 5.5090562974704706e-06, + "loss": 0.277, + "step": 24153 + }, + { + "epoch": 0.48352726271801416, + "grad_norm": 1.8323465585708618, + "learning_rate": 5.508733797938283e-06, + "loss": 0.7741, + "step": 24154 + }, + { + "epoch": 0.4835472812351425, + "grad_norm": 1.022350788116455, + "learning_rate": 5.508411296267486e-06, + "loss": 0.2426, + "step": 24155 + }, + { + "epoch": 0.48356729975227086, + "grad_norm": 1.0184991359710693, + "learning_rate": 5.508088792459438e-06, + "loss": 0.3044, + "step": 24156 + }, + { + "epoch": 0.4835873182693992, + "grad_norm": 1.1808650493621826, + "learning_rate": 5.507766286515493e-06, + "loss": 0.3391, + "step": 24157 + }, + { + "epoch": 0.48360733678652756, + "grad_norm": 1.8567198514938354, + "learning_rate": 5.507443778437009e-06, + "loss": 0.8343, + "step": 24158 + }, + { + "epoch": 0.48362735530365586, + "grad_norm": 1.266815423965454, + "learning_rate": 5.507121268225339e-06, + "loss": 0.3107, + "step": 24159 + }, + { + "epoch": 0.4836473738207842, + "grad_norm": 2.0561389923095703, + "learning_rate": 5.506798755881844e-06, + "loss": 0.799, + "step": 24160 + }, + { + "epoch": 0.48366739233791256, + "grad_norm": 1.1173319816589355, + "learning_rate": 5.506476241407872e-06, + "loss": 0.2804, + "step": 24161 + }, + { + "epoch": 0.4836874108550409, + "grad_norm": 1.1560180187225342, + "learning_rate": 5.506153724804786e-06, + "loss": 0.2886, + "step": 24162 + }, + { + "epoch": 0.48370742937216926, + "grad_norm": 1.0778939723968506, + "learning_rate": 5.505831206073937e-06, + "loss": 0.2953, + "step": 24163 + }, + { + "epoch": 0.4837274478892976, + "grad_norm": 1.1208008527755737, + "learning_rate": 5.505508685216681e-06, + "loss": 0.3575, + "step": 24164 + }, + { + "epoch": 0.48374746640642596, + "grad_norm": 1.1470537185668945, + "learning_rate": 5.505186162234378e-06, + "loss": 0.3263, + "step": 24165 + }, + { + "epoch": 0.4837674849235543, + "grad_norm": 1.1194099187850952, + "learning_rate": 5.504863637128379e-06, + "loss": 0.3523, + "step": 24166 + }, + { + "epoch": 0.4837875034406826, + "grad_norm": 1.0806151628494263, + "learning_rate": 5.504541109900045e-06, + "loss": 0.2645, + "step": 24167 + }, + { + "epoch": 0.48380752195781096, + "grad_norm": 1.2081077098846436, + "learning_rate": 5.504218580550725e-06, + "loss": 0.3222, + "step": 24168 + }, + { + "epoch": 0.4838275404749393, + "grad_norm": 1.2366477251052856, + "learning_rate": 5.503896049081781e-06, + "loss": 0.3384, + "step": 24169 + }, + { + "epoch": 0.48384755899206766, + "grad_norm": 1.0238025188446045, + "learning_rate": 5.5035735154945655e-06, + "loss": 0.2997, + "step": 24170 + }, + { + "epoch": 0.483867577509196, + "grad_norm": 1.0916942358016968, + "learning_rate": 5.5032509797904355e-06, + "loss": 0.3275, + "step": 24171 + }, + { + "epoch": 0.48388759602632436, + "grad_norm": 1.0866456031799316, + "learning_rate": 5.502928441970746e-06, + "loss": 0.3255, + "step": 24172 + }, + { + "epoch": 0.4839076145434527, + "grad_norm": 1.145132064819336, + "learning_rate": 5.502605902036855e-06, + "loss": 0.3035, + "step": 24173 + }, + { + "epoch": 0.48392763306058106, + "grad_norm": 1.8225935697555542, + "learning_rate": 5.502283359990117e-06, + "loss": 0.7252, + "step": 24174 + }, + { + "epoch": 0.48394765157770936, + "grad_norm": 1.096683144569397, + "learning_rate": 5.501960815831887e-06, + "loss": 0.2828, + "step": 24175 + }, + { + "epoch": 0.4839676700948377, + "grad_norm": 1.0865817070007324, + "learning_rate": 5.501638269563523e-06, + "loss": 0.3051, + "step": 24176 + }, + { + "epoch": 0.48398768861196606, + "grad_norm": 1.1605150699615479, + "learning_rate": 5.501315721186377e-06, + "loss": 0.3573, + "step": 24177 + }, + { + "epoch": 0.4840077071290944, + "grad_norm": 1.1450222730636597, + "learning_rate": 5.500993170701809e-06, + "loss": 0.3229, + "step": 24178 + }, + { + "epoch": 0.48402772564622276, + "grad_norm": 1.1900192499160767, + "learning_rate": 5.5006706181111736e-06, + "loss": 0.3465, + "step": 24179 + }, + { + "epoch": 0.4840477441633511, + "grad_norm": 1.1115998029708862, + "learning_rate": 5.500348063415827e-06, + "loss": 0.3285, + "step": 24180 + }, + { + "epoch": 0.48406776268047946, + "grad_norm": 1.1954582929611206, + "learning_rate": 5.5000255066171235e-06, + "loss": 0.312, + "step": 24181 + }, + { + "epoch": 0.4840877811976078, + "grad_norm": 2.0683681964874268, + "learning_rate": 5.499702947716423e-06, + "loss": 0.794, + "step": 24182 + }, + { + "epoch": 0.4841077997147361, + "grad_norm": 1.1523607969284058, + "learning_rate": 5.499380386715076e-06, + "loss": 0.3657, + "step": 24183 + }, + { + "epoch": 0.48412781823186446, + "grad_norm": 1.1139580011367798, + "learning_rate": 5.499057823614443e-06, + "loss": 0.3667, + "step": 24184 + }, + { + "epoch": 0.4841478367489928, + "grad_norm": 1.035016655921936, + "learning_rate": 5.498735258415878e-06, + "loss": 0.3181, + "step": 24185 + }, + { + "epoch": 0.48416785526612116, + "grad_norm": 1.1464757919311523, + "learning_rate": 5.498412691120737e-06, + "loss": 0.3041, + "step": 24186 + }, + { + "epoch": 0.4841878737832495, + "grad_norm": 1.0229840278625488, + "learning_rate": 5.498090121730377e-06, + "loss": 0.3336, + "step": 24187 + }, + { + "epoch": 0.48420789230037786, + "grad_norm": 0.9653713703155518, + "learning_rate": 5.497767550246152e-06, + "loss": 0.3253, + "step": 24188 + }, + { + "epoch": 0.4842279108175062, + "grad_norm": 1.1018104553222656, + "learning_rate": 5.497444976669422e-06, + "loss": 0.3233, + "step": 24189 + }, + { + "epoch": 0.48424792933463456, + "grad_norm": 1.0731089115142822, + "learning_rate": 5.497122401001537e-06, + "loss": 0.2931, + "step": 24190 + }, + { + "epoch": 0.48426794785176286, + "grad_norm": 1.2072912454605103, + "learning_rate": 5.496799823243858e-06, + "loss": 0.2916, + "step": 24191 + }, + { + "epoch": 0.4842879663688912, + "grad_norm": 1.0692063570022583, + "learning_rate": 5.496477243397739e-06, + "loss": 0.291, + "step": 24192 + }, + { + "epoch": 0.48430798488601956, + "grad_norm": 1.2719610929489136, + "learning_rate": 5.496154661464538e-06, + "loss": 0.3464, + "step": 24193 + }, + { + "epoch": 0.4843280034031479, + "grad_norm": 1.2314342260360718, + "learning_rate": 5.495832077445609e-06, + "loss": 0.3186, + "step": 24194 + }, + { + "epoch": 0.48434802192027626, + "grad_norm": 1.143914818763733, + "learning_rate": 5.495509491342307e-06, + "loss": 0.3512, + "step": 24195 + }, + { + "epoch": 0.4843680404374046, + "grad_norm": 1.046197772026062, + "learning_rate": 5.49518690315599e-06, + "loss": 0.2998, + "step": 24196 + }, + { + "epoch": 0.48438805895453296, + "grad_norm": 1.0201051235198975, + "learning_rate": 5.494864312888016e-06, + "loss": 0.2865, + "step": 24197 + }, + { + "epoch": 0.4844080774716613, + "grad_norm": 0.9672667980194092, + "learning_rate": 5.494541720539736e-06, + "loss": 0.2987, + "step": 24198 + }, + { + "epoch": 0.4844280959887896, + "grad_norm": 1.128920316696167, + "learning_rate": 5.494219126112511e-06, + "loss": 0.2717, + "step": 24199 + }, + { + "epoch": 0.48444811450591796, + "grad_norm": 1.116091012954712, + "learning_rate": 5.493896529607697e-06, + "loss": 0.3448, + "step": 24200 + }, + { + "epoch": 0.4844681330230463, + "grad_norm": 1.936247706413269, + "learning_rate": 5.493573931026645e-06, + "loss": 0.8327, + "step": 24201 + }, + { + "epoch": 0.48448815154017466, + "grad_norm": 1.1002871990203857, + "learning_rate": 5.493251330370716e-06, + "loss": 0.3082, + "step": 24202 + }, + { + "epoch": 0.484508170057303, + "grad_norm": 1.1857022047042847, + "learning_rate": 5.4929287276412625e-06, + "loss": 0.2943, + "step": 24203 + }, + { + "epoch": 0.48452818857443136, + "grad_norm": 1.152801275253296, + "learning_rate": 5.492606122839644e-06, + "loss": 0.3212, + "step": 24204 + }, + { + "epoch": 0.4845482070915597, + "grad_norm": 1.052649736404419, + "learning_rate": 5.492283515967216e-06, + "loss": 0.2972, + "step": 24205 + }, + { + "epoch": 0.48456822560868806, + "grad_norm": 1.0581185817718506, + "learning_rate": 5.491960907025335e-06, + "loss": 0.3308, + "step": 24206 + }, + { + "epoch": 0.48458824412581636, + "grad_norm": 1.0491516590118408, + "learning_rate": 5.491638296015355e-06, + "loss": 0.31, + "step": 24207 + }, + { + "epoch": 0.4846082626429447, + "grad_norm": 1.1643061637878418, + "learning_rate": 5.491315682938633e-06, + "loss": 0.3386, + "step": 24208 + }, + { + "epoch": 0.48462828116007306, + "grad_norm": 1.1498162746429443, + "learning_rate": 5.490993067796525e-06, + "loss": 0.3711, + "step": 24209 + }, + { + "epoch": 0.4846482996772014, + "grad_norm": 1.1231532096862793, + "learning_rate": 5.490670450590387e-06, + "loss": 0.3022, + "step": 24210 + }, + { + "epoch": 0.48466831819432976, + "grad_norm": 1.0656551122665405, + "learning_rate": 5.49034783132158e-06, + "loss": 0.311, + "step": 24211 + }, + { + "epoch": 0.4846883367114581, + "grad_norm": 1.0219841003417969, + "learning_rate": 5.490025209991453e-06, + "loss": 0.3163, + "step": 24212 + }, + { + "epoch": 0.48470835522858646, + "grad_norm": 1.1361397504806519, + "learning_rate": 5.489702586601366e-06, + "loss": 0.3093, + "step": 24213 + }, + { + "epoch": 0.4847283737457148, + "grad_norm": 1.1150416135787964, + "learning_rate": 5.489379961152677e-06, + "loss": 0.3032, + "step": 24214 + }, + { + "epoch": 0.4847483922628431, + "grad_norm": 1.2340878248214722, + "learning_rate": 5.4890573336467366e-06, + "loss": 0.3353, + "step": 24215 + }, + { + "epoch": 0.48476841077997146, + "grad_norm": 1.1302335262298584, + "learning_rate": 5.488734704084906e-06, + "loss": 0.3158, + "step": 24216 + }, + { + "epoch": 0.4847884292970998, + "grad_norm": 1.0734761953353882, + "learning_rate": 5.48841207246854e-06, + "loss": 0.2744, + "step": 24217 + }, + { + "epoch": 0.48480844781422816, + "grad_norm": 1.8902783393859863, + "learning_rate": 5.488089438798994e-06, + "loss": 0.8201, + "step": 24218 + }, + { + "epoch": 0.4848284663313565, + "grad_norm": 1.0351390838623047, + "learning_rate": 5.487766803077626e-06, + "loss": 0.2525, + "step": 24219 + }, + { + "epoch": 0.48484848484848486, + "grad_norm": 1.1975295543670654, + "learning_rate": 5.487444165305791e-06, + "loss": 0.29, + "step": 24220 + }, + { + "epoch": 0.4848685033656132, + "grad_norm": 1.2533299922943115, + "learning_rate": 5.4871215254848455e-06, + "loss": 0.3165, + "step": 24221 + }, + { + "epoch": 0.48488852188274156, + "grad_norm": 1.1719342470169067, + "learning_rate": 5.486798883616145e-06, + "loss": 0.3001, + "step": 24222 + }, + { + "epoch": 0.48490854039986986, + "grad_norm": 1.278021216392517, + "learning_rate": 5.4864762397010475e-06, + "loss": 0.334, + "step": 24223 + }, + { + "epoch": 0.4849285589169982, + "grad_norm": 1.9903780221939087, + "learning_rate": 5.48615359374091e-06, + "loss": 0.8432, + "step": 24224 + }, + { + "epoch": 0.48494857743412656, + "grad_norm": 1.3360956907272339, + "learning_rate": 5.485830945737086e-06, + "loss": 0.3171, + "step": 24225 + }, + { + "epoch": 0.4849685959512549, + "grad_norm": 1.004043698310852, + "learning_rate": 5.485508295690934e-06, + "loss": 0.2882, + "step": 24226 + }, + { + "epoch": 0.48498861446838326, + "grad_norm": 1.024144172668457, + "learning_rate": 5.48518564360381e-06, + "loss": 0.2776, + "step": 24227 + }, + { + "epoch": 0.4850086329855116, + "grad_norm": 1.128257393836975, + "learning_rate": 5.484862989477068e-06, + "loss": 0.3051, + "step": 24228 + }, + { + "epoch": 0.48502865150263996, + "grad_norm": 1.1739649772644043, + "learning_rate": 5.4845403333120675e-06, + "loss": 0.3354, + "step": 24229 + }, + { + "epoch": 0.4850486700197683, + "grad_norm": 1.047700047492981, + "learning_rate": 5.484217675110163e-06, + "loss": 0.334, + "step": 24230 + }, + { + "epoch": 0.4850686885368966, + "grad_norm": 1.0415173768997192, + "learning_rate": 5.4838950148727125e-06, + "loss": 0.2883, + "step": 24231 + }, + { + "epoch": 0.48508870705402496, + "grad_norm": 1.9046595096588135, + "learning_rate": 5.483572352601071e-06, + "loss": 0.8077, + "step": 24232 + }, + { + "epoch": 0.4851087255711533, + "grad_norm": 1.0973669290542603, + "learning_rate": 5.4832496882965955e-06, + "loss": 0.3092, + "step": 24233 + }, + { + "epoch": 0.48512874408828166, + "grad_norm": 1.0496559143066406, + "learning_rate": 5.482927021960642e-06, + "loss": 0.3211, + "step": 24234 + }, + { + "epoch": 0.48514876260541, + "grad_norm": 1.113439917564392, + "learning_rate": 5.482604353594567e-06, + "loss": 0.37, + "step": 24235 + }, + { + "epoch": 0.48516878112253836, + "grad_norm": 1.138496994972229, + "learning_rate": 5.482281683199728e-06, + "loss": 0.3245, + "step": 24236 + }, + { + "epoch": 0.4851887996396667, + "grad_norm": 1.1801443099975586, + "learning_rate": 5.481959010777481e-06, + "loss": 0.2908, + "step": 24237 + }, + { + "epoch": 0.48520881815679506, + "grad_norm": 1.1511861085891724, + "learning_rate": 5.481636336329181e-06, + "loss": 0.3331, + "step": 24238 + }, + { + "epoch": 0.48522883667392336, + "grad_norm": 1.3076153993606567, + "learning_rate": 5.481313659856187e-06, + "loss": 0.2988, + "step": 24239 + }, + { + "epoch": 0.4852488551910517, + "grad_norm": 1.0447921752929688, + "learning_rate": 5.4809909813598525e-06, + "loss": 0.254, + "step": 24240 + }, + { + "epoch": 0.48526887370818006, + "grad_norm": 0.9842844009399414, + "learning_rate": 5.480668300841535e-06, + "loss": 0.2613, + "step": 24241 + }, + { + "epoch": 0.4852888922253084, + "grad_norm": 1.9092626571655273, + "learning_rate": 5.480345618302592e-06, + "loss": 0.8204, + "step": 24242 + }, + { + "epoch": 0.48530891074243676, + "grad_norm": 1.148617148399353, + "learning_rate": 5.48002293374438e-06, + "loss": 0.2999, + "step": 24243 + }, + { + "epoch": 0.4853289292595651, + "grad_norm": 1.0551666021347046, + "learning_rate": 5.479700247168255e-06, + "loss": 0.2849, + "step": 24244 + }, + { + "epoch": 0.48534894777669346, + "grad_norm": 1.021996021270752, + "learning_rate": 5.479377558575571e-06, + "loss": 0.3001, + "step": 24245 + }, + { + "epoch": 0.48536896629382176, + "grad_norm": 1.9628812074661255, + "learning_rate": 5.47905486796769e-06, + "loss": 0.7626, + "step": 24246 + }, + { + "epoch": 0.4853889848109501, + "grad_norm": 1.170533299446106, + "learning_rate": 5.478732175345965e-06, + "loss": 0.3717, + "step": 24247 + }, + { + "epoch": 0.48540900332807846, + "grad_norm": 0.9662389755249023, + "learning_rate": 5.47840948071175e-06, + "loss": 0.3134, + "step": 24248 + }, + { + "epoch": 0.4854290218452068, + "grad_norm": 1.2994787693023682, + "learning_rate": 5.478086784066408e-06, + "loss": 0.3518, + "step": 24249 + }, + { + "epoch": 0.48544904036233516, + "grad_norm": 1.216318130493164, + "learning_rate": 5.47776408541129e-06, + "loss": 0.3306, + "step": 24250 + }, + { + "epoch": 0.4854690588794635, + "grad_norm": 1.1003849506378174, + "learning_rate": 5.477441384747756e-06, + "loss": 0.322, + "step": 24251 + }, + { + "epoch": 0.48548907739659186, + "grad_norm": 1.2625011205673218, + "learning_rate": 5.477118682077161e-06, + "loss": 0.3289, + "step": 24252 + }, + { + "epoch": 0.4855090959137202, + "grad_norm": 0.978888213634491, + "learning_rate": 5.476795977400862e-06, + "loss": 0.3017, + "step": 24253 + }, + { + "epoch": 0.4855291144308485, + "grad_norm": 1.02094304561615, + "learning_rate": 5.4764732707202124e-06, + "loss": 0.3131, + "step": 24254 + }, + { + "epoch": 0.48554913294797686, + "grad_norm": 1.104483723640442, + "learning_rate": 5.476150562036574e-06, + "loss": 0.2885, + "step": 24255 + }, + { + "epoch": 0.4855691514651052, + "grad_norm": 1.9581542015075684, + "learning_rate": 5.475827851351302e-06, + "loss": 0.8439, + "step": 24256 + }, + { + "epoch": 0.48558916998223356, + "grad_norm": 1.0542430877685547, + "learning_rate": 5.47550513866575e-06, + "loss": 0.3337, + "step": 24257 + }, + { + "epoch": 0.4856091884993619, + "grad_norm": 1.2033987045288086, + "learning_rate": 5.475182423981279e-06, + "loss": 0.3232, + "step": 24258 + }, + { + "epoch": 0.48562920701649026, + "grad_norm": 1.168460488319397, + "learning_rate": 5.474859707299242e-06, + "loss": 0.3198, + "step": 24259 + }, + { + "epoch": 0.4856492255336186, + "grad_norm": 1.14523184299469, + "learning_rate": 5.474536988620997e-06, + "loss": 0.293, + "step": 24260 + }, + { + "epoch": 0.48566924405074696, + "grad_norm": 1.0909106731414795, + "learning_rate": 5.4742142679478996e-06, + "loss": 0.3082, + "step": 24261 + }, + { + "epoch": 0.48568926256787526, + "grad_norm": 1.0711123943328857, + "learning_rate": 5.473891545281309e-06, + "loss": 0.2684, + "step": 24262 + }, + { + "epoch": 0.4857092810850036, + "grad_norm": 1.2757787704467773, + "learning_rate": 5.473568820622579e-06, + "loss": 0.2903, + "step": 24263 + }, + { + "epoch": 0.48572929960213196, + "grad_norm": 1.0733388662338257, + "learning_rate": 5.4732460939730695e-06, + "loss": 0.3465, + "step": 24264 + }, + { + "epoch": 0.4857493181192603, + "grad_norm": 1.0396068096160889, + "learning_rate": 5.472923365334134e-06, + "loss": 0.3183, + "step": 24265 + }, + { + "epoch": 0.48576933663638866, + "grad_norm": 1.0787129402160645, + "learning_rate": 5.472600634707131e-06, + "loss": 0.3649, + "step": 24266 + }, + { + "epoch": 0.485789355153517, + "grad_norm": 1.0489026308059692, + "learning_rate": 5.472277902093416e-06, + "loss": 0.295, + "step": 24267 + }, + { + "epoch": 0.48580937367064536, + "grad_norm": 1.1755801439285278, + "learning_rate": 5.471955167494346e-06, + "loss": 0.3071, + "step": 24268 + }, + { + "epoch": 0.4858293921877737, + "grad_norm": 1.0138484239578247, + "learning_rate": 5.47163243091128e-06, + "loss": 0.2796, + "step": 24269 + }, + { + "epoch": 0.485849410704902, + "grad_norm": 1.093300461769104, + "learning_rate": 5.471309692345571e-06, + "loss": 0.3378, + "step": 24270 + }, + { + "epoch": 0.48586942922203036, + "grad_norm": 1.7520136833190918, + "learning_rate": 5.470986951798578e-06, + "loss": 0.7509, + "step": 24271 + }, + { + "epoch": 0.4858894477391587, + "grad_norm": 1.0569446086883545, + "learning_rate": 5.470664209271656e-06, + "loss": 0.3235, + "step": 24272 + }, + { + "epoch": 0.48590946625628706, + "grad_norm": 1.1190613508224487, + "learning_rate": 5.470341464766165e-06, + "loss": 0.3565, + "step": 24273 + }, + { + "epoch": 0.4859294847734154, + "grad_norm": 1.1146291494369507, + "learning_rate": 5.470018718283458e-06, + "loss": 0.3353, + "step": 24274 + }, + { + "epoch": 0.48594950329054376, + "grad_norm": 1.0843840837478638, + "learning_rate": 5.469695969824895e-06, + "loss": 0.3067, + "step": 24275 + }, + { + "epoch": 0.4859695218076721, + "grad_norm": 1.0914123058319092, + "learning_rate": 5.469373219391831e-06, + "loss": 0.3193, + "step": 24276 + }, + { + "epoch": 0.48598954032480046, + "grad_norm": 1.4006731510162354, + "learning_rate": 5.469050466985622e-06, + "loss": 0.3217, + "step": 24277 + }, + { + "epoch": 0.48600955884192876, + "grad_norm": 1.803464412689209, + "learning_rate": 5.468727712607628e-06, + "loss": 0.8266, + "step": 24278 + }, + { + "epoch": 0.4860295773590571, + "grad_norm": 1.0401678085327148, + "learning_rate": 5.468404956259201e-06, + "loss": 0.2966, + "step": 24279 + }, + { + "epoch": 0.48604959587618546, + "grad_norm": 1.2248616218566895, + "learning_rate": 5.468082197941701e-06, + "loss": 0.2794, + "step": 24280 + }, + { + "epoch": 0.4860696143933138, + "grad_norm": 1.0851584672927856, + "learning_rate": 5.467759437656483e-06, + "loss": 0.283, + "step": 24281 + }, + { + "epoch": 0.48608963291044216, + "grad_norm": 1.0699725151062012, + "learning_rate": 5.467436675404908e-06, + "loss": 0.3599, + "step": 24282 + }, + { + "epoch": 0.4861096514275705, + "grad_norm": 1.267682671546936, + "learning_rate": 5.4671139111883275e-06, + "loss": 0.3192, + "step": 24283 + }, + { + "epoch": 0.48612966994469886, + "grad_norm": 1.1865005493164062, + "learning_rate": 5.466791145008101e-06, + "loss": 0.3199, + "step": 24284 + }, + { + "epoch": 0.4861496884618272, + "grad_norm": 1.8528404235839844, + "learning_rate": 5.466468376865586e-06, + "loss": 0.7894, + "step": 24285 + }, + { + "epoch": 0.4861697069789555, + "grad_norm": 1.1890219449996948, + "learning_rate": 5.466145606762136e-06, + "loss": 0.3381, + "step": 24286 + }, + { + "epoch": 0.48618972549608386, + "grad_norm": 1.0677510499954224, + "learning_rate": 5.465822834699113e-06, + "loss": 0.3387, + "step": 24287 + }, + { + "epoch": 0.4862097440132122, + "grad_norm": 1.8730876445770264, + "learning_rate": 5.46550006067787e-06, + "loss": 0.7534, + "step": 24288 + }, + { + "epoch": 0.48622976253034056, + "grad_norm": 1.090639591217041, + "learning_rate": 5.465177284699765e-06, + "loss": 0.3508, + "step": 24289 + }, + { + "epoch": 0.4862497810474689, + "grad_norm": 1.0079727172851562, + "learning_rate": 5.464854506766154e-06, + "loss": 0.3261, + "step": 24290 + }, + { + "epoch": 0.48626979956459726, + "grad_norm": 1.1208949089050293, + "learning_rate": 5.464531726878396e-06, + "loss": 0.3384, + "step": 24291 + }, + { + "epoch": 0.4862898180817256, + "grad_norm": 1.2570546865463257, + "learning_rate": 5.464208945037845e-06, + "loss": 0.2775, + "step": 24292 + }, + { + "epoch": 0.48630983659885396, + "grad_norm": 1.1093590259552002, + "learning_rate": 5.46388616124586e-06, + "loss": 0.3409, + "step": 24293 + }, + { + "epoch": 0.48632985511598226, + "grad_norm": 1.8464497327804565, + "learning_rate": 5.463563375503798e-06, + "loss": 0.801, + "step": 24294 + }, + { + "epoch": 0.4863498736331106, + "grad_norm": 1.0023422241210938, + "learning_rate": 5.463240587813016e-06, + "loss": 0.3172, + "step": 24295 + }, + { + "epoch": 0.48636989215023896, + "grad_norm": 1.213850498199463, + "learning_rate": 5.4629177981748685e-06, + "loss": 0.3647, + "step": 24296 + }, + { + "epoch": 0.4863899106673673, + "grad_norm": 1.073050618171692, + "learning_rate": 5.4625950065907164e-06, + "loss": 0.314, + "step": 24297 + }, + { + "epoch": 0.48640992918449566, + "grad_norm": 1.1115700006484985, + "learning_rate": 5.462272213061912e-06, + "loss": 0.2949, + "step": 24298 + }, + { + "epoch": 0.486429947701624, + "grad_norm": 1.2421534061431885, + "learning_rate": 5.4619494175898155e-06, + "loss": 0.3294, + "step": 24299 + }, + { + "epoch": 0.48644996621875236, + "grad_norm": 1.0486394166946411, + "learning_rate": 5.461626620175784e-06, + "loss": 0.2919, + "step": 24300 + }, + { + "epoch": 0.4864699847358807, + "grad_norm": 1.204643964767456, + "learning_rate": 5.461303820821172e-06, + "loss": 0.3668, + "step": 24301 + }, + { + "epoch": 0.486490003253009, + "grad_norm": 1.181357741355896, + "learning_rate": 5.4609810195273404e-06, + "loss": 0.3072, + "step": 24302 + }, + { + "epoch": 0.48651002177013736, + "grad_norm": 1.084154725074768, + "learning_rate": 5.460658216295641e-06, + "loss": 0.2968, + "step": 24303 + }, + { + "epoch": 0.4865300402872657, + "grad_norm": 1.0997333526611328, + "learning_rate": 5.460335411127435e-06, + "loss": 0.3345, + "step": 24304 + }, + { + "epoch": 0.48655005880439406, + "grad_norm": 1.1360286474227905, + "learning_rate": 5.460012604024078e-06, + "loss": 0.2894, + "step": 24305 + }, + { + "epoch": 0.4865700773215224, + "grad_norm": 1.1814768314361572, + "learning_rate": 5.459689794986927e-06, + "loss": 0.3072, + "step": 24306 + }, + { + "epoch": 0.48659009583865076, + "grad_norm": 1.1260700225830078, + "learning_rate": 5.459366984017338e-06, + "loss": 0.2635, + "step": 24307 + }, + { + "epoch": 0.4866101143557791, + "grad_norm": 1.9496870040893555, + "learning_rate": 5.459044171116671e-06, + "loss": 0.867, + "step": 24308 + }, + { + "epoch": 0.48663013287290746, + "grad_norm": 1.1213146448135376, + "learning_rate": 5.458721356286281e-06, + "loss": 0.2658, + "step": 24309 + }, + { + "epoch": 0.48665015139003576, + "grad_norm": 1.093312382698059, + "learning_rate": 5.458398539527523e-06, + "loss": 0.3194, + "step": 24310 + }, + { + "epoch": 0.4866701699071641, + "grad_norm": 1.8775073289871216, + "learning_rate": 5.458075720841758e-06, + "loss": 0.7752, + "step": 24311 + }, + { + "epoch": 0.48669018842429246, + "grad_norm": 1.1055550575256348, + "learning_rate": 5.45775290023034e-06, + "loss": 0.2553, + "step": 24312 + }, + { + "epoch": 0.4867102069414208, + "grad_norm": 1.3350684642791748, + "learning_rate": 5.457430077694629e-06, + "loss": 0.3459, + "step": 24313 + }, + { + "epoch": 0.48673022545854916, + "grad_norm": 2.385429620742798, + "learning_rate": 5.457107253235978e-06, + "loss": 0.8224, + "step": 24314 + }, + { + "epoch": 0.4867502439756775, + "grad_norm": 1.0415984392166138, + "learning_rate": 5.456784426855749e-06, + "loss": 0.3147, + "step": 24315 + }, + { + "epoch": 0.48677026249280586, + "grad_norm": 1.1539263725280762, + "learning_rate": 5.4564615985552945e-06, + "loss": 0.3162, + "step": 24316 + }, + { + "epoch": 0.4867902810099342, + "grad_norm": 1.1164090633392334, + "learning_rate": 5.456138768335976e-06, + "loss": 0.309, + "step": 24317 + }, + { + "epoch": 0.4868102995270625, + "grad_norm": 1.165342092514038, + "learning_rate": 5.455815936199146e-06, + "loss": 0.3107, + "step": 24318 + }, + { + "epoch": 0.48683031804419086, + "grad_norm": 1.1220636367797852, + "learning_rate": 5.455493102146164e-06, + "loss": 0.2974, + "step": 24319 + }, + { + "epoch": 0.4868503365613192, + "grad_norm": 1.114274501800537, + "learning_rate": 5.455170266178389e-06, + "loss": 0.3334, + "step": 24320 + }, + { + "epoch": 0.48687035507844756, + "grad_norm": 1.05428147315979, + "learning_rate": 5.454847428297174e-06, + "loss": 0.2771, + "step": 24321 + }, + { + "epoch": 0.4868903735955759, + "grad_norm": 1.1433290243148804, + "learning_rate": 5.45452458850388e-06, + "loss": 0.2964, + "step": 24322 + }, + { + "epoch": 0.48691039211270426, + "grad_norm": 1.1099680662155151, + "learning_rate": 5.454201746799861e-06, + "loss": 0.3215, + "step": 24323 + }, + { + "epoch": 0.4869304106298326, + "grad_norm": 1.221053957939148, + "learning_rate": 5.453878903186476e-06, + "loss": 0.3058, + "step": 24324 + }, + { + "epoch": 0.48695042914696096, + "grad_norm": 1.1562557220458984, + "learning_rate": 5.453556057665082e-06, + "loss": 0.3046, + "step": 24325 + }, + { + "epoch": 0.48697044766408926, + "grad_norm": 1.1347852945327759, + "learning_rate": 5.453233210237037e-06, + "loss": 0.2957, + "step": 24326 + }, + { + "epoch": 0.4869904661812176, + "grad_norm": 1.9075382947921753, + "learning_rate": 5.452910360903695e-06, + "loss": 0.7955, + "step": 24327 + }, + { + "epoch": 0.48701048469834596, + "grad_norm": 1.1649236679077148, + "learning_rate": 5.452587509666417e-06, + "loss": 0.2841, + "step": 24328 + }, + { + "epoch": 0.4870305032154743, + "grad_norm": 1.0937458276748657, + "learning_rate": 5.452264656526558e-06, + "loss": 0.3067, + "step": 24329 + }, + { + "epoch": 0.48705052173260266, + "grad_norm": 1.285407543182373, + "learning_rate": 5.451941801485477e-06, + "loss": 0.3118, + "step": 24330 + }, + { + "epoch": 0.487070540249731, + "grad_norm": 1.0180217027664185, + "learning_rate": 5.451618944544527e-06, + "loss": 0.3316, + "step": 24331 + }, + { + "epoch": 0.48709055876685936, + "grad_norm": 1.0492275953292847, + "learning_rate": 5.451296085705069e-06, + "loss": 0.3376, + "step": 24332 + }, + { + "epoch": 0.4871105772839877, + "grad_norm": 1.1244827508926392, + "learning_rate": 5.450973224968461e-06, + "loss": 0.346, + "step": 24333 + }, + { + "epoch": 0.487130595801116, + "grad_norm": 1.0503807067871094, + "learning_rate": 5.450650362336057e-06, + "loss": 0.3131, + "step": 24334 + }, + { + "epoch": 0.48715061431824436, + "grad_norm": 1.9316596984863281, + "learning_rate": 5.4503274978092166e-06, + "loss": 0.7632, + "step": 24335 + }, + { + "epoch": 0.4871706328353727, + "grad_norm": 1.140753984451294, + "learning_rate": 5.450004631389296e-06, + "loss": 0.3554, + "step": 24336 + }, + { + "epoch": 0.48719065135250106, + "grad_norm": 1.973806619644165, + "learning_rate": 5.449681763077653e-06, + "loss": 0.8406, + "step": 24337 + }, + { + "epoch": 0.4872106698696294, + "grad_norm": 1.0229346752166748, + "learning_rate": 5.449358892875644e-06, + "loss": 0.317, + "step": 24338 + }, + { + "epoch": 0.48723068838675776, + "grad_norm": 2.0840606689453125, + "learning_rate": 5.449036020784628e-06, + "loss": 0.806, + "step": 24339 + }, + { + "epoch": 0.4872507069038861, + "grad_norm": 1.0614007711410522, + "learning_rate": 5.448713146805962e-06, + "loss": 0.259, + "step": 24340 + }, + { + "epoch": 0.48727072542101446, + "grad_norm": 1.172037124633789, + "learning_rate": 5.448390270941001e-06, + "loss": 0.2871, + "step": 24341 + }, + { + "epoch": 0.48729074393814276, + "grad_norm": 1.7201224565505981, + "learning_rate": 5.448067393191104e-06, + "loss": 0.7994, + "step": 24342 + }, + { + "epoch": 0.4873107624552711, + "grad_norm": 1.0892874002456665, + "learning_rate": 5.447744513557629e-06, + "loss": 0.3026, + "step": 24343 + }, + { + "epoch": 0.48733078097239946, + "grad_norm": 1.3310236930847168, + "learning_rate": 5.44742163204193e-06, + "loss": 0.314, + "step": 24344 + }, + { + "epoch": 0.4873507994895278, + "grad_norm": 1.0953086614608765, + "learning_rate": 5.447098748645369e-06, + "loss": 0.2588, + "step": 24345 + }, + { + "epoch": 0.48737081800665616, + "grad_norm": 1.103294849395752, + "learning_rate": 5.4467758633693e-06, + "loss": 0.3184, + "step": 24346 + }, + { + "epoch": 0.4873908365237845, + "grad_norm": 1.8505003452301025, + "learning_rate": 5.446452976215082e-06, + "loss": 0.7961, + "step": 24347 + }, + { + "epoch": 0.48741085504091286, + "grad_norm": 1.0172491073608398, + "learning_rate": 5.446130087184073e-06, + "loss": 0.3312, + "step": 24348 + }, + { + "epoch": 0.4874308735580412, + "grad_norm": 1.1107330322265625, + "learning_rate": 5.445807196277627e-06, + "loss": 0.3338, + "step": 24349 + }, + { + "epoch": 0.4874508920751695, + "grad_norm": 1.2077946662902832, + "learning_rate": 5.445484303497104e-06, + "loss": 0.326, + "step": 24350 + }, + { + "epoch": 0.48747091059229786, + "grad_norm": 1.1894145011901855, + "learning_rate": 5.4451614088438606e-06, + "loss": 0.2858, + "step": 24351 + }, + { + "epoch": 0.4874909291094262, + "grad_norm": 1.1279363632202148, + "learning_rate": 5.444838512319255e-06, + "loss": 0.3164, + "step": 24352 + }, + { + "epoch": 0.48751094762655456, + "grad_norm": 1.1501104831695557, + "learning_rate": 5.444515613924645e-06, + "loss": 0.2917, + "step": 24353 + }, + { + "epoch": 0.4875309661436829, + "grad_norm": 1.1145963668823242, + "learning_rate": 5.444192713661385e-06, + "loss": 0.3176, + "step": 24354 + }, + { + "epoch": 0.48755098466081126, + "grad_norm": 1.3077056407928467, + "learning_rate": 5.443869811530837e-06, + "loss": 0.334, + "step": 24355 + }, + { + "epoch": 0.4875710031779396, + "grad_norm": 1.0925270318984985, + "learning_rate": 5.443546907534353e-06, + "loss": 0.3337, + "step": 24356 + }, + { + "epoch": 0.48759102169506796, + "grad_norm": 1.0740718841552734, + "learning_rate": 5.443224001673294e-06, + "loss": 0.3218, + "step": 24357 + }, + { + "epoch": 0.48761104021219626, + "grad_norm": 1.077589511871338, + "learning_rate": 5.442901093949017e-06, + "loss": 0.3106, + "step": 24358 + }, + { + "epoch": 0.4876310587293246, + "grad_norm": 1.153643250465393, + "learning_rate": 5.44257818436288e-06, + "loss": 0.3313, + "step": 24359 + }, + { + "epoch": 0.48765107724645296, + "grad_norm": 1.1409646272659302, + "learning_rate": 5.442255272916239e-06, + "loss": 0.3379, + "step": 24360 + }, + { + "epoch": 0.4876710957635813, + "grad_norm": 1.121579647064209, + "learning_rate": 5.441932359610451e-06, + "loss": 0.3216, + "step": 24361 + }, + { + "epoch": 0.48769111428070966, + "grad_norm": 1.2515217065811157, + "learning_rate": 5.441609444446876e-06, + "loss": 0.3396, + "step": 24362 + }, + { + "epoch": 0.487711132797838, + "grad_norm": 1.9172593355178833, + "learning_rate": 5.441286527426868e-06, + "loss": 0.766, + "step": 24363 + }, + { + "epoch": 0.48773115131496636, + "grad_norm": 1.0348753929138184, + "learning_rate": 5.440963608551788e-06, + "loss": 0.31, + "step": 24364 + }, + { + "epoch": 0.4877511698320947, + "grad_norm": 1.0630741119384766, + "learning_rate": 5.4406406878229916e-06, + "loss": 0.3196, + "step": 24365 + }, + { + "epoch": 0.487771188349223, + "grad_norm": 1.090064287185669, + "learning_rate": 5.4403177652418374e-06, + "loss": 0.3052, + "step": 24366 + }, + { + "epoch": 0.48779120686635136, + "grad_norm": 1.07520592212677, + "learning_rate": 5.43999484080968e-06, + "loss": 0.3248, + "step": 24367 + }, + { + "epoch": 0.4878112253834797, + "grad_norm": 1.279603123664856, + "learning_rate": 5.439671914527881e-06, + "loss": 0.3365, + "step": 24368 + }, + { + "epoch": 0.48783124390060806, + "grad_norm": 0.9853501915931702, + "learning_rate": 5.439348986397795e-06, + "loss": 0.2865, + "step": 24369 + }, + { + "epoch": 0.4878512624177364, + "grad_norm": 1.0077934265136719, + "learning_rate": 5.43902605642078e-06, + "loss": 0.2974, + "step": 24370 + }, + { + "epoch": 0.48787128093486476, + "grad_norm": 1.0203912258148193, + "learning_rate": 5.438703124598195e-06, + "loss": 0.319, + "step": 24371 + }, + { + "epoch": 0.4878912994519931, + "grad_norm": 1.206893801689148, + "learning_rate": 5.438380190931395e-06, + "loss": 0.3536, + "step": 24372 + }, + { + "epoch": 0.48791131796912146, + "grad_norm": 1.1761431694030762, + "learning_rate": 5.43805725542174e-06, + "loss": 0.3374, + "step": 24373 + }, + { + "epoch": 0.48793133648624976, + "grad_norm": 1.3107084035873413, + "learning_rate": 5.4377343180705865e-06, + "loss": 0.3299, + "step": 24374 + }, + { + "epoch": 0.4879513550033781, + "grad_norm": 1.0540449619293213, + "learning_rate": 5.437411378879292e-06, + "loss": 0.3243, + "step": 24375 + }, + { + "epoch": 0.48797137352050646, + "grad_norm": 1.9287588596343994, + "learning_rate": 5.4370884378492135e-06, + "loss": 0.7726, + "step": 24376 + }, + { + "epoch": 0.4879913920376348, + "grad_norm": 1.129685640335083, + "learning_rate": 5.436765494981711e-06, + "loss": 0.2802, + "step": 24377 + }, + { + "epoch": 0.48801141055476316, + "grad_norm": 1.130061149597168, + "learning_rate": 5.436442550278139e-06, + "loss": 0.3234, + "step": 24378 + }, + { + "epoch": 0.4880314290718915, + "grad_norm": 1.0002564191818237, + "learning_rate": 5.4361196037398574e-06, + "loss": 0.2508, + "step": 24379 + }, + { + "epoch": 0.48805144758901986, + "grad_norm": 1.2107927799224854, + "learning_rate": 5.435796655368222e-06, + "loss": 0.2826, + "step": 24380 + }, + { + "epoch": 0.4880714661061482, + "grad_norm": 1.2116725444793701, + "learning_rate": 5.435473705164593e-06, + "loss": 0.3193, + "step": 24381 + }, + { + "epoch": 0.4880914846232765, + "grad_norm": 1.109513521194458, + "learning_rate": 5.435150753130322e-06, + "loss": 0.289, + "step": 24382 + }, + { + "epoch": 0.48811150314040486, + "grad_norm": 2.097649574279785, + "learning_rate": 5.4348277992667745e-06, + "loss": 0.7437, + "step": 24383 + }, + { + "epoch": 0.4881315216575332, + "grad_norm": 1.2549023628234863, + "learning_rate": 5.434504843575305e-06, + "loss": 0.3032, + "step": 24384 + }, + { + "epoch": 0.48815154017466156, + "grad_norm": 1.1129682064056396, + "learning_rate": 5.434181886057268e-06, + "loss": 0.3317, + "step": 24385 + }, + { + "epoch": 0.4881715586917899, + "grad_norm": 1.002368450164795, + "learning_rate": 5.433858926714026e-06, + "loss": 0.3299, + "step": 24386 + }, + { + "epoch": 0.48819157720891826, + "grad_norm": 2.046938896179199, + "learning_rate": 5.433535965546933e-06, + "loss": 0.85, + "step": 24387 + }, + { + "epoch": 0.4882115957260466, + "grad_norm": 1.0315868854522705, + "learning_rate": 5.43321300255735e-06, + "loss": 0.2861, + "step": 24388 + }, + { + "epoch": 0.48823161424317496, + "grad_norm": 1.3745813369750977, + "learning_rate": 5.4328900377466285e-06, + "loss": 0.3805, + "step": 24389 + }, + { + "epoch": 0.48825163276030326, + "grad_norm": 1.0974500179290771, + "learning_rate": 5.432567071116135e-06, + "loss": 0.3676, + "step": 24390 + }, + { + "epoch": 0.4882716512774316, + "grad_norm": 1.2469162940979004, + "learning_rate": 5.43224410266722e-06, + "loss": 0.3283, + "step": 24391 + }, + { + "epoch": 0.48829166979455996, + "grad_norm": 1.3355364799499512, + "learning_rate": 5.431921132401246e-06, + "loss": 0.3759, + "step": 24392 + }, + { + "epoch": 0.4883116883116883, + "grad_norm": 1.0752837657928467, + "learning_rate": 5.431598160319568e-06, + "loss": 0.3406, + "step": 24393 + }, + { + "epoch": 0.48833170682881666, + "grad_norm": 1.0741310119628906, + "learning_rate": 5.431275186423542e-06, + "loss": 0.2902, + "step": 24394 + }, + { + "epoch": 0.488351725345945, + "grad_norm": 1.1183549165725708, + "learning_rate": 5.430952210714529e-06, + "loss": 0.2929, + "step": 24395 + }, + { + "epoch": 0.48837174386307336, + "grad_norm": 1.1236282587051392, + "learning_rate": 5.430629233193886e-06, + "loss": 0.2765, + "step": 24396 + }, + { + "epoch": 0.4883917623802017, + "grad_norm": 1.9624730348587036, + "learning_rate": 5.430306253862972e-06, + "loss": 0.7777, + "step": 24397 + }, + { + "epoch": 0.48841178089733, + "grad_norm": 1.0259147882461548, + "learning_rate": 5.429983272723141e-06, + "loss": 0.2829, + "step": 24398 + }, + { + "epoch": 0.48843179941445836, + "grad_norm": 1.1281687021255493, + "learning_rate": 5.4296602897757536e-06, + "loss": 0.3657, + "step": 24399 + }, + { + "epoch": 0.4884518179315867, + "grad_norm": 1.8122282028198242, + "learning_rate": 5.429337305022167e-06, + "loss": 0.7306, + "step": 24400 + }, + { + "epoch": 0.48847183644871506, + "grad_norm": 1.027590036392212, + "learning_rate": 5.4290143184637375e-06, + "loss": 0.3368, + "step": 24401 + }, + { + "epoch": 0.4884918549658434, + "grad_norm": 1.8685705661773682, + "learning_rate": 5.428691330101825e-06, + "loss": 0.8332, + "step": 24402 + }, + { + "epoch": 0.48851187348297176, + "grad_norm": 1.1576224565505981, + "learning_rate": 5.428368339937787e-06, + "loss": 0.3175, + "step": 24403 + }, + { + "epoch": 0.4885318920001001, + "grad_norm": 1.2654898166656494, + "learning_rate": 5.42804534797298e-06, + "loss": 0.3004, + "step": 24404 + }, + { + "epoch": 0.48855191051722846, + "grad_norm": 1.27582848072052, + "learning_rate": 5.427722354208762e-06, + "loss": 0.306, + "step": 24405 + }, + { + "epoch": 0.48857192903435676, + "grad_norm": 0.9958682060241699, + "learning_rate": 5.427399358646492e-06, + "loss": 0.3138, + "step": 24406 + }, + { + "epoch": 0.4885919475514851, + "grad_norm": 1.0924968719482422, + "learning_rate": 5.427076361287526e-06, + "loss": 0.3185, + "step": 24407 + }, + { + "epoch": 0.48861196606861346, + "grad_norm": 1.1473828554153442, + "learning_rate": 5.426753362133225e-06, + "loss": 0.3009, + "step": 24408 + }, + { + "epoch": 0.4886319845857418, + "grad_norm": 1.0858845710754395, + "learning_rate": 5.426430361184943e-06, + "loss": 0.2687, + "step": 24409 + }, + { + "epoch": 0.48865200310287016, + "grad_norm": 1.0761562585830688, + "learning_rate": 5.42610735844404e-06, + "loss": 0.3175, + "step": 24410 + }, + { + "epoch": 0.4886720216199985, + "grad_norm": 1.216368317604065, + "learning_rate": 5.4257843539118726e-06, + "loss": 0.3202, + "step": 24411 + }, + { + "epoch": 0.48869204013712686, + "grad_norm": 1.029093623161316, + "learning_rate": 5.425461347589801e-06, + "loss": 0.3062, + "step": 24412 + }, + { + "epoch": 0.4887120586542552, + "grad_norm": 1.1486769914627075, + "learning_rate": 5.425138339479181e-06, + "loss": 0.3041, + "step": 24413 + }, + { + "epoch": 0.4887320771713835, + "grad_norm": 1.072860836982727, + "learning_rate": 5.424815329581371e-06, + "loss": 0.2738, + "step": 24414 + }, + { + "epoch": 0.48875209568851186, + "grad_norm": 1.0343233346939087, + "learning_rate": 5.424492317897727e-06, + "loss": 0.3082, + "step": 24415 + }, + { + "epoch": 0.4887721142056402, + "grad_norm": 1.1151189804077148, + "learning_rate": 5.424169304429611e-06, + "loss": 0.3012, + "step": 24416 + }, + { + "epoch": 0.48879213272276856, + "grad_norm": 1.0582239627838135, + "learning_rate": 5.423846289178377e-06, + "loss": 0.3229, + "step": 24417 + }, + { + "epoch": 0.4888121512398969, + "grad_norm": 1.0416539907455444, + "learning_rate": 5.423523272145384e-06, + "loss": 0.2772, + "step": 24418 + }, + { + "epoch": 0.48883216975702526, + "grad_norm": 1.14424467086792, + "learning_rate": 5.423200253331993e-06, + "loss": 0.289, + "step": 24419 + }, + { + "epoch": 0.4888521882741536, + "grad_norm": 1.1011838912963867, + "learning_rate": 5.422877232739557e-06, + "loss": 0.3105, + "step": 24420 + }, + { + "epoch": 0.48887220679128196, + "grad_norm": 1.017024278640747, + "learning_rate": 5.422554210369436e-06, + "loss": 0.3067, + "step": 24421 + }, + { + "epoch": 0.48889222530841026, + "grad_norm": 1.105597734451294, + "learning_rate": 5.422231186222988e-06, + "loss": 0.2625, + "step": 24422 + }, + { + "epoch": 0.4889122438255386, + "grad_norm": 1.0593020915985107, + "learning_rate": 5.421908160301572e-06, + "loss": 0.2895, + "step": 24423 + }, + { + "epoch": 0.48893226234266696, + "grad_norm": 1.8898628950119019, + "learning_rate": 5.421585132606546e-06, + "loss": 0.7201, + "step": 24424 + }, + { + "epoch": 0.4889522808597953, + "grad_norm": 1.1426639556884766, + "learning_rate": 5.4212621031392644e-06, + "loss": 0.3011, + "step": 24425 + }, + { + "epoch": 0.48897229937692366, + "grad_norm": 1.1488425731658936, + "learning_rate": 5.420939071901089e-06, + "loss": 0.3029, + "step": 24426 + }, + { + "epoch": 0.488992317894052, + "grad_norm": 1.9174537658691406, + "learning_rate": 5.420616038893376e-06, + "loss": 0.7663, + "step": 24427 + }, + { + "epoch": 0.48901233641118036, + "grad_norm": 1.0819830894470215, + "learning_rate": 5.420293004117483e-06, + "loss": 0.3032, + "step": 24428 + }, + { + "epoch": 0.4890323549283087, + "grad_norm": 1.3659518957138062, + "learning_rate": 5.419969967574768e-06, + "loss": 0.3385, + "step": 24429 + }, + { + "epoch": 0.489052373445437, + "grad_norm": 1.0081288814544678, + "learning_rate": 5.419646929266591e-06, + "loss": 0.3186, + "step": 24430 + }, + { + "epoch": 0.48907239196256536, + "grad_norm": 1.1390483379364014, + "learning_rate": 5.419323889194309e-06, + "loss": 0.2966, + "step": 24431 + }, + { + "epoch": 0.4890924104796937, + "grad_norm": 1.054437279701233, + "learning_rate": 5.4190008473592795e-06, + "loss": 0.3216, + "step": 24432 + }, + { + "epoch": 0.48911242899682206, + "grad_norm": 1.0212959051132202, + "learning_rate": 5.418677803762859e-06, + "loss": 0.3198, + "step": 24433 + }, + { + "epoch": 0.4891324475139504, + "grad_norm": 1.1429771184921265, + "learning_rate": 5.418354758406408e-06, + "loss": 0.2985, + "step": 24434 + }, + { + "epoch": 0.48915246603107876, + "grad_norm": 1.0597285032272339, + "learning_rate": 5.418031711291285e-06, + "loss": 0.29, + "step": 24435 + }, + { + "epoch": 0.4891724845482071, + "grad_norm": 1.9455832242965698, + "learning_rate": 5.417708662418844e-06, + "loss": 0.8453, + "step": 24436 + }, + { + "epoch": 0.48919250306533546, + "grad_norm": 1.054734230041504, + "learning_rate": 5.417385611790449e-06, + "loss": 0.2798, + "step": 24437 + }, + { + "epoch": 0.48921252158246376, + "grad_norm": 1.038378119468689, + "learning_rate": 5.417062559407452e-06, + "loss": 0.2873, + "step": 24438 + }, + { + "epoch": 0.4892325400995921, + "grad_norm": 1.1380422115325928, + "learning_rate": 5.4167395052712155e-06, + "loss": 0.3437, + "step": 24439 + }, + { + "epoch": 0.48925255861672046, + "grad_norm": 1.8695741891860962, + "learning_rate": 5.416416449383094e-06, + "loss": 0.8045, + "step": 24440 + }, + { + "epoch": 0.4892725771338488, + "grad_norm": 1.0137587785720825, + "learning_rate": 5.416093391744448e-06, + "loss": 0.3508, + "step": 24441 + }, + { + "epoch": 0.48929259565097716, + "grad_norm": 1.0464190244674683, + "learning_rate": 5.415770332356635e-06, + "loss": 0.2909, + "step": 24442 + }, + { + "epoch": 0.4893126141681055, + "grad_norm": 2.0234878063201904, + "learning_rate": 5.415447271221014e-06, + "loss": 0.7836, + "step": 24443 + }, + { + "epoch": 0.48933263268523386, + "grad_norm": 1.2109951972961426, + "learning_rate": 5.4151242083389425e-06, + "loss": 0.3238, + "step": 24444 + }, + { + "epoch": 0.4893526512023622, + "grad_norm": 1.0182162523269653, + "learning_rate": 5.414801143711775e-06, + "loss": 0.3346, + "step": 24445 + }, + { + "epoch": 0.4893726697194905, + "grad_norm": 2.0586376190185547, + "learning_rate": 5.414478077340874e-06, + "loss": 0.8283, + "step": 24446 + }, + { + "epoch": 0.48939268823661886, + "grad_norm": 1.1453056335449219, + "learning_rate": 5.414155009227597e-06, + "loss": 0.3257, + "step": 24447 + }, + { + "epoch": 0.4894127067537472, + "grad_norm": 1.135957956314087, + "learning_rate": 5.413831939373303e-06, + "loss": 0.3729, + "step": 24448 + }, + { + "epoch": 0.48943272527087556, + "grad_norm": 1.0321524143218994, + "learning_rate": 5.4135088677793466e-06, + "loss": 0.3659, + "step": 24449 + }, + { + "epoch": 0.4894527437880039, + "grad_norm": 1.1989840269088745, + "learning_rate": 5.413185794447089e-06, + "loss": 0.3223, + "step": 24450 + }, + { + "epoch": 0.48947276230513226, + "grad_norm": 1.1357247829437256, + "learning_rate": 5.412862719377886e-06, + "loss": 0.3205, + "step": 24451 + }, + { + "epoch": 0.4894927808222606, + "grad_norm": 1.167484998703003, + "learning_rate": 5.412539642573098e-06, + "loss": 0.2747, + "step": 24452 + }, + { + "epoch": 0.48951279933938896, + "grad_norm": 1.1802698373794556, + "learning_rate": 5.412216564034082e-06, + "loss": 0.3314, + "step": 24453 + }, + { + "epoch": 0.48953281785651725, + "grad_norm": 1.1990684270858765, + "learning_rate": 5.411893483762195e-06, + "loss": 0.2786, + "step": 24454 + }, + { + "epoch": 0.4895528363736456, + "grad_norm": 0.995697557926178, + "learning_rate": 5.411570401758799e-06, + "loss": 0.3162, + "step": 24455 + }, + { + "epoch": 0.48957285489077396, + "grad_norm": 1.890016794204712, + "learning_rate": 5.4112473180252465e-06, + "loss": 0.8263, + "step": 24456 + }, + { + "epoch": 0.4895928734079023, + "grad_norm": 1.7761733531951904, + "learning_rate": 5.410924232562902e-06, + "loss": 0.8208, + "step": 24457 + }, + { + "epoch": 0.48961289192503066, + "grad_norm": 0.9637302756309509, + "learning_rate": 5.410601145373118e-06, + "loss": 0.2953, + "step": 24458 + }, + { + "epoch": 0.489632910442159, + "grad_norm": 0.9875587224960327, + "learning_rate": 5.410278056457255e-06, + "loss": 0.3104, + "step": 24459 + }, + { + "epoch": 0.48965292895928736, + "grad_norm": 1.0595136880874634, + "learning_rate": 5.409954965816673e-06, + "loss": 0.3124, + "step": 24460 + }, + { + "epoch": 0.4896729474764157, + "grad_norm": 1.0878347158432007, + "learning_rate": 5.409631873452728e-06, + "loss": 0.345, + "step": 24461 + }, + { + "epoch": 0.489692965993544, + "grad_norm": 1.0714179277420044, + "learning_rate": 5.409308779366779e-06, + "loss": 0.2794, + "step": 24462 + }, + { + "epoch": 0.48971298451067236, + "grad_norm": 1.0690836906433105, + "learning_rate": 5.408985683560184e-06, + "loss": 0.2955, + "step": 24463 + }, + { + "epoch": 0.4897330030278007, + "grad_norm": 1.0230827331542969, + "learning_rate": 5.4086625860343e-06, + "loss": 0.2705, + "step": 24464 + }, + { + "epoch": 0.48975302154492906, + "grad_norm": 1.1980153322219849, + "learning_rate": 5.408339486790488e-06, + "loss": 0.3562, + "step": 24465 + }, + { + "epoch": 0.4897730400620574, + "grad_norm": 1.1087520122528076, + "learning_rate": 5.408016385830103e-06, + "loss": 0.3168, + "step": 24466 + }, + { + "epoch": 0.48979305857918576, + "grad_norm": 1.1510303020477295, + "learning_rate": 5.407693283154506e-06, + "loss": 0.3259, + "step": 24467 + }, + { + "epoch": 0.4898130770963141, + "grad_norm": 1.0760858058929443, + "learning_rate": 5.407370178765055e-06, + "loss": 0.278, + "step": 24468 + }, + { + "epoch": 0.48983309561344246, + "grad_norm": 1.097446322441101, + "learning_rate": 5.407047072663105e-06, + "loss": 0.3153, + "step": 24469 + }, + { + "epoch": 0.48985311413057075, + "grad_norm": 1.7713749408721924, + "learning_rate": 5.406723964850019e-06, + "loss": 0.8855, + "step": 24470 + }, + { + "epoch": 0.4898731326476991, + "grad_norm": 1.94588041305542, + "learning_rate": 5.406400855327152e-06, + "loss": 0.7854, + "step": 24471 + }, + { + "epoch": 0.48989315116482746, + "grad_norm": 1.8848838806152344, + "learning_rate": 5.4060777440958625e-06, + "loss": 0.8175, + "step": 24472 + }, + { + "epoch": 0.4899131696819558, + "grad_norm": 1.0194611549377441, + "learning_rate": 5.40575463115751e-06, + "loss": 0.2718, + "step": 24473 + }, + { + "epoch": 0.48993318819908416, + "grad_norm": 1.0732895135879517, + "learning_rate": 5.405431516513453e-06, + "loss": 0.2749, + "step": 24474 + }, + { + "epoch": 0.4899532067162125, + "grad_norm": 1.15762460231781, + "learning_rate": 5.40510840016505e-06, + "loss": 0.3185, + "step": 24475 + }, + { + "epoch": 0.48997322523334086, + "grad_norm": 1.1833068132400513, + "learning_rate": 5.404785282113657e-06, + "loss": 0.3065, + "step": 24476 + }, + { + "epoch": 0.4899932437504692, + "grad_norm": 1.229587435722351, + "learning_rate": 5.4044621623606356e-06, + "loss": 0.3474, + "step": 24477 + }, + { + "epoch": 0.4900132622675975, + "grad_norm": 1.8797085285186768, + "learning_rate": 5.40413904090734e-06, + "loss": 0.7661, + "step": 24478 + }, + { + "epoch": 0.49003328078472586, + "grad_norm": 1.1498199701309204, + "learning_rate": 5.403815917755131e-06, + "loss": 0.264, + "step": 24479 + }, + { + "epoch": 0.4900532993018542, + "grad_norm": 1.098626971244812, + "learning_rate": 5.403492792905367e-06, + "loss": 0.3505, + "step": 24480 + }, + { + "epoch": 0.49007331781898256, + "grad_norm": 1.1092339754104614, + "learning_rate": 5.403169666359407e-06, + "loss": 0.3171, + "step": 24481 + }, + { + "epoch": 0.4900933363361109, + "grad_norm": 1.3355823755264282, + "learning_rate": 5.402846538118608e-06, + "loss": 0.3046, + "step": 24482 + }, + { + "epoch": 0.49011335485323926, + "grad_norm": 1.0637664794921875, + "learning_rate": 5.40252340818433e-06, + "loss": 0.2966, + "step": 24483 + }, + { + "epoch": 0.4901333733703676, + "grad_norm": 1.3515453338623047, + "learning_rate": 5.402200276557928e-06, + "loss": 0.2952, + "step": 24484 + }, + { + "epoch": 0.49015339188749596, + "grad_norm": 1.0732296705245972, + "learning_rate": 5.401877143240764e-06, + "loss": 0.2935, + "step": 24485 + }, + { + "epoch": 0.49017341040462425, + "grad_norm": 1.8606524467468262, + "learning_rate": 5.401554008234195e-06, + "loss": 0.775, + "step": 24486 + }, + { + "epoch": 0.4901934289217526, + "grad_norm": 1.072986364364624, + "learning_rate": 5.4012308715395785e-06, + "loss": 0.2719, + "step": 24487 + }, + { + "epoch": 0.49021344743888096, + "grad_norm": 0.975200891494751, + "learning_rate": 5.400907733158276e-06, + "loss": 0.2339, + "step": 24488 + }, + { + "epoch": 0.4902334659560093, + "grad_norm": 1.1060925722122192, + "learning_rate": 5.400584593091641e-06, + "loss": 0.2809, + "step": 24489 + }, + { + "epoch": 0.49025348447313766, + "grad_norm": 1.0547086000442505, + "learning_rate": 5.4002614513410364e-06, + "loss": 0.3119, + "step": 24490 + }, + { + "epoch": 0.490273502990266, + "grad_norm": 0.9388068318367004, + "learning_rate": 5.399938307907817e-06, + "loss": 0.2963, + "step": 24491 + }, + { + "epoch": 0.49029352150739436, + "grad_norm": 1.2304387092590332, + "learning_rate": 5.399615162793344e-06, + "loss": 0.307, + "step": 24492 + }, + { + "epoch": 0.4903135400245227, + "grad_norm": 1.0388290882110596, + "learning_rate": 5.399292015998975e-06, + "loss": 0.2764, + "step": 24493 + }, + { + "epoch": 0.490333558541651, + "grad_norm": 1.1004263162612915, + "learning_rate": 5.398968867526069e-06, + "loss": 0.3093, + "step": 24494 + }, + { + "epoch": 0.49035357705877936, + "grad_norm": 1.1302613019943237, + "learning_rate": 5.398645717375982e-06, + "loss": 0.3263, + "step": 24495 + }, + { + "epoch": 0.4903735955759077, + "grad_norm": 1.0621161460876465, + "learning_rate": 5.398322565550076e-06, + "loss": 0.299, + "step": 24496 + }, + { + "epoch": 0.49039361409303606, + "grad_norm": 1.9014147520065308, + "learning_rate": 5.397999412049706e-06, + "loss": 0.8069, + "step": 24497 + }, + { + "epoch": 0.4904136326101644, + "grad_norm": 1.2280793190002441, + "learning_rate": 5.397676256876233e-06, + "loss": 0.3109, + "step": 24498 + }, + { + "epoch": 0.49043365112729276, + "grad_norm": 1.104062795639038, + "learning_rate": 5.397353100031016e-06, + "loss": 0.2661, + "step": 24499 + }, + { + "epoch": 0.4904536696444211, + "grad_norm": 0.9870318174362183, + "learning_rate": 5.39702994151541e-06, + "loss": 0.3585, + "step": 24500 + }, + { + "epoch": 0.49047368816154946, + "grad_norm": 1.0749508142471313, + "learning_rate": 5.396706781330776e-06, + "loss": 0.2831, + "step": 24501 + }, + { + "epoch": 0.49049370667867775, + "grad_norm": 1.0297812223434448, + "learning_rate": 5.396383619478472e-06, + "loss": 0.2731, + "step": 24502 + }, + { + "epoch": 0.4905137251958061, + "grad_norm": 1.1628608703613281, + "learning_rate": 5.396060455959857e-06, + "loss": 0.3499, + "step": 24503 + }, + { + "epoch": 0.49053374371293446, + "grad_norm": 1.2061704397201538, + "learning_rate": 5.395737290776288e-06, + "loss": 0.3241, + "step": 24504 + }, + { + "epoch": 0.4905537622300628, + "grad_norm": 1.5725739002227783, + "learning_rate": 5.395414123929125e-06, + "loss": 0.313, + "step": 24505 + }, + { + "epoch": 0.49057378074719116, + "grad_norm": 1.161492109298706, + "learning_rate": 5.395090955419727e-06, + "loss": 0.2818, + "step": 24506 + }, + { + "epoch": 0.4905937992643195, + "grad_norm": 1.2065236568450928, + "learning_rate": 5.394767785249452e-06, + "loss": 0.2976, + "step": 24507 + }, + { + "epoch": 0.49061381778144786, + "grad_norm": 1.9587657451629639, + "learning_rate": 5.3944446134196586e-06, + "loss": 0.7951, + "step": 24508 + }, + { + "epoch": 0.4906338362985762, + "grad_norm": 1.0859205722808838, + "learning_rate": 5.3941214399317025e-06, + "loss": 0.2922, + "step": 24509 + }, + { + "epoch": 0.4906538548157045, + "grad_norm": 1.1878231763839722, + "learning_rate": 5.393798264786946e-06, + "loss": 0.2996, + "step": 24510 + }, + { + "epoch": 0.49067387333283285, + "grad_norm": 1.082423210144043, + "learning_rate": 5.393475087986745e-06, + "loss": 0.3152, + "step": 24511 + }, + { + "epoch": 0.4906938918499612, + "grad_norm": 1.8377612829208374, + "learning_rate": 5.393151909532461e-06, + "loss": 0.7667, + "step": 24512 + }, + { + "epoch": 0.49071391036708956, + "grad_norm": 1.1138914823532104, + "learning_rate": 5.392828729425451e-06, + "loss": 0.3185, + "step": 24513 + }, + { + "epoch": 0.4907339288842179, + "grad_norm": 1.8775124549865723, + "learning_rate": 5.392505547667074e-06, + "loss": 0.7556, + "step": 24514 + }, + { + "epoch": 0.49075394740134626, + "grad_norm": 1.0618627071380615, + "learning_rate": 5.392182364258687e-06, + "loss": 0.2793, + "step": 24515 + }, + { + "epoch": 0.4907739659184746, + "grad_norm": 1.1425186395645142, + "learning_rate": 5.391859179201651e-06, + "loss": 0.3093, + "step": 24516 + }, + { + "epoch": 0.49079398443560296, + "grad_norm": 1.8875970840454102, + "learning_rate": 5.391535992497321e-06, + "loss": 0.7849, + "step": 24517 + }, + { + "epoch": 0.49081400295273125, + "grad_norm": 1.1604046821594238, + "learning_rate": 5.391212804147059e-06, + "loss": 0.3161, + "step": 24518 + }, + { + "epoch": 0.4908340214698596, + "grad_norm": 1.1839885711669922, + "learning_rate": 5.390889614152224e-06, + "loss": 0.377, + "step": 24519 + }, + { + "epoch": 0.49085403998698796, + "grad_norm": 1.198927402496338, + "learning_rate": 5.390566422514172e-06, + "loss": 0.2671, + "step": 24520 + }, + { + "epoch": 0.4908740585041163, + "grad_norm": 1.1169829368591309, + "learning_rate": 5.3902432292342636e-06, + "loss": 0.3293, + "step": 24521 + }, + { + "epoch": 0.49089407702124466, + "grad_norm": 1.0309593677520752, + "learning_rate": 5.389920034313855e-06, + "loss": 0.2973, + "step": 24522 + }, + { + "epoch": 0.490914095538373, + "grad_norm": 1.132212519645691, + "learning_rate": 5.389596837754308e-06, + "loss": 0.285, + "step": 24523 + }, + { + "epoch": 0.49093411405550136, + "grad_norm": 1.0554591417312622, + "learning_rate": 5.3892736395569775e-06, + "loss": 0.3195, + "step": 24524 + }, + { + "epoch": 0.4909541325726297, + "grad_norm": 1.0016977787017822, + "learning_rate": 5.388950439723227e-06, + "loss": 0.297, + "step": 24525 + }, + { + "epoch": 0.490974151089758, + "grad_norm": 1.0870517492294312, + "learning_rate": 5.388627238254412e-06, + "loss": 0.2963, + "step": 24526 + }, + { + "epoch": 0.49099416960688635, + "grad_norm": 2.0224766731262207, + "learning_rate": 5.388304035151891e-06, + "loss": 0.8, + "step": 24527 + }, + { + "epoch": 0.4910141881240147, + "grad_norm": 2.0808169841766357, + "learning_rate": 5.387980830417025e-06, + "loss": 0.797, + "step": 24528 + }, + { + "epoch": 0.49103420664114306, + "grad_norm": 1.2923749685287476, + "learning_rate": 5.387657624051169e-06, + "loss": 0.3265, + "step": 24529 + }, + { + "epoch": 0.4910542251582714, + "grad_norm": 1.0881949663162231, + "learning_rate": 5.387334416055684e-06, + "loss": 0.2939, + "step": 24530 + }, + { + "epoch": 0.49107424367539976, + "grad_norm": 1.1124099493026733, + "learning_rate": 5.387011206431929e-06, + "loss": 0.2726, + "step": 24531 + }, + { + "epoch": 0.4910942621925281, + "grad_norm": 1.0333634614944458, + "learning_rate": 5.386687995181262e-06, + "loss": 0.2821, + "step": 24532 + }, + { + "epoch": 0.49111428070965646, + "grad_norm": 1.154064655303955, + "learning_rate": 5.386364782305042e-06, + "loss": 0.2983, + "step": 24533 + }, + { + "epoch": 0.49113429922678475, + "grad_norm": 1.1391221284866333, + "learning_rate": 5.386041567804628e-06, + "loss": 0.3044, + "step": 24534 + }, + { + "epoch": 0.4911543177439131, + "grad_norm": 1.0851154327392578, + "learning_rate": 5.385718351681378e-06, + "loss": 0.3534, + "step": 24535 + }, + { + "epoch": 0.49117433626104146, + "grad_norm": 1.013163447380066, + "learning_rate": 5.385395133936652e-06, + "loss": 0.3033, + "step": 24536 + }, + { + "epoch": 0.4911943547781698, + "grad_norm": 1.1422417163848877, + "learning_rate": 5.3850719145718056e-06, + "loss": 0.3642, + "step": 24537 + }, + { + "epoch": 0.49121437329529816, + "grad_norm": 1.047714352607727, + "learning_rate": 5.384748693588202e-06, + "loss": 0.3337, + "step": 24538 + }, + { + "epoch": 0.4912343918124265, + "grad_norm": 1.0168788433074951, + "learning_rate": 5.3844254709871965e-06, + "loss": 0.2656, + "step": 24539 + }, + { + "epoch": 0.49125441032955486, + "grad_norm": 1.285866618156433, + "learning_rate": 5.38410224677015e-06, + "loss": 0.3751, + "step": 24540 + }, + { + "epoch": 0.4912744288466832, + "grad_norm": 1.0246009826660156, + "learning_rate": 5.38377902093842e-06, + "loss": 0.3006, + "step": 24541 + }, + { + "epoch": 0.4912944473638115, + "grad_norm": 1.1142630577087402, + "learning_rate": 5.383455793493365e-06, + "loss": 0.3085, + "step": 24542 + }, + { + "epoch": 0.49131446588093985, + "grad_norm": 1.888285517692566, + "learning_rate": 5.383132564436344e-06, + "loss": 0.8134, + "step": 24543 + }, + { + "epoch": 0.4913344843980682, + "grad_norm": 1.0432130098342896, + "learning_rate": 5.382809333768716e-06, + "loss": 0.2956, + "step": 24544 + }, + { + "epoch": 0.49135450291519656, + "grad_norm": 1.0565465688705444, + "learning_rate": 5.3824861014918415e-06, + "loss": 0.3186, + "step": 24545 + }, + { + "epoch": 0.4913745214323249, + "grad_norm": 1.237036108970642, + "learning_rate": 5.382162867607077e-06, + "loss": 0.2935, + "step": 24546 + }, + { + "epoch": 0.49139453994945326, + "grad_norm": 1.1186178922653198, + "learning_rate": 5.381839632115781e-06, + "loss": 0.3206, + "step": 24547 + }, + { + "epoch": 0.4914145584665816, + "grad_norm": 1.132368803024292, + "learning_rate": 5.381516395019315e-06, + "loss": 0.3307, + "step": 24548 + }, + { + "epoch": 0.49143457698370996, + "grad_norm": 1.9240199327468872, + "learning_rate": 5.381193156319035e-06, + "loss": 0.7542, + "step": 24549 + }, + { + "epoch": 0.49145459550083825, + "grad_norm": 1.8461657762527466, + "learning_rate": 5.380869916016302e-06, + "loss": 0.8303, + "step": 24550 + }, + { + "epoch": 0.4914746140179666, + "grad_norm": 1.0089528560638428, + "learning_rate": 5.380546674112473e-06, + "loss": 0.3107, + "step": 24551 + }, + { + "epoch": 0.49149463253509496, + "grad_norm": 1.7927995920181274, + "learning_rate": 5.3802234306089085e-06, + "loss": 0.7229, + "step": 24552 + }, + { + "epoch": 0.4915146510522233, + "grad_norm": 1.9290910959243774, + "learning_rate": 5.379900185506965e-06, + "loss": 0.7519, + "step": 24553 + }, + { + "epoch": 0.49153466956935166, + "grad_norm": 1.1278420686721802, + "learning_rate": 5.379576938808005e-06, + "loss": 0.3361, + "step": 24554 + }, + { + "epoch": 0.49155468808648, + "grad_norm": 1.1485319137573242, + "learning_rate": 5.379253690513384e-06, + "loss": 0.3143, + "step": 24555 + }, + { + "epoch": 0.49157470660360836, + "grad_norm": 1.2235209941864014, + "learning_rate": 5.378930440624462e-06, + "loss": 0.342, + "step": 24556 + }, + { + "epoch": 0.4915947251207367, + "grad_norm": 1.0920487642288208, + "learning_rate": 5.378607189142597e-06, + "loss": 0.3274, + "step": 24557 + }, + { + "epoch": 0.491614743637865, + "grad_norm": 1.9089246988296509, + "learning_rate": 5.378283936069152e-06, + "loss": 0.8204, + "step": 24558 + }, + { + "epoch": 0.49163476215499335, + "grad_norm": 1.1121543645858765, + "learning_rate": 5.377960681405481e-06, + "loss": 0.3158, + "step": 24559 + }, + { + "epoch": 0.4916547806721217, + "grad_norm": 1.337497353553772, + "learning_rate": 5.377637425152944e-06, + "loss": 0.3193, + "step": 24560 + }, + { + "epoch": 0.49167479918925006, + "grad_norm": 1.0114848613739014, + "learning_rate": 5.377314167312901e-06, + "loss": 0.2621, + "step": 24561 + }, + { + "epoch": 0.4916948177063784, + "grad_norm": 1.124293565750122, + "learning_rate": 5.37699090788671e-06, + "loss": 0.344, + "step": 24562 + }, + { + "epoch": 0.49171483622350676, + "grad_norm": 1.0727572441101074, + "learning_rate": 5.376667646875731e-06, + "loss": 0.3706, + "step": 24563 + }, + { + "epoch": 0.4917348547406351, + "grad_norm": 1.189038634300232, + "learning_rate": 5.376344384281321e-06, + "loss": 0.3306, + "step": 24564 + }, + { + "epoch": 0.49175487325776346, + "grad_norm": 1.0914177894592285, + "learning_rate": 5.376021120104843e-06, + "loss": 0.3115, + "step": 24565 + }, + { + "epoch": 0.49177489177489175, + "grad_norm": 1.0804282426834106, + "learning_rate": 5.37569785434765e-06, + "loss": 0.283, + "step": 24566 + }, + { + "epoch": 0.4917949102920201, + "grad_norm": 1.091093897819519, + "learning_rate": 5.375374587011106e-06, + "loss": 0.3212, + "step": 24567 + }, + { + "epoch": 0.49181492880914846, + "grad_norm": 1.115061640739441, + "learning_rate": 5.375051318096568e-06, + "loss": 0.247, + "step": 24568 + }, + { + "epoch": 0.4918349473262768, + "grad_norm": 1.1138108968734741, + "learning_rate": 5.374728047605393e-06, + "loss": 0.3106, + "step": 24569 + }, + { + "epoch": 0.49185496584340516, + "grad_norm": 1.337783694267273, + "learning_rate": 5.374404775538945e-06, + "loss": 0.2927, + "step": 24570 + }, + { + "epoch": 0.4918749843605335, + "grad_norm": 1.665318250656128, + "learning_rate": 5.374081501898577e-06, + "loss": 0.3215, + "step": 24571 + }, + { + "epoch": 0.49189500287766186, + "grad_norm": 1.978872537612915, + "learning_rate": 5.373758226685654e-06, + "loss": 0.7997, + "step": 24572 + }, + { + "epoch": 0.4919150213947902, + "grad_norm": 1.201798677444458, + "learning_rate": 5.373434949901529e-06, + "loss": 0.2896, + "step": 24573 + }, + { + "epoch": 0.4919350399119185, + "grad_norm": 1.3621587753295898, + "learning_rate": 5.373111671547565e-06, + "loss": 0.2676, + "step": 24574 + }, + { + "epoch": 0.49195505842904685, + "grad_norm": 1.0745662450790405, + "learning_rate": 5.372788391625119e-06, + "loss": 0.2951, + "step": 24575 + }, + { + "epoch": 0.4919750769461752, + "grad_norm": 1.201480507850647, + "learning_rate": 5.372465110135552e-06, + "loss": 0.3358, + "step": 24576 + }, + { + "epoch": 0.49199509546330356, + "grad_norm": 1.0983326435089111, + "learning_rate": 5.372141827080221e-06, + "loss": 0.2806, + "step": 24577 + }, + { + "epoch": 0.4920151139804319, + "grad_norm": 1.1031725406646729, + "learning_rate": 5.371818542460487e-06, + "loss": 0.3218, + "step": 24578 + }, + { + "epoch": 0.49203513249756026, + "grad_norm": 1.0880299806594849, + "learning_rate": 5.371495256277706e-06, + "loss": 0.358, + "step": 24579 + }, + { + "epoch": 0.4920551510146886, + "grad_norm": 1.044271469116211, + "learning_rate": 5.371171968533241e-06, + "loss": 0.3168, + "step": 24580 + }, + { + "epoch": 0.49207516953181696, + "grad_norm": 1.0932984352111816, + "learning_rate": 5.370848679228446e-06, + "loss": 0.3033, + "step": 24581 + }, + { + "epoch": 0.49209518804894525, + "grad_norm": 1.1744201183319092, + "learning_rate": 5.370525388364685e-06, + "loss": 0.3684, + "step": 24582 + }, + { + "epoch": 0.4921152065660736, + "grad_norm": 1.0760174989700317, + "learning_rate": 5.3702020959433155e-06, + "loss": 0.3345, + "step": 24583 + }, + { + "epoch": 0.49213522508320195, + "grad_norm": 1.1332919597625732, + "learning_rate": 5.369878801965695e-06, + "loss": 0.3542, + "step": 24584 + }, + { + "epoch": 0.4921552436003303, + "grad_norm": 1.0715458393096924, + "learning_rate": 5.369555506433185e-06, + "loss": 0.3388, + "step": 24585 + }, + { + "epoch": 0.49217526211745866, + "grad_norm": 1.2947067022323608, + "learning_rate": 5.369232209347142e-06, + "loss": 0.3389, + "step": 24586 + }, + { + "epoch": 0.492195280634587, + "grad_norm": 1.9478950500488281, + "learning_rate": 5.368908910708927e-06, + "loss": 0.8622, + "step": 24587 + }, + { + "epoch": 0.49221529915171536, + "grad_norm": 1.1604409217834473, + "learning_rate": 5.368585610519895e-06, + "loss": 0.3086, + "step": 24588 + }, + { + "epoch": 0.49223531766884365, + "grad_norm": 1.1505529880523682, + "learning_rate": 5.3682623087814124e-06, + "loss": 0.3326, + "step": 24589 + }, + { + "epoch": 0.492255336185972, + "grad_norm": 1.3125128746032715, + "learning_rate": 5.367939005494833e-06, + "loss": 0.3339, + "step": 24590 + }, + { + "epoch": 0.49227535470310035, + "grad_norm": 1.1390583515167236, + "learning_rate": 5.367615700661517e-06, + "loss": 0.2711, + "step": 24591 + }, + { + "epoch": 0.4922953732202287, + "grad_norm": 1.9426265954971313, + "learning_rate": 5.367292394282825e-06, + "loss": 0.7385, + "step": 24592 + }, + { + "epoch": 0.49231539173735706, + "grad_norm": 1.188110589981079, + "learning_rate": 5.366969086360113e-06, + "loss": 0.3207, + "step": 24593 + }, + { + "epoch": 0.4923354102544854, + "grad_norm": 1.1499345302581787, + "learning_rate": 5.3666457768947425e-06, + "loss": 0.3138, + "step": 24594 + }, + { + "epoch": 0.49235542877161376, + "grad_norm": 1.2523716688156128, + "learning_rate": 5.366322465888072e-06, + "loss": 0.3347, + "step": 24595 + }, + { + "epoch": 0.4923754472887421, + "grad_norm": 1.0490227937698364, + "learning_rate": 5.365999153341462e-06, + "loss": 0.311, + "step": 24596 + }, + { + "epoch": 0.4923954658058704, + "grad_norm": 1.1539931297302246, + "learning_rate": 5.365675839256268e-06, + "loss": 0.2497, + "step": 24597 + }, + { + "epoch": 0.49241548432299875, + "grad_norm": 1.0963270664215088, + "learning_rate": 5.3653525236338536e-06, + "loss": 0.2995, + "step": 24598 + }, + { + "epoch": 0.4924355028401271, + "grad_norm": 1.8564338684082031, + "learning_rate": 5.365029206475575e-06, + "loss": 0.7765, + "step": 24599 + }, + { + "epoch": 0.49245552135725545, + "grad_norm": 1.167885422706604, + "learning_rate": 5.364705887782793e-06, + "loss": 0.3107, + "step": 24600 + }, + { + "epoch": 0.4924755398743838, + "grad_norm": 1.186866283416748, + "learning_rate": 5.364382567556864e-06, + "loss": 0.3274, + "step": 24601 + }, + { + "epoch": 0.49249555839151216, + "grad_norm": 1.0958088636398315, + "learning_rate": 5.3640592457991505e-06, + "loss": 0.3076, + "step": 24602 + }, + { + "epoch": 0.4925155769086405, + "grad_norm": 1.1087620258331299, + "learning_rate": 5.3637359225110095e-06, + "loss": 0.2303, + "step": 24603 + }, + { + "epoch": 0.49253559542576886, + "grad_norm": 1.0059970617294312, + "learning_rate": 5.363412597693801e-06, + "loss": 0.2789, + "step": 24604 + }, + { + "epoch": 0.49255561394289715, + "grad_norm": 1.1038798093795776, + "learning_rate": 5.363089271348886e-06, + "loss": 0.2921, + "step": 24605 + }, + { + "epoch": 0.4925756324600255, + "grad_norm": 1.0579382181167603, + "learning_rate": 5.362765943477619e-06, + "loss": 0.33, + "step": 24606 + }, + { + "epoch": 0.49259565097715385, + "grad_norm": 1.0622529983520508, + "learning_rate": 5.362442614081364e-06, + "loss": 0.2795, + "step": 24607 + }, + { + "epoch": 0.4926156694942822, + "grad_norm": 1.333370566368103, + "learning_rate": 5.362119283161476e-06, + "loss": 0.3264, + "step": 24608 + }, + { + "epoch": 0.49263568801141056, + "grad_norm": 1.1270830631256104, + "learning_rate": 5.36179595071932e-06, + "loss": 0.3274, + "step": 24609 + }, + { + "epoch": 0.4926557065285389, + "grad_norm": 1.1669965982437134, + "learning_rate": 5.361472616756249e-06, + "loss": 0.365, + "step": 24610 + }, + { + "epoch": 0.49267572504566726, + "grad_norm": 1.7747619152069092, + "learning_rate": 5.361149281273627e-06, + "loss": 0.8216, + "step": 24611 + }, + { + "epoch": 0.4926957435627956, + "grad_norm": 1.1332341432571411, + "learning_rate": 5.3608259442728095e-06, + "loss": 0.3086, + "step": 24612 + }, + { + "epoch": 0.4927157620799239, + "grad_norm": 1.0589721202850342, + "learning_rate": 5.360502605755158e-06, + "loss": 0.3119, + "step": 24613 + }, + { + "epoch": 0.49273578059705225, + "grad_norm": 1.7893463373184204, + "learning_rate": 5.36017926572203e-06, + "loss": 0.8517, + "step": 24614 + }, + { + "epoch": 0.4927557991141806, + "grad_norm": 1.2170387506484985, + "learning_rate": 5.359855924174786e-06, + "loss": 0.2808, + "step": 24615 + }, + { + "epoch": 0.49277581763130895, + "grad_norm": 1.012961745262146, + "learning_rate": 5.359532581114787e-06, + "loss": 0.2734, + "step": 24616 + }, + { + "epoch": 0.4927958361484373, + "grad_norm": 1.0032572746276855, + "learning_rate": 5.359209236543389e-06, + "loss": 0.2893, + "step": 24617 + }, + { + "epoch": 0.49281585466556566, + "grad_norm": 1.0478489398956299, + "learning_rate": 5.358885890461953e-06, + "loss": 0.3236, + "step": 24618 + }, + { + "epoch": 0.492835873182694, + "grad_norm": 1.2402515411376953, + "learning_rate": 5.358562542871838e-06, + "loss": 0.3687, + "step": 24619 + }, + { + "epoch": 0.49285589169982236, + "grad_norm": 1.151104211807251, + "learning_rate": 5.358239193774402e-06, + "loss": 0.3333, + "step": 24620 + }, + { + "epoch": 0.49287591021695065, + "grad_norm": 1.0425556898117065, + "learning_rate": 5.3579158431710074e-06, + "loss": 0.319, + "step": 24621 + }, + { + "epoch": 0.492895928734079, + "grad_norm": 1.0600703954696655, + "learning_rate": 5.357592491063011e-06, + "loss": 0.2847, + "step": 24622 + }, + { + "epoch": 0.49291594725120735, + "grad_norm": 1.0498720407485962, + "learning_rate": 5.357269137451772e-06, + "loss": 0.2998, + "step": 24623 + }, + { + "epoch": 0.4929359657683357, + "grad_norm": 1.9608681201934814, + "learning_rate": 5.356945782338652e-06, + "loss": 0.7509, + "step": 24624 + }, + { + "epoch": 0.49295598428546406, + "grad_norm": 1.096498966217041, + "learning_rate": 5.3566224257250086e-06, + "loss": 0.3115, + "step": 24625 + }, + { + "epoch": 0.4929760028025924, + "grad_norm": 1.2181073427200317, + "learning_rate": 5.3562990676122e-06, + "loss": 0.3404, + "step": 24626 + }, + { + "epoch": 0.49299602131972076, + "grad_norm": 1.155542254447937, + "learning_rate": 5.355975708001586e-06, + "loss": 0.3348, + "step": 24627 + }, + { + "epoch": 0.4930160398368491, + "grad_norm": 1.0979881286621094, + "learning_rate": 5.355652346894528e-06, + "loss": 0.3197, + "step": 24628 + }, + { + "epoch": 0.4930360583539774, + "grad_norm": 1.1311182975769043, + "learning_rate": 5.355328984292385e-06, + "loss": 0.3331, + "step": 24629 + }, + { + "epoch": 0.49305607687110575, + "grad_norm": 1.134608268737793, + "learning_rate": 5.355005620196514e-06, + "loss": 0.3188, + "step": 24630 + }, + { + "epoch": 0.4930760953882341, + "grad_norm": 1.1793434619903564, + "learning_rate": 5.354682254608276e-06, + "loss": 0.3314, + "step": 24631 + }, + { + "epoch": 0.49309611390536245, + "grad_norm": 1.0800971984863281, + "learning_rate": 5.354358887529031e-06, + "loss": 0.2993, + "step": 24632 + }, + { + "epoch": 0.4931161324224908, + "grad_norm": 1.1444424390792847, + "learning_rate": 5.354035518960136e-06, + "loss": 0.3242, + "step": 24633 + }, + { + "epoch": 0.49313615093961916, + "grad_norm": 1.0295050144195557, + "learning_rate": 5.353712148902953e-06, + "loss": 0.3191, + "step": 24634 + }, + { + "epoch": 0.4931561694567475, + "grad_norm": 1.0370110273361206, + "learning_rate": 5.35338877735884e-06, + "loss": 0.3096, + "step": 24635 + }, + { + "epoch": 0.49317618797387586, + "grad_norm": 1.0798825025558472, + "learning_rate": 5.353065404329158e-06, + "loss": 0.3065, + "step": 24636 + }, + { + "epoch": 0.49319620649100415, + "grad_norm": 1.277122974395752, + "learning_rate": 5.352742029815262e-06, + "loss": 0.2859, + "step": 24637 + }, + { + "epoch": 0.4932162250081325, + "grad_norm": 1.0846940279006958, + "learning_rate": 5.352418653818517e-06, + "loss": 0.2857, + "step": 24638 + }, + { + "epoch": 0.49323624352526085, + "grad_norm": 1.0918480157852173, + "learning_rate": 5.352095276340279e-06, + "loss": 0.321, + "step": 24639 + }, + { + "epoch": 0.4932562620423892, + "grad_norm": 1.0806604623794556, + "learning_rate": 5.351771897381908e-06, + "loss": 0.3154, + "step": 24640 + }, + { + "epoch": 0.49327628055951755, + "grad_norm": 1.0642776489257812, + "learning_rate": 5.351448516944763e-06, + "loss": 0.3574, + "step": 24641 + }, + { + "epoch": 0.4932962990766459, + "grad_norm": 1.1610218286514282, + "learning_rate": 5.351125135030205e-06, + "loss": 0.3328, + "step": 24642 + }, + { + "epoch": 0.49331631759377426, + "grad_norm": 1.0761480331420898, + "learning_rate": 5.350801751639593e-06, + "loss": 0.2991, + "step": 24643 + }, + { + "epoch": 0.4933363361109026, + "grad_norm": 1.1057746410369873, + "learning_rate": 5.350478366774284e-06, + "loss": 0.3818, + "step": 24644 + }, + { + "epoch": 0.4933563546280309, + "grad_norm": 1.1210442781448364, + "learning_rate": 5.3501549804356415e-06, + "loss": 0.2464, + "step": 24645 + }, + { + "epoch": 0.49337637314515925, + "grad_norm": 1.1931113004684448, + "learning_rate": 5.349831592625021e-06, + "loss": 0.3422, + "step": 24646 + }, + { + "epoch": 0.4933963916622876, + "grad_norm": 1.233546495437622, + "learning_rate": 5.349508203343785e-06, + "loss": 0.336, + "step": 24647 + }, + { + "epoch": 0.49341641017941595, + "grad_norm": 1.1209423542022705, + "learning_rate": 5.3491848125932914e-06, + "loss": 0.3125, + "step": 24648 + }, + { + "epoch": 0.4934364286965443, + "grad_norm": 1.0752739906311035, + "learning_rate": 5.3488614203749e-06, + "loss": 0.3213, + "step": 24649 + }, + { + "epoch": 0.49345644721367266, + "grad_norm": 1.1611309051513672, + "learning_rate": 5.34853802668997e-06, + "loss": 0.3341, + "step": 24650 + }, + { + "epoch": 0.493476465730801, + "grad_norm": 1.0406663417816162, + "learning_rate": 5.348214631539863e-06, + "loss": 0.3128, + "step": 24651 + }, + { + "epoch": 0.49349648424792936, + "grad_norm": 1.1758185625076294, + "learning_rate": 5.3478912349259335e-06, + "loss": 0.3193, + "step": 24652 + }, + { + "epoch": 0.49351650276505765, + "grad_norm": 1.2328407764434814, + "learning_rate": 5.347567836849545e-06, + "loss": 0.3521, + "step": 24653 + }, + { + "epoch": 0.493536521282186, + "grad_norm": 1.0432051420211792, + "learning_rate": 5.347244437312058e-06, + "loss": 0.2922, + "step": 24654 + }, + { + "epoch": 0.49355653979931435, + "grad_norm": 0.9794113636016846, + "learning_rate": 5.346921036314829e-06, + "loss": 0.3255, + "step": 24655 + }, + { + "epoch": 0.4935765583164427, + "grad_norm": 1.2035082578659058, + "learning_rate": 5.346597633859218e-06, + "loss": 0.321, + "step": 24656 + }, + { + "epoch": 0.49359657683357105, + "grad_norm": 1.0646342039108276, + "learning_rate": 5.346274229946587e-06, + "loss": 0.322, + "step": 24657 + }, + { + "epoch": 0.4936165953506994, + "grad_norm": 1.1039026975631714, + "learning_rate": 5.345950824578291e-06, + "loss": 0.3566, + "step": 24658 + }, + { + "epoch": 0.49363661386782776, + "grad_norm": 1.2584201097488403, + "learning_rate": 5.345627417755693e-06, + "loss": 0.3119, + "step": 24659 + }, + { + "epoch": 0.4936566323849561, + "grad_norm": 1.2005658149719238, + "learning_rate": 5.345304009480153e-06, + "loss": 0.3013, + "step": 24660 + }, + { + "epoch": 0.4936766509020844, + "grad_norm": 1.068143367767334, + "learning_rate": 5.344980599753028e-06, + "loss": 0.2887, + "step": 24661 + }, + { + "epoch": 0.49369666941921275, + "grad_norm": 0.9287933707237244, + "learning_rate": 5.344657188575679e-06, + "loss": 0.2708, + "step": 24662 + }, + { + "epoch": 0.4937166879363411, + "grad_norm": 1.0309799909591675, + "learning_rate": 5.344333775949467e-06, + "loss": 0.2823, + "step": 24663 + }, + { + "epoch": 0.49373670645346945, + "grad_norm": 1.0592067241668701, + "learning_rate": 5.344010361875748e-06, + "loss": 0.3184, + "step": 24664 + }, + { + "epoch": 0.4937567249705978, + "grad_norm": 1.1326189041137695, + "learning_rate": 5.3436869463558835e-06, + "loss": 0.3182, + "step": 24665 + }, + { + "epoch": 0.49377674348772616, + "grad_norm": 1.1735759973526, + "learning_rate": 5.343363529391233e-06, + "loss": 0.3103, + "step": 24666 + }, + { + "epoch": 0.4937967620048545, + "grad_norm": 1.1164369583129883, + "learning_rate": 5.3430401109831574e-06, + "loss": 0.3538, + "step": 24667 + }, + { + "epoch": 0.49381678052198286, + "grad_norm": 1.1219143867492676, + "learning_rate": 5.342716691133014e-06, + "loss": 0.3181, + "step": 24668 + }, + { + "epoch": 0.49383679903911115, + "grad_norm": 1.2351394891738892, + "learning_rate": 5.342393269842164e-06, + "loss": 0.3429, + "step": 24669 + }, + { + "epoch": 0.4938568175562395, + "grad_norm": 1.0409153699874878, + "learning_rate": 5.3420698471119655e-06, + "loss": 0.2989, + "step": 24670 + }, + { + "epoch": 0.49387683607336785, + "grad_norm": 1.1870805025100708, + "learning_rate": 5.34174642294378e-06, + "loss": 0.3064, + "step": 24671 + }, + { + "epoch": 0.4938968545904962, + "grad_norm": 1.1253252029418945, + "learning_rate": 5.3414229973389645e-06, + "loss": 0.3013, + "step": 24672 + }, + { + "epoch": 0.49391687310762455, + "grad_norm": 1.1521403789520264, + "learning_rate": 5.341099570298882e-06, + "loss": 0.336, + "step": 24673 + }, + { + "epoch": 0.4939368916247529, + "grad_norm": 1.154049038887024, + "learning_rate": 5.34077614182489e-06, + "loss": 0.3273, + "step": 24674 + }, + { + "epoch": 0.49395691014188126, + "grad_norm": 0.9949697852134705, + "learning_rate": 5.340452711918348e-06, + "loss": 0.295, + "step": 24675 + }, + { + "epoch": 0.4939769286590096, + "grad_norm": 1.203222632408142, + "learning_rate": 5.340129280580616e-06, + "loss": 0.3485, + "step": 24676 + }, + { + "epoch": 0.4939969471761379, + "grad_norm": 1.224189043045044, + "learning_rate": 5.339805847813053e-06, + "loss": 0.3047, + "step": 24677 + }, + { + "epoch": 0.49401696569326625, + "grad_norm": 1.1026357412338257, + "learning_rate": 5.339482413617021e-06, + "loss": 0.3296, + "step": 24678 + }, + { + "epoch": 0.4940369842103946, + "grad_norm": 1.2257267236709595, + "learning_rate": 5.339158977993876e-06, + "loss": 0.324, + "step": 24679 + }, + { + "epoch": 0.49405700272752295, + "grad_norm": 0.9189408421516418, + "learning_rate": 5.338835540944981e-06, + "loss": 0.2854, + "step": 24680 + }, + { + "epoch": 0.4940770212446513, + "grad_norm": 1.1576329469680786, + "learning_rate": 5.338512102471694e-06, + "loss": 0.3646, + "step": 24681 + }, + { + "epoch": 0.49409703976177966, + "grad_norm": 1.1684666872024536, + "learning_rate": 5.338188662575376e-06, + "loss": 0.3377, + "step": 24682 + }, + { + "epoch": 0.494117058278908, + "grad_norm": 1.050447702407837, + "learning_rate": 5.337865221257384e-06, + "loss": 0.3258, + "step": 24683 + }, + { + "epoch": 0.49413707679603636, + "grad_norm": 1.0651694536209106, + "learning_rate": 5.3375417785190794e-06, + "loss": 0.3442, + "step": 24684 + }, + { + "epoch": 0.49415709531316465, + "grad_norm": 1.0303558111190796, + "learning_rate": 5.337218334361823e-06, + "loss": 0.3011, + "step": 24685 + }, + { + "epoch": 0.494177113830293, + "grad_norm": 1.2780681848526, + "learning_rate": 5.336894888786972e-06, + "loss": 0.3215, + "step": 24686 + }, + { + "epoch": 0.49419713234742135, + "grad_norm": 1.0644636154174805, + "learning_rate": 5.336571441795889e-06, + "loss": 0.3133, + "step": 24687 + }, + { + "epoch": 0.4942171508645497, + "grad_norm": 1.0843194723129272, + "learning_rate": 5.3362479933899305e-06, + "loss": 0.3521, + "step": 24688 + }, + { + "epoch": 0.49423716938167805, + "grad_norm": 1.4530845880508423, + "learning_rate": 5.3359245435704595e-06, + "loss": 0.3063, + "step": 24689 + }, + { + "epoch": 0.4942571878988064, + "grad_norm": 1.0766115188598633, + "learning_rate": 5.335601092338832e-06, + "loss": 0.2657, + "step": 24690 + }, + { + "epoch": 0.49427720641593476, + "grad_norm": 1.0496786832809448, + "learning_rate": 5.3352776396964104e-06, + "loss": 0.2863, + "step": 24691 + }, + { + "epoch": 0.4942972249330631, + "grad_norm": 1.2670798301696777, + "learning_rate": 5.334954185644554e-06, + "loss": 0.3972, + "step": 24692 + }, + { + "epoch": 0.4943172434501914, + "grad_norm": 1.2616275548934937, + "learning_rate": 5.334630730184623e-06, + "loss": 0.289, + "step": 24693 + }, + { + "epoch": 0.49433726196731975, + "grad_norm": 1.0382616519927979, + "learning_rate": 5.334307273317976e-06, + "loss": 0.329, + "step": 24694 + }, + { + "epoch": 0.4943572804844481, + "grad_norm": 1.297683835029602, + "learning_rate": 5.333983815045973e-06, + "loss": 0.3256, + "step": 24695 + }, + { + "epoch": 0.49437729900157645, + "grad_norm": 1.128790259361267, + "learning_rate": 5.333660355369973e-06, + "loss": 0.2864, + "step": 24696 + }, + { + "epoch": 0.4943973175187048, + "grad_norm": 1.0456360578536987, + "learning_rate": 5.333336894291337e-06, + "loss": 0.3141, + "step": 24697 + }, + { + "epoch": 0.49441733603583315, + "grad_norm": 1.179949164390564, + "learning_rate": 5.333013431811426e-06, + "loss": 0.3049, + "step": 24698 + }, + { + "epoch": 0.4944373545529615, + "grad_norm": 1.1713460683822632, + "learning_rate": 5.332689967931597e-06, + "loss": 0.2915, + "step": 24699 + }, + { + "epoch": 0.49445737307008986, + "grad_norm": 1.131537675857544, + "learning_rate": 5.332366502653211e-06, + "loss": 0.2813, + "step": 24700 + }, + { + "epoch": 0.49447739158721815, + "grad_norm": 1.8161622285842896, + "learning_rate": 5.332043035977628e-06, + "loss": 0.8826, + "step": 24701 + }, + { + "epoch": 0.4944974101043465, + "grad_norm": 1.085808515548706, + "learning_rate": 5.331719567906207e-06, + "loss": 0.2858, + "step": 24702 + }, + { + "epoch": 0.49451742862147485, + "grad_norm": 1.1776864528656006, + "learning_rate": 5.331396098440309e-06, + "loss": 0.3395, + "step": 24703 + }, + { + "epoch": 0.4945374471386032, + "grad_norm": 1.1802804470062256, + "learning_rate": 5.331072627581292e-06, + "loss": 0.2945, + "step": 24704 + }, + { + "epoch": 0.49455746565573155, + "grad_norm": 1.0605432987213135, + "learning_rate": 5.330749155330519e-06, + "loss": 0.2647, + "step": 24705 + }, + { + "epoch": 0.4945774841728599, + "grad_norm": 0.9575822949409485, + "learning_rate": 5.330425681689346e-06, + "loss": 0.2433, + "step": 24706 + }, + { + "epoch": 0.49459750268998826, + "grad_norm": 0.9502704739570618, + "learning_rate": 5.330102206659135e-06, + "loss": 0.2955, + "step": 24707 + }, + { + "epoch": 0.4946175212071166, + "grad_norm": 1.1310948133468628, + "learning_rate": 5.329778730241245e-06, + "loss": 0.3227, + "step": 24708 + }, + { + "epoch": 0.4946375397242449, + "grad_norm": 1.1506136655807495, + "learning_rate": 5.329455252437037e-06, + "loss": 0.3437, + "step": 24709 + }, + { + "epoch": 0.49465755824137325, + "grad_norm": 1.1294039487838745, + "learning_rate": 5.329131773247868e-06, + "loss": 0.3507, + "step": 24710 + }, + { + "epoch": 0.4946775767585016, + "grad_norm": 1.044842004776001, + "learning_rate": 5.328808292675103e-06, + "loss": 0.2897, + "step": 24711 + }, + { + "epoch": 0.49469759527562995, + "grad_norm": 1.8575729131698608, + "learning_rate": 5.328484810720096e-06, + "loss": 0.7971, + "step": 24712 + }, + { + "epoch": 0.4947176137927583, + "grad_norm": 1.028481364250183, + "learning_rate": 5.328161327384212e-06, + "loss": 0.3164, + "step": 24713 + }, + { + "epoch": 0.49473763230988665, + "grad_norm": 1.1301679611206055, + "learning_rate": 5.327837842668806e-06, + "loss": 0.309, + "step": 24714 + }, + { + "epoch": 0.494757650827015, + "grad_norm": 1.0928456783294678, + "learning_rate": 5.327514356575243e-06, + "loss": 0.3149, + "step": 24715 + }, + { + "epoch": 0.49477766934414336, + "grad_norm": 1.2001047134399414, + "learning_rate": 5.327190869104878e-06, + "loss": 0.2973, + "step": 24716 + }, + { + "epoch": 0.49479768786127165, + "grad_norm": 1.1756623983383179, + "learning_rate": 5.326867380259073e-06, + "loss": 0.2953, + "step": 24717 + }, + { + "epoch": 0.4948177063784, + "grad_norm": 1.0821428298950195, + "learning_rate": 5.326543890039188e-06, + "loss": 0.305, + "step": 24718 + }, + { + "epoch": 0.49483772489552835, + "grad_norm": 1.1086747646331787, + "learning_rate": 5.326220398446583e-06, + "loss": 0.3114, + "step": 24719 + }, + { + "epoch": 0.4948577434126567, + "grad_norm": 1.2200320959091187, + "learning_rate": 5.325896905482619e-06, + "loss": 0.3516, + "step": 24720 + }, + { + "epoch": 0.49487776192978505, + "grad_norm": 1.0903366804122925, + "learning_rate": 5.325573411148652e-06, + "loss": 0.3233, + "step": 24721 + }, + { + "epoch": 0.4948977804469134, + "grad_norm": 1.292216420173645, + "learning_rate": 5.325249915446046e-06, + "loss": 0.3014, + "step": 24722 + }, + { + "epoch": 0.49491779896404176, + "grad_norm": 1.1051504611968994, + "learning_rate": 5.324926418376158e-06, + "loss": 0.302, + "step": 24723 + }, + { + "epoch": 0.4949378174811701, + "grad_norm": 1.9166513681411743, + "learning_rate": 5.324602919940351e-06, + "loss": 0.7904, + "step": 24724 + }, + { + "epoch": 0.4949578359982984, + "grad_norm": 1.1960114240646362, + "learning_rate": 5.3242794201399835e-06, + "loss": 0.3774, + "step": 24725 + }, + { + "epoch": 0.49497785451542675, + "grad_norm": 1.1558421850204468, + "learning_rate": 5.3239559189764136e-06, + "loss": 0.3349, + "step": 24726 + }, + { + "epoch": 0.4949978730325551, + "grad_norm": 1.059778094291687, + "learning_rate": 5.323632416451004e-06, + "loss": 0.3074, + "step": 24727 + }, + { + "epoch": 0.49501789154968345, + "grad_norm": 1.1340447664260864, + "learning_rate": 5.323308912565113e-06, + "loss": 0.2964, + "step": 24728 + }, + { + "epoch": 0.4950379100668118, + "grad_norm": 1.0455533266067505, + "learning_rate": 5.3229854073200995e-06, + "loss": 0.3299, + "step": 24729 + }, + { + "epoch": 0.49505792858394015, + "grad_norm": 1.9264442920684814, + "learning_rate": 5.322661900717325e-06, + "loss": 0.7995, + "step": 24730 + }, + { + "epoch": 0.4950779471010685, + "grad_norm": 1.129820704460144, + "learning_rate": 5.3223383927581514e-06, + "loss": 0.2469, + "step": 24731 + }, + { + "epoch": 0.49509796561819686, + "grad_norm": 1.1066337823867798, + "learning_rate": 5.322014883443934e-06, + "loss": 0.3179, + "step": 24732 + }, + { + "epoch": 0.49511798413532515, + "grad_norm": 1.0339399576187134, + "learning_rate": 5.321691372776039e-06, + "loss": 0.309, + "step": 24733 + }, + { + "epoch": 0.4951380026524535, + "grad_norm": 1.041884183883667, + "learning_rate": 5.32136786075582e-06, + "loss": 0.264, + "step": 24734 + }, + { + "epoch": 0.49515802116958185, + "grad_norm": 1.0738787651062012, + "learning_rate": 5.321044347384638e-06, + "loss": 0.2812, + "step": 24735 + }, + { + "epoch": 0.4951780396867102, + "grad_norm": 1.0212091207504272, + "learning_rate": 5.320720832663859e-06, + "loss": 0.3283, + "step": 24736 + }, + { + "epoch": 0.49519805820383855, + "grad_norm": 1.7335658073425293, + "learning_rate": 5.320397316594836e-06, + "loss": 0.7698, + "step": 24737 + }, + { + "epoch": 0.4952180767209669, + "grad_norm": 1.4653161764144897, + "learning_rate": 5.320073799178932e-06, + "loss": 0.3081, + "step": 24738 + }, + { + "epoch": 0.49523809523809526, + "grad_norm": 1.2837164402008057, + "learning_rate": 5.319750280417506e-06, + "loss": 0.302, + "step": 24739 + }, + { + "epoch": 0.4952581137552236, + "grad_norm": 1.1758930683135986, + "learning_rate": 5.31942676031192e-06, + "loss": 0.2958, + "step": 24740 + }, + { + "epoch": 0.4952781322723519, + "grad_norm": 1.1492300033569336, + "learning_rate": 5.319103238863531e-06, + "loss": 0.2962, + "step": 24741 + }, + { + "epoch": 0.49529815078948025, + "grad_norm": 1.2245240211486816, + "learning_rate": 5.318779716073702e-06, + "loss": 0.3252, + "step": 24742 + }, + { + "epoch": 0.4953181693066086, + "grad_norm": 1.1809000968933105, + "learning_rate": 5.31845619194379e-06, + "loss": 0.3113, + "step": 24743 + }, + { + "epoch": 0.49533818782373695, + "grad_norm": 1.063077449798584, + "learning_rate": 5.318132666475159e-06, + "loss": 0.2409, + "step": 24744 + }, + { + "epoch": 0.4953582063408653, + "grad_norm": 1.178694486618042, + "learning_rate": 5.317809139669166e-06, + "loss": 0.3232, + "step": 24745 + }, + { + "epoch": 0.49537822485799365, + "grad_norm": 1.1347886323928833, + "learning_rate": 5.317485611527171e-06, + "loss": 0.2965, + "step": 24746 + }, + { + "epoch": 0.495398243375122, + "grad_norm": 1.0748682022094727, + "learning_rate": 5.317162082050536e-06, + "loss": 0.2985, + "step": 24747 + }, + { + "epoch": 0.49541826189225036, + "grad_norm": 1.1135832071304321, + "learning_rate": 5.316838551240617e-06, + "loss": 0.3881, + "step": 24748 + }, + { + "epoch": 0.49543828040937865, + "grad_norm": 1.2385574579238892, + "learning_rate": 5.31651501909878e-06, + "loss": 0.3691, + "step": 24749 + }, + { + "epoch": 0.495458298926507, + "grad_norm": 1.1008217334747314, + "learning_rate": 5.31619148562638e-06, + "loss": 0.3502, + "step": 24750 + }, + { + "epoch": 0.49547831744363535, + "grad_norm": 1.17833411693573, + "learning_rate": 5.3158679508247824e-06, + "loss": 0.333, + "step": 24751 + }, + { + "epoch": 0.4954983359607637, + "grad_norm": 1.2150362730026245, + "learning_rate": 5.315544414695341e-06, + "loss": 0.26, + "step": 24752 + }, + { + "epoch": 0.49551835447789205, + "grad_norm": 1.0657546520233154, + "learning_rate": 5.3152208772394196e-06, + "loss": 0.2751, + "step": 24753 + }, + { + "epoch": 0.4955383729950204, + "grad_norm": 1.9547452926635742, + "learning_rate": 5.314897338458377e-06, + "loss": 0.8538, + "step": 24754 + }, + { + "epoch": 0.49555839151214875, + "grad_norm": 1.1317445039749146, + "learning_rate": 5.314573798353573e-06, + "loss": 0.2911, + "step": 24755 + }, + { + "epoch": 0.4955784100292771, + "grad_norm": 1.8802522420883179, + "learning_rate": 5.314250256926369e-06, + "loss": 0.8036, + "step": 24756 + }, + { + "epoch": 0.4955984285464054, + "grad_norm": 1.0090011358261108, + "learning_rate": 5.313926714178126e-06, + "loss": 0.2929, + "step": 24757 + }, + { + "epoch": 0.49561844706353375, + "grad_norm": 1.0705962181091309, + "learning_rate": 5.313603170110202e-06, + "loss": 0.3087, + "step": 24758 + }, + { + "epoch": 0.4956384655806621, + "grad_norm": 1.1959315538406372, + "learning_rate": 5.313279624723957e-06, + "loss": 0.33, + "step": 24759 + }, + { + "epoch": 0.49565848409779045, + "grad_norm": 1.1666871309280396, + "learning_rate": 5.312956078020752e-06, + "loss": 0.348, + "step": 24760 + }, + { + "epoch": 0.4956785026149188, + "grad_norm": 1.5497568845748901, + "learning_rate": 5.312632530001946e-06, + "loss": 0.3009, + "step": 24761 + }, + { + "epoch": 0.49569852113204715, + "grad_norm": 1.9517980813980103, + "learning_rate": 5.312308980668902e-06, + "loss": 0.7965, + "step": 24762 + }, + { + "epoch": 0.4957185396491755, + "grad_norm": 1.135452151298523, + "learning_rate": 5.3119854300229775e-06, + "loss": 0.2909, + "step": 24763 + }, + { + "epoch": 0.49573855816630386, + "grad_norm": 1.0657894611358643, + "learning_rate": 5.311661878065535e-06, + "loss": 0.3377, + "step": 24764 + }, + { + "epoch": 0.49575857668343215, + "grad_norm": 1.1350938081741333, + "learning_rate": 5.3113383247979296e-06, + "loss": 0.2956, + "step": 24765 + }, + { + "epoch": 0.4957785952005605, + "grad_norm": 1.9097167253494263, + "learning_rate": 5.311014770221528e-06, + "loss": 0.8061, + "step": 24766 + }, + { + "epoch": 0.49579861371768885, + "grad_norm": 1.8213711977005005, + "learning_rate": 5.310691214337686e-06, + "loss": 0.817, + "step": 24767 + }, + { + "epoch": 0.4958186322348172, + "grad_norm": 1.1412230730056763, + "learning_rate": 5.310367657147765e-06, + "loss": 0.317, + "step": 24768 + }, + { + "epoch": 0.49583865075194555, + "grad_norm": 1.0468190908432007, + "learning_rate": 5.3100440986531256e-06, + "loss": 0.279, + "step": 24769 + }, + { + "epoch": 0.4958586692690739, + "grad_norm": 1.853459358215332, + "learning_rate": 5.309720538855126e-06, + "loss": 0.8072, + "step": 24770 + }, + { + "epoch": 0.49587868778620225, + "grad_norm": 1.119691014289856, + "learning_rate": 5.309396977755131e-06, + "loss": 0.3572, + "step": 24771 + }, + { + "epoch": 0.4958987063033306, + "grad_norm": 1.104768991470337, + "learning_rate": 5.309073415354495e-06, + "loss": 0.3345, + "step": 24772 + }, + { + "epoch": 0.4959187248204589, + "grad_norm": 1.1973294019699097, + "learning_rate": 5.308749851654581e-06, + "loss": 0.3215, + "step": 24773 + }, + { + "epoch": 0.49593874333758725, + "grad_norm": 1.1084414720535278, + "learning_rate": 5.308426286656749e-06, + "loss": 0.3024, + "step": 24774 + }, + { + "epoch": 0.4959587618547156, + "grad_norm": 1.0378683805465698, + "learning_rate": 5.3081027203623604e-06, + "loss": 0.2491, + "step": 24775 + }, + { + "epoch": 0.49597878037184395, + "grad_norm": 1.1555120944976807, + "learning_rate": 5.307779152772774e-06, + "loss": 0.3827, + "step": 24776 + }, + { + "epoch": 0.4959987988889723, + "grad_norm": 1.03326416015625, + "learning_rate": 5.307455583889351e-06, + "loss": 0.3225, + "step": 24777 + }, + { + "epoch": 0.49601881740610065, + "grad_norm": 1.0517728328704834, + "learning_rate": 5.307132013713451e-06, + "loss": 0.3063, + "step": 24778 + }, + { + "epoch": 0.496038835923229, + "grad_norm": 1.2749500274658203, + "learning_rate": 5.306808442246432e-06, + "loss": 0.2651, + "step": 24779 + }, + { + "epoch": 0.49605885444035736, + "grad_norm": 1.3187878131866455, + "learning_rate": 5.306484869489656e-06, + "loss": 0.3137, + "step": 24780 + }, + { + "epoch": 0.49607887295748565, + "grad_norm": 1.3026427030563354, + "learning_rate": 5.306161295444486e-06, + "loss": 0.3457, + "step": 24781 + }, + { + "epoch": 0.496098891474614, + "grad_norm": 1.197938323020935, + "learning_rate": 5.305837720112279e-06, + "loss": 0.3062, + "step": 24782 + }, + { + "epoch": 0.49611890999174235, + "grad_norm": 1.1212502717971802, + "learning_rate": 5.305514143494396e-06, + "loss": 0.2823, + "step": 24783 + }, + { + "epoch": 0.4961389285088707, + "grad_norm": 1.1330708265304565, + "learning_rate": 5.305190565592197e-06, + "loss": 0.3564, + "step": 24784 + }, + { + "epoch": 0.49615894702599905, + "grad_norm": 0.9880305528640747, + "learning_rate": 5.304866986407043e-06, + "loss": 0.2818, + "step": 24785 + }, + { + "epoch": 0.4961789655431274, + "grad_norm": 1.9100791215896606, + "learning_rate": 5.3045434059402925e-06, + "loss": 0.8078, + "step": 24786 + }, + { + "epoch": 0.49619898406025575, + "grad_norm": 1.277075171470642, + "learning_rate": 5.304219824193307e-06, + "loss": 0.2949, + "step": 24787 + }, + { + "epoch": 0.4962190025773841, + "grad_norm": 1.8581256866455078, + "learning_rate": 5.303896241167449e-06, + "loss": 0.7884, + "step": 24788 + }, + { + "epoch": 0.4962390210945124, + "grad_norm": 1.1298999786376953, + "learning_rate": 5.303572656864076e-06, + "loss": 0.3151, + "step": 24789 + }, + { + "epoch": 0.49625903961164075, + "grad_norm": 2.0023577213287354, + "learning_rate": 5.303249071284549e-06, + "loss": 0.7598, + "step": 24790 + }, + { + "epoch": 0.4962790581287691, + "grad_norm": 1.217253565788269, + "learning_rate": 5.302925484430227e-06, + "loss": 0.3251, + "step": 24791 + }, + { + "epoch": 0.49629907664589745, + "grad_norm": 1.1054719686508179, + "learning_rate": 5.302601896302472e-06, + "loss": 0.3072, + "step": 24792 + }, + { + "epoch": 0.4963190951630258, + "grad_norm": 1.074611783027649, + "learning_rate": 5.302278306902643e-06, + "loss": 0.3444, + "step": 24793 + }, + { + "epoch": 0.49633911368015415, + "grad_norm": 1.2612937688827515, + "learning_rate": 5.301954716232101e-06, + "loss": 0.2775, + "step": 24794 + }, + { + "epoch": 0.4963591321972825, + "grad_norm": 1.207456111907959, + "learning_rate": 5.301631124292208e-06, + "loss": 0.3168, + "step": 24795 + }, + { + "epoch": 0.49637915071441086, + "grad_norm": 1.17790949344635, + "learning_rate": 5.301307531084321e-06, + "loss": 0.3389, + "step": 24796 + }, + { + "epoch": 0.49639916923153915, + "grad_norm": 1.3537201881408691, + "learning_rate": 5.3009839366098035e-06, + "loss": 0.2851, + "step": 24797 + }, + { + "epoch": 0.4964191877486675, + "grad_norm": 1.2012829780578613, + "learning_rate": 5.300660340870014e-06, + "loss": 0.3157, + "step": 24798 + }, + { + "epoch": 0.49643920626579585, + "grad_norm": 1.0955829620361328, + "learning_rate": 5.300336743866312e-06, + "loss": 0.2888, + "step": 24799 + }, + { + "epoch": 0.4964592247829242, + "grad_norm": 1.1280052661895752, + "learning_rate": 5.30001314560006e-06, + "loss": 0.321, + "step": 24800 + }, + { + "epoch": 0.49647924330005255, + "grad_norm": 1.2688639163970947, + "learning_rate": 5.299689546072616e-06, + "loss": 0.3432, + "step": 24801 + }, + { + "epoch": 0.4964992618171809, + "grad_norm": 1.0420664548873901, + "learning_rate": 5.299365945285344e-06, + "loss": 0.2721, + "step": 24802 + }, + { + "epoch": 0.49651928033430925, + "grad_norm": 1.2078073024749756, + "learning_rate": 5.2990423432396e-06, + "loss": 0.2762, + "step": 24803 + }, + { + "epoch": 0.4965392988514376, + "grad_norm": 1.1943994760513306, + "learning_rate": 5.298718739936748e-06, + "loss": 0.3538, + "step": 24804 + }, + { + "epoch": 0.4965593173685659, + "grad_norm": 1.597374439239502, + "learning_rate": 5.298395135378145e-06, + "loss": 0.3513, + "step": 24805 + }, + { + "epoch": 0.49657933588569425, + "grad_norm": 1.0921891927719116, + "learning_rate": 5.298071529565153e-06, + "loss": 0.3139, + "step": 24806 + }, + { + "epoch": 0.4965993544028226, + "grad_norm": 1.119157075881958, + "learning_rate": 5.297747922499132e-06, + "loss": 0.3161, + "step": 24807 + }, + { + "epoch": 0.49661937291995095, + "grad_norm": 1.203871488571167, + "learning_rate": 5.297424314181445e-06, + "loss": 0.3122, + "step": 24808 + }, + { + "epoch": 0.4966393914370793, + "grad_norm": 1.1196975708007812, + "learning_rate": 5.2971007046134506e-06, + "loss": 0.3121, + "step": 24809 + }, + { + "epoch": 0.49665940995420765, + "grad_norm": 1.0711101293563843, + "learning_rate": 5.296777093796507e-06, + "loss": 0.3465, + "step": 24810 + }, + { + "epoch": 0.496679428471336, + "grad_norm": 1.1230084896087646, + "learning_rate": 5.296453481731977e-06, + "loss": 0.3231, + "step": 24811 + }, + { + "epoch": 0.49669944698846435, + "grad_norm": 1.3402807712554932, + "learning_rate": 5.296129868421218e-06, + "loss": 0.3424, + "step": 24812 + }, + { + "epoch": 0.49671946550559265, + "grad_norm": 1.7703039646148682, + "learning_rate": 5.2958062538655956e-06, + "loss": 0.8179, + "step": 24813 + }, + { + "epoch": 0.496739484022721, + "grad_norm": 1.0631530284881592, + "learning_rate": 5.295482638066466e-06, + "loss": 0.2745, + "step": 24814 + }, + { + "epoch": 0.49675950253984935, + "grad_norm": 1.1810051202774048, + "learning_rate": 5.2951590210251925e-06, + "loss": 0.3211, + "step": 24815 + }, + { + "epoch": 0.4967795210569777, + "grad_norm": 1.1839650869369507, + "learning_rate": 5.2948354027431325e-06, + "loss": 0.3107, + "step": 24816 + }, + { + "epoch": 0.49679953957410605, + "grad_norm": 1.9072020053863525, + "learning_rate": 5.294511783221649e-06, + "loss": 0.7663, + "step": 24817 + }, + { + "epoch": 0.4968195580912344, + "grad_norm": 1.2953652143478394, + "learning_rate": 5.294188162462101e-06, + "loss": 0.3774, + "step": 24818 + }, + { + "epoch": 0.49683957660836275, + "grad_norm": 1.2196764945983887, + "learning_rate": 5.293864540465849e-06, + "loss": 0.2901, + "step": 24819 + }, + { + "epoch": 0.4968595951254911, + "grad_norm": 1.029542088508606, + "learning_rate": 5.293540917234253e-06, + "loss": 0.2915, + "step": 24820 + }, + { + "epoch": 0.4968796136426194, + "grad_norm": 1.0783839225769043, + "learning_rate": 5.293217292768676e-06, + "loss": 0.3376, + "step": 24821 + }, + { + "epoch": 0.49689963215974775, + "grad_norm": 1.1991969347000122, + "learning_rate": 5.2928936670704754e-06, + "loss": 0.3185, + "step": 24822 + }, + { + "epoch": 0.4969196506768761, + "grad_norm": 1.4128249883651733, + "learning_rate": 5.292570040141012e-06, + "loss": 0.311, + "step": 24823 + }, + { + "epoch": 0.49693966919400445, + "grad_norm": 1.098583698272705, + "learning_rate": 5.292246411981648e-06, + "loss": 0.3214, + "step": 24824 + }, + { + "epoch": 0.4969596877111328, + "grad_norm": 1.1184923648834229, + "learning_rate": 5.291922782593742e-06, + "loss": 0.3101, + "step": 24825 + }, + { + "epoch": 0.49697970622826115, + "grad_norm": 1.0334199666976929, + "learning_rate": 5.291599151978658e-06, + "loss": 0.3244, + "step": 24826 + }, + { + "epoch": 0.4969997247453895, + "grad_norm": 1.148675799369812, + "learning_rate": 5.291275520137752e-06, + "loss": 0.3256, + "step": 24827 + }, + { + "epoch": 0.49701974326251785, + "grad_norm": 1.0315945148468018, + "learning_rate": 5.290951887072388e-06, + "loss": 0.2923, + "step": 24828 + }, + { + "epoch": 0.49703976177964615, + "grad_norm": 2.047868251800537, + "learning_rate": 5.2906282527839224e-06, + "loss": 0.7782, + "step": 24829 + }, + { + "epoch": 0.4970597802967745, + "grad_norm": 1.177822232246399, + "learning_rate": 5.2903046172737205e-06, + "loss": 0.3259, + "step": 24830 + }, + { + "epoch": 0.49707979881390285, + "grad_norm": 1.9293341636657715, + "learning_rate": 5.289980980543139e-06, + "loss": 0.8199, + "step": 24831 + }, + { + "epoch": 0.4970998173310312, + "grad_norm": 1.1672146320343018, + "learning_rate": 5.28965734259354e-06, + "loss": 0.2983, + "step": 24832 + }, + { + "epoch": 0.49711983584815955, + "grad_norm": 1.0797057151794434, + "learning_rate": 5.289333703426285e-06, + "loss": 0.3734, + "step": 24833 + }, + { + "epoch": 0.4971398543652879, + "grad_norm": 1.9814085960388184, + "learning_rate": 5.289010063042732e-06, + "loss": 0.8316, + "step": 24834 + }, + { + "epoch": 0.49715987288241625, + "grad_norm": 1.1376276016235352, + "learning_rate": 5.288686421444244e-06, + "loss": 0.3001, + "step": 24835 + }, + { + "epoch": 0.4971798913995446, + "grad_norm": 1.2362806797027588, + "learning_rate": 5.288362778632179e-06, + "loss": 0.3199, + "step": 24836 + }, + { + "epoch": 0.4971999099166729, + "grad_norm": 1.1236225366592407, + "learning_rate": 5.288039134607901e-06, + "loss": 0.276, + "step": 24837 + }, + { + "epoch": 0.49721992843380125, + "grad_norm": 1.156025767326355, + "learning_rate": 5.287715489372766e-06, + "loss": 0.34, + "step": 24838 + }, + { + "epoch": 0.4972399469509296, + "grad_norm": 1.1862778663635254, + "learning_rate": 5.287391842928138e-06, + "loss": 0.3229, + "step": 24839 + }, + { + "epoch": 0.49725996546805795, + "grad_norm": 1.0505475997924805, + "learning_rate": 5.287068195275378e-06, + "loss": 0.2976, + "step": 24840 + }, + { + "epoch": 0.4972799839851863, + "grad_norm": 1.2298190593719482, + "learning_rate": 5.2867445464158425e-06, + "loss": 0.3174, + "step": 24841 + }, + { + "epoch": 0.49730000250231465, + "grad_norm": 1.0984784364700317, + "learning_rate": 5.286420896350898e-06, + "loss": 0.3265, + "step": 24842 + }, + { + "epoch": 0.497320021019443, + "grad_norm": 1.1340014934539795, + "learning_rate": 5.286097245081898e-06, + "loss": 0.3467, + "step": 24843 + }, + { + "epoch": 0.49734003953657135, + "grad_norm": 1.1029235124588013, + "learning_rate": 5.285773592610209e-06, + "loss": 0.3454, + "step": 24844 + }, + { + "epoch": 0.49736005805369965, + "grad_norm": 1.9337557554244995, + "learning_rate": 5.2854499389371885e-06, + "loss": 0.7777, + "step": 24845 + }, + { + "epoch": 0.497380076570828, + "grad_norm": 1.14785897731781, + "learning_rate": 5.285126284064198e-06, + "loss": 0.296, + "step": 24846 + }, + { + "epoch": 0.49740009508795635, + "grad_norm": 1.165901780128479, + "learning_rate": 5.284802627992598e-06, + "loss": 0.3182, + "step": 24847 + }, + { + "epoch": 0.4974201136050847, + "grad_norm": 1.0792005062103271, + "learning_rate": 5.2844789707237495e-06, + "loss": 0.3403, + "step": 24848 + }, + { + "epoch": 0.49744013212221305, + "grad_norm": 1.182032585144043, + "learning_rate": 5.2841553122590115e-06, + "loss": 0.3118, + "step": 24849 + }, + { + "epoch": 0.4974601506393414, + "grad_norm": 1.3021117448806763, + "learning_rate": 5.283831652599748e-06, + "loss": 0.3121, + "step": 24850 + }, + { + "epoch": 0.49748016915646975, + "grad_norm": 1.043323278427124, + "learning_rate": 5.283507991747315e-06, + "loss": 0.3288, + "step": 24851 + }, + { + "epoch": 0.4975001876735981, + "grad_norm": 1.145735263824463, + "learning_rate": 5.283184329703076e-06, + "loss": 0.3347, + "step": 24852 + }, + { + "epoch": 0.4975202061907264, + "grad_norm": 1.0316816568374634, + "learning_rate": 5.282860666468392e-06, + "loss": 0.31, + "step": 24853 + }, + { + "epoch": 0.49754022470785475, + "grad_norm": 1.081318736076355, + "learning_rate": 5.282537002044621e-06, + "loss": 0.3163, + "step": 24854 + }, + { + "epoch": 0.4975602432249831, + "grad_norm": 1.826902985572815, + "learning_rate": 5.282213336433127e-06, + "loss": 0.7583, + "step": 24855 + }, + { + "epoch": 0.49758026174211145, + "grad_norm": 1.1042585372924805, + "learning_rate": 5.281889669635267e-06, + "loss": 0.3269, + "step": 24856 + }, + { + "epoch": 0.4976002802592398, + "grad_norm": 1.1652032136917114, + "learning_rate": 5.281566001652405e-06, + "loss": 0.3229, + "step": 24857 + }, + { + "epoch": 0.49762029877636815, + "grad_norm": 1.1386185884475708, + "learning_rate": 5.281242332485899e-06, + "loss": 0.311, + "step": 24858 + }, + { + "epoch": 0.4976403172934965, + "grad_norm": 1.1540119647979736, + "learning_rate": 5.2809186621371125e-06, + "loss": 0.3113, + "step": 24859 + }, + { + "epoch": 0.49766033581062485, + "grad_norm": 1.068377137184143, + "learning_rate": 5.280594990607403e-06, + "loss": 0.2735, + "step": 24860 + }, + { + "epoch": 0.49768035432775315, + "grad_norm": 1.0877522230148315, + "learning_rate": 5.280271317898132e-06, + "loss": 0.3087, + "step": 24861 + }, + { + "epoch": 0.4977003728448815, + "grad_norm": 0.9842978715896606, + "learning_rate": 5.279947644010664e-06, + "loss": 0.2889, + "step": 24862 + }, + { + "epoch": 0.49772039136200985, + "grad_norm": 1.144879937171936, + "learning_rate": 5.279623968946353e-06, + "loss": 0.2847, + "step": 24863 + }, + { + "epoch": 0.4977404098791382, + "grad_norm": 1.2565386295318604, + "learning_rate": 5.279300292706563e-06, + "loss": 0.2925, + "step": 24864 + }, + { + "epoch": 0.49776042839626655, + "grad_norm": 2.0660762786865234, + "learning_rate": 5.278976615292654e-06, + "loss": 0.7974, + "step": 24865 + }, + { + "epoch": 0.4977804469133949, + "grad_norm": 1.0726181268692017, + "learning_rate": 5.2786529367059904e-06, + "loss": 0.3078, + "step": 24866 + }, + { + "epoch": 0.49780046543052325, + "grad_norm": 1.9504671096801758, + "learning_rate": 5.278329256947928e-06, + "loss": 0.769, + "step": 24867 + }, + { + "epoch": 0.4978204839476516, + "grad_norm": 1.1006234884262085, + "learning_rate": 5.27800557601983e-06, + "loss": 0.2838, + "step": 24868 + }, + { + "epoch": 0.4978405024647799, + "grad_norm": 1.1253312826156616, + "learning_rate": 5.277681893923056e-06, + "loss": 0.3393, + "step": 24869 + }, + { + "epoch": 0.49786052098190825, + "grad_norm": 1.0673774480819702, + "learning_rate": 5.277358210658966e-06, + "loss": 0.2993, + "step": 24870 + }, + { + "epoch": 0.4978805394990366, + "grad_norm": 1.2068631649017334, + "learning_rate": 5.277034526228924e-06, + "loss": 0.3543, + "step": 24871 + }, + { + "epoch": 0.49790055801616495, + "grad_norm": 1.15134859085083, + "learning_rate": 5.276710840634287e-06, + "loss": 0.3213, + "step": 24872 + }, + { + "epoch": 0.4979205765332933, + "grad_norm": 1.1864509582519531, + "learning_rate": 5.276387153876418e-06, + "loss": 0.3076, + "step": 24873 + }, + { + "epoch": 0.49794059505042165, + "grad_norm": 1.1801730394363403, + "learning_rate": 5.2760634659566755e-06, + "loss": 0.3182, + "step": 24874 + }, + { + "epoch": 0.49796061356755, + "grad_norm": 1.1120448112487793, + "learning_rate": 5.275739776876424e-06, + "loss": 0.312, + "step": 24875 + }, + { + "epoch": 0.49798063208467835, + "grad_norm": 1.2059787511825562, + "learning_rate": 5.27541608663702e-06, + "loss": 0.3259, + "step": 24876 + }, + { + "epoch": 0.49800065060180665, + "grad_norm": 1.041070580482483, + "learning_rate": 5.275092395239826e-06, + "loss": 0.3134, + "step": 24877 + }, + { + "epoch": 0.498020669118935, + "grad_norm": 1.1799780130386353, + "learning_rate": 5.274768702686202e-06, + "loss": 0.3429, + "step": 24878 + }, + { + "epoch": 0.49804068763606335, + "grad_norm": 1.040431022644043, + "learning_rate": 5.274445008977511e-06, + "loss": 0.3255, + "step": 24879 + }, + { + "epoch": 0.4980607061531917, + "grad_norm": 1.1491917371749878, + "learning_rate": 5.274121314115112e-06, + "loss": 0.3268, + "step": 24880 + }, + { + "epoch": 0.49808072467032005, + "grad_norm": 1.8436130285263062, + "learning_rate": 5.273797618100366e-06, + "loss": 0.7404, + "step": 24881 + }, + { + "epoch": 0.4981007431874484, + "grad_norm": 1.1832882165908813, + "learning_rate": 5.273473920934633e-06, + "loss": 0.3147, + "step": 24882 + }, + { + "epoch": 0.49812076170457675, + "grad_norm": 1.276059865951538, + "learning_rate": 5.273150222619276e-06, + "loss": 0.2967, + "step": 24883 + }, + { + "epoch": 0.4981407802217051, + "grad_norm": 1.0651488304138184, + "learning_rate": 5.272826523155653e-06, + "loss": 0.2935, + "step": 24884 + }, + { + "epoch": 0.4981607987388334, + "grad_norm": 1.250198245048523, + "learning_rate": 5.272502822545126e-06, + "loss": 0.2794, + "step": 24885 + }, + { + "epoch": 0.49818081725596175, + "grad_norm": 1.9394001960754395, + "learning_rate": 5.272179120789056e-06, + "loss": 0.8172, + "step": 24886 + }, + { + "epoch": 0.4982008357730901, + "grad_norm": 1.0758202075958252, + "learning_rate": 5.271855417888803e-06, + "loss": 0.3199, + "step": 24887 + }, + { + "epoch": 0.49822085429021845, + "grad_norm": 1.0323048830032349, + "learning_rate": 5.27153171384573e-06, + "loss": 0.2992, + "step": 24888 + }, + { + "epoch": 0.4982408728073468, + "grad_norm": 0.9989657402038574, + "learning_rate": 5.271208008661195e-06, + "loss": 0.3057, + "step": 24889 + }, + { + "epoch": 0.49826089132447515, + "grad_norm": 1.0909186601638794, + "learning_rate": 5.27088430233656e-06, + "loss": 0.3089, + "step": 24890 + }, + { + "epoch": 0.4982809098416035, + "grad_norm": 1.0262506008148193, + "learning_rate": 5.270560594873185e-06, + "loss": 0.3492, + "step": 24891 + }, + { + "epoch": 0.49830092835873185, + "grad_norm": 1.0728586912155151, + "learning_rate": 5.270236886272433e-06, + "loss": 0.312, + "step": 24892 + }, + { + "epoch": 0.49832094687586015, + "grad_norm": 1.2193092107772827, + "learning_rate": 5.2699131765356624e-06, + "loss": 0.3333, + "step": 24893 + }, + { + "epoch": 0.4983409653929885, + "grad_norm": 2.151153087615967, + "learning_rate": 5.2695894656642345e-06, + "loss": 0.8249, + "step": 24894 + }, + { + "epoch": 0.49836098391011685, + "grad_norm": 1.1491085290908813, + "learning_rate": 5.26926575365951e-06, + "loss": 0.2902, + "step": 24895 + }, + { + "epoch": 0.4983810024272452, + "grad_norm": 1.1801927089691162, + "learning_rate": 5.268942040522852e-06, + "loss": 0.3202, + "step": 24896 + }, + { + "epoch": 0.49840102094437355, + "grad_norm": 1.8610607385635376, + "learning_rate": 5.268618326255619e-06, + "loss": 0.7572, + "step": 24897 + }, + { + "epoch": 0.4984210394615019, + "grad_norm": 1.1396667957305908, + "learning_rate": 5.2682946108591715e-06, + "loss": 0.3401, + "step": 24898 + }, + { + "epoch": 0.49844105797863025, + "grad_norm": 0.9862982630729675, + "learning_rate": 5.267970894334873e-06, + "loss": 0.2625, + "step": 24899 + }, + { + "epoch": 0.4984610764957586, + "grad_norm": 1.0397424697875977, + "learning_rate": 5.26764717668408e-06, + "loss": 0.2974, + "step": 24900 + }, + { + "epoch": 0.4984810950128869, + "grad_norm": 1.0514612197875977, + "learning_rate": 5.267323457908158e-06, + "loss": 0.3006, + "step": 24901 + }, + { + "epoch": 0.49850111353001525, + "grad_norm": 1.0836790800094604, + "learning_rate": 5.266999738008465e-06, + "loss": 0.3469, + "step": 24902 + }, + { + "epoch": 0.4985211320471436, + "grad_norm": 2.0491061210632324, + "learning_rate": 5.2666760169863615e-06, + "loss": 0.7843, + "step": 24903 + }, + { + "epoch": 0.49854115056427195, + "grad_norm": 1.8681687116622925, + "learning_rate": 5.266352294843211e-06, + "loss": 0.7503, + "step": 24904 + }, + { + "epoch": 0.4985611690814003, + "grad_norm": 1.146941065788269, + "learning_rate": 5.266028571580371e-06, + "loss": 0.2841, + "step": 24905 + }, + { + "epoch": 0.49858118759852865, + "grad_norm": 1.013414978981018, + "learning_rate": 5.265704847199207e-06, + "loss": 0.289, + "step": 24906 + }, + { + "epoch": 0.498601206115657, + "grad_norm": 1.0860649347305298, + "learning_rate": 5.265381121701074e-06, + "loss": 0.3165, + "step": 24907 + }, + { + "epoch": 0.49862122463278535, + "grad_norm": 1.3796671628952026, + "learning_rate": 5.265057395087337e-06, + "loss": 0.2969, + "step": 24908 + }, + { + "epoch": 0.49864124314991365, + "grad_norm": 1.0685518980026245, + "learning_rate": 5.2647336673593554e-06, + "loss": 0.304, + "step": 24909 + }, + { + "epoch": 0.498661261667042, + "grad_norm": 1.1166304349899292, + "learning_rate": 5.264409938518491e-06, + "loss": 0.2882, + "step": 24910 + }, + { + "epoch": 0.49868128018417035, + "grad_norm": 1.0985760688781738, + "learning_rate": 5.2640862085661035e-06, + "loss": 0.3315, + "step": 24911 + }, + { + "epoch": 0.4987012987012987, + "grad_norm": 1.0783616304397583, + "learning_rate": 5.2637624775035565e-06, + "loss": 0.3186, + "step": 24912 + }, + { + "epoch": 0.49872131721842705, + "grad_norm": 1.9866992235183716, + "learning_rate": 5.263438745332208e-06, + "loss": 0.8361, + "step": 24913 + }, + { + "epoch": 0.4987413357355554, + "grad_norm": 1.5405186414718628, + "learning_rate": 5.2631150120534184e-06, + "loss": 0.3501, + "step": 24914 + }, + { + "epoch": 0.49876135425268375, + "grad_norm": 1.1479101181030273, + "learning_rate": 5.26279127766855e-06, + "loss": 0.361, + "step": 24915 + }, + { + "epoch": 0.4987813727698121, + "grad_norm": 1.0810185670852661, + "learning_rate": 5.262467542178964e-06, + "loss": 0.3444, + "step": 24916 + }, + { + "epoch": 0.4988013912869404, + "grad_norm": 1.1667753458023071, + "learning_rate": 5.262143805586021e-06, + "loss": 0.3169, + "step": 24917 + }, + { + "epoch": 0.49882140980406875, + "grad_norm": 1.1033687591552734, + "learning_rate": 5.2618200678910814e-06, + "loss": 0.2946, + "step": 24918 + }, + { + "epoch": 0.4988414283211971, + "grad_norm": 1.0158483982086182, + "learning_rate": 5.261496329095508e-06, + "loss": 0.2788, + "step": 24919 + }, + { + "epoch": 0.49886144683832545, + "grad_norm": 1.0996623039245605, + "learning_rate": 5.261172589200658e-06, + "loss": 0.3161, + "step": 24920 + }, + { + "epoch": 0.4988814653554538, + "grad_norm": 1.1140711307525635, + "learning_rate": 5.2608488482078965e-06, + "loss": 0.3249, + "step": 24921 + }, + { + "epoch": 0.49890148387258215, + "grad_norm": 0.9999697208404541, + "learning_rate": 5.260525106118581e-06, + "loss": 0.2984, + "step": 24922 + }, + { + "epoch": 0.4989215023897105, + "grad_norm": 1.1928441524505615, + "learning_rate": 5.260201362934076e-06, + "loss": 0.3604, + "step": 24923 + }, + { + "epoch": 0.49894152090683885, + "grad_norm": 1.1491422653198242, + "learning_rate": 5.25987761865574e-06, + "loss": 0.3228, + "step": 24924 + }, + { + "epoch": 0.49896153942396715, + "grad_norm": 1.086130976676941, + "learning_rate": 5.259553873284934e-06, + "loss": 0.2698, + "step": 24925 + }, + { + "epoch": 0.4989815579410955, + "grad_norm": 1.1684277057647705, + "learning_rate": 5.259230126823019e-06, + "loss": 0.2916, + "step": 24926 + }, + { + "epoch": 0.49900157645822385, + "grad_norm": 1.124587059020996, + "learning_rate": 5.2589063792713564e-06, + "loss": 0.2945, + "step": 24927 + }, + { + "epoch": 0.4990215949753522, + "grad_norm": 1.2412611246109009, + "learning_rate": 5.258582630631308e-06, + "loss": 0.2648, + "step": 24928 + }, + { + "epoch": 0.49904161349248055, + "grad_norm": 1.121218204498291, + "learning_rate": 5.258258880904232e-06, + "loss": 0.317, + "step": 24929 + }, + { + "epoch": 0.4990616320096089, + "grad_norm": 1.1149306297302246, + "learning_rate": 5.257935130091494e-06, + "loss": 0.2826, + "step": 24930 + }, + { + "epoch": 0.49908165052673725, + "grad_norm": 1.1490856409072876, + "learning_rate": 5.257611378194451e-06, + "loss": 0.3385, + "step": 24931 + }, + { + "epoch": 0.4991016690438656, + "grad_norm": 1.1068401336669922, + "learning_rate": 5.2572876252144644e-06, + "loss": 0.3067, + "step": 24932 + }, + { + "epoch": 0.4991216875609939, + "grad_norm": 1.177515983581543, + "learning_rate": 5.256963871152896e-06, + "loss": 0.292, + "step": 24933 + }, + { + "epoch": 0.49914170607812225, + "grad_norm": 1.0987498760223389, + "learning_rate": 5.256640116011107e-06, + "loss": 0.3224, + "step": 24934 + }, + { + "epoch": 0.4991617245952506, + "grad_norm": 1.064842700958252, + "learning_rate": 5.2563163597904585e-06, + "loss": 0.2766, + "step": 24935 + }, + { + "epoch": 0.49918174311237895, + "grad_norm": 1.113845944404602, + "learning_rate": 5.255992602492311e-06, + "loss": 0.3309, + "step": 24936 + }, + { + "epoch": 0.4992017616295073, + "grad_norm": 0.9957998991012573, + "learning_rate": 5.255668844118027e-06, + "loss": 0.2848, + "step": 24937 + }, + { + "epoch": 0.49922178014663565, + "grad_norm": 1.121642827987671, + "learning_rate": 5.2553450846689646e-06, + "loss": 0.2894, + "step": 24938 + }, + { + "epoch": 0.499241798663764, + "grad_norm": 1.2008256912231445, + "learning_rate": 5.255021324146488e-06, + "loss": 0.3366, + "step": 24939 + }, + { + "epoch": 0.4992618171808923, + "grad_norm": 1.340238094329834, + "learning_rate": 5.254697562551955e-06, + "loss": 0.3158, + "step": 24940 + }, + { + "epoch": 0.49928183569802065, + "grad_norm": 1.8257662057876587, + "learning_rate": 5.254373799886729e-06, + "loss": 0.8251, + "step": 24941 + }, + { + "epoch": 0.499301854215149, + "grad_norm": 1.0715070962905884, + "learning_rate": 5.2540500361521684e-06, + "loss": 0.301, + "step": 24942 + }, + { + "epoch": 0.49932187273227735, + "grad_norm": 1.0339218378067017, + "learning_rate": 5.2537262713496386e-06, + "loss": 0.2587, + "step": 24943 + }, + { + "epoch": 0.4993418912494057, + "grad_norm": 1.111744999885559, + "learning_rate": 5.2534025054804985e-06, + "loss": 0.316, + "step": 24944 + }, + { + "epoch": 0.49936190976653405, + "grad_norm": 1.231610894203186, + "learning_rate": 5.253078738546107e-06, + "loss": 0.3485, + "step": 24945 + }, + { + "epoch": 0.4993819282836624, + "grad_norm": 1.858683466911316, + "learning_rate": 5.252754970547827e-06, + "loss": 0.7967, + "step": 24946 + }, + { + "epoch": 0.49940194680079075, + "grad_norm": 1.1000908613204956, + "learning_rate": 5.252431201487019e-06, + "loss": 0.2831, + "step": 24947 + }, + { + "epoch": 0.49942196531791905, + "grad_norm": 1.1304931640625, + "learning_rate": 5.252107431365046e-06, + "loss": 0.3426, + "step": 24948 + }, + { + "epoch": 0.4994419838350474, + "grad_norm": 1.1247209310531616, + "learning_rate": 5.251783660183267e-06, + "loss": 0.2946, + "step": 24949 + }, + { + "epoch": 0.49946200235217575, + "grad_norm": 1.162566065788269, + "learning_rate": 5.2514598879430445e-06, + "loss": 0.2888, + "step": 24950 + }, + { + "epoch": 0.4994820208693041, + "grad_norm": 1.8979763984680176, + "learning_rate": 5.251136114645739e-06, + "loss": 0.8919, + "step": 24951 + }, + { + "epoch": 0.49950203938643245, + "grad_norm": 1.0624443292617798, + "learning_rate": 5.25081234029271e-06, + "loss": 0.3075, + "step": 24952 + }, + { + "epoch": 0.4995220579035608, + "grad_norm": 1.0734788179397583, + "learning_rate": 5.25048856488532e-06, + "loss": 0.3112, + "step": 24953 + }, + { + "epoch": 0.49954207642068915, + "grad_norm": 1.191178321838379, + "learning_rate": 5.250164788424931e-06, + "loss": 0.3094, + "step": 24954 + }, + { + "epoch": 0.4995620949378175, + "grad_norm": 1.051900863647461, + "learning_rate": 5.249841010912903e-06, + "loss": 0.2923, + "step": 24955 + }, + { + "epoch": 0.4995821134549458, + "grad_norm": 1.0566219091415405, + "learning_rate": 5.249517232350595e-06, + "loss": 0.3072, + "step": 24956 + }, + { + "epoch": 0.49960213197207415, + "grad_norm": 1.2266291379928589, + "learning_rate": 5.2491934527393726e-06, + "loss": 0.3625, + "step": 24957 + }, + { + "epoch": 0.4996221504892025, + "grad_norm": 1.010859727859497, + "learning_rate": 5.2488696720805934e-06, + "loss": 0.3607, + "step": 24958 + }, + { + "epoch": 0.49964216900633085, + "grad_norm": 1.1195951700210571, + "learning_rate": 5.24854589037562e-06, + "loss": 0.308, + "step": 24959 + }, + { + "epoch": 0.4996621875234592, + "grad_norm": 1.013061285018921, + "learning_rate": 5.248222107625812e-06, + "loss": 0.343, + "step": 24960 + }, + { + "epoch": 0.49968220604058755, + "grad_norm": 1.7568762302398682, + "learning_rate": 5.247898323832534e-06, + "loss": 0.8066, + "step": 24961 + }, + { + "epoch": 0.4997022245577159, + "grad_norm": 1.1111419200897217, + "learning_rate": 5.247574538997143e-06, + "loss": 0.2866, + "step": 24962 + }, + { + "epoch": 0.49972224307484425, + "grad_norm": 1.0597878694534302, + "learning_rate": 5.247250753121004e-06, + "loss": 0.3011, + "step": 24963 + }, + { + "epoch": 0.49974226159197255, + "grad_norm": 1.115969181060791, + "learning_rate": 5.2469269662054736e-06, + "loss": 0.3248, + "step": 24964 + }, + { + "epoch": 0.4997622801091009, + "grad_norm": 1.0928668975830078, + "learning_rate": 5.246603178251918e-06, + "loss": 0.3297, + "step": 24965 + }, + { + "epoch": 0.49978229862622925, + "grad_norm": 1.0433557033538818, + "learning_rate": 5.246279389261692e-06, + "loss": 0.2973, + "step": 24966 + }, + { + "epoch": 0.4998023171433576, + "grad_norm": 1.2896474599838257, + "learning_rate": 5.245955599236164e-06, + "loss": 0.3163, + "step": 24967 + }, + { + "epoch": 0.49982233566048595, + "grad_norm": 1.1064817905426025, + "learning_rate": 5.24563180817669e-06, + "loss": 0.316, + "step": 24968 + }, + { + "epoch": 0.4998423541776143, + "grad_norm": 1.0062228441238403, + "learning_rate": 5.245308016084633e-06, + "loss": 0.3047, + "step": 24969 + }, + { + "epoch": 0.49986237269474265, + "grad_norm": 1.1613174676895142, + "learning_rate": 5.244984222961353e-06, + "loss": 0.2897, + "step": 24970 + }, + { + "epoch": 0.499882391211871, + "grad_norm": 1.1967693567276, + "learning_rate": 5.244660428808213e-06, + "loss": 0.3035, + "step": 24971 + }, + { + "epoch": 0.4999024097289993, + "grad_norm": 1.2907123565673828, + "learning_rate": 5.244336633626573e-06, + "loss": 0.3673, + "step": 24972 + }, + { + "epoch": 0.49992242824612765, + "grad_norm": 1.0403791666030884, + "learning_rate": 5.244012837417793e-06, + "loss": 0.3173, + "step": 24973 + }, + { + "epoch": 0.499942446763256, + "grad_norm": 1.1464914083480835, + "learning_rate": 5.243689040183238e-06, + "loss": 0.3234, + "step": 24974 + }, + { + "epoch": 0.49996246528038435, + "grad_norm": 1.0723148584365845, + "learning_rate": 5.2433652419242655e-06, + "loss": 0.3016, + "step": 24975 + }, + { + "epoch": 0.4999824837975127, + "grad_norm": 0.9975376129150391, + "learning_rate": 5.24304144264224e-06, + "loss": 0.2767, + "step": 24976 + }, + { + "epoch": 0.500002502314641, + "grad_norm": 1.087916374206543, + "learning_rate": 5.2427176423385185e-06, + "loss": 0.2855, + "step": 24977 + }, + { + "epoch": 0.5000225208317693, + "grad_norm": 1.4825628995895386, + "learning_rate": 5.242393841014464e-06, + "loss": 0.3613, + "step": 24978 + }, + { + "epoch": 0.5000425393488978, + "grad_norm": 1.117910623550415, + "learning_rate": 5.242070038671437e-06, + "loss": 0.351, + "step": 24979 + }, + { + "epoch": 0.500062557866026, + "grad_norm": 0.952975869178772, + "learning_rate": 5.2417462353108e-06, + "loss": 0.2724, + "step": 24980 + }, + { + "epoch": 0.5000825763831545, + "grad_norm": 1.0593698024749756, + "learning_rate": 5.241422430933917e-06, + "loss": 0.3172, + "step": 24981 + }, + { + "epoch": 0.5001025949002827, + "grad_norm": 1.211763620376587, + "learning_rate": 5.2410986255421435e-06, + "loss": 0.3251, + "step": 24982 + }, + { + "epoch": 0.5001226134174112, + "grad_norm": 1.0968040227890015, + "learning_rate": 5.240774819136843e-06, + "loss": 0.3006, + "step": 24983 + }, + { + "epoch": 0.5001426319345395, + "grad_norm": 1.102608323097229, + "learning_rate": 5.240451011719379e-06, + "loss": 0.2875, + "step": 24984 + }, + { + "epoch": 0.5001626504516677, + "grad_norm": 1.25043523311615, + "learning_rate": 5.240127203291108e-06, + "loss": 0.3378, + "step": 24985 + }, + { + "epoch": 0.5001826689687962, + "grad_norm": 0.9925844073295593, + "learning_rate": 5.239803393853397e-06, + "loss": 0.297, + "step": 24986 + }, + { + "epoch": 0.5002026874859244, + "grad_norm": 1.9387233257293701, + "learning_rate": 5.2394795834076004e-06, + "loss": 0.8045, + "step": 24987 + }, + { + "epoch": 0.5002227060030529, + "grad_norm": 1.041316032409668, + "learning_rate": 5.239155771955087e-06, + "loss": 0.3166, + "step": 24988 + }, + { + "epoch": 0.5002427245201811, + "grad_norm": 1.756335735321045, + "learning_rate": 5.238831959497212e-06, + "loss": 0.721, + "step": 24989 + }, + { + "epoch": 0.5002627430373096, + "grad_norm": 1.0533435344696045, + "learning_rate": 5.2385081460353395e-06, + "loss": 0.2696, + "step": 24990 + }, + { + "epoch": 0.5002827615544378, + "grad_norm": 1.2825640439987183, + "learning_rate": 5.238184331570829e-06, + "loss": 0.3016, + "step": 24991 + }, + { + "epoch": 0.5003027800715661, + "grad_norm": 1.1892280578613281, + "learning_rate": 5.237860516105044e-06, + "loss": 0.3376, + "step": 24992 + }, + { + "epoch": 0.5003227985886946, + "grad_norm": 1.9678326845169067, + "learning_rate": 5.237536699639343e-06, + "loss": 0.7946, + "step": 24993 + }, + { + "epoch": 0.5003428171058228, + "grad_norm": 1.978553056716919, + "learning_rate": 5.237212882175091e-06, + "loss": 0.8294, + "step": 24994 + }, + { + "epoch": 0.5003628356229513, + "grad_norm": 1.0387470722198486, + "learning_rate": 5.236889063713646e-06, + "loss": 0.3118, + "step": 24995 + }, + { + "epoch": 0.5003828541400795, + "grad_norm": 1.2811583280563354, + "learning_rate": 5.236565244256371e-06, + "loss": 0.3091, + "step": 24996 + }, + { + "epoch": 0.500402872657208, + "grad_norm": 1.2171587944030762, + "learning_rate": 5.236241423804627e-06, + "loss": 0.3094, + "step": 24997 + }, + { + "epoch": 0.5004228911743362, + "grad_norm": 1.2224265336990356, + "learning_rate": 5.2359176023597716e-06, + "loss": 0.3436, + "step": 24998 + }, + { + "epoch": 0.5004429096914647, + "grad_norm": 1.1277718544006348, + "learning_rate": 5.235593779923173e-06, + "loss": 0.3142, + "step": 24999 + }, + { + "epoch": 0.500462928208593, + "grad_norm": 1.0398602485656738, + "learning_rate": 5.235269956496187e-06, + "loss": 0.2901, + "step": 25000 + }, + { + "epoch": 0.5004829467257212, + "grad_norm": 1.1386536359786987, + "learning_rate": 5.234946132080177e-06, + "loss": 0.3182, + "step": 25001 + }, + { + "epoch": 0.5005029652428497, + "grad_norm": 1.141409993171692, + "learning_rate": 5.234622306676504e-06, + "loss": 0.2739, + "step": 25002 + }, + { + "epoch": 0.500522983759978, + "grad_norm": 0.9929104447364807, + "learning_rate": 5.234298480286531e-06, + "loss": 0.275, + "step": 25003 + }, + { + "epoch": 0.5005430022771064, + "grad_norm": 1.1565214395523071, + "learning_rate": 5.233974652911617e-06, + "loss": 0.3232, + "step": 25004 + }, + { + "epoch": 0.5005630207942346, + "grad_norm": 1.0584590435028076, + "learning_rate": 5.233650824553122e-06, + "loss": 0.3171, + "step": 25005 + }, + { + "epoch": 0.500583039311363, + "grad_norm": 1.1431095600128174, + "learning_rate": 5.23332699521241e-06, + "loss": 0.3121, + "step": 25006 + }, + { + "epoch": 0.5006030578284913, + "grad_norm": 0.9824585914611816, + "learning_rate": 5.233003164890842e-06, + "loss": 0.2703, + "step": 25007 + }, + { + "epoch": 0.5006230763456196, + "grad_norm": 1.0934326648712158, + "learning_rate": 5.23267933358978e-06, + "loss": 0.282, + "step": 25008 + }, + { + "epoch": 0.500643094862748, + "grad_norm": 1.094812273979187, + "learning_rate": 5.232355501310582e-06, + "loss": 0.2983, + "step": 25009 + }, + { + "epoch": 0.5006631133798763, + "grad_norm": 1.1538201570510864, + "learning_rate": 5.2320316680546135e-06, + "loss": 0.3608, + "step": 25010 + }, + { + "epoch": 0.5006831318970048, + "grad_norm": 1.03367280960083, + "learning_rate": 5.231707833823231e-06, + "loss": 0.2966, + "step": 25011 + }, + { + "epoch": 0.500703150414133, + "grad_norm": 1.1737180948257446, + "learning_rate": 5.2313839986178015e-06, + "loss": 0.2805, + "step": 25012 + }, + { + "epoch": 0.5007231689312615, + "grad_norm": 1.0399415493011475, + "learning_rate": 5.231060162439682e-06, + "loss": 0.2814, + "step": 25013 + }, + { + "epoch": 0.5007431874483897, + "grad_norm": 1.0832993984222412, + "learning_rate": 5.230736325290235e-06, + "loss": 0.3177, + "step": 25014 + }, + { + "epoch": 0.5007632059655182, + "grad_norm": 1.1687923669815063, + "learning_rate": 5.230412487170823e-06, + "loss": 0.3562, + "step": 25015 + }, + { + "epoch": 0.5007832244826464, + "grad_norm": 1.164257526397705, + "learning_rate": 5.230088648082807e-06, + "loss": 0.2607, + "step": 25016 + }, + { + "epoch": 0.5008032429997747, + "grad_norm": 1.4571304321289062, + "learning_rate": 5.229764808027547e-06, + "loss": 0.29, + "step": 25017 + }, + { + "epoch": 0.5008232615169032, + "grad_norm": 1.1269755363464355, + "learning_rate": 5.229440967006405e-06, + "loss": 0.2846, + "step": 25018 + }, + { + "epoch": 0.5008432800340314, + "grad_norm": 1.0640575885772705, + "learning_rate": 5.229117125020743e-06, + "loss": 0.3121, + "step": 25019 + }, + { + "epoch": 0.5008632985511599, + "grad_norm": 1.3477928638458252, + "learning_rate": 5.228793282071921e-06, + "loss": 0.362, + "step": 25020 + }, + { + "epoch": 0.5008833170682881, + "grad_norm": 1.8252463340759277, + "learning_rate": 5.228469438161303e-06, + "loss": 0.7873, + "step": 25021 + }, + { + "epoch": 0.5009033355854166, + "grad_norm": 1.0625312328338623, + "learning_rate": 5.228145593290247e-06, + "loss": 0.2707, + "step": 25022 + }, + { + "epoch": 0.5009233541025448, + "grad_norm": 1.1926462650299072, + "learning_rate": 5.227821747460118e-06, + "loss": 0.2832, + "step": 25023 + }, + { + "epoch": 0.5009433726196731, + "grad_norm": 0.9724442958831787, + "learning_rate": 5.227497900672272e-06, + "loss": 0.2861, + "step": 25024 + }, + { + "epoch": 0.5009633911368016, + "grad_norm": 1.264422059059143, + "learning_rate": 5.227174052928077e-06, + "loss": 0.3109, + "step": 25025 + }, + { + "epoch": 0.5009834096539298, + "grad_norm": 1.3124417066574097, + "learning_rate": 5.2268502042288896e-06, + "loss": 0.3209, + "step": 25026 + }, + { + "epoch": 0.5010034281710583, + "grad_norm": 1.1465730667114258, + "learning_rate": 5.226526354576073e-06, + "loss": 0.322, + "step": 25027 + }, + { + "epoch": 0.5010234466881865, + "grad_norm": 1.123128890991211, + "learning_rate": 5.22620250397099e-06, + "loss": 0.288, + "step": 25028 + }, + { + "epoch": 0.501043465205315, + "grad_norm": 1.1776009798049927, + "learning_rate": 5.225878652414998e-06, + "loss": 0.3326, + "step": 25029 + }, + { + "epoch": 0.5010634837224432, + "grad_norm": 1.1300865411758423, + "learning_rate": 5.2255547999094614e-06, + "loss": 0.3094, + "step": 25030 + }, + { + "epoch": 0.5010835022395717, + "grad_norm": 1.132469892501831, + "learning_rate": 5.225230946455741e-06, + "loss": 0.3386, + "step": 25031 + }, + { + "epoch": 0.5011035207567, + "grad_norm": 1.858018398284912, + "learning_rate": 5.224907092055199e-06, + "loss": 0.8376, + "step": 25032 + }, + { + "epoch": 0.5011235392738282, + "grad_norm": 1.1247007846832275, + "learning_rate": 5.224583236709195e-06, + "loss": 0.2945, + "step": 25033 + }, + { + "epoch": 0.5011435577909567, + "grad_norm": 1.076358675956726, + "learning_rate": 5.224259380419092e-06, + "loss": 0.2666, + "step": 25034 + }, + { + "epoch": 0.501163576308085, + "grad_norm": 1.1386148929595947, + "learning_rate": 5.223935523186251e-06, + "loss": 0.3079, + "step": 25035 + }, + { + "epoch": 0.5011835948252134, + "grad_norm": 1.0743861198425293, + "learning_rate": 5.223611665012033e-06, + "loss": 0.294, + "step": 25036 + }, + { + "epoch": 0.5012036133423416, + "grad_norm": 1.104262113571167, + "learning_rate": 5.223287805897798e-06, + "loss": 0.3274, + "step": 25037 + }, + { + "epoch": 0.50122363185947, + "grad_norm": 1.0988292694091797, + "learning_rate": 5.222963945844911e-06, + "loss": 0.3088, + "step": 25038 + }, + { + "epoch": 0.5012436503765983, + "grad_norm": 1.1367402076721191, + "learning_rate": 5.222640084854732e-06, + "loss": 0.2921, + "step": 25039 + }, + { + "epoch": 0.5012636688937266, + "grad_norm": 1.0740975141525269, + "learning_rate": 5.2223162229286214e-06, + "loss": 0.3314, + "step": 25040 + }, + { + "epoch": 0.501283687410855, + "grad_norm": 1.0181807279586792, + "learning_rate": 5.221992360067941e-06, + "loss": 0.2856, + "step": 25041 + }, + { + "epoch": 0.5013037059279833, + "grad_norm": 1.1039087772369385, + "learning_rate": 5.221668496274053e-06, + "loss": 0.2899, + "step": 25042 + }, + { + "epoch": 0.5013237244451118, + "grad_norm": 0.9753841757774353, + "learning_rate": 5.2213446315483176e-06, + "loss": 0.3125, + "step": 25043 + }, + { + "epoch": 0.50134374296224, + "grad_norm": 1.1907827854156494, + "learning_rate": 5.221020765892096e-06, + "loss": 0.3032, + "step": 25044 + }, + { + "epoch": 0.5013637614793685, + "grad_norm": 1.9909170866012573, + "learning_rate": 5.220696899306753e-06, + "loss": 0.8022, + "step": 25045 + }, + { + "epoch": 0.5013837799964967, + "grad_norm": 1.1295124292373657, + "learning_rate": 5.220373031793646e-06, + "loss": 0.3168, + "step": 25046 + }, + { + "epoch": 0.5014037985136252, + "grad_norm": 1.1475162506103516, + "learning_rate": 5.220049163354139e-06, + "loss": 0.3256, + "step": 25047 + }, + { + "epoch": 0.5014238170307534, + "grad_norm": 1.1237201690673828, + "learning_rate": 5.219725293989592e-06, + "loss": 0.3354, + "step": 25048 + }, + { + "epoch": 0.5014438355478817, + "grad_norm": 1.8871009349822998, + "learning_rate": 5.219401423701369e-06, + "loss": 0.8277, + "step": 25049 + }, + { + "epoch": 0.5014638540650102, + "grad_norm": 1.176240086555481, + "learning_rate": 5.219077552490828e-06, + "loss": 0.2753, + "step": 25050 + }, + { + "epoch": 0.5014838725821384, + "grad_norm": 1.2376842498779297, + "learning_rate": 5.218753680359332e-06, + "loss": 0.3702, + "step": 25051 + }, + { + "epoch": 0.5015038910992669, + "grad_norm": 1.1418979167938232, + "learning_rate": 5.218429807308244e-06, + "loss": 0.3269, + "step": 25052 + }, + { + "epoch": 0.5015239096163951, + "grad_norm": 1.0951415300369263, + "learning_rate": 5.218105933338923e-06, + "loss": 0.2929, + "step": 25053 + }, + { + "epoch": 0.5015439281335236, + "grad_norm": 1.2396867275238037, + "learning_rate": 5.217782058452732e-06, + "loss": 0.3396, + "step": 25054 + }, + { + "epoch": 0.5015639466506518, + "grad_norm": 1.0760774612426758, + "learning_rate": 5.217458182651032e-06, + "loss": 0.2872, + "step": 25055 + }, + { + "epoch": 0.5015839651677801, + "grad_norm": 1.2114522457122803, + "learning_rate": 5.2171343059351835e-06, + "loss": 0.2977, + "step": 25056 + }, + { + "epoch": 0.5016039836849085, + "grad_norm": 1.8393453359603882, + "learning_rate": 5.2168104283065504e-06, + "loss": 0.8067, + "step": 25057 + }, + { + "epoch": 0.5016240022020368, + "grad_norm": 1.04549241065979, + "learning_rate": 5.216486549766493e-06, + "loss": 0.3079, + "step": 25058 + }, + { + "epoch": 0.5016440207191653, + "grad_norm": 1.020755410194397, + "learning_rate": 5.216162670316372e-06, + "loss": 0.2671, + "step": 25059 + }, + { + "epoch": 0.5016640392362935, + "grad_norm": 1.0997893810272217, + "learning_rate": 5.2158387899575505e-06, + "loss": 0.3207, + "step": 25060 + }, + { + "epoch": 0.501684057753422, + "grad_norm": 1.153502345085144, + "learning_rate": 5.2155149086913895e-06, + "loss": 0.3511, + "step": 25061 + }, + { + "epoch": 0.5017040762705502, + "grad_norm": 1.1876378059387207, + "learning_rate": 5.215191026519249e-06, + "loss": 0.2998, + "step": 25062 + }, + { + "epoch": 0.5017240947876787, + "grad_norm": 1.081858515739441, + "learning_rate": 5.2148671434424915e-06, + "loss": 0.317, + "step": 25063 + }, + { + "epoch": 0.501744113304807, + "grad_norm": 1.0639455318450928, + "learning_rate": 5.2145432594624776e-06, + "loss": 0.3053, + "step": 25064 + }, + { + "epoch": 0.5017641318219352, + "grad_norm": 1.02530038356781, + "learning_rate": 5.2142193745805735e-06, + "loss": 0.2974, + "step": 25065 + }, + { + "epoch": 0.5017841503390637, + "grad_norm": 1.0320444107055664, + "learning_rate": 5.2138954887981345e-06, + "loss": 0.3094, + "step": 25066 + }, + { + "epoch": 0.501804168856192, + "grad_norm": 1.1575889587402344, + "learning_rate": 5.213571602116528e-06, + "loss": 0.2815, + "step": 25067 + }, + { + "epoch": 0.5018241873733204, + "grad_norm": 1.1062334775924683, + "learning_rate": 5.213247714537108e-06, + "loss": 0.309, + "step": 25068 + }, + { + "epoch": 0.5018442058904486, + "grad_norm": 1.107582926750183, + "learning_rate": 5.2129238260612425e-06, + "loss": 0.3334, + "step": 25069 + }, + { + "epoch": 0.501864224407577, + "grad_norm": 1.1576251983642578, + "learning_rate": 5.2125999366902925e-06, + "loss": 0.3243, + "step": 25070 + }, + { + "epoch": 0.5018842429247053, + "grad_norm": 1.2378758192062378, + "learning_rate": 5.212276046425616e-06, + "loss": 0.3033, + "step": 25071 + }, + { + "epoch": 0.5019042614418336, + "grad_norm": 1.0810295343399048, + "learning_rate": 5.211952155268579e-06, + "loss": 0.3112, + "step": 25072 + }, + { + "epoch": 0.501924279958962, + "grad_norm": 1.0799386501312256, + "learning_rate": 5.211628263220539e-06, + "loss": 0.3153, + "step": 25073 + }, + { + "epoch": 0.5019442984760903, + "grad_norm": 1.2683290243148804, + "learning_rate": 5.211304370282861e-06, + "loss": 0.2904, + "step": 25074 + }, + { + "epoch": 0.5019643169932188, + "grad_norm": 1.1419795751571655, + "learning_rate": 5.210980476456903e-06, + "loss": 0.3401, + "step": 25075 + }, + { + "epoch": 0.501984335510347, + "grad_norm": 1.1054695844650269, + "learning_rate": 5.210656581744028e-06, + "loss": 0.3388, + "step": 25076 + }, + { + "epoch": 0.5020043540274755, + "grad_norm": 1.0988531112670898, + "learning_rate": 5.210332686145599e-06, + "loss": 0.3196, + "step": 25077 + }, + { + "epoch": 0.5020243725446037, + "grad_norm": 1.2397302389144897, + "learning_rate": 5.210008789662978e-06, + "loss": 0.2732, + "step": 25078 + }, + { + "epoch": 0.5020443910617322, + "grad_norm": 1.2514201402664185, + "learning_rate": 5.209684892297523e-06, + "loss": 0.298, + "step": 25079 + }, + { + "epoch": 0.5020644095788604, + "grad_norm": 2.0022075176239014, + "learning_rate": 5.2093609940505994e-06, + "loss": 0.7391, + "step": 25080 + }, + { + "epoch": 0.5020844280959887, + "grad_norm": 1.09577476978302, + "learning_rate": 5.209037094923567e-06, + "loss": 0.3255, + "step": 25081 + }, + { + "epoch": 0.5021044466131172, + "grad_norm": 1.093526840209961, + "learning_rate": 5.208713194917787e-06, + "loss": 0.3015, + "step": 25082 + }, + { + "epoch": 0.5021244651302454, + "grad_norm": 1.0853354930877686, + "learning_rate": 5.208389294034623e-06, + "loss": 0.2823, + "step": 25083 + }, + { + "epoch": 0.5021444836473739, + "grad_norm": 1.1170355081558228, + "learning_rate": 5.208065392275433e-06, + "loss": 0.3027, + "step": 25084 + }, + { + "epoch": 0.5021645021645021, + "grad_norm": 1.9256407022476196, + "learning_rate": 5.207741489641584e-06, + "loss": 0.7786, + "step": 25085 + }, + { + "epoch": 0.5021845206816306, + "grad_norm": 1.1596646308898926, + "learning_rate": 5.207417586134431e-06, + "loss": 0.3085, + "step": 25086 + }, + { + "epoch": 0.5022045391987588, + "grad_norm": 1.2490160465240479, + "learning_rate": 5.2070936817553416e-06, + "loss": 0.3262, + "step": 25087 + }, + { + "epoch": 0.5022245577158871, + "grad_norm": 1.073777437210083, + "learning_rate": 5.206769776505675e-06, + "loss": 0.3527, + "step": 25088 + }, + { + "epoch": 0.5022445762330155, + "grad_norm": 1.2621793746948242, + "learning_rate": 5.20644587038679e-06, + "loss": 0.3177, + "step": 25089 + }, + { + "epoch": 0.5022645947501438, + "grad_norm": 1.1733219623565674, + "learning_rate": 5.206121963400054e-06, + "loss": 0.3538, + "step": 25090 + }, + { + "epoch": 0.5022846132672723, + "grad_norm": 1.2502824068069458, + "learning_rate": 5.205798055546825e-06, + "loss": 0.3276, + "step": 25091 + }, + { + "epoch": 0.5023046317844005, + "grad_norm": 1.0979028940200806, + "learning_rate": 5.205474146828464e-06, + "loss": 0.279, + "step": 25092 + }, + { + "epoch": 0.502324650301529, + "grad_norm": 1.1287647485733032, + "learning_rate": 5.2051502372463345e-06, + "loss": 0.2894, + "step": 25093 + }, + { + "epoch": 0.5023446688186572, + "grad_norm": 1.1030491590499878, + "learning_rate": 5.204826326801798e-06, + "loss": 0.2958, + "step": 25094 + }, + { + "epoch": 0.5023646873357857, + "grad_norm": 1.8566666841506958, + "learning_rate": 5.204502415496215e-06, + "loss": 0.7863, + "step": 25095 + }, + { + "epoch": 0.502384705852914, + "grad_norm": 1.3096623420715332, + "learning_rate": 5.204178503330949e-06, + "loss": 0.3243, + "step": 25096 + }, + { + "epoch": 0.5024047243700422, + "grad_norm": 1.7382441759109497, + "learning_rate": 5.203854590307359e-06, + "loss": 0.8227, + "step": 25097 + }, + { + "epoch": 0.5024247428871707, + "grad_norm": 1.1762990951538086, + "learning_rate": 5.203530676426809e-06, + "loss": 0.2982, + "step": 25098 + }, + { + "epoch": 0.502444761404299, + "grad_norm": 1.0716524124145508, + "learning_rate": 5.20320676169066e-06, + "loss": 0.324, + "step": 25099 + }, + { + "epoch": 0.5024647799214274, + "grad_norm": 1.2640150785446167, + "learning_rate": 5.202882846100273e-06, + "loss": 0.3305, + "step": 25100 + }, + { + "epoch": 0.5024847984385556, + "grad_norm": 1.125490665435791, + "learning_rate": 5.20255892965701e-06, + "loss": 0.3205, + "step": 25101 + }, + { + "epoch": 0.502504816955684, + "grad_norm": 1.1139596700668335, + "learning_rate": 5.202235012362233e-06, + "loss": 0.2918, + "step": 25102 + }, + { + "epoch": 0.5025248354728123, + "grad_norm": 1.1165401935577393, + "learning_rate": 5.2019110942173045e-06, + "loss": 0.292, + "step": 25103 + }, + { + "epoch": 0.5025448539899406, + "grad_norm": 1.088091254234314, + "learning_rate": 5.2015871752235846e-06, + "loss": 0.3314, + "step": 25104 + }, + { + "epoch": 0.502564872507069, + "grad_norm": 1.1986196041107178, + "learning_rate": 5.201263255382436e-06, + "loss": 0.2955, + "step": 25105 + }, + { + "epoch": 0.5025848910241973, + "grad_norm": 1.9604195356369019, + "learning_rate": 5.2009393346952194e-06, + "loss": 0.8482, + "step": 25106 + }, + { + "epoch": 0.5026049095413258, + "grad_norm": 1.1871426105499268, + "learning_rate": 5.200615413163297e-06, + "loss": 0.3235, + "step": 25107 + }, + { + "epoch": 0.502624928058454, + "grad_norm": 1.1489627361297607, + "learning_rate": 5.20029149078803e-06, + "loss": 0.3147, + "step": 25108 + }, + { + "epoch": 0.5026449465755825, + "grad_norm": 1.1224881410598755, + "learning_rate": 5.199967567570782e-06, + "loss": 0.3158, + "step": 25109 + }, + { + "epoch": 0.5026649650927107, + "grad_norm": 1.182691216468811, + "learning_rate": 5.1996436435129125e-06, + "loss": 0.3204, + "step": 25110 + }, + { + "epoch": 0.5026849836098392, + "grad_norm": 1.0584337711334229, + "learning_rate": 5.199319718615785e-06, + "loss": 0.2896, + "step": 25111 + }, + { + "epoch": 0.5027050021269674, + "grad_norm": 1.2152131795883179, + "learning_rate": 5.19899579288076e-06, + "loss": 0.3496, + "step": 25112 + }, + { + "epoch": 0.5027250206440957, + "grad_norm": 1.1327331066131592, + "learning_rate": 5.198671866309199e-06, + "loss": 0.3195, + "step": 25113 + }, + { + "epoch": 0.5027450391612241, + "grad_norm": 1.1495548486709595, + "learning_rate": 5.1983479389024635e-06, + "loss": 0.2955, + "step": 25114 + }, + { + "epoch": 0.5027650576783524, + "grad_norm": 1.0466285943984985, + "learning_rate": 5.198024010661917e-06, + "loss": 0.3097, + "step": 25115 + }, + { + "epoch": 0.5027850761954809, + "grad_norm": 1.0997952222824097, + "learning_rate": 5.197700081588921e-06, + "loss": 0.3098, + "step": 25116 + }, + { + "epoch": 0.5028050947126091, + "grad_norm": 1.9274464845657349, + "learning_rate": 5.1973761516848354e-06, + "loss": 0.7665, + "step": 25117 + }, + { + "epoch": 0.5028251132297376, + "grad_norm": 1.1256054639816284, + "learning_rate": 5.197052220951023e-06, + "loss": 0.3554, + "step": 25118 + }, + { + "epoch": 0.5028451317468658, + "grad_norm": 1.0592358112335205, + "learning_rate": 5.196728289388845e-06, + "loss": 0.3401, + "step": 25119 + }, + { + "epoch": 0.5028651502639941, + "grad_norm": 1.1395573616027832, + "learning_rate": 5.196404356999663e-06, + "loss": 0.2846, + "step": 25120 + }, + { + "epoch": 0.5028851687811225, + "grad_norm": 1.3436142206192017, + "learning_rate": 5.19608042378484e-06, + "loss": 0.3179, + "step": 25121 + }, + { + "epoch": 0.5029051872982508, + "grad_norm": 1.0058377981185913, + "learning_rate": 5.195756489745739e-06, + "loss": 0.2822, + "step": 25122 + }, + { + "epoch": 0.5029252058153793, + "grad_norm": 1.1000664234161377, + "learning_rate": 5.195432554883719e-06, + "loss": 0.32, + "step": 25123 + }, + { + "epoch": 0.5029452243325075, + "grad_norm": 1.9430571794509888, + "learning_rate": 5.195108619200142e-06, + "loss": 0.8078, + "step": 25124 + }, + { + "epoch": 0.502965242849636, + "grad_norm": 1.0128810405731201, + "learning_rate": 5.194784682696371e-06, + "loss": 0.3136, + "step": 25125 + }, + { + "epoch": 0.5029852613667642, + "grad_norm": 1.0726218223571777, + "learning_rate": 5.194460745373766e-06, + "loss": 0.2931, + "step": 25126 + }, + { + "epoch": 0.5030052798838927, + "grad_norm": 1.8493160009384155, + "learning_rate": 5.19413680723369e-06, + "loss": 0.7166, + "step": 25127 + }, + { + "epoch": 0.503025298401021, + "grad_norm": 1.1162583827972412, + "learning_rate": 5.193812868277505e-06, + "loss": 0.3321, + "step": 25128 + }, + { + "epoch": 0.5030453169181492, + "grad_norm": 1.1706287860870361, + "learning_rate": 5.193488928506572e-06, + "loss": 0.3276, + "step": 25129 + }, + { + "epoch": 0.5030653354352776, + "grad_norm": 1.0849888324737549, + "learning_rate": 5.1931649879222535e-06, + "loss": 0.3195, + "step": 25130 + }, + { + "epoch": 0.503085353952406, + "grad_norm": 1.903687596321106, + "learning_rate": 5.192841046525911e-06, + "loss": 0.8072, + "step": 25131 + }, + { + "epoch": 0.5031053724695344, + "grad_norm": 1.0729745626449585, + "learning_rate": 5.192517104318906e-06, + "loss": 0.2958, + "step": 25132 + }, + { + "epoch": 0.5031253909866626, + "grad_norm": 0.9898545145988464, + "learning_rate": 5.192193161302601e-06, + "loss": 0.2893, + "step": 25133 + }, + { + "epoch": 0.503145409503791, + "grad_norm": 1.8099677562713623, + "learning_rate": 5.191869217478358e-06, + "loss": 0.7266, + "step": 25134 + }, + { + "epoch": 0.5031654280209193, + "grad_norm": 1.1025570631027222, + "learning_rate": 5.191545272847537e-06, + "loss": 0.3052, + "step": 25135 + }, + { + "epoch": 0.5031854465380476, + "grad_norm": 1.1190723180770874, + "learning_rate": 5.191221327411501e-06, + "loss": 0.3493, + "step": 25136 + }, + { + "epoch": 0.503205465055176, + "grad_norm": 1.0999606847763062, + "learning_rate": 5.190897381171611e-06, + "loss": 0.2162, + "step": 25137 + }, + { + "epoch": 0.5032254835723043, + "grad_norm": 1.992504358291626, + "learning_rate": 5.190573434129232e-06, + "loss": 0.7797, + "step": 25138 + }, + { + "epoch": 0.5032455020894328, + "grad_norm": 1.1169493198394775, + "learning_rate": 5.19024948628572e-06, + "loss": 0.3043, + "step": 25139 + }, + { + "epoch": 0.503265520606561, + "grad_norm": 1.08407461643219, + "learning_rate": 5.189925537642442e-06, + "loss": 0.2974, + "step": 25140 + }, + { + "epoch": 0.5032855391236895, + "grad_norm": 1.0307408571243286, + "learning_rate": 5.189601588200757e-06, + "loss": 0.3539, + "step": 25141 + }, + { + "epoch": 0.5033055576408177, + "grad_norm": 1.0411427021026611, + "learning_rate": 5.189277637962029e-06, + "loss": 0.274, + "step": 25142 + }, + { + "epoch": 0.5033255761579462, + "grad_norm": 1.1137758493423462, + "learning_rate": 5.188953686927618e-06, + "loss": 0.2958, + "step": 25143 + }, + { + "epoch": 0.5033455946750744, + "grad_norm": 1.0523881912231445, + "learning_rate": 5.188629735098886e-06, + "loss": 0.2969, + "step": 25144 + }, + { + "epoch": 0.5033656131922027, + "grad_norm": 1.09810209274292, + "learning_rate": 5.1883057824771954e-06, + "loss": 0.319, + "step": 25145 + }, + { + "epoch": 0.5033856317093311, + "grad_norm": 1.2558796405792236, + "learning_rate": 5.187981829063907e-06, + "loss": 0.282, + "step": 25146 + }, + { + "epoch": 0.5034056502264594, + "grad_norm": 2.067255735397339, + "learning_rate": 5.1876578748603845e-06, + "loss": 0.7557, + "step": 25147 + }, + { + "epoch": 0.5034256687435879, + "grad_norm": 1.1242597103118896, + "learning_rate": 5.187333919867989e-06, + "loss": 0.2895, + "step": 25148 + }, + { + "epoch": 0.5034456872607161, + "grad_norm": 1.1405484676361084, + "learning_rate": 5.187009964088081e-06, + "loss": 0.2953, + "step": 25149 + }, + { + "epoch": 0.5034657057778446, + "grad_norm": 1.1672792434692383, + "learning_rate": 5.1866860075220234e-06, + "loss": 0.3801, + "step": 25150 + }, + { + "epoch": 0.5034857242949728, + "grad_norm": 1.0346001386642456, + "learning_rate": 5.18636205017118e-06, + "loss": 0.3163, + "step": 25151 + }, + { + "epoch": 0.5035057428121011, + "grad_norm": 1.1116563081741333, + "learning_rate": 5.186038092036908e-06, + "loss": 0.3066, + "step": 25152 + }, + { + "epoch": 0.5035257613292295, + "grad_norm": 1.0839961767196655, + "learning_rate": 5.185714133120573e-06, + "loss": 0.3418, + "step": 25153 + }, + { + "epoch": 0.5035457798463578, + "grad_norm": 1.0786610841751099, + "learning_rate": 5.185390173423537e-06, + "loss": 0.3082, + "step": 25154 + }, + { + "epoch": 0.5035657983634863, + "grad_norm": 1.1571112871170044, + "learning_rate": 5.1850662129471586e-06, + "loss": 0.2877, + "step": 25155 + }, + { + "epoch": 0.5035858168806145, + "grad_norm": 1.0278359651565552, + "learning_rate": 5.184742251692802e-06, + "loss": 0.269, + "step": 25156 + }, + { + "epoch": 0.503605835397743, + "grad_norm": 1.1735705137252808, + "learning_rate": 5.184418289661829e-06, + "loss": 0.3257, + "step": 25157 + }, + { + "epoch": 0.5036258539148712, + "grad_norm": 1.8721929788589478, + "learning_rate": 5.184094326855601e-06, + "loss": 0.7532, + "step": 25158 + }, + { + "epoch": 0.5036458724319997, + "grad_norm": 1.136208176612854, + "learning_rate": 5.18377036327548e-06, + "loss": 0.3336, + "step": 25159 + }, + { + "epoch": 0.503665890949128, + "grad_norm": 1.085471510887146, + "learning_rate": 5.183446398922829e-06, + "loss": 0.2908, + "step": 25160 + }, + { + "epoch": 0.5036859094662562, + "grad_norm": 1.1510833501815796, + "learning_rate": 5.183122433799009e-06, + "loss": 0.3208, + "step": 25161 + }, + { + "epoch": 0.5037059279833846, + "grad_norm": 1.1065231561660767, + "learning_rate": 5.18279846790538e-06, + "loss": 0.3741, + "step": 25162 + }, + { + "epoch": 0.5037259465005129, + "grad_norm": 1.2466745376586914, + "learning_rate": 5.182474501243307e-06, + "loss": 0.3167, + "step": 25163 + }, + { + "epoch": 0.5037459650176414, + "grad_norm": 1.396544098854065, + "learning_rate": 5.18215053381415e-06, + "loss": 0.3261, + "step": 25164 + }, + { + "epoch": 0.5037659835347696, + "grad_norm": 2.2930688858032227, + "learning_rate": 5.18182656561927e-06, + "loss": 0.7704, + "step": 25165 + }, + { + "epoch": 0.503786002051898, + "grad_norm": 1.01052725315094, + "learning_rate": 5.181502596660032e-06, + "loss": 0.3267, + "step": 25166 + }, + { + "epoch": 0.5038060205690263, + "grad_norm": 1.1340107917785645, + "learning_rate": 5.1811786269377966e-06, + "loss": 0.3459, + "step": 25167 + }, + { + "epoch": 0.5038260390861546, + "grad_norm": 1.145585298538208, + "learning_rate": 5.180854656453923e-06, + "loss": 0.2841, + "step": 25168 + }, + { + "epoch": 0.503846057603283, + "grad_norm": 1.1691410541534424, + "learning_rate": 5.180530685209777e-06, + "loss": 0.3622, + "step": 25169 + }, + { + "epoch": 0.5038660761204113, + "grad_norm": 1.087755799293518, + "learning_rate": 5.180206713206718e-06, + "loss": 0.3285, + "step": 25170 + }, + { + "epoch": 0.5038860946375397, + "grad_norm": 1.1977459192276, + "learning_rate": 5.179882740446109e-06, + "loss": 0.31, + "step": 25171 + }, + { + "epoch": 0.503906113154668, + "grad_norm": 1.0547922849655151, + "learning_rate": 5.179558766929311e-06, + "loss": 0.2772, + "step": 25172 + }, + { + "epoch": 0.5039261316717965, + "grad_norm": 1.0785229206085205, + "learning_rate": 5.179234792657689e-06, + "loss": 0.3336, + "step": 25173 + }, + { + "epoch": 0.5039461501889247, + "grad_norm": 1.159765362739563, + "learning_rate": 5.178910817632601e-06, + "loss": 0.2973, + "step": 25174 + }, + { + "epoch": 0.5039661687060532, + "grad_norm": 1.0674327611923218, + "learning_rate": 5.178586841855411e-06, + "loss": 0.3111, + "step": 25175 + }, + { + "epoch": 0.5039861872231814, + "grad_norm": 1.1011898517608643, + "learning_rate": 5.17826286532748e-06, + "loss": 0.3336, + "step": 25176 + }, + { + "epoch": 0.5040062057403097, + "grad_norm": 1.05938720703125, + "learning_rate": 5.177938888050169e-06, + "loss": 0.3481, + "step": 25177 + }, + { + "epoch": 0.5040262242574381, + "grad_norm": 1.964355707168579, + "learning_rate": 5.177614910024842e-06, + "loss": 0.7404, + "step": 25178 + }, + { + "epoch": 0.5040462427745664, + "grad_norm": 1.0977857112884521, + "learning_rate": 5.1772909312528594e-06, + "loss": 0.2972, + "step": 25179 + }, + { + "epoch": 0.5040662612916949, + "grad_norm": 1.0187627077102661, + "learning_rate": 5.176966951735586e-06, + "loss": 0.3014, + "step": 25180 + }, + { + "epoch": 0.5040862798088231, + "grad_norm": 1.0400394201278687, + "learning_rate": 5.176642971474381e-06, + "loss": 0.2964, + "step": 25181 + }, + { + "epoch": 0.5041062983259516, + "grad_norm": 1.1004852056503296, + "learning_rate": 5.176318990470607e-06, + "loss": 0.3061, + "step": 25182 + }, + { + "epoch": 0.5041263168430798, + "grad_norm": 1.257836937904358, + "learning_rate": 5.175995008725624e-06, + "loss": 0.3008, + "step": 25183 + }, + { + "epoch": 0.5041463353602081, + "grad_norm": 1.1208065748214722, + "learning_rate": 5.175671026240797e-06, + "loss": 0.3496, + "step": 25184 + }, + { + "epoch": 0.5041663538773365, + "grad_norm": 1.079818844795227, + "learning_rate": 5.175347043017488e-06, + "loss": 0.3678, + "step": 25185 + }, + { + "epoch": 0.5041863723944648, + "grad_norm": 0.9691723585128784, + "learning_rate": 5.1750230590570565e-06, + "loss": 0.2782, + "step": 25186 + }, + { + "epoch": 0.5042063909115932, + "grad_norm": 1.07576322555542, + "learning_rate": 5.174699074360866e-06, + "loss": 0.2852, + "step": 25187 + }, + { + "epoch": 0.5042264094287215, + "grad_norm": 1.8541308641433716, + "learning_rate": 5.174375088930278e-06, + "loss": 0.7641, + "step": 25188 + }, + { + "epoch": 0.50424642794585, + "grad_norm": 1.9438645839691162, + "learning_rate": 5.1740511027666555e-06, + "loss": 0.7826, + "step": 25189 + }, + { + "epoch": 0.5042664464629782, + "grad_norm": 1.339509129524231, + "learning_rate": 5.173727115871358e-06, + "loss": 0.3022, + "step": 25190 + }, + { + "epoch": 0.5042864649801067, + "grad_norm": 1.1349319219589233, + "learning_rate": 5.1734031282457495e-06, + "loss": 0.3379, + "step": 25191 + }, + { + "epoch": 0.504306483497235, + "grad_norm": 1.0349318981170654, + "learning_rate": 5.173079139891192e-06, + "loss": 0.2928, + "step": 25192 + }, + { + "epoch": 0.5043265020143632, + "grad_norm": 1.8803445100784302, + "learning_rate": 5.172755150809048e-06, + "loss": 0.8544, + "step": 25193 + }, + { + "epoch": 0.5043465205314916, + "grad_norm": 1.2027338743209839, + "learning_rate": 5.172431161000677e-06, + "loss": 0.2887, + "step": 25194 + }, + { + "epoch": 0.5043665390486199, + "grad_norm": 1.277958869934082, + "learning_rate": 5.172107170467444e-06, + "loss": 0.3288, + "step": 25195 + }, + { + "epoch": 0.5043865575657484, + "grad_norm": 1.1313045024871826, + "learning_rate": 5.17178317921071e-06, + "loss": 0.2994, + "step": 25196 + }, + { + "epoch": 0.5044065760828766, + "grad_norm": 1.1061246395111084, + "learning_rate": 5.171459187231833e-06, + "loss": 0.2818, + "step": 25197 + }, + { + "epoch": 0.504426594600005, + "grad_norm": 1.1421606540679932, + "learning_rate": 5.171135194532182e-06, + "loss": 0.3061, + "step": 25198 + }, + { + "epoch": 0.5044466131171333, + "grad_norm": 1.1444132328033447, + "learning_rate": 5.170811201113113e-06, + "loss": 0.3221, + "step": 25199 + }, + { + "epoch": 0.5044666316342616, + "grad_norm": 1.1583101749420166, + "learning_rate": 5.170487206975992e-06, + "loss": 0.2789, + "step": 25200 + }, + { + "epoch": 0.50448665015139, + "grad_norm": 1.1317492723464966, + "learning_rate": 5.170163212122179e-06, + "loss": 0.3245, + "step": 25201 + }, + { + "epoch": 0.5045066686685183, + "grad_norm": 1.0591402053833008, + "learning_rate": 5.169839216553038e-06, + "loss": 0.2766, + "step": 25202 + }, + { + "epoch": 0.5045266871856467, + "grad_norm": 1.0968172550201416, + "learning_rate": 5.169515220269927e-06, + "loss": 0.3183, + "step": 25203 + }, + { + "epoch": 0.504546705702775, + "grad_norm": 1.196642518043518, + "learning_rate": 5.16919122327421e-06, + "loss": 0.3261, + "step": 25204 + }, + { + "epoch": 0.5045667242199035, + "grad_norm": 1.2645386457443237, + "learning_rate": 5.168867225567253e-06, + "loss": 0.3457, + "step": 25205 + }, + { + "epoch": 0.5045867427370317, + "grad_norm": 2.136603832244873, + "learning_rate": 5.168543227150411e-06, + "loss": 0.761, + "step": 25206 + }, + { + "epoch": 0.5046067612541602, + "grad_norm": 1.134922742843628, + "learning_rate": 5.168219228025052e-06, + "loss": 0.3348, + "step": 25207 + }, + { + "epoch": 0.5046267797712884, + "grad_norm": 1.8731319904327393, + "learning_rate": 5.167895228192534e-06, + "loss": 0.7319, + "step": 25208 + }, + { + "epoch": 0.5046467982884167, + "grad_norm": 1.1677731275558472, + "learning_rate": 5.167571227654222e-06, + "loss": 0.3354, + "step": 25209 + }, + { + "epoch": 0.5046668168055451, + "grad_norm": 1.0242093801498413, + "learning_rate": 5.1672472264114745e-06, + "loss": 0.2932, + "step": 25210 + }, + { + "epoch": 0.5046868353226734, + "grad_norm": 1.0920023918151855, + "learning_rate": 5.166923224465657e-06, + "loss": 0.3078, + "step": 25211 + }, + { + "epoch": 0.5047068538398019, + "grad_norm": 1.003463625907898, + "learning_rate": 5.166599221818131e-06, + "loss": 0.2988, + "step": 25212 + }, + { + "epoch": 0.5047268723569301, + "grad_norm": 1.1587727069854736, + "learning_rate": 5.166275218470257e-06, + "loss": 0.3891, + "step": 25213 + }, + { + "epoch": 0.5047468908740586, + "grad_norm": 1.9314548969268799, + "learning_rate": 5.165951214423398e-06, + "loss": 0.8359, + "step": 25214 + }, + { + "epoch": 0.5047669093911868, + "grad_norm": 1.0420793294906616, + "learning_rate": 5.1656272096789164e-06, + "loss": 0.3205, + "step": 25215 + }, + { + "epoch": 0.5047869279083151, + "grad_norm": 1.0372759103775024, + "learning_rate": 5.165303204238171e-06, + "loss": 0.3038, + "step": 25216 + }, + { + "epoch": 0.5048069464254435, + "grad_norm": 0.9983157515525818, + "learning_rate": 5.164979198102529e-06, + "loss": 0.2705, + "step": 25217 + }, + { + "epoch": 0.5048269649425718, + "grad_norm": 1.1230454444885254, + "learning_rate": 5.164655191273349e-06, + "loss": 0.3548, + "step": 25218 + }, + { + "epoch": 0.5048469834597002, + "grad_norm": 1.1958932876586914, + "learning_rate": 5.164331183751994e-06, + "loss": 0.3076, + "step": 25219 + }, + { + "epoch": 0.5048670019768285, + "grad_norm": 1.0200378894805908, + "learning_rate": 5.164007175539827e-06, + "loss": 0.2981, + "step": 25220 + }, + { + "epoch": 0.504887020493957, + "grad_norm": 1.1809899806976318, + "learning_rate": 5.163683166638208e-06, + "loss": 0.3597, + "step": 25221 + }, + { + "epoch": 0.5049070390110852, + "grad_norm": 1.8734198808670044, + "learning_rate": 5.163359157048501e-06, + "loss": 0.8072, + "step": 25222 + }, + { + "epoch": 0.5049270575282137, + "grad_norm": 0.9986172914505005, + "learning_rate": 5.163035146772065e-06, + "loss": 0.2747, + "step": 25223 + }, + { + "epoch": 0.504947076045342, + "grad_norm": 1.1759287118911743, + "learning_rate": 5.1627111358102675e-06, + "loss": 0.304, + "step": 25224 + }, + { + "epoch": 0.5049670945624702, + "grad_norm": 1.0505683422088623, + "learning_rate": 5.162387124164465e-06, + "loss": 0.3033, + "step": 25225 + }, + { + "epoch": 0.5049871130795986, + "grad_norm": 1.8495149612426758, + "learning_rate": 5.162063111836024e-06, + "loss": 0.8033, + "step": 25226 + }, + { + "epoch": 0.5050071315967269, + "grad_norm": 1.0950766801834106, + "learning_rate": 5.161739098826305e-06, + "loss": 0.2729, + "step": 25227 + }, + { + "epoch": 0.5050271501138553, + "grad_norm": 1.0514378547668457, + "learning_rate": 5.161415085136668e-06, + "loss": 0.3123, + "step": 25228 + }, + { + "epoch": 0.5050471686309836, + "grad_norm": 1.2998493909835815, + "learning_rate": 5.161091070768476e-06, + "loss": 0.3201, + "step": 25229 + }, + { + "epoch": 0.505067187148112, + "grad_norm": 1.119857907295227, + "learning_rate": 5.160767055723093e-06, + "loss": 0.3083, + "step": 25230 + }, + { + "epoch": 0.5050872056652403, + "grad_norm": 1.0833967924118042, + "learning_rate": 5.1604430400018804e-06, + "loss": 0.3321, + "step": 25231 + }, + { + "epoch": 0.5051072241823686, + "grad_norm": 1.1603515148162842, + "learning_rate": 5.160119023606198e-06, + "loss": 0.3328, + "step": 25232 + }, + { + "epoch": 0.505127242699497, + "grad_norm": 1.190155267715454, + "learning_rate": 5.159795006537411e-06, + "loss": 0.3388, + "step": 25233 + }, + { + "epoch": 0.5051472612166253, + "grad_norm": 1.2530494928359985, + "learning_rate": 5.15947098879688e-06, + "loss": 0.291, + "step": 25234 + }, + { + "epoch": 0.5051672797337537, + "grad_norm": 1.1241358518600464, + "learning_rate": 5.159146970385968e-06, + "loss": 0.3087, + "step": 25235 + }, + { + "epoch": 0.505187298250882, + "grad_norm": 1.131959080696106, + "learning_rate": 5.158822951306035e-06, + "loss": 0.2834, + "step": 25236 + }, + { + "epoch": 0.5052073167680105, + "grad_norm": 1.1132984161376953, + "learning_rate": 5.158498931558444e-06, + "loss": 0.315, + "step": 25237 + }, + { + "epoch": 0.5052273352851387, + "grad_norm": 1.2200185060501099, + "learning_rate": 5.15817491114456e-06, + "loss": 0.3521, + "step": 25238 + }, + { + "epoch": 0.5052473538022672, + "grad_norm": 1.1232489347457886, + "learning_rate": 5.1578508900657405e-06, + "loss": 0.2674, + "step": 25239 + }, + { + "epoch": 0.5052673723193954, + "grad_norm": 1.0487779378890991, + "learning_rate": 5.157526868323351e-06, + "loss": 0.2931, + "step": 25240 + }, + { + "epoch": 0.5052873908365237, + "grad_norm": 1.195823311805725, + "learning_rate": 5.1572028459187525e-06, + "loss": 0.2753, + "step": 25241 + }, + { + "epoch": 0.5053074093536521, + "grad_norm": 1.0054329633712769, + "learning_rate": 5.156878822853306e-06, + "loss": 0.3182, + "step": 25242 + }, + { + "epoch": 0.5053274278707804, + "grad_norm": 1.1655302047729492, + "learning_rate": 5.1565547991283746e-06, + "loss": 0.3808, + "step": 25243 + }, + { + "epoch": 0.5053474463879088, + "grad_norm": 1.2010135650634766, + "learning_rate": 5.156230774745324e-06, + "loss": 0.2939, + "step": 25244 + }, + { + "epoch": 0.5053674649050371, + "grad_norm": 0.9829433560371399, + "learning_rate": 5.155906749705508e-06, + "loss": 0.3175, + "step": 25245 + }, + { + "epoch": 0.5053874834221656, + "grad_norm": 1.0958079099655151, + "learning_rate": 5.155582724010297e-06, + "loss": 0.3195, + "step": 25246 + }, + { + "epoch": 0.5054075019392938, + "grad_norm": 1.0823121070861816, + "learning_rate": 5.155258697661048e-06, + "loss": 0.289, + "step": 25247 + }, + { + "epoch": 0.5054275204564221, + "grad_norm": 1.2996058464050293, + "learning_rate": 5.1549346706591265e-06, + "loss": 0.3079, + "step": 25248 + }, + { + "epoch": 0.5054475389735505, + "grad_norm": 1.1605596542358398, + "learning_rate": 5.154610643005891e-06, + "loss": 0.3479, + "step": 25249 + }, + { + "epoch": 0.5054675574906788, + "grad_norm": 1.1379549503326416, + "learning_rate": 5.154286614702706e-06, + "loss": 0.3207, + "step": 25250 + }, + { + "epoch": 0.5054875760078072, + "grad_norm": 1.1642839908599854, + "learning_rate": 5.153962585750935e-06, + "loss": 0.3487, + "step": 25251 + }, + { + "epoch": 0.5055075945249355, + "grad_norm": 1.0511739253997803, + "learning_rate": 5.153638556151936e-06, + "loss": 0.3065, + "step": 25252 + }, + { + "epoch": 0.505527613042064, + "grad_norm": 1.1144219636917114, + "learning_rate": 5.153314525907075e-06, + "loss": 0.328, + "step": 25253 + }, + { + "epoch": 0.5055476315591922, + "grad_norm": 1.0850043296813965, + "learning_rate": 5.152990495017713e-06, + "loss": 0.3149, + "step": 25254 + }, + { + "epoch": 0.5055676500763207, + "grad_norm": 1.2977159023284912, + "learning_rate": 5.15266646348521e-06, + "loss": 0.3449, + "step": 25255 + }, + { + "epoch": 0.505587668593449, + "grad_norm": 1.836003303527832, + "learning_rate": 5.1523424313109315e-06, + "loss": 0.7961, + "step": 25256 + }, + { + "epoch": 0.5056076871105772, + "grad_norm": 1.0693732500076294, + "learning_rate": 5.152018398496238e-06, + "loss": 0.3027, + "step": 25257 + }, + { + "epoch": 0.5056277056277056, + "grad_norm": 1.180068850517273, + "learning_rate": 5.151694365042493e-06, + "loss": 0.2896, + "step": 25258 + }, + { + "epoch": 0.5056477241448339, + "grad_norm": 0.9977602362632751, + "learning_rate": 5.151370330951056e-06, + "loss": 0.2817, + "step": 25259 + }, + { + "epoch": 0.5056677426619623, + "grad_norm": 1.197561502456665, + "learning_rate": 5.151046296223292e-06, + "loss": 0.3299, + "step": 25260 + }, + { + "epoch": 0.5056877611790906, + "grad_norm": 1.2750294208526611, + "learning_rate": 5.1507222608605604e-06, + "loss": 0.3161, + "step": 25261 + }, + { + "epoch": 0.505707779696219, + "grad_norm": 1.027301549911499, + "learning_rate": 5.1503982248642245e-06, + "loss": 0.3199, + "step": 25262 + }, + { + "epoch": 0.5057277982133473, + "grad_norm": 1.2693122625350952, + "learning_rate": 5.150074188235647e-06, + "loss": 0.2951, + "step": 25263 + }, + { + "epoch": 0.5057478167304756, + "grad_norm": 1.0131731033325195, + "learning_rate": 5.149750150976193e-06, + "loss": 0.2797, + "step": 25264 + }, + { + "epoch": 0.505767835247604, + "grad_norm": 1.0535359382629395, + "learning_rate": 5.149426113087218e-06, + "loss": 0.3197, + "step": 25265 + }, + { + "epoch": 0.5057878537647323, + "grad_norm": 1.7750264406204224, + "learning_rate": 5.1491020745700905e-06, + "loss": 0.7866, + "step": 25266 + }, + { + "epoch": 0.5058078722818607, + "grad_norm": 1.0673211812973022, + "learning_rate": 5.1487780354261675e-06, + "loss": 0.3095, + "step": 25267 + }, + { + "epoch": 0.505827890798989, + "grad_norm": 1.1006661653518677, + "learning_rate": 5.148453995656815e-06, + "loss": 0.3, + "step": 25268 + }, + { + "epoch": 0.5058479093161175, + "grad_norm": 1.1052093505859375, + "learning_rate": 5.148129955263394e-06, + "loss": 0.2864, + "step": 25269 + }, + { + "epoch": 0.5058679278332457, + "grad_norm": 1.057922601699829, + "learning_rate": 5.147805914247266e-06, + "loss": 0.2906, + "step": 25270 + }, + { + "epoch": 0.5058879463503742, + "grad_norm": 1.202858805656433, + "learning_rate": 5.147481872609794e-06, + "loss": 0.3438, + "step": 25271 + }, + { + "epoch": 0.5059079648675024, + "grad_norm": 1.2201564311981201, + "learning_rate": 5.14715783035234e-06, + "loss": 0.3434, + "step": 25272 + }, + { + "epoch": 0.5059279833846307, + "grad_norm": 1.0465787649154663, + "learning_rate": 5.1468337874762666e-06, + "loss": 0.3361, + "step": 25273 + }, + { + "epoch": 0.5059480019017591, + "grad_norm": 1.0622859001159668, + "learning_rate": 5.1465097439829335e-06, + "loss": 0.3054, + "step": 25274 + }, + { + "epoch": 0.5059680204188874, + "grad_norm": 1.003867506980896, + "learning_rate": 5.146185699873706e-06, + "loss": 0.3196, + "step": 25275 + }, + { + "epoch": 0.5059880389360158, + "grad_norm": 1.2135465145111084, + "learning_rate": 5.145861655149945e-06, + "loss": 0.3262, + "step": 25276 + }, + { + "epoch": 0.5060080574531441, + "grad_norm": 1.4245774745941162, + "learning_rate": 5.145537609813015e-06, + "loss": 0.3842, + "step": 25277 + }, + { + "epoch": 0.5060280759702726, + "grad_norm": 1.1474636793136597, + "learning_rate": 5.145213563864276e-06, + "loss": 0.3179, + "step": 25278 + }, + { + "epoch": 0.5060480944874008, + "grad_norm": 1.0234708786010742, + "learning_rate": 5.144889517305088e-06, + "loss": 0.2826, + "step": 25279 + }, + { + "epoch": 0.5060681130045291, + "grad_norm": 1.0857027769088745, + "learning_rate": 5.144565470136816e-06, + "loss": 0.3013, + "step": 25280 + }, + { + "epoch": 0.5060881315216575, + "grad_norm": 0.9830973148345947, + "learning_rate": 5.144241422360823e-06, + "loss": 0.2715, + "step": 25281 + }, + { + "epoch": 0.5061081500387858, + "grad_norm": 1.0674880743026733, + "learning_rate": 5.143917373978471e-06, + "loss": 0.2832, + "step": 25282 + }, + { + "epoch": 0.5061281685559142, + "grad_norm": 1.2168810367584229, + "learning_rate": 5.143593324991119e-06, + "loss": 0.3121, + "step": 25283 + }, + { + "epoch": 0.5061481870730425, + "grad_norm": 1.0416537523269653, + "learning_rate": 5.143269275400135e-06, + "loss": 0.3265, + "step": 25284 + }, + { + "epoch": 0.506168205590171, + "grad_norm": 1.2651020288467407, + "learning_rate": 5.142945225206875e-06, + "loss": 0.3159, + "step": 25285 + }, + { + "epoch": 0.5061882241072992, + "grad_norm": 1.054715633392334, + "learning_rate": 5.142621174412704e-06, + "loss": 0.2865, + "step": 25286 + }, + { + "epoch": 0.5062082426244275, + "grad_norm": 1.066177248954773, + "learning_rate": 5.142297123018986e-06, + "loss": 0.3315, + "step": 25287 + }, + { + "epoch": 0.506228261141556, + "grad_norm": 1.114322543144226, + "learning_rate": 5.141973071027079e-06, + "loss": 0.2752, + "step": 25288 + }, + { + "epoch": 0.5062482796586842, + "grad_norm": 1.0047348737716675, + "learning_rate": 5.14164901843835e-06, + "loss": 0.2719, + "step": 25289 + }, + { + "epoch": 0.5062682981758126, + "grad_norm": 1.9197791814804077, + "learning_rate": 5.141324965254157e-06, + "loss": 0.8287, + "step": 25290 + }, + { + "epoch": 0.5062883166929409, + "grad_norm": 1.1159111261367798, + "learning_rate": 5.1410009114758665e-06, + "loss": 0.3374, + "step": 25291 + }, + { + "epoch": 0.5063083352100693, + "grad_norm": 0.9885652661323547, + "learning_rate": 5.140676857104837e-06, + "loss": 0.2808, + "step": 25292 + }, + { + "epoch": 0.5063283537271976, + "grad_norm": 2.0306668281555176, + "learning_rate": 5.140352802142433e-06, + "loss": 0.8433, + "step": 25293 + }, + { + "epoch": 0.506348372244326, + "grad_norm": 1.0567909479141235, + "learning_rate": 5.140028746590016e-06, + "loss": 0.2755, + "step": 25294 + }, + { + "epoch": 0.5063683907614543, + "grad_norm": 1.0884321928024292, + "learning_rate": 5.139704690448948e-06, + "loss": 0.3506, + "step": 25295 + }, + { + "epoch": 0.5063884092785826, + "grad_norm": 1.1489479541778564, + "learning_rate": 5.139380633720591e-06, + "loss": 0.3036, + "step": 25296 + }, + { + "epoch": 0.506408427795711, + "grad_norm": 1.1231926679611206, + "learning_rate": 5.1390565764063095e-06, + "loss": 0.2874, + "step": 25297 + }, + { + "epoch": 0.5064284463128393, + "grad_norm": 1.2326173782348633, + "learning_rate": 5.138732518507462e-06, + "loss": 0.3312, + "step": 25298 + }, + { + "epoch": 0.5064484648299677, + "grad_norm": 1.319035291671753, + "learning_rate": 5.138408460025415e-06, + "loss": 0.2459, + "step": 25299 + }, + { + "epoch": 0.506468483347096, + "grad_norm": 1.1534168720245361, + "learning_rate": 5.138084400961527e-06, + "loss": 0.3401, + "step": 25300 + }, + { + "epoch": 0.5064885018642244, + "grad_norm": 1.0106096267700195, + "learning_rate": 5.1377603413171615e-06, + "loss": 0.2752, + "step": 25301 + }, + { + "epoch": 0.5065085203813527, + "grad_norm": 1.188400387763977, + "learning_rate": 5.137436281093683e-06, + "loss": 0.3463, + "step": 25302 + }, + { + "epoch": 0.506528538898481, + "grad_norm": 1.055393099784851, + "learning_rate": 5.137112220292451e-06, + "loss": 0.3094, + "step": 25303 + }, + { + "epoch": 0.5065485574156094, + "grad_norm": 1.0620293617248535, + "learning_rate": 5.136788158914828e-06, + "loss": 0.3146, + "step": 25304 + }, + { + "epoch": 0.5065685759327377, + "grad_norm": 1.1053811311721802, + "learning_rate": 5.136464096962178e-06, + "loss": 0.3135, + "step": 25305 + }, + { + "epoch": 0.5065885944498661, + "grad_norm": 1.188199520111084, + "learning_rate": 5.136140034435861e-06, + "loss": 0.2642, + "step": 25306 + }, + { + "epoch": 0.5066086129669944, + "grad_norm": 1.1608165502548218, + "learning_rate": 5.1358159713372416e-06, + "loss": 0.3056, + "step": 25307 + }, + { + "epoch": 0.5066286314841228, + "grad_norm": 1.0433425903320312, + "learning_rate": 5.135491907667682e-06, + "loss": 0.3151, + "step": 25308 + }, + { + "epoch": 0.5066486500012511, + "grad_norm": 1.0607621669769287, + "learning_rate": 5.1351678434285435e-06, + "loss": 0.3092, + "step": 25309 + }, + { + "epoch": 0.5066686685183796, + "grad_norm": 1.2674897909164429, + "learning_rate": 5.134843778621187e-06, + "loss": 0.2774, + "step": 25310 + }, + { + "epoch": 0.5066886870355078, + "grad_norm": 1.323965311050415, + "learning_rate": 5.134519713246977e-06, + "loss": 0.3382, + "step": 25311 + }, + { + "epoch": 0.5067087055526361, + "grad_norm": 1.0740737915039062, + "learning_rate": 5.1341956473072755e-06, + "loss": 0.3163, + "step": 25312 + }, + { + "epoch": 0.5067287240697645, + "grad_norm": 1.087227463722229, + "learning_rate": 5.133871580803444e-06, + "loss": 0.361, + "step": 25313 + }, + { + "epoch": 0.5067487425868928, + "grad_norm": 1.1681010723114014, + "learning_rate": 5.133547513736844e-06, + "loss": 0.3447, + "step": 25314 + }, + { + "epoch": 0.5067687611040212, + "grad_norm": 1.2769180536270142, + "learning_rate": 5.13322344610884e-06, + "loss": 0.3097, + "step": 25315 + }, + { + "epoch": 0.5067887796211495, + "grad_norm": 1.1377699375152588, + "learning_rate": 5.132899377920794e-06, + "loss": 0.3288, + "step": 25316 + }, + { + "epoch": 0.506808798138278, + "grad_norm": 1.153066635131836, + "learning_rate": 5.132575309174067e-06, + "loss": 0.3117, + "step": 25317 + }, + { + "epoch": 0.5068288166554062, + "grad_norm": 1.0041768550872803, + "learning_rate": 5.132251239870022e-06, + "loss": 0.2722, + "step": 25318 + }, + { + "epoch": 0.5068488351725345, + "grad_norm": 1.0046414136886597, + "learning_rate": 5.1319271700100204e-06, + "loss": 0.3054, + "step": 25319 + }, + { + "epoch": 0.506868853689663, + "grad_norm": 1.136802077293396, + "learning_rate": 5.1316030995954265e-06, + "loss": 0.3502, + "step": 25320 + }, + { + "epoch": 0.5068888722067912, + "grad_norm": 1.0145732164382935, + "learning_rate": 5.131279028627601e-06, + "loss": 0.2767, + "step": 25321 + }, + { + "epoch": 0.5069088907239196, + "grad_norm": 1.0455657243728638, + "learning_rate": 5.130954957107907e-06, + "loss": 0.3557, + "step": 25322 + }, + { + "epoch": 0.5069289092410479, + "grad_norm": 1.1225186586380005, + "learning_rate": 5.130630885037706e-06, + "loss": 0.3021, + "step": 25323 + }, + { + "epoch": 0.5069489277581763, + "grad_norm": 1.0998741388320923, + "learning_rate": 5.130306812418362e-06, + "loss": 0.313, + "step": 25324 + }, + { + "epoch": 0.5069689462753046, + "grad_norm": 1.1174476146697998, + "learning_rate": 5.129982739251234e-06, + "loss": 0.2942, + "step": 25325 + }, + { + "epoch": 0.506988964792433, + "grad_norm": 1.0271668434143066, + "learning_rate": 5.129658665537689e-06, + "loss": 0.3125, + "step": 25326 + }, + { + "epoch": 0.5070089833095613, + "grad_norm": 1.6852914094924927, + "learning_rate": 5.129334591279085e-06, + "loss": 0.7755, + "step": 25327 + }, + { + "epoch": 0.5070290018266896, + "grad_norm": 1.2075698375701904, + "learning_rate": 5.129010516476788e-06, + "loss": 0.2871, + "step": 25328 + }, + { + "epoch": 0.507049020343818, + "grad_norm": 1.4243314266204834, + "learning_rate": 5.128686441132157e-06, + "loss": 0.2333, + "step": 25329 + }, + { + "epoch": 0.5070690388609463, + "grad_norm": 1.1110939979553223, + "learning_rate": 5.128362365246557e-06, + "loss": 0.2766, + "step": 25330 + }, + { + "epoch": 0.5070890573780747, + "grad_norm": 1.7766114473342896, + "learning_rate": 5.128038288821349e-06, + "loss": 0.8041, + "step": 25331 + }, + { + "epoch": 0.507109075895203, + "grad_norm": 1.1771188974380493, + "learning_rate": 5.1277142118578945e-06, + "loss": 0.2541, + "step": 25332 + }, + { + "epoch": 0.5071290944123314, + "grad_norm": 1.100691556930542, + "learning_rate": 5.127390134357558e-06, + "loss": 0.3009, + "step": 25333 + }, + { + "epoch": 0.5071491129294597, + "grad_norm": 1.177701473236084, + "learning_rate": 5.1270660563217015e-06, + "loss": 0.3548, + "step": 25334 + }, + { + "epoch": 0.507169131446588, + "grad_norm": 1.151543140411377, + "learning_rate": 5.1267419777516855e-06, + "loss": 0.2824, + "step": 25335 + }, + { + "epoch": 0.5071891499637164, + "grad_norm": 1.0548406839370728, + "learning_rate": 5.126417898648873e-06, + "loss": 0.3266, + "step": 25336 + }, + { + "epoch": 0.5072091684808447, + "grad_norm": 1.0793302059173584, + "learning_rate": 5.126093819014628e-06, + "loss": 0.3533, + "step": 25337 + }, + { + "epoch": 0.5072291869979731, + "grad_norm": 1.0801585912704468, + "learning_rate": 5.125769738850311e-06, + "loss": 0.3287, + "step": 25338 + }, + { + "epoch": 0.5072492055151014, + "grad_norm": 1.0548985004425049, + "learning_rate": 5.125445658157285e-06, + "loss": 0.3128, + "step": 25339 + }, + { + "epoch": 0.5072692240322298, + "grad_norm": 1.0857112407684326, + "learning_rate": 5.125121576936912e-06, + "loss": 0.3787, + "step": 25340 + }, + { + "epoch": 0.5072892425493581, + "grad_norm": 1.0951913595199585, + "learning_rate": 5.1247974951905565e-06, + "loss": 0.3335, + "step": 25341 + }, + { + "epoch": 0.5073092610664865, + "grad_norm": 1.0831184387207031, + "learning_rate": 5.124473412919579e-06, + "loss": 0.3593, + "step": 25342 + }, + { + "epoch": 0.5073292795836148, + "grad_norm": 1.8230493068695068, + "learning_rate": 5.124149330125342e-06, + "loss": 0.7943, + "step": 25343 + }, + { + "epoch": 0.5073492981007431, + "grad_norm": 2.0522687435150146, + "learning_rate": 5.1238252468092064e-06, + "loss": 0.7465, + "step": 25344 + }, + { + "epoch": 0.5073693166178715, + "grad_norm": 1.0759721994400024, + "learning_rate": 5.123501162972536e-06, + "loss": 0.3483, + "step": 25345 + }, + { + "epoch": 0.5073893351349998, + "grad_norm": 2.1063671112060547, + "learning_rate": 5.1231770786166955e-06, + "loss": 0.7675, + "step": 25346 + }, + { + "epoch": 0.5074093536521282, + "grad_norm": 1.0176893472671509, + "learning_rate": 5.122852993743044e-06, + "loss": 0.3298, + "step": 25347 + }, + { + "epoch": 0.5074293721692565, + "grad_norm": 0.9797976016998291, + "learning_rate": 5.122528908352945e-06, + "loss": 0.2663, + "step": 25348 + }, + { + "epoch": 0.507449390686385, + "grad_norm": 1.222819209098816, + "learning_rate": 5.122204822447761e-06, + "loss": 0.3263, + "step": 25349 + }, + { + "epoch": 0.5074694092035132, + "grad_norm": 1.1080663204193115, + "learning_rate": 5.121880736028854e-06, + "loss": 0.3634, + "step": 25350 + }, + { + "epoch": 0.5074894277206415, + "grad_norm": 1.1158467531204224, + "learning_rate": 5.121556649097587e-06, + "loss": 0.2879, + "step": 25351 + }, + { + "epoch": 0.50750944623777, + "grad_norm": 1.1559181213378906, + "learning_rate": 5.121232561655321e-06, + "loss": 0.3183, + "step": 25352 + }, + { + "epoch": 0.5075294647548982, + "grad_norm": 1.0791518688201904, + "learning_rate": 5.1209084737034195e-06, + "loss": 0.3113, + "step": 25353 + }, + { + "epoch": 0.5075494832720266, + "grad_norm": 1.0549458265304565, + "learning_rate": 5.120584385243244e-06, + "loss": 0.3339, + "step": 25354 + }, + { + "epoch": 0.5075695017891549, + "grad_norm": 1.0540858507156372, + "learning_rate": 5.120260296276161e-06, + "loss": 0.3406, + "step": 25355 + }, + { + "epoch": 0.5075895203062833, + "grad_norm": 1.0615854263305664, + "learning_rate": 5.119936206803526e-06, + "loss": 0.325, + "step": 25356 + }, + { + "epoch": 0.5076095388234116, + "grad_norm": 1.1262471675872803, + "learning_rate": 5.119612116826706e-06, + "loss": 0.3079, + "step": 25357 + }, + { + "epoch": 0.50762955734054, + "grad_norm": 1.2017395496368408, + "learning_rate": 5.119288026347063e-06, + "loss": 0.2891, + "step": 25358 + }, + { + "epoch": 0.5076495758576683, + "grad_norm": 1.2620177268981934, + "learning_rate": 5.1189639353659595e-06, + "loss": 0.2878, + "step": 25359 + }, + { + "epoch": 0.5076695943747966, + "grad_norm": 1.1291205883026123, + "learning_rate": 5.118639843884757e-06, + "loss": 0.3439, + "step": 25360 + }, + { + "epoch": 0.507689612891925, + "grad_norm": 1.0701725482940674, + "learning_rate": 5.118315751904817e-06, + "loss": 0.3489, + "step": 25361 + }, + { + "epoch": 0.5077096314090533, + "grad_norm": 1.9885406494140625, + "learning_rate": 5.117991659427505e-06, + "loss": 0.8183, + "step": 25362 + }, + { + "epoch": 0.5077296499261817, + "grad_norm": 1.1229989528656006, + "learning_rate": 5.1176675664541795e-06, + "loss": 0.3311, + "step": 25363 + }, + { + "epoch": 0.50774966844331, + "grad_norm": 1.1760129928588867, + "learning_rate": 5.117343472986205e-06, + "loss": 0.356, + "step": 25364 + }, + { + "epoch": 0.5077696869604384, + "grad_norm": 1.8140065670013428, + "learning_rate": 5.117019379024943e-06, + "loss": 0.793, + "step": 25365 + }, + { + "epoch": 0.5077897054775667, + "grad_norm": 1.1784838438034058, + "learning_rate": 5.116695284571759e-06, + "loss": 0.3237, + "step": 25366 + }, + { + "epoch": 0.507809723994695, + "grad_norm": 1.9714560508728027, + "learning_rate": 5.1163711896280125e-06, + "loss": 0.8093, + "step": 25367 + }, + { + "epoch": 0.5078297425118234, + "grad_norm": 1.1449702978134155, + "learning_rate": 5.1160470941950655e-06, + "loss": 0.3123, + "step": 25368 + }, + { + "epoch": 0.5078497610289517, + "grad_norm": 1.0155200958251953, + "learning_rate": 5.115722998274283e-06, + "loss": 0.3207, + "step": 25369 + }, + { + "epoch": 0.5078697795460801, + "grad_norm": 1.072393536567688, + "learning_rate": 5.115398901867023e-06, + "loss": 0.3554, + "step": 25370 + }, + { + "epoch": 0.5078897980632084, + "grad_norm": 1.0163445472717285, + "learning_rate": 5.115074804974654e-06, + "loss": 0.3317, + "step": 25371 + }, + { + "epoch": 0.5079098165803368, + "grad_norm": 1.9059640169143677, + "learning_rate": 5.114750707598534e-06, + "loss": 0.8324, + "step": 25372 + }, + { + "epoch": 0.5079298350974651, + "grad_norm": 1.1559028625488281, + "learning_rate": 5.114426609740027e-06, + "loss": 0.3625, + "step": 25373 + }, + { + "epoch": 0.5079498536145935, + "grad_norm": 1.1253979206085205, + "learning_rate": 5.114102511400496e-06, + "loss": 0.2699, + "step": 25374 + }, + { + "epoch": 0.5079698721317218, + "grad_norm": 1.1745373010635376, + "learning_rate": 5.113778412581302e-06, + "loss": 0.2817, + "step": 25375 + }, + { + "epoch": 0.5079898906488501, + "grad_norm": 1.0412219762802124, + "learning_rate": 5.113454313283808e-06, + "loss": 0.3432, + "step": 25376 + }, + { + "epoch": 0.5080099091659785, + "grad_norm": 1.0351544618606567, + "learning_rate": 5.1131302135093756e-06, + "loss": 0.3011, + "step": 25377 + }, + { + "epoch": 0.5080299276831068, + "grad_norm": 1.069284200668335, + "learning_rate": 5.1128061132593675e-06, + "loss": 0.2784, + "step": 25378 + }, + { + "epoch": 0.5080499462002352, + "grad_norm": 1.187644362449646, + "learning_rate": 5.11248201253515e-06, + "loss": 0.2951, + "step": 25379 + }, + { + "epoch": 0.5080699647173635, + "grad_norm": 1.3877769708633423, + "learning_rate": 5.112157911338078e-06, + "loss": 0.2526, + "step": 25380 + }, + { + "epoch": 0.508089983234492, + "grad_norm": 1.5956286191940308, + "learning_rate": 5.111833809669522e-06, + "loss": 0.3168, + "step": 25381 + }, + { + "epoch": 0.5081100017516202, + "grad_norm": 1.1528388261795044, + "learning_rate": 5.11150970753084e-06, + "loss": 0.274, + "step": 25382 + }, + { + "epoch": 0.5081300202687485, + "grad_norm": 1.5061044692993164, + "learning_rate": 5.111185604923393e-06, + "loss": 0.2645, + "step": 25383 + }, + { + "epoch": 0.508150038785877, + "grad_norm": 1.1705118417739868, + "learning_rate": 5.110861501848547e-06, + "loss": 0.321, + "step": 25384 + }, + { + "epoch": 0.5081700573030052, + "grad_norm": 1.1371468305587769, + "learning_rate": 5.1105373983076635e-06, + "loss": 0.293, + "step": 25385 + }, + { + "epoch": 0.5081900758201336, + "grad_norm": 1.3687056303024292, + "learning_rate": 5.110213294302105e-06, + "loss": 0.2619, + "step": 25386 + }, + { + "epoch": 0.5082100943372619, + "grad_norm": 1.0308440923690796, + "learning_rate": 5.109889189833233e-06, + "loss": 0.2789, + "step": 25387 + }, + { + "epoch": 0.5082301128543903, + "grad_norm": 1.0742406845092773, + "learning_rate": 5.10956508490241e-06, + "loss": 0.249, + "step": 25388 + }, + { + "epoch": 0.5082501313715186, + "grad_norm": 1.2013344764709473, + "learning_rate": 5.1092409795109996e-06, + "loss": 0.3369, + "step": 25389 + }, + { + "epoch": 0.508270149888647, + "grad_norm": 1.0574891567230225, + "learning_rate": 5.108916873660362e-06, + "loss": 0.2864, + "step": 25390 + }, + { + "epoch": 0.5082901684057753, + "grad_norm": 1.1037797927856445, + "learning_rate": 5.108592767351863e-06, + "loss": 0.364, + "step": 25391 + }, + { + "epoch": 0.5083101869229036, + "grad_norm": 1.0199819803237915, + "learning_rate": 5.1082686605868635e-06, + "loss": 0.2992, + "step": 25392 + }, + { + "epoch": 0.508330205440032, + "grad_norm": 0.9820481538772583, + "learning_rate": 5.107944553366726e-06, + "loss": 0.2894, + "step": 25393 + }, + { + "epoch": 0.5083502239571603, + "grad_norm": 1.166961908340454, + "learning_rate": 5.1076204456928115e-06, + "loss": 0.2816, + "step": 25394 + }, + { + "epoch": 0.5083702424742887, + "grad_norm": 1.0326671600341797, + "learning_rate": 5.107296337566485e-06, + "loss": 0.277, + "step": 25395 + }, + { + "epoch": 0.508390260991417, + "grad_norm": 1.1779677867889404, + "learning_rate": 5.106972228989107e-06, + "loss": 0.3248, + "step": 25396 + }, + { + "epoch": 0.5084102795085454, + "grad_norm": 1.3572440147399902, + "learning_rate": 5.106648119962041e-06, + "loss": 0.3181, + "step": 25397 + }, + { + "epoch": 0.5084302980256737, + "grad_norm": 1.1469440460205078, + "learning_rate": 5.106324010486649e-06, + "loss": 0.3154, + "step": 25398 + }, + { + "epoch": 0.508450316542802, + "grad_norm": 1.075283408164978, + "learning_rate": 5.105999900564295e-06, + "loss": 0.3121, + "step": 25399 + }, + { + "epoch": 0.5084703350599304, + "grad_norm": 1.188858151435852, + "learning_rate": 5.1056757901963386e-06, + "loss": 0.2977, + "step": 25400 + }, + { + "epoch": 0.5084903535770587, + "grad_norm": 1.3014158010482788, + "learning_rate": 5.105351679384145e-06, + "loss": 0.3111, + "step": 25401 + }, + { + "epoch": 0.5085103720941871, + "grad_norm": 1.2033929824829102, + "learning_rate": 5.105027568129075e-06, + "loss": 0.3136, + "step": 25402 + }, + { + "epoch": 0.5085303906113154, + "grad_norm": 1.069731593132019, + "learning_rate": 5.104703456432492e-06, + "loss": 0.3316, + "step": 25403 + }, + { + "epoch": 0.5085504091284438, + "grad_norm": 1.1394484043121338, + "learning_rate": 5.104379344295758e-06, + "loss": 0.2939, + "step": 25404 + }, + { + "epoch": 0.5085704276455721, + "grad_norm": 1.1120461225509644, + "learning_rate": 5.104055231720235e-06, + "loss": 0.297, + "step": 25405 + }, + { + "epoch": 0.5085904461627005, + "grad_norm": 1.0236278772354126, + "learning_rate": 5.103731118707288e-06, + "loss": 0.3345, + "step": 25406 + }, + { + "epoch": 0.5086104646798288, + "grad_norm": 1.1187516450881958, + "learning_rate": 5.1034070052582765e-06, + "loss": 0.2964, + "step": 25407 + }, + { + "epoch": 0.5086304831969571, + "grad_norm": 1.1161179542541504, + "learning_rate": 5.103082891374566e-06, + "loss": 0.2558, + "step": 25408 + }, + { + "epoch": 0.5086505017140855, + "grad_norm": 1.0997798442840576, + "learning_rate": 5.102758777057513e-06, + "loss": 0.2761, + "step": 25409 + }, + { + "epoch": 0.5086705202312138, + "grad_norm": 1.001626968383789, + "learning_rate": 5.102434662308487e-06, + "loss": 0.3273, + "step": 25410 + }, + { + "epoch": 0.5086905387483422, + "grad_norm": 1.2365968227386475, + "learning_rate": 5.102110547128848e-06, + "loss": 0.3073, + "step": 25411 + }, + { + "epoch": 0.5087105572654705, + "grad_norm": 1.0225179195404053, + "learning_rate": 5.101786431519958e-06, + "loss": 0.3363, + "step": 25412 + }, + { + "epoch": 0.508730575782599, + "grad_norm": 1.0207059383392334, + "learning_rate": 5.10146231548318e-06, + "loss": 0.2601, + "step": 25413 + }, + { + "epoch": 0.5087505942997272, + "grad_norm": 1.0847227573394775, + "learning_rate": 5.101138199019876e-06, + "loss": 0.3119, + "step": 25414 + }, + { + "epoch": 0.5087706128168555, + "grad_norm": 1.1850556135177612, + "learning_rate": 5.100814082131408e-06, + "loss": 0.2932, + "step": 25415 + }, + { + "epoch": 0.508790631333984, + "grad_norm": 1.0143005847930908, + "learning_rate": 5.1004899648191394e-06, + "loss": 0.2689, + "step": 25416 + }, + { + "epoch": 0.5088106498511122, + "grad_norm": 1.5067600011825562, + "learning_rate": 5.100165847084434e-06, + "loss": 0.3384, + "step": 25417 + }, + { + "epoch": 0.5088306683682406, + "grad_norm": 1.0066375732421875, + "learning_rate": 5.099841728928652e-06, + "loss": 0.3064, + "step": 25418 + }, + { + "epoch": 0.5088506868853689, + "grad_norm": 1.0532830953598022, + "learning_rate": 5.099517610353157e-06, + "loss": 0.2795, + "step": 25419 + }, + { + "epoch": 0.5088707054024973, + "grad_norm": 1.144882321357727, + "learning_rate": 5.099193491359311e-06, + "loss": 0.282, + "step": 25420 + }, + { + "epoch": 0.5088907239196256, + "grad_norm": 1.2662429809570312, + "learning_rate": 5.098869371948477e-06, + "loss": 0.2952, + "step": 25421 + }, + { + "epoch": 0.508910742436754, + "grad_norm": 1.1993396282196045, + "learning_rate": 5.098545252122017e-06, + "loss": 0.3256, + "step": 25422 + }, + { + "epoch": 0.5089307609538823, + "grad_norm": 1.063797116279602, + "learning_rate": 5.098221131881295e-06, + "loss": 0.3007, + "step": 25423 + }, + { + "epoch": 0.5089507794710106, + "grad_norm": 1.106532096862793, + "learning_rate": 5.0978970112276725e-06, + "loss": 0.2914, + "step": 25424 + }, + { + "epoch": 0.508970797988139, + "grad_norm": 1.235336422920227, + "learning_rate": 5.097572890162511e-06, + "loss": 0.3341, + "step": 25425 + }, + { + "epoch": 0.5089908165052673, + "grad_norm": 1.1172059774398804, + "learning_rate": 5.097248768687177e-06, + "loss": 0.3378, + "step": 25426 + }, + { + "epoch": 0.5090108350223957, + "grad_norm": 1.1312628984451294, + "learning_rate": 5.096924646803026e-06, + "loss": 0.2965, + "step": 25427 + }, + { + "epoch": 0.509030853539524, + "grad_norm": 1.2470272779464722, + "learning_rate": 5.096600524511426e-06, + "loss": 0.3331, + "step": 25428 + }, + { + "epoch": 0.5090508720566524, + "grad_norm": 1.1707032918930054, + "learning_rate": 5.096276401813738e-06, + "loss": 0.3451, + "step": 25429 + }, + { + "epoch": 0.5090708905737807, + "grad_norm": 1.0500366687774658, + "learning_rate": 5.095952278711327e-06, + "loss": 0.2682, + "step": 25430 + }, + { + "epoch": 0.509090909090909, + "grad_norm": 1.025773286819458, + "learning_rate": 5.095628155205551e-06, + "loss": 0.2757, + "step": 25431 + }, + { + "epoch": 0.5091109276080374, + "grad_norm": 1.2740464210510254, + "learning_rate": 5.095304031297775e-06, + "loss": 0.3404, + "step": 25432 + }, + { + "epoch": 0.5091309461251657, + "grad_norm": 1.1715786457061768, + "learning_rate": 5.0949799069893625e-06, + "loss": 0.303, + "step": 25433 + }, + { + "epoch": 0.5091509646422941, + "grad_norm": 1.1341266632080078, + "learning_rate": 5.0946557822816745e-06, + "loss": 0.376, + "step": 25434 + }, + { + "epoch": 0.5091709831594224, + "grad_norm": 1.069443941116333, + "learning_rate": 5.094331657176072e-06, + "loss": 0.3473, + "step": 25435 + }, + { + "epoch": 0.5091910016765508, + "grad_norm": 1.9553591012954712, + "learning_rate": 5.0940075316739225e-06, + "loss": 0.8484, + "step": 25436 + }, + { + "epoch": 0.5092110201936791, + "grad_norm": 1.16017746925354, + "learning_rate": 5.093683405776584e-06, + "loss": 0.2976, + "step": 25437 + }, + { + "epoch": 0.5092310387108075, + "grad_norm": 0.9977574944496155, + "learning_rate": 5.093359279485421e-06, + "loss": 0.327, + "step": 25438 + }, + { + "epoch": 0.5092510572279358, + "grad_norm": 1.0682978630065918, + "learning_rate": 5.093035152801796e-06, + "loss": 0.2987, + "step": 25439 + }, + { + "epoch": 0.5092710757450641, + "grad_norm": 1.0800822973251343, + "learning_rate": 5.09271102572707e-06, + "loss": 0.3253, + "step": 25440 + }, + { + "epoch": 0.5092910942621925, + "grad_norm": 1.1659780740737915, + "learning_rate": 5.092386898262608e-06, + "loss": 0.3277, + "step": 25441 + }, + { + "epoch": 0.5093111127793208, + "grad_norm": 1.0299986600875854, + "learning_rate": 5.09206277040977e-06, + "loss": 0.3124, + "step": 25442 + }, + { + "epoch": 0.5093311312964492, + "grad_norm": 1.1722458600997925, + "learning_rate": 5.091738642169921e-06, + "loss": 0.3286, + "step": 25443 + }, + { + "epoch": 0.5093511498135775, + "grad_norm": 1.115586757659912, + "learning_rate": 5.0914145135444225e-06, + "loss": 0.3398, + "step": 25444 + }, + { + "epoch": 0.509371168330706, + "grad_norm": 1.0238488912582397, + "learning_rate": 5.091090384534637e-06, + "loss": 0.2524, + "step": 25445 + }, + { + "epoch": 0.5093911868478342, + "grad_norm": 1.0817478895187378, + "learning_rate": 5.090766255141928e-06, + "loss": 0.3221, + "step": 25446 + }, + { + "epoch": 0.5094112053649625, + "grad_norm": 1.1211763620376587, + "learning_rate": 5.090442125367656e-06, + "loss": 0.2722, + "step": 25447 + }, + { + "epoch": 0.5094312238820909, + "grad_norm": 1.1889313459396362, + "learning_rate": 5.090117995213184e-06, + "loss": 0.2581, + "step": 25448 + }, + { + "epoch": 0.5094512423992192, + "grad_norm": 1.8227806091308594, + "learning_rate": 5.089793864679876e-06, + "loss": 0.8306, + "step": 25449 + }, + { + "epoch": 0.5094712609163476, + "grad_norm": 2.1608476638793945, + "learning_rate": 5.089469733769096e-06, + "loss": 0.7393, + "step": 25450 + }, + { + "epoch": 0.5094912794334759, + "grad_norm": 1.285210371017456, + "learning_rate": 5.089145602482202e-06, + "loss": 0.3124, + "step": 25451 + }, + { + "epoch": 0.5095112979506043, + "grad_norm": 1.1094717979431152, + "learning_rate": 5.088821470820561e-06, + "loss": 0.308, + "step": 25452 + }, + { + "epoch": 0.5095313164677326, + "grad_norm": 1.0449360609054565, + "learning_rate": 5.0884973387855315e-06, + "loss": 0.2951, + "step": 25453 + }, + { + "epoch": 0.509551334984861, + "grad_norm": 1.1044961214065552, + "learning_rate": 5.088173206378478e-06, + "loss": 0.3484, + "step": 25454 + }, + { + "epoch": 0.5095713535019893, + "grad_norm": 1.181020975112915, + "learning_rate": 5.087849073600766e-06, + "loss": 0.3035, + "step": 25455 + }, + { + "epoch": 0.5095913720191176, + "grad_norm": 1.9274616241455078, + "learning_rate": 5.087524940453753e-06, + "loss": 0.7476, + "step": 25456 + }, + { + "epoch": 0.509611390536246, + "grad_norm": 1.0274772644042969, + "learning_rate": 5.0872008069388055e-06, + "loss": 0.2855, + "step": 25457 + }, + { + "epoch": 0.5096314090533743, + "grad_norm": 1.1087840795516968, + "learning_rate": 5.086876673057284e-06, + "loss": 0.2982, + "step": 25458 + }, + { + "epoch": 0.5096514275705027, + "grad_norm": 1.045203447341919, + "learning_rate": 5.086552538810552e-06, + "loss": 0.2419, + "step": 25459 + }, + { + "epoch": 0.509671446087631, + "grad_norm": 1.0217636823654175, + "learning_rate": 5.086228404199971e-06, + "loss": 0.2623, + "step": 25460 + }, + { + "epoch": 0.5096914646047594, + "grad_norm": 1.1736723184585571, + "learning_rate": 5.085904269226905e-06, + "loss": 0.3143, + "step": 25461 + }, + { + "epoch": 0.5097114831218877, + "grad_norm": 1.0491269826889038, + "learning_rate": 5.085580133892715e-06, + "loss": 0.3068, + "step": 25462 + }, + { + "epoch": 0.509731501639016, + "grad_norm": 1.1790258884429932, + "learning_rate": 5.085255998198766e-06, + "loss": 0.275, + "step": 25463 + }, + { + "epoch": 0.5097515201561444, + "grad_norm": 1.0440632104873657, + "learning_rate": 5.084931862146417e-06, + "loss": 0.3026, + "step": 25464 + }, + { + "epoch": 0.5097715386732727, + "grad_norm": 1.252590298652649, + "learning_rate": 5.084607725737035e-06, + "loss": 0.3157, + "step": 25465 + }, + { + "epoch": 0.5097915571904011, + "grad_norm": 1.212272047996521, + "learning_rate": 5.0842835889719796e-06, + "loss": 0.3338, + "step": 25466 + }, + { + "epoch": 0.5098115757075294, + "grad_norm": 1.041983723640442, + "learning_rate": 5.0839594518526135e-06, + "loss": 0.3003, + "step": 25467 + }, + { + "epoch": 0.5098315942246578, + "grad_norm": 1.9198365211486816, + "learning_rate": 5.083635314380301e-06, + "loss": 0.8549, + "step": 25468 + }, + { + "epoch": 0.5098516127417861, + "grad_norm": 1.197733998298645, + "learning_rate": 5.083311176556402e-06, + "loss": 0.32, + "step": 25469 + }, + { + "epoch": 0.5098716312589145, + "grad_norm": 1.165903091430664, + "learning_rate": 5.082987038382283e-06, + "loss": 0.2961, + "step": 25470 + }, + { + "epoch": 0.5098916497760428, + "grad_norm": 1.8000867366790771, + "learning_rate": 5.082662899859303e-06, + "loss": 0.8069, + "step": 25471 + }, + { + "epoch": 0.5099116682931711, + "grad_norm": 1.102107286453247, + "learning_rate": 5.082338760988827e-06, + "loss": 0.3294, + "step": 25472 + }, + { + "epoch": 0.5099316868102995, + "grad_norm": 1.3309694528579712, + "learning_rate": 5.082014621772215e-06, + "loss": 0.3223, + "step": 25473 + }, + { + "epoch": 0.5099517053274278, + "grad_norm": 0.9667074084281921, + "learning_rate": 5.081690482210831e-06, + "loss": 0.304, + "step": 25474 + }, + { + "epoch": 0.5099717238445562, + "grad_norm": 1.0709437131881714, + "learning_rate": 5.0813663423060375e-06, + "loss": 0.2864, + "step": 25475 + }, + { + "epoch": 0.5099917423616845, + "grad_norm": 1.003713607788086, + "learning_rate": 5.0810422020592e-06, + "loss": 0.2999, + "step": 25476 + }, + { + "epoch": 0.510011760878813, + "grad_norm": 2.0922491550445557, + "learning_rate": 5.080718061471678e-06, + "loss": 0.8225, + "step": 25477 + }, + { + "epoch": 0.5100317793959412, + "grad_norm": 2.0363147258758545, + "learning_rate": 5.080393920544832e-06, + "loss": 0.8174, + "step": 25478 + }, + { + "epoch": 0.5100517979130695, + "grad_norm": 1.1953928470611572, + "learning_rate": 5.080069779280029e-06, + "loss": 0.2839, + "step": 25479 + }, + { + "epoch": 0.5100718164301979, + "grad_norm": 1.1168967485427856, + "learning_rate": 5.079745637678629e-06, + "loss": 0.3099, + "step": 25480 + }, + { + "epoch": 0.5100918349473262, + "grad_norm": 1.1169228553771973, + "learning_rate": 5.079421495741997e-06, + "loss": 0.3237, + "step": 25481 + }, + { + "epoch": 0.5101118534644546, + "grad_norm": 1.039862871170044, + "learning_rate": 5.0790973534714935e-06, + "loss": 0.271, + "step": 25482 + }, + { + "epoch": 0.5101318719815829, + "grad_norm": 1.1882747411727905, + "learning_rate": 5.078773210868483e-06, + "loss": 0.3446, + "step": 25483 + }, + { + "epoch": 0.5101518904987113, + "grad_norm": 1.1426007747650146, + "learning_rate": 5.0784490679343245e-06, + "loss": 0.2813, + "step": 25484 + }, + { + "epoch": 0.5101719090158396, + "grad_norm": 1.1126015186309814, + "learning_rate": 5.078124924670384e-06, + "loss": 0.3061, + "step": 25485 + }, + { + "epoch": 0.510191927532968, + "grad_norm": 1.014223337173462, + "learning_rate": 5.077800781078024e-06, + "loss": 0.2863, + "step": 25486 + }, + { + "epoch": 0.5102119460500963, + "grad_norm": 1.1104971170425415, + "learning_rate": 5.077476637158605e-06, + "loss": 0.3316, + "step": 25487 + }, + { + "epoch": 0.5102319645672246, + "grad_norm": 1.2109390497207642, + "learning_rate": 5.0771524929134915e-06, + "loss": 0.318, + "step": 25488 + }, + { + "epoch": 0.510251983084353, + "grad_norm": 1.891845941543579, + "learning_rate": 5.076828348344045e-06, + "loss": 0.7451, + "step": 25489 + }, + { + "epoch": 0.5102720016014813, + "grad_norm": 1.149457573890686, + "learning_rate": 5.07650420345163e-06, + "loss": 0.3179, + "step": 25490 + }, + { + "epoch": 0.5102920201186097, + "grad_norm": 1.1274478435516357, + "learning_rate": 5.076180058237606e-06, + "loss": 0.3237, + "step": 25491 + }, + { + "epoch": 0.510312038635738, + "grad_norm": 1.330850601196289, + "learning_rate": 5.075855912703338e-06, + "loss": 0.3596, + "step": 25492 + }, + { + "epoch": 0.5103320571528664, + "grad_norm": 1.9747370481491089, + "learning_rate": 5.075531766850188e-06, + "loss": 0.7674, + "step": 25493 + }, + { + "epoch": 0.5103520756699947, + "grad_norm": 1.0381189584732056, + "learning_rate": 5.07520762067952e-06, + "loss": 0.3084, + "step": 25494 + }, + { + "epoch": 0.510372094187123, + "grad_norm": 1.9837312698364258, + "learning_rate": 5.0748834741926935e-06, + "loss": 0.7217, + "step": 25495 + }, + { + "epoch": 0.5103921127042514, + "grad_norm": 1.129942774772644, + "learning_rate": 5.074559327391074e-06, + "loss": 0.3142, + "step": 25496 + }, + { + "epoch": 0.5104121312213797, + "grad_norm": 1.405843734741211, + "learning_rate": 5.074235180276024e-06, + "loss": 0.3659, + "step": 25497 + }, + { + "epoch": 0.5104321497385081, + "grad_norm": 1.081316351890564, + "learning_rate": 5.073911032848904e-06, + "loss": 0.2804, + "step": 25498 + }, + { + "epoch": 0.5104521682556364, + "grad_norm": 1.0527540445327759, + "learning_rate": 5.073586885111078e-06, + "loss": 0.3201, + "step": 25499 + }, + { + "epoch": 0.5104721867727648, + "grad_norm": 1.1416898965835571, + "learning_rate": 5.073262737063908e-06, + "loss": 0.3278, + "step": 25500 + }, + { + "epoch": 0.5104922052898931, + "grad_norm": 1.1721928119659424, + "learning_rate": 5.072938588708759e-06, + "loss": 0.2874, + "step": 25501 + }, + { + "epoch": 0.5105122238070215, + "grad_norm": 1.0513240098953247, + "learning_rate": 5.07261444004699e-06, + "loss": 0.3009, + "step": 25502 + }, + { + "epoch": 0.5105322423241498, + "grad_norm": 1.1245520114898682, + "learning_rate": 5.072290291079967e-06, + "loss": 0.2968, + "step": 25503 + }, + { + "epoch": 0.5105522608412781, + "grad_norm": 1.0547502040863037, + "learning_rate": 5.07196614180905e-06, + "loss": 0.2947, + "step": 25504 + }, + { + "epoch": 0.5105722793584065, + "grad_norm": 1.0454392433166504, + "learning_rate": 5.071641992235603e-06, + "loss": 0.3369, + "step": 25505 + }, + { + "epoch": 0.5105922978755348, + "grad_norm": 1.0092923641204834, + "learning_rate": 5.071317842360989e-06, + "loss": 0.2775, + "step": 25506 + }, + { + "epoch": 0.5106123163926632, + "grad_norm": 1.0995501279830933, + "learning_rate": 5.07099369218657e-06, + "loss": 0.328, + "step": 25507 + }, + { + "epoch": 0.5106323349097915, + "grad_norm": 1.8389408588409424, + "learning_rate": 5.07066954171371e-06, + "loss": 0.7095, + "step": 25508 + }, + { + "epoch": 0.51065235342692, + "grad_norm": 0.9878889322280884, + "learning_rate": 5.0703453909437684e-06, + "loss": 0.3154, + "step": 25509 + }, + { + "epoch": 0.5106723719440482, + "grad_norm": 1.0832321643829346, + "learning_rate": 5.070021239878111e-06, + "loss": 0.319, + "step": 25510 + }, + { + "epoch": 0.5106923904611765, + "grad_norm": 1.2422890663146973, + "learning_rate": 5.0696970885181e-06, + "loss": 0.3434, + "step": 25511 + }, + { + "epoch": 0.5107124089783049, + "grad_norm": 1.1244709491729736, + "learning_rate": 5.069372936865095e-06, + "loss": 0.3384, + "step": 25512 + }, + { + "epoch": 0.5107324274954332, + "grad_norm": 1.000392198562622, + "learning_rate": 5.069048784920463e-06, + "loss": 0.2784, + "step": 25513 + }, + { + "epoch": 0.5107524460125616, + "grad_norm": 1.3019661903381348, + "learning_rate": 5.068724632685565e-06, + "loss": 0.3184, + "step": 25514 + }, + { + "epoch": 0.5107724645296899, + "grad_norm": 1.0769951343536377, + "learning_rate": 5.068400480161763e-06, + "loss": 0.2665, + "step": 25515 + }, + { + "epoch": 0.5107924830468183, + "grad_norm": 1.1189934015274048, + "learning_rate": 5.068076327350419e-06, + "loss": 0.321, + "step": 25516 + }, + { + "epoch": 0.5108125015639466, + "grad_norm": 1.091689109802246, + "learning_rate": 5.067752174252898e-06, + "loss": 0.3468, + "step": 25517 + }, + { + "epoch": 0.510832520081075, + "grad_norm": 1.0639811754226685, + "learning_rate": 5.0674280208705605e-06, + "loss": 0.2816, + "step": 25518 + }, + { + "epoch": 0.5108525385982033, + "grad_norm": 1.1994630098342896, + "learning_rate": 5.067103867204771e-06, + "loss": 0.2915, + "step": 25519 + }, + { + "epoch": 0.5108725571153316, + "grad_norm": 1.0388776063919067, + "learning_rate": 5.066779713256892e-06, + "loss": 0.2898, + "step": 25520 + }, + { + "epoch": 0.51089257563246, + "grad_norm": 1.1804355382919312, + "learning_rate": 5.066455559028285e-06, + "loss": 0.3525, + "step": 25521 + }, + { + "epoch": 0.5109125941495883, + "grad_norm": 1.2952163219451904, + "learning_rate": 5.066131404520312e-06, + "loss": 0.3148, + "step": 25522 + }, + { + "epoch": 0.5109326126667167, + "grad_norm": 1.13035249710083, + "learning_rate": 5.065807249734338e-06, + "loss": 0.3121, + "step": 25523 + }, + { + "epoch": 0.510952631183845, + "grad_norm": 1.1667375564575195, + "learning_rate": 5.065483094671724e-06, + "loss": 0.291, + "step": 25524 + }, + { + "epoch": 0.5109726497009734, + "grad_norm": 1.1529115438461304, + "learning_rate": 5.065158939333832e-06, + "loss": 0.2818, + "step": 25525 + }, + { + "epoch": 0.5109926682181017, + "grad_norm": 1.1221966743469238, + "learning_rate": 5.064834783722027e-06, + "loss": 0.3366, + "step": 25526 + }, + { + "epoch": 0.51101268673523, + "grad_norm": 1.056178331375122, + "learning_rate": 5.064510627837671e-06, + "loss": 0.2931, + "step": 25527 + }, + { + "epoch": 0.5110327052523584, + "grad_norm": 1.082637071609497, + "learning_rate": 5.064186471682126e-06, + "loss": 0.3099, + "step": 25528 + }, + { + "epoch": 0.5110527237694867, + "grad_norm": 1.1061168909072876, + "learning_rate": 5.063862315256755e-06, + "loss": 0.3064, + "step": 25529 + }, + { + "epoch": 0.5110727422866151, + "grad_norm": 1.1118004322052002, + "learning_rate": 5.06353815856292e-06, + "loss": 0.3063, + "step": 25530 + }, + { + "epoch": 0.5110927608037434, + "grad_norm": 1.3840227127075195, + "learning_rate": 5.0632140016019824e-06, + "loss": 0.3243, + "step": 25531 + }, + { + "epoch": 0.5111127793208718, + "grad_norm": 1.0047553777694702, + "learning_rate": 5.06288984437531e-06, + "loss": 0.2875, + "step": 25532 + }, + { + "epoch": 0.5111327978380001, + "grad_norm": 1.0214059352874756, + "learning_rate": 5.062565686884261e-06, + "loss": 0.3139, + "step": 25533 + }, + { + "epoch": 0.5111528163551285, + "grad_norm": 1.1752747297286987, + "learning_rate": 5.0622415291302e-06, + "loss": 0.273, + "step": 25534 + }, + { + "epoch": 0.5111728348722568, + "grad_norm": 1.0286712646484375, + "learning_rate": 5.061917371114488e-06, + "loss": 0.3008, + "step": 25535 + }, + { + "epoch": 0.5111928533893851, + "grad_norm": 1.1043331623077393, + "learning_rate": 5.061593212838489e-06, + "loss": 0.3211, + "step": 25536 + }, + { + "epoch": 0.5112128719065135, + "grad_norm": 1.0543321371078491, + "learning_rate": 5.061269054303565e-06, + "loss": 0.3072, + "step": 25537 + }, + { + "epoch": 0.5112328904236418, + "grad_norm": 1.1700974702835083, + "learning_rate": 5.0609448955110785e-06, + "loss": 0.3272, + "step": 25538 + }, + { + "epoch": 0.5112529089407702, + "grad_norm": 1.0396026372909546, + "learning_rate": 5.0606207364623946e-06, + "loss": 0.3219, + "step": 25539 + }, + { + "epoch": 0.5112729274578985, + "grad_norm": 1.0910346508026123, + "learning_rate": 5.060296577158873e-06, + "loss": 0.3123, + "step": 25540 + }, + { + "epoch": 0.511292945975027, + "grad_norm": 0.97736656665802, + "learning_rate": 5.059972417601878e-06, + "loss": 0.3541, + "step": 25541 + }, + { + "epoch": 0.5113129644921552, + "grad_norm": 1.2178868055343628, + "learning_rate": 5.059648257792772e-06, + "loss": 0.3349, + "step": 25542 + }, + { + "epoch": 0.5113329830092835, + "grad_norm": 1.0817525386810303, + "learning_rate": 5.059324097732917e-06, + "loss": 0.3126, + "step": 25543 + }, + { + "epoch": 0.5113530015264119, + "grad_norm": 1.1581544876098633, + "learning_rate": 5.058999937423677e-06, + "loss": 0.2976, + "step": 25544 + }, + { + "epoch": 0.5113730200435402, + "grad_norm": 1.1502043008804321, + "learning_rate": 5.058675776866413e-06, + "loss": 0.3517, + "step": 25545 + }, + { + "epoch": 0.5113930385606686, + "grad_norm": 1.1070785522460938, + "learning_rate": 5.058351616062489e-06, + "loss": 0.3248, + "step": 25546 + }, + { + "epoch": 0.5114130570777969, + "grad_norm": 1.7307615280151367, + "learning_rate": 5.058027455013268e-06, + "loss": 0.7875, + "step": 25547 + }, + { + "epoch": 0.5114330755949253, + "grad_norm": 1.2855963706970215, + "learning_rate": 5.057703293720111e-06, + "loss": 0.3166, + "step": 25548 + }, + { + "epoch": 0.5114530941120536, + "grad_norm": 1.9531720876693726, + "learning_rate": 5.057379132184383e-06, + "loss": 0.8373, + "step": 25549 + }, + { + "epoch": 0.511473112629182, + "grad_norm": 1.1046481132507324, + "learning_rate": 5.057054970407444e-06, + "loss": 0.3301, + "step": 25550 + }, + { + "epoch": 0.5114931311463103, + "grad_norm": 1.146907925605774, + "learning_rate": 5.056730808390659e-06, + "loss": 0.3097, + "step": 25551 + }, + { + "epoch": 0.5115131496634386, + "grad_norm": 1.034448266029358, + "learning_rate": 5.056406646135391e-06, + "loss": 0.3229, + "step": 25552 + }, + { + "epoch": 0.511533168180567, + "grad_norm": 1.3584507703781128, + "learning_rate": 5.056082483643e-06, + "loss": 0.3274, + "step": 25553 + }, + { + "epoch": 0.5115531866976953, + "grad_norm": 1.2284342050552368, + "learning_rate": 5.055758320914852e-06, + "loss": 0.3046, + "step": 25554 + }, + { + "epoch": 0.5115732052148237, + "grad_norm": 1.149329662322998, + "learning_rate": 5.055434157952306e-06, + "loss": 0.3221, + "step": 25555 + }, + { + "epoch": 0.511593223731952, + "grad_norm": 1.216845989227295, + "learning_rate": 5.055109994756727e-06, + "loss": 0.3587, + "step": 25556 + }, + { + "epoch": 0.5116132422490804, + "grad_norm": 1.7880276441574097, + "learning_rate": 5.0547858313294774e-06, + "loss": 0.7597, + "step": 25557 + }, + { + "epoch": 0.5116332607662087, + "grad_norm": 1.0131412744522095, + "learning_rate": 5.054461667671921e-06, + "loss": 0.2831, + "step": 25558 + }, + { + "epoch": 0.511653279283337, + "grad_norm": 1.8790916204452515, + "learning_rate": 5.054137503785419e-06, + "loss": 0.8387, + "step": 25559 + }, + { + "epoch": 0.5116732978004654, + "grad_norm": 1.0263171195983887, + "learning_rate": 5.0538133396713354e-06, + "loss": 0.289, + "step": 25560 + }, + { + "epoch": 0.5116933163175937, + "grad_norm": 1.1537903547286987, + "learning_rate": 5.0534891753310325e-06, + "loss": 0.309, + "step": 25561 + }, + { + "epoch": 0.5117133348347221, + "grad_norm": 1.2052803039550781, + "learning_rate": 5.05316501076587e-06, + "loss": 0.2865, + "step": 25562 + }, + { + "epoch": 0.5117333533518504, + "grad_norm": 1.0384374856948853, + "learning_rate": 5.052840845977215e-06, + "loss": 0.3198, + "step": 25563 + }, + { + "epoch": 0.5117533718689788, + "grad_norm": 1.0691293478012085, + "learning_rate": 5.052516680966427e-06, + "loss": 0.2561, + "step": 25564 + }, + { + "epoch": 0.5117733903861071, + "grad_norm": 1.091415524482727, + "learning_rate": 5.052192515734873e-06, + "loss": 0.3017, + "step": 25565 + }, + { + "epoch": 0.5117934089032355, + "grad_norm": 1.0409752130508423, + "learning_rate": 5.051868350283911e-06, + "loss": 0.3327, + "step": 25566 + }, + { + "epoch": 0.5118134274203638, + "grad_norm": 1.2521030902862549, + "learning_rate": 5.051544184614906e-06, + "loss": 0.3241, + "step": 25567 + }, + { + "epoch": 0.5118334459374921, + "grad_norm": 2.1039931774139404, + "learning_rate": 5.0512200187292196e-06, + "loss": 0.8285, + "step": 25568 + }, + { + "epoch": 0.5118534644546205, + "grad_norm": 1.1390694379806519, + "learning_rate": 5.050895852628215e-06, + "loss": 0.314, + "step": 25569 + }, + { + "epoch": 0.5118734829717488, + "grad_norm": 1.0868498086929321, + "learning_rate": 5.050571686313257e-06, + "loss": 0.3173, + "step": 25570 + }, + { + "epoch": 0.5118935014888772, + "grad_norm": 1.3020671606063843, + "learning_rate": 5.050247519785705e-06, + "loss": 0.3449, + "step": 25571 + }, + { + "epoch": 0.5119135200060055, + "grad_norm": 1.156526803970337, + "learning_rate": 5.049923353046925e-06, + "loss": 0.3309, + "step": 25572 + }, + { + "epoch": 0.511933538523134, + "grad_norm": 1.166059136390686, + "learning_rate": 5.049599186098276e-06, + "loss": 0.3514, + "step": 25573 + }, + { + "epoch": 0.5119535570402622, + "grad_norm": 1.0509761571884155, + "learning_rate": 5.049275018941125e-06, + "loss": 0.3523, + "step": 25574 + }, + { + "epoch": 0.5119735755573905, + "grad_norm": 1.0985493659973145, + "learning_rate": 5.048950851576829e-06, + "loss": 0.2997, + "step": 25575 + }, + { + "epoch": 0.5119935940745189, + "grad_norm": 1.0922118425369263, + "learning_rate": 5.048626684006755e-06, + "loss": 0.2922, + "step": 25576 + }, + { + "epoch": 0.5120136125916472, + "grad_norm": 1.2707055807113647, + "learning_rate": 5.048302516232266e-06, + "loss": 0.3121, + "step": 25577 + }, + { + "epoch": 0.5120336311087756, + "grad_norm": 1.1209043264389038, + "learning_rate": 5.0479783482547236e-06, + "loss": 0.2916, + "step": 25578 + }, + { + "epoch": 0.5120536496259039, + "grad_norm": 1.0461329221725464, + "learning_rate": 5.047654180075491e-06, + "loss": 0.3025, + "step": 25579 + }, + { + "epoch": 0.5120736681430323, + "grad_norm": 1.0936799049377441, + "learning_rate": 5.04733001169593e-06, + "loss": 0.3157, + "step": 25580 + }, + { + "epoch": 0.5120936866601606, + "grad_norm": 1.281591534614563, + "learning_rate": 5.047005843117405e-06, + "loss": 0.3093, + "step": 25581 + }, + { + "epoch": 0.512113705177289, + "grad_norm": 1.7162503004074097, + "learning_rate": 5.046681674341275e-06, + "loss": 0.7214, + "step": 25582 + }, + { + "epoch": 0.5121337236944173, + "grad_norm": 1.0790672302246094, + "learning_rate": 5.046357505368907e-06, + "loss": 0.3327, + "step": 25583 + }, + { + "epoch": 0.5121537422115456, + "grad_norm": 1.022032380104065, + "learning_rate": 5.046033336201661e-06, + "loss": 0.2634, + "step": 25584 + }, + { + "epoch": 0.512173760728674, + "grad_norm": 1.2109755277633667, + "learning_rate": 5.045709166840902e-06, + "loss": 0.3299, + "step": 25585 + }, + { + "epoch": 0.5121937792458023, + "grad_norm": 1.0658881664276123, + "learning_rate": 5.045384997287991e-06, + "loss": 0.292, + "step": 25586 + }, + { + "epoch": 0.5122137977629307, + "grad_norm": 1.0966242551803589, + "learning_rate": 5.045060827544291e-06, + "loss": 0.2633, + "step": 25587 + }, + { + "epoch": 0.512233816280059, + "grad_norm": 1.2275116443634033, + "learning_rate": 5.044736657611165e-06, + "loss": 0.3644, + "step": 25588 + }, + { + "epoch": 0.5122538347971874, + "grad_norm": 1.1642305850982666, + "learning_rate": 5.044412487489976e-06, + "loss": 0.3105, + "step": 25589 + }, + { + "epoch": 0.5122738533143157, + "grad_norm": 0.9943109154701233, + "learning_rate": 5.044088317182085e-06, + "loss": 0.2977, + "step": 25590 + }, + { + "epoch": 0.512293871831444, + "grad_norm": 1.087925672531128, + "learning_rate": 5.043764146688859e-06, + "loss": 0.2522, + "step": 25591 + }, + { + "epoch": 0.5123138903485724, + "grad_norm": 1.0870611667633057, + "learning_rate": 5.043439976011657e-06, + "loss": 0.3294, + "step": 25592 + }, + { + "epoch": 0.5123339088657007, + "grad_norm": 1.2599834203720093, + "learning_rate": 5.043115805151841e-06, + "loss": 0.3103, + "step": 25593 + }, + { + "epoch": 0.5123539273828291, + "grad_norm": 1.093454360961914, + "learning_rate": 5.042791634110778e-06, + "loss": 0.3453, + "step": 25594 + }, + { + "epoch": 0.5123739458999574, + "grad_norm": 1.2157315015792847, + "learning_rate": 5.042467462889825e-06, + "loss": 0.322, + "step": 25595 + }, + { + "epoch": 0.5123939644170858, + "grad_norm": 1.0891811847686768, + "learning_rate": 5.04214329149035e-06, + "loss": 0.3023, + "step": 25596 + }, + { + "epoch": 0.5124139829342141, + "grad_norm": 1.0860886573791504, + "learning_rate": 5.041819119913713e-06, + "loss": 0.3138, + "step": 25597 + }, + { + "epoch": 0.5124340014513425, + "grad_norm": 1.100143313407898, + "learning_rate": 5.041494948161278e-06, + "loss": 0.3376, + "step": 25598 + }, + { + "epoch": 0.5124540199684708, + "grad_norm": 0.9801263213157654, + "learning_rate": 5.041170776234406e-06, + "loss": 0.2501, + "step": 25599 + }, + { + "epoch": 0.5124740384855991, + "grad_norm": 1.243162989616394, + "learning_rate": 5.040846604134464e-06, + "loss": 0.315, + "step": 25600 + }, + { + "epoch": 0.5124940570027275, + "grad_norm": 1.0633869171142578, + "learning_rate": 5.040522431862809e-06, + "loss": 0.325, + "step": 25601 + }, + { + "epoch": 0.5125140755198558, + "grad_norm": 1.215330958366394, + "learning_rate": 5.040198259420805e-06, + "loss": 0.2741, + "step": 25602 + }, + { + "epoch": 0.5125340940369842, + "grad_norm": 1.5751056671142578, + "learning_rate": 5.0398740868098194e-06, + "loss": 0.2884, + "step": 25603 + }, + { + "epoch": 0.5125541125541125, + "grad_norm": 1.095080018043518, + "learning_rate": 5.03954991403121e-06, + "loss": 0.3528, + "step": 25604 + }, + { + "epoch": 0.512574131071241, + "grad_norm": 1.241660475730896, + "learning_rate": 5.039225741086342e-06, + "loss": 0.3945, + "step": 25605 + }, + { + "epoch": 0.5125941495883692, + "grad_norm": 1.1939133405685425, + "learning_rate": 5.038901567976578e-06, + "loss": 0.3018, + "step": 25606 + }, + { + "epoch": 0.5126141681054975, + "grad_norm": 2.023852825164795, + "learning_rate": 5.038577394703279e-06, + "loss": 0.8334, + "step": 25607 + }, + { + "epoch": 0.5126341866226259, + "grad_norm": 1.0628383159637451, + "learning_rate": 5.038253221267808e-06, + "loss": 0.2854, + "step": 25608 + }, + { + "epoch": 0.5126542051397542, + "grad_norm": 1.097894549369812, + "learning_rate": 5.03792904767153e-06, + "loss": 0.2921, + "step": 25609 + }, + { + "epoch": 0.5126742236568826, + "grad_norm": 1.1994906663894653, + "learning_rate": 5.037604873915807e-06, + "loss": 0.3383, + "step": 25610 + }, + { + "epoch": 0.5126942421740109, + "grad_norm": 1.2363560199737549, + "learning_rate": 5.037280700002001e-06, + "loss": 0.3285, + "step": 25611 + }, + { + "epoch": 0.5127142606911393, + "grad_norm": 1.0858876705169678, + "learning_rate": 5.0369565259314745e-06, + "loss": 0.2929, + "step": 25612 + }, + { + "epoch": 0.5127342792082676, + "grad_norm": 1.0181806087493896, + "learning_rate": 5.036632351705591e-06, + "loss": 0.3363, + "step": 25613 + }, + { + "epoch": 0.512754297725396, + "grad_norm": 1.0092436075210571, + "learning_rate": 5.036308177325712e-06, + "loss": 0.303, + "step": 25614 + }, + { + "epoch": 0.5127743162425243, + "grad_norm": 1.1135571002960205, + "learning_rate": 5.035984002793201e-06, + "loss": 0.284, + "step": 25615 + }, + { + "epoch": 0.5127943347596526, + "grad_norm": 1.2234604358673096, + "learning_rate": 5.0356598281094226e-06, + "loss": 0.2901, + "step": 25616 + }, + { + "epoch": 0.512814353276781, + "grad_norm": 1.8613098859786987, + "learning_rate": 5.035335653275737e-06, + "loss": 0.7491, + "step": 25617 + }, + { + "epoch": 0.5128343717939093, + "grad_norm": 1.1557356119155884, + "learning_rate": 5.035011478293509e-06, + "loss": 0.3052, + "step": 25618 + }, + { + "epoch": 0.5128543903110377, + "grad_norm": 1.1004420518875122, + "learning_rate": 5.0346873031640996e-06, + "loss": 0.3208, + "step": 25619 + }, + { + "epoch": 0.512874408828166, + "grad_norm": 1.1950711011886597, + "learning_rate": 5.034363127888872e-06, + "loss": 0.3259, + "step": 25620 + }, + { + "epoch": 0.5128944273452944, + "grad_norm": 1.231757402420044, + "learning_rate": 5.034038952469188e-06, + "loss": 0.321, + "step": 25621 + }, + { + "epoch": 0.5129144458624227, + "grad_norm": 1.0645942687988281, + "learning_rate": 5.033714776906414e-06, + "loss": 0.2885, + "step": 25622 + }, + { + "epoch": 0.512934464379551, + "grad_norm": 1.3129875659942627, + "learning_rate": 5.03339060120191e-06, + "loss": 0.3119, + "step": 25623 + }, + { + "epoch": 0.5129544828966794, + "grad_norm": 1.843127727508545, + "learning_rate": 5.033066425357037e-06, + "loss": 0.8024, + "step": 25624 + }, + { + "epoch": 0.5129745014138077, + "grad_norm": 1.1879239082336426, + "learning_rate": 5.0327422493731624e-06, + "loss": 0.34, + "step": 25625 + }, + { + "epoch": 0.5129945199309361, + "grad_norm": 1.1946752071380615, + "learning_rate": 5.032418073251645e-06, + "loss": 0.3239, + "step": 25626 + }, + { + "epoch": 0.5130145384480644, + "grad_norm": 1.2046808004379272, + "learning_rate": 5.0320938969938495e-06, + "loss": 0.3148, + "step": 25627 + }, + { + "epoch": 0.5130345569651928, + "grad_norm": 1.102333664894104, + "learning_rate": 5.031769720601136e-06, + "loss": 0.3362, + "step": 25628 + }, + { + "epoch": 0.5130545754823211, + "grad_norm": 1.9070513248443604, + "learning_rate": 5.031445544074873e-06, + "loss": 0.8802, + "step": 25629 + }, + { + "epoch": 0.5130745939994494, + "grad_norm": 1.052936315536499, + "learning_rate": 5.031121367416418e-06, + "loss": 0.325, + "step": 25630 + }, + { + "epoch": 0.5130946125165778, + "grad_norm": 1.1816972494125366, + "learning_rate": 5.030797190627136e-06, + "loss": 0.3172, + "step": 25631 + }, + { + "epoch": 0.5131146310337061, + "grad_norm": 1.1653728485107422, + "learning_rate": 5.030473013708388e-06, + "loss": 0.3483, + "step": 25632 + }, + { + "epoch": 0.5131346495508345, + "grad_norm": 1.098101258277893, + "learning_rate": 5.0301488366615395e-06, + "loss": 0.3469, + "step": 25633 + }, + { + "epoch": 0.5131546680679628, + "grad_norm": 1.1324044466018677, + "learning_rate": 5.0298246594879506e-06, + "loss": 0.306, + "step": 25634 + }, + { + "epoch": 0.5131746865850912, + "grad_norm": 1.2118161916732788, + "learning_rate": 5.029500482188985e-06, + "loss": 0.3021, + "step": 25635 + }, + { + "epoch": 0.5131947051022195, + "grad_norm": 1.307677984237671, + "learning_rate": 5.029176304766007e-06, + "loss": 0.2713, + "step": 25636 + }, + { + "epoch": 0.513214723619348, + "grad_norm": 1.088942527770996, + "learning_rate": 5.0288521272203776e-06, + "loss": 0.2994, + "step": 25637 + }, + { + "epoch": 0.5132347421364762, + "grad_norm": 1.1097006797790527, + "learning_rate": 5.02852794955346e-06, + "loss": 0.3537, + "step": 25638 + }, + { + "epoch": 0.5132547606536045, + "grad_norm": 1.0541188716888428, + "learning_rate": 5.028203771766616e-06, + "loss": 0.3172, + "step": 25639 + }, + { + "epoch": 0.5132747791707329, + "grad_norm": 1.0694109201431274, + "learning_rate": 5.02787959386121e-06, + "loss": 0.2875, + "step": 25640 + }, + { + "epoch": 0.5132947976878612, + "grad_norm": 1.1191924810409546, + "learning_rate": 5.0275554158386055e-06, + "loss": 0.3162, + "step": 25641 + }, + { + "epoch": 0.5133148162049896, + "grad_norm": 0.9522697329521179, + "learning_rate": 5.027231237700163e-06, + "loss": 0.2924, + "step": 25642 + }, + { + "epoch": 0.5133348347221179, + "grad_norm": 1.114063024520874, + "learning_rate": 5.0269070594472465e-06, + "loss": 0.315, + "step": 25643 + }, + { + "epoch": 0.5133548532392463, + "grad_norm": 1.0441962480545044, + "learning_rate": 5.026582881081219e-06, + "loss": 0.3391, + "step": 25644 + }, + { + "epoch": 0.5133748717563746, + "grad_norm": 2.056522846221924, + "learning_rate": 5.026258702603442e-06, + "loss": 0.7938, + "step": 25645 + }, + { + "epoch": 0.5133948902735029, + "grad_norm": 1.1318482160568237, + "learning_rate": 5.025934524015279e-06, + "loss": 0.304, + "step": 25646 + }, + { + "epoch": 0.5134149087906313, + "grad_norm": 1.1154754161834717, + "learning_rate": 5.025610345318093e-06, + "loss": 0.3759, + "step": 25647 + }, + { + "epoch": 0.5134349273077596, + "grad_norm": 1.0377317667007446, + "learning_rate": 5.025286166513246e-06, + "loss": 0.3019, + "step": 25648 + }, + { + "epoch": 0.513454945824888, + "grad_norm": 1.2534829378128052, + "learning_rate": 5.024961987602103e-06, + "loss": 0.3142, + "step": 25649 + }, + { + "epoch": 0.5134749643420163, + "grad_norm": 1.0830519199371338, + "learning_rate": 5.024637808586022e-06, + "loss": 0.2681, + "step": 25650 + }, + { + "epoch": 0.5134949828591447, + "grad_norm": 1.177004098892212, + "learning_rate": 5.024313629466373e-06, + "loss": 0.3345, + "step": 25651 + }, + { + "epoch": 0.513515001376273, + "grad_norm": 1.9973204135894775, + "learning_rate": 5.023989450244512e-06, + "loss": 0.8277, + "step": 25652 + }, + { + "epoch": 0.5135350198934014, + "grad_norm": 1.0913710594177246, + "learning_rate": 5.023665270921805e-06, + "loss": 0.3385, + "step": 25653 + }, + { + "epoch": 0.5135550384105297, + "grad_norm": 1.0830076932907104, + "learning_rate": 5.023341091499616e-06, + "loss": 0.3104, + "step": 25654 + }, + { + "epoch": 0.513575056927658, + "grad_norm": 2.0686964988708496, + "learning_rate": 5.023016911979304e-06, + "loss": 0.7905, + "step": 25655 + }, + { + "epoch": 0.5135950754447864, + "grad_norm": 1.0978212356567383, + "learning_rate": 5.022692732362235e-06, + "loss": 0.3066, + "step": 25656 + }, + { + "epoch": 0.5136150939619147, + "grad_norm": 1.063129186630249, + "learning_rate": 5.02236855264977e-06, + "loss": 0.3242, + "step": 25657 + }, + { + "epoch": 0.5136351124790431, + "grad_norm": 1.0154839754104614, + "learning_rate": 5.022044372843273e-06, + "loss": 0.2626, + "step": 25658 + }, + { + "epoch": 0.5136551309961714, + "grad_norm": 1.0588370561599731, + "learning_rate": 5.021720192944106e-06, + "loss": 0.2869, + "step": 25659 + }, + { + "epoch": 0.5136751495132998, + "grad_norm": 1.0923632383346558, + "learning_rate": 5.021396012953631e-06, + "loss": 0.3138, + "step": 25660 + }, + { + "epoch": 0.5136951680304281, + "grad_norm": 1.1590443849563599, + "learning_rate": 5.021071832873213e-06, + "loss": 0.3173, + "step": 25661 + }, + { + "epoch": 0.5137151865475564, + "grad_norm": 1.3566539287567139, + "learning_rate": 5.0207476527042136e-06, + "loss": 0.3166, + "step": 25662 + }, + { + "epoch": 0.5137352050646848, + "grad_norm": 0.9753929376602173, + "learning_rate": 5.020423472447995e-06, + "loss": 0.2862, + "step": 25663 + }, + { + "epoch": 0.5137552235818131, + "grad_norm": 1.0525234937667847, + "learning_rate": 5.0200992921059215e-06, + "loss": 0.3432, + "step": 25664 + }, + { + "epoch": 0.5137752420989415, + "grad_norm": 1.0582365989685059, + "learning_rate": 5.019775111679352e-06, + "loss": 0.3327, + "step": 25665 + }, + { + "epoch": 0.5137952606160698, + "grad_norm": 1.2299814224243164, + "learning_rate": 5.019450931169655e-06, + "loss": 0.301, + "step": 25666 + }, + { + "epoch": 0.5138152791331982, + "grad_norm": 1.982265830039978, + "learning_rate": 5.01912675057819e-06, + "loss": 0.827, + "step": 25667 + }, + { + "epoch": 0.5138352976503265, + "grad_norm": 1.8201333284378052, + "learning_rate": 5.018802569906319e-06, + "loss": 0.8396, + "step": 25668 + }, + { + "epoch": 0.513855316167455, + "grad_norm": 1.275664210319519, + "learning_rate": 5.018478389155408e-06, + "loss": 0.3483, + "step": 25669 + }, + { + "epoch": 0.5138753346845832, + "grad_norm": 1.1993790864944458, + "learning_rate": 5.018154208326816e-06, + "loss": 0.3034, + "step": 25670 + }, + { + "epoch": 0.5138953532017115, + "grad_norm": 1.190858006477356, + "learning_rate": 5.017830027421909e-06, + "loss": 0.3012, + "step": 25671 + }, + { + "epoch": 0.5139153717188399, + "grad_norm": 1.1346020698547363, + "learning_rate": 5.017505846442048e-06, + "loss": 0.3543, + "step": 25672 + }, + { + "epoch": 0.5139353902359682, + "grad_norm": 1.0249358415603638, + "learning_rate": 5.017181665388595e-06, + "loss": 0.2887, + "step": 25673 + }, + { + "epoch": 0.5139554087530966, + "grad_norm": 1.2280203104019165, + "learning_rate": 5.016857484262916e-06, + "loss": 0.3091, + "step": 25674 + }, + { + "epoch": 0.5139754272702249, + "grad_norm": 1.065323829650879, + "learning_rate": 5.016533303066371e-06, + "loss": 0.302, + "step": 25675 + }, + { + "epoch": 0.5139954457873533, + "grad_norm": 0.9991739988327026, + "learning_rate": 5.016209121800324e-06, + "loss": 0.2458, + "step": 25676 + }, + { + "epoch": 0.5140154643044816, + "grad_norm": 1.0620343685150146, + "learning_rate": 5.0158849404661355e-06, + "loss": 0.2987, + "step": 25677 + }, + { + "epoch": 0.5140354828216099, + "grad_norm": 1.1729931831359863, + "learning_rate": 5.015560759065171e-06, + "loss": 0.2924, + "step": 25678 + }, + { + "epoch": 0.5140555013387383, + "grad_norm": 1.072574257850647, + "learning_rate": 5.015236577598792e-06, + "loss": 0.2903, + "step": 25679 + }, + { + "epoch": 0.5140755198558666, + "grad_norm": 0.962878406047821, + "learning_rate": 5.014912396068363e-06, + "loss": 0.3154, + "step": 25680 + }, + { + "epoch": 0.514095538372995, + "grad_norm": 1.096840500831604, + "learning_rate": 5.014588214475245e-06, + "loss": 0.3025, + "step": 25681 + }, + { + "epoch": 0.5141155568901233, + "grad_norm": 1.1729270219802856, + "learning_rate": 5.014264032820802e-06, + "loss": 0.344, + "step": 25682 + }, + { + "epoch": 0.5141355754072517, + "grad_norm": 1.073595404624939, + "learning_rate": 5.013939851106394e-06, + "loss": 0.3247, + "step": 25683 + }, + { + "epoch": 0.51415559392438, + "grad_norm": 1.0355232954025269, + "learning_rate": 5.0136156693333885e-06, + "loss": 0.3436, + "step": 25684 + }, + { + "epoch": 0.5141756124415084, + "grad_norm": 1.0863631963729858, + "learning_rate": 5.0132914875031436e-06, + "loss": 0.3314, + "step": 25685 + }, + { + "epoch": 0.5141956309586367, + "grad_norm": 1.0840964317321777, + "learning_rate": 5.012967305617025e-06, + "loss": 0.3056, + "step": 25686 + }, + { + "epoch": 0.514215649475765, + "grad_norm": 1.093269944190979, + "learning_rate": 5.012643123676395e-06, + "loss": 0.332, + "step": 25687 + }, + { + "epoch": 0.5142356679928934, + "grad_norm": 0.9802899360656738, + "learning_rate": 5.012318941682615e-06, + "loss": 0.262, + "step": 25688 + }, + { + "epoch": 0.5142556865100217, + "grad_norm": 1.0721486806869507, + "learning_rate": 5.01199475963705e-06, + "loss": 0.3285, + "step": 25689 + }, + { + "epoch": 0.5142757050271501, + "grad_norm": 0.9991018772125244, + "learning_rate": 5.01167057754106e-06, + "loss": 0.3134, + "step": 25690 + }, + { + "epoch": 0.5142957235442784, + "grad_norm": 1.0928740501403809, + "learning_rate": 5.01134639539601e-06, + "loss": 0.3005, + "step": 25691 + }, + { + "epoch": 0.5143157420614068, + "grad_norm": 1.9077714681625366, + "learning_rate": 5.0110222132032625e-06, + "loss": 0.8605, + "step": 25692 + }, + { + "epoch": 0.5143357605785351, + "grad_norm": 1.0237680673599243, + "learning_rate": 5.010698030964181e-06, + "loss": 0.2964, + "step": 25693 + }, + { + "epoch": 0.5143557790956634, + "grad_norm": 1.2433862686157227, + "learning_rate": 5.0103738486801254e-06, + "loss": 0.3363, + "step": 25694 + }, + { + "epoch": 0.5143757976127918, + "grad_norm": 1.1174261569976807, + "learning_rate": 5.010049666352462e-06, + "loss": 0.3647, + "step": 25695 + }, + { + "epoch": 0.5143958161299201, + "grad_norm": 1.0993181467056274, + "learning_rate": 5.009725483982551e-06, + "loss": 0.3429, + "step": 25696 + }, + { + "epoch": 0.5144158346470485, + "grad_norm": 1.0428147315979004, + "learning_rate": 5.009401301571757e-06, + "loss": 0.3511, + "step": 25697 + }, + { + "epoch": 0.5144358531641768, + "grad_norm": 1.0281131267547607, + "learning_rate": 5.0090771191214405e-06, + "loss": 0.287, + "step": 25698 + }, + { + "epoch": 0.5144558716813052, + "grad_norm": 1.0388129949569702, + "learning_rate": 5.008752936632967e-06, + "loss": 0.3367, + "step": 25699 + }, + { + "epoch": 0.5144758901984335, + "grad_norm": 1.9038392305374146, + "learning_rate": 5.008428754107698e-06, + "loss": 0.7796, + "step": 25700 + }, + { + "epoch": 0.514495908715562, + "grad_norm": 1.0063552856445312, + "learning_rate": 5.008104571546995e-06, + "loss": 0.2917, + "step": 25701 + }, + { + "epoch": 0.5145159272326902, + "grad_norm": 1.0788737535476685, + "learning_rate": 5.007780388952224e-06, + "loss": 0.2632, + "step": 25702 + }, + { + "epoch": 0.5145359457498185, + "grad_norm": 1.0020192861557007, + "learning_rate": 5.0074562063247445e-06, + "loss": 0.2904, + "step": 25703 + }, + { + "epoch": 0.5145559642669469, + "grad_norm": 1.158579707145691, + "learning_rate": 5.00713202366592e-06, + "loss": 0.3357, + "step": 25704 + }, + { + "epoch": 0.5145759827840752, + "grad_norm": 1.3278790712356567, + "learning_rate": 5.006807840977116e-06, + "loss": 0.3506, + "step": 25705 + }, + { + "epoch": 0.5145960013012036, + "grad_norm": 1.0315041542053223, + "learning_rate": 5.006483658259694e-06, + "loss": 0.2958, + "step": 25706 + }, + { + "epoch": 0.5146160198183319, + "grad_norm": 1.0343420505523682, + "learning_rate": 5.006159475515015e-06, + "loss": 0.2936, + "step": 25707 + }, + { + "epoch": 0.5146360383354603, + "grad_norm": 1.1108994483947754, + "learning_rate": 5.005835292744441e-06, + "loss": 0.316, + "step": 25708 + }, + { + "epoch": 0.5146560568525886, + "grad_norm": 1.0645185708999634, + "learning_rate": 5.0055111099493394e-06, + "loss": 0.3038, + "step": 25709 + }, + { + "epoch": 0.5146760753697169, + "grad_norm": 1.0400007963180542, + "learning_rate": 5.005186927131068e-06, + "loss": 0.3513, + "step": 25710 + }, + { + "epoch": 0.5146960938868453, + "grad_norm": 1.0627167224884033, + "learning_rate": 5.004862744290993e-06, + "loss": 0.3015, + "step": 25711 + }, + { + "epoch": 0.5147161124039736, + "grad_norm": 1.7675572633743286, + "learning_rate": 5.004538561430475e-06, + "loss": 0.8015, + "step": 25712 + }, + { + "epoch": 0.514736130921102, + "grad_norm": 1.016926884651184, + "learning_rate": 5.00421437855088e-06, + "loss": 0.3063, + "step": 25713 + }, + { + "epoch": 0.5147561494382303, + "grad_norm": 1.2514172792434692, + "learning_rate": 5.003890195653567e-06, + "loss": 0.3379, + "step": 25714 + }, + { + "epoch": 0.5147761679553587, + "grad_norm": 1.2359611988067627, + "learning_rate": 5.0035660127399025e-06, + "loss": 0.3312, + "step": 25715 + }, + { + "epoch": 0.514796186472487, + "grad_norm": 1.9824248552322388, + "learning_rate": 5.003241829811244e-06, + "loss": 0.7867, + "step": 25716 + }, + { + "epoch": 0.5148162049896154, + "grad_norm": 1.2454884052276611, + "learning_rate": 5.002917646868959e-06, + "loss": 0.3203, + "step": 25717 + }, + { + "epoch": 0.5148362235067437, + "grad_norm": 1.0971341133117676, + "learning_rate": 5.00259346391441e-06, + "loss": 0.3445, + "step": 25718 + }, + { + "epoch": 0.514856242023872, + "grad_norm": 1.2963005304336548, + "learning_rate": 5.002269280948957e-06, + "loss": 0.2835, + "step": 25719 + }, + { + "epoch": 0.5148762605410004, + "grad_norm": 1.2078254222869873, + "learning_rate": 5.001945097973966e-06, + "loss": 0.3603, + "step": 25720 + }, + { + "epoch": 0.5148962790581287, + "grad_norm": 1.2488137483596802, + "learning_rate": 5.001620914990796e-06, + "loss": 0.3291, + "step": 25721 + }, + { + "epoch": 0.5149162975752571, + "grad_norm": 1.2539924383163452, + "learning_rate": 5.001296732000815e-06, + "loss": 0.2907, + "step": 25722 + }, + { + "epoch": 0.5149363160923854, + "grad_norm": 1.1180334091186523, + "learning_rate": 5.0009725490053815e-06, + "loss": 0.3531, + "step": 25723 + }, + { + "epoch": 0.5149563346095138, + "grad_norm": 1.0830539464950562, + "learning_rate": 5.000648366005859e-06, + "loss": 0.3297, + "step": 25724 + }, + { + "epoch": 0.5149763531266421, + "grad_norm": 1.0599935054779053, + "learning_rate": 5.000324183003611e-06, + "loss": 0.3044, + "step": 25725 + }, + { + "epoch": 0.5149963716437704, + "grad_norm": 1.7458298206329346, + "learning_rate": 5e-06, + "loss": 0.7699, + "step": 25726 + }, + { + "epoch": 0.5150163901608988, + "grad_norm": 1.0699564218521118, + "learning_rate": 4.999675816996391e-06, + "loss": 0.296, + "step": 25727 + }, + { + "epoch": 0.5150364086780271, + "grad_norm": 1.101041555404663, + "learning_rate": 4.999351633994142e-06, + "loss": 0.3152, + "step": 25728 + }, + { + "epoch": 0.5150564271951555, + "grad_norm": 2.0778286457061768, + "learning_rate": 4.99902745099462e-06, + "loss": 0.737, + "step": 25729 + }, + { + "epoch": 0.5150764457122838, + "grad_norm": 1.158022403717041, + "learning_rate": 4.998703267999186e-06, + "loss": 0.3325, + "step": 25730 + }, + { + "epoch": 0.5150964642294122, + "grad_norm": 1.0579980611801147, + "learning_rate": 4.998379085009204e-06, + "loss": 0.2517, + "step": 25731 + }, + { + "epoch": 0.5151164827465405, + "grad_norm": 1.1713029146194458, + "learning_rate": 4.998054902026036e-06, + "loss": 0.3378, + "step": 25732 + }, + { + "epoch": 0.5151365012636689, + "grad_norm": 1.0844916105270386, + "learning_rate": 4.997730719051043e-06, + "loss": 0.2919, + "step": 25733 + }, + { + "epoch": 0.5151565197807972, + "grad_norm": 1.420920729637146, + "learning_rate": 4.997406536085591e-06, + "loss": 0.2898, + "step": 25734 + }, + { + "epoch": 0.5151765382979255, + "grad_norm": 1.0903762578964233, + "learning_rate": 4.997082353131042e-06, + "loss": 0.295, + "step": 25735 + }, + { + "epoch": 0.5151965568150539, + "grad_norm": 1.035103678703308, + "learning_rate": 4.996758170188757e-06, + "loss": 0.263, + "step": 25736 + }, + { + "epoch": 0.5152165753321822, + "grad_norm": 1.1901891231536865, + "learning_rate": 4.996433987260101e-06, + "loss": 0.2494, + "step": 25737 + }, + { + "epoch": 0.5152365938493106, + "grad_norm": 1.1198593378067017, + "learning_rate": 4.996109804346434e-06, + "loss": 0.3221, + "step": 25738 + }, + { + "epoch": 0.5152566123664389, + "grad_norm": 1.1064494848251343, + "learning_rate": 4.995785621449122e-06, + "loss": 0.3163, + "step": 25739 + }, + { + "epoch": 0.5152766308835673, + "grad_norm": 1.1483861207962036, + "learning_rate": 4.995461438569526e-06, + "loss": 0.2488, + "step": 25740 + }, + { + "epoch": 0.5152966494006956, + "grad_norm": 1.1319085359573364, + "learning_rate": 4.995137255709009e-06, + "loss": 0.3206, + "step": 25741 + }, + { + "epoch": 0.5153166679178239, + "grad_norm": 1.1068729162216187, + "learning_rate": 4.994813072868933e-06, + "loss": 0.3173, + "step": 25742 + }, + { + "epoch": 0.5153366864349523, + "grad_norm": 1.299764633178711, + "learning_rate": 4.994488890050663e-06, + "loss": 0.322, + "step": 25743 + }, + { + "epoch": 0.5153567049520806, + "grad_norm": 1.2461528778076172, + "learning_rate": 4.99416470725556e-06, + "loss": 0.2751, + "step": 25744 + }, + { + "epoch": 0.515376723469209, + "grad_norm": 1.0949982404708862, + "learning_rate": 4.993840524484989e-06, + "loss": 0.2773, + "step": 25745 + }, + { + "epoch": 0.5153967419863373, + "grad_norm": 1.1158967018127441, + "learning_rate": 4.993516341740307e-06, + "loss": 0.3377, + "step": 25746 + }, + { + "epoch": 0.5154167605034657, + "grad_norm": 1.0931131839752197, + "learning_rate": 4.993192159022885e-06, + "loss": 0.2903, + "step": 25747 + }, + { + "epoch": 0.515436779020594, + "grad_norm": 1.147098183631897, + "learning_rate": 4.992867976334081e-06, + "loss": 0.3256, + "step": 25748 + }, + { + "epoch": 0.5154567975377224, + "grad_norm": 1.041059970855713, + "learning_rate": 4.992543793675257e-06, + "loss": 0.3109, + "step": 25749 + }, + { + "epoch": 0.5154768160548507, + "grad_norm": 1.1064152717590332, + "learning_rate": 4.992219611047779e-06, + "loss": 0.3319, + "step": 25750 + }, + { + "epoch": 0.515496834571979, + "grad_norm": 1.0292807817459106, + "learning_rate": 4.9918954284530054e-06, + "loss": 0.3301, + "step": 25751 + }, + { + "epoch": 0.5155168530891074, + "grad_norm": 1.1490375995635986, + "learning_rate": 4.991571245892304e-06, + "loss": 0.3518, + "step": 25752 + }, + { + "epoch": 0.5155368716062357, + "grad_norm": 1.0286537408828735, + "learning_rate": 4.991247063367035e-06, + "loss": 0.3127, + "step": 25753 + }, + { + "epoch": 0.5155568901233641, + "grad_norm": 1.4460729360580444, + "learning_rate": 4.990922880878561e-06, + "loss": 0.3238, + "step": 25754 + }, + { + "epoch": 0.5155769086404924, + "grad_norm": 1.7253021001815796, + "learning_rate": 4.990598698428245e-06, + "loss": 0.7431, + "step": 25755 + }, + { + "epoch": 0.5155969271576208, + "grad_norm": 1.072996973991394, + "learning_rate": 4.9902745160174495e-06, + "loss": 0.2926, + "step": 25756 + }, + { + "epoch": 0.5156169456747491, + "grad_norm": 1.0191994905471802, + "learning_rate": 4.9899503336475396e-06, + "loss": 0.3014, + "step": 25757 + }, + { + "epoch": 0.5156369641918774, + "grad_norm": 1.1597601175308228, + "learning_rate": 4.989626151319876e-06, + "loss": 0.3338, + "step": 25758 + }, + { + "epoch": 0.5156569827090058, + "grad_norm": 1.208725094795227, + "learning_rate": 4.9893019690358215e-06, + "loss": 0.3247, + "step": 25759 + }, + { + "epoch": 0.5156770012261341, + "grad_norm": 0.9924926161766052, + "learning_rate": 4.988977786796738e-06, + "loss": 0.2993, + "step": 25760 + }, + { + "epoch": 0.5156970197432625, + "grad_norm": 1.0194748640060425, + "learning_rate": 4.988653604603991e-06, + "loss": 0.2919, + "step": 25761 + }, + { + "epoch": 0.5157170382603908, + "grad_norm": 1.4371508359909058, + "learning_rate": 4.9883294224589415e-06, + "loss": 0.3518, + "step": 25762 + }, + { + "epoch": 0.5157370567775192, + "grad_norm": 1.8079692125320435, + "learning_rate": 4.988005240362953e-06, + "loss": 0.8308, + "step": 25763 + }, + { + "epoch": 0.5157570752946475, + "grad_norm": 1.087748646736145, + "learning_rate": 4.987681058317386e-06, + "loss": 0.3468, + "step": 25764 + }, + { + "epoch": 0.5157770938117759, + "grad_norm": 1.0808427333831787, + "learning_rate": 4.987356876323607e-06, + "loss": 0.2971, + "step": 25765 + }, + { + "epoch": 0.5157971123289042, + "grad_norm": 1.0847985744476318, + "learning_rate": 4.987032694382977e-06, + "loss": 0.3049, + "step": 25766 + }, + { + "epoch": 0.5158171308460325, + "grad_norm": 1.0385459661483765, + "learning_rate": 4.986708512496858e-06, + "loss": 0.2674, + "step": 25767 + }, + { + "epoch": 0.5158371493631609, + "grad_norm": 1.318321943283081, + "learning_rate": 4.986384330666614e-06, + "loss": 0.3197, + "step": 25768 + }, + { + "epoch": 0.5158571678802892, + "grad_norm": 1.096990704536438, + "learning_rate": 4.986060148893606e-06, + "loss": 0.3165, + "step": 25769 + }, + { + "epoch": 0.5158771863974176, + "grad_norm": 2.009343147277832, + "learning_rate": 4.985735967179199e-06, + "loss": 0.677, + "step": 25770 + }, + { + "epoch": 0.5158972049145459, + "grad_norm": 1.2704925537109375, + "learning_rate": 4.985411785524756e-06, + "loss": 0.3199, + "step": 25771 + }, + { + "epoch": 0.5159172234316743, + "grad_norm": 1.0828090906143188, + "learning_rate": 4.98508760393164e-06, + "loss": 0.2976, + "step": 25772 + }, + { + "epoch": 0.5159372419488026, + "grad_norm": 1.036362886428833, + "learning_rate": 4.984763422401208e-06, + "loss": 0.2921, + "step": 25773 + }, + { + "epoch": 0.5159572604659309, + "grad_norm": 1.0021618604660034, + "learning_rate": 4.984439240934831e-06, + "loss": 0.2617, + "step": 25774 + }, + { + "epoch": 0.5159772789830593, + "grad_norm": 1.1552408933639526, + "learning_rate": 4.984115059533865e-06, + "loss": 0.3659, + "step": 25775 + }, + { + "epoch": 0.5159972975001876, + "grad_norm": 1.125182032585144, + "learning_rate": 4.983790878199679e-06, + "loss": 0.3274, + "step": 25776 + }, + { + "epoch": 0.516017316017316, + "grad_norm": 1.0283634662628174, + "learning_rate": 4.9834666969336295e-06, + "loss": 0.2667, + "step": 25777 + }, + { + "epoch": 0.5160373345344443, + "grad_norm": 1.2014304399490356, + "learning_rate": 4.983142515737085e-06, + "loss": 0.3334, + "step": 25778 + }, + { + "epoch": 0.5160573530515727, + "grad_norm": 1.0719443559646606, + "learning_rate": 4.982818334611406e-06, + "loss": 0.3234, + "step": 25779 + }, + { + "epoch": 0.516077371568701, + "grad_norm": 0.97191321849823, + "learning_rate": 4.982494153557954e-06, + "loss": 0.2926, + "step": 25780 + }, + { + "epoch": 0.5160973900858294, + "grad_norm": 1.093325138092041, + "learning_rate": 4.9821699725780935e-06, + "loss": 0.3171, + "step": 25781 + }, + { + "epoch": 0.5161174086029577, + "grad_norm": 1.110864281654358, + "learning_rate": 4.981845791673184e-06, + "loss": 0.2899, + "step": 25782 + }, + { + "epoch": 0.516137427120086, + "grad_norm": 1.071315884590149, + "learning_rate": 4.981521610844593e-06, + "loss": 0.2757, + "step": 25783 + }, + { + "epoch": 0.5161574456372144, + "grad_norm": 1.076446533203125, + "learning_rate": 4.981197430093682e-06, + "loss": 0.3023, + "step": 25784 + }, + { + "epoch": 0.5161774641543427, + "grad_norm": 1.0502029657363892, + "learning_rate": 4.980873249421813e-06, + "loss": 0.2928, + "step": 25785 + }, + { + "epoch": 0.5161974826714711, + "grad_norm": 0.9792867302894592, + "learning_rate": 4.980549068830346e-06, + "loss": 0.277, + "step": 25786 + }, + { + "epoch": 0.5162175011885994, + "grad_norm": 1.0792863368988037, + "learning_rate": 4.980224888320648e-06, + "loss": 0.2849, + "step": 25787 + }, + { + "epoch": 0.5162375197057278, + "grad_norm": 1.149122953414917, + "learning_rate": 4.979900707894081e-06, + "loss": 0.3232, + "step": 25788 + }, + { + "epoch": 0.5162575382228561, + "grad_norm": 1.9902997016906738, + "learning_rate": 4.979576527552007e-06, + "loss": 0.7803, + "step": 25789 + }, + { + "epoch": 0.5162775567399844, + "grad_norm": 1.1242347955703735, + "learning_rate": 4.979252347295789e-06, + "loss": 0.3133, + "step": 25790 + }, + { + "epoch": 0.5162975752571128, + "grad_norm": 1.0660605430603027, + "learning_rate": 4.978928167126787e-06, + "loss": 0.3008, + "step": 25791 + }, + { + "epoch": 0.5163175937742411, + "grad_norm": 1.148808240890503, + "learning_rate": 4.97860398704637e-06, + "loss": 0.3324, + "step": 25792 + }, + { + "epoch": 0.5163376122913695, + "grad_norm": 1.1133372783660889, + "learning_rate": 4.978279807055896e-06, + "loss": 0.3344, + "step": 25793 + }, + { + "epoch": 0.5163576308084978, + "grad_norm": 1.2090632915496826, + "learning_rate": 4.97795562715673e-06, + "loss": 0.354, + "step": 25794 + }, + { + "epoch": 0.5163776493256262, + "grad_norm": 1.1508537530899048, + "learning_rate": 4.97763144735023e-06, + "loss": 0.3553, + "step": 25795 + }, + { + "epoch": 0.5163976678427545, + "grad_norm": 1.1627343893051147, + "learning_rate": 4.977307267637766e-06, + "loss": 0.3439, + "step": 25796 + }, + { + "epoch": 0.5164176863598829, + "grad_norm": 1.1179287433624268, + "learning_rate": 4.976983088020698e-06, + "loss": 0.2991, + "step": 25797 + }, + { + "epoch": 0.5164377048770112, + "grad_norm": 1.891797661781311, + "learning_rate": 4.976658908500387e-06, + "loss": 0.7726, + "step": 25798 + }, + { + "epoch": 0.5164577233941395, + "grad_norm": 1.1209443807601929, + "learning_rate": 4.976334729078195e-06, + "loss": 0.3647, + "step": 25799 + }, + { + "epoch": 0.5164777419112679, + "grad_norm": 1.0720226764678955, + "learning_rate": 4.9760105497554885e-06, + "loss": 0.3675, + "step": 25800 + }, + { + "epoch": 0.5164977604283962, + "grad_norm": 1.1108665466308594, + "learning_rate": 4.975686370533628e-06, + "loss": 0.3533, + "step": 25801 + }, + { + "epoch": 0.5165177789455246, + "grad_norm": 1.1516855955123901, + "learning_rate": 4.9753621914139785e-06, + "loss": 0.3192, + "step": 25802 + }, + { + "epoch": 0.5165377974626529, + "grad_norm": 1.1192877292633057, + "learning_rate": 4.9750380123979006e-06, + "loss": 0.3313, + "step": 25803 + }, + { + "epoch": 0.5165578159797813, + "grad_norm": 1.091898798942566, + "learning_rate": 4.974713833486755e-06, + "loss": 0.3123, + "step": 25804 + }, + { + "epoch": 0.5165778344969096, + "grad_norm": 1.0141124725341797, + "learning_rate": 4.974389654681909e-06, + "loss": 0.284, + "step": 25805 + }, + { + "epoch": 0.5165978530140379, + "grad_norm": 1.134352445602417, + "learning_rate": 4.974065475984722e-06, + "loss": 0.3101, + "step": 25806 + }, + { + "epoch": 0.5166178715311663, + "grad_norm": 1.1690119504928589, + "learning_rate": 4.97374129739656e-06, + "loss": 0.2946, + "step": 25807 + }, + { + "epoch": 0.5166378900482946, + "grad_norm": 1.0482441186904907, + "learning_rate": 4.973417118918782e-06, + "loss": 0.3245, + "step": 25808 + }, + { + "epoch": 0.516657908565423, + "grad_norm": 1.0965477228164673, + "learning_rate": 4.973092940552754e-06, + "loss": 0.3034, + "step": 25809 + }, + { + "epoch": 0.5166779270825513, + "grad_norm": 1.1512525081634521, + "learning_rate": 4.972768762299839e-06, + "loss": 0.3279, + "step": 25810 + }, + { + "epoch": 0.5166979455996797, + "grad_norm": 1.2393244504928589, + "learning_rate": 4.972444584161397e-06, + "loss": 0.32, + "step": 25811 + }, + { + "epoch": 0.516717964116808, + "grad_norm": 1.2031114101409912, + "learning_rate": 4.9721204061387905e-06, + "loss": 0.306, + "step": 25812 + }, + { + "epoch": 0.5167379826339364, + "grad_norm": 0.936674177646637, + "learning_rate": 4.9717962282333845e-06, + "loss": 0.2683, + "step": 25813 + }, + { + "epoch": 0.5167580011510647, + "grad_norm": 1.1386712789535522, + "learning_rate": 4.971472050446541e-06, + "loss": 0.3258, + "step": 25814 + }, + { + "epoch": 0.516778019668193, + "grad_norm": 1.243578553199768, + "learning_rate": 4.971147872779624e-06, + "loss": 0.31, + "step": 25815 + }, + { + "epoch": 0.5167980381853214, + "grad_norm": 1.112734317779541, + "learning_rate": 4.970823695233995e-06, + "loss": 0.3124, + "step": 25816 + }, + { + "epoch": 0.5168180567024497, + "grad_norm": 1.125736117362976, + "learning_rate": 4.970499517811015e-06, + "loss": 0.2872, + "step": 25817 + }, + { + "epoch": 0.5168380752195781, + "grad_norm": 1.1749917268753052, + "learning_rate": 4.970175340512051e-06, + "loss": 0.2847, + "step": 25818 + }, + { + "epoch": 0.5168580937367064, + "grad_norm": 1.1758641004562378, + "learning_rate": 4.969851163338462e-06, + "loss": 0.3355, + "step": 25819 + }, + { + "epoch": 0.5168781122538348, + "grad_norm": 1.0760085582733154, + "learning_rate": 4.969526986291614e-06, + "loss": 0.282, + "step": 25820 + }, + { + "epoch": 0.5168981307709631, + "grad_norm": 1.0910977125167847, + "learning_rate": 4.969202809372867e-06, + "loss": 0.3225, + "step": 25821 + }, + { + "epoch": 0.5169181492880914, + "grad_norm": 1.1534276008605957, + "learning_rate": 4.968878632583583e-06, + "loss": 0.2946, + "step": 25822 + }, + { + "epoch": 0.5169381678052198, + "grad_norm": 1.230285882949829, + "learning_rate": 4.968554455925128e-06, + "loss": 0.3134, + "step": 25823 + }, + { + "epoch": 0.5169581863223481, + "grad_norm": 1.0953842401504517, + "learning_rate": 4.968230279398865e-06, + "loss": 0.3265, + "step": 25824 + }, + { + "epoch": 0.5169782048394765, + "grad_norm": 1.6022186279296875, + "learning_rate": 4.967906103006153e-06, + "loss": 0.3343, + "step": 25825 + }, + { + "epoch": 0.5169982233566048, + "grad_norm": 1.208915114402771, + "learning_rate": 4.9675819267483565e-06, + "loss": 0.344, + "step": 25826 + }, + { + "epoch": 0.5170182418737332, + "grad_norm": 0.9698368906974792, + "learning_rate": 4.967257750626838e-06, + "loss": 0.296, + "step": 25827 + }, + { + "epoch": 0.5170382603908615, + "grad_norm": 0.9853977560997009, + "learning_rate": 4.966933574642964e-06, + "loss": 0.2971, + "step": 25828 + }, + { + "epoch": 0.5170582789079899, + "grad_norm": 1.196560263633728, + "learning_rate": 4.966609398798093e-06, + "loss": 0.3151, + "step": 25829 + }, + { + "epoch": 0.5170782974251182, + "grad_norm": 1.1799553632736206, + "learning_rate": 4.966285223093587e-06, + "loss": 0.2692, + "step": 25830 + }, + { + "epoch": 0.5170983159422465, + "grad_norm": 1.138655424118042, + "learning_rate": 4.965961047530812e-06, + "loss": 0.3028, + "step": 25831 + }, + { + "epoch": 0.5171183344593749, + "grad_norm": 1.0294216871261597, + "learning_rate": 4.965636872111129e-06, + "loss": 0.3287, + "step": 25832 + }, + { + "epoch": 0.5171383529765032, + "grad_norm": 1.098156452178955, + "learning_rate": 4.965312696835902e-06, + "loss": 0.313, + "step": 25833 + }, + { + "epoch": 0.5171583714936316, + "grad_norm": 1.0208522081375122, + "learning_rate": 4.964988521706493e-06, + "loss": 0.2834, + "step": 25834 + }, + { + "epoch": 0.5171783900107599, + "grad_norm": 1.9574809074401855, + "learning_rate": 4.9646643467242624e-06, + "loss": 0.8219, + "step": 25835 + }, + { + "epoch": 0.5171984085278883, + "grad_norm": 1.0654683113098145, + "learning_rate": 4.964340171890578e-06, + "loss": 0.2843, + "step": 25836 + }, + { + "epoch": 0.5172184270450166, + "grad_norm": 1.0691585540771484, + "learning_rate": 4.9640159972067994e-06, + "loss": 0.2992, + "step": 25837 + }, + { + "epoch": 0.5172384455621449, + "grad_norm": 0.9952586889266968, + "learning_rate": 4.96369182267429e-06, + "loss": 0.3181, + "step": 25838 + }, + { + "epoch": 0.5172584640792733, + "grad_norm": 1.8537670373916626, + "learning_rate": 4.963367648294411e-06, + "loss": 0.801, + "step": 25839 + }, + { + "epoch": 0.5172784825964016, + "grad_norm": 1.1673774719238281, + "learning_rate": 4.963043474068527e-06, + "loss": 0.3235, + "step": 25840 + }, + { + "epoch": 0.51729850111353, + "grad_norm": 1.1603118181228638, + "learning_rate": 4.962719299998e-06, + "loss": 0.2908, + "step": 25841 + }, + { + "epoch": 0.5173185196306583, + "grad_norm": 0.9649497866630554, + "learning_rate": 4.9623951260841955e-06, + "loss": 0.2907, + "step": 25842 + }, + { + "epoch": 0.5173385381477867, + "grad_norm": 1.0031776428222656, + "learning_rate": 4.962070952328472e-06, + "loss": 0.3037, + "step": 25843 + }, + { + "epoch": 0.517358556664915, + "grad_norm": 1.2285237312316895, + "learning_rate": 4.961746778732192e-06, + "loss": 0.3223, + "step": 25844 + }, + { + "epoch": 0.5173785751820434, + "grad_norm": 0.9762293100357056, + "learning_rate": 4.961422605296723e-06, + "loss": 0.2766, + "step": 25845 + }, + { + "epoch": 0.5173985936991717, + "grad_norm": 1.235528588294983, + "learning_rate": 4.961098432023424e-06, + "loss": 0.3815, + "step": 25846 + }, + { + "epoch": 0.5174186122163, + "grad_norm": 1.2756638526916504, + "learning_rate": 4.96077425891366e-06, + "loss": 0.3337, + "step": 25847 + }, + { + "epoch": 0.5174386307334284, + "grad_norm": 1.0909186601638794, + "learning_rate": 4.96045008596879e-06, + "loss": 0.3693, + "step": 25848 + }, + { + "epoch": 0.5174586492505567, + "grad_norm": 1.9403992891311646, + "learning_rate": 4.960125913190182e-06, + "loss": 0.8212, + "step": 25849 + }, + { + "epoch": 0.5174786677676851, + "grad_norm": 1.2299712896347046, + "learning_rate": 4.9598017405791955e-06, + "loss": 0.3579, + "step": 25850 + }, + { + "epoch": 0.5174986862848134, + "grad_norm": 1.9603118896484375, + "learning_rate": 4.9594775681371936e-06, + "loss": 0.8395, + "step": 25851 + }, + { + "epoch": 0.5175187048019418, + "grad_norm": 1.4919078350067139, + "learning_rate": 4.959153395865539e-06, + "loss": 0.3167, + "step": 25852 + }, + { + "epoch": 0.5175387233190701, + "grad_norm": 1.1156725883483887, + "learning_rate": 4.958829223765593e-06, + "loss": 0.3314, + "step": 25853 + }, + { + "epoch": 0.5175587418361984, + "grad_norm": 1.1507594585418701, + "learning_rate": 4.9585050518387225e-06, + "loss": 0.3218, + "step": 25854 + }, + { + "epoch": 0.5175787603533268, + "grad_norm": 1.0202138423919678, + "learning_rate": 4.9581808800862884e-06, + "loss": 0.2673, + "step": 25855 + }, + { + "epoch": 0.5175987788704551, + "grad_norm": 1.197064995765686, + "learning_rate": 4.957856708509653e-06, + "loss": 0.3087, + "step": 25856 + }, + { + "epoch": 0.5176187973875835, + "grad_norm": 1.1071960926055908, + "learning_rate": 4.957532537110175e-06, + "loss": 0.3344, + "step": 25857 + }, + { + "epoch": 0.5176388159047118, + "grad_norm": 1.074938178062439, + "learning_rate": 4.957208365889224e-06, + "loss": 0.3168, + "step": 25858 + }, + { + "epoch": 0.5176588344218402, + "grad_norm": 1.069474458694458, + "learning_rate": 4.95688419484816e-06, + "loss": 0.283, + "step": 25859 + }, + { + "epoch": 0.5176788529389685, + "grad_norm": 1.1291722059249878, + "learning_rate": 4.956560023988346e-06, + "loss": 0.2834, + "step": 25860 + }, + { + "epoch": 0.5176988714560969, + "grad_norm": 1.8180381059646606, + "learning_rate": 4.956235853311142e-06, + "loss": 0.752, + "step": 25861 + }, + { + "epoch": 0.5177188899732252, + "grad_norm": 1.1585438251495361, + "learning_rate": 4.955911682817915e-06, + "loss": 0.3354, + "step": 25862 + }, + { + "epoch": 0.5177389084903535, + "grad_norm": 0.9469248056411743, + "learning_rate": 4.955587512510026e-06, + "loss": 0.2713, + "step": 25863 + }, + { + "epoch": 0.5177589270074819, + "grad_norm": 1.1433475017547607, + "learning_rate": 4.9552633423888364e-06, + "loss": 0.2968, + "step": 25864 + }, + { + "epoch": 0.5177789455246102, + "grad_norm": 1.113295078277588, + "learning_rate": 4.954939172455711e-06, + "loss": 0.3481, + "step": 25865 + }, + { + "epoch": 0.5177989640417386, + "grad_norm": 1.1990792751312256, + "learning_rate": 4.954615002712009e-06, + "loss": 0.3381, + "step": 25866 + }, + { + "epoch": 0.5178189825588669, + "grad_norm": 1.3240435123443604, + "learning_rate": 4.9542908331590984e-06, + "loss": 0.3522, + "step": 25867 + }, + { + "epoch": 0.5178390010759953, + "grad_norm": 1.2236508131027222, + "learning_rate": 4.953966663798341e-06, + "loss": 0.398, + "step": 25868 + }, + { + "epoch": 0.5178590195931236, + "grad_norm": 1.1212776899337769, + "learning_rate": 4.953642494631096e-06, + "loss": 0.3066, + "step": 25869 + }, + { + "epoch": 0.5178790381102519, + "grad_norm": 1.0713951587677002, + "learning_rate": 4.953318325658726e-06, + "loss": 0.3002, + "step": 25870 + }, + { + "epoch": 0.5178990566273803, + "grad_norm": 1.0840106010437012, + "learning_rate": 4.952994156882596e-06, + "loss": 0.277, + "step": 25871 + }, + { + "epoch": 0.5179190751445086, + "grad_norm": 1.9039218425750732, + "learning_rate": 4.9526699883040704e-06, + "loss": 0.7743, + "step": 25872 + }, + { + "epoch": 0.517939093661637, + "grad_norm": 1.0296916961669922, + "learning_rate": 4.9523458199245105e-06, + "loss": 0.2999, + "step": 25873 + }, + { + "epoch": 0.5179591121787653, + "grad_norm": 1.1939843893051147, + "learning_rate": 4.952021651745278e-06, + "loss": 0.3488, + "step": 25874 + }, + { + "epoch": 0.5179791306958937, + "grad_norm": 1.1781599521636963, + "learning_rate": 4.9516974837677335e-06, + "loss": 0.3303, + "step": 25875 + }, + { + "epoch": 0.517999149213022, + "grad_norm": 1.0732688903808594, + "learning_rate": 4.951373315993246e-06, + "loss": 0.3221, + "step": 25876 + }, + { + "epoch": 0.5180191677301504, + "grad_norm": 1.1189031600952148, + "learning_rate": 4.951049148423172e-06, + "loss": 0.3238, + "step": 25877 + }, + { + "epoch": 0.5180391862472787, + "grad_norm": 1.0264759063720703, + "learning_rate": 4.950724981058879e-06, + "loss": 0.2842, + "step": 25878 + }, + { + "epoch": 0.518059204764407, + "grad_norm": 1.1116124391555786, + "learning_rate": 4.9504008139017245e-06, + "loss": 0.3209, + "step": 25879 + }, + { + "epoch": 0.5180792232815354, + "grad_norm": 1.1993954181671143, + "learning_rate": 4.950076646953077e-06, + "loss": 0.2889, + "step": 25880 + }, + { + "epoch": 0.5180992417986637, + "grad_norm": 1.2993015050888062, + "learning_rate": 4.949752480214297e-06, + "loss": 0.3455, + "step": 25881 + }, + { + "epoch": 0.5181192603157921, + "grad_norm": 1.053853988647461, + "learning_rate": 4.949428313686746e-06, + "loss": 0.2866, + "step": 25882 + }, + { + "epoch": 0.5181392788329204, + "grad_norm": 1.0838335752487183, + "learning_rate": 4.949104147371786e-06, + "loss": 0.3503, + "step": 25883 + }, + { + "epoch": 0.5181592973500488, + "grad_norm": 1.0230072736740112, + "learning_rate": 4.948779981270781e-06, + "loss": 0.2682, + "step": 25884 + }, + { + "epoch": 0.5181793158671771, + "grad_norm": 1.11582612991333, + "learning_rate": 4.9484558153850955e-06, + "loss": 0.3386, + "step": 25885 + }, + { + "epoch": 0.5181993343843054, + "grad_norm": 1.0722980499267578, + "learning_rate": 4.948131649716091e-06, + "loss": 0.3399, + "step": 25886 + }, + { + "epoch": 0.5182193529014338, + "grad_norm": 1.0304919481277466, + "learning_rate": 4.9478074842651305e-06, + "loss": 0.2845, + "step": 25887 + }, + { + "epoch": 0.5182393714185621, + "grad_norm": 2.090573787689209, + "learning_rate": 4.947483319033573e-06, + "loss": 0.8136, + "step": 25888 + }, + { + "epoch": 0.5182593899356905, + "grad_norm": 1.203498363494873, + "learning_rate": 4.9471591540227866e-06, + "loss": 0.3137, + "step": 25889 + }, + { + "epoch": 0.5182794084528188, + "grad_norm": 1.0886300802230835, + "learning_rate": 4.946834989234131e-06, + "loss": 0.2909, + "step": 25890 + }, + { + "epoch": 0.5182994269699472, + "grad_norm": 1.0436311960220337, + "learning_rate": 4.946510824668971e-06, + "loss": 0.263, + "step": 25891 + }, + { + "epoch": 0.5183194454870755, + "grad_norm": 1.0157840251922607, + "learning_rate": 4.946186660328665e-06, + "loss": 0.2769, + "step": 25892 + }, + { + "epoch": 0.5183394640042039, + "grad_norm": 1.072640061378479, + "learning_rate": 4.945862496214581e-06, + "loss": 0.2999, + "step": 25893 + }, + { + "epoch": 0.5183594825213322, + "grad_norm": 1.239482045173645, + "learning_rate": 4.94553833232808e-06, + "loss": 0.2748, + "step": 25894 + }, + { + "epoch": 0.5183795010384605, + "grad_norm": 1.1241185665130615, + "learning_rate": 4.945214168670523e-06, + "loss": 0.3205, + "step": 25895 + }, + { + "epoch": 0.5183995195555889, + "grad_norm": 1.7702757120132446, + "learning_rate": 4.944890005243274e-06, + "loss": 0.7611, + "step": 25896 + }, + { + "epoch": 0.5184195380727172, + "grad_norm": 1.0982768535614014, + "learning_rate": 4.944565842047694e-06, + "loss": 0.3256, + "step": 25897 + }, + { + "epoch": 0.5184395565898456, + "grad_norm": 1.1078145503997803, + "learning_rate": 4.94424167908515e-06, + "loss": 0.3419, + "step": 25898 + }, + { + "epoch": 0.5184595751069739, + "grad_norm": 1.0131632089614868, + "learning_rate": 4.943917516357001e-06, + "loss": 0.286, + "step": 25899 + }, + { + "epoch": 0.5184795936241023, + "grad_norm": 1.7872041463851929, + "learning_rate": 4.943593353864612e-06, + "loss": 0.7977, + "step": 25900 + }, + { + "epoch": 0.5184996121412306, + "grad_norm": 1.0620089769363403, + "learning_rate": 4.9432691916093414e-06, + "loss": 0.3116, + "step": 25901 + }, + { + "epoch": 0.5185196306583589, + "grad_norm": 1.0292400121688843, + "learning_rate": 4.942945029592557e-06, + "loss": 0.3189, + "step": 25902 + }, + { + "epoch": 0.5185396491754873, + "grad_norm": 1.8661675453186035, + "learning_rate": 4.942620867815618e-06, + "loss": 0.7519, + "step": 25903 + }, + { + "epoch": 0.5185596676926156, + "grad_norm": 1.0987915992736816, + "learning_rate": 4.94229670627989e-06, + "loss": 0.2705, + "step": 25904 + }, + { + "epoch": 0.518579686209744, + "grad_norm": 1.036425232887268, + "learning_rate": 4.941972544986735e-06, + "loss": 0.2877, + "step": 25905 + }, + { + "epoch": 0.5185997047268723, + "grad_norm": 1.0509318113327026, + "learning_rate": 4.9416483839375114e-06, + "loss": 0.3045, + "step": 25906 + }, + { + "epoch": 0.5186197232440007, + "grad_norm": 1.0300837755203247, + "learning_rate": 4.941324223133588e-06, + "loss": 0.2888, + "step": 25907 + }, + { + "epoch": 0.518639741761129, + "grad_norm": 1.1067777872085571, + "learning_rate": 4.941000062576326e-06, + "loss": 0.3514, + "step": 25908 + }, + { + "epoch": 0.5186597602782574, + "grad_norm": 1.1262274980545044, + "learning_rate": 4.940675902267085e-06, + "loss": 0.3086, + "step": 25909 + }, + { + "epoch": 0.5186797787953857, + "grad_norm": 1.782859206199646, + "learning_rate": 4.940351742207228e-06, + "loss": 0.8338, + "step": 25910 + }, + { + "epoch": 0.518699797312514, + "grad_norm": 1.188603162765503, + "learning_rate": 4.940027582398123e-06, + "loss": 0.3323, + "step": 25911 + }, + { + "epoch": 0.5187198158296424, + "grad_norm": 1.101876139640808, + "learning_rate": 4.9397034228411285e-06, + "loss": 0.3005, + "step": 25912 + }, + { + "epoch": 0.5187398343467707, + "grad_norm": 1.191174864768982, + "learning_rate": 4.939379263537608e-06, + "loss": 0.3327, + "step": 25913 + }, + { + "epoch": 0.5187598528638991, + "grad_norm": 1.140101671218872, + "learning_rate": 4.939055104488922e-06, + "loss": 0.2707, + "step": 25914 + }, + { + "epoch": 0.5187798713810274, + "grad_norm": 1.2178475856781006, + "learning_rate": 4.938730945696437e-06, + "loss": 0.358, + "step": 25915 + }, + { + "epoch": 0.5187998898981558, + "grad_norm": 1.1172693967819214, + "learning_rate": 4.938406787161512e-06, + "loss": 0.3659, + "step": 25916 + }, + { + "epoch": 0.5188199084152841, + "grad_norm": 1.0737062692642212, + "learning_rate": 4.938082628885514e-06, + "loss": 0.3133, + "step": 25917 + }, + { + "epoch": 0.5188399269324124, + "grad_norm": 1.072405457496643, + "learning_rate": 4.937758470869803e-06, + "loss": 0.337, + "step": 25918 + }, + { + "epoch": 0.5188599454495408, + "grad_norm": 1.180680513381958, + "learning_rate": 4.93743431311574e-06, + "loss": 0.3157, + "step": 25919 + }, + { + "epoch": 0.5188799639666691, + "grad_norm": 1.0533815622329712, + "learning_rate": 4.937110155624691e-06, + "loss": 0.309, + "step": 25920 + }, + { + "epoch": 0.5188999824837975, + "grad_norm": 1.0792081356048584, + "learning_rate": 4.936785998398018e-06, + "loss": 0.2994, + "step": 25921 + }, + { + "epoch": 0.5189200010009258, + "grad_norm": 1.1466455459594727, + "learning_rate": 4.936461841437083e-06, + "loss": 0.2706, + "step": 25922 + }, + { + "epoch": 0.5189400195180542, + "grad_norm": 1.0786620378494263, + "learning_rate": 4.936137684743246e-06, + "loss": 0.3248, + "step": 25923 + }, + { + "epoch": 0.5189600380351825, + "grad_norm": 1.1384844779968262, + "learning_rate": 4.9358135283178756e-06, + "loss": 0.2748, + "step": 25924 + }, + { + "epoch": 0.5189800565523109, + "grad_norm": 1.1479487419128418, + "learning_rate": 4.93548937216233e-06, + "loss": 0.3059, + "step": 25925 + }, + { + "epoch": 0.5190000750694392, + "grad_norm": 1.047477126121521, + "learning_rate": 4.935165216277975e-06, + "loss": 0.3262, + "step": 25926 + }, + { + "epoch": 0.5190200935865675, + "grad_norm": 1.0246152877807617, + "learning_rate": 4.9348410606661694e-06, + "loss": 0.2865, + "step": 25927 + }, + { + "epoch": 0.5190401121036959, + "grad_norm": 1.1718143224716187, + "learning_rate": 4.934516905328278e-06, + "loss": 0.3061, + "step": 25928 + }, + { + "epoch": 0.5190601306208242, + "grad_norm": 1.1068909168243408, + "learning_rate": 4.934192750265664e-06, + "loss": 0.2994, + "step": 25929 + }, + { + "epoch": 0.5190801491379526, + "grad_norm": 1.1597788333892822, + "learning_rate": 4.933868595479689e-06, + "loss": 0.3282, + "step": 25930 + }, + { + "epoch": 0.5191001676550809, + "grad_norm": 1.032220721244812, + "learning_rate": 4.9335444409717184e-06, + "loss": 0.3318, + "step": 25931 + }, + { + "epoch": 0.5191201861722093, + "grad_norm": 0.9899221062660217, + "learning_rate": 4.9332202867431085e-06, + "loss": 0.2629, + "step": 25932 + }, + { + "epoch": 0.5191402046893376, + "grad_norm": 1.0478705167770386, + "learning_rate": 4.93289613279523e-06, + "loss": 0.2892, + "step": 25933 + }, + { + "epoch": 0.5191602232064659, + "grad_norm": 1.1408207416534424, + "learning_rate": 4.932571979129441e-06, + "loss": 0.3441, + "step": 25934 + }, + { + "epoch": 0.5191802417235943, + "grad_norm": 1.2565635442733765, + "learning_rate": 4.9322478257471035e-06, + "loss": 0.3291, + "step": 25935 + }, + { + "epoch": 0.5192002602407226, + "grad_norm": 1.0161584615707397, + "learning_rate": 4.9319236726495824e-06, + "loss": 0.2841, + "step": 25936 + }, + { + "epoch": 0.519220278757851, + "grad_norm": 1.794632077217102, + "learning_rate": 4.931599519838238e-06, + "loss": 0.7767, + "step": 25937 + }, + { + "epoch": 0.5192402972749793, + "grad_norm": 1.1727159023284912, + "learning_rate": 4.931275367314437e-06, + "loss": 0.3029, + "step": 25938 + }, + { + "epoch": 0.5192603157921077, + "grad_norm": 1.4211382865905762, + "learning_rate": 4.930951215079539e-06, + "loss": 0.2961, + "step": 25939 + }, + { + "epoch": 0.519280334309236, + "grad_norm": 1.095353126525879, + "learning_rate": 4.930627063134906e-06, + "loss": 0.3268, + "step": 25940 + }, + { + "epoch": 0.5193003528263644, + "grad_norm": 1.1293787956237793, + "learning_rate": 4.9303029114819026e-06, + "loss": 0.2923, + "step": 25941 + }, + { + "epoch": 0.5193203713434927, + "grad_norm": 1.2031689882278442, + "learning_rate": 4.92997876012189e-06, + "loss": 0.3119, + "step": 25942 + }, + { + "epoch": 0.519340389860621, + "grad_norm": 1.10686457157135, + "learning_rate": 4.929654609056233e-06, + "loss": 0.2684, + "step": 25943 + }, + { + "epoch": 0.5193604083777494, + "grad_norm": 1.100874662399292, + "learning_rate": 4.929330458286293e-06, + "loss": 0.3277, + "step": 25944 + }, + { + "epoch": 0.5193804268948777, + "grad_norm": 1.1354663372039795, + "learning_rate": 4.92900630781343e-06, + "loss": 0.3026, + "step": 25945 + }, + { + "epoch": 0.5194004454120061, + "grad_norm": 1.1726412773132324, + "learning_rate": 4.928682157639012e-06, + "loss": 0.3176, + "step": 25946 + }, + { + "epoch": 0.5194204639291344, + "grad_norm": 1.107081651687622, + "learning_rate": 4.928358007764398e-06, + "loss": 0.3136, + "step": 25947 + }, + { + "epoch": 0.5194404824462628, + "grad_norm": 1.138393759727478, + "learning_rate": 4.928033858190952e-06, + "loss": 0.3495, + "step": 25948 + }, + { + "epoch": 0.5194605009633911, + "grad_norm": 1.0558171272277832, + "learning_rate": 4.927709708920036e-06, + "loss": 0.3293, + "step": 25949 + }, + { + "epoch": 0.5194805194805194, + "grad_norm": 1.1165210008621216, + "learning_rate": 4.92738555995301e-06, + "loss": 0.281, + "step": 25950 + }, + { + "epoch": 0.5195005379976478, + "grad_norm": 1.7176828384399414, + "learning_rate": 4.927061411291243e-06, + "loss": 0.7671, + "step": 25951 + }, + { + "epoch": 0.5195205565147761, + "grad_norm": 2.1264283657073975, + "learning_rate": 4.926737262936094e-06, + "loss": 0.8205, + "step": 25952 + }, + { + "epoch": 0.5195405750319045, + "grad_norm": 1.1280726194381714, + "learning_rate": 4.926413114888924e-06, + "loss": 0.269, + "step": 25953 + }, + { + "epoch": 0.5195605935490328, + "grad_norm": 1.1764899492263794, + "learning_rate": 4.926088967151098e-06, + "loss": 0.3179, + "step": 25954 + }, + { + "epoch": 0.5195806120661612, + "grad_norm": 1.1651760339736938, + "learning_rate": 4.925764819723978e-06, + "loss": 0.3226, + "step": 25955 + }, + { + "epoch": 0.5196006305832895, + "grad_norm": 1.3378437757492065, + "learning_rate": 4.925440672608927e-06, + "loss": 0.319, + "step": 25956 + }, + { + "epoch": 0.5196206491004179, + "grad_norm": 1.0235916376113892, + "learning_rate": 4.925116525807308e-06, + "loss": 0.308, + "step": 25957 + }, + { + "epoch": 0.5196406676175462, + "grad_norm": 1.052111029624939, + "learning_rate": 4.924792379320484e-06, + "loss": 0.3459, + "step": 25958 + }, + { + "epoch": 0.5196606861346745, + "grad_norm": 1.4031463861465454, + "learning_rate": 4.9244682331498125e-06, + "loss": 0.3229, + "step": 25959 + }, + { + "epoch": 0.5196807046518029, + "grad_norm": 1.1519057750701904, + "learning_rate": 4.9241440872966635e-06, + "loss": 0.3558, + "step": 25960 + }, + { + "epoch": 0.5197007231689312, + "grad_norm": 1.0993531942367554, + "learning_rate": 4.923819941762395e-06, + "loss": 0.3518, + "step": 25961 + }, + { + "epoch": 0.5197207416860596, + "grad_norm": 1.1482789516448975, + "learning_rate": 4.923495796548373e-06, + "loss": 0.3214, + "step": 25962 + }, + { + "epoch": 0.5197407602031879, + "grad_norm": 1.0464115142822266, + "learning_rate": 4.923171651655955e-06, + "loss": 0.3142, + "step": 25963 + }, + { + "epoch": 0.5197607787203163, + "grad_norm": 1.076390266418457, + "learning_rate": 4.92284750708651e-06, + "loss": 0.2909, + "step": 25964 + }, + { + "epoch": 0.5197807972374446, + "grad_norm": 1.2193912267684937, + "learning_rate": 4.922523362841397e-06, + "loss": 0.3412, + "step": 25965 + }, + { + "epoch": 0.5198008157545729, + "grad_norm": 1.3915362358093262, + "learning_rate": 4.922199218921978e-06, + "loss": 0.3559, + "step": 25966 + }, + { + "epoch": 0.5198208342717013, + "grad_norm": 1.2298003435134888, + "learning_rate": 4.9218750753296175e-06, + "loss": 0.3217, + "step": 25967 + }, + { + "epoch": 0.5198408527888296, + "grad_norm": 1.0655815601348877, + "learning_rate": 4.9215509320656755e-06, + "loss": 0.3386, + "step": 25968 + }, + { + "epoch": 0.519860871305958, + "grad_norm": 1.1523586511611938, + "learning_rate": 4.921226789131519e-06, + "loss": 0.352, + "step": 25969 + }, + { + "epoch": 0.5198808898230863, + "grad_norm": 1.1086887121200562, + "learning_rate": 4.920902646528508e-06, + "loss": 0.3226, + "step": 25970 + }, + { + "epoch": 0.5199009083402147, + "grad_norm": 1.0578473806381226, + "learning_rate": 4.920578504258005e-06, + "loss": 0.3674, + "step": 25971 + }, + { + "epoch": 0.519920926857343, + "grad_norm": 1.1899069547653198, + "learning_rate": 4.920254362321371e-06, + "loss": 0.3104, + "step": 25972 + }, + { + "epoch": 0.5199409453744713, + "grad_norm": 1.842660665512085, + "learning_rate": 4.919930220719972e-06, + "loss": 0.7516, + "step": 25973 + }, + { + "epoch": 0.5199609638915997, + "grad_norm": 1.1437122821807861, + "learning_rate": 4.919606079455168e-06, + "loss": 0.3313, + "step": 25974 + }, + { + "epoch": 0.519980982408728, + "grad_norm": 1.0702706575393677, + "learning_rate": 4.919281938528325e-06, + "loss": 0.2946, + "step": 25975 + }, + { + "epoch": 0.5200010009258564, + "grad_norm": 1.0732240676879883, + "learning_rate": 4.9189577979408e-06, + "loss": 0.3116, + "step": 25976 + }, + { + "epoch": 0.5200210194429847, + "grad_norm": 1.182357668876648, + "learning_rate": 4.9186336576939625e-06, + "loss": 0.3467, + "step": 25977 + }, + { + "epoch": 0.5200410379601131, + "grad_norm": 1.139671802520752, + "learning_rate": 4.91830951778917e-06, + "loss": 0.2785, + "step": 25978 + }, + { + "epoch": 0.5200610564772414, + "grad_norm": 1.1144052743911743, + "learning_rate": 4.917985378227787e-06, + "loss": 0.2891, + "step": 25979 + }, + { + "epoch": 0.5200810749943698, + "grad_norm": 1.1207422018051147, + "learning_rate": 4.917661239011176e-06, + "loss": 0.2993, + "step": 25980 + }, + { + "epoch": 0.5201010935114981, + "grad_norm": 1.2444344758987427, + "learning_rate": 4.917337100140698e-06, + "loss": 0.2405, + "step": 25981 + }, + { + "epoch": 0.5201211120286264, + "grad_norm": 1.0220695734024048, + "learning_rate": 4.917012961617719e-06, + "loss": 0.3306, + "step": 25982 + }, + { + "epoch": 0.5201411305457548, + "grad_norm": 1.234532356262207, + "learning_rate": 4.916688823443599e-06, + "loss": 0.2864, + "step": 25983 + }, + { + "epoch": 0.5201611490628831, + "grad_norm": 1.0551923513412476, + "learning_rate": 4.916364685619702e-06, + "loss": 0.2785, + "step": 25984 + }, + { + "epoch": 0.5201811675800115, + "grad_norm": 1.0329523086547852, + "learning_rate": 4.916040548147387e-06, + "loss": 0.3302, + "step": 25985 + }, + { + "epoch": 0.5202011860971398, + "grad_norm": 1.0488113164901733, + "learning_rate": 4.915716411028022e-06, + "loss": 0.3224, + "step": 25986 + }, + { + "epoch": 0.5202212046142682, + "grad_norm": 1.0614588260650635, + "learning_rate": 4.915392274262967e-06, + "loss": 0.2759, + "step": 25987 + }, + { + "epoch": 0.5202412231313965, + "grad_norm": 1.297595500946045, + "learning_rate": 4.915068137853584e-06, + "loss": 0.3486, + "step": 25988 + }, + { + "epoch": 0.5202612416485248, + "grad_norm": 1.9924062490463257, + "learning_rate": 4.914744001801237e-06, + "loss": 0.7406, + "step": 25989 + }, + { + "epoch": 0.5202812601656532, + "grad_norm": 1.123783826828003, + "learning_rate": 4.914419866107286e-06, + "loss": 0.2981, + "step": 25990 + }, + { + "epoch": 0.5203012786827815, + "grad_norm": 1.1258463859558105, + "learning_rate": 4.914095730773097e-06, + "loss": 0.3222, + "step": 25991 + }, + { + "epoch": 0.5203212971999099, + "grad_norm": 1.118094801902771, + "learning_rate": 4.91377159580003e-06, + "loss": 0.3192, + "step": 25992 + }, + { + "epoch": 0.5203413157170382, + "grad_norm": 1.1009020805358887, + "learning_rate": 4.91344746118945e-06, + "loss": 0.2797, + "step": 25993 + }, + { + "epoch": 0.5203613342341666, + "grad_norm": 1.1215897798538208, + "learning_rate": 4.913123326942716e-06, + "loss": 0.3201, + "step": 25994 + }, + { + "epoch": 0.5203813527512949, + "grad_norm": 1.0219688415527344, + "learning_rate": 4.912799193061195e-06, + "loss": 0.3072, + "step": 25995 + }, + { + "epoch": 0.5204013712684233, + "grad_norm": 1.1000399589538574, + "learning_rate": 4.912475059546248e-06, + "loss": 0.3516, + "step": 25996 + }, + { + "epoch": 0.5204213897855516, + "grad_norm": 1.128028154373169, + "learning_rate": 4.9121509263992365e-06, + "loss": 0.272, + "step": 25997 + }, + { + "epoch": 0.5204414083026799, + "grad_norm": 1.0796935558319092, + "learning_rate": 4.911826793621523e-06, + "loss": 0.3252, + "step": 25998 + }, + { + "epoch": 0.5204614268198083, + "grad_norm": 1.2283039093017578, + "learning_rate": 4.91150266121447e-06, + "loss": 0.3109, + "step": 25999 + }, + { + "epoch": 0.5204814453369366, + "grad_norm": 1.1582069396972656, + "learning_rate": 4.911178529179441e-06, + "loss": 0.3369, + "step": 26000 + }, + { + "epoch": 0.520501463854065, + "grad_norm": 1.2375478744506836, + "learning_rate": 4.9108543975178e-06, + "loss": 0.3032, + "step": 26001 + }, + { + "epoch": 0.5205214823711933, + "grad_norm": 1.0790199041366577, + "learning_rate": 4.910530266230907e-06, + "loss": 0.3049, + "step": 26002 + }, + { + "epoch": 0.5205415008883217, + "grad_norm": 1.0424928665161133, + "learning_rate": 4.910206135320123e-06, + "loss": 0.3065, + "step": 26003 + }, + { + "epoch": 0.52056151940545, + "grad_norm": 1.0790314674377441, + "learning_rate": 4.909882004786817e-06, + "loss": 0.2926, + "step": 26004 + }, + { + "epoch": 0.5205815379225783, + "grad_norm": 0.953816294670105, + "learning_rate": 4.9095578746323455e-06, + "loss": 0.304, + "step": 26005 + }, + { + "epoch": 0.5206015564397067, + "grad_norm": 2.24318528175354, + "learning_rate": 4.909233744858075e-06, + "loss": 0.3477, + "step": 26006 + }, + { + "epoch": 0.520621574956835, + "grad_norm": 1.1852598190307617, + "learning_rate": 4.908909615465363e-06, + "loss": 0.2825, + "step": 26007 + }, + { + "epoch": 0.5206415934739634, + "grad_norm": 1.1065698862075806, + "learning_rate": 4.9085854864555775e-06, + "loss": 0.3151, + "step": 26008 + }, + { + "epoch": 0.5206616119910917, + "grad_norm": 1.1089786291122437, + "learning_rate": 4.90826135783008e-06, + "loss": 0.285, + "step": 26009 + }, + { + "epoch": 0.5206816305082201, + "grad_norm": 1.2380008697509766, + "learning_rate": 4.9079372295902315e-06, + "loss": 0.3439, + "step": 26010 + }, + { + "epoch": 0.5207016490253484, + "grad_norm": 1.0444408655166626, + "learning_rate": 4.907613101737394e-06, + "loss": 0.323, + "step": 26011 + }, + { + "epoch": 0.5207216675424768, + "grad_norm": 1.0818517208099365, + "learning_rate": 4.907288974272931e-06, + "loss": 0.2949, + "step": 26012 + }, + { + "epoch": 0.5207416860596051, + "grad_norm": 1.0160526037216187, + "learning_rate": 4.9069648471982056e-06, + "loss": 0.2789, + "step": 26013 + }, + { + "epoch": 0.5207617045767334, + "grad_norm": 1.8555541038513184, + "learning_rate": 4.90664072051458e-06, + "loss": 0.813, + "step": 26014 + }, + { + "epoch": 0.5207817230938618, + "grad_norm": 1.8864142894744873, + "learning_rate": 4.906316594223418e-06, + "loss": 0.8211, + "step": 26015 + }, + { + "epoch": 0.5208017416109901, + "grad_norm": 1.0136685371398926, + "learning_rate": 4.905992468326078e-06, + "loss": 0.338, + "step": 26016 + }, + { + "epoch": 0.5208217601281185, + "grad_norm": 1.0670541524887085, + "learning_rate": 4.9056683428239285e-06, + "loss": 0.2642, + "step": 26017 + }, + { + "epoch": 0.5208417786452468, + "grad_norm": 1.1187680959701538, + "learning_rate": 4.905344217718327e-06, + "loss": 0.3074, + "step": 26018 + }, + { + "epoch": 0.5208617971623752, + "grad_norm": 1.156334638595581, + "learning_rate": 4.905020093010639e-06, + "loss": 0.2844, + "step": 26019 + }, + { + "epoch": 0.5208818156795035, + "grad_norm": 1.0672035217285156, + "learning_rate": 4.904695968702226e-06, + "loss": 0.345, + "step": 26020 + }, + { + "epoch": 0.5209018341966318, + "grad_norm": 1.0698862075805664, + "learning_rate": 4.90437184479445e-06, + "loss": 0.294, + "step": 26021 + }, + { + "epoch": 0.5209218527137602, + "grad_norm": 1.0900675058364868, + "learning_rate": 4.904047721288675e-06, + "loss": 0.2882, + "step": 26022 + }, + { + "epoch": 0.5209418712308885, + "grad_norm": 1.1506999731063843, + "learning_rate": 4.903723598186263e-06, + "loss": 0.3271, + "step": 26023 + }, + { + "epoch": 0.5209618897480169, + "grad_norm": 1.0340944528579712, + "learning_rate": 4.903399475488575e-06, + "loss": 0.3248, + "step": 26024 + }, + { + "epoch": 0.5209819082651452, + "grad_norm": 1.1395446062088013, + "learning_rate": 4.903075353196975e-06, + "loss": 0.3594, + "step": 26025 + }, + { + "epoch": 0.5210019267822736, + "grad_norm": 1.1102900505065918, + "learning_rate": 4.902751231312826e-06, + "loss": 0.3347, + "step": 26026 + }, + { + "epoch": 0.5210219452994019, + "grad_norm": 1.0600680112838745, + "learning_rate": 4.90242710983749e-06, + "loss": 0.3058, + "step": 26027 + }, + { + "epoch": 0.5210419638165303, + "grad_norm": 1.2251776456832886, + "learning_rate": 4.90210298877233e-06, + "loss": 0.3836, + "step": 26028 + }, + { + "epoch": 0.5210619823336586, + "grad_norm": 1.1143989562988281, + "learning_rate": 4.901778868118708e-06, + "loss": 0.3158, + "step": 26029 + }, + { + "epoch": 0.5210820008507869, + "grad_norm": 1.0979355573654175, + "learning_rate": 4.901454747877983e-06, + "loss": 0.3181, + "step": 26030 + }, + { + "epoch": 0.5211020193679153, + "grad_norm": 1.1238508224487305, + "learning_rate": 4.9011306280515234e-06, + "loss": 0.3275, + "step": 26031 + }, + { + "epoch": 0.5211220378850436, + "grad_norm": 1.1029096841812134, + "learning_rate": 4.900806508640691e-06, + "loss": 0.3668, + "step": 26032 + }, + { + "epoch": 0.521142056402172, + "grad_norm": 1.221255898475647, + "learning_rate": 4.9004823896468455e-06, + "loss": 0.2951, + "step": 26033 + }, + { + "epoch": 0.5211620749193003, + "grad_norm": 1.1238402128219604, + "learning_rate": 4.900158271071349e-06, + "loss": 0.3596, + "step": 26034 + }, + { + "epoch": 0.5211820934364287, + "grad_norm": 1.1158874034881592, + "learning_rate": 4.8998341529155675e-06, + "loss": 0.2952, + "step": 26035 + }, + { + "epoch": 0.521202111953557, + "grad_norm": 1.883334755897522, + "learning_rate": 4.899510035180861e-06, + "loss": 0.7639, + "step": 26036 + }, + { + "epoch": 0.5212221304706853, + "grad_norm": 1.1486706733703613, + "learning_rate": 4.899185917868593e-06, + "loss": 0.2871, + "step": 26037 + }, + { + "epoch": 0.5212421489878137, + "grad_norm": 1.1735049486160278, + "learning_rate": 4.898861800980126e-06, + "loss": 0.2937, + "step": 26038 + }, + { + "epoch": 0.521262167504942, + "grad_norm": 1.0515464544296265, + "learning_rate": 4.898537684516821e-06, + "loss": 0.2999, + "step": 26039 + }, + { + "epoch": 0.5212821860220704, + "grad_norm": 1.1295559406280518, + "learning_rate": 4.898213568480043e-06, + "loss": 0.3097, + "step": 26040 + }, + { + "epoch": 0.5213022045391987, + "grad_norm": 1.0632960796356201, + "learning_rate": 4.8978894528711535e-06, + "loss": 0.2821, + "step": 26041 + }, + { + "epoch": 0.5213222230563271, + "grad_norm": 1.1727510690689087, + "learning_rate": 4.897565337691515e-06, + "loss": 0.3017, + "step": 26042 + }, + { + "epoch": 0.5213422415734554, + "grad_norm": 1.1022145748138428, + "learning_rate": 4.897241222942487e-06, + "loss": 0.2964, + "step": 26043 + }, + { + "epoch": 0.5213622600905838, + "grad_norm": 0.952222466468811, + "learning_rate": 4.896917108625437e-06, + "loss": 0.2874, + "step": 26044 + }, + { + "epoch": 0.5213822786077121, + "grad_norm": 1.1154488325119019, + "learning_rate": 4.896592994741725e-06, + "loss": 0.3058, + "step": 26045 + }, + { + "epoch": 0.5214022971248404, + "grad_norm": 1.1456230878829956, + "learning_rate": 4.896268881292714e-06, + "loss": 0.2704, + "step": 26046 + }, + { + "epoch": 0.5214223156419688, + "grad_norm": 2.0169577598571777, + "learning_rate": 4.8959447682797654e-06, + "loss": 0.7954, + "step": 26047 + }, + { + "epoch": 0.5214423341590971, + "grad_norm": 1.1538814306259155, + "learning_rate": 4.895620655704244e-06, + "loss": 0.2942, + "step": 26048 + }, + { + "epoch": 0.5214623526762255, + "grad_norm": 1.2877250909805298, + "learning_rate": 4.89529654356751e-06, + "loss": 0.3366, + "step": 26049 + }, + { + "epoch": 0.5214823711933538, + "grad_norm": 0.9964112639427185, + "learning_rate": 4.894972431870927e-06, + "loss": 0.28, + "step": 26050 + }, + { + "epoch": 0.5215023897104822, + "grad_norm": 1.2212402820587158, + "learning_rate": 4.894648320615857e-06, + "loss": 0.3605, + "step": 26051 + }, + { + "epoch": 0.5215224082276105, + "grad_norm": 1.3245878219604492, + "learning_rate": 4.894324209803662e-06, + "loss": 0.3588, + "step": 26052 + }, + { + "epoch": 0.5215424267447388, + "grad_norm": 1.1679625511169434, + "learning_rate": 4.894000099435707e-06, + "loss": 0.3223, + "step": 26053 + }, + { + "epoch": 0.5215624452618672, + "grad_norm": 1.111730933189392, + "learning_rate": 4.893675989513352e-06, + "loss": 0.2713, + "step": 26054 + }, + { + "epoch": 0.5215824637789955, + "grad_norm": 1.9588582515716553, + "learning_rate": 4.893351880037962e-06, + "loss": 0.8501, + "step": 26055 + }, + { + "epoch": 0.5216024822961239, + "grad_norm": 1.166698932647705, + "learning_rate": 4.893027771010894e-06, + "loss": 0.2924, + "step": 26056 + }, + { + "epoch": 0.5216225008132522, + "grad_norm": 1.0136522054672241, + "learning_rate": 4.892703662433516e-06, + "loss": 0.3394, + "step": 26057 + }, + { + "epoch": 0.5216425193303806, + "grad_norm": 1.2781531810760498, + "learning_rate": 4.892379554307189e-06, + "loss": 0.3019, + "step": 26058 + }, + { + "epoch": 0.5216625378475089, + "grad_norm": 1.0870871543884277, + "learning_rate": 4.892055446633276e-06, + "loss": 0.2784, + "step": 26059 + }, + { + "epoch": 0.5216825563646373, + "grad_norm": 1.2586051225662231, + "learning_rate": 4.891731339413139e-06, + "loss": 0.2952, + "step": 26060 + }, + { + "epoch": 0.5217025748817656, + "grad_norm": 1.0782444477081299, + "learning_rate": 4.891407232648138e-06, + "loss": 0.317, + "step": 26061 + }, + { + "epoch": 0.5217225933988939, + "grad_norm": 1.0114729404449463, + "learning_rate": 4.891083126339639e-06, + "loss": 0.2684, + "step": 26062 + }, + { + "epoch": 0.5217426119160223, + "grad_norm": 1.8630226850509644, + "learning_rate": 4.890759020489002e-06, + "loss": 0.8488, + "step": 26063 + }, + { + "epoch": 0.5217626304331506, + "grad_norm": 1.284671425819397, + "learning_rate": 4.890434915097592e-06, + "loss": 0.294, + "step": 26064 + }, + { + "epoch": 0.521782648950279, + "grad_norm": 1.310530662536621, + "learning_rate": 4.890110810166768e-06, + "loss": 0.3052, + "step": 26065 + }, + { + "epoch": 0.5218026674674073, + "grad_norm": 1.1953386068344116, + "learning_rate": 4.889786705697896e-06, + "loss": 0.3414, + "step": 26066 + }, + { + "epoch": 0.5218226859845357, + "grad_norm": 0.947655975818634, + "learning_rate": 4.889462601692338e-06, + "loss": 0.289, + "step": 26067 + }, + { + "epoch": 0.521842704501664, + "grad_norm": 1.2557528018951416, + "learning_rate": 4.8891384981514554e-06, + "loss": 0.3289, + "step": 26068 + }, + { + "epoch": 0.5218627230187923, + "grad_norm": 1.149949550628662, + "learning_rate": 4.888814395076608e-06, + "loss": 0.2988, + "step": 26069 + }, + { + "epoch": 0.5218827415359207, + "grad_norm": 1.1537163257598877, + "learning_rate": 4.888490292469161e-06, + "loss": 0.3704, + "step": 26070 + }, + { + "epoch": 0.521902760053049, + "grad_norm": 0.9835250973701477, + "learning_rate": 4.88816619033048e-06, + "loss": 0.2761, + "step": 26071 + }, + { + "epoch": 0.5219227785701774, + "grad_norm": 1.0203803777694702, + "learning_rate": 4.887842088661923e-06, + "loss": 0.2714, + "step": 26072 + }, + { + "epoch": 0.5219427970873057, + "grad_norm": 1.1823621988296509, + "learning_rate": 4.887517987464854e-06, + "loss": 0.2832, + "step": 26073 + }, + { + "epoch": 0.5219628156044341, + "grad_norm": 1.1240394115447998, + "learning_rate": 4.8871938867406325e-06, + "loss": 0.3336, + "step": 26074 + }, + { + "epoch": 0.5219828341215624, + "grad_norm": 1.1361403465270996, + "learning_rate": 4.886869786490627e-06, + "loss": 0.2846, + "step": 26075 + }, + { + "epoch": 0.5220028526386908, + "grad_norm": 2.011211633682251, + "learning_rate": 4.886545686716194e-06, + "loss": 0.8144, + "step": 26076 + }, + { + "epoch": 0.5220228711558191, + "grad_norm": 1.1582801342010498, + "learning_rate": 4.886221587418701e-06, + "loss": 0.3485, + "step": 26077 + }, + { + "epoch": 0.5220428896729474, + "grad_norm": 1.1219737529754639, + "learning_rate": 4.885897488599505e-06, + "loss": 0.3019, + "step": 26078 + }, + { + "epoch": 0.5220629081900758, + "grad_norm": 1.0549044609069824, + "learning_rate": 4.885573390259974e-06, + "loss": 0.3251, + "step": 26079 + }, + { + "epoch": 0.5220829267072041, + "grad_norm": 1.1256941556930542, + "learning_rate": 4.885249292401467e-06, + "loss": 0.3134, + "step": 26080 + }, + { + "epoch": 0.5221029452243325, + "grad_norm": 1.0973063707351685, + "learning_rate": 4.884925195025348e-06, + "loss": 0.3123, + "step": 26081 + }, + { + "epoch": 0.5221229637414608, + "grad_norm": 1.2702444791793823, + "learning_rate": 4.8846010981329776e-06, + "loss": 0.3606, + "step": 26082 + }, + { + "epoch": 0.5221429822585892, + "grad_norm": 1.1486519575119019, + "learning_rate": 4.884277001725718e-06, + "loss": 0.3423, + "step": 26083 + }, + { + "epoch": 0.5221630007757175, + "grad_norm": 0.985170841217041, + "learning_rate": 4.883952905804935e-06, + "loss": 0.3027, + "step": 26084 + }, + { + "epoch": 0.5221830192928458, + "grad_norm": 1.806320071220398, + "learning_rate": 4.88362881037199e-06, + "loss": 0.8023, + "step": 26085 + }, + { + "epoch": 0.5222030378099742, + "grad_norm": 1.2305461168289185, + "learning_rate": 4.8833047154282435e-06, + "loss": 0.3289, + "step": 26086 + }, + { + "epoch": 0.5222230563271025, + "grad_norm": 1.0733036994934082, + "learning_rate": 4.882980620975057e-06, + "loss": 0.3334, + "step": 26087 + }, + { + "epoch": 0.5222430748442309, + "grad_norm": 1.0551098585128784, + "learning_rate": 4.882656527013797e-06, + "loss": 0.3079, + "step": 26088 + }, + { + "epoch": 0.5222630933613592, + "grad_norm": 1.1831088066101074, + "learning_rate": 4.882332433545822e-06, + "loss": 0.3358, + "step": 26089 + }, + { + "epoch": 0.5222831118784876, + "grad_norm": 1.1992267370224, + "learning_rate": 4.882008340572498e-06, + "loss": 0.3216, + "step": 26090 + }, + { + "epoch": 0.5223031303956159, + "grad_norm": 1.0900626182556152, + "learning_rate": 4.8816842480951834e-06, + "loss": 0.2631, + "step": 26091 + }, + { + "epoch": 0.5223231489127443, + "grad_norm": 1.1098625659942627, + "learning_rate": 4.881360156115244e-06, + "loss": 0.3073, + "step": 26092 + }, + { + "epoch": 0.5223431674298726, + "grad_norm": 1.1501469612121582, + "learning_rate": 4.881036064634041e-06, + "loss": 0.3375, + "step": 26093 + }, + { + "epoch": 0.5223631859470009, + "grad_norm": 1.383899211883545, + "learning_rate": 4.880711973652938e-06, + "loss": 0.3395, + "step": 26094 + }, + { + "epoch": 0.5223832044641293, + "grad_norm": 1.9533112049102783, + "learning_rate": 4.8803878831732946e-06, + "loss": 0.8548, + "step": 26095 + }, + { + "epoch": 0.5224032229812576, + "grad_norm": 1.2583240270614624, + "learning_rate": 4.880063793196473e-06, + "loss": 0.3215, + "step": 26096 + }, + { + "epoch": 0.522423241498386, + "grad_norm": 0.9810184836387634, + "learning_rate": 4.879739703723841e-06, + "loss": 0.3078, + "step": 26097 + }, + { + "epoch": 0.5224432600155143, + "grad_norm": 1.0445226430892944, + "learning_rate": 4.8794156147567565e-06, + "loss": 0.2604, + "step": 26098 + }, + { + "epoch": 0.5224632785326427, + "grad_norm": 1.1810121536254883, + "learning_rate": 4.879091526296583e-06, + "loss": 0.3417, + "step": 26099 + }, + { + "epoch": 0.522483297049771, + "grad_norm": 1.0694608688354492, + "learning_rate": 4.87876743834468e-06, + "loss": 0.3078, + "step": 26100 + }, + { + "epoch": 0.5225033155668993, + "grad_norm": 1.0996710062026978, + "learning_rate": 4.878443350902415e-06, + "loss": 0.3129, + "step": 26101 + }, + { + "epoch": 0.5225233340840277, + "grad_norm": 1.000361680984497, + "learning_rate": 4.878119263971147e-06, + "loss": 0.2577, + "step": 26102 + }, + { + "epoch": 0.522543352601156, + "grad_norm": 1.0438661575317383, + "learning_rate": 4.877795177552241e-06, + "loss": 0.3003, + "step": 26103 + }, + { + "epoch": 0.5225633711182844, + "grad_norm": 1.8240199089050293, + "learning_rate": 4.8774710916470575e-06, + "loss": 0.8309, + "step": 26104 + }, + { + "epoch": 0.5225833896354127, + "grad_norm": 1.210703730583191, + "learning_rate": 4.877147006256957e-06, + "loss": 0.3294, + "step": 26105 + }, + { + "epoch": 0.5226034081525411, + "grad_norm": 1.1128616333007812, + "learning_rate": 4.876822921383305e-06, + "loss": 0.3523, + "step": 26106 + }, + { + "epoch": 0.5226234266696694, + "grad_norm": 1.2142083644866943, + "learning_rate": 4.876498837027465e-06, + "loss": 0.3362, + "step": 26107 + }, + { + "epoch": 0.5226434451867978, + "grad_norm": 1.226188063621521, + "learning_rate": 4.876174753190795e-06, + "loss": 0.3269, + "step": 26108 + }, + { + "epoch": 0.5226634637039261, + "grad_norm": 1.1472283601760864, + "learning_rate": 4.875850669874659e-06, + "loss": 0.325, + "step": 26109 + }, + { + "epoch": 0.5226834822210544, + "grad_norm": 1.3297269344329834, + "learning_rate": 4.875526587080422e-06, + "loss": 0.3925, + "step": 26110 + }, + { + "epoch": 0.5227035007381828, + "grad_norm": 1.9471290111541748, + "learning_rate": 4.875202504809444e-06, + "loss": 0.7725, + "step": 26111 + }, + { + "epoch": 0.5227235192553111, + "grad_norm": 1.9561545848846436, + "learning_rate": 4.874878423063089e-06, + "loss": 0.781, + "step": 26112 + }, + { + "epoch": 0.5227435377724395, + "grad_norm": 1.4407167434692383, + "learning_rate": 4.874554341842716e-06, + "loss": 0.3182, + "step": 26113 + }, + { + "epoch": 0.5227635562895678, + "grad_norm": 1.1815954446792603, + "learning_rate": 4.87423026114969e-06, + "loss": 0.3244, + "step": 26114 + }, + { + "epoch": 0.5227835748066962, + "grad_norm": 1.0800923109054565, + "learning_rate": 4.873906180985373e-06, + "loss": 0.324, + "step": 26115 + }, + { + "epoch": 0.5228035933238245, + "grad_norm": 1.0539250373840332, + "learning_rate": 4.873582101351128e-06, + "loss": 0.2926, + "step": 26116 + }, + { + "epoch": 0.5228236118409528, + "grad_norm": 1.157322645187378, + "learning_rate": 4.873258022248317e-06, + "loss": 0.3154, + "step": 26117 + }, + { + "epoch": 0.5228436303580812, + "grad_norm": 1.051986813545227, + "learning_rate": 4.872933943678299e-06, + "loss": 0.3208, + "step": 26118 + }, + { + "epoch": 0.5228636488752095, + "grad_norm": 1.1096937656402588, + "learning_rate": 4.872609865642443e-06, + "loss": 0.333, + "step": 26119 + }, + { + "epoch": 0.5228836673923379, + "grad_norm": 1.9571188688278198, + "learning_rate": 4.872285788142107e-06, + "loss": 0.8452, + "step": 26120 + }, + { + "epoch": 0.5229036859094662, + "grad_norm": 2.024569034576416, + "learning_rate": 4.871961711178653e-06, + "loss": 0.7216, + "step": 26121 + }, + { + "epoch": 0.5229237044265946, + "grad_norm": 1.0833230018615723, + "learning_rate": 4.871637634753443e-06, + "loss": 0.3552, + "step": 26122 + }, + { + "epoch": 0.5229437229437229, + "grad_norm": 1.1590073108673096, + "learning_rate": 4.871313558867843e-06, + "loss": 0.2791, + "step": 26123 + }, + { + "epoch": 0.5229637414608513, + "grad_norm": 1.221861481666565, + "learning_rate": 4.870989483523214e-06, + "loss": 0.3476, + "step": 26124 + }, + { + "epoch": 0.5229837599779796, + "grad_norm": 1.071815848350525, + "learning_rate": 4.870665408720916e-06, + "loss": 0.3326, + "step": 26125 + }, + { + "epoch": 0.5230037784951079, + "grad_norm": 1.2653954029083252, + "learning_rate": 4.870341334462312e-06, + "loss": 0.3194, + "step": 26126 + }, + { + "epoch": 0.5230237970122363, + "grad_norm": 1.02768075466156, + "learning_rate": 4.8700172607487664e-06, + "loss": 0.2741, + "step": 26127 + }, + { + "epoch": 0.5230438155293646, + "grad_norm": 1.073218822479248, + "learning_rate": 4.86969318758164e-06, + "loss": 0.2851, + "step": 26128 + }, + { + "epoch": 0.523063834046493, + "grad_norm": 1.125164270401001, + "learning_rate": 4.869369114962295e-06, + "loss": 0.3202, + "step": 26129 + }, + { + "epoch": 0.5230838525636213, + "grad_norm": 1.915220856666565, + "learning_rate": 4.869045042892095e-06, + "loss": 0.7431, + "step": 26130 + }, + { + "epoch": 0.5231038710807497, + "grad_norm": 1.0502108335494995, + "learning_rate": 4.868720971372399e-06, + "loss": 0.2998, + "step": 26131 + }, + { + "epoch": 0.523123889597878, + "grad_norm": 0.9988083839416504, + "learning_rate": 4.868396900404575e-06, + "loss": 0.2955, + "step": 26132 + }, + { + "epoch": 0.5231439081150063, + "grad_norm": 1.2452048063278198, + "learning_rate": 4.868072829989981e-06, + "loss": 0.3525, + "step": 26133 + }, + { + "epoch": 0.5231639266321347, + "grad_norm": 1.0440689325332642, + "learning_rate": 4.86774876012998e-06, + "loss": 0.2685, + "step": 26134 + }, + { + "epoch": 0.523183945149263, + "grad_norm": 1.1910476684570312, + "learning_rate": 4.867424690825936e-06, + "loss": 0.2948, + "step": 26135 + }, + { + "epoch": 0.5232039636663914, + "grad_norm": 1.0284321308135986, + "learning_rate": 4.867100622079206e-06, + "loss": 0.2927, + "step": 26136 + }, + { + "epoch": 0.5232239821835197, + "grad_norm": 1.1265010833740234, + "learning_rate": 4.866776553891161e-06, + "loss": 0.328, + "step": 26137 + }, + { + "epoch": 0.5232440007006481, + "grad_norm": 1.255091667175293, + "learning_rate": 4.866452486263158e-06, + "loss": 0.2846, + "step": 26138 + }, + { + "epoch": 0.5232640192177764, + "grad_norm": 0.9740954637527466, + "learning_rate": 4.866128419196559e-06, + "loss": 0.2782, + "step": 26139 + }, + { + "epoch": 0.5232840377349048, + "grad_norm": 1.088576078414917, + "learning_rate": 4.865804352692726e-06, + "loss": 0.2997, + "step": 26140 + }, + { + "epoch": 0.5233040562520331, + "grad_norm": 1.068252682685852, + "learning_rate": 4.865480286753023e-06, + "loss": 0.2973, + "step": 26141 + }, + { + "epoch": 0.5233240747691614, + "grad_norm": 1.2213796377182007, + "learning_rate": 4.865156221378814e-06, + "loss": 0.3064, + "step": 26142 + }, + { + "epoch": 0.5233440932862898, + "grad_norm": 1.0483242273330688, + "learning_rate": 4.864832156571459e-06, + "loss": 0.2604, + "step": 26143 + }, + { + "epoch": 0.5233641118034181, + "grad_norm": 0.9909867644309998, + "learning_rate": 4.8645080923323206e-06, + "loss": 0.269, + "step": 26144 + }, + { + "epoch": 0.5233841303205465, + "grad_norm": 1.046037197113037, + "learning_rate": 4.864184028662758e-06, + "loss": 0.2923, + "step": 26145 + }, + { + "epoch": 0.5234041488376748, + "grad_norm": 1.0553027391433716, + "learning_rate": 4.86385996556414e-06, + "loss": 0.2916, + "step": 26146 + }, + { + "epoch": 0.5234241673548032, + "grad_norm": 1.8955864906311035, + "learning_rate": 4.863535903037823e-06, + "loss": 0.7666, + "step": 26147 + }, + { + "epoch": 0.5234441858719315, + "grad_norm": 1.1169143915176392, + "learning_rate": 4.863211841085174e-06, + "loss": 0.331, + "step": 26148 + }, + { + "epoch": 0.5234642043890598, + "grad_norm": 0.9388505816459656, + "learning_rate": 4.86288777970755e-06, + "loss": 0.2777, + "step": 26149 + }, + { + "epoch": 0.5234842229061882, + "grad_norm": 1.007185935974121, + "learning_rate": 4.862563718906318e-06, + "loss": 0.2759, + "step": 26150 + }, + { + "epoch": 0.5235042414233165, + "grad_norm": 1.1056394577026367, + "learning_rate": 4.862239658682839e-06, + "loss": 0.2809, + "step": 26151 + }, + { + "epoch": 0.5235242599404449, + "grad_norm": 0.9895520210266113, + "learning_rate": 4.861915599038475e-06, + "loss": 0.2685, + "step": 26152 + }, + { + "epoch": 0.5235442784575732, + "grad_norm": 1.8396247625350952, + "learning_rate": 4.861591539974587e-06, + "loss": 0.7704, + "step": 26153 + }, + { + "epoch": 0.5235642969747016, + "grad_norm": 1.1042437553405762, + "learning_rate": 4.861267481492538e-06, + "loss": 0.3422, + "step": 26154 + }, + { + "epoch": 0.5235843154918299, + "grad_norm": 1.0405863523483276, + "learning_rate": 4.860943423593692e-06, + "loss": 0.2999, + "step": 26155 + }, + { + "epoch": 0.5236043340089583, + "grad_norm": 1.1878151893615723, + "learning_rate": 4.860619366279411e-06, + "loss": 0.3356, + "step": 26156 + }, + { + "epoch": 0.5236243525260866, + "grad_norm": 2.1969380378723145, + "learning_rate": 4.860295309551055e-06, + "loss": 0.7638, + "step": 26157 + }, + { + "epoch": 0.5236443710432149, + "grad_norm": 1.1789090633392334, + "learning_rate": 4.859971253409985e-06, + "loss": 0.306, + "step": 26158 + }, + { + "epoch": 0.5236643895603433, + "grad_norm": 1.0677322149276733, + "learning_rate": 4.859647197857568e-06, + "loss": 0.2883, + "step": 26159 + }, + { + "epoch": 0.5236844080774716, + "grad_norm": 0.9931179285049438, + "learning_rate": 4.8593231428951636e-06, + "loss": 0.2784, + "step": 26160 + }, + { + "epoch": 0.5237044265946, + "grad_norm": 1.28489351272583, + "learning_rate": 4.858999088524136e-06, + "loss": 0.2954, + "step": 26161 + }, + { + "epoch": 0.5237244451117283, + "grad_norm": 1.1697813272476196, + "learning_rate": 4.858675034745843e-06, + "loss": 0.3402, + "step": 26162 + }, + { + "epoch": 0.5237444636288567, + "grad_norm": 1.1457003355026245, + "learning_rate": 4.858350981561651e-06, + "loss": 0.3219, + "step": 26163 + }, + { + "epoch": 0.523764482145985, + "grad_norm": 1.9337329864501953, + "learning_rate": 4.858026928972922e-06, + "loss": 0.8016, + "step": 26164 + }, + { + "epoch": 0.5237845006631133, + "grad_norm": 1.1317740678787231, + "learning_rate": 4.8577028769810166e-06, + "loss": 0.329, + "step": 26165 + }, + { + "epoch": 0.5238045191802417, + "grad_norm": 1.0956364870071411, + "learning_rate": 4.857378825587298e-06, + "loss": 0.3208, + "step": 26166 + }, + { + "epoch": 0.52382453769737, + "grad_norm": 0.9959695935249329, + "learning_rate": 4.857054774793126e-06, + "loss": 0.3013, + "step": 26167 + }, + { + "epoch": 0.5238445562144984, + "grad_norm": 1.8100924491882324, + "learning_rate": 4.8567307245998675e-06, + "loss": 0.7943, + "step": 26168 + }, + { + "epoch": 0.5238645747316267, + "grad_norm": 1.1091715097427368, + "learning_rate": 4.8564066750088815e-06, + "loss": 0.311, + "step": 26169 + }, + { + "epoch": 0.5238845932487551, + "grad_norm": 1.1610714197158813, + "learning_rate": 4.856082626021532e-06, + "loss": 0.2975, + "step": 26170 + }, + { + "epoch": 0.5239046117658834, + "grad_norm": 1.1774147748947144, + "learning_rate": 4.855758577639177e-06, + "loss": 0.3092, + "step": 26171 + }, + { + "epoch": 0.5239246302830118, + "grad_norm": 1.0228379964828491, + "learning_rate": 4.8554345298631845e-06, + "loss": 0.264, + "step": 26172 + }, + { + "epoch": 0.5239446488001401, + "grad_norm": 1.0543943643569946, + "learning_rate": 4.855110482694913e-06, + "loss": 0.2978, + "step": 26173 + }, + { + "epoch": 0.5239646673172684, + "grad_norm": 1.0434634685516357, + "learning_rate": 4.854786436135727e-06, + "loss": 0.318, + "step": 26174 + }, + { + "epoch": 0.5239846858343968, + "grad_norm": 1.1892281770706177, + "learning_rate": 4.854462390186988e-06, + "loss": 0.3106, + "step": 26175 + }, + { + "epoch": 0.5240047043515251, + "grad_norm": 1.2115589380264282, + "learning_rate": 4.854138344850054e-06, + "loss": 0.3105, + "step": 26176 + }, + { + "epoch": 0.5240247228686535, + "grad_norm": 1.0945613384246826, + "learning_rate": 4.853814300126295e-06, + "loss": 0.2828, + "step": 26177 + }, + { + "epoch": 0.5240447413857818, + "grad_norm": 1.1485795974731445, + "learning_rate": 4.853490256017067e-06, + "loss": 0.3392, + "step": 26178 + }, + { + "epoch": 0.5240647599029102, + "grad_norm": 1.1547924280166626, + "learning_rate": 4.853166212523737e-06, + "loss": 0.2399, + "step": 26179 + }, + { + "epoch": 0.5240847784200385, + "grad_norm": 0.9924196600914001, + "learning_rate": 4.85284216964766e-06, + "loss": 0.283, + "step": 26180 + }, + { + "epoch": 0.5241047969371668, + "grad_norm": 1.1270049810409546, + "learning_rate": 4.852518127390207e-06, + "loss": 0.3229, + "step": 26181 + }, + { + "epoch": 0.5241248154542952, + "grad_norm": 1.141648530960083, + "learning_rate": 4.8521940857527365e-06, + "loss": 0.3196, + "step": 26182 + }, + { + "epoch": 0.5241448339714235, + "grad_norm": 1.04695463180542, + "learning_rate": 4.851870044736609e-06, + "loss": 0.3061, + "step": 26183 + }, + { + "epoch": 0.5241648524885519, + "grad_norm": 1.090818166732788, + "learning_rate": 4.851546004343185e-06, + "loss": 0.3001, + "step": 26184 + }, + { + "epoch": 0.5241848710056802, + "grad_norm": 1.334775447845459, + "learning_rate": 4.851221964573833e-06, + "loss": 0.2986, + "step": 26185 + }, + { + "epoch": 0.5242048895228086, + "grad_norm": 1.0904791355133057, + "learning_rate": 4.850897925429911e-06, + "loss": 0.2813, + "step": 26186 + }, + { + "epoch": 0.5242249080399369, + "grad_norm": 1.2311012744903564, + "learning_rate": 4.850573886912783e-06, + "loss": 0.2926, + "step": 26187 + }, + { + "epoch": 0.5242449265570653, + "grad_norm": 1.2217315435409546, + "learning_rate": 4.85024984902381e-06, + "loss": 0.3724, + "step": 26188 + }, + { + "epoch": 0.5242649450741936, + "grad_norm": 1.149503469467163, + "learning_rate": 4.849925811764352e-06, + "loss": 0.3243, + "step": 26189 + }, + { + "epoch": 0.5242849635913219, + "grad_norm": 1.1621516942977905, + "learning_rate": 4.849601775135776e-06, + "loss": 0.2961, + "step": 26190 + }, + { + "epoch": 0.5243049821084503, + "grad_norm": 1.022722840309143, + "learning_rate": 4.84927773913944e-06, + "loss": 0.301, + "step": 26191 + }, + { + "epoch": 0.5243250006255786, + "grad_norm": 1.229562759399414, + "learning_rate": 4.8489537037767105e-06, + "loss": 0.3291, + "step": 26192 + }, + { + "epoch": 0.524345019142707, + "grad_norm": 1.2136207818984985, + "learning_rate": 4.8486296690489445e-06, + "loss": 0.3303, + "step": 26193 + }, + { + "epoch": 0.5243650376598353, + "grad_norm": 1.1004719734191895, + "learning_rate": 4.848305634957509e-06, + "loss": 0.3191, + "step": 26194 + }, + { + "epoch": 0.5243850561769637, + "grad_norm": 1.129685878753662, + "learning_rate": 4.847981601503763e-06, + "loss": 0.2968, + "step": 26195 + }, + { + "epoch": 0.524405074694092, + "grad_norm": 1.1316300630569458, + "learning_rate": 4.84765756868907e-06, + "loss": 0.3194, + "step": 26196 + }, + { + "epoch": 0.5244250932112203, + "grad_norm": 1.2171331644058228, + "learning_rate": 4.847333536514791e-06, + "loss": 0.3025, + "step": 26197 + }, + { + "epoch": 0.5244451117283487, + "grad_norm": 1.2755212783813477, + "learning_rate": 4.847009504982289e-06, + "loss": 0.3102, + "step": 26198 + }, + { + "epoch": 0.524465130245477, + "grad_norm": 1.8407803773880005, + "learning_rate": 4.846685474092926e-06, + "loss": 0.8328, + "step": 26199 + }, + { + "epoch": 0.5244851487626054, + "grad_norm": 1.1436073780059814, + "learning_rate": 4.846361443848065e-06, + "loss": 0.291, + "step": 26200 + }, + { + "epoch": 0.5245051672797337, + "grad_norm": 1.0370821952819824, + "learning_rate": 4.846037414249069e-06, + "loss": 0.295, + "step": 26201 + }, + { + "epoch": 0.5245251857968621, + "grad_norm": 1.1160004138946533, + "learning_rate": 4.845713385297294e-06, + "loss": 0.288, + "step": 26202 + }, + { + "epoch": 0.5245452043139904, + "grad_norm": 1.219903588294983, + "learning_rate": 4.845389356994111e-06, + "loss": 0.3435, + "step": 26203 + }, + { + "epoch": 0.5245652228311188, + "grad_norm": 1.1108027696609497, + "learning_rate": 4.845065329340875e-06, + "loss": 0.3117, + "step": 26204 + }, + { + "epoch": 0.5245852413482471, + "grad_norm": 1.169991374015808, + "learning_rate": 4.8447413023389534e-06, + "loss": 0.3379, + "step": 26205 + }, + { + "epoch": 0.5246052598653754, + "grad_norm": 1.0326085090637207, + "learning_rate": 4.844417275989705e-06, + "loss": 0.3349, + "step": 26206 + }, + { + "epoch": 0.5246252783825038, + "grad_norm": 1.1052764654159546, + "learning_rate": 4.844093250294491e-06, + "loss": 0.3418, + "step": 26207 + }, + { + "epoch": 0.5246452968996321, + "grad_norm": 1.2175755500793457, + "learning_rate": 4.843769225254679e-06, + "loss": 0.3144, + "step": 26208 + }, + { + "epoch": 0.5246653154167605, + "grad_norm": 1.3555619716644287, + "learning_rate": 4.843445200871626e-06, + "loss": 0.3316, + "step": 26209 + }, + { + "epoch": 0.5246853339338888, + "grad_norm": 1.119261384010315, + "learning_rate": 4.843121177146695e-06, + "loss": 0.3141, + "step": 26210 + }, + { + "epoch": 0.5247053524510172, + "grad_norm": 1.8160966634750366, + "learning_rate": 4.842797154081249e-06, + "loss": 0.8301, + "step": 26211 + }, + { + "epoch": 0.5247253709681455, + "grad_norm": 1.1309090852737427, + "learning_rate": 4.8424731316766496e-06, + "loss": 0.2933, + "step": 26212 + }, + { + "epoch": 0.5247453894852738, + "grad_norm": 1.0881321430206299, + "learning_rate": 4.84214910993426e-06, + "loss": 0.273, + "step": 26213 + }, + { + "epoch": 0.5247654080024022, + "grad_norm": 1.2023062705993652, + "learning_rate": 4.841825088855443e-06, + "loss": 0.2871, + "step": 26214 + }, + { + "epoch": 0.5247854265195305, + "grad_norm": 1.3627229928970337, + "learning_rate": 4.841501068441556e-06, + "loss": 0.2774, + "step": 26215 + }, + { + "epoch": 0.5248054450366589, + "grad_norm": 1.2163139581680298, + "learning_rate": 4.8411770486939666e-06, + "loss": 0.3265, + "step": 26216 + }, + { + "epoch": 0.5248254635537872, + "grad_norm": 1.110133171081543, + "learning_rate": 4.840853029614034e-06, + "loss": 0.3222, + "step": 26217 + }, + { + "epoch": 0.5248454820709156, + "grad_norm": 1.8708268404006958, + "learning_rate": 4.840529011203122e-06, + "loss": 0.7452, + "step": 26218 + }, + { + "epoch": 0.5248655005880439, + "grad_norm": 1.7377173900604248, + "learning_rate": 4.8402049934625914e-06, + "loss": 0.7655, + "step": 26219 + }, + { + "epoch": 0.5248855191051723, + "grad_norm": 1.0132795572280884, + "learning_rate": 4.839880976393803e-06, + "loss": 0.2765, + "step": 26220 + }, + { + "epoch": 0.5249055376223006, + "grad_norm": 1.1291018724441528, + "learning_rate": 4.839556959998122e-06, + "loss": 0.3396, + "step": 26221 + }, + { + "epoch": 0.5249255561394289, + "grad_norm": 1.072111964225769, + "learning_rate": 4.839232944276909e-06, + "loss": 0.3338, + "step": 26222 + }, + { + "epoch": 0.5249455746565573, + "grad_norm": 1.028626561164856, + "learning_rate": 4.8389089292315255e-06, + "loss": 0.2339, + "step": 26223 + }, + { + "epoch": 0.5249655931736856, + "grad_norm": 1.0205464363098145, + "learning_rate": 4.838584914863334e-06, + "loss": 0.2846, + "step": 26224 + }, + { + "epoch": 0.524985611690814, + "grad_norm": 1.7984429597854614, + "learning_rate": 4.838260901173697e-06, + "loss": 0.7721, + "step": 26225 + }, + { + "epoch": 0.5250056302079423, + "grad_norm": 1.1201236248016357, + "learning_rate": 4.837936888163977e-06, + "loss": 0.3638, + "step": 26226 + }, + { + "epoch": 0.5250256487250707, + "grad_norm": 1.2013304233551025, + "learning_rate": 4.837612875835536e-06, + "loss": 0.349, + "step": 26227 + }, + { + "epoch": 0.525045667242199, + "grad_norm": 1.0222163200378418, + "learning_rate": 4.837288864189735e-06, + "loss": 0.3219, + "step": 26228 + }, + { + "epoch": 0.5250656857593273, + "grad_norm": 1.163089394569397, + "learning_rate": 4.836964853227935e-06, + "loss": 0.2978, + "step": 26229 + }, + { + "epoch": 0.5250857042764557, + "grad_norm": 1.2514407634735107, + "learning_rate": 4.8366408429515e-06, + "loss": 0.303, + "step": 26230 + }, + { + "epoch": 0.525105722793584, + "grad_norm": 1.0122967958450317, + "learning_rate": 4.836316833361793e-06, + "loss": 0.2848, + "step": 26231 + }, + { + "epoch": 0.5251257413107124, + "grad_norm": 1.112715721130371, + "learning_rate": 4.835992824460175e-06, + "loss": 0.3173, + "step": 26232 + }, + { + "epoch": 0.5251457598278407, + "grad_norm": 1.0501893758773804, + "learning_rate": 4.835668816248007e-06, + "loss": 0.3177, + "step": 26233 + }, + { + "epoch": 0.5251657783449691, + "grad_norm": 1.1906169652938843, + "learning_rate": 4.835344808726652e-06, + "loss": 0.3414, + "step": 26234 + }, + { + "epoch": 0.5251857968620974, + "grad_norm": 1.0269081592559814, + "learning_rate": 4.835020801897473e-06, + "loss": 0.3131, + "step": 26235 + }, + { + "epoch": 0.5252058153792258, + "grad_norm": 1.0650285482406616, + "learning_rate": 4.83469679576183e-06, + "loss": 0.3025, + "step": 26236 + }, + { + "epoch": 0.5252258338963541, + "grad_norm": 1.027654767036438, + "learning_rate": 4.834372790321086e-06, + "loss": 0.3664, + "step": 26237 + }, + { + "epoch": 0.5252458524134824, + "grad_norm": 1.1470016241073608, + "learning_rate": 4.8340487855766025e-06, + "loss": 0.2917, + "step": 26238 + }, + { + "epoch": 0.5252658709306108, + "grad_norm": 0.9822925329208374, + "learning_rate": 4.833724781529744e-06, + "loss": 0.2792, + "step": 26239 + }, + { + "epoch": 0.5252858894477391, + "grad_norm": 2.006897449493408, + "learning_rate": 4.833400778181871e-06, + "loss": 0.7545, + "step": 26240 + }, + { + "epoch": 0.5253059079648675, + "grad_norm": 1.1326009035110474, + "learning_rate": 4.8330767755343445e-06, + "loss": 0.2858, + "step": 26241 + }, + { + "epoch": 0.5253259264819958, + "grad_norm": 1.116566777229309, + "learning_rate": 4.8327527735885255e-06, + "loss": 0.3318, + "step": 26242 + }, + { + "epoch": 0.5253459449991242, + "grad_norm": 1.0340653657913208, + "learning_rate": 4.8324287723457795e-06, + "loss": 0.2872, + "step": 26243 + }, + { + "epoch": 0.5253659635162525, + "grad_norm": 1.138043999671936, + "learning_rate": 4.832104771807467e-06, + "loss": 0.3259, + "step": 26244 + }, + { + "epoch": 0.5253859820333808, + "grad_norm": 1.1179732084274292, + "learning_rate": 4.83178077197495e-06, + "loss": 0.3471, + "step": 26245 + }, + { + "epoch": 0.5254060005505092, + "grad_norm": 1.6969127655029297, + "learning_rate": 4.831456772849589e-06, + "loss": 0.8046, + "step": 26246 + }, + { + "epoch": 0.5254260190676375, + "grad_norm": 1.0979242324829102, + "learning_rate": 4.831132774432749e-06, + "loss": 0.3054, + "step": 26247 + }, + { + "epoch": 0.5254460375847659, + "grad_norm": 1.2016209363937378, + "learning_rate": 4.83080877672579e-06, + "loss": 0.2896, + "step": 26248 + }, + { + "epoch": 0.5254660561018942, + "grad_norm": 1.8459223508834839, + "learning_rate": 4.830484779730074e-06, + "loss": 0.7971, + "step": 26249 + }, + { + "epoch": 0.5254860746190226, + "grad_norm": 1.0795502662658691, + "learning_rate": 4.830160783446965e-06, + "loss": 0.3069, + "step": 26250 + }, + { + "epoch": 0.5255060931361509, + "grad_norm": 1.0053493976593018, + "learning_rate": 4.829836787877821e-06, + "loss": 0.3115, + "step": 26251 + }, + { + "epoch": 0.5255261116532793, + "grad_norm": 1.1316542625427246, + "learning_rate": 4.829512793024009e-06, + "loss": 0.3121, + "step": 26252 + }, + { + "epoch": 0.5255461301704076, + "grad_norm": 1.0191737413406372, + "learning_rate": 4.829188798886889e-06, + "loss": 0.2848, + "step": 26253 + }, + { + "epoch": 0.5255661486875359, + "grad_norm": 1.087148666381836, + "learning_rate": 4.828864805467821e-06, + "loss": 0.2898, + "step": 26254 + }, + { + "epoch": 0.5255861672046643, + "grad_norm": 1.1142725944519043, + "learning_rate": 4.828540812768167e-06, + "loss": 0.3138, + "step": 26255 + }, + { + "epoch": 0.5256061857217926, + "grad_norm": 1.1763660907745361, + "learning_rate": 4.828216820789292e-06, + "loss": 0.3004, + "step": 26256 + }, + { + "epoch": 0.525626204238921, + "grad_norm": 1.1774812936782837, + "learning_rate": 4.827892829532557e-06, + "loss": 0.2855, + "step": 26257 + }, + { + "epoch": 0.5256462227560493, + "grad_norm": 1.137130618095398, + "learning_rate": 4.827568838999324e-06, + "loss": 0.3172, + "step": 26258 + }, + { + "epoch": 0.5256662412731777, + "grad_norm": 1.0329970121383667, + "learning_rate": 4.827244849190955e-06, + "loss": 0.2917, + "step": 26259 + }, + { + "epoch": 0.525686259790306, + "grad_norm": 1.1181225776672363, + "learning_rate": 4.826920860108808e-06, + "loss": 0.3344, + "step": 26260 + }, + { + "epoch": 0.5257062783074343, + "grad_norm": 1.0978779792785645, + "learning_rate": 4.826596871754251e-06, + "loss": 0.314, + "step": 26261 + }, + { + "epoch": 0.5257262968245627, + "grad_norm": 1.1353896856307983, + "learning_rate": 4.826272884128643e-06, + "loss": 0.28, + "step": 26262 + }, + { + "epoch": 0.525746315341691, + "grad_norm": 1.1435394287109375, + "learning_rate": 4.825948897233347e-06, + "loss": 0.287, + "step": 26263 + }, + { + "epoch": 0.5257663338588194, + "grad_norm": 1.2644987106323242, + "learning_rate": 4.825624911069722e-06, + "loss": 0.2851, + "step": 26264 + }, + { + "epoch": 0.5257863523759477, + "grad_norm": 1.2327076196670532, + "learning_rate": 4.8253009256391355e-06, + "loss": 0.3352, + "step": 26265 + }, + { + "epoch": 0.5258063708930761, + "grad_norm": 2.150184154510498, + "learning_rate": 4.824976940942945e-06, + "loss": 0.7676, + "step": 26266 + }, + { + "epoch": 0.5258263894102044, + "grad_norm": 1.274243712425232, + "learning_rate": 4.824652956982515e-06, + "loss": 0.3746, + "step": 26267 + }, + { + "epoch": 0.5258464079273328, + "grad_norm": 1.1661027669906616, + "learning_rate": 4.824328973759204e-06, + "loss": 0.319, + "step": 26268 + }, + { + "epoch": 0.5258664264444611, + "grad_norm": 1.113125205039978, + "learning_rate": 4.824004991274377e-06, + "loss": 0.279, + "step": 26269 + }, + { + "epoch": 0.5258864449615894, + "grad_norm": 1.2321499586105347, + "learning_rate": 4.823681009529395e-06, + "loss": 0.2993, + "step": 26270 + }, + { + "epoch": 0.5259064634787178, + "grad_norm": 1.0078684091567993, + "learning_rate": 4.823357028525622e-06, + "loss": 0.3084, + "step": 26271 + }, + { + "epoch": 0.5259264819958461, + "grad_norm": 1.094098687171936, + "learning_rate": 4.8230330482644164e-06, + "loss": 0.2533, + "step": 26272 + }, + { + "epoch": 0.5259465005129745, + "grad_norm": 1.4497675895690918, + "learning_rate": 4.82270906874714e-06, + "loss": 0.2801, + "step": 26273 + }, + { + "epoch": 0.5259665190301028, + "grad_norm": 1.3606773614883423, + "learning_rate": 4.82238508997516e-06, + "loss": 0.2826, + "step": 26274 + }, + { + "epoch": 0.5259865375472312, + "grad_norm": 1.0949760675430298, + "learning_rate": 4.822061111949832e-06, + "loss": 0.3146, + "step": 26275 + }, + { + "epoch": 0.5260065560643595, + "grad_norm": 1.100212812423706, + "learning_rate": 4.821737134672523e-06, + "loss": 0.2796, + "step": 26276 + }, + { + "epoch": 0.5260265745814878, + "grad_norm": 1.1137064695358276, + "learning_rate": 4.82141315814459e-06, + "loss": 0.3141, + "step": 26277 + }, + { + "epoch": 0.5260465930986162, + "grad_norm": 1.1249940395355225, + "learning_rate": 4.8210891823674e-06, + "loss": 0.2744, + "step": 26278 + }, + { + "epoch": 0.5260666116157445, + "grad_norm": 1.0856987237930298, + "learning_rate": 4.8207652073423124e-06, + "loss": 0.2992, + "step": 26279 + }, + { + "epoch": 0.5260866301328729, + "grad_norm": 1.1519181728363037, + "learning_rate": 4.82044123307069e-06, + "loss": 0.3024, + "step": 26280 + }, + { + "epoch": 0.5261066486500012, + "grad_norm": 1.1774663925170898, + "learning_rate": 4.8201172595538925e-06, + "loss": 0.3033, + "step": 26281 + }, + { + "epoch": 0.5261266671671296, + "grad_norm": 1.0788593292236328, + "learning_rate": 4.8197932867932815e-06, + "loss": 0.3116, + "step": 26282 + }, + { + "epoch": 0.5261466856842579, + "grad_norm": 1.0157077312469482, + "learning_rate": 4.819469314790224e-06, + "loss": 0.3268, + "step": 26283 + }, + { + "epoch": 0.5261667042013863, + "grad_norm": 1.071304202079773, + "learning_rate": 4.819145343546078e-06, + "loss": 0.3158, + "step": 26284 + }, + { + "epoch": 0.5261867227185146, + "grad_norm": 1.1063551902770996, + "learning_rate": 4.818821373062207e-06, + "loss": 0.3087, + "step": 26285 + }, + { + "epoch": 0.5262067412356429, + "grad_norm": 1.1413153409957886, + "learning_rate": 4.818497403339969e-06, + "loss": 0.3301, + "step": 26286 + }, + { + "epoch": 0.5262267597527713, + "grad_norm": 1.114471197128296, + "learning_rate": 4.818173434380731e-06, + "loss": 0.3327, + "step": 26287 + }, + { + "epoch": 0.5262467782698996, + "grad_norm": 1.0320117473602295, + "learning_rate": 4.817849466185851e-06, + "loss": 0.2549, + "step": 26288 + }, + { + "epoch": 0.526266796787028, + "grad_norm": 1.8904260396957397, + "learning_rate": 4.817525498756695e-06, + "loss": 0.7746, + "step": 26289 + }, + { + "epoch": 0.5262868153041563, + "grad_norm": 1.3066943883895874, + "learning_rate": 4.817201532094622e-06, + "loss": 0.3651, + "step": 26290 + }, + { + "epoch": 0.5263068338212847, + "grad_norm": 1.0426280498504639, + "learning_rate": 4.816877566200992e-06, + "loss": 0.2835, + "step": 26291 + }, + { + "epoch": 0.526326852338413, + "grad_norm": 0.997725248336792, + "learning_rate": 4.816553601077173e-06, + "loss": 0.262, + "step": 26292 + }, + { + "epoch": 0.5263468708555413, + "grad_norm": 0.9801021218299866, + "learning_rate": 4.8162296367245216e-06, + "loss": 0.3464, + "step": 26293 + }, + { + "epoch": 0.5263668893726697, + "grad_norm": 1.126112461090088, + "learning_rate": 4.8159056731444e-06, + "loss": 0.3192, + "step": 26294 + }, + { + "epoch": 0.526386907889798, + "grad_norm": 1.032880187034607, + "learning_rate": 4.815581710338171e-06, + "loss": 0.337, + "step": 26295 + }, + { + "epoch": 0.5264069264069264, + "grad_norm": 1.0169683694839478, + "learning_rate": 4.8152577483071984e-06, + "loss": 0.2938, + "step": 26296 + }, + { + "epoch": 0.5264269449240547, + "grad_norm": 1.1554800271987915, + "learning_rate": 4.814933787052843e-06, + "loss": 0.3342, + "step": 26297 + }, + { + "epoch": 0.5264469634411831, + "grad_norm": 1.1227093935012817, + "learning_rate": 4.814609826576466e-06, + "loss": 0.3561, + "step": 26298 + }, + { + "epoch": 0.5264669819583114, + "grad_norm": 1.1911921501159668, + "learning_rate": 4.814285866879428e-06, + "loss": 0.373, + "step": 26299 + }, + { + "epoch": 0.5264870004754398, + "grad_norm": 1.0855140686035156, + "learning_rate": 4.8139619079630925e-06, + "loss": 0.308, + "step": 26300 + }, + { + "epoch": 0.5265070189925681, + "grad_norm": 1.9086233377456665, + "learning_rate": 4.813637949828821e-06, + "loss": 0.7601, + "step": 26301 + }, + { + "epoch": 0.5265270375096964, + "grad_norm": 1.7911943197250366, + "learning_rate": 4.813313992477977e-06, + "loss": 0.757, + "step": 26302 + }, + { + "epoch": 0.5265470560268248, + "grad_norm": 1.0760524272918701, + "learning_rate": 4.812990035911921e-06, + "loss": 0.2909, + "step": 26303 + }, + { + "epoch": 0.5265670745439531, + "grad_norm": 1.0913008451461792, + "learning_rate": 4.812666080132012e-06, + "loss": 0.3228, + "step": 26304 + }, + { + "epoch": 0.5265870930610815, + "grad_norm": 1.0485647916793823, + "learning_rate": 4.812342125139616e-06, + "loss": 0.3214, + "step": 26305 + }, + { + "epoch": 0.5266071115782098, + "grad_norm": 1.0725231170654297, + "learning_rate": 4.8120181709360945e-06, + "loss": 0.3327, + "step": 26306 + }, + { + "epoch": 0.5266271300953382, + "grad_norm": 1.5554499626159668, + "learning_rate": 4.811694217522806e-06, + "loss": 0.2847, + "step": 26307 + }, + { + "epoch": 0.5266471486124665, + "grad_norm": 1.1127839088439941, + "learning_rate": 4.811370264901114e-06, + "loss": 0.3217, + "step": 26308 + }, + { + "epoch": 0.5266671671295948, + "grad_norm": 1.1981576681137085, + "learning_rate": 4.811046313072383e-06, + "loss": 0.2974, + "step": 26309 + }, + { + "epoch": 0.5266871856467232, + "grad_norm": 1.0969712734222412, + "learning_rate": 4.8107223620379725e-06, + "loss": 0.3326, + "step": 26310 + }, + { + "epoch": 0.5267072041638515, + "grad_norm": 1.0542147159576416, + "learning_rate": 4.810398411799245e-06, + "loss": 0.31, + "step": 26311 + }, + { + "epoch": 0.5267272226809799, + "grad_norm": 1.8170431852340698, + "learning_rate": 4.8100744623575595e-06, + "loss": 0.7829, + "step": 26312 + }, + { + "epoch": 0.5267472411981082, + "grad_norm": 1.7963284254074097, + "learning_rate": 4.8097505137142805e-06, + "loss": 0.7502, + "step": 26313 + }, + { + "epoch": 0.5267672597152366, + "grad_norm": 1.1422431468963623, + "learning_rate": 4.80942656587077e-06, + "loss": 0.4051, + "step": 26314 + }, + { + "epoch": 0.5267872782323649, + "grad_norm": 1.1624915599822998, + "learning_rate": 4.80910261882839e-06, + "loss": 0.2956, + "step": 26315 + }, + { + "epoch": 0.5268072967494933, + "grad_norm": 1.0614389181137085, + "learning_rate": 4.808778672588502e-06, + "loss": 0.3061, + "step": 26316 + }, + { + "epoch": 0.5268273152666216, + "grad_norm": 1.003459095954895, + "learning_rate": 4.808454727152464e-06, + "loss": 0.2649, + "step": 26317 + }, + { + "epoch": 0.5268473337837499, + "grad_norm": 1.0740489959716797, + "learning_rate": 4.808130782521644e-06, + "loss": 0.3344, + "step": 26318 + }, + { + "epoch": 0.5268673523008783, + "grad_norm": 1.0850499868392944, + "learning_rate": 4.807806838697401e-06, + "loss": 0.2935, + "step": 26319 + }, + { + "epoch": 0.5268873708180066, + "grad_norm": 1.0499876737594604, + "learning_rate": 4.8074828956810946e-06, + "loss": 0.2907, + "step": 26320 + }, + { + "epoch": 0.526907389335135, + "grad_norm": 1.2913600206375122, + "learning_rate": 4.807158953474091e-06, + "loss": 0.3306, + "step": 26321 + }, + { + "epoch": 0.5269274078522633, + "grad_norm": 1.4377104043960571, + "learning_rate": 4.8068350120777464e-06, + "loss": 0.2768, + "step": 26322 + }, + { + "epoch": 0.5269474263693917, + "grad_norm": 1.0742160081863403, + "learning_rate": 4.806511071493429e-06, + "loss": 0.3258, + "step": 26323 + }, + { + "epoch": 0.52696744488652, + "grad_norm": 2.024916410446167, + "learning_rate": 4.806187131722497e-06, + "loss": 0.8214, + "step": 26324 + }, + { + "epoch": 0.5269874634036483, + "grad_norm": 1.1172593832015991, + "learning_rate": 4.805863192766312e-06, + "loss": 0.325, + "step": 26325 + }, + { + "epoch": 0.5270074819207767, + "grad_norm": 1.2453335523605347, + "learning_rate": 4.8055392546262355e-06, + "loss": 0.2759, + "step": 26326 + }, + { + "epoch": 0.527027500437905, + "grad_norm": 1.8402268886566162, + "learning_rate": 4.8052153173036305e-06, + "loss": 0.7851, + "step": 26327 + }, + { + "epoch": 0.5270475189550334, + "grad_norm": 1.2452120780944824, + "learning_rate": 4.804891380799859e-06, + "loss": 0.3348, + "step": 26328 + }, + { + "epoch": 0.5270675374721617, + "grad_norm": 1.2612282037734985, + "learning_rate": 4.804567445116283e-06, + "loss": 0.2836, + "step": 26329 + }, + { + "epoch": 0.5270875559892901, + "grad_norm": 1.102595567703247, + "learning_rate": 4.804243510254261e-06, + "loss": 0.3476, + "step": 26330 + }, + { + "epoch": 0.5271075745064184, + "grad_norm": 1.1363240480422974, + "learning_rate": 4.803919576215159e-06, + "loss": 0.299, + "step": 26331 + }, + { + "epoch": 0.5271275930235467, + "grad_norm": 1.9723544120788574, + "learning_rate": 4.8035956430003376e-06, + "loss": 0.8092, + "step": 26332 + }, + { + "epoch": 0.5271476115406751, + "grad_norm": 1.1524906158447266, + "learning_rate": 4.803271710611156e-06, + "loss": 0.3085, + "step": 26333 + }, + { + "epoch": 0.5271676300578034, + "grad_norm": 1.1696693897247314, + "learning_rate": 4.802947779048979e-06, + "loss": 0.2756, + "step": 26334 + }, + { + "epoch": 0.5271876485749318, + "grad_norm": 1.0027000904083252, + "learning_rate": 4.802623848315165e-06, + "loss": 0.327, + "step": 26335 + }, + { + "epoch": 0.5272076670920601, + "grad_norm": 1.238166332244873, + "learning_rate": 4.80229991841108e-06, + "loss": 0.321, + "step": 26336 + }, + { + "epoch": 0.5272276856091885, + "grad_norm": 0.9948731064796448, + "learning_rate": 4.801975989338084e-06, + "loss": 0.2392, + "step": 26337 + }, + { + "epoch": 0.5272477041263168, + "grad_norm": 1.8584506511688232, + "learning_rate": 4.801652061097537e-06, + "loss": 0.78, + "step": 26338 + }, + { + "epoch": 0.5272677226434452, + "grad_norm": 1.183834195137024, + "learning_rate": 4.801328133690802e-06, + "loss": 0.2974, + "step": 26339 + }, + { + "epoch": 0.5272877411605735, + "grad_norm": 0.9812594056129456, + "learning_rate": 4.801004207119241e-06, + "loss": 0.2711, + "step": 26340 + }, + { + "epoch": 0.5273077596777018, + "grad_norm": 1.1410084962844849, + "learning_rate": 4.800680281384216e-06, + "loss": 0.3334, + "step": 26341 + }, + { + "epoch": 0.5273277781948302, + "grad_norm": 1.1649770736694336, + "learning_rate": 4.800356356487089e-06, + "loss": 0.3344, + "step": 26342 + }, + { + "epoch": 0.5273477967119585, + "grad_norm": 1.1352850198745728, + "learning_rate": 4.80003243242922e-06, + "loss": 0.3, + "step": 26343 + }, + { + "epoch": 0.5273678152290869, + "grad_norm": 1.8105260133743286, + "learning_rate": 4.79970850921197e-06, + "loss": 0.8263, + "step": 26344 + }, + { + "epoch": 0.5273878337462152, + "grad_norm": 1.9314833879470825, + "learning_rate": 4.799384586836705e-06, + "loss": 0.8197, + "step": 26345 + }, + { + "epoch": 0.5274078522633436, + "grad_norm": 1.0745978355407715, + "learning_rate": 4.799060665304782e-06, + "loss": 0.3071, + "step": 26346 + }, + { + "epoch": 0.5274278707804719, + "grad_norm": 1.0859041213989258, + "learning_rate": 4.798736744617566e-06, + "loss": 0.3546, + "step": 26347 + }, + { + "epoch": 0.5274478892976002, + "grad_norm": 1.0975245237350464, + "learning_rate": 4.798412824776415e-06, + "loss": 0.293, + "step": 26348 + }, + { + "epoch": 0.5274679078147286, + "grad_norm": 1.0660783052444458, + "learning_rate": 4.798088905782696e-06, + "loss": 0.3422, + "step": 26349 + }, + { + "epoch": 0.5274879263318569, + "grad_norm": 1.0107303857803345, + "learning_rate": 4.797764987637768e-06, + "loss": 0.3048, + "step": 26350 + }, + { + "epoch": 0.5275079448489853, + "grad_norm": 1.1218225955963135, + "learning_rate": 4.797441070342991e-06, + "loss": 0.2993, + "step": 26351 + }, + { + "epoch": 0.5275279633661136, + "grad_norm": 1.0320976972579956, + "learning_rate": 4.7971171538997295e-06, + "loss": 0.2788, + "step": 26352 + }, + { + "epoch": 0.527547981883242, + "grad_norm": 1.1228007078170776, + "learning_rate": 4.7967932383093405e-06, + "loss": 0.2984, + "step": 26353 + }, + { + "epoch": 0.5275680004003703, + "grad_norm": 1.052940011024475, + "learning_rate": 4.796469323573193e-06, + "loss": 0.3096, + "step": 26354 + }, + { + "epoch": 0.5275880189174987, + "grad_norm": 1.116024374961853, + "learning_rate": 4.796145409692643e-06, + "loss": 0.2655, + "step": 26355 + }, + { + "epoch": 0.527608037434627, + "grad_norm": 1.3587524890899658, + "learning_rate": 4.795821496669054e-06, + "loss": 0.2795, + "step": 26356 + }, + { + "epoch": 0.5276280559517553, + "grad_norm": 1.1823195219039917, + "learning_rate": 4.795497584503786e-06, + "loss": 0.2884, + "step": 26357 + }, + { + "epoch": 0.5276480744688837, + "grad_norm": 1.4200549125671387, + "learning_rate": 4.795173673198204e-06, + "loss": 0.3194, + "step": 26358 + }, + { + "epoch": 0.527668092986012, + "grad_norm": 1.1811425685882568, + "learning_rate": 4.794849762753666e-06, + "loss": 0.3185, + "step": 26359 + }, + { + "epoch": 0.5276881115031404, + "grad_norm": 1.0886465311050415, + "learning_rate": 4.794525853171538e-06, + "loss": 0.3227, + "step": 26360 + }, + { + "epoch": 0.5277081300202687, + "grad_norm": 1.0728986263275146, + "learning_rate": 4.794201944453176e-06, + "loss": 0.3086, + "step": 26361 + }, + { + "epoch": 0.5277281485373971, + "grad_norm": 1.8611516952514648, + "learning_rate": 4.793878036599947e-06, + "loss": 0.8281, + "step": 26362 + }, + { + "epoch": 0.5277481670545254, + "grad_norm": 1.047803521156311, + "learning_rate": 4.793554129613211e-06, + "loss": 0.3345, + "step": 26363 + }, + { + "epoch": 0.5277681855716537, + "grad_norm": 1.090653657913208, + "learning_rate": 4.793230223494327e-06, + "loss": 0.3458, + "step": 26364 + }, + { + "epoch": 0.5277882040887821, + "grad_norm": 1.134364128112793, + "learning_rate": 4.792906318244661e-06, + "loss": 0.3954, + "step": 26365 + }, + { + "epoch": 0.5278082226059104, + "grad_norm": 1.05474853515625, + "learning_rate": 4.792582413865569e-06, + "loss": 0.2689, + "step": 26366 + }, + { + "epoch": 0.5278282411230388, + "grad_norm": 1.1213358640670776, + "learning_rate": 4.792258510358418e-06, + "loss": 0.2677, + "step": 26367 + }, + { + "epoch": 0.5278482596401671, + "grad_norm": 1.0210388898849487, + "learning_rate": 4.791934607724568e-06, + "loss": 0.2848, + "step": 26368 + }, + { + "epoch": 0.5278682781572955, + "grad_norm": 1.0680981874465942, + "learning_rate": 4.79161070596538e-06, + "loss": 0.2859, + "step": 26369 + }, + { + "epoch": 0.5278882966744238, + "grad_norm": 1.2288600206375122, + "learning_rate": 4.7912868050822135e-06, + "loss": 0.3095, + "step": 26370 + }, + { + "epoch": 0.5279083151915522, + "grad_norm": 1.2199355363845825, + "learning_rate": 4.7909629050764345e-06, + "loss": 0.3243, + "step": 26371 + }, + { + "epoch": 0.5279283337086805, + "grad_norm": 1.0591741800308228, + "learning_rate": 4.790639005949401e-06, + "loss": 0.2905, + "step": 26372 + }, + { + "epoch": 0.5279483522258088, + "grad_norm": 1.7460997104644775, + "learning_rate": 4.790315107702478e-06, + "loss": 0.777, + "step": 26373 + }, + { + "epoch": 0.5279683707429372, + "grad_norm": 1.123859167098999, + "learning_rate": 4.789991210337025e-06, + "loss": 0.3017, + "step": 26374 + }, + { + "epoch": 0.5279883892600655, + "grad_norm": 1.0369787216186523, + "learning_rate": 4.789667313854402e-06, + "loss": 0.2952, + "step": 26375 + }, + { + "epoch": 0.5280084077771939, + "grad_norm": 1.1221855878829956, + "learning_rate": 4.789343418255973e-06, + "loss": 0.3076, + "step": 26376 + }, + { + "epoch": 0.5280284262943222, + "grad_norm": 1.2466291189193726, + "learning_rate": 4.789019523543099e-06, + "loss": 0.2949, + "step": 26377 + }, + { + "epoch": 0.5280484448114506, + "grad_norm": 1.9017356634140015, + "learning_rate": 4.788695629717142e-06, + "loss": 0.8234, + "step": 26378 + }, + { + "epoch": 0.5280684633285789, + "grad_norm": 1.0902193784713745, + "learning_rate": 4.7883717367794615e-06, + "loss": 0.2916, + "step": 26379 + }, + { + "epoch": 0.5280884818457072, + "grad_norm": 1.0505249500274658, + "learning_rate": 4.788047844731422e-06, + "loss": 0.3254, + "step": 26380 + }, + { + "epoch": 0.5281085003628356, + "grad_norm": 1.046875, + "learning_rate": 4.787723953574385e-06, + "loss": 0.2667, + "step": 26381 + }, + { + "epoch": 0.5281285188799639, + "grad_norm": 1.1626970767974854, + "learning_rate": 4.78740006330971e-06, + "loss": 0.2841, + "step": 26382 + }, + { + "epoch": 0.5281485373970923, + "grad_norm": 1.0839194059371948, + "learning_rate": 4.7870761739387575e-06, + "loss": 0.2622, + "step": 26383 + }, + { + "epoch": 0.5281685559142206, + "grad_norm": 1.0728861093521118, + "learning_rate": 4.786752285462893e-06, + "loss": 0.2709, + "step": 26384 + }, + { + "epoch": 0.528188574431349, + "grad_norm": 1.161096215248108, + "learning_rate": 4.786428397883475e-06, + "loss": 0.3415, + "step": 26385 + }, + { + "epoch": 0.5282085929484773, + "grad_norm": 1.9118608236312866, + "learning_rate": 4.786104511201866e-06, + "loss": 0.8127, + "step": 26386 + }, + { + "epoch": 0.5282286114656057, + "grad_norm": 1.0754176378250122, + "learning_rate": 4.78578062541943e-06, + "loss": 0.3319, + "step": 26387 + }, + { + "epoch": 0.528248629982734, + "grad_norm": 1.1177244186401367, + "learning_rate": 4.7854567405375216e-06, + "loss": 0.2835, + "step": 26388 + }, + { + "epoch": 0.5282686484998623, + "grad_norm": 1.2689183950424194, + "learning_rate": 4.78513285655751e-06, + "loss": 0.3227, + "step": 26389 + }, + { + "epoch": 0.5282886670169907, + "grad_norm": 1.1442985534667969, + "learning_rate": 4.784808973480754e-06, + "loss": 0.2745, + "step": 26390 + }, + { + "epoch": 0.528308685534119, + "grad_norm": 1.94612455368042, + "learning_rate": 4.784485091308614e-06, + "loss": 0.8227, + "step": 26391 + }, + { + "epoch": 0.5283287040512474, + "grad_norm": 1.0407004356384277, + "learning_rate": 4.78416121004245e-06, + "loss": 0.3181, + "step": 26392 + }, + { + "epoch": 0.5283487225683757, + "grad_norm": 1.1097172498703003, + "learning_rate": 4.7838373296836285e-06, + "loss": 0.2963, + "step": 26393 + }, + { + "epoch": 0.5283687410855041, + "grad_norm": 1.1790751218795776, + "learning_rate": 4.783513450233509e-06, + "loss": 0.3406, + "step": 26394 + }, + { + "epoch": 0.5283887596026324, + "grad_norm": 1.0648034811019897, + "learning_rate": 4.783189571693452e-06, + "loss": 0.265, + "step": 26395 + }, + { + "epoch": 0.5284087781197607, + "grad_norm": 1.0667784214019775, + "learning_rate": 4.782865694064817e-06, + "loss": 0.3079, + "step": 26396 + }, + { + "epoch": 0.5284287966368891, + "grad_norm": 1.0137672424316406, + "learning_rate": 4.78254181734897e-06, + "loss": 0.2848, + "step": 26397 + }, + { + "epoch": 0.5284488151540174, + "grad_norm": 1.2096902132034302, + "learning_rate": 4.782217941547269e-06, + "loss": 0.3259, + "step": 26398 + }, + { + "epoch": 0.5284688336711458, + "grad_norm": 1.0680747032165527, + "learning_rate": 4.781894066661079e-06, + "loss": 0.3031, + "step": 26399 + }, + { + "epoch": 0.5284888521882741, + "grad_norm": 1.111146330833435, + "learning_rate": 4.781570192691759e-06, + "loss": 0.317, + "step": 26400 + }, + { + "epoch": 0.5285088707054025, + "grad_norm": 1.0210062265396118, + "learning_rate": 4.781246319640668e-06, + "loss": 0.3315, + "step": 26401 + }, + { + "epoch": 0.5285288892225308, + "grad_norm": 1.0609320402145386, + "learning_rate": 4.780922447509173e-06, + "loss": 0.3319, + "step": 26402 + }, + { + "epoch": 0.5285489077396592, + "grad_norm": 1.0767077207565308, + "learning_rate": 4.780598576298632e-06, + "loss": 0.2923, + "step": 26403 + }, + { + "epoch": 0.5285689262567875, + "grad_norm": 1.1163339614868164, + "learning_rate": 4.7802747060104085e-06, + "loss": 0.2933, + "step": 26404 + }, + { + "epoch": 0.5285889447739158, + "grad_norm": 1.411014199256897, + "learning_rate": 4.7799508366458626e-06, + "loss": 0.3319, + "step": 26405 + }, + { + "epoch": 0.5286089632910442, + "grad_norm": 1.1642485857009888, + "learning_rate": 4.779626968206354e-06, + "loss": 0.3027, + "step": 26406 + }, + { + "epoch": 0.5286289818081725, + "grad_norm": 1.2570627927780151, + "learning_rate": 4.779303100693248e-06, + "loss": 0.3263, + "step": 26407 + }, + { + "epoch": 0.5286490003253009, + "grad_norm": 1.0909408330917358, + "learning_rate": 4.778979234107905e-06, + "loss": 0.2989, + "step": 26408 + }, + { + "epoch": 0.5286690188424292, + "grad_norm": 1.0336756706237793, + "learning_rate": 4.778655368451685e-06, + "loss": 0.3287, + "step": 26409 + }, + { + "epoch": 0.5286890373595576, + "grad_norm": 1.1029412746429443, + "learning_rate": 4.778331503725949e-06, + "loss": 0.3438, + "step": 26410 + }, + { + "epoch": 0.5287090558766859, + "grad_norm": 1.07566499710083, + "learning_rate": 4.77800763993206e-06, + "loss": 0.3157, + "step": 26411 + }, + { + "epoch": 0.5287290743938142, + "grad_norm": 1.2307904958724976, + "learning_rate": 4.77768377707138e-06, + "loss": 0.3807, + "step": 26412 + }, + { + "epoch": 0.5287490929109426, + "grad_norm": 1.2068593502044678, + "learning_rate": 4.77735991514527e-06, + "loss": 0.3378, + "step": 26413 + }, + { + "epoch": 0.5287691114280709, + "grad_norm": 1.0515129566192627, + "learning_rate": 4.777036054155088e-06, + "loss": 0.3239, + "step": 26414 + }, + { + "epoch": 0.5287891299451993, + "grad_norm": 1.117867112159729, + "learning_rate": 4.7767121941022025e-06, + "loss": 0.3081, + "step": 26415 + }, + { + "epoch": 0.5288091484623276, + "grad_norm": 1.093582272529602, + "learning_rate": 4.776388334987968e-06, + "loss": 0.3081, + "step": 26416 + }, + { + "epoch": 0.528829166979456, + "grad_norm": 2.1505861282348633, + "learning_rate": 4.776064476813751e-06, + "loss": 0.7722, + "step": 26417 + }, + { + "epoch": 0.5288491854965843, + "grad_norm": 1.1203022003173828, + "learning_rate": 4.77574061958091e-06, + "loss": 0.3141, + "step": 26418 + }, + { + "epoch": 0.5288692040137127, + "grad_norm": 1.0914850234985352, + "learning_rate": 4.775416763290806e-06, + "loss": 0.3206, + "step": 26419 + }, + { + "epoch": 0.528889222530841, + "grad_norm": 1.1091954708099365, + "learning_rate": 4.775092907944803e-06, + "loss": 0.3189, + "step": 26420 + }, + { + "epoch": 0.5289092410479693, + "grad_norm": 1.1266173124313354, + "learning_rate": 4.77476905354426e-06, + "loss": 0.2965, + "step": 26421 + }, + { + "epoch": 0.5289292595650977, + "grad_norm": 1.0905245542526245, + "learning_rate": 4.77444520009054e-06, + "loss": 0.3452, + "step": 26422 + }, + { + "epoch": 0.528949278082226, + "grad_norm": 1.1255993843078613, + "learning_rate": 4.774121347585003e-06, + "loss": 0.2938, + "step": 26423 + }, + { + "epoch": 0.5289692965993544, + "grad_norm": 1.2186039686203003, + "learning_rate": 4.773797496029012e-06, + "loss": 0.2754, + "step": 26424 + }, + { + "epoch": 0.5289893151164827, + "grad_norm": 1.1227916479110718, + "learning_rate": 4.773473645423927e-06, + "loss": 0.3591, + "step": 26425 + }, + { + "epoch": 0.5290093336336111, + "grad_norm": 1.1755034923553467, + "learning_rate": 4.773149795771112e-06, + "loss": 0.3497, + "step": 26426 + }, + { + "epoch": 0.5290293521507394, + "grad_norm": 1.0885992050170898, + "learning_rate": 4.7728259470719254e-06, + "loss": 0.2968, + "step": 26427 + }, + { + "epoch": 0.5290493706678677, + "grad_norm": 1.0784891843795776, + "learning_rate": 4.772502099327729e-06, + "loss": 0.315, + "step": 26428 + }, + { + "epoch": 0.5290693891849961, + "grad_norm": 1.3301382064819336, + "learning_rate": 4.772178252539884e-06, + "loss": 0.3374, + "step": 26429 + }, + { + "epoch": 0.5290894077021244, + "grad_norm": 1.1320067644119263, + "learning_rate": 4.771854406709754e-06, + "loss": 0.3367, + "step": 26430 + }, + { + "epoch": 0.5291094262192528, + "grad_norm": 1.1721258163452148, + "learning_rate": 4.7715305618387e-06, + "loss": 0.297, + "step": 26431 + }, + { + "epoch": 0.5291294447363811, + "grad_norm": 1.9532520771026611, + "learning_rate": 4.7712067179280795e-06, + "loss": 0.7639, + "step": 26432 + }, + { + "epoch": 0.5291494632535095, + "grad_norm": 1.308457851409912, + "learning_rate": 4.770882874979259e-06, + "loss": 0.3341, + "step": 26433 + }, + { + "epoch": 0.5291694817706378, + "grad_norm": 1.9585484266281128, + "learning_rate": 4.770559032993597e-06, + "loss": 0.7581, + "step": 26434 + }, + { + "epoch": 0.5291895002877662, + "grad_norm": 1.1046342849731445, + "learning_rate": 4.770235191972455e-06, + "loss": 0.2957, + "step": 26435 + }, + { + "epoch": 0.5292095188048945, + "grad_norm": 1.0853270292282104, + "learning_rate": 4.769911351917196e-06, + "loss": 0.3309, + "step": 26436 + }, + { + "epoch": 0.5292295373220228, + "grad_norm": 1.1617687940597534, + "learning_rate": 4.769587512829177e-06, + "loss": 0.3254, + "step": 26437 + }, + { + "epoch": 0.5292495558391512, + "grad_norm": 1.278117060661316, + "learning_rate": 4.769263674709766e-06, + "loss": 0.3747, + "step": 26438 + }, + { + "epoch": 0.5292695743562795, + "grad_norm": 1.230269432067871, + "learning_rate": 4.7689398375603205e-06, + "loss": 0.3425, + "step": 26439 + }, + { + "epoch": 0.5292895928734079, + "grad_norm": 1.0578936338424683, + "learning_rate": 4.768616001382201e-06, + "loss": 0.3165, + "step": 26440 + }, + { + "epoch": 0.5293096113905362, + "grad_norm": 1.1011754274368286, + "learning_rate": 4.768292166176769e-06, + "loss": 0.3072, + "step": 26441 + }, + { + "epoch": 0.5293296299076646, + "grad_norm": 1.1020022630691528, + "learning_rate": 4.767968331945388e-06, + "loss": 0.3364, + "step": 26442 + }, + { + "epoch": 0.5293496484247929, + "grad_norm": 1.0782890319824219, + "learning_rate": 4.767644498689419e-06, + "loss": 0.2787, + "step": 26443 + }, + { + "epoch": 0.5293696669419212, + "grad_norm": 1.159368634223938, + "learning_rate": 4.767320666410223e-06, + "loss": 0.3049, + "step": 26444 + }, + { + "epoch": 0.5293896854590496, + "grad_norm": 1.1680970191955566, + "learning_rate": 4.766996835109158e-06, + "loss": 0.3242, + "step": 26445 + }, + { + "epoch": 0.5294097039761779, + "grad_norm": 1.0545287132263184, + "learning_rate": 4.76667300478759e-06, + "loss": 0.2897, + "step": 26446 + }, + { + "epoch": 0.5294297224933063, + "grad_norm": 1.244285225868225, + "learning_rate": 4.766349175446879e-06, + "loss": 0.3001, + "step": 26447 + }, + { + "epoch": 0.5294497410104346, + "grad_norm": 1.1208094358444214, + "learning_rate": 4.766025347088386e-06, + "loss": 0.3391, + "step": 26448 + }, + { + "epoch": 0.529469759527563, + "grad_norm": 1.2616230249404907, + "learning_rate": 4.765701519713471e-06, + "loss": 0.3256, + "step": 26449 + }, + { + "epoch": 0.5294897780446913, + "grad_norm": 1.1141010522842407, + "learning_rate": 4.765377693323496e-06, + "loss": 0.291, + "step": 26450 + }, + { + "epoch": 0.5295097965618197, + "grad_norm": 1.0762299299240112, + "learning_rate": 4.7650538679198235e-06, + "loss": 0.3257, + "step": 26451 + }, + { + "epoch": 0.529529815078948, + "grad_norm": 1.0886752605438232, + "learning_rate": 4.7647300435038145e-06, + "loss": 0.3056, + "step": 26452 + }, + { + "epoch": 0.5295498335960763, + "grad_norm": 1.8504977226257324, + "learning_rate": 4.76440622007683e-06, + "loss": 0.7951, + "step": 26453 + }, + { + "epoch": 0.5295698521132047, + "grad_norm": 1.0349197387695312, + "learning_rate": 4.764082397640228e-06, + "loss": 0.3002, + "step": 26454 + }, + { + "epoch": 0.529589870630333, + "grad_norm": 1.1200798749923706, + "learning_rate": 4.7637585761953755e-06, + "loss": 0.3259, + "step": 26455 + }, + { + "epoch": 0.5296098891474614, + "grad_norm": 1.1836737394332886, + "learning_rate": 4.7634347557436305e-06, + "loss": 0.3499, + "step": 26456 + }, + { + "epoch": 0.5296299076645897, + "grad_norm": 1.129333257675171, + "learning_rate": 4.763110936286356e-06, + "loss": 0.2812, + "step": 26457 + }, + { + "epoch": 0.5296499261817181, + "grad_norm": 1.9344760179519653, + "learning_rate": 4.762787117824912e-06, + "loss": 0.8063, + "step": 26458 + }, + { + "epoch": 0.5296699446988464, + "grad_norm": 1.04265558719635, + "learning_rate": 4.762463300360657e-06, + "loss": 0.2687, + "step": 26459 + }, + { + "epoch": 0.5296899632159747, + "grad_norm": 1.1507562398910522, + "learning_rate": 4.762139483894958e-06, + "loss": 0.3088, + "step": 26460 + }, + { + "epoch": 0.5297099817331031, + "grad_norm": 1.339261531829834, + "learning_rate": 4.7618156684291725e-06, + "loss": 0.3057, + "step": 26461 + }, + { + "epoch": 0.5297300002502314, + "grad_norm": 1.2338258028030396, + "learning_rate": 4.761491853964663e-06, + "loss": 0.3025, + "step": 26462 + }, + { + "epoch": 0.5297500187673598, + "grad_norm": 1.1671323776245117, + "learning_rate": 4.761168040502789e-06, + "loss": 0.2979, + "step": 26463 + }, + { + "epoch": 0.5297700372844881, + "grad_norm": 1.166360855102539, + "learning_rate": 4.760844228044915e-06, + "loss": 0.2691, + "step": 26464 + }, + { + "epoch": 0.5297900558016165, + "grad_norm": 1.0901553630828857, + "learning_rate": 4.7605204165924e-06, + "loss": 0.2981, + "step": 26465 + }, + { + "epoch": 0.5298100743187448, + "grad_norm": 1.2020177841186523, + "learning_rate": 4.7601966061466065e-06, + "loss": 0.3584, + "step": 26466 + }, + { + "epoch": 0.5298300928358732, + "grad_norm": 1.092477560043335, + "learning_rate": 4.759872796708893e-06, + "loss": 0.3238, + "step": 26467 + }, + { + "epoch": 0.5298501113530015, + "grad_norm": 1.1248106956481934, + "learning_rate": 4.759548988280622e-06, + "loss": 0.2847, + "step": 26468 + }, + { + "epoch": 0.5298701298701298, + "grad_norm": 1.0191775560379028, + "learning_rate": 4.7592251808631575e-06, + "loss": 0.2764, + "step": 26469 + }, + { + "epoch": 0.5298901483872582, + "grad_norm": 1.0612579584121704, + "learning_rate": 4.758901374457858e-06, + "loss": 0.3053, + "step": 26470 + }, + { + "epoch": 0.5299101669043865, + "grad_norm": 1.0290699005126953, + "learning_rate": 4.758577569066085e-06, + "loss": 0.2729, + "step": 26471 + }, + { + "epoch": 0.5299301854215149, + "grad_norm": 1.0880545377731323, + "learning_rate": 4.758253764689199e-06, + "loss": 0.3448, + "step": 26472 + }, + { + "epoch": 0.5299502039386432, + "grad_norm": 1.020753264427185, + "learning_rate": 4.757929961328564e-06, + "loss": 0.3588, + "step": 26473 + }, + { + "epoch": 0.5299702224557716, + "grad_norm": 1.0538980960845947, + "learning_rate": 4.757606158985538e-06, + "loss": 0.2954, + "step": 26474 + }, + { + "epoch": 0.5299902409728999, + "grad_norm": 1.2436230182647705, + "learning_rate": 4.757282357661485e-06, + "loss": 0.2933, + "step": 26475 + }, + { + "epoch": 0.5300102594900282, + "grad_norm": 1.0655368566513062, + "learning_rate": 4.756958557357761e-06, + "loss": 0.2709, + "step": 26476 + }, + { + "epoch": 0.5300302780071566, + "grad_norm": 1.0145906209945679, + "learning_rate": 4.756634758075735e-06, + "loss": 0.2879, + "step": 26477 + }, + { + "epoch": 0.5300502965242849, + "grad_norm": 1.1909639835357666, + "learning_rate": 4.756310959816763e-06, + "loss": 0.2636, + "step": 26478 + }, + { + "epoch": 0.5300703150414133, + "grad_norm": 1.0417213439941406, + "learning_rate": 4.755987162582208e-06, + "loss": 0.3091, + "step": 26479 + }, + { + "epoch": 0.5300903335585416, + "grad_norm": 1.2186660766601562, + "learning_rate": 4.755663366373428e-06, + "loss": 0.3238, + "step": 26480 + }, + { + "epoch": 0.53011035207567, + "grad_norm": 1.2141578197479248, + "learning_rate": 4.755339571191787e-06, + "loss": 0.3254, + "step": 26481 + }, + { + "epoch": 0.5301303705927983, + "grad_norm": 1.783867359161377, + "learning_rate": 4.755015777038648e-06, + "loss": 0.758, + "step": 26482 + }, + { + "epoch": 0.5301503891099267, + "grad_norm": 1.889365315437317, + "learning_rate": 4.75469198391537e-06, + "loss": 0.7987, + "step": 26483 + }, + { + "epoch": 0.530170407627055, + "grad_norm": 1.0800154209136963, + "learning_rate": 4.754368191823313e-06, + "loss": 0.3024, + "step": 26484 + }, + { + "epoch": 0.5301904261441833, + "grad_norm": 1.124732255935669, + "learning_rate": 4.754044400763837e-06, + "loss": 0.2892, + "step": 26485 + }, + { + "epoch": 0.5302104446613117, + "grad_norm": 1.1930969953536987, + "learning_rate": 4.7537206107383086e-06, + "loss": 0.3246, + "step": 26486 + }, + { + "epoch": 0.53023046317844, + "grad_norm": 1.0195213556289673, + "learning_rate": 4.7533968217480846e-06, + "loss": 0.2797, + "step": 26487 + }, + { + "epoch": 0.5302504816955684, + "grad_norm": 1.0182347297668457, + "learning_rate": 4.753073033794527e-06, + "loss": 0.3155, + "step": 26488 + }, + { + "epoch": 0.5302705002126967, + "grad_norm": 1.4536000490188599, + "learning_rate": 4.752749246878999e-06, + "loss": 0.2769, + "step": 26489 + }, + { + "epoch": 0.5302905187298251, + "grad_norm": 1.0971553325653076, + "learning_rate": 4.7524254610028566e-06, + "loss": 0.2779, + "step": 26490 + }, + { + "epoch": 0.5303105372469534, + "grad_norm": 1.0139375925064087, + "learning_rate": 4.752101676167467e-06, + "loss": 0.3162, + "step": 26491 + }, + { + "epoch": 0.5303305557640817, + "grad_norm": 1.7364310026168823, + "learning_rate": 4.7517778923741885e-06, + "loss": 0.7748, + "step": 26492 + }, + { + "epoch": 0.5303505742812101, + "grad_norm": 1.0641850233078003, + "learning_rate": 4.7514541096243815e-06, + "loss": 0.2499, + "step": 26493 + }, + { + "epoch": 0.5303705927983384, + "grad_norm": 1.0706679821014404, + "learning_rate": 4.7511303279194065e-06, + "loss": 0.2963, + "step": 26494 + }, + { + "epoch": 0.5303906113154668, + "grad_norm": 1.2928229570388794, + "learning_rate": 4.750806547260628e-06, + "loss": 0.297, + "step": 26495 + }, + { + "epoch": 0.5304106298325951, + "grad_norm": 1.1409679651260376, + "learning_rate": 4.750482767649406e-06, + "loss": 0.287, + "step": 26496 + }, + { + "epoch": 0.5304306483497235, + "grad_norm": 1.1124529838562012, + "learning_rate": 4.7501589890871005e-06, + "loss": 0.3085, + "step": 26497 + }, + { + "epoch": 0.5304506668668518, + "grad_norm": 1.1707998514175415, + "learning_rate": 4.749835211575072e-06, + "loss": 0.3189, + "step": 26498 + }, + { + "epoch": 0.5304706853839802, + "grad_norm": 1.142926573753357, + "learning_rate": 4.749511435114681e-06, + "loss": 0.3275, + "step": 26499 + }, + { + "epoch": 0.5304907039011085, + "grad_norm": 2.262974739074707, + "learning_rate": 4.749187659707291e-06, + "loss": 0.7255, + "step": 26500 + }, + { + "epoch": 0.5305107224182368, + "grad_norm": 1.1694847345352173, + "learning_rate": 4.748863885354264e-06, + "loss": 0.3434, + "step": 26501 + }, + { + "epoch": 0.5305307409353652, + "grad_norm": 1.1409285068511963, + "learning_rate": 4.748540112056957e-06, + "loss": 0.2847, + "step": 26502 + }, + { + "epoch": 0.5305507594524935, + "grad_norm": 1.1814371347427368, + "learning_rate": 4.748216339816733e-06, + "loss": 0.3292, + "step": 26503 + }, + { + "epoch": 0.5305707779696219, + "grad_norm": 1.9058493375778198, + "learning_rate": 4.7478925686349546e-06, + "loss": 0.7625, + "step": 26504 + }, + { + "epoch": 0.5305907964867502, + "grad_norm": 1.245153784751892, + "learning_rate": 4.747568798512982e-06, + "loss": 0.2792, + "step": 26505 + }, + { + "epoch": 0.5306108150038786, + "grad_norm": 1.1300283670425415, + "learning_rate": 4.747245029452175e-06, + "loss": 0.3284, + "step": 26506 + }, + { + "epoch": 0.5306308335210069, + "grad_norm": 1.0574156045913696, + "learning_rate": 4.746921261453894e-06, + "loss": 0.3273, + "step": 26507 + }, + { + "epoch": 0.5306508520381352, + "grad_norm": 1.209949254989624, + "learning_rate": 4.746597494519504e-06, + "loss": 0.3702, + "step": 26508 + }, + { + "epoch": 0.5306708705552636, + "grad_norm": 1.1995702981948853, + "learning_rate": 4.746273728650363e-06, + "loss": 0.2867, + "step": 26509 + }, + { + "epoch": 0.5306908890723919, + "grad_norm": 1.1090576648712158, + "learning_rate": 4.745949963847833e-06, + "loss": 0.2954, + "step": 26510 + }, + { + "epoch": 0.5307109075895203, + "grad_norm": 1.1796941757202148, + "learning_rate": 4.745626200113274e-06, + "loss": 0.3056, + "step": 26511 + }, + { + "epoch": 0.5307309261066486, + "grad_norm": 1.119905948638916, + "learning_rate": 4.745302437448047e-06, + "loss": 0.3063, + "step": 26512 + }, + { + "epoch": 0.530750944623777, + "grad_norm": 1.125356912612915, + "learning_rate": 4.744978675853514e-06, + "loss": 0.3162, + "step": 26513 + }, + { + "epoch": 0.5307709631409053, + "grad_norm": 1.2344225645065308, + "learning_rate": 4.744654915331037e-06, + "loss": 0.3537, + "step": 26514 + }, + { + "epoch": 0.5307909816580337, + "grad_norm": 1.8666470050811768, + "learning_rate": 4.744331155881976e-06, + "loss": 0.8735, + "step": 26515 + }, + { + "epoch": 0.530811000175162, + "grad_norm": 1.1701934337615967, + "learning_rate": 4.744007397507689e-06, + "loss": 0.3139, + "step": 26516 + }, + { + "epoch": 0.5308310186922903, + "grad_norm": 1.0606789588928223, + "learning_rate": 4.743683640209542e-06, + "loss": 0.312, + "step": 26517 + }, + { + "epoch": 0.5308510372094187, + "grad_norm": 1.1152310371398926, + "learning_rate": 4.7433598839888945e-06, + "loss": 0.3074, + "step": 26518 + }, + { + "epoch": 0.530871055726547, + "grad_norm": 1.1363110542297363, + "learning_rate": 4.743036128847105e-06, + "loss": 0.299, + "step": 26519 + }, + { + "epoch": 0.5308910742436754, + "grad_norm": 1.2341750860214233, + "learning_rate": 4.742712374785538e-06, + "loss": 0.3267, + "step": 26520 + }, + { + "epoch": 0.5309110927608037, + "grad_norm": 1.0306092500686646, + "learning_rate": 4.74238862180555e-06, + "loss": 0.3023, + "step": 26521 + }, + { + "epoch": 0.5309311112779321, + "grad_norm": 1.0548157691955566, + "learning_rate": 4.7420648699085074e-06, + "loss": 0.3895, + "step": 26522 + }, + { + "epoch": 0.5309511297950604, + "grad_norm": 1.1235294342041016, + "learning_rate": 4.741741119095769e-06, + "loss": 0.3235, + "step": 26523 + }, + { + "epoch": 0.5309711483121887, + "grad_norm": 1.9535049200057983, + "learning_rate": 4.741417369368694e-06, + "loss": 0.7856, + "step": 26524 + }, + { + "epoch": 0.5309911668293171, + "grad_norm": 1.1667536497116089, + "learning_rate": 4.741093620728644e-06, + "loss": 0.2677, + "step": 26525 + }, + { + "epoch": 0.5310111853464454, + "grad_norm": 2.001880168914795, + "learning_rate": 4.7407698731769815e-06, + "loss": 0.8292, + "step": 26526 + }, + { + "epoch": 0.5310312038635738, + "grad_norm": 1.2291243076324463, + "learning_rate": 4.740446126715068e-06, + "loss": 0.315, + "step": 26527 + }, + { + "epoch": 0.5310512223807021, + "grad_norm": 0.9826939702033997, + "learning_rate": 4.740122381344262e-06, + "loss": 0.2952, + "step": 26528 + }, + { + "epoch": 0.5310712408978305, + "grad_norm": 1.0273185968399048, + "learning_rate": 4.7397986370659245e-06, + "loss": 0.3226, + "step": 26529 + }, + { + "epoch": 0.5310912594149588, + "grad_norm": 1.107029676437378, + "learning_rate": 4.739474893881419e-06, + "loss": 0.3207, + "step": 26530 + }, + { + "epoch": 0.5311112779320872, + "grad_norm": 1.0671886205673218, + "learning_rate": 4.739151151792105e-06, + "loss": 0.279, + "step": 26531 + }, + { + "epoch": 0.5311312964492155, + "grad_norm": 1.898766279220581, + "learning_rate": 4.738827410799343e-06, + "loss": 0.8315, + "step": 26532 + }, + { + "epoch": 0.5311513149663438, + "grad_norm": 1.0943835973739624, + "learning_rate": 4.738503670904494e-06, + "loss": 0.3232, + "step": 26533 + }, + { + "epoch": 0.5311713334834722, + "grad_norm": 1.0151326656341553, + "learning_rate": 4.7381799321089185e-06, + "loss": 0.2494, + "step": 26534 + }, + { + "epoch": 0.5311913520006005, + "grad_norm": 1.9691256284713745, + "learning_rate": 4.737856194413981e-06, + "loss": 0.7156, + "step": 26535 + }, + { + "epoch": 0.5312113705177289, + "grad_norm": 1.0327986478805542, + "learning_rate": 4.737532457821038e-06, + "loss": 0.2615, + "step": 26536 + }, + { + "epoch": 0.5312313890348572, + "grad_norm": 1.1525044441223145, + "learning_rate": 4.737208722331452e-06, + "loss": 0.3562, + "step": 26537 + }, + { + "epoch": 0.5312514075519856, + "grad_norm": 1.056929111480713, + "learning_rate": 4.736884987946583e-06, + "loss": 0.2957, + "step": 26538 + }, + { + "epoch": 0.5312714260691139, + "grad_norm": 1.171927571296692, + "learning_rate": 4.736561254667794e-06, + "loss": 0.287, + "step": 26539 + }, + { + "epoch": 0.5312914445862422, + "grad_norm": 1.0952197313308716, + "learning_rate": 4.736237522496445e-06, + "loss": 0.3138, + "step": 26540 + }, + { + "epoch": 0.5313114631033706, + "grad_norm": 1.1039187908172607, + "learning_rate": 4.735913791433897e-06, + "loss": 0.3, + "step": 26541 + }, + { + "epoch": 0.5313314816204989, + "grad_norm": 1.1154859066009521, + "learning_rate": 4.7355900614815106e-06, + "loss": 0.2509, + "step": 26542 + }, + { + "epoch": 0.5313515001376273, + "grad_norm": 1.662057876586914, + "learning_rate": 4.735266332640645e-06, + "loss": 0.2893, + "step": 26543 + }, + { + "epoch": 0.5313715186547556, + "grad_norm": 0.9580294489860535, + "learning_rate": 4.734942604912665e-06, + "loss": 0.2896, + "step": 26544 + }, + { + "epoch": 0.531391537171884, + "grad_norm": 1.9839035272598267, + "learning_rate": 4.734618878298927e-06, + "loss": 0.8392, + "step": 26545 + }, + { + "epoch": 0.5314115556890123, + "grad_norm": 1.1242520809173584, + "learning_rate": 4.734295152800796e-06, + "loss": 0.2954, + "step": 26546 + }, + { + "epoch": 0.5314315742061407, + "grad_norm": 1.0528687238693237, + "learning_rate": 4.733971428419629e-06, + "loss": 0.3318, + "step": 26547 + }, + { + "epoch": 0.531451592723269, + "grad_norm": 1.0408062934875488, + "learning_rate": 4.7336477051567905e-06, + "loss": 0.2626, + "step": 26548 + }, + { + "epoch": 0.5314716112403973, + "grad_norm": 1.1109687089920044, + "learning_rate": 4.73332398301364e-06, + "loss": 0.3012, + "step": 26549 + }, + { + "epoch": 0.5314916297575257, + "grad_norm": 0.9417381286621094, + "learning_rate": 4.733000261991537e-06, + "loss": 0.2491, + "step": 26550 + }, + { + "epoch": 0.531511648274654, + "grad_norm": 1.074575662612915, + "learning_rate": 4.732676542091845e-06, + "loss": 0.3225, + "step": 26551 + }, + { + "epoch": 0.5315316667917824, + "grad_norm": 1.0964186191558838, + "learning_rate": 4.73235282331592e-06, + "loss": 0.3565, + "step": 26552 + }, + { + "epoch": 0.5315516853089107, + "grad_norm": 1.0995038747787476, + "learning_rate": 4.732029105665129e-06, + "loss": 0.3377, + "step": 26553 + }, + { + "epoch": 0.5315717038260391, + "grad_norm": 1.1361210346221924, + "learning_rate": 4.73170538914083e-06, + "loss": 0.3002, + "step": 26554 + }, + { + "epoch": 0.5315917223431674, + "grad_norm": 1.0473699569702148, + "learning_rate": 4.731381673744384e-06, + "loss": 0.2854, + "step": 26555 + }, + { + "epoch": 0.5316117408602957, + "grad_norm": 1.1113001108169556, + "learning_rate": 4.731057959477149e-06, + "loss": 0.3058, + "step": 26556 + }, + { + "epoch": 0.5316317593774241, + "grad_norm": 1.1938872337341309, + "learning_rate": 4.7307342463404905e-06, + "loss": 0.2927, + "step": 26557 + }, + { + "epoch": 0.5316517778945524, + "grad_norm": 1.0030075311660767, + "learning_rate": 4.730410534335766e-06, + "loss": 0.3137, + "step": 26558 + }, + { + "epoch": 0.5316717964116808, + "grad_norm": 1.0246189832687378, + "learning_rate": 4.73008682346434e-06, + "loss": 0.2906, + "step": 26559 + }, + { + "epoch": 0.5316918149288091, + "grad_norm": 1.0034527778625488, + "learning_rate": 4.729763113727568e-06, + "loss": 0.2932, + "step": 26560 + }, + { + "epoch": 0.5317118334459375, + "grad_norm": 1.0738365650177002, + "learning_rate": 4.729439405126816e-06, + "loss": 0.285, + "step": 26561 + }, + { + "epoch": 0.5317318519630658, + "grad_norm": 1.074989676475525, + "learning_rate": 4.729115697663442e-06, + "loss": 0.3051, + "step": 26562 + }, + { + "epoch": 0.5317518704801942, + "grad_norm": 1.0296518802642822, + "learning_rate": 4.728791991338806e-06, + "loss": 0.2934, + "step": 26563 + }, + { + "epoch": 0.5317718889973225, + "grad_norm": 1.0838303565979004, + "learning_rate": 4.728468286154272e-06, + "loss": 0.308, + "step": 26564 + }, + { + "epoch": 0.5317919075144508, + "grad_norm": 1.3856192827224731, + "learning_rate": 4.728144582111197e-06, + "loss": 0.3657, + "step": 26565 + }, + { + "epoch": 0.5318119260315792, + "grad_norm": 1.1148847341537476, + "learning_rate": 4.727820879210945e-06, + "loss": 0.3596, + "step": 26566 + }, + { + "epoch": 0.5318319445487075, + "grad_norm": 1.2233072519302368, + "learning_rate": 4.727497177454876e-06, + "loss": 0.3149, + "step": 26567 + }, + { + "epoch": 0.5318519630658359, + "grad_norm": 1.196194052696228, + "learning_rate": 4.72717347684435e-06, + "loss": 0.3342, + "step": 26568 + }, + { + "epoch": 0.5318719815829642, + "grad_norm": 1.217758297920227, + "learning_rate": 4.726849777380725e-06, + "loss": 0.3488, + "step": 26569 + }, + { + "epoch": 0.5318920001000926, + "grad_norm": 2.040147066116333, + "learning_rate": 4.726526079065368e-06, + "loss": 0.8175, + "step": 26570 + }, + { + "epoch": 0.5319120186172209, + "grad_norm": 1.0976929664611816, + "learning_rate": 4.726202381899635e-06, + "loss": 0.3227, + "step": 26571 + }, + { + "epoch": 0.5319320371343492, + "grad_norm": 1.0933865308761597, + "learning_rate": 4.72587868588489e-06, + "loss": 0.3239, + "step": 26572 + }, + { + "epoch": 0.5319520556514776, + "grad_norm": 1.1422619819641113, + "learning_rate": 4.725554991022491e-06, + "loss": 0.3135, + "step": 26573 + }, + { + "epoch": 0.5319720741686059, + "grad_norm": 1.2282068729400635, + "learning_rate": 4.725231297313798e-06, + "loss": 0.3188, + "step": 26574 + }, + { + "epoch": 0.5319920926857343, + "grad_norm": 1.0246021747589111, + "learning_rate": 4.724907604760176e-06, + "loss": 0.3012, + "step": 26575 + }, + { + "epoch": 0.5320121112028626, + "grad_norm": 1.153442621231079, + "learning_rate": 4.724583913362982e-06, + "loss": 0.3078, + "step": 26576 + }, + { + "epoch": 0.532032129719991, + "grad_norm": 1.1187632083892822, + "learning_rate": 4.724260223123579e-06, + "loss": 0.2993, + "step": 26577 + }, + { + "epoch": 0.5320521482371193, + "grad_norm": 0.9868220686912537, + "learning_rate": 4.7239365340433244e-06, + "loss": 0.2818, + "step": 26578 + }, + { + "epoch": 0.5320721667542477, + "grad_norm": 1.186301589012146, + "learning_rate": 4.723612846123584e-06, + "loss": 0.2908, + "step": 26579 + }, + { + "epoch": 0.532092185271376, + "grad_norm": 1.1441439390182495, + "learning_rate": 4.7232891593657144e-06, + "loss": 0.3309, + "step": 26580 + }, + { + "epoch": 0.5321122037885043, + "grad_norm": 1.1451189517974854, + "learning_rate": 4.722965473771079e-06, + "loss": 0.3169, + "step": 26581 + }, + { + "epoch": 0.5321322223056327, + "grad_norm": 1.1262855529785156, + "learning_rate": 4.722641789341035e-06, + "loss": 0.2556, + "step": 26582 + }, + { + "epoch": 0.532152240822761, + "grad_norm": 1.084195613861084, + "learning_rate": 4.722318106076946e-06, + "loss": 0.3145, + "step": 26583 + }, + { + "epoch": 0.5321722593398894, + "grad_norm": 1.3978960514068604, + "learning_rate": 4.721994423980171e-06, + "loss": 0.3215, + "step": 26584 + }, + { + "epoch": 0.5321922778570177, + "grad_norm": 1.1336697340011597, + "learning_rate": 4.721670743052073e-06, + "loss": 0.2763, + "step": 26585 + }, + { + "epoch": 0.5322122963741461, + "grad_norm": 1.0810399055480957, + "learning_rate": 4.721347063294012e-06, + "loss": 0.3114, + "step": 26586 + }, + { + "epoch": 0.5322323148912744, + "grad_norm": 1.2496658563613892, + "learning_rate": 4.721023384707346e-06, + "loss": 0.3195, + "step": 26587 + }, + { + "epoch": 0.5322523334084027, + "grad_norm": 1.8671795129776, + "learning_rate": 4.720699707293439e-06, + "loss": 0.8125, + "step": 26588 + }, + { + "epoch": 0.5322723519255311, + "grad_norm": 1.1684693098068237, + "learning_rate": 4.720376031053649e-06, + "loss": 0.3435, + "step": 26589 + }, + { + "epoch": 0.5322923704426594, + "grad_norm": 1.1395747661590576, + "learning_rate": 4.7200523559893396e-06, + "loss": 0.307, + "step": 26590 + }, + { + "epoch": 0.5323123889597878, + "grad_norm": 1.079932689666748, + "learning_rate": 4.719728682101867e-06, + "loss": 0.2982, + "step": 26591 + }, + { + "epoch": 0.5323324074769161, + "grad_norm": 1.1210503578186035, + "learning_rate": 4.719405009392598e-06, + "loss": 0.307, + "step": 26592 + }, + { + "epoch": 0.5323524259940445, + "grad_norm": 1.1305190324783325, + "learning_rate": 4.719081337862889e-06, + "loss": 0.3719, + "step": 26593 + }, + { + "epoch": 0.5323724445111728, + "grad_norm": 1.069693922996521, + "learning_rate": 4.718757667514102e-06, + "loss": 0.3074, + "step": 26594 + }, + { + "epoch": 0.5323924630283012, + "grad_norm": 1.1787669658660889, + "learning_rate": 4.718433998347597e-06, + "loss": 0.2811, + "step": 26595 + }, + { + "epoch": 0.5324124815454295, + "grad_norm": 1.048071026802063, + "learning_rate": 4.718110330364734e-06, + "loss": 0.2991, + "step": 26596 + }, + { + "epoch": 0.5324325000625578, + "grad_norm": 1.9252204895019531, + "learning_rate": 4.717786663566874e-06, + "loss": 0.7164, + "step": 26597 + }, + { + "epoch": 0.5324525185796862, + "grad_norm": 1.162175178527832, + "learning_rate": 4.71746299795538e-06, + "loss": 0.3239, + "step": 26598 + }, + { + "epoch": 0.5324725370968145, + "grad_norm": 1.0709131956100464, + "learning_rate": 4.717139333531611e-06, + "loss": 0.3465, + "step": 26599 + }, + { + "epoch": 0.5324925556139429, + "grad_norm": 1.1764880418777466, + "learning_rate": 4.716815670296925e-06, + "loss": 0.3284, + "step": 26600 + }, + { + "epoch": 0.5325125741310712, + "grad_norm": 1.0935306549072266, + "learning_rate": 4.716492008252686e-06, + "loss": 0.3047, + "step": 26601 + }, + { + "epoch": 0.5325325926481996, + "grad_norm": 1.259009599685669, + "learning_rate": 4.716168347400253e-06, + "loss": 0.3589, + "step": 26602 + }, + { + "epoch": 0.5325526111653279, + "grad_norm": 1.115370512008667, + "learning_rate": 4.715844687740989e-06, + "loss": 0.3079, + "step": 26603 + }, + { + "epoch": 0.5325726296824562, + "grad_norm": 1.053696870803833, + "learning_rate": 4.715521029276253e-06, + "loss": 0.2599, + "step": 26604 + }, + { + "epoch": 0.5325926481995846, + "grad_norm": 3.176379442214966, + "learning_rate": 4.715197372007403e-06, + "loss": 0.3375, + "step": 26605 + }, + { + "epoch": 0.5326126667167129, + "grad_norm": 1.097914457321167, + "learning_rate": 4.714873715935803e-06, + "loss": 0.2987, + "step": 26606 + }, + { + "epoch": 0.5326326852338413, + "grad_norm": 1.091460943222046, + "learning_rate": 4.714550061062814e-06, + "loss": 0.3427, + "step": 26607 + }, + { + "epoch": 0.5326527037509696, + "grad_norm": 1.2398860454559326, + "learning_rate": 4.714226407389792e-06, + "loss": 0.3108, + "step": 26608 + }, + { + "epoch": 0.532672722268098, + "grad_norm": 1.0506190061569214, + "learning_rate": 4.713902754918103e-06, + "loss": 0.3308, + "step": 26609 + }, + { + "epoch": 0.5326927407852263, + "grad_norm": 1.1145495176315308, + "learning_rate": 4.713579103649104e-06, + "loss": 0.3595, + "step": 26610 + }, + { + "epoch": 0.5327127593023547, + "grad_norm": 1.432450532913208, + "learning_rate": 4.713255453584158e-06, + "loss": 0.2972, + "step": 26611 + }, + { + "epoch": 0.532732777819483, + "grad_norm": 1.4837257862091064, + "learning_rate": 4.712931804724625e-06, + "loss": 0.3318, + "step": 26612 + }, + { + "epoch": 0.5327527963366113, + "grad_norm": 1.1310956478118896, + "learning_rate": 4.712608157071864e-06, + "loss": 0.3133, + "step": 26613 + }, + { + "epoch": 0.5327728148537397, + "grad_norm": 1.037867546081543, + "learning_rate": 4.712284510627234e-06, + "loss": 0.2979, + "step": 26614 + }, + { + "epoch": 0.532792833370868, + "grad_norm": 1.148887276649475, + "learning_rate": 4.711960865392101e-06, + "loss": 0.2994, + "step": 26615 + }, + { + "epoch": 0.5328128518879964, + "grad_norm": 1.2052576541900635, + "learning_rate": 4.7116372213678225e-06, + "loss": 0.3015, + "step": 26616 + }, + { + "epoch": 0.5328328704051247, + "grad_norm": 1.3064101934432983, + "learning_rate": 4.711313578555759e-06, + "loss": 0.3227, + "step": 26617 + }, + { + "epoch": 0.5328528889222531, + "grad_norm": 1.2264130115509033, + "learning_rate": 4.7109899369572686e-06, + "loss": 0.3082, + "step": 26618 + }, + { + "epoch": 0.5328729074393814, + "grad_norm": 2.013062000274658, + "learning_rate": 4.710666296573717e-06, + "loss": 0.7013, + "step": 26619 + }, + { + "epoch": 0.5328929259565097, + "grad_norm": 1.0726264715194702, + "learning_rate": 4.710342657406461e-06, + "loss": 0.2795, + "step": 26620 + }, + { + "epoch": 0.5329129444736381, + "grad_norm": 2.001960277557373, + "learning_rate": 4.710019019456863e-06, + "loss": 0.7739, + "step": 26621 + }, + { + "epoch": 0.5329329629907664, + "grad_norm": 1.1469473838806152, + "learning_rate": 4.709695382726281e-06, + "loss": 0.3326, + "step": 26622 + }, + { + "epoch": 0.5329529815078948, + "grad_norm": 1.0534231662750244, + "learning_rate": 4.7093717472160775e-06, + "loss": 0.2913, + "step": 26623 + }, + { + "epoch": 0.5329730000250231, + "grad_norm": 1.0547045469284058, + "learning_rate": 4.709048112927614e-06, + "loss": 0.3044, + "step": 26624 + }, + { + "epoch": 0.5329930185421515, + "grad_norm": 1.149902105331421, + "learning_rate": 4.70872447986225e-06, + "loss": 0.2967, + "step": 26625 + }, + { + "epoch": 0.5330130370592798, + "grad_norm": 1.3278018236160278, + "learning_rate": 4.708400848021345e-06, + "loss": 0.3263, + "step": 26626 + }, + { + "epoch": 0.5330330555764082, + "grad_norm": 1.0840539932250977, + "learning_rate": 4.708077217406258e-06, + "loss": 0.3057, + "step": 26627 + }, + { + "epoch": 0.5330530740935365, + "grad_norm": 1.1017085313796997, + "learning_rate": 4.7077535880183525e-06, + "loss": 0.3326, + "step": 26628 + }, + { + "epoch": 0.5330730926106648, + "grad_norm": 1.0688318014144897, + "learning_rate": 4.707429959858989e-06, + "loss": 0.3667, + "step": 26629 + }, + { + "epoch": 0.5330931111277932, + "grad_norm": 1.046518325805664, + "learning_rate": 4.707106332929527e-06, + "loss": 0.3346, + "step": 26630 + }, + { + "epoch": 0.5331131296449215, + "grad_norm": 1.1594371795654297, + "learning_rate": 4.706782707231325e-06, + "loss": 0.3135, + "step": 26631 + }, + { + "epoch": 0.5331331481620499, + "grad_norm": 1.0357104539871216, + "learning_rate": 4.706459082765748e-06, + "loss": 0.344, + "step": 26632 + }, + { + "epoch": 0.5331531666791782, + "grad_norm": 1.1095255613327026, + "learning_rate": 4.706135459534154e-06, + "loss": 0.2642, + "step": 26633 + }, + { + "epoch": 0.5331731851963066, + "grad_norm": 1.0929577350616455, + "learning_rate": 4.705811837537901e-06, + "loss": 0.3045, + "step": 26634 + }, + { + "epoch": 0.5331932037134349, + "grad_norm": 1.061629056930542, + "learning_rate": 4.7054882167783535e-06, + "loss": 0.3203, + "step": 26635 + }, + { + "epoch": 0.5332132222305632, + "grad_norm": 1.0352858304977417, + "learning_rate": 4.7051645972568675e-06, + "loss": 0.3169, + "step": 26636 + }, + { + "epoch": 0.5332332407476916, + "grad_norm": 0.9377962946891785, + "learning_rate": 4.704840978974809e-06, + "loss": 0.3029, + "step": 26637 + }, + { + "epoch": 0.5332532592648199, + "grad_norm": 1.1984083652496338, + "learning_rate": 4.7045173619335356e-06, + "loss": 0.3452, + "step": 26638 + }, + { + "epoch": 0.5332732777819483, + "grad_norm": 1.217288613319397, + "learning_rate": 4.704193746134407e-06, + "loss": 0.3082, + "step": 26639 + }, + { + "epoch": 0.5332932962990766, + "grad_norm": 1.8517261743545532, + "learning_rate": 4.703870131578782e-06, + "loss": 0.7759, + "step": 26640 + }, + { + "epoch": 0.533313314816205, + "grad_norm": 1.914476990699768, + "learning_rate": 4.7035465182680244e-06, + "loss": 0.7919, + "step": 26641 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.886304497718811, + "learning_rate": 4.7032229062034954e-06, + "loss": 0.8272, + "step": 26642 + }, + { + "epoch": 0.5333533518504617, + "grad_norm": 1.085708498954773, + "learning_rate": 4.702899295386553e-06, + "loss": 0.3392, + "step": 26643 + }, + { + "epoch": 0.53337337036759, + "grad_norm": 1.1731828451156616, + "learning_rate": 4.702575685818558e-06, + "loss": 0.2925, + "step": 26644 + }, + { + "epoch": 0.5333933888847183, + "grad_norm": 1.0650885105133057, + "learning_rate": 4.702252077500868e-06, + "loss": 0.2962, + "step": 26645 + }, + { + "epoch": 0.5334134074018467, + "grad_norm": 1.8360273838043213, + "learning_rate": 4.701928470434848e-06, + "loss": 0.822, + "step": 26646 + }, + { + "epoch": 0.533433425918975, + "grad_norm": 1.1518950462341309, + "learning_rate": 4.701604864621857e-06, + "loss": 0.3614, + "step": 26647 + }, + { + "epoch": 0.5334534444361034, + "grad_norm": 1.8789068460464478, + "learning_rate": 4.701281260063255e-06, + "loss": 0.7724, + "step": 26648 + }, + { + "epoch": 0.5334734629532317, + "grad_norm": 1.1473004817962646, + "learning_rate": 4.7009576567604e-06, + "loss": 0.3055, + "step": 26649 + }, + { + "epoch": 0.5334934814703601, + "grad_norm": 1.8093112707138062, + "learning_rate": 4.700634054714658e-06, + "loss": 0.7765, + "step": 26650 + }, + { + "epoch": 0.5335134999874884, + "grad_norm": 1.2163619995117188, + "learning_rate": 4.7003104539273844e-06, + "loss": 0.3229, + "step": 26651 + }, + { + "epoch": 0.5335335185046167, + "grad_norm": 1.8073198795318604, + "learning_rate": 4.6999868543999426e-06, + "loss": 0.8513, + "step": 26652 + }, + { + "epoch": 0.5335535370217451, + "grad_norm": 1.068933129310608, + "learning_rate": 4.699663256133689e-06, + "loss": 0.2969, + "step": 26653 + }, + { + "epoch": 0.5335735555388734, + "grad_norm": 1.3500016927719116, + "learning_rate": 4.6993396591299865e-06, + "loss": 0.3348, + "step": 26654 + }, + { + "epoch": 0.5335935740560018, + "grad_norm": 1.431366205215454, + "learning_rate": 4.699016063390197e-06, + "loss": 0.3035, + "step": 26655 + }, + { + "epoch": 0.5336135925731301, + "grad_norm": 1.0289034843444824, + "learning_rate": 4.69869246891568e-06, + "loss": 0.3062, + "step": 26656 + }, + { + "epoch": 0.5336336110902585, + "grad_norm": 1.1647216081619263, + "learning_rate": 4.698368875707795e-06, + "loss": 0.3196, + "step": 26657 + }, + { + "epoch": 0.5336536296073868, + "grad_norm": 1.0922231674194336, + "learning_rate": 4.6980452837678995e-06, + "loss": 0.2837, + "step": 26658 + }, + { + "epoch": 0.5336736481245152, + "grad_norm": 1.2179913520812988, + "learning_rate": 4.6977216930973586e-06, + "loss": 0.2883, + "step": 26659 + }, + { + "epoch": 0.5336936666416435, + "grad_norm": 1.210192322731018, + "learning_rate": 4.69739810369753e-06, + "loss": 0.3186, + "step": 26660 + }, + { + "epoch": 0.5337136851587718, + "grad_norm": 1.270298957824707, + "learning_rate": 4.697074515569776e-06, + "loss": 0.3191, + "step": 26661 + }, + { + "epoch": 0.5337337036759002, + "grad_norm": 1.0844690799713135, + "learning_rate": 4.696750928715452e-06, + "loss": 0.3124, + "step": 26662 + }, + { + "epoch": 0.5337537221930285, + "grad_norm": 1.2012224197387695, + "learning_rate": 4.696427343135925e-06, + "loss": 0.2967, + "step": 26663 + }, + { + "epoch": 0.5337737407101569, + "grad_norm": 1.0886001586914062, + "learning_rate": 4.696103758832552e-06, + "loss": 0.2766, + "step": 26664 + }, + { + "epoch": 0.5337937592272852, + "grad_norm": 1.1350207328796387, + "learning_rate": 4.695780175806694e-06, + "loss": 0.319, + "step": 26665 + }, + { + "epoch": 0.5338137777444136, + "grad_norm": 1.2021708488464355, + "learning_rate": 4.695456594059709e-06, + "loss": 0.3007, + "step": 26666 + }, + { + "epoch": 0.5338337962615419, + "grad_norm": 0.9974424839019775, + "learning_rate": 4.695133013592958e-06, + "loss": 0.2541, + "step": 26667 + }, + { + "epoch": 0.5338538147786702, + "grad_norm": 1.0507241487503052, + "learning_rate": 4.6948094344078045e-06, + "loss": 0.3211, + "step": 26668 + }, + { + "epoch": 0.5338738332957986, + "grad_norm": 1.4149354696273804, + "learning_rate": 4.694485856505606e-06, + "loss": 0.3514, + "step": 26669 + }, + { + "epoch": 0.5338938518129269, + "grad_norm": 1.0707025527954102, + "learning_rate": 4.6941622798877235e-06, + "loss": 0.2898, + "step": 26670 + }, + { + "epoch": 0.5339138703300553, + "grad_norm": 1.051215648651123, + "learning_rate": 4.693838704555515e-06, + "loss": 0.2858, + "step": 26671 + }, + { + "epoch": 0.5339338888471836, + "grad_norm": 1.1714777946472168, + "learning_rate": 4.693515130510345e-06, + "loss": 0.2889, + "step": 26672 + }, + { + "epoch": 0.533953907364312, + "grad_norm": 1.1299285888671875, + "learning_rate": 4.69319155775357e-06, + "loss": 0.343, + "step": 26673 + }, + { + "epoch": 0.5339739258814403, + "grad_norm": 1.2245372533798218, + "learning_rate": 4.692867986286552e-06, + "loss": 0.3346, + "step": 26674 + }, + { + "epoch": 0.5339939443985686, + "grad_norm": 2.0177252292633057, + "learning_rate": 4.69254441611065e-06, + "loss": 0.762, + "step": 26675 + }, + { + "epoch": 0.534013962915697, + "grad_norm": 1.115260362625122, + "learning_rate": 4.692220847227226e-06, + "loss": 0.3042, + "step": 26676 + }, + { + "epoch": 0.5340339814328253, + "grad_norm": 1.1128937005996704, + "learning_rate": 4.69189727963764e-06, + "loss": 0.3111, + "step": 26677 + }, + { + "epoch": 0.5340539999499537, + "grad_norm": 1.220824122428894, + "learning_rate": 4.691573713343252e-06, + "loss": 0.3385, + "step": 26678 + }, + { + "epoch": 0.534074018467082, + "grad_norm": 1.5671350955963135, + "learning_rate": 4.69125014834542e-06, + "loss": 0.327, + "step": 26679 + }, + { + "epoch": 0.5340940369842104, + "grad_norm": 1.9181082248687744, + "learning_rate": 4.690926584645505e-06, + "loss": 0.7597, + "step": 26680 + }, + { + "epoch": 0.5341140555013387, + "grad_norm": 1.0862822532653809, + "learning_rate": 4.690603022244871e-06, + "loss": 0.3256, + "step": 26681 + }, + { + "epoch": 0.5341340740184671, + "grad_norm": 1.169655442237854, + "learning_rate": 4.6902794611448745e-06, + "loss": 0.3071, + "step": 26682 + }, + { + "epoch": 0.5341540925355954, + "grad_norm": 1.106747031211853, + "learning_rate": 4.689955901346877e-06, + "loss": 0.2637, + "step": 26683 + }, + { + "epoch": 0.5341741110527237, + "grad_norm": 1.1161329746246338, + "learning_rate": 4.689632342852235e-06, + "loss": 0.3545, + "step": 26684 + }, + { + "epoch": 0.5341941295698521, + "grad_norm": 1.3108633756637573, + "learning_rate": 4.6893087856623155e-06, + "loss": 0.2533, + "step": 26685 + }, + { + "epoch": 0.5342141480869804, + "grad_norm": 1.1700373888015747, + "learning_rate": 4.688985229778473e-06, + "loss": 0.3004, + "step": 26686 + }, + { + "epoch": 0.5342341666041088, + "grad_norm": 1.1313999891281128, + "learning_rate": 4.688661675202071e-06, + "loss": 0.2883, + "step": 26687 + }, + { + "epoch": 0.5342541851212371, + "grad_norm": 1.071746826171875, + "learning_rate": 4.688338121934468e-06, + "loss": 0.2945, + "step": 26688 + }, + { + "epoch": 0.5342742036383655, + "grad_norm": 1.164777398109436, + "learning_rate": 4.6880145699770225e-06, + "loss": 0.3555, + "step": 26689 + }, + { + "epoch": 0.5342942221554938, + "grad_norm": 1.0573182106018066, + "learning_rate": 4.687691019331099e-06, + "loss": 0.2642, + "step": 26690 + }, + { + "epoch": 0.5343142406726221, + "grad_norm": 1.2100216150283813, + "learning_rate": 4.6873674699980545e-06, + "loss": 0.2958, + "step": 26691 + }, + { + "epoch": 0.5343342591897505, + "grad_norm": 1.282293677330017, + "learning_rate": 4.6870439219792494e-06, + "loss": 0.336, + "step": 26692 + }, + { + "epoch": 0.5343542777068788, + "grad_norm": 2.0078368186950684, + "learning_rate": 4.686720375276043e-06, + "loss": 0.7495, + "step": 26693 + }, + { + "epoch": 0.5343742962240072, + "grad_norm": 1.277559757232666, + "learning_rate": 4.6863968298898e-06, + "loss": 0.3469, + "step": 26694 + }, + { + "epoch": 0.5343943147411355, + "grad_norm": 1.2181649208068848, + "learning_rate": 4.6860732858218755e-06, + "loss": 0.3216, + "step": 26695 + }, + { + "epoch": 0.5344143332582639, + "grad_norm": 1.1080574989318848, + "learning_rate": 4.685749743073632e-06, + "loss": 0.3533, + "step": 26696 + }, + { + "epoch": 0.5344343517753922, + "grad_norm": 1.8766748905181885, + "learning_rate": 4.685426201646428e-06, + "loss": 0.756, + "step": 26697 + }, + { + "epoch": 0.5344543702925206, + "grad_norm": 1.154151201248169, + "learning_rate": 4.685102661541625e-06, + "loss": 0.3032, + "step": 26698 + }, + { + "epoch": 0.5344743888096489, + "grad_norm": 1.1672353744506836, + "learning_rate": 4.684779122760582e-06, + "loss": 0.3243, + "step": 26699 + }, + { + "epoch": 0.5344944073267772, + "grad_norm": 1.0306726694107056, + "learning_rate": 4.684455585304661e-06, + "loss": 0.3196, + "step": 26700 + }, + { + "epoch": 0.5345144258439056, + "grad_norm": 1.3241136074066162, + "learning_rate": 4.684132049175221e-06, + "loss": 0.3693, + "step": 26701 + }, + { + "epoch": 0.5345344443610339, + "grad_norm": 1.1476397514343262, + "learning_rate": 4.68380851437362e-06, + "loss": 0.298, + "step": 26702 + }, + { + "epoch": 0.5345544628781623, + "grad_norm": 1.934544563293457, + "learning_rate": 4.683484980901221e-06, + "loss": 0.825, + "step": 26703 + }, + { + "epoch": 0.5345744813952906, + "grad_norm": 1.1023348569869995, + "learning_rate": 4.683161448759384e-06, + "loss": 0.3114, + "step": 26704 + }, + { + "epoch": 0.534594499912419, + "grad_norm": 1.0350338220596313, + "learning_rate": 4.682837917949467e-06, + "loss": 0.291, + "step": 26705 + }, + { + "epoch": 0.5346145184295473, + "grad_norm": 1.0921579599380493, + "learning_rate": 4.682514388472829e-06, + "loss": 0.2758, + "step": 26706 + }, + { + "epoch": 0.5346345369466756, + "grad_norm": 1.1128346920013428, + "learning_rate": 4.682190860330835e-06, + "loss": 0.3041, + "step": 26707 + }, + { + "epoch": 0.534654555463804, + "grad_norm": 1.1118658781051636, + "learning_rate": 4.681867333524842e-06, + "loss": 0.2899, + "step": 26708 + }, + { + "epoch": 0.5346745739809323, + "grad_norm": 1.1244690418243408, + "learning_rate": 4.681543808056211e-06, + "loss": 0.3174, + "step": 26709 + }, + { + "epoch": 0.5346945924980607, + "grad_norm": 1.1631096601486206, + "learning_rate": 4.6812202839263e-06, + "loss": 0.2856, + "step": 26710 + }, + { + "epoch": 0.534714611015189, + "grad_norm": 1.0659362077713013, + "learning_rate": 4.68089676113647e-06, + "loss": 0.2887, + "step": 26711 + }, + { + "epoch": 0.5347346295323174, + "grad_norm": 1.095898985862732, + "learning_rate": 4.680573239688082e-06, + "loss": 0.3197, + "step": 26712 + }, + { + "epoch": 0.5347546480494457, + "grad_norm": 1.1473448276519775, + "learning_rate": 4.680249719582495e-06, + "loss": 0.3318, + "step": 26713 + }, + { + "epoch": 0.5347746665665741, + "grad_norm": 1.1062902212142944, + "learning_rate": 4.679926200821071e-06, + "loss": 0.2752, + "step": 26714 + }, + { + "epoch": 0.5347946850837024, + "grad_norm": 1.0487372875213623, + "learning_rate": 4.679602683405165e-06, + "loss": 0.3285, + "step": 26715 + }, + { + "epoch": 0.5348147036008307, + "grad_norm": 1.2047957181930542, + "learning_rate": 4.679279167336143e-06, + "loss": 0.2931, + "step": 26716 + }, + { + "epoch": 0.5348347221179591, + "grad_norm": 1.0372353792190552, + "learning_rate": 4.678955652615363e-06, + "loss": 0.2739, + "step": 26717 + }, + { + "epoch": 0.5348547406350874, + "grad_norm": 1.0362284183502197, + "learning_rate": 4.678632139244182e-06, + "loss": 0.2993, + "step": 26718 + }, + { + "epoch": 0.5348747591522158, + "grad_norm": 1.0579830408096313, + "learning_rate": 4.678308627223964e-06, + "loss": 0.3109, + "step": 26719 + }, + { + "epoch": 0.5348947776693441, + "grad_norm": 1.0368117094039917, + "learning_rate": 4.677985116556065e-06, + "loss": 0.3394, + "step": 26720 + }, + { + "epoch": 0.5349147961864725, + "grad_norm": 1.138492465019226, + "learning_rate": 4.67766160724185e-06, + "loss": 0.3229, + "step": 26721 + }, + { + "epoch": 0.5349348147036008, + "grad_norm": 1.4397907257080078, + "learning_rate": 4.6773380992826756e-06, + "loss": 0.2953, + "step": 26722 + }, + { + "epoch": 0.5349548332207291, + "grad_norm": 1.1689717769622803, + "learning_rate": 4.677014592679901e-06, + "loss": 0.3042, + "step": 26723 + }, + { + "epoch": 0.5349748517378575, + "grad_norm": 1.1401337385177612, + "learning_rate": 4.67669108743489e-06, + "loss": 0.325, + "step": 26724 + }, + { + "epoch": 0.5349948702549858, + "grad_norm": 1.1106388568878174, + "learning_rate": 4.676367583548997e-06, + "loss": 0.2943, + "step": 26725 + }, + { + "epoch": 0.5350148887721142, + "grad_norm": 1.0465718507766724, + "learning_rate": 4.676044081023587e-06, + "loss": 0.2958, + "step": 26726 + }, + { + "epoch": 0.5350349072892425, + "grad_norm": 1.0918574333190918, + "learning_rate": 4.675720579860019e-06, + "loss": 0.2968, + "step": 26727 + }, + { + "epoch": 0.5350549258063709, + "grad_norm": 1.0553642511367798, + "learning_rate": 4.675397080059651e-06, + "loss": 0.313, + "step": 26728 + }, + { + "epoch": 0.5350749443234992, + "grad_norm": 1.1888744831085205, + "learning_rate": 4.675073581623842e-06, + "loss": 0.3471, + "step": 26729 + }, + { + "epoch": 0.5350949628406276, + "grad_norm": 1.1624187231063843, + "learning_rate": 4.674750084553956e-06, + "loss": 0.3103, + "step": 26730 + }, + { + "epoch": 0.5351149813577559, + "grad_norm": 1.0672463178634644, + "learning_rate": 4.674426588851349e-06, + "loss": 0.3543, + "step": 26731 + }, + { + "epoch": 0.5351349998748842, + "grad_norm": 1.1606930494308472, + "learning_rate": 4.674103094517384e-06, + "loss": 0.2903, + "step": 26732 + }, + { + "epoch": 0.5351550183920126, + "grad_norm": 1.2437061071395874, + "learning_rate": 4.673779601553417e-06, + "loss": 0.3404, + "step": 26733 + }, + { + "epoch": 0.5351750369091409, + "grad_norm": 1.2414281368255615, + "learning_rate": 4.6734561099608125e-06, + "loss": 0.3115, + "step": 26734 + }, + { + "epoch": 0.5351950554262693, + "grad_norm": 1.0350103378295898, + "learning_rate": 4.6731326197409286e-06, + "loss": 0.3124, + "step": 26735 + }, + { + "epoch": 0.5352150739433976, + "grad_norm": 1.1121841669082642, + "learning_rate": 4.672809130895124e-06, + "loss": 0.2935, + "step": 26736 + }, + { + "epoch": 0.535235092460526, + "grad_norm": 1.1577943563461304, + "learning_rate": 4.672485643424759e-06, + "loss": 0.3526, + "step": 26737 + }, + { + "epoch": 0.5352551109776543, + "grad_norm": 1.0578689575195312, + "learning_rate": 4.672162157331194e-06, + "loss": 0.2703, + "step": 26738 + }, + { + "epoch": 0.5352751294947826, + "grad_norm": 1.1485278606414795, + "learning_rate": 4.671838672615789e-06, + "loss": 0.2932, + "step": 26739 + }, + { + "epoch": 0.535295148011911, + "grad_norm": 1.0265642404556274, + "learning_rate": 4.671515189279905e-06, + "loss": 0.3211, + "step": 26740 + }, + { + "epoch": 0.5353151665290393, + "grad_norm": 1.1214998960494995, + "learning_rate": 4.6711917073249e-06, + "loss": 0.3165, + "step": 26741 + }, + { + "epoch": 0.5353351850461677, + "grad_norm": 1.3051636219024658, + "learning_rate": 4.6708682267521316e-06, + "loss": 0.3114, + "step": 26742 + }, + { + "epoch": 0.535355203563296, + "grad_norm": 1.0675227642059326, + "learning_rate": 4.6705447475629654e-06, + "loss": 0.2674, + "step": 26743 + }, + { + "epoch": 0.5353752220804244, + "grad_norm": 1.0858428478240967, + "learning_rate": 4.670221269758757e-06, + "loss": 0.3263, + "step": 26744 + }, + { + "epoch": 0.5353952405975527, + "grad_norm": 1.0796699523925781, + "learning_rate": 4.669897793340867e-06, + "loss": 0.3032, + "step": 26745 + }, + { + "epoch": 0.5354152591146811, + "grad_norm": 1.0442088842391968, + "learning_rate": 4.669574318310655e-06, + "loss": 0.2566, + "step": 26746 + }, + { + "epoch": 0.5354352776318094, + "grad_norm": 1.1182117462158203, + "learning_rate": 4.6692508446694825e-06, + "loss": 0.2914, + "step": 26747 + }, + { + "epoch": 0.5354552961489377, + "grad_norm": 1.0464943647384644, + "learning_rate": 4.668927372418709e-06, + "loss": 0.308, + "step": 26748 + }, + { + "epoch": 0.5354753146660661, + "grad_norm": 1.824384093284607, + "learning_rate": 4.668603901559692e-06, + "loss": 0.7866, + "step": 26749 + }, + { + "epoch": 0.5354953331831944, + "grad_norm": 1.484632134437561, + "learning_rate": 4.6682804320937945e-06, + "loss": 0.3218, + "step": 26750 + }, + { + "epoch": 0.5355153517003228, + "grad_norm": 1.1846885681152344, + "learning_rate": 4.667956964022373e-06, + "loss": 0.2968, + "step": 26751 + }, + { + "epoch": 0.5355353702174511, + "grad_norm": 1.1388434171676636, + "learning_rate": 4.66763349734679e-06, + "loss": 0.3582, + "step": 26752 + }, + { + "epoch": 0.5355553887345795, + "grad_norm": 1.337280511856079, + "learning_rate": 4.667310032068405e-06, + "loss": 0.3031, + "step": 26753 + }, + { + "epoch": 0.5355754072517078, + "grad_norm": 1.072351336479187, + "learning_rate": 4.6669865681885766e-06, + "loss": 0.315, + "step": 26754 + }, + { + "epoch": 0.5355954257688361, + "grad_norm": 1.013680338859558, + "learning_rate": 4.666663105708663e-06, + "loss": 0.322, + "step": 26755 + }, + { + "epoch": 0.5356154442859645, + "grad_norm": 1.0281691551208496, + "learning_rate": 4.666339644630028e-06, + "loss": 0.2978, + "step": 26756 + }, + { + "epoch": 0.5356354628030928, + "grad_norm": 1.8026602268218994, + "learning_rate": 4.666016184954029e-06, + "loss": 0.771, + "step": 26757 + }, + { + "epoch": 0.5356554813202212, + "grad_norm": 1.0886623859405518, + "learning_rate": 4.665692726682026e-06, + "loss": 0.31, + "step": 26758 + }, + { + "epoch": 0.5356754998373495, + "grad_norm": 1.1619044542312622, + "learning_rate": 4.66536926981538e-06, + "loss": 0.3457, + "step": 26759 + }, + { + "epoch": 0.5356955183544779, + "grad_norm": 1.096752405166626, + "learning_rate": 4.665045814355447e-06, + "loss": 0.3477, + "step": 26760 + }, + { + "epoch": 0.5357155368716062, + "grad_norm": 1.1869914531707764, + "learning_rate": 4.66472236030359e-06, + "loss": 0.2985, + "step": 26761 + }, + { + "epoch": 0.5357355553887346, + "grad_norm": 1.134938359260559, + "learning_rate": 4.664398907661169e-06, + "loss": 0.3378, + "step": 26762 + }, + { + "epoch": 0.5357555739058629, + "grad_norm": 1.2259559631347656, + "learning_rate": 4.664075456429543e-06, + "loss": 0.2825, + "step": 26763 + }, + { + "epoch": 0.5357755924229912, + "grad_norm": 1.2378932237625122, + "learning_rate": 4.6637520066100694e-06, + "loss": 0.3114, + "step": 26764 + }, + { + "epoch": 0.5357956109401196, + "grad_norm": 1.1575871706008911, + "learning_rate": 4.663428558204112e-06, + "loss": 0.3173, + "step": 26765 + }, + { + "epoch": 0.5358156294572479, + "grad_norm": 1.2864723205566406, + "learning_rate": 4.6631051112130295e-06, + "loss": 0.3097, + "step": 26766 + }, + { + "epoch": 0.5358356479743763, + "grad_norm": 1.1005078554153442, + "learning_rate": 4.6627816656381795e-06, + "loss": 0.3499, + "step": 26767 + }, + { + "epoch": 0.5358556664915046, + "grad_norm": 1.0622661113739014, + "learning_rate": 4.662458221480921e-06, + "loss": 0.2797, + "step": 26768 + }, + { + "epoch": 0.535875685008633, + "grad_norm": 1.005319356918335, + "learning_rate": 4.662134778742617e-06, + "loss": 0.2887, + "step": 26769 + }, + { + "epoch": 0.5358957035257613, + "grad_norm": 2.1153361797332764, + "learning_rate": 4.661811337424625e-06, + "loss": 0.8086, + "step": 26770 + }, + { + "epoch": 0.5359157220428896, + "grad_norm": 1.153090238571167, + "learning_rate": 4.661487897528307e-06, + "loss": 0.2982, + "step": 26771 + }, + { + "epoch": 0.535935740560018, + "grad_norm": 1.1996605396270752, + "learning_rate": 4.661164459055021e-06, + "loss": 0.3246, + "step": 26772 + }, + { + "epoch": 0.5359557590771463, + "grad_norm": 1.2018475532531738, + "learning_rate": 4.660841022006124e-06, + "loss": 0.2868, + "step": 26773 + }, + { + "epoch": 0.5359757775942747, + "grad_norm": 1.0775866508483887, + "learning_rate": 4.660517586382981e-06, + "loss": 0.2911, + "step": 26774 + }, + { + "epoch": 0.535995796111403, + "grad_norm": 1.1441359519958496, + "learning_rate": 4.660194152186948e-06, + "loss": 0.3173, + "step": 26775 + }, + { + "epoch": 0.5360158146285314, + "grad_norm": 1.0755276679992676, + "learning_rate": 4.659870719419386e-06, + "loss": 0.2829, + "step": 26776 + }, + { + "epoch": 0.5360358331456597, + "grad_norm": 1.1355912685394287, + "learning_rate": 4.659547288081653e-06, + "loss": 0.3322, + "step": 26777 + }, + { + "epoch": 0.5360558516627881, + "grad_norm": 1.7804157733917236, + "learning_rate": 4.6592238581751115e-06, + "loss": 0.7721, + "step": 26778 + }, + { + "epoch": 0.5360758701799164, + "grad_norm": 1.144004464149475, + "learning_rate": 4.65890042970112e-06, + "loss": 0.2886, + "step": 26779 + }, + { + "epoch": 0.5360958886970447, + "grad_norm": 1.0615826845169067, + "learning_rate": 4.658577002661037e-06, + "loss": 0.2865, + "step": 26780 + }, + { + "epoch": 0.5361159072141731, + "grad_norm": 1.1423771381378174, + "learning_rate": 4.658253577056222e-06, + "loss": 0.2799, + "step": 26781 + }, + { + "epoch": 0.5361359257313014, + "grad_norm": 1.0098966360092163, + "learning_rate": 4.657930152888035e-06, + "loss": 0.3228, + "step": 26782 + }, + { + "epoch": 0.5361559442484298, + "grad_norm": 1.2098082304000854, + "learning_rate": 4.657606730157837e-06, + "loss": 0.3271, + "step": 26783 + }, + { + "epoch": 0.5361759627655581, + "grad_norm": 1.251643180847168, + "learning_rate": 4.657283308866987e-06, + "loss": 0.315, + "step": 26784 + }, + { + "epoch": 0.5361959812826865, + "grad_norm": 1.071795105934143, + "learning_rate": 4.656959889016845e-06, + "loss": 0.3423, + "step": 26785 + }, + { + "epoch": 0.5362159997998148, + "grad_norm": 1.0585802793502808, + "learning_rate": 4.656636470608767e-06, + "loss": 0.2914, + "step": 26786 + }, + { + "epoch": 0.5362360183169431, + "grad_norm": 1.057481288909912, + "learning_rate": 4.656313053644117e-06, + "loss": 0.3268, + "step": 26787 + }, + { + "epoch": 0.5362560368340715, + "grad_norm": 1.1364691257476807, + "learning_rate": 4.655989638124253e-06, + "loss": 0.329, + "step": 26788 + }, + { + "epoch": 0.5362760553511998, + "grad_norm": 2.0230326652526855, + "learning_rate": 4.655666224050535e-06, + "loss": 0.801, + "step": 26789 + }, + { + "epoch": 0.5362960738683282, + "grad_norm": 1.0703539848327637, + "learning_rate": 4.6553428114243225e-06, + "loss": 0.2802, + "step": 26790 + }, + { + "epoch": 0.5363160923854565, + "grad_norm": 1.1151247024536133, + "learning_rate": 4.655019400246972e-06, + "loss": 0.3113, + "step": 26791 + }, + { + "epoch": 0.5363361109025849, + "grad_norm": 1.03374445438385, + "learning_rate": 4.654695990519848e-06, + "loss": 0.2851, + "step": 26792 + }, + { + "epoch": 0.5363561294197132, + "grad_norm": 1.0760655403137207, + "learning_rate": 4.654372582244309e-06, + "loss": 0.3111, + "step": 26793 + }, + { + "epoch": 0.5363761479368416, + "grad_norm": 1.0920588970184326, + "learning_rate": 4.654049175421711e-06, + "loss": 0.2767, + "step": 26794 + }, + { + "epoch": 0.5363961664539699, + "grad_norm": 1.0265204906463623, + "learning_rate": 4.653725770053416e-06, + "loss": 0.2869, + "step": 26795 + }, + { + "epoch": 0.5364161849710982, + "grad_norm": 1.0826952457427979, + "learning_rate": 4.6534023661407825e-06, + "loss": 0.2951, + "step": 26796 + }, + { + "epoch": 0.5364362034882266, + "grad_norm": 1.0638765096664429, + "learning_rate": 4.653078963685172e-06, + "loss": 0.3294, + "step": 26797 + }, + { + "epoch": 0.5364562220053549, + "grad_norm": 1.7928740978240967, + "learning_rate": 4.652755562687944e-06, + "loss": 0.7876, + "step": 26798 + }, + { + "epoch": 0.5364762405224833, + "grad_norm": 1.067300796508789, + "learning_rate": 4.652432163150455e-06, + "loss": 0.3086, + "step": 26799 + }, + { + "epoch": 0.5364962590396116, + "grad_norm": 2.1083667278289795, + "learning_rate": 4.652108765074067e-06, + "loss": 0.8266, + "step": 26800 + }, + { + "epoch": 0.53651627755674, + "grad_norm": 1.13467538356781, + "learning_rate": 4.651785368460139e-06, + "loss": 0.3251, + "step": 26801 + }, + { + "epoch": 0.5365362960738683, + "grad_norm": 1.1634494066238403, + "learning_rate": 4.6514619733100304e-06, + "loss": 0.302, + "step": 26802 + }, + { + "epoch": 0.5365563145909966, + "grad_norm": 1.0771185159683228, + "learning_rate": 4.6511385796251015e-06, + "loss": 0.2516, + "step": 26803 + }, + { + "epoch": 0.536576333108125, + "grad_norm": 1.062981367111206, + "learning_rate": 4.6508151874067085e-06, + "loss": 0.328, + "step": 26804 + }, + { + "epoch": 0.5365963516252533, + "grad_norm": 1.0436824560165405, + "learning_rate": 4.650491796656216e-06, + "loss": 0.245, + "step": 26805 + }, + { + "epoch": 0.5366163701423817, + "grad_norm": 1.2080419063568115, + "learning_rate": 4.65016840737498e-06, + "loss": 0.28, + "step": 26806 + }, + { + "epoch": 0.53663638865951, + "grad_norm": 1.0735821723937988, + "learning_rate": 4.64984501956436e-06, + "loss": 0.3241, + "step": 26807 + }, + { + "epoch": 0.5366564071766384, + "grad_norm": 1.078021764755249, + "learning_rate": 4.649521633225717e-06, + "loss": 0.3379, + "step": 26808 + }, + { + "epoch": 0.5366764256937667, + "grad_norm": 1.0796897411346436, + "learning_rate": 4.649198248360408e-06, + "loss": 0.3278, + "step": 26809 + }, + { + "epoch": 0.5366964442108951, + "grad_norm": 1.279333472251892, + "learning_rate": 4.648874864969796e-06, + "loss": 0.2786, + "step": 26810 + }, + { + "epoch": 0.5367164627280234, + "grad_norm": 1.922652244567871, + "learning_rate": 4.6485514830552385e-06, + "loss": 0.8081, + "step": 26811 + }, + { + "epoch": 0.5367364812451517, + "grad_norm": 1.0105100870132446, + "learning_rate": 4.6482281026180955e-06, + "loss": 0.2635, + "step": 26812 + }, + { + "epoch": 0.5367564997622801, + "grad_norm": 1.0431489944458008, + "learning_rate": 4.647904723659723e-06, + "loss": 0.2936, + "step": 26813 + }, + { + "epoch": 0.5367765182794084, + "grad_norm": 1.1754910945892334, + "learning_rate": 4.6475813461814845e-06, + "loss": 0.3302, + "step": 26814 + }, + { + "epoch": 0.5367965367965368, + "grad_norm": 1.048294186592102, + "learning_rate": 4.647257970184739e-06, + "loss": 0.2858, + "step": 26815 + }, + { + "epoch": 0.5368165553136651, + "grad_norm": 1.8124953508377075, + "learning_rate": 4.646934595670845e-06, + "loss": 0.8447, + "step": 26816 + }, + { + "epoch": 0.5368365738307935, + "grad_norm": 1.1588248014450073, + "learning_rate": 4.64661122264116e-06, + "loss": 0.3139, + "step": 26817 + }, + { + "epoch": 0.5368565923479218, + "grad_norm": 1.0513864755630493, + "learning_rate": 4.646287851097048e-06, + "loss": 0.3069, + "step": 26818 + }, + { + "epoch": 0.5368766108650501, + "grad_norm": 1.9186170101165771, + "learning_rate": 4.645964481039865e-06, + "loss": 0.8693, + "step": 26819 + }, + { + "epoch": 0.5368966293821785, + "grad_norm": 1.2247127294540405, + "learning_rate": 4.645641112470971e-06, + "loss": 0.3079, + "step": 26820 + }, + { + "epoch": 0.5369166478993068, + "grad_norm": 1.0129942893981934, + "learning_rate": 4.6453177453917245e-06, + "loss": 0.281, + "step": 26821 + }, + { + "epoch": 0.5369366664164352, + "grad_norm": 1.2003899812698364, + "learning_rate": 4.644994379803487e-06, + "loss": 0.312, + "step": 26822 + }, + { + "epoch": 0.5369566849335635, + "grad_norm": 1.9174748659133911, + "learning_rate": 4.644671015707617e-06, + "loss": 0.7788, + "step": 26823 + }, + { + "epoch": 0.5369767034506919, + "grad_norm": 1.236017107963562, + "learning_rate": 4.644347653105473e-06, + "loss": 0.2979, + "step": 26824 + }, + { + "epoch": 0.5369967219678202, + "grad_norm": 1.059110403060913, + "learning_rate": 4.6440242919984156e-06, + "loss": 0.3308, + "step": 26825 + }, + { + "epoch": 0.5370167404849486, + "grad_norm": 1.1385823488235474, + "learning_rate": 4.643700932387802e-06, + "loss": 0.264, + "step": 26826 + }, + { + "epoch": 0.5370367590020769, + "grad_norm": 2.0702497959136963, + "learning_rate": 4.643377574274993e-06, + "loss": 0.8057, + "step": 26827 + }, + { + "epoch": 0.5370567775192052, + "grad_norm": 1.247758388519287, + "learning_rate": 4.64305421766135e-06, + "loss": 0.2916, + "step": 26828 + }, + { + "epoch": 0.5370767960363336, + "grad_norm": 1.0793601274490356, + "learning_rate": 4.642730862548229e-06, + "loss": 0.2918, + "step": 26829 + }, + { + "epoch": 0.5370968145534619, + "grad_norm": 1.0663928985595703, + "learning_rate": 4.642407508936989e-06, + "loss": 0.3088, + "step": 26830 + }, + { + "epoch": 0.5371168330705903, + "grad_norm": 1.1410986185073853, + "learning_rate": 4.642084156828993e-06, + "loss": 0.3061, + "step": 26831 + }, + { + "epoch": 0.5371368515877186, + "grad_norm": 1.0826692581176758, + "learning_rate": 4.6417608062255984e-06, + "loss": 0.3306, + "step": 26832 + }, + { + "epoch": 0.537156870104847, + "grad_norm": 1.9432398080825806, + "learning_rate": 4.641437457128164e-06, + "loss": 0.7413, + "step": 26833 + }, + { + "epoch": 0.5371768886219753, + "grad_norm": 1.173996090888977, + "learning_rate": 4.6411141095380495e-06, + "loss": 0.2864, + "step": 26834 + }, + { + "epoch": 0.5371969071391036, + "grad_norm": 1.3418997526168823, + "learning_rate": 4.640790763456612e-06, + "loss": 0.3356, + "step": 26835 + }, + { + "epoch": 0.537216925656232, + "grad_norm": 1.0685653686523438, + "learning_rate": 4.640467418885215e-06, + "loss": 0.3366, + "step": 26836 + }, + { + "epoch": 0.5372369441733603, + "grad_norm": 1.0988152027130127, + "learning_rate": 4.640144075825215e-06, + "loss": 0.2852, + "step": 26837 + }, + { + "epoch": 0.5372569626904887, + "grad_norm": 1.0011223554611206, + "learning_rate": 4.6398207342779726e-06, + "loss": 0.2398, + "step": 26838 + }, + { + "epoch": 0.537276981207617, + "grad_norm": 1.1087862253189087, + "learning_rate": 4.639497394244844e-06, + "loss": 0.3402, + "step": 26839 + }, + { + "epoch": 0.5372969997247454, + "grad_norm": 1.190075397491455, + "learning_rate": 4.639174055727191e-06, + "loss": 0.3188, + "step": 26840 + }, + { + "epoch": 0.5373170182418737, + "grad_norm": 1.190730094909668, + "learning_rate": 4.6388507187263756e-06, + "loss": 0.3047, + "step": 26841 + }, + { + "epoch": 0.5373370367590021, + "grad_norm": 1.1763916015625, + "learning_rate": 4.638527383243753e-06, + "loss": 0.2628, + "step": 26842 + }, + { + "epoch": 0.5373570552761304, + "grad_norm": 1.065924882888794, + "learning_rate": 4.638204049280683e-06, + "loss": 0.2741, + "step": 26843 + }, + { + "epoch": 0.5373770737932587, + "grad_norm": 1.136333703994751, + "learning_rate": 4.637880716838524e-06, + "loss": 0.3256, + "step": 26844 + }, + { + "epoch": 0.5373970923103871, + "grad_norm": 1.8269087076187134, + "learning_rate": 4.637557385918638e-06, + "loss": 0.7972, + "step": 26845 + }, + { + "epoch": 0.5374171108275154, + "grad_norm": 1.1723968982696533, + "learning_rate": 4.6372340565223814e-06, + "loss": 0.3104, + "step": 26846 + }, + { + "epoch": 0.5374371293446438, + "grad_norm": 1.122057557106018, + "learning_rate": 4.636910728651117e-06, + "loss": 0.303, + "step": 26847 + }, + { + "epoch": 0.5374571478617721, + "grad_norm": 1.3649046421051025, + "learning_rate": 4.6365874023061995e-06, + "loss": 0.3219, + "step": 26848 + }, + { + "epoch": 0.5374771663789005, + "grad_norm": 1.1103465557098389, + "learning_rate": 4.636264077488991e-06, + "loss": 0.2732, + "step": 26849 + }, + { + "epoch": 0.5374971848960288, + "grad_norm": 1.043799877166748, + "learning_rate": 4.635940754200852e-06, + "loss": 0.2769, + "step": 26850 + }, + { + "epoch": 0.5375172034131571, + "grad_norm": 1.2874888181686401, + "learning_rate": 4.635617432443139e-06, + "loss": 0.3302, + "step": 26851 + }, + { + "epoch": 0.5375372219302855, + "grad_norm": 1.0511919260025024, + "learning_rate": 4.635294112217209e-06, + "loss": 0.3385, + "step": 26852 + }, + { + "epoch": 0.5375572404474138, + "grad_norm": 1.2571748495101929, + "learning_rate": 4.634970793524425e-06, + "loss": 0.3145, + "step": 26853 + }, + { + "epoch": 0.5375772589645422, + "grad_norm": 1.2022690773010254, + "learning_rate": 4.634647476366147e-06, + "loss": 0.3137, + "step": 26854 + }, + { + "epoch": 0.5375972774816705, + "grad_norm": 1.1584845781326294, + "learning_rate": 4.634324160743732e-06, + "loss": 0.2674, + "step": 26855 + }, + { + "epoch": 0.5376172959987989, + "grad_norm": 1.0985565185546875, + "learning_rate": 4.634000846658541e-06, + "loss": 0.335, + "step": 26856 + }, + { + "epoch": 0.5376373145159272, + "grad_norm": 1.1268155574798584, + "learning_rate": 4.633677534111928e-06, + "loss": 0.3378, + "step": 26857 + }, + { + "epoch": 0.5376573330330556, + "grad_norm": 1.083327054977417, + "learning_rate": 4.633354223105258e-06, + "loss": 0.279, + "step": 26858 + }, + { + "epoch": 0.5376773515501839, + "grad_norm": 1.7898088693618774, + "learning_rate": 4.633030913639888e-06, + "loss": 0.7877, + "step": 26859 + }, + { + "epoch": 0.5376973700673122, + "grad_norm": 0.9950739145278931, + "learning_rate": 4.632707605717178e-06, + "loss": 0.3025, + "step": 26860 + }, + { + "epoch": 0.5377173885844406, + "grad_norm": 1.0705195665359497, + "learning_rate": 4.632384299338483e-06, + "loss": 0.3463, + "step": 26861 + }, + { + "epoch": 0.5377374071015689, + "grad_norm": 1.8196852207183838, + "learning_rate": 4.632060994505168e-06, + "loss": 0.763, + "step": 26862 + }, + { + "epoch": 0.5377574256186973, + "grad_norm": 1.1077022552490234, + "learning_rate": 4.631737691218589e-06, + "loss": 0.273, + "step": 26863 + }, + { + "epoch": 0.5377774441358256, + "grad_norm": 1.1282670497894287, + "learning_rate": 4.631414389480106e-06, + "loss": 0.2933, + "step": 26864 + }, + { + "epoch": 0.537797462652954, + "grad_norm": 1.1045830249786377, + "learning_rate": 4.631091089291076e-06, + "loss": 0.2752, + "step": 26865 + }, + { + "epoch": 0.5378174811700823, + "grad_norm": 1.0843653678894043, + "learning_rate": 4.630767790652859e-06, + "loss": 0.3054, + "step": 26866 + }, + { + "epoch": 0.5378374996872106, + "grad_norm": 1.1794664859771729, + "learning_rate": 4.630444493566817e-06, + "loss": 0.3011, + "step": 26867 + }, + { + "epoch": 0.537857518204339, + "grad_norm": 1.0922205448150635, + "learning_rate": 4.6301211980343065e-06, + "loss": 0.3172, + "step": 26868 + }, + { + "epoch": 0.5378775367214673, + "grad_norm": 1.1549055576324463, + "learning_rate": 4.629797904056687e-06, + "loss": 0.269, + "step": 26869 + }, + { + "epoch": 0.5378975552385957, + "grad_norm": 1.2531440258026123, + "learning_rate": 4.629474611635315e-06, + "loss": 0.3092, + "step": 26870 + }, + { + "epoch": 0.537917573755724, + "grad_norm": 1.1821626424789429, + "learning_rate": 4.629151320771555e-06, + "loss": 0.3174, + "step": 26871 + }, + { + "epoch": 0.5379375922728524, + "grad_norm": 1.0190616846084595, + "learning_rate": 4.628828031466761e-06, + "loss": 0.2763, + "step": 26872 + }, + { + "epoch": 0.5379576107899807, + "grad_norm": 1.2190054655075073, + "learning_rate": 4.628504743722296e-06, + "loss": 0.3212, + "step": 26873 + }, + { + "epoch": 0.5379776293071091, + "grad_norm": 1.1752794981002808, + "learning_rate": 4.628181457539516e-06, + "loss": 0.3168, + "step": 26874 + }, + { + "epoch": 0.5379976478242374, + "grad_norm": 1.8993351459503174, + "learning_rate": 4.62785817291978e-06, + "loss": 0.7704, + "step": 26875 + }, + { + "epoch": 0.5380176663413657, + "grad_norm": 1.1864268779754639, + "learning_rate": 4.6275348898644505e-06, + "loss": 0.3003, + "step": 26876 + }, + { + "epoch": 0.5380376848584941, + "grad_norm": 1.0857311487197876, + "learning_rate": 4.627211608374883e-06, + "loss": 0.33, + "step": 26877 + }, + { + "epoch": 0.5380577033756224, + "grad_norm": 1.0024110078811646, + "learning_rate": 4.626888328452437e-06, + "loss": 0.2697, + "step": 26878 + }, + { + "epoch": 0.5380777218927508, + "grad_norm": 1.414565920829773, + "learning_rate": 4.626565050098472e-06, + "loss": 0.3051, + "step": 26879 + }, + { + "epoch": 0.5380977404098791, + "grad_norm": 1.0305970907211304, + "learning_rate": 4.626241773314348e-06, + "loss": 0.2902, + "step": 26880 + }, + { + "epoch": 0.5381177589270075, + "grad_norm": 1.0743502378463745, + "learning_rate": 4.625918498101424e-06, + "loss": 0.3386, + "step": 26881 + }, + { + "epoch": 0.5381377774441358, + "grad_norm": 1.071766972541809, + "learning_rate": 4.625595224461058e-06, + "loss": 0.3346, + "step": 26882 + }, + { + "epoch": 0.5381577959612641, + "grad_norm": 1.0961432456970215, + "learning_rate": 4.625271952394607e-06, + "loss": 0.3286, + "step": 26883 + }, + { + "epoch": 0.5381778144783925, + "grad_norm": 1.923640251159668, + "learning_rate": 4.624948681903434e-06, + "loss": 0.8302, + "step": 26884 + }, + { + "epoch": 0.5381978329955208, + "grad_norm": 1.1594181060791016, + "learning_rate": 4.624625412988895e-06, + "loss": 0.3121, + "step": 26885 + }, + { + "epoch": 0.5382178515126492, + "grad_norm": 1.1183067560195923, + "learning_rate": 4.6243021456523504e-06, + "loss": 0.2913, + "step": 26886 + }, + { + "epoch": 0.5382378700297775, + "grad_norm": 1.109683871269226, + "learning_rate": 4.62397887989516e-06, + "loss": 0.3626, + "step": 26887 + }, + { + "epoch": 0.5382578885469059, + "grad_norm": 1.1311383247375488, + "learning_rate": 4.6236556157186786e-06, + "loss": 0.299, + "step": 26888 + }, + { + "epoch": 0.5382779070640342, + "grad_norm": 1.13901686668396, + "learning_rate": 4.62333235312427e-06, + "loss": 0.281, + "step": 26889 + }, + { + "epoch": 0.5382979255811626, + "grad_norm": 1.1663647890090942, + "learning_rate": 4.623009092113292e-06, + "loss": 0.3146, + "step": 26890 + }, + { + "epoch": 0.5383179440982909, + "grad_norm": 1.041143536567688, + "learning_rate": 4.622685832687101e-06, + "loss": 0.2882, + "step": 26891 + }, + { + "epoch": 0.5383379626154192, + "grad_norm": 0.9875587821006775, + "learning_rate": 4.622362574847057e-06, + "loss": 0.2698, + "step": 26892 + }, + { + "epoch": 0.5383579811325476, + "grad_norm": 1.155408263206482, + "learning_rate": 4.62203931859452e-06, + "loss": 0.2881, + "step": 26893 + }, + { + "epoch": 0.5383779996496759, + "grad_norm": 1.0773029327392578, + "learning_rate": 4.62171606393085e-06, + "loss": 0.3049, + "step": 26894 + }, + { + "epoch": 0.5383980181668043, + "grad_norm": 1.1170501708984375, + "learning_rate": 4.6213928108574035e-06, + "loss": 0.2857, + "step": 26895 + }, + { + "epoch": 0.5384180366839326, + "grad_norm": 1.2029545307159424, + "learning_rate": 4.62106955937554e-06, + "loss": 0.3273, + "step": 26896 + }, + { + "epoch": 0.538438055201061, + "grad_norm": 1.165153980255127, + "learning_rate": 4.620746309486618e-06, + "loss": 0.3204, + "step": 26897 + }, + { + "epoch": 0.5384580737181893, + "grad_norm": 1.195666790008545, + "learning_rate": 4.620423061191996e-06, + "loss": 0.3034, + "step": 26898 + }, + { + "epoch": 0.5384780922353176, + "grad_norm": 1.0710235834121704, + "learning_rate": 4.6200998144930354e-06, + "loss": 0.303, + "step": 26899 + }, + { + "epoch": 0.538498110752446, + "grad_norm": 1.2578552961349487, + "learning_rate": 4.619776569391093e-06, + "loss": 0.2929, + "step": 26900 + }, + { + "epoch": 0.5385181292695743, + "grad_norm": 1.067269206047058, + "learning_rate": 4.6194533258875274e-06, + "loss": 0.2783, + "step": 26901 + }, + { + "epoch": 0.5385381477867027, + "grad_norm": 1.0735394954681396, + "learning_rate": 4.6191300839836995e-06, + "loss": 0.2901, + "step": 26902 + }, + { + "epoch": 0.538558166303831, + "grad_norm": 1.170681357383728, + "learning_rate": 4.618806843680966e-06, + "loss": 0.3244, + "step": 26903 + }, + { + "epoch": 0.5385781848209594, + "grad_norm": 1.0142545700073242, + "learning_rate": 4.6184836049806864e-06, + "loss": 0.3104, + "step": 26904 + }, + { + "epoch": 0.5385982033380877, + "grad_norm": 1.1155246496200562, + "learning_rate": 4.61816036788422e-06, + "loss": 0.3423, + "step": 26905 + }, + { + "epoch": 0.5386182218552161, + "grad_norm": 1.1941041946411133, + "learning_rate": 4.617837132392924e-06, + "loss": 0.2949, + "step": 26906 + }, + { + "epoch": 0.5386382403723444, + "grad_norm": 1.0324434041976929, + "learning_rate": 4.617513898508159e-06, + "loss": 0.2754, + "step": 26907 + }, + { + "epoch": 0.5386582588894727, + "grad_norm": 1.9234226942062378, + "learning_rate": 4.617190666231285e-06, + "loss": 0.782, + "step": 26908 + }, + { + "epoch": 0.5386782774066011, + "grad_norm": 1.172317624092102, + "learning_rate": 4.616867435563658e-06, + "loss": 0.2492, + "step": 26909 + }, + { + "epoch": 0.5386982959237294, + "grad_norm": 1.017915964126587, + "learning_rate": 4.616544206506637e-06, + "loss": 0.2728, + "step": 26910 + }, + { + "epoch": 0.5387183144408578, + "grad_norm": 1.059203863143921, + "learning_rate": 4.616220979061582e-06, + "loss": 0.2749, + "step": 26911 + }, + { + "epoch": 0.5387383329579861, + "grad_norm": 1.0673749446868896, + "learning_rate": 4.615897753229852e-06, + "loss": 0.3183, + "step": 26912 + }, + { + "epoch": 0.5387583514751145, + "grad_norm": 1.2725735902786255, + "learning_rate": 4.615574529012805e-06, + "loss": 0.3106, + "step": 26913 + }, + { + "epoch": 0.5387783699922428, + "grad_norm": 1.342598557472229, + "learning_rate": 4.6152513064117985e-06, + "loss": 0.2732, + "step": 26914 + }, + { + "epoch": 0.5387983885093711, + "grad_norm": 1.9403200149536133, + "learning_rate": 4.614928085428194e-06, + "loss": 0.7368, + "step": 26915 + }, + { + "epoch": 0.5388184070264995, + "grad_norm": 0.9867147207260132, + "learning_rate": 4.61460486606335e-06, + "loss": 0.2822, + "step": 26916 + }, + { + "epoch": 0.5388384255436278, + "grad_norm": 1.2159075736999512, + "learning_rate": 4.614281648318623e-06, + "loss": 0.344, + "step": 26917 + }, + { + "epoch": 0.5388584440607562, + "grad_norm": 1.1199413537979126, + "learning_rate": 4.6139584321953734e-06, + "loss": 0.3133, + "step": 26918 + }, + { + "epoch": 0.5388784625778845, + "grad_norm": 1.0965955257415771, + "learning_rate": 4.613635217694958e-06, + "loss": 0.3073, + "step": 26919 + }, + { + "epoch": 0.5388984810950129, + "grad_norm": 1.064971923828125, + "learning_rate": 4.613312004818739e-06, + "loss": 0.3469, + "step": 26920 + }, + { + "epoch": 0.5389184996121412, + "grad_norm": 1.007520079612732, + "learning_rate": 4.612988793568073e-06, + "loss": 0.2794, + "step": 26921 + }, + { + "epoch": 0.5389385181292696, + "grad_norm": 1.5444844961166382, + "learning_rate": 4.612665583944317e-06, + "loss": 0.2963, + "step": 26922 + }, + { + "epoch": 0.5389585366463979, + "grad_norm": 1.167380690574646, + "learning_rate": 4.612342375948832e-06, + "loss": 0.3215, + "step": 26923 + }, + { + "epoch": 0.5389785551635262, + "grad_norm": 1.17704439163208, + "learning_rate": 4.612019169582977e-06, + "loss": 0.3266, + "step": 26924 + }, + { + "epoch": 0.5389985736806546, + "grad_norm": 1.8146899938583374, + "learning_rate": 4.61169596484811e-06, + "loss": 0.8347, + "step": 26925 + }, + { + "epoch": 0.5390185921977829, + "grad_norm": 1.071004867553711, + "learning_rate": 4.611372761745591e-06, + "loss": 0.3034, + "step": 26926 + }, + { + "epoch": 0.5390386107149113, + "grad_norm": 1.033921480178833, + "learning_rate": 4.611049560276775e-06, + "loss": 0.2881, + "step": 26927 + }, + { + "epoch": 0.5390586292320396, + "grad_norm": 1.0727503299713135, + "learning_rate": 4.6107263604430225e-06, + "loss": 0.3041, + "step": 26928 + }, + { + "epoch": 0.539078647749168, + "grad_norm": 1.0661653280258179, + "learning_rate": 4.610403162245695e-06, + "loss": 0.2479, + "step": 26929 + }, + { + "epoch": 0.5390986662662963, + "grad_norm": 1.0020043849945068, + "learning_rate": 4.610079965686147e-06, + "loss": 0.2808, + "step": 26930 + }, + { + "epoch": 0.5391186847834246, + "grad_norm": 1.0436009168624878, + "learning_rate": 4.60975677076574e-06, + "loss": 0.2816, + "step": 26931 + }, + { + "epoch": 0.539138703300553, + "grad_norm": 1.7143021821975708, + "learning_rate": 4.609433577485829e-06, + "loss": 0.7269, + "step": 26932 + }, + { + "epoch": 0.5391587218176813, + "grad_norm": 1.8532081842422485, + "learning_rate": 4.609110385847778e-06, + "loss": 0.7557, + "step": 26933 + }, + { + "epoch": 0.5391787403348097, + "grad_norm": 1.1492031812667847, + "learning_rate": 4.608787195852942e-06, + "loss": 0.346, + "step": 26934 + }, + { + "epoch": 0.539198758851938, + "grad_norm": 1.027497410774231, + "learning_rate": 4.608464007502681e-06, + "loss": 0.2999, + "step": 26935 + }, + { + "epoch": 0.5392187773690664, + "grad_norm": 1.1940470933914185, + "learning_rate": 4.6081408207983525e-06, + "loss": 0.3186, + "step": 26936 + }, + { + "epoch": 0.5392387958861947, + "grad_norm": 1.3945719003677368, + "learning_rate": 4.607817635741314e-06, + "loss": 0.3258, + "step": 26937 + }, + { + "epoch": 0.5392588144033231, + "grad_norm": 1.0954382419586182, + "learning_rate": 4.607494452332928e-06, + "loss": 0.3192, + "step": 26938 + }, + { + "epoch": 0.5392788329204514, + "grad_norm": 1.4064000844955444, + "learning_rate": 4.607171270574551e-06, + "loss": 0.3303, + "step": 26939 + }, + { + "epoch": 0.5392988514375797, + "grad_norm": 1.0757960081100464, + "learning_rate": 4.606848090467541e-06, + "loss": 0.3074, + "step": 26940 + }, + { + "epoch": 0.5393188699547081, + "grad_norm": 1.1829817295074463, + "learning_rate": 4.606524912013255e-06, + "loss": 0.2925, + "step": 26941 + }, + { + "epoch": 0.5393388884718364, + "grad_norm": 1.0455604791641235, + "learning_rate": 4.606201735213056e-06, + "loss": 0.3033, + "step": 26942 + }, + { + "epoch": 0.5393589069889648, + "grad_norm": 1.1202166080474854, + "learning_rate": 4.605878560068299e-06, + "loss": 0.2712, + "step": 26943 + }, + { + "epoch": 0.5393789255060931, + "grad_norm": 1.0533074140548706, + "learning_rate": 4.605555386580345e-06, + "loss": 0.251, + "step": 26944 + }, + { + "epoch": 0.5393989440232215, + "grad_norm": 1.0832453966140747, + "learning_rate": 4.605232214750549e-06, + "loss": 0.3535, + "step": 26945 + }, + { + "epoch": 0.5394189625403498, + "grad_norm": 1.1322710514068604, + "learning_rate": 4.604909044580273e-06, + "loss": 0.3522, + "step": 26946 + }, + { + "epoch": 0.5394389810574781, + "grad_norm": 1.159133791923523, + "learning_rate": 4.604585876070876e-06, + "loss": 0.2954, + "step": 26947 + }, + { + "epoch": 0.5394589995746065, + "grad_norm": 1.844046711921692, + "learning_rate": 4.604262709223713e-06, + "loss": 0.8041, + "step": 26948 + }, + { + "epoch": 0.5394790180917348, + "grad_norm": 1.2187275886535645, + "learning_rate": 4.6039395440401455e-06, + "loss": 0.2812, + "step": 26949 + }, + { + "epoch": 0.5394990366088632, + "grad_norm": 1.9318791627883911, + "learning_rate": 4.603616380521528e-06, + "loss": 0.8167, + "step": 26950 + }, + { + "epoch": 0.5395190551259915, + "grad_norm": 1.1362606287002563, + "learning_rate": 4.603293218669226e-06, + "loss": 0.3185, + "step": 26951 + }, + { + "epoch": 0.5395390736431199, + "grad_norm": 1.0225549936294556, + "learning_rate": 4.602970058484593e-06, + "loss": 0.2915, + "step": 26952 + }, + { + "epoch": 0.5395590921602482, + "grad_norm": 1.1718424558639526, + "learning_rate": 4.602646899968988e-06, + "loss": 0.3171, + "step": 26953 + }, + { + "epoch": 0.5395791106773766, + "grad_norm": 1.1851445436477661, + "learning_rate": 4.602323743123768e-06, + "loss": 0.356, + "step": 26954 + }, + { + "epoch": 0.5395991291945049, + "grad_norm": 1.1590523719787598, + "learning_rate": 4.602000587950295e-06, + "loss": 0.318, + "step": 26955 + }, + { + "epoch": 0.5396191477116332, + "grad_norm": 1.1723731756210327, + "learning_rate": 4.601677434449926e-06, + "loss": 0.2959, + "step": 26956 + }, + { + "epoch": 0.5396391662287616, + "grad_norm": 1.1167325973510742, + "learning_rate": 4.601354282624019e-06, + "loss": 0.3098, + "step": 26957 + }, + { + "epoch": 0.5396591847458899, + "grad_norm": 1.1011930704116821, + "learning_rate": 4.601031132473934e-06, + "loss": 0.2988, + "step": 26958 + }, + { + "epoch": 0.5396792032630183, + "grad_norm": 1.1720691919326782, + "learning_rate": 4.600707984001026e-06, + "loss": 0.3206, + "step": 26959 + }, + { + "epoch": 0.5396992217801466, + "grad_norm": 0.9661186933517456, + "learning_rate": 4.600384837206657e-06, + "loss": 0.2677, + "step": 26960 + }, + { + "epoch": 0.539719240297275, + "grad_norm": 1.1621352434158325, + "learning_rate": 4.600061692092184e-06, + "loss": 0.2936, + "step": 26961 + }, + { + "epoch": 0.5397392588144033, + "grad_norm": 1.2699271440505981, + "learning_rate": 4.599738548658966e-06, + "loss": 0.3319, + "step": 26962 + }, + { + "epoch": 0.5397592773315316, + "grad_norm": 1.1531472206115723, + "learning_rate": 4.599415406908359e-06, + "loss": 0.3271, + "step": 26963 + }, + { + "epoch": 0.53977929584866, + "grad_norm": 1.131325602531433, + "learning_rate": 4.599092266841726e-06, + "loss": 0.3311, + "step": 26964 + }, + { + "epoch": 0.5397993143657883, + "grad_norm": 1.0706026554107666, + "learning_rate": 4.598769128460422e-06, + "loss": 0.2942, + "step": 26965 + }, + { + "epoch": 0.5398193328829167, + "grad_norm": 1.1847586631774902, + "learning_rate": 4.598445991765807e-06, + "loss": 0.3112, + "step": 26966 + }, + { + "epoch": 0.539839351400045, + "grad_norm": 1.077877402305603, + "learning_rate": 4.598122856759236e-06, + "loss": 0.2849, + "step": 26967 + }, + { + "epoch": 0.5398593699171734, + "grad_norm": 1.1515179872512817, + "learning_rate": 4.597799723442073e-06, + "loss": 0.3333, + "step": 26968 + }, + { + "epoch": 0.5398793884343017, + "grad_norm": 1.1942410469055176, + "learning_rate": 4.5974765918156715e-06, + "loss": 0.2741, + "step": 26969 + }, + { + "epoch": 0.5398994069514301, + "grad_norm": 1.0700632333755493, + "learning_rate": 4.597153461881393e-06, + "loss": 0.3255, + "step": 26970 + }, + { + "epoch": 0.5399194254685584, + "grad_norm": 1.0791677236557007, + "learning_rate": 4.596830333640595e-06, + "loss": 0.2983, + "step": 26971 + }, + { + "epoch": 0.5399394439856867, + "grad_norm": 1.9220502376556396, + "learning_rate": 4.5965072070946334e-06, + "loss": 0.7763, + "step": 26972 + }, + { + "epoch": 0.5399594625028151, + "grad_norm": 1.9864780902862549, + "learning_rate": 4.59618408224487e-06, + "loss": 0.7634, + "step": 26973 + }, + { + "epoch": 0.5399794810199434, + "grad_norm": 1.0198681354522705, + "learning_rate": 4.595860959092662e-06, + "loss": 0.2854, + "step": 26974 + }, + { + "epoch": 0.5399994995370718, + "grad_norm": 1.0805093050003052, + "learning_rate": 4.595537837639368e-06, + "loss": 0.352, + "step": 26975 + }, + { + "epoch": 0.5400195180542001, + "grad_norm": 1.065104603767395, + "learning_rate": 4.595214717886343e-06, + "loss": 0.2998, + "step": 26976 + }, + { + "epoch": 0.5400395365713285, + "grad_norm": 1.3494929075241089, + "learning_rate": 4.594891599834951e-06, + "loss": 0.3091, + "step": 26977 + }, + { + "epoch": 0.5400595550884568, + "grad_norm": 1.190179705619812, + "learning_rate": 4.594568483486548e-06, + "loss": 0.3089, + "step": 26978 + }, + { + "epoch": 0.5400795736055851, + "grad_norm": 1.0448088645935059, + "learning_rate": 4.594245368842491e-06, + "loss": 0.2872, + "step": 26979 + }, + { + "epoch": 0.5400995921227135, + "grad_norm": 1.0156673192977905, + "learning_rate": 4.593922255904138e-06, + "loss": 0.314, + "step": 26980 + }, + { + "epoch": 0.5401196106398418, + "grad_norm": 1.0588023662567139, + "learning_rate": 4.59359914467285e-06, + "loss": 0.2695, + "step": 26981 + }, + { + "epoch": 0.5401396291569702, + "grad_norm": 1.9432125091552734, + "learning_rate": 4.5932760351499826e-06, + "loss": 0.8324, + "step": 26982 + }, + { + "epoch": 0.5401596476740985, + "grad_norm": 1.2848942279815674, + "learning_rate": 4.592952927336896e-06, + "loss": 0.3342, + "step": 26983 + }, + { + "epoch": 0.5401796661912269, + "grad_norm": 1.8458465337753296, + "learning_rate": 4.592629821234949e-06, + "loss": 0.8017, + "step": 26984 + }, + { + "epoch": 0.5401996847083552, + "grad_norm": 1.2855379581451416, + "learning_rate": 4.592306716845495e-06, + "loss": 0.3019, + "step": 26985 + }, + { + "epoch": 0.5402197032254836, + "grad_norm": 1.0392082929611206, + "learning_rate": 4.591983614169898e-06, + "loss": 0.2732, + "step": 26986 + }, + { + "epoch": 0.5402397217426119, + "grad_norm": 1.077776312828064, + "learning_rate": 4.591660513209513e-06, + "loss": 0.3163, + "step": 26987 + }, + { + "epoch": 0.5402597402597402, + "grad_norm": 1.1350351572036743, + "learning_rate": 4.591337413965702e-06, + "loss": 0.3538, + "step": 26988 + }, + { + "epoch": 0.5402797587768686, + "grad_norm": 1.1100484132766724, + "learning_rate": 4.591014316439819e-06, + "loss": 0.2994, + "step": 26989 + }, + { + "epoch": 0.5402997772939969, + "grad_norm": 1.0916637182235718, + "learning_rate": 4.590691220633222e-06, + "loss": 0.3313, + "step": 26990 + }, + { + "epoch": 0.5403197958111253, + "grad_norm": 1.0653431415557861, + "learning_rate": 4.590368126547273e-06, + "loss": 0.315, + "step": 26991 + }, + { + "epoch": 0.5403398143282536, + "grad_norm": 1.0778796672821045, + "learning_rate": 4.590045034183329e-06, + "loss": 0.2919, + "step": 26992 + }, + { + "epoch": 0.540359832845382, + "grad_norm": 1.0625700950622559, + "learning_rate": 4.589721943542746e-06, + "loss": 0.3037, + "step": 26993 + }, + { + "epoch": 0.5403798513625103, + "grad_norm": 0.9922909140586853, + "learning_rate": 4.589398854626883e-06, + "loss": 0.3144, + "step": 26994 + }, + { + "epoch": 0.5403998698796386, + "grad_norm": 1.1375123262405396, + "learning_rate": 4.589075767437099e-06, + "loss": 0.2871, + "step": 26995 + }, + { + "epoch": 0.540419888396767, + "grad_norm": 1.0780495405197144, + "learning_rate": 4.588752681974754e-06, + "loss": 0.3161, + "step": 26996 + }, + { + "epoch": 0.5404399069138953, + "grad_norm": 1.1604979038238525, + "learning_rate": 4.5884295982412045e-06, + "loss": 0.3003, + "step": 26997 + }, + { + "epoch": 0.5404599254310237, + "grad_norm": 1.1042689085006714, + "learning_rate": 4.588106516237805e-06, + "loss": 0.2615, + "step": 26998 + }, + { + "epoch": 0.540479943948152, + "grad_norm": 1.0894594192504883, + "learning_rate": 4.58778343596592e-06, + "loss": 0.3049, + "step": 26999 + }, + { + "epoch": 0.5404999624652804, + "grad_norm": 1.0046515464782715, + "learning_rate": 4.587460357426903e-06, + "loss": 0.2582, + "step": 27000 + }, + { + "epoch": 0.5405199809824087, + "grad_norm": 1.2509886026382446, + "learning_rate": 4.5871372806221154e-06, + "loss": 0.3106, + "step": 27001 + }, + { + "epoch": 0.5405399994995371, + "grad_norm": 1.0358294248580933, + "learning_rate": 4.586814205552914e-06, + "loss": 0.3064, + "step": 27002 + }, + { + "epoch": 0.5405600180166654, + "grad_norm": 1.1413251161575317, + "learning_rate": 4.586491132220654e-06, + "loss": 0.369, + "step": 27003 + }, + { + "epoch": 0.5405800365337937, + "grad_norm": 1.1628304719924927, + "learning_rate": 4.586168060626699e-06, + "loss": 0.3518, + "step": 27004 + }, + { + "epoch": 0.5406000550509221, + "grad_norm": 1.0848206281661987, + "learning_rate": 4.5858449907724036e-06, + "loss": 0.2756, + "step": 27005 + }, + { + "epoch": 0.5406200735680504, + "grad_norm": 1.1172571182250977, + "learning_rate": 4.585521922659126e-06, + "loss": 0.3308, + "step": 27006 + }, + { + "epoch": 0.5406400920851788, + "grad_norm": 1.1880325078964233, + "learning_rate": 4.585198856288226e-06, + "loss": 0.3264, + "step": 27007 + }, + { + "epoch": 0.5406601106023071, + "grad_norm": 1.1142350435256958, + "learning_rate": 4.58487579166106e-06, + "loss": 0.3293, + "step": 27008 + }, + { + "epoch": 0.5406801291194355, + "grad_norm": 1.297067642211914, + "learning_rate": 4.584552728778988e-06, + "loss": 0.3327, + "step": 27009 + }, + { + "epoch": 0.5407001476365638, + "grad_norm": 1.040509581565857, + "learning_rate": 4.584229667643367e-06, + "loss": 0.3084, + "step": 27010 + }, + { + "epoch": 0.5407201661536921, + "grad_norm": 1.0428781509399414, + "learning_rate": 4.583906608255555e-06, + "loss": 0.3027, + "step": 27011 + }, + { + "epoch": 0.5407401846708205, + "grad_norm": 1.05126953125, + "learning_rate": 4.583583550616907e-06, + "loss": 0.2742, + "step": 27012 + }, + { + "epoch": 0.5407602031879488, + "grad_norm": 1.075568675994873, + "learning_rate": 4.583260494728786e-06, + "loss": 0.2637, + "step": 27013 + }, + { + "epoch": 0.5407802217050772, + "grad_norm": 0.9542490839958191, + "learning_rate": 4.58293744059255e-06, + "loss": 0.2667, + "step": 27014 + }, + { + "epoch": 0.5408002402222055, + "grad_norm": 1.0503793954849243, + "learning_rate": 4.582614388209554e-06, + "loss": 0.3486, + "step": 27015 + }, + { + "epoch": 0.5408202587393339, + "grad_norm": 1.1565048694610596, + "learning_rate": 4.582291337581156e-06, + "loss": 0.3732, + "step": 27016 + }, + { + "epoch": 0.5408402772564622, + "grad_norm": 1.2173149585723877, + "learning_rate": 4.581968288708717e-06, + "loss": 0.3077, + "step": 27017 + }, + { + "epoch": 0.5408602957735905, + "grad_norm": 1.149780511856079, + "learning_rate": 4.5816452415935935e-06, + "loss": 0.2947, + "step": 27018 + }, + { + "epoch": 0.5408803142907189, + "grad_norm": 1.1414748430252075, + "learning_rate": 4.581322196237142e-06, + "loss": 0.3056, + "step": 27019 + }, + { + "epoch": 0.5409003328078472, + "grad_norm": 1.144161343574524, + "learning_rate": 4.580999152640723e-06, + "loss": 0.2937, + "step": 27020 + }, + { + "epoch": 0.5409203513249756, + "grad_norm": 1.1363576650619507, + "learning_rate": 4.580676110805692e-06, + "loss": 0.3469, + "step": 27021 + }, + { + "epoch": 0.5409403698421039, + "grad_norm": 1.075121521949768, + "learning_rate": 4.58035307073341e-06, + "loss": 0.2982, + "step": 27022 + }, + { + "epoch": 0.5409603883592323, + "grad_norm": 1.0508593320846558, + "learning_rate": 4.580030032425233e-06, + "loss": 0.2799, + "step": 27023 + }, + { + "epoch": 0.5409804068763606, + "grad_norm": 1.0606026649475098, + "learning_rate": 4.57970699588252e-06, + "loss": 0.3201, + "step": 27024 + }, + { + "epoch": 0.541000425393489, + "grad_norm": 1.130028486251831, + "learning_rate": 4.579383961106626e-06, + "loss": 0.3488, + "step": 27025 + }, + { + "epoch": 0.5410204439106173, + "grad_norm": 1.1491596698760986, + "learning_rate": 4.579060928098913e-06, + "loss": 0.2698, + "step": 27026 + }, + { + "epoch": 0.5410404624277456, + "grad_norm": 1.1932635307312012, + "learning_rate": 4.578737896860737e-06, + "loss": 0.3444, + "step": 27027 + }, + { + "epoch": 0.541060480944874, + "grad_norm": 1.0490621328353882, + "learning_rate": 4.578414867393457e-06, + "loss": 0.3119, + "step": 27028 + }, + { + "epoch": 0.5410804994620023, + "grad_norm": 1.0755616426467896, + "learning_rate": 4.578091839698428e-06, + "loss": 0.2812, + "step": 27029 + }, + { + "epoch": 0.5411005179791307, + "grad_norm": 1.8993154764175415, + "learning_rate": 4.577768813777012e-06, + "loss": 0.7546, + "step": 27030 + }, + { + "epoch": 0.541120536496259, + "grad_norm": 1.0331437587738037, + "learning_rate": 4.577445789630566e-06, + "loss": 0.3256, + "step": 27031 + }, + { + "epoch": 0.5411405550133874, + "grad_norm": 1.2856099605560303, + "learning_rate": 4.577122767260445e-06, + "loss": 0.3435, + "step": 27032 + }, + { + "epoch": 0.5411605735305157, + "grad_norm": 1.0699256658554077, + "learning_rate": 4.57679974666801e-06, + "loss": 0.3081, + "step": 27033 + }, + { + "epoch": 0.541180592047644, + "grad_norm": 1.1390615701675415, + "learning_rate": 4.576476727854615e-06, + "loss": 0.314, + "step": 27034 + }, + { + "epoch": 0.5412006105647724, + "grad_norm": 1.8008298873901367, + "learning_rate": 4.576153710821624e-06, + "loss": 0.7801, + "step": 27035 + }, + { + "epoch": 0.5412206290819007, + "grad_norm": 1.117007851600647, + "learning_rate": 4.5758306955703915e-06, + "loss": 0.3436, + "step": 27036 + }, + { + "epoch": 0.5412406475990291, + "grad_norm": 1.0302238464355469, + "learning_rate": 4.575507682102275e-06, + "loss": 0.2457, + "step": 27037 + }, + { + "epoch": 0.5412606661161574, + "grad_norm": 1.947775959968567, + "learning_rate": 4.575184670418631e-06, + "loss": 0.7636, + "step": 27038 + }, + { + "epoch": 0.5412806846332858, + "grad_norm": 0.9588804841041565, + "learning_rate": 4.57486166052082e-06, + "loss": 0.2726, + "step": 27039 + }, + { + "epoch": 0.5413007031504141, + "grad_norm": 1.0449656248092651, + "learning_rate": 4.5745386524101994e-06, + "loss": 0.3252, + "step": 27040 + }, + { + "epoch": 0.5413207216675425, + "grad_norm": 1.1763856410980225, + "learning_rate": 4.574215646088128e-06, + "loss": 0.331, + "step": 27041 + }, + { + "epoch": 0.5413407401846708, + "grad_norm": 1.0843514204025269, + "learning_rate": 4.573892641555962e-06, + "loss": 0.287, + "step": 27042 + }, + { + "epoch": 0.5413607587017991, + "grad_norm": 1.0651319026947021, + "learning_rate": 4.573569638815058e-06, + "loss": 0.3043, + "step": 27043 + }, + { + "epoch": 0.5413807772189275, + "grad_norm": 0.9752780795097351, + "learning_rate": 4.5732466378667775e-06, + "loss": 0.2437, + "step": 27044 + }, + { + "epoch": 0.5414007957360558, + "grad_norm": 1.1005946397781372, + "learning_rate": 4.572923638712474e-06, + "loss": 0.2978, + "step": 27045 + }, + { + "epoch": 0.5414208142531842, + "grad_norm": 1.0100213289260864, + "learning_rate": 4.57260064135351e-06, + "loss": 0.3408, + "step": 27046 + }, + { + "epoch": 0.5414408327703125, + "grad_norm": 0.9693484902381897, + "learning_rate": 4.572277645791238e-06, + "loss": 0.3063, + "step": 27047 + }, + { + "epoch": 0.5414608512874409, + "grad_norm": 1.0752129554748535, + "learning_rate": 4.571954652027021e-06, + "loss": 0.2713, + "step": 27048 + }, + { + "epoch": 0.5414808698045692, + "grad_norm": 1.1753841638565063, + "learning_rate": 4.571631660062215e-06, + "loss": 0.3309, + "step": 27049 + }, + { + "epoch": 0.5415008883216975, + "grad_norm": 1.0617672204971313, + "learning_rate": 4.571308669898177e-06, + "loss": 0.2578, + "step": 27050 + }, + { + "epoch": 0.5415209068388259, + "grad_norm": 1.080234408378601, + "learning_rate": 4.570985681536265e-06, + "loss": 0.3151, + "step": 27051 + }, + { + "epoch": 0.5415409253559542, + "grad_norm": 1.8982857465744019, + "learning_rate": 4.570662694977834e-06, + "loss": 0.8416, + "step": 27052 + }, + { + "epoch": 0.5415609438730826, + "grad_norm": 1.1225990056991577, + "learning_rate": 4.570339710224248e-06, + "loss": 0.3318, + "step": 27053 + }, + { + "epoch": 0.5415809623902109, + "grad_norm": 1.093163013458252, + "learning_rate": 4.570016727276861e-06, + "loss": 0.3302, + "step": 27054 + }, + { + "epoch": 0.5416009809073393, + "grad_norm": 1.1882332563400269, + "learning_rate": 4.569693746137031e-06, + "loss": 0.3011, + "step": 27055 + }, + { + "epoch": 0.5416209994244676, + "grad_norm": 1.1388553380966187, + "learning_rate": 4.569370766806114e-06, + "loss": 0.2997, + "step": 27056 + }, + { + "epoch": 0.541641017941596, + "grad_norm": 1.1405669450759888, + "learning_rate": 4.5690477892854715e-06, + "loss": 0.3245, + "step": 27057 + }, + { + "epoch": 0.5416610364587243, + "grad_norm": 1.121901273727417, + "learning_rate": 4.5687248135764585e-06, + "loss": 0.2903, + "step": 27058 + }, + { + "epoch": 0.5416810549758526, + "grad_norm": 1.2924256324768066, + "learning_rate": 4.568401839680435e-06, + "loss": 0.3326, + "step": 27059 + }, + { + "epoch": 0.541701073492981, + "grad_norm": 1.9982600212097168, + "learning_rate": 4.568078867598755e-06, + "loss": 0.74, + "step": 27060 + }, + { + "epoch": 0.5417210920101093, + "grad_norm": 1.1592713594436646, + "learning_rate": 4.56775589733278e-06, + "loss": 0.3272, + "step": 27061 + }, + { + "epoch": 0.5417411105272377, + "grad_norm": 1.1622817516326904, + "learning_rate": 4.567432928883867e-06, + "loss": 0.2924, + "step": 27062 + }, + { + "epoch": 0.541761129044366, + "grad_norm": 1.8505367040634155, + "learning_rate": 4.567109962253372e-06, + "loss": 0.7806, + "step": 27063 + }, + { + "epoch": 0.5417811475614944, + "grad_norm": 1.1191859245300293, + "learning_rate": 4.566786997442653e-06, + "loss": 0.2797, + "step": 27064 + }, + { + "epoch": 0.5418011660786227, + "grad_norm": 1.1730725765228271, + "learning_rate": 4.566464034453068e-06, + "loss": 0.3357, + "step": 27065 + }, + { + "epoch": 0.541821184595751, + "grad_norm": 0.9871603846549988, + "learning_rate": 4.566141073285976e-06, + "loss": 0.3085, + "step": 27066 + }, + { + "epoch": 0.5418412031128794, + "grad_norm": 1.1983959674835205, + "learning_rate": 4.565818113942734e-06, + "loss": 0.3378, + "step": 27067 + }, + { + "epoch": 0.5418612216300077, + "grad_norm": 1.3036255836486816, + "learning_rate": 4.5654951564246985e-06, + "loss": 0.3006, + "step": 27068 + }, + { + "epoch": 0.5418812401471361, + "grad_norm": 1.0836786031723022, + "learning_rate": 4.565172200733226e-06, + "loss": 0.3347, + "step": 27069 + }, + { + "epoch": 0.5419012586642644, + "grad_norm": 1.089155912399292, + "learning_rate": 4.564849246869678e-06, + "loss": 0.3104, + "step": 27070 + }, + { + "epoch": 0.5419212771813928, + "grad_norm": 1.0669481754302979, + "learning_rate": 4.56452629483541e-06, + "loss": 0.3217, + "step": 27071 + }, + { + "epoch": 0.5419412956985211, + "grad_norm": 1.066283941268921, + "learning_rate": 4.5642033446317796e-06, + "loss": 0.3122, + "step": 27072 + }, + { + "epoch": 0.5419613142156495, + "grad_norm": 1.1376830339431763, + "learning_rate": 4.563880396260145e-06, + "loss": 0.3293, + "step": 27073 + }, + { + "epoch": 0.5419813327327778, + "grad_norm": 1.1792380809783936, + "learning_rate": 4.563557449721862e-06, + "loss": 0.3252, + "step": 27074 + }, + { + "epoch": 0.5420013512499061, + "grad_norm": 1.1067352294921875, + "learning_rate": 4.563234505018291e-06, + "loss": 0.3127, + "step": 27075 + }, + { + "epoch": 0.5420213697670345, + "grad_norm": 1.088444471359253, + "learning_rate": 4.562911562150788e-06, + "loss": 0.2842, + "step": 27076 + }, + { + "epoch": 0.5420413882841628, + "grad_norm": 1.106930136680603, + "learning_rate": 4.562588621120709e-06, + "loss": 0.3, + "step": 27077 + }, + { + "epoch": 0.5420614068012912, + "grad_norm": 1.1362894773483276, + "learning_rate": 4.5622656819294135e-06, + "loss": 0.3098, + "step": 27078 + }, + { + "epoch": 0.5420814253184195, + "grad_norm": 1.0851296186447144, + "learning_rate": 4.5619427445782604e-06, + "loss": 0.286, + "step": 27079 + }, + { + "epoch": 0.5421014438355479, + "grad_norm": 1.219663381576538, + "learning_rate": 4.561619809068606e-06, + "loss": 0.3308, + "step": 27080 + }, + { + "epoch": 0.5421214623526762, + "grad_norm": 1.1136043071746826, + "learning_rate": 4.5612968754018075e-06, + "loss": 0.2986, + "step": 27081 + }, + { + "epoch": 0.5421414808698045, + "grad_norm": 1.0836237668991089, + "learning_rate": 4.5609739435792215e-06, + "loss": 0.2384, + "step": 27082 + }, + { + "epoch": 0.5421614993869329, + "grad_norm": 1.1291933059692383, + "learning_rate": 4.560651013602206e-06, + "loss": 0.3605, + "step": 27083 + }, + { + "epoch": 0.5421815179040612, + "grad_norm": 1.0739718675613403, + "learning_rate": 4.56032808547212e-06, + "loss": 0.3462, + "step": 27084 + }, + { + "epoch": 0.5422015364211896, + "grad_norm": 1.2108197212219238, + "learning_rate": 4.560005159190321e-06, + "loss": 0.2939, + "step": 27085 + }, + { + "epoch": 0.5422215549383179, + "grad_norm": 1.1027050018310547, + "learning_rate": 4.559682234758165e-06, + "loss": 0.3351, + "step": 27086 + }, + { + "epoch": 0.5422415734554463, + "grad_norm": 1.1295925378799438, + "learning_rate": 4.559359312177008e-06, + "loss": 0.2556, + "step": 27087 + }, + { + "epoch": 0.5422615919725746, + "grad_norm": 1.0531529188156128, + "learning_rate": 4.559036391448213e-06, + "loss": 0.3372, + "step": 27088 + }, + { + "epoch": 0.542281610489703, + "grad_norm": 1.0814790725708008, + "learning_rate": 4.558713472573134e-06, + "loss": 0.2721, + "step": 27089 + }, + { + "epoch": 0.5423016290068313, + "grad_norm": 1.047603726387024, + "learning_rate": 4.558390555553126e-06, + "loss": 0.2653, + "step": 27090 + }, + { + "epoch": 0.5423216475239596, + "grad_norm": 1.1558680534362793, + "learning_rate": 4.558067640389549e-06, + "loss": 0.2898, + "step": 27091 + }, + { + "epoch": 0.542341666041088, + "grad_norm": 1.068454623222351, + "learning_rate": 4.557744727083762e-06, + "loss": 0.293, + "step": 27092 + }, + { + "epoch": 0.5423616845582163, + "grad_norm": 0.9884923696517944, + "learning_rate": 4.557421815637122e-06, + "loss": 0.3079, + "step": 27093 + }, + { + "epoch": 0.5423817030753447, + "grad_norm": 1.1031845808029175, + "learning_rate": 4.557098906050985e-06, + "loss": 0.3393, + "step": 27094 + }, + { + "epoch": 0.542401721592473, + "grad_norm": 1.2395614385604858, + "learning_rate": 4.556775998326707e-06, + "loss": 0.2769, + "step": 27095 + }, + { + "epoch": 0.5424217401096014, + "grad_norm": 1.0311124324798584, + "learning_rate": 4.556453092465649e-06, + "loss": 0.2627, + "step": 27096 + }, + { + "epoch": 0.5424417586267297, + "grad_norm": 1.1756374835968018, + "learning_rate": 4.556130188469166e-06, + "loss": 0.3078, + "step": 27097 + }, + { + "epoch": 0.542461777143858, + "grad_norm": 1.272520899772644, + "learning_rate": 4.555807286338616e-06, + "loss": 0.3392, + "step": 27098 + }, + { + "epoch": 0.5424817956609864, + "grad_norm": 1.0790914297103882, + "learning_rate": 4.555484386075358e-06, + "loss": 0.3009, + "step": 27099 + }, + { + "epoch": 0.5425018141781147, + "grad_norm": 1.16499924659729, + "learning_rate": 4.555161487680746e-06, + "loss": 0.3344, + "step": 27100 + }, + { + "epoch": 0.5425218326952431, + "grad_norm": 1.0764843225479126, + "learning_rate": 4.55483859115614e-06, + "loss": 0.2861, + "step": 27101 + }, + { + "epoch": 0.5425418512123714, + "grad_norm": 1.0472244024276733, + "learning_rate": 4.554515696502898e-06, + "loss": 0.3198, + "step": 27102 + }, + { + "epoch": 0.5425618697294998, + "grad_norm": 1.0133417844772339, + "learning_rate": 4.554192803722375e-06, + "loss": 0.2977, + "step": 27103 + }, + { + "epoch": 0.5425818882466281, + "grad_norm": 1.1072818040847778, + "learning_rate": 4.553869912815931e-06, + "loss": 0.2888, + "step": 27104 + }, + { + "epoch": 0.5426019067637565, + "grad_norm": 1.0959798097610474, + "learning_rate": 4.553547023784918e-06, + "loss": 0.2959, + "step": 27105 + }, + { + "epoch": 0.5426219252808848, + "grad_norm": 1.1612735986709595, + "learning_rate": 4.553224136630701e-06, + "loss": 0.3113, + "step": 27106 + }, + { + "epoch": 0.5426419437980131, + "grad_norm": 1.0866411924362183, + "learning_rate": 4.552901251354633e-06, + "loss": 0.3384, + "step": 27107 + }, + { + "epoch": 0.5426619623151415, + "grad_norm": 1.2025805711746216, + "learning_rate": 4.552578367958071e-06, + "loss": 0.3615, + "step": 27108 + }, + { + "epoch": 0.5426819808322698, + "grad_norm": 1.0922297239303589, + "learning_rate": 4.552255486442373e-06, + "loss": 0.3502, + "step": 27109 + }, + { + "epoch": 0.5427019993493982, + "grad_norm": 1.8049288988113403, + "learning_rate": 4.5519326068088965e-06, + "loss": 0.77, + "step": 27110 + }, + { + "epoch": 0.5427220178665265, + "grad_norm": 1.122925043106079, + "learning_rate": 4.551609729059001e-06, + "loss": 0.2609, + "step": 27111 + }, + { + "epoch": 0.5427420363836549, + "grad_norm": 1.9328958988189697, + "learning_rate": 4.5512868531940404e-06, + "loss": 0.8032, + "step": 27112 + }, + { + "epoch": 0.5427620549007832, + "grad_norm": 1.0446091890335083, + "learning_rate": 4.550963979215372e-06, + "loss": 0.2836, + "step": 27113 + }, + { + "epoch": 0.5427820734179115, + "grad_norm": 1.0882375240325928, + "learning_rate": 4.5506411071243565e-06, + "loss": 0.3034, + "step": 27114 + }, + { + "epoch": 0.5428020919350399, + "grad_norm": 1.183711290359497, + "learning_rate": 4.550318236922348e-06, + "loss": 0.3099, + "step": 27115 + }, + { + "epoch": 0.5428221104521682, + "grad_norm": 1.0748335123062134, + "learning_rate": 4.549995368610705e-06, + "loss": 0.3309, + "step": 27116 + }, + { + "epoch": 0.5428421289692966, + "grad_norm": 1.043184757232666, + "learning_rate": 4.549672502190785e-06, + "loss": 0.2984, + "step": 27117 + }, + { + "epoch": 0.5428621474864249, + "grad_norm": 1.1125835180282593, + "learning_rate": 4.5493496376639436e-06, + "loss": 0.351, + "step": 27118 + }, + { + "epoch": 0.5428821660035533, + "grad_norm": 1.1509181261062622, + "learning_rate": 4.54902677503154e-06, + "loss": 0.3082, + "step": 27119 + }, + { + "epoch": 0.5429021845206816, + "grad_norm": 1.108511209487915, + "learning_rate": 4.548703914294932e-06, + "loss": 0.2937, + "step": 27120 + }, + { + "epoch": 0.54292220303781, + "grad_norm": 1.8900068998336792, + "learning_rate": 4.548381055455475e-06, + "loss": 0.8227, + "step": 27121 + }, + { + "epoch": 0.5429422215549383, + "grad_norm": 1.9835141897201538, + "learning_rate": 4.548058198514526e-06, + "loss": 0.7746, + "step": 27122 + }, + { + "epoch": 0.5429622400720666, + "grad_norm": 1.096388816833496, + "learning_rate": 4.547735343473443e-06, + "loss": 0.3272, + "step": 27123 + }, + { + "epoch": 0.542982258589195, + "grad_norm": 1.1797254085540771, + "learning_rate": 4.5474124903335845e-06, + "loss": 0.3353, + "step": 27124 + }, + { + "epoch": 0.5430022771063233, + "grad_norm": 1.1261593103408813, + "learning_rate": 4.547089639096306e-06, + "loss": 0.3487, + "step": 27125 + }, + { + "epoch": 0.5430222956234517, + "grad_norm": 1.1884899139404297, + "learning_rate": 4.5467667897629666e-06, + "loss": 0.3143, + "step": 27126 + }, + { + "epoch": 0.54304231414058, + "grad_norm": 1.1612884998321533, + "learning_rate": 4.546443942334919e-06, + "loss": 0.3286, + "step": 27127 + }, + { + "epoch": 0.5430623326577084, + "grad_norm": 1.0961707830429077, + "learning_rate": 4.546121096813526e-06, + "loss": 0.3092, + "step": 27128 + }, + { + "epoch": 0.5430823511748367, + "grad_norm": 1.0873490571975708, + "learning_rate": 4.54579825320014e-06, + "loss": 0.2683, + "step": 27129 + }, + { + "epoch": 0.543102369691965, + "grad_norm": 1.9248579740524292, + "learning_rate": 4.545475411496122e-06, + "loss": 0.773, + "step": 27130 + }, + { + "epoch": 0.5431223882090934, + "grad_norm": 1.017706036567688, + "learning_rate": 4.545152571702826e-06, + "loss": 0.3469, + "step": 27131 + }, + { + "epoch": 0.5431424067262217, + "grad_norm": 1.1533464193344116, + "learning_rate": 4.544829733821613e-06, + "loss": 0.3324, + "step": 27132 + }, + { + "epoch": 0.5431624252433501, + "grad_norm": 1.0346208810806274, + "learning_rate": 4.544506897853837e-06, + "loss": 0.3064, + "step": 27133 + }, + { + "epoch": 0.5431824437604784, + "grad_norm": 1.2191413640975952, + "learning_rate": 4.544184063800855e-06, + "loss": 0.2898, + "step": 27134 + }, + { + "epoch": 0.5432024622776068, + "grad_norm": 1.8227527141571045, + "learning_rate": 4.5438612316640276e-06, + "loss": 0.7633, + "step": 27135 + }, + { + "epoch": 0.5432224807947351, + "grad_norm": 1.220854640007019, + "learning_rate": 4.543538401444706e-06, + "loss": 0.3308, + "step": 27136 + }, + { + "epoch": 0.5432424993118635, + "grad_norm": 1.133686900138855, + "learning_rate": 4.543215573144253e-06, + "loss": 0.307, + "step": 27137 + }, + { + "epoch": 0.5432625178289918, + "grad_norm": 1.7813937664031982, + "learning_rate": 4.542892746764024e-06, + "loss": 0.7682, + "step": 27138 + }, + { + "epoch": 0.5432825363461201, + "grad_norm": 1.2102124691009521, + "learning_rate": 4.542569922305374e-06, + "loss": 0.3154, + "step": 27139 + }, + { + "epoch": 0.5433025548632485, + "grad_norm": 1.1741085052490234, + "learning_rate": 4.542247099769661e-06, + "loss": 0.356, + "step": 27140 + }, + { + "epoch": 0.5433225733803768, + "grad_norm": 1.0291939973831177, + "learning_rate": 4.541924279158244e-06, + "loss": 0.2741, + "step": 27141 + }, + { + "epoch": 0.5433425918975052, + "grad_norm": 1.0699611902236938, + "learning_rate": 4.541601460472478e-06, + "loss": 0.2922, + "step": 27142 + }, + { + "epoch": 0.5433626104146335, + "grad_norm": 1.2624837160110474, + "learning_rate": 4.5412786437137216e-06, + "loss": 0.337, + "step": 27143 + }, + { + "epoch": 0.5433826289317619, + "grad_norm": 1.3320978879928589, + "learning_rate": 4.54095582888333e-06, + "loss": 0.3136, + "step": 27144 + }, + { + "epoch": 0.5434026474488902, + "grad_norm": 1.9883190393447876, + "learning_rate": 4.540633015982662e-06, + "loss": 0.8375, + "step": 27145 + }, + { + "epoch": 0.5434226659660185, + "grad_norm": 1.2946726083755493, + "learning_rate": 4.540310205013074e-06, + "loss": 0.3354, + "step": 27146 + }, + { + "epoch": 0.5434426844831469, + "grad_norm": 1.192818284034729, + "learning_rate": 4.539987395975923e-06, + "loss": 0.2875, + "step": 27147 + }, + { + "epoch": 0.5434627030002752, + "grad_norm": 1.1401067972183228, + "learning_rate": 4.5396645888725665e-06, + "loss": 0.3001, + "step": 27148 + }, + { + "epoch": 0.5434827215174036, + "grad_norm": 0.9609905481338501, + "learning_rate": 4.539341783704359e-06, + "loss": 0.2661, + "step": 27149 + }, + { + "epoch": 0.5435027400345319, + "grad_norm": 0.9824905395507812, + "learning_rate": 4.539018980472661e-06, + "loss": 0.3039, + "step": 27150 + }, + { + "epoch": 0.5435227585516603, + "grad_norm": 1.1740580797195435, + "learning_rate": 4.538696179178829e-06, + "loss": 0.3086, + "step": 27151 + }, + { + "epoch": 0.5435427770687886, + "grad_norm": 1.4175437688827515, + "learning_rate": 4.538373379824219e-06, + "loss": 0.2704, + "step": 27152 + }, + { + "epoch": 0.543562795585917, + "grad_norm": 1.8424514532089233, + "learning_rate": 4.538050582410185e-06, + "loss": 0.7629, + "step": 27153 + }, + { + "epoch": 0.5435828141030453, + "grad_norm": 1.0504871606826782, + "learning_rate": 4.537727786938089e-06, + "loss": 0.3207, + "step": 27154 + }, + { + "epoch": 0.5436028326201736, + "grad_norm": 2.067098379135132, + "learning_rate": 4.537404993409285e-06, + "loss": 0.7876, + "step": 27155 + }, + { + "epoch": 0.543622851137302, + "grad_norm": 1.1958637237548828, + "learning_rate": 4.537082201825132e-06, + "loss": 0.352, + "step": 27156 + }, + { + "epoch": 0.5436428696544303, + "grad_norm": 1.2210476398468018, + "learning_rate": 4.536759412186987e-06, + "loss": 0.3316, + "step": 27157 + }, + { + "epoch": 0.5436628881715587, + "grad_norm": 1.148196816444397, + "learning_rate": 4.536436624496202e-06, + "loss": 0.3081, + "step": 27158 + }, + { + "epoch": 0.543682906688687, + "grad_norm": 1.179622769355774, + "learning_rate": 4.536113838754141e-06, + "loss": 0.335, + "step": 27159 + }, + { + "epoch": 0.5437029252058154, + "grad_norm": 1.0376485586166382, + "learning_rate": 4.5357910549621555e-06, + "loss": 0.3208, + "step": 27160 + }, + { + "epoch": 0.5437229437229437, + "grad_norm": 1.0234134197235107, + "learning_rate": 4.5354682731216064e-06, + "loss": 0.3294, + "step": 27161 + }, + { + "epoch": 0.543742962240072, + "grad_norm": 1.1244159936904907, + "learning_rate": 4.535145493233846e-06, + "loss": 0.2879, + "step": 27162 + }, + { + "epoch": 0.5437629807572004, + "grad_norm": 1.0109392404556274, + "learning_rate": 4.5348227153002364e-06, + "loss": 0.3365, + "step": 27163 + }, + { + "epoch": 0.5437829992743287, + "grad_norm": 1.1005401611328125, + "learning_rate": 4.534499939322132e-06, + "loss": 0.349, + "step": 27164 + }, + { + "epoch": 0.5438030177914571, + "grad_norm": 2.045106887817383, + "learning_rate": 4.53417716530089e-06, + "loss": 0.7624, + "step": 27165 + }, + { + "epoch": 0.5438230363085854, + "grad_norm": 1.0552375316619873, + "learning_rate": 4.533854393237865e-06, + "loss": 0.3161, + "step": 27166 + }, + { + "epoch": 0.5438430548257138, + "grad_norm": 1.03106689453125, + "learning_rate": 4.533531623134416e-06, + "loss": 0.2446, + "step": 27167 + }, + { + "epoch": 0.5438630733428421, + "grad_norm": 1.0552338361740112, + "learning_rate": 4.5332088549918995e-06, + "loss": 0.3001, + "step": 27168 + }, + { + "epoch": 0.5438830918599705, + "grad_norm": 1.2792277336120605, + "learning_rate": 4.532886088811674e-06, + "loss": 0.2941, + "step": 27169 + }, + { + "epoch": 0.5439031103770988, + "grad_norm": 1.1528831720352173, + "learning_rate": 4.532563324595095e-06, + "loss": 0.3348, + "step": 27170 + }, + { + "epoch": 0.5439231288942271, + "grad_norm": 0.9840940237045288, + "learning_rate": 4.532240562343517e-06, + "loss": 0.2589, + "step": 27171 + }, + { + "epoch": 0.5439431474113555, + "grad_norm": 1.2391300201416016, + "learning_rate": 4.531917802058301e-06, + "loss": 0.325, + "step": 27172 + }, + { + "epoch": 0.5439631659284838, + "grad_norm": 1.3487164974212646, + "learning_rate": 4.5315950437408005e-06, + "loss": 0.3259, + "step": 27173 + }, + { + "epoch": 0.5439831844456122, + "grad_norm": 1.0481849908828735, + "learning_rate": 4.531272287392375e-06, + "loss": 0.3313, + "step": 27174 + }, + { + "epoch": 0.5440032029627405, + "grad_norm": 1.1815268993377686, + "learning_rate": 4.530949533014378e-06, + "loss": 0.3063, + "step": 27175 + }, + { + "epoch": 0.5440232214798689, + "grad_norm": 1.0553936958312988, + "learning_rate": 4.53062678060817e-06, + "loss": 0.2941, + "step": 27176 + }, + { + "epoch": 0.5440432399969972, + "grad_norm": 1.301134705543518, + "learning_rate": 4.530304030175106e-06, + "loss": 0.3049, + "step": 27177 + }, + { + "epoch": 0.5440632585141255, + "grad_norm": 1.1007431745529175, + "learning_rate": 4.529981281716543e-06, + "loss": 0.306, + "step": 27178 + }, + { + "epoch": 0.5440832770312539, + "grad_norm": 1.0512317419052124, + "learning_rate": 4.529658535233836e-06, + "loss": 0.3246, + "step": 27179 + }, + { + "epoch": 0.5441032955483822, + "grad_norm": 1.1993557214736938, + "learning_rate": 4.529335790728345e-06, + "loss": 0.3257, + "step": 27180 + }, + { + "epoch": 0.5441233140655106, + "grad_norm": 1.0270124673843384, + "learning_rate": 4.529013048201423e-06, + "loss": 0.2822, + "step": 27181 + }, + { + "epoch": 0.5441433325826389, + "grad_norm": 1.0289219617843628, + "learning_rate": 4.528690307654431e-06, + "loss": 0.2848, + "step": 27182 + }, + { + "epoch": 0.5441633510997673, + "grad_norm": 1.117078423500061, + "learning_rate": 4.528367569088723e-06, + "loss": 0.3374, + "step": 27183 + }, + { + "epoch": 0.5441833696168956, + "grad_norm": 1.0915859937667847, + "learning_rate": 4.528044832505654e-06, + "loss": 0.2726, + "step": 27184 + }, + { + "epoch": 0.544203388134024, + "grad_norm": 1.0162996053695679, + "learning_rate": 4.527722097906585e-06, + "loss": 0.2668, + "step": 27185 + }, + { + "epoch": 0.5442234066511523, + "grad_norm": 1.2287144660949707, + "learning_rate": 4.52739936529287e-06, + "loss": 0.3084, + "step": 27186 + }, + { + "epoch": 0.5442434251682806, + "grad_norm": 1.0388613939285278, + "learning_rate": 4.527076634665867e-06, + "loss": 0.3425, + "step": 27187 + }, + { + "epoch": 0.544263443685409, + "grad_norm": 1.1747117042541504, + "learning_rate": 4.526753906026933e-06, + "loss": 0.341, + "step": 27188 + }, + { + "epoch": 0.5442834622025373, + "grad_norm": 1.0064648389816284, + "learning_rate": 4.526431179377421e-06, + "loss": 0.277, + "step": 27189 + }, + { + "epoch": 0.5443034807196657, + "grad_norm": 0.9801376461982727, + "learning_rate": 4.526108454718692e-06, + "loss": 0.2842, + "step": 27190 + }, + { + "epoch": 0.544323499236794, + "grad_norm": 1.1122573614120483, + "learning_rate": 4.525785732052102e-06, + "loss": 0.2903, + "step": 27191 + }, + { + "epoch": 0.5443435177539224, + "grad_norm": 1.9425708055496216, + "learning_rate": 4.525463011379006e-06, + "loss": 0.8429, + "step": 27192 + }, + { + "epoch": 0.5443635362710507, + "grad_norm": 1.0012547969818115, + "learning_rate": 4.52514029270076e-06, + "loss": 0.2999, + "step": 27193 + }, + { + "epoch": 0.544383554788179, + "grad_norm": 1.013414978981018, + "learning_rate": 4.524817576018723e-06, + "loss": 0.3082, + "step": 27194 + }, + { + "epoch": 0.5444035733053074, + "grad_norm": 1.127160668373108, + "learning_rate": 4.5244948613342506e-06, + "loss": 0.391, + "step": 27195 + }, + { + "epoch": 0.5444235918224357, + "grad_norm": 1.1119509935379028, + "learning_rate": 4.5241721486487e-06, + "loss": 0.3426, + "step": 27196 + }, + { + "epoch": 0.5444436103395641, + "grad_norm": 1.0456984043121338, + "learning_rate": 4.523849437963428e-06, + "loss": 0.3077, + "step": 27197 + }, + { + "epoch": 0.5444636288566924, + "grad_norm": 1.150251030921936, + "learning_rate": 4.523526729279788e-06, + "loss": 0.2895, + "step": 27198 + }, + { + "epoch": 0.5444836473738208, + "grad_norm": 1.0778062343597412, + "learning_rate": 4.52320402259914e-06, + "loss": 0.3311, + "step": 27199 + }, + { + "epoch": 0.5445036658909491, + "grad_norm": 1.0241612195968628, + "learning_rate": 4.522881317922841e-06, + "loss": 0.2832, + "step": 27200 + }, + { + "epoch": 0.5445236844080775, + "grad_norm": 1.2063323259353638, + "learning_rate": 4.5225586152522464e-06, + "loss": 0.3523, + "step": 27201 + }, + { + "epoch": 0.5445437029252058, + "grad_norm": 1.0358991622924805, + "learning_rate": 4.52223591458871e-06, + "loss": 0.2892, + "step": 27202 + }, + { + "epoch": 0.5445637214423341, + "grad_norm": 1.117919683456421, + "learning_rate": 4.521913215933593e-06, + "loss": 0.308, + "step": 27203 + }, + { + "epoch": 0.5445837399594625, + "grad_norm": 1.1007097959518433, + "learning_rate": 4.521590519288251e-06, + "loss": 0.3095, + "step": 27204 + }, + { + "epoch": 0.5446037584765908, + "grad_norm": 1.2416906356811523, + "learning_rate": 4.521267824654038e-06, + "loss": 0.2875, + "step": 27205 + }, + { + "epoch": 0.5446237769937192, + "grad_norm": 1.0833691358566284, + "learning_rate": 4.520945132032311e-06, + "loss": 0.2848, + "step": 27206 + }, + { + "epoch": 0.5446437955108475, + "grad_norm": 1.1753469705581665, + "learning_rate": 4.520622441424428e-06, + "loss": 0.2975, + "step": 27207 + }, + { + "epoch": 0.5446638140279759, + "grad_norm": 1.1894420385360718, + "learning_rate": 4.520299752831747e-06, + "loss": 0.2883, + "step": 27208 + }, + { + "epoch": 0.5446838325451042, + "grad_norm": 1.0087931156158447, + "learning_rate": 4.519977066255622e-06, + "loss": 0.2737, + "step": 27209 + }, + { + "epoch": 0.5447038510622325, + "grad_norm": 1.1560676097869873, + "learning_rate": 4.51965438169741e-06, + "loss": 0.2826, + "step": 27210 + }, + { + "epoch": 0.5447238695793609, + "grad_norm": 1.2418270111083984, + "learning_rate": 4.5193316991584654e-06, + "loss": 0.3231, + "step": 27211 + }, + { + "epoch": 0.5447438880964892, + "grad_norm": 1.0970408916473389, + "learning_rate": 4.519009018640148e-06, + "loss": 0.2561, + "step": 27212 + }, + { + "epoch": 0.5447639066136176, + "grad_norm": 1.1027143001556396, + "learning_rate": 4.518686340143816e-06, + "loss": 0.3222, + "step": 27213 + }, + { + "epoch": 0.5447839251307459, + "grad_norm": 1.1779847145080566, + "learning_rate": 4.51836366367082e-06, + "loss": 0.3379, + "step": 27214 + }, + { + "epoch": 0.5448039436478743, + "grad_norm": 1.0759754180908203, + "learning_rate": 4.51804098922252e-06, + "loss": 0.305, + "step": 27215 + }, + { + "epoch": 0.5448239621650026, + "grad_norm": 1.9032576084136963, + "learning_rate": 4.517718316800272e-06, + "loss": 0.8389, + "step": 27216 + }, + { + "epoch": 0.544843980682131, + "grad_norm": 1.1686925888061523, + "learning_rate": 4.517395646405434e-06, + "loss": 0.2795, + "step": 27217 + }, + { + "epoch": 0.5448639991992593, + "grad_norm": 1.0829933881759644, + "learning_rate": 4.5170729780393594e-06, + "loss": 0.3121, + "step": 27218 + }, + { + "epoch": 0.5448840177163876, + "grad_norm": 1.2635196447372437, + "learning_rate": 4.516750311703407e-06, + "loss": 0.332, + "step": 27219 + }, + { + "epoch": 0.544904036233516, + "grad_norm": 0.969291090965271, + "learning_rate": 4.51642764739893e-06, + "loss": 0.281, + "step": 27220 + }, + { + "epoch": 0.5449240547506443, + "grad_norm": 1.1023049354553223, + "learning_rate": 4.516104985127289e-06, + "loss": 0.2928, + "step": 27221 + }, + { + "epoch": 0.5449440732677727, + "grad_norm": 1.163461685180664, + "learning_rate": 4.515782324889839e-06, + "loss": 0.2753, + "step": 27222 + }, + { + "epoch": 0.544964091784901, + "grad_norm": 2.0871777534484863, + "learning_rate": 4.515459666687936e-06, + "loss": 0.7281, + "step": 27223 + }, + { + "epoch": 0.5449841103020294, + "grad_norm": 1.9578266143798828, + "learning_rate": 4.515137010522934e-06, + "loss": 0.7839, + "step": 27224 + }, + { + "epoch": 0.5450041288191577, + "grad_norm": 1.0381730794906616, + "learning_rate": 4.514814356396192e-06, + "loss": 0.2925, + "step": 27225 + }, + { + "epoch": 0.545024147336286, + "grad_norm": 1.8604637384414673, + "learning_rate": 4.514491704309068e-06, + "loss": 0.7324, + "step": 27226 + }, + { + "epoch": 0.5450441658534144, + "grad_norm": 1.025296926498413, + "learning_rate": 4.514169054262916e-06, + "loss": 0.2985, + "step": 27227 + }, + { + "epoch": 0.5450641843705427, + "grad_norm": 1.934380292892456, + "learning_rate": 4.513846406259093e-06, + "loss": 0.7798, + "step": 27228 + }, + { + "epoch": 0.5450842028876711, + "grad_norm": 1.0733586549758911, + "learning_rate": 4.5135237602989525e-06, + "loss": 0.2753, + "step": 27229 + }, + { + "epoch": 0.5451042214047994, + "grad_norm": 1.0406440496444702, + "learning_rate": 4.513201116383856e-06, + "loss": 0.314, + "step": 27230 + }, + { + "epoch": 0.5451242399219278, + "grad_norm": 1.051971197128296, + "learning_rate": 4.512878474515156e-06, + "loss": 0.2831, + "step": 27231 + }, + { + "epoch": 0.5451442584390561, + "grad_norm": 1.1756844520568848, + "learning_rate": 4.512555834694211e-06, + "loss": 0.3003, + "step": 27232 + }, + { + "epoch": 0.5451642769561845, + "grad_norm": 1.3979297876358032, + "learning_rate": 4.512233196922374e-06, + "loss": 0.3646, + "step": 27233 + }, + { + "epoch": 0.5451842954733128, + "grad_norm": 1.198603868484497, + "learning_rate": 4.5119105612010065e-06, + "loss": 0.3104, + "step": 27234 + }, + { + "epoch": 0.5452043139904411, + "grad_norm": 0.927551805973053, + "learning_rate": 4.5115879275314624e-06, + "loss": 0.2677, + "step": 27235 + }, + { + "epoch": 0.5452243325075695, + "grad_norm": 1.175838589668274, + "learning_rate": 4.511265295915097e-06, + "loss": 0.3074, + "step": 27236 + }, + { + "epoch": 0.5452443510246978, + "grad_norm": 1.122750163078308, + "learning_rate": 4.510942666353264e-06, + "loss": 0.3418, + "step": 27237 + }, + { + "epoch": 0.5452643695418262, + "grad_norm": 1.1355808973312378, + "learning_rate": 4.510620038847325e-06, + "loss": 0.3193, + "step": 27238 + }, + { + "epoch": 0.5452843880589545, + "grad_norm": 1.0896590948104858, + "learning_rate": 4.5102974133986345e-06, + "loss": 0.3283, + "step": 27239 + }, + { + "epoch": 0.5453044065760829, + "grad_norm": 1.0883923768997192, + "learning_rate": 4.5099747900085485e-06, + "loss": 0.3427, + "step": 27240 + }, + { + "epoch": 0.5453244250932112, + "grad_norm": 1.2021998167037964, + "learning_rate": 4.509652168678424e-06, + "loss": 0.2977, + "step": 27241 + }, + { + "epoch": 0.5453444436103395, + "grad_norm": 1.0786854028701782, + "learning_rate": 4.509329549409612e-06, + "loss": 0.2847, + "step": 27242 + }, + { + "epoch": 0.5453644621274679, + "grad_norm": 1.0693423748016357, + "learning_rate": 4.5090069322034765e-06, + "loss": 0.3335, + "step": 27243 + }, + { + "epoch": 0.5453844806445962, + "grad_norm": 1.9891825914382935, + "learning_rate": 4.5086843170613695e-06, + "loss": 0.7301, + "step": 27244 + }, + { + "epoch": 0.5454044991617246, + "grad_norm": 1.0426706075668335, + "learning_rate": 4.508361703984648e-06, + "loss": 0.2922, + "step": 27245 + }, + { + "epoch": 0.5454245176788529, + "grad_norm": 1.1695060729980469, + "learning_rate": 4.508039092974666e-06, + "loss": 0.2863, + "step": 27246 + }, + { + "epoch": 0.5454445361959813, + "grad_norm": 1.2379133701324463, + "learning_rate": 4.5077164840327845e-06, + "loss": 0.3219, + "step": 27247 + }, + { + "epoch": 0.5454645547131096, + "grad_norm": 1.043603777885437, + "learning_rate": 4.507393877160357e-06, + "loss": 0.3264, + "step": 27248 + }, + { + "epoch": 0.545484573230238, + "grad_norm": 1.0477838516235352, + "learning_rate": 4.507071272358739e-06, + "loss": 0.3204, + "step": 27249 + }, + { + "epoch": 0.5455045917473663, + "grad_norm": 1.2368634939193726, + "learning_rate": 4.506748669629287e-06, + "loss": 0.3314, + "step": 27250 + }, + { + "epoch": 0.5455246102644946, + "grad_norm": 1.1758478879928589, + "learning_rate": 4.506426068973356e-06, + "loss": 0.2771, + "step": 27251 + }, + { + "epoch": 0.545544628781623, + "grad_norm": 1.2149587869644165, + "learning_rate": 4.506103470392306e-06, + "loss": 0.3236, + "step": 27252 + }, + { + "epoch": 0.5455646472987513, + "grad_norm": 1.3028886318206787, + "learning_rate": 4.50578087388749e-06, + "loss": 0.3465, + "step": 27253 + }, + { + "epoch": 0.5455846658158797, + "grad_norm": 1.194625735282898, + "learning_rate": 4.505458279460265e-06, + "loss": 0.3347, + "step": 27254 + }, + { + "epoch": 0.545604684333008, + "grad_norm": 1.1005584001541138, + "learning_rate": 4.505135687111985e-06, + "loss": 0.2982, + "step": 27255 + }, + { + "epoch": 0.5456247028501364, + "grad_norm": 1.1345773935317993, + "learning_rate": 4.5048130968440104e-06, + "loss": 0.3613, + "step": 27256 + }, + { + "epoch": 0.5456447213672647, + "grad_norm": 1.0981829166412354, + "learning_rate": 4.504490508657694e-06, + "loss": 0.3269, + "step": 27257 + }, + { + "epoch": 0.545664739884393, + "grad_norm": 1.1348214149475098, + "learning_rate": 4.5041679225543936e-06, + "loss": 0.3056, + "step": 27258 + }, + { + "epoch": 0.5456847584015214, + "grad_norm": 1.1506863832473755, + "learning_rate": 4.503845338535463e-06, + "loss": 0.3058, + "step": 27259 + }, + { + "epoch": 0.5457047769186497, + "grad_norm": 1.1704738140106201, + "learning_rate": 4.503522756602262e-06, + "loss": 0.2922, + "step": 27260 + }, + { + "epoch": 0.5457247954357781, + "grad_norm": 1.943738341331482, + "learning_rate": 4.503200176756143e-06, + "loss": 0.7269, + "step": 27261 + }, + { + "epoch": 0.5457448139529064, + "grad_norm": 1.2313816547393799, + "learning_rate": 4.5028775989984655e-06, + "loss": 0.3259, + "step": 27262 + }, + { + "epoch": 0.5457648324700348, + "grad_norm": 1.1785625219345093, + "learning_rate": 4.502555023330582e-06, + "loss": 0.289, + "step": 27263 + }, + { + "epoch": 0.5457848509871631, + "grad_norm": 1.3089854717254639, + "learning_rate": 4.502232449753848e-06, + "loss": 0.3112, + "step": 27264 + }, + { + "epoch": 0.5458048695042915, + "grad_norm": 1.0541785955429077, + "learning_rate": 4.501909878269624e-06, + "loss": 0.3087, + "step": 27265 + }, + { + "epoch": 0.5458248880214198, + "grad_norm": 1.056536316871643, + "learning_rate": 4.501587308879265e-06, + "loss": 0.2696, + "step": 27266 + }, + { + "epoch": 0.5458449065385481, + "grad_norm": 1.1465011835098267, + "learning_rate": 4.501264741584124e-06, + "loss": 0.2523, + "step": 27267 + }, + { + "epoch": 0.5458649250556765, + "grad_norm": 1.111759066581726, + "learning_rate": 4.500942176385558e-06, + "loss": 0.3155, + "step": 27268 + }, + { + "epoch": 0.5458849435728048, + "grad_norm": 1.2098273038864136, + "learning_rate": 4.500619613284925e-06, + "loss": 0.3556, + "step": 27269 + }, + { + "epoch": 0.5459049620899332, + "grad_norm": 1.0692960023880005, + "learning_rate": 4.500297052283579e-06, + "loss": 0.3308, + "step": 27270 + }, + { + "epoch": 0.5459249806070615, + "grad_norm": 1.2502394914627075, + "learning_rate": 4.499974493382877e-06, + "loss": 0.2951, + "step": 27271 + }, + { + "epoch": 0.5459449991241899, + "grad_norm": 1.1370534896850586, + "learning_rate": 4.499651936584175e-06, + "loss": 0.2993, + "step": 27272 + }, + { + "epoch": 0.5459650176413182, + "grad_norm": 1.134567379951477, + "learning_rate": 4.499329381888827e-06, + "loss": 0.3173, + "step": 27273 + }, + { + "epoch": 0.5459850361584465, + "grad_norm": 1.0379786491394043, + "learning_rate": 4.499006829298193e-06, + "loss": 0.3027, + "step": 27274 + }, + { + "epoch": 0.5460050546755749, + "grad_norm": 1.2511669397354126, + "learning_rate": 4.498684278813626e-06, + "loss": 0.3185, + "step": 27275 + }, + { + "epoch": 0.5460250731927032, + "grad_norm": 1.2884565591812134, + "learning_rate": 4.49836173043648e-06, + "loss": 0.3185, + "step": 27276 + }, + { + "epoch": 0.5460450917098316, + "grad_norm": 1.1711102724075317, + "learning_rate": 4.498039184168114e-06, + "loss": 0.3209, + "step": 27277 + }, + { + "epoch": 0.5460651102269599, + "grad_norm": 1.1283601522445679, + "learning_rate": 4.497716640009884e-06, + "loss": 0.322, + "step": 27278 + }, + { + "epoch": 0.5460851287440883, + "grad_norm": 1.2309647798538208, + "learning_rate": 4.497394097963146e-06, + "loss": 0.279, + "step": 27279 + }, + { + "epoch": 0.5461051472612166, + "grad_norm": 1.1339476108551025, + "learning_rate": 4.497071558029255e-06, + "loss": 0.3043, + "step": 27280 + }, + { + "epoch": 0.546125165778345, + "grad_norm": 1.0781877040863037, + "learning_rate": 4.496749020209566e-06, + "loss": 0.2786, + "step": 27281 + }, + { + "epoch": 0.5461451842954733, + "grad_norm": 1.0198917388916016, + "learning_rate": 4.496426484505435e-06, + "loss": 0.2799, + "step": 27282 + }, + { + "epoch": 0.5461652028126016, + "grad_norm": 1.245138168334961, + "learning_rate": 4.4961039509182195e-06, + "loss": 0.2949, + "step": 27283 + }, + { + "epoch": 0.54618522132973, + "grad_norm": 1.1245533227920532, + "learning_rate": 4.495781419449276e-06, + "loss": 0.3551, + "step": 27284 + }, + { + "epoch": 0.5462052398468583, + "grad_norm": 1.2264870405197144, + "learning_rate": 4.4954588900999585e-06, + "loss": 0.2969, + "step": 27285 + }, + { + "epoch": 0.5462252583639867, + "grad_norm": 1.0763615369796753, + "learning_rate": 4.4951363628716206e-06, + "loss": 0.3153, + "step": 27286 + }, + { + "epoch": 0.546245276881115, + "grad_norm": 1.1421043872833252, + "learning_rate": 4.494813837765624e-06, + "loss": 0.3112, + "step": 27287 + }, + { + "epoch": 0.5462652953982434, + "grad_norm": 1.938918948173523, + "learning_rate": 4.49449131478332e-06, + "loss": 0.783, + "step": 27288 + }, + { + "epoch": 0.5462853139153717, + "grad_norm": 1.8352938890457153, + "learning_rate": 4.494168793926065e-06, + "loss": 0.7167, + "step": 27289 + }, + { + "epoch": 0.5463053324325, + "grad_norm": 1.110305905342102, + "learning_rate": 4.493846275195215e-06, + "loss": 0.3741, + "step": 27290 + }, + { + "epoch": 0.5463253509496284, + "grad_norm": 1.134617567062378, + "learning_rate": 4.493523758592128e-06, + "loss": 0.3079, + "step": 27291 + }, + { + "epoch": 0.5463453694667567, + "grad_norm": 0.9944906830787659, + "learning_rate": 4.493201244118158e-06, + "loss": 0.281, + "step": 27292 + }, + { + "epoch": 0.5463653879838851, + "grad_norm": 1.0778276920318604, + "learning_rate": 4.4928787317746614e-06, + "loss": 0.3037, + "step": 27293 + }, + { + "epoch": 0.5463854065010134, + "grad_norm": 1.2270461320877075, + "learning_rate": 4.492556221562993e-06, + "loss": 0.3182, + "step": 27294 + }, + { + "epoch": 0.5464054250181418, + "grad_norm": 1.202942132949829, + "learning_rate": 4.492233713484508e-06, + "loss": 0.3199, + "step": 27295 + }, + { + "epoch": 0.5464254435352701, + "grad_norm": 1.00161612033844, + "learning_rate": 4.491911207540563e-06, + "loss": 0.2765, + "step": 27296 + }, + { + "epoch": 0.5464454620523985, + "grad_norm": 1.1237523555755615, + "learning_rate": 4.4915887037325155e-06, + "loss": 0.3404, + "step": 27297 + }, + { + "epoch": 0.5464654805695268, + "grad_norm": 0.9730547070503235, + "learning_rate": 4.49126620206172e-06, + "loss": 0.2626, + "step": 27298 + }, + { + "epoch": 0.5464854990866551, + "grad_norm": 2.1399223804473877, + "learning_rate": 4.4909437025295286e-06, + "loss": 0.7815, + "step": 27299 + }, + { + "epoch": 0.5465055176037835, + "grad_norm": 1.8630629777908325, + "learning_rate": 4.490621205137304e-06, + "loss": 0.7907, + "step": 27300 + }, + { + "epoch": 0.5465255361209118, + "grad_norm": 1.0766725540161133, + "learning_rate": 4.490298709886397e-06, + "loss": 0.2878, + "step": 27301 + }, + { + "epoch": 0.5465455546380402, + "grad_norm": 1.0966365337371826, + "learning_rate": 4.489976216778164e-06, + "loss": 0.2597, + "step": 27302 + }, + { + "epoch": 0.5465655731551685, + "grad_norm": 1.0514717102050781, + "learning_rate": 4.489653725813962e-06, + "loss": 0.3355, + "step": 27303 + }, + { + "epoch": 0.5465855916722969, + "grad_norm": 1.0202109813690186, + "learning_rate": 4.489331236995145e-06, + "loss": 0.2572, + "step": 27304 + }, + { + "epoch": 0.5466056101894252, + "grad_norm": 1.2089470624923706, + "learning_rate": 4.4890087503230705e-06, + "loss": 0.3076, + "step": 27305 + }, + { + "epoch": 0.5466256287065535, + "grad_norm": 1.021854281425476, + "learning_rate": 4.488686265799093e-06, + "loss": 0.277, + "step": 27306 + }, + { + "epoch": 0.5466456472236819, + "grad_norm": 1.1687829494476318, + "learning_rate": 4.488363783424568e-06, + "loss": 0.3377, + "step": 27307 + }, + { + "epoch": 0.5466656657408102, + "grad_norm": 1.7389209270477295, + "learning_rate": 4.4880413032008515e-06, + "loss": 0.7865, + "step": 27308 + }, + { + "epoch": 0.5466856842579386, + "grad_norm": 1.0048975944519043, + "learning_rate": 4.487718825129299e-06, + "loss": 0.3152, + "step": 27309 + }, + { + "epoch": 0.5467057027750669, + "grad_norm": 1.0345909595489502, + "learning_rate": 4.487396349211268e-06, + "loss": 0.2894, + "step": 27310 + }, + { + "epoch": 0.5467257212921953, + "grad_norm": 1.060637354850769, + "learning_rate": 4.487073875448112e-06, + "loss": 0.3184, + "step": 27311 + }, + { + "epoch": 0.5467457398093236, + "grad_norm": 1.0682841539382935, + "learning_rate": 4.486751403841187e-06, + "loss": 0.3111, + "step": 27312 + }, + { + "epoch": 0.546765758326452, + "grad_norm": 1.9550138711929321, + "learning_rate": 4.486428934391846e-06, + "loss": 0.7905, + "step": 27313 + }, + { + "epoch": 0.5467857768435803, + "grad_norm": 1.1848130226135254, + "learning_rate": 4.48610646710145e-06, + "loss": 0.262, + "step": 27314 + }, + { + "epoch": 0.5468057953607086, + "grad_norm": 1.0557630062103271, + "learning_rate": 4.4857840019713505e-06, + "loss": 0.2941, + "step": 27315 + }, + { + "epoch": 0.546825813877837, + "grad_norm": 1.2733733654022217, + "learning_rate": 4.485461539002905e-06, + "loss": 0.2894, + "step": 27316 + }, + { + "epoch": 0.5468458323949653, + "grad_norm": 1.2622623443603516, + "learning_rate": 4.485139078197467e-06, + "loss": 0.3102, + "step": 27317 + }, + { + "epoch": 0.5468658509120937, + "grad_norm": 1.1978442668914795, + "learning_rate": 4.484816619556396e-06, + "loss": 0.3112, + "step": 27318 + }, + { + "epoch": 0.546885869429222, + "grad_norm": 1.1087791919708252, + "learning_rate": 4.484494163081044e-06, + "loss": 0.3111, + "step": 27319 + }, + { + "epoch": 0.5469058879463504, + "grad_norm": 1.3551380634307861, + "learning_rate": 4.484171708772767e-06, + "loss": 0.2924, + "step": 27320 + }, + { + "epoch": 0.5469259064634787, + "grad_norm": 1.3631913661956787, + "learning_rate": 4.483849256632922e-06, + "loss": 0.3419, + "step": 27321 + }, + { + "epoch": 0.546945924980607, + "grad_norm": 1.1284972429275513, + "learning_rate": 4.483526806662862e-06, + "loss": 0.3019, + "step": 27322 + }, + { + "epoch": 0.5469659434977354, + "grad_norm": 1.9045565128326416, + "learning_rate": 4.483204358863947e-06, + "loss": 0.7922, + "step": 27323 + }, + { + "epoch": 0.5469859620148637, + "grad_norm": 1.7919447422027588, + "learning_rate": 4.482881913237528e-06, + "loss": 0.7738, + "step": 27324 + }, + { + "epoch": 0.5470059805319921, + "grad_norm": 1.235867977142334, + "learning_rate": 4.4825594697849635e-06, + "loss": 0.3072, + "step": 27325 + }, + { + "epoch": 0.5470259990491204, + "grad_norm": 1.1297625303268433, + "learning_rate": 4.482237028507605e-06, + "loss": 0.3201, + "step": 27326 + }, + { + "epoch": 0.5470460175662488, + "grad_norm": 1.1353249549865723, + "learning_rate": 4.481914589406813e-06, + "loss": 0.2891, + "step": 27327 + }, + { + "epoch": 0.5470660360833771, + "grad_norm": 1.2461955547332764, + "learning_rate": 4.481592152483939e-06, + "loss": 0.3346, + "step": 27328 + }, + { + "epoch": 0.5470860546005055, + "grad_norm": 1.179304599761963, + "learning_rate": 4.481269717740341e-06, + "loss": 0.3509, + "step": 27329 + }, + { + "epoch": 0.5471060731176338, + "grad_norm": 1.0962141752243042, + "learning_rate": 4.480947285177371e-06, + "loss": 0.32, + "step": 27330 + }, + { + "epoch": 0.5471260916347621, + "grad_norm": 1.1871676445007324, + "learning_rate": 4.48062485479639e-06, + "loss": 0.2912, + "step": 27331 + }, + { + "epoch": 0.5471461101518905, + "grad_norm": 1.9254955053329468, + "learning_rate": 4.480302426598751e-06, + "loss": 0.7799, + "step": 27332 + }, + { + "epoch": 0.5471661286690188, + "grad_norm": 1.3333947658538818, + "learning_rate": 4.479980000585806e-06, + "loss": 0.2968, + "step": 27333 + }, + { + "epoch": 0.5471861471861472, + "grad_norm": 1.0921263694763184, + "learning_rate": 4.479657576758915e-06, + "loss": 0.3466, + "step": 27334 + }, + { + "epoch": 0.5472061657032755, + "grad_norm": 1.0985374450683594, + "learning_rate": 4.479335155119429e-06, + "loss": 0.3219, + "step": 27335 + }, + { + "epoch": 0.5472261842204039, + "grad_norm": 1.0726776123046875, + "learning_rate": 4.479012735668709e-06, + "loss": 0.3112, + "step": 27336 + }, + { + "epoch": 0.5472462027375322, + "grad_norm": 1.185173511505127, + "learning_rate": 4.478690318408106e-06, + "loss": 0.3532, + "step": 27337 + }, + { + "epoch": 0.5472662212546605, + "grad_norm": 1.0359364748001099, + "learning_rate": 4.478367903338978e-06, + "loss": 0.2809, + "step": 27338 + }, + { + "epoch": 0.5472862397717889, + "grad_norm": 1.1294368505477905, + "learning_rate": 4.478045490462676e-06, + "loss": 0.3426, + "step": 27339 + }, + { + "epoch": 0.5473062582889172, + "grad_norm": 1.107245922088623, + "learning_rate": 4.477723079780562e-06, + "loss": 0.2634, + "step": 27340 + }, + { + "epoch": 0.5473262768060456, + "grad_norm": 1.1811168193817139, + "learning_rate": 4.477400671293985e-06, + "loss": 0.3062, + "step": 27341 + }, + { + "epoch": 0.5473462953231739, + "grad_norm": 1.0569723844528198, + "learning_rate": 4.477078265004305e-06, + "loss": 0.2971, + "step": 27342 + }, + { + "epoch": 0.5473663138403023, + "grad_norm": 1.1305142641067505, + "learning_rate": 4.476755860912875e-06, + "loss": 0.3512, + "step": 27343 + }, + { + "epoch": 0.5473863323574306, + "grad_norm": 1.2703214883804321, + "learning_rate": 4.476433459021049e-06, + "loss": 0.3554, + "step": 27344 + }, + { + "epoch": 0.547406350874559, + "grad_norm": 1.1623154878616333, + "learning_rate": 4.476111059330187e-06, + "loss": 0.3358, + "step": 27345 + }, + { + "epoch": 0.5474263693916873, + "grad_norm": 1.0665427446365356, + "learning_rate": 4.475788661841639e-06, + "loss": 0.3207, + "step": 27346 + }, + { + "epoch": 0.5474463879088156, + "grad_norm": 1.0850051641464233, + "learning_rate": 4.475466266556764e-06, + "loss": 0.2958, + "step": 27347 + }, + { + "epoch": 0.547466406425944, + "grad_norm": 1.2549128532409668, + "learning_rate": 4.475143873476914e-06, + "loss": 0.3075, + "step": 27348 + }, + { + "epoch": 0.5474864249430723, + "grad_norm": 2.048452377319336, + "learning_rate": 4.474821482603449e-06, + "loss": 0.7487, + "step": 27349 + }, + { + "epoch": 0.5475064434602007, + "grad_norm": 1.1480894088745117, + "learning_rate": 4.474499093937721e-06, + "loss": 0.3467, + "step": 27350 + }, + { + "epoch": 0.547526461977329, + "grad_norm": 1.1889472007751465, + "learning_rate": 4.474176707481085e-06, + "loss": 0.284, + "step": 27351 + }, + { + "epoch": 0.5475464804944574, + "grad_norm": 1.020646333694458, + "learning_rate": 4.473854323234895e-06, + "loss": 0.3087, + "step": 27352 + }, + { + "epoch": 0.5475664990115857, + "grad_norm": 1.0839061737060547, + "learning_rate": 4.473531941200511e-06, + "loss": 0.3115, + "step": 27353 + }, + { + "epoch": 0.547586517528714, + "grad_norm": 1.141636610031128, + "learning_rate": 4.473209561379284e-06, + "loss": 0.2848, + "step": 27354 + }, + { + "epoch": 0.5476065360458424, + "grad_norm": 1.1931408643722534, + "learning_rate": 4.472887183772572e-06, + "loss": 0.3339, + "step": 27355 + }, + { + "epoch": 0.5476265545629707, + "grad_norm": 1.166439414024353, + "learning_rate": 4.4725648083817285e-06, + "loss": 0.3345, + "step": 27356 + }, + { + "epoch": 0.5476465730800991, + "grad_norm": 1.1298692226409912, + "learning_rate": 4.472242435208107e-06, + "loss": 0.3512, + "step": 27357 + }, + { + "epoch": 0.5476665915972274, + "grad_norm": 1.1797058582305908, + "learning_rate": 4.471920064253067e-06, + "loss": 0.2936, + "step": 27358 + }, + { + "epoch": 0.5476866101143558, + "grad_norm": 1.0708202123641968, + "learning_rate": 4.471597695517959e-06, + "loss": 0.3318, + "step": 27359 + }, + { + "epoch": 0.5477066286314841, + "grad_norm": 1.0932940244674683, + "learning_rate": 4.471275329004144e-06, + "loss": 0.3373, + "step": 27360 + }, + { + "epoch": 0.5477266471486124, + "grad_norm": 1.065973162651062, + "learning_rate": 4.47095296471297e-06, + "loss": 0.3142, + "step": 27361 + }, + { + "epoch": 0.5477466656657408, + "grad_norm": 1.0168399810791016, + "learning_rate": 4.470630602645798e-06, + "loss": 0.2698, + "step": 27362 + }, + { + "epoch": 0.5477666841828691, + "grad_norm": 1.168777346611023, + "learning_rate": 4.470308242803982e-06, + "loss": 0.2952, + "step": 27363 + }, + { + "epoch": 0.5477867026999975, + "grad_norm": 1.3638333082199097, + "learning_rate": 4.4699858851888756e-06, + "loss": 0.3054, + "step": 27364 + }, + { + "epoch": 0.5478067212171258, + "grad_norm": 0.9791922569274902, + "learning_rate": 4.4696635298018335e-06, + "loss": 0.3128, + "step": 27365 + }, + { + "epoch": 0.5478267397342542, + "grad_norm": 1.0492897033691406, + "learning_rate": 4.469341176644212e-06, + "loss": 0.3116, + "step": 27366 + }, + { + "epoch": 0.5478467582513825, + "grad_norm": 1.0444256067276, + "learning_rate": 4.469018825717366e-06, + "loss": 0.3277, + "step": 27367 + }, + { + "epoch": 0.5478667767685109, + "grad_norm": 1.1518787145614624, + "learning_rate": 4.468696477022651e-06, + "loss": 0.3888, + "step": 27368 + }, + { + "epoch": 0.5478867952856392, + "grad_norm": 1.184441328048706, + "learning_rate": 4.468374130561422e-06, + "loss": 0.2628, + "step": 27369 + }, + { + "epoch": 0.5479068138027675, + "grad_norm": 2.018198013305664, + "learning_rate": 4.468051786335031e-06, + "loss": 0.772, + "step": 27370 + }, + { + "epoch": 0.5479268323198959, + "grad_norm": 1.0374823808670044, + "learning_rate": 4.467729444344839e-06, + "loss": 0.3231, + "step": 27371 + }, + { + "epoch": 0.5479468508370242, + "grad_norm": 1.277428150177002, + "learning_rate": 4.4674071045921964e-06, + "loss": 0.2823, + "step": 27372 + }, + { + "epoch": 0.5479668693541526, + "grad_norm": 1.870848536491394, + "learning_rate": 4.467084767078461e-06, + "loss": 0.7926, + "step": 27373 + }, + { + "epoch": 0.5479868878712809, + "grad_norm": 1.2366621494293213, + "learning_rate": 4.466762431804986e-06, + "loss": 0.2959, + "step": 27374 + }, + { + "epoch": 0.5480069063884093, + "grad_norm": 1.9673420190811157, + "learning_rate": 4.466440098773126e-06, + "loss": 0.7696, + "step": 27375 + }, + { + "epoch": 0.5480269249055376, + "grad_norm": 1.2486423254013062, + "learning_rate": 4.466117767984238e-06, + "loss": 0.3595, + "step": 27376 + }, + { + "epoch": 0.5480469434226659, + "grad_norm": 1.0400484800338745, + "learning_rate": 4.465795439439676e-06, + "loss": 0.2932, + "step": 27377 + }, + { + "epoch": 0.5480669619397943, + "grad_norm": 1.0853151082992554, + "learning_rate": 4.465473113140795e-06, + "loss": 0.2865, + "step": 27378 + }, + { + "epoch": 0.5480869804569226, + "grad_norm": 1.1750519275665283, + "learning_rate": 4.465150789088949e-06, + "loss": 0.3559, + "step": 27379 + }, + { + "epoch": 0.548106998974051, + "grad_norm": 1.1617223024368286, + "learning_rate": 4.464828467285494e-06, + "loss": 0.3091, + "step": 27380 + }, + { + "epoch": 0.5481270174911793, + "grad_norm": 1.1327619552612305, + "learning_rate": 4.464506147731786e-06, + "loss": 0.3112, + "step": 27381 + }, + { + "epoch": 0.5481470360083077, + "grad_norm": 2.017632007598877, + "learning_rate": 4.464183830429179e-06, + "loss": 0.8041, + "step": 27382 + }, + { + "epoch": 0.548167054525436, + "grad_norm": 1.0649476051330566, + "learning_rate": 4.463861515379027e-06, + "loss": 0.3228, + "step": 27383 + }, + { + "epoch": 0.5481870730425644, + "grad_norm": 0.9757369756698608, + "learning_rate": 4.463539202582686e-06, + "loss": 0.2928, + "step": 27384 + }, + { + "epoch": 0.5482070915596927, + "grad_norm": 1.168900966644287, + "learning_rate": 4.463216892041511e-06, + "loss": 0.2884, + "step": 27385 + }, + { + "epoch": 0.548227110076821, + "grad_norm": 1.094698429107666, + "learning_rate": 4.462894583756857e-06, + "loss": 0.318, + "step": 27386 + }, + { + "epoch": 0.5482471285939494, + "grad_norm": 0.9645431041717529, + "learning_rate": 4.462572277730079e-06, + "loss": 0.29, + "step": 27387 + }, + { + "epoch": 0.5482671471110777, + "grad_norm": 1.1265430450439453, + "learning_rate": 4.462249973962529e-06, + "loss": 0.3113, + "step": 27388 + }, + { + "epoch": 0.5482871656282061, + "grad_norm": 1.0101827383041382, + "learning_rate": 4.461927672455567e-06, + "loss": 0.3203, + "step": 27389 + }, + { + "epoch": 0.5483071841453344, + "grad_norm": 1.089026689529419, + "learning_rate": 4.461605373210545e-06, + "loss": 0.3514, + "step": 27390 + }, + { + "epoch": 0.5483272026624628, + "grad_norm": 1.018594741821289, + "learning_rate": 4.4612830762288175e-06, + "loss": 0.3425, + "step": 27391 + }, + { + "epoch": 0.5483472211795911, + "grad_norm": 1.1600536108016968, + "learning_rate": 4.460960781511739e-06, + "loss": 0.3082, + "step": 27392 + }, + { + "epoch": 0.5483672396967194, + "grad_norm": 1.2416353225708008, + "learning_rate": 4.4606384890606665e-06, + "loss": 0.2445, + "step": 27393 + }, + { + "epoch": 0.5483872582138478, + "grad_norm": 1.089136004447937, + "learning_rate": 4.460316198876954e-06, + "loss": 0.2913, + "step": 27394 + }, + { + "epoch": 0.5484072767309761, + "grad_norm": 1.096902847290039, + "learning_rate": 4.4599939109619565e-06, + "loss": 0.3119, + "step": 27395 + }, + { + "epoch": 0.5484272952481045, + "grad_norm": 1.2601642608642578, + "learning_rate": 4.459671625317029e-06, + "loss": 0.3142, + "step": 27396 + }, + { + "epoch": 0.5484473137652328, + "grad_norm": 1.0759894847869873, + "learning_rate": 4.459349341943522e-06, + "loss": 0.2914, + "step": 27397 + }, + { + "epoch": 0.5484673322823612, + "grad_norm": 1.0989911556243896, + "learning_rate": 4.459027060842796e-06, + "loss": 0.3042, + "step": 27398 + }, + { + "epoch": 0.5484873507994895, + "grad_norm": 1.0370744466781616, + "learning_rate": 4.4587047820162045e-06, + "loss": 0.3117, + "step": 27399 + }, + { + "epoch": 0.5485073693166179, + "grad_norm": 1.0982239246368408, + "learning_rate": 4.458382505465102e-06, + "loss": 0.2888, + "step": 27400 + }, + { + "epoch": 0.5485273878337462, + "grad_norm": 1.085814356803894, + "learning_rate": 4.458060231190841e-06, + "loss": 0.2528, + "step": 27401 + }, + { + "epoch": 0.5485474063508745, + "grad_norm": 1.1778573989868164, + "learning_rate": 4.45773795919478e-06, + "loss": 0.3288, + "step": 27402 + }, + { + "epoch": 0.5485674248680029, + "grad_norm": 0.9904807209968567, + "learning_rate": 4.457415689478272e-06, + "loss": 0.2739, + "step": 27403 + }, + { + "epoch": 0.5485874433851312, + "grad_norm": 1.2259482145309448, + "learning_rate": 4.457093422042669e-06, + "loss": 0.3093, + "step": 27404 + }, + { + "epoch": 0.5486074619022596, + "grad_norm": 1.0895248651504517, + "learning_rate": 4.456771156889331e-06, + "loss": 0.316, + "step": 27405 + }, + { + "epoch": 0.5486274804193879, + "grad_norm": 1.0887590646743774, + "learning_rate": 4.456448894019609e-06, + "loss": 0.322, + "step": 27406 + }, + { + "epoch": 0.5486474989365163, + "grad_norm": 1.0843712091445923, + "learning_rate": 4.45612663343486e-06, + "loss": 0.3035, + "step": 27407 + }, + { + "epoch": 0.5486675174536446, + "grad_norm": 1.0778963565826416, + "learning_rate": 4.455804375136438e-06, + "loss": 0.3612, + "step": 27408 + }, + { + "epoch": 0.5486875359707729, + "grad_norm": 1.1374820470809937, + "learning_rate": 4.455482119125696e-06, + "loss": 0.3244, + "step": 27409 + }, + { + "epoch": 0.5487075544879013, + "grad_norm": 1.1619799137115479, + "learning_rate": 4.455159865403989e-06, + "loss": 0.2968, + "step": 27410 + }, + { + "epoch": 0.5487275730050296, + "grad_norm": 1.7617158889770508, + "learning_rate": 4.454837613972673e-06, + "loss": 0.7612, + "step": 27411 + }, + { + "epoch": 0.548747591522158, + "grad_norm": 1.1171435117721558, + "learning_rate": 4.454515364833105e-06, + "loss": 0.2822, + "step": 27412 + }, + { + "epoch": 0.5487676100392863, + "grad_norm": 1.030306339263916, + "learning_rate": 4.454193117986636e-06, + "loss": 0.2755, + "step": 27413 + }, + { + "epoch": 0.5487876285564147, + "grad_norm": 1.1515518426895142, + "learning_rate": 4.453870873434619e-06, + "loss": 0.2725, + "step": 27414 + }, + { + "epoch": 0.548807647073543, + "grad_norm": 1.1838091611862183, + "learning_rate": 4.453548631178414e-06, + "loss": 0.3313, + "step": 27415 + }, + { + "epoch": 0.5488276655906714, + "grad_norm": 1.0642668008804321, + "learning_rate": 4.453226391219373e-06, + "loss": 0.282, + "step": 27416 + }, + { + "epoch": 0.5488476841077997, + "grad_norm": 0.9850391149520874, + "learning_rate": 4.452904153558849e-06, + "loss": 0.2547, + "step": 27417 + }, + { + "epoch": 0.548867702624928, + "grad_norm": 1.1089482307434082, + "learning_rate": 4.452581918198199e-06, + "loss": 0.2857, + "step": 27418 + }, + { + "epoch": 0.5488877211420564, + "grad_norm": 1.1861095428466797, + "learning_rate": 4.452259685138775e-06, + "loss": 0.288, + "step": 27419 + }, + { + "epoch": 0.5489077396591847, + "grad_norm": 0.9348976016044617, + "learning_rate": 4.451937454381937e-06, + "loss": 0.2648, + "step": 27420 + }, + { + "epoch": 0.5489277581763131, + "grad_norm": 1.1021069288253784, + "learning_rate": 4.4516152259290346e-06, + "loss": 0.2952, + "step": 27421 + }, + { + "epoch": 0.5489477766934414, + "grad_norm": 0.9605236649513245, + "learning_rate": 4.451292999781424e-06, + "loss": 0.2622, + "step": 27422 + }, + { + "epoch": 0.5489677952105698, + "grad_norm": 1.155725121498108, + "learning_rate": 4.450970775940457e-06, + "loss": 0.2892, + "step": 27423 + }, + { + "epoch": 0.5489878137276981, + "grad_norm": 1.0451276302337646, + "learning_rate": 4.450648554407493e-06, + "loss": 0.3087, + "step": 27424 + }, + { + "epoch": 0.5490078322448264, + "grad_norm": 1.289013385772705, + "learning_rate": 4.450326335183884e-06, + "loss": 0.3611, + "step": 27425 + }, + { + "epoch": 0.5490278507619548, + "grad_norm": 0.9657692313194275, + "learning_rate": 4.450004118270986e-06, + "loss": 0.2961, + "step": 27426 + }, + { + "epoch": 0.5490478692790831, + "grad_norm": 2.027989625930786, + "learning_rate": 4.449681903670151e-06, + "loss": 0.7922, + "step": 27427 + }, + { + "epoch": 0.5490678877962115, + "grad_norm": 1.0495673418045044, + "learning_rate": 4.449359691382733e-06, + "loss": 0.2866, + "step": 27428 + }, + { + "epoch": 0.5490879063133398, + "grad_norm": 2.013741970062256, + "learning_rate": 4.449037481410091e-06, + "loss": 0.7976, + "step": 27429 + }, + { + "epoch": 0.5491079248304682, + "grad_norm": 1.158820390701294, + "learning_rate": 4.448715273753576e-06, + "loss": 0.3159, + "step": 27430 + }, + { + "epoch": 0.5491279433475965, + "grad_norm": 1.1326502561569214, + "learning_rate": 4.448393068414544e-06, + "loss": 0.348, + "step": 27431 + }, + { + "epoch": 0.5491479618647249, + "grad_norm": 0.9694570899009705, + "learning_rate": 4.4480708653943475e-06, + "loss": 0.3021, + "step": 27432 + }, + { + "epoch": 0.5491679803818532, + "grad_norm": 1.0656791925430298, + "learning_rate": 4.447748664694343e-06, + "loss": 0.3153, + "step": 27433 + }, + { + "epoch": 0.5491879988989815, + "grad_norm": 1.248763918876648, + "learning_rate": 4.447426466315885e-06, + "loss": 0.2791, + "step": 27434 + }, + { + "epoch": 0.5492080174161099, + "grad_norm": 1.161279559135437, + "learning_rate": 4.447104270260328e-06, + "loss": 0.2569, + "step": 27435 + }, + { + "epoch": 0.5492280359332382, + "grad_norm": 1.0997101068496704, + "learning_rate": 4.446782076529022e-06, + "loss": 0.304, + "step": 27436 + }, + { + "epoch": 0.5492480544503666, + "grad_norm": 1.1379761695861816, + "learning_rate": 4.446459885123327e-06, + "loss": 0.3105, + "step": 27437 + }, + { + "epoch": 0.5492680729674949, + "grad_norm": 1.106952428817749, + "learning_rate": 4.446137696044596e-06, + "loss": 0.3173, + "step": 27438 + }, + { + "epoch": 0.5492880914846233, + "grad_norm": 1.085198998451233, + "learning_rate": 4.445815509294184e-06, + "loss": 0.2886, + "step": 27439 + }, + { + "epoch": 0.5493081100017516, + "grad_norm": 1.153154730796814, + "learning_rate": 4.445493324873443e-06, + "loss": 0.3254, + "step": 27440 + }, + { + "epoch": 0.5493281285188799, + "grad_norm": 1.9670730829238892, + "learning_rate": 4.445171142783728e-06, + "loss": 0.7822, + "step": 27441 + }, + { + "epoch": 0.5493481470360083, + "grad_norm": 1.1894619464874268, + "learning_rate": 4.444848963026396e-06, + "loss": 0.3188, + "step": 27442 + }, + { + "epoch": 0.5493681655531366, + "grad_norm": 1.1046632528305054, + "learning_rate": 4.444526785602797e-06, + "loss": 0.349, + "step": 27443 + }, + { + "epoch": 0.549388184070265, + "grad_norm": 1.146077275276184, + "learning_rate": 4.444204610514291e-06, + "loss": 0.3373, + "step": 27444 + }, + { + "epoch": 0.5494082025873933, + "grad_norm": 1.1356916427612305, + "learning_rate": 4.443882437762225e-06, + "loss": 0.2844, + "step": 27445 + }, + { + "epoch": 0.5494282211045217, + "grad_norm": 1.2533655166625977, + "learning_rate": 4.443560267347961e-06, + "loss": 0.3018, + "step": 27446 + }, + { + "epoch": 0.54944823962165, + "grad_norm": 1.1650221347808838, + "learning_rate": 4.44323809927285e-06, + "loss": 0.3274, + "step": 27447 + }, + { + "epoch": 0.5494682581387784, + "grad_norm": 1.8374215364456177, + "learning_rate": 4.442915933538246e-06, + "loss": 0.8294, + "step": 27448 + }, + { + "epoch": 0.5494882766559067, + "grad_norm": 1.0367326736450195, + "learning_rate": 4.442593770145502e-06, + "loss": 0.2758, + "step": 27449 + }, + { + "epoch": 0.549508295173035, + "grad_norm": 1.2150083780288696, + "learning_rate": 4.442271609095973e-06, + "loss": 0.3325, + "step": 27450 + }, + { + "epoch": 0.5495283136901634, + "grad_norm": 1.8036365509033203, + "learning_rate": 4.441949450391016e-06, + "loss": 0.81, + "step": 27451 + }, + { + "epoch": 0.5495483322072917, + "grad_norm": 1.1906862258911133, + "learning_rate": 4.441627294031984e-06, + "loss": 0.3156, + "step": 27452 + }, + { + "epoch": 0.5495683507244201, + "grad_norm": 1.1452010869979858, + "learning_rate": 4.44130514002023e-06, + "loss": 0.2974, + "step": 27453 + }, + { + "epoch": 0.5495883692415484, + "grad_norm": 1.1579228639602661, + "learning_rate": 4.4409829883571074e-06, + "loss": 0.3116, + "step": 27454 + }, + { + "epoch": 0.5496083877586768, + "grad_norm": 1.3895705938339233, + "learning_rate": 4.440660839043974e-06, + "loss": 0.3421, + "step": 27455 + }, + { + "epoch": 0.5496284062758051, + "grad_norm": 1.2207306623458862, + "learning_rate": 4.4403386920821815e-06, + "loss": 0.3276, + "step": 27456 + }, + { + "epoch": 0.5496484247929334, + "grad_norm": 1.168391227722168, + "learning_rate": 4.440016547473085e-06, + "loss": 0.3682, + "step": 27457 + }, + { + "epoch": 0.5496684433100618, + "grad_norm": 1.0685256719589233, + "learning_rate": 4.439694405218038e-06, + "loss": 0.2828, + "step": 27458 + }, + { + "epoch": 0.5496884618271901, + "grad_norm": 1.3526338338851929, + "learning_rate": 4.439372265318394e-06, + "loss": 0.3304, + "step": 27459 + }, + { + "epoch": 0.5497084803443185, + "grad_norm": 1.1036642789840698, + "learning_rate": 4.439050127775511e-06, + "loss": 0.2826, + "step": 27460 + }, + { + "epoch": 0.5497284988614468, + "grad_norm": 1.0582448244094849, + "learning_rate": 4.438727992590739e-06, + "loss": 0.3111, + "step": 27461 + }, + { + "epoch": 0.5497485173785752, + "grad_norm": 1.0909802913665771, + "learning_rate": 4.438405859765433e-06, + "loss": 0.2991, + "step": 27462 + }, + { + "epoch": 0.5497685358957035, + "grad_norm": 1.1224586963653564, + "learning_rate": 4.438083729300948e-06, + "loss": 0.3215, + "step": 27463 + }, + { + "epoch": 0.5497885544128319, + "grad_norm": 1.8314590454101562, + "learning_rate": 4.4377616011986394e-06, + "loss": 0.8185, + "step": 27464 + }, + { + "epoch": 0.5498085729299602, + "grad_norm": 2.107724666595459, + "learning_rate": 4.4374394754598595e-06, + "loss": 0.894, + "step": 27465 + }, + { + "epoch": 0.5498285914470885, + "grad_norm": 1.1323808431625366, + "learning_rate": 4.437117352085964e-06, + "loss": 0.3102, + "step": 27466 + }, + { + "epoch": 0.5498486099642169, + "grad_norm": 1.2169798612594604, + "learning_rate": 4.436795231078303e-06, + "loss": 0.3746, + "step": 27467 + }, + { + "epoch": 0.5498686284813452, + "grad_norm": 1.0003306865692139, + "learning_rate": 4.436473112438237e-06, + "loss": 0.2951, + "step": 27468 + }, + { + "epoch": 0.5498886469984736, + "grad_norm": 1.1294299364089966, + "learning_rate": 4.436150996167114e-06, + "loss": 0.316, + "step": 27469 + }, + { + "epoch": 0.5499086655156019, + "grad_norm": 1.104783535003662, + "learning_rate": 4.435828882266293e-06, + "loss": 0.3136, + "step": 27470 + }, + { + "epoch": 0.5499286840327303, + "grad_norm": 1.1135413646697998, + "learning_rate": 4.435506770737126e-06, + "loss": 0.3383, + "step": 27471 + }, + { + "epoch": 0.5499487025498586, + "grad_norm": 1.2304743528366089, + "learning_rate": 4.435184661580964e-06, + "loss": 0.3254, + "step": 27472 + }, + { + "epoch": 0.5499687210669869, + "grad_norm": 1.0457649230957031, + "learning_rate": 4.434862554799167e-06, + "loss": 0.3606, + "step": 27473 + }, + { + "epoch": 0.5499887395841153, + "grad_norm": 1.1268033981323242, + "learning_rate": 4.4345404503930864e-06, + "loss": 0.3269, + "step": 27474 + }, + { + "epoch": 0.5500087581012436, + "grad_norm": 0.9420168399810791, + "learning_rate": 4.434218348364076e-06, + "loss": 0.2683, + "step": 27475 + }, + { + "epoch": 0.550028776618372, + "grad_norm": 1.0672109127044678, + "learning_rate": 4.433896248713487e-06, + "loss": 0.3094, + "step": 27476 + }, + { + "epoch": 0.5500487951355003, + "grad_norm": 1.8499928712844849, + "learning_rate": 4.433574151442679e-06, + "loss": 0.755, + "step": 27477 + }, + { + "epoch": 0.5500688136526287, + "grad_norm": 1.1797298192977905, + "learning_rate": 4.433252056553003e-06, + "loss": 0.3001, + "step": 27478 + }, + { + "epoch": 0.550088832169757, + "grad_norm": 1.0841630697250366, + "learning_rate": 4.4329299640458145e-06, + "loss": 0.3048, + "step": 27479 + }, + { + "epoch": 0.5501088506868854, + "grad_norm": 1.016706943511963, + "learning_rate": 4.432607873922464e-06, + "loss": 0.3016, + "step": 27480 + }, + { + "epoch": 0.5501288692040137, + "grad_norm": 1.207039475440979, + "learning_rate": 4.432285786184309e-06, + "loss": 0.3152, + "step": 27481 + }, + { + "epoch": 0.550148887721142, + "grad_norm": 1.8929020166397095, + "learning_rate": 4.431963700832702e-06, + "loss": 0.7644, + "step": 27482 + }, + { + "epoch": 0.5501689062382704, + "grad_norm": 1.1184455156326294, + "learning_rate": 4.431641617868999e-06, + "loss": 0.3168, + "step": 27483 + }, + { + "epoch": 0.5501889247553987, + "grad_norm": 1.0766469240188599, + "learning_rate": 4.431319537294551e-06, + "loss": 0.2733, + "step": 27484 + }, + { + "epoch": 0.5502089432725271, + "grad_norm": 1.0596163272857666, + "learning_rate": 4.430997459110712e-06, + "loss": 0.2375, + "step": 27485 + }, + { + "epoch": 0.5502289617896554, + "grad_norm": 1.919823408126831, + "learning_rate": 4.430675383318839e-06, + "loss": 0.7211, + "step": 27486 + }, + { + "epoch": 0.5502489803067838, + "grad_norm": 1.2034416198730469, + "learning_rate": 4.430353309920284e-06, + "loss": 0.3123, + "step": 27487 + }, + { + "epoch": 0.5502689988239121, + "grad_norm": 1.177994966506958, + "learning_rate": 4.4300312389164015e-06, + "loss": 0.3181, + "step": 27488 + }, + { + "epoch": 0.5502890173410404, + "grad_norm": 1.0790127515792847, + "learning_rate": 4.429709170308545e-06, + "loss": 0.3119, + "step": 27489 + }, + { + "epoch": 0.5503090358581688, + "grad_norm": 1.1840592622756958, + "learning_rate": 4.429387104098065e-06, + "loss": 0.3241, + "step": 27490 + }, + { + "epoch": 0.5503290543752971, + "grad_norm": 1.0640418529510498, + "learning_rate": 4.429065040286323e-06, + "loss": 0.2947, + "step": 27491 + }, + { + "epoch": 0.5503490728924255, + "grad_norm": 1.1522098779678345, + "learning_rate": 4.4287429788746686e-06, + "loss": 0.3181, + "step": 27492 + }, + { + "epoch": 0.5503690914095538, + "grad_norm": 1.156654715538025, + "learning_rate": 4.428420919864453e-06, + "loss": 0.2753, + "step": 27493 + }, + { + "epoch": 0.5503891099266822, + "grad_norm": 0.9763086438179016, + "learning_rate": 4.4280988632570345e-06, + "loss": 0.3212, + "step": 27494 + }, + { + "epoch": 0.5504091284438105, + "grad_norm": 1.074623942375183, + "learning_rate": 4.427776809053764e-06, + "loss": 0.3403, + "step": 27495 + }, + { + "epoch": 0.5504291469609389, + "grad_norm": 0.9779897332191467, + "learning_rate": 4.427454757255999e-06, + "loss": 0.2998, + "step": 27496 + }, + { + "epoch": 0.5504491654780672, + "grad_norm": 1.046047568321228, + "learning_rate": 4.4271327078650905e-06, + "loss": 0.3025, + "step": 27497 + }, + { + "epoch": 0.5504691839951955, + "grad_norm": 1.870877742767334, + "learning_rate": 4.4268106608823905e-06, + "loss": 0.8123, + "step": 27498 + }, + { + "epoch": 0.5504892025123239, + "grad_norm": 1.0848684310913086, + "learning_rate": 4.426488616309258e-06, + "loss": 0.2851, + "step": 27499 + }, + { + "epoch": 0.5505092210294522, + "grad_norm": 1.1534777879714966, + "learning_rate": 4.4261665741470425e-06, + "loss": 0.2949, + "step": 27500 + }, + { + "epoch": 0.5505292395465806, + "grad_norm": 1.1119476556777954, + "learning_rate": 4.425844534397099e-06, + "loss": 0.2911, + "step": 27501 + }, + { + "epoch": 0.5505492580637089, + "grad_norm": 1.2546970844268799, + "learning_rate": 4.425522497060783e-06, + "loss": 0.2788, + "step": 27502 + }, + { + "epoch": 0.5505692765808373, + "grad_norm": 1.076298475265503, + "learning_rate": 4.425200462139444e-06, + "loss": 0.2993, + "step": 27503 + }, + { + "epoch": 0.5505892950979656, + "grad_norm": 1.1358855962753296, + "learning_rate": 4.424878429634441e-06, + "loss": 0.3214, + "step": 27504 + }, + { + "epoch": 0.5506093136150939, + "grad_norm": 1.7483556270599365, + "learning_rate": 4.4245563995471256e-06, + "loss": 0.8022, + "step": 27505 + }, + { + "epoch": 0.5506293321322223, + "grad_norm": 1.2225658893585205, + "learning_rate": 4.424234371878849e-06, + "loss": 0.3075, + "step": 27506 + }, + { + "epoch": 0.5506493506493506, + "grad_norm": 1.0398094654083252, + "learning_rate": 4.423912346630968e-06, + "loss": 0.2828, + "step": 27507 + }, + { + "epoch": 0.550669369166479, + "grad_norm": 1.1903228759765625, + "learning_rate": 4.4235903238048355e-06, + "loss": 0.3192, + "step": 27508 + }, + { + "epoch": 0.5506893876836073, + "grad_norm": 1.2075601816177368, + "learning_rate": 4.4232683034018066e-06, + "loss": 0.2973, + "step": 27509 + }, + { + "epoch": 0.5507094062007357, + "grad_norm": 1.1467621326446533, + "learning_rate": 4.422946285423233e-06, + "loss": 0.3171, + "step": 27510 + }, + { + "epoch": 0.550729424717864, + "grad_norm": 1.1795927286148071, + "learning_rate": 4.42262426987047e-06, + "loss": 0.3088, + "step": 27511 + }, + { + "epoch": 0.5507494432349924, + "grad_norm": 1.0803712606430054, + "learning_rate": 4.422302256744867e-06, + "loss": 0.2994, + "step": 27512 + }, + { + "epoch": 0.5507694617521207, + "grad_norm": 0.9296438694000244, + "learning_rate": 4.4219802460477836e-06, + "loss": 0.3189, + "step": 27513 + }, + { + "epoch": 0.550789480269249, + "grad_norm": 1.9491091966629028, + "learning_rate": 4.421658237780571e-06, + "loss": 0.7006, + "step": 27514 + }, + { + "epoch": 0.5508094987863774, + "grad_norm": 1.0139528512954712, + "learning_rate": 4.421336231944582e-06, + "loss": 0.288, + "step": 27515 + }, + { + "epoch": 0.5508295173035057, + "grad_norm": 1.1593996286392212, + "learning_rate": 4.421014228541169e-06, + "loss": 0.3428, + "step": 27516 + }, + { + "epoch": 0.5508495358206341, + "grad_norm": 1.1221113204956055, + "learning_rate": 4.420692227571691e-06, + "loss": 0.2844, + "step": 27517 + }, + { + "epoch": 0.5508695543377624, + "grad_norm": 1.2416576147079468, + "learning_rate": 4.420370229037497e-06, + "loss": 0.2865, + "step": 27518 + }, + { + "epoch": 0.5508895728548908, + "grad_norm": 1.0623191595077515, + "learning_rate": 4.420048232939941e-06, + "loss": 0.3281, + "step": 27519 + }, + { + "epoch": 0.5509095913720191, + "grad_norm": 1.1116836071014404, + "learning_rate": 4.419726239280379e-06, + "loss": 0.3494, + "step": 27520 + }, + { + "epoch": 0.5509296098891474, + "grad_norm": 1.2687335014343262, + "learning_rate": 4.41940424806016e-06, + "loss": 0.3082, + "step": 27521 + }, + { + "epoch": 0.5509496284062758, + "grad_norm": 2.0358479022979736, + "learning_rate": 4.419082259280644e-06, + "loss": 0.7712, + "step": 27522 + }, + { + "epoch": 0.5509696469234041, + "grad_norm": 1.2054044008255005, + "learning_rate": 4.41876027294318e-06, + "loss": 0.3473, + "step": 27523 + }, + { + "epoch": 0.5509896654405325, + "grad_norm": 1.0567678213119507, + "learning_rate": 4.418438289049123e-06, + "loss": 0.2963, + "step": 27524 + }, + { + "epoch": 0.5510096839576608, + "grad_norm": 1.2073922157287598, + "learning_rate": 4.418116307599825e-06, + "loss": 0.2541, + "step": 27525 + }, + { + "epoch": 0.5510297024747892, + "grad_norm": 1.2715835571289062, + "learning_rate": 4.417794328596642e-06, + "loss": 0.2791, + "step": 27526 + }, + { + "epoch": 0.5510497209919175, + "grad_norm": 1.088413119316101, + "learning_rate": 4.417472352040925e-06, + "loss": 0.2967, + "step": 27527 + }, + { + "epoch": 0.5510697395090459, + "grad_norm": 1.970794916152954, + "learning_rate": 4.4171503779340306e-06, + "loss": 0.8649, + "step": 27528 + }, + { + "epoch": 0.5510897580261742, + "grad_norm": 1.1006823778152466, + "learning_rate": 4.416828406277309e-06, + "loss": 0.3039, + "step": 27529 + }, + { + "epoch": 0.5511097765433025, + "grad_norm": 1.0815212726593018, + "learning_rate": 4.416506437072116e-06, + "loss": 0.2996, + "step": 27530 + }, + { + "epoch": 0.5511297950604309, + "grad_norm": 1.055611491203308, + "learning_rate": 4.416184470319806e-06, + "loss": 0.3071, + "step": 27531 + }, + { + "epoch": 0.5511498135775592, + "grad_norm": 1.2406442165374756, + "learning_rate": 4.415862506021728e-06, + "loss": 0.3053, + "step": 27532 + }, + { + "epoch": 0.5511698320946876, + "grad_norm": 1.2177003622055054, + "learning_rate": 4.415540544179241e-06, + "loss": 0.3059, + "step": 27533 + }, + { + "epoch": 0.5511898506118159, + "grad_norm": 1.3708019256591797, + "learning_rate": 4.415218584793692e-06, + "loss": 0.2885, + "step": 27534 + }, + { + "epoch": 0.5512098691289443, + "grad_norm": 1.0879216194152832, + "learning_rate": 4.4148966278664415e-06, + "loss": 0.2838, + "step": 27535 + }, + { + "epoch": 0.5512298876460726, + "grad_norm": 1.1372966766357422, + "learning_rate": 4.41457467339884e-06, + "loss": 0.2973, + "step": 27536 + }, + { + "epoch": 0.5512499061632009, + "grad_norm": 1.156794786453247, + "learning_rate": 4.41425272139224e-06, + "loss": 0.3218, + "step": 27537 + }, + { + "epoch": 0.5512699246803293, + "grad_norm": 1.1246955394744873, + "learning_rate": 4.413930771847993e-06, + "loss": 0.2988, + "step": 27538 + }, + { + "epoch": 0.5512899431974576, + "grad_norm": 1.1562564373016357, + "learning_rate": 4.413608824767458e-06, + "loss": 0.3073, + "step": 27539 + }, + { + "epoch": 0.551309961714586, + "grad_norm": 1.016279935836792, + "learning_rate": 4.413286880151983e-06, + "loss": 0.3037, + "step": 27540 + }, + { + "epoch": 0.5513299802317143, + "grad_norm": 1.1073130369186401, + "learning_rate": 4.412964938002925e-06, + "loss": 0.266, + "step": 27541 + }, + { + "epoch": 0.5513499987488427, + "grad_norm": 1.9586820602416992, + "learning_rate": 4.4126429983216364e-06, + "loss": 0.8318, + "step": 27542 + }, + { + "epoch": 0.551370017265971, + "grad_norm": 1.0696191787719727, + "learning_rate": 4.412321061109469e-06, + "loss": 0.3066, + "step": 27543 + }, + { + "epoch": 0.5513900357830994, + "grad_norm": 1.0529861450195312, + "learning_rate": 4.411999126367779e-06, + "loss": 0.3193, + "step": 27544 + }, + { + "epoch": 0.5514100543002277, + "grad_norm": 1.340654730796814, + "learning_rate": 4.411677194097917e-06, + "loss": 0.351, + "step": 27545 + }, + { + "epoch": 0.551430072817356, + "grad_norm": 1.3516390323638916, + "learning_rate": 4.411355264301237e-06, + "loss": 0.3399, + "step": 27546 + }, + { + "epoch": 0.5514500913344844, + "grad_norm": 1.1236861944198608, + "learning_rate": 4.411033336979093e-06, + "loss": 0.3513, + "step": 27547 + }, + { + "epoch": 0.5514701098516127, + "grad_norm": 1.1684426069259644, + "learning_rate": 4.410711412132839e-06, + "loss": 0.2959, + "step": 27548 + }, + { + "epoch": 0.5514901283687411, + "grad_norm": 1.1020472049713135, + "learning_rate": 4.410389489763828e-06, + "loss": 0.2865, + "step": 27549 + }, + { + "epoch": 0.5515101468858694, + "grad_norm": 1.8337355852127075, + "learning_rate": 4.410067569873412e-06, + "loss": 0.7872, + "step": 27550 + }, + { + "epoch": 0.5515301654029978, + "grad_norm": 1.0766570568084717, + "learning_rate": 4.409745652462943e-06, + "loss": 0.3231, + "step": 27551 + }, + { + "epoch": 0.5515501839201261, + "grad_norm": 1.0918060541152954, + "learning_rate": 4.409423737533779e-06, + "loss": 0.2954, + "step": 27552 + }, + { + "epoch": 0.5515702024372544, + "grad_norm": 1.1187050342559814, + "learning_rate": 4.409101825087269e-06, + "loss": 0.3097, + "step": 27553 + }, + { + "epoch": 0.5515902209543828, + "grad_norm": 1.154057264328003, + "learning_rate": 4.408779915124768e-06, + "loss": 0.3327, + "step": 27554 + }, + { + "epoch": 0.5516102394715111, + "grad_norm": 1.069133996963501, + "learning_rate": 4.4084580076476304e-06, + "loss": 0.2821, + "step": 27555 + }, + { + "epoch": 0.5516302579886395, + "grad_norm": 1.139475703239441, + "learning_rate": 4.408136102657205e-06, + "loss": 0.2738, + "step": 27556 + }, + { + "epoch": 0.5516502765057678, + "grad_norm": 1.012692928314209, + "learning_rate": 4.4078142001548506e-06, + "loss": 0.303, + "step": 27557 + }, + { + "epoch": 0.5516702950228962, + "grad_norm": 1.1691690683364868, + "learning_rate": 4.407492300141917e-06, + "loss": 0.2983, + "step": 27558 + }, + { + "epoch": 0.5516903135400245, + "grad_norm": 1.207658290863037, + "learning_rate": 4.4071704026197595e-06, + "loss": 0.3128, + "step": 27559 + }, + { + "epoch": 0.5517103320571529, + "grad_norm": 1.055840015411377, + "learning_rate": 4.406848507589727e-06, + "loss": 0.3192, + "step": 27560 + }, + { + "epoch": 0.5517303505742812, + "grad_norm": 1.3001646995544434, + "learning_rate": 4.406526615053178e-06, + "loss": 0.3762, + "step": 27561 + }, + { + "epoch": 0.5517503690914095, + "grad_norm": 1.082488775253296, + "learning_rate": 4.406204725011464e-06, + "loss": 0.2545, + "step": 27562 + }, + { + "epoch": 0.5517703876085379, + "grad_norm": 1.1749863624572754, + "learning_rate": 4.4058828374659375e-06, + "loss": 0.2837, + "step": 27563 + }, + { + "epoch": 0.5517904061256662, + "grad_norm": 1.0791387557983398, + "learning_rate": 4.40556095241795e-06, + "loss": 0.2834, + "step": 27564 + }, + { + "epoch": 0.5518104246427946, + "grad_norm": 1.0917713642120361, + "learning_rate": 4.405239069868857e-06, + "loss": 0.2977, + "step": 27565 + }, + { + "epoch": 0.5518304431599229, + "grad_norm": 1.2334210872650146, + "learning_rate": 4.4049171898200104e-06, + "loss": 0.3298, + "step": 27566 + }, + { + "epoch": 0.5518504616770513, + "grad_norm": 1.3245878219604492, + "learning_rate": 4.404595312272765e-06, + "loss": 0.3313, + "step": 27567 + }, + { + "epoch": 0.5518704801941796, + "grad_norm": 1.23260498046875, + "learning_rate": 4.404273437228473e-06, + "loss": 0.3402, + "step": 27568 + }, + { + "epoch": 0.5518904987113079, + "grad_norm": 1.2154003381729126, + "learning_rate": 4.403951564688486e-06, + "loss": 0.303, + "step": 27569 + }, + { + "epoch": 0.5519105172284363, + "grad_norm": 1.0259875059127808, + "learning_rate": 4.403629694654159e-06, + "loss": 0.3419, + "step": 27570 + }, + { + "epoch": 0.5519305357455646, + "grad_norm": 1.0365899801254272, + "learning_rate": 4.403307827126844e-06, + "loss": 0.3217, + "step": 27571 + }, + { + "epoch": 0.551950554262693, + "grad_norm": 1.856589436531067, + "learning_rate": 4.402985962107896e-06, + "loss": 0.8266, + "step": 27572 + }, + { + "epoch": 0.5519705727798213, + "grad_norm": 1.1598026752471924, + "learning_rate": 4.402664099598666e-06, + "loss": 0.3115, + "step": 27573 + }, + { + "epoch": 0.5519905912969497, + "grad_norm": 1.1100542545318604, + "learning_rate": 4.4023422396005055e-06, + "loss": 0.2903, + "step": 27574 + }, + { + "epoch": 0.552010609814078, + "grad_norm": 1.1691635847091675, + "learning_rate": 4.402020382114771e-06, + "loss": 0.3243, + "step": 27575 + }, + { + "epoch": 0.5520306283312064, + "grad_norm": 1.1193139553070068, + "learning_rate": 4.401698527142816e-06, + "loss": 0.3114, + "step": 27576 + }, + { + "epoch": 0.5520506468483347, + "grad_norm": 1.0365186929702759, + "learning_rate": 4.401376674685989e-06, + "loss": 0.3286, + "step": 27577 + }, + { + "epoch": 0.552070665365463, + "grad_norm": 1.0434613227844238, + "learning_rate": 4.401054824745646e-06, + "loss": 0.2778, + "step": 27578 + }, + { + "epoch": 0.5520906838825914, + "grad_norm": 1.0753681659698486, + "learning_rate": 4.40073297732314e-06, + "loss": 0.2842, + "step": 27579 + }, + { + "epoch": 0.5521107023997197, + "grad_norm": 1.1148802042007446, + "learning_rate": 4.400411132419824e-06, + "loss": 0.3047, + "step": 27580 + }, + { + "epoch": 0.5521307209168481, + "grad_norm": 1.215226173400879, + "learning_rate": 4.400089290037051e-06, + "loss": 0.3241, + "step": 27581 + }, + { + "epoch": 0.5521507394339764, + "grad_norm": 1.1158894300460815, + "learning_rate": 4.399767450176171e-06, + "loss": 0.3004, + "step": 27582 + }, + { + "epoch": 0.5521707579511048, + "grad_norm": 1.0941040515899658, + "learning_rate": 4.399445612838542e-06, + "loss": 0.2996, + "step": 27583 + }, + { + "epoch": 0.5521907764682331, + "grad_norm": 1.2128355503082275, + "learning_rate": 4.399123778025514e-06, + "loss": 0.3125, + "step": 27584 + }, + { + "epoch": 0.5522107949853614, + "grad_norm": 1.1965851783752441, + "learning_rate": 4.39880194573844e-06, + "loss": 0.2779, + "step": 27585 + }, + { + "epoch": 0.5522308135024898, + "grad_norm": 1.8785966634750366, + "learning_rate": 4.398480115978674e-06, + "loss": 0.8088, + "step": 27586 + }, + { + "epoch": 0.5522508320196181, + "grad_norm": 1.3062875270843506, + "learning_rate": 4.398158288747566e-06, + "loss": 0.2632, + "step": 27587 + }, + { + "epoch": 0.5522708505367465, + "grad_norm": 1.0111364126205444, + "learning_rate": 4.397836464046473e-06, + "loss": 0.3141, + "step": 27588 + }, + { + "epoch": 0.5522908690538748, + "grad_norm": 1.0860681533813477, + "learning_rate": 4.397514641876746e-06, + "loss": 0.2942, + "step": 27589 + }, + { + "epoch": 0.5523108875710032, + "grad_norm": 1.1806707382202148, + "learning_rate": 4.397192822239737e-06, + "loss": 0.3047, + "step": 27590 + }, + { + "epoch": 0.5523309060881315, + "grad_norm": 1.0768674612045288, + "learning_rate": 4.396871005136799e-06, + "loss": 0.3136, + "step": 27591 + }, + { + "epoch": 0.5523509246052599, + "grad_norm": 1.09376060962677, + "learning_rate": 4.396549190569286e-06, + "loss": 0.334, + "step": 27592 + }, + { + "epoch": 0.5523709431223882, + "grad_norm": 1.128492832183838, + "learning_rate": 4.3962273785385515e-06, + "loss": 0.3136, + "step": 27593 + }, + { + "epoch": 0.5523909616395165, + "grad_norm": 1.1190491914749146, + "learning_rate": 4.395905569045947e-06, + "loss": 0.2903, + "step": 27594 + }, + { + "epoch": 0.5524109801566449, + "grad_norm": 1.1303799152374268, + "learning_rate": 4.3955837620928256e-06, + "loss": 0.3427, + "step": 27595 + }, + { + "epoch": 0.5524309986737732, + "grad_norm": 1.2483488321304321, + "learning_rate": 4.395261957680537e-06, + "loss": 0.3187, + "step": 27596 + }, + { + "epoch": 0.5524510171909016, + "grad_norm": 1.9699679613113403, + "learning_rate": 4.3949401558104396e-06, + "loss": 0.7819, + "step": 27597 + }, + { + "epoch": 0.5524710357080299, + "grad_norm": 1.1251411437988281, + "learning_rate": 4.394618356483883e-06, + "loss": 0.3365, + "step": 27598 + }, + { + "epoch": 0.5524910542251583, + "grad_norm": 1.0100736618041992, + "learning_rate": 4.394296559702222e-06, + "loss": 0.2918, + "step": 27599 + }, + { + "epoch": 0.5525110727422866, + "grad_norm": 1.2717669010162354, + "learning_rate": 4.3939747654668045e-06, + "loss": 0.3333, + "step": 27600 + }, + { + "epoch": 0.5525310912594149, + "grad_norm": 1.0534383058547974, + "learning_rate": 4.3936529737789895e-06, + "loss": 0.2955, + "step": 27601 + }, + { + "epoch": 0.5525511097765433, + "grad_norm": 1.0767358541488647, + "learning_rate": 4.393331184640127e-06, + "loss": 0.3154, + "step": 27602 + }, + { + "epoch": 0.5525711282936716, + "grad_norm": 1.2645114660263062, + "learning_rate": 4.393009398051569e-06, + "loss": 0.3568, + "step": 27603 + }, + { + "epoch": 0.5525911468108, + "grad_norm": 1.2379298210144043, + "learning_rate": 4.392687614014669e-06, + "loss": 0.2815, + "step": 27604 + }, + { + "epoch": 0.5526111653279283, + "grad_norm": 1.252282738685608, + "learning_rate": 4.392365832530778e-06, + "loss": 0.28, + "step": 27605 + }, + { + "epoch": 0.5526311838450567, + "grad_norm": 1.02675461769104, + "learning_rate": 4.392044053601251e-06, + "loss": 0.3069, + "step": 27606 + }, + { + "epoch": 0.552651202362185, + "grad_norm": 1.2115681171417236, + "learning_rate": 4.391722277227442e-06, + "loss": 0.3349, + "step": 27607 + }, + { + "epoch": 0.5526712208793134, + "grad_norm": 1.3084405660629272, + "learning_rate": 4.391400503410701e-06, + "loss": 0.3277, + "step": 27608 + }, + { + "epoch": 0.5526912393964417, + "grad_norm": 1.1525697708129883, + "learning_rate": 4.391078732152378e-06, + "loss": 0.2936, + "step": 27609 + }, + { + "epoch": 0.55271125791357, + "grad_norm": 1.138388752937317, + "learning_rate": 4.390756963453832e-06, + "loss": 0.2829, + "step": 27610 + }, + { + "epoch": 0.5527312764306984, + "grad_norm": 2.6940598487854004, + "learning_rate": 4.3904351973164126e-06, + "loss": 0.4077, + "step": 27611 + }, + { + "epoch": 0.5527512949478267, + "grad_norm": 1.8564966917037964, + "learning_rate": 4.390113433741473e-06, + "loss": 0.7317, + "step": 27612 + }, + { + "epoch": 0.5527713134649551, + "grad_norm": 1.0471842288970947, + "learning_rate": 4.389791672730362e-06, + "loss": 0.3048, + "step": 27613 + }, + { + "epoch": 0.5527913319820834, + "grad_norm": 1.0594919919967651, + "learning_rate": 4.389469914284438e-06, + "loss": 0.295, + "step": 27614 + }, + { + "epoch": 0.5528113504992118, + "grad_norm": 1.1750152111053467, + "learning_rate": 4.389148158405052e-06, + "loss": 0.3055, + "step": 27615 + }, + { + "epoch": 0.5528313690163401, + "grad_norm": 1.1321773529052734, + "learning_rate": 4.388826405093553e-06, + "loss": 0.3112, + "step": 27616 + }, + { + "epoch": 0.5528513875334684, + "grad_norm": 1.074967861175537, + "learning_rate": 4.388504654351299e-06, + "loss": 0.2911, + "step": 27617 + }, + { + "epoch": 0.5528714060505968, + "grad_norm": 1.1606682538986206, + "learning_rate": 4.388182906179637e-06, + "loss": 0.3034, + "step": 27618 + }, + { + "epoch": 0.5528914245677251, + "grad_norm": 1.3543140888214111, + "learning_rate": 4.387861160579925e-06, + "loss": 0.2943, + "step": 27619 + }, + { + "epoch": 0.5529114430848535, + "grad_norm": 1.1793802976608276, + "learning_rate": 4.387539417553512e-06, + "loss": 0.3337, + "step": 27620 + }, + { + "epoch": 0.5529314616019818, + "grad_norm": 1.1269705295562744, + "learning_rate": 4.387217677101752e-06, + "loss": 0.3032, + "step": 27621 + }, + { + "epoch": 0.5529514801191102, + "grad_norm": 1.177253007888794, + "learning_rate": 4.386895939225995e-06, + "loss": 0.2585, + "step": 27622 + }, + { + "epoch": 0.5529714986362385, + "grad_norm": 1.06060791015625, + "learning_rate": 4.386574203927597e-06, + "loss": 0.3203, + "step": 27623 + }, + { + "epoch": 0.5529915171533669, + "grad_norm": 1.0209556818008423, + "learning_rate": 4.386252471207909e-06, + "loss": 0.2994, + "step": 27624 + }, + { + "epoch": 0.5530115356704952, + "grad_norm": 1.0895544290542603, + "learning_rate": 4.385930741068285e-06, + "loss": 0.285, + "step": 27625 + }, + { + "epoch": 0.5530315541876235, + "grad_norm": 1.962661623954773, + "learning_rate": 4.385609013510075e-06, + "loss": 0.7314, + "step": 27626 + }, + { + "epoch": 0.5530515727047519, + "grad_norm": 1.0611482858657837, + "learning_rate": 4.38528728853463e-06, + "loss": 0.3281, + "step": 27627 + }, + { + "epoch": 0.5530715912218802, + "grad_norm": 1.1477103233337402, + "learning_rate": 4.384965566143308e-06, + "loss": 0.2985, + "step": 27628 + }, + { + "epoch": 0.5530916097390086, + "grad_norm": 1.2783939838409424, + "learning_rate": 4.384643846337457e-06, + "loss": 0.3078, + "step": 27629 + }, + { + "epoch": 0.5531116282561369, + "grad_norm": 1.038504719734192, + "learning_rate": 4.384322129118431e-06, + "loss": 0.3133, + "step": 27630 + }, + { + "epoch": 0.5531316467732653, + "grad_norm": 1.1148021221160889, + "learning_rate": 4.384000414487582e-06, + "loss": 0.2818, + "step": 27631 + }, + { + "epoch": 0.5531516652903936, + "grad_norm": 1.081215262413025, + "learning_rate": 4.383678702446264e-06, + "loss": 0.3223, + "step": 27632 + }, + { + "epoch": 0.5531716838075219, + "grad_norm": 1.0727903842926025, + "learning_rate": 4.383356992995827e-06, + "loss": 0.3134, + "step": 27633 + }, + { + "epoch": 0.5531917023246503, + "grad_norm": 1.1275783777236938, + "learning_rate": 4.383035286137626e-06, + "loss": 0.3284, + "step": 27634 + }, + { + "epoch": 0.5532117208417786, + "grad_norm": 1.0180479288101196, + "learning_rate": 4.38271358187301e-06, + "loss": 0.2789, + "step": 27635 + }, + { + "epoch": 0.553231739358907, + "grad_norm": 1.1553606986999512, + "learning_rate": 4.382391880203332e-06, + "loss": 0.3731, + "step": 27636 + }, + { + "epoch": 0.5532517578760353, + "grad_norm": 1.2084406614303589, + "learning_rate": 4.3820701811299485e-06, + "loss": 0.2663, + "step": 27637 + }, + { + "epoch": 0.5532717763931637, + "grad_norm": 1.1412744522094727, + "learning_rate": 4.3817484846542085e-06, + "loss": 0.3031, + "step": 27638 + }, + { + "epoch": 0.553291794910292, + "grad_norm": 1.1647894382476807, + "learning_rate": 4.3814267907774645e-06, + "loss": 0.3119, + "step": 27639 + }, + { + "epoch": 0.5533118134274204, + "grad_norm": 1.0851291418075562, + "learning_rate": 4.381105099501068e-06, + "loss": 0.2983, + "step": 27640 + }, + { + "epoch": 0.5533318319445487, + "grad_norm": 1.1680665016174316, + "learning_rate": 4.380783410826374e-06, + "loss": 0.3889, + "step": 27641 + }, + { + "epoch": 0.553351850461677, + "grad_norm": 1.0695513486862183, + "learning_rate": 4.380461724754732e-06, + "loss": 0.2982, + "step": 27642 + }, + { + "epoch": 0.5533718689788054, + "grad_norm": 2.150130271911621, + "learning_rate": 4.380140041287497e-06, + "loss": 0.7815, + "step": 27643 + }, + { + "epoch": 0.5533918874959337, + "grad_norm": 1.1310482025146484, + "learning_rate": 4.379818360426018e-06, + "loss": 0.3146, + "step": 27644 + }, + { + "epoch": 0.5534119060130621, + "grad_norm": 1.1483261585235596, + "learning_rate": 4.379496682171651e-06, + "loss": 0.2833, + "step": 27645 + }, + { + "epoch": 0.5534319245301904, + "grad_norm": 1.2281044721603394, + "learning_rate": 4.379175006525746e-06, + "loss": 0.3044, + "step": 27646 + }, + { + "epoch": 0.5534519430473188, + "grad_norm": 1.358312964439392, + "learning_rate": 4.378853333489657e-06, + "loss": 0.3144, + "step": 27647 + }, + { + "epoch": 0.5534719615644471, + "grad_norm": 1.9534329175949097, + "learning_rate": 4.378531663064733e-06, + "loss": 0.7452, + "step": 27648 + }, + { + "epoch": 0.5534919800815754, + "grad_norm": 1.05020010471344, + "learning_rate": 4.378209995252327e-06, + "loss": 0.2947, + "step": 27649 + }, + { + "epoch": 0.5535119985987038, + "grad_norm": 1.012169361114502, + "learning_rate": 4.377888330053794e-06, + "loss": 0.3245, + "step": 27650 + }, + { + "epoch": 0.5535320171158321, + "grad_norm": 1.1818487644195557, + "learning_rate": 4.377566667470485e-06, + "loss": 0.3556, + "step": 27651 + }, + { + "epoch": 0.5535520356329605, + "grad_norm": 1.806762933731079, + "learning_rate": 4.377245007503752e-06, + "loss": 0.8393, + "step": 27652 + }, + { + "epoch": 0.5535720541500888, + "grad_norm": 1.3238937854766846, + "learning_rate": 4.376923350154945e-06, + "loss": 0.2974, + "step": 27653 + }, + { + "epoch": 0.5535920726672172, + "grad_norm": 1.2426931858062744, + "learning_rate": 4.376601695425419e-06, + "loss": 0.3577, + "step": 27654 + }, + { + "epoch": 0.5536120911843455, + "grad_norm": 1.1728482246398926, + "learning_rate": 4.376280043316526e-06, + "loss": 0.3311, + "step": 27655 + }, + { + "epoch": 0.5536321097014739, + "grad_norm": 1.0517314672470093, + "learning_rate": 4.375958393829617e-06, + "loss": 0.2982, + "step": 27656 + }, + { + "epoch": 0.5536521282186022, + "grad_norm": 1.113757848739624, + "learning_rate": 4.375636746966046e-06, + "loss": 0.3125, + "step": 27657 + }, + { + "epoch": 0.5536721467357305, + "grad_norm": 1.0241549015045166, + "learning_rate": 4.375315102727161e-06, + "loss": 0.2945, + "step": 27658 + }, + { + "epoch": 0.5536921652528589, + "grad_norm": 1.034464955329895, + "learning_rate": 4.374993461114319e-06, + "loss": 0.3094, + "step": 27659 + }, + { + "epoch": 0.5537121837699872, + "grad_norm": 2.100278854370117, + "learning_rate": 4.37467182212887e-06, + "loss": 0.8348, + "step": 27660 + }, + { + "epoch": 0.5537322022871156, + "grad_norm": 1.1213338375091553, + "learning_rate": 4.374350185772165e-06, + "loss": 0.2972, + "step": 27661 + }, + { + "epoch": 0.5537522208042439, + "grad_norm": 1.039910078048706, + "learning_rate": 4.374028552045556e-06, + "loss": 0.2843, + "step": 27662 + }, + { + "epoch": 0.5537722393213723, + "grad_norm": 1.270268201828003, + "learning_rate": 4.373706920950399e-06, + "loss": 0.2962, + "step": 27663 + }, + { + "epoch": 0.5537922578385006, + "grad_norm": 1.0542656183242798, + "learning_rate": 4.3733852924880425e-06, + "loss": 0.3044, + "step": 27664 + }, + { + "epoch": 0.5538122763556289, + "grad_norm": 1.22785222530365, + "learning_rate": 4.373063666659839e-06, + "loss": 0.3438, + "step": 27665 + }, + { + "epoch": 0.5538322948727573, + "grad_norm": 1.0168993473052979, + "learning_rate": 4.372742043467141e-06, + "loss": 0.2834, + "step": 27666 + }, + { + "epoch": 0.5538523133898856, + "grad_norm": 1.1742898225784302, + "learning_rate": 4.3724204229113e-06, + "loss": 0.3203, + "step": 27667 + }, + { + "epoch": 0.553872331907014, + "grad_norm": 0.9403010010719299, + "learning_rate": 4.372098804993668e-06, + "loss": 0.248, + "step": 27668 + }, + { + "epoch": 0.5538923504241423, + "grad_norm": 1.173791766166687, + "learning_rate": 4.3717771897156e-06, + "loss": 0.3486, + "step": 27669 + }, + { + "epoch": 0.5539123689412707, + "grad_norm": 1.2462189197540283, + "learning_rate": 4.371455577078444e-06, + "loss": 0.3207, + "step": 27670 + }, + { + "epoch": 0.553932387458399, + "grad_norm": 1.188169002532959, + "learning_rate": 4.371133967083553e-06, + "loss": 0.2938, + "step": 27671 + }, + { + "epoch": 0.5539524059755274, + "grad_norm": 1.1058229207992554, + "learning_rate": 4.37081235973228e-06, + "loss": 0.2985, + "step": 27672 + }, + { + "epoch": 0.5539724244926557, + "grad_norm": 1.8305925130844116, + "learning_rate": 4.370490755025977e-06, + "loss": 0.7596, + "step": 27673 + }, + { + "epoch": 0.553992443009784, + "grad_norm": 1.016185998916626, + "learning_rate": 4.370169152965994e-06, + "loss": 0.2744, + "step": 27674 + }, + { + "epoch": 0.5540124615269124, + "grad_norm": 1.0598902702331543, + "learning_rate": 4.3698475535536835e-06, + "loss": 0.2649, + "step": 27675 + }, + { + "epoch": 0.5540324800440407, + "grad_norm": 1.3308354616165161, + "learning_rate": 4.369525956790401e-06, + "loss": 0.3015, + "step": 27676 + }, + { + "epoch": 0.5540524985611691, + "grad_norm": 1.1027830839157104, + "learning_rate": 4.369204362677495e-06, + "loss": 0.3203, + "step": 27677 + }, + { + "epoch": 0.5540725170782974, + "grad_norm": 1.1290909051895142, + "learning_rate": 4.368882771216319e-06, + "loss": 0.3102, + "step": 27678 + }, + { + "epoch": 0.5540925355954258, + "grad_norm": 1.1001477241516113, + "learning_rate": 4.368561182408222e-06, + "loss": 0.3151, + "step": 27679 + }, + { + "epoch": 0.5541125541125541, + "grad_norm": 1.1091899871826172, + "learning_rate": 4.368239596254558e-06, + "loss": 0.3127, + "step": 27680 + }, + { + "epoch": 0.5541325726296824, + "grad_norm": 1.2603245973587036, + "learning_rate": 4.367918012756679e-06, + "loss": 0.2877, + "step": 27681 + }, + { + "epoch": 0.5541525911468108, + "grad_norm": 1.0309568643569946, + "learning_rate": 4.367596431915937e-06, + "loss": 0.2672, + "step": 27682 + }, + { + "epoch": 0.5541726096639391, + "grad_norm": 1.122368335723877, + "learning_rate": 4.3672748537336845e-06, + "loss": 0.3407, + "step": 27683 + }, + { + "epoch": 0.5541926281810675, + "grad_norm": 1.0776597261428833, + "learning_rate": 4.36695327821127e-06, + "loss": 0.338, + "step": 27684 + }, + { + "epoch": 0.5542126466981958, + "grad_norm": 1.3190191984176636, + "learning_rate": 4.36663170535005e-06, + "loss": 0.2912, + "step": 27685 + }, + { + "epoch": 0.5542326652153242, + "grad_norm": 1.6983336210250854, + "learning_rate": 4.366310135151373e-06, + "loss": 0.7726, + "step": 27686 + }, + { + "epoch": 0.5542526837324525, + "grad_norm": 1.0835069417953491, + "learning_rate": 4.365988567616592e-06, + "loss": 0.3529, + "step": 27687 + }, + { + "epoch": 0.5542727022495809, + "grad_norm": 1.3132063150405884, + "learning_rate": 4.365667002747058e-06, + "loss": 0.331, + "step": 27688 + }, + { + "epoch": 0.5542927207667092, + "grad_norm": 1.1101945638656616, + "learning_rate": 4.365345440544123e-06, + "loss": 0.3096, + "step": 27689 + }, + { + "epoch": 0.5543127392838375, + "grad_norm": 1.1292088031768799, + "learning_rate": 4.3650238810091414e-06, + "loss": 0.3263, + "step": 27690 + }, + { + "epoch": 0.5543327578009659, + "grad_norm": 1.1500048637390137, + "learning_rate": 4.364702324143461e-06, + "loss": 0.3409, + "step": 27691 + }, + { + "epoch": 0.5543527763180942, + "grad_norm": 1.120535969734192, + "learning_rate": 4.3643807699484364e-06, + "loss": 0.3259, + "step": 27692 + }, + { + "epoch": 0.5543727948352226, + "grad_norm": 1.1222364902496338, + "learning_rate": 4.364059218425417e-06, + "loss": 0.294, + "step": 27693 + }, + { + "epoch": 0.5543928133523509, + "grad_norm": 1.038790225982666, + "learning_rate": 4.363737669575756e-06, + "loss": 0.2761, + "step": 27694 + }, + { + "epoch": 0.5544128318694793, + "grad_norm": 1.0802195072174072, + "learning_rate": 4.363416123400806e-06, + "loss": 0.2899, + "step": 27695 + }, + { + "epoch": 0.5544328503866076, + "grad_norm": 1.030705213546753, + "learning_rate": 4.363094579901918e-06, + "loss": 0.2805, + "step": 27696 + }, + { + "epoch": 0.5544528689037359, + "grad_norm": 1.091271162033081, + "learning_rate": 4.36277303908044e-06, + "loss": 0.3194, + "step": 27697 + }, + { + "epoch": 0.5544728874208643, + "grad_norm": 1.1714816093444824, + "learning_rate": 4.36245150093773e-06, + "loss": 0.304, + "step": 27698 + }, + { + "epoch": 0.5544929059379926, + "grad_norm": 1.2777180671691895, + "learning_rate": 4.362129965475137e-06, + "loss": 0.2895, + "step": 27699 + }, + { + "epoch": 0.554512924455121, + "grad_norm": 1.0410895347595215, + "learning_rate": 4.361808432694011e-06, + "loss": 0.3167, + "step": 27700 + }, + { + "epoch": 0.5545329429722493, + "grad_norm": 1.200921893119812, + "learning_rate": 4.361486902595705e-06, + "loss": 0.2978, + "step": 27701 + }, + { + "epoch": 0.5545529614893777, + "grad_norm": 1.1334643363952637, + "learning_rate": 4.36116537518157e-06, + "loss": 0.34, + "step": 27702 + }, + { + "epoch": 0.554572980006506, + "grad_norm": 1.2311344146728516, + "learning_rate": 4.36084385045296e-06, + "loss": 0.31, + "step": 27703 + }, + { + "epoch": 0.5545929985236343, + "grad_norm": 1.048050045967102, + "learning_rate": 4.360522328411225e-06, + "loss": 0.2788, + "step": 27704 + }, + { + "epoch": 0.5546130170407627, + "grad_norm": 1.2984132766723633, + "learning_rate": 4.360200809057715e-06, + "loss": 0.3034, + "step": 27705 + }, + { + "epoch": 0.554633035557891, + "grad_norm": 1.9243656396865845, + "learning_rate": 4.359879292393784e-06, + "loss": 0.7884, + "step": 27706 + }, + { + "epoch": 0.5546530540750194, + "grad_norm": 1.178551435470581, + "learning_rate": 4.359557778420782e-06, + "loss": 0.3069, + "step": 27707 + }, + { + "epoch": 0.5546730725921477, + "grad_norm": 1.0222123861312866, + "learning_rate": 4.359236267140062e-06, + "loss": 0.2773, + "step": 27708 + }, + { + "epoch": 0.5546930911092761, + "grad_norm": 1.2270060777664185, + "learning_rate": 4.3589147585529756e-06, + "loss": 0.2977, + "step": 27709 + }, + { + "epoch": 0.5547131096264044, + "grad_norm": 1.9659571647644043, + "learning_rate": 4.3585932526608735e-06, + "loss": 0.7898, + "step": 27710 + }, + { + "epoch": 0.5547331281435328, + "grad_norm": 1.06769597530365, + "learning_rate": 4.358271749465104e-06, + "loss": 0.333, + "step": 27711 + }, + { + "epoch": 0.5547531466606611, + "grad_norm": 1.0212973356246948, + "learning_rate": 4.357950248967026e-06, + "loss": 0.319, + "step": 27712 + }, + { + "epoch": 0.5547731651777894, + "grad_norm": 1.0824764966964722, + "learning_rate": 4.357628751167985e-06, + "loss": 0.2966, + "step": 27713 + }, + { + "epoch": 0.5547931836949178, + "grad_norm": 1.0557596683502197, + "learning_rate": 4.357307256069335e-06, + "loss": 0.3133, + "step": 27714 + }, + { + "epoch": 0.5548132022120461, + "grad_norm": 1.1213027238845825, + "learning_rate": 4.356985763672426e-06, + "loss": 0.3424, + "step": 27715 + }, + { + "epoch": 0.5548332207291745, + "grad_norm": 1.7046006917953491, + "learning_rate": 4.3566642739786115e-06, + "loss": 0.7102, + "step": 27716 + }, + { + "epoch": 0.5548532392463028, + "grad_norm": 1.2119885683059692, + "learning_rate": 4.356342786989242e-06, + "loss": 0.344, + "step": 27717 + }, + { + "epoch": 0.5548732577634312, + "grad_norm": 1.1870845556259155, + "learning_rate": 4.356021302705669e-06, + "loss": 0.3635, + "step": 27718 + }, + { + "epoch": 0.5548932762805595, + "grad_norm": 1.1189496517181396, + "learning_rate": 4.355699821129243e-06, + "loss": 0.2888, + "step": 27719 + }, + { + "epoch": 0.5549132947976878, + "grad_norm": 1.0770525932312012, + "learning_rate": 4.355378342261316e-06, + "loss": 0.2946, + "step": 27720 + }, + { + "epoch": 0.5549333133148162, + "grad_norm": 1.047858715057373, + "learning_rate": 4.355056866103241e-06, + "loss": 0.3061, + "step": 27721 + }, + { + "epoch": 0.5549533318319445, + "grad_norm": 1.921614646911621, + "learning_rate": 4.354735392656369e-06, + "loss": 0.7856, + "step": 27722 + }, + { + "epoch": 0.5549733503490729, + "grad_norm": 1.0548287630081177, + "learning_rate": 4.35441392192205e-06, + "loss": 0.2724, + "step": 27723 + }, + { + "epoch": 0.5549933688662012, + "grad_norm": 1.1216285228729248, + "learning_rate": 4.3540924539016335e-06, + "loss": 0.2868, + "step": 27724 + }, + { + "epoch": 0.5550133873833296, + "grad_norm": 1.1221469640731812, + "learning_rate": 4.3537709885964755e-06, + "loss": 0.2934, + "step": 27725 + }, + { + "epoch": 0.5550334059004579, + "grad_norm": 1.0825003385543823, + "learning_rate": 4.353449526007924e-06, + "loss": 0.2882, + "step": 27726 + }, + { + "epoch": 0.5550534244175863, + "grad_norm": 1.2308684587478638, + "learning_rate": 4.3531280661373335e-06, + "loss": 0.3219, + "step": 27727 + }, + { + "epoch": 0.5550734429347146, + "grad_norm": 1.1009674072265625, + "learning_rate": 4.35280660898605e-06, + "loss": 0.3265, + "step": 27728 + }, + { + "epoch": 0.5550934614518429, + "grad_norm": 1.1257506608963013, + "learning_rate": 4.352485154555431e-06, + "loss": 0.3157, + "step": 27729 + }, + { + "epoch": 0.5551134799689713, + "grad_norm": 1.1090593338012695, + "learning_rate": 4.352163702846826e-06, + "loss": 0.3414, + "step": 27730 + }, + { + "epoch": 0.5551334984860996, + "grad_norm": 1.940556526184082, + "learning_rate": 4.351842253861583e-06, + "loss": 0.7563, + "step": 27731 + }, + { + "epoch": 0.555153517003228, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.351520807601057e-06, + "loss": 0.3153, + "step": 27732 + }, + { + "epoch": 0.5551735355203563, + "grad_norm": 1.1727819442749023, + "learning_rate": 4.351199364066596e-06, + "loss": 0.3104, + "step": 27733 + }, + { + "epoch": 0.5551935540374847, + "grad_norm": 1.1143009662628174, + "learning_rate": 4.350877923259555e-06, + "loss": 0.2995, + "step": 27734 + }, + { + "epoch": 0.555213572554613, + "grad_norm": 1.2784874439239502, + "learning_rate": 4.3505564851812844e-06, + "loss": 0.3442, + "step": 27735 + }, + { + "epoch": 0.5552335910717413, + "grad_norm": 1.0789265632629395, + "learning_rate": 4.350235049833134e-06, + "loss": 0.2577, + "step": 27736 + }, + { + "epoch": 0.5552536095888697, + "grad_norm": 1.121732473373413, + "learning_rate": 4.3499136172164536e-06, + "loss": 0.3114, + "step": 27737 + }, + { + "epoch": 0.555273628105998, + "grad_norm": 1.1891175508499146, + "learning_rate": 4.349592187332599e-06, + "loss": 0.2889, + "step": 27738 + }, + { + "epoch": 0.5552936466231264, + "grad_norm": 1.1473385095596313, + "learning_rate": 4.349270760182917e-06, + "loss": 0.2576, + "step": 27739 + }, + { + "epoch": 0.5553136651402547, + "grad_norm": 1.2255219221115112, + "learning_rate": 4.348949335768762e-06, + "loss": 0.3512, + "step": 27740 + }, + { + "epoch": 0.5553336836573831, + "grad_norm": 1.3907935619354248, + "learning_rate": 4.348627914091484e-06, + "loss": 0.2856, + "step": 27741 + }, + { + "epoch": 0.5553537021745114, + "grad_norm": 1.187983751296997, + "learning_rate": 4.348306495152432e-06, + "loss": 0.2935, + "step": 27742 + }, + { + "epoch": 0.5553737206916398, + "grad_norm": 1.2562779188156128, + "learning_rate": 4.347985078952961e-06, + "loss": 0.3354, + "step": 27743 + }, + { + "epoch": 0.5553937392087681, + "grad_norm": 1.1851850748062134, + "learning_rate": 4.34766366549442e-06, + "loss": 0.3251, + "step": 27744 + }, + { + "epoch": 0.5554137577258964, + "grad_norm": 1.1427953243255615, + "learning_rate": 4.347342254778162e-06, + "loss": 0.332, + "step": 27745 + }, + { + "epoch": 0.5554337762430248, + "grad_norm": 1.0420887470245361, + "learning_rate": 4.347020846805533e-06, + "loss": 0.3244, + "step": 27746 + }, + { + "epoch": 0.5554537947601531, + "grad_norm": 1.1879048347473145, + "learning_rate": 4.346699441577891e-06, + "loss": 0.3116, + "step": 27747 + }, + { + "epoch": 0.5554738132772815, + "grad_norm": 1.2923747301101685, + "learning_rate": 4.346378039096584e-06, + "loss": 0.2896, + "step": 27748 + }, + { + "epoch": 0.5554938317944098, + "grad_norm": 1.0933841466903687, + "learning_rate": 4.346056639362962e-06, + "loss": 0.3095, + "step": 27749 + }, + { + "epoch": 0.5555138503115382, + "grad_norm": 1.1735044717788696, + "learning_rate": 4.3457352423783776e-06, + "loss": 0.3195, + "step": 27750 + }, + { + "epoch": 0.5555338688286665, + "grad_norm": 1.2453396320343018, + "learning_rate": 4.3454138481441804e-06, + "loss": 0.3085, + "step": 27751 + }, + { + "epoch": 0.5555538873457948, + "grad_norm": 1.1932536363601685, + "learning_rate": 4.345092456661723e-06, + "loss": 0.338, + "step": 27752 + }, + { + "epoch": 0.5555739058629232, + "grad_norm": 1.16865873336792, + "learning_rate": 4.344771067932358e-06, + "loss": 0.3127, + "step": 27753 + }, + { + "epoch": 0.5555939243800515, + "grad_norm": 1.105143666267395, + "learning_rate": 4.344449681957432e-06, + "loss": 0.3266, + "step": 27754 + }, + { + "epoch": 0.5556139428971799, + "grad_norm": 0.9820036888122559, + "learning_rate": 4.344128298738298e-06, + "loss": 0.3049, + "step": 27755 + }, + { + "epoch": 0.5556339614143082, + "grad_norm": 1.038866639137268, + "learning_rate": 4.3438069182763095e-06, + "loss": 0.2403, + "step": 27756 + }, + { + "epoch": 0.5556539799314366, + "grad_norm": 2.075556755065918, + "learning_rate": 4.343485540572814e-06, + "loss": 0.8136, + "step": 27757 + }, + { + "epoch": 0.5556739984485649, + "grad_norm": 1.89786958694458, + "learning_rate": 4.343164165629166e-06, + "loss": 0.7732, + "step": 27758 + }, + { + "epoch": 0.5556940169656933, + "grad_norm": 1.1340070962905884, + "learning_rate": 4.342842793446711e-06, + "loss": 0.3208, + "step": 27759 + }, + { + "epoch": 0.5557140354828216, + "grad_norm": 1.138674020767212, + "learning_rate": 4.342521424026806e-06, + "loss": 0.3288, + "step": 27760 + }, + { + "epoch": 0.5557340539999499, + "grad_norm": 1.7820361852645874, + "learning_rate": 4.3422000573708e-06, + "loss": 0.7963, + "step": 27761 + }, + { + "epoch": 0.5557540725170783, + "grad_norm": 1.1389148235321045, + "learning_rate": 4.341878693480044e-06, + "loss": 0.2616, + "step": 27762 + }, + { + "epoch": 0.5557740910342066, + "grad_norm": 1.002392292022705, + "learning_rate": 4.341557332355886e-06, + "loss": 0.2539, + "step": 27763 + }, + { + "epoch": 0.555794109551335, + "grad_norm": 0.9956494569778442, + "learning_rate": 4.341235973999679e-06, + "loss": 0.3238, + "step": 27764 + }, + { + "epoch": 0.5558141280684633, + "grad_norm": 1.1894577741622925, + "learning_rate": 4.340914618412776e-06, + "loss": 0.3323, + "step": 27765 + }, + { + "epoch": 0.5558341465855917, + "grad_norm": 1.0901609659194946, + "learning_rate": 4.3405932655965255e-06, + "loss": 0.3264, + "step": 27766 + }, + { + "epoch": 0.55585416510272, + "grad_norm": 1.1877028942108154, + "learning_rate": 4.3402719155522795e-06, + "loss": 0.2738, + "step": 27767 + }, + { + "epoch": 0.5558741836198483, + "grad_norm": 1.1758239269256592, + "learning_rate": 4.339950568281386e-06, + "loss": 0.3498, + "step": 27768 + }, + { + "epoch": 0.5558942021369767, + "grad_norm": 1.1059144735336304, + "learning_rate": 4.339629223785201e-06, + "loss": 0.2931, + "step": 27769 + }, + { + "epoch": 0.555914220654105, + "grad_norm": 1.056932806968689, + "learning_rate": 4.339307882065071e-06, + "loss": 0.2883, + "step": 27770 + }, + { + "epoch": 0.5559342391712334, + "grad_norm": 1.217367172241211, + "learning_rate": 4.33898654312235e-06, + "loss": 0.3508, + "step": 27771 + }, + { + "epoch": 0.5559542576883617, + "grad_norm": 1.1551252603530884, + "learning_rate": 4.338665206958387e-06, + "loss": 0.3013, + "step": 27772 + }, + { + "epoch": 0.5559742762054901, + "grad_norm": 1.0681824684143066, + "learning_rate": 4.338343873574531e-06, + "loss": 0.3399, + "step": 27773 + }, + { + "epoch": 0.5559942947226184, + "grad_norm": 1.1898810863494873, + "learning_rate": 4.338022542972138e-06, + "loss": 0.3038, + "step": 27774 + }, + { + "epoch": 0.5560143132397468, + "grad_norm": 1.1119112968444824, + "learning_rate": 4.337701215152555e-06, + "loss": 0.2651, + "step": 27775 + }, + { + "epoch": 0.5560343317568751, + "grad_norm": 1.16913902759552, + "learning_rate": 4.337379890117133e-06, + "loss": 0.2809, + "step": 27776 + }, + { + "epoch": 0.5560543502740034, + "grad_norm": 1.1064077615737915, + "learning_rate": 4.3370585678672225e-06, + "loss": 0.3064, + "step": 27777 + }, + { + "epoch": 0.5560743687911318, + "grad_norm": 1.1171585321426392, + "learning_rate": 4.336737248404175e-06, + "loss": 0.3324, + "step": 27778 + }, + { + "epoch": 0.5560943873082601, + "grad_norm": 1.0836294889450073, + "learning_rate": 4.336415931729343e-06, + "loss": 0.344, + "step": 27779 + }, + { + "epoch": 0.5561144058253885, + "grad_norm": 0.9848306179046631, + "learning_rate": 4.336094617844075e-06, + "loss": 0.2836, + "step": 27780 + }, + { + "epoch": 0.5561344243425168, + "grad_norm": 1.0970690250396729, + "learning_rate": 4.335773306749723e-06, + "loss": 0.3265, + "step": 27781 + }, + { + "epoch": 0.5561544428596452, + "grad_norm": 1.0218539237976074, + "learning_rate": 4.335451998447636e-06, + "loss": 0.322, + "step": 27782 + }, + { + "epoch": 0.5561744613767735, + "grad_norm": 1.1000443696975708, + "learning_rate": 4.335130692939165e-06, + "loss": 0.2952, + "step": 27783 + }, + { + "epoch": 0.5561944798939018, + "grad_norm": 1.1566981077194214, + "learning_rate": 4.334809390225663e-06, + "loss": 0.3363, + "step": 27784 + }, + { + "epoch": 0.5562144984110302, + "grad_norm": 1.1423243284225464, + "learning_rate": 4.33448809030848e-06, + "loss": 0.3232, + "step": 27785 + }, + { + "epoch": 0.5562345169281585, + "grad_norm": 1.0635626316070557, + "learning_rate": 4.334166793188963e-06, + "loss": 0.3385, + "step": 27786 + }, + { + "epoch": 0.5562545354452869, + "grad_norm": 1.2880704402923584, + "learning_rate": 4.3338454988684675e-06, + "loss": 0.3367, + "step": 27787 + }, + { + "epoch": 0.5562745539624152, + "grad_norm": 1.0234895944595337, + "learning_rate": 4.333524207348343e-06, + "loss": 0.3271, + "step": 27788 + }, + { + "epoch": 0.5562945724795436, + "grad_norm": 1.1497225761413574, + "learning_rate": 4.333202918629938e-06, + "loss": 0.3086, + "step": 27789 + }, + { + "epoch": 0.5563145909966719, + "grad_norm": 1.1164144277572632, + "learning_rate": 4.3328816327146035e-06, + "loss": 0.2936, + "step": 27790 + }, + { + "epoch": 0.5563346095138003, + "grad_norm": 1.1764624118804932, + "learning_rate": 4.332560349603691e-06, + "loss": 0.3469, + "step": 27791 + }, + { + "epoch": 0.5563546280309286, + "grad_norm": 1.188809871673584, + "learning_rate": 4.332239069298554e-06, + "loss": 0.3448, + "step": 27792 + }, + { + "epoch": 0.5563746465480569, + "grad_norm": 1.19402015209198, + "learning_rate": 4.331917791800539e-06, + "loss": 0.3489, + "step": 27793 + }, + { + "epoch": 0.5563946650651853, + "grad_norm": 0.9956686496734619, + "learning_rate": 4.331596517110997e-06, + "loss": 0.327, + "step": 27794 + }, + { + "epoch": 0.5564146835823136, + "grad_norm": 1.0842822790145874, + "learning_rate": 4.331275245231279e-06, + "loss": 0.3399, + "step": 27795 + }, + { + "epoch": 0.556434702099442, + "grad_norm": 1.1362732648849487, + "learning_rate": 4.330953976162736e-06, + "loss": 0.3132, + "step": 27796 + }, + { + "epoch": 0.5564547206165703, + "grad_norm": 1.8406500816345215, + "learning_rate": 4.3306327099067204e-06, + "loss": 0.8108, + "step": 27797 + }, + { + "epoch": 0.5564747391336987, + "grad_norm": 1.081568956375122, + "learning_rate": 4.33031144646458e-06, + "loss": 0.3249, + "step": 27798 + }, + { + "epoch": 0.556494757650827, + "grad_norm": 1.1635900735855103, + "learning_rate": 4.329990185837664e-06, + "loss": 0.3492, + "step": 27799 + }, + { + "epoch": 0.5565147761679553, + "grad_norm": 1.1653951406478882, + "learning_rate": 4.329668928027328e-06, + "loss": 0.3099, + "step": 27800 + }, + { + "epoch": 0.5565347946850837, + "grad_norm": 1.1077349185943604, + "learning_rate": 4.329347673034919e-06, + "loss": 0.3264, + "step": 27801 + }, + { + "epoch": 0.556554813202212, + "grad_norm": 1.0801502466201782, + "learning_rate": 4.3290264208617875e-06, + "loss": 0.3028, + "step": 27802 + }, + { + "epoch": 0.5565748317193404, + "grad_norm": 1.0730481147766113, + "learning_rate": 4.3287051715092845e-06, + "loss": 0.2945, + "step": 27803 + }, + { + "epoch": 0.5565948502364687, + "grad_norm": 1.0757468938827515, + "learning_rate": 4.328383924978759e-06, + "loss": 0.323, + "step": 27804 + }, + { + "epoch": 0.5566148687535971, + "grad_norm": 1.0694876909255981, + "learning_rate": 4.328062681271565e-06, + "loss": 0.2771, + "step": 27805 + }, + { + "epoch": 0.5566348872707254, + "grad_norm": 1.1317427158355713, + "learning_rate": 4.327741440389052e-06, + "loss": 0.3406, + "step": 27806 + }, + { + "epoch": 0.5566549057878538, + "grad_norm": 1.0999475717544556, + "learning_rate": 4.327420202332568e-06, + "loss": 0.3206, + "step": 27807 + }, + { + "epoch": 0.5566749243049821, + "grad_norm": 1.1395422220230103, + "learning_rate": 4.327098967103462e-06, + "loss": 0.3225, + "step": 27808 + }, + { + "epoch": 0.5566949428221104, + "grad_norm": 1.1901781558990479, + "learning_rate": 4.32677773470309e-06, + "loss": 0.3211, + "step": 27809 + }, + { + "epoch": 0.5567149613392388, + "grad_norm": 1.1885889768600464, + "learning_rate": 4.3264565051328e-06, + "loss": 0.3198, + "step": 27810 + }, + { + "epoch": 0.5567349798563671, + "grad_norm": 1.0843850374221802, + "learning_rate": 4.326135278393941e-06, + "loss": 0.3003, + "step": 27811 + }, + { + "epoch": 0.5567549983734955, + "grad_norm": 1.1282830238342285, + "learning_rate": 4.325814054487864e-06, + "loss": 0.2868, + "step": 27812 + }, + { + "epoch": 0.5567750168906238, + "grad_norm": 1.1157246828079224, + "learning_rate": 4.325492833415919e-06, + "loss": 0.3341, + "step": 27813 + }, + { + "epoch": 0.5567950354077522, + "grad_norm": 1.0044515132904053, + "learning_rate": 4.325171615179459e-06, + "loss": 0.2737, + "step": 27814 + }, + { + "epoch": 0.5568150539248805, + "grad_norm": 1.191785216331482, + "learning_rate": 4.32485039977983e-06, + "loss": 0.2474, + "step": 27815 + }, + { + "epoch": 0.5568350724420088, + "grad_norm": 1.034719705581665, + "learning_rate": 4.324529187218387e-06, + "loss": 0.3214, + "step": 27816 + }, + { + "epoch": 0.5568550909591372, + "grad_norm": 1.0521063804626465, + "learning_rate": 4.324207977496475e-06, + "loss": 0.2966, + "step": 27817 + }, + { + "epoch": 0.5568751094762655, + "grad_norm": 1.1555095911026, + "learning_rate": 4.323886770615449e-06, + "loss": 0.353, + "step": 27818 + }, + { + "epoch": 0.5568951279933939, + "grad_norm": 1.0678174495697021, + "learning_rate": 4.3235655665766586e-06, + "loss": 0.3459, + "step": 27819 + }, + { + "epoch": 0.5569151465105222, + "grad_norm": 1.0863170623779297, + "learning_rate": 4.323244365381452e-06, + "loss": 0.2827, + "step": 27820 + }, + { + "epoch": 0.5569351650276506, + "grad_norm": 1.134482502937317, + "learning_rate": 4.322923167031179e-06, + "loss": 0.3104, + "step": 27821 + }, + { + "epoch": 0.5569551835447789, + "grad_norm": 1.1502368450164795, + "learning_rate": 4.322601971527192e-06, + "loss": 0.3234, + "step": 27822 + }, + { + "epoch": 0.5569752020619073, + "grad_norm": 1.2672805786132812, + "learning_rate": 4.322280778870842e-06, + "loss": 0.3176, + "step": 27823 + }, + { + "epoch": 0.5569952205790356, + "grad_norm": 1.1459496021270752, + "learning_rate": 4.321959589063478e-06, + "loss": 0.3143, + "step": 27824 + }, + { + "epoch": 0.5570152390961639, + "grad_norm": 1.1750599145889282, + "learning_rate": 4.321638402106449e-06, + "loss": 0.3389, + "step": 27825 + }, + { + "epoch": 0.5570352576132923, + "grad_norm": 1.1158661842346191, + "learning_rate": 4.3213172180011045e-06, + "loss": 0.3099, + "step": 27826 + }, + { + "epoch": 0.5570552761304206, + "grad_norm": 1.057721734046936, + "learning_rate": 4.3209960367487985e-06, + "loss": 0.3285, + "step": 27827 + }, + { + "epoch": 0.557075294647549, + "grad_norm": 1.091469645500183, + "learning_rate": 4.3206748583508775e-06, + "loss": 0.2966, + "step": 27828 + }, + { + "epoch": 0.5570953131646773, + "grad_norm": 0.9914799332618713, + "learning_rate": 4.320353682808696e-06, + "loss": 0.2823, + "step": 27829 + }, + { + "epoch": 0.5571153316818057, + "grad_norm": 1.0943788290023804, + "learning_rate": 4.320032510123597e-06, + "loss": 0.3129, + "step": 27830 + }, + { + "epoch": 0.557135350198934, + "grad_norm": 1.1037278175354004, + "learning_rate": 4.319711340296938e-06, + "loss": 0.2941, + "step": 27831 + }, + { + "epoch": 0.5571553687160623, + "grad_norm": 1.1753668785095215, + "learning_rate": 4.319390173330067e-06, + "loss": 0.3126, + "step": 27832 + }, + { + "epoch": 0.5571753872331907, + "grad_norm": 1.0328378677368164, + "learning_rate": 4.319069009224332e-06, + "loss": 0.2877, + "step": 27833 + }, + { + "epoch": 0.557195405750319, + "grad_norm": 1.1098347902297974, + "learning_rate": 4.318747847981084e-06, + "loss": 0.3248, + "step": 27834 + }, + { + "epoch": 0.5572154242674474, + "grad_norm": 1.810876488685608, + "learning_rate": 4.318426689601672e-06, + "loss": 0.763, + "step": 27835 + }, + { + "epoch": 0.5572354427845757, + "grad_norm": 1.0852309465408325, + "learning_rate": 4.31810553408745e-06, + "loss": 0.304, + "step": 27836 + }, + { + "epoch": 0.5572554613017041, + "grad_norm": 1.1337703466415405, + "learning_rate": 4.3177843814397655e-06, + "loss": 0.3167, + "step": 27837 + }, + { + "epoch": 0.5572754798188324, + "grad_norm": 1.0666831731796265, + "learning_rate": 4.317463231659968e-06, + "loss": 0.301, + "step": 27838 + }, + { + "epoch": 0.5572954983359608, + "grad_norm": 1.0626239776611328, + "learning_rate": 4.317142084749407e-06, + "loss": 0.2745, + "step": 27839 + }, + { + "epoch": 0.5573155168530891, + "grad_norm": 1.1364946365356445, + "learning_rate": 4.316820940709435e-06, + "loss": 0.3048, + "step": 27840 + }, + { + "epoch": 0.5573355353702174, + "grad_norm": 1.2128980159759521, + "learning_rate": 4.3164997995414005e-06, + "loss": 0.3855, + "step": 27841 + }, + { + "epoch": 0.5573555538873458, + "grad_norm": 1.1193920373916626, + "learning_rate": 4.316178661246654e-06, + "loss": 0.2585, + "step": 27842 + }, + { + "epoch": 0.5573755724044741, + "grad_norm": 1.945818305015564, + "learning_rate": 4.315857525826544e-06, + "loss": 0.7452, + "step": 27843 + }, + { + "epoch": 0.5573955909216025, + "grad_norm": 1.4719479084014893, + "learning_rate": 4.3155363932824234e-06, + "loss": 0.304, + "step": 27844 + }, + { + "epoch": 0.5574156094387308, + "grad_norm": 1.1404609680175781, + "learning_rate": 4.31521526361564e-06, + "loss": 0.281, + "step": 27845 + }, + { + "epoch": 0.5574356279558592, + "grad_norm": 1.217674970626831, + "learning_rate": 4.314894136827545e-06, + "loss": 0.3103, + "step": 27846 + }, + { + "epoch": 0.5574556464729875, + "grad_norm": 1.1732789278030396, + "learning_rate": 4.3145730129194865e-06, + "loss": 0.3084, + "step": 27847 + }, + { + "epoch": 0.5574756649901158, + "grad_norm": 2.0098085403442383, + "learning_rate": 4.314251891892815e-06, + "loss": 0.7524, + "step": 27848 + }, + { + "epoch": 0.5574956835072442, + "grad_norm": 1.055611252784729, + "learning_rate": 4.313930773748881e-06, + "loss": 0.2866, + "step": 27849 + }, + { + "epoch": 0.5575157020243725, + "grad_norm": 1.0446072816848755, + "learning_rate": 4.313609658489036e-06, + "loss": 0.3016, + "step": 27850 + }, + { + "epoch": 0.5575357205415009, + "grad_norm": 1.1310827732086182, + "learning_rate": 4.3132885461146286e-06, + "loss": 0.3031, + "step": 27851 + }, + { + "epoch": 0.5575557390586292, + "grad_norm": 1.2238942384719849, + "learning_rate": 4.312967436627006e-06, + "loss": 0.3523, + "step": 27852 + }, + { + "epoch": 0.5575757575757576, + "grad_norm": 1.2367926836013794, + "learning_rate": 4.312646330027522e-06, + "loss": 0.313, + "step": 27853 + }, + { + "epoch": 0.5575957760928859, + "grad_norm": 1.102232575416565, + "learning_rate": 4.312325226317524e-06, + "loss": 0.3655, + "step": 27854 + }, + { + "epoch": 0.5576157946100143, + "grad_norm": 1.0264018774032593, + "learning_rate": 4.312004125498364e-06, + "loss": 0.2624, + "step": 27855 + }, + { + "epoch": 0.5576358131271426, + "grad_norm": 1.0405166149139404, + "learning_rate": 4.311683027571391e-06, + "loss": 0.2792, + "step": 27856 + }, + { + "epoch": 0.5576558316442709, + "grad_norm": 1.133635401725769, + "learning_rate": 4.311361932537951e-06, + "loss": 0.2768, + "step": 27857 + }, + { + "epoch": 0.5576758501613993, + "grad_norm": 1.0890158414840698, + "learning_rate": 4.311040840399401e-06, + "loss": 0.2683, + "step": 27858 + }, + { + "epoch": 0.5576958686785276, + "grad_norm": 1.2213495969772339, + "learning_rate": 4.310719751157086e-06, + "loss": 0.3206, + "step": 27859 + }, + { + "epoch": 0.557715887195656, + "grad_norm": 1.1734787225723267, + "learning_rate": 4.310398664812356e-06, + "loss": 0.2741, + "step": 27860 + }, + { + "epoch": 0.5577359057127843, + "grad_norm": 1.9668610095977783, + "learning_rate": 4.31007758136656e-06, + "loss": 0.8342, + "step": 27861 + }, + { + "epoch": 0.5577559242299127, + "grad_norm": 1.3577054738998413, + "learning_rate": 4.309756500821053e-06, + "loss": 0.3142, + "step": 27862 + }, + { + "epoch": 0.557775942747041, + "grad_norm": 1.1756891012191772, + "learning_rate": 4.30943542317718e-06, + "loss": 0.3304, + "step": 27863 + }, + { + "epoch": 0.5577959612641693, + "grad_norm": 1.058365821838379, + "learning_rate": 4.309114348436292e-06, + "loss": 0.2892, + "step": 27864 + }, + { + "epoch": 0.5578159797812977, + "grad_norm": 1.0202327966690063, + "learning_rate": 4.308793276599737e-06, + "loss": 0.3006, + "step": 27865 + }, + { + "epoch": 0.557835998298426, + "grad_norm": 1.118082880973816, + "learning_rate": 4.308472207668867e-06, + "loss": 0.3383, + "step": 27866 + }, + { + "epoch": 0.5578560168155544, + "grad_norm": 1.126816749572754, + "learning_rate": 4.308151141645031e-06, + "loss": 0.2978, + "step": 27867 + }, + { + "epoch": 0.5578760353326827, + "grad_norm": 1.178951621055603, + "learning_rate": 4.30783007852958e-06, + "loss": 0.2923, + "step": 27868 + }, + { + "epoch": 0.5578960538498111, + "grad_norm": 1.13689386844635, + "learning_rate": 4.307509018323862e-06, + "loss": 0.2738, + "step": 27869 + }, + { + "epoch": 0.5579160723669394, + "grad_norm": 1.3367669582366943, + "learning_rate": 4.307187961029225e-06, + "loss": 0.3202, + "step": 27870 + }, + { + "epoch": 0.5579360908840678, + "grad_norm": 2.1227307319641113, + "learning_rate": 4.306866906647023e-06, + "loss": 0.8115, + "step": 27871 + }, + { + "epoch": 0.5579561094011961, + "grad_norm": 1.3330936431884766, + "learning_rate": 4.306545855178603e-06, + "loss": 0.2521, + "step": 27872 + }, + { + "epoch": 0.5579761279183244, + "grad_norm": 1.9059396982192993, + "learning_rate": 4.306224806625314e-06, + "loss": 0.7624, + "step": 27873 + }, + { + "epoch": 0.5579961464354528, + "grad_norm": 1.1281107664108276, + "learning_rate": 4.305903760988506e-06, + "loss": 0.346, + "step": 27874 + }, + { + "epoch": 0.5580161649525811, + "grad_norm": 1.0500235557556152, + "learning_rate": 4.305582718269531e-06, + "loss": 0.2736, + "step": 27875 + }, + { + "epoch": 0.5580361834697095, + "grad_norm": 1.2023992538452148, + "learning_rate": 4.305261678469737e-06, + "loss": 0.3102, + "step": 27876 + }, + { + "epoch": 0.5580562019868378, + "grad_norm": 1.0505374670028687, + "learning_rate": 4.304940641590474e-06, + "loss": 0.2934, + "step": 27877 + }, + { + "epoch": 0.5580762205039662, + "grad_norm": 1.2268555164337158, + "learning_rate": 4.3046196076330884e-06, + "loss": 0.3448, + "step": 27878 + }, + { + "epoch": 0.5580962390210945, + "grad_norm": 1.1640022993087769, + "learning_rate": 4.304298576598933e-06, + "loss": 0.2576, + "step": 27879 + }, + { + "epoch": 0.5581162575382228, + "grad_norm": 1.5446672439575195, + "learning_rate": 4.303977548489357e-06, + "loss": 0.3194, + "step": 27880 + }, + { + "epoch": 0.5581362760553512, + "grad_norm": 1.3998403549194336, + "learning_rate": 4.30365652330571e-06, + "loss": 0.2716, + "step": 27881 + }, + { + "epoch": 0.5581562945724795, + "grad_norm": 1.1865755319595337, + "learning_rate": 4.303335501049342e-06, + "loss": 0.3392, + "step": 27882 + }, + { + "epoch": 0.5581763130896079, + "grad_norm": 1.9447896480560303, + "learning_rate": 4.303014481721599e-06, + "loss": 0.7504, + "step": 27883 + }, + { + "epoch": 0.5581963316067362, + "grad_norm": 1.239649772644043, + "learning_rate": 4.302693465323836e-06, + "loss": 0.3684, + "step": 27884 + }, + { + "epoch": 0.5582163501238646, + "grad_norm": 1.0579432249069214, + "learning_rate": 4.302372451857399e-06, + "loss": 0.338, + "step": 27885 + }, + { + "epoch": 0.5582363686409929, + "grad_norm": 0.9901071786880493, + "learning_rate": 4.302051441323636e-06, + "loss": 0.2448, + "step": 27886 + }, + { + "epoch": 0.5582563871581213, + "grad_norm": 1.168759822845459, + "learning_rate": 4.301730433723901e-06, + "loss": 0.329, + "step": 27887 + }, + { + "epoch": 0.5582764056752496, + "grad_norm": 1.0588798522949219, + "learning_rate": 4.301409429059538e-06, + "loss": 0.2615, + "step": 27888 + }, + { + "epoch": 0.5582964241923779, + "grad_norm": 1.1205730438232422, + "learning_rate": 4.301088427331902e-06, + "loss": 0.2889, + "step": 27889 + }, + { + "epoch": 0.5583164427095063, + "grad_norm": 1.0507827997207642, + "learning_rate": 4.30076742854234e-06, + "loss": 0.2942, + "step": 27890 + }, + { + "epoch": 0.5583364612266346, + "grad_norm": 1.1392011642456055, + "learning_rate": 4.3004464326922e-06, + "loss": 0.3428, + "step": 27891 + }, + { + "epoch": 0.558356479743763, + "grad_norm": 2.0181705951690674, + "learning_rate": 4.300125439782832e-06, + "loss": 0.7865, + "step": 27892 + }, + { + "epoch": 0.5583764982608913, + "grad_norm": 1.1672265529632568, + "learning_rate": 4.299804449815587e-06, + "loss": 0.2814, + "step": 27893 + }, + { + "epoch": 0.5583965167780197, + "grad_norm": 1.0519425868988037, + "learning_rate": 4.2994834627918145e-06, + "loss": 0.3225, + "step": 27894 + }, + { + "epoch": 0.558416535295148, + "grad_norm": 1.1650837659835815, + "learning_rate": 4.299162478712862e-06, + "loss": 0.2959, + "step": 27895 + }, + { + "epoch": 0.5584365538122763, + "grad_norm": 2.0829625129699707, + "learning_rate": 4.29884149758008e-06, + "loss": 0.7839, + "step": 27896 + }, + { + "epoch": 0.5584565723294047, + "grad_norm": 1.1797263622283936, + "learning_rate": 4.298520519394816e-06, + "loss": 0.3423, + "step": 27897 + }, + { + "epoch": 0.558476590846533, + "grad_norm": 1.067657709121704, + "learning_rate": 4.298199544158422e-06, + "loss": 0.3212, + "step": 27898 + }, + { + "epoch": 0.5584966093636614, + "grad_norm": 1.0851889848709106, + "learning_rate": 4.297878571872245e-06, + "loss": 0.2977, + "step": 27899 + }, + { + "epoch": 0.5585166278807897, + "grad_norm": 1.063919186592102, + "learning_rate": 4.297557602537637e-06, + "loss": 0.3369, + "step": 27900 + }, + { + "epoch": 0.5585366463979181, + "grad_norm": 1.188554286956787, + "learning_rate": 4.2972366361559435e-06, + "loss": 0.3085, + "step": 27901 + }, + { + "epoch": 0.5585566649150464, + "grad_norm": 1.1296615600585938, + "learning_rate": 4.296915672728517e-06, + "loss": 0.3094, + "step": 27902 + }, + { + "epoch": 0.5585766834321748, + "grad_norm": 1.1392163038253784, + "learning_rate": 4.296594712256708e-06, + "loss": 0.2792, + "step": 27903 + }, + { + "epoch": 0.5585967019493031, + "grad_norm": 1.15923273563385, + "learning_rate": 4.2962737547418595e-06, + "loss": 0.3555, + "step": 27904 + }, + { + "epoch": 0.5586167204664314, + "grad_norm": 0.9878535270690918, + "learning_rate": 4.2959528001853265e-06, + "loss": 0.3217, + "step": 27905 + }, + { + "epoch": 0.5586367389835598, + "grad_norm": 1.8062723875045776, + "learning_rate": 4.295631848588456e-06, + "loss": 0.8086, + "step": 27906 + }, + { + "epoch": 0.5586567575006881, + "grad_norm": 1.0736244916915894, + "learning_rate": 4.2953108999525995e-06, + "loss": 0.2562, + "step": 27907 + }, + { + "epoch": 0.5586767760178165, + "grad_norm": 1.300748586654663, + "learning_rate": 4.294989954279103e-06, + "loss": 0.3178, + "step": 27908 + }, + { + "epoch": 0.5586967945349448, + "grad_norm": 1.128258466720581, + "learning_rate": 4.294669011569318e-06, + "loss": 0.3488, + "step": 27909 + }, + { + "epoch": 0.5587168130520732, + "grad_norm": 1.8877062797546387, + "learning_rate": 4.29434807182459e-06, + "loss": 0.8126, + "step": 27910 + }, + { + "epoch": 0.5587368315692015, + "grad_norm": 1.6582869291305542, + "learning_rate": 4.294027135046273e-06, + "loss": 0.7585, + "step": 27911 + }, + { + "epoch": 0.5587568500863298, + "grad_norm": 1.0551542043685913, + "learning_rate": 4.293706201235713e-06, + "loss": 0.3011, + "step": 27912 + }, + { + "epoch": 0.5587768686034582, + "grad_norm": 1.0288296937942505, + "learning_rate": 4.293385270394261e-06, + "loss": 0.2918, + "step": 27913 + }, + { + "epoch": 0.5587968871205865, + "grad_norm": 1.0724905729293823, + "learning_rate": 4.293064342523264e-06, + "loss": 0.3139, + "step": 27914 + }, + { + "epoch": 0.5588169056377149, + "grad_norm": 1.0652800798416138, + "learning_rate": 4.292743417624073e-06, + "loss": 0.2954, + "step": 27915 + }, + { + "epoch": 0.5588369241548432, + "grad_norm": 1.1918022632598877, + "learning_rate": 4.292422495698038e-06, + "loss": 0.3563, + "step": 27916 + }, + { + "epoch": 0.5588569426719716, + "grad_norm": 1.0114545822143555, + "learning_rate": 4.292101576746504e-06, + "loss": 0.2911, + "step": 27917 + }, + { + "epoch": 0.5588769611890999, + "grad_norm": 1.030128002166748, + "learning_rate": 4.291780660770825e-06, + "loss": 0.2537, + "step": 27918 + }, + { + "epoch": 0.5588969797062283, + "grad_norm": 1.0906113386154175, + "learning_rate": 4.291459747772345e-06, + "loss": 0.3638, + "step": 27919 + }, + { + "epoch": 0.5589169982233566, + "grad_norm": 1.0669234991073608, + "learning_rate": 4.291138837752418e-06, + "loss": 0.323, + "step": 27920 + }, + { + "epoch": 0.5589370167404849, + "grad_norm": 1.1608818769454956, + "learning_rate": 4.290817930712391e-06, + "loss": 0.3066, + "step": 27921 + }, + { + "epoch": 0.5589570352576133, + "grad_norm": 1.0572547912597656, + "learning_rate": 4.290497026653613e-06, + "loss": 0.2789, + "step": 27922 + }, + { + "epoch": 0.5589770537747416, + "grad_norm": 1.1103166341781616, + "learning_rate": 4.29017612557743e-06, + "loss": 0.3238, + "step": 27923 + }, + { + "epoch": 0.55899707229187, + "grad_norm": 1.3505523204803467, + "learning_rate": 4.289855227485196e-06, + "loss": 0.3202, + "step": 27924 + }, + { + "epoch": 0.5590170908089983, + "grad_norm": 1.0687932968139648, + "learning_rate": 4.289534332378257e-06, + "loss": 0.2971, + "step": 27925 + }, + { + "epoch": 0.5590371093261267, + "grad_norm": 1.0516921281814575, + "learning_rate": 4.289213440257964e-06, + "loss": 0.3175, + "step": 27926 + }, + { + "epoch": 0.559057127843255, + "grad_norm": 1.163757085800171, + "learning_rate": 4.2888925511256644e-06, + "loss": 0.2897, + "step": 27927 + }, + { + "epoch": 0.5590771463603833, + "grad_norm": 1.1704885959625244, + "learning_rate": 4.288571664982705e-06, + "loss": 0.3612, + "step": 27928 + }, + { + "epoch": 0.5590971648775117, + "grad_norm": 1.0230711698532104, + "learning_rate": 4.28825078183044e-06, + "loss": 0.2766, + "step": 27929 + }, + { + "epoch": 0.55911718339464, + "grad_norm": 1.1308097839355469, + "learning_rate": 4.287929901670214e-06, + "loss": 0.3106, + "step": 27930 + }, + { + "epoch": 0.5591372019117684, + "grad_norm": 1.135143518447876, + "learning_rate": 4.287609024503379e-06, + "loss": 0.2926, + "step": 27931 + }, + { + "epoch": 0.5591572204288967, + "grad_norm": 1.23176109790802, + "learning_rate": 4.2872881503312805e-06, + "loss": 0.3326, + "step": 27932 + }, + { + "epoch": 0.5591772389460251, + "grad_norm": 1.1624770164489746, + "learning_rate": 4.28696727915527e-06, + "loss": 0.2831, + "step": 27933 + }, + { + "epoch": 0.5591972574631534, + "grad_norm": 1.0000375509262085, + "learning_rate": 4.286646410976697e-06, + "loss": 0.252, + "step": 27934 + }, + { + "epoch": 0.5592172759802818, + "grad_norm": 1.1135574579238892, + "learning_rate": 4.286325545796909e-06, + "loss": 0.2911, + "step": 27935 + }, + { + "epoch": 0.5592372944974101, + "grad_norm": 1.1896274089813232, + "learning_rate": 4.286004683617252e-06, + "loss": 0.2974, + "step": 27936 + }, + { + "epoch": 0.5592573130145384, + "grad_norm": 1.0936988592147827, + "learning_rate": 4.28568382443908e-06, + "loss": 0.2588, + "step": 27937 + }, + { + "epoch": 0.5592773315316668, + "grad_norm": 1.2030619382858276, + "learning_rate": 4.285362968263737e-06, + "loss": 0.3227, + "step": 27938 + }, + { + "epoch": 0.5592973500487951, + "grad_norm": 1.1426116228103638, + "learning_rate": 4.285042115092577e-06, + "loss": 0.2618, + "step": 27939 + }, + { + "epoch": 0.5593173685659235, + "grad_norm": 1.1274582147598267, + "learning_rate": 4.284721264926946e-06, + "loss": 0.3067, + "step": 27940 + }, + { + "epoch": 0.5593373870830518, + "grad_norm": 1.0592060089111328, + "learning_rate": 4.28440041776819e-06, + "loss": 0.3559, + "step": 27941 + }, + { + "epoch": 0.5593574056001802, + "grad_norm": 1.2200491428375244, + "learning_rate": 4.284079573617665e-06, + "loss": 0.2623, + "step": 27942 + }, + { + "epoch": 0.5593774241173085, + "grad_norm": 1.2036625146865845, + "learning_rate": 4.283758732476711e-06, + "loss": 0.3269, + "step": 27943 + }, + { + "epoch": 0.5593974426344368, + "grad_norm": 1.0882465839385986, + "learning_rate": 4.283437894346685e-06, + "loss": 0.3012, + "step": 27944 + }, + { + "epoch": 0.5594174611515652, + "grad_norm": 1.0874017477035522, + "learning_rate": 4.283117059228928e-06, + "loss": 0.2996, + "step": 27945 + }, + { + "epoch": 0.5594374796686935, + "grad_norm": 1.3204997777938843, + "learning_rate": 4.282796227124796e-06, + "loss": 0.2967, + "step": 27946 + }, + { + "epoch": 0.5594574981858219, + "grad_norm": 0.9880557656288147, + "learning_rate": 4.282475398035633e-06, + "loss": 0.2689, + "step": 27947 + }, + { + "epoch": 0.5594775167029502, + "grad_norm": 1.0017197132110596, + "learning_rate": 4.28215457196279e-06, + "loss": 0.283, + "step": 27948 + }, + { + "epoch": 0.5594975352200786, + "grad_norm": 0.9581779837608337, + "learning_rate": 4.281833748907613e-06, + "loss": 0.2512, + "step": 27949 + }, + { + "epoch": 0.5595175537372069, + "grad_norm": 1.1612342596054077, + "learning_rate": 4.281512928871453e-06, + "loss": 0.3145, + "step": 27950 + }, + { + "epoch": 0.5595375722543353, + "grad_norm": 1.2055742740631104, + "learning_rate": 4.281192111855658e-06, + "loss": 0.3121, + "step": 27951 + }, + { + "epoch": 0.5595575907714636, + "grad_norm": 1.1192127466201782, + "learning_rate": 4.280871297861578e-06, + "loss": 0.3042, + "step": 27952 + }, + { + "epoch": 0.5595776092885919, + "grad_norm": 1.0965847969055176, + "learning_rate": 4.280550486890559e-06, + "loss": 0.32, + "step": 27953 + }, + { + "epoch": 0.5595976278057203, + "grad_norm": 1.0447108745574951, + "learning_rate": 4.280229678943949e-06, + "loss": 0.2898, + "step": 27954 + }, + { + "epoch": 0.5596176463228486, + "grad_norm": 1.121224045753479, + "learning_rate": 4.279908874023101e-06, + "loss": 0.2649, + "step": 27955 + }, + { + "epoch": 0.559637664839977, + "grad_norm": 1.025654911994934, + "learning_rate": 4.279588072129361e-06, + "loss": 0.2959, + "step": 27956 + }, + { + "epoch": 0.5596576833571053, + "grad_norm": 1.0954352617263794, + "learning_rate": 4.279267273264079e-06, + "loss": 0.3505, + "step": 27957 + }, + { + "epoch": 0.5596777018742337, + "grad_norm": 1.1873202323913574, + "learning_rate": 4.2789464774286e-06, + "loss": 0.326, + "step": 27958 + }, + { + "epoch": 0.559697720391362, + "grad_norm": 1.0297819375991821, + "learning_rate": 4.278625684624275e-06, + "loss": 0.2966, + "step": 27959 + }, + { + "epoch": 0.5597177389084903, + "grad_norm": 1.1464238166809082, + "learning_rate": 4.278304894852453e-06, + "loss": 0.321, + "step": 27960 + }, + { + "epoch": 0.5597377574256187, + "grad_norm": 1.1660611629486084, + "learning_rate": 4.277984108114482e-06, + "loss": 0.2668, + "step": 27961 + }, + { + "epoch": 0.559757775942747, + "grad_norm": 2.0340514183044434, + "learning_rate": 4.277663324411711e-06, + "loss": 0.7855, + "step": 27962 + }, + { + "epoch": 0.5597777944598754, + "grad_norm": 0.9801258444786072, + "learning_rate": 4.277342543745486e-06, + "loss": 0.287, + "step": 27963 + }, + { + "epoch": 0.5597978129770037, + "grad_norm": 1.3086342811584473, + "learning_rate": 4.2770217661171585e-06, + "loss": 0.2829, + "step": 27964 + }, + { + "epoch": 0.5598178314941321, + "grad_norm": 1.178669810295105, + "learning_rate": 4.276700991528077e-06, + "loss": 0.3481, + "step": 27965 + }, + { + "epoch": 0.5598378500112604, + "grad_norm": 1.090050220489502, + "learning_rate": 4.2763802199795874e-06, + "loss": 0.2961, + "step": 27966 + }, + { + "epoch": 0.5598578685283888, + "grad_norm": 1.1300342082977295, + "learning_rate": 4.276059451473039e-06, + "loss": 0.3492, + "step": 27967 + }, + { + "epoch": 0.5598778870455171, + "grad_norm": 1.1050407886505127, + "learning_rate": 4.275738686009783e-06, + "loss": 0.2834, + "step": 27968 + }, + { + "epoch": 0.5598979055626454, + "grad_norm": 1.2009949684143066, + "learning_rate": 4.275417923591164e-06, + "loss": 0.3425, + "step": 27969 + }, + { + "epoch": 0.5599179240797738, + "grad_norm": 0.9565103650093079, + "learning_rate": 4.275097164218534e-06, + "loss": 0.2621, + "step": 27970 + }, + { + "epoch": 0.5599379425969021, + "grad_norm": 1.1556479930877686, + "learning_rate": 4.274776407893238e-06, + "loss": 0.2692, + "step": 27971 + }, + { + "epoch": 0.5599579611140305, + "grad_norm": 1.2687402963638306, + "learning_rate": 4.274455654616625e-06, + "loss": 0.2896, + "step": 27972 + }, + { + "epoch": 0.5599779796311588, + "grad_norm": 1.2358381748199463, + "learning_rate": 4.274134904390047e-06, + "loss": 0.3183, + "step": 27973 + }, + { + "epoch": 0.5599979981482872, + "grad_norm": 1.4557868242263794, + "learning_rate": 4.273814157214848e-06, + "loss": 0.3354, + "step": 27974 + }, + { + "epoch": 0.5600180166654155, + "grad_norm": 1.8685353994369507, + "learning_rate": 4.273493413092379e-06, + "loss": 0.8032, + "step": 27975 + }, + { + "epoch": 0.5600380351825438, + "grad_norm": 1.1216224431991577, + "learning_rate": 4.273172672023985e-06, + "loss": 0.3104, + "step": 27976 + }, + { + "epoch": 0.5600580536996722, + "grad_norm": 0.9957671761512756, + "learning_rate": 4.272851934011018e-06, + "loss": 0.2981, + "step": 27977 + }, + { + "epoch": 0.5600780722168005, + "grad_norm": 1.144799828529358, + "learning_rate": 4.2725311990548254e-06, + "loss": 0.3052, + "step": 27978 + }, + { + "epoch": 0.5600980907339289, + "grad_norm": 1.9093343019485474, + "learning_rate": 4.272210467156756e-06, + "loss": 0.7659, + "step": 27979 + }, + { + "epoch": 0.5601181092510572, + "grad_norm": 1.0408058166503906, + "learning_rate": 4.271889738318155e-06, + "loss": 0.2903, + "step": 27980 + }, + { + "epoch": 0.5601381277681856, + "grad_norm": 1.0162700414657593, + "learning_rate": 4.2715690125403746e-06, + "loss": 0.2796, + "step": 27981 + }, + { + "epoch": 0.5601581462853139, + "grad_norm": 0.991349458694458, + "learning_rate": 4.27124828982476e-06, + "loss": 0.3014, + "step": 27982 + }, + { + "epoch": 0.5601781648024423, + "grad_norm": 1.1099117994308472, + "learning_rate": 4.270927570172662e-06, + "loss": 0.2713, + "step": 27983 + }, + { + "epoch": 0.5601981833195706, + "grad_norm": 1.1636724472045898, + "learning_rate": 4.270606853585427e-06, + "loss": 0.3385, + "step": 27984 + }, + { + "epoch": 0.5602182018366989, + "grad_norm": 1.173279881477356, + "learning_rate": 4.270286140064402e-06, + "loss": 0.3223, + "step": 27985 + }, + { + "epoch": 0.5602382203538273, + "grad_norm": 1.1163026094436646, + "learning_rate": 4.269965429610941e-06, + "loss": 0.3305, + "step": 27986 + }, + { + "epoch": 0.5602582388709556, + "grad_norm": 1.022943139076233, + "learning_rate": 4.269644722226386e-06, + "loss": 0.2554, + "step": 27987 + }, + { + "epoch": 0.560278257388084, + "grad_norm": 1.0356276035308838, + "learning_rate": 4.269324017912087e-06, + "loss": 0.3075, + "step": 27988 + }, + { + "epoch": 0.5602982759052123, + "grad_norm": 1.9074368476867676, + "learning_rate": 4.269003316669392e-06, + "loss": 0.7865, + "step": 27989 + }, + { + "epoch": 0.5603182944223407, + "grad_norm": 1.1598474979400635, + "learning_rate": 4.268682618499651e-06, + "loss": 0.3313, + "step": 27990 + }, + { + "epoch": 0.560338312939469, + "grad_norm": 1.0652369260787964, + "learning_rate": 4.2683619234042114e-06, + "loss": 0.3139, + "step": 27991 + }, + { + "epoch": 0.5603583314565973, + "grad_norm": 1.0346260070800781, + "learning_rate": 4.2680412313844215e-06, + "loss": 0.3057, + "step": 27992 + }, + { + "epoch": 0.5603783499737257, + "grad_norm": 1.06075119972229, + "learning_rate": 4.267720542441626e-06, + "loss": 0.3155, + "step": 27993 + }, + { + "epoch": 0.560398368490854, + "grad_norm": 1.1043516397476196, + "learning_rate": 4.267399856577177e-06, + "loss": 0.322, + "step": 27994 + }, + { + "epoch": 0.5604183870079824, + "grad_norm": 0.9680062532424927, + "learning_rate": 4.267079173792421e-06, + "loss": 0.2811, + "step": 27995 + }, + { + "epoch": 0.5604384055251107, + "grad_norm": 1.0098230838775635, + "learning_rate": 4.266758494088707e-06, + "loss": 0.2785, + "step": 27996 + }, + { + "epoch": 0.5604584240422391, + "grad_norm": 1.8550723791122437, + "learning_rate": 4.266437817467384e-06, + "loss": 0.7566, + "step": 27997 + }, + { + "epoch": 0.5604784425593674, + "grad_norm": 1.794395923614502, + "learning_rate": 4.2661171439297946e-06, + "loss": 0.7298, + "step": 27998 + }, + { + "epoch": 0.5604984610764958, + "grad_norm": 1.0576927661895752, + "learning_rate": 4.265796473477294e-06, + "loss": 0.2903, + "step": 27999 + }, + { + "epoch": 0.5605184795936241, + "grad_norm": 1.23858642578125, + "learning_rate": 4.265475806111227e-06, + "loss": 0.312, + "step": 28000 + }, + { + "epoch": 0.5605384981107524, + "grad_norm": 1.2762953042984009, + "learning_rate": 4.26515514183294e-06, + "loss": 0.3053, + "step": 28001 + }, + { + "epoch": 0.5605585166278808, + "grad_norm": 2.321697473526001, + "learning_rate": 4.264834480643785e-06, + "loss": 0.8057, + "step": 28002 + }, + { + "epoch": 0.5605785351450091, + "grad_norm": 1.129327654838562, + "learning_rate": 4.264513822545104e-06, + "loss": 0.2966, + "step": 28003 + }, + { + "epoch": 0.5605985536621375, + "grad_norm": 1.0243130922317505, + "learning_rate": 4.264193167538251e-06, + "loss": 0.2884, + "step": 28004 + }, + { + "epoch": 0.5606185721792658, + "grad_norm": 1.211300015449524, + "learning_rate": 4.263872515624572e-06, + "loss": 0.3244, + "step": 28005 + }, + { + "epoch": 0.5606385906963942, + "grad_norm": 1.0476962327957153, + "learning_rate": 4.263551866805413e-06, + "loss": 0.3497, + "step": 28006 + }, + { + "epoch": 0.5606586092135225, + "grad_norm": 1.9047774076461792, + "learning_rate": 4.263231221082124e-06, + "loss": 0.8165, + "step": 28007 + }, + { + "epoch": 0.5606786277306508, + "grad_norm": 1.232234239578247, + "learning_rate": 4.262910578456052e-06, + "loss": 0.3579, + "step": 28008 + }, + { + "epoch": 0.5606986462477792, + "grad_norm": 1.4001117944717407, + "learning_rate": 4.262589938928547e-06, + "loss": 0.3938, + "step": 28009 + }, + { + "epoch": 0.5607186647649075, + "grad_norm": 1.1898128986358643, + "learning_rate": 4.262269302500954e-06, + "loss": 0.285, + "step": 28010 + }, + { + "epoch": 0.5607386832820359, + "grad_norm": 1.1595797538757324, + "learning_rate": 4.2619486691746235e-06, + "loss": 0.2868, + "step": 28011 + }, + { + "epoch": 0.5607587017991642, + "grad_norm": 1.0236073732376099, + "learning_rate": 4.2616280389509e-06, + "loss": 0.2792, + "step": 28012 + }, + { + "epoch": 0.5607787203162926, + "grad_norm": 1.0006871223449707, + "learning_rate": 4.261307411831134e-06, + "loss": 0.3045, + "step": 28013 + }, + { + "epoch": 0.5607987388334209, + "grad_norm": 1.0862621068954468, + "learning_rate": 4.260986787816672e-06, + "loss": 0.2969, + "step": 28014 + }, + { + "epoch": 0.5608187573505493, + "grad_norm": 1.0473815202713013, + "learning_rate": 4.260666166908865e-06, + "loss": 0.3279, + "step": 28015 + }, + { + "epoch": 0.5608387758676776, + "grad_norm": 1.0189080238342285, + "learning_rate": 4.260345549109054e-06, + "loss": 0.2782, + "step": 28016 + }, + { + "epoch": 0.5608587943848059, + "grad_norm": 1.0711750984191895, + "learning_rate": 4.260024934418594e-06, + "loss": 0.3266, + "step": 28017 + }, + { + "epoch": 0.5608788129019343, + "grad_norm": 1.0420676469802856, + "learning_rate": 4.259704322838831e-06, + "loss": 0.2584, + "step": 28018 + }, + { + "epoch": 0.5608988314190626, + "grad_norm": 1.2279316186904907, + "learning_rate": 4.25938371437111e-06, + "loss": 0.3206, + "step": 28019 + }, + { + "epoch": 0.560918849936191, + "grad_norm": 1.0789220333099365, + "learning_rate": 4.25906310901678e-06, + "loss": 0.284, + "step": 28020 + }, + { + "epoch": 0.5609388684533193, + "grad_norm": 0.9980494976043701, + "learning_rate": 4.25874250677719e-06, + "loss": 0.2942, + "step": 28021 + }, + { + "epoch": 0.5609588869704477, + "grad_norm": 1.2196805477142334, + "learning_rate": 4.258421907653687e-06, + "loss": 0.3436, + "step": 28022 + }, + { + "epoch": 0.560978905487576, + "grad_norm": 1.1337276697158813, + "learning_rate": 4.25810131164762e-06, + "loss": 0.294, + "step": 28023 + }, + { + "epoch": 0.5609989240047043, + "grad_norm": 1.0455163717269897, + "learning_rate": 4.257780718760335e-06, + "loss": 0.3293, + "step": 28024 + }, + { + "epoch": 0.5610189425218327, + "grad_norm": 1.0948022603988647, + "learning_rate": 4.257460128993178e-06, + "loss": 0.2441, + "step": 28025 + }, + { + "epoch": 0.561038961038961, + "grad_norm": 1.0557764768600464, + "learning_rate": 4.2571395423475e-06, + "loss": 0.3284, + "step": 28026 + }, + { + "epoch": 0.5610589795560894, + "grad_norm": 1.0493096113204956, + "learning_rate": 4.2568189588246476e-06, + "loss": 0.2671, + "step": 28027 + }, + { + "epoch": 0.5610789980732177, + "grad_norm": 1.0580177307128906, + "learning_rate": 4.256498378425969e-06, + "loss": 0.2769, + "step": 28028 + }, + { + "epoch": 0.5610990165903461, + "grad_norm": 1.2419755458831787, + "learning_rate": 4.256177801152808e-06, + "loss": 0.298, + "step": 28029 + }, + { + "epoch": 0.5611190351074744, + "grad_norm": 1.1344748735427856, + "learning_rate": 4.25585722700652e-06, + "loss": 0.3271, + "step": 28030 + }, + { + "epoch": 0.5611390536246028, + "grad_norm": 1.1711971759796143, + "learning_rate": 4.255536655988447e-06, + "loss": 0.3379, + "step": 28031 + }, + { + "epoch": 0.5611590721417311, + "grad_norm": 1.0972756147384644, + "learning_rate": 4.255216088099936e-06, + "loss": 0.3091, + "step": 28032 + }, + { + "epoch": 0.5611790906588594, + "grad_norm": 1.1534968614578247, + "learning_rate": 4.254895523342338e-06, + "loss": 0.2952, + "step": 28033 + }, + { + "epoch": 0.5611991091759878, + "grad_norm": 1.2396185398101807, + "learning_rate": 4.2545749617169965e-06, + "loss": 0.3, + "step": 28034 + }, + { + "epoch": 0.5612191276931161, + "grad_norm": 1.1250253915786743, + "learning_rate": 4.254254403225263e-06, + "loss": 0.3527, + "step": 28035 + }, + { + "epoch": 0.5612391462102445, + "grad_norm": 1.2492308616638184, + "learning_rate": 4.253933847868485e-06, + "loss": 0.358, + "step": 28036 + }, + { + "epoch": 0.5612591647273728, + "grad_norm": 1.1251367330551147, + "learning_rate": 4.253613295648007e-06, + "loss": 0.3457, + "step": 28037 + }, + { + "epoch": 0.5612791832445012, + "grad_norm": 1.1821995973587036, + "learning_rate": 4.253292746565176e-06, + "loss": 0.3029, + "step": 28038 + }, + { + "epoch": 0.5612992017616295, + "grad_norm": 1.886661171913147, + "learning_rate": 4.252972200621345e-06, + "loss": 0.7479, + "step": 28039 + }, + { + "epoch": 0.5613192202787578, + "grad_norm": 1.135360598564148, + "learning_rate": 4.2526516578178555e-06, + "loss": 0.2971, + "step": 28040 + }, + { + "epoch": 0.5613392387958862, + "grad_norm": 1.1369459629058838, + "learning_rate": 4.25233111815606e-06, + "loss": 0.3241, + "step": 28041 + }, + { + "epoch": 0.5613592573130145, + "grad_norm": 1.1268054246902466, + "learning_rate": 4.252010581637303e-06, + "loss": 0.2791, + "step": 28042 + }, + { + "epoch": 0.5613792758301429, + "grad_norm": 1.019622802734375, + "learning_rate": 4.251690048262931e-06, + "loss": 0.2992, + "step": 28043 + }, + { + "epoch": 0.5613992943472712, + "grad_norm": 0.9783368706703186, + "learning_rate": 4.251369518034294e-06, + "loss": 0.2823, + "step": 28044 + }, + { + "epoch": 0.5614193128643996, + "grad_norm": 1.1210564374923706, + "learning_rate": 4.251048990952738e-06, + "loss": 0.3058, + "step": 28045 + }, + { + "epoch": 0.5614393313815279, + "grad_norm": 0.9951102137565613, + "learning_rate": 4.250728467019612e-06, + "loss": 0.2779, + "step": 28046 + }, + { + "epoch": 0.5614593498986563, + "grad_norm": 1.1720874309539795, + "learning_rate": 4.25040794623626e-06, + "loss": 0.2961, + "step": 28047 + }, + { + "epoch": 0.5614793684157846, + "grad_norm": 1.172363519668579, + "learning_rate": 4.2500874286040335e-06, + "loss": 0.3088, + "step": 28048 + }, + { + "epoch": 0.5614993869329129, + "grad_norm": 1.1658369302749634, + "learning_rate": 4.24976691412428e-06, + "loss": 0.3217, + "step": 28049 + }, + { + "epoch": 0.5615194054500413, + "grad_norm": 1.0495885610580444, + "learning_rate": 4.249446402798342e-06, + "loss": 0.2689, + "step": 28050 + }, + { + "epoch": 0.5615394239671696, + "grad_norm": 1.1162617206573486, + "learning_rate": 4.249125894627569e-06, + "loss": 0.3164, + "step": 28051 + }, + { + "epoch": 0.561559442484298, + "grad_norm": 1.8720042705535889, + "learning_rate": 4.248805389613312e-06, + "loss": 0.7714, + "step": 28052 + }, + { + "epoch": 0.5615794610014263, + "grad_norm": 1.1769963502883911, + "learning_rate": 4.248484887756914e-06, + "loss": 0.2685, + "step": 28053 + }, + { + "epoch": 0.5615994795185547, + "grad_norm": 1.084510326385498, + "learning_rate": 4.248164389059724e-06, + "loss": 0.3042, + "step": 28054 + }, + { + "epoch": 0.561619498035683, + "grad_norm": 1.051929235458374, + "learning_rate": 4.2478438935230895e-06, + "loss": 0.3204, + "step": 28055 + }, + { + "epoch": 0.5616395165528113, + "grad_norm": 1.3564780950546265, + "learning_rate": 4.247523401148355e-06, + "loss": 0.274, + "step": 28056 + }, + { + "epoch": 0.5616595350699397, + "grad_norm": 1.1322580575942993, + "learning_rate": 4.247202911936873e-06, + "loss": 0.2443, + "step": 28057 + }, + { + "epoch": 0.561679553587068, + "grad_norm": 1.0290812253952026, + "learning_rate": 4.246882425889987e-06, + "loss": 0.2394, + "step": 28058 + }, + { + "epoch": 0.5616995721041964, + "grad_norm": 1.119397521018982, + "learning_rate": 4.246561943009046e-06, + "loss": 0.3409, + "step": 28059 + }, + { + "epoch": 0.5617195906213247, + "grad_norm": 1.1194874048233032, + "learning_rate": 4.2462414632953945e-06, + "loss": 0.3298, + "step": 28060 + }, + { + "epoch": 0.5617396091384531, + "grad_norm": 1.1145904064178467, + "learning_rate": 4.245920986750384e-06, + "loss": 0.3443, + "step": 28061 + }, + { + "epoch": 0.5617596276555814, + "grad_norm": 1.0886948108673096, + "learning_rate": 4.2456005133753585e-06, + "loss": 0.3091, + "step": 28062 + }, + { + "epoch": 0.5617796461727097, + "grad_norm": 1.1823281049728394, + "learning_rate": 4.245280043171667e-06, + "loss": 0.2738, + "step": 28063 + }, + { + "epoch": 0.5617996646898381, + "grad_norm": 1.0836819410324097, + "learning_rate": 4.244959576140654e-06, + "loss": 0.2864, + "step": 28064 + }, + { + "epoch": 0.5618196832069664, + "grad_norm": 0.9546984434127808, + "learning_rate": 4.244639112283669e-06, + "loss": 0.3058, + "step": 28065 + }, + { + "epoch": 0.5618397017240948, + "grad_norm": 1.8192098140716553, + "learning_rate": 4.2443186516020585e-06, + "loss": 0.8445, + "step": 28066 + }, + { + "epoch": 0.5618597202412231, + "grad_norm": 1.2467669248580933, + "learning_rate": 4.243998194097171e-06, + "loss": 0.3028, + "step": 28067 + }, + { + "epoch": 0.5618797387583515, + "grad_norm": 1.230229139328003, + "learning_rate": 4.2436777397703514e-06, + "loss": 0.3656, + "step": 28068 + }, + { + "epoch": 0.5618997572754798, + "grad_norm": 1.0779697895050049, + "learning_rate": 4.243357288622947e-06, + "loss": 0.2986, + "step": 28069 + }, + { + "epoch": 0.5619197757926082, + "grad_norm": 1.0582369565963745, + "learning_rate": 4.243036840656307e-06, + "loss": 0.3085, + "step": 28070 + }, + { + "epoch": 0.5619397943097365, + "grad_norm": 1.270503282546997, + "learning_rate": 4.242716395871777e-06, + "loss": 0.335, + "step": 28071 + }, + { + "epoch": 0.5619598128268648, + "grad_norm": 1.0596972703933716, + "learning_rate": 4.242395954270704e-06, + "loss": 0.2589, + "step": 28072 + }, + { + "epoch": 0.5619798313439932, + "grad_norm": 1.3079357147216797, + "learning_rate": 4.242075515854437e-06, + "loss": 0.3121, + "step": 28073 + }, + { + "epoch": 0.5619998498611215, + "grad_norm": 1.1721370220184326, + "learning_rate": 4.241755080624318e-06, + "loss": 0.3096, + "step": 28074 + }, + { + "epoch": 0.5620198683782499, + "grad_norm": 1.0913420915603638, + "learning_rate": 4.2414346485817e-06, + "loss": 0.3014, + "step": 28075 + }, + { + "epoch": 0.5620398868953782, + "grad_norm": 1.1786727905273438, + "learning_rate": 4.241114219727927e-06, + "loss": 0.3099, + "step": 28076 + }, + { + "epoch": 0.5620599054125066, + "grad_norm": 1.0770082473754883, + "learning_rate": 4.240793794064346e-06, + "loss": 0.2853, + "step": 28077 + }, + { + "epoch": 0.5620799239296349, + "grad_norm": 1.1583260297775269, + "learning_rate": 4.240473371592304e-06, + "loss": 0.3157, + "step": 28078 + }, + { + "epoch": 0.5620999424467632, + "grad_norm": 1.1611504554748535, + "learning_rate": 4.24015295231315e-06, + "loss": 0.2914, + "step": 28079 + }, + { + "epoch": 0.5621199609638916, + "grad_norm": 1.0740280151367188, + "learning_rate": 4.2398325362282295e-06, + "loss": 0.3564, + "step": 28080 + }, + { + "epoch": 0.5621399794810199, + "grad_norm": 1.1160759925842285, + "learning_rate": 4.239512123338889e-06, + "loss": 0.2604, + "step": 28081 + }, + { + "epoch": 0.5621599979981483, + "grad_norm": 1.308445692062378, + "learning_rate": 4.239191713646474e-06, + "loss": 0.3208, + "step": 28082 + }, + { + "epoch": 0.5621800165152766, + "grad_norm": 1.0703802108764648, + "learning_rate": 4.238871307152336e-06, + "loss": 0.3297, + "step": 28083 + }, + { + "epoch": 0.562200035032405, + "grad_norm": 2.14394474029541, + "learning_rate": 4.238550903857818e-06, + "loss": 0.8459, + "step": 28084 + }, + { + "epoch": 0.5622200535495333, + "grad_norm": 1.2708568572998047, + "learning_rate": 4.238230503764269e-06, + "loss": 0.3086, + "step": 28085 + }, + { + "epoch": 0.5622400720666617, + "grad_norm": 1.0012071132659912, + "learning_rate": 4.237910106873035e-06, + "loss": 0.3059, + "step": 28086 + }, + { + "epoch": 0.56226009058379, + "grad_norm": 1.221521019935608, + "learning_rate": 4.2375897131854606e-06, + "loss": 0.306, + "step": 28087 + }, + { + "epoch": 0.5622801091009183, + "grad_norm": 1.06985604763031, + "learning_rate": 4.237269322702897e-06, + "loss": 0.3029, + "step": 28088 + }, + { + "epoch": 0.5623001276180467, + "grad_norm": 1.066977620124817, + "learning_rate": 4.23694893542669e-06, + "loss": 0.3219, + "step": 28089 + }, + { + "epoch": 0.562320146135175, + "grad_norm": 1.1390432119369507, + "learning_rate": 4.236628551358183e-06, + "loss": 0.2733, + "step": 28090 + }, + { + "epoch": 0.5623401646523034, + "grad_norm": 1.243048071861267, + "learning_rate": 4.236308170498726e-06, + "loss": 0.3464, + "step": 28091 + }, + { + "epoch": 0.5623601831694317, + "grad_norm": 1.046822428703308, + "learning_rate": 4.235987792849666e-06, + "loss": 0.2878, + "step": 28092 + }, + { + "epoch": 0.5623802016865601, + "grad_norm": 1.9770569801330566, + "learning_rate": 4.2356674184123485e-06, + "loss": 0.8041, + "step": 28093 + }, + { + "epoch": 0.5624002202036884, + "grad_norm": 2.008713483810425, + "learning_rate": 4.235347047188122e-06, + "loss": 0.8757, + "step": 28094 + }, + { + "epoch": 0.5624202387208167, + "grad_norm": 1.1975059509277344, + "learning_rate": 4.235026679178331e-06, + "loss": 0.324, + "step": 28095 + }, + { + "epoch": 0.5624402572379451, + "grad_norm": 2.013375997543335, + "learning_rate": 4.234706314384321e-06, + "loss": 0.8029, + "step": 28096 + }, + { + "epoch": 0.5624602757550734, + "grad_norm": 1.1306202411651611, + "learning_rate": 4.234385952807442e-06, + "loss": 0.3079, + "step": 28097 + }, + { + "epoch": 0.5624802942722018, + "grad_norm": 1.1182290315628052, + "learning_rate": 4.234065594449042e-06, + "loss": 0.3318, + "step": 28098 + }, + { + "epoch": 0.5625003127893301, + "grad_norm": 1.0392361879348755, + "learning_rate": 4.233745239310464e-06, + "loss": 0.265, + "step": 28099 + }, + { + "epoch": 0.5625203313064585, + "grad_norm": 1.0968213081359863, + "learning_rate": 4.233424887393055e-06, + "loss": 0.3203, + "step": 28100 + }, + { + "epoch": 0.5625403498235868, + "grad_norm": 1.0167537927627563, + "learning_rate": 4.233104538698164e-06, + "loss": 0.2806, + "step": 28101 + }, + { + "epoch": 0.5625603683407152, + "grad_norm": 1.267134428024292, + "learning_rate": 4.232784193227137e-06, + "loss": 0.3205, + "step": 28102 + }, + { + "epoch": 0.5625803868578435, + "grad_norm": 1.095365285873413, + "learning_rate": 4.232463850981319e-06, + "loss": 0.3501, + "step": 28103 + }, + { + "epoch": 0.5626004053749718, + "grad_norm": 1.0899993181228638, + "learning_rate": 4.232143511962059e-06, + "loss": 0.2829, + "step": 28104 + }, + { + "epoch": 0.5626204238921002, + "grad_norm": 1.124023199081421, + "learning_rate": 4.231823176170699e-06, + "loss": 0.3528, + "step": 28105 + }, + { + "epoch": 0.5626404424092285, + "grad_norm": 1.1934438943862915, + "learning_rate": 4.2315028436085935e-06, + "loss": 0.2663, + "step": 28106 + }, + { + "epoch": 0.5626604609263569, + "grad_norm": 1.4975872039794922, + "learning_rate": 4.231182514277083e-06, + "loss": 0.3255, + "step": 28107 + }, + { + "epoch": 0.5626804794434852, + "grad_norm": 1.199055552482605, + "learning_rate": 4.230862188177516e-06, + "loss": 0.3138, + "step": 28108 + }, + { + "epoch": 0.5627004979606136, + "grad_norm": 1.1353965997695923, + "learning_rate": 4.230541865311237e-06, + "loss": 0.2739, + "step": 28109 + }, + { + "epoch": 0.5627205164777419, + "grad_norm": 1.134428858757019, + "learning_rate": 4.230221545679596e-06, + "loss": 0.3203, + "step": 28110 + }, + { + "epoch": 0.5627405349948702, + "grad_norm": 1.0991661548614502, + "learning_rate": 4.229901229283938e-06, + "loss": 0.2815, + "step": 28111 + }, + { + "epoch": 0.5627605535119986, + "grad_norm": 1.0428329706192017, + "learning_rate": 4.22958091612561e-06, + "loss": 0.3076, + "step": 28112 + }, + { + "epoch": 0.5627805720291269, + "grad_norm": 1.1248927116394043, + "learning_rate": 4.229260606205956e-06, + "loss": 0.2759, + "step": 28113 + }, + { + "epoch": 0.5628005905462553, + "grad_norm": 1.0351985692977905, + "learning_rate": 4.228940299526327e-06, + "loss": 0.3115, + "step": 28114 + }, + { + "epoch": 0.5628206090633836, + "grad_norm": 1.0188871622085571, + "learning_rate": 4.228619996088067e-06, + "loss": 0.2829, + "step": 28115 + }, + { + "epoch": 0.562840627580512, + "grad_norm": 1.0614513158798218, + "learning_rate": 4.22829969589252e-06, + "loss": 0.3238, + "step": 28116 + }, + { + "epoch": 0.5628606460976403, + "grad_norm": 1.431684136390686, + "learning_rate": 4.2279793989410375e-06, + "loss": 0.3142, + "step": 28117 + }, + { + "epoch": 0.5628806646147687, + "grad_norm": 1.1888447999954224, + "learning_rate": 4.2276591052349606e-06, + "loss": 0.3345, + "step": 28118 + }, + { + "epoch": 0.562900683131897, + "grad_norm": 1.1030049324035645, + "learning_rate": 4.227338814775641e-06, + "loss": 0.3129, + "step": 28119 + }, + { + "epoch": 0.5629207016490253, + "grad_norm": 1.1584116220474243, + "learning_rate": 4.227018527564423e-06, + "loss": 0.3242, + "step": 28120 + }, + { + "epoch": 0.5629407201661537, + "grad_norm": 1.0533392429351807, + "learning_rate": 4.226698243602652e-06, + "loss": 0.279, + "step": 28121 + }, + { + "epoch": 0.562960738683282, + "grad_norm": 1.010802149772644, + "learning_rate": 4.226377962891674e-06, + "loss": 0.2801, + "step": 28122 + }, + { + "epoch": 0.5629807572004104, + "grad_norm": 1.1717592477798462, + "learning_rate": 4.226057685432838e-06, + "loss": 0.3055, + "step": 28123 + }, + { + "epoch": 0.5630007757175387, + "grad_norm": 1.1451830863952637, + "learning_rate": 4.225737411227489e-06, + "loss": 0.3372, + "step": 28124 + }, + { + "epoch": 0.5630207942346671, + "grad_norm": 1.1641020774841309, + "learning_rate": 4.225417140276974e-06, + "loss": 0.3216, + "step": 28125 + }, + { + "epoch": 0.5630408127517954, + "grad_norm": 1.1890885829925537, + "learning_rate": 4.225096872582638e-06, + "loss": 0.3056, + "step": 28126 + }, + { + "epoch": 0.5630608312689237, + "grad_norm": 1.1051520109176636, + "learning_rate": 4.2247766081458265e-06, + "loss": 0.3414, + "step": 28127 + }, + { + "epoch": 0.5630808497860521, + "grad_norm": 1.1614351272583008, + "learning_rate": 4.224456346967888e-06, + "loss": 0.3258, + "step": 28128 + }, + { + "epoch": 0.5631008683031804, + "grad_norm": 1.1142297983169556, + "learning_rate": 4.224136089050169e-06, + "loss": 0.3012, + "step": 28129 + }, + { + "epoch": 0.5631208868203088, + "grad_norm": 2.020972728729248, + "learning_rate": 4.223815834394015e-06, + "loss": 0.8312, + "step": 28130 + }, + { + "epoch": 0.5631409053374371, + "grad_norm": 1.0125049352645874, + "learning_rate": 4.223495583000769e-06, + "loss": 0.2704, + "step": 28131 + }, + { + "epoch": 0.5631609238545655, + "grad_norm": 1.2545355558395386, + "learning_rate": 4.223175334871784e-06, + "loss": 0.3187, + "step": 28132 + }, + { + "epoch": 0.5631809423716938, + "grad_norm": 1.0841783285140991, + "learning_rate": 4.222855090008403e-06, + "loss": 0.2877, + "step": 28133 + }, + { + "epoch": 0.5632009608888222, + "grad_norm": 1.2835679054260254, + "learning_rate": 4.2225348484119714e-06, + "loss": 0.3048, + "step": 28134 + }, + { + "epoch": 0.5632209794059505, + "grad_norm": 1.0815579891204834, + "learning_rate": 4.222214610083834e-06, + "loss": 0.2844, + "step": 28135 + }, + { + "epoch": 0.5632409979230788, + "grad_norm": 1.1771878004074097, + "learning_rate": 4.22189437502534e-06, + "loss": 0.3456, + "step": 28136 + }, + { + "epoch": 0.5632610164402072, + "grad_norm": 1.9187766313552856, + "learning_rate": 4.221574143237835e-06, + "loss": 0.745, + "step": 28137 + }, + { + "epoch": 0.5632810349573355, + "grad_norm": 1.2074099779129028, + "learning_rate": 4.221253914722666e-06, + "loss": 0.2939, + "step": 28138 + }, + { + "epoch": 0.5633010534744639, + "grad_norm": 1.2026561498641968, + "learning_rate": 4.220933689481177e-06, + "loss": 0.2973, + "step": 28139 + }, + { + "epoch": 0.5633210719915922, + "grad_norm": 1.135593056678772, + "learning_rate": 4.220613467514714e-06, + "loss": 0.3224, + "step": 28140 + }, + { + "epoch": 0.5633410905087206, + "grad_norm": 1.2144416570663452, + "learning_rate": 4.220293248824625e-06, + "loss": 0.3083, + "step": 28141 + }, + { + "epoch": 0.5633611090258489, + "grad_norm": 1.0486806631088257, + "learning_rate": 4.219973033412255e-06, + "loss": 0.2919, + "step": 28142 + }, + { + "epoch": 0.5633811275429772, + "grad_norm": 1.166624903678894, + "learning_rate": 4.219652821278952e-06, + "loss": 0.3188, + "step": 28143 + }, + { + "epoch": 0.5634011460601056, + "grad_norm": 1.9093120098114014, + "learning_rate": 4.219332612426057e-06, + "loss": 0.8183, + "step": 28144 + }, + { + "epoch": 0.5634211645772339, + "grad_norm": 1.2146364450454712, + "learning_rate": 4.219012406854924e-06, + "loss": 0.3467, + "step": 28145 + }, + { + "epoch": 0.5634411830943623, + "grad_norm": 1.251218557357788, + "learning_rate": 4.218692204566893e-06, + "loss": 0.3558, + "step": 28146 + }, + { + "epoch": 0.5634612016114906, + "grad_norm": 1.166273593902588, + "learning_rate": 4.218372005563313e-06, + "loss": 0.3126, + "step": 28147 + }, + { + "epoch": 0.563481220128619, + "grad_norm": 1.0848978757858276, + "learning_rate": 4.218051809845527e-06, + "loss": 0.2927, + "step": 28148 + }, + { + "epoch": 0.5635012386457473, + "grad_norm": 1.0931413173675537, + "learning_rate": 4.217731617414882e-06, + "loss": 0.301, + "step": 28149 + }, + { + "epoch": 0.5635212571628757, + "grad_norm": 0.9866724610328674, + "learning_rate": 4.217411428272728e-06, + "loss": 0.2493, + "step": 28150 + }, + { + "epoch": 0.563541275680004, + "grad_norm": 1.189100980758667, + "learning_rate": 4.217091242420407e-06, + "loss": 0.2927, + "step": 28151 + }, + { + "epoch": 0.5635612941971323, + "grad_norm": 1.090728998184204, + "learning_rate": 4.216771059859266e-06, + "loss": 0.2879, + "step": 28152 + }, + { + "epoch": 0.5635813127142607, + "grad_norm": 1.11062490940094, + "learning_rate": 4.216450880590649e-06, + "loss": 0.3161, + "step": 28153 + }, + { + "epoch": 0.563601331231389, + "grad_norm": 1.0980167388916016, + "learning_rate": 4.216130704615905e-06, + "loss": 0.286, + "step": 28154 + }, + { + "epoch": 0.5636213497485174, + "grad_norm": 1.0552589893341064, + "learning_rate": 4.215810531936379e-06, + "loss": 0.29, + "step": 28155 + }, + { + "epoch": 0.5636413682656457, + "grad_norm": 1.0206654071807861, + "learning_rate": 4.2154903625534176e-06, + "loss": 0.2872, + "step": 28156 + }, + { + "epoch": 0.5636613867827741, + "grad_norm": 1.1001423597335815, + "learning_rate": 4.215170196468366e-06, + "loss": 0.326, + "step": 28157 + }, + { + "epoch": 0.5636814052999024, + "grad_norm": 1.933736801147461, + "learning_rate": 4.214850033682567e-06, + "loss": 0.7758, + "step": 28158 + }, + { + "epoch": 0.5637014238170307, + "grad_norm": 1.0300596952438354, + "learning_rate": 4.214529874197373e-06, + "loss": 0.3084, + "step": 28159 + }, + { + "epoch": 0.5637214423341591, + "grad_norm": 2.158005475997925, + "learning_rate": 4.214209718014125e-06, + "loss": 0.8562, + "step": 28160 + }, + { + "epoch": 0.5637414608512874, + "grad_norm": 1.1436313390731812, + "learning_rate": 4.21388956513417e-06, + "loss": 0.2912, + "step": 28161 + }, + { + "epoch": 0.5637614793684158, + "grad_norm": 1.076026439666748, + "learning_rate": 4.213569415558852e-06, + "loss": 0.2889, + "step": 28162 + }, + { + "epoch": 0.5637814978855441, + "grad_norm": 1.2296257019042969, + "learning_rate": 4.213249269289523e-06, + "loss": 0.3167, + "step": 28163 + }, + { + "epoch": 0.5638015164026725, + "grad_norm": 1.1406025886535645, + "learning_rate": 4.212929126327524e-06, + "loss": 0.3241, + "step": 28164 + }, + { + "epoch": 0.5638215349198008, + "grad_norm": 1.0202298164367676, + "learning_rate": 4.2126089866742004e-06, + "loss": 0.2796, + "step": 28165 + }, + { + "epoch": 0.5638415534369292, + "grad_norm": 1.1854743957519531, + "learning_rate": 4.212288850330898e-06, + "loss": 0.3285, + "step": 28166 + }, + { + "epoch": 0.5638615719540575, + "grad_norm": 0.9977365732192993, + "learning_rate": 4.211968717298966e-06, + "loss": 0.2569, + "step": 28167 + }, + { + "epoch": 0.5638815904711858, + "grad_norm": 1.1330453157424927, + "learning_rate": 4.211648587579746e-06, + "loss": 0.2808, + "step": 28168 + }, + { + "epoch": 0.5639016089883142, + "grad_norm": 1.849272608757019, + "learning_rate": 4.2113284611745875e-06, + "loss": 0.8153, + "step": 28169 + }, + { + "epoch": 0.5639216275054425, + "grad_norm": 1.1057729721069336, + "learning_rate": 4.211008338084834e-06, + "loss": 0.3414, + "step": 28170 + }, + { + "epoch": 0.5639416460225709, + "grad_norm": 1.2344568967819214, + "learning_rate": 4.2106882183118305e-06, + "loss": 0.3353, + "step": 28171 + }, + { + "epoch": 0.5639616645396992, + "grad_norm": 1.0822733640670776, + "learning_rate": 4.210368101856925e-06, + "loss": 0.3174, + "step": 28172 + }, + { + "epoch": 0.5639816830568276, + "grad_norm": 1.015960931777954, + "learning_rate": 4.210047988721463e-06, + "loss": 0.3479, + "step": 28173 + }, + { + "epoch": 0.5640017015739559, + "grad_norm": 1.1092495918273926, + "learning_rate": 4.209727878906787e-06, + "loss": 0.3366, + "step": 28174 + }, + { + "epoch": 0.5640217200910842, + "grad_norm": 1.4951776266098022, + "learning_rate": 4.2094077724142445e-06, + "loss": 0.2928, + "step": 28175 + }, + { + "epoch": 0.5640417386082126, + "grad_norm": 1.2149677276611328, + "learning_rate": 4.209087669245184e-06, + "loss": 0.319, + "step": 28176 + }, + { + "epoch": 0.5640617571253409, + "grad_norm": 1.0450012683868408, + "learning_rate": 4.208767569400948e-06, + "loss": 0.294, + "step": 28177 + }, + { + "epoch": 0.5640817756424693, + "grad_norm": 0.9905558228492737, + "learning_rate": 4.208447472882883e-06, + "loss": 0.3422, + "step": 28178 + }, + { + "epoch": 0.5641017941595976, + "grad_norm": 1.1633983850479126, + "learning_rate": 4.208127379692334e-06, + "loss": 0.3251, + "step": 28179 + }, + { + "epoch": 0.564121812676726, + "grad_norm": 1.0849553346633911, + "learning_rate": 4.2078072898306465e-06, + "loss": 0.2741, + "step": 28180 + }, + { + "epoch": 0.5641418311938543, + "grad_norm": 0.9847909808158875, + "learning_rate": 4.207487203299167e-06, + "loss": 0.2583, + "step": 28181 + }, + { + "epoch": 0.5641618497109827, + "grad_norm": 1.1080349683761597, + "learning_rate": 4.207167120099242e-06, + "loss": 0.2953, + "step": 28182 + }, + { + "epoch": 0.564181868228111, + "grad_norm": 0.9923799633979797, + "learning_rate": 4.206847040232215e-06, + "loss": 0.3176, + "step": 28183 + }, + { + "epoch": 0.5642018867452393, + "grad_norm": 1.128637671470642, + "learning_rate": 4.206526963699431e-06, + "loss": 0.3194, + "step": 28184 + }, + { + "epoch": 0.5642219052623677, + "grad_norm": 1.8795180320739746, + "learning_rate": 4.206206890502239e-06, + "loss": 0.7959, + "step": 28185 + }, + { + "epoch": 0.564241923779496, + "grad_norm": 1.1097807884216309, + "learning_rate": 4.205886820641982e-06, + "loss": 0.2957, + "step": 28186 + }, + { + "epoch": 0.5642619422966244, + "grad_norm": 1.0499736070632935, + "learning_rate": 4.205566754120005e-06, + "loss": 0.3032, + "step": 28187 + }, + { + "epoch": 0.5642819608137527, + "grad_norm": 1.276227593421936, + "learning_rate": 4.205246690937656e-06, + "loss": 0.2778, + "step": 28188 + }, + { + "epoch": 0.5643019793308811, + "grad_norm": 1.0444179773330688, + "learning_rate": 4.204926631096276e-06, + "loss": 0.2838, + "step": 28189 + }, + { + "epoch": 0.5643219978480094, + "grad_norm": 1.0438951253890991, + "learning_rate": 4.204606574597216e-06, + "loss": 0.3181, + "step": 28190 + }, + { + "epoch": 0.5643420163651377, + "grad_norm": 1.153246521949768, + "learning_rate": 4.2042865214418186e-06, + "loss": 0.27, + "step": 28191 + }, + { + "epoch": 0.5643620348822661, + "grad_norm": 1.0534658432006836, + "learning_rate": 4.203966471631427e-06, + "loss": 0.2864, + "step": 28192 + }, + { + "epoch": 0.5643820533993944, + "grad_norm": 1.0378297567367554, + "learning_rate": 4.2036464251673915e-06, + "loss": 0.31, + "step": 28193 + }, + { + "epoch": 0.5644020719165228, + "grad_norm": 1.7913908958435059, + "learning_rate": 4.2033263820510535e-06, + "loss": 0.7229, + "step": 28194 + }, + { + "epoch": 0.5644220904336511, + "grad_norm": 1.0757547616958618, + "learning_rate": 4.203006342283762e-06, + "loss": 0.3274, + "step": 28195 + }, + { + "epoch": 0.5644421089507795, + "grad_norm": 1.1208550930023193, + "learning_rate": 4.202686305866859e-06, + "loss": 0.3429, + "step": 28196 + }, + { + "epoch": 0.5644621274679078, + "grad_norm": 1.151741623878479, + "learning_rate": 4.20236627280169e-06, + "loss": 0.3329, + "step": 28197 + }, + { + "epoch": 0.5644821459850362, + "grad_norm": 1.0125973224639893, + "learning_rate": 4.202046243089603e-06, + "loss": 0.3084, + "step": 28198 + }, + { + "epoch": 0.5645021645021645, + "grad_norm": 1.10518479347229, + "learning_rate": 4.2017262167319425e-06, + "loss": 0.2858, + "step": 28199 + }, + { + "epoch": 0.5645221830192928, + "grad_norm": 1.0357072353363037, + "learning_rate": 4.201406193730052e-06, + "loss": 0.3135, + "step": 28200 + }, + { + "epoch": 0.5645422015364212, + "grad_norm": 1.2397010326385498, + "learning_rate": 4.201086174085279e-06, + "loss": 0.2999, + "step": 28201 + }, + { + "epoch": 0.5645622200535495, + "grad_norm": 1.0784672498703003, + "learning_rate": 4.200766157798966e-06, + "loss": 0.3047, + "step": 28202 + }, + { + "epoch": 0.5645822385706779, + "grad_norm": 1.0982152223587036, + "learning_rate": 4.200446144872462e-06, + "loss": 0.2953, + "step": 28203 + }, + { + "epoch": 0.5646022570878062, + "grad_norm": 1.339834451675415, + "learning_rate": 4.20012613530711e-06, + "loss": 0.326, + "step": 28204 + }, + { + "epoch": 0.5646222756049346, + "grad_norm": 1.041116714477539, + "learning_rate": 4.199806129104254e-06, + "loss": 0.2861, + "step": 28205 + }, + { + "epoch": 0.5646422941220629, + "grad_norm": 1.1737993955612183, + "learning_rate": 4.199486126265242e-06, + "loss": 0.2899, + "step": 28206 + }, + { + "epoch": 0.5646623126391912, + "grad_norm": 1.3636720180511475, + "learning_rate": 4.199166126791417e-06, + "loss": 0.3663, + "step": 28207 + }, + { + "epoch": 0.5646823311563196, + "grad_norm": 1.15692937374115, + "learning_rate": 4.198846130684128e-06, + "loss": 0.3586, + "step": 28208 + }, + { + "epoch": 0.5647023496734479, + "grad_norm": 1.0643677711486816, + "learning_rate": 4.198526137944715e-06, + "loss": 0.2973, + "step": 28209 + }, + { + "epoch": 0.5647223681905763, + "grad_norm": 1.1151299476623535, + "learning_rate": 4.198206148574528e-06, + "loss": 0.3172, + "step": 28210 + }, + { + "epoch": 0.5647423867077046, + "grad_norm": 1.394967794418335, + "learning_rate": 4.197886162574907e-06, + "loss": 0.2977, + "step": 28211 + }, + { + "epoch": 0.564762405224833, + "grad_norm": 1.163225531578064, + "learning_rate": 4.197566179947201e-06, + "loss": 0.2829, + "step": 28212 + }, + { + "epoch": 0.5647824237419613, + "grad_norm": 1.0820319652557373, + "learning_rate": 4.197246200692754e-06, + "loss": 0.291, + "step": 28213 + }, + { + "epoch": 0.5648024422590897, + "grad_norm": 1.0554940700531006, + "learning_rate": 4.196926224812913e-06, + "loss": 0.3031, + "step": 28214 + }, + { + "epoch": 0.564822460776218, + "grad_norm": 1.1162595748901367, + "learning_rate": 4.196606252309017e-06, + "loss": 0.3181, + "step": 28215 + }, + { + "epoch": 0.5648424792933463, + "grad_norm": 1.124068021774292, + "learning_rate": 4.196286283182419e-06, + "loss": 0.267, + "step": 28216 + }, + { + "epoch": 0.5648624978104747, + "grad_norm": 1.0461429357528687, + "learning_rate": 4.195966317434461e-06, + "loss": 0.3328, + "step": 28217 + }, + { + "epoch": 0.564882516327603, + "grad_norm": 2.1460037231445312, + "learning_rate": 4.195646355066485e-06, + "loss": 0.7607, + "step": 28218 + }, + { + "epoch": 0.5649025348447314, + "grad_norm": 1.0314747095108032, + "learning_rate": 4.195326396079841e-06, + "loss": 0.3318, + "step": 28219 + }, + { + "epoch": 0.5649225533618597, + "grad_norm": 1.1769236326217651, + "learning_rate": 4.195006440475869e-06, + "loss": 0.295, + "step": 28220 + }, + { + "epoch": 0.5649425718789881, + "grad_norm": 2.1703851222991943, + "learning_rate": 4.194686488255919e-06, + "loss": 0.8354, + "step": 28221 + }, + { + "epoch": 0.5649625903961164, + "grad_norm": 1.9132368564605713, + "learning_rate": 4.194366539421334e-06, + "loss": 0.8054, + "step": 28222 + }, + { + "epoch": 0.5649826089132447, + "grad_norm": 1.8295378684997559, + "learning_rate": 4.1940465939734585e-06, + "loss": 0.7621, + "step": 28223 + }, + { + "epoch": 0.5650026274303731, + "grad_norm": 1.1521313190460205, + "learning_rate": 4.193726651913635e-06, + "loss": 0.3334, + "step": 28224 + }, + { + "epoch": 0.5650226459475014, + "grad_norm": 1.1710689067840576, + "learning_rate": 4.193406713243213e-06, + "loss": 0.2672, + "step": 28225 + }, + { + "epoch": 0.5650426644646298, + "grad_norm": 1.0534859895706177, + "learning_rate": 4.193086777963535e-06, + "loss": 0.2767, + "step": 28226 + }, + { + "epoch": 0.5650626829817581, + "grad_norm": 1.8944921493530273, + "learning_rate": 4.1927668460759485e-06, + "loss": 0.7397, + "step": 28227 + }, + { + "epoch": 0.5650827014988865, + "grad_norm": 1.2392531633377075, + "learning_rate": 4.192446917581794e-06, + "loss": 0.3194, + "step": 28228 + }, + { + "epoch": 0.5651027200160148, + "grad_norm": 1.5385302305221558, + "learning_rate": 4.19212699248242e-06, + "loss": 0.3142, + "step": 28229 + }, + { + "epoch": 0.5651227385331432, + "grad_norm": 1.257717490196228, + "learning_rate": 4.191807070779171e-06, + "loss": 0.2413, + "step": 28230 + }, + { + "epoch": 0.5651427570502715, + "grad_norm": 1.0808035135269165, + "learning_rate": 4.191487152473389e-06, + "loss": 0.3248, + "step": 28231 + }, + { + "epoch": 0.5651627755673998, + "grad_norm": 1.1859095096588135, + "learning_rate": 4.191167237566424e-06, + "loss": 0.3252, + "step": 28232 + }, + { + "epoch": 0.5651827940845282, + "grad_norm": 1.1918699741363525, + "learning_rate": 4.190847326059615e-06, + "loss": 0.3221, + "step": 28233 + }, + { + "epoch": 0.5652028126016565, + "grad_norm": 1.1161926984786987, + "learning_rate": 4.190527417954311e-06, + "loss": 0.3352, + "step": 28234 + }, + { + "epoch": 0.5652228311187849, + "grad_norm": 1.1012829542160034, + "learning_rate": 4.190207513251856e-06, + "loss": 0.3421, + "step": 28235 + }, + { + "epoch": 0.5652428496359132, + "grad_norm": 1.2599074840545654, + "learning_rate": 4.189887611953594e-06, + "loss": 0.2866, + "step": 28236 + }, + { + "epoch": 0.5652628681530416, + "grad_norm": 1.0639593601226807, + "learning_rate": 4.1895677140608696e-06, + "loss": 0.2925, + "step": 28237 + }, + { + "epoch": 0.5652828866701699, + "grad_norm": 1.0331387519836426, + "learning_rate": 4.189247819575028e-06, + "loss": 0.2591, + "step": 28238 + }, + { + "epoch": 0.5653029051872982, + "grad_norm": 1.1202552318572998, + "learning_rate": 4.1889279284974145e-06, + "loss": 0.2932, + "step": 28239 + }, + { + "epoch": 0.5653229237044266, + "grad_norm": 1.1004177331924438, + "learning_rate": 4.188608040829374e-06, + "loss": 0.3104, + "step": 28240 + }, + { + "epoch": 0.5653429422215549, + "grad_norm": 1.2501485347747803, + "learning_rate": 4.188288156572251e-06, + "loss": 0.3336, + "step": 28241 + }, + { + "epoch": 0.5653629607386833, + "grad_norm": 1.0221666097640991, + "learning_rate": 4.187968275727388e-06, + "loss": 0.3277, + "step": 28242 + }, + { + "epoch": 0.5653829792558116, + "grad_norm": 1.0394163131713867, + "learning_rate": 4.187648398296134e-06, + "loss": 0.2877, + "step": 28243 + }, + { + "epoch": 0.56540299777294, + "grad_norm": 1.097378134727478, + "learning_rate": 4.187328524279829e-06, + "loss": 0.3256, + "step": 28244 + }, + { + "epoch": 0.5654230162900683, + "grad_norm": 1.0914078950881958, + "learning_rate": 4.187008653679822e-06, + "loss": 0.291, + "step": 28245 + }, + { + "epoch": 0.5654430348071967, + "grad_norm": 1.1669676303863525, + "learning_rate": 4.186688786497453e-06, + "loss": 0.3016, + "step": 28246 + }, + { + "epoch": 0.565463053324325, + "grad_norm": 1.0719795227050781, + "learning_rate": 4.186368922734072e-06, + "loss": 0.2948, + "step": 28247 + }, + { + "epoch": 0.5654830718414533, + "grad_norm": 1.1150484085083008, + "learning_rate": 4.18604906239102e-06, + "loss": 0.2579, + "step": 28248 + }, + { + "epoch": 0.5655030903585817, + "grad_norm": 1.0612642765045166, + "learning_rate": 4.185729205469643e-06, + "loss": 0.2985, + "step": 28249 + }, + { + "epoch": 0.56552310887571, + "grad_norm": 0.9861432313919067, + "learning_rate": 4.1854093519712846e-06, + "loss": 0.2843, + "step": 28250 + }, + { + "epoch": 0.5655431273928384, + "grad_norm": 1.1395353078842163, + "learning_rate": 4.18508950189729e-06, + "loss": 0.3177, + "step": 28251 + }, + { + "epoch": 0.5655631459099667, + "grad_norm": 1.0781985521316528, + "learning_rate": 4.184769655249003e-06, + "loss": 0.305, + "step": 28252 + }, + { + "epoch": 0.5655831644270951, + "grad_norm": 1.846455693244934, + "learning_rate": 4.184449812027771e-06, + "loss": 0.7999, + "step": 28253 + }, + { + "epoch": 0.5656031829442234, + "grad_norm": 1.247693657875061, + "learning_rate": 4.184129972234936e-06, + "loss": 0.299, + "step": 28254 + }, + { + "epoch": 0.5656232014613517, + "grad_norm": 1.060974359512329, + "learning_rate": 4.1838101358718405e-06, + "loss": 0.2786, + "step": 28255 + }, + { + "epoch": 0.5656432199784801, + "grad_norm": 1.1049598455429077, + "learning_rate": 4.1834903029398335e-06, + "loss": 0.3244, + "step": 28256 + }, + { + "epoch": 0.5656632384956084, + "grad_norm": 1.0922499895095825, + "learning_rate": 4.183170473440257e-06, + "loss": 0.343, + "step": 28257 + }, + { + "epoch": 0.5656832570127368, + "grad_norm": 1.1431914567947388, + "learning_rate": 4.182850647374458e-06, + "loss": 0.3081, + "step": 28258 + }, + { + "epoch": 0.5657032755298651, + "grad_norm": 1.2473572492599487, + "learning_rate": 4.182530824743776e-06, + "loss": 0.3046, + "step": 28259 + }, + { + "epoch": 0.5657232940469935, + "grad_norm": 1.0456491708755493, + "learning_rate": 4.1822110055495604e-06, + "loss": 0.3053, + "step": 28260 + }, + { + "epoch": 0.5657433125641218, + "grad_norm": 1.139722228050232, + "learning_rate": 4.181891189793154e-06, + "loss": 0.2885, + "step": 28261 + }, + { + "epoch": 0.5657633310812502, + "grad_norm": 1.49515700340271, + "learning_rate": 4.1815713774759e-06, + "loss": 0.3314, + "step": 28262 + }, + { + "epoch": 0.5657833495983785, + "grad_norm": 0.9652070999145508, + "learning_rate": 4.181251568599144e-06, + "loss": 0.2835, + "step": 28263 + }, + { + "epoch": 0.5658033681155068, + "grad_norm": 1.1595821380615234, + "learning_rate": 4.18093176316423e-06, + "loss": 0.3303, + "step": 28264 + }, + { + "epoch": 0.5658233866326352, + "grad_norm": 1.1627895832061768, + "learning_rate": 4.180611961172502e-06, + "loss": 0.2973, + "step": 28265 + }, + { + "epoch": 0.5658434051497635, + "grad_norm": 1.8797024488449097, + "learning_rate": 4.1802921626253074e-06, + "loss": 0.8607, + "step": 28266 + }, + { + "epoch": 0.5658634236668919, + "grad_norm": 1.0111455917358398, + "learning_rate": 4.1799723675239865e-06, + "loss": 0.3178, + "step": 28267 + }, + { + "epoch": 0.5658834421840202, + "grad_norm": 1.0900663137435913, + "learning_rate": 4.179652575869885e-06, + "loss": 0.3303, + "step": 28268 + }, + { + "epoch": 0.5659034607011486, + "grad_norm": 1.0796674489974976, + "learning_rate": 4.179332787664348e-06, + "loss": 0.3224, + "step": 28269 + }, + { + "epoch": 0.5659234792182769, + "grad_norm": 1.1599628925323486, + "learning_rate": 4.179013002908719e-06, + "loss": 0.3041, + "step": 28270 + }, + { + "epoch": 0.5659434977354052, + "grad_norm": 1.1758018732070923, + "learning_rate": 4.178693221604344e-06, + "loss": 0.3046, + "step": 28271 + }, + { + "epoch": 0.5659635162525336, + "grad_norm": 1.251139760017395, + "learning_rate": 4.178373443752565e-06, + "loss": 0.3323, + "step": 28272 + }, + { + "epoch": 0.5659835347696619, + "grad_norm": 1.0533883571624756, + "learning_rate": 4.178053669354726e-06, + "loss": 0.3171, + "step": 28273 + }, + { + "epoch": 0.5660035532867903, + "grad_norm": 1.2506977319717407, + "learning_rate": 4.177733898412174e-06, + "loss": 0.3431, + "step": 28274 + }, + { + "epoch": 0.5660235718039186, + "grad_norm": 1.0849946737289429, + "learning_rate": 4.177414130926252e-06, + "loss": 0.2971, + "step": 28275 + }, + { + "epoch": 0.566043590321047, + "grad_norm": 1.0910396575927734, + "learning_rate": 4.177094366898303e-06, + "loss": 0.3139, + "step": 28276 + }, + { + "epoch": 0.5660636088381753, + "grad_norm": 1.7777514457702637, + "learning_rate": 4.176774606329672e-06, + "loss": 0.8437, + "step": 28277 + }, + { + "epoch": 0.5660836273553037, + "grad_norm": 1.0973918437957764, + "learning_rate": 4.176454849221704e-06, + "loss": 0.3265, + "step": 28278 + }, + { + "epoch": 0.566103645872432, + "grad_norm": 1.904291033744812, + "learning_rate": 4.176135095575743e-06, + "loss": 0.7902, + "step": 28279 + }, + { + "epoch": 0.5661236643895603, + "grad_norm": 1.0121920108795166, + "learning_rate": 4.1758153453931325e-06, + "loss": 0.2638, + "step": 28280 + }, + { + "epoch": 0.5661436829066887, + "grad_norm": 1.0580439567565918, + "learning_rate": 4.175495598675215e-06, + "loss": 0.3269, + "step": 28281 + }, + { + "epoch": 0.566163701423817, + "grad_norm": 1.1412808895111084, + "learning_rate": 4.175175855423339e-06, + "loss": 0.3331, + "step": 28282 + }, + { + "epoch": 0.5661837199409454, + "grad_norm": 1.9434007406234741, + "learning_rate": 4.174856115638845e-06, + "loss": 0.786, + "step": 28283 + }, + { + "epoch": 0.5662037384580737, + "grad_norm": 1.143186330795288, + "learning_rate": 4.17453637932308e-06, + "loss": 0.2962, + "step": 28284 + }, + { + "epoch": 0.5662237569752021, + "grad_norm": 1.0951228141784668, + "learning_rate": 4.174216646477386e-06, + "loss": 0.2861, + "step": 28285 + }, + { + "epoch": 0.5662437754923304, + "grad_norm": 1.0472458600997925, + "learning_rate": 4.173896917103105e-06, + "loss": 0.3515, + "step": 28286 + }, + { + "epoch": 0.5662637940094587, + "grad_norm": 1.9353407621383667, + "learning_rate": 4.1735771912015855e-06, + "loss": 0.7984, + "step": 28287 + }, + { + "epoch": 0.5662838125265871, + "grad_norm": 1.0588010549545288, + "learning_rate": 4.17325746877417e-06, + "loss": 0.2795, + "step": 28288 + }, + { + "epoch": 0.5663038310437154, + "grad_norm": 1.0494247674942017, + "learning_rate": 4.172937749822201e-06, + "loss": 0.3491, + "step": 28289 + }, + { + "epoch": 0.5663238495608438, + "grad_norm": 1.0964362621307373, + "learning_rate": 4.172618034347025e-06, + "loss": 0.2663, + "step": 28290 + }, + { + "epoch": 0.5663438680779721, + "grad_norm": 1.9496543407440186, + "learning_rate": 4.172298322349983e-06, + "loss": 0.7783, + "step": 28291 + }, + { + "epoch": 0.5663638865951005, + "grad_norm": 1.2325795888900757, + "learning_rate": 4.171978613832423e-06, + "loss": 0.3333, + "step": 28292 + }, + { + "epoch": 0.5663839051122288, + "grad_norm": 1.4358406066894531, + "learning_rate": 4.171658908795686e-06, + "loss": 0.3155, + "step": 28293 + }, + { + "epoch": 0.5664039236293572, + "grad_norm": 1.0144851207733154, + "learning_rate": 4.171339207241116e-06, + "loss": 0.3042, + "step": 28294 + }, + { + "epoch": 0.5664239421464855, + "grad_norm": 1.0909956693649292, + "learning_rate": 4.171019509170057e-06, + "loss": 0.317, + "step": 28295 + }, + { + "epoch": 0.5664439606636138, + "grad_norm": 1.0843214988708496, + "learning_rate": 4.170699814583854e-06, + "loss": 0.2944, + "step": 28296 + }, + { + "epoch": 0.5664639791807422, + "grad_norm": 1.2529851198196411, + "learning_rate": 4.170380123483851e-06, + "loss": 0.2859, + "step": 28297 + }, + { + "epoch": 0.5664839976978705, + "grad_norm": 1.0321893692016602, + "learning_rate": 4.170060435871393e-06, + "loss": 0.2972, + "step": 28298 + }, + { + "epoch": 0.5665040162149989, + "grad_norm": 1.1594570875167847, + "learning_rate": 4.1697407517478185e-06, + "loss": 0.2973, + "step": 28299 + }, + { + "epoch": 0.5665240347321272, + "grad_norm": 1.1850146055221558, + "learning_rate": 4.1694210711144775e-06, + "loss": 0.2821, + "step": 28300 + }, + { + "epoch": 0.5665440532492556, + "grad_norm": 1.8580490350723267, + "learning_rate": 4.169101393972713e-06, + "loss": 0.787, + "step": 28301 + }, + { + "epoch": 0.5665640717663839, + "grad_norm": 1.2202812433242798, + "learning_rate": 4.168781720323865e-06, + "loss": 0.3204, + "step": 28302 + }, + { + "epoch": 0.5665840902835122, + "grad_norm": 1.1317722797393799, + "learning_rate": 4.168462050169281e-06, + "loss": 0.2756, + "step": 28303 + }, + { + "epoch": 0.5666041088006406, + "grad_norm": 1.147813081741333, + "learning_rate": 4.168142383510302e-06, + "loss": 0.295, + "step": 28304 + }, + { + "epoch": 0.5666241273177689, + "grad_norm": 1.1067614555358887, + "learning_rate": 4.167822720348275e-06, + "loss": 0.3377, + "step": 28305 + }, + { + "epoch": 0.5666441458348973, + "grad_norm": 1.1383740901947021, + "learning_rate": 4.167503060684543e-06, + "loss": 0.3435, + "step": 28306 + }, + { + "epoch": 0.5666641643520256, + "grad_norm": 1.0923652648925781, + "learning_rate": 4.167183404520448e-06, + "loss": 0.2949, + "step": 28307 + }, + { + "epoch": 0.566684182869154, + "grad_norm": 1.3947439193725586, + "learning_rate": 4.166863751857334e-06, + "loss": 0.281, + "step": 28308 + }, + { + "epoch": 0.5667042013862823, + "grad_norm": 1.0780214071273804, + "learning_rate": 4.166544102696545e-06, + "loss": 0.3005, + "step": 28309 + }, + { + "epoch": 0.5667242199034107, + "grad_norm": 0.9847460389137268, + "learning_rate": 4.166224457039427e-06, + "loss": 0.2831, + "step": 28310 + }, + { + "epoch": 0.566744238420539, + "grad_norm": 1.8759733438491821, + "learning_rate": 4.165904814887321e-06, + "loss": 0.7102, + "step": 28311 + }, + { + "epoch": 0.5667642569376673, + "grad_norm": 1.1494982242584229, + "learning_rate": 4.165585176241571e-06, + "loss": 0.2637, + "step": 28312 + }, + { + "epoch": 0.5667842754547957, + "grad_norm": 1.0568748712539673, + "learning_rate": 4.1652655411035235e-06, + "loss": 0.3137, + "step": 28313 + }, + { + "epoch": 0.566804293971924, + "grad_norm": 1.081295371055603, + "learning_rate": 4.164945909474519e-06, + "loss": 0.3342, + "step": 28314 + }, + { + "epoch": 0.5668243124890524, + "grad_norm": 1.1287795305252075, + "learning_rate": 4.164626281355901e-06, + "loss": 0.3098, + "step": 28315 + }, + { + "epoch": 0.5668443310061807, + "grad_norm": 1.115492343902588, + "learning_rate": 4.1643066567490165e-06, + "loss": 0.2808, + "step": 28316 + }, + { + "epoch": 0.5668643495233091, + "grad_norm": 1.5158889293670654, + "learning_rate": 4.1639870356552045e-06, + "loss": 0.3766, + "step": 28317 + }, + { + "epoch": 0.5668843680404374, + "grad_norm": 1.1268198490142822, + "learning_rate": 4.163667418075813e-06, + "loss": 0.2968, + "step": 28318 + }, + { + "epoch": 0.5669043865575657, + "grad_norm": 1.9301639795303345, + "learning_rate": 4.1633478040121835e-06, + "loss": 0.842, + "step": 28319 + }, + { + "epoch": 0.5669244050746941, + "grad_norm": 1.215577244758606, + "learning_rate": 4.1630281934656605e-06, + "loss": 0.3171, + "step": 28320 + }, + { + "epoch": 0.5669444235918224, + "grad_norm": 1.15141761302948, + "learning_rate": 4.1627085864375836e-06, + "loss": 0.2761, + "step": 28321 + }, + { + "epoch": 0.5669644421089508, + "grad_norm": 1.1979022026062012, + "learning_rate": 4.162388982929301e-06, + "loss": 0.3469, + "step": 28322 + }, + { + "epoch": 0.5669844606260791, + "grad_norm": 1.9470313787460327, + "learning_rate": 4.162069382942157e-06, + "loss": 0.805, + "step": 28323 + }, + { + "epoch": 0.5670044791432075, + "grad_norm": 1.0871798992156982, + "learning_rate": 4.161749786477492e-06, + "loss": 0.2903, + "step": 28324 + }, + { + "epoch": 0.5670244976603358, + "grad_norm": 1.2118103504180908, + "learning_rate": 4.161430193536651e-06, + "loss": 0.2976, + "step": 28325 + }, + { + "epoch": 0.5670445161774642, + "grad_norm": 1.0986882448196411, + "learning_rate": 4.161110604120974e-06, + "loss": 0.2952, + "step": 28326 + }, + { + "epoch": 0.5670645346945925, + "grad_norm": 1.066092848777771, + "learning_rate": 4.16079101823181e-06, + "loss": 0.2548, + "step": 28327 + }, + { + "epoch": 0.5670845532117208, + "grad_norm": 1.0814690589904785, + "learning_rate": 4.160471435870499e-06, + "loss": 0.2883, + "step": 28328 + }, + { + "epoch": 0.5671045717288492, + "grad_norm": 1.1958599090576172, + "learning_rate": 4.160151857038387e-06, + "loss": 0.3323, + "step": 28329 + }, + { + "epoch": 0.5671245902459775, + "grad_norm": 1.1677758693695068, + "learning_rate": 4.159832281736813e-06, + "loss": 0.2987, + "step": 28330 + }, + { + "epoch": 0.5671446087631059, + "grad_norm": 1.1571935415267944, + "learning_rate": 4.159512709967126e-06, + "loss": 0.3193, + "step": 28331 + }, + { + "epoch": 0.5671646272802342, + "grad_norm": 1.115627408027649, + "learning_rate": 4.159193141730666e-06, + "loss": 0.2886, + "step": 28332 + }, + { + "epoch": 0.5671846457973626, + "grad_norm": 1.074244737625122, + "learning_rate": 4.158873577028778e-06, + "loss": 0.328, + "step": 28333 + }, + { + "epoch": 0.5672046643144909, + "grad_norm": 1.0865217447280884, + "learning_rate": 4.158554015862803e-06, + "loss": 0.2801, + "step": 28334 + }, + { + "epoch": 0.5672246828316192, + "grad_norm": 1.0861676931381226, + "learning_rate": 4.158234458234085e-06, + "loss": 0.2927, + "step": 28335 + }, + { + "epoch": 0.5672447013487476, + "grad_norm": 1.2173807621002197, + "learning_rate": 4.15791490414397e-06, + "loss": 0.308, + "step": 28336 + }, + { + "epoch": 0.5672647198658759, + "grad_norm": 1.173990249633789, + "learning_rate": 4.157595353593799e-06, + "loss": 0.3109, + "step": 28337 + }, + { + "epoch": 0.5672847383830043, + "grad_norm": 1.1713494062423706, + "learning_rate": 4.157275806584916e-06, + "loss": 0.3319, + "step": 28338 + }, + { + "epoch": 0.5673047569001326, + "grad_norm": 1.1872843503952026, + "learning_rate": 4.156956263118663e-06, + "loss": 0.305, + "step": 28339 + }, + { + "epoch": 0.567324775417261, + "grad_norm": 1.2889809608459473, + "learning_rate": 4.156636723196386e-06, + "loss": 0.2992, + "step": 28340 + }, + { + "epoch": 0.5673447939343893, + "grad_norm": 1.0396323204040527, + "learning_rate": 4.156317186819425e-06, + "loss": 0.2758, + "step": 28341 + }, + { + "epoch": 0.5673648124515177, + "grad_norm": 1.0449810028076172, + "learning_rate": 4.1559976539891265e-06, + "loss": 0.3188, + "step": 28342 + }, + { + "epoch": 0.567384830968646, + "grad_norm": 1.0792200565338135, + "learning_rate": 4.155678124706831e-06, + "loss": 0.3471, + "step": 28343 + }, + { + "epoch": 0.5674048494857743, + "grad_norm": 1.0262408256530762, + "learning_rate": 4.155358598973885e-06, + "loss": 0.3074, + "step": 28344 + }, + { + "epoch": 0.5674248680029027, + "grad_norm": 1.1418501138687134, + "learning_rate": 4.155039076791628e-06, + "loss": 0.3602, + "step": 28345 + }, + { + "epoch": 0.567444886520031, + "grad_norm": 1.1091830730438232, + "learning_rate": 4.154719558161406e-06, + "loss": 0.2998, + "step": 28346 + }, + { + "epoch": 0.5674649050371594, + "grad_norm": 1.1262760162353516, + "learning_rate": 4.15440004308456e-06, + "loss": 0.2811, + "step": 28347 + }, + { + "epoch": 0.5674849235542877, + "grad_norm": 0.9547728300094604, + "learning_rate": 4.154080531562434e-06, + "loss": 0.2948, + "step": 28348 + }, + { + "epoch": 0.5675049420714161, + "grad_norm": 1.1962114572525024, + "learning_rate": 4.153761023596372e-06, + "loss": 0.3215, + "step": 28349 + }, + { + "epoch": 0.5675249605885444, + "grad_norm": 1.8895256519317627, + "learning_rate": 4.153441519187717e-06, + "loss": 0.8018, + "step": 28350 + }, + { + "epoch": 0.5675449791056727, + "grad_norm": 1.1589354276657104, + "learning_rate": 4.153122018337812e-06, + "loss": 0.3133, + "step": 28351 + }, + { + "epoch": 0.5675649976228011, + "grad_norm": 1.1199945211410522, + "learning_rate": 4.1528025210479976e-06, + "loss": 0.3174, + "step": 28352 + }, + { + "epoch": 0.5675850161399294, + "grad_norm": 1.135783076286316, + "learning_rate": 4.15248302731962e-06, + "loss": 0.2819, + "step": 28353 + }, + { + "epoch": 0.5676050346570578, + "grad_norm": 1.8055484294891357, + "learning_rate": 4.152163537154022e-06, + "loss": 0.756, + "step": 28354 + }, + { + "epoch": 0.5676250531741861, + "grad_norm": 1.15402352809906, + "learning_rate": 4.151844050552547e-06, + "loss": 0.3036, + "step": 28355 + }, + { + "epoch": 0.5676450716913145, + "grad_norm": 1.8292509317398071, + "learning_rate": 4.151524567516536e-06, + "loss": 0.8255, + "step": 28356 + }, + { + "epoch": 0.5676650902084428, + "grad_norm": 1.080817461013794, + "learning_rate": 4.151205088047332e-06, + "loss": 0.2678, + "step": 28357 + }, + { + "epoch": 0.5676851087255712, + "grad_norm": 1.1845229864120483, + "learning_rate": 4.15088561214628e-06, + "loss": 0.3248, + "step": 28358 + }, + { + "epoch": 0.5677051272426995, + "grad_norm": 1.1228471994400024, + "learning_rate": 4.1505661398147235e-06, + "loss": 0.3021, + "step": 28359 + }, + { + "epoch": 0.5677251457598278, + "grad_norm": 1.1099061965942383, + "learning_rate": 4.150246671054003e-06, + "loss": 0.3019, + "step": 28360 + }, + { + "epoch": 0.5677451642769562, + "grad_norm": 1.0959265232086182, + "learning_rate": 4.149927205865461e-06, + "loss": 0.3051, + "step": 28361 + }, + { + "epoch": 0.5677651827940845, + "grad_norm": 1.2236329317092896, + "learning_rate": 4.149607744250445e-06, + "loss": 0.3209, + "step": 28362 + }, + { + "epoch": 0.5677852013112129, + "grad_norm": 1.098034143447876, + "learning_rate": 4.149288286210294e-06, + "loss": 0.2973, + "step": 28363 + }, + { + "epoch": 0.5678052198283412, + "grad_norm": 1.1407554149627686, + "learning_rate": 4.148968831746353e-06, + "loss": 0.3205, + "step": 28364 + }, + { + "epoch": 0.5678252383454696, + "grad_norm": 1.0808732509613037, + "learning_rate": 4.148649380859962e-06, + "loss": 0.2761, + "step": 28365 + }, + { + "epoch": 0.5678452568625979, + "grad_norm": 1.1365572214126587, + "learning_rate": 4.148329933552466e-06, + "loss": 0.2981, + "step": 28366 + }, + { + "epoch": 0.5678652753797262, + "grad_norm": 1.125827431678772, + "learning_rate": 4.148010489825207e-06, + "loss": 0.3374, + "step": 28367 + }, + { + "epoch": 0.5678852938968546, + "grad_norm": 1.0366435050964355, + "learning_rate": 4.147691049679531e-06, + "loss": 0.3031, + "step": 28368 + }, + { + "epoch": 0.5679053124139829, + "grad_norm": 1.156978964805603, + "learning_rate": 4.147371613116778e-06, + "loss": 0.3335, + "step": 28369 + }, + { + "epoch": 0.5679253309311113, + "grad_norm": 1.1495158672332764, + "learning_rate": 4.147052180138288e-06, + "loss": 0.3358, + "step": 28370 + }, + { + "epoch": 0.5679453494482396, + "grad_norm": 1.2146680355072021, + "learning_rate": 4.14673275074541e-06, + "loss": 0.3162, + "step": 28371 + }, + { + "epoch": 0.567965367965368, + "grad_norm": 1.1246991157531738, + "learning_rate": 4.1464133249394835e-06, + "loss": 0.3445, + "step": 28372 + }, + { + "epoch": 0.5679853864824963, + "grad_norm": 1.0598567724227905, + "learning_rate": 4.146093902721852e-06, + "loss": 0.2802, + "step": 28373 + }, + { + "epoch": 0.5680054049996247, + "grad_norm": 1.0819342136383057, + "learning_rate": 4.145774484093856e-06, + "loss": 0.2741, + "step": 28374 + }, + { + "epoch": 0.568025423516753, + "grad_norm": 1.009134292602539, + "learning_rate": 4.145455069056842e-06, + "loss": 0.3006, + "step": 28375 + }, + { + "epoch": 0.5680454420338813, + "grad_norm": 1.0818617343902588, + "learning_rate": 4.145135657612151e-06, + "loss": 0.2961, + "step": 28376 + }, + { + "epoch": 0.5680654605510097, + "grad_norm": 1.2941243648529053, + "learning_rate": 4.1448162497611265e-06, + "loss": 0.3218, + "step": 28377 + }, + { + "epoch": 0.568085479068138, + "grad_norm": 1.1777206659317017, + "learning_rate": 4.144496845505109e-06, + "loss": 0.3059, + "step": 28378 + }, + { + "epoch": 0.5681054975852664, + "grad_norm": 0.9829642176628113, + "learning_rate": 4.144177444845442e-06, + "loss": 0.317, + "step": 28379 + }, + { + "epoch": 0.5681255161023947, + "grad_norm": 1.1315193176269531, + "learning_rate": 4.14385804778347e-06, + "loss": 0.3075, + "step": 28380 + }, + { + "epoch": 0.5681455346195231, + "grad_norm": 1.2901519536972046, + "learning_rate": 4.143538654320535e-06, + "loss": 0.3415, + "step": 28381 + }, + { + "epoch": 0.5681655531366514, + "grad_norm": 1.0868982076644897, + "learning_rate": 4.143219264457979e-06, + "loss": 0.3102, + "step": 28382 + }, + { + "epoch": 0.5681855716537797, + "grad_norm": 1.2719242572784424, + "learning_rate": 4.142899878197143e-06, + "loss": 0.3095, + "step": 28383 + }, + { + "epoch": 0.5682055901709081, + "grad_norm": 1.0884008407592773, + "learning_rate": 4.142580495539374e-06, + "loss": 0.3216, + "step": 28384 + }, + { + "epoch": 0.5682256086880364, + "grad_norm": 1.0976601839065552, + "learning_rate": 4.142261116486011e-06, + "loss": 0.3107, + "step": 28385 + }, + { + "epoch": 0.5682456272051648, + "grad_norm": 1.1365267038345337, + "learning_rate": 4.1419417410383975e-06, + "loss": 0.2681, + "step": 28386 + }, + { + "epoch": 0.5682656457222931, + "grad_norm": 1.114119052886963, + "learning_rate": 4.141622369197877e-06, + "loss": 0.3199, + "step": 28387 + }, + { + "epoch": 0.5682856642394215, + "grad_norm": 1.0733462572097778, + "learning_rate": 4.141303000965789e-06, + "loss": 0.3026, + "step": 28388 + }, + { + "epoch": 0.5683056827565498, + "grad_norm": 1.0169377326965332, + "learning_rate": 4.1409836363434805e-06, + "loss": 0.3049, + "step": 28389 + }, + { + "epoch": 0.5683257012736782, + "grad_norm": 1.0412667989730835, + "learning_rate": 4.140664275332292e-06, + "loss": 0.2792, + "step": 28390 + }, + { + "epoch": 0.5683457197908065, + "grad_norm": 1.100780963897705, + "learning_rate": 4.140344917933565e-06, + "loss": 0.3094, + "step": 28391 + }, + { + "epoch": 0.5683657383079348, + "grad_norm": 1.0978541374206543, + "learning_rate": 4.1400255641486434e-06, + "loss": 0.3305, + "step": 28392 + }, + { + "epoch": 0.5683857568250632, + "grad_norm": 1.2942531108856201, + "learning_rate": 4.139706213978869e-06, + "loss": 0.2913, + "step": 28393 + }, + { + "epoch": 0.5684057753421915, + "grad_norm": 1.7548632621765137, + "learning_rate": 4.139386867425585e-06, + "loss": 0.7369, + "step": 28394 + }, + { + "epoch": 0.5684257938593199, + "grad_norm": 1.0903412103652954, + "learning_rate": 4.139067524490135e-06, + "loss": 0.3026, + "step": 28395 + }, + { + "epoch": 0.5684458123764482, + "grad_norm": 1.0111839771270752, + "learning_rate": 4.138748185173858e-06, + "loss": 0.3411, + "step": 28396 + }, + { + "epoch": 0.5684658308935766, + "grad_norm": 1.116855502128601, + "learning_rate": 4.138428849478097e-06, + "loss": 0.2692, + "step": 28397 + }, + { + "epoch": 0.5684858494107049, + "grad_norm": 1.1170133352279663, + "learning_rate": 4.138109517404197e-06, + "loss": 0.2777, + "step": 28398 + }, + { + "epoch": 0.5685058679278332, + "grad_norm": 1.0770142078399658, + "learning_rate": 4.137790188953499e-06, + "loss": 0.2789, + "step": 28399 + }, + { + "epoch": 0.5685258864449616, + "grad_norm": 1.049651861190796, + "learning_rate": 4.137470864127347e-06, + "loss": 0.2951, + "step": 28400 + }, + { + "epoch": 0.5685459049620899, + "grad_norm": 1.0675911903381348, + "learning_rate": 4.137151542927079e-06, + "loss": 0.3005, + "step": 28401 + }, + { + "epoch": 0.5685659234792183, + "grad_norm": 1.17424476146698, + "learning_rate": 4.136832225354043e-06, + "loss": 0.2827, + "step": 28402 + }, + { + "epoch": 0.5685859419963466, + "grad_norm": 1.9592536687850952, + "learning_rate": 4.136512911409578e-06, + "loss": 0.8289, + "step": 28403 + }, + { + "epoch": 0.568605960513475, + "grad_norm": 1.2222455739974976, + "learning_rate": 4.1361936010950255e-06, + "loss": 0.3038, + "step": 28404 + }, + { + "epoch": 0.5686259790306033, + "grad_norm": 1.1509164571762085, + "learning_rate": 4.135874294411729e-06, + "loss": 0.3177, + "step": 28405 + }, + { + "epoch": 0.5686459975477316, + "grad_norm": 1.0626146793365479, + "learning_rate": 4.1355549913610314e-06, + "loss": 0.3008, + "step": 28406 + }, + { + "epoch": 0.56866601606486, + "grad_norm": 1.278083086013794, + "learning_rate": 4.135235691944276e-06, + "loss": 0.3116, + "step": 28407 + }, + { + "epoch": 0.5686860345819883, + "grad_norm": 1.9348983764648438, + "learning_rate": 4.134916396162804e-06, + "loss": 0.6865, + "step": 28408 + }, + { + "epoch": 0.5687060530991167, + "grad_norm": 1.023758053779602, + "learning_rate": 4.134597104017957e-06, + "loss": 0.2859, + "step": 28409 + }, + { + "epoch": 0.568726071616245, + "grad_norm": 1.0786488056182861, + "learning_rate": 4.134277815511074e-06, + "loss": 0.3275, + "step": 28410 + }, + { + "epoch": 0.5687460901333734, + "grad_norm": 1.170743703842163, + "learning_rate": 4.133958530643505e-06, + "loss": 0.2859, + "step": 28411 + }, + { + "epoch": 0.5687661086505017, + "grad_norm": 1.0018254518508911, + "learning_rate": 4.133639249416585e-06, + "loss": 0.2725, + "step": 28412 + }, + { + "epoch": 0.5687861271676301, + "grad_norm": 1.0591375827789307, + "learning_rate": 4.133319971831662e-06, + "loss": 0.2861, + "step": 28413 + }, + { + "epoch": 0.5688061456847584, + "grad_norm": 1.296307921409607, + "learning_rate": 4.133000697890072e-06, + "loss": 0.3666, + "step": 28414 + }, + { + "epoch": 0.5688261642018867, + "grad_norm": 1.1479777097702026, + "learning_rate": 4.1326814275931635e-06, + "loss": 0.2919, + "step": 28415 + }, + { + "epoch": 0.5688461827190151, + "grad_norm": 1.2200819253921509, + "learning_rate": 4.132362160942275e-06, + "loss": 0.3275, + "step": 28416 + }, + { + "epoch": 0.5688662012361434, + "grad_norm": 1.0859975814819336, + "learning_rate": 4.1320428979387485e-06, + "loss": 0.3232, + "step": 28417 + }, + { + "epoch": 0.5688862197532718, + "grad_norm": 1.1186970472335815, + "learning_rate": 4.131723638583928e-06, + "loss": 0.2994, + "step": 28418 + }, + { + "epoch": 0.5689062382704001, + "grad_norm": 1.1136175394058228, + "learning_rate": 4.1314043828791516e-06, + "loss": 0.2755, + "step": 28419 + }, + { + "epoch": 0.5689262567875285, + "grad_norm": 1.1087419986724854, + "learning_rate": 4.131085130825767e-06, + "loss": 0.2944, + "step": 28420 + }, + { + "epoch": 0.5689462753046568, + "grad_norm": 1.1333658695220947, + "learning_rate": 4.130765882425113e-06, + "loss": 0.3176, + "step": 28421 + }, + { + "epoch": 0.5689662938217851, + "grad_norm": 1.063452959060669, + "learning_rate": 4.130446637678533e-06, + "loss": 0.2764, + "step": 28422 + }, + { + "epoch": 0.5689863123389135, + "grad_norm": 1.0350104570388794, + "learning_rate": 4.130127396587365e-06, + "loss": 0.3179, + "step": 28423 + }, + { + "epoch": 0.5690063308560418, + "grad_norm": 1.0558613538742065, + "learning_rate": 4.129808159152957e-06, + "loss": 0.2937, + "step": 28424 + }, + { + "epoch": 0.5690263493731702, + "grad_norm": 1.0424247980117798, + "learning_rate": 4.129488925376647e-06, + "loss": 0.3648, + "step": 28425 + }, + { + "epoch": 0.5690463678902985, + "grad_norm": 1.7586957216262817, + "learning_rate": 4.12916969525978e-06, + "loss": 0.7882, + "step": 28426 + }, + { + "epoch": 0.5690663864074269, + "grad_norm": 1.1065623760223389, + "learning_rate": 4.1288504688036935e-06, + "loss": 0.3153, + "step": 28427 + }, + { + "epoch": 0.5690864049245552, + "grad_norm": 1.792230486869812, + "learning_rate": 4.128531246009734e-06, + "loss": 0.7553, + "step": 28428 + }, + { + "epoch": 0.5691064234416836, + "grad_norm": 1.0356324911117554, + "learning_rate": 4.128212026879242e-06, + "loss": 0.2944, + "step": 28429 + }, + { + "epoch": 0.5691264419588119, + "grad_norm": 1.101770281791687, + "learning_rate": 4.127892811413557e-06, + "loss": 0.3049, + "step": 28430 + }, + { + "epoch": 0.5691464604759402, + "grad_norm": 1.0063483715057373, + "learning_rate": 4.127573599614026e-06, + "loss": 0.3144, + "step": 28431 + }, + { + "epoch": 0.5691664789930686, + "grad_norm": 1.0801116228103638, + "learning_rate": 4.127254391481984e-06, + "loss": 0.3686, + "step": 28432 + }, + { + "epoch": 0.5691864975101969, + "grad_norm": 1.1601977348327637, + "learning_rate": 4.12693518701878e-06, + "loss": 0.3028, + "step": 28433 + }, + { + "epoch": 0.5692065160273253, + "grad_norm": 1.0944374799728394, + "learning_rate": 4.126615986225752e-06, + "loss": 0.323, + "step": 28434 + }, + { + "epoch": 0.5692265345444536, + "grad_norm": 1.0382816791534424, + "learning_rate": 4.1262967891042425e-06, + "loss": 0.3109, + "step": 28435 + }, + { + "epoch": 0.569246553061582, + "grad_norm": 1.1048387289047241, + "learning_rate": 4.12597759565559e-06, + "loss": 0.3044, + "step": 28436 + }, + { + "epoch": 0.5692665715787103, + "grad_norm": 1.0424569845199585, + "learning_rate": 4.125658405881143e-06, + "loss": 0.288, + "step": 28437 + }, + { + "epoch": 0.5692865900958386, + "grad_norm": 1.9654914140701294, + "learning_rate": 4.125339219782239e-06, + "loss": 0.8207, + "step": 28438 + }, + { + "epoch": 0.569306608612967, + "grad_norm": 1.169229507446289, + "learning_rate": 4.125020037360222e-06, + "loss": 0.3161, + "step": 28439 + }, + { + "epoch": 0.5693266271300953, + "grad_norm": 1.0327613353729248, + "learning_rate": 4.124700858616432e-06, + "loss": 0.2468, + "step": 28440 + }, + { + "epoch": 0.5693466456472237, + "grad_norm": 1.8720848560333252, + "learning_rate": 4.124381683552208e-06, + "loss": 0.7894, + "step": 28441 + }, + { + "epoch": 0.569366664164352, + "grad_norm": 0.9993842840194702, + "learning_rate": 4.124062512168898e-06, + "loss": 0.2732, + "step": 28442 + }, + { + "epoch": 0.5693866826814804, + "grad_norm": 1.106023907661438, + "learning_rate": 4.12374334446784e-06, + "loss": 0.3034, + "step": 28443 + }, + { + "epoch": 0.5694067011986087, + "grad_norm": 1.1538392305374146, + "learning_rate": 4.123424180450377e-06, + "loss": 0.2782, + "step": 28444 + }, + { + "epoch": 0.5694267197157371, + "grad_norm": 1.0583150386810303, + "learning_rate": 4.123105020117849e-06, + "loss": 0.3164, + "step": 28445 + }, + { + "epoch": 0.5694467382328654, + "grad_norm": 1.096252679824829, + "learning_rate": 4.1227858634716e-06, + "loss": 0.3121, + "step": 28446 + }, + { + "epoch": 0.5694667567499937, + "grad_norm": 1.1090892553329468, + "learning_rate": 4.1224667105129704e-06, + "loss": 0.3368, + "step": 28447 + }, + { + "epoch": 0.5694867752671221, + "grad_norm": 1.070383071899414, + "learning_rate": 4.122147561243302e-06, + "loss": 0.3147, + "step": 28448 + }, + { + "epoch": 0.5695067937842504, + "grad_norm": 1.0891512632369995, + "learning_rate": 4.121828415663935e-06, + "loss": 0.3098, + "step": 28449 + }, + { + "epoch": 0.5695268123013788, + "grad_norm": 1.0602896213531494, + "learning_rate": 4.121509273776213e-06, + "loss": 0.293, + "step": 28450 + }, + { + "epoch": 0.5695468308185071, + "grad_norm": 1.1180018186569214, + "learning_rate": 4.121190135581476e-06, + "loss": 0.2956, + "step": 28451 + }, + { + "epoch": 0.5695668493356355, + "grad_norm": 1.5143073797225952, + "learning_rate": 4.120871001081068e-06, + "loss": 0.3192, + "step": 28452 + }, + { + "epoch": 0.5695868678527638, + "grad_norm": 1.054198145866394, + "learning_rate": 4.120551870276329e-06, + "loss": 0.2556, + "step": 28453 + }, + { + "epoch": 0.5696068863698921, + "grad_norm": 1.2491594552993774, + "learning_rate": 4.120232743168599e-06, + "loss": 0.2925, + "step": 28454 + }, + { + "epoch": 0.5696269048870205, + "grad_norm": 1.059326410293579, + "learning_rate": 4.119913619759224e-06, + "loss": 0.3299, + "step": 28455 + }, + { + "epoch": 0.5696469234041488, + "grad_norm": 1.2953709363937378, + "learning_rate": 4.119594500049541e-06, + "loss": 0.3301, + "step": 28456 + }, + { + "epoch": 0.5696669419212772, + "grad_norm": 1.0713943243026733, + "learning_rate": 4.119275384040894e-06, + "loss": 0.282, + "step": 28457 + }, + { + "epoch": 0.5696869604384055, + "grad_norm": 1.118725061416626, + "learning_rate": 4.118956271734622e-06, + "loss": 0.3152, + "step": 28458 + }, + { + "epoch": 0.5697069789555339, + "grad_norm": 1.0650291442871094, + "learning_rate": 4.1186371631320694e-06, + "loss": 0.2637, + "step": 28459 + }, + { + "epoch": 0.5697269974726622, + "grad_norm": 1.086255431175232, + "learning_rate": 4.118318058234577e-06, + "loss": 0.3046, + "step": 28460 + }, + { + "epoch": 0.5697470159897906, + "grad_norm": 1.1710050106048584, + "learning_rate": 4.117998957043487e-06, + "loss": 0.3057, + "step": 28461 + }, + { + "epoch": 0.5697670345069189, + "grad_norm": 1.1533244848251343, + "learning_rate": 4.117679859560137e-06, + "loss": 0.3273, + "step": 28462 + }, + { + "epoch": 0.5697870530240472, + "grad_norm": 1.2402210235595703, + "learning_rate": 4.117360765785872e-06, + "loss": 0.3008, + "step": 28463 + }, + { + "epoch": 0.5698070715411756, + "grad_norm": 2.0540757179260254, + "learning_rate": 4.117041675722032e-06, + "loss": 0.74, + "step": 28464 + }, + { + "epoch": 0.5698270900583039, + "grad_norm": 1.1865495443344116, + "learning_rate": 4.11672258936996e-06, + "loss": 0.3192, + "step": 28465 + }, + { + "epoch": 0.5698471085754323, + "grad_norm": 1.1593490839004517, + "learning_rate": 4.116403506730996e-06, + "loss": 0.3321, + "step": 28466 + }, + { + "epoch": 0.5698671270925606, + "grad_norm": 1.0500179529190063, + "learning_rate": 4.1160844278064785e-06, + "loss": 0.306, + "step": 28467 + }, + { + "epoch": 0.569887145609689, + "grad_norm": 1.1569557189941406, + "learning_rate": 4.1157653525977555e-06, + "loss": 0.3162, + "step": 28468 + }, + { + "epoch": 0.5699071641268173, + "grad_norm": 1.8436020612716675, + "learning_rate": 4.115446281106162e-06, + "loss": 0.7898, + "step": 28469 + }, + { + "epoch": 0.5699271826439456, + "grad_norm": 1.1052850484848022, + "learning_rate": 4.115127213333044e-06, + "loss": 0.2717, + "step": 28470 + }, + { + "epoch": 0.569947201161074, + "grad_norm": 1.0644233226776123, + "learning_rate": 4.114808149279742e-06, + "loss": 0.2825, + "step": 28471 + }, + { + "epoch": 0.5699672196782023, + "grad_norm": 1.2104897499084473, + "learning_rate": 4.114489088947592e-06, + "loss": 0.3206, + "step": 28472 + }, + { + "epoch": 0.5699872381953307, + "grad_norm": 1.0371567010879517, + "learning_rate": 4.114170032337942e-06, + "loss": 0.3195, + "step": 28473 + }, + { + "epoch": 0.570007256712459, + "grad_norm": 1.2299450635910034, + "learning_rate": 4.113850979452131e-06, + "loss": 0.3124, + "step": 28474 + }, + { + "epoch": 0.5700272752295874, + "grad_norm": 0.9959115982055664, + "learning_rate": 4.113531930291498e-06, + "loss": 0.289, + "step": 28475 + }, + { + "epoch": 0.5700472937467157, + "grad_norm": 1.0420440435409546, + "learning_rate": 4.113212884857386e-06, + "loss": 0.2931, + "step": 28476 + }, + { + "epoch": 0.5700673122638441, + "grad_norm": 1.8261895179748535, + "learning_rate": 4.112893843151137e-06, + "loss": 0.7875, + "step": 28477 + }, + { + "epoch": 0.5700873307809724, + "grad_norm": 1.0081398487091064, + "learning_rate": 4.112574805174092e-06, + "loss": 0.2925, + "step": 28478 + }, + { + "epoch": 0.5701073492981007, + "grad_norm": 1.1456083059310913, + "learning_rate": 4.1122557709275916e-06, + "loss": 0.3417, + "step": 28479 + }, + { + "epoch": 0.5701273678152291, + "grad_norm": 1.7649872303009033, + "learning_rate": 4.111936740412976e-06, + "loss": 0.7445, + "step": 28480 + }, + { + "epoch": 0.5701473863323574, + "grad_norm": 1.1161065101623535, + "learning_rate": 4.111617713631586e-06, + "loss": 0.2873, + "step": 28481 + }, + { + "epoch": 0.5701674048494858, + "grad_norm": 1.0531976222991943, + "learning_rate": 4.111298690584765e-06, + "loss": 0.2934, + "step": 28482 + }, + { + "epoch": 0.5701874233666141, + "grad_norm": 1.1209417581558228, + "learning_rate": 4.110979671273854e-06, + "loss": 0.2614, + "step": 28483 + }, + { + "epoch": 0.5702074418837425, + "grad_norm": 1.6978832483291626, + "learning_rate": 4.110660655700193e-06, + "loss": 0.7679, + "step": 28484 + }, + { + "epoch": 0.5702274604008708, + "grad_norm": 1.0706090927124023, + "learning_rate": 4.110341643865122e-06, + "loss": 0.2951, + "step": 28485 + }, + { + "epoch": 0.5702474789179991, + "grad_norm": 1.061179280281067, + "learning_rate": 4.110022635769984e-06, + "loss": 0.2659, + "step": 28486 + }, + { + "epoch": 0.5702674974351275, + "grad_norm": 1.125606656074524, + "learning_rate": 4.10970363141612e-06, + "loss": 0.3297, + "step": 28487 + }, + { + "epoch": 0.5702875159522558, + "grad_norm": 1.011596918106079, + "learning_rate": 4.10938463080487e-06, + "loss": 0.328, + "step": 28488 + }, + { + "epoch": 0.5703075344693842, + "grad_norm": 1.0246385335922241, + "learning_rate": 4.109065633937574e-06, + "loss": 0.2937, + "step": 28489 + }, + { + "epoch": 0.5703275529865125, + "grad_norm": 1.0760051012039185, + "learning_rate": 4.108746640815575e-06, + "loss": 0.2844, + "step": 28490 + }, + { + "epoch": 0.5703475715036409, + "grad_norm": 1.1194963455200195, + "learning_rate": 4.108427651440214e-06, + "loss": 0.3086, + "step": 28491 + }, + { + "epoch": 0.5703675900207692, + "grad_norm": 1.2489053010940552, + "learning_rate": 4.108108665812832e-06, + "loss": 0.3011, + "step": 28492 + }, + { + "epoch": 0.5703876085378976, + "grad_norm": 1.0814733505249023, + "learning_rate": 4.1077896839347684e-06, + "loss": 0.262, + "step": 28493 + }, + { + "epoch": 0.5704076270550259, + "grad_norm": 1.134130597114563, + "learning_rate": 4.107470705807364e-06, + "loss": 0.305, + "step": 28494 + }, + { + "epoch": 0.5704276455721542, + "grad_norm": 1.161911129951477, + "learning_rate": 4.107151731431961e-06, + "loss": 0.3213, + "step": 28495 + }, + { + "epoch": 0.5704476640892826, + "grad_norm": 1.1438322067260742, + "learning_rate": 4.1068327608099e-06, + "loss": 0.2877, + "step": 28496 + }, + { + "epoch": 0.5704676826064109, + "grad_norm": 1.132184386253357, + "learning_rate": 4.106513793942524e-06, + "loss": 0.2728, + "step": 28497 + }, + { + "epoch": 0.5704877011235393, + "grad_norm": 0.9510580897331238, + "learning_rate": 4.106194830831169e-06, + "loss": 0.2373, + "step": 28498 + }, + { + "epoch": 0.5705077196406676, + "grad_norm": 1.1655510663986206, + "learning_rate": 4.10587587147718e-06, + "loss": 0.3327, + "step": 28499 + }, + { + "epoch": 0.570527738157796, + "grad_norm": 1.119327187538147, + "learning_rate": 4.105556915881896e-06, + "loss": 0.3042, + "step": 28500 + }, + { + "epoch": 0.5705477566749243, + "grad_norm": 1.1925137042999268, + "learning_rate": 4.105237964046658e-06, + "loss": 0.3293, + "step": 28501 + }, + { + "epoch": 0.5705677751920526, + "grad_norm": 1.1278311014175415, + "learning_rate": 4.104919015972808e-06, + "loss": 0.3319, + "step": 28502 + }, + { + "epoch": 0.570587793709181, + "grad_norm": 2.044999361038208, + "learning_rate": 4.104600071661684e-06, + "loss": 0.7783, + "step": 28503 + }, + { + "epoch": 0.5706078122263093, + "grad_norm": 1.072911262512207, + "learning_rate": 4.10428113111463e-06, + "loss": 0.3167, + "step": 28504 + }, + { + "epoch": 0.5706278307434377, + "grad_norm": 1.0276381969451904, + "learning_rate": 4.1039621943329855e-06, + "loss": 0.2836, + "step": 28505 + }, + { + "epoch": 0.570647849260566, + "grad_norm": 1.0958740711212158, + "learning_rate": 4.1036432613180915e-06, + "loss": 0.2697, + "step": 28506 + }, + { + "epoch": 0.5706678677776944, + "grad_norm": 1.1744974851608276, + "learning_rate": 4.103324332071285e-06, + "loss": 0.3066, + "step": 28507 + }, + { + "epoch": 0.5706878862948227, + "grad_norm": 1.0987004041671753, + "learning_rate": 4.103005406593913e-06, + "loss": 0.2903, + "step": 28508 + }, + { + "epoch": 0.5707079048119511, + "grad_norm": 1.8405746221542358, + "learning_rate": 4.102686484887314e-06, + "loss": 0.8108, + "step": 28509 + }, + { + "epoch": 0.5707279233290794, + "grad_norm": 1.1526503562927246, + "learning_rate": 4.102367566952827e-06, + "loss": 0.3456, + "step": 28510 + }, + { + "epoch": 0.5707479418462077, + "grad_norm": 1.1856743097305298, + "learning_rate": 4.102048652791794e-06, + "loss": 0.2844, + "step": 28511 + }, + { + "epoch": 0.5707679603633361, + "grad_norm": 1.1367138624191284, + "learning_rate": 4.101729742405553e-06, + "loss": 0.2925, + "step": 28512 + }, + { + "epoch": 0.5707879788804644, + "grad_norm": 1.1211153268814087, + "learning_rate": 4.101410835795449e-06, + "loss": 0.3812, + "step": 28513 + }, + { + "epoch": 0.5708079973975928, + "grad_norm": 1.146972417831421, + "learning_rate": 4.101091932962819e-06, + "loss": 0.3215, + "step": 28514 + }, + { + "epoch": 0.5708280159147211, + "grad_norm": 1.0889010429382324, + "learning_rate": 4.100773033909007e-06, + "loss": 0.2997, + "step": 28515 + }, + { + "epoch": 0.5708480344318495, + "grad_norm": 1.097678780555725, + "learning_rate": 4.1004541386353495e-06, + "loss": 0.3009, + "step": 28516 + }, + { + "epoch": 0.5708680529489778, + "grad_norm": 1.1159011125564575, + "learning_rate": 4.1001352471431905e-06, + "loss": 0.3042, + "step": 28517 + }, + { + "epoch": 0.5708880714661061, + "grad_norm": 1.086929440498352, + "learning_rate": 4.09981635943387e-06, + "loss": 0.2657, + "step": 28518 + }, + { + "epoch": 0.5709080899832345, + "grad_norm": 1.0313228368759155, + "learning_rate": 4.099497475508727e-06, + "loss": 0.256, + "step": 28519 + }, + { + "epoch": 0.5709281085003628, + "grad_norm": 1.169260859489441, + "learning_rate": 4.099178595369101e-06, + "loss": 0.3027, + "step": 28520 + }, + { + "epoch": 0.5709481270174912, + "grad_norm": 1.1912028789520264, + "learning_rate": 4.098859719016336e-06, + "loss": 0.3277, + "step": 28521 + }, + { + "epoch": 0.5709681455346195, + "grad_norm": 1.0947052240371704, + "learning_rate": 4.098540846451771e-06, + "loss": 0.3036, + "step": 28522 + }, + { + "epoch": 0.5709881640517479, + "grad_norm": 1.9601994752883911, + "learning_rate": 4.0982219776767476e-06, + "loss": 0.7853, + "step": 28523 + }, + { + "epoch": 0.5710081825688762, + "grad_norm": 1.1172584295272827, + "learning_rate": 4.097903112692603e-06, + "loss": 0.2687, + "step": 28524 + }, + { + "epoch": 0.5710282010860046, + "grad_norm": 1.1313694715499878, + "learning_rate": 4.09758425150068e-06, + "loss": 0.3539, + "step": 28525 + }, + { + "epoch": 0.5710482196031329, + "grad_norm": 1.0128796100616455, + "learning_rate": 4.097265394102318e-06, + "loss": 0.2641, + "step": 28526 + }, + { + "epoch": 0.5710682381202612, + "grad_norm": 1.2449716329574585, + "learning_rate": 4.096946540498859e-06, + "loss": 0.2947, + "step": 28527 + }, + { + "epoch": 0.5710882566373896, + "grad_norm": 1.1617999076843262, + "learning_rate": 4.096627690691642e-06, + "loss": 0.2923, + "step": 28528 + }, + { + "epoch": 0.5711082751545179, + "grad_norm": 1.1122573614120483, + "learning_rate": 4.096308844682007e-06, + "loss": 0.349, + "step": 28529 + }, + { + "epoch": 0.5711282936716463, + "grad_norm": 1.0946767330169678, + "learning_rate": 4.095990002471296e-06, + "loss": 0.3277, + "step": 28530 + }, + { + "epoch": 0.5711483121887746, + "grad_norm": 1.0626137256622314, + "learning_rate": 4.09567116406085e-06, + "loss": 0.2931, + "step": 28531 + }, + { + "epoch": 0.571168330705903, + "grad_norm": 1.1037007570266724, + "learning_rate": 4.0953523294520056e-06, + "loss": 0.2791, + "step": 28532 + }, + { + "epoch": 0.5711883492230313, + "grad_norm": 1.2477344274520874, + "learning_rate": 4.095033498646106e-06, + "loss": 0.2833, + "step": 28533 + }, + { + "epoch": 0.5712083677401596, + "grad_norm": 1.2217202186584473, + "learning_rate": 4.09471467164449e-06, + "loss": 0.2942, + "step": 28534 + }, + { + "epoch": 0.571228386257288, + "grad_norm": 1.100911021232605, + "learning_rate": 4.0943958484485e-06, + "loss": 0.2853, + "step": 28535 + }, + { + "epoch": 0.5712484047744163, + "grad_norm": 1.1437609195709229, + "learning_rate": 4.094077029059474e-06, + "loss": 0.3414, + "step": 28536 + }, + { + "epoch": 0.5712684232915447, + "grad_norm": 1.1683818101882935, + "learning_rate": 4.093758213478755e-06, + "loss": 0.3201, + "step": 28537 + }, + { + "epoch": 0.571288441808673, + "grad_norm": 1.166985273361206, + "learning_rate": 4.093439401707678e-06, + "loss": 0.3063, + "step": 28538 + }, + { + "epoch": 0.5713084603258014, + "grad_norm": 1.2152807712554932, + "learning_rate": 4.09312059374759e-06, + "loss": 0.2927, + "step": 28539 + }, + { + "epoch": 0.5713284788429297, + "grad_norm": 1.1927841901779175, + "learning_rate": 4.092801789599826e-06, + "loss": 0.3684, + "step": 28540 + }, + { + "epoch": 0.5713484973600581, + "grad_norm": 1.0910557508468628, + "learning_rate": 4.09248298926573e-06, + "loss": 0.3072, + "step": 28541 + }, + { + "epoch": 0.5713685158771864, + "grad_norm": 1.1940287351608276, + "learning_rate": 4.092164192746639e-06, + "loss": 0.332, + "step": 28542 + }, + { + "epoch": 0.5713885343943147, + "grad_norm": 1.1335630416870117, + "learning_rate": 4.091845400043893e-06, + "loss": 0.303, + "step": 28543 + }, + { + "epoch": 0.5714085529114431, + "grad_norm": 1.0018236637115479, + "learning_rate": 4.091526611158835e-06, + "loss": 0.3062, + "step": 28544 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 1.1451523303985596, + "learning_rate": 4.0912078260928044e-06, + "loss": 0.353, + "step": 28545 + }, + { + "epoch": 0.5714485899456998, + "grad_norm": 1.2179492712020874, + "learning_rate": 4.0908890448471386e-06, + "loss": 0.3384, + "step": 28546 + }, + { + "epoch": 0.5714686084628281, + "grad_norm": 1.053862452507019, + "learning_rate": 4.0905702674231795e-06, + "loss": 0.2844, + "step": 28547 + }, + { + "epoch": 0.5714886269799565, + "grad_norm": 1.1877799034118652, + "learning_rate": 4.09025149382227e-06, + "loss": 0.3102, + "step": 28548 + }, + { + "epoch": 0.5715086454970848, + "grad_norm": 1.2437853813171387, + "learning_rate": 4.089932724045745e-06, + "loss": 0.3016, + "step": 28549 + }, + { + "epoch": 0.5715286640142131, + "grad_norm": 1.0931743383407593, + "learning_rate": 4.089613958094949e-06, + "loss": 0.3257, + "step": 28550 + }, + { + "epoch": 0.5715486825313415, + "grad_norm": 1.0461935997009277, + "learning_rate": 4.089295195971217e-06, + "loss": 0.3109, + "step": 28551 + }, + { + "epoch": 0.5715687010484698, + "grad_norm": 1.1132087707519531, + "learning_rate": 4.088976437675894e-06, + "loss": 0.3115, + "step": 28552 + }, + { + "epoch": 0.5715887195655982, + "grad_norm": 1.068487286567688, + "learning_rate": 4.088657683210318e-06, + "loss": 0.3187, + "step": 28553 + }, + { + "epoch": 0.5716087380827265, + "grad_norm": 1.9037877321243286, + "learning_rate": 4.08833893257583e-06, + "loss": 0.7577, + "step": 28554 + }, + { + "epoch": 0.5716287565998549, + "grad_norm": 1.1953495740890503, + "learning_rate": 4.0880201857737675e-06, + "loss": 0.3517, + "step": 28555 + }, + { + "epoch": 0.5716487751169832, + "grad_norm": 0.989442765712738, + "learning_rate": 4.0877014428054705e-06, + "loss": 0.2836, + "step": 28556 + }, + { + "epoch": 0.5716687936341116, + "grad_norm": 1.0882846117019653, + "learning_rate": 4.087382703672282e-06, + "loss": 0.3285, + "step": 28557 + }, + { + "epoch": 0.5716888121512399, + "grad_norm": 1.1413654088974, + "learning_rate": 4.0870639683755405e-06, + "loss": 0.3574, + "step": 28558 + }, + { + "epoch": 0.5717088306683682, + "grad_norm": 1.1301597356796265, + "learning_rate": 4.086745236916586e-06, + "loss": 0.3019, + "step": 28559 + }, + { + "epoch": 0.5717288491854966, + "grad_norm": 1.0216350555419922, + "learning_rate": 4.0864265092967545e-06, + "loss": 0.3018, + "step": 28560 + }, + { + "epoch": 0.5717488677026249, + "grad_norm": 1.87684166431427, + "learning_rate": 4.0861077855173935e-06, + "loss": 0.746, + "step": 28561 + }, + { + "epoch": 0.5717688862197533, + "grad_norm": 1.3037880659103394, + "learning_rate": 4.085789065579838e-06, + "loss": 0.3273, + "step": 28562 + }, + { + "epoch": 0.5717889047368816, + "grad_norm": 0.9866102337837219, + "learning_rate": 4.085470349485428e-06, + "loss": 0.3186, + "step": 28563 + }, + { + "epoch": 0.57180892325401, + "grad_norm": 1.0462074279785156, + "learning_rate": 4.085151637235503e-06, + "loss": 0.3017, + "step": 28564 + }, + { + "epoch": 0.5718289417711383, + "grad_norm": 1.0360146760940552, + "learning_rate": 4.084832928831404e-06, + "loss": 0.2802, + "step": 28565 + }, + { + "epoch": 0.5718489602882666, + "grad_norm": 1.1825417280197144, + "learning_rate": 4.08451422427447e-06, + "loss": 0.2902, + "step": 28566 + }, + { + "epoch": 0.571868978805395, + "grad_norm": 1.199681043624878, + "learning_rate": 4.084195523566042e-06, + "loss": 0.3525, + "step": 28567 + }, + { + "epoch": 0.5718889973225233, + "grad_norm": 1.0473980903625488, + "learning_rate": 4.08387682670746e-06, + "loss": 0.2756, + "step": 28568 + }, + { + "epoch": 0.5719090158396517, + "grad_norm": 1.08712899684906, + "learning_rate": 4.08355813370006e-06, + "loss": 0.3386, + "step": 28569 + }, + { + "epoch": 0.57192903435678, + "grad_norm": 1.1283684968948364, + "learning_rate": 4.0832394445451865e-06, + "loss": 0.3084, + "step": 28570 + }, + { + "epoch": 0.5719490528739084, + "grad_norm": 1.2078224420547485, + "learning_rate": 4.0829207592441775e-06, + "loss": 0.3211, + "step": 28571 + }, + { + "epoch": 0.5719690713910367, + "grad_norm": 1.2864011526107788, + "learning_rate": 4.08260207779837e-06, + "loss": 0.3278, + "step": 28572 + }, + { + "epoch": 0.5719890899081651, + "grad_norm": 1.8216145038604736, + "learning_rate": 4.082283400209106e-06, + "loss": 0.7748, + "step": 28573 + }, + { + "epoch": 0.5720091084252934, + "grad_norm": 1.0847694873809814, + "learning_rate": 4.081964726477727e-06, + "loss": 0.2746, + "step": 28574 + }, + { + "epoch": 0.5720291269424217, + "grad_norm": 1.1050701141357422, + "learning_rate": 4.08164605660557e-06, + "loss": 0.266, + "step": 28575 + }, + { + "epoch": 0.5720491454595501, + "grad_norm": 1.1036663055419922, + "learning_rate": 4.081327390593977e-06, + "loss": 0.2908, + "step": 28576 + }, + { + "epoch": 0.5720691639766784, + "grad_norm": 1.2578762769699097, + "learning_rate": 4.081008728444283e-06, + "loss": 0.3092, + "step": 28577 + }, + { + "epoch": 0.5720891824938068, + "grad_norm": 0.9655755758285522, + "learning_rate": 4.080690070157831e-06, + "loss": 0.2348, + "step": 28578 + }, + { + "epoch": 0.5721092010109351, + "grad_norm": 1.810997724533081, + "learning_rate": 4.080371415735961e-06, + "loss": 0.8196, + "step": 28579 + }, + { + "epoch": 0.5721292195280635, + "grad_norm": 1.234586477279663, + "learning_rate": 4.080052765180012e-06, + "loss": 0.2852, + "step": 28580 + }, + { + "epoch": 0.5721492380451918, + "grad_norm": 1.1894044876098633, + "learning_rate": 4.0797341184913236e-06, + "loss": 0.273, + "step": 28581 + }, + { + "epoch": 0.5721692565623201, + "grad_norm": 1.1006138324737549, + "learning_rate": 4.079415475671233e-06, + "loss": 0.3108, + "step": 28582 + }, + { + "epoch": 0.5721892750794485, + "grad_norm": 1.1158829927444458, + "learning_rate": 4.079096836721083e-06, + "loss": 0.3226, + "step": 28583 + }, + { + "epoch": 0.5722092935965768, + "grad_norm": 1.1143109798431396, + "learning_rate": 4.078778201642212e-06, + "loss": 0.3274, + "step": 28584 + }, + { + "epoch": 0.5722293121137052, + "grad_norm": 1.3131985664367676, + "learning_rate": 4.078459570435958e-06, + "loss": 0.2765, + "step": 28585 + }, + { + "epoch": 0.5722493306308335, + "grad_norm": 1.1010053157806396, + "learning_rate": 4.078140943103663e-06, + "loss": 0.2965, + "step": 28586 + }, + { + "epoch": 0.5722693491479619, + "grad_norm": 1.1992830038070679, + "learning_rate": 4.0778223196466626e-06, + "loss": 0.328, + "step": 28587 + }, + { + "epoch": 0.5722893676650902, + "grad_norm": 1.0711064338684082, + "learning_rate": 4.077503700066301e-06, + "loss": 0.3056, + "step": 28588 + }, + { + "epoch": 0.5723093861822186, + "grad_norm": 1.1420655250549316, + "learning_rate": 4.077185084363915e-06, + "loss": 0.3241, + "step": 28589 + }, + { + "epoch": 0.5723294046993469, + "grad_norm": 1.1635411977767944, + "learning_rate": 4.076866472540844e-06, + "loss": 0.2996, + "step": 28590 + }, + { + "epoch": 0.5723494232164752, + "grad_norm": 1.1958789825439453, + "learning_rate": 4.076547864598428e-06, + "loss": 0.3202, + "step": 28591 + }, + { + "epoch": 0.5723694417336036, + "grad_norm": 1.2156566381454468, + "learning_rate": 4.076229260538005e-06, + "loss": 0.2929, + "step": 28592 + }, + { + "epoch": 0.5723894602507319, + "grad_norm": 1.1081761121749878, + "learning_rate": 4.075910660360917e-06, + "loss": 0.3349, + "step": 28593 + }, + { + "epoch": 0.5724094787678603, + "grad_norm": 0.9957554340362549, + "learning_rate": 4.075592064068502e-06, + "loss": 0.3032, + "step": 28594 + }, + { + "epoch": 0.5724294972849886, + "grad_norm": 1.0583977699279785, + "learning_rate": 4.0752734716620975e-06, + "loss": 0.3227, + "step": 28595 + }, + { + "epoch": 0.572449515802117, + "grad_norm": 1.1395373344421387, + "learning_rate": 4.074954883143043e-06, + "loss": 0.3455, + "step": 28596 + }, + { + "epoch": 0.5724695343192453, + "grad_norm": 1.109135627746582, + "learning_rate": 4.074636298512682e-06, + "loss": 0.3673, + "step": 28597 + }, + { + "epoch": 0.5724895528363736, + "grad_norm": 1.1115537881851196, + "learning_rate": 4.07431771777235e-06, + "loss": 0.3269, + "step": 28598 + }, + { + "epoch": 0.572509571353502, + "grad_norm": 1.2037395238876343, + "learning_rate": 4.073999140923387e-06, + "loss": 0.3466, + "step": 28599 + }, + { + "epoch": 0.5725295898706303, + "grad_norm": 1.0753768682479858, + "learning_rate": 4.073680567967131e-06, + "loss": 0.2969, + "step": 28600 + }, + { + "epoch": 0.5725496083877587, + "grad_norm": 1.9401098489761353, + "learning_rate": 4.0733619989049245e-06, + "loss": 0.8062, + "step": 28601 + }, + { + "epoch": 0.572569626904887, + "grad_norm": 1.1556055545806885, + "learning_rate": 4.073043433738104e-06, + "loss": 0.3432, + "step": 28602 + }, + { + "epoch": 0.5725896454220154, + "grad_norm": 1.1044695377349854, + "learning_rate": 4.072724872468009e-06, + "loss": 0.3189, + "step": 28603 + }, + { + "epoch": 0.5726096639391437, + "grad_norm": 1.1857035160064697, + "learning_rate": 4.072406315095979e-06, + "loss": 0.2964, + "step": 28604 + }, + { + "epoch": 0.5726296824562721, + "grad_norm": 1.9795528650283813, + "learning_rate": 4.072087761623353e-06, + "loss": 0.7295, + "step": 28605 + }, + { + "epoch": 0.5726497009734004, + "grad_norm": 1.0461660623550415, + "learning_rate": 4.071769212051473e-06, + "loss": 0.2853, + "step": 28606 + }, + { + "epoch": 0.5726697194905287, + "grad_norm": 1.0508148670196533, + "learning_rate": 4.071450666381673e-06, + "loss": 0.3068, + "step": 28607 + }, + { + "epoch": 0.5726897380076571, + "grad_norm": 1.0767186880111694, + "learning_rate": 4.0711321246152965e-06, + "loss": 0.2777, + "step": 28608 + }, + { + "epoch": 0.5727097565247854, + "grad_norm": 1.2175250053405762, + "learning_rate": 4.070813586753677e-06, + "loss": 0.3093, + "step": 28609 + }, + { + "epoch": 0.5727297750419138, + "grad_norm": 1.2228114604949951, + "learning_rate": 4.0704950527981605e-06, + "loss": 0.3065, + "step": 28610 + }, + { + "epoch": 0.5727497935590421, + "grad_norm": 1.1342015266418457, + "learning_rate": 4.070176522750081e-06, + "loss": 0.3286, + "step": 28611 + }, + { + "epoch": 0.5727698120761705, + "grad_norm": 1.1070036888122559, + "learning_rate": 4.069857996610781e-06, + "loss": 0.3459, + "step": 28612 + }, + { + "epoch": 0.5727898305932988, + "grad_norm": 1.100518822669983, + "learning_rate": 4.0695394743815955e-06, + "loss": 0.2616, + "step": 28613 + }, + { + "epoch": 0.5728098491104271, + "grad_norm": 1.091988444328308, + "learning_rate": 4.069220956063868e-06, + "loss": 0.3193, + "step": 28614 + }, + { + "epoch": 0.5728298676275555, + "grad_norm": 1.089714527130127, + "learning_rate": 4.068902441658935e-06, + "loss": 0.2977, + "step": 28615 + }, + { + "epoch": 0.5728498861446838, + "grad_norm": 1.1153051853179932, + "learning_rate": 4.068583931168135e-06, + "loss": 0.3241, + "step": 28616 + }, + { + "epoch": 0.5728699046618122, + "grad_norm": 1.1006895303726196, + "learning_rate": 4.068265424592808e-06, + "loss": 0.3288, + "step": 28617 + }, + { + "epoch": 0.5728899231789405, + "grad_norm": 1.6868691444396973, + "learning_rate": 4.067946921934292e-06, + "loss": 0.7358, + "step": 28618 + }, + { + "epoch": 0.5729099416960689, + "grad_norm": 1.0909950733184814, + "learning_rate": 4.0676284231939275e-06, + "loss": 0.2958, + "step": 28619 + }, + { + "epoch": 0.5729299602131972, + "grad_norm": 1.1832873821258545, + "learning_rate": 4.067309928373052e-06, + "loss": 0.2933, + "step": 28620 + }, + { + "epoch": 0.5729499787303256, + "grad_norm": 1.1804111003875732, + "learning_rate": 4.066991437473006e-06, + "loss": 0.3595, + "step": 28621 + }, + { + "epoch": 0.5729699972474539, + "grad_norm": 1.148260474205017, + "learning_rate": 4.0666729504951245e-06, + "loss": 0.3218, + "step": 28622 + }, + { + "epoch": 0.5729900157645822, + "grad_norm": 1.4544512033462524, + "learning_rate": 4.066354467440752e-06, + "loss": 0.2814, + "step": 28623 + }, + { + "epoch": 0.5730100342817106, + "grad_norm": 1.2003223896026611, + "learning_rate": 4.066035988311222e-06, + "loss": 0.3188, + "step": 28624 + }, + { + "epoch": 0.5730300527988389, + "grad_norm": 2.0436909198760986, + "learning_rate": 4.0657175131078775e-06, + "loss": 0.8263, + "step": 28625 + }, + { + "epoch": 0.5730500713159673, + "grad_norm": 1.106031894683838, + "learning_rate": 4.065399041832055e-06, + "loss": 0.292, + "step": 28626 + }, + { + "epoch": 0.5730700898330956, + "grad_norm": 1.8551257848739624, + "learning_rate": 4.065080574485093e-06, + "loss": 0.8249, + "step": 28627 + }, + { + "epoch": 0.573090108350224, + "grad_norm": 1.7830370664596558, + "learning_rate": 4.064762111068333e-06, + "loss": 0.7876, + "step": 28628 + }, + { + "epoch": 0.5731101268673523, + "grad_norm": 1.1714391708374023, + "learning_rate": 4.064443651583109e-06, + "loss": 0.3193, + "step": 28629 + }, + { + "epoch": 0.5731301453844806, + "grad_norm": 1.018049955368042, + "learning_rate": 4.064125196030765e-06, + "loss": 0.2816, + "step": 28630 + }, + { + "epoch": 0.573150163901609, + "grad_norm": 1.2941159009933472, + "learning_rate": 4.063806744412635e-06, + "loss": 0.2952, + "step": 28631 + }, + { + "epoch": 0.5731701824187373, + "grad_norm": 1.0943914651870728, + "learning_rate": 4.063488296730061e-06, + "loss": 0.3079, + "step": 28632 + }, + { + "epoch": 0.5731902009358657, + "grad_norm": 1.1463521718978882, + "learning_rate": 4.0631698529843825e-06, + "loss": 0.2516, + "step": 28633 + }, + { + "epoch": 0.573210219452994, + "grad_norm": 1.0568350553512573, + "learning_rate": 4.062851413176935e-06, + "loss": 0.2858, + "step": 28634 + }, + { + "epoch": 0.5732302379701224, + "grad_norm": 1.150054693222046, + "learning_rate": 4.062532977309057e-06, + "loss": 0.3066, + "step": 28635 + }, + { + "epoch": 0.5732502564872507, + "grad_norm": 1.1325803995132446, + "learning_rate": 4.06221454538209e-06, + "loss": 0.3171, + "step": 28636 + }, + { + "epoch": 0.5732702750043791, + "grad_norm": 1.1611554622650146, + "learning_rate": 4.061896117397371e-06, + "loss": 0.2868, + "step": 28637 + }, + { + "epoch": 0.5732902935215074, + "grad_norm": 1.0842061042785645, + "learning_rate": 4.06157769335624e-06, + "loss": 0.2891, + "step": 28638 + }, + { + "epoch": 0.5733103120386357, + "grad_norm": 1.0351669788360596, + "learning_rate": 4.061259273260034e-06, + "loss": 0.3122, + "step": 28639 + }, + { + "epoch": 0.5733303305557641, + "grad_norm": 1.0656007528305054, + "learning_rate": 4.06094085711009e-06, + "loss": 0.2907, + "step": 28640 + }, + { + "epoch": 0.5733503490728924, + "grad_norm": 1.0611400604248047, + "learning_rate": 4.060622444907751e-06, + "loss": 0.2458, + "step": 28641 + }, + { + "epoch": 0.5733703675900208, + "grad_norm": 1.1336978673934937, + "learning_rate": 4.060304036654351e-06, + "loss": 0.2926, + "step": 28642 + }, + { + "epoch": 0.5733903861071491, + "grad_norm": 1.1258586645126343, + "learning_rate": 4.059985632351233e-06, + "loss": 0.3245, + "step": 28643 + }, + { + "epoch": 0.5734104046242775, + "grad_norm": 1.0347015857696533, + "learning_rate": 4.05966723199973e-06, + "loss": 0.2945, + "step": 28644 + }, + { + "epoch": 0.5734304231414058, + "grad_norm": 1.1043955087661743, + "learning_rate": 4.059348835601186e-06, + "loss": 0.2914, + "step": 28645 + }, + { + "epoch": 0.5734504416585341, + "grad_norm": 1.0829304456710815, + "learning_rate": 4.0590304431569374e-06, + "loss": 0.3288, + "step": 28646 + }, + { + "epoch": 0.5734704601756625, + "grad_norm": 1.161285161972046, + "learning_rate": 4.058712054668321e-06, + "loss": 0.3117, + "step": 28647 + }, + { + "epoch": 0.5734904786927908, + "grad_norm": 1.0549310445785522, + "learning_rate": 4.058393670136677e-06, + "loss": 0.3148, + "step": 28648 + }, + { + "epoch": 0.5735104972099192, + "grad_norm": 1.3738285303115845, + "learning_rate": 4.058075289563343e-06, + "loss": 0.2867, + "step": 28649 + }, + { + "epoch": 0.5735305157270475, + "grad_norm": 1.0707005262374878, + "learning_rate": 4.057756912949658e-06, + "loss": 0.3284, + "step": 28650 + }, + { + "epoch": 0.5735505342441759, + "grad_norm": 1.0882465839385986, + "learning_rate": 4.05743854029696e-06, + "loss": 0.2861, + "step": 28651 + }, + { + "epoch": 0.5735705527613042, + "grad_norm": 1.2304068803787231, + "learning_rate": 4.057120171606589e-06, + "loss": 0.301, + "step": 28652 + }, + { + "epoch": 0.5735905712784326, + "grad_norm": 1.8724390268325806, + "learning_rate": 4.056801806879879e-06, + "loss": 0.7867, + "step": 28653 + }, + { + "epoch": 0.5736105897955609, + "grad_norm": 2.0695550441741943, + "learning_rate": 4.056483446118173e-06, + "loss": 0.7233, + "step": 28654 + }, + { + "epoch": 0.5736306083126892, + "grad_norm": 1.09123694896698, + "learning_rate": 4.056165089322808e-06, + "loss": 0.3341, + "step": 28655 + }, + { + "epoch": 0.5736506268298176, + "grad_norm": 1.1041209697723389, + "learning_rate": 4.055846736495122e-06, + "loss": 0.3218, + "step": 28656 + }, + { + "epoch": 0.5736706453469459, + "grad_norm": 1.1690747737884521, + "learning_rate": 4.055528387636452e-06, + "loss": 0.3048, + "step": 28657 + }, + { + "epoch": 0.5736906638640743, + "grad_norm": 1.1697455644607544, + "learning_rate": 4.055210042748136e-06, + "loss": 0.3129, + "step": 28658 + }, + { + "epoch": 0.5737106823812026, + "grad_norm": 1.1592789888381958, + "learning_rate": 4.054891701831517e-06, + "loss": 0.2597, + "step": 28659 + }, + { + "epoch": 0.573730700898331, + "grad_norm": 1.0226224660873413, + "learning_rate": 4.054573364887929e-06, + "loss": 0.2861, + "step": 28660 + }, + { + "epoch": 0.5737507194154593, + "grad_norm": 1.013609528541565, + "learning_rate": 4.05425503191871e-06, + "loss": 0.3045, + "step": 28661 + }, + { + "epoch": 0.5737707379325876, + "grad_norm": 1.1182259321212769, + "learning_rate": 4.0539367029251995e-06, + "loss": 0.3288, + "step": 28662 + }, + { + "epoch": 0.573790756449716, + "grad_norm": 1.0622152090072632, + "learning_rate": 4.053618377908735e-06, + "loss": 0.2859, + "step": 28663 + }, + { + "epoch": 0.5738107749668443, + "grad_norm": 1.095685601234436, + "learning_rate": 4.053300056870657e-06, + "loss": 0.281, + "step": 28664 + }, + { + "epoch": 0.5738307934839727, + "grad_norm": 1.874429702758789, + "learning_rate": 4.052981739812302e-06, + "loss": 0.7693, + "step": 28665 + }, + { + "epoch": 0.573850812001101, + "grad_norm": 1.3177458047866821, + "learning_rate": 4.052663426735005e-06, + "loss": 0.2967, + "step": 28666 + }, + { + "epoch": 0.5738708305182294, + "grad_norm": 1.2045286893844604, + "learning_rate": 4.052345117640109e-06, + "loss": 0.325, + "step": 28667 + }, + { + "epoch": 0.5738908490353577, + "grad_norm": 1.129982590675354, + "learning_rate": 4.05202681252895e-06, + "loss": 0.308, + "step": 28668 + }, + { + "epoch": 0.5739108675524861, + "grad_norm": 1.0394774675369263, + "learning_rate": 4.051708511402866e-06, + "loss": 0.3093, + "step": 28669 + }, + { + "epoch": 0.5739308860696144, + "grad_norm": 0.9777259826660156, + "learning_rate": 4.051390214263197e-06, + "loss": 0.275, + "step": 28670 + }, + { + "epoch": 0.5739509045867427, + "grad_norm": 1.0350126028060913, + "learning_rate": 4.0510719211112766e-06, + "loss": 0.3235, + "step": 28671 + }, + { + "epoch": 0.5739709231038711, + "grad_norm": 1.2175190448760986, + "learning_rate": 4.050753631948448e-06, + "loss": 0.3511, + "step": 28672 + }, + { + "epoch": 0.5739909416209994, + "grad_norm": 1.0960532426834106, + "learning_rate": 4.050435346776047e-06, + "loss": 0.3106, + "step": 28673 + }, + { + "epoch": 0.5740109601381278, + "grad_norm": 1.7270472049713135, + "learning_rate": 4.050117065595409e-06, + "loss": 0.7514, + "step": 28674 + }, + { + "epoch": 0.5740309786552561, + "grad_norm": 1.966044545173645, + "learning_rate": 4.049798788407876e-06, + "loss": 0.7879, + "step": 28675 + }, + { + "epoch": 0.5740509971723845, + "grad_norm": 1.1509310007095337, + "learning_rate": 4.0494805152147835e-06, + "loss": 0.2955, + "step": 28676 + }, + { + "epoch": 0.5740710156895128, + "grad_norm": 1.0647083520889282, + "learning_rate": 4.0491622460174715e-06, + "loss": 0.3094, + "step": 28677 + }, + { + "epoch": 0.5740910342066411, + "grad_norm": 1.842908501625061, + "learning_rate": 4.0488439808172775e-06, + "loss": 0.8308, + "step": 28678 + }, + { + "epoch": 0.5741110527237695, + "grad_norm": 1.023280382156372, + "learning_rate": 4.048525719615538e-06, + "loss": 0.2706, + "step": 28679 + }, + { + "epoch": 0.5741310712408978, + "grad_norm": 1.0994007587432861, + "learning_rate": 4.04820746241359e-06, + "loss": 0.3006, + "step": 28680 + }, + { + "epoch": 0.5741510897580262, + "grad_norm": 1.1213592290878296, + "learning_rate": 4.047889209212774e-06, + "loss": 0.2778, + "step": 28681 + }, + { + "epoch": 0.5741711082751545, + "grad_norm": 1.084324836730957, + "learning_rate": 4.047570960014427e-06, + "loss": 0.2951, + "step": 28682 + }, + { + "epoch": 0.5741911267922829, + "grad_norm": 1.0051419734954834, + "learning_rate": 4.0472527148198885e-06, + "loss": 0.2989, + "step": 28683 + }, + { + "epoch": 0.5742111453094112, + "grad_norm": 1.110276699066162, + "learning_rate": 4.046934473630491e-06, + "loss": 0.3002, + "step": 28684 + }, + { + "epoch": 0.5742311638265396, + "grad_norm": 1.3231416940689087, + "learning_rate": 4.046616236447578e-06, + "loss": 0.3215, + "step": 28685 + }, + { + "epoch": 0.5742511823436679, + "grad_norm": 1.0021051168441772, + "learning_rate": 4.046298003272486e-06, + "loss": 0.2442, + "step": 28686 + }, + { + "epoch": 0.5742712008607962, + "grad_norm": 1.181313157081604, + "learning_rate": 4.0459797741065495e-06, + "loss": 0.3118, + "step": 28687 + }, + { + "epoch": 0.5742912193779246, + "grad_norm": 1.1751688718795776, + "learning_rate": 4.045661548951111e-06, + "loss": 0.3062, + "step": 28688 + }, + { + "epoch": 0.5743112378950529, + "grad_norm": 1.0111888647079468, + "learning_rate": 4.045343327807504e-06, + "loss": 0.3283, + "step": 28689 + }, + { + "epoch": 0.5743312564121813, + "grad_norm": 1.1661261320114136, + "learning_rate": 4.04502511067707e-06, + "loss": 0.2906, + "step": 28690 + }, + { + "epoch": 0.5743512749293096, + "grad_norm": 1.1604862213134766, + "learning_rate": 4.0447068975611444e-06, + "loss": 0.2771, + "step": 28691 + }, + { + "epoch": 0.574371293446438, + "grad_norm": 1.0465768575668335, + "learning_rate": 4.0443886884610665e-06, + "loss": 0.2779, + "step": 28692 + }, + { + "epoch": 0.5743913119635663, + "grad_norm": 1.11391019821167, + "learning_rate": 4.0440704833781694e-06, + "loss": 0.3489, + "step": 28693 + }, + { + "epoch": 0.5744113304806946, + "grad_norm": 1.1388167142868042, + "learning_rate": 4.0437522823137965e-06, + "loss": 0.3565, + "step": 28694 + }, + { + "epoch": 0.574431348997823, + "grad_norm": 1.1464157104492188, + "learning_rate": 4.043434085269284e-06, + "loss": 0.2588, + "step": 28695 + }, + { + "epoch": 0.5744513675149513, + "grad_norm": 1.1064103841781616, + "learning_rate": 4.043115892245968e-06, + "loss": 0.3312, + "step": 28696 + }, + { + "epoch": 0.5744713860320797, + "grad_norm": 1.0870671272277832, + "learning_rate": 4.042797703245186e-06, + "loss": 0.3185, + "step": 28697 + }, + { + "epoch": 0.574491404549208, + "grad_norm": 1.14197838306427, + "learning_rate": 4.042479518268278e-06, + "loss": 0.2819, + "step": 28698 + }, + { + "epoch": 0.5745114230663364, + "grad_norm": 1.7542978525161743, + "learning_rate": 4.042161337316579e-06, + "loss": 0.7886, + "step": 28699 + }, + { + "epoch": 0.5745314415834647, + "grad_norm": 1.0420994758605957, + "learning_rate": 4.041843160391428e-06, + "loss": 0.3185, + "step": 28700 + }, + { + "epoch": 0.5745514601005931, + "grad_norm": 1.0541508197784424, + "learning_rate": 4.041524987494163e-06, + "loss": 0.3037, + "step": 28701 + }, + { + "epoch": 0.5745714786177214, + "grad_norm": 1.0575120449066162, + "learning_rate": 4.041206818626119e-06, + "loss": 0.2793, + "step": 28702 + }, + { + "epoch": 0.5745914971348497, + "grad_norm": 1.1033108234405518, + "learning_rate": 4.040888653788636e-06, + "loss": 0.3453, + "step": 28703 + }, + { + "epoch": 0.5746115156519781, + "grad_norm": 1.0765490531921387, + "learning_rate": 4.040570492983051e-06, + "loss": 0.2911, + "step": 28704 + }, + { + "epoch": 0.5746315341691064, + "grad_norm": 1.1350932121276855, + "learning_rate": 4.040252336210702e-06, + "loss": 0.2993, + "step": 28705 + }, + { + "epoch": 0.5746515526862348, + "grad_norm": 1.1203190088272095, + "learning_rate": 4.039934183472923e-06, + "loss": 0.3355, + "step": 28706 + }, + { + "epoch": 0.5746715712033631, + "grad_norm": 1.0507036447525024, + "learning_rate": 4.039616034771055e-06, + "loss": 0.2789, + "step": 28707 + }, + { + "epoch": 0.5746915897204915, + "grad_norm": 1.2411047220230103, + "learning_rate": 4.039297890106436e-06, + "loss": 0.3128, + "step": 28708 + }, + { + "epoch": 0.5747116082376198, + "grad_norm": 1.1188931465148926, + "learning_rate": 4.038979749480401e-06, + "loss": 0.3079, + "step": 28709 + }, + { + "epoch": 0.5747316267547481, + "grad_norm": 1.1026016473770142, + "learning_rate": 4.038661612894289e-06, + "loss": 0.2914, + "step": 28710 + }, + { + "epoch": 0.5747516452718765, + "grad_norm": 1.1135523319244385, + "learning_rate": 4.038343480349435e-06, + "loss": 0.3184, + "step": 28711 + }, + { + "epoch": 0.5747716637890048, + "grad_norm": 1.0814793109893799, + "learning_rate": 4.038025351847179e-06, + "loss": 0.3077, + "step": 28712 + }, + { + "epoch": 0.5747916823061332, + "grad_norm": 1.1832304000854492, + "learning_rate": 4.037707227388857e-06, + "loss": 0.317, + "step": 28713 + }, + { + "epoch": 0.5748117008232615, + "grad_norm": 1.1211339235305786, + "learning_rate": 4.037389106975808e-06, + "loss": 0.3326, + "step": 28714 + }, + { + "epoch": 0.5748317193403899, + "grad_norm": 1.2477374076843262, + "learning_rate": 4.0370709906093655e-06, + "loss": 0.2976, + "step": 28715 + }, + { + "epoch": 0.5748517378575182, + "grad_norm": 1.917320728302002, + "learning_rate": 4.036752878290872e-06, + "loss": 0.7426, + "step": 28716 + }, + { + "epoch": 0.5748717563746466, + "grad_norm": 2.0711820125579834, + "learning_rate": 4.036434770021663e-06, + "loss": 0.7442, + "step": 28717 + }, + { + "epoch": 0.5748917748917749, + "grad_norm": 1.1050766706466675, + "learning_rate": 4.036116665803073e-06, + "loss": 0.3367, + "step": 28718 + }, + { + "epoch": 0.5749117934089032, + "grad_norm": 1.1451177597045898, + "learning_rate": 4.0357985656364395e-06, + "loss": 0.2836, + "step": 28719 + }, + { + "epoch": 0.5749318119260316, + "grad_norm": 1.0692490339279175, + "learning_rate": 4.035480469523103e-06, + "loss": 0.2691, + "step": 28720 + }, + { + "epoch": 0.5749518304431599, + "grad_norm": 1.1660895347595215, + "learning_rate": 4.035162377464399e-06, + "loss": 0.3525, + "step": 28721 + }, + { + "epoch": 0.5749718489602883, + "grad_norm": 1.0992461442947388, + "learning_rate": 4.034844289461666e-06, + "loss": 0.3523, + "step": 28722 + }, + { + "epoch": 0.5749918674774166, + "grad_norm": 1.1140038967132568, + "learning_rate": 4.034526205516239e-06, + "loss": 0.3041, + "step": 28723 + }, + { + "epoch": 0.575011885994545, + "grad_norm": 1.094444990158081, + "learning_rate": 4.034208125629454e-06, + "loss": 0.2833, + "step": 28724 + }, + { + "epoch": 0.5750319045116733, + "grad_norm": 1.081173062324524, + "learning_rate": 4.033890049802653e-06, + "loss": 0.3071, + "step": 28725 + }, + { + "epoch": 0.5750519230288016, + "grad_norm": 1.0756007432937622, + "learning_rate": 4.033571978037168e-06, + "loss": 0.2756, + "step": 28726 + }, + { + "epoch": 0.57507194154593, + "grad_norm": 1.1542669534683228, + "learning_rate": 4.033253910334341e-06, + "loss": 0.2982, + "step": 28727 + }, + { + "epoch": 0.5750919600630583, + "grad_norm": 1.0480601787567139, + "learning_rate": 4.032935846695504e-06, + "loss": 0.2787, + "step": 28728 + }, + { + "epoch": 0.5751119785801867, + "grad_norm": 1.0885752439498901, + "learning_rate": 4.032617787121998e-06, + "loss": 0.3362, + "step": 28729 + }, + { + "epoch": 0.575131997097315, + "grad_norm": 1.1611864566802979, + "learning_rate": 4.032299731615159e-06, + "loss": 0.271, + "step": 28730 + }, + { + "epoch": 0.5751520156144434, + "grad_norm": 1.0585743188858032, + "learning_rate": 4.031981680176324e-06, + "loss": 0.2847, + "step": 28731 + }, + { + "epoch": 0.5751720341315717, + "grad_norm": 1.07866370677948, + "learning_rate": 4.031663632806828e-06, + "loss": 0.3434, + "step": 28732 + }, + { + "epoch": 0.5751920526487001, + "grad_norm": 1.0369815826416016, + "learning_rate": 4.031345589508009e-06, + "loss": 0.2902, + "step": 28733 + }, + { + "epoch": 0.5752120711658284, + "grad_norm": 1.0882971286773682, + "learning_rate": 4.031027550281207e-06, + "loss": 0.3183, + "step": 28734 + }, + { + "epoch": 0.5752320896829567, + "grad_norm": 1.1282572746276855, + "learning_rate": 4.030709515127756e-06, + "loss": 0.2442, + "step": 28735 + }, + { + "epoch": 0.5752521082000851, + "grad_norm": 1.0906932353973389, + "learning_rate": 4.0303914840489945e-06, + "loss": 0.3408, + "step": 28736 + }, + { + "epoch": 0.5752721267172134, + "grad_norm": 1.1136268377304077, + "learning_rate": 4.030073457046256e-06, + "loss": 0.3183, + "step": 28737 + }, + { + "epoch": 0.5752921452343418, + "grad_norm": 1.0997051000595093, + "learning_rate": 4.0297554341208825e-06, + "loss": 0.2926, + "step": 28738 + }, + { + "epoch": 0.5753121637514701, + "grad_norm": 1.1345078945159912, + "learning_rate": 4.029437415274206e-06, + "loss": 0.3231, + "step": 28739 + }, + { + "epoch": 0.5753321822685985, + "grad_norm": 1.2082980871200562, + "learning_rate": 4.029119400507568e-06, + "loss": 0.2658, + "step": 28740 + }, + { + "epoch": 0.5753522007857268, + "grad_norm": 1.0844526290893555, + "learning_rate": 4.028801389822303e-06, + "loss": 0.2913, + "step": 28741 + }, + { + "epoch": 0.5753722193028551, + "grad_norm": 1.2620129585266113, + "learning_rate": 4.028483383219746e-06, + "loss": 0.2956, + "step": 28742 + }, + { + "epoch": 0.5753922378199835, + "grad_norm": 1.106622338294983, + "learning_rate": 4.0281653807012366e-06, + "loss": 0.3111, + "step": 28743 + }, + { + "epoch": 0.5754122563371118, + "grad_norm": 1.049965262413025, + "learning_rate": 4.027847382268112e-06, + "loss": 0.3016, + "step": 28744 + }, + { + "epoch": 0.5754322748542402, + "grad_norm": 1.14466392993927, + "learning_rate": 4.0275293879217065e-06, + "loss": 0.3007, + "step": 28745 + }, + { + "epoch": 0.5754522933713685, + "grad_norm": 1.0949331521987915, + "learning_rate": 4.027211397663357e-06, + "loss": 0.2766, + "step": 28746 + }, + { + "epoch": 0.5754723118884969, + "grad_norm": 1.1986479759216309, + "learning_rate": 4.026893411494403e-06, + "loss": 0.3465, + "step": 28747 + }, + { + "epoch": 0.5754923304056252, + "grad_norm": 1.2479588985443115, + "learning_rate": 4.0265754294161806e-06, + "loss": 0.3508, + "step": 28748 + }, + { + "epoch": 0.5755123489227535, + "grad_norm": 1.1241157054901123, + "learning_rate": 4.026257451430026e-06, + "loss": 0.3251, + "step": 28749 + }, + { + "epoch": 0.5755323674398819, + "grad_norm": 1.035112738609314, + "learning_rate": 4.025939477537272e-06, + "loss": 0.2966, + "step": 28750 + }, + { + "epoch": 0.5755523859570102, + "grad_norm": 1.1242679357528687, + "learning_rate": 4.025621507739262e-06, + "loss": 0.3006, + "step": 28751 + }, + { + "epoch": 0.5755724044741386, + "grad_norm": 1.109139084815979, + "learning_rate": 4.025303542037328e-06, + "loss": 0.3279, + "step": 28752 + }, + { + "epoch": 0.5755924229912669, + "grad_norm": 1.0232352018356323, + "learning_rate": 4.0249855804328095e-06, + "loss": 0.272, + "step": 28753 + }, + { + "epoch": 0.5756124415083953, + "grad_norm": 1.1557031869888306, + "learning_rate": 4.024667622927041e-06, + "loss": 0.3389, + "step": 28754 + }, + { + "epoch": 0.5756324600255236, + "grad_norm": 1.1844568252563477, + "learning_rate": 4.024349669521358e-06, + "loss": 0.3075, + "step": 28755 + }, + { + "epoch": 0.575652478542652, + "grad_norm": 1.132022500038147, + "learning_rate": 4.024031720217102e-06, + "loss": 0.2896, + "step": 28756 + }, + { + "epoch": 0.5756724970597803, + "grad_norm": 1.9641209840774536, + "learning_rate": 4.023713775015606e-06, + "loss": 0.7794, + "step": 28757 + }, + { + "epoch": 0.5756925155769086, + "grad_norm": 1.099002718925476, + "learning_rate": 4.0233958339182065e-06, + "loss": 0.3162, + "step": 28758 + }, + { + "epoch": 0.575712534094037, + "grad_norm": 1.9111255407333374, + "learning_rate": 4.02307789692624e-06, + "loss": 0.7452, + "step": 28759 + }, + { + "epoch": 0.5757325526111653, + "grad_norm": 1.332999348640442, + "learning_rate": 4.022759964041045e-06, + "loss": 0.3134, + "step": 28760 + }, + { + "epoch": 0.5757525711282937, + "grad_norm": 1.1652494668960571, + "learning_rate": 4.022442035263957e-06, + "loss": 0.3228, + "step": 28761 + }, + { + "epoch": 0.575772589645422, + "grad_norm": 1.085434913635254, + "learning_rate": 4.0221241105963125e-06, + "loss": 0.2875, + "step": 28762 + }, + { + "epoch": 0.5757926081625504, + "grad_norm": 1.2145947217941284, + "learning_rate": 4.021806190039446e-06, + "loss": 0.3514, + "step": 28763 + }, + { + "epoch": 0.5758126266796787, + "grad_norm": 1.9164975881576538, + "learning_rate": 4.021488273594697e-06, + "loss": 0.8111, + "step": 28764 + }, + { + "epoch": 0.575832645196807, + "grad_norm": 1.3253203630447388, + "learning_rate": 4.0211703612634e-06, + "loss": 0.3228, + "step": 28765 + }, + { + "epoch": 0.5758526637139354, + "grad_norm": 1.0566003322601318, + "learning_rate": 4.0208524530468936e-06, + "loss": 0.3138, + "step": 28766 + }, + { + "epoch": 0.5758726822310637, + "grad_norm": 1.1302850246429443, + "learning_rate": 4.020534548946513e-06, + "loss": 0.3562, + "step": 28767 + }, + { + "epoch": 0.5758927007481921, + "grad_norm": 1.2087959051132202, + "learning_rate": 4.020216648963592e-06, + "loss": 0.2996, + "step": 28768 + }, + { + "epoch": 0.5759127192653204, + "grad_norm": 1.0788037776947021, + "learning_rate": 4.019898753099472e-06, + "loss": 0.2869, + "step": 28769 + }, + { + "epoch": 0.5759327377824488, + "grad_norm": 1.1633148193359375, + "learning_rate": 4.019580861355486e-06, + "loss": 0.3476, + "step": 28770 + }, + { + "epoch": 0.5759527562995771, + "grad_norm": 1.1085448265075684, + "learning_rate": 4.019262973732969e-06, + "loss": 0.3528, + "step": 28771 + }, + { + "epoch": 0.5759727748167055, + "grad_norm": 1.3754456043243408, + "learning_rate": 4.0189450902332615e-06, + "loss": 0.3214, + "step": 28772 + }, + { + "epoch": 0.5759927933338338, + "grad_norm": 1.0609104633331299, + "learning_rate": 4.0186272108576964e-06, + "loss": 0.268, + "step": 28773 + }, + { + "epoch": 0.5760128118509621, + "grad_norm": 1.2017072439193726, + "learning_rate": 4.018309335607612e-06, + "loss": 0.2849, + "step": 28774 + }, + { + "epoch": 0.5760328303680905, + "grad_norm": 1.0521305799484253, + "learning_rate": 4.017991464484345e-06, + "loss": 0.284, + "step": 28775 + }, + { + "epoch": 0.5760528488852188, + "grad_norm": 1.1853363513946533, + "learning_rate": 4.017673597489229e-06, + "loss": 0.2647, + "step": 28776 + }, + { + "epoch": 0.5760728674023472, + "grad_norm": 1.1028496026992798, + "learning_rate": 4.017355734623601e-06, + "loss": 0.3462, + "step": 28777 + }, + { + "epoch": 0.5760928859194755, + "grad_norm": 1.035089373588562, + "learning_rate": 4.017037875888799e-06, + "loss": 0.308, + "step": 28778 + }, + { + "epoch": 0.5761129044366039, + "grad_norm": 2.076106071472168, + "learning_rate": 4.01672002128616e-06, + "loss": 0.8458, + "step": 28779 + }, + { + "epoch": 0.5761329229537322, + "grad_norm": 1.1699837446212769, + "learning_rate": 4.016402170817017e-06, + "loss": 0.2893, + "step": 28780 + }, + { + "epoch": 0.5761529414708605, + "grad_norm": 1.1171025037765503, + "learning_rate": 4.016084324482706e-06, + "loss": 0.3263, + "step": 28781 + }, + { + "epoch": 0.5761729599879889, + "grad_norm": 1.2012982368469238, + "learning_rate": 4.015766482284567e-06, + "loss": 0.2581, + "step": 28782 + }, + { + "epoch": 0.5761929785051172, + "grad_norm": 1.0969204902648926, + "learning_rate": 4.015448644223934e-06, + "loss": 0.3028, + "step": 28783 + }, + { + "epoch": 0.5762129970222456, + "grad_norm": 1.0106440782546997, + "learning_rate": 4.015130810302142e-06, + "loss": 0.2905, + "step": 28784 + }, + { + "epoch": 0.5762330155393739, + "grad_norm": 1.7050246000289917, + "learning_rate": 4.014812980520529e-06, + "loss": 0.7705, + "step": 28785 + }, + { + "epoch": 0.5762530340565023, + "grad_norm": 1.1651250123977661, + "learning_rate": 4.0144951548804275e-06, + "loss": 0.3049, + "step": 28786 + }, + { + "epoch": 0.5762730525736306, + "grad_norm": 1.3350857496261597, + "learning_rate": 4.014177333383179e-06, + "loss": 0.3317, + "step": 28787 + }, + { + "epoch": 0.576293071090759, + "grad_norm": 1.2455264329910278, + "learning_rate": 4.013859516030117e-06, + "loss": 0.2895, + "step": 28788 + }, + { + "epoch": 0.5763130896078873, + "grad_norm": 1.0630435943603516, + "learning_rate": 4.013541702822576e-06, + "loss": 0.3268, + "step": 28789 + }, + { + "epoch": 0.5763331081250156, + "grad_norm": 1.0523866415023804, + "learning_rate": 4.0132238937618936e-06, + "loss": 0.333, + "step": 28790 + }, + { + "epoch": 0.576353126642144, + "grad_norm": 1.7436598539352417, + "learning_rate": 4.012906088849406e-06, + "loss": 0.7737, + "step": 28791 + }, + { + "epoch": 0.5763731451592723, + "grad_norm": 1.083989143371582, + "learning_rate": 4.012588288086449e-06, + "loss": 0.2633, + "step": 28792 + }, + { + "epoch": 0.5763931636764007, + "grad_norm": 1.0755120515823364, + "learning_rate": 4.012270491474359e-06, + "loss": 0.2942, + "step": 28793 + }, + { + "epoch": 0.576413182193529, + "grad_norm": 1.2252475023269653, + "learning_rate": 4.011952699014471e-06, + "loss": 0.3526, + "step": 28794 + }, + { + "epoch": 0.5764332007106574, + "grad_norm": 1.3871994018554688, + "learning_rate": 4.011634910708118e-06, + "loss": 0.3279, + "step": 28795 + }, + { + "epoch": 0.5764532192277857, + "grad_norm": 1.2237439155578613, + "learning_rate": 4.0113171265566435e-06, + "loss": 0.3542, + "step": 28796 + }, + { + "epoch": 0.576473237744914, + "grad_norm": 1.2985453605651855, + "learning_rate": 4.010999346561376e-06, + "loss": 0.3338, + "step": 28797 + }, + { + "epoch": 0.5764932562620424, + "grad_norm": 1.1082185506820679, + "learning_rate": 4.010681570723657e-06, + "loss": 0.3038, + "step": 28798 + }, + { + "epoch": 0.5765132747791707, + "grad_norm": 1.279288649559021, + "learning_rate": 4.010363799044817e-06, + "loss": 0.2687, + "step": 28799 + }, + { + "epoch": 0.5765332932962991, + "grad_norm": 1.1810659170150757, + "learning_rate": 4.010046031526197e-06, + "loss": 0.3432, + "step": 28800 + }, + { + "epoch": 0.5765533118134274, + "grad_norm": 1.1584874391555786, + "learning_rate": 4.00972826816913e-06, + "loss": 0.3571, + "step": 28801 + }, + { + "epoch": 0.5765733303305558, + "grad_norm": 1.1165688037872314, + "learning_rate": 4.009410508974951e-06, + "loss": 0.3048, + "step": 28802 + }, + { + "epoch": 0.5765933488476841, + "grad_norm": 1.0507752895355225, + "learning_rate": 4.009092753944999e-06, + "loss": 0.3313, + "step": 28803 + }, + { + "epoch": 0.5766133673648125, + "grad_norm": 1.0609548091888428, + "learning_rate": 4.008775003080604e-06, + "loss": 0.2809, + "step": 28804 + }, + { + "epoch": 0.5766333858819408, + "grad_norm": 1.1048094034194946, + "learning_rate": 4.008457256383109e-06, + "loss": 0.3182, + "step": 28805 + }, + { + "epoch": 0.5766534043990691, + "grad_norm": 1.0592390298843384, + "learning_rate": 4.008139513853847e-06, + "loss": 0.3529, + "step": 28806 + }, + { + "epoch": 0.5766734229161975, + "grad_norm": 1.684043049812317, + "learning_rate": 4.007821775494152e-06, + "loss": 0.7142, + "step": 28807 + }, + { + "epoch": 0.5766934414333258, + "grad_norm": 1.0288567543029785, + "learning_rate": 4.007504041305359e-06, + "loss": 0.236, + "step": 28808 + }, + { + "epoch": 0.5767134599504542, + "grad_norm": 1.0959458351135254, + "learning_rate": 4.007186311288806e-06, + "loss": 0.3529, + "step": 28809 + }, + { + "epoch": 0.5767334784675825, + "grad_norm": 1.1189923286437988, + "learning_rate": 4.006868585445829e-06, + "loss": 0.3296, + "step": 28810 + }, + { + "epoch": 0.5767534969847109, + "grad_norm": 1.0955052375793457, + "learning_rate": 4.0065508637777635e-06, + "loss": 0.3151, + "step": 28811 + }, + { + "epoch": 0.5767735155018392, + "grad_norm": 0.9941386580467224, + "learning_rate": 4.006233146285942e-06, + "loss": 0.3013, + "step": 28812 + }, + { + "epoch": 0.5767935340189675, + "grad_norm": 2.132404327392578, + "learning_rate": 4.0059154329717045e-06, + "loss": 0.8241, + "step": 28813 + }, + { + "epoch": 0.5768135525360959, + "grad_norm": 0.9935211539268494, + "learning_rate": 4.005597723836384e-06, + "loss": 0.2925, + "step": 28814 + }, + { + "epoch": 0.5768335710532242, + "grad_norm": 1.101544737815857, + "learning_rate": 4.005280018881317e-06, + "loss": 0.3133, + "step": 28815 + }, + { + "epoch": 0.5768535895703526, + "grad_norm": 1.0720422267913818, + "learning_rate": 4.004962318107839e-06, + "loss": 0.3476, + "step": 28816 + }, + { + "epoch": 0.5768736080874809, + "grad_norm": 1.9692569971084595, + "learning_rate": 4.004644621517283e-06, + "loss": 0.6976, + "step": 28817 + }, + { + "epoch": 0.5768936266046093, + "grad_norm": 1.0802534818649292, + "learning_rate": 4.004326929110989e-06, + "loss": 0.296, + "step": 28818 + }, + { + "epoch": 0.5769136451217376, + "grad_norm": 1.4165407419204712, + "learning_rate": 4.004009240890291e-06, + "loss": 0.2717, + "step": 28819 + }, + { + "epoch": 0.576933663638866, + "grad_norm": 1.8605555295944214, + "learning_rate": 4.003691556856523e-06, + "loss": 0.7684, + "step": 28820 + }, + { + "epoch": 0.5769536821559943, + "grad_norm": 1.1644999980926514, + "learning_rate": 4.003373877011019e-06, + "loss": 0.3354, + "step": 28821 + }, + { + "epoch": 0.5769737006731226, + "grad_norm": 1.2314881086349487, + "learning_rate": 4.003056201355119e-06, + "loss": 0.3329, + "step": 28822 + }, + { + "epoch": 0.576993719190251, + "grad_norm": 1.3684780597686768, + "learning_rate": 4.0027385298901555e-06, + "loss": 0.368, + "step": 28823 + }, + { + "epoch": 0.5770137377073793, + "grad_norm": 1.1951544284820557, + "learning_rate": 4.0024208626174655e-06, + "loss": 0.3354, + "step": 28824 + }, + { + "epoch": 0.5770337562245077, + "grad_norm": 1.1924759149551392, + "learning_rate": 4.002103199538384e-06, + "loss": 0.3327, + "step": 28825 + }, + { + "epoch": 0.577053774741636, + "grad_norm": 1.059478521347046, + "learning_rate": 4.001785540654242e-06, + "loss": 0.3063, + "step": 28826 + }, + { + "epoch": 0.5770737932587644, + "grad_norm": 1.0160191059112549, + "learning_rate": 4.001467885966383e-06, + "loss": 0.3261, + "step": 28827 + }, + { + "epoch": 0.5770938117758927, + "grad_norm": 1.088235855102539, + "learning_rate": 4.001150235476137e-06, + "loss": 0.3132, + "step": 28828 + }, + { + "epoch": 0.577113830293021, + "grad_norm": 1.0606098175048828, + "learning_rate": 4.000832589184841e-06, + "loss": 0.2834, + "step": 28829 + }, + { + "epoch": 0.5771338488101494, + "grad_norm": 1.151750087738037, + "learning_rate": 4.000514947093827e-06, + "loss": 0.3761, + "step": 28830 + }, + { + "epoch": 0.5771538673272777, + "grad_norm": 1.1715953350067139, + "learning_rate": 4.000197309204435e-06, + "loss": 0.3053, + "step": 28831 + }, + { + "epoch": 0.5771738858444061, + "grad_norm": 1.2036129236221313, + "learning_rate": 3.999879675518e-06, + "loss": 0.2985, + "step": 28832 + }, + { + "epoch": 0.5771939043615344, + "grad_norm": 1.8617416620254517, + "learning_rate": 3.9995620460358544e-06, + "loss": 0.7951, + "step": 28833 + }, + { + "epoch": 0.5772139228786628, + "grad_norm": 1.2445695400238037, + "learning_rate": 3.999244420759334e-06, + "loss": 0.309, + "step": 28834 + }, + { + "epoch": 0.5772339413957911, + "grad_norm": 1.3880374431610107, + "learning_rate": 3.998926799689774e-06, + "loss": 0.3277, + "step": 28835 + }, + { + "epoch": 0.5772539599129195, + "grad_norm": 1.1626169681549072, + "learning_rate": 3.9986091828285115e-06, + "loss": 0.3265, + "step": 28836 + }, + { + "epoch": 0.5772739784300478, + "grad_norm": 1.1573572158813477, + "learning_rate": 3.99829157017688e-06, + "loss": 0.3074, + "step": 28837 + }, + { + "epoch": 0.5772939969471761, + "grad_norm": 1.4538047313690186, + "learning_rate": 3.997973961736216e-06, + "loss": 0.3447, + "step": 28838 + }, + { + "epoch": 0.5773140154643045, + "grad_norm": 1.170682668685913, + "learning_rate": 3.997656357507852e-06, + "loss": 0.3397, + "step": 28839 + }, + { + "epoch": 0.5773340339814328, + "grad_norm": 1.0808385610580444, + "learning_rate": 3.997338757493127e-06, + "loss": 0.2935, + "step": 28840 + }, + { + "epoch": 0.5773540524985612, + "grad_norm": 1.195074439048767, + "learning_rate": 3.997021161693373e-06, + "loss": 0.307, + "step": 28841 + }, + { + "epoch": 0.5773740710156895, + "grad_norm": 1.0829246044158936, + "learning_rate": 3.996703570109927e-06, + "loss": 0.3242, + "step": 28842 + }, + { + "epoch": 0.5773940895328179, + "grad_norm": 1.1315172910690308, + "learning_rate": 3.996385982744121e-06, + "loss": 0.3026, + "step": 28843 + }, + { + "epoch": 0.5774141080499462, + "grad_norm": 1.106528639793396, + "learning_rate": 3.996068399597295e-06, + "loss": 0.3365, + "step": 28844 + }, + { + "epoch": 0.5774341265670745, + "grad_norm": 1.0679272413253784, + "learning_rate": 3.995750820670781e-06, + "loss": 0.2918, + "step": 28845 + }, + { + "epoch": 0.5774541450842029, + "grad_norm": 1.0100579261779785, + "learning_rate": 3.995433245965915e-06, + "loss": 0.3085, + "step": 28846 + }, + { + "epoch": 0.5774741636013312, + "grad_norm": 1.1638426780700684, + "learning_rate": 3.99511567548403e-06, + "loss": 0.3311, + "step": 28847 + }, + { + "epoch": 0.5774941821184596, + "grad_norm": 1.1802269220352173, + "learning_rate": 3.994798109226462e-06, + "loss": 0.3125, + "step": 28848 + }, + { + "epoch": 0.5775142006355879, + "grad_norm": 1.9639995098114014, + "learning_rate": 3.994480547194547e-06, + "loss": 0.8138, + "step": 28849 + }, + { + "epoch": 0.5775342191527163, + "grad_norm": 1.236656904220581, + "learning_rate": 3.994162989389621e-06, + "loss": 0.3326, + "step": 28850 + }, + { + "epoch": 0.5775542376698446, + "grad_norm": 1.0684778690338135, + "learning_rate": 3.993845435813016e-06, + "loss": 0.3005, + "step": 28851 + }, + { + "epoch": 0.577574256186973, + "grad_norm": 1.828290581703186, + "learning_rate": 3.993527886466067e-06, + "loss": 0.7393, + "step": 28852 + }, + { + "epoch": 0.5775942747041013, + "grad_norm": 1.0484791994094849, + "learning_rate": 3.993210341350112e-06, + "loss": 0.2875, + "step": 28853 + }, + { + "epoch": 0.5776142932212296, + "grad_norm": 1.1638357639312744, + "learning_rate": 3.992892800466483e-06, + "loss": 0.3018, + "step": 28854 + }, + { + "epoch": 0.577634311738358, + "grad_norm": 1.0609623193740845, + "learning_rate": 3.992575263816517e-06, + "loss": 0.3016, + "step": 28855 + }, + { + "epoch": 0.5776543302554863, + "grad_norm": 1.0739794969558716, + "learning_rate": 3.992257731401548e-06, + "loss": 0.2558, + "step": 28856 + }, + { + "epoch": 0.5776743487726147, + "grad_norm": 1.0830180644989014, + "learning_rate": 3.991940203222908e-06, + "loss": 0.3051, + "step": 28857 + }, + { + "epoch": 0.577694367289743, + "grad_norm": 1.2155627012252808, + "learning_rate": 3.9916226792819364e-06, + "loss": 0.3146, + "step": 28858 + }, + { + "epoch": 0.5777143858068714, + "grad_norm": 1.142971396446228, + "learning_rate": 3.991305159579968e-06, + "loss": 0.3728, + "step": 28859 + }, + { + "epoch": 0.5777344043239997, + "grad_norm": 0.9673069715499878, + "learning_rate": 3.990987644118332e-06, + "loss": 0.2931, + "step": 28860 + }, + { + "epoch": 0.577754422841128, + "grad_norm": 2.02009916305542, + "learning_rate": 3.9906701328983685e-06, + "loss": 0.816, + "step": 28861 + }, + { + "epoch": 0.5777744413582564, + "grad_norm": 1.2034835815429688, + "learning_rate": 3.9903526259214096e-06, + "loss": 0.2842, + "step": 28862 + }, + { + "epoch": 0.5777944598753847, + "grad_norm": 1.0108639001846313, + "learning_rate": 3.990035123188792e-06, + "loss": 0.3026, + "step": 28863 + }, + { + "epoch": 0.5778144783925131, + "grad_norm": 1.0763545036315918, + "learning_rate": 3.98971762470185e-06, + "loss": 0.3155, + "step": 28864 + }, + { + "epoch": 0.5778344969096414, + "grad_norm": 1.1395189762115479, + "learning_rate": 3.9894001304619156e-06, + "loss": 0.3123, + "step": 28865 + }, + { + "epoch": 0.5778545154267698, + "grad_norm": 1.1093566417694092, + "learning_rate": 3.989082640470328e-06, + "loss": 0.2917, + "step": 28866 + }, + { + "epoch": 0.5778745339438981, + "grad_norm": 1.1304514408111572, + "learning_rate": 3.988765154728418e-06, + "loss": 0.3255, + "step": 28867 + }, + { + "epoch": 0.5778945524610265, + "grad_norm": 1.1746597290039062, + "learning_rate": 3.988447673237522e-06, + "loss": 0.3224, + "step": 28868 + }, + { + "epoch": 0.5779145709781548, + "grad_norm": 1.0428086519241333, + "learning_rate": 3.988130195998975e-06, + "loss": 0.2831, + "step": 28869 + }, + { + "epoch": 0.5779345894952831, + "grad_norm": 1.221311330795288, + "learning_rate": 3.987812723014108e-06, + "loss": 0.2921, + "step": 28870 + }, + { + "epoch": 0.5779546080124115, + "grad_norm": 1.288561224937439, + "learning_rate": 3.987495254284262e-06, + "loss": 0.2932, + "step": 28871 + }, + { + "epoch": 0.5779746265295398, + "grad_norm": 1.2012782096862793, + "learning_rate": 3.9871777898107676e-06, + "loss": 0.3127, + "step": 28872 + }, + { + "epoch": 0.5779946450466682, + "grad_norm": 1.1561464071273804, + "learning_rate": 3.986860329594958e-06, + "loss": 0.3177, + "step": 28873 + }, + { + "epoch": 0.5780146635637965, + "grad_norm": 1.1350330114364624, + "learning_rate": 3.98654287363817e-06, + "loss": 0.292, + "step": 28874 + }, + { + "epoch": 0.5780346820809249, + "grad_norm": 1.110275387763977, + "learning_rate": 3.986225421941738e-06, + "loss": 0.2652, + "step": 28875 + }, + { + "epoch": 0.5780547005980532, + "grad_norm": 1.1168123483657837, + "learning_rate": 3.985907974506996e-06, + "loss": 0.3409, + "step": 28876 + }, + { + "epoch": 0.5780747191151815, + "grad_norm": 1.0827122926712036, + "learning_rate": 3.98559053133528e-06, + "loss": 0.348, + "step": 28877 + }, + { + "epoch": 0.5780947376323099, + "grad_norm": 1.1969956159591675, + "learning_rate": 3.985273092427923e-06, + "loss": 0.3051, + "step": 28878 + }, + { + "epoch": 0.5781147561494382, + "grad_norm": 1.8434925079345703, + "learning_rate": 3.984955657786257e-06, + "loss": 0.8529, + "step": 28879 + }, + { + "epoch": 0.5781347746665666, + "grad_norm": 0.8966357111930847, + "learning_rate": 3.9846382274116205e-06, + "loss": 0.2653, + "step": 28880 + }, + { + "epoch": 0.5781547931836949, + "grad_norm": 1.127193808555603, + "learning_rate": 3.9843208013053466e-06, + "loss": 0.3183, + "step": 28881 + }, + { + "epoch": 0.5781748117008233, + "grad_norm": 1.0347247123718262, + "learning_rate": 3.984003379468771e-06, + "loss": 0.3005, + "step": 28882 + }, + { + "epoch": 0.5781948302179516, + "grad_norm": 0.9851395487785339, + "learning_rate": 3.9836859619032224e-06, + "loss": 0.2897, + "step": 28883 + }, + { + "epoch": 0.57821484873508, + "grad_norm": 1.0874228477478027, + "learning_rate": 3.983368548610043e-06, + "loss": 0.3013, + "step": 28884 + }, + { + "epoch": 0.5782348672522083, + "grad_norm": 1.1075363159179688, + "learning_rate": 3.983051139590562e-06, + "loss": 0.3178, + "step": 28885 + }, + { + "epoch": 0.5782548857693366, + "grad_norm": 1.031489372253418, + "learning_rate": 3.982733734846115e-06, + "loss": 0.2761, + "step": 28886 + }, + { + "epoch": 0.578274904286465, + "grad_norm": 1.063414454460144, + "learning_rate": 3.982416334378038e-06, + "loss": 0.3069, + "step": 28887 + }, + { + "epoch": 0.5782949228035933, + "grad_norm": 1.0462980270385742, + "learning_rate": 3.982098938187661e-06, + "loss": 0.2353, + "step": 28888 + }, + { + "epoch": 0.5783149413207217, + "grad_norm": 1.2792309522628784, + "learning_rate": 3.981781546276323e-06, + "loss": 0.3208, + "step": 28889 + }, + { + "epoch": 0.57833495983785, + "grad_norm": 1.0703399181365967, + "learning_rate": 3.981464158645356e-06, + "loss": 0.2921, + "step": 28890 + }, + { + "epoch": 0.5783549783549784, + "grad_norm": 1.0388877391815186, + "learning_rate": 3.9811467752960954e-06, + "loss": 0.3117, + "step": 28891 + }, + { + "epoch": 0.5783749968721067, + "grad_norm": 1.1457602977752686, + "learning_rate": 3.980829396229872e-06, + "loss": 0.3292, + "step": 28892 + }, + { + "epoch": 0.578395015389235, + "grad_norm": 1.0859053134918213, + "learning_rate": 3.980512021448023e-06, + "loss": 0.2777, + "step": 28893 + }, + { + "epoch": 0.5784150339063634, + "grad_norm": 1.2503553628921509, + "learning_rate": 3.980194650951884e-06, + "loss": 0.2956, + "step": 28894 + }, + { + "epoch": 0.5784350524234917, + "grad_norm": 1.9215900897979736, + "learning_rate": 3.979877284742787e-06, + "loss": 0.7642, + "step": 28895 + }, + { + "epoch": 0.5784550709406201, + "grad_norm": 1.1120057106018066, + "learning_rate": 3.979559922822064e-06, + "loss": 0.3335, + "step": 28896 + }, + { + "epoch": 0.5784750894577484, + "grad_norm": 1.1234852075576782, + "learning_rate": 3.9792425651910535e-06, + "loss": 0.3295, + "step": 28897 + }, + { + "epoch": 0.5784951079748768, + "grad_norm": 1.13345205783844, + "learning_rate": 3.978925211851088e-06, + "loss": 0.3132, + "step": 28898 + }, + { + "epoch": 0.5785151264920051, + "grad_norm": 1.1034148931503296, + "learning_rate": 3.9786078628035e-06, + "loss": 0.3491, + "step": 28899 + }, + { + "epoch": 0.5785351450091335, + "grad_norm": 1.1454625129699707, + "learning_rate": 3.978290518049625e-06, + "loss": 0.333, + "step": 28900 + }, + { + "epoch": 0.5785551635262618, + "grad_norm": 1.340835690498352, + "learning_rate": 3.977973177590796e-06, + "loss": 0.3365, + "step": 28901 + }, + { + "epoch": 0.5785751820433901, + "grad_norm": 0.9908438920974731, + "learning_rate": 3.977655841428349e-06, + "loss": 0.3015, + "step": 28902 + }, + { + "epoch": 0.5785952005605185, + "grad_norm": 0.9664313793182373, + "learning_rate": 3.977338509563617e-06, + "loss": 0.2459, + "step": 28903 + }, + { + "epoch": 0.5786152190776468, + "grad_norm": 1.234668493270874, + "learning_rate": 3.977021181997933e-06, + "loss": 0.3165, + "step": 28904 + }, + { + "epoch": 0.5786352375947752, + "grad_norm": 1.0998024940490723, + "learning_rate": 3.976703858732632e-06, + "loss": 0.274, + "step": 28905 + }, + { + "epoch": 0.5786552561119035, + "grad_norm": 1.0154085159301758, + "learning_rate": 3.976386539769047e-06, + "loss": 0.2508, + "step": 28906 + }, + { + "epoch": 0.5786752746290319, + "grad_norm": 1.1297056674957275, + "learning_rate": 3.976069225108514e-06, + "loss": 0.3363, + "step": 28907 + }, + { + "epoch": 0.5786952931461602, + "grad_norm": 1.1234077215194702, + "learning_rate": 3.975751914752366e-06, + "loss": 0.3135, + "step": 28908 + }, + { + "epoch": 0.5787153116632885, + "grad_norm": 1.1474733352661133, + "learning_rate": 3.9754346087019365e-06, + "loss": 0.325, + "step": 28909 + }, + { + "epoch": 0.5787353301804169, + "grad_norm": 1.9466063976287842, + "learning_rate": 3.975117306958557e-06, + "loss": 0.7997, + "step": 28910 + }, + { + "epoch": 0.5787553486975452, + "grad_norm": 1.0718671083450317, + "learning_rate": 3.974800009523566e-06, + "loss": 0.2962, + "step": 28911 + }, + { + "epoch": 0.5787753672146736, + "grad_norm": 1.3462077379226685, + "learning_rate": 3.974482716398294e-06, + "loss": 0.3139, + "step": 28912 + }, + { + "epoch": 0.5787953857318019, + "grad_norm": 1.223680019378662, + "learning_rate": 3.974165427584077e-06, + "loss": 0.2806, + "step": 28913 + }, + { + "epoch": 0.5788154042489303, + "grad_norm": 1.0939441919326782, + "learning_rate": 3.973848143082246e-06, + "loss": 0.3046, + "step": 28914 + }, + { + "epoch": 0.5788354227660586, + "grad_norm": 1.0893796682357788, + "learning_rate": 3.973530862894139e-06, + "loss": 0.3137, + "step": 28915 + }, + { + "epoch": 0.578855441283187, + "grad_norm": 0.9968298077583313, + "learning_rate": 3.973213587021087e-06, + "loss": 0.2647, + "step": 28916 + }, + { + "epoch": 0.5788754598003153, + "grad_norm": 1.1034058332443237, + "learning_rate": 3.972896315464423e-06, + "loss": 0.3381, + "step": 28917 + }, + { + "epoch": 0.5788954783174436, + "grad_norm": 1.1049970388412476, + "learning_rate": 3.972579048225482e-06, + "loss": 0.3343, + "step": 28918 + }, + { + "epoch": 0.578915496834572, + "grad_norm": 1.050907015800476, + "learning_rate": 3.972261785305596e-06, + "loss": 0.2743, + "step": 28919 + }, + { + "epoch": 0.5789355153517003, + "grad_norm": 1.1614336967468262, + "learning_rate": 3.971944526706103e-06, + "loss": 0.2935, + "step": 28920 + }, + { + "epoch": 0.5789555338688287, + "grad_norm": 1.2348166704177856, + "learning_rate": 3.971627272428334e-06, + "loss": 0.336, + "step": 28921 + }, + { + "epoch": 0.578975552385957, + "grad_norm": 1.098332405090332, + "learning_rate": 3.9713100224736216e-06, + "loss": 0.2879, + "step": 28922 + }, + { + "epoch": 0.5789955709030854, + "grad_norm": 1.2324795722961426, + "learning_rate": 3.970992776843299e-06, + "loss": 0.3336, + "step": 28923 + }, + { + "epoch": 0.5790155894202137, + "grad_norm": 1.2036441564559937, + "learning_rate": 3.970675535538704e-06, + "loss": 0.3459, + "step": 28924 + }, + { + "epoch": 0.579035607937342, + "grad_norm": 1.2740856409072876, + "learning_rate": 3.9703582985611655e-06, + "loss": 0.3091, + "step": 28925 + }, + { + "epoch": 0.5790556264544704, + "grad_norm": 1.092058777809143, + "learning_rate": 3.970041065912021e-06, + "loss": 0.2643, + "step": 28926 + }, + { + "epoch": 0.5790756449715987, + "grad_norm": 1.892526626586914, + "learning_rate": 3.9697238375926e-06, + "loss": 0.751, + "step": 28927 + }, + { + "epoch": 0.5790956634887271, + "grad_norm": 1.1307425498962402, + "learning_rate": 3.96940661360424e-06, + "loss": 0.2796, + "step": 28928 + }, + { + "epoch": 0.5791156820058554, + "grad_norm": 1.1940059661865234, + "learning_rate": 3.969089393948273e-06, + "loss": 0.3265, + "step": 28929 + }, + { + "epoch": 0.5791357005229838, + "grad_norm": 1.352052927017212, + "learning_rate": 3.968772178626032e-06, + "loss": 0.3611, + "step": 28930 + }, + { + "epoch": 0.5791557190401121, + "grad_norm": 1.2300032377243042, + "learning_rate": 3.96845496763885e-06, + "loss": 0.2961, + "step": 28931 + }, + { + "epoch": 0.5791757375572405, + "grad_norm": 1.2121533155441284, + "learning_rate": 3.9681377609880605e-06, + "loss": 0.309, + "step": 28932 + }, + { + "epoch": 0.5791957560743688, + "grad_norm": 1.1318039894104004, + "learning_rate": 3.9678205586749995e-06, + "loss": 0.2877, + "step": 28933 + }, + { + "epoch": 0.5792157745914971, + "grad_norm": 1.887984275817871, + "learning_rate": 3.967503360700999e-06, + "loss": 0.8509, + "step": 28934 + }, + { + "epoch": 0.5792357931086255, + "grad_norm": 1.0215163230895996, + "learning_rate": 3.967186167067393e-06, + "loss": 0.3161, + "step": 28935 + }, + { + "epoch": 0.5792558116257538, + "grad_norm": 1.2676833868026733, + "learning_rate": 3.96686897777551e-06, + "loss": 0.2929, + "step": 28936 + }, + { + "epoch": 0.5792758301428822, + "grad_norm": 1.8822782039642334, + "learning_rate": 3.966551792826691e-06, + "loss": 0.7769, + "step": 28937 + }, + { + "epoch": 0.5792958486600105, + "grad_norm": 1.111647605895996, + "learning_rate": 3.966234612222265e-06, + "loss": 0.3401, + "step": 28938 + }, + { + "epoch": 0.5793158671771389, + "grad_norm": 1.0312817096710205, + "learning_rate": 3.965917435963566e-06, + "loss": 0.3286, + "step": 28939 + }, + { + "epoch": 0.5793358856942672, + "grad_norm": 1.1931654214859009, + "learning_rate": 3.965600264051929e-06, + "loss": 0.3247, + "step": 28940 + }, + { + "epoch": 0.5793559042113955, + "grad_norm": 1.136896014213562, + "learning_rate": 3.9652830964886834e-06, + "loss": 0.3668, + "step": 28941 + }, + { + "epoch": 0.5793759227285239, + "grad_norm": 1.1847233772277832, + "learning_rate": 3.964965933275166e-06, + "loss": 0.3044, + "step": 28942 + }, + { + "epoch": 0.5793959412456522, + "grad_norm": 1.2878626585006714, + "learning_rate": 3.96464877441271e-06, + "loss": 0.2931, + "step": 28943 + }, + { + "epoch": 0.5794159597627806, + "grad_norm": 1.0751755237579346, + "learning_rate": 3.964331619902647e-06, + "loss": 0.2406, + "step": 28944 + }, + { + "epoch": 0.5794359782799089, + "grad_norm": 1.1347895860671997, + "learning_rate": 3.96401446974631e-06, + "loss": 0.2921, + "step": 28945 + }, + { + "epoch": 0.5794559967970373, + "grad_norm": 1.0771733522415161, + "learning_rate": 3.963697323945035e-06, + "loss": 0.3007, + "step": 28946 + }, + { + "epoch": 0.5794760153141656, + "grad_norm": 1.1277484893798828, + "learning_rate": 3.963380182500153e-06, + "loss": 0.3087, + "step": 28947 + }, + { + "epoch": 0.579496033831294, + "grad_norm": 1.2421159744262695, + "learning_rate": 3.963063045412998e-06, + "loss": 0.3333, + "step": 28948 + }, + { + "epoch": 0.5795160523484223, + "grad_norm": 1.0231250524520874, + "learning_rate": 3.962745912684901e-06, + "loss": 0.3052, + "step": 28949 + }, + { + "epoch": 0.5795360708655506, + "grad_norm": 1.1179343461990356, + "learning_rate": 3.962428784317198e-06, + "loss": 0.2438, + "step": 28950 + }, + { + "epoch": 0.579556089382679, + "grad_norm": 1.0668559074401855, + "learning_rate": 3.962111660311221e-06, + "loss": 0.2771, + "step": 28951 + }, + { + "epoch": 0.5795761078998073, + "grad_norm": 1.2823127508163452, + "learning_rate": 3.961794540668305e-06, + "loss": 0.3548, + "step": 28952 + }, + { + "epoch": 0.5795961264169357, + "grad_norm": 1.1529672145843506, + "learning_rate": 3.96147742538978e-06, + "loss": 0.3077, + "step": 28953 + }, + { + "epoch": 0.579616144934064, + "grad_norm": 1.063064694404602, + "learning_rate": 3.961160314476979e-06, + "loss": 0.3407, + "step": 28954 + }, + { + "epoch": 0.5796361634511924, + "grad_norm": 1.2868825197219849, + "learning_rate": 3.960843207931237e-06, + "loss": 0.3487, + "step": 28955 + }, + { + "epoch": 0.5796561819683207, + "grad_norm": 1.098684549331665, + "learning_rate": 3.960526105753889e-06, + "loss": 0.3306, + "step": 28956 + }, + { + "epoch": 0.579676200485449, + "grad_norm": 1.1213499307632446, + "learning_rate": 3.960209007946263e-06, + "loss": 0.292, + "step": 28957 + }, + { + "epoch": 0.5796962190025774, + "grad_norm": 1.8786107301712036, + "learning_rate": 3.959891914509694e-06, + "loss": 0.8102, + "step": 28958 + }, + { + "epoch": 0.5797162375197057, + "grad_norm": 1.0978795289993286, + "learning_rate": 3.959574825445517e-06, + "loss": 0.2852, + "step": 28959 + }, + { + "epoch": 0.5797362560368341, + "grad_norm": 1.174000859260559, + "learning_rate": 3.959257740755064e-06, + "loss": 0.2697, + "step": 28960 + }, + { + "epoch": 0.5797562745539624, + "grad_norm": 1.2411878108978271, + "learning_rate": 3.958940660439668e-06, + "loss": 0.3109, + "step": 28961 + }, + { + "epoch": 0.5797762930710908, + "grad_norm": 1.111951470375061, + "learning_rate": 3.95862358450066e-06, + "loss": 0.3219, + "step": 28962 + }, + { + "epoch": 0.5797963115882191, + "grad_norm": 1.1891255378723145, + "learning_rate": 3.958306512939374e-06, + "loss": 0.3087, + "step": 28963 + }, + { + "epoch": 0.5798163301053475, + "grad_norm": 1.2479227781295776, + "learning_rate": 3.957989445757143e-06, + "loss": 0.2925, + "step": 28964 + }, + { + "epoch": 0.5798363486224758, + "grad_norm": 0.9769840836524963, + "learning_rate": 3.957672382955302e-06, + "loss": 0.2602, + "step": 28965 + }, + { + "epoch": 0.5798563671396041, + "grad_norm": 1.0768437385559082, + "learning_rate": 3.957355324535181e-06, + "loss": 0.3337, + "step": 28966 + }, + { + "epoch": 0.5798763856567325, + "grad_norm": 1.0546306371688843, + "learning_rate": 3.957038270498113e-06, + "loss": 0.304, + "step": 28967 + }, + { + "epoch": 0.5798964041738608, + "grad_norm": 1.1270877122879028, + "learning_rate": 3.956721220845432e-06, + "loss": 0.3546, + "step": 28968 + }, + { + "epoch": 0.5799164226909892, + "grad_norm": 1.0173171758651733, + "learning_rate": 3.956404175578472e-06, + "loss": 0.3069, + "step": 28969 + }, + { + "epoch": 0.5799364412081175, + "grad_norm": 1.0914684534072876, + "learning_rate": 3.956087134698563e-06, + "loss": 0.2737, + "step": 28970 + }, + { + "epoch": 0.5799564597252459, + "grad_norm": 1.8991931676864624, + "learning_rate": 3.95577009820704e-06, + "loss": 0.753, + "step": 28971 + }, + { + "epoch": 0.5799764782423742, + "grad_norm": 0.9709622263908386, + "learning_rate": 3.955453066105232e-06, + "loss": 0.2554, + "step": 28972 + }, + { + "epoch": 0.5799964967595025, + "grad_norm": 1.0742243528366089, + "learning_rate": 3.955136038394478e-06, + "loss": 0.316, + "step": 28973 + }, + { + "epoch": 0.5800165152766309, + "grad_norm": 1.1041550636291504, + "learning_rate": 3.954819015076107e-06, + "loss": 0.3064, + "step": 28974 + }, + { + "epoch": 0.5800365337937592, + "grad_norm": 1.1687934398651123, + "learning_rate": 3.9545019961514495e-06, + "loss": 0.2808, + "step": 28975 + }, + { + "epoch": 0.5800565523108876, + "grad_norm": 1.113093376159668, + "learning_rate": 3.9541849816218414e-06, + "loss": 0.3419, + "step": 28976 + }, + { + "epoch": 0.5800765708280159, + "grad_norm": 1.2899972200393677, + "learning_rate": 3.953867971488614e-06, + "loss": 0.2684, + "step": 28977 + }, + { + "epoch": 0.5800965893451443, + "grad_norm": 1.167120337486267, + "learning_rate": 3.9535509657531025e-06, + "loss": 0.3151, + "step": 28978 + }, + { + "epoch": 0.5801166078622726, + "grad_norm": 1.1855857372283936, + "learning_rate": 3.953233964416637e-06, + "loss": 0.2998, + "step": 28979 + }, + { + "epoch": 0.580136626379401, + "grad_norm": 1.120108723640442, + "learning_rate": 3.9529169674805516e-06, + "loss": 0.3146, + "step": 28980 + }, + { + "epoch": 0.5801566448965293, + "grad_norm": 1.0998132228851318, + "learning_rate": 3.952599974946175e-06, + "loss": 0.2556, + "step": 28981 + }, + { + "epoch": 0.5801766634136576, + "grad_norm": 1.0819227695465088, + "learning_rate": 3.952282986814845e-06, + "loss": 0.2861, + "step": 28982 + }, + { + "epoch": 0.580196681930786, + "grad_norm": 1.1288025379180908, + "learning_rate": 3.951966003087891e-06, + "loss": 0.3571, + "step": 28983 + }, + { + "epoch": 0.5802167004479143, + "grad_norm": 1.133277416229248, + "learning_rate": 3.951649023766646e-06, + "loss": 0.2813, + "step": 28984 + }, + { + "epoch": 0.5802367189650427, + "grad_norm": 1.1576391458511353, + "learning_rate": 3.951332048852442e-06, + "loss": 0.3498, + "step": 28985 + }, + { + "epoch": 0.580256737482171, + "grad_norm": 1.2926925420761108, + "learning_rate": 3.951015078346615e-06, + "loss": 0.3079, + "step": 28986 + }, + { + "epoch": 0.5802767559992994, + "grad_norm": 1.185869812965393, + "learning_rate": 3.950698112250494e-06, + "loss": 0.3416, + "step": 28987 + }, + { + "epoch": 0.5802967745164277, + "grad_norm": 1.0690181255340576, + "learning_rate": 3.950381150565411e-06, + "loss": 0.3, + "step": 28988 + }, + { + "epoch": 0.580316793033556, + "grad_norm": 1.230328917503357, + "learning_rate": 3.950064193292701e-06, + "loss": 0.3536, + "step": 28989 + }, + { + "epoch": 0.5803368115506844, + "grad_norm": 1.1448664665222168, + "learning_rate": 3.949747240433694e-06, + "loss": 0.3022, + "step": 28990 + }, + { + "epoch": 0.5803568300678127, + "grad_norm": 1.1797919273376465, + "learning_rate": 3.9494302919897255e-06, + "loss": 0.3303, + "step": 28991 + }, + { + "epoch": 0.5803768485849411, + "grad_norm": 1.0563390254974365, + "learning_rate": 3.9491133479621255e-06, + "loss": 0.2883, + "step": 28992 + }, + { + "epoch": 0.5803968671020694, + "grad_norm": 1.0553290843963623, + "learning_rate": 3.9487964083522275e-06, + "loss": 0.3034, + "step": 28993 + }, + { + "epoch": 0.5804168856191978, + "grad_norm": 1.0723729133605957, + "learning_rate": 3.9484794731613606e-06, + "loss": 0.2788, + "step": 28994 + }, + { + "epoch": 0.5804369041363261, + "grad_norm": 0.9542825222015381, + "learning_rate": 3.948162542390861e-06, + "loss": 0.2625, + "step": 28995 + }, + { + "epoch": 0.5804569226534545, + "grad_norm": 1.247365951538086, + "learning_rate": 3.9478456160420594e-06, + "loss": 0.2966, + "step": 28996 + }, + { + "epoch": 0.5804769411705828, + "grad_norm": 1.131675362586975, + "learning_rate": 3.947528694116289e-06, + "loss": 0.2998, + "step": 28997 + }, + { + "epoch": 0.5804969596877111, + "grad_norm": 1.3024169206619263, + "learning_rate": 3.94721177661488e-06, + "loss": 0.3298, + "step": 28998 + }, + { + "epoch": 0.5805169782048395, + "grad_norm": 1.1408427953720093, + "learning_rate": 3.946894863539169e-06, + "loss": 0.2993, + "step": 28999 + }, + { + "epoch": 0.5805369967219678, + "grad_norm": 1.850277304649353, + "learning_rate": 3.946577954890484e-06, + "loss": 0.8266, + "step": 29000 + }, + { + "epoch": 0.5805570152390962, + "grad_norm": 1.1067571640014648, + "learning_rate": 3.946261050670158e-06, + "loss": 0.2959, + "step": 29001 + }, + { + "epoch": 0.5805770337562245, + "grad_norm": 2.0009310245513916, + "learning_rate": 3.945944150879525e-06, + "loss": 0.812, + "step": 29002 + }, + { + "epoch": 0.5805970522733529, + "grad_norm": 1.0632755756378174, + "learning_rate": 3.945627255519913e-06, + "loss": 0.2739, + "step": 29003 + }, + { + "epoch": 0.5806170707904812, + "grad_norm": 1.060248851776123, + "learning_rate": 3.94531036459266e-06, + "loss": 0.3205, + "step": 29004 + }, + { + "epoch": 0.5806370893076095, + "grad_norm": 1.132313847541809, + "learning_rate": 3.944993478099095e-06, + "loss": 0.3092, + "step": 29005 + }, + { + "epoch": 0.5806571078247379, + "grad_norm": 1.1068744659423828, + "learning_rate": 3.944676596040551e-06, + "loss": 0.2963, + "step": 29006 + }, + { + "epoch": 0.5806771263418662, + "grad_norm": 1.170745611190796, + "learning_rate": 3.944359718418356e-06, + "loss": 0.2616, + "step": 29007 + }, + { + "epoch": 0.5806971448589946, + "grad_norm": 1.120551347732544, + "learning_rate": 3.944042845233849e-06, + "loss": 0.3156, + "step": 29008 + }, + { + "epoch": 0.5807171633761229, + "grad_norm": 1.9908298254013062, + "learning_rate": 3.943725976488357e-06, + "loss": 0.7574, + "step": 29009 + }, + { + "epoch": 0.5807371818932513, + "grad_norm": 1.1358903646469116, + "learning_rate": 3.943409112183215e-06, + "loss": 0.2955, + "step": 29010 + }, + { + "epoch": 0.5807572004103796, + "grad_norm": 1.2762075662612915, + "learning_rate": 3.943092252319751e-06, + "loss": 0.2876, + "step": 29011 + }, + { + "epoch": 0.580777218927508, + "grad_norm": 1.1626896858215332, + "learning_rate": 3.942775396899302e-06, + "loss": 0.2858, + "step": 29012 + }, + { + "epoch": 0.5807972374446363, + "grad_norm": 1.1897953748703003, + "learning_rate": 3.942458545923199e-06, + "loss": 0.3339, + "step": 29013 + }, + { + "epoch": 0.5808172559617646, + "grad_norm": 1.1108242273330688, + "learning_rate": 3.9421416993927705e-06, + "loss": 0.3258, + "step": 29014 + }, + { + "epoch": 0.580837274478893, + "grad_norm": 1.0704740285873413, + "learning_rate": 3.941824857309352e-06, + "loss": 0.3206, + "step": 29015 + }, + { + "epoch": 0.5808572929960213, + "grad_norm": 0.9379902482032776, + "learning_rate": 3.941508019674271e-06, + "loss": 0.2409, + "step": 29016 + }, + { + "epoch": 0.5808773115131497, + "grad_norm": 1.1655867099761963, + "learning_rate": 3.9411911864888656e-06, + "loss": 0.299, + "step": 29017 + }, + { + "epoch": 0.580897330030278, + "grad_norm": 1.1619206666946411, + "learning_rate": 3.940874357754465e-06, + "loss": 0.3179, + "step": 29018 + }, + { + "epoch": 0.5809173485474064, + "grad_norm": 1.2052556276321411, + "learning_rate": 3.9405575334724e-06, + "loss": 0.3041, + "step": 29019 + }, + { + "epoch": 0.5809373670645347, + "grad_norm": 1.1821635961532593, + "learning_rate": 3.940240713644001e-06, + "loss": 0.2752, + "step": 29020 + }, + { + "epoch": 0.580957385581663, + "grad_norm": 1.274563193321228, + "learning_rate": 3.9399238982706036e-06, + "loss": 0.3334, + "step": 29021 + }, + { + "epoch": 0.5809774040987914, + "grad_norm": 1.0151017904281616, + "learning_rate": 3.9396070873535375e-06, + "loss": 0.3188, + "step": 29022 + }, + { + "epoch": 0.5809974226159197, + "grad_norm": 1.0406697988510132, + "learning_rate": 3.9392902808941365e-06, + "loss": 0.2841, + "step": 29023 + }, + { + "epoch": 0.5810174411330481, + "grad_norm": 1.0364280939102173, + "learning_rate": 3.93897347889373e-06, + "loss": 0.3156, + "step": 29024 + }, + { + "epoch": 0.5810374596501764, + "grad_norm": 1.0946599245071411, + "learning_rate": 3.938656681353649e-06, + "loss": 0.2984, + "step": 29025 + }, + { + "epoch": 0.5810574781673048, + "grad_norm": 1.2246426343917847, + "learning_rate": 3.938339888275229e-06, + "loss": 0.3284, + "step": 29026 + }, + { + "epoch": 0.5810774966844331, + "grad_norm": 1.046683669090271, + "learning_rate": 3.938023099659799e-06, + "loss": 0.2842, + "step": 29027 + }, + { + "epoch": 0.5810975152015615, + "grad_norm": 1.2161095142364502, + "learning_rate": 3.937706315508692e-06, + "loss": 0.3182, + "step": 29028 + }, + { + "epoch": 0.5811175337186898, + "grad_norm": 1.0947144031524658, + "learning_rate": 3.937389535823237e-06, + "loss": 0.3308, + "step": 29029 + }, + { + "epoch": 0.5811375522358181, + "grad_norm": 2.1238882541656494, + "learning_rate": 3.937072760604769e-06, + "loss": 0.8224, + "step": 29030 + }, + { + "epoch": 0.5811575707529465, + "grad_norm": 1.179427146911621, + "learning_rate": 3.9367559898546195e-06, + "loss": 0.3303, + "step": 29031 + }, + { + "epoch": 0.5811775892700748, + "grad_norm": 1.1293038129806519, + "learning_rate": 3.936439223574119e-06, + "loss": 0.391, + "step": 29032 + }, + { + "epoch": 0.5811976077872032, + "grad_norm": 1.0615893602371216, + "learning_rate": 3.9361224617645975e-06, + "loss": 0.3101, + "step": 29033 + }, + { + "epoch": 0.5812176263043315, + "grad_norm": 1.1584103107452393, + "learning_rate": 3.935805704427388e-06, + "loss": 0.3342, + "step": 29034 + }, + { + "epoch": 0.5812376448214599, + "grad_norm": 1.03008234500885, + "learning_rate": 3.9354889515638225e-06, + "loss": 0.3259, + "step": 29035 + }, + { + "epoch": 0.5812576633385882, + "grad_norm": 1.8532665967941284, + "learning_rate": 3.935172203175234e-06, + "loss": 0.7842, + "step": 29036 + }, + { + "epoch": 0.5812776818557165, + "grad_norm": 1.119112253189087, + "learning_rate": 3.934855459262953e-06, + "loss": 0.3128, + "step": 29037 + }, + { + "epoch": 0.5812977003728449, + "grad_norm": 0.9952954649925232, + "learning_rate": 3.934538719828307e-06, + "loss": 0.3059, + "step": 29038 + }, + { + "epoch": 0.5813177188899732, + "grad_norm": 1.0723748207092285, + "learning_rate": 3.934221984872633e-06, + "loss": 0.3081, + "step": 29039 + }, + { + "epoch": 0.5813377374071016, + "grad_norm": 1.0755536556243896, + "learning_rate": 3.93390525439726e-06, + "loss": 0.3242, + "step": 29040 + }, + { + "epoch": 0.5813577559242299, + "grad_norm": 1.215332269668579, + "learning_rate": 3.93358852840352e-06, + "loss": 0.4012, + "step": 29041 + }, + { + "epoch": 0.5813777744413583, + "grad_norm": 1.0187952518463135, + "learning_rate": 3.933271806892744e-06, + "loss": 0.2887, + "step": 29042 + }, + { + "epoch": 0.5813977929584866, + "grad_norm": 1.922379493713379, + "learning_rate": 3.932955089866264e-06, + "loss": 0.7279, + "step": 29043 + }, + { + "epoch": 0.581417811475615, + "grad_norm": 1.2320979833602905, + "learning_rate": 3.932638377325412e-06, + "loss": 0.3591, + "step": 29044 + }, + { + "epoch": 0.5814378299927433, + "grad_norm": 1.0727158784866333, + "learning_rate": 3.932321669271519e-06, + "loss": 0.3071, + "step": 29045 + }, + { + "epoch": 0.5814578485098716, + "grad_norm": 1.1292544603347778, + "learning_rate": 3.932004965705915e-06, + "loss": 0.376, + "step": 29046 + }, + { + "epoch": 0.581477867027, + "grad_norm": 1.1225318908691406, + "learning_rate": 3.931688266629931e-06, + "loss": 0.3025, + "step": 29047 + }, + { + "epoch": 0.5814978855441283, + "grad_norm": 1.2137620449066162, + "learning_rate": 3.9313715720449e-06, + "loss": 0.3484, + "step": 29048 + }, + { + "epoch": 0.5815179040612567, + "grad_norm": 1.1748054027557373, + "learning_rate": 3.931054881952155e-06, + "loss": 0.3425, + "step": 29049 + }, + { + "epoch": 0.581537922578385, + "grad_norm": 1.0870857238769531, + "learning_rate": 3.930738196353024e-06, + "loss": 0.3014, + "step": 29050 + }, + { + "epoch": 0.5815579410955134, + "grad_norm": 1.113211989402771, + "learning_rate": 3.930421515248839e-06, + "loss": 0.381, + "step": 29051 + }, + { + "epoch": 0.5815779596126417, + "grad_norm": 0.9801912903785706, + "learning_rate": 3.930104838640933e-06, + "loss": 0.2955, + "step": 29052 + }, + { + "epoch": 0.58159797812977, + "grad_norm": 1.0784461498260498, + "learning_rate": 3.929788166530634e-06, + "loss": 0.305, + "step": 29053 + }, + { + "epoch": 0.5816179966468984, + "grad_norm": 1.1293622255325317, + "learning_rate": 3.929471498919277e-06, + "loss": 0.3092, + "step": 29054 + }, + { + "epoch": 0.5816380151640267, + "grad_norm": 1.1151443719863892, + "learning_rate": 3.929154835808192e-06, + "loss": 0.3214, + "step": 29055 + }, + { + "epoch": 0.5816580336811551, + "grad_norm": 1.1179858446121216, + "learning_rate": 3.928838177198707e-06, + "loss": 0.3197, + "step": 29056 + }, + { + "epoch": 0.5816780521982834, + "grad_norm": 1.9120771884918213, + "learning_rate": 3.928521523092157e-06, + "loss": 0.74, + "step": 29057 + }, + { + "epoch": 0.5816980707154118, + "grad_norm": 1.05875825881958, + "learning_rate": 3.928204873489874e-06, + "loss": 0.2987, + "step": 29058 + }, + { + "epoch": 0.5817180892325401, + "grad_norm": 1.1866724491119385, + "learning_rate": 3.927888228393184e-06, + "loss": 0.2916, + "step": 29059 + }, + { + "epoch": 0.5817381077496685, + "grad_norm": 1.8918354511260986, + "learning_rate": 3.9275715878034224e-06, + "loss": 0.7344, + "step": 29060 + }, + { + "epoch": 0.5817581262667968, + "grad_norm": 1.031581997871399, + "learning_rate": 3.927254951721919e-06, + "loss": 0.3015, + "step": 29061 + }, + { + "epoch": 0.5817781447839251, + "grad_norm": 1.0696494579315186, + "learning_rate": 3.926938320150006e-06, + "loss": 0.3553, + "step": 29062 + }, + { + "epoch": 0.5817981633010535, + "grad_norm": 1.1462396383285522, + "learning_rate": 3.926621693089013e-06, + "loss": 0.3726, + "step": 29063 + }, + { + "epoch": 0.5818181818181818, + "grad_norm": 1.1612533330917358, + "learning_rate": 3.9263050705402715e-06, + "loss": 0.3116, + "step": 29064 + }, + { + "epoch": 0.5818382003353102, + "grad_norm": 1.0979269742965698, + "learning_rate": 3.92598845250511e-06, + "loss": 0.2852, + "step": 29065 + }, + { + "epoch": 0.5818582188524385, + "grad_norm": 1.128743052482605, + "learning_rate": 3.925671838984863e-06, + "loss": 0.3205, + "step": 29066 + }, + { + "epoch": 0.5818782373695669, + "grad_norm": 1.2756112813949585, + "learning_rate": 3.9253552299808614e-06, + "loss": 0.2904, + "step": 29067 + }, + { + "epoch": 0.5818982558866952, + "grad_norm": 1.0655078887939453, + "learning_rate": 3.925038625494436e-06, + "loss": 0.2638, + "step": 29068 + }, + { + "epoch": 0.5819182744038235, + "grad_norm": 1.1147680282592773, + "learning_rate": 3.924722025526914e-06, + "loss": 0.2772, + "step": 29069 + }, + { + "epoch": 0.5819382929209519, + "grad_norm": 1.1399444341659546, + "learning_rate": 3.924405430079632e-06, + "loss": 0.2793, + "step": 29070 + }, + { + "epoch": 0.5819583114380802, + "grad_norm": 1.9181634187698364, + "learning_rate": 3.924088839153917e-06, + "loss": 0.7526, + "step": 29071 + }, + { + "epoch": 0.5819783299552086, + "grad_norm": 1.091425895690918, + "learning_rate": 3.9237722527511015e-06, + "loss": 0.2948, + "step": 29072 + }, + { + "epoch": 0.5819983484723369, + "grad_norm": 1.0600303411483765, + "learning_rate": 3.923455670872514e-06, + "loss": 0.2743, + "step": 29073 + }, + { + "epoch": 0.5820183669894653, + "grad_norm": 1.988553762435913, + "learning_rate": 3.923139093519489e-06, + "loss": 0.7533, + "step": 29074 + }, + { + "epoch": 0.5820383855065936, + "grad_norm": 1.9555097818374634, + "learning_rate": 3.922822520693355e-06, + "loss": 0.7795, + "step": 29075 + }, + { + "epoch": 0.582058404023722, + "grad_norm": 1.103608250617981, + "learning_rate": 3.922505952395444e-06, + "loss": 0.3355, + "step": 29076 + }, + { + "epoch": 0.5820784225408503, + "grad_norm": 1.09513521194458, + "learning_rate": 3.922189388627086e-06, + "loss": 0.3437, + "step": 29077 + }, + { + "epoch": 0.5820984410579786, + "grad_norm": 1.0008858442306519, + "learning_rate": 3.92187282938961e-06, + "loss": 0.3118, + "step": 29078 + }, + { + "epoch": 0.582118459575107, + "grad_norm": 1.1207072734832764, + "learning_rate": 3.9215562746843495e-06, + "loss": 0.3379, + "step": 29079 + }, + { + "epoch": 0.5821384780922353, + "grad_norm": 1.10090172290802, + "learning_rate": 3.921239724512636e-06, + "loss": 0.3054, + "step": 29080 + }, + { + "epoch": 0.5821584966093637, + "grad_norm": 1.1531997919082642, + "learning_rate": 3.920923178875797e-06, + "loss": 0.32, + "step": 29081 + }, + { + "epoch": 0.582178515126492, + "grad_norm": 1.2208396196365356, + "learning_rate": 3.9206066377751635e-06, + "loss": 0.3728, + "step": 29082 + }, + { + "epoch": 0.5821985336436204, + "grad_norm": 1.9080802202224731, + "learning_rate": 3.92029010121207e-06, + "loss": 0.7144, + "step": 29083 + }, + { + "epoch": 0.5822185521607487, + "grad_norm": 1.0567409992218018, + "learning_rate": 3.919973569187844e-06, + "loss": 0.2995, + "step": 29084 + }, + { + "epoch": 0.582238570677877, + "grad_norm": 1.074111819267273, + "learning_rate": 3.919657041703816e-06, + "loss": 0.318, + "step": 29085 + }, + { + "epoch": 0.5822585891950054, + "grad_norm": 1.2488503456115723, + "learning_rate": 3.919340518761318e-06, + "loss": 0.2856, + "step": 29086 + }, + { + "epoch": 0.5822786077121337, + "grad_norm": 1.185615062713623, + "learning_rate": 3.919024000361678e-06, + "loss": 0.2877, + "step": 29087 + }, + { + "epoch": 0.5822986262292621, + "grad_norm": 1.0788577795028687, + "learning_rate": 3.918707486506231e-06, + "loss": 0.3042, + "step": 29088 + }, + { + "epoch": 0.5823186447463904, + "grad_norm": 1.7574161291122437, + "learning_rate": 3.918390977196304e-06, + "loss": 0.8489, + "step": 29089 + }, + { + "epoch": 0.5823386632635188, + "grad_norm": 1.4685873985290527, + "learning_rate": 3.918074472433229e-06, + "loss": 0.3174, + "step": 29090 + }, + { + "epoch": 0.5823586817806471, + "grad_norm": 1.0346903800964355, + "learning_rate": 3.917757972218334e-06, + "loss": 0.3228, + "step": 29091 + }, + { + "epoch": 0.5823787002977754, + "grad_norm": 1.1913450956344604, + "learning_rate": 3.9174414765529525e-06, + "loss": 0.3313, + "step": 29092 + }, + { + "epoch": 0.5823987188149038, + "grad_norm": 1.1923800706863403, + "learning_rate": 3.917124985438415e-06, + "loss": 0.2814, + "step": 29093 + }, + { + "epoch": 0.5824187373320321, + "grad_norm": 1.1997240781784058, + "learning_rate": 3.916808498876052e-06, + "loss": 0.3687, + "step": 29094 + }, + { + "epoch": 0.5824387558491605, + "grad_norm": 1.0346814393997192, + "learning_rate": 3.916492016867192e-06, + "loss": 0.3197, + "step": 29095 + }, + { + "epoch": 0.5824587743662888, + "grad_norm": 1.3656587600708008, + "learning_rate": 3.916175539413165e-06, + "loss": 0.3035, + "step": 29096 + }, + { + "epoch": 0.5824787928834172, + "grad_norm": 1.081831455230713, + "learning_rate": 3.915859066515305e-06, + "loss": 0.2873, + "step": 29097 + }, + { + "epoch": 0.5824988114005455, + "grad_norm": 1.7008494138717651, + "learning_rate": 3.9155425981749375e-06, + "loss": 0.8163, + "step": 29098 + }, + { + "epoch": 0.5825188299176739, + "grad_norm": 1.059037685394287, + "learning_rate": 3.915226134393398e-06, + "loss": 0.2816, + "step": 29099 + }, + { + "epoch": 0.5825388484348022, + "grad_norm": 1.1669121980667114, + "learning_rate": 3.914909675172012e-06, + "loss": 0.343, + "step": 29100 + }, + { + "epoch": 0.5825588669519305, + "grad_norm": 1.1097124814987183, + "learning_rate": 3.9145932205121146e-06, + "loss": 0.3364, + "step": 29101 + }, + { + "epoch": 0.5825788854690589, + "grad_norm": 1.1145201921463013, + "learning_rate": 3.914276770415033e-06, + "loss": 0.3072, + "step": 29102 + }, + { + "epoch": 0.5825989039861872, + "grad_norm": 1.8215011358261108, + "learning_rate": 3.913960324882099e-06, + "loss": 0.7989, + "step": 29103 + }, + { + "epoch": 0.5826189225033156, + "grad_norm": 1.0975148677825928, + "learning_rate": 3.91364388391464e-06, + "loss": 0.3136, + "step": 29104 + }, + { + "epoch": 0.5826389410204439, + "grad_norm": 1.227327823638916, + "learning_rate": 3.9133274475139885e-06, + "loss": 0.3156, + "step": 29105 + }, + { + "epoch": 0.5826589595375723, + "grad_norm": 1.141147494316101, + "learning_rate": 3.9130110156814764e-06, + "loss": 0.3579, + "step": 29106 + }, + { + "epoch": 0.5826789780547006, + "grad_norm": 1.3114029169082642, + "learning_rate": 3.912694588418432e-06, + "loss": 0.2392, + "step": 29107 + }, + { + "epoch": 0.5826989965718289, + "grad_norm": 1.1426258087158203, + "learning_rate": 3.912378165726185e-06, + "loss": 0.3003, + "step": 29108 + }, + { + "epoch": 0.5827190150889573, + "grad_norm": 1.778480887413025, + "learning_rate": 3.912061747606064e-06, + "loss": 0.7307, + "step": 29109 + }, + { + "epoch": 0.5827390336060856, + "grad_norm": 1.0796449184417725, + "learning_rate": 3.911745334059404e-06, + "loss": 0.3034, + "step": 29110 + }, + { + "epoch": 0.582759052123214, + "grad_norm": 1.1290878057479858, + "learning_rate": 3.911428925087531e-06, + "loss": 0.3377, + "step": 29111 + }, + { + "epoch": 0.5827790706403423, + "grad_norm": 1.0907567739486694, + "learning_rate": 3.911112520691778e-06, + "loss": 0.2814, + "step": 29112 + }, + { + "epoch": 0.5827990891574707, + "grad_norm": 1.0207509994506836, + "learning_rate": 3.910796120873471e-06, + "loss": 0.3138, + "step": 29113 + }, + { + "epoch": 0.582819107674599, + "grad_norm": 1.1894854307174683, + "learning_rate": 3.910479725633945e-06, + "loss": 0.286, + "step": 29114 + }, + { + "epoch": 0.5828391261917274, + "grad_norm": 1.0366787910461426, + "learning_rate": 3.910163334974528e-06, + "loss": 0.3085, + "step": 29115 + }, + { + "epoch": 0.5828591447088557, + "grad_norm": 1.1843290328979492, + "learning_rate": 3.9098469488965495e-06, + "loss": 0.2928, + "step": 29116 + }, + { + "epoch": 0.582879163225984, + "grad_norm": 1.0473406314849854, + "learning_rate": 3.909530567401339e-06, + "loss": 0.3039, + "step": 29117 + }, + { + "epoch": 0.5828991817431124, + "grad_norm": 1.3180112838745117, + "learning_rate": 3.909214190490226e-06, + "loss": 0.2839, + "step": 29118 + }, + { + "epoch": 0.5829192002602407, + "grad_norm": 1.003983736038208, + "learning_rate": 3.908897818164544e-06, + "loss": 0.2639, + "step": 29119 + }, + { + "epoch": 0.5829392187773691, + "grad_norm": 1.8756319284439087, + "learning_rate": 3.90858145042562e-06, + "loss": 0.7857, + "step": 29120 + }, + { + "epoch": 0.5829592372944974, + "grad_norm": 1.1932059526443481, + "learning_rate": 3.908265087274786e-06, + "loss": 0.319, + "step": 29121 + }, + { + "epoch": 0.5829792558116258, + "grad_norm": 1.2560712099075317, + "learning_rate": 3.907948728713369e-06, + "loss": 0.3037, + "step": 29122 + }, + { + "epoch": 0.5829992743287541, + "grad_norm": 1.1779855489730835, + "learning_rate": 3.907632374742701e-06, + "loss": 0.2945, + "step": 29123 + }, + { + "epoch": 0.5830192928458824, + "grad_norm": 1.0814898014068604, + "learning_rate": 3.9073160253641105e-06, + "loss": 0.2769, + "step": 29124 + }, + { + "epoch": 0.5830393113630108, + "grad_norm": 1.9948369264602661, + "learning_rate": 3.90699968057893e-06, + "loss": 0.7712, + "step": 29125 + }, + { + "epoch": 0.5830593298801391, + "grad_norm": 1.234032154083252, + "learning_rate": 3.906683340388488e-06, + "loss": 0.3173, + "step": 29126 + }, + { + "epoch": 0.5830793483972675, + "grad_norm": 1.1002973318099976, + "learning_rate": 3.906367004794111e-06, + "loss": 0.285, + "step": 29127 + }, + { + "epoch": 0.5830993669143958, + "grad_norm": 1.1401255130767822, + "learning_rate": 3.906050673797134e-06, + "loss": 0.3166, + "step": 29128 + }, + { + "epoch": 0.5831193854315242, + "grad_norm": 1.0222755670547485, + "learning_rate": 3.9057343473988855e-06, + "loss": 0.3104, + "step": 29129 + }, + { + "epoch": 0.5831394039486525, + "grad_norm": 1.1253701448440552, + "learning_rate": 3.9054180256006924e-06, + "loss": 0.2845, + "step": 29130 + }, + { + "epoch": 0.5831594224657809, + "grad_norm": 1.0963658094406128, + "learning_rate": 3.905101708403885e-06, + "loss": 0.2927, + "step": 29131 + }, + { + "epoch": 0.5831794409829092, + "grad_norm": 1.1431605815887451, + "learning_rate": 3.904785395809797e-06, + "loss": 0.2971, + "step": 29132 + }, + { + "epoch": 0.5831994595000375, + "grad_norm": 1.1261274814605713, + "learning_rate": 3.904469087819756e-06, + "loss": 0.326, + "step": 29133 + }, + { + "epoch": 0.5832194780171659, + "grad_norm": 2.1373348236083984, + "learning_rate": 3.90415278443509e-06, + "loss": 0.7666, + "step": 29134 + }, + { + "epoch": 0.5832394965342942, + "grad_norm": 1.1072241067886353, + "learning_rate": 3.903836485657129e-06, + "loss": 0.3412, + "step": 29135 + }, + { + "epoch": 0.5832595150514226, + "grad_norm": 1.1102126836776733, + "learning_rate": 3.903520191487204e-06, + "loss": 0.3592, + "step": 29136 + }, + { + "epoch": 0.5832795335685509, + "grad_norm": 1.1835464239120483, + "learning_rate": 3.9032039019266445e-06, + "loss": 0.2993, + "step": 29137 + }, + { + "epoch": 0.5832995520856793, + "grad_norm": 1.2615963220596313, + "learning_rate": 3.90288761697678e-06, + "loss": 0.2976, + "step": 29138 + }, + { + "epoch": 0.5833195706028076, + "grad_norm": 1.1365363597869873, + "learning_rate": 3.9025713366389414e-06, + "loss": 0.3439, + "step": 29139 + }, + { + "epoch": 0.5833395891199359, + "grad_norm": 1.1260401010513306, + "learning_rate": 3.902255060914453e-06, + "loss": 0.3248, + "step": 29140 + }, + { + "epoch": 0.5833596076370643, + "grad_norm": 1.8134074211120605, + "learning_rate": 3.901938789804651e-06, + "loss": 0.771, + "step": 29141 + }, + { + "epoch": 0.5833796261541926, + "grad_norm": 1.1560951471328735, + "learning_rate": 3.901622523310862e-06, + "loss": 0.3738, + "step": 29142 + }, + { + "epoch": 0.583399644671321, + "grad_norm": 2.1073648929595947, + "learning_rate": 3.9013062614344135e-06, + "loss": 0.7709, + "step": 29143 + }, + { + "epoch": 0.5834196631884493, + "grad_norm": 1.2748382091522217, + "learning_rate": 3.900990004176637e-06, + "loss": 0.3241, + "step": 29144 + }, + { + "epoch": 0.5834396817055777, + "grad_norm": 1.3470301628112793, + "learning_rate": 3.900673751538864e-06, + "loss": 0.3543, + "step": 29145 + }, + { + "epoch": 0.583459700222706, + "grad_norm": 1.0177102088928223, + "learning_rate": 3.900357503522422e-06, + "loss": 0.2547, + "step": 29146 + }, + { + "epoch": 0.5834797187398344, + "grad_norm": 1.135445237159729, + "learning_rate": 3.90004126012864e-06, + "loss": 0.3241, + "step": 29147 + }, + { + "epoch": 0.5834997372569627, + "grad_norm": 1.0757273435592651, + "learning_rate": 3.8997250213588465e-06, + "loss": 0.2887, + "step": 29148 + }, + { + "epoch": 0.583519755774091, + "grad_norm": 1.2015589475631714, + "learning_rate": 3.899408787214372e-06, + "loss": 0.3274, + "step": 29149 + }, + { + "epoch": 0.5835397742912194, + "grad_norm": 1.3165483474731445, + "learning_rate": 3.899092557696548e-06, + "loss": 0.321, + "step": 29150 + }, + { + "epoch": 0.5835597928083477, + "grad_norm": 1.6655337810516357, + "learning_rate": 3.898776332806702e-06, + "loss": 0.3269, + "step": 29151 + }, + { + "epoch": 0.5835798113254761, + "grad_norm": 1.9393752813339233, + "learning_rate": 3.898460112546163e-06, + "loss": 0.8139, + "step": 29152 + }, + { + "epoch": 0.5835998298426044, + "grad_norm": 1.059976577758789, + "learning_rate": 3.898143896916258e-06, + "loss": 0.3008, + "step": 29153 + }, + { + "epoch": 0.5836198483597328, + "grad_norm": 1.1052290201187134, + "learning_rate": 3.897827685918322e-06, + "loss": 0.3062, + "step": 29154 + }, + { + "epoch": 0.5836398668768611, + "grad_norm": 1.1428608894348145, + "learning_rate": 3.897511479553681e-06, + "loss": 0.3177, + "step": 29155 + }, + { + "epoch": 0.5836598853939894, + "grad_norm": 1.0499181747436523, + "learning_rate": 3.8971952778236636e-06, + "loss": 0.2448, + "step": 29156 + }, + { + "epoch": 0.5836799039111178, + "grad_norm": 1.240617275238037, + "learning_rate": 3.896879080729599e-06, + "loss": 0.2846, + "step": 29157 + }, + { + "epoch": 0.5836999224282461, + "grad_norm": 1.0143088102340698, + "learning_rate": 3.8965628882728185e-06, + "loss": 0.3012, + "step": 29158 + }, + { + "epoch": 0.5837199409453745, + "grad_norm": 1.191733479499817, + "learning_rate": 3.8962467004546505e-06, + "loss": 0.3306, + "step": 29159 + }, + { + "epoch": 0.5837399594625028, + "grad_norm": 1.18197500705719, + "learning_rate": 3.895930517276425e-06, + "loss": 0.2223, + "step": 29160 + }, + { + "epoch": 0.5837599779796312, + "grad_norm": 1.1026543378829956, + "learning_rate": 3.895614338739467e-06, + "loss": 0.2524, + "step": 29161 + }, + { + "epoch": 0.5837799964967595, + "grad_norm": 1.1212878227233887, + "learning_rate": 3.89529816484511e-06, + "loss": 0.3216, + "step": 29162 + }, + { + "epoch": 0.5838000150138879, + "grad_norm": 1.0317338705062866, + "learning_rate": 3.8949819955946815e-06, + "loss": 0.3064, + "step": 29163 + }, + { + "epoch": 0.5838200335310162, + "grad_norm": 1.1196982860565186, + "learning_rate": 3.894665830989512e-06, + "loss": 0.3273, + "step": 29164 + }, + { + "epoch": 0.5838400520481445, + "grad_norm": 1.1190311908721924, + "learning_rate": 3.894349671030929e-06, + "loss": 0.2974, + "step": 29165 + }, + { + "epoch": 0.5838600705652729, + "grad_norm": 1.9099045991897583, + "learning_rate": 3.894033515720261e-06, + "loss": 0.7882, + "step": 29166 + }, + { + "epoch": 0.5838800890824012, + "grad_norm": 1.1556437015533447, + "learning_rate": 3.893717365058839e-06, + "loss": 0.3235, + "step": 29167 + }, + { + "epoch": 0.5839001075995296, + "grad_norm": 1.3105888366699219, + "learning_rate": 3.893401219047992e-06, + "loss": 0.329, + "step": 29168 + }, + { + "epoch": 0.5839201261166579, + "grad_norm": 2.1334543228149414, + "learning_rate": 3.893085077689047e-06, + "loss": 0.8362, + "step": 29169 + }, + { + "epoch": 0.5839401446337863, + "grad_norm": 1.0592544078826904, + "learning_rate": 3.892768940983335e-06, + "loss": 0.3143, + "step": 29170 + }, + { + "epoch": 0.5839601631509146, + "grad_norm": 1.0791172981262207, + "learning_rate": 3.892452808932181e-06, + "loss": 0.2661, + "step": 29171 + }, + { + "epoch": 0.5839801816680429, + "grad_norm": 1.146331787109375, + "learning_rate": 3.8921366815369206e-06, + "loss": 0.3417, + "step": 29172 + }, + { + "epoch": 0.5840002001851713, + "grad_norm": 1.0477690696716309, + "learning_rate": 3.891820558798878e-06, + "loss": 0.2838, + "step": 29173 + }, + { + "epoch": 0.5840202187022996, + "grad_norm": 1.86821448802948, + "learning_rate": 3.891504440719383e-06, + "loss": 0.7987, + "step": 29174 + }, + { + "epoch": 0.584040237219428, + "grad_norm": 1.8379135131835938, + "learning_rate": 3.891188327299765e-06, + "loss": 0.7815, + "step": 29175 + }, + { + "epoch": 0.5840602557365563, + "grad_norm": 1.1597331762313843, + "learning_rate": 3.890872218541352e-06, + "loss": 0.305, + "step": 29176 + }, + { + "epoch": 0.5840802742536847, + "grad_norm": 1.1192744970321655, + "learning_rate": 3.890556114445475e-06, + "loss": 0.258, + "step": 29177 + }, + { + "epoch": 0.584100292770813, + "grad_norm": 1.0349425077438354, + "learning_rate": 3.8902400150134615e-06, + "loss": 0.2772, + "step": 29178 + }, + { + "epoch": 0.5841203112879414, + "grad_norm": 1.0096601247787476, + "learning_rate": 3.889923920246639e-06, + "loss": 0.2699, + "step": 29179 + }, + { + "epoch": 0.5841403298050697, + "grad_norm": 1.185215950012207, + "learning_rate": 3.889607830146337e-06, + "loss": 0.3022, + "step": 29180 + }, + { + "epoch": 0.584160348322198, + "grad_norm": 1.0890849828720093, + "learning_rate": 3.889291744713885e-06, + "loss": 0.2738, + "step": 29181 + }, + { + "epoch": 0.5841803668393264, + "grad_norm": 1.1023049354553223, + "learning_rate": 3.888975663950612e-06, + "loss": 0.298, + "step": 29182 + }, + { + "epoch": 0.5842003853564547, + "grad_norm": 1.1011172533035278, + "learning_rate": 3.888659587857847e-06, + "loss": 0.3469, + "step": 29183 + }, + { + "epoch": 0.5842204038735831, + "grad_norm": 1.2086132764816284, + "learning_rate": 3.888343516436915e-06, + "loss": 0.3503, + "step": 29184 + }, + { + "epoch": 0.5842404223907114, + "grad_norm": 0.9640875458717346, + "learning_rate": 3.888027449689151e-06, + "loss": 0.2675, + "step": 29185 + }, + { + "epoch": 0.5842604409078398, + "grad_norm": 1.0774736404418945, + "learning_rate": 3.887711387615879e-06, + "loss": 0.268, + "step": 29186 + }, + { + "epoch": 0.5842804594249681, + "grad_norm": 1.042636752128601, + "learning_rate": 3.887395330218429e-06, + "loss": 0.3084, + "step": 29187 + }, + { + "epoch": 0.5843004779420964, + "grad_norm": 1.1627418994903564, + "learning_rate": 3.887079277498129e-06, + "loss": 0.3064, + "step": 29188 + }, + { + "epoch": 0.5843204964592248, + "grad_norm": 1.10817551612854, + "learning_rate": 3.886763229456308e-06, + "loss": 0.3365, + "step": 29189 + }, + { + "epoch": 0.5843405149763531, + "grad_norm": 1.1162887811660767, + "learning_rate": 3.8864471860942956e-06, + "loss": 0.3317, + "step": 29190 + }, + { + "epoch": 0.5843605334934815, + "grad_norm": 1.20549738407135, + "learning_rate": 3.88613114741342e-06, + "loss": 0.3238, + "step": 29191 + }, + { + "epoch": 0.5843805520106098, + "grad_norm": 1.0767031908035278, + "learning_rate": 3.88581511341501e-06, + "loss": 0.323, + "step": 29192 + }, + { + "epoch": 0.5844005705277382, + "grad_norm": 1.1473153829574585, + "learning_rate": 3.885499084100391e-06, + "loss": 0.2692, + "step": 29193 + }, + { + "epoch": 0.5844205890448665, + "grad_norm": 1.195825457572937, + "learning_rate": 3.885183059470896e-06, + "loss": 0.2693, + "step": 29194 + }, + { + "epoch": 0.5844406075619949, + "grad_norm": 1.1377885341644287, + "learning_rate": 3.88486703952785e-06, + "loss": 0.2677, + "step": 29195 + }, + { + "epoch": 0.5844606260791232, + "grad_norm": 1.2223482131958008, + "learning_rate": 3.884551024272585e-06, + "loss": 0.3427, + "step": 29196 + }, + { + "epoch": 0.5844806445962515, + "grad_norm": 1.1476107835769653, + "learning_rate": 3.8842350137064245e-06, + "loss": 0.2908, + "step": 29197 + }, + { + "epoch": 0.5845006631133799, + "grad_norm": 2.1275691986083984, + "learning_rate": 3.883919007830702e-06, + "loss": 0.7967, + "step": 29198 + }, + { + "epoch": 0.5845206816305082, + "grad_norm": 1.1073764562606812, + "learning_rate": 3.883603006646745e-06, + "loss": 0.3251, + "step": 29199 + }, + { + "epoch": 0.5845407001476366, + "grad_norm": 1.100852608680725, + "learning_rate": 3.8832870101558785e-06, + "loss": 0.3249, + "step": 29200 + }, + { + "epoch": 0.5845607186647649, + "grad_norm": 1.1349581480026245, + "learning_rate": 3.882971018359435e-06, + "loss": 0.2829, + "step": 29201 + }, + { + "epoch": 0.5845807371818933, + "grad_norm": 1.1088606119155884, + "learning_rate": 3.882655031258737e-06, + "loss": 0.283, + "step": 29202 + }, + { + "epoch": 0.5846007556990216, + "grad_norm": 1.18006432056427, + "learning_rate": 3.882339048855121e-06, + "loss": 0.3183, + "step": 29203 + }, + { + "epoch": 0.5846207742161499, + "grad_norm": 1.0331538915634155, + "learning_rate": 3.882023071149909e-06, + "loss": 0.2976, + "step": 29204 + }, + { + "epoch": 0.5846407927332783, + "grad_norm": 1.0700626373291016, + "learning_rate": 3.881707098144434e-06, + "loss": 0.2947, + "step": 29205 + }, + { + "epoch": 0.5846608112504066, + "grad_norm": 1.071476936340332, + "learning_rate": 3.881391129840018e-06, + "loss": 0.3193, + "step": 29206 + }, + { + "epoch": 0.584680829767535, + "grad_norm": 0.9573571085929871, + "learning_rate": 3.881075166237995e-06, + "loss": 0.287, + "step": 29207 + }, + { + "epoch": 0.5847008482846633, + "grad_norm": 1.0108617544174194, + "learning_rate": 3.880759207339691e-06, + "loss": 0.2749, + "step": 29208 + }, + { + "epoch": 0.5847208668017917, + "grad_norm": 1.265067219734192, + "learning_rate": 3.8804432531464345e-06, + "loss": 0.3047, + "step": 29209 + }, + { + "epoch": 0.58474088531892, + "grad_norm": 1.0677354335784912, + "learning_rate": 3.880127303659554e-06, + "loss": 0.2996, + "step": 29210 + }, + { + "epoch": 0.5847609038360484, + "grad_norm": 1.0738048553466797, + "learning_rate": 3.879811358880375e-06, + "loss": 0.3221, + "step": 29211 + }, + { + "epoch": 0.5847809223531767, + "grad_norm": 1.0387442111968994, + "learning_rate": 3.879495418810231e-06, + "loss": 0.2687, + "step": 29212 + }, + { + "epoch": 0.584800940870305, + "grad_norm": 1.0829936265945435, + "learning_rate": 3.879179483450446e-06, + "loss": 0.2699, + "step": 29213 + }, + { + "epoch": 0.5848209593874334, + "grad_norm": 1.0468010902404785, + "learning_rate": 3.87886355280235e-06, + "loss": 0.3256, + "step": 29214 + }, + { + "epoch": 0.5848409779045617, + "grad_norm": 1.059644103050232, + "learning_rate": 3.878547626867267e-06, + "loss": 0.3, + "step": 29215 + }, + { + "epoch": 0.5848609964216901, + "grad_norm": 0.9923021197319031, + "learning_rate": 3.8782317056465315e-06, + "loss": 0.2537, + "step": 29216 + }, + { + "epoch": 0.5848810149388184, + "grad_norm": 1.0879578590393066, + "learning_rate": 3.877915789141469e-06, + "loss": 0.2666, + "step": 29217 + }, + { + "epoch": 0.5849010334559468, + "grad_norm": 1.1168251037597656, + "learning_rate": 3.877599877353407e-06, + "loss": 0.3087, + "step": 29218 + }, + { + "epoch": 0.5849210519730751, + "grad_norm": 1.1748696565628052, + "learning_rate": 3.877283970283671e-06, + "loss": 0.3703, + "step": 29219 + }, + { + "epoch": 0.5849410704902034, + "grad_norm": 1.0211750268936157, + "learning_rate": 3.8769680679335936e-06, + "loss": 0.288, + "step": 29220 + }, + { + "epoch": 0.5849610890073318, + "grad_norm": 1.0623438358306885, + "learning_rate": 3.8766521703045e-06, + "loss": 0.2671, + "step": 29221 + }, + { + "epoch": 0.5849811075244601, + "grad_norm": 1.861004114151001, + "learning_rate": 3.87633627739772e-06, + "loss": 0.8593, + "step": 29222 + }, + { + "epoch": 0.5850011260415885, + "grad_norm": 1.214034080505371, + "learning_rate": 3.87602038921458e-06, + "loss": 0.3065, + "step": 29223 + }, + { + "epoch": 0.5850211445587168, + "grad_norm": 1.1281452178955078, + "learning_rate": 3.875704505756407e-06, + "loss": 0.3575, + "step": 29224 + }, + { + "epoch": 0.5850411630758452, + "grad_norm": 1.017833948135376, + "learning_rate": 3.8753886270245315e-06, + "loss": 0.3032, + "step": 29225 + }, + { + "epoch": 0.5850611815929735, + "grad_norm": 1.2089282274246216, + "learning_rate": 3.87507275302028e-06, + "loss": 0.3361, + "step": 29226 + }, + { + "epoch": 0.5850812001101019, + "grad_norm": 1.0346448421478271, + "learning_rate": 3.874756883744981e-06, + "loss": 0.2953, + "step": 29227 + }, + { + "epoch": 0.5851012186272302, + "grad_norm": 1.0544501543045044, + "learning_rate": 3.87444101919996e-06, + "loss": 0.2707, + "step": 29228 + }, + { + "epoch": 0.5851212371443585, + "grad_norm": 1.1049007177352905, + "learning_rate": 3.87412515938655e-06, + "loss": 0.252, + "step": 29229 + }, + { + "epoch": 0.5851412556614869, + "grad_norm": 1.8429996967315674, + "learning_rate": 3.873809304306074e-06, + "loss": 0.7355, + "step": 29230 + }, + { + "epoch": 0.5851612741786152, + "grad_norm": 1.1528881788253784, + "learning_rate": 3.873493453959862e-06, + "loss": 0.2899, + "step": 29231 + }, + { + "epoch": 0.5851812926957436, + "grad_norm": 1.0764029026031494, + "learning_rate": 3.87317760834924e-06, + "loss": 0.2984, + "step": 29232 + }, + { + "epoch": 0.5852013112128719, + "grad_norm": 1.1541109085083008, + "learning_rate": 3.872861767475536e-06, + "loss": 0.2833, + "step": 29233 + }, + { + "epoch": 0.5852213297300003, + "grad_norm": 1.1275790929794312, + "learning_rate": 3.872545931340079e-06, + "loss": 0.3337, + "step": 29234 + }, + { + "epoch": 0.5852413482471286, + "grad_norm": 1.0311532020568848, + "learning_rate": 3.872230099944198e-06, + "loss": 0.2892, + "step": 29235 + }, + { + "epoch": 0.5852613667642569, + "grad_norm": 1.1295673847198486, + "learning_rate": 3.871914273289218e-06, + "loss": 0.3338, + "step": 29236 + }, + { + "epoch": 0.5852813852813853, + "grad_norm": 1.1224889755249023, + "learning_rate": 3.871598451376466e-06, + "loss": 0.2767, + "step": 29237 + }, + { + "epoch": 0.5853014037985136, + "grad_norm": 0.9791145920753479, + "learning_rate": 3.871282634207274e-06, + "loss": 0.2588, + "step": 29238 + }, + { + "epoch": 0.585321422315642, + "grad_norm": 1.126900315284729, + "learning_rate": 3.870966821782966e-06, + "loss": 0.3213, + "step": 29239 + }, + { + "epoch": 0.5853414408327703, + "grad_norm": 1.170182466506958, + "learning_rate": 3.87065101410487e-06, + "loss": 0.3038, + "step": 29240 + }, + { + "epoch": 0.5853614593498987, + "grad_norm": 1.0023881196975708, + "learning_rate": 3.870335211174315e-06, + "loss": 0.2462, + "step": 29241 + }, + { + "epoch": 0.585381477867027, + "grad_norm": 1.1670057773590088, + "learning_rate": 3.870019412992625e-06, + "loss": 0.2822, + "step": 29242 + }, + { + "epoch": 0.5854014963841554, + "grad_norm": 1.1920102834701538, + "learning_rate": 3.869703619561134e-06, + "loss": 0.3169, + "step": 29243 + }, + { + "epoch": 0.5854215149012837, + "grad_norm": 1.2297303676605225, + "learning_rate": 3.869387830881164e-06, + "loss": 0.2741, + "step": 29244 + }, + { + "epoch": 0.585441533418412, + "grad_norm": 1.106026530265808, + "learning_rate": 3.869072046954044e-06, + "loss": 0.2915, + "step": 29245 + }, + { + "epoch": 0.5854615519355404, + "grad_norm": 1.0897425413131714, + "learning_rate": 3.868756267781102e-06, + "loss": 0.2713, + "step": 29246 + }, + { + "epoch": 0.5854815704526687, + "grad_norm": 1.0732417106628418, + "learning_rate": 3.868440493363664e-06, + "loss": 0.306, + "step": 29247 + }, + { + "epoch": 0.5855015889697971, + "grad_norm": 1.1317524909973145, + "learning_rate": 3.8681247237030605e-06, + "loss": 0.3512, + "step": 29248 + }, + { + "epoch": 0.5855216074869254, + "grad_norm": 1.136236548423767, + "learning_rate": 3.8678089588006165e-06, + "loss": 0.2709, + "step": 29249 + }, + { + "epoch": 0.5855416260040538, + "grad_norm": 1.1378179788589478, + "learning_rate": 3.867493198657658e-06, + "loss": 0.3214, + "step": 29250 + }, + { + "epoch": 0.5855616445211821, + "grad_norm": 1.0473430156707764, + "learning_rate": 3.867177443275517e-06, + "loss": 0.302, + "step": 29251 + }, + { + "epoch": 0.5855816630383104, + "grad_norm": 1.1870869398117065, + "learning_rate": 3.866861692655518e-06, + "loss": 0.3299, + "step": 29252 + }, + { + "epoch": 0.5856016815554388, + "grad_norm": 1.240260362625122, + "learning_rate": 3.866545946798988e-06, + "loss": 0.3356, + "step": 29253 + }, + { + "epoch": 0.5856217000725671, + "grad_norm": 1.057527780532837, + "learning_rate": 3.866230205707256e-06, + "loss": 0.2543, + "step": 29254 + }, + { + "epoch": 0.5856417185896955, + "grad_norm": 1.1500990390777588, + "learning_rate": 3.8659144693816455e-06, + "loss": 0.3246, + "step": 29255 + }, + { + "epoch": 0.5856617371068238, + "grad_norm": 1.0263911485671997, + "learning_rate": 3.865598737823489e-06, + "loss": 0.2869, + "step": 29256 + }, + { + "epoch": 0.5856817556239522, + "grad_norm": 1.1753008365631104, + "learning_rate": 3.865283011034111e-06, + "loss": 0.3434, + "step": 29257 + }, + { + "epoch": 0.5857017741410805, + "grad_norm": 1.091701865196228, + "learning_rate": 3.864967289014838e-06, + "loss": 0.3032, + "step": 29258 + }, + { + "epoch": 0.5857217926582089, + "grad_norm": 1.1435744762420654, + "learning_rate": 3.864651571766999e-06, + "loss": 0.2685, + "step": 29259 + }, + { + "epoch": 0.5857418111753372, + "grad_norm": 1.818514108657837, + "learning_rate": 3.864335859291919e-06, + "loss": 0.7636, + "step": 29260 + }, + { + "epoch": 0.5857618296924655, + "grad_norm": 1.0929838418960571, + "learning_rate": 3.864020151590929e-06, + "loss": 0.3535, + "step": 29261 + }, + { + "epoch": 0.5857818482095939, + "grad_norm": 1.0749340057373047, + "learning_rate": 3.863704448665353e-06, + "loss": 0.2954, + "step": 29262 + }, + { + "epoch": 0.5858018667267222, + "grad_norm": 1.2040588855743408, + "learning_rate": 3.86338875051652e-06, + "loss": 0.2807, + "step": 29263 + }, + { + "epoch": 0.5858218852438506, + "grad_norm": 1.1865098476409912, + "learning_rate": 3.863073057145753e-06, + "loss": 0.2702, + "step": 29264 + }, + { + "epoch": 0.5858419037609789, + "grad_norm": 1.0858205556869507, + "learning_rate": 3.862757368554383e-06, + "loss": 0.2425, + "step": 29265 + }, + { + "epoch": 0.5858619222781073, + "grad_norm": 0.9971851706504822, + "learning_rate": 3.862441684743739e-06, + "loss": 0.2817, + "step": 29266 + }, + { + "epoch": 0.5858819407952356, + "grad_norm": 1.3263219594955444, + "learning_rate": 3.862126005715143e-06, + "loss": 0.3474, + "step": 29267 + }, + { + "epoch": 0.5859019593123639, + "grad_norm": 1.1688627004623413, + "learning_rate": 3.861810331469924e-06, + "loss": 0.3195, + "step": 29268 + }, + { + "epoch": 0.5859219778294923, + "grad_norm": 1.0902615785598755, + "learning_rate": 3.861494662009411e-06, + "loss": 0.3276, + "step": 29269 + }, + { + "epoch": 0.5859419963466206, + "grad_norm": 1.1473195552825928, + "learning_rate": 3.86117899733493e-06, + "loss": 0.2867, + "step": 29270 + }, + { + "epoch": 0.585962014863749, + "grad_norm": 1.1199562549591064, + "learning_rate": 3.860863337447805e-06, + "loss": 0.3094, + "step": 29271 + }, + { + "epoch": 0.5859820333808773, + "grad_norm": 1.2127940654754639, + "learning_rate": 3.860547682349368e-06, + "loss": 0.3174, + "step": 29272 + }, + { + "epoch": 0.5860020518980057, + "grad_norm": 1.0966451168060303, + "learning_rate": 3.8602320320409405e-06, + "loss": 0.3161, + "step": 29273 + }, + { + "epoch": 0.586022070415134, + "grad_norm": 1.1921682357788086, + "learning_rate": 3.859916386523854e-06, + "loss": 0.3524, + "step": 29274 + }, + { + "epoch": 0.5860420889322624, + "grad_norm": 1.835845708847046, + "learning_rate": 3.859600745799435e-06, + "loss": 0.8221, + "step": 29275 + }, + { + "epoch": 0.5860621074493907, + "grad_norm": 1.0577495098114014, + "learning_rate": 3.859285109869007e-06, + "loss": 0.2895, + "step": 29276 + }, + { + "epoch": 0.586082125966519, + "grad_norm": 1.0959808826446533, + "learning_rate": 3.858969478733898e-06, + "loss": 0.2526, + "step": 29277 + }, + { + "epoch": 0.5861021444836474, + "grad_norm": 1.1972241401672363, + "learning_rate": 3.858653852395436e-06, + "loss": 0.3638, + "step": 29278 + }, + { + "epoch": 0.5861221630007757, + "grad_norm": 1.0883879661560059, + "learning_rate": 3.85833823085495e-06, + "loss": 0.3167, + "step": 29279 + }, + { + "epoch": 0.5861421815179041, + "grad_norm": 1.0902397632598877, + "learning_rate": 3.8580226141137635e-06, + "loss": 0.2739, + "step": 29280 + }, + { + "epoch": 0.5861622000350324, + "grad_norm": 1.17265784740448, + "learning_rate": 3.857707002173203e-06, + "loss": 0.3081, + "step": 29281 + }, + { + "epoch": 0.5861822185521608, + "grad_norm": 1.111360788345337, + "learning_rate": 3.857391395034598e-06, + "loss": 0.31, + "step": 29282 + }, + { + "epoch": 0.5862022370692891, + "grad_norm": 1.9032485485076904, + "learning_rate": 3.857075792699273e-06, + "loss": 0.7835, + "step": 29283 + }, + { + "epoch": 0.5862222555864174, + "grad_norm": 1.2027804851531982, + "learning_rate": 3.856760195168555e-06, + "loss": 0.3026, + "step": 29284 + }, + { + "epoch": 0.5862422741035458, + "grad_norm": 1.2627686262130737, + "learning_rate": 3.856444602443771e-06, + "loss": 0.3415, + "step": 29285 + }, + { + "epoch": 0.5862622926206741, + "grad_norm": 1.1399635076522827, + "learning_rate": 3.856129014526248e-06, + "loss": 0.3191, + "step": 29286 + }, + { + "epoch": 0.5862823111378025, + "grad_norm": 1.035942554473877, + "learning_rate": 3.8558134314173125e-06, + "loss": 0.2735, + "step": 29287 + }, + { + "epoch": 0.5863023296549308, + "grad_norm": 1.0874788761138916, + "learning_rate": 3.855497853118293e-06, + "loss": 0.3412, + "step": 29288 + }, + { + "epoch": 0.5863223481720592, + "grad_norm": 1.088062047958374, + "learning_rate": 3.855182279630512e-06, + "loss": 0.3442, + "step": 29289 + }, + { + "epoch": 0.5863423666891875, + "grad_norm": 1.0499229431152344, + "learning_rate": 3.854866710955298e-06, + "loss": 0.3127, + "step": 29290 + }, + { + "epoch": 0.5863623852063159, + "grad_norm": 2.031585216522217, + "learning_rate": 3.8545511470939786e-06, + "loss": 0.829, + "step": 29291 + }, + { + "epoch": 0.5863824037234442, + "grad_norm": 1.1131600141525269, + "learning_rate": 3.85423558804788e-06, + "loss": 0.3212, + "step": 29292 + }, + { + "epoch": 0.5864024222405725, + "grad_norm": 1.2313600778579712, + "learning_rate": 3.853920033818329e-06, + "loss": 0.3183, + "step": 29293 + }, + { + "epoch": 0.5864224407577009, + "grad_norm": 1.215579628944397, + "learning_rate": 3.853604484406651e-06, + "loss": 0.2861, + "step": 29294 + }, + { + "epoch": 0.5864424592748292, + "grad_norm": 1.8995689153671265, + "learning_rate": 3.853288939814171e-06, + "loss": 0.8004, + "step": 29295 + }, + { + "epoch": 0.5864624777919576, + "grad_norm": 1.0207997560501099, + "learning_rate": 3.852973400042221e-06, + "loss": 0.3107, + "step": 29296 + }, + { + "epoch": 0.5864824963090859, + "grad_norm": 1.91753089427948, + "learning_rate": 3.852657865092121e-06, + "loss": 0.7892, + "step": 29297 + }, + { + "epoch": 0.5865025148262143, + "grad_norm": 1.114227056503296, + "learning_rate": 3.8523423349652025e-06, + "loss": 0.3643, + "step": 29298 + }, + { + "epoch": 0.5865225333433426, + "grad_norm": 1.0873113870620728, + "learning_rate": 3.852026809662788e-06, + "loss": 0.3458, + "step": 29299 + }, + { + "epoch": 0.5865425518604709, + "grad_norm": 1.30739426612854, + "learning_rate": 3.851711289186207e-06, + "loss": 0.3101, + "step": 29300 + }, + { + "epoch": 0.5865625703775993, + "grad_norm": 1.0064882040023804, + "learning_rate": 3.851395773536786e-06, + "loss": 0.2628, + "step": 29301 + }, + { + "epoch": 0.5865825888947276, + "grad_norm": 0.959438681602478, + "learning_rate": 3.851080262715849e-06, + "loss": 0.2277, + "step": 29302 + }, + { + "epoch": 0.586602607411856, + "grad_norm": 1.2065585851669312, + "learning_rate": 3.850764756724721e-06, + "loss": 0.3238, + "step": 29303 + }, + { + "epoch": 0.5866226259289843, + "grad_norm": 1.0625638961791992, + "learning_rate": 3.850449255564732e-06, + "loss": 0.3004, + "step": 29304 + }, + { + "epoch": 0.5866426444461127, + "grad_norm": 0.9896727800369263, + "learning_rate": 3.850133759237208e-06, + "loss": 0.2943, + "step": 29305 + }, + { + "epoch": 0.586662662963241, + "grad_norm": 0.9819552898406982, + "learning_rate": 3.849818267743474e-06, + "loss": 0.2343, + "step": 29306 + }, + { + "epoch": 0.5866826814803694, + "grad_norm": 1.82210111618042, + "learning_rate": 3.849502781084857e-06, + "loss": 0.7629, + "step": 29307 + }, + { + "epoch": 0.5867026999974977, + "grad_norm": 1.0100654363632202, + "learning_rate": 3.84918729926268e-06, + "loss": 0.3022, + "step": 29308 + }, + { + "epoch": 0.586722718514626, + "grad_norm": 1.0081214904785156, + "learning_rate": 3.848871822278274e-06, + "loss": 0.2595, + "step": 29309 + }, + { + "epoch": 0.5867427370317544, + "grad_norm": 1.1503289937973022, + "learning_rate": 3.848556350132962e-06, + "loss": 0.2532, + "step": 29310 + }, + { + "epoch": 0.5867627555488827, + "grad_norm": 1.108878254890442, + "learning_rate": 3.8482408828280735e-06, + "loss": 0.3647, + "step": 29311 + }, + { + "epoch": 0.5867827740660111, + "grad_norm": 1.070939540863037, + "learning_rate": 3.847925420364929e-06, + "loss": 0.3115, + "step": 29312 + }, + { + "epoch": 0.5868027925831394, + "grad_norm": 1.1589982509613037, + "learning_rate": 3.847609962744862e-06, + "loss": 0.352, + "step": 29313 + }, + { + "epoch": 0.5868228111002678, + "grad_norm": 1.4442633390426636, + "learning_rate": 3.847294509969194e-06, + "loss": 0.3231, + "step": 29314 + }, + { + "epoch": 0.5868428296173961, + "grad_norm": 1.0252012014389038, + "learning_rate": 3.846979062039251e-06, + "loss": 0.2822, + "step": 29315 + }, + { + "epoch": 0.5868628481345244, + "grad_norm": 1.209912657737732, + "learning_rate": 3.8466636189563585e-06, + "loss": 0.314, + "step": 29316 + }, + { + "epoch": 0.5868828666516528, + "grad_norm": 1.0421980619430542, + "learning_rate": 3.846348180721844e-06, + "loss": 0.2972, + "step": 29317 + }, + { + "epoch": 0.5869028851687811, + "grad_norm": 1.1404762268066406, + "learning_rate": 3.846032747337035e-06, + "loss": 0.3105, + "step": 29318 + }, + { + "epoch": 0.5869229036859095, + "grad_norm": 1.1252211332321167, + "learning_rate": 3.845717318803256e-06, + "loss": 0.3066, + "step": 29319 + }, + { + "epoch": 0.5869429222030378, + "grad_norm": 1.0103825330734253, + "learning_rate": 3.845401895121833e-06, + "loss": 0.2791, + "step": 29320 + }, + { + "epoch": 0.5869629407201662, + "grad_norm": 1.2742223739624023, + "learning_rate": 3.84508647629409e-06, + "loss": 0.336, + "step": 29321 + }, + { + "epoch": 0.5869829592372945, + "grad_norm": 1.1140718460083008, + "learning_rate": 3.844771062321358e-06, + "loss": 0.2995, + "step": 29322 + }, + { + "epoch": 0.5870029777544229, + "grad_norm": 1.0765236616134644, + "learning_rate": 3.844455653204958e-06, + "loss": 0.3101, + "step": 29323 + }, + { + "epoch": 0.5870229962715512, + "grad_norm": 1.2435466051101685, + "learning_rate": 3.8441402489462185e-06, + "loss": 0.3505, + "step": 29324 + }, + { + "epoch": 0.5870430147886795, + "grad_norm": 1.122435212135315, + "learning_rate": 3.843824849546464e-06, + "loss": 0.3286, + "step": 29325 + }, + { + "epoch": 0.5870630333058079, + "grad_norm": 1.0501747131347656, + "learning_rate": 3.84350945500702e-06, + "loss": 0.3569, + "step": 29326 + }, + { + "epoch": 0.5870830518229362, + "grad_norm": 1.1362378597259521, + "learning_rate": 3.843194065329215e-06, + "loss": 0.339, + "step": 29327 + }, + { + "epoch": 0.5871030703400646, + "grad_norm": 1.138792634010315, + "learning_rate": 3.842878680514373e-06, + "loss": 0.3109, + "step": 29328 + }, + { + "epoch": 0.5871230888571929, + "grad_norm": 1.0839875936508179, + "learning_rate": 3.84256330056382e-06, + "loss": 0.3585, + "step": 29329 + }, + { + "epoch": 0.5871431073743213, + "grad_norm": 1.0294910669326782, + "learning_rate": 3.842247925478881e-06, + "loss": 0.3128, + "step": 29330 + }, + { + "epoch": 0.5871631258914496, + "grad_norm": 1.1582181453704834, + "learning_rate": 3.841932555260884e-06, + "loss": 0.3568, + "step": 29331 + }, + { + "epoch": 0.5871831444085779, + "grad_norm": 1.0737262964248657, + "learning_rate": 3.841617189911152e-06, + "loss": 0.2861, + "step": 29332 + }, + { + "epoch": 0.5872031629257063, + "grad_norm": 1.1250531673431396, + "learning_rate": 3.841301829431013e-06, + "loss": 0.3173, + "step": 29333 + }, + { + "epoch": 0.5872231814428346, + "grad_norm": 1.7260946035385132, + "learning_rate": 3.84098647382179e-06, + "loss": 0.7519, + "step": 29334 + }, + { + "epoch": 0.587243199959963, + "grad_norm": 1.1655017137527466, + "learning_rate": 3.8406711230848124e-06, + "loss": 0.3106, + "step": 29335 + }, + { + "epoch": 0.5872632184770913, + "grad_norm": 1.0560462474822998, + "learning_rate": 3.840355777221403e-06, + "loss": 0.2825, + "step": 29336 + }, + { + "epoch": 0.5872832369942197, + "grad_norm": 1.0832014083862305, + "learning_rate": 3.840040436232889e-06, + "loss": 0.3149, + "step": 29337 + }, + { + "epoch": 0.587303255511348, + "grad_norm": 1.2745176553726196, + "learning_rate": 3.839725100120596e-06, + "loss": 0.2766, + "step": 29338 + }, + { + "epoch": 0.5873232740284764, + "grad_norm": 1.2698280811309814, + "learning_rate": 3.8394097688858464e-06, + "loss": 0.3011, + "step": 29339 + }, + { + "epoch": 0.5873432925456047, + "grad_norm": 1.178744912147522, + "learning_rate": 3.83909444252997e-06, + "loss": 0.3621, + "step": 29340 + }, + { + "epoch": 0.587363311062733, + "grad_norm": 1.0268691778182983, + "learning_rate": 3.838779121054292e-06, + "loss": 0.2785, + "step": 29341 + }, + { + "epoch": 0.5873833295798614, + "grad_norm": 1.03275465965271, + "learning_rate": 3.838463804460135e-06, + "loss": 0.304, + "step": 29342 + }, + { + "epoch": 0.5874033480969897, + "grad_norm": 0.992846667766571, + "learning_rate": 3.8381484927488244e-06, + "loss": 0.278, + "step": 29343 + }, + { + "epoch": 0.5874233666141181, + "grad_norm": 1.2779042720794678, + "learning_rate": 3.837833185921691e-06, + "loss": 0.3132, + "step": 29344 + }, + { + "epoch": 0.5874433851312464, + "grad_norm": 1.1199967861175537, + "learning_rate": 3.837517883980055e-06, + "loss": 0.328, + "step": 29345 + }, + { + "epoch": 0.5874634036483748, + "grad_norm": 1.1050297021865845, + "learning_rate": 3.837202586925245e-06, + "loss": 0.3223, + "step": 29346 + }, + { + "epoch": 0.5874834221655031, + "grad_norm": 1.0272783041000366, + "learning_rate": 3.8368872947585836e-06, + "loss": 0.2644, + "step": 29347 + }, + { + "epoch": 0.5875034406826314, + "grad_norm": 1.0990914106369019, + "learning_rate": 3.836572007481398e-06, + "loss": 0.3123, + "step": 29348 + }, + { + "epoch": 0.5875234591997598, + "grad_norm": 1.0900136232376099, + "learning_rate": 3.836256725095013e-06, + "loss": 0.381, + "step": 29349 + }, + { + "epoch": 0.5875434777168881, + "grad_norm": 1.2348636388778687, + "learning_rate": 3.8359414476007565e-06, + "loss": 0.3046, + "step": 29350 + }, + { + "epoch": 0.5875634962340165, + "grad_norm": 1.0401856899261475, + "learning_rate": 3.83562617499995e-06, + "loss": 0.2723, + "step": 29351 + }, + { + "epoch": 0.5875835147511448, + "grad_norm": 1.0947861671447754, + "learning_rate": 3.835310907293919e-06, + "loss": 0.3013, + "step": 29352 + }, + { + "epoch": 0.5876035332682732, + "grad_norm": 1.1545161008834839, + "learning_rate": 3.8349956444839925e-06, + "loss": 0.2694, + "step": 29353 + }, + { + "epoch": 0.5876235517854015, + "grad_norm": 1.1211014986038208, + "learning_rate": 3.834680386571493e-06, + "loss": 0.3324, + "step": 29354 + }, + { + "epoch": 0.5876435703025299, + "grad_norm": 1.2962642908096313, + "learning_rate": 3.834365133557746e-06, + "loss": 0.3065, + "step": 29355 + }, + { + "epoch": 0.5876635888196582, + "grad_norm": 1.0785363912582397, + "learning_rate": 3.834049885444079e-06, + "loss": 0.2973, + "step": 29356 + }, + { + "epoch": 0.5876836073367865, + "grad_norm": 1.133786916732788, + "learning_rate": 3.833734642231812e-06, + "loss": 0.3132, + "step": 29357 + }, + { + "epoch": 0.5877036258539149, + "grad_norm": 1.1692805290222168, + "learning_rate": 3.8334194039222755e-06, + "loss": 0.3448, + "step": 29358 + }, + { + "epoch": 0.5877236443710432, + "grad_norm": 1.0502691268920898, + "learning_rate": 3.833104170516793e-06, + "loss": 0.3033, + "step": 29359 + }, + { + "epoch": 0.5877436628881716, + "grad_norm": 1.0568809509277344, + "learning_rate": 3.832788942016689e-06, + "loss": 0.2934, + "step": 29360 + }, + { + "epoch": 0.5877636814052999, + "grad_norm": 1.191080927848816, + "learning_rate": 3.8324737184232885e-06, + "loss": 0.329, + "step": 29361 + }, + { + "epoch": 0.5877836999224283, + "grad_norm": 1.0685665607452393, + "learning_rate": 3.832158499737918e-06, + "loss": 0.3285, + "step": 29362 + }, + { + "epoch": 0.5878037184395566, + "grad_norm": 1.144777774810791, + "learning_rate": 3.831843285961902e-06, + "loss": 0.2964, + "step": 29363 + }, + { + "epoch": 0.5878237369566849, + "grad_norm": 1.2552704811096191, + "learning_rate": 3.831528077096566e-06, + "loss": 0.3259, + "step": 29364 + }, + { + "epoch": 0.5878437554738133, + "grad_norm": 1.1944706439971924, + "learning_rate": 3.831212873143232e-06, + "loss": 0.2721, + "step": 29365 + }, + { + "epoch": 0.5878637739909416, + "grad_norm": 0.9392929077148438, + "learning_rate": 3.83089767410323e-06, + "loss": 0.2579, + "step": 29366 + }, + { + "epoch": 0.58788379250807, + "grad_norm": 1.1669431924819946, + "learning_rate": 3.8305824799778825e-06, + "loss": 0.3183, + "step": 29367 + }, + { + "epoch": 0.5879038110251983, + "grad_norm": 1.081690788269043, + "learning_rate": 3.830267290768513e-06, + "loss": 0.2673, + "step": 29368 + }, + { + "epoch": 0.5879238295423267, + "grad_norm": 1.164555549621582, + "learning_rate": 3.82995210647645e-06, + "loss": 0.3271, + "step": 29369 + }, + { + "epoch": 0.587943848059455, + "grad_norm": 1.1504231691360474, + "learning_rate": 3.829636927103014e-06, + "loss": 0.2902, + "step": 29370 + }, + { + "epoch": 0.5879638665765834, + "grad_norm": 1.2416093349456787, + "learning_rate": 3.829321752649535e-06, + "loss": 0.3003, + "step": 29371 + }, + { + "epoch": 0.5879838850937117, + "grad_norm": 1.980712652206421, + "learning_rate": 3.829006583117335e-06, + "loss": 0.7597, + "step": 29372 + }, + { + "epoch": 0.58800390361084, + "grad_norm": 1.1535406112670898, + "learning_rate": 3.828691418507738e-06, + "loss": 0.3106, + "step": 29373 + }, + { + "epoch": 0.5880239221279684, + "grad_norm": 1.2828314304351807, + "learning_rate": 3.828376258822071e-06, + "loss": 0.2869, + "step": 29374 + }, + { + "epoch": 0.5880439406450967, + "grad_norm": 1.1080142259597778, + "learning_rate": 3.828061104061657e-06, + "loss": 0.3035, + "step": 29375 + }, + { + "epoch": 0.5880639591622251, + "grad_norm": 1.1692044734954834, + "learning_rate": 3.827745954227824e-06, + "loss": 0.294, + "step": 29376 + }, + { + "epoch": 0.5880839776793534, + "grad_norm": 1.278417706489563, + "learning_rate": 3.827430809321895e-06, + "loss": 0.3397, + "step": 29377 + }, + { + "epoch": 0.5881039961964818, + "grad_norm": 1.114133596420288, + "learning_rate": 3.827115669345193e-06, + "loss": 0.3403, + "step": 29378 + }, + { + "epoch": 0.5881240147136101, + "grad_norm": 2.057996988296509, + "learning_rate": 3.826800534299043e-06, + "loss": 0.7742, + "step": 29379 + }, + { + "epoch": 0.5881440332307384, + "grad_norm": 1.1204103231430054, + "learning_rate": 3.826485404184774e-06, + "loss": 0.2817, + "step": 29380 + }, + { + "epoch": 0.5881640517478668, + "grad_norm": 1.3001688718795776, + "learning_rate": 3.826170279003706e-06, + "loss": 0.256, + "step": 29381 + }, + { + "epoch": 0.5881840702649951, + "grad_norm": 1.120333194732666, + "learning_rate": 3.825855158757166e-06, + "loss": 0.2778, + "step": 29382 + }, + { + "epoch": 0.5882040887821235, + "grad_norm": 1.2730520963668823, + "learning_rate": 3.8255400434464776e-06, + "loss": 0.3083, + "step": 29383 + }, + { + "epoch": 0.5882241072992518, + "grad_norm": 1.1393964290618896, + "learning_rate": 3.825224933072967e-06, + "loss": 0.2825, + "step": 29384 + }, + { + "epoch": 0.5882441258163802, + "grad_norm": 1.1791349649429321, + "learning_rate": 3.824909827637958e-06, + "loss": 0.3069, + "step": 29385 + }, + { + "epoch": 0.5882641443335085, + "grad_norm": 1.1721553802490234, + "learning_rate": 3.824594727142775e-06, + "loss": 0.373, + "step": 29386 + }, + { + "epoch": 0.5882841628506369, + "grad_norm": 0.9939619302749634, + "learning_rate": 3.824279631588743e-06, + "loss": 0.2673, + "step": 29387 + }, + { + "epoch": 0.5883041813677652, + "grad_norm": 1.0159683227539062, + "learning_rate": 3.8239645409771845e-06, + "loss": 0.2939, + "step": 29388 + }, + { + "epoch": 0.5883241998848935, + "grad_norm": 1.148986577987671, + "learning_rate": 3.823649455309428e-06, + "loss": 0.2804, + "step": 29389 + }, + { + "epoch": 0.5883442184020219, + "grad_norm": 1.884033203125, + "learning_rate": 3.823334374586797e-06, + "loss": 0.8677, + "step": 29390 + }, + { + "epoch": 0.5883642369191502, + "grad_norm": 1.159652590751648, + "learning_rate": 3.823019298810614e-06, + "loss": 0.312, + "step": 29391 + }, + { + "epoch": 0.5883842554362786, + "grad_norm": 0.9836158156394958, + "learning_rate": 3.822704227982203e-06, + "loss": 0.3017, + "step": 29392 + }, + { + "epoch": 0.5884042739534069, + "grad_norm": 1.2053741216659546, + "learning_rate": 3.822389162102892e-06, + "loss": 0.3526, + "step": 29393 + }, + { + "epoch": 0.5884242924705353, + "grad_norm": 1.213855504989624, + "learning_rate": 3.822074101174003e-06, + "loss": 0.2784, + "step": 29394 + }, + { + "epoch": 0.5884443109876636, + "grad_norm": 1.155868411064148, + "learning_rate": 3.8217590451968614e-06, + "loss": 0.2943, + "step": 29395 + }, + { + "epoch": 0.5884643295047919, + "grad_norm": 2.0051753520965576, + "learning_rate": 3.8214439941727885e-06, + "loss": 0.8194, + "step": 29396 + }, + { + "epoch": 0.5884843480219203, + "grad_norm": 1.1621241569519043, + "learning_rate": 3.821128948103115e-06, + "loss": 0.315, + "step": 29397 + }, + { + "epoch": 0.5885043665390486, + "grad_norm": 1.1242530345916748, + "learning_rate": 3.820813906989161e-06, + "loss": 0.2983, + "step": 29398 + }, + { + "epoch": 0.588524385056177, + "grad_norm": 1.968748688697815, + "learning_rate": 3.82049887083225e-06, + "loss": 0.7713, + "step": 29399 + }, + { + "epoch": 0.5885444035733053, + "grad_norm": 1.1225892305374146, + "learning_rate": 3.82018383963371e-06, + "loss": 0.2735, + "step": 29400 + }, + { + "epoch": 0.5885644220904337, + "grad_norm": 1.1121567487716675, + "learning_rate": 3.81986881339486e-06, + "loss": 0.3164, + "step": 29401 + }, + { + "epoch": 0.588584440607562, + "grad_norm": 1.2125035524368286, + "learning_rate": 3.81955379211703e-06, + "loss": 0.391, + "step": 29402 + }, + { + "epoch": 0.5886044591246904, + "grad_norm": 1.2459006309509277, + "learning_rate": 3.8192387758015426e-06, + "loss": 0.3369, + "step": 29403 + }, + { + "epoch": 0.5886244776418187, + "grad_norm": 1.1333197355270386, + "learning_rate": 3.818923764449721e-06, + "loss": 0.2687, + "step": 29404 + }, + { + "epoch": 0.588644496158947, + "grad_norm": 1.2568360567092896, + "learning_rate": 3.818608758062887e-06, + "loss": 0.3366, + "step": 29405 + }, + { + "epoch": 0.5886645146760754, + "grad_norm": 1.4031175374984741, + "learning_rate": 3.8182937566423705e-06, + "loss": 0.3309, + "step": 29406 + }, + { + "epoch": 0.5886845331932037, + "grad_norm": 1.8985111713409424, + "learning_rate": 3.817978760189491e-06, + "loss": 0.7522, + "step": 29407 + }, + { + "epoch": 0.5887045517103321, + "grad_norm": 1.178817629814148, + "learning_rate": 3.817663768705576e-06, + "loss": 0.332, + "step": 29408 + }, + { + "epoch": 0.5887245702274604, + "grad_norm": 1.2671302556991577, + "learning_rate": 3.817348782191948e-06, + "loss": 0.3072, + "step": 29409 + }, + { + "epoch": 0.5887445887445888, + "grad_norm": 0.9443565607070923, + "learning_rate": 3.817033800649928e-06, + "loss": 0.241, + "step": 29410 + }, + { + "epoch": 0.5887646072617171, + "grad_norm": 1.1707117557525635, + "learning_rate": 3.816718824080847e-06, + "loss": 0.303, + "step": 29411 + }, + { + "epoch": 0.5887846257788454, + "grad_norm": 1.0646073818206787, + "learning_rate": 3.816403852486023e-06, + "loss": 0.3184, + "step": 29412 + }, + { + "epoch": 0.5888046442959738, + "grad_norm": 1.174160122871399, + "learning_rate": 3.816088885866784e-06, + "loss": 0.3756, + "step": 29413 + }, + { + "epoch": 0.5888246628131021, + "grad_norm": 1.151068925857544, + "learning_rate": 3.815773924224451e-06, + "loss": 0.2708, + "step": 29414 + }, + { + "epoch": 0.5888446813302305, + "grad_norm": 1.0576171875, + "learning_rate": 3.815458967560351e-06, + "loss": 0.3057, + "step": 29415 + }, + { + "epoch": 0.5888646998473588, + "grad_norm": 1.9623876810073853, + "learning_rate": 3.815144015875808e-06, + "loss": 0.7016, + "step": 29416 + }, + { + "epoch": 0.5888847183644872, + "grad_norm": 1.0392990112304688, + "learning_rate": 3.814829069172143e-06, + "loss": 0.3294, + "step": 29417 + }, + { + "epoch": 0.5889047368816155, + "grad_norm": 1.1061261892318726, + "learning_rate": 3.814514127450681e-06, + "loss": 0.2679, + "step": 29418 + }, + { + "epoch": 0.5889247553987439, + "grad_norm": 1.0490516424179077, + "learning_rate": 3.814199190712746e-06, + "loss": 0.3035, + "step": 29419 + }, + { + "epoch": 0.5889447739158722, + "grad_norm": 1.1908867359161377, + "learning_rate": 3.8138842589596632e-06, + "loss": 0.3113, + "step": 29420 + }, + { + "epoch": 0.5889647924330005, + "grad_norm": 1.1521703004837036, + "learning_rate": 3.8135693321927563e-06, + "loss": 0.2778, + "step": 29421 + }, + { + "epoch": 0.5889848109501289, + "grad_norm": 1.0969105958938599, + "learning_rate": 3.8132544104133483e-06, + "loss": 0.2873, + "step": 29422 + }, + { + "epoch": 0.5890048294672572, + "grad_norm": 1.1535037755966187, + "learning_rate": 3.812939493622762e-06, + "loss": 0.3025, + "step": 29423 + }, + { + "epoch": 0.5890248479843856, + "grad_norm": 1.1852881908416748, + "learning_rate": 3.8126245818223244e-06, + "loss": 0.353, + "step": 29424 + }, + { + "epoch": 0.5890448665015139, + "grad_norm": 1.1499698162078857, + "learning_rate": 3.8123096750133564e-06, + "loss": 0.2772, + "step": 29425 + }, + { + "epoch": 0.5890648850186423, + "grad_norm": 1.1238863468170166, + "learning_rate": 3.811994773197184e-06, + "loss": 0.3025, + "step": 29426 + }, + { + "epoch": 0.5890849035357706, + "grad_norm": 1.1940761804580688, + "learning_rate": 3.8116798763751286e-06, + "loss": 0.3359, + "step": 29427 + }, + { + "epoch": 0.5891049220528989, + "grad_norm": 1.982344388961792, + "learning_rate": 3.811364984548516e-06, + "loss": 0.7041, + "step": 29428 + }, + { + "epoch": 0.5891249405700273, + "grad_norm": 1.2202458381652832, + "learning_rate": 3.8110500977186706e-06, + "loss": 0.2938, + "step": 29429 + }, + { + "epoch": 0.5891449590871556, + "grad_norm": 1.0020854473114014, + "learning_rate": 3.810735215886914e-06, + "loss": 0.3265, + "step": 29430 + }, + { + "epoch": 0.589164977604284, + "grad_norm": 1.0475690364837646, + "learning_rate": 3.810420339054569e-06, + "loss": 0.3179, + "step": 29431 + }, + { + "epoch": 0.5891849961214123, + "grad_norm": 1.1685124635696411, + "learning_rate": 3.8101054672229613e-06, + "loss": 0.3076, + "step": 29432 + }, + { + "epoch": 0.5892050146385407, + "grad_norm": 1.191921353340149, + "learning_rate": 3.8097906003934148e-06, + "loss": 0.3673, + "step": 29433 + }, + { + "epoch": 0.589225033155669, + "grad_norm": 1.3115839958190918, + "learning_rate": 3.8094757385672533e-06, + "loss": 0.3327, + "step": 29434 + }, + { + "epoch": 0.5892450516727974, + "grad_norm": 1.0641433000564575, + "learning_rate": 3.809160881745799e-06, + "loss": 0.3084, + "step": 29435 + }, + { + "epoch": 0.5892650701899257, + "grad_norm": 1.0461907386779785, + "learning_rate": 3.808846029930374e-06, + "loss": 0.2902, + "step": 29436 + }, + { + "epoch": 0.589285088707054, + "grad_norm": 1.1435269117355347, + "learning_rate": 3.8085311831223067e-06, + "loss": 0.3089, + "step": 29437 + }, + { + "epoch": 0.5893051072241824, + "grad_norm": 1.1382097005844116, + "learning_rate": 3.8082163413229166e-06, + "loss": 0.2805, + "step": 29438 + }, + { + "epoch": 0.5893251257413107, + "grad_norm": 1.0414398908615112, + "learning_rate": 3.8079015045335295e-06, + "loss": 0.3138, + "step": 29439 + }, + { + "epoch": 0.5893451442584391, + "grad_norm": 1.0758305788040161, + "learning_rate": 3.807586672755468e-06, + "loss": 0.3268, + "step": 29440 + }, + { + "epoch": 0.5893651627755674, + "grad_norm": 1.2186005115509033, + "learning_rate": 3.807271845990053e-06, + "loss": 0.3297, + "step": 29441 + }, + { + "epoch": 0.5893851812926958, + "grad_norm": 1.1052716970443726, + "learning_rate": 3.8069570242386135e-06, + "loss": 0.2867, + "step": 29442 + }, + { + "epoch": 0.5894051998098241, + "grad_norm": 1.1610769033432007, + "learning_rate": 3.806642207502469e-06, + "loss": 0.313, + "step": 29443 + }, + { + "epoch": 0.5894252183269524, + "grad_norm": 1.0639123916625977, + "learning_rate": 3.806327395782943e-06, + "loss": 0.3153, + "step": 29444 + }, + { + "epoch": 0.5894452368440808, + "grad_norm": 1.0928008556365967, + "learning_rate": 3.80601258908136e-06, + "loss": 0.3226, + "step": 29445 + }, + { + "epoch": 0.5894652553612091, + "grad_norm": 1.1288529634475708, + "learning_rate": 3.805697787399043e-06, + "loss": 0.2859, + "step": 29446 + }, + { + "epoch": 0.5894852738783375, + "grad_norm": 1.1129281520843506, + "learning_rate": 3.8053829907373163e-06, + "loss": 0.3, + "step": 29447 + }, + { + "epoch": 0.5895052923954658, + "grad_norm": 1.0172646045684814, + "learning_rate": 3.805068199097503e-06, + "loss": 0.2721, + "step": 29448 + }, + { + "epoch": 0.5895253109125942, + "grad_norm": 1.061676263809204, + "learning_rate": 3.8047534124809232e-06, + "loss": 0.2876, + "step": 29449 + }, + { + "epoch": 0.5895453294297225, + "grad_norm": 1.2670226097106934, + "learning_rate": 3.8044386308889053e-06, + "loss": 0.2611, + "step": 29450 + }, + { + "epoch": 0.5895653479468508, + "grad_norm": 1.1077759265899658, + "learning_rate": 3.8041238543227687e-06, + "loss": 0.3045, + "step": 29451 + }, + { + "epoch": 0.5895853664639792, + "grad_norm": 0.9888092875480652, + "learning_rate": 3.8038090827838392e-06, + "loss": 0.2921, + "step": 29452 + }, + { + "epoch": 0.5896053849811075, + "grad_norm": 1.148556113243103, + "learning_rate": 3.8034943162734394e-06, + "loss": 0.317, + "step": 29453 + }, + { + "epoch": 0.5896254034982359, + "grad_norm": 1.1039140224456787, + "learning_rate": 3.8031795547928898e-06, + "loss": 0.3132, + "step": 29454 + }, + { + "epoch": 0.5896454220153642, + "grad_norm": 1.0734872817993164, + "learning_rate": 3.8028647983435172e-06, + "loss": 0.2841, + "step": 29455 + }, + { + "epoch": 0.5896654405324926, + "grad_norm": 1.1972882747650146, + "learning_rate": 3.802550046926644e-06, + "loss": 0.3156, + "step": 29456 + }, + { + "epoch": 0.5896854590496209, + "grad_norm": 1.1056073904037476, + "learning_rate": 3.8022353005435914e-06, + "loss": 0.2934, + "step": 29457 + }, + { + "epoch": 0.5897054775667493, + "grad_norm": 1.8127930164337158, + "learning_rate": 3.8019205591956838e-06, + "loss": 0.7543, + "step": 29458 + }, + { + "epoch": 0.5897254960838776, + "grad_norm": 1.0383540391921997, + "learning_rate": 3.8016058228842447e-06, + "loss": 0.3146, + "step": 29459 + }, + { + "epoch": 0.5897455146010059, + "grad_norm": 0.9875364303588867, + "learning_rate": 3.801291091610597e-06, + "loss": 0.2673, + "step": 29460 + }, + { + "epoch": 0.5897655331181343, + "grad_norm": 1.2630606889724731, + "learning_rate": 3.8009763653760647e-06, + "loss": 0.3087, + "step": 29461 + }, + { + "epoch": 0.5897855516352626, + "grad_norm": 1.1190537214279175, + "learning_rate": 3.800661644181969e-06, + "loss": 0.291, + "step": 29462 + }, + { + "epoch": 0.589805570152391, + "grad_norm": 1.1489307880401611, + "learning_rate": 3.800346928029631e-06, + "loss": 0.3036, + "step": 29463 + }, + { + "epoch": 0.5898255886695193, + "grad_norm": 1.0415860414505005, + "learning_rate": 3.8000322169203784e-06, + "loss": 0.3337, + "step": 29464 + }, + { + "epoch": 0.5898456071866477, + "grad_norm": 1.983650803565979, + "learning_rate": 3.799717510855533e-06, + "loss": 0.8188, + "step": 29465 + }, + { + "epoch": 0.589865625703776, + "grad_norm": 0.9961115121841431, + "learning_rate": 3.799402809836417e-06, + "loss": 0.2666, + "step": 29466 + }, + { + "epoch": 0.5898856442209043, + "grad_norm": 1.1842057704925537, + "learning_rate": 3.799088113864351e-06, + "loss": 0.3189, + "step": 29467 + }, + { + "epoch": 0.5899056627380327, + "grad_norm": 1.0280178785324097, + "learning_rate": 3.798773422940662e-06, + "loss": 0.2943, + "step": 29468 + }, + { + "epoch": 0.589925681255161, + "grad_norm": 1.0304155349731445, + "learning_rate": 3.7984587370666715e-06, + "loss": 0.2948, + "step": 29469 + }, + { + "epoch": 0.5899456997722894, + "grad_norm": 1.2657853364944458, + "learning_rate": 3.7981440562437e-06, + "loss": 0.2686, + "step": 29470 + }, + { + "epoch": 0.5899657182894177, + "grad_norm": 1.9179083108901978, + "learning_rate": 3.7978293804730736e-06, + "loss": 0.8252, + "step": 29471 + }, + { + "epoch": 0.5899857368065461, + "grad_norm": 1.007714867591858, + "learning_rate": 3.797514709756112e-06, + "loss": 0.2686, + "step": 29472 + }, + { + "epoch": 0.5900057553236744, + "grad_norm": 1.2302000522613525, + "learning_rate": 3.797200044094142e-06, + "loss": 0.3367, + "step": 29473 + }, + { + "epoch": 0.5900257738408028, + "grad_norm": 1.1221383810043335, + "learning_rate": 3.796885383488484e-06, + "loss": 0.3097, + "step": 29474 + }, + { + "epoch": 0.5900457923579311, + "grad_norm": 1.097917079925537, + "learning_rate": 3.7965707279404607e-06, + "loss": 0.3385, + "step": 29475 + }, + { + "epoch": 0.5900658108750594, + "grad_norm": 1.877803087234497, + "learning_rate": 3.7962560774513934e-06, + "loss": 0.7954, + "step": 29476 + }, + { + "epoch": 0.5900858293921878, + "grad_norm": 1.0160335302352905, + "learning_rate": 3.795941432022607e-06, + "loss": 0.2886, + "step": 29477 + }, + { + "epoch": 0.5901058479093161, + "grad_norm": 1.094090461730957, + "learning_rate": 3.7956267916554252e-06, + "loss": 0.2819, + "step": 29478 + }, + { + "epoch": 0.5901258664264445, + "grad_norm": 1.0022320747375488, + "learning_rate": 3.7953121563511692e-06, + "loss": 0.2773, + "step": 29479 + }, + { + "epoch": 0.5901458849435728, + "grad_norm": 0.9776659607887268, + "learning_rate": 3.7949975261111593e-06, + "loss": 0.3001, + "step": 29480 + }, + { + "epoch": 0.5901659034607012, + "grad_norm": 1.0299879312515259, + "learning_rate": 3.7946829009367224e-06, + "loss": 0.3125, + "step": 29481 + }, + { + "epoch": 0.5901859219778295, + "grad_norm": 1.0503534078598022, + "learning_rate": 3.7943682808291794e-06, + "loss": 0.33, + "step": 29482 + }, + { + "epoch": 0.5902059404949578, + "grad_norm": 1.2422337532043457, + "learning_rate": 3.794053665789852e-06, + "loss": 0.3125, + "step": 29483 + }, + { + "epoch": 0.5902259590120862, + "grad_norm": 1.1737720966339111, + "learning_rate": 3.793739055820064e-06, + "loss": 0.3368, + "step": 29484 + }, + { + "epoch": 0.5902459775292145, + "grad_norm": 1.0647608041763306, + "learning_rate": 3.7934244509211356e-06, + "loss": 0.3567, + "step": 29485 + }, + { + "epoch": 0.5902659960463429, + "grad_norm": 1.0198547840118408, + "learning_rate": 3.793109851094393e-06, + "loss": 0.3434, + "step": 29486 + }, + { + "epoch": 0.5902860145634712, + "grad_norm": 1.878690242767334, + "learning_rate": 3.7927952563411574e-06, + "loss": 0.7512, + "step": 29487 + }, + { + "epoch": 0.5903060330805996, + "grad_norm": 1.027559518814087, + "learning_rate": 3.79248066666275e-06, + "loss": 0.2729, + "step": 29488 + }, + { + "epoch": 0.5903260515977279, + "grad_norm": 1.1279550790786743, + "learning_rate": 3.792166082060492e-06, + "loss": 0.3209, + "step": 29489 + }, + { + "epoch": 0.5903460701148563, + "grad_norm": 1.731321930885315, + "learning_rate": 3.7918515025357095e-06, + "loss": 0.7557, + "step": 29490 + }, + { + "epoch": 0.5903660886319846, + "grad_norm": 1.1055935621261597, + "learning_rate": 3.7915369280897242e-06, + "loss": 0.3534, + "step": 29491 + }, + { + "epoch": 0.5903861071491129, + "grad_norm": 1.066396713256836, + "learning_rate": 3.7912223587238572e-06, + "loss": 0.3168, + "step": 29492 + }, + { + "epoch": 0.5904061256662413, + "grad_norm": 1.1317598819732666, + "learning_rate": 3.790907794439432e-06, + "loss": 0.2905, + "step": 29493 + }, + { + "epoch": 0.5904261441833696, + "grad_norm": 1.0153543949127197, + "learning_rate": 3.790593235237767e-06, + "loss": 0.2763, + "step": 29494 + }, + { + "epoch": 0.590446162700498, + "grad_norm": 1.8726252317428589, + "learning_rate": 3.790278681120191e-06, + "loss": 0.7557, + "step": 29495 + }, + { + "epoch": 0.5904661812176263, + "grad_norm": 1.1519261598587036, + "learning_rate": 3.7899641320880216e-06, + "loss": 0.2911, + "step": 29496 + }, + { + "epoch": 0.5904861997347547, + "grad_norm": 1.136979341506958, + "learning_rate": 3.789649588142583e-06, + "loss": 0.2914, + "step": 29497 + }, + { + "epoch": 0.590506218251883, + "grad_norm": 1.1099203824996948, + "learning_rate": 3.789335049285196e-06, + "loss": 0.2626, + "step": 29498 + }, + { + "epoch": 0.5905262367690113, + "grad_norm": 1.1396092176437378, + "learning_rate": 3.7890205155171857e-06, + "loss": 0.3455, + "step": 29499 + }, + { + "epoch": 0.5905462552861397, + "grad_norm": 1.202996850013733, + "learning_rate": 3.7887059868398723e-06, + "loss": 0.306, + "step": 29500 + }, + { + "epoch": 0.590566273803268, + "grad_norm": 1.063757300376892, + "learning_rate": 3.7883914632545784e-06, + "loss": 0.2852, + "step": 29501 + }, + { + "epoch": 0.5905862923203964, + "grad_norm": 1.2861019372940063, + "learning_rate": 3.7880769447626253e-06, + "loss": 0.3413, + "step": 29502 + }, + { + "epoch": 0.5906063108375247, + "grad_norm": 1.0818268060684204, + "learning_rate": 3.7877624313653344e-06, + "loss": 0.3004, + "step": 29503 + }, + { + "epoch": 0.5906263293546531, + "grad_norm": 1.1892839670181274, + "learning_rate": 3.7874479230640317e-06, + "loss": 0.3358, + "step": 29504 + }, + { + "epoch": 0.5906463478717814, + "grad_norm": 1.9885900020599365, + "learning_rate": 3.787133419860037e-06, + "loss": 0.6977, + "step": 29505 + }, + { + "epoch": 0.5906663663889098, + "grad_norm": 1.0409669876098633, + "learning_rate": 3.786818921754672e-06, + "loss": 0.2634, + "step": 29506 + }, + { + "epoch": 0.5906863849060381, + "grad_norm": 1.0831712484359741, + "learning_rate": 3.786504428749257e-06, + "loss": 0.3002, + "step": 29507 + }, + { + "epoch": 0.5907064034231664, + "grad_norm": 1.1453158855438232, + "learning_rate": 3.7861899408451184e-06, + "loss": 0.3177, + "step": 29508 + }, + { + "epoch": 0.5907264219402948, + "grad_norm": 1.1760560274124146, + "learning_rate": 3.7858754580435758e-06, + "loss": 0.3194, + "step": 29509 + }, + { + "epoch": 0.5907464404574231, + "grad_norm": 1.0685631036758423, + "learning_rate": 3.785560980345952e-06, + "loss": 0.3069, + "step": 29510 + }, + { + "epoch": 0.5907664589745515, + "grad_norm": 1.1399391889572144, + "learning_rate": 3.7852465077535667e-06, + "loss": 0.308, + "step": 29511 + }, + { + "epoch": 0.5907864774916798, + "grad_norm": 1.238093614578247, + "learning_rate": 3.784932040267745e-06, + "loss": 0.3439, + "step": 29512 + }, + { + "epoch": 0.5908064960088082, + "grad_norm": 1.213453769683838, + "learning_rate": 3.784617577889809e-06, + "loss": 0.3477, + "step": 29513 + }, + { + "epoch": 0.5908265145259365, + "grad_norm": 1.0820019245147705, + "learning_rate": 3.784303120621078e-06, + "loss": 0.2763, + "step": 29514 + }, + { + "epoch": 0.5908465330430648, + "grad_norm": 1.125976800918579, + "learning_rate": 3.7839886684628735e-06, + "loss": 0.2982, + "step": 29515 + }, + { + "epoch": 0.5908665515601932, + "grad_norm": 1.070248007774353, + "learning_rate": 3.783674221416519e-06, + "loss": 0.3233, + "step": 29516 + }, + { + "epoch": 0.5908865700773215, + "grad_norm": 1.1479990482330322, + "learning_rate": 3.7833597794833377e-06, + "loss": 0.3025, + "step": 29517 + }, + { + "epoch": 0.5909065885944499, + "grad_norm": 1.2400106191635132, + "learning_rate": 3.7830453426646506e-06, + "loss": 0.3277, + "step": 29518 + }, + { + "epoch": 0.5909266071115782, + "grad_norm": 1.040568232536316, + "learning_rate": 3.7827309109617787e-06, + "loss": 0.281, + "step": 29519 + }, + { + "epoch": 0.5909466256287066, + "grad_norm": 1.0022072792053223, + "learning_rate": 3.782416484376042e-06, + "loss": 0.2769, + "step": 29520 + }, + { + "epoch": 0.5909666441458349, + "grad_norm": 1.1496304273605347, + "learning_rate": 3.7821020629087662e-06, + "loss": 0.304, + "step": 29521 + }, + { + "epoch": 0.5909866626629633, + "grad_norm": 1.1425795555114746, + "learning_rate": 3.7817876465612705e-06, + "loss": 0.3129, + "step": 29522 + }, + { + "epoch": 0.5910066811800916, + "grad_norm": 1.0835695266723633, + "learning_rate": 3.781473235334879e-06, + "loss": 0.3061, + "step": 29523 + }, + { + "epoch": 0.5910266996972199, + "grad_norm": 1.069492220878601, + "learning_rate": 3.781158829230911e-06, + "loss": 0.3046, + "step": 29524 + }, + { + "epoch": 0.5910467182143483, + "grad_norm": 1.9972901344299316, + "learning_rate": 3.7808444282506867e-06, + "loss": 0.6964, + "step": 29525 + }, + { + "epoch": 0.5910667367314766, + "grad_norm": 1.1729637384414673, + "learning_rate": 3.7805300323955328e-06, + "loss": 0.3163, + "step": 29526 + }, + { + "epoch": 0.591086755248605, + "grad_norm": 1.18313729763031, + "learning_rate": 3.780215641666768e-06, + "loss": 0.3364, + "step": 29527 + }, + { + "epoch": 0.5911067737657333, + "grad_norm": 1.1682195663452148, + "learning_rate": 3.779901256065713e-06, + "loss": 0.3456, + "step": 29528 + }, + { + "epoch": 0.5911267922828617, + "grad_norm": 1.1256146430969238, + "learning_rate": 3.7795868755936893e-06, + "loss": 0.309, + "step": 29529 + }, + { + "epoch": 0.59114681079999, + "grad_norm": 1.2158548831939697, + "learning_rate": 3.7792725002520225e-06, + "loss": 0.3129, + "step": 29530 + }, + { + "epoch": 0.5911668293171183, + "grad_norm": 2.072498083114624, + "learning_rate": 3.7789581300420307e-06, + "loss": 0.7482, + "step": 29531 + }, + { + "epoch": 0.5911868478342467, + "grad_norm": 1.1104438304901123, + "learning_rate": 3.7786437649650366e-06, + "loss": 0.2991, + "step": 29532 + }, + { + "epoch": 0.591206866351375, + "grad_norm": 1.1213815212249756, + "learning_rate": 3.77832940502236e-06, + "loss": 0.2643, + "step": 29533 + }, + { + "epoch": 0.5912268848685034, + "grad_norm": 0.9163010120391846, + "learning_rate": 3.778015050215324e-06, + "loss": 0.2583, + "step": 29534 + }, + { + "epoch": 0.5912469033856317, + "grad_norm": 1.9748057126998901, + "learning_rate": 3.7777007005452494e-06, + "loss": 0.7549, + "step": 29535 + }, + { + "epoch": 0.5912669219027601, + "grad_norm": 1.9104479551315308, + "learning_rate": 3.7773863560134594e-06, + "loss": 0.7648, + "step": 29536 + }, + { + "epoch": 0.5912869404198884, + "grad_norm": 1.049646258354187, + "learning_rate": 3.7770720166212736e-06, + "loss": 0.2738, + "step": 29537 + }, + { + "epoch": 0.5913069589370168, + "grad_norm": 1.0780812501907349, + "learning_rate": 3.776757682370012e-06, + "loss": 0.3143, + "step": 29538 + }, + { + "epoch": 0.5913269774541451, + "grad_norm": 1.104286789894104, + "learning_rate": 3.776443353261e-06, + "loss": 0.31, + "step": 29539 + }, + { + "epoch": 0.5913469959712734, + "grad_norm": 1.0106052160263062, + "learning_rate": 3.776129029295557e-06, + "loss": 0.2749, + "step": 29540 + }, + { + "epoch": 0.5913670144884018, + "grad_norm": 1.079361081123352, + "learning_rate": 3.7758147104750026e-06, + "loss": 0.3394, + "step": 29541 + }, + { + "epoch": 0.5913870330055301, + "grad_norm": 1.0146617889404297, + "learning_rate": 3.7755003968006586e-06, + "loss": 0.2635, + "step": 29542 + }, + { + "epoch": 0.5914070515226585, + "grad_norm": 1.0318856239318848, + "learning_rate": 3.7751860882738502e-06, + "loss": 0.2735, + "step": 29543 + }, + { + "epoch": 0.5914270700397868, + "grad_norm": 1.1860592365264893, + "learning_rate": 3.7748717848958955e-06, + "loss": 0.3325, + "step": 29544 + }, + { + "epoch": 0.5914470885569152, + "grad_norm": 1.702692985534668, + "learning_rate": 3.7745574866681163e-06, + "loss": 0.8152, + "step": 29545 + }, + { + "epoch": 0.5914671070740435, + "grad_norm": 1.2987372875213623, + "learning_rate": 3.774243193591832e-06, + "loss": 0.3316, + "step": 29546 + }, + { + "epoch": 0.5914871255911718, + "grad_norm": 1.081620216369629, + "learning_rate": 3.7739289056683663e-06, + "loss": 0.3002, + "step": 29547 + }, + { + "epoch": 0.5915071441083002, + "grad_norm": 1.882901668548584, + "learning_rate": 3.77361462289904e-06, + "loss": 0.7387, + "step": 29548 + }, + { + "epoch": 0.5915271626254285, + "grad_norm": 1.1173876523971558, + "learning_rate": 3.773300345285174e-06, + "loss": 0.2852, + "step": 29549 + }, + { + "epoch": 0.5915471811425569, + "grad_norm": 1.007782220840454, + "learning_rate": 3.77298607282809e-06, + "loss": 0.3302, + "step": 29550 + }, + { + "epoch": 0.5915671996596852, + "grad_norm": 1.1274645328521729, + "learning_rate": 3.7726718055291063e-06, + "loss": 0.3373, + "step": 29551 + }, + { + "epoch": 0.5915872181768136, + "grad_norm": 1.8368028402328491, + "learning_rate": 3.7723575433895482e-06, + "loss": 0.7494, + "step": 29552 + }, + { + "epoch": 0.5916072366939419, + "grad_norm": 1.057665228843689, + "learning_rate": 3.772043286410735e-06, + "loss": 0.2879, + "step": 29553 + }, + { + "epoch": 0.5916272552110703, + "grad_norm": 1.1026360988616943, + "learning_rate": 3.771729034593986e-06, + "loss": 0.3027, + "step": 29554 + }, + { + "epoch": 0.5916472737281986, + "grad_norm": 1.011878252029419, + "learning_rate": 3.771414787940625e-06, + "loss": 0.3114, + "step": 29555 + }, + { + "epoch": 0.5916672922453269, + "grad_norm": 1.0739432573318481, + "learning_rate": 3.7711005464519703e-06, + "loss": 0.2887, + "step": 29556 + }, + { + "epoch": 0.5916873107624553, + "grad_norm": 1.1540944576263428, + "learning_rate": 3.770786310129346e-06, + "loss": 0.2792, + "step": 29557 + }, + { + "epoch": 0.5917073292795836, + "grad_norm": 2.0223000049591064, + "learning_rate": 3.7704720789740712e-06, + "loss": 0.723, + "step": 29558 + }, + { + "epoch": 0.591727347796712, + "grad_norm": 1.1146363019943237, + "learning_rate": 3.7701578529874667e-06, + "loss": 0.2775, + "step": 29559 + }, + { + "epoch": 0.5917473663138403, + "grad_norm": 1.2489136457443237, + "learning_rate": 3.7698436321708535e-06, + "loss": 0.2989, + "step": 29560 + }, + { + "epoch": 0.5917673848309687, + "grad_norm": 1.1050899028778076, + "learning_rate": 3.7695294165255535e-06, + "loss": 0.339, + "step": 29561 + }, + { + "epoch": 0.591787403348097, + "grad_norm": 1.091434121131897, + "learning_rate": 3.769215206052888e-06, + "loss": 0.2839, + "step": 29562 + }, + { + "epoch": 0.5918074218652253, + "grad_norm": 1.0759724378585815, + "learning_rate": 3.7689010007541764e-06, + "loss": 0.2734, + "step": 29563 + }, + { + "epoch": 0.5918274403823537, + "grad_norm": 1.1496796607971191, + "learning_rate": 3.7685868006307407e-06, + "loss": 0.2906, + "step": 29564 + }, + { + "epoch": 0.591847458899482, + "grad_norm": 1.3455251455307007, + "learning_rate": 3.7682726056838987e-06, + "loss": 0.2884, + "step": 29565 + }, + { + "epoch": 0.5918674774166104, + "grad_norm": 1.1242196559906006, + "learning_rate": 3.7679584159149758e-06, + "loss": 0.3197, + "step": 29566 + }, + { + "epoch": 0.5918874959337387, + "grad_norm": 1.150160551071167, + "learning_rate": 3.7676442313252892e-06, + "loss": 0.3209, + "step": 29567 + }, + { + "epoch": 0.5919075144508671, + "grad_norm": 1.113571047782898, + "learning_rate": 3.767330051916162e-06, + "loss": 0.2854, + "step": 29568 + }, + { + "epoch": 0.5919275329679954, + "grad_norm": 1.1729912757873535, + "learning_rate": 3.7670158776889122e-06, + "loss": 0.2563, + "step": 29569 + }, + { + "epoch": 0.5919475514851238, + "grad_norm": 1.2533135414123535, + "learning_rate": 3.766701708644864e-06, + "loss": 0.2949, + "step": 29570 + }, + { + "epoch": 0.5919675700022521, + "grad_norm": 1.8514580726623535, + "learning_rate": 3.766387544785337e-06, + "loss": 0.7232, + "step": 29571 + }, + { + "epoch": 0.5919875885193804, + "grad_norm": 1.0466861724853516, + "learning_rate": 3.7660733861116504e-06, + "loss": 0.284, + "step": 29572 + }, + { + "epoch": 0.5920076070365088, + "grad_norm": 1.042614459991455, + "learning_rate": 3.765759232625125e-06, + "loss": 0.2813, + "step": 29573 + }, + { + "epoch": 0.5920276255536371, + "grad_norm": 1.3108707666397095, + "learning_rate": 3.765445084327083e-06, + "loss": 0.3438, + "step": 29574 + }, + { + "epoch": 0.5920476440707655, + "grad_norm": 1.0708144903182983, + "learning_rate": 3.7651309412188446e-06, + "loss": 0.2436, + "step": 29575 + }, + { + "epoch": 0.5920676625878938, + "grad_norm": 1.0950425863265991, + "learning_rate": 3.76481680330173e-06, + "loss": 0.2934, + "step": 29576 + }, + { + "epoch": 0.5920876811050222, + "grad_norm": 1.2130036354064941, + "learning_rate": 3.7645026705770605e-06, + "loss": 0.2992, + "step": 29577 + }, + { + "epoch": 0.5921076996221505, + "grad_norm": 1.1601914167404175, + "learning_rate": 3.7641885430461526e-06, + "loss": 0.3281, + "step": 29578 + }, + { + "epoch": 0.5921277181392788, + "grad_norm": 1.2686909437179565, + "learning_rate": 3.763874420710333e-06, + "loss": 0.3304, + "step": 29579 + }, + { + "epoch": 0.5921477366564072, + "grad_norm": 1.137787103652954, + "learning_rate": 3.763560303570918e-06, + "loss": 0.3224, + "step": 29580 + }, + { + "epoch": 0.5921677551735355, + "grad_norm": 1.048010230064392, + "learning_rate": 3.7632461916292305e-06, + "loss": 0.2766, + "step": 29581 + }, + { + "epoch": 0.5921877736906639, + "grad_norm": 1.142487645149231, + "learning_rate": 3.762932084886588e-06, + "loss": 0.3496, + "step": 29582 + }, + { + "epoch": 0.5922077922077922, + "grad_norm": 1.0226414203643799, + "learning_rate": 3.7626179833443145e-06, + "loss": 0.2896, + "step": 29583 + }, + { + "epoch": 0.5922278107249206, + "grad_norm": 1.097314715385437, + "learning_rate": 3.762303887003729e-06, + "loss": 0.2861, + "step": 29584 + }, + { + "epoch": 0.5922478292420489, + "grad_norm": 1.040785551071167, + "learning_rate": 3.7619897958661506e-06, + "loss": 0.3022, + "step": 29585 + }, + { + "epoch": 0.5922678477591773, + "grad_norm": 1.239988088607788, + "learning_rate": 3.7616757099329015e-06, + "loss": 0.3779, + "step": 29586 + }, + { + "epoch": 0.5922878662763056, + "grad_norm": 1.0688475370407104, + "learning_rate": 3.761361629205299e-06, + "loss": 0.3549, + "step": 29587 + }, + { + "epoch": 0.5923078847934339, + "grad_norm": 1.2471765279769897, + "learning_rate": 3.761047553684668e-06, + "loss": 0.291, + "step": 29588 + }, + { + "epoch": 0.5923279033105623, + "grad_norm": 1.1142876148223877, + "learning_rate": 3.7607334833723263e-06, + "loss": 0.297, + "step": 29589 + }, + { + "epoch": 0.5923479218276906, + "grad_norm": 1.1284319162368774, + "learning_rate": 3.760419418269595e-06, + "loss": 0.3009, + "step": 29590 + }, + { + "epoch": 0.592367940344819, + "grad_norm": 1.140126347541809, + "learning_rate": 3.7601053583777906e-06, + "loss": 0.3198, + "step": 29591 + }, + { + "epoch": 0.5923879588619473, + "grad_norm": 1.1519968509674072, + "learning_rate": 3.759791303698239e-06, + "loss": 0.3051, + "step": 29592 + }, + { + "epoch": 0.5924079773790757, + "grad_norm": 1.0689884424209595, + "learning_rate": 3.7594772542322565e-06, + "loss": 0.2834, + "step": 29593 + }, + { + "epoch": 0.592427995896204, + "grad_norm": 1.1987794637680054, + "learning_rate": 3.7591632099811663e-06, + "loss": 0.2805, + "step": 29594 + }, + { + "epoch": 0.5924480144133323, + "grad_norm": 1.822715163230896, + "learning_rate": 3.758849170946285e-06, + "loss": 0.8065, + "step": 29595 + }, + { + "epoch": 0.5924680329304607, + "grad_norm": 1.0251966714859009, + "learning_rate": 3.7585351371289358e-06, + "loss": 0.3157, + "step": 29596 + }, + { + "epoch": 0.592488051447589, + "grad_norm": 1.930058240890503, + "learning_rate": 3.758221108530438e-06, + "loss": 0.7242, + "step": 29597 + }, + { + "epoch": 0.5925080699647174, + "grad_norm": 1.0640804767608643, + "learning_rate": 3.757907085152111e-06, + "loss": 0.2975, + "step": 29598 + }, + { + "epoch": 0.5925280884818457, + "grad_norm": 1.1109360456466675, + "learning_rate": 3.757593066995276e-06, + "loss": 0.3181, + "step": 29599 + }, + { + "epoch": 0.5925481069989741, + "grad_norm": 1.070544958114624, + "learning_rate": 3.7572790540612503e-06, + "loss": 0.3035, + "step": 29600 + }, + { + "epoch": 0.5925681255161024, + "grad_norm": 1.0236495733261108, + "learning_rate": 3.756965046351358e-06, + "loss": 0.2589, + "step": 29601 + }, + { + "epoch": 0.5925881440332308, + "grad_norm": 1.1022520065307617, + "learning_rate": 3.7566510438669174e-06, + "loss": 0.3567, + "step": 29602 + }, + { + "epoch": 0.5926081625503591, + "grad_norm": 1.115691900253296, + "learning_rate": 3.756337046609248e-06, + "loss": 0.308, + "step": 29603 + }, + { + "epoch": 0.5926281810674874, + "grad_norm": 1.1146684885025024, + "learning_rate": 3.7560230545796684e-06, + "loss": 0.2944, + "step": 29604 + }, + { + "epoch": 0.5926481995846158, + "grad_norm": 1.121899962425232, + "learning_rate": 3.755709067779502e-06, + "loss": 0.3223, + "step": 29605 + }, + { + "epoch": 0.5926682181017441, + "grad_norm": 1.0557913780212402, + "learning_rate": 3.7553950862100653e-06, + "loss": 0.3052, + "step": 29606 + }, + { + "epoch": 0.5926882366188725, + "grad_norm": 1.0549911260604858, + "learning_rate": 3.7550811098726813e-06, + "loss": 0.3102, + "step": 29607 + }, + { + "epoch": 0.5927082551360008, + "grad_norm": 1.2022534608840942, + "learning_rate": 3.754767138768668e-06, + "loss": 0.2902, + "step": 29608 + }, + { + "epoch": 0.5927282736531292, + "grad_norm": 1.0317168235778809, + "learning_rate": 3.7544531728993438e-06, + "loss": 0.2943, + "step": 29609 + }, + { + "epoch": 0.5927482921702575, + "grad_norm": 1.1467312574386597, + "learning_rate": 3.754139212266033e-06, + "loss": 0.2966, + "step": 29610 + }, + { + "epoch": 0.5927683106873858, + "grad_norm": 1.1412664651870728, + "learning_rate": 3.7538252568700516e-06, + "loss": 0.3277, + "step": 29611 + }, + { + "epoch": 0.5927883292045142, + "grad_norm": 1.0727965831756592, + "learning_rate": 3.7535113067127218e-06, + "loss": 0.2682, + "step": 29612 + }, + { + "epoch": 0.5928083477216425, + "grad_norm": 1.0832548141479492, + "learning_rate": 3.7531973617953597e-06, + "loss": 0.3351, + "step": 29613 + }, + { + "epoch": 0.5928283662387709, + "grad_norm": 1.2306716442108154, + "learning_rate": 3.75288342211929e-06, + "loss": 0.2681, + "step": 29614 + }, + { + "epoch": 0.5928483847558992, + "grad_norm": 1.0818617343902588, + "learning_rate": 3.7525694876858297e-06, + "loss": 0.3166, + "step": 29615 + }, + { + "epoch": 0.5928684032730276, + "grad_norm": 1.0414996147155762, + "learning_rate": 3.752255558496299e-06, + "loss": 0.2855, + "step": 29616 + }, + { + "epoch": 0.5928884217901559, + "grad_norm": 1.1023401021957397, + "learning_rate": 3.7519416345520165e-06, + "loss": 0.2466, + "step": 29617 + }, + { + "epoch": 0.5929084403072843, + "grad_norm": 1.0267720222473145, + "learning_rate": 3.7516277158543022e-06, + "loss": 0.2826, + "step": 29618 + }, + { + "epoch": 0.5929284588244126, + "grad_norm": 1.793198823928833, + "learning_rate": 3.751313802404477e-06, + "loss": 0.7398, + "step": 29619 + }, + { + "epoch": 0.5929484773415409, + "grad_norm": 1.139683485031128, + "learning_rate": 3.7509998942038605e-06, + "loss": 0.3331, + "step": 29620 + }, + { + "epoch": 0.5929684958586693, + "grad_norm": 1.2067497968673706, + "learning_rate": 3.750685991253772e-06, + "loss": 0.321, + "step": 29621 + }, + { + "epoch": 0.5929885143757976, + "grad_norm": 1.2654995918273926, + "learning_rate": 3.7503720935555283e-06, + "loss": 0.309, + "step": 29622 + }, + { + "epoch": 0.593008532892926, + "grad_norm": 1.118432641029358, + "learning_rate": 3.7500582011104537e-06, + "loss": 0.2547, + "step": 29623 + }, + { + "epoch": 0.5930285514100543, + "grad_norm": 1.0458722114562988, + "learning_rate": 3.7497443139198642e-06, + "loss": 0.2935, + "step": 29624 + }, + { + "epoch": 0.5930485699271827, + "grad_norm": 1.1850987672805786, + "learning_rate": 3.7494304319850815e-06, + "loss": 0.305, + "step": 29625 + }, + { + "epoch": 0.593068588444311, + "grad_norm": 1.1812775135040283, + "learning_rate": 3.749116555307423e-06, + "loss": 0.2857, + "step": 29626 + }, + { + "epoch": 0.5930886069614393, + "grad_norm": 1.2417033910751343, + "learning_rate": 3.7488026838882107e-06, + "loss": 0.3191, + "step": 29627 + }, + { + "epoch": 0.5931086254785677, + "grad_norm": 1.0841307640075684, + "learning_rate": 3.7484888177287628e-06, + "loss": 0.3175, + "step": 29628 + }, + { + "epoch": 0.593128643995696, + "grad_norm": 1.0534685850143433, + "learning_rate": 3.7481749568303982e-06, + "loss": 0.2971, + "step": 29629 + }, + { + "epoch": 0.5931486625128244, + "grad_norm": 1.1041053533554077, + "learning_rate": 3.7478611011944355e-06, + "loss": 0.3176, + "step": 29630 + }, + { + "epoch": 0.5931686810299527, + "grad_norm": 1.0643689632415771, + "learning_rate": 3.7475472508221956e-06, + "loss": 0.3174, + "step": 29631 + }, + { + "epoch": 0.5931886995470811, + "grad_norm": 1.7527236938476562, + "learning_rate": 3.7472334057149977e-06, + "loss": 0.805, + "step": 29632 + }, + { + "epoch": 0.5932087180642094, + "grad_norm": 1.005450963973999, + "learning_rate": 3.746919565874162e-06, + "loss": 0.299, + "step": 29633 + }, + { + "epoch": 0.5932287365813378, + "grad_norm": 1.1054607629776, + "learning_rate": 3.7466057313010063e-06, + "loss": 0.3131, + "step": 29634 + }, + { + "epoch": 0.5932487550984661, + "grad_norm": 1.0346710681915283, + "learning_rate": 3.746291901996849e-06, + "loss": 0.2973, + "step": 29635 + }, + { + "epoch": 0.5932687736155944, + "grad_norm": 1.1460373401641846, + "learning_rate": 3.7459780779630123e-06, + "loss": 0.2735, + "step": 29636 + }, + { + "epoch": 0.5932887921327228, + "grad_norm": 1.1434251070022583, + "learning_rate": 3.745664259200813e-06, + "loss": 0.3456, + "step": 29637 + }, + { + "epoch": 0.5933088106498511, + "grad_norm": 1.3041572570800781, + "learning_rate": 3.745350445711572e-06, + "loss": 0.2808, + "step": 29638 + }, + { + "epoch": 0.5933288291669795, + "grad_norm": 1.8531043529510498, + "learning_rate": 3.7450366374966086e-06, + "loss": 0.764, + "step": 29639 + }, + { + "epoch": 0.5933488476841078, + "grad_norm": 1.9491297006607056, + "learning_rate": 3.7447228345572384e-06, + "loss": 0.7563, + "step": 29640 + }, + { + "epoch": 0.5933688662012362, + "grad_norm": 1.163707971572876, + "learning_rate": 3.744409036894785e-06, + "loss": 0.342, + "step": 29641 + }, + { + "epoch": 0.5933888847183645, + "grad_norm": 1.1154097318649292, + "learning_rate": 3.744095244510566e-06, + "loss": 0.3064, + "step": 29642 + }, + { + "epoch": 0.5934089032354928, + "grad_norm": 1.128366231918335, + "learning_rate": 3.7437814574058995e-06, + "loss": 0.2985, + "step": 29643 + }, + { + "epoch": 0.5934289217526212, + "grad_norm": 1.1576861143112183, + "learning_rate": 3.743467675582105e-06, + "loss": 0.2956, + "step": 29644 + }, + { + "epoch": 0.5934489402697495, + "grad_norm": 1.1366969347000122, + "learning_rate": 3.7431538990405023e-06, + "loss": 0.3103, + "step": 29645 + }, + { + "epoch": 0.5934689587868779, + "grad_norm": 1.1176661252975464, + "learning_rate": 3.7428401277824115e-06, + "loss": 0.3121, + "step": 29646 + }, + { + "epoch": 0.5934889773040062, + "grad_norm": 1.2113909721374512, + "learning_rate": 3.7425263618091502e-06, + "loss": 0.3341, + "step": 29647 + }, + { + "epoch": 0.5935089958211346, + "grad_norm": 1.1781516075134277, + "learning_rate": 3.7422126011220373e-06, + "loss": 0.3083, + "step": 29648 + }, + { + "epoch": 0.5935290143382629, + "grad_norm": 1.1452202796936035, + "learning_rate": 3.74189884572239e-06, + "loss": 0.304, + "step": 29649 + }, + { + "epoch": 0.5935490328553913, + "grad_norm": 1.1440309286117554, + "learning_rate": 3.7415850956115295e-06, + "loss": 0.3198, + "step": 29650 + }, + { + "epoch": 0.5935690513725196, + "grad_norm": 1.2070112228393555, + "learning_rate": 3.741271350790776e-06, + "loss": 0.2915, + "step": 29651 + }, + { + "epoch": 0.5935890698896479, + "grad_norm": 1.1295517683029175, + "learning_rate": 3.7409576112614472e-06, + "loss": 0.271, + "step": 29652 + }, + { + "epoch": 0.5936090884067763, + "grad_norm": 1.0313851833343506, + "learning_rate": 3.740643877024859e-06, + "loss": 0.3014, + "step": 29653 + }, + { + "epoch": 0.5936291069239046, + "grad_norm": 1.0685855150222778, + "learning_rate": 3.740330148082335e-06, + "loss": 0.3001, + "step": 29654 + }, + { + "epoch": 0.593649125441033, + "grad_norm": 1.2702903747558594, + "learning_rate": 3.740016424435193e-06, + "loss": 0.2447, + "step": 29655 + }, + { + "epoch": 0.5936691439581613, + "grad_norm": 1.1228628158569336, + "learning_rate": 3.739702706084749e-06, + "loss": 0.3299, + "step": 29656 + }, + { + "epoch": 0.5936891624752897, + "grad_norm": 1.1223564147949219, + "learning_rate": 3.7393889930323235e-06, + "loss": 0.2821, + "step": 29657 + }, + { + "epoch": 0.593709180992418, + "grad_norm": 1.0534405708312988, + "learning_rate": 3.7390752852792358e-06, + "loss": 0.3135, + "step": 29658 + }, + { + "epoch": 0.5937291995095463, + "grad_norm": 1.0742897987365723, + "learning_rate": 3.7387615828268044e-06, + "loss": 0.3092, + "step": 29659 + }, + { + "epoch": 0.5937492180266747, + "grad_norm": 1.0419093370437622, + "learning_rate": 3.738447885676348e-06, + "loss": 0.2886, + "step": 29660 + }, + { + "epoch": 0.593769236543803, + "grad_norm": 1.1177968978881836, + "learning_rate": 3.738134193829186e-06, + "loss": 0.3389, + "step": 29661 + }, + { + "epoch": 0.5937892550609314, + "grad_norm": 1.1745563745498657, + "learning_rate": 3.7378205072866342e-06, + "loss": 0.2723, + "step": 29662 + }, + { + "epoch": 0.5938092735780597, + "grad_norm": 1.074941635131836, + "learning_rate": 3.7375068260500137e-06, + "loss": 0.286, + "step": 29663 + }, + { + "epoch": 0.5938292920951881, + "grad_norm": 1.0864150524139404, + "learning_rate": 3.7371931501206445e-06, + "loss": 0.2771, + "step": 29664 + }, + { + "epoch": 0.5938493106123164, + "grad_norm": 1.1545031070709229, + "learning_rate": 3.736879479499843e-06, + "loss": 0.3401, + "step": 29665 + }, + { + "epoch": 0.5938693291294448, + "grad_norm": 1.7442071437835693, + "learning_rate": 3.7365658141889267e-06, + "loss": 0.7451, + "step": 29666 + }, + { + "epoch": 0.5938893476465731, + "grad_norm": 2.027451515197754, + "learning_rate": 3.736252154189218e-06, + "loss": 0.811, + "step": 29667 + }, + { + "epoch": 0.5939093661637014, + "grad_norm": 1.1103390455245972, + "learning_rate": 3.735938499502033e-06, + "loss": 0.3298, + "step": 29668 + }, + { + "epoch": 0.5939293846808298, + "grad_norm": 1.8339439630508423, + "learning_rate": 3.7356248501286897e-06, + "loss": 0.6948, + "step": 29669 + }, + { + "epoch": 0.5939494031979581, + "grad_norm": 1.0821740627288818, + "learning_rate": 3.7353112060705077e-06, + "loss": 0.311, + "step": 29670 + }, + { + "epoch": 0.5939694217150865, + "grad_norm": 1.1332292556762695, + "learning_rate": 3.734997567328804e-06, + "loss": 0.283, + "step": 29671 + }, + { + "epoch": 0.5939894402322148, + "grad_norm": 1.7996063232421875, + "learning_rate": 3.7346839339049e-06, + "loss": 0.8767, + "step": 29672 + }, + { + "epoch": 0.5940094587493432, + "grad_norm": 1.3433570861816406, + "learning_rate": 3.7343703058001123e-06, + "loss": 0.3411, + "step": 29673 + }, + { + "epoch": 0.5940294772664715, + "grad_norm": 1.0614795684814453, + "learning_rate": 3.73405668301576e-06, + "loss": 0.3125, + "step": 29674 + }, + { + "epoch": 0.5940494957835998, + "grad_norm": 1.1865438222885132, + "learning_rate": 3.733743065553158e-06, + "loss": 0.3229, + "step": 29675 + }, + { + "epoch": 0.5940695143007282, + "grad_norm": 1.1307710409164429, + "learning_rate": 3.7334294534136296e-06, + "loss": 0.2999, + "step": 29676 + }, + { + "epoch": 0.5940895328178565, + "grad_norm": 1.2332360744476318, + "learning_rate": 3.733115846598492e-06, + "loss": 0.291, + "step": 29677 + }, + { + "epoch": 0.5941095513349849, + "grad_norm": 1.191225528717041, + "learning_rate": 3.7328022451090628e-06, + "loss": 0.304, + "step": 29678 + }, + { + "epoch": 0.5941295698521132, + "grad_norm": 2.0041918754577637, + "learning_rate": 3.73248864894666e-06, + "loss": 0.7686, + "step": 29679 + }, + { + "epoch": 0.5941495883692416, + "grad_norm": 1.149436116218567, + "learning_rate": 3.7321750581126003e-06, + "loss": 0.3206, + "step": 29680 + }, + { + "epoch": 0.5941696068863699, + "grad_norm": 1.2252163887023926, + "learning_rate": 3.731861472608206e-06, + "loss": 0.2749, + "step": 29681 + }, + { + "epoch": 0.5941896254034983, + "grad_norm": 1.08323335647583, + "learning_rate": 3.7315478924347916e-06, + "loss": 0.2938, + "step": 29682 + }, + { + "epoch": 0.5942096439206266, + "grad_norm": 1.0264782905578613, + "learning_rate": 3.7312343175936784e-06, + "loss": 0.2803, + "step": 29683 + }, + { + "epoch": 0.5942296624377549, + "grad_norm": 1.0456348657608032, + "learning_rate": 3.730920748086181e-06, + "loss": 0.2912, + "step": 29684 + }, + { + "epoch": 0.5942496809548833, + "grad_norm": 1.0834459066390991, + "learning_rate": 3.7306071839136214e-06, + "loss": 0.2893, + "step": 29685 + }, + { + "epoch": 0.5942696994720116, + "grad_norm": 1.0709925889968872, + "learning_rate": 3.730293625077316e-06, + "loss": 0.321, + "step": 29686 + }, + { + "epoch": 0.59428971798914, + "grad_norm": 1.1932088136672974, + "learning_rate": 3.7299800715785833e-06, + "loss": 0.3195, + "step": 29687 + }, + { + "epoch": 0.5943097365062683, + "grad_norm": 1.3048717975616455, + "learning_rate": 3.729666523418739e-06, + "loss": 0.3287, + "step": 29688 + }, + { + "epoch": 0.5943297550233967, + "grad_norm": 1.2127289772033691, + "learning_rate": 3.7293529805991042e-06, + "loss": 0.3034, + "step": 29689 + }, + { + "epoch": 0.594349773540525, + "grad_norm": 1.0800342559814453, + "learning_rate": 3.7290394431209968e-06, + "loss": 0.3141, + "step": 29690 + }, + { + "epoch": 0.5943697920576533, + "grad_norm": 1.2059305906295776, + "learning_rate": 3.7287259109857343e-06, + "loss": 0.3097, + "step": 29691 + }, + { + "epoch": 0.5943898105747817, + "grad_norm": 1.3783072233200073, + "learning_rate": 3.7284123841946344e-06, + "loss": 0.3215, + "step": 29692 + }, + { + "epoch": 0.59440982909191, + "grad_norm": 1.1707664728164673, + "learning_rate": 3.7280988627490133e-06, + "loss": 0.3696, + "step": 29693 + }, + { + "epoch": 0.5944298476090384, + "grad_norm": 1.1924948692321777, + "learning_rate": 3.7277853466501928e-06, + "loss": 0.3444, + "step": 29694 + }, + { + "epoch": 0.5944498661261667, + "grad_norm": 1.1651455163955688, + "learning_rate": 3.727471835899488e-06, + "loss": 0.2923, + "step": 29695 + }, + { + "epoch": 0.5944698846432951, + "grad_norm": 1.1371902227401733, + "learning_rate": 3.7271583304982183e-06, + "loss": 0.3656, + "step": 29696 + }, + { + "epoch": 0.5944899031604234, + "grad_norm": 1.1740829944610596, + "learning_rate": 3.7268448304477e-06, + "loss": 0.286, + "step": 29697 + }, + { + "epoch": 0.5945099216775518, + "grad_norm": 1.0805658102035522, + "learning_rate": 3.7265313357492532e-06, + "loss": 0.3068, + "step": 29698 + }, + { + "epoch": 0.5945299401946801, + "grad_norm": 1.1155918836593628, + "learning_rate": 3.7262178464041954e-06, + "loss": 0.3336, + "step": 29699 + }, + { + "epoch": 0.5945499587118084, + "grad_norm": 1.0881718397140503, + "learning_rate": 3.7259043624138426e-06, + "loss": 0.2679, + "step": 29700 + }, + { + "epoch": 0.5945699772289368, + "grad_norm": 1.1205931901931763, + "learning_rate": 3.725590883779513e-06, + "loss": 0.3252, + "step": 29701 + }, + { + "epoch": 0.5945899957460651, + "grad_norm": 1.8533374071121216, + "learning_rate": 3.7252774105025245e-06, + "loss": 0.7543, + "step": 29702 + }, + { + "epoch": 0.5946100142631935, + "grad_norm": 1.2888325452804565, + "learning_rate": 3.7249639425841975e-06, + "loss": 0.322, + "step": 29703 + }, + { + "epoch": 0.5946300327803218, + "grad_norm": 1.0434961318969727, + "learning_rate": 3.7246504800258477e-06, + "loss": 0.3184, + "step": 29704 + }, + { + "epoch": 0.5946500512974502, + "grad_norm": 1.0469774007797241, + "learning_rate": 3.7243370228287924e-06, + "loss": 0.311, + "step": 29705 + }, + { + "epoch": 0.5946700698145785, + "grad_norm": 1.0037730932235718, + "learning_rate": 3.7240235709943472e-06, + "loss": 0.266, + "step": 29706 + }, + { + "epoch": 0.5946900883317068, + "grad_norm": 1.0878827571868896, + "learning_rate": 3.7237101245238356e-06, + "loss": 0.3093, + "step": 29707 + }, + { + "epoch": 0.5947101068488352, + "grad_norm": 1.1282209157943726, + "learning_rate": 3.72339668341857e-06, + "loss": 0.3226, + "step": 29708 + }, + { + "epoch": 0.5947301253659635, + "grad_norm": 0.9888226985931396, + "learning_rate": 3.7230832476798717e-06, + "loss": 0.2631, + "step": 29709 + }, + { + "epoch": 0.5947501438830919, + "grad_norm": 1.1933493614196777, + "learning_rate": 3.7227698173090563e-06, + "loss": 0.2988, + "step": 29710 + }, + { + "epoch": 0.5947701624002202, + "grad_norm": 1.0985651016235352, + "learning_rate": 3.7224563923074396e-06, + "loss": 0.2969, + "step": 29711 + }, + { + "epoch": 0.5947901809173486, + "grad_norm": 1.0416293144226074, + "learning_rate": 3.7221429726763435e-06, + "loss": 0.2369, + "step": 29712 + }, + { + "epoch": 0.5948101994344769, + "grad_norm": 1.3413162231445312, + "learning_rate": 3.7218295584170832e-06, + "loss": 0.3087, + "step": 29713 + }, + { + "epoch": 0.5948302179516053, + "grad_norm": 1.0289522409439087, + "learning_rate": 3.721516149530975e-06, + "loss": 0.2778, + "step": 29714 + }, + { + "epoch": 0.5948502364687336, + "grad_norm": 1.1152242422103882, + "learning_rate": 3.721202746019338e-06, + "loss": 0.3099, + "step": 29715 + }, + { + "epoch": 0.5948702549858619, + "grad_norm": 1.0459704399108887, + "learning_rate": 3.7208893478834908e-06, + "loss": 0.3248, + "step": 29716 + }, + { + "epoch": 0.5948902735029903, + "grad_norm": 1.1342352628707886, + "learning_rate": 3.720575955124749e-06, + "loss": 0.3041, + "step": 29717 + }, + { + "epoch": 0.5949102920201186, + "grad_norm": 1.0917521715164185, + "learning_rate": 3.720262567744431e-06, + "loss": 0.289, + "step": 29718 + }, + { + "epoch": 0.594930310537247, + "grad_norm": 0.9983113408088684, + "learning_rate": 3.719949185743852e-06, + "loss": 0.2567, + "step": 29719 + }, + { + "epoch": 0.5949503290543753, + "grad_norm": 1.4256335496902466, + "learning_rate": 3.7196358091243323e-06, + "loss": 0.2528, + "step": 29720 + }, + { + "epoch": 0.5949703475715037, + "grad_norm": 1.7828789949417114, + "learning_rate": 3.719322437887188e-06, + "loss": 0.7806, + "step": 29721 + }, + { + "epoch": 0.594990366088632, + "grad_norm": 1.0334539413452148, + "learning_rate": 3.7190090720337367e-06, + "loss": 0.284, + "step": 29722 + }, + { + "epoch": 0.5950103846057603, + "grad_norm": 1.0602856874465942, + "learning_rate": 3.7186957115652965e-06, + "loss": 0.3088, + "step": 29723 + }, + { + "epoch": 0.5950304031228887, + "grad_norm": 1.0884217023849487, + "learning_rate": 3.7183823564831813e-06, + "loss": 0.2675, + "step": 29724 + }, + { + "epoch": 0.595050421640017, + "grad_norm": 1.1481223106384277, + "learning_rate": 3.718069006788713e-06, + "loss": 0.3518, + "step": 29725 + }, + { + "epoch": 0.5950704401571454, + "grad_norm": 1.110929250717163, + "learning_rate": 3.7177556624832066e-06, + "loss": 0.2767, + "step": 29726 + }, + { + "epoch": 0.5950904586742737, + "grad_norm": 1.997962474822998, + "learning_rate": 3.717442323567979e-06, + "loss": 0.7197, + "step": 29727 + }, + { + "epoch": 0.5951104771914021, + "grad_norm": 1.1211299896240234, + "learning_rate": 3.7171289900443463e-06, + "loss": 0.2799, + "step": 29728 + }, + { + "epoch": 0.5951304957085304, + "grad_norm": 1.1540734767913818, + "learning_rate": 3.7168156619136293e-06, + "loss": 0.3, + "step": 29729 + }, + { + "epoch": 0.5951505142256588, + "grad_norm": 2.0557076930999756, + "learning_rate": 3.716502339177143e-06, + "loss": 0.7351, + "step": 29730 + }, + { + "epoch": 0.5951705327427871, + "grad_norm": 1.25849449634552, + "learning_rate": 3.7161890218362053e-06, + "loss": 0.2911, + "step": 29731 + }, + { + "epoch": 0.5951905512599154, + "grad_norm": 1.1372538805007935, + "learning_rate": 3.7158757098921313e-06, + "loss": 0.3304, + "step": 29732 + }, + { + "epoch": 0.5952105697770438, + "grad_norm": 1.1456496715545654, + "learning_rate": 3.715562403346239e-06, + "loss": 0.2784, + "step": 29733 + }, + { + "epoch": 0.5952305882941721, + "grad_norm": 1.1648271083831787, + "learning_rate": 3.715249102199846e-06, + "loss": 0.3106, + "step": 29734 + }, + { + "epoch": 0.5952506068113005, + "grad_norm": 1.0512937307357788, + "learning_rate": 3.714935806454271e-06, + "loss": 0.2901, + "step": 29735 + }, + { + "epoch": 0.5952706253284288, + "grad_norm": 1.067409634590149, + "learning_rate": 3.714622516110829e-06, + "loss": 0.3072, + "step": 29736 + }, + { + "epoch": 0.5952906438455572, + "grad_norm": 1.2189369201660156, + "learning_rate": 3.714309231170835e-06, + "loss": 0.2892, + "step": 29737 + }, + { + "epoch": 0.5953106623626855, + "grad_norm": 1.1723331212997437, + "learning_rate": 3.7139959516356107e-06, + "loss": 0.3025, + "step": 29738 + }, + { + "epoch": 0.5953306808798138, + "grad_norm": 2.0209996700286865, + "learning_rate": 3.7136826775064705e-06, + "loss": 0.768, + "step": 29739 + }, + { + "epoch": 0.5953506993969422, + "grad_norm": 1.099361538887024, + "learning_rate": 3.7133694087847306e-06, + "loss": 0.3031, + "step": 29740 + }, + { + "epoch": 0.5953707179140705, + "grad_norm": 1.133609652519226, + "learning_rate": 3.7130561454717077e-06, + "loss": 0.3159, + "step": 29741 + }, + { + "epoch": 0.5953907364311989, + "grad_norm": 1.0461088418960571, + "learning_rate": 3.7127428875687224e-06, + "loss": 0.2985, + "step": 29742 + }, + { + "epoch": 0.5954107549483272, + "grad_norm": 1.058842420578003, + "learning_rate": 3.7124296350770883e-06, + "loss": 0.3003, + "step": 29743 + }, + { + "epoch": 0.5954307734654556, + "grad_norm": 1.0882816314697266, + "learning_rate": 3.712116387998123e-06, + "loss": 0.3467, + "step": 29744 + }, + { + "epoch": 0.5954507919825839, + "grad_norm": 1.105031132698059, + "learning_rate": 3.7118031463331422e-06, + "loss": 0.2715, + "step": 29745 + }, + { + "epoch": 0.5954708104997123, + "grad_norm": 1.0797853469848633, + "learning_rate": 3.711489910083464e-06, + "loss": 0.3115, + "step": 29746 + }, + { + "epoch": 0.5954908290168406, + "grad_norm": 1.0236458778381348, + "learning_rate": 3.7111766792504045e-06, + "loss": 0.2423, + "step": 29747 + }, + { + "epoch": 0.5955108475339689, + "grad_norm": 1.1277600526809692, + "learning_rate": 3.7108634538352827e-06, + "loss": 0.3244, + "step": 29748 + }, + { + "epoch": 0.5955308660510973, + "grad_norm": 1.0709203481674194, + "learning_rate": 3.710550233839413e-06, + "loss": 0.3302, + "step": 29749 + }, + { + "epoch": 0.5955508845682256, + "grad_norm": 1.033977746963501, + "learning_rate": 3.71023701926411e-06, + "loss": 0.3027, + "step": 29750 + }, + { + "epoch": 0.595570903085354, + "grad_norm": 1.1164946556091309, + "learning_rate": 3.7099238101106962e-06, + "loss": 0.325, + "step": 29751 + }, + { + "epoch": 0.5955909216024823, + "grad_norm": 1.130438208580017, + "learning_rate": 3.7096106063804844e-06, + "loss": 0.2993, + "step": 29752 + }, + { + "epoch": 0.5956109401196107, + "grad_norm": 1.7866325378417969, + "learning_rate": 3.709297408074791e-06, + "loss": 0.7311, + "step": 29753 + }, + { + "epoch": 0.595630958636739, + "grad_norm": 1.1080601215362549, + "learning_rate": 3.7089842151949345e-06, + "loss": 0.3392, + "step": 29754 + }, + { + "epoch": 0.5956509771538673, + "grad_norm": 0.9819872379302979, + "learning_rate": 3.708671027742229e-06, + "loss": 0.2882, + "step": 29755 + }, + { + "epoch": 0.5956709956709957, + "grad_norm": 1.1876870393753052, + "learning_rate": 3.7083578457179936e-06, + "loss": 0.2981, + "step": 29756 + }, + { + "epoch": 0.595691014188124, + "grad_norm": 1.2769603729248047, + "learning_rate": 3.708044669123545e-06, + "loss": 0.2961, + "step": 29757 + }, + { + "epoch": 0.5957110327052524, + "grad_norm": 1.2307674884796143, + "learning_rate": 3.707731497960197e-06, + "loss": 0.2937, + "step": 29758 + }, + { + "epoch": 0.5957310512223807, + "grad_norm": 1.1146557331085205, + "learning_rate": 3.707418332229268e-06, + "loss": 0.3269, + "step": 29759 + }, + { + "epoch": 0.5957510697395091, + "grad_norm": 1.1168111562728882, + "learning_rate": 3.7071051719320732e-06, + "loss": 0.3214, + "step": 29760 + }, + { + "epoch": 0.5957710882566374, + "grad_norm": 1.2163833379745483, + "learning_rate": 3.7067920170699323e-06, + "loss": 0.3365, + "step": 29761 + }, + { + "epoch": 0.5957911067737658, + "grad_norm": 1.1792035102844238, + "learning_rate": 3.706478867644159e-06, + "loss": 0.3579, + "step": 29762 + }, + { + "epoch": 0.5958111252908941, + "grad_norm": 1.2462482452392578, + "learning_rate": 3.70616572365607e-06, + "loss": 0.3607, + "step": 29763 + }, + { + "epoch": 0.5958311438080224, + "grad_norm": 1.1407889127731323, + "learning_rate": 3.7058525851069792e-06, + "loss": 0.323, + "step": 29764 + }, + { + "epoch": 0.5958511623251508, + "grad_norm": 1.4039883613586426, + "learning_rate": 3.7055394519982084e-06, + "loss": 0.3073, + "step": 29765 + }, + { + "epoch": 0.5958711808422791, + "grad_norm": 1.9318428039550781, + "learning_rate": 3.7052263243310703e-06, + "loss": 0.7335, + "step": 29766 + }, + { + "epoch": 0.5958911993594075, + "grad_norm": 1.074108600616455, + "learning_rate": 3.704913202106883e-06, + "loss": 0.3173, + "step": 29767 + }, + { + "epoch": 0.5959112178765358, + "grad_norm": 1.1240289211273193, + "learning_rate": 3.7046000853269592e-06, + "loss": 0.3215, + "step": 29768 + }, + { + "epoch": 0.5959312363936642, + "grad_norm": 1.0335352420806885, + "learning_rate": 3.7042869739926213e-06, + "loss": 0.271, + "step": 29769 + }, + { + "epoch": 0.5959512549107925, + "grad_norm": 1.0984710454940796, + "learning_rate": 3.703973868105181e-06, + "loss": 0.3025, + "step": 29770 + }, + { + "epoch": 0.5959712734279208, + "grad_norm": 1.1655861139297485, + "learning_rate": 3.7036607676659546e-06, + "loss": 0.311, + "step": 29771 + }, + { + "epoch": 0.5959912919450492, + "grad_norm": 1.2000094652175903, + "learning_rate": 3.7033476726762596e-06, + "loss": 0.354, + "step": 29772 + }, + { + "epoch": 0.5960113104621775, + "grad_norm": 1.3588277101516724, + "learning_rate": 3.7030345831374126e-06, + "loss": 0.3292, + "step": 29773 + }, + { + "epoch": 0.5960313289793059, + "grad_norm": 1.9648034572601318, + "learning_rate": 3.702721499050729e-06, + "loss": 0.8725, + "step": 29774 + }, + { + "epoch": 0.5960513474964342, + "grad_norm": 1.0856668949127197, + "learning_rate": 3.702408420417526e-06, + "loss": 0.288, + "step": 29775 + }, + { + "epoch": 0.5960713660135626, + "grad_norm": 1.2722498178482056, + "learning_rate": 3.7020953472391185e-06, + "loss": 0.3038, + "step": 29776 + }, + { + "epoch": 0.5960913845306909, + "grad_norm": 1.4735969305038452, + "learning_rate": 3.70178227951682e-06, + "loss": 0.3252, + "step": 29777 + }, + { + "epoch": 0.5961114030478193, + "grad_norm": 1.1603319644927979, + "learning_rate": 3.701469217251952e-06, + "loss": 0.3221, + "step": 29778 + }, + { + "epoch": 0.5961314215649476, + "grad_norm": 1.1100140810012817, + "learning_rate": 3.7011561604458274e-06, + "loss": 0.3113, + "step": 29779 + }, + { + "epoch": 0.5961514400820759, + "grad_norm": 1.1261720657348633, + "learning_rate": 3.7008431090997633e-06, + "loss": 0.3118, + "step": 29780 + }, + { + "epoch": 0.5961714585992043, + "grad_norm": 1.0966681241989136, + "learning_rate": 3.700530063215073e-06, + "loss": 0.3019, + "step": 29781 + }, + { + "epoch": 0.5961914771163326, + "grad_norm": 1.0230523347854614, + "learning_rate": 3.7002170227930777e-06, + "loss": 0.2623, + "step": 29782 + }, + { + "epoch": 0.596211495633461, + "grad_norm": 1.0785622596740723, + "learning_rate": 3.6999039878350895e-06, + "loss": 0.3189, + "step": 29783 + }, + { + "epoch": 0.5962315141505893, + "grad_norm": 1.1960499286651611, + "learning_rate": 3.699590958342424e-06, + "loss": 0.3104, + "step": 29784 + }, + { + "epoch": 0.5962515326677177, + "grad_norm": 1.1097338199615479, + "learning_rate": 3.6992779343164e-06, + "loss": 0.2799, + "step": 29785 + }, + { + "epoch": 0.596271551184846, + "grad_norm": 1.1297235488891602, + "learning_rate": 3.6989649157583287e-06, + "loss": 0.3537, + "step": 29786 + }, + { + "epoch": 0.5962915697019743, + "grad_norm": 1.0744338035583496, + "learning_rate": 3.698651902669531e-06, + "loss": 0.3234, + "step": 29787 + }, + { + "epoch": 0.5963115882191027, + "grad_norm": 1.2676976919174194, + "learning_rate": 3.6983388950513217e-06, + "loss": 0.2812, + "step": 29788 + }, + { + "epoch": 0.596331606736231, + "grad_norm": 1.0241217613220215, + "learning_rate": 3.6980258929050152e-06, + "loss": 0.2857, + "step": 29789 + }, + { + "epoch": 0.5963516252533594, + "grad_norm": 1.0900413990020752, + "learning_rate": 3.6977128962319248e-06, + "loss": 0.3419, + "step": 29790 + }, + { + "epoch": 0.5963716437704877, + "grad_norm": 1.004516839981079, + "learning_rate": 3.697399905033372e-06, + "loss": 0.2675, + "step": 29791 + }, + { + "epoch": 0.5963916622876161, + "grad_norm": 1.119869589805603, + "learning_rate": 3.6970869193106686e-06, + "loss": 0.3116, + "step": 29792 + }, + { + "epoch": 0.5964116808047444, + "grad_norm": 1.072521686553955, + "learning_rate": 3.6967739390651313e-06, + "loss": 0.2773, + "step": 29793 + }, + { + "epoch": 0.5964316993218727, + "grad_norm": 1.8765109777450562, + "learning_rate": 3.6964609642980776e-06, + "loss": 0.8311, + "step": 29794 + }, + { + "epoch": 0.5964517178390011, + "grad_norm": 1.1904664039611816, + "learning_rate": 3.696147995010818e-06, + "loss": 0.2859, + "step": 29795 + }, + { + "epoch": 0.5964717363561294, + "grad_norm": 1.040977954864502, + "learning_rate": 3.695835031204674e-06, + "loss": 0.305, + "step": 29796 + }, + { + "epoch": 0.5964917548732578, + "grad_norm": 1.8241961002349854, + "learning_rate": 3.6955220728809584e-06, + "loss": 0.7953, + "step": 29797 + }, + { + "epoch": 0.5965117733903861, + "grad_norm": 2.1112313270568848, + "learning_rate": 3.6952091200409878e-06, + "loss": 0.7411, + "step": 29798 + }, + { + "epoch": 0.5965317919075145, + "grad_norm": 1.2573163509368896, + "learning_rate": 3.6948961726860756e-06, + "loss": 0.3245, + "step": 29799 + }, + { + "epoch": 0.5965518104246428, + "grad_norm": 1.163459300994873, + "learning_rate": 3.694583230817541e-06, + "loss": 0.3313, + "step": 29800 + }, + { + "epoch": 0.5965718289417712, + "grad_norm": 1.249981164932251, + "learning_rate": 3.6942702944366975e-06, + "loss": 0.2993, + "step": 29801 + }, + { + "epoch": 0.5965918474588995, + "grad_norm": 1.1534548997879028, + "learning_rate": 3.6939573635448613e-06, + "loss": 0.313, + "step": 29802 + }, + { + "epoch": 0.5966118659760278, + "grad_norm": 1.1940816640853882, + "learning_rate": 3.6936444381433444e-06, + "loss": 0.2905, + "step": 29803 + }, + { + "epoch": 0.5966318844931562, + "grad_norm": 1.2239458560943604, + "learning_rate": 3.6933315182334673e-06, + "loss": 0.3053, + "step": 29804 + }, + { + "epoch": 0.5966519030102845, + "grad_norm": 1.0463947057724, + "learning_rate": 3.693018603816542e-06, + "loss": 0.2891, + "step": 29805 + }, + { + "epoch": 0.5966719215274129, + "grad_norm": 1.972937822341919, + "learning_rate": 3.6927056948938865e-06, + "loss": 0.7392, + "step": 29806 + }, + { + "epoch": 0.5966919400445412, + "grad_norm": 1.250607967376709, + "learning_rate": 3.6923927914668157e-06, + "loss": 0.2866, + "step": 29807 + }, + { + "epoch": 0.5967119585616696, + "grad_norm": 1.0246074199676514, + "learning_rate": 3.6920798935366415e-06, + "loss": 0.2803, + "step": 29808 + }, + { + "epoch": 0.5967319770787979, + "grad_norm": 1.280330777168274, + "learning_rate": 3.6917670011046835e-06, + "loss": 0.298, + "step": 29809 + }, + { + "epoch": 0.5967519955959262, + "grad_norm": 1.0188947916030884, + "learning_rate": 3.6914541141722555e-06, + "loss": 0.3185, + "step": 29810 + }, + { + "epoch": 0.5967720141130546, + "grad_norm": 1.0679007768630981, + "learning_rate": 3.6911412327406733e-06, + "loss": 0.2933, + "step": 29811 + }, + { + "epoch": 0.5967920326301829, + "grad_norm": 1.1527832746505737, + "learning_rate": 3.6908283568112494e-06, + "loss": 0.3522, + "step": 29812 + }, + { + "epoch": 0.5968120511473113, + "grad_norm": 2.0028975009918213, + "learning_rate": 3.6905154863853036e-06, + "loss": 0.7788, + "step": 29813 + }, + { + "epoch": 0.5968320696644396, + "grad_norm": 1.0227910280227661, + "learning_rate": 3.6902026214641494e-06, + "loss": 0.2801, + "step": 29814 + }, + { + "epoch": 0.596852088181568, + "grad_norm": 2.079875946044922, + "learning_rate": 3.6898897620491008e-06, + "loss": 0.7731, + "step": 29815 + }, + { + "epoch": 0.5968721066986963, + "grad_norm": 1.0962377786636353, + "learning_rate": 3.6895769081414734e-06, + "loss": 0.336, + "step": 29816 + }, + { + "epoch": 0.5968921252158247, + "grad_norm": 1.084317684173584, + "learning_rate": 3.689264059742582e-06, + "loss": 0.2909, + "step": 29817 + }, + { + "epoch": 0.596912143732953, + "grad_norm": 1.0130302906036377, + "learning_rate": 3.688951216853742e-06, + "loss": 0.2909, + "step": 29818 + }, + { + "epoch": 0.5969321622500813, + "grad_norm": 1.2336918115615845, + "learning_rate": 3.6886383794762715e-06, + "loss": 0.3014, + "step": 29819 + }, + { + "epoch": 0.5969521807672097, + "grad_norm": 1.0325437784194946, + "learning_rate": 3.688325547611482e-06, + "loss": 0.2728, + "step": 29820 + }, + { + "epoch": 0.596972199284338, + "grad_norm": 1.1549113988876343, + "learning_rate": 3.6880127212606875e-06, + "loss": 0.2994, + "step": 29821 + }, + { + "epoch": 0.5969922178014664, + "grad_norm": 1.1123521327972412, + "learning_rate": 3.6876999004252075e-06, + "loss": 0.2766, + "step": 29822 + }, + { + "epoch": 0.5970122363185947, + "grad_norm": 1.0233625173568726, + "learning_rate": 3.687387085106353e-06, + "loss": 0.265, + "step": 29823 + }, + { + "epoch": 0.5970322548357231, + "grad_norm": 1.0351319313049316, + "learning_rate": 3.687074275305443e-06, + "loss": 0.2908, + "step": 29824 + }, + { + "epoch": 0.5970522733528514, + "grad_norm": 1.0699928998947144, + "learning_rate": 3.68676147102379e-06, + "loss": 0.3446, + "step": 29825 + }, + { + "epoch": 0.5970722918699797, + "grad_norm": 1.0041168928146362, + "learning_rate": 3.686448672262706e-06, + "loss": 0.2624, + "step": 29826 + }, + { + "epoch": 0.5970923103871081, + "grad_norm": 1.1211202144622803, + "learning_rate": 3.686135879023512e-06, + "loss": 0.3183, + "step": 29827 + }, + { + "epoch": 0.5971123289042364, + "grad_norm": 1.0961896181106567, + "learning_rate": 3.68582309130752e-06, + "loss": 0.326, + "step": 29828 + }, + { + "epoch": 0.5971323474213648, + "grad_norm": 1.1228256225585938, + "learning_rate": 3.6855103091160437e-06, + "loss": 0.3216, + "step": 29829 + }, + { + "epoch": 0.5971523659384931, + "grad_norm": 1.0736137628555298, + "learning_rate": 3.685197532450399e-06, + "loss": 0.2654, + "step": 29830 + }, + { + "epoch": 0.5971723844556215, + "grad_norm": 1.9617196321487427, + "learning_rate": 3.684884761311901e-06, + "loss": 0.8194, + "step": 29831 + }, + { + "epoch": 0.5971924029727498, + "grad_norm": 1.2329809665679932, + "learning_rate": 3.6845719957018655e-06, + "loss": 0.3336, + "step": 29832 + }, + { + "epoch": 0.5972124214898782, + "grad_norm": 1.2450467348098755, + "learning_rate": 3.6842592356216066e-06, + "loss": 0.3726, + "step": 29833 + }, + { + "epoch": 0.5972324400070065, + "grad_norm": 1.3680075407028198, + "learning_rate": 3.683946481072437e-06, + "loss": 0.2952, + "step": 29834 + }, + { + "epoch": 0.5972524585241348, + "grad_norm": 1.1207033395767212, + "learning_rate": 3.6836337320556744e-06, + "loss": 0.3106, + "step": 29835 + }, + { + "epoch": 0.5972724770412632, + "grad_norm": 1.9026325941085815, + "learning_rate": 3.683320988572632e-06, + "loss": 0.7942, + "step": 29836 + }, + { + "epoch": 0.5972924955583915, + "grad_norm": 0.9959787130355835, + "learning_rate": 3.6830082506246243e-06, + "loss": 0.2496, + "step": 29837 + }, + { + "epoch": 0.5973125140755199, + "grad_norm": 1.100563406944275, + "learning_rate": 3.682695518212968e-06, + "loss": 0.3106, + "step": 29838 + }, + { + "epoch": 0.5973325325926482, + "grad_norm": 1.1525291204452515, + "learning_rate": 3.6823827913389737e-06, + "loss": 0.307, + "step": 29839 + }, + { + "epoch": 0.5973525511097766, + "grad_norm": 1.1770164966583252, + "learning_rate": 3.6820700700039607e-06, + "loss": 0.346, + "step": 29840 + }, + { + "epoch": 0.5973725696269049, + "grad_norm": 1.1156829595565796, + "learning_rate": 3.6817573542092414e-06, + "loss": 0.3115, + "step": 29841 + }, + { + "epoch": 0.5973925881440332, + "grad_norm": 1.0337938070297241, + "learning_rate": 3.6814446439561302e-06, + "loss": 0.3021, + "step": 29842 + }, + { + "epoch": 0.5974126066611616, + "grad_norm": 1.080292820930481, + "learning_rate": 3.6811319392459412e-06, + "loss": 0.2991, + "step": 29843 + }, + { + "epoch": 0.5974326251782899, + "grad_norm": 1.1682605743408203, + "learning_rate": 3.68081924007999e-06, + "loss": 0.3361, + "step": 29844 + }, + { + "epoch": 0.5974526436954183, + "grad_norm": 1.4339687824249268, + "learning_rate": 3.680506546459591e-06, + "loss": 0.2819, + "step": 29845 + }, + { + "epoch": 0.5974726622125466, + "grad_norm": 1.0744681358337402, + "learning_rate": 3.6801938583860597e-06, + "loss": 0.2924, + "step": 29846 + }, + { + "epoch": 0.597492680729675, + "grad_norm": 1.2199757099151611, + "learning_rate": 3.679881175860709e-06, + "loss": 0.3221, + "step": 29847 + }, + { + "epoch": 0.5975126992468033, + "grad_norm": 1.0819435119628906, + "learning_rate": 3.679568498884851e-06, + "loss": 0.2901, + "step": 29848 + }, + { + "epoch": 0.5975327177639317, + "grad_norm": 1.0997138023376465, + "learning_rate": 3.679255827459805e-06, + "loss": 0.298, + "step": 29849 + }, + { + "epoch": 0.59755273628106, + "grad_norm": 2.0157909393310547, + "learning_rate": 3.6789431615868835e-06, + "loss": 0.7267, + "step": 29850 + }, + { + "epoch": 0.5975727547981883, + "grad_norm": 1.0340924263000488, + "learning_rate": 3.6786305012674013e-06, + "loss": 0.2895, + "step": 29851 + }, + { + "epoch": 0.5975927733153167, + "grad_norm": 1.131734013557434, + "learning_rate": 3.6783178465026697e-06, + "loss": 0.3025, + "step": 29852 + }, + { + "epoch": 0.597612791832445, + "grad_norm": 1.05851411819458, + "learning_rate": 3.678005197294008e-06, + "loss": 0.3112, + "step": 29853 + }, + { + "epoch": 0.5976328103495734, + "grad_norm": 1.1593329906463623, + "learning_rate": 3.6776925536427264e-06, + "loss": 0.3112, + "step": 29854 + }, + { + "epoch": 0.5976528288667017, + "grad_norm": 1.795975923538208, + "learning_rate": 3.677379915550141e-06, + "loss": 0.7885, + "step": 29855 + }, + { + "epoch": 0.5976728473838301, + "grad_norm": 1.174087643623352, + "learning_rate": 3.677067283017567e-06, + "loss": 0.3038, + "step": 29856 + }, + { + "epoch": 0.5976928659009584, + "grad_norm": 1.2788372039794922, + "learning_rate": 3.6767546560463153e-06, + "loss": 0.2861, + "step": 29857 + }, + { + "epoch": 0.5977128844180867, + "grad_norm": 1.8870882987976074, + "learning_rate": 3.676442034637704e-06, + "loss": 0.7624, + "step": 29858 + }, + { + "epoch": 0.5977329029352151, + "grad_norm": 1.3964980840682983, + "learning_rate": 3.6761294187930452e-06, + "loss": 0.3146, + "step": 29859 + }, + { + "epoch": 0.5977529214523434, + "grad_norm": 1.193823218345642, + "learning_rate": 3.6758168085136535e-06, + "loss": 0.3159, + "step": 29860 + }, + { + "epoch": 0.5977729399694718, + "grad_norm": 1.0163689851760864, + "learning_rate": 3.6755042038008416e-06, + "loss": 0.2651, + "step": 29861 + }, + { + "epoch": 0.5977929584866001, + "grad_norm": 1.2701692581176758, + "learning_rate": 3.6751916046559254e-06, + "loss": 0.3098, + "step": 29862 + }, + { + "epoch": 0.5978129770037285, + "grad_norm": 1.7339130640029907, + "learning_rate": 3.6748790110802203e-06, + "loss": 0.803, + "step": 29863 + }, + { + "epoch": 0.5978329955208568, + "grad_norm": 1.1129921674728394, + "learning_rate": 3.6745664230750376e-06, + "loss": 0.2688, + "step": 29864 + }, + { + "epoch": 0.5978530140379852, + "grad_norm": 1.9151382446289062, + "learning_rate": 3.674253840641692e-06, + "loss": 0.7513, + "step": 29865 + }, + { + "epoch": 0.5978730325551135, + "grad_norm": 1.252520203590393, + "learning_rate": 3.673941263781499e-06, + "loss": 0.3035, + "step": 29866 + }, + { + "epoch": 0.5978930510722418, + "grad_norm": 2.0294413566589355, + "learning_rate": 3.673628692495771e-06, + "loss": 0.7788, + "step": 29867 + }, + { + "epoch": 0.5979130695893702, + "grad_norm": 1.1524075269699097, + "learning_rate": 3.673316126785822e-06, + "loss": 0.3157, + "step": 29868 + }, + { + "epoch": 0.5979330881064985, + "grad_norm": 1.250712513923645, + "learning_rate": 3.6730035666529674e-06, + "loss": 0.3472, + "step": 29869 + }, + { + "epoch": 0.5979531066236269, + "grad_norm": 1.2340749502182007, + "learning_rate": 3.672691012098518e-06, + "loss": 0.3027, + "step": 29870 + }, + { + "epoch": 0.5979731251407552, + "grad_norm": 1.210152506828308, + "learning_rate": 3.672378463123793e-06, + "loss": 0.2898, + "step": 29871 + }, + { + "epoch": 0.5979931436578836, + "grad_norm": 1.1230357885360718, + "learning_rate": 3.6720659197301023e-06, + "loss": 0.2755, + "step": 29872 + }, + { + "epoch": 0.5980131621750119, + "grad_norm": 1.2632825374603271, + "learning_rate": 3.6717533819187613e-06, + "loss": 0.3098, + "step": 29873 + }, + { + "epoch": 0.5980331806921402, + "grad_norm": 1.0891711711883545, + "learning_rate": 3.671440849691081e-06, + "loss": 0.2587, + "step": 29874 + }, + { + "epoch": 0.5980531992092686, + "grad_norm": 1.2330397367477417, + "learning_rate": 3.6711283230483785e-06, + "loss": 0.3325, + "step": 29875 + }, + { + "epoch": 0.5980732177263969, + "grad_norm": 1.8168779611587524, + "learning_rate": 3.670815801991967e-06, + "loss": 0.821, + "step": 29876 + }, + { + "epoch": 0.5980932362435253, + "grad_norm": 1.272695541381836, + "learning_rate": 3.6705032865231603e-06, + "loss": 0.3065, + "step": 29877 + }, + { + "epoch": 0.5981132547606536, + "grad_norm": 1.1637128591537476, + "learning_rate": 3.670190776643272e-06, + "loss": 0.2829, + "step": 29878 + }, + { + "epoch": 0.598133273277782, + "grad_norm": 1.0496668815612793, + "learning_rate": 3.669878272353613e-06, + "loss": 0.2666, + "step": 29879 + }, + { + "epoch": 0.5981532917949103, + "grad_norm": 2.011178731918335, + "learning_rate": 3.6695657736555012e-06, + "loss": 0.7995, + "step": 29880 + }, + { + "epoch": 0.5981733103120387, + "grad_norm": 1.0243767499923706, + "learning_rate": 3.669253280550248e-06, + "loss": 0.2628, + "step": 29881 + }, + { + "epoch": 0.598193328829167, + "grad_norm": 1.086965560913086, + "learning_rate": 3.668940793039169e-06, + "loss": 0.3261, + "step": 29882 + }, + { + "epoch": 0.5982133473462953, + "grad_norm": 1.9333713054656982, + "learning_rate": 3.6686283111235743e-06, + "loss": 0.8353, + "step": 29883 + }, + { + "epoch": 0.5982333658634237, + "grad_norm": 1.0852456092834473, + "learning_rate": 3.668315834804781e-06, + "loss": 0.3143, + "step": 29884 + }, + { + "epoch": 0.598253384380552, + "grad_norm": 1.1286697387695312, + "learning_rate": 3.6680033640841017e-06, + "loss": 0.3112, + "step": 29885 + }, + { + "epoch": 0.5982734028976804, + "grad_norm": 1.0843676328659058, + "learning_rate": 3.66769089896285e-06, + "loss": 0.3124, + "step": 29886 + }, + { + "epoch": 0.5982934214148087, + "grad_norm": 1.0949863195419312, + "learning_rate": 3.6673784394423368e-06, + "loss": 0.3343, + "step": 29887 + }, + { + "epoch": 0.5983134399319371, + "grad_norm": 1.2581367492675781, + "learning_rate": 3.6670659855238786e-06, + "loss": 0.3094, + "step": 29888 + }, + { + "epoch": 0.5983334584490654, + "grad_norm": 1.2237612009048462, + "learning_rate": 3.666753537208789e-06, + "loss": 0.3518, + "step": 29889 + }, + { + "epoch": 0.5983534769661937, + "grad_norm": 2.0400893688201904, + "learning_rate": 3.6664410944983814e-06, + "loss": 0.7285, + "step": 29890 + }, + { + "epoch": 0.5983734954833221, + "grad_norm": 1.2236014604568481, + "learning_rate": 3.666128657393968e-06, + "loss": 0.29, + "step": 29891 + }, + { + "epoch": 0.5983935140004504, + "grad_norm": 1.1831754446029663, + "learning_rate": 3.665816225896861e-06, + "loss": 0.2987, + "step": 29892 + }, + { + "epoch": 0.5984135325175788, + "grad_norm": 1.3267438411712646, + "learning_rate": 3.665503800008377e-06, + "loss": 0.2768, + "step": 29893 + }, + { + "epoch": 0.5984335510347071, + "grad_norm": 1.1244450807571411, + "learning_rate": 3.6651913797298267e-06, + "loss": 0.294, + "step": 29894 + }, + { + "epoch": 0.5984535695518355, + "grad_norm": 1.0891697406768799, + "learning_rate": 3.664878965062526e-06, + "loss": 0.3193, + "step": 29895 + }, + { + "epoch": 0.5984735880689638, + "grad_norm": 1.0749258995056152, + "learning_rate": 3.664566556007785e-06, + "loss": 0.2965, + "step": 29896 + }, + { + "epoch": 0.5984936065860922, + "grad_norm": 1.1774123907089233, + "learning_rate": 3.6642541525669208e-06, + "loss": 0.291, + "step": 29897 + }, + { + "epoch": 0.5985136251032205, + "grad_norm": 1.1843069791793823, + "learning_rate": 3.663941754741244e-06, + "loss": 0.3032, + "step": 29898 + }, + { + "epoch": 0.5985336436203488, + "grad_norm": 1.1857621669769287, + "learning_rate": 3.663629362532069e-06, + "loss": 0.337, + "step": 29899 + }, + { + "epoch": 0.5985536621374772, + "grad_norm": 1.0495247840881348, + "learning_rate": 3.663316975940707e-06, + "loss": 0.2957, + "step": 29900 + }, + { + "epoch": 0.5985736806546055, + "grad_norm": 1.3182233572006226, + "learning_rate": 3.6630045949684723e-06, + "loss": 0.2858, + "step": 29901 + }, + { + "epoch": 0.5985936991717339, + "grad_norm": 1.1382651329040527, + "learning_rate": 3.662692219616681e-06, + "loss": 0.2617, + "step": 29902 + }, + { + "epoch": 0.5986137176888622, + "grad_norm": 1.1369962692260742, + "learning_rate": 3.6623798498866435e-06, + "loss": 0.2985, + "step": 29903 + }, + { + "epoch": 0.5986337362059906, + "grad_norm": 1.06328284740448, + "learning_rate": 3.662067485779673e-06, + "loss": 0.3414, + "step": 29904 + }, + { + "epoch": 0.5986537547231189, + "grad_norm": 1.0710042715072632, + "learning_rate": 3.661755127297081e-06, + "loss": 0.3272, + "step": 29905 + }, + { + "epoch": 0.5986737732402472, + "grad_norm": 1.1331074237823486, + "learning_rate": 3.661442774440184e-06, + "loss": 0.2902, + "step": 29906 + }, + { + "epoch": 0.5986937917573756, + "grad_norm": 1.2404029369354248, + "learning_rate": 3.6611304272102932e-06, + "loss": 0.2477, + "step": 29907 + }, + { + "epoch": 0.5987138102745039, + "grad_norm": 1.2501075267791748, + "learning_rate": 3.660818085608723e-06, + "loss": 0.2935, + "step": 29908 + }, + { + "epoch": 0.5987338287916323, + "grad_norm": 1.0835795402526855, + "learning_rate": 3.6605057496367845e-06, + "loss": 0.2978, + "step": 29909 + }, + { + "epoch": 0.5987538473087606, + "grad_norm": 1.220328688621521, + "learning_rate": 3.66019341929579e-06, + "loss": 0.3333, + "step": 29910 + }, + { + "epoch": 0.598773865825889, + "grad_norm": 1.8908387422561646, + "learning_rate": 3.6598810945870566e-06, + "loss": 0.7655, + "step": 29911 + }, + { + "epoch": 0.5987938843430173, + "grad_norm": 1.0549416542053223, + "learning_rate": 3.659568775511894e-06, + "loss": 0.3461, + "step": 29912 + }, + { + "epoch": 0.5988139028601457, + "grad_norm": 1.1730159521102905, + "learning_rate": 3.6592564620716155e-06, + "loss": 0.3051, + "step": 29913 + }, + { + "epoch": 0.598833921377274, + "grad_norm": 1.165040135383606, + "learning_rate": 3.6589441542675325e-06, + "loss": 0.3213, + "step": 29914 + }, + { + "epoch": 0.5988539398944023, + "grad_norm": 1.1156479120254517, + "learning_rate": 3.6586318521009624e-06, + "loss": 0.2899, + "step": 29915 + }, + { + "epoch": 0.5988739584115307, + "grad_norm": 1.1267962455749512, + "learning_rate": 3.658319555573215e-06, + "loss": 0.3112, + "step": 29916 + }, + { + "epoch": 0.598893976928659, + "grad_norm": 1.9182562828063965, + "learning_rate": 3.6580072646856033e-06, + "loss": 0.7149, + "step": 29917 + }, + { + "epoch": 0.5989139954457874, + "grad_norm": 1.1394801139831543, + "learning_rate": 3.657694979439438e-06, + "loss": 0.3052, + "step": 29918 + }, + { + "epoch": 0.5989340139629157, + "grad_norm": 1.0774266719818115, + "learning_rate": 3.6573826998360367e-06, + "loss": 0.3056, + "step": 29919 + }, + { + "epoch": 0.5989540324800441, + "grad_norm": 1.1342281103134155, + "learning_rate": 3.6570704258767086e-06, + "loss": 0.3249, + "step": 29920 + }, + { + "epoch": 0.5989740509971724, + "grad_norm": 1.255947470664978, + "learning_rate": 3.6567581575627683e-06, + "loss": 0.2877, + "step": 29921 + }, + { + "epoch": 0.5989940695143007, + "grad_norm": 1.8777287006378174, + "learning_rate": 3.6564458948955274e-06, + "loss": 0.8472, + "step": 29922 + }, + { + "epoch": 0.5990140880314291, + "grad_norm": 2.0352609157562256, + "learning_rate": 3.6561336378762966e-06, + "loss": 0.7408, + "step": 29923 + }, + { + "epoch": 0.5990341065485574, + "grad_norm": 1.1297776699066162, + "learning_rate": 3.6558213865063936e-06, + "loss": 0.2529, + "step": 29924 + }, + { + "epoch": 0.5990541250656858, + "grad_norm": 1.1402589082717896, + "learning_rate": 3.655509140787128e-06, + "loss": 0.2894, + "step": 29925 + }, + { + "epoch": 0.5990741435828141, + "grad_norm": 1.1039047241210938, + "learning_rate": 3.655196900719811e-06, + "loss": 0.2759, + "step": 29926 + }, + { + "epoch": 0.5990941620999425, + "grad_norm": 1.3471651077270508, + "learning_rate": 3.6548846663057565e-06, + "loss": 0.2853, + "step": 29927 + }, + { + "epoch": 0.5991141806170708, + "grad_norm": 1.116040587425232, + "learning_rate": 3.6545724375462786e-06, + "loss": 0.2624, + "step": 29928 + }, + { + "epoch": 0.5991341991341992, + "grad_norm": 1.020414113998413, + "learning_rate": 3.6542602144426897e-06, + "loss": 0.2915, + "step": 29929 + }, + { + "epoch": 0.5991542176513275, + "grad_norm": 1.0647097826004028, + "learning_rate": 3.653947996996301e-06, + "loss": 0.3107, + "step": 29930 + }, + { + "epoch": 0.5991742361684558, + "grad_norm": 1.296492099761963, + "learning_rate": 3.6536357852084236e-06, + "loss": 0.2905, + "step": 29931 + }, + { + "epoch": 0.5991942546855842, + "grad_norm": 1.1461782455444336, + "learning_rate": 3.6533235790803723e-06, + "loss": 0.2697, + "step": 29932 + }, + { + "epoch": 0.5992142732027125, + "grad_norm": 1.1453845500946045, + "learning_rate": 3.653011378613458e-06, + "loss": 0.2943, + "step": 29933 + }, + { + "epoch": 0.5992342917198409, + "grad_norm": 1.2417691946029663, + "learning_rate": 3.652699183808996e-06, + "loss": 0.2876, + "step": 29934 + }, + { + "epoch": 0.5992543102369692, + "grad_norm": 1.0035110712051392, + "learning_rate": 3.6523869946682967e-06, + "loss": 0.2498, + "step": 29935 + }, + { + "epoch": 0.5992743287540976, + "grad_norm": 1.0510907173156738, + "learning_rate": 3.6520748111926694e-06, + "loss": 0.3387, + "step": 29936 + }, + { + "epoch": 0.5992943472712259, + "grad_norm": 1.407206416130066, + "learning_rate": 3.651762633383432e-06, + "loss": 0.3336, + "step": 29937 + }, + { + "epoch": 0.5993143657883542, + "grad_norm": 1.1659722328186035, + "learning_rate": 3.6514504612418943e-06, + "loss": 0.2953, + "step": 29938 + }, + { + "epoch": 0.5993343843054826, + "grad_norm": 1.1017558574676514, + "learning_rate": 3.6511382947693684e-06, + "loss": 0.3393, + "step": 29939 + }, + { + "epoch": 0.5993544028226109, + "grad_norm": 1.0854686498641968, + "learning_rate": 3.650826133967167e-06, + "loss": 0.3222, + "step": 29940 + }, + { + "epoch": 0.5993744213397393, + "grad_norm": 1.2618916034698486, + "learning_rate": 3.6505139788366006e-06, + "loss": 0.2944, + "step": 29941 + }, + { + "epoch": 0.5993944398568676, + "grad_norm": 1.9309574365615845, + "learning_rate": 3.650201829378985e-06, + "loss": 0.7251, + "step": 29942 + }, + { + "epoch": 0.599414458373996, + "grad_norm": 1.0364775657653809, + "learning_rate": 3.64988968559563e-06, + "loss": 0.2466, + "step": 29943 + }, + { + "epoch": 0.5994344768911243, + "grad_norm": 1.1819896697998047, + "learning_rate": 3.6495775474878475e-06, + "loss": 0.277, + "step": 29944 + }, + { + "epoch": 0.5994544954082527, + "grad_norm": 1.1302781105041504, + "learning_rate": 3.6492654150569506e-06, + "loss": 0.2886, + "step": 29945 + }, + { + "epoch": 0.599474513925381, + "grad_norm": 1.076965093612671, + "learning_rate": 3.648953288304251e-06, + "loss": 0.3157, + "step": 29946 + }, + { + "epoch": 0.5994945324425093, + "grad_norm": 1.26080322265625, + "learning_rate": 3.6486411672310617e-06, + "loss": 0.3414, + "step": 29947 + }, + { + "epoch": 0.5995145509596377, + "grad_norm": 1.2288522720336914, + "learning_rate": 3.648329051838695e-06, + "loss": 0.3433, + "step": 29948 + }, + { + "epoch": 0.599534569476766, + "grad_norm": 1.2907249927520752, + "learning_rate": 3.648016942128459e-06, + "loss": 0.3482, + "step": 29949 + }, + { + "epoch": 0.5995545879938944, + "grad_norm": 1.2591904401779175, + "learning_rate": 3.6477048381016713e-06, + "loss": 0.2875, + "step": 29950 + }, + { + "epoch": 0.5995746065110227, + "grad_norm": 1.0971858501434326, + "learning_rate": 3.647392739759642e-06, + "loss": 0.2774, + "step": 29951 + }, + { + "epoch": 0.5995946250281511, + "grad_norm": 1.1402218341827393, + "learning_rate": 3.6470806471036806e-06, + "loss": 0.284, + "step": 29952 + }, + { + "epoch": 0.5996146435452794, + "grad_norm": 1.1796460151672363, + "learning_rate": 3.646768560135102e-06, + "loss": 0.341, + "step": 29953 + }, + { + "epoch": 0.5996346620624077, + "grad_norm": 1.2525593042373657, + "learning_rate": 3.6464564788552157e-06, + "loss": 0.3066, + "step": 29954 + }, + { + "epoch": 0.5996546805795361, + "grad_norm": 1.1043678522109985, + "learning_rate": 3.646144403265337e-06, + "loss": 0.3223, + "step": 29955 + }, + { + "epoch": 0.5996746990966644, + "grad_norm": 1.7923126220703125, + "learning_rate": 3.645832333366776e-06, + "loss": 0.7189, + "step": 29956 + }, + { + "epoch": 0.5996947176137928, + "grad_norm": 1.324328064918518, + "learning_rate": 3.6455202691608437e-06, + "loss": 0.2446, + "step": 29957 + }, + { + "epoch": 0.5997147361309211, + "grad_norm": 1.0429505109786987, + "learning_rate": 3.6452082106488516e-06, + "loss": 0.3589, + "step": 29958 + }, + { + "epoch": 0.5997347546480495, + "grad_norm": 1.047493577003479, + "learning_rate": 3.644896157832114e-06, + "loss": 0.2416, + "step": 29959 + }, + { + "epoch": 0.5997547731651778, + "grad_norm": 1.3051362037658691, + "learning_rate": 3.6445841107119417e-06, + "loss": 0.323, + "step": 29960 + }, + { + "epoch": 0.5997747916823062, + "grad_norm": 1.1801341772079468, + "learning_rate": 3.644272069289646e-06, + "loss": 0.2562, + "step": 29961 + }, + { + "epoch": 0.5997948101994345, + "grad_norm": 1.0644850730895996, + "learning_rate": 3.6439600335665388e-06, + "loss": 0.3095, + "step": 29962 + }, + { + "epoch": 0.5998148287165628, + "grad_norm": 1.1288725137710571, + "learning_rate": 3.6436480035439297e-06, + "loss": 0.2901, + "step": 29963 + }, + { + "epoch": 0.5998348472336912, + "grad_norm": 1.0062861442565918, + "learning_rate": 3.643335979223135e-06, + "loss": 0.2569, + "step": 29964 + }, + { + "epoch": 0.5998548657508195, + "grad_norm": 1.1890759468078613, + "learning_rate": 3.643023960605462e-06, + "loss": 0.287, + "step": 29965 + }, + { + "epoch": 0.5998748842679479, + "grad_norm": 1.0126138925552368, + "learning_rate": 3.6427119476922255e-06, + "loss": 0.3061, + "step": 29966 + }, + { + "epoch": 0.5998949027850762, + "grad_norm": 1.8273131847381592, + "learning_rate": 3.6423999404847343e-06, + "loss": 0.7494, + "step": 29967 + }, + { + "epoch": 0.5999149213022046, + "grad_norm": 1.1578738689422607, + "learning_rate": 3.6420879389843034e-06, + "loss": 0.3133, + "step": 29968 + }, + { + "epoch": 0.5999349398193329, + "grad_norm": 1.0794310569763184, + "learning_rate": 3.6417759431922427e-06, + "loss": 0.2832, + "step": 29969 + }, + { + "epoch": 0.5999549583364612, + "grad_norm": 1.0990549325942993, + "learning_rate": 3.641463953109863e-06, + "loss": 0.3153, + "step": 29970 + }, + { + "epoch": 0.5999749768535896, + "grad_norm": 1.057934045791626, + "learning_rate": 3.6411519687384766e-06, + "loss": 0.2905, + "step": 29971 + }, + { + "epoch": 0.5999949953707179, + "grad_norm": 1.3962937593460083, + "learning_rate": 3.640839990079393e-06, + "loss": 0.2782, + "step": 29972 + }, + { + "epoch": 0.6000150138878463, + "grad_norm": 1.111667275428772, + "learning_rate": 3.6405280171339286e-06, + "loss": 0.3018, + "step": 29973 + }, + { + "epoch": 0.6000350324049746, + "grad_norm": 1.0686419010162354, + "learning_rate": 3.640216049903391e-06, + "loss": 0.2822, + "step": 29974 + }, + { + "epoch": 0.600055050922103, + "grad_norm": 1.3152538537979126, + "learning_rate": 3.6399040883890923e-06, + "loss": 0.3307, + "step": 29975 + }, + { + "epoch": 0.6000750694392313, + "grad_norm": 1.0569884777069092, + "learning_rate": 3.639592132592343e-06, + "loss": 0.3059, + "step": 29976 + }, + { + "epoch": 0.6000950879563597, + "grad_norm": 1.0815280675888062, + "learning_rate": 3.639280182514457e-06, + "loss": 0.2953, + "step": 29977 + }, + { + "epoch": 0.600115106473488, + "grad_norm": 1.2644448280334473, + "learning_rate": 3.638968238156743e-06, + "loss": 0.2828, + "step": 29978 + }, + { + "epoch": 0.6001351249906163, + "grad_norm": 1.240125298500061, + "learning_rate": 3.6386562995205153e-06, + "loss": 0.3551, + "step": 29979 + }, + { + "epoch": 0.6001551435077447, + "grad_norm": 1.8979952335357666, + "learning_rate": 3.638344366607081e-06, + "loss": 0.738, + "step": 29980 + }, + { + "epoch": 0.600175162024873, + "grad_norm": 1.2468507289886475, + "learning_rate": 3.638032439417757e-06, + "loss": 0.3038, + "step": 29981 + }, + { + "epoch": 0.6001951805420014, + "grad_norm": 1.0551599264144897, + "learning_rate": 3.63772051795385e-06, + "loss": 0.3061, + "step": 29982 + }, + { + "epoch": 0.6002151990591297, + "grad_norm": 1.0739257335662842, + "learning_rate": 3.6374086022166725e-06, + "loss": 0.2737, + "step": 29983 + }, + { + "epoch": 0.6002352175762581, + "grad_norm": 1.1867696046829224, + "learning_rate": 3.637096692207537e-06, + "loss": 0.2954, + "step": 29984 + }, + { + "epoch": 0.6002552360933864, + "grad_norm": 1.0264767408370972, + "learning_rate": 3.636784787927752e-06, + "loss": 0.295, + "step": 29985 + }, + { + "epoch": 0.6002752546105147, + "grad_norm": 1.0922716856002808, + "learning_rate": 3.6364728893786323e-06, + "loss": 0.278, + "step": 29986 + }, + { + "epoch": 0.6002952731276431, + "grad_norm": 0.9796527028083801, + "learning_rate": 3.6361609965614863e-06, + "loss": 0.2844, + "step": 29987 + }, + { + "epoch": 0.6003152916447714, + "grad_norm": 1.1323140859603882, + "learning_rate": 3.635849109477627e-06, + "loss": 0.296, + "step": 29988 + }, + { + "epoch": 0.6003353101618998, + "grad_norm": 1.13128662109375, + "learning_rate": 3.635537228128362e-06, + "loss": 0.3552, + "step": 29989 + }, + { + "epoch": 0.6003553286790281, + "grad_norm": 1.0770671367645264, + "learning_rate": 3.635225352515007e-06, + "loss": 0.2737, + "step": 29990 + }, + { + "epoch": 0.6003753471961565, + "grad_norm": 1.070867657661438, + "learning_rate": 3.63491348263887e-06, + "loss": 0.3329, + "step": 29991 + }, + { + "epoch": 0.6003953657132848, + "grad_norm": 1.0402804613113403, + "learning_rate": 3.634601618501264e-06, + "loss": 0.3227, + "step": 29992 + }, + { + "epoch": 0.6004153842304132, + "grad_norm": 1.1547956466674805, + "learning_rate": 3.6342897601034987e-06, + "loss": 0.3094, + "step": 29993 + }, + { + "epoch": 0.6004354027475415, + "grad_norm": 1.3509958982467651, + "learning_rate": 3.633977907446883e-06, + "loss": 0.2891, + "step": 29994 + }, + { + "epoch": 0.6004554212646698, + "grad_norm": 1.204087257385254, + "learning_rate": 3.6336660605327326e-06, + "loss": 0.3025, + "step": 29995 + }, + { + "epoch": 0.6004754397817982, + "grad_norm": 1.1345996856689453, + "learning_rate": 3.633354219362355e-06, + "loss": 0.3065, + "step": 29996 + }, + { + "epoch": 0.6004954582989265, + "grad_norm": 1.1719449758529663, + "learning_rate": 3.633042383937063e-06, + "loss": 0.3645, + "step": 29997 + }, + { + "epoch": 0.6005154768160549, + "grad_norm": 1.1709753274917603, + "learning_rate": 3.6327305542581655e-06, + "loss": 0.2808, + "step": 29998 + }, + { + "epoch": 0.6005354953331832, + "grad_norm": 1.1019058227539062, + "learning_rate": 3.632418730326976e-06, + "loss": 0.3427, + "step": 29999 + }, + { + "epoch": 0.6005555138503116, + "grad_norm": 1.0846952199935913, + "learning_rate": 3.6321069121448037e-06, + "loss": 0.3241, + "step": 30000 + }, + { + "epoch": 0.6005755323674399, + "grad_norm": 1.0778000354766846, + "learning_rate": 3.6317950997129593e-06, + "loss": 0.3026, + "step": 30001 + }, + { + "epoch": 0.6005955508845682, + "grad_norm": 1.026973843574524, + "learning_rate": 3.6314832930327536e-06, + "loss": 0.2583, + "step": 30002 + }, + { + "epoch": 0.6006155694016966, + "grad_norm": 1.045087456703186, + "learning_rate": 3.631171492105497e-06, + "loss": 0.2828, + "step": 30003 + }, + { + "epoch": 0.6006355879188249, + "grad_norm": 1.8203710317611694, + "learning_rate": 3.6308596969325004e-06, + "loss": 0.8286, + "step": 30004 + }, + { + "epoch": 0.6006556064359533, + "grad_norm": 1.8625344038009644, + "learning_rate": 3.6305479075150774e-06, + "loss": 0.8379, + "step": 30005 + }, + { + "epoch": 0.6006756249530816, + "grad_norm": 0.976119875907898, + "learning_rate": 3.6302361238545354e-06, + "loss": 0.2402, + "step": 30006 + }, + { + "epoch": 0.60069564347021, + "grad_norm": 1.066076397895813, + "learning_rate": 3.629924345952184e-06, + "loss": 0.3047, + "step": 30007 + }, + { + "epoch": 0.6007156619873383, + "grad_norm": 0.9848458766937256, + "learning_rate": 3.629612573809338e-06, + "loss": 0.2939, + "step": 30008 + }, + { + "epoch": 0.6007356805044667, + "grad_norm": 1.1127578020095825, + "learning_rate": 3.6293008074273046e-06, + "loss": 0.2909, + "step": 30009 + }, + { + "epoch": 0.600755699021595, + "grad_norm": 1.1059452295303345, + "learning_rate": 3.6289890468073966e-06, + "loss": 0.3144, + "step": 30010 + }, + { + "epoch": 0.6007757175387233, + "grad_norm": 1.0747885704040527, + "learning_rate": 3.6286772919509215e-06, + "loss": 0.3662, + "step": 30011 + }, + { + "epoch": 0.6007957360558517, + "grad_norm": 1.0832624435424805, + "learning_rate": 3.6283655428591947e-06, + "loss": 0.3053, + "step": 30012 + }, + { + "epoch": 0.60081575457298, + "grad_norm": 1.1774777173995972, + "learning_rate": 3.6280537995335235e-06, + "loss": 0.2743, + "step": 30013 + }, + { + "epoch": 0.6008357730901084, + "grad_norm": 1.0618034601211548, + "learning_rate": 3.627742061975219e-06, + "loss": 0.3059, + "step": 30014 + }, + { + "epoch": 0.6008557916072367, + "grad_norm": 1.1234596967697144, + "learning_rate": 3.6274303301855907e-06, + "loss": 0.3373, + "step": 30015 + }, + { + "epoch": 0.6008758101243651, + "grad_norm": 1.1520540714263916, + "learning_rate": 3.627118604165949e-06, + "loss": 0.3059, + "step": 30016 + }, + { + "epoch": 0.6008958286414934, + "grad_norm": 1.0930066108703613, + "learning_rate": 3.6268068839176063e-06, + "loss": 0.3449, + "step": 30017 + }, + { + "epoch": 0.6009158471586217, + "grad_norm": 1.0854792594909668, + "learning_rate": 3.6264951694418726e-06, + "loss": 0.2577, + "step": 30018 + }, + { + "epoch": 0.6009358656757501, + "grad_norm": 1.1347094774246216, + "learning_rate": 3.626183460740057e-06, + "loss": 0.2774, + "step": 30019 + }, + { + "epoch": 0.6009558841928784, + "grad_norm": 1.1755092144012451, + "learning_rate": 3.6258717578134695e-06, + "loss": 0.3444, + "step": 30020 + }, + { + "epoch": 0.6009759027100068, + "grad_norm": 1.0583146810531616, + "learning_rate": 3.625560060663423e-06, + "loss": 0.2854, + "step": 30021 + }, + { + "epoch": 0.6009959212271351, + "grad_norm": 1.197796106338501, + "learning_rate": 3.625248369291225e-06, + "loss": 0.2522, + "step": 30022 + }, + { + "epoch": 0.6010159397442635, + "grad_norm": 1.010938048362732, + "learning_rate": 3.624936683698188e-06, + "loss": 0.2764, + "step": 30023 + }, + { + "epoch": 0.6010359582613918, + "grad_norm": 1.0734292268753052, + "learning_rate": 3.6246250038856214e-06, + "loss": 0.3018, + "step": 30024 + }, + { + "epoch": 0.6010559767785202, + "grad_norm": 1.1022709608078003, + "learning_rate": 3.6243133298548334e-06, + "loss": 0.3059, + "step": 30025 + }, + { + "epoch": 0.6010759952956485, + "grad_norm": 1.1840124130249023, + "learning_rate": 3.6240016616071383e-06, + "loss": 0.2818, + "step": 30026 + }, + { + "epoch": 0.6010960138127768, + "grad_norm": 1.2232664823532104, + "learning_rate": 3.6236899991438435e-06, + "loss": 0.2824, + "step": 30027 + }, + { + "epoch": 0.6011160323299052, + "grad_norm": 1.0386755466461182, + "learning_rate": 3.6233783424662593e-06, + "loss": 0.2557, + "step": 30028 + }, + { + "epoch": 0.6011360508470335, + "grad_norm": 1.196542501449585, + "learning_rate": 3.623066691575696e-06, + "loss": 0.3184, + "step": 30029 + }, + { + "epoch": 0.6011560693641619, + "grad_norm": 1.0469220876693726, + "learning_rate": 3.622755046473464e-06, + "loss": 0.3094, + "step": 30030 + }, + { + "epoch": 0.6011760878812902, + "grad_norm": 1.0498027801513672, + "learning_rate": 3.622443407160875e-06, + "loss": 0.2994, + "step": 30031 + }, + { + "epoch": 0.6011961063984186, + "grad_norm": 1.055261254310608, + "learning_rate": 3.622131773639237e-06, + "loss": 0.2867, + "step": 30032 + }, + { + "epoch": 0.6012161249155469, + "grad_norm": 1.0404558181762695, + "learning_rate": 3.6218201459098577e-06, + "loss": 0.2848, + "step": 30033 + }, + { + "epoch": 0.6012361434326752, + "grad_norm": 1.228640079498291, + "learning_rate": 3.6215085239740532e-06, + "loss": 0.3358, + "step": 30034 + }, + { + "epoch": 0.6012561619498036, + "grad_norm": 1.941479206085205, + "learning_rate": 3.621196907833129e-06, + "loss": 0.7903, + "step": 30035 + }, + { + "epoch": 0.6012761804669319, + "grad_norm": 1.0456854104995728, + "learning_rate": 3.620885297488397e-06, + "loss": 0.2916, + "step": 30036 + }, + { + "epoch": 0.6012961989840603, + "grad_norm": 1.0547873973846436, + "learning_rate": 3.6205736929411662e-06, + "loss": 0.3431, + "step": 30037 + }, + { + "epoch": 0.6013162175011886, + "grad_norm": 1.0997731685638428, + "learning_rate": 3.6202620941927447e-06, + "loss": 0.2864, + "step": 30038 + }, + { + "epoch": 0.601336236018317, + "grad_norm": 1.0862394571304321, + "learning_rate": 3.619950501244447e-06, + "loss": 0.3189, + "step": 30039 + }, + { + "epoch": 0.6013562545354453, + "grad_norm": 1.1047122478485107, + "learning_rate": 3.6196389140975794e-06, + "loss": 0.2969, + "step": 30040 + }, + { + "epoch": 0.6013762730525737, + "grad_norm": 1.0738701820373535, + "learning_rate": 3.619327332753453e-06, + "loss": 0.3103, + "step": 30041 + }, + { + "epoch": 0.601396291569702, + "grad_norm": 1.9813092947006226, + "learning_rate": 3.619015757213377e-06, + "loss": 0.7278, + "step": 30042 + }, + { + "epoch": 0.6014163100868303, + "grad_norm": 1.046912431716919, + "learning_rate": 3.618704187478661e-06, + "loss": 0.2687, + "step": 30043 + }, + { + "epoch": 0.6014363286039587, + "grad_norm": 1.2773325443267822, + "learning_rate": 3.6183926235506167e-06, + "loss": 0.3219, + "step": 30044 + }, + { + "epoch": 0.601456347121087, + "grad_norm": 1.0819200277328491, + "learning_rate": 3.6180810654305527e-06, + "loss": 0.2808, + "step": 30045 + }, + { + "epoch": 0.6014763656382154, + "grad_norm": 1.3122586011886597, + "learning_rate": 3.617769513119777e-06, + "loss": 0.3164, + "step": 30046 + }, + { + "epoch": 0.6014963841553437, + "grad_norm": 1.1523268222808838, + "learning_rate": 3.6174579666196007e-06, + "loss": 0.3073, + "step": 30047 + }, + { + "epoch": 0.6015164026724721, + "grad_norm": 1.2221684455871582, + "learning_rate": 3.6171464259313346e-06, + "loss": 0.3327, + "step": 30048 + }, + { + "epoch": 0.6015364211896004, + "grad_norm": 1.0221827030181885, + "learning_rate": 3.6168348910562878e-06, + "loss": 0.3011, + "step": 30049 + }, + { + "epoch": 0.6015564397067287, + "grad_norm": 1.8241583108901978, + "learning_rate": 3.6165233619957698e-06, + "loss": 0.7377, + "step": 30050 + }, + { + "epoch": 0.6015764582238571, + "grad_norm": 1.8791991472244263, + "learning_rate": 3.616211838751087e-06, + "loss": 0.789, + "step": 30051 + }, + { + "epoch": 0.6015964767409854, + "grad_norm": 1.139631748199463, + "learning_rate": 3.6159003213235545e-06, + "loss": 0.3055, + "step": 30052 + }, + { + "epoch": 0.6016164952581138, + "grad_norm": 1.1793159246444702, + "learning_rate": 3.6155888097144788e-06, + "loss": 0.2975, + "step": 30053 + }, + { + "epoch": 0.6016365137752421, + "grad_norm": 1.1543296575546265, + "learning_rate": 3.6152773039251686e-06, + "loss": 0.3335, + "step": 30054 + }, + { + "epoch": 0.6016565322923705, + "grad_norm": 1.0920755863189697, + "learning_rate": 3.614965803956936e-06, + "loss": 0.2652, + "step": 30055 + }, + { + "epoch": 0.6016765508094988, + "grad_norm": 1.2546387910842896, + "learning_rate": 3.6146543098110875e-06, + "loss": 0.3021, + "step": 30056 + }, + { + "epoch": 0.6016965693266272, + "grad_norm": 1.189011812210083, + "learning_rate": 3.6143428214889354e-06, + "loss": 0.328, + "step": 30057 + }, + { + "epoch": 0.6017165878437555, + "grad_norm": 1.2139332294464111, + "learning_rate": 3.614031338991788e-06, + "loss": 0.2922, + "step": 30058 + }, + { + "epoch": 0.6017366063608838, + "grad_norm": 0.9877148270606995, + "learning_rate": 3.613719862320954e-06, + "loss": 0.3019, + "step": 30059 + }, + { + "epoch": 0.6017566248780122, + "grad_norm": 1.2251132726669312, + "learning_rate": 3.6134083914777427e-06, + "loss": 0.3351, + "step": 30060 + }, + { + "epoch": 0.6017766433951405, + "grad_norm": 1.0501024723052979, + "learning_rate": 3.6130969264634653e-06, + "loss": 0.2806, + "step": 30061 + }, + { + "epoch": 0.6017966619122689, + "grad_norm": 1.2373197078704834, + "learning_rate": 3.6127854672794305e-06, + "loss": 0.2861, + "step": 30062 + }, + { + "epoch": 0.6018166804293972, + "grad_norm": 1.0980101823806763, + "learning_rate": 3.6124740139269465e-06, + "loss": 0.2562, + "step": 30063 + }, + { + "epoch": 0.6018366989465256, + "grad_norm": 1.1508662700653076, + "learning_rate": 3.612162566407321e-06, + "loss": 0.2834, + "step": 30064 + }, + { + "epoch": 0.6018567174636539, + "grad_norm": 1.1378926038742065, + "learning_rate": 3.611851124721868e-06, + "loss": 0.3188, + "step": 30065 + }, + { + "epoch": 0.6018767359807822, + "grad_norm": 1.3010241985321045, + "learning_rate": 3.6115396888718947e-06, + "loss": 0.3139, + "step": 30066 + }, + { + "epoch": 0.6018967544979106, + "grad_norm": 1.1769884824752808, + "learning_rate": 3.6112282588587077e-06, + "loss": 0.29, + "step": 30067 + }, + { + "epoch": 0.6019167730150389, + "grad_norm": 1.1357495784759521, + "learning_rate": 3.61091683468362e-06, + "loss": 0.3481, + "step": 30068 + }, + { + "epoch": 0.6019367915321673, + "grad_norm": 1.3158584833145142, + "learning_rate": 3.610605416347936e-06, + "loss": 0.3365, + "step": 30069 + }, + { + "epoch": 0.6019568100492956, + "grad_norm": 0.9681852459907532, + "learning_rate": 3.6102940038529706e-06, + "loss": 0.2733, + "step": 30070 + }, + { + "epoch": 0.601976828566424, + "grad_norm": 1.128468632698059, + "learning_rate": 3.6099825972000303e-06, + "loss": 0.32, + "step": 30071 + }, + { + "epoch": 0.6019968470835523, + "grad_norm": 1.1345558166503906, + "learning_rate": 3.6096711963904217e-06, + "loss": 0.325, + "step": 30072 + }, + { + "epoch": 0.6020168656006807, + "grad_norm": 1.139445424079895, + "learning_rate": 3.6093598014254578e-06, + "loss": 0.3137, + "step": 30073 + }, + { + "epoch": 0.602036884117809, + "grad_norm": 1.0849535465240479, + "learning_rate": 3.609048412306445e-06, + "loss": 0.2891, + "step": 30074 + }, + { + "epoch": 0.6020569026349373, + "grad_norm": 2.0757057666778564, + "learning_rate": 3.608737029034695e-06, + "loss": 0.7666, + "step": 30075 + }, + { + "epoch": 0.6020769211520657, + "grad_norm": 1.1846741437911987, + "learning_rate": 3.6084256516115146e-06, + "loss": 0.286, + "step": 30076 + }, + { + "epoch": 0.602096939669194, + "grad_norm": 1.0692839622497559, + "learning_rate": 3.6081142800382143e-06, + "loss": 0.3477, + "step": 30077 + }, + { + "epoch": 0.6021169581863224, + "grad_norm": 1.9417489767074585, + "learning_rate": 3.6078029143160985e-06, + "loss": 0.7332, + "step": 30078 + }, + { + "epoch": 0.6021369767034507, + "grad_norm": 1.1151304244995117, + "learning_rate": 3.6074915544464828e-06, + "loss": 0.3105, + "step": 30079 + }, + { + "epoch": 0.6021569952205791, + "grad_norm": 1.0877922773361206, + "learning_rate": 3.6071802004306715e-06, + "loss": 0.2891, + "step": 30080 + }, + { + "epoch": 0.6021770137377074, + "grad_norm": 1.0875452756881714, + "learning_rate": 3.606868852269976e-06, + "loss": 0.3277, + "step": 30081 + }, + { + "epoch": 0.6021970322548357, + "grad_norm": 1.0481865406036377, + "learning_rate": 3.6065575099657017e-06, + "loss": 0.2774, + "step": 30082 + }, + { + "epoch": 0.6022170507719641, + "grad_norm": 1.0977863073349, + "learning_rate": 3.606246173519162e-06, + "loss": 0.3096, + "step": 30083 + }, + { + "epoch": 0.6022370692890924, + "grad_norm": 1.1143022775650024, + "learning_rate": 3.605934842931664e-06, + "loss": 0.2914, + "step": 30084 + }, + { + "epoch": 0.6022570878062208, + "grad_norm": 1.0651823282241821, + "learning_rate": 3.6056235182045142e-06, + "loss": 0.3401, + "step": 30085 + }, + { + "epoch": 0.6022771063233491, + "grad_norm": 1.226201057434082, + "learning_rate": 3.6053121993390244e-06, + "loss": 0.3461, + "step": 30086 + }, + { + "epoch": 0.6022971248404775, + "grad_norm": 1.1208746433258057, + "learning_rate": 3.6050008863364993e-06, + "loss": 0.2818, + "step": 30087 + }, + { + "epoch": 0.6023171433576058, + "grad_norm": 1.222066879272461, + "learning_rate": 3.6046895791982535e-06, + "loss": 0.306, + "step": 30088 + }, + { + "epoch": 0.6023371618747342, + "grad_norm": 1.0969539880752563, + "learning_rate": 3.6043782779255915e-06, + "loss": 0.3046, + "step": 30089 + }, + { + "epoch": 0.6023571803918625, + "grad_norm": 2.0934274196624756, + "learning_rate": 3.604066982519823e-06, + "loss": 0.8795, + "step": 30090 + }, + { + "epoch": 0.6023771989089908, + "grad_norm": 1.135451078414917, + "learning_rate": 3.603755692982255e-06, + "loss": 0.2346, + "step": 30091 + }, + { + "epoch": 0.6023972174261192, + "grad_norm": 1.076579213142395, + "learning_rate": 3.603444409314199e-06, + "loss": 0.2961, + "step": 30092 + }, + { + "epoch": 0.6024172359432475, + "grad_norm": 1.033591866493225, + "learning_rate": 3.6031331315169614e-06, + "loss": 0.2883, + "step": 30093 + }, + { + "epoch": 0.6024372544603759, + "grad_norm": 1.0656168460845947, + "learning_rate": 3.6028218595918522e-06, + "loss": 0.2976, + "step": 30094 + }, + { + "epoch": 0.6024572729775042, + "grad_norm": 1.0308916568756104, + "learning_rate": 3.602510593540178e-06, + "loss": 0.2976, + "step": 30095 + }, + { + "epoch": 0.6024772914946326, + "grad_norm": 1.2116241455078125, + "learning_rate": 3.60219933336325e-06, + "loss": 0.3195, + "step": 30096 + }, + { + "epoch": 0.6024973100117609, + "grad_norm": 1.1857470273971558, + "learning_rate": 3.601888079062375e-06, + "loss": 0.2745, + "step": 30097 + }, + { + "epoch": 0.6025173285288892, + "grad_norm": 1.1434773206710815, + "learning_rate": 3.601576830638861e-06, + "loss": 0.3036, + "step": 30098 + }, + { + "epoch": 0.6025373470460176, + "grad_norm": 1.12063729763031, + "learning_rate": 3.601265588094018e-06, + "loss": 0.34, + "step": 30099 + }, + { + "epoch": 0.6025573655631459, + "grad_norm": 1.089725136756897, + "learning_rate": 3.6009543514291514e-06, + "loss": 0.3368, + "step": 30100 + }, + { + "epoch": 0.6025773840802743, + "grad_norm": 1.0976450443267822, + "learning_rate": 3.6006431206455734e-06, + "loss": 0.3084, + "step": 30101 + }, + { + "epoch": 0.6025974025974026, + "grad_norm": 1.0465128421783447, + "learning_rate": 3.600331895744591e-06, + "loss": 0.2842, + "step": 30102 + }, + { + "epoch": 0.602617421114531, + "grad_norm": 1.2375855445861816, + "learning_rate": 3.6000206767275114e-06, + "loss": 0.3001, + "step": 30103 + }, + { + "epoch": 0.6026374396316593, + "grad_norm": 1.0741641521453857, + "learning_rate": 3.599709463595642e-06, + "loss": 0.3203, + "step": 30104 + }, + { + "epoch": 0.6026574581487877, + "grad_norm": 1.0164035558700562, + "learning_rate": 3.599398256350295e-06, + "loss": 0.3358, + "step": 30105 + }, + { + "epoch": 0.602677476665916, + "grad_norm": 1.076224446296692, + "learning_rate": 3.599087054992775e-06, + "loss": 0.2925, + "step": 30106 + }, + { + "epoch": 0.6026974951830443, + "grad_norm": 1.1891223192214966, + "learning_rate": 3.5987758595243925e-06, + "loss": 0.2917, + "step": 30107 + }, + { + "epoch": 0.6027175137001727, + "grad_norm": 1.0449578762054443, + "learning_rate": 3.5984646699464555e-06, + "loss": 0.2746, + "step": 30108 + }, + { + "epoch": 0.602737532217301, + "grad_norm": 1.1492918729782104, + "learning_rate": 3.5981534862602684e-06, + "loss": 0.2997, + "step": 30109 + }, + { + "epoch": 0.6027575507344294, + "grad_norm": 1.133225679397583, + "learning_rate": 3.5978423084671444e-06, + "loss": 0.3163, + "step": 30110 + }, + { + "epoch": 0.6027775692515577, + "grad_norm": 2.1861329078674316, + "learning_rate": 3.597531136568389e-06, + "loss": 0.7727, + "step": 30111 + }, + { + "epoch": 0.6027975877686861, + "grad_norm": 1.2048544883728027, + "learning_rate": 3.597219970565312e-06, + "loss": 0.3469, + "step": 30112 + }, + { + "epoch": 0.6028176062858144, + "grad_norm": 2.034512996673584, + "learning_rate": 3.5969088104592174e-06, + "loss": 0.8117, + "step": 30113 + }, + { + "epoch": 0.6028376248029427, + "grad_norm": 1.0113797187805176, + "learning_rate": 3.5965976562514193e-06, + "loss": 0.2584, + "step": 30114 + }, + { + "epoch": 0.6028576433200711, + "grad_norm": 1.1104406118392944, + "learning_rate": 3.596286507943223e-06, + "loss": 0.2969, + "step": 30115 + }, + { + "epoch": 0.6028776618371994, + "grad_norm": 1.0766857862472534, + "learning_rate": 3.5959753655359354e-06, + "loss": 0.2904, + "step": 30116 + }, + { + "epoch": 0.6028976803543278, + "grad_norm": 1.2755521535873413, + "learning_rate": 3.595664229030864e-06, + "loss": 0.3145, + "step": 30117 + }, + { + "epoch": 0.6029176988714561, + "grad_norm": 1.2327890396118164, + "learning_rate": 3.5953530984293185e-06, + "loss": 0.3184, + "step": 30118 + }, + { + "epoch": 0.6029377173885845, + "grad_norm": 1.1994811296463013, + "learning_rate": 3.595041973732606e-06, + "loss": 0.3081, + "step": 30119 + }, + { + "epoch": 0.6029577359057128, + "grad_norm": 1.212541937828064, + "learning_rate": 3.5947308549420357e-06, + "loss": 0.2995, + "step": 30120 + }, + { + "epoch": 0.6029777544228412, + "grad_norm": 1.062929630279541, + "learning_rate": 3.5944197420589146e-06, + "loss": 0.3222, + "step": 30121 + }, + { + "epoch": 0.6029977729399695, + "grad_norm": 1.1848483085632324, + "learning_rate": 3.5941086350845476e-06, + "loss": 0.296, + "step": 30122 + }, + { + "epoch": 0.6030177914570978, + "grad_norm": 1.022305965423584, + "learning_rate": 3.593797534020248e-06, + "loss": 0.3026, + "step": 30123 + }, + { + "epoch": 0.6030378099742262, + "grad_norm": 1.160748839378357, + "learning_rate": 3.5934864388673204e-06, + "loss": 0.2778, + "step": 30124 + }, + { + "epoch": 0.6030578284913545, + "grad_norm": 1.1835829019546509, + "learning_rate": 3.5931753496270737e-06, + "loss": 0.2931, + "step": 30125 + }, + { + "epoch": 0.6030778470084829, + "grad_norm": 1.1273250579833984, + "learning_rate": 3.592864266300813e-06, + "loss": 0.3242, + "step": 30126 + }, + { + "epoch": 0.6030978655256112, + "grad_norm": 1.968268632888794, + "learning_rate": 3.59255318888985e-06, + "loss": 0.6984, + "step": 30127 + }, + { + "epoch": 0.6031178840427396, + "grad_norm": 1.2102797031402588, + "learning_rate": 3.5922421173954906e-06, + "loss": 0.2826, + "step": 30128 + }, + { + "epoch": 0.6031379025598679, + "grad_norm": 1.0359601974487305, + "learning_rate": 3.591931051819043e-06, + "loss": 0.2655, + "step": 30129 + }, + { + "epoch": 0.6031579210769962, + "grad_norm": 1.0854811668395996, + "learning_rate": 3.5916199921618122e-06, + "loss": 0.288, + "step": 30130 + }, + { + "epoch": 0.6031779395941246, + "grad_norm": 1.0937567949295044, + "learning_rate": 3.5913089384251087e-06, + "loss": 0.3086, + "step": 30131 + }, + { + "epoch": 0.6031979581112529, + "grad_norm": 1.1168479919433594, + "learning_rate": 3.5909978906102387e-06, + "loss": 0.2981, + "step": 30132 + }, + { + "epoch": 0.6032179766283813, + "grad_norm": 1.068384051322937, + "learning_rate": 3.590686848718512e-06, + "loss": 0.2906, + "step": 30133 + }, + { + "epoch": 0.6032379951455096, + "grad_norm": 1.0005749464035034, + "learning_rate": 3.590375812751234e-06, + "loss": 0.3045, + "step": 30134 + }, + { + "epoch": 0.603258013662638, + "grad_norm": 1.2658634185791016, + "learning_rate": 3.5900647827097108e-06, + "loss": 0.3125, + "step": 30135 + }, + { + "epoch": 0.6032780321797663, + "grad_norm": 1.231981635093689, + "learning_rate": 3.5897537585952534e-06, + "loss": 0.3029, + "step": 30136 + }, + { + "epoch": 0.6032980506968946, + "grad_norm": 1.1324059963226318, + "learning_rate": 3.5894427404091677e-06, + "loss": 0.3171, + "step": 30137 + }, + { + "epoch": 0.603318069214023, + "grad_norm": 1.140060544013977, + "learning_rate": 3.5891317281527615e-06, + "loss": 0.3338, + "step": 30138 + }, + { + "epoch": 0.6033380877311513, + "grad_norm": 1.1774287223815918, + "learning_rate": 3.588820721827342e-06, + "loss": 0.331, + "step": 30139 + }, + { + "epoch": 0.6033581062482797, + "grad_norm": 1.2081751823425293, + "learning_rate": 3.5885097214342145e-06, + "loss": 0.3108, + "step": 30140 + }, + { + "epoch": 0.603378124765408, + "grad_norm": 1.1188666820526123, + "learning_rate": 3.588198726974691e-06, + "loss": 0.2707, + "step": 30141 + }, + { + "epoch": 0.6033981432825364, + "grad_norm": 1.0119884014129639, + "learning_rate": 3.5878877384500754e-06, + "loss": 0.2936, + "step": 30142 + }, + { + "epoch": 0.6034181617996647, + "grad_norm": 1.1528507471084595, + "learning_rate": 3.5875767558616746e-06, + "loss": 0.2866, + "step": 30143 + }, + { + "epoch": 0.6034381803167931, + "grad_norm": 1.058684229850769, + "learning_rate": 3.5872657792107977e-06, + "loss": 0.2954, + "step": 30144 + }, + { + "epoch": 0.6034581988339214, + "grad_norm": 0.9492470026016235, + "learning_rate": 3.5869548084987517e-06, + "loss": 0.2758, + "step": 30145 + }, + { + "epoch": 0.6034782173510497, + "grad_norm": 1.0470939874649048, + "learning_rate": 3.586643843726845e-06, + "loss": 0.2965, + "step": 30146 + }, + { + "epoch": 0.6034982358681781, + "grad_norm": 1.1802778244018555, + "learning_rate": 3.586332884896382e-06, + "loss": 0.2823, + "step": 30147 + }, + { + "epoch": 0.6035182543853064, + "grad_norm": 1.1382789611816406, + "learning_rate": 3.586021932008672e-06, + "loss": 0.3029, + "step": 30148 + }, + { + "epoch": 0.6035382729024348, + "grad_norm": 1.1169325113296509, + "learning_rate": 3.5857109850650197e-06, + "loss": 0.2865, + "step": 30149 + }, + { + "epoch": 0.6035582914195631, + "grad_norm": 1.010677456855774, + "learning_rate": 3.5854000440667345e-06, + "loss": 0.2913, + "step": 30150 + }, + { + "epoch": 0.6035783099366915, + "grad_norm": 1.04796302318573, + "learning_rate": 3.585089109015125e-06, + "loss": 0.2731, + "step": 30151 + }, + { + "epoch": 0.6035983284538198, + "grad_norm": 1.262616753578186, + "learning_rate": 3.5847781799114957e-06, + "loss": 0.2997, + "step": 30152 + }, + { + "epoch": 0.6036183469709481, + "grad_norm": 1.0772849321365356, + "learning_rate": 3.5844672567571525e-06, + "loss": 0.2471, + "step": 30153 + }, + { + "epoch": 0.6036383654880765, + "grad_norm": 1.2090559005737305, + "learning_rate": 3.584156339553406e-06, + "loss": 0.3649, + "step": 30154 + }, + { + "epoch": 0.6036583840052048, + "grad_norm": 1.8549890518188477, + "learning_rate": 3.5838454283015623e-06, + "loss": 0.8147, + "step": 30155 + }, + { + "epoch": 0.6036784025223332, + "grad_norm": 1.080531358718872, + "learning_rate": 3.583534523002926e-06, + "loss": 0.3032, + "step": 30156 + }, + { + "epoch": 0.6036984210394615, + "grad_norm": 1.3038239479064941, + "learning_rate": 3.5832236236588062e-06, + "loss": 0.3362, + "step": 30157 + }, + { + "epoch": 0.6037184395565899, + "grad_norm": 1.0316095352172852, + "learning_rate": 3.582912730270509e-06, + "loss": 0.3296, + "step": 30158 + }, + { + "epoch": 0.6037384580737182, + "grad_norm": 1.1376906633377075, + "learning_rate": 3.582601842839344e-06, + "loss": 0.2577, + "step": 30159 + }, + { + "epoch": 0.6037584765908466, + "grad_norm": 1.0570340156555176, + "learning_rate": 3.5822909613666146e-06, + "loss": 0.2962, + "step": 30160 + }, + { + "epoch": 0.6037784951079749, + "grad_norm": 1.136732816696167, + "learning_rate": 3.581980085853629e-06, + "loss": 0.2819, + "step": 30161 + }, + { + "epoch": 0.6037985136251032, + "grad_norm": 1.8604180812835693, + "learning_rate": 3.581669216301692e-06, + "loss": 0.7799, + "step": 30162 + }, + { + "epoch": 0.6038185321422316, + "grad_norm": 1.1505131721496582, + "learning_rate": 3.5813583527121144e-06, + "loss": 0.291, + "step": 30163 + }, + { + "epoch": 0.6038385506593599, + "grad_norm": 1.1168233156204224, + "learning_rate": 3.5810474950861997e-06, + "loss": 0.3082, + "step": 30164 + }, + { + "epoch": 0.6038585691764883, + "grad_norm": 1.010384440422058, + "learning_rate": 3.580736643425258e-06, + "loss": 0.31, + "step": 30165 + }, + { + "epoch": 0.6038785876936166, + "grad_norm": 1.21854829788208, + "learning_rate": 3.5804257977305913e-06, + "loss": 0.3368, + "step": 30166 + }, + { + "epoch": 0.603898606210745, + "grad_norm": 1.2632163763046265, + "learning_rate": 3.5801149580035106e-06, + "loss": 0.3574, + "step": 30167 + }, + { + "epoch": 0.6039186247278733, + "grad_norm": 2.0027592182159424, + "learning_rate": 3.579804124245322e-06, + "loss": 0.7678, + "step": 30168 + }, + { + "epoch": 0.6039386432450016, + "grad_norm": 0.989020049571991, + "learning_rate": 3.579493296457329e-06, + "loss": 0.2527, + "step": 30169 + }, + { + "epoch": 0.60395866176213, + "grad_norm": 1.1734706163406372, + "learning_rate": 3.579182474640843e-06, + "loss": 0.2919, + "step": 30170 + }, + { + "epoch": 0.6039786802792583, + "grad_norm": 1.1255760192871094, + "learning_rate": 3.578871658797165e-06, + "loss": 0.3062, + "step": 30171 + }, + { + "epoch": 0.6039986987963867, + "grad_norm": 2.08290958404541, + "learning_rate": 3.5785608489276067e-06, + "loss": 0.754, + "step": 30172 + }, + { + "epoch": 0.604018717313515, + "grad_norm": 1.9429644346237183, + "learning_rate": 3.5782500450334735e-06, + "loss": 0.7955, + "step": 30173 + }, + { + "epoch": 0.6040387358306434, + "grad_norm": 1.066218376159668, + "learning_rate": 3.5779392471160705e-06, + "loss": 0.2948, + "step": 30174 + }, + { + "epoch": 0.6040587543477717, + "grad_norm": 1.0653181076049805, + "learning_rate": 3.577628455176703e-06, + "loss": 0.2776, + "step": 30175 + }, + { + "epoch": 0.6040787728649001, + "grad_norm": 1.120512843132019, + "learning_rate": 3.5773176692166817e-06, + "loss": 0.3323, + "step": 30176 + }, + { + "epoch": 0.6040987913820284, + "grad_norm": 1.9999399185180664, + "learning_rate": 3.5770068892373095e-06, + "loss": 0.8582, + "step": 30177 + }, + { + "epoch": 0.6041188098991567, + "grad_norm": 1.1741178035736084, + "learning_rate": 3.576696115239895e-06, + "loss": 0.2889, + "step": 30178 + }, + { + "epoch": 0.6041388284162851, + "grad_norm": 1.134487509727478, + "learning_rate": 3.5763853472257416e-06, + "loss": 0.2956, + "step": 30179 + }, + { + "epoch": 0.6041588469334134, + "grad_norm": 2.0747475624084473, + "learning_rate": 3.5760745851961605e-06, + "loss": 0.7828, + "step": 30180 + }, + { + "epoch": 0.6041788654505418, + "grad_norm": 1.9290307760238647, + "learning_rate": 3.575763829152455e-06, + "loss": 0.8082, + "step": 30181 + }, + { + "epoch": 0.6041988839676701, + "grad_norm": 1.1234033107757568, + "learning_rate": 3.5754530790959308e-06, + "loss": 0.2973, + "step": 30182 + }, + { + "epoch": 0.6042189024847985, + "grad_norm": 1.2173672914505005, + "learning_rate": 3.5751423350278964e-06, + "loss": 0.3044, + "step": 30183 + }, + { + "epoch": 0.6042389210019268, + "grad_norm": 1.0703409910202026, + "learning_rate": 3.574831596949655e-06, + "loss": 0.3227, + "step": 30184 + }, + { + "epoch": 0.6042589395190551, + "grad_norm": 1.1119444370269775, + "learning_rate": 3.5745208648625164e-06, + "loss": 0.2651, + "step": 30185 + }, + { + "epoch": 0.6042789580361835, + "grad_norm": 1.062927484512329, + "learning_rate": 3.5742101387677864e-06, + "loss": 0.2929, + "step": 30186 + }, + { + "epoch": 0.6042989765533118, + "grad_norm": 1.347969889640808, + "learning_rate": 3.573899418666769e-06, + "loss": 0.2907, + "step": 30187 + }, + { + "epoch": 0.6043189950704402, + "grad_norm": 1.0280249118804932, + "learning_rate": 3.5735887045607698e-06, + "loss": 0.2712, + "step": 30188 + }, + { + "epoch": 0.6043390135875685, + "grad_norm": 1.1590334177017212, + "learning_rate": 3.573277996451099e-06, + "loss": 0.2982, + "step": 30189 + }, + { + "epoch": 0.6043590321046969, + "grad_norm": 1.1314507722854614, + "learning_rate": 3.5729672943390593e-06, + "loss": 0.3144, + "step": 30190 + }, + { + "epoch": 0.6043790506218252, + "grad_norm": 1.1098464727401733, + "learning_rate": 3.5726565982259596e-06, + "loss": 0.2569, + "step": 30191 + }, + { + "epoch": 0.6043990691389536, + "grad_norm": 1.1130231618881226, + "learning_rate": 3.572345908113104e-06, + "loss": 0.3503, + "step": 30192 + }, + { + "epoch": 0.6044190876560819, + "grad_norm": 1.0779215097427368, + "learning_rate": 3.5720352240017963e-06, + "loss": 0.3069, + "step": 30193 + }, + { + "epoch": 0.6044391061732102, + "grad_norm": 1.0863778591156006, + "learning_rate": 3.571724545893348e-06, + "loss": 0.3212, + "step": 30194 + }, + { + "epoch": 0.6044591246903386, + "grad_norm": 1.074285626411438, + "learning_rate": 3.5714138737890612e-06, + "loss": 0.2808, + "step": 30195 + }, + { + "epoch": 0.6044791432074669, + "grad_norm": 1.0928579568862915, + "learning_rate": 3.571103207690244e-06, + "loss": 0.2812, + "step": 30196 + }, + { + "epoch": 0.6044991617245953, + "grad_norm": 1.0782378911972046, + "learning_rate": 3.570792547598199e-06, + "loss": 0.2885, + "step": 30197 + }, + { + "epoch": 0.6045191802417236, + "grad_norm": 1.252220869064331, + "learning_rate": 3.5704818935142373e-06, + "loss": 0.2843, + "step": 30198 + }, + { + "epoch": 0.604539198758852, + "grad_norm": 1.0684500932693481, + "learning_rate": 3.570171245439662e-06, + "loss": 0.2903, + "step": 30199 + }, + { + "epoch": 0.6045592172759803, + "grad_norm": 0.9560922384262085, + "learning_rate": 3.569860603375779e-06, + "loss": 0.2666, + "step": 30200 + }, + { + "epoch": 0.6045792357931086, + "grad_norm": 1.25242280960083, + "learning_rate": 3.5695499673238927e-06, + "loss": 0.3215, + "step": 30201 + }, + { + "epoch": 0.604599254310237, + "grad_norm": 1.0877656936645508, + "learning_rate": 3.5692393372853107e-06, + "loss": 0.2789, + "step": 30202 + }, + { + "epoch": 0.6046192728273653, + "grad_norm": 1.0282578468322754, + "learning_rate": 3.5689287132613386e-06, + "loss": 0.2979, + "step": 30203 + }, + { + "epoch": 0.6046392913444937, + "grad_norm": 1.0358827114105225, + "learning_rate": 3.5686180952532833e-06, + "loss": 0.2876, + "step": 30204 + }, + { + "epoch": 0.604659309861622, + "grad_norm": 1.1968733072280884, + "learning_rate": 3.5683074832624497e-06, + "loss": 0.3165, + "step": 30205 + }, + { + "epoch": 0.6046793283787504, + "grad_norm": 1.2210979461669922, + "learning_rate": 3.5679968772901414e-06, + "loss": 0.2927, + "step": 30206 + }, + { + "epoch": 0.6046993468958787, + "grad_norm": 1.2467118501663208, + "learning_rate": 3.5676862773376674e-06, + "loss": 0.3529, + "step": 30207 + }, + { + "epoch": 0.6047193654130071, + "grad_norm": 1.1778018474578857, + "learning_rate": 3.567375683406332e-06, + "loss": 0.2884, + "step": 30208 + }, + { + "epoch": 0.6047393839301354, + "grad_norm": 1.1743344068527222, + "learning_rate": 3.567065095497442e-06, + "loss": 0.3116, + "step": 30209 + }, + { + "epoch": 0.6047594024472637, + "grad_norm": 1.176638126373291, + "learning_rate": 3.5667545136122985e-06, + "loss": 0.3329, + "step": 30210 + }, + { + "epoch": 0.6047794209643921, + "grad_norm": 1.058502197265625, + "learning_rate": 3.566443937752214e-06, + "loss": 0.2864, + "step": 30211 + }, + { + "epoch": 0.6047994394815204, + "grad_norm": 1.1064276695251465, + "learning_rate": 3.56613336791849e-06, + "loss": 0.3035, + "step": 30212 + }, + { + "epoch": 0.6048194579986488, + "grad_norm": 1.1153931617736816, + "learning_rate": 3.5658228041124334e-06, + "loss": 0.3241, + "step": 30213 + }, + { + "epoch": 0.6048394765157771, + "grad_norm": 1.9096730947494507, + "learning_rate": 3.5655122463353477e-06, + "loss": 0.7841, + "step": 30214 + }, + { + "epoch": 0.6048594950329055, + "grad_norm": 1.1617733240127563, + "learning_rate": 3.5652016945885394e-06, + "loss": 0.2982, + "step": 30215 + }, + { + "epoch": 0.6048795135500338, + "grad_norm": 1.8456885814666748, + "learning_rate": 3.5648911488733147e-06, + "loss": 0.7407, + "step": 30216 + }, + { + "epoch": 0.6048995320671621, + "grad_norm": 1.1251329183578491, + "learning_rate": 3.5645806091909797e-06, + "loss": 0.3264, + "step": 30217 + }, + { + "epoch": 0.6049195505842905, + "grad_norm": 1.0099399089813232, + "learning_rate": 3.5642700755428387e-06, + "loss": 0.3512, + "step": 30218 + }, + { + "epoch": 0.6049395691014188, + "grad_norm": 1.2113442420959473, + "learning_rate": 3.563959547930196e-06, + "loss": 0.2942, + "step": 30219 + }, + { + "epoch": 0.6049595876185472, + "grad_norm": 1.0337309837341309, + "learning_rate": 3.56364902635436e-06, + "loss": 0.2651, + "step": 30220 + }, + { + "epoch": 0.6049796061356755, + "grad_norm": 1.210097312927246, + "learning_rate": 3.563338510816633e-06, + "loss": 0.2952, + "step": 30221 + }, + { + "epoch": 0.6049996246528039, + "grad_norm": 1.0422778129577637, + "learning_rate": 3.5630280013183233e-06, + "loss": 0.2772, + "step": 30222 + }, + { + "epoch": 0.6050196431699322, + "grad_norm": 0.9908910393714905, + "learning_rate": 3.562717497860735e-06, + "loss": 0.2635, + "step": 30223 + }, + { + "epoch": 0.6050396616870606, + "grad_norm": 1.0954161882400513, + "learning_rate": 3.56240700044517e-06, + "loss": 0.3049, + "step": 30224 + }, + { + "epoch": 0.6050596802041889, + "grad_norm": 1.1615630388259888, + "learning_rate": 3.5620965090729385e-06, + "loss": 0.2851, + "step": 30225 + }, + { + "epoch": 0.6050796987213172, + "grad_norm": 1.1873232126235962, + "learning_rate": 3.561786023745345e-06, + "loss": 0.2735, + "step": 30226 + }, + { + "epoch": 0.6050997172384456, + "grad_norm": 1.2616082429885864, + "learning_rate": 3.5614755444636916e-06, + "loss": 0.3046, + "step": 30227 + }, + { + "epoch": 0.6051197357555739, + "grad_norm": 1.115181803703308, + "learning_rate": 3.5611650712292862e-06, + "loss": 0.2915, + "step": 30228 + }, + { + "epoch": 0.6051397542727023, + "grad_norm": 1.0321589708328247, + "learning_rate": 3.5608546040434326e-06, + "loss": 0.2738, + "step": 30229 + }, + { + "epoch": 0.6051597727898306, + "grad_norm": 1.0488659143447876, + "learning_rate": 3.5605441429074377e-06, + "loss": 0.2845, + "step": 30230 + }, + { + "epoch": 0.605179791306959, + "grad_norm": 1.0822198390960693, + "learning_rate": 3.5602336878226055e-06, + "loss": 0.2783, + "step": 30231 + }, + { + "epoch": 0.6051998098240873, + "grad_norm": 1.2353118658065796, + "learning_rate": 3.5599232387902405e-06, + "loss": 0.3486, + "step": 30232 + }, + { + "epoch": 0.6052198283412156, + "grad_norm": 0.9753400683403015, + "learning_rate": 3.559612795811648e-06, + "loss": 0.2773, + "step": 30233 + }, + { + "epoch": 0.605239846858344, + "grad_norm": 1.0792553424835205, + "learning_rate": 3.5593023588881325e-06, + "loss": 0.3246, + "step": 30234 + }, + { + "epoch": 0.6052598653754723, + "grad_norm": 1.9688104391098022, + "learning_rate": 3.558991928021002e-06, + "loss": 0.8149, + "step": 30235 + }, + { + "epoch": 0.6052798838926007, + "grad_norm": 2.0165255069732666, + "learning_rate": 3.5586815032115586e-06, + "loss": 0.8082, + "step": 30236 + }, + { + "epoch": 0.605299902409729, + "grad_norm": 1.8169435262680054, + "learning_rate": 3.558371084461106e-06, + "loss": 0.7369, + "step": 30237 + }, + { + "epoch": 0.6053199209268574, + "grad_norm": 1.0281505584716797, + "learning_rate": 3.5580606717709533e-06, + "loss": 0.2946, + "step": 30238 + }, + { + "epoch": 0.6053399394439857, + "grad_norm": 1.0469489097595215, + "learning_rate": 3.5577502651424036e-06, + "loss": 0.3219, + "step": 30239 + }, + { + "epoch": 0.6053599579611141, + "grad_norm": 1.080153226852417, + "learning_rate": 3.5574398645767593e-06, + "loss": 0.3119, + "step": 30240 + }, + { + "epoch": 0.6053799764782424, + "grad_norm": 1.070967435836792, + "learning_rate": 3.5571294700753285e-06, + "loss": 0.291, + "step": 30241 + }, + { + "epoch": 0.6053999949953707, + "grad_norm": 1.1212615966796875, + "learning_rate": 3.5568190816394142e-06, + "loss": 0.2859, + "step": 30242 + }, + { + "epoch": 0.6054200135124991, + "grad_norm": 1.2357323169708252, + "learning_rate": 3.5565086992703234e-06, + "loss": 0.3121, + "step": 30243 + }, + { + "epoch": 0.6054400320296274, + "grad_norm": 1.0641344785690308, + "learning_rate": 3.5561983229693593e-06, + "loss": 0.332, + "step": 30244 + }, + { + "epoch": 0.6054600505467558, + "grad_norm": 1.0772596597671509, + "learning_rate": 3.5558879527378255e-06, + "loss": 0.3099, + "step": 30245 + }, + { + "epoch": 0.6054800690638841, + "grad_norm": 1.0321727991104126, + "learning_rate": 3.5555775885770284e-06, + "loss": 0.2483, + "step": 30246 + }, + { + "epoch": 0.6055000875810125, + "grad_norm": 1.1223413944244385, + "learning_rate": 3.5552672304882717e-06, + "loss": 0.3287, + "step": 30247 + }, + { + "epoch": 0.6055201060981408, + "grad_norm": 1.2078922986984253, + "learning_rate": 3.5549568784728624e-06, + "loss": 0.3011, + "step": 30248 + }, + { + "epoch": 0.6055401246152691, + "grad_norm": 1.3601595163345337, + "learning_rate": 3.5546465325321023e-06, + "loss": 0.2912, + "step": 30249 + }, + { + "epoch": 0.6055601431323975, + "grad_norm": 1.1745933294296265, + "learning_rate": 3.554336192667296e-06, + "loss": 0.3183, + "step": 30250 + }, + { + "epoch": 0.6055801616495258, + "grad_norm": 1.0921967029571533, + "learning_rate": 3.554025858879751e-06, + "loss": 0.3011, + "step": 30251 + }, + { + "epoch": 0.6056001801666542, + "grad_norm": 1.2642468214035034, + "learning_rate": 3.5537155311707704e-06, + "loss": 0.3258, + "step": 30252 + }, + { + "epoch": 0.6056201986837825, + "grad_norm": 1.1536266803741455, + "learning_rate": 3.553405209541657e-06, + "loss": 0.3332, + "step": 30253 + }, + { + "epoch": 0.6056402172009109, + "grad_norm": 1.0426738262176514, + "learning_rate": 3.5530948939937182e-06, + "loss": 0.3164, + "step": 30254 + }, + { + "epoch": 0.6056602357180392, + "grad_norm": 1.1172698736190796, + "learning_rate": 3.5527845845282547e-06, + "loss": 0.3116, + "step": 30255 + }, + { + "epoch": 0.6056802542351676, + "grad_norm": 1.0565969944000244, + "learning_rate": 3.5524742811465753e-06, + "loss": 0.3261, + "step": 30256 + }, + { + "epoch": 0.6057002727522959, + "grad_norm": 1.2172563076019287, + "learning_rate": 3.552163983849983e-06, + "loss": 0.3114, + "step": 30257 + }, + { + "epoch": 0.6057202912694242, + "grad_norm": 1.9258143901824951, + "learning_rate": 3.5518536926397805e-06, + "loss": 0.8606, + "step": 30258 + }, + { + "epoch": 0.6057403097865526, + "grad_norm": 1.217826247215271, + "learning_rate": 3.5515434075172735e-06, + "loss": 0.2932, + "step": 30259 + }, + { + "epoch": 0.6057603283036809, + "grad_norm": 1.0454059839248657, + "learning_rate": 3.551233128483766e-06, + "loss": 0.276, + "step": 30260 + }, + { + "epoch": 0.6057803468208093, + "grad_norm": 1.152178406715393, + "learning_rate": 3.550922855540564e-06, + "loss": 0.3166, + "step": 30261 + }, + { + "epoch": 0.6058003653379376, + "grad_norm": 1.1358962059020996, + "learning_rate": 3.55061258868897e-06, + "loss": 0.3272, + "step": 30262 + }, + { + "epoch": 0.605820383855066, + "grad_norm": 1.1464793682098389, + "learning_rate": 3.5503023279302897e-06, + "loss": 0.2903, + "step": 30263 + }, + { + "epoch": 0.6058404023721943, + "grad_norm": 1.056916356086731, + "learning_rate": 3.5499920732658233e-06, + "loss": 0.341, + "step": 30264 + }, + { + "epoch": 0.6058604208893226, + "grad_norm": 1.0787620544433594, + "learning_rate": 3.5496818246968813e-06, + "loss": 0.308, + "step": 30265 + }, + { + "epoch": 0.605880439406451, + "grad_norm": 1.1417158842086792, + "learning_rate": 3.549371582224763e-06, + "loss": 0.3192, + "step": 30266 + }, + { + "epoch": 0.6059004579235793, + "grad_norm": 2.1443002223968506, + "learning_rate": 3.5490613458507755e-06, + "loss": 0.8311, + "step": 30267 + }, + { + "epoch": 0.6059204764407077, + "grad_norm": 1.183985710144043, + "learning_rate": 3.54875111557622e-06, + "loss": 0.3447, + "step": 30268 + }, + { + "epoch": 0.605940494957836, + "grad_norm": 1.0874109268188477, + "learning_rate": 3.548440891402404e-06, + "loss": 0.3006, + "step": 30269 + }, + { + "epoch": 0.6059605134749644, + "grad_norm": 1.1141541004180908, + "learning_rate": 3.548130673330631e-06, + "loss": 0.3259, + "step": 30270 + }, + { + "epoch": 0.6059805319920927, + "grad_norm": 1.0556267499923706, + "learning_rate": 3.5478204613622025e-06, + "loss": 0.2732, + "step": 30271 + }, + { + "epoch": 0.6060005505092211, + "grad_norm": 1.0809557437896729, + "learning_rate": 3.5475102554984246e-06, + "loss": 0.3112, + "step": 30272 + }, + { + "epoch": 0.6060205690263494, + "grad_norm": 1.1985315084457397, + "learning_rate": 3.547200055740601e-06, + "loss": 0.3178, + "step": 30273 + }, + { + "epoch": 0.6060405875434777, + "grad_norm": 1.0871078968048096, + "learning_rate": 3.546889862090037e-06, + "loss": 0.2697, + "step": 30274 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 1.1179730892181396, + "learning_rate": 3.5465796745480348e-06, + "loss": 0.3324, + "step": 30275 + }, + { + "epoch": 0.6060806245777344, + "grad_norm": 1.060332179069519, + "learning_rate": 3.546269493115899e-06, + "loss": 0.2697, + "step": 30276 + }, + { + "epoch": 0.6061006430948628, + "grad_norm": 1.1631630659103394, + "learning_rate": 3.5459593177949312e-06, + "loss": 0.3083, + "step": 30277 + }, + { + "epoch": 0.6061206616119911, + "grad_norm": 1.0893850326538086, + "learning_rate": 3.54564914858644e-06, + "loss": 0.2932, + "step": 30278 + }, + { + "epoch": 0.6061406801291195, + "grad_norm": 1.1935914754867554, + "learning_rate": 3.545338985491725e-06, + "loss": 0.3049, + "step": 30279 + }, + { + "epoch": 0.6061606986462478, + "grad_norm": 1.2748234272003174, + "learning_rate": 3.5450288285120936e-06, + "loss": 0.2857, + "step": 30280 + }, + { + "epoch": 0.6061807171633761, + "grad_norm": 1.0373048782348633, + "learning_rate": 3.5447186776488463e-06, + "loss": 0.2951, + "step": 30281 + }, + { + "epoch": 0.6062007356805045, + "grad_norm": 0.9716477990150452, + "learning_rate": 3.5444085329032894e-06, + "loss": 0.3179, + "step": 30282 + }, + { + "epoch": 0.6062207541976328, + "grad_norm": 1.0786949396133423, + "learning_rate": 3.5440983942767265e-06, + "loss": 0.2717, + "step": 30283 + }, + { + "epoch": 0.6062407727147612, + "grad_norm": 1.185596227645874, + "learning_rate": 3.5437882617704594e-06, + "loss": 0.3086, + "step": 30284 + }, + { + "epoch": 0.6062607912318895, + "grad_norm": 1.8489854335784912, + "learning_rate": 3.543478135385794e-06, + "loss": 0.8208, + "step": 30285 + }, + { + "epoch": 0.6062808097490179, + "grad_norm": 1.109676718711853, + "learning_rate": 3.543168015124031e-06, + "loss": 0.3267, + "step": 30286 + }, + { + "epoch": 0.6063008282661462, + "grad_norm": 1.8042842149734497, + "learning_rate": 3.5428579009864785e-06, + "loss": 0.8275, + "step": 30287 + }, + { + "epoch": 0.6063208467832746, + "grad_norm": 1.2612669467926025, + "learning_rate": 3.542547792974438e-06, + "loss": 0.2864, + "step": 30288 + }, + { + "epoch": 0.6063408653004029, + "grad_norm": 1.0410828590393066, + "learning_rate": 3.542237691089212e-06, + "loss": 0.2662, + "step": 30289 + }, + { + "epoch": 0.6063608838175312, + "grad_norm": 1.0543562173843384, + "learning_rate": 3.541927595332104e-06, + "loss": 0.2992, + "step": 30290 + }, + { + "epoch": 0.6063809023346596, + "grad_norm": 1.2224390506744385, + "learning_rate": 3.54161750570442e-06, + "loss": 0.3234, + "step": 30291 + }, + { + "epoch": 0.6064009208517879, + "grad_norm": 1.2830339670181274, + "learning_rate": 3.5413074222074616e-06, + "loss": 0.2965, + "step": 30292 + }, + { + "epoch": 0.6064209393689163, + "grad_norm": 1.1269376277923584, + "learning_rate": 3.5409973448425337e-06, + "loss": 0.2995, + "step": 30293 + }, + { + "epoch": 0.6064409578860446, + "grad_norm": 1.0923855304718018, + "learning_rate": 3.5406872736109387e-06, + "loss": 0.2747, + "step": 30294 + }, + { + "epoch": 0.606460976403173, + "grad_norm": 1.1084554195404053, + "learning_rate": 3.5403772085139787e-06, + "loss": 0.2997, + "step": 30295 + }, + { + "epoch": 0.6064809949203013, + "grad_norm": 1.751846432685852, + "learning_rate": 3.5400671495529604e-06, + "loss": 0.8222, + "step": 30296 + }, + { + "epoch": 0.6065010134374296, + "grad_norm": 1.1620299816131592, + "learning_rate": 3.539757096729185e-06, + "loss": 0.3236, + "step": 30297 + }, + { + "epoch": 0.606521031954558, + "grad_norm": 1.2512156963348389, + "learning_rate": 3.539447050043957e-06, + "loss": 0.3047, + "step": 30298 + }, + { + "epoch": 0.6065410504716863, + "grad_norm": 2.0197272300720215, + "learning_rate": 3.5391370094985776e-06, + "loss": 0.7895, + "step": 30299 + }, + { + "epoch": 0.6065610689888147, + "grad_norm": 1.8683587312698364, + "learning_rate": 3.5388269750943544e-06, + "loss": 0.6975, + "step": 30300 + }, + { + "epoch": 0.606581087505943, + "grad_norm": 1.9400688409805298, + "learning_rate": 3.5385169468325876e-06, + "loss": 0.9002, + "step": 30301 + }, + { + "epoch": 0.6066011060230714, + "grad_norm": 1.180938959121704, + "learning_rate": 3.538206924714581e-06, + "loss": 0.3259, + "step": 30302 + }, + { + "epoch": 0.6066211245401997, + "grad_norm": 1.0630197525024414, + "learning_rate": 3.537896908741636e-06, + "loss": 0.3217, + "step": 30303 + }, + { + "epoch": 0.6066411430573281, + "grad_norm": 1.2351053953170776, + "learning_rate": 3.5375868989150598e-06, + "loss": 0.3205, + "step": 30304 + }, + { + "epoch": 0.6066611615744564, + "grad_norm": 1.0559165477752686, + "learning_rate": 3.5372768952361525e-06, + "loss": 0.3326, + "step": 30305 + }, + { + "epoch": 0.6066811800915847, + "grad_norm": 2.010587215423584, + "learning_rate": 3.53696689770622e-06, + "loss": 0.7457, + "step": 30306 + }, + { + "epoch": 0.6067011986087131, + "grad_norm": 1.0783336162567139, + "learning_rate": 3.536656906326563e-06, + "loss": 0.3421, + "step": 30307 + }, + { + "epoch": 0.6067212171258414, + "grad_norm": 1.1042463779449463, + "learning_rate": 3.5363469210984834e-06, + "loss": 0.3187, + "step": 30308 + }, + { + "epoch": 0.6067412356429698, + "grad_norm": 1.0869951248168945, + "learning_rate": 3.536036942023289e-06, + "loss": 0.3066, + "step": 30309 + }, + { + "epoch": 0.6067612541600981, + "grad_norm": 1.1889158487319946, + "learning_rate": 3.535726969102279e-06, + "loss": 0.3011, + "step": 30310 + }, + { + "epoch": 0.6067812726772265, + "grad_norm": 1.1480778455734253, + "learning_rate": 3.5354170023367584e-06, + "loss": 0.2637, + "step": 30311 + }, + { + "epoch": 0.6068012911943548, + "grad_norm": 1.063711166381836, + "learning_rate": 3.5351070417280276e-06, + "loss": 0.3097, + "step": 30312 + }, + { + "epoch": 0.6068213097114831, + "grad_norm": 1.0740569829940796, + "learning_rate": 3.5347970872773933e-06, + "loss": 0.3686, + "step": 30313 + }, + { + "epoch": 0.6068413282286115, + "grad_norm": 1.2503734827041626, + "learning_rate": 3.534487138986157e-06, + "loss": 0.3359, + "step": 30314 + }, + { + "epoch": 0.6068613467457398, + "grad_norm": 1.0823837518692017, + "learning_rate": 3.5341771968556217e-06, + "loss": 0.2862, + "step": 30315 + }, + { + "epoch": 0.6068813652628682, + "grad_norm": 1.0896598100662231, + "learning_rate": 3.5338672608870887e-06, + "loss": 0.2791, + "step": 30316 + }, + { + "epoch": 0.6069013837799965, + "grad_norm": 1.0755282640457153, + "learning_rate": 3.5335573310818626e-06, + "loss": 0.2758, + "step": 30317 + }, + { + "epoch": 0.6069214022971249, + "grad_norm": 1.8141013383865356, + "learning_rate": 3.5332474074412453e-06, + "loss": 0.7247, + "step": 30318 + }, + { + "epoch": 0.6069414208142532, + "grad_norm": 1.1198691129684448, + "learning_rate": 3.5329374899665413e-06, + "loss": 0.301, + "step": 30319 + }, + { + "epoch": 0.6069614393313816, + "grad_norm": 0.94795823097229, + "learning_rate": 3.532627578659053e-06, + "loss": 0.2833, + "step": 30320 + }, + { + "epoch": 0.6069814578485099, + "grad_norm": 1.0718772411346436, + "learning_rate": 3.5323176735200804e-06, + "loss": 0.2659, + "step": 30321 + }, + { + "epoch": 0.6070014763656382, + "grad_norm": 1.1339125633239746, + "learning_rate": 3.53200777455093e-06, + "loss": 0.3242, + "step": 30322 + }, + { + "epoch": 0.6070214948827666, + "grad_norm": 1.0722442865371704, + "learning_rate": 3.5316978817529024e-06, + "loss": 0.2998, + "step": 30323 + }, + { + "epoch": 0.6070415133998949, + "grad_norm": 1.055266261100769, + "learning_rate": 3.531387995127302e-06, + "loss": 0.3636, + "step": 30324 + }, + { + "epoch": 0.6070615319170233, + "grad_norm": 0.9354573488235474, + "learning_rate": 3.531078114675428e-06, + "loss": 0.2531, + "step": 30325 + }, + { + "epoch": 0.6070815504341516, + "grad_norm": 1.184859275817871, + "learning_rate": 3.5307682403985876e-06, + "loss": 0.2768, + "step": 30326 + }, + { + "epoch": 0.60710156895128, + "grad_norm": 1.0891563892364502, + "learning_rate": 3.530458372298081e-06, + "loss": 0.2811, + "step": 30327 + }, + { + "epoch": 0.6071215874684083, + "grad_norm": 1.1064847707748413, + "learning_rate": 3.530148510375212e-06, + "loss": 0.3011, + "step": 30328 + }, + { + "epoch": 0.6071416059855366, + "grad_norm": 1.1558393239974976, + "learning_rate": 3.5298386546312813e-06, + "loss": 0.3332, + "step": 30329 + }, + { + "epoch": 0.607161624502665, + "grad_norm": 1.0063502788543701, + "learning_rate": 3.529528805067592e-06, + "loss": 0.2948, + "step": 30330 + }, + { + "epoch": 0.6071816430197933, + "grad_norm": 1.1648919582366943, + "learning_rate": 3.529218961685447e-06, + "loss": 0.3434, + "step": 30331 + }, + { + "epoch": 0.6072016615369217, + "grad_norm": 1.2346649169921875, + "learning_rate": 3.5289091244861506e-06, + "loss": 0.3215, + "step": 30332 + }, + { + "epoch": 0.60722168005405, + "grad_norm": 1.045113205909729, + "learning_rate": 3.5285992934710035e-06, + "loss": 0.2806, + "step": 30333 + }, + { + "epoch": 0.6072416985711784, + "grad_norm": 1.1456737518310547, + "learning_rate": 3.528289468641306e-06, + "loss": 0.3511, + "step": 30334 + }, + { + "epoch": 0.6072617170883067, + "grad_norm": 1.199160099029541, + "learning_rate": 3.527979649998365e-06, + "loss": 0.3226, + "step": 30335 + }, + { + "epoch": 0.6072817356054351, + "grad_norm": 1.1389439105987549, + "learning_rate": 3.527669837543479e-06, + "loss": 0.2922, + "step": 30336 + }, + { + "epoch": 0.6073017541225634, + "grad_norm": 1.1653927564620972, + "learning_rate": 3.527360031277954e-06, + "loss": 0.3307, + "step": 30337 + }, + { + "epoch": 0.6073217726396917, + "grad_norm": 1.1286702156066895, + "learning_rate": 3.5270502312030906e-06, + "loss": 0.2885, + "step": 30338 + }, + { + "epoch": 0.6073417911568201, + "grad_norm": 1.1701436042785645, + "learning_rate": 3.526740437320189e-06, + "loss": 0.2881, + "step": 30339 + }, + { + "epoch": 0.6073618096739484, + "grad_norm": 1.101710319519043, + "learning_rate": 3.5264306496305554e-06, + "loss": 0.3101, + "step": 30340 + }, + { + "epoch": 0.6073818281910768, + "grad_norm": 1.1834009885787964, + "learning_rate": 3.526120868135491e-06, + "loss": 0.3217, + "step": 30341 + }, + { + "epoch": 0.6074018467082051, + "grad_norm": 1.256553053855896, + "learning_rate": 3.5258110928362953e-06, + "loss": 0.4097, + "step": 30342 + }, + { + "epoch": 0.6074218652253335, + "grad_norm": 1.051858901977539, + "learning_rate": 3.5255013237342727e-06, + "loss": 0.333, + "step": 30343 + }, + { + "epoch": 0.6074418837424618, + "grad_norm": 1.9845184087753296, + "learning_rate": 3.525191560830725e-06, + "loss": 0.7837, + "step": 30344 + }, + { + "epoch": 0.6074619022595901, + "grad_norm": 1.1073873043060303, + "learning_rate": 3.5248818041269562e-06, + "loss": 0.2873, + "step": 30345 + }, + { + "epoch": 0.6074819207767185, + "grad_norm": 1.2305731773376465, + "learning_rate": 3.5245720536242665e-06, + "loss": 0.3261, + "step": 30346 + }, + { + "epoch": 0.6075019392938468, + "grad_norm": 1.1052418947219849, + "learning_rate": 3.524262309323958e-06, + "loss": 0.282, + "step": 30347 + }, + { + "epoch": 0.6075219578109752, + "grad_norm": 1.0856276750564575, + "learning_rate": 3.523952571227332e-06, + "loss": 0.2766, + "step": 30348 + }, + { + "epoch": 0.6075419763281035, + "grad_norm": 1.7482671737670898, + "learning_rate": 3.5236428393356924e-06, + "loss": 0.7168, + "step": 30349 + }, + { + "epoch": 0.6075619948452319, + "grad_norm": 1.1178126335144043, + "learning_rate": 3.5233331136503413e-06, + "loss": 0.3059, + "step": 30350 + }, + { + "epoch": 0.6075820133623602, + "grad_norm": 1.1359996795654297, + "learning_rate": 3.52302339417258e-06, + "loss": 0.3309, + "step": 30351 + }, + { + "epoch": 0.6076020318794886, + "grad_norm": 1.1585716009140015, + "learning_rate": 3.5227136809037086e-06, + "loss": 0.271, + "step": 30352 + }, + { + "epoch": 0.6076220503966169, + "grad_norm": 1.1441030502319336, + "learning_rate": 3.5224039738450334e-06, + "loss": 0.2774, + "step": 30353 + }, + { + "epoch": 0.6076420689137452, + "grad_norm": 1.123885154724121, + "learning_rate": 3.5220942729978536e-06, + "loss": 0.3131, + "step": 30354 + }, + { + "epoch": 0.6076620874308736, + "grad_norm": 1.0721240043640137, + "learning_rate": 3.521784578363471e-06, + "loss": 0.2936, + "step": 30355 + }, + { + "epoch": 0.6076821059480019, + "grad_norm": 1.2682487964630127, + "learning_rate": 3.5214748899431872e-06, + "loss": 0.3021, + "step": 30356 + }, + { + "epoch": 0.6077021244651303, + "grad_norm": 1.0949995517730713, + "learning_rate": 3.5211652077383048e-06, + "loss": 0.3228, + "step": 30357 + }, + { + "epoch": 0.6077221429822586, + "grad_norm": 1.2430514097213745, + "learning_rate": 3.520855531750127e-06, + "loss": 0.2706, + "step": 30358 + }, + { + "epoch": 0.607742161499387, + "grad_norm": 1.2881618738174438, + "learning_rate": 3.520545861979954e-06, + "loss": 0.2914, + "step": 30359 + }, + { + "epoch": 0.6077621800165153, + "grad_norm": 1.2647085189819336, + "learning_rate": 3.520236198429088e-06, + "loss": 0.3127, + "step": 30360 + }, + { + "epoch": 0.6077821985336436, + "grad_norm": 1.1517210006713867, + "learning_rate": 3.5199265410988276e-06, + "loss": 0.3144, + "step": 30361 + }, + { + "epoch": 0.607802217050772, + "grad_norm": 1.0972477197647095, + "learning_rate": 3.519616889990479e-06, + "loss": 0.346, + "step": 30362 + }, + { + "epoch": 0.6078222355679003, + "grad_norm": 1.0657294988632202, + "learning_rate": 3.5193072451053445e-06, + "loss": 0.3232, + "step": 30363 + }, + { + "epoch": 0.6078422540850287, + "grad_norm": 1.077736735343933, + "learning_rate": 3.518997606444723e-06, + "loss": 0.3073, + "step": 30364 + }, + { + "epoch": 0.607862272602157, + "grad_norm": 1.2141870260238647, + "learning_rate": 3.5186879740099146e-06, + "loss": 0.3271, + "step": 30365 + }, + { + "epoch": 0.6078822911192854, + "grad_norm": 1.1636430025100708, + "learning_rate": 3.518378347802225e-06, + "loss": 0.2711, + "step": 30366 + }, + { + "epoch": 0.6079023096364137, + "grad_norm": 1.0906552076339722, + "learning_rate": 3.5180687278229543e-06, + "loss": 0.2521, + "step": 30367 + }, + { + "epoch": 0.6079223281535421, + "grad_norm": 1.9191185235977173, + "learning_rate": 3.5177591140734023e-06, + "loss": 0.824, + "step": 30368 + }, + { + "epoch": 0.6079423466706704, + "grad_norm": 1.1055084466934204, + "learning_rate": 3.517449506554874e-06, + "loss": 0.3147, + "step": 30369 + }, + { + "epoch": 0.6079623651877987, + "grad_norm": 1.0840390920639038, + "learning_rate": 3.517139905268666e-06, + "loss": 0.3064, + "step": 30370 + }, + { + "epoch": 0.6079823837049271, + "grad_norm": 1.2293747663497925, + "learning_rate": 3.516830310216085e-06, + "loss": 0.3471, + "step": 30371 + }, + { + "epoch": 0.6080024022220554, + "grad_norm": 1.09813392162323, + "learning_rate": 3.5165207213984305e-06, + "loss": 0.2456, + "step": 30372 + }, + { + "epoch": 0.6080224207391838, + "grad_norm": 0.9975342154502869, + "learning_rate": 3.5162111388170034e-06, + "loss": 0.2982, + "step": 30373 + }, + { + "epoch": 0.6080424392563121, + "grad_norm": 1.0336217880249023, + "learning_rate": 3.515901562473103e-06, + "loss": 0.2996, + "step": 30374 + }, + { + "epoch": 0.6080624577734405, + "grad_norm": 1.1217536926269531, + "learning_rate": 3.515591992368035e-06, + "loss": 0.3144, + "step": 30375 + }, + { + "epoch": 0.6080824762905688, + "grad_norm": 1.1773414611816406, + "learning_rate": 3.515282428503099e-06, + "loss": 0.3466, + "step": 30376 + }, + { + "epoch": 0.6081024948076971, + "grad_norm": 1.019436240196228, + "learning_rate": 3.514972870879596e-06, + "loss": 0.307, + "step": 30377 + }, + { + "epoch": 0.6081225133248255, + "grad_norm": 1.335149884223938, + "learning_rate": 3.5146633194988283e-06, + "loss": 0.3044, + "step": 30378 + }, + { + "epoch": 0.6081425318419538, + "grad_norm": 1.1736253499984741, + "learning_rate": 3.514353774362094e-06, + "loss": 0.3099, + "step": 30379 + }, + { + "epoch": 0.6081625503590822, + "grad_norm": 1.3352124691009521, + "learning_rate": 3.514044235470698e-06, + "loss": 0.2655, + "step": 30380 + }, + { + "epoch": 0.6081825688762105, + "grad_norm": 1.86845862865448, + "learning_rate": 3.5137347028259404e-06, + "loss": 0.7437, + "step": 30381 + }, + { + "epoch": 0.6082025873933389, + "grad_norm": 1.1370751857757568, + "learning_rate": 3.513425176429123e-06, + "loss": 0.3146, + "step": 30382 + }, + { + "epoch": 0.6082226059104672, + "grad_norm": 1.9490941762924194, + "learning_rate": 3.513115656281544e-06, + "loss": 0.7831, + "step": 30383 + }, + { + "epoch": 0.6082426244275956, + "grad_norm": 1.0925097465515137, + "learning_rate": 3.512806142384508e-06, + "loss": 0.3057, + "step": 30384 + }, + { + "epoch": 0.6082626429447239, + "grad_norm": 1.1821668148040771, + "learning_rate": 3.5124966347393163e-06, + "loss": 0.2514, + "step": 30385 + }, + { + "epoch": 0.6082826614618522, + "grad_norm": 1.0956305265426636, + "learning_rate": 3.512187133347268e-06, + "loss": 0.3412, + "step": 30386 + }, + { + "epoch": 0.6083026799789806, + "grad_norm": 1.144821047782898, + "learning_rate": 3.5118776382096633e-06, + "loss": 0.3518, + "step": 30387 + }, + { + "epoch": 0.6083226984961089, + "grad_norm": 1.212548851966858, + "learning_rate": 3.5115681493278047e-06, + "loss": 0.2758, + "step": 30388 + }, + { + "epoch": 0.6083427170132373, + "grad_norm": 1.1148200035095215, + "learning_rate": 3.5112586667029954e-06, + "loss": 0.3457, + "step": 30389 + }, + { + "epoch": 0.6083627355303656, + "grad_norm": 1.1876243352890015, + "learning_rate": 3.5109491903365334e-06, + "loss": 0.3379, + "step": 30390 + }, + { + "epoch": 0.608382754047494, + "grad_norm": 2.0001888275146484, + "learning_rate": 3.510639720229721e-06, + "loss": 0.7717, + "step": 30391 + }, + { + "epoch": 0.6084027725646223, + "grad_norm": 1.110270380973816, + "learning_rate": 3.510330256383857e-06, + "loss": 0.3025, + "step": 30392 + }, + { + "epoch": 0.6084227910817506, + "grad_norm": 1.1291639804840088, + "learning_rate": 3.5100207988002455e-06, + "loss": 0.3265, + "step": 30393 + }, + { + "epoch": 0.608442809598879, + "grad_norm": 1.357142448425293, + "learning_rate": 3.509711347480185e-06, + "loss": 0.2602, + "step": 30394 + }, + { + "epoch": 0.6084628281160073, + "grad_norm": 1.095837116241455, + "learning_rate": 3.509401902424978e-06, + "loss": 0.2628, + "step": 30395 + }, + { + "epoch": 0.6084828466331357, + "grad_norm": 1.3220608234405518, + "learning_rate": 3.5090924636359234e-06, + "loss": 0.3611, + "step": 30396 + }, + { + "epoch": 0.608502865150264, + "grad_norm": 1.066648244857788, + "learning_rate": 3.5087830311143246e-06, + "loss": 0.2987, + "step": 30397 + }, + { + "epoch": 0.6085228836673924, + "grad_norm": 1.1014469861984253, + "learning_rate": 3.5084736048614815e-06, + "loss": 0.3528, + "step": 30398 + }, + { + "epoch": 0.6085429021845207, + "grad_norm": 1.042382001876831, + "learning_rate": 3.508164184878694e-06, + "loss": 0.2621, + "step": 30399 + }, + { + "epoch": 0.6085629207016491, + "grad_norm": 2.2355053424835205, + "learning_rate": 3.507854771167263e-06, + "loss": 0.7909, + "step": 30400 + }, + { + "epoch": 0.6085829392187774, + "grad_norm": 1.0563642978668213, + "learning_rate": 3.5075453637284875e-06, + "loss": 0.2804, + "step": 30401 + }, + { + "epoch": 0.6086029577359057, + "grad_norm": 1.0337483882904053, + "learning_rate": 3.507235962563672e-06, + "loss": 0.2642, + "step": 30402 + }, + { + "epoch": 0.6086229762530341, + "grad_norm": 1.1666579246520996, + "learning_rate": 3.506926567674116e-06, + "loss": 0.3533, + "step": 30403 + }, + { + "epoch": 0.6086429947701624, + "grad_norm": 1.100714087486267, + "learning_rate": 3.506617179061119e-06, + "loss": 0.2698, + "step": 30404 + }, + { + "epoch": 0.6086630132872908, + "grad_norm": 1.2102155685424805, + "learning_rate": 3.5063077967259793e-06, + "loss": 0.3043, + "step": 30405 + }, + { + "epoch": 0.6086830318044191, + "grad_norm": 1.2391411066055298, + "learning_rate": 3.5059984206700037e-06, + "loss": 0.2944, + "step": 30406 + }, + { + "epoch": 0.6087030503215475, + "grad_norm": 1.1322133541107178, + "learning_rate": 3.5056890508944876e-06, + "loss": 0.2641, + "step": 30407 + }, + { + "epoch": 0.6087230688386758, + "grad_norm": 1.0318547487258911, + "learning_rate": 3.505379687400734e-06, + "loss": 0.2912, + "step": 30408 + }, + { + "epoch": 0.6087430873558041, + "grad_norm": 1.0575319528579712, + "learning_rate": 3.5050703301900423e-06, + "loss": 0.309, + "step": 30409 + }, + { + "epoch": 0.6087631058729325, + "grad_norm": 1.08921480178833, + "learning_rate": 3.5047609792637117e-06, + "loss": 0.2919, + "step": 30410 + }, + { + "epoch": 0.6087831243900608, + "grad_norm": 1.2238439321517944, + "learning_rate": 3.5044516346230463e-06, + "loss": 0.3, + "step": 30411 + }, + { + "epoch": 0.6088031429071892, + "grad_norm": 0.9704589247703552, + "learning_rate": 3.504142296269344e-06, + "loss": 0.2544, + "step": 30412 + }, + { + "epoch": 0.6088231614243175, + "grad_norm": 1.2139071226119995, + "learning_rate": 3.5038329642039037e-06, + "loss": 0.2641, + "step": 30413 + }, + { + "epoch": 0.6088431799414459, + "grad_norm": 1.4034864902496338, + "learning_rate": 3.503523638428028e-06, + "loss": 0.2395, + "step": 30414 + }, + { + "epoch": 0.6088631984585742, + "grad_norm": 1.0545088052749634, + "learning_rate": 3.5032143189430185e-06, + "loss": 0.2713, + "step": 30415 + }, + { + "epoch": 0.6088832169757026, + "grad_norm": 1.1117465496063232, + "learning_rate": 3.5029050057501733e-06, + "loss": 0.3231, + "step": 30416 + }, + { + "epoch": 0.6089032354928309, + "grad_norm": 1.136145830154419, + "learning_rate": 3.502595698850794e-06, + "loss": 0.2939, + "step": 30417 + }, + { + "epoch": 0.6089232540099592, + "grad_norm": 1.2139852046966553, + "learning_rate": 3.502286398246178e-06, + "loss": 0.3378, + "step": 30418 + }, + { + "epoch": 0.6089432725270876, + "grad_norm": 1.0859553813934326, + "learning_rate": 3.501977103937629e-06, + "loss": 0.2739, + "step": 30419 + }, + { + "epoch": 0.6089632910442159, + "grad_norm": 1.057362675666809, + "learning_rate": 3.501667815926446e-06, + "loss": 0.2713, + "step": 30420 + }, + { + "epoch": 0.6089833095613443, + "grad_norm": 1.1116158962249756, + "learning_rate": 3.5013585342139302e-06, + "loss": 0.3328, + "step": 30421 + }, + { + "epoch": 0.6090033280784726, + "grad_norm": 1.1383514404296875, + "learning_rate": 3.5010492588013793e-06, + "loss": 0.331, + "step": 30422 + }, + { + "epoch": 0.609023346595601, + "grad_norm": 1.2566859722137451, + "learning_rate": 3.500739989690094e-06, + "loss": 0.3313, + "step": 30423 + }, + { + "epoch": 0.6090433651127293, + "grad_norm": 1.0912048816680908, + "learning_rate": 3.5004307268813766e-06, + "loss": 0.2925, + "step": 30424 + }, + { + "epoch": 0.6090633836298576, + "grad_norm": 1.126700758934021, + "learning_rate": 3.500121470376525e-06, + "loss": 0.3167, + "step": 30425 + }, + { + "epoch": 0.609083402146986, + "grad_norm": 1.1644232273101807, + "learning_rate": 3.49981222017684e-06, + "loss": 0.2995, + "step": 30426 + }, + { + "epoch": 0.6091034206641143, + "grad_norm": 1.0003687143325806, + "learning_rate": 3.4995029762836196e-06, + "loss": 0.3203, + "step": 30427 + }, + { + "epoch": 0.6091234391812427, + "grad_norm": 1.0676851272583008, + "learning_rate": 3.499193738698169e-06, + "loss": 0.3072, + "step": 30428 + }, + { + "epoch": 0.609143457698371, + "grad_norm": 1.0510997772216797, + "learning_rate": 3.4988845074217838e-06, + "loss": 0.2632, + "step": 30429 + }, + { + "epoch": 0.6091634762154994, + "grad_norm": 0.9810059666633606, + "learning_rate": 3.498575282455766e-06, + "loss": 0.3277, + "step": 30430 + }, + { + "epoch": 0.6091834947326277, + "grad_norm": 1.190499186515808, + "learning_rate": 3.4982660638014133e-06, + "loss": 0.3012, + "step": 30431 + }, + { + "epoch": 0.6092035132497561, + "grad_norm": 1.2697864770889282, + "learning_rate": 3.4979568514600266e-06, + "loss": 0.2977, + "step": 30432 + }, + { + "epoch": 0.6092235317668844, + "grad_norm": 1.202904462814331, + "learning_rate": 3.497647645432906e-06, + "loss": 0.3246, + "step": 30433 + }, + { + "epoch": 0.6092435502840127, + "grad_norm": 0.9885960817337036, + "learning_rate": 3.4973384457213527e-06, + "loss": 0.3056, + "step": 30434 + }, + { + "epoch": 0.6092635688011411, + "grad_norm": 1.1969982385635376, + "learning_rate": 3.4970292523266654e-06, + "loss": 0.2833, + "step": 30435 + }, + { + "epoch": 0.6092835873182694, + "grad_norm": 1.130007266998291, + "learning_rate": 3.496720065250141e-06, + "loss": 0.3327, + "step": 30436 + }, + { + "epoch": 0.6093036058353978, + "grad_norm": 1.1917561292648315, + "learning_rate": 3.4964108844930843e-06, + "loss": 0.339, + "step": 30437 + }, + { + "epoch": 0.6093236243525261, + "grad_norm": 1.0644886493682861, + "learning_rate": 3.496101710056793e-06, + "loss": 0.3068, + "step": 30438 + }, + { + "epoch": 0.6093436428696545, + "grad_norm": 1.105687141418457, + "learning_rate": 3.495792541942565e-06, + "loss": 0.3111, + "step": 30439 + }, + { + "epoch": 0.6093636613867828, + "grad_norm": 1.0310477018356323, + "learning_rate": 3.495483380151703e-06, + "loss": 0.2999, + "step": 30440 + }, + { + "epoch": 0.6093836799039111, + "grad_norm": 1.0064496994018555, + "learning_rate": 3.4951742246855026e-06, + "loss": 0.2966, + "step": 30441 + }, + { + "epoch": 0.6094036984210395, + "grad_norm": 1.1566866636276245, + "learning_rate": 3.494865075545268e-06, + "loss": 0.3004, + "step": 30442 + }, + { + "epoch": 0.6094237169381678, + "grad_norm": 1.2781659364700317, + "learning_rate": 3.4945559327322966e-06, + "loss": 0.3179, + "step": 30443 + }, + { + "epoch": 0.6094437354552962, + "grad_norm": 1.2751747369766235, + "learning_rate": 3.494246796247888e-06, + "loss": 0.2848, + "step": 30444 + }, + { + "epoch": 0.6094637539724245, + "grad_norm": 1.1063084602355957, + "learning_rate": 3.49393766609334e-06, + "loss": 0.2933, + "step": 30445 + }, + { + "epoch": 0.6094837724895529, + "grad_norm": 1.3516566753387451, + "learning_rate": 3.4936285422699556e-06, + "loss": 0.3025, + "step": 30446 + }, + { + "epoch": 0.6095037910066812, + "grad_norm": 1.1623578071594238, + "learning_rate": 3.4933194247790325e-06, + "loss": 0.3542, + "step": 30447 + }, + { + "epoch": 0.6095238095238096, + "grad_norm": 1.0588901042938232, + "learning_rate": 3.4930103136218707e-06, + "loss": 0.3378, + "step": 30448 + }, + { + "epoch": 0.6095438280409379, + "grad_norm": 1.2129220962524414, + "learning_rate": 3.492701208799768e-06, + "loss": 0.2856, + "step": 30449 + }, + { + "epoch": 0.6095638465580662, + "grad_norm": 1.0673474073410034, + "learning_rate": 3.4923921103140262e-06, + "loss": 0.2943, + "step": 30450 + }, + { + "epoch": 0.6095838650751946, + "grad_norm": 1.901806354522705, + "learning_rate": 3.492083018165944e-06, + "loss": 0.8775, + "step": 30451 + }, + { + "epoch": 0.6096038835923229, + "grad_norm": 1.2191389799118042, + "learning_rate": 3.491773932356819e-06, + "loss": 0.2639, + "step": 30452 + }, + { + "epoch": 0.6096239021094513, + "grad_norm": 1.1785619258880615, + "learning_rate": 3.4914648528879526e-06, + "loss": 0.2864, + "step": 30453 + }, + { + "epoch": 0.6096439206265796, + "grad_norm": 1.1570106744766235, + "learning_rate": 3.4911557797606414e-06, + "loss": 0.3083, + "step": 30454 + }, + { + "epoch": 0.609663939143708, + "grad_norm": 1.126242995262146, + "learning_rate": 3.490846712976189e-06, + "loss": 0.2992, + "step": 30455 + }, + { + "epoch": 0.6096839576608363, + "grad_norm": 1.090721607208252, + "learning_rate": 3.490537652535892e-06, + "loss": 0.2847, + "step": 30456 + }, + { + "epoch": 0.6097039761779646, + "grad_norm": 1.7717372179031372, + "learning_rate": 3.490228598441049e-06, + "loss": 0.7263, + "step": 30457 + }, + { + "epoch": 0.609723994695093, + "grad_norm": 1.0438281297683716, + "learning_rate": 3.4899195506929596e-06, + "loss": 0.2947, + "step": 30458 + }, + { + "epoch": 0.6097440132122213, + "grad_norm": 1.8312033414840698, + "learning_rate": 3.489610509292924e-06, + "loss": 0.7929, + "step": 30459 + }, + { + "epoch": 0.6097640317293497, + "grad_norm": 1.1161633729934692, + "learning_rate": 3.4893014742422415e-06, + "loss": 0.3015, + "step": 30460 + }, + { + "epoch": 0.609784050246478, + "grad_norm": 1.1337999105453491, + "learning_rate": 3.4889924455422103e-06, + "loss": 0.3135, + "step": 30461 + }, + { + "epoch": 0.6098040687636064, + "grad_norm": 1.092274785041809, + "learning_rate": 3.48868342319413e-06, + "loss": 0.3621, + "step": 30462 + }, + { + "epoch": 0.6098240872807347, + "grad_norm": 1.1031386852264404, + "learning_rate": 3.4883744071992975e-06, + "loss": 0.3208, + "step": 30463 + }, + { + "epoch": 0.6098441057978631, + "grad_norm": 1.0249921083450317, + "learning_rate": 3.488065397559015e-06, + "loss": 0.306, + "step": 30464 + }, + { + "epoch": 0.6098641243149914, + "grad_norm": 2.0983994007110596, + "learning_rate": 3.4877563942745796e-06, + "loss": 0.7295, + "step": 30465 + }, + { + "epoch": 0.6098841428321197, + "grad_norm": 1.2041181325912476, + "learning_rate": 3.487447397347292e-06, + "loss": 0.3264, + "step": 30466 + }, + { + "epoch": 0.6099041613492481, + "grad_norm": 1.1108824014663696, + "learning_rate": 3.487138406778448e-06, + "loss": 0.2949, + "step": 30467 + }, + { + "epoch": 0.6099241798663764, + "grad_norm": 1.8277262449264526, + "learning_rate": 3.486829422569351e-06, + "loss": 0.7684, + "step": 30468 + }, + { + "epoch": 0.6099441983835048, + "grad_norm": 1.8374353647232056, + "learning_rate": 3.486520444721297e-06, + "loss": 0.787, + "step": 30469 + }, + { + "epoch": 0.6099642169006331, + "grad_norm": 1.0727497339248657, + "learning_rate": 3.4862114732355846e-06, + "loss": 0.3469, + "step": 30470 + }, + { + "epoch": 0.6099842354177615, + "grad_norm": 1.0676692724227905, + "learning_rate": 3.4859025081135123e-06, + "loss": 0.2515, + "step": 30471 + }, + { + "epoch": 0.6100042539348898, + "grad_norm": 1.875266194343567, + "learning_rate": 3.485593549356381e-06, + "loss": 0.7933, + "step": 30472 + }, + { + "epoch": 0.6100242724520181, + "grad_norm": 1.095504641532898, + "learning_rate": 3.4852845969654896e-06, + "loss": 0.3129, + "step": 30473 + }, + { + "epoch": 0.6100442909691465, + "grad_norm": 1.0406743288040161, + "learning_rate": 3.4849756509421358e-06, + "loss": 0.2858, + "step": 30474 + }, + { + "epoch": 0.6100643094862748, + "grad_norm": 1.0382057428359985, + "learning_rate": 3.4846667112876176e-06, + "loss": 0.3014, + "step": 30475 + }, + { + "epoch": 0.6100843280034032, + "grad_norm": 1.1885422468185425, + "learning_rate": 3.4843577780032335e-06, + "loss": 0.3593, + "step": 30476 + }, + { + "epoch": 0.6101043465205315, + "grad_norm": 1.1987106800079346, + "learning_rate": 3.4840488510902843e-06, + "loss": 0.3815, + "step": 30477 + }, + { + "epoch": 0.6101243650376599, + "grad_norm": 1.1658685207366943, + "learning_rate": 3.483739930550067e-06, + "loss": 0.2957, + "step": 30478 + }, + { + "epoch": 0.6101443835547882, + "grad_norm": 1.1592841148376465, + "learning_rate": 3.4834310163838815e-06, + "loss": 0.34, + "step": 30479 + }, + { + "epoch": 0.6101644020719165, + "grad_norm": 1.083238124847412, + "learning_rate": 3.4831221085930233e-06, + "loss": 0.3112, + "step": 30480 + }, + { + "epoch": 0.6101844205890449, + "grad_norm": 1.2519721984863281, + "learning_rate": 3.4828132071787956e-06, + "loss": 0.3027, + "step": 30481 + }, + { + "epoch": 0.6102044391061732, + "grad_norm": 1.106980800628662, + "learning_rate": 3.482504312142495e-06, + "loss": 0.2816, + "step": 30482 + }, + { + "epoch": 0.6102244576233016, + "grad_norm": 0.9976306557655334, + "learning_rate": 3.4821954234854184e-06, + "loss": 0.2645, + "step": 30483 + }, + { + "epoch": 0.6102444761404299, + "grad_norm": 1.1763380765914917, + "learning_rate": 3.4818865412088663e-06, + "loss": 0.3091, + "step": 30484 + }, + { + "epoch": 0.6102644946575583, + "grad_norm": 1.1011533737182617, + "learning_rate": 3.4815776653141353e-06, + "loss": 0.3001, + "step": 30485 + }, + { + "epoch": 0.6102845131746866, + "grad_norm": 1.2440165281295776, + "learning_rate": 3.481268795802526e-06, + "loss": 0.2867, + "step": 30486 + }, + { + "epoch": 0.610304531691815, + "grad_norm": 1.0950545072555542, + "learning_rate": 3.480959932675337e-06, + "loss": 0.2923, + "step": 30487 + }, + { + "epoch": 0.6103245502089433, + "grad_norm": 1.005194902420044, + "learning_rate": 3.4806510759338645e-06, + "loss": 0.2751, + "step": 30488 + }, + { + "epoch": 0.6103445687260716, + "grad_norm": 1.0414265394210815, + "learning_rate": 3.4803422255794062e-06, + "loss": 0.2883, + "step": 30489 + }, + { + "epoch": 0.6103645872432, + "grad_norm": 1.1881897449493408, + "learning_rate": 3.4800333816132646e-06, + "loss": 0.3364, + "step": 30490 + }, + { + "epoch": 0.6103846057603283, + "grad_norm": 1.0910019874572754, + "learning_rate": 3.4797245440367345e-06, + "loss": 0.3441, + "step": 30491 + }, + { + "epoch": 0.6104046242774567, + "grad_norm": 1.1603857278823853, + "learning_rate": 3.479415712851116e-06, + "loss": 0.2868, + "step": 30492 + }, + { + "epoch": 0.610424642794585, + "grad_norm": 1.1081724166870117, + "learning_rate": 3.479106888057706e-06, + "loss": 0.3277, + "step": 30493 + }, + { + "epoch": 0.6104446613117134, + "grad_norm": 1.0741482973098755, + "learning_rate": 3.478798069657802e-06, + "loss": 0.281, + "step": 30494 + }, + { + "epoch": 0.6104646798288417, + "grad_norm": 1.138787031173706, + "learning_rate": 3.478489257652706e-06, + "loss": 0.3334, + "step": 30495 + }, + { + "epoch": 0.61048469834597, + "grad_norm": 1.0928897857666016, + "learning_rate": 3.4781804520437122e-06, + "loss": 0.28, + "step": 30496 + }, + { + "epoch": 0.6105047168630984, + "grad_norm": 1.213756799697876, + "learning_rate": 3.477871652832121e-06, + "loss": 0.3165, + "step": 30497 + }, + { + "epoch": 0.6105247353802267, + "grad_norm": 1.2288042306900024, + "learning_rate": 3.477562860019228e-06, + "loss": 0.3186, + "step": 30498 + }, + { + "epoch": 0.6105447538973551, + "grad_norm": 1.213914394378662, + "learning_rate": 3.4772540736063353e-06, + "loss": 0.332, + "step": 30499 + }, + { + "epoch": 0.6105647724144834, + "grad_norm": 1.9434669017791748, + "learning_rate": 3.476945293594739e-06, + "loss": 0.8734, + "step": 30500 + }, + { + "epoch": 0.6105847909316118, + "grad_norm": 1.2180730104446411, + "learning_rate": 3.4766365199857366e-06, + "loss": 0.3015, + "step": 30501 + }, + { + "epoch": 0.6106048094487401, + "grad_norm": 1.1333411931991577, + "learning_rate": 3.4763277527806242e-06, + "loss": 0.3419, + "step": 30502 + }, + { + "epoch": 0.6106248279658685, + "grad_norm": 1.1578295230865479, + "learning_rate": 3.4760189919807043e-06, + "loss": 0.3637, + "step": 30503 + }, + { + "epoch": 0.6106448464829968, + "grad_norm": 2.0620875358581543, + "learning_rate": 3.475710237587272e-06, + "loss": 0.7828, + "step": 30504 + }, + { + "epoch": 0.6106648650001251, + "grad_norm": 1.1283637285232544, + "learning_rate": 3.4754014896016263e-06, + "loss": 0.3264, + "step": 30505 + }, + { + "epoch": 0.6106848835172535, + "grad_norm": 0.9966429471969604, + "learning_rate": 3.4750927480250645e-06, + "loss": 0.2678, + "step": 30506 + }, + { + "epoch": 0.6107049020343818, + "grad_norm": 1.8991363048553467, + "learning_rate": 3.4747840128588827e-06, + "loss": 0.771, + "step": 30507 + }, + { + "epoch": 0.6107249205515102, + "grad_norm": 1.0907584428787231, + "learning_rate": 3.474475284104383e-06, + "loss": 0.2814, + "step": 30508 + }, + { + "epoch": 0.6107449390686385, + "grad_norm": 1.3293075561523438, + "learning_rate": 3.4741665617628593e-06, + "loss": 0.3203, + "step": 30509 + }, + { + "epoch": 0.6107649575857669, + "grad_norm": 1.1363533735275269, + "learning_rate": 3.473857845835613e-06, + "loss": 0.3185, + "step": 30510 + }, + { + "epoch": 0.6107849761028952, + "grad_norm": 1.0816590785980225, + "learning_rate": 3.473549136323937e-06, + "loss": 0.2876, + "step": 30511 + }, + { + "epoch": 0.6108049946200235, + "grad_norm": 1.244195580482483, + "learning_rate": 3.4732404332291348e-06, + "loss": 0.3024, + "step": 30512 + }, + { + "epoch": 0.6108250131371519, + "grad_norm": 1.1910730600357056, + "learning_rate": 3.4729317365525007e-06, + "loss": 0.2536, + "step": 30513 + }, + { + "epoch": 0.6108450316542802, + "grad_norm": 1.289840579032898, + "learning_rate": 3.4726230462953332e-06, + "loss": 0.2989, + "step": 30514 + }, + { + "epoch": 0.6108650501714086, + "grad_norm": 1.0828577280044556, + "learning_rate": 3.4723143624589284e-06, + "loss": 0.3098, + "step": 30515 + }, + { + "epoch": 0.6108850686885369, + "grad_norm": 1.1638952493667603, + "learning_rate": 3.472005685044585e-06, + "loss": 0.2785, + "step": 30516 + }, + { + "epoch": 0.6109050872056653, + "grad_norm": 1.0720347166061401, + "learning_rate": 3.4716970140536016e-06, + "loss": 0.3299, + "step": 30517 + }, + { + "epoch": 0.6109251057227936, + "grad_norm": 1.1423524618148804, + "learning_rate": 3.471388349487276e-06, + "loss": 0.301, + "step": 30518 + }, + { + "epoch": 0.610945124239922, + "grad_norm": 1.0909773111343384, + "learning_rate": 3.4710796913469043e-06, + "loss": 0.3154, + "step": 30519 + }, + { + "epoch": 0.6109651427570503, + "grad_norm": 1.1516958475112915, + "learning_rate": 3.470771039633782e-06, + "loss": 0.2981, + "step": 30520 + }, + { + "epoch": 0.6109851612741786, + "grad_norm": 1.899094581604004, + "learning_rate": 3.470462394349212e-06, + "loss": 0.733, + "step": 30521 + }, + { + "epoch": 0.611005179791307, + "grad_norm": 1.1207811832427979, + "learning_rate": 3.470153755494489e-06, + "loss": 0.2587, + "step": 30522 + }, + { + "epoch": 0.6110251983084353, + "grad_norm": 1.3564525842666626, + "learning_rate": 3.4698451230709095e-06, + "loss": 0.3269, + "step": 30523 + }, + { + "epoch": 0.6110452168255637, + "grad_norm": 1.0770832300186157, + "learning_rate": 3.469536497079773e-06, + "loss": 0.324, + "step": 30524 + }, + { + "epoch": 0.611065235342692, + "grad_norm": 1.0680032968521118, + "learning_rate": 3.4692278775223732e-06, + "loss": 0.3304, + "step": 30525 + }, + { + "epoch": 0.6110852538598204, + "grad_norm": 1.1247786283493042, + "learning_rate": 3.468919264400012e-06, + "loss": 0.3431, + "step": 30526 + }, + { + "epoch": 0.6111052723769487, + "grad_norm": 1.0202139616012573, + "learning_rate": 3.4686106577139845e-06, + "loss": 0.305, + "step": 30527 + }, + { + "epoch": 0.611125290894077, + "grad_norm": 1.0303139686584473, + "learning_rate": 3.468302057465588e-06, + "loss": 0.2852, + "step": 30528 + }, + { + "epoch": 0.6111453094112054, + "grad_norm": 1.145580768585205, + "learning_rate": 3.467993463656119e-06, + "loss": 0.2892, + "step": 30529 + }, + { + "epoch": 0.6111653279283337, + "grad_norm": 1.1800618171691895, + "learning_rate": 3.4676848762868766e-06, + "loss": 0.3276, + "step": 30530 + }, + { + "epoch": 0.6111853464454621, + "grad_norm": 1.1405340433120728, + "learning_rate": 3.4673762953591578e-06, + "loss": 0.2996, + "step": 30531 + }, + { + "epoch": 0.6112053649625904, + "grad_norm": 1.0473840236663818, + "learning_rate": 3.46706772087426e-06, + "loss": 0.2927, + "step": 30532 + }, + { + "epoch": 0.6112253834797188, + "grad_norm": 1.1123121976852417, + "learning_rate": 3.4667591528334767e-06, + "loss": 0.3221, + "step": 30533 + }, + { + "epoch": 0.6112454019968471, + "grad_norm": 1.4024230241775513, + "learning_rate": 3.4664505912381096e-06, + "loss": 0.3276, + "step": 30534 + }, + { + "epoch": 0.6112654205139755, + "grad_norm": 1.1509873867034912, + "learning_rate": 3.4661420360894536e-06, + "loss": 0.2883, + "step": 30535 + }, + { + "epoch": 0.6112854390311038, + "grad_norm": 1.1415421962738037, + "learning_rate": 3.4658334873888084e-06, + "loss": 0.3268, + "step": 30536 + }, + { + "epoch": 0.6113054575482321, + "grad_norm": 1.831823706626892, + "learning_rate": 3.4655249451374682e-06, + "loss": 0.7921, + "step": 30537 + }, + { + "epoch": 0.6113254760653605, + "grad_norm": 1.0851181745529175, + "learning_rate": 3.465216409336729e-06, + "loss": 0.3353, + "step": 30538 + }, + { + "epoch": 0.6113454945824888, + "grad_norm": 1.063394546508789, + "learning_rate": 3.4649078799878915e-06, + "loss": 0.2871, + "step": 30539 + }, + { + "epoch": 0.6113655130996172, + "grad_norm": 1.043317198753357, + "learning_rate": 3.464599357092251e-06, + "loss": 0.2963, + "step": 30540 + }, + { + "epoch": 0.6113855316167455, + "grad_norm": 0.9713556170463562, + "learning_rate": 3.464290840651104e-06, + "loss": 0.2574, + "step": 30541 + }, + { + "epoch": 0.6114055501338739, + "grad_norm": 1.2725229263305664, + "learning_rate": 3.463982330665747e-06, + "loss": 0.2852, + "step": 30542 + }, + { + "epoch": 0.6114255686510022, + "grad_norm": 1.0882149934768677, + "learning_rate": 3.463673827137478e-06, + "loss": 0.3254, + "step": 30543 + }, + { + "epoch": 0.6114455871681305, + "grad_norm": 1.0652668476104736, + "learning_rate": 3.4633653300675953e-06, + "loss": 0.3621, + "step": 30544 + }, + { + "epoch": 0.6114656056852589, + "grad_norm": 1.2233966588974, + "learning_rate": 3.463056839457393e-06, + "loss": 0.3185, + "step": 30545 + }, + { + "epoch": 0.6114856242023872, + "grad_norm": 1.0952006578445435, + "learning_rate": 3.4627483553081697e-06, + "loss": 0.3389, + "step": 30546 + }, + { + "epoch": 0.6115056427195156, + "grad_norm": 2.0026392936706543, + "learning_rate": 3.462439877621219e-06, + "loss": 0.791, + "step": 30547 + }, + { + "epoch": 0.6115256612366439, + "grad_norm": 1.1430869102478027, + "learning_rate": 3.4621314063978417e-06, + "loss": 0.2613, + "step": 30548 + }, + { + "epoch": 0.6115456797537723, + "grad_norm": 1.1045405864715576, + "learning_rate": 3.4618229416393334e-06, + "loss": 0.2596, + "step": 30549 + }, + { + "epoch": 0.6115656982709006, + "grad_norm": 1.1568701267242432, + "learning_rate": 3.4615144833469904e-06, + "loss": 0.2759, + "step": 30550 + }, + { + "epoch": 0.611585716788029, + "grad_norm": 1.148768663406372, + "learning_rate": 3.461206031522108e-06, + "loss": 0.3262, + "step": 30551 + }, + { + "epoch": 0.6116057353051573, + "grad_norm": 1.16385817527771, + "learning_rate": 3.460897586165986e-06, + "loss": 0.2531, + "step": 30552 + }, + { + "epoch": 0.6116257538222856, + "grad_norm": 1.2353695631027222, + "learning_rate": 3.4605891472799195e-06, + "loss": 0.2917, + "step": 30553 + }, + { + "epoch": 0.611645772339414, + "grad_norm": 1.0566339492797852, + "learning_rate": 3.460280714865204e-06, + "loss": 0.2914, + "step": 30554 + }, + { + "epoch": 0.6116657908565423, + "grad_norm": 1.031167984008789, + "learning_rate": 3.4599722889231378e-06, + "loss": 0.2921, + "step": 30555 + }, + { + "epoch": 0.6116858093736707, + "grad_norm": 1.0563608407974243, + "learning_rate": 3.4596638694550145e-06, + "loss": 0.3115, + "step": 30556 + }, + { + "epoch": 0.611705827890799, + "grad_norm": 1.0221617221832275, + "learning_rate": 3.4593554564621353e-06, + "loss": 0.2518, + "step": 30557 + }, + { + "epoch": 0.6117258464079274, + "grad_norm": 1.1094741821289062, + "learning_rate": 3.459047049945794e-06, + "loss": 0.3231, + "step": 30558 + }, + { + "epoch": 0.6117458649250557, + "grad_norm": 1.187745451927185, + "learning_rate": 3.4587386499072874e-06, + "loss": 0.3153, + "step": 30559 + }, + { + "epoch": 0.611765883442184, + "grad_norm": 0.9460142850875854, + "learning_rate": 3.4584302563479093e-06, + "loss": 0.2584, + "step": 30560 + }, + { + "epoch": 0.6117859019593124, + "grad_norm": 1.1381241083145142, + "learning_rate": 3.45812186926896e-06, + "loss": 0.3032, + "step": 30561 + }, + { + "epoch": 0.6118059204764407, + "grad_norm": 1.05094313621521, + "learning_rate": 3.4578134886717356e-06, + "loss": 0.2923, + "step": 30562 + }, + { + "epoch": 0.6118259389935691, + "grad_norm": 1.2638928890228271, + "learning_rate": 3.457505114557531e-06, + "loss": 0.3096, + "step": 30563 + }, + { + "epoch": 0.6118459575106974, + "grad_norm": 1.9258544445037842, + "learning_rate": 3.4571967469276416e-06, + "loss": 0.7498, + "step": 30564 + }, + { + "epoch": 0.6118659760278258, + "grad_norm": 1.103110432624817, + "learning_rate": 3.456888385783367e-06, + "loss": 0.3089, + "step": 30565 + }, + { + "epoch": 0.6118859945449541, + "grad_norm": 1.0990995168685913, + "learning_rate": 3.456580031126001e-06, + "loss": 0.2779, + "step": 30566 + }, + { + "epoch": 0.6119060130620825, + "grad_norm": 1.1882729530334473, + "learning_rate": 3.45627168295684e-06, + "loss": 0.3246, + "step": 30567 + }, + { + "epoch": 0.6119260315792108, + "grad_norm": 0.980663001537323, + "learning_rate": 3.4559633412771813e-06, + "loss": 0.2761, + "step": 30568 + }, + { + "epoch": 0.6119460500963391, + "grad_norm": 1.0573738813400269, + "learning_rate": 3.4556550060883187e-06, + "loss": 0.3047, + "step": 30569 + }, + { + "epoch": 0.6119660686134675, + "grad_norm": 1.1543552875518799, + "learning_rate": 3.4553466773915516e-06, + "loss": 0.327, + "step": 30570 + }, + { + "epoch": 0.6119860871305958, + "grad_norm": 0.9803232550621033, + "learning_rate": 3.455038355188175e-06, + "loss": 0.2652, + "step": 30571 + }, + { + "epoch": 0.6120061056477242, + "grad_norm": 1.1562299728393555, + "learning_rate": 3.4547300394794845e-06, + "loss": 0.2624, + "step": 30572 + }, + { + "epoch": 0.6120261241648525, + "grad_norm": 1.1625555753707886, + "learning_rate": 3.454421730266775e-06, + "loss": 0.3179, + "step": 30573 + }, + { + "epoch": 0.6120461426819809, + "grad_norm": 1.1017100811004639, + "learning_rate": 3.4541134275513445e-06, + "loss": 0.3135, + "step": 30574 + }, + { + "epoch": 0.6120661611991092, + "grad_norm": 1.1133248805999756, + "learning_rate": 3.453805131334489e-06, + "loss": 0.3157, + "step": 30575 + }, + { + "epoch": 0.6120861797162375, + "grad_norm": 1.217333436012268, + "learning_rate": 3.4534968416175047e-06, + "loss": 0.3342, + "step": 30576 + }, + { + "epoch": 0.6121061982333659, + "grad_norm": 1.13469660282135, + "learning_rate": 3.4531885584016868e-06, + "loss": 0.2914, + "step": 30577 + }, + { + "epoch": 0.6121262167504942, + "grad_norm": 1.2088167667388916, + "learning_rate": 3.452880281688329e-06, + "loss": 0.3249, + "step": 30578 + }, + { + "epoch": 0.6121462352676226, + "grad_norm": 1.1655559539794922, + "learning_rate": 3.4525720114787316e-06, + "loss": 0.3427, + "step": 30579 + }, + { + "epoch": 0.6121662537847509, + "grad_norm": 1.2371406555175781, + "learning_rate": 3.452263747774187e-06, + "loss": 0.3134, + "step": 30580 + }, + { + "epoch": 0.6121862723018793, + "grad_norm": 1.1397451162338257, + "learning_rate": 3.4519554905759945e-06, + "loss": 0.2954, + "step": 30581 + }, + { + "epoch": 0.6122062908190076, + "grad_norm": 1.4365530014038086, + "learning_rate": 3.451647239885445e-06, + "loss": 0.2769, + "step": 30582 + }, + { + "epoch": 0.612226309336136, + "grad_norm": 1.0991618633270264, + "learning_rate": 3.4513389957038403e-06, + "loss": 0.3012, + "step": 30583 + }, + { + "epoch": 0.6122463278532643, + "grad_norm": 1.3410167694091797, + "learning_rate": 3.4510307580324728e-06, + "loss": 0.2963, + "step": 30584 + }, + { + "epoch": 0.6122663463703926, + "grad_norm": 1.2322518825531006, + "learning_rate": 3.4507225268726384e-06, + "loss": 0.3598, + "step": 30585 + }, + { + "epoch": 0.612286364887521, + "grad_norm": 1.056474208831787, + "learning_rate": 3.450414302225631e-06, + "loss": 0.3149, + "step": 30586 + }, + { + "epoch": 0.6123063834046493, + "grad_norm": 1.9651633501052856, + "learning_rate": 3.450106084092749e-06, + "loss": 0.7521, + "step": 30587 + }, + { + "epoch": 0.6123264019217777, + "grad_norm": 1.1290956735610962, + "learning_rate": 3.44979787247529e-06, + "loss": 0.2964, + "step": 30588 + }, + { + "epoch": 0.612346420438906, + "grad_norm": 1.0769749879837036, + "learning_rate": 3.4494896673745458e-06, + "loss": 0.3005, + "step": 30589 + }, + { + "epoch": 0.6123664389560344, + "grad_norm": 1.1958539485931396, + "learning_rate": 3.449181468791814e-06, + "loss": 0.3012, + "step": 30590 + }, + { + "epoch": 0.6123864574731627, + "grad_norm": 1.0166983604431152, + "learning_rate": 3.448873276728387e-06, + "loss": 0.3378, + "step": 30591 + }, + { + "epoch": 0.612406475990291, + "grad_norm": 1.0177305936813354, + "learning_rate": 3.4485650911855654e-06, + "loss": 0.2889, + "step": 30592 + }, + { + "epoch": 0.6124264945074194, + "grad_norm": 1.1474519968032837, + "learning_rate": 3.4482569121646412e-06, + "loss": 0.2829, + "step": 30593 + }, + { + "epoch": 0.6124465130245477, + "grad_norm": 1.2666510343551636, + "learning_rate": 3.447948739666912e-06, + "loss": 0.2866, + "step": 30594 + }, + { + "epoch": 0.6124665315416761, + "grad_norm": 1.0979262590408325, + "learning_rate": 3.4476405736936704e-06, + "loss": 0.275, + "step": 30595 + }, + { + "epoch": 0.6124865500588044, + "grad_norm": 1.204947590827942, + "learning_rate": 3.4473324142462156e-06, + "loss": 0.2775, + "step": 30596 + }, + { + "epoch": 0.6125065685759328, + "grad_norm": 1.0990062952041626, + "learning_rate": 3.4470242613258413e-06, + "loss": 0.2816, + "step": 30597 + }, + { + "epoch": 0.6125265870930611, + "grad_norm": 1.2478106021881104, + "learning_rate": 3.446716114933843e-06, + "loss": 0.3362, + "step": 30598 + }, + { + "epoch": 0.6125466056101895, + "grad_norm": 1.1037191152572632, + "learning_rate": 3.446407975071514e-06, + "loss": 0.3209, + "step": 30599 + }, + { + "epoch": 0.6125666241273178, + "grad_norm": 1.7695039510726929, + "learning_rate": 3.4460998417401515e-06, + "loss": 0.7169, + "step": 30600 + }, + { + "epoch": 0.6125866426444461, + "grad_norm": 1.2667008638381958, + "learning_rate": 3.4457917149410527e-06, + "loss": 0.355, + "step": 30601 + }, + { + "epoch": 0.6126066611615745, + "grad_norm": 1.1168336868286133, + "learning_rate": 3.4454835946755103e-06, + "loss": 0.3209, + "step": 30602 + }, + { + "epoch": 0.6126266796787028, + "grad_norm": 1.0624468326568604, + "learning_rate": 3.4451754809448213e-06, + "loss": 0.2902, + "step": 30603 + }, + { + "epoch": 0.6126466981958312, + "grad_norm": 1.2097413539886475, + "learning_rate": 3.4448673737502776e-06, + "loss": 0.3169, + "step": 30604 + }, + { + "epoch": 0.6126667167129595, + "grad_norm": 1.8476229906082153, + "learning_rate": 3.4445592730931787e-06, + "loss": 0.7198, + "step": 30605 + }, + { + "epoch": 0.6126867352300879, + "grad_norm": 1.3312801122665405, + "learning_rate": 3.4442511789748167e-06, + "loss": 0.2705, + "step": 30606 + }, + { + "epoch": 0.6127067537472162, + "grad_norm": 1.2298626899719238, + "learning_rate": 3.44394309139649e-06, + "loss": 0.3157, + "step": 30607 + }, + { + "epoch": 0.6127267722643445, + "grad_norm": 1.0576589107513428, + "learning_rate": 3.4436350103594906e-06, + "loss": 0.289, + "step": 30608 + }, + { + "epoch": 0.6127467907814729, + "grad_norm": 1.2558807134628296, + "learning_rate": 3.443326935865113e-06, + "loss": 0.3057, + "step": 30609 + }, + { + "epoch": 0.6127668092986012, + "grad_norm": 1.0117266178131104, + "learning_rate": 3.443018867914656e-06, + "loss": 0.2886, + "step": 30610 + }, + { + "epoch": 0.6127868278157296, + "grad_norm": 1.0560778379440308, + "learning_rate": 3.442710806509412e-06, + "loss": 0.2788, + "step": 30611 + }, + { + "epoch": 0.6128068463328579, + "grad_norm": 1.2155568599700928, + "learning_rate": 3.4424027516506765e-06, + "loss": 0.3493, + "step": 30612 + }, + { + "epoch": 0.6128268648499863, + "grad_norm": 1.1706733703613281, + "learning_rate": 3.4420947033397432e-06, + "loss": 0.2903, + "step": 30613 + }, + { + "epoch": 0.6128468833671146, + "grad_norm": 1.0895707607269287, + "learning_rate": 3.4417866615779107e-06, + "loss": 0.2949, + "step": 30614 + }, + { + "epoch": 0.612866901884243, + "grad_norm": 1.430889368057251, + "learning_rate": 3.441478626366471e-06, + "loss": 0.2898, + "step": 30615 + }, + { + "epoch": 0.6128869204013713, + "grad_norm": 1.151955008506775, + "learning_rate": 3.4411705977067212e-06, + "loss": 0.2734, + "step": 30616 + }, + { + "epoch": 0.6129069389184996, + "grad_norm": 1.0569032430648804, + "learning_rate": 3.4408625755999516e-06, + "loss": 0.3091, + "step": 30617 + }, + { + "epoch": 0.612926957435628, + "grad_norm": 1.1741279363632202, + "learning_rate": 3.4405545600474627e-06, + "loss": 0.3036, + "step": 30618 + }, + { + "epoch": 0.6129469759527563, + "grad_norm": 1.9954750537872314, + "learning_rate": 3.4402465510505455e-06, + "loss": 0.8165, + "step": 30619 + }, + { + "epoch": 0.6129669944698847, + "grad_norm": 1.3057677745819092, + "learning_rate": 3.4399385486104977e-06, + "loss": 0.2869, + "step": 30620 + }, + { + "epoch": 0.612987012987013, + "grad_norm": 1.1557992696762085, + "learning_rate": 3.439630552728612e-06, + "loss": 0.3372, + "step": 30621 + }, + { + "epoch": 0.6130070315041414, + "grad_norm": 1.1337196826934814, + "learning_rate": 3.4393225634061816e-06, + "loss": 0.2856, + "step": 30622 + }, + { + "epoch": 0.6130270500212697, + "grad_norm": 1.2037081718444824, + "learning_rate": 3.4390145806445052e-06, + "loss": 0.3116, + "step": 30623 + }, + { + "epoch": 0.613047068538398, + "grad_norm": 1.19282066822052, + "learning_rate": 3.4387066044448766e-06, + "loss": 0.3619, + "step": 30624 + }, + { + "epoch": 0.6130670870555264, + "grad_norm": 1.1094752550125122, + "learning_rate": 3.4383986348085873e-06, + "loss": 0.3256, + "step": 30625 + }, + { + "epoch": 0.6130871055726547, + "grad_norm": 1.1206151247024536, + "learning_rate": 3.4380906717369334e-06, + "loss": 0.2995, + "step": 30626 + }, + { + "epoch": 0.6131071240897831, + "grad_norm": 1.1377509832382202, + "learning_rate": 3.4377827152312125e-06, + "loss": 0.2891, + "step": 30627 + }, + { + "epoch": 0.6131271426069114, + "grad_norm": 1.0152316093444824, + "learning_rate": 3.4374747652927163e-06, + "loss": 0.2771, + "step": 30628 + }, + { + "epoch": 0.6131471611240398, + "grad_norm": 1.197885274887085, + "learning_rate": 3.4371668219227405e-06, + "loss": 0.3277, + "step": 30629 + }, + { + "epoch": 0.6131671796411681, + "grad_norm": 1.1228961944580078, + "learning_rate": 3.4368588851225783e-06, + "loss": 0.2854, + "step": 30630 + }, + { + "epoch": 0.6131871981582965, + "grad_norm": 1.1714669466018677, + "learning_rate": 3.436550954893524e-06, + "loss": 0.3143, + "step": 30631 + }, + { + "epoch": 0.6132072166754248, + "grad_norm": 1.216354489326477, + "learning_rate": 3.4362430312368733e-06, + "loss": 0.2656, + "step": 30632 + }, + { + "epoch": 0.6132272351925531, + "grad_norm": 1.1189149618148804, + "learning_rate": 3.435935114153922e-06, + "loss": 0.2943, + "step": 30633 + }, + { + "epoch": 0.6132472537096815, + "grad_norm": 1.1288450956344604, + "learning_rate": 3.4356272036459624e-06, + "loss": 0.303, + "step": 30634 + }, + { + "epoch": 0.6132672722268098, + "grad_norm": 1.3203167915344238, + "learning_rate": 3.435319299714287e-06, + "loss": 0.3348, + "step": 30635 + }, + { + "epoch": 0.6132872907439382, + "grad_norm": 1.0394748449325562, + "learning_rate": 3.4350114023601955e-06, + "loss": 0.3224, + "step": 30636 + }, + { + "epoch": 0.6133073092610665, + "grad_norm": 1.1045931577682495, + "learning_rate": 3.434703511584979e-06, + "loss": 0.3018, + "step": 30637 + }, + { + "epoch": 0.6133273277781949, + "grad_norm": 1.2820162773132324, + "learning_rate": 3.4343956273899304e-06, + "loss": 0.3151, + "step": 30638 + }, + { + "epoch": 0.6133473462953232, + "grad_norm": 1.014604926109314, + "learning_rate": 3.434087749776347e-06, + "loss": 0.3091, + "step": 30639 + }, + { + "epoch": 0.6133673648124515, + "grad_norm": 1.4843320846557617, + "learning_rate": 3.43377987874552e-06, + "loss": 0.3344, + "step": 30640 + }, + { + "epoch": 0.6133873833295799, + "grad_norm": 1.1767858266830444, + "learning_rate": 3.433472014298747e-06, + "loss": 0.3039, + "step": 30641 + }, + { + "epoch": 0.6134074018467082, + "grad_norm": 1.0400795936584473, + "learning_rate": 3.433164156437321e-06, + "loss": 0.3109, + "step": 30642 + }, + { + "epoch": 0.6134274203638366, + "grad_norm": 1.1532119512557983, + "learning_rate": 3.432856305162534e-06, + "loss": 0.3416, + "step": 30643 + }, + { + "epoch": 0.6134474388809649, + "grad_norm": 1.210688591003418, + "learning_rate": 3.432548460475682e-06, + "loss": 0.3306, + "step": 30644 + }, + { + "epoch": 0.6134674573980933, + "grad_norm": 1.8809418678283691, + "learning_rate": 3.4322406223780593e-06, + "loss": 0.7502, + "step": 30645 + }, + { + "epoch": 0.6134874759152216, + "grad_norm": 1.0893633365631104, + "learning_rate": 3.431932790870961e-06, + "loss": 0.3117, + "step": 30646 + }, + { + "epoch": 0.61350749443235, + "grad_norm": 1.2087602615356445, + "learning_rate": 3.431624965955679e-06, + "loss": 0.3056, + "step": 30647 + }, + { + "epoch": 0.6135275129494783, + "grad_norm": 1.0829237699508667, + "learning_rate": 3.4313171476335062e-06, + "loss": 0.3259, + "step": 30648 + }, + { + "epoch": 0.6135475314666066, + "grad_norm": 1.0078353881835938, + "learning_rate": 3.4310093359057407e-06, + "loss": 0.2885, + "step": 30649 + }, + { + "epoch": 0.613567549983735, + "grad_norm": 1.1660616397857666, + "learning_rate": 3.4307015307736747e-06, + "loss": 0.3158, + "step": 30650 + }, + { + "epoch": 0.6135875685008633, + "grad_norm": 1.0570710897445679, + "learning_rate": 3.4303937322386006e-06, + "loss": 0.2857, + "step": 30651 + }, + { + "epoch": 0.6136075870179917, + "grad_norm": 1.0139732360839844, + "learning_rate": 3.4300859403018137e-06, + "loss": 0.2847, + "step": 30652 + }, + { + "epoch": 0.61362760553512, + "grad_norm": 1.00496244430542, + "learning_rate": 3.4297781549646065e-06, + "loss": 0.2524, + "step": 30653 + }, + { + "epoch": 0.6136476240522484, + "grad_norm": 1.2311456203460693, + "learning_rate": 3.4294703762282754e-06, + "loss": 0.3084, + "step": 30654 + }, + { + "epoch": 0.6136676425693767, + "grad_norm": 1.1492022275924683, + "learning_rate": 3.429162604094114e-06, + "loss": 0.3324, + "step": 30655 + }, + { + "epoch": 0.613687661086505, + "grad_norm": 1.071745753288269, + "learning_rate": 3.428854838563413e-06, + "loss": 0.3036, + "step": 30656 + }, + { + "epoch": 0.6137076796036334, + "grad_norm": 1.1216405630111694, + "learning_rate": 3.428547079637468e-06, + "loss": 0.3049, + "step": 30657 + }, + { + "epoch": 0.6137276981207617, + "grad_norm": 1.0645462274551392, + "learning_rate": 3.428239327317573e-06, + "loss": 0.339, + "step": 30658 + }, + { + "epoch": 0.6137477166378901, + "grad_norm": 1.0335406064987183, + "learning_rate": 3.427931581605023e-06, + "loss": 0.2695, + "step": 30659 + }, + { + "epoch": 0.6137677351550184, + "grad_norm": 1.1676982641220093, + "learning_rate": 3.4276238425011105e-06, + "loss": 0.3329, + "step": 30660 + }, + { + "epoch": 0.6137877536721468, + "grad_norm": 1.2644715309143066, + "learning_rate": 3.427316110007128e-06, + "loss": 0.2917, + "step": 30661 + }, + { + "epoch": 0.6138077721892751, + "grad_norm": 1.2273204326629639, + "learning_rate": 3.4270083841243697e-06, + "loss": 0.3036, + "step": 30662 + }, + { + "epoch": 0.6138277907064035, + "grad_norm": 1.0744578838348389, + "learning_rate": 3.4267006648541303e-06, + "loss": 0.2782, + "step": 30663 + }, + { + "epoch": 0.6138478092235318, + "grad_norm": 1.109103798866272, + "learning_rate": 3.4263929521977023e-06, + "loss": 0.2993, + "step": 30664 + }, + { + "epoch": 0.6138678277406601, + "grad_norm": 1.0802541971206665, + "learning_rate": 3.4260852461563802e-06, + "loss": 0.2795, + "step": 30665 + }, + { + "epoch": 0.6138878462577885, + "grad_norm": 1.095576524734497, + "learning_rate": 3.4257775467314557e-06, + "loss": 0.3241, + "step": 30666 + }, + { + "epoch": 0.6139078647749168, + "grad_norm": 1.087056040763855, + "learning_rate": 3.4254698539242246e-06, + "loss": 0.3508, + "step": 30667 + }, + { + "epoch": 0.6139278832920452, + "grad_norm": 1.0597820281982422, + "learning_rate": 3.4251621677359805e-06, + "loss": 0.3183, + "step": 30668 + }, + { + "epoch": 0.6139479018091735, + "grad_norm": 1.0597745180130005, + "learning_rate": 3.4248544881680146e-06, + "loss": 0.3035, + "step": 30669 + }, + { + "epoch": 0.6139679203263019, + "grad_norm": 1.0054192543029785, + "learning_rate": 3.424546815221622e-06, + "loss": 0.2713, + "step": 30670 + }, + { + "epoch": 0.6139879388434302, + "grad_norm": 1.176722526550293, + "learning_rate": 3.4242391488980933e-06, + "loss": 0.3079, + "step": 30671 + }, + { + "epoch": 0.6140079573605585, + "grad_norm": 0.9755074977874756, + "learning_rate": 3.4239314891987264e-06, + "loss": 0.28, + "step": 30672 + }, + { + "epoch": 0.6140279758776869, + "grad_norm": 1.046138048171997, + "learning_rate": 3.423623836124813e-06, + "loss": 0.274, + "step": 30673 + }, + { + "epoch": 0.6140479943948152, + "grad_norm": 1.1309025287628174, + "learning_rate": 3.4233161896776446e-06, + "loss": 0.2381, + "step": 30674 + }, + { + "epoch": 0.6140680129119436, + "grad_norm": 1.1855231523513794, + "learning_rate": 3.423008549858514e-06, + "loss": 0.3101, + "step": 30675 + }, + { + "epoch": 0.6140880314290719, + "grad_norm": 1.143915057182312, + "learning_rate": 3.4227009166687188e-06, + "loss": 0.3165, + "step": 30676 + }, + { + "epoch": 0.6141080499462003, + "grad_norm": 1.2062228918075562, + "learning_rate": 3.422393290109548e-06, + "loss": 0.3161, + "step": 30677 + }, + { + "epoch": 0.6141280684633286, + "grad_norm": 1.1628297567367554, + "learning_rate": 3.4220856701822973e-06, + "loss": 0.3411, + "step": 30678 + }, + { + "epoch": 0.614148086980457, + "grad_norm": 1.0480189323425293, + "learning_rate": 3.421778056888257e-06, + "loss": 0.3223, + "step": 30679 + }, + { + "epoch": 0.6141681054975853, + "grad_norm": 1.2908177375793457, + "learning_rate": 3.4214704502287242e-06, + "loss": 0.3652, + "step": 30680 + }, + { + "epoch": 0.6141881240147136, + "grad_norm": 1.233689546585083, + "learning_rate": 3.4211628502049897e-06, + "loss": 0.321, + "step": 30681 + }, + { + "epoch": 0.614208142531842, + "grad_norm": 1.075475811958313, + "learning_rate": 3.4208552568183457e-06, + "loss": 0.2571, + "step": 30682 + }, + { + "epoch": 0.6142281610489703, + "grad_norm": 1.2996246814727783, + "learning_rate": 3.4205476700700878e-06, + "loss": 0.2941, + "step": 30683 + }, + { + "epoch": 0.6142481795660987, + "grad_norm": 1.9533900022506714, + "learning_rate": 3.420240089961505e-06, + "loss": 0.7578, + "step": 30684 + }, + { + "epoch": 0.614268198083227, + "grad_norm": 1.0813170671463013, + "learning_rate": 3.419932516493895e-06, + "loss": 0.2909, + "step": 30685 + }, + { + "epoch": 0.6142882166003554, + "grad_norm": 1.1034427881240845, + "learning_rate": 3.419624949668549e-06, + "loss": 0.3147, + "step": 30686 + }, + { + "epoch": 0.6143082351174837, + "grad_norm": 1.2254239320755005, + "learning_rate": 3.4193173894867592e-06, + "loss": 0.2816, + "step": 30687 + }, + { + "epoch": 0.614328253634612, + "grad_norm": 1.0661693811416626, + "learning_rate": 3.4190098359498174e-06, + "loss": 0.3061, + "step": 30688 + }, + { + "epoch": 0.6143482721517404, + "grad_norm": 1.0221998691558838, + "learning_rate": 3.41870228905902e-06, + "loss": 0.2642, + "step": 30689 + }, + { + "epoch": 0.6143682906688687, + "grad_norm": 1.1911144256591797, + "learning_rate": 3.4183947488156565e-06, + "loss": 0.2965, + "step": 30690 + }, + { + "epoch": 0.6143883091859971, + "grad_norm": 1.2718557119369507, + "learning_rate": 3.418087215221022e-06, + "loss": 0.3494, + "step": 30691 + }, + { + "epoch": 0.6144083277031254, + "grad_norm": 1.1672031879425049, + "learning_rate": 3.417779688276409e-06, + "loss": 0.2659, + "step": 30692 + }, + { + "epoch": 0.6144283462202538, + "grad_norm": 1.026659369468689, + "learning_rate": 3.4174721679831073e-06, + "loss": 0.2702, + "step": 30693 + }, + { + "epoch": 0.6144483647373821, + "grad_norm": 1.1555923223495483, + "learning_rate": 3.4171646543424143e-06, + "loss": 0.2836, + "step": 30694 + }, + { + "epoch": 0.6144683832545105, + "grad_norm": 1.8161616325378418, + "learning_rate": 3.4168571473556195e-06, + "loss": 0.7345, + "step": 30695 + }, + { + "epoch": 0.6144884017716388, + "grad_norm": 1.0911074876785278, + "learning_rate": 3.4165496470240172e-06, + "loss": 0.3188, + "step": 30696 + }, + { + "epoch": 0.6145084202887671, + "grad_norm": 1.173038363456726, + "learning_rate": 3.4162421533488974e-06, + "loss": 0.2792, + "step": 30697 + }, + { + "epoch": 0.6145284388058955, + "grad_norm": 1.2550593614578247, + "learning_rate": 3.4159346663315574e-06, + "loss": 0.3644, + "step": 30698 + }, + { + "epoch": 0.6145484573230238, + "grad_norm": 1.8824859857559204, + "learning_rate": 3.415627185973287e-06, + "loss": 0.7998, + "step": 30699 + }, + { + "epoch": 0.6145684758401522, + "grad_norm": 1.2001101970672607, + "learning_rate": 3.4153197122753785e-06, + "loss": 0.3506, + "step": 30700 + }, + { + "epoch": 0.6145884943572805, + "grad_norm": 1.1386228799819946, + "learning_rate": 3.415012245239124e-06, + "loss": 0.2827, + "step": 30701 + }, + { + "epoch": 0.6146085128744089, + "grad_norm": 1.052225947380066, + "learning_rate": 3.4147047848658164e-06, + "loss": 0.2916, + "step": 30702 + }, + { + "epoch": 0.6146285313915372, + "grad_norm": 1.092301607131958, + "learning_rate": 3.41439733115675e-06, + "loss": 0.323, + "step": 30703 + }, + { + "epoch": 0.6146485499086655, + "grad_norm": 1.1450059413909912, + "learning_rate": 3.4140898841132163e-06, + "loss": 0.2752, + "step": 30704 + }, + { + "epoch": 0.6146685684257939, + "grad_norm": 1.0596007108688354, + "learning_rate": 3.4137824437365076e-06, + "loss": 0.2979, + "step": 30705 + }, + { + "epoch": 0.6146885869429222, + "grad_norm": 1.1261214017868042, + "learning_rate": 3.4134750100279133e-06, + "loss": 0.3538, + "step": 30706 + }, + { + "epoch": 0.6147086054600506, + "grad_norm": 1.162265658378601, + "learning_rate": 3.413167582988732e-06, + "loss": 0.284, + "step": 30707 + }, + { + "epoch": 0.6147286239771789, + "grad_norm": 1.0713796615600586, + "learning_rate": 3.4128601626202506e-06, + "loss": 0.3002, + "step": 30708 + }, + { + "epoch": 0.6147486424943073, + "grad_norm": 1.2899852991104126, + "learning_rate": 3.4125527489237655e-06, + "loss": 0.3521, + "step": 30709 + }, + { + "epoch": 0.6147686610114356, + "grad_norm": 1.0729118585586548, + "learning_rate": 3.412245341900564e-06, + "loss": 0.3241, + "step": 30710 + }, + { + "epoch": 0.614788679528564, + "grad_norm": 1.1060292720794678, + "learning_rate": 3.411937941551944e-06, + "loss": 0.248, + "step": 30711 + }, + { + "epoch": 0.6148086980456923, + "grad_norm": 1.1034523248672485, + "learning_rate": 3.411630547879195e-06, + "loss": 0.336, + "step": 30712 + }, + { + "epoch": 0.6148287165628206, + "grad_norm": 1.1276389360427856, + "learning_rate": 3.4113231608836095e-06, + "loss": 0.3179, + "step": 30713 + }, + { + "epoch": 0.614848735079949, + "grad_norm": 1.1459007263183594, + "learning_rate": 3.411015780566478e-06, + "loss": 0.3249, + "step": 30714 + }, + { + "epoch": 0.6148687535970773, + "grad_norm": 1.0909384489059448, + "learning_rate": 3.410708406929095e-06, + "loss": 0.2888, + "step": 30715 + }, + { + "epoch": 0.6148887721142057, + "grad_norm": 1.0639357566833496, + "learning_rate": 3.4104010399727518e-06, + "loss": 0.276, + "step": 30716 + }, + { + "epoch": 0.614908790631334, + "grad_norm": 1.2820805311203003, + "learning_rate": 3.4100936796987406e-06, + "loss": 0.3085, + "step": 30717 + }, + { + "epoch": 0.6149288091484624, + "grad_norm": 1.0767704248428345, + "learning_rate": 3.4097863261083543e-06, + "loss": 0.3799, + "step": 30718 + }, + { + "epoch": 0.6149488276655907, + "grad_norm": 1.1549937725067139, + "learning_rate": 3.4094789792028816e-06, + "loss": 0.2735, + "step": 30719 + }, + { + "epoch": 0.614968846182719, + "grad_norm": 1.0825508832931519, + "learning_rate": 3.4091716389836193e-06, + "loss": 0.312, + "step": 30720 + }, + { + "epoch": 0.6149888646998474, + "grad_norm": 1.9482450485229492, + "learning_rate": 3.4088643054518553e-06, + "loss": 0.7566, + "step": 30721 + }, + { + "epoch": 0.6150088832169757, + "grad_norm": 0.9878089427947998, + "learning_rate": 3.4085569786088857e-06, + "loss": 0.2912, + "step": 30722 + }, + { + "epoch": 0.6150289017341041, + "grad_norm": 1.2417266368865967, + "learning_rate": 3.4082496584559987e-06, + "loss": 0.288, + "step": 30723 + }, + { + "epoch": 0.6150489202512324, + "grad_norm": 1.177470326423645, + "learning_rate": 3.4079423449944864e-06, + "loss": 0.2683, + "step": 30724 + }, + { + "epoch": 0.6150689387683608, + "grad_norm": 1.0310875177383423, + "learning_rate": 3.407635038225644e-06, + "loss": 0.287, + "step": 30725 + }, + { + "epoch": 0.6150889572854891, + "grad_norm": 1.191665768623352, + "learning_rate": 3.407327738150761e-06, + "loss": 0.309, + "step": 30726 + }, + { + "epoch": 0.6151089758026175, + "grad_norm": 1.0585368871688843, + "learning_rate": 3.4070204447711284e-06, + "loss": 0.3053, + "step": 30727 + }, + { + "epoch": 0.6151289943197458, + "grad_norm": 1.127571940422058, + "learning_rate": 3.4067131580880385e-06, + "loss": 0.2697, + "step": 30728 + }, + { + "epoch": 0.6151490128368741, + "grad_norm": 1.1207462549209595, + "learning_rate": 3.406405878102784e-06, + "loss": 0.3208, + "step": 30729 + }, + { + "epoch": 0.6151690313540025, + "grad_norm": 1.0424931049346924, + "learning_rate": 3.406098604816658e-06, + "loss": 0.3262, + "step": 30730 + }, + { + "epoch": 0.6151890498711308, + "grad_norm": 1.1011769771575928, + "learning_rate": 3.4057913382309497e-06, + "loss": 0.2772, + "step": 30731 + }, + { + "epoch": 0.6152090683882592, + "grad_norm": 1.052661657333374, + "learning_rate": 3.40548407834695e-06, + "loss": 0.3177, + "step": 30732 + }, + { + "epoch": 0.6152290869053875, + "grad_norm": 1.000280737876892, + "learning_rate": 3.4051768251659534e-06, + "loss": 0.2866, + "step": 30733 + }, + { + "epoch": 0.6152491054225159, + "grad_norm": 2.0501227378845215, + "learning_rate": 3.4048695786892495e-06, + "loss": 0.7028, + "step": 30734 + }, + { + "epoch": 0.6152691239396442, + "grad_norm": 1.1438567638397217, + "learning_rate": 3.4045623389181316e-06, + "loss": 0.2978, + "step": 30735 + }, + { + "epoch": 0.6152891424567725, + "grad_norm": 1.146549940109253, + "learning_rate": 3.4042551058538908e-06, + "loss": 0.3321, + "step": 30736 + }, + { + "epoch": 0.6153091609739009, + "grad_norm": 1.8354735374450684, + "learning_rate": 3.4039478794978155e-06, + "loss": 0.746, + "step": 30737 + }, + { + "epoch": 0.6153291794910292, + "grad_norm": 1.1431090831756592, + "learning_rate": 3.4036406598512017e-06, + "loss": 0.343, + "step": 30738 + }, + { + "epoch": 0.6153491980081576, + "grad_norm": 0.9970269799232483, + "learning_rate": 3.40333344691534e-06, + "loss": 0.2975, + "step": 30739 + }, + { + "epoch": 0.6153692165252859, + "grad_norm": 1.921668529510498, + "learning_rate": 3.403026240691519e-06, + "loss": 0.8302, + "step": 30740 + }, + { + "epoch": 0.6153892350424143, + "grad_norm": 1.064229130744934, + "learning_rate": 3.4027190411810317e-06, + "loss": 0.3218, + "step": 30741 + }, + { + "epoch": 0.6154092535595426, + "grad_norm": 1.0342434644699097, + "learning_rate": 3.402411848385171e-06, + "loss": 0.307, + "step": 30742 + }, + { + "epoch": 0.615429272076671, + "grad_norm": 1.0452945232391357, + "learning_rate": 3.4021046623052272e-06, + "loss": 0.2813, + "step": 30743 + }, + { + "epoch": 0.6154492905937993, + "grad_norm": 1.1802879571914673, + "learning_rate": 3.4017974829424916e-06, + "loss": 0.2726, + "step": 30744 + }, + { + "epoch": 0.6154693091109276, + "grad_norm": 1.1286715269088745, + "learning_rate": 3.401490310298256e-06, + "loss": 0.3335, + "step": 30745 + }, + { + "epoch": 0.615489327628056, + "grad_norm": 1.574907660484314, + "learning_rate": 3.4011831443738088e-06, + "loss": 0.3258, + "step": 30746 + }, + { + "epoch": 0.6155093461451843, + "grad_norm": 1.0763918161392212, + "learning_rate": 3.400875985170444e-06, + "loss": 0.2521, + "step": 30747 + }, + { + "epoch": 0.6155293646623127, + "grad_norm": 1.145990014076233, + "learning_rate": 3.4005688326894544e-06, + "loss": 0.3085, + "step": 30748 + }, + { + "epoch": 0.615549383179441, + "grad_norm": 1.0899271965026855, + "learning_rate": 3.400261686932129e-06, + "loss": 0.3198, + "step": 30749 + }, + { + "epoch": 0.6155694016965694, + "grad_norm": 1.039263129234314, + "learning_rate": 3.3999545478997565e-06, + "loss": 0.2953, + "step": 30750 + }, + { + "epoch": 0.6155894202136977, + "grad_norm": 1.0392060279846191, + "learning_rate": 3.399647415593633e-06, + "loss": 0.297, + "step": 30751 + }, + { + "epoch": 0.615609438730826, + "grad_norm": 1.0674031972885132, + "learning_rate": 3.3993402900150484e-06, + "loss": 0.293, + "step": 30752 + }, + { + "epoch": 0.6156294572479544, + "grad_norm": 1.181540608406067, + "learning_rate": 3.3990331711652906e-06, + "loss": 0.3351, + "step": 30753 + }, + { + "epoch": 0.6156494757650827, + "grad_norm": 1.1207964420318604, + "learning_rate": 3.3987260590456537e-06, + "loss": 0.3457, + "step": 30754 + }, + { + "epoch": 0.6156694942822111, + "grad_norm": 1.3258895874023438, + "learning_rate": 3.3984189536574267e-06, + "loss": 0.3502, + "step": 30755 + }, + { + "epoch": 0.6156895127993394, + "grad_norm": 1.029404640197754, + "learning_rate": 3.3981118550019033e-06, + "loss": 0.301, + "step": 30756 + }, + { + "epoch": 0.6157095313164678, + "grad_norm": 1.8412038087844849, + "learning_rate": 3.3978047630803736e-06, + "loss": 0.7602, + "step": 30757 + }, + { + "epoch": 0.6157295498335961, + "grad_norm": 1.2222228050231934, + "learning_rate": 3.397497677894127e-06, + "loss": 0.3187, + "step": 30758 + }, + { + "epoch": 0.6157495683507245, + "grad_norm": 1.3323417901992798, + "learning_rate": 3.397190599444453e-06, + "loss": 0.3051, + "step": 30759 + }, + { + "epoch": 0.6157695868678528, + "grad_norm": 1.1634550094604492, + "learning_rate": 3.396883527732647e-06, + "loss": 0.3458, + "step": 30760 + }, + { + "epoch": 0.6157896053849811, + "grad_norm": 1.8839716911315918, + "learning_rate": 3.3965764627599984e-06, + "loss": 0.7746, + "step": 30761 + }, + { + "epoch": 0.6158096239021095, + "grad_norm": 1.12812077999115, + "learning_rate": 3.3962694045277968e-06, + "loss": 0.3401, + "step": 30762 + }, + { + "epoch": 0.6158296424192378, + "grad_norm": 1.1585620641708374, + "learning_rate": 3.3959623530373316e-06, + "loss": 0.2744, + "step": 30763 + }, + { + "epoch": 0.6158496609363662, + "grad_norm": 1.6393340826034546, + "learning_rate": 3.3956553082898975e-06, + "loss": 0.363, + "step": 30764 + }, + { + "epoch": 0.6158696794534945, + "grad_norm": 1.1159430742263794, + "learning_rate": 3.395348270286783e-06, + "loss": 0.2898, + "step": 30765 + }, + { + "epoch": 0.6158896979706229, + "grad_norm": 1.1609724760055542, + "learning_rate": 3.3950412390292784e-06, + "loss": 0.3455, + "step": 30766 + }, + { + "epoch": 0.6159097164877512, + "grad_norm": 1.3833945989608765, + "learning_rate": 3.3947342145186756e-06, + "loss": 0.3226, + "step": 30767 + }, + { + "epoch": 0.6159297350048795, + "grad_norm": 1.0089302062988281, + "learning_rate": 3.3944271967562632e-06, + "loss": 0.2419, + "step": 30768 + }, + { + "epoch": 0.6159497535220079, + "grad_norm": 1.167702078819275, + "learning_rate": 3.3941201857433355e-06, + "loss": 0.307, + "step": 30769 + }, + { + "epoch": 0.6159697720391362, + "grad_norm": 1.1417124271392822, + "learning_rate": 3.3938131814811802e-06, + "loss": 0.2862, + "step": 30770 + }, + { + "epoch": 0.6159897905562646, + "grad_norm": 1.1105163097381592, + "learning_rate": 3.3935061839710893e-06, + "loss": 0.32, + "step": 30771 + }, + { + "epoch": 0.6160098090733929, + "grad_norm": 1.090308666229248, + "learning_rate": 3.3931991932143505e-06, + "loss": 0.3322, + "step": 30772 + }, + { + "epoch": 0.6160298275905213, + "grad_norm": 1.3169244527816772, + "learning_rate": 3.3928922092122573e-06, + "loss": 0.3127, + "step": 30773 + }, + { + "epoch": 0.6160498461076496, + "grad_norm": 1.2578610181808472, + "learning_rate": 3.3925852319661e-06, + "loss": 0.3037, + "step": 30774 + }, + { + "epoch": 0.616069864624778, + "grad_norm": 1.0413010120391846, + "learning_rate": 3.39227826147717e-06, + "loss": 0.2623, + "step": 30775 + }, + { + "epoch": 0.6160898831419063, + "grad_norm": 1.8217902183532715, + "learning_rate": 3.3919712977467547e-06, + "loss": 0.7834, + "step": 30776 + }, + { + "epoch": 0.6161099016590346, + "grad_norm": 1.1384166479110718, + "learning_rate": 3.3916643407761446e-06, + "loss": 0.3272, + "step": 30777 + }, + { + "epoch": 0.616129920176163, + "grad_norm": 1.2095532417297363, + "learning_rate": 3.391357390566633e-06, + "loss": 0.258, + "step": 30778 + }, + { + "epoch": 0.6161499386932913, + "grad_norm": 1.1472982168197632, + "learning_rate": 3.3910504471195083e-06, + "loss": 0.3072, + "step": 30779 + }, + { + "epoch": 0.6161699572104197, + "grad_norm": 1.0494884252548218, + "learning_rate": 3.390743510436062e-06, + "loss": 0.2696, + "step": 30780 + }, + { + "epoch": 0.616189975727548, + "grad_norm": 1.1043572425842285, + "learning_rate": 3.390436580517582e-06, + "loss": 0.3247, + "step": 30781 + }, + { + "epoch": 0.6162099942446764, + "grad_norm": 1.0584027767181396, + "learning_rate": 3.390129657365362e-06, + "loss": 0.3054, + "step": 30782 + }, + { + "epoch": 0.6162300127618047, + "grad_norm": 1.1378004550933838, + "learning_rate": 3.3898227409806906e-06, + "loss": 0.2654, + "step": 30783 + }, + { + "epoch": 0.616250031278933, + "grad_norm": 2.0244877338409424, + "learning_rate": 3.389515831364858e-06, + "loss": 0.8266, + "step": 30784 + }, + { + "epoch": 0.6162700497960614, + "grad_norm": 1.10063898563385, + "learning_rate": 3.389208928519153e-06, + "loss": 0.3324, + "step": 30785 + }, + { + "epoch": 0.6162900683131897, + "grad_norm": 1.117422342300415, + "learning_rate": 3.3889020324448655e-06, + "loss": 0.2882, + "step": 30786 + }, + { + "epoch": 0.6163100868303181, + "grad_norm": 1.0528103113174438, + "learning_rate": 3.38859514314329e-06, + "loss": 0.3143, + "step": 30787 + }, + { + "epoch": 0.6163301053474464, + "grad_norm": 1.0911937952041626, + "learning_rate": 3.3882882606157138e-06, + "loss": 0.3151, + "step": 30788 + }, + { + "epoch": 0.6163501238645748, + "grad_norm": 1.0285788774490356, + "learning_rate": 3.3879813848634262e-06, + "loss": 0.3487, + "step": 30789 + }, + { + "epoch": 0.6163701423817031, + "grad_norm": 0.9658216834068298, + "learning_rate": 3.3876745158877165e-06, + "loss": 0.2697, + "step": 30790 + }, + { + "epoch": 0.6163901608988315, + "grad_norm": 1.1177822351455688, + "learning_rate": 3.387367653689878e-06, + "loss": 0.3029, + "step": 30791 + }, + { + "epoch": 0.6164101794159598, + "grad_norm": 1.0538280010223389, + "learning_rate": 3.387060798271198e-06, + "loss": 0.2909, + "step": 30792 + }, + { + "epoch": 0.6164301979330881, + "grad_norm": 0.9936352372169495, + "learning_rate": 3.386753949632968e-06, + "loss": 0.2649, + "step": 30793 + }, + { + "epoch": 0.6164502164502165, + "grad_norm": 1.8137093782424927, + "learning_rate": 3.386447107776475e-06, + "loss": 0.7801, + "step": 30794 + }, + { + "epoch": 0.6164702349673448, + "grad_norm": 1.1060240268707275, + "learning_rate": 3.3861402727030137e-06, + "loss": 0.3304, + "step": 30795 + }, + { + "epoch": 0.6164902534844732, + "grad_norm": 1.144872784614563, + "learning_rate": 3.385833444413871e-06, + "loss": 0.3593, + "step": 30796 + }, + { + "epoch": 0.6165102720016015, + "grad_norm": 0.9927881956100464, + "learning_rate": 3.3855266229103375e-06, + "loss": 0.2856, + "step": 30797 + }, + { + "epoch": 0.6165302905187299, + "grad_norm": 1.2503353357315063, + "learning_rate": 3.385219808193701e-06, + "loss": 0.3054, + "step": 30798 + }, + { + "epoch": 0.6165503090358582, + "grad_norm": 1.1250414848327637, + "learning_rate": 3.384913000265252e-06, + "loss": 0.3429, + "step": 30799 + }, + { + "epoch": 0.6165703275529865, + "grad_norm": 1.0034126043319702, + "learning_rate": 3.3846061991262836e-06, + "loss": 0.2857, + "step": 30800 + }, + { + "epoch": 0.6165903460701149, + "grad_norm": 1.1730376482009888, + "learning_rate": 3.384299404778083e-06, + "loss": 0.3894, + "step": 30801 + }, + { + "epoch": 0.6166103645872432, + "grad_norm": 1.1430250406265259, + "learning_rate": 3.3839926172219408e-06, + "loss": 0.3115, + "step": 30802 + }, + { + "epoch": 0.6166303831043716, + "grad_norm": 1.1081080436706543, + "learning_rate": 3.3836858364591424e-06, + "loss": 0.3186, + "step": 30803 + }, + { + "epoch": 0.6166504016214999, + "grad_norm": 1.023264765739441, + "learning_rate": 3.3833790624909836e-06, + "loss": 0.2756, + "step": 30804 + }, + { + "epoch": 0.6166704201386283, + "grad_norm": 1.028851866722107, + "learning_rate": 3.38307229531875e-06, + "loss": 0.282, + "step": 30805 + }, + { + "epoch": 0.6166904386557566, + "grad_norm": 1.1345142126083374, + "learning_rate": 3.3827655349437338e-06, + "loss": 0.2908, + "step": 30806 + }, + { + "epoch": 0.616710457172885, + "grad_norm": 1.0495741367340088, + "learning_rate": 3.382458781367224e-06, + "loss": 0.3075, + "step": 30807 + }, + { + "epoch": 0.6167304756900133, + "grad_norm": 1.1250466108322144, + "learning_rate": 3.3821520345905063e-06, + "loss": 0.2796, + "step": 30808 + }, + { + "epoch": 0.6167504942071416, + "grad_norm": 0.9763776063919067, + "learning_rate": 3.3818452946148753e-06, + "loss": 0.2902, + "step": 30809 + }, + { + "epoch": 0.61677051272427, + "grad_norm": 1.0418851375579834, + "learning_rate": 3.381538561441619e-06, + "loss": 0.2877, + "step": 30810 + }, + { + "epoch": 0.6167905312413983, + "grad_norm": 1.051066517829895, + "learning_rate": 3.3812318350720252e-06, + "loss": 0.2935, + "step": 30811 + }, + { + "epoch": 0.6168105497585267, + "grad_norm": 1.0844959020614624, + "learning_rate": 3.380925115507383e-06, + "loss": 0.3192, + "step": 30812 + }, + { + "epoch": 0.616830568275655, + "grad_norm": 1.1248812675476074, + "learning_rate": 3.3806184027489854e-06, + "loss": 0.2758, + "step": 30813 + }, + { + "epoch": 0.6168505867927834, + "grad_norm": 2.011826515197754, + "learning_rate": 3.38031169679812e-06, + "loss": 0.8262, + "step": 30814 + }, + { + "epoch": 0.6168706053099117, + "grad_norm": 1.0995173454284668, + "learning_rate": 3.380004997656075e-06, + "loss": 0.3097, + "step": 30815 + }, + { + "epoch": 0.61689062382704, + "grad_norm": 1.1362746953964233, + "learning_rate": 3.3796983053241393e-06, + "loss": 0.2838, + "step": 30816 + }, + { + "epoch": 0.6169106423441684, + "grad_norm": 1.2667384147644043, + "learning_rate": 3.379391619803604e-06, + "loss": 0.3577, + "step": 30817 + }, + { + "epoch": 0.6169306608612967, + "grad_norm": 0.9483007192611694, + "learning_rate": 3.379084941095757e-06, + "loss": 0.2755, + "step": 30818 + }, + { + "epoch": 0.6169506793784251, + "grad_norm": 0.9593276381492615, + "learning_rate": 3.378778269201889e-06, + "loss": 0.2667, + "step": 30819 + }, + { + "epoch": 0.6169706978955534, + "grad_norm": 1.87827467918396, + "learning_rate": 3.378471604123289e-06, + "loss": 0.7791, + "step": 30820 + }, + { + "epoch": 0.6169907164126818, + "grad_norm": 1.316644310951233, + "learning_rate": 3.3781649458612427e-06, + "loss": 0.3385, + "step": 30821 + }, + { + "epoch": 0.6170107349298101, + "grad_norm": 1.9078601598739624, + "learning_rate": 3.3778582944170434e-06, + "loss": 0.8139, + "step": 30822 + }, + { + "epoch": 0.6170307534469384, + "grad_norm": 1.081690788269043, + "learning_rate": 3.3775516497919804e-06, + "loss": 0.3383, + "step": 30823 + }, + { + "epoch": 0.6170507719640668, + "grad_norm": 1.134656310081482, + "learning_rate": 3.377245011987339e-06, + "loss": 0.3117, + "step": 30824 + }, + { + "epoch": 0.6170707904811951, + "grad_norm": 1.09348464012146, + "learning_rate": 3.3769383810044092e-06, + "loss": 0.3079, + "step": 30825 + }, + { + "epoch": 0.6170908089983235, + "grad_norm": 1.2264240980148315, + "learning_rate": 3.3766317568444834e-06, + "loss": 0.3125, + "step": 30826 + }, + { + "epoch": 0.6171108275154518, + "grad_norm": 1.258882761001587, + "learning_rate": 3.3763251395088487e-06, + "loss": 0.358, + "step": 30827 + }, + { + "epoch": 0.6171308460325802, + "grad_norm": 1.2418088912963867, + "learning_rate": 3.3760185289987935e-06, + "loss": 0.3221, + "step": 30828 + }, + { + "epoch": 0.6171508645497085, + "grad_norm": 1.2683483362197876, + "learning_rate": 3.375711925315606e-06, + "loss": 0.3376, + "step": 30829 + }, + { + "epoch": 0.6171708830668369, + "grad_norm": 1.2138780355453491, + "learning_rate": 3.375405328460576e-06, + "loss": 0.3085, + "step": 30830 + }, + { + "epoch": 0.6171909015839652, + "grad_norm": 1.0280410051345825, + "learning_rate": 3.375098738434992e-06, + "loss": 0.3196, + "step": 30831 + }, + { + "epoch": 0.6172109201010935, + "grad_norm": 1.3321943283081055, + "learning_rate": 3.3747921552401443e-06, + "loss": 0.3016, + "step": 30832 + }, + { + "epoch": 0.6172309386182219, + "grad_norm": 1.0581955909729004, + "learning_rate": 3.3744855788773208e-06, + "loss": 0.3119, + "step": 30833 + }, + { + "epoch": 0.6172509571353502, + "grad_norm": 1.1427710056304932, + "learning_rate": 3.3741790093478076e-06, + "loss": 0.332, + "step": 30834 + }, + { + "epoch": 0.6172709756524786, + "grad_norm": 1.1516119241714478, + "learning_rate": 3.3738724466528987e-06, + "loss": 0.2757, + "step": 30835 + }, + { + "epoch": 0.6172909941696069, + "grad_norm": 1.1322826147079468, + "learning_rate": 3.3735658907938803e-06, + "loss": 0.3583, + "step": 30836 + }, + { + "epoch": 0.6173110126867353, + "grad_norm": 0.9946809411048889, + "learning_rate": 3.3732593417720394e-06, + "loss": 0.2476, + "step": 30837 + }, + { + "epoch": 0.6173310312038636, + "grad_norm": 0.9769138693809509, + "learning_rate": 3.372952799588667e-06, + "loss": 0.2938, + "step": 30838 + }, + { + "epoch": 0.6173510497209919, + "grad_norm": 1.1603525876998901, + "learning_rate": 3.372646264245049e-06, + "loss": 0.3309, + "step": 30839 + }, + { + "epoch": 0.6173710682381203, + "grad_norm": 1.2065153121948242, + "learning_rate": 3.372339735742478e-06, + "loss": 0.2841, + "step": 30840 + }, + { + "epoch": 0.6173910867552486, + "grad_norm": 1.7071192264556885, + "learning_rate": 3.372033214082241e-06, + "loss": 0.8176, + "step": 30841 + }, + { + "epoch": 0.617411105272377, + "grad_norm": 1.1084073781967163, + "learning_rate": 3.371726699265624e-06, + "loss": 0.2823, + "step": 30842 + }, + { + "epoch": 0.6174311237895053, + "grad_norm": 1.0996882915496826, + "learning_rate": 3.371420191293918e-06, + "loss": 0.3291, + "step": 30843 + }, + { + "epoch": 0.6174511423066337, + "grad_norm": 1.212228775024414, + "learning_rate": 3.371113690168411e-06, + "loss": 0.2941, + "step": 30844 + }, + { + "epoch": 0.617471160823762, + "grad_norm": 1.1492527723312378, + "learning_rate": 3.3708071958903925e-06, + "loss": 0.3571, + "step": 30845 + }, + { + "epoch": 0.6174911793408904, + "grad_norm": 1.0911465883255005, + "learning_rate": 3.3705007084611503e-06, + "loss": 0.2654, + "step": 30846 + }, + { + "epoch": 0.6175111978580187, + "grad_norm": 1.1825671195983887, + "learning_rate": 3.370194227881972e-06, + "loss": 0.3056, + "step": 30847 + }, + { + "epoch": 0.617531216375147, + "grad_norm": 1.0643091201782227, + "learning_rate": 3.3698877541541443e-06, + "loss": 0.341, + "step": 30848 + }, + { + "epoch": 0.6175512348922754, + "grad_norm": 1.0747748613357544, + "learning_rate": 3.36958128727896e-06, + "loss": 0.3431, + "step": 30849 + }, + { + "epoch": 0.6175712534094037, + "grad_norm": 1.0965877771377563, + "learning_rate": 3.369274827257704e-06, + "loss": 0.2748, + "step": 30850 + }, + { + "epoch": 0.6175912719265321, + "grad_norm": 1.1766552925109863, + "learning_rate": 3.368968374091667e-06, + "loss": 0.2662, + "step": 30851 + }, + { + "epoch": 0.6176112904436604, + "grad_norm": 2.023289442062378, + "learning_rate": 3.368661927782134e-06, + "loss": 0.6996, + "step": 30852 + }, + { + "epoch": 0.6176313089607888, + "grad_norm": 1.1477240324020386, + "learning_rate": 3.368355488330397e-06, + "loss": 0.3342, + "step": 30853 + }, + { + "epoch": 0.6176513274779171, + "grad_norm": 1.1433860063552856, + "learning_rate": 3.368049055737742e-06, + "loss": 0.3061, + "step": 30854 + }, + { + "epoch": 0.6176713459950454, + "grad_norm": 1.149462103843689, + "learning_rate": 3.3677426300054572e-06, + "loss": 0.3242, + "step": 30855 + }, + { + "epoch": 0.6176913645121738, + "grad_norm": 1.0084280967712402, + "learning_rate": 3.36743621113483e-06, + "loss": 0.3356, + "step": 30856 + }, + { + "epoch": 0.6177113830293021, + "grad_norm": 1.1268925666809082, + "learning_rate": 3.3671297991271508e-06, + "loss": 0.2746, + "step": 30857 + }, + { + "epoch": 0.6177314015464305, + "grad_norm": 1.1051522493362427, + "learning_rate": 3.366823393983707e-06, + "loss": 0.3391, + "step": 30858 + }, + { + "epoch": 0.6177514200635588, + "grad_norm": 1.0618860721588135, + "learning_rate": 3.366516995705786e-06, + "loss": 0.2908, + "step": 30859 + }, + { + "epoch": 0.6177714385806872, + "grad_norm": 1.148828387260437, + "learning_rate": 3.366210604294677e-06, + "loss": 0.2764, + "step": 30860 + }, + { + "epoch": 0.6177914570978155, + "grad_norm": 1.2086639404296875, + "learning_rate": 3.3659042197516646e-06, + "loss": 0.3029, + "step": 30861 + }, + { + "epoch": 0.6178114756149439, + "grad_norm": 1.1845389604568481, + "learning_rate": 3.3655978420780406e-06, + "loss": 0.2427, + "step": 30862 + }, + { + "epoch": 0.6178314941320722, + "grad_norm": 1.058423399925232, + "learning_rate": 3.3652914712750908e-06, + "loss": 0.2607, + "step": 30863 + }, + { + "epoch": 0.6178515126492005, + "grad_norm": 1.046855092048645, + "learning_rate": 3.3649851073441053e-06, + "loss": 0.2835, + "step": 30864 + }, + { + "epoch": 0.6178715311663289, + "grad_norm": 0.9903191924095154, + "learning_rate": 3.3646787502863674e-06, + "loss": 0.3108, + "step": 30865 + }, + { + "epoch": 0.6178915496834572, + "grad_norm": 1.2229630947113037, + "learning_rate": 3.3643724001031713e-06, + "loss": 0.2736, + "step": 30866 + }, + { + "epoch": 0.6179115682005856, + "grad_norm": 2.007676124572754, + "learning_rate": 3.3640660567958017e-06, + "loss": 0.8157, + "step": 30867 + }, + { + "epoch": 0.6179315867177139, + "grad_norm": 1.0354013442993164, + "learning_rate": 3.3637597203655446e-06, + "loss": 0.2923, + "step": 30868 + }, + { + "epoch": 0.6179516052348423, + "grad_norm": 1.1693181991577148, + "learning_rate": 3.3634533908136914e-06, + "loss": 0.2994, + "step": 30869 + }, + { + "epoch": 0.6179716237519706, + "grad_norm": 1.0966987609863281, + "learning_rate": 3.3631470681415245e-06, + "loss": 0.3482, + "step": 30870 + }, + { + "epoch": 0.6179916422690989, + "grad_norm": 1.0200293064117432, + "learning_rate": 3.362840752350338e-06, + "loss": 0.2957, + "step": 30871 + }, + { + "epoch": 0.6180116607862273, + "grad_norm": 1.0627926588058472, + "learning_rate": 3.362534443441417e-06, + "loss": 0.3281, + "step": 30872 + }, + { + "epoch": 0.6180316793033556, + "grad_norm": 1.0773026943206787, + "learning_rate": 3.3622281414160483e-06, + "loss": 0.2759, + "step": 30873 + }, + { + "epoch": 0.618051697820484, + "grad_norm": 1.0482293367385864, + "learning_rate": 3.361921846275518e-06, + "loss": 0.2845, + "step": 30874 + }, + { + "epoch": 0.6180717163376123, + "grad_norm": 1.3229377269744873, + "learning_rate": 3.3616155580211187e-06, + "loss": 0.371, + "step": 30875 + }, + { + "epoch": 0.6180917348547407, + "grad_norm": 1.077017903327942, + "learning_rate": 3.3613092766541327e-06, + "loss": 0.3017, + "step": 30876 + }, + { + "epoch": 0.618111753371869, + "grad_norm": 1.1965076923370361, + "learning_rate": 3.361003002175851e-06, + "loss": 0.3189, + "step": 30877 + }, + { + "epoch": 0.6181317718889974, + "grad_norm": 1.1133601665496826, + "learning_rate": 3.3606967345875585e-06, + "loss": 0.2964, + "step": 30878 + }, + { + "epoch": 0.6181517904061257, + "grad_norm": 0.9598623514175415, + "learning_rate": 3.3603904738905456e-06, + "loss": 0.2924, + "step": 30879 + }, + { + "epoch": 0.618171808923254, + "grad_norm": 1.1041972637176514, + "learning_rate": 3.3600842200860994e-06, + "loss": 0.2956, + "step": 30880 + }, + { + "epoch": 0.6181918274403824, + "grad_norm": 1.0820388793945312, + "learning_rate": 3.3597779731755044e-06, + "loss": 0.2922, + "step": 30881 + }, + { + "epoch": 0.6182118459575107, + "grad_norm": 1.1530890464782715, + "learning_rate": 3.3594717331600507e-06, + "loss": 0.3614, + "step": 30882 + }, + { + "epoch": 0.6182318644746391, + "grad_norm": 0.995100200176239, + "learning_rate": 3.359165500041023e-06, + "loss": 0.2441, + "step": 30883 + }, + { + "epoch": 0.6182518829917674, + "grad_norm": 1.0635707378387451, + "learning_rate": 3.3588592738197124e-06, + "loss": 0.3384, + "step": 30884 + }, + { + "epoch": 0.6182719015088958, + "grad_norm": 1.128433346748352, + "learning_rate": 3.3585530544974045e-06, + "loss": 0.3142, + "step": 30885 + }, + { + "epoch": 0.6182919200260241, + "grad_norm": 1.035697340965271, + "learning_rate": 3.358246842075386e-06, + "loss": 0.3047, + "step": 30886 + }, + { + "epoch": 0.6183119385431524, + "grad_norm": 1.0811229944229126, + "learning_rate": 3.3579406365549426e-06, + "loss": 0.309, + "step": 30887 + }, + { + "epoch": 0.6183319570602808, + "grad_norm": 1.1503772735595703, + "learning_rate": 3.3576344379373653e-06, + "loss": 0.3131, + "step": 30888 + }, + { + "epoch": 0.6183519755774091, + "grad_norm": 1.2121909856796265, + "learning_rate": 3.3573282462239388e-06, + "loss": 0.295, + "step": 30889 + }, + { + "epoch": 0.6183719940945375, + "grad_norm": 1.0222923755645752, + "learning_rate": 3.357022061415951e-06, + "loss": 0.3014, + "step": 30890 + }, + { + "epoch": 0.6183920126116658, + "grad_norm": 1.1178346872329712, + "learning_rate": 3.3567158835146895e-06, + "loss": 0.3153, + "step": 30891 + }, + { + "epoch": 0.6184120311287942, + "grad_norm": 1.1406104564666748, + "learning_rate": 3.3564097125214384e-06, + "loss": 0.2854, + "step": 30892 + }, + { + "epoch": 0.6184320496459225, + "grad_norm": 1.1854749917984009, + "learning_rate": 3.3561035484374892e-06, + "loss": 0.3123, + "step": 30893 + }, + { + "epoch": 0.6184520681630509, + "grad_norm": 1.0366123914718628, + "learning_rate": 3.355797391264126e-06, + "loss": 0.2877, + "step": 30894 + }, + { + "epoch": 0.6184720866801792, + "grad_norm": 1.212009072303772, + "learning_rate": 3.3554912410026387e-06, + "loss": 0.2841, + "step": 30895 + }, + { + "epoch": 0.6184921051973075, + "grad_norm": 1.150896668434143, + "learning_rate": 3.355185097654309e-06, + "loss": 0.2841, + "step": 30896 + }, + { + "epoch": 0.6185121237144359, + "grad_norm": 1.3085908889770508, + "learning_rate": 3.35487896122043e-06, + "loss": 0.3283, + "step": 30897 + }, + { + "epoch": 0.6185321422315642, + "grad_norm": 1.1797093152999878, + "learning_rate": 3.354572831702285e-06, + "loss": 0.3147, + "step": 30898 + }, + { + "epoch": 0.6185521607486926, + "grad_norm": 1.331006407737732, + "learning_rate": 3.354266709101163e-06, + "loss": 0.331, + "step": 30899 + }, + { + "epoch": 0.6185721792658209, + "grad_norm": 1.1926003694534302, + "learning_rate": 3.353960593418347e-06, + "loss": 0.2618, + "step": 30900 + }, + { + "epoch": 0.6185921977829493, + "grad_norm": 1.1246591806411743, + "learning_rate": 3.3536544846551273e-06, + "loss": 0.3053, + "step": 30901 + }, + { + "epoch": 0.6186122163000776, + "grad_norm": 1.3355231285095215, + "learning_rate": 3.35334838281279e-06, + "loss": 0.2902, + "step": 30902 + }, + { + "epoch": 0.6186322348172059, + "grad_norm": 0.9506170749664307, + "learning_rate": 3.353042287892622e-06, + "loss": 0.3011, + "step": 30903 + }, + { + "epoch": 0.6186522533343343, + "grad_norm": 1.9116555452346802, + "learning_rate": 3.352736199895911e-06, + "loss": 0.7042, + "step": 30904 + }, + { + "epoch": 0.6186722718514626, + "grad_norm": 1.1233768463134766, + "learning_rate": 3.3524301188239395e-06, + "loss": 0.2794, + "step": 30905 + }, + { + "epoch": 0.618692290368591, + "grad_norm": 1.1023569107055664, + "learning_rate": 3.352124044677999e-06, + "loss": 0.3306, + "step": 30906 + }, + { + "epoch": 0.6187123088857193, + "grad_norm": 1.005251407623291, + "learning_rate": 3.351817977459374e-06, + "loss": 0.2894, + "step": 30907 + }, + { + "epoch": 0.6187323274028477, + "grad_norm": 1.793605089187622, + "learning_rate": 3.3515119171693527e-06, + "loss": 0.7376, + "step": 30908 + }, + { + "epoch": 0.618752345919976, + "grad_norm": 2.006845474243164, + "learning_rate": 3.351205863809218e-06, + "loss": 0.7915, + "step": 30909 + }, + { + "epoch": 0.6187723644371044, + "grad_norm": 1.8619778156280518, + "learning_rate": 3.3508998173802615e-06, + "loss": 0.7374, + "step": 30910 + }, + { + "epoch": 0.6187923829542327, + "grad_norm": 1.1811009645462036, + "learning_rate": 3.350593777883767e-06, + "loss": 0.2709, + "step": 30911 + }, + { + "epoch": 0.618812401471361, + "grad_norm": 1.1858969926834106, + "learning_rate": 3.3502877453210215e-06, + "loss": 0.3042, + "step": 30912 + }, + { + "epoch": 0.6188324199884894, + "grad_norm": 1.200472354888916, + "learning_rate": 3.3499817196933104e-06, + "loss": 0.2939, + "step": 30913 + }, + { + "epoch": 0.6188524385056177, + "grad_norm": 1.1092463731765747, + "learning_rate": 3.349675701001921e-06, + "loss": 0.2832, + "step": 30914 + }, + { + "epoch": 0.6188724570227461, + "grad_norm": 1.110308051109314, + "learning_rate": 3.3493696892481393e-06, + "loss": 0.2882, + "step": 30915 + }, + { + "epoch": 0.6188924755398744, + "grad_norm": 1.1111950874328613, + "learning_rate": 3.3490636844332538e-06, + "loss": 0.2831, + "step": 30916 + }, + { + "epoch": 0.6189124940570028, + "grad_norm": 1.1353278160095215, + "learning_rate": 3.3487576865585492e-06, + "loss": 0.2931, + "step": 30917 + }, + { + "epoch": 0.6189325125741311, + "grad_norm": 1.3594945669174194, + "learning_rate": 3.3484516956253094e-06, + "loss": 0.292, + "step": 30918 + }, + { + "epoch": 0.6189525310912594, + "grad_norm": 1.810800552368164, + "learning_rate": 3.348145711634826e-06, + "loss": 0.752, + "step": 30919 + }, + { + "epoch": 0.6189725496083878, + "grad_norm": 1.2976586818695068, + "learning_rate": 3.3478397345883807e-06, + "loss": 0.3502, + "step": 30920 + }, + { + "epoch": 0.6189925681255161, + "grad_norm": 1.0644502639770508, + "learning_rate": 3.347533764487263e-06, + "loss": 0.2754, + "step": 30921 + }, + { + "epoch": 0.6190125866426445, + "grad_norm": 1.131919503211975, + "learning_rate": 3.347227801332758e-06, + "loss": 0.2823, + "step": 30922 + }, + { + "epoch": 0.6190326051597728, + "grad_norm": 1.226744532585144, + "learning_rate": 3.346921845126149e-06, + "loss": 0.2982, + "step": 30923 + }, + { + "epoch": 0.6190526236769012, + "grad_norm": 1.0766788721084595, + "learning_rate": 3.346615895868728e-06, + "loss": 0.3212, + "step": 30924 + }, + { + "epoch": 0.6190726421940295, + "grad_norm": 1.077458143234253, + "learning_rate": 3.3463099535617773e-06, + "loss": 0.3061, + "step": 30925 + }, + { + "epoch": 0.6190926607111579, + "grad_norm": 1.0958272218704224, + "learning_rate": 3.3460040182065822e-06, + "loss": 0.2928, + "step": 30926 + }, + { + "epoch": 0.6191126792282862, + "grad_norm": 1.2439899444580078, + "learning_rate": 3.3456980898044307e-06, + "loss": 0.2964, + "step": 30927 + }, + { + "epoch": 0.6191326977454145, + "grad_norm": 1.107839822769165, + "learning_rate": 3.3453921683566082e-06, + "loss": 0.3189, + "step": 30928 + }, + { + "epoch": 0.6191527162625429, + "grad_norm": 1.0883636474609375, + "learning_rate": 3.3450862538644026e-06, + "loss": 0.3392, + "step": 30929 + }, + { + "epoch": 0.6191727347796712, + "grad_norm": 1.9424786567687988, + "learning_rate": 3.3447803463290975e-06, + "loss": 0.785, + "step": 30930 + }, + { + "epoch": 0.6191927532967996, + "grad_norm": 1.8857110738754272, + "learning_rate": 3.3444744457519796e-06, + "loss": 0.7357, + "step": 30931 + }, + { + "epoch": 0.6192127718139279, + "grad_norm": 1.0828269720077515, + "learning_rate": 3.344168552134334e-06, + "loss": 0.2768, + "step": 30932 + }, + { + "epoch": 0.6192327903310563, + "grad_norm": 1.0859204530715942, + "learning_rate": 3.343862665477447e-06, + "loss": 0.268, + "step": 30933 + }, + { + "epoch": 0.6192528088481846, + "grad_norm": 1.1016178131103516, + "learning_rate": 3.3435567857826067e-06, + "loss": 0.3412, + "step": 30934 + }, + { + "epoch": 0.6192728273653129, + "grad_norm": 1.1495192050933838, + "learning_rate": 3.3432509130510972e-06, + "loss": 0.3476, + "step": 30935 + }, + { + "epoch": 0.6192928458824413, + "grad_norm": 1.152783989906311, + "learning_rate": 3.342945047284202e-06, + "loss": 0.3073, + "step": 30936 + }, + { + "epoch": 0.6193128643995696, + "grad_norm": 1.765345811843872, + "learning_rate": 3.342639188483211e-06, + "loss": 0.8295, + "step": 30937 + }, + { + "epoch": 0.619332882916698, + "grad_norm": 1.0127487182617188, + "learning_rate": 3.342333336649409e-06, + "loss": 0.303, + "step": 30938 + }, + { + "epoch": 0.6193529014338263, + "grad_norm": 1.377002239227295, + "learning_rate": 3.3420274917840796e-06, + "loss": 0.3169, + "step": 30939 + }, + { + "epoch": 0.6193729199509547, + "grad_norm": 1.1196857690811157, + "learning_rate": 3.34172165388851e-06, + "loss": 0.3367, + "step": 30940 + }, + { + "epoch": 0.619392938468083, + "grad_norm": 1.0175817012786865, + "learning_rate": 3.341415822963985e-06, + "loss": 0.2769, + "step": 30941 + }, + { + "epoch": 0.6194129569852114, + "grad_norm": 1.0334718227386475, + "learning_rate": 3.3411099990117924e-06, + "loss": 0.291, + "step": 30942 + }, + { + "epoch": 0.6194329755023397, + "grad_norm": 1.0676809549331665, + "learning_rate": 3.3408041820332164e-06, + "loss": 0.2893, + "step": 30943 + }, + { + "epoch": 0.619452994019468, + "grad_norm": 1.1697536706924438, + "learning_rate": 3.3404983720295424e-06, + "loss": 0.3185, + "step": 30944 + }, + { + "epoch": 0.6194730125365964, + "grad_norm": 1.02024507522583, + "learning_rate": 3.3401925690020543e-06, + "loss": 0.3019, + "step": 30945 + }, + { + "epoch": 0.6194930310537247, + "grad_norm": 1.0572704076766968, + "learning_rate": 3.3398867729520408e-06, + "loss": 0.2811, + "step": 30946 + }, + { + "epoch": 0.6195130495708531, + "grad_norm": 1.1408458948135376, + "learning_rate": 3.3395809838807863e-06, + "loss": 0.3374, + "step": 30947 + }, + { + "epoch": 0.6195330680879814, + "grad_norm": 1.0622272491455078, + "learning_rate": 3.3392752017895767e-06, + "loss": 0.2906, + "step": 30948 + }, + { + "epoch": 0.6195530866051098, + "grad_norm": 1.9470188617706299, + "learning_rate": 3.338969426679694e-06, + "loss": 0.8001, + "step": 30949 + }, + { + "epoch": 0.6195731051222381, + "grad_norm": 1.037393569946289, + "learning_rate": 3.338663658552429e-06, + "loss": 0.3097, + "step": 30950 + }, + { + "epoch": 0.6195931236393664, + "grad_norm": 1.0560299158096313, + "learning_rate": 3.338357897409064e-06, + "loss": 0.3114, + "step": 30951 + }, + { + "epoch": 0.6196131421564948, + "grad_norm": 1.0303153991699219, + "learning_rate": 3.338052143250884e-06, + "loss": 0.2475, + "step": 30952 + }, + { + "epoch": 0.6196331606736231, + "grad_norm": 1.1469378471374512, + "learning_rate": 3.337746396079177e-06, + "loss": 0.2891, + "step": 30953 + }, + { + "epoch": 0.6196531791907515, + "grad_norm": 1.0219541788101196, + "learning_rate": 3.3374406558952234e-06, + "loss": 0.2508, + "step": 30954 + }, + { + "epoch": 0.6196731977078798, + "grad_norm": 1.3238097429275513, + "learning_rate": 3.3371349227003138e-06, + "loss": 0.3413, + "step": 30955 + }, + { + "epoch": 0.6196932162250082, + "grad_norm": 1.0398741960525513, + "learning_rate": 3.3368291964957307e-06, + "loss": 0.2631, + "step": 30956 + }, + { + "epoch": 0.6197132347421365, + "grad_norm": 1.0802987813949585, + "learning_rate": 3.3365234772827604e-06, + "loss": 0.3257, + "step": 30957 + }, + { + "epoch": 0.6197332532592649, + "grad_norm": 0.9965670704841614, + "learning_rate": 3.336217765062685e-06, + "loss": 0.2734, + "step": 30958 + }, + { + "epoch": 0.6197532717763932, + "grad_norm": 1.2096092700958252, + "learning_rate": 3.3359120598367927e-06, + "loss": 0.2989, + "step": 30959 + }, + { + "epoch": 0.6197732902935215, + "grad_norm": 1.8462435007095337, + "learning_rate": 3.3356063616063693e-06, + "loss": 0.7238, + "step": 30960 + }, + { + "epoch": 0.6197933088106499, + "grad_norm": 1.1461724042892456, + "learning_rate": 3.3353006703726985e-06, + "loss": 0.3147, + "step": 30961 + }, + { + "epoch": 0.6198133273277782, + "grad_norm": 1.1560965776443481, + "learning_rate": 3.334994986137066e-06, + "loss": 0.3124, + "step": 30962 + }, + { + "epoch": 0.6198333458449066, + "grad_norm": 1.080552577972412, + "learning_rate": 3.3346893089007537e-06, + "loss": 0.3155, + "step": 30963 + }, + { + "epoch": 0.6198533643620349, + "grad_norm": 1.192915439605713, + "learning_rate": 3.3343836386650517e-06, + "loss": 0.3022, + "step": 30964 + }, + { + "epoch": 0.6198733828791633, + "grad_norm": 1.0377469062805176, + "learning_rate": 3.3340779754312412e-06, + "loss": 0.281, + "step": 30965 + }, + { + "epoch": 0.6198934013962916, + "grad_norm": 1.213879942893982, + "learning_rate": 3.3337723192006094e-06, + "loss": 0.2873, + "step": 30966 + }, + { + "epoch": 0.6199134199134199, + "grad_norm": 0.9833200573921204, + "learning_rate": 3.3334666699744378e-06, + "loss": 0.3031, + "step": 30967 + }, + { + "epoch": 0.6199334384305483, + "grad_norm": 1.0298799276351929, + "learning_rate": 3.333161027754016e-06, + "loss": 0.3034, + "step": 30968 + }, + { + "epoch": 0.6199534569476766, + "grad_norm": 1.1230524778366089, + "learning_rate": 3.332855392540627e-06, + "loss": 0.2694, + "step": 30969 + }, + { + "epoch": 0.619973475464805, + "grad_norm": 1.0026687383651733, + "learning_rate": 3.332549764335554e-06, + "loss": 0.3246, + "step": 30970 + }, + { + "epoch": 0.6199934939819333, + "grad_norm": 1.2135237455368042, + "learning_rate": 3.332244143140082e-06, + "loss": 0.3009, + "step": 30971 + }, + { + "epoch": 0.6200135124990617, + "grad_norm": 1.1744270324707031, + "learning_rate": 3.3319385289554974e-06, + "loss": 0.312, + "step": 30972 + }, + { + "epoch": 0.62003353101619, + "grad_norm": 1.3474663496017456, + "learning_rate": 3.3316329217830852e-06, + "loss": 0.3049, + "step": 30973 + }, + { + "epoch": 0.6200535495333184, + "grad_norm": 1.1550796031951904, + "learning_rate": 3.331327321624129e-06, + "loss": 0.3092, + "step": 30974 + }, + { + "epoch": 0.6200735680504467, + "grad_norm": 1.1505074501037598, + "learning_rate": 3.331021728479914e-06, + "loss": 0.2462, + "step": 30975 + }, + { + "epoch": 0.620093586567575, + "grad_norm": 1.086175560951233, + "learning_rate": 3.330716142351722e-06, + "loss": 0.2786, + "step": 30976 + }, + { + "epoch": 0.6201136050847034, + "grad_norm": 1.2913267612457275, + "learning_rate": 3.330410563240842e-06, + "loss": 0.3208, + "step": 30977 + }, + { + "epoch": 0.6201336236018317, + "grad_norm": 1.1237212419509888, + "learning_rate": 3.3301049911485565e-06, + "loss": 0.3087, + "step": 30978 + }, + { + "epoch": 0.6201536421189601, + "grad_norm": 1.8442049026489258, + "learning_rate": 3.3297994260761513e-06, + "loss": 0.3566, + "step": 30979 + }, + { + "epoch": 0.6201736606360884, + "grad_norm": 1.195478916168213, + "learning_rate": 3.329493868024907e-06, + "loss": 0.2851, + "step": 30980 + }, + { + "epoch": 0.6201936791532168, + "grad_norm": 0.9840255379676819, + "learning_rate": 3.3291883169961135e-06, + "loss": 0.2505, + "step": 30981 + }, + { + "epoch": 0.6202136976703451, + "grad_norm": 1.2236567735671997, + "learning_rate": 3.328882772991052e-06, + "loss": 0.3318, + "step": 30982 + }, + { + "epoch": 0.6202337161874734, + "grad_norm": 1.082148551940918, + "learning_rate": 3.328577236011009e-06, + "loss": 0.2877, + "step": 30983 + }, + { + "epoch": 0.6202537347046018, + "grad_norm": 1.340948462486267, + "learning_rate": 3.3282717060572654e-06, + "loss": 0.3286, + "step": 30984 + }, + { + "epoch": 0.6202737532217301, + "grad_norm": 1.108672857284546, + "learning_rate": 3.3279661831311076e-06, + "loss": 0.2869, + "step": 30985 + }, + { + "epoch": 0.6202937717388585, + "grad_norm": 0.9838961958885193, + "learning_rate": 3.327660667233822e-06, + "loss": 0.2786, + "step": 30986 + }, + { + "epoch": 0.6203137902559868, + "grad_norm": 1.084929347038269, + "learning_rate": 3.3273551583666906e-06, + "loss": 0.3093, + "step": 30987 + }, + { + "epoch": 0.6203338087731152, + "grad_norm": 1.1240285634994507, + "learning_rate": 3.327049656530999e-06, + "loss": 0.3081, + "step": 30988 + }, + { + "epoch": 0.6203538272902435, + "grad_norm": 1.0844453573226929, + "learning_rate": 3.3267441617280285e-06, + "loss": 0.2932, + "step": 30989 + }, + { + "epoch": 0.6203738458073719, + "grad_norm": 1.0218110084533691, + "learning_rate": 3.3264386739590666e-06, + "loss": 0.2664, + "step": 30990 + }, + { + "epoch": 0.6203938643245002, + "grad_norm": 1.0942151546478271, + "learning_rate": 3.326133193225396e-06, + "loss": 0.3045, + "step": 30991 + }, + { + "epoch": 0.6204138828416285, + "grad_norm": 1.1618050336837769, + "learning_rate": 3.325827719528302e-06, + "loss": 0.3241, + "step": 30992 + }, + { + "epoch": 0.6204339013587569, + "grad_norm": 1.268918514251709, + "learning_rate": 3.3255222528690677e-06, + "loss": 0.3367, + "step": 30993 + }, + { + "epoch": 0.6204539198758852, + "grad_norm": 1.297269582748413, + "learning_rate": 3.3252167932489758e-06, + "loss": 0.3634, + "step": 30994 + }, + { + "epoch": 0.6204739383930136, + "grad_norm": 1.1358659267425537, + "learning_rate": 3.3249113406693146e-06, + "loss": 0.3431, + "step": 30995 + }, + { + "epoch": 0.6204939569101419, + "grad_norm": 1.1473742723464966, + "learning_rate": 3.324605895131365e-06, + "loss": 0.308, + "step": 30996 + }, + { + "epoch": 0.6205139754272703, + "grad_norm": 0.9719729423522949, + "learning_rate": 3.3243004566364106e-06, + "loss": 0.2888, + "step": 30997 + }, + { + "epoch": 0.6205339939443986, + "grad_norm": 1.0291800498962402, + "learning_rate": 3.3239950251857356e-06, + "loss": 0.3199, + "step": 30998 + }, + { + "epoch": 0.6205540124615269, + "grad_norm": 1.1044877767562866, + "learning_rate": 3.3236896007806273e-06, + "loss": 0.3034, + "step": 30999 + }, + { + "epoch": 0.6205740309786553, + "grad_norm": 1.2051599025726318, + "learning_rate": 3.3233841834223668e-06, + "loss": 0.2873, + "step": 31000 + }, + { + "epoch": 0.6205940494957836, + "grad_norm": 1.0777270793914795, + "learning_rate": 3.323078773112238e-06, + "loss": 0.3056, + "step": 31001 + }, + { + "epoch": 0.620614068012912, + "grad_norm": 2.1225388050079346, + "learning_rate": 3.322773369851524e-06, + "loss": 0.761, + "step": 31002 + }, + { + "epoch": 0.6206340865300403, + "grad_norm": 1.2455106973648071, + "learning_rate": 3.322467973641511e-06, + "loss": 0.3164, + "step": 31003 + }, + { + "epoch": 0.6206541050471687, + "grad_norm": 1.158729910850525, + "learning_rate": 3.322162584483481e-06, + "loss": 0.3158, + "step": 31004 + }, + { + "epoch": 0.620674123564297, + "grad_norm": 1.0518864393234253, + "learning_rate": 3.3218572023787197e-06, + "loss": 0.3187, + "step": 31005 + }, + { + "epoch": 0.6206941420814254, + "grad_norm": 1.0388751029968262, + "learning_rate": 3.321551827328509e-06, + "loss": 0.3493, + "step": 31006 + }, + { + "epoch": 0.6207141605985537, + "grad_norm": 1.0900026559829712, + "learning_rate": 3.3212464593341313e-06, + "loss": 0.3514, + "step": 31007 + }, + { + "epoch": 0.620734179115682, + "grad_norm": 0.9849648475646973, + "learning_rate": 3.320941098396875e-06, + "loss": 0.2773, + "step": 31008 + }, + { + "epoch": 0.6207541976328104, + "grad_norm": 1.2270216941833496, + "learning_rate": 3.3206357445180205e-06, + "loss": 0.2983, + "step": 31009 + }, + { + "epoch": 0.6207742161499387, + "grad_norm": 1.132002830505371, + "learning_rate": 3.3203303976988503e-06, + "loss": 0.2725, + "step": 31010 + }, + { + "epoch": 0.6207942346670671, + "grad_norm": 1.1215740442276, + "learning_rate": 3.320025057940649e-06, + "loss": 0.3101, + "step": 31011 + }, + { + "epoch": 0.6208142531841954, + "grad_norm": 1.0586339235305786, + "learning_rate": 3.319719725244703e-06, + "loss": 0.3079, + "step": 31012 + }, + { + "epoch": 0.6208342717013238, + "grad_norm": 1.0926332473754883, + "learning_rate": 3.319414399612294e-06, + "loss": 0.2947, + "step": 31013 + }, + { + "epoch": 0.6208542902184521, + "grad_norm": 1.0371613502502441, + "learning_rate": 3.3191090810447046e-06, + "loss": 0.2606, + "step": 31014 + }, + { + "epoch": 0.6208743087355804, + "grad_norm": 1.0710488557815552, + "learning_rate": 3.3188037695432186e-06, + "loss": 0.2865, + "step": 31015 + }, + { + "epoch": 0.6208943272527088, + "grad_norm": 1.0291671752929688, + "learning_rate": 3.318498465109119e-06, + "loss": 0.3191, + "step": 31016 + }, + { + "epoch": 0.6209143457698371, + "grad_norm": 1.8142045736312866, + "learning_rate": 3.31819316774369e-06, + "loss": 0.7914, + "step": 31017 + }, + { + "epoch": 0.6209343642869655, + "grad_norm": 1.133348822593689, + "learning_rate": 3.3178878774482164e-06, + "loss": 0.3381, + "step": 31018 + }, + { + "epoch": 0.6209543828040938, + "grad_norm": 1.1038094758987427, + "learning_rate": 3.3175825942239805e-06, + "loss": 0.2794, + "step": 31019 + }, + { + "epoch": 0.6209744013212222, + "grad_norm": 1.1030542850494385, + "learning_rate": 3.317277318072263e-06, + "loss": 0.2965, + "step": 31020 + }, + { + "epoch": 0.6209944198383505, + "grad_norm": 1.0565087795257568, + "learning_rate": 3.3169720489943515e-06, + "loss": 0.2655, + "step": 31021 + }, + { + "epoch": 0.6210144383554789, + "grad_norm": 1.2856063842773438, + "learning_rate": 3.316666786991527e-06, + "loss": 0.3202, + "step": 31022 + }, + { + "epoch": 0.6210344568726072, + "grad_norm": 1.114712119102478, + "learning_rate": 3.316361532065072e-06, + "loss": 0.3099, + "step": 31023 + }, + { + "epoch": 0.6210544753897355, + "grad_norm": 1.0876702070236206, + "learning_rate": 3.31605628421627e-06, + "loss": 0.2767, + "step": 31024 + }, + { + "epoch": 0.6210744939068639, + "grad_norm": 1.0660783052444458, + "learning_rate": 3.3157510434464067e-06, + "loss": 0.3114, + "step": 31025 + }, + { + "epoch": 0.6210945124239922, + "grad_norm": 1.1668298244476318, + "learning_rate": 3.3154458097567643e-06, + "loss": 0.2926, + "step": 31026 + }, + { + "epoch": 0.6211145309411206, + "grad_norm": 1.1347490549087524, + "learning_rate": 3.315140583148625e-06, + "loss": 0.2852, + "step": 31027 + }, + { + "epoch": 0.6211345494582489, + "grad_norm": 1.142930269241333, + "learning_rate": 3.314835363623271e-06, + "loss": 0.2989, + "step": 31028 + }, + { + "epoch": 0.6211545679753773, + "grad_norm": 1.9105859994888306, + "learning_rate": 3.314530151181986e-06, + "loss": 0.7343, + "step": 31029 + }, + { + "epoch": 0.6211745864925056, + "grad_norm": 1.2451202869415283, + "learning_rate": 3.3142249458260546e-06, + "loss": 0.3207, + "step": 31030 + }, + { + "epoch": 0.6211946050096339, + "grad_norm": 1.0418634414672852, + "learning_rate": 3.3139197475567585e-06, + "loss": 0.3169, + "step": 31031 + }, + { + "epoch": 0.6212146235267623, + "grad_norm": 0.9914836287498474, + "learning_rate": 3.313614556375382e-06, + "loss": 0.2886, + "step": 31032 + }, + { + "epoch": 0.6212346420438906, + "grad_norm": 1.06345796585083, + "learning_rate": 3.3133093722832044e-06, + "loss": 0.3272, + "step": 31033 + }, + { + "epoch": 0.621254660561019, + "grad_norm": 1.0863327980041504, + "learning_rate": 3.313004195281513e-06, + "loss": 0.3006, + "step": 31034 + }, + { + "epoch": 0.6212746790781473, + "grad_norm": 1.090670108795166, + "learning_rate": 3.3126990253715898e-06, + "loss": 0.2908, + "step": 31035 + }, + { + "epoch": 0.6212946975952757, + "grad_norm": 1.152793526649475, + "learning_rate": 3.312393862554715e-06, + "loss": 0.3225, + "step": 31036 + }, + { + "epoch": 0.621314716112404, + "grad_norm": 1.0782477855682373, + "learning_rate": 3.312088706832174e-06, + "loss": 0.3133, + "step": 31037 + }, + { + "epoch": 0.6213347346295324, + "grad_norm": 1.0815458297729492, + "learning_rate": 3.311783558205247e-06, + "loss": 0.2936, + "step": 31038 + }, + { + "epoch": 0.6213547531466607, + "grad_norm": 1.0030418634414673, + "learning_rate": 3.3114784166752212e-06, + "loss": 0.3169, + "step": 31039 + }, + { + "epoch": 0.621374771663789, + "grad_norm": 1.2582048177719116, + "learning_rate": 3.3111732822433767e-06, + "loss": 0.3043, + "step": 31040 + }, + { + "epoch": 0.6213947901809174, + "grad_norm": 1.0662239789962769, + "learning_rate": 3.310868154910995e-06, + "loss": 0.3252, + "step": 31041 + }, + { + "epoch": 0.6214148086980457, + "grad_norm": 1.1847467422485352, + "learning_rate": 3.3105630346793603e-06, + "loss": 0.3211, + "step": 31042 + }, + { + "epoch": 0.6214348272151741, + "grad_norm": 1.825960397720337, + "learning_rate": 3.3102579215497545e-06, + "loss": 0.7406, + "step": 31043 + }, + { + "epoch": 0.6214548457323024, + "grad_norm": 1.9445277452468872, + "learning_rate": 3.3099528155234622e-06, + "loss": 0.7875, + "step": 31044 + }, + { + "epoch": 0.6214748642494308, + "grad_norm": 1.0614123344421387, + "learning_rate": 3.3096477166017647e-06, + "loss": 0.2926, + "step": 31045 + }, + { + "epoch": 0.6214948827665591, + "grad_norm": 1.2052072286605835, + "learning_rate": 3.309342624785944e-06, + "loss": 0.2927, + "step": 31046 + }, + { + "epoch": 0.6215149012836874, + "grad_norm": 1.0468746423721313, + "learning_rate": 3.309037540077281e-06, + "loss": 0.3123, + "step": 31047 + }, + { + "epoch": 0.6215349198008158, + "grad_norm": 1.8480746746063232, + "learning_rate": 3.308732462477063e-06, + "loss": 0.7364, + "step": 31048 + }, + { + "epoch": 0.6215549383179441, + "grad_norm": 1.1286033391952515, + "learning_rate": 3.3084273919865684e-06, + "loss": 0.3057, + "step": 31049 + }, + { + "epoch": 0.6215749568350725, + "grad_norm": 1.0283483266830444, + "learning_rate": 3.308122328607082e-06, + "loss": 0.2473, + "step": 31050 + }, + { + "epoch": 0.6215949753522008, + "grad_norm": 1.2671754360198975, + "learning_rate": 3.307817272339883e-06, + "loss": 0.3025, + "step": 31051 + }, + { + "epoch": 0.6216149938693292, + "grad_norm": 1.1679080724716187, + "learning_rate": 3.307512223186258e-06, + "loss": 0.3295, + "step": 31052 + }, + { + "epoch": 0.6216350123864575, + "grad_norm": 1.1719244718551636, + "learning_rate": 3.3072071811474875e-06, + "loss": 0.3106, + "step": 31053 + }, + { + "epoch": 0.6216550309035859, + "grad_norm": 1.1432362794876099, + "learning_rate": 3.306902146224853e-06, + "loss": 0.2941, + "step": 31054 + }, + { + "epoch": 0.6216750494207142, + "grad_norm": 1.0361847877502441, + "learning_rate": 3.3065971184196372e-06, + "loss": 0.301, + "step": 31055 + }, + { + "epoch": 0.6216950679378425, + "grad_norm": 1.0791019201278687, + "learning_rate": 3.3062920977331227e-06, + "loss": 0.3119, + "step": 31056 + }, + { + "epoch": 0.6217150864549709, + "grad_norm": 1.1407299041748047, + "learning_rate": 3.305987084166593e-06, + "loss": 0.3095, + "step": 31057 + }, + { + "epoch": 0.6217351049720992, + "grad_norm": 1.862497091293335, + "learning_rate": 3.305682077721329e-06, + "loss": 0.8, + "step": 31058 + }, + { + "epoch": 0.6217551234892276, + "grad_norm": 1.1960481405258179, + "learning_rate": 3.3053770783986127e-06, + "loss": 0.2796, + "step": 31059 + }, + { + "epoch": 0.6217751420063559, + "grad_norm": 1.1946678161621094, + "learning_rate": 3.305072086199724e-06, + "loss": 0.3048, + "step": 31060 + }, + { + "epoch": 0.6217951605234843, + "grad_norm": 1.3003181219100952, + "learning_rate": 3.304767101125951e-06, + "loss": 0.2987, + "step": 31061 + }, + { + "epoch": 0.6218151790406126, + "grad_norm": 1.7917536497116089, + "learning_rate": 3.30446212317857e-06, + "loss": 0.7851, + "step": 31062 + }, + { + "epoch": 0.6218351975577409, + "grad_norm": 1.1429460048675537, + "learning_rate": 3.3041571523588667e-06, + "loss": 0.3043, + "step": 31063 + }, + { + "epoch": 0.6218552160748693, + "grad_norm": 1.2406867742538452, + "learning_rate": 3.3038521886681197e-06, + "loss": 0.3338, + "step": 31064 + }, + { + "epoch": 0.6218752345919976, + "grad_norm": 1.1258010864257812, + "learning_rate": 3.3035472321076156e-06, + "loss": 0.2894, + "step": 31065 + }, + { + "epoch": 0.621895253109126, + "grad_norm": 1.019421935081482, + "learning_rate": 3.3032422826786333e-06, + "loss": 0.3149, + "step": 31066 + }, + { + "epoch": 0.6219152716262543, + "grad_norm": 0.9201266765594482, + "learning_rate": 3.3029373403824537e-06, + "loss": 0.3159, + "step": 31067 + }, + { + "epoch": 0.6219352901433827, + "grad_norm": 1.1445213556289673, + "learning_rate": 3.302632405220363e-06, + "loss": 0.323, + "step": 31068 + }, + { + "epoch": 0.621955308660511, + "grad_norm": 1.0097877979278564, + "learning_rate": 3.302327477193637e-06, + "loss": 0.3286, + "step": 31069 + }, + { + "epoch": 0.6219753271776394, + "grad_norm": 1.0989086627960205, + "learning_rate": 3.3020225563035635e-06, + "loss": 0.3166, + "step": 31070 + }, + { + "epoch": 0.6219953456947677, + "grad_norm": 0.9999676942825317, + "learning_rate": 3.3017176425514213e-06, + "loss": 0.3075, + "step": 31071 + }, + { + "epoch": 0.622015364211896, + "grad_norm": 1.1479591131210327, + "learning_rate": 3.301412735938493e-06, + "loss": 0.2995, + "step": 31072 + }, + { + "epoch": 0.6220353827290244, + "grad_norm": 1.123631477355957, + "learning_rate": 3.301107836466058e-06, + "loss": 0.2966, + "step": 31073 + }, + { + "epoch": 0.6220554012461527, + "grad_norm": 1.0080246925354004, + "learning_rate": 3.3008029441354023e-06, + "loss": 0.2979, + "step": 31074 + }, + { + "epoch": 0.6220754197632811, + "grad_norm": 1.248834252357483, + "learning_rate": 3.300498058947804e-06, + "loss": 0.3181, + "step": 31075 + }, + { + "epoch": 0.6220954382804094, + "grad_norm": 1.9167420864105225, + "learning_rate": 3.3001931809045473e-06, + "loss": 0.7995, + "step": 31076 + }, + { + "epoch": 0.6221154567975378, + "grad_norm": 1.0659143924713135, + "learning_rate": 3.2998883100069123e-06, + "loss": 0.2647, + "step": 31077 + }, + { + "epoch": 0.6221354753146661, + "grad_norm": 1.1018116474151611, + "learning_rate": 3.299583446256179e-06, + "loss": 0.3064, + "step": 31078 + }, + { + "epoch": 0.6221554938317944, + "grad_norm": 0.9646096229553223, + "learning_rate": 3.2992785896536338e-06, + "loss": 0.2875, + "step": 31079 + }, + { + "epoch": 0.6221755123489228, + "grad_norm": 1.188869833946228, + "learning_rate": 3.298973740200554e-06, + "loss": 0.3299, + "step": 31080 + }, + { + "epoch": 0.6221955308660511, + "grad_norm": 0.9827237725257874, + "learning_rate": 3.298668897898223e-06, + "loss": 0.2819, + "step": 31081 + }, + { + "epoch": 0.6222155493831795, + "grad_norm": 1.0755596160888672, + "learning_rate": 3.2983640627479207e-06, + "loss": 0.3247, + "step": 31082 + }, + { + "epoch": 0.6222355679003078, + "grad_norm": 1.442170262336731, + "learning_rate": 3.298059234750931e-06, + "loss": 0.2995, + "step": 31083 + }, + { + "epoch": 0.6222555864174362, + "grad_norm": 1.0952248573303223, + "learning_rate": 3.2977544139085336e-06, + "loss": 0.2944, + "step": 31084 + }, + { + "epoch": 0.6222756049345645, + "grad_norm": 1.3730697631835938, + "learning_rate": 3.2974496002220115e-06, + "loss": 0.2947, + "step": 31085 + }, + { + "epoch": 0.6222956234516929, + "grad_norm": 1.1364758014678955, + "learning_rate": 3.2971447936926425e-06, + "loss": 0.3386, + "step": 31086 + }, + { + "epoch": 0.6223156419688212, + "grad_norm": 1.0460460186004639, + "learning_rate": 3.2968399943217124e-06, + "loss": 0.2528, + "step": 31087 + }, + { + "epoch": 0.6223356604859495, + "grad_norm": 1.054368257522583, + "learning_rate": 3.2965352021104985e-06, + "loss": 0.2981, + "step": 31088 + }, + { + "epoch": 0.6223556790030779, + "grad_norm": 1.1800652742385864, + "learning_rate": 3.2962304170602855e-06, + "loss": 0.2814, + "step": 31089 + }, + { + "epoch": 0.6223756975202062, + "grad_norm": 1.0832328796386719, + "learning_rate": 3.295925639172354e-06, + "loss": 0.332, + "step": 31090 + }, + { + "epoch": 0.6223957160373346, + "grad_norm": 1.060744047164917, + "learning_rate": 3.295620868447982e-06, + "loss": 0.2812, + "step": 31091 + }, + { + "epoch": 0.6224157345544629, + "grad_norm": 1.1833992004394531, + "learning_rate": 3.295316104888454e-06, + "loss": 0.3548, + "step": 31092 + }, + { + "epoch": 0.6224357530715913, + "grad_norm": 1.0487849712371826, + "learning_rate": 3.295011348495051e-06, + "loss": 0.3081, + "step": 31093 + }, + { + "epoch": 0.6224557715887196, + "grad_norm": 1.1434913873672485, + "learning_rate": 3.294706599269053e-06, + "loss": 0.2966, + "step": 31094 + }, + { + "epoch": 0.6224757901058479, + "grad_norm": 1.1206480264663696, + "learning_rate": 3.2944018572117396e-06, + "loss": 0.3316, + "step": 31095 + }, + { + "epoch": 0.6224958086229763, + "grad_norm": 1.1143990755081177, + "learning_rate": 3.294097122324396e-06, + "loss": 0.3063, + "step": 31096 + }, + { + "epoch": 0.6225158271401046, + "grad_norm": 1.1385890245437622, + "learning_rate": 3.2937923946083016e-06, + "loss": 0.3339, + "step": 31097 + }, + { + "epoch": 0.622535845657233, + "grad_norm": 1.0923084020614624, + "learning_rate": 3.293487674064736e-06, + "loss": 0.3205, + "step": 31098 + }, + { + "epoch": 0.6225558641743613, + "grad_norm": 1.3735687732696533, + "learning_rate": 3.2931829606949805e-06, + "loss": 0.3146, + "step": 31099 + }, + { + "epoch": 0.6225758826914897, + "grad_norm": 1.0238981246948242, + "learning_rate": 3.2928782545003156e-06, + "loss": 0.3051, + "step": 31100 + }, + { + "epoch": 0.622595901208618, + "grad_norm": 1.1207090616226196, + "learning_rate": 3.292573555482024e-06, + "loss": 0.3116, + "step": 31101 + }, + { + "epoch": 0.6226159197257464, + "grad_norm": 2.0626063346862793, + "learning_rate": 3.292268863641387e-06, + "loss": 0.756, + "step": 31102 + }, + { + "epoch": 0.6226359382428747, + "grad_norm": 0.9513341784477234, + "learning_rate": 3.291964178979683e-06, + "loss": 0.2708, + "step": 31103 + }, + { + "epoch": 0.622655956760003, + "grad_norm": 1.2520530223846436, + "learning_rate": 3.2916595014981936e-06, + "loss": 0.3199, + "step": 31104 + }, + { + "epoch": 0.6226759752771314, + "grad_norm": 1.0899693965911865, + "learning_rate": 3.2913548311982003e-06, + "loss": 0.2927, + "step": 31105 + }, + { + "epoch": 0.6226959937942597, + "grad_norm": 1.1416891813278198, + "learning_rate": 3.2910501680809838e-06, + "loss": 0.2898, + "step": 31106 + }, + { + "epoch": 0.6227160123113881, + "grad_norm": 1.931384563446045, + "learning_rate": 3.290745512147825e-06, + "loss": 0.7208, + "step": 31107 + }, + { + "epoch": 0.6227360308285164, + "grad_norm": 1.1261614561080933, + "learning_rate": 3.2904408634000045e-06, + "loss": 0.3048, + "step": 31108 + }, + { + "epoch": 0.6227560493456448, + "grad_norm": 1.1411924362182617, + "learning_rate": 3.290136221838801e-06, + "loss": 0.2634, + "step": 31109 + }, + { + "epoch": 0.6227760678627731, + "grad_norm": 1.2754441499710083, + "learning_rate": 3.2898315874654986e-06, + "loss": 0.3172, + "step": 31110 + }, + { + "epoch": 0.6227960863799014, + "grad_norm": 1.1689379215240479, + "learning_rate": 3.2895269602813763e-06, + "loss": 0.2986, + "step": 31111 + }, + { + "epoch": 0.6228161048970298, + "grad_norm": 1.2057620286941528, + "learning_rate": 3.2892223402877133e-06, + "loss": 0.2874, + "step": 31112 + }, + { + "epoch": 0.6228361234141581, + "grad_norm": 1.1426973342895508, + "learning_rate": 3.2889177274857908e-06, + "loss": 0.2843, + "step": 31113 + }, + { + "epoch": 0.6228561419312865, + "grad_norm": 1.2209479808807373, + "learning_rate": 3.2886131218768915e-06, + "loss": 0.3787, + "step": 31114 + }, + { + "epoch": 0.6228761604484148, + "grad_norm": 1.2959760427474976, + "learning_rate": 3.288308523462294e-06, + "loss": 0.2771, + "step": 31115 + }, + { + "epoch": 0.6228961789655432, + "grad_norm": 1.1506420373916626, + "learning_rate": 3.28800393224328e-06, + "loss": 0.3069, + "step": 31116 + }, + { + "epoch": 0.6229161974826715, + "grad_norm": 0.9879854917526245, + "learning_rate": 3.287699348221126e-06, + "loss": 0.2619, + "step": 31117 + }, + { + "epoch": 0.6229362159997999, + "grad_norm": 1.17662513256073, + "learning_rate": 3.287394771397118e-06, + "loss": 0.3564, + "step": 31118 + }, + { + "epoch": 0.6229562345169282, + "grad_norm": 1.0247446298599243, + "learning_rate": 3.287090201772533e-06, + "loss": 0.299, + "step": 31119 + }, + { + "epoch": 0.6229762530340565, + "grad_norm": 1.3208949565887451, + "learning_rate": 3.286785639348653e-06, + "loss": 0.2999, + "step": 31120 + }, + { + "epoch": 0.6229962715511849, + "grad_norm": 1.005041241645813, + "learning_rate": 3.286481084126758e-06, + "loss": 0.2615, + "step": 31121 + }, + { + "epoch": 0.6230162900683132, + "grad_norm": 1.1517211198806763, + "learning_rate": 3.2861765361081254e-06, + "loss": 0.3071, + "step": 31122 + }, + { + "epoch": 0.6230363085854416, + "grad_norm": 1.1025278568267822, + "learning_rate": 3.2858719952940398e-06, + "loss": 0.2923, + "step": 31123 + }, + { + "epoch": 0.6230563271025699, + "grad_norm": 1.0909675359725952, + "learning_rate": 3.285567461685779e-06, + "loss": 0.3107, + "step": 31124 + }, + { + "epoch": 0.6230763456196983, + "grad_norm": 1.01651132106781, + "learning_rate": 3.2852629352846234e-06, + "loss": 0.306, + "step": 31125 + }, + { + "epoch": 0.6230963641368266, + "grad_norm": 1.083670973777771, + "learning_rate": 3.284958416091853e-06, + "loss": 0.3498, + "step": 31126 + }, + { + "epoch": 0.6231163826539549, + "grad_norm": 1.143737554550171, + "learning_rate": 3.284653904108748e-06, + "loss": 0.3043, + "step": 31127 + }, + { + "epoch": 0.6231364011710833, + "grad_norm": 1.1177606582641602, + "learning_rate": 3.2843493993365903e-06, + "loss": 0.3277, + "step": 31128 + }, + { + "epoch": 0.6231564196882116, + "grad_norm": 1.1393189430236816, + "learning_rate": 3.2840449017766584e-06, + "loss": 0.3241, + "step": 31129 + }, + { + "epoch": 0.62317643820534, + "grad_norm": 1.8241443634033203, + "learning_rate": 3.2837404114302328e-06, + "loss": 0.7953, + "step": 31130 + }, + { + "epoch": 0.6231964567224683, + "grad_norm": 1.1519625186920166, + "learning_rate": 3.2834359282985906e-06, + "loss": 0.3417, + "step": 31131 + }, + { + "epoch": 0.6232164752395967, + "grad_norm": 0.9901815056800842, + "learning_rate": 3.283131452383016e-06, + "loss": 0.276, + "step": 31132 + }, + { + "epoch": 0.623236493756725, + "grad_norm": 1.100868821144104, + "learning_rate": 3.2828269836847875e-06, + "loss": 0.2542, + "step": 31133 + }, + { + "epoch": 0.6232565122738534, + "grad_norm": 1.7900941371917725, + "learning_rate": 3.2825225222051853e-06, + "loss": 0.7313, + "step": 31134 + }, + { + "epoch": 0.6232765307909817, + "grad_norm": 1.0209107398986816, + "learning_rate": 3.2822180679454867e-06, + "loss": 0.308, + "step": 31135 + }, + { + "epoch": 0.62329654930811, + "grad_norm": 1.20572030544281, + "learning_rate": 3.281913620906976e-06, + "loss": 0.3318, + "step": 31136 + }, + { + "epoch": 0.6233165678252384, + "grad_norm": 1.079784631729126, + "learning_rate": 3.28160918109093e-06, + "loss": 0.2753, + "step": 31137 + }, + { + "epoch": 0.6233365863423667, + "grad_norm": 1.1175426244735718, + "learning_rate": 3.281304748498629e-06, + "loss": 0.304, + "step": 31138 + }, + { + "epoch": 0.6233566048594951, + "grad_norm": 1.0751049518585205, + "learning_rate": 3.2810003231313537e-06, + "loss": 0.3311, + "step": 31139 + }, + { + "epoch": 0.6233766233766234, + "grad_norm": 1.0323708057403564, + "learning_rate": 3.2806959049903807e-06, + "loss": 0.2968, + "step": 31140 + }, + { + "epoch": 0.6233966418937518, + "grad_norm": 1.1438575983047485, + "learning_rate": 3.2803914940769943e-06, + "loss": 0.3176, + "step": 31141 + }, + { + "epoch": 0.6234166604108801, + "grad_norm": 1.1217668056488037, + "learning_rate": 3.280087090392472e-06, + "loss": 0.3283, + "step": 31142 + }, + { + "epoch": 0.6234366789280084, + "grad_norm": 1.0028806924819946, + "learning_rate": 3.2797826939380938e-06, + "loss": 0.2985, + "step": 31143 + }, + { + "epoch": 0.6234566974451368, + "grad_norm": 1.1660099029541016, + "learning_rate": 3.279478304715137e-06, + "loss": 0.2733, + "step": 31144 + }, + { + "epoch": 0.6234767159622651, + "grad_norm": 1.0975295305252075, + "learning_rate": 3.2791739227248835e-06, + "loss": 0.2939, + "step": 31145 + }, + { + "epoch": 0.6234967344793935, + "grad_norm": 1.1524642705917358, + "learning_rate": 3.2788695479686138e-06, + "loss": 0.337, + "step": 31146 + }, + { + "epoch": 0.6235167529965218, + "grad_norm": 1.2282930612564087, + "learning_rate": 3.278565180447606e-06, + "loss": 0.341, + "step": 31147 + }, + { + "epoch": 0.6235367715136502, + "grad_norm": 1.0405845642089844, + "learning_rate": 3.2782608201631383e-06, + "loss": 0.2729, + "step": 31148 + }, + { + "epoch": 0.6235567900307785, + "grad_norm": 1.0817182064056396, + "learning_rate": 3.2779564671164932e-06, + "loss": 0.287, + "step": 31149 + }, + { + "epoch": 0.6235768085479069, + "grad_norm": 1.06903874874115, + "learning_rate": 3.2776521213089483e-06, + "loss": 0.381, + "step": 31150 + }, + { + "epoch": 0.6235968270650352, + "grad_norm": 1.05440354347229, + "learning_rate": 3.2773477827417826e-06, + "loss": 0.2999, + "step": 31151 + }, + { + "epoch": 0.6236168455821635, + "grad_norm": 1.1859759092330933, + "learning_rate": 3.2770434514162768e-06, + "loss": 0.2991, + "step": 31152 + }, + { + "epoch": 0.6236368640992919, + "grad_norm": 1.9268369674682617, + "learning_rate": 3.2767391273337077e-06, + "loss": 0.8233, + "step": 31153 + }, + { + "epoch": 0.6236568826164202, + "grad_norm": 1.1891446113586426, + "learning_rate": 3.2764348104953588e-06, + "loss": 0.3115, + "step": 31154 + }, + { + "epoch": 0.6236769011335486, + "grad_norm": 1.109836459159851, + "learning_rate": 3.2761305009025068e-06, + "loss": 0.3094, + "step": 31155 + }, + { + "epoch": 0.6236969196506769, + "grad_norm": 1.119701623916626, + "learning_rate": 3.275826198556431e-06, + "loss": 0.3294, + "step": 31156 + }, + { + "epoch": 0.6237169381678053, + "grad_norm": 1.3605217933654785, + "learning_rate": 3.2755219034584085e-06, + "loss": 0.3396, + "step": 31157 + }, + { + "epoch": 0.6237369566849336, + "grad_norm": 1.2567555904388428, + "learning_rate": 3.2752176156097227e-06, + "loss": 0.3199, + "step": 31158 + }, + { + "epoch": 0.6237569752020619, + "grad_norm": 1.056061029434204, + "learning_rate": 3.2749133350116513e-06, + "loss": 0.3268, + "step": 31159 + }, + { + "epoch": 0.6237769937191903, + "grad_norm": 1.752546787261963, + "learning_rate": 3.274609061665473e-06, + "loss": 0.7932, + "step": 31160 + }, + { + "epoch": 0.6237970122363186, + "grad_norm": 1.1221376657485962, + "learning_rate": 3.274304795572467e-06, + "loss": 0.314, + "step": 31161 + }, + { + "epoch": 0.623817030753447, + "grad_norm": 1.1496599912643433, + "learning_rate": 3.27400053673391e-06, + "loss": 0.2963, + "step": 31162 + }, + { + "epoch": 0.6238370492705753, + "grad_norm": 0.9888694882392883, + "learning_rate": 3.2736962851510858e-06, + "loss": 0.3498, + "step": 31163 + }, + { + "epoch": 0.6238570677877037, + "grad_norm": 1.125524878501892, + "learning_rate": 3.2733920408252695e-06, + "loss": 0.2679, + "step": 31164 + }, + { + "epoch": 0.623877086304832, + "grad_norm": 1.1647604703903198, + "learning_rate": 3.273087803757743e-06, + "loss": 0.3259, + "step": 31165 + }, + { + "epoch": 0.6238971048219604, + "grad_norm": 1.0939005613327026, + "learning_rate": 3.272783573949781e-06, + "loss": 0.2991, + "step": 31166 + }, + { + "epoch": 0.6239171233390887, + "grad_norm": 1.1204694509506226, + "learning_rate": 3.2724793514026678e-06, + "loss": 0.344, + "step": 31167 + }, + { + "epoch": 0.623937141856217, + "grad_norm": 1.0987213850021362, + "learning_rate": 3.272175136117679e-06, + "loss": 0.3249, + "step": 31168 + }, + { + "epoch": 0.6239571603733454, + "grad_norm": 1.263824224472046, + "learning_rate": 3.2718709280960946e-06, + "loss": 0.2966, + "step": 31169 + }, + { + "epoch": 0.6239771788904737, + "grad_norm": 1.047772765159607, + "learning_rate": 3.271566727339191e-06, + "loss": 0.2552, + "step": 31170 + }, + { + "epoch": 0.6239971974076021, + "grad_norm": 1.0802369117736816, + "learning_rate": 3.2712625338482496e-06, + "loss": 0.3019, + "step": 31171 + }, + { + "epoch": 0.6240172159247304, + "grad_norm": 1.2031779289245605, + "learning_rate": 3.27095834762455e-06, + "loss": 0.2862, + "step": 31172 + }, + { + "epoch": 0.6240372344418588, + "grad_norm": 1.979842185974121, + "learning_rate": 3.2706541686693683e-06, + "loss": 0.8, + "step": 31173 + }, + { + "epoch": 0.6240572529589871, + "grad_norm": 1.1382758617401123, + "learning_rate": 3.270349996983986e-06, + "loss": 0.3391, + "step": 31174 + }, + { + "epoch": 0.6240772714761154, + "grad_norm": 1.1087368726730347, + "learning_rate": 3.2700458325696765e-06, + "loss": 0.2672, + "step": 31175 + }, + { + "epoch": 0.6240972899932438, + "grad_norm": 1.119080662727356, + "learning_rate": 3.2697416754277244e-06, + "loss": 0.3119, + "step": 31176 + }, + { + "epoch": 0.6241173085103721, + "grad_norm": 1.3127628564834595, + "learning_rate": 3.2694375255594056e-06, + "loss": 0.2895, + "step": 31177 + }, + { + "epoch": 0.6241373270275005, + "grad_norm": 1.0346521139144897, + "learning_rate": 3.2691333829659995e-06, + "loss": 0.3242, + "step": 31178 + }, + { + "epoch": 0.6241573455446288, + "grad_norm": 1.0863497257232666, + "learning_rate": 3.268829247648782e-06, + "loss": 0.3051, + "step": 31179 + }, + { + "epoch": 0.6241773640617572, + "grad_norm": 1.0035345554351807, + "learning_rate": 3.268525119609036e-06, + "loss": 0.3492, + "step": 31180 + }, + { + "epoch": 0.6241973825788855, + "grad_norm": 1.099901795387268, + "learning_rate": 3.2682209988480384e-06, + "loss": 0.3151, + "step": 31181 + }, + { + "epoch": 0.6242174010960138, + "grad_norm": 1.1560511589050293, + "learning_rate": 3.267916885367066e-06, + "loss": 0.3005, + "step": 31182 + }, + { + "epoch": 0.6242374196131422, + "grad_norm": 1.0633257627487183, + "learning_rate": 3.267612779167398e-06, + "loss": 0.3223, + "step": 31183 + }, + { + "epoch": 0.6242574381302705, + "grad_norm": 1.0654218196868896, + "learning_rate": 3.267308680250312e-06, + "loss": 0.3153, + "step": 31184 + }, + { + "epoch": 0.6242774566473989, + "grad_norm": 1.033069133758545, + "learning_rate": 3.267004588617089e-06, + "loss": 0.2733, + "step": 31185 + }, + { + "epoch": 0.6242974751645272, + "grad_norm": 1.04555344581604, + "learning_rate": 3.266700504269006e-06, + "loss": 0.3561, + "step": 31186 + }, + { + "epoch": 0.6243174936816556, + "grad_norm": 1.2215007543563843, + "learning_rate": 3.266396427207341e-06, + "loss": 0.2836, + "step": 31187 + }, + { + "epoch": 0.6243375121987839, + "grad_norm": 1.9798426628112793, + "learning_rate": 3.26609235743337e-06, + "loss": 0.763, + "step": 31188 + }, + { + "epoch": 0.6243575307159123, + "grad_norm": 1.8570181131362915, + "learning_rate": 3.2657882949483764e-06, + "loss": 0.8236, + "step": 31189 + }, + { + "epoch": 0.6243775492330406, + "grad_norm": 1.1917215585708618, + "learning_rate": 3.2654842397536344e-06, + "loss": 0.3428, + "step": 31190 + }, + { + "epoch": 0.6243975677501689, + "grad_norm": 1.0306365489959717, + "learning_rate": 3.2651801918504244e-06, + "loss": 0.3007, + "step": 31191 + }, + { + "epoch": 0.6244175862672973, + "grad_norm": 1.988208293914795, + "learning_rate": 3.2648761512400227e-06, + "loss": 0.7734, + "step": 31192 + }, + { + "epoch": 0.6244376047844256, + "grad_norm": 1.1808995008468628, + "learning_rate": 3.2645721179237072e-06, + "loss": 0.3197, + "step": 31193 + }, + { + "epoch": 0.624457623301554, + "grad_norm": 1.1682549715042114, + "learning_rate": 3.2642680919027586e-06, + "loss": 0.3232, + "step": 31194 + }, + { + "epoch": 0.6244776418186823, + "grad_norm": 1.0687522888183594, + "learning_rate": 3.2639640731784538e-06, + "loss": 0.3078, + "step": 31195 + }, + { + "epoch": 0.6244976603358107, + "grad_norm": 1.0934851169586182, + "learning_rate": 3.2636600617520687e-06, + "loss": 0.3343, + "step": 31196 + }, + { + "epoch": 0.624517678852939, + "grad_norm": 1.0376871824264526, + "learning_rate": 3.2633560576248826e-06, + "loss": 0.2788, + "step": 31197 + }, + { + "epoch": 0.6245376973700673, + "grad_norm": 1.0897201299667358, + "learning_rate": 3.2630520607981764e-06, + "loss": 0.312, + "step": 31198 + }, + { + "epoch": 0.6245577158871957, + "grad_norm": 1.0780812501907349, + "learning_rate": 3.2627480712732246e-06, + "loss": 0.3249, + "step": 31199 + }, + { + "epoch": 0.624577734404324, + "grad_norm": 1.3413808345794678, + "learning_rate": 3.2624440890513065e-06, + "loss": 0.3521, + "step": 31200 + }, + { + "epoch": 0.6245977529214524, + "grad_norm": 1.9392768144607544, + "learning_rate": 3.262140114133697e-06, + "loss": 0.7988, + "step": 31201 + }, + { + "epoch": 0.6246177714385807, + "grad_norm": 1.1954036951065063, + "learning_rate": 3.261836146521679e-06, + "loss": 0.3459, + "step": 31202 + }, + { + "epoch": 0.6246377899557091, + "grad_norm": 1.0702311992645264, + "learning_rate": 3.261532186216527e-06, + "loss": 0.2875, + "step": 31203 + }, + { + "epoch": 0.6246578084728374, + "grad_norm": 1.108628273010254, + "learning_rate": 3.26122823321952e-06, + "loss": 0.3439, + "step": 31204 + }, + { + "epoch": 0.6246778269899658, + "grad_norm": 1.1654025316238403, + "learning_rate": 3.260924287531936e-06, + "loss": 0.347, + "step": 31205 + }, + { + "epoch": 0.6246978455070941, + "grad_norm": 1.1179091930389404, + "learning_rate": 3.260620349155049e-06, + "loss": 0.3394, + "step": 31206 + }, + { + "epoch": 0.6247178640242224, + "grad_norm": 1.2213172912597656, + "learning_rate": 3.2603164180901425e-06, + "loss": 0.3124, + "step": 31207 + }, + { + "epoch": 0.6247378825413508, + "grad_norm": 1.0814155340194702, + "learning_rate": 3.2600124943384912e-06, + "loss": 0.2961, + "step": 31208 + }, + { + "epoch": 0.6247579010584791, + "grad_norm": 1.1833282709121704, + "learning_rate": 3.2597085779013722e-06, + "loss": 0.3591, + "step": 31209 + }, + { + "epoch": 0.6247779195756075, + "grad_norm": 1.0326889753341675, + "learning_rate": 3.2594046687800628e-06, + "loss": 0.3092, + "step": 31210 + }, + { + "epoch": 0.6247979380927358, + "grad_norm": 0.9937896132469177, + "learning_rate": 3.2591007669758432e-06, + "loss": 0.2994, + "step": 31211 + }, + { + "epoch": 0.6248179566098642, + "grad_norm": 1.1990469694137573, + "learning_rate": 3.258796872489989e-06, + "loss": 0.3112, + "step": 31212 + }, + { + "epoch": 0.6248379751269925, + "grad_norm": 1.832316279411316, + "learning_rate": 3.258492985323779e-06, + "loss": 0.7426, + "step": 31213 + }, + { + "epoch": 0.6248579936441208, + "grad_norm": 1.0787019729614258, + "learning_rate": 3.2581891054784887e-06, + "loss": 0.2795, + "step": 31214 + }, + { + "epoch": 0.6248780121612492, + "grad_norm": 1.1357941627502441, + "learning_rate": 3.2578852329553955e-06, + "loss": 0.3725, + "step": 31215 + }, + { + "epoch": 0.6248980306783775, + "grad_norm": 1.196181058883667, + "learning_rate": 3.257581367755779e-06, + "loss": 0.3192, + "step": 31216 + }, + { + "epoch": 0.6249180491955059, + "grad_norm": 1.1421711444854736, + "learning_rate": 3.2572775098809157e-06, + "loss": 0.3187, + "step": 31217 + }, + { + "epoch": 0.6249380677126342, + "grad_norm": 1.0794352293014526, + "learning_rate": 3.2569736593320832e-06, + "loss": 0.3186, + "step": 31218 + }, + { + "epoch": 0.6249580862297626, + "grad_norm": 1.099827766418457, + "learning_rate": 3.256669816110556e-06, + "loss": 0.2819, + "step": 31219 + }, + { + "epoch": 0.6249781047468909, + "grad_norm": 1.045124888420105, + "learning_rate": 3.2563659802176156e-06, + "loss": 0.2812, + "step": 31220 + }, + { + "epoch": 0.6249981232640193, + "grad_norm": 1.1467987298965454, + "learning_rate": 3.2560621516545376e-06, + "loss": 0.3175, + "step": 31221 + }, + { + "epoch": 0.6250181417811476, + "grad_norm": 1.1901450157165527, + "learning_rate": 3.2557583304225984e-06, + "loss": 0.3527, + "step": 31222 + }, + { + "epoch": 0.6250381602982759, + "grad_norm": 1.4060832262039185, + "learning_rate": 3.255454516523076e-06, + "loss": 0.3573, + "step": 31223 + }, + { + "epoch": 0.6250581788154043, + "grad_norm": 1.032831072807312, + "learning_rate": 3.2551507099572455e-06, + "loss": 0.2982, + "step": 31224 + }, + { + "epoch": 0.6250781973325326, + "grad_norm": 1.0818827152252197, + "learning_rate": 3.2548469107263886e-06, + "loss": 0.269, + "step": 31225 + }, + { + "epoch": 0.625098215849661, + "grad_norm": 1.2155961990356445, + "learning_rate": 3.2545431188317794e-06, + "loss": 0.2922, + "step": 31226 + }, + { + "epoch": 0.6251182343667893, + "grad_norm": 2.041630268096924, + "learning_rate": 3.2542393342746943e-06, + "loss": 0.7905, + "step": 31227 + }, + { + "epoch": 0.6251382528839177, + "grad_norm": 1.0878373384475708, + "learning_rate": 3.253935557056411e-06, + "loss": 0.3002, + "step": 31228 + }, + { + "epoch": 0.625158271401046, + "grad_norm": 1.1538439989089966, + "learning_rate": 3.2536317871782075e-06, + "loss": 0.3405, + "step": 31229 + }, + { + "epoch": 0.6251782899181743, + "grad_norm": 1.0312460660934448, + "learning_rate": 3.2533280246413605e-06, + "loss": 0.2907, + "step": 31230 + }, + { + "epoch": 0.6251983084353027, + "grad_norm": 1.1311864852905273, + "learning_rate": 3.253024269447147e-06, + "loss": 0.3001, + "step": 31231 + }, + { + "epoch": 0.625218326952431, + "grad_norm": 1.0944818258285522, + "learning_rate": 3.2527205215968414e-06, + "loss": 0.3108, + "step": 31232 + }, + { + "epoch": 0.6252383454695594, + "grad_norm": 1.125114917755127, + "learning_rate": 3.252416781091725e-06, + "loss": 0.3156, + "step": 31233 + }, + { + "epoch": 0.6252583639866877, + "grad_norm": 1.2209889888763428, + "learning_rate": 3.252113047933072e-06, + "loss": 0.2714, + "step": 31234 + }, + { + "epoch": 0.6252783825038161, + "grad_norm": 0.982514500617981, + "learning_rate": 3.2518093221221587e-06, + "loss": 0.2863, + "step": 31235 + }, + { + "epoch": 0.6252984010209444, + "grad_norm": 1.0566293001174927, + "learning_rate": 3.2515056036602637e-06, + "loss": 0.3065, + "step": 31236 + }, + { + "epoch": 0.6253184195380728, + "grad_norm": 1.088951587677002, + "learning_rate": 3.251201892548661e-06, + "loss": 0.3148, + "step": 31237 + }, + { + "epoch": 0.6253384380552011, + "grad_norm": 1.0966237783432007, + "learning_rate": 3.2508981887886316e-06, + "loss": 0.3139, + "step": 31238 + }, + { + "epoch": 0.6253584565723294, + "grad_norm": 1.0795087814331055, + "learning_rate": 3.250594492381449e-06, + "loss": 0.3238, + "step": 31239 + }, + { + "epoch": 0.6253784750894578, + "grad_norm": 1.153729796409607, + "learning_rate": 3.25029080332839e-06, + "loss": 0.2888, + "step": 31240 + }, + { + "epoch": 0.6253984936065861, + "grad_norm": 1.0759333372116089, + "learning_rate": 3.249987121630732e-06, + "loss": 0.2986, + "step": 31241 + }, + { + "epoch": 0.6254185121237145, + "grad_norm": 1.2108603715896606, + "learning_rate": 3.249683447289751e-06, + "loss": 0.3421, + "step": 31242 + }, + { + "epoch": 0.6254385306408428, + "grad_norm": 1.0896238088607788, + "learning_rate": 3.249379780306726e-06, + "loss": 0.3276, + "step": 31243 + }, + { + "epoch": 0.6254585491579712, + "grad_norm": 1.00736665725708, + "learning_rate": 3.2490761206829315e-06, + "loss": 0.257, + "step": 31244 + }, + { + "epoch": 0.6254785676750995, + "grad_norm": 1.1535707712173462, + "learning_rate": 3.2487724684196436e-06, + "loss": 0.3053, + "step": 31245 + }, + { + "epoch": 0.6254985861922278, + "grad_norm": 1.1409616470336914, + "learning_rate": 3.2484688235181377e-06, + "loss": 0.3143, + "step": 31246 + }, + { + "epoch": 0.6255186047093562, + "grad_norm": 1.044289231300354, + "learning_rate": 3.2481651859796943e-06, + "loss": 0.2909, + "step": 31247 + }, + { + "epoch": 0.6255386232264845, + "grad_norm": 1.112601637840271, + "learning_rate": 3.2478615558055858e-06, + "loss": 0.312, + "step": 31248 + }, + { + "epoch": 0.6255586417436129, + "grad_norm": 1.216113805770874, + "learning_rate": 3.247557932997092e-06, + "loss": 0.3029, + "step": 31249 + }, + { + "epoch": 0.6255786602607412, + "grad_norm": 1.3246855735778809, + "learning_rate": 3.247254317555485e-06, + "loss": 0.3855, + "step": 31250 + }, + { + "epoch": 0.6255986787778696, + "grad_norm": 1.1421167850494385, + "learning_rate": 3.246950709482045e-06, + "loss": 0.3446, + "step": 31251 + }, + { + "epoch": 0.6256186972949979, + "grad_norm": 1.0836405754089355, + "learning_rate": 3.2466471087780482e-06, + "loss": 0.3039, + "step": 31252 + }, + { + "epoch": 0.6256387158121263, + "grad_norm": 1.4868062734603882, + "learning_rate": 3.2463435154447682e-06, + "loss": 0.2783, + "step": 31253 + }, + { + "epoch": 0.6256587343292546, + "grad_norm": 0.999442994594574, + "learning_rate": 3.2460399294834833e-06, + "loss": 0.3311, + "step": 31254 + }, + { + "epoch": 0.6256787528463829, + "grad_norm": 1.6971642971038818, + "learning_rate": 3.2457363508954676e-06, + "loss": 0.7478, + "step": 31255 + }, + { + "epoch": 0.6256987713635113, + "grad_norm": 1.061344861984253, + "learning_rate": 3.2454327796820007e-06, + "loss": 0.2895, + "step": 31256 + }, + { + "epoch": 0.6257187898806396, + "grad_norm": 1.0862493515014648, + "learning_rate": 3.2451292158443566e-06, + "loss": 0.3414, + "step": 31257 + }, + { + "epoch": 0.625738808397768, + "grad_norm": 1.075353980064392, + "learning_rate": 3.2448256593838116e-06, + "loss": 0.2963, + "step": 31258 + }, + { + "epoch": 0.6257588269148963, + "grad_norm": 1.1158961057662964, + "learning_rate": 3.24452211030164e-06, + "loss": 0.2978, + "step": 31259 + }, + { + "epoch": 0.6257788454320247, + "grad_norm": 2.009129524230957, + "learning_rate": 3.244218568599121e-06, + "loss": 0.6893, + "step": 31260 + }, + { + "epoch": 0.625798863949153, + "grad_norm": 1.0311425924301147, + "learning_rate": 3.2439150342775296e-06, + "loss": 0.2944, + "step": 31261 + }, + { + "epoch": 0.6258188824662813, + "grad_norm": 1.176213264465332, + "learning_rate": 3.243611507338141e-06, + "loss": 0.3384, + "step": 31262 + }, + { + "epoch": 0.6258389009834097, + "grad_norm": 1.166428804397583, + "learning_rate": 3.2433079877822305e-06, + "loss": 0.2645, + "step": 31263 + }, + { + "epoch": 0.625858919500538, + "grad_norm": 1.142069935798645, + "learning_rate": 3.2430044756110775e-06, + "loss": 0.2954, + "step": 31264 + }, + { + "epoch": 0.6258789380176664, + "grad_norm": 1.179725170135498, + "learning_rate": 3.2427009708259548e-06, + "loss": 0.2843, + "step": 31265 + }, + { + "epoch": 0.6258989565347947, + "grad_norm": 1.0272772312164307, + "learning_rate": 3.2423974734281384e-06, + "loss": 0.2674, + "step": 31266 + }, + { + "epoch": 0.6259189750519231, + "grad_norm": 1.069952130317688, + "learning_rate": 3.242093983418906e-06, + "loss": 0.2819, + "step": 31267 + }, + { + "epoch": 0.6259389935690514, + "grad_norm": 1.047678828239441, + "learning_rate": 3.24179050079953e-06, + "loss": 0.2858, + "step": 31268 + }, + { + "epoch": 0.6259590120861798, + "grad_norm": 1.16945219039917, + "learning_rate": 3.2414870255712903e-06, + "loss": 0.2939, + "step": 31269 + }, + { + "epoch": 0.6259790306033081, + "grad_norm": 1.2264114618301392, + "learning_rate": 3.2411835577354604e-06, + "loss": 0.2854, + "step": 31270 + }, + { + "epoch": 0.6259990491204364, + "grad_norm": 1.2327542304992676, + "learning_rate": 3.240880097293317e-06, + "loss": 0.2801, + "step": 31271 + }, + { + "epoch": 0.6260190676375648, + "grad_norm": 1.0345971584320068, + "learning_rate": 3.240576644246133e-06, + "loss": 0.3147, + "step": 31272 + }, + { + "epoch": 0.6260390861546931, + "grad_norm": 1.1724618673324585, + "learning_rate": 3.2402731985951873e-06, + "loss": 0.2865, + "step": 31273 + }, + { + "epoch": 0.6260591046718215, + "grad_norm": 1.219768762588501, + "learning_rate": 3.239969760341754e-06, + "loss": 0.3495, + "step": 31274 + }, + { + "epoch": 0.6260791231889498, + "grad_norm": 1.1068612337112427, + "learning_rate": 3.23966632948711e-06, + "loss": 0.2602, + "step": 31275 + }, + { + "epoch": 0.6260991417060782, + "grad_norm": 1.1161364316940308, + "learning_rate": 3.2393629060325305e-06, + "loss": 0.2941, + "step": 31276 + }, + { + "epoch": 0.6261191602232065, + "grad_norm": 1.1929067373275757, + "learning_rate": 3.239059489979287e-06, + "loss": 0.3254, + "step": 31277 + }, + { + "epoch": 0.6261391787403348, + "grad_norm": 1.0604736804962158, + "learning_rate": 3.2387560813286615e-06, + "loss": 0.2752, + "step": 31278 + }, + { + "epoch": 0.6261591972574632, + "grad_norm": 1.1729800701141357, + "learning_rate": 3.2384526800819253e-06, + "loss": 0.312, + "step": 31279 + }, + { + "epoch": 0.6261792157745915, + "grad_norm": 1.0596649646759033, + "learning_rate": 3.2381492862403564e-06, + "loss": 0.3054, + "step": 31280 + }, + { + "epoch": 0.6261992342917199, + "grad_norm": 1.2877917289733887, + "learning_rate": 3.237845899805226e-06, + "loss": 0.2926, + "step": 31281 + }, + { + "epoch": 0.6262192528088482, + "grad_norm": 0.9951925277709961, + "learning_rate": 3.2375425207778143e-06, + "loss": 0.2994, + "step": 31282 + }, + { + "epoch": 0.6262392713259766, + "grad_norm": 1.028288722038269, + "learning_rate": 3.237239149159395e-06, + "loss": 0.2796, + "step": 31283 + }, + { + "epoch": 0.6262592898431049, + "grad_norm": 1.142585277557373, + "learning_rate": 3.2369357849512426e-06, + "loss": 0.2702, + "step": 31284 + }, + { + "epoch": 0.6262793083602333, + "grad_norm": 1.0169910192489624, + "learning_rate": 3.2366324281546318e-06, + "loss": 0.2663, + "step": 31285 + }, + { + "epoch": 0.6262993268773616, + "grad_norm": 1.1601965427398682, + "learning_rate": 3.236329078770839e-06, + "loss": 0.2628, + "step": 31286 + }, + { + "epoch": 0.6263193453944899, + "grad_norm": 1.0480605363845825, + "learning_rate": 3.2360257368011388e-06, + "loss": 0.2962, + "step": 31287 + }, + { + "epoch": 0.6263393639116183, + "grad_norm": 1.2558971643447876, + "learning_rate": 3.235722402246808e-06, + "loss": 0.2676, + "step": 31288 + }, + { + "epoch": 0.6263593824287466, + "grad_norm": 1.180894374847412, + "learning_rate": 3.235419075109121e-06, + "loss": 0.3156, + "step": 31289 + }, + { + "epoch": 0.626379400945875, + "grad_norm": 1.0794968605041504, + "learning_rate": 3.23511575538935e-06, + "loss": 0.2521, + "step": 31290 + }, + { + "epoch": 0.6263994194630033, + "grad_norm": 1.0610766410827637, + "learning_rate": 3.234812443088774e-06, + "loss": 0.2914, + "step": 31291 + }, + { + "epoch": 0.6264194379801317, + "grad_norm": 1.201716423034668, + "learning_rate": 3.234509138208666e-06, + "loss": 0.2787, + "step": 31292 + }, + { + "epoch": 0.62643945649726, + "grad_norm": 1.0201987028121948, + "learning_rate": 3.234205840750303e-06, + "loss": 0.2631, + "step": 31293 + }, + { + "epoch": 0.6264594750143883, + "grad_norm": 1.9935859441757202, + "learning_rate": 3.233902550714957e-06, + "loss": 0.7973, + "step": 31294 + }, + { + "epoch": 0.6264794935315167, + "grad_norm": 1.0719267129898071, + "learning_rate": 3.2335992681039057e-06, + "loss": 0.2621, + "step": 31295 + }, + { + "epoch": 0.626499512048645, + "grad_norm": 1.8985728025436401, + "learning_rate": 3.2332959929184237e-06, + "loss": 0.7028, + "step": 31296 + }, + { + "epoch": 0.6265195305657734, + "grad_norm": 1.1279069185256958, + "learning_rate": 3.2329927251597847e-06, + "loss": 0.3211, + "step": 31297 + }, + { + "epoch": 0.6265395490829017, + "grad_norm": 1.021295189857483, + "learning_rate": 3.232689464829263e-06, + "loss": 0.3001, + "step": 31298 + }, + { + "epoch": 0.6265595676000301, + "grad_norm": 1.0769896507263184, + "learning_rate": 3.2323862119281345e-06, + "loss": 0.2931, + "step": 31299 + }, + { + "epoch": 0.6265795861171584, + "grad_norm": 1.086069941520691, + "learning_rate": 3.232082966457674e-06, + "loss": 0.3375, + "step": 31300 + }, + { + "epoch": 0.6265996046342868, + "grad_norm": 0.9822570085525513, + "learning_rate": 3.2317797284191575e-06, + "loss": 0.2694, + "step": 31301 + }, + { + "epoch": 0.6266196231514151, + "grad_norm": 1.2391105890274048, + "learning_rate": 3.231476497813858e-06, + "loss": 0.3146, + "step": 31302 + }, + { + "epoch": 0.6266396416685434, + "grad_norm": 1.8478455543518066, + "learning_rate": 3.231173274643049e-06, + "loss": 0.7779, + "step": 31303 + }, + { + "epoch": 0.6266596601856718, + "grad_norm": 1.254665732383728, + "learning_rate": 3.230870058908009e-06, + "loss": 0.2968, + "step": 31304 + }, + { + "epoch": 0.6266796787028001, + "grad_norm": 0.993219256401062, + "learning_rate": 3.2305668506100097e-06, + "loss": 0.2627, + "step": 31305 + }, + { + "epoch": 0.6266996972199285, + "grad_norm": 1.0955017805099487, + "learning_rate": 3.230263649750327e-06, + "loss": 0.3091, + "step": 31306 + }, + { + "epoch": 0.6267197157370568, + "grad_norm": 1.0612375736236572, + "learning_rate": 3.2299604563302354e-06, + "loss": 0.2945, + "step": 31307 + }, + { + "epoch": 0.6267397342541852, + "grad_norm": 1.0955123901367188, + "learning_rate": 3.2296572703510075e-06, + "loss": 0.2939, + "step": 31308 + }, + { + "epoch": 0.6267597527713135, + "grad_norm": 1.0118088722229004, + "learning_rate": 3.229354091813921e-06, + "loss": 0.2866, + "step": 31309 + }, + { + "epoch": 0.6267797712884418, + "grad_norm": 1.1950676441192627, + "learning_rate": 3.2290509207202486e-06, + "loss": 0.2993, + "step": 31310 + }, + { + "epoch": 0.6267997898055702, + "grad_norm": 1.1630510091781616, + "learning_rate": 3.2287477570712643e-06, + "loss": 0.2632, + "step": 31311 + }, + { + "epoch": 0.6268198083226985, + "grad_norm": 1.0276788473129272, + "learning_rate": 3.2284446008682434e-06, + "loss": 0.2948, + "step": 31312 + }, + { + "epoch": 0.6268398268398269, + "grad_norm": 1.1245930194854736, + "learning_rate": 3.22814145211246e-06, + "loss": 0.354, + "step": 31313 + }, + { + "epoch": 0.6268598453569552, + "grad_norm": 1.2310130596160889, + "learning_rate": 3.2278383108051896e-06, + "loss": 0.2791, + "step": 31314 + }, + { + "epoch": 0.6268798638740836, + "grad_norm": 1.2367454767227173, + "learning_rate": 3.227535176947706e-06, + "loss": 0.3103, + "step": 31315 + }, + { + "epoch": 0.6268998823912119, + "grad_norm": 1.0621459484100342, + "learning_rate": 3.22723205054128e-06, + "loss": 0.2995, + "step": 31316 + }, + { + "epoch": 0.6269199009083403, + "grad_norm": 0.990882396697998, + "learning_rate": 3.226928931587192e-06, + "loss": 0.2502, + "step": 31317 + }, + { + "epoch": 0.6269399194254686, + "grad_norm": 1.0323654413223267, + "learning_rate": 3.226625820086712e-06, + "loss": 0.3155, + "step": 31318 + }, + { + "epoch": 0.6269599379425969, + "grad_norm": 1.1104426383972168, + "learning_rate": 3.226322716041116e-06, + "loss": 0.3012, + "step": 31319 + }, + { + "epoch": 0.6269799564597253, + "grad_norm": 1.245767593383789, + "learning_rate": 3.226019619451678e-06, + "loss": 0.2978, + "step": 31320 + }, + { + "epoch": 0.6269999749768536, + "grad_norm": 1.0716891288757324, + "learning_rate": 3.2257165303196697e-06, + "loss": 0.3103, + "step": 31321 + }, + { + "epoch": 0.627019993493982, + "grad_norm": 1.1754306554794312, + "learning_rate": 3.2254134486463697e-06, + "loss": 0.2885, + "step": 31322 + }, + { + "epoch": 0.6270400120111103, + "grad_norm": 1.1494728326797485, + "learning_rate": 3.2251103744330494e-06, + "loss": 0.3159, + "step": 31323 + }, + { + "epoch": 0.6270600305282387, + "grad_norm": 1.0120797157287598, + "learning_rate": 3.2248073076809814e-06, + "loss": 0.2935, + "step": 31324 + }, + { + "epoch": 0.627080049045367, + "grad_norm": 1.1332470178604126, + "learning_rate": 3.224504248391442e-06, + "loss": 0.3339, + "step": 31325 + }, + { + "epoch": 0.6271000675624953, + "grad_norm": 1.1816747188568115, + "learning_rate": 3.2242011965657046e-06, + "loss": 0.3247, + "step": 31326 + }, + { + "epoch": 0.6271200860796237, + "grad_norm": 1.1207960844039917, + "learning_rate": 3.223898152205044e-06, + "loss": 0.2803, + "step": 31327 + }, + { + "epoch": 0.627140104596752, + "grad_norm": 1.13078773021698, + "learning_rate": 3.2235951153107337e-06, + "loss": 0.3076, + "step": 31328 + }, + { + "epoch": 0.6271601231138804, + "grad_norm": 0.9939010143280029, + "learning_rate": 3.223292085884047e-06, + "loss": 0.2549, + "step": 31329 + }, + { + "epoch": 0.6271801416310087, + "grad_norm": 1.1662535667419434, + "learning_rate": 3.222989063926256e-06, + "loss": 0.3078, + "step": 31330 + }, + { + "epoch": 0.6272001601481371, + "grad_norm": 1.0925812721252441, + "learning_rate": 3.222686049438638e-06, + "loss": 0.3032, + "step": 31331 + }, + { + "epoch": 0.6272201786652654, + "grad_norm": 1.0013614892959595, + "learning_rate": 3.222383042422466e-06, + "loss": 0.2887, + "step": 31332 + }, + { + "epoch": 0.6272401971823938, + "grad_norm": 1.963686466217041, + "learning_rate": 3.2220800428790134e-06, + "loss": 0.8267, + "step": 31333 + }, + { + "epoch": 0.6272602156995221, + "grad_norm": 1.9409644603729248, + "learning_rate": 3.2217770508095515e-06, + "loss": 0.7877, + "step": 31334 + }, + { + "epoch": 0.6272802342166504, + "grad_norm": 1.3398394584655762, + "learning_rate": 3.2214740662153583e-06, + "loss": 0.3073, + "step": 31335 + }, + { + "epoch": 0.6273002527337788, + "grad_norm": 1.3378491401672363, + "learning_rate": 3.2211710890977055e-06, + "loss": 0.33, + "step": 31336 + }, + { + "epoch": 0.6273202712509071, + "grad_norm": 1.0015954971313477, + "learning_rate": 3.220868119457865e-06, + "loss": 0.2808, + "step": 31337 + }, + { + "epoch": 0.6273402897680355, + "grad_norm": 1.8520382642745972, + "learning_rate": 3.220565157297113e-06, + "loss": 0.822, + "step": 31338 + }, + { + "epoch": 0.6273603082851638, + "grad_norm": 0.9925561547279358, + "learning_rate": 3.2202622026167207e-06, + "loss": 0.337, + "step": 31339 + }, + { + "epoch": 0.6273803268022922, + "grad_norm": 1.1187151670455933, + "learning_rate": 3.2199592554179652e-06, + "loss": 0.2461, + "step": 31340 + }, + { + "epoch": 0.6274003453194205, + "grad_norm": 1.213236689567566, + "learning_rate": 3.2196563157021178e-06, + "loss": 0.3043, + "step": 31341 + }, + { + "epoch": 0.6274203638365488, + "grad_norm": 1.1344494819641113, + "learning_rate": 3.2193533834704523e-06, + "loss": 0.3197, + "step": 31342 + }, + { + "epoch": 0.6274403823536772, + "grad_norm": 1.1266189813613892, + "learning_rate": 3.2190504587242393e-06, + "loss": 0.3265, + "step": 31343 + }, + { + "epoch": 0.6274604008708055, + "grad_norm": 1.133159875869751, + "learning_rate": 3.218747541464756e-06, + "loss": 0.2877, + "step": 31344 + }, + { + "epoch": 0.6274804193879339, + "grad_norm": 1.1989308595657349, + "learning_rate": 3.2184446316932768e-06, + "loss": 0.2823, + "step": 31345 + }, + { + "epoch": 0.6275004379050622, + "grad_norm": 1.2213425636291504, + "learning_rate": 3.2181417294110717e-06, + "loss": 0.279, + "step": 31346 + }, + { + "epoch": 0.6275204564221906, + "grad_norm": 1.0968711376190186, + "learning_rate": 3.217838834619414e-06, + "loss": 0.2996, + "step": 31347 + }, + { + "epoch": 0.6275404749393189, + "grad_norm": 1.2391997575759888, + "learning_rate": 3.2175359473195798e-06, + "loss": 0.3469, + "step": 31348 + }, + { + "epoch": 0.6275604934564473, + "grad_norm": 2.043177366256714, + "learning_rate": 3.2172330675128417e-06, + "loss": 0.8109, + "step": 31349 + }, + { + "epoch": 0.6275805119735756, + "grad_norm": 2.0086889266967773, + "learning_rate": 3.2169301952004707e-06, + "loss": 0.7907, + "step": 31350 + }, + { + "epoch": 0.6276005304907039, + "grad_norm": 1.2125015258789062, + "learning_rate": 3.2166273303837424e-06, + "loss": 0.3227, + "step": 31351 + }, + { + "epoch": 0.6276205490078323, + "grad_norm": 1.0584375858306885, + "learning_rate": 3.2163244730639266e-06, + "loss": 0.3947, + "step": 31352 + }, + { + "epoch": 0.6276405675249606, + "grad_norm": 1.115721344947815, + "learning_rate": 3.2160216232423014e-06, + "loss": 0.294, + "step": 31353 + }, + { + "epoch": 0.627660586042089, + "grad_norm": 1.038563847541809, + "learning_rate": 3.2157187809201373e-06, + "loss": 0.2457, + "step": 31354 + }, + { + "epoch": 0.6276806045592173, + "grad_norm": 1.090457558631897, + "learning_rate": 3.215415946098708e-06, + "loss": 0.324, + "step": 31355 + }, + { + "epoch": 0.6277006230763457, + "grad_norm": 1.119227409362793, + "learning_rate": 3.215113118779283e-06, + "loss": 0.29, + "step": 31356 + }, + { + "epoch": 0.627720641593474, + "grad_norm": 1.1111195087432861, + "learning_rate": 3.2148102989631396e-06, + "loss": 0.3545, + "step": 31357 + }, + { + "epoch": 0.6277406601106023, + "grad_norm": 1.8435872793197632, + "learning_rate": 3.2145074866515506e-06, + "loss": 0.738, + "step": 31358 + }, + { + "epoch": 0.6277606786277307, + "grad_norm": 2.032749652862549, + "learning_rate": 3.2142046818457882e-06, + "loss": 0.8012, + "step": 31359 + }, + { + "epoch": 0.627780697144859, + "grad_norm": 1.088624358177185, + "learning_rate": 3.2139018845471247e-06, + "loss": 0.299, + "step": 31360 + }, + { + "epoch": 0.6278007156619874, + "grad_norm": 1.2542951107025146, + "learning_rate": 3.2135990947568315e-06, + "loss": 0.3132, + "step": 31361 + }, + { + "epoch": 0.6278207341791157, + "grad_norm": 1.1030454635620117, + "learning_rate": 3.213296312476185e-06, + "loss": 0.314, + "step": 31362 + }, + { + "epoch": 0.6278407526962441, + "grad_norm": 1.1781922578811646, + "learning_rate": 3.2129935377064553e-06, + "loss": 0.2923, + "step": 31363 + }, + { + "epoch": 0.6278607712133724, + "grad_norm": 1.1752837896347046, + "learning_rate": 3.2126907704489172e-06, + "loss": 0.3485, + "step": 31364 + }, + { + "epoch": 0.6278807897305008, + "grad_norm": 1.1611888408660889, + "learning_rate": 3.212388010704841e-06, + "loss": 0.3069, + "step": 31365 + }, + { + "epoch": 0.6279008082476291, + "grad_norm": 1.1588499546051025, + "learning_rate": 3.2120852584755025e-06, + "loss": 0.3251, + "step": 31366 + }, + { + "epoch": 0.6279208267647574, + "grad_norm": 1.2129483222961426, + "learning_rate": 3.211782513762173e-06, + "loss": 0.342, + "step": 31367 + }, + { + "epoch": 0.6279408452818858, + "grad_norm": 1.0954700708389282, + "learning_rate": 3.211479776566124e-06, + "loss": 0.3256, + "step": 31368 + }, + { + "epoch": 0.6279608637990141, + "grad_norm": 1.0754579305648804, + "learning_rate": 3.2111770468886293e-06, + "loss": 0.2796, + "step": 31369 + }, + { + "epoch": 0.6279808823161425, + "grad_norm": 1.0204278230667114, + "learning_rate": 3.2108743247309603e-06, + "loss": 0.3353, + "step": 31370 + }, + { + "epoch": 0.6280009008332708, + "grad_norm": 1.0675605535507202, + "learning_rate": 3.2105716100943916e-06, + "loss": 0.3287, + "step": 31371 + }, + { + "epoch": 0.6280209193503992, + "grad_norm": 1.0593026876449585, + "learning_rate": 3.210268902980196e-06, + "loss": 0.2967, + "step": 31372 + }, + { + "epoch": 0.6280409378675275, + "grad_norm": 1.0126023292541504, + "learning_rate": 3.209966203389644e-06, + "loss": 0.2474, + "step": 31373 + }, + { + "epoch": 0.6280609563846558, + "grad_norm": 1.0692451000213623, + "learning_rate": 3.2096635113240073e-06, + "loss": 0.3342, + "step": 31374 + }, + { + "epoch": 0.6280809749017842, + "grad_norm": 1.1582874059677124, + "learning_rate": 3.2093608267845614e-06, + "loss": 0.3158, + "step": 31375 + }, + { + "epoch": 0.6281009934189125, + "grad_norm": 1.0841469764709473, + "learning_rate": 3.209058149772577e-06, + "loss": 0.3037, + "step": 31376 + }, + { + "epoch": 0.6281210119360409, + "grad_norm": 1.2075560092926025, + "learning_rate": 3.2087554802893274e-06, + "loss": 0.2718, + "step": 31377 + }, + { + "epoch": 0.6281410304531692, + "grad_norm": 1.1337320804595947, + "learning_rate": 3.208452818336082e-06, + "loss": 0.2645, + "step": 31378 + }, + { + "epoch": 0.6281610489702976, + "grad_norm": 1.0425678491592407, + "learning_rate": 3.2081501639141177e-06, + "loss": 0.2684, + "step": 31379 + }, + { + "epoch": 0.6281810674874259, + "grad_norm": 1.2217553853988647, + "learning_rate": 3.2078475170247036e-06, + "loss": 0.3037, + "step": 31380 + }, + { + "epoch": 0.6282010860045543, + "grad_norm": 1.0386607646942139, + "learning_rate": 3.2075448776691144e-06, + "loss": 0.2731, + "step": 31381 + }, + { + "epoch": 0.6282211045216826, + "grad_norm": 1.0806245803833008, + "learning_rate": 3.2072422458486184e-06, + "loss": 0.3505, + "step": 31382 + }, + { + "epoch": 0.6282411230388109, + "grad_norm": 1.0675604343414307, + "learning_rate": 3.20693962156449e-06, + "loss": 0.2467, + "step": 31383 + }, + { + "epoch": 0.6282611415559393, + "grad_norm": 1.811447024345398, + "learning_rate": 3.2066370048180033e-06, + "loss": 0.7482, + "step": 31384 + }, + { + "epoch": 0.6282811600730676, + "grad_norm": 1.9413515329360962, + "learning_rate": 3.2063343956104287e-06, + "loss": 0.786, + "step": 31385 + }, + { + "epoch": 0.628301178590196, + "grad_norm": 1.0965455770492554, + "learning_rate": 3.2060317939430385e-06, + "loss": 0.3143, + "step": 31386 + }, + { + "epoch": 0.6283211971073243, + "grad_norm": 1.1962029933929443, + "learning_rate": 3.2057291998171024e-06, + "loss": 0.3373, + "step": 31387 + }, + { + "epoch": 0.6283412156244527, + "grad_norm": 1.032077431678772, + "learning_rate": 3.2054266132338964e-06, + "loss": 0.3136, + "step": 31388 + }, + { + "epoch": 0.628361234141581, + "grad_norm": 1.1626356840133667, + "learning_rate": 3.20512403419469e-06, + "loss": 0.2975, + "step": 31389 + }, + { + "epoch": 0.6283812526587093, + "grad_norm": 1.1621029376983643, + "learning_rate": 3.2048214627007566e-06, + "loss": 0.3665, + "step": 31390 + }, + { + "epoch": 0.6284012711758377, + "grad_norm": 1.1309869289398193, + "learning_rate": 3.204518898753368e-06, + "loss": 0.3371, + "step": 31391 + }, + { + "epoch": 0.628421289692966, + "grad_norm": 1.146911859512329, + "learning_rate": 3.204216342353793e-06, + "loss": 0.3283, + "step": 31392 + }, + { + "epoch": 0.6284413082100944, + "grad_norm": 1.026920199394226, + "learning_rate": 3.203913793503309e-06, + "loss": 0.2736, + "step": 31393 + }, + { + "epoch": 0.6284613267272227, + "grad_norm": 1.0340709686279297, + "learning_rate": 3.2036112522031836e-06, + "loss": 0.3311, + "step": 31394 + }, + { + "epoch": 0.6284813452443511, + "grad_norm": 1.0545917749404907, + "learning_rate": 3.20330871845469e-06, + "loss": 0.2791, + "step": 31395 + }, + { + "epoch": 0.6285013637614794, + "grad_norm": 1.2871143817901611, + "learning_rate": 3.203006192259098e-06, + "loss": 0.3188, + "step": 31396 + }, + { + "epoch": 0.6285213822786078, + "grad_norm": 1.080460548400879, + "learning_rate": 3.2027036736176843e-06, + "loss": 0.3201, + "step": 31397 + }, + { + "epoch": 0.6285414007957361, + "grad_norm": 1.0993921756744385, + "learning_rate": 3.2024011625317163e-06, + "loss": 0.3417, + "step": 31398 + }, + { + "epoch": 0.6285614193128644, + "grad_norm": 1.2294728755950928, + "learning_rate": 3.202098659002468e-06, + "loss": 0.3407, + "step": 31399 + }, + { + "epoch": 0.6285814378299928, + "grad_norm": 1.155684471130371, + "learning_rate": 3.2017961630312082e-06, + "loss": 0.2803, + "step": 31400 + }, + { + "epoch": 0.6286014563471211, + "grad_norm": 1.32632315158844, + "learning_rate": 3.201493674619211e-06, + "loss": 0.2886, + "step": 31401 + }, + { + "epoch": 0.6286214748642495, + "grad_norm": 1.1749184131622314, + "learning_rate": 3.201191193767747e-06, + "loss": 0.35, + "step": 31402 + }, + { + "epoch": 0.6286414933813778, + "grad_norm": 1.0708357095718384, + "learning_rate": 3.200888720478089e-06, + "loss": 0.2949, + "step": 31403 + }, + { + "epoch": 0.6286615118985062, + "grad_norm": 1.2049074172973633, + "learning_rate": 3.200586254751508e-06, + "loss": 0.3313, + "step": 31404 + }, + { + "epoch": 0.6286815304156345, + "grad_norm": 1.476508378982544, + "learning_rate": 3.200283796589273e-06, + "loss": 0.3004, + "step": 31405 + }, + { + "epoch": 0.6287015489327628, + "grad_norm": 1.9037798643112183, + "learning_rate": 3.1999813459926598e-06, + "loss": 0.7878, + "step": 31406 + }, + { + "epoch": 0.6287215674498912, + "grad_norm": 1.1901150941848755, + "learning_rate": 3.199678902962937e-06, + "loss": 0.3984, + "step": 31407 + }, + { + "epoch": 0.6287415859670195, + "grad_norm": 1.1936547756195068, + "learning_rate": 3.1993764675013767e-06, + "loss": 0.298, + "step": 31408 + }, + { + "epoch": 0.6287616044841479, + "grad_norm": 2.222564458847046, + "learning_rate": 3.1990740396092483e-06, + "loss": 0.8514, + "step": 31409 + }, + { + "epoch": 0.6287816230012762, + "grad_norm": 1.1925228834152222, + "learning_rate": 3.1987716192878276e-06, + "loss": 0.3585, + "step": 31410 + }, + { + "epoch": 0.6288016415184046, + "grad_norm": 0.9904078841209412, + "learning_rate": 3.1984692065383833e-06, + "loss": 0.2942, + "step": 31411 + }, + { + "epoch": 0.6288216600355329, + "grad_norm": 1.1921347379684448, + "learning_rate": 3.198166801362187e-06, + "loss": 0.3057, + "step": 31412 + }, + { + "epoch": 0.6288416785526613, + "grad_norm": 1.0979390144348145, + "learning_rate": 3.1978644037605077e-06, + "loss": 0.2999, + "step": 31413 + }, + { + "epoch": 0.6288616970697896, + "grad_norm": 1.330809235572815, + "learning_rate": 3.1975620137346197e-06, + "loss": 0.3452, + "step": 31414 + }, + { + "epoch": 0.6288817155869179, + "grad_norm": 1.0299646854400635, + "learning_rate": 3.197259631285793e-06, + "loss": 0.2851, + "step": 31415 + }, + { + "epoch": 0.6289017341040463, + "grad_norm": 1.1059350967407227, + "learning_rate": 3.1969572564152994e-06, + "loss": 0.242, + "step": 31416 + }, + { + "epoch": 0.6289217526211746, + "grad_norm": 1.010899543762207, + "learning_rate": 3.1966548891244096e-06, + "loss": 0.2411, + "step": 31417 + }, + { + "epoch": 0.628941771138303, + "grad_norm": 1.2954634428024292, + "learning_rate": 3.1963525294143927e-06, + "loss": 0.3347, + "step": 31418 + }, + { + "epoch": 0.6289617896554313, + "grad_norm": 1.0926731824874878, + "learning_rate": 3.196050177286524e-06, + "loss": 0.2641, + "step": 31419 + }, + { + "epoch": 0.6289818081725597, + "grad_norm": 1.9574838876724243, + "learning_rate": 3.195747832742071e-06, + "loss": 0.7791, + "step": 31420 + }, + { + "epoch": 0.629001826689688, + "grad_norm": 1.0477144718170166, + "learning_rate": 3.1954454957823056e-06, + "loss": 0.2518, + "step": 31421 + }, + { + "epoch": 0.6290218452068163, + "grad_norm": 1.8733339309692383, + "learning_rate": 3.1951431664084996e-06, + "loss": 0.7219, + "step": 31422 + }, + { + "epoch": 0.6290418637239447, + "grad_norm": 1.1249439716339111, + "learning_rate": 3.194840844621922e-06, + "loss": 0.3144, + "step": 31423 + }, + { + "epoch": 0.629061882241073, + "grad_norm": 1.170188546180725, + "learning_rate": 3.1945385304238463e-06, + "loss": 0.3023, + "step": 31424 + }, + { + "epoch": 0.6290819007582014, + "grad_norm": 1.1704875230789185, + "learning_rate": 3.194236223815543e-06, + "loss": 0.2746, + "step": 31425 + }, + { + "epoch": 0.6291019192753297, + "grad_norm": 1.1793124675750732, + "learning_rate": 3.1939339247982805e-06, + "loss": 0.3051, + "step": 31426 + }, + { + "epoch": 0.6291219377924581, + "grad_norm": 1.1552544832229614, + "learning_rate": 3.1936316333733307e-06, + "loss": 0.3199, + "step": 31427 + }, + { + "epoch": 0.6291419563095864, + "grad_norm": 1.8858301639556885, + "learning_rate": 3.1933293495419648e-06, + "loss": 0.7781, + "step": 31428 + }, + { + "epoch": 0.6291619748267148, + "grad_norm": 1.1048213243484497, + "learning_rate": 3.1930270733054553e-06, + "loss": 0.3324, + "step": 31429 + }, + { + "epoch": 0.6291819933438431, + "grad_norm": 1.2068326473236084, + "learning_rate": 3.1927248046650705e-06, + "loss": 0.334, + "step": 31430 + }, + { + "epoch": 0.6292020118609714, + "grad_norm": 2.034090518951416, + "learning_rate": 3.192422543622082e-06, + "loss": 0.8148, + "step": 31431 + }, + { + "epoch": 0.6292220303780998, + "grad_norm": 1.1716511249542236, + "learning_rate": 3.1921202901777584e-06, + "loss": 0.2711, + "step": 31432 + }, + { + "epoch": 0.6292420488952281, + "grad_norm": 0.9739219546318054, + "learning_rate": 3.191818044333373e-06, + "loss": 0.2907, + "step": 31433 + }, + { + "epoch": 0.6292620674123565, + "grad_norm": 1.0210635662078857, + "learning_rate": 3.1915158060901953e-06, + "loss": 0.318, + "step": 31434 + }, + { + "epoch": 0.6292820859294848, + "grad_norm": 1.0692671537399292, + "learning_rate": 3.1912135754494965e-06, + "loss": 0.325, + "step": 31435 + }, + { + "epoch": 0.6293021044466132, + "grad_norm": 1.2687079906463623, + "learning_rate": 3.1909113524125448e-06, + "loss": 0.3031, + "step": 31436 + }, + { + "epoch": 0.6293221229637415, + "grad_norm": 1.8577935695648193, + "learning_rate": 3.190609136980615e-06, + "loss": 0.7215, + "step": 31437 + }, + { + "epoch": 0.6293421414808698, + "grad_norm": 1.0473133325576782, + "learning_rate": 3.190306929154974e-06, + "loss": 0.3015, + "step": 31438 + }, + { + "epoch": 0.6293621599979982, + "grad_norm": 1.0679876804351807, + "learning_rate": 3.1900047289368923e-06, + "loss": 0.2571, + "step": 31439 + }, + { + "epoch": 0.6293821785151265, + "grad_norm": 1.0907477140426636, + "learning_rate": 3.1897025363276413e-06, + "loss": 0.3144, + "step": 31440 + }, + { + "epoch": 0.6294021970322549, + "grad_norm": 1.0382533073425293, + "learning_rate": 3.189400351328492e-06, + "loss": 0.2841, + "step": 31441 + }, + { + "epoch": 0.6294222155493832, + "grad_norm": 1.1224055290222168, + "learning_rate": 3.1890981739407145e-06, + "loss": 0.3421, + "step": 31442 + }, + { + "epoch": 0.6294422340665116, + "grad_norm": 1.1420204639434814, + "learning_rate": 3.1887960041655786e-06, + "loss": 0.2589, + "step": 31443 + }, + { + "epoch": 0.6294622525836399, + "grad_norm": 1.0751755237579346, + "learning_rate": 3.188493842004354e-06, + "loss": 0.2983, + "step": 31444 + }, + { + "epoch": 0.6294822711007683, + "grad_norm": 1.0233049392700195, + "learning_rate": 3.1881916874583103e-06, + "loss": 0.292, + "step": 31445 + }, + { + "epoch": 0.6295022896178966, + "grad_norm": 1.0988162755966187, + "learning_rate": 3.1878895405287203e-06, + "loss": 0.3061, + "step": 31446 + }, + { + "epoch": 0.6295223081350249, + "grad_norm": 1.1036603450775146, + "learning_rate": 3.1875874012168516e-06, + "loss": 0.2731, + "step": 31447 + }, + { + "epoch": 0.6295423266521533, + "grad_norm": 1.0272341966629028, + "learning_rate": 3.187285269523977e-06, + "loss": 0.2796, + "step": 31448 + }, + { + "epoch": 0.6295623451692816, + "grad_norm": 2.0812313556671143, + "learning_rate": 3.1869831454513626e-06, + "loss": 0.7895, + "step": 31449 + }, + { + "epoch": 0.62958236368641, + "grad_norm": 1.1327117681503296, + "learning_rate": 3.186681029000283e-06, + "loss": 0.2863, + "step": 31450 + }, + { + "epoch": 0.6296023822035383, + "grad_norm": 1.0329476594924927, + "learning_rate": 3.1863789201720064e-06, + "loss": 0.2363, + "step": 31451 + }, + { + "epoch": 0.6296224007206667, + "grad_norm": 1.1527087688446045, + "learning_rate": 3.186076818967801e-06, + "loss": 0.2899, + "step": 31452 + }, + { + "epoch": 0.629642419237795, + "grad_norm": 1.1488410234451294, + "learning_rate": 3.1857747253889395e-06, + "loss": 0.3209, + "step": 31453 + }, + { + "epoch": 0.6296624377549233, + "grad_norm": 2.1315274238586426, + "learning_rate": 3.1854726394366885e-06, + "loss": 0.7926, + "step": 31454 + }, + { + "epoch": 0.6296824562720517, + "grad_norm": 1.2267698049545288, + "learning_rate": 3.1851705611123223e-06, + "loss": 0.2908, + "step": 31455 + }, + { + "epoch": 0.62970247478918, + "grad_norm": 1.030150055885315, + "learning_rate": 3.184868490417109e-06, + "loss": 0.3434, + "step": 31456 + }, + { + "epoch": 0.6297224933063084, + "grad_norm": 1.0611212253570557, + "learning_rate": 3.184566427352317e-06, + "loss": 0.3288, + "step": 31457 + }, + { + "epoch": 0.6297425118234367, + "grad_norm": 1.770751714706421, + "learning_rate": 3.1842643719192156e-06, + "loss": 0.8202, + "step": 31458 + }, + { + "epoch": 0.6297625303405651, + "grad_norm": 1.0489189624786377, + "learning_rate": 3.1839623241190776e-06, + "loss": 0.2884, + "step": 31459 + }, + { + "epoch": 0.6297825488576934, + "grad_norm": 1.0843998193740845, + "learning_rate": 3.1836602839531706e-06, + "loss": 0.3093, + "step": 31460 + }, + { + "epoch": 0.6298025673748218, + "grad_norm": 1.00859534740448, + "learning_rate": 3.1833582514227657e-06, + "loss": 0.2535, + "step": 31461 + }, + { + "epoch": 0.6298225858919501, + "grad_norm": 1.6804245710372925, + "learning_rate": 3.18305622652913e-06, + "loss": 0.7406, + "step": 31462 + }, + { + "epoch": 0.6298426044090784, + "grad_norm": 1.1836498975753784, + "learning_rate": 3.1827542092735362e-06, + "loss": 0.2902, + "step": 31463 + }, + { + "epoch": 0.6298626229262068, + "grad_norm": 1.0763922929763794, + "learning_rate": 3.1824521996572535e-06, + "loss": 0.2988, + "step": 31464 + }, + { + "epoch": 0.6298826414433351, + "grad_norm": 1.1248382329940796, + "learning_rate": 3.182150197681549e-06, + "loss": 0.2859, + "step": 31465 + }, + { + "epoch": 0.6299026599604635, + "grad_norm": 1.147349238395691, + "learning_rate": 3.181848203347695e-06, + "loss": 0.3291, + "step": 31466 + }, + { + "epoch": 0.6299226784775918, + "grad_norm": 1.129502773284912, + "learning_rate": 3.181546216656958e-06, + "loss": 0.3315, + "step": 31467 + }, + { + "epoch": 0.6299426969947202, + "grad_norm": 1.8618892431259155, + "learning_rate": 3.1812442376106112e-06, + "loss": 0.8025, + "step": 31468 + }, + { + "epoch": 0.6299627155118485, + "grad_norm": 1.1414790153503418, + "learning_rate": 3.180942266209922e-06, + "loss": 0.3536, + "step": 31469 + }, + { + "epoch": 0.6299827340289768, + "grad_norm": 1.1395808458328247, + "learning_rate": 3.1806403024561607e-06, + "loss": 0.2965, + "step": 31470 + }, + { + "epoch": 0.6300027525461052, + "grad_norm": 1.1481921672821045, + "learning_rate": 3.180338346350594e-06, + "loss": 0.3158, + "step": 31471 + }, + { + "epoch": 0.6300227710632335, + "grad_norm": 1.1682687997817993, + "learning_rate": 3.180036397894495e-06, + "loss": 0.2956, + "step": 31472 + }, + { + "epoch": 0.6300427895803619, + "grad_norm": 1.044622540473938, + "learning_rate": 3.1797344570891305e-06, + "loss": 0.2425, + "step": 31473 + }, + { + "epoch": 0.6300628080974902, + "grad_norm": 1.0763672590255737, + "learning_rate": 3.1794325239357714e-06, + "loss": 0.3156, + "step": 31474 + }, + { + "epoch": 0.6300828266146186, + "grad_norm": 1.1066958904266357, + "learning_rate": 3.1791305984356862e-06, + "loss": 0.281, + "step": 31475 + }, + { + "epoch": 0.6301028451317469, + "grad_norm": 1.1796380281448364, + "learning_rate": 3.1788286805901425e-06, + "loss": 0.2962, + "step": 31476 + }, + { + "epoch": 0.6301228636488753, + "grad_norm": 1.1002161502838135, + "learning_rate": 3.1785267704004124e-06, + "loss": 0.3138, + "step": 31477 + }, + { + "epoch": 0.6301428821660036, + "grad_norm": 1.9636735916137695, + "learning_rate": 3.178224867867763e-06, + "loss": 0.8244, + "step": 31478 + }, + { + "epoch": 0.6301629006831319, + "grad_norm": 1.122921109199524, + "learning_rate": 3.177922972993466e-06, + "loss": 0.2754, + "step": 31479 + }, + { + "epoch": 0.6301829192002603, + "grad_norm": 1.1938621997833252, + "learning_rate": 3.177621085778786e-06, + "loss": 0.3004, + "step": 31480 + }, + { + "epoch": 0.6302029377173886, + "grad_norm": 1.1056842803955078, + "learning_rate": 3.177319206224996e-06, + "loss": 0.2897, + "step": 31481 + }, + { + "epoch": 0.630222956234517, + "grad_norm": 1.1264114379882812, + "learning_rate": 3.1770173343333653e-06, + "loss": 0.345, + "step": 31482 + }, + { + "epoch": 0.6302429747516453, + "grad_norm": 1.0393401384353638, + "learning_rate": 3.1767154701051604e-06, + "loss": 0.2811, + "step": 31483 + }, + { + "epoch": 0.6302629932687737, + "grad_norm": 1.0774927139282227, + "learning_rate": 3.176413613541651e-06, + "loss": 0.3349, + "step": 31484 + }, + { + "epoch": 0.630283011785902, + "grad_norm": 1.1106597185134888, + "learning_rate": 3.1761117646441057e-06, + "loss": 0.2893, + "step": 31485 + }, + { + "epoch": 0.6303030303030303, + "grad_norm": 1.0299469232559204, + "learning_rate": 3.175809923413794e-06, + "loss": 0.2965, + "step": 31486 + }, + { + "epoch": 0.6303230488201587, + "grad_norm": 1.0282789468765259, + "learning_rate": 3.1755080898519863e-06, + "loss": 0.2793, + "step": 31487 + }, + { + "epoch": 0.630343067337287, + "grad_norm": 1.0434417724609375, + "learning_rate": 3.17520626395995e-06, + "loss": 0.3111, + "step": 31488 + }, + { + "epoch": 0.6303630858544154, + "grad_norm": 1.1820894479751587, + "learning_rate": 3.1749044457389518e-06, + "loss": 0.3035, + "step": 31489 + }, + { + "epoch": 0.6303831043715437, + "grad_norm": 1.146554708480835, + "learning_rate": 3.1746026351902643e-06, + "loss": 0.3505, + "step": 31490 + }, + { + "epoch": 0.6304031228886721, + "grad_norm": 1.1082061529159546, + "learning_rate": 3.1743008323151537e-06, + "loss": 0.3087, + "step": 31491 + }, + { + "epoch": 0.6304231414058004, + "grad_norm": 1.0536103248596191, + "learning_rate": 3.17399903711489e-06, + "loss": 0.3123, + "step": 31492 + }, + { + "epoch": 0.6304431599229288, + "grad_norm": 1.1434845924377441, + "learning_rate": 3.17369724959074e-06, + "loss": 0.2867, + "step": 31493 + }, + { + "epoch": 0.6304631784400571, + "grad_norm": 1.1857891082763672, + "learning_rate": 3.1733954697439757e-06, + "loss": 0.2673, + "step": 31494 + }, + { + "epoch": 0.6304831969571854, + "grad_norm": 1.1532459259033203, + "learning_rate": 3.173093697575863e-06, + "loss": 0.3363, + "step": 31495 + }, + { + "epoch": 0.6305032154743138, + "grad_norm": 1.8993442058563232, + "learning_rate": 3.1727919330876724e-06, + "loss": 0.7773, + "step": 31496 + }, + { + "epoch": 0.6305232339914421, + "grad_norm": 1.0608009099960327, + "learning_rate": 3.172490176280669e-06, + "loss": 0.2781, + "step": 31497 + }, + { + "epoch": 0.6305432525085705, + "grad_norm": 1.044249415397644, + "learning_rate": 3.172188427156125e-06, + "loss": 0.3115, + "step": 31498 + }, + { + "epoch": 0.6305632710256988, + "grad_norm": 1.0259814262390137, + "learning_rate": 3.171886685715306e-06, + "loss": 0.3174, + "step": 31499 + }, + { + "epoch": 0.6305832895428272, + "grad_norm": 1.0177797079086304, + "learning_rate": 3.171584951959484e-06, + "loss": 0.2369, + "step": 31500 + }, + { + "epoch": 0.6306033080599555, + "grad_norm": 1.8718843460083008, + "learning_rate": 3.1712832258899255e-06, + "loss": 0.6965, + "step": 31501 + }, + { + "epoch": 0.6306233265770838, + "grad_norm": 1.9401063919067383, + "learning_rate": 3.1709815075078964e-06, + "loss": 0.7806, + "step": 31502 + }, + { + "epoch": 0.6306433450942122, + "grad_norm": 1.1355879306793213, + "learning_rate": 3.1706797968146695e-06, + "loss": 0.3442, + "step": 31503 + }, + { + "epoch": 0.6306633636113405, + "grad_norm": 0.997891366481781, + "learning_rate": 3.1703780938115103e-06, + "loss": 0.3162, + "step": 31504 + }, + { + "epoch": 0.6306833821284689, + "grad_norm": 1.0680373907089233, + "learning_rate": 3.1700763984996885e-06, + "loss": 0.3057, + "step": 31505 + }, + { + "epoch": 0.6307034006455972, + "grad_norm": 1.0455459356307983, + "learning_rate": 3.169774710880472e-06, + "loss": 0.2949, + "step": 31506 + }, + { + "epoch": 0.6307234191627256, + "grad_norm": 1.2079895734786987, + "learning_rate": 3.1694730309551263e-06, + "loss": 0.3287, + "step": 31507 + }, + { + "epoch": 0.6307434376798539, + "grad_norm": 1.1435905694961548, + "learning_rate": 3.169171358724924e-06, + "loss": 0.2996, + "step": 31508 + }, + { + "epoch": 0.6307634561969823, + "grad_norm": 1.2336841821670532, + "learning_rate": 3.168869694191132e-06, + "loss": 0.3345, + "step": 31509 + }, + { + "epoch": 0.6307834747141106, + "grad_norm": 1.1173498630523682, + "learning_rate": 3.1685680373550165e-06, + "loss": 0.2986, + "step": 31510 + }, + { + "epoch": 0.6308034932312389, + "grad_norm": 1.2224985361099243, + "learning_rate": 3.1682663882178466e-06, + "loss": 0.3261, + "step": 31511 + }, + { + "epoch": 0.6308235117483673, + "grad_norm": 1.1644707918167114, + "learning_rate": 3.1679647467808905e-06, + "loss": 0.306, + "step": 31512 + }, + { + "epoch": 0.6308435302654956, + "grad_norm": 1.2279884815216064, + "learning_rate": 3.167663113045417e-06, + "loss": 0.3212, + "step": 31513 + }, + { + "epoch": 0.630863548782624, + "grad_norm": 1.2696179151535034, + "learning_rate": 3.1673614870126946e-06, + "loss": 0.3151, + "step": 31514 + }, + { + "epoch": 0.6308835672997523, + "grad_norm": 2.004143238067627, + "learning_rate": 3.167059868683989e-06, + "loss": 0.7453, + "step": 31515 + }, + { + "epoch": 0.6309035858168807, + "grad_norm": 1.014090895652771, + "learning_rate": 3.1667582580605683e-06, + "loss": 0.2547, + "step": 31516 + }, + { + "epoch": 0.630923604334009, + "grad_norm": 1.0959910154342651, + "learning_rate": 3.1664566551437013e-06, + "loss": 0.291, + "step": 31517 + }, + { + "epoch": 0.6309436228511373, + "grad_norm": 1.057895541191101, + "learning_rate": 3.166155059934658e-06, + "loss": 0.2624, + "step": 31518 + }, + { + "epoch": 0.6309636413682657, + "grad_norm": 1.1872938871383667, + "learning_rate": 3.1658534724347035e-06, + "loss": 0.3103, + "step": 31519 + }, + { + "epoch": 0.630983659885394, + "grad_norm": 1.1889866590499878, + "learning_rate": 3.1655518926451047e-06, + "loss": 0.3169, + "step": 31520 + }, + { + "epoch": 0.6310036784025224, + "grad_norm": 1.1034995317459106, + "learning_rate": 3.1652503205671324e-06, + "loss": 0.3256, + "step": 31521 + }, + { + "epoch": 0.6310236969196507, + "grad_norm": 1.053467869758606, + "learning_rate": 3.164948756202053e-06, + "loss": 0.344, + "step": 31522 + }, + { + "epoch": 0.6310437154367791, + "grad_norm": 1.3150184154510498, + "learning_rate": 3.1646471995511342e-06, + "loss": 0.2834, + "step": 31523 + }, + { + "epoch": 0.6310637339539074, + "grad_norm": 1.3396086692810059, + "learning_rate": 3.1643456506156423e-06, + "loss": 0.3238, + "step": 31524 + }, + { + "epoch": 0.6310837524710357, + "grad_norm": 1.1533619165420532, + "learning_rate": 3.1640441093968467e-06, + "loss": 0.3318, + "step": 31525 + }, + { + "epoch": 0.6311037709881641, + "grad_norm": 1.1601171493530273, + "learning_rate": 3.1637425758960154e-06, + "loss": 0.3039, + "step": 31526 + }, + { + "epoch": 0.6311237895052924, + "grad_norm": 1.074086308479309, + "learning_rate": 3.163441050114415e-06, + "loss": 0.3033, + "step": 31527 + }, + { + "epoch": 0.6311438080224208, + "grad_norm": 1.1528573036193848, + "learning_rate": 3.1631395320533145e-06, + "loss": 0.2545, + "step": 31528 + }, + { + "epoch": 0.631163826539549, + "grad_norm": 1.2786519527435303, + "learning_rate": 3.1628380217139766e-06, + "loss": 0.2921, + "step": 31529 + }, + { + "epoch": 0.6311838450566775, + "grad_norm": 1.0476768016815186, + "learning_rate": 3.1625365190976737e-06, + "loss": 0.3018, + "step": 31530 + }, + { + "epoch": 0.6312038635738058, + "grad_norm": 1.0052711963653564, + "learning_rate": 3.162235024205673e-06, + "loss": 0.2698, + "step": 31531 + }, + { + "epoch": 0.6312238820909342, + "grad_norm": 1.1086726188659668, + "learning_rate": 3.1619335370392407e-06, + "loss": 0.3153, + "step": 31532 + }, + { + "epoch": 0.6312439006080625, + "grad_norm": 1.0930402278900146, + "learning_rate": 3.1616320575996422e-06, + "loss": 0.3756, + "step": 31533 + }, + { + "epoch": 0.6312639191251908, + "grad_norm": 1.2256050109863281, + "learning_rate": 3.161330585888149e-06, + "loss": 0.3107, + "step": 31534 + }, + { + "epoch": 0.6312839376423192, + "grad_norm": 1.153764247894287, + "learning_rate": 3.1610291219060262e-06, + "loss": 0.3238, + "step": 31535 + }, + { + "epoch": 0.6313039561594475, + "grad_norm": 1.316163182258606, + "learning_rate": 3.1607276656545404e-06, + "loss": 0.3273, + "step": 31536 + }, + { + "epoch": 0.6313239746765759, + "grad_norm": 1.0675315856933594, + "learning_rate": 3.1604262171349607e-06, + "loss": 0.2774, + "step": 31537 + }, + { + "epoch": 0.6313439931937042, + "grad_norm": 1.1622934341430664, + "learning_rate": 3.160124776348551e-06, + "loss": 0.3106, + "step": 31538 + }, + { + "epoch": 0.6313640117108326, + "grad_norm": 2.062570095062256, + "learning_rate": 3.159823343296583e-06, + "loss": 0.2549, + "step": 31539 + }, + { + "epoch": 0.6313840302279609, + "grad_norm": 1.16836678981781, + "learning_rate": 3.1595219179803214e-06, + "loss": 0.3144, + "step": 31540 + }, + { + "epoch": 0.6314040487450892, + "grad_norm": 1.1817302703857422, + "learning_rate": 3.1592205004010345e-06, + "loss": 0.3464, + "step": 31541 + }, + { + "epoch": 0.6314240672622176, + "grad_norm": 1.7818775177001953, + "learning_rate": 3.1589190905599853e-06, + "loss": 0.794, + "step": 31542 + }, + { + "epoch": 0.6314440857793459, + "grad_norm": 1.045523762702942, + "learning_rate": 3.1586176884584463e-06, + "loss": 0.3178, + "step": 31543 + }, + { + "epoch": 0.6314641042964743, + "grad_norm": 1.1022124290466309, + "learning_rate": 3.158316294097682e-06, + "loss": 0.3208, + "step": 31544 + }, + { + "epoch": 0.6314841228136026, + "grad_norm": 1.1776654720306396, + "learning_rate": 3.15801490747896e-06, + "loss": 0.3678, + "step": 31545 + }, + { + "epoch": 0.631504141330731, + "grad_norm": 1.3080780506134033, + "learning_rate": 3.1577135286035476e-06, + "loss": 0.289, + "step": 31546 + }, + { + "epoch": 0.6315241598478593, + "grad_norm": 1.2206960916519165, + "learning_rate": 3.1574121574727083e-06, + "loss": 0.3096, + "step": 31547 + }, + { + "epoch": 0.6315441783649877, + "grad_norm": 1.2530491352081299, + "learning_rate": 3.1571107940877143e-06, + "loss": 0.2894, + "step": 31548 + }, + { + "epoch": 0.631564196882116, + "grad_norm": 1.1338907480239868, + "learning_rate": 3.1568094384498284e-06, + "loss": 0.268, + "step": 31549 + }, + { + "epoch": 0.6315842153992443, + "grad_norm": 1.0624054670333862, + "learning_rate": 3.156508090560321e-06, + "loss": 0.2995, + "step": 31550 + }, + { + "epoch": 0.6316042339163727, + "grad_norm": 1.315691351890564, + "learning_rate": 3.156206750420453e-06, + "loss": 0.3277, + "step": 31551 + }, + { + "epoch": 0.631624252433501, + "grad_norm": 1.2337161302566528, + "learning_rate": 3.1559054180314987e-06, + "loss": 0.3135, + "step": 31552 + }, + { + "epoch": 0.6316442709506294, + "grad_norm": 1.015901803970337, + "learning_rate": 3.1556040933947206e-06, + "loss": 0.2806, + "step": 31553 + }, + { + "epoch": 0.6316642894677577, + "grad_norm": 1.1665103435516357, + "learning_rate": 3.155302776511386e-06, + "loss": 0.3485, + "step": 31554 + }, + { + "epoch": 0.6316843079848861, + "grad_norm": 1.8614732027053833, + "learning_rate": 3.1550014673827587e-06, + "loss": 0.6944, + "step": 31555 + }, + { + "epoch": 0.6317043265020144, + "grad_norm": 1.2806556224822998, + "learning_rate": 3.1547001660101106e-06, + "loss": 0.2814, + "step": 31556 + }, + { + "epoch": 0.6317243450191427, + "grad_norm": 1.1216833591461182, + "learning_rate": 3.1543988723947063e-06, + "loss": 0.3057, + "step": 31557 + }, + { + "epoch": 0.6317443635362711, + "grad_norm": 1.1489169597625732, + "learning_rate": 3.154097586537812e-06, + "loss": 0.3332, + "step": 31558 + }, + { + "epoch": 0.6317643820533994, + "grad_norm": 1.0735069513320923, + "learning_rate": 3.1537963084406942e-06, + "loss": 0.2467, + "step": 31559 + }, + { + "epoch": 0.6317844005705278, + "grad_norm": 1.1325072050094604, + "learning_rate": 3.153495038104617e-06, + "loss": 0.2971, + "step": 31560 + }, + { + "epoch": 0.631804419087656, + "grad_norm": 1.0368387699127197, + "learning_rate": 3.1531937755308518e-06, + "loss": 0.3214, + "step": 31561 + }, + { + "epoch": 0.6318244376047845, + "grad_norm": 1.2703477144241333, + "learning_rate": 3.152892520720662e-06, + "loss": 0.2686, + "step": 31562 + }, + { + "epoch": 0.6318444561219128, + "grad_norm": 2.007988214492798, + "learning_rate": 3.1525912736753145e-06, + "loss": 0.7646, + "step": 31563 + }, + { + "epoch": 0.6318644746390412, + "grad_norm": 1.0822511911392212, + "learning_rate": 3.152290034396074e-06, + "loss": 0.3262, + "step": 31564 + }, + { + "epoch": 0.6318844931561695, + "grad_norm": 1.0489615201950073, + "learning_rate": 3.1519888028842104e-06, + "loss": 0.291, + "step": 31565 + }, + { + "epoch": 0.6319045116732978, + "grad_norm": 1.1648956537246704, + "learning_rate": 3.1516875791409884e-06, + "loss": 0.2529, + "step": 31566 + }, + { + "epoch": 0.6319245301904262, + "grad_norm": 1.8454540967941284, + "learning_rate": 3.1513863631676746e-06, + "loss": 0.8092, + "step": 31567 + }, + { + "epoch": 0.6319445487075545, + "grad_norm": 0.9757586717605591, + "learning_rate": 3.151085154965533e-06, + "loss": 0.3215, + "step": 31568 + }, + { + "epoch": 0.6319645672246829, + "grad_norm": 1.0472756624221802, + "learning_rate": 3.150783954535831e-06, + "loss": 0.2654, + "step": 31569 + }, + { + "epoch": 0.6319845857418112, + "grad_norm": 1.0834670066833496, + "learning_rate": 3.1504827618798375e-06, + "loss": 0.3249, + "step": 31570 + }, + { + "epoch": 0.6320046042589396, + "grad_norm": 1.9031314849853516, + "learning_rate": 3.1501815769988153e-06, + "loss": 0.8129, + "step": 31571 + }, + { + "epoch": 0.6320246227760679, + "grad_norm": 1.027003526687622, + "learning_rate": 3.1498803998940325e-06, + "loss": 0.2765, + "step": 31572 + }, + { + "epoch": 0.6320446412931962, + "grad_norm": 1.119238018989563, + "learning_rate": 3.149579230566752e-06, + "loss": 0.2904, + "step": 31573 + }, + { + "epoch": 0.6320646598103246, + "grad_norm": 1.1284979581832886, + "learning_rate": 3.1492780690182447e-06, + "loss": 0.3242, + "step": 31574 + }, + { + "epoch": 0.6320846783274529, + "grad_norm": 1.1432156562805176, + "learning_rate": 3.1489769152497727e-06, + "loss": 0.2979, + "step": 31575 + }, + { + "epoch": 0.6321046968445813, + "grad_norm": 2.0678765773773193, + "learning_rate": 3.1486757692626045e-06, + "loss": 0.7476, + "step": 31576 + }, + { + "epoch": 0.6321247153617096, + "grad_norm": 1.061202049255371, + "learning_rate": 3.1483746310580054e-06, + "loss": 0.2948, + "step": 31577 + }, + { + "epoch": 0.632144733878838, + "grad_norm": 0.9839306473731995, + "learning_rate": 3.148073500637238e-06, + "loss": 0.3054, + "step": 31578 + }, + { + "epoch": 0.6321647523959663, + "grad_norm": 1.759080410003662, + "learning_rate": 3.1477723780015735e-06, + "loss": 0.73, + "step": 31579 + }, + { + "epoch": 0.6321847709130947, + "grad_norm": 1.083211898803711, + "learning_rate": 3.1474712631522753e-06, + "loss": 0.3414, + "step": 31580 + }, + { + "epoch": 0.632204789430223, + "grad_norm": 1.8351917266845703, + "learning_rate": 3.1471701560906087e-06, + "loss": 0.7945, + "step": 31581 + }, + { + "epoch": 0.6322248079473513, + "grad_norm": 1.770102620124817, + "learning_rate": 3.1468690568178386e-06, + "loss": 0.8236, + "step": 31582 + }, + { + "epoch": 0.6322448264644797, + "grad_norm": 1.1447858810424805, + "learning_rate": 3.1465679653352343e-06, + "loss": 0.3137, + "step": 31583 + }, + { + "epoch": 0.632264844981608, + "grad_norm": 1.0867319107055664, + "learning_rate": 3.1462668816440587e-06, + "loss": 0.2643, + "step": 31584 + }, + { + "epoch": 0.6322848634987364, + "grad_norm": 1.8227555751800537, + "learning_rate": 3.1459658057455796e-06, + "loss": 0.814, + "step": 31585 + }, + { + "epoch": 0.6323048820158647, + "grad_norm": 1.1036999225616455, + "learning_rate": 3.1456647376410584e-06, + "loss": 0.2898, + "step": 31586 + }, + { + "epoch": 0.6323249005329931, + "grad_norm": 1.1010485887527466, + "learning_rate": 3.1453636773317653e-06, + "loss": 0.3427, + "step": 31587 + }, + { + "epoch": 0.6323449190501214, + "grad_norm": 1.126451015472412, + "learning_rate": 3.1450626248189637e-06, + "loss": 0.2978, + "step": 31588 + }, + { + "epoch": 0.6323649375672497, + "grad_norm": 1.0709657669067383, + "learning_rate": 3.1447615801039207e-06, + "loss": 0.3153, + "step": 31589 + }, + { + "epoch": 0.6323849560843781, + "grad_norm": 1.1299238204956055, + "learning_rate": 3.1444605431879005e-06, + "loss": 0.3315, + "step": 31590 + }, + { + "epoch": 0.6324049746015064, + "grad_norm": 1.076834797859192, + "learning_rate": 3.144159514072167e-06, + "loss": 0.3113, + "step": 31591 + }, + { + "epoch": 0.6324249931186348, + "grad_norm": 1.1630029678344727, + "learning_rate": 3.1438584927579897e-06, + "loss": 0.3454, + "step": 31592 + }, + { + "epoch": 0.632445011635763, + "grad_norm": 1.1353604793548584, + "learning_rate": 3.1435574792466313e-06, + "loss": 0.2913, + "step": 31593 + }, + { + "epoch": 0.6324650301528915, + "grad_norm": 1.0082885026931763, + "learning_rate": 3.1432564735393567e-06, + "loss": 0.2818, + "step": 31594 + }, + { + "epoch": 0.6324850486700198, + "grad_norm": 1.150933027267456, + "learning_rate": 3.1429554756374314e-06, + "loss": 0.3324, + "step": 31595 + }, + { + "epoch": 0.6325050671871482, + "grad_norm": 1.0429903268814087, + "learning_rate": 3.1426544855421243e-06, + "loss": 0.2621, + "step": 31596 + }, + { + "epoch": 0.6325250857042765, + "grad_norm": 1.1451574563980103, + "learning_rate": 3.1423535032546975e-06, + "loss": 0.2861, + "step": 31597 + }, + { + "epoch": 0.6325451042214048, + "grad_norm": 1.0482103824615479, + "learning_rate": 3.1420525287764168e-06, + "loss": 0.318, + "step": 31598 + }, + { + "epoch": 0.6325651227385332, + "grad_norm": 1.1648528575897217, + "learning_rate": 3.141751562108547e-06, + "loss": 0.3041, + "step": 31599 + }, + { + "epoch": 0.6325851412556615, + "grad_norm": 1.1256605386734009, + "learning_rate": 3.1414506032523527e-06, + "loss": 0.3079, + "step": 31600 + }, + { + "epoch": 0.6326051597727899, + "grad_norm": 1.8108028173446655, + "learning_rate": 3.141149652209101e-06, + "loss": 0.7337, + "step": 31601 + }, + { + "epoch": 0.6326251782899182, + "grad_norm": 1.0904698371887207, + "learning_rate": 3.1408487089800567e-06, + "loss": 0.2754, + "step": 31602 + }, + { + "epoch": 0.6326451968070466, + "grad_norm": 1.1765841245651245, + "learning_rate": 3.140547773566485e-06, + "loss": 0.3121, + "step": 31603 + }, + { + "epoch": 0.6326652153241749, + "grad_norm": 1.0511376857757568, + "learning_rate": 3.1402468459696473e-06, + "loss": 0.2997, + "step": 31604 + }, + { + "epoch": 0.6326852338413032, + "grad_norm": 0.9814236760139465, + "learning_rate": 3.139945926190814e-06, + "loss": 0.2981, + "step": 31605 + }, + { + "epoch": 0.6327052523584316, + "grad_norm": 1.3251713514328003, + "learning_rate": 3.139645014231248e-06, + "loss": 0.3238, + "step": 31606 + }, + { + "epoch": 0.6327252708755599, + "grad_norm": 1.0837180614471436, + "learning_rate": 3.139344110092213e-06, + "loss": 0.2721, + "step": 31607 + }, + { + "epoch": 0.6327452893926883, + "grad_norm": 1.1569188833236694, + "learning_rate": 3.1390432137749736e-06, + "loss": 0.3145, + "step": 31608 + }, + { + "epoch": 0.6327653079098166, + "grad_norm": 1.125092625617981, + "learning_rate": 3.138742325280798e-06, + "loss": 0.3359, + "step": 31609 + }, + { + "epoch": 0.632785326426945, + "grad_norm": 1.137918472290039, + "learning_rate": 3.1384414446109495e-06, + "loss": 0.2996, + "step": 31610 + }, + { + "epoch": 0.6328053449440733, + "grad_norm": 1.2864103317260742, + "learning_rate": 3.1381405717666923e-06, + "loss": 0.3006, + "step": 31611 + }, + { + "epoch": 0.6328253634612017, + "grad_norm": 1.1038017272949219, + "learning_rate": 3.1378397067492905e-06, + "loss": 0.2912, + "step": 31612 + }, + { + "epoch": 0.63284538197833, + "grad_norm": 1.0707266330718994, + "learning_rate": 3.13753884956001e-06, + "loss": 0.2761, + "step": 31613 + }, + { + "epoch": 0.6328654004954583, + "grad_norm": 0.9884666204452515, + "learning_rate": 3.1372380002001158e-06, + "loss": 0.2777, + "step": 31614 + }, + { + "epoch": 0.6328854190125867, + "grad_norm": 1.0668308734893799, + "learning_rate": 3.136937158670872e-06, + "loss": 0.2745, + "step": 31615 + }, + { + "epoch": 0.632905437529715, + "grad_norm": 1.062435269355774, + "learning_rate": 3.1366363249735443e-06, + "loss": 0.3127, + "step": 31616 + }, + { + "epoch": 0.6329254560468434, + "grad_norm": 1.2083890438079834, + "learning_rate": 3.1363354991093944e-06, + "loss": 0.2769, + "step": 31617 + }, + { + "epoch": 0.6329454745639717, + "grad_norm": 1.0282779932022095, + "learning_rate": 3.1360346810796906e-06, + "loss": 0.3059, + "step": 31618 + }, + { + "epoch": 0.6329654930811001, + "grad_norm": 1.0949652194976807, + "learning_rate": 3.1357338708856968e-06, + "loss": 0.2653, + "step": 31619 + }, + { + "epoch": 0.6329855115982284, + "grad_norm": 1.0314863920211792, + "learning_rate": 3.1354330685286745e-06, + "loss": 0.2499, + "step": 31620 + }, + { + "epoch": 0.6330055301153567, + "grad_norm": 1.1114566326141357, + "learning_rate": 3.1351322740098915e-06, + "loss": 0.3477, + "step": 31621 + }, + { + "epoch": 0.6330255486324851, + "grad_norm": 1.206070065498352, + "learning_rate": 3.1348314873306097e-06, + "loss": 0.3341, + "step": 31622 + }, + { + "epoch": 0.6330455671496134, + "grad_norm": 1.113735318183899, + "learning_rate": 3.1345307084920963e-06, + "loss": 0.2582, + "step": 31623 + }, + { + "epoch": 0.6330655856667418, + "grad_norm": 1.870021104812622, + "learning_rate": 3.1342299374956147e-06, + "loss": 0.7589, + "step": 31624 + }, + { + "epoch": 0.63308560418387, + "grad_norm": 1.1388263702392578, + "learning_rate": 3.1339291743424282e-06, + "loss": 0.3053, + "step": 31625 + }, + { + "epoch": 0.6331056227009985, + "grad_norm": 1.037071704864502, + "learning_rate": 3.133628419033802e-06, + "loss": 0.2832, + "step": 31626 + }, + { + "epoch": 0.6331256412181268, + "grad_norm": 1.1077672243118286, + "learning_rate": 3.133327671570999e-06, + "loss": 0.326, + "step": 31627 + }, + { + "epoch": 0.6331456597352552, + "grad_norm": 1.1154377460479736, + "learning_rate": 3.133026931955287e-06, + "loss": 0.3355, + "step": 31628 + }, + { + "epoch": 0.6331656782523835, + "grad_norm": 1.1469271183013916, + "learning_rate": 3.1327262001879277e-06, + "loss": 0.2864, + "step": 31629 + }, + { + "epoch": 0.6331856967695118, + "grad_norm": 1.206736445426941, + "learning_rate": 3.1324254762701857e-06, + "loss": 0.3069, + "step": 31630 + }, + { + "epoch": 0.6332057152866402, + "grad_norm": 1.1527518033981323, + "learning_rate": 3.1321247602033223e-06, + "loss": 0.3401, + "step": 31631 + }, + { + "epoch": 0.6332257338037685, + "grad_norm": 1.173730731010437, + "learning_rate": 3.1318240519886072e-06, + "loss": 0.3346, + "step": 31632 + }, + { + "epoch": 0.6332457523208969, + "grad_norm": 1.0692273378372192, + "learning_rate": 3.1315233516273004e-06, + "loss": 0.3097, + "step": 31633 + }, + { + "epoch": 0.6332657708380252, + "grad_norm": 1.1695612668991089, + "learning_rate": 3.1312226591206686e-06, + "loss": 0.2906, + "step": 31634 + }, + { + "epoch": 0.6332857893551536, + "grad_norm": 1.379582405090332, + "learning_rate": 3.1309219744699727e-06, + "loss": 0.3069, + "step": 31635 + }, + { + "epoch": 0.6333058078722819, + "grad_norm": 1.0056939125061035, + "learning_rate": 3.1306212976764805e-06, + "loss": 0.3087, + "step": 31636 + }, + { + "epoch": 0.6333258263894102, + "grad_norm": 1.196904182434082, + "learning_rate": 3.1303206287414535e-06, + "loss": 0.2845, + "step": 31637 + }, + { + "epoch": 0.6333458449065386, + "grad_norm": 1.2760337591171265, + "learning_rate": 3.1300199676661556e-06, + "loss": 0.2592, + "step": 31638 + }, + { + "epoch": 0.6333658634236669, + "grad_norm": 1.4342126846313477, + "learning_rate": 3.1297193144518517e-06, + "loss": 0.3015, + "step": 31639 + }, + { + "epoch": 0.6333858819407953, + "grad_norm": 1.081587553024292, + "learning_rate": 3.1294186690998054e-06, + "loss": 0.2605, + "step": 31640 + }, + { + "epoch": 0.6334059004579236, + "grad_norm": 0.9897975325584412, + "learning_rate": 3.1291180316112805e-06, + "loss": 0.272, + "step": 31641 + }, + { + "epoch": 0.633425918975052, + "grad_norm": 1.1279339790344238, + "learning_rate": 3.1288174019875417e-06, + "loss": 0.3177, + "step": 31642 + }, + { + "epoch": 0.6334459374921803, + "grad_norm": 1.3966463804244995, + "learning_rate": 3.128516780229851e-06, + "loss": 0.3004, + "step": 31643 + }, + { + "epoch": 0.6334659560093087, + "grad_norm": 1.2344238758087158, + "learning_rate": 3.1282161663394717e-06, + "loss": 0.386, + "step": 31644 + }, + { + "epoch": 0.633485974526437, + "grad_norm": 1.086347222328186, + "learning_rate": 3.1279155603176702e-06, + "loss": 0.2769, + "step": 31645 + }, + { + "epoch": 0.6335059930435653, + "grad_norm": 1.2274302244186401, + "learning_rate": 3.1276149621657086e-06, + "loss": 0.2635, + "step": 31646 + }, + { + "epoch": 0.6335260115606937, + "grad_norm": 1.474431037902832, + "learning_rate": 3.1273143718848515e-06, + "loss": 0.2459, + "step": 31647 + }, + { + "epoch": 0.633546030077822, + "grad_norm": 1.0476024150848389, + "learning_rate": 3.1270137894763597e-06, + "loss": 0.292, + "step": 31648 + }, + { + "epoch": 0.6335660485949504, + "grad_norm": 1.0391749143600464, + "learning_rate": 3.1267132149415008e-06, + "loss": 0.2569, + "step": 31649 + }, + { + "epoch": 0.6335860671120787, + "grad_norm": 1.0174624919891357, + "learning_rate": 3.1264126482815364e-06, + "loss": 0.3192, + "step": 31650 + }, + { + "epoch": 0.6336060856292071, + "grad_norm": 1.9054495096206665, + "learning_rate": 3.1261120894977294e-06, + "loss": 0.7284, + "step": 31651 + }, + { + "epoch": 0.6336261041463354, + "grad_norm": 1.086017370223999, + "learning_rate": 3.1258115385913445e-06, + "loss": 0.3316, + "step": 31652 + }, + { + "epoch": 0.6336461226634637, + "grad_norm": 1.143161654472351, + "learning_rate": 3.1255109955636426e-06, + "loss": 0.3456, + "step": 31653 + }, + { + "epoch": 0.6336661411805921, + "grad_norm": 1.0842032432556152, + "learning_rate": 3.1252104604158906e-06, + "loss": 0.2814, + "step": 31654 + }, + { + "epoch": 0.6336861596977204, + "grad_norm": 1.0311617851257324, + "learning_rate": 3.1249099331493517e-06, + "loss": 0.2951, + "step": 31655 + }, + { + "epoch": 0.6337061782148488, + "grad_norm": 1.099900484085083, + "learning_rate": 3.1246094137652865e-06, + "loss": 0.3101, + "step": 31656 + }, + { + "epoch": 0.633726196731977, + "grad_norm": 1.1615840196609497, + "learning_rate": 3.1243089022649585e-06, + "loss": 0.3181, + "step": 31657 + }, + { + "epoch": 0.6337462152491055, + "grad_norm": 1.0486557483673096, + "learning_rate": 3.124008398649634e-06, + "loss": 0.3044, + "step": 31658 + }, + { + "epoch": 0.6337662337662338, + "grad_norm": 1.8007843494415283, + "learning_rate": 3.1237079029205735e-06, + "loss": 0.8043, + "step": 31659 + }, + { + "epoch": 0.6337862522833622, + "grad_norm": 1.7687021493911743, + "learning_rate": 3.123407415079043e-06, + "loss": 0.7176, + "step": 31660 + }, + { + "epoch": 0.6338062708004905, + "grad_norm": 1.953948736190796, + "learning_rate": 3.123106935126302e-06, + "loss": 0.764, + "step": 31661 + }, + { + "epoch": 0.6338262893176188, + "grad_norm": 1.127225637435913, + "learning_rate": 3.122806463063615e-06, + "loss": 0.3217, + "step": 31662 + }, + { + "epoch": 0.6338463078347472, + "grad_norm": 1.1038628816604614, + "learning_rate": 3.1225059988922473e-06, + "loss": 0.3019, + "step": 31663 + }, + { + "epoch": 0.6338663263518755, + "grad_norm": 1.1597874164581299, + "learning_rate": 3.1222055426134597e-06, + "loss": 0.3113, + "step": 31664 + }, + { + "epoch": 0.6338863448690039, + "grad_norm": 1.1451596021652222, + "learning_rate": 3.121905094228516e-06, + "loss": 0.3105, + "step": 31665 + }, + { + "epoch": 0.6339063633861322, + "grad_norm": 1.1670624017715454, + "learning_rate": 3.121604653738678e-06, + "loss": 0.306, + "step": 31666 + }, + { + "epoch": 0.6339263819032606, + "grad_norm": 1.079107642173767, + "learning_rate": 3.1213042211452116e-06, + "loss": 0.2942, + "step": 31667 + }, + { + "epoch": 0.6339464004203889, + "grad_norm": 1.1130547523498535, + "learning_rate": 3.1210037964493777e-06, + "loss": 0.3508, + "step": 31668 + }, + { + "epoch": 0.6339664189375172, + "grad_norm": 1.9214937686920166, + "learning_rate": 3.1207033796524395e-06, + "loss": 0.8169, + "step": 31669 + }, + { + "epoch": 0.6339864374546456, + "grad_norm": 1.1549344062805176, + "learning_rate": 3.1204029707556578e-06, + "loss": 0.2806, + "step": 31670 + }, + { + "epoch": 0.6340064559717739, + "grad_norm": 1.3629825115203857, + "learning_rate": 3.120102569760299e-06, + "loss": 0.3088, + "step": 31671 + }, + { + "epoch": 0.6340264744889023, + "grad_norm": 1.0995924472808838, + "learning_rate": 3.1198021766676246e-06, + "loss": 0.3246, + "step": 31672 + }, + { + "epoch": 0.6340464930060306, + "grad_norm": 1.244413137435913, + "learning_rate": 3.119501791478898e-06, + "loss": 0.3111, + "step": 31673 + }, + { + "epoch": 0.634066511523159, + "grad_norm": 1.279951572418213, + "learning_rate": 3.119201414195381e-06, + "loss": 0.3499, + "step": 31674 + }, + { + "epoch": 0.6340865300402873, + "grad_norm": 1.9577099084854126, + "learning_rate": 3.1189010448183345e-06, + "loss": 0.8587, + "step": 31675 + }, + { + "epoch": 0.6341065485574157, + "grad_norm": 1.3481087684631348, + "learning_rate": 3.1186006833490247e-06, + "loss": 0.298, + "step": 31676 + }, + { + "epoch": 0.634126567074544, + "grad_norm": 1.0682685375213623, + "learning_rate": 3.1183003297887123e-06, + "loss": 0.2735, + "step": 31677 + }, + { + "epoch": 0.6341465855916723, + "grad_norm": 1.1175823211669922, + "learning_rate": 3.117999984138661e-06, + "loss": 0.3676, + "step": 31678 + }, + { + "epoch": 0.6341666041088007, + "grad_norm": 1.1151303052902222, + "learning_rate": 3.1176996464001308e-06, + "loss": 0.3207, + "step": 31679 + }, + { + "epoch": 0.634186622625929, + "grad_norm": 1.0914232730865479, + "learning_rate": 3.1173993165743888e-06, + "loss": 0.3276, + "step": 31680 + }, + { + "epoch": 0.6342066411430574, + "grad_norm": 1.0403844118118286, + "learning_rate": 3.117098994662694e-06, + "loss": 0.3201, + "step": 31681 + }, + { + "epoch": 0.6342266596601857, + "grad_norm": 1.2555781602859497, + "learning_rate": 3.1167986806663107e-06, + "loss": 0.2986, + "step": 31682 + }, + { + "epoch": 0.6342466781773141, + "grad_norm": 1.0303536653518677, + "learning_rate": 3.116498374586499e-06, + "loss": 0.2998, + "step": 31683 + }, + { + "epoch": 0.6342666966944424, + "grad_norm": 1.045949101448059, + "learning_rate": 3.1161980764245225e-06, + "loss": 0.3007, + "step": 31684 + }, + { + "epoch": 0.6342867152115707, + "grad_norm": 1.965523600578308, + "learning_rate": 3.115897786181645e-06, + "loss": 0.7492, + "step": 31685 + }, + { + "epoch": 0.6343067337286991, + "grad_norm": 1.0273573398590088, + "learning_rate": 3.115597503859128e-06, + "loss": 0.2849, + "step": 31686 + }, + { + "epoch": 0.6343267522458274, + "grad_norm": 1.0599514245986938, + "learning_rate": 3.1152972294582335e-06, + "loss": 0.3346, + "step": 31687 + }, + { + "epoch": 0.6343467707629558, + "grad_norm": 1.1632109880447388, + "learning_rate": 3.1149969629802217e-06, + "loss": 0.262, + "step": 31688 + }, + { + "epoch": 0.634366789280084, + "grad_norm": 1.8822917938232422, + "learning_rate": 3.1146967044263595e-06, + "loss": 0.7735, + "step": 31689 + }, + { + "epoch": 0.6343868077972125, + "grad_norm": 1.157450795173645, + "learning_rate": 3.1143964537979054e-06, + "loss": 0.2928, + "step": 31690 + }, + { + "epoch": 0.6344068263143408, + "grad_norm": 1.090643048286438, + "learning_rate": 3.1140962110961237e-06, + "loss": 0.2982, + "step": 31691 + }, + { + "epoch": 0.6344268448314692, + "grad_norm": 1.0691373348236084, + "learning_rate": 3.1137959763222757e-06, + "loss": 0.2867, + "step": 31692 + }, + { + "epoch": 0.6344468633485975, + "grad_norm": 1.0608417987823486, + "learning_rate": 3.113495749477622e-06, + "loss": 0.324, + "step": 31693 + }, + { + "epoch": 0.6344668818657258, + "grad_norm": 1.1434844732284546, + "learning_rate": 3.1131955305634275e-06, + "loss": 0.3309, + "step": 31694 + }, + { + "epoch": 0.6344869003828542, + "grad_norm": 1.014203429222107, + "learning_rate": 3.112895319580953e-06, + "loss": 0.2824, + "step": 31695 + }, + { + "epoch": 0.6345069188999825, + "grad_norm": 1.1461619138717651, + "learning_rate": 3.11259511653146e-06, + "loss": 0.3075, + "step": 31696 + }, + { + "epoch": 0.6345269374171109, + "grad_norm": 1.0584779977798462, + "learning_rate": 3.1122949214162103e-06, + "loss": 0.3222, + "step": 31697 + }, + { + "epoch": 0.6345469559342392, + "grad_norm": 1.045165777206421, + "learning_rate": 3.1119947342364672e-06, + "loss": 0.2725, + "step": 31698 + }, + { + "epoch": 0.6345669744513676, + "grad_norm": 1.0079195499420166, + "learning_rate": 3.1116945549934923e-06, + "loss": 0.2586, + "step": 31699 + }, + { + "epoch": 0.6345869929684959, + "grad_norm": 1.0315700769424438, + "learning_rate": 3.1113943836885476e-06, + "loss": 0.2804, + "step": 31700 + }, + { + "epoch": 0.6346070114856242, + "grad_norm": 0.956457793712616, + "learning_rate": 3.1110942203228923e-06, + "loss": 0.2971, + "step": 31701 + }, + { + "epoch": 0.6346270300027526, + "grad_norm": 1.043519139289856, + "learning_rate": 3.1107940648977927e-06, + "loss": 0.3157, + "step": 31702 + }, + { + "epoch": 0.6346470485198809, + "grad_norm": 1.1374738216400146, + "learning_rate": 3.1104939174145066e-06, + "loss": 0.3322, + "step": 31703 + }, + { + "epoch": 0.6346670670370093, + "grad_norm": 0.9537885785102844, + "learning_rate": 3.1101937778742986e-06, + "loss": 0.3067, + "step": 31704 + }, + { + "epoch": 0.6346870855541376, + "grad_norm": 1.130681037902832, + "learning_rate": 3.10989364627843e-06, + "loss": 0.2909, + "step": 31705 + }, + { + "epoch": 0.634707104071266, + "grad_norm": 1.0332050323486328, + "learning_rate": 3.1095935226281595e-06, + "loss": 0.3152, + "step": 31706 + }, + { + "epoch": 0.6347271225883943, + "grad_norm": 1.0556796789169312, + "learning_rate": 3.1092934069247525e-06, + "loss": 0.307, + "step": 31707 + }, + { + "epoch": 0.6347471411055227, + "grad_norm": 1.1009019613265991, + "learning_rate": 3.1089932991694694e-06, + "loss": 0.3247, + "step": 31708 + }, + { + "epoch": 0.634767159622651, + "grad_norm": 0.9881343245506287, + "learning_rate": 3.1086931993635707e-06, + "loss": 0.3094, + "step": 31709 + }, + { + "epoch": 0.6347871781397793, + "grad_norm": 1.0239604711532593, + "learning_rate": 3.1083931075083183e-06, + "loss": 0.3453, + "step": 31710 + }, + { + "epoch": 0.6348071966569077, + "grad_norm": 1.1912907361984253, + "learning_rate": 3.1080930236049744e-06, + "loss": 0.3045, + "step": 31711 + }, + { + "epoch": 0.634827215174036, + "grad_norm": 1.02439284324646, + "learning_rate": 3.1077929476548014e-06, + "loss": 0.2463, + "step": 31712 + }, + { + "epoch": 0.6348472336911644, + "grad_norm": 1.242761254310608, + "learning_rate": 3.10749287965906e-06, + "loss": 0.2776, + "step": 31713 + }, + { + "epoch": 0.6348672522082927, + "grad_norm": 1.2760006189346313, + "learning_rate": 3.1071928196190104e-06, + "loss": 0.3259, + "step": 31714 + }, + { + "epoch": 0.6348872707254211, + "grad_norm": 1.0471491813659668, + "learning_rate": 3.1068927675359135e-06, + "loss": 0.2835, + "step": 31715 + }, + { + "epoch": 0.6349072892425494, + "grad_norm": 1.1090573072433472, + "learning_rate": 3.1065927234110325e-06, + "loss": 0.3072, + "step": 31716 + }, + { + "epoch": 0.6349273077596777, + "grad_norm": 1.8906538486480713, + "learning_rate": 3.10629268724563e-06, + "loss": 0.7328, + "step": 31717 + }, + { + "epoch": 0.6349473262768061, + "grad_norm": 1.0063257217407227, + "learning_rate": 3.105992659040965e-06, + "loss": 0.2682, + "step": 31718 + }, + { + "epoch": 0.6349673447939344, + "grad_norm": 1.2337322235107422, + "learning_rate": 3.105692638798297e-06, + "loss": 0.2811, + "step": 31719 + }, + { + "epoch": 0.6349873633110628, + "grad_norm": 1.1309256553649902, + "learning_rate": 3.105392626518892e-06, + "loss": 0.3131, + "step": 31720 + }, + { + "epoch": 0.635007381828191, + "grad_norm": 1.9961559772491455, + "learning_rate": 3.105092622204009e-06, + "loss": 0.7615, + "step": 31721 + }, + { + "epoch": 0.6350274003453195, + "grad_norm": 1.0837905406951904, + "learning_rate": 3.1047926258549067e-06, + "loss": 0.2864, + "step": 31722 + }, + { + "epoch": 0.6350474188624478, + "grad_norm": 1.0960674285888672, + "learning_rate": 3.10449263747285e-06, + "loss": 0.2922, + "step": 31723 + }, + { + "epoch": 0.6350674373795762, + "grad_norm": 1.9407989978790283, + "learning_rate": 3.1041926570590964e-06, + "loss": 0.7835, + "step": 31724 + }, + { + "epoch": 0.6350874558967045, + "grad_norm": 1.05609929561615, + "learning_rate": 3.103892684614911e-06, + "loss": 0.3274, + "step": 31725 + }, + { + "epoch": 0.6351074744138328, + "grad_norm": 1.0456384420394897, + "learning_rate": 3.103592720141553e-06, + "loss": 0.2643, + "step": 31726 + }, + { + "epoch": 0.6351274929309612, + "grad_norm": 1.1052614450454712, + "learning_rate": 3.103292763640282e-06, + "loss": 0.3306, + "step": 31727 + }, + { + "epoch": 0.6351475114480895, + "grad_norm": 1.116814374923706, + "learning_rate": 3.102992815112359e-06, + "loss": 0.2867, + "step": 31728 + }, + { + "epoch": 0.6351675299652179, + "grad_norm": 1.0969645977020264, + "learning_rate": 3.102692874559046e-06, + "loss": 0.3292, + "step": 31729 + }, + { + "epoch": 0.6351875484823462, + "grad_norm": 1.1783372163772583, + "learning_rate": 3.1023929419816056e-06, + "loss": 0.3255, + "step": 31730 + }, + { + "epoch": 0.6352075669994746, + "grad_norm": 1.728980302810669, + "learning_rate": 3.102093017381297e-06, + "loss": 0.8775, + "step": 31731 + }, + { + "epoch": 0.6352275855166029, + "grad_norm": 1.0935715436935425, + "learning_rate": 3.1017931007593782e-06, + "loss": 0.3177, + "step": 31732 + }, + { + "epoch": 0.6352476040337312, + "grad_norm": 1.1250813007354736, + "learning_rate": 3.1014931921171156e-06, + "loss": 0.292, + "step": 31733 + }, + { + "epoch": 0.6352676225508596, + "grad_norm": 1.0417430400848389, + "learning_rate": 3.1011932914557664e-06, + "loss": 0.2876, + "step": 31734 + }, + { + "epoch": 0.6352876410679879, + "grad_norm": 1.097333312034607, + "learning_rate": 3.1008933987765914e-06, + "loss": 0.3106, + "step": 31735 + }, + { + "epoch": 0.6353076595851163, + "grad_norm": 1.0811489820480347, + "learning_rate": 3.100593514080853e-06, + "loss": 0.3086, + "step": 31736 + }, + { + "epoch": 0.6353276781022446, + "grad_norm": 1.056809425354004, + "learning_rate": 3.1002936373698077e-06, + "loss": 0.2874, + "step": 31737 + }, + { + "epoch": 0.635347696619373, + "grad_norm": 1.0501980781555176, + "learning_rate": 3.0999937686447223e-06, + "loss": 0.3156, + "step": 31738 + }, + { + "epoch": 0.6353677151365013, + "grad_norm": 1.0397403240203857, + "learning_rate": 3.099693907906853e-06, + "loss": 0.3115, + "step": 31739 + }, + { + "epoch": 0.6353877336536297, + "grad_norm": 1.0659699440002441, + "learning_rate": 3.099394055157463e-06, + "loss": 0.309, + "step": 31740 + }, + { + "epoch": 0.635407752170758, + "grad_norm": 1.0972071886062622, + "learning_rate": 3.0990942103978082e-06, + "loss": 0.3002, + "step": 31741 + }, + { + "epoch": 0.6354277706878863, + "grad_norm": 1.1503218412399292, + "learning_rate": 3.098794373629154e-06, + "loss": 0.2931, + "step": 31742 + }, + { + "epoch": 0.6354477892050147, + "grad_norm": 1.424588680267334, + "learning_rate": 3.0984945448527603e-06, + "loss": 0.3463, + "step": 31743 + }, + { + "epoch": 0.635467807722143, + "grad_norm": 1.2232915163040161, + "learning_rate": 3.0981947240698855e-06, + "loss": 0.331, + "step": 31744 + }, + { + "epoch": 0.6354878262392714, + "grad_norm": 1.1488900184631348, + "learning_rate": 3.0978949112817912e-06, + "loss": 0.2946, + "step": 31745 + }, + { + "epoch": 0.6355078447563997, + "grad_norm": 1.178552269935608, + "learning_rate": 3.0975951064897354e-06, + "loss": 0.3319, + "step": 31746 + }, + { + "epoch": 0.6355278632735281, + "grad_norm": 1.0685797929763794, + "learning_rate": 3.097295309694982e-06, + "loss": 0.3392, + "step": 31747 + }, + { + "epoch": 0.6355478817906564, + "grad_norm": 1.1232820749282837, + "learning_rate": 3.0969955208987888e-06, + "loss": 0.2796, + "step": 31748 + }, + { + "epoch": 0.6355679003077847, + "grad_norm": 1.1209518909454346, + "learning_rate": 3.0966957401024183e-06, + "loss": 0.3122, + "step": 31749 + }, + { + "epoch": 0.6355879188249131, + "grad_norm": 1.1483112573623657, + "learning_rate": 3.0963959673071274e-06, + "loss": 0.3193, + "step": 31750 + }, + { + "epoch": 0.6356079373420414, + "grad_norm": 1.1639842987060547, + "learning_rate": 3.0960962025141794e-06, + "loss": 0.3398, + "step": 31751 + }, + { + "epoch": 0.6356279558591698, + "grad_norm": 1.0480760335922241, + "learning_rate": 3.0957964457248342e-06, + "loss": 0.29, + "step": 31752 + }, + { + "epoch": 0.635647974376298, + "grad_norm": 1.2399932146072388, + "learning_rate": 3.09549669694035e-06, + "loss": 0.3149, + "step": 31753 + }, + { + "epoch": 0.6356679928934265, + "grad_norm": 1.1097482442855835, + "learning_rate": 3.0951969561619866e-06, + "loss": 0.2939, + "step": 31754 + }, + { + "epoch": 0.6356880114105548, + "grad_norm": 1.1673798561096191, + "learning_rate": 3.0948972233910056e-06, + "loss": 0.2836, + "step": 31755 + }, + { + "epoch": 0.6357080299276832, + "grad_norm": 1.0825681686401367, + "learning_rate": 3.0945974986286687e-06, + "loss": 0.2766, + "step": 31756 + }, + { + "epoch": 0.6357280484448115, + "grad_norm": 1.1058564186096191, + "learning_rate": 3.094297781876233e-06, + "loss": 0.2663, + "step": 31757 + }, + { + "epoch": 0.6357480669619398, + "grad_norm": 1.1501410007476807, + "learning_rate": 3.0939980731349595e-06, + "loss": 0.3155, + "step": 31758 + }, + { + "epoch": 0.6357680854790682, + "grad_norm": 2.021698474884033, + "learning_rate": 3.0936983724061054e-06, + "loss": 0.7505, + "step": 31759 + }, + { + "epoch": 0.6357881039961965, + "grad_norm": 2.0337395668029785, + "learning_rate": 3.093398679690936e-06, + "loss": 0.6918, + "step": 31760 + }, + { + "epoch": 0.6358081225133249, + "grad_norm": 1.0324567556381226, + "learning_rate": 3.093098994990707e-06, + "loss": 0.2899, + "step": 31761 + }, + { + "epoch": 0.6358281410304532, + "grad_norm": 1.1771302223205566, + "learning_rate": 3.0927993183066807e-06, + "loss": 0.3073, + "step": 31762 + }, + { + "epoch": 0.6358481595475816, + "grad_norm": 1.1053788661956787, + "learning_rate": 3.092499649640113e-06, + "loss": 0.2861, + "step": 31763 + }, + { + "epoch": 0.6358681780647099, + "grad_norm": 2.0814368724823, + "learning_rate": 3.0921999889922684e-06, + "loss": 0.8384, + "step": 31764 + }, + { + "epoch": 0.6358881965818381, + "grad_norm": 1.0044375658035278, + "learning_rate": 3.0919003363644045e-06, + "loss": 0.279, + "step": 31765 + }, + { + "epoch": 0.6359082150989666, + "grad_norm": 1.3304470777511597, + "learning_rate": 3.0916006917577813e-06, + "loss": 0.3008, + "step": 31766 + }, + { + "epoch": 0.6359282336160949, + "grad_norm": 1.9745696783065796, + "learning_rate": 3.0913010551736566e-06, + "loss": 0.7664, + "step": 31767 + }, + { + "epoch": 0.6359482521332233, + "grad_norm": 2.012742519378662, + "learning_rate": 3.091001426613291e-06, + "loss": 0.7332, + "step": 31768 + }, + { + "epoch": 0.6359682706503516, + "grad_norm": 1.1196364164352417, + "learning_rate": 3.0907018060779464e-06, + "loss": 0.3033, + "step": 31769 + }, + { + "epoch": 0.63598828916748, + "grad_norm": 1.2171905040740967, + "learning_rate": 3.0904021935688804e-06, + "loss": 0.3198, + "step": 31770 + }, + { + "epoch": 0.6360083076846083, + "grad_norm": 1.0602760314941406, + "learning_rate": 3.0901025890873526e-06, + "loss": 0.3142, + "step": 31771 + }, + { + "epoch": 0.6360283262017367, + "grad_norm": 1.1953494548797607, + "learning_rate": 3.0898029926346207e-06, + "loss": 0.3053, + "step": 31772 + }, + { + "epoch": 0.636048344718865, + "grad_norm": 2.1278765201568604, + "learning_rate": 3.0895034042119483e-06, + "loss": 0.7489, + "step": 31773 + }, + { + "epoch": 0.6360683632359933, + "grad_norm": 1.1119691133499146, + "learning_rate": 3.089203823820591e-06, + "loss": 0.3201, + "step": 31774 + }, + { + "epoch": 0.6360883817531217, + "grad_norm": 1.1293094158172607, + "learning_rate": 3.0889042514618105e-06, + "loss": 0.388, + "step": 31775 + }, + { + "epoch": 0.63610840027025, + "grad_norm": 1.7819572687149048, + "learning_rate": 3.088604687136866e-06, + "loss": 0.8186, + "step": 31776 + }, + { + "epoch": 0.6361284187873784, + "grad_norm": 1.8590368032455444, + "learning_rate": 3.0883051308470125e-06, + "loss": 0.7424, + "step": 31777 + }, + { + "epoch": 0.6361484373045067, + "grad_norm": 1.189971685409546, + "learning_rate": 3.088005582593516e-06, + "loss": 0.2834, + "step": 31778 + }, + { + "epoch": 0.6361684558216351, + "grad_norm": 1.0408623218536377, + "learning_rate": 3.087706042377633e-06, + "loss": 0.2886, + "step": 31779 + }, + { + "epoch": 0.6361884743387634, + "grad_norm": 1.8928278684616089, + "learning_rate": 3.08740651020062e-06, + "loss": 0.785, + "step": 31780 + }, + { + "epoch": 0.6362084928558916, + "grad_norm": 1.1009929180145264, + "learning_rate": 3.087106986063738e-06, + "loss": 0.2944, + "step": 31781 + }, + { + "epoch": 0.6362285113730201, + "grad_norm": 1.135877013206482, + "learning_rate": 3.0868074699682486e-06, + "loss": 0.3297, + "step": 31782 + }, + { + "epoch": 0.6362485298901484, + "grad_norm": 1.13131582736969, + "learning_rate": 3.0865079619154092e-06, + "loss": 0.3352, + "step": 31783 + }, + { + "epoch": 0.6362685484072768, + "grad_norm": 1.201817512512207, + "learning_rate": 3.086208461906478e-06, + "loss": 0.3451, + "step": 31784 + }, + { + "epoch": 0.636288566924405, + "grad_norm": 1.1522042751312256, + "learning_rate": 3.085908969942713e-06, + "loss": 0.3361, + "step": 31785 + }, + { + "epoch": 0.6363085854415335, + "grad_norm": 1.2385128736495972, + "learning_rate": 3.0856094860253767e-06, + "loss": 0.3333, + "step": 31786 + }, + { + "epoch": 0.6363286039586618, + "grad_norm": 1.8750396966934204, + "learning_rate": 3.085310010155725e-06, + "loss": 0.7916, + "step": 31787 + }, + { + "epoch": 0.6363486224757902, + "grad_norm": 0.9908729195594788, + "learning_rate": 3.0850105423350185e-06, + "loss": 0.2866, + "step": 31788 + }, + { + "epoch": 0.6363686409929185, + "grad_norm": 2.008768081665039, + "learning_rate": 3.0847110825645166e-06, + "loss": 0.7807, + "step": 31789 + }, + { + "epoch": 0.6363886595100468, + "grad_norm": 1.0043599605560303, + "learning_rate": 3.084411630845474e-06, + "loss": 0.3069, + "step": 31790 + }, + { + "epoch": 0.6364086780271752, + "grad_norm": 1.6870187520980835, + "learning_rate": 3.0841121871791556e-06, + "loss": 0.3147, + "step": 31791 + }, + { + "epoch": 0.6364286965443035, + "grad_norm": 1.0504077672958374, + "learning_rate": 3.083812751566817e-06, + "loss": 0.294, + "step": 31792 + }, + { + "epoch": 0.6364487150614319, + "grad_norm": 1.1835670471191406, + "learning_rate": 3.0835133240097157e-06, + "loss": 0.3211, + "step": 31793 + }, + { + "epoch": 0.6364687335785602, + "grad_norm": 1.3370018005371094, + "learning_rate": 3.0832139045091113e-06, + "loss": 0.3108, + "step": 31794 + }, + { + "epoch": 0.6364887520956886, + "grad_norm": 1.0503405332565308, + "learning_rate": 3.0829144930662656e-06, + "loss": 0.2725, + "step": 31795 + }, + { + "epoch": 0.6365087706128169, + "grad_norm": 1.0948151350021362, + "learning_rate": 3.082615089682435e-06, + "loss": 0.3384, + "step": 31796 + }, + { + "epoch": 0.6365287891299451, + "grad_norm": 1.2082914113998413, + "learning_rate": 3.0823156943588774e-06, + "loss": 0.3337, + "step": 31797 + }, + { + "epoch": 0.6365488076470736, + "grad_norm": 1.1583080291748047, + "learning_rate": 3.0820163070968504e-06, + "loss": 0.3133, + "step": 31798 + }, + { + "epoch": 0.6365688261642019, + "grad_norm": 1.1072888374328613, + "learning_rate": 3.0817169278976145e-06, + "loss": 0.2687, + "step": 31799 + }, + { + "epoch": 0.6365888446813303, + "grad_norm": 1.0729761123657227, + "learning_rate": 3.0814175567624283e-06, + "loss": 0.2628, + "step": 31800 + }, + { + "epoch": 0.6366088631984586, + "grad_norm": 1.0039304494857788, + "learning_rate": 3.0811181936925505e-06, + "loss": 0.2772, + "step": 31801 + }, + { + "epoch": 0.636628881715587, + "grad_norm": 1.1241514682769775, + "learning_rate": 3.080818838689239e-06, + "loss": 0.2898, + "step": 31802 + }, + { + "epoch": 0.6366489002327153, + "grad_norm": 1.0852887630462646, + "learning_rate": 3.0805194917537497e-06, + "loss": 0.3244, + "step": 31803 + }, + { + "epoch": 0.6366689187498437, + "grad_norm": 1.7697124481201172, + "learning_rate": 3.0802201528873454e-06, + "loss": 0.7757, + "step": 31804 + }, + { + "epoch": 0.636688937266972, + "grad_norm": 1.3255932331085205, + "learning_rate": 3.0799208220912825e-06, + "loss": 0.3477, + "step": 31805 + }, + { + "epoch": 0.6367089557841003, + "grad_norm": 1.1015459299087524, + "learning_rate": 3.0796214993668185e-06, + "loss": 0.2681, + "step": 31806 + }, + { + "epoch": 0.6367289743012287, + "grad_norm": 1.265056848526001, + "learning_rate": 3.0793221847152133e-06, + "loss": 0.2754, + "step": 31807 + }, + { + "epoch": 0.636748992818357, + "grad_norm": 1.0448554754257202, + "learning_rate": 3.0790228781377223e-06, + "loss": 0.2877, + "step": 31808 + }, + { + "epoch": 0.6367690113354854, + "grad_norm": 1.0982507467269897, + "learning_rate": 3.078723579635608e-06, + "loss": 0.2794, + "step": 31809 + }, + { + "epoch": 0.6367890298526137, + "grad_norm": 1.1577262878417969, + "learning_rate": 3.0784242892101255e-06, + "loss": 0.3036, + "step": 31810 + }, + { + "epoch": 0.6368090483697421, + "grad_norm": 1.0344041585922241, + "learning_rate": 3.0781250068625333e-06, + "loss": 0.2855, + "step": 31811 + }, + { + "epoch": 0.6368290668868704, + "grad_norm": 1.0612726211547852, + "learning_rate": 3.077825732594091e-06, + "loss": 0.3322, + "step": 31812 + }, + { + "epoch": 0.6368490854039986, + "grad_norm": 1.0790315866470337, + "learning_rate": 3.077526466406054e-06, + "loss": 0.2572, + "step": 31813 + }, + { + "epoch": 0.636869103921127, + "grad_norm": 1.063132405281067, + "learning_rate": 3.0772272082996836e-06, + "loss": 0.3412, + "step": 31814 + }, + { + "epoch": 0.6368891224382554, + "grad_norm": 1.2427691221237183, + "learning_rate": 3.0769279582762366e-06, + "loss": 0.3567, + "step": 31815 + }, + { + "epoch": 0.6369091409553838, + "grad_norm": 1.038824200630188, + "learning_rate": 3.076628716336968e-06, + "loss": 0.2502, + "step": 31816 + }, + { + "epoch": 0.636929159472512, + "grad_norm": 1.1857209205627441, + "learning_rate": 3.076329482483141e-06, + "loss": 0.3103, + "step": 31817 + }, + { + "epoch": 0.6369491779896405, + "grad_norm": 1.2466764450073242, + "learning_rate": 3.0760302567160104e-06, + "loss": 0.3027, + "step": 31818 + }, + { + "epoch": 0.6369691965067688, + "grad_norm": 1.1442158222198486, + "learning_rate": 3.0757310390368333e-06, + "loss": 0.3099, + "step": 31819 + }, + { + "epoch": 0.6369892150238972, + "grad_norm": 1.089109182357788, + "learning_rate": 3.0754318294468704e-06, + "loss": 0.3235, + "step": 31820 + }, + { + "epoch": 0.6370092335410255, + "grad_norm": 1.1236145496368408, + "learning_rate": 3.0751326279473757e-06, + "loss": 0.3193, + "step": 31821 + }, + { + "epoch": 0.6370292520581537, + "grad_norm": 1.2727683782577515, + "learning_rate": 3.0748334345396114e-06, + "loss": 0.2922, + "step": 31822 + }, + { + "epoch": 0.6370492705752822, + "grad_norm": 1.171357274055481, + "learning_rate": 3.074534249224833e-06, + "loss": 0.3529, + "step": 31823 + }, + { + "epoch": 0.6370692890924105, + "grad_norm": 1.044339895248413, + "learning_rate": 3.074235072004297e-06, + "loss": 0.3034, + "step": 31824 + }, + { + "epoch": 0.6370893076095389, + "grad_norm": 1.130822777748108, + "learning_rate": 3.0739359028792625e-06, + "loss": 0.2691, + "step": 31825 + }, + { + "epoch": 0.6371093261266672, + "grad_norm": 1.1037375926971436, + "learning_rate": 3.073636741850987e-06, + "loss": 0.3186, + "step": 31826 + }, + { + "epoch": 0.6371293446437956, + "grad_norm": 1.1759345531463623, + "learning_rate": 3.073337588920729e-06, + "loss": 0.2964, + "step": 31827 + }, + { + "epoch": 0.6371493631609239, + "grad_norm": 1.199214220046997, + "learning_rate": 3.0730384440897454e-06, + "loss": 0.3095, + "step": 31828 + }, + { + "epoch": 0.6371693816780521, + "grad_norm": 1.2299449443817139, + "learning_rate": 3.0727393073592925e-06, + "loss": 0.3027, + "step": 31829 + }, + { + "epoch": 0.6371894001951806, + "grad_norm": 1.0609623193740845, + "learning_rate": 3.072440178730628e-06, + "loss": 0.3148, + "step": 31830 + }, + { + "epoch": 0.6372094187123089, + "grad_norm": 1.0501714944839478, + "learning_rate": 3.0721410582050115e-06, + "loss": 0.277, + "step": 31831 + }, + { + "epoch": 0.6372294372294373, + "grad_norm": 1.0840977430343628, + "learning_rate": 3.0718419457836986e-06, + "loss": 0.2745, + "step": 31832 + }, + { + "epoch": 0.6372494557465656, + "grad_norm": 1.217772364616394, + "learning_rate": 3.071542841467948e-06, + "loss": 0.3204, + "step": 31833 + }, + { + "epoch": 0.637269474263694, + "grad_norm": 1.1321693658828735, + "learning_rate": 3.071243745259014e-06, + "loss": 0.3307, + "step": 31834 + }, + { + "epoch": 0.6372894927808223, + "grad_norm": 1.0991014242172241, + "learning_rate": 3.0709446571581586e-06, + "loss": 0.3184, + "step": 31835 + }, + { + "epoch": 0.6373095112979507, + "grad_norm": 1.209794282913208, + "learning_rate": 3.0706455771666367e-06, + "loss": 0.284, + "step": 31836 + }, + { + "epoch": 0.637329529815079, + "grad_norm": 0.9875596761703491, + "learning_rate": 3.070346505285704e-06, + "loss": 0.2624, + "step": 31837 + }, + { + "epoch": 0.6373495483322072, + "grad_norm": 1.1072611808776855, + "learning_rate": 3.0700474415166214e-06, + "loss": 0.2915, + "step": 31838 + }, + { + "epoch": 0.6373695668493357, + "grad_norm": 1.1567491292953491, + "learning_rate": 3.0697483858606415e-06, + "loss": 0.3017, + "step": 31839 + }, + { + "epoch": 0.637389585366464, + "grad_norm": 1.12168550491333, + "learning_rate": 3.0694493383190255e-06, + "loss": 0.281, + "step": 31840 + }, + { + "epoch": 0.6374096038835924, + "grad_norm": 1.1121824979782104, + "learning_rate": 3.0691502988930295e-06, + "loss": 0.267, + "step": 31841 + }, + { + "epoch": 0.6374296224007207, + "grad_norm": 1.8409807682037354, + "learning_rate": 3.06885126758391e-06, + "loss": 0.7407, + "step": 31842 + }, + { + "epoch": 0.6374496409178491, + "grad_norm": 1.0692830085754395, + "learning_rate": 3.068552244392922e-06, + "loss": 0.3366, + "step": 31843 + }, + { + "epoch": 0.6374696594349774, + "grad_norm": 1.1649737358093262, + "learning_rate": 3.0682532293213274e-06, + "loss": 0.281, + "step": 31844 + }, + { + "epoch": 0.6374896779521056, + "grad_norm": 1.0821667909622192, + "learning_rate": 3.0679542223703786e-06, + "loss": 0.3447, + "step": 31845 + }, + { + "epoch": 0.637509696469234, + "grad_norm": 1.1423245668411255, + "learning_rate": 3.067655223541336e-06, + "loss": 0.3003, + "step": 31846 + }, + { + "epoch": 0.6375297149863624, + "grad_norm": 1.099382758140564, + "learning_rate": 3.0673562328354533e-06, + "loss": 0.3164, + "step": 31847 + }, + { + "epoch": 0.6375497335034908, + "grad_norm": 1.1642385721206665, + "learning_rate": 3.067057250253991e-06, + "loss": 0.3481, + "step": 31848 + }, + { + "epoch": 0.637569752020619, + "grad_norm": 1.0449275970458984, + "learning_rate": 3.0667582757982036e-06, + "loss": 0.3106, + "step": 31849 + }, + { + "epoch": 0.6375897705377475, + "grad_norm": 2.1216678619384766, + "learning_rate": 3.066459309469347e-06, + "loss": 0.726, + "step": 31850 + }, + { + "epoch": 0.6376097890548758, + "grad_norm": 1.1944094896316528, + "learning_rate": 3.066160351268681e-06, + "loss": 0.2809, + "step": 31851 + }, + { + "epoch": 0.6376298075720042, + "grad_norm": 1.2490606307983398, + "learning_rate": 3.0658614011974584e-06, + "loss": 0.2987, + "step": 31852 + }, + { + "epoch": 0.6376498260891325, + "grad_norm": 1.208195447921753, + "learning_rate": 3.065562459256941e-06, + "loss": 0.2734, + "step": 31853 + }, + { + "epoch": 0.6376698446062607, + "grad_norm": 1.333982229232788, + "learning_rate": 3.0652635254483822e-06, + "loss": 0.3343, + "step": 31854 + }, + { + "epoch": 0.6376898631233892, + "grad_norm": 1.885069727897644, + "learning_rate": 3.0649645997730386e-06, + "loss": 0.8131, + "step": 31855 + }, + { + "epoch": 0.6377098816405175, + "grad_norm": 1.103348970413208, + "learning_rate": 3.064665682232166e-06, + "loss": 0.2695, + "step": 31856 + }, + { + "epoch": 0.6377299001576459, + "grad_norm": 1.1531250476837158, + "learning_rate": 3.0643667728270243e-06, + "loss": 0.3025, + "step": 31857 + }, + { + "epoch": 0.6377499186747742, + "grad_norm": 1.1861335039138794, + "learning_rate": 3.0640678715588673e-06, + "loss": 0.307, + "step": 31858 + }, + { + "epoch": 0.6377699371919026, + "grad_norm": 1.1817865371704102, + "learning_rate": 3.0637689784289533e-06, + "loss": 0.2559, + "step": 31859 + }, + { + "epoch": 0.6377899557090309, + "grad_norm": 0.9733739495277405, + "learning_rate": 3.0634700934385376e-06, + "loss": 0.2854, + "step": 31860 + }, + { + "epoch": 0.6378099742261591, + "grad_norm": 1.0265097618103027, + "learning_rate": 3.063171216588875e-06, + "loss": 0.2883, + "step": 31861 + }, + { + "epoch": 0.6378299927432876, + "grad_norm": 1.956555724143982, + "learning_rate": 3.062872347881226e-06, + "loss": 0.7741, + "step": 31862 + }, + { + "epoch": 0.6378500112604159, + "grad_norm": 1.2147645950317383, + "learning_rate": 3.0625734873168435e-06, + "loss": 0.2668, + "step": 31863 + }, + { + "epoch": 0.6378700297775443, + "grad_norm": 1.1382675170898438, + "learning_rate": 3.0622746348969863e-06, + "loss": 0.3388, + "step": 31864 + }, + { + "epoch": 0.6378900482946726, + "grad_norm": 1.046471118927002, + "learning_rate": 3.0619757906229073e-06, + "loss": 0.3113, + "step": 31865 + }, + { + "epoch": 0.637910066811801, + "grad_norm": 1.1540541648864746, + "learning_rate": 3.061676954495867e-06, + "loss": 0.3087, + "step": 31866 + }, + { + "epoch": 0.6379300853289293, + "grad_norm": 1.9620623588562012, + "learning_rate": 3.0613781265171205e-06, + "loss": 0.7668, + "step": 31867 + }, + { + "epoch": 0.6379501038460575, + "grad_norm": 1.2547838687896729, + "learning_rate": 3.061079306687922e-06, + "loss": 0.3243, + "step": 31868 + }, + { + "epoch": 0.637970122363186, + "grad_norm": 1.0196675062179565, + "learning_rate": 3.0607804950095288e-06, + "loss": 0.2947, + "step": 31869 + }, + { + "epoch": 0.6379901408803142, + "grad_norm": 1.0683739185333252, + "learning_rate": 3.060481691483196e-06, + "loss": 0.2698, + "step": 31870 + }, + { + "epoch": 0.6380101593974427, + "grad_norm": 1.063132882118225, + "learning_rate": 3.0601828961101814e-06, + "loss": 0.2919, + "step": 31871 + }, + { + "epoch": 0.638030177914571, + "grad_norm": 1.0761001110076904, + "learning_rate": 3.059884108891742e-06, + "loss": 0.3521, + "step": 31872 + }, + { + "epoch": 0.6380501964316994, + "grad_norm": 1.1417760848999023, + "learning_rate": 3.0595853298291313e-06, + "loss": 0.2782, + "step": 31873 + }, + { + "epoch": 0.6380702149488277, + "grad_norm": 1.0175620317459106, + "learning_rate": 3.059286558923604e-06, + "loss": 0.2928, + "step": 31874 + }, + { + "epoch": 0.6380902334659561, + "grad_norm": 1.049245834350586, + "learning_rate": 3.0589877961764213e-06, + "loss": 0.2958, + "step": 31875 + }, + { + "epoch": 0.6381102519830844, + "grad_norm": 1.0175389051437378, + "learning_rate": 3.0586890415888353e-06, + "loss": 0.2851, + "step": 31876 + }, + { + "epoch": 0.6381302705002126, + "grad_norm": 1.031406044960022, + "learning_rate": 3.0583902951621038e-06, + "loss": 0.3246, + "step": 31877 + }, + { + "epoch": 0.638150289017341, + "grad_norm": 1.1770802736282349, + "learning_rate": 3.058091556897479e-06, + "loss": 0.287, + "step": 31878 + }, + { + "epoch": 0.6381703075344693, + "grad_norm": 1.118659257888794, + "learning_rate": 3.057792826796222e-06, + "loss": 0.3156, + "step": 31879 + }, + { + "epoch": 0.6381903260515978, + "grad_norm": 1.0186805725097656, + "learning_rate": 3.0574941048595853e-06, + "loss": 0.2949, + "step": 31880 + }, + { + "epoch": 0.638210344568726, + "grad_norm": 1.112322449684143, + "learning_rate": 3.057195391088826e-06, + "loss": 0.359, + "step": 31881 + }, + { + "epoch": 0.6382303630858545, + "grad_norm": 1.1900056600570679, + "learning_rate": 3.056896685485198e-06, + "loss": 0.3148, + "step": 31882 + }, + { + "epoch": 0.6382503816029828, + "grad_norm": 1.1905626058578491, + "learning_rate": 3.0565979880499576e-06, + "loss": 0.3111, + "step": 31883 + }, + { + "epoch": 0.638270400120111, + "grad_norm": 1.064573049545288, + "learning_rate": 3.056299298784362e-06, + "loss": 0.3076, + "step": 31884 + }, + { + "epoch": 0.6382904186372395, + "grad_norm": 1.1253219842910767, + "learning_rate": 3.056000617689666e-06, + "loss": 0.3405, + "step": 31885 + }, + { + "epoch": 0.6383104371543677, + "grad_norm": 1.1071441173553467, + "learning_rate": 3.0557019447671256e-06, + "loss": 0.3055, + "step": 31886 + }, + { + "epoch": 0.6383304556714962, + "grad_norm": 1.0744081735610962, + "learning_rate": 3.0554032800179936e-06, + "loss": 0.3315, + "step": 31887 + }, + { + "epoch": 0.6383504741886245, + "grad_norm": 1.065029501914978, + "learning_rate": 3.05510462344353e-06, + "loss": 0.2907, + "step": 31888 + }, + { + "epoch": 0.6383704927057529, + "grad_norm": 1.1049776077270508, + "learning_rate": 3.0548059750449865e-06, + "loss": 0.2756, + "step": 31889 + }, + { + "epoch": 0.6383905112228812, + "grad_norm": 1.149679183959961, + "learning_rate": 3.054507334823621e-06, + "loss": 0.3074, + "step": 31890 + }, + { + "epoch": 0.6384105297400096, + "grad_norm": 1.1343508958816528, + "learning_rate": 3.054208702780689e-06, + "loss": 0.2942, + "step": 31891 + }, + { + "epoch": 0.6384305482571379, + "grad_norm": 1.0500142574310303, + "learning_rate": 3.053910078917442e-06, + "loss": 0.3003, + "step": 31892 + }, + { + "epoch": 0.6384505667742661, + "grad_norm": 1.3287248611450195, + "learning_rate": 3.0536114632351398e-06, + "loss": 0.2944, + "step": 31893 + }, + { + "epoch": 0.6384705852913946, + "grad_norm": 1.8439894914627075, + "learning_rate": 3.053312855735037e-06, + "loss": 0.7556, + "step": 31894 + }, + { + "epoch": 0.6384906038085228, + "grad_norm": 1.4037365913391113, + "learning_rate": 3.053014256418387e-06, + "loss": 0.3094, + "step": 31895 + }, + { + "epoch": 0.6385106223256513, + "grad_norm": 1.1793487071990967, + "learning_rate": 3.0527156652864458e-06, + "loss": 0.3333, + "step": 31896 + }, + { + "epoch": 0.6385306408427796, + "grad_norm": 1.1993722915649414, + "learning_rate": 3.0524170823404692e-06, + "loss": 0.2829, + "step": 31897 + }, + { + "epoch": 0.638550659359908, + "grad_norm": 1.2065469026565552, + "learning_rate": 3.052118507581713e-06, + "loss": 0.3421, + "step": 31898 + }, + { + "epoch": 0.6385706778770363, + "grad_norm": 1.1247025728225708, + "learning_rate": 3.051819941011431e-06, + "loss": 0.3311, + "step": 31899 + }, + { + "epoch": 0.6385906963941645, + "grad_norm": 1.1439956426620483, + "learning_rate": 3.0515213826308767e-06, + "loss": 0.2845, + "step": 31900 + }, + { + "epoch": 0.638610714911293, + "grad_norm": 1.125879168510437, + "learning_rate": 3.0512228324413096e-06, + "loss": 0.3311, + "step": 31901 + }, + { + "epoch": 0.6386307334284212, + "grad_norm": 1.2332637310028076, + "learning_rate": 3.0509242904439813e-06, + "loss": 0.3047, + "step": 31902 + }, + { + "epoch": 0.6386507519455497, + "grad_norm": 1.249057650566101, + "learning_rate": 3.050625756640149e-06, + "loss": 0.3391, + "step": 31903 + }, + { + "epoch": 0.638670770462678, + "grad_norm": 1.1433616876602173, + "learning_rate": 3.0503272310310662e-06, + "loss": 0.3164, + "step": 31904 + }, + { + "epoch": 0.6386907889798064, + "grad_norm": 1.1680021286010742, + "learning_rate": 3.050028713617986e-06, + "loss": 0.3284, + "step": 31905 + }, + { + "epoch": 0.6387108074969347, + "grad_norm": 1.1231507062911987, + "learning_rate": 3.049730204402167e-06, + "loss": 0.3509, + "step": 31906 + }, + { + "epoch": 0.6387308260140631, + "grad_norm": 1.0813438892364502, + "learning_rate": 3.049431703384863e-06, + "loss": 0.288, + "step": 31907 + }, + { + "epoch": 0.6387508445311914, + "grad_norm": 1.1742554903030396, + "learning_rate": 3.0491332105673276e-06, + "loss": 0.3024, + "step": 31908 + }, + { + "epoch": 0.6387708630483196, + "grad_norm": 1.2311351299285889, + "learning_rate": 3.0488347259508165e-06, + "loss": 0.3064, + "step": 31909 + }, + { + "epoch": 0.638790881565448, + "grad_norm": 1.2425867319107056, + "learning_rate": 3.0485362495365846e-06, + "loss": 0.3447, + "step": 31910 + }, + { + "epoch": 0.6388109000825763, + "grad_norm": 1.1189287900924683, + "learning_rate": 3.0482377813258862e-06, + "loss": 0.3318, + "step": 31911 + }, + { + "epoch": 0.6388309185997048, + "grad_norm": 1.1360441446304321, + "learning_rate": 3.047939321319977e-06, + "loss": 0.3441, + "step": 31912 + }, + { + "epoch": 0.638850937116833, + "grad_norm": 1.1540963649749756, + "learning_rate": 3.0476408695201106e-06, + "loss": 0.3013, + "step": 31913 + }, + { + "epoch": 0.6388709556339615, + "grad_norm": 1.2538474798202515, + "learning_rate": 3.0473424259275397e-06, + "loss": 0.3374, + "step": 31914 + }, + { + "epoch": 0.6388909741510898, + "grad_norm": 1.0114495754241943, + "learning_rate": 3.047043990543522e-06, + "loss": 0.283, + "step": 31915 + }, + { + "epoch": 0.638910992668218, + "grad_norm": 1.1929259300231934, + "learning_rate": 3.046745563369312e-06, + "loss": 0.3575, + "step": 31916 + }, + { + "epoch": 0.6389310111853465, + "grad_norm": 1.032845377922058, + "learning_rate": 3.046447144406164e-06, + "loss": 0.2555, + "step": 31917 + }, + { + "epoch": 0.6389510297024747, + "grad_norm": 1.11261785030365, + "learning_rate": 3.0461487336553286e-06, + "loss": 0.2586, + "step": 31918 + }, + { + "epoch": 0.6389710482196032, + "grad_norm": 1.1759941577911377, + "learning_rate": 3.0458503311180665e-06, + "loss": 0.3093, + "step": 31919 + }, + { + "epoch": 0.6389910667367315, + "grad_norm": 1.138464093208313, + "learning_rate": 3.045551936795629e-06, + "loss": 0.2976, + "step": 31920 + }, + { + "epoch": 0.6390110852538599, + "grad_norm": 1.0997179746627808, + "learning_rate": 3.045253550689269e-06, + "loss": 0.2876, + "step": 31921 + }, + { + "epoch": 0.6390311037709882, + "grad_norm": 1.0523145198822021, + "learning_rate": 3.0449551728002434e-06, + "loss": 0.3183, + "step": 31922 + }, + { + "epoch": 0.6390511222881166, + "grad_norm": 1.0786155462265015, + "learning_rate": 3.044656803129804e-06, + "loss": 0.298, + "step": 31923 + }, + { + "epoch": 0.6390711408052449, + "grad_norm": 1.0943433046340942, + "learning_rate": 3.044358441679208e-06, + "loss": 0.2981, + "step": 31924 + }, + { + "epoch": 0.6390911593223731, + "grad_norm": 1.0873090028762817, + "learning_rate": 3.044060088449709e-06, + "loss": 0.3143, + "step": 31925 + }, + { + "epoch": 0.6391111778395016, + "grad_norm": 1.0573407411575317, + "learning_rate": 3.0437617434425605e-06, + "loss": 0.331, + "step": 31926 + }, + { + "epoch": 0.6391311963566298, + "grad_norm": 2.0585267543792725, + "learning_rate": 3.0434634066590143e-06, + "loss": 0.7624, + "step": 31927 + }, + { + "epoch": 0.6391512148737583, + "grad_norm": 1.1663846969604492, + "learning_rate": 3.0431650781003273e-06, + "loss": 0.3253, + "step": 31928 + }, + { + "epoch": 0.6391712333908866, + "grad_norm": 1.0763161182403564, + "learning_rate": 3.042866757767755e-06, + "loss": 0.2961, + "step": 31929 + }, + { + "epoch": 0.639191251908015, + "grad_norm": 1.1201494932174683, + "learning_rate": 3.0425684456625493e-06, + "loss": 0.3222, + "step": 31930 + }, + { + "epoch": 0.6392112704251433, + "grad_norm": 1.273363471031189, + "learning_rate": 3.0422701417859624e-06, + "loss": 0.299, + "step": 31931 + }, + { + "epoch": 0.6392312889422715, + "grad_norm": 1.055376410484314, + "learning_rate": 3.0419718461392535e-06, + "loss": 0.291, + "step": 31932 + }, + { + "epoch": 0.6392513074594, + "grad_norm": 1.9136874675750732, + "learning_rate": 3.0416735587236724e-06, + "loss": 0.7503, + "step": 31933 + }, + { + "epoch": 0.6392713259765282, + "grad_norm": 1.1237839460372925, + "learning_rate": 3.041375279540474e-06, + "loss": 0.2871, + "step": 31934 + }, + { + "epoch": 0.6392913444936567, + "grad_norm": 1.0983355045318604, + "learning_rate": 3.0410770085909126e-06, + "loss": 0.2999, + "step": 31935 + }, + { + "epoch": 0.639311363010785, + "grad_norm": 1.112512469291687, + "learning_rate": 3.0407787458762404e-06, + "loss": 0.3178, + "step": 31936 + }, + { + "epoch": 0.6393313815279134, + "grad_norm": 1.100070834159851, + "learning_rate": 3.040480491397715e-06, + "loss": 0.2515, + "step": 31937 + }, + { + "epoch": 0.6393514000450417, + "grad_norm": 1.866222858428955, + "learning_rate": 3.0401822451565876e-06, + "loss": 0.7643, + "step": 31938 + }, + { + "epoch": 0.6393714185621701, + "grad_norm": 1.1887503862380981, + "learning_rate": 3.0398840071541126e-06, + "loss": 0.3518, + "step": 31939 + }, + { + "epoch": 0.6393914370792984, + "grad_norm": 1.2833397388458252, + "learning_rate": 3.039585777391541e-06, + "loss": 0.3807, + "step": 31940 + }, + { + "epoch": 0.6394114555964266, + "grad_norm": 1.0953876972198486, + "learning_rate": 3.03928755587013e-06, + "loss": 0.2965, + "step": 31941 + }, + { + "epoch": 0.639431474113555, + "grad_norm": 1.1378164291381836, + "learning_rate": 3.0389893425911334e-06, + "loss": 0.3212, + "step": 31942 + }, + { + "epoch": 0.6394514926306833, + "grad_norm": 1.806835651397705, + "learning_rate": 3.0386911375558037e-06, + "loss": 0.7089, + "step": 31943 + }, + { + "epoch": 0.6394715111478118, + "grad_norm": 1.8429770469665527, + "learning_rate": 3.038392940765394e-06, + "loss": 0.816, + "step": 31944 + }, + { + "epoch": 0.63949152966494, + "grad_norm": 1.0549111366271973, + "learning_rate": 3.0380947522211567e-06, + "loss": 0.2953, + "step": 31945 + }, + { + "epoch": 0.6395115481820685, + "grad_norm": 1.166246771812439, + "learning_rate": 3.0377965719243483e-06, + "loss": 0.3152, + "step": 31946 + }, + { + "epoch": 0.6395315666991968, + "grad_norm": 1.1742331981658936, + "learning_rate": 3.03749839987622e-06, + "loss": 0.3615, + "step": 31947 + }, + { + "epoch": 0.639551585216325, + "grad_norm": 2.05253267288208, + "learning_rate": 3.0372002360780273e-06, + "loss": 0.7212, + "step": 31948 + }, + { + "epoch": 0.6395716037334535, + "grad_norm": 1.1857084035873413, + "learning_rate": 3.0369020805310202e-06, + "loss": 0.3, + "step": 31949 + }, + { + "epoch": 0.6395916222505817, + "grad_norm": 1.0946531295776367, + "learning_rate": 3.036603933236456e-06, + "loss": 0.2967, + "step": 31950 + }, + { + "epoch": 0.6396116407677102, + "grad_norm": 1.1493197679519653, + "learning_rate": 3.036305794195587e-06, + "loss": 0.3162, + "step": 31951 + }, + { + "epoch": 0.6396316592848384, + "grad_norm": 1.0991379022598267, + "learning_rate": 3.036007663409666e-06, + "loss": 0.2735, + "step": 31952 + }, + { + "epoch": 0.6396516778019669, + "grad_norm": 1.211625099182129, + "learning_rate": 3.0357095408799443e-06, + "loss": 0.3573, + "step": 31953 + }, + { + "epoch": 0.6396716963190952, + "grad_norm": 1.0557372570037842, + "learning_rate": 3.035411426607676e-06, + "loss": 0.3352, + "step": 31954 + }, + { + "epoch": 0.6396917148362236, + "grad_norm": 1.0718663930892944, + "learning_rate": 3.0351133205941174e-06, + "loss": 0.2884, + "step": 31955 + }, + { + "epoch": 0.6397117333533519, + "grad_norm": 1.2925915718078613, + "learning_rate": 3.03481522284052e-06, + "loss": 0.313, + "step": 31956 + }, + { + "epoch": 0.6397317518704801, + "grad_norm": 0.9997523427009583, + "learning_rate": 3.0345171333481366e-06, + "loss": 0.2687, + "step": 31957 + }, + { + "epoch": 0.6397517703876086, + "grad_norm": 1.1779779195785522, + "learning_rate": 3.034219052118218e-06, + "loss": 0.3231, + "step": 31958 + }, + { + "epoch": 0.6397717889047368, + "grad_norm": 1.1458362340927124, + "learning_rate": 3.0339209791520207e-06, + "loss": 0.2665, + "step": 31959 + }, + { + "epoch": 0.6397918074218653, + "grad_norm": 1.2624821662902832, + "learning_rate": 3.033622914450797e-06, + "loss": 0.2972, + "step": 31960 + }, + { + "epoch": 0.6398118259389936, + "grad_norm": 1.0554850101470947, + "learning_rate": 3.0333248580157993e-06, + "loss": 0.2937, + "step": 31961 + }, + { + "epoch": 0.639831844456122, + "grad_norm": 1.8906660079956055, + "learning_rate": 3.0330268098482783e-06, + "loss": 0.7195, + "step": 31962 + }, + { + "epoch": 0.6398518629732503, + "grad_norm": 1.1093858480453491, + "learning_rate": 3.0327287699494924e-06, + "loss": 0.3213, + "step": 31963 + }, + { + "epoch": 0.6398718814903785, + "grad_norm": 1.2928955554962158, + "learning_rate": 3.0324307383206908e-06, + "loss": 0.2971, + "step": 31964 + }, + { + "epoch": 0.639891900007507, + "grad_norm": 1.0450388193130493, + "learning_rate": 3.032132714963127e-06, + "loss": 0.3259, + "step": 31965 + }, + { + "epoch": 0.6399119185246352, + "grad_norm": 1.3996708393096924, + "learning_rate": 3.0318346998780523e-06, + "loss": 0.2663, + "step": 31966 + }, + { + "epoch": 0.6399319370417637, + "grad_norm": 1.8583797216415405, + "learning_rate": 3.0315366930667205e-06, + "loss": 0.7974, + "step": 31967 + }, + { + "epoch": 0.639951955558892, + "grad_norm": 1.1278597116470337, + "learning_rate": 3.0312386945303873e-06, + "loss": 0.3085, + "step": 31968 + }, + { + "epoch": 0.6399719740760204, + "grad_norm": 1.1116950511932373, + "learning_rate": 3.030940704270302e-06, + "loss": 0.3594, + "step": 31969 + }, + { + "epoch": 0.6399919925931487, + "grad_norm": 1.0017412900924683, + "learning_rate": 3.030642722287719e-06, + "loss": 0.319, + "step": 31970 + }, + { + "epoch": 0.6400120111102771, + "grad_norm": 1.262420415878296, + "learning_rate": 3.030344748583888e-06, + "loss": 0.3139, + "step": 31971 + }, + { + "epoch": 0.6400320296274054, + "grad_norm": 1.3006008863449097, + "learning_rate": 3.0300467831600657e-06, + "loss": 0.3487, + "step": 31972 + }, + { + "epoch": 0.6400520481445336, + "grad_norm": 1.0469510555267334, + "learning_rate": 3.0297488260175012e-06, + "loss": 0.2937, + "step": 31973 + }, + { + "epoch": 0.640072066661662, + "grad_norm": 1.060044527053833, + "learning_rate": 3.0294508771574503e-06, + "loss": 0.2847, + "step": 31974 + }, + { + "epoch": 0.6400920851787903, + "grad_norm": 1.1329143047332764, + "learning_rate": 3.0291529365811634e-06, + "loss": 0.3452, + "step": 31975 + }, + { + "epoch": 0.6401121036959188, + "grad_norm": 1.0289608240127563, + "learning_rate": 3.028855004289892e-06, + "loss": 0.2922, + "step": 31976 + }, + { + "epoch": 0.640132122213047, + "grad_norm": 1.1355582475662231, + "learning_rate": 3.0285570802848918e-06, + "loss": 0.2442, + "step": 31977 + }, + { + "epoch": 0.6401521407301755, + "grad_norm": 1.7901661396026611, + "learning_rate": 3.028259164567413e-06, + "loss": 0.8031, + "step": 31978 + }, + { + "epoch": 0.6401721592473038, + "grad_norm": 1.1234241724014282, + "learning_rate": 3.027961257138708e-06, + "loss": 0.2914, + "step": 31979 + }, + { + "epoch": 0.640192177764432, + "grad_norm": 1.0479696989059448, + "learning_rate": 3.027663358000028e-06, + "loss": 0.3344, + "step": 31980 + }, + { + "epoch": 0.6402121962815605, + "grad_norm": 1.1255611181259155, + "learning_rate": 3.0273654671526288e-06, + "loss": 0.2861, + "step": 31981 + }, + { + "epoch": 0.6402322147986887, + "grad_norm": 1.063361406326294, + "learning_rate": 3.027067584597761e-06, + "loss": 0.307, + "step": 31982 + }, + { + "epoch": 0.6402522333158172, + "grad_norm": 1.086194396018982, + "learning_rate": 3.026769710336676e-06, + "loss": 0.3058, + "step": 31983 + }, + { + "epoch": 0.6402722518329454, + "grad_norm": 1.0993144512176514, + "learning_rate": 3.0264718443706257e-06, + "loss": 0.2828, + "step": 31984 + }, + { + "epoch": 0.6402922703500739, + "grad_norm": 1.1736963987350464, + "learning_rate": 3.0261739867008632e-06, + "loss": 0.2847, + "step": 31985 + }, + { + "epoch": 0.6403122888672022, + "grad_norm": 1.118239164352417, + "learning_rate": 3.0258761373286406e-06, + "loss": 0.3034, + "step": 31986 + }, + { + "epoch": 0.6403323073843306, + "grad_norm": 0.9940183758735657, + "learning_rate": 3.0255782962552105e-06, + "loss": 0.2819, + "step": 31987 + }, + { + "epoch": 0.6403523259014589, + "grad_norm": 1.0648208856582642, + "learning_rate": 3.0252804634818246e-06, + "loss": 0.3493, + "step": 31988 + }, + { + "epoch": 0.6403723444185871, + "grad_norm": 1.1120597124099731, + "learning_rate": 3.0249826390097325e-06, + "loss": 0.2831, + "step": 31989 + }, + { + "epoch": 0.6403923629357156, + "grad_norm": 1.143214464187622, + "learning_rate": 3.0246848228401904e-06, + "loss": 0.2737, + "step": 31990 + }, + { + "epoch": 0.6404123814528438, + "grad_norm": 1.173500657081604, + "learning_rate": 3.024387014974448e-06, + "loss": 0.2943, + "step": 31991 + }, + { + "epoch": 0.6404323999699723, + "grad_norm": 1.038306713104248, + "learning_rate": 3.024089215413757e-06, + "loss": 0.2965, + "step": 31992 + }, + { + "epoch": 0.6404524184871005, + "grad_norm": 0.9766188859939575, + "learning_rate": 3.0237914241593684e-06, + "loss": 0.3037, + "step": 31993 + }, + { + "epoch": 0.640472437004229, + "grad_norm": 1.2059985399246216, + "learning_rate": 3.0234936412125376e-06, + "loss": 0.3653, + "step": 31994 + }, + { + "epoch": 0.6404924555213573, + "grad_norm": 1.175851821899414, + "learning_rate": 3.0231958665745142e-06, + "loss": 0.3148, + "step": 31995 + }, + { + "epoch": 0.6405124740384855, + "grad_norm": 1.1216301918029785, + "learning_rate": 3.0228981002465495e-06, + "loss": 0.3162, + "step": 31996 + }, + { + "epoch": 0.640532492555614, + "grad_norm": 1.110757827758789, + "learning_rate": 3.022600342229895e-06, + "loss": 0.2923, + "step": 31997 + }, + { + "epoch": 0.6405525110727422, + "grad_norm": 1.13767409324646, + "learning_rate": 3.0223025925258033e-06, + "loss": 0.342, + "step": 31998 + }, + { + "epoch": 0.6405725295898707, + "grad_norm": 0.9634610414505005, + "learning_rate": 3.022004851135525e-06, + "loss": 0.2725, + "step": 31999 + }, + { + "epoch": 0.640592548106999, + "grad_norm": 1.109014630317688, + "learning_rate": 3.0217071180603147e-06, + "loss": 0.2778, + "step": 32000 + }, + { + "epoch": 0.6406125666241274, + "grad_norm": 1.2069337368011475, + "learning_rate": 3.021409393301421e-06, + "loss": 0.3611, + "step": 32001 + }, + { + "epoch": 0.6406325851412557, + "grad_norm": 1.157606601715088, + "learning_rate": 3.0211116768600956e-06, + "loss": 0.3077, + "step": 32002 + }, + { + "epoch": 0.6406526036583841, + "grad_norm": 1.056445598602295, + "learning_rate": 3.020813968737592e-06, + "loss": 0.2831, + "step": 32003 + }, + { + "epoch": 0.6406726221755124, + "grad_norm": 1.0293865203857422, + "learning_rate": 3.0205162689351606e-06, + "loss": 0.2994, + "step": 32004 + }, + { + "epoch": 0.6406926406926406, + "grad_norm": 1.8718469142913818, + "learning_rate": 3.0202185774540523e-06, + "loss": 0.7665, + "step": 32005 + }, + { + "epoch": 0.640712659209769, + "grad_norm": 1.195779800415039, + "learning_rate": 3.0199208942955195e-06, + "loss": 0.3366, + "step": 32006 + }, + { + "epoch": 0.6407326777268973, + "grad_norm": 1.1052186489105225, + "learning_rate": 3.0196232194608112e-06, + "loss": 0.3023, + "step": 32007 + }, + { + "epoch": 0.6407526962440258, + "grad_norm": 1.216978669166565, + "learning_rate": 3.0193255529511833e-06, + "loss": 0.3117, + "step": 32008 + }, + { + "epoch": 0.640772714761154, + "grad_norm": 1.064012050628662, + "learning_rate": 3.0190278947678836e-06, + "loss": 0.3221, + "step": 32009 + }, + { + "epoch": 0.6407927332782825, + "grad_norm": 1.0803699493408203, + "learning_rate": 3.0187302449121634e-06, + "loss": 0.309, + "step": 32010 + }, + { + "epoch": 0.6408127517954108, + "grad_norm": 1.12117600440979, + "learning_rate": 3.0184326033852763e-06, + "loss": 0.2976, + "step": 32011 + }, + { + "epoch": 0.640832770312539, + "grad_norm": 1.08783757686615, + "learning_rate": 3.0181349701884703e-06, + "loss": 0.3067, + "step": 32012 + }, + { + "epoch": 0.6408527888296675, + "grad_norm": 1.884904146194458, + "learning_rate": 3.0178373453230003e-06, + "loss": 0.7367, + "step": 32013 + }, + { + "epoch": 0.6408728073467957, + "grad_norm": 1.1350128650665283, + "learning_rate": 3.0175397287901153e-06, + "loss": 0.2754, + "step": 32014 + }, + { + "epoch": 0.6408928258639242, + "grad_norm": 1.1903660297393799, + "learning_rate": 3.0172421205910672e-06, + "loss": 0.2941, + "step": 32015 + }, + { + "epoch": 0.6409128443810524, + "grad_norm": 1.0969711542129517, + "learning_rate": 3.016944520727104e-06, + "loss": 0.3001, + "step": 32016 + }, + { + "epoch": 0.6409328628981809, + "grad_norm": 1.1146806478500366, + "learning_rate": 3.0166469291994817e-06, + "loss": 0.2826, + "step": 32017 + }, + { + "epoch": 0.6409528814153092, + "grad_norm": 1.0352146625518799, + "learning_rate": 3.0163493460094473e-06, + "loss": 0.2714, + "step": 32018 + }, + { + "epoch": 0.6409728999324376, + "grad_norm": 1.2180362939834595, + "learning_rate": 3.0160517711582544e-06, + "loss": 0.3093, + "step": 32019 + }, + { + "epoch": 0.6409929184495659, + "grad_norm": 1.268877387046814, + "learning_rate": 3.0157542046471513e-06, + "loss": 0.2892, + "step": 32020 + }, + { + "epoch": 0.6410129369666941, + "grad_norm": 1.0336546897888184, + "learning_rate": 3.0154566464773923e-06, + "loss": 0.3086, + "step": 32021 + }, + { + "epoch": 0.6410329554838226, + "grad_norm": 1.0107882022857666, + "learning_rate": 3.0151590966502263e-06, + "loss": 0.2711, + "step": 32022 + }, + { + "epoch": 0.6410529740009508, + "grad_norm": 1.1480557918548584, + "learning_rate": 3.0148615551669037e-06, + "loss": 0.3138, + "step": 32023 + }, + { + "epoch": 0.6410729925180793, + "grad_norm": 1.0758105516433716, + "learning_rate": 3.0145640220286755e-06, + "loss": 0.302, + "step": 32024 + }, + { + "epoch": 0.6410930110352075, + "grad_norm": 1.0371769666671753, + "learning_rate": 3.014266497236793e-06, + "loss": 0.2808, + "step": 32025 + }, + { + "epoch": 0.641113029552336, + "grad_norm": 1.0825202465057373, + "learning_rate": 3.0139689807925077e-06, + "loss": 0.3027, + "step": 32026 + }, + { + "epoch": 0.6411330480694643, + "grad_norm": 1.19426691532135, + "learning_rate": 3.01367147269707e-06, + "loss": 0.3063, + "step": 32027 + }, + { + "epoch": 0.6411530665865925, + "grad_norm": 1.0844818353652954, + "learning_rate": 3.01337397295173e-06, + "loss": 0.3385, + "step": 32028 + }, + { + "epoch": 0.641173085103721, + "grad_norm": 1.1419333219528198, + "learning_rate": 3.0130764815577353e-06, + "loss": 0.2831, + "step": 32029 + }, + { + "epoch": 0.6411931036208492, + "grad_norm": 1.1759591102600098, + "learning_rate": 3.0127789985163424e-06, + "loss": 0.2968, + "step": 32030 + }, + { + "epoch": 0.6412131221379777, + "grad_norm": 1.2594956159591675, + "learning_rate": 3.0124815238287976e-06, + "loss": 0.2962, + "step": 32031 + }, + { + "epoch": 0.641233140655106, + "grad_norm": 1.1557939052581787, + "learning_rate": 3.0121840574963535e-06, + "loss": 0.2779, + "step": 32032 + }, + { + "epoch": 0.6412531591722344, + "grad_norm": 1.7744258642196655, + "learning_rate": 3.0118865995202583e-06, + "loss": 0.3801, + "step": 32033 + }, + { + "epoch": 0.6412731776893627, + "grad_norm": 1.0831060409545898, + "learning_rate": 3.0115891499017657e-06, + "loss": 0.281, + "step": 32034 + }, + { + "epoch": 0.6412931962064911, + "grad_norm": 0.9891027808189392, + "learning_rate": 3.0112917086421245e-06, + "loss": 0.2664, + "step": 32035 + }, + { + "epoch": 0.6413132147236194, + "grad_norm": 1.1176646947860718, + "learning_rate": 3.0109942757425838e-06, + "loss": 0.3091, + "step": 32036 + }, + { + "epoch": 0.6413332332407476, + "grad_norm": 1.172684907913208, + "learning_rate": 3.0106968512043966e-06, + "loss": 0.3232, + "step": 32037 + }, + { + "epoch": 0.641353251757876, + "grad_norm": 1.1578232049942017, + "learning_rate": 3.010399435028809e-06, + "loss": 0.3034, + "step": 32038 + }, + { + "epoch": 0.6413732702750043, + "grad_norm": 1.1053853034973145, + "learning_rate": 3.010102027217077e-06, + "loss": 0.2703, + "step": 32039 + }, + { + "epoch": 0.6413932887921328, + "grad_norm": 1.0868372917175293, + "learning_rate": 3.0098046277704467e-06, + "loss": 0.3088, + "step": 32040 + }, + { + "epoch": 0.641413307309261, + "grad_norm": 1.0636942386627197, + "learning_rate": 3.0095072366901702e-06, + "loss": 0.3197, + "step": 32041 + }, + { + "epoch": 0.6414333258263895, + "grad_norm": 1.1182485818862915, + "learning_rate": 3.009209853977495e-06, + "loss": 0.3362, + "step": 32042 + }, + { + "epoch": 0.6414533443435178, + "grad_norm": 1.0637894868850708, + "learning_rate": 3.0089124796336744e-06, + "loss": 0.2639, + "step": 32043 + }, + { + "epoch": 0.641473362860646, + "grad_norm": 1.0281744003295898, + "learning_rate": 3.008615113659957e-06, + "loss": 0.2978, + "step": 32044 + }, + { + "epoch": 0.6414933813777745, + "grad_norm": 1.0761560201644897, + "learning_rate": 3.0083177560575937e-06, + "loss": 0.3132, + "step": 32045 + }, + { + "epoch": 0.6415133998949027, + "grad_norm": 1.046415090560913, + "learning_rate": 3.0080204068278324e-06, + "loss": 0.2658, + "step": 32046 + }, + { + "epoch": 0.6415334184120312, + "grad_norm": 1.283575177192688, + "learning_rate": 3.007723065971926e-06, + "loss": 0.3225, + "step": 32047 + }, + { + "epoch": 0.6415534369291594, + "grad_norm": 1.16232168674469, + "learning_rate": 3.007425733491124e-06, + "loss": 0.2915, + "step": 32048 + }, + { + "epoch": 0.6415734554462879, + "grad_norm": 1.768571376800537, + "learning_rate": 3.0071284093866735e-06, + "loss": 0.8268, + "step": 32049 + }, + { + "epoch": 0.6415934739634161, + "grad_norm": 1.2208868265151978, + "learning_rate": 3.006831093659828e-06, + "loss": 0.3503, + "step": 32050 + }, + { + "epoch": 0.6416134924805446, + "grad_norm": 1.2500650882720947, + "learning_rate": 3.0065337863118325e-06, + "loss": 0.3807, + "step": 32051 + }, + { + "epoch": 0.6416335109976729, + "grad_norm": 1.115777850151062, + "learning_rate": 3.0062364873439433e-06, + "loss": 0.3309, + "step": 32052 + }, + { + "epoch": 0.6416535295148011, + "grad_norm": 1.1461279392242432, + "learning_rate": 3.005939196757406e-06, + "loss": 0.2759, + "step": 32053 + }, + { + "epoch": 0.6416735480319296, + "grad_norm": 1.0373162031173706, + "learning_rate": 3.0056419145534715e-06, + "loss": 0.2958, + "step": 32054 + }, + { + "epoch": 0.6416935665490578, + "grad_norm": 1.2079033851623535, + "learning_rate": 3.0053446407333874e-06, + "loss": 0.2949, + "step": 32055 + }, + { + "epoch": 0.6417135850661863, + "grad_norm": 1.1143174171447754, + "learning_rate": 3.0050473752984068e-06, + "loss": 0.2809, + "step": 32056 + }, + { + "epoch": 0.6417336035833145, + "grad_norm": 1.972199559211731, + "learning_rate": 3.004750118249777e-06, + "loss": 0.7069, + "step": 32057 + }, + { + "epoch": 0.641753622100443, + "grad_norm": 1.214874029159546, + "learning_rate": 3.0044528695887487e-06, + "loss": 0.3148, + "step": 32058 + }, + { + "epoch": 0.6417736406175713, + "grad_norm": 1.1402593851089478, + "learning_rate": 3.004155629316572e-06, + "loss": 0.3533, + "step": 32059 + }, + { + "epoch": 0.6417936591346995, + "grad_norm": 1.0233330726623535, + "learning_rate": 3.003858397434493e-06, + "loss": 0.2798, + "step": 32060 + }, + { + "epoch": 0.641813677651828, + "grad_norm": 1.0369656085968018, + "learning_rate": 3.003561173943766e-06, + "loss": 0.353, + "step": 32061 + }, + { + "epoch": 0.6418336961689562, + "grad_norm": 1.1191176176071167, + "learning_rate": 3.003263958845637e-06, + "loss": 0.2611, + "step": 32062 + }, + { + "epoch": 0.6418537146860847, + "grad_norm": 0.9402889013290405, + "learning_rate": 3.0029667521413575e-06, + "loss": 0.2309, + "step": 32063 + }, + { + "epoch": 0.641873733203213, + "grad_norm": 1.9635651111602783, + "learning_rate": 3.002669553832174e-06, + "loss": 0.7579, + "step": 32064 + }, + { + "epoch": 0.6418937517203414, + "grad_norm": 1.1826531887054443, + "learning_rate": 3.0023723639193404e-06, + "loss": 0.3317, + "step": 32065 + }, + { + "epoch": 0.6419137702374696, + "grad_norm": 1.2161616086959839, + "learning_rate": 3.002075182404103e-06, + "loss": 0.3285, + "step": 32066 + }, + { + "epoch": 0.6419337887545981, + "grad_norm": 1.1562644243240356, + "learning_rate": 3.0017780092877112e-06, + "loss": 0.3666, + "step": 32067 + }, + { + "epoch": 0.6419538072717264, + "grad_norm": 1.8348983526229858, + "learning_rate": 3.0014808445714143e-06, + "loss": 0.7641, + "step": 32068 + }, + { + "epoch": 0.6419738257888546, + "grad_norm": 2.058581829071045, + "learning_rate": 3.0011836882564615e-06, + "loss": 0.8281, + "step": 32069 + }, + { + "epoch": 0.641993844305983, + "grad_norm": 1.000672698020935, + "learning_rate": 3.0008865403441023e-06, + "loss": 0.269, + "step": 32070 + }, + { + "epoch": 0.6420138628231113, + "grad_norm": 1.0761839151382446, + "learning_rate": 3.000589400835587e-06, + "loss": 0.3054, + "step": 32071 + }, + { + "epoch": 0.6420338813402398, + "grad_norm": 1.1676526069641113, + "learning_rate": 3.000292269732164e-06, + "loss": 0.3014, + "step": 32072 + }, + { + "epoch": 0.642053899857368, + "grad_norm": 1.1237472295761108, + "learning_rate": 2.99999514703508e-06, + "loss": 0.283, + "step": 32073 + }, + { + "epoch": 0.6420739183744965, + "grad_norm": 1.1930670738220215, + "learning_rate": 2.999698032745587e-06, + "loss": 0.3069, + "step": 32074 + }, + { + "epoch": 0.6420939368916248, + "grad_norm": 1.0778355598449707, + "learning_rate": 2.9994009268649327e-06, + "loss": 0.3139, + "step": 32075 + }, + { + "epoch": 0.642113955408753, + "grad_norm": 1.1346595287322998, + "learning_rate": 2.9991038293943676e-06, + "loss": 0.2409, + "step": 32076 + }, + { + "epoch": 0.6421339739258815, + "grad_norm": 1.9759677648544312, + "learning_rate": 2.998806740335136e-06, + "loss": 0.7459, + "step": 32077 + }, + { + "epoch": 0.6421539924430097, + "grad_norm": 1.128904104232788, + "learning_rate": 2.998509659688494e-06, + "loss": 0.335, + "step": 32078 + }, + { + "epoch": 0.6421740109601382, + "grad_norm": 0.9264213442802429, + "learning_rate": 2.9982125874556855e-06, + "loss": 0.2679, + "step": 32079 + }, + { + "epoch": 0.6421940294772664, + "grad_norm": 1.067220687866211, + "learning_rate": 2.997915523637961e-06, + "loss": 0.287, + "step": 32080 + }, + { + "epoch": 0.6422140479943949, + "grad_norm": 1.0364389419555664, + "learning_rate": 2.997618468236568e-06, + "loss": 0.3332, + "step": 32081 + }, + { + "epoch": 0.6422340665115231, + "grad_norm": 1.1052229404449463, + "learning_rate": 2.9973214212527557e-06, + "loss": 0.3311, + "step": 32082 + }, + { + "epoch": 0.6422540850286516, + "grad_norm": 1.2089141607284546, + "learning_rate": 2.9970243826877734e-06, + "loss": 0.3031, + "step": 32083 + }, + { + "epoch": 0.6422741035457799, + "grad_norm": 1.0842407941818237, + "learning_rate": 2.99672735254287e-06, + "loss": 0.3228, + "step": 32084 + }, + { + "epoch": 0.6422941220629081, + "grad_norm": 1.0565834045410156, + "learning_rate": 2.996430330819294e-06, + "loss": 0.2718, + "step": 32085 + }, + { + "epoch": 0.6423141405800366, + "grad_norm": 1.0476725101470947, + "learning_rate": 2.9961333175182915e-06, + "loss": 0.2648, + "step": 32086 + }, + { + "epoch": 0.6423341590971648, + "grad_norm": 1.1208847761154175, + "learning_rate": 2.995836312641116e-06, + "loss": 0.295, + "step": 32087 + }, + { + "epoch": 0.6423541776142933, + "grad_norm": 1.1713930368423462, + "learning_rate": 2.995539316189011e-06, + "loss": 0.2923, + "step": 32088 + }, + { + "epoch": 0.6423741961314215, + "grad_norm": 1.033287763595581, + "learning_rate": 2.99524232816323e-06, + "loss": 0.293, + "step": 32089 + }, + { + "epoch": 0.64239421464855, + "grad_norm": 1.8164039850234985, + "learning_rate": 2.994945348565018e-06, + "loss": 0.8139, + "step": 32090 + }, + { + "epoch": 0.6424142331656783, + "grad_norm": 1.1679654121398926, + "learning_rate": 2.9946483773956225e-06, + "loss": 0.2636, + "step": 32091 + }, + { + "epoch": 0.6424342516828065, + "grad_norm": 1.9803959131240845, + "learning_rate": 2.9943514146562953e-06, + "loss": 0.7704, + "step": 32092 + }, + { + "epoch": 0.642454270199935, + "grad_norm": 1.273884892463684, + "learning_rate": 2.9940544603482837e-06, + "loss": 0.3408, + "step": 32093 + }, + { + "epoch": 0.6424742887170632, + "grad_norm": 1.120457410812378, + "learning_rate": 2.9937575144728336e-06, + "loss": 0.2807, + "step": 32094 + }, + { + "epoch": 0.6424943072341917, + "grad_norm": 1.1490873098373413, + "learning_rate": 2.9934605770311952e-06, + "loss": 0.2956, + "step": 32095 + }, + { + "epoch": 0.64251432575132, + "grad_norm": 1.1113619804382324, + "learning_rate": 2.993163648024617e-06, + "loss": 0.295, + "step": 32096 + }, + { + "epoch": 0.6425343442684484, + "grad_norm": 1.0833182334899902, + "learning_rate": 2.9928667274543488e-06, + "loss": 0.2661, + "step": 32097 + }, + { + "epoch": 0.6425543627855766, + "grad_norm": 1.045855164527893, + "learning_rate": 2.9925698153216364e-06, + "loss": 0.3172, + "step": 32098 + }, + { + "epoch": 0.642574381302705, + "grad_norm": 1.2596367597579956, + "learning_rate": 2.9922729116277283e-06, + "loss": 0.2988, + "step": 32099 + }, + { + "epoch": 0.6425943998198334, + "grad_norm": 1.0791949033737183, + "learning_rate": 2.9919760163738707e-06, + "loss": 0.2611, + "step": 32100 + }, + { + "epoch": 0.6426144183369616, + "grad_norm": 1.0710517168045044, + "learning_rate": 2.9916791295613144e-06, + "loss": 0.2975, + "step": 32101 + }, + { + "epoch": 0.64263443685409, + "grad_norm": 1.067251205444336, + "learning_rate": 2.9913822511913087e-06, + "loss": 0.2883, + "step": 32102 + }, + { + "epoch": 0.6426544553712183, + "grad_norm": 1.0587531328201294, + "learning_rate": 2.9910853812650996e-06, + "loss": 0.32, + "step": 32103 + }, + { + "epoch": 0.6426744738883468, + "grad_norm": 1.085802674293518, + "learning_rate": 2.990788519783933e-06, + "loss": 0.3254, + "step": 32104 + }, + { + "epoch": 0.642694492405475, + "grad_norm": 1.381629467010498, + "learning_rate": 2.9904916667490614e-06, + "loss": 0.2753, + "step": 32105 + }, + { + "epoch": 0.6427145109226035, + "grad_norm": 1.0910422801971436, + "learning_rate": 2.9901948221617298e-06, + "loss": 0.2974, + "step": 32106 + }, + { + "epoch": 0.6427345294397317, + "grad_norm": 0.9949861168861389, + "learning_rate": 2.989897986023186e-06, + "loss": 0.2859, + "step": 32107 + }, + { + "epoch": 0.64275454795686, + "grad_norm": 1.1785298585891724, + "learning_rate": 2.9896011583346783e-06, + "loss": 0.3043, + "step": 32108 + }, + { + "epoch": 0.6427745664739885, + "grad_norm": 1.7247096300125122, + "learning_rate": 2.989304339097455e-06, + "loss": 0.7597, + "step": 32109 + }, + { + "epoch": 0.6427945849911167, + "grad_norm": 1.0855180025100708, + "learning_rate": 2.989007528312765e-06, + "loss": 0.3288, + "step": 32110 + }, + { + "epoch": 0.6428146035082452, + "grad_norm": 1.1112538576126099, + "learning_rate": 2.9887107259818536e-06, + "loss": 0.3487, + "step": 32111 + }, + { + "epoch": 0.6428346220253734, + "grad_norm": 1.2503491640090942, + "learning_rate": 2.9884139321059704e-06, + "loss": 0.3093, + "step": 32112 + }, + { + "epoch": 0.6428546405425019, + "grad_norm": 1.2246118783950806, + "learning_rate": 2.988117146686359e-06, + "loss": 0.2824, + "step": 32113 + }, + { + "epoch": 0.6428746590596301, + "grad_norm": 1.1609079837799072, + "learning_rate": 2.9878203697242717e-06, + "loss": 0.335, + "step": 32114 + }, + { + "epoch": 0.6428946775767586, + "grad_norm": 1.1419589519500732, + "learning_rate": 2.987523601220956e-06, + "loss": 0.3093, + "step": 32115 + }, + { + "epoch": 0.6429146960938869, + "grad_norm": 1.0416898727416992, + "learning_rate": 2.9872268411776573e-06, + "loss": 0.2833, + "step": 32116 + }, + { + "epoch": 0.6429347146110151, + "grad_norm": 1.3036624193191528, + "learning_rate": 2.986930089595622e-06, + "loss": 0.3217, + "step": 32117 + }, + { + "epoch": 0.6429547331281436, + "grad_norm": 0.9929806590080261, + "learning_rate": 2.986633346476102e-06, + "loss": 0.3024, + "step": 32118 + }, + { + "epoch": 0.6429747516452718, + "grad_norm": 1.115264654159546, + "learning_rate": 2.9863366118203413e-06, + "loss": 0.3102, + "step": 32119 + }, + { + "epoch": 0.6429947701624003, + "grad_norm": 1.0621572732925415, + "learning_rate": 2.9860398856295873e-06, + "loss": 0.2916, + "step": 32120 + }, + { + "epoch": 0.6430147886795285, + "grad_norm": 1.1267154216766357, + "learning_rate": 2.9857431679050895e-06, + "loss": 0.2825, + "step": 32121 + }, + { + "epoch": 0.643034807196657, + "grad_norm": 1.057045817375183, + "learning_rate": 2.985446458648092e-06, + "loss": 0.2775, + "step": 32122 + }, + { + "epoch": 0.6430548257137852, + "grad_norm": 1.1185038089752197, + "learning_rate": 2.9851497578598456e-06, + "loss": 0.2997, + "step": 32123 + }, + { + "epoch": 0.6430748442309135, + "grad_norm": 1.044604778289795, + "learning_rate": 2.9848530655415966e-06, + "loss": 0.3002, + "step": 32124 + }, + { + "epoch": 0.643094862748042, + "grad_norm": 1.1894018650054932, + "learning_rate": 2.984556381694591e-06, + "loss": 0.3353, + "step": 32125 + }, + { + "epoch": 0.6431148812651702, + "grad_norm": 1.0160619020462036, + "learning_rate": 2.9842597063200752e-06, + "loss": 0.2396, + "step": 32126 + }, + { + "epoch": 0.6431348997822987, + "grad_norm": 1.1058449745178223, + "learning_rate": 2.9839630394192987e-06, + "loss": 0.2898, + "step": 32127 + }, + { + "epoch": 0.643154918299427, + "grad_norm": 1.071923017501831, + "learning_rate": 2.9836663809935086e-06, + "loss": 0.3007, + "step": 32128 + }, + { + "epoch": 0.6431749368165554, + "grad_norm": 1.0399631261825562, + "learning_rate": 2.9833697310439514e-06, + "loss": 0.3492, + "step": 32129 + }, + { + "epoch": 0.6431949553336836, + "grad_norm": 1.1910592317581177, + "learning_rate": 2.983073089571873e-06, + "loss": 0.303, + "step": 32130 + }, + { + "epoch": 0.643214973850812, + "grad_norm": 1.1188746690750122, + "learning_rate": 2.98277645657852e-06, + "loss": 0.3056, + "step": 32131 + }, + { + "epoch": 0.6432349923679404, + "grad_norm": 1.112234115600586, + "learning_rate": 2.9824798320651427e-06, + "loss": 0.3317, + "step": 32132 + }, + { + "epoch": 0.6432550108850686, + "grad_norm": 1.1351382732391357, + "learning_rate": 2.9821832160329844e-06, + "loss": 0.3362, + "step": 32133 + }, + { + "epoch": 0.643275029402197, + "grad_norm": 1.0980620384216309, + "learning_rate": 2.9818866084832953e-06, + "loss": 0.3123, + "step": 32134 + }, + { + "epoch": 0.6432950479193253, + "grad_norm": 1.8956152200698853, + "learning_rate": 2.9815900094173183e-06, + "loss": 0.3291, + "step": 32135 + }, + { + "epoch": 0.6433150664364538, + "grad_norm": 1.0697550773620605, + "learning_rate": 2.9812934188363046e-06, + "loss": 0.2975, + "step": 32136 + }, + { + "epoch": 0.643335084953582, + "grad_norm": 0.9416620135307312, + "learning_rate": 2.980996836741499e-06, + "loss": 0.2696, + "step": 32137 + }, + { + "epoch": 0.6433551034707105, + "grad_norm": 1.089624047279358, + "learning_rate": 2.9807002631341487e-06, + "loss": 0.2892, + "step": 32138 + }, + { + "epoch": 0.6433751219878387, + "grad_norm": 1.171801209449768, + "learning_rate": 2.9804036980154973e-06, + "loss": 0.3437, + "step": 32139 + }, + { + "epoch": 0.643395140504967, + "grad_norm": 1.1540251970291138, + "learning_rate": 2.9801071413867942e-06, + "loss": 0.3169, + "step": 32140 + }, + { + "epoch": 0.6434151590220955, + "grad_norm": 1.0177228450775146, + "learning_rate": 2.979810593249288e-06, + "loss": 0.3072, + "step": 32141 + }, + { + "epoch": 0.6434351775392237, + "grad_norm": 1.084865689277649, + "learning_rate": 2.979514053604224e-06, + "loss": 0.3131, + "step": 32142 + }, + { + "epoch": 0.6434551960563522, + "grad_norm": 1.9403820037841797, + "learning_rate": 2.9792175224528476e-06, + "loss": 0.8103, + "step": 32143 + }, + { + "epoch": 0.6434752145734804, + "grad_norm": 1.0573921203613281, + "learning_rate": 2.978920999796403e-06, + "loss": 0.3272, + "step": 32144 + }, + { + "epoch": 0.6434952330906089, + "grad_norm": 1.0924797058105469, + "learning_rate": 2.9786244856361424e-06, + "loss": 0.3122, + "step": 32145 + }, + { + "epoch": 0.6435152516077371, + "grad_norm": 2.021610975265503, + "learning_rate": 2.978327979973309e-06, + "loss": 0.7959, + "step": 32146 + }, + { + "epoch": 0.6435352701248656, + "grad_norm": 1.1027168035507202, + "learning_rate": 2.9780314828091496e-06, + "loss": 0.3586, + "step": 32147 + }, + { + "epoch": 0.6435552886419939, + "grad_norm": 1.0765856504440308, + "learning_rate": 2.977734994144909e-06, + "loss": 0.2832, + "step": 32148 + }, + { + "epoch": 0.6435753071591221, + "grad_norm": 1.0914264917373657, + "learning_rate": 2.9774385139818373e-06, + "loss": 0.2776, + "step": 32149 + }, + { + "epoch": 0.6435953256762506, + "grad_norm": 1.193921685218811, + "learning_rate": 2.9771420423211793e-06, + "loss": 0.281, + "step": 32150 + }, + { + "epoch": 0.6436153441933788, + "grad_norm": 1.159522533416748, + "learning_rate": 2.9768455791641803e-06, + "loss": 0.2789, + "step": 32151 + }, + { + "epoch": 0.6436353627105073, + "grad_norm": 1.2656735181808472, + "learning_rate": 2.9765491245120857e-06, + "loss": 0.2862, + "step": 32152 + }, + { + "epoch": 0.6436553812276355, + "grad_norm": 1.015657663345337, + "learning_rate": 2.9762526783661434e-06, + "loss": 0.2696, + "step": 32153 + }, + { + "epoch": 0.643675399744764, + "grad_norm": 2.005350112915039, + "learning_rate": 2.9759562407276005e-06, + "loss": 0.7739, + "step": 32154 + }, + { + "epoch": 0.6436954182618922, + "grad_norm": 1.0542508363723755, + "learning_rate": 2.975659811597702e-06, + "loss": 0.2583, + "step": 32155 + }, + { + "epoch": 0.6437154367790205, + "grad_norm": 1.1108298301696777, + "learning_rate": 2.975363390977694e-06, + "loss": 0.3106, + "step": 32156 + }, + { + "epoch": 0.643735455296149, + "grad_norm": 2.034895420074463, + "learning_rate": 2.9750669788688207e-06, + "loss": 0.7711, + "step": 32157 + }, + { + "epoch": 0.6437554738132772, + "grad_norm": 1.091898798942566, + "learning_rate": 2.974770575272332e-06, + "loss": 0.2863, + "step": 32158 + }, + { + "epoch": 0.6437754923304057, + "grad_norm": 0.9904435276985168, + "learning_rate": 2.9744741801894715e-06, + "loss": 0.2925, + "step": 32159 + }, + { + "epoch": 0.643795510847534, + "grad_norm": 1.1112085580825806, + "learning_rate": 2.974177793621486e-06, + "loss": 0.3062, + "step": 32160 + }, + { + "epoch": 0.6438155293646624, + "grad_norm": 1.8809528350830078, + "learning_rate": 2.9738814155696217e-06, + "loss": 0.8061, + "step": 32161 + }, + { + "epoch": 0.6438355478817906, + "grad_norm": 1.1442806720733643, + "learning_rate": 2.9735850460351213e-06, + "loss": 0.2799, + "step": 32162 + }, + { + "epoch": 0.643855566398919, + "grad_norm": 0.9945605993270874, + "learning_rate": 2.9732886850192356e-06, + "loss": 0.2428, + "step": 32163 + }, + { + "epoch": 0.6438755849160473, + "grad_norm": 1.4929040670394897, + "learning_rate": 2.972992332523208e-06, + "loss": 0.2837, + "step": 32164 + }, + { + "epoch": 0.6438956034331756, + "grad_norm": 1.1005939245224, + "learning_rate": 2.972695988548283e-06, + "loss": 0.3277, + "step": 32165 + }, + { + "epoch": 0.643915621950304, + "grad_norm": 1.030277967453003, + "learning_rate": 2.972399653095707e-06, + "loss": 0.2949, + "step": 32166 + }, + { + "epoch": 0.6439356404674323, + "grad_norm": 1.9129174947738647, + "learning_rate": 2.9721033261667286e-06, + "loss": 0.8807, + "step": 32167 + }, + { + "epoch": 0.6439556589845608, + "grad_norm": 1.1576074361801147, + "learning_rate": 2.9718070077625916e-06, + "loss": 0.3, + "step": 32168 + }, + { + "epoch": 0.643975677501689, + "grad_norm": 1.1920785903930664, + "learning_rate": 2.9715106978845417e-06, + "loss": 0.2848, + "step": 32169 + }, + { + "epoch": 0.6439956960188175, + "grad_norm": 1.9345206022262573, + "learning_rate": 2.9712143965338215e-06, + "loss": 0.7759, + "step": 32170 + }, + { + "epoch": 0.6440157145359457, + "grad_norm": 1.024033784866333, + "learning_rate": 2.970918103711682e-06, + "loss": 0.2479, + "step": 32171 + }, + { + "epoch": 0.644035733053074, + "grad_norm": 1.0713014602661133, + "learning_rate": 2.9706218194193648e-06, + "loss": 0.2915, + "step": 32172 + }, + { + "epoch": 0.6440557515702025, + "grad_norm": 1.1458923816680908, + "learning_rate": 2.970325543658118e-06, + "loss": 0.28, + "step": 32173 + }, + { + "epoch": 0.6440757700873307, + "grad_norm": 1.1674610376358032, + "learning_rate": 2.970029276429186e-06, + "loss": 0.3337, + "step": 32174 + }, + { + "epoch": 0.6440957886044592, + "grad_norm": 1.0876349210739136, + "learning_rate": 2.9697330177338113e-06, + "loss": 0.3012, + "step": 32175 + }, + { + "epoch": 0.6441158071215874, + "grad_norm": 1.1381720304489136, + "learning_rate": 2.9694367675732448e-06, + "loss": 0.299, + "step": 32176 + }, + { + "epoch": 0.6441358256387159, + "grad_norm": 1.1129056215286255, + "learning_rate": 2.9691405259487294e-06, + "loss": 0.3056, + "step": 32177 + }, + { + "epoch": 0.6441558441558441, + "grad_norm": 1.192955493927002, + "learning_rate": 2.968844292861508e-06, + "loss": 0.2855, + "step": 32178 + }, + { + "epoch": 0.6441758626729726, + "grad_norm": 1.0045965909957886, + "learning_rate": 2.9685480683128282e-06, + "loss": 0.2717, + "step": 32179 + }, + { + "epoch": 0.6441958811901008, + "grad_norm": 1.113381266593933, + "learning_rate": 2.9682518523039365e-06, + "loss": 0.261, + "step": 32180 + }, + { + "epoch": 0.6442158997072291, + "grad_norm": 1.9013522863388062, + "learning_rate": 2.967955644836077e-06, + "loss": 0.6733, + "step": 32181 + }, + { + "epoch": 0.6442359182243576, + "grad_norm": 1.4075113534927368, + "learning_rate": 2.9676594459104945e-06, + "loss": 0.3172, + "step": 32182 + }, + { + "epoch": 0.6442559367414858, + "grad_norm": 1.2372655868530273, + "learning_rate": 2.9673632555284338e-06, + "loss": 0.3421, + "step": 32183 + }, + { + "epoch": 0.6442759552586143, + "grad_norm": 1.1442207098007202, + "learning_rate": 2.96706707369114e-06, + "loss": 0.3191, + "step": 32184 + }, + { + "epoch": 0.6442959737757425, + "grad_norm": 1.0819320678710938, + "learning_rate": 2.966770900399859e-06, + "loss": 0.2936, + "step": 32185 + }, + { + "epoch": 0.644315992292871, + "grad_norm": 1.2135199308395386, + "learning_rate": 2.9664747356558367e-06, + "loss": 0.3161, + "step": 32186 + }, + { + "epoch": 0.6443360108099992, + "grad_norm": 1.1927671432495117, + "learning_rate": 2.966178579460317e-06, + "loss": 0.3318, + "step": 32187 + }, + { + "epoch": 0.6443560293271275, + "grad_norm": 0.974310576915741, + "learning_rate": 2.965882431814543e-06, + "loss": 0.3027, + "step": 32188 + }, + { + "epoch": 0.644376047844256, + "grad_norm": 1.0210909843444824, + "learning_rate": 2.965586292719763e-06, + "loss": 0.2648, + "step": 32189 + }, + { + "epoch": 0.6443960663613842, + "grad_norm": 1.0669407844543457, + "learning_rate": 2.965290162177221e-06, + "loss": 0.3222, + "step": 32190 + }, + { + "epoch": 0.6444160848785127, + "grad_norm": 1.07253897190094, + "learning_rate": 2.9649940401881594e-06, + "loss": 0.2592, + "step": 32191 + }, + { + "epoch": 0.644436103395641, + "grad_norm": 1.3647569417953491, + "learning_rate": 2.9646979267538247e-06, + "loss": 0.3139, + "step": 32192 + }, + { + "epoch": 0.6444561219127694, + "grad_norm": 1.236367106437683, + "learning_rate": 2.9644018218754633e-06, + "loss": 0.29, + "step": 32193 + }, + { + "epoch": 0.6444761404298976, + "grad_norm": 1.707810401916504, + "learning_rate": 2.964105725554319e-06, + "loss": 0.3071, + "step": 32194 + }, + { + "epoch": 0.644496158947026, + "grad_norm": 1.1641780138015747, + "learning_rate": 2.9638096377916357e-06, + "loss": 0.2995, + "step": 32195 + }, + { + "epoch": 0.6445161774641543, + "grad_norm": 2.0989394187927246, + "learning_rate": 2.963513558588658e-06, + "loss": 0.745, + "step": 32196 + }, + { + "epoch": 0.6445361959812826, + "grad_norm": 1.135055422782898, + "learning_rate": 2.963217487946631e-06, + "loss": 0.3109, + "step": 32197 + }, + { + "epoch": 0.644556214498411, + "grad_norm": 1.1688443422317505, + "learning_rate": 2.9629214258667995e-06, + "loss": 0.3313, + "step": 32198 + }, + { + "epoch": 0.6445762330155393, + "grad_norm": 1.3166216611862183, + "learning_rate": 2.9626253723504086e-06, + "loss": 0.3095, + "step": 32199 + }, + { + "epoch": 0.6445962515326678, + "grad_norm": 1.3285499811172485, + "learning_rate": 2.962329327398702e-06, + "loss": 0.3072, + "step": 32200 + }, + { + "epoch": 0.644616270049796, + "grad_norm": 0.938417911529541, + "learning_rate": 2.9620332910129223e-06, + "loss": 0.272, + "step": 32201 + }, + { + "epoch": 0.6446362885669245, + "grad_norm": 1.1549570560455322, + "learning_rate": 2.961737263194319e-06, + "loss": 0.2776, + "step": 32202 + }, + { + "epoch": 0.6446563070840527, + "grad_norm": 1.0737009048461914, + "learning_rate": 2.961441243944133e-06, + "loss": 0.2762, + "step": 32203 + }, + { + "epoch": 0.644676325601181, + "grad_norm": 1.8056789636611938, + "learning_rate": 2.961145233263608e-06, + "loss": 0.7422, + "step": 32204 + }, + { + "epoch": 0.6446963441183095, + "grad_norm": 1.144308090209961, + "learning_rate": 2.9608492311539915e-06, + "loss": 0.3014, + "step": 32205 + }, + { + "epoch": 0.6447163626354377, + "grad_norm": 1.0053949356079102, + "learning_rate": 2.9605532376165237e-06, + "loss": 0.3226, + "step": 32206 + }, + { + "epoch": 0.6447363811525662, + "grad_norm": 1.0444037914276123, + "learning_rate": 2.9602572526524527e-06, + "loss": 0.2879, + "step": 32207 + }, + { + "epoch": 0.6447563996696944, + "grad_norm": 1.1644253730773926, + "learning_rate": 2.9599612762630213e-06, + "loss": 0.3474, + "step": 32208 + }, + { + "epoch": 0.6447764181868229, + "grad_norm": 1.0374245643615723, + "learning_rate": 2.959665308449473e-06, + "loss": 0.3081, + "step": 32209 + }, + { + "epoch": 0.6447964367039511, + "grad_norm": 1.0858594179153442, + "learning_rate": 2.9593693492130526e-06, + "loss": 0.3284, + "step": 32210 + }, + { + "epoch": 0.6448164552210794, + "grad_norm": 1.1538569927215576, + "learning_rate": 2.959073398555005e-06, + "loss": 0.3105, + "step": 32211 + }, + { + "epoch": 0.6448364737382078, + "grad_norm": 1.9204593896865845, + "learning_rate": 2.9587774564765733e-06, + "loss": 0.7141, + "step": 32212 + }, + { + "epoch": 0.6448564922553361, + "grad_norm": 1.1734659671783447, + "learning_rate": 2.958481522979002e-06, + "loss": 0.2864, + "step": 32213 + }, + { + "epoch": 0.6448765107724646, + "grad_norm": 1.0467146635055542, + "learning_rate": 2.958185598063536e-06, + "loss": 0.3356, + "step": 32214 + }, + { + "epoch": 0.6448965292895928, + "grad_norm": 1.242315411567688, + "learning_rate": 2.957889681731416e-06, + "loss": 0.3038, + "step": 32215 + }, + { + "epoch": 0.6449165478067213, + "grad_norm": 1.1789921522140503, + "learning_rate": 2.95759377398389e-06, + "loss": 0.3166, + "step": 32216 + }, + { + "epoch": 0.6449365663238495, + "grad_norm": 1.1674150228500366, + "learning_rate": 2.9572978748221994e-06, + "loss": 0.2624, + "step": 32217 + }, + { + "epoch": 0.644956584840978, + "grad_norm": 1.0948249101638794, + "learning_rate": 2.95700198424759e-06, + "loss": 0.3316, + "step": 32218 + }, + { + "epoch": 0.6449766033581062, + "grad_norm": 1.126405954360962, + "learning_rate": 2.9567061022613024e-06, + "loss": 0.3246, + "step": 32219 + }, + { + "epoch": 0.6449966218752345, + "grad_norm": 2.070828437805176, + "learning_rate": 2.9564102288645848e-06, + "loss": 0.7529, + "step": 32220 + }, + { + "epoch": 0.645016640392363, + "grad_norm": 1.235565185546875, + "learning_rate": 2.9561143640586786e-06, + "loss": 0.318, + "step": 32221 + }, + { + "epoch": 0.6450366589094912, + "grad_norm": 1.1815271377563477, + "learning_rate": 2.9558185078448277e-06, + "loss": 0.2781, + "step": 32222 + }, + { + "epoch": 0.6450566774266197, + "grad_norm": 1.317863941192627, + "learning_rate": 2.955522660224275e-06, + "loss": 0.3435, + "step": 32223 + }, + { + "epoch": 0.645076695943748, + "grad_norm": 1.1305553913116455, + "learning_rate": 2.955226821198265e-06, + "loss": 0.3273, + "step": 32224 + }, + { + "epoch": 0.6450967144608764, + "grad_norm": 1.0310382843017578, + "learning_rate": 2.9549309907680423e-06, + "loss": 0.2875, + "step": 32225 + }, + { + "epoch": 0.6451167329780046, + "grad_norm": 1.1309787034988403, + "learning_rate": 2.9546351689348505e-06, + "loss": 0.2886, + "step": 32226 + }, + { + "epoch": 0.6451367514951329, + "grad_norm": 1.081810474395752, + "learning_rate": 2.9543393556999317e-06, + "loss": 0.3175, + "step": 32227 + }, + { + "epoch": 0.6451567700122613, + "grad_norm": 1.1837742328643799, + "learning_rate": 2.9540435510645284e-06, + "loss": 0.3121, + "step": 32228 + }, + { + "epoch": 0.6451767885293896, + "grad_norm": 1.1133148670196533, + "learning_rate": 2.9537477550298866e-06, + "loss": 0.278, + "step": 32229 + }, + { + "epoch": 0.645196807046518, + "grad_norm": 1.8852527141571045, + "learning_rate": 2.953451967597249e-06, + "loss": 0.8129, + "step": 32230 + }, + { + "epoch": 0.6452168255636463, + "grad_norm": 1.0979013442993164, + "learning_rate": 2.9531561887678593e-06, + "loss": 0.3009, + "step": 32231 + }, + { + "epoch": 0.6452368440807748, + "grad_norm": 0.9444642663002014, + "learning_rate": 2.9528604185429586e-06, + "loss": 0.2911, + "step": 32232 + }, + { + "epoch": 0.645256862597903, + "grad_norm": 2.0217630863189697, + "learning_rate": 2.952564656923794e-06, + "loss": 0.8248, + "step": 32233 + }, + { + "epoch": 0.6452768811150315, + "grad_norm": 1.1305919885635376, + "learning_rate": 2.9522689039116082e-06, + "loss": 0.288, + "step": 32234 + }, + { + "epoch": 0.6452968996321597, + "grad_norm": 1.104451298713684, + "learning_rate": 2.951973159507641e-06, + "loss": 0.3496, + "step": 32235 + }, + { + "epoch": 0.645316918149288, + "grad_norm": 1.7999180555343628, + "learning_rate": 2.9516774237131395e-06, + "loss": 0.7924, + "step": 32236 + }, + { + "epoch": 0.6453369366664164, + "grad_norm": 1.1438485383987427, + "learning_rate": 2.951381696529343e-06, + "loss": 0.2717, + "step": 32237 + }, + { + "epoch": 0.6453569551835447, + "grad_norm": 1.0235705375671387, + "learning_rate": 2.9510859779574986e-06, + "loss": 0.3075, + "step": 32238 + }, + { + "epoch": 0.6453769737006732, + "grad_norm": 1.100440502166748, + "learning_rate": 2.950790267998849e-06, + "loss": 0.2997, + "step": 32239 + }, + { + "epoch": 0.6453969922178014, + "grad_norm": 1.060618281364441, + "learning_rate": 2.950494566654635e-06, + "loss": 0.3016, + "step": 32240 + }, + { + "epoch": 0.6454170107349299, + "grad_norm": 1.0456393957138062, + "learning_rate": 2.950198873926099e-06, + "loss": 0.288, + "step": 32241 + }, + { + "epoch": 0.6454370292520581, + "grad_norm": 1.02915358543396, + "learning_rate": 2.949903189814488e-06, + "loss": 0.2617, + "step": 32242 + }, + { + "epoch": 0.6454570477691864, + "grad_norm": 1.1375837326049805, + "learning_rate": 2.949607514321041e-06, + "loss": 0.2755, + "step": 32243 + }, + { + "epoch": 0.6454770662863148, + "grad_norm": 1.226022720336914, + "learning_rate": 2.949311847447005e-06, + "loss": 0.3252, + "step": 32244 + }, + { + "epoch": 0.6454970848034431, + "grad_norm": 1.0395087003707886, + "learning_rate": 2.9490161891936197e-06, + "loss": 0.2245, + "step": 32245 + }, + { + "epoch": 0.6455171033205716, + "grad_norm": 1.040291428565979, + "learning_rate": 2.948720539562128e-06, + "loss": 0.3533, + "step": 32246 + }, + { + "epoch": 0.6455371218376998, + "grad_norm": 1.154281735420227, + "learning_rate": 2.948424898553775e-06, + "loss": 0.3494, + "step": 32247 + }, + { + "epoch": 0.6455571403548283, + "grad_norm": 1.1889517307281494, + "learning_rate": 2.948129266169801e-06, + "loss": 0.306, + "step": 32248 + }, + { + "epoch": 0.6455771588719565, + "grad_norm": 1.8976150751113892, + "learning_rate": 2.9478336424114516e-06, + "loss": 0.7797, + "step": 32249 + }, + { + "epoch": 0.645597177389085, + "grad_norm": 1.0941201448440552, + "learning_rate": 2.9475380272799657e-06, + "loss": 0.2885, + "step": 32250 + }, + { + "epoch": 0.6456171959062132, + "grad_norm": 1.1358048915863037, + "learning_rate": 2.9472424207765904e-06, + "loss": 0.3219, + "step": 32251 + }, + { + "epoch": 0.6456372144233415, + "grad_norm": 1.1750108003616333, + "learning_rate": 2.946946822902566e-06, + "loss": 0.3294, + "step": 32252 + }, + { + "epoch": 0.64565723294047, + "grad_norm": 1.2031010389328003, + "learning_rate": 2.9466512336591357e-06, + "loss": 0.3285, + "step": 32253 + }, + { + "epoch": 0.6456772514575982, + "grad_norm": 1.139003038406372, + "learning_rate": 2.946355653047539e-06, + "loss": 0.388, + "step": 32254 + }, + { + "epoch": 0.6456972699747267, + "grad_norm": 1.1514217853546143, + "learning_rate": 2.946060081069024e-06, + "loss": 0.2876, + "step": 32255 + }, + { + "epoch": 0.645717288491855, + "grad_norm": 1.2195838689804077, + "learning_rate": 2.9457645177248283e-06, + "loss": 0.3186, + "step": 32256 + }, + { + "epoch": 0.6457373070089834, + "grad_norm": 1.202133059501648, + "learning_rate": 2.945468963016199e-06, + "loss": 0.3041, + "step": 32257 + }, + { + "epoch": 0.6457573255261116, + "grad_norm": 1.1860244274139404, + "learning_rate": 2.9451734169443747e-06, + "loss": 0.2705, + "step": 32258 + }, + { + "epoch": 0.6457773440432399, + "grad_norm": 1.0485285520553589, + "learning_rate": 2.944877879510598e-06, + "loss": 0.2651, + "step": 32259 + }, + { + "epoch": 0.6457973625603683, + "grad_norm": 1.126037359237671, + "learning_rate": 2.944582350716114e-06, + "loss": 0.3141, + "step": 32260 + }, + { + "epoch": 0.6458173810774966, + "grad_norm": 1.073491096496582, + "learning_rate": 2.9442868305621624e-06, + "loss": 0.283, + "step": 32261 + }, + { + "epoch": 0.645837399594625, + "grad_norm": 1.2053028345108032, + "learning_rate": 2.943991319049988e-06, + "loss": 0.3304, + "step": 32262 + }, + { + "epoch": 0.6458574181117533, + "grad_norm": 1.269612431526184, + "learning_rate": 2.943695816180829e-06, + "loss": 0.3134, + "step": 32263 + }, + { + "epoch": 0.6458774366288818, + "grad_norm": 1.7701817750930786, + "learning_rate": 2.9434003219559324e-06, + "loss": 0.7922, + "step": 32264 + }, + { + "epoch": 0.64589745514601, + "grad_norm": 1.0524730682373047, + "learning_rate": 2.943104836376539e-06, + "loss": 0.3056, + "step": 32265 + }, + { + "epoch": 0.6459174736631385, + "grad_norm": 1.1998881101608276, + "learning_rate": 2.94280935944389e-06, + "loss": 0.3472, + "step": 32266 + }, + { + "epoch": 0.6459374921802667, + "grad_norm": 1.9253672361373901, + "learning_rate": 2.942513891159226e-06, + "loss": 0.7227, + "step": 32267 + }, + { + "epoch": 0.645957510697395, + "grad_norm": 1.7019081115722656, + "learning_rate": 2.9422184315237912e-06, + "loss": 0.7088, + "step": 32268 + }, + { + "epoch": 0.6459775292145234, + "grad_norm": 1.117653250694275, + "learning_rate": 2.941922980538828e-06, + "loss": 0.3289, + "step": 32269 + }, + { + "epoch": 0.6459975477316517, + "grad_norm": 1.1335645914077759, + "learning_rate": 2.9416275382055775e-06, + "loss": 0.3172, + "step": 32270 + }, + { + "epoch": 0.6460175662487802, + "grad_norm": 1.0365058183670044, + "learning_rate": 2.941332104525283e-06, + "loss": 0.2797, + "step": 32271 + }, + { + "epoch": 0.6460375847659084, + "grad_norm": 1.1530084609985352, + "learning_rate": 2.9410366794991824e-06, + "loss": 0.3188, + "step": 32272 + }, + { + "epoch": 0.6460576032830369, + "grad_norm": 1.117572546005249, + "learning_rate": 2.9407412631285228e-06, + "loss": 0.2619, + "step": 32273 + }, + { + "epoch": 0.6460776218001651, + "grad_norm": 1.2627809047698975, + "learning_rate": 2.940445855414543e-06, + "loss": 0.3099, + "step": 32274 + }, + { + "epoch": 0.6460976403172934, + "grad_norm": 1.077522873878479, + "learning_rate": 2.940150456358486e-06, + "loss": 0.2942, + "step": 32275 + }, + { + "epoch": 0.6461176588344218, + "grad_norm": 1.096114158630371, + "learning_rate": 2.9398550659615937e-06, + "loss": 0.3291, + "step": 32276 + }, + { + "epoch": 0.6461376773515501, + "grad_norm": 1.1961240768432617, + "learning_rate": 2.939559684225105e-06, + "loss": 0.3252, + "step": 32277 + }, + { + "epoch": 0.6461576958686785, + "grad_norm": 1.1439307928085327, + "learning_rate": 2.939264311150267e-06, + "loss": 0.3446, + "step": 32278 + }, + { + "epoch": 0.6461777143858068, + "grad_norm": 0.9740327000617981, + "learning_rate": 2.938968946738317e-06, + "loss": 0.2692, + "step": 32279 + }, + { + "epoch": 0.6461977329029353, + "grad_norm": 1.9411952495574951, + "learning_rate": 2.938673590990498e-06, + "loss": 0.7038, + "step": 32280 + }, + { + "epoch": 0.6462177514200635, + "grad_norm": 1.1131565570831299, + "learning_rate": 2.938378243908051e-06, + "loss": 0.2828, + "step": 32281 + }, + { + "epoch": 0.646237769937192, + "grad_norm": 1.0797851085662842, + "learning_rate": 2.9380829054922185e-06, + "loss": 0.3094, + "step": 32282 + }, + { + "epoch": 0.6462577884543202, + "grad_norm": 1.0193020105361938, + "learning_rate": 2.9377875757442425e-06, + "loss": 0.3104, + "step": 32283 + }, + { + "epoch": 0.6462778069714485, + "grad_norm": 1.0188120603561401, + "learning_rate": 2.9374922546653638e-06, + "loss": 0.2986, + "step": 32284 + }, + { + "epoch": 0.646297825488577, + "grad_norm": 1.9221030473709106, + "learning_rate": 2.937196942256822e-06, + "loss": 0.7359, + "step": 32285 + }, + { + "epoch": 0.6463178440057052, + "grad_norm": 1.1468514204025269, + "learning_rate": 2.9369016385198622e-06, + "loss": 0.3082, + "step": 32286 + }, + { + "epoch": 0.6463378625228337, + "grad_norm": 1.1022528409957886, + "learning_rate": 2.936606343455723e-06, + "loss": 0.3096, + "step": 32287 + }, + { + "epoch": 0.646357881039962, + "grad_norm": 1.0647331476211548, + "learning_rate": 2.936311057065648e-06, + "loss": 0.2981, + "step": 32288 + }, + { + "epoch": 0.6463778995570904, + "grad_norm": 1.1589598655700684, + "learning_rate": 2.9360157793508763e-06, + "loss": 0.3211, + "step": 32289 + }, + { + "epoch": 0.6463979180742186, + "grad_norm": 1.8961261510849, + "learning_rate": 2.9357205103126485e-06, + "loss": 0.7597, + "step": 32290 + }, + { + "epoch": 0.6464179365913469, + "grad_norm": 1.085012435913086, + "learning_rate": 2.9354252499522098e-06, + "loss": 0.3087, + "step": 32291 + }, + { + "epoch": 0.6464379551084753, + "grad_norm": 1.0603007078170776, + "learning_rate": 2.9351299982707992e-06, + "loss": 0.3158, + "step": 32292 + }, + { + "epoch": 0.6464579736256036, + "grad_norm": 1.2788035869598389, + "learning_rate": 2.934834755269656e-06, + "loss": 0.2725, + "step": 32293 + }, + { + "epoch": 0.646477992142732, + "grad_norm": 1.1687082052230835, + "learning_rate": 2.934539520950024e-06, + "loss": 0.3447, + "step": 32294 + }, + { + "epoch": 0.6464980106598603, + "grad_norm": 1.118001103401184, + "learning_rate": 2.9342442953131422e-06, + "loss": 0.2787, + "step": 32295 + }, + { + "epoch": 0.6465180291769888, + "grad_norm": 1.8040887117385864, + "learning_rate": 2.933949078360254e-06, + "loss": 0.7526, + "step": 32296 + }, + { + "epoch": 0.646538047694117, + "grad_norm": 1.8224966526031494, + "learning_rate": 2.9336538700926e-06, + "loss": 0.7837, + "step": 32297 + }, + { + "epoch": 0.6465580662112455, + "grad_norm": 1.8329511880874634, + "learning_rate": 2.9333586705114194e-06, + "loss": 0.7402, + "step": 32298 + }, + { + "epoch": 0.6465780847283737, + "grad_norm": 1.1288533210754395, + "learning_rate": 2.9330634796179534e-06, + "loss": 0.2616, + "step": 32299 + }, + { + "epoch": 0.646598103245502, + "grad_norm": 1.1323926448822021, + "learning_rate": 2.932768297413444e-06, + "loss": 0.3507, + "step": 32300 + }, + { + "epoch": 0.6466181217626304, + "grad_norm": 1.1673533916473389, + "learning_rate": 2.9324731238991334e-06, + "loss": 0.2784, + "step": 32301 + }, + { + "epoch": 0.6466381402797587, + "grad_norm": 1.1260954141616821, + "learning_rate": 2.9321779590762608e-06, + "loss": 0.2976, + "step": 32302 + }, + { + "epoch": 0.6466581587968872, + "grad_norm": 1.0859483480453491, + "learning_rate": 2.9318828029460646e-06, + "loss": 0.2966, + "step": 32303 + }, + { + "epoch": 0.6466781773140154, + "grad_norm": 1.1218128204345703, + "learning_rate": 2.93158765550979e-06, + "loss": 0.2961, + "step": 32304 + }, + { + "epoch": 0.6466981958311439, + "grad_norm": 1.103979229927063, + "learning_rate": 2.9312925167686762e-06, + "loss": 0.3335, + "step": 32305 + }, + { + "epoch": 0.6467182143482721, + "grad_norm": 1.081095576286316, + "learning_rate": 2.930997386723962e-06, + "loss": 0.2999, + "step": 32306 + }, + { + "epoch": 0.6467382328654004, + "grad_norm": 1.141709327697754, + "learning_rate": 2.9307022653768913e-06, + "loss": 0.2829, + "step": 32307 + }, + { + "epoch": 0.6467582513825288, + "grad_norm": 1.1590876579284668, + "learning_rate": 2.9304071527287004e-06, + "loss": 0.2874, + "step": 32308 + }, + { + "epoch": 0.6467782698996571, + "grad_norm": 1.0347399711608887, + "learning_rate": 2.930112048780634e-06, + "loss": 0.2611, + "step": 32309 + }, + { + "epoch": 0.6467982884167855, + "grad_norm": 1.0832241773605347, + "learning_rate": 2.9298169535339317e-06, + "loss": 0.3055, + "step": 32310 + }, + { + "epoch": 0.6468183069339138, + "grad_norm": 1.1377991437911987, + "learning_rate": 2.9295218669898317e-06, + "loss": 0.2933, + "step": 32311 + }, + { + "epoch": 0.6468383254510423, + "grad_norm": 1.291947603225708, + "learning_rate": 2.9292267891495775e-06, + "loss": 0.3224, + "step": 32312 + }, + { + "epoch": 0.6468583439681705, + "grad_norm": 1.0539798736572266, + "learning_rate": 2.9289317200144075e-06, + "loss": 0.3282, + "step": 32313 + }, + { + "epoch": 0.646878362485299, + "grad_norm": 1.067122459411621, + "learning_rate": 2.9286366595855635e-06, + "loss": 0.289, + "step": 32314 + }, + { + "epoch": 0.6468983810024272, + "grad_norm": 1.06624174118042, + "learning_rate": 2.9283416078642856e-06, + "loss": 0.2824, + "step": 32315 + }, + { + "epoch": 0.6469183995195555, + "grad_norm": 1.1371209621429443, + "learning_rate": 2.928046564851812e-06, + "loss": 0.3029, + "step": 32316 + }, + { + "epoch": 0.646938418036684, + "grad_norm": 0.9945608973503113, + "learning_rate": 2.9277515305493863e-06, + "loss": 0.2121, + "step": 32317 + }, + { + "epoch": 0.6469584365538122, + "grad_norm": 1.2673654556274414, + "learning_rate": 2.9274565049582472e-06, + "loss": 0.297, + "step": 32318 + }, + { + "epoch": 0.6469784550709407, + "grad_norm": 1.1324232816696167, + "learning_rate": 2.927161488079634e-06, + "loss": 0.2886, + "step": 32319 + }, + { + "epoch": 0.646998473588069, + "grad_norm": 1.146094799041748, + "learning_rate": 2.9268664799147896e-06, + "loss": 0.3167, + "step": 32320 + }, + { + "epoch": 0.6470184921051974, + "grad_norm": 1.0421223640441895, + "learning_rate": 2.926571480464949e-06, + "loss": 0.3307, + "step": 32321 + }, + { + "epoch": 0.6470385106223256, + "grad_norm": 1.0583585500717163, + "learning_rate": 2.9262764897313587e-06, + "loss": 0.2559, + "step": 32322 + }, + { + "epoch": 0.6470585291394539, + "grad_norm": 1.7666449546813965, + "learning_rate": 2.9259815077152553e-06, + "loss": 0.7781, + "step": 32323 + }, + { + "epoch": 0.6470785476565823, + "grad_norm": 0.9660688042640686, + "learning_rate": 2.9256865344178785e-06, + "loss": 0.2887, + "step": 32324 + }, + { + "epoch": 0.6470985661737106, + "grad_norm": 1.1542940139770508, + "learning_rate": 2.9253915698404695e-06, + "loss": 0.2996, + "step": 32325 + }, + { + "epoch": 0.647118584690839, + "grad_norm": 1.82645583152771, + "learning_rate": 2.925096613984267e-06, + "loss": 0.7836, + "step": 32326 + }, + { + "epoch": 0.6471386032079673, + "grad_norm": 0.988902747631073, + "learning_rate": 2.924801666850514e-06, + "loss": 0.2966, + "step": 32327 + }, + { + "epoch": 0.6471586217250958, + "grad_norm": 1.1518610715866089, + "learning_rate": 2.924506728440447e-06, + "loss": 0.2947, + "step": 32328 + }, + { + "epoch": 0.647178640242224, + "grad_norm": 1.828386664390564, + "learning_rate": 2.9242117987553074e-06, + "loss": 0.7918, + "step": 32329 + }, + { + "epoch": 0.6471986587593525, + "grad_norm": 1.088375210762024, + "learning_rate": 2.9239168777963336e-06, + "loss": 0.3119, + "step": 32330 + }, + { + "epoch": 0.6472186772764807, + "grad_norm": 1.8587185144424438, + "learning_rate": 2.9236219655647675e-06, + "loss": 0.7706, + "step": 32331 + }, + { + "epoch": 0.647238695793609, + "grad_norm": 1.134013295173645, + "learning_rate": 2.923327062061847e-06, + "loss": 0.2748, + "step": 32332 + }, + { + "epoch": 0.6472587143107374, + "grad_norm": 1.2561774253845215, + "learning_rate": 2.9230321672888142e-06, + "loss": 0.2591, + "step": 32333 + }, + { + "epoch": 0.6472787328278657, + "grad_norm": 1.11186945438385, + "learning_rate": 2.922737281246904e-06, + "loss": 0.2593, + "step": 32334 + }, + { + "epoch": 0.6472987513449941, + "grad_norm": 1.1050200462341309, + "learning_rate": 2.922442403937362e-06, + "loss": 0.2977, + "step": 32335 + }, + { + "epoch": 0.6473187698621224, + "grad_norm": 1.1226969957351685, + "learning_rate": 2.922147535361425e-06, + "loss": 0.3223, + "step": 32336 + }, + { + "epoch": 0.6473387883792509, + "grad_norm": 1.1775400638580322, + "learning_rate": 2.9218526755203314e-06, + "loss": 0.3021, + "step": 32337 + }, + { + "epoch": 0.6473588068963791, + "grad_norm": 1.1492644548416138, + "learning_rate": 2.921557824415322e-06, + "loss": 0.3439, + "step": 32338 + }, + { + "epoch": 0.6473788254135074, + "grad_norm": 1.060085415840149, + "learning_rate": 2.9212629820476368e-06, + "loss": 0.3021, + "step": 32339 + }, + { + "epoch": 0.6473988439306358, + "grad_norm": 1.301596760749817, + "learning_rate": 2.920968148418515e-06, + "loss": 0.2722, + "step": 32340 + }, + { + "epoch": 0.6474188624477641, + "grad_norm": 1.1102381944656372, + "learning_rate": 2.9206733235291954e-06, + "loss": 0.2855, + "step": 32341 + }, + { + "epoch": 0.6474388809648925, + "grad_norm": 1.123520016670227, + "learning_rate": 2.920378507380919e-06, + "loss": 0.3411, + "step": 32342 + }, + { + "epoch": 0.6474588994820208, + "grad_norm": 1.1456043720245361, + "learning_rate": 2.9200836999749206e-06, + "loss": 0.3409, + "step": 32343 + }, + { + "epoch": 0.6474789179991493, + "grad_norm": 1.0752264261245728, + "learning_rate": 2.919788901312445e-06, + "loss": 0.2944, + "step": 32344 + }, + { + "epoch": 0.6474989365162775, + "grad_norm": 1.0294004678726196, + "learning_rate": 2.9194941113947285e-06, + "loss": 0.3016, + "step": 32345 + }, + { + "epoch": 0.647518955033406, + "grad_norm": 1.1272382736206055, + "learning_rate": 2.9191993302230113e-06, + "loss": 0.3023, + "step": 32346 + }, + { + "epoch": 0.6475389735505342, + "grad_norm": 1.1700820922851562, + "learning_rate": 2.918904557798531e-06, + "loss": 0.3051, + "step": 32347 + }, + { + "epoch": 0.6475589920676625, + "grad_norm": 1.140030026435852, + "learning_rate": 2.91860979412253e-06, + "loss": 0.3417, + "step": 32348 + }, + { + "epoch": 0.647579010584791, + "grad_norm": 1.7479000091552734, + "learning_rate": 2.9183150391962458e-06, + "loss": 0.7472, + "step": 32349 + }, + { + "epoch": 0.6475990291019192, + "grad_norm": 1.0780198574066162, + "learning_rate": 2.9180202930209163e-06, + "loss": 0.2832, + "step": 32350 + }, + { + "epoch": 0.6476190476190476, + "grad_norm": 1.1156883239746094, + "learning_rate": 2.9177255555977817e-06, + "loss": 0.3408, + "step": 32351 + }, + { + "epoch": 0.647639066136176, + "grad_norm": 1.160779595375061, + "learning_rate": 2.917430826928079e-06, + "loss": 0.3093, + "step": 32352 + }, + { + "epoch": 0.6476590846533044, + "grad_norm": 1.9518400430679321, + "learning_rate": 2.917136107013051e-06, + "loss": 0.7346, + "step": 32353 + }, + { + "epoch": 0.6476791031704326, + "grad_norm": 1.0316615104675293, + "learning_rate": 2.9168413958539325e-06, + "loss": 0.2715, + "step": 32354 + }, + { + "epoch": 0.6476991216875609, + "grad_norm": 1.098733901977539, + "learning_rate": 2.9165466934519664e-06, + "loss": 0.2836, + "step": 32355 + }, + { + "epoch": 0.6477191402046893, + "grad_norm": 1.0897308588027954, + "learning_rate": 2.916251999808388e-06, + "loss": 0.3151, + "step": 32356 + }, + { + "epoch": 0.6477391587218176, + "grad_norm": 1.3587629795074463, + "learning_rate": 2.915957314924439e-06, + "loss": 0.3427, + "step": 32357 + }, + { + "epoch": 0.647759177238946, + "grad_norm": 1.0931676626205444, + "learning_rate": 2.9156626388013576e-06, + "loss": 0.2629, + "step": 32358 + }, + { + "epoch": 0.6477791957560743, + "grad_norm": 1.0160475969314575, + "learning_rate": 2.9153679714403814e-06, + "loss": 0.2779, + "step": 32359 + }, + { + "epoch": 0.6477992142732028, + "grad_norm": 1.0319632291793823, + "learning_rate": 2.91507331284275e-06, + "loss": 0.3088, + "step": 32360 + }, + { + "epoch": 0.647819232790331, + "grad_norm": 1.234950304031372, + "learning_rate": 2.9147786630096996e-06, + "loss": 0.3317, + "step": 32361 + }, + { + "epoch": 0.6478392513074595, + "grad_norm": 1.047251582145691, + "learning_rate": 2.914484021942473e-06, + "loss": 0.3267, + "step": 32362 + }, + { + "epoch": 0.6478592698245877, + "grad_norm": 1.060948371887207, + "learning_rate": 2.9141893896423076e-06, + "loss": 0.2876, + "step": 32363 + }, + { + "epoch": 0.647879288341716, + "grad_norm": 1.0504939556121826, + "learning_rate": 2.91389476611044e-06, + "loss": 0.3125, + "step": 32364 + }, + { + "epoch": 0.6478993068588444, + "grad_norm": 1.0919127464294434, + "learning_rate": 2.9136001513481083e-06, + "loss": 0.2788, + "step": 32365 + }, + { + "epoch": 0.6479193253759727, + "grad_norm": 1.1450223922729492, + "learning_rate": 2.913305545356554e-06, + "loss": 0.2277, + "step": 32366 + }, + { + "epoch": 0.6479393438931011, + "grad_norm": 2.056663751602173, + "learning_rate": 2.913010948137013e-06, + "loss": 0.8297, + "step": 32367 + }, + { + "epoch": 0.6479593624102294, + "grad_norm": 1.12708580493927, + "learning_rate": 2.912716359690726e-06, + "loss": 0.3042, + "step": 32368 + }, + { + "epoch": 0.6479793809273579, + "grad_norm": 1.8359804153442383, + "learning_rate": 2.912421780018929e-06, + "loss": 0.7506, + "step": 32369 + }, + { + "epoch": 0.6479993994444861, + "grad_norm": 1.1624157428741455, + "learning_rate": 2.9121272091228638e-06, + "loss": 0.3405, + "step": 32370 + }, + { + "epoch": 0.6480194179616144, + "grad_norm": 1.1089988946914673, + "learning_rate": 2.911832647003765e-06, + "loss": 0.2913, + "step": 32371 + }, + { + "epoch": 0.6480394364787428, + "grad_norm": 1.7156481742858887, + "learning_rate": 2.9115380936628736e-06, + "loss": 0.7001, + "step": 32372 + }, + { + "epoch": 0.6480594549958711, + "grad_norm": 1.2154899835586548, + "learning_rate": 2.9112435491014268e-06, + "loss": 0.297, + "step": 32373 + }, + { + "epoch": 0.6480794735129995, + "grad_norm": 1.9994088411331177, + "learning_rate": 2.9109490133206598e-06, + "loss": 0.7651, + "step": 32374 + }, + { + "epoch": 0.6480994920301278, + "grad_norm": 1.152612328529358, + "learning_rate": 2.9106544863218156e-06, + "loss": 0.2928, + "step": 32375 + }, + { + "epoch": 0.6481195105472563, + "grad_norm": 1.2074964046478271, + "learning_rate": 2.9103599681061313e-06, + "loss": 0.3224, + "step": 32376 + }, + { + "epoch": 0.6481395290643845, + "grad_norm": 1.1907182931900024, + "learning_rate": 2.910065458674843e-06, + "loss": 0.2784, + "step": 32377 + }, + { + "epoch": 0.648159547581513, + "grad_norm": 1.4430656433105469, + "learning_rate": 2.909770958029188e-06, + "loss": 0.2745, + "step": 32378 + }, + { + "epoch": 0.6481795660986412, + "grad_norm": 1.0551525354385376, + "learning_rate": 2.9094764661704084e-06, + "loss": 0.3158, + "step": 32379 + }, + { + "epoch": 0.6481995846157695, + "grad_norm": 1.1419482231140137, + "learning_rate": 2.9091819830997377e-06, + "loss": 0.2829, + "step": 32380 + }, + { + "epoch": 0.648219603132898, + "grad_norm": 1.0720911026000977, + "learning_rate": 2.9088875088184174e-06, + "loss": 0.3574, + "step": 32381 + }, + { + "epoch": 0.6482396216500262, + "grad_norm": 1.2393771409988403, + "learning_rate": 2.9085930433276847e-06, + "loss": 0.2989, + "step": 32382 + }, + { + "epoch": 0.6482596401671546, + "grad_norm": 1.1302464008331299, + "learning_rate": 2.908298586628775e-06, + "loss": 0.3268, + "step": 32383 + }, + { + "epoch": 0.648279658684283, + "grad_norm": 1.128382682800293, + "learning_rate": 2.908004138722929e-06, + "loss": 0.3598, + "step": 32384 + }, + { + "epoch": 0.6482996772014114, + "grad_norm": 1.1171941757202148, + "learning_rate": 2.907709699611384e-06, + "loss": 0.2868, + "step": 32385 + }, + { + "epoch": 0.6483196957185396, + "grad_norm": 1.2390168905258179, + "learning_rate": 2.907415269295377e-06, + "loss": 0.3348, + "step": 32386 + }, + { + "epoch": 0.6483397142356679, + "grad_norm": 1.1118875741958618, + "learning_rate": 2.9071208477761435e-06, + "loss": 0.3226, + "step": 32387 + }, + { + "epoch": 0.6483597327527963, + "grad_norm": 1.147885799407959, + "learning_rate": 2.9068264350549264e-06, + "loss": 0.3414, + "step": 32388 + }, + { + "epoch": 0.6483797512699246, + "grad_norm": 1.183562994003296, + "learning_rate": 2.9065320311329603e-06, + "loss": 0.2868, + "step": 32389 + }, + { + "epoch": 0.648399769787053, + "grad_norm": 1.0275243520736694, + "learning_rate": 2.9062376360114823e-06, + "loss": 0.2858, + "step": 32390 + }, + { + "epoch": 0.6484197883041813, + "grad_norm": 1.2535289525985718, + "learning_rate": 2.9059432496917295e-06, + "loss": 0.324, + "step": 32391 + }, + { + "epoch": 0.6484398068213097, + "grad_norm": 1.0797550678253174, + "learning_rate": 2.905648872174942e-06, + "loss": 0.2726, + "step": 32392 + }, + { + "epoch": 0.648459825338438, + "grad_norm": 1.102298617362976, + "learning_rate": 2.905354503462354e-06, + "loss": 0.3168, + "step": 32393 + }, + { + "epoch": 0.6484798438555665, + "grad_norm": 1.065786361694336, + "learning_rate": 2.905060143555208e-06, + "loss": 0.2955, + "step": 32394 + }, + { + "epoch": 0.6484998623726947, + "grad_norm": 1.1077522039413452, + "learning_rate": 2.9047657924547374e-06, + "loss": 0.2743, + "step": 32395 + }, + { + "epoch": 0.648519880889823, + "grad_norm": 1.1459238529205322, + "learning_rate": 2.9044714501621783e-06, + "loss": 0.3044, + "step": 32396 + }, + { + "epoch": 0.6485398994069514, + "grad_norm": 1.0535019636154175, + "learning_rate": 2.9041771166787725e-06, + "loss": 0.3572, + "step": 32397 + }, + { + "epoch": 0.6485599179240797, + "grad_norm": 1.240107774734497, + "learning_rate": 2.9038827920057557e-06, + "loss": 0.3007, + "step": 32398 + }, + { + "epoch": 0.6485799364412081, + "grad_norm": 1.9302912950515747, + "learning_rate": 2.9035884761443634e-06, + "loss": 0.8332, + "step": 32399 + }, + { + "epoch": 0.6485999549583364, + "grad_norm": 1.0709632635116577, + "learning_rate": 2.9032941690958326e-06, + "loss": 0.335, + "step": 32400 + }, + { + "epoch": 0.6486199734754649, + "grad_norm": 1.8349974155426025, + "learning_rate": 2.902999870861404e-06, + "loss": 0.7499, + "step": 32401 + }, + { + "epoch": 0.6486399919925931, + "grad_norm": 1.2166117429733276, + "learning_rate": 2.902705581442312e-06, + "loss": 0.3237, + "step": 32402 + }, + { + "epoch": 0.6486600105097214, + "grad_norm": 1.129367709159851, + "learning_rate": 2.9024113008397958e-06, + "loss": 0.3135, + "step": 32403 + }, + { + "epoch": 0.6486800290268498, + "grad_norm": 1.1003007888793945, + "learning_rate": 2.902117029055088e-06, + "loss": 0.2857, + "step": 32404 + }, + { + "epoch": 0.6487000475439781, + "grad_norm": 1.0330228805541992, + "learning_rate": 2.9018227660894303e-06, + "loss": 0.2773, + "step": 32405 + }, + { + "epoch": 0.6487200660611065, + "grad_norm": 1.2057987451553345, + "learning_rate": 2.9015285119440575e-06, + "loss": 0.3321, + "step": 32406 + }, + { + "epoch": 0.6487400845782348, + "grad_norm": 0.990350604057312, + "learning_rate": 2.901234266620209e-06, + "loss": 0.2331, + "step": 32407 + }, + { + "epoch": 0.6487601030953632, + "grad_norm": 1.2189359664916992, + "learning_rate": 2.9009400301191193e-06, + "loss": 0.3275, + "step": 32408 + }, + { + "epoch": 0.6487801216124915, + "grad_norm": 1.0526453256607056, + "learning_rate": 2.9006458024420234e-06, + "loss": 0.2405, + "step": 32409 + }, + { + "epoch": 0.64880014012962, + "grad_norm": 1.2503857612609863, + "learning_rate": 2.9003515835901643e-06, + "loss": 0.323, + "step": 32410 + }, + { + "epoch": 0.6488201586467482, + "grad_norm": 1.0574398040771484, + "learning_rate": 2.9000573735647743e-06, + "loss": 0.3106, + "step": 32411 + }, + { + "epoch": 0.6488401771638765, + "grad_norm": 1.9987441301345825, + "learning_rate": 2.8997631723670918e-06, + "loss": 0.7469, + "step": 32412 + }, + { + "epoch": 0.648860195681005, + "grad_norm": 1.0906951427459717, + "learning_rate": 2.899468979998352e-06, + "loss": 0.307, + "step": 32413 + }, + { + "epoch": 0.6488802141981332, + "grad_norm": 1.0987091064453125, + "learning_rate": 2.8991747964597917e-06, + "loss": 0.3424, + "step": 32414 + }, + { + "epoch": 0.6489002327152616, + "grad_norm": 1.1956603527069092, + "learning_rate": 2.89888062175265e-06, + "loss": 0.3479, + "step": 32415 + }, + { + "epoch": 0.6489202512323899, + "grad_norm": 0.9795850515365601, + "learning_rate": 2.8985864558781616e-06, + "loss": 0.2395, + "step": 32416 + }, + { + "epoch": 0.6489402697495184, + "grad_norm": 1.1498475074768066, + "learning_rate": 2.8982922988375615e-06, + "loss": 0.3796, + "step": 32417 + }, + { + "epoch": 0.6489602882666466, + "grad_norm": 1.175500512123108, + "learning_rate": 2.897998150632091e-06, + "loss": 0.2872, + "step": 32418 + }, + { + "epoch": 0.6489803067837749, + "grad_norm": 2.0220041275024414, + "learning_rate": 2.8977040112629818e-06, + "loss": 0.7553, + "step": 32419 + }, + { + "epoch": 0.6490003253009033, + "grad_norm": 1.1004561185836792, + "learning_rate": 2.897409880731474e-06, + "loss": 0.3375, + "step": 32420 + }, + { + "epoch": 0.6490203438180316, + "grad_norm": 1.8043558597564697, + "learning_rate": 2.8971157590388033e-06, + "loss": 0.7597, + "step": 32421 + }, + { + "epoch": 0.64904036233516, + "grad_norm": 1.0080751180648804, + "learning_rate": 2.896821646186204e-06, + "loss": 0.2767, + "step": 32422 + }, + { + "epoch": 0.6490603808522883, + "grad_norm": 0.9423055648803711, + "learning_rate": 2.8965275421749135e-06, + "loss": 0.3015, + "step": 32423 + }, + { + "epoch": 0.6490803993694167, + "grad_norm": 1.1940550804138184, + "learning_rate": 2.89623344700617e-06, + "loss": 0.2726, + "step": 32424 + }, + { + "epoch": 0.649100417886545, + "grad_norm": 1.0666457414627075, + "learning_rate": 2.895939360681208e-06, + "loss": 0.3094, + "step": 32425 + }, + { + "epoch": 0.6491204364036735, + "grad_norm": 1.0883938074111938, + "learning_rate": 2.8956452832012642e-06, + "loss": 0.3312, + "step": 32426 + }, + { + "epoch": 0.6491404549208017, + "grad_norm": 1.1169068813323975, + "learning_rate": 2.895351214567573e-06, + "loss": 0.2947, + "step": 32427 + }, + { + "epoch": 0.64916047343793, + "grad_norm": 1.1429601907730103, + "learning_rate": 2.8950571547813746e-06, + "loss": 0.3267, + "step": 32428 + }, + { + "epoch": 0.6491804919550584, + "grad_norm": 0.9826423525810242, + "learning_rate": 2.8947631038439026e-06, + "loss": 0.28, + "step": 32429 + }, + { + "epoch": 0.6492005104721867, + "grad_norm": 1.0768028497695923, + "learning_rate": 2.8944690617563914e-06, + "loss": 0.2897, + "step": 32430 + }, + { + "epoch": 0.6492205289893151, + "grad_norm": 1.8038265705108643, + "learning_rate": 2.894175028520081e-06, + "loss": 0.8205, + "step": 32431 + }, + { + "epoch": 0.6492405475064434, + "grad_norm": 1.263627290725708, + "learning_rate": 2.8938810041362042e-06, + "loss": 0.2973, + "step": 32432 + }, + { + "epoch": 0.6492605660235719, + "grad_norm": 1.0474529266357422, + "learning_rate": 2.893586988606e-06, + "loss": 0.3139, + "step": 32433 + }, + { + "epoch": 0.6492805845407001, + "grad_norm": 1.0844513177871704, + "learning_rate": 2.8932929819307026e-06, + "loss": 0.2905, + "step": 32434 + }, + { + "epoch": 0.6493006030578284, + "grad_norm": 1.0800367593765259, + "learning_rate": 2.8929989841115485e-06, + "loss": 0.3131, + "step": 32435 + }, + { + "epoch": 0.6493206215749568, + "grad_norm": 1.2480406761169434, + "learning_rate": 2.892704995149771e-06, + "loss": 0.3259, + "step": 32436 + }, + { + "epoch": 0.6493406400920851, + "grad_norm": 1.1555384397506714, + "learning_rate": 2.89241101504661e-06, + "loss": 0.3229, + "step": 32437 + }, + { + "epoch": 0.6493606586092135, + "grad_norm": 1.0814176797866821, + "learning_rate": 2.8921170438033e-06, + "loss": 0.3139, + "step": 32438 + }, + { + "epoch": 0.6493806771263418, + "grad_norm": 1.1043168306350708, + "learning_rate": 2.8918230814210768e-06, + "loss": 0.3154, + "step": 32439 + }, + { + "epoch": 0.6494006956434702, + "grad_norm": 1.2015758752822876, + "learning_rate": 2.8915291279011724e-06, + "loss": 0.2842, + "step": 32440 + }, + { + "epoch": 0.6494207141605985, + "grad_norm": 1.2510617971420288, + "learning_rate": 2.8912351832448287e-06, + "loss": 0.3073, + "step": 32441 + }, + { + "epoch": 0.649440732677727, + "grad_norm": 1.8205952644348145, + "learning_rate": 2.890941247453278e-06, + "loss": 0.7374, + "step": 32442 + }, + { + "epoch": 0.6494607511948552, + "grad_norm": 1.1919981241226196, + "learning_rate": 2.8906473205277553e-06, + "loss": 0.297, + "step": 32443 + }, + { + "epoch": 0.6494807697119835, + "grad_norm": 1.2067182064056396, + "learning_rate": 2.8903534024694986e-06, + "loss": 0.2953, + "step": 32444 + }, + { + "epoch": 0.649500788229112, + "grad_norm": 1.181902289390564, + "learning_rate": 2.89005949327974e-06, + "loss": 0.3278, + "step": 32445 + }, + { + "epoch": 0.6495208067462402, + "grad_norm": 0.9808923006057739, + "learning_rate": 2.8897655929597196e-06, + "loss": 0.2551, + "step": 32446 + }, + { + "epoch": 0.6495408252633686, + "grad_norm": 0.9635278582572937, + "learning_rate": 2.8894717015106704e-06, + "loss": 0.2906, + "step": 32447 + }, + { + "epoch": 0.6495608437804969, + "grad_norm": 1.136193871498108, + "learning_rate": 2.889177818933828e-06, + "loss": 0.3498, + "step": 32448 + }, + { + "epoch": 0.6495808622976253, + "grad_norm": 1.2152339220046997, + "learning_rate": 2.888883945230426e-06, + "loss": 0.3088, + "step": 32449 + }, + { + "epoch": 0.6496008808147536, + "grad_norm": 1.0380858182907104, + "learning_rate": 2.888590080401703e-06, + "loss": 0.2647, + "step": 32450 + }, + { + "epoch": 0.6496208993318819, + "grad_norm": 1.0426812171936035, + "learning_rate": 2.8882962244488934e-06, + "loss": 0.3141, + "step": 32451 + }, + { + "epoch": 0.6496409178490103, + "grad_norm": 1.1389964818954468, + "learning_rate": 2.8880023773732325e-06, + "loss": 0.325, + "step": 32452 + }, + { + "epoch": 0.6496609363661386, + "grad_norm": 1.0727174282073975, + "learning_rate": 2.8877085391759542e-06, + "loss": 0.2799, + "step": 32453 + }, + { + "epoch": 0.649680954883267, + "grad_norm": 1.061010718345642, + "learning_rate": 2.887414709858293e-06, + "loss": 0.3408, + "step": 32454 + }, + { + "epoch": 0.6497009734003953, + "grad_norm": 1.1312973499298096, + "learning_rate": 2.8871208894214875e-06, + "loss": 0.2894, + "step": 32455 + }, + { + "epoch": 0.6497209919175237, + "grad_norm": 1.2131109237670898, + "learning_rate": 2.8868270778667696e-06, + "loss": 0.3044, + "step": 32456 + }, + { + "epoch": 0.649741010434652, + "grad_norm": 1.0196723937988281, + "learning_rate": 2.8865332751953774e-06, + "loss": 0.2614, + "step": 32457 + }, + { + "epoch": 0.6497610289517805, + "grad_norm": 0.9938032031059265, + "learning_rate": 2.8862394814085426e-06, + "loss": 0.299, + "step": 32458 + }, + { + "epoch": 0.6497810474689087, + "grad_norm": 1.2239089012145996, + "learning_rate": 2.885945696507504e-06, + "loss": 0.2954, + "step": 32459 + }, + { + "epoch": 0.649801065986037, + "grad_norm": 1.083748459815979, + "learning_rate": 2.885651920493495e-06, + "loss": 0.2851, + "step": 32460 + }, + { + "epoch": 0.6498210845031654, + "grad_norm": 1.1713957786560059, + "learning_rate": 2.8853581533677504e-06, + "loss": 0.3487, + "step": 32461 + }, + { + "epoch": 0.6498411030202937, + "grad_norm": 1.1977698802947998, + "learning_rate": 2.8850643951315025e-06, + "loss": 0.2747, + "step": 32462 + }, + { + "epoch": 0.6498611215374221, + "grad_norm": 1.150086760520935, + "learning_rate": 2.8847706457859915e-06, + "loss": 0.2863, + "step": 32463 + }, + { + "epoch": 0.6498811400545504, + "grad_norm": 1.256657361984253, + "learning_rate": 2.8844769053324495e-06, + "loss": 0.2702, + "step": 32464 + }, + { + "epoch": 0.6499011585716788, + "grad_norm": 1.1559641361236572, + "learning_rate": 2.8841831737721105e-06, + "loss": 0.3196, + "step": 32465 + }, + { + "epoch": 0.6499211770888071, + "grad_norm": 1.2468229532241821, + "learning_rate": 2.883889451106211e-06, + "loss": 0.3171, + "step": 32466 + }, + { + "epoch": 0.6499411956059354, + "grad_norm": 1.2941044569015503, + "learning_rate": 2.8835957373359823e-06, + "loss": 0.3344, + "step": 32467 + }, + { + "epoch": 0.6499612141230638, + "grad_norm": 1.1282320022583008, + "learning_rate": 2.8833020324626635e-06, + "loss": 0.2956, + "step": 32468 + }, + { + "epoch": 0.6499812326401921, + "grad_norm": 1.1596653461456299, + "learning_rate": 2.883008336487486e-06, + "loss": 0.3364, + "step": 32469 + }, + { + "epoch": 0.6500012511573205, + "grad_norm": 1.0650427341461182, + "learning_rate": 2.8827146494116877e-06, + "loss": 0.273, + "step": 32470 + }, + { + "epoch": 0.6500212696744488, + "grad_norm": 1.2307440042495728, + "learning_rate": 2.8824209712364993e-06, + "loss": 0.3342, + "step": 32471 + }, + { + "epoch": 0.6500412881915772, + "grad_norm": 1.021274447441101, + "learning_rate": 2.882127301963159e-06, + "loss": 0.302, + "step": 32472 + }, + { + "epoch": 0.6500613067087055, + "grad_norm": 1.1714155673980713, + "learning_rate": 2.8818336415929007e-06, + "loss": 0.296, + "step": 32473 + }, + { + "epoch": 0.650081325225834, + "grad_norm": 0.9924495816230774, + "learning_rate": 2.881539990126957e-06, + "loss": 0.2852, + "step": 32474 + }, + { + "epoch": 0.6501013437429622, + "grad_norm": 1.1067591905593872, + "learning_rate": 2.8812463475665644e-06, + "loss": 0.3302, + "step": 32475 + }, + { + "epoch": 0.6501213622600905, + "grad_norm": 1.1370737552642822, + "learning_rate": 2.880952713912953e-06, + "loss": 0.2949, + "step": 32476 + }, + { + "epoch": 0.650141380777219, + "grad_norm": 1.1446759700775146, + "learning_rate": 2.8806590891673626e-06, + "loss": 0.3191, + "step": 32477 + }, + { + "epoch": 0.6501613992943472, + "grad_norm": 1.170220136642456, + "learning_rate": 2.880365473331026e-06, + "loss": 0.2578, + "step": 32478 + }, + { + "epoch": 0.6501814178114756, + "grad_norm": 1.0426305532455444, + "learning_rate": 2.8800718664051765e-06, + "loss": 0.3031, + "step": 32479 + }, + { + "epoch": 0.6502014363286039, + "grad_norm": 1.12764573097229, + "learning_rate": 2.8797782683910458e-06, + "loss": 0.286, + "step": 32480 + }, + { + "epoch": 0.6502214548457323, + "grad_norm": 1.0862082242965698, + "learning_rate": 2.879484679289874e-06, + "loss": 0.2738, + "step": 32481 + }, + { + "epoch": 0.6502414733628606, + "grad_norm": 1.1370840072631836, + "learning_rate": 2.8791910991028903e-06, + "loss": 0.2922, + "step": 32482 + }, + { + "epoch": 0.6502614918799889, + "grad_norm": 1.13229501247406, + "learning_rate": 2.8788975278313325e-06, + "loss": 0.2669, + "step": 32483 + }, + { + "epoch": 0.6502815103971173, + "grad_norm": 1.2559982538223267, + "learning_rate": 2.8786039654764308e-06, + "loss": 0.3096, + "step": 32484 + }, + { + "epoch": 0.6503015289142456, + "grad_norm": 1.2093725204467773, + "learning_rate": 2.8783104120394236e-06, + "loss": 0.3205, + "step": 32485 + }, + { + "epoch": 0.650321547431374, + "grad_norm": 1.2248375415802002, + "learning_rate": 2.8780168675215433e-06, + "loss": 0.2834, + "step": 32486 + }, + { + "epoch": 0.6503415659485023, + "grad_norm": 1.054236888885498, + "learning_rate": 2.8777233319240228e-06, + "loss": 0.297, + "step": 32487 + }, + { + "epoch": 0.6503615844656307, + "grad_norm": 1.8042030334472656, + "learning_rate": 2.877429805248097e-06, + "loss": 0.8479, + "step": 32488 + }, + { + "epoch": 0.650381602982759, + "grad_norm": 1.1330993175506592, + "learning_rate": 2.877136287494997e-06, + "loss": 0.3117, + "step": 32489 + }, + { + "epoch": 0.6504016214998875, + "grad_norm": 1.058797836303711, + "learning_rate": 2.876842778665962e-06, + "loss": 0.2848, + "step": 32490 + }, + { + "epoch": 0.6504216400170157, + "grad_norm": 1.1593812704086304, + "learning_rate": 2.876549278762223e-06, + "loss": 0.3479, + "step": 32491 + }, + { + "epoch": 0.650441658534144, + "grad_norm": 1.0974277257919312, + "learning_rate": 2.8762557877850133e-06, + "loss": 0.3036, + "step": 32492 + }, + { + "epoch": 0.6504616770512724, + "grad_norm": 1.2190769910812378, + "learning_rate": 2.8759623057355656e-06, + "loss": 0.2968, + "step": 32493 + }, + { + "epoch": 0.6504816955684007, + "grad_norm": 1.0194814205169678, + "learning_rate": 2.8756688326151173e-06, + "loss": 0.2857, + "step": 32494 + }, + { + "epoch": 0.6505017140855291, + "grad_norm": 0.9800155758857727, + "learning_rate": 2.8753753684248976e-06, + "loss": 0.2817, + "step": 32495 + }, + { + "epoch": 0.6505217326026574, + "grad_norm": 1.0680673122406006, + "learning_rate": 2.8750819131661457e-06, + "loss": 0.3462, + "step": 32496 + }, + { + "epoch": 0.6505417511197858, + "grad_norm": 1.1103826761245728, + "learning_rate": 2.874788466840091e-06, + "loss": 0.2553, + "step": 32497 + }, + { + "epoch": 0.6505617696369141, + "grad_norm": 1.1820319890975952, + "learning_rate": 2.8744950294479668e-06, + "loss": 0.3256, + "step": 32498 + }, + { + "epoch": 0.6505817881540424, + "grad_norm": 1.0441595315933228, + "learning_rate": 2.87420160099101e-06, + "loss": 0.3097, + "step": 32499 + }, + { + "epoch": 0.6506018066711708, + "grad_norm": 1.221224069595337, + "learning_rate": 2.873908181470452e-06, + "loss": 0.3011, + "step": 32500 + }, + { + "epoch": 0.6506218251882991, + "grad_norm": 1.9292998313903809, + "learning_rate": 2.8736147708875265e-06, + "loss": 0.748, + "step": 32501 + }, + { + "epoch": 0.6506418437054275, + "grad_norm": 1.2547763586044312, + "learning_rate": 2.873321369243465e-06, + "loss": 0.2807, + "step": 32502 + }, + { + "epoch": 0.6506618622225558, + "grad_norm": 1.0861854553222656, + "learning_rate": 2.8730279765395055e-06, + "loss": 0.2761, + "step": 32503 + }, + { + "epoch": 0.6506818807396842, + "grad_norm": 0.9911026954650879, + "learning_rate": 2.872734592776877e-06, + "loss": 0.2592, + "step": 32504 + }, + { + "epoch": 0.6507018992568125, + "grad_norm": 1.1452484130859375, + "learning_rate": 2.872441217956816e-06, + "loss": 0.332, + "step": 32505 + }, + { + "epoch": 0.650721917773941, + "grad_norm": 1.0483468770980835, + "learning_rate": 2.8721478520805536e-06, + "loss": 0.3119, + "step": 32506 + }, + { + "epoch": 0.6507419362910692, + "grad_norm": 1.2234009504318237, + "learning_rate": 2.871854495149322e-06, + "loss": 0.3055, + "step": 32507 + }, + { + "epoch": 0.6507619548081975, + "grad_norm": 1.1495323181152344, + "learning_rate": 2.8715611471643557e-06, + "loss": 0.3049, + "step": 32508 + }, + { + "epoch": 0.650781973325326, + "grad_norm": 1.1115388870239258, + "learning_rate": 2.8712678081268907e-06, + "loss": 0.3198, + "step": 32509 + }, + { + "epoch": 0.6508019918424542, + "grad_norm": 1.0673837661743164, + "learning_rate": 2.8709744780381576e-06, + "loss": 0.3125, + "step": 32510 + }, + { + "epoch": 0.6508220103595826, + "grad_norm": 1.0218069553375244, + "learning_rate": 2.8706811568993874e-06, + "loss": 0.2883, + "step": 32511 + }, + { + "epoch": 0.6508420288767109, + "grad_norm": 1.0252013206481934, + "learning_rate": 2.8703878447118173e-06, + "loss": 0.2966, + "step": 32512 + }, + { + "epoch": 0.6508620473938393, + "grad_norm": 1.1784312725067139, + "learning_rate": 2.870094541476679e-06, + "loss": 0.325, + "step": 32513 + }, + { + "epoch": 0.6508820659109676, + "grad_norm": 1.8740043640136719, + "learning_rate": 2.8698012471952043e-06, + "loss": 0.7979, + "step": 32514 + }, + { + "epoch": 0.6509020844280959, + "grad_norm": 1.240180492401123, + "learning_rate": 2.869507961868625e-06, + "loss": 0.3392, + "step": 32515 + }, + { + "epoch": 0.6509221029452243, + "grad_norm": 1.0973100662231445, + "learning_rate": 2.869214685498178e-06, + "loss": 0.3108, + "step": 32516 + }, + { + "epoch": 0.6509421214623526, + "grad_norm": 1.0887309312820435, + "learning_rate": 2.868921418085093e-06, + "loss": 0.2641, + "step": 32517 + }, + { + "epoch": 0.650962139979481, + "grad_norm": 1.0968772172927856, + "learning_rate": 2.8686281596306047e-06, + "loss": 0.2893, + "step": 32518 + }, + { + "epoch": 0.6509821584966093, + "grad_norm": 0.9820753931999207, + "learning_rate": 2.868334910135945e-06, + "loss": 0.2608, + "step": 32519 + }, + { + "epoch": 0.6510021770137377, + "grad_norm": 1.9968290328979492, + "learning_rate": 2.868041669602344e-06, + "loss": 0.7338, + "step": 32520 + }, + { + "epoch": 0.651022195530866, + "grad_norm": 1.2099668979644775, + "learning_rate": 2.8677484380310372e-06, + "loss": 0.2942, + "step": 32521 + }, + { + "epoch": 0.6510422140479944, + "grad_norm": 1.0642900466918945, + "learning_rate": 2.8674552154232593e-06, + "loss": 0.3019, + "step": 32522 + }, + { + "epoch": 0.6510622325651227, + "grad_norm": 1.0906816720962524, + "learning_rate": 2.8671620017802405e-06, + "loss": 0.3047, + "step": 32523 + }, + { + "epoch": 0.651082251082251, + "grad_norm": 1.9439489841461182, + "learning_rate": 2.8668687971032116e-06, + "loss": 0.7794, + "step": 32524 + }, + { + "epoch": 0.6511022695993794, + "grad_norm": 1.0490593910217285, + "learning_rate": 2.8665756013934097e-06, + "loss": 0.3127, + "step": 32525 + }, + { + "epoch": 0.6511222881165077, + "grad_norm": 1.0708898305892944, + "learning_rate": 2.866282414652064e-06, + "loss": 0.305, + "step": 32526 + }, + { + "epoch": 0.6511423066336361, + "grad_norm": 1.0026719570159912, + "learning_rate": 2.8659892368804087e-06, + "loss": 0.2925, + "step": 32527 + }, + { + "epoch": 0.6511623251507644, + "grad_norm": 1.0722010135650635, + "learning_rate": 2.8656960680796754e-06, + "loss": 0.2685, + "step": 32528 + }, + { + "epoch": 0.6511823436678928, + "grad_norm": 1.2080367803573608, + "learning_rate": 2.8654029082510937e-06, + "loss": 0.3417, + "step": 32529 + }, + { + "epoch": 0.6512023621850211, + "grad_norm": 1.1397393941879272, + "learning_rate": 2.865109757395902e-06, + "loss": 0.279, + "step": 32530 + }, + { + "epoch": 0.6512223807021494, + "grad_norm": 1.0321730375289917, + "learning_rate": 2.864816615515328e-06, + "loss": 0.311, + "step": 32531 + }, + { + "epoch": 0.6512423992192778, + "grad_norm": 1.8287893533706665, + "learning_rate": 2.8645234826106063e-06, + "loss": 0.7111, + "step": 32532 + }, + { + "epoch": 0.6512624177364061, + "grad_norm": 1.1112803220748901, + "learning_rate": 2.8642303586829667e-06, + "loss": 0.277, + "step": 32533 + }, + { + "epoch": 0.6512824362535345, + "grad_norm": 1.0757936239242554, + "learning_rate": 2.8639372437336422e-06, + "loss": 0.2834, + "step": 32534 + }, + { + "epoch": 0.6513024547706628, + "grad_norm": 1.927058458328247, + "learning_rate": 2.8636441377638678e-06, + "loss": 0.8014, + "step": 32535 + }, + { + "epoch": 0.6513224732877912, + "grad_norm": 1.0800678730010986, + "learning_rate": 2.8633510407748745e-06, + "loss": 0.267, + "step": 32536 + }, + { + "epoch": 0.6513424918049195, + "grad_norm": 1.3087866306304932, + "learning_rate": 2.8630579527678935e-06, + "loss": 0.3371, + "step": 32537 + }, + { + "epoch": 0.651362510322048, + "grad_norm": 1.1306767463684082, + "learning_rate": 2.8627648737441543e-06, + "loss": 0.3282, + "step": 32538 + }, + { + "epoch": 0.6513825288391762, + "grad_norm": 1.1985019445419312, + "learning_rate": 2.8624718037048945e-06, + "loss": 0.3082, + "step": 32539 + }, + { + "epoch": 0.6514025473563045, + "grad_norm": 1.1741888523101807, + "learning_rate": 2.8621787426513426e-06, + "loss": 0.2957, + "step": 32540 + }, + { + "epoch": 0.651422565873433, + "grad_norm": 1.02582585811615, + "learning_rate": 2.8618856905847315e-06, + "loss": 0.3031, + "step": 32541 + }, + { + "epoch": 0.6514425843905612, + "grad_norm": 1.0819411277770996, + "learning_rate": 2.8615926475062906e-06, + "loss": 0.306, + "step": 32542 + }, + { + "epoch": 0.6514626029076896, + "grad_norm": 1.1794646978378296, + "learning_rate": 2.861299613417256e-06, + "loss": 0.3197, + "step": 32543 + }, + { + "epoch": 0.6514826214248179, + "grad_norm": 1.0538209676742554, + "learning_rate": 2.8610065883188575e-06, + "loss": 0.319, + "step": 32544 + }, + { + "epoch": 0.6515026399419463, + "grad_norm": 1.1821357011795044, + "learning_rate": 2.8607135722123275e-06, + "loss": 0.3105, + "step": 32545 + }, + { + "epoch": 0.6515226584590746, + "grad_norm": 2.1982414722442627, + "learning_rate": 2.8604205650988946e-06, + "loss": 0.7847, + "step": 32546 + }, + { + "epoch": 0.6515426769762029, + "grad_norm": 1.2562029361724854, + "learning_rate": 2.8601275669797933e-06, + "loss": 0.2993, + "step": 32547 + }, + { + "epoch": 0.6515626954933313, + "grad_norm": 1.103653907775879, + "learning_rate": 2.8598345778562577e-06, + "loss": 0.3058, + "step": 32548 + }, + { + "epoch": 0.6515827140104596, + "grad_norm": 1.1914633512496948, + "learning_rate": 2.859541597729517e-06, + "loss": 0.2594, + "step": 32549 + }, + { + "epoch": 0.651602732527588, + "grad_norm": 1.0570429563522339, + "learning_rate": 2.8592486266008022e-06, + "loss": 0.278, + "step": 32550 + }, + { + "epoch": 0.6516227510447163, + "grad_norm": 1.0879337787628174, + "learning_rate": 2.858955664471344e-06, + "loss": 0.3194, + "step": 32551 + }, + { + "epoch": 0.6516427695618447, + "grad_norm": 1.1145813465118408, + "learning_rate": 2.858662711342377e-06, + "loss": 0.3062, + "step": 32552 + }, + { + "epoch": 0.651662788078973, + "grad_norm": 1.2814770936965942, + "learning_rate": 2.8583697672151317e-06, + "loss": 0.2994, + "step": 32553 + }, + { + "epoch": 0.6516828065961013, + "grad_norm": 1.0859248638153076, + "learning_rate": 2.858076832090839e-06, + "loss": 0.2912, + "step": 32554 + }, + { + "epoch": 0.6517028251132297, + "grad_norm": 1.2187044620513916, + "learning_rate": 2.857783905970728e-06, + "loss": 0.3676, + "step": 32555 + }, + { + "epoch": 0.651722843630358, + "grad_norm": 1.9697165489196777, + "learning_rate": 2.8574909888560345e-06, + "loss": 0.7212, + "step": 32556 + }, + { + "epoch": 0.6517428621474864, + "grad_norm": 1.0530058145523071, + "learning_rate": 2.857198080747987e-06, + "loss": 0.2953, + "step": 32557 + }, + { + "epoch": 0.6517628806646147, + "grad_norm": 1.1615660190582275, + "learning_rate": 2.856905181647819e-06, + "loss": 0.3058, + "step": 32558 + }, + { + "epoch": 0.6517828991817431, + "grad_norm": 1.1116647720336914, + "learning_rate": 2.8566122915567574e-06, + "loss": 0.2981, + "step": 32559 + }, + { + "epoch": 0.6518029176988714, + "grad_norm": 1.0300847291946411, + "learning_rate": 2.856319410476037e-06, + "loss": 0.3133, + "step": 32560 + }, + { + "epoch": 0.6518229362159998, + "grad_norm": 1.215726375579834, + "learning_rate": 2.8560265384068907e-06, + "loss": 0.2516, + "step": 32561 + }, + { + "epoch": 0.6518429547331281, + "grad_norm": 1.2002928256988525, + "learning_rate": 2.855733675350547e-06, + "loss": 0.3178, + "step": 32562 + }, + { + "epoch": 0.6518629732502564, + "grad_norm": 1.0672482252120972, + "learning_rate": 2.8554408213082374e-06, + "loss": 0.3164, + "step": 32563 + }, + { + "epoch": 0.6518829917673848, + "grad_norm": 1.0826371908187866, + "learning_rate": 2.855147976281191e-06, + "loss": 0.3107, + "step": 32564 + }, + { + "epoch": 0.6519030102845131, + "grad_norm": 0.9948444366455078, + "learning_rate": 2.854855140270644e-06, + "loss": 0.308, + "step": 32565 + }, + { + "epoch": 0.6519230288016415, + "grad_norm": 1.1915076971054077, + "learning_rate": 2.854562313277823e-06, + "loss": 0.3235, + "step": 32566 + }, + { + "epoch": 0.6519430473187698, + "grad_norm": 1.2203871011734009, + "learning_rate": 2.8542694953039607e-06, + "loss": 0.2964, + "step": 32567 + }, + { + "epoch": 0.6519630658358982, + "grad_norm": 1.0883580446243286, + "learning_rate": 2.8539766863502884e-06, + "loss": 0.2841, + "step": 32568 + }, + { + "epoch": 0.6519830843530265, + "grad_norm": 1.0455960035324097, + "learning_rate": 2.8536838864180333e-06, + "loss": 0.2771, + "step": 32569 + }, + { + "epoch": 0.6520031028701548, + "grad_norm": 1.2514417171478271, + "learning_rate": 2.853391095508432e-06, + "loss": 0.3271, + "step": 32570 + }, + { + "epoch": 0.6520231213872832, + "grad_norm": 2.0158979892730713, + "learning_rate": 2.853098313622712e-06, + "loss": 0.7894, + "step": 32571 + }, + { + "epoch": 0.6520431399044115, + "grad_norm": 1.1388877630233765, + "learning_rate": 2.852805540762103e-06, + "loss": 0.3253, + "step": 32572 + }, + { + "epoch": 0.65206315842154, + "grad_norm": 1.0259618759155273, + "learning_rate": 2.8525127769278373e-06, + "loss": 0.3221, + "step": 32573 + }, + { + "epoch": 0.6520831769386682, + "grad_norm": 1.1626782417297363, + "learning_rate": 2.852220022121148e-06, + "loss": 0.2932, + "step": 32574 + }, + { + "epoch": 0.6521031954557966, + "grad_norm": 1.0146538019180298, + "learning_rate": 2.851927276343263e-06, + "loss": 0.2554, + "step": 32575 + }, + { + "epoch": 0.6521232139729249, + "grad_norm": 1.0518461465835571, + "learning_rate": 2.8516345395954137e-06, + "loss": 0.3071, + "step": 32576 + }, + { + "epoch": 0.6521432324900533, + "grad_norm": 1.0685731172561646, + "learning_rate": 2.8513418118788287e-06, + "loss": 0.2243, + "step": 32577 + }, + { + "epoch": 0.6521632510071816, + "grad_norm": 1.1464418172836304, + "learning_rate": 2.851049093194742e-06, + "loss": 0.3546, + "step": 32578 + }, + { + "epoch": 0.6521832695243099, + "grad_norm": 1.0440384149551392, + "learning_rate": 2.850756383544383e-06, + "loss": 0.2822, + "step": 32579 + }, + { + "epoch": 0.6522032880414383, + "grad_norm": 1.2006080150604248, + "learning_rate": 2.850463682928981e-06, + "loss": 0.3103, + "step": 32580 + }, + { + "epoch": 0.6522233065585666, + "grad_norm": 1.0008913278579712, + "learning_rate": 2.8501709913497676e-06, + "loss": 0.3117, + "step": 32581 + }, + { + "epoch": 0.652243325075695, + "grad_norm": 1.1821410655975342, + "learning_rate": 2.849878308807971e-06, + "loss": 0.2788, + "step": 32582 + }, + { + "epoch": 0.6522633435928233, + "grad_norm": 1.0055667161941528, + "learning_rate": 2.849585635304824e-06, + "loss": 0.3097, + "step": 32583 + }, + { + "epoch": 0.6522833621099517, + "grad_norm": 1.1609832048416138, + "learning_rate": 2.8492929708415574e-06, + "loss": 0.2953, + "step": 32584 + }, + { + "epoch": 0.65230338062708, + "grad_norm": 1.0479036569595337, + "learning_rate": 2.8490003154193975e-06, + "loss": 0.2964, + "step": 32585 + }, + { + "epoch": 0.6523233991442083, + "grad_norm": 1.0130400657653809, + "learning_rate": 2.8487076690395777e-06, + "loss": 0.3148, + "step": 32586 + }, + { + "epoch": 0.6523434176613367, + "grad_norm": 1.9686943292617798, + "learning_rate": 2.8484150317033297e-06, + "loss": 0.7737, + "step": 32587 + }, + { + "epoch": 0.652363436178465, + "grad_norm": 1.1193715333938599, + "learning_rate": 2.8481224034118824e-06, + "loss": 0.2896, + "step": 32588 + }, + { + "epoch": 0.6523834546955934, + "grad_norm": 1.2407721281051636, + "learning_rate": 2.847829784166465e-06, + "loss": 0.2806, + "step": 32589 + }, + { + "epoch": 0.6524034732127217, + "grad_norm": 1.001875638961792, + "learning_rate": 2.8475371739683085e-06, + "loss": 0.2845, + "step": 32590 + }, + { + "epoch": 0.6524234917298501, + "grad_norm": 1.781325101852417, + "learning_rate": 2.84724457281864e-06, + "loss": 0.7557, + "step": 32591 + }, + { + "epoch": 0.6524435102469784, + "grad_norm": 1.0192290544509888, + "learning_rate": 2.8469519807186942e-06, + "loss": 0.2506, + "step": 32592 + }, + { + "epoch": 0.6524635287641068, + "grad_norm": 1.0034549236297607, + "learning_rate": 2.8466593976696987e-06, + "loss": 0.2888, + "step": 32593 + }, + { + "epoch": 0.6524835472812351, + "grad_norm": 1.2835009098052979, + "learning_rate": 2.8463668236728843e-06, + "loss": 0.2678, + "step": 32594 + }, + { + "epoch": 0.6525035657983634, + "grad_norm": 1.1539043188095093, + "learning_rate": 2.8460742587294776e-06, + "loss": 0.3001, + "step": 32595 + }, + { + "epoch": 0.6525235843154918, + "grad_norm": 1.0656367540359497, + "learning_rate": 2.8457817028407135e-06, + "loss": 0.2621, + "step": 32596 + }, + { + "epoch": 0.6525436028326201, + "grad_norm": 1.0195140838623047, + "learning_rate": 2.84548915600782e-06, + "loss": 0.2919, + "step": 32597 + }, + { + "epoch": 0.6525636213497485, + "grad_norm": 1.0583631992340088, + "learning_rate": 2.8451966182320236e-06, + "loss": 0.2526, + "step": 32598 + }, + { + "epoch": 0.6525836398668768, + "grad_norm": 1.2029846906661987, + "learning_rate": 2.8449040895145598e-06, + "loss": 0.3169, + "step": 32599 + }, + { + "epoch": 0.6526036583840052, + "grad_norm": 1.1398992538452148, + "learning_rate": 2.8446115698566525e-06, + "loss": 0.3293, + "step": 32600 + }, + { + "epoch": 0.6526236769011335, + "grad_norm": 1.145663857460022, + "learning_rate": 2.844319059259537e-06, + "loss": 0.3224, + "step": 32601 + }, + { + "epoch": 0.6526436954182618, + "grad_norm": 1.071845531463623, + "learning_rate": 2.8440265577244396e-06, + "loss": 0.292, + "step": 32602 + }, + { + "epoch": 0.6526637139353902, + "grad_norm": 1.3539584875106812, + "learning_rate": 2.8437340652525914e-06, + "loss": 0.341, + "step": 32603 + }, + { + "epoch": 0.6526837324525185, + "grad_norm": 1.1189388036727905, + "learning_rate": 2.8434415818452187e-06, + "loss": 0.3066, + "step": 32604 + }, + { + "epoch": 0.652703750969647, + "grad_norm": 1.1137430667877197, + "learning_rate": 2.8431491075035555e-06, + "loss": 0.3277, + "step": 32605 + }, + { + "epoch": 0.6527237694867752, + "grad_norm": 1.087188720703125, + "learning_rate": 2.8428566422288293e-06, + "loss": 0.2975, + "step": 32606 + }, + { + "epoch": 0.6527437880039036, + "grad_norm": 1.0879813432693481, + "learning_rate": 2.8425641860222698e-06, + "loss": 0.2918, + "step": 32607 + }, + { + "epoch": 0.6527638065210319, + "grad_norm": 0.9809579253196716, + "learning_rate": 2.842271738885104e-06, + "loss": 0.2926, + "step": 32608 + }, + { + "epoch": 0.6527838250381603, + "grad_norm": 1.1883845329284668, + "learning_rate": 2.841979300818565e-06, + "loss": 0.2941, + "step": 32609 + }, + { + "epoch": 0.6528038435552886, + "grad_norm": 1.0487687587738037, + "learning_rate": 2.841686871823881e-06, + "loss": 0.2958, + "step": 32610 + }, + { + "epoch": 0.6528238620724169, + "grad_norm": 1.089429497718811, + "learning_rate": 2.841394451902279e-06, + "loss": 0.3221, + "step": 32611 + }, + { + "epoch": 0.6528438805895453, + "grad_norm": 1.0847421884536743, + "learning_rate": 2.8411020410549922e-06, + "loss": 0.3225, + "step": 32612 + }, + { + "epoch": 0.6528638991066736, + "grad_norm": 1.1634032726287842, + "learning_rate": 2.8408096392832456e-06, + "loss": 0.3438, + "step": 32613 + }, + { + "epoch": 0.652883917623802, + "grad_norm": 1.990279197692871, + "learning_rate": 2.8405172465882723e-06, + "loss": 0.792, + "step": 32614 + }, + { + "epoch": 0.6529039361409303, + "grad_norm": 1.1083173751831055, + "learning_rate": 2.840224862971301e-06, + "loss": 0.3035, + "step": 32615 + }, + { + "epoch": 0.6529239546580587, + "grad_norm": 1.4658418893814087, + "learning_rate": 2.8399324884335587e-06, + "loss": 0.3157, + "step": 32616 + }, + { + "epoch": 0.652943973175187, + "grad_norm": 1.101886510848999, + "learning_rate": 2.839640122976273e-06, + "loss": 0.2857, + "step": 32617 + }, + { + "epoch": 0.6529639916923153, + "grad_norm": 1.0683165788650513, + "learning_rate": 2.839347766600678e-06, + "loss": 0.305, + "step": 32618 + }, + { + "epoch": 0.6529840102094437, + "grad_norm": 1.071777105331421, + "learning_rate": 2.839055419308e-06, + "loss": 0.3038, + "step": 32619 + }, + { + "epoch": 0.653004028726572, + "grad_norm": 1.1467317342758179, + "learning_rate": 2.8387630810994676e-06, + "loss": 0.3238, + "step": 32620 + }, + { + "epoch": 0.6530240472437004, + "grad_norm": 1.8705878257751465, + "learning_rate": 2.8384707519763104e-06, + "loss": 0.7226, + "step": 32621 + }, + { + "epoch": 0.6530440657608287, + "grad_norm": 1.1128484010696411, + "learning_rate": 2.8381784319397547e-06, + "loss": 0.3237, + "step": 32622 + }, + { + "epoch": 0.6530640842779571, + "grad_norm": 1.0635757446289062, + "learning_rate": 2.837886120991034e-06, + "loss": 0.3545, + "step": 32623 + }, + { + "epoch": 0.6530841027950854, + "grad_norm": 1.1820135116577148, + "learning_rate": 2.837593819131372e-06, + "loss": 0.3217, + "step": 32624 + }, + { + "epoch": 0.6531041213122138, + "grad_norm": 1.1795926094055176, + "learning_rate": 2.8373015263620034e-06, + "loss": 0.2692, + "step": 32625 + }, + { + "epoch": 0.6531241398293421, + "grad_norm": 1.113031029701233, + "learning_rate": 2.837009242684151e-06, + "loss": 0.2785, + "step": 32626 + }, + { + "epoch": 0.6531441583464704, + "grad_norm": 1.1729472875595093, + "learning_rate": 2.8367169680990487e-06, + "loss": 0.3165, + "step": 32627 + }, + { + "epoch": 0.6531641768635988, + "grad_norm": 1.0529299974441528, + "learning_rate": 2.8364247026079224e-06, + "loss": 0.3342, + "step": 32628 + }, + { + "epoch": 0.6531841953807271, + "grad_norm": 1.1211216449737549, + "learning_rate": 2.836132446212001e-06, + "loss": 0.3021, + "step": 32629 + }, + { + "epoch": 0.6532042138978555, + "grad_norm": 1.136138677597046, + "learning_rate": 2.835840198912511e-06, + "loss": 0.2728, + "step": 32630 + }, + { + "epoch": 0.6532242324149838, + "grad_norm": 1.0797655582427979, + "learning_rate": 2.8355479607106858e-06, + "loss": 0.3301, + "step": 32631 + }, + { + "epoch": 0.6532442509321122, + "grad_norm": 1.1352968215942383, + "learning_rate": 2.83525573160775e-06, + "loss": 0.2608, + "step": 32632 + }, + { + "epoch": 0.6532642694492405, + "grad_norm": 1.2132619619369507, + "learning_rate": 2.8349635116049347e-06, + "loss": 0.3196, + "step": 32633 + }, + { + "epoch": 0.6532842879663688, + "grad_norm": 1.0870987176895142, + "learning_rate": 2.8346713007034653e-06, + "loss": 0.3236, + "step": 32634 + }, + { + "epoch": 0.6533043064834972, + "grad_norm": 1.087831735610962, + "learning_rate": 2.834379098904571e-06, + "loss": 0.3295, + "step": 32635 + }, + { + "epoch": 0.6533243250006255, + "grad_norm": 1.1632379293441772, + "learning_rate": 2.834086906209482e-06, + "loss": 0.3197, + "step": 32636 + }, + { + "epoch": 0.653344343517754, + "grad_norm": 1.039597988128662, + "learning_rate": 2.8337947226194246e-06, + "loss": 0.2888, + "step": 32637 + }, + { + "epoch": 0.6533643620348822, + "grad_norm": 1.1376398801803589, + "learning_rate": 2.8335025481356294e-06, + "loss": 0.3389, + "step": 32638 + }, + { + "epoch": 0.6533843805520106, + "grad_norm": 1.087946891784668, + "learning_rate": 2.8332103827593214e-06, + "loss": 0.2886, + "step": 32639 + }, + { + "epoch": 0.6534043990691389, + "grad_norm": 1.0965110063552856, + "learning_rate": 2.832918226491732e-06, + "loss": 0.3211, + "step": 32640 + }, + { + "epoch": 0.6534244175862673, + "grad_norm": 1.168102502822876, + "learning_rate": 2.832626079334089e-06, + "loss": 0.3102, + "step": 32641 + }, + { + "epoch": 0.6534444361033956, + "grad_norm": 1.2267224788665771, + "learning_rate": 2.8323339412876195e-06, + "loss": 0.2893, + "step": 32642 + }, + { + "epoch": 0.6534644546205239, + "grad_norm": 0.9832386374473572, + "learning_rate": 2.832041812353551e-06, + "loss": 0.261, + "step": 32643 + }, + { + "epoch": 0.6534844731376523, + "grad_norm": 1.2204070091247559, + "learning_rate": 2.83174969253311e-06, + "loss": 0.3436, + "step": 32644 + }, + { + "epoch": 0.6535044916547806, + "grad_norm": 1.1805901527404785, + "learning_rate": 2.8314575818275293e-06, + "loss": 0.3093, + "step": 32645 + }, + { + "epoch": 0.653524510171909, + "grad_norm": 1.0437663793563843, + "learning_rate": 2.831165480238034e-06, + "loss": 0.2986, + "step": 32646 + }, + { + "epoch": 0.6535445286890373, + "grad_norm": 0.9833207130432129, + "learning_rate": 2.8308733877658525e-06, + "loss": 0.3028, + "step": 32647 + }, + { + "epoch": 0.6535645472061657, + "grad_norm": 1.3404144048690796, + "learning_rate": 2.83058130441221e-06, + "loss": 0.2733, + "step": 32648 + }, + { + "epoch": 0.653584565723294, + "grad_norm": 1.9140406847000122, + "learning_rate": 2.8302892301783393e-06, + "loss": 0.7599, + "step": 32649 + }, + { + "epoch": 0.6536045842404223, + "grad_norm": 1.1439988613128662, + "learning_rate": 2.8299971650654636e-06, + "loss": 0.3266, + "step": 32650 + }, + { + "epoch": 0.6536246027575507, + "grad_norm": 1.1039358377456665, + "learning_rate": 2.8297051090748146e-06, + "loss": 0.3213, + "step": 32651 + }, + { + "epoch": 0.653644621274679, + "grad_norm": 1.0896415710449219, + "learning_rate": 2.829413062207619e-06, + "loss": 0.273, + "step": 32652 + }, + { + "epoch": 0.6536646397918074, + "grad_norm": 1.1092901229858398, + "learning_rate": 2.829121024465101e-06, + "loss": 0.3147, + "step": 32653 + }, + { + "epoch": 0.6536846583089357, + "grad_norm": 1.1905559301376343, + "learning_rate": 2.8288289958484926e-06, + "loss": 0.2877, + "step": 32654 + }, + { + "epoch": 0.6537046768260641, + "grad_norm": 1.3172646760940552, + "learning_rate": 2.828536976359021e-06, + "loss": 0.2941, + "step": 32655 + }, + { + "epoch": 0.6537246953431924, + "grad_norm": 1.0471917390823364, + "learning_rate": 2.8282449659979118e-06, + "loss": 0.2939, + "step": 32656 + }, + { + "epoch": 0.6537447138603208, + "grad_norm": 1.9250197410583496, + "learning_rate": 2.8279529647663915e-06, + "loss": 0.7801, + "step": 32657 + }, + { + "epoch": 0.6537647323774491, + "grad_norm": 1.1893675327301025, + "learning_rate": 2.8276609726656924e-06, + "loss": 0.2798, + "step": 32658 + }, + { + "epoch": 0.6537847508945774, + "grad_norm": 1.0799793004989624, + "learning_rate": 2.827368989697038e-06, + "loss": 0.315, + "step": 32659 + }, + { + "epoch": 0.6538047694117058, + "grad_norm": 1.8803735971450806, + "learning_rate": 2.827077015861658e-06, + "loss": 0.7122, + "step": 32660 + }, + { + "epoch": 0.6538247879288341, + "grad_norm": 1.1103824377059937, + "learning_rate": 2.826785051160775e-06, + "loss": 0.2977, + "step": 32661 + }, + { + "epoch": 0.6538448064459625, + "grad_norm": 1.1055538654327393, + "learning_rate": 2.8264930955956234e-06, + "loss": 0.2544, + "step": 32662 + }, + { + "epoch": 0.6538648249630908, + "grad_norm": 1.051273226737976, + "learning_rate": 2.8262011491674245e-06, + "loss": 0.305, + "step": 32663 + }, + { + "epoch": 0.6538848434802192, + "grad_norm": 1.1778124570846558, + "learning_rate": 2.82590921187741e-06, + "loss": 0.3122, + "step": 32664 + }, + { + "epoch": 0.6539048619973475, + "grad_norm": 1.1595479249954224, + "learning_rate": 2.8256172837268057e-06, + "loss": 0.3087, + "step": 32665 + }, + { + "epoch": 0.6539248805144758, + "grad_norm": 1.1041914224624634, + "learning_rate": 2.825325364716836e-06, + "loss": 0.2707, + "step": 32666 + }, + { + "epoch": 0.6539448990316042, + "grad_norm": 1.1239001750946045, + "learning_rate": 2.825033454848732e-06, + "loss": 0.2736, + "step": 32667 + }, + { + "epoch": 0.6539649175487325, + "grad_norm": 1.214015007019043, + "learning_rate": 2.8247415541237206e-06, + "loss": 0.3266, + "step": 32668 + }, + { + "epoch": 0.653984936065861, + "grad_norm": 1.0713552236557007, + "learning_rate": 2.8244496625430273e-06, + "loss": 0.2874, + "step": 32669 + }, + { + "epoch": 0.6540049545829892, + "grad_norm": 1.1941876411437988, + "learning_rate": 2.8241577801078766e-06, + "loss": 0.3384, + "step": 32670 + }, + { + "epoch": 0.6540249731001176, + "grad_norm": 1.114829659461975, + "learning_rate": 2.8238659068195005e-06, + "loss": 0.3145, + "step": 32671 + }, + { + "epoch": 0.6540449916172459, + "grad_norm": 1.2024556398391724, + "learning_rate": 2.8235740426791248e-06, + "loss": 0.2789, + "step": 32672 + }, + { + "epoch": 0.6540650101343743, + "grad_norm": 1.1475584506988525, + "learning_rate": 2.8232821876879746e-06, + "loss": 0.3267, + "step": 32673 + }, + { + "epoch": 0.6540850286515026, + "grad_norm": 1.9496325254440308, + "learning_rate": 2.822990341847276e-06, + "loss": 0.8227, + "step": 32674 + }, + { + "epoch": 0.6541050471686309, + "grad_norm": 1.1069402694702148, + "learning_rate": 2.822698505158259e-06, + "loss": 0.3034, + "step": 32675 + }, + { + "epoch": 0.6541250656857593, + "grad_norm": 2.121490716934204, + "learning_rate": 2.8224066776221477e-06, + "loss": 0.8475, + "step": 32676 + }, + { + "epoch": 0.6541450842028876, + "grad_norm": 1.2326793670654297, + "learning_rate": 2.8221148592401714e-06, + "loss": 0.2803, + "step": 32677 + }, + { + "epoch": 0.654165102720016, + "grad_norm": 1.1360087394714355, + "learning_rate": 2.8218230500135557e-06, + "loss": 0.3176, + "step": 32678 + }, + { + "epoch": 0.6541851212371443, + "grad_norm": 1.3652634620666504, + "learning_rate": 2.8215312499435254e-06, + "loss": 0.3182, + "step": 32679 + }, + { + "epoch": 0.6542051397542727, + "grad_norm": 1.0785140991210938, + "learning_rate": 2.821239459031311e-06, + "loss": 0.2904, + "step": 32680 + }, + { + "epoch": 0.654225158271401, + "grad_norm": 1.3541172742843628, + "learning_rate": 2.8209476772781364e-06, + "loss": 0.2747, + "step": 32681 + }, + { + "epoch": 0.6542451767885293, + "grad_norm": 1.0322016477584839, + "learning_rate": 2.8206559046852294e-06, + "loss": 0.2976, + "step": 32682 + }, + { + "epoch": 0.6542651953056577, + "grad_norm": 1.2055139541625977, + "learning_rate": 2.8203641412538164e-06, + "loss": 0.3732, + "step": 32683 + }, + { + "epoch": 0.654285213822786, + "grad_norm": 2.059424638748169, + "learning_rate": 2.8200723869851204e-06, + "loss": 0.7729, + "step": 32684 + }, + { + "epoch": 0.6543052323399144, + "grad_norm": 1.09842050075531, + "learning_rate": 2.8197806418803735e-06, + "loss": 0.297, + "step": 32685 + }, + { + "epoch": 0.6543252508570427, + "grad_norm": 1.0015865564346313, + "learning_rate": 2.8194889059407992e-06, + "loss": 0.2839, + "step": 32686 + }, + { + "epoch": 0.6543452693741711, + "grad_norm": 1.1945011615753174, + "learning_rate": 2.8191971791676232e-06, + "loss": 0.3351, + "step": 32687 + }, + { + "epoch": 0.6543652878912994, + "grad_norm": 2.044569253921509, + "learning_rate": 2.818905461562074e-06, + "loss": 0.7419, + "step": 32688 + }, + { + "epoch": 0.6543853064084278, + "grad_norm": 1.1294996738433838, + "learning_rate": 2.8186137531253755e-06, + "loss": 0.2884, + "step": 32689 + }, + { + "epoch": 0.6544053249255561, + "grad_norm": 1.1134995222091675, + "learning_rate": 2.8183220538587576e-06, + "loss": 0.2601, + "step": 32690 + }, + { + "epoch": 0.6544253434426844, + "grad_norm": 1.1406584978103638, + "learning_rate": 2.818030363763443e-06, + "loss": 0.3124, + "step": 32691 + }, + { + "epoch": 0.6544453619598128, + "grad_norm": 1.052588701248169, + "learning_rate": 2.8177386828406582e-06, + "loss": 0.3343, + "step": 32692 + }, + { + "epoch": 0.6544653804769411, + "grad_norm": 1.1824194192886353, + "learning_rate": 2.817447011091632e-06, + "loss": 0.3406, + "step": 32693 + }, + { + "epoch": 0.6544853989940695, + "grad_norm": 1.2029587030410767, + "learning_rate": 2.8171553485175894e-06, + "loss": 0.2881, + "step": 32694 + }, + { + "epoch": 0.6545054175111978, + "grad_norm": 1.1626873016357422, + "learning_rate": 2.8168636951197554e-06, + "loss": 0.3281, + "step": 32695 + }, + { + "epoch": 0.6545254360283262, + "grad_norm": 1.1722276210784912, + "learning_rate": 2.8165720508993566e-06, + "loss": 0.2607, + "step": 32696 + }, + { + "epoch": 0.6545454545454545, + "grad_norm": 2.0197315216064453, + "learning_rate": 2.8162804158576173e-06, + "loss": 0.7676, + "step": 32697 + }, + { + "epoch": 0.6545654730625828, + "grad_norm": 1.215482473373413, + "learning_rate": 2.8159887899957674e-06, + "loss": 0.3183, + "step": 32698 + }, + { + "epoch": 0.6545854915797112, + "grad_norm": 1.120548129081726, + "learning_rate": 2.81569717331503e-06, + "loss": 0.3029, + "step": 32699 + }, + { + "epoch": 0.6546055100968395, + "grad_norm": 1.155553936958313, + "learning_rate": 2.8154055658166303e-06, + "loss": 0.3065, + "step": 32700 + }, + { + "epoch": 0.6546255286139679, + "grad_norm": 1.8641250133514404, + "learning_rate": 2.815113967501797e-06, + "loss": 0.8495, + "step": 32701 + }, + { + "epoch": 0.6546455471310962, + "grad_norm": 1.1351728439331055, + "learning_rate": 2.814822378371752e-06, + "loss": 0.2737, + "step": 32702 + }, + { + "epoch": 0.6546655656482246, + "grad_norm": 1.150099754333496, + "learning_rate": 2.814530798427726e-06, + "loss": 0.3379, + "step": 32703 + }, + { + "epoch": 0.6546855841653529, + "grad_norm": 1.1345341205596924, + "learning_rate": 2.814239227670942e-06, + "loss": 0.3279, + "step": 32704 + }, + { + "epoch": 0.6547056026824813, + "grad_norm": 1.8810120820999146, + "learning_rate": 2.8139476661026267e-06, + "loss": 0.765, + "step": 32705 + }, + { + "epoch": 0.6547256211996096, + "grad_norm": 1.0998939275741577, + "learning_rate": 2.813656113724002e-06, + "loss": 0.3209, + "step": 32706 + }, + { + "epoch": 0.6547456397167379, + "grad_norm": 1.9038028717041016, + "learning_rate": 2.813364570536299e-06, + "loss": 0.7355, + "step": 32707 + }, + { + "epoch": 0.6547656582338663, + "grad_norm": 1.2295116186141968, + "learning_rate": 2.8130730365407406e-06, + "loss": 0.2783, + "step": 32708 + }, + { + "epoch": 0.6547856767509946, + "grad_norm": 1.2643604278564453, + "learning_rate": 2.8127815117385525e-06, + "loss": 0.303, + "step": 32709 + }, + { + "epoch": 0.654805695268123, + "grad_norm": 1.2263463735580444, + "learning_rate": 2.8124899961309584e-06, + "loss": 0.2903, + "step": 32710 + }, + { + "epoch": 0.6548257137852513, + "grad_norm": 1.090002179145813, + "learning_rate": 2.8121984897191867e-06, + "loss": 0.2682, + "step": 32711 + }, + { + "epoch": 0.6548457323023797, + "grad_norm": 1.3370641469955444, + "learning_rate": 2.811906992504463e-06, + "loss": 0.3071, + "step": 32712 + }, + { + "epoch": 0.654865750819508, + "grad_norm": 1.9756895303726196, + "learning_rate": 2.811615504488009e-06, + "loss": 0.8186, + "step": 32713 + }, + { + "epoch": 0.6548857693366363, + "grad_norm": 1.0869014263153076, + "learning_rate": 2.8113240256710537e-06, + "loss": 0.3131, + "step": 32714 + }, + { + "epoch": 0.6549057878537647, + "grad_norm": 1.2458306550979614, + "learning_rate": 2.81103255605482e-06, + "loss": 0.2829, + "step": 32715 + }, + { + "epoch": 0.654925806370893, + "grad_norm": 1.056974172592163, + "learning_rate": 2.8107410956405366e-06, + "loss": 0.2705, + "step": 32716 + }, + { + "epoch": 0.6549458248880214, + "grad_norm": 1.1207401752471924, + "learning_rate": 2.810449644429426e-06, + "loss": 0.3005, + "step": 32717 + }, + { + "epoch": 0.6549658434051497, + "grad_norm": 1.022972583770752, + "learning_rate": 2.8101582024227135e-06, + "loss": 0.2735, + "step": 32718 + }, + { + "epoch": 0.6549858619222781, + "grad_norm": 1.1470556259155273, + "learning_rate": 2.8098667696216235e-06, + "loss": 0.2538, + "step": 32719 + }, + { + "epoch": 0.6550058804394064, + "grad_norm": 1.2497308254241943, + "learning_rate": 2.8095753460273845e-06, + "loss": 0.3282, + "step": 32720 + }, + { + "epoch": 0.6550258989565348, + "grad_norm": 1.0596460103988647, + "learning_rate": 2.8092839316412185e-06, + "loss": 0.295, + "step": 32721 + }, + { + "epoch": 0.6550459174736631, + "grad_norm": 1.1696405410766602, + "learning_rate": 2.8089925264643514e-06, + "loss": 0.2848, + "step": 32722 + }, + { + "epoch": 0.6550659359907914, + "grad_norm": 1.195739507675171, + "learning_rate": 2.808701130498007e-06, + "loss": 0.2983, + "step": 32723 + }, + { + "epoch": 0.6550859545079198, + "grad_norm": 0.9874542951583862, + "learning_rate": 2.808409743743413e-06, + "loss": 0.2922, + "step": 32724 + }, + { + "epoch": 0.6551059730250481, + "grad_norm": 1.1924105882644653, + "learning_rate": 2.808118366201793e-06, + "loss": 0.3275, + "step": 32725 + }, + { + "epoch": 0.6551259915421765, + "grad_norm": 1.1542471647262573, + "learning_rate": 2.807826997874369e-06, + "loss": 0.3049, + "step": 32726 + }, + { + "epoch": 0.6551460100593048, + "grad_norm": 1.0907326936721802, + "learning_rate": 2.807535638762371e-06, + "loss": 0.2879, + "step": 32727 + }, + { + "epoch": 0.6551660285764332, + "grad_norm": 1.0857141017913818, + "learning_rate": 2.8072442888670192e-06, + "loss": 0.2827, + "step": 32728 + }, + { + "epoch": 0.6551860470935615, + "grad_norm": 1.0596665143966675, + "learning_rate": 2.806952948189543e-06, + "loss": 0.2641, + "step": 32729 + }, + { + "epoch": 0.6552060656106898, + "grad_norm": 0.9700307250022888, + "learning_rate": 2.8066616167311646e-06, + "loss": 0.2651, + "step": 32730 + }, + { + "epoch": 0.6552260841278182, + "grad_norm": 1.1224948167800903, + "learning_rate": 2.8063702944931082e-06, + "loss": 0.3046, + "step": 32731 + }, + { + "epoch": 0.6552461026449465, + "grad_norm": 1.0695668458938599, + "learning_rate": 2.8060789814765975e-06, + "loss": 0.2934, + "step": 32732 + }, + { + "epoch": 0.6552661211620749, + "grad_norm": 1.0502111911773682, + "learning_rate": 2.8057876776828606e-06, + "loss": 0.2956, + "step": 32733 + }, + { + "epoch": 0.6552861396792032, + "grad_norm": 1.038703203201294, + "learning_rate": 2.8054963831131197e-06, + "loss": 0.2606, + "step": 32734 + }, + { + "epoch": 0.6553061581963316, + "grad_norm": 1.1523869037628174, + "learning_rate": 2.8052050977686003e-06, + "loss": 0.2956, + "step": 32735 + }, + { + "epoch": 0.6553261767134599, + "grad_norm": 0.9674322605133057, + "learning_rate": 2.8049138216505256e-06, + "loss": 0.2908, + "step": 32736 + }, + { + "epoch": 0.6553461952305883, + "grad_norm": 1.0512657165527344, + "learning_rate": 2.804622554760119e-06, + "loss": 0.3277, + "step": 32737 + }, + { + "epoch": 0.6553662137477166, + "grad_norm": 1.0811036825180054, + "learning_rate": 2.8043312970986094e-06, + "loss": 0.2724, + "step": 32738 + }, + { + "epoch": 0.6553862322648449, + "grad_norm": 1.1099997758865356, + "learning_rate": 2.804040048667216e-06, + "loss": 0.2877, + "step": 32739 + }, + { + "epoch": 0.6554062507819733, + "grad_norm": 1.0389318466186523, + "learning_rate": 2.8037488094671678e-06, + "loss": 0.271, + "step": 32740 + }, + { + "epoch": 0.6554262692991016, + "grad_norm": 1.0874348878860474, + "learning_rate": 2.803457579499685e-06, + "loss": 0.3082, + "step": 32741 + }, + { + "epoch": 0.65544628781623, + "grad_norm": 1.138938546180725, + "learning_rate": 2.803166358765996e-06, + "loss": 0.3118, + "step": 32742 + }, + { + "epoch": 0.6554663063333583, + "grad_norm": 1.1529057025909424, + "learning_rate": 2.802875147267322e-06, + "loss": 0.3271, + "step": 32743 + }, + { + "epoch": 0.6554863248504867, + "grad_norm": 1.3037216663360596, + "learning_rate": 2.802583945004889e-06, + "loss": 0.2867, + "step": 32744 + }, + { + "epoch": 0.655506343367615, + "grad_norm": 1.295215129852295, + "learning_rate": 2.8022927519799196e-06, + "loss": 0.3026, + "step": 32745 + }, + { + "epoch": 0.6555263618847433, + "grad_norm": 1.1776371002197266, + "learning_rate": 2.8020015681936374e-06, + "loss": 0.2949, + "step": 32746 + }, + { + "epoch": 0.6555463804018717, + "grad_norm": 1.0094399452209473, + "learning_rate": 2.801710393647269e-06, + "loss": 0.259, + "step": 32747 + }, + { + "epoch": 0.655566398919, + "grad_norm": 1.933353066444397, + "learning_rate": 2.8014192283420364e-06, + "loss": 0.7213, + "step": 32748 + }, + { + "epoch": 0.6555864174361284, + "grad_norm": 1.1783971786499023, + "learning_rate": 2.801128072279165e-06, + "loss": 0.3076, + "step": 32749 + }, + { + "epoch": 0.6556064359532567, + "grad_norm": 1.08092200756073, + "learning_rate": 2.8008369254598753e-06, + "loss": 0.2999, + "step": 32750 + }, + { + "epoch": 0.6556264544703851, + "grad_norm": 1.086256742477417, + "learning_rate": 2.800545787885397e-06, + "loss": 0.2785, + "step": 32751 + }, + { + "epoch": 0.6556464729875134, + "grad_norm": 1.8622313737869263, + "learning_rate": 2.800254659556948e-06, + "loss": 0.8612, + "step": 32752 + }, + { + "epoch": 0.6556664915046418, + "grad_norm": 1.2763947248458862, + "learning_rate": 2.7999635404757574e-06, + "loss": 0.2653, + "step": 32753 + }, + { + "epoch": 0.6556865100217701, + "grad_norm": 1.0683866739273071, + "learning_rate": 2.7996724306430443e-06, + "loss": 0.2539, + "step": 32754 + }, + { + "epoch": 0.6557065285388984, + "grad_norm": 1.1095739603042603, + "learning_rate": 2.7993813300600373e-06, + "loss": 0.2974, + "step": 32755 + }, + { + "epoch": 0.6557265470560268, + "grad_norm": 1.0749585628509521, + "learning_rate": 2.7990902387279574e-06, + "loss": 0.2945, + "step": 32756 + }, + { + "epoch": 0.6557465655731551, + "grad_norm": 1.0691401958465576, + "learning_rate": 2.798799156648029e-06, + "loss": 0.3262, + "step": 32757 + }, + { + "epoch": 0.6557665840902835, + "grad_norm": 1.241319179534912, + "learning_rate": 2.798508083821474e-06, + "loss": 0.3433, + "step": 32758 + }, + { + "epoch": 0.6557866026074118, + "grad_norm": 1.1005864143371582, + "learning_rate": 2.798217020249516e-06, + "loss": 0.3141, + "step": 32759 + }, + { + "epoch": 0.6558066211245402, + "grad_norm": 1.1417460441589355, + "learning_rate": 2.797925965933382e-06, + "loss": 0.2868, + "step": 32760 + }, + { + "epoch": 0.6558266396416685, + "grad_norm": 1.1832431554794312, + "learning_rate": 2.797634920874293e-06, + "loss": 0.3201, + "step": 32761 + }, + { + "epoch": 0.6558466581587968, + "grad_norm": 1.1894532442092896, + "learning_rate": 2.797343885073473e-06, + "loss": 0.28, + "step": 32762 + }, + { + "epoch": 0.6558666766759252, + "grad_norm": 1.0370445251464844, + "learning_rate": 2.797052858532143e-06, + "loss": 0.2897, + "step": 32763 + }, + { + "epoch": 0.6558866951930535, + "grad_norm": 1.095040202140808, + "learning_rate": 2.7967618412515307e-06, + "loss": 0.278, + "step": 32764 + }, + { + "epoch": 0.6559067137101819, + "grad_norm": 1.2049648761749268, + "learning_rate": 2.796470833232856e-06, + "loss": 0.3383, + "step": 32765 + }, + { + "epoch": 0.6559267322273102, + "grad_norm": 1.1612988710403442, + "learning_rate": 2.7961798344773453e-06, + "loss": 0.3217, + "step": 32766 + }, + { + "epoch": 0.6559467507444386, + "grad_norm": 1.2314430475234985, + "learning_rate": 2.7958888449862208e-06, + "loss": 0.3438, + "step": 32767 + }, + { + "epoch": 0.6559667692615669, + "grad_norm": 1.1318389177322388, + "learning_rate": 2.7955978647607028e-06, + "loss": 0.2659, + "step": 32768 + }, + { + "epoch": 0.6559867877786953, + "grad_norm": 1.1264315843582153, + "learning_rate": 2.7953068938020187e-06, + "loss": 0.3276, + "step": 32769 + }, + { + "epoch": 0.6560068062958236, + "grad_norm": 1.922485113143921, + "learning_rate": 2.7950159321113905e-06, + "loss": 0.7881, + "step": 32770 + }, + { + "epoch": 0.6560268248129519, + "grad_norm": 1.0929919481277466, + "learning_rate": 2.794724979690041e-06, + "loss": 0.2856, + "step": 32771 + }, + { + "epoch": 0.6560468433300803, + "grad_norm": 1.2920958995819092, + "learning_rate": 2.7944340365391904e-06, + "loss": 0.3039, + "step": 32772 + }, + { + "epoch": 0.6560668618472086, + "grad_norm": 1.8930433988571167, + "learning_rate": 2.794143102660067e-06, + "loss": 0.8295, + "step": 32773 + }, + { + "epoch": 0.656086880364337, + "grad_norm": 1.1541448831558228, + "learning_rate": 2.7938521780538908e-06, + "loss": 0.2864, + "step": 32774 + }, + { + "epoch": 0.6561068988814653, + "grad_norm": 1.142214059829712, + "learning_rate": 2.7935612627218856e-06, + "loss": 0.2718, + "step": 32775 + }, + { + "epoch": 0.6561269173985937, + "grad_norm": 1.8554173707962036, + "learning_rate": 2.793270356665272e-06, + "loss": 0.7443, + "step": 32776 + }, + { + "epoch": 0.656146935915722, + "grad_norm": 1.1242369413375854, + "learning_rate": 2.792979459885277e-06, + "loss": 0.3256, + "step": 32777 + }, + { + "epoch": 0.6561669544328503, + "grad_norm": 1.4275057315826416, + "learning_rate": 2.7926885723831194e-06, + "loss": 0.2726, + "step": 32778 + }, + { + "epoch": 0.6561869729499787, + "grad_norm": 1.2738815546035767, + "learning_rate": 2.792397694160026e-06, + "loss": 0.3242, + "step": 32779 + }, + { + "epoch": 0.656206991467107, + "grad_norm": 1.2485902309417725, + "learning_rate": 2.7921068252172174e-06, + "loss": 0.2975, + "step": 32780 + }, + { + "epoch": 0.6562270099842354, + "grad_norm": 1.4364722967147827, + "learning_rate": 2.7918159655559152e-06, + "loss": 0.2823, + "step": 32781 + }, + { + "epoch": 0.6562470285013637, + "grad_norm": 1.0698392391204834, + "learning_rate": 2.791525115177345e-06, + "loss": 0.2666, + "step": 32782 + }, + { + "epoch": 0.6562670470184921, + "grad_norm": 1.1181719303131104, + "learning_rate": 2.7912342740827277e-06, + "loss": 0.2937, + "step": 32783 + }, + { + "epoch": 0.6562870655356204, + "grad_norm": 1.1416730880737305, + "learning_rate": 2.7909434422732864e-06, + "loss": 0.2957, + "step": 32784 + }, + { + "epoch": 0.6563070840527488, + "grad_norm": 0.9906237125396729, + "learning_rate": 2.7906526197502416e-06, + "loss": 0.2715, + "step": 32785 + }, + { + "epoch": 0.6563271025698771, + "grad_norm": 1.1244542598724365, + "learning_rate": 2.7903618065148196e-06, + "loss": 0.3075, + "step": 32786 + }, + { + "epoch": 0.6563471210870054, + "grad_norm": 1.0818997621536255, + "learning_rate": 2.7900710025682408e-06, + "loss": 0.3119, + "step": 32787 + }, + { + "epoch": 0.6563671396041338, + "grad_norm": 1.1105753183364868, + "learning_rate": 2.789780207911728e-06, + "loss": 0.2714, + "step": 32788 + }, + { + "epoch": 0.6563871581212621, + "grad_norm": 1.1741546392440796, + "learning_rate": 2.789489422546502e-06, + "loss": 0.2781, + "step": 32789 + }, + { + "epoch": 0.6564071766383905, + "grad_norm": 1.1079363822937012, + "learning_rate": 2.7891986464737886e-06, + "loss": 0.2499, + "step": 32790 + }, + { + "epoch": 0.6564271951555188, + "grad_norm": 2.0485172271728516, + "learning_rate": 2.788907879694806e-06, + "loss": 0.7743, + "step": 32791 + }, + { + "epoch": 0.6564472136726472, + "grad_norm": 1.0722935199737549, + "learning_rate": 2.788617122210781e-06, + "loss": 0.3224, + "step": 32792 + }, + { + "epoch": 0.6564672321897755, + "grad_norm": 0.903317391872406, + "learning_rate": 2.7883263740229336e-06, + "loss": 0.2459, + "step": 32793 + }, + { + "epoch": 0.6564872507069038, + "grad_norm": 1.1539740562438965, + "learning_rate": 2.7880356351324844e-06, + "loss": 0.2659, + "step": 32794 + }, + { + "epoch": 0.6565072692240322, + "grad_norm": 1.1471890211105347, + "learning_rate": 2.7877449055406596e-06, + "loss": 0.2866, + "step": 32795 + }, + { + "epoch": 0.6565272877411605, + "grad_norm": 1.041868805885315, + "learning_rate": 2.7874541852486793e-06, + "loss": 0.3123, + "step": 32796 + }, + { + "epoch": 0.6565473062582889, + "grad_norm": 1.1333212852478027, + "learning_rate": 2.787163474257765e-06, + "loss": 0.2465, + "step": 32797 + }, + { + "epoch": 0.6565673247754172, + "grad_norm": 1.1054669618606567, + "learning_rate": 2.786872772569139e-06, + "loss": 0.2793, + "step": 32798 + }, + { + "epoch": 0.6565873432925456, + "grad_norm": 1.7688896656036377, + "learning_rate": 2.7865820801840216e-06, + "loss": 0.7527, + "step": 32799 + }, + { + "epoch": 0.6566073618096739, + "grad_norm": 1.1308032274246216, + "learning_rate": 2.786291397103639e-06, + "loss": 0.3275, + "step": 32800 + }, + { + "epoch": 0.6566273803268023, + "grad_norm": 1.0872597694396973, + "learning_rate": 2.7860007233292115e-06, + "loss": 0.3294, + "step": 32801 + }, + { + "epoch": 0.6566473988439306, + "grad_norm": 1.0550810098648071, + "learning_rate": 2.785710058861958e-06, + "loss": 0.2892, + "step": 32802 + }, + { + "epoch": 0.6566674173610589, + "grad_norm": 1.0906325578689575, + "learning_rate": 2.7854194037031045e-06, + "loss": 0.338, + "step": 32803 + }, + { + "epoch": 0.6566874358781873, + "grad_norm": 1.3248037099838257, + "learning_rate": 2.78512875785387e-06, + "loss": 0.2715, + "step": 32804 + }, + { + "epoch": 0.6567074543953156, + "grad_norm": 1.104536533355713, + "learning_rate": 2.7848381213154797e-06, + "loss": 0.3113, + "step": 32805 + }, + { + "epoch": 0.656727472912444, + "grad_norm": 1.0901700258255005, + "learning_rate": 2.7845474940891527e-06, + "loss": 0.2559, + "step": 32806 + }, + { + "epoch": 0.6567474914295723, + "grad_norm": 1.099942684173584, + "learning_rate": 2.784256876176109e-06, + "loss": 0.277, + "step": 32807 + }, + { + "epoch": 0.6567675099467007, + "grad_norm": 1.0853767395019531, + "learning_rate": 2.783966267577575e-06, + "loss": 0.3119, + "step": 32808 + }, + { + "epoch": 0.656787528463829, + "grad_norm": 1.0703409910202026, + "learning_rate": 2.7836756682947695e-06, + "loss": 0.3337, + "step": 32809 + }, + { + "epoch": 0.6568075469809573, + "grad_norm": 1.1552215814590454, + "learning_rate": 2.783385078328915e-06, + "loss": 0.3307, + "step": 32810 + }, + { + "epoch": 0.6568275654980857, + "grad_norm": 1.124911904335022, + "learning_rate": 2.7830944976812325e-06, + "loss": 0.2529, + "step": 32811 + }, + { + "epoch": 0.656847584015214, + "grad_norm": 1.151428461074829, + "learning_rate": 2.7828039263529416e-06, + "loss": 0.3398, + "step": 32812 + }, + { + "epoch": 0.6568676025323424, + "grad_norm": 1.1623934507369995, + "learning_rate": 2.7825133643452674e-06, + "loss": 0.3268, + "step": 32813 + }, + { + "epoch": 0.6568876210494707, + "grad_norm": 1.0326675176620483, + "learning_rate": 2.782222811659431e-06, + "loss": 0.3224, + "step": 32814 + }, + { + "epoch": 0.6569076395665991, + "grad_norm": 1.3971766233444214, + "learning_rate": 2.7819322682966493e-06, + "loss": 0.3249, + "step": 32815 + }, + { + "epoch": 0.6569276580837274, + "grad_norm": 1.077967882156372, + "learning_rate": 2.7816417342581496e-06, + "loss": 0.3556, + "step": 32816 + }, + { + "epoch": 0.6569476766008558, + "grad_norm": 1.0668251514434814, + "learning_rate": 2.781351209545149e-06, + "loss": 0.3012, + "step": 32817 + }, + { + "epoch": 0.6569676951179841, + "grad_norm": 1.0069659948349, + "learning_rate": 2.781060694158872e-06, + "loss": 0.2588, + "step": 32818 + }, + { + "epoch": 0.6569877136351124, + "grad_norm": 1.077077865600586, + "learning_rate": 2.780770188100538e-06, + "loss": 0.3019, + "step": 32819 + }, + { + "epoch": 0.6570077321522408, + "grad_norm": 1.1287333965301514, + "learning_rate": 2.7804796913713693e-06, + "loss": 0.2395, + "step": 32820 + }, + { + "epoch": 0.6570277506693691, + "grad_norm": 1.0967992544174194, + "learning_rate": 2.7801892039725844e-06, + "loss": 0.2965, + "step": 32821 + }, + { + "epoch": 0.6570477691864975, + "grad_norm": 1.9583412408828735, + "learning_rate": 2.7798987259054077e-06, + "loss": 0.755, + "step": 32822 + }, + { + "epoch": 0.6570677877036258, + "grad_norm": 1.3402974605560303, + "learning_rate": 2.7796082571710594e-06, + "loss": 0.312, + "step": 32823 + }, + { + "epoch": 0.6570878062207542, + "grad_norm": 1.7558382749557495, + "learning_rate": 2.7793177977707596e-06, + "loss": 0.8156, + "step": 32824 + }, + { + "epoch": 0.6571078247378825, + "grad_norm": 1.232973337173462, + "learning_rate": 2.779027347705728e-06, + "loss": 0.3386, + "step": 32825 + }, + { + "epoch": 0.6571278432550108, + "grad_norm": 1.0861092805862427, + "learning_rate": 2.77873690697719e-06, + "loss": 0.2829, + "step": 32826 + }, + { + "epoch": 0.6571478617721392, + "grad_norm": 1.3102136850357056, + "learning_rate": 2.7784464755863637e-06, + "loss": 0.282, + "step": 32827 + }, + { + "epoch": 0.6571678802892675, + "grad_norm": 1.214071273803711, + "learning_rate": 2.7781560535344685e-06, + "loss": 0.3213, + "step": 32828 + }, + { + "epoch": 0.6571878988063959, + "grad_norm": 1.0571236610412598, + "learning_rate": 2.777865640822729e-06, + "loss": 0.2882, + "step": 32829 + }, + { + "epoch": 0.6572079173235242, + "grad_norm": 1.1711052656173706, + "learning_rate": 2.7775752374523623e-06, + "loss": 0.3113, + "step": 32830 + }, + { + "epoch": 0.6572279358406526, + "grad_norm": 1.0557711124420166, + "learning_rate": 2.777284843424593e-06, + "loss": 0.3208, + "step": 32831 + }, + { + "epoch": 0.6572479543577809, + "grad_norm": 1.8299603462219238, + "learning_rate": 2.7769944587406395e-06, + "loss": 0.7589, + "step": 32832 + }, + { + "epoch": 0.6572679728749093, + "grad_norm": 1.997359275817871, + "learning_rate": 2.7767040834017233e-06, + "loss": 0.8648, + "step": 32833 + }, + { + "epoch": 0.6572879913920376, + "grad_norm": 0.9529711008071899, + "learning_rate": 2.7764137174090626e-06, + "loss": 0.2518, + "step": 32834 + }, + { + "epoch": 0.6573080099091659, + "grad_norm": 1.1355613470077515, + "learning_rate": 2.7761233607638823e-06, + "loss": 0.3162, + "step": 32835 + }, + { + "epoch": 0.6573280284262943, + "grad_norm": 1.1417639255523682, + "learning_rate": 2.7758330134674004e-06, + "loss": 0.3397, + "step": 32836 + }, + { + "epoch": 0.6573480469434226, + "grad_norm": 1.92902410030365, + "learning_rate": 2.7755426755208383e-06, + "loss": 0.8007, + "step": 32837 + }, + { + "epoch": 0.657368065460551, + "grad_norm": 0.9539008140563965, + "learning_rate": 2.775252346925413e-06, + "loss": 0.2646, + "step": 32838 + }, + { + "epoch": 0.6573880839776793, + "grad_norm": 1.1490768194198608, + "learning_rate": 2.7749620276823512e-06, + "loss": 0.327, + "step": 32839 + }, + { + "epoch": 0.6574081024948077, + "grad_norm": 1.0677999258041382, + "learning_rate": 2.77467171779287e-06, + "loss": 0.288, + "step": 32840 + }, + { + "epoch": 0.657428121011936, + "grad_norm": 1.0751197338104248, + "learning_rate": 2.774381417258188e-06, + "loss": 0.2921, + "step": 32841 + }, + { + "epoch": 0.6574481395290643, + "grad_norm": 1.0448522567749023, + "learning_rate": 2.7740911260795297e-06, + "loss": 0.3237, + "step": 32842 + }, + { + "epoch": 0.6574681580461927, + "grad_norm": 1.1486175060272217, + "learning_rate": 2.773800844258111e-06, + "loss": 0.2845, + "step": 32843 + }, + { + "epoch": 0.657488176563321, + "grad_norm": 1.0947017669677734, + "learning_rate": 2.773510571795156e-06, + "loss": 0.3224, + "step": 32844 + }, + { + "epoch": 0.6575081950804494, + "grad_norm": 1.1989706754684448, + "learning_rate": 2.773220308691884e-06, + "loss": 0.3647, + "step": 32845 + }, + { + "epoch": 0.6575282135975777, + "grad_norm": 1.1848698854446411, + "learning_rate": 2.7729300549495147e-06, + "loss": 0.3057, + "step": 32846 + }, + { + "epoch": 0.6575482321147061, + "grad_norm": 1.0662649869918823, + "learning_rate": 2.772639810569266e-06, + "loss": 0.3179, + "step": 32847 + }, + { + "epoch": 0.6575682506318344, + "grad_norm": 1.060335397720337, + "learning_rate": 2.772349575552362e-06, + "loss": 0.2619, + "step": 32848 + }, + { + "epoch": 0.6575882691489628, + "grad_norm": 1.0739727020263672, + "learning_rate": 2.7720593499000216e-06, + "loss": 0.2901, + "step": 32849 + }, + { + "epoch": 0.6576082876660911, + "grad_norm": 1.0647165775299072, + "learning_rate": 2.7717691336134632e-06, + "loss": 0.2957, + "step": 32850 + }, + { + "epoch": 0.6576283061832194, + "grad_norm": 1.2131930589675903, + "learning_rate": 2.771478926693909e-06, + "loss": 0.2867, + "step": 32851 + }, + { + "epoch": 0.6576483247003478, + "grad_norm": 1.137785792350769, + "learning_rate": 2.7711887291425755e-06, + "loss": 0.3122, + "step": 32852 + }, + { + "epoch": 0.6576683432174761, + "grad_norm": 1.061872959136963, + "learning_rate": 2.7708985409606864e-06, + "loss": 0.3147, + "step": 32853 + }, + { + "epoch": 0.6576883617346045, + "grad_norm": 1.7772408723831177, + "learning_rate": 2.7706083621494584e-06, + "loss": 0.7595, + "step": 32854 + }, + { + "epoch": 0.6577083802517328, + "grad_norm": 1.0594673156738281, + "learning_rate": 2.7703181927101155e-06, + "loss": 0.2708, + "step": 32855 + }, + { + "epoch": 0.6577283987688612, + "grad_norm": 1.0387567281723022, + "learning_rate": 2.7700280326438723e-06, + "loss": 0.3259, + "step": 32856 + }, + { + "epoch": 0.6577484172859895, + "grad_norm": 0.9629361033439636, + "learning_rate": 2.7697378819519537e-06, + "loss": 0.2582, + "step": 32857 + }, + { + "epoch": 0.6577684358031178, + "grad_norm": 0.9702091813087463, + "learning_rate": 2.7694477406355776e-06, + "loss": 0.328, + "step": 32858 + }, + { + "epoch": 0.6577884543202462, + "grad_norm": 1.1568598747253418, + "learning_rate": 2.7691576086959626e-06, + "loss": 0.3059, + "step": 32859 + }, + { + "epoch": 0.6578084728373745, + "grad_norm": 1.8824272155761719, + "learning_rate": 2.768867486134329e-06, + "loss": 0.8339, + "step": 32860 + }, + { + "epoch": 0.6578284913545029, + "grad_norm": 2.0339553356170654, + "learning_rate": 2.7685773729518945e-06, + "loss": 0.7901, + "step": 32861 + }, + { + "epoch": 0.6578485098716312, + "grad_norm": 1.1643640995025635, + "learning_rate": 2.7682872691498818e-06, + "loss": 0.324, + "step": 32862 + }, + { + "epoch": 0.6578685283887596, + "grad_norm": 1.2388962507247925, + "learning_rate": 2.76799717472951e-06, + "loss": 0.3183, + "step": 32863 + }, + { + "epoch": 0.6578885469058879, + "grad_norm": 1.1334993839263916, + "learning_rate": 2.767707089691998e-06, + "loss": 0.2878, + "step": 32864 + }, + { + "epoch": 0.6579085654230163, + "grad_norm": 1.1152070760726929, + "learning_rate": 2.7674170140385625e-06, + "loss": 0.2711, + "step": 32865 + }, + { + "epoch": 0.6579285839401446, + "grad_norm": 1.379096269607544, + "learning_rate": 2.767126947770427e-06, + "loss": 0.3097, + "step": 32866 + }, + { + "epoch": 0.6579486024572729, + "grad_norm": 1.1215497255325317, + "learning_rate": 2.766836890888808e-06, + "loss": 0.2963, + "step": 32867 + }, + { + "epoch": 0.6579686209744013, + "grad_norm": 1.1404362916946411, + "learning_rate": 2.7665468433949277e-06, + "loss": 0.3213, + "step": 32868 + }, + { + "epoch": 0.6579886394915296, + "grad_norm": 1.0002175569534302, + "learning_rate": 2.7662568052900016e-06, + "loss": 0.2319, + "step": 32869 + }, + { + "epoch": 0.658008658008658, + "grad_norm": 1.0726077556610107, + "learning_rate": 2.765966776575254e-06, + "loss": 0.2892, + "step": 32870 + }, + { + "epoch": 0.6580286765257863, + "grad_norm": 1.04000723361969, + "learning_rate": 2.7656767572519007e-06, + "loss": 0.3072, + "step": 32871 + }, + { + "epoch": 0.6580486950429147, + "grad_norm": 1.3060450553894043, + "learning_rate": 2.7653867473211616e-06, + "loss": 0.2627, + "step": 32872 + }, + { + "epoch": 0.658068713560043, + "grad_norm": 1.0408910512924194, + "learning_rate": 2.765096746784256e-06, + "loss": 0.2796, + "step": 32873 + }, + { + "epoch": 0.6580887320771713, + "grad_norm": 2.060145139694214, + "learning_rate": 2.7648067556424006e-06, + "loss": 0.7674, + "step": 32874 + }, + { + "epoch": 0.6581087505942997, + "grad_norm": 1.3354182243347168, + "learning_rate": 2.7645167738968183e-06, + "loss": 0.3186, + "step": 32875 + }, + { + "epoch": 0.658128769111428, + "grad_norm": 1.1556111574172974, + "learning_rate": 2.764226801548726e-06, + "loss": 0.2766, + "step": 32876 + }, + { + "epoch": 0.6581487876285564, + "grad_norm": 1.0504591464996338, + "learning_rate": 2.7639368385993443e-06, + "loss": 0.2978, + "step": 32877 + }, + { + "epoch": 0.6581688061456847, + "grad_norm": 0.9502865076065063, + "learning_rate": 2.7636468850498884e-06, + "loss": 0.2564, + "step": 32878 + }, + { + "epoch": 0.6581888246628131, + "grad_norm": 1.2225068807601929, + "learning_rate": 2.7633569409015816e-06, + "loss": 0.3343, + "step": 32879 + }, + { + "epoch": 0.6582088431799414, + "grad_norm": 1.1485466957092285, + "learning_rate": 2.7630670061556387e-06, + "loss": 0.3305, + "step": 32880 + }, + { + "epoch": 0.6582288616970698, + "grad_norm": 1.3363733291625977, + "learning_rate": 2.7627770808132824e-06, + "loss": 0.3294, + "step": 32881 + }, + { + "epoch": 0.6582488802141981, + "grad_norm": 1.142561674118042, + "learning_rate": 2.7624871648757307e-06, + "loss": 0.2704, + "step": 32882 + }, + { + "epoch": 0.6582688987313264, + "grad_norm": 1.9313924312591553, + "learning_rate": 2.762197258344199e-06, + "loss": 0.7532, + "step": 32883 + }, + { + "epoch": 0.6582889172484548, + "grad_norm": 1.0865092277526855, + "learning_rate": 2.7619073612199106e-06, + "loss": 0.2474, + "step": 32884 + }, + { + "epoch": 0.6583089357655831, + "grad_norm": 1.063099980354309, + "learning_rate": 2.7616174735040817e-06, + "loss": 0.2444, + "step": 32885 + }, + { + "epoch": 0.6583289542827115, + "grad_norm": 1.892098069190979, + "learning_rate": 2.7613275951979312e-06, + "loss": 0.6838, + "step": 32886 + }, + { + "epoch": 0.6583489727998398, + "grad_norm": 1.0418881177902222, + "learning_rate": 2.761037726302677e-06, + "loss": 0.2862, + "step": 32887 + }, + { + "epoch": 0.6583689913169682, + "grad_norm": 2.128711700439453, + "learning_rate": 2.7607478668195386e-06, + "loss": 0.877, + "step": 32888 + }, + { + "epoch": 0.6583890098340965, + "grad_norm": 1.3916138410568237, + "learning_rate": 2.7604580167497354e-06, + "loss": 0.2934, + "step": 32889 + }, + { + "epoch": 0.6584090283512248, + "grad_norm": 1.0224026441574097, + "learning_rate": 2.7601681760944844e-06, + "loss": 0.2839, + "step": 32890 + }, + { + "epoch": 0.6584290468683532, + "grad_norm": 1.042915940284729, + "learning_rate": 2.7598783448550038e-06, + "loss": 0.2488, + "step": 32891 + }, + { + "epoch": 0.6584490653854815, + "grad_norm": 1.0731679201126099, + "learning_rate": 2.7595885230325114e-06, + "loss": 0.286, + "step": 32892 + }, + { + "epoch": 0.6584690839026099, + "grad_norm": 1.38486647605896, + "learning_rate": 2.7592987106282263e-06, + "loss": 0.3102, + "step": 32893 + }, + { + "epoch": 0.6584891024197382, + "grad_norm": 1.1331932544708252, + "learning_rate": 2.759008907643369e-06, + "loss": 0.2812, + "step": 32894 + }, + { + "epoch": 0.6585091209368666, + "grad_norm": 1.0698670148849487, + "learning_rate": 2.7587191140791565e-06, + "loss": 0.2924, + "step": 32895 + }, + { + "epoch": 0.6585291394539949, + "grad_norm": 1.0826655626296997, + "learning_rate": 2.758429329936804e-06, + "loss": 0.2669, + "step": 32896 + }, + { + "epoch": 0.6585491579711233, + "grad_norm": 1.143900990486145, + "learning_rate": 2.758139555217535e-06, + "loss": 0.3111, + "step": 32897 + }, + { + "epoch": 0.6585691764882516, + "grad_norm": 1.1577792167663574, + "learning_rate": 2.7578497899225643e-06, + "loss": 0.3497, + "step": 32898 + }, + { + "epoch": 0.6585891950053799, + "grad_norm": 1.83438241481781, + "learning_rate": 2.7575600340531104e-06, + "loss": 0.7472, + "step": 32899 + }, + { + "epoch": 0.6586092135225083, + "grad_norm": 1.0603398084640503, + "learning_rate": 2.7572702876103895e-06, + "loss": 0.285, + "step": 32900 + }, + { + "epoch": 0.6586292320396366, + "grad_norm": 1.7862610816955566, + "learning_rate": 2.7569805505956242e-06, + "loss": 0.8076, + "step": 32901 + }, + { + "epoch": 0.658649250556765, + "grad_norm": 1.0945448875427246, + "learning_rate": 2.7566908230100298e-06, + "loss": 0.2914, + "step": 32902 + }, + { + "epoch": 0.6586692690738933, + "grad_norm": 1.0178531408309937, + "learning_rate": 2.7564011048548246e-06, + "loss": 0.2848, + "step": 32903 + }, + { + "epoch": 0.6586892875910217, + "grad_norm": 1.1360540390014648, + "learning_rate": 2.7561113961312257e-06, + "loss": 0.3006, + "step": 32904 + }, + { + "epoch": 0.65870930610815, + "grad_norm": 0.9827717542648315, + "learning_rate": 2.7558216968404493e-06, + "loss": 0.2687, + "step": 32905 + }, + { + "epoch": 0.6587293246252783, + "grad_norm": 1.2441576719284058, + "learning_rate": 2.7555320069837165e-06, + "loss": 0.3111, + "step": 32906 + }, + { + "epoch": 0.6587493431424067, + "grad_norm": 1.1278202533721924, + "learning_rate": 2.7552423265622464e-06, + "loss": 0.3057, + "step": 32907 + }, + { + "epoch": 0.658769361659535, + "grad_norm": 1.1194124221801758, + "learning_rate": 2.754952655577253e-06, + "loss": 0.3133, + "step": 32908 + }, + { + "epoch": 0.6587893801766634, + "grad_norm": 1.1107679605484009, + "learning_rate": 2.754662994029955e-06, + "loss": 0.2912, + "step": 32909 + }, + { + "epoch": 0.6588093986937917, + "grad_norm": 1.137597918510437, + "learning_rate": 2.754373341921572e-06, + "loss": 0.3516, + "step": 32910 + }, + { + "epoch": 0.6588294172109201, + "grad_norm": 1.2683645486831665, + "learning_rate": 2.75408369925332e-06, + "loss": 0.3117, + "step": 32911 + }, + { + "epoch": 0.6588494357280484, + "grad_norm": 1.125269889831543, + "learning_rate": 2.7537940660264173e-06, + "loss": 0.3024, + "step": 32912 + }, + { + "epoch": 0.6588694542451767, + "grad_norm": 1.1569287776947021, + "learning_rate": 2.7535044422420797e-06, + "loss": 0.2688, + "step": 32913 + }, + { + "epoch": 0.6588894727623051, + "grad_norm": 1.2278391122817993, + "learning_rate": 2.7532148279015254e-06, + "loss": 0.291, + "step": 32914 + }, + { + "epoch": 0.6589094912794334, + "grad_norm": 1.2762176990509033, + "learning_rate": 2.7529252230059738e-06, + "loss": 0.3148, + "step": 32915 + }, + { + "epoch": 0.6589295097965618, + "grad_norm": 1.1461904048919678, + "learning_rate": 2.752635627556641e-06, + "loss": 0.2776, + "step": 32916 + }, + { + "epoch": 0.6589495283136901, + "grad_norm": 1.1421337127685547, + "learning_rate": 2.7523460415547447e-06, + "loss": 0.2713, + "step": 32917 + }, + { + "epoch": 0.6589695468308185, + "grad_norm": 1.0818486213684082, + "learning_rate": 2.7520564650014995e-06, + "loss": 0.305, + "step": 32918 + }, + { + "epoch": 0.6589895653479468, + "grad_norm": 1.195335030555725, + "learning_rate": 2.751766897898126e-06, + "loss": 0.3196, + "step": 32919 + }, + { + "epoch": 0.6590095838650752, + "grad_norm": 1.1105656623840332, + "learning_rate": 2.7514773402458418e-06, + "loss": 0.3358, + "step": 32920 + }, + { + "epoch": 0.6590296023822035, + "grad_norm": 1.2349579334259033, + "learning_rate": 2.751187792045863e-06, + "loss": 0.2945, + "step": 32921 + }, + { + "epoch": 0.6590496208993318, + "grad_norm": 0.9965563416481018, + "learning_rate": 2.7508982532994055e-06, + "loss": 0.2984, + "step": 32922 + }, + { + "epoch": 0.6590696394164602, + "grad_norm": 1.1743488311767578, + "learning_rate": 2.750608724007689e-06, + "loss": 0.2982, + "step": 32923 + }, + { + "epoch": 0.6590896579335885, + "grad_norm": 1.9438979625701904, + "learning_rate": 2.7503192041719294e-06, + "loss": 0.758, + "step": 32924 + }, + { + "epoch": 0.6591096764507169, + "grad_norm": 1.1457841396331787, + "learning_rate": 2.750029693793344e-06, + "loss": 0.3082, + "step": 32925 + }, + { + "epoch": 0.6591296949678452, + "grad_norm": 1.1356006860733032, + "learning_rate": 2.7497401928731493e-06, + "loss": 0.2825, + "step": 32926 + }, + { + "epoch": 0.6591497134849736, + "grad_norm": 1.0572470426559448, + "learning_rate": 2.7494507014125604e-06, + "loss": 0.2919, + "step": 32927 + }, + { + "epoch": 0.6591697320021019, + "grad_norm": 1.1427241563796997, + "learning_rate": 2.7491612194127993e-06, + "loss": 0.2918, + "step": 32928 + }, + { + "epoch": 0.6591897505192302, + "grad_norm": 1.9991800785064697, + "learning_rate": 2.7488717468750793e-06, + "loss": 0.861, + "step": 32929 + }, + { + "epoch": 0.6592097690363586, + "grad_norm": 1.066907286643982, + "learning_rate": 2.748582283800618e-06, + "loss": 0.2662, + "step": 32930 + }, + { + "epoch": 0.6592297875534869, + "grad_norm": 1.0629538297653198, + "learning_rate": 2.7482928301906307e-06, + "loss": 0.295, + "step": 32931 + }, + { + "epoch": 0.6592498060706153, + "grad_norm": 1.1050236225128174, + "learning_rate": 2.7480033860463357e-06, + "loss": 0.3088, + "step": 32932 + }, + { + "epoch": 0.6592698245877436, + "grad_norm": 1.0401431322097778, + "learning_rate": 2.747713951368952e-06, + "loss": 0.3011, + "step": 32933 + }, + { + "epoch": 0.659289843104872, + "grad_norm": 1.0988123416900635, + "learning_rate": 2.7474245261596932e-06, + "loss": 0.3284, + "step": 32934 + }, + { + "epoch": 0.6593098616220003, + "grad_norm": 1.8954684734344482, + "learning_rate": 2.747135110419778e-06, + "loss": 0.7531, + "step": 32935 + }, + { + "epoch": 0.6593298801391287, + "grad_norm": 1.0730351209640503, + "learning_rate": 2.7468457041504194e-06, + "loss": 0.3326, + "step": 32936 + }, + { + "epoch": 0.659349898656257, + "grad_norm": 1.1643586158752441, + "learning_rate": 2.746556307352839e-06, + "loss": 0.2871, + "step": 32937 + }, + { + "epoch": 0.6593699171733853, + "grad_norm": 1.0129345655441284, + "learning_rate": 2.7462669200282504e-06, + "loss": 0.285, + "step": 32938 + }, + { + "epoch": 0.6593899356905137, + "grad_norm": 1.1673139333724976, + "learning_rate": 2.745977542177871e-06, + "loss": 0.3281, + "step": 32939 + }, + { + "epoch": 0.659409954207642, + "grad_norm": 1.1787312030792236, + "learning_rate": 2.7456881738029152e-06, + "loss": 0.2841, + "step": 32940 + }, + { + "epoch": 0.6594299727247704, + "grad_norm": 1.258841872215271, + "learning_rate": 2.745398814904603e-06, + "loss": 0.3024, + "step": 32941 + }, + { + "epoch": 0.6594499912418987, + "grad_norm": 1.3853743076324463, + "learning_rate": 2.745109465484148e-06, + "loss": 0.3243, + "step": 32942 + }, + { + "epoch": 0.6594700097590271, + "grad_norm": 1.1203300952911377, + "learning_rate": 2.7448201255427686e-06, + "loss": 0.3217, + "step": 32943 + }, + { + "epoch": 0.6594900282761554, + "grad_norm": 1.1932363510131836, + "learning_rate": 2.744530795081678e-06, + "loss": 0.3103, + "step": 32944 + }, + { + "epoch": 0.6595100467932837, + "grad_norm": 1.0666497945785522, + "learning_rate": 2.7442414741020944e-06, + "loss": 0.3244, + "step": 32945 + }, + { + "epoch": 0.6595300653104121, + "grad_norm": 1.0863064527511597, + "learning_rate": 2.7439521626052366e-06, + "loss": 0.3019, + "step": 32946 + }, + { + "epoch": 0.6595500838275404, + "grad_norm": 1.137440800666809, + "learning_rate": 2.7436628605923175e-06, + "loss": 0.297, + "step": 32947 + }, + { + "epoch": 0.6595701023446688, + "grad_norm": 1.1030385494232178, + "learning_rate": 2.7433735680645546e-06, + "loss": 0.2609, + "step": 32948 + }, + { + "epoch": 0.6595901208617971, + "grad_norm": 1.1049708127975464, + "learning_rate": 2.7430842850231622e-06, + "loss": 0.3392, + "step": 32949 + }, + { + "epoch": 0.6596101393789255, + "grad_norm": 1.2205020189285278, + "learning_rate": 2.742795011469359e-06, + "loss": 0.3231, + "step": 32950 + }, + { + "epoch": 0.6596301578960538, + "grad_norm": 1.2432217597961426, + "learning_rate": 2.74250574740436e-06, + "loss": 0.3044, + "step": 32951 + }, + { + "epoch": 0.6596501764131822, + "grad_norm": 1.1080204248428345, + "learning_rate": 2.7422164928293815e-06, + "loss": 0.3059, + "step": 32952 + }, + { + "epoch": 0.6596701949303105, + "grad_norm": 1.0526372194290161, + "learning_rate": 2.741927247745636e-06, + "loss": 0.2887, + "step": 32953 + }, + { + "epoch": 0.6596902134474388, + "grad_norm": 1.1331924200057983, + "learning_rate": 2.741638012154345e-06, + "loss": 0.3143, + "step": 32954 + }, + { + "epoch": 0.6597102319645672, + "grad_norm": 1.0537668466567993, + "learning_rate": 2.7413487860567225e-06, + "loss": 0.324, + "step": 32955 + }, + { + "epoch": 0.6597302504816955, + "grad_norm": 2.0451292991638184, + "learning_rate": 2.7410595694539822e-06, + "loss": 0.7894, + "step": 32956 + }, + { + "epoch": 0.6597502689988239, + "grad_norm": 1.0856244564056396, + "learning_rate": 2.74077036234734e-06, + "loss": 0.3145, + "step": 32957 + }, + { + "epoch": 0.6597702875159522, + "grad_norm": 1.043686032295227, + "learning_rate": 2.7404811647380136e-06, + "loss": 0.2977, + "step": 32958 + }, + { + "epoch": 0.6597903060330806, + "grad_norm": 1.0865378379821777, + "learning_rate": 2.7401919766272196e-06, + "loss": 0.2574, + "step": 32959 + }, + { + "epoch": 0.6598103245502089, + "grad_norm": 1.155525803565979, + "learning_rate": 2.739902798016172e-06, + "loss": 0.3233, + "step": 32960 + }, + { + "epoch": 0.6598303430673372, + "grad_norm": 1.1636321544647217, + "learning_rate": 2.739613628906087e-06, + "loss": 0.2717, + "step": 32961 + }, + { + "epoch": 0.6598503615844656, + "grad_norm": 1.1071714162826538, + "learning_rate": 2.7393244692981775e-06, + "loss": 0.3085, + "step": 32962 + }, + { + "epoch": 0.6598703801015939, + "grad_norm": 1.2625821828842163, + "learning_rate": 2.7390353191936638e-06, + "loss": 0.2717, + "step": 32963 + }, + { + "epoch": 0.6598903986187223, + "grad_norm": 1.085623860359192, + "learning_rate": 2.7387461785937584e-06, + "loss": 0.2853, + "step": 32964 + }, + { + "epoch": 0.6599104171358506, + "grad_norm": 1.0278067588806152, + "learning_rate": 2.7384570474996786e-06, + "loss": 0.2959, + "step": 32965 + }, + { + "epoch": 0.659930435652979, + "grad_norm": 1.1158453226089478, + "learning_rate": 2.7381679259126377e-06, + "loss": 0.2993, + "step": 32966 + }, + { + "epoch": 0.6599504541701073, + "grad_norm": 1.1228270530700684, + "learning_rate": 2.7378788138338508e-06, + "loss": 0.3332, + "step": 32967 + }, + { + "epoch": 0.6599704726872357, + "grad_norm": 1.0890690088272095, + "learning_rate": 2.7375897112645355e-06, + "loss": 0.3101, + "step": 32968 + }, + { + "epoch": 0.659990491204364, + "grad_norm": 1.2448426485061646, + "learning_rate": 2.737300618205907e-06, + "loss": 0.2768, + "step": 32969 + }, + { + "epoch": 0.6600105097214923, + "grad_norm": 1.9206663370132446, + "learning_rate": 2.7370115346591775e-06, + "loss": 0.8186, + "step": 32970 + }, + { + "epoch": 0.6600305282386207, + "grad_norm": 1.1245633363723755, + "learning_rate": 2.736722460625565e-06, + "loss": 0.2751, + "step": 32971 + }, + { + "epoch": 0.660050546755749, + "grad_norm": 1.1423590183258057, + "learning_rate": 2.7364333961062855e-06, + "loss": 0.2701, + "step": 32972 + }, + { + "epoch": 0.6600705652728774, + "grad_norm": 1.0714285373687744, + "learning_rate": 2.7361443411025534e-06, + "loss": 0.2869, + "step": 32973 + }, + { + "epoch": 0.6600905837900057, + "grad_norm": 1.225715160369873, + "learning_rate": 2.7358552956155825e-06, + "loss": 0.3402, + "step": 32974 + }, + { + "epoch": 0.6601106023071341, + "grad_norm": 1.1272681951522827, + "learning_rate": 2.73556625964659e-06, + "loss": 0.3063, + "step": 32975 + }, + { + "epoch": 0.6601306208242624, + "grad_norm": 1.1190670728683472, + "learning_rate": 2.735277233196787e-06, + "loss": 0.3022, + "step": 32976 + }, + { + "epoch": 0.6601506393413907, + "grad_norm": 1.1325395107269287, + "learning_rate": 2.7349882162673923e-06, + "loss": 0.3006, + "step": 32977 + }, + { + "epoch": 0.6601706578585191, + "grad_norm": 1.115036129951477, + "learning_rate": 2.7346992088596206e-06, + "loss": 0.3323, + "step": 32978 + }, + { + "epoch": 0.6601906763756474, + "grad_norm": 1.1139311790466309, + "learning_rate": 2.7344102109746863e-06, + "loss": 0.3235, + "step": 32979 + }, + { + "epoch": 0.6602106948927758, + "grad_norm": 0.9711446166038513, + "learning_rate": 2.7341212226138015e-06, + "loss": 0.2883, + "step": 32980 + }, + { + "epoch": 0.6602307134099041, + "grad_norm": 1.1624314785003662, + "learning_rate": 2.733832243778185e-06, + "loss": 0.3167, + "step": 32981 + }, + { + "epoch": 0.6602507319270325, + "grad_norm": 1.0430229902267456, + "learning_rate": 2.7335432744690502e-06, + "loss": 0.2562, + "step": 32982 + }, + { + "epoch": 0.6602707504441608, + "grad_norm": 1.2563508749008179, + "learning_rate": 2.73325431468761e-06, + "loss": 0.3166, + "step": 32983 + }, + { + "epoch": 0.6602907689612892, + "grad_norm": 1.0961785316467285, + "learning_rate": 2.732965364435081e-06, + "loss": 0.3006, + "step": 32984 + }, + { + "epoch": 0.6603107874784175, + "grad_norm": 1.0680817365646362, + "learning_rate": 2.7326764237126798e-06, + "loss": 0.2964, + "step": 32985 + }, + { + "epoch": 0.6603308059955458, + "grad_norm": 1.8271737098693848, + "learning_rate": 2.732387492521619e-06, + "loss": 0.7585, + "step": 32986 + }, + { + "epoch": 0.6603508245126742, + "grad_norm": 1.3172787427902222, + "learning_rate": 2.732098570863112e-06, + "loss": 0.2907, + "step": 32987 + }, + { + "epoch": 0.6603708430298025, + "grad_norm": 1.0726122856140137, + "learning_rate": 2.731809658738376e-06, + "loss": 0.2922, + "step": 32988 + }, + { + "epoch": 0.6603908615469309, + "grad_norm": 1.2776559591293335, + "learning_rate": 2.731520756148621e-06, + "loss": 0.2999, + "step": 32989 + }, + { + "epoch": 0.6604108800640592, + "grad_norm": 1.100472092628479, + "learning_rate": 2.7312318630950674e-06, + "loss": 0.3432, + "step": 32990 + }, + { + "epoch": 0.6604308985811876, + "grad_norm": 0.9970044493675232, + "learning_rate": 2.730942979578927e-06, + "loss": 0.2868, + "step": 32991 + }, + { + "epoch": 0.6604509170983159, + "grad_norm": 1.1074305772781372, + "learning_rate": 2.730654105601413e-06, + "loss": 0.2954, + "step": 32992 + }, + { + "epoch": 0.6604709356154442, + "grad_norm": 1.829949140548706, + "learning_rate": 2.7303652411637393e-06, + "loss": 0.7748, + "step": 32993 + }, + { + "epoch": 0.6604909541325726, + "grad_norm": 1.0973248481750488, + "learning_rate": 2.7300763862671237e-06, + "loss": 0.307, + "step": 32994 + }, + { + "epoch": 0.6605109726497009, + "grad_norm": 1.085087776184082, + "learning_rate": 2.729787540912778e-06, + "loss": 0.2587, + "step": 32995 + }, + { + "epoch": 0.6605309911668293, + "grad_norm": 1.0740697383880615, + "learning_rate": 2.729498705101916e-06, + "loss": 0.2697, + "step": 32996 + }, + { + "epoch": 0.6605510096839576, + "grad_norm": 1.0211104154586792, + "learning_rate": 2.729209878835754e-06, + "loss": 0.3176, + "step": 32997 + }, + { + "epoch": 0.660571028201086, + "grad_norm": 1.1759545803070068, + "learning_rate": 2.7289210621155033e-06, + "loss": 0.2926, + "step": 32998 + }, + { + "epoch": 0.6605910467182143, + "grad_norm": 1.0664316415786743, + "learning_rate": 2.7286322549423817e-06, + "loss": 0.3249, + "step": 32999 + }, + { + "epoch": 0.6606110652353427, + "grad_norm": 1.2067010402679443, + "learning_rate": 2.728343457317601e-06, + "loss": 0.312, + "step": 33000 + }, + { + "epoch": 0.660631083752471, + "grad_norm": 1.2738018035888672, + "learning_rate": 2.728054669242376e-06, + "loss": 0.3252, + "step": 33001 + }, + { + "epoch": 0.6606511022695993, + "grad_norm": 1.1311595439910889, + "learning_rate": 2.7277658907179177e-06, + "loss": 0.2784, + "step": 33002 + }, + { + "epoch": 0.6606711207867277, + "grad_norm": 1.243627667427063, + "learning_rate": 2.727477121745445e-06, + "loss": 0.3456, + "step": 33003 + }, + { + "epoch": 0.660691139303856, + "grad_norm": 1.211330771446228, + "learning_rate": 2.727188362326169e-06, + "loss": 0.3155, + "step": 33004 + }, + { + "epoch": 0.6607111578209844, + "grad_norm": 1.1865603923797607, + "learning_rate": 2.7268996124613047e-06, + "loss": 0.3681, + "step": 33005 + }, + { + "epoch": 0.6607311763381127, + "grad_norm": 1.1360681056976318, + "learning_rate": 2.7266108721520644e-06, + "loss": 0.3123, + "step": 33006 + }, + { + "epoch": 0.6607511948552411, + "grad_norm": 1.404831051826477, + "learning_rate": 2.7263221413996617e-06, + "loss": 0.2582, + "step": 33007 + }, + { + "epoch": 0.6607712133723694, + "grad_norm": 1.8695834875106812, + "learning_rate": 2.726033420205313e-06, + "loss": 0.7071, + "step": 33008 + }, + { + "epoch": 0.6607912318894977, + "grad_norm": 1.0690399408340454, + "learning_rate": 2.725744708570228e-06, + "loss": 0.3218, + "step": 33009 + }, + { + "epoch": 0.6608112504066261, + "grad_norm": 1.1019911766052246, + "learning_rate": 2.7254560064956255e-06, + "loss": 0.2929, + "step": 33010 + }, + { + "epoch": 0.6608312689237544, + "grad_norm": 1.0609486103057861, + "learning_rate": 2.7251673139827137e-06, + "loss": 0.2892, + "step": 33011 + }, + { + "epoch": 0.6608512874408828, + "grad_norm": 1.0395443439483643, + "learning_rate": 2.724878631032712e-06, + "loss": 0.2959, + "step": 33012 + }, + { + "epoch": 0.6608713059580111, + "grad_norm": 1.119327425956726, + "learning_rate": 2.7245899576468304e-06, + "loss": 0.3102, + "step": 33013 + }, + { + "epoch": 0.6608913244751395, + "grad_norm": 1.8010286092758179, + "learning_rate": 2.7243012938262826e-06, + "loss": 0.7688, + "step": 33014 + }, + { + "epoch": 0.6609113429922678, + "grad_norm": 1.1333019733428955, + "learning_rate": 2.724012639572281e-06, + "loss": 0.3195, + "step": 33015 + }, + { + "epoch": 0.6609313615093962, + "grad_norm": 2.0346455574035645, + "learning_rate": 2.7237239948860416e-06, + "loss": 0.785, + "step": 33016 + }, + { + "epoch": 0.6609513800265245, + "grad_norm": 1.087725043296814, + "learning_rate": 2.7234353597687778e-06, + "loss": 0.287, + "step": 33017 + }, + { + "epoch": 0.6609713985436528, + "grad_norm": 1.0602465867996216, + "learning_rate": 2.723146734221701e-06, + "loss": 0.2923, + "step": 33018 + }, + { + "epoch": 0.6609914170607812, + "grad_norm": 1.8072423934936523, + "learning_rate": 2.7228581182460255e-06, + "loss": 0.764, + "step": 33019 + }, + { + "epoch": 0.6610114355779095, + "grad_norm": 1.160345435142517, + "learning_rate": 2.722569511842963e-06, + "loss": 0.3015, + "step": 33020 + }, + { + "epoch": 0.6610314540950379, + "grad_norm": 1.860032320022583, + "learning_rate": 2.7222809150137296e-06, + "loss": 0.8095, + "step": 33021 + }, + { + "epoch": 0.6610514726121662, + "grad_norm": 1.1659901142120361, + "learning_rate": 2.721992327759535e-06, + "loss": 0.3381, + "step": 33022 + }, + { + "epoch": 0.6610714911292946, + "grad_norm": 1.21123468875885, + "learning_rate": 2.7217037500815968e-06, + "loss": 0.2902, + "step": 33023 + }, + { + "epoch": 0.6610915096464229, + "grad_norm": 1.0240650177001953, + "learning_rate": 2.721415181981123e-06, + "loss": 0.2719, + "step": 33024 + }, + { + "epoch": 0.6611115281635512, + "grad_norm": 1.1206318140029907, + "learning_rate": 2.7211266234593326e-06, + "loss": 0.3372, + "step": 33025 + }, + { + "epoch": 0.6611315466806796, + "grad_norm": 1.1091270446777344, + "learning_rate": 2.7208380745174343e-06, + "loss": 0.3062, + "step": 33026 + }, + { + "epoch": 0.6611515651978079, + "grad_norm": 1.1230641603469849, + "learning_rate": 2.720549535156643e-06, + "loss": 0.3256, + "step": 33027 + }, + { + "epoch": 0.6611715837149363, + "grad_norm": 1.1310796737670898, + "learning_rate": 2.720261005378171e-06, + "loss": 0.3264, + "step": 33028 + }, + { + "epoch": 0.6611916022320646, + "grad_norm": 1.0382754802703857, + "learning_rate": 2.7199724851832287e-06, + "loss": 0.2817, + "step": 33029 + }, + { + "epoch": 0.661211620749193, + "grad_norm": 1.112713098526001, + "learning_rate": 2.719683974573033e-06, + "loss": 0.3539, + "step": 33030 + }, + { + "epoch": 0.6612316392663213, + "grad_norm": 1.2272684574127197, + "learning_rate": 2.7193954735487958e-06, + "loss": 0.2957, + "step": 33031 + }, + { + "epoch": 0.6612516577834497, + "grad_norm": 1.1411062479019165, + "learning_rate": 2.719106982111729e-06, + "loss": 0.3174, + "step": 33032 + }, + { + "epoch": 0.661271676300578, + "grad_norm": 1.0847604274749756, + "learning_rate": 2.718818500263044e-06, + "loss": 0.2751, + "step": 33033 + }, + { + "epoch": 0.6612916948177063, + "grad_norm": 1.2825779914855957, + "learning_rate": 2.718530028003957e-06, + "loss": 0.3289, + "step": 33034 + }, + { + "epoch": 0.6613117133348347, + "grad_norm": 1.1002047061920166, + "learning_rate": 2.7182415653356766e-06, + "loss": 0.2826, + "step": 33035 + }, + { + "epoch": 0.661331731851963, + "grad_norm": 1.0609657764434814, + "learning_rate": 2.7179531122594193e-06, + "loss": 0.3284, + "step": 33036 + }, + { + "epoch": 0.6613517503690914, + "grad_norm": 1.2144532203674316, + "learning_rate": 2.7176646687763963e-06, + "loss": 0.3565, + "step": 33037 + }, + { + "epoch": 0.6613717688862197, + "grad_norm": 1.2479889392852783, + "learning_rate": 2.7173762348878176e-06, + "loss": 0.2965, + "step": 33038 + }, + { + "epoch": 0.6613917874033481, + "grad_norm": 0.9922155141830444, + "learning_rate": 2.7170878105949007e-06, + "loss": 0.2778, + "step": 33039 + }, + { + "epoch": 0.6614118059204764, + "grad_norm": 1.0936702489852905, + "learning_rate": 2.716799395898855e-06, + "loss": 0.3317, + "step": 33040 + }, + { + "epoch": 0.6614318244376047, + "grad_norm": 1.0751773118972778, + "learning_rate": 2.7165109908008925e-06, + "loss": 0.3493, + "step": 33041 + }, + { + "epoch": 0.6614518429547331, + "grad_norm": 1.147793173789978, + "learning_rate": 2.7162225953022254e-06, + "loss": 0.2648, + "step": 33042 + }, + { + "epoch": 0.6614718614718614, + "grad_norm": 0.9817382097244263, + "learning_rate": 2.7159342094040686e-06, + "loss": 0.279, + "step": 33043 + }, + { + "epoch": 0.6614918799889898, + "grad_norm": 0.922433614730835, + "learning_rate": 2.715645833107633e-06, + "loss": 0.28, + "step": 33044 + }, + { + "epoch": 0.6615118985061181, + "grad_norm": 1.0424681901931763, + "learning_rate": 2.715357466414131e-06, + "loss": 0.3002, + "step": 33045 + }, + { + "epoch": 0.6615319170232465, + "grad_norm": 2.190249443054199, + "learning_rate": 2.7150691093247716e-06, + "loss": 0.7434, + "step": 33046 + }, + { + "epoch": 0.6615519355403748, + "grad_norm": 1.084038257598877, + "learning_rate": 2.7147807618407728e-06, + "loss": 0.3003, + "step": 33047 + }, + { + "epoch": 0.6615719540575032, + "grad_norm": 1.1466015577316284, + "learning_rate": 2.7144924239633417e-06, + "loss": 0.2503, + "step": 33048 + }, + { + "epoch": 0.6615919725746315, + "grad_norm": 1.1450598239898682, + "learning_rate": 2.7142040956936945e-06, + "loss": 0.3153, + "step": 33049 + }, + { + "epoch": 0.6616119910917598, + "grad_norm": 1.0659295320510864, + "learning_rate": 2.7139157770330414e-06, + "loss": 0.3191, + "step": 33050 + }, + { + "epoch": 0.6616320096088882, + "grad_norm": 1.0674618482589722, + "learning_rate": 2.7136274679825925e-06, + "loss": 0.3176, + "step": 33051 + }, + { + "epoch": 0.6616520281260165, + "grad_norm": 1.3333826065063477, + "learning_rate": 2.713339168543564e-06, + "loss": 0.3111, + "step": 33052 + }, + { + "epoch": 0.6616720466431449, + "grad_norm": 1.045291781425476, + "learning_rate": 2.713050878717165e-06, + "loss": 0.3085, + "step": 33053 + }, + { + "epoch": 0.6616920651602732, + "grad_norm": 1.130764126777649, + "learning_rate": 2.712762598504608e-06, + "loss": 0.2975, + "step": 33054 + }, + { + "epoch": 0.6617120836774016, + "grad_norm": 1.050153136253357, + "learning_rate": 2.7124743279071033e-06, + "loss": 0.2771, + "step": 33055 + }, + { + "epoch": 0.6617321021945299, + "grad_norm": 1.1040124893188477, + "learning_rate": 2.712186066925866e-06, + "loss": 0.3003, + "step": 33056 + }, + { + "epoch": 0.6617521207116582, + "grad_norm": 1.0051504373550415, + "learning_rate": 2.7118978155621055e-06, + "loss": 0.3112, + "step": 33057 + }, + { + "epoch": 0.6617721392287866, + "grad_norm": 1.3204151391983032, + "learning_rate": 2.711609573817035e-06, + "loss": 0.3538, + "step": 33058 + }, + { + "epoch": 0.6617921577459149, + "grad_norm": 1.0958634614944458, + "learning_rate": 2.711321341691863e-06, + "loss": 0.2657, + "step": 33059 + }, + { + "epoch": 0.6618121762630433, + "grad_norm": 1.1331101655960083, + "learning_rate": 2.7110331191878058e-06, + "loss": 0.2928, + "step": 33060 + }, + { + "epoch": 0.6618321947801716, + "grad_norm": 1.8137212991714478, + "learning_rate": 2.710744906306071e-06, + "loss": 0.7392, + "step": 33061 + }, + { + "epoch": 0.6618522132973, + "grad_norm": 1.0362051725387573, + "learning_rate": 2.7104567030478733e-06, + "loss": 0.3048, + "step": 33062 + }, + { + "epoch": 0.6618722318144283, + "grad_norm": 1.1207655668258667, + "learning_rate": 2.7101685094144226e-06, + "loss": 0.3168, + "step": 33063 + }, + { + "epoch": 0.6618922503315567, + "grad_norm": 1.0212923288345337, + "learning_rate": 2.7098803254069296e-06, + "loss": 0.2844, + "step": 33064 + }, + { + "epoch": 0.661912268848685, + "grad_norm": 1.0805209875106812, + "learning_rate": 2.709592151026609e-06, + "loss": 0.3009, + "step": 33065 + }, + { + "epoch": 0.6619322873658133, + "grad_norm": 1.130867600440979, + "learning_rate": 2.7093039862746694e-06, + "loss": 0.3091, + "step": 33066 + }, + { + "epoch": 0.6619523058829417, + "grad_norm": 1.1684125661849976, + "learning_rate": 2.709015831152323e-06, + "loss": 0.3151, + "step": 33067 + }, + { + "epoch": 0.66197232440007, + "grad_norm": 1.89547598361969, + "learning_rate": 2.7087276856607792e-06, + "loss": 0.7511, + "step": 33068 + }, + { + "epoch": 0.6619923429171984, + "grad_norm": 1.0563558340072632, + "learning_rate": 2.708439549801253e-06, + "loss": 0.2879, + "step": 33069 + }, + { + "epoch": 0.6620123614343267, + "grad_norm": 1.047515869140625, + "learning_rate": 2.708151423574954e-06, + "loss": 0.2839, + "step": 33070 + }, + { + "epoch": 0.6620323799514551, + "grad_norm": 1.0826470851898193, + "learning_rate": 2.7078633069830927e-06, + "loss": 0.2952, + "step": 33071 + }, + { + "epoch": 0.6620523984685834, + "grad_norm": 1.1119412183761597, + "learning_rate": 2.7075752000268797e-06, + "loss": 0.3424, + "step": 33072 + }, + { + "epoch": 0.6620724169857117, + "grad_norm": 1.2083311080932617, + "learning_rate": 2.7072871027075286e-06, + "loss": 0.3007, + "step": 33073 + }, + { + "epoch": 0.6620924355028401, + "grad_norm": 1.1623183488845825, + "learning_rate": 2.706999015026247e-06, + "loss": 0.2662, + "step": 33074 + }, + { + "epoch": 0.6621124540199684, + "grad_norm": 1.158453106880188, + "learning_rate": 2.7067109369842503e-06, + "loss": 0.3034, + "step": 33075 + }, + { + "epoch": 0.6621324725370968, + "grad_norm": 1.1364365816116333, + "learning_rate": 2.7064228685827476e-06, + "loss": 0.3124, + "step": 33076 + }, + { + "epoch": 0.6621524910542251, + "grad_norm": 1.107028841972351, + "learning_rate": 2.706134809822947e-06, + "loss": 0.2954, + "step": 33077 + }, + { + "epoch": 0.6621725095713535, + "grad_norm": 1.0979446172714233, + "learning_rate": 2.7058467607060644e-06, + "loss": 0.291, + "step": 33078 + }, + { + "epoch": 0.6621925280884818, + "grad_norm": 1.1168444156646729, + "learning_rate": 2.7055587212333077e-06, + "loss": 0.254, + "step": 33079 + }, + { + "epoch": 0.6622125466056102, + "grad_norm": 1.0568493604660034, + "learning_rate": 2.705270691405889e-06, + "loss": 0.3169, + "step": 33080 + }, + { + "epoch": 0.6622325651227385, + "grad_norm": 1.1555986404418945, + "learning_rate": 2.7049826712250184e-06, + "loss": 0.3605, + "step": 33081 + }, + { + "epoch": 0.6622525836398668, + "grad_norm": 1.3077318668365479, + "learning_rate": 2.7046946606919046e-06, + "loss": 0.3166, + "step": 33082 + }, + { + "epoch": 0.6622726021569952, + "grad_norm": 1.0987154245376587, + "learning_rate": 2.7044066598077624e-06, + "loss": 0.3308, + "step": 33083 + }, + { + "epoch": 0.6622926206741235, + "grad_norm": 1.7799022197723389, + "learning_rate": 2.704118668573801e-06, + "loss": 0.8139, + "step": 33084 + }, + { + "epoch": 0.6623126391912519, + "grad_norm": 1.2561924457550049, + "learning_rate": 2.7038306869912284e-06, + "loss": 0.2819, + "step": 33085 + }, + { + "epoch": 0.6623326577083802, + "grad_norm": 1.9140233993530273, + "learning_rate": 2.703542715061259e-06, + "loss": 0.8244, + "step": 33086 + }, + { + "epoch": 0.6623526762255086, + "grad_norm": 1.9930616617202759, + "learning_rate": 2.7032547527851006e-06, + "loss": 0.8157, + "step": 33087 + }, + { + "epoch": 0.6623726947426369, + "grad_norm": 1.1331512928009033, + "learning_rate": 2.7029668001639666e-06, + "loss": 0.2971, + "step": 33088 + }, + { + "epoch": 0.6623927132597652, + "grad_norm": 1.0836946964263916, + "learning_rate": 2.7026788571990657e-06, + "loss": 0.2486, + "step": 33089 + }, + { + "epoch": 0.6624127317768936, + "grad_norm": 1.1555492877960205, + "learning_rate": 2.702390923891609e-06, + "loss": 0.3326, + "step": 33090 + }, + { + "epoch": 0.6624327502940219, + "grad_norm": 1.15302312374115, + "learning_rate": 2.702103000242804e-06, + "loss": 0.324, + "step": 33091 + }, + { + "epoch": 0.6624527688111503, + "grad_norm": 1.0618581771850586, + "learning_rate": 2.7018150862538655e-06, + "loss": 0.3072, + "step": 33092 + }, + { + "epoch": 0.6624727873282786, + "grad_norm": 1.2165299654006958, + "learning_rate": 2.701527181926002e-06, + "loss": 0.2977, + "step": 33093 + }, + { + "epoch": 0.662492805845407, + "grad_norm": 1.415967583656311, + "learning_rate": 2.7012392872604232e-06, + "loss": 0.3277, + "step": 33094 + }, + { + "epoch": 0.6625128243625353, + "grad_norm": 1.1343063116073608, + "learning_rate": 2.7009514022583384e-06, + "loss": 0.3132, + "step": 33095 + }, + { + "epoch": 0.6625328428796637, + "grad_norm": 2.007510185241699, + "learning_rate": 2.70066352692096e-06, + "loss": 0.7188, + "step": 33096 + }, + { + "epoch": 0.662552861396792, + "grad_norm": 1.629598617553711, + "learning_rate": 2.7003756612494988e-06, + "loss": 0.2895, + "step": 33097 + }, + { + "epoch": 0.6625728799139203, + "grad_norm": 1.207562804222107, + "learning_rate": 2.700087805245161e-06, + "loss": 0.3142, + "step": 33098 + }, + { + "epoch": 0.6625928984310487, + "grad_norm": 1.1051501035690308, + "learning_rate": 2.6997999589091607e-06, + "loss": 0.3229, + "step": 33099 + }, + { + "epoch": 0.662612916948177, + "grad_norm": 1.040781021118164, + "learning_rate": 2.699512122242704e-06, + "loss": 0.2946, + "step": 33100 + }, + { + "epoch": 0.6626329354653054, + "grad_norm": 1.0824360847473145, + "learning_rate": 2.699224295247006e-06, + "loss": 0.2763, + "step": 33101 + }, + { + "epoch": 0.6626529539824337, + "grad_norm": 1.2026519775390625, + "learning_rate": 2.698936477923273e-06, + "loss": 0.3077, + "step": 33102 + }, + { + "epoch": 0.6626729724995621, + "grad_norm": 0.9794133901596069, + "learning_rate": 2.6986486702727167e-06, + "loss": 0.2627, + "step": 33103 + }, + { + "epoch": 0.6626929910166904, + "grad_norm": 1.3130067586898804, + "learning_rate": 2.698360872296544e-06, + "loss": 0.2954, + "step": 33104 + }, + { + "epoch": 0.6627130095338187, + "grad_norm": 1.110764741897583, + "learning_rate": 2.6980730839959692e-06, + "loss": 0.2947, + "step": 33105 + }, + { + "epoch": 0.6627330280509471, + "grad_norm": 1.8516368865966797, + "learning_rate": 2.697785305372199e-06, + "loss": 0.828, + "step": 33106 + }, + { + "epoch": 0.6627530465680754, + "grad_norm": 1.0997081995010376, + "learning_rate": 2.697497536426444e-06, + "loss": 0.28, + "step": 33107 + }, + { + "epoch": 0.6627730650852038, + "grad_norm": 1.0236068964004517, + "learning_rate": 2.6972097771599126e-06, + "loss": 0.2475, + "step": 33108 + }, + { + "epoch": 0.6627930836023321, + "grad_norm": 1.0946846008300781, + "learning_rate": 2.6969220275738173e-06, + "loss": 0.2764, + "step": 33109 + }, + { + "epoch": 0.6628131021194605, + "grad_norm": 1.0876619815826416, + "learning_rate": 2.6966342876693662e-06, + "loss": 0.2924, + "step": 33110 + }, + { + "epoch": 0.6628331206365888, + "grad_norm": 1.152713656425476, + "learning_rate": 2.6963465574477663e-06, + "loss": 0.3155, + "step": 33111 + }, + { + "epoch": 0.6628531391537172, + "grad_norm": 1.0656927824020386, + "learning_rate": 2.6960588369102327e-06, + "loss": 0.3117, + "step": 33112 + }, + { + "epoch": 0.6628731576708455, + "grad_norm": 1.1229156255722046, + "learning_rate": 2.6957711260579694e-06, + "loss": 0.2895, + "step": 33113 + }, + { + "epoch": 0.6628931761879738, + "grad_norm": 1.0568499565124512, + "learning_rate": 2.6954834248921907e-06, + "loss": 0.3195, + "step": 33114 + }, + { + "epoch": 0.6629131947051022, + "grad_norm": 1.2445889711380005, + "learning_rate": 2.695195733414103e-06, + "loss": 0.3205, + "step": 33115 + }, + { + "epoch": 0.6629332132222305, + "grad_norm": 1.0847270488739014, + "learning_rate": 2.694908051624917e-06, + "loss": 0.2538, + "step": 33116 + }, + { + "epoch": 0.6629532317393589, + "grad_norm": 2.162811279296875, + "learning_rate": 2.6946203795258397e-06, + "loss": 0.7037, + "step": 33117 + }, + { + "epoch": 0.6629732502564872, + "grad_norm": 0.9983120560646057, + "learning_rate": 2.6943327171180846e-06, + "loss": 0.306, + "step": 33118 + }, + { + "epoch": 0.6629932687736156, + "grad_norm": 1.8285510540008545, + "learning_rate": 2.6940450644028577e-06, + "loss": 0.7352, + "step": 33119 + }, + { + "epoch": 0.6630132872907439, + "grad_norm": 1.069583773612976, + "learning_rate": 2.693757421381369e-06, + "loss": 0.2971, + "step": 33120 + }, + { + "epoch": 0.6630333058078722, + "grad_norm": 1.1928062438964844, + "learning_rate": 2.6934697880548282e-06, + "loss": 0.2935, + "step": 33121 + }, + { + "epoch": 0.6630533243250006, + "grad_norm": 2.0344817638397217, + "learning_rate": 2.6931821644244423e-06, + "loss": 0.7357, + "step": 33122 + }, + { + "epoch": 0.6630733428421289, + "grad_norm": 1.1379421949386597, + "learning_rate": 2.692894550491424e-06, + "loss": 0.3104, + "step": 33123 + }, + { + "epoch": 0.6630933613592573, + "grad_norm": 1.1698702573776245, + "learning_rate": 2.692606946256978e-06, + "loss": 0.3211, + "step": 33124 + }, + { + "epoch": 0.6631133798763856, + "grad_norm": 1.323715090751648, + "learning_rate": 2.692319351722318e-06, + "loss": 0.2893, + "step": 33125 + }, + { + "epoch": 0.663133398393514, + "grad_norm": 1.1771737337112427, + "learning_rate": 2.692031766888649e-06, + "loss": 0.2748, + "step": 33126 + }, + { + "epoch": 0.6631534169106423, + "grad_norm": 1.1172144412994385, + "learning_rate": 2.6917441917571842e-06, + "loss": 0.2974, + "step": 33127 + }, + { + "epoch": 0.6631734354277707, + "grad_norm": 1.074050784111023, + "learning_rate": 2.69145662632913e-06, + "loss": 0.3196, + "step": 33128 + }, + { + "epoch": 0.663193453944899, + "grad_norm": 1.0805400609970093, + "learning_rate": 2.6911690706056946e-06, + "loss": 0.3077, + "step": 33129 + }, + { + "epoch": 0.6632134724620273, + "grad_norm": 1.0394644737243652, + "learning_rate": 2.690881524588086e-06, + "loss": 0.295, + "step": 33130 + }, + { + "epoch": 0.6632334909791557, + "grad_norm": 1.282490611076355, + "learning_rate": 2.6905939882775163e-06, + "loss": 0.3363, + "step": 33131 + }, + { + "epoch": 0.663253509496284, + "grad_norm": 1.0697542428970337, + "learning_rate": 2.6903064616751928e-06, + "loss": 0.2847, + "step": 33132 + }, + { + "epoch": 0.6632735280134124, + "grad_norm": 1.0962594747543335, + "learning_rate": 2.690018944782323e-06, + "loss": 0.2961, + "step": 33133 + }, + { + "epoch": 0.6632935465305407, + "grad_norm": 0.9805858135223389, + "learning_rate": 2.689731437600118e-06, + "loss": 0.2694, + "step": 33134 + }, + { + "epoch": 0.6633135650476691, + "grad_norm": 1.0089915990829468, + "learning_rate": 2.689443940129782e-06, + "loss": 0.304, + "step": 33135 + }, + { + "epoch": 0.6633335835647974, + "grad_norm": 1.07649564743042, + "learning_rate": 2.6891564523725285e-06, + "loss": 0.2918, + "step": 33136 + }, + { + "epoch": 0.6633536020819257, + "grad_norm": 1.2726969718933105, + "learning_rate": 2.688868974329562e-06, + "loss": 0.3174, + "step": 33137 + }, + { + "epoch": 0.6633736205990541, + "grad_norm": 1.5920586585998535, + "learning_rate": 2.6885815060020947e-06, + "loss": 0.3459, + "step": 33138 + }, + { + "epoch": 0.6633936391161824, + "grad_norm": 1.1477627754211426, + "learning_rate": 2.688294047391332e-06, + "loss": 0.2827, + "step": 33139 + }, + { + "epoch": 0.6634136576333108, + "grad_norm": 1.0595767498016357, + "learning_rate": 2.6880065984984848e-06, + "loss": 0.2837, + "step": 33140 + }, + { + "epoch": 0.6634336761504391, + "grad_norm": 1.1074333190917969, + "learning_rate": 2.6877191593247607e-06, + "loss": 0.2997, + "step": 33141 + }, + { + "epoch": 0.6634536946675675, + "grad_norm": 1.0292197465896606, + "learning_rate": 2.6874317298713683e-06, + "loss": 0.2572, + "step": 33142 + }, + { + "epoch": 0.6634737131846958, + "grad_norm": 1.118585228919983, + "learning_rate": 2.6871443101395145e-06, + "loss": 0.3125, + "step": 33143 + }, + { + "epoch": 0.6634937317018242, + "grad_norm": 1.1609944105148315, + "learning_rate": 2.6868569001304062e-06, + "loss": 0.2614, + "step": 33144 + }, + { + "epoch": 0.6635137502189525, + "grad_norm": 1.0504926443099976, + "learning_rate": 2.686569499845256e-06, + "loss": 0.3046, + "step": 33145 + }, + { + "epoch": 0.6635337687360808, + "grad_norm": 1.1617077589035034, + "learning_rate": 2.68628210928527e-06, + "loss": 0.2693, + "step": 33146 + }, + { + "epoch": 0.6635537872532092, + "grad_norm": 1.1417635679244995, + "learning_rate": 2.685994728451655e-06, + "loss": 0.2931, + "step": 33147 + }, + { + "epoch": 0.6635738057703375, + "grad_norm": 1.1564573049545288, + "learning_rate": 2.6857073573456194e-06, + "loss": 0.3033, + "step": 33148 + }, + { + "epoch": 0.6635938242874659, + "grad_norm": 1.045011043548584, + "learning_rate": 2.6854199959683738e-06, + "loss": 0.2692, + "step": 33149 + }, + { + "epoch": 0.6636138428045942, + "grad_norm": 1.1176843643188477, + "learning_rate": 2.685132644321122e-06, + "loss": 0.2929, + "step": 33150 + }, + { + "epoch": 0.6636338613217226, + "grad_norm": 1.0939087867736816, + "learning_rate": 2.6848453024050765e-06, + "loss": 0.2794, + "step": 33151 + }, + { + "epoch": 0.6636538798388509, + "grad_norm": 1.0071947574615479, + "learning_rate": 2.684557970221443e-06, + "loss": 0.3088, + "step": 33152 + }, + { + "epoch": 0.6636738983559792, + "grad_norm": 1.9719287157058716, + "learning_rate": 2.6842706477714283e-06, + "loss": 0.7152, + "step": 33153 + }, + { + "epoch": 0.6636939168731076, + "grad_norm": 1.1363564729690552, + "learning_rate": 2.683983335056243e-06, + "loss": 0.3251, + "step": 33154 + }, + { + "epoch": 0.6637139353902359, + "grad_norm": 1.0955307483673096, + "learning_rate": 2.6836960320770925e-06, + "loss": 0.3083, + "step": 33155 + }, + { + "epoch": 0.6637339539073643, + "grad_norm": 1.0592042207717896, + "learning_rate": 2.6834087388351865e-06, + "loss": 0.2451, + "step": 33156 + }, + { + "epoch": 0.6637539724244926, + "grad_norm": 1.1657739877700806, + "learning_rate": 2.6831214553317292e-06, + "loss": 0.327, + "step": 33157 + }, + { + "epoch": 0.663773990941621, + "grad_norm": 1.0216853618621826, + "learning_rate": 2.682834181567933e-06, + "loss": 0.2577, + "step": 33158 + }, + { + "epoch": 0.6637940094587493, + "grad_norm": 1.1944001913070679, + "learning_rate": 2.6825469175450026e-06, + "loss": 0.2973, + "step": 33159 + }, + { + "epoch": 0.6638140279758777, + "grad_norm": 1.1700540781021118, + "learning_rate": 2.682259663264147e-06, + "loss": 0.2893, + "step": 33160 + }, + { + "epoch": 0.663834046493006, + "grad_norm": 1.0504995584487915, + "learning_rate": 2.6819724187265705e-06, + "loss": 0.2837, + "step": 33161 + }, + { + "epoch": 0.6638540650101343, + "grad_norm": 1.0967090129852295, + "learning_rate": 2.681685183933485e-06, + "loss": 0.2721, + "step": 33162 + }, + { + "epoch": 0.6638740835272627, + "grad_norm": 1.8443657159805298, + "learning_rate": 2.6813979588860946e-06, + "loss": 0.7965, + "step": 33163 + }, + { + "epoch": 0.663894102044391, + "grad_norm": 1.2549911737442017, + "learning_rate": 2.68111074358561e-06, + "loss": 0.2732, + "step": 33164 + }, + { + "epoch": 0.6639141205615194, + "grad_norm": 1.073782205581665, + "learning_rate": 2.6808235380332366e-06, + "loss": 0.2775, + "step": 33165 + }, + { + "epoch": 0.6639341390786477, + "grad_norm": 1.2661535739898682, + "learning_rate": 2.68053634223018e-06, + "loss": 0.3212, + "step": 33166 + }, + { + "epoch": 0.6639541575957761, + "grad_norm": 1.1434288024902344, + "learning_rate": 2.680249156177652e-06, + "loss": 0.2954, + "step": 33167 + }, + { + "epoch": 0.6639741761129044, + "grad_norm": 1.1672148704528809, + "learning_rate": 2.679961979876856e-06, + "loss": 0.2979, + "step": 33168 + }, + { + "epoch": 0.6639941946300327, + "grad_norm": 1.0993424654006958, + "learning_rate": 2.679674813329002e-06, + "loss": 0.2991, + "step": 33169 + }, + { + "epoch": 0.6640142131471611, + "grad_norm": 1.877926230430603, + "learning_rate": 2.6793876565352933e-06, + "loss": 0.7498, + "step": 33170 + }, + { + "epoch": 0.6640342316642894, + "grad_norm": 1.0892606973648071, + "learning_rate": 2.6791005094969415e-06, + "loss": 0.2795, + "step": 33171 + }, + { + "epoch": 0.6640542501814178, + "grad_norm": 1.2001410722732544, + "learning_rate": 2.678813372215151e-06, + "loss": 0.3076, + "step": 33172 + }, + { + "epoch": 0.6640742686985461, + "grad_norm": 1.0375876426696777, + "learning_rate": 2.6785262446911308e-06, + "loss": 0.3348, + "step": 33173 + }, + { + "epoch": 0.6640942872156745, + "grad_norm": 1.1521645784378052, + "learning_rate": 2.678239126926084e-06, + "loss": 0.2657, + "step": 33174 + }, + { + "epoch": 0.6641143057328028, + "grad_norm": 1.0907024145126343, + "learning_rate": 2.6779520189212214e-06, + "loss": 0.2394, + "step": 33175 + }, + { + "epoch": 0.6641343242499312, + "grad_norm": 1.252252459526062, + "learning_rate": 2.6776649206777483e-06, + "loss": 0.2906, + "step": 33176 + }, + { + "epoch": 0.6641543427670595, + "grad_norm": 1.7766444683074951, + "learning_rate": 2.6773778321968734e-06, + "loss": 0.7453, + "step": 33177 + }, + { + "epoch": 0.6641743612841878, + "grad_norm": 1.0947009325027466, + "learning_rate": 2.6770907534798022e-06, + "loss": 0.2815, + "step": 33178 + }, + { + "epoch": 0.6641943798013162, + "grad_norm": 1.1403707265853882, + "learning_rate": 2.6768036845277402e-06, + "loss": 0.3066, + "step": 33179 + }, + { + "epoch": 0.6642143983184445, + "grad_norm": 1.2066196203231812, + "learning_rate": 2.6765166253418966e-06, + "loss": 0.3043, + "step": 33180 + }, + { + "epoch": 0.6642344168355729, + "grad_norm": 1.9031715393066406, + "learning_rate": 2.676229575923478e-06, + "loss": 0.7835, + "step": 33181 + }, + { + "epoch": 0.6642544353527012, + "grad_norm": 1.1094774007797241, + "learning_rate": 2.6759425362736897e-06, + "loss": 0.3132, + "step": 33182 + }, + { + "epoch": 0.6642744538698296, + "grad_norm": 1.1197618246078491, + "learning_rate": 2.6756555063937384e-06, + "loss": 0.325, + "step": 33183 + }, + { + "epoch": 0.6642944723869579, + "grad_norm": 1.1790685653686523, + "learning_rate": 2.67536848628483e-06, + "loss": 0.2894, + "step": 33184 + }, + { + "epoch": 0.6643144909040862, + "grad_norm": 1.1322507858276367, + "learning_rate": 2.6750814759481736e-06, + "loss": 0.3349, + "step": 33185 + }, + { + "epoch": 0.6643345094212146, + "grad_norm": 1.115889072418213, + "learning_rate": 2.6747944753849745e-06, + "loss": 0.2921, + "step": 33186 + }, + { + "epoch": 0.6643545279383429, + "grad_norm": 1.1916956901550293, + "learning_rate": 2.6745074845964374e-06, + "loss": 0.3131, + "step": 33187 + }, + { + "epoch": 0.6643745464554713, + "grad_norm": 1.0253759622573853, + "learning_rate": 2.6742205035837722e-06, + "loss": 0.3131, + "step": 33188 + }, + { + "epoch": 0.6643945649725996, + "grad_norm": 1.1428813934326172, + "learning_rate": 2.6739335323481814e-06, + "loss": 0.3045, + "step": 33189 + }, + { + "epoch": 0.664414583489728, + "grad_norm": 1.0812574625015259, + "learning_rate": 2.6736465708908752e-06, + "loss": 0.2715, + "step": 33190 + }, + { + "epoch": 0.6644346020068563, + "grad_norm": 1.0958755016326904, + "learning_rate": 2.6733596192130583e-06, + "loss": 0.2775, + "step": 33191 + }, + { + "epoch": 0.6644546205239847, + "grad_norm": 1.8227031230926514, + "learning_rate": 2.6730726773159354e-06, + "loss": 0.798, + "step": 33192 + }, + { + "epoch": 0.664474639041113, + "grad_norm": 2.0140397548675537, + "learning_rate": 2.6727857452007154e-06, + "loss": 0.7243, + "step": 33193 + }, + { + "epoch": 0.6644946575582413, + "grad_norm": 2.010999917984009, + "learning_rate": 2.672498822868604e-06, + "loss": 0.7932, + "step": 33194 + }, + { + "epoch": 0.6645146760753697, + "grad_norm": 1.125541090965271, + "learning_rate": 2.6722119103208068e-06, + "loss": 0.2975, + "step": 33195 + }, + { + "epoch": 0.664534694592498, + "grad_norm": 1.8081711530685425, + "learning_rate": 2.671925007558529e-06, + "loss": 0.7563, + "step": 33196 + }, + { + "epoch": 0.6645547131096264, + "grad_norm": 2.020090341567993, + "learning_rate": 2.6716381145829755e-06, + "loss": 0.7816, + "step": 33197 + }, + { + "epoch": 0.6645747316267547, + "grad_norm": 1.4691206216812134, + "learning_rate": 2.671351231395357e-06, + "loss": 0.326, + "step": 33198 + }, + { + "epoch": 0.6645947501438831, + "grad_norm": 1.112399697303772, + "learning_rate": 2.6710643579968764e-06, + "loss": 0.3046, + "step": 33199 + }, + { + "epoch": 0.6646147686610114, + "grad_norm": 1.1805174350738525, + "learning_rate": 2.6707774943887375e-06, + "loss": 0.3182, + "step": 33200 + }, + { + "epoch": 0.6646347871781397, + "grad_norm": 1.0352245569229126, + "learning_rate": 2.670490640572152e-06, + "loss": 0.2709, + "step": 33201 + }, + { + "epoch": 0.6646548056952681, + "grad_norm": 1.0308414697647095, + "learning_rate": 2.6702037965483197e-06, + "loss": 0.2868, + "step": 33202 + }, + { + "epoch": 0.6646748242123964, + "grad_norm": 1.142435073852539, + "learning_rate": 2.6699169623184513e-06, + "loss": 0.2651, + "step": 33203 + }, + { + "epoch": 0.6646948427295248, + "grad_norm": 2.050856828689575, + "learning_rate": 2.6696301378837507e-06, + "loss": 0.7659, + "step": 33204 + }, + { + "epoch": 0.6647148612466531, + "grad_norm": 1.1121872663497925, + "learning_rate": 2.6693433232454237e-06, + "loss": 0.3235, + "step": 33205 + }, + { + "epoch": 0.6647348797637815, + "grad_norm": 1.161987066268921, + "learning_rate": 2.6690565184046734e-06, + "loss": 0.3237, + "step": 33206 + }, + { + "epoch": 0.6647548982809098, + "grad_norm": 1.1951712369918823, + "learning_rate": 2.66876972336271e-06, + "loss": 0.3177, + "step": 33207 + }, + { + "epoch": 0.6647749167980382, + "grad_norm": 1.0624899864196777, + "learning_rate": 2.6684829381207366e-06, + "loss": 0.3048, + "step": 33208 + }, + { + "epoch": 0.6647949353151665, + "grad_norm": 1.159011721611023, + "learning_rate": 2.6681961626799603e-06, + "loss": 0.3139, + "step": 33209 + }, + { + "epoch": 0.6648149538322948, + "grad_norm": 1.1559361219406128, + "learning_rate": 2.6679093970415825e-06, + "loss": 0.3085, + "step": 33210 + }, + { + "epoch": 0.6648349723494232, + "grad_norm": 1.045154333114624, + "learning_rate": 2.667622641206814e-06, + "loss": 0.3128, + "step": 33211 + }, + { + "epoch": 0.6648549908665515, + "grad_norm": 1.0374499559402466, + "learning_rate": 2.667335895176858e-06, + "loss": 0.2821, + "step": 33212 + }, + { + "epoch": 0.6648750093836799, + "grad_norm": 1.1280643939971924, + "learning_rate": 2.667049158952918e-06, + "loss": 0.2885, + "step": 33213 + }, + { + "epoch": 0.6648950279008082, + "grad_norm": 1.1523215770721436, + "learning_rate": 2.666762432536203e-06, + "loss": 0.3216, + "step": 33214 + }, + { + "epoch": 0.6649150464179366, + "grad_norm": 1.2064197063446045, + "learning_rate": 2.666475715927915e-06, + "loss": 0.2686, + "step": 33215 + }, + { + "epoch": 0.6649350649350649, + "grad_norm": 0.9980133175849915, + "learning_rate": 2.666189009129263e-06, + "loss": 0.2305, + "step": 33216 + }, + { + "epoch": 0.6649550834521932, + "grad_norm": 1.2085988521575928, + "learning_rate": 2.6659023121414495e-06, + "loss": 0.2999, + "step": 33217 + }, + { + "epoch": 0.6649751019693216, + "grad_norm": 1.10770583152771, + "learning_rate": 2.6656156249656805e-06, + "loss": 0.3427, + "step": 33218 + }, + { + "epoch": 0.6649951204864499, + "grad_norm": 1.0816516876220703, + "learning_rate": 2.6653289476031597e-06, + "loss": 0.2773, + "step": 33219 + }, + { + "epoch": 0.6650151390035783, + "grad_norm": 1.0778791904449463, + "learning_rate": 2.665042280055095e-06, + "loss": 0.3223, + "step": 33220 + }, + { + "epoch": 0.6650351575207066, + "grad_norm": 2.021315336227417, + "learning_rate": 2.6647556223226904e-06, + "loss": 0.7377, + "step": 33221 + }, + { + "epoch": 0.665055176037835, + "grad_norm": 1.2642713785171509, + "learning_rate": 2.6644689744071505e-06, + "loss": 0.2856, + "step": 33222 + }, + { + "epoch": 0.6650751945549633, + "grad_norm": 1.161838412284851, + "learning_rate": 2.6641823363096788e-06, + "loss": 0.2898, + "step": 33223 + }, + { + "epoch": 0.6650952130720917, + "grad_norm": 1.0763596296310425, + "learning_rate": 2.663895708031483e-06, + "loss": 0.281, + "step": 33224 + }, + { + "epoch": 0.66511523158922, + "grad_norm": 1.0967094898223877, + "learning_rate": 2.6636090895737675e-06, + "loss": 0.2477, + "step": 33225 + }, + { + "epoch": 0.6651352501063483, + "grad_norm": 1.060287356376648, + "learning_rate": 2.6633224809377343e-06, + "loss": 0.2842, + "step": 33226 + }, + { + "epoch": 0.6651552686234767, + "grad_norm": 1.2035728693008423, + "learning_rate": 2.6630358821245927e-06, + "loss": 0.3082, + "step": 33227 + }, + { + "epoch": 0.665175287140605, + "grad_norm": 1.0886973142623901, + "learning_rate": 2.662749293135544e-06, + "loss": 0.2745, + "step": 33228 + }, + { + "epoch": 0.6651953056577334, + "grad_norm": 2.065993070602417, + "learning_rate": 2.662462713971795e-06, + "loss": 0.7396, + "step": 33229 + }, + { + "epoch": 0.6652153241748617, + "grad_norm": 1.1776050329208374, + "learning_rate": 2.6621761446345505e-06, + "loss": 0.3345, + "step": 33230 + }, + { + "epoch": 0.6652353426919901, + "grad_norm": 1.2722129821777344, + "learning_rate": 2.661889585125014e-06, + "loss": 0.2984, + "step": 33231 + }, + { + "epoch": 0.6652553612091184, + "grad_norm": 1.194358229637146, + "learning_rate": 2.661603035444389e-06, + "loss": 0.2863, + "step": 33232 + }, + { + "epoch": 0.6652753797262467, + "grad_norm": 1.2602472305297852, + "learning_rate": 2.6613164955938837e-06, + "loss": 0.2898, + "step": 33233 + }, + { + "epoch": 0.6652953982433751, + "grad_norm": 1.1924598217010498, + "learning_rate": 2.6610299655746998e-06, + "loss": 0.2911, + "step": 33234 + }, + { + "epoch": 0.6653154167605034, + "grad_norm": 1.256566047668457, + "learning_rate": 2.6607434453880433e-06, + "loss": 0.3236, + "step": 33235 + }, + { + "epoch": 0.6653354352776318, + "grad_norm": 1.2429921627044678, + "learning_rate": 2.660456935035117e-06, + "loss": 0.3317, + "step": 33236 + }, + { + "epoch": 0.6653554537947601, + "grad_norm": 1.171359658241272, + "learning_rate": 2.660170434517125e-06, + "loss": 0.2795, + "step": 33237 + }, + { + "epoch": 0.6653754723118885, + "grad_norm": 1.0438109636306763, + "learning_rate": 2.6598839438352753e-06, + "loss": 0.3427, + "step": 33238 + }, + { + "epoch": 0.6653954908290168, + "grad_norm": 1.0788739919662476, + "learning_rate": 2.6595974629907675e-06, + "loss": 0.2621, + "step": 33239 + }, + { + "epoch": 0.6654155093461452, + "grad_norm": 1.187423586845398, + "learning_rate": 2.6593109919848103e-06, + "loss": 0.27, + "step": 33240 + }, + { + "epoch": 0.6654355278632735, + "grad_norm": 1.123993158340454, + "learning_rate": 2.6590245308186037e-06, + "loss": 0.3047, + "step": 33241 + }, + { + "epoch": 0.6654555463804018, + "grad_norm": 1.0894635915756226, + "learning_rate": 2.658738079493357e-06, + "loss": 0.3481, + "step": 33242 + }, + { + "epoch": 0.6654755648975302, + "grad_norm": 1.1486257314682007, + "learning_rate": 2.658451638010271e-06, + "loss": 0.344, + "step": 33243 + }, + { + "epoch": 0.6654955834146585, + "grad_norm": 1.9433085918426514, + "learning_rate": 2.65816520637055e-06, + "loss": 0.8464, + "step": 33244 + }, + { + "epoch": 0.6655156019317869, + "grad_norm": 1.1166037321090698, + "learning_rate": 2.6578787845753972e-06, + "loss": 0.2914, + "step": 33245 + }, + { + "epoch": 0.6655356204489152, + "grad_norm": 1.0163369178771973, + "learning_rate": 2.6575923726260192e-06, + "loss": 0.2881, + "step": 33246 + }, + { + "epoch": 0.6655556389660436, + "grad_norm": 1.1325721740722656, + "learning_rate": 2.6573059705236195e-06, + "loss": 0.3254, + "step": 33247 + }, + { + "epoch": 0.6655756574831719, + "grad_norm": 1.1853095293045044, + "learning_rate": 2.6570195782694007e-06, + "loss": 0.3089, + "step": 33248 + }, + { + "epoch": 0.6655956760003002, + "grad_norm": 1.3104748725891113, + "learning_rate": 2.656733195864568e-06, + "loss": 0.2856, + "step": 33249 + }, + { + "epoch": 0.6656156945174286, + "grad_norm": 1.16049063205719, + "learning_rate": 2.6564468233103226e-06, + "loss": 0.26, + "step": 33250 + }, + { + "epoch": 0.6656357130345569, + "grad_norm": 1.1408685445785522, + "learning_rate": 2.656160460607872e-06, + "loss": 0.2862, + "step": 33251 + }, + { + "epoch": 0.6656557315516853, + "grad_norm": 2.0042688846588135, + "learning_rate": 2.6558741077584173e-06, + "loss": 0.7226, + "step": 33252 + }, + { + "epoch": 0.6656757500688136, + "grad_norm": 2.0014634132385254, + "learning_rate": 2.6555877647631645e-06, + "loss": 0.7145, + "step": 33253 + }, + { + "epoch": 0.665695768585942, + "grad_norm": 1.0499824285507202, + "learning_rate": 2.6553014316233146e-06, + "loss": 0.2776, + "step": 33254 + }, + { + "epoch": 0.6657157871030703, + "grad_norm": 1.7823997735977173, + "learning_rate": 2.6550151083400746e-06, + "loss": 0.806, + "step": 33255 + }, + { + "epoch": 0.6657358056201986, + "grad_norm": 1.1514477729797363, + "learning_rate": 2.654728794914646e-06, + "loss": 0.2835, + "step": 33256 + }, + { + "epoch": 0.665755824137327, + "grad_norm": 1.2504496574401855, + "learning_rate": 2.6544424913482335e-06, + "loss": 0.3163, + "step": 33257 + }, + { + "epoch": 0.6657758426544553, + "grad_norm": 1.299501657485962, + "learning_rate": 2.6541561976420406e-06, + "loss": 0.3099, + "step": 33258 + }, + { + "epoch": 0.6657958611715837, + "grad_norm": 1.3690978288650513, + "learning_rate": 2.6538699137972674e-06, + "loss": 0.3377, + "step": 33259 + }, + { + "epoch": 0.665815879688712, + "grad_norm": 1.1236463785171509, + "learning_rate": 2.6535836398151226e-06, + "loss": 0.3113, + "step": 33260 + }, + { + "epoch": 0.6658358982058404, + "grad_norm": 1.1696901321411133, + "learning_rate": 2.6532973756968065e-06, + "loss": 0.2937, + "step": 33261 + }, + { + "epoch": 0.6658559167229687, + "grad_norm": 1.1930876970291138, + "learning_rate": 2.653011121443524e-06, + "loss": 0.3165, + "step": 33262 + }, + { + "epoch": 0.6658759352400971, + "grad_norm": 1.160688877105713, + "learning_rate": 2.652724877056475e-06, + "loss": 0.2741, + "step": 33263 + }, + { + "epoch": 0.6658959537572254, + "grad_norm": 1.1127800941467285, + "learning_rate": 2.652438642536868e-06, + "loss": 0.3516, + "step": 33264 + }, + { + "epoch": 0.6659159722743537, + "grad_norm": 2.0723257064819336, + "learning_rate": 2.6521524178859015e-06, + "loss": 0.7843, + "step": 33265 + }, + { + "epoch": 0.6659359907914821, + "grad_norm": 1.1025418043136597, + "learning_rate": 2.6518662031047834e-06, + "loss": 0.2587, + "step": 33266 + }, + { + "epoch": 0.6659560093086104, + "grad_norm": 1.2286016941070557, + "learning_rate": 2.6515799981947136e-06, + "loss": 0.319, + "step": 33267 + }, + { + "epoch": 0.6659760278257388, + "grad_norm": 1.1803874969482422, + "learning_rate": 2.6512938031568946e-06, + "loss": 0.2994, + "step": 33268 + }, + { + "epoch": 0.6659960463428671, + "grad_norm": 1.150140643119812, + "learning_rate": 2.651007617992534e-06, + "loss": 0.3376, + "step": 33269 + }, + { + "epoch": 0.6660160648599955, + "grad_norm": 1.1117151975631714, + "learning_rate": 2.6507214427028306e-06, + "loss": 0.2743, + "step": 33270 + }, + { + "epoch": 0.6660360833771238, + "grad_norm": 2.094453811645508, + "learning_rate": 2.6504352772889886e-06, + "loss": 0.7919, + "step": 33271 + }, + { + "epoch": 0.6660561018942521, + "grad_norm": 1.0814365148544312, + "learning_rate": 2.650149121752209e-06, + "loss": 0.2914, + "step": 33272 + }, + { + "epoch": 0.6660761204113805, + "grad_norm": 0.9676142930984497, + "learning_rate": 2.6498629760937e-06, + "loss": 0.293, + "step": 33273 + }, + { + "epoch": 0.6660961389285088, + "grad_norm": 1.0370001792907715, + "learning_rate": 2.6495768403146603e-06, + "loss": 0.2921, + "step": 33274 + }, + { + "epoch": 0.6661161574456372, + "grad_norm": 1.1098791360855103, + "learning_rate": 2.649290714416294e-06, + "loss": 0.3531, + "step": 33275 + }, + { + "epoch": 0.6661361759627655, + "grad_norm": 1.1363284587860107, + "learning_rate": 2.6490045983998016e-06, + "loss": 0.3233, + "step": 33276 + }, + { + "epoch": 0.6661561944798939, + "grad_norm": 1.1471173763275146, + "learning_rate": 2.64871849226639e-06, + "loss": 0.3429, + "step": 33277 + }, + { + "epoch": 0.6661762129970222, + "grad_norm": 2.030620813369751, + "learning_rate": 2.6484323960172576e-06, + "loss": 0.8294, + "step": 33278 + }, + { + "epoch": 0.6661962315141506, + "grad_norm": 1.1446404457092285, + "learning_rate": 2.648146309653612e-06, + "loss": 0.3236, + "step": 33279 + }, + { + "epoch": 0.6662162500312789, + "grad_norm": 1.0768847465515137, + "learning_rate": 2.6478602331766523e-06, + "loss": 0.2607, + "step": 33280 + }, + { + "epoch": 0.6662362685484072, + "grad_norm": 1.1504466533660889, + "learning_rate": 2.6475741665875804e-06, + "loss": 0.2933, + "step": 33281 + }, + { + "epoch": 0.6662562870655356, + "grad_norm": 1.1498757600784302, + "learning_rate": 2.6472881098876024e-06, + "loss": 0.2797, + "step": 33282 + }, + { + "epoch": 0.6662763055826639, + "grad_norm": 1.1458193063735962, + "learning_rate": 2.647002063077919e-06, + "loss": 0.2789, + "step": 33283 + }, + { + "epoch": 0.6662963240997923, + "grad_norm": 1.0547486543655396, + "learning_rate": 2.6467160261597325e-06, + "loss": 0.2925, + "step": 33284 + }, + { + "epoch": 0.6663163426169206, + "grad_norm": 1.853528618812561, + "learning_rate": 2.6464299991342436e-06, + "loss": 0.7515, + "step": 33285 + }, + { + "epoch": 0.666336361134049, + "grad_norm": 1.161110281944275, + "learning_rate": 2.646143982002658e-06, + "loss": 0.2993, + "step": 33286 + }, + { + "epoch": 0.6663563796511773, + "grad_norm": 1.7637025117874146, + "learning_rate": 2.645857974766177e-06, + "loss": 0.7156, + "step": 33287 + }, + { + "epoch": 0.6663763981683056, + "grad_norm": 1.7166301012039185, + "learning_rate": 2.6455719774260025e-06, + "loss": 0.7596, + "step": 33288 + }, + { + "epoch": 0.666396416685434, + "grad_norm": 1.1776347160339355, + "learning_rate": 2.6452859899833364e-06, + "loss": 0.3131, + "step": 33289 + }, + { + "epoch": 0.6664164352025623, + "grad_norm": 1.0323079824447632, + "learning_rate": 2.6450000124393794e-06, + "loss": 0.2968, + "step": 33290 + }, + { + "epoch": 0.6664364537196907, + "grad_norm": 1.0140305757522583, + "learning_rate": 2.6447140447953355e-06, + "loss": 0.3199, + "step": 33291 + }, + { + "epoch": 0.666456472236819, + "grad_norm": 1.0637211799621582, + "learning_rate": 2.6444280870524093e-06, + "loss": 0.2751, + "step": 33292 + }, + { + "epoch": 0.6664764907539474, + "grad_norm": 1.0934127569198608, + "learning_rate": 2.6441421392118e-06, + "loss": 0.3205, + "step": 33293 + }, + { + "epoch": 0.6664965092710757, + "grad_norm": 1.1330879926681519, + "learning_rate": 2.643856201274708e-06, + "loss": 0.2705, + "step": 33294 + }, + { + "epoch": 0.6665165277882041, + "grad_norm": 1.1695077419281006, + "learning_rate": 2.64357027324234e-06, + "loss": 0.2628, + "step": 33295 + }, + { + "epoch": 0.6665365463053324, + "grad_norm": 1.2409197092056274, + "learning_rate": 2.6432843551158947e-06, + "loss": 0.2716, + "step": 33296 + }, + { + "epoch": 0.6665565648224607, + "grad_norm": 1.0366450548171997, + "learning_rate": 2.642998446896576e-06, + "loss": 0.2852, + "step": 33297 + }, + { + "epoch": 0.6665765833395891, + "grad_norm": 1.0885663032531738, + "learning_rate": 2.6427125485855843e-06, + "loss": 0.3504, + "step": 33298 + }, + { + "epoch": 0.6665966018567174, + "grad_norm": 1.1227360963821411, + "learning_rate": 2.6424266601841187e-06, + "loss": 0.2761, + "step": 33299 + }, + { + "epoch": 0.6666166203738458, + "grad_norm": 1.2531776428222656, + "learning_rate": 2.6421407816933868e-06, + "loss": 0.331, + "step": 33300 + }, + { + "epoch": 0.6666366388909741, + "grad_norm": 1.0551860332489014, + "learning_rate": 2.6418549131145878e-06, + "loss": 0.3014, + "step": 33301 + }, + { + "epoch": 0.6666566574081025, + "grad_norm": 1.0994677543640137, + "learning_rate": 2.6415690544489232e-06, + "loss": 0.2986, + "step": 33302 + }, + { + "epoch": 0.6666766759252308, + "grad_norm": 1.1606218814849854, + "learning_rate": 2.641283205697592e-06, + "loss": 0.3237, + "step": 33303 + }, + { + "epoch": 0.6666966944423591, + "grad_norm": 1.0755505561828613, + "learning_rate": 2.6409973668617995e-06, + "loss": 0.2758, + "step": 33304 + }, + { + "epoch": 0.6667167129594875, + "grad_norm": 1.0932434797286987, + "learning_rate": 2.6407115379427483e-06, + "loss": 0.3072, + "step": 33305 + }, + { + "epoch": 0.6667367314766158, + "grad_norm": 1.1999938488006592, + "learning_rate": 2.640425718941638e-06, + "loss": 0.2846, + "step": 33306 + }, + { + "epoch": 0.6667567499937442, + "grad_norm": 1.088685154914856, + "learning_rate": 2.640139909859668e-06, + "loss": 0.2955, + "step": 33307 + }, + { + "epoch": 0.6667767685108725, + "grad_norm": 1.084131121635437, + "learning_rate": 2.639854110698044e-06, + "loss": 0.3116, + "step": 33308 + }, + { + "epoch": 0.6667967870280009, + "grad_norm": 1.0563783645629883, + "learning_rate": 2.639568321457966e-06, + "loss": 0.2602, + "step": 33309 + }, + { + "epoch": 0.6668168055451292, + "grad_norm": 1.0450444221496582, + "learning_rate": 2.6392825421406336e-06, + "loss": 0.2705, + "step": 33310 + }, + { + "epoch": 0.6668368240622576, + "grad_norm": 1.0600571632385254, + "learning_rate": 2.6389967727472497e-06, + "loss": 0.2481, + "step": 33311 + }, + { + "epoch": 0.6668568425793859, + "grad_norm": 1.977029800415039, + "learning_rate": 2.6387110132790138e-06, + "loss": 0.7533, + "step": 33312 + }, + { + "epoch": 0.6668768610965142, + "grad_norm": 1.304509162902832, + "learning_rate": 2.638425263737131e-06, + "loss": 0.3309, + "step": 33313 + }, + { + "epoch": 0.6668968796136426, + "grad_norm": 1.1171563863754272, + "learning_rate": 2.638139524122799e-06, + "loss": 0.3109, + "step": 33314 + }, + { + "epoch": 0.6669168981307709, + "grad_norm": 1.1188695430755615, + "learning_rate": 2.637853794437221e-06, + "loss": 0.3475, + "step": 33315 + }, + { + "epoch": 0.6669369166478993, + "grad_norm": 1.189538836479187, + "learning_rate": 2.6375680746815955e-06, + "loss": 0.257, + "step": 33316 + }, + { + "epoch": 0.6669569351650276, + "grad_norm": 0.9810280799865723, + "learning_rate": 2.637282364857125e-06, + "loss": 0.2552, + "step": 33317 + }, + { + "epoch": 0.666976953682156, + "grad_norm": 1.863722562789917, + "learning_rate": 2.6369966649650124e-06, + "loss": 0.7764, + "step": 33318 + }, + { + "epoch": 0.6669969721992843, + "grad_norm": 1.7988651990890503, + "learning_rate": 2.636710975006458e-06, + "loss": 0.75, + "step": 33319 + }, + { + "epoch": 0.6670169907164126, + "grad_norm": 1.104538083076477, + "learning_rate": 2.636425294982662e-06, + "loss": 0.2823, + "step": 33320 + }, + { + "epoch": 0.667037009233541, + "grad_norm": 1.148449420928955, + "learning_rate": 2.6361396248948236e-06, + "loss": 0.3695, + "step": 33321 + }, + { + "epoch": 0.6670570277506693, + "grad_norm": 1.0511269569396973, + "learning_rate": 2.635853964744147e-06, + "loss": 0.3234, + "step": 33322 + }, + { + "epoch": 0.6670770462677977, + "grad_norm": 1.0904831886291504, + "learning_rate": 2.6355683145318324e-06, + "loss": 0.2764, + "step": 33323 + }, + { + "epoch": 0.667097064784926, + "grad_norm": 1.1468729972839355, + "learning_rate": 2.6352826742590786e-06, + "loss": 0.3647, + "step": 33324 + }, + { + "epoch": 0.6671170833020544, + "grad_norm": 1.8784713745117188, + "learning_rate": 2.6349970439270866e-06, + "loss": 0.7061, + "step": 33325 + }, + { + "epoch": 0.6671371018191827, + "grad_norm": 1.0893406867980957, + "learning_rate": 2.6347114235370595e-06, + "loss": 0.283, + "step": 33326 + }, + { + "epoch": 0.6671571203363111, + "grad_norm": 1.114864468574524, + "learning_rate": 2.634425813090197e-06, + "loss": 0.2765, + "step": 33327 + }, + { + "epoch": 0.6671771388534394, + "grad_norm": 1.032137393951416, + "learning_rate": 2.6341402125876986e-06, + "loss": 0.3028, + "step": 33328 + }, + { + "epoch": 0.6671971573705677, + "grad_norm": 1.056666374206543, + "learning_rate": 2.633854622030764e-06, + "loss": 0.323, + "step": 33329 + }, + { + "epoch": 0.6672171758876961, + "grad_norm": 1.1550898551940918, + "learning_rate": 2.6335690414205954e-06, + "loss": 0.278, + "step": 33330 + }, + { + "epoch": 0.6672371944048244, + "grad_norm": 1.205747365951538, + "learning_rate": 2.6332834707583954e-06, + "loss": 0.2835, + "step": 33331 + }, + { + "epoch": 0.6672572129219528, + "grad_norm": 1.1891131401062012, + "learning_rate": 2.6329979100453623e-06, + "loss": 0.3443, + "step": 33332 + }, + { + "epoch": 0.6672772314390811, + "grad_norm": 1.1812931299209595, + "learning_rate": 2.6327123592826962e-06, + "loss": 0.3219, + "step": 33333 + }, + { + "epoch": 0.6672972499562095, + "grad_norm": 1.2438573837280273, + "learning_rate": 2.6324268184715963e-06, + "loss": 0.281, + "step": 33334 + }, + { + "epoch": 0.6673172684733378, + "grad_norm": 1.1000633239746094, + "learning_rate": 2.632141287613267e-06, + "loss": 0.3389, + "step": 33335 + }, + { + "epoch": 0.6673372869904661, + "grad_norm": 1.074704647064209, + "learning_rate": 2.631855766708905e-06, + "loss": 0.2891, + "step": 33336 + }, + { + "epoch": 0.6673573055075945, + "grad_norm": 1.235518455505371, + "learning_rate": 2.6315702557597125e-06, + "loss": 0.3292, + "step": 33337 + }, + { + "epoch": 0.6673773240247228, + "grad_norm": 1.0882624387741089, + "learning_rate": 2.631284754766887e-06, + "loss": 0.2916, + "step": 33338 + }, + { + "epoch": 0.6673973425418512, + "grad_norm": 1.1610896587371826, + "learning_rate": 2.630999263731632e-06, + "loss": 0.3327, + "step": 33339 + }, + { + "epoch": 0.6674173610589795, + "grad_norm": 1.1044518947601318, + "learning_rate": 2.6307137826551473e-06, + "loss": 0.3249, + "step": 33340 + }, + { + "epoch": 0.6674373795761079, + "grad_norm": 1.057494878768921, + "learning_rate": 2.630428311538631e-06, + "loss": 0.2582, + "step": 33341 + }, + { + "epoch": 0.6674573980932362, + "grad_norm": 1.1112703084945679, + "learning_rate": 2.6301428503832827e-06, + "loss": 0.3571, + "step": 33342 + }, + { + "epoch": 0.6674774166103646, + "grad_norm": 1.1747865676879883, + "learning_rate": 2.6298573991903043e-06, + "loss": 0.2911, + "step": 33343 + }, + { + "epoch": 0.6674974351274929, + "grad_norm": 1.074617862701416, + "learning_rate": 2.629571957960896e-06, + "loss": 0.3248, + "step": 33344 + }, + { + "epoch": 0.6675174536446212, + "grad_norm": 1.098731279373169, + "learning_rate": 2.629286526696258e-06, + "loss": 0.306, + "step": 33345 + }, + { + "epoch": 0.6675374721617496, + "grad_norm": 1.0347552299499512, + "learning_rate": 2.6290011053975895e-06, + "loss": 0.2567, + "step": 33346 + }, + { + "epoch": 0.6675574906788779, + "grad_norm": 2.1167964935302734, + "learning_rate": 2.6287156940660875e-06, + "loss": 0.7759, + "step": 33347 + }, + { + "epoch": 0.6675775091960063, + "grad_norm": 2.23846697807312, + "learning_rate": 2.6284302927029563e-06, + "loss": 0.7191, + "step": 33348 + }, + { + "epoch": 0.6675975277131346, + "grad_norm": 1.0222545862197876, + "learning_rate": 2.628144901309394e-06, + "loss": 0.2692, + "step": 33349 + }, + { + "epoch": 0.667617546230263, + "grad_norm": 2.0738911628723145, + "learning_rate": 2.6278595198866e-06, + "loss": 0.7684, + "step": 33350 + }, + { + "epoch": 0.6676375647473913, + "grad_norm": 1.169859766960144, + "learning_rate": 2.6275741484357746e-06, + "loss": 0.3037, + "step": 33351 + }, + { + "epoch": 0.6676575832645196, + "grad_norm": 1.2105507850646973, + "learning_rate": 2.627288786958114e-06, + "loss": 0.3213, + "step": 33352 + }, + { + "epoch": 0.667677601781648, + "grad_norm": 1.1027640104293823, + "learning_rate": 2.627003435454824e-06, + "loss": 0.3029, + "step": 33353 + }, + { + "epoch": 0.6676976202987763, + "grad_norm": 1.2969237565994263, + "learning_rate": 2.6267180939270997e-06, + "loss": 0.2868, + "step": 33354 + }, + { + "epoch": 0.6677176388159047, + "grad_norm": 1.2740651369094849, + "learning_rate": 2.626432762376141e-06, + "loss": 0.2845, + "step": 33355 + }, + { + "epoch": 0.667737657333033, + "grad_norm": 1.1704312562942505, + "learning_rate": 2.626147440803147e-06, + "loss": 0.3295, + "step": 33356 + }, + { + "epoch": 0.6677576758501614, + "grad_norm": 1.116357445716858, + "learning_rate": 2.625862129209321e-06, + "loss": 0.3496, + "step": 33357 + }, + { + "epoch": 0.6677776943672897, + "grad_norm": 1.1640163660049438, + "learning_rate": 2.625576827595859e-06, + "loss": 0.2791, + "step": 33358 + }, + { + "epoch": 0.6677977128844181, + "grad_norm": 1.224159598350525, + "learning_rate": 2.625291535963962e-06, + "loss": 0.3119, + "step": 33359 + }, + { + "epoch": 0.6678177314015464, + "grad_norm": 1.210261583328247, + "learning_rate": 2.625006254314826e-06, + "loss": 0.3566, + "step": 33360 + }, + { + "epoch": 0.6678377499186747, + "grad_norm": 1.1438897848129272, + "learning_rate": 2.6247209826496545e-06, + "loss": 0.3138, + "step": 33361 + }, + { + "epoch": 0.6678577684358031, + "grad_norm": 1.158082365989685, + "learning_rate": 2.6244357209696446e-06, + "loss": 0.3289, + "step": 33362 + }, + { + "epoch": 0.6678777869529314, + "grad_norm": 1.0226892232894897, + "learning_rate": 2.6241504692759955e-06, + "loss": 0.2628, + "step": 33363 + }, + { + "epoch": 0.6678978054700598, + "grad_norm": 1.1637966632843018, + "learning_rate": 2.6238652275699072e-06, + "loss": 0.3202, + "step": 33364 + }, + { + "epoch": 0.6679178239871881, + "grad_norm": 1.0318917036056519, + "learning_rate": 2.623579995852576e-06, + "loss": 0.3128, + "step": 33365 + }, + { + "epoch": 0.6679378425043165, + "grad_norm": 1.2718693017959595, + "learning_rate": 2.6232947741252044e-06, + "loss": 0.3012, + "step": 33366 + }, + { + "epoch": 0.6679578610214448, + "grad_norm": 1.1179344654083252, + "learning_rate": 2.6230095623889904e-06, + "loss": 0.3275, + "step": 33367 + }, + { + "epoch": 0.6679778795385731, + "grad_norm": 1.151702880859375, + "learning_rate": 2.6227243606451314e-06, + "loss": 0.3382, + "step": 33368 + }, + { + "epoch": 0.6679978980557015, + "grad_norm": 1.111211895942688, + "learning_rate": 2.622439168894827e-06, + "loss": 0.3066, + "step": 33369 + }, + { + "epoch": 0.6680179165728298, + "grad_norm": 1.0995746850967407, + "learning_rate": 2.6221539871392786e-06, + "loss": 0.2842, + "step": 33370 + }, + { + "epoch": 0.6680379350899582, + "grad_norm": 1.5304499864578247, + "learning_rate": 2.6218688153796833e-06, + "loss": 0.3002, + "step": 33371 + }, + { + "epoch": 0.6680579536070865, + "grad_norm": 1.0932615995407104, + "learning_rate": 2.62158365361724e-06, + "loss": 0.3032, + "step": 33372 + }, + { + "epoch": 0.6680779721242149, + "grad_norm": 1.2631094455718994, + "learning_rate": 2.6212985018531466e-06, + "loss": 0.3224, + "step": 33373 + }, + { + "epoch": 0.6680979906413432, + "grad_norm": 1.096418857574463, + "learning_rate": 2.621013360088601e-06, + "loss": 0.2772, + "step": 33374 + }, + { + "epoch": 0.6681180091584716, + "grad_norm": 1.160339117050171, + "learning_rate": 2.6207282283248046e-06, + "loss": 0.3364, + "step": 33375 + }, + { + "epoch": 0.6681380276755999, + "grad_norm": 1.3384552001953125, + "learning_rate": 2.6204431065629553e-06, + "loss": 0.3175, + "step": 33376 + }, + { + "epoch": 0.6681580461927282, + "grad_norm": 1.085584282875061, + "learning_rate": 2.6201579948042512e-06, + "loss": 0.2988, + "step": 33377 + }, + { + "epoch": 0.6681780647098566, + "grad_norm": 2.0302693843841553, + "learning_rate": 2.6198728930498885e-06, + "loss": 0.6926, + "step": 33378 + }, + { + "epoch": 0.6681980832269849, + "grad_norm": 1.1178696155548096, + "learning_rate": 2.6195878013010694e-06, + "loss": 0.3096, + "step": 33379 + }, + { + "epoch": 0.6682181017441133, + "grad_norm": 1.1891162395477295, + "learning_rate": 2.6193027195589916e-06, + "loss": 0.2851, + "step": 33380 + }, + { + "epoch": 0.6682381202612416, + "grad_norm": 1.1569929122924805, + "learning_rate": 2.6190176478248504e-06, + "loss": 0.3127, + "step": 33381 + }, + { + "epoch": 0.66825813877837, + "grad_norm": 1.161436915397644, + "learning_rate": 2.618732586099849e-06, + "loss": 0.3496, + "step": 33382 + }, + { + "epoch": 0.6682781572954983, + "grad_norm": 1.217804193496704, + "learning_rate": 2.618447534385181e-06, + "loss": 0.3134, + "step": 33383 + }, + { + "epoch": 0.6682981758126266, + "grad_norm": 1.0483101606369019, + "learning_rate": 2.6181624926820492e-06, + "loss": 0.3416, + "step": 33384 + }, + { + "epoch": 0.668318194329755, + "grad_norm": 1.0513591766357422, + "learning_rate": 2.6178774609916498e-06, + "loss": 0.3045, + "step": 33385 + }, + { + "epoch": 0.6683382128468833, + "grad_norm": 1.0678681135177612, + "learning_rate": 2.617592439315181e-06, + "loss": 0.3102, + "step": 33386 + }, + { + "epoch": 0.6683582313640117, + "grad_norm": 1.818624496459961, + "learning_rate": 2.617307427653838e-06, + "loss": 0.7706, + "step": 33387 + }, + { + "epoch": 0.66837824988114, + "grad_norm": 1.0974845886230469, + "learning_rate": 2.617022426008825e-06, + "loss": 0.3141, + "step": 33388 + }, + { + "epoch": 0.6683982683982684, + "grad_norm": 1.1050591468811035, + "learning_rate": 2.6167374343813363e-06, + "loss": 0.2618, + "step": 33389 + }, + { + "epoch": 0.6684182869153967, + "grad_norm": 1.1770652532577515, + "learning_rate": 2.616452452772571e-06, + "loss": 0.2794, + "step": 33390 + }, + { + "epoch": 0.6684383054325251, + "grad_norm": 1.1612493991851807, + "learning_rate": 2.616167481183724e-06, + "loss": 0.2669, + "step": 33391 + }, + { + "epoch": 0.6684583239496534, + "grad_norm": 1.3250678777694702, + "learning_rate": 2.615882519615998e-06, + "loss": 0.3076, + "step": 33392 + }, + { + "epoch": 0.6684783424667817, + "grad_norm": 1.0657131671905518, + "learning_rate": 2.6155975680705893e-06, + "loss": 0.315, + "step": 33393 + }, + { + "epoch": 0.6684983609839101, + "grad_norm": 1.442082166671753, + "learning_rate": 2.6153126265486927e-06, + "loss": 0.3169, + "step": 33394 + }, + { + "epoch": 0.6685183795010384, + "grad_norm": 0.9874754548072815, + "learning_rate": 2.6150276950515108e-06, + "loss": 0.2556, + "step": 33395 + }, + { + "epoch": 0.6685383980181668, + "grad_norm": 1.119279384613037, + "learning_rate": 2.6147427735802377e-06, + "loss": 0.2828, + "step": 33396 + }, + { + "epoch": 0.6685584165352951, + "grad_norm": 1.1019648313522339, + "learning_rate": 2.6144578621360745e-06, + "loss": 0.2992, + "step": 33397 + }, + { + "epoch": 0.6685784350524235, + "grad_norm": 1.1109055280685425, + "learning_rate": 2.614172960720217e-06, + "loss": 0.2635, + "step": 33398 + }, + { + "epoch": 0.6685984535695518, + "grad_norm": 1.3715623617172241, + "learning_rate": 2.6138880693338626e-06, + "loss": 0.3345, + "step": 33399 + }, + { + "epoch": 0.6686184720866801, + "grad_norm": 1.1739301681518555, + "learning_rate": 2.6136031879782075e-06, + "loss": 0.3614, + "step": 33400 + }, + { + "epoch": 0.6686384906038085, + "grad_norm": 1.8828904628753662, + "learning_rate": 2.613318316654453e-06, + "loss": 0.7739, + "step": 33401 + }, + { + "epoch": 0.6686585091209368, + "grad_norm": 1.0757249593734741, + "learning_rate": 2.613033455363795e-06, + "loss": 0.2756, + "step": 33402 + }, + { + "epoch": 0.6686785276380652, + "grad_norm": 1.1617330312728882, + "learning_rate": 2.61274860410743e-06, + "loss": 0.3338, + "step": 33403 + }, + { + "epoch": 0.6686985461551935, + "grad_norm": 1.8564214706420898, + "learning_rate": 2.612463762886557e-06, + "loss": 0.7154, + "step": 33404 + }, + { + "epoch": 0.6687185646723219, + "grad_norm": 1.1459044218063354, + "learning_rate": 2.6121789317023695e-06, + "loss": 0.2815, + "step": 33405 + }, + { + "epoch": 0.6687385831894502, + "grad_norm": 1.187387466430664, + "learning_rate": 2.6118941105560703e-06, + "loss": 0.3323, + "step": 33406 + }, + { + "epoch": 0.6687586017065786, + "grad_norm": 1.0980719327926636, + "learning_rate": 2.611609299448853e-06, + "loss": 0.3366, + "step": 33407 + }, + { + "epoch": 0.6687786202237069, + "grad_norm": 0.9566930532455444, + "learning_rate": 2.6113244983819174e-06, + "loss": 0.2804, + "step": 33408 + }, + { + "epoch": 0.6687986387408352, + "grad_norm": 1.113579273223877, + "learning_rate": 2.611039707356458e-06, + "loss": 0.2966, + "step": 33409 + }, + { + "epoch": 0.6688186572579636, + "grad_norm": 1.2082953453063965, + "learning_rate": 2.610754926373675e-06, + "loss": 0.2674, + "step": 33410 + }, + { + "epoch": 0.6688386757750919, + "grad_norm": 1.1007715463638306, + "learning_rate": 2.610470155434765e-06, + "loss": 0.2994, + "step": 33411 + }, + { + "epoch": 0.6688586942922203, + "grad_norm": 1.160054087638855, + "learning_rate": 2.610185394540924e-06, + "loss": 0.3533, + "step": 33412 + }, + { + "epoch": 0.6688787128093486, + "grad_norm": 1.0994861125946045, + "learning_rate": 2.6099006436933487e-06, + "loss": 0.3029, + "step": 33413 + }, + { + "epoch": 0.668898731326477, + "grad_norm": 1.1327499151229858, + "learning_rate": 2.6096159028932355e-06, + "loss": 0.2841, + "step": 33414 + }, + { + "epoch": 0.6689187498436053, + "grad_norm": 1.13503897190094, + "learning_rate": 2.609331172141784e-06, + "loss": 0.323, + "step": 33415 + }, + { + "epoch": 0.6689387683607336, + "grad_norm": 1.2015962600708008, + "learning_rate": 2.6090464514401903e-06, + "loss": 0.3057, + "step": 33416 + }, + { + "epoch": 0.668958786877862, + "grad_norm": 1.0261468887329102, + "learning_rate": 2.6087617407896503e-06, + "loss": 0.278, + "step": 33417 + }, + { + "epoch": 0.6689788053949903, + "grad_norm": 1.0924233198165894, + "learning_rate": 2.6084770401913594e-06, + "loss": 0.337, + "step": 33418 + }, + { + "epoch": 0.6689988239121187, + "grad_norm": 1.1375912427902222, + "learning_rate": 2.608192349646519e-06, + "loss": 0.317, + "step": 33419 + }, + { + "epoch": 0.669018842429247, + "grad_norm": 1.1423847675323486, + "learning_rate": 2.6079076691563206e-06, + "loss": 0.3151, + "step": 33420 + }, + { + "epoch": 0.6690388609463754, + "grad_norm": 1.0210835933685303, + "learning_rate": 2.607622998721966e-06, + "loss": 0.3152, + "step": 33421 + }, + { + "epoch": 0.6690588794635037, + "grad_norm": 1.0790427923202515, + "learning_rate": 2.607338338344647e-06, + "loss": 0.2854, + "step": 33422 + }, + { + "epoch": 0.6690788979806321, + "grad_norm": 1.0765806436538696, + "learning_rate": 2.607053688025565e-06, + "loss": 0.2718, + "step": 33423 + }, + { + "epoch": 0.6690989164977604, + "grad_norm": 1.144997477531433, + "learning_rate": 2.6067690477659137e-06, + "loss": 0.2815, + "step": 33424 + }, + { + "epoch": 0.6691189350148887, + "grad_norm": 1.1764473915100098, + "learning_rate": 2.6064844175668913e-06, + "loss": 0.3053, + "step": 33425 + }, + { + "epoch": 0.6691389535320171, + "grad_norm": 1.2859039306640625, + "learning_rate": 2.6061997974296928e-06, + "loss": 0.2607, + "step": 33426 + }, + { + "epoch": 0.6691589720491454, + "grad_norm": 1.8700320720672607, + "learning_rate": 2.6059151873555133e-06, + "loss": 0.8059, + "step": 33427 + }, + { + "epoch": 0.6691789905662738, + "grad_norm": 1.0241864919662476, + "learning_rate": 2.6056305873455528e-06, + "loss": 0.2944, + "step": 33428 + }, + { + "epoch": 0.6691990090834021, + "grad_norm": 1.1994800567626953, + "learning_rate": 2.6053459974010063e-06, + "loss": 0.2707, + "step": 33429 + }, + { + "epoch": 0.6692190276005305, + "grad_norm": 1.0957399606704712, + "learning_rate": 2.60506141752307e-06, + "loss": 0.2868, + "step": 33430 + }, + { + "epoch": 0.6692390461176588, + "grad_norm": 1.0377405881881714, + "learning_rate": 2.6047768477129383e-06, + "loss": 0.2693, + "step": 33431 + }, + { + "epoch": 0.6692590646347871, + "grad_norm": 1.1943132877349854, + "learning_rate": 2.6044922879718105e-06, + "loss": 0.2806, + "step": 33432 + }, + { + "epoch": 0.6692790831519155, + "grad_norm": 1.0668984651565552, + "learning_rate": 2.60420773830088e-06, + "loss": 0.2864, + "step": 33433 + }, + { + "epoch": 0.6692991016690438, + "grad_norm": 1.4091277122497559, + "learning_rate": 2.603923198701347e-06, + "loss": 0.2935, + "step": 33434 + }, + { + "epoch": 0.6693191201861722, + "grad_norm": 1.064861536026001, + "learning_rate": 2.6036386691744053e-06, + "loss": 0.285, + "step": 33435 + }, + { + "epoch": 0.6693391387033005, + "grad_norm": 1.1777024269104004, + "learning_rate": 2.6033541497212485e-06, + "loss": 0.3091, + "step": 33436 + }, + { + "epoch": 0.6693591572204289, + "grad_norm": 1.0350674390792847, + "learning_rate": 2.603069640343077e-06, + "loss": 0.2528, + "step": 33437 + }, + { + "epoch": 0.6693791757375572, + "grad_norm": 1.1719210147857666, + "learning_rate": 2.602785141041085e-06, + "loss": 0.3472, + "step": 33438 + }, + { + "epoch": 0.6693991942546856, + "grad_norm": 1.181164264678955, + "learning_rate": 2.6025006518164686e-06, + "loss": 0.264, + "step": 33439 + }, + { + "epoch": 0.6694192127718139, + "grad_norm": 1.1164567470550537, + "learning_rate": 2.602216172670421e-06, + "loss": 0.312, + "step": 33440 + }, + { + "epoch": 0.6694392312889422, + "grad_norm": 1.084837555885315, + "learning_rate": 2.6019317036041432e-06, + "loss": 0.2539, + "step": 33441 + }, + { + "epoch": 0.6694592498060706, + "grad_norm": 0.9273545742034912, + "learning_rate": 2.601647244618828e-06, + "loss": 0.2382, + "step": 33442 + }, + { + "epoch": 0.6694792683231989, + "grad_norm": 1.264993667602539, + "learning_rate": 2.6013627957156717e-06, + "loss": 0.2746, + "step": 33443 + }, + { + "epoch": 0.6694992868403273, + "grad_norm": 1.092529535293579, + "learning_rate": 2.6010783568958684e-06, + "loss": 0.3246, + "step": 33444 + }, + { + "epoch": 0.6695193053574556, + "grad_norm": 1.1736104488372803, + "learning_rate": 2.600793928160617e-06, + "loss": 0.3055, + "step": 33445 + }, + { + "epoch": 0.669539323874584, + "grad_norm": 1.059247612953186, + "learning_rate": 2.60050950951111e-06, + "loss": 0.2721, + "step": 33446 + }, + { + "epoch": 0.6695593423917123, + "grad_norm": 1.1168545484542847, + "learning_rate": 2.600225100948546e-06, + "loss": 0.2952, + "step": 33447 + }, + { + "epoch": 0.6695793609088406, + "grad_norm": 1.8188722133636475, + "learning_rate": 2.5999407024741203e-06, + "loss": 0.7366, + "step": 33448 + }, + { + "epoch": 0.669599379425969, + "grad_norm": 1.1525816917419434, + "learning_rate": 2.5996563140890244e-06, + "loss": 0.3026, + "step": 33449 + }, + { + "epoch": 0.6696193979430973, + "grad_norm": 1.082886815071106, + "learning_rate": 2.5993719357944593e-06, + "loss": 0.3296, + "step": 33450 + }, + { + "epoch": 0.6696394164602257, + "grad_norm": 1.1479012966156006, + "learning_rate": 2.5990875675916183e-06, + "loss": 0.3128, + "step": 33451 + }, + { + "epoch": 0.669659434977354, + "grad_norm": 1.956580400466919, + "learning_rate": 2.5988032094816955e-06, + "loss": 0.7003, + "step": 33452 + }, + { + "epoch": 0.6696794534944824, + "grad_norm": 1.1062901020050049, + "learning_rate": 2.5985188614658862e-06, + "loss": 0.2722, + "step": 33453 + }, + { + "epoch": 0.6696994720116107, + "grad_norm": 1.1304346323013306, + "learning_rate": 2.5982345235453884e-06, + "loss": 0.3369, + "step": 33454 + }, + { + "epoch": 0.6697194905287391, + "grad_norm": 1.2237223386764526, + "learning_rate": 2.5979501957213953e-06, + "loss": 0.2811, + "step": 33455 + }, + { + "epoch": 0.6697395090458674, + "grad_norm": 1.0655879974365234, + "learning_rate": 2.5976658779951035e-06, + "loss": 0.3119, + "step": 33456 + }, + { + "epoch": 0.6697595275629957, + "grad_norm": 1.0956966876983643, + "learning_rate": 2.5973815703677052e-06, + "loss": 0.3415, + "step": 33457 + }, + { + "epoch": 0.6697795460801241, + "grad_norm": 1.163552165031433, + "learning_rate": 2.5970972728403993e-06, + "loss": 0.2834, + "step": 33458 + }, + { + "epoch": 0.6697995645972524, + "grad_norm": 1.0844149589538574, + "learning_rate": 2.5968129854143777e-06, + "loss": 0.3132, + "step": 33459 + }, + { + "epoch": 0.6698195831143808, + "grad_norm": 1.911020278930664, + "learning_rate": 2.5965287080908388e-06, + "loss": 0.7567, + "step": 33460 + }, + { + "epoch": 0.6698396016315091, + "grad_norm": 1.0653878450393677, + "learning_rate": 2.5962444408709765e-06, + "loss": 0.2065, + "step": 33461 + }, + { + "epoch": 0.6698596201486375, + "grad_norm": 1.6792495250701904, + "learning_rate": 2.5959601837559823e-06, + "loss": 0.3449, + "step": 33462 + }, + { + "epoch": 0.6698796386657658, + "grad_norm": 1.0850275754928589, + "learning_rate": 2.5956759367470576e-06, + "loss": 0.3136, + "step": 33463 + }, + { + "epoch": 0.6698996571828941, + "grad_norm": 1.2168090343475342, + "learning_rate": 2.5953916998453925e-06, + "loss": 0.3619, + "step": 33464 + }, + { + "epoch": 0.6699196757000225, + "grad_norm": 1.1015037298202515, + "learning_rate": 2.5951074730521843e-06, + "loss": 0.3081, + "step": 33465 + }, + { + "epoch": 0.6699396942171508, + "grad_norm": 2.0780789852142334, + "learning_rate": 2.594823256368626e-06, + "loss": 0.7527, + "step": 33466 + }, + { + "epoch": 0.6699597127342792, + "grad_norm": 1.1497619152069092, + "learning_rate": 2.594539049795911e-06, + "loss": 0.3229, + "step": 33467 + }, + { + "epoch": 0.6699797312514075, + "grad_norm": 1.0972355604171753, + "learning_rate": 2.5942548533352386e-06, + "loss": 0.3119, + "step": 33468 + }, + { + "epoch": 0.6699997497685359, + "grad_norm": 1.129719853401184, + "learning_rate": 2.5939706669878007e-06, + "loss": 0.3001, + "step": 33469 + }, + { + "epoch": 0.6700197682856642, + "grad_norm": 1.1583794355392456, + "learning_rate": 2.593686490754791e-06, + "loss": 0.3021, + "step": 33470 + }, + { + "epoch": 0.6700397868027926, + "grad_norm": 1.1047396659851074, + "learning_rate": 2.5934023246374075e-06, + "loss": 0.2571, + "step": 33471 + }, + { + "epoch": 0.6700598053199209, + "grad_norm": 1.0073416233062744, + "learning_rate": 2.59311816863684e-06, + "loss": 0.2868, + "step": 33472 + }, + { + "epoch": 0.6700798238370492, + "grad_norm": 1.010787844657898, + "learning_rate": 2.5928340227542883e-06, + "loss": 0.2695, + "step": 33473 + }, + { + "epoch": 0.6700998423541776, + "grad_norm": 1.0078366994857788, + "learning_rate": 2.592549886990944e-06, + "loss": 0.2615, + "step": 33474 + }, + { + "epoch": 0.6701198608713059, + "grad_norm": 1.7431856393814087, + "learning_rate": 2.5922657613480017e-06, + "loss": 0.7298, + "step": 33475 + }, + { + "epoch": 0.6701398793884343, + "grad_norm": 1.816162109375, + "learning_rate": 2.5919816458266543e-06, + "loss": 0.7396, + "step": 33476 + }, + { + "epoch": 0.6701598979055626, + "grad_norm": 1.7470078468322754, + "learning_rate": 2.5916975404281e-06, + "loss": 0.8123, + "step": 33477 + }, + { + "epoch": 0.670179916422691, + "grad_norm": 1.14427649974823, + "learning_rate": 2.5914134451535305e-06, + "loss": 0.317, + "step": 33478 + }, + { + "epoch": 0.6701999349398193, + "grad_norm": 1.3076332807540894, + "learning_rate": 2.591129360004141e-06, + "loss": 0.3206, + "step": 33479 + }, + { + "epoch": 0.6702199534569476, + "grad_norm": 1.1058850288391113, + "learning_rate": 2.5908452849811227e-06, + "loss": 0.3112, + "step": 33480 + }, + { + "epoch": 0.670239971974076, + "grad_norm": 1.233302116394043, + "learning_rate": 2.590561220085675e-06, + "loss": 0.3055, + "step": 33481 + }, + { + "epoch": 0.6702599904912043, + "grad_norm": 1.1680439710617065, + "learning_rate": 2.590277165318989e-06, + "loss": 0.3195, + "step": 33482 + }, + { + "epoch": 0.6702800090083327, + "grad_norm": 1.2001291513442993, + "learning_rate": 2.5899931206822577e-06, + "loss": 0.2572, + "step": 33483 + }, + { + "epoch": 0.670300027525461, + "grad_norm": 1.2707115411758423, + "learning_rate": 2.5897090861766785e-06, + "loss": 0.2694, + "step": 33484 + }, + { + "epoch": 0.6703200460425894, + "grad_norm": 1.5577046871185303, + "learning_rate": 2.589425061803441e-06, + "loss": 0.3499, + "step": 33485 + }, + { + "epoch": 0.6703400645597177, + "grad_norm": 1.0368983745574951, + "learning_rate": 2.5891410475637447e-06, + "loss": 0.3044, + "step": 33486 + }, + { + "epoch": 0.6703600830768461, + "grad_norm": 1.0630377531051636, + "learning_rate": 2.58885704345878e-06, + "loss": 0.2877, + "step": 33487 + }, + { + "epoch": 0.6703801015939744, + "grad_norm": 1.1506167650222778, + "learning_rate": 2.5885730494897414e-06, + "loss": 0.2429, + "step": 33488 + }, + { + "epoch": 0.6704001201111027, + "grad_norm": 1.0940372943878174, + "learning_rate": 2.588289065657821e-06, + "loss": 0.2999, + "step": 33489 + }, + { + "epoch": 0.6704201386282311, + "grad_norm": 1.012035846710205, + "learning_rate": 2.5880050919642165e-06, + "loss": 0.2401, + "step": 33490 + }, + { + "epoch": 0.6704401571453594, + "grad_norm": 1.066653847694397, + "learning_rate": 2.5877211284101187e-06, + "loss": 0.3022, + "step": 33491 + }, + { + "epoch": 0.6704601756624878, + "grad_norm": 1.0901386737823486, + "learning_rate": 2.587437174996723e-06, + "loss": 0.2663, + "step": 33492 + }, + { + "epoch": 0.6704801941796161, + "grad_norm": 1.0369133949279785, + "learning_rate": 2.58715323172522e-06, + "loss": 0.2828, + "step": 33493 + }, + { + "epoch": 0.6705002126967445, + "grad_norm": 1.196398138999939, + "learning_rate": 2.586869298596808e-06, + "loss": 0.2754, + "step": 33494 + }, + { + "epoch": 0.6705202312138728, + "grad_norm": 1.110569953918457, + "learning_rate": 2.5865853756126775e-06, + "loss": 0.3381, + "step": 33495 + }, + { + "epoch": 0.6705402497310011, + "grad_norm": 1.2836225032806396, + "learning_rate": 2.5863014627740214e-06, + "loss": 0.31, + "step": 33496 + }, + { + "epoch": 0.6705602682481295, + "grad_norm": 1.1460038423538208, + "learning_rate": 2.586017560082036e-06, + "loss": 0.3016, + "step": 33497 + }, + { + "epoch": 0.6705802867652578, + "grad_norm": 1.086503505706787, + "learning_rate": 2.5857336675379107e-06, + "loss": 0.274, + "step": 33498 + }, + { + "epoch": 0.6706003052823862, + "grad_norm": 1.1996431350708008, + "learning_rate": 2.585449785142844e-06, + "loss": 0.3233, + "step": 33499 + }, + { + "epoch": 0.6706203237995145, + "grad_norm": 0.994690477848053, + "learning_rate": 2.585165912898027e-06, + "loss": 0.2716, + "step": 33500 + }, + { + "epoch": 0.6706403423166429, + "grad_norm": 1.1558837890625, + "learning_rate": 2.584882050804653e-06, + "loss": 0.3408, + "step": 33501 + }, + { + "epoch": 0.6706603608337712, + "grad_norm": 1.1251808404922485, + "learning_rate": 2.584598198863913e-06, + "loss": 0.3262, + "step": 33502 + }, + { + "epoch": 0.6706803793508996, + "grad_norm": 1.1099456548690796, + "learning_rate": 2.584314357077004e-06, + "loss": 0.3122, + "step": 33503 + }, + { + "epoch": 0.6707003978680279, + "grad_norm": 0.9935820698738098, + "learning_rate": 2.584030525445118e-06, + "loss": 0.3203, + "step": 33504 + }, + { + "epoch": 0.6707204163851562, + "grad_norm": 2.027775287628174, + "learning_rate": 2.5837467039694475e-06, + "loss": 0.8145, + "step": 33505 + }, + { + "epoch": 0.6707404349022846, + "grad_norm": 1.0740810632705688, + "learning_rate": 2.583462892651184e-06, + "loss": 0.2624, + "step": 33506 + }, + { + "epoch": 0.6707604534194129, + "grad_norm": 1.0575190782546997, + "learning_rate": 2.5831790914915245e-06, + "loss": 0.2893, + "step": 33507 + }, + { + "epoch": 0.6707804719365413, + "grad_norm": 1.1938620805740356, + "learning_rate": 2.5828953004916596e-06, + "loss": 0.3531, + "step": 33508 + }, + { + "epoch": 0.6708004904536696, + "grad_norm": 1.1082676649093628, + "learning_rate": 2.582611519652781e-06, + "loss": 0.3077, + "step": 33509 + }, + { + "epoch": 0.670820508970798, + "grad_norm": 1.0540350675582886, + "learning_rate": 2.5823277489760857e-06, + "loss": 0.2891, + "step": 33510 + }, + { + "epoch": 0.6708405274879263, + "grad_norm": 1.0713937282562256, + "learning_rate": 2.582043988462761e-06, + "loss": 0.3048, + "step": 33511 + }, + { + "epoch": 0.6708605460050546, + "grad_norm": 1.1520934104919434, + "learning_rate": 2.581760238114006e-06, + "loss": 0.2946, + "step": 33512 + }, + { + "epoch": 0.670880564522183, + "grad_norm": 1.894494652748108, + "learning_rate": 2.5814764979310107e-06, + "loss": 0.7283, + "step": 33513 + }, + { + "epoch": 0.6709005830393113, + "grad_norm": 1.9358932971954346, + "learning_rate": 2.581192767914967e-06, + "loss": 0.784, + "step": 33514 + }, + { + "epoch": 0.6709206015564397, + "grad_norm": 1.3188127279281616, + "learning_rate": 2.580909048067067e-06, + "loss": 0.3364, + "step": 33515 + }, + { + "epoch": 0.670940620073568, + "grad_norm": 1.2515748739242554, + "learning_rate": 2.580625338388507e-06, + "loss": 0.282, + "step": 33516 + }, + { + "epoch": 0.6709606385906964, + "grad_norm": 1.9801194667816162, + "learning_rate": 2.580341638880477e-06, + "loss": 0.7878, + "step": 33517 + }, + { + "epoch": 0.6709806571078247, + "grad_norm": 1.1006100177764893, + "learning_rate": 2.5800579495441703e-06, + "loss": 0.2683, + "step": 33518 + }, + { + "epoch": 0.6710006756249531, + "grad_norm": 1.1602890491485596, + "learning_rate": 2.579774270380779e-06, + "loss": 0.3098, + "step": 33519 + }, + { + "epoch": 0.6710206941420814, + "grad_norm": 1.1892633438110352, + "learning_rate": 2.5794906013914933e-06, + "loss": 0.2724, + "step": 33520 + }, + { + "epoch": 0.6710407126592097, + "grad_norm": 1.083336591720581, + "learning_rate": 2.579206942577511e-06, + "loss": 0.3107, + "step": 33521 + }, + { + "epoch": 0.6710607311763381, + "grad_norm": 1.1506640911102295, + "learning_rate": 2.5789232939400195e-06, + "loss": 0.3027, + "step": 33522 + }, + { + "epoch": 0.6710807496934664, + "grad_norm": 1.0367017984390259, + "learning_rate": 2.5786396554802153e-06, + "loss": 0.2807, + "step": 33523 + }, + { + "epoch": 0.6711007682105948, + "grad_norm": 1.1773043870925903, + "learning_rate": 2.578356027199287e-06, + "loss": 0.3258, + "step": 33524 + }, + { + "epoch": 0.6711207867277231, + "grad_norm": 1.1703351736068726, + "learning_rate": 2.578072409098431e-06, + "loss": 0.3391, + "step": 33525 + }, + { + "epoch": 0.6711408052448515, + "grad_norm": 1.0768688917160034, + "learning_rate": 2.577788801178837e-06, + "loss": 0.2762, + "step": 33526 + }, + { + "epoch": 0.6711608237619798, + "grad_norm": 1.283554196357727, + "learning_rate": 2.5775052034416983e-06, + "loss": 0.3028, + "step": 33527 + }, + { + "epoch": 0.6711808422791081, + "grad_norm": 1.1934832334518433, + "learning_rate": 2.5772216158882058e-06, + "loss": 0.3706, + "step": 33528 + }, + { + "epoch": 0.6712008607962365, + "grad_norm": 0.9518446922302246, + "learning_rate": 2.57693803851955e-06, + "loss": 0.2622, + "step": 33529 + }, + { + "epoch": 0.6712208793133648, + "grad_norm": 1.077394723892212, + "learning_rate": 2.576654471336928e-06, + "loss": 0.3024, + "step": 33530 + }, + { + "epoch": 0.6712408978304932, + "grad_norm": 1.9032655954360962, + "learning_rate": 2.5763709143415285e-06, + "loss": 0.7731, + "step": 33531 + }, + { + "epoch": 0.6712609163476215, + "grad_norm": 1.067531943321228, + "learning_rate": 2.5760873675345445e-06, + "loss": 0.271, + "step": 33532 + }, + { + "epoch": 0.6712809348647499, + "grad_norm": 1.4423339366912842, + "learning_rate": 2.5758038309171653e-06, + "loss": 0.2918, + "step": 33533 + }, + { + "epoch": 0.6713009533818782, + "grad_norm": 1.1667437553405762, + "learning_rate": 2.5755203044905876e-06, + "loss": 0.3223, + "step": 33534 + }, + { + "epoch": 0.6713209718990066, + "grad_norm": 1.16473388671875, + "learning_rate": 2.5752367882559983e-06, + "loss": 0.3442, + "step": 33535 + }, + { + "epoch": 0.6713409904161349, + "grad_norm": 1.8911399841308594, + "learning_rate": 2.5749532822145933e-06, + "loss": 0.7195, + "step": 33536 + }, + { + "epoch": 0.6713610089332632, + "grad_norm": 1.0340691804885864, + "learning_rate": 2.574669786367562e-06, + "loss": 0.3242, + "step": 33537 + }, + { + "epoch": 0.6713810274503916, + "grad_norm": 1.310834527015686, + "learning_rate": 2.5743863007160975e-06, + "loss": 0.2518, + "step": 33538 + }, + { + "epoch": 0.6714010459675199, + "grad_norm": 1.1925758123397827, + "learning_rate": 2.574102825261392e-06, + "loss": 0.3243, + "step": 33539 + }, + { + "epoch": 0.6714210644846483, + "grad_norm": 0.9829285740852356, + "learning_rate": 2.5738193600046356e-06, + "loss": 0.2385, + "step": 33540 + }, + { + "epoch": 0.6714410830017766, + "grad_norm": 1.0325846672058105, + "learning_rate": 2.573535904947021e-06, + "loss": 0.2674, + "step": 33541 + }, + { + "epoch": 0.671461101518905, + "grad_norm": 1.0995357036590576, + "learning_rate": 2.5732524600897375e-06, + "loss": 0.3194, + "step": 33542 + }, + { + "epoch": 0.6714811200360333, + "grad_norm": 1.0788885354995728, + "learning_rate": 2.5729690254339802e-06, + "loss": 0.2782, + "step": 33543 + }, + { + "epoch": 0.6715011385531616, + "grad_norm": 1.1802877187728882, + "learning_rate": 2.5726856009809385e-06, + "loss": 0.313, + "step": 33544 + }, + { + "epoch": 0.67152115707029, + "grad_norm": 1.2244489192962646, + "learning_rate": 2.5724021867318037e-06, + "loss": 0.3681, + "step": 33545 + }, + { + "epoch": 0.6715411755874183, + "grad_norm": 1.1646125316619873, + "learning_rate": 2.572118782687767e-06, + "loss": 0.3623, + "step": 33546 + }, + { + "epoch": 0.6715611941045467, + "grad_norm": 1.132199764251709, + "learning_rate": 2.5718353888500214e-06, + "loss": 0.2882, + "step": 33547 + }, + { + "epoch": 0.671581212621675, + "grad_norm": 1.016505241394043, + "learning_rate": 2.5715520052197563e-06, + "loss": 0.2341, + "step": 33548 + }, + { + "epoch": 0.6716012311388034, + "grad_norm": 1.2615888118743896, + "learning_rate": 2.571268631798165e-06, + "loss": 0.2771, + "step": 33549 + }, + { + "epoch": 0.6716212496559317, + "grad_norm": 1.8139146566390991, + "learning_rate": 2.5709852685864377e-06, + "loss": 0.7867, + "step": 33550 + }, + { + "epoch": 0.6716412681730601, + "grad_norm": 1.2948241233825684, + "learning_rate": 2.5707019155857636e-06, + "loss": 0.2928, + "step": 33551 + }, + { + "epoch": 0.6716612866901884, + "grad_norm": 1.13715398311615, + "learning_rate": 2.5704185727973384e-06, + "loss": 0.3039, + "step": 33552 + }, + { + "epoch": 0.6716813052073167, + "grad_norm": 1.2292917966842651, + "learning_rate": 2.5701352402223505e-06, + "loss": 0.305, + "step": 33553 + }, + { + "epoch": 0.6717013237244451, + "grad_norm": 1.1444954872131348, + "learning_rate": 2.5698519178619912e-06, + "loss": 0.2732, + "step": 33554 + }, + { + "epoch": 0.6717213422415734, + "grad_norm": 1.1130214929580688, + "learning_rate": 2.5695686057174495e-06, + "loss": 0.2858, + "step": 33555 + }, + { + "epoch": 0.6717413607587018, + "grad_norm": 2.0066001415252686, + "learning_rate": 2.5692853037899203e-06, + "loss": 0.7603, + "step": 33556 + }, + { + "epoch": 0.6717613792758301, + "grad_norm": 1.132895588874817, + "learning_rate": 2.5690020120805926e-06, + "loss": 0.3106, + "step": 33557 + }, + { + "epoch": 0.6717813977929585, + "grad_norm": 1.3880236148834229, + "learning_rate": 2.568718730590657e-06, + "loss": 0.3233, + "step": 33558 + }, + { + "epoch": 0.6718014163100868, + "grad_norm": 1.3240774869918823, + "learning_rate": 2.5684354593213023e-06, + "loss": 0.3557, + "step": 33559 + }, + { + "epoch": 0.6718214348272151, + "grad_norm": 1.2001354694366455, + "learning_rate": 2.5681521982737245e-06, + "loss": 0.2839, + "step": 33560 + }, + { + "epoch": 0.6718414533443435, + "grad_norm": 1.131461262702942, + "learning_rate": 2.5678689474491093e-06, + "loss": 0.3126, + "step": 33561 + }, + { + "epoch": 0.6718614718614718, + "grad_norm": 1.9597448110580444, + "learning_rate": 2.5675857068486514e-06, + "loss": 0.8062, + "step": 33562 + }, + { + "epoch": 0.6718814903786002, + "grad_norm": 1.2138952016830444, + "learning_rate": 2.5673024764735398e-06, + "loss": 0.3295, + "step": 33563 + }, + { + "epoch": 0.6719015088957285, + "grad_norm": 1.0036447048187256, + "learning_rate": 2.567019256324963e-06, + "loss": 0.2486, + "step": 33564 + }, + { + "epoch": 0.6719215274128569, + "grad_norm": 1.154078483581543, + "learning_rate": 2.5667360464041156e-06, + "loss": 0.3225, + "step": 33565 + }, + { + "epoch": 0.6719415459299852, + "grad_norm": 1.053833246231079, + "learning_rate": 2.5664528467121863e-06, + "loss": 0.3071, + "step": 33566 + }, + { + "epoch": 0.6719615644471136, + "grad_norm": 1.1617740392684937, + "learning_rate": 2.5661696572503656e-06, + "loss": 0.3312, + "step": 33567 + }, + { + "epoch": 0.6719815829642419, + "grad_norm": 1.0767619609832764, + "learning_rate": 2.5658864780198423e-06, + "loss": 0.2879, + "step": 33568 + }, + { + "epoch": 0.6720016014813702, + "grad_norm": 1.08805251121521, + "learning_rate": 2.5656033090218103e-06, + "loss": 0.3316, + "step": 33569 + }, + { + "epoch": 0.6720216199984986, + "grad_norm": 1.2045512199401855, + "learning_rate": 2.5653201502574576e-06, + "loss": 0.32, + "step": 33570 + }, + { + "epoch": 0.6720416385156269, + "grad_norm": 1.0979080200195312, + "learning_rate": 2.565037001727975e-06, + "loss": 0.3343, + "step": 33571 + }, + { + "epoch": 0.6720616570327553, + "grad_norm": 1.2888813018798828, + "learning_rate": 2.5647538634345516e-06, + "loss": 0.315, + "step": 33572 + }, + { + "epoch": 0.6720816755498836, + "grad_norm": 1.4627370834350586, + "learning_rate": 2.5644707353783806e-06, + "loss": 0.3144, + "step": 33573 + }, + { + "epoch": 0.672101694067012, + "grad_norm": 1.1933420896530151, + "learning_rate": 2.564187617560648e-06, + "loss": 0.3097, + "step": 33574 + }, + { + "epoch": 0.6721217125841403, + "grad_norm": 1.2551528215408325, + "learning_rate": 2.5639045099825488e-06, + "loss": 0.3541, + "step": 33575 + }, + { + "epoch": 0.6721417311012686, + "grad_norm": 1.056666374206543, + "learning_rate": 2.5636214126452706e-06, + "loss": 0.3423, + "step": 33576 + }, + { + "epoch": 0.672161749618397, + "grad_norm": 1.1376105546951294, + "learning_rate": 2.5633383255500015e-06, + "loss": 0.2898, + "step": 33577 + }, + { + "epoch": 0.6721817681355253, + "grad_norm": 1.0082910060882568, + "learning_rate": 2.5630552486979365e-06, + "loss": 0.3319, + "step": 33578 + }, + { + "epoch": 0.6722017866526537, + "grad_norm": 1.9470329284667969, + "learning_rate": 2.562772182090262e-06, + "loss": 0.7888, + "step": 33579 + }, + { + "epoch": 0.672221805169782, + "grad_norm": 1.1924912929534912, + "learning_rate": 2.562489125728169e-06, + "loss": 0.2636, + "step": 33580 + }, + { + "epoch": 0.6722418236869104, + "grad_norm": 1.0964866876602173, + "learning_rate": 2.5622060796128473e-06, + "loss": 0.2819, + "step": 33581 + }, + { + "epoch": 0.6722618422040387, + "grad_norm": 1.2092394828796387, + "learning_rate": 2.5619230437454845e-06, + "loss": 0.3148, + "step": 33582 + }, + { + "epoch": 0.6722818607211671, + "grad_norm": 1.0396621227264404, + "learning_rate": 2.561640018127275e-06, + "loss": 0.2991, + "step": 33583 + }, + { + "epoch": 0.6723018792382954, + "grad_norm": 2.1991212368011475, + "learning_rate": 2.5613570027594047e-06, + "loss": 0.7525, + "step": 33584 + }, + { + "epoch": 0.6723218977554237, + "grad_norm": 1.0563786029815674, + "learning_rate": 2.5610739976430644e-06, + "loss": 0.2702, + "step": 33585 + }, + { + "epoch": 0.6723419162725521, + "grad_norm": 1.2071484327316284, + "learning_rate": 2.560791002779445e-06, + "loss": 0.2946, + "step": 33586 + }, + { + "epoch": 0.6723619347896804, + "grad_norm": 1.1984943151474, + "learning_rate": 2.560508018169734e-06, + "loss": 0.2808, + "step": 33587 + }, + { + "epoch": 0.6723819533068088, + "grad_norm": 1.120750069618225, + "learning_rate": 2.5602250438151243e-06, + "loss": 0.2635, + "step": 33588 + }, + { + "epoch": 0.6724019718239371, + "grad_norm": 2.0878641605377197, + "learning_rate": 2.5599420797168034e-06, + "loss": 0.7986, + "step": 33589 + }, + { + "epoch": 0.6724219903410655, + "grad_norm": 1.1067545413970947, + "learning_rate": 2.5596591258759605e-06, + "loss": 0.2901, + "step": 33590 + }, + { + "epoch": 0.6724420088581938, + "grad_norm": 1.07167387008667, + "learning_rate": 2.559376182293784e-06, + "loss": 0.2771, + "step": 33591 + }, + { + "epoch": 0.6724620273753221, + "grad_norm": 1.2311424016952515, + "learning_rate": 2.5590932489714666e-06, + "loss": 0.2758, + "step": 33592 + }, + { + "epoch": 0.6724820458924505, + "grad_norm": 1.9350515604019165, + "learning_rate": 2.558810325910196e-06, + "loss": 0.7898, + "step": 33593 + }, + { + "epoch": 0.6725020644095788, + "grad_norm": 1.1147533655166626, + "learning_rate": 2.5585274131111608e-06, + "loss": 0.3103, + "step": 33594 + }, + { + "epoch": 0.6725220829267072, + "grad_norm": 1.051162838935852, + "learning_rate": 2.5582445105755494e-06, + "loss": 0.2586, + "step": 33595 + }, + { + "epoch": 0.6725421014438355, + "grad_norm": 0.9562118053436279, + "learning_rate": 2.5579616183045547e-06, + "loss": 0.2454, + "step": 33596 + }, + { + "epoch": 0.6725621199609639, + "grad_norm": 1.1571184396743774, + "learning_rate": 2.557678736299364e-06, + "loss": 0.282, + "step": 33597 + }, + { + "epoch": 0.6725821384780922, + "grad_norm": 1.1455795764923096, + "learning_rate": 2.557395864561164e-06, + "loss": 0.2877, + "step": 33598 + }, + { + "epoch": 0.6726021569952205, + "grad_norm": 1.8970515727996826, + "learning_rate": 2.5571130030911483e-06, + "loss": 0.7921, + "step": 33599 + }, + { + "epoch": 0.6726221755123489, + "grad_norm": 2.068450689315796, + "learning_rate": 2.5568301518905014e-06, + "loss": 0.7756, + "step": 33600 + }, + { + "epoch": 0.6726421940294772, + "grad_norm": 1.13228178024292, + "learning_rate": 2.556547310960417e-06, + "loss": 0.3095, + "step": 33601 + }, + { + "epoch": 0.6726622125466056, + "grad_norm": 1.1442925930023193, + "learning_rate": 2.556264480302082e-06, + "loss": 0.3012, + "step": 33602 + }, + { + "epoch": 0.6726822310637339, + "grad_norm": 1.191220760345459, + "learning_rate": 2.555981659916685e-06, + "loss": 0.3151, + "step": 33603 + }, + { + "epoch": 0.6727022495808623, + "grad_norm": 1.0979329347610474, + "learning_rate": 2.5556988498054135e-06, + "loss": 0.3284, + "step": 33604 + }, + { + "epoch": 0.6727222680979906, + "grad_norm": 1.0295368432998657, + "learning_rate": 2.5554160499694603e-06, + "loss": 0.2677, + "step": 33605 + }, + { + "epoch": 0.672742286615119, + "grad_norm": 1.0503313541412354, + "learning_rate": 2.555133260410011e-06, + "loss": 0.2946, + "step": 33606 + }, + { + "epoch": 0.6727623051322473, + "grad_norm": 1.1091983318328857, + "learning_rate": 2.5548504811282558e-06, + "loss": 0.3038, + "step": 33607 + }, + { + "epoch": 0.6727823236493756, + "grad_norm": 1.0251258611679077, + "learning_rate": 2.554567712125381e-06, + "loss": 0.2822, + "step": 33608 + }, + { + "epoch": 0.672802342166504, + "grad_norm": 1.0224268436431885, + "learning_rate": 2.5542849534025793e-06, + "loss": 0.2778, + "step": 33609 + }, + { + "epoch": 0.6728223606836323, + "grad_norm": 1.079199194908142, + "learning_rate": 2.5540022049610368e-06, + "loss": 0.3351, + "step": 33610 + }, + { + "epoch": 0.6728423792007607, + "grad_norm": 1.1171222925186157, + "learning_rate": 2.5537194668019415e-06, + "loss": 0.3051, + "step": 33611 + }, + { + "epoch": 0.672862397717889, + "grad_norm": 2.000899076461792, + "learning_rate": 2.5534367389264847e-06, + "loss": 0.7301, + "step": 33612 + }, + { + "epoch": 0.6728824162350174, + "grad_norm": 1.1893559694290161, + "learning_rate": 2.553154021335852e-06, + "loss": 0.3004, + "step": 33613 + }, + { + "epoch": 0.6729024347521457, + "grad_norm": 1.3234282732009888, + "learning_rate": 2.5528713140312344e-06, + "loss": 0.3146, + "step": 33614 + }, + { + "epoch": 0.672922453269274, + "grad_norm": 1.0840106010437012, + "learning_rate": 2.5525886170138194e-06, + "loss": 0.3217, + "step": 33615 + }, + { + "epoch": 0.6729424717864024, + "grad_norm": 1.0786898136138916, + "learning_rate": 2.552305930284795e-06, + "loss": 0.283, + "step": 33616 + }, + { + "epoch": 0.6729624903035307, + "grad_norm": 1.177883267402649, + "learning_rate": 2.552023253845348e-06, + "loss": 0.2691, + "step": 33617 + }, + { + "epoch": 0.6729825088206591, + "grad_norm": 1.2286665439605713, + "learning_rate": 2.55174058769667e-06, + "loss": 0.3223, + "step": 33618 + }, + { + "epoch": 0.6730025273377874, + "grad_norm": 1.0611251592636108, + "learning_rate": 2.551457931839948e-06, + "loss": 0.3197, + "step": 33619 + }, + { + "epoch": 0.6730225458549158, + "grad_norm": 1.151877760887146, + "learning_rate": 2.55117528627637e-06, + "loss": 0.3472, + "step": 33620 + }, + { + "epoch": 0.6730425643720441, + "grad_norm": 1.2343392372131348, + "learning_rate": 2.550892651007124e-06, + "loss": 0.303, + "step": 33621 + }, + { + "epoch": 0.6730625828891725, + "grad_norm": 1.2357105016708374, + "learning_rate": 2.5506100260333967e-06, + "loss": 0.3083, + "step": 33622 + }, + { + "epoch": 0.6730826014063008, + "grad_norm": 1.0651257038116455, + "learning_rate": 2.5503274113563793e-06, + "loss": 0.3008, + "step": 33623 + }, + { + "epoch": 0.6731026199234291, + "grad_norm": 1.3087947368621826, + "learning_rate": 2.550044806977256e-06, + "loss": 0.3659, + "step": 33624 + }, + { + "epoch": 0.6731226384405575, + "grad_norm": 1.3498010635375977, + "learning_rate": 2.5497622128972195e-06, + "loss": 0.3107, + "step": 33625 + }, + { + "epoch": 0.6731426569576858, + "grad_norm": 1.9183861017227173, + "learning_rate": 2.549479629117453e-06, + "loss": 0.7637, + "step": 33626 + }, + { + "epoch": 0.6731626754748142, + "grad_norm": 1.0038292407989502, + "learning_rate": 2.5491970556391487e-06, + "loss": 0.2491, + "step": 33627 + }, + { + "epoch": 0.6731826939919425, + "grad_norm": 1.9554799795150757, + "learning_rate": 2.5489144924634934e-06, + "loss": 0.8538, + "step": 33628 + }, + { + "epoch": 0.6732027125090709, + "grad_norm": 1.0144717693328857, + "learning_rate": 2.5486319395916737e-06, + "loss": 0.2969, + "step": 33629 + }, + { + "epoch": 0.6732227310261992, + "grad_norm": 1.1549206972122192, + "learning_rate": 2.548349397024875e-06, + "loss": 0.2875, + "step": 33630 + }, + { + "epoch": 0.6732427495433275, + "grad_norm": 1.9924654960632324, + "learning_rate": 2.548066864764291e-06, + "loss": 0.703, + "step": 33631 + }, + { + "epoch": 0.6732627680604559, + "grad_norm": 1.1874585151672363, + "learning_rate": 2.547784342811106e-06, + "loss": 0.3337, + "step": 33632 + }, + { + "epoch": 0.6732827865775842, + "grad_norm": 1.1886684894561768, + "learning_rate": 2.5475018311665073e-06, + "loss": 0.2962, + "step": 33633 + }, + { + "epoch": 0.6733028050947126, + "grad_norm": 1.2898513078689575, + "learning_rate": 2.547219329831683e-06, + "loss": 0.3004, + "step": 33634 + }, + { + "epoch": 0.6733228236118409, + "grad_norm": 1.8713033199310303, + "learning_rate": 2.5469368388078197e-06, + "loss": 0.8118, + "step": 33635 + }, + { + "epoch": 0.6733428421289693, + "grad_norm": 0.9271827340126038, + "learning_rate": 2.5466543580961077e-06, + "loss": 0.2368, + "step": 33636 + }, + { + "epoch": 0.6733628606460976, + "grad_norm": 1.0505127906799316, + "learning_rate": 2.5463718876977308e-06, + "loss": 0.273, + "step": 33637 + }, + { + "epoch": 0.673382879163226, + "grad_norm": 1.1748524904251099, + "learning_rate": 2.5460894276138803e-06, + "loss": 0.3412, + "step": 33638 + }, + { + "epoch": 0.6734028976803543, + "grad_norm": 1.2004125118255615, + "learning_rate": 2.5458069778457396e-06, + "loss": 0.3338, + "step": 33639 + }, + { + "epoch": 0.6734229161974826, + "grad_norm": 1.1368359327316284, + "learning_rate": 2.5455245383945004e-06, + "loss": 0.3342, + "step": 33640 + }, + { + "epoch": 0.673442934714611, + "grad_norm": 1.0932657718658447, + "learning_rate": 2.5452421092613477e-06, + "loss": 0.3717, + "step": 33641 + }, + { + "epoch": 0.6734629532317393, + "grad_norm": 1.0417571067810059, + "learning_rate": 2.544959690447469e-06, + "loss": 0.3075, + "step": 33642 + }, + { + "epoch": 0.6734829717488677, + "grad_norm": 1.1498383283615112, + "learning_rate": 2.5446772819540518e-06, + "loss": 0.33, + "step": 33643 + }, + { + "epoch": 0.673502990265996, + "grad_norm": 1.2523198127746582, + "learning_rate": 2.5443948837822806e-06, + "loss": 0.3298, + "step": 33644 + }, + { + "epoch": 0.6735230087831244, + "grad_norm": 1.0397790670394897, + "learning_rate": 2.5441124959333465e-06, + "loss": 0.2935, + "step": 33645 + }, + { + "epoch": 0.6735430273002527, + "grad_norm": 1.2328072786331177, + "learning_rate": 2.5438301184084358e-06, + "loss": 0.2963, + "step": 33646 + }, + { + "epoch": 0.673563045817381, + "grad_norm": 1.15797758102417, + "learning_rate": 2.5435477512087335e-06, + "loss": 0.338, + "step": 33647 + }, + { + "epoch": 0.6735830643345094, + "grad_norm": 1.141334891319275, + "learning_rate": 2.543265394335427e-06, + "loss": 0.3019, + "step": 33648 + }, + { + "epoch": 0.6736030828516377, + "grad_norm": 1.069111943244934, + "learning_rate": 2.5429830477897054e-06, + "loss": 0.3224, + "step": 33649 + }, + { + "epoch": 0.6736231013687661, + "grad_norm": 1.1994928121566772, + "learning_rate": 2.542700711572753e-06, + "loss": 0.3152, + "step": 33650 + }, + { + "epoch": 0.6736431198858944, + "grad_norm": 1.156711220741272, + "learning_rate": 2.5424183856857592e-06, + "loss": 0.3171, + "step": 33651 + }, + { + "epoch": 0.6736631384030228, + "grad_norm": 1.2025970220565796, + "learning_rate": 2.542136070129908e-06, + "loss": 0.3409, + "step": 33652 + }, + { + "epoch": 0.6736831569201511, + "grad_norm": 1.1848279237747192, + "learning_rate": 2.5418537649063902e-06, + "loss": 0.3361, + "step": 33653 + }, + { + "epoch": 0.6737031754372795, + "grad_norm": 1.1501095294952393, + "learning_rate": 2.5415714700163897e-06, + "loss": 0.3377, + "step": 33654 + }, + { + "epoch": 0.6737231939544078, + "grad_norm": 1.0637333393096924, + "learning_rate": 2.541289185461093e-06, + "loss": 0.2707, + "step": 33655 + }, + { + "epoch": 0.6737432124715361, + "grad_norm": 0.9672160744667053, + "learning_rate": 2.5410069112416887e-06, + "loss": 0.2927, + "step": 33656 + }, + { + "epoch": 0.6737632309886645, + "grad_norm": 1.1562814712524414, + "learning_rate": 2.5407246473593595e-06, + "loss": 0.2899, + "step": 33657 + }, + { + "epoch": 0.6737832495057928, + "grad_norm": 1.0253263711929321, + "learning_rate": 2.5404423938152965e-06, + "loss": 0.3164, + "step": 33658 + }, + { + "epoch": 0.6738032680229212, + "grad_norm": 1.1275606155395508, + "learning_rate": 2.540160150610685e-06, + "loss": 0.2959, + "step": 33659 + }, + { + "epoch": 0.6738232865400495, + "grad_norm": 1.0488295555114746, + "learning_rate": 2.5398779177467102e-06, + "loss": 0.3058, + "step": 33660 + }, + { + "epoch": 0.6738433050571779, + "grad_norm": 1.1680934429168701, + "learning_rate": 2.539595695224558e-06, + "loss": 0.3133, + "step": 33661 + }, + { + "epoch": 0.6738633235743062, + "grad_norm": 1.1574307680130005, + "learning_rate": 2.5393134830454174e-06, + "loss": 0.2604, + "step": 33662 + }, + { + "epoch": 0.6738833420914345, + "grad_norm": 1.1028324365615845, + "learning_rate": 2.539031281210471e-06, + "loss": 0.3276, + "step": 33663 + }, + { + "epoch": 0.6739033606085629, + "grad_norm": 1.9760960340499878, + "learning_rate": 2.5387490897209106e-06, + "loss": 0.824, + "step": 33664 + }, + { + "epoch": 0.6739233791256912, + "grad_norm": 1.280315637588501, + "learning_rate": 2.538466908577919e-06, + "loss": 0.294, + "step": 33665 + }, + { + "epoch": 0.6739433976428196, + "grad_norm": 1.071298599243164, + "learning_rate": 2.53818473778268e-06, + "loss": 0.2554, + "step": 33666 + }, + { + "epoch": 0.6739634161599479, + "grad_norm": 1.2894774675369263, + "learning_rate": 2.5379025773363854e-06, + "loss": 0.3535, + "step": 33667 + }, + { + "epoch": 0.6739834346770763, + "grad_norm": 1.164042592048645, + "learning_rate": 2.5376204272402183e-06, + "loss": 0.2876, + "step": 33668 + }, + { + "epoch": 0.6740034531942046, + "grad_norm": 1.8298693895339966, + "learning_rate": 2.5373382874953647e-06, + "loss": 0.7168, + "step": 33669 + }, + { + "epoch": 0.674023471711333, + "grad_norm": 1.156137466430664, + "learning_rate": 2.5370561581030096e-06, + "loss": 0.3049, + "step": 33670 + }, + { + "epoch": 0.6740434902284613, + "grad_norm": 1.0138165950775146, + "learning_rate": 2.5367740390643414e-06, + "loss": 0.2605, + "step": 33671 + }, + { + "epoch": 0.6740635087455896, + "grad_norm": 1.8695220947265625, + "learning_rate": 2.5364919303805456e-06, + "loss": 0.7528, + "step": 33672 + }, + { + "epoch": 0.674083527262718, + "grad_norm": 1.3113172054290771, + "learning_rate": 2.5362098320528083e-06, + "loss": 0.3148, + "step": 33673 + }, + { + "epoch": 0.6741035457798463, + "grad_norm": 1.031604528427124, + "learning_rate": 2.535927744082314e-06, + "loss": 0.312, + "step": 33674 + }, + { + "epoch": 0.6741235642969747, + "grad_norm": 1.7570310831069946, + "learning_rate": 2.5356456664702466e-06, + "loss": 0.735, + "step": 33675 + }, + { + "epoch": 0.674143582814103, + "grad_norm": 1.154939889907837, + "learning_rate": 2.535363599217795e-06, + "loss": 0.2981, + "step": 33676 + }, + { + "epoch": 0.6741636013312314, + "grad_norm": 1.1301008462905884, + "learning_rate": 2.5350815423261465e-06, + "loss": 0.2959, + "step": 33677 + }, + { + "epoch": 0.6741836198483597, + "grad_norm": 1.0515806674957275, + "learning_rate": 2.534799495796485e-06, + "loss": 0.3035, + "step": 33678 + }, + { + "epoch": 0.674203638365488, + "grad_norm": 1.1412230730056763, + "learning_rate": 2.534517459629994e-06, + "loss": 0.2672, + "step": 33679 + }, + { + "epoch": 0.6742236568826164, + "grad_norm": 2.038811206817627, + "learning_rate": 2.534235433827862e-06, + "loss": 0.7457, + "step": 33680 + }, + { + "epoch": 0.6742436753997447, + "grad_norm": 2.0122923851013184, + "learning_rate": 2.5339534183912746e-06, + "loss": 0.7998, + "step": 33681 + }, + { + "epoch": 0.6742636939168731, + "grad_norm": 0.9746658205986023, + "learning_rate": 2.533671413321416e-06, + "loss": 0.2382, + "step": 33682 + }, + { + "epoch": 0.6742837124340014, + "grad_norm": 1.1024901866912842, + "learning_rate": 2.5333894186194696e-06, + "loss": 0.3583, + "step": 33683 + }, + { + "epoch": 0.6743037309511298, + "grad_norm": 1.0822205543518066, + "learning_rate": 2.533107434286626e-06, + "loss": 0.3013, + "step": 33684 + }, + { + "epoch": 0.6743237494682581, + "grad_norm": 1.0274103879928589, + "learning_rate": 2.5328254603240678e-06, + "loss": 0.2549, + "step": 33685 + }, + { + "epoch": 0.6743437679853865, + "grad_norm": 1.2081468105316162, + "learning_rate": 2.5325434967329797e-06, + "loss": 0.2969, + "step": 33686 + }, + { + "epoch": 0.6743637865025148, + "grad_norm": 1.1997498273849487, + "learning_rate": 2.532261543514548e-06, + "loss": 0.3211, + "step": 33687 + }, + { + "epoch": 0.6743838050196431, + "grad_norm": 1.0412371158599854, + "learning_rate": 2.531979600669956e-06, + "loss": 0.2661, + "step": 33688 + }, + { + "epoch": 0.6744038235367715, + "grad_norm": 1.1428320407867432, + "learning_rate": 2.531697668200391e-06, + "loss": 0.3592, + "step": 33689 + }, + { + "epoch": 0.6744238420538998, + "grad_norm": 1.2320103645324707, + "learning_rate": 2.531415746107039e-06, + "loss": 0.2648, + "step": 33690 + }, + { + "epoch": 0.6744438605710282, + "grad_norm": 1.0527229309082031, + "learning_rate": 2.531133834391084e-06, + "loss": 0.3009, + "step": 33691 + }, + { + "epoch": 0.6744638790881565, + "grad_norm": 1.8936834335327148, + "learning_rate": 2.5308519330537097e-06, + "loss": 0.7562, + "step": 33692 + }, + { + "epoch": 0.6744838976052849, + "grad_norm": 1.0715895891189575, + "learning_rate": 2.5305700420961043e-06, + "loss": 0.2467, + "step": 33693 + }, + { + "epoch": 0.6745039161224132, + "grad_norm": 1.1808457374572754, + "learning_rate": 2.530288161519451e-06, + "loss": 0.3009, + "step": 33694 + }, + { + "epoch": 0.6745239346395415, + "grad_norm": 1.0712999105453491, + "learning_rate": 2.5300062913249345e-06, + "loss": 0.2521, + "step": 33695 + }, + { + "epoch": 0.6745439531566699, + "grad_norm": 1.1446161270141602, + "learning_rate": 2.52972443151374e-06, + "loss": 0.2909, + "step": 33696 + }, + { + "epoch": 0.6745639716737982, + "grad_norm": 1.0825083255767822, + "learning_rate": 2.5294425820870504e-06, + "loss": 0.3286, + "step": 33697 + }, + { + "epoch": 0.6745839901909266, + "grad_norm": 1.081475019454956, + "learning_rate": 2.529160743046055e-06, + "loss": 0.3093, + "step": 33698 + }, + { + "epoch": 0.6746040087080549, + "grad_norm": 1.1058969497680664, + "learning_rate": 2.5288789143919357e-06, + "loss": 0.3164, + "step": 33699 + }, + { + "epoch": 0.6746240272251833, + "grad_norm": 1.135697841644287, + "learning_rate": 2.528597096125878e-06, + "loss": 0.293, + "step": 33700 + }, + { + "epoch": 0.6746440457423116, + "grad_norm": 1.2218658924102783, + "learning_rate": 2.5283152882490637e-06, + "loss": 0.3365, + "step": 33701 + }, + { + "epoch": 0.67466406425944, + "grad_norm": 1.1724408864974976, + "learning_rate": 2.5280334907626802e-06, + "loss": 0.3446, + "step": 33702 + }, + { + "epoch": 0.6746840827765683, + "grad_norm": 1.1065107583999634, + "learning_rate": 2.527751703667914e-06, + "loss": 0.2927, + "step": 33703 + }, + { + "epoch": 0.6747041012936966, + "grad_norm": 1.082279086112976, + "learning_rate": 2.5274699269659474e-06, + "loss": 0.2838, + "step": 33704 + }, + { + "epoch": 0.674724119810825, + "grad_norm": 1.1945006847381592, + "learning_rate": 2.527188160657965e-06, + "loss": 0.3011, + "step": 33705 + }, + { + "epoch": 0.6747441383279533, + "grad_norm": 0.9439219236373901, + "learning_rate": 2.5269064047451504e-06, + "loss": 0.255, + "step": 33706 + }, + { + "epoch": 0.6747641568450817, + "grad_norm": 1.1612719297409058, + "learning_rate": 2.5266246592286903e-06, + "loss": 0.2838, + "step": 33707 + }, + { + "epoch": 0.67478417536221, + "grad_norm": 1.0785539150238037, + "learning_rate": 2.526342924109768e-06, + "loss": 0.3108, + "step": 33708 + }, + { + "epoch": 0.6748041938793384, + "grad_norm": 0.9845927357673645, + "learning_rate": 2.526061199389568e-06, + "loss": 0.3055, + "step": 33709 + }, + { + "epoch": 0.6748242123964667, + "grad_norm": 1.113595962524414, + "learning_rate": 2.5257794850692717e-06, + "loss": 0.3424, + "step": 33710 + }, + { + "epoch": 0.674844230913595, + "grad_norm": 1.1446490287780762, + "learning_rate": 2.525497781150068e-06, + "loss": 0.3043, + "step": 33711 + }, + { + "epoch": 0.6748642494307234, + "grad_norm": 1.1595919132232666, + "learning_rate": 2.5252160876331398e-06, + "loss": 0.2975, + "step": 33712 + }, + { + "epoch": 0.6748842679478517, + "grad_norm": 1.1387499570846558, + "learning_rate": 2.52493440451967e-06, + "loss": 0.3396, + "step": 33713 + }, + { + "epoch": 0.6749042864649801, + "grad_norm": 1.2085659503936768, + "learning_rate": 2.5246527318108417e-06, + "loss": 0.3115, + "step": 33714 + }, + { + "epoch": 0.6749243049821084, + "grad_norm": 1.0365054607391357, + "learning_rate": 2.5243710695078406e-06, + "loss": 0.2893, + "step": 33715 + }, + { + "epoch": 0.6749443234992368, + "grad_norm": 1.1531951427459717, + "learning_rate": 2.5240894176118523e-06, + "loss": 0.3586, + "step": 33716 + }, + { + "epoch": 0.6749643420163651, + "grad_norm": 1.0369126796722412, + "learning_rate": 2.52380777612406e-06, + "loss": 0.3002, + "step": 33717 + }, + { + "epoch": 0.6749843605334935, + "grad_norm": 1.1835041046142578, + "learning_rate": 2.523526145045646e-06, + "loss": 0.2921, + "step": 33718 + }, + { + "epoch": 0.6750043790506218, + "grad_norm": 1.899933099746704, + "learning_rate": 2.5232445243777936e-06, + "loss": 0.7034, + "step": 33719 + }, + { + "epoch": 0.6750243975677501, + "grad_norm": 1.0967154502868652, + "learning_rate": 2.5229629141216904e-06, + "loss": 0.2453, + "step": 33720 + }, + { + "epoch": 0.6750444160848785, + "grad_norm": 1.0366237163543701, + "learning_rate": 2.5226813142785178e-06, + "loss": 0.2999, + "step": 33721 + }, + { + "epoch": 0.6750644346020068, + "grad_norm": 1.1294506788253784, + "learning_rate": 2.52239972484946e-06, + "loss": 0.272, + "step": 33722 + }, + { + "epoch": 0.6750844531191352, + "grad_norm": 1.0506551265716553, + "learning_rate": 2.5221181458356984e-06, + "loss": 0.2522, + "step": 33723 + }, + { + "epoch": 0.6751044716362635, + "grad_norm": 1.0963166952133179, + "learning_rate": 2.5218365772384213e-06, + "loss": 0.3668, + "step": 33724 + }, + { + "epoch": 0.6751244901533919, + "grad_norm": 1.131719708442688, + "learning_rate": 2.5215550190588096e-06, + "loss": 0.2918, + "step": 33725 + }, + { + "epoch": 0.6751445086705202, + "grad_norm": 1.0779200792312622, + "learning_rate": 2.521273471298047e-06, + "loss": 0.2596, + "step": 33726 + }, + { + "epoch": 0.6751645271876485, + "grad_norm": 1.2717252969741821, + "learning_rate": 2.5209919339573153e-06, + "loss": 0.3172, + "step": 33727 + }, + { + "epoch": 0.6751845457047769, + "grad_norm": 1.0319621562957764, + "learning_rate": 2.520710407037801e-06, + "loss": 0.2727, + "step": 33728 + }, + { + "epoch": 0.6752045642219052, + "grad_norm": 1.1654654741287231, + "learning_rate": 2.520428890540687e-06, + "loss": 0.3302, + "step": 33729 + }, + { + "epoch": 0.6752245827390336, + "grad_norm": 1.1171109676361084, + "learning_rate": 2.5201473844671575e-06, + "loss": 0.308, + "step": 33730 + }, + { + "epoch": 0.6752446012561619, + "grad_norm": 1.0503485202789307, + "learning_rate": 2.5198658888183945e-06, + "loss": 0.3034, + "step": 33731 + }, + { + "epoch": 0.6752646197732903, + "grad_norm": 1.117409348487854, + "learning_rate": 2.5195844035955796e-06, + "loss": 0.3096, + "step": 33732 + }, + { + "epoch": 0.6752846382904186, + "grad_norm": 1.174110770225525, + "learning_rate": 2.5193029287999e-06, + "loss": 0.3086, + "step": 33733 + }, + { + "epoch": 0.675304656807547, + "grad_norm": 1.8370908498764038, + "learning_rate": 2.519021464432537e-06, + "loss": 0.7504, + "step": 33734 + }, + { + "epoch": 0.6753246753246753, + "grad_norm": 1.1326985359191895, + "learning_rate": 2.5187400104946734e-06, + "loss": 0.2799, + "step": 33735 + }, + { + "epoch": 0.6753446938418036, + "grad_norm": 1.2060402631759644, + "learning_rate": 2.5184585669874934e-06, + "loss": 0.2801, + "step": 33736 + }, + { + "epoch": 0.675364712358932, + "grad_norm": 1.0773810148239136, + "learning_rate": 2.5181771339121772e-06, + "loss": 0.2785, + "step": 33737 + }, + { + "epoch": 0.6753847308760603, + "grad_norm": 1.1213597059249878, + "learning_rate": 2.5178957112699124e-06, + "loss": 0.2972, + "step": 33738 + }, + { + "epoch": 0.6754047493931887, + "grad_norm": 1.0130373239517212, + "learning_rate": 2.51761429906188e-06, + "loss": 0.2812, + "step": 33739 + }, + { + "epoch": 0.675424767910317, + "grad_norm": 1.9788135290145874, + "learning_rate": 2.5173328972892607e-06, + "loss": 0.7472, + "step": 33740 + }, + { + "epoch": 0.6754447864274454, + "grad_norm": 1.0190099477767944, + "learning_rate": 2.51705150595324e-06, + "loss": 0.3086, + "step": 33741 + }, + { + "epoch": 0.6754648049445737, + "grad_norm": 1.0749256610870361, + "learning_rate": 2.5167701250550026e-06, + "loss": 0.3239, + "step": 33742 + }, + { + "epoch": 0.675484823461702, + "grad_norm": 1.1361238956451416, + "learning_rate": 2.5164887545957285e-06, + "loss": 0.2873, + "step": 33743 + }, + { + "epoch": 0.6755048419788304, + "grad_norm": 1.135408878326416, + "learning_rate": 2.5162073945766012e-06, + "loss": 0.3509, + "step": 33744 + }, + { + "epoch": 0.6755248604959587, + "grad_norm": 1.0758544206619263, + "learning_rate": 2.515926044998802e-06, + "loss": 0.302, + "step": 33745 + }, + { + "epoch": 0.6755448790130871, + "grad_norm": 1.2278695106506348, + "learning_rate": 2.515644705863517e-06, + "loss": 0.2905, + "step": 33746 + }, + { + "epoch": 0.6755648975302154, + "grad_norm": 1.0424836874008179, + "learning_rate": 2.5153633771719266e-06, + "loss": 0.2884, + "step": 33747 + }, + { + "epoch": 0.6755849160473438, + "grad_norm": 1.102026343345642, + "learning_rate": 2.5150820589252147e-06, + "loss": 0.3008, + "step": 33748 + }, + { + "epoch": 0.6756049345644721, + "grad_norm": 1.3624550104141235, + "learning_rate": 2.514800751124562e-06, + "loss": 0.3216, + "step": 33749 + }, + { + "epoch": 0.6756249530816005, + "grad_norm": 1.997496247291565, + "learning_rate": 2.51451945377115e-06, + "loss": 0.7463, + "step": 33750 + }, + { + "epoch": 0.6756449715987288, + "grad_norm": 1.001437783241272, + "learning_rate": 2.5142381668661663e-06, + "loss": 0.263, + "step": 33751 + }, + { + "epoch": 0.6756649901158571, + "grad_norm": 1.1553378105163574, + "learning_rate": 2.5139568904107903e-06, + "loss": 0.2467, + "step": 33752 + }, + { + "epoch": 0.6756850086329855, + "grad_norm": 1.012640118598938, + "learning_rate": 2.5136756244062025e-06, + "loss": 0.2908, + "step": 33753 + }, + { + "epoch": 0.6757050271501138, + "grad_norm": 1.194671392440796, + "learning_rate": 2.513394368853587e-06, + "loss": 0.3185, + "step": 33754 + }, + { + "epoch": 0.6757250456672422, + "grad_norm": 1.2125213146209717, + "learning_rate": 2.513113123754129e-06, + "loss": 0.2778, + "step": 33755 + }, + { + "epoch": 0.6757450641843705, + "grad_norm": 1.174721121788025, + "learning_rate": 2.5128318891090075e-06, + "loss": 0.3158, + "step": 33756 + }, + { + "epoch": 0.6757650827014989, + "grad_norm": 1.1891050338745117, + "learning_rate": 2.512550664919406e-06, + "loss": 0.3018, + "step": 33757 + }, + { + "epoch": 0.6757851012186272, + "grad_norm": 1.0996849536895752, + "learning_rate": 2.512269451186506e-06, + "loss": 0.2867, + "step": 33758 + }, + { + "epoch": 0.6758051197357555, + "grad_norm": 1.1089959144592285, + "learning_rate": 2.5119882479114876e-06, + "loss": 0.3182, + "step": 33759 + }, + { + "epoch": 0.6758251382528839, + "grad_norm": 1.02187180519104, + "learning_rate": 2.511707055095538e-06, + "loss": 0.2772, + "step": 33760 + }, + { + "epoch": 0.6758451567700122, + "grad_norm": 1.0383681058883667, + "learning_rate": 2.5114258727398355e-06, + "loss": 0.2737, + "step": 33761 + }, + { + "epoch": 0.6758651752871406, + "grad_norm": 1.1301956176757812, + "learning_rate": 2.5111447008455637e-06, + "loss": 0.2929, + "step": 33762 + }, + { + "epoch": 0.6758851938042689, + "grad_norm": 1.2194876670837402, + "learning_rate": 2.5108635394139013e-06, + "loss": 0.3295, + "step": 33763 + }, + { + "epoch": 0.6759052123213973, + "grad_norm": 1.0965591669082642, + "learning_rate": 2.5105823884460355e-06, + "loss": 0.2837, + "step": 33764 + }, + { + "epoch": 0.6759252308385256, + "grad_norm": 1.1299816370010376, + "learning_rate": 2.5103012479431452e-06, + "loss": 0.2838, + "step": 33765 + }, + { + "epoch": 0.675945249355654, + "grad_norm": 1.0895999670028687, + "learning_rate": 2.5100201179064114e-06, + "loss": 0.301, + "step": 33766 + }, + { + "epoch": 0.6759652678727823, + "grad_norm": 1.1967335939407349, + "learning_rate": 2.5097389983370193e-06, + "loss": 0.2721, + "step": 33767 + }, + { + "epoch": 0.6759852863899106, + "grad_norm": 1.9298088550567627, + "learning_rate": 2.5094578892361455e-06, + "loss": 0.7737, + "step": 33768 + }, + { + "epoch": 0.676005304907039, + "grad_norm": 1.8304699659347534, + "learning_rate": 2.5091767906049775e-06, + "loss": 0.772, + "step": 33769 + }, + { + "epoch": 0.6760253234241673, + "grad_norm": 1.014865756034851, + "learning_rate": 2.5088957024446937e-06, + "loss": 0.2412, + "step": 33770 + }, + { + "epoch": 0.6760453419412957, + "grad_norm": 1.0208747386932373, + "learning_rate": 2.5086146247564768e-06, + "loss": 0.3037, + "step": 33771 + }, + { + "epoch": 0.676065360458424, + "grad_norm": 1.1429216861724854, + "learning_rate": 2.508333557541506e-06, + "loss": 0.2791, + "step": 33772 + }, + { + "epoch": 0.6760853789755524, + "grad_norm": 1.1385762691497803, + "learning_rate": 2.508052500800966e-06, + "loss": 0.2987, + "step": 33773 + }, + { + "epoch": 0.6761053974926807, + "grad_norm": 1.2336267232894897, + "learning_rate": 2.507771454536037e-06, + "loss": 0.2809, + "step": 33774 + }, + { + "epoch": 0.676125416009809, + "grad_norm": 1.1673587560653687, + "learning_rate": 2.507490418747901e-06, + "loss": 0.3286, + "step": 33775 + }, + { + "epoch": 0.6761454345269374, + "grad_norm": 1.0253976583480835, + "learning_rate": 2.507209393437737e-06, + "loss": 0.273, + "step": 33776 + }, + { + "epoch": 0.6761654530440657, + "grad_norm": 1.0052623748779297, + "learning_rate": 2.50692837860673e-06, + "loss": 0.2923, + "step": 33777 + }, + { + "epoch": 0.6761854715611941, + "grad_norm": 1.0351672172546387, + "learning_rate": 2.5066473742560593e-06, + "loss": 0.2718, + "step": 33778 + }, + { + "epoch": 0.6762054900783224, + "grad_norm": 1.0346159934997559, + "learning_rate": 2.506366380386904e-06, + "loss": 0.3114, + "step": 33779 + }, + { + "epoch": 0.6762255085954508, + "grad_norm": 1.1055189371109009, + "learning_rate": 2.5060853970004514e-06, + "loss": 0.2815, + "step": 33780 + }, + { + "epoch": 0.6762455271125791, + "grad_norm": 1.1631580591201782, + "learning_rate": 2.505804424097876e-06, + "loss": 0.2843, + "step": 33781 + }, + { + "epoch": 0.6762655456297075, + "grad_norm": 1.0859017372131348, + "learning_rate": 2.5055234616803642e-06, + "loss": 0.3228, + "step": 33782 + }, + { + "epoch": 0.6762855641468358, + "grad_norm": 1.207899808883667, + "learning_rate": 2.5052425097490953e-06, + "loss": 0.3261, + "step": 33783 + }, + { + "epoch": 0.6763055826639641, + "grad_norm": 1.0590816736221313, + "learning_rate": 2.504961568305249e-06, + "loss": 0.2811, + "step": 33784 + }, + { + "epoch": 0.6763256011810925, + "grad_norm": 1.1652016639709473, + "learning_rate": 2.5046806373500066e-06, + "loss": 0.2952, + "step": 33785 + }, + { + "epoch": 0.6763456196982208, + "grad_norm": 1.1249679327011108, + "learning_rate": 2.5043997168845514e-06, + "loss": 0.3371, + "step": 33786 + }, + { + "epoch": 0.6763656382153492, + "grad_norm": 1.1445863246917725, + "learning_rate": 2.504118806910062e-06, + "loss": 0.2918, + "step": 33787 + }, + { + "epoch": 0.6763856567324775, + "grad_norm": 1.0989840030670166, + "learning_rate": 2.5038379074277208e-06, + "loss": 0.3416, + "step": 33788 + }, + { + "epoch": 0.6764056752496059, + "grad_norm": 1.3174430131912231, + "learning_rate": 2.5035570184387078e-06, + "loss": 0.3469, + "step": 33789 + }, + { + "epoch": 0.6764256937667342, + "grad_norm": 1.3084509372711182, + "learning_rate": 2.503276139944202e-06, + "loss": 0.3655, + "step": 33790 + }, + { + "epoch": 0.6764457122838625, + "grad_norm": 1.311901569366455, + "learning_rate": 2.502995271945387e-06, + "loss": 0.2752, + "step": 33791 + }, + { + "epoch": 0.6764657308009909, + "grad_norm": 1.2676035165786743, + "learning_rate": 2.5027144144434414e-06, + "loss": 0.3235, + "step": 33792 + }, + { + "epoch": 0.6764857493181192, + "grad_norm": 1.306863784790039, + "learning_rate": 2.5024335674395485e-06, + "loss": 0.3192, + "step": 33793 + }, + { + "epoch": 0.6765057678352476, + "grad_norm": 1.0795828104019165, + "learning_rate": 2.5021527309348857e-06, + "loss": 0.3193, + "step": 33794 + }, + { + "epoch": 0.6765257863523759, + "grad_norm": 1.2746540307998657, + "learning_rate": 2.501871904930637e-06, + "loss": 0.3211, + "step": 33795 + }, + { + "epoch": 0.6765458048695043, + "grad_norm": 1.2975372076034546, + "learning_rate": 2.501591089427982e-06, + "loss": 0.337, + "step": 33796 + }, + { + "epoch": 0.6765658233866326, + "grad_norm": 1.196576714515686, + "learning_rate": 2.5013102844281e-06, + "loss": 0.354, + "step": 33797 + }, + { + "epoch": 0.676585841903761, + "grad_norm": 1.1610822677612305, + "learning_rate": 2.5010294899321695e-06, + "loss": 0.246, + "step": 33798 + }, + { + "epoch": 0.6766058604208893, + "grad_norm": 1.1088781356811523, + "learning_rate": 2.500748705941376e-06, + "loss": 0.3182, + "step": 33799 + }, + { + "epoch": 0.6766258789380176, + "grad_norm": 1.0081125497817993, + "learning_rate": 2.500467932456896e-06, + "loss": 0.2973, + "step": 33800 + }, + { + "epoch": 0.676645897455146, + "grad_norm": 1.0294215679168701, + "learning_rate": 2.5001871694799117e-06, + "loss": 0.3093, + "step": 33801 + }, + { + "epoch": 0.6766659159722743, + "grad_norm": 0.9766483902931213, + "learning_rate": 2.499906417011602e-06, + "loss": 0.2764, + "step": 33802 + }, + { + "epoch": 0.6766859344894027, + "grad_norm": 1.4060535430908203, + "learning_rate": 2.499625675053146e-06, + "loss": 0.2919, + "step": 33803 + }, + { + "epoch": 0.676705953006531, + "grad_norm": 1.0086307525634766, + "learning_rate": 2.4993449436057277e-06, + "loss": 0.3232, + "step": 33804 + }, + { + "epoch": 0.6767259715236594, + "grad_norm": 1.372741460800171, + "learning_rate": 2.499064222670523e-06, + "loss": 0.2593, + "step": 33805 + }, + { + "epoch": 0.6767459900407877, + "grad_norm": 1.09503173828125, + "learning_rate": 2.4987835122487164e-06, + "loss": 0.2784, + "step": 33806 + }, + { + "epoch": 0.676766008557916, + "grad_norm": 1.0895684957504272, + "learning_rate": 2.4985028123414834e-06, + "loss": 0.332, + "step": 33807 + }, + { + "epoch": 0.6767860270750444, + "grad_norm": 1.1600757837295532, + "learning_rate": 2.498222122950008e-06, + "loss": 0.3165, + "step": 33808 + }, + { + "epoch": 0.6768060455921727, + "grad_norm": 1.7875727415084839, + "learning_rate": 2.4979414440754683e-06, + "loss": 0.684, + "step": 33809 + }, + { + "epoch": 0.6768260641093011, + "grad_norm": 1.0857542753219604, + "learning_rate": 2.4976607757190454e-06, + "loss": 0.308, + "step": 33810 + }, + { + "epoch": 0.6768460826264294, + "grad_norm": 1.2375425100326538, + "learning_rate": 2.497380117881917e-06, + "loss": 0.2477, + "step": 33811 + }, + { + "epoch": 0.6768661011435578, + "grad_norm": 1.1493664979934692, + "learning_rate": 2.4970994705652625e-06, + "loss": 0.2762, + "step": 33812 + }, + { + "epoch": 0.6768861196606861, + "grad_norm": 1.0603615045547485, + "learning_rate": 2.4968188337702646e-06, + "loss": 0.3303, + "step": 33813 + }, + { + "epoch": 0.6769061381778145, + "grad_norm": 1.1781178712844849, + "learning_rate": 2.496538207498102e-06, + "loss": 0.3239, + "step": 33814 + }, + { + "epoch": 0.6769261566949428, + "grad_norm": 1.2656036615371704, + "learning_rate": 2.4962575917499542e-06, + "loss": 0.2558, + "step": 33815 + }, + { + "epoch": 0.6769461752120711, + "grad_norm": 1.2456505298614502, + "learning_rate": 2.495976986526999e-06, + "loss": 0.3519, + "step": 33816 + }, + { + "epoch": 0.6769661937291995, + "grad_norm": 1.1096959114074707, + "learning_rate": 2.495696391830419e-06, + "loss": 0.3331, + "step": 33817 + }, + { + "epoch": 0.6769862122463278, + "grad_norm": 1.1123650074005127, + "learning_rate": 2.4954158076613906e-06, + "loss": 0.2693, + "step": 33818 + }, + { + "epoch": 0.6770062307634562, + "grad_norm": 1.1320874691009521, + "learning_rate": 2.4951352340210972e-06, + "loss": 0.3238, + "step": 33819 + }, + { + "epoch": 0.6770262492805845, + "grad_norm": 1.0560219287872314, + "learning_rate": 2.4948546709107164e-06, + "loss": 0.2809, + "step": 33820 + }, + { + "epoch": 0.6770462677977129, + "grad_norm": 1.1519198417663574, + "learning_rate": 2.4945741183314255e-06, + "loss": 0.3311, + "step": 33821 + }, + { + "epoch": 0.6770662863148412, + "grad_norm": 1.154165267944336, + "learning_rate": 2.4942935762844074e-06, + "loss": 0.3515, + "step": 33822 + }, + { + "epoch": 0.6770863048319695, + "grad_norm": 1.0896397829055786, + "learning_rate": 2.49401304477084e-06, + "loss": 0.3048, + "step": 33823 + }, + { + "epoch": 0.6771063233490979, + "grad_norm": 1.1125596761703491, + "learning_rate": 2.4937325237919024e-06, + "loss": 0.3034, + "step": 33824 + }, + { + "epoch": 0.6771263418662262, + "grad_norm": 1.0707035064697266, + "learning_rate": 2.493452013348772e-06, + "loss": 0.299, + "step": 33825 + }, + { + "epoch": 0.6771463603833546, + "grad_norm": 1.4029223918914795, + "learning_rate": 2.4931715134426327e-06, + "loss": 0.2917, + "step": 33826 + }, + { + "epoch": 0.6771663789004829, + "grad_norm": 2.2286062240600586, + "learning_rate": 2.49289102407466e-06, + "loss": 0.7619, + "step": 33827 + }, + { + "epoch": 0.6771863974176113, + "grad_norm": 1.115849494934082, + "learning_rate": 2.4926105452460344e-06, + "loss": 0.3075, + "step": 33828 + }, + { + "epoch": 0.6772064159347396, + "grad_norm": 1.0108201503753662, + "learning_rate": 2.4923300769579317e-06, + "loss": 0.307, + "step": 33829 + }, + { + "epoch": 0.677226434451868, + "grad_norm": 1.1236222982406616, + "learning_rate": 2.492049619211536e-06, + "loss": 0.3305, + "step": 33830 + }, + { + "epoch": 0.6772464529689963, + "grad_norm": 1.2109650373458862, + "learning_rate": 2.4917691720080222e-06, + "loss": 0.2873, + "step": 33831 + }, + { + "epoch": 0.6772664714861246, + "grad_norm": 1.2091026306152344, + "learning_rate": 2.4914887353485732e-06, + "loss": 0.3161, + "step": 33832 + }, + { + "epoch": 0.677286490003253, + "grad_norm": 1.169265627861023, + "learning_rate": 2.4912083092343655e-06, + "loss": 0.2753, + "step": 33833 + }, + { + "epoch": 0.6773065085203813, + "grad_norm": 1.1083768606185913, + "learning_rate": 2.490927893666576e-06, + "loss": 0.3008, + "step": 33834 + }, + { + "epoch": 0.6773265270375097, + "grad_norm": 1.3733333349227905, + "learning_rate": 2.4906474886463876e-06, + "loss": 0.3216, + "step": 33835 + }, + { + "epoch": 0.677346545554638, + "grad_norm": 1.1695985794067383, + "learning_rate": 2.4903670941749773e-06, + "loss": 0.3181, + "step": 33836 + }, + { + "epoch": 0.6773665640717664, + "grad_norm": 1.1933143138885498, + "learning_rate": 2.490086710253524e-06, + "loss": 0.2917, + "step": 33837 + }, + { + "epoch": 0.6773865825888947, + "grad_norm": 1.088045358657837, + "learning_rate": 2.4898063368832038e-06, + "loss": 0.3239, + "step": 33838 + }, + { + "epoch": 0.677406601106023, + "grad_norm": 2.0299923419952393, + "learning_rate": 2.4895259740651996e-06, + "loss": 0.7877, + "step": 33839 + }, + { + "epoch": 0.6774266196231514, + "grad_norm": 1.160902976989746, + "learning_rate": 2.4892456218006876e-06, + "loss": 0.2867, + "step": 33840 + }, + { + "epoch": 0.6774466381402797, + "grad_norm": 1.1695753335952759, + "learning_rate": 2.488965280090847e-06, + "loss": 0.2824, + "step": 33841 + }, + { + "epoch": 0.6774666566574081, + "grad_norm": 1.142081618309021, + "learning_rate": 2.488684948936854e-06, + "loss": 0.3441, + "step": 33842 + }, + { + "epoch": 0.6774866751745364, + "grad_norm": 1.1692460775375366, + "learning_rate": 2.488404628339891e-06, + "loss": 0.3151, + "step": 33843 + }, + { + "epoch": 0.6775066936916648, + "grad_norm": 1.1102067232131958, + "learning_rate": 2.4881243183011328e-06, + "loss": 0.2817, + "step": 33844 + }, + { + "epoch": 0.6775267122087931, + "grad_norm": 1.049312949180603, + "learning_rate": 2.487844018821761e-06, + "loss": 0.2991, + "step": 33845 + }, + { + "epoch": 0.6775467307259215, + "grad_norm": 1.1246051788330078, + "learning_rate": 2.487563729902952e-06, + "loss": 0.2905, + "step": 33846 + }, + { + "epoch": 0.6775667492430498, + "grad_norm": 1.068735122680664, + "learning_rate": 2.4872834515458828e-06, + "loss": 0.2966, + "step": 33847 + }, + { + "epoch": 0.6775867677601781, + "grad_norm": 1.0351766347885132, + "learning_rate": 2.487003183751735e-06, + "loss": 0.2649, + "step": 33848 + }, + { + "epoch": 0.6776067862773065, + "grad_norm": 1.0230177640914917, + "learning_rate": 2.4867229265216853e-06, + "loss": 0.2685, + "step": 33849 + }, + { + "epoch": 0.6776268047944348, + "grad_norm": 1.1837950944900513, + "learning_rate": 2.4864426798569114e-06, + "loss": 0.3408, + "step": 33850 + }, + { + "epoch": 0.6776468233115632, + "grad_norm": 1.1647534370422363, + "learning_rate": 2.486162443758592e-06, + "loss": 0.3533, + "step": 33851 + }, + { + "epoch": 0.6776668418286915, + "grad_norm": 1.045254111289978, + "learning_rate": 2.4858822182279017e-06, + "loss": 0.2886, + "step": 33852 + }, + { + "epoch": 0.6776868603458199, + "grad_norm": 1.8715611696243286, + "learning_rate": 2.4856020032660244e-06, + "loss": 0.7202, + "step": 33853 + }, + { + "epoch": 0.6777068788629482, + "grad_norm": 1.1192623376846313, + "learning_rate": 2.4853217988741344e-06, + "loss": 0.2887, + "step": 33854 + }, + { + "epoch": 0.6777268973800765, + "grad_norm": 1.0900565385818481, + "learning_rate": 2.485041605053409e-06, + "loss": 0.3376, + "step": 33855 + }, + { + "epoch": 0.6777469158972049, + "grad_norm": 1.0980619192123413, + "learning_rate": 2.48476142180503e-06, + "loss": 0.3021, + "step": 33856 + }, + { + "epoch": 0.6777669344143332, + "grad_norm": 1.8418552875518799, + "learning_rate": 2.4844812491301696e-06, + "loss": 0.7405, + "step": 33857 + }, + { + "epoch": 0.6777869529314616, + "grad_norm": 1.1450661420822144, + "learning_rate": 2.484201087030011e-06, + "loss": 0.3124, + "step": 33858 + }, + { + "epoch": 0.6778069714485899, + "grad_norm": 1.1290220022201538, + "learning_rate": 2.4839209355057298e-06, + "loss": 0.3024, + "step": 33859 + }, + { + "epoch": 0.6778269899657183, + "grad_norm": 1.1414064168930054, + "learning_rate": 2.4836407945585015e-06, + "loss": 0.3369, + "step": 33860 + }, + { + "epoch": 0.6778470084828466, + "grad_norm": 2.0238735675811768, + "learning_rate": 2.4833606641895082e-06, + "loss": 0.7653, + "step": 33861 + }, + { + "epoch": 0.677867026999975, + "grad_norm": 1.1084808111190796, + "learning_rate": 2.4830805443999238e-06, + "loss": 0.287, + "step": 33862 + }, + { + "epoch": 0.6778870455171033, + "grad_norm": 1.0853601694107056, + "learning_rate": 2.4828004351909286e-06, + "loss": 0.2912, + "step": 33863 + }, + { + "epoch": 0.6779070640342316, + "grad_norm": 1.2715072631835938, + "learning_rate": 2.482520336563698e-06, + "loss": 0.3119, + "step": 33864 + }, + { + "epoch": 0.67792708255136, + "grad_norm": 1.042269229888916, + "learning_rate": 2.482240248519408e-06, + "loss": 0.2899, + "step": 33865 + }, + { + "epoch": 0.6779471010684883, + "grad_norm": 1.101765751838684, + "learning_rate": 2.48196017105924e-06, + "loss": 0.2771, + "step": 33866 + }, + { + "epoch": 0.6779671195856167, + "grad_norm": 1.1920114755630493, + "learning_rate": 2.4816801041843704e-06, + "loss": 0.2553, + "step": 33867 + }, + { + "epoch": 0.677987138102745, + "grad_norm": 1.0665572881698608, + "learning_rate": 2.4814000478959727e-06, + "loss": 0.2746, + "step": 33868 + }, + { + "epoch": 0.6780071566198734, + "grad_norm": 1.0525003671646118, + "learning_rate": 2.48112000219523e-06, + "loss": 0.2873, + "step": 33869 + }, + { + "epoch": 0.6780271751370017, + "grad_norm": 1.1229499578475952, + "learning_rate": 2.4808399670833144e-06, + "loss": 0.3084, + "step": 33870 + }, + { + "epoch": 0.67804719365413, + "grad_norm": 1.3231213092803955, + "learning_rate": 2.4805599425614073e-06, + "loss": 0.2844, + "step": 33871 + }, + { + "epoch": 0.6780672121712584, + "grad_norm": 1.1286042928695679, + "learning_rate": 2.480279928630684e-06, + "loss": 0.3175, + "step": 33872 + }, + { + "epoch": 0.6780872306883867, + "grad_norm": 1.0254151821136475, + "learning_rate": 2.4799999252923217e-06, + "loss": 0.2959, + "step": 33873 + }, + { + "epoch": 0.6781072492055151, + "grad_norm": 1.1267672777175903, + "learning_rate": 2.479719932547496e-06, + "loss": 0.2983, + "step": 33874 + }, + { + "epoch": 0.6781272677226434, + "grad_norm": 1.1901888847351074, + "learning_rate": 2.4794399503973866e-06, + "loss": 0.2821, + "step": 33875 + }, + { + "epoch": 0.6781472862397718, + "grad_norm": 1.1438326835632324, + "learning_rate": 2.479159978843169e-06, + "loss": 0.3024, + "step": 33876 + }, + { + "epoch": 0.6781673047569001, + "grad_norm": 1.1890264749526978, + "learning_rate": 2.4788800178860214e-06, + "loss": 0.2564, + "step": 33877 + }, + { + "epoch": 0.6781873232740285, + "grad_norm": 1.9155596494674683, + "learning_rate": 2.4786000675271164e-06, + "loss": 0.8129, + "step": 33878 + }, + { + "epoch": 0.6782073417911568, + "grad_norm": 1.817040205001831, + "learning_rate": 2.478320127767637e-06, + "loss": 0.7314, + "step": 33879 + }, + { + "epoch": 0.6782273603082851, + "grad_norm": 1.158955454826355, + "learning_rate": 2.4780401986087572e-06, + "loss": 0.2904, + "step": 33880 + }, + { + "epoch": 0.6782473788254135, + "grad_norm": 1.1533137559890747, + "learning_rate": 2.4777602800516514e-06, + "loss": 0.2943, + "step": 33881 + }, + { + "epoch": 0.6782673973425418, + "grad_norm": 1.1165932416915894, + "learning_rate": 2.4774803720975006e-06, + "loss": 0.315, + "step": 33882 + }, + { + "epoch": 0.6782874158596702, + "grad_norm": 1.1936514377593994, + "learning_rate": 2.477200474747477e-06, + "loss": 0.3021, + "step": 33883 + }, + { + "epoch": 0.6783074343767985, + "grad_norm": 1.0028891563415527, + "learning_rate": 2.4769205880027623e-06, + "loss": 0.2917, + "step": 33884 + }, + { + "epoch": 0.6783274528939269, + "grad_norm": 1.9725308418273926, + "learning_rate": 2.4766407118645302e-06, + "loss": 0.7817, + "step": 33885 + }, + { + "epoch": 0.6783474714110552, + "grad_norm": 1.0872646570205688, + "learning_rate": 2.4763608463339577e-06, + "loss": 0.3096, + "step": 33886 + }, + { + "epoch": 0.6783674899281835, + "grad_norm": 1.107587456703186, + "learning_rate": 2.4760809914122192e-06, + "loss": 0.3085, + "step": 33887 + }, + { + "epoch": 0.6783875084453119, + "grad_norm": 1.1577658653259277, + "learning_rate": 2.4758011471004946e-06, + "loss": 0.3103, + "step": 33888 + }, + { + "epoch": 0.6784075269624402, + "grad_norm": 1.1185885667800903, + "learning_rate": 2.4755213133999588e-06, + "loss": 0.303, + "step": 33889 + }, + { + "epoch": 0.6784275454795686, + "grad_norm": 1.0999376773834229, + "learning_rate": 2.475241490311788e-06, + "loss": 0.2775, + "step": 33890 + }, + { + "epoch": 0.6784475639966969, + "grad_norm": 1.1329336166381836, + "learning_rate": 2.4749616778371576e-06, + "loss": 0.2873, + "step": 33891 + }, + { + "epoch": 0.6784675825138253, + "grad_norm": 1.2079826593399048, + "learning_rate": 2.4746818759772463e-06, + "loss": 0.3439, + "step": 33892 + }, + { + "epoch": 0.6784876010309536, + "grad_norm": 1.2694368362426758, + "learning_rate": 2.474402084733229e-06, + "loss": 0.3285, + "step": 33893 + }, + { + "epoch": 0.678507619548082, + "grad_norm": 1.0440272092819214, + "learning_rate": 2.4741223041062795e-06, + "loss": 0.2946, + "step": 33894 + }, + { + "epoch": 0.6785276380652103, + "grad_norm": 0.9650672078132629, + "learning_rate": 2.4738425340975785e-06, + "loss": 0.299, + "step": 33895 + }, + { + "epoch": 0.6785476565823386, + "grad_norm": 1.101675271987915, + "learning_rate": 2.473562774708298e-06, + "loss": 0.3254, + "step": 33896 + }, + { + "epoch": 0.678567675099467, + "grad_norm": 1.1987850666046143, + "learning_rate": 2.4732830259396177e-06, + "loss": 0.3327, + "step": 33897 + }, + { + "epoch": 0.6785876936165953, + "grad_norm": 1.2224384546279907, + "learning_rate": 2.473003287792712e-06, + "loss": 0.2836, + "step": 33898 + }, + { + "epoch": 0.6786077121337237, + "grad_norm": 1.159806728363037, + "learning_rate": 2.472723560268756e-06, + "loss": 0.2705, + "step": 33899 + }, + { + "epoch": 0.678627730650852, + "grad_norm": 1.2594380378723145, + "learning_rate": 2.4724438433689254e-06, + "loss": 0.2998, + "step": 33900 + }, + { + "epoch": 0.6786477491679804, + "grad_norm": 1.1067980527877808, + "learning_rate": 2.4721641370943985e-06, + "loss": 0.3085, + "step": 33901 + }, + { + "epoch": 0.6786677676851087, + "grad_norm": 1.333254337310791, + "learning_rate": 2.471884441446349e-06, + "loss": 0.3151, + "step": 33902 + }, + { + "epoch": 0.678687786202237, + "grad_norm": 1.1143176555633545, + "learning_rate": 2.471604756425954e-06, + "loss": 0.2647, + "step": 33903 + }, + { + "epoch": 0.6787078047193654, + "grad_norm": 1.045119285583496, + "learning_rate": 2.4713250820343875e-06, + "loss": 0.318, + "step": 33904 + }, + { + "epoch": 0.6787278232364937, + "grad_norm": 1.1816754341125488, + "learning_rate": 2.471045418272825e-06, + "loss": 0.3177, + "step": 33905 + }, + { + "epoch": 0.6787478417536221, + "grad_norm": 1.0595543384552002, + "learning_rate": 2.4707657651424455e-06, + "loss": 0.2691, + "step": 33906 + }, + { + "epoch": 0.6787678602707504, + "grad_norm": 1.1469300985336304, + "learning_rate": 2.47048612264442e-06, + "loss": 0.2888, + "step": 33907 + }, + { + "epoch": 0.6787878787878788, + "grad_norm": 1.0822725296020508, + "learning_rate": 2.470206490779928e-06, + "loss": 0.3, + "step": 33908 + }, + { + "epoch": 0.6788078973050071, + "grad_norm": 0.9828285574913025, + "learning_rate": 2.4699268695501426e-06, + "loss": 0.3069, + "step": 33909 + }, + { + "epoch": 0.6788279158221355, + "grad_norm": 1.1441261768341064, + "learning_rate": 2.4696472589562415e-06, + "loss": 0.3184, + "step": 33910 + }, + { + "epoch": 0.6788479343392638, + "grad_norm": 1.0029677152633667, + "learning_rate": 2.469367658999399e-06, + "loss": 0.3154, + "step": 33911 + }, + { + "epoch": 0.6788679528563921, + "grad_norm": 1.1329655647277832, + "learning_rate": 2.46908806968079e-06, + "loss": 0.3459, + "step": 33912 + }, + { + "epoch": 0.6788879713735205, + "grad_norm": 1.0765328407287598, + "learning_rate": 2.4688084910015903e-06, + "loss": 0.2746, + "step": 33913 + }, + { + "epoch": 0.6789079898906488, + "grad_norm": 1.1185716390609741, + "learning_rate": 2.4685289229629734e-06, + "loss": 0.2823, + "step": 33914 + }, + { + "epoch": 0.6789280084077772, + "grad_norm": 1.0765959024429321, + "learning_rate": 2.468249365566117e-06, + "loss": 0.2858, + "step": 33915 + }, + { + "epoch": 0.6789480269249055, + "grad_norm": 0.9922264814376831, + "learning_rate": 2.4679698188121965e-06, + "loss": 0.2833, + "step": 33916 + }, + { + "epoch": 0.6789680454420339, + "grad_norm": 1.758101224899292, + "learning_rate": 2.4676902827023858e-06, + "loss": 0.7234, + "step": 33917 + }, + { + "epoch": 0.6789880639591622, + "grad_norm": 1.2322747707366943, + "learning_rate": 2.467410757237858e-06, + "loss": 0.2688, + "step": 33918 + }, + { + "epoch": 0.6790080824762905, + "grad_norm": 1.2917406558990479, + "learning_rate": 2.4671312424197926e-06, + "loss": 0.2718, + "step": 33919 + }, + { + "epoch": 0.6790281009934189, + "grad_norm": 1.9158858060836792, + "learning_rate": 2.4668517382493594e-06, + "loss": 0.7262, + "step": 33920 + }, + { + "epoch": 0.6790481195105472, + "grad_norm": 1.0067393779754639, + "learning_rate": 2.4665722447277397e-06, + "loss": 0.2758, + "step": 33921 + }, + { + "epoch": 0.6790681380276756, + "grad_norm": 1.065495491027832, + "learning_rate": 2.466292761856102e-06, + "loss": 0.3108, + "step": 33922 + }, + { + "epoch": 0.6790881565448039, + "grad_norm": 1.1325498819351196, + "learning_rate": 2.466013289635627e-06, + "loss": 0.3236, + "step": 33923 + }, + { + "epoch": 0.6791081750619323, + "grad_norm": 1.2444729804992676, + "learning_rate": 2.4657338280674864e-06, + "loss": 0.2942, + "step": 33924 + }, + { + "epoch": 0.6791281935790606, + "grad_norm": 1.1759239435195923, + "learning_rate": 2.4654543771528555e-06, + "loss": 0.2922, + "step": 33925 + }, + { + "epoch": 0.679148212096189, + "grad_norm": 1.170943021774292, + "learning_rate": 2.465174936892909e-06, + "loss": 0.3317, + "step": 33926 + }, + { + "epoch": 0.6791682306133173, + "grad_norm": 1.2557944059371948, + "learning_rate": 2.46489550728882e-06, + "loss": 0.2889, + "step": 33927 + }, + { + "epoch": 0.6791882491304456, + "grad_norm": 1.0943677425384521, + "learning_rate": 2.464616088341766e-06, + "loss": 0.2377, + "step": 33928 + }, + { + "epoch": 0.679208267647574, + "grad_norm": 1.1144347190856934, + "learning_rate": 2.4643366800529204e-06, + "loss": 0.2758, + "step": 33929 + }, + { + "epoch": 0.6792282861647023, + "grad_norm": 1.1703755855560303, + "learning_rate": 2.464057282423458e-06, + "loss": 0.3371, + "step": 33930 + }, + { + "epoch": 0.6792483046818307, + "grad_norm": 1.0205755233764648, + "learning_rate": 2.463777895454551e-06, + "loss": 0.2478, + "step": 33931 + }, + { + "epoch": 0.679268323198959, + "grad_norm": 1.1114002466201782, + "learning_rate": 2.4634985191473783e-06, + "loss": 0.3158, + "step": 33932 + }, + { + "epoch": 0.6792883417160874, + "grad_norm": 1.1171313524246216, + "learning_rate": 2.463219153503109e-06, + "loss": 0.3074, + "step": 33933 + }, + { + "epoch": 0.6793083602332157, + "grad_norm": 1.1073812246322632, + "learning_rate": 2.4629397985229237e-06, + "loss": 0.3042, + "step": 33934 + }, + { + "epoch": 0.679328378750344, + "grad_norm": 1.141385555267334, + "learning_rate": 2.4626604542079924e-06, + "loss": 0.2918, + "step": 33935 + }, + { + "epoch": 0.6793483972674724, + "grad_norm": 1.1368794441223145, + "learning_rate": 2.4623811205594894e-06, + "loss": 0.272, + "step": 33936 + }, + { + "epoch": 0.6793684157846007, + "grad_norm": 1.1284228563308716, + "learning_rate": 2.4621017975785914e-06, + "loss": 0.2873, + "step": 33937 + }, + { + "epoch": 0.6793884343017291, + "grad_norm": 1.23042631149292, + "learning_rate": 2.461822485266471e-06, + "loss": 0.334, + "step": 33938 + }, + { + "epoch": 0.6794084528188574, + "grad_norm": 1.149466633796692, + "learning_rate": 2.4615431836243036e-06, + "loss": 0.3052, + "step": 33939 + }, + { + "epoch": 0.6794284713359858, + "grad_norm": 1.0863940715789795, + "learning_rate": 2.46126389265326e-06, + "loss": 0.2841, + "step": 33940 + }, + { + "epoch": 0.6794484898531141, + "grad_norm": 1.1752749681472778, + "learning_rate": 2.4609846123545183e-06, + "loss": 0.2902, + "step": 33941 + }, + { + "epoch": 0.6794685083702424, + "grad_norm": 1.1398285627365112, + "learning_rate": 2.4607053427292514e-06, + "loss": 0.3105, + "step": 33942 + }, + { + "epoch": 0.6794885268873708, + "grad_norm": 1.0005195140838623, + "learning_rate": 2.4604260837786325e-06, + "loss": 0.3002, + "step": 33943 + }, + { + "epoch": 0.6795085454044991, + "grad_norm": 1.1071230173110962, + "learning_rate": 2.460146835503834e-06, + "loss": 0.2727, + "step": 33944 + }, + { + "epoch": 0.6795285639216275, + "grad_norm": 1.025633454322815, + "learning_rate": 2.459867597906033e-06, + "loss": 0.279, + "step": 33945 + }, + { + "epoch": 0.6795485824387558, + "grad_norm": 1.1544270515441895, + "learning_rate": 2.459588370986401e-06, + "loss": 0.2982, + "step": 33946 + }, + { + "epoch": 0.6795686009558842, + "grad_norm": 1.1405431032180786, + "learning_rate": 2.4593091547461143e-06, + "loss": 0.3106, + "step": 33947 + }, + { + "epoch": 0.6795886194730125, + "grad_norm": 1.0520308017730713, + "learning_rate": 2.459029949186345e-06, + "loss": 0.2923, + "step": 33948 + }, + { + "epoch": 0.6796086379901409, + "grad_norm": 1.8472048044204712, + "learning_rate": 2.458750754308265e-06, + "loss": 0.2694, + "step": 33949 + }, + { + "epoch": 0.6796286565072692, + "grad_norm": 1.132627010345459, + "learning_rate": 2.4584715701130525e-06, + "loss": 0.3273, + "step": 33950 + }, + { + "epoch": 0.6796486750243975, + "grad_norm": 1.11235511302948, + "learning_rate": 2.458192396601878e-06, + "loss": 0.2668, + "step": 33951 + }, + { + "epoch": 0.6796686935415259, + "grad_norm": 1.1073060035705566, + "learning_rate": 2.4579132337759155e-06, + "loss": 0.3209, + "step": 33952 + }, + { + "epoch": 0.6796887120586542, + "grad_norm": 1.758966326713562, + "learning_rate": 2.457634081636337e-06, + "loss": 0.7391, + "step": 33953 + }, + { + "epoch": 0.6797087305757826, + "grad_norm": 1.166996717453003, + "learning_rate": 2.4573549401843194e-06, + "loss": 0.2611, + "step": 33954 + }, + { + "epoch": 0.6797287490929109, + "grad_norm": 1.1316096782684326, + "learning_rate": 2.457075809421035e-06, + "loss": 0.2687, + "step": 33955 + }, + { + "epoch": 0.6797487676100393, + "grad_norm": 1.0745177268981934, + "learning_rate": 2.4567966893476563e-06, + "loss": 0.3073, + "step": 33956 + }, + { + "epoch": 0.6797687861271676, + "grad_norm": 1.0465980768203735, + "learning_rate": 2.456517579965355e-06, + "loss": 0.2823, + "step": 33957 + }, + { + "epoch": 0.6797888046442959, + "grad_norm": 1.2394661903381348, + "learning_rate": 2.4562384812753086e-06, + "loss": 0.3241, + "step": 33958 + }, + { + "epoch": 0.6798088231614243, + "grad_norm": 1.0952386856079102, + "learning_rate": 2.4559593932786864e-06, + "loss": 0.3302, + "step": 33959 + }, + { + "epoch": 0.6798288416785526, + "grad_norm": 1.1774568557739258, + "learning_rate": 2.4556803159766653e-06, + "loss": 0.3007, + "step": 33960 + }, + { + "epoch": 0.679848860195681, + "grad_norm": 1.146700382232666, + "learning_rate": 2.455401249370416e-06, + "loss": 0.3055, + "step": 33961 + }, + { + "epoch": 0.6798688787128093, + "grad_norm": 1.1225643157958984, + "learning_rate": 2.4551221934611107e-06, + "loss": 0.3029, + "step": 33962 + }, + { + "epoch": 0.6798888972299377, + "grad_norm": 1.1067967414855957, + "learning_rate": 2.4548431482499264e-06, + "loss": 0.2842, + "step": 33963 + }, + { + "epoch": 0.679908915747066, + "grad_norm": 1.2238792181015015, + "learning_rate": 2.4545641137380333e-06, + "loss": 0.2951, + "step": 33964 + }, + { + "epoch": 0.6799289342641944, + "grad_norm": 1.1389727592468262, + "learning_rate": 2.4542850899266045e-06, + "loss": 0.2992, + "step": 33965 + }, + { + "epoch": 0.6799489527813227, + "grad_norm": 1.1343635320663452, + "learning_rate": 2.454006076816814e-06, + "loss": 0.2946, + "step": 33966 + }, + { + "epoch": 0.679968971298451, + "grad_norm": 1.015870451927185, + "learning_rate": 2.453727074409832e-06, + "loss": 0.2505, + "step": 33967 + }, + { + "epoch": 0.6799889898155794, + "grad_norm": 1.1548309326171875, + "learning_rate": 2.4534480827068353e-06, + "loss": 0.2729, + "step": 33968 + }, + { + "epoch": 0.6800090083327077, + "grad_norm": 1.2337710857391357, + "learning_rate": 2.4531691017089946e-06, + "loss": 0.3056, + "step": 33969 + }, + { + "epoch": 0.6800290268498361, + "grad_norm": 1.0347179174423218, + "learning_rate": 2.4528901314174815e-06, + "loss": 0.2886, + "step": 33970 + }, + { + "epoch": 0.6800490453669644, + "grad_norm": 1.0358023643493652, + "learning_rate": 2.4526111718334715e-06, + "loss": 0.3109, + "step": 33971 + }, + { + "epoch": 0.6800690638840928, + "grad_norm": 2.126466989517212, + "learning_rate": 2.452332222958134e-06, + "loss": 0.7521, + "step": 33972 + }, + { + "epoch": 0.6800890824012211, + "grad_norm": 1.1723006963729858, + "learning_rate": 2.452053284792646e-06, + "loss": 0.2791, + "step": 33973 + }, + { + "epoch": 0.6801091009183494, + "grad_norm": 0.903542697429657, + "learning_rate": 2.4517743573381775e-06, + "loss": 0.2746, + "step": 33974 + }, + { + "epoch": 0.6801291194354778, + "grad_norm": 1.1725915670394897, + "learning_rate": 2.4514954405958992e-06, + "loss": 0.2959, + "step": 33975 + }, + { + "epoch": 0.6801491379526061, + "grad_norm": 1.991402268409729, + "learning_rate": 2.4512165345669873e-06, + "loss": 0.6908, + "step": 33976 + }, + { + "epoch": 0.6801691564697345, + "grad_norm": 0.984974205493927, + "learning_rate": 2.450937639252613e-06, + "loss": 0.2967, + "step": 33977 + }, + { + "epoch": 0.6801891749868628, + "grad_norm": 1.1474406719207764, + "learning_rate": 2.4506587546539486e-06, + "loss": 0.289, + "step": 33978 + }, + { + "epoch": 0.6802091935039912, + "grad_norm": 1.2041964530944824, + "learning_rate": 2.4503798807721655e-06, + "loss": 0.2955, + "step": 33979 + }, + { + "epoch": 0.6802292120211195, + "grad_norm": 1.0752408504486084, + "learning_rate": 2.450101017608435e-06, + "loss": 0.2719, + "step": 33980 + }, + { + "epoch": 0.6802492305382479, + "grad_norm": 1.0617125034332275, + "learning_rate": 2.449822165163933e-06, + "loss": 0.2826, + "step": 33981 + }, + { + "epoch": 0.6802692490553762, + "grad_norm": 2.0718283653259277, + "learning_rate": 2.4495433234398296e-06, + "loss": 0.7366, + "step": 33982 + }, + { + "epoch": 0.6802892675725045, + "grad_norm": 1.2959421873092651, + "learning_rate": 2.4492644924372955e-06, + "loss": 0.2623, + "step": 33983 + }, + { + "epoch": 0.6803092860896329, + "grad_norm": 1.1046693325042725, + "learning_rate": 2.4489856721575065e-06, + "loss": 0.3691, + "step": 33984 + }, + { + "epoch": 0.6803293046067612, + "grad_norm": 1.0966066122055054, + "learning_rate": 2.4487068626016306e-06, + "loss": 0.2879, + "step": 33985 + }, + { + "epoch": 0.6803493231238896, + "grad_norm": 1.130843997001648, + "learning_rate": 2.4484280637708444e-06, + "loss": 0.3349, + "step": 33986 + }, + { + "epoch": 0.6803693416410179, + "grad_norm": 1.9319970607757568, + "learning_rate": 2.4481492756663172e-06, + "loss": 0.8345, + "step": 33987 + }, + { + "epoch": 0.6803893601581463, + "grad_norm": 1.1459192037582397, + "learning_rate": 2.4478704982892213e-06, + "loss": 0.3286, + "step": 33988 + }, + { + "epoch": 0.6804093786752746, + "grad_norm": 1.905893087387085, + "learning_rate": 2.4475917316407265e-06, + "loss": 0.7597, + "step": 33989 + }, + { + "epoch": 0.6804293971924029, + "grad_norm": 1.851592779159546, + "learning_rate": 2.447312975722009e-06, + "loss": 0.8317, + "step": 33990 + }, + { + "epoch": 0.6804494157095313, + "grad_norm": 0.9912912249565125, + "learning_rate": 2.447034230534239e-06, + "loss": 0.2795, + "step": 33991 + }, + { + "epoch": 0.6804694342266596, + "grad_norm": 1.0305819511413574, + "learning_rate": 2.446755496078587e-06, + "loss": 0.3473, + "step": 33992 + }, + { + "epoch": 0.680489452743788, + "grad_norm": 1.1752516031265259, + "learning_rate": 2.4464767723562233e-06, + "loss": 0.2969, + "step": 33993 + }, + { + "epoch": 0.6805094712609163, + "grad_norm": 1.0192872285842896, + "learning_rate": 2.4461980593683236e-06, + "loss": 0.2925, + "step": 33994 + }, + { + "epoch": 0.6805294897780447, + "grad_norm": 1.1121833324432373, + "learning_rate": 2.445919357116058e-06, + "loss": 0.3173, + "step": 33995 + }, + { + "epoch": 0.680549508295173, + "grad_norm": 1.0293622016906738, + "learning_rate": 2.4456406656005954e-06, + "loss": 0.2803, + "step": 33996 + }, + { + "epoch": 0.6805695268123014, + "grad_norm": 1.002378225326538, + "learning_rate": 2.4453619848231124e-06, + "loss": 0.2984, + "step": 33997 + }, + { + "epoch": 0.6805895453294297, + "grad_norm": 1.0635409355163574, + "learning_rate": 2.445083314784775e-06, + "loss": 0.2745, + "step": 33998 + }, + { + "epoch": 0.680609563846558, + "grad_norm": 1.2014827728271484, + "learning_rate": 2.4448046554867605e-06, + "loss": 0.2782, + "step": 33999 + }, + { + "epoch": 0.6806295823636864, + "grad_norm": 1.195446491241455, + "learning_rate": 2.4445260069302367e-06, + "loss": 0.2989, + "step": 34000 + }, + { + "epoch": 0.6806496008808147, + "grad_norm": 1.1342016458511353, + "learning_rate": 2.444247369116376e-06, + "loss": 0.3221, + "step": 34001 + }, + { + "epoch": 0.6806696193979431, + "grad_norm": 1.1461327075958252, + "learning_rate": 2.4439687420463472e-06, + "loss": 0.3138, + "step": 34002 + }, + { + "epoch": 0.6806896379150714, + "grad_norm": 1.1469430923461914, + "learning_rate": 2.4436901257213257e-06, + "loss": 0.2531, + "step": 34003 + }, + { + "epoch": 0.6807096564321998, + "grad_norm": 1.0887593030929565, + "learning_rate": 2.4434115201424807e-06, + "loss": 0.2967, + "step": 34004 + }, + { + "epoch": 0.6807296749493281, + "grad_norm": 1.1843266487121582, + "learning_rate": 2.4431329253109838e-06, + "loss": 0.3231, + "step": 34005 + }, + { + "epoch": 0.6807496934664564, + "grad_norm": 1.0720608234405518, + "learning_rate": 2.4428543412280033e-06, + "loss": 0.3333, + "step": 34006 + }, + { + "epoch": 0.6807697119835848, + "grad_norm": 1.0772799253463745, + "learning_rate": 2.4425757678947154e-06, + "loss": 0.2889, + "step": 34007 + }, + { + "epoch": 0.6807897305007131, + "grad_norm": 1.3229228258132935, + "learning_rate": 2.4422972053122886e-06, + "loss": 0.2988, + "step": 34008 + }, + { + "epoch": 0.6808097490178415, + "grad_norm": 1.0600098371505737, + "learning_rate": 2.4420186534818913e-06, + "loss": 0.2572, + "step": 34009 + }, + { + "epoch": 0.6808297675349698, + "grad_norm": 1.1799323558807373, + "learning_rate": 2.4417401124046997e-06, + "loss": 0.3139, + "step": 34010 + }, + { + "epoch": 0.6808497860520982, + "grad_norm": 1.0335224866867065, + "learning_rate": 2.4414615820818797e-06, + "loss": 0.3039, + "step": 34011 + }, + { + "epoch": 0.6808698045692265, + "grad_norm": 1.195241928100586, + "learning_rate": 2.4411830625146066e-06, + "loss": 0.2893, + "step": 34012 + }, + { + "epoch": 0.6808898230863549, + "grad_norm": 1.0902478694915771, + "learning_rate": 2.44090455370405e-06, + "loss": 0.3204, + "step": 34013 + }, + { + "epoch": 0.6809098416034832, + "grad_norm": 1.948639988899231, + "learning_rate": 2.440626055651379e-06, + "loss": 0.7637, + "step": 34014 + }, + { + "epoch": 0.6809298601206115, + "grad_norm": 1.0492299795150757, + "learning_rate": 2.4403475683577638e-06, + "loss": 0.3205, + "step": 34015 + }, + { + "epoch": 0.6809498786377399, + "grad_norm": 1.79261314868927, + "learning_rate": 2.4400690918243786e-06, + "loss": 0.2858, + "step": 34016 + }, + { + "epoch": 0.6809698971548682, + "grad_norm": 1.1552979946136475, + "learning_rate": 2.439790626052392e-06, + "loss": 0.2919, + "step": 34017 + }, + { + "epoch": 0.6809899156719966, + "grad_norm": 1.1065236330032349, + "learning_rate": 2.439512171042974e-06, + "loss": 0.2823, + "step": 34018 + }, + { + "epoch": 0.6810099341891249, + "grad_norm": 1.1022731065750122, + "learning_rate": 2.4392337267972965e-06, + "loss": 0.2726, + "step": 34019 + }, + { + "epoch": 0.6810299527062533, + "grad_norm": 1.054065465927124, + "learning_rate": 2.438955293316527e-06, + "loss": 0.3175, + "step": 34020 + }, + { + "epoch": 0.6810499712233816, + "grad_norm": 1.1144860982894897, + "learning_rate": 2.4386768706018404e-06, + "loss": 0.318, + "step": 34021 + }, + { + "epoch": 0.6810699897405099, + "grad_norm": 1.0014852285385132, + "learning_rate": 2.4383984586544036e-06, + "loss": 0.2987, + "step": 34022 + }, + { + "epoch": 0.6810900082576383, + "grad_norm": 1.8585232496261597, + "learning_rate": 2.43812005747539e-06, + "loss": 0.7329, + "step": 34023 + }, + { + "epoch": 0.6811100267747666, + "grad_norm": 0.9772434234619141, + "learning_rate": 2.437841667065966e-06, + "loss": 0.2902, + "step": 34024 + }, + { + "epoch": 0.681130045291895, + "grad_norm": 1.0862414836883545, + "learning_rate": 2.4375632874273066e-06, + "loss": 0.3297, + "step": 34025 + }, + { + "epoch": 0.6811500638090233, + "grad_norm": 1.0510112047195435, + "learning_rate": 2.4372849185605796e-06, + "loss": 0.2716, + "step": 34026 + }, + { + "epoch": 0.6811700823261517, + "grad_norm": 1.0477689504623413, + "learning_rate": 2.437006560466955e-06, + "loss": 0.2773, + "step": 34027 + }, + { + "epoch": 0.68119010084328, + "grad_norm": 1.0951968431472778, + "learning_rate": 2.436728213147604e-06, + "loss": 0.3358, + "step": 34028 + }, + { + "epoch": 0.6812101193604084, + "grad_norm": 1.9335598945617676, + "learning_rate": 2.4364498766036933e-06, + "loss": 0.7374, + "step": 34029 + }, + { + "epoch": 0.6812301378775367, + "grad_norm": 1.1151976585388184, + "learning_rate": 2.436171550836398e-06, + "loss": 0.3022, + "step": 34030 + }, + { + "epoch": 0.681250156394665, + "grad_norm": 1.159935712814331, + "learning_rate": 2.4358932358468855e-06, + "loss": 0.3442, + "step": 34031 + }, + { + "epoch": 0.6812701749117934, + "grad_norm": 1.086100697517395, + "learning_rate": 2.4356149316363264e-06, + "loss": 0.3284, + "step": 34032 + }, + { + "epoch": 0.6812901934289217, + "grad_norm": 1.1025468111038208, + "learning_rate": 2.435336638205888e-06, + "loss": 0.2916, + "step": 34033 + }, + { + "epoch": 0.6813102119460501, + "grad_norm": 1.052175521850586, + "learning_rate": 2.435058355556744e-06, + "loss": 0.3295, + "step": 34034 + }, + { + "epoch": 0.6813302304631784, + "grad_norm": 1.1634571552276611, + "learning_rate": 2.4347800836900615e-06, + "loss": 0.3576, + "step": 34035 + }, + { + "epoch": 0.6813502489803068, + "grad_norm": 1.1469141244888306, + "learning_rate": 2.4345018226070132e-06, + "loss": 0.3203, + "step": 34036 + }, + { + "epoch": 0.6813702674974351, + "grad_norm": 1.1272519826889038, + "learning_rate": 2.4342235723087654e-06, + "loss": 0.3442, + "step": 34037 + }, + { + "epoch": 0.6813902860145634, + "grad_norm": 0.9879494309425354, + "learning_rate": 2.433945332796491e-06, + "loss": 0.2631, + "step": 34038 + }, + { + "epoch": 0.6814103045316918, + "grad_norm": 0.9613001346588135, + "learning_rate": 2.4336671040713585e-06, + "loss": 0.2954, + "step": 34039 + }, + { + "epoch": 0.6814303230488201, + "grad_norm": 1.2677556276321411, + "learning_rate": 2.433388886134537e-06, + "loss": 0.3201, + "step": 34040 + }, + { + "epoch": 0.6814503415659485, + "grad_norm": 1.167671799659729, + "learning_rate": 2.4331106789871966e-06, + "loss": 0.2912, + "step": 34041 + }, + { + "epoch": 0.6814703600830768, + "grad_norm": 1.2207422256469727, + "learning_rate": 2.4328324826305043e-06, + "loss": 0.3265, + "step": 34042 + }, + { + "epoch": 0.6814903786002052, + "grad_norm": 1.1117007732391357, + "learning_rate": 2.432554297065634e-06, + "loss": 0.2809, + "step": 34043 + }, + { + "epoch": 0.6815103971173335, + "grad_norm": 1.2137646675109863, + "learning_rate": 2.4322761222937523e-06, + "loss": 0.3073, + "step": 34044 + }, + { + "epoch": 0.6815304156344619, + "grad_norm": 1.056125521659851, + "learning_rate": 2.43199795831603e-06, + "loss": 0.2691, + "step": 34045 + }, + { + "epoch": 0.6815504341515902, + "grad_norm": 1.1065146923065186, + "learning_rate": 2.431719805133633e-06, + "loss": 0.2563, + "step": 34046 + }, + { + "epoch": 0.6815704526687185, + "grad_norm": 1.16524076461792, + "learning_rate": 2.4314416627477356e-06, + "loss": 0.2984, + "step": 34047 + }, + { + "epoch": 0.6815904711858469, + "grad_norm": 1.0957565307617188, + "learning_rate": 2.4311635311595027e-06, + "loss": 0.3205, + "step": 34048 + }, + { + "epoch": 0.6816104897029752, + "grad_norm": 1.1117560863494873, + "learning_rate": 2.4308854103701073e-06, + "loss": 0.3281, + "step": 34049 + }, + { + "epoch": 0.6816305082201036, + "grad_norm": 1.9317772388458252, + "learning_rate": 2.4306073003807175e-06, + "loss": 0.7599, + "step": 34050 + }, + { + "epoch": 0.6816505267372319, + "grad_norm": 1.1005349159240723, + "learning_rate": 2.430329201192499e-06, + "loss": 0.3066, + "step": 34051 + }, + { + "epoch": 0.6816705452543603, + "grad_norm": 2.060779094696045, + "learning_rate": 2.430051112806625e-06, + "loss": 0.6952, + "step": 34052 + }, + { + "epoch": 0.6816905637714886, + "grad_norm": 1.0748803615570068, + "learning_rate": 2.429773035224264e-06, + "loss": 0.2871, + "step": 34053 + }, + { + "epoch": 0.6817105822886169, + "grad_norm": 1.1241081953048706, + "learning_rate": 2.4294949684465835e-06, + "loss": 0.2876, + "step": 34054 + }, + { + "epoch": 0.6817306008057453, + "grad_norm": 2.143990993499756, + "learning_rate": 2.4292169124747513e-06, + "loss": 0.6925, + "step": 34055 + }, + { + "epoch": 0.6817506193228736, + "grad_norm": 1.1702603101730347, + "learning_rate": 2.4289388673099397e-06, + "loss": 0.3249, + "step": 34056 + }, + { + "epoch": 0.681770637840002, + "grad_norm": 1.030896544456482, + "learning_rate": 2.4286608329533156e-06, + "loss": 0.2746, + "step": 34057 + }, + { + "epoch": 0.6817906563571303, + "grad_norm": 1.1535180807113647, + "learning_rate": 2.4283828094060487e-06, + "loss": 0.2721, + "step": 34058 + }, + { + "epoch": 0.6818106748742587, + "grad_norm": 1.0746432542800903, + "learning_rate": 2.4281047966693065e-06, + "loss": 0.2866, + "step": 34059 + }, + { + "epoch": 0.681830693391387, + "grad_norm": 1.092370629310608, + "learning_rate": 2.4278267947442564e-06, + "loss": 0.2923, + "step": 34060 + }, + { + "epoch": 0.6818507119085154, + "grad_norm": 1.0591189861297607, + "learning_rate": 2.4275488036320693e-06, + "loss": 0.2683, + "step": 34061 + }, + { + "epoch": 0.6818707304256437, + "grad_norm": 1.0930410623550415, + "learning_rate": 2.427270823333915e-06, + "loss": 0.3118, + "step": 34062 + }, + { + "epoch": 0.681890748942772, + "grad_norm": 1.8922494649887085, + "learning_rate": 2.4269928538509607e-06, + "loss": 0.7485, + "step": 34063 + }, + { + "epoch": 0.6819107674599004, + "grad_norm": 1.9282349348068237, + "learning_rate": 2.4267148951843723e-06, + "loss": 0.8101, + "step": 34064 + }, + { + "epoch": 0.6819307859770287, + "grad_norm": 0.984358549118042, + "learning_rate": 2.4264369473353235e-06, + "loss": 0.2823, + "step": 34065 + }, + { + "epoch": 0.6819508044941571, + "grad_norm": 1.1265629529953003, + "learning_rate": 2.4261590103049792e-06, + "loss": 0.288, + "step": 34066 + }, + { + "epoch": 0.6819708230112854, + "grad_norm": 1.1667371988296509, + "learning_rate": 2.4258810840945085e-06, + "loss": 0.3202, + "step": 34067 + }, + { + "epoch": 0.6819908415284138, + "grad_norm": 1.0531920194625854, + "learning_rate": 2.425603168705078e-06, + "loss": 0.2903, + "step": 34068 + }, + { + "epoch": 0.6820108600455421, + "grad_norm": 1.0746067762374878, + "learning_rate": 2.42532526413786e-06, + "loss": 0.2965, + "step": 34069 + }, + { + "epoch": 0.6820308785626704, + "grad_norm": 1.1159472465515137, + "learning_rate": 2.4250473703940204e-06, + "loss": 0.3216, + "step": 34070 + }, + { + "epoch": 0.6820508970797988, + "grad_norm": 1.2591848373413086, + "learning_rate": 2.4247694874747273e-06, + "loss": 0.2882, + "step": 34071 + }, + { + "epoch": 0.6820709155969271, + "grad_norm": 1.243708610534668, + "learning_rate": 2.424491615381149e-06, + "loss": 0.289, + "step": 34072 + }, + { + "epoch": 0.6820909341140555, + "grad_norm": 1.0205727815628052, + "learning_rate": 2.424213754114452e-06, + "loss": 0.2938, + "step": 34073 + }, + { + "epoch": 0.6821109526311838, + "grad_norm": 1.195076584815979, + "learning_rate": 2.4239359036758058e-06, + "loss": 0.3327, + "step": 34074 + }, + { + "epoch": 0.6821309711483122, + "grad_norm": 1.0025932788848877, + "learning_rate": 2.423658064066381e-06, + "loss": 0.2988, + "step": 34075 + }, + { + "epoch": 0.6821509896654405, + "grad_norm": 1.06851065158844, + "learning_rate": 2.423380235287343e-06, + "loss": 0.2888, + "step": 34076 + }, + { + "epoch": 0.6821710081825689, + "grad_norm": 1.1151920557022095, + "learning_rate": 2.4231024173398583e-06, + "loss": 0.2794, + "step": 34077 + }, + { + "epoch": 0.6821910266996972, + "grad_norm": 1.1292070150375366, + "learning_rate": 2.4228246102250986e-06, + "loss": 0.3146, + "step": 34078 + }, + { + "epoch": 0.6822110452168255, + "grad_norm": 1.2023983001708984, + "learning_rate": 2.422546813944229e-06, + "loss": 0.3166, + "step": 34079 + }, + { + "epoch": 0.6822310637339539, + "grad_norm": 1.3530670404434204, + "learning_rate": 2.4222690284984183e-06, + "loss": 0.3108, + "step": 34080 + }, + { + "epoch": 0.6822510822510822, + "grad_norm": 1.178856611251831, + "learning_rate": 2.4219912538888342e-06, + "loss": 0.2783, + "step": 34081 + }, + { + "epoch": 0.6822711007682106, + "grad_norm": 1.1573247909545898, + "learning_rate": 2.421713490116641e-06, + "loss": 0.3141, + "step": 34082 + }, + { + "epoch": 0.6822911192853389, + "grad_norm": 1.1937717199325562, + "learning_rate": 2.4214357371830122e-06, + "loss": 0.2839, + "step": 34083 + }, + { + "epoch": 0.6823111378024673, + "grad_norm": 2.1724298000335693, + "learning_rate": 2.421157995089113e-06, + "loss": 0.8148, + "step": 34084 + }, + { + "epoch": 0.6823311563195956, + "grad_norm": 1.0602726936340332, + "learning_rate": 2.42088026383611e-06, + "loss": 0.2869, + "step": 34085 + }, + { + "epoch": 0.6823511748367239, + "grad_norm": 1.0723843574523926, + "learning_rate": 2.4206025434251696e-06, + "loss": 0.2934, + "step": 34086 + }, + { + "epoch": 0.6823711933538523, + "grad_norm": 1.2200942039489746, + "learning_rate": 2.420324833857461e-06, + "loss": 0.3366, + "step": 34087 + }, + { + "epoch": 0.6823912118709806, + "grad_norm": 1.2569071054458618, + "learning_rate": 2.4200471351341536e-06, + "loss": 0.2934, + "step": 34088 + }, + { + "epoch": 0.682411230388109, + "grad_norm": 1.173685073852539, + "learning_rate": 2.4197694472564125e-06, + "loss": 0.2845, + "step": 34089 + }, + { + "epoch": 0.6824312489052373, + "grad_norm": 1.0904443264007568, + "learning_rate": 2.419491770225404e-06, + "loss": 0.2656, + "step": 34090 + }, + { + "epoch": 0.6824512674223657, + "grad_norm": 1.1193292140960693, + "learning_rate": 2.4192141040422983e-06, + "loss": 0.26, + "step": 34091 + }, + { + "epoch": 0.682471285939494, + "grad_norm": 1.1614587306976318, + "learning_rate": 2.4189364487082613e-06, + "loss": 0.3154, + "step": 34092 + }, + { + "epoch": 0.6824913044566224, + "grad_norm": 1.0802470445632935, + "learning_rate": 2.4186588042244602e-06, + "loss": 0.2573, + "step": 34093 + }, + { + "epoch": 0.6825113229737507, + "grad_norm": 1.1306607723236084, + "learning_rate": 2.4183811705920613e-06, + "loss": 0.363, + "step": 34094 + }, + { + "epoch": 0.682531341490879, + "grad_norm": 1.7623640298843384, + "learning_rate": 2.418103547812231e-06, + "loss": 0.8053, + "step": 34095 + }, + { + "epoch": 0.6825513600080074, + "grad_norm": 1.2596983909606934, + "learning_rate": 2.4178259358861393e-06, + "loss": 0.3028, + "step": 34096 + }, + { + "epoch": 0.6825713785251357, + "grad_norm": 1.7781157493591309, + "learning_rate": 2.4175483348149524e-06, + "loss": 0.7748, + "step": 34097 + }, + { + "epoch": 0.6825913970422641, + "grad_norm": 1.4416651725769043, + "learning_rate": 2.417270744599836e-06, + "loss": 0.313, + "step": 34098 + }, + { + "epoch": 0.6826114155593924, + "grad_norm": 1.0937480926513672, + "learning_rate": 2.416993165241956e-06, + "loss": 0.2734, + "step": 34099 + }, + { + "epoch": 0.6826314340765208, + "grad_norm": 1.0837713479995728, + "learning_rate": 2.4167155967424806e-06, + "loss": 0.3084, + "step": 34100 + }, + { + "epoch": 0.6826514525936491, + "grad_norm": 1.17937433719635, + "learning_rate": 2.416438039102579e-06, + "loss": 0.3298, + "step": 34101 + }, + { + "epoch": 0.6826714711107774, + "grad_norm": 1.0769026279449463, + "learning_rate": 2.4161604923234155e-06, + "loss": 0.2794, + "step": 34102 + }, + { + "epoch": 0.6826914896279058, + "grad_norm": 2.037485122680664, + "learning_rate": 2.4158829564061574e-06, + "loss": 0.823, + "step": 34103 + }, + { + "epoch": 0.6827115081450341, + "grad_norm": 1.1351923942565918, + "learning_rate": 2.4156054313519694e-06, + "loss": 0.3423, + "step": 34104 + }, + { + "epoch": 0.6827315266621625, + "grad_norm": 1.191979169845581, + "learning_rate": 2.415327917162022e-06, + "loss": 0.2947, + "step": 34105 + }, + { + "epoch": 0.6827515451792908, + "grad_norm": 1.140812873840332, + "learning_rate": 2.4150504138374797e-06, + "loss": 0.3154, + "step": 34106 + }, + { + "epoch": 0.6827715636964192, + "grad_norm": 1.1584655046463013, + "learning_rate": 2.414772921379509e-06, + "loss": 0.2888, + "step": 34107 + }, + { + "epoch": 0.6827915822135475, + "grad_norm": 1.1382865905761719, + "learning_rate": 2.414495439789275e-06, + "loss": 0.2644, + "step": 34108 + }, + { + "epoch": 0.6828116007306759, + "grad_norm": 1.125091791152954, + "learning_rate": 2.414217969067948e-06, + "loss": 0.3135, + "step": 34109 + }, + { + "epoch": 0.6828316192478042, + "grad_norm": 1.0621305704116821, + "learning_rate": 2.413940509216691e-06, + "loss": 0.2732, + "step": 34110 + }, + { + "epoch": 0.6828516377649325, + "grad_norm": 1.2100547552108765, + "learning_rate": 2.413663060236673e-06, + "loss": 0.2681, + "step": 34111 + }, + { + "epoch": 0.6828716562820609, + "grad_norm": 1.0962028503417969, + "learning_rate": 2.4133856221290563e-06, + "loss": 0.2931, + "step": 34112 + }, + { + "epoch": 0.6828916747991892, + "grad_norm": 1.8280175924301147, + "learning_rate": 2.41310819489501e-06, + "loss": 0.7908, + "step": 34113 + }, + { + "epoch": 0.6829116933163176, + "grad_norm": 1.0991973876953125, + "learning_rate": 2.4128307785357014e-06, + "loss": 0.2782, + "step": 34114 + }, + { + "epoch": 0.6829317118334459, + "grad_norm": 1.2080063819885254, + "learning_rate": 2.412553373052296e-06, + "loss": 0.3223, + "step": 34115 + }, + { + "epoch": 0.6829517303505743, + "grad_norm": 1.2140830755233765, + "learning_rate": 2.412275978445959e-06, + "loss": 0.3148, + "step": 34116 + }, + { + "epoch": 0.6829717488677026, + "grad_norm": 1.363610863685608, + "learning_rate": 2.411998594717856e-06, + "loss": 0.3024, + "step": 34117 + }, + { + "epoch": 0.6829917673848309, + "grad_norm": 1.0915323495864868, + "learning_rate": 2.411721221869155e-06, + "loss": 0.3142, + "step": 34118 + }, + { + "epoch": 0.6830117859019593, + "grad_norm": 1.9249629974365234, + "learning_rate": 2.411443859901021e-06, + "loss": 0.2896, + "step": 34119 + }, + { + "epoch": 0.6830318044190876, + "grad_norm": 1.8245803117752075, + "learning_rate": 2.4111665088146203e-06, + "loss": 0.7645, + "step": 34120 + }, + { + "epoch": 0.683051822936216, + "grad_norm": 1.0890920162200928, + "learning_rate": 2.4108891686111162e-06, + "loss": 0.3034, + "step": 34121 + }, + { + "epoch": 0.6830718414533443, + "grad_norm": 1.1269400119781494, + "learning_rate": 2.410611839291679e-06, + "loss": 0.2871, + "step": 34122 + }, + { + "epoch": 0.6830918599704727, + "grad_norm": 1.1892822980880737, + "learning_rate": 2.4103345208574725e-06, + "loss": 0.355, + "step": 34123 + }, + { + "epoch": 0.683111878487601, + "grad_norm": 1.1012604236602783, + "learning_rate": 2.4100572133096622e-06, + "loss": 0.3116, + "step": 34124 + }, + { + "epoch": 0.6831318970047294, + "grad_norm": 1.0219205617904663, + "learning_rate": 2.409779916649412e-06, + "loss": 0.2623, + "step": 34125 + }, + { + "epoch": 0.6831519155218577, + "grad_norm": 1.0340152978897095, + "learning_rate": 2.4095026308778895e-06, + "loss": 0.2406, + "step": 34126 + }, + { + "epoch": 0.683171934038986, + "grad_norm": 1.099316954612732, + "learning_rate": 2.4092253559962627e-06, + "loss": 0.2596, + "step": 34127 + }, + { + "epoch": 0.6831919525561144, + "grad_norm": 1.1920418739318848, + "learning_rate": 2.4089480920056945e-06, + "loss": 0.3117, + "step": 34128 + }, + { + "epoch": 0.6832119710732427, + "grad_norm": 1.0758037567138672, + "learning_rate": 2.4086708389073516e-06, + "loss": 0.3108, + "step": 34129 + }, + { + "epoch": 0.6832319895903711, + "grad_norm": 1.1262286901474, + "learning_rate": 2.4083935967023963e-06, + "loss": 0.2518, + "step": 34130 + }, + { + "epoch": 0.6832520081074994, + "grad_norm": 1.1680970191955566, + "learning_rate": 2.408116365391999e-06, + "loss": 0.3356, + "step": 34131 + }, + { + "epoch": 0.6832720266246278, + "grad_norm": 1.0353974103927612, + "learning_rate": 2.4078391449773225e-06, + "loss": 0.2877, + "step": 34132 + }, + { + "epoch": 0.6832920451417561, + "grad_norm": 0.9232929944992065, + "learning_rate": 2.4075619354595324e-06, + "loss": 0.2571, + "step": 34133 + }, + { + "epoch": 0.6833120636588844, + "grad_norm": 1.0939501523971558, + "learning_rate": 2.407284736839794e-06, + "loss": 0.3229, + "step": 34134 + }, + { + "epoch": 0.6833320821760128, + "grad_norm": 1.3709267377853394, + "learning_rate": 2.407007549119271e-06, + "loss": 0.3342, + "step": 34135 + }, + { + "epoch": 0.6833521006931411, + "grad_norm": 1.0866620540618896, + "learning_rate": 2.406730372299132e-06, + "loss": 0.2649, + "step": 34136 + }, + { + "epoch": 0.6833721192102695, + "grad_norm": 1.3113319873809814, + "learning_rate": 2.40645320638054e-06, + "loss": 0.3455, + "step": 34137 + }, + { + "epoch": 0.6833921377273978, + "grad_norm": 1.9713126420974731, + "learning_rate": 2.406176051364659e-06, + "loss": 0.7669, + "step": 34138 + }, + { + "epoch": 0.6834121562445262, + "grad_norm": 1.3728203773498535, + "learning_rate": 2.4058989072526556e-06, + "loss": 0.3032, + "step": 34139 + }, + { + "epoch": 0.6834321747616545, + "grad_norm": 1.0650993585586548, + "learning_rate": 2.4056217740456973e-06, + "loss": 0.2884, + "step": 34140 + }, + { + "epoch": 0.6834521932787829, + "grad_norm": 2.0220298767089844, + "learning_rate": 2.4053446517449464e-06, + "loss": 0.7861, + "step": 34141 + }, + { + "epoch": 0.6834722117959112, + "grad_norm": 1.0898908376693726, + "learning_rate": 2.405067540351568e-06, + "loss": 0.3116, + "step": 34142 + }, + { + "epoch": 0.6834922303130395, + "grad_norm": 1.1713000535964966, + "learning_rate": 2.4047904398667273e-06, + "loss": 0.3281, + "step": 34143 + }, + { + "epoch": 0.6835122488301679, + "grad_norm": 1.286216139793396, + "learning_rate": 2.4045133502915872e-06, + "loss": 0.2699, + "step": 34144 + }, + { + "epoch": 0.6835322673472962, + "grad_norm": 1.2157081365585327, + "learning_rate": 2.4042362716273166e-06, + "loss": 0.2921, + "step": 34145 + }, + { + "epoch": 0.6835522858644246, + "grad_norm": 1.0273185968399048, + "learning_rate": 2.403959203875078e-06, + "loss": 0.3064, + "step": 34146 + }, + { + "epoch": 0.6835723043815529, + "grad_norm": 1.1641945838928223, + "learning_rate": 2.4036821470360363e-06, + "loss": 0.3373, + "step": 34147 + }, + { + "epoch": 0.6835923228986813, + "grad_norm": 1.0930235385894775, + "learning_rate": 2.4034051011113542e-06, + "loss": 0.265, + "step": 34148 + }, + { + "epoch": 0.6836123414158096, + "grad_norm": 1.0661556720733643, + "learning_rate": 2.4031280661021995e-06, + "loss": 0.2491, + "step": 34149 + }, + { + "epoch": 0.6836323599329379, + "grad_norm": 1.0603364706039429, + "learning_rate": 2.402851042009736e-06, + "loss": 0.2881, + "step": 34150 + }, + { + "epoch": 0.6836523784500663, + "grad_norm": 1.2462363243103027, + "learning_rate": 2.402574028835126e-06, + "loss": 0.2725, + "step": 34151 + }, + { + "epoch": 0.6836723969671946, + "grad_norm": 1.2911142110824585, + "learning_rate": 2.402297026579536e-06, + "loss": 0.3408, + "step": 34152 + }, + { + "epoch": 0.683692415484323, + "grad_norm": 1.1490484476089478, + "learning_rate": 2.402020035244132e-06, + "loss": 0.3488, + "step": 34153 + }, + { + "epoch": 0.6837124340014513, + "grad_norm": 1.1350017786026, + "learning_rate": 2.401743054830077e-06, + "loss": 0.2848, + "step": 34154 + }, + { + "epoch": 0.6837324525185797, + "grad_norm": 1.059472680091858, + "learning_rate": 2.401466085338534e-06, + "loss": 0.2988, + "step": 34155 + }, + { + "epoch": 0.683752471035708, + "grad_norm": 1.2511608600616455, + "learning_rate": 2.4011891267706693e-06, + "loss": 0.3378, + "step": 34156 + }, + { + "epoch": 0.6837724895528364, + "grad_norm": 1.8898190259933472, + "learning_rate": 2.400912179127644e-06, + "loss": 0.7918, + "step": 34157 + }, + { + "epoch": 0.6837925080699647, + "grad_norm": 1.8274633884429932, + "learning_rate": 2.4006352424106264e-06, + "loss": 0.742, + "step": 34158 + }, + { + "epoch": 0.683812526587093, + "grad_norm": 1.0185556411743164, + "learning_rate": 2.4003583166207793e-06, + "loss": 0.2611, + "step": 34159 + }, + { + "epoch": 0.6838325451042214, + "grad_norm": 1.1253859996795654, + "learning_rate": 2.4000814017592662e-06, + "loss": 0.2911, + "step": 34160 + }, + { + "epoch": 0.6838525636213497, + "grad_norm": 1.1073906421661377, + "learning_rate": 2.3998044978272496e-06, + "loss": 0.2818, + "step": 34161 + }, + { + "epoch": 0.6838725821384781, + "grad_norm": 1.1649764776229858, + "learning_rate": 2.399527604825897e-06, + "loss": 0.2814, + "step": 34162 + }, + { + "epoch": 0.6838926006556064, + "grad_norm": 1.0916404724121094, + "learning_rate": 2.3992507227563706e-06, + "loss": 0.3009, + "step": 34163 + }, + { + "epoch": 0.6839126191727348, + "grad_norm": 1.142647624015808, + "learning_rate": 2.398973851619833e-06, + "loss": 0.3422, + "step": 34164 + }, + { + "epoch": 0.6839326376898631, + "grad_norm": 1.2161307334899902, + "learning_rate": 2.3986969914174507e-06, + "loss": 0.2654, + "step": 34165 + }, + { + "epoch": 0.6839526562069914, + "grad_norm": 1.134391188621521, + "learning_rate": 2.3984201421503854e-06, + "loss": 0.2808, + "step": 34166 + }, + { + "epoch": 0.6839726747241198, + "grad_norm": 1.1175925731658936, + "learning_rate": 2.3981433038198034e-06, + "loss": 0.2912, + "step": 34167 + }, + { + "epoch": 0.6839926932412481, + "grad_norm": 1.8800410032272339, + "learning_rate": 2.3978664764268672e-06, + "loss": 0.8015, + "step": 34168 + }, + { + "epoch": 0.6840127117583765, + "grad_norm": 1.069438099861145, + "learning_rate": 2.39758965997274e-06, + "loss": 0.3, + "step": 34169 + }, + { + "epoch": 0.6840327302755048, + "grad_norm": 1.0878959894180298, + "learning_rate": 2.3973128544585842e-06, + "loss": 0.2956, + "step": 34170 + }, + { + "epoch": 0.6840527487926332, + "grad_norm": 1.0704294443130493, + "learning_rate": 2.3970360598855667e-06, + "loss": 0.2841, + "step": 34171 + }, + { + "epoch": 0.6840727673097615, + "grad_norm": 1.0335561037063599, + "learning_rate": 2.3967592762548503e-06, + "loss": 0.3054, + "step": 34172 + }, + { + "epoch": 0.6840927858268899, + "grad_norm": 1.1292833089828491, + "learning_rate": 2.396482503567597e-06, + "loss": 0.2913, + "step": 34173 + }, + { + "epoch": 0.6841128043440182, + "grad_norm": 1.0645081996917725, + "learning_rate": 2.3962057418249707e-06, + "loss": 0.297, + "step": 34174 + }, + { + "epoch": 0.6841328228611465, + "grad_norm": 1.0581109523773193, + "learning_rate": 2.3959289910281334e-06, + "loss": 0.2374, + "step": 34175 + }, + { + "epoch": 0.6841528413782749, + "grad_norm": 1.1141867637634277, + "learning_rate": 2.3956522511782525e-06, + "loss": 0.2819, + "step": 34176 + }, + { + "epoch": 0.6841728598954032, + "grad_norm": 1.1034576892852783, + "learning_rate": 2.3953755222764867e-06, + "loss": 0.3242, + "step": 34177 + }, + { + "epoch": 0.6841928784125316, + "grad_norm": 1.068278193473816, + "learning_rate": 2.395098804324004e-06, + "loss": 0.2613, + "step": 34178 + }, + { + "epoch": 0.6842128969296599, + "grad_norm": 1.1000173091888428, + "learning_rate": 2.394822097321963e-06, + "loss": 0.3199, + "step": 34179 + }, + { + "epoch": 0.6842329154467883, + "grad_norm": 1.218274474143982, + "learning_rate": 2.394545401271532e-06, + "loss": 0.3176, + "step": 34180 + }, + { + "epoch": 0.6842529339639166, + "grad_norm": 1.0652726888656616, + "learning_rate": 2.394268716173871e-06, + "loss": 0.3497, + "step": 34181 + }, + { + "epoch": 0.6842729524810449, + "grad_norm": 1.869964838027954, + "learning_rate": 2.3939920420301434e-06, + "loss": 0.7758, + "step": 34182 + }, + { + "epoch": 0.6842929709981733, + "grad_norm": 0.9847730994224548, + "learning_rate": 2.39371537884151e-06, + "loss": 0.2743, + "step": 34183 + }, + { + "epoch": 0.6843129895153016, + "grad_norm": 1.1166651248931885, + "learning_rate": 2.393438726609139e-06, + "loss": 0.3009, + "step": 34184 + }, + { + "epoch": 0.68433300803243, + "grad_norm": 1.955983281135559, + "learning_rate": 2.3931620853341903e-06, + "loss": 0.7643, + "step": 34185 + }, + { + "epoch": 0.6843530265495583, + "grad_norm": 1.078877329826355, + "learning_rate": 2.3928854550178278e-06, + "loss": 0.3041, + "step": 34186 + }, + { + "epoch": 0.6843730450666867, + "grad_norm": 1.1467686891555786, + "learning_rate": 2.392608835661213e-06, + "loss": 0.3167, + "step": 34187 + }, + { + "epoch": 0.684393063583815, + "grad_norm": 1.090934157371521, + "learning_rate": 2.392332227265508e-06, + "loss": 0.3153, + "step": 34188 + }, + { + "epoch": 0.6844130821009434, + "grad_norm": 1.0912787914276123, + "learning_rate": 2.3920556298318785e-06, + "loss": 0.2999, + "step": 34189 + }, + { + "epoch": 0.6844331006180717, + "grad_norm": 1.2253401279449463, + "learning_rate": 2.3917790433614845e-06, + "loss": 0.3021, + "step": 34190 + }, + { + "epoch": 0.6844531191352, + "grad_norm": 1.0641369819641113, + "learning_rate": 2.3915024678554923e-06, + "loss": 0.2753, + "step": 34191 + }, + { + "epoch": 0.6844731376523284, + "grad_norm": 0.9966639876365662, + "learning_rate": 2.3912259033150604e-06, + "loss": 0.2797, + "step": 34192 + }, + { + "epoch": 0.6844931561694567, + "grad_norm": 1.8940194845199585, + "learning_rate": 2.390949349741355e-06, + "loss": 0.7564, + "step": 34193 + }, + { + "epoch": 0.6845131746865851, + "grad_norm": 1.1240909099578857, + "learning_rate": 2.3906728071355367e-06, + "loss": 0.3349, + "step": 34194 + }, + { + "epoch": 0.6845331932037134, + "grad_norm": 1.2106703519821167, + "learning_rate": 2.3903962754987687e-06, + "loss": 0.2943, + "step": 34195 + }, + { + "epoch": 0.6845532117208418, + "grad_norm": 1.5372323989868164, + "learning_rate": 2.3901197548322138e-06, + "loss": 0.3787, + "step": 34196 + }, + { + "epoch": 0.6845732302379701, + "grad_norm": 1.0530823469161987, + "learning_rate": 2.389843245137031e-06, + "loss": 0.2674, + "step": 34197 + }, + { + "epoch": 0.6845932487550984, + "grad_norm": 1.5466831922531128, + "learning_rate": 2.3895667464143874e-06, + "loss": 0.3291, + "step": 34198 + }, + { + "epoch": 0.6846132672722268, + "grad_norm": 1.142991304397583, + "learning_rate": 2.3892902586654437e-06, + "loss": 0.2889, + "step": 34199 + }, + { + "epoch": 0.6846332857893551, + "grad_norm": 1.169796109199524, + "learning_rate": 2.3890137818913622e-06, + "loss": 0.2994, + "step": 34200 + }, + { + "epoch": 0.6846533043064835, + "grad_norm": 1.9711781740188599, + "learning_rate": 2.3887373160933023e-06, + "loss": 0.7534, + "step": 34201 + }, + { + "epoch": 0.6846733228236118, + "grad_norm": 1.2673242092132568, + "learning_rate": 2.3884608612724315e-06, + "loss": 0.3191, + "step": 34202 + }, + { + "epoch": 0.6846933413407402, + "grad_norm": 1.2070696353912354, + "learning_rate": 2.388184417429907e-06, + "loss": 0.2937, + "step": 34203 + }, + { + "epoch": 0.6847133598578685, + "grad_norm": 1.9916208982467651, + "learning_rate": 2.3879079845668952e-06, + "loss": 0.7301, + "step": 34204 + }, + { + "epoch": 0.6847333783749969, + "grad_norm": 1.0364904403686523, + "learning_rate": 2.3876315626845557e-06, + "loss": 0.3331, + "step": 34205 + }, + { + "epoch": 0.6847533968921252, + "grad_norm": 1.2029885053634644, + "learning_rate": 2.387355151784049e-06, + "loss": 0.3248, + "step": 34206 + }, + { + "epoch": 0.6847734154092535, + "grad_norm": 1.8390685319900513, + "learning_rate": 2.387078751866541e-06, + "loss": 0.7816, + "step": 34207 + }, + { + "epoch": 0.6847934339263819, + "grad_norm": 1.097017765045166, + "learning_rate": 2.386802362933192e-06, + "loss": 0.2552, + "step": 34208 + }, + { + "epoch": 0.6848134524435102, + "grad_norm": 1.1104713678359985, + "learning_rate": 2.386525984985163e-06, + "loss": 0.2818, + "step": 34209 + }, + { + "epoch": 0.6848334709606386, + "grad_norm": 1.046722412109375, + "learning_rate": 2.386249618023615e-06, + "loss": 0.3085, + "step": 34210 + }, + { + "epoch": 0.6848534894777669, + "grad_norm": 1.0984901189804077, + "learning_rate": 2.3859732620497123e-06, + "loss": 0.3059, + "step": 34211 + }, + { + "epoch": 0.6848735079948953, + "grad_norm": 1.122886061668396, + "learning_rate": 2.385696917064616e-06, + "loss": 0.3114, + "step": 34212 + }, + { + "epoch": 0.6848935265120236, + "grad_norm": 1.1760386228561401, + "learning_rate": 2.385420583069487e-06, + "loss": 0.2914, + "step": 34213 + }, + { + "epoch": 0.6849135450291519, + "grad_norm": 1.0517758131027222, + "learning_rate": 2.3851442600654857e-06, + "loss": 0.2705, + "step": 34214 + }, + { + "epoch": 0.6849335635462803, + "grad_norm": 1.2757214307785034, + "learning_rate": 2.384867948053777e-06, + "loss": 0.3051, + "step": 34215 + }, + { + "epoch": 0.6849535820634086, + "grad_norm": 1.295169472694397, + "learning_rate": 2.384591647035518e-06, + "loss": 0.3046, + "step": 34216 + }, + { + "epoch": 0.684973600580537, + "grad_norm": 1.0943348407745361, + "learning_rate": 2.3843153570118753e-06, + "loss": 0.2514, + "step": 34217 + }, + { + "epoch": 0.6849936190976653, + "grad_norm": 1.1465260982513428, + "learning_rate": 2.3840390779840085e-06, + "loss": 0.3557, + "step": 34218 + }, + { + "epoch": 0.6850136376147937, + "grad_norm": 1.0479215383529663, + "learning_rate": 2.3837628099530756e-06, + "loss": 0.2671, + "step": 34219 + }, + { + "epoch": 0.685033656131922, + "grad_norm": 1.0446089506149292, + "learning_rate": 2.3834865529202433e-06, + "loss": 0.2806, + "step": 34220 + }, + { + "epoch": 0.6850536746490504, + "grad_norm": 1.1590098142623901, + "learning_rate": 2.38321030688667e-06, + "loss": 0.2939, + "step": 34221 + }, + { + "epoch": 0.6850736931661787, + "grad_norm": 1.028171420097351, + "learning_rate": 2.3829340718535175e-06, + "loss": 0.2614, + "step": 34222 + }, + { + "epoch": 0.685093711683307, + "grad_norm": 1.2267708778381348, + "learning_rate": 2.3826578478219457e-06, + "loss": 0.3218, + "step": 34223 + }, + { + "epoch": 0.6851137302004354, + "grad_norm": 1.4047595262527466, + "learning_rate": 2.3823816347931184e-06, + "loss": 0.3414, + "step": 34224 + }, + { + "epoch": 0.6851337487175637, + "grad_norm": 1.1891926527023315, + "learning_rate": 2.382105432768196e-06, + "loss": 0.3377, + "step": 34225 + }, + { + "epoch": 0.6851537672346921, + "grad_norm": 1.1254746913909912, + "learning_rate": 2.381829241748338e-06, + "loss": 0.2897, + "step": 34226 + }, + { + "epoch": 0.6851737857518204, + "grad_norm": 1.1310486793518066, + "learning_rate": 2.3815530617347054e-06, + "loss": 0.3168, + "step": 34227 + }, + { + "epoch": 0.6851938042689488, + "grad_norm": 0.9933174848556519, + "learning_rate": 2.381276892728462e-06, + "loss": 0.2664, + "step": 34228 + }, + { + "epoch": 0.6852138227860771, + "grad_norm": 1.0458544492721558, + "learning_rate": 2.3810007347307646e-06, + "loss": 0.2798, + "step": 34229 + }, + { + "epoch": 0.6852338413032054, + "grad_norm": 1.0683660507202148, + "learning_rate": 2.3807245877427787e-06, + "loss": 0.3253, + "step": 34230 + }, + { + "epoch": 0.6852538598203338, + "grad_norm": 1.111446499824524, + "learning_rate": 2.380448451765663e-06, + "loss": 0.2901, + "step": 34231 + }, + { + "epoch": 0.6852738783374621, + "grad_norm": 0.9922130107879639, + "learning_rate": 2.380172326800576e-06, + "loss": 0.2737, + "step": 34232 + }, + { + "epoch": 0.6852938968545905, + "grad_norm": 1.0373435020446777, + "learning_rate": 2.379896212848683e-06, + "loss": 0.284, + "step": 34233 + }, + { + "epoch": 0.6853139153717188, + "grad_norm": 1.071488618850708, + "learning_rate": 2.379620109911143e-06, + "loss": 0.3188, + "step": 34234 + }, + { + "epoch": 0.6853339338888472, + "grad_norm": 1.1397792100906372, + "learning_rate": 2.3793440179891152e-06, + "loss": 0.3099, + "step": 34235 + }, + { + "epoch": 0.6853539524059755, + "grad_norm": 1.277060627937317, + "learning_rate": 2.3790679370837596e-06, + "loss": 0.3405, + "step": 34236 + }, + { + "epoch": 0.6853739709231039, + "grad_norm": 1.052021861076355, + "learning_rate": 2.37879186719624e-06, + "loss": 0.2753, + "step": 34237 + }, + { + "epoch": 0.6853939894402322, + "grad_norm": 1.0436639785766602, + "learning_rate": 2.378515808327716e-06, + "loss": 0.2879, + "step": 34238 + }, + { + "epoch": 0.6854140079573605, + "grad_norm": 1.066381812095642, + "learning_rate": 2.3782397604793467e-06, + "loss": 0.299, + "step": 34239 + }, + { + "epoch": 0.6854340264744889, + "grad_norm": 1.061059594154358, + "learning_rate": 2.3779637236522918e-06, + "loss": 0.2672, + "step": 34240 + }, + { + "epoch": 0.6854540449916172, + "grad_norm": 1.121458649635315, + "learning_rate": 2.3776876978477147e-06, + "loss": 0.2761, + "step": 34241 + }, + { + "epoch": 0.6854740635087456, + "grad_norm": 1.011612892150879, + "learning_rate": 2.3774116830667725e-06, + "loss": 0.2478, + "step": 34242 + }, + { + "epoch": 0.6854940820258739, + "grad_norm": 1.2293527126312256, + "learning_rate": 2.3771356793106294e-06, + "loss": 0.3048, + "step": 34243 + }, + { + "epoch": 0.6855141005430023, + "grad_norm": 1.0319156646728516, + "learning_rate": 2.3768596865804426e-06, + "loss": 0.2826, + "step": 34244 + }, + { + "epoch": 0.6855341190601306, + "grad_norm": 1.2369811534881592, + "learning_rate": 2.376583704877372e-06, + "loss": 0.3157, + "step": 34245 + }, + { + "epoch": 0.6855541375772589, + "grad_norm": 1.1358709335327148, + "learning_rate": 2.376307734202581e-06, + "loss": 0.3262, + "step": 34246 + }, + { + "epoch": 0.6855741560943873, + "grad_norm": 1.0431606769561768, + "learning_rate": 2.376031774557227e-06, + "loss": 0.2631, + "step": 34247 + }, + { + "epoch": 0.6855941746115156, + "grad_norm": 1.2009854316711426, + "learning_rate": 2.3757558259424706e-06, + "loss": 0.3156, + "step": 34248 + }, + { + "epoch": 0.685614193128644, + "grad_norm": 1.1283074617385864, + "learning_rate": 2.3754798883594726e-06, + "loss": 0.2851, + "step": 34249 + }, + { + "epoch": 0.6856342116457723, + "grad_norm": 1.0907515287399292, + "learning_rate": 2.3752039618093904e-06, + "loss": 0.3145, + "step": 34250 + }, + { + "epoch": 0.6856542301629007, + "grad_norm": 1.8563681840896606, + "learning_rate": 2.374928046293388e-06, + "loss": 0.7688, + "step": 34251 + }, + { + "epoch": 0.685674248680029, + "grad_norm": 1.1829522848129272, + "learning_rate": 2.374652141812622e-06, + "loss": 0.308, + "step": 34252 + }, + { + "epoch": 0.6856942671971574, + "grad_norm": 1.0036710500717163, + "learning_rate": 2.3743762483682526e-06, + "loss": 0.2574, + "step": 34253 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 1.1200004816055298, + "learning_rate": 2.374100365961442e-06, + "loss": 0.2788, + "step": 34254 + }, + { + "epoch": 0.685734304231414, + "grad_norm": 0.962871789932251, + "learning_rate": 2.3738244945933463e-06, + "loss": 0.2602, + "step": 34255 + }, + { + "epoch": 0.6857543227485424, + "grad_norm": 1.0953030586242676, + "learning_rate": 2.3735486342651296e-06, + "loss": 0.3085, + "step": 34256 + }, + { + "epoch": 0.6857743412656707, + "grad_norm": 2.1409616470336914, + "learning_rate": 2.3732727849779493e-06, + "loss": 0.7474, + "step": 34257 + }, + { + "epoch": 0.6857943597827991, + "grad_norm": 1.0170104503631592, + "learning_rate": 2.3729969467329644e-06, + "loss": 0.2373, + "step": 34258 + }, + { + "epoch": 0.6858143782999274, + "grad_norm": 1.2172527313232422, + "learning_rate": 2.3727211195313334e-06, + "loss": 0.2781, + "step": 34259 + }, + { + "epoch": 0.6858343968170558, + "grad_norm": 1.082292079925537, + "learning_rate": 2.3724453033742188e-06, + "loss": 0.3185, + "step": 34260 + }, + { + "epoch": 0.6858544153341841, + "grad_norm": 1.0575354099273682, + "learning_rate": 2.3721694982627793e-06, + "loss": 0.2738, + "step": 34261 + }, + { + "epoch": 0.6858744338513124, + "grad_norm": 1.1816596984863281, + "learning_rate": 2.3718937041981732e-06, + "loss": 0.3065, + "step": 34262 + }, + { + "epoch": 0.6858944523684408, + "grad_norm": 1.231389045715332, + "learning_rate": 2.3716179211815594e-06, + "loss": 0.2799, + "step": 34263 + }, + { + "epoch": 0.6859144708855691, + "grad_norm": 1.151053547859192, + "learning_rate": 2.3713421492140994e-06, + "loss": 0.2676, + "step": 34264 + }, + { + "epoch": 0.6859344894026975, + "grad_norm": 1.1386082172393799, + "learning_rate": 2.3710663882969516e-06, + "loss": 0.3297, + "step": 34265 + }, + { + "epoch": 0.6859545079198258, + "grad_norm": 1.0272774696350098, + "learning_rate": 2.3707906384312723e-06, + "loss": 0.3092, + "step": 34266 + }, + { + "epoch": 0.6859745264369542, + "grad_norm": 1.834455966949463, + "learning_rate": 2.370514899618226e-06, + "loss": 0.7484, + "step": 34267 + }, + { + "epoch": 0.6859945449540825, + "grad_norm": 1.0549898147583008, + "learning_rate": 2.3702391718589673e-06, + "loss": 0.2625, + "step": 34268 + }, + { + "epoch": 0.6860145634712109, + "grad_norm": 1.1123127937316895, + "learning_rate": 2.369963455154659e-06, + "loss": 0.3271, + "step": 34269 + }, + { + "epoch": 0.6860345819883392, + "grad_norm": 1.1711138486862183, + "learning_rate": 2.369687749506458e-06, + "loss": 0.2514, + "step": 34270 + }, + { + "epoch": 0.6860546005054675, + "grad_norm": 1.0557544231414795, + "learning_rate": 2.369412054915524e-06, + "loss": 0.3146, + "step": 34271 + }, + { + "epoch": 0.6860746190225959, + "grad_norm": 1.1745842695236206, + "learning_rate": 2.3691363713830133e-06, + "loss": 0.2939, + "step": 34272 + }, + { + "epoch": 0.6860946375397242, + "grad_norm": 0.9491142630577087, + "learning_rate": 2.3688606989100897e-06, + "loss": 0.2599, + "step": 34273 + }, + { + "epoch": 0.6861146560568526, + "grad_norm": 1.1442432403564453, + "learning_rate": 2.368585037497909e-06, + "loss": 0.3054, + "step": 34274 + }, + { + "epoch": 0.6861346745739809, + "grad_norm": 1.0895644426345825, + "learning_rate": 2.3683093871476308e-06, + "loss": 0.3176, + "step": 34275 + }, + { + "epoch": 0.6861546930911093, + "grad_norm": 0.9793881773948669, + "learning_rate": 2.3680337478604116e-06, + "loss": 0.2456, + "step": 34276 + }, + { + "epoch": 0.6861747116082376, + "grad_norm": 1.0120843648910522, + "learning_rate": 2.3677581196374137e-06, + "loss": 0.292, + "step": 34277 + }, + { + "epoch": 0.6861947301253659, + "grad_norm": 1.0744428634643555, + "learning_rate": 2.3674825024797947e-06, + "loss": 0.2947, + "step": 34278 + }, + { + "epoch": 0.6862147486424943, + "grad_norm": 1.1485872268676758, + "learning_rate": 2.3672068963887106e-06, + "loss": 0.2801, + "step": 34279 + }, + { + "epoch": 0.6862347671596226, + "grad_norm": 1.1356390714645386, + "learning_rate": 2.3669313013653237e-06, + "loss": 0.264, + "step": 34280 + }, + { + "epoch": 0.686254785676751, + "grad_norm": 1.8767019510269165, + "learning_rate": 2.3666557174107897e-06, + "loss": 0.7664, + "step": 34281 + }, + { + "epoch": 0.6862748041938793, + "grad_norm": 1.1465167999267578, + "learning_rate": 2.3663801445262703e-06, + "loss": 0.3403, + "step": 34282 + }, + { + "epoch": 0.6862948227110077, + "grad_norm": 1.0644986629486084, + "learning_rate": 2.3661045827129214e-06, + "loss": 0.3019, + "step": 34283 + }, + { + "epoch": 0.686314841228136, + "grad_norm": 1.0936251878738403, + "learning_rate": 2.3658290319719024e-06, + "loss": 0.3349, + "step": 34284 + }, + { + "epoch": 0.6863348597452643, + "grad_norm": 0.9908130764961243, + "learning_rate": 2.3655534923043687e-06, + "loss": 0.2677, + "step": 34285 + }, + { + "epoch": 0.6863548782623927, + "grad_norm": 1.258810043334961, + "learning_rate": 2.365277963711484e-06, + "loss": 0.3002, + "step": 34286 + }, + { + "epoch": 0.686374896779521, + "grad_norm": 1.1815001964569092, + "learning_rate": 2.3650024461944034e-06, + "loss": 0.311, + "step": 34287 + }, + { + "epoch": 0.6863949152966494, + "grad_norm": 1.1206676959991455, + "learning_rate": 2.3647269397542854e-06, + "loss": 0.3233, + "step": 34288 + }, + { + "epoch": 0.6864149338137777, + "grad_norm": 1.1192271709442139, + "learning_rate": 2.3644514443922885e-06, + "loss": 0.3229, + "step": 34289 + }, + { + "epoch": 0.6864349523309061, + "grad_norm": 1.0600332021713257, + "learning_rate": 2.364175960109568e-06, + "loss": 0.2864, + "step": 34290 + }, + { + "epoch": 0.6864549708480344, + "grad_norm": 1.0331803560256958, + "learning_rate": 2.363900486907287e-06, + "loss": 0.2519, + "step": 34291 + }, + { + "epoch": 0.6864749893651628, + "grad_norm": 1.2072614431381226, + "learning_rate": 2.3636250247865987e-06, + "loss": 0.2981, + "step": 34292 + }, + { + "epoch": 0.6864950078822911, + "grad_norm": 1.1292585134506226, + "learning_rate": 2.363349573748666e-06, + "loss": 0.2911, + "step": 34293 + }, + { + "epoch": 0.6865150263994194, + "grad_norm": 1.1452723741531372, + "learning_rate": 2.3630741337946422e-06, + "loss": 0.2861, + "step": 34294 + }, + { + "epoch": 0.6865350449165478, + "grad_norm": 1.1488367319107056, + "learning_rate": 2.362798704925689e-06, + "loss": 0.3083, + "step": 34295 + }, + { + "epoch": 0.6865550634336761, + "grad_norm": 1.100554347038269, + "learning_rate": 2.362523287142962e-06, + "loss": 0.2904, + "step": 34296 + }, + { + "epoch": 0.6865750819508045, + "grad_norm": 1.1597105264663696, + "learning_rate": 2.36224788044762e-06, + "loss": 0.3255, + "step": 34297 + }, + { + "epoch": 0.6865951004679328, + "grad_norm": 1.1126333475112915, + "learning_rate": 2.361972484840819e-06, + "loss": 0.2602, + "step": 34298 + }, + { + "epoch": 0.6866151189850612, + "grad_norm": 1.1236933469772339, + "learning_rate": 2.361697100323719e-06, + "loss": 0.2997, + "step": 34299 + }, + { + "epoch": 0.6866351375021895, + "grad_norm": 1.139709234237671, + "learning_rate": 2.3614217268974765e-06, + "loss": 0.2962, + "step": 34300 + }, + { + "epoch": 0.6866551560193178, + "grad_norm": 1.1281269788742065, + "learning_rate": 2.36114636456325e-06, + "loss": 0.2786, + "step": 34301 + }, + { + "epoch": 0.6866751745364462, + "grad_norm": 1.1559535264968872, + "learning_rate": 2.3608710133221956e-06, + "loss": 0.3228, + "step": 34302 + }, + { + "epoch": 0.6866951930535745, + "grad_norm": 1.1250364780426025, + "learning_rate": 2.36059567317547e-06, + "loss": 0.2959, + "step": 34303 + }, + { + "epoch": 0.6867152115707029, + "grad_norm": 1.1849613189697266, + "learning_rate": 2.360320344124234e-06, + "loss": 0.3482, + "step": 34304 + }, + { + "epoch": 0.6867352300878312, + "grad_norm": 1.1109976768493652, + "learning_rate": 2.360045026169641e-06, + "loss": 0.3297, + "step": 34305 + }, + { + "epoch": 0.6867552486049596, + "grad_norm": 1.1210734844207764, + "learning_rate": 2.359769719312853e-06, + "loss": 0.355, + "step": 34306 + }, + { + "epoch": 0.6867752671220879, + "grad_norm": 1.3063018321990967, + "learning_rate": 2.359494423555023e-06, + "loss": 0.329, + "step": 34307 + }, + { + "epoch": 0.6867952856392163, + "grad_norm": 2.003551483154297, + "learning_rate": 2.359219138897312e-06, + "loss": 0.7027, + "step": 34308 + }, + { + "epoch": 0.6868153041563446, + "grad_norm": 1.0700985193252563, + "learning_rate": 2.3589438653408755e-06, + "loss": 0.278, + "step": 34309 + }, + { + "epoch": 0.6868353226734729, + "grad_norm": 1.1031759977340698, + "learning_rate": 2.35866860288687e-06, + "loss": 0.3048, + "step": 34310 + }, + { + "epoch": 0.6868553411906013, + "grad_norm": 1.311799168586731, + "learning_rate": 2.3583933515364538e-06, + "loss": 0.3168, + "step": 34311 + }, + { + "epoch": 0.6868753597077296, + "grad_norm": 1.0007786750793457, + "learning_rate": 2.3581181112907814e-06, + "loss": 0.2866, + "step": 34312 + }, + { + "epoch": 0.686895378224858, + "grad_norm": 1.2309184074401855, + "learning_rate": 2.357842882151014e-06, + "loss": 0.3071, + "step": 34313 + }, + { + "epoch": 0.6869153967419863, + "grad_norm": 1.2436974048614502, + "learning_rate": 2.3575676641183065e-06, + "loss": 0.3035, + "step": 34314 + }, + { + "epoch": 0.6869354152591147, + "grad_norm": 1.100347638130188, + "learning_rate": 2.357292457193816e-06, + "loss": 0.3067, + "step": 34315 + }, + { + "epoch": 0.686955433776243, + "grad_norm": 1.1016088724136353, + "learning_rate": 2.357017261378697e-06, + "loss": 0.3554, + "step": 34316 + }, + { + "epoch": 0.6869754522933713, + "grad_norm": 1.0375767946243286, + "learning_rate": 2.3567420766741107e-06, + "loss": 0.3138, + "step": 34317 + }, + { + "epoch": 0.6869954708104997, + "grad_norm": 1.071326494216919, + "learning_rate": 2.3564669030812103e-06, + "loss": 0.3005, + "step": 34318 + }, + { + "epoch": 0.687015489327628, + "grad_norm": 1.054716944694519, + "learning_rate": 2.356191740601156e-06, + "loss": 0.3054, + "step": 34319 + }, + { + "epoch": 0.6870355078447564, + "grad_norm": 1.0324732065200806, + "learning_rate": 2.355916589235102e-06, + "loss": 0.2519, + "step": 34320 + }, + { + "epoch": 0.6870555263618847, + "grad_norm": 1.3005051612854004, + "learning_rate": 2.3556414489842044e-06, + "loss": 0.2867, + "step": 34321 + }, + { + "epoch": 0.6870755448790131, + "grad_norm": 1.1416407823562622, + "learning_rate": 2.355366319849623e-06, + "loss": 0.3186, + "step": 34322 + }, + { + "epoch": 0.6870955633961414, + "grad_norm": 1.2313122749328613, + "learning_rate": 2.3550912018325117e-06, + "loss": 0.3297, + "step": 34323 + }, + { + "epoch": 0.6871155819132698, + "grad_norm": 1.0030938386917114, + "learning_rate": 2.3548160949340288e-06, + "loss": 0.2707, + "step": 34324 + }, + { + "epoch": 0.6871356004303981, + "grad_norm": 1.085056185722351, + "learning_rate": 2.354540999155327e-06, + "loss": 0.2742, + "step": 34325 + }, + { + "epoch": 0.6871556189475264, + "grad_norm": 1.0766847133636475, + "learning_rate": 2.354265914497568e-06, + "loss": 0.3176, + "step": 34326 + }, + { + "epoch": 0.6871756374646548, + "grad_norm": 1.1147946119308472, + "learning_rate": 2.3539908409619055e-06, + "loss": 0.2811, + "step": 34327 + }, + { + "epoch": 0.6871956559817831, + "grad_norm": 1.2711962461471558, + "learning_rate": 2.3537157785494964e-06, + "loss": 0.3562, + "step": 34328 + }, + { + "epoch": 0.6872156744989115, + "grad_norm": 1.269358515739441, + "learning_rate": 2.3534407272614946e-06, + "loss": 0.3082, + "step": 34329 + }, + { + "epoch": 0.6872356930160398, + "grad_norm": 1.180908441543579, + "learning_rate": 2.35316568709906e-06, + "loss": 0.3049, + "step": 34330 + }, + { + "epoch": 0.6872557115331682, + "grad_norm": 1.2803974151611328, + "learning_rate": 2.352890658063346e-06, + "loss": 0.3376, + "step": 34331 + }, + { + "epoch": 0.6872757300502965, + "grad_norm": 1.1427620649337769, + "learning_rate": 2.352615640155511e-06, + "loss": 0.2774, + "step": 34332 + }, + { + "epoch": 0.6872957485674248, + "grad_norm": 1.0553444623947144, + "learning_rate": 2.3523406333767114e-06, + "loss": 0.2916, + "step": 34333 + }, + { + "epoch": 0.6873157670845532, + "grad_norm": 1.2204649448394775, + "learning_rate": 2.3520656377280986e-06, + "loss": 0.3205, + "step": 34334 + }, + { + "epoch": 0.6873357856016815, + "grad_norm": 1.28317391872406, + "learning_rate": 2.351790653210835e-06, + "loss": 0.2312, + "step": 34335 + }, + { + "epoch": 0.6873558041188099, + "grad_norm": 1.0494136810302734, + "learning_rate": 2.3515156798260736e-06, + "loss": 0.2774, + "step": 34336 + }, + { + "epoch": 0.6873758226359382, + "grad_norm": 1.2745245695114136, + "learning_rate": 2.3512407175749696e-06, + "loss": 0.2963, + "step": 34337 + }, + { + "epoch": 0.6873958411530666, + "grad_norm": 1.750842809677124, + "learning_rate": 2.3509657664586782e-06, + "loss": 0.7298, + "step": 34338 + }, + { + "epoch": 0.6874158596701949, + "grad_norm": 1.2139536142349243, + "learning_rate": 2.3506908264783586e-06, + "loss": 0.2933, + "step": 34339 + }, + { + "epoch": 0.6874358781873233, + "grad_norm": 0.9612581133842468, + "learning_rate": 2.350415897635164e-06, + "loss": 0.2727, + "step": 34340 + }, + { + "epoch": 0.6874558967044516, + "grad_norm": 1.9126754999160767, + "learning_rate": 2.3501409799302517e-06, + "loss": 0.8076, + "step": 34341 + }, + { + "epoch": 0.6874759152215799, + "grad_norm": 1.0105617046356201, + "learning_rate": 2.3498660733647737e-06, + "loss": 0.2657, + "step": 34342 + }, + { + "epoch": 0.6874959337387083, + "grad_norm": 1.1142170429229736, + "learning_rate": 2.349591177939891e-06, + "loss": 0.3056, + "step": 34343 + }, + { + "epoch": 0.6875159522558366, + "grad_norm": 1.2016361951828003, + "learning_rate": 2.3493162936567544e-06, + "loss": 0.3155, + "step": 34344 + }, + { + "epoch": 0.687535970772965, + "grad_norm": 1.1292189359664917, + "learning_rate": 2.3490414205165234e-06, + "loss": 0.3139, + "step": 34345 + }, + { + "epoch": 0.6875559892900933, + "grad_norm": 1.1681545972824097, + "learning_rate": 2.348766558520352e-06, + "loss": 0.2995, + "step": 34346 + }, + { + "epoch": 0.6875760078072217, + "grad_norm": 1.0147780179977417, + "learning_rate": 2.3484917076693938e-06, + "loss": 0.3104, + "step": 34347 + }, + { + "epoch": 0.68759602632435, + "grad_norm": 1.0726009607315063, + "learning_rate": 2.348216867964807e-06, + "loss": 0.3261, + "step": 34348 + }, + { + "epoch": 0.6876160448414783, + "grad_norm": 1.1507266759872437, + "learning_rate": 2.347942039407747e-06, + "loss": 0.3252, + "step": 34349 + }, + { + "epoch": 0.6876360633586067, + "grad_norm": 1.8389497995376587, + "learning_rate": 2.3476672219993667e-06, + "loss": 0.7524, + "step": 34350 + }, + { + "epoch": 0.687656081875735, + "grad_norm": 1.1327534914016724, + "learning_rate": 2.347392415740823e-06, + "loss": 0.3259, + "step": 34351 + }, + { + "epoch": 0.6876761003928634, + "grad_norm": 1.0917526483535767, + "learning_rate": 2.3471176206332693e-06, + "loss": 0.2889, + "step": 34352 + }, + { + "epoch": 0.6876961189099917, + "grad_norm": 1.0274704694747925, + "learning_rate": 2.346842836677863e-06, + "loss": 0.2916, + "step": 34353 + }, + { + "epoch": 0.6877161374271201, + "grad_norm": 1.077998161315918, + "learning_rate": 2.34656806387576e-06, + "loss": 0.3041, + "step": 34354 + }, + { + "epoch": 0.6877361559442484, + "grad_norm": 1.0904297828674316, + "learning_rate": 2.346293302228111e-06, + "loss": 0.3277, + "step": 34355 + }, + { + "epoch": 0.6877561744613768, + "grad_norm": 1.0840482711791992, + "learning_rate": 2.3460185517360756e-06, + "loss": 0.2898, + "step": 34356 + }, + { + "epoch": 0.6877761929785051, + "grad_norm": 1.1992902755737305, + "learning_rate": 2.3457438124008058e-06, + "loss": 0.3198, + "step": 34357 + }, + { + "epoch": 0.6877962114956334, + "grad_norm": 1.0104612112045288, + "learning_rate": 2.3454690842234595e-06, + "loss": 0.2974, + "step": 34358 + }, + { + "epoch": 0.6878162300127618, + "grad_norm": 1.221749186515808, + "learning_rate": 2.3451943672051895e-06, + "loss": 0.3072, + "step": 34359 + }, + { + "epoch": 0.6878362485298901, + "grad_norm": 1.0143029689788818, + "learning_rate": 2.34491966134715e-06, + "loss": 0.2789, + "step": 34360 + }, + { + "epoch": 0.6878562670470185, + "grad_norm": 1.1868022680282593, + "learning_rate": 2.344644966650498e-06, + "loss": 0.3545, + "step": 34361 + }, + { + "epoch": 0.6878762855641468, + "grad_norm": 1.0408389568328857, + "learning_rate": 2.3443702831163872e-06, + "loss": 0.2706, + "step": 34362 + }, + { + "epoch": 0.6878963040812752, + "grad_norm": 1.190229058265686, + "learning_rate": 2.3440956107459723e-06, + "loss": 0.2094, + "step": 34363 + }, + { + "epoch": 0.6879163225984035, + "grad_norm": 1.0646814107894897, + "learning_rate": 2.343820949540408e-06, + "loss": 0.3079, + "step": 34364 + }, + { + "epoch": 0.6879363411155318, + "grad_norm": 1.0433586835861206, + "learning_rate": 2.3435462995008468e-06, + "loss": 0.3247, + "step": 34365 + }, + { + "epoch": 0.6879563596326602, + "grad_norm": 1.7998183965682983, + "learning_rate": 2.3432716606284474e-06, + "loss": 0.8069, + "step": 34366 + }, + { + "epoch": 0.6879763781497885, + "grad_norm": 1.092034935951233, + "learning_rate": 2.3429970329243625e-06, + "loss": 0.2698, + "step": 34367 + }, + { + "epoch": 0.6879963966669169, + "grad_norm": 1.0751739740371704, + "learning_rate": 2.342722416389744e-06, + "loss": 0.3062, + "step": 34368 + }, + { + "epoch": 0.6880164151840452, + "grad_norm": 1.0904033184051514, + "learning_rate": 2.3424478110257503e-06, + "loss": 0.2499, + "step": 34369 + }, + { + "epoch": 0.6880364337011736, + "grad_norm": 2.015404224395752, + "learning_rate": 2.342173216833532e-06, + "loss": 0.7941, + "step": 34370 + }, + { + "epoch": 0.6880564522183019, + "grad_norm": 1.1623204946517944, + "learning_rate": 2.341898633814248e-06, + "loss": 0.3216, + "step": 34371 + }, + { + "epoch": 0.6880764707354303, + "grad_norm": 1.3088334798812866, + "learning_rate": 2.34162406196905e-06, + "loss": 0.3157, + "step": 34372 + }, + { + "epoch": 0.6880964892525586, + "grad_norm": 1.97556471824646, + "learning_rate": 2.3413495012990922e-06, + "loss": 0.7469, + "step": 34373 + }, + { + "epoch": 0.6881165077696869, + "grad_norm": 1.1939830780029297, + "learning_rate": 2.341074951805527e-06, + "loss": 0.2939, + "step": 34374 + }, + { + "epoch": 0.6881365262868153, + "grad_norm": 1.1479754447937012, + "learning_rate": 2.3408004134895124e-06, + "loss": 0.3152, + "step": 34375 + }, + { + "epoch": 0.6881565448039436, + "grad_norm": 1.1100850105285645, + "learning_rate": 2.3405258863522005e-06, + "loss": 0.2922, + "step": 34376 + }, + { + "epoch": 0.688176563321072, + "grad_norm": 1.211273431777954, + "learning_rate": 2.340251370394746e-06, + "loss": 0.3013, + "step": 34377 + }, + { + "epoch": 0.6881965818382003, + "grad_norm": 1.011311650276184, + "learning_rate": 2.3399768656183005e-06, + "loss": 0.2627, + "step": 34378 + }, + { + "epoch": 0.6882166003553287, + "grad_norm": 1.0990861654281616, + "learning_rate": 2.339702372024021e-06, + "loss": 0.2498, + "step": 34379 + }, + { + "epoch": 0.688236618872457, + "grad_norm": 0.9837983846664429, + "learning_rate": 2.3394278896130603e-06, + "loss": 0.2756, + "step": 34380 + }, + { + "epoch": 0.6882566373895853, + "grad_norm": 1.1767597198486328, + "learning_rate": 2.3391534183865704e-06, + "loss": 0.2892, + "step": 34381 + }, + { + "epoch": 0.6882766559067137, + "grad_norm": 1.3027135133743286, + "learning_rate": 2.3388789583457087e-06, + "loss": 0.2725, + "step": 34382 + }, + { + "epoch": 0.688296674423842, + "grad_norm": 1.1994680166244507, + "learning_rate": 2.338604509491626e-06, + "loss": 0.2884, + "step": 34383 + }, + { + "epoch": 0.6883166929409704, + "grad_norm": 1.1542459726333618, + "learning_rate": 2.3383300718254785e-06, + "loss": 0.3146, + "step": 34384 + }, + { + "epoch": 0.6883367114580987, + "grad_norm": 1.1365742683410645, + "learning_rate": 2.3380556453484186e-06, + "loss": 0.3431, + "step": 34385 + }, + { + "epoch": 0.6883567299752271, + "grad_norm": 1.24608314037323, + "learning_rate": 2.3377812300616002e-06, + "loss": 0.327, + "step": 34386 + }, + { + "epoch": 0.6883767484923554, + "grad_norm": 1.1034893989562988, + "learning_rate": 2.3375068259661742e-06, + "loss": 0.2349, + "step": 34387 + }, + { + "epoch": 0.6883967670094838, + "grad_norm": 0.9947507977485657, + "learning_rate": 2.337232433063299e-06, + "loss": 0.3128, + "step": 34388 + }, + { + "epoch": 0.6884167855266121, + "grad_norm": 1.107848882675171, + "learning_rate": 2.3369580513541252e-06, + "loss": 0.2994, + "step": 34389 + }, + { + "epoch": 0.6884368040437404, + "grad_norm": 2.1023805141448975, + "learning_rate": 2.336683680839807e-06, + "loss": 0.7385, + "step": 34390 + }, + { + "epoch": 0.6884568225608688, + "grad_norm": 1.1972663402557373, + "learning_rate": 2.3364093215214958e-06, + "loss": 0.2969, + "step": 34391 + }, + { + "epoch": 0.6884768410779971, + "grad_norm": 1.2122712135314941, + "learning_rate": 2.3361349734003472e-06, + "loss": 0.2813, + "step": 34392 + }, + { + "epoch": 0.6884968595951255, + "grad_norm": 1.259863018989563, + "learning_rate": 2.335860636477515e-06, + "loss": 0.3047, + "step": 34393 + }, + { + "epoch": 0.6885168781122538, + "grad_norm": 1.273406982421875, + "learning_rate": 2.3355863107541492e-06, + "loss": 0.3611, + "step": 34394 + }, + { + "epoch": 0.6885368966293822, + "grad_norm": 2.0502121448516846, + "learning_rate": 2.3353119962314075e-06, + "loss": 0.794, + "step": 34395 + }, + { + "epoch": 0.6885569151465105, + "grad_norm": 1.1036601066589355, + "learning_rate": 2.3350376929104385e-06, + "loss": 0.3149, + "step": 34396 + }, + { + "epoch": 0.6885769336636388, + "grad_norm": 1.7937707901000977, + "learning_rate": 2.334763400792399e-06, + "loss": 0.7776, + "step": 34397 + }, + { + "epoch": 0.6885969521807672, + "grad_norm": 1.2531369924545288, + "learning_rate": 2.334489119878441e-06, + "loss": 0.3212, + "step": 34398 + }, + { + "epoch": 0.6886169706978955, + "grad_norm": 1.0742816925048828, + "learning_rate": 2.334214850169717e-06, + "loss": 0.2947, + "step": 34399 + }, + { + "epoch": 0.6886369892150239, + "grad_norm": 1.0131759643554688, + "learning_rate": 2.3339405916673787e-06, + "loss": 0.2746, + "step": 34400 + }, + { + "epoch": 0.6886570077321522, + "grad_norm": 1.0350968837738037, + "learning_rate": 2.3336663443725815e-06, + "loss": 0.3055, + "step": 34401 + }, + { + "epoch": 0.6886770262492806, + "grad_norm": 1.0659279823303223, + "learning_rate": 2.3333921082864773e-06, + "loss": 0.2837, + "step": 34402 + }, + { + "epoch": 0.6886970447664089, + "grad_norm": 1.0077691078186035, + "learning_rate": 2.3331178834102192e-06, + "loss": 0.2665, + "step": 34403 + }, + { + "epoch": 0.6887170632835373, + "grad_norm": 1.1695363521575928, + "learning_rate": 2.33284366974496e-06, + "loss": 0.3139, + "step": 34404 + }, + { + "epoch": 0.6887370818006656, + "grad_norm": 1.039252519607544, + "learning_rate": 2.3325694672918492e-06, + "loss": 0.2817, + "step": 34405 + }, + { + "epoch": 0.6887571003177939, + "grad_norm": 1.0335689783096313, + "learning_rate": 2.3322952760520446e-06, + "loss": 0.3178, + "step": 34406 + }, + { + "epoch": 0.6887771188349223, + "grad_norm": 1.14364492893219, + "learning_rate": 2.3320210960266944e-06, + "loss": 0.3226, + "step": 34407 + }, + { + "epoch": 0.6887971373520506, + "grad_norm": 1.0624281167984009, + "learning_rate": 2.3317469272169558e-06, + "loss": 0.246, + "step": 34408 + }, + { + "epoch": 0.688817155869179, + "grad_norm": 1.0794765949249268, + "learning_rate": 2.3314727696239764e-06, + "loss": 0.3358, + "step": 34409 + }, + { + "epoch": 0.6888371743863073, + "grad_norm": 1.0749032497406006, + "learning_rate": 2.3311986232489138e-06, + "loss": 0.3061, + "step": 34410 + }, + { + "epoch": 0.6888571929034357, + "grad_norm": 1.086198091506958, + "learning_rate": 2.3309244880929173e-06, + "loss": 0.2959, + "step": 34411 + }, + { + "epoch": 0.688877211420564, + "grad_norm": 0.9997239112854004, + "learning_rate": 2.3306503641571398e-06, + "loss": 0.2991, + "step": 34412 + }, + { + "epoch": 0.6888972299376923, + "grad_norm": 1.9701875448226929, + "learning_rate": 2.3303762514427324e-06, + "loss": 0.8155, + "step": 34413 + }, + { + "epoch": 0.6889172484548207, + "grad_norm": 1.3015785217285156, + "learning_rate": 2.3301021499508504e-06, + "loss": 0.3365, + "step": 34414 + }, + { + "epoch": 0.688937266971949, + "grad_norm": 1.2101304531097412, + "learning_rate": 2.329828059682644e-06, + "loss": 0.3175, + "step": 34415 + }, + { + "epoch": 0.6889572854890774, + "grad_norm": 1.0351905822753906, + "learning_rate": 2.3295539806392664e-06, + "loss": 0.2835, + "step": 34416 + }, + { + "epoch": 0.6889773040062057, + "grad_norm": 1.07584547996521, + "learning_rate": 2.3292799128218683e-06, + "loss": 0.2727, + "step": 34417 + }, + { + "epoch": 0.6889973225233341, + "grad_norm": 1.122065782546997, + "learning_rate": 2.3290058562316014e-06, + "loss": 0.3194, + "step": 34418 + }, + { + "epoch": 0.6890173410404624, + "grad_norm": 1.082243800163269, + "learning_rate": 2.328731810869621e-06, + "loss": 0.2865, + "step": 34419 + }, + { + "epoch": 0.6890373595575908, + "grad_norm": 1.0691275596618652, + "learning_rate": 2.3284577767370753e-06, + "loss": 0.3093, + "step": 34420 + }, + { + "epoch": 0.6890573780747191, + "grad_norm": 1.1140764951705933, + "learning_rate": 2.3281837538351205e-06, + "loss": 0.263, + "step": 34421 + }, + { + "epoch": 0.6890773965918474, + "grad_norm": 1.1484841108322144, + "learning_rate": 2.327909742164904e-06, + "loss": 0.3366, + "step": 34422 + }, + { + "epoch": 0.6890974151089758, + "grad_norm": 1.099825382232666, + "learning_rate": 2.3276357417275813e-06, + "loss": 0.2958, + "step": 34423 + }, + { + "epoch": 0.6891174336261041, + "grad_norm": 1.0939193964004517, + "learning_rate": 2.327361752524303e-06, + "loss": 0.2566, + "step": 34424 + }, + { + "epoch": 0.6891374521432325, + "grad_norm": 1.124563455581665, + "learning_rate": 2.3270877745562207e-06, + "loss": 0.3002, + "step": 34425 + }, + { + "epoch": 0.6891574706603608, + "grad_norm": 1.8426697254180908, + "learning_rate": 2.3268138078244867e-06, + "loss": 0.7263, + "step": 34426 + }, + { + "epoch": 0.6891774891774892, + "grad_norm": 1.1570693254470825, + "learning_rate": 2.3265398523302503e-06, + "loss": 0.2745, + "step": 34427 + }, + { + "epoch": 0.6891975076946175, + "grad_norm": 1.0722510814666748, + "learning_rate": 2.3262659080746664e-06, + "loss": 0.2845, + "step": 34428 + }, + { + "epoch": 0.6892175262117458, + "grad_norm": 1.1608798503875732, + "learning_rate": 2.3259919750588854e-06, + "loss": 0.3258, + "step": 34429 + }, + { + "epoch": 0.6892375447288742, + "grad_norm": 1.8157919645309448, + "learning_rate": 2.325718053284059e-06, + "loss": 0.7886, + "step": 34430 + }, + { + "epoch": 0.6892575632460025, + "grad_norm": 1.0621336698532104, + "learning_rate": 2.3254441427513365e-06, + "loss": 0.2865, + "step": 34431 + }, + { + "epoch": 0.6892775817631309, + "grad_norm": 1.164225459098816, + "learning_rate": 2.325170243461872e-06, + "loss": 0.3242, + "step": 34432 + }, + { + "epoch": 0.6892976002802592, + "grad_norm": 1.3791745901107788, + "learning_rate": 2.324896355416816e-06, + "loss": 0.2927, + "step": 34433 + }, + { + "epoch": 0.6893176187973876, + "grad_norm": 1.0210891962051392, + "learning_rate": 2.3246224786173203e-06, + "loss": 0.2721, + "step": 34434 + }, + { + "epoch": 0.6893376373145159, + "grad_norm": 1.3203643560409546, + "learning_rate": 2.324348613064537e-06, + "loss": 0.3411, + "step": 34435 + }, + { + "epoch": 0.6893576558316443, + "grad_norm": 1.1724679470062256, + "learning_rate": 2.324074758759614e-06, + "loss": 0.2897, + "step": 34436 + }, + { + "epoch": 0.6893776743487726, + "grad_norm": 0.9661492109298706, + "learning_rate": 2.323800915703707e-06, + "loss": 0.2841, + "step": 34437 + }, + { + "epoch": 0.6893976928659009, + "grad_norm": 2.0493171215057373, + "learning_rate": 2.323527083897965e-06, + "loss": 0.7699, + "step": 34438 + }, + { + "epoch": 0.6894177113830293, + "grad_norm": 1.027414321899414, + "learning_rate": 2.323253263343539e-06, + "loss": 0.3028, + "step": 34439 + }, + { + "epoch": 0.6894377299001576, + "grad_norm": 1.0355079174041748, + "learning_rate": 2.3229794540415786e-06, + "loss": 0.2787, + "step": 34440 + }, + { + "epoch": 0.689457748417286, + "grad_norm": 1.2610191106796265, + "learning_rate": 2.322705655993238e-06, + "loss": 0.2931, + "step": 34441 + }, + { + "epoch": 0.6894777669344143, + "grad_norm": 1.0526705980300903, + "learning_rate": 2.322431869199667e-06, + "loss": 0.2806, + "step": 34442 + }, + { + "epoch": 0.6894977854515427, + "grad_norm": 1.0170142650604248, + "learning_rate": 2.322158093662016e-06, + "loss": 0.2862, + "step": 34443 + }, + { + "epoch": 0.689517803968671, + "grad_norm": 1.9118815660476685, + "learning_rate": 2.321884329381434e-06, + "loss": 0.765, + "step": 34444 + }, + { + "epoch": 0.6895378224857993, + "grad_norm": 1.014122724533081, + "learning_rate": 2.321610576359076e-06, + "loss": 0.3084, + "step": 34445 + }, + { + "epoch": 0.6895578410029277, + "grad_norm": 1.0634489059448242, + "learning_rate": 2.32133683459609e-06, + "loss": 0.3041, + "step": 34446 + }, + { + "epoch": 0.689577859520056, + "grad_norm": 1.1871819496154785, + "learning_rate": 2.3210631040936277e-06, + "loss": 0.2918, + "step": 34447 + }, + { + "epoch": 0.6895978780371844, + "grad_norm": 1.2010096311569214, + "learning_rate": 2.32078938485284e-06, + "loss": 0.3359, + "step": 34448 + }, + { + "epoch": 0.6896178965543127, + "grad_norm": 1.093684434890747, + "learning_rate": 2.320515676874876e-06, + "loss": 0.2839, + "step": 34449 + }, + { + "epoch": 0.6896379150714411, + "grad_norm": 1.2590655088424683, + "learning_rate": 2.320241980160889e-06, + "loss": 0.3196, + "step": 34450 + }, + { + "epoch": 0.6896579335885694, + "grad_norm": 1.107224941253662, + "learning_rate": 2.319968294712027e-06, + "loss": 0.3174, + "step": 34451 + }, + { + "epoch": 0.6896779521056978, + "grad_norm": 1.0735660791397095, + "learning_rate": 2.319694620529443e-06, + "loss": 0.335, + "step": 34452 + }, + { + "epoch": 0.6896979706228261, + "grad_norm": 1.147456407546997, + "learning_rate": 2.3194209576142834e-06, + "loss": 0.2781, + "step": 34453 + }, + { + "epoch": 0.6897179891399544, + "grad_norm": 1.1715245246887207, + "learning_rate": 2.3191473059677033e-06, + "loss": 0.322, + "step": 34454 + }, + { + "epoch": 0.6897380076570828, + "grad_norm": 1.1763054132461548, + "learning_rate": 2.3188736655908512e-06, + "loss": 0.2396, + "step": 34455 + }, + { + "epoch": 0.6897580261742111, + "grad_norm": 1.0866812467575073, + "learning_rate": 2.3186000364848766e-06, + "loss": 0.3347, + "step": 34456 + }, + { + "epoch": 0.6897780446913395, + "grad_norm": 1.379059910774231, + "learning_rate": 2.318326418650929e-06, + "loss": 0.3058, + "step": 34457 + }, + { + "epoch": 0.6897980632084678, + "grad_norm": 1.1550965309143066, + "learning_rate": 2.318052812090162e-06, + "loss": 0.2941, + "step": 34458 + }, + { + "epoch": 0.6898180817255962, + "grad_norm": 1.080601692199707, + "learning_rate": 2.3177792168037217e-06, + "loss": 0.3287, + "step": 34459 + }, + { + "epoch": 0.6898381002427245, + "grad_norm": 1.1916781663894653, + "learning_rate": 2.3175056327927618e-06, + "loss": 0.3067, + "step": 34460 + }, + { + "epoch": 0.6898581187598528, + "grad_norm": 1.093978762626648, + "learning_rate": 2.317232060058432e-06, + "loss": 0.2648, + "step": 34461 + }, + { + "epoch": 0.6898781372769812, + "grad_norm": 1.1813926696777344, + "learning_rate": 2.3169584986018783e-06, + "loss": 0.3133, + "step": 34462 + }, + { + "epoch": 0.6898981557941095, + "grad_norm": 1.09052574634552, + "learning_rate": 2.3166849484242566e-06, + "loss": 0.3249, + "step": 34463 + }, + { + "epoch": 0.6899181743112379, + "grad_norm": 1.1142421960830688, + "learning_rate": 2.316411409526713e-06, + "loss": 0.2517, + "step": 34464 + }, + { + "epoch": 0.6899381928283662, + "grad_norm": 1.1203670501708984, + "learning_rate": 2.3161378819103985e-06, + "loss": 0.3147, + "step": 34465 + }, + { + "epoch": 0.6899582113454946, + "grad_norm": 1.9291367530822754, + "learning_rate": 2.3158643655764635e-06, + "loss": 0.7578, + "step": 34466 + }, + { + "epoch": 0.6899782298626229, + "grad_norm": 1.0482972860336304, + "learning_rate": 2.3155908605260547e-06, + "loss": 0.3114, + "step": 34467 + }, + { + "epoch": 0.6899982483797513, + "grad_norm": 1.7382172346115112, + "learning_rate": 2.3153173667603258e-06, + "loss": 0.7678, + "step": 34468 + }, + { + "epoch": 0.6900182668968796, + "grad_norm": 1.1646831035614014, + "learning_rate": 2.3150438842804253e-06, + "loss": 0.2451, + "step": 34469 + }, + { + "epoch": 0.6900382854140079, + "grad_norm": 1.191665768623352, + "learning_rate": 2.3147704130875e-06, + "loss": 0.3153, + "step": 34470 + }, + { + "epoch": 0.6900583039311363, + "grad_norm": 1.0907795429229736, + "learning_rate": 2.314496953182705e-06, + "loss": 0.3099, + "step": 34471 + }, + { + "epoch": 0.6900783224482646, + "grad_norm": 1.0309444665908813, + "learning_rate": 2.3142235045671846e-06, + "loss": 0.2714, + "step": 34472 + }, + { + "epoch": 0.690098340965393, + "grad_norm": 1.0644325017929077, + "learning_rate": 2.313950067242092e-06, + "loss": 0.2891, + "step": 34473 + }, + { + "epoch": 0.6901183594825213, + "grad_norm": 1.0309903621673584, + "learning_rate": 2.3136766412085758e-06, + "loss": 0.2858, + "step": 34474 + }, + { + "epoch": 0.6901383779996497, + "grad_norm": 1.1119633913040161, + "learning_rate": 2.3134032264677826e-06, + "loss": 0.2762, + "step": 34475 + }, + { + "epoch": 0.690158396516778, + "grad_norm": 1.1237781047821045, + "learning_rate": 2.3131298230208665e-06, + "loss": 0.3213, + "step": 34476 + }, + { + "epoch": 0.6901784150339063, + "grad_norm": 1.2353039979934692, + "learning_rate": 2.3128564308689737e-06, + "loss": 0.2476, + "step": 34477 + }, + { + "epoch": 0.6901984335510347, + "grad_norm": 1.116790533065796, + "learning_rate": 2.3125830500132546e-06, + "loss": 0.2674, + "step": 34478 + }, + { + "epoch": 0.690218452068163, + "grad_norm": 1.1440401077270508, + "learning_rate": 2.3123096804548582e-06, + "loss": 0.3278, + "step": 34479 + }, + { + "epoch": 0.6902384705852914, + "grad_norm": 1.175461769104004, + "learning_rate": 2.3120363221949314e-06, + "loss": 0.3117, + "step": 34480 + }, + { + "epoch": 0.6902584891024197, + "grad_norm": 1.833925724029541, + "learning_rate": 2.311762975234627e-06, + "loss": 0.8284, + "step": 34481 + }, + { + "epoch": 0.6902785076195481, + "grad_norm": 1.1418864727020264, + "learning_rate": 2.311489639575093e-06, + "loss": 0.335, + "step": 34482 + }, + { + "epoch": 0.6902985261366764, + "grad_norm": 1.1837499141693115, + "learning_rate": 2.311216315217476e-06, + "loss": 0.3105, + "step": 34483 + }, + { + "epoch": 0.6903185446538048, + "grad_norm": 1.0549484491348267, + "learning_rate": 2.3109430021629286e-06, + "loss": 0.3256, + "step": 34484 + }, + { + "epoch": 0.6903385631709331, + "grad_norm": 1.1229361295700073, + "learning_rate": 2.3106697004125965e-06, + "loss": 0.3095, + "step": 34485 + }, + { + "epoch": 0.6903585816880614, + "grad_norm": 1.1065250635147095, + "learning_rate": 2.310396409967632e-06, + "loss": 0.317, + "step": 34486 + }, + { + "epoch": 0.6903786002051898, + "grad_norm": 1.05098557472229, + "learning_rate": 2.3101231308291825e-06, + "loss": 0.2967, + "step": 34487 + }, + { + "epoch": 0.6903986187223181, + "grad_norm": 1.3124216794967651, + "learning_rate": 2.309849862998396e-06, + "loss": 0.3191, + "step": 34488 + }, + { + "epoch": 0.6904186372394465, + "grad_norm": 1.0429534912109375, + "learning_rate": 2.309576606476421e-06, + "loss": 0.2786, + "step": 34489 + }, + { + "epoch": 0.6904386557565748, + "grad_norm": 1.2484886646270752, + "learning_rate": 2.309303361264407e-06, + "loss": 0.3159, + "step": 34490 + }, + { + "epoch": 0.6904586742737032, + "grad_norm": 1.1274670362472534, + "learning_rate": 2.309030127363504e-06, + "loss": 0.2851, + "step": 34491 + }, + { + "epoch": 0.6904786927908315, + "grad_norm": 1.1932460069656372, + "learning_rate": 2.3087569047748587e-06, + "loss": 0.3095, + "step": 34492 + }, + { + "epoch": 0.6904987113079598, + "grad_norm": 1.046308159828186, + "learning_rate": 2.3084836934996186e-06, + "loss": 0.3018, + "step": 34493 + }, + { + "epoch": 0.6905187298250882, + "grad_norm": 1.0510072708129883, + "learning_rate": 2.3082104935389353e-06, + "loss": 0.2649, + "step": 34494 + }, + { + "epoch": 0.6905387483422165, + "grad_norm": 1.071917176246643, + "learning_rate": 2.307937304893956e-06, + "loss": 0.3094, + "step": 34495 + }, + { + "epoch": 0.6905587668593449, + "grad_norm": 1.0867124795913696, + "learning_rate": 2.3076641275658273e-06, + "loss": 0.3178, + "step": 34496 + }, + { + "epoch": 0.6905787853764732, + "grad_norm": 1.2423449754714966, + "learning_rate": 2.307390961555701e-06, + "loss": 0.3169, + "step": 34497 + }, + { + "epoch": 0.6905988038936016, + "grad_norm": 1.1924145221710205, + "learning_rate": 2.3071178068647223e-06, + "loss": 0.3002, + "step": 34498 + }, + { + "epoch": 0.6906188224107299, + "grad_norm": 1.0197036266326904, + "learning_rate": 2.3068446634940417e-06, + "loss": 0.2802, + "step": 34499 + }, + { + "epoch": 0.6906388409278583, + "grad_norm": 1.8655868768692017, + "learning_rate": 2.306571531444807e-06, + "loss": 0.8014, + "step": 34500 + }, + { + "epoch": 0.6906588594449866, + "grad_norm": 1.2831783294677734, + "learning_rate": 2.3062984107181656e-06, + "loss": 0.3725, + "step": 34501 + }, + { + "epoch": 0.6906788779621149, + "grad_norm": 1.9647318124771118, + "learning_rate": 2.3060253013152646e-06, + "loss": 0.7979, + "step": 34502 + }, + { + "epoch": 0.6906988964792433, + "grad_norm": 1.2562720775604248, + "learning_rate": 2.305752203237255e-06, + "loss": 0.3381, + "step": 34503 + }, + { + "epoch": 0.6907189149963716, + "grad_norm": 1.0920848846435547, + "learning_rate": 2.3054791164852836e-06, + "loss": 0.2428, + "step": 34504 + }, + { + "epoch": 0.6907389335135, + "grad_norm": 1.0187503099441528, + "learning_rate": 2.3052060410604982e-06, + "loss": 0.2857, + "step": 34505 + }, + { + "epoch": 0.6907589520306283, + "grad_norm": 1.0916311740875244, + "learning_rate": 2.3049329769640445e-06, + "loss": 0.3232, + "step": 34506 + }, + { + "epoch": 0.6907789705477567, + "grad_norm": 1.0773041248321533, + "learning_rate": 2.3046599241970745e-06, + "loss": 0.3, + "step": 34507 + }, + { + "epoch": 0.690798989064885, + "grad_norm": 1.1629225015640259, + "learning_rate": 2.3043868827607347e-06, + "loss": 0.3042, + "step": 34508 + }, + { + "epoch": 0.6908190075820133, + "grad_norm": 1.209718108177185, + "learning_rate": 2.3041138526561697e-06, + "loss": 0.3293, + "step": 34509 + }, + { + "epoch": 0.6908390260991417, + "grad_norm": 1.1333458423614502, + "learning_rate": 2.303840833884532e-06, + "loss": 0.3366, + "step": 34510 + }, + { + "epoch": 0.69085904461627, + "grad_norm": 1.417756199836731, + "learning_rate": 2.303567826446965e-06, + "loss": 0.2682, + "step": 34511 + }, + { + "epoch": 0.6908790631333984, + "grad_norm": 1.288772463798523, + "learning_rate": 2.303294830344621e-06, + "loss": 0.3151, + "step": 34512 + }, + { + "epoch": 0.6908990816505267, + "grad_norm": 1.1187019348144531, + "learning_rate": 2.3030218455786452e-06, + "loss": 0.2767, + "step": 34513 + }, + { + "epoch": 0.6909191001676551, + "grad_norm": 1.2028417587280273, + "learning_rate": 2.302748872150185e-06, + "loss": 0.3029, + "step": 34514 + }, + { + "epoch": 0.6909391186847834, + "grad_norm": 1.1768243312835693, + "learning_rate": 2.3024759100603867e-06, + "loss": 0.2874, + "step": 34515 + }, + { + "epoch": 0.6909591372019118, + "grad_norm": 1.1013076305389404, + "learning_rate": 2.3022029593104e-06, + "loss": 0.3118, + "step": 34516 + }, + { + "epoch": 0.6909791557190401, + "grad_norm": 0.9277241230010986, + "learning_rate": 2.3019300199013723e-06, + "loss": 0.2746, + "step": 34517 + }, + { + "epoch": 0.6909991742361684, + "grad_norm": 1.0632258653640747, + "learning_rate": 2.30165709183445e-06, + "loss": 0.2852, + "step": 34518 + }, + { + "epoch": 0.6910191927532968, + "grad_norm": 1.822160243988037, + "learning_rate": 2.301384175110781e-06, + "loss": 0.7645, + "step": 34519 + }, + { + "epoch": 0.6910392112704251, + "grad_norm": 1.101257562637329, + "learning_rate": 2.301111269731509e-06, + "loss": 0.2962, + "step": 34520 + }, + { + "epoch": 0.6910592297875535, + "grad_norm": 1.150826334953308, + "learning_rate": 2.3008383756977875e-06, + "loss": 0.3098, + "step": 34521 + }, + { + "epoch": 0.6910792483046818, + "grad_norm": 1.1519545316696167, + "learning_rate": 2.3005654930107583e-06, + "loss": 0.2725, + "step": 34522 + }, + { + "epoch": 0.6910992668218102, + "grad_norm": 1.1762245893478394, + "learning_rate": 2.3002926216715727e-06, + "loss": 0.3176, + "step": 34523 + }, + { + "epoch": 0.6911192853389385, + "grad_norm": 1.197274923324585, + "learning_rate": 2.300019761681374e-06, + "loss": 0.279, + "step": 34524 + }, + { + "epoch": 0.6911393038560668, + "grad_norm": 1.1269850730895996, + "learning_rate": 2.299746913041313e-06, + "loss": 0.3432, + "step": 34525 + }, + { + "epoch": 0.6911593223731952, + "grad_norm": 1.9710060358047485, + "learning_rate": 2.2994740757525352e-06, + "loss": 0.7559, + "step": 34526 + }, + { + "epoch": 0.6911793408903235, + "grad_norm": 1.2005386352539062, + "learning_rate": 2.299201249816187e-06, + "loss": 0.3, + "step": 34527 + }, + { + "epoch": 0.6911993594074519, + "grad_norm": 1.1080834865570068, + "learning_rate": 2.298928435233414e-06, + "loss": 0.2867, + "step": 34528 + }, + { + "epoch": 0.6912193779245802, + "grad_norm": 2.070120334625244, + "learning_rate": 2.298655632005366e-06, + "loss": 0.7336, + "step": 34529 + }, + { + "epoch": 0.6912393964417086, + "grad_norm": 1.1849000453948975, + "learning_rate": 2.2983828401331886e-06, + "loss": 0.2932, + "step": 34530 + }, + { + "epoch": 0.6912594149588369, + "grad_norm": 1.086455225944519, + "learning_rate": 2.2981100596180285e-06, + "loss": 0.2923, + "step": 34531 + }, + { + "epoch": 0.6912794334759653, + "grad_norm": 1.1004905700683594, + "learning_rate": 2.2978372904610318e-06, + "loss": 0.3112, + "step": 34532 + }, + { + "epoch": 0.6912994519930936, + "grad_norm": 1.2119498252868652, + "learning_rate": 2.2975645326633444e-06, + "loss": 0.3187, + "step": 34533 + }, + { + "epoch": 0.6913194705102219, + "grad_norm": 1.2279095649719238, + "learning_rate": 2.297291786226115e-06, + "loss": 0.303, + "step": 34534 + }, + { + "epoch": 0.6913394890273503, + "grad_norm": 1.7107261419296265, + "learning_rate": 2.2970190511504885e-06, + "loss": 0.7636, + "step": 34535 + }, + { + "epoch": 0.6913595075444786, + "grad_norm": 1.1745139360427856, + "learning_rate": 2.296746327437613e-06, + "loss": 0.2785, + "step": 34536 + }, + { + "epoch": 0.691379526061607, + "grad_norm": 1.1370384693145752, + "learning_rate": 2.2964736150886336e-06, + "loss": 0.2985, + "step": 34537 + }, + { + "epoch": 0.6913995445787353, + "grad_norm": 1.1435210704803467, + "learning_rate": 2.2962009141046986e-06, + "loss": 0.2913, + "step": 34538 + }, + { + "epoch": 0.6914195630958637, + "grad_norm": 1.0790568590164185, + "learning_rate": 2.295928224486953e-06, + "loss": 0.2772, + "step": 34539 + }, + { + "epoch": 0.691439581612992, + "grad_norm": 1.22184157371521, + "learning_rate": 2.295655546236543e-06, + "loss": 0.3043, + "step": 34540 + }, + { + "epoch": 0.6914596001301203, + "grad_norm": 1.231421709060669, + "learning_rate": 2.2953828793546158e-06, + "loss": 0.3261, + "step": 34541 + }, + { + "epoch": 0.6914796186472487, + "grad_norm": 1.4454247951507568, + "learning_rate": 2.2951102238423143e-06, + "loss": 0.3374, + "step": 34542 + }, + { + "epoch": 0.691499637164377, + "grad_norm": 1.1421732902526855, + "learning_rate": 2.29483757970079e-06, + "loss": 0.3412, + "step": 34543 + }, + { + "epoch": 0.6915196556815054, + "grad_norm": 1.8197485208511353, + "learning_rate": 2.2945649469311854e-06, + "loss": 0.769, + "step": 34544 + }, + { + "epoch": 0.6915396741986337, + "grad_norm": 1.117677927017212, + "learning_rate": 2.2942923255346483e-06, + "loss": 0.3045, + "step": 34545 + }, + { + "epoch": 0.6915596927157621, + "grad_norm": 1.115544080734253, + "learning_rate": 2.2940197155123215e-06, + "loss": 0.3038, + "step": 34546 + }, + { + "epoch": 0.6915797112328904, + "grad_norm": 1.1495418548583984, + "learning_rate": 2.293747116865356e-06, + "loss": 0.2671, + "step": 34547 + }, + { + "epoch": 0.6915997297500188, + "grad_norm": 1.0790883302688599, + "learning_rate": 2.293474529594893e-06, + "loss": 0.2812, + "step": 34548 + }, + { + "epoch": 0.6916197482671471, + "grad_norm": 1.1174105405807495, + "learning_rate": 2.293201953702083e-06, + "loss": 0.3568, + "step": 34549 + }, + { + "epoch": 0.6916397667842754, + "grad_norm": 1.2315633296966553, + "learning_rate": 2.2929293891880687e-06, + "loss": 0.2957, + "step": 34550 + }, + { + "epoch": 0.6916597853014038, + "grad_norm": 1.0167360305786133, + "learning_rate": 2.292656836053995e-06, + "loss": 0.2653, + "step": 34551 + }, + { + "epoch": 0.6916798038185321, + "grad_norm": 1.0298023223876953, + "learning_rate": 2.2923842943010116e-06, + "loss": 0.3219, + "step": 34552 + }, + { + "epoch": 0.6916998223356605, + "grad_norm": 1.900756597518921, + "learning_rate": 2.2921117639302616e-06, + "loss": 0.7644, + "step": 34553 + }, + { + "epoch": 0.6917198408527888, + "grad_norm": 1.211761236190796, + "learning_rate": 2.291839244942891e-06, + "loss": 0.2992, + "step": 34554 + }, + { + "epoch": 0.6917398593699172, + "grad_norm": 1.0650428533554077, + "learning_rate": 2.2915667373400436e-06, + "loss": 0.2849, + "step": 34555 + }, + { + "epoch": 0.6917598778870455, + "grad_norm": 1.0505839586257935, + "learning_rate": 2.291294241122869e-06, + "loss": 0.2894, + "step": 34556 + }, + { + "epoch": 0.6917798964041738, + "grad_norm": 1.0687824487686157, + "learning_rate": 2.2910217562925096e-06, + "loss": 0.3487, + "step": 34557 + }, + { + "epoch": 0.6917999149213022, + "grad_norm": 1.0865098237991333, + "learning_rate": 2.2907492828501125e-06, + "loss": 0.3219, + "step": 34558 + }, + { + "epoch": 0.6918199334384305, + "grad_norm": 1.1236897706985474, + "learning_rate": 2.2904768207968205e-06, + "loss": 0.2946, + "step": 34559 + }, + { + "epoch": 0.6918399519555589, + "grad_norm": 1.2319031953811646, + "learning_rate": 2.290204370133782e-06, + "loss": 0.2798, + "step": 34560 + }, + { + "epoch": 0.6918599704726872, + "grad_norm": 1.4235349893569946, + "learning_rate": 2.2899319308621405e-06, + "loss": 0.2761, + "step": 34561 + }, + { + "epoch": 0.6918799889898156, + "grad_norm": 1.2075114250183105, + "learning_rate": 2.2896595029830433e-06, + "loss": 0.3426, + "step": 34562 + }, + { + "epoch": 0.6919000075069439, + "grad_norm": 1.1171833276748657, + "learning_rate": 2.2893870864976336e-06, + "loss": 0.3196, + "step": 34563 + }, + { + "epoch": 0.6919200260240723, + "grad_norm": 1.2422661781311035, + "learning_rate": 2.2891146814070565e-06, + "loss": 0.33, + "step": 34564 + }, + { + "epoch": 0.6919400445412006, + "grad_norm": 1.0212957859039307, + "learning_rate": 2.2888422877124587e-06, + "loss": 0.3029, + "step": 34565 + }, + { + "epoch": 0.6919600630583289, + "grad_norm": 1.2269833087921143, + "learning_rate": 2.288569905414985e-06, + "loss": 0.2823, + "step": 34566 + }, + { + "epoch": 0.6919800815754573, + "grad_norm": 1.0633269548416138, + "learning_rate": 2.28829753451578e-06, + "loss": 0.2974, + "step": 34567 + }, + { + "epoch": 0.6920001000925856, + "grad_norm": 1.1883846521377563, + "learning_rate": 2.2880251750159864e-06, + "loss": 0.284, + "step": 34568 + }, + { + "epoch": 0.692020118609714, + "grad_norm": 1.0434925556182861, + "learning_rate": 2.2877528269167536e-06, + "loss": 0.2985, + "step": 34569 + }, + { + "epoch": 0.6920401371268423, + "grad_norm": 1.0759321451187134, + "learning_rate": 2.2874804902192243e-06, + "loss": 0.2754, + "step": 34570 + }, + { + "epoch": 0.6920601556439707, + "grad_norm": 1.0703909397125244, + "learning_rate": 2.287208164924543e-06, + "loss": 0.3016, + "step": 34571 + }, + { + "epoch": 0.692080174161099, + "grad_norm": 1.1531336307525635, + "learning_rate": 2.2869358510338544e-06, + "loss": 0.2773, + "step": 34572 + }, + { + "epoch": 0.6921001926782273, + "grad_norm": 1.8825843334197998, + "learning_rate": 2.2866635485483026e-06, + "loss": 0.7714, + "step": 34573 + }, + { + "epoch": 0.6921202111953557, + "grad_norm": 1.177349328994751, + "learning_rate": 2.286391257469033e-06, + "loss": 0.2666, + "step": 34574 + }, + { + "epoch": 0.692140229712484, + "grad_norm": 1.1257898807525635, + "learning_rate": 2.286118977797192e-06, + "loss": 0.2791, + "step": 34575 + }, + { + "epoch": 0.6921602482296124, + "grad_norm": 1.300689458847046, + "learning_rate": 2.285846709533923e-06, + "loss": 0.2757, + "step": 34576 + }, + { + "epoch": 0.6921802667467407, + "grad_norm": 1.1122573614120483, + "learning_rate": 2.2855744526803693e-06, + "loss": 0.2747, + "step": 34577 + }, + { + "epoch": 0.6922002852638691, + "grad_norm": 1.1978065967559814, + "learning_rate": 2.2853022072376775e-06, + "loss": 0.3063, + "step": 34578 + }, + { + "epoch": 0.6922203037809974, + "grad_norm": 1.9656236171722412, + "learning_rate": 2.2850299732069906e-06, + "loss": 0.7872, + "step": 34579 + }, + { + "epoch": 0.6922403222981258, + "grad_norm": 0.9899279475212097, + "learning_rate": 2.284757750589454e-06, + "loss": 0.2732, + "step": 34580 + }, + { + "epoch": 0.6922603408152541, + "grad_norm": 1.0793837308883667, + "learning_rate": 2.2844855393862115e-06, + "loss": 0.2891, + "step": 34581 + }, + { + "epoch": 0.6922803593323824, + "grad_norm": 1.1035739183425903, + "learning_rate": 2.2842133395984058e-06, + "loss": 0.2856, + "step": 34582 + }, + { + "epoch": 0.6923003778495108, + "grad_norm": 1.9771088361740112, + "learning_rate": 2.283941151227184e-06, + "loss": 0.8408, + "step": 34583 + }, + { + "epoch": 0.6923203963666391, + "grad_norm": 1.181270956993103, + "learning_rate": 2.2836689742736896e-06, + "loss": 0.3372, + "step": 34584 + }, + { + "epoch": 0.6923404148837675, + "grad_norm": 1.1293668746948242, + "learning_rate": 2.283396808739066e-06, + "loss": 0.3193, + "step": 34585 + }, + { + "epoch": 0.6923604334008958, + "grad_norm": 1.0512953996658325, + "learning_rate": 2.2831246546244556e-06, + "loss": 0.3002, + "step": 34586 + }, + { + "epoch": 0.6923804519180242, + "grad_norm": 1.232964038848877, + "learning_rate": 2.282852511931007e-06, + "loss": 0.2958, + "step": 34587 + }, + { + "epoch": 0.6924004704351525, + "grad_norm": 1.9057445526123047, + "learning_rate": 2.282580380659859e-06, + "loss": 0.8031, + "step": 34588 + }, + { + "epoch": 0.6924204889522808, + "grad_norm": 1.1788541078567505, + "learning_rate": 2.282308260812161e-06, + "loss": 0.2742, + "step": 34589 + }, + { + "epoch": 0.6924405074694092, + "grad_norm": 1.9434486627578735, + "learning_rate": 2.2820361523890517e-06, + "loss": 0.8158, + "step": 34590 + }, + { + "epoch": 0.6924605259865375, + "grad_norm": 1.3321398496627808, + "learning_rate": 2.28176405539168e-06, + "loss": 0.3434, + "step": 34591 + }, + { + "epoch": 0.6924805445036659, + "grad_norm": 1.186908483505249, + "learning_rate": 2.2814919698211862e-06, + "loss": 0.3234, + "step": 34592 + }, + { + "epoch": 0.6925005630207942, + "grad_norm": 1.191133975982666, + "learning_rate": 2.2812198956787158e-06, + "loss": 0.3233, + "step": 34593 + }, + { + "epoch": 0.6925205815379226, + "grad_norm": 1.7650902271270752, + "learning_rate": 2.280947832965412e-06, + "loss": 0.7227, + "step": 34594 + }, + { + "epoch": 0.6925406000550509, + "grad_norm": 1.139663815498352, + "learning_rate": 2.2806757816824165e-06, + "loss": 0.3286, + "step": 34595 + }, + { + "epoch": 0.6925606185721793, + "grad_norm": 1.0251818895339966, + "learning_rate": 2.280403741830876e-06, + "loss": 0.3032, + "step": 34596 + }, + { + "epoch": 0.6925806370893076, + "grad_norm": 1.1321784257888794, + "learning_rate": 2.280131713411934e-06, + "loss": 0.3288, + "step": 34597 + }, + { + "epoch": 0.6926006556064359, + "grad_norm": 1.953081488609314, + "learning_rate": 2.279859696426732e-06, + "loss": 0.8157, + "step": 34598 + }, + { + "epoch": 0.6926206741235643, + "grad_norm": 1.7983849048614502, + "learning_rate": 2.279587690876413e-06, + "loss": 0.774, + "step": 34599 + }, + { + "epoch": 0.6926406926406926, + "grad_norm": 0.9941322803497314, + "learning_rate": 2.279315696762124e-06, + "loss": 0.343, + "step": 34600 + }, + { + "epoch": 0.692660711157821, + "grad_norm": 1.2093324661254883, + "learning_rate": 2.2790437140850037e-06, + "loss": 0.3252, + "step": 34601 + }, + { + "epoch": 0.6926807296749493, + "grad_norm": 1.1487653255462646, + "learning_rate": 2.2787717428462e-06, + "loss": 0.3551, + "step": 34602 + }, + { + "epoch": 0.6927007481920777, + "grad_norm": 1.0993977785110474, + "learning_rate": 2.2784997830468554e-06, + "loss": 0.3142, + "step": 34603 + }, + { + "epoch": 0.692720766709206, + "grad_norm": 1.906056523323059, + "learning_rate": 2.2782278346881093e-06, + "loss": 0.7153, + "step": 34604 + }, + { + "epoch": 0.6927407852263343, + "grad_norm": 1.833166241645813, + "learning_rate": 2.2779558977711096e-06, + "loss": 0.7903, + "step": 34605 + }, + { + "epoch": 0.6927608037434627, + "grad_norm": 1.12735915184021, + "learning_rate": 2.2776839722969974e-06, + "loss": 0.3117, + "step": 34606 + }, + { + "epoch": 0.692780822260591, + "grad_norm": 1.13661789894104, + "learning_rate": 2.277412058266916e-06, + "loss": 0.2614, + "step": 34607 + }, + { + "epoch": 0.6928008407777194, + "grad_norm": 1.8493821620941162, + "learning_rate": 2.277140155682007e-06, + "loss": 0.766, + "step": 34608 + }, + { + "epoch": 0.6928208592948477, + "grad_norm": 1.219118595123291, + "learning_rate": 2.2768682645434164e-06, + "loss": 0.2818, + "step": 34609 + }, + { + "epoch": 0.6928408778119761, + "grad_norm": 1.053360939025879, + "learning_rate": 2.2765963848522853e-06, + "loss": 0.3201, + "step": 34610 + }, + { + "epoch": 0.6928608963291044, + "grad_norm": 1.315760612487793, + "learning_rate": 2.2763245166097575e-06, + "loss": 0.2761, + "step": 34611 + }, + { + "epoch": 0.6928809148462328, + "grad_norm": 1.0941179990768433, + "learning_rate": 2.276052659816973e-06, + "loss": 0.2934, + "step": 34612 + }, + { + "epoch": 0.6929009333633611, + "grad_norm": 1.0062580108642578, + "learning_rate": 2.2757808144750787e-06, + "loss": 0.271, + "step": 34613 + }, + { + "epoch": 0.6929209518804894, + "grad_norm": 1.8969950675964355, + "learning_rate": 2.275508980585214e-06, + "loss": 0.7442, + "step": 34614 + }, + { + "epoch": 0.6929409703976178, + "grad_norm": 1.088290810585022, + "learning_rate": 2.275237158148525e-06, + "loss": 0.2822, + "step": 34615 + }, + { + "epoch": 0.6929609889147461, + "grad_norm": 1.1429768800735474, + "learning_rate": 2.2749653471661527e-06, + "loss": 0.2696, + "step": 34616 + }, + { + "epoch": 0.6929810074318745, + "grad_norm": 1.179384708404541, + "learning_rate": 2.274693547639237e-06, + "loss": 0.3211, + "step": 34617 + }, + { + "epoch": 0.6930010259490028, + "grad_norm": 0.9751327037811279, + "learning_rate": 2.2744217595689262e-06, + "loss": 0.2614, + "step": 34618 + }, + { + "epoch": 0.6930210444661312, + "grad_norm": 1.0772088766098022, + "learning_rate": 2.274149982956359e-06, + "loss": 0.2968, + "step": 34619 + }, + { + "epoch": 0.6930410629832595, + "grad_norm": 1.1218113899230957, + "learning_rate": 2.273878217802679e-06, + "loss": 0.3335, + "step": 34620 + }, + { + "epoch": 0.6930610815003878, + "grad_norm": 1.242597222328186, + "learning_rate": 2.273606464109026e-06, + "loss": 0.2924, + "step": 34621 + }, + { + "epoch": 0.6930811000175162, + "grad_norm": 1.146025538444519, + "learning_rate": 2.2733347218765473e-06, + "loss": 0.3124, + "step": 34622 + }, + { + "epoch": 0.6931011185346445, + "grad_norm": 1.2623971700668335, + "learning_rate": 2.2730629911063823e-06, + "loss": 0.3191, + "step": 34623 + }, + { + "epoch": 0.6931211370517729, + "grad_norm": 1.1682567596435547, + "learning_rate": 2.2727912717996735e-06, + "loss": 0.303, + "step": 34624 + }, + { + "epoch": 0.6931411555689012, + "grad_norm": 2.033172845840454, + "learning_rate": 2.2725195639575616e-06, + "loss": 0.6702, + "step": 34625 + }, + { + "epoch": 0.6931611740860296, + "grad_norm": 1.2070472240447998, + "learning_rate": 2.2722478675811922e-06, + "loss": 0.2921, + "step": 34626 + }, + { + "epoch": 0.6931811926031579, + "grad_norm": 1.1136113405227661, + "learning_rate": 2.271976182671704e-06, + "loss": 0.3354, + "step": 34627 + }, + { + "epoch": 0.6932012111202863, + "grad_norm": 1.406574010848999, + "learning_rate": 2.2717045092302426e-06, + "loss": 0.3247, + "step": 34628 + }, + { + "epoch": 0.6932212296374146, + "grad_norm": 1.0404951572418213, + "learning_rate": 2.2714328472579484e-06, + "loss": 0.276, + "step": 34629 + }, + { + "epoch": 0.6932412481545429, + "grad_norm": 2.0039613246917725, + "learning_rate": 2.2711611967559615e-06, + "loss": 0.7953, + "step": 34630 + }, + { + "epoch": 0.6932612666716713, + "grad_norm": 1.1295968294143677, + "learning_rate": 2.2708895577254277e-06, + "loss": 0.3245, + "step": 34631 + }, + { + "epoch": 0.6932812851887996, + "grad_norm": 1.2012913227081299, + "learning_rate": 2.2706179301674863e-06, + "loss": 0.3074, + "step": 34632 + }, + { + "epoch": 0.693301303705928, + "grad_norm": 1.122698426246643, + "learning_rate": 2.27034631408328e-06, + "loss": 0.314, + "step": 34633 + }, + { + "epoch": 0.6933213222230563, + "grad_norm": 1.0887848138809204, + "learning_rate": 2.2700747094739506e-06, + "loss": 0.3102, + "step": 34634 + }, + { + "epoch": 0.6933413407401847, + "grad_norm": 1.1057617664337158, + "learning_rate": 2.2698031163406374e-06, + "loss": 0.2916, + "step": 34635 + }, + { + "epoch": 0.693361359257313, + "grad_norm": 1.056717872619629, + "learning_rate": 2.2695315346844863e-06, + "loss": 0.2956, + "step": 34636 + }, + { + "epoch": 0.6933813777744413, + "grad_norm": 1.0323305130004883, + "learning_rate": 2.2692599645066376e-06, + "loss": 0.2505, + "step": 34637 + }, + { + "epoch": 0.6934013962915697, + "grad_norm": 1.046966314315796, + "learning_rate": 2.26898840580823e-06, + "loss": 0.3077, + "step": 34638 + }, + { + "epoch": 0.693421414808698, + "grad_norm": 1.1768430471420288, + "learning_rate": 2.2687168585904094e-06, + "loss": 0.3253, + "step": 34639 + }, + { + "epoch": 0.6934414333258264, + "grad_norm": 1.1872172355651855, + "learning_rate": 2.2684453228543137e-06, + "loss": 0.3161, + "step": 34640 + }, + { + "epoch": 0.6934614518429547, + "grad_norm": 1.0105582475662231, + "learning_rate": 2.268173798601087e-06, + "loss": 0.2942, + "step": 34641 + }, + { + "epoch": 0.6934814703600831, + "grad_norm": 1.3560036420822144, + "learning_rate": 2.2679022858318707e-06, + "loss": 0.303, + "step": 34642 + }, + { + "epoch": 0.6935014888772114, + "grad_norm": 1.0068241357803345, + "learning_rate": 2.267630784547805e-06, + "loss": 0.2742, + "step": 34643 + }, + { + "epoch": 0.6935215073943397, + "grad_norm": 1.0491142272949219, + "learning_rate": 2.2673592947500295e-06, + "loss": 0.3272, + "step": 34644 + }, + { + "epoch": 0.6935415259114681, + "grad_norm": 1.0129108428955078, + "learning_rate": 2.2670878164396894e-06, + "loss": 0.2875, + "step": 34645 + }, + { + "epoch": 0.6935615444285964, + "grad_norm": 1.1939889192581177, + "learning_rate": 2.266816349617924e-06, + "loss": 0.3982, + "step": 34646 + }, + { + "epoch": 0.6935815629457248, + "grad_norm": 1.0632448196411133, + "learning_rate": 2.2665448942858743e-06, + "loss": 0.2355, + "step": 34647 + }, + { + "epoch": 0.6936015814628531, + "grad_norm": 1.0873589515686035, + "learning_rate": 2.26627345044468e-06, + "loss": 0.2926, + "step": 34648 + }, + { + "epoch": 0.6936215999799815, + "grad_norm": 1.1202974319458008, + "learning_rate": 2.2660020180954855e-06, + "loss": 0.3139, + "step": 34649 + }, + { + "epoch": 0.6936416184971098, + "grad_norm": 1.2665081024169922, + "learning_rate": 2.26573059723943e-06, + "loss": 0.3236, + "step": 34650 + }, + { + "epoch": 0.6936616370142382, + "grad_norm": 1.1286640167236328, + "learning_rate": 2.2654591878776532e-06, + "loss": 0.342, + "step": 34651 + }, + { + "epoch": 0.6936816555313665, + "grad_norm": 1.1237666606903076, + "learning_rate": 2.265187790011299e-06, + "loss": 0.2801, + "step": 34652 + }, + { + "epoch": 0.6937016740484948, + "grad_norm": 1.0690934658050537, + "learning_rate": 2.264916403641505e-06, + "loss": 0.2655, + "step": 34653 + }, + { + "epoch": 0.6937216925656232, + "grad_norm": 1.225121259689331, + "learning_rate": 2.264645028769416e-06, + "loss": 0.3343, + "step": 34654 + }, + { + "epoch": 0.6937417110827515, + "grad_norm": 1.1732627153396606, + "learning_rate": 2.26437366539617e-06, + "loss": 0.2996, + "step": 34655 + }, + { + "epoch": 0.6937617295998799, + "grad_norm": 1.1281859874725342, + "learning_rate": 2.264102313522909e-06, + "loss": 0.2969, + "step": 34656 + }, + { + "epoch": 0.6937817481170082, + "grad_norm": 1.0911757946014404, + "learning_rate": 2.2638309731507713e-06, + "loss": 0.3205, + "step": 34657 + }, + { + "epoch": 0.6938017666341366, + "grad_norm": 1.235031247138977, + "learning_rate": 2.263559644280901e-06, + "loss": 0.2467, + "step": 34658 + }, + { + "epoch": 0.6938217851512649, + "grad_norm": 1.9126546382904053, + "learning_rate": 2.2632883269144367e-06, + "loss": 0.7369, + "step": 34659 + }, + { + "epoch": 0.6938418036683932, + "grad_norm": 1.2670681476593018, + "learning_rate": 2.26301702105252e-06, + "loss": 0.325, + "step": 34660 + }, + { + "epoch": 0.6938618221855216, + "grad_norm": 1.2324203252792358, + "learning_rate": 2.262745726696288e-06, + "loss": 0.3031, + "step": 34661 + }, + { + "epoch": 0.6938818407026499, + "grad_norm": 1.0297529697418213, + "learning_rate": 2.2624744438468865e-06, + "loss": 0.2731, + "step": 34662 + }, + { + "epoch": 0.6939018592197783, + "grad_norm": 1.3792837858200073, + "learning_rate": 2.262203172505453e-06, + "loss": 0.3623, + "step": 34663 + }, + { + "epoch": 0.6939218777369066, + "grad_norm": 1.0367543697357178, + "learning_rate": 2.2619319126731265e-06, + "loss": 0.2948, + "step": 34664 + }, + { + "epoch": 0.693941896254035, + "grad_norm": 1.3527511358261108, + "learning_rate": 2.2616606643510506e-06, + "loss": 0.3155, + "step": 34665 + }, + { + "epoch": 0.6939619147711633, + "grad_norm": 1.0632678270339966, + "learning_rate": 2.261389427540363e-06, + "loss": 0.3062, + "step": 34666 + }, + { + "epoch": 0.6939819332882917, + "grad_norm": 1.2779711484909058, + "learning_rate": 2.2611182022422057e-06, + "loss": 0.272, + "step": 34667 + }, + { + "epoch": 0.69400195180542, + "grad_norm": 1.134426236152649, + "learning_rate": 2.260846988457718e-06, + "loss": 0.3041, + "step": 34668 + }, + { + "epoch": 0.6940219703225483, + "grad_norm": 1.919685959815979, + "learning_rate": 2.2605757861880414e-06, + "loss": 0.7312, + "step": 34669 + }, + { + "epoch": 0.6940419888396767, + "grad_norm": 1.8845938444137573, + "learning_rate": 2.2603045954343115e-06, + "loss": 0.7588, + "step": 34670 + }, + { + "epoch": 0.694062007356805, + "grad_norm": 1.0104724168777466, + "learning_rate": 2.260033416197674e-06, + "loss": 0.3012, + "step": 34671 + }, + { + "epoch": 0.6940820258739334, + "grad_norm": 1.1475187540054321, + "learning_rate": 2.259762248479266e-06, + "loss": 0.2538, + "step": 34672 + }, + { + "epoch": 0.6941020443910617, + "grad_norm": 1.0991700887680054, + "learning_rate": 2.259491092280228e-06, + "loss": 0.2791, + "step": 34673 + }, + { + "epoch": 0.6941220629081901, + "grad_norm": 1.9150338172912598, + "learning_rate": 2.259219947601698e-06, + "loss": 0.7816, + "step": 34674 + }, + { + "epoch": 0.6941420814253184, + "grad_norm": 1.0806190967559814, + "learning_rate": 2.258948814444819e-06, + "loss": 0.2367, + "step": 34675 + }, + { + "epoch": 0.6941620999424467, + "grad_norm": 1.112526774406433, + "learning_rate": 2.2586776928107296e-06, + "loss": 0.3094, + "step": 34676 + }, + { + "epoch": 0.6941821184595751, + "grad_norm": 1.2547944784164429, + "learning_rate": 2.2584065827005668e-06, + "loss": 0.3315, + "step": 34677 + }, + { + "epoch": 0.6942021369767034, + "grad_norm": 1.7863965034484863, + "learning_rate": 2.258135484115475e-06, + "loss": 0.8161, + "step": 34678 + }, + { + "epoch": 0.6942221554938318, + "grad_norm": 1.0739555358886719, + "learning_rate": 2.25786439705659e-06, + "loss": 0.3003, + "step": 34679 + }, + { + "epoch": 0.6942421740109601, + "grad_norm": 1.1333189010620117, + "learning_rate": 2.2575933215250546e-06, + "loss": 0.3219, + "step": 34680 + }, + { + "epoch": 0.6942621925280885, + "grad_norm": 1.1482791900634766, + "learning_rate": 2.2573222575220066e-06, + "loss": 0.2867, + "step": 34681 + }, + { + "epoch": 0.6942822110452168, + "grad_norm": 1.2714369297027588, + "learning_rate": 2.2570512050485853e-06, + "loss": 0.3012, + "step": 34682 + }, + { + "epoch": 0.6943022295623452, + "grad_norm": 1.6163034439086914, + "learning_rate": 2.2567801641059293e-06, + "loss": 0.3245, + "step": 34683 + }, + { + "epoch": 0.6943222480794735, + "grad_norm": 2.1547060012817383, + "learning_rate": 2.2565091346951805e-06, + "loss": 0.7573, + "step": 34684 + }, + { + "epoch": 0.6943422665966018, + "grad_norm": 1.0609036684036255, + "learning_rate": 2.2562381168174773e-06, + "loss": 0.2829, + "step": 34685 + }, + { + "epoch": 0.6943622851137302, + "grad_norm": 1.1621581315994263, + "learning_rate": 2.255967110473958e-06, + "loss": 0.2891, + "step": 34686 + }, + { + "epoch": 0.6943823036308585, + "grad_norm": 1.2835321426391602, + "learning_rate": 2.255696115665763e-06, + "loss": 0.2934, + "step": 34687 + }, + { + "epoch": 0.6944023221479869, + "grad_norm": 1.150238037109375, + "learning_rate": 2.255425132394029e-06, + "loss": 0.3224, + "step": 34688 + }, + { + "epoch": 0.6944223406651152, + "grad_norm": 1.0723916292190552, + "learning_rate": 2.2551541606598986e-06, + "loss": 0.265, + "step": 34689 + }, + { + "epoch": 0.6944423591822436, + "grad_norm": 1.1416727304458618, + "learning_rate": 2.2548832004645076e-06, + "loss": 0.3222, + "step": 34690 + }, + { + "epoch": 0.6944623776993719, + "grad_norm": 1.0966905355453491, + "learning_rate": 2.254612251808999e-06, + "loss": 0.3136, + "step": 34691 + }, + { + "epoch": 0.6944823962165002, + "grad_norm": 1.1707932949066162, + "learning_rate": 2.254341314694508e-06, + "loss": 0.2849, + "step": 34692 + }, + { + "epoch": 0.6945024147336286, + "grad_norm": 2.191052198410034, + "learning_rate": 2.2540703891221767e-06, + "loss": 0.7636, + "step": 34693 + }, + { + "epoch": 0.6945224332507569, + "grad_norm": 1.0961809158325195, + "learning_rate": 2.2537994750931425e-06, + "loss": 0.3116, + "step": 34694 + }, + { + "epoch": 0.6945424517678853, + "grad_norm": 1.0699920654296875, + "learning_rate": 2.253528572608545e-06, + "loss": 0.279, + "step": 34695 + }, + { + "epoch": 0.6945624702850136, + "grad_norm": 1.0541203022003174, + "learning_rate": 2.2532576816695217e-06, + "loss": 0.2887, + "step": 34696 + }, + { + "epoch": 0.694582488802142, + "grad_norm": 1.8977513313293457, + "learning_rate": 2.25298680227721e-06, + "loss": 0.7467, + "step": 34697 + }, + { + "epoch": 0.6946025073192703, + "grad_norm": 1.227516531944275, + "learning_rate": 2.2527159344327526e-06, + "loss": 0.3144, + "step": 34698 + }, + { + "epoch": 0.6946225258363987, + "grad_norm": 1.0755281448364258, + "learning_rate": 2.2524450781372863e-06, + "loss": 0.3095, + "step": 34699 + }, + { + "epoch": 0.694642544353527, + "grad_norm": 1.0348544120788574, + "learning_rate": 2.25217423339195e-06, + "loss": 0.2944, + "step": 34700 + }, + { + "epoch": 0.6946625628706553, + "grad_norm": 0.9635465145111084, + "learning_rate": 2.2519034001978796e-06, + "loss": 0.3088, + "step": 34701 + }, + { + "epoch": 0.6946825813877837, + "grad_norm": 1.2562322616577148, + "learning_rate": 2.2516325785562177e-06, + "loss": 0.3047, + "step": 34702 + }, + { + "epoch": 0.694702599904912, + "grad_norm": 1.100134253501892, + "learning_rate": 2.2513617684680996e-06, + "loss": 0.2912, + "step": 34703 + }, + { + "epoch": 0.6947226184220404, + "grad_norm": 1.0963635444641113, + "learning_rate": 2.2510909699346666e-06, + "loss": 0.3366, + "step": 34704 + }, + { + "epoch": 0.6947426369391687, + "grad_norm": 1.756637692451477, + "learning_rate": 2.2508201829570535e-06, + "loss": 0.7736, + "step": 34705 + }, + { + "epoch": 0.6947626554562971, + "grad_norm": 1.0979615449905396, + "learning_rate": 2.250549407536403e-06, + "loss": 0.2837, + "step": 34706 + }, + { + "epoch": 0.6947826739734254, + "grad_norm": 1.2573060989379883, + "learning_rate": 2.250278643673851e-06, + "loss": 0.2759, + "step": 34707 + }, + { + "epoch": 0.6948026924905537, + "grad_norm": 1.4284480810165405, + "learning_rate": 2.250007891370536e-06, + "loss": 0.3294, + "step": 34708 + }, + { + "epoch": 0.6948227110076821, + "grad_norm": 1.2668412923812866, + "learning_rate": 2.2497371506275956e-06, + "loss": 0.3072, + "step": 34709 + }, + { + "epoch": 0.6948427295248104, + "grad_norm": 1.1328558921813965, + "learning_rate": 2.2494664214461664e-06, + "loss": 0.2783, + "step": 34710 + }, + { + "epoch": 0.6948627480419388, + "grad_norm": 1.167194128036499, + "learning_rate": 2.249195703827391e-06, + "loss": 0.3018, + "step": 34711 + }, + { + "epoch": 0.6948827665590671, + "grad_norm": 1.1072595119476318, + "learning_rate": 2.2489249977724044e-06, + "loss": 0.2634, + "step": 34712 + }, + { + "epoch": 0.6949027850761955, + "grad_norm": 1.064866304397583, + "learning_rate": 2.248654303282345e-06, + "loss": 0.3041, + "step": 34713 + }, + { + "epoch": 0.6949228035933238, + "grad_norm": 1.194239854812622, + "learning_rate": 2.2483836203583494e-06, + "loss": 0.2851, + "step": 34714 + }, + { + "epoch": 0.6949428221104522, + "grad_norm": 1.1586053371429443, + "learning_rate": 2.2481129490015585e-06, + "loss": 0.3336, + "step": 34715 + }, + { + "epoch": 0.6949628406275805, + "grad_norm": 1.0988036394119263, + "learning_rate": 2.247842289213106e-06, + "loss": 0.2866, + "step": 34716 + }, + { + "epoch": 0.6949828591447088, + "grad_norm": 1.047075867652893, + "learning_rate": 2.247571640994135e-06, + "loss": 0.3263, + "step": 34717 + }, + { + "epoch": 0.6950028776618372, + "grad_norm": 1.8857144117355347, + "learning_rate": 2.2473010043457803e-06, + "loss": 0.7437, + "step": 34718 + }, + { + "epoch": 0.6950228961789655, + "grad_norm": 1.305202603340149, + "learning_rate": 2.247030379269178e-06, + "loss": 0.3334, + "step": 34719 + }, + { + "epoch": 0.6950429146960939, + "grad_norm": 1.0838977098464966, + "learning_rate": 2.2467597657654695e-06, + "loss": 0.3157, + "step": 34720 + }, + { + "epoch": 0.6950629332132222, + "grad_norm": 1.0235894918441772, + "learning_rate": 2.24648916383579e-06, + "loss": 0.2941, + "step": 34721 + }, + { + "epoch": 0.6950829517303506, + "grad_norm": 1.9711782932281494, + "learning_rate": 2.246218573481278e-06, + "loss": 0.766, + "step": 34722 + }, + { + "epoch": 0.6951029702474789, + "grad_norm": 1.1197627782821655, + "learning_rate": 2.245947994703068e-06, + "loss": 0.2861, + "step": 34723 + }, + { + "epoch": 0.6951229887646072, + "grad_norm": 1.2262269258499146, + "learning_rate": 2.2456774275023023e-06, + "loss": 0.2665, + "step": 34724 + }, + { + "epoch": 0.6951430072817356, + "grad_norm": 1.1969980001449585, + "learning_rate": 2.2454068718801157e-06, + "loss": 0.3134, + "step": 34725 + }, + { + "epoch": 0.6951630257988639, + "grad_norm": 1.2017900943756104, + "learning_rate": 2.2451363278376466e-06, + "loss": 0.3252, + "step": 34726 + }, + { + "epoch": 0.6951830443159923, + "grad_norm": 1.1591758728027344, + "learning_rate": 2.244865795376029e-06, + "loss": 0.3226, + "step": 34727 + }, + { + "epoch": 0.6952030628331206, + "grad_norm": 1.1406457424163818, + "learning_rate": 2.2445952744964046e-06, + "loss": 0.2819, + "step": 34728 + }, + { + "epoch": 0.695223081350249, + "grad_norm": 1.1239454746246338, + "learning_rate": 2.2443247651999068e-06, + "loss": 0.2711, + "step": 34729 + }, + { + "epoch": 0.6952430998673773, + "grad_norm": 1.0894865989685059, + "learning_rate": 2.2440542674876763e-06, + "loss": 0.331, + "step": 34730 + }, + { + "epoch": 0.6952631183845057, + "grad_norm": 1.2551641464233398, + "learning_rate": 2.2437837813608487e-06, + "loss": 0.2957, + "step": 34731 + }, + { + "epoch": 0.695283136901634, + "grad_norm": 1.046729564666748, + "learning_rate": 2.243513306820559e-06, + "loss": 0.2805, + "step": 34732 + }, + { + "epoch": 0.6953031554187623, + "grad_norm": 1.1610902547836304, + "learning_rate": 2.2432428438679484e-06, + "loss": 0.3287, + "step": 34733 + }, + { + "epoch": 0.6953231739358907, + "grad_norm": 1.1070451736450195, + "learning_rate": 2.2429723925041512e-06, + "loss": 0.2983, + "step": 34734 + }, + { + "epoch": 0.695343192453019, + "grad_norm": 1.0055009126663208, + "learning_rate": 2.242701952730305e-06, + "loss": 0.3177, + "step": 34735 + }, + { + "epoch": 0.6953632109701474, + "grad_norm": 1.2793152332305908, + "learning_rate": 2.242431524547545e-06, + "loss": 0.322, + "step": 34736 + }, + { + "epoch": 0.6953832294872757, + "grad_norm": 1.0507771968841553, + "learning_rate": 2.2421611079570104e-06, + "loss": 0.2792, + "step": 34737 + }, + { + "epoch": 0.6954032480044041, + "grad_norm": 0.9844175577163696, + "learning_rate": 2.2418907029598374e-06, + "loss": 0.2602, + "step": 34738 + }, + { + "epoch": 0.6954232665215324, + "grad_norm": 1.160267949104309, + "learning_rate": 2.2416203095571627e-06, + "loss": 0.2943, + "step": 34739 + }, + { + "epoch": 0.6954432850386607, + "grad_norm": 1.0732617378234863, + "learning_rate": 2.24134992775012e-06, + "loss": 0.2959, + "step": 34740 + }, + { + "epoch": 0.6954633035557891, + "grad_norm": 1.1258635520935059, + "learning_rate": 2.241079557539851e-06, + "loss": 0.3028, + "step": 34741 + }, + { + "epoch": 0.6954833220729174, + "grad_norm": 1.1971582174301147, + "learning_rate": 2.240809198927488e-06, + "loss": 0.3158, + "step": 34742 + }, + { + "epoch": 0.6955033405900458, + "grad_norm": 1.1755849123001099, + "learning_rate": 2.240538851914171e-06, + "loss": 0.3086, + "step": 34743 + }, + { + "epoch": 0.6955233591071741, + "grad_norm": 1.1992474794387817, + "learning_rate": 2.240268516501034e-06, + "loss": 0.2877, + "step": 34744 + }, + { + "epoch": 0.6955433776243025, + "grad_norm": 1.1900804042816162, + "learning_rate": 2.2399981926892133e-06, + "loss": 0.3198, + "step": 34745 + }, + { + "epoch": 0.6955633961414308, + "grad_norm": 1.1105719804763794, + "learning_rate": 2.2397278804798482e-06, + "loss": 0.2626, + "step": 34746 + }, + { + "epoch": 0.6955834146585592, + "grad_norm": 1.3087399005889893, + "learning_rate": 2.2394575798740726e-06, + "loss": 0.3356, + "step": 34747 + }, + { + "epoch": 0.6956034331756875, + "grad_norm": 1.0232131481170654, + "learning_rate": 2.239187290873024e-06, + "loss": 0.2704, + "step": 34748 + }, + { + "epoch": 0.6956234516928158, + "grad_norm": 1.0618666410446167, + "learning_rate": 2.238917013477837e-06, + "loss": 0.2694, + "step": 34749 + }, + { + "epoch": 0.6956434702099442, + "grad_norm": 1.1149128675460815, + "learning_rate": 2.2386467476896473e-06, + "loss": 0.2553, + "step": 34750 + }, + { + "epoch": 0.6956634887270725, + "grad_norm": 1.1231451034545898, + "learning_rate": 2.2383764935095943e-06, + "loss": 0.311, + "step": 34751 + }, + { + "epoch": 0.6956835072442009, + "grad_norm": 1.0714601278305054, + "learning_rate": 2.2381062509388117e-06, + "loss": 0.3085, + "step": 34752 + }, + { + "epoch": 0.6957035257613292, + "grad_norm": 1.0636714696884155, + "learning_rate": 2.2378360199784347e-06, + "loss": 0.2816, + "step": 34753 + }, + { + "epoch": 0.6957235442784576, + "grad_norm": 1.0883464813232422, + "learning_rate": 2.2375658006296026e-06, + "loss": 0.3058, + "step": 34754 + }, + { + "epoch": 0.6957435627955859, + "grad_norm": 1.095543384552002, + "learning_rate": 2.2372955928934477e-06, + "loss": 0.2314, + "step": 34755 + }, + { + "epoch": 0.6957635813127142, + "grad_norm": 1.1584845781326294, + "learning_rate": 2.2370253967711085e-06, + "loss": 0.3486, + "step": 34756 + }, + { + "epoch": 0.6957835998298426, + "grad_norm": 1.1523572206497192, + "learning_rate": 2.236755212263721e-06, + "loss": 0.3205, + "step": 34757 + }, + { + "epoch": 0.6958036183469709, + "grad_norm": 1.0038760900497437, + "learning_rate": 2.236485039372419e-06, + "loss": 0.264, + "step": 34758 + }, + { + "epoch": 0.6958236368640993, + "grad_norm": 1.1084842681884766, + "learning_rate": 2.236214878098338e-06, + "loss": 0.2812, + "step": 34759 + }, + { + "epoch": 0.6958436553812276, + "grad_norm": 1.176809549331665, + "learning_rate": 2.2359447284426167e-06, + "loss": 0.355, + "step": 34760 + }, + { + "epoch": 0.695863673898356, + "grad_norm": 1.1319725513458252, + "learning_rate": 2.2356745904063886e-06, + "loss": 0.25, + "step": 34761 + }, + { + "epoch": 0.6958836924154843, + "grad_norm": 1.2222360372543335, + "learning_rate": 2.2354044639907897e-06, + "loss": 0.3369, + "step": 34762 + }, + { + "epoch": 0.6959037109326127, + "grad_norm": 1.2887895107269287, + "learning_rate": 2.235134349196954e-06, + "loss": 0.3241, + "step": 34763 + }, + { + "epoch": 0.695923729449741, + "grad_norm": 1.035089135169983, + "learning_rate": 2.23486424602602e-06, + "loss": 0.3036, + "step": 34764 + }, + { + "epoch": 0.6959437479668693, + "grad_norm": 1.017784595489502, + "learning_rate": 2.2345941544791216e-06, + "loss": 0.3327, + "step": 34765 + }, + { + "epoch": 0.6959637664839977, + "grad_norm": 1.1031750440597534, + "learning_rate": 2.2343240745573924e-06, + "loss": 0.2802, + "step": 34766 + }, + { + "epoch": 0.695983785001126, + "grad_norm": 1.1056700944900513, + "learning_rate": 2.2340540062619714e-06, + "loss": 0.3392, + "step": 34767 + }, + { + "epoch": 0.6960038035182544, + "grad_norm": 1.0145868062973022, + "learning_rate": 2.2337839495939905e-06, + "loss": 0.2765, + "step": 34768 + }, + { + "epoch": 0.6960238220353827, + "grad_norm": 1.2339516878128052, + "learning_rate": 2.2335139045545884e-06, + "loss": 0.2575, + "step": 34769 + }, + { + "epoch": 0.6960438405525111, + "grad_norm": 1.225111961364746, + "learning_rate": 2.233243871144898e-06, + "loss": 0.3226, + "step": 34770 + }, + { + "epoch": 0.6960638590696394, + "grad_norm": 1.036991834640503, + "learning_rate": 2.232973849366056e-06, + "loss": 0.237, + "step": 34771 + }, + { + "epoch": 0.6960838775867677, + "grad_norm": 1.1075983047485352, + "learning_rate": 2.232703839219194e-06, + "loss": 0.3019, + "step": 34772 + }, + { + "epoch": 0.6961038961038961, + "grad_norm": 1.203598976135254, + "learning_rate": 2.2324338407054513e-06, + "loss": 0.2894, + "step": 34773 + }, + { + "epoch": 0.6961239146210244, + "grad_norm": 1.125619888305664, + "learning_rate": 2.2321638538259614e-06, + "loss": 0.2781, + "step": 34774 + }, + { + "epoch": 0.6961439331381528, + "grad_norm": 1.2173811197280884, + "learning_rate": 2.231893878581859e-06, + "loss": 0.3405, + "step": 34775 + }, + { + "epoch": 0.6961639516552811, + "grad_norm": 1.1516250371932983, + "learning_rate": 2.231623914974277e-06, + "loss": 0.3401, + "step": 34776 + }, + { + "epoch": 0.6961839701724095, + "grad_norm": 2.207479953765869, + "learning_rate": 2.231353963004354e-06, + "loss": 0.291, + "step": 34777 + }, + { + "epoch": 0.6962039886895378, + "grad_norm": 1.0674090385437012, + "learning_rate": 2.2310840226732234e-06, + "loss": 0.2937, + "step": 34778 + }, + { + "epoch": 0.6962240072066662, + "grad_norm": 1.1931551694869995, + "learning_rate": 2.230814093982018e-06, + "loss": 0.2871, + "step": 34779 + }, + { + "epoch": 0.6962440257237945, + "grad_norm": 0.9936679005622864, + "learning_rate": 2.2305441769318754e-06, + "loss": 0.2577, + "step": 34780 + }, + { + "epoch": 0.6962640442409228, + "grad_norm": 1.0770334005355835, + "learning_rate": 2.230274271523928e-06, + "loss": 0.2895, + "step": 34781 + }, + { + "epoch": 0.6962840627580512, + "grad_norm": 1.1136146783828735, + "learning_rate": 2.230004377759313e-06, + "loss": 0.3485, + "step": 34782 + }, + { + "epoch": 0.6963040812751795, + "grad_norm": 1.0560150146484375, + "learning_rate": 2.229734495639163e-06, + "loss": 0.2856, + "step": 34783 + }, + { + "epoch": 0.6963240997923079, + "grad_norm": 1.0550198554992676, + "learning_rate": 2.229464625164614e-06, + "loss": 0.2664, + "step": 34784 + }, + { + "epoch": 0.6963441183094362, + "grad_norm": 1.1841505765914917, + "learning_rate": 2.229194766336797e-06, + "loss": 0.2739, + "step": 34785 + }, + { + "epoch": 0.6963641368265646, + "grad_norm": 1.2463854551315308, + "learning_rate": 2.228924919156851e-06, + "loss": 0.3451, + "step": 34786 + }, + { + "epoch": 0.6963841553436929, + "grad_norm": 1.1882721185684204, + "learning_rate": 2.228655083625908e-06, + "loss": 0.2837, + "step": 34787 + }, + { + "epoch": 0.6964041738608212, + "grad_norm": 1.0657765865325928, + "learning_rate": 2.228385259745103e-06, + "loss": 0.3467, + "step": 34788 + }, + { + "epoch": 0.6964241923779496, + "grad_norm": 1.8027215003967285, + "learning_rate": 2.2281154475155696e-06, + "loss": 0.7853, + "step": 34789 + }, + { + "epoch": 0.6964442108950779, + "grad_norm": 1.0977225303649902, + "learning_rate": 2.227845646938441e-06, + "loss": 0.2886, + "step": 34790 + }, + { + "epoch": 0.6964642294122063, + "grad_norm": 1.1945334672927856, + "learning_rate": 2.2275758580148544e-06, + "loss": 0.3011, + "step": 34791 + }, + { + "epoch": 0.6964842479293346, + "grad_norm": 1.158811092376709, + "learning_rate": 2.2273060807459403e-06, + "loss": 0.2905, + "step": 34792 + }, + { + "epoch": 0.696504266446463, + "grad_norm": 1.7956970930099487, + "learning_rate": 2.227036315132836e-06, + "loss": 0.8037, + "step": 34793 + }, + { + "epoch": 0.6965242849635913, + "grad_norm": 1.7918673753738403, + "learning_rate": 2.226766561176673e-06, + "loss": 0.7287, + "step": 34794 + }, + { + "epoch": 0.6965443034807197, + "grad_norm": 1.0606213808059692, + "learning_rate": 2.2264968188785886e-06, + "loss": 0.3042, + "step": 34795 + }, + { + "epoch": 0.696564321997848, + "grad_norm": 1.069598913192749, + "learning_rate": 2.226227088239714e-06, + "loss": 0.2972, + "step": 34796 + }, + { + "epoch": 0.6965843405149763, + "grad_norm": 1.124819040298462, + "learning_rate": 2.2259573692611845e-06, + "loss": 0.2895, + "step": 34797 + }, + { + "epoch": 0.6966043590321047, + "grad_norm": 1.1138529777526855, + "learning_rate": 2.2256876619441315e-06, + "loss": 0.2755, + "step": 34798 + }, + { + "epoch": 0.696624377549233, + "grad_norm": 1.1736458539962769, + "learning_rate": 2.2254179662896917e-06, + "loss": 0.262, + "step": 34799 + }, + { + "epoch": 0.6966443960663614, + "grad_norm": 1.7469249963760376, + "learning_rate": 2.2251482822989985e-06, + "loss": 0.71, + "step": 34800 + }, + { + "epoch": 0.6966644145834897, + "grad_norm": 1.0364049673080444, + "learning_rate": 2.224878609973184e-06, + "loss": 0.2994, + "step": 34801 + }, + { + "epoch": 0.6966844331006181, + "grad_norm": 1.295374870300293, + "learning_rate": 2.224608949313383e-06, + "loss": 0.3032, + "step": 34802 + }, + { + "epoch": 0.6967044516177464, + "grad_norm": 1.0982431173324585, + "learning_rate": 2.2243393003207268e-06, + "loss": 0.2819, + "step": 34803 + }, + { + "epoch": 0.6967244701348747, + "grad_norm": 1.1038044691085815, + "learning_rate": 2.2240696629963524e-06, + "loss": 0.3081, + "step": 34804 + }, + { + "epoch": 0.6967444886520031, + "grad_norm": 1.0537993907928467, + "learning_rate": 2.2238000373413905e-06, + "loss": 0.3053, + "step": 34805 + }, + { + "epoch": 0.6967645071691314, + "grad_norm": 1.1714115142822266, + "learning_rate": 2.2235304233569773e-06, + "loss": 0.3413, + "step": 34806 + }, + { + "epoch": 0.6967845256862598, + "grad_norm": 1.0242974758148193, + "learning_rate": 2.223260821044243e-06, + "loss": 0.29, + "step": 34807 + }, + { + "epoch": 0.6968045442033881, + "grad_norm": 1.891971468925476, + "learning_rate": 2.2229912304043243e-06, + "loss": 0.7555, + "step": 34808 + }, + { + "epoch": 0.6968245627205165, + "grad_norm": 1.064461350440979, + "learning_rate": 2.2227216514383525e-06, + "loss": 0.2659, + "step": 34809 + }, + { + "epoch": 0.6968445812376448, + "grad_norm": 1.0261708498001099, + "learning_rate": 2.222452084147462e-06, + "loss": 0.2414, + "step": 34810 + }, + { + "epoch": 0.6968645997547732, + "grad_norm": 1.10948646068573, + "learning_rate": 2.222182528532785e-06, + "loss": 0.3444, + "step": 34811 + }, + { + "epoch": 0.6968846182719015, + "grad_norm": 1.1488542556762695, + "learning_rate": 2.2219129845954522e-06, + "loss": 0.2859, + "step": 34812 + }, + { + "epoch": 0.6969046367890298, + "grad_norm": 1.9193824529647827, + "learning_rate": 2.221643452336602e-06, + "loss": 0.7931, + "step": 34813 + }, + { + "epoch": 0.6969246553061582, + "grad_norm": 1.0620355606079102, + "learning_rate": 2.221373931757365e-06, + "loss": 0.3171, + "step": 34814 + }, + { + "epoch": 0.6969446738232865, + "grad_norm": 1.3659074306488037, + "learning_rate": 2.2211044228588736e-06, + "loss": 0.2927, + "step": 34815 + }, + { + "epoch": 0.6969646923404149, + "grad_norm": 1.1260871887207031, + "learning_rate": 2.2208349256422597e-06, + "loss": 0.2999, + "step": 34816 + }, + { + "epoch": 0.6969847108575432, + "grad_norm": 1.143469214439392, + "learning_rate": 2.220565440108659e-06, + "loss": 0.3113, + "step": 34817 + }, + { + "epoch": 0.6970047293746716, + "grad_norm": 1.0764132738113403, + "learning_rate": 2.2202959662592015e-06, + "loss": 0.305, + "step": 34818 + }, + { + "epoch": 0.6970247478917999, + "grad_norm": 1.0378012657165527, + "learning_rate": 2.220026504095023e-06, + "loss": 0.2962, + "step": 34819 + }, + { + "epoch": 0.6970447664089282, + "grad_norm": 1.0843034982681274, + "learning_rate": 2.2197570536172535e-06, + "loss": 0.3132, + "step": 34820 + }, + { + "epoch": 0.6970647849260566, + "grad_norm": 1.0853838920593262, + "learning_rate": 2.219487614827029e-06, + "loss": 0.2852, + "step": 34821 + }, + { + "epoch": 0.6970848034431849, + "grad_norm": 1.3103718757629395, + "learning_rate": 2.2192181877254798e-06, + "loss": 0.3022, + "step": 34822 + }, + { + "epoch": 0.6971048219603133, + "grad_norm": 1.134921908378601, + "learning_rate": 2.218948772313739e-06, + "loss": 0.3185, + "step": 34823 + }, + { + "epoch": 0.6971248404774416, + "grad_norm": 1.27567720413208, + "learning_rate": 2.2186793685929393e-06, + "loss": 0.351, + "step": 34824 + }, + { + "epoch": 0.69714485899457, + "grad_norm": 1.1598244905471802, + "learning_rate": 2.2184099765642108e-06, + "loss": 0.264, + "step": 34825 + }, + { + "epoch": 0.6971648775116983, + "grad_norm": 1.0039128065109253, + "learning_rate": 2.21814059622869e-06, + "loss": 0.2844, + "step": 34826 + }, + { + "epoch": 0.6971848960288267, + "grad_norm": 1.0346651077270508, + "learning_rate": 2.2178712275875074e-06, + "loss": 0.3164, + "step": 34827 + }, + { + "epoch": 0.697204914545955, + "grad_norm": 1.067507028579712, + "learning_rate": 2.2176018706417956e-06, + "loss": 0.3188, + "step": 34828 + }, + { + "epoch": 0.6972249330630833, + "grad_norm": 1.0832103490829468, + "learning_rate": 2.2173325253926854e-06, + "loss": 0.2578, + "step": 34829 + }, + { + "epoch": 0.6972449515802117, + "grad_norm": 1.1224490404129028, + "learning_rate": 2.2170631918413116e-06, + "loss": 0.2753, + "step": 34830 + }, + { + "epoch": 0.69726497009734, + "grad_norm": 1.0990374088287354, + "learning_rate": 2.2167938699888036e-06, + "loss": 0.3113, + "step": 34831 + }, + { + "epoch": 0.6972849886144684, + "grad_norm": 1.1230989694595337, + "learning_rate": 2.2165245598362974e-06, + "loss": 0.2939, + "step": 34832 + }, + { + "epoch": 0.6973050071315967, + "grad_norm": 1.0841691493988037, + "learning_rate": 2.2162552613849224e-06, + "loss": 0.2941, + "step": 34833 + }, + { + "epoch": 0.6973250256487251, + "grad_norm": 1.0371185541152954, + "learning_rate": 2.2159859746358094e-06, + "loss": 0.2724, + "step": 34834 + }, + { + "epoch": 0.6973450441658534, + "grad_norm": 1.0677107572555542, + "learning_rate": 2.2157166995900943e-06, + "loss": 0.2869, + "step": 34835 + }, + { + "epoch": 0.6973650626829817, + "grad_norm": 1.178756833076477, + "learning_rate": 2.215447436248907e-06, + "loss": 0.3243, + "step": 34836 + }, + { + "epoch": 0.6973850812001101, + "grad_norm": 1.052448034286499, + "learning_rate": 2.21517818461338e-06, + "loss": 0.2763, + "step": 34837 + }, + { + "epoch": 0.6974050997172384, + "grad_norm": 1.8885709047317505, + "learning_rate": 2.2149089446846415e-06, + "loss": 0.7752, + "step": 34838 + }, + { + "epoch": 0.6974251182343668, + "grad_norm": 1.0894594192504883, + "learning_rate": 2.214639716463829e-06, + "loss": 0.2705, + "step": 34839 + }, + { + "epoch": 0.6974451367514951, + "grad_norm": 1.173951506614685, + "learning_rate": 2.2143704999520715e-06, + "loss": 0.2739, + "step": 34840 + }, + { + "epoch": 0.6974651552686235, + "grad_norm": 1.093596339225769, + "learning_rate": 2.2141012951505015e-06, + "loss": 0.288, + "step": 34841 + }, + { + "epoch": 0.6974851737857518, + "grad_norm": 1.0972899198532104, + "learning_rate": 2.213832102060248e-06, + "loss": 0.2962, + "step": 34842 + }, + { + "epoch": 0.6975051923028802, + "grad_norm": 1.137644648551941, + "learning_rate": 2.2135629206824464e-06, + "loss": 0.3569, + "step": 34843 + }, + { + "epoch": 0.6975252108200085, + "grad_norm": 1.3691742420196533, + "learning_rate": 2.213293751018225e-06, + "loss": 0.3001, + "step": 34844 + }, + { + "epoch": 0.6975452293371368, + "grad_norm": 1.0882792472839355, + "learning_rate": 2.2130245930687185e-06, + "loss": 0.2699, + "step": 34845 + }, + { + "epoch": 0.6975652478542652, + "grad_norm": 1.138534665107727, + "learning_rate": 2.212755446835057e-06, + "loss": 0.3044, + "step": 34846 + }, + { + "epoch": 0.6975852663713935, + "grad_norm": 1.0948206186294556, + "learning_rate": 2.21248631231837e-06, + "loss": 0.2792, + "step": 34847 + }, + { + "epoch": 0.6976052848885219, + "grad_norm": 1.0805702209472656, + "learning_rate": 2.2122171895197925e-06, + "loss": 0.2895, + "step": 34848 + }, + { + "epoch": 0.6976253034056502, + "grad_norm": 1.0382187366485596, + "learning_rate": 2.211948078440454e-06, + "loss": 0.28, + "step": 34849 + }, + { + "epoch": 0.6976453219227786, + "grad_norm": 1.8477219343185425, + "learning_rate": 2.211678979081486e-06, + "loss": 0.7704, + "step": 34850 + }, + { + "epoch": 0.6976653404399069, + "grad_norm": 1.11897873878479, + "learning_rate": 2.2114098914440173e-06, + "loss": 0.2857, + "step": 34851 + }, + { + "epoch": 0.6976853589570352, + "grad_norm": 1.1411548852920532, + "learning_rate": 2.2111408155291832e-06, + "loss": 0.275, + "step": 34852 + }, + { + "epoch": 0.6977053774741636, + "grad_norm": 1.1264935731887817, + "learning_rate": 2.2108717513381135e-06, + "loss": 0.342, + "step": 34853 + }, + { + "epoch": 0.6977253959912919, + "grad_norm": 1.9002673625946045, + "learning_rate": 2.210602698871938e-06, + "loss": 0.7758, + "step": 34854 + }, + { + "epoch": 0.6977454145084203, + "grad_norm": 1.260343313217163, + "learning_rate": 2.210333658131787e-06, + "loss": 0.2816, + "step": 34855 + }, + { + "epoch": 0.6977654330255486, + "grad_norm": 1.1507669687271118, + "learning_rate": 2.2100646291187944e-06, + "loss": 0.2854, + "step": 34856 + }, + { + "epoch": 0.697785451542677, + "grad_norm": 1.2297297716140747, + "learning_rate": 2.2097956118340884e-06, + "loss": 0.3033, + "step": 34857 + }, + { + "epoch": 0.6978054700598053, + "grad_norm": 1.1864792108535767, + "learning_rate": 2.209526606278803e-06, + "loss": 0.3047, + "step": 34858 + }, + { + "epoch": 0.6978254885769337, + "grad_norm": 1.8921624422073364, + "learning_rate": 2.2092576124540666e-06, + "loss": 0.7775, + "step": 34859 + }, + { + "epoch": 0.697845507094062, + "grad_norm": 1.040068507194519, + "learning_rate": 2.2089886303610092e-06, + "loss": 0.3199, + "step": 34860 + }, + { + "epoch": 0.6978655256111903, + "grad_norm": 1.0611696243286133, + "learning_rate": 2.2087196600007647e-06, + "loss": 0.2893, + "step": 34861 + }, + { + "epoch": 0.6978855441283187, + "grad_norm": 1.1495723724365234, + "learning_rate": 2.208450701374462e-06, + "loss": 0.2939, + "step": 34862 + }, + { + "epoch": 0.697905562645447, + "grad_norm": 1.3099703788757324, + "learning_rate": 2.2081817544832323e-06, + "loss": 0.2858, + "step": 34863 + }, + { + "epoch": 0.6979255811625754, + "grad_norm": 1.1334280967712402, + "learning_rate": 2.207912819328205e-06, + "loss": 0.3331, + "step": 34864 + }, + { + "epoch": 0.6979455996797037, + "grad_norm": 1.0193450450897217, + "learning_rate": 2.20764389591051e-06, + "loss": 0.288, + "step": 34865 + }, + { + "epoch": 0.6979656181968321, + "grad_norm": 1.1931703090667725, + "learning_rate": 2.2073749842312808e-06, + "loss": 0.2862, + "step": 34866 + }, + { + "epoch": 0.6979856367139604, + "grad_norm": 1.1376317739486694, + "learning_rate": 2.207106084291646e-06, + "loss": 0.3015, + "step": 34867 + }, + { + "epoch": 0.6980056552310887, + "grad_norm": 1.2125914096832275, + "learning_rate": 2.206837196092735e-06, + "loss": 0.2954, + "step": 34868 + }, + { + "epoch": 0.6980256737482171, + "grad_norm": 1.1431620121002197, + "learning_rate": 2.2065683196356806e-06, + "loss": 0.3276, + "step": 34869 + }, + { + "epoch": 0.6980456922653454, + "grad_norm": 1.191872477531433, + "learning_rate": 2.2062994549216103e-06, + "loss": 0.3316, + "step": 34870 + }, + { + "epoch": 0.6980657107824738, + "grad_norm": 1.0081886053085327, + "learning_rate": 2.2060306019516577e-06, + "loss": 0.3043, + "step": 34871 + }, + { + "epoch": 0.6980857292996021, + "grad_norm": 1.004793405532837, + "learning_rate": 2.205761760726951e-06, + "loss": 0.2675, + "step": 34872 + }, + { + "epoch": 0.6981057478167305, + "grad_norm": 0.9848254323005676, + "learning_rate": 2.2054929312486205e-06, + "loss": 0.26, + "step": 34873 + }, + { + "epoch": 0.6981257663338588, + "grad_norm": 1.1048314571380615, + "learning_rate": 2.205224113517795e-06, + "loss": 0.324, + "step": 34874 + }, + { + "epoch": 0.6981457848509872, + "grad_norm": 1.1079999208450317, + "learning_rate": 2.2049553075356067e-06, + "loss": 0.3334, + "step": 34875 + }, + { + "epoch": 0.6981658033681155, + "grad_norm": 1.0356780290603638, + "learning_rate": 2.2046865133031856e-06, + "loss": 0.3273, + "step": 34876 + }, + { + "epoch": 0.6981858218852438, + "grad_norm": 1.3327847719192505, + "learning_rate": 2.2044177308216607e-06, + "loss": 0.3179, + "step": 34877 + }, + { + "epoch": 0.6982058404023722, + "grad_norm": 1.158022403717041, + "learning_rate": 2.20414896009216e-06, + "loss": 0.3122, + "step": 34878 + }, + { + "epoch": 0.6982258589195005, + "grad_norm": 1.0711255073547363, + "learning_rate": 2.203880201115817e-06, + "loss": 0.3111, + "step": 34879 + }, + { + "epoch": 0.6982458774366289, + "grad_norm": 1.223031759262085, + "learning_rate": 2.2036114538937607e-06, + "loss": 0.3074, + "step": 34880 + }, + { + "epoch": 0.6982658959537572, + "grad_norm": 1.1934733390808105, + "learning_rate": 2.203342718427118e-06, + "loss": 0.3574, + "step": 34881 + }, + { + "epoch": 0.6982859144708856, + "grad_norm": 1.1411888599395752, + "learning_rate": 2.2030739947170223e-06, + "loss": 0.2675, + "step": 34882 + }, + { + "epoch": 0.6983059329880139, + "grad_norm": 1.0782043933868408, + "learning_rate": 2.2028052827645995e-06, + "loss": 0.287, + "step": 34883 + }, + { + "epoch": 0.6983259515051422, + "grad_norm": 1.0412577390670776, + "learning_rate": 2.2025365825709832e-06, + "loss": 0.317, + "step": 34884 + }, + { + "epoch": 0.6983459700222706, + "grad_norm": 1.2086553573608398, + "learning_rate": 2.2022678941373018e-06, + "loss": 0.2701, + "step": 34885 + }, + { + "epoch": 0.6983659885393989, + "grad_norm": 1.153120517730713, + "learning_rate": 2.201999217464683e-06, + "loss": 0.3456, + "step": 34886 + }, + { + "epoch": 0.6983860070565273, + "grad_norm": 1.3168097734451294, + "learning_rate": 2.2017305525542563e-06, + "loss": 0.3171, + "step": 34887 + }, + { + "epoch": 0.6984060255736556, + "grad_norm": 1.9643480777740479, + "learning_rate": 2.201461899407154e-06, + "loss": 0.738, + "step": 34888 + }, + { + "epoch": 0.698426044090784, + "grad_norm": 1.650467038154602, + "learning_rate": 2.201193258024503e-06, + "loss": 0.7839, + "step": 34889 + }, + { + "epoch": 0.6984460626079123, + "grad_norm": 1.1217129230499268, + "learning_rate": 2.2009246284074342e-06, + "loss": 0.292, + "step": 34890 + }, + { + "epoch": 0.6984660811250407, + "grad_norm": 1.1367448568344116, + "learning_rate": 2.2006560105570735e-06, + "loss": 0.3068, + "step": 34891 + }, + { + "epoch": 0.698486099642169, + "grad_norm": 1.79622483253479, + "learning_rate": 2.200387404474554e-06, + "loss": 0.7931, + "step": 34892 + }, + { + "epoch": 0.6985061181592973, + "grad_norm": 1.1809347867965698, + "learning_rate": 2.200118810161004e-06, + "loss": 0.312, + "step": 34893 + }, + { + "epoch": 0.6985261366764257, + "grad_norm": 1.2662798166275024, + "learning_rate": 2.19985022761755e-06, + "loss": 0.2825, + "step": 34894 + }, + { + "epoch": 0.698546155193554, + "grad_norm": 1.0362290143966675, + "learning_rate": 2.199581656845325e-06, + "loss": 0.2749, + "step": 34895 + }, + { + "epoch": 0.6985661737106824, + "grad_norm": 1.1839395761489868, + "learning_rate": 2.199313097845454e-06, + "loss": 0.2981, + "step": 34896 + }, + { + "epoch": 0.6985861922278107, + "grad_norm": 1.3796082735061646, + "learning_rate": 2.1990445506190698e-06, + "loss": 0.3348, + "step": 34897 + }, + { + "epoch": 0.6986062107449391, + "grad_norm": 1.8840426206588745, + "learning_rate": 2.1987760151672995e-06, + "loss": 0.8074, + "step": 34898 + }, + { + "epoch": 0.6986262292620674, + "grad_norm": 1.8451871871948242, + "learning_rate": 2.198507491491272e-06, + "loss": 0.7202, + "step": 34899 + }, + { + "epoch": 0.6986462477791957, + "grad_norm": 0.9178968071937561, + "learning_rate": 2.1982389795921146e-06, + "loss": 0.2487, + "step": 34900 + }, + { + "epoch": 0.6986662662963241, + "grad_norm": 1.1847071647644043, + "learning_rate": 2.197970479470959e-06, + "loss": 0.2788, + "step": 34901 + }, + { + "epoch": 0.6986862848134524, + "grad_norm": 1.3884892463684082, + "learning_rate": 2.1977019911289327e-06, + "loss": 0.2985, + "step": 34902 + }, + { + "epoch": 0.6987063033305808, + "grad_norm": 1.9938112497329712, + "learning_rate": 2.1974335145671645e-06, + "loss": 0.7208, + "step": 34903 + }, + { + "epoch": 0.6987263218477091, + "grad_norm": 1.0939549207687378, + "learning_rate": 2.197165049786782e-06, + "loss": 0.3306, + "step": 34904 + }, + { + "epoch": 0.6987463403648375, + "grad_norm": 1.1651685237884521, + "learning_rate": 2.1968965967889134e-06, + "loss": 0.3107, + "step": 34905 + }, + { + "epoch": 0.6987663588819658, + "grad_norm": 1.1164218187332153, + "learning_rate": 2.1966281555746894e-06, + "loss": 0.2875, + "step": 34906 + }, + { + "epoch": 0.6987863773990942, + "grad_norm": 1.153530240058899, + "learning_rate": 2.1963597261452357e-06, + "loss": 0.3321, + "step": 34907 + }, + { + "epoch": 0.6988063959162225, + "grad_norm": 1.0325751304626465, + "learning_rate": 2.1960913085016843e-06, + "loss": 0.263, + "step": 34908 + }, + { + "epoch": 0.6988264144333508, + "grad_norm": 1.0969265699386597, + "learning_rate": 2.19582290264516e-06, + "loss": 0.2779, + "step": 34909 + }, + { + "epoch": 0.6988464329504792, + "grad_norm": 1.1103744506835938, + "learning_rate": 2.1955545085767942e-06, + "loss": 0.3055, + "step": 34910 + }, + { + "epoch": 0.6988664514676075, + "grad_norm": 1.129831314086914, + "learning_rate": 2.1952861262977137e-06, + "loss": 0.2679, + "step": 34911 + }, + { + "epoch": 0.6988864699847359, + "grad_norm": 1.1252851486206055, + "learning_rate": 2.1950177558090468e-06, + "loss": 0.2694, + "step": 34912 + }, + { + "epoch": 0.6989064885018642, + "grad_norm": 1.2022024393081665, + "learning_rate": 2.1947493971119204e-06, + "loss": 0.2956, + "step": 34913 + }, + { + "epoch": 0.6989265070189926, + "grad_norm": 1.3202441930770874, + "learning_rate": 2.1944810502074646e-06, + "loss": 0.2802, + "step": 34914 + }, + { + "epoch": 0.6989465255361209, + "grad_norm": 1.1925972700119019, + "learning_rate": 2.194212715096808e-06, + "loss": 0.3304, + "step": 34915 + }, + { + "epoch": 0.6989665440532492, + "grad_norm": 1.1690579652786255, + "learning_rate": 2.1939443917810767e-06, + "loss": 0.3052, + "step": 34916 + }, + { + "epoch": 0.6989865625703776, + "grad_norm": 1.072572946548462, + "learning_rate": 2.193676080261399e-06, + "loss": 0.3082, + "step": 34917 + }, + { + "epoch": 0.6990065810875059, + "grad_norm": 1.7957388162612915, + "learning_rate": 2.1934077805389013e-06, + "loss": 0.7153, + "step": 34918 + }, + { + "epoch": 0.6990265996046343, + "grad_norm": 1.213101863861084, + "learning_rate": 2.1931394926147154e-06, + "loss": 0.2906, + "step": 34919 + }, + { + "epoch": 0.6990466181217626, + "grad_norm": 1.1156554222106934, + "learning_rate": 2.1928712164899653e-06, + "loss": 0.3084, + "step": 34920 + }, + { + "epoch": 0.699066636638891, + "grad_norm": 1.0054036378860474, + "learning_rate": 2.1926029521657822e-06, + "loss": 0.297, + "step": 34921 + }, + { + "epoch": 0.6990866551560193, + "grad_norm": 1.1211532354354858, + "learning_rate": 2.1923346996432905e-06, + "loss": 0.2938, + "step": 34922 + }, + { + "epoch": 0.6991066736731477, + "grad_norm": 1.2908220291137695, + "learning_rate": 2.1920664589236206e-06, + "loss": 0.3127, + "step": 34923 + }, + { + "epoch": 0.699126692190276, + "grad_norm": 1.1292386054992676, + "learning_rate": 2.1917982300079e-06, + "loss": 0.3269, + "step": 34924 + }, + { + "epoch": 0.6991467107074043, + "grad_norm": 1.0482118129730225, + "learning_rate": 2.191530012897255e-06, + "loss": 0.2811, + "step": 34925 + }, + { + "epoch": 0.6991667292245327, + "grad_norm": 1.0586055517196655, + "learning_rate": 2.191261807592813e-06, + "loss": 0.3176, + "step": 34926 + }, + { + "epoch": 0.699186747741661, + "grad_norm": 1.1488924026489258, + "learning_rate": 2.1909936140957005e-06, + "loss": 0.2635, + "step": 34927 + }, + { + "epoch": 0.6992067662587894, + "grad_norm": 1.0770550966262817, + "learning_rate": 2.190725432407048e-06, + "loss": 0.2809, + "step": 34928 + }, + { + "epoch": 0.6992267847759177, + "grad_norm": 1.0433731079101562, + "learning_rate": 2.1904572625279813e-06, + "loss": 0.2858, + "step": 34929 + }, + { + "epoch": 0.6992468032930461, + "grad_norm": 1.1443763971328735, + "learning_rate": 2.190189104459628e-06, + "loss": 0.2749, + "step": 34930 + }, + { + "epoch": 0.6992668218101744, + "grad_norm": 1.2124435901641846, + "learning_rate": 2.189920958203113e-06, + "loss": 0.3079, + "step": 34931 + }, + { + "epoch": 0.6992868403273027, + "grad_norm": 1.124448299407959, + "learning_rate": 2.189652823759567e-06, + "loss": 0.2979, + "step": 34932 + }, + { + "epoch": 0.6993068588444311, + "grad_norm": 0.9950383901596069, + "learning_rate": 2.189384701130114e-06, + "loss": 0.319, + "step": 34933 + }, + { + "epoch": 0.6993268773615594, + "grad_norm": 1.1346980333328247, + "learning_rate": 2.189116590315885e-06, + "loss": 0.2864, + "step": 34934 + }, + { + "epoch": 0.6993468958786878, + "grad_norm": 1.1935110092163086, + "learning_rate": 2.188848491318005e-06, + "loss": 0.2812, + "step": 34935 + }, + { + "epoch": 0.6993669143958161, + "grad_norm": 1.1017897129058838, + "learning_rate": 2.1885804041375986e-06, + "loss": 0.3209, + "step": 34936 + }, + { + "epoch": 0.6993869329129445, + "grad_norm": 1.0957878828048706, + "learning_rate": 2.188312328775797e-06, + "loss": 0.2962, + "step": 34937 + }, + { + "epoch": 0.6994069514300728, + "grad_norm": 1.129154920578003, + "learning_rate": 2.1880442652337256e-06, + "loss": 0.3018, + "step": 34938 + }, + { + "epoch": 0.6994269699472012, + "grad_norm": 1.3674359321594238, + "learning_rate": 2.1877762135125113e-06, + "loss": 0.332, + "step": 34939 + }, + { + "epoch": 0.6994469884643295, + "grad_norm": 1.902053713798523, + "learning_rate": 2.1875081736132782e-06, + "loss": 0.774, + "step": 34940 + }, + { + "epoch": 0.6994670069814578, + "grad_norm": 1.275307059288025, + "learning_rate": 2.1872401455371576e-06, + "loss": 0.3075, + "step": 34941 + }, + { + "epoch": 0.6994870254985862, + "grad_norm": 1.068184494972229, + "learning_rate": 2.1869721292852743e-06, + "loss": 0.2847, + "step": 34942 + }, + { + "epoch": 0.6995070440157145, + "grad_norm": 1.1398693323135376, + "learning_rate": 2.1867041248587545e-06, + "loss": 0.2954, + "step": 34943 + }, + { + "epoch": 0.6995270625328429, + "grad_norm": 1.1461732387542725, + "learning_rate": 2.1864361322587234e-06, + "loss": 0.3583, + "step": 34944 + }, + { + "epoch": 0.6995470810499712, + "grad_norm": 1.2159501314163208, + "learning_rate": 2.186168151486311e-06, + "loss": 0.2878, + "step": 34945 + }, + { + "epoch": 0.6995670995670996, + "grad_norm": 1.181235671043396, + "learning_rate": 2.18590018254264e-06, + "loss": 0.3175, + "step": 34946 + }, + { + "epoch": 0.6995871180842279, + "grad_norm": 1.1144746541976929, + "learning_rate": 2.1856322254288415e-06, + "loss": 0.3124, + "step": 34947 + }, + { + "epoch": 0.6996071366013562, + "grad_norm": 1.227871060371399, + "learning_rate": 2.1853642801460394e-06, + "loss": 0.3528, + "step": 34948 + }, + { + "epoch": 0.6996271551184846, + "grad_norm": 1.1954314708709717, + "learning_rate": 2.1850963466953583e-06, + "loss": 0.284, + "step": 34949 + }, + { + "epoch": 0.6996471736356129, + "grad_norm": 1.1351746320724487, + "learning_rate": 2.1848284250779278e-06, + "loss": 0.301, + "step": 34950 + }, + { + "epoch": 0.6996671921527413, + "grad_norm": 1.1337565183639526, + "learning_rate": 2.1845605152948736e-06, + "loss": 0.3037, + "step": 34951 + }, + { + "epoch": 0.6996872106698696, + "grad_norm": 1.1010547876358032, + "learning_rate": 2.18429261734732e-06, + "loss": 0.3158, + "step": 34952 + }, + { + "epoch": 0.699707229186998, + "grad_norm": 1.7402859926223755, + "learning_rate": 2.184024731236393e-06, + "loss": 0.7459, + "step": 34953 + }, + { + "epoch": 0.6997272477041263, + "grad_norm": 2.035494804382324, + "learning_rate": 2.183756856963222e-06, + "loss": 0.753, + "step": 34954 + }, + { + "epoch": 0.6997472662212547, + "grad_norm": 1.1947376728057861, + "learning_rate": 2.183488994528931e-06, + "loss": 0.3359, + "step": 34955 + }, + { + "epoch": 0.699767284738383, + "grad_norm": 1.1856341361999512, + "learning_rate": 2.183221143934646e-06, + "loss": 0.3339, + "step": 34956 + }, + { + "epoch": 0.6997873032555113, + "grad_norm": 1.0626660585403442, + "learning_rate": 2.1829533051814933e-06, + "loss": 0.3264, + "step": 34957 + }, + { + "epoch": 0.6998073217726397, + "grad_norm": 1.1108165979385376, + "learning_rate": 2.182685478270597e-06, + "loss": 0.2509, + "step": 34958 + }, + { + "epoch": 0.699827340289768, + "grad_norm": 1.0400221347808838, + "learning_rate": 2.1824176632030845e-06, + "loss": 0.2829, + "step": 34959 + }, + { + "epoch": 0.6998473588068964, + "grad_norm": 1.3656272888183594, + "learning_rate": 2.182149859980083e-06, + "loss": 0.3323, + "step": 34960 + }, + { + "epoch": 0.6998673773240247, + "grad_norm": 1.1177140474319458, + "learning_rate": 2.181882068602718e-06, + "loss": 0.3129, + "step": 34961 + }, + { + "epoch": 0.6998873958411531, + "grad_norm": 1.0334242582321167, + "learning_rate": 2.1816142890721126e-06, + "loss": 0.2741, + "step": 34962 + }, + { + "epoch": 0.6999074143582814, + "grad_norm": 1.2609162330627441, + "learning_rate": 2.1813465213893953e-06, + "loss": 0.3295, + "step": 34963 + }, + { + "epoch": 0.6999274328754097, + "grad_norm": 1.3918098211288452, + "learning_rate": 2.181078765555691e-06, + "loss": 0.3027, + "step": 34964 + }, + { + "epoch": 0.6999474513925381, + "grad_norm": 1.158398985862732, + "learning_rate": 2.180811021572125e-06, + "loss": 0.3197, + "step": 34965 + }, + { + "epoch": 0.6999674699096664, + "grad_norm": 1.0169497728347778, + "learning_rate": 2.180543289439821e-06, + "loss": 0.3071, + "step": 34966 + }, + { + "epoch": 0.6999874884267948, + "grad_norm": 1.1840029954910278, + "learning_rate": 2.1802755691599074e-06, + "loss": 0.2882, + "step": 34967 + }, + { + "epoch": 0.7000075069439231, + "grad_norm": 1.29901123046875, + "learning_rate": 2.180007860733509e-06, + "loss": 0.3732, + "step": 34968 + }, + { + "epoch": 0.7000275254610515, + "grad_norm": 1.0827420949935913, + "learning_rate": 2.179740164161751e-06, + "loss": 0.272, + "step": 34969 + }, + { + "epoch": 0.7000475439781798, + "grad_norm": 1.0672129392623901, + "learning_rate": 2.1794724794457577e-06, + "loss": 0.2814, + "step": 34970 + }, + { + "epoch": 0.7000675624953082, + "grad_norm": 1.0710065364837646, + "learning_rate": 2.179204806586654e-06, + "loss": 0.3391, + "step": 34971 + }, + { + "epoch": 0.7000875810124365, + "grad_norm": 0.9824265241622925, + "learning_rate": 2.1789371455855657e-06, + "loss": 0.2737, + "step": 34972 + }, + { + "epoch": 0.7001075995295648, + "grad_norm": 1.1614633798599243, + "learning_rate": 2.1786694964436207e-06, + "loss": 0.3204, + "step": 34973 + }, + { + "epoch": 0.7001276180466932, + "grad_norm": 1.0598796606063843, + "learning_rate": 2.178401859161942e-06, + "loss": 0.2819, + "step": 34974 + }, + { + "epoch": 0.7001476365638215, + "grad_norm": 1.1247602701187134, + "learning_rate": 2.1781342337416527e-06, + "loss": 0.2897, + "step": 34975 + }, + { + "epoch": 0.7001676550809499, + "grad_norm": 1.0181962251663208, + "learning_rate": 2.1778666201838815e-06, + "loss": 0.291, + "step": 34976 + }, + { + "epoch": 0.7001876735980782, + "grad_norm": 1.0734134912490845, + "learning_rate": 2.177599018489752e-06, + "loss": 0.3037, + "step": 34977 + }, + { + "epoch": 0.7002076921152066, + "grad_norm": 1.0662084817886353, + "learning_rate": 2.1773314286603888e-06, + "loss": 0.2512, + "step": 34978 + }, + { + "epoch": 0.7002277106323349, + "grad_norm": 1.038100004196167, + "learning_rate": 2.1770638506969165e-06, + "loss": 0.2641, + "step": 34979 + }, + { + "epoch": 0.7002477291494632, + "grad_norm": 1.023712158203125, + "learning_rate": 2.176796284600459e-06, + "loss": 0.2686, + "step": 34980 + }, + { + "epoch": 0.7002677476665916, + "grad_norm": 1.093148946762085, + "learning_rate": 2.176528730372144e-06, + "loss": 0.2995, + "step": 34981 + }, + { + "epoch": 0.7002877661837199, + "grad_norm": 1.0832117795944214, + "learning_rate": 2.176261188013094e-06, + "loss": 0.2868, + "step": 34982 + }, + { + "epoch": 0.7003077847008483, + "grad_norm": 1.025234580039978, + "learning_rate": 2.175993657524435e-06, + "loss": 0.2764, + "step": 34983 + }, + { + "epoch": 0.7003278032179766, + "grad_norm": 1.0967735052108765, + "learning_rate": 2.1757261389072887e-06, + "loss": 0.276, + "step": 34984 + }, + { + "epoch": 0.700347821735105, + "grad_norm": 1.0849711894989014, + "learning_rate": 2.175458632162782e-06, + "loss": 0.2642, + "step": 34985 + }, + { + "epoch": 0.7003678402522333, + "grad_norm": 1.793260931968689, + "learning_rate": 2.1751911372920416e-06, + "loss": 0.7425, + "step": 34986 + }, + { + "epoch": 0.7003878587693616, + "grad_norm": 1.1612745523452759, + "learning_rate": 2.1749236542961894e-06, + "loss": 0.2704, + "step": 34987 + }, + { + "epoch": 0.70040787728649, + "grad_norm": 1.132369041442871, + "learning_rate": 2.1746561831763502e-06, + "loss": 0.2728, + "step": 34988 + }, + { + "epoch": 0.7004278958036183, + "grad_norm": 1.8398746252059937, + "learning_rate": 2.1743887239336465e-06, + "loss": 0.704, + "step": 34989 + }, + { + "epoch": 0.7004479143207467, + "grad_norm": 1.129321575164795, + "learning_rate": 2.174121276569206e-06, + "loss": 0.2846, + "step": 34990 + }, + { + "epoch": 0.700467932837875, + "grad_norm": 1.0737700462341309, + "learning_rate": 2.173853841084152e-06, + "loss": 0.3024, + "step": 34991 + }, + { + "epoch": 0.7004879513550034, + "grad_norm": 1.0977839231491089, + "learning_rate": 2.1735864174796083e-06, + "loss": 0.2876, + "step": 34992 + }, + { + "epoch": 0.7005079698721317, + "grad_norm": 1.2537988424301147, + "learning_rate": 2.173319005756697e-06, + "loss": 0.2763, + "step": 34993 + }, + { + "epoch": 0.7005279883892601, + "grad_norm": 1.0051066875457764, + "learning_rate": 2.1730516059165465e-06, + "loss": 0.3062, + "step": 34994 + }, + { + "epoch": 0.7005480069063884, + "grad_norm": 1.0970290899276733, + "learning_rate": 2.1727842179602783e-06, + "loss": 0.2669, + "step": 34995 + }, + { + "epoch": 0.7005680254235167, + "grad_norm": 1.1944305896759033, + "learning_rate": 2.172516841889017e-06, + "loss": 0.2826, + "step": 34996 + }, + { + "epoch": 0.7005880439406451, + "grad_norm": 1.263611078262329, + "learning_rate": 2.1722494777038843e-06, + "loss": 0.2747, + "step": 34997 + }, + { + "epoch": 0.7006080624577734, + "grad_norm": 1.24427330493927, + "learning_rate": 2.171982125406007e-06, + "loss": 0.2972, + "step": 34998 + }, + { + "epoch": 0.7006280809749018, + "grad_norm": 1.068117618560791, + "learning_rate": 2.1717147849965094e-06, + "loss": 0.3156, + "step": 34999 + }, + { + "epoch": 0.7006480994920301, + "grad_norm": 1.121668815612793, + "learning_rate": 2.1714474564765143e-06, + "loss": 0.3439, + "step": 35000 + }, + { + "epoch": 0.7006681180091585, + "grad_norm": 1.9313102960586548, + "learning_rate": 2.1711801398471453e-06, + "loss": 0.8062, + "step": 35001 + }, + { + "epoch": 0.7006881365262868, + "grad_norm": 1.1650958061218262, + "learning_rate": 2.170912835109525e-06, + "loss": 0.3182, + "step": 35002 + }, + { + "epoch": 0.7007081550434151, + "grad_norm": 1.1740676164627075, + "learning_rate": 2.1706455422647794e-06, + "loss": 0.2985, + "step": 35003 + }, + { + "epoch": 0.7007281735605435, + "grad_norm": 0.9383339285850525, + "learning_rate": 2.1703782613140314e-06, + "loss": 0.2597, + "step": 35004 + }, + { + "epoch": 0.7007481920776718, + "grad_norm": 1.1237915754318237, + "learning_rate": 2.170110992258404e-06, + "loss": 0.2906, + "step": 35005 + }, + { + "epoch": 0.7007682105948002, + "grad_norm": 1.2438958883285522, + "learning_rate": 2.1698437350990193e-06, + "loss": 0.3234, + "step": 35006 + }, + { + "epoch": 0.7007882291119285, + "grad_norm": 1.2481086254119873, + "learning_rate": 2.169576489837004e-06, + "loss": 0.3461, + "step": 35007 + }, + { + "epoch": 0.7008082476290569, + "grad_norm": 1.005879282951355, + "learning_rate": 2.1693092564734807e-06, + "loss": 0.2551, + "step": 35008 + }, + { + "epoch": 0.7008282661461852, + "grad_norm": 1.100955843925476, + "learning_rate": 2.169042035009572e-06, + "loss": 0.2746, + "step": 35009 + }, + { + "epoch": 0.7008482846633136, + "grad_norm": 2.144601821899414, + "learning_rate": 2.168774825446399e-06, + "loss": 0.786, + "step": 35010 + }, + { + "epoch": 0.7008683031804419, + "grad_norm": 1.1588084697723389, + "learning_rate": 2.168507627785088e-06, + "loss": 0.3337, + "step": 35011 + }, + { + "epoch": 0.7008883216975702, + "grad_norm": 1.1122504472732544, + "learning_rate": 2.1682404420267626e-06, + "loss": 0.3187, + "step": 35012 + }, + { + "epoch": 0.7009083402146986, + "grad_norm": 1.0189155340194702, + "learning_rate": 2.1679732681725455e-06, + "loss": 0.3062, + "step": 35013 + }, + { + "epoch": 0.7009283587318269, + "grad_norm": 1.0649975538253784, + "learning_rate": 2.1677061062235595e-06, + "loss": 0.2869, + "step": 35014 + }, + { + "epoch": 0.7009483772489553, + "grad_norm": 1.2623038291931152, + "learning_rate": 2.1674389561809255e-06, + "loss": 0.2844, + "step": 35015 + }, + { + "epoch": 0.7009683957660836, + "grad_norm": 1.1443872451782227, + "learning_rate": 2.16717181804577e-06, + "loss": 0.2958, + "step": 35016 + }, + { + "epoch": 0.700988414283212, + "grad_norm": 1.0073649883270264, + "learning_rate": 2.1669046918192157e-06, + "loss": 0.2985, + "step": 35017 + }, + { + "epoch": 0.7010084328003403, + "grad_norm": 1.0572795867919922, + "learning_rate": 2.166637577502383e-06, + "loss": 0.2886, + "step": 35018 + }, + { + "epoch": 0.7010284513174686, + "grad_norm": 1.0896743535995483, + "learning_rate": 2.1663704750963973e-06, + "loss": 0.2458, + "step": 35019 + }, + { + "epoch": 0.701048469834597, + "grad_norm": 1.9028490781784058, + "learning_rate": 2.1661033846023775e-06, + "loss": 0.757, + "step": 35020 + }, + { + "epoch": 0.7010684883517253, + "grad_norm": 1.0788987874984741, + "learning_rate": 2.1658363060214514e-06, + "loss": 0.2889, + "step": 35021 + }, + { + "epoch": 0.7010885068688537, + "grad_norm": 1.116417407989502, + "learning_rate": 2.1655692393547395e-06, + "loss": 0.3344, + "step": 35022 + }, + { + "epoch": 0.701108525385982, + "grad_norm": 1.1770973205566406, + "learning_rate": 2.1653021846033633e-06, + "loss": 0.3004, + "step": 35023 + }, + { + "epoch": 0.7011285439031104, + "grad_norm": 1.1453343629837036, + "learning_rate": 2.1650351417684464e-06, + "loss": 0.3049, + "step": 35024 + }, + { + "epoch": 0.7011485624202387, + "grad_norm": 0.9993764162063599, + "learning_rate": 2.1647681108511133e-06, + "loss": 0.3192, + "step": 35025 + }, + { + "epoch": 0.7011685809373671, + "grad_norm": 1.106139063835144, + "learning_rate": 2.1645010918524845e-06, + "loss": 0.3269, + "step": 35026 + }, + { + "epoch": 0.7011885994544954, + "grad_norm": 1.223889946937561, + "learning_rate": 2.1642340847736836e-06, + "loss": 0.3228, + "step": 35027 + }, + { + "epoch": 0.7012086179716237, + "grad_norm": 1.1011216640472412, + "learning_rate": 2.16396708961583e-06, + "loss": 0.2475, + "step": 35028 + }, + { + "epoch": 0.7012286364887521, + "grad_norm": 0.9891629815101624, + "learning_rate": 2.1637001063800504e-06, + "loss": 0.2999, + "step": 35029 + }, + { + "epoch": 0.7012486550058804, + "grad_norm": 1.1771912574768066, + "learning_rate": 2.1634331350674653e-06, + "loss": 0.2963, + "step": 35030 + }, + { + "epoch": 0.7012686735230088, + "grad_norm": 1.9637876749038696, + "learning_rate": 2.1631661756791973e-06, + "loss": 0.6946, + "step": 35031 + }, + { + "epoch": 0.7012886920401371, + "grad_norm": 1.090822696685791, + "learning_rate": 2.1628992282163676e-06, + "loss": 0.3096, + "step": 35032 + }, + { + "epoch": 0.7013087105572655, + "grad_norm": 1.3757580518722534, + "learning_rate": 2.1626322926800972e-06, + "loss": 0.3034, + "step": 35033 + }, + { + "epoch": 0.7013287290743938, + "grad_norm": 1.0564631223678589, + "learning_rate": 2.1623653690715123e-06, + "loss": 0.2785, + "step": 35034 + }, + { + "epoch": 0.7013487475915221, + "grad_norm": 1.166854739189148, + "learning_rate": 2.1620984573917327e-06, + "loss": 0.3049, + "step": 35035 + }, + { + "epoch": 0.7013687661086505, + "grad_norm": 1.1400989294052124, + "learning_rate": 2.161831557641878e-06, + "loss": 0.3191, + "step": 35036 + }, + { + "epoch": 0.7013887846257788, + "grad_norm": 1.016613245010376, + "learning_rate": 2.161564669823073e-06, + "loss": 0.2757, + "step": 35037 + }, + { + "epoch": 0.7014088031429072, + "grad_norm": 1.1621720790863037, + "learning_rate": 2.161297793936441e-06, + "loss": 0.3377, + "step": 35038 + }, + { + "epoch": 0.7014288216600355, + "grad_norm": 1.081021785736084, + "learning_rate": 2.1610309299831024e-06, + "loss": 0.2824, + "step": 35039 + }, + { + "epoch": 0.7014488401771639, + "grad_norm": 1.1051025390625, + "learning_rate": 2.1607640779641786e-06, + "loss": 0.3057, + "step": 35040 + }, + { + "epoch": 0.7014688586942922, + "grad_norm": 1.1804478168487549, + "learning_rate": 2.1604972378807922e-06, + "loss": 0.2857, + "step": 35041 + }, + { + "epoch": 0.7014888772114206, + "grad_norm": 1.1871505975723267, + "learning_rate": 2.1602304097340614e-06, + "loss": 0.3164, + "step": 35042 + }, + { + "epoch": 0.7015088957285489, + "grad_norm": 1.1137781143188477, + "learning_rate": 2.1599635935251135e-06, + "loss": 0.2865, + "step": 35043 + }, + { + "epoch": 0.7015289142456772, + "grad_norm": 1.1107596158981323, + "learning_rate": 2.1596967892550674e-06, + "loss": 0.301, + "step": 35044 + }, + { + "epoch": 0.7015489327628056, + "grad_norm": 1.0155129432678223, + "learning_rate": 2.1594299969250447e-06, + "loss": 0.2758, + "step": 35045 + }, + { + "epoch": 0.7015689512799339, + "grad_norm": 1.0384395122528076, + "learning_rate": 2.1591632165361646e-06, + "loss": 0.2795, + "step": 35046 + }, + { + "epoch": 0.7015889697970623, + "grad_norm": 1.2558023929595947, + "learning_rate": 2.1588964480895534e-06, + "loss": 0.3012, + "step": 35047 + }, + { + "epoch": 0.7016089883141906, + "grad_norm": 1.2605031728744507, + "learning_rate": 2.1586296915863304e-06, + "loss": 0.3338, + "step": 35048 + }, + { + "epoch": 0.701629006831319, + "grad_norm": 1.0291049480438232, + "learning_rate": 2.1583629470276144e-06, + "loss": 0.2764, + "step": 35049 + }, + { + "epoch": 0.7016490253484473, + "grad_norm": 0.983870267868042, + "learning_rate": 2.1580962144145313e-06, + "loss": 0.2576, + "step": 35050 + }, + { + "epoch": 0.7016690438655756, + "grad_norm": 1.0341414213180542, + "learning_rate": 2.1578294937481974e-06, + "loss": 0.3144, + "step": 35051 + }, + { + "epoch": 0.701689062382704, + "grad_norm": 1.2087074518203735, + "learning_rate": 2.1575627850297392e-06, + "loss": 0.3311, + "step": 35052 + }, + { + "epoch": 0.7017090808998323, + "grad_norm": 1.1113289594650269, + "learning_rate": 2.1572960882602757e-06, + "loss": 0.3243, + "step": 35053 + }, + { + "epoch": 0.7017290994169607, + "grad_norm": 1.8534961938858032, + "learning_rate": 2.1570294034409267e-06, + "loss": 0.799, + "step": 35054 + }, + { + "epoch": 0.701749117934089, + "grad_norm": 1.1144214868545532, + "learning_rate": 2.156762730572813e-06, + "loss": 0.3086, + "step": 35055 + }, + { + "epoch": 0.7017691364512174, + "grad_norm": 1.1300334930419922, + "learning_rate": 2.156496069657059e-06, + "loss": 0.2959, + "step": 35056 + }, + { + "epoch": 0.7017891549683457, + "grad_norm": 1.2812623977661133, + "learning_rate": 2.1562294206947832e-06, + "loss": 0.2873, + "step": 35057 + }, + { + "epoch": 0.7018091734854741, + "grad_norm": 1.0450125932693481, + "learning_rate": 2.155962783687107e-06, + "loss": 0.2615, + "step": 35058 + }, + { + "epoch": 0.7018291920026024, + "grad_norm": 1.8401541709899902, + "learning_rate": 2.1556961586351496e-06, + "loss": 0.7784, + "step": 35059 + }, + { + "epoch": 0.7018492105197307, + "grad_norm": 1.073383092880249, + "learning_rate": 2.155429545540035e-06, + "loss": 0.317, + "step": 35060 + }, + { + "epoch": 0.7018692290368591, + "grad_norm": 1.0859755277633667, + "learning_rate": 2.1551629444028827e-06, + "loss": 0.2912, + "step": 35061 + }, + { + "epoch": 0.7018892475539874, + "grad_norm": 1.1128495931625366, + "learning_rate": 2.154896355224811e-06, + "loss": 0.2941, + "step": 35062 + }, + { + "epoch": 0.7019092660711158, + "grad_norm": 1.0793424844741821, + "learning_rate": 2.1546297780069447e-06, + "loss": 0.2791, + "step": 35063 + }, + { + "epoch": 0.7019292845882441, + "grad_norm": 1.2143758535385132, + "learning_rate": 2.1543632127504005e-06, + "loss": 0.2948, + "step": 35064 + }, + { + "epoch": 0.7019493031053725, + "grad_norm": 1.2252779006958008, + "learning_rate": 2.1540966594563027e-06, + "loss": 0.3397, + "step": 35065 + }, + { + "epoch": 0.7019693216225008, + "grad_norm": 1.2313393354415894, + "learning_rate": 2.153830118125771e-06, + "loss": 0.345, + "step": 35066 + }, + { + "epoch": 0.7019893401396291, + "grad_norm": 1.0896215438842773, + "learning_rate": 2.153563588759924e-06, + "loss": 0.2792, + "step": 35067 + }, + { + "epoch": 0.7020093586567575, + "grad_norm": 1.019566535949707, + "learning_rate": 2.1532970713598817e-06, + "loss": 0.3247, + "step": 35068 + }, + { + "epoch": 0.7020293771738858, + "grad_norm": 1.2755848169326782, + "learning_rate": 2.1530305659267673e-06, + "loss": 0.2866, + "step": 35069 + }, + { + "epoch": 0.7020493956910142, + "grad_norm": 1.2460706233978271, + "learning_rate": 2.1527640724617004e-06, + "loss": 0.3053, + "step": 35070 + }, + { + "epoch": 0.7020694142081425, + "grad_norm": 1.0863558053970337, + "learning_rate": 2.1524975909658e-06, + "loss": 0.2682, + "step": 35071 + }, + { + "epoch": 0.7020894327252709, + "grad_norm": 1.1503409147262573, + "learning_rate": 2.1522311214401874e-06, + "loss": 0.3001, + "step": 35072 + }, + { + "epoch": 0.7021094512423992, + "grad_norm": 1.0053452253341675, + "learning_rate": 2.1519646638859805e-06, + "loss": 0.2824, + "step": 35073 + }, + { + "epoch": 0.7021294697595276, + "grad_norm": 1.1775721311569214, + "learning_rate": 2.1516982183043027e-06, + "loss": 0.3008, + "step": 35074 + }, + { + "epoch": 0.7021494882766559, + "grad_norm": 1.0986876487731934, + "learning_rate": 2.151431784696271e-06, + "loss": 0.3199, + "step": 35075 + }, + { + "epoch": 0.7021695067937842, + "grad_norm": 1.0287187099456787, + "learning_rate": 2.151165363063009e-06, + "loss": 0.3266, + "step": 35076 + }, + { + "epoch": 0.7021895253109126, + "grad_norm": 1.1972992420196533, + "learning_rate": 2.150898953405633e-06, + "loss": 0.3304, + "step": 35077 + }, + { + "epoch": 0.7022095438280409, + "grad_norm": 1.0447949171066284, + "learning_rate": 2.1506325557252663e-06, + "loss": 0.3143, + "step": 35078 + }, + { + "epoch": 0.7022295623451693, + "grad_norm": 1.0298559665679932, + "learning_rate": 2.150366170023027e-06, + "loss": 0.3039, + "step": 35079 + }, + { + "epoch": 0.7022495808622976, + "grad_norm": 1.2083743810653687, + "learning_rate": 2.1500997963000352e-06, + "loss": 0.3433, + "step": 35080 + }, + { + "epoch": 0.702269599379426, + "grad_norm": 1.221640706062317, + "learning_rate": 2.149833434557411e-06, + "loss": 0.3113, + "step": 35081 + }, + { + "epoch": 0.7022896178965543, + "grad_norm": 1.155276894569397, + "learning_rate": 2.1495670847962713e-06, + "loss": 0.302, + "step": 35082 + }, + { + "epoch": 0.7023096364136826, + "grad_norm": 1.2292202711105347, + "learning_rate": 2.14930074701774e-06, + "loss": 0.293, + "step": 35083 + }, + { + "epoch": 0.702329654930811, + "grad_norm": 0.9550153017044067, + "learning_rate": 2.149034421222935e-06, + "loss": 0.2522, + "step": 35084 + }, + { + "epoch": 0.7023496734479393, + "grad_norm": 1.1837074756622314, + "learning_rate": 2.1487681074129757e-06, + "loss": 0.3155, + "step": 35085 + }, + { + "epoch": 0.7023696919650677, + "grad_norm": 1.203731894493103, + "learning_rate": 2.14850180558898e-06, + "loss": 0.3108, + "step": 35086 + }, + { + "epoch": 0.702389710482196, + "grad_norm": 1.2021210193634033, + "learning_rate": 2.1482355157520706e-06, + "loss": 0.2588, + "step": 35087 + }, + { + "epoch": 0.7024097289993244, + "grad_norm": 1.33333420753479, + "learning_rate": 2.147969237903364e-06, + "loss": 0.3319, + "step": 35088 + }, + { + "epoch": 0.7024297475164527, + "grad_norm": 1.157529354095459, + "learning_rate": 2.147702972043982e-06, + "loss": 0.3135, + "step": 35089 + }, + { + "epoch": 0.7024497660335811, + "grad_norm": 1.1650257110595703, + "learning_rate": 2.1474367181750423e-06, + "loss": 0.3329, + "step": 35090 + }, + { + "epoch": 0.7024697845507094, + "grad_norm": 1.0493264198303223, + "learning_rate": 2.147170476297666e-06, + "loss": 0.2946, + "step": 35091 + }, + { + "epoch": 0.7024898030678377, + "grad_norm": 1.079961895942688, + "learning_rate": 2.146904246412971e-06, + "loss": 0.2711, + "step": 35092 + }, + { + "epoch": 0.7025098215849661, + "grad_norm": 1.1993427276611328, + "learning_rate": 2.146638028522076e-06, + "loss": 0.2825, + "step": 35093 + }, + { + "epoch": 0.7025298401020944, + "grad_norm": 1.788763165473938, + "learning_rate": 2.146371822626101e-06, + "loss": 0.7728, + "step": 35094 + }, + { + "epoch": 0.7025498586192228, + "grad_norm": 1.1019885540008545, + "learning_rate": 2.1461056287261632e-06, + "loss": 0.2697, + "step": 35095 + }, + { + "epoch": 0.7025698771363511, + "grad_norm": 1.1118754148483276, + "learning_rate": 2.145839446823385e-06, + "loss": 0.3019, + "step": 35096 + }, + { + "epoch": 0.7025898956534795, + "grad_norm": 1.1926212310791016, + "learning_rate": 2.1455732769188837e-06, + "loss": 0.3048, + "step": 35097 + }, + { + "epoch": 0.7026099141706078, + "grad_norm": 1.0884640216827393, + "learning_rate": 2.1453071190137775e-06, + "loss": 0.3228, + "step": 35098 + }, + { + "epoch": 0.7026299326877361, + "grad_norm": 1.1759966611862183, + "learning_rate": 2.1450409731091843e-06, + "loss": 0.2887, + "step": 35099 + }, + { + "epoch": 0.7026499512048645, + "grad_norm": 1.0044407844543457, + "learning_rate": 2.1447748392062268e-06, + "loss": 0.2806, + "step": 35100 + }, + { + "epoch": 0.7026699697219928, + "grad_norm": 1.0943328142166138, + "learning_rate": 2.1445087173060187e-06, + "loss": 0.3107, + "step": 35101 + }, + { + "epoch": 0.7026899882391212, + "grad_norm": 1.1551241874694824, + "learning_rate": 2.144242607409684e-06, + "loss": 0.3142, + "step": 35102 + }, + { + "epoch": 0.7027100067562495, + "grad_norm": 1.8049204349517822, + "learning_rate": 2.1439765095183384e-06, + "loss": 0.7302, + "step": 35103 + }, + { + "epoch": 0.7027300252733779, + "grad_norm": 1.1243044137954712, + "learning_rate": 2.1437104236330992e-06, + "loss": 0.2791, + "step": 35104 + }, + { + "epoch": 0.7027500437905062, + "grad_norm": 1.3324835300445557, + "learning_rate": 2.1434443497550883e-06, + "loss": 0.2817, + "step": 35105 + }, + { + "epoch": 0.7027700623076346, + "grad_norm": 1.23029363155365, + "learning_rate": 2.143178287885423e-06, + "loss": 0.3567, + "step": 35106 + }, + { + "epoch": 0.7027900808247629, + "grad_norm": 1.1326688528060913, + "learning_rate": 2.142912238025221e-06, + "loss": 0.3051, + "step": 35107 + }, + { + "epoch": 0.7028100993418912, + "grad_norm": 1.093529462814331, + "learning_rate": 2.1426462001756e-06, + "loss": 0.327, + "step": 35108 + }, + { + "epoch": 0.7028301178590196, + "grad_norm": 1.1514278650283813, + "learning_rate": 2.1423801743376806e-06, + "loss": 0.2833, + "step": 35109 + }, + { + "epoch": 0.7028501363761479, + "grad_norm": 1.0686662197113037, + "learning_rate": 2.14211416051258e-06, + "loss": 0.3219, + "step": 35110 + }, + { + "epoch": 0.7028701548932763, + "grad_norm": 1.215469241142273, + "learning_rate": 2.1418481587014165e-06, + "loss": 0.2871, + "step": 35111 + }, + { + "epoch": 0.7028901734104046, + "grad_norm": 1.1206239461898804, + "learning_rate": 2.1415821689053068e-06, + "loss": 0.2882, + "step": 35112 + }, + { + "epoch": 0.702910191927533, + "grad_norm": 1.1946083307266235, + "learning_rate": 2.141316191125372e-06, + "loss": 0.2986, + "step": 35113 + }, + { + "epoch": 0.7029302104446613, + "grad_norm": 1.1865379810333252, + "learning_rate": 2.141050225362727e-06, + "loss": 0.3022, + "step": 35114 + }, + { + "epoch": 0.7029502289617896, + "grad_norm": 1.9778656959533691, + "learning_rate": 2.1407842716184934e-06, + "loss": 0.7496, + "step": 35115 + }, + { + "epoch": 0.702970247478918, + "grad_norm": 1.102940320968628, + "learning_rate": 2.1405183298937875e-06, + "loss": 0.2673, + "step": 35116 + }, + { + "epoch": 0.7029902659960463, + "grad_norm": 1.0167971849441528, + "learning_rate": 2.140252400189725e-06, + "loss": 0.323, + "step": 35117 + }, + { + "epoch": 0.7030102845131747, + "grad_norm": 1.135430097579956, + "learning_rate": 2.1399864825074284e-06, + "loss": 0.3002, + "step": 35118 + }, + { + "epoch": 0.703030303030303, + "grad_norm": 1.1259090900421143, + "learning_rate": 2.1397205768480127e-06, + "loss": 0.3241, + "step": 35119 + }, + { + "epoch": 0.7030503215474314, + "grad_norm": 1.1219085454940796, + "learning_rate": 2.139454683212596e-06, + "loss": 0.2951, + "step": 35120 + }, + { + "epoch": 0.7030703400645597, + "grad_norm": 1.127718210220337, + "learning_rate": 2.1391888016022948e-06, + "loss": 0.3223, + "step": 35121 + }, + { + "epoch": 0.7030903585816881, + "grad_norm": 1.1064361333847046, + "learning_rate": 2.1389229320182304e-06, + "loss": 0.3346, + "step": 35122 + }, + { + "epoch": 0.7031103770988164, + "grad_norm": 1.107620358467102, + "learning_rate": 2.1386570744615176e-06, + "loss": 0.3301, + "step": 35123 + }, + { + "epoch": 0.7031303956159447, + "grad_norm": 1.2364214658737183, + "learning_rate": 2.138391228933275e-06, + "loss": 0.2882, + "step": 35124 + }, + { + "epoch": 0.7031504141330731, + "grad_norm": 1.2034963369369507, + "learning_rate": 2.1381253954346183e-06, + "loss": 0.3019, + "step": 35125 + }, + { + "epoch": 0.7031704326502014, + "grad_norm": 1.9734714031219482, + "learning_rate": 2.1378595739666676e-06, + "loss": 0.7606, + "step": 35126 + }, + { + "epoch": 0.7031904511673298, + "grad_norm": 1.821600317955017, + "learning_rate": 2.1375937645305377e-06, + "loss": 0.8, + "step": 35127 + }, + { + "epoch": 0.7032104696844581, + "grad_norm": 1.042137861251831, + "learning_rate": 2.13732796712735e-06, + "loss": 0.3132, + "step": 35128 + }, + { + "epoch": 0.7032304882015865, + "grad_norm": 1.1191127300262451, + "learning_rate": 2.1370621817582188e-06, + "loss": 0.3359, + "step": 35129 + }, + { + "epoch": 0.7032505067187148, + "grad_norm": 1.1828513145446777, + "learning_rate": 2.136796408424261e-06, + "loss": 0.2973, + "step": 35130 + }, + { + "epoch": 0.7032705252358431, + "grad_norm": 1.0762752294540405, + "learning_rate": 2.1365306471265964e-06, + "loss": 0.3013, + "step": 35131 + }, + { + "epoch": 0.7032905437529715, + "grad_norm": 1.0369352102279663, + "learning_rate": 2.1362648978663404e-06, + "loss": 0.2934, + "step": 35132 + }, + { + "epoch": 0.7033105622700998, + "grad_norm": 1.0839197635650635, + "learning_rate": 2.1359991606446106e-06, + "loss": 0.2904, + "step": 35133 + }, + { + "epoch": 0.7033305807872282, + "grad_norm": 1.0523946285247803, + "learning_rate": 2.135733435462524e-06, + "loss": 0.266, + "step": 35134 + }, + { + "epoch": 0.7033505993043565, + "grad_norm": 1.8401602506637573, + "learning_rate": 2.1354677223211963e-06, + "loss": 0.7754, + "step": 35135 + }, + { + "epoch": 0.7033706178214849, + "grad_norm": 1.0974363088607788, + "learning_rate": 2.1352020212217472e-06, + "loss": 0.3001, + "step": 35136 + }, + { + "epoch": 0.7033906363386132, + "grad_norm": 1.0722534656524658, + "learning_rate": 2.1349363321652927e-06, + "loss": 0.2976, + "step": 35137 + }, + { + "epoch": 0.7034106548557416, + "grad_norm": 1.3623448610305786, + "learning_rate": 2.134670655152947e-06, + "loss": 0.3408, + "step": 35138 + }, + { + "epoch": 0.7034306733728699, + "grad_norm": 1.1887693405151367, + "learning_rate": 2.134404990185831e-06, + "loss": 0.2886, + "step": 35139 + }, + { + "epoch": 0.7034506918899982, + "grad_norm": 1.3302115201950073, + "learning_rate": 2.134139337265058e-06, + "loss": 0.3057, + "step": 35140 + }, + { + "epoch": 0.7034707104071266, + "grad_norm": 1.9424484968185425, + "learning_rate": 2.133873696391749e-06, + "loss": 0.6921, + "step": 35141 + }, + { + "epoch": 0.7034907289242549, + "grad_norm": 1.0168462991714478, + "learning_rate": 2.1336080675670173e-06, + "loss": 0.2592, + "step": 35142 + }, + { + "epoch": 0.7035107474413833, + "grad_norm": 1.211329460144043, + "learning_rate": 2.133342450791979e-06, + "loss": 0.3128, + "step": 35143 + }, + { + "epoch": 0.7035307659585116, + "grad_norm": 1.0102614164352417, + "learning_rate": 2.133076846067754e-06, + "loss": 0.2687, + "step": 35144 + }, + { + "epoch": 0.70355078447564, + "grad_norm": 1.151584267616272, + "learning_rate": 2.132811253395457e-06, + "loss": 0.2848, + "step": 35145 + }, + { + "epoch": 0.7035708029927683, + "grad_norm": 1.1027567386627197, + "learning_rate": 2.1325456727762045e-06, + "loss": 0.3143, + "step": 35146 + }, + { + "epoch": 0.7035908215098966, + "grad_norm": 1.1939396858215332, + "learning_rate": 2.1322801042111125e-06, + "loss": 0.3027, + "step": 35147 + }, + { + "epoch": 0.703610840027025, + "grad_norm": 1.0848362445831299, + "learning_rate": 2.1320145477012967e-06, + "loss": 0.3012, + "step": 35148 + }, + { + "epoch": 0.7036308585441533, + "grad_norm": 1.3456263542175293, + "learning_rate": 2.1317490032478756e-06, + "loss": 0.2814, + "step": 35149 + }, + { + "epoch": 0.7036508770612817, + "grad_norm": 0.9342580437660217, + "learning_rate": 2.1314834708519646e-06, + "loss": 0.2423, + "step": 35150 + }, + { + "epoch": 0.70367089557841, + "grad_norm": 1.1925156116485596, + "learning_rate": 2.131217950514678e-06, + "loss": 0.2642, + "step": 35151 + }, + { + "epoch": 0.7036909140955384, + "grad_norm": 1.1861469745635986, + "learning_rate": 2.130952442237136e-06, + "loss": 0.3024, + "step": 35152 + }, + { + "epoch": 0.7037109326126667, + "grad_norm": 1.1203287839889526, + "learning_rate": 2.13068694602045e-06, + "loss": 0.3132, + "step": 35153 + }, + { + "epoch": 0.7037309511297951, + "grad_norm": 1.1736197471618652, + "learning_rate": 2.130421461865741e-06, + "loss": 0.3196, + "step": 35154 + }, + { + "epoch": 0.7037509696469234, + "grad_norm": 1.062703013420105, + "learning_rate": 2.1301559897741223e-06, + "loss": 0.2713, + "step": 35155 + }, + { + "epoch": 0.7037709881640517, + "grad_norm": 1.171895146369934, + "learning_rate": 2.1298905297467103e-06, + "loss": 0.3164, + "step": 35156 + }, + { + "epoch": 0.7037910066811801, + "grad_norm": 1.0926622152328491, + "learning_rate": 2.129625081784619e-06, + "loss": 0.2624, + "step": 35157 + }, + { + "epoch": 0.7038110251983084, + "grad_norm": 0.9919817447662354, + "learning_rate": 2.129359645888968e-06, + "loss": 0.2813, + "step": 35158 + }, + { + "epoch": 0.7038310437154368, + "grad_norm": 1.026578664779663, + "learning_rate": 2.1290942220608713e-06, + "loss": 0.2617, + "step": 35159 + }, + { + "epoch": 0.7038510622325651, + "grad_norm": 1.085616946220398, + "learning_rate": 2.128828810301445e-06, + "loss": 0.3081, + "step": 35160 + }, + { + "epoch": 0.7038710807496935, + "grad_norm": 1.1778008937835693, + "learning_rate": 2.1285634106118025e-06, + "loss": 0.2912, + "step": 35161 + }, + { + "epoch": 0.7038910992668218, + "grad_norm": 1.2601253986358643, + "learning_rate": 2.128298022993063e-06, + "loss": 0.3135, + "step": 35162 + }, + { + "epoch": 0.7039111177839501, + "grad_norm": 1.3097888231277466, + "learning_rate": 2.1280326474463408e-06, + "loss": 0.2822, + "step": 35163 + }, + { + "epoch": 0.7039311363010785, + "grad_norm": 1.1282563209533691, + "learning_rate": 2.1277672839727494e-06, + "loss": 0.2923, + "step": 35164 + }, + { + "epoch": 0.7039511548182068, + "grad_norm": 1.060371994972229, + "learning_rate": 2.127501932573408e-06, + "loss": 0.2994, + "step": 35165 + }, + { + "epoch": 0.7039711733353352, + "grad_norm": 2.25563907623291, + "learning_rate": 2.127236593249429e-06, + "loss": 0.7046, + "step": 35166 + }, + { + "epoch": 0.7039911918524635, + "grad_norm": 1.0759023427963257, + "learning_rate": 2.1269712660019303e-06, + "loss": 0.2877, + "step": 35167 + }, + { + "epoch": 0.7040112103695919, + "grad_norm": 1.0485329627990723, + "learning_rate": 2.1267059508320266e-06, + "loss": 0.2737, + "step": 35168 + }, + { + "epoch": 0.7040312288867202, + "grad_norm": 1.0544871091842651, + "learning_rate": 2.1264406477408322e-06, + "loss": 0.3114, + "step": 35169 + }, + { + "epoch": 0.7040512474038486, + "grad_norm": 1.1183973550796509, + "learning_rate": 2.1261753567294614e-06, + "loss": 0.2917, + "step": 35170 + }, + { + "epoch": 0.7040712659209769, + "grad_norm": 1.8720877170562744, + "learning_rate": 2.1259100777990327e-06, + "loss": 0.7251, + "step": 35171 + }, + { + "epoch": 0.7040912844381052, + "grad_norm": 1.1188045740127563, + "learning_rate": 2.1256448109506595e-06, + "loss": 0.2627, + "step": 35172 + }, + { + "epoch": 0.7041113029552336, + "grad_norm": 1.0896159410476685, + "learning_rate": 2.1253795561854574e-06, + "loss": 0.3091, + "step": 35173 + }, + { + "epoch": 0.7041313214723619, + "grad_norm": 1.1178761720657349, + "learning_rate": 2.125114313504539e-06, + "loss": 0.3653, + "step": 35174 + }, + { + "epoch": 0.7041513399894903, + "grad_norm": 1.812813639640808, + "learning_rate": 2.1248490829090226e-06, + "loss": 0.8084, + "step": 35175 + }, + { + "epoch": 0.7041713585066186, + "grad_norm": 1.108669400215149, + "learning_rate": 2.124583864400022e-06, + "loss": 0.2983, + "step": 35176 + }, + { + "epoch": 0.704191377023747, + "grad_norm": 1.0656194686889648, + "learning_rate": 2.124318657978651e-06, + "loss": 0.2824, + "step": 35177 + }, + { + "epoch": 0.7042113955408753, + "grad_norm": 0.9822883605957031, + "learning_rate": 2.124053463646027e-06, + "loss": 0.2364, + "step": 35178 + }, + { + "epoch": 0.7042314140580036, + "grad_norm": 1.0793848037719727, + "learning_rate": 2.123788281403261e-06, + "loss": 0.2755, + "step": 35179 + }, + { + "epoch": 0.704251432575132, + "grad_norm": 1.1574010848999023, + "learning_rate": 2.1235231112514724e-06, + "loss": 0.3299, + "step": 35180 + }, + { + "epoch": 0.7042714510922603, + "grad_norm": 1.0384767055511475, + "learning_rate": 2.1232579531917737e-06, + "loss": 0.2728, + "step": 35181 + }, + { + "epoch": 0.7042914696093887, + "grad_norm": 1.1885391473770142, + "learning_rate": 2.122992807225279e-06, + "loss": 0.2926, + "step": 35182 + }, + { + "epoch": 0.704311488126517, + "grad_norm": 1.2197668552398682, + "learning_rate": 2.122727673353102e-06, + "loss": 0.3618, + "step": 35183 + }, + { + "epoch": 0.7043315066436454, + "grad_norm": 1.0807814598083496, + "learning_rate": 2.1224625515763604e-06, + "loss": 0.2982, + "step": 35184 + }, + { + "epoch": 0.7043515251607737, + "grad_norm": 1.132858157157898, + "learning_rate": 2.1221974418961665e-06, + "loss": 0.3018, + "step": 35185 + }, + { + "epoch": 0.7043715436779021, + "grad_norm": 0.9925404787063599, + "learning_rate": 2.121932344313636e-06, + "loss": 0.2702, + "step": 35186 + }, + { + "epoch": 0.7043915621950304, + "grad_norm": 1.2369568347930908, + "learning_rate": 2.121667258829882e-06, + "loss": 0.2557, + "step": 35187 + }, + { + "epoch": 0.7044115807121587, + "grad_norm": 1.1314960718154907, + "learning_rate": 2.1214021854460182e-06, + "loss": 0.2881, + "step": 35188 + }, + { + "epoch": 0.7044315992292871, + "grad_norm": 1.864713430404663, + "learning_rate": 2.1211371241631613e-06, + "loss": 0.7208, + "step": 35189 + }, + { + "epoch": 0.7044516177464154, + "grad_norm": 1.060116171836853, + "learning_rate": 2.120872074982423e-06, + "loss": 0.2711, + "step": 35190 + }, + { + "epoch": 0.7044716362635438, + "grad_norm": 1.041882872581482, + "learning_rate": 2.1206070379049205e-06, + "loss": 0.2808, + "step": 35191 + }, + { + "epoch": 0.7044916547806721, + "grad_norm": 0.9895033240318298, + "learning_rate": 2.120342012931765e-06, + "loss": 0.294, + "step": 35192 + }, + { + "epoch": 0.7045116732978005, + "grad_norm": 1.152408242225647, + "learning_rate": 2.120077000064073e-06, + "loss": 0.3288, + "step": 35193 + }, + { + "epoch": 0.7045316918149288, + "grad_norm": 1.2444407939910889, + "learning_rate": 2.1198119993029583e-06, + "loss": 0.3115, + "step": 35194 + }, + { + "epoch": 0.7045517103320571, + "grad_norm": 0.9634509682655334, + "learning_rate": 2.1195470106495335e-06, + "loss": 0.2876, + "step": 35195 + }, + { + "epoch": 0.7045717288491855, + "grad_norm": 1.8010587692260742, + "learning_rate": 2.1192820341049135e-06, + "loss": 0.7519, + "step": 35196 + }, + { + "epoch": 0.7045917473663138, + "grad_norm": 1.0268608331680298, + "learning_rate": 2.11901706967021e-06, + "loss": 0.2688, + "step": 35197 + }, + { + "epoch": 0.7046117658834422, + "grad_norm": 2.3493378162384033, + "learning_rate": 2.118752117346541e-06, + "loss": 0.7979, + "step": 35198 + }, + { + "epoch": 0.7046317844005705, + "grad_norm": 0.9231210947036743, + "learning_rate": 2.1184871771350176e-06, + "loss": 0.2258, + "step": 35199 + }, + { + "epoch": 0.7046518029176989, + "grad_norm": 1.991047739982605, + "learning_rate": 2.1182222490367536e-06, + "loss": 0.7322, + "step": 35200 + }, + { + "epoch": 0.7046718214348272, + "grad_norm": 1.139280080795288, + "learning_rate": 2.117957333052862e-06, + "loss": 0.333, + "step": 35201 + }, + { + "epoch": 0.7046918399519556, + "grad_norm": 1.1567550897598267, + "learning_rate": 2.117692429184459e-06, + "loss": 0.2668, + "step": 35202 + }, + { + "epoch": 0.7047118584690839, + "grad_norm": 1.2981150150299072, + "learning_rate": 2.1174275374326548e-06, + "loss": 0.3471, + "step": 35203 + }, + { + "epoch": 0.7047318769862122, + "grad_norm": 1.1196887493133545, + "learning_rate": 2.117162657798567e-06, + "loss": 0.3172, + "step": 35204 + }, + { + "epoch": 0.7047518955033406, + "grad_norm": 1.1206783056259155, + "learning_rate": 2.116897790283305e-06, + "loss": 0.3013, + "step": 35205 + }, + { + "epoch": 0.7047719140204689, + "grad_norm": 1.841892123222351, + "learning_rate": 2.116632934887986e-06, + "loss": 0.7732, + "step": 35206 + }, + { + "epoch": 0.7047919325375973, + "grad_norm": 1.0389424562454224, + "learning_rate": 2.116368091613722e-06, + "loss": 0.3147, + "step": 35207 + }, + { + "epoch": 0.7048119510547256, + "grad_norm": 1.0028722286224365, + "learning_rate": 2.116103260461626e-06, + "loss": 0.2876, + "step": 35208 + }, + { + "epoch": 0.704831969571854, + "grad_norm": 1.0516300201416016, + "learning_rate": 2.115838441432811e-06, + "loss": 0.3021, + "step": 35209 + }, + { + "epoch": 0.7048519880889823, + "grad_norm": 1.2874207496643066, + "learning_rate": 2.1155736345283885e-06, + "loss": 0.2673, + "step": 35210 + }, + { + "epoch": 0.7048720066061106, + "grad_norm": 1.1399527788162231, + "learning_rate": 2.115308839749476e-06, + "loss": 0.2739, + "step": 35211 + }, + { + "epoch": 0.704892025123239, + "grad_norm": 1.21932852268219, + "learning_rate": 2.115044057097184e-06, + "loss": 0.2696, + "step": 35212 + }, + { + "epoch": 0.7049120436403673, + "grad_norm": 1.601462483406067, + "learning_rate": 2.1147792865726257e-06, + "loss": 0.2887, + "step": 35213 + }, + { + "epoch": 0.7049320621574957, + "grad_norm": 1.1477351188659668, + "learning_rate": 2.114514528176913e-06, + "loss": 0.2751, + "step": 35214 + }, + { + "epoch": 0.704952080674624, + "grad_norm": 1.116357684135437, + "learning_rate": 2.1142497819111623e-06, + "loss": 0.3222, + "step": 35215 + }, + { + "epoch": 0.7049720991917524, + "grad_norm": 1.1416869163513184, + "learning_rate": 2.1139850477764824e-06, + "loss": 0.3141, + "step": 35216 + }, + { + "epoch": 0.7049921177088807, + "grad_norm": 1.0879307985305786, + "learning_rate": 2.1137203257739896e-06, + "loss": 0.2631, + "step": 35217 + }, + { + "epoch": 0.7050121362260091, + "grad_norm": 1.146708369255066, + "learning_rate": 2.1134556159047966e-06, + "loss": 0.3211, + "step": 35218 + }, + { + "epoch": 0.7050321547431374, + "grad_norm": 1.0304163694381714, + "learning_rate": 2.113190918170012e-06, + "loss": 0.3216, + "step": 35219 + }, + { + "epoch": 0.7050521732602657, + "grad_norm": 0.9906494617462158, + "learning_rate": 2.112926232570754e-06, + "loss": 0.2487, + "step": 35220 + }, + { + "epoch": 0.7050721917773941, + "grad_norm": 1.1390912532806396, + "learning_rate": 2.1126615591081323e-06, + "loss": 0.2912, + "step": 35221 + }, + { + "epoch": 0.7050922102945224, + "grad_norm": 1.0520445108413696, + "learning_rate": 2.1123968977832602e-06, + "loss": 0.3179, + "step": 35222 + }, + { + "epoch": 0.7051122288116508, + "grad_norm": 1.1277912855148315, + "learning_rate": 2.112132248597249e-06, + "loss": 0.2711, + "step": 35223 + }, + { + "epoch": 0.7051322473287791, + "grad_norm": 1.8981069326400757, + "learning_rate": 2.111867611551213e-06, + "loss": 0.7424, + "step": 35224 + }, + { + "epoch": 0.7051522658459075, + "grad_norm": 1.0574102401733398, + "learning_rate": 2.1116029866462652e-06, + "loss": 0.2806, + "step": 35225 + }, + { + "epoch": 0.7051722843630358, + "grad_norm": 1.1665593385696411, + "learning_rate": 2.1113383738835165e-06, + "loss": 0.3045, + "step": 35226 + }, + { + "epoch": 0.7051923028801641, + "grad_norm": 1.0747098922729492, + "learning_rate": 2.1110737732640774e-06, + "loss": 0.2958, + "step": 35227 + }, + { + "epoch": 0.7052123213972925, + "grad_norm": 1.09461510181427, + "learning_rate": 2.110809184789064e-06, + "loss": 0.3184, + "step": 35228 + }, + { + "epoch": 0.7052323399144208, + "grad_norm": 1.0629838705062866, + "learning_rate": 2.110544608459586e-06, + "loss": 0.3331, + "step": 35229 + }, + { + "epoch": 0.7052523584315492, + "grad_norm": 1.0777350664138794, + "learning_rate": 2.1102800442767587e-06, + "loss": 0.2979, + "step": 35230 + }, + { + "epoch": 0.7052723769486775, + "grad_norm": 1.115973711013794, + "learning_rate": 2.110015492241691e-06, + "loss": 0.3078, + "step": 35231 + }, + { + "epoch": 0.7052923954658059, + "grad_norm": 1.0625667572021484, + "learning_rate": 2.1097509523554956e-06, + "loss": 0.2914, + "step": 35232 + }, + { + "epoch": 0.7053124139829342, + "grad_norm": 1.11552095413208, + "learning_rate": 2.1094864246192863e-06, + "loss": 0.2899, + "step": 35233 + }, + { + "epoch": 0.7053324325000626, + "grad_norm": 1.1726539134979248, + "learning_rate": 2.109221909034174e-06, + "loss": 0.3595, + "step": 35234 + }, + { + "epoch": 0.7053524510171909, + "grad_norm": 2.078341245651245, + "learning_rate": 2.1089574056012714e-06, + "loss": 0.6469, + "step": 35235 + }, + { + "epoch": 0.7053724695343192, + "grad_norm": 1.0408329963684082, + "learning_rate": 2.108692914321687e-06, + "loss": 0.2706, + "step": 35236 + }, + { + "epoch": 0.7053924880514476, + "grad_norm": 1.0580978393554688, + "learning_rate": 2.108428435196538e-06, + "loss": 0.2979, + "step": 35237 + }, + { + "epoch": 0.7054125065685759, + "grad_norm": 1.8177707195281982, + "learning_rate": 2.1081639682269334e-06, + "loss": 0.726, + "step": 35238 + }, + { + "epoch": 0.7054325250857043, + "grad_norm": 1.0709882974624634, + "learning_rate": 2.1078995134139844e-06, + "loss": 0.303, + "step": 35239 + }, + { + "epoch": 0.7054525436028326, + "grad_norm": 1.206889033317566, + "learning_rate": 2.107635070758802e-06, + "loss": 0.3448, + "step": 35240 + }, + { + "epoch": 0.705472562119961, + "grad_norm": 1.0268224477767944, + "learning_rate": 2.1073706402625016e-06, + "loss": 0.2955, + "step": 35241 + }, + { + "epoch": 0.7054925806370893, + "grad_norm": 1.1940277814865112, + "learning_rate": 2.1071062219261904e-06, + "loss": 0.3083, + "step": 35242 + }, + { + "epoch": 0.7055125991542176, + "grad_norm": 1.0902047157287598, + "learning_rate": 2.106841815750984e-06, + "loss": 0.3091, + "step": 35243 + }, + { + "epoch": 0.705532617671346, + "grad_norm": 1.123788833618164, + "learning_rate": 2.106577421737992e-06, + "loss": 0.2988, + "step": 35244 + }, + { + "epoch": 0.7055526361884743, + "grad_norm": 1.1935499906539917, + "learning_rate": 2.106313039888324e-06, + "loss": 0.3475, + "step": 35245 + }, + { + "epoch": 0.7055726547056027, + "grad_norm": 1.8734562397003174, + "learning_rate": 2.106048670203095e-06, + "loss": 0.8593, + "step": 35246 + }, + { + "epoch": 0.705592673222731, + "grad_norm": 1.0218287706375122, + "learning_rate": 2.105784312683415e-06, + "loss": 0.315, + "step": 35247 + }, + { + "epoch": 0.7056126917398594, + "grad_norm": 1.34586763381958, + "learning_rate": 2.1055199673303946e-06, + "loss": 0.3092, + "step": 35248 + }, + { + "epoch": 0.7056327102569877, + "grad_norm": 1.5488466024398804, + "learning_rate": 2.1052556341451452e-06, + "loss": 0.2423, + "step": 35249 + }, + { + "epoch": 0.7056527287741161, + "grad_norm": 1.0945773124694824, + "learning_rate": 2.1049913131287764e-06, + "loss": 0.3422, + "step": 35250 + }, + { + "epoch": 0.7056727472912444, + "grad_norm": 1.1253582239151, + "learning_rate": 2.104727004282403e-06, + "loss": 0.2928, + "step": 35251 + }, + { + "epoch": 0.7056927658083727, + "grad_norm": 1.0027891397476196, + "learning_rate": 2.1044627076071344e-06, + "loss": 0.3052, + "step": 35252 + }, + { + "epoch": 0.7057127843255011, + "grad_norm": 1.0416980981826782, + "learning_rate": 2.104198423104079e-06, + "loss": 0.3108, + "step": 35253 + }, + { + "epoch": 0.7057328028426294, + "grad_norm": 1.0611438751220703, + "learning_rate": 2.1039341507743527e-06, + "loss": 0.2771, + "step": 35254 + }, + { + "epoch": 0.7057528213597578, + "grad_norm": 1.0605241060256958, + "learning_rate": 2.1036698906190618e-06, + "loss": 0.3343, + "step": 35255 + }, + { + "epoch": 0.7057728398768861, + "grad_norm": 1.0787670612335205, + "learning_rate": 2.1034056426393214e-06, + "loss": 0.2145, + "step": 35256 + }, + { + "epoch": 0.7057928583940145, + "grad_norm": 1.94963538646698, + "learning_rate": 2.1031414068362404e-06, + "loss": 0.8079, + "step": 35257 + }, + { + "epoch": 0.7058128769111428, + "grad_norm": 1.1532139778137207, + "learning_rate": 2.1028771832109274e-06, + "loss": 0.3088, + "step": 35258 + }, + { + "epoch": 0.7058328954282711, + "grad_norm": 1.3469206094741821, + "learning_rate": 2.1026129717644974e-06, + "loss": 0.2987, + "step": 35259 + }, + { + "epoch": 0.7058529139453995, + "grad_norm": 1.0830354690551758, + "learning_rate": 2.102348772498059e-06, + "loss": 0.283, + "step": 35260 + }, + { + "epoch": 0.7058729324625278, + "grad_norm": 1.0439001321792603, + "learning_rate": 2.1020845854127224e-06, + "loss": 0.2693, + "step": 35261 + }, + { + "epoch": 0.7058929509796562, + "grad_norm": 1.1497530937194824, + "learning_rate": 2.1018204105095984e-06, + "loss": 0.2819, + "step": 35262 + }, + { + "epoch": 0.7059129694967845, + "grad_norm": 1.2749319076538086, + "learning_rate": 2.1015562477897965e-06, + "loss": 0.3206, + "step": 35263 + }, + { + "epoch": 0.7059329880139129, + "grad_norm": 1.1418462991714478, + "learning_rate": 2.1012920972544298e-06, + "loss": 0.3011, + "step": 35264 + }, + { + "epoch": 0.7059530065310412, + "grad_norm": 1.1838641166687012, + "learning_rate": 2.1010279589046078e-06, + "loss": 0.266, + "step": 35265 + }, + { + "epoch": 0.7059730250481696, + "grad_norm": 1.0241198539733887, + "learning_rate": 2.100763832741438e-06, + "loss": 0.2596, + "step": 35266 + }, + { + "epoch": 0.7059930435652979, + "grad_norm": 1.904969573020935, + "learning_rate": 2.1004997187660355e-06, + "loss": 0.7745, + "step": 35267 + }, + { + "epoch": 0.7060130620824262, + "grad_norm": 1.1378912925720215, + "learning_rate": 2.1002356169795058e-06, + "loss": 0.3077, + "step": 35268 + }, + { + "epoch": 0.7060330805995546, + "grad_norm": 1.9675495624542236, + "learning_rate": 2.0999715273829635e-06, + "loss": 0.7877, + "step": 35269 + }, + { + "epoch": 0.7060530991166829, + "grad_norm": 1.1570463180541992, + "learning_rate": 2.0997074499775165e-06, + "loss": 0.3235, + "step": 35270 + }, + { + "epoch": 0.7060731176338113, + "grad_norm": 1.0103384256362915, + "learning_rate": 2.0994433847642757e-06, + "loss": 0.3123, + "step": 35271 + }, + { + "epoch": 0.7060931361509396, + "grad_norm": 1.2040950059890747, + "learning_rate": 2.099179331744349e-06, + "loss": 0.3152, + "step": 35272 + }, + { + "epoch": 0.706113154668068, + "grad_norm": 1.0742806196212769, + "learning_rate": 2.09891529091885e-06, + "loss": 0.2789, + "step": 35273 + }, + { + "epoch": 0.7061331731851963, + "grad_norm": 1.2318782806396484, + "learning_rate": 2.0986512622888865e-06, + "loss": 0.2525, + "step": 35274 + }, + { + "epoch": 0.7061531917023246, + "grad_norm": 1.9771465063095093, + "learning_rate": 2.098387245855569e-06, + "loss": 0.8026, + "step": 35275 + }, + { + "epoch": 0.706173210219453, + "grad_norm": 1.2072232961654663, + "learning_rate": 2.0981232416200046e-06, + "loss": 0.2891, + "step": 35276 + }, + { + "epoch": 0.7061932287365813, + "grad_norm": 1.0751104354858398, + "learning_rate": 2.0978592495833077e-06, + "loss": 0.2972, + "step": 35277 + }, + { + "epoch": 0.7062132472537097, + "grad_norm": 1.1712654829025269, + "learning_rate": 2.0975952697465864e-06, + "loss": 0.3368, + "step": 35278 + }, + { + "epoch": 0.706233265770838, + "grad_norm": 1.2534329891204834, + "learning_rate": 2.0973313021109474e-06, + "loss": 0.3199, + "step": 35279 + }, + { + "epoch": 0.7062532842879664, + "grad_norm": 1.3093842267990112, + "learning_rate": 2.0970673466775054e-06, + "loss": 0.3093, + "step": 35280 + }, + { + "epoch": 0.7062733028050947, + "grad_norm": 1.0230369567871094, + "learning_rate": 2.096803403447366e-06, + "loss": 0.2907, + "step": 35281 + }, + { + "epoch": 0.7062933213222231, + "grad_norm": 1.1020251512527466, + "learning_rate": 2.0965394724216413e-06, + "loss": 0.288, + "step": 35282 + }, + { + "epoch": 0.7063133398393514, + "grad_norm": 1.415918231010437, + "learning_rate": 2.0962755536014407e-06, + "loss": 0.2826, + "step": 35283 + }, + { + "epoch": 0.7063333583564797, + "grad_norm": 1.8561192750930786, + "learning_rate": 2.096011646987872e-06, + "loss": 0.7495, + "step": 35284 + }, + { + "epoch": 0.7063533768736081, + "grad_norm": 1.101792335510254, + "learning_rate": 2.0957477525820443e-06, + "loss": 0.286, + "step": 35285 + }, + { + "epoch": 0.7063733953907364, + "grad_norm": 1.0504486560821533, + "learning_rate": 2.0954838703850695e-06, + "loss": 0.2794, + "step": 35286 + }, + { + "epoch": 0.7063934139078648, + "grad_norm": 1.0390900373458862, + "learning_rate": 2.095220000398055e-06, + "loss": 0.2973, + "step": 35287 + }, + { + "epoch": 0.7064134324249931, + "grad_norm": 1.0800988674163818, + "learning_rate": 2.094956142622111e-06, + "loss": 0.3082, + "step": 35288 + }, + { + "epoch": 0.7064334509421215, + "grad_norm": 1.2395604848861694, + "learning_rate": 2.0946922970583444e-06, + "loss": 0.3535, + "step": 35289 + }, + { + "epoch": 0.7064534694592498, + "grad_norm": 1.0661814212799072, + "learning_rate": 2.094428463707868e-06, + "loss": 0.2694, + "step": 35290 + }, + { + "epoch": 0.7064734879763781, + "grad_norm": 1.1481972932815552, + "learning_rate": 2.094164642571789e-06, + "loss": 0.3298, + "step": 35291 + }, + { + "epoch": 0.7064935064935065, + "grad_norm": 1.8979637622833252, + "learning_rate": 2.0939008336512145e-06, + "loss": 0.736, + "step": 35292 + }, + { + "epoch": 0.7065135250106348, + "grad_norm": 1.1012465953826904, + "learning_rate": 2.0936370369472575e-06, + "loss": 0.277, + "step": 35293 + }, + { + "epoch": 0.7065335435277632, + "grad_norm": 1.1396629810333252, + "learning_rate": 2.0933732524610228e-06, + "loss": 0.259, + "step": 35294 + }, + { + "epoch": 0.7065535620448915, + "grad_norm": 1.1603835821151733, + "learning_rate": 2.0931094801936237e-06, + "loss": 0.3414, + "step": 35295 + }, + { + "epoch": 0.7065735805620199, + "grad_norm": 1.0888983011245728, + "learning_rate": 2.0928457201461665e-06, + "loss": 0.2985, + "step": 35296 + }, + { + "epoch": 0.7065935990791482, + "grad_norm": 1.1901036500930786, + "learning_rate": 2.0925819723197604e-06, + "loss": 0.3232, + "step": 35297 + }, + { + "epoch": 0.7066136175962766, + "grad_norm": 1.1665430068969727, + "learning_rate": 2.092318236715512e-06, + "loss": 0.3044, + "step": 35298 + }, + { + "epoch": 0.7066336361134049, + "grad_norm": 1.2351146936416626, + "learning_rate": 2.092054513334534e-06, + "loss": 0.3418, + "step": 35299 + }, + { + "epoch": 0.7066536546305332, + "grad_norm": 1.1240822076797485, + "learning_rate": 2.0917908021779327e-06, + "loss": 0.279, + "step": 35300 + }, + { + "epoch": 0.7066736731476616, + "grad_norm": 1.1695060729980469, + "learning_rate": 2.0915271032468177e-06, + "loss": 0.316, + "step": 35301 + }, + { + "epoch": 0.7066936916647899, + "grad_norm": 1.1440562009811401, + "learning_rate": 2.0912634165422964e-06, + "loss": 0.3259, + "step": 35302 + }, + { + "epoch": 0.7067137101819183, + "grad_norm": 1.07475745677948, + "learning_rate": 2.090999742065476e-06, + "loss": 0.2992, + "step": 35303 + }, + { + "epoch": 0.7067337286990466, + "grad_norm": 1.9623485803604126, + "learning_rate": 2.090736079817468e-06, + "loss": 0.7031, + "step": 35304 + }, + { + "epoch": 0.706753747216175, + "grad_norm": 1.0693784952163696, + "learning_rate": 2.0904724297993784e-06, + "loss": 0.3122, + "step": 35305 + }, + { + "epoch": 0.7067737657333033, + "grad_norm": 1.154883861541748, + "learning_rate": 2.0902087920123176e-06, + "loss": 0.3434, + "step": 35306 + }, + { + "epoch": 0.7067937842504316, + "grad_norm": 1.1228349208831787, + "learning_rate": 2.0899451664573917e-06, + "loss": 0.3167, + "step": 35307 + }, + { + "epoch": 0.70681380276756, + "grad_norm": 1.1847457885742188, + "learning_rate": 2.0896815531357116e-06, + "loss": 0.2477, + "step": 35308 + }, + { + "epoch": 0.7068338212846883, + "grad_norm": 1.0341674089431763, + "learning_rate": 2.0894179520483844e-06, + "loss": 0.2801, + "step": 35309 + }, + { + "epoch": 0.7068538398018167, + "grad_norm": 0.9829238057136536, + "learning_rate": 2.089154363196517e-06, + "loss": 0.2992, + "step": 35310 + }, + { + "epoch": 0.706873858318945, + "grad_norm": 1.049275279045105, + "learning_rate": 2.0888907865812186e-06, + "loss": 0.3169, + "step": 35311 + }, + { + "epoch": 0.7068938768360734, + "grad_norm": 1.0436009168624878, + "learning_rate": 2.0886272222035946e-06, + "loss": 0.3453, + "step": 35312 + }, + { + "epoch": 0.7069138953532017, + "grad_norm": 1.2337687015533447, + "learning_rate": 2.088363670064758e-06, + "loss": 0.3255, + "step": 35313 + }, + { + "epoch": 0.7069339138703301, + "grad_norm": 1.1184659004211426, + "learning_rate": 2.0881001301658133e-06, + "loss": 0.3351, + "step": 35314 + }, + { + "epoch": 0.7069539323874584, + "grad_norm": 1.1018328666687012, + "learning_rate": 2.087836602507869e-06, + "loss": 0.2873, + "step": 35315 + }, + { + "epoch": 0.7069739509045867, + "grad_norm": 1.050130844116211, + "learning_rate": 2.0875730870920314e-06, + "loss": 0.2696, + "step": 35316 + }, + { + "epoch": 0.7069939694217151, + "grad_norm": 1.1612991094589233, + "learning_rate": 2.087309583919411e-06, + "loss": 0.3025, + "step": 35317 + }, + { + "epoch": 0.7070139879388434, + "grad_norm": 1.130038857460022, + "learning_rate": 2.087046092991113e-06, + "loss": 0.3137, + "step": 35318 + }, + { + "epoch": 0.7070340064559718, + "grad_norm": 1.0712196826934814, + "learning_rate": 2.0867826143082483e-06, + "loss": 0.3187, + "step": 35319 + }, + { + "epoch": 0.7070540249731001, + "grad_norm": 1.9280500411987305, + "learning_rate": 2.0865191478719204e-06, + "loss": 0.8616, + "step": 35320 + }, + { + "epoch": 0.7070740434902285, + "grad_norm": 1.1751829385757446, + "learning_rate": 2.086255693683241e-06, + "loss": 0.2809, + "step": 35321 + }, + { + "epoch": 0.7070940620073568, + "grad_norm": 1.8144747018814087, + "learning_rate": 2.085992251743316e-06, + "loss": 0.7842, + "step": 35322 + }, + { + "epoch": 0.7071140805244851, + "grad_norm": 1.196682333946228, + "learning_rate": 2.085728822053251e-06, + "loss": 0.265, + "step": 35323 + }, + { + "epoch": 0.7071340990416135, + "grad_norm": 1.1004794836044312, + "learning_rate": 2.0854654046141565e-06, + "loss": 0.3024, + "step": 35324 + }, + { + "epoch": 0.7071541175587418, + "grad_norm": 0.9926008582115173, + "learning_rate": 2.0852019994271355e-06, + "loss": 0.2807, + "step": 35325 + }, + { + "epoch": 0.7071741360758702, + "grad_norm": 1.1104614734649658, + "learning_rate": 2.0849386064933003e-06, + "loss": 0.2746, + "step": 35326 + }, + { + "epoch": 0.7071941545929985, + "grad_norm": 1.0703017711639404, + "learning_rate": 2.084675225813756e-06, + "loss": 0.2491, + "step": 35327 + }, + { + "epoch": 0.7072141731101269, + "grad_norm": 1.1695268154144287, + "learning_rate": 2.084411857389609e-06, + "loss": 0.3095, + "step": 35328 + }, + { + "epoch": 0.7072341916272552, + "grad_norm": 1.2760143280029297, + "learning_rate": 2.084148501221966e-06, + "loss": 0.2862, + "step": 35329 + }, + { + "epoch": 0.7072542101443835, + "grad_norm": 1.1596909761428833, + "learning_rate": 2.0838851573119363e-06, + "loss": 0.3083, + "step": 35330 + }, + { + "epoch": 0.7072742286615119, + "grad_norm": 1.1975128650665283, + "learning_rate": 2.0836218256606244e-06, + "loss": 0.3165, + "step": 35331 + }, + { + "epoch": 0.7072942471786402, + "grad_norm": 1.0920666456222534, + "learning_rate": 2.0833585062691408e-06, + "loss": 0.2264, + "step": 35332 + }, + { + "epoch": 0.7073142656957686, + "grad_norm": 1.199596881866455, + "learning_rate": 2.08309519913859e-06, + "loss": 0.3237, + "step": 35333 + }, + { + "epoch": 0.7073342842128969, + "grad_norm": 1.0874419212341309, + "learning_rate": 2.082831904270077e-06, + "loss": 0.2895, + "step": 35334 + }, + { + "epoch": 0.7073543027300253, + "grad_norm": 1.2820467948913574, + "learning_rate": 2.082568621664714e-06, + "loss": 0.3251, + "step": 35335 + }, + { + "epoch": 0.7073743212471536, + "grad_norm": 1.0940064191818237, + "learning_rate": 2.0823053513236034e-06, + "loss": 0.2736, + "step": 35336 + }, + { + "epoch": 0.707394339764282, + "grad_norm": 1.0680798292160034, + "learning_rate": 2.082042093247854e-06, + "loss": 0.3005, + "step": 35337 + }, + { + "epoch": 0.7074143582814103, + "grad_norm": 1.14065420627594, + "learning_rate": 2.0817788474385697e-06, + "loss": 0.3094, + "step": 35338 + }, + { + "epoch": 0.7074343767985386, + "grad_norm": 1.2528139352798462, + "learning_rate": 2.081515613896861e-06, + "loss": 0.2845, + "step": 35339 + }, + { + "epoch": 0.707454395315667, + "grad_norm": 1.0524930953979492, + "learning_rate": 2.0812523926238325e-06, + "loss": 0.2716, + "step": 35340 + }, + { + "epoch": 0.7074744138327953, + "grad_norm": 1.1126301288604736, + "learning_rate": 2.080989183620591e-06, + "loss": 0.2924, + "step": 35341 + }, + { + "epoch": 0.7074944323499237, + "grad_norm": 1.2242573499679565, + "learning_rate": 2.0807259868882422e-06, + "loss": 0.3135, + "step": 35342 + }, + { + "epoch": 0.707514450867052, + "grad_norm": 0.9953442811965942, + "learning_rate": 2.0804628024278916e-06, + "loss": 0.2594, + "step": 35343 + }, + { + "epoch": 0.7075344693841804, + "grad_norm": 1.1506177186965942, + "learning_rate": 2.0801996302406473e-06, + "loss": 0.2968, + "step": 35344 + }, + { + "epoch": 0.7075544879013087, + "grad_norm": 1.1101956367492676, + "learning_rate": 2.0799364703276172e-06, + "loss": 0.2835, + "step": 35345 + }, + { + "epoch": 0.707574506418437, + "grad_norm": 1.1458138227462769, + "learning_rate": 2.0796733226899055e-06, + "loss": 0.3312, + "step": 35346 + }, + { + "epoch": 0.7075945249355654, + "grad_norm": 1.0718342065811157, + "learning_rate": 2.079410187328617e-06, + "loss": 0.3105, + "step": 35347 + }, + { + "epoch": 0.7076145434526937, + "grad_norm": 1.9744523763656616, + "learning_rate": 2.0791470642448613e-06, + "loss": 0.7933, + "step": 35348 + }, + { + "epoch": 0.7076345619698221, + "grad_norm": 1.0881171226501465, + "learning_rate": 2.0788839534397433e-06, + "loss": 0.2979, + "step": 35349 + }, + { + "epoch": 0.7076545804869504, + "grad_norm": 1.1529428958892822, + "learning_rate": 2.0786208549143678e-06, + "loss": 0.321, + "step": 35350 + }, + { + "epoch": 0.7076745990040788, + "grad_norm": 1.1234554052352905, + "learning_rate": 2.0783577686698404e-06, + "loss": 0.261, + "step": 35351 + }, + { + "epoch": 0.7076946175212071, + "grad_norm": 1.1638298034667969, + "learning_rate": 2.0780946947072696e-06, + "loss": 0.3012, + "step": 35352 + }, + { + "epoch": 0.7077146360383355, + "grad_norm": 1.2072197198867798, + "learning_rate": 2.07783163302776e-06, + "loss": 0.3404, + "step": 35353 + }, + { + "epoch": 0.7077346545554638, + "grad_norm": 1.233834147453308, + "learning_rate": 2.077568583632417e-06, + "loss": 0.3084, + "step": 35354 + }, + { + "epoch": 0.7077546730725921, + "grad_norm": 1.1099951267242432, + "learning_rate": 2.077305546522347e-06, + "loss": 0.3121, + "step": 35355 + }, + { + "epoch": 0.7077746915897205, + "grad_norm": 1.3187037706375122, + "learning_rate": 2.077042521698654e-06, + "loss": 0.3235, + "step": 35356 + }, + { + "epoch": 0.7077947101068488, + "grad_norm": 1.048478603363037, + "learning_rate": 2.0767795091624447e-06, + "loss": 0.2999, + "step": 35357 + }, + { + "epoch": 0.7078147286239772, + "grad_norm": 1.587915301322937, + "learning_rate": 2.0765165089148275e-06, + "loss": 0.277, + "step": 35358 + }, + { + "epoch": 0.7078347471411055, + "grad_norm": 1.144735336303711, + "learning_rate": 2.0762535209569057e-06, + "loss": 0.2998, + "step": 35359 + }, + { + "epoch": 0.7078547656582339, + "grad_norm": 1.3167577981948853, + "learning_rate": 2.0759905452897827e-06, + "loss": 0.3077, + "step": 35360 + }, + { + "epoch": 0.7078747841753622, + "grad_norm": 1.1538066864013672, + "learning_rate": 2.0757275819145685e-06, + "loss": 0.3309, + "step": 35361 + }, + { + "epoch": 0.7078948026924905, + "grad_norm": 1.15496027469635, + "learning_rate": 2.0754646308323662e-06, + "loss": 0.3083, + "step": 35362 + }, + { + "epoch": 0.7079148212096189, + "grad_norm": 1.071019172668457, + "learning_rate": 2.0752016920442806e-06, + "loss": 0.2937, + "step": 35363 + }, + { + "epoch": 0.7079348397267472, + "grad_norm": 1.259757161140442, + "learning_rate": 2.0749387655514186e-06, + "loss": 0.305, + "step": 35364 + }, + { + "epoch": 0.7079548582438756, + "grad_norm": 1.1901204586029053, + "learning_rate": 2.0746758513548822e-06, + "loss": 0.3018, + "step": 35365 + }, + { + "epoch": 0.7079748767610039, + "grad_norm": 1.1363593339920044, + "learning_rate": 2.0744129494557807e-06, + "loss": 0.2815, + "step": 35366 + }, + { + "epoch": 0.7079948952781323, + "grad_norm": 1.9683443307876587, + "learning_rate": 2.0741500598552176e-06, + "loss": 0.7944, + "step": 35367 + }, + { + "epoch": 0.7080149137952606, + "grad_norm": 1.1096380949020386, + "learning_rate": 2.0738871825542984e-06, + "loss": 0.2657, + "step": 35368 + }, + { + "epoch": 0.708034932312389, + "grad_norm": 1.080119013786316, + "learning_rate": 2.0736243175541255e-06, + "loss": 0.3046, + "step": 35369 + }, + { + "epoch": 0.7080549508295173, + "grad_norm": 1.1240888833999634, + "learning_rate": 2.0733614648558064e-06, + "loss": 0.2934, + "step": 35370 + }, + { + "epoch": 0.7080749693466456, + "grad_norm": 1.0732747316360474, + "learning_rate": 2.0730986244604477e-06, + "loss": 0.2627, + "step": 35371 + }, + { + "epoch": 0.708094987863774, + "grad_norm": 1.0547237396240234, + "learning_rate": 2.0728357963691528e-06, + "loss": 0.3327, + "step": 35372 + }, + { + "epoch": 0.7081150063809023, + "grad_norm": 1.2654048204421997, + "learning_rate": 2.072572980583026e-06, + "loss": 0.3407, + "step": 35373 + }, + { + "epoch": 0.7081350248980307, + "grad_norm": 1.1474864482879639, + "learning_rate": 2.07231017710317e-06, + "loss": 0.3436, + "step": 35374 + }, + { + "epoch": 0.708155043415159, + "grad_norm": 1.051975965499878, + "learning_rate": 2.0720473859306945e-06, + "loss": 0.3439, + "step": 35375 + }, + { + "epoch": 0.7081750619322874, + "grad_norm": 1.0937985181808472, + "learning_rate": 2.0717846070667013e-06, + "loss": 0.3334, + "step": 35376 + }, + { + "epoch": 0.7081950804494157, + "grad_norm": 1.3695303201675415, + "learning_rate": 2.071521840512296e-06, + "loss": 0.3561, + "step": 35377 + }, + { + "epoch": 0.708215098966544, + "grad_norm": 1.1686928272247314, + "learning_rate": 2.0712590862685806e-06, + "loss": 0.3006, + "step": 35378 + }, + { + "epoch": 0.7082351174836724, + "grad_norm": 1.0460129976272583, + "learning_rate": 2.070996344336663e-06, + "loss": 0.3042, + "step": 35379 + }, + { + "epoch": 0.7082551360008007, + "grad_norm": 1.1348010301589966, + "learning_rate": 2.070733614717647e-06, + "loss": 0.2878, + "step": 35380 + }, + { + "epoch": 0.7082751545179291, + "grad_norm": 1.0683749914169312, + "learning_rate": 2.0704708974126364e-06, + "loss": 0.3345, + "step": 35381 + }, + { + "epoch": 0.7082951730350574, + "grad_norm": 1.0724056959152222, + "learning_rate": 2.070208192422734e-06, + "loss": 0.272, + "step": 35382 + }, + { + "epoch": 0.7083151915521858, + "grad_norm": 1.071937084197998, + "learning_rate": 2.069945499749046e-06, + "loss": 0.2607, + "step": 35383 + }, + { + "epoch": 0.7083352100693141, + "grad_norm": 1.0666440725326538, + "learning_rate": 2.0696828193926784e-06, + "loss": 0.2777, + "step": 35384 + }, + { + "epoch": 0.7083552285864425, + "grad_norm": 1.9721956253051758, + "learning_rate": 2.069420151354733e-06, + "loss": 0.7759, + "step": 35385 + }, + { + "epoch": 0.7083752471035708, + "grad_norm": 1.2364637851715088, + "learning_rate": 2.0691574956363145e-06, + "loss": 0.3064, + "step": 35386 + }, + { + "epoch": 0.7083952656206991, + "grad_norm": 1.8703018426895142, + "learning_rate": 2.068894852238526e-06, + "loss": 0.802, + "step": 35387 + }, + { + "epoch": 0.7084152841378275, + "grad_norm": 1.1555665731430054, + "learning_rate": 2.0686322211624748e-06, + "loss": 0.2839, + "step": 35388 + }, + { + "epoch": 0.7084353026549558, + "grad_norm": 1.0214184522628784, + "learning_rate": 2.0683696024092626e-06, + "loss": 0.2588, + "step": 35389 + }, + { + "epoch": 0.7084553211720842, + "grad_norm": 1.089686632156372, + "learning_rate": 2.068106995979993e-06, + "loss": 0.3328, + "step": 35390 + }, + { + "epoch": 0.7084753396892125, + "grad_norm": 1.046926736831665, + "learning_rate": 2.0678444018757698e-06, + "loss": 0.2239, + "step": 35391 + }, + { + "epoch": 0.7084953582063409, + "grad_norm": 1.1362247467041016, + "learning_rate": 2.0675818200976993e-06, + "loss": 0.2637, + "step": 35392 + }, + { + "epoch": 0.7085153767234692, + "grad_norm": 1.0931838750839233, + "learning_rate": 2.0673192506468836e-06, + "loss": 0.28, + "step": 35393 + }, + { + "epoch": 0.7085353952405975, + "grad_norm": 1.1262105703353882, + "learning_rate": 2.067056693524427e-06, + "loss": 0.3492, + "step": 35394 + }, + { + "epoch": 0.7085554137577259, + "grad_norm": 1.1114234924316406, + "learning_rate": 2.06679414873143e-06, + "loss": 0.2979, + "step": 35395 + }, + { + "epoch": 0.7085754322748542, + "grad_norm": 1.2130366563796997, + "learning_rate": 2.0665316162690002e-06, + "loss": 0.2963, + "step": 35396 + }, + { + "epoch": 0.7085954507919826, + "grad_norm": 1.118482232093811, + "learning_rate": 2.066269096138242e-06, + "loss": 0.2958, + "step": 35397 + }, + { + "epoch": 0.7086154693091109, + "grad_norm": 1.153592586517334, + "learning_rate": 2.066006588340257e-06, + "loss": 0.3212, + "step": 35398 + }, + { + "epoch": 0.7086354878262393, + "grad_norm": 1.1272895336151123, + "learning_rate": 2.0657440928761487e-06, + "loss": 0.2905, + "step": 35399 + }, + { + "epoch": 0.7086555063433676, + "grad_norm": 1.185746669769287, + "learning_rate": 2.0654816097470187e-06, + "loss": 0.3345, + "step": 35400 + }, + { + "epoch": 0.708675524860496, + "grad_norm": 1.249202013015747, + "learning_rate": 2.065219138953975e-06, + "loss": 0.2999, + "step": 35401 + }, + { + "epoch": 0.7086955433776243, + "grad_norm": 1.1831740140914917, + "learning_rate": 2.064956680498118e-06, + "loss": 0.3284, + "step": 35402 + }, + { + "epoch": 0.7087155618947526, + "grad_norm": 1.0326908826828003, + "learning_rate": 2.0646942343805515e-06, + "loss": 0.2765, + "step": 35403 + }, + { + "epoch": 0.708735580411881, + "grad_norm": 1.1789358854293823, + "learning_rate": 2.064431800602377e-06, + "loss": 0.2926, + "step": 35404 + }, + { + "epoch": 0.7087555989290093, + "grad_norm": 1.2198487520217896, + "learning_rate": 2.064169379164702e-06, + "loss": 0.2848, + "step": 35405 + }, + { + "epoch": 0.7087756174461377, + "grad_norm": 1.1456503868103027, + "learning_rate": 2.063906970068626e-06, + "loss": 0.2674, + "step": 35406 + }, + { + "epoch": 0.708795635963266, + "grad_norm": 1.0526803731918335, + "learning_rate": 2.0636445733152536e-06, + "loss": 0.3039, + "step": 35407 + }, + { + "epoch": 0.7088156544803944, + "grad_norm": 1.0972826480865479, + "learning_rate": 2.063382188905686e-06, + "loss": 0.3162, + "step": 35408 + }, + { + "epoch": 0.7088356729975227, + "grad_norm": 1.0444560050964355, + "learning_rate": 2.0631198168410276e-06, + "loss": 0.2824, + "step": 35409 + }, + { + "epoch": 0.708855691514651, + "grad_norm": 1.1527361869812012, + "learning_rate": 2.062857457122383e-06, + "loss": 0.334, + "step": 35410 + }, + { + "epoch": 0.7088757100317794, + "grad_norm": 1.7550512552261353, + "learning_rate": 2.062595109750854e-06, + "loss": 0.748, + "step": 35411 + }, + { + "epoch": 0.7088957285489077, + "grad_norm": 0.993001401424408, + "learning_rate": 2.062332774727543e-06, + "loss": 0.3075, + "step": 35412 + }, + { + "epoch": 0.7089157470660361, + "grad_norm": 1.2060410976409912, + "learning_rate": 2.0620704520535508e-06, + "loss": 0.2696, + "step": 35413 + }, + { + "epoch": 0.7089357655831644, + "grad_norm": 1.1182814836502075, + "learning_rate": 2.0618081417299833e-06, + "loss": 0.3041, + "step": 35414 + }, + { + "epoch": 0.7089557841002928, + "grad_norm": 1.0990002155303955, + "learning_rate": 2.0615458437579433e-06, + "loss": 0.3002, + "step": 35415 + }, + { + "epoch": 0.7089758026174211, + "grad_norm": 1.2479547262191772, + "learning_rate": 2.0612835581385317e-06, + "loss": 0.3588, + "step": 35416 + }, + { + "epoch": 0.7089958211345495, + "grad_norm": 1.0894927978515625, + "learning_rate": 2.0610212848728513e-06, + "loss": 0.2751, + "step": 35417 + }, + { + "epoch": 0.7090158396516778, + "grad_norm": 1.1650776863098145, + "learning_rate": 2.0607590239620036e-06, + "loss": 0.2756, + "step": 35418 + }, + { + "epoch": 0.7090358581688061, + "grad_norm": 1.0316826105117798, + "learning_rate": 2.0604967754070936e-06, + "loss": 0.2255, + "step": 35419 + }, + { + "epoch": 0.7090558766859345, + "grad_norm": 2.032914638519287, + "learning_rate": 2.0602345392092233e-06, + "loss": 0.8473, + "step": 35420 + }, + { + "epoch": 0.7090758952030628, + "grad_norm": 1.9079368114471436, + "learning_rate": 2.059972315369492e-06, + "loss": 0.7482, + "step": 35421 + }, + { + "epoch": 0.7090959137201912, + "grad_norm": 1.1247634887695312, + "learning_rate": 2.059710103889005e-06, + "loss": 0.2632, + "step": 35422 + }, + { + "epoch": 0.7091159322373195, + "grad_norm": 1.443930745124817, + "learning_rate": 2.059447904768865e-06, + "loss": 0.2856, + "step": 35423 + }, + { + "epoch": 0.7091359507544479, + "grad_norm": 1.128981590270996, + "learning_rate": 2.059185718010174e-06, + "loss": 0.3451, + "step": 35424 + }, + { + "epoch": 0.7091559692715762, + "grad_norm": 1.023322343826294, + "learning_rate": 2.058923543614033e-06, + "loss": 0.2911, + "step": 35425 + }, + { + "epoch": 0.7091759877887045, + "grad_norm": 1.1093848943710327, + "learning_rate": 2.0586613815815443e-06, + "loss": 0.295, + "step": 35426 + }, + { + "epoch": 0.7091960063058329, + "grad_norm": 1.2252388000488281, + "learning_rate": 2.0583992319138087e-06, + "loss": 0.317, + "step": 35427 + }, + { + "epoch": 0.7092160248229612, + "grad_norm": 1.4277369976043701, + "learning_rate": 2.0581370946119312e-06, + "loss": 0.2945, + "step": 35428 + }, + { + "epoch": 0.7092360433400896, + "grad_norm": 1.0494199991226196, + "learning_rate": 2.0578749696770122e-06, + "loss": 0.3093, + "step": 35429 + }, + { + "epoch": 0.7092560618572179, + "grad_norm": 1.1142222881317139, + "learning_rate": 2.057612857110154e-06, + "loss": 0.3317, + "step": 35430 + }, + { + "epoch": 0.7092760803743463, + "grad_norm": 1.9182374477386475, + "learning_rate": 2.057350756912456e-06, + "loss": 0.6855, + "step": 35431 + }, + { + "epoch": 0.7092960988914746, + "grad_norm": 1.235120415687561, + "learning_rate": 2.0570886690850244e-06, + "loss": 0.2764, + "step": 35432 + }, + { + "epoch": 0.709316117408603, + "grad_norm": 1.1269152164459229, + "learning_rate": 2.056826593628959e-06, + "loss": 0.3107, + "step": 35433 + }, + { + "epoch": 0.7093361359257313, + "grad_norm": 2.0706498622894287, + "learning_rate": 2.0565645305453586e-06, + "loss": 0.7504, + "step": 35434 + }, + { + "epoch": 0.7093561544428596, + "grad_norm": 1.1054978370666504, + "learning_rate": 2.0563024798353277e-06, + "loss": 0.3049, + "step": 35435 + }, + { + "epoch": 0.709376172959988, + "grad_norm": 1.4136834144592285, + "learning_rate": 2.0560404414999696e-06, + "loss": 0.3308, + "step": 35436 + }, + { + "epoch": 0.7093961914771163, + "grad_norm": 1.8567509651184082, + "learning_rate": 2.0557784155403838e-06, + "loss": 0.319, + "step": 35437 + }, + { + "epoch": 0.7094162099942447, + "grad_norm": 1.0701525211334229, + "learning_rate": 2.055516401957672e-06, + "loss": 0.2935, + "step": 35438 + }, + { + "epoch": 0.709436228511373, + "grad_norm": 1.2956647872924805, + "learning_rate": 2.055254400752936e-06, + "loss": 0.269, + "step": 35439 + }, + { + "epoch": 0.7094562470285014, + "grad_norm": 1.2222929000854492, + "learning_rate": 2.054992411927274e-06, + "loss": 0.2879, + "step": 35440 + }, + { + "epoch": 0.7094762655456297, + "grad_norm": 1.1140859127044678, + "learning_rate": 2.0547304354817927e-06, + "loss": 0.3304, + "step": 35441 + }, + { + "epoch": 0.709496284062758, + "grad_norm": 1.1620573997497559, + "learning_rate": 2.0544684714175907e-06, + "loss": 0.326, + "step": 35442 + }, + { + "epoch": 0.7095163025798864, + "grad_norm": 1.1324187517166138, + "learning_rate": 2.054206519735769e-06, + "loss": 0.312, + "step": 35443 + }, + { + "epoch": 0.7095363210970147, + "grad_norm": 1.1037771701812744, + "learning_rate": 2.053944580437428e-06, + "loss": 0.3294, + "step": 35444 + }, + { + "epoch": 0.7095563396141431, + "grad_norm": 0.9940500855445862, + "learning_rate": 2.0536826535236713e-06, + "loss": 0.3008, + "step": 35445 + }, + { + "epoch": 0.7095763581312714, + "grad_norm": 0.9478356838226318, + "learning_rate": 2.053420738995599e-06, + "loss": 0.2642, + "step": 35446 + }, + { + "epoch": 0.7095963766483998, + "grad_norm": 1.2657114267349243, + "learning_rate": 2.0531588368543097e-06, + "loss": 0.322, + "step": 35447 + }, + { + "epoch": 0.7096163951655281, + "grad_norm": 1.0971918106079102, + "learning_rate": 2.0528969471009086e-06, + "loss": 0.2978, + "step": 35448 + }, + { + "epoch": 0.7096364136826565, + "grad_norm": 0.9969542026519775, + "learning_rate": 2.0526350697364927e-06, + "loss": 0.2801, + "step": 35449 + }, + { + "epoch": 0.7096564321997848, + "grad_norm": 2.026987075805664, + "learning_rate": 2.052373204762166e-06, + "loss": 0.7774, + "step": 35450 + }, + { + "epoch": 0.7096764507169131, + "grad_norm": 0.9668809771537781, + "learning_rate": 2.052111352179028e-06, + "loss": 0.2415, + "step": 35451 + }, + { + "epoch": 0.7096964692340415, + "grad_norm": 1.1279743909835815, + "learning_rate": 2.0518495119881805e-06, + "loss": 0.2946, + "step": 35452 + }, + { + "epoch": 0.7097164877511698, + "grad_norm": 1.2171099185943604, + "learning_rate": 2.0515876841907207e-06, + "loss": 0.2868, + "step": 35453 + }, + { + "epoch": 0.7097365062682982, + "grad_norm": 1.0285686254501343, + "learning_rate": 2.051325868787753e-06, + "loss": 0.2799, + "step": 35454 + }, + { + "epoch": 0.7097565247854265, + "grad_norm": 1.1639009714126587, + "learning_rate": 2.0510640657803775e-06, + "loss": 0.2778, + "step": 35455 + }, + { + "epoch": 0.7097765433025549, + "grad_norm": 1.070037603378296, + "learning_rate": 2.050802275169694e-06, + "loss": 0.3229, + "step": 35456 + }, + { + "epoch": 0.7097965618196832, + "grad_norm": 1.0476388931274414, + "learning_rate": 2.0505404969568025e-06, + "loss": 0.3378, + "step": 35457 + }, + { + "epoch": 0.7098165803368115, + "grad_norm": 1.0748655796051025, + "learning_rate": 2.050278731142803e-06, + "loss": 0.2839, + "step": 35458 + }, + { + "epoch": 0.7098365988539399, + "grad_norm": 1.2176487445831299, + "learning_rate": 2.0500169777287985e-06, + "loss": 0.3078, + "step": 35459 + }, + { + "epoch": 0.7098566173710682, + "grad_norm": 1.1718120574951172, + "learning_rate": 2.0497552367158857e-06, + "loss": 0.2353, + "step": 35460 + }, + { + "epoch": 0.7098766358881966, + "grad_norm": 1.0111693143844604, + "learning_rate": 2.049493508105169e-06, + "loss": 0.2496, + "step": 35461 + }, + { + "epoch": 0.7098966544053249, + "grad_norm": 1.0945335626602173, + "learning_rate": 2.049231791897745e-06, + "loss": 0.3187, + "step": 35462 + }, + { + "epoch": 0.7099166729224533, + "grad_norm": 1.1414873600006104, + "learning_rate": 2.048970088094717e-06, + "loss": 0.3265, + "step": 35463 + }, + { + "epoch": 0.7099366914395816, + "grad_norm": 0.991197407245636, + "learning_rate": 2.048708396697184e-06, + "loss": 0.2771, + "step": 35464 + }, + { + "epoch": 0.70995670995671, + "grad_norm": 1.1435664892196655, + "learning_rate": 2.0484467177062456e-06, + "loss": 0.2631, + "step": 35465 + }, + { + "epoch": 0.7099767284738383, + "grad_norm": 1.0785714387893677, + "learning_rate": 2.048185051123e-06, + "loss": 0.2794, + "step": 35466 + }, + { + "epoch": 0.7099967469909666, + "grad_norm": 1.003197193145752, + "learning_rate": 2.047923396948551e-06, + "loss": 0.3051, + "step": 35467 + }, + { + "epoch": 0.710016765508095, + "grad_norm": 1.030272364616394, + "learning_rate": 2.0476617551839973e-06, + "loss": 0.2522, + "step": 35468 + }, + { + "epoch": 0.7100367840252233, + "grad_norm": 1.1065716743469238, + "learning_rate": 2.0474001258304383e-06, + "loss": 0.3249, + "step": 35469 + }, + { + "epoch": 0.7100568025423517, + "grad_norm": 2.107703447341919, + "learning_rate": 2.0471385088889736e-06, + "loss": 0.821, + "step": 35470 + }, + { + "epoch": 0.71007682105948, + "grad_norm": 1.0709956884384155, + "learning_rate": 2.046876904360701e-06, + "loss": 0.2772, + "step": 35471 + }, + { + "epoch": 0.7100968395766084, + "grad_norm": 1.1961036920547485, + "learning_rate": 2.0466153122467243e-06, + "loss": 0.299, + "step": 35472 + }, + { + "epoch": 0.7101168580937367, + "grad_norm": 1.1006438732147217, + "learning_rate": 2.046353732548139e-06, + "loss": 0.2803, + "step": 35473 + }, + { + "epoch": 0.710136876610865, + "grad_norm": 1.1568043231964111, + "learning_rate": 2.0460921652660493e-06, + "loss": 0.3016, + "step": 35474 + }, + { + "epoch": 0.7101568951279934, + "grad_norm": 1.0643593072891235, + "learning_rate": 2.0458306104015506e-06, + "loss": 0.3298, + "step": 35475 + }, + { + "epoch": 0.7101769136451217, + "grad_norm": 1.0480796098709106, + "learning_rate": 2.045569067955746e-06, + "loss": 0.2987, + "step": 35476 + }, + { + "epoch": 0.7101969321622501, + "grad_norm": 1.184114933013916, + "learning_rate": 2.0453075379297333e-06, + "loss": 0.3166, + "step": 35477 + }, + { + "epoch": 0.7102169506793784, + "grad_norm": 1.0860344171524048, + "learning_rate": 2.045046020324612e-06, + "loss": 0.2706, + "step": 35478 + }, + { + "epoch": 0.7102369691965068, + "grad_norm": 1.2309974431991577, + "learning_rate": 2.044784515141481e-06, + "loss": 0.3346, + "step": 35479 + }, + { + "epoch": 0.7102569877136351, + "grad_norm": 1.9365061521530151, + "learning_rate": 2.0445230223814384e-06, + "loss": 0.805, + "step": 35480 + }, + { + "epoch": 0.7102770062307635, + "grad_norm": 2.1608262062072754, + "learning_rate": 2.044261542045586e-06, + "loss": 0.7508, + "step": 35481 + }, + { + "epoch": 0.7102970247478918, + "grad_norm": 1.1689555644989014, + "learning_rate": 2.044000074135023e-06, + "loss": 0.3129, + "step": 35482 + }, + { + "epoch": 0.7103170432650201, + "grad_norm": 1.2813202142715454, + "learning_rate": 2.043738618650847e-06, + "loss": 0.2816, + "step": 35483 + }, + { + "epoch": 0.7103370617821485, + "grad_norm": 1.1503008604049683, + "learning_rate": 2.0434771755941562e-06, + "loss": 0.3027, + "step": 35484 + }, + { + "epoch": 0.7103570802992768, + "grad_norm": 1.0655328035354614, + "learning_rate": 2.0432157449660526e-06, + "loss": 0.2968, + "step": 35485 + }, + { + "epoch": 0.7103770988164052, + "grad_norm": 1.278425931930542, + "learning_rate": 2.0429543267676324e-06, + "loss": 0.3317, + "step": 35486 + }, + { + "epoch": 0.7103971173335335, + "grad_norm": 1.144083857536316, + "learning_rate": 2.0426929209999974e-06, + "loss": 0.3241, + "step": 35487 + }, + { + "epoch": 0.7104171358506619, + "grad_norm": 1.1130160093307495, + "learning_rate": 2.0424315276642448e-06, + "loss": 0.3037, + "step": 35488 + }, + { + "epoch": 0.7104371543677902, + "grad_norm": 1.0042378902435303, + "learning_rate": 2.042170146761472e-06, + "loss": 0.3238, + "step": 35489 + }, + { + "epoch": 0.7104571728849185, + "grad_norm": 1.0690733194351196, + "learning_rate": 2.041908778292781e-06, + "loss": 0.3085, + "step": 35490 + }, + { + "epoch": 0.7104771914020469, + "grad_norm": 1.1759910583496094, + "learning_rate": 2.0416474222592687e-06, + "loss": 0.3331, + "step": 35491 + }, + { + "epoch": 0.7104972099191752, + "grad_norm": 1.054766058921814, + "learning_rate": 2.0413860786620343e-06, + "loss": 0.2923, + "step": 35492 + }, + { + "epoch": 0.7105172284363036, + "grad_norm": 2.0560474395751953, + "learning_rate": 2.041124747502174e-06, + "loss": 0.7603, + "step": 35493 + }, + { + "epoch": 0.7105372469534319, + "grad_norm": 1.0349559783935547, + "learning_rate": 2.0408634287807904e-06, + "loss": 0.305, + "step": 35494 + }, + { + "epoch": 0.7105572654705603, + "grad_norm": 1.1784125566482544, + "learning_rate": 2.0406021224989806e-06, + "loss": 0.3119, + "step": 35495 + }, + { + "epoch": 0.7105772839876886, + "grad_norm": 1.100545048713684, + "learning_rate": 2.0403408286578423e-06, + "loss": 0.2789, + "step": 35496 + }, + { + "epoch": 0.710597302504817, + "grad_norm": 1.0970439910888672, + "learning_rate": 2.0400795472584723e-06, + "loss": 0.3276, + "step": 35497 + }, + { + "epoch": 0.7106173210219453, + "grad_norm": 1.1441892385482788, + "learning_rate": 2.0398182783019727e-06, + "loss": 0.2919, + "step": 35498 + }, + { + "epoch": 0.7106373395390736, + "grad_norm": 1.124289631843567, + "learning_rate": 2.0395570217894383e-06, + "loss": 0.3012, + "step": 35499 + }, + { + "epoch": 0.710657358056202, + "grad_norm": 1.1695153713226318, + "learning_rate": 2.0392957777219714e-06, + "loss": 0.2864, + "step": 35500 + }, + { + "epoch": 0.7106773765733303, + "grad_norm": 1.2393512725830078, + "learning_rate": 2.0390345461006674e-06, + "loss": 0.2688, + "step": 35501 + }, + { + "epoch": 0.7106973950904587, + "grad_norm": 1.0487867593765259, + "learning_rate": 2.038773326926623e-06, + "loss": 0.3063, + "step": 35502 + }, + { + "epoch": 0.710717413607587, + "grad_norm": 1.461026906967163, + "learning_rate": 2.03851212020094e-06, + "loss": 0.3319, + "step": 35503 + }, + { + "epoch": 0.7107374321247154, + "grad_norm": 1.2119567394256592, + "learning_rate": 2.0382509259247154e-06, + "loss": 0.3221, + "step": 35504 + }, + { + "epoch": 0.7107574506418437, + "grad_norm": 1.0523159503936768, + "learning_rate": 2.037989744099046e-06, + "loss": 0.2832, + "step": 35505 + }, + { + "epoch": 0.710777469158972, + "grad_norm": 1.0270723104476929, + "learning_rate": 2.0377285747250285e-06, + "loss": 0.2955, + "step": 35506 + }, + { + "epoch": 0.7107974876761004, + "grad_norm": 1.108977198600769, + "learning_rate": 2.0374674178037646e-06, + "loss": 0.3284, + "step": 35507 + }, + { + "epoch": 0.7108175061932287, + "grad_norm": 1.1217923164367676, + "learning_rate": 2.0372062733363495e-06, + "loss": 0.305, + "step": 35508 + }, + { + "epoch": 0.7108375247103571, + "grad_norm": 1.3560385704040527, + "learning_rate": 2.036945141323882e-06, + "loss": 0.3581, + "step": 35509 + }, + { + "epoch": 0.7108575432274854, + "grad_norm": 1.2631791830062866, + "learning_rate": 2.0366840217674583e-06, + "loss": 0.3267, + "step": 35510 + }, + { + "epoch": 0.7108775617446138, + "grad_norm": 1.0783567428588867, + "learning_rate": 2.0364229146681786e-06, + "loss": 0.2728, + "step": 35511 + }, + { + "epoch": 0.7108975802617421, + "grad_norm": 1.2850172519683838, + "learning_rate": 2.0361618200271373e-06, + "loss": 0.2847, + "step": 35512 + }, + { + "epoch": 0.7109175987788705, + "grad_norm": 0.947094202041626, + "learning_rate": 2.035900737845436e-06, + "loss": 0.2651, + "step": 35513 + }, + { + "epoch": 0.7109376172959988, + "grad_norm": 1.220970869064331, + "learning_rate": 2.03563966812417e-06, + "loss": 0.3017, + "step": 35514 + }, + { + "epoch": 0.7109576358131271, + "grad_norm": 1.2233586311340332, + "learning_rate": 2.0353786108644345e-06, + "loss": 0.3572, + "step": 35515 + }, + { + "epoch": 0.7109776543302555, + "grad_norm": 1.6962276697158813, + "learning_rate": 2.0351175660673317e-06, + "loss": 0.7151, + "step": 35516 + }, + { + "epoch": 0.7109976728473838, + "grad_norm": 1.919595718383789, + "learning_rate": 2.0348565337339566e-06, + "loss": 0.8449, + "step": 35517 + }, + { + "epoch": 0.7110176913645122, + "grad_norm": 1.1416953802108765, + "learning_rate": 2.034595513865406e-06, + "loss": 0.3459, + "step": 35518 + }, + { + "epoch": 0.7110377098816405, + "grad_norm": 1.3575021028518677, + "learning_rate": 2.0343345064627766e-06, + "loss": 0.3604, + "step": 35519 + }, + { + "epoch": 0.7110577283987689, + "grad_norm": 1.7599693536758423, + "learning_rate": 2.034073511527168e-06, + "loss": 0.7554, + "step": 35520 + }, + { + "epoch": 0.7110777469158972, + "grad_norm": 1.26942777633667, + "learning_rate": 2.033812529059677e-06, + "loss": 0.2753, + "step": 35521 + }, + { + "epoch": 0.7110977654330255, + "grad_norm": 1.6705360412597656, + "learning_rate": 2.0335515590613985e-06, + "loss": 0.753, + "step": 35522 + }, + { + "epoch": 0.7111177839501539, + "grad_norm": 1.2441167831420898, + "learning_rate": 2.0332906015334298e-06, + "loss": 0.3241, + "step": 35523 + }, + { + "epoch": 0.7111378024672822, + "grad_norm": 1.1352064609527588, + "learning_rate": 2.0330296564768705e-06, + "loss": 0.2907, + "step": 35524 + }, + { + "epoch": 0.7111578209844106, + "grad_norm": 1.0676401853561401, + "learning_rate": 2.0327687238928146e-06, + "loss": 0.3041, + "step": 35525 + }, + { + "epoch": 0.7111778395015389, + "grad_norm": 1.242764949798584, + "learning_rate": 2.032507803782362e-06, + "loss": 0.3249, + "step": 35526 + }, + { + "epoch": 0.7111978580186673, + "grad_norm": 1.0997977256774902, + "learning_rate": 2.032246896146608e-06, + "loss": 0.3193, + "step": 35527 + }, + { + "epoch": 0.7112178765357956, + "grad_norm": 1.1572232246398926, + "learning_rate": 2.031986000986647e-06, + "loss": 0.2524, + "step": 35528 + }, + { + "epoch": 0.711237895052924, + "grad_norm": 1.0693423748016357, + "learning_rate": 2.0317251183035807e-06, + "loss": 0.2627, + "step": 35529 + }, + { + "epoch": 0.7112579135700523, + "grad_norm": 1.0833410024642944, + "learning_rate": 2.031464248098503e-06, + "loss": 0.2929, + "step": 35530 + }, + { + "epoch": 0.7112779320871806, + "grad_norm": 1.7806645631790161, + "learning_rate": 2.031203390372511e-06, + "loss": 0.8025, + "step": 35531 + }, + { + "epoch": 0.711297950604309, + "grad_norm": 1.06222665309906, + "learning_rate": 2.0309425451267004e-06, + "loss": 0.3469, + "step": 35532 + }, + { + "epoch": 0.7113179691214373, + "grad_norm": 1.1143239736557007, + "learning_rate": 2.0306817123621676e-06, + "loss": 0.2894, + "step": 35533 + }, + { + "epoch": 0.7113379876385657, + "grad_norm": 1.1746830940246582, + "learning_rate": 2.0304208920800108e-06, + "loss": 0.3469, + "step": 35534 + }, + { + "epoch": 0.711358006155694, + "grad_norm": 1.028744101524353, + "learning_rate": 2.0301600842813256e-06, + "loss": 0.2933, + "step": 35535 + }, + { + "epoch": 0.7113780246728224, + "grad_norm": 1.1980953216552734, + "learning_rate": 2.029899288967207e-06, + "loss": 0.3259, + "step": 35536 + }, + { + "epoch": 0.7113980431899507, + "grad_norm": 1.127798080444336, + "learning_rate": 2.0296385061387543e-06, + "loss": 0.2902, + "step": 35537 + }, + { + "epoch": 0.711418061707079, + "grad_norm": 1.0421680212020874, + "learning_rate": 2.0293777357970595e-06, + "loss": 0.312, + "step": 35538 + }, + { + "epoch": 0.7114380802242074, + "grad_norm": 1.2238399982452393, + "learning_rate": 2.029116977943224e-06, + "loss": 0.3077, + "step": 35539 + }, + { + "epoch": 0.7114580987413357, + "grad_norm": 1.1504878997802734, + "learning_rate": 2.028856232578341e-06, + "loss": 0.3225, + "step": 35540 + }, + { + "epoch": 0.7114781172584641, + "grad_norm": 1.0758651494979858, + "learning_rate": 2.0285954997035073e-06, + "loss": 0.3196, + "step": 35541 + }, + { + "epoch": 0.7114981357755924, + "grad_norm": 1.1445634365081787, + "learning_rate": 2.028334779319817e-06, + "loss": 0.2934, + "step": 35542 + }, + { + "epoch": 0.7115181542927208, + "grad_norm": 1.163775086402893, + "learning_rate": 2.0280740714283693e-06, + "loss": 0.2785, + "step": 35543 + }, + { + "epoch": 0.7115381728098491, + "grad_norm": 1.0999176502227783, + "learning_rate": 2.0278133760302587e-06, + "loss": 0.294, + "step": 35544 + }, + { + "epoch": 0.7115581913269775, + "grad_norm": 1.229938268661499, + "learning_rate": 2.027552693126581e-06, + "loss": 0.3284, + "step": 35545 + }, + { + "epoch": 0.7115782098441058, + "grad_norm": 1.0373499393463135, + "learning_rate": 2.02729202271843e-06, + "loss": 0.2957, + "step": 35546 + }, + { + "epoch": 0.7115982283612341, + "grad_norm": 1.1432414054870605, + "learning_rate": 2.027031364806906e-06, + "loss": 0.3116, + "step": 35547 + }, + { + "epoch": 0.7116182468783625, + "grad_norm": 1.0574272871017456, + "learning_rate": 2.026770719393102e-06, + "loss": 0.3162, + "step": 35548 + }, + { + "epoch": 0.7116382653954908, + "grad_norm": 1.0432958602905273, + "learning_rate": 2.0265100864781124e-06, + "loss": 0.2903, + "step": 35549 + }, + { + "epoch": 0.7116582839126192, + "grad_norm": 1.252000093460083, + "learning_rate": 2.026249466063036e-06, + "loss": 0.325, + "step": 35550 + }, + { + "epoch": 0.7116783024297475, + "grad_norm": 1.0574164390563965, + "learning_rate": 2.0259888581489658e-06, + "loss": 0.2486, + "step": 35551 + }, + { + "epoch": 0.7116983209468759, + "grad_norm": 1.1069726943969727, + "learning_rate": 2.0257282627369993e-06, + "loss": 0.3143, + "step": 35552 + }, + { + "epoch": 0.7117183394640042, + "grad_norm": 1.1205140352249146, + "learning_rate": 2.025467679828232e-06, + "loss": 0.2836, + "step": 35553 + }, + { + "epoch": 0.7117383579811325, + "grad_norm": 1.7923401594161987, + "learning_rate": 2.0252071094237574e-06, + "loss": 0.7362, + "step": 35554 + }, + { + "epoch": 0.7117583764982609, + "grad_norm": 1.0163713693618774, + "learning_rate": 2.0249465515246707e-06, + "loss": 0.2561, + "step": 35555 + }, + { + "epoch": 0.7117783950153892, + "grad_norm": 1.2082222700119019, + "learning_rate": 2.0246860061320704e-06, + "loss": 0.3482, + "step": 35556 + }, + { + "epoch": 0.7117984135325176, + "grad_norm": 1.065497875213623, + "learning_rate": 2.02442547324705e-06, + "loss": 0.3083, + "step": 35557 + }, + { + "epoch": 0.7118184320496459, + "grad_norm": 1.100088119506836, + "learning_rate": 2.024164952870704e-06, + "loss": 0.3138, + "step": 35558 + }, + { + "epoch": 0.7118384505667743, + "grad_norm": 2.228515386581421, + "learning_rate": 2.023904445004127e-06, + "loss": 0.8028, + "step": 35559 + }, + { + "epoch": 0.7118584690839026, + "grad_norm": 1.0762540102005005, + "learning_rate": 2.023643949648417e-06, + "loss": 0.2936, + "step": 35560 + }, + { + "epoch": 0.711878487601031, + "grad_norm": 1.0859168767929077, + "learning_rate": 2.0233834668046664e-06, + "loss": 0.3264, + "step": 35561 + }, + { + "epoch": 0.7118985061181593, + "grad_norm": 1.8836315870285034, + "learning_rate": 2.02312299647397e-06, + "loss": 0.7356, + "step": 35562 + }, + { + "epoch": 0.7119185246352876, + "grad_norm": 1.198508858680725, + "learning_rate": 2.022862538657426e-06, + "loss": 0.2659, + "step": 35563 + }, + { + "epoch": 0.711938543152416, + "grad_norm": 1.0810796022415161, + "learning_rate": 2.0226020933561246e-06, + "loss": 0.3157, + "step": 35564 + }, + { + "epoch": 0.7119585616695443, + "grad_norm": 1.0830762386322021, + "learning_rate": 2.0223416605711654e-06, + "loss": 0.3212, + "step": 35565 + }, + { + "epoch": 0.7119785801866727, + "grad_norm": 1.0339850187301636, + "learning_rate": 2.022081240303641e-06, + "loss": 0.2817, + "step": 35566 + }, + { + "epoch": 0.711998598703801, + "grad_norm": 1.163480520248413, + "learning_rate": 2.021820832554646e-06, + "loss": 0.3173, + "step": 35567 + }, + { + "epoch": 0.7120186172209294, + "grad_norm": 1.1488126516342163, + "learning_rate": 2.0215604373252744e-06, + "loss": 0.289, + "step": 35568 + }, + { + "epoch": 0.7120386357380577, + "grad_norm": 1.0702546834945679, + "learning_rate": 2.0213000546166226e-06, + "loss": 0.2891, + "step": 35569 + }, + { + "epoch": 0.712058654255186, + "grad_norm": 1.200219988822937, + "learning_rate": 2.021039684429785e-06, + "loss": 0.3526, + "step": 35570 + }, + { + "epoch": 0.7120786727723144, + "grad_norm": 1.204649806022644, + "learning_rate": 2.0207793267658554e-06, + "loss": 0.3512, + "step": 35571 + }, + { + "epoch": 0.7120986912894427, + "grad_norm": 0.9903343915939331, + "learning_rate": 2.020518981625928e-06, + "loss": 0.2716, + "step": 35572 + }, + { + "epoch": 0.7121187098065711, + "grad_norm": 0.9842861294746399, + "learning_rate": 2.020258649011096e-06, + "loss": 0.2738, + "step": 35573 + }, + { + "epoch": 0.7121387283236994, + "grad_norm": 1.0309017896652222, + "learning_rate": 2.0199983289224574e-06, + "loss": 0.3018, + "step": 35574 + }, + { + "epoch": 0.7121587468408278, + "grad_norm": 1.1792142391204834, + "learning_rate": 2.0197380213611025e-06, + "loss": 0.2681, + "step": 35575 + }, + { + "epoch": 0.7121787653579561, + "grad_norm": 1.3273909091949463, + "learning_rate": 2.019477726328129e-06, + "loss": 0.3086, + "step": 35576 + }, + { + "epoch": 0.7121987838750845, + "grad_norm": 1.1319656372070312, + "learning_rate": 2.019217443824629e-06, + "loss": 0.2758, + "step": 35577 + }, + { + "epoch": 0.7122188023922128, + "grad_norm": 1.9951932430267334, + "learning_rate": 2.0189571738516984e-06, + "loss": 0.7448, + "step": 35578 + }, + { + "epoch": 0.7122388209093411, + "grad_norm": 1.8613406419754028, + "learning_rate": 2.0186969164104303e-06, + "loss": 0.7612, + "step": 35579 + }, + { + "epoch": 0.7122588394264695, + "grad_norm": 1.149738073348999, + "learning_rate": 2.018436671501919e-06, + "loss": 0.3002, + "step": 35580 + }, + { + "epoch": 0.7122788579435978, + "grad_norm": 1.070082426071167, + "learning_rate": 2.0181764391272563e-06, + "loss": 0.2898, + "step": 35581 + }, + { + "epoch": 0.7122988764607262, + "grad_norm": 1.0136754512786865, + "learning_rate": 2.01791621928754e-06, + "loss": 0.2892, + "step": 35582 + }, + { + "epoch": 0.7123188949778545, + "grad_norm": 1.777138352394104, + "learning_rate": 2.017656011983862e-06, + "loss": 0.7458, + "step": 35583 + }, + { + "epoch": 0.7123389134949829, + "grad_norm": 1.9306975603103638, + "learning_rate": 2.017395817217317e-06, + "loss": 0.7621, + "step": 35584 + }, + { + "epoch": 0.7123589320121112, + "grad_norm": 1.0548298358917236, + "learning_rate": 2.0171356349889974e-06, + "loss": 0.3256, + "step": 35585 + }, + { + "epoch": 0.7123789505292395, + "grad_norm": 1.111789584159851, + "learning_rate": 2.016875465299996e-06, + "loss": 0.2964, + "step": 35586 + }, + { + "epoch": 0.7123989690463679, + "grad_norm": 2.0863559246063232, + "learning_rate": 2.0166153081514105e-06, + "loss": 0.7496, + "step": 35587 + }, + { + "epoch": 0.7124189875634962, + "grad_norm": 1.1105002164840698, + "learning_rate": 2.01635516354433e-06, + "loss": 0.2955, + "step": 35588 + }, + { + "epoch": 0.7124390060806246, + "grad_norm": 1.133724331855774, + "learning_rate": 2.016095031479852e-06, + "loss": 0.2879, + "step": 35589 + }, + { + "epoch": 0.7124590245977529, + "grad_norm": 1.2434505224227905, + "learning_rate": 2.015834911959067e-06, + "loss": 0.3117, + "step": 35590 + }, + { + "epoch": 0.7124790431148813, + "grad_norm": 1.0663883686065674, + "learning_rate": 2.015574804983071e-06, + "loss": 0.2819, + "step": 35591 + }, + { + "epoch": 0.7124990616320096, + "grad_norm": 1.0040290355682373, + "learning_rate": 2.0153147105529564e-06, + "loss": 0.2758, + "step": 35592 + }, + { + "epoch": 0.712519080149138, + "grad_norm": 1.2422689199447632, + "learning_rate": 2.015054628669817e-06, + "loss": 0.2676, + "step": 35593 + }, + { + "epoch": 0.7125390986662663, + "grad_norm": 1.9073420763015747, + "learning_rate": 2.014794559334745e-06, + "loss": 0.7703, + "step": 35594 + }, + { + "epoch": 0.7125591171833946, + "grad_norm": 1.324708342552185, + "learning_rate": 2.014534502548833e-06, + "loss": 0.3034, + "step": 35595 + }, + { + "epoch": 0.712579135700523, + "grad_norm": 1.8584833145141602, + "learning_rate": 2.0142744583131767e-06, + "loss": 0.7247, + "step": 35596 + }, + { + "epoch": 0.7125991542176513, + "grad_norm": 1.9742376804351807, + "learning_rate": 2.014014426628868e-06, + "loss": 0.7731, + "step": 35597 + }, + { + "epoch": 0.7126191727347797, + "grad_norm": 1.2625198364257812, + "learning_rate": 2.0137544074970004e-06, + "loss": 0.2526, + "step": 35598 + }, + { + "epoch": 0.712639191251908, + "grad_norm": 1.0872694253921509, + "learning_rate": 2.0134944009186647e-06, + "loss": 0.2958, + "step": 35599 + }, + { + "epoch": 0.7126592097690364, + "grad_norm": 1.9578325748443604, + "learning_rate": 2.0132344068949578e-06, + "loss": 0.752, + "step": 35600 + }, + { + "epoch": 0.7126792282861647, + "grad_norm": 1.0586926937103271, + "learning_rate": 2.012974425426969e-06, + "loss": 0.2979, + "step": 35601 + }, + { + "epoch": 0.712699246803293, + "grad_norm": 0.9700664281845093, + "learning_rate": 2.012714456515794e-06, + "loss": 0.2561, + "step": 35602 + }, + { + "epoch": 0.7127192653204214, + "grad_norm": 1.123141884803772, + "learning_rate": 2.012454500162525e-06, + "loss": 0.304, + "step": 35603 + }, + { + "epoch": 0.7127392838375497, + "grad_norm": 1.2096662521362305, + "learning_rate": 2.012194556368253e-06, + "loss": 0.3309, + "step": 35604 + }, + { + "epoch": 0.7127593023546781, + "grad_norm": 2.034702777862549, + "learning_rate": 2.0119346251340733e-06, + "loss": 0.7947, + "step": 35605 + }, + { + "epoch": 0.7127793208718064, + "grad_norm": 1.1430222988128662, + "learning_rate": 2.0116747064610774e-06, + "loss": 0.318, + "step": 35606 + }, + { + "epoch": 0.7127993393889348, + "grad_norm": 1.0523513555526733, + "learning_rate": 2.011414800350358e-06, + "loss": 0.265, + "step": 35607 + }, + { + "epoch": 0.7128193579060631, + "grad_norm": 1.0625934600830078, + "learning_rate": 2.0111549068030063e-06, + "loss": 0.3325, + "step": 35608 + }, + { + "epoch": 0.7128393764231915, + "grad_norm": 1.021301507949829, + "learning_rate": 2.0108950258201178e-06, + "loss": 0.3167, + "step": 35609 + }, + { + "epoch": 0.7128593949403198, + "grad_norm": 1.1830084323883057, + "learning_rate": 2.0106351574027826e-06, + "loss": 0.3221, + "step": 35610 + }, + { + "epoch": 0.7128794134574481, + "grad_norm": 1.0486581325531006, + "learning_rate": 2.0103753015520947e-06, + "loss": 0.3199, + "step": 35611 + }, + { + "epoch": 0.7128994319745765, + "grad_norm": 1.1243278980255127, + "learning_rate": 2.0101154582691434e-06, + "loss": 0.2851, + "step": 35612 + }, + { + "epoch": 0.7129194504917048, + "grad_norm": 1.025269627571106, + "learning_rate": 2.0098556275550256e-06, + "loss": 0.2968, + "step": 35613 + }, + { + "epoch": 0.7129394690088332, + "grad_norm": 1.119615912437439, + "learning_rate": 2.0095958094108293e-06, + "loss": 0.3027, + "step": 35614 + }, + { + "epoch": 0.7129594875259615, + "grad_norm": 1.074625015258789, + "learning_rate": 2.00933600383765e-06, + "loss": 0.2804, + "step": 35615 + }, + { + "epoch": 0.7129795060430899, + "grad_norm": 1.1225680112838745, + "learning_rate": 2.009076210836579e-06, + "loss": 0.2613, + "step": 35616 + }, + { + "epoch": 0.7129995245602182, + "grad_norm": 1.2959917783737183, + "learning_rate": 2.008816430408706e-06, + "loss": 0.3056, + "step": 35617 + }, + { + "epoch": 0.7130195430773465, + "grad_norm": 1.1786658763885498, + "learning_rate": 2.0085566625551266e-06, + "loss": 0.3101, + "step": 35618 + }, + { + "epoch": 0.7130395615944749, + "grad_norm": 1.014517068862915, + "learning_rate": 2.0082969072769313e-06, + "loss": 0.2644, + "step": 35619 + }, + { + "epoch": 0.7130595801116032, + "grad_norm": 1.158660650253296, + "learning_rate": 2.0080371645752122e-06, + "loss": 0.2724, + "step": 35620 + }, + { + "epoch": 0.7130795986287316, + "grad_norm": 1.2011297941207886, + "learning_rate": 2.007777434451059e-06, + "loss": 0.3063, + "step": 35621 + }, + { + "epoch": 0.7130996171458599, + "grad_norm": 1.0417088270187378, + "learning_rate": 2.0075177169055677e-06, + "loss": 0.2615, + "step": 35622 + }, + { + "epoch": 0.7131196356629883, + "grad_norm": 2.090487003326416, + "learning_rate": 2.0072580119398276e-06, + "loss": 0.7858, + "step": 35623 + }, + { + "epoch": 0.7131396541801166, + "grad_norm": 1.2870014905929565, + "learning_rate": 2.006998319554931e-06, + "loss": 0.3221, + "step": 35624 + }, + { + "epoch": 0.713159672697245, + "grad_norm": 1.1988303661346436, + "learning_rate": 2.0067386397519673e-06, + "loss": 0.3034, + "step": 35625 + }, + { + "epoch": 0.7131796912143733, + "grad_norm": 1.1456294059753418, + "learning_rate": 2.006478972532032e-06, + "loss": 0.3114, + "step": 35626 + }, + { + "epoch": 0.7131997097315016, + "grad_norm": 1.160130262374878, + "learning_rate": 2.0062193178962134e-06, + "loss": 0.3043, + "step": 35627 + }, + { + "epoch": 0.71321972824863, + "grad_norm": 1.9302089214324951, + "learning_rate": 2.0059596758456064e-06, + "loss": 0.7484, + "step": 35628 + }, + { + "epoch": 0.7132397467657583, + "grad_norm": 1.0931416749954224, + "learning_rate": 2.0057000463813004e-06, + "loss": 0.2883, + "step": 35629 + }, + { + "epoch": 0.7132597652828867, + "grad_norm": 0.9993718266487122, + "learning_rate": 2.005440429504385e-06, + "loss": 0.2645, + "step": 35630 + }, + { + "epoch": 0.713279783800015, + "grad_norm": 1.2817186117172241, + "learning_rate": 2.0051808252159554e-06, + "loss": 0.3043, + "step": 35631 + }, + { + "epoch": 0.7132998023171434, + "grad_norm": 1.0537968873977661, + "learning_rate": 2.0049212335171012e-06, + "loss": 0.2953, + "step": 35632 + }, + { + "epoch": 0.7133198208342717, + "grad_norm": 1.1107062101364136, + "learning_rate": 2.0046616544089137e-06, + "loss": 0.2846, + "step": 35633 + }, + { + "epoch": 0.7133398393514, + "grad_norm": 1.0753597021102905, + "learning_rate": 2.004402087892484e-06, + "loss": 0.2702, + "step": 35634 + }, + { + "epoch": 0.7133598578685284, + "grad_norm": 1.062453269958496, + "learning_rate": 2.0041425339689015e-06, + "loss": 0.2694, + "step": 35635 + }, + { + "epoch": 0.7133798763856567, + "grad_norm": 1.2287306785583496, + "learning_rate": 2.0038829926392606e-06, + "loss": 0.2975, + "step": 35636 + }, + { + "epoch": 0.7133998949027851, + "grad_norm": 1.9231157302856445, + "learning_rate": 2.003623463904651e-06, + "loss": 0.784, + "step": 35637 + }, + { + "epoch": 0.7134199134199134, + "grad_norm": 1.0641220808029175, + "learning_rate": 2.003363947766162e-06, + "loss": 0.31, + "step": 35638 + }, + { + "epoch": 0.7134399319370418, + "grad_norm": 1.8204635381698608, + "learning_rate": 2.0031044442248876e-06, + "loss": 0.7893, + "step": 35639 + }, + { + "epoch": 0.7134599504541701, + "grad_norm": 1.1811518669128418, + "learning_rate": 2.0028449532819155e-06, + "loss": 0.3335, + "step": 35640 + }, + { + "epoch": 0.7134799689712985, + "grad_norm": 1.0556716918945312, + "learning_rate": 2.0025854749383394e-06, + "loss": 0.2999, + "step": 35641 + }, + { + "epoch": 0.7134999874884268, + "grad_norm": 1.1293970346450806, + "learning_rate": 2.0023260091952497e-06, + "loss": 0.3105, + "step": 35642 + }, + { + "epoch": 0.7135200060055551, + "grad_norm": 1.1636073589324951, + "learning_rate": 2.0020665560537346e-06, + "loss": 0.3014, + "step": 35643 + }, + { + "epoch": 0.7135400245226835, + "grad_norm": 2.0349953174591064, + "learning_rate": 2.0018071155148877e-06, + "loss": 0.7431, + "step": 35644 + }, + { + "epoch": 0.7135600430398118, + "grad_norm": 1.2345670461654663, + "learning_rate": 2.001547687579799e-06, + "loss": 0.3559, + "step": 35645 + }, + { + "epoch": 0.7135800615569402, + "grad_norm": 1.1915606260299683, + "learning_rate": 2.001288272249558e-06, + "loss": 0.289, + "step": 35646 + }, + { + "epoch": 0.7136000800740685, + "grad_norm": 1.0481293201446533, + "learning_rate": 2.001028869525256e-06, + "loss": 0.2671, + "step": 35647 + }, + { + "epoch": 0.7136200985911969, + "grad_norm": 1.1121042966842651, + "learning_rate": 2.0007694794079818e-06, + "loss": 0.2942, + "step": 35648 + }, + { + "epoch": 0.7136401171083252, + "grad_norm": 1.0592920780181885, + "learning_rate": 2.000510101898829e-06, + "loss": 0.2561, + "step": 35649 + }, + { + "epoch": 0.7136601356254535, + "grad_norm": 1.1580222845077515, + "learning_rate": 2.0002507369988862e-06, + "loss": 0.3498, + "step": 35650 + }, + { + "epoch": 0.7136801541425819, + "grad_norm": 1.1729599237442017, + "learning_rate": 1.999991384709242e-06, + "loss": 0.2811, + "step": 35651 + }, + { + "epoch": 0.7137001726597102, + "grad_norm": 1.2418076992034912, + "learning_rate": 1.9997320450309893e-06, + "loss": 0.2771, + "step": 35652 + }, + { + "epoch": 0.7137201911768386, + "grad_norm": 1.1236393451690674, + "learning_rate": 1.999472717965216e-06, + "loss": 0.3199, + "step": 35653 + }, + { + "epoch": 0.7137402096939669, + "grad_norm": 1.0001014471054077, + "learning_rate": 1.9992134035130164e-06, + "loss": 0.2888, + "step": 35654 + }, + { + "epoch": 0.7137602282110953, + "grad_norm": 1.4149655103683472, + "learning_rate": 1.998954101675477e-06, + "loss": 0.3021, + "step": 35655 + }, + { + "epoch": 0.7137802467282236, + "grad_norm": 1.2600528001785278, + "learning_rate": 1.9986948124536893e-06, + "loss": 0.3072, + "step": 35656 + }, + { + "epoch": 0.713800265245352, + "grad_norm": 1.18205726146698, + "learning_rate": 1.9984355358487406e-06, + "loss": 0.2805, + "step": 35657 + }, + { + "epoch": 0.7138202837624803, + "grad_norm": 0.9800121784210205, + "learning_rate": 1.998176271861725e-06, + "loss": 0.2772, + "step": 35658 + }, + { + "epoch": 0.7138403022796086, + "grad_norm": 1.207626223564148, + "learning_rate": 1.99791702049373e-06, + "loss": 0.3198, + "step": 35659 + }, + { + "epoch": 0.713860320796737, + "grad_norm": 1.21853768825531, + "learning_rate": 1.997657781745846e-06, + "loss": 0.3001, + "step": 35660 + }, + { + "epoch": 0.7138803393138653, + "grad_norm": 1.124742031097412, + "learning_rate": 1.9973985556191604e-06, + "loss": 0.2703, + "step": 35661 + }, + { + "epoch": 0.7139003578309937, + "grad_norm": 2.089219570159912, + "learning_rate": 1.997139342114767e-06, + "loss": 0.7957, + "step": 35662 + }, + { + "epoch": 0.713920376348122, + "grad_norm": 1.913521409034729, + "learning_rate": 1.996880141233754e-06, + "loss": 0.7747, + "step": 35663 + }, + { + "epoch": 0.7139403948652504, + "grad_norm": 1.0939334630966187, + "learning_rate": 1.9966209529772086e-06, + "loss": 0.2724, + "step": 35664 + }, + { + "epoch": 0.7139604133823787, + "grad_norm": 1.7775683403015137, + "learning_rate": 1.996361777346224e-06, + "loss": 0.714, + "step": 35665 + }, + { + "epoch": 0.713980431899507, + "grad_norm": 1.26255464553833, + "learning_rate": 1.9961026143418864e-06, + "loss": 0.3163, + "step": 35666 + }, + { + "epoch": 0.7140004504166354, + "grad_norm": 1.7394099235534668, + "learning_rate": 1.995843463965289e-06, + "loss": 0.7631, + "step": 35667 + }, + { + "epoch": 0.7140204689337637, + "grad_norm": 1.1147959232330322, + "learning_rate": 1.995584326217519e-06, + "loss": 0.3162, + "step": 35668 + }, + { + "epoch": 0.7140404874508921, + "grad_norm": 0.9914811253547668, + "learning_rate": 1.995325201099666e-06, + "loss": 0.3097, + "step": 35669 + }, + { + "epoch": 0.7140605059680204, + "grad_norm": 1.0874903202056885, + "learning_rate": 1.9950660886128177e-06, + "loss": 0.3138, + "step": 35670 + }, + { + "epoch": 0.7140805244851488, + "grad_norm": 1.0983089208602905, + "learning_rate": 1.9948069887580664e-06, + "loss": 0.3046, + "step": 35671 + }, + { + "epoch": 0.7141005430022771, + "grad_norm": 1.0235790014266968, + "learning_rate": 1.9945479015364994e-06, + "loss": 0.3176, + "step": 35672 + }, + { + "epoch": 0.7141205615194054, + "grad_norm": 1.118221640586853, + "learning_rate": 1.9942888269492067e-06, + "loss": 0.3237, + "step": 35673 + }, + { + "epoch": 0.7141405800365338, + "grad_norm": 1.1404197216033936, + "learning_rate": 1.9940297649972753e-06, + "loss": 0.3152, + "step": 35674 + }, + { + "epoch": 0.7141605985536621, + "grad_norm": 1.2308293581008911, + "learning_rate": 1.9937707156817976e-06, + "loss": 0.3195, + "step": 35675 + }, + { + "epoch": 0.7141806170707905, + "grad_norm": 2.001758337020874, + "learning_rate": 1.9935116790038605e-06, + "loss": 0.7413, + "step": 35676 + }, + { + "epoch": 0.7142006355879188, + "grad_norm": 1.1328837871551514, + "learning_rate": 1.9932526549645516e-06, + "loss": 0.2946, + "step": 35677 + }, + { + "epoch": 0.7142206541050472, + "grad_norm": 1.1076620817184448, + "learning_rate": 1.992993643564963e-06, + "loss": 0.2971, + "step": 35678 + }, + { + "epoch": 0.7142406726221755, + "grad_norm": 1.0118094682693481, + "learning_rate": 1.992734644806181e-06, + "loss": 0.332, + "step": 35679 + }, + { + "epoch": 0.7142606911393039, + "grad_norm": 1.1538581848144531, + "learning_rate": 1.9924756586892968e-06, + "loss": 0.3206, + "step": 35680 + }, + { + "epoch": 0.7142807096564322, + "grad_norm": 1.1795674562454224, + "learning_rate": 1.9922166852153975e-06, + "loss": 0.2984, + "step": 35681 + }, + { + "epoch": 0.7143007281735605, + "grad_norm": 1.1040332317352295, + "learning_rate": 1.991957724385572e-06, + "loss": 0.2843, + "step": 35682 + }, + { + "epoch": 0.7143207466906889, + "grad_norm": 1.9361306428909302, + "learning_rate": 1.991698776200908e-06, + "loss": 0.7073, + "step": 35683 + }, + { + "epoch": 0.7143407652078172, + "grad_norm": 1.0731401443481445, + "learning_rate": 1.9914398406624956e-06, + "loss": 0.2688, + "step": 35684 + }, + { + "epoch": 0.7143607837249456, + "grad_norm": 0.9615651369094849, + "learning_rate": 1.991180917771423e-06, + "loss": 0.2703, + "step": 35685 + }, + { + "epoch": 0.7143808022420739, + "grad_norm": 1.3494646549224854, + "learning_rate": 1.9909220075287783e-06, + "loss": 0.2748, + "step": 35686 + }, + { + "epoch": 0.7144008207592023, + "grad_norm": 1.0457383394241333, + "learning_rate": 1.99066310993565e-06, + "loss": 0.2715, + "step": 35687 + }, + { + "epoch": 0.7144208392763306, + "grad_norm": 1.1326004266738892, + "learning_rate": 1.9904042249931242e-06, + "loss": 0.2635, + "step": 35688 + }, + { + "epoch": 0.7144408577934589, + "grad_norm": 1.2872979640960693, + "learning_rate": 1.990145352702294e-06, + "loss": 0.2674, + "step": 35689 + }, + { + "epoch": 0.7144608763105873, + "grad_norm": 1.0235034227371216, + "learning_rate": 1.9898864930642426e-06, + "loss": 0.2203, + "step": 35690 + }, + { + "epoch": 0.7144808948277156, + "grad_norm": 1.1307369470596313, + "learning_rate": 1.9896276460800625e-06, + "loss": 0.3181, + "step": 35691 + }, + { + "epoch": 0.714500913344844, + "grad_norm": 0.980364203453064, + "learning_rate": 1.989368811750838e-06, + "loss": 0.3439, + "step": 35692 + }, + { + "epoch": 0.7145209318619723, + "grad_norm": 1.2015236616134644, + "learning_rate": 1.9891099900776612e-06, + "loss": 0.3402, + "step": 35693 + }, + { + "epoch": 0.7145409503791007, + "grad_norm": 1.2907841205596924, + "learning_rate": 1.988851181061618e-06, + "loss": 0.3109, + "step": 35694 + }, + { + "epoch": 0.714560968896229, + "grad_norm": 1.1688724756240845, + "learning_rate": 1.9885923847037965e-06, + "loss": 0.3003, + "step": 35695 + }, + { + "epoch": 0.7145809874133574, + "grad_norm": 1.164238452911377, + "learning_rate": 1.9883336010052824e-06, + "loss": 0.3477, + "step": 35696 + }, + { + "epoch": 0.7146010059304857, + "grad_norm": 1.1302461624145508, + "learning_rate": 1.9880748299671678e-06, + "loss": 0.288, + "step": 35697 + }, + { + "epoch": 0.714621024447614, + "grad_norm": 1.19767165184021, + "learning_rate": 1.987816071590538e-06, + "loss": 0.2698, + "step": 35698 + }, + { + "epoch": 0.7146410429647424, + "grad_norm": 1.1869617700576782, + "learning_rate": 1.9875573258764817e-06, + "loss": 0.2495, + "step": 35699 + }, + { + "epoch": 0.7146610614818707, + "grad_norm": 1.014423131942749, + "learning_rate": 1.9872985928260862e-06, + "loss": 0.232, + "step": 35700 + }, + { + "epoch": 0.7146810799989991, + "grad_norm": 1.9089593887329102, + "learning_rate": 1.9870398724404372e-06, + "loss": 0.8553, + "step": 35701 + }, + { + "epoch": 0.7147010985161274, + "grad_norm": 1.0884276628494263, + "learning_rate": 1.986781164720626e-06, + "loss": 0.3265, + "step": 35702 + }, + { + "epoch": 0.7147211170332558, + "grad_norm": 1.158401370048523, + "learning_rate": 1.986522469667736e-06, + "loss": 0.2924, + "step": 35703 + }, + { + "epoch": 0.7147411355503841, + "grad_norm": 1.996821641921997, + "learning_rate": 1.9862637872828594e-06, + "loss": 0.7792, + "step": 35704 + }, + { + "epoch": 0.7147611540675124, + "grad_norm": 1.0790629386901855, + "learning_rate": 1.986005117567079e-06, + "loss": 0.3104, + "step": 35705 + }, + { + "epoch": 0.7147811725846408, + "grad_norm": 2.0120627880096436, + "learning_rate": 1.985746460521487e-06, + "loss": 0.7961, + "step": 35706 + }, + { + "epoch": 0.7148011911017691, + "grad_norm": 1.1382378339767456, + "learning_rate": 1.9854878161471675e-06, + "loss": 0.2694, + "step": 35707 + }, + { + "epoch": 0.7148212096188975, + "grad_norm": 1.092422366142273, + "learning_rate": 1.9852291844452086e-06, + "loss": 0.241, + "step": 35708 + }, + { + "epoch": 0.7148412281360258, + "grad_norm": 1.135396122932434, + "learning_rate": 1.9849705654166978e-06, + "loss": 0.3271, + "step": 35709 + }, + { + "epoch": 0.7148612466531542, + "grad_norm": 1.0925992727279663, + "learning_rate": 1.98471195906272e-06, + "loss": 0.2739, + "step": 35710 + }, + { + "epoch": 0.7148812651702825, + "grad_norm": 2.0111782550811768, + "learning_rate": 1.9844533653843657e-06, + "loss": 0.7437, + "step": 35711 + }, + { + "epoch": 0.7149012836874109, + "grad_norm": 1.06602144241333, + "learning_rate": 1.984194784382721e-06, + "loss": 0.2858, + "step": 35712 + }, + { + "epoch": 0.7149213022045392, + "grad_norm": 1.1029350757598877, + "learning_rate": 1.983936216058872e-06, + "loss": 0.3266, + "step": 35713 + }, + { + "epoch": 0.7149413207216675, + "grad_norm": 1.088063359260559, + "learning_rate": 1.9836776604139048e-06, + "loss": 0.2827, + "step": 35714 + }, + { + "epoch": 0.7149613392387959, + "grad_norm": 1.1866896152496338, + "learning_rate": 1.9834191174489085e-06, + "loss": 0.3181, + "step": 35715 + }, + { + "epoch": 0.7149813577559242, + "grad_norm": 1.2491698265075684, + "learning_rate": 1.983160587164968e-06, + "loss": 0.3424, + "step": 35716 + }, + { + "epoch": 0.7150013762730526, + "grad_norm": 1.1001558303833008, + "learning_rate": 1.982902069563172e-06, + "loss": 0.281, + "step": 35717 + }, + { + "epoch": 0.7150213947901809, + "grad_norm": 1.2603673934936523, + "learning_rate": 1.982643564644608e-06, + "loss": 0.2598, + "step": 35718 + }, + { + "epoch": 0.7150414133073093, + "grad_norm": 1.6966612339019775, + "learning_rate": 1.982385072410358e-06, + "loss": 0.7926, + "step": 35719 + }, + { + "epoch": 0.7150614318244376, + "grad_norm": 1.1243834495544434, + "learning_rate": 1.982126592861514e-06, + "loss": 0.2685, + "step": 35720 + }, + { + "epoch": 0.7150814503415659, + "grad_norm": 1.0239366292953491, + "learning_rate": 1.981868125999161e-06, + "loss": 0.2951, + "step": 35721 + }, + { + "epoch": 0.7151014688586943, + "grad_norm": 1.0456922054290771, + "learning_rate": 1.981609671824384e-06, + "loss": 0.3018, + "step": 35722 + }, + { + "epoch": 0.7151214873758226, + "grad_norm": 1.2018799781799316, + "learning_rate": 1.9813512303382686e-06, + "loss": 0.3082, + "step": 35723 + }, + { + "epoch": 0.715141505892951, + "grad_norm": 1.0799800157546997, + "learning_rate": 1.981092801541905e-06, + "loss": 0.2711, + "step": 35724 + }, + { + "epoch": 0.7151615244100793, + "grad_norm": 1.0810500383377075, + "learning_rate": 1.9808343854363777e-06, + "loss": 0.3127, + "step": 35725 + }, + { + "epoch": 0.7151815429272077, + "grad_norm": 1.2862735986709595, + "learning_rate": 1.9805759820227725e-06, + "loss": 0.3112, + "step": 35726 + }, + { + "epoch": 0.715201561444336, + "grad_norm": 1.082160234451294, + "learning_rate": 1.980317591302175e-06, + "loss": 0.2599, + "step": 35727 + }, + { + "epoch": 0.7152215799614644, + "grad_norm": 1.0261482000350952, + "learning_rate": 1.9800592132756737e-06, + "loss": 0.2228, + "step": 35728 + }, + { + "epoch": 0.7152415984785927, + "grad_norm": 1.126055121421814, + "learning_rate": 1.979800847944352e-06, + "loss": 0.3147, + "step": 35729 + }, + { + "epoch": 0.715261616995721, + "grad_norm": 1.8829164505004883, + "learning_rate": 1.9795424953092995e-06, + "loss": 0.7545, + "step": 35730 + }, + { + "epoch": 0.7152816355128494, + "grad_norm": 2.1411449909210205, + "learning_rate": 1.9792841553716e-06, + "loss": 0.7235, + "step": 35731 + }, + { + "epoch": 0.7153016540299777, + "grad_norm": 1.2064542770385742, + "learning_rate": 1.9790258281323384e-06, + "loss": 0.2914, + "step": 35732 + }, + { + "epoch": 0.7153216725471061, + "grad_norm": 1.1062928438186646, + "learning_rate": 1.978767513592604e-06, + "loss": 0.3031, + "step": 35733 + }, + { + "epoch": 0.7153416910642344, + "grad_norm": 1.2407718896865845, + "learning_rate": 1.9785092117534805e-06, + "loss": 0.3471, + "step": 35734 + }, + { + "epoch": 0.7153617095813628, + "grad_norm": 1.0843406915664673, + "learning_rate": 1.978250922616054e-06, + "loss": 0.2977, + "step": 35735 + }, + { + "epoch": 0.7153817280984911, + "grad_norm": 1.1180055141448975, + "learning_rate": 1.977992646181409e-06, + "loss": 0.3079, + "step": 35736 + }, + { + "epoch": 0.7154017466156194, + "grad_norm": 1.0466662645339966, + "learning_rate": 1.9777343824506335e-06, + "loss": 0.3043, + "step": 35737 + }, + { + "epoch": 0.7154217651327478, + "grad_norm": 1.0461446046829224, + "learning_rate": 1.9774761314248125e-06, + "loss": 0.3144, + "step": 35738 + }, + { + "epoch": 0.7154417836498761, + "grad_norm": 1.26376211643219, + "learning_rate": 1.9772178931050316e-06, + "loss": 0.3223, + "step": 35739 + }, + { + "epoch": 0.7154618021670045, + "grad_norm": 1.2026143074035645, + "learning_rate": 1.9769596674923762e-06, + "loss": 0.267, + "step": 35740 + }, + { + "epoch": 0.7154818206841328, + "grad_norm": 1.1383320093154907, + "learning_rate": 1.9767014545879303e-06, + "loss": 0.2759, + "step": 35741 + }, + { + "epoch": 0.7155018392012612, + "grad_norm": 1.1080280542373657, + "learning_rate": 1.9764432543927804e-06, + "loss": 0.3536, + "step": 35742 + }, + { + "epoch": 0.7155218577183895, + "grad_norm": 1.1587481498718262, + "learning_rate": 1.9761850669080147e-06, + "loss": 0.2952, + "step": 35743 + }, + { + "epoch": 0.7155418762355179, + "grad_norm": 1.0370817184448242, + "learning_rate": 1.9759268921347157e-06, + "loss": 0.2716, + "step": 35744 + }, + { + "epoch": 0.7155618947526462, + "grad_norm": 1.2295899391174316, + "learning_rate": 1.9756687300739674e-06, + "loss": 0.2955, + "step": 35745 + }, + { + "epoch": 0.7155819132697745, + "grad_norm": 1.1082974672317505, + "learning_rate": 1.9754105807268593e-06, + "loss": 0.3107, + "step": 35746 + }, + { + "epoch": 0.7156019317869029, + "grad_norm": 2.062959909439087, + "learning_rate": 1.9751524440944734e-06, + "loss": 0.7988, + "step": 35747 + }, + { + "epoch": 0.7156219503040312, + "grad_norm": 1.0442355871200562, + "learning_rate": 1.9748943201778965e-06, + "loss": 0.3087, + "step": 35748 + }, + { + "epoch": 0.7156419688211596, + "grad_norm": 0.9765448570251465, + "learning_rate": 1.9746362089782123e-06, + "loss": 0.2562, + "step": 35749 + }, + { + "epoch": 0.7156619873382879, + "grad_norm": 1.1862366199493408, + "learning_rate": 1.974378110496505e-06, + "loss": 0.2816, + "step": 35750 + }, + { + "epoch": 0.7156820058554163, + "grad_norm": 1.0839250087738037, + "learning_rate": 1.9741200247338627e-06, + "loss": 0.3499, + "step": 35751 + }, + { + "epoch": 0.7157020243725446, + "grad_norm": 1.088161826133728, + "learning_rate": 1.973861951691368e-06, + "loss": 0.3044, + "step": 35752 + }, + { + "epoch": 0.7157220428896729, + "grad_norm": 1.8056527376174927, + "learning_rate": 1.973603891370107e-06, + "loss": 0.7426, + "step": 35753 + }, + { + "epoch": 0.7157420614068013, + "grad_norm": 1.2843761444091797, + "learning_rate": 1.9733458437711627e-06, + "loss": 0.2976, + "step": 35754 + }, + { + "epoch": 0.7157620799239296, + "grad_norm": 1.1997038125991821, + "learning_rate": 1.9730878088956205e-06, + "loss": 0.267, + "step": 35755 + }, + { + "epoch": 0.715782098441058, + "grad_norm": 1.1092536449432373, + "learning_rate": 1.972829786744567e-06, + "loss": 0.2888, + "step": 35756 + }, + { + "epoch": 0.7158021169581863, + "grad_norm": 1.0942749977111816, + "learning_rate": 1.9725717773190866e-06, + "loss": 0.3072, + "step": 35757 + }, + { + "epoch": 0.7158221354753147, + "grad_norm": 1.1191760301589966, + "learning_rate": 1.972313780620261e-06, + "loss": 0.3513, + "step": 35758 + }, + { + "epoch": 0.715842153992443, + "grad_norm": 1.1209458112716675, + "learning_rate": 1.9720557966491782e-06, + "loss": 0.2996, + "step": 35759 + }, + { + "epoch": 0.7158621725095714, + "grad_norm": 1.0330581665039062, + "learning_rate": 1.9717978254069214e-06, + "loss": 0.2522, + "step": 35760 + }, + { + "epoch": 0.7158821910266997, + "grad_norm": 1.9099453687667847, + "learning_rate": 1.9715398668945744e-06, + "loss": 0.7643, + "step": 35761 + }, + { + "epoch": 0.715902209543828, + "grad_norm": 1.079960823059082, + "learning_rate": 1.971281921113223e-06, + "loss": 0.3286, + "step": 35762 + }, + { + "epoch": 0.7159222280609564, + "grad_norm": 1.0830310583114624, + "learning_rate": 1.971023988063948e-06, + "loss": 0.2656, + "step": 35763 + }, + { + "epoch": 0.7159422465780847, + "grad_norm": 1.196666955947876, + "learning_rate": 1.9707660677478387e-06, + "loss": 0.3148, + "step": 35764 + }, + { + "epoch": 0.7159622650952131, + "grad_norm": 1.3100371360778809, + "learning_rate": 1.9705081601659765e-06, + "loss": 0.2997, + "step": 35765 + }, + { + "epoch": 0.7159822836123414, + "grad_norm": 1.0406392812728882, + "learning_rate": 1.970250265319446e-06, + "loss": 0.3054, + "step": 35766 + }, + { + "epoch": 0.7160023021294698, + "grad_norm": 0.993095874786377, + "learning_rate": 1.96999238320933e-06, + "loss": 0.2635, + "step": 35767 + }, + { + "epoch": 0.7160223206465981, + "grad_norm": 1.0698540210723877, + "learning_rate": 1.9697345138367137e-06, + "loss": 0.3072, + "step": 35768 + }, + { + "epoch": 0.7160423391637264, + "grad_norm": 1.1498159170150757, + "learning_rate": 1.969476657202683e-06, + "loss": 0.3618, + "step": 35769 + }, + { + "epoch": 0.7160623576808548, + "grad_norm": 1.38490629196167, + "learning_rate": 1.96921881330832e-06, + "loss": 0.3297, + "step": 35770 + }, + { + "epoch": 0.7160823761979831, + "grad_norm": 1.0374202728271484, + "learning_rate": 1.9689609821547095e-06, + "loss": 0.2622, + "step": 35771 + }, + { + "epoch": 0.7161023947151115, + "grad_norm": 1.206141471862793, + "learning_rate": 1.9687031637429326e-06, + "loss": 0.3001, + "step": 35772 + }, + { + "epoch": 0.7161224132322398, + "grad_norm": 1.1904096603393555, + "learning_rate": 1.9684453580740773e-06, + "loss": 0.2474, + "step": 35773 + }, + { + "epoch": 0.7161424317493682, + "grad_norm": 1.069965124130249, + "learning_rate": 1.9681875651492253e-06, + "loss": 0.2908, + "step": 35774 + }, + { + "epoch": 0.7161624502664965, + "grad_norm": 1.220505952835083, + "learning_rate": 1.96792978496946e-06, + "loss": 0.3047, + "step": 35775 + }, + { + "epoch": 0.7161824687836249, + "grad_norm": 1.1155261993408203, + "learning_rate": 1.9676720175358642e-06, + "loss": 0.2715, + "step": 35776 + }, + { + "epoch": 0.7162024873007532, + "grad_norm": 1.110386610031128, + "learning_rate": 1.967414262849524e-06, + "loss": 0.3099, + "step": 35777 + }, + { + "epoch": 0.7162225058178815, + "grad_norm": 1.0358964204788208, + "learning_rate": 1.9671565209115216e-06, + "loss": 0.2905, + "step": 35778 + }, + { + "epoch": 0.7162425243350099, + "grad_norm": 1.1062089204788208, + "learning_rate": 1.966898791722941e-06, + "loss": 0.3264, + "step": 35779 + }, + { + "epoch": 0.7162625428521382, + "grad_norm": 1.8259226083755493, + "learning_rate": 1.9666410752848627e-06, + "loss": 0.7139, + "step": 35780 + }, + { + "epoch": 0.7162825613692666, + "grad_norm": 1.2063287496566772, + "learning_rate": 1.9663833715983727e-06, + "loss": 0.2967, + "step": 35781 + }, + { + "epoch": 0.7163025798863949, + "grad_norm": 1.2261396646499634, + "learning_rate": 1.9661256806645563e-06, + "loss": 0.2709, + "step": 35782 + }, + { + "epoch": 0.7163225984035233, + "grad_norm": 1.3557828664779663, + "learning_rate": 1.9658680024844947e-06, + "loss": 0.3232, + "step": 35783 + }, + { + "epoch": 0.7163426169206516, + "grad_norm": 1.1224602460861206, + "learning_rate": 1.9656103370592707e-06, + "loss": 0.2769, + "step": 35784 + }, + { + "epoch": 0.7163626354377799, + "grad_norm": 1.0234488248825073, + "learning_rate": 1.9653526843899657e-06, + "loss": 0.2817, + "step": 35785 + }, + { + "epoch": 0.7163826539549083, + "grad_norm": 1.109800934791565, + "learning_rate": 1.965095044477667e-06, + "loss": 0.3108, + "step": 35786 + }, + { + "epoch": 0.7164026724720366, + "grad_norm": 2.093629837036133, + "learning_rate": 1.964837417323456e-06, + "loss": 0.6627, + "step": 35787 + }, + { + "epoch": 0.716422690989165, + "grad_norm": 1.3143596649169922, + "learning_rate": 1.964579802928415e-06, + "loss": 0.2539, + "step": 35788 + }, + { + "epoch": 0.7164427095062933, + "grad_norm": 1.140423059463501, + "learning_rate": 1.964322201293626e-06, + "loss": 0.3274, + "step": 35789 + }, + { + "epoch": 0.7164627280234217, + "grad_norm": 1.8193422555923462, + "learning_rate": 1.9640646124201744e-06, + "loss": 0.8115, + "step": 35790 + }, + { + "epoch": 0.71648274654055, + "grad_norm": 1.0370184183120728, + "learning_rate": 1.9638070363091423e-06, + "loss": 0.2912, + "step": 35791 + }, + { + "epoch": 0.7165027650576784, + "grad_norm": 1.1078269481658936, + "learning_rate": 1.9635494729616113e-06, + "loss": 0.288, + "step": 35792 + }, + { + "epoch": 0.7165227835748067, + "grad_norm": 1.0664353370666504, + "learning_rate": 1.9632919223786636e-06, + "loss": 0.2667, + "step": 35793 + }, + { + "epoch": 0.716542802091935, + "grad_norm": 1.1213667392730713, + "learning_rate": 1.9630343845613833e-06, + "loss": 0.3075, + "step": 35794 + }, + { + "epoch": 0.7165628206090634, + "grad_norm": 1.0897340774536133, + "learning_rate": 1.962776859510854e-06, + "loss": 0.3052, + "step": 35795 + }, + { + "epoch": 0.7165828391261917, + "grad_norm": 0.9601306319236755, + "learning_rate": 1.962519347228158e-06, + "loss": 0.2646, + "step": 35796 + }, + { + "epoch": 0.7166028576433201, + "grad_norm": 1.7948991060256958, + "learning_rate": 1.962261847714376e-06, + "loss": 0.7599, + "step": 35797 + }, + { + "epoch": 0.7166228761604484, + "grad_norm": 1.0410690307617188, + "learning_rate": 1.9620043609705902e-06, + "loss": 0.2813, + "step": 35798 + }, + { + "epoch": 0.7166428946775768, + "grad_norm": 1.1280004978179932, + "learning_rate": 1.9617468869978864e-06, + "loss": 0.2603, + "step": 35799 + }, + { + "epoch": 0.7166629131947051, + "grad_norm": 1.1144102811813354, + "learning_rate": 1.9614894257973446e-06, + "loss": 0.324, + "step": 35800 + }, + { + "epoch": 0.7166829317118334, + "grad_norm": 1.013094186782837, + "learning_rate": 1.9612319773700473e-06, + "loss": 0.3359, + "step": 35801 + }, + { + "epoch": 0.7167029502289618, + "grad_norm": 1.893835425376892, + "learning_rate": 1.9609745417170767e-06, + "loss": 0.7873, + "step": 35802 + }, + { + "epoch": 0.7167229687460901, + "grad_norm": 1.0774627923965454, + "learning_rate": 1.9607171188395136e-06, + "loss": 0.3445, + "step": 35803 + }, + { + "epoch": 0.7167429872632185, + "grad_norm": 2.0976078510284424, + "learning_rate": 1.960459708738443e-06, + "loss": 0.7522, + "step": 35804 + }, + { + "epoch": 0.7167630057803468, + "grad_norm": 1.1001496315002441, + "learning_rate": 1.9602023114149465e-06, + "loss": 0.2445, + "step": 35805 + }, + { + "epoch": 0.7167830242974752, + "grad_norm": 1.0315228700637817, + "learning_rate": 1.9599449268701025e-06, + "loss": 0.2798, + "step": 35806 + }, + { + "epoch": 0.7168030428146035, + "grad_norm": 1.1405057907104492, + "learning_rate": 1.9596875551049964e-06, + "loss": 0.3279, + "step": 35807 + }, + { + "epoch": 0.7168230613317319, + "grad_norm": 1.2576786279678345, + "learning_rate": 1.959430196120711e-06, + "loss": 0.3009, + "step": 35808 + }, + { + "epoch": 0.7168430798488602, + "grad_norm": 1.0781503915786743, + "learning_rate": 1.9591728499183276e-06, + "loss": 0.2703, + "step": 35809 + }, + { + "epoch": 0.7168630983659885, + "grad_norm": 1.1641628742218018, + "learning_rate": 1.9589155164989265e-06, + "loss": 0.3148, + "step": 35810 + }, + { + "epoch": 0.7168831168831169, + "grad_norm": 1.1485973596572876, + "learning_rate": 1.9586581958635883e-06, + "loss": 0.3132, + "step": 35811 + }, + { + "epoch": 0.7169031354002452, + "grad_norm": 1.2284153699874878, + "learning_rate": 1.958400888013398e-06, + "loss": 0.2954, + "step": 35812 + }, + { + "epoch": 0.7169231539173736, + "grad_norm": 1.071757435798645, + "learning_rate": 1.9581435929494364e-06, + "loss": 0.2673, + "step": 35813 + }, + { + "epoch": 0.7169431724345019, + "grad_norm": 1.0442380905151367, + "learning_rate": 1.9578863106727843e-06, + "loss": 0.295, + "step": 35814 + }, + { + "epoch": 0.7169631909516303, + "grad_norm": 1.2015293836593628, + "learning_rate": 1.9576290411845235e-06, + "loss": 0.3193, + "step": 35815 + }, + { + "epoch": 0.7169832094687586, + "grad_norm": 1.0721938610076904, + "learning_rate": 1.957371784485734e-06, + "loss": 0.3074, + "step": 35816 + }, + { + "epoch": 0.7170032279858869, + "grad_norm": 1.899503469467163, + "learning_rate": 1.9571145405775e-06, + "loss": 0.7426, + "step": 35817 + }, + { + "epoch": 0.7170232465030153, + "grad_norm": 2.252659559249878, + "learning_rate": 1.956857309460902e-06, + "loss": 0.7604, + "step": 35818 + }, + { + "epoch": 0.7170432650201436, + "grad_norm": 1.0513875484466553, + "learning_rate": 1.9566000911370186e-06, + "loss": 0.2956, + "step": 35819 + }, + { + "epoch": 0.717063283537272, + "grad_norm": 1.1011947393417358, + "learning_rate": 1.9563428856069337e-06, + "loss": 0.3302, + "step": 35820 + }, + { + "epoch": 0.7170833020544003, + "grad_norm": 1.9432460069656372, + "learning_rate": 1.9560856928717307e-06, + "loss": 0.799, + "step": 35821 + }, + { + "epoch": 0.7171033205715287, + "grad_norm": 1.2728134393692017, + "learning_rate": 1.9558285129324876e-06, + "loss": 0.3056, + "step": 35822 + }, + { + "epoch": 0.717123339088657, + "grad_norm": 1.1289279460906982, + "learning_rate": 1.955571345790287e-06, + "loss": 0.2302, + "step": 35823 + }, + { + "epoch": 0.7171433576057854, + "grad_norm": 1.3065046072006226, + "learning_rate": 1.9553141914462094e-06, + "loss": 0.2869, + "step": 35824 + }, + { + "epoch": 0.7171633761229137, + "grad_norm": 1.1406975984573364, + "learning_rate": 1.9550570499013333e-06, + "loss": 0.319, + "step": 35825 + }, + { + "epoch": 0.717183394640042, + "grad_norm": 1.1112602949142456, + "learning_rate": 1.9547999211567442e-06, + "loss": 0.2966, + "step": 35826 + }, + { + "epoch": 0.7172034131571704, + "grad_norm": 1.1875029802322388, + "learning_rate": 1.9545428052135207e-06, + "loss": 0.3204, + "step": 35827 + }, + { + "epoch": 0.7172234316742987, + "grad_norm": 1.0377572774887085, + "learning_rate": 1.954285702072744e-06, + "loss": 0.2626, + "step": 35828 + }, + { + "epoch": 0.7172434501914271, + "grad_norm": 1.2900277376174927, + "learning_rate": 1.9540286117354927e-06, + "loss": 0.3224, + "step": 35829 + }, + { + "epoch": 0.7172634687085554, + "grad_norm": 1.4854273796081543, + "learning_rate": 1.953771534202852e-06, + "loss": 0.3151, + "step": 35830 + }, + { + "epoch": 0.7172834872256838, + "grad_norm": 1.1426236629486084, + "learning_rate": 1.9535144694758994e-06, + "loss": 0.3642, + "step": 35831 + }, + { + "epoch": 0.7173035057428121, + "grad_norm": 1.0622730255126953, + "learning_rate": 1.953257417555715e-06, + "loss": 0.3041, + "step": 35832 + }, + { + "epoch": 0.7173235242599404, + "grad_norm": 1.0824826955795288, + "learning_rate": 1.9530003784433824e-06, + "loss": 0.3099, + "step": 35833 + }, + { + "epoch": 0.7173435427770688, + "grad_norm": 1.1758780479431152, + "learning_rate": 1.952743352139979e-06, + "loss": 0.2929, + "step": 35834 + }, + { + "epoch": 0.7173635612941971, + "grad_norm": 1.1670889854431152, + "learning_rate": 1.9524863386465887e-06, + "loss": 0.2811, + "step": 35835 + }, + { + "epoch": 0.7173835798113255, + "grad_norm": 1.0264462232589722, + "learning_rate": 1.952229337964289e-06, + "loss": 0.2937, + "step": 35836 + }, + { + "epoch": 0.7174035983284538, + "grad_norm": 1.024274468421936, + "learning_rate": 1.9519723500941625e-06, + "loss": 0.2818, + "step": 35837 + }, + { + "epoch": 0.7174236168455822, + "grad_norm": 1.0915884971618652, + "learning_rate": 1.951715375037286e-06, + "loss": 0.311, + "step": 35838 + }, + { + "epoch": 0.7174436353627105, + "grad_norm": 1.264866828918457, + "learning_rate": 1.951458412794744e-06, + "loss": 0.2839, + "step": 35839 + }, + { + "epoch": 0.7174636538798389, + "grad_norm": 1.0564193725585938, + "learning_rate": 1.951201463367615e-06, + "loss": 0.2962, + "step": 35840 + }, + { + "epoch": 0.7174836723969672, + "grad_norm": 1.124760389328003, + "learning_rate": 1.9509445267569783e-06, + "loss": 0.2772, + "step": 35841 + }, + { + "epoch": 0.7175036909140955, + "grad_norm": 1.8255586624145508, + "learning_rate": 1.950687602963913e-06, + "loss": 0.7666, + "step": 35842 + }, + { + "epoch": 0.7175237094312239, + "grad_norm": 1.2593046426773071, + "learning_rate": 1.9504306919895027e-06, + "loss": 0.2942, + "step": 35843 + }, + { + "epoch": 0.7175437279483522, + "grad_norm": 1.109046459197998, + "learning_rate": 1.950173793834825e-06, + "loss": 0.2888, + "step": 35844 + }, + { + "epoch": 0.7175637464654806, + "grad_norm": 1.1128065586090088, + "learning_rate": 1.9499169085009595e-06, + "loss": 0.3277, + "step": 35845 + }, + { + "epoch": 0.7175837649826089, + "grad_norm": 1.2295172214508057, + "learning_rate": 1.949660035988988e-06, + "loss": 0.3215, + "step": 35846 + }, + { + "epoch": 0.7176037834997373, + "grad_norm": 2.0833094120025635, + "learning_rate": 1.949403176299988e-06, + "loss": 0.7345, + "step": 35847 + }, + { + "epoch": 0.7176238020168656, + "grad_norm": 2.1006598472595215, + "learning_rate": 1.949146329435042e-06, + "loss": 0.6741, + "step": 35848 + }, + { + "epoch": 0.7176438205339939, + "grad_norm": 1.072800636291504, + "learning_rate": 1.9488894953952287e-06, + "loss": 0.2766, + "step": 35849 + }, + { + "epoch": 0.7176638390511223, + "grad_norm": 1.4298715591430664, + "learning_rate": 1.9486326741816267e-06, + "loss": 0.3255, + "step": 35850 + }, + { + "epoch": 0.7176838575682506, + "grad_norm": 1.1255342960357666, + "learning_rate": 1.948375865795315e-06, + "loss": 0.3098, + "step": 35851 + }, + { + "epoch": 0.717703876085379, + "grad_norm": 1.0922868251800537, + "learning_rate": 1.948119070237376e-06, + "loss": 0.2929, + "step": 35852 + }, + { + "epoch": 0.7177238946025073, + "grad_norm": 1.1451890468597412, + "learning_rate": 1.9478622875088878e-06, + "loss": 0.3291, + "step": 35853 + }, + { + "epoch": 0.7177439131196357, + "grad_norm": 1.955706238746643, + "learning_rate": 1.9476055176109295e-06, + "loss": 0.7143, + "step": 35854 + }, + { + "epoch": 0.717763931636764, + "grad_norm": 1.2332085371017456, + "learning_rate": 1.9473487605445806e-06, + "loss": 0.3079, + "step": 35855 + }, + { + "epoch": 0.7177839501538924, + "grad_norm": 1.0902745723724365, + "learning_rate": 1.9470920163109193e-06, + "loss": 0.33, + "step": 35856 + }, + { + "epoch": 0.7178039686710207, + "grad_norm": 1.1138086318969727, + "learning_rate": 1.946835284911028e-06, + "loss": 0.3188, + "step": 35857 + }, + { + "epoch": 0.717823987188149, + "grad_norm": 1.8713332414627075, + "learning_rate": 1.9465785663459817e-06, + "loss": 0.7259, + "step": 35858 + }, + { + "epoch": 0.7178440057052774, + "grad_norm": 1.1214534044265747, + "learning_rate": 1.9463218606168644e-06, + "loss": 0.3194, + "step": 35859 + }, + { + "epoch": 0.7178640242224057, + "grad_norm": 1.1771472692489624, + "learning_rate": 1.9460651677247505e-06, + "loss": 0.2554, + "step": 35860 + }, + { + "epoch": 0.7178840427395341, + "grad_norm": 1.1127930879592896, + "learning_rate": 1.9458084876707235e-06, + "loss": 0.2884, + "step": 35861 + }, + { + "epoch": 0.7179040612566624, + "grad_norm": 1.1638529300689697, + "learning_rate": 1.9455518204558606e-06, + "loss": 0.3064, + "step": 35862 + }, + { + "epoch": 0.7179240797737908, + "grad_norm": 1.126410961151123, + "learning_rate": 1.9452951660812396e-06, + "loss": 0.2918, + "step": 35863 + }, + { + "epoch": 0.7179440982909191, + "grad_norm": 1.0863511562347412, + "learning_rate": 1.9450385245479414e-06, + "loss": 0.2792, + "step": 35864 + }, + { + "epoch": 0.7179641168080474, + "grad_norm": 1.1388280391693115, + "learning_rate": 1.9447818958570416e-06, + "loss": 0.3238, + "step": 35865 + }, + { + "epoch": 0.7179841353251758, + "grad_norm": 1.1020630598068237, + "learning_rate": 1.9445252800096227e-06, + "loss": 0.3235, + "step": 35866 + }, + { + "epoch": 0.7180041538423041, + "grad_norm": 1.1271617412567139, + "learning_rate": 1.9442686770067627e-06, + "loss": 0.3078, + "step": 35867 + }, + { + "epoch": 0.7180241723594325, + "grad_norm": 1.8992459774017334, + "learning_rate": 1.944012086849539e-06, + "loss": 0.8035, + "step": 35868 + }, + { + "epoch": 0.7180441908765608, + "grad_norm": 1.0785000324249268, + "learning_rate": 1.943755509539029e-06, + "loss": 0.3118, + "step": 35869 + }, + { + "epoch": 0.7180642093936892, + "grad_norm": 1.014717936515808, + "learning_rate": 1.943498945076315e-06, + "loss": 0.2894, + "step": 35870 + }, + { + "epoch": 0.7180842279108175, + "grad_norm": 1.1076472997665405, + "learning_rate": 1.943242393462472e-06, + "loss": 0.2598, + "step": 35871 + }, + { + "epoch": 0.7181042464279459, + "grad_norm": 1.0000548362731934, + "learning_rate": 1.942985854698582e-06, + "loss": 0.2682, + "step": 35872 + }, + { + "epoch": 0.7181242649450742, + "grad_norm": 1.1008893251419067, + "learning_rate": 1.94272932878572e-06, + "loss": 0.3359, + "step": 35873 + }, + { + "epoch": 0.7181442834622025, + "grad_norm": 1.02494215965271, + "learning_rate": 1.9424728157249672e-06, + "loss": 0.3002, + "step": 35874 + }, + { + "epoch": 0.7181643019793309, + "grad_norm": 1.1347858905792236, + "learning_rate": 1.9422163155174014e-06, + "loss": 0.3302, + "step": 35875 + }, + { + "epoch": 0.7181843204964592, + "grad_norm": 1.2391726970672607, + "learning_rate": 1.9419598281640993e-06, + "loss": 0.2938, + "step": 35876 + }, + { + "epoch": 0.7182043390135876, + "grad_norm": 1.03522789478302, + "learning_rate": 1.9417033536661408e-06, + "loss": 0.2974, + "step": 35877 + }, + { + "epoch": 0.7182243575307159, + "grad_norm": 1.2101211547851562, + "learning_rate": 1.941446892024601e-06, + "loss": 0.2849, + "step": 35878 + }, + { + "epoch": 0.7182443760478443, + "grad_norm": 1.0916695594787598, + "learning_rate": 1.941190443240562e-06, + "loss": 0.2824, + "step": 35879 + }, + { + "epoch": 0.7182643945649726, + "grad_norm": 1.3353745937347412, + "learning_rate": 1.9409340073151006e-06, + "loss": 0.2636, + "step": 35880 + }, + { + "epoch": 0.7182844130821009, + "grad_norm": 1.8964563608169556, + "learning_rate": 1.9406775842492936e-06, + "loss": 0.7432, + "step": 35881 + }, + { + "epoch": 0.7183044315992293, + "grad_norm": 1.0573418140411377, + "learning_rate": 1.9404211740442185e-06, + "loss": 0.2857, + "step": 35882 + }, + { + "epoch": 0.7183244501163576, + "grad_norm": 1.0117322206497192, + "learning_rate": 1.9401647767009555e-06, + "loss": 0.2701, + "step": 35883 + }, + { + "epoch": 0.718344468633486, + "grad_norm": 1.132355809211731, + "learning_rate": 1.9399083922205803e-06, + "loss": 0.3351, + "step": 35884 + }, + { + "epoch": 0.7183644871506143, + "grad_norm": 1.114774227142334, + "learning_rate": 1.939652020604173e-06, + "loss": 0.2718, + "step": 35885 + }, + { + "epoch": 0.7183845056677427, + "grad_norm": 1.0788154602050781, + "learning_rate": 1.9393956618528104e-06, + "loss": 0.3597, + "step": 35886 + }, + { + "epoch": 0.718404524184871, + "grad_norm": 1.897825002670288, + "learning_rate": 1.9391393159675676e-06, + "loss": 0.762, + "step": 35887 + }, + { + "epoch": 0.7184245427019994, + "grad_norm": 1.0622438192367554, + "learning_rate": 1.9388829829495266e-06, + "loss": 0.2312, + "step": 35888 + }, + { + "epoch": 0.7184445612191277, + "grad_norm": 1.1672154664993286, + "learning_rate": 1.938626662799762e-06, + "loss": 0.3355, + "step": 35889 + }, + { + "epoch": 0.718464579736256, + "grad_norm": 1.0470631122589111, + "learning_rate": 1.938370355519353e-06, + "loss": 0.308, + "step": 35890 + }, + { + "epoch": 0.7184845982533844, + "grad_norm": 1.0326155424118042, + "learning_rate": 1.9381140611093733e-06, + "loss": 0.256, + "step": 35891 + }, + { + "epoch": 0.7185046167705127, + "grad_norm": 1.0818159580230713, + "learning_rate": 1.9378577795709057e-06, + "loss": 0.2857, + "step": 35892 + }, + { + "epoch": 0.7185246352876411, + "grad_norm": 1.1182937622070312, + "learning_rate": 1.937601510905025e-06, + "loss": 0.3006, + "step": 35893 + }, + { + "epoch": 0.7185446538047694, + "grad_norm": 1.051458477973938, + "learning_rate": 1.9373452551128087e-06, + "loss": 0.2791, + "step": 35894 + }, + { + "epoch": 0.7185646723218978, + "grad_norm": 1.0659704208374023, + "learning_rate": 1.9370890121953317e-06, + "loss": 0.3174, + "step": 35895 + }, + { + "epoch": 0.7185846908390261, + "grad_norm": 1.1374131441116333, + "learning_rate": 1.9368327821536754e-06, + "loss": 0.3361, + "step": 35896 + }, + { + "epoch": 0.7186047093561544, + "grad_norm": 1.092417597770691, + "learning_rate": 1.936576564988913e-06, + "loss": 0.3507, + "step": 35897 + }, + { + "epoch": 0.7186247278732828, + "grad_norm": 1.207265853881836, + "learning_rate": 1.936320360702125e-06, + "loss": 0.2869, + "step": 35898 + }, + { + "epoch": 0.7186447463904111, + "grad_norm": 1.2213077545166016, + "learning_rate": 1.936064169294387e-06, + "loss": 0.2803, + "step": 35899 + }, + { + "epoch": 0.7186647649075395, + "grad_norm": 1.3278037309646606, + "learning_rate": 1.935807990766774e-06, + "loss": 0.3604, + "step": 35900 + }, + { + "epoch": 0.7186847834246678, + "grad_norm": 1.1355708837509155, + "learning_rate": 1.935551825120367e-06, + "loss": 0.2935, + "step": 35901 + }, + { + "epoch": 0.7187048019417962, + "grad_norm": 1.2499085664749146, + "learning_rate": 1.93529567235624e-06, + "loss": 0.3284, + "step": 35902 + }, + { + "epoch": 0.7187248204589245, + "grad_norm": 0.9946842193603516, + "learning_rate": 1.9350395324754713e-06, + "loss": 0.3266, + "step": 35903 + }, + { + "epoch": 0.7187448389760529, + "grad_norm": 1.930092453956604, + "learning_rate": 1.9347834054791343e-06, + "loss": 0.802, + "step": 35904 + }, + { + "epoch": 0.7187648574931812, + "grad_norm": 1.7418506145477295, + "learning_rate": 1.93452729136831e-06, + "loss": 0.7517, + "step": 35905 + }, + { + "epoch": 0.7187848760103095, + "grad_norm": 1.074834942817688, + "learning_rate": 1.9342711901440737e-06, + "loss": 0.2879, + "step": 35906 + }, + { + "epoch": 0.7188048945274379, + "grad_norm": 1.4720147848129272, + "learning_rate": 1.9340151018075016e-06, + "loss": 0.2986, + "step": 35907 + }, + { + "epoch": 0.7188249130445662, + "grad_norm": 1.8052220344543457, + "learning_rate": 1.933759026359668e-06, + "loss": 0.6976, + "step": 35908 + }, + { + "epoch": 0.7188449315616946, + "grad_norm": 1.088900089263916, + "learning_rate": 1.933502963801653e-06, + "loss": 0.3163, + "step": 35909 + }, + { + "epoch": 0.7188649500788229, + "grad_norm": 1.0716644525527954, + "learning_rate": 1.933246914134531e-06, + "loss": 0.2977, + "step": 35910 + }, + { + "epoch": 0.7188849685959513, + "grad_norm": 1.8631298542022705, + "learning_rate": 1.93299087735938e-06, + "loss": 0.8429, + "step": 35911 + }, + { + "epoch": 0.7189049871130796, + "grad_norm": 1.8533135652542114, + "learning_rate": 1.9327348534772754e-06, + "loss": 0.7715, + "step": 35912 + }, + { + "epoch": 0.7189250056302079, + "grad_norm": 1.0516152381896973, + "learning_rate": 1.9324788424892922e-06, + "loss": 0.3325, + "step": 35913 + }, + { + "epoch": 0.7189450241473363, + "grad_norm": 1.014613389968872, + "learning_rate": 1.932222844396509e-06, + "loss": 0.2621, + "step": 35914 + }, + { + "epoch": 0.7189650426644646, + "grad_norm": 1.114922046661377, + "learning_rate": 1.9319668592000006e-06, + "loss": 0.281, + "step": 35915 + }, + { + "epoch": 0.718985061181593, + "grad_norm": 1.099103331565857, + "learning_rate": 1.9317108869008436e-06, + "loss": 0.3053, + "step": 35916 + }, + { + "epoch": 0.7190050796987213, + "grad_norm": 1.1925450563430786, + "learning_rate": 1.931454927500114e-06, + "loss": 0.285, + "step": 35917 + }, + { + "epoch": 0.7190250982158497, + "grad_norm": 1.9712035655975342, + "learning_rate": 1.9311989809988856e-06, + "loss": 0.7335, + "step": 35918 + }, + { + "epoch": 0.719045116732978, + "grad_norm": 1.0434170961380005, + "learning_rate": 1.9309430473982376e-06, + "loss": 0.2695, + "step": 35919 + }, + { + "epoch": 0.7190651352501064, + "grad_norm": 1.1530797481536865, + "learning_rate": 1.930687126699245e-06, + "loss": 0.3097, + "step": 35920 + }, + { + "epoch": 0.7190851537672347, + "grad_norm": 1.1276789903640747, + "learning_rate": 1.9304312189029813e-06, + "loss": 0.2744, + "step": 35921 + }, + { + "epoch": 0.719105172284363, + "grad_norm": 1.233690857887268, + "learning_rate": 1.9301753240105257e-06, + "loss": 0.3513, + "step": 35922 + }, + { + "epoch": 0.7191251908014914, + "grad_norm": 1.7658671140670776, + "learning_rate": 1.929919442022951e-06, + "loss": 0.7242, + "step": 35923 + }, + { + "epoch": 0.7191452093186197, + "grad_norm": 1.1372923851013184, + "learning_rate": 1.929663572941336e-06, + "loss": 0.3089, + "step": 35924 + }, + { + "epoch": 0.7191652278357481, + "grad_norm": 1.04096519947052, + "learning_rate": 1.9294077167667547e-06, + "loss": 0.3042, + "step": 35925 + }, + { + "epoch": 0.7191852463528764, + "grad_norm": 2.129707098007202, + "learning_rate": 1.929151873500282e-06, + "loss": 0.7582, + "step": 35926 + }, + { + "epoch": 0.7192052648700048, + "grad_norm": 1.900827407836914, + "learning_rate": 1.9288960431429927e-06, + "loss": 0.7632, + "step": 35927 + }, + { + "epoch": 0.7192252833871331, + "grad_norm": 1.8556864261627197, + "learning_rate": 1.9286402256959652e-06, + "loss": 0.7313, + "step": 35928 + }, + { + "epoch": 0.7192453019042614, + "grad_norm": 1.0102357864379883, + "learning_rate": 1.928384421160273e-06, + "loss": 0.31, + "step": 35929 + }, + { + "epoch": 0.7192653204213898, + "grad_norm": 1.1778082847595215, + "learning_rate": 1.928128629536992e-06, + "loss": 0.3263, + "step": 35930 + }, + { + "epoch": 0.7192853389385181, + "grad_norm": 1.1307549476623535, + "learning_rate": 1.9278728508271956e-06, + "loss": 0.3285, + "step": 35931 + }, + { + "epoch": 0.7193053574556465, + "grad_norm": 1.7915574312210083, + "learning_rate": 1.927617085031962e-06, + "loss": 0.7691, + "step": 35932 + }, + { + "epoch": 0.7193253759727748, + "grad_norm": 1.0974546670913696, + "learning_rate": 1.927361332152365e-06, + "loss": 0.3019, + "step": 35933 + }, + { + "epoch": 0.7193453944899032, + "grad_norm": 1.2179338932037354, + "learning_rate": 1.9271055921894776e-06, + "loss": 0.358, + "step": 35934 + }, + { + "epoch": 0.7193654130070315, + "grad_norm": 1.1647788286209106, + "learning_rate": 1.9268498651443796e-06, + "loss": 0.2756, + "step": 35935 + }, + { + "epoch": 0.7193854315241599, + "grad_norm": 1.1098556518554688, + "learning_rate": 1.9265941510181413e-06, + "loss": 0.3377, + "step": 35936 + }, + { + "epoch": 0.7194054500412882, + "grad_norm": 1.0863701105117798, + "learning_rate": 1.9263384498118414e-06, + "loss": 0.3373, + "step": 35937 + }, + { + "epoch": 0.7194254685584165, + "grad_norm": 1.059688925743103, + "learning_rate": 1.926082761526553e-06, + "loss": 0.3144, + "step": 35938 + }, + { + "epoch": 0.7194454870755449, + "grad_norm": 1.1510522365570068, + "learning_rate": 1.9258270861633516e-06, + "loss": 0.2765, + "step": 35939 + }, + { + "epoch": 0.7194655055926732, + "grad_norm": 1.0858092308044434, + "learning_rate": 1.92557142372331e-06, + "loss": 0.3137, + "step": 35940 + }, + { + "epoch": 0.7194855241098016, + "grad_norm": 0.9763340950012207, + "learning_rate": 1.925315774207505e-06, + "loss": 0.2879, + "step": 35941 + }, + { + "epoch": 0.7195055426269299, + "grad_norm": 1.0281351804733276, + "learning_rate": 1.925060137617012e-06, + "loss": 0.2704, + "step": 35942 + }, + { + "epoch": 0.7195255611440583, + "grad_norm": 1.0962356328964233, + "learning_rate": 1.9248045139529044e-06, + "loss": 0.2558, + "step": 35943 + }, + { + "epoch": 0.7195455796611866, + "grad_norm": 1.1676310300827026, + "learning_rate": 1.9245489032162547e-06, + "loss": 0.2854, + "step": 35944 + }, + { + "epoch": 0.7195655981783149, + "grad_norm": 1.1924140453338623, + "learning_rate": 1.9242933054081412e-06, + "loss": 0.2728, + "step": 35945 + }, + { + "epoch": 0.7195856166954433, + "grad_norm": 1.0143235921859741, + "learning_rate": 1.924037720529637e-06, + "loss": 0.2603, + "step": 35946 + }, + { + "epoch": 0.7196056352125716, + "grad_norm": 1.1364961862564087, + "learning_rate": 1.923782148581815e-06, + "loss": 0.2997, + "step": 35947 + }, + { + "epoch": 0.7196256537297, + "grad_norm": 1.0239908695220947, + "learning_rate": 1.9235265895657516e-06, + "loss": 0.2796, + "step": 35948 + }, + { + "epoch": 0.7196456722468283, + "grad_norm": 1.1334484815597534, + "learning_rate": 1.923271043482519e-06, + "loss": 0.3121, + "step": 35949 + }, + { + "epoch": 0.7196656907639567, + "grad_norm": 1.0707933902740479, + "learning_rate": 1.923015510333195e-06, + "loss": 0.2899, + "step": 35950 + }, + { + "epoch": 0.719685709281085, + "grad_norm": 1.178032398223877, + "learning_rate": 1.922759990118851e-06, + "loss": 0.2972, + "step": 35951 + }, + { + "epoch": 0.7197057277982134, + "grad_norm": 1.1779961585998535, + "learning_rate": 1.9225044828405622e-06, + "loss": 0.3024, + "step": 35952 + }, + { + "epoch": 0.7197257463153417, + "grad_norm": 1.2327497005462646, + "learning_rate": 1.9222489884994e-06, + "loss": 0.3247, + "step": 35953 + }, + { + "epoch": 0.71974576483247, + "grad_norm": 1.1247351169586182, + "learning_rate": 1.921993507096443e-06, + "loss": 0.3065, + "step": 35954 + }, + { + "epoch": 0.7197657833495984, + "grad_norm": 1.1187728643417358, + "learning_rate": 1.9217380386327627e-06, + "loss": 0.3157, + "step": 35955 + }, + { + "epoch": 0.7197858018667267, + "grad_norm": 1.0925909280776978, + "learning_rate": 1.9214825831094335e-06, + "loss": 0.2787, + "step": 35956 + }, + { + "epoch": 0.7198058203838551, + "grad_norm": 1.144640564918518, + "learning_rate": 1.9212271405275267e-06, + "loss": 0.3197, + "step": 35957 + }, + { + "epoch": 0.7198258389009834, + "grad_norm": 1.1680972576141357, + "learning_rate": 1.920971710888121e-06, + "loss": 0.2822, + "step": 35958 + }, + { + "epoch": 0.7198458574181118, + "grad_norm": 1.116080403327942, + "learning_rate": 1.9207162941922868e-06, + "loss": 0.2793, + "step": 35959 + }, + { + "epoch": 0.7198658759352401, + "grad_norm": 1.1787562370300293, + "learning_rate": 1.920460890441097e-06, + "loss": 0.3475, + "step": 35960 + }, + { + "epoch": 0.7198858944523684, + "grad_norm": 1.0574904680252075, + "learning_rate": 1.9202054996356288e-06, + "loss": 0.3112, + "step": 35961 + }, + { + "epoch": 0.7199059129694968, + "grad_norm": 1.0786930322647095, + "learning_rate": 1.9199501217769525e-06, + "loss": 0.2686, + "step": 35962 + }, + { + "epoch": 0.7199259314866251, + "grad_norm": 1.2890864610671997, + "learning_rate": 1.9196947568661446e-06, + "loss": 0.3074, + "step": 35963 + }, + { + "epoch": 0.7199459500037535, + "grad_norm": 1.050695538520813, + "learning_rate": 1.919439404904277e-06, + "loss": 0.3433, + "step": 35964 + }, + { + "epoch": 0.7199659685208818, + "grad_norm": 0.987062931060791, + "learning_rate": 1.919184065892423e-06, + "loss": 0.2985, + "step": 35965 + }, + { + "epoch": 0.7199859870380102, + "grad_norm": 1.1113083362579346, + "learning_rate": 1.9189287398316548e-06, + "loss": 0.3081, + "step": 35966 + }, + { + "epoch": 0.7200060055551385, + "grad_norm": 1.0921437740325928, + "learning_rate": 1.9186734267230484e-06, + "loss": 0.3479, + "step": 35967 + }, + { + "epoch": 0.7200260240722669, + "grad_norm": 1.2368364334106445, + "learning_rate": 1.918418126567676e-06, + "loss": 0.2771, + "step": 35968 + }, + { + "epoch": 0.7200460425893952, + "grad_norm": 1.0902820825576782, + "learning_rate": 1.9181628393666108e-06, + "loss": 0.2636, + "step": 35969 + }, + { + "epoch": 0.7200660611065235, + "grad_norm": 1.083358883857727, + "learning_rate": 1.9179075651209255e-06, + "loss": 0.304, + "step": 35970 + }, + { + "epoch": 0.7200860796236519, + "grad_norm": 1.1907061338424683, + "learning_rate": 1.9176523038316925e-06, + "loss": 0.311, + "step": 35971 + }, + { + "epoch": 0.7201060981407802, + "grad_norm": 1.1593761444091797, + "learning_rate": 1.917397055499987e-06, + "loss": 0.3244, + "step": 35972 + }, + { + "epoch": 0.7201261166579086, + "grad_norm": 1.0052052736282349, + "learning_rate": 1.9171418201268795e-06, + "loss": 0.3, + "step": 35973 + }, + { + "epoch": 0.7201461351750369, + "grad_norm": 1.0895540714263916, + "learning_rate": 1.9168865977134455e-06, + "loss": 0.2808, + "step": 35974 + }, + { + "epoch": 0.7201661536921653, + "grad_norm": 1.3983092308044434, + "learning_rate": 1.9166313882607556e-06, + "loss": 0.3008, + "step": 35975 + }, + { + "epoch": 0.7201861722092936, + "grad_norm": 1.0625898838043213, + "learning_rate": 1.9163761917698853e-06, + "loss": 0.2848, + "step": 35976 + }, + { + "epoch": 0.7202061907264219, + "grad_norm": 0.9973911046981812, + "learning_rate": 1.9161210082419056e-06, + "loss": 0.3289, + "step": 35977 + }, + { + "epoch": 0.7202262092435503, + "grad_norm": 1.0292937755584717, + "learning_rate": 1.91586583767789e-06, + "loss": 0.3015, + "step": 35978 + }, + { + "epoch": 0.7202462277606786, + "grad_norm": 1.1615939140319824, + "learning_rate": 1.9156106800789106e-06, + "loss": 0.353, + "step": 35979 + }, + { + "epoch": 0.720266246277807, + "grad_norm": 0.992211103439331, + "learning_rate": 1.915355535446038e-06, + "loss": 0.2523, + "step": 35980 + }, + { + "epoch": 0.7202862647949353, + "grad_norm": 1.0935052633285522, + "learning_rate": 1.915100403780349e-06, + "loss": 0.3188, + "step": 35981 + }, + { + "epoch": 0.7203062833120637, + "grad_norm": 1.1807805299758911, + "learning_rate": 1.914845285082914e-06, + "loss": 0.3447, + "step": 35982 + }, + { + "epoch": 0.720326301829192, + "grad_norm": 1.1867128610610962, + "learning_rate": 1.9145901793548055e-06, + "loss": 0.3199, + "step": 35983 + }, + { + "epoch": 0.7203463203463204, + "grad_norm": 1.137798547744751, + "learning_rate": 1.9143350865970943e-06, + "loss": 0.2854, + "step": 35984 + }, + { + "epoch": 0.7203663388634487, + "grad_norm": 1.1497420072555542, + "learning_rate": 1.9140800068108563e-06, + "loss": 0.3589, + "step": 35985 + }, + { + "epoch": 0.720386357380577, + "grad_norm": 1.0796626806259155, + "learning_rate": 1.913824939997159e-06, + "loss": 0.3194, + "step": 35986 + }, + { + "epoch": 0.7204063758977054, + "grad_norm": 1.1023024320602417, + "learning_rate": 1.91356988615708e-06, + "loss": 0.3054, + "step": 35987 + }, + { + "epoch": 0.7204263944148337, + "grad_norm": 1.0814905166625977, + "learning_rate": 1.9133148452916876e-06, + "loss": 0.306, + "step": 35988 + }, + { + "epoch": 0.7204464129319621, + "grad_norm": 0.9774127006530762, + "learning_rate": 1.9130598174020567e-06, + "loss": 0.2521, + "step": 35989 + }, + { + "epoch": 0.7204664314490904, + "grad_norm": 1.1821913719177246, + "learning_rate": 1.912804802489258e-06, + "loss": 0.2904, + "step": 35990 + }, + { + "epoch": 0.7204864499662188, + "grad_norm": 1.019884467124939, + "learning_rate": 1.9125498005543637e-06, + "loss": 0.2533, + "step": 35991 + }, + { + "epoch": 0.7205064684833471, + "grad_norm": 1.08307945728302, + "learning_rate": 1.912294811598445e-06, + "loss": 0.2882, + "step": 35992 + }, + { + "epoch": 0.7205264870004754, + "grad_norm": 1.9706535339355469, + "learning_rate": 1.9120398356225733e-06, + "loss": 0.7895, + "step": 35993 + }, + { + "epoch": 0.7205465055176038, + "grad_norm": 1.2040024995803833, + "learning_rate": 1.911784872627823e-06, + "loss": 0.3448, + "step": 35994 + }, + { + "epoch": 0.7205665240347321, + "grad_norm": 1.1223163604736328, + "learning_rate": 1.911529922615265e-06, + "loss": 0.3495, + "step": 35995 + }, + { + "epoch": 0.7205865425518605, + "grad_norm": 1.0294543504714966, + "learning_rate": 1.91127498558597e-06, + "loss": 0.2657, + "step": 35996 + }, + { + "epoch": 0.7206065610689888, + "grad_norm": 1.0024124383926392, + "learning_rate": 1.911020061541009e-06, + "loss": 0.2568, + "step": 35997 + }, + { + "epoch": 0.7206265795861172, + "grad_norm": 1.0233620405197144, + "learning_rate": 1.910765150481456e-06, + "loss": 0.3013, + "step": 35998 + }, + { + "epoch": 0.7206465981032455, + "grad_norm": 1.1201469898223877, + "learning_rate": 1.910510252408381e-06, + "loss": 0.2763, + "step": 35999 + }, + { + "epoch": 0.7206666166203739, + "grad_norm": 1.2971069812774658, + "learning_rate": 1.9102553673228565e-06, + "loss": 0.2835, + "step": 36000 + }, + { + "epoch": 0.7206866351375022, + "grad_norm": 1.9827862977981567, + "learning_rate": 1.910000495225954e-06, + "loss": 0.7852, + "step": 36001 + }, + { + "epoch": 0.7207066536546305, + "grad_norm": 1.2297489643096924, + "learning_rate": 1.9097456361187426e-06, + "loss": 0.3122, + "step": 36002 + }, + { + "epoch": 0.7207266721717589, + "grad_norm": 1.1827642917633057, + "learning_rate": 1.9094907900022975e-06, + "loss": 0.3435, + "step": 36003 + }, + { + "epoch": 0.7207466906888872, + "grad_norm": 1.025067687034607, + "learning_rate": 1.909235956877688e-06, + "loss": 0.3006, + "step": 36004 + }, + { + "epoch": 0.7207667092060156, + "grad_norm": 1.1455844640731812, + "learning_rate": 1.908981136745985e-06, + "loss": 0.3002, + "step": 36005 + }, + { + "epoch": 0.7207867277231439, + "grad_norm": 1.1123466491699219, + "learning_rate": 1.9087263296082587e-06, + "loss": 0.2938, + "step": 36006 + }, + { + "epoch": 0.7208067462402723, + "grad_norm": 1.0512701272964478, + "learning_rate": 1.908471535465583e-06, + "loss": 0.318, + "step": 36007 + }, + { + "epoch": 0.7208267647574006, + "grad_norm": 1.1172393560409546, + "learning_rate": 1.9082167543190284e-06, + "loss": 0.2733, + "step": 36008 + }, + { + "epoch": 0.7208467832745289, + "grad_norm": 1.1450937986373901, + "learning_rate": 1.9079619861696647e-06, + "loss": 0.2536, + "step": 36009 + }, + { + "epoch": 0.7208668017916573, + "grad_norm": 1.0843615531921387, + "learning_rate": 1.907707231018562e-06, + "loss": 0.3007, + "step": 36010 + }, + { + "epoch": 0.7208868203087856, + "grad_norm": 1.1724451780319214, + "learning_rate": 1.9074524888667938e-06, + "loss": 0.2736, + "step": 36011 + }, + { + "epoch": 0.720906838825914, + "grad_norm": 1.0225651264190674, + "learning_rate": 1.9071977597154283e-06, + "loss": 0.2903, + "step": 36012 + }, + { + "epoch": 0.7209268573430423, + "grad_norm": 0.9780268669128418, + "learning_rate": 1.9069430435655394e-06, + "loss": 0.2932, + "step": 36013 + }, + { + "epoch": 0.7209468758601707, + "grad_norm": 1.9886823892593384, + "learning_rate": 1.9066883404181968e-06, + "loss": 0.7356, + "step": 36014 + }, + { + "epoch": 0.720966894377299, + "grad_norm": 1.0169655084609985, + "learning_rate": 1.9064336502744691e-06, + "loss": 0.2925, + "step": 36015 + }, + { + "epoch": 0.7209869128944273, + "grad_norm": 1.1045405864715576, + "learning_rate": 1.9061789731354297e-06, + "loss": 0.2924, + "step": 36016 + }, + { + "epoch": 0.7210069314115557, + "grad_norm": 1.179831624031067, + "learning_rate": 1.9059243090021485e-06, + "loss": 0.2909, + "step": 36017 + }, + { + "epoch": 0.721026949928684, + "grad_norm": 1.2096502780914307, + "learning_rate": 1.9056696578756957e-06, + "loss": 0.2608, + "step": 36018 + }, + { + "epoch": 0.7210469684458124, + "grad_norm": 1.0839735269546509, + "learning_rate": 1.9054150197571396e-06, + "loss": 0.2496, + "step": 36019 + }, + { + "epoch": 0.7210669869629407, + "grad_norm": 1.7678992748260498, + "learning_rate": 1.9051603946475549e-06, + "loss": 0.7629, + "step": 36020 + }, + { + "epoch": 0.7210870054800691, + "grad_norm": 0.9780595898628235, + "learning_rate": 1.90490578254801e-06, + "loss": 0.2773, + "step": 36021 + }, + { + "epoch": 0.7211070239971974, + "grad_norm": 1.1273325681686401, + "learning_rate": 1.9046511834595749e-06, + "loss": 0.2818, + "step": 36022 + }, + { + "epoch": 0.7211270425143258, + "grad_norm": 1.159080982208252, + "learning_rate": 1.9043965973833177e-06, + "loss": 0.3215, + "step": 36023 + }, + { + "epoch": 0.7211470610314541, + "grad_norm": 1.2566617727279663, + "learning_rate": 1.904142024320314e-06, + "loss": 0.3137, + "step": 36024 + }, + { + "epoch": 0.7211670795485824, + "grad_norm": 1.0180878639221191, + "learning_rate": 1.9038874642716288e-06, + "loss": 0.297, + "step": 36025 + }, + { + "epoch": 0.7211870980657108, + "grad_norm": 1.257078766822815, + "learning_rate": 1.9036329172383355e-06, + "loss": 0.3151, + "step": 36026 + }, + { + "epoch": 0.7212071165828391, + "grad_norm": 1.0941413640975952, + "learning_rate": 1.9033783832215037e-06, + "loss": 0.2685, + "step": 36027 + }, + { + "epoch": 0.7212271350999675, + "grad_norm": 1.1330475807189941, + "learning_rate": 1.9031238622222009e-06, + "loss": 0.3271, + "step": 36028 + }, + { + "epoch": 0.7212471536170958, + "grad_norm": 1.1114287376403809, + "learning_rate": 1.9028693542415012e-06, + "loss": 0.2879, + "step": 36029 + }, + { + "epoch": 0.7212671721342242, + "grad_norm": 1.1719672679901123, + "learning_rate": 1.9026148592804717e-06, + "loss": 0.3232, + "step": 36030 + }, + { + "epoch": 0.7212871906513525, + "grad_norm": 1.2035239934921265, + "learning_rate": 1.9023603773401821e-06, + "loss": 0.2471, + "step": 36031 + }, + { + "epoch": 0.7213072091684808, + "grad_norm": 1.1680923700332642, + "learning_rate": 1.9021059084217043e-06, + "loss": 0.3246, + "step": 36032 + }, + { + "epoch": 0.7213272276856092, + "grad_norm": 1.8189936876296997, + "learning_rate": 1.9018514525261038e-06, + "loss": 0.7928, + "step": 36033 + }, + { + "epoch": 0.7213472462027375, + "grad_norm": 1.2883012294769287, + "learning_rate": 1.9015970096544551e-06, + "loss": 0.3166, + "step": 36034 + }, + { + "epoch": 0.7213672647198659, + "grad_norm": 1.0731256008148193, + "learning_rate": 1.9013425798078256e-06, + "loss": 0.3014, + "step": 36035 + }, + { + "epoch": 0.7213872832369942, + "grad_norm": 1.0696382522583008, + "learning_rate": 1.9010881629872835e-06, + "loss": 0.2506, + "step": 36036 + }, + { + "epoch": 0.7214073017541226, + "grad_norm": 1.1403870582580566, + "learning_rate": 1.900833759193902e-06, + "loss": 0.3218, + "step": 36037 + }, + { + "epoch": 0.7214273202712509, + "grad_norm": 1.0168277025222778, + "learning_rate": 1.900579368428746e-06, + "loss": 0.3023, + "step": 36038 + }, + { + "epoch": 0.7214473387883793, + "grad_norm": 1.154066801071167, + "learning_rate": 1.9003249906928895e-06, + "loss": 0.3264, + "step": 36039 + }, + { + "epoch": 0.7214673573055076, + "grad_norm": 1.0884523391723633, + "learning_rate": 1.9000706259873997e-06, + "loss": 0.3015, + "step": 36040 + }, + { + "epoch": 0.7214873758226359, + "grad_norm": 1.1003824472427368, + "learning_rate": 1.8998162743133452e-06, + "loss": 0.2709, + "step": 36041 + }, + { + "epoch": 0.7215073943397643, + "grad_norm": 1.242424488067627, + "learning_rate": 1.8995619356717948e-06, + "loss": 0.2596, + "step": 36042 + }, + { + "epoch": 0.7215274128568926, + "grad_norm": 1.9540934562683105, + "learning_rate": 1.8993076100638208e-06, + "loss": 0.7178, + "step": 36043 + }, + { + "epoch": 0.721547431374021, + "grad_norm": 1.0624514818191528, + "learning_rate": 1.8990532974904895e-06, + "loss": 0.338, + "step": 36044 + }, + { + "epoch": 0.7215674498911493, + "grad_norm": 1.236006259918213, + "learning_rate": 1.8987989979528714e-06, + "loss": 0.3259, + "step": 36045 + }, + { + "epoch": 0.7215874684082777, + "grad_norm": 1.134226679801941, + "learning_rate": 1.8985447114520333e-06, + "loss": 0.2766, + "step": 36046 + }, + { + "epoch": 0.721607486925406, + "grad_norm": 1.2398959398269653, + "learning_rate": 1.8982904379890472e-06, + "loss": 0.3101, + "step": 36047 + }, + { + "epoch": 0.7216275054425343, + "grad_norm": 1.177059292793274, + "learning_rate": 1.8980361775649808e-06, + "loss": 0.3258, + "step": 36048 + }, + { + "epoch": 0.7216475239596627, + "grad_norm": 1.1077589988708496, + "learning_rate": 1.8977819301809008e-06, + "loss": 0.3009, + "step": 36049 + }, + { + "epoch": 0.721667542476791, + "grad_norm": 1.3500334024429321, + "learning_rate": 1.8975276958378797e-06, + "loss": 0.3013, + "step": 36050 + }, + { + "epoch": 0.7216875609939194, + "grad_norm": 1.1386631727218628, + "learning_rate": 1.897273474536983e-06, + "loss": 0.336, + "step": 36051 + }, + { + "epoch": 0.7217075795110477, + "grad_norm": 1.0588973760604858, + "learning_rate": 1.8970192662792824e-06, + "loss": 0.2609, + "step": 36052 + }, + { + "epoch": 0.7217275980281761, + "grad_norm": 1.111537218093872, + "learning_rate": 1.8967650710658452e-06, + "loss": 0.3218, + "step": 36053 + }, + { + "epoch": 0.7217476165453044, + "grad_norm": 1.0108188390731812, + "learning_rate": 1.8965108888977402e-06, + "loss": 0.2946, + "step": 36054 + }, + { + "epoch": 0.7217676350624328, + "grad_norm": 1.073002576828003, + "learning_rate": 1.896256719776033e-06, + "loss": 0.3138, + "step": 36055 + }, + { + "epoch": 0.7217876535795611, + "grad_norm": 1.1371057033538818, + "learning_rate": 1.896002563701797e-06, + "loss": 0.2915, + "step": 36056 + }, + { + "epoch": 0.7218076720966894, + "grad_norm": 1.121996521949768, + "learning_rate": 1.8957484206760979e-06, + "loss": 0.3129, + "step": 36057 + }, + { + "epoch": 0.7218276906138178, + "grad_norm": 1.2787771224975586, + "learning_rate": 1.8954942907000045e-06, + "loss": 0.3204, + "step": 36058 + }, + { + "epoch": 0.7218477091309461, + "grad_norm": 1.0761593580245972, + "learning_rate": 1.8952401737745828e-06, + "loss": 0.2985, + "step": 36059 + }, + { + "epoch": 0.7218677276480745, + "grad_norm": 1.1212674379348755, + "learning_rate": 1.894986069900906e-06, + "loss": 0.2778, + "step": 36060 + }, + { + "epoch": 0.7218877461652028, + "grad_norm": 1.1641582250595093, + "learning_rate": 1.894731979080039e-06, + "loss": 0.2898, + "step": 36061 + }, + { + "epoch": 0.7219077646823312, + "grad_norm": 1.1281508207321167, + "learning_rate": 1.8944779013130488e-06, + "loss": 0.2993, + "step": 36062 + }, + { + "epoch": 0.7219277831994595, + "grad_norm": 1.1269959211349487, + "learning_rate": 1.8942238366010068e-06, + "loss": 0.306, + "step": 36063 + }, + { + "epoch": 0.7219478017165878, + "grad_norm": 1.2884539365768433, + "learning_rate": 1.8939697849449778e-06, + "loss": 0.3282, + "step": 36064 + }, + { + "epoch": 0.7219678202337162, + "grad_norm": 0.9912779331207275, + "learning_rate": 1.8937157463460332e-06, + "loss": 0.2626, + "step": 36065 + }, + { + "epoch": 0.7219878387508445, + "grad_norm": 1.1927361488342285, + "learning_rate": 1.8934617208052387e-06, + "loss": 0.3129, + "step": 36066 + }, + { + "epoch": 0.7220078572679729, + "grad_norm": 1.1142932176589966, + "learning_rate": 1.8932077083236627e-06, + "loss": 0.277, + "step": 36067 + }, + { + "epoch": 0.7220278757851012, + "grad_norm": 1.1202834844589233, + "learning_rate": 1.8929537089023714e-06, + "loss": 0.2834, + "step": 36068 + }, + { + "epoch": 0.7220478943022296, + "grad_norm": 1.0795879364013672, + "learning_rate": 1.892699722542436e-06, + "loss": 0.2798, + "step": 36069 + }, + { + "epoch": 0.7220679128193579, + "grad_norm": 1.1355565786361694, + "learning_rate": 1.8924457492449222e-06, + "loss": 0.24, + "step": 36070 + }, + { + "epoch": 0.7220879313364863, + "grad_norm": 1.9668406248092651, + "learning_rate": 1.8921917890108975e-06, + "loss": 0.7138, + "step": 36071 + }, + { + "epoch": 0.7221079498536146, + "grad_norm": 1.9354380369186401, + "learning_rate": 1.8919378418414296e-06, + "loss": 0.7152, + "step": 36072 + }, + { + "epoch": 0.7221279683707429, + "grad_norm": 1.4550172090530396, + "learning_rate": 1.8916839077375848e-06, + "loss": 0.2814, + "step": 36073 + }, + { + "epoch": 0.7221479868878713, + "grad_norm": 1.871611475944519, + "learning_rate": 1.8914299867004333e-06, + "loss": 0.7018, + "step": 36074 + }, + { + "epoch": 0.7221680054049996, + "grad_norm": 1.0974781513214111, + "learning_rate": 1.8911760787310397e-06, + "loss": 0.3019, + "step": 36075 + }, + { + "epoch": 0.722188023922128, + "grad_norm": 1.0518745183944702, + "learning_rate": 1.8909221838304742e-06, + "loss": 0.3179, + "step": 36076 + }, + { + "epoch": 0.7222080424392563, + "grad_norm": 1.205298900604248, + "learning_rate": 1.8906683019998018e-06, + "loss": 0.2954, + "step": 36077 + }, + { + "epoch": 0.7222280609563847, + "grad_norm": 0.9682614803314209, + "learning_rate": 1.8904144332400914e-06, + "loss": 0.3089, + "step": 36078 + }, + { + "epoch": 0.722248079473513, + "grad_norm": 1.130176305770874, + "learning_rate": 1.89016057755241e-06, + "loss": 0.2788, + "step": 36079 + }, + { + "epoch": 0.7222680979906413, + "grad_norm": 1.8836590051651, + "learning_rate": 1.8899067349378247e-06, + "loss": 0.7855, + "step": 36080 + }, + { + "epoch": 0.7222881165077697, + "grad_norm": 1.2516287565231323, + "learning_rate": 1.8896529053974e-06, + "loss": 0.3398, + "step": 36081 + }, + { + "epoch": 0.722308135024898, + "grad_norm": 1.1642612218856812, + "learning_rate": 1.8893990889322073e-06, + "loss": 0.3056, + "step": 36082 + }, + { + "epoch": 0.7223281535420264, + "grad_norm": 1.1400381326675415, + "learning_rate": 1.8891452855433112e-06, + "loss": 0.2958, + "step": 36083 + }, + { + "epoch": 0.7223481720591547, + "grad_norm": 1.1599483489990234, + "learning_rate": 1.8888914952317783e-06, + "loss": 0.2816, + "step": 36084 + }, + { + "epoch": 0.7223681905762831, + "grad_norm": 1.1554268598556519, + "learning_rate": 1.8886377179986765e-06, + "loss": 0.3182, + "step": 36085 + }, + { + "epoch": 0.7223882090934114, + "grad_norm": 1.0976959466934204, + "learning_rate": 1.8883839538450705e-06, + "loss": 0.3062, + "step": 36086 + }, + { + "epoch": 0.7224082276105398, + "grad_norm": 1.10023033618927, + "learning_rate": 1.8881302027720306e-06, + "loss": 0.2853, + "step": 36087 + }, + { + "epoch": 0.7224282461276681, + "grad_norm": 1.007765531539917, + "learning_rate": 1.8878764647806198e-06, + "loss": 0.264, + "step": 36088 + }, + { + "epoch": 0.7224482646447964, + "grad_norm": 1.087647795677185, + "learning_rate": 1.8876227398719083e-06, + "loss": 0.292, + "step": 36089 + }, + { + "epoch": 0.7224682831619248, + "grad_norm": 1.0606441497802734, + "learning_rate": 1.8873690280469593e-06, + "loss": 0.3019, + "step": 36090 + }, + { + "epoch": 0.7224883016790531, + "grad_norm": 1.0901265144348145, + "learning_rate": 1.8871153293068423e-06, + "loss": 0.3081, + "step": 36091 + }, + { + "epoch": 0.7225083201961815, + "grad_norm": 1.0229628086090088, + "learning_rate": 1.8868616436526232e-06, + "loss": 0.3068, + "step": 36092 + }, + { + "epoch": 0.7225283387133098, + "grad_norm": 1.0172306299209595, + "learning_rate": 1.8866079710853675e-06, + "loss": 0.2798, + "step": 36093 + }, + { + "epoch": 0.7225483572304382, + "grad_norm": 2.0314290523529053, + "learning_rate": 1.8863543116061417e-06, + "loss": 0.7219, + "step": 36094 + }, + { + "epoch": 0.7225683757475665, + "grad_norm": 1.003672480583191, + "learning_rate": 1.8861006652160108e-06, + "loss": 0.2778, + "step": 36095 + }, + { + "epoch": 0.7225883942646948, + "grad_norm": 1.1291155815124512, + "learning_rate": 1.8858470319160444e-06, + "loss": 0.3012, + "step": 36096 + }, + { + "epoch": 0.7226084127818232, + "grad_norm": 1.0720422267913818, + "learning_rate": 1.8855934117073072e-06, + "loss": 0.3225, + "step": 36097 + }, + { + "epoch": 0.7226284312989515, + "grad_norm": 2.036393880844116, + "learning_rate": 1.8853398045908645e-06, + "loss": 0.7496, + "step": 36098 + }, + { + "epoch": 0.7226484498160799, + "grad_norm": 1.983354091644287, + "learning_rate": 1.8850862105677813e-06, + "loss": 0.8013, + "step": 36099 + }, + { + "epoch": 0.7226684683332082, + "grad_norm": 1.1888704299926758, + "learning_rate": 1.8848326296391273e-06, + "loss": 0.3079, + "step": 36100 + }, + { + "epoch": 0.7226884868503366, + "grad_norm": 1.0521721839904785, + "learning_rate": 1.884579061805965e-06, + "loss": 0.2907, + "step": 36101 + }, + { + "epoch": 0.7227085053674649, + "grad_norm": 1.1594585180282593, + "learning_rate": 1.8843255070693633e-06, + "loss": 0.2942, + "step": 36102 + }, + { + "epoch": 0.7227285238845933, + "grad_norm": 2.217937707901001, + "learning_rate": 1.8840719654303846e-06, + "loss": 0.7758, + "step": 36103 + }, + { + "epoch": 0.7227485424017216, + "grad_norm": 1.1780956983566284, + "learning_rate": 1.883818436890099e-06, + "loss": 0.3463, + "step": 36104 + }, + { + "epoch": 0.7227685609188499, + "grad_norm": 1.0827252864837646, + "learning_rate": 1.88356492144957e-06, + "loss": 0.2934, + "step": 36105 + }, + { + "epoch": 0.7227885794359783, + "grad_norm": 1.0466365814208984, + "learning_rate": 1.8833114191098634e-06, + "loss": 0.2952, + "step": 36106 + }, + { + "epoch": 0.7228085979531066, + "grad_norm": 1.2685291767120361, + "learning_rate": 1.8830579298720452e-06, + "loss": 0.3476, + "step": 36107 + }, + { + "epoch": 0.722828616470235, + "grad_norm": 1.3641844987869263, + "learning_rate": 1.8828044537371787e-06, + "loss": 0.3039, + "step": 36108 + }, + { + "epoch": 0.7228486349873633, + "grad_norm": 1.1646463871002197, + "learning_rate": 1.8825509907063328e-06, + "loss": 0.3013, + "step": 36109 + }, + { + "epoch": 0.7228686535044917, + "grad_norm": 1.3571662902832031, + "learning_rate": 1.8822975407805721e-06, + "loss": 0.3101, + "step": 36110 + }, + { + "epoch": 0.72288867202162, + "grad_norm": 1.0804543495178223, + "learning_rate": 1.8820441039609616e-06, + "loss": 0.2873, + "step": 36111 + }, + { + "epoch": 0.7229086905387483, + "grad_norm": 1.0838509798049927, + "learning_rate": 1.8817906802485648e-06, + "loss": 0.2744, + "step": 36112 + }, + { + "epoch": 0.7229287090558767, + "grad_norm": 1.9934940338134766, + "learning_rate": 1.8815372696444505e-06, + "loss": 0.7136, + "step": 36113 + }, + { + "epoch": 0.722948727573005, + "grad_norm": 1.2049343585968018, + "learning_rate": 1.8812838721496807e-06, + "loss": 0.2987, + "step": 36114 + }, + { + "epoch": 0.7229687460901334, + "grad_norm": 1.0701521635055542, + "learning_rate": 1.8810304877653246e-06, + "loss": 0.2914, + "step": 36115 + }, + { + "epoch": 0.7229887646072617, + "grad_norm": 1.1291462182998657, + "learning_rate": 1.880777116492445e-06, + "loss": 0.2827, + "step": 36116 + }, + { + "epoch": 0.7230087831243901, + "grad_norm": 1.40842866897583, + "learning_rate": 1.8805237583321045e-06, + "loss": 0.3151, + "step": 36117 + }, + { + "epoch": 0.7230288016415184, + "grad_norm": 1.2206785678863525, + "learning_rate": 1.8802704132853734e-06, + "loss": 0.2373, + "step": 36118 + }, + { + "epoch": 0.7230488201586468, + "grad_norm": 1.2804772853851318, + "learning_rate": 1.8800170813533142e-06, + "loss": 0.3055, + "step": 36119 + }, + { + "epoch": 0.7230688386757751, + "grad_norm": 0.9534772634506226, + "learning_rate": 1.8797637625369914e-06, + "loss": 0.2624, + "step": 36120 + }, + { + "epoch": 0.7230888571929034, + "grad_norm": 1.0685943365097046, + "learning_rate": 1.8795104568374684e-06, + "loss": 0.2713, + "step": 36121 + }, + { + "epoch": 0.7231088757100318, + "grad_norm": 1.9441887140274048, + "learning_rate": 1.8792571642558139e-06, + "loss": 0.8089, + "step": 36122 + }, + { + "epoch": 0.7231288942271601, + "grad_norm": 1.2061415910720825, + "learning_rate": 1.8790038847930908e-06, + "loss": 0.3182, + "step": 36123 + }, + { + "epoch": 0.7231489127442885, + "grad_norm": 1.1137720346450806, + "learning_rate": 1.8787506184503635e-06, + "loss": 0.2829, + "step": 36124 + }, + { + "epoch": 0.7231689312614168, + "grad_norm": 1.13886559009552, + "learning_rate": 1.878497365228697e-06, + "loss": 0.3292, + "step": 36125 + }, + { + "epoch": 0.7231889497785452, + "grad_norm": 1.102394938468933, + "learning_rate": 1.8782441251291538e-06, + "loss": 0.3069, + "step": 36126 + }, + { + "epoch": 0.7232089682956735, + "grad_norm": 1.1445130109786987, + "learning_rate": 1.877990898152801e-06, + "loss": 0.3047, + "step": 36127 + }, + { + "epoch": 0.7232289868128018, + "grad_norm": 1.4769984483718872, + "learning_rate": 1.8777376843007038e-06, + "loss": 0.3119, + "step": 36128 + }, + { + "epoch": 0.7232490053299302, + "grad_norm": 1.0873219966888428, + "learning_rate": 1.877484483573926e-06, + "loss": 0.3186, + "step": 36129 + }, + { + "epoch": 0.7232690238470585, + "grad_norm": 1.0939979553222656, + "learning_rate": 1.8772312959735294e-06, + "loss": 0.3007, + "step": 36130 + }, + { + "epoch": 0.7232890423641869, + "grad_norm": 1.1429799795150757, + "learning_rate": 1.8769781215005822e-06, + "loss": 0.3273, + "step": 36131 + }, + { + "epoch": 0.7233090608813152, + "grad_norm": 1.2155663967132568, + "learning_rate": 1.8767249601561472e-06, + "loss": 0.3413, + "step": 36132 + }, + { + "epoch": 0.7233290793984436, + "grad_norm": 1.1402404308319092, + "learning_rate": 1.8764718119412879e-06, + "loss": 0.2928, + "step": 36133 + }, + { + "epoch": 0.7233490979155719, + "grad_norm": 1.0728541612625122, + "learning_rate": 1.8762186768570672e-06, + "loss": 0.261, + "step": 36134 + }, + { + "epoch": 0.7233691164327003, + "grad_norm": 1.1610257625579834, + "learning_rate": 1.8759655549045525e-06, + "loss": 0.3379, + "step": 36135 + }, + { + "epoch": 0.7233891349498286, + "grad_norm": 1.7464576959609985, + "learning_rate": 1.875712446084807e-06, + "loss": 0.7196, + "step": 36136 + }, + { + "epoch": 0.7234091534669569, + "grad_norm": 1.192154884338379, + "learning_rate": 1.8754593503988932e-06, + "loss": 0.3353, + "step": 36137 + }, + { + "epoch": 0.7234291719840853, + "grad_norm": 1.2199008464813232, + "learning_rate": 1.8752062678478766e-06, + "loss": 0.3176, + "step": 36138 + }, + { + "epoch": 0.7234491905012136, + "grad_norm": 1.0623819828033447, + "learning_rate": 1.8749531984328185e-06, + "loss": 0.2831, + "step": 36139 + }, + { + "epoch": 0.723469209018342, + "grad_norm": 1.0685454607009888, + "learning_rate": 1.874700142154784e-06, + "loss": 0.2652, + "step": 36140 + }, + { + "epoch": 0.7234892275354703, + "grad_norm": 1.0361016988754272, + "learning_rate": 1.87444709901484e-06, + "loss": 0.2758, + "step": 36141 + }, + { + "epoch": 0.7235092460525987, + "grad_norm": 1.258666753768921, + "learning_rate": 1.8741940690140476e-06, + "loss": 0.301, + "step": 36142 + }, + { + "epoch": 0.723529264569727, + "grad_norm": 1.0875194072723389, + "learning_rate": 1.8739410521534684e-06, + "loss": 0.2911, + "step": 36143 + }, + { + "epoch": 0.7235492830868553, + "grad_norm": 1.0589195489883423, + "learning_rate": 1.8736880484341707e-06, + "loss": 0.302, + "step": 36144 + }, + { + "epoch": 0.7235693016039837, + "grad_norm": 1.0656253099441528, + "learning_rate": 1.8734350578572153e-06, + "loss": 0.2865, + "step": 36145 + }, + { + "epoch": 0.723589320121112, + "grad_norm": 1.0420500040054321, + "learning_rate": 1.873182080423666e-06, + "loss": 0.3228, + "step": 36146 + }, + { + "epoch": 0.7236093386382404, + "grad_norm": 1.2325472831726074, + "learning_rate": 1.872929116134587e-06, + "loss": 0.333, + "step": 36147 + }, + { + "epoch": 0.7236293571553687, + "grad_norm": 2.1103768348693848, + "learning_rate": 1.8726761649910385e-06, + "loss": 0.839, + "step": 36148 + }, + { + "epoch": 0.7236493756724971, + "grad_norm": 1.0308544635772705, + "learning_rate": 1.8724232269940885e-06, + "loss": 0.343, + "step": 36149 + }, + { + "epoch": 0.7236693941896254, + "grad_norm": 1.046755313873291, + "learning_rate": 1.8721703021447978e-06, + "loss": 0.2648, + "step": 36150 + }, + { + "epoch": 0.7236894127067538, + "grad_norm": 1.0641148090362549, + "learning_rate": 1.8719173904442307e-06, + "loss": 0.2621, + "step": 36151 + }, + { + "epoch": 0.7237094312238821, + "grad_norm": 1.1260688304901123, + "learning_rate": 1.8716644918934474e-06, + "loss": 0.3024, + "step": 36152 + }, + { + "epoch": 0.7237294497410104, + "grad_norm": 1.2029361724853516, + "learning_rate": 1.871411606493514e-06, + "loss": 0.3533, + "step": 36153 + }, + { + "epoch": 0.7237494682581388, + "grad_norm": 1.0300071239471436, + "learning_rate": 1.871158734245494e-06, + "loss": 0.2338, + "step": 36154 + }, + { + "epoch": 0.7237694867752671, + "grad_norm": 1.1473567485809326, + "learning_rate": 1.8709058751504493e-06, + "loss": 0.3087, + "step": 36155 + }, + { + "epoch": 0.7237895052923955, + "grad_norm": 1.1358407735824585, + "learning_rate": 1.8706530292094438e-06, + "loss": 0.266, + "step": 36156 + }, + { + "epoch": 0.7238095238095238, + "grad_norm": 1.1354577541351318, + "learning_rate": 1.8704001964235368e-06, + "loss": 0.3285, + "step": 36157 + }, + { + "epoch": 0.7238295423266522, + "grad_norm": 1.1271365880966187, + "learning_rate": 1.870147376793796e-06, + "loss": 0.2956, + "step": 36158 + }, + { + "epoch": 0.7238495608437805, + "grad_norm": 1.149730920791626, + "learning_rate": 1.8698945703212822e-06, + "loss": 0.2956, + "step": 36159 + }, + { + "epoch": 0.7238695793609088, + "grad_norm": 1.2306716442108154, + "learning_rate": 1.869641777007058e-06, + "loss": 0.2805, + "step": 36160 + }, + { + "epoch": 0.7238895978780372, + "grad_norm": 0.9745610356330872, + "learning_rate": 1.8693889968521845e-06, + "loss": 0.2661, + "step": 36161 + }, + { + "epoch": 0.7239096163951655, + "grad_norm": 1.090946078300476, + "learning_rate": 1.869136229857727e-06, + "loss": 0.2712, + "step": 36162 + }, + { + "epoch": 0.7239296349122939, + "grad_norm": 1.1861729621887207, + "learning_rate": 1.8688834760247475e-06, + "loss": 0.3192, + "step": 36163 + }, + { + "epoch": 0.7239496534294222, + "grad_norm": 1.1026122570037842, + "learning_rate": 1.8686307353543082e-06, + "loss": 0.2811, + "step": 36164 + }, + { + "epoch": 0.7239696719465506, + "grad_norm": 1.0090923309326172, + "learning_rate": 1.8683780078474695e-06, + "loss": 0.2954, + "step": 36165 + }, + { + "epoch": 0.7239896904636789, + "grad_norm": 1.947186827659607, + "learning_rate": 1.8681252935052957e-06, + "loss": 0.7294, + "step": 36166 + }, + { + "epoch": 0.7240097089808073, + "grad_norm": 1.240666389465332, + "learning_rate": 1.867872592328851e-06, + "loss": 0.3072, + "step": 36167 + }, + { + "epoch": 0.7240297274979356, + "grad_norm": 1.8238128423690796, + "learning_rate": 1.8676199043191962e-06, + "loss": 0.7363, + "step": 36168 + }, + { + "epoch": 0.7240497460150639, + "grad_norm": 1.1532790660858154, + "learning_rate": 1.8673672294773926e-06, + "loss": 0.2767, + "step": 36169 + }, + { + "epoch": 0.7240697645321923, + "grad_norm": 1.0073965787887573, + "learning_rate": 1.8671145678045016e-06, + "loss": 0.3229, + "step": 36170 + }, + { + "epoch": 0.7240897830493206, + "grad_norm": 1.1546580791473389, + "learning_rate": 1.8668619193015885e-06, + "loss": 0.3128, + "step": 36171 + }, + { + "epoch": 0.724109801566449, + "grad_norm": 1.2332714796066284, + "learning_rate": 1.8666092839697135e-06, + "loss": 0.3552, + "step": 36172 + }, + { + "epoch": 0.7241298200835773, + "grad_norm": 1.9159152507781982, + "learning_rate": 1.8663566618099389e-06, + "loss": 0.7242, + "step": 36173 + }, + { + "epoch": 0.7241498386007057, + "grad_norm": 1.3329529762268066, + "learning_rate": 1.8661040528233244e-06, + "loss": 0.283, + "step": 36174 + }, + { + "epoch": 0.724169857117834, + "grad_norm": 1.9163885116577148, + "learning_rate": 1.865851457010936e-06, + "loss": 0.6853, + "step": 36175 + }, + { + "epoch": 0.7241898756349623, + "grad_norm": 1.1162245273590088, + "learning_rate": 1.865598874373834e-06, + "loss": 0.3394, + "step": 36176 + }, + { + "epoch": 0.7242098941520907, + "grad_norm": 1.1714376211166382, + "learning_rate": 1.8653463049130793e-06, + "loss": 0.2768, + "step": 36177 + }, + { + "epoch": 0.724229912669219, + "grad_norm": 1.1014010906219482, + "learning_rate": 1.8650937486297327e-06, + "loss": 0.3299, + "step": 36178 + }, + { + "epoch": 0.7242499311863474, + "grad_norm": 1.1466093063354492, + "learning_rate": 1.8648412055248572e-06, + "loss": 0.2986, + "step": 36179 + }, + { + "epoch": 0.7242699497034757, + "grad_norm": 1.7821553945541382, + "learning_rate": 1.8645886755995164e-06, + "loss": 0.7518, + "step": 36180 + }, + { + "epoch": 0.7242899682206041, + "grad_norm": 1.1599985361099243, + "learning_rate": 1.86433615885477e-06, + "loss": 0.2995, + "step": 36181 + }, + { + "epoch": 0.7243099867377324, + "grad_norm": 1.0655087232589722, + "learning_rate": 1.8640836552916796e-06, + "loss": 0.3482, + "step": 36182 + }, + { + "epoch": 0.7243300052548608, + "grad_norm": 1.0772136449813843, + "learning_rate": 1.863831164911305e-06, + "loss": 0.3216, + "step": 36183 + }, + { + "epoch": 0.7243500237719891, + "grad_norm": 1.1556458473205566, + "learning_rate": 1.8635786877147104e-06, + "loss": 0.2511, + "step": 36184 + }, + { + "epoch": 0.7243700422891174, + "grad_norm": 1.0198643207550049, + "learning_rate": 1.863326223702957e-06, + "loss": 0.244, + "step": 36185 + }, + { + "epoch": 0.7243900608062458, + "grad_norm": 1.1011395454406738, + "learning_rate": 1.8630737728771049e-06, + "loss": 0.3008, + "step": 36186 + }, + { + "epoch": 0.7244100793233741, + "grad_norm": 1.2264845371246338, + "learning_rate": 1.8628213352382153e-06, + "loss": 0.2954, + "step": 36187 + }, + { + "epoch": 0.7244300978405025, + "grad_norm": 1.7723203897476196, + "learning_rate": 1.8625689107873479e-06, + "loss": 0.7267, + "step": 36188 + }, + { + "epoch": 0.7244501163576308, + "grad_norm": 1.0831270217895508, + "learning_rate": 1.8623164995255672e-06, + "loss": 0.323, + "step": 36189 + }, + { + "epoch": 0.7244701348747592, + "grad_norm": 1.0890498161315918, + "learning_rate": 1.862064101453933e-06, + "loss": 0.2786, + "step": 36190 + }, + { + "epoch": 0.7244901533918875, + "grad_norm": 1.0933226346969604, + "learning_rate": 1.8618117165735045e-06, + "loss": 0.3012, + "step": 36191 + }, + { + "epoch": 0.7245101719090158, + "grad_norm": 1.1234660148620605, + "learning_rate": 1.8615593448853436e-06, + "loss": 0.3251, + "step": 36192 + }, + { + "epoch": 0.7245301904261442, + "grad_norm": 1.2243462800979614, + "learning_rate": 1.8613069863905142e-06, + "loss": 0.2942, + "step": 36193 + }, + { + "epoch": 0.7245502089432725, + "grad_norm": 1.1741076707839966, + "learning_rate": 1.8610546410900743e-06, + "loss": 0.2942, + "step": 36194 + }, + { + "epoch": 0.7245702274604009, + "grad_norm": 1.2939735651016235, + "learning_rate": 1.8608023089850853e-06, + "loss": 0.283, + "step": 36195 + }, + { + "epoch": 0.7245902459775292, + "grad_norm": 1.178462266921997, + "learning_rate": 1.8605499900766062e-06, + "loss": 0.2968, + "step": 36196 + }, + { + "epoch": 0.7246102644946576, + "grad_norm": 1.1769160032272339, + "learning_rate": 1.860297684365701e-06, + "loss": 0.3109, + "step": 36197 + }, + { + "epoch": 0.7246302830117859, + "grad_norm": 1.092929482460022, + "learning_rate": 1.8600453918534289e-06, + "loss": 0.2792, + "step": 36198 + }, + { + "epoch": 0.7246503015289143, + "grad_norm": 1.2193721532821655, + "learning_rate": 1.8597931125408498e-06, + "loss": 0.2596, + "step": 36199 + }, + { + "epoch": 0.7246703200460426, + "grad_norm": 1.1693285703659058, + "learning_rate": 1.8595408464290248e-06, + "loss": 0.3394, + "step": 36200 + }, + { + "epoch": 0.7246903385631709, + "grad_norm": 1.0848406553268433, + "learning_rate": 1.859288593519013e-06, + "loss": 0.3414, + "step": 36201 + }, + { + "epoch": 0.7247103570802993, + "grad_norm": 1.094955563545227, + "learning_rate": 1.8590363538118772e-06, + "loss": 0.3127, + "step": 36202 + }, + { + "epoch": 0.7247303755974276, + "grad_norm": 1.1185686588287354, + "learning_rate": 1.858784127308677e-06, + "loss": 0.2626, + "step": 36203 + }, + { + "epoch": 0.724750394114556, + "grad_norm": 1.1160364151000977, + "learning_rate": 1.8585319140104702e-06, + "loss": 0.2854, + "step": 36204 + }, + { + "epoch": 0.7247704126316843, + "grad_norm": 1.2216477394104004, + "learning_rate": 1.8582797139183195e-06, + "loss": 0.3261, + "step": 36205 + }, + { + "epoch": 0.7247904311488127, + "grad_norm": 1.9660497903823853, + "learning_rate": 1.8580275270332865e-06, + "loss": 0.7063, + "step": 36206 + }, + { + "epoch": 0.724810449665941, + "grad_norm": 1.160657525062561, + "learning_rate": 1.8577753533564296e-06, + "loss": 0.2911, + "step": 36207 + }, + { + "epoch": 0.7248304681830693, + "grad_norm": 1.2287312746047974, + "learning_rate": 1.8575231928888087e-06, + "loss": 0.2975, + "step": 36208 + }, + { + "epoch": 0.7248504867001977, + "grad_norm": 1.1481140851974487, + "learning_rate": 1.8572710456314841e-06, + "loss": 0.337, + "step": 36209 + }, + { + "epoch": 0.724870505217326, + "grad_norm": 1.9077647924423218, + "learning_rate": 1.8570189115855141e-06, + "loss": 0.7086, + "step": 36210 + }, + { + "epoch": 0.7248905237344544, + "grad_norm": 1.1487095355987549, + "learning_rate": 1.8567667907519622e-06, + "loss": 0.2915, + "step": 36211 + }, + { + "epoch": 0.7249105422515827, + "grad_norm": 1.276623010635376, + "learning_rate": 1.856514683131886e-06, + "loss": 0.3259, + "step": 36212 + }, + { + "epoch": 0.7249305607687111, + "grad_norm": 1.1209681034088135, + "learning_rate": 1.8562625887263453e-06, + "loss": 0.3034, + "step": 36213 + }, + { + "epoch": 0.7249505792858394, + "grad_norm": 1.3412139415740967, + "learning_rate": 1.8560105075363989e-06, + "loss": 0.2994, + "step": 36214 + }, + { + "epoch": 0.7249705978029678, + "grad_norm": 1.942362666130066, + "learning_rate": 1.855758439563109e-06, + "loss": 0.6771, + "step": 36215 + }, + { + "epoch": 0.7249906163200961, + "grad_norm": 1.173616886138916, + "learning_rate": 1.8555063848075338e-06, + "loss": 0.3127, + "step": 36216 + }, + { + "epoch": 0.7250106348372244, + "grad_norm": 1.0988497734069824, + "learning_rate": 1.855254343270732e-06, + "loss": 0.2496, + "step": 36217 + }, + { + "epoch": 0.7250306533543528, + "grad_norm": 0.9930261373519897, + "learning_rate": 1.8550023149537654e-06, + "loss": 0.2921, + "step": 36218 + }, + { + "epoch": 0.7250506718714811, + "grad_norm": 1.0917876958847046, + "learning_rate": 1.854750299857691e-06, + "loss": 0.3263, + "step": 36219 + }, + { + "epoch": 0.7250706903886095, + "grad_norm": 1.155363917350769, + "learning_rate": 1.8544982979835707e-06, + "loss": 0.2823, + "step": 36220 + }, + { + "epoch": 0.7250907089057378, + "grad_norm": 1.9550272226333618, + "learning_rate": 1.8542463093324631e-06, + "loss": 0.7321, + "step": 36221 + }, + { + "epoch": 0.7251107274228662, + "grad_norm": 0.9764054417610168, + "learning_rate": 1.8539943339054268e-06, + "loss": 0.2851, + "step": 36222 + }, + { + "epoch": 0.7251307459399945, + "grad_norm": 0.9776896834373474, + "learning_rate": 1.8537423717035197e-06, + "loss": 0.2972, + "step": 36223 + }, + { + "epoch": 0.7251507644571228, + "grad_norm": 1.011940598487854, + "learning_rate": 1.8534904227278045e-06, + "loss": 0.3168, + "step": 36224 + }, + { + "epoch": 0.7251707829742512, + "grad_norm": 1.1249322891235352, + "learning_rate": 1.853238486979338e-06, + "loss": 0.296, + "step": 36225 + }, + { + "epoch": 0.7251908014913795, + "grad_norm": 1.136669635772705, + "learning_rate": 1.85298656445918e-06, + "loss": 0.3078, + "step": 36226 + }, + { + "epoch": 0.7252108200085079, + "grad_norm": 1.189944863319397, + "learning_rate": 1.8527346551683873e-06, + "loss": 0.3085, + "step": 36227 + }, + { + "epoch": 0.7252308385256362, + "grad_norm": 1.155888319015503, + "learning_rate": 1.8524827591080224e-06, + "loss": 0.2853, + "step": 36228 + }, + { + "epoch": 0.7252508570427646, + "grad_norm": 1.2594019174575806, + "learning_rate": 1.852230876279143e-06, + "loss": 0.3213, + "step": 36229 + }, + { + "epoch": 0.7252708755598929, + "grad_norm": 1.143391728401184, + "learning_rate": 1.8519790066828058e-06, + "loss": 0.337, + "step": 36230 + }, + { + "epoch": 0.7252908940770213, + "grad_norm": 1.2638068199157715, + "learning_rate": 1.8517271503200728e-06, + "loss": 0.2821, + "step": 36231 + }, + { + "epoch": 0.7253109125941496, + "grad_norm": 1.089884877204895, + "learning_rate": 1.851475307192e-06, + "loss": 0.313, + "step": 36232 + }, + { + "epoch": 0.7253309311112779, + "grad_norm": 1.0874913930892944, + "learning_rate": 1.851223477299649e-06, + "loss": 0.3321, + "step": 36233 + }, + { + "epoch": 0.7253509496284063, + "grad_norm": 1.049965262413025, + "learning_rate": 1.850971660644077e-06, + "loss": 0.2729, + "step": 36234 + }, + { + "epoch": 0.7253709681455346, + "grad_norm": 1.0842291116714478, + "learning_rate": 1.8507198572263419e-06, + "loss": 0.3059, + "step": 36235 + }, + { + "epoch": 0.725390986662663, + "grad_norm": 1.091048002243042, + "learning_rate": 1.8504680670475016e-06, + "loss": 0.322, + "step": 36236 + }, + { + "epoch": 0.7254110051797913, + "grad_norm": 1.9116779565811157, + "learning_rate": 1.8502162901086175e-06, + "loss": 0.8205, + "step": 36237 + }, + { + "epoch": 0.7254310236969197, + "grad_norm": 1.0107593536376953, + "learning_rate": 1.8499645264107463e-06, + "loss": 0.2798, + "step": 36238 + }, + { + "epoch": 0.725451042214048, + "grad_norm": 1.0836780071258545, + "learning_rate": 1.8497127759549461e-06, + "loss": 0.2539, + "step": 36239 + }, + { + "epoch": 0.7254710607311763, + "grad_norm": 1.0456823110580444, + "learning_rate": 1.8494610387422757e-06, + "loss": 0.3115, + "step": 36240 + }, + { + "epoch": 0.7254910792483047, + "grad_norm": 1.9482150077819824, + "learning_rate": 1.849209314773791e-06, + "loss": 0.7964, + "step": 36241 + }, + { + "epoch": 0.725511097765433, + "grad_norm": 1.2079310417175293, + "learning_rate": 1.848957604050554e-06, + "loss": 0.3556, + "step": 36242 + }, + { + "epoch": 0.7255311162825614, + "grad_norm": 1.0648951530456543, + "learning_rate": 1.8487059065736196e-06, + "loss": 0.2929, + "step": 36243 + }, + { + "epoch": 0.7255511347996897, + "grad_norm": 1.0499910116195679, + "learning_rate": 1.8484542223440495e-06, + "loss": 0.2699, + "step": 36244 + }, + { + "epoch": 0.7255711533168181, + "grad_norm": 1.0904552936553955, + "learning_rate": 1.8482025513628977e-06, + "loss": 0.3014, + "step": 36245 + }, + { + "epoch": 0.7255911718339464, + "grad_norm": 2.0748510360717773, + "learning_rate": 1.8479508936312252e-06, + "loss": 0.8148, + "step": 36246 + }, + { + "epoch": 0.7256111903510748, + "grad_norm": 1.146021842956543, + "learning_rate": 1.8476992491500894e-06, + "loss": 0.2636, + "step": 36247 + }, + { + "epoch": 0.7256312088682031, + "grad_norm": 1.1706761121749878, + "learning_rate": 1.8474476179205475e-06, + "loss": 0.3092, + "step": 36248 + }, + { + "epoch": 0.7256512273853314, + "grad_norm": 1.1145350933074951, + "learning_rate": 1.8471959999436555e-06, + "loss": 0.293, + "step": 36249 + }, + { + "epoch": 0.7256712459024598, + "grad_norm": 1.0322209596633911, + "learning_rate": 1.8469443952204751e-06, + "loss": 0.2598, + "step": 36250 + }, + { + "epoch": 0.7256912644195881, + "grad_norm": 1.3789348602294922, + "learning_rate": 1.8466928037520616e-06, + "loss": 0.3216, + "step": 36251 + }, + { + "epoch": 0.7257112829367165, + "grad_norm": 1.0697156190872192, + "learning_rate": 1.8464412255394731e-06, + "loss": 0.3297, + "step": 36252 + }, + { + "epoch": 0.7257313014538448, + "grad_norm": 2.099400043487549, + "learning_rate": 1.846189660583767e-06, + "loss": 0.7549, + "step": 36253 + }, + { + "epoch": 0.7257513199709732, + "grad_norm": 1.1337611675262451, + "learning_rate": 1.845938108885999e-06, + "loss": 0.2854, + "step": 36254 + }, + { + "epoch": 0.7257713384881015, + "grad_norm": 0.9950451254844666, + "learning_rate": 1.8456865704472304e-06, + "loss": 0.3159, + "step": 36255 + }, + { + "epoch": 0.7257913570052298, + "grad_norm": 1.1127649545669556, + "learning_rate": 1.8454350452685144e-06, + "loss": 0.3153, + "step": 36256 + }, + { + "epoch": 0.7258113755223582, + "grad_norm": 1.0336319208145142, + "learning_rate": 1.8451835333509126e-06, + "loss": 0.3158, + "step": 36257 + }, + { + "epoch": 0.7258313940394865, + "grad_norm": 1.0939557552337646, + "learning_rate": 1.8449320346954791e-06, + "loss": 0.2854, + "step": 36258 + }, + { + "epoch": 0.7258514125566149, + "grad_norm": 1.0920466184616089, + "learning_rate": 1.8446805493032733e-06, + "loss": 0.2944, + "step": 36259 + }, + { + "epoch": 0.7258714310737432, + "grad_norm": 1.174513578414917, + "learning_rate": 1.8444290771753521e-06, + "loss": 0.3212, + "step": 36260 + }, + { + "epoch": 0.7258914495908716, + "grad_norm": 1.3093154430389404, + "learning_rate": 1.8441776183127713e-06, + "loss": 0.2853, + "step": 36261 + }, + { + "epoch": 0.7259114681079999, + "grad_norm": 1.1308377981185913, + "learning_rate": 1.8439261727165891e-06, + "loss": 0.3194, + "step": 36262 + }, + { + "epoch": 0.7259314866251283, + "grad_norm": 1.141157865524292, + "learning_rate": 1.8436747403878603e-06, + "loss": 0.324, + "step": 36263 + }, + { + "epoch": 0.7259515051422566, + "grad_norm": 1.2053189277648926, + "learning_rate": 1.8434233213276453e-06, + "loss": 0.2617, + "step": 36264 + }, + { + "epoch": 0.7259715236593849, + "grad_norm": 1.3341766595840454, + "learning_rate": 1.8431719155369993e-06, + "loss": 0.3519, + "step": 36265 + }, + { + "epoch": 0.7259915421765133, + "grad_norm": 1.1395959854125977, + "learning_rate": 1.8429205230169794e-06, + "loss": 0.3081, + "step": 36266 + }, + { + "epoch": 0.7260115606936416, + "grad_norm": 1.2377129793167114, + "learning_rate": 1.8426691437686406e-06, + "loss": 0.3291, + "step": 36267 + }, + { + "epoch": 0.72603157921077, + "grad_norm": 1.125554084777832, + "learning_rate": 1.8424177777930424e-06, + "loss": 0.3195, + "step": 36268 + }, + { + "epoch": 0.7260515977278983, + "grad_norm": 1.1780121326446533, + "learning_rate": 1.8421664250912392e-06, + "loss": 0.3106, + "step": 36269 + }, + { + "epoch": 0.7260716162450267, + "grad_norm": 1.8549489974975586, + "learning_rate": 1.8419150856642903e-06, + "loss": 0.7767, + "step": 36270 + }, + { + "epoch": 0.726091634762155, + "grad_norm": 1.0916595458984375, + "learning_rate": 1.8416637595132507e-06, + "loss": 0.3302, + "step": 36271 + }, + { + "epoch": 0.7261116532792833, + "grad_norm": 1.0907273292541504, + "learning_rate": 1.8414124466391752e-06, + "loss": 0.2921, + "step": 36272 + }, + { + "epoch": 0.7261316717964117, + "grad_norm": 1.155771017074585, + "learning_rate": 1.841161147043124e-06, + "loss": 0.3098, + "step": 36273 + }, + { + "epoch": 0.72615169031354, + "grad_norm": 1.087699055671692, + "learning_rate": 1.840909860726151e-06, + "loss": 0.331, + "step": 36274 + }, + { + "epoch": 0.7261717088306684, + "grad_norm": 1.1856709718704224, + "learning_rate": 1.8406585876893135e-06, + "loss": 0.3109, + "step": 36275 + }, + { + "epoch": 0.7261917273477967, + "grad_norm": 1.9192661046981812, + "learning_rate": 1.840407327933666e-06, + "loss": 0.8643, + "step": 36276 + }, + { + "epoch": 0.7262117458649251, + "grad_norm": 1.0801399946212769, + "learning_rate": 1.8401560814602675e-06, + "loss": 0.3224, + "step": 36277 + }, + { + "epoch": 0.7262317643820534, + "grad_norm": 1.1563217639923096, + "learning_rate": 1.8399048482701726e-06, + "loss": 0.2637, + "step": 36278 + }, + { + "epoch": 0.7262517828991818, + "grad_norm": 1.0994956493377686, + "learning_rate": 1.839653628364438e-06, + "loss": 0.2765, + "step": 36279 + }, + { + "epoch": 0.7262718014163101, + "grad_norm": 1.081424593925476, + "learning_rate": 1.8394024217441175e-06, + "loss": 0.2696, + "step": 36280 + }, + { + "epoch": 0.7262918199334384, + "grad_norm": 1.972111463546753, + "learning_rate": 1.8391512284102708e-06, + "loss": 0.8027, + "step": 36281 + }, + { + "epoch": 0.7263118384505668, + "grad_norm": 1.1044175624847412, + "learning_rate": 1.8389000483639508e-06, + "loss": 0.2901, + "step": 36282 + }, + { + "epoch": 0.7263318569676951, + "grad_norm": 1.189508318901062, + "learning_rate": 1.838648881606216e-06, + "loss": 0.2907, + "step": 36283 + }, + { + "epoch": 0.7263518754848235, + "grad_norm": 1.1502317190170288, + "learning_rate": 1.838397728138121e-06, + "loss": 0.2859, + "step": 36284 + }, + { + "epoch": 0.7263718940019518, + "grad_norm": 1.1348903179168701, + "learning_rate": 1.8381465879607202e-06, + "loss": 0.3118, + "step": 36285 + }, + { + "epoch": 0.7263919125190802, + "grad_norm": 1.0988258123397827, + "learning_rate": 1.837895461075072e-06, + "loss": 0.3066, + "step": 36286 + }, + { + "epoch": 0.7264119310362085, + "grad_norm": 1.0656192302703857, + "learning_rate": 1.837644347482231e-06, + "loss": 0.2613, + "step": 36287 + }, + { + "epoch": 0.7264319495533368, + "grad_norm": 1.100885272026062, + "learning_rate": 1.8373932471832528e-06, + "loss": 0.3033, + "step": 36288 + }, + { + "epoch": 0.7264519680704652, + "grad_norm": 1.1730254888534546, + "learning_rate": 1.8371421601791905e-06, + "loss": 0.3052, + "step": 36289 + }, + { + "epoch": 0.7264719865875935, + "grad_norm": 1.1042510271072388, + "learning_rate": 1.8368910864711043e-06, + "loss": 0.293, + "step": 36290 + }, + { + "epoch": 0.7264920051047219, + "grad_norm": 0.9801605343818665, + "learning_rate": 1.836640026060047e-06, + "loss": 0.2618, + "step": 36291 + }, + { + "epoch": 0.7265120236218502, + "grad_norm": 1.0944368839263916, + "learning_rate": 1.836388978947074e-06, + "loss": 0.3632, + "step": 36292 + }, + { + "epoch": 0.7265320421389786, + "grad_norm": 1.1359491348266602, + "learning_rate": 1.8361379451332395e-06, + "loss": 0.2861, + "step": 36293 + }, + { + "epoch": 0.7265520606561069, + "grad_norm": 1.0879298448562622, + "learning_rate": 1.8358869246196015e-06, + "loss": 0.316, + "step": 36294 + }, + { + "epoch": 0.7265720791732353, + "grad_norm": 1.0706803798675537, + "learning_rate": 1.835635917407213e-06, + "loss": 0.3076, + "step": 36295 + }, + { + "epoch": 0.7265920976903636, + "grad_norm": 1.0929274559020996, + "learning_rate": 1.8353849234971311e-06, + "loss": 0.2788, + "step": 36296 + }, + { + "epoch": 0.7266121162074919, + "grad_norm": 1.2587381601333618, + "learning_rate": 1.8351339428904102e-06, + "loss": 0.3167, + "step": 36297 + }, + { + "epoch": 0.7266321347246203, + "grad_norm": 1.0949206352233887, + "learning_rate": 1.8348829755881036e-06, + "loss": 0.3086, + "step": 36298 + }, + { + "epoch": 0.7266521532417486, + "grad_norm": 1.1806093454360962, + "learning_rate": 1.8346320215912694e-06, + "loss": 0.2969, + "step": 36299 + }, + { + "epoch": 0.726672171758877, + "grad_norm": 1.1627159118652344, + "learning_rate": 1.834381080900961e-06, + "loss": 0.3027, + "step": 36300 + }, + { + "epoch": 0.7266921902760053, + "grad_norm": 1.1416499614715576, + "learning_rate": 1.8341301535182331e-06, + "loss": 0.294, + "step": 36301 + }, + { + "epoch": 0.7267122087931337, + "grad_norm": 1.1009643077850342, + "learning_rate": 1.8338792394441412e-06, + "loss": 0.2999, + "step": 36302 + }, + { + "epoch": 0.726732227310262, + "grad_norm": 1.1615458726882935, + "learning_rate": 1.8336283386797372e-06, + "loss": 0.2906, + "step": 36303 + }, + { + "epoch": 0.7267522458273903, + "grad_norm": 1.1091049909591675, + "learning_rate": 1.8333774512260804e-06, + "loss": 0.2827, + "step": 36304 + }, + { + "epoch": 0.7267722643445187, + "grad_norm": 1.178024411201477, + "learning_rate": 1.8331265770842233e-06, + "loss": 0.3161, + "step": 36305 + }, + { + "epoch": 0.726792282861647, + "grad_norm": 1.0983998775482178, + "learning_rate": 1.8328757162552191e-06, + "loss": 0.2945, + "step": 36306 + }, + { + "epoch": 0.7268123013787754, + "grad_norm": 1.219619870185852, + "learning_rate": 1.832624868740125e-06, + "loss": 0.2888, + "step": 36307 + }, + { + "epoch": 0.7268323198959037, + "grad_norm": 1.767157793045044, + "learning_rate": 1.832374034539993e-06, + "loss": 0.7612, + "step": 36308 + }, + { + "epoch": 0.7268523384130321, + "grad_norm": 1.2072657346725464, + "learning_rate": 1.8321232136558803e-06, + "loss": 0.2685, + "step": 36309 + }, + { + "epoch": 0.7268723569301604, + "grad_norm": 1.0660563707351685, + "learning_rate": 1.8318724060888405e-06, + "loss": 0.3325, + "step": 36310 + }, + { + "epoch": 0.7268923754472888, + "grad_norm": 0.9419927597045898, + "learning_rate": 1.831621611839925e-06, + "loss": 0.2758, + "step": 36311 + }, + { + "epoch": 0.7269123939644171, + "grad_norm": 1.1076171398162842, + "learning_rate": 1.8313708309101924e-06, + "loss": 0.3093, + "step": 36312 + }, + { + "epoch": 0.7269324124815454, + "grad_norm": 1.175260066986084, + "learning_rate": 1.8311200633006948e-06, + "loss": 0.3109, + "step": 36313 + }, + { + "epoch": 0.7269524309986738, + "grad_norm": 1.1111692190170288, + "learning_rate": 1.8308693090124862e-06, + "loss": 0.2985, + "step": 36314 + }, + { + "epoch": 0.7269724495158021, + "grad_norm": 1.2316782474517822, + "learning_rate": 1.8306185680466214e-06, + "loss": 0.2738, + "step": 36315 + }, + { + "epoch": 0.7269924680329305, + "grad_norm": 1.3180909156799316, + "learning_rate": 1.8303678404041526e-06, + "loss": 0.3434, + "step": 36316 + }, + { + "epoch": 0.7270124865500588, + "grad_norm": 1.0575506687164307, + "learning_rate": 1.8301171260861362e-06, + "loss": 0.3164, + "step": 36317 + }, + { + "epoch": 0.7270325050671872, + "grad_norm": 1.1315672397613525, + "learning_rate": 1.829866425093626e-06, + "loss": 0.2923, + "step": 36318 + }, + { + "epoch": 0.7270525235843155, + "grad_norm": 1.3129023313522339, + "learning_rate": 1.829615737427673e-06, + "loss": 0.2727, + "step": 36319 + }, + { + "epoch": 0.7270725421014438, + "grad_norm": 1.1539967060089111, + "learning_rate": 1.829365063089335e-06, + "loss": 0.2741, + "step": 36320 + }, + { + "epoch": 0.7270925606185722, + "grad_norm": 1.410550594329834, + "learning_rate": 1.8291144020796625e-06, + "loss": 0.2934, + "step": 36321 + }, + { + "epoch": 0.7271125791357005, + "grad_norm": 1.101272702217102, + "learning_rate": 1.8288637543997117e-06, + "loss": 0.3206, + "step": 36322 + }, + { + "epoch": 0.7271325976528289, + "grad_norm": 1.796985149383545, + "learning_rate": 1.8286131200505358e-06, + "loss": 0.7488, + "step": 36323 + }, + { + "epoch": 0.7271526161699572, + "grad_norm": 1.0887956619262695, + "learning_rate": 1.8283624990331878e-06, + "loss": 0.2886, + "step": 36324 + }, + { + "epoch": 0.7271726346870856, + "grad_norm": 1.1408857107162476, + "learning_rate": 1.8281118913487194e-06, + "loss": 0.2701, + "step": 36325 + }, + { + "epoch": 0.7271926532042139, + "grad_norm": 1.181759238243103, + "learning_rate": 1.827861296998188e-06, + "loss": 0.28, + "step": 36326 + }, + { + "epoch": 0.7272126717213423, + "grad_norm": 1.0821119546890259, + "learning_rate": 1.8276107159826451e-06, + "loss": 0.3096, + "step": 36327 + }, + { + "epoch": 0.7272326902384706, + "grad_norm": 1.0538033246994019, + "learning_rate": 1.8273601483031433e-06, + "loss": 0.2669, + "step": 36328 + }, + { + "epoch": 0.7272527087555989, + "grad_norm": 1.1814554929733276, + "learning_rate": 1.827109593960736e-06, + "loss": 0.3174, + "step": 36329 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 1.0612068176269531, + "learning_rate": 1.8268590529564784e-06, + "loss": 0.301, + "step": 36330 + }, + { + "epoch": 0.7272927457898556, + "grad_norm": 1.0915766954421997, + "learning_rate": 1.8266085252914222e-06, + "loss": 0.2631, + "step": 36331 + }, + { + "epoch": 0.727312764306984, + "grad_norm": 1.1339491605758667, + "learning_rate": 1.8263580109666195e-06, + "loss": 0.3221, + "step": 36332 + }, + { + "epoch": 0.7273327828241123, + "grad_norm": 1.7992885112762451, + "learning_rate": 1.826107509983126e-06, + "loss": 0.7939, + "step": 36333 + }, + { + "epoch": 0.7273528013412407, + "grad_norm": 1.0878431797027588, + "learning_rate": 1.8258570223419924e-06, + "loss": 0.3253, + "step": 36334 + }, + { + "epoch": 0.727372819858369, + "grad_norm": 1.253709316253662, + "learning_rate": 1.8256065480442742e-06, + "loss": 0.3477, + "step": 36335 + }, + { + "epoch": 0.7273928383754973, + "grad_norm": 1.0761752128601074, + "learning_rate": 1.825356087091023e-06, + "loss": 0.2943, + "step": 36336 + }, + { + "epoch": 0.7274128568926257, + "grad_norm": 1.2162344455718994, + "learning_rate": 1.8251056394832917e-06, + "loss": 0.3426, + "step": 36337 + }, + { + "epoch": 0.727432875409754, + "grad_norm": 1.9220852851867676, + "learning_rate": 1.8248552052221307e-06, + "loss": 0.7921, + "step": 36338 + }, + { + "epoch": 0.7274528939268824, + "grad_norm": 1.2196940183639526, + "learning_rate": 1.8246047843085979e-06, + "loss": 0.3027, + "step": 36339 + }, + { + "epoch": 0.7274729124440107, + "grad_norm": 1.2176780700683594, + "learning_rate": 1.8243543767437427e-06, + "loss": 0.3809, + "step": 36340 + }, + { + "epoch": 0.7274929309611391, + "grad_norm": 1.048885464668274, + "learning_rate": 1.8241039825286183e-06, + "loss": 0.2797, + "step": 36341 + }, + { + "epoch": 0.7275129494782674, + "grad_norm": 1.3441451787948608, + "learning_rate": 1.8238536016642756e-06, + "loss": 0.3161, + "step": 36342 + }, + { + "epoch": 0.7275329679953958, + "grad_norm": 1.0396559238433838, + "learning_rate": 1.8236032341517702e-06, + "loss": 0.271, + "step": 36343 + }, + { + "epoch": 0.7275529865125241, + "grad_norm": 1.0665730237960815, + "learning_rate": 1.8233528799921535e-06, + "loss": 0.2749, + "step": 36344 + }, + { + "epoch": 0.7275730050296524, + "grad_norm": 1.7595278024673462, + "learning_rate": 1.823102539186476e-06, + "loss": 0.7373, + "step": 36345 + }, + { + "epoch": 0.7275930235467808, + "grad_norm": 1.2845075130462646, + "learning_rate": 1.8228522117357933e-06, + "loss": 0.3064, + "step": 36346 + }, + { + "epoch": 0.7276130420639091, + "grad_norm": 1.0715564489364624, + "learning_rate": 1.8226018976411547e-06, + "loss": 0.2805, + "step": 36347 + }, + { + "epoch": 0.7276330605810375, + "grad_norm": 1.0045104026794434, + "learning_rate": 1.8223515969036159e-06, + "loss": 0.2797, + "step": 36348 + }, + { + "epoch": 0.7276530790981658, + "grad_norm": 1.2004035711288452, + "learning_rate": 1.8221013095242269e-06, + "loss": 0.3218, + "step": 36349 + }, + { + "epoch": 0.7276730976152942, + "grad_norm": 1.861959457397461, + "learning_rate": 1.8218510355040397e-06, + "loss": 0.7319, + "step": 36350 + }, + { + "epoch": 0.7276931161324225, + "grad_norm": 1.2319364547729492, + "learning_rate": 1.8216007748441055e-06, + "loss": 0.2861, + "step": 36351 + }, + { + "epoch": 0.7277131346495508, + "grad_norm": 1.1719721555709839, + "learning_rate": 1.8213505275454795e-06, + "loss": 0.3264, + "step": 36352 + }, + { + "epoch": 0.7277331531666792, + "grad_norm": 1.0047223567962646, + "learning_rate": 1.8211002936092114e-06, + "loss": 0.2573, + "step": 36353 + }, + { + "epoch": 0.7277531716838075, + "grad_norm": 1.1594001054763794, + "learning_rate": 1.8208500730363537e-06, + "loss": 0.2967, + "step": 36354 + }, + { + "epoch": 0.7277731902009359, + "grad_norm": 1.1851800680160522, + "learning_rate": 1.8205998658279584e-06, + "loss": 0.3126, + "step": 36355 + }, + { + "epoch": 0.7277932087180642, + "grad_norm": 1.1620479822158813, + "learning_rate": 1.820349671985075e-06, + "loss": 0.3348, + "step": 36356 + }, + { + "epoch": 0.7278132272351926, + "grad_norm": 1.163357138633728, + "learning_rate": 1.8200994915087594e-06, + "loss": 0.3251, + "step": 36357 + }, + { + "epoch": 0.7278332457523209, + "grad_norm": 1.1417224407196045, + "learning_rate": 1.8198493244000593e-06, + "loss": 0.2547, + "step": 36358 + }, + { + "epoch": 0.7278532642694493, + "grad_norm": 1.1701494455337524, + "learning_rate": 1.8195991706600297e-06, + "loss": 0.3115, + "step": 36359 + }, + { + "epoch": 0.7278732827865776, + "grad_norm": 1.0901168584823608, + "learning_rate": 1.8193490302897192e-06, + "loss": 0.3057, + "step": 36360 + }, + { + "epoch": 0.7278933013037059, + "grad_norm": 1.2595818042755127, + "learning_rate": 1.819098903290183e-06, + "loss": 0.3271, + "step": 36361 + }, + { + "epoch": 0.7279133198208343, + "grad_norm": 1.0877641439437866, + "learning_rate": 1.81884878966247e-06, + "loss": 0.3182, + "step": 36362 + }, + { + "epoch": 0.7279333383379626, + "grad_norm": 1.1105074882507324, + "learning_rate": 1.8185986894076323e-06, + "loss": 0.2876, + "step": 36363 + }, + { + "epoch": 0.727953356855091, + "grad_norm": 1.1810095310211182, + "learning_rate": 1.818348602526721e-06, + "loss": 0.302, + "step": 36364 + }, + { + "epoch": 0.7279733753722193, + "grad_norm": 1.985368013381958, + "learning_rate": 1.8180985290207858e-06, + "loss": 0.7209, + "step": 36365 + }, + { + "epoch": 0.7279933938893477, + "grad_norm": 1.157616376876831, + "learning_rate": 1.8178484688908816e-06, + "loss": 0.2975, + "step": 36366 + }, + { + "epoch": 0.728013412406476, + "grad_norm": 1.2239086627960205, + "learning_rate": 1.8175984221380572e-06, + "loss": 0.2945, + "step": 36367 + }, + { + "epoch": 0.7280334309236043, + "grad_norm": 1.1343215703964233, + "learning_rate": 1.8173483887633642e-06, + "loss": 0.3213, + "step": 36368 + }, + { + "epoch": 0.7280534494407327, + "grad_norm": 1.1190780401229858, + "learning_rate": 1.8170983687678522e-06, + "loss": 0.2714, + "step": 36369 + }, + { + "epoch": 0.728073467957861, + "grad_norm": 1.359750509262085, + "learning_rate": 1.816848362152575e-06, + "loss": 0.3236, + "step": 36370 + }, + { + "epoch": 0.7280934864749894, + "grad_norm": 0.9908714890480042, + "learning_rate": 1.8165983689185813e-06, + "loss": 0.2719, + "step": 36371 + }, + { + "epoch": 0.7281135049921177, + "grad_norm": 1.2906033992767334, + "learning_rate": 1.8163483890669236e-06, + "loss": 0.2708, + "step": 36372 + }, + { + "epoch": 0.7281335235092461, + "grad_norm": 1.1484912633895874, + "learning_rate": 1.8160984225986516e-06, + "loss": 0.3166, + "step": 36373 + }, + { + "epoch": 0.7281535420263744, + "grad_norm": 1.0283879041671753, + "learning_rate": 1.8158484695148177e-06, + "loss": 0.3163, + "step": 36374 + }, + { + "epoch": 0.7281735605435027, + "grad_norm": 1.0366178750991821, + "learning_rate": 1.8155985298164719e-06, + "loss": 0.2658, + "step": 36375 + }, + { + "epoch": 0.7281935790606311, + "grad_norm": 1.0438857078552246, + "learning_rate": 1.8153486035046642e-06, + "loss": 0.3099, + "step": 36376 + }, + { + "epoch": 0.7282135975777594, + "grad_norm": 1.216367483139038, + "learning_rate": 1.8150986905804457e-06, + "loss": 0.2879, + "step": 36377 + }, + { + "epoch": 0.7282336160948878, + "grad_norm": 1.1196775436401367, + "learning_rate": 1.8148487910448653e-06, + "loss": 0.268, + "step": 36378 + }, + { + "epoch": 0.7282536346120161, + "grad_norm": 1.6959151029586792, + "learning_rate": 1.814598904898977e-06, + "loss": 0.7855, + "step": 36379 + }, + { + "epoch": 0.7282736531291445, + "grad_norm": 1.111196517944336, + "learning_rate": 1.8143490321438289e-06, + "loss": 0.3103, + "step": 36380 + }, + { + "epoch": 0.7282936716462728, + "grad_norm": 1.1426266431808472, + "learning_rate": 1.8140991727804723e-06, + "loss": 0.3086, + "step": 36381 + }, + { + "epoch": 0.7283136901634012, + "grad_norm": 1.0364603996276855, + "learning_rate": 1.8138493268099555e-06, + "loss": 0.3143, + "step": 36382 + }, + { + "epoch": 0.7283337086805295, + "grad_norm": 1.1220299005508423, + "learning_rate": 1.8135994942333323e-06, + "loss": 0.2781, + "step": 36383 + }, + { + "epoch": 0.7283537271976578, + "grad_norm": 1.022920846939087, + "learning_rate": 1.8133496750516495e-06, + "loss": 0.2469, + "step": 36384 + }, + { + "epoch": 0.7283737457147862, + "grad_norm": 1.1876322031021118, + "learning_rate": 1.8130998692659602e-06, + "loss": 0.3631, + "step": 36385 + }, + { + "epoch": 0.7283937642319145, + "grad_norm": 1.0667372941970825, + "learning_rate": 1.8128500768773133e-06, + "loss": 0.2813, + "step": 36386 + }, + { + "epoch": 0.7284137827490429, + "grad_norm": 1.040351390838623, + "learning_rate": 1.8126002978867568e-06, + "loss": 0.2743, + "step": 36387 + }, + { + "epoch": 0.7284338012661712, + "grad_norm": 1.1741644144058228, + "learning_rate": 1.812350532295345e-06, + "loss": 0.3007, + "step": 36388 + }, + { + "epoch": 0.7284538197832996, + "grad_norm": 1.0439234972000122, + "learning_rate": 1.8121007801041252e-06, + "loss": 0.3151, + "step": 36389 + }, + { + "epoch": 0.7284738383004279, + "grad_norm": 1.0190560817718506, + "learning_rate": 1.8118510413141477e-06, + "loss": 0.2825, + "step": 36390 + }, + { + "epoch": 0.7284938568175562, + "grad_norm": 1.05606210231781, + "learning_rate": 1.8116013159264607e-06, + "loss": 0.2914, + "step": 36391 + }, + { + "epoch": 0.7285138753346846, + "grad_norm": 1.2524722814559937, + "learning_rate": 1.8113516039421175e-06, + "loss": 0.3459, + "step": 36392 + }, + { + "epoch": 0.7285338938518129, + "grad_norm": 0.9943835735321045, + "learning_rate": 1.8111019053621658e-06, + "loss": 0.2735, + "step": 36393 + }, + { + "epoch": 0.7285539123689413, + "grad_norm": 1.0381500720977783, + "learning_rate": 1.8108522201876555e-06, + "loss": 0.3357, + "step": 36394 + }, + { + "epoch": 0.7285739308860696, + "grad_norm": 1.103949785232544, + "learning_rate": 1.8106025484196343e-06, + "loss": 0.303, + "step": 36395 + }, + { + "epoch": 0.728593949403198, + "grad_norm": 1.1891050338745117, + "learning_rate": 1.810352890059155e-06, + "loss": 0.3375, + "step": 36396 + }, + { + "epoch": 0.7286139679203263, + "grad_norm": 1.1057010889053345, + "learning_rate": 1.8101032451072647e-06, + "loss": 0.3049, + "step": 36397 + }, + { + "epoch": 0.7286339864374547, + "grad_norm": 1.139559030532837, + "learning_rate": 1.8098536135650152e-06, + "loss": 0.3208, + "step": 36398 + }, + { + "epoch": 0.728654004954583, + "grad_norm": 1.0773426294326782, + "learning_rate": 1.809603995433455e-06, + "loss": 0.3176, + "step": 36399 + }, + { + "epoch": 0.7286740234717113, + "grad_norm": 0.9811414480209351, + "learning_rate": 1.8093543907136307e-06, + "loss": 0.2458, + "step": 36400 + }, + { + "epoch": 0.7286940419888397, + "grad_norm": 1.0963023900985718, + "learning_rate": 1.8091047994065958e-06, + "loss": 0.2893, + "step": 36401 + }, + { + "epoch": 0.728714060505968, + "grad_norm": 1.363968849182129, + "learning_rate": 1.8088552215133975e-06, + "loss": 0.3005, + "step": 36402 + }, + { + "epoch": 0.7287340790230964, + "grad_norm": 1.1233186721801758, + "learning_rate": 1.8086056570350858e-06, + "loss": 0.3189, + "step": 36403 + }, + { + "epoch": 0.7287540975402247, + "grad_norm": 1.1287708282470703, + "learning_rate": 1.8083561059727066e-06, + "loss": 0.264, + "step": 36404 + }, + { + "epoch": 0.7287741160573531, + "grad_norm": 1.1490716934204102, + "learning_rate": 1.8081065683273136e-06, + "loss": 0.3214, + "step": 36405 + }, + { + "epoch": 0.7287941345744814, + "grad_norm": 1.0252084732055664, + "learning_rate": 1.8078570440999538e-06, + "loss": 0.297, + "step": 36406 + }, + { + "epoch": 0.7288141530916097, + "grad_norm": 1.1037428379058838, + "learning_rate": 1.807607533291676e-06, + "loss": 0.3044, + "step": 36407 + }, + { + "epoch": 0.7288341716087381, + "grad_norm": 0.9853706359863281, + "learning_rate": 1.807358035903527e-06, + "loss": 0.283, + "step": 36408 + }, + { + "epoch": 0.7288541901258664, + "grad_norm": 1.0485738515853882, + "learning_rate": 1.80710855193656e-06, + "loss": 0.2798, + "step": 36409 + }, + { + "epoch": 0.7288742086429948, + "grad_norm": 1.1357502937316895, + "learning_rate": 1.8068590813918196e-06, + "loss": 0.3212, + "step": 36410 + }, + { + "epoch": 0.7288942271601231, + "grad_norm": 1.1279031038284302, + "learning_rate": 1.8066096242703578e-06, + "loss": 0.2624, + "step": 36411 + }, + { + "epoch": 0.7289142456772515, + "grad_norm": 1.8360854387283325, + "learning_rate": 1.8063601805732223e-06, + "loss": 0.7606, + "step": 36412 + }, + { + "epoch": 0.7289342641943798, + "grad_norm": 1.1329156160354614, + "learning_rate": 1.8061107503014596e-06, + "loss": 0.319, + "step": 36413 + }, + { + "epoch": 0.7289542827115082, + "grad_norm": 1.0557901859283447, + "learning_rate": 1.8058613334561215e-06, + "loss": 0.3166, + "step": 36414 + }, + { + "epoch": 0.7289743012286365, + "grad_norm": 1.1376665830612183, + "learning_rate": 1.8056119300382552e-06, + "loss": 0.2903, + "step": 36415 + }, + { + "epoch": 0.7289943197457648, + "grad_norm": 1.0604053735733032, + "learning_rate": 1.805362540048909e-06, + "loss": 0.3243, + "step": 36416 + }, + { + "epoch": 0.7290143382628932, + "grad_norm": 1.0137138366699219, + "learning_rate": 1.8051131634891305e-06, + "loss": 0.2431, + "step": 36417 + }, + { + "epoch": 0.7290343567800215, + "grad_norm": 1.0861921310424805, + "learning_rate": 1.8048638003599679e-06, + "loss": 0.3272, + "step": 36418 + }, + { + "epoch": 0.7290543752971499, + "grad_norm": 1.1545988321304321, + "learning_rate": 1.8046144506624714e-06, + "loss": 0.3307, + "step": 36419 + }, + { + "epoch": 0.7290743938142782, + "grad_norm": 1.112888216972351, + "learning_rate": 1.8043651143976882e-06, + "loss": 0.3114, + "step": 36420 + }, + { + "epoch": 0.7290944123314066, + "grad_norm": 1.1835623979568481, + "learning_rate": 1.8041157915666646e-06, + "loss": 0.2829, + "step": 36421 + }, + { + "epoch": 0.7291144308485349, + "grad_norm": 0.9879348278045654, + "learning_rate": 1.8038664821704526e-06, + "loss": 0.2378, + "step": 36422 + }, + { + "epoch": 0.7291344493656632, + "grad_norm": 1.1328397989273071, + "learning_rate": 1.803617186210096e-06, + "loss": 0.3166, + "step": 36423 + }, + { + "epoch": 0.7291544678827916, + "grad_norm": 1.0839457511901855, + "learning_rate": 1.8033679036866463e-06, + "loss": 0.2842, + "step": 36424 + }, + { + "epoch": 0.7291744863999199, + "grad_norm": 1.0439079999923706, + "learning_rate": 1.803118634601151e-06, + "loss": 0.269, + "step": 36425 + }, + { + "epoch": 0.7291945049170483, + "grad_norm": 1.2663017511367798, + "learning_rate": 1.8028693789546542e-06, + "loss": 0.2961, + "step": 36426 + }, + { + "epoch": 0.7292145234341766, + "grad_norm": 1.158423900604248, + "learning_rate": 1.8026201367482089e-06, + "loss": 0.2332, + "step": 36427 + }, + { + "epoch": 0.729234541951305, + "grad_norm": 1.0708515644073486, + "learning_rate": 1.8023709079828599e-06, + "loss": 0.2761, + "step": 36428 + }, + { + "epoch": 0.7292545604684333, + "grad_norm": 1.0753812789916992, + "learning_rate": 1.8021216926596557e-06, + "loss": 0.3265, + "step": 36429 + }, + { + "epoch": 0.7292745789855617, + "grad_norm": 1.200583577156067, + "learning_rate": 1.8018724907796443e-06, + "loss": 0.3099, + "step": 36430 + }, + { + "epoch": 0.72929459750269, + "grad_norm": 1.0972466468811035, + "learning_rate": 1.80162330234387e-06, + "loss": 0.2734, + "step": 36431 + }, + { + "epoch": 0.7293146160198183, + "grad_norm": 1.17308509349823, + "learning_rate": 1.8013741273533852e-06, + "loss": 0.3159, + "step": 36432 + }, + { + "epoch": 0.7293346345369467, + "grad_norm": 1.1232801675796509, + "learning_rate": 1.8011249658092356e-06, + "loss": 0.3166, + "step": 36433 + }, + { + "epoch": 0.729354653054075, + "grad_norm": 1.9073066711425781, + "learning_rate": 1.8008758177124663e-06, + "loss": 0.7593, + "step": 36434 + }, + { + "epoch": 0.7293746715712034, + "grad_norm": 1.0897636413574219, + "learning_rate": 1.8006266830641279e-06, + "loss": 0.2952, + "step": 36435 + }, + { + "epoch": 0.7293946900883317, + "grad_norm": 1.196826696395874, + "learning_rate": 1.8003775618652653e-06, + "loss": 0.3621, + "step": 36436 + }, + { + "epoch": 0.7294147086054601, + "grad_norm": 1.0444831848144531, + "learning_rate": 1.8001284541169278e-06, + "loss": 0.2964, + "step": 36437 + }, + { + "epoch": 0.7294347271225884, + "grad_norm": 1.2333341836929321, + "learning_rate": 1.7998793598201624e-06, + "loss": 0.3842, + "step": 36438 + }, + { + "epoch": 0.7294547456397167, + "grad_norm": 1.064112663269043, + "learning_rate": 1.7996302789760151e-06, + "loss": 0.3072, + "step": 36439 + }, + { + "epoch": 0.7294747641568451, + "grad_norm": 1.0959677696228027, + "learning_rate": 1.7993812115855319e-06, + "loss": 0.3065, + "step": 36440 + }, + { + "epoch": 0.7294947826739734, + "grad_norm": 1.103100299835205, + "learning_rate": 1.7991321576497627e-06, + "loss": 0.3092, + "step": 36441 + }, + { + "epoch": 0.7295148011911018, + "grad_norm": 1.1428264379501343, + "learning_rate": 1.7988831171697536e-06, + "loss": 0.3036, + "step": 36442 + }, + { + "epoch": 0.7295348197082301, + "grad_norm": 1.1705601215362549, + "learning_rate": 1.7986340901465505e-06, + "loss": 0.3108, + "step": 36443 + }, + { + "epoch": 0.7295548382253585, + "grad_norm": 1.1692417860031128, + "learning_rate": 1.7983850765811994e-06, + "loss": 0.3175, + "step": 36444 + }, + { + "epoch": 0.7295748567424868, + "grad_norm": 1.7566349506378174, + "learning_rate": 1.7981360764747497e-06, + "loss": 0.7464, + "step": 36445 + }, + { + "epoch": 0.7295948752596152, + "grad_norm": 1.017165184020996, + "learning_rate": 1.7978870898282469e-06, + "loss": 0.2829, + "step": 36446 + }, + { + "epoch": 0.7296148937767435, + "grad_norm": 1.7558200359344482, + "learning_rate": 1.7976381166427364e-06, + "loss": 0.7772, + "step": 36447 + }, + { + "epoch": 0.7296349122938718, + "grad_norm": 1.073048710823059, + "learning_rate": 1.7973891569192675e-06, + "loss": 0.3074, + "step": 36448 + }, + { + "epoch": 0.7296549308110002, + "grad_norm": 1.0790960788726807, + "learning_rate": 1.7971402106588841e-06, + "loss": 0.2835, + "step": 36449 + }, + { + "epoch": 0.7296749493281285, + "grad_norm": 1.060288429260254, + "learning_rate": 1.7968912778626352e-06, + "loss": 0.2751, + "step": 36450 + }, + { + "epoch": 0.7296949678452569, + "grad_norm": 1.073564887046814, + "learning_rate": 1.7966423585315661e-06, + "loss": 0.2815, + "step": 36451 + }, + { + "epoch": 0.7297149863623852, + "grad_norm": 1.185718297958374, + "learning_rate": 1.7963934526667238e-06, + "loss": 0.2732, + "step": 36452 + }, + { + "epoch": 0.7297350048795136, + "grad_norm": 1.0590044260025024, + "learning_rate": 1.7961445602691512e-06, + "loss": 0.2928, + "step": 36453 + }, + { + "epoch": 0.7297550233966419, + "grad_norm": 1.2328144311904907, + "learning_rate": 1.7958956813398999e-06, + "loss": 0.2877, + "step": 36454 + }, + { + "epoch": 0.7297750419137702, + "grad_norm": 1.243860125541687, + "learning_rate": 1.7956468158800133e-06, + "loss": 0.3237, + "step": 36455 + }, + { + "epoch": 0.7297950604308986, + "grad_norm": 1.0657250881195068, + "learning_rate": 1.7953979638905384e-06, + "loss": 0.3403, + "step": 36456 + }, + { + "epoch": 0.7298150789480269, + "grad_norm": 1.0700451135635376, + "learning_rate": 1.7951491253725183e-06, + "loss": 0.2749, + "step": 36457 + }, + { + "epoch": 0.7298350974651553, + "grad_norm": 1.9394607543945312, + "learning_rate": 1.7949003003270038e-06, + "loss": 0.7957, + "step": 36458 + }, + { + "epoch": 0.7298551159822836, + "grad_norm": 1.2674744129180908, + "learning_rate": 1.7946514887550388e-06, + "loss": 0.3519, + "step": 36459 + }, + { + "epoch": 0.729875134499412, + "grad_norm": 1.8852183818817139, + "learning_rate": 1.7944026906576672e-06, + "loss": 0.7801, + "step": 36460 + }, + { + "epoch": 0.7298951530165403, + "grad_norm": 1.7805625200271606, + "learning_rate": 1.7941539060359385e-06, + "loss": 0.7629, + "step": 36461 + }, + { + "epoch": 0.7299151715336687, + "grad_norm": 1.0543043613433838, + "learning_rate": 1.7939051348908954e-06, + "loss": 0.2825, + "step": 36462 + }, + { + "epoch": 0.729935190050797, + "grad_norm": 1.078236699104309, + "learning_rate": 1.7936563772235865e-06, + "loss": 0.2811, + "step": 36463 + }, + { + "epoch": 0.7299552085679253, + "grad_norm": 1.0571922063827515, + "learning_rate": 1.7934076330350565e-06, + "loss": 0.2751, + "step": 36464 + }, + { + "epoch": 0.7299752270850537, + "grad_norm": 1.1359347105026245, + "learning_rate": 1.7931589023263506e-06, + "loss": 0.3397, + "step": 36465 + }, + { + "epoch": 0.729995245602182, + "grad_norm": 1.2100369930267334, + "learning_rate": 1.7929101850985125e-06, + "loss": 0.3359, + "step": 36466 + }, + { + "epoch": 0.7300152641193104, + "grad_norm": 1.0798654556274414, + "learning_rate": 1.7926614813525922e-06, + "loss": 0.3109, + "step": 36467 + }, + { + "epoch": 0.7300352826364387, + "grad_norm": 1.8688087463378906, + "learning_rate": 1.7924127910896327e-06, + "loss": 0.8199, + "step": 36468 + }, + { + "epoch": 0.7300553011535671, + "grad_norm": 1.3782284259796143, + "learning_rate": 1.7921641143106794e-06, + "loss": 0.2879, + "step": 36469 + }, + { + "epoch": 0.7300753196706954, + "grad_norm": 1.8041329383850098, + "learning_rate": 1.7919154510167774e-06, + "loss": 0.8122, + "step": 36470 + }, + { + "epoch": 0.7300953381878237, + "grad_norm": 1.063730239868164, + "learning_rate": 1.7916668012089712e-06, + "loss": 0.2744, + "step": 36471 + }, + { + "epoch": 0.7301153567049521, + "grad_norm": 1.1322309970855713, + "learning_rate": 1.7914181648883088e-06, + "loss": 0.3064, + "step": 36472 + }, + { + "epoch": 0.7301353752220804, + "grad_norm": 1.2077107429504395, + "learning_rate": 1.7911695420558328e-06, + "loss": 0.3175, + "step": 36473 + }, + { + "epoch": 0.7301553937392088, + "grad_norm": 1.082331895828247, + "learning_rate": 1.7909209327125904e-06, + "loss": 0.3089, + "step": 36474 + }, + { + "epoch": 0.7301754122563371, + "grad_norm": 1.1085764169692993, + "learning_rate": 1.7906723368596246e-06, + "loss": 0.3064, + "step": 36475 + }, + { + "epoch": 0.7301954307734655, + "grad_norm": 1.2139586210250854, + "learning_rate": 1.790423754497983e-06, + "loss": 0.2769, + "step": 36476 + }, + { + "epoch": 0.7302154492905938, + "grad_norm": 1.8961164951324463, + "learning_rate": 1.7901751856287097e-06, + "loss": 0.7414, + "step": 36477 + }, + { + "epoch": 0.7302354678077222, + "grad_norm": 1.2969471216201782, + "learning_rate": 1.789926630252849e-06, + "loss": 0.3321, + "step": 36478 + }, + { + "epoch": 0.7302554863248505, + "grad_norm": 1.2560527324676514, + "learning_rate": 1.7896780883714459e-06, + "loss": 0.2354, + "step": 36479 + }, + { + "epoch": 0.7302755048419788, + "grad_norm": 1.1948820352554321, + "learning_rate": 1.7894295599855433e-06, + "loss": 0.257, + "step": 36480 + }, + { + "epoch": 0.7302955233591072, + "grad_norm": 1.1350897550582886, + "learning_rate": 1.78918104509619e-06, + "loss": 0.2544, + "step": 36481 + }, + { + "epoch": 0.7303155418762355, + "grad_norm": 1.0768389701843262, + "learning_rate": 1.788932543704428e-06, + "loss": 0.2996, + "step": 36482 + }, + { + "epoch": 0.7303355603933639, + "grad_norm": 1.102896809577942, + "learning_rate": 1.7886840558113034e-06, + "loss": 0.2946, + "step": 36483 + }, + { + "epoch": 0.7303555789104922, + "grad_norm": 1.865348219871521, + "learning_rate": 1.7884355814178578e-06, + "loss": 0.7921, + "step": 36484 + }, + { + "epoch": 0.7303755974276206, + "grad_norm": 1.080471158027649, + "learning_rate": 1.7881871205251394e-06, + "loss": 0.2509, + "step": 36485 + }, + { + "epoch": 0.7303956159447489, + "grad_norm": 1.102437138557434, + "learning_rate": 1.7879386731341898e-06, + "loss": 0.3176, + "step": 36486 + }, + { + "epoch": 0.7304156344618772, + "grad_norm": 1.0335606336593628, + "learning_rate": 1.787690239246056e-06, + "loss": 0.3162, + "step": 36487 + }, + { + "epoch": 0.7304356529790056, + "grad_norm": 1.1390832662582397, + "learning_rate": 1.78744181886178e-06, + "loss": 0.3209, + "step": 36488 + }, + { + "epoch": 0.7304556714961339, + "grad_norm": 1.075229525566101, + "learning_rate": 1.7871934119824087e-06, + "loss": 0.3566, + "step": 36489 + }, + { + "epoch": 0.7304756900132623, + "grad_norm": 0.9982583522796631, + "learning_rate": 1.7869450186089847e-06, + "loss": 0.2837, + "step": 36490 + }, + { + "epoch": 0.7304957085303906, + "grad_norm": 1.0754706859588623, + "learning_rate": 1.7866966387425528e-06, + "loss": 0.2708, + "step": 36491 + }, + { + "epoch": 0.730515727047519, + "grad_norm": 1.1103659868240356, + "learning_rate": 1.7864482723841565e-06, + "loss": 0.3996, + "step": 36492 + }, + { + "epoch": 0.7305357455646473, + "grad_norm": 1.2041010856628418, + "learning_rate": 1.7861999195348384e-06, + "loss": 0.3103, + "step": 36493 + }, + { + "epoch": 0.7305557640817757, + "grad_norm": 1.810996413230896, + "learning_rate": 1.7859515801956462e-06, + "loss": 0.7516, + "step": 36494 + }, + { + "epoch": 0.730575782598904, + "grad_norm": 1.15781569480896, + "learning_rate": 1.7857032543676213e-06, + "loss": 0.2995, + "step": 36495 + }, + { + "epoch": 0.7305958011160323, + "grad_norm": 1.1515421867370605, + "learning_rate": 1.7854549420518085e-06, + "loss": 0.3163, + "step": 36496 + }, + { + "epoch": 0.7306158196331607, + "grad_norm": 1.1632287502288818, + "learning_rate": 1.78520664324925e-06, + "loss": 0.3032, + "step": 36497 + }, + { + "epoch": 0.730635838150289, + "grad_norm": 1.100227952003479, + "learning_rate": 1.784958357960992e-06, + "loss": 0.3002, + "step": 36498 + }, + { + "epoch": 0.7306558566674174, + "grad_norm": 1.1331652402877808, + "learning_rate": 1.7847100861880756e-06, + "loss": 0.319, + "step": 36499 + }, + { + "epoch": 0.7306758751845457, + "grad_norm": 1.0884740352630615, + "learning_rate": 1.784461827931548e-06, + "loss": 0.291, + "step": 36500 + }, + { + "epoch": 0.7306958937016741, + "grad_norm": 1.0333696603775024, + "learning_rate": 1.7842135831924506e-06, + "loss": 0.2601, + "step": 36501 + }, + { + "epoch": 0.7307159122188024, + "grad_norm": 1.0902667045593262, + "learning_rate": 1.7839653519718259e-06, + "loss": 0.3385, + "step": 36502 + }, + { + "epoch": 0.7307359307359307, + "grad_norm": 1.0279443264007568, + "learning_rate": 1.7837171342707205e-06, + "loss": 0.2589, + "step": 36503 + }, + { + "epoch": 0.7307559492530591, + "grad_norm": 1.1696476936340332, + "learning_rate": 1.7834689300901759e-06, + "loss": 0.2401, + "step": 36504 + }, + { + "epoch": 0.7307759677701874, + "grad_norm": 1.0425341129302979, + "learning_rate": 1.7832207394312356e-06, + "loss": 0.3009, + "step": 36505 + }, + { + "epoch": 0.7307959862873158, + "grad_norm": 1.0698778629302979, + "learning_rate": 1.7829725622949416e-06, + "loss": 0.2968, + "step": 36506 + }, + { + "epoch": 0.7308160048044441, + "grad_norm": 1.1102582216262817, + "learning_rate": 1.78272439868234e-06, + "loss": 0.3013, + "step": 36507 + }, + { + "epoch": 0.7308360233215725, + "grad_norm": 1.1882737874984741, + "learning_rate": 1.782476248594473e-06, + "loss": 0.3428, + "step": 36508 + }, + { + "epoch": 0.7308560418387008, + "grad_norm": 1.2237765789031982, + "learning_rate": 1.7822281120323837e-06, + "loss": 0.244, + "step": 36509 + }, + { + "epoch": 0.7308760603558292, + "grad_norm": 1.0106991529464722, + "learning_rate": 1.7819799889971146e-06, + "loss": 0.3031, + "step": 36510 + }, + { + "epoch": 0.7308960788729575, + "grad_norm": 1.0662627220153809, + "learning_rate": 1.781731879489707e-06, + "loss": 0.2985, + "step": 36511 + }, + { + "epoch": 0.7309160973900858, + "grad_norm": 1.078521490097046, + "learning_rate": 1.7814837835112064e-06, + "loss": 0.2922, + "step": 36512 + }, + { + "epoch": 0.7309361159072142, + "grad_norm": 1.1146477460861206, + "learning_rate": 1.7812357010626574e-06, + "loss": 0.2684, + "step": 36513 + }, + { + "epoch": 0.7309561344243425, + "grad_norm": 0.9284566044807434, + "learning_rate": 1.7809876321451003e-06, + "loss": 0.2824, + "step": 36514 + }, + { + "epoch": 0.7309761529414709, + "grad_norm": 1.1554527282714844, + "learning_rate": 1.780739576759577e-06, + "loss": 0.2896, + "step": 36515 + }, + { + "epoch": 0.7309961714585992, + "grad_norm": 1.053296446800232, + "learning_rate": 1.7804915349071333e-06, + "loss": 0.2935, + "step": 36516 + }, + { + "epoch": 0.7310161899757276, + "grad_norm": 1.2940869331359863, + "learning_rate": 1.7802435065888103e-06, + "loss": 0.2968, + "step": 36517 + }, + { + "epoch": 0.7310362084928559, + "grad_norm": 1.051957130432129, + "learning_rate": 1.7799954918056505e-06, + "loss": 0.2877, + "step": 36518 + }, + { + "epoch": 0.7310562270099842, + "grad_norm": 1.4825431108474731, + "learning_rate": 1.7797474905586953e-06, + "loss": 0.2903, + "step": 36519 + }, + { + "epoch": 0.7310762455271126, + "grad_norm": 1.0167043209075928, + "learning_rate": 1.7794995028489904e-06, + "loss": 0.2622, + "step": 36520 + }, + { + "epoch": 0.7310962640442409, + "grad_norm": 1.1078521013259888, + "learning_rate": 1.7792515286775764e-06, + "loss": 0.2885, + "step": 36521 + }, + { + "epoch": 0.7311162825613693, + "grad_norm": 1.3461463451385498, + "learning_rate": 1.779003568045496e-06, + "loss": 0.2661, + "step": 36522 + }, + { + "epoch": 0.7311363010784976, + "grad_norm": 0.9607859253883362, + "learning_rate": 1.7787556209537915e-06, + "loss": 0.2835, + "step": 36523 + }, + { + "epoch": 0.731156319595626, + "grad_norm": 1.0378026962280273, + "learning_rate": 1.7785076874035035e-06, + "loss": 0.2783, + "step": 36524 + }, + { + "epoch": 0.7311763381127543, + "grad_norm": 1.1339255571365356, + "learning_rate": 1.7782597673956754e-06, + "loss": 0.3389, + "step": 36525 + }, + { + "epoch": 0.7311963566298827, + "grad_norm": 1.063338041305542, + "learning_rate": 1.778011860931352e-06, + "loss": 0.3103, + "step": 36526 + }, + { + "epoch": 0.731216375147011, + "grad_norm": 1.285375714302063, + "learning_rate": 1.7777639680115733e-06, + "loss": 0.2993, + "step": 36527 + }, + { + "epoch": 0.7312363936641393, + "grad_norm": 1.0680137872695923, + "learning_rate": 1.7775160886373798e-06, + "loss": 0.3095, + "step": 36528 + }, + { + "epoch": 0.7312564121812677, + "grad_norm": 1.013075828552246, + "learning_rate": 1.7772682228098165e-06, + "loss": 0.2722, + "step": 36529 + }, + { + "epoch": 0.731276430698396, + "grad_norm": 1.141200065612793, + "learning_rate": 1.7770203705299244e-06, + "loss": 0.3108, + "step": 36530 + }, + { + "epoch": 0.7312964492155244, + "grad_norm": 1.074302315711975, + "learning_rate": 1.776772531798745e-06, + "loss": 0.3171, + "step": 36531 + }, + { + "epoch": 0.7313164677326527, + "grad_norm": 1.0491119623184204, + "learning_rate": 1.7765247066173203e-06, + "loss": 0.2473, + "step": 36532 + }, + { + "epoch": 0.7313364862497811, + "grad_norm": 1.9516575336456299, + "learning_rate": 1.7762768949866903e-06, + "loss": 0.7354, + "step": 36533 + }, + { + "epoch": 0.7313565047669094, + "grad_norm": 1.0865286588668823, + "learning_rate": 1.7760290969079002e-06, + "loss": 0.263, + "step": 36534 + }, + { + "epoch": 0.7313765232840377, + "grad_norm": 1.370162010192871, + "learning_rate": 1.77578131238199e-06, + "loss": 0.3079, + "step": 36535 + }, + { + "epoch": 0.7313965418011661, + "grad_norm": 1.2526812553405762, + "learning_rate": 1.7755335414100012e-06, + "loss": 0.3321, + "step": 36536 + }, + { + "epoch": 0.7314165603182944, + "grad_norm": 1.1389551162719727, + "learning_rate": 1.7752857839929737e-06, + "loss": 0.3212, + "step": 36537 + }, + { + "epoch": 0.7314365788354228, + "grad_norm": 1.0672475099563599, + "learning_rate": 1.775038040131951e-06, + "loss": 0.2932, + "step": 36538 + }, + { + "epoch": 0.7314565973525511, + "grad_norm": 1.9489943981170654, + "learning_rate": 1.7747903098279762e-06, + "loss": 0.7843, + "step": 36539 + }, + { + "epoch": 0.7314766158696795, + "grad_norm": 1.0937074422836304, + "learning_rate": 1.774542593082088e-06, + "loss": 0.2806, + "step": 36540 + }, + { + "epoch": 0.7314966343868078, + "grad_norm": 1.0979456901550293, + "learning_rate": 1.7742948898953277e-06, + "loss": 0.2464, + "step": 36541 + }, + { + "epoch": 0.7315166529039362, + "grad_norm": 1.815305471420288, + "learning_rate": 1.7740472002687392e-06, + "loss": 0.7366, + "step": 36542 + }, + { + "epoch": 0.7315366714210645, + "grad_norm": 1.9659186601638794, + "learning_rate": 1.7737995242033618e-06, + "loss": 0.7771, + "step": 36543 + }, + { + "epoch": 0.7315566899381928, + "grad_norm": 1.3432782888412476, + "learning_rate": 1.7735518617002373e-06, + "loss": 0.3007, + "step": 36544 + }, + { + "epoch": 0.7315767084553212, + "grad_norm": 1.231836199760437, + "learning_rate": 1.773304212760406e-06, + "loss": 0.2913, + "step": 36545 + }, + { + "epoch": 0.7315967269724495, + "grad_norm": 1.0443546772003174, + "learning_rate": 1.7730565773849078e-06, + "loss": 0.2877, + "step": 36546 + }, + { + "epoch": 0.7316167454895779, + "grad_norm": 1.8280020952224731, + "learning_rate": 1.7728089555747868e-06, + "loss": 0.7066, + "step": 36547 + }, + { + "epoch": 0.7316367640067062, + "grad_norm": 1.013070821762085, + "learning_rate": 1.7725613473310832e-06, + "loss": 0.2377, + "step": 36548 + }, + { + "epoch": 0.7316567825238346, + "grad_norm": 1.1077200174331665, + "learning_rate": 1.7723137526548362e-06, + "loss": 0.2655, + "step": 36549 + }, + { + "epoch": 0.7316768010409629, + "grad_norm": 1.230058193206787, + "learning_rate": 1.7720661715470867e-06, + "loss": 0.3349, + "step": 36550 + }, + { + "epoch": 0.7316968195580912, + "grad_norm": 1.1605221033096313, + "learning_rate": 1.771818604008876e-06, + "loss": 0.2968, + "step": 36551 + }, + { + "epoch": 0.7317168380752196, + "grad_norm": 1.094477891921997, + "learning_rate": 1.7715710500412464e-06, + "loss": 0.3155, + "step": 36552 + }, + { + "epoch": 0.7317368565923479, + "grad_norm": 1.0740227699279785, + "learning_rate": 1.771323509645238e-06, + "loss": 0.3143, + "step": 36553 + }, + { + "epoch": 0.7317568751094763, + "grad_norm": 1.2282382249832153, + "learning_rate": 1.7710759828218905e-06, + "loss": 0.2761, + "step": 36554 + }, + { + "epoch": 0.7317768936266046, + "grad_norm": 1.0912864208221436, + "learning_rate": 1.7708284695722428e-06, + "loss": 0.3069, + "step": 36555 + }, + { + "epoch": 0.731796912143733, + "grad_norm": 1.91989004611969, + "learning_rate": 1.7705809698973392e-06, + "loss": 0.7167, + "step": 36556 + }, + { + "epoch": 0.7318169306608613, + "grad_norm": 1.0098241567611694, + "learning_rate": 1.7703334837982183e-06, + "loss": 0.2552, + "step": 36557 + }, + { + "epoch": 0.7318369491779897, + "grad_norm": 1.1383744478225708, + "learning_rate": 1.7700860112759205e-06, + "loss": 0.323, + "step": 36558 + }, + { + "epoch": 0.731856967695118, + "grad_norm": 1.9447518587112427, + "learning_rate": 1.7698385523314843e-06, + "loss": 0.8206, + "step": 36559 + }, + { + "epoch": 0.7318769862122463, + "grad_norm": 1.0497666597366333, + "learning_rate": 1.7695911069659533e-06, + "loss": 0.2894, + "step": 36560 + }, + { + "epoch": 0.7318970047293747, + "grad_norm": 1.1233103275299072, + "learning_rate": 1.769343675180366e-06, + "loss": 0.2737, + "step": 36561 + }, + { + "epoch": 0.731917023246503, + "grad_norm": 1.4646941423416138, + "learning_rate": 1.7690962569757626e-06, + "loss": 0.2591, + "step": 36562 + }, + { + "epoch": 0.7319370417636314, + "grad_norm": 1.1173796653747559, + "learning_rate": 1.7688488523531817e-06, + "loss": 0.322, + "step": 36563 + }, + { + "epoch": 0.7319570602807597, + "grad_norm": 1.993216872215271, + "learning_rate": 1.7686014613136648e-06, + "loss": 0.7945, + "step": 36564 + }, + { + "epoch": 0.7319770787978881, + "grad_norm": 1.1594418287277222, + "learning_rate": 1.7683540838582537e-06, + "loss": 0.318, + "step": 36565 + }, + { + "epoch": 0.7319970973150164, + "grad_norm": 1.0669684410095215, + "learning_rate": 1.7681067199879864e-06, + "loss": 0.2628, + "step": 36566 + }, + { + "epoch": 0.7320171158321447, + "grad_norm": 1.1743923425674438, + "learning_rate": 1.7678593697039032e-06, + "loss": 0.3063, + "step": 36567 + }, + { + "epoch": 0.7320371343492731, + "grad_norm": 1.1406569480895996, + "learning_rate": 1.7676120330070418e-06, + "loss": 0.3382, + "step": 36568 + }, + { + "epoch": 0.7320571528664014, + "grad_norm": 1.0517898797988892, + "learning_rate": 1.7673647098984453e-06, + "loss": 0.2857, + "step": 36569 + }, + { + "epoch": 0.7320771713835298, + "grad_norm": 1.2169471979141235, + "learning_rate": 1.7671174003791519e-06, + "loss": 0.3164, + "step": 36570 + }, + { + "epoch": 0.7320971899006581, + "grad_norm": 1.2701512575149536, + "learning_rate": 1.7668701044502018e-06, + "loss": 0.281, + "step": 36571 + }, + { + "epoch": 0.7321172084177865, + "grad_norm": 1.1100465059280396, + "learning_rate": 1.7666228221126318e-06, + "loss": 0.2724, + "step": 36572 + }, + { + "epoch": 0.7321372269349148, + "grad_norm": 1.0657365322113037, + "learning_rate": 1.7663755533674853e-06, + "loss": 0.2798, + "step": 36573 + }, + { + "epoch": 0.7321572454520432, + "grad_norm": 1.1761618852615356, + "learning_rate": 1.7661282982158001e-06, + "loss": 0.2846, + "step": 36574 + }, + { + "epoch": 0.7321772639691715, + "grad_norm": 1.1133924722671509, + "learning_rate": 1.765881056658616e-06, + "loss": 0.2871, + "step": 36575 + }, + { + "epoch": 0.7321972824862998, + "grad_norm": 1.1211029291152954, + "learning_rate": 1.7656338286969698e-06, + "loss": 0.2765, + "step": 36576 + }, + { + "epoch": 0.7322173010034282, + "grad_norm": 1.1340930461883545, + "learning_rate": 1.765386614331903e-06, + "loss": 0.2933, + "step": 36577 + }, + { + "epoch": 0.7322373195205565, + "grad_norm": 1.0575957298278809, + "learning_rate": 1.7651394135644568e-06, + "loss": 0.2532, + "step": 36578 + }, + { + "epoch": 0.7322573380376849, + "grad_norm": 1.138647198677063, + "learning_rate": 1.7648922263956681e-06, + "loss": 0.3245, + "step": 36579 + }, + { + "epoch": 0.7322773565548132, + "grad_norm": 1.875182867050171, + "learning_rate": 1.7646450528265762e-06, + "loss": 0.8366, + "step": 36580 + }, + { + "epoch": 0.7322973750719416, + "grad_norm": 1.1951245069503784, + "learning_rate": 1.7643978928582188e-06, + "loss": 0.296, + "step": 36581 + }, + { + "epoch": 0.7323173935890699, + "grad_norm": 1.1361708641052246, + "learning_rate": 1.764150746491638e-06, + "loss": 0.3361, + "step": 36582 + }, + { + "epoch": 0.7323374121061982, + "grad_norm": 1.1391842365264893, + "learning_rate": 1.763903613727871e-06, + "loss": 0.3053, + "step": 36583 + }, + { + "epoch": 0.7323574306233266, + "grad_norm": 1.1483217477798462, + "learning_rate": 1.7636564945679569e-06, + "loss": 0.326, + "step": 36584 + }, + { + "epoch": 0.7323774491404549, + "grad_norm": 1.1775085926055908, + "learning_rate": 1.7634093890129344e-06, + "loss": 0.3279, + "step": 36585 + }, + { + "epoch": 0.7323974676575833, + "grad_norm": 1.1027880907058716, + "learning_rate": 1.7631622970638407e-06, + "loss": 0.2953, + "step": 36586 + }, + { + "epoch": 0.7324174861747116, + "grad_norm": 1.0951330661773682, + "learning_rate": 1.7629152187217174e-06, + "loss": 0.2811, + "step": 36587 + }, + { + "epoch": 0.73243750469184, + "grad_norm": 1.8330310583114624, + "learning_rate": 1.7626681539876024e-06, + "loss": 0.8002, + "step": 36588 + }, + { + "epoch": 0.7324575232089683, + "grad_norm": 1.0756216049194336, + "learning_rate": 1.7624211028625315e-06, + "loss": 0.3002, + "step": 36589 + }, + { + "epoch": 0.7324775417260967, + "grad_norm": 1.296800136566162, + "learning_rate": 1.7621740653475462e-06, + "loss": 0.3334, + "step": 36590 + }, + { + "epoch": 0.732497560243225, + "grad_norm": 1.9713503122329712, + "learning_rate": 1.7619270414436856e-06, + "loss": 0.7372, + "step": 36591 + }, + { + "epoch": 0.7325175787603533, + "grad_norm": 1.0782769918441772, + "learning_rate": 1.7616800311519872e-06, + "loss": 0.2545, + "step": 36592 + }, + { + "epoch": 0.7325375972774817, + "grad_norm": 1.1336950063705444, + "learning_rate": 1.7614330344734886e-06, + "loss": 0.3102, + "step": 36593 + }, + { + "epoch": 0.73255761579461, + "grad_norm": 1.0995354652404785, + "learning_rate": 1.7611860514092288e-06, + "loss": 0.2698, + "step": 36594 + }, + { + "epoch": 0.7325776343117384, + "grad_norm": 1.2021872997283936, + "learning_rate": 1.7609390819602446e-06, + "loss": 0.3416, + "step": 36595 + }, + { + "epoch": 0.7325976528288667, + "grad_norm": 1.1782461404800415, + "learning_rate": 1.7606921261275767e-06, + "loss": 0.3198, + "step": 36596 + }, + { + "epoch": 0.7326176713459951, + "grad_norm": 1.136061429977417, + "learning_rate": 1.7604451839122621e-06, + "loss": 0.3574, + "step": 36597 + }, + { + "epoch": 0.7326376898631234, + "grad_norm": 1.0318524837493896, + "learning_rate": 1.7601982553153385e-06, + "loss": 0.3036, + "step": 36598 + }, + { + "epoch": 0.7326577083802517, + "grad_norm": 1.2016572952270508, + "learning_rate": 1.7599513403378427e-06, + "loss": 0.2595, + "step": 36599 + }, + { + "epoch": 0.7326777268973801, + "grad_norm": 1.129059910774231, + "learning_rate": 1.759704438980816e-06, + "loss": 0.2668, + "step": 36600 + }, + { + "epoch": 0.7326977454145084, + "grad_norm": 0.969264566898346, + "learning_rate": 1.7594575512452943e-06, + "loss": 0.2776, + "step": 36601 + }, + { + "epoch": 0.7327177639316368, + "grad_norm": 1.1570481061935425, + "learning_rate": 1.7592106771323143e-06, + "loss": 0.2792, + "step": 36602 + }, + { + "epoch": 0.7327377824487651, + "grad_norm": 1.1298775672912598, + "learning_rate": 1.7589638166429152e-06, + "loss": 0.2736, + "step": 36603 + }, + { + "epoch": 0.7327578009658935, + "grad_norm": 1.0303356647491455, + "learning_rate": 1.7587169697781364e-06, + "loss": 0.2966, + "step": 36604 + }, + { + "epoch": 0.7327778194830218, + "grad_norm": 1.087050199508667, + "learning_rate": 1.7584701365390133e-06, + "loss": 0.2771, + "step": 36605 + }, + { + "epoch": 0.7327978380001502, + "grad_norm": 1.0320509672164917, + "learning_rate": 1.758223316926585e-06, + "loss": 0.2608, + "step": 36606 + }, + { + "epoch": 0.7328178565172785, + "grad_norm": 1.047710657119751, + "learning_rate": 1.7579765109418878e-06, + "loss": 0.2575, + "step": 36607 + }, + { + "epoch": 0.7328378750344068, + "grad_norm": 1.0849709510803223, + "learning_rate": 1.7577297185859583e-06, + "loss": 0.2824, + "step": 36608 + }, + { + "epoch": 0.7328578935515352, + "grad_norm": 1.030645728111267, + "learning_rate": 1.7574829398598364e-06, + "loss": 0.2817, + "step": 36609 + }, + { + "epoch": 0.7328779120686635, + "grad_norm": 1.2283819913864136, + "learning_rate": 1.7572361747645589e-06, + "loss": 0.3342, + "step": 36610 + }, + { + "epoch": 0.7328979305857919, + "grad_norm": 1.0647294521331787, + "learning_rate": 1.7569894233011625e-06, + "loss": 0.2874, + "step": 36611 + }, + { + "epoch": 0.7329179491029202, + "grad_norm": 1.0879384279251099, + "learning_rate": 1.756742685470683e-06, + "loss": 0.2846, + "step": 36612 + }, + { + "epoch": 0.7329379676200486, + "grad_norm": 1.1521382331848145, + "learning_rate": 1.7564959612741612e-06, + "loss": 0.2803, + "step": 36613 + }, + { + "epoch": 0.7329579861371769, + "grad_norm": 1.1512975692749023, + "learning_rate": 1.7562492507126323e-06, + "loss": 0.296, + "step": 36614 + }, + { + "epoch": 0.7329780046543052, + "grad_norm": 1.0354539155960083, + "learning_rate": 1.7560025537871317e-06, + "loss": 0.2896, + "step": 36615 + }, + { + "epoch": 0.7329980231714336, + "grad_norm": 1.0646933317184448, + "learning_rate": 1.7557558704987e-06, + "loss": 0.3071, + "step": 36616 + }, + { + "epoch": 0.7330180416885619, + "grad_norm": 2.1340842247009277, + "learning_rate": 1.7555092008483705e-06, + "loss": 0.7494, + "step": 36617 + }, + { + "epoch": 0.7330380602056903, + "grad_norm": 1.018604040145874, + "learning_rate": 1.755262544837184e-06, + "loss": 0.2763, + "step": 36618 + }, + { + "epoch": 0.7330580787228186, + "grad_norm": 1.1065330505371094, + "learning_rate": 1.7550159024661756e-06, + "loss": 0.3278, + "step": 36619 + }, + { + "epoch": 0.733078097239947, + "grad_norm": 1.791633129119873, + "learning_rate": 1.754769273736382e-06, + "loss": 0.7406, + "step": 36620 + }, + { + "epoch": 0.7330981157570753, + "grad_norm": 0.9522184729576111, + "learning_rate": 1.754522658648838e-06, + "loss": 0.298, + "step": 36621 + }, + { + "epoch": 0.7331181342742037, + "grad_norm": 1.2199249267578125, + "learning_rate": 1.7542760572045842e-06, + "loss": 0.3007, + "step": 36622 + }, + { + "epoch": 0.733138152791332, + "grad_norm": 1.0069583654403687, + "learning_rate": 1.754029469404655e-06, + "loss": 0.3156, + "step": 36623 + }, + { + "epoch": 0.7331581713084603, + "grad_norm": 1.1100934743881226, + "learning_rate": 1.753782895250088e-06, + "loss": 0.3088, + "step": 36624 + }, + { + "epoch": 0.7331781898255887, + "grad_norm": 1.1043877601623535, + "learning_rate": 1.7535363347419188e-06, + "loss": 0.3272, + "step": 36625 + }, + { + "epoch": 0.733198208342717, + "grad_norm": 1.1300389766693115, + "learning_rate": 1.7532897878811827e-06, + "loss": 0.3048, + "step": 36626 + }, + { + "epoch": 0.7332182268598454, + "grad_norm": 1.2137690782546997, + "learning_rate": 1.753043254668919e-06, + "loss": 0.3259, + "step": 36627 + }, + { + "epoch": 0.7332382453769737, + "grad_norm": 1.4368386268615723, + "learning_rate": 1.7527967351061615e-06, + "loss": 0.2903, + "step": 36628 + }, + { + "epoch": 0.7332582638941021, + "grad_norm": 1.3951611518859863, + "learning_rate": 1.7525502291939489e-06, + "loss": 0.3113, + "step": 36629 + }, + { + "epoch": 0.7332782824112304, + "grad_norm": 1.2530171871185303, + "learning_rate": 1.7523037369333146e-06, + "loss": 0.3076, + "step": 36630 + }, + { + "epoch": 0.7332983009283587, + "grad_norm": 1.8511362075805664, + "learning_rate": 1.7520572583252982e-06, + "loss": 0.7205, + "step": 36631 + }, + { + "epoch": 0.7333183194454871, + "grad_norm": 1.162987470626831, + "learning_rate": 1.7518107933709343e-06, + "loss": 0.2904, + "step": 36632 + }, + { + "epoch": 0.7333383379626154, + "grad_norm": 1.1681185960769653, + "learning_rate": 1.751564342071259e-06, + "loss": 0.2875, + "step": 36633 + }, + { + "epoch": 0.7333583564797438, + "grad_norm": 1.0789886713027954, + "learning_rate": 1.751317904427306e-06, + "loss": 0.2919, + "step": 36634 + }, + { + "epoch": 0.7333783749968721, + "grad_norm": 1.1792807579040527, + "learning_rate": 1.7510714804401157e-06, + "loss": 0.328, + "step": 36635 + }, + { + "epoch": 0.7333983935140005, + "grad_norm": 1.0754066705703735, + "learning_rate": 1.750825070110721e-06, + "loss": 0.2952, + "step": 36636 + }, + { + "epoch": 0.7334184120311288, + "grad_norm": 1.1698684692382812, + "learning_rate": 1.7505786734401592e-06, + "loss": 0.2872, + "step": 36637 + }, + { + "epoch": 0.7334384305482572, + "grad_norm": 1.1223064661026, + "learning_rate": 1.7503322904294646e-06, + "loss": 0.2797, + "step": 36638 + }, + { + "epoch": 0.7334584490653855, + "grad_norm": 1.0871200561523438, + "learning_rate": 1.7500859210796728e-06, + "loss": 0.2518, + "step": 36639 + }, + { + "epoch": 0.7334784675825138, + "grad_norm": 1.2397994995117188, + "learning_rate": 1.7498395653918216e-06, + "loss": 0.2883, + "step": 36640 + }, + { + "epoch": 0.7334984860996422, + "grad_norm": 1.9499748945236206, + "learning_rate": 1.7495932233669438e-06, + "loss": 0.7037, + "step": 36641 + }, + { + "epoch": 0.7335185046167705, + "grad_norm": 1.2519124746322632, + "learning_rate": 1.7493468950060782e-06, + "loss": 0.2959, + "step": 36642 + }, + { + "epoch": 0.7335385231338989, + "grad_norm": 1.0933918952941895, + "learning_rate": 1.7491005803102573e-06, + "loss": 0.3012, + "step": 36643 + }, + { + "epoch": 0.7335585416510272, + "grad_norm": 1.0926530361175537, + "learning_rate": 1.748854279280519e-06, + "loss": 0.3344, + "step": 36644 + }, + { + "epoch": 0.7335785601681556, + "grad_norm": 1.1225978136062622, + "learning_rate": 1.7486079919178982e-06, + "loss": 0.2883, + "step": 36645 + }, + { + "epoch": 0.7335985786852839, + "grad_norm": 1.1228697299957275, + "learning_rate": 1.7483617182234296e-06, + "loss": 0.3008, + "step": 36646 + }, + { + "epoch": 0.7336185972024122, + "grad_norm": 1.037489891052246, + "learning_rate": 1.7481154581981492e-06, + "loss": 0.2796, + "step": 36647 + }, + { + "epoch": 0.7336386157195406, + "grad_norm": 1.144917368888855, + "learning_rate": 1.747869211843089e-06, + "loss": 0.3245, + "step": 36648 + }, + { + "epoch": 0.7336586342366689, + "grad_norm": 1.1935445070266724, + "learning_rate": 1.7476229791592886e-06, + "loss": 0.3024, + "step": 36649 + }, + { + "epoch": 0.7336786527537973, + "grad_norm": 1.2083590030670166, + "learning_rate": 1.7473767601477814e-06, + "loss": 0.3174, + "step": 36650 + }, + { + "epoch": 0.7336986712709256, + "grad_norm": 1.0639442205429077, + "learning_rate": 1.7471305548096024e-06, + "loss": 0.2509, + "step": 36651 + }, + { + "epoch": 0.733718689788054, + "grad_norm": 1.049372673034668, + "learning_rate": 1.7468843631457848e-06, + "loss": 0.2973, + "step": 36652 + }, + { + "epoch": 0.7337387083051823, + "grad_norm": 1.0688605308532715, + "learning_rate": 1.7466381851573671e-06, + "loss": 0.3116, + "step": 36653 + }, + { + "epoch": 0.7337587268223107, + "grad_norm": 1.0293432474136353, + "learning_rate": 1.7463920208453806e-06, + "loss": 0.3456, + "step": 36654 + }, + { + "epoch": 0.733778745339439, + "grad_norm": 1.0401184558868408, + "learning_rate": 1.7461458702108636e-06, + "loss": 0.281, + "step": 36655 + }, + { + "epoch": 0.7337987638565673, + "grad_norm": 1.8732447624206543, + "learning_rate": 1.7458997332548488e-06, + "loss": 0.7968, + "step": 36656 + }, + { + "epoch": 0.7338187823736957, + "grad_norm": 1.1802486181259155, + "learning_rate": 1.7456536099783699e-06, + "loss": 0.2988, + "step": 36657 + }, + { + "epoch": 0.733838800890824, + "grad_norm": 1.2590926885604858, + "learning_rate": 1.7454075003824643e-06, + "loss": 0.3419, + "step": 36658 + }, + { + "epoch": 0.7338588194079524, + "grad_norm": 1.0322266817092896, + "learning_rate": 1.7451614044681653e-06, + "loss": 0.2807, + "step": 36659 + }, + { + "epoch": 0.7338788379250807, + "grad_norm": 1.1395584344863892, + "learning_rate": 1.7449153222365074e-06, + "loss": 0.287, + "step": 36660 + }, + { + "epoch": 0.7338988564422091, + "grad_norm": 1.0892274379730225, + "learning_rate": 1.744669253688523e-06, + "loss": 0.327, + "step": 36661 + }, + { + "epoch": 0.7339188749593374, + "grad_norm": 1.1072343587875366, + "learning_rate": 1.7444231988252503e-06, + "loss": 0.2645, + "step": 36662 + }, + { + "epoch": 0.7339388934764657, + "grad_norm": 2.0200209617614746, + "learning_rate": 1.7441771576477219e-06, + "loss": 0.7128, + "step": 36663 + }, + { + "epoch": 0.7339589119935941, + "grad_norm": 1.0801386833190918, + "learning_rate": 1.7439311301569721e-06, + "loss": 0.3179, + "step": 36664 + }, + { + "epoch": 0.7339789305107224, + "grad_norm": 1.1484911441802979, + "learning_rate": 1.7436851163540337e-06, + "loss": 0.2684, + "step": 36665 + }, + { + "epoch": 0.7339989490278508, + "grad_norm": 1.0836009979248047, + "learning_rate": 1.7434391162399434e-06, + "loss": 0.3061, + "step": 36666 + }, + { + "epoch": 0.7340189675449791, + "grad_norm": 1.071597933769226, + "learning_rate": 1.7431931298157328e-06, + "loss": 0.2978, + "step": 36667 + }, + { + "epoch": 0.7340389860621075, + "grad_norm": 1.8342961072921753, + "learning_rate": 1.7429471570824391e-06, + "loss": 0.8245, + "step": 36668 + }, + { + "epoch": 0.7340590045792358, + "grad_norm": 1.1631046533584595, + "learning_rate": 1.7427011980410946e-06, + "loss": 0.2431, + "step": 36669 + }, + { + "epoch": 0.7340790230963642, + "grad_norm": 1.1150654554367065, + "learning_rate": 1.7424552526927318e-06, + "loss": 0.232, + "step": 36670 + }, + { + "epoch": 0.7340990416134925, + "grad_norm": 1.1786998510360718, + "learning_rate": 1.7422093210383878e-06, + "loss": 0.3457, + "step": 36671 + }, + { + "epoch": 0.7341190601306208, + "grad_norm": 2.251322031021118, + "learning_rate": 1.7419634030790944e-06, + "loss": 0.82, + "step": 36672 + }, + { + "epoch": 0.7341390786477492, + "grad_norm": 1.8883882761001587, + "learning_rate": 1.7417174988158858e-06, + "loss": 0.7487, + "step": 36673 + }, + { + "epoch": 0.7341590971648775, + "grad_norm": 1.3061751127243042, + "learning_rate": 1.7414716082497946e-06, + "loss": 0.3293, + "step": 36674 + }, + { + "epoch": 0.7341791156820059, + "grad_norm": 2.1048173904418945, + "learning_rate": 1.7412257313818564e-06, + "loss": 0.7033, + "step": 36675 + }, + { + "epoch": 0.7341991341991342, + "grad_norm": 1.0837209224700928, + "learning_rate": 1.7409798682131041e-06, + "loss": 0.2929, + "step": 36676 + }, + { + "epoch": 0.7342191527162626, + "grad_norm": 1.1061545610427856, + "learning_rate": 1.7407340187445715e-06, + "loss": 0.2893, + "step": 36677 + }, + { + "epoch": 0.7342391712333909, + "grad_norm": 1.0637823343276978, + "learning_rate": 1.7404881829772897e-06, + "loss": 0.2858, + "step": 36678 + }, + { + "epoch": 0.7342591897505192, + "grad_norm": 1.9812856912612915, + "learning_rate": 1.7402423609122954e-06, + "loss": 0.7462, + "step": 36679 + }, + { + "epoch": 0.7342792082676476, + "grad_norm": 1.2019100189208984, + "learning_rate": 1.7399965525506195e-06, + "loss": 0.3038, + "step": 36680 + }, + { + "epoch": 0.7342992267847759, + "grad_norm": 1.2214272022247314, + "learning_rate": 1.7397507578932981e-06, + "loss": 0.3192, + "step": 36681 + }, + { + "epoch": 0.7343192453019043, + "grad_norm": 1.103400468826294, + "learning_rate": 1.739504976941363e-06, + "loss": 0.2951, + "step": 36682 + }, + { + "epoch": 0.7343392638190326, + "grad_norm": 1.0387178659439087, + "learning_rate": 1.7392592096958456e-06, + "loss": 0.2523, + "step": 36683 + }, + { + "epoch": 0.734359282336161, + "grad_norm": 1.0938948392868042, + "learning_rate": 1.739013456157782e-06, + "loss": 0.317, + "step": 36684 + }, + { + "epoch": 0.7343793008532893, + "grad_norm": 1.0756287574768066, + "learning_rate": 1.7387677163282047e-06, + "loss": 0.3102, + "step": 36685 + }, + { + "epoch": 0.7343993193704177, + "grad_norm": 1.03836190700531, + "learning_rate": 1.7385219902081457e-06, + "loss": 0.2858, + "step": 36686 + }, + { + "epoch": 0.734419337887546, + "grad_norm": 1.035760521888733, + "learning_rate": 1.7382762777986373e-06, + "loss": 0.2583, + "step": 36687 + }, + { + "epoch": 0.7344393564046743, + "grad_norm": 1.1644684076309204, + "learning_rate": 1.7380305791007145e-06, + "loss": 0.2776, + "step": 36688 + }, + { + "epoch": 0.7344593749218027, + "grad_norm": 1.1746621131896973, + "learning_rate": 1.737784894115409e-06, + "loss": 0.3411, + "step": 36689 + }, + { + "epoch": 0.734479393438931, + "grad_norm": 1.8128912448883057, + "learning_rate": 1.7375392228437544e-06, + "loss": 0.7148, + "step": 36690 + }, + { + "epoch": 0.7344994119560594, + "grad_norm": 1.1263861656188965, + "learning_rate": 1.7372935652867811e-06, + "loss": 0.3489, + "step": 36691 + }, + { + "epoch": 0.7345194304731877, + "grad_norm": 1.1358246803283691, + "learning_rate": 1.7370479214455249e-06, + "loss": 0.3009, + "step": 36692 + }, + { + "epoch": 0.7345394489903161, + "grad_norm": 1.1393741369247437, + "learning_rate": 1.7368022913210153e-06, + "loss": 0.3211, + "step": 36693 + }, + { + "epoch": 0.7345594675074444, + "grad_norm": 1.0895206928253174, + "learning_rate": 1.7365566749142886e-06, + "loss": 0.2775, + "step": 36694 + }, + { + "epoch": 0.7345794860245727, + "grad_norm": 1.031213402748108, + "learning_rate": 1.7363110722263748e-06, + "loss": 0.2814, + "step": 36695 + }, + { + "epoch": 0.7345995045417011, + "grad_norm": 1.0689202547073364, + "learning_rate": 1.7360654832583057e-06, + "loss": 0.315, + "step": 36696 + }, + { + "epoch": 0.7346195230588294, + "grad_norm": 1.0085374116897583, + "learning_rate": 1.7358199080111166e-06, + "loss": 0.3118, + "step": 36697 + }, + { + "epoch": 0.7346395415759578, + "grad_norm": 1.3319694995880127, + "learning_rate": 1.7355743464858383e-06, + "loss": 0.2899, + "step": 36698 + }, + { + "epoch": 0.7346595600930861, + "grad_norm": 1.1784050464630127, + "learning_rate": 1.7353287986835026e-06, + "loss": 0.3034, + "step": 36699 + }, + { + "epoch": 0.7346795786102145, + "grad_norm": 1.130921721458435, + "learning_rate": 1.735083264605142e-06, + "loss": 0.3161, + "step": 36700 + }, + { + "epoch": 0.7346995971273428, + "grad_norm": 1.162737250328064, + "learning_rate": 1.7348377442517871e-06, + "loss": 0.34, + "step": 36701 + }, + { + "epoch": 0.7347196156444712, + "grad_norm": 1.108690857887268, + "learning_rate": 1.7345922376244733e-06, + "loss": 0.2652, + "step": 36702 + }, + { + "epoch": 0.7347396341615995, + "grad_norm": 1.7073074579238892, + "learning_rate": 1.7343467447242312e-06, + "loss": 0.727, + "step": 36703 + }, + { + "epoch": 0.7347596526787278, + "grad_norm": 1.0579307079315186, + "learning_rate": 1.734101265552091e-06, + "loss": 0.2982, + "step": 36704 + }, + { + "epoch": 0.7347796711958562, + "grad_norm": 1.1610534191131592, + "learning_rate": 1.7338558001090877e-06, + "loss": 0.3042, + "step": 36705 + }, + { + "epoch": 0.7347996897129845, + "grad_norm": 1.0770413875579834, + "learning_rate": 1.7336103483962501e-06, + "loss": 0.2947, + "step": 36706 + }, + { + "epoch": 0.7348197082301129, + "grad_norm": 1.0004724264144897, + "learning_rate": 1.7333649104146133e-06, + "loss": 0.2833, + "step": 36707 + }, + { + "epoch": 0.7348397267472412, + "grad_norm": 1.123145341873169, + "learning_rate": 1.7331194861652078e-06, + "loss": 0.2994, + "step": 36708 + }, + { + "epoch": 0.7348597452643696, + "grad_norm": 1.1709301471710205, + "learning_rate": 1.7328740756490647e-06, + "loss": 0.2864, + "step": 36709 + }, + { + "epoch": 0.7348797637814979, + "grad_norm": 1.058824062347412, + "learning_rate": 1.7326286788672141e-06, + "loss": 0.2657, + "step": 36710 + }, + { + "epoch": 0.7348997822986262, + "grad_norm": 1.0848625898361206, + "learning_rate": 1.732383295820691e-06, + "loss": 0.2837, + "step": 36711 + }, + { + "epoch": 0.7349198008157546, + "grad_norm": 1.2489352226257324, + "learning_rate": 1.7321379265105254e-06, + "loss": 0.2936, + "step": 36712 + }, + { + "epoch": 0.7349398193328829, + "grad_norm": 1.0841729640960693, + "learning_rate": 1.7318925709377487e-06, + "loss": 0.2838, + "step": 36713 + }, + { + "epoch": 0.7349598378500113, + "grad_norm": 1.2273750305175781, + "learning_rate": 1.731647229103391e-06, + "loss": 0.3596, + "step": 36714 + }, + { + "epoch": 0.7349798563671396, + "grad_norm": 1.0593384504318237, + "learning_rate": 1.7314019010084865e-06, + "loss": 0.2538, + "step": 36715 + }, + { + "epoch": 0.734999874884268, + "grad_norm": 1.3119475841522217, + "learning_rate": 1.7311565866540648e-06, + "loss": 0.3242, + "step": 36716 + }, + { + "epoch": 0.7350198934013963, + "grad_norm": 1.1209399700164795, + "learning_rate": 1.7309112860411553e-06, + "loss": 0.3053, + "step": 36717 + }, + { + "epoch": 0.7350399119185246, + "grad_norm": 1.9097980260849, + "learning_rate": 1.7306659991707936e-06, + "loss": 0.7452, + "step": 36718 + }, + { + "epoch": 0.735059930435653, + "grad_norm": 1.1799978017807007, + "learning_rate": 1.7304207260440065e-06, + "loss": 0.3044, + "step": 36719 + }, + { + "epoch": 0.7350799489527813, + "grad_norm": 1.1504583358764648, + "learning_rate": 1.7301754666618287e-06, + "loss": 0.3663, + "step": 36720 + }, + { + "epoch": 0.7350999674699097, + "grad_norm": 1.2649694681167603, + "learning_rate": 1.72993022102529e-06, + "loss": 0.3361, + "step": 36721 + }, + { + "epoch": 0.735119985987038, + "grad_norm": 1.1048024892807007, + "learning_rate": 1.7296849891354207e-06, + "loss": 0.2923, + "step": 36722 + }, + { + "epoch": 0.7351400045041664, + "grad_norm": 1.009856939315796, + "learning_rate": 1.7294397709932499e-06, + "loss": 0.2748, + "step": 36723 + }, + { + "epoch": 0.7351600230212947, + "grad_norm": 1.9029276371002197, + "learning_rate": 1.7291945665998123e-06, + "loss": 0.8528, + "step": 36724 + }, + { + "epoch": 0.7351800415384231, + "grad_norm": 1.2317962646484375, + "learning_rate": 1.728949375956137e-06, + "loss": 0.305, + "step": 36725 + }, + { + "epoch": 0.7352000600555514, + "grad_norm": 0.968998372554779, + "learning_rate": 1.7287041990632542e-06, + "loss": 0.2962, + "step": 36726 + }, + { + "epoch": 0.7352200785726797, + "grad_norm": 1.103670358657837, + "learning_rate": 1.7284590359221936e-06, + "loss": 0.3235, + "step": 36727 + }, + { + "epoch": 0.7352400970898081, + "grad_norm": 1.0424604415893555, + "learning_rate": 1.7282138865339886e-06, + "loss": 0.2832, + "step": 36728 + }, + { + "epoch": 0.7352601156069364, + "grad_norm": 1.1210107803344727, + "learning_rate": 1.727968750899669e-06, + "loss": 0.2947, + "step": 36729 + }, + { + "epoch": 0.7352801341240648, + "grad_norm": 1.1085957288742065, + "learning_rate": 1.7277236290202626e-06, + "loss": 0.3273, + "step": 36730 + }, + { + "epoch": 0.7353001526411931, + "grad_norm": 1.1297602653503418, + "learning_rate": 1.7274785208968031e-06, + "loss": 0.3071, + "step": 36731 + }, + { + "epoch": 0.7353201711583215, + "grad_norm": 1.1831878423690796, + "learning_rate": 1.7272334265303187e-06, + "loss": 0.3248, + "step": 36732 + }, + { + "epoch": 0.7353401896754498, + "grad_norm": 1.0740125179290771, + "learning_rate": 1.7269883459218417e-06, + "loss": 0.3245, + "step": 36733 + }, + { + "epoch": 0.7353602081925781, + "grad_norm": 1.2602587938308716, + "learning_rate": 1.7267432790724015e-06, + "loss": 0.2912, + "step": 36734 + }, + { + "epoch": 0.7353802267097065, + "grad_norm": 1.191226840019226, + "learning_rate": 1.7264982259830282e-06, + "loss": 0.2941, + "step": 36735 + }, + { + "epoch": 0.7354002452268348, + "grad_norm": 1.0909950733184814, + "learning_rate": 1.7262531866547504e-06, + "loss": 0.293, + "step": 36736 + }, + { + "epoch": 0.7354202637439632, + "grad_norm": 1.0381675958633423, + "learning_rate": 1.7260081610886008e-06, + "loss": 0.3099, + "step": 36737 + }, + { + "epoch": 0.7354402822610915, + "grad_norm": 1.0724961757659912, + "learning_rate": 1.7257631492856092e-06, + "loss": 0.2989, + "step": 36738 + }, + { + "epoch": 0.7354603007782199, + "grad_norm": 1.151284098625183, + "learning_rate": 1.725518151246804e-06, + "loss": 0.2634, + "step": 36739 + }, + { + "epoch": 0.7354803192953482, + "grad_norm": 1.0391159057617188, + "learning_rate": 1.725273166973216e-06, + "loss": 0.2695, + "step": 36740 + }, + { + "epoch": 0.7355003378124766, + "grad_norm": 1.0953495502471924, + "learning_rate": 1.7250281964658732e-06, + "loss": 0.2741, + "step": 36741 + }, + { + "epoch": 0.7355203563296049, + "grad_norm": 1.1286622285842896, + "learning_rate": 1.724783239725809e-06, + "loss": 0.3323, + "step": 36742 + }, + { + "epoch": 0.7355403748467332, + "grad_norm": 1.1941500902175903, + "learning_rate": 1.7245382967540492e-06, + "loss": 0.2789, + "step": 36743 + }, + { + "epoch": 0.7355603933638616, + "grad_norm": 1.8542059659957886, + "learning_rate": 1.7242933675516276e-06, + "loss": 0.8229, + "step": 36744 + }, + { + "epoch": 0.7355804118809899, + "grad_norm": 1.9541102647781372, + "learning_rate": 1.7240484521195698e-06, + "loss": 0.7998, + "step": 36745 + }, + { + "epoch": 0.7356004303981183, + "grad_norm": 1.1014485359191895, + "learning_rate": 1.723803550458909e-06, + "loss": 0.336, + "step": 36746 + }, + { + "epoch": 0.7356204489152466, + "grad_norm": 1.1299545764923096, + "learning_rate": 1.7235586625706729e-06, + "loss": 0.2974, + "step": 36747 + }, + { + "epoch": 0.735640467432375, + "grad_norm": 1.1219239234924316, + "learning_rate": 1.7233137884558908e-06, + "loss": 0.3239, + "step": 36748 + }, + { + "epoch": 0.7356604859495033, + "grad_norm": 1.072500228881836, + "learning_rate": 1.7230689281155916e-06, + "loss": 0.2972, + "step": 36749 + }, + { + "epoch": 0.7356805044666316, + "grad_norm": 1.0445106029510498, + "learning_rate": 1.7228240815508063e-06, + "loss": 0.262, + "step": 36750 + }, + { + "epoch": 0.73570052298376, + "grad_norm": 1.0291755199432373, + "learning_rate": 1.7225792487625636e-06, + "loss": 0.2954, + "step": 36751 + }, + { + "epoch": 0.7357205415008883, + "grad_norm": 1.056307077407837, + "learning_rate": 1.722334429751893e-06, + "loss": 0.302, + "step": 36752 + }, + { + "epoch": 0.7357405600180167, + "grad_norm": 1.062807321548462, + "learning_rate": 1.722089624519822e-06, + "loss": 0.2761, + "step": 36753 + }, + { + "epoch": 0.735760578535145, + "grad_norm": 1.3078200817108154, + "learning_rate": 1.7218448330673799e-06, + "loss": 0.3413, + "step": 36754 + }, + { + "epoch": 0.7357805970522734, + "grad_norm": 1.0886260271072388, + "learning_rate": 1.7216000553955981e-06, + "loss": 0.3333, + "step": 36755 + }, + { + "epoch": 0.7358006155694017, + "grad_norm": 1.1999329328536987, + "learning_rate": 1.7213552915055026e-06, + "loss": 0.2564, + "step": 36756 + }, + { + "epoch": 0.7358206340865301, + "grad_norm": 0.9589716792106628, + "learning_rate": 1.7211105413981255e-06, + "loss": 0.2527, + "step": 36757 + }, + { + "epoch": 0.7358406526036584, + "grad_norm": 1.2396456003189087, + "learning_rate": 1.720865805074492e-06, + "loss": 0.3039, + "step": 36758 + }, + { + "epoch": 0.7358606711207867, + "grad_norm": 1.0848352909088135, + "learning_rate": 1.7206210825356352e-06, + "loss": 0.2957, + "step": 36759 + }, + { + "epoch": 0.7358806896379151, + "grad_norm": 0.9695229530334473, + "learning_rate": 1.7203763737825812e-06, + "loss": 0.2431, + "step": 36760 + }, + { + "epoch": 0.7359007081550434, + "grad_norm": 1.1917756795883179, + "learning_rate": 1.7201316788163598e-06, + "loss": 0.2888, + "step": 36761 + }, + { + "epoch": 0.7359207266721718, + "grad_norm": 1.144872784614563, + "learning_rate": 1.7198869976379983e-06, + "loss": 0.2908, + "step": 36762 + }, + { + "epoch": 0.7359407451893001, + "grad_norm": 1.2065279483795166, + "learning_rate": 1.7196423302485244e-06, + "loss": 0.3268, + "step": 36763 + }, + { + "epoch": 0.7359607637064285, + "grad_norm": 1.0677541494369507, + "learning_rate": 1.71939767664897e-06, + "loss": 0.2641, + "step": 36764 + }, + { + "epoch": 0.7359807822235568, + "grad_norm": 1.1154111623764038, + "learning_rate": 1.719153036840362e-06, + "loss": 0.2726, + "step": 36765 + }, + { + "epoch": 0.7360008007406851, + "grad_norm": 1.1135221719741821, + "learning_rate": 1.718908410823728e-06, + "loss": 0.313, + "step": 36766 + }, + { + "epoch": 0.7360208192578135, + "grad_norm": 1.0543062686920166, + "learning_rate": 1.7186637986000959e-06, + "loss": 0.2927, + "step": 36767 + }, + { + "epoch": 0.7360408377749418, + "grad_norm": 1.1525222063064575, + "learning_rate": 1.7184192001704964e-06, + "loss": 0.2829, + "step": 36768 + }, + { + "epoch": 0.7360608562920702, + "grad_norm": 1.1728477478027344, + "learning_rate": 1.7181746155359546e-06, + "loss": 0.3182, + "step": 36769 + }, + { + "epoch": 0.7360808748091985, + "grad_norm": 1.1991550922393799, + "learning_rate": 1.7179300446975022e-06, + "loss": 0.257, + "step": 36770 + }, + { + "epoch": 0.7361008933263269, + "grad_norm": 1.1682006120681763, + "learning_rate": 1.717685487656166e-06, + "loss": 0.3198, + "step": 36771 + }, + { + "epoch": 0.7361209118434552, + "grad_norm": 1.1755475997924805, + "learning_rate": 1.7174409444129713e-06, + "loss": 0.3128, + "step": 36772 + }, + { + "epoch": 0.7361409303605836, + "grad_norm": 1.129210114479065, + "learning_rate": 1.7171964149689508e-06, + "loss": 0.3126, + "step": 36773 + }, + { + "epoch": 0.7361609488777119, + "grad_norm": 1.0620489120483398, + "learning_rate": 1.71695189932513e-06, + "loss": 0.2855, + "step": 36774 + }, + { + "epoch": 0.7361809673948402, + "grad_norm": 1.0469456911087036, + "learning_rate": 1.7167073974825365e-06, + "loss": 0.3171, + "step": 36775 + }, + { + "epoch": 0.7362009859119686, + "grad_norm": 1.1567418575286865, + "learning_rate": 1.716462909442197e-06, + "loss": 0.3074, + "step": 36776 + }, + { + "epoch": 0.7362210044290969, + "grad_norm": 1.1421290636062622, + "learning_rate": 1.7162184352051425e-06, + "loss": 0.2759, + "step": 36777 + }, + { + "epoch": 0.7362410229462253, + "grad_norm": 1.2420103549957275, + "learning_rate": 1.7159739747723992e-06, + "loss": 0.3003, + "step": 36778 + }, + { + "epoch": 0.7362610414633536, + "grad_norm": 1.0748966932296753, + "learning_rate": 1.7157295281449944e-06, + "loss": 0.2834, + "step": 36779 + }, + { + "epoch": 0.736281059980482, + "grad_norm": 1.1065125465393066, + "learning_rate": 1.7154850953239539e-06, + "loss": 0.2703, + "step": 36780 + }, + { + "epoch": 0.7363010784976103, + "grad_norm": 1.0222547054290771, + "learning_rate": 1.7152406763103091e-06, + "loss": 0.256, + "step": 36781 + }, + { + "epoch": 0.7363210970147386, + "grad_norm": 1.0793919563293457, + "learning_rate": 1.7149962711050844e-06, + "loss": 0.2938, + "step": 36782 + }, + { + "epoch": 0.736341115531867, + "grad_norm": 1.0968279838562012, + "learning_rate": 1.7147518797093093e-06, + "loss": 0.3116, + "step": 36783 + }, + { + "epoch": 0.7363611340489953, + "grad_norm": 1.1332720518112183, + "learning_rate": 1.7145075021240109e-06, + "loss": 0.3045, + "step": 36784 + }, + { + "epoch": 0.7363811525661237, + "grad_norm": 1.7972370386123657, + "learning_rate": 1.7142631383502146e-06, + "loss": 0.8656, + "step": 36785 + }, + { + "epoch": 0.736401171083252, + "grad_norm": 1.6485540866851807, + "learning_rate": 1.7140187883889497e-06, + "loss": 0.3737, + "step": 36786 + }, + { + "epoch": 0.7364211896003804, + "grad_norm": 1.280320167541504, + "learning_rate": 1.7137744522412431e-06, + "loss": 0.3087, + "step": 36787 + }, + { + "epoch": 0.7364412081175087, + "grad_norm": 1.1036796569824219, + "learning_rate": 1.7135301299081215e-06, + "loss": 0.2675, + "step": 36788 + }, + { + "epoch": 0.7364612266346371, + "grad_norm": 1.0460785627365112, + "learning_rate": 1.7132858213906106e-06, + "loss": 0.3005, + "step": 36789 + }, + { + "epoch": 0.7364812451517654, + "grad_norm": 1.1896158456802368, + "learning_rate": 1.7130415266897398e-06, + "loss": 0.2759, + "step": 36790 + }, + { + "epoch": 0.7365012636688937, + "grad_norm": 1.071374773979187, + "learning_rate": 1.7127972458065357e-06, + "loss": 0.2544, + "step": 36791 + }, + { + "epoch": 0.7365212821860221, + "grad_norm": 2.0184645652770996, + "learning_rate": 1.7125529787420242e-06, + "loss": 0.8071, + "step": 36792 + }, + { + "epoch": 0.7365413007031504, + "grad_norm": 1.1444268226623535, + "learning_rate": 1.7123087254972309e-06, + "loss": 0.3158, + "step": 36793 + }, + { + "epoch": 0.7365613192202788, + "grad_norm": 1.1498924493789673, + "learning_rate": 1.712064486073186e-06, + "loss": 0.3081, + "step": 36794 + }, + { + "epoch": 0.7365813377374071, + "grad_norm": 1.1017495393753052, + "learning_rate": 1.7118202604709127e-06, + "loss": 0.3155, + "step": 36795 + }, + { + "epoch": 0.7366013562545355, + "grad_norm": 1.0138156414031982, + "learning_rate": 1.7115760486914412e-06, + "loss": 0.2542, + "step": 36796 + }, + { + "epoch": 0.7366213747716638, + "grad_norm": 1.0884556770324707, + "learning_rate": 1.7113318507357963e-06, + "loss": 0.3314, + "step": 36797 + }, + { + "epoch": 0.7366413932887921, + "grad_norm": 1.0779118537902832, + "learning_rate": 1.7110876666050026e-06, + "loss": 0.3209, + "step": 36798 + }, + { + "epoch": 0.7366614118059205, + "grad_norm": 1.00595223903656, + "learning_rate": 1.7108434963000902e-06, + "loss": 0.3185, + "step": 36799 + }, + { + "epoch": 0.7366814303230488, + "grad_norm": 1.223308801651001, + "learning_rate": 1.7105993398220845e-06, + "loss": 0.3426, + "step": 36800 + }, + { + "epoch": 0.7367014488401772, + "grad_norm": 2.03210711479187, + "learning_rate": 1.7103551971720105e-06, + "loss": 0.7617, + "step": 36801 + }, + { + "epoch": 0.7367214673573055, + "grad_norm": 1.1289424896240234, + "learning_rate": 1.710111068350896e-06, + "loss": 0.235, + "step": 36802 + }, + { + "epoch": 0.7367414858744339, + "grad_norm": 1.1216174364089966, + "learning_rate": 1.7098669533597645e-06, + "loss": 0.2892, + "step": 36803 + }, + { + "epoch": 0.7367615043915622, + "grad_norm": 1.1623029708862305, + "learning_rate": 1.709622852199646e-06, + "loss": 0.3033, + "step": 36804 + }, + { + "epoch": 0.7367815229086906, + "grad_norm": 1.1520671844482422, + "learning_rate": 1.7093787648715649e-06, + "loss": 0.3007, + "step": 36805 + }, + { + "epoch": 0.7368015414258189, + "grad_norm": 1.1272870302200317, + "learning_rate": 1.709134691376546e-06, + "loss": 0.3076, + "step": 36806 + }, + { + "epoch": 0.7368215599429472, + "grad_norm": 1.8596627712249756, + "learning_rate": 1.708890631715618e-06, + "loss": 0.7389, + "step": 36807 + }, + { + "epoch": 0.7368415784600756, + "grad_norm": 1.1736571788787842, + "learning_rate": 1.7086465858898038e-06, + "loss": 0.3076, + "step": 36808 + }, + { + "epoch": 0.7368615969772039, + "grad_norm": 1.4103350639343262, + "learning_rate": 1.708402553900133e-06, + "loss": 0.3332, + "step": 36809 + }, + { + "epoch": 0.7368816154943323, + "grad_norm": 1.1540145874023438, + "learning_rate": 1.7081585357476294e-06, + "loss": 0.2805, + "step": 36810 + }, + { + "epoch": 0.7369016340114606, + "grad_norm": 1.0686620473861694, + "learning_rate": 1.7079145314333174e-06, + "loss": 0.2671, + "step": 36811 + }, + { + "epoch": 0.736921652528589, + "grad_norm": 1.2383395433425903, + "learning_rate": 1.7076705409582255e-06, + "loss": 0.3494, + "step": 36812 + }, + { + "epoch": 0.7369416710457173, + "grad_norm": 1.155916690826416, + "learning_rate": 1.7074265643233783e-06, + "loss": 0.266, + "step": 36813 + }, + { + "epoch": 0.7369616895628456, + "grad_norm": 0.9672959446907043, + "learning_rate": 1.707182601529801e-06, + "loss": 0.2508, + "step": 36814 + }, + { + "epoch": 0.736981708079974, + "grad_norm": 1.11680269241333, + "learning_rate": 1.7069386525785198e-06, + "loss": 0.2906, + "step": 36815 + }, + { + "epoch": 0.7370017265971023, + "grad_norm": 1.0734394788742065, + "learning_rate": 1.7066947174705585e-06, + "loss": 0.3452, + "step": 36816 + }, + { + "epoch": 0.7370217451142307, + "grad_norm": 1.019754409790039, + "learning_rate": 1.7064507962069449e-06, + "loss": 0.309, + "step": 36817 + }, + { + "epoch": 0.737041763631359, + "grad_norm": 1.1212078332901, + "learning_rate": 1.7062068887887034e-06, + "loss": 0.2742, + "step": 36818 + }, + { + "epoch": 0.7370617821484874, + "grad_norm": 1.0953655242919922, + "learning_rate": 1.7059629952168578e-06, + "loss": 0.2941, + "step": 36819 + }, + { + "epoch": 0.7370818006656157, + "grad_norm": 1.9697355031967163, + "learning_rate": 1.7057191154924368e-06, + "loss": 0.7051, + "step": 36820 + }, + { + "epoch": 0.7371018191827441, + "grad_norm": 1.2003931999206543, + "learning_rate": 1.7054752496164618e-06, + "loss": 0.3059, + "step": 36821 + }, + { + "epoch": 0.7371218376998724, + "grad_norm": 1.0213661193847656, + "learning_rate": 1.7052313975899615e-06, + "loss": 0.2676, + "step": 36822 + }, + { + "epoch": 0.7371418562170007, + "grad_norm": 1.2284575700759888, + "learning_rate": 1.7049875594139593e-06, + "loss": 0.2684, + "step": 36823 + }, + { + "epoch": 0.7371618747341291, + "grad_norm": 1.0796648263931274, + "learning_rate": 1.7047437350894807e-06, + "loss": 0.307, + "step": 36824 + }, + { + "epoch": 0.7371818932512574, + "grad_norm": 1.8326717615127563, + "learning_rate": 1.704499924617548e-06, + "loss": 0.7488, + "step": 36825 + }, + { + "epoch": 0.7372019117683858, + "grad_norm": 1.4171503782272339, + "learning_rate": 1.7042561279991905e-06, + "loss": 0.3346, + "step": 36826 + }, + { + "epoch": 0.7372219302855141, + "grad_norm": 1.0487632751464844, + "learning_rate": 1.7040123452354306e-06, + "loss": 0.3315, + "step": 36827 + }, + { + "epoch": 0.7372419488026425, + "grad_norm": 1.0546952486038208, + "learning_rate": 1.7037685763272942e-06, + "loss": 0.2241, + "step": 36828 + }, + { + "epoch": 0.7372619673197708, + "grad_norm": 1.0858261585235596, + "learning_rate": 1.7035248212758033e-06, + "loss": 0.2893, + "step": 36829 + }, + { + "epoch": 0.7372819858368991, + "grad_norm": 1.183610439300537, + "learning_rate": 1.703281080081986e-06, + "loss": 0.2749, + "step": 36830 + }, + { + "epoch": 0.7373020043540275, + "grad_norm": 1.326788067817688, + "learning_rate": 1.7030373527468662e-06, + "loss": 0.2849, + "step": 36831 + }, + { + "epoch": 0.7373220228711558, + "grad_norm": 1.2830008268356323, + "learning_rate": 1.7027936392714655e-06, + "loss": 0.261, + "step": 36832 + }, + { + "epoch": 0.7373420413882842, + "grad_norm": 1.0521390438079834, + "learning_rate": 1.7025499396568129e-06, + "loss": 0.2713, + "step": 36833 + }, + { + "epoch": 0.7373620599054125, + "grad_norm": 1.074689507484436, + "learning_rate": 1.7023062539039293e-06, + "loss": 0.2691, + "step": 36834 + }, + { + "epoch": 0.7373820784225409, + "grad_norm": 1.1995837688446045, + "learning_rate": 1.702062582013842e-06, + "loss": 0.2783, + "step": 36835 + }, + { + "epoch": 0.7374020969396692, + "grad_norm": 1.0777390003204346, + "learning_rate": 1.7018189239875738e-06, + "loss": 0.2933, + "step": 36836 + }, + { + "epoch": 0.7374221154567976, + "grad_norm": 0.9940654039382935, + "learning_rate": 1.7015752798261487e-06, + "loss": 0.2308, + "step": 36837 + }, + { + "epoch": 0.7374421339739259, + "grad_norm": 1.081737756729126, + "learning_rate": 1.7013316495305898e-06, + "loss": 0.3462, + "step": 36838 + }, + { + "epoch": 0.7374621524910542, + "grad_norm": 1.239875078201294, + "learning_rate": 1.7010880331019248e-06, + "loss": 0.3528, + "step": 36839 + }, + { + "epoch": 0.7374821710081826, + "grad_norm": 1.1283146142959595, + "learning_rate": 1.7008444305411754e-06, + "loss": 0.3354, + "step": 36840 + }, + { + "epoch": 0.7375021895253109, + "grad_norm": 2.0321943759918213, + "learning_rate": 1.7006008418493664e-06, + "loss": 0.8195, + "step": 36841 + }, + { + "epoch": 0.7375222080424393, + "grad_norm": 0.9790890216827393, + "learning_rate": 1.7003572670275192e-06, + "loss": 0.2806, + "step": 36842 + }, + { + "epoch": 0.7375422265595676, + "grad_norm": 1.0057096481323242, + "learning_rate": 1.700113706076662e-06, + "loss": 0.2762, + "step": 36843 + }, + { + "epoch": 0.737562245076696, + "grad_norm": 1.0512222051620483, + "learning_rate": 1.6998701589978166e-06, + "loss": 0.2874, + "step": 36844 + }, + { + "epoch": 0.7375822635938243, + "grad_norm": 1.264396071434021, + "learning_rate": 1.6996266257920052e-06, + "loss": 0.3244, + "step": 36845 + }, + { + "epoch": 0.7376022821109526, + "grad_norm": 1.0219677686691284, + "learning_rate": 1.6993831064602552e-06, + "loss": 0.285, + "step": 36846 + }, + { + "epoch": 0.737622300628081, + "grad_norm": 1.1936527490615845, + "learning_rate": 1.699139601003586e-06, + "loss": 0.3163, + "step": 36847 + }, + { + "epoch": 0.7376423191452093, + "grad_norm": 1.109696626663208, + "learning_rate": 1.6988961094230255e-06, + "loss": 0.2981, + "step": 36848 + }, + { + "epoch": 0.7376623376623377, + "grad_norm": 0.9434003233909607, + "learning_rate": 1.6986526317195955e-06, + "loss": 0.2713, + "step": 36849 + }, + { + "epoch": 0.737682356179466, + "grad_norm": 1.084251046180725, + "learning_rate": 1.6984091678943192e-06, + "loss": 0.3105, + "step": 36850 + }, + { + "epoch": 0.7377023746965944, + "grad_norm": 1.100950837135315, + "learning_rate": 1.6981657179482192e-06, + "loss": 0.2767, + "step": 36851 + }, + { + "epoch": 0.7377223932137227, + "grad_norm": 1.10985267162323, + "learning_rate": 1.697922281882321e-06, + "loss": 0.3081, + "step": 36852 + }, + { + "epoch": 0.7377424117308511, + "grad_norm": 1.1261229515075684, + "learning_rate": 1.6976788596976468e-06, + "loss": 0.2865, + "step": 36853 + }, + { + "epoch": 0.7377624302479794, + "grad_norm": 1.0478829145431519, + "learning_rate": 1.69743545139522e-06, + "loss": 0.2934, + "step": 36854 + }, + { + "epoch": 0.7377824487651077, + "grad_norm": 1.0513461828231812, + "learning_rate": 1.6971920569760642e-06, + "loss": 0.2738, + "step": 36855 + }, + { + "epoch": 0.7378024672822361, + "grad_norm": 0.9771524667739868, + "learning_rate": 1.6969486764412002e-06, + "loss": 0.2757, + "step": 36856 + }, + { + "epoch": 0.7378224857993644, + "grad_norm": 1.1070605516433716, + "learning_rate": 1.6967053097916553e-06, + "loss": 0.2504, + "step": 36857 + }, + { + "epoch": 0.7378425043164928, + "grad_norm": 1.102927803993225, + "learning_rate": 1.6964619570284486e-06, + "loss": 0.3153, + "step": 36858 + }, + { + "epoch": 0.7378625228336211, + "grad_norm": 1.362882137298584, + "learning_rate": 1.6962186181526063e-06, + "loss": 0.3151, + "step": 36859 + }, + { + "epoch": 0.7378825413507495, + "grad_norm": 1.1304147243499756, + "learning_rate": 1.6959752931651486e-06, + "loss": 0.3101, + "step": 36860 + }, + { + "epoch": 0.7379025598678778, + "grad_norm": 1.2608891725540161, + "learning_rate": 1.6957319820671008e-06, + "loss": 0.3295, + "step": 36861 + }, + { + "epoch": 0.7379225783850061, + "grad_norm": 1.1650582551956177, + "learning_rate": 1.6954886848594853e-06, + "loss": 0.2776, + "step": 36862 + }, + { + "epoch": 0.7379425969021345, + "grad_norm": 1.0803080797195435, + "learning_rate": 1.695245401543324e-06, + "loss": 0.27, + "step": 36863 + }, + { + "epoch": 0.7379626154192628, + "grad_norm": 1.0780431032180786, + "learning_rate": 1.695002132119638e-06, + "loss": 0.2755, + "step": 36864 + }, + { + "epoch": 0.7379826339363912, + "grad_norm": 1.0933494567871094, + "learning_rate": 1.6947588765894536e-06, + "loss": 0.2703, + "step": 36865 + }, + { + "epoch": 0.7380026524535195, + "grad_norm": 0.9766425490379333, + "learning_rate": 1.6945156349537917e-06, + "loss": 0.2659, + "step": 36866 + }, + { + "epoch": 0.7380226709706479, + "grad_norm": 1.196532130241394, + "learning_rate": 1.6942724072136746e-06, + "loss": 0.3247, + "step": 36867 + }, + { + "epoch": 0.7380426894877762, + "grad_norm": 1.152601957321167, + "learning_rate": 1.6940291933701247e-06, + "loss": 0.3396, + "step": 36868 + }, + { + "epoch": 0.7380627080049046, + "grad_norm": 1.0482826232910156, + "learning_rate": 1.6937859934241635e-06, + "loss": 0.2913, + "step": 36869 + }, + { + "epoch": 0.7380827265220329, + "grad_norm": 1.24613618850708, + "learning_rate": 1.6935428073768157e-06, + "loss": 0.2338, + "step": 36870 + }, + { + "epoch": 0.7381027450391612, + "grad_norm": 1.051317572593689, + "learning_rate": 1.6932996352291008e-06, + "loss": 0.2867, + "step": 36871 + }, + { + "epoch": 0.7381227635562896, + "grad_norm": 1.0445563793182373, + "learning_rate": 1.6930564769820445e-06, + "loss": 0.3497, + "step": 36872 + }, + { + "epoch": 0.7381427820734179, + "grad_norm": 1.273295521736145, + "learning_rate": 1.6928133326366653e-06, + "loss": 0.3138, + "step": 36873 + }, + { + "epoch": 0.7381628005905463, + "grad_norm": 1.1768262386322021, + "learning_rate": 1.6925702021939888e-06, + "loss": 0.3322, + "step": 36874 + }, + { + "epoch": 0.7381828191076746, + "grad_norm": 1.4353018999099731, + "learning_rate": 1.6923270856550355e-06, + "loss": 0.2519, + "step": 36875 + }, + { + "epoch": 0.738202837624803, + "grad_norm": 1.2137354612350464, + "learning_rate": 1.692083983020827e-06, + "loss": 0.3269, + "step": 36876 + }, + { + "epoch": 0.7382228561419313, + "grad_norm": 1.279584527015686, + "learning_rate": 1.6918408942923858e-06, + "loss": 0.3212, + "step": 36877 + }, + { + "epoch": 0.7382428746590596, + "grad_norm": 1.0676448345184326, + "learning_rate": 1.6915978194707316e-06, + "loss": 0.2624, + "step": 36878 + }, + { + "epoch": 0.738262893176188, + "grad_norm": 0.9823907017707825, + "learning_rate": 1.69135475855689e-06, + "loss": 0.2124, + "step": 36879 + }, + { + "epoch": 0.7382829116933163, + "grad_norm": 1.104486107826233, + "learning_rate": 1.6911117115518816e-06, + "loss": 0.3212, + "step": 36880 + }, + { + "epoch": 0.7383029302104447, + "grad_norm": 1.133366584777832, + "learning_rate": 1.6908686784567268e-06, + "loss": 0.283, + "step": 36881 + }, + { + "epoch": 0.738322948727573, + "grad_norm": 1.3865069150924683, + "learning_rate": 1.6906256592724462e-06, + "loss": 0.3156, + "step": 36882 + }, + { + "epoch": 0.7383429672447014, + "grad_norm": 1.0850802659988403, + "learning_rate": 1.6903826540000651e-06, + "loss": 0.3011, + "step": 36883 + }, + { + "epoch": 0.7383629857618297, + "grad_norm": 1.085085391998291, + "learning_rate": 1.6901396626406013e-06, + "loss": 0.3195, + "step": 36884 + }, + { + "epoch": 0.7383830042789581, + "grad_norm": 1.116981029510498, + "learning_rate": 1.6898966851950798e-06, + "loss": 0.2635, + "step": 36885 + }, + { + "epoch": 0.7384030227960864, + "grad_norm": 1.0957413911819458, + "learning_rate": 1.6896537216645203e-06, + "loss": 0.3078, + "step": 36886 + }, + { + "epoch": 0.7384230413132147, + "grad_norm": 1.3053163290023804, + "learning_rate": 1.6894107720499425e-06, + "loss": 0.3149, + "step": 36887 + }, + { + "epoch": 0.7384430598303431, + "grad_norm": 1.2158663272857666, + "learning_rate": 1.689167836352371e-06, + "loss": 0.2929, + "step": 36888 + }, + { + "epoch": 0.7384630783474714, + "grad_norm": 1.021759033203125, + "learning_rate": 1.6889249145728253e-06, + "loss": 0.2817, + "step": 36889 + }, + { + "epoch": 0.7384830968645998, + "grad_norm": 1.1350035667419434, + "learning_rate": 1.6886820067123272e-06, + "loss": 0.3122, + "step": 36890 + }, + { + "epoch": 0.7385031153817281, + "grad_norm": 1.1428567171096802, + "learning_rate": 1.6884391127718952e-06, + "loss": 0.3154, + "step": 36891 + }, + { + "epoch": 0.7385231338988565, + "grad_norm": 1.0393264293670654, + "learning_rate": 1.6881962327525541e-06, + "loss": 0.2761, + "step": 36892 + }, + { + "epoch": 0.7385431524159848, + "grad_norm": 1.0210530757904053, + "learning_rate": 1.6879533666553238e-06, + "loss": 0.2921, + "step": 36893 + }, + { + "epoch": 0.7385631709331131, + "grad_norm": 1.3274264335632324, + "learning_rate": 1.6877105144812244e-06, + "loss": 0.3259, + "step": 36894 + }, + { + "epoch": 0.7385831894502415, + "grad_norm": 1.2786533832550049, + "learning_rate": 1.6874676762312758e-06, + "loss": 0.2996, + "step": 36895 + }, + { + "epoch": 0.7386032079673698, + "grad_norm": 1.090045690536499, + "learning_rate": 1.6872248519065016e-06, + "loss": 0.29, + "step": 36896 + }, + { + "epoch": 0.7386232264844982, + "grad_norm": 1.0438132286071777, + "learning_rate": 1.6869820415079196e-06, + "loss": 0.2745, + "step": 36897 + }, + { + "epoch": 0.7386432450016265, + "grad_norm": 1.061208724975586, + "learning_rate": 1.6867392450365537e-06, + "loss": 0.2961, + "step": 36898 + }, + { + "epoch": 0.7386632635187549, + "grad_norm": 1.162941336631775, + "learning_rate": 1.6864964624934233e-06, + "loss": 0.3172, + "step": 36899 + }, + { + "epoch": 0.7386832820358832, + "grad_norm": 1.1358916759490967, + "learning_rate": 1.686253693879547e-06, + "loss": 0.32, + "step": 36900 + }, + { + "epoch": 0.7387033005530116, + "grad_norm": 0.9941840767860413, + "learning_rate": 1.6860109391959484e-06, + "loss": 0.3197, + "step": 36901 + }, + { + "epoch": 0.7387233190701399, + "grad_norm": 1.1412429809570312, + "learning_rate": 1.685768198443647e-06, + "loss": 0.3247, + "step": 36902 + }, + { + "epoch": 0.7387433375872682, + "grad_norm": 1.1142760515213013, + "learning_rate": 1.6855254716236625e-06, + "loss": 0.3048, + "step": 36903 + }, + { + "epoch": 0.7387633561043966, + "grad_norm": 1.0793657302856445, + "learning_rate": 1.685282758737014e-06, + "loss": 0.2901, + "step": 36904 + }, + { + "epoch": 0.7387833746215249, + "grad_norm": 1.0035200119018555, + "learning_rate": 1.6850400597847255e-06, + "loss": 0.2912, + "step": 36905 + }, + { + "epoch": 0.7388033931386533, + "grad_norm": 1.077558994293213, + "learning_rate": 1.6847973747678148e-06, + "loss": 0.2577, + "step": 36906 + }, + { + "epoch": 0.7388234116557816, + "grad_norm": 1.0959709882736206, + "learning_rate": 1.6845547036873027e-06, + "loss": 0.2793, + "step": 36907 + }, + { + "epoch": 0.73884343017291, + "grad_norm": 1.1439800262451172, + "learning_rate": 1.6843120465442076e-06, + "loss": 0.3099, + "step": 36908 + }, + { + "epoch": 0.7388634486900383, + "grad_norm": 1.116899847984314, + "learning_rate": 1.6840694033395526e-06, + "loss": 0.3052, + "step": 36909 + }, + { + "epoch": 0.7388834672071666, + "grad_norm": 1.0595595836639404, + "learning_rate": 1.6838267740743547e-06, + "loss": 0.2808, + "step": 36910 + }, + { + "epoch": 0.738903485724295, + "grad_norm": 1.1815153360366821, + "learning_rate": 1.683584158749637e-06, + "loss": 0.2726, + "step": 36911 + }, + { + "epoch": 0.7389235042414233, + "grad_norm": 1.0302927494049072, + "learning_rate": 1.6833415573664175e-06, + "loss": 0.3179, + "step": 36912 + }, + { + "epoch": 0.7389435227585517, + "grad_norm": 0.9804315567016602, + "learning_rate": 1.6830989699257149e-06, + "loss": 0.2913, + "step": 36913 + }, + { + "epoch": 0.73896354127568, + "grad_norm": 1.0842279195785522, + "learning_rate": 1.6828563964285521e-06, + "loss": 0.3392, + "step": 36914 + }, + { + "epoch": 0.7389835597928084, + "grad_norm": 1.127842903137207, + "learning_rate": 1.682613836875947e-06, + "loss": 0.2707, + "step": 36915 + }, + { + "epoch": 0.7390035783099367, + "grad_norm": 1.1260876655578613, + "learning_rate": 1.68237129126892e-06, + "loss": 0.3383, + "step": 36916 + }, + { + "epoch": 0.7390235968270651, + "grad_norm": 1.0377962589263916, + "learning_rate": 1.6821287596084896e-06, + "loss": 0.2712, + "step": 36917 + }, + { + "epoch": 0.7390436153441934, + "grad_norm": 1.005509853363037, + "learning_rate": 1.6818862418956743e-06, + "loss": 0.2457, + "step": 36918 + }, + { + "epoch": 0.7390636338613217, + "grad_norm": 1.0381309986114502, + "learning_rate": 1.681643738131497e-06, + "loss": 0.2974, + "step": 36919 + }, + { + "epoch": 0.7390836523784501, + "grad_norm": 1.8870491981506348, + "learning_rate": 1.681401248316975e-06, + "loss": 0.7809, + "step": 36920 + }, + { + "epoch": 0.7391036708955784, + "grad_norm": 1.2357040643692017, + "learning_rate": 1.6811587724531265e-06, + "loss": 0.2714, + "step": 36921 + }, + { + "epoch": 0.7391236894127068, + "grad_norm": 1.1881673336029053, + "learning_rate": 1.6809163105409736e-06, + "loss": 0.3336, + "step": 36922 + }, + { + "epoch": 0.7391437079298351, + "grad_norm": 1.9971669912338257, + "learning_rate": 1.6806738625815333e-06, + "loss": 0.7948, + "step": 36923 + }, + { + "epoch": 0.7391637264469635, + "grad_norm": 1.2007701396942139, + "learning_rate": 1.680431428575827e-06, + "loss": 0.304, + "step": 36924 + }, + { + "epoch": 0.7391837449640918, + "grad_norm": 1.0655030012130737, + "learning_rate": 1.6801890085248728e-06, + "loss": 0.2868, + "step": 36925 + }, + { + "epoch": 0.7392037634812201, + "grad_norm": 1.8229265213012695, + "learning_rate": 1.6799466024296874e-06, + "loss": 0.7674, + "step": 36926 + }, + { + "epoch": 0.7392237819983485, + "grad_norm": 1.0710668563842773, + "learning_rate": 1.6797042102912942e-06, + "loss": 0.283, + "step": 36927 + }, + { + "epoch": 0.7392438005154768, + "grad_norm": 1.1931816339492798, + "learning_rate": 1.6794618321107098e-06, + "loss": 0.2807, + "step": 36928 + }, + { + "epoch": 0.7392638190326052, + "grad_norm": 1.1549125909805298, + "learning_rate": 1.6792194678889534e-06, + "loss": 0.3037, + "step": 36929 + }, + { + "epoch": 0.7392838375497335, + "grad_norm": 1.135048508644104, + "learning_rate": 1.6789771176270437e-06, + "loss": 0.3108, + "step": 36930 + }, + { + "epoch": 0.7393038560668619, + "grad_norm": 1.0463087558746338, + "learning_rate": 1.6787347813259973e-06, + "loss": 0.3011, + "step": 36931 + }, + { + "epoch": 0.7393238745839902, + "grad_norm": 1.145228385925293, + "learning_rate": 1.6784924589868373e-06, + "loss": 0.3248, + "step": 36932 + }, + { + "epoch": 0.7393438931011186, + "grad_norm": 1.2784404754638672, + "learning_rate": 1.6782501506105802e-06, + "loss": 0.3726, + "step": 36933 + }, + { + "epoch": 0.7393639116182469, + "grad_norm": 1.0446927547454834, + "learning_rate": 1.6780078561982427e-06, + "loss": 0.287, + "step": 36934 + }, + { + "epoch": 0.7393839301353752, + "grad_norm": 1.0542361736297607, + "learning_rate": 1.677765575750847e-06, + "loss": 0.3126, + "step": 36935 + }, + { + "epoch": 0.7394039486525036, + "grad_norm": 1.9828375577926636, + "learning_rate": 1.6775233092694088e-06, + "loss": 0.7628, + "step": 36936 + }, + { + "epoch": 0.7394239671696319, + "grad_norm": 2.026954174041748, + "learning_rate": 1.6772810567549485e-06, + "loss": 0.6904, + "step": 36937 + }, + { + "epoch": 0.7394439856867603, + "grad_norm": 1.1948728561401367, + "learning_rate": 1.677038818208484e-06, + "loss": 0.2884, + "step": 36938 + }, + { + "epoch": 0.7394640042038886, + "grad_norm": 1.0676380395889282, + "learning_rate": 1.6767965936310332e-06, + "loss": 0.31, + "step": 36939 + }, + { + "epoch": 0.739484022721017, + "grad_norm": 1.1116000413894653, + "learning_rate": 1.6765543830236125e-06, + "loss": 0.2623, + "step": 36940 + }, + { + "epoch": 0.7395040412381453, + "grad_norm": 1.0599122047424316, + "learning_rate": 1.6763121863872434e-06, + "loss": 0.3023, + "step": 36941 + }, + { + "epoch": 0.7395240597552736, + "grad_norm": 1.1849559545516968, + "learning_rate": 1.6760700037229433e-06, + "loss": 0.3463, + "step": 36942 + }, + { + "epoch": 0.739544078272402, + "grad_norm": 1.3355480432510376, + "learning_rate": 1.6758278350317286e-06, + "loss": 0.3385, + "step": 36943 + }, + { + "epoch": 0.7395640967895303, + "grad_norm": 1.89308762550354, + "learning_rate": 1.6755856803146175e-06, + "loss": 0.7775, + "step": 36944 + }, + { + "epoch": 0.7395841153066587, + "grad_norm": 1.965383529663086, + "learning_rate": 1.6753435395726302e-06, + "loss": 0.734, + "step": 36945 + }, + { + "epoch": 0.739604133823787, + "grad_norm": 2.2622008323669434, + "learning_rate": 1.6751014128067833e-06, + "loss": 0.6943, + "step": 36946 + }, + { + "epoch": 0.7396241523409154, + "grad_norm": 1.0203133821487427, + "learning_rate": 1.6748593000180924e-06, + "loss": 0.2919, + "step": 36947 + }, + { + "epoch": 0.7396441708580437, + "grad_norm": 1.0952236652374268, + "learning_rate": 1.6746172012075795e-06, + "loss": 0.2837, + "step": 36948 + }, + { + "epoch": 0.7396641893751721, + "grad_norm": 1.1518794298171997, + "learning_rate": 1.6743751163762583e-06, + "loss": 0.2937, + "step": 36949 + }, + { + "epoch": 0.7396842078923004, + "grad_norm": 1.0801149606704712, + "learning_rate": 1.6741330455251504e-06, + "loss": 0.2671, + "step": 36950 + }, + { + "epoch": 0.7397042264094287, + "grad_norm": 1.2652626037597656, + "learning_rate": 1.6738909886552711e-06, + "loss": 0.3067, + "step": 36951 + }, + { + "epoch": 0.7397242449265571, + "grad_norm": 1.1766457557678223, + "learning_rate": 1.6736489457676392e-06, + "loss": 0.3037, + "step": 36952 + }, + { + "epoch": 0.7397442634436854, + "grad_norm": 1.3125993013381958, + "learning_rate": 1.673406916863269e-06, + "loss": 0.2248, + "step": 36953 + }, + { + "epoch": 0.7397642819608138, + "grad_norm": 1.2509238719940186, + "learning_rate": 1.6731649019431817e-06, + "loss": 0.3286, + "step": 36954 + }, + { + "epoch": 0.7397843004779421, + "grad_norm": 1.9628642797470093, + "learning_rate": 1.6729229010083931e-06, + "loss": 0.7854, + "step": 36955 + }, + { + "epoch": 0.7398043189950705, + "grad_norm": 1.1398714780807495, + "learning_rate": 1.672680914059921e-06, + "loss": 0.2588, + "step": 36956 + }, + { + "epoch": 0.7398243375121988, + "grad_norm": 1.3320611715316772, + "learning_rate": 1.6724389410987808e-06, + "loss": 0.3042, + "step": 36957 + }, + { + "epoch": 0.7398443560293271, + "grad_norm": 1.049486517906189, + "learning_rate": 1.672196982125992e-06, + "loss": 0.2751, + "step": 36958 + }, + { + "epoch": 0.7398643745464555, + "grad_norm": 1.1511292457580566, + "learning_rate": 1.6719550371425718e-06, + "loss": 0.2988, + "step": 36959 + }, + { + "epoch": 0.7398843930635838, + "grad_norm": 1.1441643238067627, + "learning_rate": 1.6717131061495345e-06, + "loss": 0.3229, + "step": 36960 + }, + { + "epoch": 0.7399044115807122, + "grad_norm": 1.172316551208496, + "learning_rate": 1.6714711891479008e-06, + "loss": 0.2537, + "step": 36961 + }, + { + "epoch": 0.7399244300978405, + "grad_norm": 1.1612147092819214, + "learning_rate": 1.6712292861386836e-06, + "loss": 0.312, + "step": 36962 + }, + { + "epoch": 0.7399444486149689, + "grad_norm": 1.9021931886672974, + "learning_rate": 1.6709873971229046e-06, + "loss": 0.6966, + "step": 36963 + }, + { + "epoch": 0.7399644671320972, + "grad_norm": 1.216796875, + "learning_rate": 1.6707455221015778e-06, + "loss": 0.2854, + "step": 36964 + }, + { + "epoch": 0.7399844856492256, + "grad_norm": 1.12794828414917, + "learning_rate": 1.6705036610757203e-06, + "loss": 0.3136, + "step": 36965 + }, + { + "epoch": 0.7400045041663539, + "grad_norm": 1.115249514579773, + "learning_rate": 1.6702618140463478e-06, + "loss": 0.2826, + "step": 36966 + }, + { + "epoch": 0.7400245226834822, + "grad_norm": 1.1454463005065918, + "learning_rate": 1.670019981014479e-06, + "loss": 0.2557, + "step": 36967 + }, + { + "epoch": 0.7400445412006106, + "grad_norm": 2.039761781692505, + "learning_rate": 1.6697781619811299e-06, + "loss": 0.7962, + "step": 36968 + }, + { + "epoch": 0.7400645597177389, + "grad_norm": 1.1622165441513062, + "learning_rate": 1.6695363569473166e-06, + "loss": 0.3295, + "step": 36969 + }, + { + "epoch": 0.7400845782348673, + "grad_norm": 1.7509593963623047, + "learning_rate": 1.6692945659140558e-06, + "loss": 0.7115, + "step": 36970 + }, + { + "epoch": 0.7401045967519956, + "grad_norm": 1.1040288209915161, + "learning_rate": 1.6690527888823626e-06, + "loss": 0.2788, + "step": 36971 + }, + { + "epoch": 0.740124615269124, + "grad_norm": 1.0735673904418945, + "learning_rate": 1.6688110258532558e-06, + "loss": 0.2517, + "step": 36972 + }, + { + "epoch": 0.7401446337862523, + "grad_norm": 1.3148934841156006, + "learning_rate": 1.668569276827749e-06, + "loss": 0.2991, + "step": 36973 + }, + { + "epoch": 0.7401646523033806, + "grad_norm": 1.0967209339141846, + "learning_rate": 1.6683275418068618e-06, + "loss": 0.2731, + "step": 36974 + }, + { + "epoch": 0.740184670820509, + "grad_norm": 1.0914466381072998, + "learning_rate": 1.6680858207916073e-06, + "loss": 0.2805, + "step": 36975 + }, + { + "epoch": 0.7402046893376373, + "grad_norm": 1.264548897743225, + "learning_rate": 1.6678441137830042e-06, + "loss": 0.2955, + "step": 36976 + }, + { + "epoch": 0.7402247078547657, + "grad_norm": 1.1669278144836426, + "learning_rate": 1.6676024207820674e-06, + "loss": 0.3233, + "step": 36977 + }, + { + "epoch": 0.740244726371894, + "grad_norm": 1.1552999019622803, + "learning_rate": 1.667360741789813e-06, + "loss": 0.2731, + "step": 36978 + }, + { + "epoch": 0.7402647448890224, + "grad_norm": 1.078916072845459, + "learning_rate": 1.6671190768072548e-06, + "loss": 0.3037, + "step": 36979 + }, + { + "epoch": 0.7402847634061507, + "grad_norm": 1.1678376197814941, + "learning_rate": 1.6668774258354125e-06, + "loss": 0.2897, + "step": 36980 + }, + { + "epoch": 0.7403047819232791, + "grad_norm": 1.0574111938476562, + "learning_rate": 1.6666357888753004e-06, + "loss": 0.3161, + "step": 36981 + }, + { + "epoch": 0.7403248004404074, + "grad_norm": 1.3483965396881104, + "learning_rate": 1.6663941659279343e-06, + "loss": 0.3056, + "step": 36982 + }, + { + "epoch": 0.7403448189575357, + "grad_norm": 1.111971378326416, + "learning_rate": 1.66615255699433e-06, + "loss": 0.3233, + "step": 36983 + }, + { + "epoch": 0.7403648374746641, + "grad_norm": 1.0887442827224731, + "learning_rate": 1.6659109620755005e-06, + "loss": 0.2775, + "step": 36984 + }, + { + "epoch": 0.7403848559917924, + "grad_norm": 1.1016833782196045, + "learning_rate": 1.6656693811724656e-06, + "loss": 0.3089, + "step": 36985 + }, + { + "epoch": 0.7404048745089208, + "grad_norm": 1.0652235746383667, + "learning_rate": 1.665427814286238e-06, + "loss": 0.3162, + "step": 36986 + }, + { + "epoch": 0.7404248930260491, + "grad_norm": 1.1423239707946777, + "learning_rate": 1.665186261417835e-06, + "loss": 0.3065, + "step": 36987 + }, + { + "epoch": 0.7404449115431775, + "grad_norm": 1.185428500175476, + "learning_rate": 1.6649447225682701e-06, + "loss": 0.3082, + "step": 36988 + }, + { + "epoch": 0.7404649300603058, + "grad_norm": 1.11137855052948, + "learning_rate": 1.6647031977385619e-06, + "loss": 0.2917, + "step": 36989 + }, + { + "epoch": 0.7404849485774341, + "grad_norm": 1.0297366380691528, + "learning_rate": 1.664461686929723e-06, + "loss": 0.2697, + "step": 36990 + }, + { + "epoch": 0.7405049670945625, + "grad_norm": 1.0541249513626099, + "learning_rate": 1.6642201901427696e-06, + "loss": 0.2944, + "step": 36991 + }, + { + "epoch": 0.7405249856116908, + "grad_norm": 1.2901023626327515, + "learning_rate": 1.6639787073787167e-06, + "loss": 0.2936, + "step": 36992 + }, + { + "epoch": 0.7405450041288192, + "grad_norm": 1.1324626207351685, + "learning_rate": 1.663737238638578e-06, + "loss": 0.3236, + "step": 36993 + }, + { + "epoch": 0.7405650226459475, + "grad_norm": 1.174944281578064, + "learning_rate": 1.6634957839233717e-06, + "loss": 0.3005, + "step": 36994 + }, + { + "epoch": 0.7405850411630759, + "grad_norm": 1.1272739171981812, + "learning_rate": 1.663254343234111e-06, + "loss": 0.2709, + "step": 36995 + }, + { + "epoch": 0.7406050596802042, + "grad_norm": 7.284607410430908, + "learning_rate": 1.6630129165718106e-06, + "loss": 0.3232, + "step": 36996 + }, + { + "epoch": 0.7406250781973326, + "grad_norm": 0.9959206581115723, + "learning_rate": 1.6627715039374843e-06, + "loss": 0.291, + "step": 36997 + }, + { + "epoch": 0.7406450967144609, + "grad_norm": 1.7984979152679443, + "learning_rate": 1.6625301053321496e-06, + "loss": 0.7512, + "step": 36998 + }, + { + "epoch": 0.7406651152315892, + "grad_norm": 1.1809862852096558, + "learning_rate": 1.662288720756819e-06, + "loss": 0.3099, + "step": 36999 + }, + { + "epoch": 0.7406851337487176, + "grad_norm": 1.1521720886230469, + "learning_rate": 1.6620473502125101e-06, + "loss": 0.305, + "step": 37000 + }, + { + "epoch": 0.7407051522658459, + "grad_norm": 1.06095290184021, + "learning_rate": 1.6618059937002357e-06, + "loss": 0.2922, + "step": 37001 + }, + { + "epoch": 0.7407251707829743, + "grad_norm": 1.2053207159042358, + "learning_rate": 1.6615646512210088e-06, + "loss": 0.2664, + "step": 37002 + }, + { + "epoch": 0.7407451893001026, + "grad_norm": 1.252439260482788, + "learning_rate": 1.6613233227758474e-06, + "loss": 0.2724, + "step": 37003 + }, + { + "epoch": 0.740765207817231, + "grad_norm": 1.1785115003585815, + "learning_rate": 1.661082008365764e-06, + "loss": 0.3249, + "step": 37004 + }, + { + "epoch": 0.7407852263343593, + "grad_norm": 1.1476486921310425, + "learning_rate": 1.6608407079917733e-06, + "loss": 0.282, + "step": 37005 + }, + { + "epoch": 0.7408052448514876, + "grad_norm": 1.0000150203704834, + "learning_rate": 1.6605994216548881e-06, + "loss": 0.2918, + "step": 37006 + }, + { + "epoch": 0.740825263368616, + "grad_norm": 1.2706152200698853, + "learning_rate": 1.660358149356126e-06, + "loss": 0.2973, + "step": 37007 + }, + { + "epoch": 0.7408452818857443, + "grad_norm": 1.0901697874069214, + "learning_rate": 1.6601168910964999e-06, + "loss": 0.3171, + "step": 37008 + }, + { + "epoch": 0.7408653004028727, + "grad_norm": 1.1967867612838745, + "learning_rate": 1.6598756468770234e-06, + "loss": 0.2774, + "step": 37009 + }, + { + "epoch": 0.740885318920001, + "grad_norm": 1.110675573348999, + "learning_rate": 1.6596344166987089e-06, + "loss": 0.3081, + "step": 37010 + }, + { + "epoch": 0.7409053374371294, + "grad_norm": 1.1250379085540771, + "learning_rate": 1.659393200562574e-06, + "loss": 0.2612, + "step": 37011 + }, + { + "epoch": 0.7409253559542577, + "grad_norm": 1.0834282636642456, + "learning_rate": 1.6591519984696303e-06, + "loss": 0.3016, + "step": 37012 + }, + { + "epoch": 0.7409453744713861, + "grad_norm": 1.095579743385315, + "learning_rate": 1.6589108104208934e-06, + "loss": 0.2759, + "step": 37013 + }, + { + "epoch": 0.7409653929885144, + "grad_norm": 1.2086204290390015, + "learning_rate": 1.658669636417377e-06, + "loss": 0.2906, + "step": 37014 + }, + { + "epoch": 0.7409854115056427, + "grad_norm": 1.172878623008728, + "learning_rate": 1.6584284764600927e-06, + "loss": 0.3013, + "step": 37015 + }, + { + "epoch": 0.7410054300227711, + "grad_norm": 1.920573353767395, + "learning_rate": 1.6581873305500573e-06, + "loss": 0.8243, + "step": 37016 + }, + { + "epoch": 0.7410254485398994, + "grad_norm": 1.0629262924194336, + "learning_rate": 1.6579461986882839e-06, + "loss": 0.2472, + "step": 37017 + }, + { + "epoch": 0.7410454670570278, + "grad_norm": 1.070706844329834, + "learning_rate": 1.6577050808757849e-06, + "loss": 0.2807, + "step": 37018 + }, + { + "epoch": 0.7410654855741561, + "grad_norm": 1.1415977478027344, + "learning_rate": 1.6574639771135726e-06, + "loss": 0.2786, + "step": 37019 + }, + { + "epoch": 0.7410855040912845, + "grad_norm": 1.323615550994873, + "learning_rate": 1.6572228874026647e-06, + "loss": 0.3106, + "step": 37020 + }, + { + "epoch": 0.7411055226084128, + "grad_norm": 1.2428065538406372, + "learning_rate": 1.6569818117440722e-06, + "loss": 0.3429, + "step": 37021 + }, + { + "epoch": 0.7411255411255411, + "grad_norm": 1.0616470575332642, + "learning_rate": 1.6567407501388083e-06, + "loss": 0.3028, + "step": 37022 + }, + { + "epoch": 0.7411455596426695, + "grad_norm": 1.0535320043563843, + "learning_rate": 1.6564997025878872e-06, + "loss": 0.2852, + "step": 37023 + }, + { + "epoch": 0.7411655781597978, + "grad_norm": 1.1800230741500854, + "learning_rate": 1.6562586690923198e-06, + "loss": 0.2929, + "step": 37024 + }, + { + "epoch": 0.7411855966769262, + "grad_norm": 1.1780437231063843, + "learning_rate": 1.6560176496531222e-06, + "loss": 0.3159, + "step": 37025 + }, + { + "epoch": 0.7412056151940545, + "grad_norm": 1.1690369844436646, + "learning_rate": 1.6557766442713081e-06, + "loss": 0.3031, + "step": 37026 + }, + { + "epoch": 0.7412256337111829, + "grad_norm": 1.1148852109909058, + "learning_rate": 1.655535652947889e-06, + "loss": 0.305, + "step": 37027 + }, + { + "epoch": 0.7412456522283112, + "grad_norm": 0.9918504357337952, + "learning_rate": 1.6552946756838767e-06, + "loss": 0.2931, + "step": 37028 + }, + { + "epoch": 0.7412656707454396, + "grad_norm": 1.1606810092926025, + "learning_rate": 1.6550537124802874e-06, + "loss": 0.3228, + "step": 37029 + }, + { + "epoch": 0.7412856892625679, + "grad_norm": 1.1879076957702637, + "learning_rate": 1.6548127633381322e-06, + "loss": 0.358, + "step": 37030 + }, + { + "epoch": 0.7413057077796962, + "grad_norm": 1.1455477476119995, + "learning_rate": 1.654571828258425e-06, + "loss": 0.2826, + "step": 37031 + }, + { + "epoch": 0.7413257262968246, + "grad_norm": 1.1474202871322632, + "learning_rate": 1.654330907242177e-06, + "loss": 0.3478, + "step": 37032 + }, + { + "epoch": 0.7413457448139529, + "grad_norm": 1.1935468912124634, + "learning_rate": 1.654090000290401e-06, + "loss": 0.2952, + "step": 37033 + }, + { + "epoch": 0.7413657633310813, + "grad_norm": 1.0477557182312012, + "learning_rate": 1.653849107404112e-06, + "loss": 0.3088, + "step": 37034 + }, + { + "epoch": 0.7413857818482096, + "grad_norm": 1.1364307403564453, + "learning_rate": 1.6536082285843214e-06, + "loss": 0.3443, + "step": 37035 + }, + { + "epoch": 0.741405800365338, + "grad_norm": 1.1007541418075562, + "learning_rate": 1.6533673638320413e-06, + "loss": 0.3161, + "step": 37036 + }, + { + "epoch": 0.7414258188824663, + "grad_norm": 1.177291989326477, + "learning_rate": 1.6531265131482831e-06, + "loss": 0.3492, + "step": 37037 + }, + { + "epoch": 0.7414458373995946, + "grad_norm": 1.1116015911102295, + "learning_rate": 1.6528856765340606e-06, + "loss": 0.3052, + "step": 37038 + }, + { + "epoch": 0.741465855916723, + "grad_norm": 1.0996625423431396, + "learning_rate": 1.6526448539903882e-06, + "loss": 0.316, + "step": 37039 + }, + { + "epoch": 0.7414858744338513, + "grad_norm": 1.1864032745361328, + "learning_rate": 1.6524040455182766e-06, + "loss": 0.271, + "step": 37040 + }, + { + "epoch": 0.7415058929509797, + "grad_norm": 1.1190276145935059, + "learning_rate": 1.6521632511187363e-06, + "loss": 0.2826, + "step": 37041 + }, + { + "epoch": 0.741525911468108, + "grad_norm": 1.8520723581314087, + "learning_rate": 1.6519224707927828e-06, + "loss": 0.7147, + "step": 37042 + }, + { + "epoch": 0.7415459299852364, + "grad_norm": 1.008237361907959, + "learning_rate": 1.6516817045414268e-06, + "loss": 0.2886, + "step": 37043 + }, + { + "epoch": 0.7415659485023647, + "grad_norm": 1.0966216325759888, + "learning_rate": 1.6514409523656799e-06, + "loss": 0.3034, + "step": 37044 + }, + { + "epoch": 0.7415859670194931, + "grad_norm": 1.1029951572418213, + "learning_rate": 1.651200214266555e-06, + "loss": 0.2875, + "step": 37045 + }, + { + "epoch": 0.7416059855366214, + "grad_norm": 1.1437746286392212, + "learning_rate": 1.650959490245062e-06, + "loss": 0.2921, + "step": 37046 + }, + { + "epoch": 0.7416260040537497, + "grad_norm": 1.1476904153823853, + "learning_rate": 1.6507187803022168e-06, + "loss": 0.2857, + "step": 37047 + }, + { + "epoch": 0.7416460225708781, + "grad_norm": 1.0740710496902466, + "learning_rate": 1.6504780844390284e-06, + "loss": 0.2719, + "step": 37048 + }, + { + "epoch": 0.7416660410880064, + "grad_norm": 1.1013193130493164, + "learning_rate": 1.6502374026565094e-06, + "loss": 0.2848, + "step": 37049 + }, + { + "epoch": 0.7416860596051348, + "grad_norm": 1.0782603025436401, + "learning_rate": 1.64999673495567e-06, + "loss": 0.3157, + "step": 37050 + }, + { + "epoch": 0.7417060781222631, + "grad_norm": 1.1380892992019653, + "learning_rate": 1.6497560813375235e-06, + "loss": 0.2711, + "step": 37051 + }, + { + "epoch": 0.7417260966393915, + "grad_norm": 1.1554598808288574, + "learning_rate": 1.6495154418030828e-06, + "loss": 0.3169, + "step": 37052 + }, + { + "epoch": 0.7417461151565198, + "grad_norm": 1.086980938911438, + "learning_rate": 1.6492748163533584e-06, + "loss": 0.2782, + "step": 37053 + }, + { + "epoch": 0.7417661336736481, + "grad_norm": 1.180452585220337, + "learning_rate": 1.6490342049893616e-06, + "loss": 0.3044, + "step": 37054 + }, + { + "epoch": 0.7417861521907765, + "grad_norm": 1.029227614402771, + "learning_rate": 1.6487936077121019e-06, + "loss": 0.2513, + "step": 37055 + }, + { + "epoch": 0.7418061707079048, + "grad_norm": 1.021910309791565, + "learning_rate": 1.6485530245225944e-06, + "loss": 0.2803, + "step": 37056 + }, + { + "epoch": 0.7418261892250332, + "grad_norm": 1.2607355117797852, + "learning_rate": 1.6483124554218488e-06, + "loss": 0.3344, + "step": 37057 + }, + { + "epoch": 0.7418462077421615, + "grad_norm": 1.2091870307922363, + "learning_rate": 1.6480719004108765e-06, + "loss": 0.31, + "step": 37058 + }, + { + "epoch": 0.7418662262592899, + "grad_norm": 1.106074333190918, + "learning_rate": 1.6478313594906869e-06, + "loss": 0.295, + "step": 37059 + }, + { + "epoch": 0.7418862447764182, + "grad_norm": 1.1062523126602173, + "learning_rate": 1.6475908326622947e-06, + "loss": 0.2667, + "step": 37060 + }, + { + "epoch": 0.7419062632935465, + "grad_norm": 1.0630595684051514, + "learning_rate": 1.6473503199267089e-06, + "loss": 0.325, + "step": 37061 + }, + { + "epoch": 0.7419262818106749, + "grad_norm": 1.0328336954116821, + "learning_rate": 1.6471098212849406e-06, + "loss": 0.2798, + "step": 37062 + }, + { + "epoch": 0.7419463003278032, + "grad_norm": 1.3707853555679321, + "learning_rate": 1.6468693367380001e-06, + "loss": 0.3096, + "step": 37063 + }, + { + "epoch": 0.7419663188449316, + "grad_norm": 1.158254861831665, + "learning_rate": 1.646628866286899e-06, + "loss": 0.2903, + "step": 37064 + }, + { + "epoch": 0.7419863373620599, + "grad_norm": 1.8440594673156738, + "learning_rate": 1.64638840993265e-06, + "loss": 0.7727, + "step": 37065 + }, + { + "epoch": 0.7420063558791883, + "grad_norm": 1.0926188230514526, + "learning_rate": 1.6461479676762627e-06, + "loss": 0.289, + "step": 37066 + }, + { + "epoch": 0.7420263743963166, + "grad_norm": 1.017423152923584, + "learning_rate": 1.6459075395187468e-06, + "loss": 0.2874, + "step": 37067 + }, + { + "epoch": 0.742046392913445, + "grad_norm": 1.0510236024856567, + "learning_rate": 1.645667125461113e-06, + "loss": 0.3114, + "step": 37068 + }, + { + "epoch": 0.7420664114305733, + "grad_norm": 1.167916178703308, + "learning_rate": 1.6454267255043743e-06, + "loss": 0.3567, + "step": 37069 + }, + { + "epoch": 0.7420864299477016, + "grad_norm": 1.257405161857605, + "learning_rate": 1.6451863396495394e-06, + "loss": 0.3008, + "step": 37070 + }, + { + "epoch": 0.74210644846483, + "grad_norm": 1.256266474723816, + "learning_rate": 1.6449459678976193e-06, + "loss": 0.2902, + "step": 37071 + }, + { + "epoch": 0.7421264669819583, + "grad_norm": 1.0223796367645264, + "learning_rate": 1.6447056102496229e-06, + "loss": 0.2737, + "step": 37072 + }, + { + "epoch": 0.7421464854990867, + "grad_norm": 1.1675714254379272, + "learning_rate": 1.644465266706563e-06, + "loss": 0.2879, + "step": 37073 + }, + { + "epoch": 0.742166504016215, + "grad_norm": 0.9857761263847351, + "learning_rate": 1.6442249372694497e-06, + "loss": 0.2908, + "step": 37074 + }, + { + "epoch": 0.7421865225333434, + "grad_norm": 1.1874408721923828, + "learning_rate": 1.6439846219392925e-06, + "loss": 0.3114, + "step": 37075 + }, + { + "epoch": 0.7422065410504717, + "grad_norm": 1.0734851360321045, + "learning_rate": 1.6437443207171e-06, + "loss": 0.2777, + "step": 37076 + }, + { + "epoch": 0.7422265595676, + "grad_norm": 1.1572171449661255, + "learning_rate": 1.6435040336038842e-06, + "loss": 0.2964, + "step": 37077 + }, + { + "epoch": 0.7422465780847284, + "grad_norm": 1.8047386407852173, + "learning_rate": 1.6432637606006563e-06, + "loss": 0.7251, + "step": 37078 + }, + { + "epoch": 0.7422665966018567, + "grad_norm": 1.158941626548767, + "learning_rate": 1.6430235017084255e-06, + "loss": 0.3635, + "step": 37079 + }, + { + "epoch": 0.7422866151189851, + "grad_norm": 1.0926411151885986, + "learning_rate": 1.6427832569282016e-06, + "loss": 0.2799, + "step": 37080 + }, + { + "epoch": 0.7423066336361134, + "grad_norm": 1.0682520866394043, + "learning_rate": 1.6425430262609926e-06, + "loss": 0.3392, + "step": 37081 + }, + { + "epoch": 0.7423266521532418, + "grad_norm": 1.1775511503219604, + "learning_rate": 1.642302809707812e-06, + "loss": 0.2873, + "step": 37082 + }, + { + "epoch": 0.7423466706703701, + "grad_norm": 1.8043347597122192, + "learning_rate": 1.6420626072696677e-06, + "loss": 0.7205, + "step": 37083 + }, + { + "epoch": 0.7423666891874985, + "grad_norm": 1.0714706182479858, + "learning_rate": 1.6418224189475695e-06, + "loss": 0.2618, + "step": 37084 + }, + { + "epoch": 0.7423867077046268, + "grad_norm": 1.1064646244049072, + "learning_rate": 1.6415822447425273e-06, + "loss": 0.3447, + "step": 37085 + }, + { + "epoch": 0.7424067262217551, + "grad_norm": 1.8137129545211792, + "learning_rate": 1.641342084655549e-06, + "loss": 0.7134, + "step": 37086 + }, + { + "epoch": 0.7424267447388835, + "grad_norm": 1.218648076057434, + "learning_rate": 1.641101938687647e-06, + "loss": 0.3197, + "step": 37087 + }, + { + "epoch": 0.7424467632560118, + "grad_norm": 1.116673469543457, + "learning_rate": 1.6408618068398302e-06, + "loss": 0.2951, + "step": 37088 + }, + { + "epoch": 0.7424667817731402, + "grad_norm": 1.857385277748108, + "learning_rate": 1.6406216891131054e-06, + "loss": 0.7493, + "step": 37089 + }, + { + "epoch": 0.7424868002902685, + "grad_norm": 1.1493232250213623, + "learning_rate": 1.6403815855084843e-06, + "loss": 0.317, + "step": 37090 + }, + { + "epoch": 0.7425068188073969, + "grad_norm": 1.1710032224655151, + "learning_rate": 1.6401414960269773e-06, + "loss": 0.325, + "step": 37091 + }, + { + "epoch": 0.7425268373245252, + "grad_norm": 1.2214442491531372, + "learning_rate": 1.639901420669593e-06, + "loss": 0.3109, + "step": 37092 + }, + { + "epoch": 0.7425468558416535, + "grad_norm": 1.0563054084777832, + "learning_rate": 1.6396613594373395e-06, + "loss": 0.2963, + "step": 37093 + }, + { + "epoch": 0.7425668743587819, + "grad_norm": 1.0101717710494995, + "learning_rate": 1.6394213123312269e-06, + "loss": 0.2635, + "step": 37094 + }, + { + "epoch": 0.7425868928759102, + "grad_norm": 1.0535610914230347, + "learning_rate": 1.6391812793522616e-06, + "loss": 0.2959, + "step": 37095 + }, + { + "epoch": 0.7426069113930386, + "grad_norm": 1.0650233030319214, + "learning_rate": 1.6389412605014571e-06, + "loss": 0.3304, + "step": 37096 + }, + { + "epoch": 0.7426269299101669, + "grad_norm": 1.0542263984680176, + "learning_rate": 1.6387012557798203e-06, + "loss": 0.2893, + "step": 37097 + }, + { + "epoch": 0.7426469484272953, + "grad_norm": 1.0735129117965698, + "learning_rate": 1.6384612651883596e-06, + "loss": 0.3397, + "step": 37098 + }, + { + "epoch": 0.7426669669444236, + "grad_norm": 1.0082952976226807, + "learning_rate": 1.6382212887280835e-06, + "loss": 0.2565, + "step": 37099 + }, + { + "epoch": 0.742686985461552, + "grad_norm": 1.1425836086273193, + "learning_rate": 1.6379813264000027e-06, + "loss": 0.2858, + "step": 37100 + }, + { + "epoch": 0.7427070039786803, + "grad_norm": 1.0568665266036987, + "learning_rate": 1.6377413782051249e-06, + "loss": 0.3004, + "step": 37101 + }, + { + "epoch": 0.7427270224958086, + "grad_norm": 1.167144536972046, + "learning_rate": 1.637501444144457e-06, + "loss": 0.293, + "step": 37102 + }, + { + "epoch": 0.742747041012937, + "grad_norm": 1.0698164701461792, + "learning_rate": 1.6372615242190099e-06, + "loss": 0.2924, + "step": 37103 + }, + { + "epoch": 0.7427670595300653, + "grad_norm": 1.077793836593628, + "learning_rate": 1.6370216184297932e-06, + "loss": 0.2984, + "step": 37104 + }, + { + "epoch": 0.7427870780471937, + "grad_norm": 0.9998796582221985, + "learning_rate": 1.6367817267778136e-06, + "loss": 0.2946, + "step": 37105 + }, + { + "epoch": 0.742807096564322, + "grad_norm": 1.0962533950805664, + "learning_rate": 1.63654184926408e-06, + "loss": 0.2764, + "step": 37106 + }, + { + "epoch": 0.7428271150814504, + "grad_norm": 2.0335869789123535, + "learning_rate": 1.6363019858896006e-06, + "loss": 0.7465, + "step": 37107 + }, + { + "epoch": 0.7428471335985787, + "grad_norm": 1.0473029613494873, + "learning_rate": 1.636062136655382e-06, + "loss": 0.2867, + "step": 37108 + }, + { + "epoch": 0.742867152115707, + "grad_norm": 1.1496989727020264, + "learning_rate": 1.6358223015624364e-06, + "loss": 0.3243, + "step": 37109 + }, + { + "epoch": 0.7428871706328354, + "grad_norm": 1.135657548904419, + "learning_rate": 1.6355824806117694e-06, + "loss": 0.2888, + "step": 37110 + }, + { + "epoch": 0.7429071891499637, + "grad_norm": 1.031798243522644, + "learning_rate": 1.6353426738043899e-06, + "loss": 0.3156, + "step": 37111 + }, + { + "epoch": 0.7429272076670921, + "grad_norm": 1.2514361143112183, + "learning_rate": 1.635102881141304e-06, + "loss": 0.3073, + "step": 37112 + }, + { + "epoch": 0.7429472261842204, + "grad_norm": 1.2183061838150024, + "learning_rate": 1.634863102623523e-06, + "loss": 0.2929, + "step": 37113 + }, + { + "epoch": 0.7429672447013488, + "grad_norm": 1.053349494934082, + "learning_rate": 1.6346233382520532e-06, + "loss": 0.2962, + "step": 37114 + }, + { + "epoch": 0.7429872632184771, + "grad_norm": 1.1552916765213013, + "learning_rate": 1.6343835880279008e-06, + "loss": 0.2782, + "step": 37115 + }, + { + "epoch": 0.7430072817356055, + "grad_norm": 1.0962730646133423, + "learning_rate": 1.6341438519520774e-06, + "loss": 0.2804, + "step": 37116 + }, + { + "epoch": 0.7430273002527338, + "grad_norm": 1.9412041902542114, + "learning_rate": 1.6339041300255875e-06, + "loss": 0.6796, + "step": 37117 + }, + { + "epoch": 0.7430473187698621, + "grad_norm": 1.071326494216919, + "learning_rate": 1.633664422249442e-06, + "loss": 0.2821, + "step": 37118 + }, + { + "epoch": 0.7430673372869905, + "grad_norm": 1.081355333328247, + "learning_rate": 1.6334247286246464e-06, + "loss": 0.3198, + "step": 37119 + }, + { + "epoch": 0.7430873558041188, + "grad_norm": 1.0452181100845337, + "learning_rate": 1.6331850491522088e-06, + "loss": 0.2821, + "step": 37120 + }, + { + "epoch": 0.7431073743212472, + "grad_norm": 1.0830152034759521, + "learning_rate": 1.6329453838331355e-06, + "loss": 0.2641, + "step": 37121 + }, + { + "epoch": 0.7431273928383755, + "grad_norm": 1.0346139669418335, + "learning_rate": 1.6327057326684364e-06, + "loss": 0.3085, + "step": 37122 + }, + { + "epoch": 0.7431474113555039, + "grad_norm": 1.1618303060531616, + "learning_rate": 1.6324660956591182e-06, + "loss": 0.2922, + "step": 37123 + }, + { + "epoch": 0.7431674298726322, + "grad_norm": 1.1429568529129028, + "learning_rate": 1.632226472806187e-06, + "loss": 0.2929, + "step": 37124 + }, + { + "epoch": 0.7431874483897605, + "grad_norm": 1.2288354635238647, + "learning_rate": 1.63198686411065e-06, + "loss": 0.3032, + "step": 37125 + }, + { + "epoch": 0.7432074669068889, + "grad_norm": 1.1430000066757202, + "learning_rate": 1.631747269573517e-06, + "loss": 0.2661, + "step": 37126 + }, + { + "epoch": 0.7432274854240172, + "grad_norm": 1.1655598878860474, + "learning_rate": 1.6315076891957938e-06, + "loss": 0.3198, + "step": 37127 + }, + { + "epoch": 0.7432475039411456, + "grad_norm": 1.0402542352676392, + "learning_rate": 1.6312681229784854e-06, + "loss": 0.2651, + "step": 37128 + }, + { + "epoch": 0.7432675224582739, + "grad_norm": 1.1795670986175537, + "learning_rate": 1.6310285709226025e-06, + "loss": 0.2765, + "step": 37129 + }, + { + "epoch": 0.7432875409754023, + "grad_norm": 1.1193366050720215, + "learning_rate": 1.6307890330291488e-06, + "loss": 0.3184, + "step": 37130 + }, + { + "epoch": 0.7433075594925306, + "grad_norm": 1.057396650314331, + "learning_rate": 1.6305495092991348e-06, + "loss": 0.2603, + "step": 37131 + }, + { + "epoch": 0.743327578009659, + "grad_norm": 1.0796871185302734, + "learning_rate": 1.6303099997335653e-06, + "loss": 0.3638, + "step": 37132 + }, + { + "epoch": 0.7433475965267873, + "grad_norm": 1.366883397102356, + "learning_rate": 1.6300705043334474e-06, + "loss": 0.3123, + "step": 37133 + }, + { + "epoch": 0.7433676150439156, + "grad_norm": 1.204263687133789, + "learning_rate": 1.6298310230997867e-06, + "loss": 0.3168, + "step": 37134 + }, + { + "epoch": 0.743387633561044, + "grad_norm": 1.096942663192749, + "learning_rate": 1.6295915560335924e-06, + "loss": 0.2976, + "step": 37135 + }, + { + "epoch": 0.7434076520781723, + "grad_norm": 1.2444167137145996, + "learning_rate": 1.6293521031358694e-06, + "loss": 0.3091, + "step": 37136 + }, + { + "epoch": 0.7434276705953007, + "grad_norm": 1.3560117483139038, + "learning_rate": 1.6291126644076255e-06, + "loss": 0.3641, + "step": 37137 + }, + { + "epoch": 0.743447689112429, + "grad_norm": 0.9320969581604004, + "learning_rate": 1.6288732398498663e-06, + "loss": 0.2806, + "step": 37138 + }, + { + "epoch": 0.7434677076295574, + "grad_norm": 1.265942096710205, + "learning_rate": 1.6286338294635968e-06, + "loss": 0.3094, + "step": 37139 + }, + { + "epoch": 0.7434877261466857, + "grad_norm": 1.1734797954559326, + "learning_rate": 1.6283944332498268e-06, + "loss": 0.2887, + "step": 37140 + }, + { + "epoch": 0.743507744663814, + "grad_norm": 1.0935776233673096, + "learning_rate": 1.6281550512095596e-06, + "loss": 0.2573, + "step": 37141 + }, + { + "epoch": 0.7435277631809424, + "grad_norm": 1.1252248287200928, + "learning_rate": 1.6279156833438043e-06, + "loss": 0.3061, + "step": 37142 + }, + { + "epoch": 0.7435477816980707, + "grad_norm": 1.21354341506958, + "learning_rate": 1.6276763296535642e-06, + "loss": 0.2908, + "step": 37143 + }, + { + "epoch": 0.7435678002151991, + "grad_norm": 1.0245152711868286, + "learning_rate": 1.627436990139849e-06, + "loss": 0.2595, + "step": 37144 + }, + { + "epoch": 0.7435878187323274, + "grad_norm": 1.242885947227478, + "learning_rate": 1.6271976648036624e-06, + "loss": 0.3279, + "step": 37145 + }, + { + "epoch": 0.7436078372494558, + "grad_norm": 1.1754586696624756, + "learning_rate": 1.6269583536460116e-06, + "loss": 0.3345, + "step": 37146 + }, + { + "epoch": 0.7436278557665841, + "grad_norm": 1.855652093887329, + "learning_rate": 1.626719056667902e-06, + "loss": 0.7192, + "step": 37147 + }, + { + "epoch": 0.7436478742837125, + "grad_norm": 1.134297251701355, + "learning_rate": 1.6264797738703376e-06, + "loss": 0.3133, + "step": 37148 + }, + { + "epoch": 0.7436678928008408, + "grad_norm": 1.0582733154296875, + "learning_rate": 1.6262405052543279e-06, + "loss": 0.3079, + "step": 37149 + }, + { + "epoch": 0.7436879113179691, + "grad_norm": 1.0891953706741333, + "learning_rate": 1.6260012508208766e-06, + "loss": 0.3022, + "step": 37150 + }, + { + "epoch": 0.7437079298350975, + "grad_norm": 1.0480120182037354, + "learning_rate": 1.6257620105709904e-06, + "loss": 0.286, + "step": 37151 + }, + { + "epoch": 0.7437279483522258, + "grad_norm": 1.9286545515060425, + "learning_rate": 1.6255227845056732e-06, + "loss": 0.7413, + "step": 37152 + }, + { + "epoch": 0.7437479668693542, + "grad_norm": 1.0904356241226196, + "learning_rate": 1.625283572625933e-06, + "loss": 0.2563, + "step": 37153 + }, + { + "epoch": 0.7437679853864825, + "grad_norm": 1.2623555660247803, + "learning_rate": 1.6250443749327732e-06, + "loss": 0.3389, + "step": 37154 + }, + { + "epoch": 0.7437880039036109, + "grad_norm": 2.096177816390991, + "learning_rate": 1.6248051914272023e-06, + "loss": 0.7613, + "step": 37155 + }, + { + "epoch": 0.7438080224207392, + "grad_norm": 1.0763953924179077, + "learning_rate": 1.6245660221102221e-06, + "loss": 0.3158, + "step": 37156 + }, + { + "epoch": 0.7438280409378675, + "grad_norm": 1.2195433378219604, + "learning_rate": 1.624326866982842e-06, + "loss": 0.2831, + "step": 37157 + }, + { + "epoch": 0.7438480594549959, + "grad_norm": 1.2818317413330078, + "learning_rate": 1.6240877260460652e-06, + "loss": 0.3485, + "step": 37158 + }, + { + "epoch": 0.7438680779721242, + "grad_norm": 0.9861271977424622, + "learning_rate": 1.6238485993008968e-06, + "loss": 0.2648, + "step": 37159 + }, + { + "epoch": 0.7438880964892526, + "grad_norm": 1.0560617446899414, + "learning_rate": 1.623609486748342e-06, + "loss": 0.3199, + "step": 37160 + }, + { + "epoch": 0.7439081150063809, + "grad_norm": 2.1025896072387695, + "learning_rate": 1.6233703883894053e-06, + "loss": 0.7618, + "step": 37161 + }, + { + "epoch": 0.7439281335235093, + "grad_norm": 1.307624101638794, + "learning_rate": 1.6231313042250946e-06, + "loss": 0.2954, + "step": 37162 + }, + { + "epoch": 0.7439481520406376, + "grad_norm": 1.040175199508667, + "learning_rate": 1.622892234256413e-06, + "loss": 0.2642, + "step": 37163 + }, + { + "epoch": 0.743968170557766, + "grad_norm": 1.0341840982437134, + "learning_rate": 1.6226531784843651e-06, + "loss": 0.2644, + "step": 37164 + }, + { + "epoch": 0.7439881890748943, + "grad_norm": 1.0392810106277466, + "learning_rate": 1.6224141369099556e-06, + "loss": 0.2661, + "step": 37165 + }, + { + "epoch": 0.7440082075920226, + "grad_norm": 1.1064950227737427, + "learning_rate": 1.6221751095341914e-06, + "loss": 0.3175, + "step": 37166 + }, + { + "epoch": 0.744028226109151, + "grad_norm": 0.9599957466125488, + "learning_rate": 1.6219360963580744e-06, + "loss": 0.2899, + "step": 37167 + }, + { + "epoch": 0.7440482446262793, + "grad_norm": 1.0366772413253784, + "learning_rate": 1.6216970973826129e-06, + "loss": 0.2794, + "step": 37168 + }, + { + "epoch": 0.7440682631434077, + "grad_norm": 1.1677333116531372, + "learning_rate": 1.6214581126088097e-06, + "loss": 0.3473, + "step": 37169 + }, + { + "epoch": 0.744088281660536, + "grad_norm": 1.1450386047363281, + "learning_rate": 1.6212191420376682e-06, + "loss": 0.2959, + "step": 37170 + }, + { + "epoch": 0.7441083001776644, + "grad_norm": 1.0717898607254028, + "learning_rate": 1.6209801856701956e-06, + "loss": 0.2696, + "step": 37171 + }, + { + "epoch": 0.7441283186947927, + "grad_norm": 1.249698281288147, + "learning_rate": 1.6207412435073954e-06, + "loss": 0.2991, + "step": 37172 + }, + { + "epoch": 0.744148337211921, + "grad_norm": 1.2515959739685059, + "learning_rate": 1.6205023155502714e-06, + "loss": 0.3092, + "step": 37173 + }, + { + "epoch": 0.7441683557290494, + "grad_norm": 0.9686893820762634, + "learning_rate": 1.620263401799827e-06, + "loss": 0.2806, + "step": 37174 + }, + { + "epoch": 0.7441883742461777, + "grad_norm": 1.1453560590744019, + "learning_rate": 1.6200245022570693e-06, + "loss": 0.2602, + "step": 37175 + }, + { + "epoch": 0.7442083927633061, + "grad_norm": 1.8227766752243042, + "learning_rate": 1.619785616923002e-06, + "loss": 0.7063, + "step": 37176 + }, + { + "epoch": 0.7442284112804344, + "grad_norm": 1.0575133562088013, + "learning_rate": 1.6195467457986275e-06, + "loss": 0.2681, + "step": 37177 + }, + { + "epoch": 0.7442484297975628, + "grad_norm": 2.0763487815856934, + "learning_rate": 1.61930788888495e-06, + "loss": 0.7947, + "step": 37178 + }, + { + "epoch": 0.7442684483146911, + "grad_norm": 1.1428627967834473, + "learning_rate": 1.619069046182976e-06, + "loss": 0.2933, + "step": 37179 + }, + { + "epoch": 0.7442884668318195, + "grad_norm": 1.1559752225875854, + "learning_rate": 1.6188302176937065e-06, + "loss": 0.3421, + "step": 37180 + }, + { + "epoch": 0.7443084853489478, + "grad_norm": 1.0402905941009521, + "learning_rate": 1.6185914034181487e-06, + "loss": 0.2835, + "step": 37181 + }, + { + "epoch": 0.7443285038660761, + "grad_norm": 1.1734358072280884, + "learning_rate": 1.6183526033573056e-06, + "loss": 0.3401, + "step": 37182 + }, + { + "epoch": 0.7443485223832045, + "grad_norm": 1.1397329568862915, + "learning_rate": 1.618113817512178e-06, + "loss": 0.273, + "step": 37183 + }, + { + "epoch": 0.7443685409003328, + "grad_norm": 1.1626206636428833, + "learning_rate": 1.6178750458837745e-06, + "loss": 0.2697, + "step": 37184 + }, + { + "epoch": 0.7443885594174612, + "grad_norm": 1.8989461660385132, + "learning_rate": 1.6176362884730955e-06, + "loss": 0.7722, + "step": 37185 + }, + { + "epoch": 0.7444085779345895, + "grad_norm": 1.856524109840393, + "learning_rate": 1.6173975452811464e-06, + "loss": 0.804, + "step": 37186 + }, + { + "epoch": 0.7444285964517179, + "grad_norm": 1.1844862699508667, + "learning_rate": 1.617158816308928e-06, + "loss": 0.356, + "step": 37187 + }, + { + "epoch": 0.7444486149688462, + "grad_norm": 1.082929015159607, + "learning_rate": 1.6169201015574482e-06, + "loss": 0.3075, + "step": 37188 + }, + { + "epoch": 0.7444686334859745, + "grad_norm": 1.3043230772018433, + "learning_rate": 1.6166814010277076e-06, + "loss": 0.3005, + "step": 37189 + }, + { + "epoch": 0.7444886520031029, + "grad_norm": 1.9445335865020752, + "learning_rate": 1.6164427147207107e-06, + "loss": 0.7834, + "step": 37190 + }, + { + "epoch": 0.7445086705202312, + "grad_norm": 1.274085283279419, + "learning_rate": 1.6162040426374587e-06, + "loss": 0.3401, + "step": 37191 + }, + { + "epoch": 0.7445286890373596, + "grad_norm": 1.4825996160507202, + "learning_rate": 1.615965384778958e-06, + "loss": 0.3324, + "step": 37192 + }, + { + "epoch": 0.7445487075544879, + "grad_norm": 1.1120691299438477, + "learning_rate": 1.6157267411462097e-06, + "loss": 0.2757, + "step": 37193 + }, + { + "epoch": 0.7445687260716163, + "grad_norm": 1.1266090869903564, + "learning_rate": 1.6154881117402187e-06, + "loss": 0.2436, + "step": 37194 + }, + { + "epoch": 0.7445887445887446, + "grad_norm": 1.1684199571609497, + "learning_rate": 1.6152494965619875e-06, + "loss": 0.288, + "step": 37195 + }, + { + "epoch": 0.744608763105873, + "grad_norm": 1.2085332870483398, + "learning_rate": 1.6150108956125177e-06, + "loss": 0.3044, + "step": 37196 + }, + { + "epoch": 0.7446287816230013, + "grad_norm": 1.0367776155471802, + "learning_rate": 1.6147723088928152e-06, + "loss": 0.308, + "step": 37197 + }, + { + "epoch": 0.7446488001401296, + "grad_norm": 1.1768656969070435, + "learning_rate": 1.6145337364038816e-06, + "loss": 0.3111, + "step": 37198 + }, + { + "epoch": 0.744668818657258, + "grad_norm": 0.9983969926834106, + "learning_rate": 1.6142951781467193e-06, + "loss": 0.3138, + "step": 37199 + }, + { + "epoch": 0.7446888371743863, + "grad_norm": 1.0291041135787964, + "learning_rate": 1.6140566341223312e-06, + "loss": 0.2859, + "step": 37200 + }, + { + "epoch": 0.7447088556915147, + "grad_norm": 1.1357442140579224, + "learning_rate": 1.6138181043317192e-06, + "loss": 0.282, + "step": 37201 + }, + { + "epoch": 0.744728874208643, + "grad_norm": 1.1110279560089111, + "learning_rate": 1.6135795887758887e-06, + "loss": 0.2956, + "step": 37202 + }, + { + "epoch": 0.7447488927257714, + "grad_norm": 1.2566568851470947, + "learning_rate": 1.6133410874558409e-06, + "loss": 0.2903, + "step": 37203 + }, + { + "epoch": 0.7447689112428997, + "grad_norm": 1.1787580251693726, + "learning_rate": 1.6131026003725763e-06, + "loss": 0.3167, + "step": 37204 + }, + { + "epoch": 0.744788929760028, + "grad_norm": 1.1872122287750244, + "learning_rate": 1.6128641275271012e-06, + "loss": 0.2722, + "step": 37205 + }, + { + "epoch": 0.7448089482771564, + "grad_norm": 1.936766266822815, + "learning_rate": 1.612625668920415e-06, + "loss": 0.7946, + "step": 37206 + }, + { + "epoch": 0.7448289667942847, + "grad_norm": 1.2106627225875854, + "learning_rate": 1.6123872245535232e-06, + "loss": 0.3402, + "step": 37207 + }, + { + "epoch": 0.7448489853114131, + "grad_norm": 1.1008176803588867, + "learning_rate": 1.6121487944274268e-06, + "loss": 0.2915, + "step": 37208 + }, + { + "epoch": 0.7448690038285414, + "grad_norm": 1.142229437828064, + "learning_rate": 1.6119103785431272e-06, + "loss": 0.2666, + "step": 37209 + }, + { + "epoch": 0.7448890223456698, + "grad_norm": 0.9725869297981262, + "learning_rate": 1.6116719769016259e-06, + "loss": 0.2846, + "step": 37210 + }, + { + "epoch": 0.7449090408627981, + "grad_norm": 1.06899893283844, + "learning_rate": 1.611433589503928e-06, + "loss": 0.243, + "step": 37211 + }, + { + "epoch": 0.7449290593799265, + "grad_norm": 1.0274940729141235, + "learning_rate": 1.6111952163510337e-06, + "loss": 0.2658, + "step": 37212 + }, + { + "epoch": 0.7449490778970548, + "grad_norm": 1.1255756616592407, + "learning_rate": 1.610956857443946e-06, + "loss": 0.2941, + "step": 37213 + }, + { + "epoch": 0.7449690964141831, + "grad_norm": 1.1802146434783936, + "learning_rate": 1.6107185127836644e-06, + "loss": 0.3485, + "step": 37214 + }, + { + "epoch": 0.7449891149313115, + "grad_norm": 1.04517662525177, + "learning_rate": 1.6104801823711946e-06, + "loss": 0.2564, + "step": 37215 + }, + { + "epoch": 0.7450091334484398, + "grad_norm": 1.1874537467956543, + "learning_rate": 1.6102418662075365e-06, + "loss": 0.3723, + "step": 37216 + }, + { + "epoch": 0.7450291519655682, + "grad_norm": 2.0580854415893555, + "learning_rate": 1.6100035642936907e-06, + "loss": 0.8114, + "step": 37217 + }, + { + "epoch": 0.7450491704826965, + "grad_norm": 1.1323429346084595, + "learning_rate": 1.6097652766306614e-06, + "loss": 0.315, + "step": 37218 + }, + { + "epoch": 0.7450691889998249, + "grad_norm": 1.034979224205017, + "learning_rate": 1.6095270032194483e-06, + "loss": 0.308, + "step": 37219 + }, + { + "epoch": 0.7450892075169532, + "grad_norm": 1.0742299556732178, + "learning_rate": 1.6092887440610554e-06, + "loss": 0.2957, + "step": 37220 + }, + { + "epoch": 0.7451092260340815, + "grad_norm": 1.0571935176849365, + "learning_rate": 1.609050499156483e-06, + "loss": 0.3187, + "step": 37221 + }, + { + "epoch": 0.7451292445512099, + "grad_norm": 1.139410376548767, + "learning_rate": 1.6088122685067326e-06, + "loss": 0.3689, + "step": 37222 + }, + { + "epoch": 0.7451492630683382, + "grad_norm": 1.2690279483795166, + "learning_rate": 1.608574052112804e-06, + "loss": 0.3118, + "step": 37223 + }, + { + "epoch": 0.7451692815854666, + "grad_norm": 1.1934895515441895, + "learning_rate": 1.6083358499757017e-06, + "loss": 0.3066, + "step": 37224 + }, + { + "epoch": 0.7451893001025949, + "grad_norm": 1.2462488412857056, + "learning_rate": 1.6080976620964261e-06, + "loss": 0.3299, + "step": 37225 + }, + { + "epoch": 0.7452093186197233, + "grad_norm": 1.0903916358947754, + "learning_rate": 1.6078594884759773e-06, + "loss": 0.3219, + "step": 37226 + }, + { + "epoch": 0.7452293371368516, + "grad_norm": 1.062646746635437, + "learning_rate": 1.6076213291153564e-06, + "loss": 0.255, + "step": 37227 + }, + { + "epoch": 0.74524935565398, + "grad_norm": 1.0690582990646362, + "learning_rate": 1.6073831840155662e-06, + "loss": 0.2694, + "step": 37228 + }, + { + "epoch": 0.7452693741711083, + "grad_norm": 1.1304017305374146, + "learning_rate": 1.6071450531776079e-06, + "loss": 0.3092, + "step": 37229 + }, + { + "epoch": 0.7452893926882366, + "grad_norm": 1.2437044382095337, + "learning_rate": 1.6069069366024793e-06, + "loss": 0.3324, + "step": 37230 + }, + { + "epoch": 0.745309411205365, + "grad_norm": 1.169856309890747, + "learning_rate": 1.6066688342911852e-06, + "loss": 0.2857, + "step": 37231 + }, + { + "epoch": 0.7453294297224933, + "grad_norm": 1.855178952217102, + "learning_rate": 1.6064307462447238e-06, + "loss": 0.8103, + "step": 37232 + }, + { + "epoch": 0.7453494482396217, + "grad_norm": 1.2056350708007812, + "learning_rate": 1.6061926724640992e-06, + "loss": 0.2798, + "step": 37233 + }, + { + "epoch": 0.74536946675675, + "grad_norm": 1.0864214897155762, + "learning_rate": 1.6059546129503096e-06, + "loss": 0.3199, + "step": 37234 + }, + { + "epoch": 0.7453894852738784, + "grad_norm": 1.0784834623336792, + "learning_rate": 1.6057165677043568e-06, + "loss": 0.2806, + "step": 37235 + }, + { + "epoch": 0.7454095037910067, + "grad_norm": 1.0856926441192627, + "learning_rate": 1.6054785367272392e-06, + "loss": 0.2867, + "step": 37236 + }, + { + "epoch": 0.745429522308135, + "grad_norm": 1.0174740552902222, + "learning_rate": 1.605240520019961e-06, + "loss": 0.3213, + "step": 37237 + }, + { + "epoch": 0.7454495408252634, + "grad_norm": 1.2370816469192505, + "learning_rate": 1.6050025175835216e-06, + "loss": 0.3052, + "step": 37238 + }, + { + "epoch": 0.7454695593423917, + "grad_norm": 1.1309994459152222, + "learning_rate": 1.60476452941892e-06, + "loss": 0.289, + "step": 37239 + }, + { + "epoch": 0.7454895778595201, + "grad_norm": 1.1978615522384644, + "learning_rate": 1.6045265555271583e-06, + "loss": 0.3148, + "step": 37240 + }, + { + "epoch": 0.7455095963766484, + "grad_norm": 1.1589864492416382, + "learning_rate": 1.6042885959092347e-06, + "loss": 0.3052, + "step": 37241 + }, + { + "epoch": 0.7455296148937768, + "grad_norm": 0.9865028262138367, + "learning_rate": 1.6040506505661523e-06, + "loss": 0.2392, + "step": 37242 + }, + { + "epoch": 0.7455496334109051, + "grad_norm": 1.215523362159729, + "learning_rate": 1.6038127194989088e-06, + "loss": 0.3685, + "step": 37243 + }, + { + "epoch": 0.7455696519280335, + "grad_norm": 1.9538549184799194, + "learning_rate": 1.603574802708508e-06, + "loss": 0.766, + "step": 37244 + }, + { + "epoch": 0.7455896704451618, + "grad_norm": 1.0611987113952637, + "learning_rate": 1.6033369001959454e-06, + "loss": 0.2983, + "step": 37245 + }, + { + "epoch": 0.7456096889622901, + "grad_norm": 1.2180681228637695, + "learning_rate": 1.6030990119622253e-06, + "loss": 0.3626, + "step": 37246 + }, + { + "epoch": 0.7456297074794185, + "grad_norm": 1.8292089700698853, + "learning_rate": 1.6028611380083458e-06, + "loss": 0.7902, + "step": 37247 + }, + { + "epoch": 0.7456497259965468, + "grad_norm": 1.108143925666809, + "learning_rate": 1.6026232783353067e-06, + "loss": 0.3055, + "step": 37248 + }, + { + "epoch": 0.7456697445136752, + "grad_norm": 1.1793205738067627, + "learning_rate": 1.6023854329441074e-06, + "loss": 0.3248, + "step": 37249 + }, + { + "epoch": 0.7456897630308035, + "grad_norm": 1.9785757064819336, + "learning_rate": 1.6021476018357495e-06, + "loss": 0.7697, + "step": 37250 + }, + { + "epoch": 0.7457097815479319, + "grad_norm": 1.0966650247573853, + "learning_rate": 1.6019097850112326e-06, + "loss": 0.3231, + "step": 37251 + }, + { + "epoch": 0.7457298000650602, + "grad_norm": 1.9151146411895752, + "learning_rate": 1.601671982471555e-06, + "loss": 0.681, + "step": 37252 + }, + { + "epoch": 0.7457498185821885, + "grad_norm": 1.0286905765533447, + "learning_rate": 1.6014341942177175e-06, + "loss": 0.272, + "step": 37253 + }, + { + "epoch": 0.7457698370993169, + "grad_norm": 1.0398461818695068, + "learning_rate": 1.6011964202507175e-06, + "loss": 0.3057, + "step": 37254 + }, + { + "epoch": 0.7457898556164452, + "grad_norm": 1.9444530010223389, + "learning_rate": 1.6009586605715577e-06, + "loss": 0.7298, + "step": 37255 + }, + { + "epoch": 0.7458098741335736, + "grad_norm": 1.1056544780731201, + "learning_rate": 1.6007209151812347e-06, + "loss": 0.3098, + "step": 37256 + }, + { + "epoch": 0.7458298926507019, + "grad_norm": 1.1173521280288696, + "learning_rate": 1.600483184080751e-06, + "loss": 0.3172, + "step": 37257 + }, + { + "epoch": 0.7458499111678303, + "grad_norm": 1.1433807611465454, + "learning_rate": 1.6002454672711033e-06, + "loss": 0.2611, + "step": 37258 + }, + { + "epoch": 0.7458699296849586, + "grad_norm": 1.0512412786483765, + "learning_rate": 1.600007764753293e-06, + "loss": 0.366, + "step": 37259 + }, + { + "epoch": 0.745889948202087, + "grad_norm": 1.0600886344909668, + "learning_rate": 1.5997700765283186e-06, + "loss": 0.3151, + "step": 37260 + }, + { + "epoch": 0.7459099667192153, + "grad_norm": 1.1151041984558105, + "learning_rate": 1.5995324025971793e-06, + "loss": 0.3517, + "step": 37261 + }, + { + "epoch": 0.7459299852363436, + "grad_norm": 1.2644810676574707, + "learning_rate": 1.599294742960874e-06, + "loss": 0.3707, + "step": 37262 + }, + { + "epoch": 0.745950003753472, + "grad_norm": 1.060060977935791, + "learning_rate": 1.5990570976203995e-06, + "loss": 0.3343, + "step": 37263 + }, + { + "epoch": 0.7459700222706003, + "grad_norm": 1.0388132333755493, + "learning_rate": 1.5988194665767593e-06, + "loss": 0.3284, + "step": 37264 + }, + { + "epoch": 0.7459900407877287, + "grad_norm": 1.1104769706726074, + "learning_rate": 1.5985818498309503e-06, + "loss": 0.3003, + "step": 37265 + }, + { + "epoch": 0.746010059304857, + "grad_norm": 1.0244075059890747, + "learning_rate": 1.5983442473839705e-06, + "loss": 0.2774, + "step": 37266 + }, + { + "epoch": 0.7460300778219854, + "grad_norm": 1.0773284435272217, + "learning_rate": 1.5981066592368182e-06, + "loss": 0.2975, + "step": 37267 + }, + { + "epoch": 0.7460500963391137, + "grad_norm": 1.0705485343933105, + "learning_rate": 1.5978690853904949e-06, + "loss": 0.2526, + "step": 37268 + }, + { + "epoch": 0.746070114856242, + "grad_norm": 1.1662836074829102, + "learning_rate": 1.5976315258459963e-06, + "loss": 0.3567, + "step": 37269 + }, + { + "epoch": 0.7460901333733704, + "grad_norm": 1.0831434726715088, + "learning_rate": 1.5973939806043243e-06, + "loss": 0.2567, + "step": 37270 + }, + { + "epoch": 0.7461101518904987, + "grad_norm": 1.1021705865859985, + "learning_rate": 1.5971564496664737e-06, + "loss": 0.27, + "step": 37271 + }, + { + "epoch": 0.7461301704076271, + "grad_norm": 1.0751696825027466, + "learning_rate": 1.5969189330334473e-06, + "loss": 0.3281, + "step": 37272 + }, + { + "epoch": 0.7461501889247554, + "grad_norm": 1.1550484895706177, + "learning_rate": 1.5966814307062411e-06, + "loss": 0.2788, + "step": 37273 + }, + { + "epoch": 0.7461702074418838, + "grad_norm": 1.2811247110366821, + "learning_rate": 1.5964439426858536e-06, + "loss": 0.3096, + "step": 37274 + }, + { + "epoch": 0.7461902259590121, + "grad_norm": 1.08773672580719, + "learning_rate": 1.5962064689732837e-06, + "loss": 0.2922, + "step": 37275 + }, + { + "epoch": 0.7462102444761405, + "grad_norm": 1.0606309175491333, + "learning_rate": 1.5959690095695273e-06, + "loss": 0.3204, + "step": 37276 + }, + { + "epoch": 0.7462302629932688, + "grad_norm": 1.1456866264343262, + "learning_rate": 1.5957315644755867e-06, + "loss": 0.2319, + "step": 37277 + }, + { + "epoch": 0.7462502815103971, + "grad_norm": 1.0810784101486206, + "learning_rate": 1.5954941336924573e-06, + "loss": 0.2674, + "step": 37278 + }, + { + "epoch": 0.7462703000275255, + "grad_norm": 1.112088918685913, + "learning_rate": 1.5952567172211387e-06, + "loss": 0.261, + "step": 37279 + }, + { + "epoch": 0.7462903185446538, + "grad_norm": 1.1600852012634277, + "learning_rate": 1.5950193150626259e-06, + "loss": 0.3399, + "step": 37280 + }, + { + "epoch": 0.7463103370617822, + "grad_norm": 1.9158111810684204, + "learning_rate": 1.594781927217921e-06, + "loss": 0.811, + "step": 37281 + }, + { + "epoch": 0.7463303555789105, + "grad_norm": 1.0337355136871338, + "learning_rate": 1.5945445536880183e-06, + "loss": 0.2676, + "step": 37282 + }, + { + "epoch": 0.7463503740960389, + "grad_norm": 1.2331558465957642, + "learning_rate": 1.5943071944739191e-06, + "loss": 0.2854, + "step": 37283 + }, + { + "epoch": 0.7463703926131672, + "grad_norm": 1.1373587846755981, + "learning_rate": 1.59406984957662e-06, + "loss": 0.3263, + "step": 37284 + }, + { + "epoch": 0.7463904111302955, + "grad_norm": 1.2327189445495605, + "learning_rate": 1.593832518997116e-06, + "loss": 0.3189, + "step": 37285 + }, + { + "epoch": 0.7464104296474239, + "grad_norm": 1.0886415243148804, + "learning_rate": 1.5935952027364088e-06, + "loss": 0.2509, + "step": 37286 + }, + { + "epoch": 0.7464304481645522, + "grad_norm": 1.1403337717056274, + "learning_rate": 1.5933579007954946e-06, + "loss": 0.2641, + "step": 37287 + }, + { + "epoch": 0.7464504666816806, + "grad_norm": 1.1290065050125122, + "learning_rate": 1.5931206131753707e-06, + "loss": 0.3093, + "step": 37288 + }, + { + "epoch": 0.7464704851988089, + "grad_norm": 2.00691819190979, + "learning_rate": 1.5928833398770328e-06, + "loss": 0.7749, + "step": 37289 + }, + { + "epoch": 0.7464905037159373, + "grad_norm": 1.0567597150802612, + "learning_rate": 1.5926460809014816e-06, + "loss": 0.2884, + "step": 37290 + }, + { + "epoch": 0.7465105222330656, + "grad_norm": 1.0320557355880737, + "learning_rate": 1.592408836249713e-06, + "loss": 0.3084, + "step": 37291 + }, + { + "epoch": 0.746530540750194, + "grad_norm": 1.2529728412628174, + "learning_rate": 1.5921716059227249e-06, + "loss": 0.2976, + "step": 37292 + }, + { + "epoch": 0.7465505592673223, + "grad_norm": 1.260069489479065, + "learning_rate": 1.5919343899215118e-06, + "loss": 0.3324, + "step": 37293 + }, + { + "epoch": 0.7465705777844506, + "grad_norm": 1.2064751386642456, + "learning_rate": 1.591697188247075e-06, + "loss": 0.3553, + "step": 37294 + }, + { + "epoch": 0.746590596301579, + "grad_norm": 1.1548449993133545, + "learning_rate": 1.5914600009004078e-06, + "loss": 0.2355, + "step": 37295 + }, + { + "epoch": 0.7466106148187073, + "grad_norm": 1.0951077938079834, + "learning_rate": 1.591222827882511e-06, + "loss": 0.2534, + "step": 37296 + }, + { + "epoch": 0.7466306333358357, + "grad_norm": 1.1665722131729126, + "learning_rate": 1.5909856691943798e-06, + "loss": 0.2994, + "step": 37297 + }, + { + "epoch": 0.746650651852964, + "grad_norm": 1.014581561088562, + "learning_rate": 1.5907485248370097e-06, + "loss": 0.2845, + "step": 37298 + }, + { + "epoch": 0.7466706703700924, + "grad_norm": 0.9945443868637085, + "learning_rate": 1.590511394811401e-06, + "loss": 0.2767, + "step": 37299 + }, + { + "epoch": 0.7466906888872207, + "grad_norm": 1.1023919582366943, + "learning_rate": 1.5902742791185482e-06, + "loss": 0.2945, + "step": 37300 + }, + { + "epoch": 0.746710707404349, + "grad_norm": 0.972027063369751, + "learning_rate": 1.5900371777594486e-06, + "loss": 0.2539, + "step": 37301 + }, + { + "epoch": 0.7467307259214774, + "grad_norm": 1.2147141695022583, + "learning_rate": 1.5898000907350975e-06, + "loss": 0.3087, + "step": 37302 + }, + { + "epoch": 0.7467507444386057, + "grad_norm": 1.0287353992462158, + "learning_rate": 1.589563018046495e-06, + "loss": 0.3055, + "step": 37303 + }, + { + "epoch": 0.7467707629557341, + "grad_norm": 1.177328109741211, + "learning_rate": 1.5893259596946348e-06, + "loss": 0.2623, + "step": 37304 + }, + { + "epoch": 0.7467907814728624, + "grad_norm": 1.1343717575073242, + "learning_rate": 1.5890889156805144e-06, + "loss": 0.2602, + "step": 37305 + }, + { + "epoch": 0.7468107999899908, + "grad_norm": 1.4981971979141235, + "learning_rate": 1.5888518860051289e-06, + "loss": 0.3092, + "step": 37306 + }, + { + "epoch": 0.7468308185071191, + "grad_norm": 1.0670291185379028, + "learning_rate": 1.5886148706694775e-06, + "loss": 0.3168, + "step": 37307 + }, + { + "epoch": 0.7468508370242475, + "grad_norm": 1.042154312133789, + "learning_rate": 1.5883778696745533e-06, + "loss": 0.3537, + "step": 37308 + }, + { + "epoch": 0.7468708555413758, + "grad_norm": 1.3564549684524536, + "learning_rate": 1.5881408830213563e-06, + "loss": 0.2961, + "step": 37309 + }, + { + "epoch": 0.7468908740585041, + "grad_norm": 1.090742826461792, + "learning_rate": 1.58790391071088e-06, + "loss": 0.2775, + "step": 37310 + }, + { + "epoch": 0.7469108925756325, + "grad_norm": 1.102997064590454, + "learning_rate": 1.5876669527441208e-06, + "loss": 0.3393, + "step": 37311 + }, + { + "epoch": 0.7469309110927608, + "grad_norm": 1.150599718093872, + "learning_rate": 1.5874300091220762e-06, + "loss": 0.2944, + "step": 37312 + }, + { + "epoch": 0.7469509296098892, + "grad_norm": 1.0723766088485718, + "learning_rate": 1.5871930798457419e-06, + "loss": 0.2989, + "step": 37313 + }, + { + "epoch": 0.7469709481270175, + "grad_norm": 1.1977760791778564, + "learning_rate": 1.586956164916113e-06, + "loss": 0.3545, + "step": 37314 + }, + { + "epoch": 0.7469909666441459, + "grad_norm": 1.0904781818389893, + "learning_rate": 1.586719264334186e-06, + "loss": 0.2746, + "step": 37315 + }, + { + "epoch": 0.7470109851612742, + "grad_norm": 1.0775644779205322, + "learning_rate": 1.5864823781009553e-06, + "loss": 0.3567, + "step": 37316 + }, + { + "epoch": 0.7470310036784025, + "grad_norm": 0.9631309509277344, + "learning_rate": 1.5862455062174199e-06, + "loss": 0.3048, + "step": 37317 + }, + { + "epoch": 0.7470510221955309, + "grad_norm": 1.106099009513855, + "learning_rate": 1.5860086486845733e-06, + "loss": 0.2781, + "step": 37318 + }, + { + "epoch": 0.7470710407126592, + "grad_norm": 1.1586763858795166, + "learning_rate": 1.58577180550341e-06, + "loss": 0.3181, + "step": 37319 + }, + { + "epoch": 0.7470910592297876, + "grad_norm": 1.0988988876342773, + "learning_rate": 1.5855349766749295e-06, + "loss": 0.3354, + "step": 37320 + }, + { + "epoch": 0.7471110777469159, + "grad_norm": 1.0899031162261963, + "learning_rate": 1.585298162200123e-06, + "loss": 0.3137, + "step": 37321 + }, + { + "epoch": 0.7471310962640443, + "grad_norm": 1.478920578956604, + "learning_rate": 1.5850613620799899e-06, + "loss": 0.3185, + "step": 37322 + }, + { + "epoch": 0.7471511147811726, + "grad_norm": 1.1025053262710571, + "learning_rate": 1.584824576315524e-06, + "loss": 0.3155, + "step": 37323 + }, + { + "epoch": 0.747171133298301, + "grad_norm": 1.0454708337783813, + "learning_rate": 1.5845878049077202e-06, + "loss": 0.257, + "step": 37324 + }, + { + "epoch": 0.7471911518154293, + "grad_norm": 1.993528962135315, + "learning_rate": 1.584351047857573e-06, + "loss": 0.7528, + "step": 37325 + }, + { + "epoch": 0.7472111703325576, + "grad_norm": 1.2081629037857056, + "learning_rate": 1.5841143051660806e-06, + "loss": 0.3214, + "step": 37326 + }, + { + "epoch": 0.747231188849686, + "grad_norm": 1.045193076133728, + "learning_rate": 1.5838775768342368e-06, + "loss": 0.2982, + "step": 37327 + }, + { + "epoch": 0.7472512073668143, + "grad_norm": 1.861026644706726, + "learning_rate": 1.5836408628630361e-06, + "loss": 0.7527, + "step": 37328 + }, + { + "epoch": 0.7472712258839427, + "grad_norm": 1.2335991859436035, + "learning_rate": 1.5834041632534724e-06, + "loss": 0.3428, + "step": 37329 + }, + { + "epoch": 0.747291244401071, + "grad_norm": 1.078144907951355, + "learning_rate": 1.5831674780065443e-06, + "loss": 0.3036, + "step": 37330 + }, + { + "epoch": 0.7473112629181994, + "grad_norm": 1.1454071998596191, + "learning_rate": 1.5829308071232447e-06, + "loss": 0.2984, + "step": 37331 + }, + { + "epoch": 0.7473312814353277, + "grad_norm": 1.1138919591903687, + "learning_rate": 1.5826941506045669e-06, + "loss": 0.2721, + "step": 37332 + }, + { + "epoch": 0.747351299952456, + "grad_norm": 1.0995994806289673, + "learning_rate": 1.5824575084515087e-06, + "loss": 0.3053, + "step": 37333 + }, + { + "epoch": 0.7473713184695844, + "grad_norm": 1.1852961778640747, + "learning_rate": 1.5822208806650623e-06, + "loss": 0.2704, + "step": 37334 + }, + { + "epoch": 0.7473913369867127, + "grad_norm": 1.9858771562576294, + "learning_rate": 1.581984267246226e-06, + "loss": 0.7465, + "step": 37335 + }, + { + "epoch": 0.7474113555038411, + "grad_norm": 1.0634206533432007, + "learning_rate": 1.5817476681959915e-06, + "loss": 0.2993, + "step": 37336 + }, + { + "epoch": 0.7474313740209694, + "grad_norm": 1.052258014678955, + "learning_rate": 1.5815110835153546e-06, + "loss": 0.3196, + "step": 37337 + }, + { + "epoch": 0.7474513925380978, + "grad_norm": 1.0490925312042236, + "learning_rate": 1.5812745132053076e-06, + "loss": 0.2918, + "step": 37338 + }, + { + "epoch": 0.7474714110552261, + "grad_norm": 1.2516522407531738, + "learning_rate": 1.5810379572668488e-06, + "loss": 0.294, + "step": 37339 + }, + { + "epoch": 0.7474914295723545, + "grad_norm": 1.9321926832199097, + "learning_rate": 1.5808014157009705e-06, + "loss": 0.7544, + "step": 37340 + }, + { + "epoch": 0.7475114480894828, + "grad_norm": 1.3714059591293335, + "learning_rate": 1.5805648885086673e-06, + "loss": 0.3318, + "step": 37341 + }, + { + "epoch": 0.7475314666066111, + "grad_norm": 1.0625592470169067, + "learning_rate": 1.5803283756909316e-06, + "loss": 0.2846, + "step": 37342 + }, + { + "epoch": 0.7475514851237395, + "grad_norm": 1.3128901720046997, + "learning_rate": 1.580091877248761e-06, + "loss": 0.2581, + "step": 37343 + }, + { + "epoch": 0.7475715036408678, + "grad_norm": 0.9774867296218872, + "learning_rate": 1.5798553931831484e-06, + "loss": 0.2405, + "step": 37344 + }, + { + "epoch": 0.7475915221579962, + "grad_norm": 1.045690894126892, + "learning_rate": 1.579618923495086e-06, + "loss": 0.2661, + "step": 37345 + }, + { + "epoch": 0.7476115406751245, + "grad_norm": 1.0681917667388916, + "learning_rate": 1.5793824681855714e-06, + "loss": 0.2899, + "step": 37346 + }, + { + "epoch": 0.7476315591922529, + "grad_norm": 1.7967021465301514, + "learning_rate": 1.5791460272555952e-06, + "loss": 0.7456, + "step": 37347 + }, + { + "epoch": 0.7476515777093812, + "grad_norm": 1.065718173980713, + "learning_rate": 1.5789096007061543e-06, + "loss": 0.2723, + "step": 37348 + }, + { + "epoch": 0.7476715962265095, + "grad_norm": 1.0941110849380493, + "learning_rate": 1.5786731885382411e-06, + "loss": 0.299, + "step": 37349 + }, + { + "epoch": 0.7476916147436379, + "grad_norm": 1.2281134128570557, + "learning_rate": 1.5784367907528497e-06, + "loss": 0.3153, + "step": 37350 + }, + { + "epoch": 0.7477116332607662, + "grad_norm": 1.1915357112884521, + "learning_rate": 1.5782004073509722e-06, + "loss": 0.3183, + "step": 37351 + }, + { + "epoch": 0.7477316517778946, + "grad_norm": 1.0444319248199463, + "learning_rate": 1.5779640383336054e-06, + "loss": 0.2356, + "step": 37352 + }, + { + "epoch": 0.7477516702950229, + "grad_norm": 1.0980889797210693, + "learning_rate": 1.5777276837017413e-06, + "loss": 0.2853, + "step": 37353 + }, + { + "epoch": 0.7477716888121513, + "grad_norm": 1.1467530727386475, + "learning_rate": 1.5774913434563733e-06, + "loss": 0.2763, + "step": 37354 + }, + { + "epoch": 0.7477917073292796, + "grad_norm": 1.1697105169296265, + "learning_rate": 1.5772550175984952e-06, + "loss": 0.2778, + "step": 37355 + }, + { + "epoch": 0.747811725846408, + "grad_norm": 1.0425260066986084, + "learning_rate": 1.5770187061290992e-06, + "loss": 0.249, + "step": 37356 + }, + { + "epoch": 0.7478317443635363, + "grad_norm": 1.257542371749878, + "learning_rate": 1.5767824090491812e-06, + "loss": 0.3022, + "step": 37357 + }, + { + "epoch": 0.7478517628806646, + "grad_norm": 1.0082905292510986, + "learning_rate": 1.576546126359732e-06, + "loss": 0.3099, + "step": 37358 + }, + { + "epoch": 0.747871781397793, + "grad_norm": 1.1148349046707153, + "learning_rate": 1.5763098580617475e-06, + "loss": 0.2298, + "step": 37359 + }, + { + "epoch": 0.7478917999149213, + "grad_norm": 1.2207694053649902, + "learning_rate": 1.576073604156218e-06, + "loss": 0.2839, + "step": 37360 + }, + { + "epoch": 0.7479118184320497, + "grad_norm": 1.2571643590927124, + "learning_rate": 1.57583736464414e-06, + "loss": 0.3444, + "step": 37361 + }, + { + "epoch": 0.747931836949178, + "grad_norm": 1.0949288606643677, + "learning_rate": 1.5756011395265042e-06, + "loss": 0.2973, + "step": 37362 + }, + { + "epoch": 0.7479518554663064, + "grad_norm": 1.0831512212753296, + "learning_rate": 1.5753649288043048e-06, + "loss": 0.321, + "step": 37363 + }, + { + "epoch": 0.7479718739834347, + "grad_norm": 1.1586097478866577, + "learning_rate": 1.5751287324785325e-06, + "loss": 0.3083, + "step": 37364 + }, + { + "epoch": 0.747991892500563, + "grad_norm": 1.1758520603179932, + "learning_rate": 1.574892550550184e-06, + "loss": 0.281, + "step": 37365 + }, + { + "epoch": 0.7480119110176914, + "grad_norm": 2.0004751682281494, + "learning_rate": 1.5746563830202494e-06, + "loss": 0.7383, + "step": 37366 + }, + { + "epoch": 0.7480319295348197, + "grad_norm": 0.9968757033348083, + "learning_rate": 1.5744202298897227e-06, + "loss": 0.2497, + "step": 37367 + }, + { + "epoch": 0.7480519480519481, + "grad_norm": 1.0815118551254272, + "learning_rate": 1.5741840911595964e-06, + "loss": 0.3225, + "step": 37368 + }, + { + "epoch": 0.7480719665690764, + "grad_norm": 1.130168080329895, + "learning_rate": 1.573947966830861e-06, + "loss": 0.3291, + "step": 37369 + }, + { + "epoch": 0.7480919850862048, + "grad_norm": 1.0776580572128296, + "learning_rate": 1.5737118569045123e-06, + "loss": 0.2861, + "step": 37370 + }, + { + "epoch": 0.7481120036033331, + "grad_norm": 1.172047734260559, + "learning_rate": 1.5734757613815404e-06, + "loss": 0.3228, + "step": 37371 + }, + { + "epoch": 0.7481320221204615, + "grad_norm": 1.11337411403656, + "learning_rate": 1.5732396802629408e-06, + "loss": 0.2802, + "step": 37372 + }, + { + "epoch": 0.7481520406375898, + "grad_norm": 1.0714787244796753, + "learning_rate": 1.5730036135497022e-06, + "loss": 0.3231, + "step": 37373 + }, + { + "epoch": 0.7481720591547181, + "grad_norm": 1.1908149719238281, + "learning_rate": 1.5727675612428206e-06, + "loss": 0.3026, + "step": 37374 + }, + { + "epoch": 0.7481920776718465, + "grad_norm": 1.125404715538025, + "learning_rate": 1.5725315233432864e-06, + "loss": 0.2761, + "step": 37375 + }, + { + "epoch": 0.7482120961889748, + "grad_norm": 1.0338726043701172, + "learning_rate": 1.5722954998520923e-06, + "loss": 0.2973, + "step": 37376 + }, + { + "epoch": 0.7482321147061032, + "grad_norm": 1.3968322277069092, + "learning_rate": 1.5720594907702303e-06, + "loss": 0.3277, + "step": 37377 + }, + { + "epoch": 0.7482521332232315, + "grad_norm": 1.1578625440597534, + "learning_rate": 1.571823496098691e-06, + "loss": 0.3472, + "step": 37378 + }, + { + "epoch": 0.7482721517403599, + "grad_norm": 1.101725459098816, + "learning_rate": 1.5715875158384697e-06, + "loss": 0.2892, + "step": 37379 + }, + { + "epoch": 0.7482921702574882, + "grad_norm": 1.1175146102905273, + "learning_rate": 1.5713515499905563e-06, + "loss": 0.281, + "step": 37380 + }, + { + "epoch": 0.7483121887746165, + "grad_norm": 1.1658480167388916, + "learning_rate": 1.5711155985559434e-06, + "loss": 0.3082, + "step": 37381 + }, + { + "epoch": 0.7483322072917449, + "grad_norm": 1.3845871686935425, + "learning_rate": 1.570879661535621e-06, + "loss": 0.2516, + "step": 37382 + }, + { + "epoch": 0.7483522258088732, + "grad_norm": 1.1506634950637817, + "learning_rate": 1.5706437389305833e-06, + "loss": 0.2857, + "step": 37383 + }, + { + "epoch": 0.7483722443260016, + "grad_norm": 1.0289853811264038, + "learning_rate": 1.5704078307418207e-06, + "loss": 0.2823, + "step": 37384 + }, + { + "epoch": 0.7483922628431299, + "grad_norm": 1.014540195465088, + "learning_rate": 1.5701719369703271e-06, + "loss": 0.2628, + "step": 37385 + }, + { + "epoch": 0.7484122813602583, + "grad_norm": 1.3072487115859985, + "learning_rate": 1.5699360576170925e-06, + "loss": 0.301, + "step": 37386 + }, + { + "epoch": 0.7484322998773866, + "grad_norm": 1.1318825483322144, + "learning_rate": 1.5697001926831063e-06, + "loss": 0.3325, + "step": 37387 + }, + { + "epoch": 0.748452318394515, + "grad_norm": 1.158854603767395, + "learning_rate": 1.5694643421693645e-06, + "loss": 0.3029, + "step": 37388 + }, + { + "epoch": 0.7484723369116433, + "grad_norm": 1.0854519605636597, + "learning_rate": 1.5692285060768564e-06, + "loss": 0.2477, + "step": 37389 + }, + { + "epoch": 0.7484923554287716, + "grad_norm": 1.081751823425293, + "learning_rate": 1.568992684406573e-06, + "loss": 0.3033, + "step": 37390 + }, + { + "epoch": 0.7485123739459, + "grad_norm": 1.1186330318450928, + "learning_rate": 1.5687568771595046e-06, + "loss": 0.3061, + "step": 37391 + }, + { + "epoch": 0.7485323924630283, + "grad_norm": 1.2219067811965942, + "learning_rate": 1.5685210843366455e-06, + "loss": 0.2925, + "step": 37392 + }, + { + "epoch": 0.7485524109801567, + "grad_norm": 1.161577820777893, + "learning_rate": 1.5682853059389852e-06, + "loss": 0.3705, + "step": 37393 + }, + { + "epoch": 0.748572429497285, + "grad_norm": 1.162667155265808, + "learning_rate": 1.5680495419675152e-06, + "loss": 0.3039, + "step": 37394 + }, + { + "epoch": 0.7485924480144134, + "grad_norm": 1.1621181964874268, + "learning_rate": 1.5678137924232246e-06, + "loss": 0.3673, + "step": 37395 + }, + { + "epoch": 0.7486124665315417, + "grad_norm": 1.2913494110107422, + "learning_rate": 1.5675780573071076e-06, + "loss": 0.273, + "step": 37396 + }, + { + "epoch": 0.74863248504867, + "grad_norm": 1.1908795833587646, + "learning_rate": 1.5673423366201523e-06, + "loss": 0.2857, + "step": 37397 + }, + { + "epoch": 0.7486525035657984, + "grad_norm": 1.1414724588394165, + "learning_rate": 1.5671066303633525e-06, + "loss": 0.2561, + "step": 37398 + }, + { + "epoch": 0.7486725220829267, + "grad_norm": 1.067987322807312, + "learning_rate": 1.5668709385376978e-06, + "loss": 0.2802, + "step": 37399 + }, + { + "epoch": 0.7486925406000551, + "grad_norm": 1.1180152893066406, + "learning_rate": 1.5666352611441776e-06, + "loss": 0.3139, + "step": 37400 + }, + { + "epoch": 0.7487125591171834, + "grad_norm": 1.1856284141540527, + "learning_rate": 1.5663995981837854e-06, + "loss": 0.3231, + "step": 37401 + }, + { + "epoch": 0.7487325776343118, + "grad_norm": 1.1245224475860596, + "learning_rate": 1.5661639496575103e-06, + "loss": 0.2762, + "step": 37402 + }, + { + "epoch": 0.7487525961514401, + "grad_norm": 1.2003180980682373, + "learning_rate": 1.565928315566343e-06, + "loss": 0.3393, + "step": 37403 + }, + { + "epoch": 0.7487726146685684, + "grad_norm": 1.13905930519104, + "learning_rate": 1.5656926959112723e-06, + "loss": 0.2625, + "step": 37404 + }, + { + "epoch": 0.7487926331856968, + "grad_norm": 1.148803949356079, + "learning_rate": 1.565457090693292e-06, + "loss": 0.2748, + "step": 37405 + }, + { + "epoch": 0.7488126517028251, + "grad_norm": 1.037357211112976, + "learning_rate": 1.5652214999133908e-06, + "loss": 0.321, + "step": 37406 + }, + { + "epoch": 0.7488326702199535, + "grad_norm": 1.180271863937378, + "learning_rate": 1.5649859235725595e-06, + "loss": 0.3034, + "step": 37407 + }, + { + "epoch": 0.7488526887370818, + "grad_norm": 1.0662332773208618, + "learning_rate": 1.5647503616717885e-06, + "loss": 0.2941, + "step": 37408 + }, + { + "epoch": 0.7488727072542102, + "grad_norm": 1.1430420875549316, + "learning_rate": 1.5645148142120653e-06, + "loss": 0.283, + "step": 37409 + }, + { + "epoch": 0.7488927257713385, + "grad_norm": 1.164839506149292, + "learning_rate": 1.564279281194383e-06, + "loss": 0.2619, + "step": 37410 + }, + { + "epoch": 0.7489127442884669, + "grad_norm": 0.9969694018363953, + "learning_rate": 1.564043762619733e-06, + "loss": 0.2584, + "step": 37411 + }, + { + "epoch": 0.7489327628055952, + "grad_norm": 1.1000010967254639, + "learning_rate": 1.5638082584891034e-06, + "loss": 0.2811, + "step": 37412 + }, + { + "epoch": 0.7489527813227235, + "grad_norm": 1.1456767320632935, + "learning_rate": 1.563572768803483e-06, + "loss": 0.3291, + "step": 37413 + }, + { + "epoch": 0.7489727998398519, + "grad_norm": 1.1727900505065918, + "learning_rate": 1.5633372935638641e-06, + "loss": 0.2569, + "step": 37414 + }, + { + "epoch": 0.7489928183569802, + "grad_norm": 1.1717233657836914, + "learning_rate": 1.563101832771236e-06, + "loss": 0.2948, + "step": 37415 + }, + { + "epoch": 0.7490128368741086, + "grad_norm": 1.946705937385559, + "learning_rate": 1.5628663864265886e-06, + "loss": 0.7208, + "step": 37416 + }, + { + "epoch": 0.7490328553912369, + "grad_norm": 1.3332103490829468, + "learning_rate": 1.5626309545309094e-06, + "loss": 0.3157, + "step": 37417 + }, + { + "epoch": 0.7490528739083653, + "grad_norm": 1.1812247037887573, + "learning_rate": 1.5623955370851913e-06, + "loss": 0.297, + "step": 37418 + }, + { + "epoch": 0.7490728924254936, + "grad_norm": 1.0880422592163086, + "learning_rate": 1.5621601340904225e-06, + "loss": 0.3019, + "step": 37419 + }, + { + "epoch": 0.7490929109426219, + "grad_norm": 1.1792110204696655, + "learning_rate": 1.5619247455475927e-06, + "loss": 0.3288, + "step": 37420 + }, + { + "epoch": 0.7491129294597503, + "grad_norm": 2.1177139282226562, + "learning_rate": 1.561689371457692e-06, + "loss": 0.7707, + "step": 37421 + }, + { + "epoch": 0.7491329479768786, + "grad_norm": 1.1004979610443115, + "learning_rate": 1.561454011821707e-06, + "loss": 0.3114, + "step": 37422 + }, + { + "epoch": 0.749152966494007, + "grad_norm": 1.0601410865783691, + "learning_rate": 1.5612186666406294e-06, + "loss": 0.2895, + "step": 37423 + }, + { + "epoch": 0.7491729850111353, + "grad_norm": 1.0650827884674072, + "learning_rate": 1.5609833359154503e-06, + "loss": 0.2988, + "step": 37424 + }, + { + "epoch": 0.7491930035282637, + "grad_norm": 1.0201598405838013, + "learning_rate": 1.5607480196471568e-06, + "loss": 0.263, + "step": 37425 + }, + { + "epoch": 0.749213022045392, + "grad_norm": 1.082459807395935, + "learning_rate": 1.5605127178367375e-06, + "loss": 0.3276, + "step": 37426 + }, + { + "epoch": 0.7492330405625204, + "grad_norm": 1.1348446607589722, + "learning_rate": 1.5602774304851836e-06, + "loss": 0.251, + "step": 37427 + }, + { + "epoch": 0.7492530590796487, + "grad_norm": 1.1087738275527954, + "learning_rate": 1.5600421575934838e-06, + "loss": 0.3164, + "step": 37428 + }, + { + "epoch": 0.749273077596777, + "grad_norm": 1.077867031097412, + "learning_rate": 1.559806899162626e-06, + "loss": 0.2735, + "step": 37429 + }, + { + "epoch": 0.7492930961139054, + "grad_norm": 1.0601850748062134, + "learning_rate": 1.5595716551935996e-06, + "loss": 0.294, + "step": 37430 + }, + { + "epoch": 0.7493131146310337, + "grad_norm": 1.2115190029144287, + "learning_rate": 1.5593364256873922e-06, + "loss": 0.3338, + "step": 37431 + }, + { + "epoch": 0.7493331331481621, + "grad_norm": 1.0794912576675415, + "learning_rate": 1.5591012106449954e-06, + "loss": 0.2796, + "step": 37432 + }, + { + "epoch": 0.7493531516652904, + "grad_norm": 1.1026116609573364, + "learning_rate": 1.5588660100673969e-06, + "loss": 0.3374, + "step": 37433 + }, + { + "epoch": 0.7493731701824188, + "grad_norm": 1.1682488918304443, + "learning_rate": 1.5586308239555848e-06, + "loss": 0.3035, + "step": 37434 + }, + { + "epoch": 0.7493931886995471, + "grad_norm": 0.9952520728111267, + "learning_rate": 1.558395652310547e-06, + "loss": 0.2814, + "step": 37435 + }, + { + "epoch": 0.7494132072166754, + "grad_norm": 1.3267041444778442, + "learning_rate": 1.558160495133273e-06, + "loss": 0.3343, + "step": 37436 + }, + { + "epoch": 0.7494332257338038, + "grad_norm": 1.1110848188400269, + "learning_rate": 1.557925352424753e-06, + "loss": 0.2545, + "step": 37437 + }, + { + "epoch": 0.7494532442509321, + "grad_norm": 1.0369930267333984, + "learning_rate": 1.5576902241859742e-06, + "loss": 0.3286, + "step": 37438 + }, + { + "epoch": 0.7494732627680605, + "grad_norm": 1.0455818176269531, + "learning_rate": 1.5574551104179247e-06, + "loss": 0.2599, + "step": 37439 + }, + { + "epoch": 0.7494932812851888, + "grad_norm": 1.1719894409179688, + "learning_rate": 1.5572200111215917e-06, + "loss": 0.3124, + "step": 37440 + }, + { + "epoch": 0.7495132998023172, + "grad_norm": 1.1490695476531982, + "learning_rate": 1.556984926297967e-06, + "loss": 0.2688, + "step": 37441 + }, + { + "epoch": 0.7495333183194455, + "grad_norm": 1.1229861974716187, + "learning_rate": 1.5567498559480359e-06, + "loss": 0.2719, + "step": 37442 + }, + { + "epoch": 0.7495533368365739, + "grad_norm": 1.2164697647094727, + "learning_rate": 1.5565148000727876e-06, + "loss": 0.3136, + "step": 37443 + }, + { + "epoch": 0.7495733553537022, + "grad_norm": 1.3052172660827637, + "learning_rate": 1.5562797586732087e-06, + "loss": 0.3514, + "step": 37444 + }, + { + "epoch": 0.7495933738708305, + "grad_norm": 1.214213490486145, + "learning_rate": 1.5560447317502903e-06, + "loss": 0.2993, + "step": 37445 + }, + { + "epoch": 0.7496133923879589, + "grad_norm": 1.1660056114196777, + "learning_rate": 1.555809719305018e-06, + "loss": 0.3013, + "step": 37446 + }, + { + "epoch": 0.7496334109050872, + "grad_norm": 2.279595375061035, + "learning_rate": 1.5555747213383808e-06, + "loss": 0.8088, + "step": 37447 + }, + { + "epoch": 0.7496534294222156, + "grad_norm": 1.1636288166046143, + "learning_rate": 1.555339737851365e-06, + "loss": 0.3181, + "step": 37448 + }, + { + "epoch": 0.7496734479393439, + "grad_norm": 1.0381863117218018, + "learning_rate": 1.5551047688449595e-06, + "loss": 0.2744, + "step": 37449 + }, + { + "epoch": 0.7496934664564723, + "grad_norm": 1.129148006439209, + "learning_rate": 1.5548698143201536e-06, + "loss": 0.3173, + "step": 37450 + }, + { + "epoch": 0.7497134849736006, + "grad_norm": 1.0400787591934204, + "learning_rate": 1.5546348742779337e-06, + "loss": 0.2517, + "step": 37451 + }, + { + "epoch": 0.7497335034907289, + "grad_norm": 0.998814582824707, + "learning_rate": 1.5543999487192874e-06, + "loss": 0.2352, + "step": 37452 + }, + { + "epoch": 0.7497535220078573, + "grad_norm": 1.0577939748764038, + "learning_rate": 1.5541650376452005e-06, + "loss": 0.2814, + "step": 37453 + }, + { + "epoch": 0.7497735405249856, + "grad_norm": 1.1331678628921509, + "learning_rate": 1.5539301410566638e-06, + "loss": 0.3073, + "step": 37454 + }, + { + "epoch": 0.749793559042114, + "grad_norm": 1.0964688062667847, + "learning_rate": 1.5536952589546633e-06, + "loss": 0.2585, + "step": 37455 + }, + { + "epoch": 0.7498135775592423, + "grad_norm": 1.1139986515045166, + "learning_rate": 1.5534603913401858e-06, + "loss": 0.3009, + "step": 37456 + }, + { + "epoch": 0.7498335960763707, + "grad_norm": 1.8986254930496216, + "learning_rate": 1.5532255382142181e-06, + "loss": 0.6998, + "step": 37457 + }, + { + "epoch": 0.749853614593499, + "grad_norm": 1.8606457710266113, + "learning_rate": 1.55299069957775e-06, + "loss": 0.7148, + "step": 37458 + }, + { + "epoch": 0.7498736331106274, + "grad_norm": 1.2783515453338623, + "learning_rate": 1.5527558754317673e-06, + "loss": 0.3016, + "step": 37459 + }, + { + "epoch": 0.7498936516277557, + "grad_norm": 0.980032742023468, + "learning_rate": 1.5525210657772565e-06, + "loss": 0.2761, + "step": 37460 + }, + { + "epoch": 0.749913670144884, + "grad_norm": 1.0154502391815186, + "learning_rate": 1.5522862706152042e-06, + "loss": 0.3178, + "step": 37461 + }, + { + "epoch": 0.7499336886620124, + "grad_norm": 1.1603388786315918, + "learning_rate": 1.5520514899465983e-06, + "loss": 0.317, + "step": 37462 + }, + { + "epoch": 0.7499537071791407, + "grad_norm": 1.0949698686599731, + "learning_rate": 1.5518167237724275e-06, + "loss": 0.3186, + "step": 37463 + }, + { + "epoch": 0.7499737256962691, + "grad_norm": 1.093497395515442, + "learning_rate": 1.551581972093677e-06, + "loss": 0.284, + "step": 37464 + }, + { + "epoch": 0.7499937442133974, + "grad_norm": 1.043237328529358, + "learning_rate": 1.551347234911334e-06, + "loss": 0.2964, + "step": 37465 + }, + { + "epoch": 0.7500137627305258, + "grad_norm": 1.1285653114318848, + "learning_rate": 1.5511125122263832e-06, + "loss": 0.3375, + "step": 37466 + }, + { + "epoch": 0.7500337812476541, + "grad_norm": 1.1705231666564941, + "learning_rate": 1.5508778040398148e-06, + "loss": 0.2784, + "step": 37467 + }, + { + "epoch": 0.7500537997647824, + "grad_norm": 1.1900262832641602, + "learning_rate": 1.5506431103526142e-06, + "loss": 0.2819, + "step": 37468 + }, + { + "epoch": 0.7500738182819108, + "grad_norm": 1.1530896425247192, + "learning_rate": 1.5504084311657675e-06, + "loss": 0.3095, + "step": 37469 + }, + { + "epoch": 0.7500938367990391, + "grad_norm": 1.192857265472412, + "learning_rate": 1.5501737664802612e-06, + "loss": 0.3162, + "step": 37470 + }, + { + "epoch": 0.7501138553161675, + "grad_norm": 1.1727889776229858, + "learning_rate": 1.5499391162970807e-06, + "loss": 0.2788, + "step": 37471 + }, + { + "epoch": 0.7501338738332958, + "grad_norm": 1.8777872323989868, + "learning_rate": 1.549704480617215e-06, + "loss": 0.6948, + "step": 37472 + }, + { + "epoch": 0.7501538923504242, + "grad_norm": 1.15067720413208, + "learning_rate": 1.549469859441649e-06, + "loss": 0.3136, + "step": 37473 + }, + { + "epoch": 0.7501739108675525, + "grad_norm": 1.094446063041687, + "learning_rate": 1.5492352527713678e-06, + "loss": 0.2713, + "step": 37474 + }, + { + "epoch": 0.7501939293846809, + "grad_norm": 1.0227000713348389, + "learning_rate": 1.5490006606073592e-06, + "loss": 0.2883, + "step": 37475 + }, + { + "epoch": 0.7502139479018092, + "grad_norm": 1.072880744934082, + "learning_rate": 1.5487660829506108e-06, + "loss": 0.3332, + "step": 37476 + }, + { + "epoch": 0.7502339664189375, + "grad_norm": 1.2376192808151245, + "learning_rate": 1.5485315198021067e-06, + "loss": 0.3255, + "step": 37477 + }, + { + "epoch": 0.7502539849360659, + "grad_norm": 1.049559473991394, + "learning_rate": 1.5482969711628332e-06, + "loss": 0.3119, + "step": 37478 + }, + { + "epoch": 0.7502740034531942, + "grad_norm": 1.2905848026275635, + "learning_rate": 1.5480624370337749e-06, + "loss": 0.3051, + "step": 37479 + }, + { + "epoch": 0.7502940219703226, + "grad_norm": 1.0886468887329102, + "learning_rate": 1.547827917415921e-06, + "loss": 0.32, + "step": 37480 + }, + { + "epoch": 0.7503140404874509, + "grad_norm": 1.1390113830566406, + "learning_rate": 1.547593412310256e-06, + "loss": 0.2948, + "step": 37481 + }, + { + "epoch": 0.7503340590045793, + "grad_norm": 1.8749428987503052, + "learning_rate": 1.5473589217177649e-06, + "loss": 0.8119, + "step": 37482 + }, + { + "epoch": 0.7503540775217076, + "grad_norm": 1.267970085144043, + "learning_rate": 1.547124445639434e-06, + "loss": 0.3014, + "step": 37483 + }, + { + "epoch": 0.7503740960388359, + "grad_norm": 1.196399211883545, + "learning_rate": 1.546889984076247e-06, + "loss": 0.31, + "step": 37484 + }, + { + "epoch": 0.7503941145559643, + "grad_norm": 1.1876791715621948, + "learning_rate": 1.5466555370291935e-06, + "loss": 0.3237, + "step": 37485 + }, + { + "epoch": 0.7504141330730926, + "grad_norm": 1.0587928295135498, + "learning_rate": 1.5464211044992567e-06, + "loss": 0.3063, + "step": 37486 + }, + { + "epoch": 0.750434151590221, + "grad_norm": 1.0869961977005005, + "learning_rate": 1.5461866864874203e-06, + "loss": 0.3082, + "step": 37487 + }, + { + "epoch": 0.7504541701073493, + "grad_norm": 1.1105515956878662, + "learning_rate": 1.545952282994672e-06, + "loss": 0.3081, + "step": 37488 + }, + { + "epoch": 0.7504741886244777, + "grad_norm": 1.1305636167526245, + "learning_rate": 1.5457178940219991e-06, + "loss": 0.2993, + "step": 37489 + }, + { + "epoch": 0.750494207141606, + "grad_norm": 1.227245807647705, + "learning_rate": 1.5454835195703843e-06, + "loss": 0.2838, + "step": 37490 + }, + { + "epoch": 0.7505142256587344, + "grad_norm": 1.1313589811325073, + "learning_rate": 1.5452491596408136e-06, + "loss": 0.3268, + "step": 37491 + }, + { + "epoch": 0.7505342441758627, + "grad_norm": 1.195247769355774, + "learning_rate": 1.5450148142342726e-06, + "loss": 0.2563, + "step": 37492 + }, + { + "epoch": 0.750554262692991, + "grad_norm": 1.127869725227356, + "learning_rate": 1.5447804833517432e-06, + "loss": 0.3355, + "step": 37493 + }, + { + "epoch": 0.7505742812101194, + "grad_norm": 1.1773184537887573, + "learning_rate": 1.5445461669942146e-06, + "loss": 0.273, + "step": 37494 + }, + { + "epoch": 0.7505942997272477, + "grad_norm": 0.9897717833518982, + "learning_rate": 1.544311865162671e-06, + "loss": 0.2523, + "step": 37495 + }, + { + "epoch": 0.7506143182443761, + "grad_norm": 1.1374553442001343, + "learning_rate": 1.5440775778580963e-06, + "loss": 0.281, + "step": 37496 + }, + { + "epoch": 0.7506343367615044, + "grad_norm": 1.2282931804656982, + "learning_rate": 1.5438433050814744e-06, + "loss": 0.274, + "step": 37497 + }, + { + "epoch": 0.7506543552786328, + "grad_norm": 1.0807424783706665, + "learning_rate": 1.5436090468337927e-06, + "loss": 0.2602, + "step": 37498 + }, + { + "epoch": 0.7506743737957611, + "grad_norm": 1.1225053071975708, + "learning_rate": 1.5433748031160351e-06, + "loss": 0.2432, + "step": 37499 + }, + { + "epoch": 0.7506943923128894, + "grad_norm": 1.184881567955017, + "learning_rate": 1.543140573929184e-06, + "loss": 0.2962, + "step": 37500 + }, + { + "epoch": 0.7507144108300178, + "grad_norm": 1.2971479892730713, + "learning_rate": 1.5429063592742277e-06, + "loss": 0.3262, + "step": 37501 + }, + { + "epoch": 0.7507344293471461, + "grad_norm": 1.1429321765899658, + "learning_rate": 1.5426721591521476e-06, + "loss": 0.2967, + "step": 37502 + }, + { + "epoch": 0.7507544478642745, + "grad_norm": 1.0802948474884033, + "learning_rate": 1.542437973563931e-06, + "loss": 0.2629, + "step": 37503 + }, + { + "epoch": 0.7507744663814028, + "grad_norm": 1.043624758720398, + "learning_rate": 1.5422038025105619e-06, + "loss": 0.3271, + "step": 37504 + }, + { + "epoch": 0.7507944848985312, + "grad_norm": 1.1611429452896118, + "learning_rate": 1.5419696459930228e-06, + "loss": 0.2736, + "step": 37505 + }, + { + "epoch": 0.7508145034156595, + "grad_norm": 1.1934516429901123, + "learning_rate": 1.5417355040122983e-06, + "loss": 0.2818, + "step": 37506 + }, + { + "epoch": 0.7508345219327879, + "grad_norm": 1.1228439807891846, + "learning_rate": 1.5415013765693748e-06, + "loss": 0.2937, + "step": 37507 + }, + { + "epoch": 0.7508545404499162, + "grad_norm": 1.5244338512420654, + "learning_rate": 1.5412672636652354e-06, + "loss": 0.2876, + "step": 37508 + }, + { + "epoch": 0.7508745589670445, + "grad_norm": 1.1265398263931274, + "learning_rate": 1.5410331653008636e-06, + "loss": 0.2994, + "step": 37509 + }, + { + "epoch": 0.7508945774841729, + "grad_norm": 1.2776421308517456, + "learning_rate": 1.5407990814772432e-06, + "loss": 0.2726, + "step": 37510 + }, + { + "epoch": 0.7509145960013012, + "grad_norm": 1.1488070487976074, + "learning_rate": 1.5405650121953602e-06, + "loss": 0.2935, + "step": 37511 + }, + { + "epoch": 0.7509346145184296, + "grad_norm": 1.0910305976867676, + "learning_rate": 1.5403309574561976e-06, + "loss": 0.2715, + "step": 37512 + }, + { + "epoch": 0.7509546330355579, + "grad_norm": 1.067821979522705, + "learning_rate": 1.540096917260737e-06, + "loss": 0.2945, + "step": 37513 + }, + { + "epoch": 0.7509746515526863, + "grad_norm": 1.3626272678375244, + "learning_rate": 1.5398628916099667e-06, + "loss": 0.2664, + "step": 37514 + }, + { + "epoch": 0.7509946700698146, + "grad_norm": 1.0949187278747559, + "learning_rate": 1.539628880504866e-06, + "loss": 0.3271, + "step": 37515 + }, + { + "epoch": 0.7510146885869429, + "grad_norm": 1.0903204679489136, + "learning_rate": 1.539394883946423e-06, + "loss": 0.34, + "step": 37516 + }, + { + "epoch": 0.7510347071040713, + "grad_norm": 1.0455703735351562, + "learning_rate": 1.5391609019356186e-06, + "loss": 0.2828, + "step": 37517 + }, + { + "epoch": 0.7510547256211996, + "grad_norm": 1.1398611068725586, + "learning_rate": 1.538926934473437e-06, + "loss": 0.3324, + "step": 37518 + }, + { + "epoch": 0.751074744138328, + "grad_norm": 1.1277103424072266, + "learning_rate": 1.5386929815608604e-06, + "loss": 0.2692, + "step": 37519 + }, + { + "epoch": 0.7510947626554563, + "grad_norm": 0.9742605686187744, + "learning_rate": 1.5384590431988754e-06, + "loss": 0.2865, + "step": 37520 + }, + { + "epoch": 0.7511147811725847, + "grad_norm": 1.1134705543518066, + "learning_rate": 1.5382251193884634e-06, + "loss": 0.2674, + "step": 37521 + }, + { + "epoch": 0.751134799689713, + "grad_norm": 1.8333989381790161, + "learning_rate": 1.5379912101306076e-06, + "loss": 0.7192, + "step": 37522 + }, + { + "epoch": 0.7511548182068414, + "grad_norm": 1.1425487995147705, + "learning_rate": 1.5377573154262926e-06, + "loss": 0.233, + "step": 37523 + }, + { + "epoch": 0.7511748367239697, + "grad_norm": 1.066615104675293, + "learning_rate": 1.5375234352764983e-06, + "loss": 0.3199, + "step": 37524 + }, + { + "epoch": 0.751194855241098, + "grad_norm": 1.9340460300445557, + "learning_rate": 1.5372895696822127e-06, + "loss": 0.8315, + "step": 37525 + }, + { + "epoch": 0.7512148737582264, + "grad_norm": 1.1540309190750122, + "learning_rate": 1.5370557186444146e-06, + "loss": 0.3057, + "step": 37526 + }, + { + "epoch": 0.7512348922753547, + "grad_norm": 1.0557546615600586, + "learning_rate": 1.5368218821640906e-06, + "loss": 0.2851, + "step": 37527 + }, + { + "epoch": 0.7512549107924831, + "grad_norm": 1.1380283832550049, + "learning_rate": 1.5365880602422206e-06, + "loss": 0.3423, + "step": 37528 + }, + { + "epoch": 0.7512749293096114, + "grad_norm": 1.3133771419525146, + "learning_rate": 1.5363542528797903e-06, + "loss": 0.2745, + "step": 37529 + }, + { + "epoch": 0.7512949478267398, + "grad_norm": 1.1653809547424316, + "learning_rate": 1.5361204600777819e-06, + "loss": 0.3345, + "step": 37530 + }, + { + "epoch": 0.7513149663438681, + "grad_norm": 1.1501014232635498, + "learning_rate": 1.5358866818371776e-06, + "loss": 0.2916, + "step": 37531 + }, + { + "epoch": 0.7513349848609964, + "grad_norm": 1.0780218839645386, + "learning_rate": 1.5356529181589598e-06, + "loss": 0.3275, + "step": 37532 + }, + { + "epoch": 0.7513550033781248, + "grad_norm": 1.07502019405365, + "learning_rate": 1.53541916904411e-06, + "loss": 0.3227, + "step": 37533 + }, + { + "epoch": 0.7513750218952531, + "grad_norm": 1.1649928092956543, + "learning_rate": 1.5351854344936146e-06, + "loss": 0.2844, + "step": 37534 + }, + { + "epoch": 0.7513950404123815, + "grad_norm": 1.1352207660675049, + "learning_rate": 1.5349517145084536e-06, + "loss": 0.2767, + "step": 37535 + }, + { + "epoch": 0.7514150589295098, + "grad_norm": 1.3138957023620605, + "learning_rate": 1.5347180090896096e-06, + "loss": 0.3018, + "step": 37536 + }, + { + "epoch": 0.7514350774466382, + "grad_norm": 1.0867239236831665, + "learning_rate": 1.5344843182380643e-06, + "loss": 0.3045, + "step": 37537 + }, + { + "epoch": 0.7514550959637665, + "grad_norm": 1.016324520111084, + "learning_rate": 1.5342506419548025e-06, + "loss": 0.2382, + "step": 37538 + }, + { + "epoch": 0.7514751144808949, + "grad_norm": 1.2059907913208008, + "learning_rate": 1.5340169802408039e-06, + "loss": 0.2888, + "step": 37539 + }, + { + "epoch": 0.7514951329980232, + "grad_norm": 1.0229324102401733, + "learning_rate": 1.533783333097053e-06, + "loss": 0.2743, + "step": 37540 + }, + { + "epoch": 0.7515151515151515, + "grad_norm": 1.2017134428024292, + "learning_rate": 1.5335497005245297e-06, + "loss": 0.2954, + "step": 37541 + }, + { + "epoch": 0.7515351700322799, + "grad_norm": 1.2216668128967285, + "learning_rate": 1.5333160825242195e-06, + "loss": 0.3353, + "step": 37542 + }, + { + "epoch": 0.7515551885494082, + "grad_norm": 1.216150164604187, + "learning_rate": 1.5330824790971022e-06, + "loss": 0.2662, + "step": 37543 + }, + { + "epoch": 0.7515752070665366, + "grad_norm": 1.0986679792404175, + "learning_rate": 1.5328488902441596e-06, + "loss": 0.3324, + "step": 37544 + }, + { + "epoch": 0.7515952255836649, + "grad_norm": 0.9631084203720093, + "learning_rate": 1.5326153159663747e-06, + "loss": 0.2251, + "step": 37545 + }, + { + "epoch": 0.7516152441007933, + "grad_norm": 1.0914274454116821, + "learning_rate": 1.532381756264727e-06, + "loss": 0.3388, + "step": 37546 + }, + { + "epoch": 0.7516352626179216, + "grad_norm": 1.1523573398590088, + "learning_rate": 1.5321482111402013e-06, + "loss": 0.309, + "step": 37547 + }, + { + "epoch": 0.7516552811350499, + "grad_norm": 1.4999884366989136, + "learning_rate": 1.531914680593779e-06, + "loss": 0.2824, + "step": 37548 + }, + { + "epoch": 0.7516752996521783, + "grad_norm": 1.0335617065429688, + "learning_rate": 1.5316811646264407e-06, + "loss": 0.2772, + "step": 37549 + }, + { + "epoch": 0.7516953181693066, + "grad_norm": 1.0390698909759521, + "learning_rate": 1.5314476632391668e-06, + "loss": 0.2913, + "step": 37550 + }, + { + "epoch": 0.751715336686435, + "grad_norm": 1.0815593004226685, + "learning_rate": 1.5312141764329419e-06, + "loss": 0.2737, + "step": 37551 + }, + { + "epoch": 0.7517353552035633, + "grad_norm": 1.1018702983856201, + "learning_rate": 1.5309807042087448e-06, + "loss": 0.2469, + "step": 37552 + }, + { + "epoch": 0.7517553737206917, + "grad_norm": 1.0080797672271729, + "learning_rate": 1.5307472465675598e-06, + "loss": 0.2573, + "step": 37553 + }, + { + "epoch": 0.75177539223782, + "grad_norm": 1.1487987041473389, + "learning_rate": 1.5305138035103668e-06, + "loss": 0.3226, + "step": 37554 + }, + { + "epoch": 0.7517954107549484, + "grad_norm": 1.0636932849884033, + "learning_rate": 1.5302803750381455e-06, + "loss": 0.2622, + "step": 37555 + }, + { + "epoch": 0.7518154292720767, + "grad_norm": 1.210031509399414, + "learning_rate": 1.5300469611518804e-06, + "loss": 0.3265, + "step": 37556 + }, + { + "epoch": 0.751835447789205, + "grad_norm": 1.1003355979919434, + "learning_rate": 1.529813561852551e-06, + "loss": 0.3087, + "step": 37557 + }, + { + "epoch": 0.7518554663063334, + "grad_norm": 1.9132927656173706, + "learning_rate": 1.529580177141139e-06, + "loss": 0.7394, + "step": 37558 + }, + { + "epoch": 0.7518754848234617, + "grad_norm": 1.2060843706130981, + "learning_rate": 1.5293468070186235e-06, + "loss": 0.2982, + "step": 37559 + }, + { + "epoch": 0.7518955033405901, + "grad_norm": 1.0609192848205566, + "learning_rate": 1.5291134514859884e-06, + "loss": 0.2794, + "step": 37560 + }, + { + "epoch": 0.7519155218577184, + "grad_norm": 1.225244164466858, + "learning_rate": 1.5288801105442135e-06, + "loss": 0.291, + "step": 37561 + }, + { + "epoch": 0.7519355403748468, + "grad_norm": 1.295143961906433, + "learning_rate": 1.5286467841942792e-06, + "loss": 0.2764, + "step": 37562 + }, + { + "epoch": 0.7519555588919751, + "grad_norm": 1.0447330474853516, + "learning_rate": 1.5284134724371658e-06, + "loss": 0.3092, + "step": 37563 + }, + { + "epoch": 0.7519755774091034, + "grad_norm": 1.162697672843933, + "learning_rate": 1.5281801752738561e-06, + "loss": 0.326, + "step": 37564 + }, + { + "epoch": 0.7519955959262318, + "grad_norm": 0.9858720302581787, + "learning_rate": 1.5279468927053286e-06, + "loss": 0.2581, + "step": 37565 + }, + { + "epoch": 0.7520156144433601, + "grad_norm": 1.1211721897125244, + "learning_rate": 1.527713624732567e-06, + "loss": 0.272, + "step": 37566 + }, + { + "epoch": 0.7520356329604885, + "grad_norm": 1.0453015565872192, + "learning_rate": 1.52748037135655e-06, + "loss": 0.2542, + "step": 37567 + }, + { + "epoch": 0.7520556514776168, + "grad_norm": 1.0529699325561523, + "learning_rate": 1.527247132578256e-06, + "loss": 0.3175, + "step": 37568 + }, + { + "epoch": 0.7520756699947452, + "grad_norm": 1.107155203819275, + "learning_rate": 1.5270139083986702e-06, + "loss": 0.2552, + "step": 37569 + }, + { + "epoch": 0.7520956885118735, + "grad_norm": 1.1533697843551636, + "learning_rate": 1.52678069881877e-06, + "loss": 0.3457, + "step": 37570 + }, + { + "epoch": 0.7521157070290019, + "grad_norm": 1.0569878816604614, + "learning_rate": 1.526547503839536e-06, + "loss": 0.32, + "step": 37571 + }, + { + "epoch": 0.7521357255461302, + "grad_norm": 1.0381450653076172, + "learning_rate": 1.5263143234619477e-06, + "loss": 0.2889, + "step": 37572 + }, + { + "epoch": 0.7521557440632585, + "grad_norm": 1.1255863904953003, + "learning_rate": 1.5260811576869878e-06, + "loss": 0.3168, + "step": 37573 + }, + { + "epoch": 0.7521757625803869, + "grad_norm": 1.1349083185195923, + "learning_rate": 1.525848006515635e-06, + "loss": 0.3682, + "step": 37574 + }, + { + "epoch": 0.7521957810975152, + "grad_norm": 1.1341410875320435, + "learning_rate": 1.52561486994887e-06, + "loss": 0.236, + "step": 37575 + }, + { + "epoch": 0.7522157996146436, + "grad_norm": 1.1632026433944702, + "learning_rate": 1.5253817479876703e-06, + "loss": 0.3401, + "step": 37576 + }, + { + "epoch": 0.7522358181317719, + "grad_norm": 1.1134004592895508, + "learning_rate": 1.5251486406330202e-06, + "loss": 0.2783, + "step": 37577 + }, + { + "epoch": 0.7522558366489003, + "grad_norm": 1.1400299072265625, + "learning_rate": 1.5249155478858952e-06, + "loss": 0.2795, + "step": 37578 + }, + { + "epoch": 0.7522758551660286, + "grad_norm": 1.068362832069397, + "learning_rate": 1.5246824697472796e-06, + "loss": 0.2739, + "step": 37579 + }, + { + "epoch": 0.7522958736831569, + "grad_norm": 1.1724976301193237, + "learning_rate": 1.5244494062181502e-06, + "loss": 0.314, + "step": 37580 + }, + { + "epoch": 0.7523158922002853, + "grad_norm": 1.0384780168533325, + "learning_rate": 1.5242163572994866e-06, + "loss": 0.3383, + "step": 37581 + }, + { + "epoch": 0.7523359107174136, + "grad_norm": 1.138139009475708, + "learning_rate": 1.5239833229922707e-06, + "loss": 0.3155, + "step": 37582 + }, + { + "epoch": 0.752355929234542, + "grad_norm": 1.0339807271957397, + "learning_rate": 1.5237503032974809e-06, + "loss": 0.2442, + "step": 37583 + }, + { + "epoch": 0.7523759477516703, + "grad_norm": 1.009053349494934, + "learning_rate": 1.5235172982160967e-06, + "loss": 0.2696, + "step": 37584 + }, + { + "epoch": 0.7523959662687987, + "grad_norm": 1.0891467332839966, + "learning_rate": 1.5232843077490978e-06, + "loss": 0.2784, + "step": 37585 + }, + { + "epoch": 0.752415984785927, + "grad_norm": 1.023386001586914, + "learning_rate": 1.5230513318974621e-06, + "loss": 0.3085, + "step": 37586 + }, + { + "epoch": 0.7524360033030554, + "grad_norm": 1.116085171699524, + "learning_rate": 1.5228183706621714e-06, + "loss": 0.3368, + "step": 37587 + }, + { + "epoch": 0.7524560218201837, + "grad_norm": 1.9865537881851196, + "learning_rate": 1.5225854240442044e-06, + "loss": 0.7764, + "step": 37588 + }, + { + "epoch": 0.752476040337312, + "grad_norm": 1.230971097946167, + "learning_rate": 1.5223524920445381e-06, + "loss": 0.3167, + "step": 37589 + }, + { + "epoch": 0.7524960588544404, + "grad_norm": 1.043743371963501, + "learning_rate": 1.5221195746641554e-06, + "loss": 0.2781, + "step": 37590 + }, + { + "epoch": 0.7525160773715687, + "grad_norm": 1.054339051246643, + "learning_rate": 1.5218866719040315e-06, + "loss": 0.3037, + "step": 37591 + }, + { + "epoch": 0.7525360958886971, + "grad_norm": 1.0304746627807617, + "learning_rate": 1.5216537837651496e-06, + "loss": 0.2812, + "step": 37592 + }, + { + "epoch": 0.7525561144058254, + "grad_norm": 1.1166071891784668, + "learning_rate": 1.5214209102484862e-06, + "loss": 0.2984, + "step": 37593 + }, + { + "epoch": 0.7525761329229538, + "grad_norm": 1.2481138706207275, + "learning_rate": 1.5211880513550191e-06, + "loss": 0.2862, + "step": 37594 + }, + { + "epoch": 0.7525961514400821, + "grad_norm": 1.437565803527832, + "learning_rate": 1.520955207085731e-06, + "loss": 0.3329, + "step": 37595 + }, + { + "epoch": 0.7526161699572104, + "grad_norm": 1.0650074481964111, + "learning_rate": 1.520722377441598e-06, + "loss": 0.2813, + "step": 37596 + }, + { + "epoch": 0.7526361884743388, + "grad_norm": 1.1760387420654297, + "learning_rate": 1.5204895624235994e-06, + "loss": 0.2961, + "step": 37597 + }, + { + "epoch": 0.7526562069914671, + "grad_norm": 1.1054198741912842, + "learning_rate": 1.520256762032714e-06, + "loss": 0.3091, + "step": 37598 + }, + { + "epoch": 0.7526762255085955, + "grad_norm": 1.101995587348938, + "learning_rate": 1.5200239762699187e-06, + "loss": 0.2839, + "step": 37599 + }, + { + "epoch": 0.7526962440257238, + "grad_norm": 1.896316409111023, + "learning_rate": 1.519791205136195e-06, + "loss": 0.3193, + "step": 37600 + }, + { + "epoch": 0.7527162625428522, + "grad_norm": 1.1562836170196533, + "learning_rate": 1.519558448632521e-06, + "loss": 0.3057, + "step": 37601 + }, + { + "epoch": 0.7527362810599805, + "grad_norm": 1.952094554901123, + "learning_rate": 1.5193257067598716e-06, + "loss": 0.7232, + "step": 37602 + }, + { + "epoch": 0.7527562995771089, + "grad_norm": 1.1191850900650024, + "learning_rate": 1.51909297951923e-06, + "loss": 0.2808, + "step": 37603 + }, + { + "epoch": 0.7527763180942372, + "grad_norm": 1.1124647855758667, + "learning_rate": 1.518860266911571e-06, + "loss": 0.2712, + "step": 37604 + }, + { + "epoch": 0.7527963366113655, + "grad_norm": 1.8890348672866821, + "learning_rate": 1.5186275689378755e-06, + "loss": 0.7638, + "step": 37605 + }, + { + "epoch": 0.7528163551284939, + "grad_norm": 1.436246633529663, + "learning_rate": 1.518394885599121e-06, + "loss": 0.3085, + "step": 37606 + }, + { + "epoch": 0.7528363736456222, + "grad_norm": 1.971653699874878, + "learning_rate": 1.5181622168962846e-06, + "loss": 0.7637, + "step": 37607 + }, + { + "epoch": 0.7528563921627506, + "grad_norm": 1.0132032632827759, + "learning_rate": 1.5179295628303436e-06, + "loss": 0.2864, + "step": 37608 + }, + { + "epoch": 0.7528764106798789, + "grad_norm": 0.9674171209335327, + "learning_rate": 1.5176969234022793e-06, + "loss": 0.2737, + "step": 37609 + }, + { + "epoch": 0.7528964291970073, + "grad_norm": 1.976671576499939, + "learning_rate": 1.5174642986130672e-06, + "loss": 0.7055, + "step": 37610 + }, + { + "epoch": 0.7529164477141356, + "grad_norm": 1.0352576971054077, + "learning_rate": 1.5172316884636856e-06, + "loss": 0.268, + "step": 37611 + }, + { + "epoch": 0.7529364662312639, + "grad_norm": 1.10075843334198, + "learning_rate": 1.5169990929551115e-06, + "loss": 0.2866, + "step": 37612 + }, + { + "epoch": 0.7529564847483923, + "grad_norm": 1.0215040445327759, + "learning_rate": 1.516766512088325e-06, + "loss": 0.2986, + "step": 37613 + }, + { + "epoch": 0.7529765032655206, + "grad_norm": 1.150069236755371, + "learning_rate": 1.5165339458643025e-06, + "loss": 0.3526, + "step": 37614 + }, + { + "epoch": 0.752996521782649, + "grad_norm": 1.17808997631073, + "learning_rate": 1.5163013942840199e-06, + "loss": 0.3714, + "step": 37615 + }, + { + "epoch": 0.7530165402997773, + "grad_norm": 1.225709080696106, + "learning_rate": 1.5160688573484583e-06, + "loss": 0.2868, + "step": 37616 + }, + { + "epoch": 0.7530365588169057, + "grad_norm": 1.1881649494171143, + "learning_rate": 1.515836335058592e-06, + "loss": 0.3367, + "step": 37617 + }, + { + "epoch": 0.753056577334034, + "grad_norm": 1.266574501991272, + "learning_rate": 1.5156038274154012e-06, + "loss": 0.3064, + "step": 37618 + }, + { + "epoch": 0.7530765958511624, + "grad_norm": 1.2192939519882202, + "learning_rate": 1.5153713344198623e-06, + "loss": 0.258, + "step": 37619 + }, + { + "epoch": 0.7530966143682907, + "grad_norm": 1.0596070289611816, + "learning_rate": 1.5151388560729518e-06, + "loss": 0.3119, + "step": 37620 + }, + { + "epoch": 0.753116632885419, + "grad_norm": 1.10133957862854, + "learning_rate": 1.5149063923756468e-06, + "loss": 0.2751, + "step": 37621 + }, + { + "epoch": 0.7531366514025474, + "grad_norm": 1.2936631441116333, + "learning_rate": 1.5146739433289265e-06, + "loss": 0.3374, + "step": 37622 + }, + { + "epoch": 0.7531566699196757, + "grad_norm": 1.08720064163208, + "learning_rate": 1.514441508933767e-06, + "loss": 0.3318, + "step": 37623 + }, + { + "epoch": 0.7531766884368041, + "grad_norm": 1.0790550708770752, + "learning_rate": 1.5142090891911454e-06, + "loss": 0.296, + "step": 37624 + }, + { + "epoch": 0.7531967069539324, + "grad_norm": 1.0441489219665527, + "learning_rate": 1.513976684102037e-06, + "loss": 0.3239, + "step": 37625 + }, + { + "epoch": 0.7532167254710608, + "grad_norm": 1.138014316558838, + "learning_rate": 1.5137442936674218e-06, + "loss": 0.3422, + "step": 37626 + }, + { + "epoch": 0.7532367439881891, + "grad_norm": 1.0495514869689941, + "learning_rate": 1.5135119178882757e-06, + "loss": 0.2906, + "step": 37627 + }, + { + "epoch": 0.7532567625053174, + "grad_norm": 1.1637053489685059, + "learning_rate": 1.5132795567655733e-06, + "loss": 0.317, + "step": 37628 + }, + { + "epoch": 0.7532767810224458, + "grad_norm": 1.1211284399032593, + "learning_rate": 1.5130472103002953e-06, + "loss": 0.2898, + "step": 37629 + }, + { + "epoch": 0.7532967995395741, + "grad_norm": 1.1946619749069214, + "learning_rate": 1.5128148784934144e-06, + "loss": 0.3058, + "step": 37630 + }, + { + "epoch": 0.7533168180567025, + "grad_norm": 1.153609275817871, + "learning_rate": 1.512582561345911e-06, + "loss": 0.3077, + "step": 37631 + }, + { + "epoch": 0.7533368365738308, + "grad_norm": 1.0755618810653687, + "learning_rate": 1.5123502588587602e-06, + "loss": 0.293, + "step": 37632 + }, + { + "epoch": 0.7533568550909592, + "grad_norm": 1.231225848197937, + "learning_rate": 1.5121179710329381e-06, + "loss": 0.3992, + "step": 37633 + }, + { + "epoch": 0.7533768736080875, + "grad_norm": 1.0557702779769897, + "learning_rate": 1.5118856978694202e-06, + "loss": 0.3289, + "step": 37634 + }, + { + "epoch": 0.7533968921252159, + "grad_norm": 1.319692611694336, + "learning_rate": 1.5116534393691857e-06, + "loss": 0.3204, + "step": 37635 + }, + { + "epoch": 0.7534169106423442, + "grad_norm": 1.1578643321990967, + "learning_rate": 1.511421195533209e-06, + "loss": 0.317, + "step": 37636 + }, + { + "epoch": 0.7534369291594725, + "grad_norm": 1.0751776695251465, + "learning_rate": 1.5111889663624668e-06, + "loss": 0.2742, + "step": 37637 + }, + { + "epoch": 0.7534569476766009, + "grad_norm": 1.0320063829421997, + "learning_rate": 1.510956751857936e-06, + "loss": 0.2773, + "step": 37638 + }, + { + "epoch": 0.7534769661937292, + "grad_norm": 2.090466260910034, + "learning_rate": 1.5107245520205904e-06, + "loss": 0.7363, + "step": 37639 + }, + { + "epoch": 0.7534969847108576, + "grad_norm": 1.1821144819259644, + "learning_rate": 1.5104923668514094e-06, + "loss": 0.3392, + "step": 37640 + }, + { + "epoch": 0.7535170032279859, + "grad_norm": 1.053008794784546, + "learning_rate": 1.5102601963513658e-06, + "loss": 0.2956, + "step": 37641 + }, + { + "epoch": 0.7535370217451143, + "grad_norm": 1.0628631114959717, + "learning_rate": 1.510028040521439e-06, + "loss": 0.2905, + "step": 37642 + }, + { + "epoch": 0.7535570402622426, + "grad_norm": 1.1140260696411133, + "learning_rate": 1.5097958993626017e-06, + "loss": 0.3507, + "step": 37643 + }, + { + "epoch": 0.7535770587793709, + "grad_norm": 1.1346096992492676, + "learning_rate": 1.5095637728758327e-06, + "loss": 0.335, + "step": 37644 + }, + { + "epoch": 0.7535970772964993, + "grad_norm": 1.1740063428878784, + "learning_rate": 1.5093316610621066e-06, + "loss": 0.3061, + "step": 37645 + }, + { + "epoch": 0.7536170958136276, + "grad_norm": 1.1587778329849243, + "learning_rate": 1.5090995639223993e-06, + "loss": 0.2786, + "step": 37646 + }, + { + "epoch": 0.753637114330756, + "grad_norm": 1.0584760904312134, + "learning_rate": 1.508867481457686e-06, + "loss": 0.2897, + "step": 37647 + }, + { + "epoch": 0.7536571328478843, + "grad_norm": 1.0181983709335327, + "learning_rate": 1.5086354136689407e-06, + "loss": 0.3025, + "step": 37648 + }, + { + "epoch": 0.7536771513650127, + "grad_norm": 1.0965497493743896, + "learning_rate": 1.508403360557143e-06, + "loss": 0.2995, + "step": 37649 + }, + { + "epoch": 0.753697169882141, + "grad_norm": 1.1413663625717163, + "learning_rate": 1.5081713221232653e-06, + "loss": 0.3139, + "step": 37650 + }, + { + "epoch": 0.7537171883992694, + "grad_norm": 1.0928972959518433, + "learning_rate": 1.5079392983682845e-06, + "loss": 0.3194, + "step": 37651 + }, + { + "epoch": 0.7537372069163977, + "grad_norm": 1.9897860288619995, + "learning_rate": 1.5077072892931738e-06, + "loss": 0.7813, + "step": 37652 + }, + { + "epoch": 0.753757225433526, + "grad_norm": 2.060865879058838, + "learning_rate": 1.5074752948989113e-06, + "loss": 0.7641, + "step": 37653 + }, + { + "epoch": 0.7537772439506544, + "grad_norm": 1.0362898111343384, + "learning_rate": 1.5072433151864697e-06, + "loss": 0.2705, + "step": 37654 + }, + { + "epoch": 0.7537972624677827, + "grad_norm": 1.2146786451339722, + "learning_rate": 1.5070113501568273e-06, + "loss": 0.2893, + "step": 37655 + }, + { + "epoch": 0.7538172809849111, + "grad_norm": 1.0565487146377563, + "learning_rate": 1.5067793998109553e-06, + "loss": 0.2844, + "step": 37656 + }, + { + "epoch": 0.7538372995020394, + "grad_norm": 2.1945595741271973, + "learning_rate": 1.5065474641498335e-06, + "loss": 0.8097, + "step": 37657 + }, + { + "epoch": 0.7538573180191678, + "grad_norm": 1.1834042072296143, + "learning_rate": 1.5063155431744336e-06, + "loss": 0.255, + "step": 37658 + }, + { + "epoch": 0.7538773365362961, + "grad_norm": 1.058370590209961, + "learning_rate": 1.5060836368857318e-06, + "loss": 0.3134, + "step": 37659 + }, + { + "epoch": 0.7538973550534244, + "grad_norm": 1.112791657447815, + "learning_rate": 1.5058517452847022e-06, + "loss": 0.3047, + "step": 37660 + }, + { + "epoch": 0.7539173735705528, + "grad_norm": 1.2424031496047974, + "learning_rate": 1.5056198683723183e-06, + "loss": 0.3158, + "step": 37661 + }, + { + "epoch": 0.7539373920876811, + "grad_norm": 1.1586321592330933, + "learning_rate": 1.505388006149558e-06, + "loss": 0.2938, + "step": 37662 + }, + { + "epoch": 0.7539574106048095, + "grad_norm": 1.245873212814331, + "learning_rate": 1.5051561586173947e-06, + "loss": 0.3133, + "step": 37663 + }, + { + "epoch": 0.7539774291219378, + "grad_norm": 1.0883548259735107, + "learning_rate": 1.5049243257768025e-06, + "loss": 0.2607, + "step": 37664 + }, + { + "epoch": 0.7539974476390662, + "grad_norm": 1.116109013557434, + "learning_rate": 1.5046925076287549e-06, + "loss": 0.2789, + "step": 37665 + }, + { + "epoch": 0.7540174661561945, + "grad_norm": 1.2306522130966187, + "learning_rate": 1.5044607041742293e-06, + "loss": 0.2517, + "step": 37666 + }, + { + "epoch": 0.7540374846733229, + "grad_norm": 0.9707712531089783, + "learning_rate": 1.504228915414197e-06, + "loss": 0.2592, + "step": 37667 + }, + { + "epoch": 0.7540575031904512, + "grad_norm": 1.0626308917999268, + "learning_rate": 1.5039971413496362e-06, + "loss": 0.2839, + "step": 37668 + }, + { + "epoch": 0.7540775217075795, + "grad_norm": 1.098499059677124, + "learning_rate": 1.5037653819815185e-06, + "loss": 0.2616, + "step": 37669 + }, + { + "epoch": 0.7540975402247079, + "grad_norm": 1.0696600675582886, + "learning_rate": 1.5035336373108173e-06, + "loss": 0.2809, + "step": 37670 + }, + { + "epoch": 0.7541175587418362, + "grad_norm": 1.2965178489685059, + "learning_rate": 1.5033019073385096e-06, + "loss": 0.3183, + "step": 37671 + }, + { + "epoch": 0.7541375772589646, + "grad_norm": 0.972589373588562, + "learning_rate": 1.5030701920655687e-06, + "loss": 0.2551, + "step": 37672 + }, + { + "epoch": 0.7541575957760929, + "grad_norm": 1.991153597831726, + "learning_rate": 1.5028384914929679e-06, + "loss": 0.7255, + "step": 37673 + }, + { + "epoch": 0.7541776142932213, + "grad_norm": 1.0170916318893433, + "learning_rate": 1.5026068056216797e-06, + "loss": 0.3079, + "step": 37674 + }, + { + "epoch": 0.7541976328103496, + "grad_norm": 1.0653741359710693, + "learning_rate": 1.5023751344526815e-06, + "loss": 0.2894, + "step": 37675 + }, + { + "epoch": 0.7542176513274779, + "grad_norm": 2.0540835857391357, + "learning_rate": 1.5021434779869459e-06, + "loss": 0.8096, + "step": 37676 + }, + { + "epoch": 0.7542376698446063, + "grad_norm": 1.193357229232788, + "learning_rate": 1.501911836225446e-06, + "loss": 0.2686, + "step": 37677 + }, + { + "epoch": 0.7542576883617346, + "grad_norm": 1.1035577058792114, + "learning_rate": 1.5016802091691547e-06, + "loss": 0.2676, + "step": 37678 + }, + { + "epoch": 0.754277706878863, + "grad_norm": 1.8508542776107788, + "learning_rate": 1.5014485968190485e-06, + "loss": 0.7464, + "step": 37679 + }, + { + "epoch": 0.7542977253959913, + "grad_norm": 1.2423205375671387, + "learning_rate": 1.5012169991760972e-06, + "loss": 0.3051, + "step": 37680 + }, + { + "epoch": 0.7543177439131197, + "grad_norm": 1.3681508302688599, + "learning_rate": 1.500985416241279e-06, + "loss": 0.3186, + "step": 37681 + }, + { + "epoch": 0.754337762430248, + "grad_norm": 1.0842472314834595, + "learning_rate": 1.5007538480155648e-06, + "loss": 0.2855, + "step": 37682 + }, + { + "epoch": 0.7543577809473764, + "grad_norm": 1.0372638702392578, + "learning_rate": 1.500522294499927e-06, + "loss": 0.3136, + "step": 37683 + }, + { + "epoch": 0.7543777994645047, + "grad_norm": 1.1109426021575928, + "learning_rate": 1.5002907556953417e-06, + "loss": 0.3116, + "step": 37684 + }, + { + "epoch": 0.754397817981633, + "grad_norm": 1.226797342300415, + "learning_rate": 1.5000592316027811e-06, + "loss": 0.2485, + "step": 37685 + }, + { + "epoch": 0.7544178364987614, + "grad_norm": 1.112024188041687, + "learning_rate": 1.4998277222232182e-06, + "loss": 0.2725, + "step": 37686 + }, + { + "epoch": 0.7544378550158897, + "grad_norm": 1.1207337379455566, + "learning_rate": 1.4995962275576249e-06, + "loss": 0.2872, + "step": 37687 + }, + { + "epoch": 0.7544578735330181, + "grad_norm": 1.0951430797576904, + "learning_rate": 1.4993647476069772e-06, + "loss": 0.2784, + "step": 37688 + }, + { + "epoch": 0.7544778920501464, + "grad_norm": 1.2014719247817993, + "learning_rate": 1.499133282372247e-06, + "loss": 0.2914, + "step": 37689 + }, + { + "epoch": 0.7544979105672748, + "grad_norm": 1.3211493492126465, + "learning_rate": 1.498901831854407e-06, + "loss": 0.3366, + "step": 37690 + }, + { + "epoch": 0.7545179290844031, + "grad_norm": 1.1158143281936646, + "learning_rate": 1.4986703960544286e-06, + "loss": 0.3492, + "step": 37691 + }, + { + "epoch": 0.7545379476015314, + "grad_norm": 1.1767189502716064, + "learning_rate": 1.4984389749732881e-06, + "loss": 0.2962, + "step": 37692 + }, + { + "epoch": 0.7545579661186598, + "grad_norm": 2.0741019248962402, + "learning_rate": 1.4982075686119547e-06, + "loss": 0.7038, + "step": 37693 + }, + { + "epoch": 0.7545779846357881, + "grad_norm": 1.1641042232513428, + "learning_rate": 1.4979761769714046e-06, + "loss": 0.3307, + "step": 37694 + }, + { + "epoch": 0.7545980031529165, + "grad_norm": 1.112680196762085, + "learning_rate": 1.497744800052609e-06, + "loss": 0.3174, + "step": 37695 + }, + { + "epoch": 0.7546180216700448, + "grad_norm": 1.278279423713684, + "learning_rate": 1.4975134378565392e-06, + "loss": 0.333, + "step": 37696 + }, + { + "epoch": 0.7546380401871732, + "grad_norm": 1.1432744264602661, + "learning_rate": 1.4972820903841712e-06, + "loss": 0.3485, + "step": 37697 + }, + { + "epoch": 0.7546580587043015, + "grad_norm": 1.0088504552841187, + "learning_rate": 1.4970507576364746e-06, + "loss": 0.2692, + "step": 37698 + }, + { + "epoch": 0.7546780772214299, + "grad_norm": 1.2359795570373535, + "learning_rate": 1.4968194396144236e-06, + "loss": 0.3401, + "step": 37699 + }, + { + "epoch": 0.7546980957385582, + "grad_norm": 1.1369740962982178, + "learning_rate": 1.4965881363189894e-06, + "loss": 0.3095, + "step": 37700 + }, + { + "epoch": 0.7547181142556865, + "grad_norm": 1.1154643297195435, + "learning_rate": 1.4963568477511431e-06, + "loss": 0.2523, + "step": 37701 + }, + { + "epoch": 0.7547381327728149, + "grad_norm": 1.357824444770813, + "learning_rate": 1.4961255739118603e-06, + "loss": 0.3051, + "step": 37702 + }, + { + "epoch": 0.7547581512899432, + "grad_norm": 1.828857421875, + "learning_rate": 1.4958943148021115e-06, + "loss": 0.725, + "step": 37703 + }, + { + "epoch": 0.7547781698070716, + "grad_norm": 1.0291393995285034, + "learning_rate": 1.495663070422867e-06, + "loss": 0.2567, + "step": 37704 + }, + { + "epoch": 0.7547981883241999, + "grad_norm": 1.2923389673233032, + "learning_rate": 1.495431840775103e-06, + "loss": 0.3152, + "step": 37705 + }, + { + "epoch": 0.7548182068413283, + "grad_norm": 1.2087798118591309, + "learning_rate": 1.4952006258597873e-06, + "loss": 0.3548, + "step": 37706 + }, + { + "epoch": 0.7548382253584566, + "grad_norm": 1.16717529296875, + "learning_rate": 1.4949694256778957e-06, + "loss": 0.278, + "step": 37707 + }, + { + "epoch": 0.7548582438755849, + "grad_norm": 1.4146891832351685, + "learning_rate": 1.4947382402303983e-06, + "loss": 0.2431, + "step": 37708 + }, + { + "epoch": 0.7548782623927133, + "grad_norm": 1.0121800899505615, + "learning_rate": 1.4945070695182655e-06, + "loss": 0.2452, + "step": 37709 + }, + { + "epoch": 0.7548982809098416, + "grad_norm": 1.1540145874023438, + "learning_rate": 1.4942759135424723e-06, + "loss": 0.3228, + "step": 37710 + }, + { + "epoch": 0.75491829942697, + "grad_norm": 1.0286611318588257, + "learning_rate": 1.4940447723039887e-06, + "loss": 0.3028, + "step": 37711 + }, + { + "epoch": 0.7549383179440983, + "grad_norm": 1.993760108947754, + "learning_rate": 1.493813645803786e-06, + "loss": 0.6381, + "step": 37712 + }, + { + "epoch": 0.7549583364612267, + "grad_norm": 1.0120518207550049, + "learning_rate": 1.493582534042836e-06, + "loss": 0.2835, + "step": 37713 + }, + { + "epoch": 0.754978354978355, + "grad_norm": 1.0188088417053223, + "learning_rate": 1.4933514370221098e-06, + "loss": 0.2945, + "step": 37714 + }, + { + "epoch": 0.7549983734954834, + "grad_norm": 1.1878637075424194, + "learning_rate": 1.4931203547425805e-06, + "loss": 0.281, + "step": 37715 + }, + { + "epoch": 0.7550183920126117, + "grad_norm": 1.258535623550415, + "learning_rate": 1.4928892872052186e-06, + "loss": 0.2934, + "step": 37716 + }, + { + "epoch": 0.75503841052974, + "grad_norm": 1.8736492395401, + "learning_rate": 1.4926582344109935e-06, + "loss": 0.7044, + "step": 37717 + }, + { + "epoch": 0.7550584290468684, + "grad_norm": 1.047865390777588, + "learning_rate": 1.4924271963608806e-06, + "loss": 0.2793, + "step": 37718 + }, + { + "epoch": 0.7550784475639967, + "grad_norm": 0.9911138415336609, + "learning_rate": 1.492196173055847e-06, + "loss": 0.275, + "step": 37719 + }, + { + "epoch": 0.7550984660811251, + "grad_norm": 1.3005547523498535, + "learning_rate": 1.4919651644968675e-06, + "loss": 0.3239, + "step": 37720 + }, + { + "epoch": 0.7551184845982534, + "grad_norm": 1.0601191520690918, + "learning_rate": 1.4917341706849114e-06, + "loss": 0.2866, + "step": 37721 + }, + { + "epoch": 0.7551385031153818, + "grad_norm": 1.0880963802337646, + "learning_rate": 1.49150319162095e-06, + "loss": 0.282, + "step": 37722 + }, + { + "epoch": 0.7551585216325101, + "grad_norm": 1.3463473320007324, + "learning_rate": 1.4912722273059527e-06, + "loss": 0.2731, + "step": 37723 + }, + { + "epoch": 0.7551785401496384, + "grad_norm": 1.164746642112732, + "learning_rate": 1.4910412777408929e-06, + "loss": 0.3369, + "step": 37724 + }, + { + "epoch": 0.7551985586667668, + "grad_norm": 1.1412941217422485, + "learning_rate": 1.4908103429267407e-06, + "loss": 0.3245, + "step": 37725 + }, + { + "epoch": 0.7552185771838951, + "grad_norm": 1.1770633459091187, + "learning_rate": 1.4905794228644667e-06, + "loss": 0.3008, + "step": 37726 + }, + { + "epoch": 0.7552385957010235, + "grad_norm": 1.084327220916748, + "learning_rate": 1.4903485175550402e-06, + "loss": 0.2612, + "step": 37727 + }, + { + "epoch": 0.7552586142181518, + "grad_norm": 1.138728141784668, + "learning_rate": 1.4901176269994344e-06, + "loss": 0.3153, + "step": 37728 + }, + { + "epoch": 0.7552786327352802, + "grad_norm": 1.8908320665359497, + "learning_rate": 1.4898867511986193e-06, + "loss": 0.7576, + "step": 37729 + }, + { + "epoch": 0.7552986512524085, + "grad_norm": 1.1810132265090942, + "learning_rate": 1.489655890153563e-06, + "loss": 0.3508, + "step": 37730 + }, + { + "epoch": 0.7553186697695369, + "grad_norm": 1.1316742897033691, + "learning_rate": 1.489425043865239e-06, + "loss": 0.3312, + "step": 37731 + }, + { + "epoch": 0.7553386882866652, + "grad_norm": 1.0581170320510864, + "learning_rate": 1.489194212334616e-06, + "loss": 0.2917, + "step": 37732 + }, + { + "epoch": 0.7553587068037935, + "grad_norm": 1.0139811038970947, + "learning_rate": 1.488963395562666e-06, + "loss": 0.2755, + "step": 37733 + }, + { + "epoch": 0.7553787253209219, + "grad_norm": 1.1383056640625, + "learning_rate": 1.4887325935503582e-06, + "loss": 0.29, + "step": 37734 + }, + { + "epoch": 0.7553987438380502, + "grad_norm": 1.0531011819839478, + "learning_rate": 1.4885018062986634e-06, + "loss": 0.274, + "step": 37735 + }, + { + "epoch": 0.7554187623551786, + "grad_norm": 1.1632096767425537, + "learning_rate": 1.4882710338085493e-06, + "loss": 0.3226, + "step": 37736 + }, + { + "epoch": 0.7554387808723069, + "grad_norm": 1.0028502941131592, + "learning_rate": 1.4880402760809899e-06, + "loss": 0.2482, + "step": 37737 + }, + { + "epoch": 0.7554587993894353, + "grad_norm": 1.0647741556167603, + "learning_rate": 1.4878095331169534e-06, + "loss": 0.2806, + "step": 37738 + }, + { + "epoch": 0.7554788179065636, + "grad_norm": 1.0676014423370361, + "learning_rate": 1.4875788049174095e-06, + "loss": 0.3061, + "step": 37739 + }, + { + "epoch": 0.7554988364236919, + "grad_norm": 1.2694588899612427, + "learning_rate": 1.4873480914833272e-06, + "loss": 0.3068, + "step": 37740 + }, + { + "epoch": 0.7555188549408203, + "grad_norm": 1.0444329977035522, + "learning_rate": 1.487117392815679e-06, + "loss": 0.3137, + "step": 37741 + }, + { + "epoch": 0.7555388734579486, + "grad_norm": 1.0183357000350952, + "learning_rate": 1.4868867089154332e-06, + "loss": 0.2584, + "step": 37742 + }, + { + "epoch": 0.755558891975077, + "grad_norm": 1.1281787157058716, + "learning_rate": 1.4866560397835584e-06, + "loss": 0.3624, + "step": 37743 + }, + { + "epoch": 0.7555789104922053, + "grad_norm": 1.2434592247009277, + "learning_rate": 1.486425385421027e-06, + "loss": 0.3426, + "step": 37744 + }, + { + "epoch": 0.7555989290093337, + "grad_norm": 1.2360793352127075, + "learning_rate": 1.4861947458288052e-06, + "loss": 0.3144, + "step": 37745 + }, + { + "epoch": 0.755618947526462, + "grad_norm": 1.2761613130569458, + "learning_rate": 1.4859641210078662e-06, + "loss": 0.3117, + "step": 37746 + }, + { + "epoch": 0.7556389660435903, + "grad_norm": 0.9263076782226562, + "learning_rate": 1.485733510959178e-06, + "loss": 0.2935, + "step": 37747 + }, + { + "epoch": 0.7556589845607187, + "grad_norm": 1.1433720588684082, + "learning_rate": 1.4855029156837098e-06, + "loss": 0.3114, + "step": 37748 + }, + { + "epoch": 0.755679003077847, + "grad_norm": 1.1153753995895386, + "learning_rate": 1.4852723351824294e-06, + "loss": 0.2819, + "step": 37749 + }, + { + "epoch": 0.7556990215949754, + "grad_norm": 1.0545271635055542, + "learning_rate": 1.4850417694563095e-06, + "loss": 0.3021, + "step": 37750 + }, + { + "epoch": 0.7557190401121037, + "grad_norm": 1.10292387008667, + "learning_rate": 1.4848112185063173e-06, + "loss": 0.3302, + "step": 37751 + }, + { + "epoch": 0.7557390586292321, + "grad_norm": 1.0670149326324463, + "learning_rate": 1.4845806823334224e-06, + "loss": 0.2677, + "step": 37752 + }, + { + "epoch": 0.7557590771463604, + "grad_norm": 0.8886722326278687, + "learning_rate": 1.484350160938594e-06, + "loss": 0.2395, + "step": 37753 + }, + { + "epoch": 0.7557790956634888, + "grad_norm": 1.1499500274658203, + "learning_rate": 1.4841196543227992e-06, + "loss": 0.3239, + "step": 37754 + }, + { + "epoch": 0.7557991141806171, + "grad_norm": 1.1507773399353027, + "learning_rate": 1.48388916248701e-06, + "loss": 0.2818, + "step": 37755 + }, + { + "epoch": 0.7558191326977454, + "grad_norm": 1.0357130765914917, + "learning_rate": 1.4836586854321927e-06, + "loss": 0.2562, + "step": 37756 + }, + { + "epoch": 0.7558391512148738, + "grad_norm": 1.1800687313079834, + "learning_rate": 1.4834282231593195e-06, + "loss": 0.2471, + "step": 37757 + }, + { + "epoch": 0.7558591697320021, + "grad_norm": 1.1052826642990112, + "learning_rate": 1.4831977756693554e-06, + "loss": 0.2653, + "step": 37758 + }, + { + "epoch": 0.7558791882491305, + "grad_norm": 1.1103912591934204, + "learning_rate": 1.4829673429632725e-06, + "loss": 0.2651, + "step": 37759 + }, + { + "epoch": 0.7558992067662588, + "grad_norm": 1.2509582042694092, + "learning_rate": 1.482736925042038e-06, + "loss": 0.303, + "step": 37760 + }, + { + "epoch": 0.7559192252833872, + "grad_norm": 1.123611569404602, + "learning_rate": 1.482506521906621e-06, + "loss": 0.3063, + "step": 37761 + }, + { + "epoch": 0.7559392438005155, + "grad_norm": 1.1543840169906616, + "learning_rate": 1.4822761335579888e-06, + "loss": 0.2957, + "step": 37762 + }, + { + "epoch": 0.7559592623176438, + "grad_norm": 1.1029332876205444, + "learning_rate": 1.48204575999711e-06, + "loss": 0.3224, + "step": 37763 + }, + { + "epoch": 0.7559792808347722, + "grad_norm": 1.10162353515625, + "learning_rate": 1.4818154012249548e-06, + "loss": 0.3316, + "step": 37764 + }, + { + "epoch": 0.7559992993519005, + "grad_norm": 1.1915464401245117, + "learning_rate": 1.4815850572424905e-06, + "loss": 0.2949, + "step": 37765 + }, + { + "epoch": 0.7560193178690289, + "grad_norm": 1.1459885835647583, + "learning_rate": 1.4813547280506856e-06, + "loss": 0.3119, + "step": 37766 + }, + { + "epoch": 0.7560393363861572, + "grad_norm": 1.2101452350616455, + "learning_rate": 1.4811244136505072e-06, + "loss": 0.3141, + "step": 37767 + }, + { + "epoch": 0.7560593549032856, + "grad_norm": 1.1810388565063477, + "learning_rate": 1.480894114042925e-06, + "loss": 0.318, + "step": 37768 + }, + { + "epoch": 0.7560793734204139, + "grad_norm": 1.0304542779922485, + "learning_rate": 1.480663829228906e-06, + "loss": 0.354, + "step": 37769 + }, + { + "epoch": 0.7560993919375423, + "grad_norm": 1.184775710105896, + "learning_rate": 1.48043355920942e-06, + "loss": 0.2735, + "step": 37770 + }, + { + "epoch": 0.7561194104546706, + "grad_norm": 1.1784305572509766, + "learning_rate": 1.4802033039854325e-06, + "loss": 0.3473, + "step": 37771 + }, + { + "epoch": 0.7561394289717989, + "grad_norm": 1.215958595275879, + "learning_rate": 1.479973063557914e-06, + "loss": 0.3285, + "step": 37772 + }, + { + "epoch": 0.7561594474889273, + "grad_norm": 1.2467252016067505, + "learning_rate": 1.4797428379278317e-06, + "loss": 0.3398, + "step": 37773 + }, + { + "epoch": 0.7561794660060556, + "grad_norm": 1.0494966506958008, + "learning_rate": 1.4795126270961524e-06, + "loss": 0.3161, + "step": 37774 + }, + { + "epoch": 0.756199484523184, + "grad_norm": 1.2313847541809082, + "learning_rate": 1.4792824310638448e-06, + "loss": 0.3948, + "step": 37775 + }, + { + "epoch": 0.7562195030403123, + "grad_norm": 1.155063271522522, + "learning_rate": 1.4790522498318743e-06, + "loss": 0.3255, + "step": 37776 + }, + { + "epoch": 0.7562395215574407, + "grad_norm": 1.092942476272583, + "learning_rate": 1.4788220834012123e-06, + "loss": 0.267, + "step": 37777 + }, + { + "epoch": 0.756259540074569, + "grad_norm": 1.932846188545227, + "learning_rate": 1.4785919317728248e-06, + "loss": 0.7739, + "step": 37778 + }, + { + "epoch": 0.7562795585916973, + "grad_norm": 1.1735668182373047, + "learning_rate": 1.4783617949476787e-06, + "loss": 0.3356, + "step": 37779 + }, + { + "epoch": 0.7562995771088257, + "grad_norm": 1.0111242532730103, + "learning_rate": 1.4781316729267398e-06, + "loss": 0.2774, + "step": 37780 + }, + { + "epoch": 0.756319595625954, + "grad_norm": 1.1224983930587769, + "learning_rate": 1.4779015657109797e-06, + "loss": 0.2873, + "step": 37781 + }, + { + "epoch": 0.7563396141430824, + "grad_norm": 0.9691582918167114, + "learning_rate": 1.4776714733013614e-06, + "loss": 0.2512, + "step": 37782 + }, + { + "epoch": 0.7563596326602107, + "grad_norm": 2.0411365032196045, + "learning_rate": 1.477441395698856e-06, + "loss": 0.795, + "step": 37783 + }, + { + "epoch": 0.7563796511773391, + "grad_norm": 1.507012963294983, + "learning_rate": 1.4772113329044286e-06, + "loss": 0.26, + "step": 37784 + }, + { + "epoch": 0.7563996696944674, + "grad_norm": 1.1622796058654785, + "learning_rate": 1.4769812849190452e-06, + "loss": 0.3033, + "step": 37785 + }, + { + "epoch": 0.7564196882115958, + "grad_norm": 1.072175145149231, + "learning_rate": 1.4767512517436762e-06, + "loss": 0.3049, + "step": 37786 + }, + { + "epoch": 0.7564397067287241, + "grad_norm": 1.0871881246566772, + "learning_rate": 1.4765212333792868e-06, + "loss": 0.2867, + "step": 37787 + }, + { + "epoch": 0.7564597252458524, + "grad_norm": 1.1646345853805542, + "learning_rate": 1.4762912298268433e-06, + "loss": 0.3119, + "step": 37788 + }, + { + "epoch": 0.7564797437629808, + "grad_norm": 1.269237756729126, + "learning_rate": 1.4760612410873116e-06, + "loss": 0.3106, + "step": 37789 + }, + { + "epoch": 0.7564997622801091, + "grad_norm": 1.2565664052963257, + "learning_rate": 1.475831267161662e-06, + "loss": 0.2733, + "step": 37790 + }, + { + "epoch": 0.7565197807972375, + "grad_norm": 1.1932278871536255, + "learning_rate": 1.475601308050859e-06, + "loss": 0.2776, + "step": 37791 + }, + { + "epoch": 0.7565397993143658, + "grad_norm": 1.0672287940979004, + "learning_rate": 1.4753713637558703e-06, + "loss": 0.274, + "step": 37792 + }, + { + "epoch": 0.7565598178314942, + "grad_norm": 1.0886507034301758, + "learning_rate": 1.4751414342776614e-06, + "loss": 0.318, + "step": 37793 + }, + { + "epoch": 0.7565798363486225, + "grad_norm": 1.0405957698822021, + "learning_rate": 1.4749115196171976e-06, + "loss": 0.2599, + "step": 37794 + }, + { + "epoch": 0.7565998548657508, + "grad_norm": 1.1446071863174438, + "learning_rate": 1.4746816197754476e-06, + "loss": 0.3037, + "step": 37795 + }, + { + "epoch": 0.7566198733828792, + "grad_norm": 1.0860815048217773, + "learning_rate": 1.4744517347533787e-06, + "loss": 0.2662, + "step": 37796 + }, + { + "epoch": 0.7566398919000075, + "grad_norm": 1.190227746963501, + "learning_rate": 1.4742218645519562e-06, + "loss": 0.2676, + "step": 37797 + }, + { + "epoch": 0.7566599104171359, + "grad_norm": 0.9927364587783813, + "learning_rate": 1.4739920091721443e-06, + "loss": 0.2279, + "step": 37798 + }, + { + "epoch": 0.7566799289342642, + "grad_norm": 1.0828759670257568, + "learning_rate": 1.4737621686149135e-06, + "loss": 0.3019, + "step": 37799 + }, + { + "epoch": 0.7566999474513926, + "grad_norm": 1.1518174409866333, + "learning_rate": 1.473532342881227e-06, + "loss": 0.3057, + "step": 37800 + }, + { + "epoch": 0.7567199659685209, + "grad_norm": 1.1021746397018433, + "learning_rate": 1.4733025319720517e-06, + "loss": 0.3026, + "step": 37801 + }, + { + "epoch": 0.7567399844856493, + "grad_norm": 1.8483575582504272, + "learning_rate": 1.4730727358883523e-06, + "loss": 0.7426, + "step": 37802 + }, + { + "epoch": 0.7567600030027776, + "grad_norm": 1.0567858219146729, + "learning_rate": 1.4728429546310974e-06, + "loss": 0.2614, + "step": 37803 + }, + { + "epoch": 0.7567800215199059, + "grad_norm": 1.0931503772735596, + "learning_rate": 1.4726131882012517e-06, + "loss": 0.3384, + "step": 37804 + }, + { + "epoch": 0.7568000400370343, + "grad_norm": 1.1367840766906738, + "learning_rate": 1.4723834365997814e-06, + "loss": 0.3053, + "step": 37805 + }, + { + "epoch": 0.7568200585541626, + "grad_norm": 1.1662124395370483, + "learning_rate": 1.472153699827652e-06, + "loss": 0.355, + "step": 37806 + }, + { + "epoch": 0.756840077071291, + "grad_norm": 1.2404261827468872, + "learning_rate": 1.4719239778858274e-06, + "loss": 0.2882, + "step": 37807 + }, + { + "epoch": 0.7568600955884193, + "grad_norm": 1.0568997859954834, + "learning_rate": 1.471694270775275e-06, + "loss": 0.3239, + "step": 37808 + }, + { + "epoch": 0.7568801141055477, + "grad_norm": 1.0284606218338013, + "learning_rate": 1.4714645784969621e-06, + "loss": 0.3027, + "step": 37809 + }, + { + "epoch": 0.756900132622676, + "grad_norm": 1.0479670763015747, + "learning_rate": 1.4712349010518533e-06, + "loss": 0.2694, + "step": 37810 + }, + { + "epoch": 0.7569201511398043, + "grad_norm": 1.0892527103424072, + "learning_rate": 1.471005238440912e-06, + "loss": 0.3032, + "step": 37811 + }, + { + "epoch": 0.7569401696569327, + "grad_norm": 1.0153601169586182, + "learning_rate": 1.470775590665106e-06, + "loss": 0.2808, + "step": 37812 + }, + { + "epoch": 0.756960188174061, + "grad_norm": 1.1248339414596558, + "learning_rate": 1.4705459577254006e-06, + "loss": 0.2904, + "step": 37813 + }, + { + "epoch": 0.7569802066911894, + "grad_norm": 1.7705689668655396, + "learning_rate": 1.4703163396227604e-06, + "loss": 0.7244, + "step": 37814 + }, + { + "epoch": 0.7570002252083177, + "grad_norm": 1.2518309354782104, + "learning_rate": 1.4700867363581506e-06, + "loss": 0.3037, + "step": 37815 + }, + { + "epoch": 0.7570202437254461, + "grad_norm": 1.0840883255004883, + "learning_rate": 1.4698571479325346e-06, + "loss": 0.3164, + "step": 37816 + }, + { + "epoch": 0.7570402622425744, + "grad_norm": 1.0576704740524292, + "learning_rate": 1.4696275743468813e-06, + "loss": 0.2722, + "step": 37817 + }, + { + "epoch": 0.7570602807597028, + "grad_norm": 1.1065374612808228, + "learning_rate": 1.4693980156021542e-06, + "loss": 0.3157, + "step": 37818 + }, + { + "epoch": 0.7570802992768311, + "grad_norm": 1.1636507511138916, + "learning_rate": 1.4691684716993177e-06, + "loss": 0.2907, + "step": 37819 + }, + { + "epoch": 0.7571003177939594, + "grad_norm": 1.1182626485824585, + "learning_rate": 1.4689389426393359e-06, + "loss": 0.295, + "step": 37820 + }, + { + "epoch": 0.7571203363110878, + "grad_norm": 1.9404709339141846, + "learning_rate": 1.4687094284231746e-06, + "loss": 0.724, + "step": 37821 + }, + { + "epoch": 0.7571403548282161, + "grad_norm": 1.1334872245788574, + "learning_rate": 1.4684799290518004e-06, + "loss": 0.3166, + "step": 37822 + }, + { + "epoch": 0.7571603733453445, + "grad_norm": 1.0981571674346924, + "learning_rate": 1.4682504445261769e-06, + "loss": 0.2837, + "step": 37823 + }, + { + "epoch": 0.7571803918624728, + "grad_norm": 1.1584059000015259, + "learning_rate": 1.4680209748472684e-06, + "loss": 0.2879, + "step": 37824 + }, + { + "epoch": 0.7572004103796012, + "grad_norm": 1.0802156925201416, + "learning_rate": 1.467791520016038e-06, + "loss": 0.286, + "step": 37825 + }, + { + "epoch": 0.7572204288967295, + "grad_norm": 1.2171218395233154, + "learning_rate": 1.4675620800334534e-06, + "loss": 0.3102, + "step": 37826 + }, + { + "epoch": 0.7572404474138578, + "grad_norm": 1.0721919536590576, + "learning_rate": 1.4673326549004773e-06, + "loss": 0.261, + "step": 37827 + }, + { + "epoch": 0.7572604659309862, + "grad_norm": 1.0727390050888062, + "learning_rate": 1.4671032446180749e-06, + "loss": 0.285, + "step": 37828 + }, + { + "epoch": 0.7572804844481145, + "grad_norm": 1.1949437856674194, + "learning_rate": 1.4668738491872087e-06, + "loss": 0.3266, + "step": 37829 + }, + { + "epoch": 0.7573005029652429, + "grad_norm": 1.1194729804992676, + "learning_rate": 1.466644468608846e-06, + "loss": 0.2426, + "step": 37830 + }, + { + "epoch": 0.7573205214823712, + "grad_norm": 1.0037071704864502, + "learning_rate": 1.4664151028839496e-06, + "loss": 0.2967, + "step": 37831 + }, + { + "epoch": 0.7573405399994996, + "grad_norm": 1.0691912174224854, + "learning_rate": 1.4661857520134837e-06, + "loss": 0.3333, + "step": 37832 + }, + { + "epoch": 0.7573605585166279, + "grad_norm": 1.0689843893051147, + "learning_rate": 1.4659564159984109e-06, + "loss": 0.2786, + "step": 37833 + }, + { + "epoch": 0.7573805770337563, + "grad_norm": 1.1903108358383179, + "learning_rate": 1.4657270948396962e-06, + "loss": 0.304, + "step": 37834 + }, + { + "epoch": 0.7574005955508846, + "grad_norm": 1.0444425344467163, + "learning_rate": 1.4654977885383064e-06, + "loss": 0.2718, + "step": 37835 + }, + { + "epoch": 0.7574206140680129, + "grad_norm": 1.2748647928237915, + "learning_rate": 1.465268497095203e-06, + "loss": 0.3361, + "step": 37836 + }, + { + "epoch": 0.7574406325851413, + "grad_norm": 1.1066935062408447, + "learning_rate": 1.4650392205113501e-06, + "loss": 0.2918, + "step": 37837 + }, + { + "epoch": 0.7574606511022696, + "grad_norm": 1.0571318864822388, + "learning_rate": 1.4648099587877102e-06, + "loss": 0.2878, + "step": 37838 + }, + { + "epoch": 0.757480669619398, + "grad_norm": 1.1315747499465942, + "learning_rate": 1.4645807119252498e-06, + "loss": 0.3002, + "step": 37839 + }, + { + "epoch": 0.7575006881365263, + "grad_norm": 1.2280583381652832, + "learning_rate": 1.4643514799249314e-06, + "loss": 0.3278, + "step": 37840 + }, + { + "epoch": 0.7575207066536547, + "grad_norm": 1.1365227699279785, + "learning_rate": 1.4641222627877188e-06, + "loss": 0.3081, + "step": 37841 + }, + { + "epoch": 0.757540725170783, + "grad_norm": 1.2051557302474976, + "learning_rate": 1.4638930605145735e-06, + "loss": 0.3245, + "step": 37842 + }, + { + "epoch": 0.7575607436879113, + "grad_norm": 1.1174284219741821, + "learning_rate": 1.4636638731064623e-06, + "loss": 0.288, + "step": 37843 + }, + { + "epoch": 0.7575807622050397, + "grad_norm": 1.2299144268035889, + "learning_rate": 1.4634347005643468e-06, + "loss": 0.3411, + "step": 37844 + }, + { + "epoch": 0.757600780722168, + "grad_norm": 1.2283138036727905, + "learning_rate": 1.463205542889191e-06, + "loss": 0.2846, + "step": 37845 + }, + { + "epoch": 0.7576207992392964, + "grad_norm": 1.2990330457687378, + "learning_rate": 1.462976400081957e-06, + "loss": 0.3253, + "step": 37846 + }, + { + "epoch": 0.7576408177564247, + "grad_norm": 1.2508043050765991, + "learning_rate": 1.462747272143608e-06, + "loss": 0.2841, + "step": 37847 + }, + { + "epoch": 0.7576608362735531, + "grad_norm": 1.2121788263320923, + "learning_rate": 1.4625181590751103e-06, + "loss": 0.3281, + "step": 37848 + }, + { + "epoch": 0.7576808547906814, + "grad_norm": 1.109487533569336, + "learning_rate": 1.4622890608774254e-06, + "loss": 0.2769, + "step": 37849 + }, + { + "epoch": 0.7577008733078098, + "grad_norm": 1.0468370914459229, + "learning_rate": 1.4620599775515154e-06, + "loss": 0.3169, + "step": 37850 + }, + { + "epoch": 0.7577208918249381, + "grad_norm": 1.2020283937454224, + "learning_rate": 1.4618309090983423e-06, + "loss": 0.3055, + "step": 37851 + }, + { + "epoch": 0.7577409103420664, + "grad_norm": 1.0819205045700073, + "learning_rate": 1.4616018555188727e-06, + "loss": 0.2856, + "step": 37852 + }, + { + "epoch": 0.7577609288591948, + "grad_norm": 1.1639404296875, + "learning_rate": 1.4613728168140668e-06, + "loss": 0.2909, + "step": 37853 + }, + { + "epoch": 0.7577809473763231, + "grad_norm": 1.064827799797058, + "learning_rate": 1.4611437929848887e-06, + "loss": 0.271, + "step": 37854 + }, + { + "epoch": 0.7578009658934515, + "grad_norm": 1.0869498252868652, + "learning_rate": 1.4609147840322985e-06, + "loss": 0.3569, + "step": 37855 + }, + { + "epoch": 0.7578209844105798, + "grad_norm": 1.349168062210083, + "learning_rate": 1.4606857899572625e-06, + "loss": 0.3047, + "step": 37856 + }, + { + "epoch": 0.7578410029277082, + "grad_norm": 1.125835657119751, + "learning_rate": 1.460456810760742e-06, + "loss": 0.3288, + "step": 37857 + }, + { + "epoch": 0.7578610214448365, + "grad_norm": 1.1854643821716309, + "learning_rate": 1.4602278464436992e-06, + "loss": 0.2971, + "step": 37858 + }, + { + "epoch": 0.7578810399619648, + "grad_norm": 1.126339316368103, + "learning_rate": 1.4599988970070955e-06, + "loss": 0.3114, + "step": 37859 + }, + { + "epoch": 0.7579010584790932, + "grad_norm": 1.0658669471740723, + "learning_rate": 1.459769962451894e-06, + "loss": 0.2895, + "step": 37860 + }, + { + "epoch": 0.7579210769962215, + "grad_norm": 1.0470151901245117, + "learning_rate": 1.4595410427790596e-06, + "loss": 0.2891, + "step": 37861 + }, + { + "epoch": 0.7579410955133499, + "grad_norm": 1.136069655418396, + "learning_rate": 1.459312137989553e-06, + "loss": 0.2871, + "step": 37862 + }, + { + "epoch": 0.7579611140304782, + "grad_norm": 1.055419921875, + "learning_rate": 1.4590832480843358e-06, + "loss": 0.3164, + "step": 37863 + }, + { + "epoch": 0.7579811325476066, + "grad_norm": 1.0657118558883667, + "learning_rate": 1.4588543730643695e-06, + "loss": 0.2959, + "step": 37864 + }, + { + "epoch": 0.7580011510647349, + "grad_norm": 1.1897971630096436, + "learning_rate": 1.4586255129306187e-06, + "loss": 0.309, + "step": 37865 + }, + { + "epoch": 0.7580211695818633, + "grad_norm": 1.0836690664291382, + "learning_rate": 1.4583966676840439e-06, + "loss": 0.2718, + "step": 37866 + }, + { + "epoch": 0.7580411880989916, + "grad_norm": 1.3859435319900513, + "learning_rate": 1.4581678373256075e-06, + "loss": 0.3023, + "step": 37867 + }, + { + "epoch": 0.7580612066161199, + "grad_norm": 1.7886642217636108, + "learning_rate": 1.4579390218562716e-06, + "loss": 0.7591, + "step": 37868 + }, + { + "epoch": 0.7580812251332483, + "grad_norm": 1.2222617864608765, + "learning_rate": 1.4577102212769956e-06, + "loss": 0.3524, + "step": 37869 + }, + { + "epoch": 0.7581012436503766, + "grad_norm": 1.8404293060302734, + "learning_rate": 1.4574814355887456e-06, + "loss": 0.7589, + "step": 37870 + }, + { + "epoch": 0.758121262167505, + "grad_norm": 1.1023571491241455, + "learning_rate": 1.457252664792481e-06, + "loss": 0.3213, + "step": 37871 + }, + { + "epoch": 0.7581412806846333, + "grad_norm": 1.2394747734069824, + "learning_rate": 1.4570239088891623e-06, + "loss": 0.2956, + "step": 37872 + }, + { + "epoch": 0.7581612992017617, + "grad_norm": 1.0775117874145508, + "learning_rate": 1.456795167879753e-06, + "loss": 0.3073, + "step": 37873 + }, + { + "epoch": 0.75818131771889, + "grad_norm": 1.062620997428894, + "learning_rate": 1.4565664417652154e-06, + "loss": 0.305, + "step": 37874 + }, + { + "epoch": 0.7582013362360183, + "grad_norm": 1.8776289224624634, + "learning_rate": 1.4563377305465103e-06, + "loss": 0.7319, + "step": 37875 + }, + { + "epoch": 0.7582213547531467, + "grad_norm": 1.0158203840255737, + "learning_rate": 1.4561090342245987e-06, + "loss": 0.2996, + "step": 37876 + }, + { + "epoch": 0.758241373270275, + "grad_norm": 1.0468145608901978, + "learning_rate": 1.4558803528004417e-06, + "loss": 0.3153, + "step": 37877 + }, + { + "epoch": 0.7582613917874034, + "grad_norm": 1.1662427186965942, + "learning_rate": 1.4556516862749998e-06, + "loss": 0.2826, + "step": 37878 + }, + { + "epoch": 0.7582814103045317, + "grad_norm": 1.1734920740127563, + "learning_rate": 1.4554230346492366e-06, + "loss": 0.3227, + "step": 37879 + }, + { + "epoch": 0.7583014288216601, + "grad_norm": 1.1468337774276733, + "learning_rate": 1.4551943979241123e-06, + "loss": 0.2728, + "step": 37880 + }, + { + "epoch": 0.7583214473387884, + "grad_norm": 1.1099520921707153, + "learning_rate": 1.4549657761005881e-06, + "loss": 0.2915, + "step": 37881 + }, + { + "epoch": 0.7583414658559168, + "grad_norm": 1.0540821552276611, + "learning_rate": 1.4547371691796236e-06, + "loss": 0.2862, + "step": 37882 + }, + { + "epoch": 0.7583614843730451, + "grad_norm": 1.0544121265411377, + "learning_rate": 1.454508577162182e-06, + "loss": 0.3165, + "step": 37883 + }, + { + "epoch": 0.7583815028901734, + "grad_norm": 1.8548341989517212, + "learning_rate": 1.4542800000492235e-06, + "loss": 0.7974, + "step": 37884 + }, + { + "epoch": 0.7584015214073018, + "grad_norm": 1.9446250200271606, + "learning_rate": 1.4540514378417075e-06, + "loss": 0.7146, + "step": 37885 + }, + { + "epoch": 0.7584215399244301, + "grad_norm": 1.2018202543258667, + "learning_rate": 1.4538228905405965e-06, + "loss": 0.323, + "step": 37886 + }, + { + "epoch": 0.7584415584415585, + "grad_norm": 1.2043311595916748, + "learning_rate": 1.453594358146852e-06, + "loss": 0.272, + "step": 37887 + }, + { + "epoch": 0.7584615769586868, + "grad_norm": 1.1221041679382324, + "learning_rate": 1.4533658406614337e-06, + "loss": 0.3097, + "step": 37888 + }, + { + "epoch": 0.7584815954758152, + "grad_norm": 1.1129307746887207, + "learning_rate": 1.453137338085302e-06, + "loss": 0.3266, + "step": 37889 + }, + { + "epoch": 0.7585016139929435, + "grad_norm": 1.0801719427108765, + "learning_rate": 1.4529088504194177e-06, + "loss": 0.2626, + "step": 37890 + }, + { + "epoch": 0.7585216325100718, + "grad_norm": 1.1671582460403442, + "learning_rate": 1.4526803776647398e-06, + "loss": 0.2846, + "step": 37891 + }, + { + "epoch": 0.7585416510272002, + "grad_norm": 1.0152264833450317, + "learning_rate": 1.452451919822232e-06, + "loss": 0.3046, + "step": 37892 + }, + { + "epoch": 0.7585616695443285, + "grad_norm": 1.1107499599456787, + "learning_rate": 1.4522234768928522e-06, + "loss": 0.3065, + "step": 37893 + }, + { + "epoch": 0.7585816880614569, + "grad_norm": 1.17079758644104, + "learning_rate": 1.451995048877562e-06, + "loss": 0.2798, + "step": 37894 + }, + { + "epoch": 0.7586017065785852, + "grad_norm": 1.240431547164917, + "learning_rate": 1.4517666357773196e-06, + "loss": 0.3044, + "step": 37895 + }, + { + "epoch": 0.7586217250957136, + "grad_norm": 1.2993372678756714, + "learning_rate": 1.4515382375930881e-06, + "loss": 0.3469, + "step": 37896 + }, + { + "epoch": 0.7586417436128419, + "grad_norm": 1.0675568580627441, + "learning_rate": 1.4513098543258258e-06, + "loss": 0.2971, + "step": 37897 + }, + { + "epoch": 0.7586617621299703, + "grad_norm": 1.1152437925338745, + "learning_rate": 1.4510814859764922e-06, + "loss": 0.279, + "step": 37898 + }, + { + "epoch": 0.7586817806470986, + "grad_norm": 1.3220243453979492, + "learning_rate": 1.4508531325460495e-06, + "loss": 0.2744, + "step": 37899 + }, + { + "epoch": 0.7587017991642269, + "grad_norm": 1.124773621559143, + "learning_rate": 1.4506247940354552e-06, + "loss": 0.3102, + "step": 37900 + }, + { + "epoch": 0.7587218176813553, + "grad_norm": 1.1947450637817383, + "learning_rate": 1.4503964704456713e-06, + "loss": 0.3507, + "step": 37901 + }, + { + "epoch": 0.7587418361984836, + "grad_norm": 1.234728455543518, + "learning_rate": 1.4501681617776575e-06, + "loss": 0.3141, + "step": 37902 + }, + { + "epoch": 0.758761854715612, + "grad_norm": 1.0396101474761963, + "learning_rate": 1.4499398680323724e-06, + "loss": 0.3042, + "step": 37903 + }, + { + "epoch": 0.7587818732327403, + "grad_norm": 1.1267348527908325, + "learning_rate": 1.449711589210775e-06, + "loss": 0.3033, + "step": 37904 + }, + { + "epoch": 0.7588018917498687, + "grad_norm": 1.1048200130462646, + "learning_rate": 1.4494833253138268e-06, + "loss": 0.26, + "step": 37905 + }, + { + "epoch": 0.758821910266997, + "grad_norm": 1.9576210975646973, + "learning_rate": 1.4492550763424873e-06, + "loss": 0.7768, + "step": 37906 + }, + { + "epoch": 0.7588419287841253, + "grad_norm": 1.8925553560256958, + "learning_rate": 1.449026842297715e-06, + "loss": 0.7445, + "step": 37907 + }, + { + "epoch": 0.7588619473012537, + "grad_norm": 1.166457176208496, + "learning_rate": 1.44879862318047e-06, + "loss": 0.2944, + "step": 37908 + }, + { + "epoch": 0.758881965818382, + "grad_norm": 1.02736496925354, + "learning_rate": 1.4485704189917094e-06, + "loss": 0.2771, + "step": 37909 + }, + { + "epoch": 0.7589019843355104, + "grad_norm": 2.1906259059906006, + "learning_rate": 1.4483422297323963e-06, + "loss": 0.8482, + "step": 37910 + }, + { + "epoch": 0.7589220028526387, + "grad_norm": 1.0077879428863525, + "learning_rate": 1.4481140554034867e-06, + "loss": 0.2643, + "step": 37911 + }, + { + "epoch": 0.7589420213697671, + "grad_norm": 1.1122781038284302, + "learning_rate": 1.4478858960059428e-06, + "loss": 0.2967, + "step": 37912 + }, + { + "epoch": 0.7589620398868954, + "grad_norm": 1.039229393005371, + "learning_rate": 1.4476577515407203e-06, + "loss": 0.2322, + "step": 37913 + }, + { + "epoch": 0.7589820584040238, + "grad_norm": 1.1208280324935913, + "learning_rate": 1.4474296220087818e-06, + "loss": 0.2977, + "step": 37914 + }, + { + "epoch": 0.7590020769211521, + "grad_norm": 1.012080192565918, + "learning_rate": 1.4472015074110845e-06, + "loss": 0.2803, + "step": 37915 + }, + { + "epoch": 0.7590220954382804, + "grad_norm": 1.1220200061798096, + "learning_rate": 1.446973407748588e-06, + "loss": 0.2968, + "step": 37916 + }, + { + "epoch": 0.7590421139554088, + "grad_norm": 1.993126392364502, + "learning_rate": 1.4467453230222488e-06, + "loss": 0.7491, + "step": 37917 + }, + { + "epoch": 0.7590621324725371, + "grad_norm": 1.0295863151550293, + "learning_rate": 1.446517253233029e-06, + "loss": 0.3029, + "step": 37918 + }, + { + "epoch": 0.7590821509896655, + "grad_norm": 1.1275957822799683, + "learning_rate": 1.446289198381886e-06, + "loss": 0.2926, + "step": 37919 + }, + { + "epoch": 0.7591021695067938, + "grad_norm": 1.344972848892212, + "learning_rate": 1.4460611584697786e-06, + "loss": 0.3414, + "step": 37920 + }, + { + "epoch": 0.7591221880239222, + "grad_norm": 1.1105681657791138, + "learning_rate": 1.4458331334976655e-06, + "loss": 0.2839, + "step": 37921 + }, + { + "epoch": 0.7591422065410505, + "grad_norm": 1.176809549331665, + "learning_rate": 1.445605123466503e-06, + "loss": 0.3249, + "step": 37922 + }, + { + "epoch": 0.7591622250581788, + "grad_norm": 1.197013258934021, + "learning_rate": 1.4453771283772532e-06, + "loss": 0.297, + "step": 37923 + }, + { + "epoch": 0.7591822435753072, + "grad_norm": 1.045179843902588, + "learning_rate": 1.445149148230872e-06, + "loss": 0.3073, + "step": 37924 + }, + { + "epoch": 0.7592022620924355, + "grad_norm": 1.3208166360855103, + "learning_rate": 1.44492118302832e-06, + "loss": 0.3222, + "step": 37925 + }, + { + "epoch": 0.7592222806095639, + "grad_norm": 1.154437780380249, + "learning_rate": 1.4446932327705526e-06, + "loss": 0.2824, + "step": 37926 + }, + { + "epoch": 0.7592422991266922, + "grad_norm": 1.0494897365570068, + "learning_rate": 1.4444652974585317e-06, + "loss": 0.2756, + "step": 37927 + }, + { + "epoch": 0.7592623176438206, + "grad_norm": 1.4679462909698486, + "learning_rate": 1.4442373770932128e-06, + "loss": 0.319, + "step": 37928 + }, + { + "epoch": 0.7592823361609489, + "grad_norm": 1.8980867862701416, + "learning_rate": 1.4440094716755554e-06, + "loss": 0.6957, + "step": 37929 + }, + { + "epoch": 0.7593023546780773, + "grad_norm": 1.0926132202148438, + "learning_rate": 1.4437815812065164e-06, + "loss": 0.2755, + "step": 37930 + }, + { + "epoch": 0.7593223731952056, + "grad_norm": 1.117283582687378, + "learning_rate": 1.4435537056870535e-06, + "loss": 0.3234, + "step": 37931 + }, + { + "epoch": 0.7593423917123339, + "grad_norm": 1.2472752332687378, + "learning_rate": 1.4433258451181264e-06, + "loss": 0.3046, + "step": 37932 + }, + { + "epoch": 0.7593624102294623, + "grad_norm": 1.1695244312286377, + "learning_rate": 1.4430979995006922e-06, + "loss": 0.3518, + "step": 37933 + }, + { + "epoch": 0.7593824287465906, + "grad_norm": 1.1623753309249878, + "learning_rate": 1.442870168835709e-06, + "loss": 0.3182, + "step": 37934 + }, + { + "epoch": 0.759402447263719, + "grad_norm": 1.0989001989364624, + "learning_rate": 1.4426423531241318e-06, + "loss": 0.2643, + "step": 37935 + }, + { + "epoch": 0.7594224657808473, + "grad_norm": 1.0225701332092285, + "learning_rate": 1.4424145523669226e-06, + "loss": 0.2677, + "step": 37936 + }, + { + "epoch": 0.7594424842979757, + "grad_norm": 1.1042847633361816, + "learning_rate": 1.4421867665650351e-06, + "loss": 0.3066, + "step": 37937 + }, + { + "epoch": 0.759462502815104, + "grad_norm": 1.1161437034606934, + "learning_rate": 1.4419589957194308e-06, + "loss": 0.3135, + "step": 37938 + }, + { + "epoch": 0.7594825213322323, + "grad_norm": 1.1791893243789673, + "learning_rate": 1.4417312398310651e-06, + "loss": 0.2963, + "step": 37939 + }, + { + "epoch": 0.7595025398493607, + "grad_norm": 1.2565783262252808, + "learning_rate": 1.4415034989008935e-06, + "loss": 0.343, + "step": 37940 + }, + { + "epoch": 0.759522558366489, + "grad_norm": 1.0956921577453613, + "learning_rate": 1.4412757729298777e-06, + "loss": 0.2767, + "step": 37941 + }, + { + "epoch": 0.7595425768836174, + "grad_norm": 1.2594414949417114, + "learning_rate": 1.441048061918972e-06, + "loss": 0.3262, + "step": 37942 + }, + { + "epoch": 0.7595625954007457, + "grad_norm": 1.0232182741165161, + "learning_rate": 1.440820365869135e-06, + "loss": 0.2635, + "step": 37943 + }, + { + "epoch": 0.7595826139178741, + "grad_norm": 1.1274558305740356, + "learning_rate": 1.4405926847813211e-06, + "loss": 0.2968, + "step": 37944 + }, + { + "epoch": 0.7596026324350024, + "grad_norm": 1.1230851411819458, + "learning_rate": 1.4403650186564916e-06, + "loss": 0.253, + "step": 37945 + }, + { + "epoch": 0.7596226509521308, + "grad_norm": 1.1308774948120117, + "learning_rate": 1.440137367495601e-06, + "loss": 0.3113, + "step": 37946 + }, + { + "epoch": 0.7596426694692591, + "grad_norm": 2.0994012355804443, + "learning_rate": 1.4399097312996074e-06, + "loss": 0.7464, + "step": 37947 + }, + { + "epoch": 0.7596626879863874, + "grad_norm": 1.8381919860839844, + "learning_rate": 1.4396821100694652e-06, + "loss": 0.8006, + "step": 37948 + }, + { + "epoch": 0.7596827065035158, + "grad_norm": 1.0836492776870728, + "learning_rate": 1.4394545038061348e-06, + "loss": 0.2801, + "step": 37949 + }, + { + "epoch": 0.7597027250206441, + "grad_norm": 1.9106683731079102, + "learning_rate": 1.4392269125105702e-06, + "loss": 0.7607, + "step": 37950 + }, + { + "epoch": 0.7597227435377725, + "grad_norm": 1.098112940788269, + "learning_rate": 1.4389993361837306e-06, + "loss": 0.3229, + "step": 37951 + }, + { + "epoch": 0.7597427620549008, + "grad_norm": 1.3299920558929443, + "learning_rate": 1.4387717748265712e-06, + "loss": 0.2857, + "step": 37952 + }, + { + "epoch": 0.7597627805720292, + "grad_norm": 1.044052243232727, + "learning_rate": 1.438544228440048e-06, + "loss": 0.2628, + "step": 37953 + }, + { + "epoch": 0.7597827990891575, + "grad_norm": 1.1717236042022705, + "learning_rate": 1.4383166970251194e-06, + "loss": 0.2738, + "step": 37954 + }, + { + "epoch": 0.7598028176062858, + "grad_norm": 1.1913270950317383, + "learning_rate": 1.4380891805827413e-06, + "loss": 0.2989, + "step": 37955 + }, + { + "epoch": 0.7598228361234142, + "grad_norm": 1.1669031381607056, + "learning_rate": 1.4378616791138694e-06, + "loss": 0.3001, + "step": 37956 + }, + { + "epoch": 0.7598428546405425, + "grad_norm": 1.2299782037734985, + "learning_rate": 1.4376341926194593e-06, + "loss": 0.3113, + "step": 37957 + }, + { + "epoch": 0.7598628731576709, + "grad_norm": 1.21431303024292, + "learning_rate": 1.4374067211004695e-06, + "loss": 0.286, + "step": 37958 + }, + { + "epoch": 0.7598828916747992, + "grad_norm": 1.2426813840866089, + "learning_rate": 1.4371792645578552e-06, + "loss": 0.3395, + "step": 37959 + }, + { + "epoch": 0.7599029101919276, + "grad_norm": 1.2345707416534424, + "learning_rate": 1.4369518229925732e-06, + "loss": 0.3357, + "step": 37960 + }, + { + "epoch": 0.7599229287090559, + "grad_norm": 1.0156112909317017, + "learning_rate": 1.4367243964055767e-06, + "loss": 0.2903, + "step": 37961 + }, + { + "epoch": 0.7599429472261843, + "grad_norm": 1.2844635248184204, + "learning_rate": 1.436496984797826e-06, + "loss": 0.2901, + "step": 37962 + }, + { + "epoch": 0.7599629657433126, + "grad_norm": 1.160178303718567, + "learning_rate": 1.4362695881702732e-06, + "loss": 0.2974, + "step": 37963 + }, + { + "epoch": 0.7599829842604409, + "grad_norm": 1.2031676769256592, + "learning_rate": 1.4360422065238777e-06, + "loss": 0.3059, + "step": 37964 + }, + { + "epoch": 0.7600030027775693, + "grad_norm": 1.1605497598648071, + "learning_rate": 1.4358148398595945e-06, + "loss": 0.2939, + "step": 37965 + }, + { + "epoch": 0.7600230212946976, + "grad_norm": 1.1313904523849487, + "learning_rate": 1.4355874881783767e-06, + "loss": 0.2458, + "step": 37966 + }, + { + "epoch": 0.760043039811826, + "grad_norm": 1.1358424425125122, + "learning_rate": 1.4353601514811831e-06, + "loss": 0.2992, + "step": 37967 + }, + { + "epoch": 0.7600630583289543, + "grad_norm": 1.1285845041275024, + "learning_rate": 1.4351328297689687e-06, + "loss": 0.3294, + "step": 37968 + }, + { + "epoch": 0.7600830768460827, + "grad_norm": 1.116302251815796, + "learning_rate": 1.4349055230426884e-06, + "loss": 0.2796, + "step": 37969 + }, + { + "epoch": 0.760103095363211, + "grad_norm": 1.0365396738052368, + "learning_rate": 1.4346782313032987e-06, + "loss": 0.2908, + "step": 37970 + }, + { + "epoch": 0.7601231138803393, + "grad_norm": 2.057927131652832, + "learning_rate": 1.4344509545517526e-06, + "loss": 0.7255, + "step": 37971 + }, + { + "epoch": 0.7601431323974677, + "grad_norm": 1.0588836669921875, + "learning_rate": 1.4342236927890086e-06, + "loss": 0.314, + "step": 37972 + }, + { + "epoch": 0.760163150914596, + "grad_norm": 1.1865077018737793, + "learning_rate": 1.433996446016021e-06, + "loss": 0.3379, + "step": 37973 + }, + { + "epoch": 0.7601831694317244, + "grad_norm": 1.3041123151779175, + "learning_rate": 1.4337692142337439e-06, + "loss": 0.2578, + "step": 37974 + }, + { + "epoch": 0.7602031879488527, + "grad_norm": 1.0185362100601196, + "learning_rate": 1.4335419974431347e-06, + "loss": 0.3043, + "step": 37975 + }, + { + "epoch": 0.7602232064659811, + "grad_norm": 1.2818135023117065, + "learning_rate": 1.4333147956451455e-06, + "loss": 0.3035, + "step": 37976 + }, + { + "epoch": 0.7602432249831094, + "grad_norm": 1.090425968170166, + "learning_rate": 1.4330876088407353e-06, + "loss": 0.2833, + "step": 37977 + }, + { + "epoch": 0.7602632435002378, + "grad_norm": 1.1151437759399414, + "learning_rate": 1.4328604370308574e-06, + "loss": 0.2809, + "step": 37978 + }, + { + "epoch": 0.7602832620173661, + "grad_norm": 1.0640320777893066, + "learning_rate": 1.4326332802164644e-06, + "loss": 0.2812, + "step": 37979 + }, + { + "epoch": 0.7603032805344944, + "grad_norm": 1.1177661418914795, + "learning_rate": 1.4324061383985155e-06, + "loss": 0.332, + "step": 37980 + }, + { + "epoch": 0.7603232990516228, + "grad_norm": 1.1009360551834106, + "learning_rate": 1.4321790115779632e-06, + "loss": 0.3268, + "step": 37981 + }, + { + "epoch": 0.7603433175687511, + "grad_norm": 1.1861295700073242, + "learning_rate": 1.4319518997557625e-06, + "loss": 0.2662, + "step": 37982 + }, + { + "epoch": 0.7603633360858795, + "grad_norm": 1.1768896579742432, + "learning_rate": 1.4317248029328685e-06, + "loss": 0.326, + "step": 37983 + }, + { + "epoch": 0.7603833546030078, + "grad_norm": 1.1137406826019287, + "learning_rate": 1.4314977211102338e-06, + "loss": 0.303, + "step": 37984 + }, + { + "epoch": 0.7604033731201362, + "grad_norm": 1.8479281663894653, + "learning_rate": 1.431270654288816e-06, + "loss": 0.7454, + "step": 37985 + }, + { + "epoch": 0.7604233916372645, + "grad_norm": 1.0862290859222412, + "learning_rate": 1.4310436024695685e-06, + "loss": 0.3001, + "step": 37986 + }, + { + "epoch": 0.7604434101543928, + "grad_norm": 2.0321719646453857, + "learning_rate": 1.4308165656534446e-06, + "loss": 0.7479, + "step": 37987 + }, + { + "epoch": 0.7604634286715212, + "grad_norm": 1.1428555250167847, + "learning_rate": 1.4305895438414008e-06, + "loss": 0.259, + "step": 37988 + }, + { + "epoch": 0.7604834471886495, + "grad_norm": 1.11195707321167, + "learning_rate": 1.4303625370343887e-06, + "loss": 0.2904, + "step": 37989 + }, + { + "epoch": 0.7605034657057779, + "grad_norm": 1.1540940999984741, + "learning_rate": 1.430135545233366e-06, + "loss": 0.2636, + "step": 37990 + }, + { + "epoch": 0.7605234842229062, + "grad_norm": 1.102253794670105, + "learning_rate": 1.4299085684392855e-06, + "loss": 0.3091, + "step": 37991 + }, + { + "epoch": 0.7605435027400346, + "grad_norm": 1.2822606563568115, + "learning_rate": 1.4296816066531005e-06, + "loss": 0.2846, + "step": 37992 + }, + { + "epoch": 0.7605635212571629, + "grad_norm": 1.154658555984497, + "learning_rate": 1.4294546598757647e-06, + "loss": 0.2428, + "step": 37993 + }, + { + "epoch": 0.7605835397742913, + "grad_norm": 1.0467476844787598, + "learning_rate": 1.4292277281082345e-06, + "loss": 0.2861, + "step": 37994 + }, + { + "epoch": 0.7606035582914196, + "grad_norm": 1.1268017292022705, + "learning_rate": 1.4290008113514624e-06, + "loss": 0.3114, + "step": 37995 + }, + { + "epoch": 0.7606235768085479, + "grad_norm": 1.1853976249694824, + "learning_rate": 1.4287739096064028e-06, + "loss": 0.3085, + "step": 37996 + }, + { + "epoch": 0.7606435953256763, + "grad_norm": 1.2712349891662598, + "learning_rate": 1.4285470228740072e-06, + "loss": 0.259, + "step": 37997 + }, + { + "epoch": 0.7606636138428046, + "grad_norm": 1.7915563583374023, + "learning_rate": 1.428320151155233e-06, + "loss": 0.6426, + "step": 37998 + }, + { + "epoch": 0.760683632359933, + "grad_norm": 1.0612905025482178, + "learning_rate": 1.4280932944510323e-06, + "loss": 0.3134, + "step": 37999 + }, + { + "epoch": 0.7607036508770613, + "grad_norm": 1.0916112661361694, + "learning_rate": 1.427866452762357e-06, + "loss": 0.2969, + "step": 38000 + }, + { + "epoch": 0.7607236693941897, + "grad_norm": 0.9571207761764526, + "learning_rate": 1.4276396260901638e-06, + "loss": 0.23, + "step": 38001 + }, + { + "epoch": 0.760743687911318, + "grad_norm": 1.1410069465637207, + "learning_rate": 1.4274128144354037e-06, + "loss": 0.3508, + "step": 38002 + }, + { + "epoch": 0.7607637064284463, + "grad_norm": 1.127590537071228, + "learning_rate": 1.4271860177990327e-06, + "loss": 0.2883, + "step": 38003 + }, + { + "epoch": 0.7607837249455747, + "grad_norm": 1.1901270151138306, + "learning_rate": 1.4269592361820028e-06, + "loss": 0.3286, + "step": 38004 + }, + { + "epoch": 0.760803743462703, + "grad_norm": 1.097823977470398, + "learning_rate": 1.4267324695852676e-06, + "loss": 0.3013, + "step": 38005 + }, + { + "epoch": 0.7608237619798314, + "grad_norm": 1.1202951669692993, + "learning_rate": 1.426505718009778e-06, + "loss": 0.3019, + "step": 38006 + }, + { + "epoch": 0.7608437804969597, + "grad_norm": 1.2114351987838745, + "learning_rate": 1.426278981456491e-06, + "loss": 0.3172, + "step": 38007 + }, + { + "epoch": 0.7608637990140881, + "grad_norm": 1.8723610639572144, + "learning_rate": 1.4260522599263581e-06, + "loss": 0.7538, + "step": 38008 + }, + { + "epoch": 0.7608838175312164, + "grad_norm": 1.1603533029556274, + "learning_rate": 1.425825553420333e-06, + "loss": 0.284, + "step": 38009 + }, + { + "epoch": 0.7609038360483448, + "grad_norm": 1.0350590944290161, + "learning_rate": 1.4255988619393656e-06, + "loss": 0.284, + "step": 38010 + }, + { + "epoch": 0.7609238545654731, + "grad_norm": 1.181570053100586, + "learning_rate": 1.4253721854844139e-06, + "loss": 0.3639, + "step": 38011 + }, + { + "epoch": 0.7609438730826014, + "grad_norm": 1.0792770385742188, + "learning_rate": 1.4251455240564277e-06, + "loss": 0.2988, + "step": 38012 + }, + { + "epoch": 0.7609638915997298, + "grad_norm": 1.1421781778335571, + "learning_rate": 1.424918877656359e-06, + "loss": 0.2727, + "step": 38013 + }, + { + "epoch": 0.7609839101168581, + "grad_norm": 1.079978346824646, + "learning_rate": 1.424692246285163e-06, + "loss": 0.2738, + "step": 38014 + }, + { + "epoch": 0.7610039286339865, + "grad_norm": 1.0841747522354126, + "learning_rate": 1.4244656299437904e-06, + "loss": 0.3152, + "step": 38015 + }, + { + "epoch": 0.7610239471511148, + "grad_norm": 1.111810326576233, + "learning_rate": 1.4242390286331965e-06, + "loss": 0.2788, + "step": 38016 + }, + { + "epoch": 0.7610439656682432, + "grad_norm": 1.0610216856002808, + "learning_rate": 1.4240124423543317e-06, + "loss": 0.3069, + "step": 38017 + }, + { + "epoch": 0.7610639841853715, + "grad_norm": 1.0897334814071655, + "learning_rate": 1.4237858711081493e-06, + "loss": 0.3292, + "step": 38018 + }, + { + "epoch": 0.7610840027024998, + "grad_norm": 1.8158085346221924, + "learning_rate": 1.4235593148956001e-06, + "loss": 0.7505, + "step": 38019 + }, + { + "epoch": 0.7611040212196282, + "grad_norm": 1.015363097190857, + "learning_rate": 1.423332773717639e-06, + "loss": 0.2788, + "step": 38020 + }, + { + "epoch": 0.7611240397367565, + "grad_norm": 2.0222418308258057, + "learning_rate": 1.4231062475752178e-06, + "loss": 0.6935, + "step": 38021 + }, + { + "epoch": 0.7611440582538849, + "grad_norm": 1.0986155271530151, + "learning_rate": 1.4228797364692875e-06, + "loss": 0.3265, + "step": 38022 + }, + { + "epoch": 0.7611640767710132, + "grad_norm": 1.186815619468689, + "learning_rate": 1.4226532404008015e-06, + "loss": 0.2589, + "step": 38023 + }, + { + "epoch": 0.7611840952881416, + "grad_norm": 1.2031880617141724, + "learning_rate": 1.4224267593707098e-06, + "loss": 0.3019, + "step": 38024 + }, + { + "epoch": 0.7612041138052699, + "grad_norm": 2.1353795528411865, + "learning_rate": 1.4222002933799671e-06, + "loss": 0.6786, + "step": 38025 + }, + { + "epoch": 0.7612241323223983, + "grad_norm": 1.1486648321151733, + "learning_rate": 1.4219738424295237e-06, + "loss": 0.2826, + "step": 38026 + }, + { + "epoch": 0.7612441508395266, + "grad_norm": 1.0635336637496948, + "learning_rate": 1.4217474065203335e-06, + "loss": 0.3232, + "step": 38027 + }, + { + "epoch": 0.7612641693566549, + "grad_norm": 1.0672374963760376, + "learning_rate": 1.4215209856533452e-06, + "loss": 0.3019, + "step": 38028 + }, + { + "epoch": 0.7612841878737833, + "grad_norm": 1.1204274892807007, + "learning_rate": 1.4212945798295146e-06, + "loss": 0.3564, + "step": 38029 + }, + { + "epoch": 0.7613042063909116, + "grad_norm": 1.5658551454544067, + "learning_rate": 1.4210681890497913e-06, + "loss": 0.2729, + "step": 38030 + }, + { + "epoch": 0.76132422490804, + "grad_norm": 1.23867928981781, + "learning_rate": 1.4208418133151276e-06, + "loss": 0.2729, + "step": 38031 + }, + { + "epoch": 0.7613442434251683, + "grad_norm": 1.1832144260406494, + "learning_rate": 1.4206154526264727e-06, + "loss": 0.3026, + "step": 38032 + }, + { + "epoch": 0.7613642619422967, + "grad_norm": 1.0753072500228882, + "learning_rate": 1.4203891069847813e-06, + "loss": 0.2638, + "step": 38033 + }, + { + "epoch": 0.761384280459425, + "grad_norm": 1.120098352432251, + "learning_rate": 1.4201627763910042e-06, + "loss": 0.2845, + "step": 38034 + }, + { + "epoch": 0.7614042989765533, + "grad_norm": 1.9516347646713257, + "learning_rate": 1.4199364608460924e-06, + "loss": 0.7025, + "step": 38035 + }, + { + "epoch": 0.7614243174936817, + "grad_norm": 1.1151552200317383, + "learning_rate": 1.4197101603509973e-06, + "loss": 0.3062, + "step": 38036 + }, + { + "epoch": 0.76144433601081, + "grad_norm": 1.1053709983825684, + "learning_rate": 1.4194838749066686e-06, + "loss": 0.3055, + "step": 38037 + }, + { + "epoch": 0.7614643545279384, + "grad_norm": 1.215909481048584, + "learning_rate": 1.4192576045140605e-06, + "loss": 0.3189, + "step": 38038 + }, + { + "epoch": 0.7614843730450667, + "grad_norm": 1.2412570714950562, + "learning_rate": 1.4190313491741214e-06, + "loss": 0.3282, + "step": 38039 + }, + { + "epoch": 0.7615043915621951, + "grad_norm": 1.9511536359786987, + "learning_rate": 1.4188051088878053e-06, + "loss": 0.7446, + "step": 38040 + }, + { + "epoch": 0.7615244100793234, + "grad_norm": 1.163404941558838, + "learning_rate": 1.4185788836560605e-06, + "loss": 0.2708, + "step": 38041 + }, + { + "epoch": 0.7615444285964518, + "grad_norm": 1.1741477251052856, + "learning_rate": 1.4183526734798408e-06, + "loss": 0.335, + "step": 38042 + }, + { + "epoch": 0.7615644471135801, + "grad_norm": 1.020156741142273, + "learning_rate": 1.4181264783600957e-06, + "loss": 0.2585, + "step": 38043 + }, + { + "epoch": 0.7615844656307084, + "grad_norm": 1.0907422304153442, + "learning_rate": 1.4179002982977764e-06, + "loss": 0.2895, + "step": 38044 + }, + { + "epoch": 0.7616044841478368, + "grad_norm": 1.4874277114868164, + "learning_rate": 1.4176741332938333e-06, + "loss": 0.3187, + "step": 38045 + }, + { + "epoch": 0.7616245026649651, + "grad_norm": 1.219414234161377, + "learning_rate": 1.417447983349215e-06, + "loss": 0.2801, + "step": 38046 + }, + { + "epoch": 0.7616445211820935, + "grad_norm": 1.1480026245117188, + "learning_rate": 1.4172218484648765e-06, + "loss": 0.2798, + "step": 38047 + }, + { + "epoch": 0.7616645396992218, + "grad_norm": 1.0510642528533936, + "learning_rate": 1.4169957286417663e-06, + "loss": 0.3079, + "step": 38048 + }, + { + "epoch": 0.7616845582163502, + "grad_norm": 1.271851658821106, + "learning_rate": 1.4167696238808348e-06, + "loss": 0.3345, + "step": 38049 + }, + { + "epoch": 0.7617045767334785, + "grad_norm": 1.1439415216445923, + "learning_rate": 1.4165435341830314e-06, + "loss": 0.2763, + "step": 38050 + }, + { + "epoch": 0.7617245952506068, + "grad_norm": 1.0397909879684448, + "learning_rate": 1.416317459549309e-06, + "loss": 0.3072, + "step": 38051 + }, + { + "epoch": 0.7617446137677352, + "grad_norm": 1.046755313873291, + "learning_rate": 1.4160913999806152e-06, + "loss": 0.2871, + "step": 38052 + }, + { + "epoch": 0.7617646322848635, + "grad_norm": 1.3038558959960938, + "learning_rate": 1.415865355477904e-06, + "loss": 0.2904, + "step": 38053 + }, + { + "epoch": 0.7617846508019919, + "grad_norm": 1.167127013206482, + "learning_rate": 1.4156393260421231e-06, + "loss": 0.3255, + "step": 38054 + }, + { + "epoch": 0.7618046693191202, + "grad_norm": 1.1181319952011108, + "learning_rate": 1.4154133116742219e-06, + "loss": 0.3078, + "step": 38055 + }, + { + "epoch": 0.7618246878362486, + "grad_norm": 1.1774613857269287, + "learning_rate": 1.415187312375153e-06, + "loss": 0.2757, + "step": 38056 + }, + { + "epoch": 0.7618447063533769, + "grad_norm": 1.0276786088943481, + "learning_rate": 1.4149613281458651e-06, + "loss": 0.3114, + "step": 38057 + }, + { + "epoch": 0.7618647248705053, + "grad_norm": 1.2103759050369263, + "learning_rate": 1.4147353589873087e-06, + "loss": 0.3111, + "step": 38058 + }, + { + "epoch": 0.7618847433876336, + "grad_norm": 1.0994079113006592, + "learning_rate": 1.414509404900431e-06, + "loss": 0.3117, + "step": 38059 + }, + { + "epoch": 0.7619047619047619, + "grad_norm": 1.2179982662200928, + "learning_rate": 1.4142834658861864e-06, + "loss": 0.3211, + "step": 38060 + }, + { + "epoch": 0.7619247804218903, + "grad_norm": 1.0631310939788818, + "learning_rate": 1.414057541945522e-06, + "loss": 0.2603, + "step": 38061 + }, + { + "epoch": 0.7619447989390186, + "grad_norm": 1.3900678157806396, + "learning_rate": 1.4138316330793884e-06, + "loss": 0.3348, + "step": 38062 + }, + { + "epoch": 0.761964817456147, + "grad_norm": 1.2686767578125, + "learning_rate": 1.413605739288733e-06, + "loss": 0.3238, + "step": 38063 + }, + { + "epoch": 0.7619848359732753, + "grad_norm": 1.240296483039856, + "learning_rate": 1.4133798605745087e-06, + "loss": 0.2919, + "step": 38064 + }, + { + "epoch": 0.7620048544904037, + "grad_norm": 1.1766085624694824, + "learning_rate": 1.4131539969376624e-06, + "loss": 0.3071, + "step": 38065 + }, + { + "epoch": 0.762024873007532, + "grad_norm": 1.0676532983779907, + "learning_rate": 1.4129281483791462e-06, + "loss": 0.2547, + "step": 38066 + }, + { + "epoch": 0.7620448915246603, + "grad_norm": 1.1960119009017944, + "learning_rate": 1.412702314899908e-06, + "loss": 0.336, + "step": 38067 + }, + { + "epoch": 0.7620649100417887, + "grad_norm": 1.1251444816589355, + "learning_rate": 1.4124764965008958e-06, + "loss": 0.2535, + "step": 38068 + }, + { + "epoch": 0.762084928558917, + "grad_norm": 0.9938868880271912, + "learning_rate": 1.412250693183062e-06, + "loss": 0.3037, + "step": 38069 + }, + { + "epoch": 0.7621049470760454, + "grad_norm": 1.0914227962493896, + "learning_rate": 1.4120249049473544e-06, + "loss": 0.2908, + "step": 38070 + }, + { + "epoch": 0.7621249655931737, + "grad_norm": 1.1792895793914795, + "learning_rate": 1.4117991317947216e-06, + "loss": 0.2617, + "step": 38071 + }, + { + "epoch": 0.7621449841103021, + "grad_norm": 1.253143548965454, + "learning_rate": 1.4115733737261116e-06, + "loss": 0.3125, + "step": 38072 + }, + { + "epoch": 0.7621650026274304, + "grad_norm": 1.2422839403152466, + "learning_rate": 1.4113476307424768e-06, + "loss": 0.2971, + "step": 38073 + }, + { + "epoch": 0.7621850211445588, + "grad_norm": 1.223512887954712, + "learning_rate": 1.4111219028447637e-06, + "loss": 0.2746, + "step": 38074 + }, + { + "epoch": 0.7622050396616871, + "grad_norm": 1.0766761302947998, + "learning_rate": 1.4108961900339225e-06, + "loss": 0.2753, + "step": 38075 + }, + { + "epoch": 0.7622250581788154, + "grad_norm": 1.1494208574295044, + "learning_rate": 1.4106704923108994e-06, + "loss": 0.343, + "step": 38076 + }, + { + "epoch": 0.7622450766959438, + "grad_norm": 1.1780807971954346, + "learning_rate": 1.4104448096766464e-06, + "loss": 0.2956, + "step": 38077 + }, + { + "epoch": 0.7622650952130721, + "grad_norm": 1.8053585290908813, + "learning_rate": 1.4102191421321092e-06, + "loss": 0.7364, + "step": 38078 + }, + { + "epoch": 0.7622851137302005, + "grad_norm": 1.9555628299713135, + "learning_rate": 1.4099934896782403e-06, + "loss": 0.799, + "step": 38079 + }, + { + "epoch": 0.7623051322473288, + "grad_norm": 1.1157102584838867, + "learning_rate": 1.4097678523159864e-06, + "loss": 0.2732, + "step": 38080 + }, + { + "epoch": 0.7623251507644572, + "grad_norm": 1.1902806758880615, + "learning_rate": 1.4095422300462936e-06, + "loss": 0.2987, + "step": 38081 + }, + { + "epoch": 0.7623451692815855, + "grad_norm": 1.429617166519165, + "learning_rate": 1.4093166228701143e-06, + "loss": 0.305, + "step": 38082 + }, + { + "epoch": 0.7623651877987138, + "grad_norm": 1.2082393169403076, + "learning_rate": 1.4090910307883955e-06, + "loss": 0.2863, + "step": 38083 + }, + { + "epoch": 0.7623852063158422, + "grad_norm": 1.9261680841445923, + "learning_rate": 1.4088654538020853e-06, + "loss": 0.7568, + "step": 38084 + }, + { + "epoch": 0.7624052248329705, + "grad_norm": 1.0888633728027344, + "learning_rate": 1.4086398919121314e-06, + "loss": 0.2823, + "step": 38085 + }, + { + "epoch": 0.7624252433500989, + "grad_norm": 1.0502320528030396, + "learning_rate": 1.4084143451194814e-06, + "loss": 0.2821, + "step": 38086 + }, + { + "epoch": 0.7624452618672272, + "grad_norm": 1.1601991653442383, + "learning_rate": 1.408188813425086e-06, + "loss": 0.3419, + "step": 38087 + }, + { + "epoch": 0.7624652803843556, + "grad_norm": 1.1757638454437256, + "learning_rate": 1.4079632968298917e-06, + "loss": 0.3303, + "step": 38088 + }, + { + "epoch": 0.7624852989014839, + "grad_norm": 1.8708641529083252, + "learning_rate": 1.4077377953348453e-06, + "loss": 0.6954, + "step": 38089 + }, + { + "epoch": 0.7625053174186123, + "grad_norm": 1.9197630882263184, + "learning_rate": 1.4075123089408976e-06, + "loss": 0.703, + "step": 38090 + }, + { + "epoch": 0.7625253359357406, + "grad_norm": 1.2356398105621338, + "learning_rate": 1.407286837648994e-06, + "loss": 0.3461, + "step": 38091 + }, + { + "epoch": 0.7625453544528689, + "grad_norm": 1.2104569673538208, + "learning_rate": 1.4070613814600842e-06, + "loss": 0.358, + "step": 38092 + }, + { + "epoch": 0.7625653729699973, + "grad_norm": 1.1541773080825806, + "learning_rate": 1.4068359403751153e-06, + "loss": 0.3148, + "step": 38093 + }, + { + "epoch": 0.7625853914871256, + "grad_norm": 1.0715880393981934, + "learning_rate": 1.4066105143950338e-06, + "loss": 0.2881, + "step": 38094 + }, + { + "epoch": 0.762605410004254, + "grad_norm": 1.1446479558944702, + "learning_rate": 1.4063851035207894e-06, + "loss": 0.2945, + "step": 38095 + }, + { + "epoch": 0.7626254285213823, + "grad_norm": 1.1591328382492065, + "learning_rate": 1.406159707753329e-06, + "loss": 0.3316, + "step": 38096 + }, + { + "epoch": 0.7626454470385107, + "grad_norm": 2.013002872467041, + "learning_rate": 1.4059343270935993e-06, + "loss": 0.8227, + "step": 38097 + }, + { + "epoch": 0.762665465555639, + "grad_norm": 1.104162573814392, + "learning_rate": 1.405708961542549e-06, + "loss": 0.2856, + "step": 38098 + }, + { + "epoch": 0.7626854840727673, + "grad_norm": 1.1542266607284546, + "learning_rate": 1.4054836111011227e-06, + "loss": 0.3085, + "step": 38099 + }, + { + "epoch": 0.7627055025898957, + "grad_norm": 1.0825475454330444, + "learning_rate": 1.4052582757702709e-06, + "loss": 0.2794, + "step": 38100 + }, + { + "epoch": 0.762725521107024, + "grad_norm": 1.1819298267364502, + "learning_rate": 1.4050329555509406e-06, + "loss": 0.2653, + "step": 38101 + }, + { + "epoch": 0.7627455396241524, + "grad_norm": 1.1410942077636719, + "learning_rate": 1.4048076504440762e-06, + "loss": 0.3163, + "step": 38102 + }, + { + "epoch": 0.7627655581412807, + "grad_norm": 1.207233190536499, + "learning_rate": 1.4045823604506281e-06, + "loss": 0.3002, + "step": 38103 + }, + { + "epoch": 0.7627855766584091, + "grad_norm": 1.0673470497131348, + "learning_rate": 1.4043570855715405e-06, + "loss": 0.3241, + "step": 38104 + }, + { + "epoch": 0.7628055951755374, + "grad_norm": 1.1046662330627441, + "learning_rate": 1.4041318258077636e-06, + "loss": 0.3341, + "step": 38105 + }, + { + "epoch": 0.7628256136926657, + "grad_norm": 1.2099653482437134, + "learning_rate": 1.4039065811602426e-06, + "loss": 0.3112, + "step": 38106 + }, + { + "epoch": 0.7628456322097941, + "grad_norm": 1.2252826690673828, + "learning_rate": 1.4036813516299246e-06, + "loss": 0.294, + "step": 38107 + }, + { + "epoch": 0.7628656507269224, + "grad_norm": 0.9917402267456055, + "learning_rate": 1.4034561372177546e-06, + "loss": 0.2658, + "step": 38108 + }, + { + "epoch": 0.7628856692440508, + "grad_norm": 1.1115977764129639, + "learning_rate": 1.4032309379246822e-06, + "loss": 0.2848, + "step": 38109 + }, + { + "epoch": 0.7629056877611791, + "grad_norm": 2.0329318046569824, + "learning_rate": 1.4030057537516534e-06, + "loss": 0.6897, + "step": 38110 + }, + { + "epoch": 0.7629257062783075, + "grad_norm": 1.1242539882659912, + "learning_rate": 1.402780584699614e-06, + "loss": 0.2248, + "step": 38111 + }, + { + "epoch": 0.7629457247954358, + "grad_norm": 1.1449609994888306, + "learning_rate": 1.4025554307695093e-06, + "loss": 0.2809, + "step": 38112 + }, + { + "epoch": 0.7629657433125642, + "grad_norm": 1.0267874002456665, + "learning_rate": 1.4023302919622893e-06, + "loss": 0.2953, + "step": 38113 + }, + { + "epoch": 0.7629857618296925, + "grad_norm": 1.144118070602417, + "learning_rate": 1.4021051682788983e-06, + "loss": 0.3226, + "step": 38114 + }, + { + "epoch": 0.7630057803468208, + "grad_norm": 1.2814793586730957, + "learning_rate": 1.401880059720282e-06, + "loss": 0.2886, + "step": 38115 + }, + { + "epoch": 0.7630257988639492, + "grad_norm": 1.174154281616211, + "learning_rate": 1.4016549662873885e-06, + "loss": 0.2844, + "step": 38116 + }, + { + "epoch": 0.7630458173810775, + "grad_norm": 1.078803539276123, + "learning_rate": 1.4014298879811616e-06, + "loss": 0.2875, + "step": 38117 + }, + { + "epoch": 0.7630658358982059, + "grad_norm": 1.1978579759597778, + "learning_rate": 1.4012048248025505e-06, + "loss": 0.3021, + "step": 38118 + }, + { + "epoch": 0.7630858544153342, + "grad_norm": 1.1140013933181763, + "learning_rate": 1.4009797767525002e-06, + "loss": 0.2964, + "step": 38119 + }, + { + "epoch": 0.7631058729324626, + "grad_norm": 0.9868497848510742, + "learning_rate": 1.4007547438319558e-06, + "loss": 0.3025, + "step": 38120 + }, + { + "epoch": 0.7631258914495909, + "grad_norm": 1.1027741432189941, + "learning_rate": 1.4005297260418631e-06, + "loss": 0.2797, + "step": 38121 + }, + { + "epoch": 0.7631459099667192, + "grad_norm": 1.182599425315857, + "learning_rate": 1.4003047233831702e-06, + "loss": 0.3167, + "step": 38122 + }, + { + "epoch": 0.7631659284838476, + "grad_norm": 1.1482216119766235, + "learning_rate": 1.4000797358568213e-06, + "loss": 0.2833, + "step": 38123 + }, + { + "epoch": 0.7631859470009759, + "grad_norm": 1.0305763483047485, + "learning_rate": 1.3998547634637622e-06, + "loss": 0.2715, + "step": 38124 + }, + { + "epoch": 0.7632059655181043, + "grad_norm": 1.112497329711914, + "learning_rate": 1.399629806204938e-06, + "loss": 0.3198, + "step": 38125 + }, + { + "epoch": 0.7632259840352326, + "grad_norm": 1.1457422971725464, + "learning_rate": 1.3994048640812963e-06, + "loss": 0.3079, + "step": 38126 + }, + { + "epoch": 0.763246002552361, + "grad_norm": 1.168988585472107, + "learning_rate": 1.399179937093782e-06, + "loss": 0.3391, + "step": 38127 + }, + { + "epoch": 0.7632660210694893, + "grad_norm": 1.1415910720825195, + "learning_rate": 1.3989550252433387e-06, + "loss": 0.2864, + "step": 38128 + }, + { + "epoch": 0.7632860395866177, + "grad_norm": 1.1724095344543457, + "learning_rate": 1.3987301285309152e-06, + "loss": 0.3031, + "step": 38129 + }, + { + "epoch": 0.763306058103746, + "grad_norm": 1.2357044219970703, + "learning_rate": 1.3985052469574539e-06, + "loss": 0.309, + "step": 38130 + }, + { + "epoch": 0.7633260766208743, + "grad_norm": 1.134259819984436, + "learning_rate": 1.3982803805239026e-06, + "loss": 0.326, + "step": 38131 + }, + { + "epoch": 0.7633460951380027, + "grad_norm": 1.1158703565597534, + "learning_rate": 1.3980555292312058e-06, + "loss": 0.2982, + "step": 38132 + }, + { + "epoch": 0.763366113655131, + "grad_norm": 1.0008995532989502, + "learning_rate": 1.397830693080308e-06, + "loss": 0.2688, + "step": 38133 + }, + { + "epoch": 0.7633861321722594, + "grad_norm": 1.0546200275421143, + "learning_rate": 1.3976058720721535e-06, + "loss": 0.3207, + "step": 38134 + }, + { + "epoch": 0.7634061506893877, + "grad_norm": 1.2024766206741333, + "learning_rate": 1.3973810662076902e-06, + "loss": 0.3722, + "step": 38135 + }, + { + "epoch": 0.7634261692065161, + "grad_norm": 1.0909273624420166, + "learning_rate": 1.3971562754878616e-06, + "loss": 0.2835, + "step": 38136 + }, + { + "epoch": 0.7634461877236444, + "grad_norm": 1.054760217666626, + "learning_rate": 1.3969314999136124e-06, + "loss": 0.2624, + "step": 38137 + }, + { + "epoch": 0.7634662062407727, + "grad_norm": 2.2022311687469482, + "learning_rate": 1.3967067394858885e-06, + "loss": 0.788, + "step": 38138 + }, + { + "epoch": 0.7634862247579011, + "grad_norm": 1.2240031957626343, + "learning_rate": 1.3964819942056319e-06, + "loss": 0.3268, + "step": 38139 + }, + { + "epoch": 0.7635062432750294, + "grad_norm": 1.0004335641860962, + "learning_rate": 1.3962572640737908e-06, + "loss": 0.266, + "step": 38140 + }, + { + "epoch": 0.7635262617921578, + "grad_norm": 1.9804085493087769, + "learning_rate": 1.396032549091308e-06, + "loss": 0.7211, + "step": 38141 + }, + { + "epoch": 0.7635462803092861, + "grad_norm": 1.1915053129196167, + "learning_rate": 1.3958078492591299e-06, + "loss": 0.2617, + "step": 38142 + }, + { + "epoch": 0.7635662988264145, + "grad_norm": 1.0704752206802368, + "learning_rate": 1.3955831645781982e-06, + "loss": 0.309, + "step": 38143 + }, + { + "epoch": 0.7635863173435428, + "grad_norm": 1.0737557411193848, + "learning_rate": 1.395358495049461e-06, + "loss": 0.2934, + "step": 38144 + }, + { + "epoch": 0.7636063358606712, + "grad_norm": 1.194455862045288, + "learning_rate": 1.3951338406738608e-06, + "loss": 0.2599, + "step": 38145 + }, + { + "epoch": 0.7636263543777995, + "grad_norm": 1.799460530281067, + "learning_rate": 1.3949092014523419e-06, + "loss": 0.7641, + "step": 38146 + }, + { + "epoch": 0.7636463728949278, + "grad_norm": 1.1345041990280151, + "learning_rate": 1.3946845773858475e-06, + "loss": 0.3131, + "step": 38147 + }, + { + "epoch": 0.7636663914120562, + "grad_norm": 1.0339053869247437, + "learning_rate": 1.3944599684753252e-06, + "loss": 0.2752, + "step": 38148 + }, + { + "epoch": 0.7636864099291845, + "grad_norm": 1.0537078380584717, + "learning_rate": 1.3942353747217169e-06, + "loss": 0.2989, + "step": 38149 + }, + { + "epoch": 0.7637064284463129, + "grad_norm": 1.1528522968292236, + "learning_rate": 1.3940107961259675e-06, + "loss": 0.2848, + "step": 38150 + }, + { + "epoch": 0.7637264469634412, + "grad_norm": 1.0594714879989624, + "learning_rate": 1.39378623268902e-06, + "loss": 0.2729, + "step": 38151 + }, + { + "epoch": 0.7637464654805696, + "grad_norm": 1.0912305116653442, + "learning_rate": 1.3935616844118183e-06, + "loss": 0.2071, + "step": 38152 + }, + { + "epoch": 0.7637664839976979, + "grad_norm": 1.1426100730895996, + "learning_rate": 1.3933371512953081e-06, + "loss": 0.3112, + "step": 38153 + }, + { + "epoch": 0.7637865025148262, + "grad_norm": 1.9031105041503906, + "learning_rate": 1.3931126333404315e-06, + "loss": 0.7715, + "step": 38154 + }, + { + "epoch": 0.7638065210319546, + "grad_norm": 1.0419838428497314, + "learning_rate": 1.392888130548134e-06, + "loss": 0.2885, + "step": 38155 + }, + { + "epoch": 0.7638265395490829, + "grad_norm": 1.1611554622650146, + "learning_rate": 1.3926636429193573e-06, + "loss": 0.3441, + "step": 38156 + }, + { + "epoch": 0.7638465580662113, + "grad_norm": 1.0557359457015991, + "learning_rate": 1.3924391704550483e-06, + "loss": 0.2888, + "step": 38157 + }, + { + "epoch": 0.7638665765833396, + "grad_norm": 1.3419277667999268, + "learning_rate": 1.392214713156148e-06, + "loss": 0.2743, + "step": 38158 + }, + { + "epoch": 0.763886595100468, + "grad_norm": 1.03727388381958, + "learning_rate": 1.391990271023601e-06, + "loss": 0.274, + "step": 38159 + }, + { + "epoch": 0.7639066136175963, + "grad_norm": 1.2915658950805664, + "learning_rate": 1.3917658440583503e-06, + "loss": 0.2952, + "step": 38160 + }, + { + "epoch": 0.7639266321347247, + "grad_norm": 2.0244321823120117, + "learning_rate": 1.391541432261338e-06, + "loss": 0.7319, + "step": 38161 + }, + { + "epoch": 0.763946650651853, + "grad_norm": 1.1651875972747803, + "learning_rate": 1.391317035633511e-06, + "loss": 0.2579, + "step": 38162 + }, + { + "epoch": 0.7639666691689813, + "grad_norm": 1.1654655933380127, + "learning_rate": 1.3910926541758097e-06, + "loss": 0.3177, + "step": 38163 + }, + { + "epoch": 0.7639866876861097, + "grad_norm": 1.1407030820846558, + "learning_rate": 1.390868287889179e-06, + "loss": 0.3448, + "step": 38164 + }, + { + "epoch": 0.764006706203238, + "grad_norm": 1.0530765056610107, + "learning_rate": 1.3906439367745595e-06, + "loss": 0.2794, + "step": 38165 + }, + { + "epoch": 0.7640267247203664, + "grad_norm": 1.1879327297210693, + "learning_rate": 1.3904196008328975e-06, + "loss": 0.3038, + "step": 38166 + }, + { + "epoch": 0.7640467432374947, + "grad_norm": 1.1566176414489746, + "learning_rate": 1.390195280065133e-06, + "loss": 0.2737, + "step": 38167 + }, + { + "epoch": 0.7640667617546231, + "grad_norm": 1.2533884048461914, + "learning_rate": 1.3899709744722128e-06, + "loss": 0.3099, + "step": 38168 + }, + { + "epoch": 0.7640867802717514, + "grad_norm": 1.1052618026733398, + "learning_rate": 1.3897466840550772e-06, + "loss": 0.2839, + "step": 38169 + }, + { + "epoch": 0.7641067987888797, + "grad_norm": 1.1926677227020264, + "learning_rate": 1.3895224088146686e-06, + "loss": 0.3101, + "step": 38170 + }, + { + "epoch": 0.7641268173060081, + "grad_norm": 1.104267954826355, + "learning_rate": 1.389298148751932e-06, + "loss": 0.2911, + "step": 38171 + }, + { + "epoch": 0.7641468358231364, + "grad_norm": 1.9780274629592896, + "learning_rate": 1.3890739038678092e-06, + "loss": 0.8146, + "step": 38172 + }, + { + "epoch": 0.7641668543402648, + "grad_norm": 1.42998206615448, + "learning_rate": 1.3888496741632424e-06, + "loss": 0.3202, + "step": 38173 + }, + { + "epoch": 0.7641868728573931, + "grad_norm": 1.25499427318573, + "learning_rate": 1.388625459639173e-06, + "loss": 0.2911, + "step": 38174 + }, + { + "epoch": 0.7642068913745215, + "grad_norm": 1.1228257417678833, + "learning_rate": 1.3884012602965463e-06, + "loss": 0.2885, + "step": 38175 + }, + { + "epoch": 0.7642269098916498, + "grad_norm": 1.187088966369629, + "learning_rate": 1.3881770761363039e-06, + "loss": 0.3305, + "step": 38176 + }, + { + "epoch": 0.7642469284087782, + "grad_norm": 1.1986042261123657, + "learning_rate": 1.3879529071593877e-06, + "loss": 0.2594, + "step": 38177 + }, + { + "epoch": 0.7642669469259065, + "grad_norm": 1.0020866394042969, + "learning_rate": 1.3877287533667382e-06, + "loss": 0.2739, + "step": 38178 + }, + { + "epoch": 0.7642869654430348, + "grad_norm": 1.2003217935562134, + "learning_rate": 1.3875046147593018e-06, + "loss": 0.2853, + "step": 38179 + }, + { + "epoch": 0.7643069839601632, + "grad_norm": 1.0643714666366577, + "learning_rate": 1.3872804913380167e-06, + "loss": 0.3038, + "step": 38180 + }, + { + "epoch": 0.7643270024772915, + "grad_norm": 1.1258715391159058, + "learning_rate": 1.3870563831038285e-06, + "loss": 0.2663, + "step": 38181 + }, + { + "epoch": 0.7643470209944199, + "grad_norm": 1.9671733379364014, + "learning_rate": 1.3868322900576775e-06, + "loss": 0.8237, + "step": 38182 + }, + { + "epoch": 0.7643670395115482, + "grad_norm": 1.112084150314331, + "learning_rate": 1.3866082122005047e-06, + "loss": 0.2719, + "step": 38183 + }, + { + "epoch": 0.7643870580286766, + "grad_norm": 1.0522618293762207, + "learning_rate": 1.3863841495332553e-06, + "loss": 0.2763, + "step": 38184 + }, + { + "epoch": 0.7644070765458049, + "grad_norm": 1.3359761238098145, + "learning_rate": 1.3861601020568688e-06, + "loss": 0.3539, + "step": 38185 + }, + { + "epoch": 0.7644270950629332, + "grad_norm": 1.1360293626785278, + "learning_rate": 1.3859360697722874e-06, + "loss": 0.2905, + "step": 38186 + }, + { + "epoch": 0.7644471135800616, + "grad_norm": 1.7858394384384155, + "learning_rate": 1.3857120526804519e-06, + "loss": 0.8205, + "step": 38187 + }, + { + "epoch": 0.7644671320971899, + "grad_norm": 1.1104750633239746, + "learning_rate": 1.3854880507823065e-06, + "loss": 0.3204, + "step": 38188 + }, + { + "epoch": 0.7644871506143183, + "grad_norm": 1.8979909420013428, + "learning_rate": 1.3852640640787912e-06, + "loss": 0.8084, + "step": 38189 + }, + { + "epoch": 0.7645071691314466, + "grad_norm": 1.108435869216919, + "learning_rate": 1.3850400925708486e-06, + "loss": 0.3134, + "step": 38190 + }, + { + "epoch": 0.764527187648575, + "grad_norm": 1.2338119745254517, + "learning_rate": 1.3848161362594186e-06, + "loss": 0.2572, + "step": 38191 + }, + { + "epoch": 0.7645472061657033, + "grad_norm": 1.1260133981704712, + "learning_rate": 1.3845921951454428e-06, + "loss": 0.2816, + "step": 38192 + }, + { + "epoch": 0.7645672246828317, + "grad_norm": 1.0762901306152344, + "learning_rate": 1.3843682692298627e-06, + "loss": 0.2905, + "step": 38193 + }, + { + "epoch": 0.76458724319996, + "grad_norm": 1.1274703741073608, + "learning_rate": 1.3841443585136221e-06, + "loss": 0.3014, + "step": 38194 + }, + { + "epoch": 0.7646072617170883, + "grad_norm": 2.0230934619903564, + "learning_rate": 1.3839204629976605e-06, + "loss": 0.7937, + "step": 38195 + }, + { + "epoch": 0.7646272802342167, + "grad_norm": 1.178542137145996, + "learning_rate": 1.3836965826829179e-06, + "loss": 0.2986, + "step": 38196 + }, + { + "epoch": 0.764647298751345, + "grad_norm": 1.175183653831482, + "learning_rate": 1.3834727175703378e-06, + "loss": 0.3049, + "step": 38197 + }, + { + "epoch": 0.7646673172684734, + "grad_norm": 1.302754282951355, + "learning_rate": 1.3832488676608603e-06, + "loss": 0.3011, + "step": 38198 + }, + { + "epoch": 0.7646873357856017, + "grad_norm": 1.008789300918579, + "learning_rate": 1.3830250329554257e-06, + "loss": 0.3096, + "step": 38199 + }, + { + "epoch": 0.7647073543027301, + "grad_norm": 1.1197729110717773, + "learning_rate": 1.382801213454976e-06, + "loss": 0.2797, + "step": 38200 + }, + { + "epoch": 0.7647273728198584, + "grad_norm": 1.2176786661148071, + "learning_rate": 1.3825774091604499e-06, + "loss": 0.3015, + "step": 38201 + }, + { + "epoch": 0.7647473913369867, + "grad_norm": 1.1029607057571411, + "learning_rate": 1.3823536200727917e-06, + "loss": 0.2777, + "step": 38202 + }, + { + "epoch": 0.7647674098541151, + "grad_norm": 1.0682040452957153, + "learning_rate": 1.38212984619294e-06, + "loss": 0.2931, + "step": 38203 + }, + { + "epoch": 0.7647874283712434, + "grad_norm": 1.1392359733581543, + "learning_rate": 1.381906087521836e-06, + "loss": 0.2842, + "step": 38204 + }, + { + "epoch": 0.7648074468883718, + "grad_norm": 1.1062556505203247, + "learning_rate": 1.3816823440604182e-06, + "loss": 0.2643, + "step": 38205 + }, + { + "epoch": 0.7648274654055001, + "grad_norm": 1.0889520645141602, + "learning_rate": 1.38145861580963e-06, + "loss": 0.2769, + "step": 38206 + }, + { + "epoch": 0.7648474839226285, + "grad_norm": 1.9389499425888062, + "learning_rate": 1.381234902770412e-06, + "loss": 0.7901, + "step": 38207 + }, + { + "epoch": 0.7648675024397568, + "grad_norm": 1.160264015197754, + "learning_rate": 1.3810112049437035e-06, + "loss": 0.3279, + "step": 38208 + }, + { + "epoch": 0.7648875209568852, + "grad_norm": 1.072251558303833, + "learning_rate": 1.380787522330444e-06, + "loss": 0.2312, + "step": 38209 + }, + { + "epoch": 0.7649075394740135, + "grad_norm": 1.0990129709243774, + "learning_rate": 1.3805638549315765e-06, + "loss": 0.2437, + "step": 38210 + }, + { + "epoch": 0.7649275579911418, + "grad_norm": 1.1621531248092651, + "learning_rate": 1.3803402027480394e-06, + "loss": 0.2995, + "step": 38211 + }, + { + "epoch": 0.7649475765082702, + "grad_norm": 1.8583132028579712, + "learning_rate": 1.380116565780773e-06, + "loss": 0.7569, + "step": 38212 + }, + { + "epoch": 0.7649675950253985, + "grad_norm": 1.1325398683547974, + "learning_rate": 1.379892944030718e-06, + "loss": 0.3014, + "step": 38213 + }, + { + "epoch": 0.7649876135425269, + "grad_norm": 1.16497004032135, + "learning_rate": 1.379669337498812e-06, + "loss": 0.2954, + "step": 38214 + }, + { + "epoch": 0.7650076320596552, + "grad_norm": 1.0921534299850464, + "learning_rate": 1.379445746185999e-06, + "loss": 0.2965, + "step": 38215 + }, + { + "epoch": 0.7650276505767836, + "grad_norm": 1.085274577140808, + "learning_rate": 1.3792221700932162e-06, + "loss": 0.2642, + "step": 38216 + }, + { + "epoch": 0.7650476690939119, + "grad_norm": 1.233063817024231, + "learning_rate": 1.3789986092214047e-06, + "loss": 0.3085, + "step": 38217 + }, + { + "epoch": 0.7650676876110402, + "grad_norm": 1.1085968017578125, + "learning_rate": 1.3787750635715026e-06, + "loss": 0.2813, + "step": 38218 + }, + { + "epoch": 0.7650877061281686, + "grad_norm": 1.0428316593170166, + "learning_rate": 1.3785515331444505e-06, + "loss": 0.2853, + "step": 38219 + }, + { + "epoch": 0.7651077246452969, + "grad_norm": 1.1469975709915161, + "learning_rate": 1.3783280179411894e-06, + "loss": 0.3168, + "step": 38220 + }, + { + "epoch": 0.7651277431624253, + "grad_norm": 1.2258154153823853, + "learning_rate": 1.378104517962659e-06, + "loss": 0.3173, + "step": 38221 + }, + { + "epoch": 0.7651477616795536, + "grad_norm": 1.0677752494812012, + "learning_rate": 1.377881033209797e-06, + "loss": 0.2814, + "step": 38222 + }, + { + "epoch": 0.765167780196682, + "grad_norm": 1.0869606733322144, + "learning_rate": 1.377657563683542e-06, + "loss": 0.2397, + "step": 38223 + }, + { + "epoch": 0.7651877987138103, + "grad_norm": 1.1523830890655518, + "learning_rate": 1.377434109384837e-06, + "loss": 0.2712, + "step": 38224 + }, + { + "epoch": 0.7652078172309387, + "grad_norm": 1.195639729499817, + "learning_rate": 1.3772106703146192e-06, + "loss": 0.3255, + "step": 38225 + }, + { + "epoch": 0.765227835748067, + "grad_norm": 1.0899224281311035, + "learning_rate": 1.3769872464738288e-06, + "loss": 0.3299, + "step": 38226 + }, + { + "epoch": 0.7652478542651953, + "grad_norm": 1.0406886339187622, + "learning_rate": 1.3767638378634017e-06, + "loss": 0.2939, + "step": 38227 + }, + { + "epoch": 0.7652678727823237, + "grad_norm": 1.1064397096633911, + "learning_rate": 1.3765404444842822e-06, + "loss": 0.3192, + "step": 38228 + }, + { + "epoch": 0.765287891299452, + "grad_norm": 2.09342622756958, + "learning_rate": 1.3763170663374064e-06, + "loss": 0.8122, + "step": 38229 + }, + { + "epoch": 0.7653079098165804, + "grad_norm": 1.1524471044540405, + "learning_rate": 1.3760937034237138e-06, + "loss": 0.2937, + "step": 38230 + }, + { + "epoch": 0.7653279283337087, + "grad_norm": 1.112969160079956, + "learning_rate": 1.3758703557441421e-06, + "loss": 0.2903, + "step": 38231 + }, + { + "epoch": 0.7653479468508371, + "grad_norm": 1.0748525857925415, + "learning_rate": 1.3756470232996311e-06, + "loss": 0.2734, + "step": 38232 + }, + { + "epoch": 0.7653679653679654, + "grad_norm": 1.0914067029953003, + "learning_rate": 1.375423706091122e-06, + "loss": 0.2858, + "step": 38233 + }, + { + "epoch": 0.7653879838850937, + "grad_norm": 1.0590262413024902, + "learning_rate": 1.3752004041195517e-06, + "loss": 0.281, + "step": 38234 + }, + { + "epoch": 0.7654080024022221, + "grad_norm": 2.031280040740967, + "learning_rate": 1.3749771173858583e-06, + "loss": 0.7127, + "step": 38235 + }, + { + "epoch": 0.7654280209193504, + "grad_norm": 1.148840069770813, + "learning_rate": 1.37475384589098e-06, + "loss": 0.3109, + "step": 38236 + }, + { + "epoch": 0.7654480394364788, + "grad_norm": 1.1390266418457031, + "learning_rate": 1.3745305896358573e-06, + "loss": 0.2947, + "step": 38237 + }, + { + "epoch": 0.7654680579536071, + "grad_norm": 1.9706242084503174, + "learning_rate": 1.3743073486214281e-06, + "loss": 0.7771, + "step": 38238 + }, + { + "epoch": 0.7654880764707355, + "grad_norm": 1.2617300748825073, + "learning_rate": 1.3740841228486308e-06, + "loss": 0.3478, + "step": 38239 + }, + { + "epoch": 0.7655080949878638, + "grad_norm": 1.479264497756958, + "learning_rate": 1.3738609123184022e-06, + "loss": 0.2868, + "step": 38240 + }, + { + "epoch": 0.7655281135049922, + "grad_norm": 1.1147528886795044, + "learning_rate": 1.3736377170316828e-06, + "loss": 0.2763, + "step": 38241 + }, + { + "epoch": 0.7655481320221205, + "grad_norm": 0.9873397350311279, + "learning_rate": 1.3734145369894103e-06, + "loss": 0.2817, + "step": 38242 + }, + { + "epoch": 0.7655681505392488, + "grad_norm": 1.0942140817642212, + "learning_rate": 1.3731913721925228e-06, + "loss": 0.2992, + "step": 38243 + }, + { + "epoch": 0.7655881690563772, + "grad_norm": 1.2212557792663574, + "learning_rate": 1.3729682226419567e-06, + "loss": 0.2922, + "step": 38244 + }, + { + "epoch": 0.7656081875735055, + "grad_norm": 2.13350248336792, + "learning_rate": 1.3727450883386513e-06, + "loss": 0.7943, + "step": 38245 + }, + { + "epoch": 0.7656282060906339, + "grad_norm": 1.0983861684799194, + "learning_rate": 1.3725219692835468e-06, + "loss": 0.2638, + "step": 38246 + }, + { + "epoch": 0.7656482246077622, + "grad_norm": 1.1585264205932617, + "learning_rate": 1.3722988654775792e-06, + "loss": 0.2673, + "step": 38247 + }, + { + "epoch": 0.7656682431248906, + "grad_norm": 1.284475326538086, + "learning_rate": 1.372075776921687e-06, + "loss": 0.3415, + "step": 38248 + }, + { + "epoch": 0.7656882616420189, + "grad_norm": 1.1272333860397339, + "learning_rate": 1.371852703616805e-06, + "loss": 0.3215, + "step": 38249 + }, + { + "epoch": 0.7657082801591472, + "grad_norm": 1.0470186471939087, + "learning_rate": 1.3716296455638755e-06, + "loss": 0.2819, + "step": 38250 + }, + { + "epoch": 0.7657282986762756, + "grad_norm": 1.0438439846038818, + "learning_rate": 1.371406602763834e-06, + "loss": 0.3015, + "step": 38251 + }, + { + "epoch": 0.7657483171934039, + "grad_norm": 1.144975185394287, + "learning_rate": 1.3711835752176184e-06, + "loss": 0.2602, + "step": 38252 + }, + { + "epoch": 0.7657683357105323, + "grad_norm": 1.0790684223175049, + "learning_rate": 1.3709605629261663e-06, + "loss": 0.3126, + "step": 38253 + }, + { + "epoch": 0.7657883542276606, + "grad_norm": 1.7351561784744263, + "learning_rate": 1.3707375658904132e-06, + "loss": 0.7107, + "step": 38254 + }, + { + "epoch": 0.765808372744789, + "grad_norm": 1.691997766494751, + "learning_rate": 1.3705145841113e-06, + "loss": 0.7381, + "step": 38255 + }, + { + "epoch": 0.7658283912619173, + "grad_norm": 1.191676378250122, + "learning_rate": 1.3702916175897624e-06, + "loss": 0.3424, + "step": 38256 + }, + { + "epoch": 0.7658484097790457, + "grad_norm": 1.141890048980713, + "learning_rate": 1.370068666326736e-06, + "loss": 0.2851, + "step": 38257 + }, + { + "epoch": 0.765868428296174, + "grad_norm": 1.2042654752731323, + "learning_rate": 1.3698457303231599e-06, + "loss": 0.2693, + "step": 38258 + }, + { + "epoch": 0.7658884468133023, + "grad_norm": 1.1303622722625732, + "learning_rate": 1.369622809579973e-06, + "loss": 0.3107, + "step": 38259 + }, + { + "epoch": 0.7659084653304307, + "grad_norm": 1.0896806716918945, + "learning_rate": 1.3693999040981098e-06, + "loss": 0.3162, + "step": 38260 + }, + { + "epoch": 0.765928483847559, + "grad_norm": 1.998345971107483, + "learning_rate": 1.3691770138785088e-06, + "loss": 0.742, + "step": 38261 + }, + { + "epoch": 0.7659485023646874, + "grad_norm": 1.1463583707809448, + "learning_rate": 1.3689541389221061e-06, + "loss": 0.2644, + "step": 38262 + }, + { + "epoch": 0.7659685208818157, + "grad_norm": 1.056166172027588, + "learning_rate": 1.3687312792298369e-06, + "loss": 0.3192, + "step": 38263 + }, + { + "epoch": 0.7659885393989441, + "grad_norm": 1.1744157075881958, + "learning_rate": 1.3685084348026416e-06, + "loss": 0.3542, + "step": 38264 + }, + { + "epoch": 0.7660085579160724, + "grad_norm": 1.141417384147644, + "learning_rate": 1.3682856056414556e-06, + "loss": 0.322, + "step": 38265 + }, + { + "epoch": 0.7660285764332007, + "grad_norm": 0.9749436378479004, + "learning_rate": 1.368062791747215e-06, + "loss": 0.2868, + "step": 38266 + }, + { + "epoch": 0.7660485949503291, + "grad_norm": 1.114203929901123, + "learning_rate": 1.367839993120856e-06, + "loss": 0.2986, + "step": 38267 + }, + { + "epoch": 0.7660686134674574, + "grad_norm": 1.1000382900238037, + "learning_rate": 1.3676172097633168e-06, + "loss": 0.3067, + "step": 38268 + }, + { + "epoch": 0.7660886319845858, + "grad_norm": 1.1806050539016724, + "learning_rate": 1.3673944416755336e-06, + "loss": 0.2848, + "step": 38269 + }, + { + "epoch": 0.7661086505017141, + "grad_norm": 1.112520456314087, + "learning_rate": 1.3671716888584408e-06, + "loss": 0.2708, + "step": 38270 + }, + { + "epoch": 0.7661286690188425, + "grad_norm": 1.9353963136672974, + "learning_rate": 1.3669489513129757e-06, + "loss": 0.7599, + "step": 38271 + }, + { + "epoch": 0.7661486875359708, + "grad_norm": 1.338166356086731, + "learning_rate": 1.366726229040078e-06, + "loss": 0.282, + "step": 38272 + }, + { + "epoch": 0.7661687060530992, + "grad_norm": 1.1888630390167236, + "learning_rate": 1.366503522040681e-06, + "loss": 0.3035, + "step": 38273 + }, + { + "epoch": 0.7661887245702275, + "grad_norm": 1.016439437866211, + "learning_rate": 1.366280830315721e-06, + "loss": 0.2837, + "step": 38274 + }, + { + "epoch": 0.7662087430873558, + "grad_norm": 1.0718127489089966, + "learning_rate": 1.3660581538661345e-06, + "loss": 0.3046, + "step": 38275 + }, + { + "epoch": 0.7662287616044842, + "grad_norm": 1.0177524089813232, + "learning_rate": 1.3658354926928557e-06, + "loss": 0.3296, + "step": 38276 + }, + { + "epoch": 0.7662487801216125, + "grad_norm": 1.7360128164291382, + "learning_rate": 1.3656128467968243e-06, + "loss": 0.7527, + "step": 38277 + }, + { + "epoch": 0.7662687986387409, + "grad_norm": 1.0583888292312622, + "learning_rate": 1.3653902161789744e-06, + "loss": 0.3257, + "step": 38278 + }, + { + "epoch": 0.7662888171558692, + "grad_norm": 1.2180724143981934, + "learning_rate": 1.3651676008402415e-06, + "loss": 0.2739, + "step": 38279 + }, + { + "epoch": 0.7663088356729976, + "grad_norm": 1.1299930810928345, + "learning_rate": 1.3649450007815607e-06, + "loss": 0.2885, + "step": 38280 + }, + { + "epoch": 0.7663288541901259, + "grad_norm": 1.0332130193710327, + "learning_rate": 1.3647224160038701e-06, + "loss": 0.2942, + "step": 38281 + }, + { + "epoch": 0.7663488727072542, + "grad_norm": 1.1678718328475952, + "learning_rate": 1.3644998465081045e-06, + "loss": 0.2861, + "step": 38282 + }, + { + "epoch": 0.7663688912243826, + "grad_norm": 1.0991499423980713, + "learning_rate": 1.364277292295197e-06, + "loss": 0.3117, + "step": 38283 + }, + { + "epoch": 0.7663889097415109, + "grad_norm": 1.9239283800125122, + "learning_rate": 1.3640547533660875e-06, + "loss": 0.7604, + "step": 38284 + }, + { + "epoch": 0.7664089282586393, + "grad_norm": 1.0237247943878174, + "learning_rate": 1.3638322297217071e-06, + "loss": 0.3329, + "step": 38285 + }, + { + "epoch": 0.7664289467757676, + "grad_norm": 1.1448136568069458, + "learning_rate": 1.3636097213629957e-06, + "loss": 0.2992, + "step": 38286 + }, + { + "epoch": 0.766448965292896, + "grad_norm": 1.1409504413604736, + "learning_rate": 1.3633872282908861e-06, + "loss": 0.3475, + "step": 38287 + }, + { + "epoch": 0.7664689838100243, + "grad_norm": 1.0852406024932861, + "learning_rate": 1.3631647505063145e-06, + "loss": 0.2967, + "step": 38288 + }, + { + "epoch": 0.7664890023271527, + "grad_norm": 1.0690406560897827, + "learning_rate": 1.3629422880102133e-06, + "loss": 0.2944, + "step": 38289 + }, + { + "epoch": 0.766509020844281, + "grad_norm": 1.0671483278274536, + "learning_rate": 1.3627198408035224e-06, + "loss": 0.3332, + "step": 38290 + }, + { + "epoch": 0.7665290393614093, + "grad_norm": 1.2495298385620117, + "learning_rate": 1.362497408887174e-06, + "loss": 0.3508, + "step": 38291 + }, + { + "epoch": 0.7665490578785377, + "grad_norm": 1.1328030824661255, + "learning_rate": 1.3622749922621037e-06, + "loss": 0.3207, + "step": 38292 + }, + { + "epoch": 0.766569076395666, + "grad_norm": 1.115098237991333, + "learning_rate": 1.3620525909292454e-06, + "loss": 0.3451, + "step": 38293 + }, + { + "epoch": 0.7665890949127944, + "grad_norm": 1.0479949712753296, + "learning_rate": 1.361830204889536e-06, + "loss": 0.2318, + "step": 38294 + }, + { + "epoch": 0.7666091134299227, + "grad_norm": 1.0650653839111328, + "learning_rate": 1.36160783414391e-06, + "loss": 0.2622, + "step": 38295 + }, + { + "epoch": 0.7666291319470511, + "grad_norm": 1.1727694272994995, + "learning_rate": 1.3613854786933001e-06, + "loss": 0.3179, + "step": 38296 + }, + { + "epoch": 0.7666491504641794, + "grad_norm": 1.1274831295013428, + "learning_rate": 1.3611631385386436e-06, + "loss": 0.2614, + "step": 38297 + }, + { + "epoch": 0.7666691689813077, + "grad_norm": 1.0237679481506348, + "learning_rate": 1.3609408136808733e-06, + "loss": 0.2805, + "step": 38298 + }, + { + "epoch": 0.7666891874984361, + "grad_norm": 1.1365342140197754, + "learning_rate": 1.360718504120926e-06, + "loss": 0.2803, + "step": 38299 + }, + { + "epoch": 0.7667092060155644, + "grad_norm": 1.1569695472717285, + "learning_rate": 1.3604962098597347e-06, + "loss": 0.3308, + "step": 38300 + }, + { + "epoch": 0.7667292245326928, + "grad_norm": 1.1276806592941284, + "learning_rate": 1.3602739308982344e-06, + "loss": 0.2767, + "step": 38301 + }, + { + "epoch": 0.7667492430498211, + "grad_norm": 1.1190299987792969, + "learning_rate": 1.3600516672373575e-06, + "loss": 0.3316, + "step": 38302 + }, + { + "epoch": 0.7667692615669495, + "grad_norm": 1.9484763145446777, + "learning_rate": 1.3598294188780414e-06, + "loss": 0.7263, + "step": 38303 + }, + { + "epoch": 0.7667892800840778, + "grad_norm": 1.154526710510254, + "learning_rate": 1.3596071858212196e-06, + "loss": 0.3208, + "step": 38304 + }, + { + "epoch": 0.7668092986012062, + "grad_norm": 1.085012435913086, + "learning_rate": 1.359384968067825e-06, + "loss": 0.2897, + "step": 38305 + }, + { + "epoch": 0.7668293171183345, + "grad_norm": 1.2216081619262695, + "learning_rate": 1.3591627656187933e-06, + "loss": 0.3204, + "step": 38306 + }, + { + "epoch": 0.7668493356354628, + "grad_norm": 1.1034550666809082, + "learning_rate": 1.358940578475056e-06, + "loss": 0.3354, + "step": 38307 + }, + { + "epoch": 0.7668693541525912, + "grad_norm": 1.8336445093154907, + "learning_rate": 1.3587184066375503e-06, + "loss": 0.7246, + "step": 38308 + }, + { + "epoch": 0.7668893726697195, + "grad_norm": 1.2150812149047852, + "learning_rate": 1.3584962501072074e-06, + "loss": 0.266, + "step": 38309 + }, + { + "epoch": 0.7669093911868479, + "grad_norm": 1.2030096054077148, + "learning_rate": 1.3582741088849644e-06, + "loss": 0.294, + "step": 38310 + }, + { + "epoch": 0.7669294097039762, + "grad_norm": 1.1578338146209717, + "learning_rate": 1.3580519829717514e-06, + "loss": 0.2988, + "step": 38311 + }, + { + "epoch": 0.7669494282211046, + "grad_norm": 1.212371587753296, + "learning_rate": 1.3578298723685058e-06, + "loss": 0.3162, + "step": 38312 + }, + { + "epoch": 0.7669694467382329, + "grad_norm": 1.1621968746185303, + "learning_rate": 1.3576077770761598e-06, + "loss": 0.2939, + "step": 38313 + }, + { + "epoch": 0.7669894652553612, + "grad_norm": 1.1237502098083496, + "learning_rate": 1.3573856970956468e-06, + "loss": 0.2816, + "step": 38314 + }, + { + "epoch": 0.7670094837724896, + "grad_norm": 1.9348567724227905, + "learning_rate": 1.3571636324279002e-06, + "loss": 0.7098, + "step": 38315 + }, + { + "epoch": 0.7670295022896179, + "grad_norm": 1.0878716707229614, + "learning_rate": 1.356941583073852e-06, + "loss": 0.2918, + "step": 38316 + }, + { + "epoch": 0.7670495208067463, + "grad_norm": 1.1160290241241455, + "learning_rate": 1.3567195490344392e-06, + "loss": 0.3135, + "step": 38317 + }, + { + "epoch": 0.7670695393238746, + "grad_norm": 1.152457594871521, + "learning_rate": 1.3564975303105936e-06, + "loss": 0.2636, + "step": 38318 + }, + { + "epoch": 0.767089557841003, + "grad_norm": 1.138185977935791, + "learning_rate": 1.356275526903248e-06, + "loss": 0.2654, + "step": 38319 + }, + { + "epoch": 0.7671095763581313, + "grad_norm": 1.0861694812774658, + "learning_rate": 1.3560535388133344e-06, + "loss": 0.2575, + "step": 38320 + }, + { + "epoch": 0.7671295948752597, + "grad_norm": 1.8933988809585571, + "learning_rate": 1.3558315660417892e-06, + "loss": 0.7367, + "step": 38321 + }, + { + "epoch": 0.767149613392388, + "grad_norm": 1.1739120483398438, + "learning_rate": 1.3556096085895427e-06, + "loss": 0.3191, + "step": 38322 + }, + { + "epoch": 0.7671696319095163, + "grad_norm": 1.0817136764526367, + "learning_rate": 1.35538766645753e-06, + "loss": 0.292, + "step": 38323 + }, + { + "epoch": 0.7671896504266447, + "grad_norm": 1.9920014142990112, + "learning_rate": 1.3551657396466822e-06, + "loss": 0.7346, + "step": 38324 + }, + { + "epoch": 0.767209668943773, + "grad_norm": 1.0816929340362549, + "learning_rate": 1.3549438281579347e-06, + "loss": 0.3317, + "step": 38325 + }, + { + "epoch": 0.7672296874609014, + "grad_norm": 1.982150673866272, + "learning_rate": 1.3547219319922184e-06, + "loss": 0.7407, + "step": 38326 + }, + { + "epoch": 0.7672497059780297, + "grad_norm": 1.1669484376907349, + "learning_rate": 1.354500051150467e-06, + "loss": 0.2557, + "step": 38327 + }, + { + "epoch": 0.7672697244951581, + "grad_norm": 1.1502933502197266, + "learning_rate": 1.3542781856336129e-06, + "loss": 0.3005, + "step": 38328 + }, + { + "epoch": 0.7672897430122864, + "grad_norm": 1.1994948387145996, + "learning_rate": 1.3540563354425874e-06, + "loss": 0.2814, + "step": 38329 + }, + { + "epoch": 0.7673097615294147, + "grad_norm": 1.1086331605911255, + "learning_rate": 1.3538345005783254e-06, + "loss": 0.3136, + "step": 38330 + }, + { + "epoch": 0.7673297800465431, + "grad_norm": 1.166980266571045, + "learning_rate": 1.3536126810417587e-06, + "loss": 0.2647, + "step": 38331 + }, + { + "epoch": 0.7673497985636714, + "grad_norm": 1.0069127082824707, + "learning_rate": 1.35339087683382e-06, + "loss": 0.3001, + "step": 38332 + }, + { + "epoch": 0.7673698170807998, + "grad_norm": 1.11481511592865, + "learning_rate": 1.3531690879554394e-06, + "loss": 0.2937, + "step": 38333 + }, + { + "epoch": 0.7673898355979281, + "grad_norm": 1.0537933111190796, + "learning_rate": 1.3529473144075528e-06, + "loss": 0.2751, + "step": 38334 + }, + { + "epoch": 0.7674098541150565, + "grad_norm": 1.899705410003662, + "learning_rate": 1.3527255561910895e-06, + "loss": 0.7758, + "step": 38335 + }, + { + "epoch": 0.7674298726321848, + "grad_norm": 1.23712158203125, + "learning_rate": 1.3525038133069846e-06, + "loss": 0.3162, + "step": 38336 + }, + { + "epoch": 0.7674498911493132, + "grad_norm": 1.1914844512939453, + "learning_rate": 1.352282085756168e-06, + "loss": 0.2936, + "step": 38337 + }, + { + "epoch": 0.7674699096664415, + "grad_norm": 1.2249808311462402, + "learning_rate": 1.3520603735395715e-06, + "loss": 0.307, + "step": 38338 + }, + { + "epoch": 0.7674899281835698, + "grad_norm": 1.0837372541427612, + "learning_rate": 1.3518386766581298e-06, + "loss": 0.3286, + "step": 38339 + }, + { + "epoch": 0.7675099467006982, + "grad_norm": 1.1636605262756348, + "learning_rate": 1.3516169951127727e-06, + "loss": 0.3523, + "step": 38340 + }, + { + "epoch": 0.7675299652178265, + "grad_norm": 1.2463856935501099, + "learning_rate": 1.3513953289044335e-06, + "loss": 0.327, + "step": 38341 + }, + { + "epoch": 0.7675499837349549, + "grad_norm": 1.0360302925109863, + "learning_rate": 1.3511736780340407e-06, + "loss": 0.3167, + "step": 38342 + }, + { + "epoch": 0.7675700022520832, + "grad_norm": 0.9838012456893921, + "learning_rate": 1.3509520425025308e-06, + "loss": 0.3068, + "step": 38343 + }, + { + "epoch": 0.7675900207692116, + "grad_norm": 1.1299757957458496, + "learning_rate": 1.3507304223108326e-06, + "loss": 0.3018, + "step": 38344 + }, + { + "epoch": 0.7676100392863399, + "grad_norm": 1.2567600011825562, + "learning_rate": 1.3505088174598785e-06, + "loss": 0.3291, + "step": 38345 + }, + { + "epoch": 0.7676300578034682, + "grad_norm": 1.1470723152160645, + "learning_rate": 1.3502872279505986e-06, + "loss": 0.3312, + "step": 38346 + }, + { + "epoch": 0.7676500763205966, + "grad_norm": 1.8349401950836182, + "learning_rate": 1.3500656537839273e-06, + "loss": 0.7747, + "step": 38347 + }, + { + "epoch": 0.7676700948377249, + "grad_norm": 1.1112338304519653, + "learning_rate": 1.3498440949607933e-06, + "loss": 0.3065, + "step": 38348 + }, + { + "epoch": 0.7676901133548533, + "grad_norm": 1.211826205253601, + "learning_rate": 1.3496225514821304e-06, + "loss": 0.3348, + "step": 38349 + }, + { + "epoch": 0.7677101318719816, + "grad_norm": 1.0290096998214722, + "learning_rate": 1.3494010233488687e-06, + "loss": 0.2551, + "step": 38350 + }, + { + "epoch": 0.76773015038911, + "grad_norm": 1.1308684349060059, + "learning_rate": 1.3491795105619377e-06, + "loss": 0.3174, + "step": 38351 + }, + { + "epoch": 0.7677501689062383, + "grad_norm": 0.9891320466995239, + "learning_rate": 1.3489580131222724e-06, + "loss": 0.2684, + "step": 38352 + }, + { + "epoch": 0.7677701874233667, + "grad_norm": 1.063396692276001, + "learning_rate": 1.3487365310308015e-06, + "loss": 0.3271, + "step": 38353 + }, + { + "epoch": 0.767790205940495, + "grad_norm": 1.0732455253601074, + "learning_rate": 1.3485150642884565e-06, + "loss": 0.2792, + "step": 38354 + }, + { + "epoch": 0.7678102244576233, + "grad_norm": 1.9153547286987305, + "learning_rate": 1.3482936128961672e-06, + "loss": 0.7876, + "step": 38355 + }, + { + "epoch": 0.7678302429747517, + "grad_norm": 1.2045212984085083, + "learning_rate": 1.3480721768548667e-06, + "loss": 0.3036, + "step": 38356 + }, + { + "epoch": 0.76785026149188, + "grad_norm": 1.045299768447876, + "learning_rate": 1.3478507561654852e-06, + "loss": 0.2623, + "step": 38357 + }, + { + "epoch": 0.7678702800090084, + "grad_norm": 1.123616337776184, + "learning_rate": 1.3476293508289528e-06, + "loss": 0.2826, + "step": 38358 + }, + { + "epoch": 0.7678902985261367, + "grad_norm": 1.027697205543518, + "learning_rate": 1.3474079608461998e-06, + "loss": 0.2918, + "step": 38359 + }, + { + "epoch": 0.7679103170432651, + "grad_norm": 1.2416622638702393, + "learning_rate": 1.3471865862181589e-06, + "loss": 0.3229, + "step": 38360 + }, + { + "epoch": 0.7679303355603934, + "grad_norm": 1.0050281286239624, + "learning_rate": 1.3469652269457579e-06, + "loss": 0.2801, + "step": 38361 + }, + { + "epoch": 0.7679503540775217, + "grad_norm": 1.1724731922149658, + "learning_rate": 1.3467438830299301e-06, + "loss": 0.3306, + "step": 38362 + }, + { + "epoch": 0.7679703725946501, + "grad_norm": 1.0905948877334595, + "learning_rate": 1.3465225544716053e-06, + "loss": 0.2743, + "step": 38363 + }, + { + "epoch": 0.7679903911117784, + "grad_norm": 1.1373306512832642, + "learning_rate": 1.346301241271712e-06, + "loss": 0.2918, + "step": 38364 + }, + { + "epoch": 0.7680104096289068, + "grad_norm": 1.9104305505752563, + "learning_rate": 1.3460799434311829e-06, + "loss": 0.7213, + "step": 38365 + }, + { + "epoch": 0.7680304281460351, + "grad_norm": 1.054965615272522, + "learning_rate": 1.3458586609509477e-06, + "loss": 0.2962, + "step": 38366 + }, + { + "epoch": 0.7680504466631635, + "grad_norm": 1.014043927192688, + "learning_rate": 1.3456373938319368e-06, + "loss": 0.2677, + "step": 38367 + }, + { + "epoch": 0.7680704651802918, + "grad_norm": 1.11318039894104, + "learning_rate": 1.3454161420750795e-06, + "loss": 0.302, + "step": 38368 + }, + { + "epoch": 0.7680904836974202, + "grad_norm": 1.919693946838379, + "learning_rate": 1.3451949056813047e-06, + "loss": 0.7653, + "step": 38369 + }, + { + "epoch": 0.7681105022145485, + "grad_norm": 1.3971363306045532, + "learning_rate": 1.3449736846515454e-06, + "loss": 0.3116, + "step": 38370 + }, + { + "epoch": 0.7681305207316768, + "grad_norm": 1.1290521621704102, + "learning_rate": 1.34475247898673e-06, + "loss": 0.2892, + "step": 38371 + }, + { + "epoch": 0.7681505392488052, + "grad_norm": 1.1398932933807373, + "learning_rate": 1.3445312886877876e-06, + "loss": 0.3225, + "step": 38372 + }, + { + "epoch": 0.7681705577659335, + "grad_norm": 1.19730806350708, + "learning_rate": 1.3443101137556503e-06, + "loss": 0.3764, + "step": 38373 + }, + { + "epoch": 0.7681905762830619, + "grad_norm": 1.941678762435913, + "learning_rate": 1.344088954191245e-06, + "loss": 0.7638, + "step": 38374 + }, + { + "epoch": 0.7682105948001902, + "grad_norm": 1.168139100074768, + "learning_rate": 1.3438678099955045e-06, + "loss": 0.2916, + "step": 38375 + }, + { + "epoch": 0.7682306133173186, + "grad_norm": 1.1023399829864502, + "learning_rate": 1.3436466811693566e-06, + "loss": 0.2917, + "step": 38376 + }, + { + "epoch": 0.7682506318344469, + "grad_norm": 1.0647640228271484, + "learning_rate": 1.3434255677137314e-06, + "loss": 0.2786, + "step": 38377 + }, + { + "epoch": 0.7682706503515752, + "grad_norm": 1.1667466163635254, + "learning_rate": 1.3432044696295572e-06, + "loss": 0.3201, + "step": 38378 + }, + { + "epoch": 0.7682906688687036, + "grad_norm": 1.096683144569397, + "learning_rate": 1.3429833869177655e-06, + "loss": 0.278, + "step": 38379 + }, + { + "epoch": 0.7683106873858319, + "grad_norm": 1.177558422088623, + "learning_rate": 1.3427623195792844e-06, + "loss": 0.2762, + "step": 38380 + }, + { + "epoch": 0.7683307059029603, + "grad_norm": 1.152876853942871, + "learning_rate": 1.342541267615044e-06, + "loss": 0.3048, + "step": 38381 + }, + { + "epoch": 0.7683507244200886, + "grad_norm": 1.1607288122177124, + "learning_rate": 1.3423202310259714e-06, + "loss": 0.299, + "step": 38382 + }, + { + "epoch": 0.768370742937217, + "grad_norm": 1.3037681579589844, + "learning_rate": 1.3420992098129988e-06, + "loss": 0.2997, + "step": 38383 + }, + { + "epoch": 0.7683907614543453, + "grad_norm": 1.9086542129516602, + "learning_rate": 1.341878203977054e-06, + "loss": 0.76, + "step": 38384 + }, + { + "epoch": 0.7684107799714737, + "grad_norm": 1.6204890012741089, + "learning_rate": 1.3416572135190642e-06, + "loss": 0.2821, + "step": 38385 + }, + { + "epoch": 0.768430798488602, + "grad_norm": 1.011942982673645, + "learning_rate": 1.341436238439962e-06, + "loss": 0.2655, + "step": 38386 + }, + { + "epoch": 0.7684508170057303, + "grad_norm": 1.1267505884170532, + "learning_rate": 1.3412152787406728e-06, + "loss": 0.2378, + "step": 38387 + }, + { + "epoch": 0.7684708355228587, + "grad_norm": 1.8579267263412476, + "learning_rate": 1.3409943344221288e-06, + "loss": 0.7701, + "step": 38388 + }, + { + "epoch": 0.768490854039987, + "grad_norm": 1.0800657272338867, + "learning_rate": 1.3407734054852573e-06, + "loss": 0.2889, + "step": 38389 + }, + { + "epoch": 0.7685108725571154, + "grad_norm": 1.161995530128479, + "learning_rate": 1.340552491930987e-06, + "loss": 0.2698, + "step": 38390 + }, + { + "epoch": 0.7685308910742437, + "grad_norm": 1.0183331966400146, + "learning_rate": 1.3403315937602445e-06, + "loss": 0.3082, + "step": 38391 + }, + { + "epoch": 0.7685509095913721, + "grad_norm": 1.0853594541549683, + "learning_rate": 1.3401107109739625e-06, + "loss": 0.26, + "step": 38392 + }, + { + "epoch": 0.7685709281085004, + "grad_norm": 1.1503592729568481, + "learning_rate": 1.3398898435730673e-06, + "loss": 0.2636, + "step": 38393 + }, + { + "epoch": 0.7685909466256287, + "grad_norm": 1.1113437414169312, + "learning_rate": 1.339668991558487e-06, + "loss": 0.3278, + "step": 38394 + }, + { + "epoch": 0.7686109651427571, + "grad_norm": 1.0537174940109253, + "learning_rate": 1.33944815493115e-06, + "loss": 0.2906, + "step": 38395 + }, + { + "epoch": 0.7686309836598854, + "grad_norm": 1.1426211595535278, + "learning_rate": 1.3392273336919859e-06, + "loss": 0.3106, + "step": 38396 + }, + { + "epoch": 0.7686510021770138, + "grad_norm": 1.1624891757965088, + "learning_rate": 1.3390065278419229e-06, + "loss": 0.2768, + "step": 38397 + }, + { + "epoch": 0.7686710206941421, + "grad_norm": 1.4478493928909302, + "learning_rate": 1.3387857373818863e-06, + "loss": 0.2934, + "step": 38398 + }, + { + "epoch": 0.7686910392112705, + "grad_norm": 1.1957515478134155, + "learning_rate": 1.3385649623128088e-06, + "loss": 0.3014, + "step": 38399 + }, + { + "epoch": 0.7687110577283988, + "grad_norm": 1.0181959867477417, + "learning_rate": 1.3383442026356142e-06, + "loss": 0.2952, + "step": 38400 + }, + { + "epoch": 0.7687310762455272, + "grad_norm": 1.0331974029541016, + "learning_rate": 1.3381234583512348e-06, + "loss": 0.2563, + "step": 38401 + }, + { + "epoch": 0.7687510947626555, + "grad_norm": 2.106358766555786, + "learning_rate": 1.3379027294605955e-06, + "loss": 0.7503, + "step": 38402 + }, + { + "epoch": 0.7687711132797838, + "grad_norm": 0.9930840134620667, + "learning_rate": 1.3376820159646253e-06, + "loss": 0.278, + "step": 38403 + }, + { + "epoch": 0.7687911317969122, + "grad_norm": 1.028866171836853, + "learning_rate": 1.3374613178642505e-06, + "loss": 0.2922, + "step": 38404 + }, + { + "epoch": 0.7688111503140405, + "grad_norm": 1.1556015014648438, + "learning_rate": 1.3372406351604018e-06, + "loss": 0.3101, + "step": 38405 + }, + { + "epoch": 0.7688311688311689, + "grad_norm": 1.197935700416565, + "learning_rate": 1.3370199678540046e-06, + "loss": 0.2912, + "step": 38406 + }, + { + "epoch": 0.7688511873482972, + "grad_norm": 1.1067968606948853, + "learning_rate": 1.336799315945988e-06, + "loss": 0.2991, + "step": 38407 + }, + { + "epoch": 0.7688712058654256, + "grad_norm": 1.0860832929611206, + "learning_rate": 1.3365786794372781e-06, + "loss": 0.3083, + "step": 38408 + }, + { + "epoch": 0.7688912243825539, + "grad_norm": 1.1810659170150757, + "learning_rate": 1.3363580583288016e-06, + "loss": 0.2958, + "step": 38409 + }, + { + "epoch": 0.7689112428996822, + "grad_norm": 1.0712428092956543, + "learning_rate": 1.3361374526214894e-06, + "loss": 0.3209, + "step": 38410 + }, + { + "epoch": 0.7689312614168106, + "grad_norm": 1.1060585975646973, + "learning_rate": 1.335916862316265e-06, + "loss": 0.2908, + "step": 38411 + }, + { + "epoch": 0.7689512799339389, + "grad_norm": 1.0386099815368652, + "learning_rate": 1.3356962874140589e-06, + "loss": 0.3103, + "step": 38412 + }, + { + "epoch": 0.7689712984510673, + "grad_norm": 1.917145013809204, + "learning_rate": 1.3354757279157954e-06, + "loss": 0.8273, + "step": 38413 + }, + { + "epoch": 0.7689913169681956, + "grad_norm": 1.0767775774002075, + "learning_rate": 1.3352551838224054e-06, + "loss": 0.2636, + "step": 38414 + }, + { + "epoch": 0.769011335485324, + "grad_norm": 0.9823621511459351, + "learning_rate": 1.3350346551348137e-06, + "loss": 0.2652, + "step": 38415 + }, + { + "epoch": 0.7690313540024523, + "grad_norm": 1.0290663242340088, + "learning_rate": 1.3348141418539478e-06, + "loss": 0.2343, + "step": 38416 + }, + { + "epoch": 0.7690513725195807, + "grad_norm": 1.1539275646209717, + "learning_rate": 1.3345936439807327e-06, + "loss": 0.2909, + "step": 38417 + }, + { + "epoch": 0.769071391036709, + "grad_norm": 1.2728188037872314, + "learning_rate": 1.3343731615160987e-06, + "loss": 0.3302, + "step": 38418 + }, + { + "epoch": 0.7690914095538373, + "grad_norm": 1.9353938102722168, + "learning_rate": 1.3341526944609712e-06, + "loss": 0.74, + "step": 38419 + }, + { + "epoch": 0.7691114280709657, + "grad_norm": 1.031410813331604, + "learning_rate": 1.3339322428162765e-06, + "loss": 0.2693, + "step": 38420 + }, + { + "epoch": 0.769131446588094, + "grad_norm": 1.0684006214141846, + "learning_rate": 1.333711806582942e-06, + "loss": 0.2788, + "step": 38421 + }, + { + "epoch": 0.7691514651052224, + "grad_norm": 1.152767300605774, + "learning_rate": 1.3334913857618926e-06, + "loss": 0.3106, + "step": 38422 + }, + { + "epoch": 0.7691714836223507, + "grad_norm": 1.009847640991211, + "learning_rate": 1.3332709803540578e-06, + "loss": 0.2735, + "step": 38423 + }, + { + "epoch": 0.7691915021394791, + "grad_norm": 1.1882585287094116, + "learning_rate": 1.3330505903603614e-06, + "loss": 0.2816, + "step": 38424 + }, + { + "epoch": 0.7692115206566074, + "grad_norm": 1.1277124881744385, + "learning_rate": 1.3328302157817324e-06, + "loss": 0.2967, + "step": 38425 + }, + { + "epoch": 0.7692315391737357, + "grad_norm": 1.0701169967651367, + "learning_rate": 1.3326098566190948e-06, + "loss": 0.2863, + "step": 38426 + }, + { + "epoch": 0.7692515576908641, + "grad_norm": 1.1322702169418335, + "learning_rate": 1.3323895128733772e-06, + "loss": 0.3259, + "step": 38427 + }, + { + "epoch": 0.7692715762079924, + "grad_norm": 1.984537959098816, + "learning_rate": 1.3321691845455048e-06, + "loss": 0.7451, + "step": 38428 + }, + { + "epoch": 0.7692915947251208, + "grad_norm": 1.0733797550201416, + "learning_rate": 1.3319488716364043e-06, + "loss": 0.237, + "step": 38429 + }, + { + "epoch": 0.7693116132422491, + "grad_norm": 0.9653420448303223, + "learning_rate": 1.3317285741470009e-06, + "loss": 0.2685, + "step": 38430 + }, + { + "epoch": 0.7693316317593775, + "grad_norm": 1.9106700420379639, + "learning_rate": 1.3315082920782202e-06, + "loss": 0.766, + "step": 38431 + }, + { + "epoch": 0.7693516502765058, + "grad_norm": 1.1229145526885986, + "learning_rate": 1.3312880254309902e-06, + "loss": 0.2942, + "step": 38432 + }, + { + "epoch": 0.7693716687936342, + "grad_norm": 1.1551945209503174, + "learning_rate": 1.331067774206236e-06, + "loss": 0.3552, + "step": 38433 + }, + { + "epoch": 0.7693916873107625, + "grad_norm": 1.1544071435928345, + "learning_rate": 1.3308475384048835e-06, + "loss": 0.3255, + "step": 38434 + }, + { + "epoch": 0.7694117058278908, + "grad_norm": 1.2599021196365356, + "learning_rate": 1.3306273180278568e-06, + "loss": 0.2835, + "step": 38435 + }, + { + "epoch": 0.7694317243450192, + "grad_norm": 1.1698181629180908, + "learning_rate": 1.3304071130760842e-06, + "loss": 0.2894, + "step": 38436 + }, + { + "epoch": 0.7694517428621475, + "grad_norm": 1.8904117345809937, + "learning_rate": 1.3301869235504893e-06, + "loss": 0.7806, + "step": 38437 + }, + { + "epoch": 0.7694717613792759, + "grad_norm": 1.1104991436004639, + "learning_rate": 1.3299667494520002e-06, + "loss": 0.3013, + "step": 38438 + }, + { + "epoch": 0.7694917798964042, + "grad_norm": 1.3220337629318237, + "learning_rate": 1.3297465907815393e-06, + "loss": 0.2755, + "step": 38439 + }, + { + "epoch": 0.7695117984135326, + "grad_norm": 1.1053932905197144, + "learning_rate": 1.329526447540036e-06, + "loss": 0.3055, + "step": 38440 + }, + { + "epoch": 0.7695318169306609, + "grad_norm": 1.074406623840332, + "learning_rate": 1.3293063197284135e-06, + "loss": 0.2777, + "step": 38441 + }, + { + "epoch": 0.7695518354477892, + "grad_norm": 1.0418412685394287, + "learning_rate": 1.3290862073475969e-06, + "loss": 0.2808, + "step": 38442 + }, + { + "epoch": 0.7695718539649176, + "grad_norm": 1.0364214181900024, + "learning_rate": 1.3288661103985123e-06, + "loss": 0.2848, + "step": 38443 + }, + { + "epoch": 0.7695918724820459, + "grad_norm": 0.9778189063072205, + "learning_rate": 1.328646028882083e-06, + "loss": 0.257, + "step": 38444 + }, + { + "epoch": 0.7696118909991743, + "grad_norm": 1.7405706644058228, + "learning_rate": 1.3284259627992368e-06, + "loss": 0.8035, + "step": 38445 + }, + { + "epoch": 0.7696319095163026, + "grad_norm": 2.1092684268951416, + "learning_rate": 1.3282059121508978e-06, + "loss": 0.7673, + "step": 38446 + }, + { + "epoch": 0.769651928033431, + "grad_norm": 1.327212929725647, + "learning_rate": 1.3279858769379916e-06, + "loss": 0.3559, + "step": 38447 + }, + { + "epoch": 0.7696719465505593, + "grad_norm": 1.9308667182922363, + "learning_rate": 1.3277658571614404e-06, + "loss": 0.6855, + "step": 38448 + }, + { + "epoch": 0.7696919650676876, + "grad_norm": 1.1262272596359253, + "learning_rate": 1.327545852822173e-06, + "loss": 0.293, + "step": 38449 + }, + { + "epoch": 0.769711983584816, + "grad_norm": 1.139635682106018, + "learning_rate": 1.3273258639211105e-06, + "loss": 0.3298, + "step": 38450 + }, + { + "epoch": 0.7697320021019443, + "grad_norm": 1.1148632764816284, + "learning_rate": 1.3271058904591822e-06, + "loss": 0.346, + "step": 38451 + }, + { + "epoch": 0.7697520206190727, + "grad_norm": 1.0102145671844482, + "learning_rate": 1.3268859324373095e-06, + "loss": 0.2203, + "step": 38452 + }, + { + "epoch": 0.769772039136201, + "grad_norm": 1.075745701789856, + "learning_rate": 1.3266659898564165e-06, + "loss": 0.2999, + "step": 38453 + }, + { + "epoch": 0.7697920576533294, + "grad_norm": 1.2864761352539062, + "learning_rate": 1.3264460627174313e-06, + "loss": 0.308, + "step": 38454 + }, + { + "epoch": 0.7698120761704577, + "grad_norm": 1.2877764701843262, + "learning_rate": 1.326226151021276e-06, + "loss": 0.2733, + "step": 38455 + }, + { + "epoch": 0.7698320946875861, + "grad_norm": 1.244406819343567, + "learning_rate": 1.3260062547688752e-06, + "loss": 0.3023, + "step": 38456 + }, + { + "epoch": 0.7698521132047144, + "grad_norm": 1.1632719039916992, + "learning_rate": 1.3257863739611515e-06, + "loss": 0.3215, + "step": 38457 + }, + { + "epoch": 0.7698721317218427, + "grad_norm": 1.1701685190200806, + "learning_rate": 1.3255665085990333e-06, + "loss": 0.2936, + "step": 38458 + }, + { + "epoch": 0.7698921502389711, + "grad_norm": 1.1812740564346313, + "learning_rate": 1.3253466586834424e-06, + "loss": 0.2493, + "step": 38459 + }, + { + "epoch": 0.7699121687560994, + "grad_norm": 1.0454374551773071, + "learning_rate": 1.3251268242153036e-06, + "loss": 0.2782, + "step": 38460 + }, + { + "epoch": 0.7699321872732278, + "grad_norm": 1.2259454727172852, + "learning_rate": 1.3249070051955394e-06, + "loss": 0.2994, + "step": 38461 + }, + { + "epoch": 0.7699522057903561, + "grad_norm": 1.1387534141540527, + "learning_rate": 1.3246872016250767e-06, + "loss": 0.4011, + "step": 38462 + }, + { + "epoch": 0.7699722243074845, + "grad_norm": 1.2341678142547607, + "learning_rate": 1.3244674135048363e-06, + "loss": 0.301, + "step": 38463 + }, + { + "epoch": 0.7699922428246128, + "grad_norm": 1.0392897129058838, + "learning_rate": 1.3242476408357458e-06, + "loss": 0.2667, + "step": 38464 + }, + { + "epoch": 0.7700122613417411, + "grad_norm": 1.1716111898422241, + "learning_rate": 1.3240278836187264e-06, + "loss": 0.3063, + "step": 38465 + }, + { + "epoch": 0.7700322798588695, + "grad_norm": 1.074713110923767, + "learning_rate": 1.3238081418547021e-06, + "loss": 0.3054, + "step": 38466 + }, + { + "epoch": 0.7700522983759978, + "grad_norm": 1.0495238304138184, + "learning_rate": 1.3235884155445983e-06, + "loss": 0.2957, + "step": 38467 + }, + { + "epoch": 0.7700723168931262, + "grad_norm": 1.1167579889297485, + "learning_rate": 1.323368704689338e-06, + "loss": 0.2804, + "step": 38468 + }, + { + "epoch": 0.7700923354102545, + "grad_norm": 1.9755744934082031, + "learning_rate": 1.3231490092898442e-06, + "loss": 0.7808, + "step": 38469 + }, + { + "epoch": 0.7701123539273829, + "grad_norm": 1.073822259902954, + "learning_rate": 1.3229293293470392e-06, + "loss": 0.3166, + "step": 38470 + }, + { + "epoch": 0.7701323724445112, + "grad_norm": 1.244954228401184, + "learning_rate": 1.3227096648618492e-06, + "loss": 0.3036, + "step": 38471 + }, + { + "epoch": 0.7701523909616396, + "grad_norm": 0.9550624489784241, + "learning_rate": 1.3224900158351966e-06, + "loss": 0.2799, + "step": 38472 + }, + { + "epoch": 0.7701724094787679, + "grad_norm": 0.9751400351524353, + "learning_rate": 1.3222703822680044e-06, + "loss": 0.2713, + "step": 38473 + }, + { + "epoch": 0.7701924279958962, + "grad_norm": 1.2177432775497437, + "learning_rate": 1.3220507641611952e-06, + "loss": 0.3171, + "step": 38474 + }, + { + "epoch": 0.7702124465130246, + "grad_norm": 1.0614768266677856, + "learning_rate": 1.3218311615156943e-06, + "loss": 0.3143, + "step": 38475 + }, + { + "epoch": 0.7702324650301529, + "grad_norm": 1.1539386510849, + "learning_rate": 1.3216115743324215e-06, + "loss": 0.3084, + "step": 38476 + }, + { + "epoch": 0.7702524835472813, + "grad_norm": 1.0527656078338623, + "learning_rate": 1.3213920026123044e-06, + "loss": 0.2756, + "step": 38477 + }, + { + "epoch": 0.7702725020644096, + "grad_norm": 1.195970892906189, + "learning_rate": 1.321172446356263e-06, + "loss": 0.291, + "step": 38478 + }, + { + "epoch": 0.770292520581538, + "grad_norm": 1.0171267986297607, + "learning_rate": 1.3209529055652199e-06, + "loss": 0.2997, + "step": 38479 + }, + { + "epoch": 0.7703125390986663, + "grad_norm": 1.1659331321716309, + "learning_rate": 1.3207333802401007e-06, + "loss": 0.2991, + "step": 38480 + }, + { + "epoch": 0.7703325576157946, + "grad_norm": 1.4142154455184937, + "learning_rate": 1.320513870381826e-06, + "loss": 0.2922, + "step": 38481 + }, + { + "epoch": 0.770352576132923, + "grad_norm": 1.1265501976013184, + "learning_rate": 1.3202943759913194e-06, + "loss": 0.2987, + "step": 38482 + }, + { + "epoch": 0.7703725946500513, + "grad_norm": 1.4501159191131592, + "learning_rate": 1.3200748970695037e-06, + "loss": 0.3181, + "step": 38483 + }, + { + "epoch": 0.7703926131671797, + "grad_norm": 1.2020097970962524, + "learning_rate": 1.3198554336172993e-06, + "loss": 0.3122, + "step": 38484 + }, + { + "epoch": 0.770412631684308, + "grad_norm": 0.9950610995292664, + "learning_rate": 1.3196359856356322e-06, + "loss": 0.2983, + "step": 38485 + }, + { + "epoch": 0.7704326502014364, + "grad_norm": 1.1739963293075562, + "learning_rate": 1.3194165531254238e-06, + "loss": 0.2529, + "step": 38486 + }, + { + "epoch": 0.7704526687185647, + "grad_norm": 1.0808643102645874, + "learning_rate": 1.3191971360875944e-06, + "loss": 0.2867, + "step": 38487 + }, + { + "epoch": 0.7704726872356931, + "grad_norm": 1.0998430252075195, + "learning_rate": 1.3189777345230692e-06, + "loss": 0.3313, + "step": 38488 + }, + { + "epoch": 0.7704927057528214, + "grad_norm": 1.0917879343032837, + "learning_rate": 1.3187583484327683e-06, + "loss": 0.2887, + "step": 38489 + }, + { + "epoch": 0.7705127242699497, + "grad_norm": 1.8422075510025024, + "learning_rate": 1.3185389778176165e-06, + "loss": 0.7371, + "step": 38490 + }, + { + "epoch": 0.7705327427870781, + "grad_norm": 1.0233485698699951, + "learning_rate": 1.318319622678535e-06, + "loss": 0.2627, + "step": 38491 + }, + { + "epoch": 0.7705527613042064, + "grad_norm": 1.0362777709960938, + "learning_rate": 1.3181002830164451e-06, + "loss": 0.2782, + "step": 38492 + }, + { + "epoch": 0.7705727798213348, + "grad_norm": 1.069165825843811, + "learning_rate": 1.3178809588322678e-06, + "loss": 0.2974, + "step": 38493 + }, + { + "epoch": 0.7705927983384631, + "grad_norm": 1.2398854494094849, + "learning_rate": 1.317661650126928e-06, + "loss": 0.2892, + "step": 38494 + }, + { + "epoch": 0.7706128168555915, + "grad_norm": 1.1281788349151611, + "learning_rate": 1.3174423569013462e-06, + "loss": 0.3267, + "step": 38495 + }, + { + "epoch": 0.7706328353727198, + "grad_norm": 1.0504637956619263, + "learning_rate": 1.3172230791564445e-06, + "loss": 0.2959, + "step": 38496 + }, + { + "epoch": 0.770652853889848, + "grad_norm": 1.10721755027771, + "learning_rate": 1.3170038168931426e-06, + "loss": 0.2743, + "step": 38497 + }, + { + "epoch": 0.7706728724069765, + "grad_norm": 1.08929443359375, + "learning_rate": 1.3167845701123655e-06, + "loss": 0.2679, + "step": 38498 + }, + { + "epoch": 0.7706928909241048, + "grad_norm": 1.0916154384613037, + "learning_rate": 1.316565338815034e-06, + "loss": 0.304, + "step": 38499 + }, + { + "epoch": 0.7707129094412332, + "grad_norm": 1.0870202779769897, + "learning_rate": 1.3163461230020669e-06, + "loss": 0.2868, + "step": 38500 + }, + { + "epoch": 0.7707329279583615, + "grad_norm": 1.0983458757400513, + "learning_rate": 1.31612692267439e-06, + "loss": 0.2381, + "step": 38501 + }, + { + "epoch": 0.7707529464754899, + "grad_norm": 1.1464468240737915, + "learning_rate": 1.315907737832921e-06, + "loss": 0.2833, + "step": 38502 + }, + { + "epoch": 0.7707729649926182, + "grad_norm": 1.1240739822387695, + "learning_rate": 1.3156885684785848e-06, + "loss": 0.2923, + "step": 38503 + }, + { + "epoch": 0.7707929835097466, + "grad_norm": 1.2405019998550415, + "learning_rate": 1.3154694146123004e-06, + "loss": 0.2848, + "step": 38504 + }, + { + "epoch": 0.7708130020268749, + "grad_norm": 1.8372609615325928, + "learning_rate": 1.31525027623499e-06, + "loss": 0.8096, + "step": 38505 + }, + { + "epoch": 0.7708330205440032, + "grad_norm": 1.1029318571090698, + "learning_rate": 1.3150311533475736e-06, + "loss": 0.3173, + "step": 38506 + }, + { + "epoch": 0.7708530390611316, + "grad_norm": 1.1453163623809814, + "learning_rate": 1.314812045950974e-06, + "loss": 0.2641, + "step": 38507 + }, + { + "epoch": 0.7708730575782599, + "grad_norm": 1.3008533716201782, + "learning_rate": 1.3145929540461116e-06, + "loss": 0.3004, + "step": 38508 + }, + { + "epoch": 0.7708930760953883, + "grad_norm": 1.1308119297027588, + "learning_rate": 1.314373877633907e-06, + "loss": 0.2783, + "step": 38509 + }, + { + "epoch": 0.7709130946125166, + "grad_norm": 1.2502039670944214, + "learning_rate": 1.3141548167152806e-06, + "loss": 0.3324, + "step": 38510 + }, + { + "epoch": 0.770933113129645, + "grad_norm": 1.080736517906189, + "learning_rate": 1.313935771291155e-06, + "loss": 0.295, + "step": 38511 + }, + { + "epoch": 0.7709531316467733, + "grad_norm": 1.038499116897583, + "learning_rate": 1.3137167413624508e-06, + "loss": 0.3293, + "step": 38512 + }, + { + "epoch": 0.7709731501639016, + "grad_norm": 1.1711664199829102, + "learning_rate": 1.3134977269300864e-06, + "loss": 0.2779, + "step": 38513 + }, + { + "epoch": 0.77099316868103, + "grad_norm": 1.1245561838150024, + "learning_rate": 1.3132787279949855e-06, + "loss": 0.2848, + "step": 38514 + }, + { + "epoch": 0.7710131871981583, + "grad_norm": 1.0623490810394287, + "learning_rate": 1.3130597445580662e-06, + "loss": 0.3257, + "step": 38515 + }, + { + "epoch": 0.7710332057152867, + "grad_norm": 0.9919365644454956, + "learning_rate": 1.3128407766202516e-06, + "loss": 0.283, + "step": 38516 + }, + { + "epoch": 0.771053224232415, + "grad_norm": 1.28110671043396, + "learning_rate": 1.3126218241824612e-06, + "loss": 0.3009, + "step": 38517 + }, + { + "epoch": 0.7710732427495434, + "grad_norm": 1.323434591293335, + "learning_rate": 1.3124028872456147e-06, + "loss": 0.2939, + "step": 38518 + }, + { + "epoch": 0.7710932612666717, + "grad_norm": 1.1983230113983154, + "learning_rate": 1.3121839658106316e-06, + "loss": 0.3174, + "step": 38519 + }, + { + "epoch": 0.7711132797838001, + "grad_norm": 1.0778945684432983, + "learning_rate": 1.311965059878435e-06, + "loss": 0.2873, + "step": 38520 + }, + { + "epoch": 0.7711332983009284, + "grad_norm": 1.3712642192840576, + "learning_rate": 1.3117461694499434e-06, + "loss": 0.2989, + "step": 38521 + }, + { + "epoch": 0.7711533168180567, + "grad_norm": 1.2410670518875122, + "learning_rate": 1.3115272945260777e-06, + "loss": 0.2892, + "step": 38522 + }, + { + "epoch": 0.7711733353351851, + "grad_norm": 1.0421252250671387, + "learning_rate": 1.311308435107757e-06, + "loss": 0.3171, + "step": 38523 + }, + { + "epoch": 0.7711933538523134, + "grad_norm": 1.3363707065582275, + "learning_rate": 1.3110895911959004e-06, + "loss": 0.3185, + "step": 38524 + }, + { + "epoch": 0.7712133723694418, + "grad_norm": 1.2184306383132935, + "learning_rate": 1.3108707627914308e-06, + "loss": 0.2798, + "step": 38525 + }, + { + "epoch": 0.7712333908865701, + "grad_norm": 1.1921800374984741, + "learning_rate": 1.3106519498952652e-06, + "loss": 0.3201, + "step": 38526 + }, + { + "epoch": 0.7712534094036985, + "grad_norm": 1.2271456718444824, + "learning_rate": 1.3104331525083265e-06, + "loss": 0.2648, + "step": 38527 + }, + { + "epoch": 0.7712734279208268, + "grad_norm": 1.1715142726898193, + "learning_rate": 1.3102143706315306e-06, + "loss": 0.3158, + "step": 38528 + }, + { + "epoch": 0.771293446437955, + "grad_norm": 1.146837830543518, + "learning_rate": 1.3099956042658018e-06, + "loss": 0.2632, + "step": 38529 + }, + { + "epoch": 0.7713134649550835, + "grad_norm": 1.031326174736023, + "learning_rate": 1.309776853412057e-06, + "loss": 0.2674, + "step": 38530 + }, + { + "epoch": 0.7713334834722118, + "grad_norm": 1.142948031425476, + "learning_rate": 1.3095581180712164e-06, + "loss": 0.2917, + "step": 38531 + }, + { + "epoch": 0.7713535019893402, + "grad_norm": 1.1116929054260254, + "learning_rate": 1.3093393982441976e-06, + "loss": 0.3259, + "step": 38532 + }, + { + "epoch": 0.7713735205064685, + "grad_norm": 1.153473138809204, + "learning_rate": 1.3091206939319229e-06, + "loss": 0.3592, + "step": 38533 + }, + { + "epoch": 0.7713935390235969, + "grad_norm": 1.2105122804641724, + "learning_rate": 1.3089020051353108e-06, + "loss": 0.3528, + "step": 38534 + }, + { + "epoch": 0.7714135575407252, + "grad_norm": 1.2547450065612793, + "learning_rate": 1.3086833318552806e-06, + "loss": 0.265, + "step": 38535 + }, + { + "epoch": 0.7714335760578536, + "grad_norm": 1.0313944816589355, + "learning_rate": 1.308464674092751e-06, + "loss": 0.3187, + "step": 38536 + }, + { + "epoch": 0.7714535945749819, + "grad_norm": 1.0616310834884644, + "learning_rate": 1.30824603184864e-06, + "loss": 0.322, + "step": 38537 + }, + { + "epoch": 0.7714736130921102, + "grad_norm": 1.9327934980392456, + "learning_rate": 1.30802740512387e-06, + "loss": 0.7593, + "step": 38538 + }, + { + "epoch": 0.7714936316092386, + "grad_norm": 1.0309803485870361, + "learning_rate": 1.3078087939193562e-06, + "loss": 0.2909, + "step": 38539 + }, + { + "epoch": 0.7715136501263669, + "grad_norm": 1.022252082824707, + "learning_rate": 1.3075901982360217e-06, + "loss": 0.2741, + "step": 38540 + }, + { + "epoch": 0.7715336686434953, + "grad_norm": 1.13008713722229, + "learning_rate": 1.3073716180747815e-06, + "loss": 0.2904, + "step": 38541 + }, + { + "epoch": 0.7715536871606236, + "grad_norm": 1.1132798194885254, + "learning_rate": 1.3071530534365585e-06, + "loss": 0.2848, + "step": 38542 + }, + { + "epoch": 0.771573705677752, + "grad_norm": 1.0890616178512573, + "learning_rate": 1.3069345043222687e-06, + "loss": 0.286, + "step": 38543 + }, + { + "epoch": 0.7715937241948803, + "grad_norm": 1.2036219835281372, + "learning_rate": 1.3067159707328313e-06, + "loss": 0.2788, + "step": 38544 + }, + { + "epoch": 0.7716137427120086, + "grad_norm": 1.0138723850250244, + "learning_rate": 1.306497452669166e-06, + "loss": 0.2605, + "step": 38545 + }, + { + "epoch": 0.771633761229137, + "grad_norm": 1.0431379079818726, + "learning_rate": 1.3062789501321881e-06, + "loss": 0.2761, + "step": 38546 + }, + { + "epoch": 0.7716537797462653, + "grad_norm": 1.2730894088745117, + "learning_rate": 1.3060604631228208e-06, + "loss": 0.2767, + "step": 38547 + }, + { + "epoch": 0.7716737982633937, + "grad_norm": 1.1658694744110107, + "learning_rate": 1.30584199164198e-06, + "loss": 0.3168, + "step": 38548 + }, + { + "epoch": 0.771693816780522, + "grad_norm": 1.0373512506484985, + "learning_rate": 1.3056235356905849e-06, + "loss": 0.2728, + "step": 38549 + }, + { + "epoch": 0.7717138352976504, + "grad_norm": 0.9965107440948486, + "learning_rate": 1.305405095269552e-06, + "loss": 0.245, + "step": 38550 + }, + { + "epoch": 0.7717338538147787, + "grad_norm": 1.2432869672775269, + "learning_rate": 1.3051866703798017e-06, + "loss": 0.3142, + "step": 38551 + }, + { + "epoch": 0.7717538723319071, + "grad_norm": 2.1381595134735107, + "learning_rate": 1.3049682610222502e-06, + "loss": 0.7494, + "step": 38552 + }, + { + "epoch": 0.7717738908490354, + "grad_norm": 1.891984462738037, + "learning_rate": 1.3047498671978192e-06, + "loss": 0.8125, + "step": 38553 + }, + { + "epoch": 0.7717939093661637, + "grad_norm": 1.1522154808044434, + "learning_rate": 1.304531488907424e-06, + "loss": 0.3191, + "step": 38554 + }, + { + "epoch": 0.7718139278832921, + "grad_norm": 1.172178864479065, + "learning_rate": 1.3043131261519816e-06, + "loss": 0.3293, + "step": 38555 + }, + { + "epoch": 0.7718339464004204, + "grad_norm": 1.1658798456192017, + "learning_rate": 1.3040947789324133e-06, + "loss": 0.2512, + "step": 38556 + }, + { + "epoch": 0.7718539649175488, + "grad_norm": 1.1430325508117676, + "learning_rate": 1.3038764472496345e-06, + "loss": 0.2704, + "step": 38557 + }, + { + "epoch": 0.7718739834346771, + "grad_norm": 1.0655988454818726, + "learning_rate": 1.3036581311045643e-06, + "loss": 0.3026, + "step": 38558 + }, + { + "epoch": 0.7718940019518055, + "grad_norm": 1.0946542024612427, + "learning_rate": 1.3034398304981182e-06, + "loss": 0.2859, + "step": 38559 + }, + { + "epoch": 0.7719140204689338, + "grad_norm": 1.2793734073638916, + "learning_rate": 1.3032215454312168e-06, + "loss": 0.3019, + "step": 38560 + }, + { + "epoch": 0.771934038986062, + "grad_norm": 1.1178802251815796, + "learning_rate": 1.3030032759047773e-06, + "loss": 0.3549, + "step": 38561 + }, + { + "epoch": 0.7719540575031905, + "grad_norm": 1.148287296295166, + "learning_rate": 1.3027850219197158e-06, + "loss": 0.2841, + "step": 38562 + }, + { + "epoch": 0.7719740760203188, + "grad_norm": 1.0262603759765625, + "learning_rate": 1.302566783476949e-06, + "loss": 0.289, + "step": 38563 + }, + { + "epoch": 0.7719940945374472, + "grad_norm": 1.119939923286438, + "learning_rate": 1.3023485605773967e-06, + "loss": 0.2991, + "step": 38564 + }, + { + "epoch": 0.7720141130545755, + "grad_norm": 1.0376219749450684, + "learning_rate": 1.3021303532219748e-06, + "loss": 0.2819, + "step": 38565 + }, + { + "epoch": 0.7720341315717039, + "grad_norm": 1.098366141319275, + "learning_rate": 1.301912161411602e-06, + "loss": 0.2633, + "step": 38566 + }, + { + "epoch": 0.7720541500888322, + "grad_norm": 1.0954068899154663, + "learning_rate": 1.301693985147195e-06, + "loss": 0.298, + "step": 38567 + }, + { + "epoch": 0.7720741686059606, + "grad_norm": 1.1887589693069458, + "learning_rate": 1.3014758244296688e-06, + "loss": 0.3182, + "step": 38568 + }, + { + "epoch": 0.7720941871230889, + "grad_norm": 1.0524753332138062, + "learning_rate": 1.3012576792599434e-06, + "loss": 0.305, + "step": 38569 + }, + { + "epoch": 0.7721142056402172, + "grad_norm": 1.2318159341812134, + "learning_rate": 1.3010395496389355e-06, + "loss": 0.3518, + "step": 38570 + }, + { + "epoch": 0.7721342241573456, + "grad_norm": 1.105640172958374, + "learning_rate": 1.3008214355675608e-06, + "loss": 0.2858, + "step": 38571 + }, + { + "epoch": 0.7721542426744739, + "grad_norm": 1.1452850103378296, + "learning_rate": 1.3006033370467357e-06, + "loss": 0.3263, + "step": 38572 + }, + { + "epoch": 0.7721742611916023, + "grad_norm": 1.0896414518356323, + "learning_rate": 1.3003852540773792e-06, + "loss": 0.2715, + "step": 38573 + }, + { + "epoch": 0.7721942797087306, + "grad_norm": 1.142167091369629, + "learning_rate": 1.300167186660407e-06, + "loss": 0.2462, + "step": 38574 + }, + { + "epoch": 0.772214298225859, + "grad_norm": 1.1350510120391846, + "learning_rate": 1.2999491347967359e-06, + "loss": 0.2812, + "step": 38575 + }, + { + "epoch": 0.7722343167429873, + "grad_norm": 1.1099382638931274, + "learning_rate": 1.299731098487282e-06, + "loss": 0.3114, + "step": 38576 + }, + { + "epoch": 0.7722543352601156, + "grad_norm": 1.2045912742614746, + "learning_rate": 1.2995130777329612e-06, + "loss": 0.3253, + "step": 38577 + }, + { + "epoch": 0.772274353777244, + "grad_norm": 1.161781668663025, + "learning_rate": 1.2992950725346903e-06, + "loss": 0.3245, + "step": 38578 + }, + { + "epoch": 0.7722943722943723, + "grad_norm": 1.1164458990097046, + "learning_rate": 1.299077082893389e-06, + "loss": 0.3266, + "step": 38579 + }, + { + "epoch": 0.7723143908115007, + "grad_norm": 1.9478858709335327, + "learning_rate": 1.2988591088099707e-06, + "loss": 0.8025, + "step": 38580 + }, + { + "epoch": 0.772334409328629, + "grad_norm": 1.1362712383270264, + "learning_rate": 1.2986411502853507e-06, + "loss": 0.2988, + "step": 38581 + }, + { + "epoch": 0.7723544278457574, + "grad_norm": 1.1982145309448242, + "learning_rate": 1.2984232073204484e-06, + "loss": 0.3857, + "step": 38582 + }, + { + "epoch": 0.7723744463628857, + "grad_norm": 1.1809098720550537, + "learning_rate": 1.2982052799161783e-06, + "loss": 0.2851, + "step": 38583 + }, + { + "epoch": 0.7723944648800141, + "grad_norm": 1.0533314943313599, + "learning_rate": 1.2979873680734562e-06, + "loss": 0.2856, + "step": 38584 + }, + { + "epoch": 0.7724144833971424, + "grad_norm": 1.8247387409210205, + "learning_rate": 1.2977694717931976e-06, + "loss": 0.8252, + "step": 38585 + }, + { + "epoch": 0.7724345019142707, + "grad_norm": 0.9868065714836121, + "learning_rate": 1.297551591076321e-06, + "loss": 0.2533, + "step": 38586 + }, + { + "epoch": 0.7724545204313991, + "grad_norm": 1.137416958808899, + "learning_rate": 1.2973337259237401e-06, + "loss": 0.2963, + "step": 38587 + }, + { + "epoch": 0.7724745389485274, + "grad_norm": 1.080379605293274, + "learning_rate": 1.2971158763363716e-06, + "loss": 0.2919, + "step": 38588 + }, + { + "epoch": 0.7724945574656558, + "grad_norm": 1.1333521604537964, + "learning_rate": 1.2968980423151311e-06, + "loss": 0.2981, + "step": 38589 + }, + { + "epoch": 0.7725145759827841, + "grad_norm": 1.153160572052002, + "learning_rate": 1.2966802238609333e-06, + "loss": 0.2319, + "step": 38590 + }, + { + "epoch": 0.7725345944999125, + "grad_norm": 1.1537373065948486, + "learning_rate": 1.2964624209746946e-06, + "loss": 0.3166, + "step": 38591 + }, + { + "epoch": 0.7725546130170408, + "grad_norm": 1.1567816734313965, + "learning_rate": 1.296244633657332e-06, + "loss": 0.3192, + "step": 38592 + }, + { + "epoch": 0.772574631534169, + "grad_norm": 1.984845519065857, + "learning_rate": 1.2960268619097599e-06, + "loss": 0.7802, + "step": 38593 + }, + { + "epoch": 0.7725946500512975, + "grad_norm": 1.0147640705108643, + "learning_rate": 1.2958091057328925e-06, + "loss": 0.2691, + "step": 38594 + }, + { + "epoch": 0.7726146685684258, + "grad_norm": 1.358881950378418, + "learning_rate": 1.2955913651276481e-06, + "loss": 0.3019, + "step": 38595 + }, + { + "epoch": 0.7726346870855542, + "grad_norm": 1.1209301948547363, + "learning_rate": 1.2953736400949401e-06, + "loss": 0.2951, + "step": 38596 + }, + { + "epoch": 0.7726547056026825, + "grad_norm": 1.2747528553009033, + "learning_rate": 1.2951559306356848e-06, + "loss": 0.2832, + "step": 38597 + }, + { + "epoch": 0.7726747241198109, + "grad_norm": 1.0020437240600586, + "learning_rate": 1.2949382367507957e-06, + "loss": 0.2445, + "step": 38598 + }, + { + "epoch": 0.7726947426369392, + "grad_norm": 1.9548600912094116, + "learning_rate": 1.2947205584411882e-06, + "loss": 0.7701, + "step": 38599 + }, + { + "epoch": 0.7727147611540676, + "grad_norm": 1.151814579963684, + "learning_rate": 1.2945028957077793e-06, + "loss": 0.2856, + "step": 38600 + }, + { + "epoch": 0.7727347796711959, + "grad_norm": 1.3426226377487183, + "learning_rate": 1.294285248551483e-06, + "loss": 0.2776, + "step": 38601 + }, + { + "epoch": 0.7727547981883242, + "grad_norm": 1.1373201608657837, + "learning_rate": 1.2940676169732136e-06, + "loss": 0.3035, + "step": 38602 + }, + { + "epoch": 0.7727748167054526, + "grad_norm": 1.0261772871017456, + "learning_rate": 1.2938500009738847e-06, + "loss": 0.2952, + "step": 38603 + }, + { + "epoch": 0.7727948352225809, + "grad_norm": 1.1876312494277954, + "learning_rate": 1.2936324005544131e-06, + "loss": 0.2605, + "step": 38604 + }, + { + "epoch": 0.7728148537397093, + "grad_norm": 0.9923000931739807, + "learning_rate": 1.2934148157157151e-06, + "loss": 0.2726, + "step": 38605 + }, + { + "epoch": 0.7728348722568376, + "grad_norm": 1.1288609504699707, + "learning_rate": 1.2931972464587034e-06, + "loss": 0.2892, + "step": 38606 + }, + { + "epoch": 0.772854890773966, + "grad_norm": 1.1235929727554321, + "learning_rate": 1.2929796927842926e-06, + "loss": 0.2676, + "step": 38607 + }, + { + "epoch": 0.7728749092910943, + "grad_norm": 1.2353851795196533, + "learning_rate": 1.292762154693396e-06, + "loss": 0.3146, + "step": 38608 + }, + { + "epoch": 0.7728949278082226, + "grad_norm": 1.1529712677001953, + "learning_rate": 1.292544632186931e-06, + "loss": 0.3178, + "step": 38609 + }, + { + "epoch": 0.772914946325351, + "grad_norm": 1.1794873476028442, + "learning_rate": 1.2923271252658103e-06, + "loss": 0.3612, + "step": 38610 + }, + { + "epoch": 0.7729349648424793, + "grad_norm": 1.0350892543792725, + "learning_rate": 1.2921096339309491e-06, + "loss": 0.2615, + "step": 38611 + }, + { + "epoch": 0.7729549833596077, + "grad_norm": 1.3684191703796387, + "learning_rate": 1.2918921581832588e-06, + "loss": 0.2585, + "step": 38612 + }, + { + "epoch": 0.772975001876736, + "grad_norm": 1.311460018157959, + "learning_rate": 1.2916746980236578e-06, + "loss": 0.276, + "step": 38613 + }, + { + "epoch": 0.7729950203938644, + "grad_norm": 1.1392548084259033, + "learning_rate": 1.2914572534530583e-06, + "loss": 0.3176, + "step": 38614 + }, + { + "epoch": 0.7730150389109927, + "grad_norm": 1.1280725002288818, + "learning_rate": 1.2912398244723745e-06, + "loss": 0.2889, + "step": 38615 + }, + { + "epoch": 0.7730350574281211, + "grad_norm": 1.926086664199829, + "learning_rate": 1.2910224110825187e-06, + "loss": 0.7532, + "step": 38616 + }, + { + "epoch": 0.7730550759452494, + "grad_norm": 1.1698747873306274, + "learning_rate": 1.2908050132844064e-06, + "loss": 0.2913, + "step": 38617 + }, + { + "epoch": 0.7730750944623777, + "grad_norm": 1.9462803602218628, + "learning_rate": 1.2905876310789533e-06, + "loss": 0.7548, + "step": 38618 + }, + { + "epoch": 0.7730951129795061, + "grad_norm": 1.0679186582565308, + "learning_rate": 1.2903702644670712e-06, + "loss": 0.3038, + "step": 38619 + }, + { + "epoch": 0.7731151314966344, + "grad_norm": 1.1292840242385864, + "learning_rate": 1.2901529134496749e-06, + "loss": 0.32, + "step": 38620 + }, + { + "epoch": 0.7731351500137628, + "grad_norm": 1.06437349319458, + "learning_rate": 1.2899355780276751e-06, + "loss": 0.2886, + "step": 38621 + }, + { + "epoch": 0.7731551685308911, + "grad_norm": 1.2343958616256714, + "learning_rate": 1.2897182582019897e-06, + "loss": 0.2961, + "step": 38622 + }, + { + "epoch": 0.7731751870480195, + "grad_norm": 1.230725646018982, + "learning_rate": 1.2895009539735298e-06, + "loss": 0.3157, + "step": 38623 + }, + { + "epoch": 0.7731952055651478, + "grad_norm": 1.1247358322143555, + "learning_rate": 1.28928366534321e-06, + "loss": 0.311, + "step": 38624 + }, + { + "epoch": 0.773215224082276, + "grad_norm": 1.0366946458816528, + "learning_rate": 1.2890663923119412e-06, + "loss": 0.2664, + "step": 38625 + }, + { + "epoch": 0.7732352425994045, + "grad_norm": 1.0632548332214355, + "learning_rate": 1.2888491348806402e-06, + "loss": 0.2872, + "step": 38626 + }, + { + "epoch": 0.7732552611165328, + "grad_norm": 1.1996058225631714, + "learning_rate": 1.2886318930502189e-06, + "loss": 0.2788, + "step": 38627 + }, + { + "epoch": 0.7732752796336612, + "grad_norm": 1.2301076650619507, + "learning_rate": 1.2884146668215903e-06, + "loss": 0.3031, + "step": 38628 + }, + { + "epoch": 0.7732952981507895, + "grad_norm": 1.1184109449386597, + "learning_rate": 1.2881974561956662e-06, + "loss": 0.3055, + "step": 38629 + }, + { + "epoch": 0.7733153166679179, + "grad_norm": 1.1222749948501587, + "learning_rate": 1.2879802611733617e-06, + "loss": 0.3392, + "step": 38630 + }, + { + "epoch": 0.7733353351850462, + "grad_norm": 1.0577800273895264, + "learning_rate": 1.2877630817555902e-06, + "loss": 0.2782, + "step": 38631 + }, + { + "epoch": 0.7733553537021746, + "grad_norm": 1.154030442237854, + "learning_rate": 1.2875459179432641e-06, + "loss": 0.3062, + "step": 38632 + }, + { + "epoch": 0.7733753722193029, + "grad_norm": 0.9961863160133362, + "learning_rate": 1.287328769737296e-06, + "loss": 0.3259, + "step": 38633 + }, + { + "epoch": 0.7733953907364312, + "grad_norm": 1.2132537364959717, + "learning_rate": 1.2871116371385971e-06, + "loss": 0.326, + "step": 38634 + }, + { + "epoch": 0.7734154092535596, + "grad_norm": 1.1008079051971436, + "learning_rate": 1.2868945201480837e-06, + "loss": 0.298, + "step": 38635 + }, + { + "epoch": 0.7734354277706879, + "grad_norm": 1.101042628288269, + "learning_rate": 1.2866774187666664e-06, + "loss": 0.2943, + "step": 38636 + }, + { + "epoch": 0.7734554462878163, + "grad_norm": 1.122218132019043, + "learning_rate": 1.286460332995258e-06, + "loss": 0.3056, + "step": 38637 + }, + { + "epoch": 0.7734754648049446, + "grad_norm": 1.0737277269363403, + "learning_rate": 1.2862432628347716e-06, + "loss": 0.2737, + "step": 38638 + }, + { + "epoch": 0.773495483322073, + "grad_norm": 1.1982340812683105, + "learning_rate": 1.2860262082861174e-06, + "loss": 0.3121, + "step": 38639 + }, + { + "epoch": 0.7735155018392013, + "grad_norm": 1.1135272979736328, + "learning_rate": 1.2858091693502116e-06, + "loss": 0.2691, + "step": 38640 + }, + { + "epoch": 0.7735355203563296, + "grad_norm": 1.16537606716156, + "learning_rate": 1.2855921460279647e-06, + "loss": 0.2765, + "step": 38641 + }, + { + "epoch": 0.773555538873458, + "grad_norm": 1.033944845199585, + "learning_rate": 1.2853751383202873e-06, + "loss": 0.2806, + "step": 38642 + }, + { + "epoch": 0.7735755573905863, + "grad_norm": 1.1019212007522583, + "learning_rate": 1.2851581462280937e-06, + "loss": 0.2815, + "step": 38643 + }, + { + "epoch": 0.7735955759077147, + "grad_norm": 1.9926517009735107, + "learning_rate": 1.2849411697522973e-06, + "loss": 0.7434, + "step": 38644 + }, + { + "epoch": 0.773615594424843, + "grad_norm": 1.8216191530227661, + "learning_rate": 1.284724208893809e-06, + "loss": 0.757, + "step": 38645 + }, + { + "epoch": 0.7736356129419714, + "grad_norm": 1.014501690864563, + "learning_rate": 1.28450726365354e-06, + "loss": 0.318, + "step": 38646 + }, + { + "epoch": 0.7736556314590997, + "grad_norm": 1.175481915473938, + "learning_rate": 1.2842903340324014e-06, + "loss": 0.2896, + "step": 38647 + }, + { + "epoch": 0.7736756499762281, + "grad_norm": 1.167502522468567, + "learning_rate": 1.2840734200313087e-06, + "loss": 0.3393, + "step": 38648 + }, + { + "epoch": 0.7736956684933564, + "grad_norm": 1.0921316146850586, + "learning_rate": 1.2838565216511712e-06, + "loss": 0.2658, + "step": 38649 + }, + { + "epoch": 0.7737156870104847, + "grad_norm": 1.2475897073745728, + "learning_rate": 1.2836396388929011e-06, + "loss": 0.2801, + "step": 38650 + }, + { + "epoch": 0.7737357055276131, + "grad_norm": 1.0539790391921997, + "learning_rate": 1.2834227717574105e-06, + "loss": 0.2728, + "step": 38651 + }, + { + "epoch": 0.7737557240447414, + "grad_norm": 1.126246690750122, + "learning_rate": 1.2832059202456092e-06, + "loss": 0.2721, + "step": 38652 + }, + { + "epoch": 0.7737757425618698, + "grad_norm": 1.018869400024414, + "learning_rate": 1.2829890843584119e-06, + "loss": 0.2987, + "step": 38653 + }, + { + "epoch": 0.7737957610789981, + "grad_norm": 2.118709087371826, + "learning_rate": 1.2827722640967282e-06, + "loss": 0.8127, + "step": 38654 + }, + { + "epoch": 0.7738157795961265, + "grad_norm": 1.4073580503463745, + "learning_rate": 1.2825554594614687e-06, + "loss": 0.2992, + "step": 38655 + }, + { + "epoch": 0.7738357981132548, + "grad_norm": 1.2801226377487183, + "learning_rate": 1.2823386704535462e-06, + "loss": 0.3031, + "step": 38656 + }, + { + "epoch": 0.773855816630383, + "grad_norm": 1.0580298900604248, + "learning_rate": 1.282121897073873e-06, + "loss": 0.3215, + "step": 38657 + }, + { + "epoch": 0.7738758351475115, + "grad_norm": 1.3207244873046875, + "learning_rate": 1.2819051393233596e-06, + "loss": 0.3155, + "step": 38658 + }, + { + "epoch": 0.7738958536646398, + "grad_norm": 1.0991337299346924, + "learning_rate": 1.2816883972029166e-06, + "loss": 0.274, + "step": 38659 + }, + { + "epoch": 0.7739158721817682, + "grad_norm": 1.164220929145813, + "learning_rate": 1.2814716707134555e-06, + "loss": 0.2491, + "step": 38660 + }, + { + "epoch": 0.7739358906988965, + "grad_norm": 1.2434464693069458, + "learning_rate": 1.281254959855886e-06, + "loss": 0.3113, + "step": 38661 + }, + { + "epoch": 0.7739559092160249, + "grad_norm": 1.09331476688385, + "learning_rate": 1.2810382646311215e-06, + "loss": 0.2692, + "step": 38662 + }, + { + "epoch": 0.7739759277331532, + "grad_norm": 1.0941309928894043, + "learning_rate": 1.2808215850400718e-06, + "loss": 0.3107, + "step": 38663 + }, + { + "epoch": 0.7739959462502816, + "grad_norm": 1.9076039791107178, + "learning_rate": 1.280604921083648e-06, + "loss": 0.745, + "step": 38664 + }, + { + "epoch": 0.7740159647674099, + "grad_norm": 1.0915802717208862, + "learning_rate": 1.2803882727627592e-06, + "loss": 0.3336, + "step": 38665 + }, + { + "epoch": 0.7740359832845382, + "grad_norm": 1.1834181547164917, + "learning_rate": 1.2801716400783187e-06, + "loss": 0.2965, + "step": 38666 + }, + { + "epoch": 0.7740560018016666, + "grad_norm": 1.1386827230453491, + "learning_rate": 1.279955023031237e-06, + "loss": 0.2224, + "step": 38667 + }, + { + "epoch": 0.7740760203187949, + "grad_norm": 1.1033697128295898, + "learning_rate": 1.2797384216224211e-06, + "loss": 0.2938, + "step": 38668 + }, + { + "epoch": 0.7740960388359233, + "grad_norm": 1.0460830926895142, + "learning_rate": 1.2795218358527867e-06, + "loss": 0.2596, + "step": 38669 + }, + { + "epoch": 0.7741160573530516, + "grad_norm": 2.018437623977661, + "learning_rate": 1.2793052657232403e-06, + "loss": 0.7373, + "step": 38670 + }, + { + "epoch": 0.77413607587018, + "grad_norm": 1.1585975885391235, + "learning_rate": 1.279088711234695e-06, + "loss": 0.307, + "step": 38671 + }, + { + "epoch": 0.7741560943873083, + "grad_norm": 1.1203936338424683, + "learning_rate": 1.2788721723880599e-06, + "loss": 0.3549, + "step": 38672 + }, + { + "epoch": 0.7741761129044366, + "grad_norm": 1.149956464767456, + "learning_rate": 1.2786556491842456e-06, + "loss": 0.2777, + "step": 38673 + }, + { + "epoch": 0.774196131421565, + "grad_norm": 1.2040244340896606, + "learning_rate": 1.278439141624161e-06, + "loss": 0.3507, + "step": 38674 + }, + { + "epoch": 0.7742161499386933, + "grad_norm": 1.7954202890396118, + "learning_rate": 1.2782226497087186e-06, + "loss": 0.7638, + "step": 38675 + }, + { + "epoch": 0.7742361684558217, + "grad_norm": 1.215712308883667, + "learning_rate": 1.2780061734388272e-06, + "loss": 0.3231, + "step": 38676 + }, + { + "epoch": 0.77425618697295, + "grad_norm": 1.2867984771728516, + "learning_rate": 1.2777897128153965e-06, + "loss": 0.3193, + "step": 38677 + }, + { + "epoch": 0.7742762054900784, + "grad_norm": 1.176547884941101, + "learning_rate": 1.2775732678393355e-06, + "loss": 0.3302, + "step": 38678 + }, + { + "epoch": 0.7742962240072067, + "grad_norm": 1.076117992401123, + "learning_rate": 1.277356838511557e-06, + "loss": 0.3015, + "step": 38679 + }, + { + "epoch": 0.7743162425243351, + "grad_norm": 1.189673662185669, + "learning_rate": 1.2771404248329694e-06, + "loss": 0.317, + "step": 38680 + }, + { + "epoch": 0.7743362610414634, + "grad_norm": 1.1199877262115479, + "learning_rate": 1.2769240268044809e-06, + "loss": 0.308, + "step": 38681 + }, + { + "epoch": 0.7743562795585917, + "grad_norm": 1.922632098197937, + "learning_rate": 1.2767076444270037e-06, + "loss": 0.7711, + "step": 38682 + }, + { + "epoch": 0.7743762980757201, + "grad_norm": 1.2156785726547241, + "learning_rate": 1.276491277701445e-06, + "loss": 0.3244, + "step": 38683 + }, + { + "epoch": 0.7743963165928484, + "grad_norm": 1.2766942977905273, + "learning_rate": 1.2762749266287173e-06, + "loss": 0.2513, + "step": 38684 + }, + { + "epoch": 0.7744163351099768, + "grad_norm": 1.797873616218567, + "learning_rate": 1.2760585912097279e-06, + "loss": 0.7527, + "step": 38685 + }, + { + "epoch": 0.7744363536271051, + "grad_norm": 1.0937986373901367, + "learning_rate": 1.2758422714453871e-06, + "loss": 0.3167, + "step": 38686 + }, + { + "epoch": 0.7744563721442335, + "grad_norm": 2.0181236267089844, + "learning_rate": 1.2756259673366022e-06, + "loss": 0.7528, + "step": 38687 + }, + { + "epoch": 0.7744763906613618, + "grad_norm": 1.1477495431900024, + "learning_rate": 1.2754096788842862e-06, + "loss": 0.3259, + "step": 38688 + }, + { + "epoch": 0.77449640917849, + "grad_norm": 1.1318955421447754, + "learning_rate": 1.2751934060893462e-06, + "loss": 0.3177, + "step": 38689 + }, + { + "epoch": 0.7745164276956185, + "grad_norm": 1.1290451288223267, + "learning_rate": 1.2749771489526918e-06, + "loss": 0.2984, + "step": 38690 + }, + { + "epoch": 0.7745364462127468, + "grad_norm": 1.207627534866333, + "learning_rate": 1.2747609074752315e-06, + "loss": 0.2816, + "step": 38691 + }, + { + "epoch": 0.7745564647298752, + "grad_norm": 1.0438802242279053, + "learning_rate": 1.2745446816578734e-06, + "loss": 0.2858, + "step": 38692 + }, + { + "epoch": 0.7745764832470035, + "grad_norm": 1.044823169708252, + "learning_rate": 1.2743284715015287e-06, + "loss": 0.2712, + "step": 38693 + }, + { + "epoch": 0.7745965017641319, + "grad_norm": 1.1087931394577026, + "learning_rate": 1.274112277007104e-06, + "loss": 0.3241, + "step": 38694 + }, + { + "epoch": 0.7746165202812602, + "grad_norm": 1.279229760169983, + "learning_rate": 1.2738960981755112e-06, + "loss": 0.3244, + "step": 38695 + }, + { + "epoch": 0.7746365387983886, + "grad_norm": 1.130239486694336, + "learning_rate": 1.2736799350076563e-06, + "loss": 0.2987, + "step": 38696 + }, + { + "epoch": 0.7746565573155169, + "grad_norm": 1.0503273010253906, + "learning_rate": 1.2734637875044498e-06, + "loss": 0.2656, + "step": 38697 + }, + { + "epoch": 0.7746765758326452, + "grad_norm": 1.1695963144302368, + "learning_rate": 1.2732476556667995e-06, + "loss": 0.348, + "step": 38698 + }, + { + "epoch": 0.7746965943497736, + "grad_norm": 1.0866587162017822, + "learning_rate": 1.2730315394956145e-06, + "loss": 0.2913, + "step": 38699 + }, + { + "epoch": 0.7747166128669019, + "grad_norm": 1.030852198600769, + "learning_rate": 1.272815438991803e-06, + "loss": 0.3022, + "step": 38700 + }, + { + "epoch": 0.7747366313840303, + "grad_norm": 1.2204513549804688, + "learning_rate": 1.2725993541562715e-06, + "loss": 0.3393, + "step": 38701 + }, + { + "epoch": 0.7747566499011586, + "grad_norm": 1.2095731496810913, + "learning_rate": 1.2723832849899314e-06, + "loss": 0.3016, + "step": 38702 + }, + { + "epoch": 0.774776668418287, + "grad_norm": 1.1342617273330688, + "learning_rate": 1.27216723149369e-06, + "loss": 0.3082, + "step": 38703 + }, + { + "epoch": 0.7747966869354153, + "grad_norm": 1.220366358757019, + "learning_rate": 1.2719511936684553e-06, + "loss": 0.2439, + "step": 38704 + }, + { + "epoch": 0.7748167054525436, + "grad_norm": 1.2197214365005493, + "learning_rate": 1.2717351715151343e-06, + "loss": 0.2744, + "step": 38705 + }, + { + "epoch": 0.774836723969672, + "grad_norm": 1.0813922882080078, + "learning_rate": 1.2715191650346375e-06, + "loss": 0.2755, + "step": 38706 + }, + { + "epoch": 0.7748567424868003, + "grad_norm": 1.1414794921875, + "learning_rate": 1.2713031742278704e-06, + "loss": 0.3144, + "step": 38707 + }, + { + "epoch": 0.7748767610039287, + "grad_norm": 1.1163300275802612, + "learning_rate": 1.2710871990957436e-06, + "loss": 0.2549, + "step": 38708 + }, + { + "epoch": 0.774896779521057, + "grad_norm": 1.2819623947143555, + "learning_rate": 1.2708712396391625e-06, + "loss": 0.302, + "step": 38709 + }, + { + "epoch": 0.7749167980381854, + "grad_norm": 1.1181308031082153, + "learning_rate": 1.2706552958590374e-06, + "loss": 0.2862, + "step": 38710 + }, + { + "epoch": 0.7749368165553137, + "grad_norm": 1.1346979141235352, + "learning_rate": 1.2704393677562748e-06, + "loss": 0.3166, + "step": 38711 + }, + { + "epoch": 0.7749568350724421, + "grad_norm": 0.9950791597366333, + "learning_rate": 1.2702234553317832e-06, + "loss": 0.2635, + "step": 38712 + }, + { + "epoch": 0.7749768535895704, + "grad_norm": 1.1555955410003662, + "learning_rate": 1.2700075585864691e-06, + "loss": 0.316, + "step": 38713 + }, + { + "epoch": 0.7749968721066987, + "grad_norm": 1.259652018547058, + "learning_rate": 1.2697916775212388e-06, + "loss": 0.272, + "step": 38714 + }, + { + "epoch": 0.7750168906238271, + "grad_norm": 1.8928223848342896, + "learning_rate": 1.2695758121370034e-06, + "loss": 0.7302, + "step": 38715 + }, + { + "epoch": 0.7750369091409554, + "grad_norm": 1.1530983448028564, + "learning_rate": 1.269359962434668e-06, + "loss": 0.2877, + "step": 38716 + }, + { + "epoch": 0.7750569276580838, + "grad_norm": 1.279140591621399, + "learning_rate": 1.2691441284151413e-06, + "loss": 0.2723, + "step": 38717 + }, + { + "epoch": 0.7750769461752121, + "grad_norm": 1.1198054552078247, + "learning_rate": 1.2689283100793276e-06, + "loss": 0.2606, + "step": 38718 + }, + { + "epoch": 0.7750969646923405, + "grad_norm": 1.1196776628494263, + "learning_rate": 1.268712507428138e-06, + "loss": 0.2846, + "step": 38719 + }, + { + "epoch": 0.7751169832094688, + "grad_norm": 1.0944029092788696, + "learning_rate": 1.2684967204624766e-06, + "loss": 0.2897, + "step": 38720 + }, + { + "epoch": 0.775137001726597, + "grad_norm": 1.0886303186416626, + "learning_rate": 1.2682809491832533e-06, + "loss": 0.2889, + "step": 38721 + }, + { + "epoch": 0.7751570202437255, + "grad_norm": 1.1813199520111084, + "learning_rate": 1.268065193591374e-06, + "loss": 0.3339, + "step": 38722 + }, + { + "epoch": 0.7751770387608538, + "grad_norm": 1.1747865676879883, + "learning_rate": 1.2678494536877438e-06, + "loss": 0.2988, + "step": 38723 + }, + { + "epoch": 0.7751970572779822, + "grad_norm": 1.1001683473587036, + "learning_rate": 1.2676337294732733e-06, + "loss": 0.2823, + "step": 38724 + }, + { + "epoch": 0.7752170757951105, + "grad_norm": 1.0432974100112915, + "learning_rate": 1.2674180209488667e-06, + "loss": 0.3017, + "step": 38725 + }, + { + "epoch": 0.7752370943122389, + "grad_norm": 1.2049787044525146, + "learning_rate": 1.2672023281154323e-06, + "loss": 0.3734, + "step": 38726 + }, + { + "epoch": 0.7752571128293672, + "grad_norm": 1.1340060234069824, + "learning_rate": 1.2669866509738738e-06, + "loss": 0.2756, + "step": 38727 + }, + { + "epoch": 0.7752771313464956, + "grad_norm": 1.028708815574646, + "learning_rate": 1.2667709895251018e-06, + "loss": 0.2838, + "step": 38728 + }, + { + "epoch": 0.7752971498636239, + "grad_norm": 1.03982412815094, + "learning_rate": 1.2665553437700211e-06, + "loss": 0.3028, + "step": 38729 + }, + { + "epoch": 0.7753171683807522, + "grad_norm": 1.1026140451431274, + "learning_rate": 1.2663397137095385e-06, + "loss": 0.3188, + "step": 38730 + }, + { + "epoch": 0.7753371868978806, + "grad_norm": 1.9924780130386353, + "learning_rate": 1.266124099344559e-06, + "loss": 0.7162, + "step": 38731 + }, + { + "epoch": 0.7753572054150089, + "grad_norm": 1.3474234342575073, + "learning_rate": 1.2659085006759913e-06, + "loss": 0.3221, + "step": 38732 + }, + { + "epoch": 0.7753772239321373, + "grad_norm": 0.9582845568656921, + "learning_rate": 1.2656929177047389e-06, + "loss": 0.2572, + "step": 38733 + }, + { + "epoch": 0.7753972424492656, + "grad_norm": 1.0703003406524658, + "learning_rate": 1.265477350431712e-06, + "loss": 0.2969, + "step": 38734 + }, + { + "epoch": 0.775417260966394, + "grad_norm": 1.0562858581542969, + "learning_rate": 1.2652617988578143e-06, + "loss": 0.283, + "step": 38735 + }, + { + "epoch": 0.7754372794835223, + "grad_norm": 1.0949851274490356, + "learning_rate": 1.265046262983951e-06, + "loss": 0.2698, + "step": 38736 + }, + { + "epoch": 0.7754572980006506, + "grad_norm": 1.295478343963623, + "learning_rate": 1.2648307428110307e-06, + "loss": 0.343, + "step": 38737 + }, + { + "epoch": 0.775477316517779, + "grad_norm": 1.2706776857376099, + "learning_rate": 1.2646152383399579e-06, + "loss": 0.2899, + "step": 38738 + }, + { + "epoch": 0.7754973350349073, + "grad_norm": 1.3634754419326782, + "learning_rate": 1.2643997495716392e-06, + "loss": 0.2788, + "step": 38739 + }, + { + "epoch": 0.7755173535520357, + "grad_norm": 1.135340929031372, + "learning_rate": 1.2641842765069785e-06, + "loss": 0.2844, + "step": 38740 + }, + { + "epoch": 0.775537372069164, + "grad_norm": 1.0939658880233765, + "learning_rate": 1.2639688191468846e-06, + "loss": 0.2792, + "step": 38741 + }, + { + "epoch": 0.7755573905862924, + "grad_norm": 1.0487818717956543, + "learning_rate": 1.2637533774922617e-06, + "loss": 0.2951, + "step": 38742 + }, + { + "epoch": 0.7755774091034207, + "grad_norm": 1.2731239795684814, + "learning_rate": 1.2635379515440156e-06, + "loss": 0.3118, + "step": 38743 + }, + { + "epoch": 0.7755974276205491, + "grad_norm": 1.080722451210022, + "learning_rate": 1.2633225413030504e-06, + "loss": 0.3102, + "step": 38744 + }, + { + "epoch": 0.7756174461376774, + "grad_norm": 1.1238216161727905, + "learning_rate": 1.2631071467702743e-06, + "loss": 0.3203, + "step": 38745 + }, + { + "epoch": 0.7756374646548057, + "grad_norm": 1.2065438032150269, + "learning_rate": 1.2628917679465901e-06, + "loss": 0.3302, + "step": 38746 + }, + { + "epoch": 0.7756574831719341, + "grad_norm": 1.0305436849594116, + "learning_rate": 1.2626764048329065e-06, + "loss": 0.3196, + "step": 38747 + }, + { + "epoch": 0.7756775016890624, + "grad_norm": 1.198499083518982, + "learning_rate": 1.2624610574301272e-06, + "loss": 0.3131, + "step": 38748 + }, + { + "epoch": 0.7756975202061908, + "grad_norm": 1.053407907485962, + "learning_rate": 1.2622457257391552e-06, + "loss": 0.2511, + "step": 38749 + }, + { + "epoch": 0.7757175387233191, + "grad_norm": 1.248410940170288, + "learning_rate": 1.2620304097609e-06, + "loss": 0.2829, + "step": 38750 + }, + { + "epoch": 0.7757375572404475, + "grad_norm": 1.0394017696380615, + "learning_rate": 1.261815109496264e-06, + "loss": 0.2715, + "step": 38751 + }, + { + "epoch": 0.7757575757575758, + "grad_norm": 1.1468359231948853, + "learning_rate": 1.2615998249461524e-06, + "loss": 0.3539, + "step": 38752 + }, + { + "epoch": 0.775777594274704, + "grad_norm": 0.9983379244804382, + "learning_rate": 1.2613845561114712e-06, + "loss": 0.2886, + "step": 38753 + }, + { + "epoch": 0.7757976127918325, + "grad_norm": 1.039016604423523, + "learning_rate": 1.2611693029931238e-06, + "loss": 0.2772, + "step": 38754 + }, + { + "epoch": 0.7758176313089608, + "grad_norm": 1.3963489532470703, + "learning_rate": 1.2609540655920165e-06, + "loss": 0.3222, + "step": 38755 + }, + { + "epoch": 0.7758376498260892, + "grad_norm": 1.0821106433868408, + "learning_rate": 1.2607388439090546e-06, + "loss": 0.3042, + "step": 38756 + }, + { + "epoch": 0.7758576683432175, + "grad_norm": 1.0194189548492432, + "learning_rate": 1.2605236379451396e-06, + "loss": 0.3123, + "step": 38757 + }, + { + "epoch": 0.7758776868603459, + "grad_norm": 1.0992772579193115, + "learning_rate": 1.26030844770118e-06, + "loss": 0.2732, + "step": 38758 + }, + { + "epoch": 0.7758977053774742, + "grad_norm": 1.173714518547058, + "learning_rate": 1.260093273178078e-06, + "loss": 0.2661, + "step": 38759 + }, + { + "epoch": 0.7759177238946026, + "grad_norm": 1.1179935932159424, + "learning_rate": 1.2598781143767402e-06, + "loss": 0.3105, + "step": 38760 + }, + { + "epoch": 0.7759377424117309, + "grad_norm": 1.0851598978042603, + "learning_rate": 1.25966297129807e-06, + "loss": 0.2853, + "step": 38761 + }, + { + "epoch": 0.7759577609288592, + "grad_norm": 1.400087594985962, + "learning_rate": 1.2594478439429702e-06, + "loss": 0.2799, + "step": 38762 + }, + { + "epoch": 0.7759777794459876, + "grad_norm": 1.0672550201416016, + "learning_rate": 1.2592327323123482e-06, + "loss": 0.3248, + "step": 38763 + }, + { + "epoch": 0.7759977979631159, + "grad_norm": 1.2088871002197266, + "learning_rate": 1.2590176364071066e-06, + "loss": 0.2717, + "step": 38764 + }, + { + "epoch": 0.7760178164802443, + "grad_norm": 1.0983009338378906, + "learning_rate": 1.2588025562281503e-06, + "loss": 0.3134, + "step": 38765 + }, + { + "epoch": 0.7760378349973726, + "grad_norm": 1.0941834449768066, + "learning_rate": 1.2585874917763823e-06, + "loss": 0.3024, + "step": 38766 + }, + { + "epoch": 0.776057853514501, + "grad_norm": 1.1503933668136597, + "learning_rate": 1.2583724430527062e-06, + "loss": 0.2887, + "step": 38767 + }, + { + "epoch": 0.7760778720316293, + "grad_norm": 1.1854133605957031, + "learning_rate": 1.2581574100580285e-06, + "loss": 0.3252, + "step": 38768 + }, + { + "epoch": 0.7760978905487576, + "grad_norm": 1.226332187652588, + "learning_rate": 1.2579423927932522e-06, + "loss": 0.3373, + "step": 38769 + }, + { + "epoch": 0.776117909065886, + "grad_norm": 1.2174910306930542, + "learning_rate": 1.2577273912592785e-06, + "loss": 0.2892, + "step": 38770 + }, + { + "epoch": 0.7761379275830143, + "grad_norm": 1.2190589904785156, + "learning_rate": 1.257512405457016e-06, + "loss": 0.319, + "step": 38771 + }, + { + "epoch": 0.7761579461001427, + "grad_norm": 1.6957857608795166, + "learning_rate": 1.2572974353873635e-06, + "loss": 0.3155, + "step": 38772 + }, + { + "epoch": 0.776177964617271, + "grad_norm": 1.0342333316802979, + "learning_rate": 1.2570824810512288e-06, + "loss": 0.2866, + "step": 38773 + }, + { + "epoch": 0.7761979831343994, + "grad_norm": 1.0805143117904663, + "learning_rate": 1.2568675424495142e-06, + "loss": 0.348, + "step": 38774 + }, + { + "epoch": 0.7762180016515277, + "grad_norm": 1.1951138973236084, + "learning_rate": 1.256652619583123e-06, + "loss": 0.2575, + "step": 38775 + }, + { + "epoch": 0.7762380201686561, + "grad_norm": 1.278131127357483, + "learning_rate": 1.256437712452957e-06, + "loss": 0.3345, + "step": 38776 + }, + { + "epoch": 0.7762580386857844, + "grad_norm": 1.1213892698287964, + "learning_rate": 1.2562228210599225e-06, + "loss": 0.2976, + "step": 38777 + }, + { + "epoch": 0.7762780572029127, + "grad_norm": 1.884478211402893, + "learning_rate": 1.2560079454049218e-06, + "loss": 0.7929, + "step": 38778 + }, + { + "epoch": 0.7762980757200411, + "grad_norm": 1.0223886966705322, + "learning_rate": 1.2557930854888578e-06, + "loss": 0.3264, + "step": 38779 + }, + { + "epoch": 0.7763180942371694, + "grad_norm": 1.1071611642837524, + "learning_rate": 1.2555782413126327e-06, + "loss": 0.3169, + "step": 38780 + }, + { + "epoch": 0.7763381127542978, + "grad_norm": 1.9872900247573853, + "learning_rate": 1.2553634128771525e-06, + "loss": 0.7546, + "step": 38781 + }, + { + "epoch": 0.776358131271426, + "grad_norm": 1.0752995014190674, + "learning_rate": 1.255148600183318e-06, + "loss": 0.3333, + "step": 38782 + }, + { + "epoch": 0.7763781497885545, + "grad_norm": 1.0588386058807373, + "learning_rate": 1.254933803232032e-06, + "loss": 0.2685, + "step": 38783 + }, + { + "epoch": 0.7763981683056828, + "grad_norm": 1.1635148525238037, + "learning_rate": 1.2547190220241995e-06, + "loss": 0.3135, + "step": 38784 + }, + { + "epoch": 0.776418186822811, + "grad_norm": 1.1138184070587158, + "learning_rate": 1.2545042565607208e-06, + "loss": 0.2638, + "step": 38785 + }, + { + "epoch": 0.7764382053399395, + "grad_norm": 1.2480093240737915, + "learning_rate": 1.2542895068425016e-06, + "loss": 0.2404, + "step": 38786 + }, + { + "epoch": 0.7764582238570678, + "grad_norm": 1.0891070365905762, + "learning_rate": 1.2540747728704434e-06, + "loss": 0.2934, + "step": 38787 + }, + { + "epoch": 0.7764782423741962, + "grad_norm": 1.1001747846603394, + "learning_rate": 1.2538600546454488e-06, + "loss": 0.2899, + "step": 38788 + }, + { + "epoch": 0.7764982608913245, + "grad_norm": 1.3317351341247559, + "learning_rate": 1.2536453521684182e-06, + "loss": 0.3538, + "step": 38789 + }, + { + "epoch": 0.7765182794084529, + "grad_norm": 1.4271870851516724, + "learning_rate": 1.2534306654402584e-06, + "loss": 0.3291, + "step": 38790 + }, + { + "epoch": 0.7765382979255812, + "grad_norm": 1.040372610092163, + "learning_rate": 1.2532159944618698e-06, + "loss": 0.2911, + "step": 38791 + }, + { + "epoch": 0.7765583164427095, + "grad_norm": 1.0529918670654297, + "learning_rate": 1.253001339234155e-06, + "loss": 0.3017, + "step": 38792 + }, + { + "epoch": 0.7765783349598379, + "grad_norm": 1.026046633720398, + "learning_rate": 1.2527866997580147e-06, + "loss": 0.2694, + "step": 38793 + }, + { + "epoch": 0.7765983534769662, + "grad_norm": 1.0843170881271362, + "learning_rate": 1.2525720760343536e-06, + "loss": 0.2931, + "step": 38794 + }, + { + "epoch": 0.7766183719940946, + "grad_norm": 1.0061566829681396, + "learning_rate": 1.2523574680640731e-06, + "loss": 0.254, + "step": 38795 + }, + { + "epoch": 0.7766383905112229, + "grad_norm": 1.216411828994751, + "learning_rate": 1.2521428758480746e-06, + "loss": 0.32, + "step": 38796 + }, + { + "epoch": 0.7766584090283513, + "grad_norm": 1.1241625547409058, + "learning_rate": 1.2519282993872616e-06, + "loss": 0.3045, + "step": 38797 + }, + { + "epoch": 0.7766784275454796, + "grad_norm": 1.096635103225708, + "learning_rate": 1.2517137386825346e-06, + "loss": 0.2715, + "step": 38798 + }, + { + "epoch": 0.776698446062608, + "grad_norm": 0.9867520332336426, + "learning_rate": 1.2514991937347976e-06, + "loss": 0.2498, + "step": 38799 + }, + { + "epoch": 0.7767184645797363, + "grad_norm": 0.9960129261016846, + "learning_rate": 1.2512846645449512e-06, + "loss": 0.2453, + "step": 38800 + }, + { + "epoch": 0.7767384830968646, + "grad_norm": 1.1364961862564087, + "learning_rate": 1.2510701511138974e-06, + "loss": 0.3243, + "step": 38801 + }, + { + "epoch": 0.776758501613993, + "grad_norm": 1.131470799446106, + "learning_rate": 1.2508556534425365e-06, + "loss": 0.2865, + "step": 38802 + }, + { + "epoch": 0.7767785201311213, + "grad_norm": 1.2470980882644653, + "learning_rate": 1.2506411715317729e-06, + "loss": 0.3108, + "step": 38803 + }, + { + "epoch": 0.7767985386482497, + "grad_norm": 1.0216073989868164, + "learning_rate": 1.250426705382507e-06, + "loss": 0.3094, + "step": 38804 + }, + { + "epoch": 0.776818557165378, + "grad_norm": 1.104041576385498, + "learning_rate": 1.2502122549956403e-06, + "loss": 0.3031, + "step": 38805 + }, + { + "epoch": 0.7768385756825064, + "grad_norm": 1.1684566736221313, + "learning_rate": 1.249997820372074e-06, + "loss": 0.28, + "step": 38806 + }, + { + "epoch": 0.7768585941996347, + "grad_norm": 1.1002508401870728, + "learning_rate": 1.2497834015127092e-06, + "loss": 0.3473, + "step": 38807 + }, + { + "epoch": 0.776878612716763, + "grad_norm": 1.1092206239700317, + "learning_rate": 1.249568998418449e-06, + "loss": 0.2751, + "step": 38808 + }, + { + "epoch": 0.7768986312338914, + "grad_norm": 1.0789533853530884, + "learning_rate": 1.2493546110901921e-06, + "loss": 0.2693, + "step": 38809 + }, + { + "epoch": 0.7769186497510197, + "grad_norm": 1.0867223739624023, + "learning_rate": 1.2491402395288427e-06, + "loss": 0.3318, + "step": 38810 + }, + { + "epoch": 0.7769386682681481, + "grad_norm": 0.9617967009544373, + "learning_rate": 1.2489258837352991e-06, + "loss": 0.2522, + "step": 38811 + }, + { + "epoch": 0.7769586867852764, + "grad_norm": 1.1680322885513306, + "learning_rate": 1.2487115437104652e-06, + "loss": 0.2671, + "step": 38812 + }, + { + "epoch": 0.7769787053024048, + "grad_norm": 1.0462331771850586, + "learning_rate": 1.2484972194552408e-06, + "loss": 0.2778, + "step": 38813 + }, + { + "epoch": 0.776998723819533, + "grad_norm": 1.1340054273605347, + "learning_rate": 1.248282910970527e-06, + "loss": 0.3091, + "step": 38814 + }, + { + "epoch": 0.7770187423366615, + "grad_norm": 1.1070711612701416, + "learning_rate": 1.2480686182572243e-06, + "loss": 0.2868, + "step": 38815 + }, + { + "epoch": 0.7770387608537898, + "grad_norm": 1.097350001335144, + "learning_rate": 1.247854341316232e-06, + "loss": 0.2953, + "step": 38816 + }, + { + "epoch": 0.777058779370918, + "grad_norm": 1.0903981924057007, + "learning_rate": 1.247640080148454e-06, + "loss": 0.2345, + "step": 38817 + }, + { + "epoch": 0.7770787978880465, + "grad_norm": 1.1091409921646118, + "learning_rate": 1.2474258347547902e-06, + "loss": 0.3118, + "step": 38818 + }, + { + "epoch": 0.7770988164051748, + "grad_norm": 1.124820590019226, + "learning_rate": 1.24721160513614e-06, + "loss": 0.297, + "step": 38819 + }, + { + "epoch": 0.7771188349223032, + "grad_norm": 1.3488373756408691, + "learning_rate": 1.2469973912934036e-06, + "loss": 0.2901, + "step": 38820 + }, + { + "epoch": 0.7771388534394315, + "grad_norm": 1.0930333137512207, + "learning_rate": 1.2467831932274838e-06, + "loss": 0.3295, + "step": 38821 + }, + { + "epoch": 0.7771588719565599, + "grad_norm": 1.456789493560791, + "learning_rate": 1.246569010939278e-06, + "loss": 0.2904, + "step": 38822 + }, + { + "epoch": 0.7771788904736882, + "grad_norm": 1.1372623443603516, + "learning_rate": 1.2463548444296902e-06, + "loss": 0.2989, + "step": 38823 + }, + { + "epoch": 0.7771989089908165, + "grad_norm": 1.0908946990966797, + "learning_rate": 1.2461406936996174e-06, + "loss": 0.3271, + "step": 38824 + }, + { + "epoch": 0.7772189275079449, + "grad_norm": 2.017461061477661, + "learning_rate": 1.2459265587499625e-06, + "loss": 0.7446, + "step": 38825 + }, + { + "epoch": 0.7772389460250732, + "grad_norm": 1.084244966506958, + "learning_rate": 1.2457124395816244e-06, + "loss": 0.2838, + "step": 38826 + }, + { + "epoch": 0.7772589645422016, + "grad_norm": 1.0728485584259033, + "learning_rate": 1.2454983361955031e-06, + "loss": 0.303, + "step": 38827 + }, + { + "epoch": 0.7772789830593299, + "grad_norm": 1.0587542057037354, + "learning_rate": 1.2452842485924993e-06, + "loss": 0.2805, + "step": 38828 + }, + { + "epoch": 0.7772990015764583, + "grad_norm": 1.080230474472046, + "learning_rate": 1.2450701767735107e-06, + "loss": 0.2607, + "step": 38829 + }, + { + "epoch": 0.7773190200935866, + "grad_norm": 1.1008152961730957, + "learning_rate": 1.2448561207394405e-06, + "loss": 0.3146, + "step": 38830 + }, + { + "epoch": 0.777339038610715, + "grad_norm": 1.8265101909637451, + "learning_rate": 1.2446420804911873e-06, + "loss": 0.7329, + "step": 38831 + }, + { + "epoch": 0.7773590571278433, + "grad_norm": 1.0565851926803589, + "learning_rate": 1.2444280560296506e-06, + "loss": 0.3349, + "step": 38832 + }, + { + "epoch": 0.7773790756449716, + "grad_norm": 1.3085321187973022, + "learning_rate": 1.2442140473557285e-06, + "loss": 0.2899, + "step": 38833 + }, + { + "epoch": 0.7773990941621, + "grad_norm": 1.0186327695846558, + "learning_rate": 1.244000054470324e-06, + "loss": 0.2518, + "step": 38834 + }, + { + "epoch": 0.7774191126792283, + "grad_norm": 1.0997382402420044, + "learning_rate": 1.2437860773743331e-06, + "loss": 0.3085, + "step": 38835 + }, + { + "epoch": 0.7774391311963567, + "grad_norm": 1.124350666999817, + "learning_rate": 1.2435721160686593e-06, + "loss": 0.3006, + "step": 38836 + }, + { + "epoch": 0.777459149713485, + "grad_norm": 1.1315133571624756, + "learning_rate": 1.2433581705541992e-06, + "loss": 0.2877, + "step": 38837 + }, + { + "epoch": 0.7774791682306134, + "grad_norm": 1.1723536252975464, + "learning_rate": 1.2431442408318518e-06, + "loss": 0.3215, + "step": 38838 + }, + { + "epoch": 0.7774991867477417, + "grad_norm": 1.296862006187439, + "learning_rate": 1.242930326902519e-06, + "loss": 0.3004, + "step": 38839 + }, + { + "epoch": 0.77751920526487, + "grad_norm": 1.2860617637634277, + "learning_rate": 1.2427164287670985e-06, + "loss": 0.3198, + "step": 38840 + }, + { + "epoch": 0.7775392237819984, + "grad_norm": 1.0729748010635376, + "learning_rate": 1.2425025464264894e-06, + "loss": 0.2828, + "step": 38841 + }, + { + "epoch": 0.7775592422991267, + "grad_norm": 1.0464810132980347, + "learning_rate": 1.2422886798815898e-06, + "loss": 0.299, + "step": 38842 + }, + { + "epoch": 0.7775792608162551, + "grad_norm": 1.9585916996002197, + "learning_rate": 1.2420748291333012e-06, + "loss": 0.7095, + "step": 38843 + }, + { + "epoch": 0.7775992793333834, + "grad_norm": 0.9207661747932434, + "learning_rate": 1.2418609941825215e-06, + "loss": 0.2386, + "step": 38844 + }, + { + "epoch": 0.7776192978505118, + "grad_norm": 1.0999975204467773, + "learning_rate": 1.2416471750301495e-06, + "loss": 0.2845, + "step": 38845 + }, + { + "epoch": 0.77763931636764, + "grad_norm": 1.1529731750488281, + "learning_rate": 1.241433371677082e-06, + "loss": 0.3084, + "step": 38846 + }, + { + "epoch": 0.7776593348847685, + "grad_norm": 1.056707501411438, + "learning_rate": 1.241219584124222e-06, + "loss": 0.3032, + "step": 38847 + }, + { + "epoch": 0.7776793534018968, + "grad_norm": 1.053373098373413, + "learning_rate": 1.2410058123724639e-06, + "loss": 0.2581, + "step": 38848 + }, + { + "epoch": 0.777699371919025, + "grad_norm": 1.1960636377334595, + "learning_rate": 1.24079205642271e-06, + "loss": 0.3046, + "step": 38849 + }, + { + "epoch": 0.7777193904361535, + "grad_norm": 1.4910377264022827, + "learning_rate": 1.2405783162758572e-06, + "loss": 0.3087, + "step": 38850 + }, + { + "epoch": 0.7777394089532818, + "grad_norm": 1.1523394584655762, + "learning_rate": 1.2403645919328029e-06, + "loss": 0.2841, + "step": 38851 + }, + { + "epoch": 0.7777594274704102, + "grad_norm": 1.1413962841033936, + "learning_rate": 1.240150883394448e-06, + "loss": 0.3322, + "step": 38852 + }, + { + "epoch": 0.7777794459875385, + "grad_norm": 1.0880314111709595, + "learning_rate": 1.2399371906616896e-06, + "loss": 0.3402, + "step": 38853 + }, + { + "epoch": 0.7777994645046669, + "grad_norm": 1.0398097038269043, + "learning_rate": 1.2397235137354262e-06, + "loss": 0.2629, + "step": 38854 + }, + { + "epoch": 0.7778194830217952, + "grad_norm": 1.2354248762130737, + "learning_rate": 1.239509852616555e-06, + "loss": 0.3211, + "step": 38855 + }, + { + "epoch": 0.7778395015389235, + "grad_norm": 1.3557500839233398, + "learning_rate": 1.239296207305976e-06, + "loss": 0.323, + "step": 38856 + }, + { + "epoch": 0.7778595200560519, + "grad_norm": 1.2408474683761597, + "learning_rate": 1.2390825778045861e-06, + "loss": 0.2776, + "step": 38857 + }, + { + "epoch": 0.7778795385731802, + "grad_norm": 1.0648273229599, + "learning_rate": 1.2388689641132845e-06, + "loss": 0.2718, + "step": 38858 + }, + { + "epoch": 0.7778995570903086, + "grad_norm": 1.101318120956421, + "learning_rate": 1.2386553662329664e-06, + "loss": 0.2678, + "step": 38859 + }, + { + "epoch": 0.7779195756074369, + "grad_norm": 1.144761562347412, + "learning_rate": 1.2384417841645336e-06, + "loss": 0.2792, + "step": 38860 + }, + { + "epoch": 0.7779395941245653, + "grad_norm": 1.1277449131011963, + "learning_rate": 1.23822821790888e-06, + "loss": 0.3008, + "step": 38861 + }, + { + "epoch": 0.7779596126416936, + "grad_norm": 1.0382461547851562, + "learning_rate": 1.238014667466907e-06, + "loss": 0.2934, + "step": 38862 + }, + { + "epoch": 0.777979631158822, + "grad_norm": 1.119564414024353, + "learning_rate": 1.2378011328395112e-06, + "loss": 0.319, + "step": 38863 + }, + { + "epoch": 0.7779996496759503, + "grad_norm": 1.1997966766357422, + "learning_rate": 1.2375876140275878e-06, + "loss": 0.3134, + "step": 38864 + }, + { + "epoch": 0.7780196681930786, + "grad_norm": 1.1001031398773193, + "learning_rate": 1.2373741110320382e-06, + "loss": 0.3164, + "step": 38865 + }, + { + "epoch": 0.778039686710207, + "grad_norm": 1.0434318780899048, + "learning_rate": 1.2371606238537582e-06, + "loss": 0.2765, + "step": 38866 + }, + { + "epoch": 0.7780597052273353, + "grad_norm": 1.0229055881500244, + "learning_rate": 1.2369471524936444e-06, + "loss": 0.2688, + "step": 38867 + }, + { + "epoch": 0.7780797237444637, + "grad_norm": 1.2715293169021606, + "learning_rate": 1.2367336969525957e-06, + "loss": 0.3389, + "step": 38868 + }, + { + "epoch": 0.778099742261592, + "grad_norm": 1.083477258682251, + "learning_rate": 1.236520257231507e-06, + "loss": 0.2956, + "step": 38869 + }, + { + "epoch": 0.7781197607787204, + "grad_norm": 1.2297077178955078, + "learning_rate": 1.2363068333312789e-06, + "loss": 0.2713, + "step": 38870 + }, + { + "epoch": 0.7781397792958487, + "grad_norm": 1.037395715713501, + "learning_rate": 1.2360934252528068e-06, + "loss": 0.3028, + "step": 38871 + }, + { + "epoch": 0.778159797812977, + "grad_norm": 1.0812633037567139, + "learning_rate": 1.235880032996986e-06, + "loss": 0.2288, + "step": 38872 + }, + { + "epoch": 0.7781798163301054, + "grad_norm": 1.1201268434524536, + "learning_rate": 1.2356666565647173e-06, + "loss": 0.3266, + "step": 38873 + }, + { + "epoch": 0.7781998348472337, + "grad_norm": 1.0934624671936035, + "learning_rate": 1.235453295956895e-06, + "loss": 0.2986, + "step": 38874 + }, + { + "epoch": 0.7782198533643621, + "grad_norm": 1.0745841264724731, + "learning_rate": 1.2352399511744179e-06, + "loss": 0.2866, + "step": 38875 + }, + { + "epoch": 0.7782398718814904, + "grad_norm": 1.097129225730896, + "learning_rate": 1.2350266222181818e-06, + "loss": 0.2823, + "step": 38876 + }, + { + "epoch": 0.7782598903986188, + "grad_norm": 0.9808147549629211, + "learning_rate": 1.2348133090890817e-06, + "loss": 0.3044, + "step": 38877 + }, + { + "epoch": 0.778279908915747, + "grad_norm": 1.1735553741455078, + "learning_rate": 1.234600011788018e-06, + "loss": 0.3077, + "step": 38878 + }, + { + "epoch": 0.7782999274328755, + "grad_norm": 1.026628851890564, + "learning_rate": 1.2343867303158858e-06, + "loss": 0.299, + "step": 38879 + }, + { + "epoch": 0.7783199459500038, + "grad_norm": 1.101406455039978, + "learning_rate": 1.234173464673581e-06, + "loss": 0.2914, + "step": 38880 + }, + { + "epoch": 0.778339964467132, + "grad_norm": 1.0247210264205933, + "learning_rate": 1.2339602148620011e-06, + "loss": 0.2492, + "step": 38881 + }, + { + "epoch": 0.7783599829842605, + "grad_norm": 1.062050461769104, + "learning_rate": 1.23374698088204e-06, + "loss": 0.3082, + "step": 38882 + }, + { + "epoch": 0.7783800015013888, + "grad_norm": 1.1257634162902832, + "learning_rate": 1.2335337627345978e-06, + "loss": 0.2975, + "step": 38883 + }, + { + "epoch": 0.7784000200185172, + "grad_norm": 1.0987827777862549, + "learning_rate": 1.233320560420569e-06, + "loss": 0.3196, + "step": 38884 + }, + { + "epoch": 0.7784200385356455, + "grad_norm": 1.0340479612350464, + "learning_rate": 1.2331073739408483e-06, + "loss": 0.2735, + "step": 38885 + }, + { + "epoch": 0.7784400570527739, + "grad_norm": 1.2527194023132324, + "learning_rate": 1.2328942032963355e-06, + "loss": 0.2604, + "step": 38886 + }, + { + "epoch": 0.7784600755699022, + "grad_norm": 1.0188919305801392, + "learning_rate": 1.232681048487923e-06, + "loss": 0.2426, + "step": 38887 + }, + { + "epoch": 0.7784800940870304, + "grad_norm": 1.2427500486373901, + "learning_rate": 1.23246790951651e-06, + "loss": 0.31, + "step": 38888 + }, + { + "epoch": 0.7785001126041589, + "grad_norm": 1.0708311796188354, + "learning_rate": 1.2322547863829914e-06, + "loss": 0.3024, + "step": 38889 + }, + { + "epoch": 0.7785201311212872, + "grad_norm": 1.1228992938995361, + "learning_rate": 1.2320416790882629e-06, + "loss": 0.2881, + "step": 38890 + }, + { + "epoch": 0.7785401496384156, + "grad_norm": 1.082175374031067, + "learning_rate": 1.2318285876332182e-06, + "loss": 0.2981, + "step": 38891 + }, + { + "epoch": 0.7785601681555439, + "grad_norm": 1.1310484409332275, + "learning_rate": 1.2316155120187573e-06, + "loss": 0.289, + "step": 38892 + }, + { + "epoch": 0.7785801866726723, + "grad_norm": 1.243439793586731, + "learning_rate": 1.2314024522457736e-06, + "loss": 0.3, + "step": 38893 + }, + { + "epoch": 0.7786002051898006, + "grad_norm": 1.1416985988616943, + "learning_rate": 1.2311894083151632e-06, + "loss": 0.3094, + "step": 38894 + }, + { + "epoch": 0.778620223706929, + "grad_norm": 1.0714341402053833, + "learning_rate": 1.2309763802278197e-06, + "loss": 0.2803, + "step": 38895 + }, + { + "epoch": 0.7786402422240573, + "grad_norm": 1.1183146238327026, + "learning_rate": 1.230763367984642e-06, + "loss": 0.3135, + "step": 38896 + }, + { + "epoch": 0.7786602607411856, + "grad_norm": 1.2121549844741821, + "learning_rate": 1.2305503715865241e-06, + "loss": 0.2879, + "step": 38897 + }, + { + "epoch": 0.778680279258314, + "grad_norm": 1.151307225227356, + "learning_rate": 1.2303373910343596e-06, + "loss": 0.317, + "step": 38898 + }, + { + "epoch": 0.7787002977754423, + "grad_norm": 1.0605332851409912, + "learning_rate": 1.2301244263290469e-06, + "loss": 0.2614, + "step": 38899 + }, + { + "epoch": 0.7787203162925707, + "grad_norm": 1.2010024785995483, + "learning_rate": 1.2299114774714781e-06, + "loss": 0.2882, + "step": 38900 + }, + { + "epoch": 0.778740334809699, + "grad_norm": 1.726081371307373, + "learning_rate": 1.229698544462552e-06, + "loss": 0.7495, + "step": 38901 + }, + { + "epoch": 0.7787603533268274, + "grad_norm": 1.0679028034210205, + "learning_rate": 1.2294856273031619e-06, + "loss": 0.306, + "step": 38902 + }, + { + "epoch": 0.7787803718439557, + "grad_norm": 1.099468469619751, + "learning_rate": 1.2292727259942027e-06, + "loss": 0.2924, + "step": 38903 + }, + { + "epoch": 0.778800390361084, + "grad_norm": 1.0195324420928955, + "learning_rate": 1.2290598405365679e-06, + "loss": 0.2931, + "step": 38904 + }, + { + "epoch": 0.7788204088782124, + "grad_norm": 1.3090320825576782, + "learning_rate": 1.2288469709311557e-06, + "loss": 0.2821, + "step": 38905 + }, + { + "epoch": 0.7788404273953407, + "grad_norm": 1.1898632049560547, + "learning_rate": 1.228634117178859e-06, + "loss": 0.2932, + "step": 38906 + }, + { + "epoch": 0.7788604459124691, + "grad_norm": 1.131888508796692, + "learning_rate": 1.2284212792805729e-06, + "loss": 0.2853, + "step": 38907 + }, + { + "epoch": 0.7788804644295974, + "grad_norm": 1.2213212251663208, + "learning_rate": 1.228208457237191e-06, + "loss": 0.3312, + "step": 38908 + }, + { + "epoch": 0.7789004829467258, + "grad_norm": 1.9252195358276367, + "learning_rate": 1.22799565104961e-06, + "loss": 0.7367, + "step": 38909 + }, + { + "epoch": 0.778920501463854, + "grad_norm": 1.0643131732940674, + "learning_rate": 1.2277828607187238e-06, + "loss": 0.2702, + "step": 38910 + }, + { + "epoch": 0.7789405199809825, + "grad_norm": 1.105765700340271, + "learning_rate": 1.2275700862454255e-06, + "loss": 0.3121, + "step": 38911 + }, + { + "epoch": 0.7789605384981108, + "grad_norm": 1.061416506767273, + "learning_rate": 1.2273573276306116e-06, + "loss": 0.2639, + "step": 38912 + }, + { + "epoch": 0.778980557015239, + "grad_norm": 1.040968894958496, + "learning_rate": 1.2271445848751746e-06, + "loss": 0.2853, + "step": 38913 + }, + { + "epoch": 0.7790005755323675, + "grad_norm": 1.0596054792404175, + "learning_rate": 1.226931857980011e-06, + "loss": 0.295, + "step": 38914 + }, + { + "epoch": 0.7790205940494958, + "grad_norm": 1.906201958656311, + "learning_rate": 1.2267191469460139e-06, + "loss": 0.7397, + "step": 38915 + }, + { + "epoch": 0.7790406125666242, + "grad_norm": 1.1941035985946655, + "learning_rate": 1.2265064517740776e-06, + "loss": 0.3255, + "step": 38916 + }, + { + "epoch": 0.7790606310837525, + "grad_norm": 1.0317264795303345, + "learning_rate": 1.2262937724650947e-06, + "loss": 0.2793, + "step": 38917 + }, + { + "epoch": 0.7790806496008809, + "grad_norm": 1.154914379119873, + "learning_rate": 1.226081109019962e-06, + "loss": 0.3026, + "step": 38918 + }, + { + "epoch": 0.7791006681180092, + "grad_norm": 1.1996314525604248, + "learning_rate": 1.2258684614395722e-06, + "loss": 0.2823, + "step": 38919 + }, + { + "epoch": 0.7791206866351374, + "grad_norm": 1.2079365253448486, + "learning_rate": 1.225655829724819e-06, + "loss": 0.3312, + "step": 38920 + }, + { + "epoch": 0.7791407051522659, + "grad_norm": 1.1497981548309326, + "learning_rate": 1.2254432138765964e-06, + "loss": 0.3111, + "step": 38921 + }, + { + "epoch": 0.7791607236693942, + "grad_norm": 1.0595455169677734, + "learning_rate": 1.2252306138957976e-06, + "loss": 0.266, + "step": 38922 + }, + { + "epoch": 0.7791807421865226, + "grad_norm": 1.2034305334091187, + "learning_rate": 1.2250180297833174e-06, + "loss": 0.2858, + "step": 38923 + }, + { + "epoch": 0.7792007607036509, + "grad_norm": 1.197693943977356, + "learning_rate": 1.2248054615400484e-06, + "loss": 0.3321, + "step": 38924 + }, + { + "epoch": 0.7792207792207793, + "grad_norm": 1.1671266555786133, + "learning_rate": 1.2245929091668857e-06, + "loss": 0.2843, + "step": 38925 + }, + { + "epoch": 0.7792407977379076, + "grad_norm": 1.2914106845855713, + "learning_rate": 1.2243803726647208e-06, + "loss": 0.3015, + "step": 38926 + }, + { + "epoch": 0.779260816255036, + "grad_norm": 1.9746462106704712, + "learning_rate": 1.2241678520344496e-06, + "loss": 0.8049, + "step": 38927 + }, + { + "epoch": 0.7792808347721643, + "grad_norm": 1.1623691320419312, + "learning_rate": 1.2239553472769639e-06, + "loss": 0.2872, + "step": 38928 + }, + { + "epoch": 0.7793008532892925, + "grad_norm": 1.2026971578598022, + "learning_rate": 1.2237428583931576e-06, + "loss": 0.3057, + "step": 38929 + }, + { + "epoch": 0.779320871806421, + "grad_norm": 1.1974287033081055, + "learning_rate": 1.2235303853839236e-06, + "loss": 0.3284, + "step": 38930 + }, + { + "epoch": 0.7793408903235493, + "grad_norm": 1.014227271080017, + "learning_rate": 1.2233179282501538e-06, + "loss": 0.2928, + "step": 38931 + }, + { + "epoch": 0.7793609088406777, + "grad_norm": 1.0841237306594849, + "learning_rate": 1.2231054869927444e-06, + "loss": 0.2722, + "step": 38932 + }, + { + "epoch": 0.779380927357806, + "grad_norm": 1.8827778100967407, + "learning_rate": 1.2228930616125862e-06, + "loss": 0.7497, + "step": 38933 + }, + { + "epoch": 0.7794009458749344, + "grad_norm": 1.0906697511672974, + "learning_rate": 1.2226806521105728e-06, + "loss": 0.3243, + "step": 38934 + }, + { + "epoch": 0.7794209643920627, + "grad_norm": 1.0996406078338623, + "learning_rate": 1.2224682584875958e-06, + "loss": 0.2676, + "step": 38935 + }, + { + "epoch": 0.779440982909191, + "grad_norm": 1.119022011756897, + "learning_rate": 1.2222558807445506e-06, + "loss": 0.2619, + "step": 38936 + }, + { + "epoch": 0.7794610014263194, + "grad_norm": 1.7421647310256958, + "learning_rate": 1.2220435188823275e-06, + "loss": 0.7489, + "step": 38937 + }, + { + "epoch": 0.7794810199434477, + "grad_norm": 1.119605541229248, + "learning_rate": 1.2218311729018217e-06, + "loss": 0.3007, + "step": 38938 + }, + { + "epoch": 0.7795010384605761, + "grad_norm": 1.4435757398605347, + "learning_rate": 1.2216188428039232e-06, + "loss": 0.367, + "step": 38939 + }, + { + "epoch": 0.7795210569777044, + "grad_norm": 1.1016730070114136, + "learning_rate": 1.2214065285895271e-06, + "loss": 0.2905, + "step": 38940 + }, + { + "epoch": 0.7795410754948328, + "grad_norm": 1.2198712825775146, + "learning_rate": 1.2211942302595253e-06, + "loss": 0.3046, + "step": 38941 + }, + { + "epoch": 0.779561094011961, + "grad_norm": 1.1026803255081177, + "learning_rate": 1.2209819478148094e-06, + "loss": 0.3031, + "step": 38942 + }, + { + "epoch": 0.7795811125290895, + "grad_norm": 1.0213369131088257, + "learning_rate": 1.2207696812562724e-06, + "loss": 0.259, + "step": 38943 + }, + { + "epoch": 0.7796011310462178, + "grad_norm": 1.1516788005828857, + "learning_rate": 1.2205574305848044e-06, + "loss": 0.3049, + "step": 38944 + }, + { + "epoch": 0.779621149563346, + "grad_norm": 1.0789783000946045, + "learning_rate": 1.2203451958013018e-06, + "loss": 0.2828, + "step": 38945 + }, + { + "epoch": 0.7796411680804745, + "grad_norm": 1.9562304019927979, + "learning_rate": 1.2201329769066545e-06, + "loss": 0.7627, + "step": 38946 + }, + { + "epoch": 0.7796611865976028, + "grad_norm": 1.0563713312149048, + "learning_rate": 1.2199207739017543e-06, + "loss": 0.2573, + "step": 38947 + }, + { + "epoch": 0.7796812051147312, + "grad_norm": 1.0676817893981934, + "learning_rate": 1.2197085867874924e-06, + "loss": 0.3057, + "step": 38948 + }, + { + "epoch": 0.7797012236318595, + "grad_norm": 1.0529155731201172, + "learning_rate": 1.2194964155647632e-06, + "loss": 0.2931, + "step": 38949 + }, + { + "epoch": 0.7797212421489879, + "grad_norm": 2.080129623413086, + "learning_rate": 1.2192842602344562e-06, + "loss": 0.7407, + "step": 38950 + }, + { + "epoch": 0.7797412606661162, + "grad_norm": 1.1816201210021973, + "learning_rate": 1.2190721207974665e-06, + "loss": 0.2945, + "step": 38951 + }, + { + "epoch": 0.7797612791832444, + "grad_norm": 1.3144351243972778, + "learning_rate": 1.2188599972546833e-06, + "loss": 0.3079, + "step": 38952 + }, + { + "epoch": 0.7797812977003729, + "grad_norm": 1.0982415676116943, + "learning_rate": 1.218647889606998e-06, + "loss": 0.2737, + "step": 38953 + }, + { + "epoch": 0.7798013162175012, + "grad_norm": 1.1097495555877686, + "learning_rate": 1.2184357978553046e-06, + "loss": 0.2566, + "step": 38954 + }, + { + "epoch": 0.7798213347346296, + "grad_norm": 1.2237900495529175, + "learning_rate": 1.2182237220004933e-06, + "loss": 0.2952, + "step": 38955 + }, + { + "epoch": 0.7798413532517579, + "grad_norm": 1.7597404718399048, + "learning_rate": 1.218011662043455e-06, + "loss": 0.7436, + "step": 38956 + }, + { + "epoch": 0.7798613717688863, + "grad_norm": 1.1486634016036987, + "learning_rate": 1.217799617985081e-06, + "loss": 0.2569, + "step": 38957 + }, + { + "epoch": 0.7798813902860146, + "grad_norm": 1.097982406616211, + "learning_rate": 1.2175875898262652e-06, + "loss": 0.3006, + "step": 38958 + }, + { + "epoch": 0.779901408803143, + "grad_norm": 1.9332290887832642, + "learning_rate": 1.2173755775678964e-06, + "loss": 0.7434, + "step": 38959 + }, + { + "epoch": 0.7799214273202713, + "grad_norm": 2.0328450202941895, + "learning_rate": 1.2171635812108673e-06, + "loss": 0.7976, + "step": 38960 + }, + { + "epoch": 0.7799414458373995, + "grad_norm": 1.1726775169372559, + "learning_rate": 1.2169516007560666e-06, + "loss": 0.3075, + "step": 38961 + }, + { + "epoch": 0.779961464354528, + "grad_norm": 1.0338994264602661, + "learning_rate": 1.2167396362043888e-06, + "loss": 0.2407, + "step": 38962 + }, + { + "epoch": 0.7799814828716563, + "grad_norm": 1.0352451801300049, + "learning_rate": 1.2165276875567216e-06, + "loss": 0.2782, + "step": 38963 + }, + { + "epoch": 0.7800015013887847, + "grad_norm": 1.0775467157363892, + "learning_rate": 1.2163157548139598e-06, + "loss": 0.2978, + "step": 38964 + }, + { + "epoch": 0.780021519905913, + "grad_norm": 1.1613608598709106, + "learning_rate": 1.2161038379769919e-06, + "loss": 0.3097, + "step": 38965 + }, + { + "epoch": 0.7800415384230414, + "grad_norm": 1.1508365869522095, + "learning_rate": 1.2158919370467077e-06, + "loss": 0.3058, + "step": 38966 + }, + { + "epoch": 0.7800615569401697, + "grad_norm": 1.1082048416137695, + "learning_rate": 1.2156800520240014e-06, + "loss": 0.2915, + "step": 38967 + }, + { + "epoch": 0.780081575457298, + "grad_norm": 1.0562506914138794, + "learning_rate": 1.2154681829097615e-06, + "loss": 0.2778, + "step": 38968 + }, + { + "epoch": 0.7801015939744264, + "grad_norm": 1.2913790941238403, + "learning_rate": 1.2152563297048787e-06, + "loss": 0.2634, + "step": 38969 + }, + { + "epoch": 0.7801216124915547, + "grad_norm": 1.069651484489441, + "learning_rate": 1.2150444924102422e-06, + "loss": 0.294, + "step": 38970 + }, + { + "epoch": 0.7801416310086831, + "grad_norm": 1.078521966934204, + "learning_rate": 1.2148326710267462e-06, + "loss": 0.3225, + "step": 38971 + }, + { + "epoch": 0.7801616495258114, + "grad_norm": 1.1377415657043457, + "learning_rate": 1.2146208655552788e-06, + "loss": 0.31, + "step": 38972 + }, + { + "epoch": 0.7801816680429398, + "grad_norm": 1.0518114566802979, + "learning_rate": 1.2144090759967303e-06, + "loss": 0.2678, + "step": 38973 + }, + { + "epoch": 0.780201686560068, + "grad_norm": 1.0900856256484985, + "learning_rate": 1.2141973023519904e-06, + "loss": 0.3205, + "step": 38974 + }, + { + "epoch": 0.7802217050771965, + "grad_norm": 1.2598416805267334, + "learning_rate": 1.2139855446219517e-06, + "loss": 0.2797, + "step": 38975 + }, + { + "epoch": 0.7802417235943248, + "grad_norm": 1.1262568235397339, + "learning_rate": 1.2137738028075013e-06, + "loss": 0.2752, + "step": 38976 + }, + { + "epoch": 0.780261742111453, + "grad_norm": 1.1011384725570679, + "learning_rate": 1.213562076909533e-06, + "loss": 0.3105, + "step": 38977 + }, + { + "epoch": 0.7802817606285815, + "grad_norm": 1.893052339553833, + "learning_rate": 1.2133503669289343e-06, + "loss": 0.7238, + "step": 38978 + }, + { + "epoch": 0.7803017791457098, + "grad_norm": 1.3119562864303589, + "learning_rate": 1.2131386728665945e-06, + "loss": 0.3176, + "step": 38979 + }, + { + "epoch": 0.7803217976628382, + "grad_norm": 1.3161736726760864, + "learning_rate": 1.2129269947234063e-06, + "loss": 0.2757, + "step": 38980 + }, + { + "epoch": 0.7803418161799665, + "grad_norm": 1.219738483428955, + "learning_rate": 1.2127153325002583e-06, + "loss": 0.2984, + "step": 38981 + }, + { + "epoch": 0.7803618346970949, + "grad_norm": 1.2027822732925415, + "learning_rate": 1.2125036861980393e-06, + "loss": 0.2832, + "step": 38982 + }, + { + "epoch": 0.7803818532142232, + "grad_norm": 1.163821816444397, + "learning_rate": 1.2122920558176404e-06, + "loss": 0.2963, + "step": 38983 + }, + { + "epoch": 0.7804018717313514, + "grad_norm": 1.1188510656356812, + "learning_rate": 1.2120804413599484e-06, + "loss": 0.2289, + "step": 38984 + }, + { + "epoch": 0.7804218902484799, + "grad_norm": 0.9876076579093933, + "learning_rate": 1.2118688428258574e-06, + "loss": 0.2985, + "step": 38985 + }, + { + "epoch": 0.7804419087656081, + "grad_norm": 1.8900346755981445, + "learning_rate": 1.211657260216254e-06, + "loss": 0.739, + "step": 38986 + }, + { + "epoch": 0.7804619272827366, + "grad_norm": 1.0113797187805176, + "learning_rate": 1.2114456935320267e-06, + "loss": 0.2946, + "step": 38987 + }, + { + "epoch": 0.7804819457998649, + "grad_norm": 1.0561349391937256, + "learning_rate": 1.2112341427740682e-06, + "loss": 0.316, + "step": 38988 + }, + { + "epoch": 0.7805019643169933, + "grad_norm": 1.1342144012451172, + "learning_rate": 1.2110226079432647e-06, + "loss": 0.2588, + "step": 38989 + }, + { + "epoch": 0.7805219828341216, + "grad_norm": 1.8391082286834717, + "learning_rate": 1.2108110890405079e-06, + "loss": 0.8003, + "step": 38990 + }, + { + "epoch": 0.78054200135125, + "grad_norm": 1.0653111934661865, + "learning_rate": 1.2105995860666857e-06, + "loss": 0.2788, + "step": 38991 + }, + { + "epoch": 0.7805620198683783, + "grad_norm": 1.1023404598236084, + "learning_rate": 1.2103880990226874e-06, + "loss": 0.3198, + "step": 38992 + }, + { + "epoch": 0.7805820383855065, + "grad_norm": 1.1381226778030396, + "learning_rate": 1.2101766279094007e-06, + "loss": 0.3542, + "step": 38993 + }, + { + "epoch": 0.780602056902635, + "grad_norm": 1.208565592765808, + "learning_rate": 1.2099651727277173e-06, + "loss": 0.2871, + "step": 38994 + }, + { + "epoch": 0.7806220754197633, + "grad_norm": 1.2116889953613281, + "learning_rate": 1.2097537334785247e-06, + "loss": 0.2542, + "step": 38995 + }, + { + "epoch": 0.7806420939368917, + "grad_norm": 1.1649489402770996, + "learning_rate": 1.2095423101627118e-06, + "loss": 0.3036, + "step": 38996 + }, + { + "epoch": 0.78066211245402, + "grad_norm": 1.268356204032898, + "learning_rate": 1.2093309027811656e-06, + "loss": 0.3289, + "step": 38997 + }, + { + "epoch": 0.7806821309711484, + "grad_norm": 1.0751101970672607, + "learning_rate": 1.209119511334778e-06, + "loss": 0.2349, + "step": 38998 + }, + { + "epoch": 0.7807021494882767, + "grad_norm": 1.0741279125213623, + "learning_rate": 1.2089081358244364e-06, + "loss": 0.2703, + "step": 38999 + }, + { + "epoch": 0.780722168005405, + "grad_norm": 1.9397783279418945, + "learning_rate": 1.2086967762510277e-06, + "loss": 0.7412, + "step": 39000 + }, + { + "epoch": 0.7807421865225334, + "grad_norm": 1.1371586322784424, + "learning_rate": 1.2084854326154432e-06, + "loss": 0.2549, + "step": 39001 + }, + { + "epoch": 0.7807622050396616, + "grad_norm": 1.1443653106689453, + "learning_rate": 1.2082741049185687e-06, + "loss": 0.2957, + "step": 39002 + }, + { + "epoch": 0.7807822235567901, + "grad_norm": 1.0729082822799683, + "learning_rate": 1.2080627931612953e-06, + "loss": 0.3047, + "step": 39003 + }, + { + "epoch": 0.7808022420739184, + "grad_norm": 1.1402626037597656, + "learning_rate": 1.20785149734451e-06, + "loss": 0.307, + "step": 39004 + }, + { + "epoch": 0.7808222605910468, + "grad_norm": 1.111000657081604, + "learning_rate": 1.2076402174691004e-06, + "loss": 0.3526, + "step": 39005 + }, + { + "epoch": 0.780842279108175, + "grad_norm": 1.8450864553451538, + "learning_rate": 1.207428953535954e-06, + "loss": 0.7728, + "step": 39006 + }, + { + "epoch": 0.7808622976253035, + "grad_norm": 1.074397325515747, + "learning_rate": 1.2072177055459617e-06, + "loss": 0.3107, + "step": 39007 + }, + { + "epoch": 0.7808823161424318, + "grad_norm": 1.3939377069473267, + "learning_rate": 1.2070064735000098e-06, + "loss": 0.2774, + "step": 39008 + }, + { + "epoch": 0.78090233465956, + "grad_norm": 1.0702826976776123, + "learning_rate": 1.2067952573989867e-06, + "loss": 0.3228, + "step": 39009 + }, + { + "epoch": 0.7809223531766885, + "grad_norm": 2.0202062129974365, + "learning_rate": 1.206584057243778e-06, + "loss": 0.7287, + "step": 39010 + }, + { + "epoch": 0.7809423716938168, + "grad_norm": 0.9600692391395569, + "learning_rate": 1.206372873035276e-06, + "loss": 0.2855, + "step": 39011 + }, + { + "epoch": 0.7809623902109452, + "grad_norm": 1.0414458513259888, + "learning_rate": 1.2061617047743651e-06, + "loss": 0.2309, + "step": 39012 + }, + { + "epoch": 0.7809824087280735, + "grad_norm": 1.172243595123291, + "learning_rate": 1.2059505524619326e-06, + "loss": 0.3179, + "step": 39013 + }, + { + "epoch": 0.7810024272452019, + "grad_norm": 1.2570656538009644, + "learning_rate": 1.2057394160988695e-06, + "loss": 0.3072, + "step": 39014 + }, + { + "epoch": 0.7810224457623302, + "grad_norm": 1.3442703485488892, + "learning_rate": 1.2055282956860592e-06, + "loss": 0.2871, + "step": 39015 + }, + { + "epoch": 0.7810424642794584, + "grad_norm": 1.0885063409805298, + "learning_rate": 1.2053171912243933e-06, + "loss": 0.2843, + "step": 39016 + }, + { + "epoch": 0.7810624827965869, + "grad_norm": 1.0715203285217285, + "learning_rate": 1.2051061027147575e-06, + "loss": 0.2781, + "step": 39017 + }, + { + "epoch": 0.7810825013137151, + "grad_norm": 1.032584309577942, + "learning_rate": 1.2048950301580386e-06, + "loss": 0.2887, + "step": 39018 + }, + { + "epoch": 0.7811025198308436, + "grad_norm": 1.1034061908721924, + "learning_rate": 1.204683973555123e-06, + "loss": 0.3365, + "step": 39019 + }, + { + "epoch": 0.7811225383479719, + "grad_norm": 1.004241704940796, + "learning_rate": 1.2044729329069005e-06, + "loss": 0.2893, + "step": 39020 + }, + { + "epoch": 0.7811425568651003, + "grad_norm": 1.220094084739685, + "learning_rate": 1.2042619082142571e-06, + "loss": 0.2978, + "step": 39021 + }, + { + "epoch": 0.7811625753822286, + "grad_norm": 1.1166696548461914, + "learning_rate": 1.2040508994780797e-06, + "loss": 0.3209, + "step": 39022 + }, + { + "epoch": 0.781182593899357, + "grad_norm": 1.9357233047485352, + "learning_rate": 1.203839906699254e-06, + "loss": 0.7158, + "step": 39023 + }, + { + "epoch": 0.7812026124164853, + "grad_norm": 1.158534288406372, + "learning_rate": 1.2036289298786696e-06, + "loss": 0.2966, + "step": 39024 + }, + { + "epoch": 0.7812226309336135, + "grad_norm": 1.0985376834869385, + "learning_rate": 1.2034179690172126e-06, + "loss": 0.3213, + "step": 39025 + }, + { + "epoch": 0.781242649450742, + "grad_norm": 1.0306445360183716, + "learning_rate": 1.2032070241157678e-06, + "loss": 0.2748, + "step": 39026 + }, + { + "epoch": 0.7812626679678703, + "grad_norm": 1.2078149318695068, + "learning_rate": 1.2029960951752252e-06, + "loss": 0.2722, + "step": 39027 + }, + { + "epoch": 0.7812826864849987, + "grad_norm": 1.048622727394104, + "learning_rate": 1.2027851821964682e-06, + "loss": 0.2761, + "step": 39028 + }, + { + "epoch": 0.781302705002127, + "grad_norm": 1.778691053390503, + "learning_rate": 1.2025742851803868e-06, + "loss": 0.7801, + "step": 39029 + }, + { + "epoch": 0.7813227235192554, + "grad_norm": 1.391579270362854, + "learning_rate": 1.2023634041278653e-06, + "loss": 0.3031, + "step": 39030 + }, + { + "epoch": 0.7813427420363837, + "grad_norm": 1.1565731763839722, + "learning_rate": 1.2021525390397915e-06, + "loss": 0.2994, + "step": 39031 + }, + { + "epoch": 0.781362760553512, + "grad_norm": 1.1182548999786377, + "learning_rate": 1.2019416899170493e-06, + "loss": 0.3242, + "step": 39032 + }, + { + "epoch": 0.7813827790706404, + "grad_norm": 1.1103005409240723, + "learning_rate": 1.2017308567605284e-06, + "loss": 0.318, + "step": 39033 + }, + { + "epoch": 0.7814027975877686, + "grad_norm": 1.1343162059783936, + "learning_rate": 1.2015200395711135e-06, + "loss": 0.3446, + "step": 39034 + }, + { + "epoch": 0.7814228161048971, + "grad_norm": 1.1181386709213257, + "learning_rate": 1.2013092383496915e-06, + "loss": 0.2784, + "step": 39035 + }, + { + "epoch": 0.7814428346220254, + "grad_norm": 1.2266087532043457, + "learning_rate": 1.2010984530971475e-06, + "loss": 0.2692, + "step": 39036 + }, + { + "epoch": 0.7814628531391538, + "grad_norm": 1.0913701057434082, + "learning_rate": 1.2008876838143662e-06, + "loss": 0.2891, + "step": 39037 + }, + { + "epoch": 0.781482871656282, + "grad_norm": 1.1436941623687744, + "learning_rate": 1.2006769305022376e-06, + "loss": 0.3089, + "step": 39038 + }, + { + "epoch": 0.7815028901734105, + "grad_norm": 0.9813119769096375, + "learning_rate": 1.200466193161644e-06, + "loss": 0.3175, + "step": 39039 + }, + { + "epoch": 0.7815229086905388, + "grad_norm": 1.2041071653366089, + "learning_rate": 1.2002554717934745e-06, + "loss": 0.3007, + "step": 39040 + }, + { + "epoch": 0.781542927207667, + "grad_norm": 1.0724759101867676, + "learning_rate": 1.2000447663986115e-06, + "loss": 0.298, + "step": 39041 + }, + { + "epoch": 0.7815629457247955, + "grad_norm": 1.8390384912490845, + "learning_rate": 1.1998340769779437e-06, + "loss": 0.8107, + "step": 39042 + }, + { + "epoch": 0.7815829642419237, + "grad_norm": 1.0857213735580444, + "learning_rate": 1.1996234035323562e-06, + "loss": 0.2677, + "step": 39043 + }, + { + "epoch": 0.7816029827590522, + "grad_norm": 1.0920244455337524, + "learning_rate": 1.1994127460627336e-06, + "loss": 0.3272, + "step": 39044 + }, + { + "epoch": 0.7816230012761805, + "grad_norm": 1.1518199443817139, + "learning_rate": 1.199202104569962e-06, + "loss": 0.2835, + "step": 39045 + }, + { + "epoch": 0.7816430197933089, + "grad_norm": 1.0713304281234741, + "learning_rate": 1.1989914790549256e-06, + "loss": 0.3386, + "step": 39046 + }, + { + "epoch": 0.7816630383104372, + "grad_norm": 1.048866629600525, + "learning_rate": 1.1987808695185127e-06, + "loss": 0.29, + "step": 39047 + }, + { + "epoch": 0.7816830568275654, + "grad_norm": 1.082267165184021, + "learning_rate": 1.1985702759616063e-06, + "loss": 0.2925, + "step": 39048 + }, + { + "epoch": 0.7817030753446939, + "grad_norm": 1.074043869972229, + "learning_rate": 1.1983596983850931e-06, + "loss": 0.3039, + "step": 39049 + }, + { + "epoch": 0.7817230938618221, + "grad_norm": 1.1196553707122803, + "learning_rate": 1.1981491367898556e-06, + "loss": 0.2767, + "step": 39050 + }, + { + "epoch": 0.7817431123789506, + "grad_norm": 1.1309573650360107, + "learning_rate": 1.1979385911767827e-06, + "loss": 0.2971, + "step": 39051 + }, + { + "epoch": 0.7817631308960789, + "grad_norm": 1.1814217567443848, + "learning_rate": 1.1977280615467562e-06, + "loss": 0.3687, + "step": 39052 + }, + { + "epoch": 0.7817831494132073, + "grad_norm": 1.0699211359024048, + "learning_rate": 1.1975175479006639e-06, + "loss": 0.2805, + "step": 39053 + }, + { + "epoch": 0.7818031679303356, + "grad_norm": 1.1264877319335938, + "learning_rate": 1.1973070502393886e-06, + "loss": 0.3239, + "step": 39054 + }, + { + "epoch": 0.781823186447464, + "grad_norm": 1.0947190523147583, + "learning_rate": 1.1970965685638175e-06, + "loss": 0.3039, + "step": 39055 + }, + { + "epoch": 0.7818432049645923, + "grad_norm": 1.0606873035430908, + "learning_rate": 1.1968861028748335e-06, + "loss": 0.2726, + "step": 39056 + }, + { + "epoch": 0.7818632234817205, + "grad_norm": 1.069417119026184, + "learning_rate": 1.1966756531733226e-06, + "loss": 0.2722, + "step": 39057 + }, + { + "epoch": 0.781883241998849, + "grad_norm": 1.1051485538482666, + "learning_rate": 1.1964652194601683e-06, + "loss": 0.2802, + "step": 39058 + }, + { + "epoch": 0.7819032605159772, + "grad_norm": 1.0492048263549805, + "learning_rate": 1.196254801736254e-06, + "loss": 0.3336, + "step": 39059 + }, + { + "epoch": 0.7819232790331057, + "grad_norm": 1.1486766338348389, + "learning_rate": 1.1960444000024678e-06, + "loss": 0.2874, + "step": 39060 + }, + { + "epoch": 0.781943297550234, + "grad_norm": 1.0315592288970947, + "learning_rate": 1.1958340142596925e-06, + "loss": 0.2731, + "step": 39061 + }, + { + "epoch": 0.7819633160673624, + "grad_norm": 1.0288301706314087, + "learning_rate": 1.195623644508812e-06, + "loss": 0.2623, + "step": 39062 + }, + { + "epoch": 0.7819833345844907, + "grad_norm": 1.0531543493270874, + "learning_rate": 1.1954132907507094e-06, + "loss": 0.2623, + "step": 39063 + }, + { + "epoch": 0.782003353101619, + "grad_norm": 1.920453429222107, + "learning_rate": 1.1952029529862724e-06, + "loss": 0.7175, + "step": 39064 + }, + { + "epoch": 0.7820233716187474, + "grad_norm": 1.2461767196655273, + "learning_rate": 1.1949926312163818e-06, + "loss": 0.3225, + "step": 39065 + }, + { + "epoch": 0.7820433901358756, + "grad_norm": 1.1848559379577637, + "learning_rate": 1.194782325441925e-06, + "loss": 0.3126, + "step": 39066 + }, + { + "epoch": 0.782063408653004, + "grad_norm": 1.1928808689117432, + "learning_rate": 1.1945720356637842e-06, + "loss": 0.2871, + "step": 39067 + }, + { + "epoch": 0.7820834271701324, + "grad_norm": 1.2740827798843384, + "learning_rate": 1.1943617618828417e-06, + "loss": 0.3371, + "step": 39068 + }, + { + "epoch": 0.7821034456872608, + "grad_norm": 1.0925360918045044, + "learning_rate": 1.1941515040999852e-06, + "loss": 0.3318, + "step": 39069 + }, + { + "epoch": 0.782123464204389, + "grad_norm": 1.2196145057678223, + "learning_rate": 1.1939412623160967e-06, + "loss": 0.2585, + "step": 39070 + }, + { + "epoch": 0.7821434827215175, + "grad_norm": 1.1551026105880737, + "learning_rate": 1.1937310365320604e-06, + "loss": 0.2918, + "step": 39071 + }, + { + "epoch": 0.7821635012386458, + "grad_norm": 2.1421844959259033, + "learning_rate": 1.1935208267487581e-06, + "loss": 0.767, + "step": 39072 + }, + { + "epoch": 0.782183519755774, + "grad_norm": 1.108952283859253, + "learning_rate": 1.1933106329670763e-06, + "loss": 0.301, + "step": 39073 + }, + { + "epoch": 0.7822035382729025, + "grad_norm": 1.1432209014892578, + "learning_rate": 1.1931004551878977e-06, + "loss": 0.309, + "step": 39074 + }, + { + "epoch": 0.7822235567900307, + "grad_norm": 1.0553175210952759, + "learning_rate": 1.1928902934121051e-06, + "loss": 0.2848, + "step": 39075 + }, + { + "epoch": 0.7822435753071592, + "grad_norm": 1.1080892086029053, + "learning_rate": 1.192680147640583e-06, + "loss": 0.3374, + "step": 39076 + }, + { + "epoch": 0.7822635938242875, + "grad_norm": 1.1813468933105469, + "learning_rate": 1.1924700178742121e-06, + "loss": 0.3153, + "step": 39077 + }, + { + "epoch": 0.7822836123414159, + "grad_norm": 1.1009247303009033, + "learning_rate": 1.1922599041138783e-06, + "loss": 0.297, + "step": 39078 + }, + { + "epoch": 0.7823036308585442, + "grad_norm": 1.0736160278320312, + "learning_rate": 1.1920498063604657e-06, + "loss": 0.3234, + "step": 39079 + }, + { + "epoch": 0.7823236493756724, + "grad_norm": 1.2160770893096924, + "learning_rate": 1.1918397246148566e-06, + "loss": 0.3309, + "step": 39080 + }, + { + "epoch": 0.7823436678928009, + "grad_norm": 1.1608846187591553, + "learning_rate": 1.1916296588779318e-06, + "loss": 0.3133, + "step": 39081 + }, + { + "epoch": 0.7823636864099291, + "grad_norm": 1.1104079484939575, + "learning_rate": 1.1914196091505781e-06, + "loss": 0.3027, + "step": 39082 + }, + { + "epoch": 0.7823837049270576, + "grad_norm": 1.9578694105148315, + "learning_rate": 1.1912095754336767e-06, + "loss": 0.747, + "step": 39083 + }, + { + "epoch": 0.7824037234441859, + "grad_norm": 1.196500539779663, + "learning_rate": 1.1909995577281109e-06, + "loss": 0.2822, + "step": 39084 + }, + { + "epoch": 0.7824237419613143, + "grad_norm": 1.1295275688171387, + "learning_rate": 1.190789556034761e-06, + "loss": 0.3155, + "step": 39085 + }, + { + "epoch": 0.7824437604784426, + "grad_norm": 1.1844046115875244, + "learning_rate": 1.1905795703545143e-06, + "loss": 0.278, + "step": 39086 + }, + { + "epoch": 0.782463778995571, + "grad_norm": 1.0325583219528198, + "learning_rate": 1.1903696006882508e-06, + "loss": 0.2553, + "step": 39087 + }, + { + "epoch": 0.7824837975126993, + "grad_norm": 1.2190983295440674, + "learning_rate": 1.1901596470368537e-06, + "loss": 0.3201, + "step": 39088 + }, + { + "epoch": 0.7825038160298275, + "grad_norm": 1.0949203968048096, + "learning_rate": 1.1899497094012059e-06, + "loss": 0.3193, + "step": 39089 + }, + { + "epoch": 0.782523834546956, + "grad_norm": 1.069827675819397, + "learning_rate": 1.1897397877821875e-06, + "loss": 0.2816, + "step": 39090 + }, + { + "epoch": 0.7825438530640842, + "grad_norm": 1.1188180446624756, + "learning_rate": 1.189529882180684e-06, + "loss": 0.3526, + "step": 39091 + }, + { + "epoch": 0.7825638715812127, + "grad_norm": 1.1743875741958618, + "learning_rate": 1.1893199925975774e-06, + "loss": 0.2971, + "step": 39092 + }, + { + "epoch": 0.782583890098341, + "grad_norm": 1.06033456325531, + "learning_rate": 1.18911011903375e-06, + "loss": 0.2749, + "step": 39093 + }, + { + "epoch": 0.7826039086154694, + "grad_norm": 1.0819624662399292, + "learning_rate": 1.1889002614900818e-06, + "loss": 0.2876, + "step": 39094 + }, + { + "epoch": 0.7826239271325977, + "grad_norm": 1.101253867149353, + "learning_rate": 1.1886904199674582e-06, + "loss": 0.2887, + "step": 39095 + }, + { + "epoch": 0.782643945649726, + "grad_norm": 1.0022600889205933, + "learning_rate": 1.1884805944667605e-06, + "loss": 0.3135, + "step": 39096 + }, + { + "epoch": 0.7826639641668544, + "grad_norm": 1.232922911643982, + "learning_rate": 1.188270784988869e-06, + "loss": 0.3412, + "step": 39097 + }, + { + "epoch": 0.7826839826839826, + "grad_norm": 1.1653016805648804, + "learning_rate": 1.1880609915346681e-06, + "loss": 0.2647, + "step": 39098 + }, + { + "epoch": 0.782704001201111, + "grad_norm": 1.0728052854537964, + "learning_rate": 1.1878512141050363e-06, + "loss": 0.2984, + "step": 39099 + }, + { + "epoch": 0.7827240197182393, + "grad_norm": 1.8444010019302368, + "learning_rate": 1.1876414527008589e-06, + "loss": 0.7599, + "step": 39100 + }, + { + "epoch": 0.7827440382353678, + "grad_norm": 1.8841428756713867, + "learning_rate": 1.1874317073230168e-06, + "loss": 0.7707, + "step": 39101 + }, + { + "epoch": 0.782764056752496, + "grad_norm": 1.1968200206756592, + "learning_rate": 1.187221977972391e-06, + "loss": 0.2953, + "step": 39102 + }, + { + "epoch": 0.7827840752696245, + "grad_norm": 1.1183322668075562, + "learning_rate": 1.1870122646498621e-06, + "loss": 0.3095, + "step": 39103 + }, + { + "epoch": 0.7828040937867528, + "grad_norm": 1.210423231124878, + "learning_rate": 1.1868025673563132e-06, + "loss": 0.2792, + "step": 39104 + }, + { + "epoch": 0.782824112303881, + "grad_norm": 1.3146897554397583, + "learning_rate": 1.1865928860926273e-06, + "loss": 0.3101, + "step": 39105 + }, + { + "epoch": 0.7828441308210095, + "grad_norm": 1.087761640548706, + "learning_rate": 1.1863832208596843e-06, + "loss": 0.2786, + "step": 39106 + }, + { + "epoch": 0.7828641493381377, + "grad_norm": 1.0635292530059814, + "learning_rate": 1.186173571658365e-06, + "loss": 0.2671, + "step": 39107 + }, + { + "epoch": 0.7828841678552662, + "grad_norm": 1.9555320739746094, + "learning_rate": 1.1859639384895505e-06, + "loss": 0.7712, + "step": 39108 + }, + { + "epoch": 0.7829041863723945, + "grad_norm": 1.0750916004180908, + "learning_rate": 1.1857543213541234e-06, + "loss": 0.3389, + "step": 39109 + }, + { + "epoch": 0.7829242048895229, + "grad_norm": 1.0775699615478516, + "learning_rate": 1.1855447202529653e-06, + "loss": 0.2888, + "step": 39110 + }, + { + "epoch": 0.7829442234066512, + "grad_norm": 1.1466702222824097, + "learning_rate": 1.1853351351869558e-06, + "loss": 0.2447, + "step": 39111 + }, + { + "epoch": 0.7829642419237794, + "grad_norm": 1.2021408081054688, + "learning_rate": 1.185125566156975e-06, + "loss": 0.3525, + "step": 39112 + }, + { + "epoch": 0.7829842604409079, + "grad_norm": 1.0648411512374878, + "learning_rate": 1.1849160131639064e-06, + "loss": 0.2608, + "step": 39113 + }, + { + "epoch": 0.7830042789580361, + "grad_norm": 1.0492011308670044, + "learning_rate": 1.1847064762086302e-06, + "loss": 0.3304, + "step": 39114 + }, + { + "epoch": 0.7830242974751646, + "grad_norm": 1.8354185819625854, + "learning_rate": 1.1844969552920272e-06, + "loss": 0.8029, + "step": 39115 + }, + { + "epoch": 0.7830443159922928, + "grad_norm": 1.0608280897140503, + "learning_rate": 1.1842874504149765e-06, + "loss": 0.3039, + "step": 39116 + }, + { + "epoch": 0.7830643345094213, + "grad_norm": 1.1574000120162964, + "learning_rate": 1.1840779615783598e-06, + "loss": 0.3101, + "step": 39117 + }, + { + "epoch": 0.7830843530265496, + "grad_norm": 1.2156574726104736, + "learning_rate": 1.1838684887830598e-06, + "loss": 0.2907, + "step": 39118 + }, + { + "epoch": 0.783104371543678, + "grad_norm": 1.0442357063293457, + "learning_rate": 1.1836590320299551e-06, + "loss": 0.264, + "step": 39119 + }, + { + "epoch": 0.7831243900608063, + "grad_norm": 1.1525365114212036, + "learning_rate": 1.1834495913199273e-06, + "loss": 0.3029, + "step": 39120 + }, + { + "epoch": 0.7831444085779345, + "grad_norm": 1.0970818996429443, + "learning_rate": 1.183240166653854e-06, + "loss": 0.2943, + "step": 39121 + }, + { + "epoch": 0.783164427095063, + "grad_norm": 1.0813006162643433, + "learning_rate": 1.1830307580326194e-06, + "loss": 0.3327, + "step": 39122 + }, + { + "epoch": 0.7831844456121912, + "grad_norm": 1.1292942762374878, + "learning_rate": 1.182821365457102e-06, + "loss": 0.3219, + "step": 39123 + }, + { + "epoch": 0.7832044641293197, + "grad_norm": 1.3965867757797241, + "learning_rate": 1.1826119889281817e-06, + "loss": 0.2664, + "step": 39124 + }, + { + "epoch": 0.783224482646448, + "grad_norm": 1.1339060068130493, + "learning_rate": 1.1824026284467382e-06, + "loss": 0.34, + "step": 39125 + }, + { + "epoch": 0.7832445011635764, + "grad_norm": 1.1386620998382568, + "learning_rate": 1.1821932840136536e-06, + "loss": 0.3024, + "step": 39126 + }, + { + "epoch": 0.7832645196807047, + "grad_norm": 1.0626857280731201, + "learning_rate": 1.1819839556298073e-06, + "loss": 0.277, + "step": 39127 + }, + { + "epoch": 0.783284538197833, + "grad_norm": 1.1879154443740845, + "learning_rate": 1.1817746432960781e-06, + "loss": 0.3055, + "step": 39128 + }, + { + "epoch": 0.7833045567149614, + "grad_norm": 1.0413975715637207, + "learning_rate": 1.1815653470133459e-06, + "loss": 0.2795, + "step": 39129 + }, + { + "epoch": 0.7833245752320896, + "grad_norm": 1.9915863275527954, + "learning_rate": 1.1813560667824908e-06, + "loss": 0.6733, + "step": 39130 + }, + { + "epoch": 0.783344593749218, + "grad_norm": 1.052648901939392, + "learning_rate": 1.1811468026043944e-06, + "loss": 0.306, + "step": 39131 + }, + { + "epoch": 0.7833646122663463, + "grad_norm": 1.9284377098083496, + "learning_rate": 1.180937554479935e-06, + "loss": 0.6865, + "step": 39132 + }, + { + "epoch": 0.7833846307834748, + "grad_norm": 1.246168613433838, + "learning_rate": 1.180728322409993e-06, + "loss": 0.3107, + "step": 39133 + }, + { + "epoch": 0.783404649300603, + "grad_norm": 1.051805853843689, + "learning_rate": 1.1805191063954452e-06, + "loss": 0.294, + "step": 39134 + }, + { + "epoch": 0.7834246678177313, + "grad_norm": 1.1074621677398682, + "learning_rate": 1.1803099064371743e-06, + "loss": 0.2862, + "step": 39135 + }, + { + "epoch": 0.7834446863348598, + "grad_norm": 1.7685716152191162, + "learning_rate": 1.180100722536059e-06, + "loss": 0.7027, + "step": 39136 + }, + { + "epoch": 0.783464704851988, + "grad_norm": 1.1631345748901367, + "learning_rate": 1.179891554692978e-06, + "loss": 0.2833, + "step": 39137 + }, + { + "epoch": 0.7834847233691165, + "grad_norm": 1.324141025543213, + "learning_rate": 1.1796824029088105e-06, + "loss": 0.2902, + "step": 39138 + }, + { + "epoch": 0.7835047418862447, + "grad_norm": 1.325991153717041, + "learning_rate": 1.179473267184435e-06, + "loss": 0.3051, + "step": 39139 + }, + { + "epoch": 0.7835247604033732, + "grad_norm": 1.1036598682403564, + "learning_rate": 1.1792641475207334e-06, + "loss": 0.2467, + "step": 39140 + }, + { + "epoch": 0.7835447789205015, + "grad_norm": 1.203926682472229, + "learning_rate": 1.1790550439185828e-06, + "loss": 0.2969, + "step": 39141 + }, + { + "epoch": 0.7835647974376299, + "grad_norm": 1.1875369548797607, + "learning_rate": 1.1788459563788608e-06, + "loss": 0.3148, + "step": 39142 + }, + { + "epoch": 0.7835848159547582, + "grad_norm": 1.173233985900879, + "learning_rate": 1.1786368849024482e-06, + "loss": 0.3139, + "step": 39143 + }, + { + "epoch": 0.7836048344718864, + "grad_norm": 1.2000699043273926, + "learning_rate": 1.1784278294902253e-06, + "loss": 0.2893, + "step": 39144 + }, + { + "epoch": 0.7836248529890149, + "grad_norm": 1.0708847045898438, + "learning_rate": 1.1782187901430691e-06, + "loss": 0.2792, + "step": 39145 + }, + { + "epoch": 0.7836448715061431, + "grad_norm": 1.0608878135681152, + "learning_rate": 1.1780097668618595e-06, + "loss": 0.284, + "step": 39146 + }, + { + "epoch": 0.7836648900232716, + "grad_norm": 1.8110486268997192, + "learning_rate": 1.1778007596474723e-06, + "loss": 0.7887, + "step": 39147 + }, + { + "epoch": 0.7836849085403998, + "grad_norm": 1.1276648044586182, + "learning_rate": 1.1775917685007893e-06, + "loss": 0.3338, + "step": 39148 + }, + { + "epoch": 0.7837049270575283, + "grad_norm": 1.2364821434020996, + "learning_rate": 1.1773827934226884e-06, + "loss": 0.2781, + "step": 39149 + }, + { + "epoch": 0.7837249455746566, + "grad_norm": 1.1120203733444214, + "learning_rate": 1.1771738344140476e-06, + "loss": 0.296, + "step": 39150 + }, + { + "epoch": 0.7837449640917848, + "grad_norm": 1.813338041305542, + "learning_rate": 1.1769648914757453e-06, + "loss": 0.7698, + "step": 39151 + }, + { + "epoch": 0.7837649826089133, + "grad_norm": 1.0869899988174438, + "learning_rate": 1.176755964608658e-06, + "loss": 0.3485, + "step": 39152 + }, + { + "epoch": 0.7837850011260415, + "grad_norm": 1.1080349683761597, + "learning_rate": 1.1765470538136675e-06, + "loss": 0.269, + "step": 39153 + }, + { + "epoch": 0.78380501964317, + "grad_norm": 1.3917182683944702, + "learning_rate": 1.1763381590916506e-06, + "loss": 0.3756, + "step": 39154 + }, + { + "epoch": 0.7838250381602982, + "grad_norm": 1.0531014204025269, + "learning_rate": 1.1761292804434838e-06, + "loss": 0.2898, + "step": 39155 + }, + { + "epoch": 0.7838450566774267, + "grad_norm": 1.1649317741394043, + "learning_rate": 1.1759204178700467e-06, + "loss": 0.3085, + "step": 39156 + }, + { + "epoch": 0.783865075194555, + "grad_norm": 1.277752161026001, + "learning_rate": 1.1757115713722184e-06, + "loss": 0.3087, + "step": 39157 + }, + { + "epoch": 0.7838850937116834, + "grad_norm": 1.0427089929580688, + "learning_rate": 1.1755027409508756e-06, + "loss": 0.2671, + "step": 39158 + }, + { + "epoch": 0.7839051122288117, + "grad_norm": 1.1044224500656128, + "learning_rate": 1.1752939266068963e-06, + "loss": 0.3112, + "step": 39159 + }, + { + "epoch": 0.78392513074594, + "grad_norm": 1.060421109199524, + "learning_rate": 1.175085128341158e-06, + "loss": 0.349, + "step": 39160 + }, + { + "epoch": 0.7839451492630684, + "grad_norm": 1.1083444356918335, + "learning_rate": 1.1748763461545377e-06, + "loss": 0.2564, + "step": 39161 + }, + { + "epoch": 0.7839651677801966, + "grad_norm": 1.0766836404800415, + "learning_rate": 1.174667580047915e-06, + "loss": 0.2895, + "step": 39162 + }, + { + "epoch": 0.783985186297325, + "grad_norm": 1.2446805238723755, + "learning_rate": 1.1744588300221672e-06, + "loss": 0.2916, + "step": 39163 + }, + { + "epoch": 0.7840052048144533, + "grad_norm": 1.1771705150604248, + "learning_rate": 1.1742500960781705e-06, + "loss": 0.2872, + "step": 39164 + }, + { + "epoch": 0.7840252233315818, + "grad_norm": 1.154589056968689, + "learning_rate": 1.1740413782168026e-06, + "loss": 0.324, + "step": 39165 + }, + { + "epoch": 0.78404524184871, + "grad_norm": 1.180025339126587, + "learning_rate": 1.1738326764389418e-06, + "loss": 0.3187, + "step": 39166 + }, + { + "epoch": 0.7840652603658383, + "grad_norm": 1.3232958316802979, + "learning_rate": 1.1736239907454655e-06, + "loss": 0.3051, + "step": 39167 + }, + { + "epoch": 0.7840852788829668, + "grad_norm": 1.08352530002594, + "learning_rate": 1.173415321137249e-06, + "loss": 0.3308, + "step": 39168 + }, + { + "epoch": 0.784105297400095, + "grad_norm": 1.1780294179916382, + "learning_rate": 1.1732066676151716e-06, + "loss": 0.2642, + "step": 39169 + }, + { + "epoch": 0.7841253159172235, + "grad_norm": 2.007112503051758, + "learning_rate": 1.1729980301801102e-06, + "loss": 0.7237, + "step": 39170 + }, + { + "epoch": 0.7841453344343517, + "grad_norm": 1.157992959022522, + "learning_rate": 1.1727894088329423e-06, + "loss": 0.3277, + "step": 39171 + }, + { + "epoch": 0.7841653529514802, + "grad_norm": 1.1763139963150024, + "learning_rate": 1.1725808035745439e-06, + "loss": 0.309, + "step": 39172 + }, + { + "epoch": 0.7841853714686084, + "grad_norm": 1.2372307777404785, + "learning_rate": 1.1723722144057919e-06, + "loss": 0.2925, + "step": 39173 + }, + { + "epoch": 0.7842053899857369, + "grad_norm": 1.143000841140747, + "learning_rate": 1.1721636413275622e-06, + "loss": 0.3096, + "step": 39174 + }, + { + "epoch": 0.7842254085028652, + "grad_norm": 1.2009824514389038, + "learning_rate": 1.1719550843407341e-06, + "loss": 0.2856, + "step": 39175 + }, + { + "epoch": 0.7842454270199934, + "grad_norm": 1.1020865440368652, + "learning_rate": 1.1717465434461834e-06, + "loss": 0.295, + "step": 39176 + }, + { + "epoch": 0.7842654455371219, + "grad_norm": 1.065796971321106, + "learning_rate": 1.1715380186447856e-06, + "loss": 0.2629, + "step": 39177 + }, + { + "epoch": 0.7842854640542501, + "grad_norm": 1.1231286525726318, + "learning_rate": 1.1713295099374172e-06, + "loss": 0.318, + "step": 39178 + }, + { + "epoch": 0.7843054825713786, + "grad_norm": 1.1484946012496948, + "learning_rate": 1.171121017324957e-06, + "loss": 0.3037, + "step": 39179 + }, + { + "epoch": 0.7843255010885068, + "grad_norm": 1.0018649101257324, + "learning_rate": 1.17091254080828e-06, + "loss": 0.3099, + "step": 39180 + }, + { + "epoch": 0.7843455196056353, + "grad_norm": 1.0548328161239624, + "learning_rate": 1.1707040803882613e-06, + "loss": 0.2839, + "step": 39181 + }, + { + "epoch": 0.7843655381227636, + "grad_norm": 1.2060822248458862, + "learning_rate": 1.1704956360657798e-06, + "loss": 0.3148, + "step": 39182 + }, + { + "epoch": 0.7843855566398918, + "grad_norm": 1.059899091720581, + "learning_rate": 1.1702872078417093e-06, + "loss": 0.2705, + "step": 39183 + }, + { + "epoch": 0.7844055751570203, + "grad_norm": 1.188543438911438, + "learning_rate": 1.1700787957169286e-06, + "loss": 0.2817, + "step": 39184 + }, + { + "epoch": 0.7844255936741485, + "grad_norm": 1.32099187374115, + "learning_rate": 1.1698703996923121e-06, + "loss": 0.3153, + "step": 39185 + }, + { + "epoch": 0.784445612191277, + "grad_norm": 1.0906111001968384, + "learning_rate": 1.1696620197687364e-06, + "loss": 0.316, + "step": 39186 + }, + { + "epoch": 0.7844656307084052, + "grad_norm": 1.1551321744918823, + "learning_rate": 1.169453655947076e-06, + "loss": 0.3228, + "step": 39187 + }, + { + "epoch": 0.7844856492255337, + "grad_norm": 1.196567416191101, + "learning_rate": 1.1692453082282095e-06, + "loss": 0.2936, + "step": 39188 + }, + { + "epoch": 0.784505667742662, + "grad_norm": 1.1787241697311401, + "learning_rate": 1.1690369766130111e-06, + "loss": 0.3058, + "step": 39189 + }, + { + "epoch": 0.7845256862597904, + "grad_norm": 1.1394509077072144, + "learning_rate": 1.168828661102357e-06, + "loss": 0.2558, + "step": 39190 + }, + { + "epoch": 0.7845457047769187, + "grad_norm": 1.2375577688217163, + "learning_rate": 1.1686203616971231e-06, + "loss": 0.304, + "step": 39191 + }, + { + "epoch": 0.784565723294047, + "grad_norm": 1.21430504322052, + "learning_rate": 1.1684120783981829e-06, + "loss": 0.2974, + "step": 39192 + }, + { + "epoch": 0.7845857418111754, + "grad_norm": 1.0486674308776855, + "learning_rate": 1.1682038112064148e-06, + "loss": 0.2895, + "step": 39193 + }, + { + "epoch": 0.7846057603283036, + "grad_norm": 0.9537044167518616, + "learning_rate": 1.1679955601226922e-06, + "loss": 0.2191, + "step": 39194 + }, + { + "epoch": 0.784625778845432, + "grad_norm": 1.0480406284332275, + "learning_rate": 1.1677873251478934e-06, + "loss": 0.2289, + "step": 39195 + }, + { + "epoch": 0.7846457973625603, + "grad_norm": 1.14601469039917, + "learning_rate": 1.16757910628289e-06, + "loss": 0.3034, + "step": 39196 + }, + { + "epoch": 0.7846658158796888, + "grad_norm": 1.0169072151184082, + "learning_rate": 1.1673709035285607e-06, + "loss": 0.3162, + "step": 39197 + }, + { + "epoch": 0.784685834396817, + "grad_norm": 1.0700510740280151, + "learning_rate": 1.1671627168857797e-06, + "loss": 0.3196, + "step": 39198 + }, + { + "epoch": 0.7847058529139453, + "grad_norm": 1.2784727811813354, + "learning_rate": 1.1669545463554217e-06, + "loss": 0.3058, + "step": 39199 + }, + { + "epoch": 0.7847258714310738, + "grad_norm": 1.1756923198699951, + "learning_rate": 1.1667463919383603e-06, + "loss": 0.3205, + "step": 39200 + }, + { + "epoch": 0.784745889948202, + "grad_norm": 1.3326767683029175, + "learning_rate": 1.1665382536354735e-06, + "loss": 0.3446, + "step": 39201 + }, + { + "epoch": 0.7847659084653305, + "grad_norm": 1.096079707145691, + "learning_rate": 1.166330131447635e-06, + "loss": 0.2659, + "step": 39202 + }, + { + "epoch": 0.7847859269824587, + "grad_norm": 1.0651803016662598, + "learning_rate": 1.16612202537572e-06, + "loss": 0.2723, + "step": 39203 + }, + { + "epoch": 0.7848059454995872, + "grad_norm": 1.1573153734207153, + "learning_rate": 1.1659139354206029e-06, + "loss": 0.281, + "step": 39204 + }, + { + "epoch": 0.7848259640167154, + "grad_norm": 1.187246561050415, + "learning_rate": 1.1657058615831569e-06, + "loss": 0.346, + "step": 39205 + }, + { + "epoch": 0.7848459825338439, + "grad_norm": 0.9891528487205505, + "learning_rate": 1.1654978038642596e-06, + "loss": 0.2915, + "step": 39206 + }, + { + "epoch": 0.7848660010509722, + "grad_norm": 1.0547493696212769, + "learning_rate": 1.1652897622647825e-06, + "loss": 0.3119, + "step": 39207 + }, + { + "epoch": 0.7848860195681004, + "grad_norm": 1.2252538204193115, + "learning_rate": 1.1650817367856043e-06, + "loss": 0.2731, + "step": 39208 + }, + { + "epoch": 0.7849060380852289, + "grad_norm": 1.0771520137786865, + "learning_rate": 1.164873727427595e-06, + "loss": 0.258, + "step": 39209 + }, + { + "epoch": 0.7849260566023571, + "grad_norm": 1.0712798833847046, + "learning_rate": 1.1646657341916334e-06, + "loss": 0.3044, + "step": 39210 + }, + { + "epoch": 0.7849460751194856, + "grad_norm": 1.143774151802063, + "learning_rate": 1.164457757078591e-06, + "loss": 0.3179, + "step": 39211 + }, + { + "epoch": 0.7849660936366138, + "grad_norm": 1.1384975910186768, + "learning_rate": 1.1642497960893434e-06, + "loss": 0.2816, + "step": 39212 + }, + { + "epoch": 0.7849861121537423, + "grad_norm": 1.2551149129867554, + "learning_rate": 1.1640418512247637e-06, + "loss": 0.3007, + "step": 39213 + }, + { + "epoch": 0.7850061306708705, + "grad_norm": 1.8707023859024048, + "learning_rate": 1.1638339224857253e-06, + "loss": 0.8273, + "step": 39214 + }, + { + "epoch": 0.7850261491879988, + "grad_norm": 1.1390806436538696, + "learning_rate": 1.163626009873105e-06, + "loss": 0.2884, + "step": 39215 + }, + { + "epoch": 0.7850461677051273, + "grad_norm": 1.3871724605560303, + "learning_rate": 1.1634181133877754e-06, + "loss": 0.2687, + "step": 39216 + }, + { + "epoch": 0.7850661862222555, + "grad_norm": 1.085425615310669, + "learning_rate": 1.16321023303061e-06, + "loss": 0.2927, + "step": 39217 + }, + { + "epoch": 0.785086204739384, + "grad_norm": 1.062601923942566, + "learning_rate": 1.1630023688024822e-06, + "loss": 0.283, + "step": 39218 + }, + { + "epoch": 0.7851062232565122, + "grad_norm": 1.1010589599609375, + "learning_rate": 1.1627945207042678e-06, + "loss": 0.2948, + "step": 39219 + }, + { + "epoch": 0.7851262417736407, + "grad_norm": 0.9908115863800049, + "learning_rate": 1.162586688736838e-06, + "loss": 0.2674, + "step": 39220 + }, + { + "epoch": 0.785146260290769, + "grad_norm": 1.1627451181411743, + "learning_rate": 1.16237887290107e-06, + "loss": 0.3033, + "step": 39221 + }, + { + "epoch": 0.7851662788078974, + "grad_norm": 1.0617426633834839, + "learning_rate": 1.1621710731978348e-06, + "loss": 0.2881, + "step": 39222 + }, + { + "epoch": 0.7851862973250257, + "grad_norm": 1.9596283435821533, + "learning_rate": 1.1619632896280048e-06, + "loss": 0.7308, + "step": 39223 + }, + { + "epoch": 0.785206315842154, + "grad_norm": 1.1263163089752197, + "learning_rate": 1.161755522192457e-06, + "loss": 0.3058, + "step": 39224 + }, + { + "epoch": 0.7852263343592824, + "grad_norm": 1.024364948272705, + "learning_rate": 1.161547770892063e-06, + "loss": 0.284, + "step": 39225 + }, + { + "epoch": 0.7852463528764106, + "grad_norm": 1.9356023073196411, + "learning_rate": 1.161340035727696e-06, + "loss": 0.6714, + "step": 39226 + }, + { + "epoch": 0.785266371393539, + "grad_norm": 1.0452831983566284, + "learning_rate": 1.161132316700228e-06, + "loss": 0.2669, + "step": 39227 + }, + { + "epoch": 0.7852863899106673, + "grad_norm": 1.0562437772750854, + "learning_rate": 1.1609246138105351e-06, + "loss": 0.3086, + "step": 39228 + }, + { + "epoch": 0.7853064084277958, + "grad_norm": 1.8942853212356567, + "learning_rate": 1.1607169270594893e-06, + "loss": 0.7763, + "step": 39229 + }, + { + "epoch": 0.785326426944924, + "grad_norm": 1.194366216659546, + "learning_rate": 1.1605092564479625e-06, + "loss": 0.2715, + "step": 39230 + }, + { + "epoch": 0.7853464454620523, + "grad_norm": 1.1776083707809448, + "learning_rate": 1.1603016019768275e-06, + "loss": 0.2851, + "step": 39231 + }, + { + "epoch": 0.7853664639791808, + "grad_norm": 1.9829769134521484, + "learning_rate": 1.1600939636469599e-06, + "loss": 0.776, + "step": 39232 + }, + { + "epoch": 0.785386482496309, + "grad_norm": 1.082115650177002, + "learning_rate": 1.1598863414592293e-06, + "loss": 0.2623, + "step": 39233 + }, + { + "epoch": 0.7854065010134375, + "grad_norm": 0.9810118079185486, + "learning_rate": 1.1596787354145118e-06, + "loss": 0.2327, + "step": 39234 + }, + { + "epoch": 0.7854265195305657, + "grad_norm": 1.159342646598816, + "learning_rate": 1.1594711455136782e-06, + "loss": 0.314, + "step": 39235 + }, + { + "epoch": 0.7854465380476942, + "grad_norm": 0.9620673060417175, + "learning_rate": 1.1592635717576e-06, + "loss": 0.2579, + "step": 39236 + }, + { + "epoch": 0.7854665565648224, + "grad_norm": 1.0772548913955688, + "learning_rate": 1.1590560141471524e-06, + "loss": 0.29, + "step": 39237 + }, + { + "epoch": 0.7854865750819509, + "grad_norm": 1.8551980257034302, + "learning_rate": 1.1588484726832071e-06, + "loss": 0.7287, + "step": 39238 + }, + { + "epoch": 0.7855065935990792, + "grad_norm": 0.9962911605834961, + "learning_rate": 1.158640947366636e-06, + "loss": 0.2238, + "step": 39239 + }, + { + "epoch": 0.7855266121162074, + "grad_norm": 1.1581114530563354, + "learning_rate": 1.1584334381983103e-06, + "loss": 0.2983, + "step": 39240 + }, + { + "epoch": 0.7855466306333359, + "grad_norm": 1.8917045593261719, + "learning_rate": 1.158225945179105e-06, + "loss": 0.7748, + "step": 39241 + }, + { + "epoch": 0.7855666491504641, + "grad_norm": 1.1204739809036255, + "learning_rate": 1.1580184683098917e-06, + "loss": 0.2989, + "step": 39242 + }, + { + "epoch": 0.7855866676675926, + "grad_norm": 0.9883084893226624, + "learning_rate": 1.157811007591541e-06, + "loss": 0.281, + "step": 39243 + }, + { + "epoch": 0.7856066861847208, + "grad_norm": 1.1945171356201172, + "learning_rate": 1.1576035630249255e-06, + "loss": 0.3081, + "step": 39244 + }, + { + "epoch": 0.7856267047018493, + "grad_norm": 1.1681674718856812, + "learning_rate": 1.1573961346109186e-06, + "loss": 0.315, + "step": 39245 + }, + { + "epoch": 0.7856467232189775, + "grad_norm": 1.1996976137161255, + "learning_rate": 1.1571887223503897e-06, + "loss": 0.2994, + "step": 39246 + }, + { + "epoch": 0.7856667417361058, + "grad_norm": 1.1469112634658813, + "learning_rate": 1.1569813262442141e-06, + "loss": 0.306, + "step": 39247 + }, + { + "epoch": 0.7856867602532343, + "grad_norm": 1.8947482109069824, + "learning_rate": 1.1567739462932625e-06, + "loss": 0.7667, + "step": 39248 + }, + { + "epoch": 0.7857067787703625, + "grad_norm": 1.0897725820541382, + "learning_rate": 1.1565665824984041e-06, + "loss": 0.2655, + "step": 39249 + }, + { + "epoch": 0.785726797287491, + "grad_norm": 1.1549056768417358, + "learning_rate": 1.1563592348605145e-06, + "loss": 0.3116, + "step": 39250 + }, + { + "epoch": 0.7857468158046192, + "grad_norm": 0.949794352054596, + "learning_rate": 1.1561519033804636e-06, + "loss": 0.2265, + "step": 39251 + }, + { + "epoch": 0.7857668343217477, + "grad_norm": 1.1288702487945557, + "learning_rate": 1.1559445880591225e-06, + "loss": 0.288, + "step": 39252 + }, + { + "epoch": 0.785786852838876, + "grad_norm": 1.0172199010849, + "learning_rate": 1.1557372888973628e-06, + "loss": 0.2856, + "step": 39253 + }, + { + "epoch": 0.7858068713560044, + "grad_norm": 1.0931721925735474, + "learning_rate": 1.1555300058960555e-06, + "loss": 0.2845, + "step": 39254 + }, + { + "epoch": 0.7858268898731327, + "grad_norm": 1.0591362714767456, + "learning_rate": 1.155322739056074e-06, + "loss": 0.3029, + "step": 39255 + }, + { + "epoch": 0.785846908390261, + "grad_norm": 0.9922749400138855, + "learning_rate": 1.1551154883782883e-06, + "loss": 0.324, + "step": 39256 + }, + { + "epoch": 0.7858669269073894, + "grad_norm": 1.2832335233688354, + "learning_rate": 1.154908253863568e-06, + "loss": 0.3175, + "step": 39257 + }, + { + "epoch": 0.7858869454245176, + "grad_norm": 1.7769020795822144, + "learning_rate": 1.1547010355127874e-06, + "loss": 0.7945, + "step": 39258 + }, + { + "epoch": 0.785906963941646, + "grad_norm": 1.148965835571289, + "learning_rate": 1.1544938333268153e-06, + "loss": 0.3291, + "step": 39259 + }, + { + "epoch": 0.7859269824587743, + "grad_norm": 1.047459363937378, + "learning_rate": 1.154286647306524e-06, + "loss": 0.2532, + "step": 39260 + }, + { + "epoch": 0.7859470009759028, + "grad_norm": 1.1726841926574707, + "learning_rate": 1.1540794774527847e-06, + "loss": 0.286, + "step": 39261 + }, + { + "epoch": 0.785967019493031, + "grad_norm": 1.048463225364685, + "learning_rate": 1.153872323766466e-06, + "loss": 0.2827, + "step": 39262 + }, + { + "epoch": 0.7859870380101593, + "grad_norm": 1.0345181226730347, + "learning_rate": 1.1536651862484415e-06, + "loss": 0.3084, + "step": 39263 + }, + { + "epoch": 0.7860070565272878, + "grad_norm": 1.2656468152999878, + "learning_rate": 1.1534580648995814e-06, + "loss": 0.2813, + "step": 39264 + }, + { + "epoch": 0.786027075044416, + "grad_norm": 1.1218786239624023, + "learning_rate": 1.1532509597207553e-06, + "loss": 0.3137, + "step": 39265 + }, + { + "epoch": 0.7860470935615445, + "grad_norm": 1.1373363733291626, + "learning_rate": 1.1530438707128349e-06, + "loss": 0.33, + "step": 39266 + }, + { + "epoch": 0.7860671120786727, + "grad_norm": 1.1561152935028076, + "learning_rate": 1.1528367978766886e-06, + "loss": 0.3114, + "step": 39267 + }, + { + "epoch": 0.7860871305958012, + "grad_norm": 1.1115554571151733, + "learning_rate": 1.1526297412131893e-06, + "loss": 0.309, + "step": 39268 + }, + { + "epoch": 0.7861071491129294, + "grad_norm": 1.064714789390564, + "learning_rate": 1.1524227007232075e-06, + "loss": 0.2614, + "step": 39269 + }, + { + "epoch": 0.7861271676300579, + "grad_norm": 1.0469905138015747, + "learning_rate": 1.1522156764076103e-06, + "loss": 0.267, + "step": 39270 + }, + { + "epoch": 0.7861471861471861, + "grad_norm": 1.7853384017944336, + "learning_rate": 1.1520086682672726e-06, + "loss": 0.7086, + "step": 39271 + }, + { + "epoch": 0.7861672046643144, + "grad_norm": 1.0511066913604736, + "learning_rate": 1.1518016763030604e-06, + "loss": 0.2974, + "step": 39272 + }, + { + "epoch": 0.7861872231814429, + "grad_norm": 1.1106302738189697, + "learning_rate": 1.1515947005158473e-06, + "loss": 0.3067, + "step": 39273 + }, + { + "epoch": 0.7862072416985711, + "grad_norm": 0.99830561876297, + "learning_rate": 1.1513877409065022e-06, + "loss": 0.2949, + "step": 39274 + }, + { + "epoch": 0.7862272602156996, + "grad_norm": 1.8998279571533203, + "learning_rate": 1.1511807974758949e-06, + "loss": 0.7682, + "step": 39275 + }, + { + "epoch": 0.7862472787328278, + "grad_norm": 1.0405462980270386, + "learning_rate": 1.1509738702248935e-06, + "loss": 0.2977, + "step": 39276 + }, + { + "epoch": 0.7862672972499563, + "grad_norm": 1.1308152675628662, + "learning_rate": 1.1507669591543707e-06, + "loss": 0.2881, + "step": 39277 + }, + { + "epoch": 0.7862873157670845, + "grad_norm": 1.1536802053451538, + "learning_rate": 1.1505600642651959e-06, + "loss": 0.3165, + "step": 39278 + }, + { + "epoch": 0.7863073342842128, + "grad_norm": 1.1293919086456299, + "learning_rate": 1.1503531855582378e-06, + "loss": 0.3146, + "step": 39279 + }, + { + "epoch": 0.7863273528013413, + "grad_norm": 1.0033323764801025, + "learning_rate": 1.1501463230343652e-06, + "loss": 0.2764, + "step": 39280 + }, + { + "epoch": 0.7863473713184695, + "grad_norm": 1.1800600290298462, + "learning_rate": 1.1499394766944506e-06, + "loss": 0.2939, + "step": 39281 + }, + { + "epoch": 0.786367389835598, + "grad_norm": 1.1847294569015503, + "learning_rate": 1.1497326465393616e-06, + "loss": 0.3126, + "step": 39282 + }, + { + "epoch": 0.7863874083527262, + "grad_norm": 0.9938536286354065, + "learning_rate": 1.1495258325699664e-06, + "loss": 0.2622, + "step": 39283 + }, + { + "epoch": 0.7864074268698547, + "grad_norm": 1.0717614889144897, + "learning_rate": 1.1493190347871375e-06, + "loss": 0.2915, + "step": 39284 + }, + { + "epoch": 0.786427445386983, + "grad_norm": 1.0974056720733643, + "learning_rate": 1.1491122531917414e-06, + "loss": 0.2631, + "step": 39285 + }, + { + "epoch": 0.7864474639041114, + "grad_norm": 1.1389364004135132, + "learning_rate": 1.14890548778465e-06, + "loss": 0.3033, + "step": 39286 + }, + { + "epoch": 0.7864674824212396, + "grad_norm": 1.1071549654006958, + "learning_rate": 1.1486987385667315e-06, + "loss": 0.2759, + "step": 39287 + }, + { + "epoch": 0.786487500938368, + "grad_norm": 1.073329210281372, + "learning_rate": 1.1484920055388544e-06, + "loss": 0.2687, + "step": 39288 + }, + { + "epoch": 0.7865075194554964, + "grad_norm": 1.0703433752059937, + "learning_rate": 1.1482852887018863e-06, + "loss": 0.2957, + "step": 39289 + }, + { + "epoch": 0.7865275379726246, + "grad_norm": 1.1121548414230347, + "learning_rate": 1.1480785880566997e-06, + "loss": 0.2621, + "step": 39290 + }, + { + "epoch": 0.786547556489753, + "grad_norm": 1.0600714683532715, + "learning_rate": 1.1478719036041618e-06, + "loss": 0.3072, + "step": 39291 + }, + { + "epoch": 0.7865675750068813, + "grad_norm": 1.1924810409545898, + "learning_rate": 1.1476652353451418e-06, + "loss": 0.3398, + "step": 39292 + }, + { + "epoch": 0.7865875935240098, + "grad_norm": 0.972464919090271, + "learning_rate": 1.1474585832805062e-06, + "loss": 0.2504, + "step": 39293 + }, + { + "epoch": 0.786607612041138, + "grad_norm": 1.1271113157272339, + "learning_rate": 1.147251947411127e-06, + "loss": 0.3262, + "step": 39294 + }, + { + "epoch": 0.7866276305582663, + "grad_norm": 1.1357501745224, + "learning_rate": 1.1470453277378718e-06, + "loss": 0.313, + "step": 39295 + }, + { + "epoch": 0.7866476490753948, + "grad_norm": 1.1554419994354248, + "learning_rate": 1.1468387242616074e-06, + "loss": 0.2658, + "step": 39296 + }, + { + "epoch": 0.786667667592523, + "grad_norm": 1.1092636585235596, + "learning_rate": 1.146632136983205e-06, + "loss": 0.2851, + "step": 39297 + }, + { + "epoch": 0.7866876861096515, + "grad_norm": 1.1668593883514404, + "learning_rate": 1.1464255659035305e-06, + "loss": 0.2876, + "step": 39298 + }, + { + "epoch": 0.7867077046267797, + "grad_norm": 1.2299760580062866, + "learning_rate": 1.146219011023455e-06, + "loss": 0.3067, + "step": 39299 + }, + { + "epoch": 0.7867277231439082, + "grad_norm": 1.1572935581207275, + "learning_rate": 1.1460124723438459e-06, + "loss": 0.2598, + "step": 39300 + }, + { + "epoch": 0.7867477416610364, + "grad_norm": 1.0220807790756226, + "learning_rate": 1.1458059498655705e-06, + "loss": 0.27, + "step": 39301 + }, + { + "epoch": 0.7867677601781649, + "grad_norm": 1.195089340209961, + "learning_rate": 1.1455994435894963e-06, + "loss": 0.3289, + "step": 39302 + }, + { + "epoch": 0.7867877786952931, + "grad_norm": 1.051448106765747, + "learning_rate": 1.1453929535164936e-06, + "loss": 0.2883, + "step": 39303 + }, + { + "epoch": 0.7868077972124214, + "grad_norm": 1.0765877962112427, + "learning_rate": 1.1451864796474292e-06, + "loss": 0.306, + "step": 39304 + }, + { + "epoch": 0.7868278157295499, + "grad_norm": 1.162245512008667, + "learning_rate": 1.144980021983172e-06, + "loss": 0.3266, + "step": 39305 + }, + { + "epoch": 0.7868478342466781, + "grad_norm": 1.13864266872406, + "learning_rate": 1.144773580524589e-06, + "loss": 0.321, + "step": 39306 + }, + { + "epoch": 0.7868678527638066, + "grad_norm": 1.2358664274215698, + "learning_rate": 1.1445671552725468e-06, + "loss": 0.2876, + "step": 39307 + }, + { + "epoch": 0.7868878712809348, + "grad_norm": 1.1969401836395264, + "learning_rate": 1.1443607462279155e-06, + "loss": 0.3181, + "step": 39308 + }, + { + "epoch": 0.7869078897980633, + "grad_norm": 1.193422555923462, + "learning_rate": 1.1441543533915612e-06, + "loss": 0.2949, + "step": 39309 + }, + { + "epoch": 0.7869279083151915, + "grad_norm": 1.222334861755371, + "learning_rate": 1.143947976764353e-06, + "loss": 0.3154, + "step": 39310 + }, + { + "epoch": 0.7869479268323198, + "grad_norm": 1.154390573501587, + "learning_rate": 1.1437416163471566e-06, + "loss": 0.2669, + "step": 39311 + }, + { + "epoch": 0.7869679453494483, + "grad_norm": 1.093101143836975, + "learning_rate": 1.143535272140842e-06, + "loss": 0.3106, + "step": 39312 + }, + { + "epoch": 0.7869879638665765, + "grad_norm": 1.167074203491211, + "learning_rate": 1.1433289441462753e-06, + "loss": 0.2479, + "step": 39313 + }, + { + "epoch": 0.787007982383705, + "grad_norm": 1.0891505479812622, + "learning_rate": 1.1431226323643235e-06, + "loss": 0.2988, + "step": 39314 + }, + { + "epoch": 0.7870280009008332, + "grad_norm": 1.2519564628601074, + "learning_rate": 1.142916336795853e-06, + "loss": 0.3132, + "step": 39315 + }, + { + "epoch": 0.7870480194179617, + "grad_norm": 1.3297309875488281, + "learning_rate": 1.1427100574417332e-06, + "loss": 0.3099, + "step": 39316 + }, + { + "epoch": 0.78706803793509, + "grad_norm": 1.072723627090454, + "learning_rate": 1.14250379430283e-06, + "loss": 0.2941, + "step": 39317 + }, + { + "epoch": 0.7870880564522184, + "grad_norm": 1.299537181854248, + "learning_rate": 1.1422975473800113e-06, + "loss": 0.3284, + "step": 39318 + }, + { + "epoch": 0.7871080749693466, + "grad_norm": 1.1710494756698608, + "learning_rate": 1.1420913166741432e-06, + "loss": 0.3552, + "step": 39319 + }, + { + "epoch": 0.787128093486475, + "grad_norm": 1.1127210855484009, + "learning_rate": 1.1418851021860916e-06, + "loss": 0.3212, + "step": 39320 + }, + { + "epoch": 0.7871481120036034, + "grad_norm": 1.93565833568573, + "learning_rate": 1.1416789039167264e-06, + "loss": 0.7145, + "step": 39321 + }, + { + "epoch": 0.7871681305207316, + "grad_norm": 1.0475280284881592, + "learning_rate": 1.1414727218669114e-06, + "loss": 0.275, + "step": 39322 + }, + { + "epoch": 0.78718814903786, + "grad_norm": 1.1904706954956055, + "learning_rate": 1.1412665560375157e-06, + "loss": 0.2672, + "step": 39323 + }, + { + "epoch": 0.7872081675549883, + "grad_norm": 1.3509618043899536, + "learning_rate": 1.141060406429404e-06, + "loss": 0.2872, + "step": 39324 + }, + { + "epoch": 0.7872281860721168, + "grad_norm": 1.0887399911880493, + "learning_rate": 1.1408542730434452e-06, + "loss": 0.2461, + "step": 39325 + }, + { + "epoch": 0.787248204589245, + "grad_norm": 1.0715216398239136, + "learning_rate": 1.1406481558805043e-06, + "loss": 0.2939, + "step": 39326 + }, + { + "epoch": 0.7872682231063733, + "grad_norm": 1.1137229204177856, + "learning_rate": 1.140442054941448e-06, + "loss": 0.2886, + "step": 39327 + }, + { + "epoch": 0.7872882416235017, + "grad_norm": 1.0317434072494507, + "learning_rate": 1.1402359702271433e-06, + "loss": 0.2615, + "step": 39328 + }, + { + "epoch": 0.78730826014063, + "grad_norm": 1.995378017425537, + "learning_rate": 1.1400299017384535e-06, + "loss": 0.7344, + "step": 39329 + }, + { + "epoch": 0.7873282786577585, + "grad_norm": 1.1500822305679321, + "learning_rate": 1.1398238494762499e-06, + "loss": 0.2637, + "step": 39330 + }, + { + "epoch": 0.7873482971748867, + "grad_norm": 1.0505354404449463, + "learning_rate": 1.1396178134413954e-06, + "loss": 0.2966, + "step": 39331 + }, + { + "epoch": 0.7873683156920152, + "grad_norm": 1.0850911140441895, + "learning_rate": 1.1394117936347571e-06, + "loss": 0.2794, + "step": 39332 + }, + { + "epoch": 0.7873883342091434, + "grad_norm": 1.1603938341140747, + "learning_rate": 1.139205790057199e-06, + "loss": 0.3229, + "step": 39333 + }, + { + "epoch": 0.7874083527262719, + "grad_norm": 1.1641952991485596, + "learning_rate": 1.138999802709591e-06, + "loss": 0.3166, + "step": 39334 + }, + { + "epoch": 0.7874283712434001, + "grad_norm": 1.9809541702270508, + "learning_rate": 1.138793831592795e-06, + "loss": 0.7135, + "step": 39335 + }, + { + "epoch": 0.7874483897605284, + "grad_norm": 1.0621230602264404, + "learning_rate": 1.1385878767076802e-06, + "loss": 0.2738, + "step": 39336 + }, + { + "epoch": 0.7874684082776569, + "grad_norm": 1.0605034828186035, + "learning_rate": 1.1383819380551115e-06, + "loss": 0.282, + "step": 39337 + }, + { + "epoch": 0.7874884267947851, + "grad_norm": 1.1944037675857544, + "learning_rate": 1.1381760156359522e-06, + "loss": 0.2908, + "step": 39338 + }, + { + "epoch": 0.7875084453119136, + "grad_norm": 1.1775527000427246, + "learning_rate": 1.1379701094510713e-06, + "loss": 0.2991, + "step": 39339 + }, + { + "epoch": 0.7875284638290418, + "grad_norm": 1.1187562942504883, + "learning_rate": 1.1377642195013332e-06, + "loss": 0.3281, + "step": 39340 + }, + { + "epoch": 0.7875484823461703, + "grad_norm": 1.0664830207824707, + "learning_rate": 1.1375583457876027e-06, + "loss": 0.2752, + "step": 39341 + }, + { + "epoch": 0.7875685008632985, + "grad_norm": 1.1152229309082031, + "learning_rate": 1.137352488310745e-06, + "loss": 0.3095, + "step": 39342 + }, + { + "epoch": 0.7875885193804268, + "grad_norm": 1.209005355834961, + "learning_rate": 1.137146647071627e-06, + "loss": 0.2794, + "step": 39343 + }, + { + "epoch": 0.7876085378975552, + "grad_norm": 1.2186142206192017, + "learning_rate": 1.1369408220711136e-06, + "loss": 0.3007, + "step": 39344 + }, + { + "epoch": 0.7876285564146835, + "grad_norm": 1.101226806640625, + "learning_rate": 1.136735013310069e-06, + "loss": 0.3131, + "step": 39345 + }, + { + "epoch": 0.787648574931812, + "grad_norm": 0.9590772986412048, + "learning_rate": 1.1365292207893586e-06, + "loss": 0.2608, + "step": 39346 + }, + { + "epoch": 0.7876685934489402, + "grad_norm": 1.1204859018325806, + "learning_rate": 1.1363234445098486e-06, + "loss": 0.2544, + "step": 39347 + }, + { + "epoch": 0.7876886119660687, + "grad_norm": 1.00052011013031, + "learning_rate": 1.136117684472402e-06, + "loss": 0.2357, + "step": 39348 + }, + { + "epoch": 0.787708630483197, + "grad_norm": 1.0848623514175415, + "learning_rate": 1.1359119406778867e-06, + "loss": 0.2885, + "step": 39349 + }, + { + "epoch": 0.7877286490003254, + "grad_norm": 1.168060064315796, + "learning_rate": 1.1357062131271663e-06, + "loss": 0.33, + "step": 39350 + }, + { + "epoch": 0.7877486675174536, + "grad_norm": 1.3910759687423706, + "learning_rate": 1.1355005018211036e-06, + "loss": 0.3017, + "step": 39351 + }, + { + "epoch": 0.7877686860345819, + "grad_norm": 1.0414345264434814, + "learning_rate": 1.1352948067605667e-06, + "loss": 0.2956, + "step": 39352 + }, + { + "epoch": 0.7877887045517104, + "grad_norm": 1.0754941701889038, + "learning_rate": 1.1350891279464187e-06, + "loss": 0.278, + "step": 39353 + }, + { + "epoch": 0.7878087230688386, + "grad_norm": 1.1833957433700562, + "learning_rate": 1.1348834653795243e-06, + "loss": 0.3012, + "step": 39354 + }, + { + "epoch": 0.787828741585967, + "grad_norm": 1.1626511812210083, + "learning_rate": 1.1346778190607467e-06, + "loss": 0.2669, + "step": 39355 + }, + { + "epoch": 0.7878487601030953, + "grad_norm": 1.161569595336914, + "learning_rate": 1.1344721889909528e-06, + "loss": 0.2743, + "step": 39356 + }, + { + "epoch": 0.7878687786202238, + "grad_norm": 1.1628062725067139, + "learning_rate": 1.1342665751710059e-06, + "loss": 0.3359, + "step": 39357 + }, + { + "epoch": 0.787888797137352, + "grad_norm": 1.072005271911621, + "learning_rate": 1.1340609776017708e-06, + "loss": 0.3011, + "step": 39358 + }, + { + "epoch": 0.7879088156544803, + "grad_norm": 1.3291503190994263, + "learning_rate": 1.1338553962841097e-06, + "loss": 0.2492, + "step": 39359 + }, + { + "epoch": 0.7879288341716087, + "grad_norm": 1.0284103155136108, + "learning_rate": 1.1336498312188903e-06, + "loss": 0.2601, + "step": 39360 + }, + { + "epoch": 0.787948852688737, + "grad_norm": 1.9010905027389526, + "learning_rate": 1.133444282406973e-06, + "loss": 0.78, + "step": 39361 + }, + { + "epoch": 0.7879688712058655, + "grad_norm": 1.143713116645813, + "learning_rate": 1.133238749849226e-06, + "loss": 0.2908, + "step": 39362 + }, + { + "epoch": 0.7879888897229937, + "grad_norm": 0.970589280128479, + "learning_rate": 1.1330332335465105e-06, + "loss": 0.2504, + "step": 39363 + }, + { + "epoch": 0.7880089082401222, + "grad_norm": 1.0885605812072754, + "learning_rate": 1.13282773349969e-06, + "loss": 0.2493, + "step": 39364 + }, + { + "epoch": 0.7880289267572504, + "grad_norm": 1.0991957187652588, + "learning_rate": 1.1326222497096305e-06, + "loss": 0.3199, + "step": 39365 + }, + { + "epoch": 0.7880489452743789, + "grad_norm": 1.0226950645446777, + "learning_rate": 1.1324167821771953e-06, + "loss": 0.2977, + "step": 39366 + }, + { + "epoch": 0.7880689637915071, + "grad_norm": 1.8018165826797485, + "learning_rate": 1.1322113309032473e-06, + "loss": 0.7716, + "step": 39367 + }, + { + "epoch": 0.7880889823086354, + "grad_norm": 1.1013497114181519, + "learning_rate": 1.1320058958886509e-06, + "loss": 0.277, + "step": 39368 + }, + { + "epoch": 0.7881090008257639, + "grad_norm": 1.1448650360107422, + "learning_rate": 1.1318004771342677e-06, + "loss": 0.2751, + "step": 39369 + }, + { + "epoch": 0.7881290193428921, + "grad_norm": 1.3746436834335327, + "learning_rate": 1.1315950746409643e-06, + "loss": 0.2745, + "step": 39370 + }, + { + "epoch": 0.7881490378600206, + "grad_norm": 1.3074477910995483, + "learning_rate": 1.1313896884096026e-06, + "loss": 0.3012, + "step": 39371 + }, + { + "epoch": 0.7881690563771488, + "grad_norm": 1.0966774225234985, + "learning_rate": 1.131184318441045e-06, + "loss": 0.2848, + "step": 39372 + }, + { + "epoch": 0.7881890748942773, + "grad_norm": 1.0991649627685547, + "learning_rate": 1.130978964736157e-06, + "loss": 0.2725, + "step": 39373 + }, + { + "epoch": 0.7882090934114055, + "grad_norm": 1.1919777393341064, + "learning_rate": 1.1307736272957991e-06, + "loss": 0.3188, + "step": 39374 + }, + { + "epoch": 0.7882291119285338, + "grad_norm": 1.0250682830810547, + "learning_rate": 1.1305683061208384e-06, + "loss": 0.283, + "step": 39375 + }, + { + "epoch": 0.7882491304456622, + "grad_norm": 1.1823110580444336, + "learning_rate": 1.1303630012121353e-06, + "loss": 0.3283, + "step": 39376 + }, + { + "epoch": 0.7882691489627905, + "grad_norm": 1.0862630605697632, + "learning_rate": 1.130157712570552e-06, + "loss": 0.2827, + "step": 39377 + }, + { + "epoch": 0.788289167479919, + "grad_norm": 1.2251302003860474, + "learning_rate": 1.1299524401969543e-06, + "loss": 0.34, + "step": 39378 + }, + { + "epoch": 0.7883091859970472, + "grad_norm": 1.0482240915298462, + "learning_rate": 1.1297471840922036e-06, + "loss": 0.295, + "step": 39379 + }, + { + "epoch": 0.7883292045141757, + "grad_norm": 1.1202881336212158, + "learning_rate": 1.129541944257163e-06, + "loss": 0.2762, + "step": 39380 + }, + { + "epoch": 0.788349223031304, + "grad_norm": 1.0675252676010132, + "learning_rate": 1.1293367206926952e-06, + "loss": 0.2632, + "step": 39381 + }, + { + "epoch": 0.7883692415484324, + "grad_norm": 1.032902479171753, + "learning_rate": 1.1291315133996617e-06, + "loss": 0.3105, + "step": 39382 + }, + { + "epoch": 0.7883892600655606, + "grad_norm": 1.1253107786178589, + "learning_rate": 1.1289263223789277e-06, + "loss": 0.2999, + "step": 39383 + }, + { + "epoch": 0.7884092785826889, + "grad_norm": 1.0252636671066284, + "learning_rate": 1.1287211476313536e-06, + "loss": 0.2806, + "step": 39384 + }, + { + "epoch": 0.7884292970998173, + "grad_norm": 1.4281548261642456, + "learning_rate": 1.1285159891578018e-06, + "loss": 0.2842, + "step": 39385 + }, + { + "epoch": 0.7884493156169456, + "grad_norm": 1.0456398725509644, + "learning_rate": 1.1283108469591376e-06, + "loss": 0.2869, + "step": 39386 + }, + { + "epoch": 0.788469334134074, + "grad_norm": 1.8316811323165894, + "learning_rate": 1.1281057210362189e-06, + "loss": 0.71, + "step": 39387 + }, + { + "epoch": 0.7884893526512023, + "grad_norm": 1.2400015592575073, + "learning_rate": 1.1279006113899126e-06, + "loss": 0.3458, + "step": 39388 + }, + { + "epoch": 0.7885093711683308, + "grad_norm": 2.057053804397583, + "learning_rate": 1.1276955180210785e-06, + "loss": 0.7687, + "step": 39389 + }, + { + "epoch": 0.788529389685459, + "grad_norm": 1.0383756160736084, + "learning_rate": 1.1274904409305792e-06, + "loss": 0.2625, + "step": 39390 + }, + { + "epoch": 0.7885494082025873, + "grad_norm": 1.7825841903686523, + "learning_rate": 1.1272853801192757e-06, + "loss": 0.7277, + "step": 39391 + }, + { + "epoch": 0.7885694267197157, + "grad_norm": 1.0800362825393677, + "learning_rate": 1.1270803355880322e-06, + "loss": 0.2778, + "step": 39392 + }, + { + "epoch": 0.788589445236844, + "grad_norm": 1.0666567087173462, + "learning_rate": 1.1268753073377091e-06, + "loss": 0.2637, + "step": 39393 + }, + { + "epoch": 0.7886094637539725, + "grad_norm": 1.1742864847183228, + "learning_rate": 1.1266702953691694e-06, + "loss": 0.3286, + "step": 39394 + }, + { + "epoch": 0.7886294822711007, + "grad_norm": 1.1656842231750488, + "learning_rate": 1.1264652996832726e-06, + "loss": 0.3136, + "step": 39395 + }, + { + "epoch": 0.7886495007882292, + "grad_norm": 1.1745394468307495, + "learning_rate": 1.126260320280883e-06, + "loss": 0.3394, + "step": 39396 + }, + { + "epoch": 0.7886695193053574, + "grad_norm": 1.1514209508895874, + "learning_rate": 1.1260553571628618e-06, + "loss": 0.2904, + "step": 39397 + }, + { + "epoch": 0.7886895378224859, + "grad_norm": 1.90738046169281, + "learning_rate": 1.1258504103300688e-06, + "loss": 0.7341, + "step": 39398 + }, + { + "epoch": 0.7887095563396141, + "grad_norm": 0.9694016575813293, + "learning_rate": 1.1256454797833682e-06, + "loss": 0.2413, + "step": 39399 + }, + { + "epoch": 0.7887295748567424, + "grad_norm": 0.9825779795646667, + "learning_rate": 1.125440565523619e-06, + "loss": 0.2424, + "step": 39400 + }, + { + "epoch": 0.7887495933738708, + "grad_norm": 1.0971311330795288, + "learning_rate": 1.1252356675516852e-06, + "loss": 0.2548, + "step": 39401 + }, + { + "epoch": 0.7887696118909991, + "grad_norm": 1.1605768203735352, + "learning_rate": 1.1250307858684267e-06, + "loss": 0.2846, + "step": 39402 + }, + { + "epoch": 0.7887896304081276, + "grad_norm": 1.1208572387695312, + "learning_rate": 1.1248259204747052e-06, + "loss": 0.3133, + "step": 39403 + }, + { + "epoch": 0.7888096489252558, + "grad_norm": 1.900648593902588, + "learning_rate": 1.1246210713713796e-06, + "loss": 0.8502, + "step": 39404 + }, + { + "epoch": 0.7888296674423843, + "grad_norm": 1.0132758617401123, + "learning_rate": 1.124416238559315e-06, + "loss": 0.2419, + "step": 39405 + }, + { + "epoch": 0.7888496859595125, + "grad_norm": 1.0932199954986572, + "learning_rate": 1.1242114220393702e-06, + "loss": 0.2723, + "step": 39406 + }, + { + "epoch": 0.7888697044766408, + "grad_norm": 1.1196298599243164, + "learning_rate": 1.1240066218124068e-06, + "loss": 0.3198, + "step": 39407 + }, + { + "epoch": 0.7888897229937692, + "grad_norm": 1.1313868761062622, + "learning_rate": 1.1238018378792837e-06, + "loss": 0.2706, + "step": 39408 + }, + { + "epoch": 0.7889097415108975, + "grad_norm": 1.1915321350097656, + "learning_rate": 1.123597070240865e-06, + "loss": 0.2482, + "step": 39409 + }, + { + "epoch": 0.788929760028026, + "grad_norm": 1.085803747177124, + "learning_rate": 1.1233923188980101e-06, + "loss": 0.2893, + "step": 39410 + }, + { + "epoch": 0.7889497785451542, + "grad_norm": 1.1096248626708984, + "learning_rate": 1.1231875838515778e-06, + "loss": 0.3053, + "step": 39411 + }, + { + "epoch": 0.7889697970622827, + "grad_norm": 1.9505778551101685, + "learning_rate": 1.1229828651024326e-06, + "loss": 0.7115, + "step": 39412 + }, + { + "epoch": 0.788989815579411, + "grad_norm": 1.9742926359176636, + "learning_rate": 1.122778162651431e-06, + "loss": 0.7415, + "step": 39413 + }, + { + "epoch": 0.7890098340965394, + "grad_norm": 1.4125633239746094, + "learning_rate": 1.122573476499438e-06, + "loss": 0.3175, + "step": 39414 + }, + { + "epoch": 0.7890298526136676, + "grad_norm": 1.320374608039856, + "learning_rate": 1.122368806647311e-06, + "loss": 0.3326, + "step": 39415 + }, + { + "epoch": 0.7890498711307959, + "grad_norm": 2.0324201583862305, + "learning_rate": 1.122164153095911e-06, + "loss": 0.7752, + "step": 39416 + }, + { + "epoch": 0.7890698896479243, + "grad_norm": 1.012740135192871, + "learning_rate": 1.1219595158460972e-06, + "loss": 0.262, + "step": 39417 + }, + { + "epoch": 0.7890899081650526, + "grad_norm": 1.2086825370788574, + "learning_rate": 1.1217548948987323e-06, + "loss": 0.2998, + "step": 39418 + }, + { + "epoch": 0.789109926682181, + "grad_norm": 1.0734155178070068, + "learning_rate": 1.1215502902546754e-06, + "loss": 0.2565, + "step": 39419 + }, + { + "epoch": 0.7891299451993093, + "grad_norm": 1.1676181554794312, + "learning_rate": 1.121345701914786e-06, + "loss": 0.3152, + "step": 39420 + }, + { + "epoch": 0.7891499637164378, + "grad_norm": 1.1673316955566406, + "learning_rate": 1.121141129879925e-06, + "loss": 0.3168, + "step": 39421 + }, + { + "epoch": 0.789169982233566, + "grad_norm": 1.0944494009017944, + "learning_rate": 1.1209365741509508e-06, + "loss": 0.2882, + "step": 39422 + }, + { + "epoch": 0.7891900007506943, + "grad_norm": 1.1353514194488525, + "learning_rate": 1.1207320347287254e-06, + "loss": 0.3351, + "step": 39423 + }, + { + "epoch": 0.7892100192678227, + "grad_norm": 1.2151196002960205, + "learning_rate": 1.1205275116141062e-06, + "loss": 0.2833, + "step": 39424 + }, + { + "epoch": 0.789230037784951, + "grad_norm": 1.1215776205062866, + "learning_rate": 1.120323004807956e-06, + "loss": 0.3039, + "step": 39425 + }, + { + "epoch": 0.7892500563020795, + "grad_norm": 1.0116316080093384, + "learning_rate": 1.120118514311132e-06, + "loss": 0.2911, + "step": 39426 + }, + { + "epoch": 0.7892700748192077, + "grad_norm": 0.9856463670730591, + "learning_rate": 1.1199140401244956e-06, + "loss": 0.2662, + "step": 39427 + }, + { + "epoch": 0.7892900933363362, + "grad_norm": 1.2744829654693604, + "learning_rate": 1.1197095822489062e-06, + "loss": 0.2804, + "step": 39428 + }, + { + "epoch": 0.7893101118534644, + "grad_norm": 1.1136722564697266, + "learning_rate": 1.119505140685222e-06, + "loss": 0.2905, + "step": 39429 + }, + { + "epoch": 0.7893301303705929, + "grad_norm": 1.2315181493759155, + "learning_rate": 1.1193007154343032e-06, + "loss": 0.3318, + "step": 39430 + }, + { + "epoch": 0.7893501488877211, + "grad_norm": 1.054076910018921, + "learning_rate": 1.1190963064970084e-06, + "loss": 0.2758, + "step": 39431 + }, + { + "epoch": 0.7893701674048494, + "grad_norm": 1.120600700378418, + "learning_rate": 1.1188919138741983e-06, + "loss": 0.2726, + "step": 39432 + }, + { + "epoch": 0.7893901859219778, + "grad_norm": 1.2087745666503906, + "learning_rate": 1.1186875375667317e-06, + "loss": 0.3005, + "step": 39433 + }, + { + "epoch": 0.7894102044391061, + "grad_norm": 1.0330853462219238, + "learning_rate": 1.1184831775754668e-06, + "loss": 0.2839, + "step": 39434 + }, + { + "epoch": 0.7894302229562346, + "grad_norm": 1.0675163269042969, + "learning_rate": 1.1182788339012623e-06, + "loss": 0.2782, + "step": 39435 + }, + { + "epoch": 0.7894502414733628, + "grad_norm": 1.1374133825302124, + "learning_rate": 1.11807450654498e-06, + "loss": 0.269, + "step": 39436 + }, + { + "epoch": 0.7894702599904913, + "grad_norm": 1.167984962463379, + "learning_rate": 1.117870195507475e-06, + "loss": 0.2627, + "step": 39437 + }, + { + "epoch": 0.7894902785076195, + "grad_norm": 1.2367783784866333, + "learning_rate": 1.1176659007896101e-06, + "loss": 0.3284, + "step": 39438 + }, + { + "epoch": 0.7895102970247478, + "grad_norm": 1.1003884077072144, + "learning_rate": 1.1174616223922409e-06, + "loss": 0.254, + "step": 39439 + }, + { + "epoch": 0.7895303155418762, + "grad_norm": 1.2041853666305542, + "learning_rate": 1.1172573603162285e-06, + "loss": 0.3202, + "step": 39440 + }, + { + "epoch": 0.7895503340590045, + "grad_norm": 1.1731603145599365, + "learning_rate": 1.1170531145624308e-06, + "loss": 0.2805, + "step": 39441 + }, + { + "epoch": 0.789570352576133, + "grad_norm": 1.0344040393829346, + "learning_rate": 1.116848885131706e-06, + "loss": 0.2605, + "step": 39442 + }, + { + "epoch": 0.7895903710932612, + "grad_norm": 1.2976866960525513, + "learning_rate": 1.116644672024913e-06, + "loss": 0.3681, + "step": 39443 + }, + { + "epoch": 0.7896103896103897, + "grad_norm": 1.228820562362671, + "learning_rate": 1.1164404752429091e-06, + "loss": 0.2646, + "step": 39444 + }, + { + "epoch": 0.789630408127518, + "grad_norm": 1.1491154432296753, + "learning_rate": 1.1162362947865545e-06, + "loss": 0.306, + "step": 39445 + }, + { + "epoch": 0.7896504266446464, + "grad_norm": 1.1706600189208984, + "learning_rate": 1.1160321306567068e-06, + "loss": 0.2759, + "step": 39446 + }, + { + "epoch": 0.7896704451617746, + "grad_norm": 1.082485318183899, + "learning_rate": 1.1158279828542245e-06, + "loss": 0.2499, + "step": 39447 + }, + { + "epoch": 0.7896904636789029, + "grad_norm": 1.0924618244171143, + "learning_rate": 1.115623851379964e-06, + "loss": 0.2779, + "step": 39448 + }, + { + "epoch": 0.7897104821960313, + "grad_norm": 0.8892318606376648, + "learning_rate": 1.1154197362347863e-06, + "loss": 0.2465, + "step": 39449 + }, + { + "epoch": 0.7897305007131596, + "grad_norm": 1.091304898262024, + "learning_rate": 1.1152156374195466e-06, + "loss": 0.2661, + "step": 39450 + }, + { + "epoch": 0.789750519230288, + "grad_norm": 1.1706244945526123, + "learning_rate": 1.1150115549351054e-06, + "loss": 0.3047, + "step": 39451 + }, + { + "epoch": 0.7897705377474163, + "grad_norm": 1.1954922676086426, + "learning_rate": 1.1148074887823202e-06, + "loss": 0.32, + "step": 39452 + }, + { + "epoch": 0.7897905562645448, + "grad_norm": 1.1706832647323608, + "learning_rate": 1.1146034389620463e-06, + "loss": 0.3118, + "step": 39453 + }, + { + "epoch": 0.789810574781673, + "grad_norm": 1.1890356540679932, + "learning_rate": 1.114399405475145e-06, + "loss": 0.2893, + "step": 39454 + }, + { + "epoch": 0.7898305932988013, + "grad_norm": 1.1037445068359375, + "learning_rate": 1.114195388322472e-06, + "loss": 0.2616, + "step": 39455 + }, + { + "epoch": 0.7898506118159297, + "grad_norm": 1.06367027759552, + "learning_rate": 1.1139913875048857e-06, + "loss": 0.3134, + "step": 39456 + }, + { + "epoch": 0.789870630333058, + "grad_norm": 1.9169385433197021, + "learning_rate": 1.1137874030232421e-06, + "loss": 0.734, + "step": 39457 + }, + { + "epoch": 0.7898906488501864, + "grad_norm": 1.1299190521240234, + "learning_rate": 1.1135834348784014e-06, + "loss": 0.3031, + "step": 39458 + }, + { + "epoch": 0.7899106673673147, + "grad_norm": 1.0848969221115112, + "learning_rate": 1.1133794830712192e-06, + "loss": 0.281, + "step": 39459 + }, + { + "epoch": 0.7899306858844432, + "grad_norm": 1.0979052782058716, + "learning_rate": 1.1131755476025535e-06, + "loss": 0.2506, + "step": 39460 + }, + { + "epoch": 0.7899507044015714, + "grad_norm": 1.862039566040039, + "learning_rate": 1.11297162847326e-06, + "loss": 0.7552, + "step": 39461 + }, + { + "epoch": 0.7899707229186999, + "grad_norm": 1.1307008266448975, + "learning_rate": 1.1127677256841984e-06, + "loss": 0.2809, + "step": 39462 + }, + { + "epoch": 0.7899907414358281, + "grad_norm": 1.1766916513442993, + "learning_rate": 1.1125638392362237e-06, + "loss": 0.3435, + "step": 39463 + }, + { + "epoch": 0.7900107599529564, + "grad_norm": 1.0456827878952026, + "learning_rate": 1.112359969130195e-06, + "loss": 0.3352, + "step": 39464 + }, + { + "epoch": 0.7900307784700848, + "grad_norm": 1.235783338546753, + "learning_rate": 1.1121561153669686e-06, + "loss": 0.3449, + "step": 39465 + }, + { + "epoch": 0.7900507969872131, + "grad_norm": 1.0609647035598755, + "learning_rate": 1.1119522779473995e-06, + "loss": 0.291, + "step": 39466 + }, + { + "epoch": 0.7900708155043416, + "grad_norm": 1.068729043006897, + "learning_rate": 1.1117484568723485e-06, + "loss": 0.3247, + "step": 39467 + }, + { + "epoch": 0.7900908340214698, + "grad_norm": 1.13889479637146, + "learning_rate": 1.1115446521426692e-06, + "loss": 0.3044, + "step": 39468 + }, + { + "epoch": 0.7901108525385983, + "grad_norm": 1.0119061470031738, + "learning_rate": 1.1113408637592204e-06, + "loss": 0.2721, + "step": 39469 + }, + { + "epoch": 0.7901308710557265, + "grad_norm": 1.1443476676940918, + "learning_rate": 1.1111370917228558e-06, + "loss": 0.2751, + "step": 39470 + }, + { + "epoch": 0.7901508895728548, + "grad_norm": 1.2631274461746216, + "learning_rate": 1.1109333360344355e-06, + "loss": 0.2982, + "step": 39471 + }, + { + "epoch": 0.7901709080899832, + "grad_norm": 1.1977282762527466, + "learning_rate": 1.1107295966948146e-06, + "loss": 0.2872, + "step": 39472 + }, + { + "epoch": 0.7901909266071115, + "grad_norm": 1.1916826963424683, + "learning_rate": 1.110525873704849e-06, + "loss": 0.312, + "step": 39473 + }, + { + "epoch": 0.79021094512424, + "grad_norm": 1.1277316808700562, + "learning_rate": 1.1103221670653958e-06, + "loss": 0.2763, + "step": 39474 + }, + { + "epoch": 0.7902309636413682, + "grad_norm": 1.1414376497268677, + "learning_rate": 1.11011847677731e-06, + "loss": 0.2779, + "step": 39475 + }, + { + "epoch": 0.7902509821584967, + "grad_norm": 1.158483624458313, + "learning_rate": 1.1099148028414492e-06, + "loss": 0.2504, + "step": 39476 + }, + { + "epoch": 0.790271000675625, + "grad_norm": 1.2389568090438843, + "learning_rate": 1.1097111452586701e-06, + "loss": 0.2745, + "step": 39477 + }, + { + "epoch": 0.7902910191927534, + "grad_norm": 1.1579244136810303, + "learning_rate": 1.1095075040298287e-06, + "loss": 0.2404, + "step": 39478 + }, + { + "epoch": 0.7903110377098816, + "grad_norm": 1.0841165781021118, + "learning_rate": 1.1093038791557792e-06, + "loss": 0.2878, + "step": 39479 + }, + { + "epoch": 0.7903310562270099, + "grad_norm": 1.8889365196228027, + "learning_rate": 1.1091002706373798e-06, + "loss": 0.7159, + "step": 39480 + }, + { + "epoch": 0.7903510747441383, + "grad_norm": 1.042412519454956, + "learning_rate": 1.1088966784754857e-06, + "loss": 0.289, + "step": 39481 + }, + { + "epoch": 0.7903710932612666, + "grad_norm": 1.180662751197815, + "learning_rate": 1.108693102670953e-06, + "loss": 0.2874, + "step": 39482 + }, + { + "epoch": 0.790391111778395, + "grad_norm": 1.2021726369857788, + "learning_rate": 1.1084895432246367e-06, + "loss": 0.2746, + "step": 39483 + }, + { + "epoch": 0.7904111302955233, + "grad_norm": 1.0872758626937866, + "learning_rate": 1.1082860001373918e-06, + "loss": 0.3046, + "step": 39484 + }, + { + "epoch": 0.7904311488126518, + "grad_norm": 1.7914905548095703, + "learning_rate": 1.1080824734100758e-06, + "loss": 0.6987, + "step": 39485 + }, + { + "epoch": 0.79045116732978, + "grad_norm": 1.334803581237793, + "learning_rate": 1.1078789630435443e-06, + "loss": 0.3207, + "step": 39486 + }, + { + "epoch": 0.7904711858469083, + "grad_norm": 1.0633233785629272, + "learning_rate": 1.1076754690386514e-06, + "loss": 0.2964, + "step": 39487 + }, + { + "epoch": 0.7904912043640367, + "grad_norm": 1.1104555130004883, + "learning_rate": 1.1074719913962523e-06, + "loss": 0.2907, + "step": 39488 + }, + { + "epoch": 0.790511222881165, + "grad_norm": 0.9663638472557068, + "learning_rate": 1.107268530117203e-06, + "loss": 0.2508, + "step": 39489 + }, + { + "epoch": 0.7905312413982934, + "grad_norm": 1.160093903541565, + "learning_rate": 1.1070650852023607e-06, + "loss": 0.324, + "step": 39490 + }, + { + "epoch": 0.7905512599154217, + "grad_norm": 0.9856176972389221, + "learning_rate": 1.1068616566525785e-06, + "loss": 0.2748, + "step": 39491 + }, + { + "epoch": 0.7905712784325502, + "grad_norm": 1.1244606971740723, + "learning_rate": 1.1066582444687113e-06, + "loss": 0.2967, + "step": 39492 + }, + { + "epoch": 0.7905912969496784, + "grad_norm": 1.1156402826309204, + "learning_rate": 1.1064548486516157e-06, + "loss": 0.3049, + "step": 39493 + }, + { + "epoch": 0.7906113154668067, + "grad_norm": 1.1206769943237305, + "learning_rate": 1.106251469202147e-06, + "loss": 0.3207, + "step": 39494 + }, + { + "epoch": 0.7906313339839351, + "grad_norm": 1.1734126806259155, + "learning_rate": 1.106048106121158e-06, + "loss": 0.3125, + "step": 39495 + }, + { + "epoch": 0.7906513525010634, + "grad_norm": 1.0204730033874512, + "learning_rate": 1.1058447594095056e-06, + "loss": 0.2579, + "step": 39496 + }, + { + "epoch": 0.7906713710181918, + "grad_norm": 1.0127084255218506, + "learning_rate": 1.1056414290680423e-06, + "loss": 0.2349, + "step": 39497 + }, + { + "epoch": 0.7906913895353201, + "grad_norm": 1.17189359664917, + "learning_rate": 1.1054381150976258e-06, + "loss": 0.3, + "step": 39498 + }, + { + "epoch": 0.7907114080524485, + "grad_norm": 1.2311772108078003, + "learning_rate": 1.105234817499109e-06, + "loss": 0.3046, + "step": 39499 + }, + { + "epoch": 0.7907314265695768, + "grad_norm": 1.1506074666976929, + "learning_rate": 1.1050315362733471e-06, + "loss": 0.2729, + "step": 39500 + }, + { + "epoch": 0.7907514450867053, + "grad_norm": 1.1058666706085205, + "learning_rate": 1.1048282714211928e-06, + "loss": 0.2918, + "step": 39501 + }, + { + "epoch": 0.7907714636038335, + "grad_norm": 0.98614102602005, + "learning_rate": 1.1046250229435023e-06, + "loss": 0.2588, + "step": 39502 + }, + { + "epoch": 0.7907914821209618, + "grad_norm": 1.8500194549560547, + "learning_rate": 1.1044217908411315e-06, + "loss": 0.8017, + "step": 39503 + }, + { + "epoch": 0.7908115006380902, + "grad_norm": 1.883471131324768, + "learning_rate": 1.1042185751149331e-06, + "loss": 0.7023, + "step": 39504 + }, + { + "epoch": 0.7908315191552185, + "grad_norm": 2.1547353267669678, + "learning_rate": 1.1040153757657613e-06, + "loss": 0.7955, + "step": 39505 + }, + { + "epoch": 0.790851537672347, + "grad_norm": 1.2829865217208862, + "learning_rate": 1.103812192794469e-06, + "loss": 0.3237, + "step": 39506 + }, + { + "epoch": 0.7908715561894752, + "grad_norm": 1.0652142763137817, + "learning_rate": 1.1036090262019134e-06, + "loss": 0.2555, + "step": 39507 + }, + { + "epoch": 0.7908915747066037, + "grad_norm": 1.0465885400772095, + "learning_rate": 1.1034058759889465e-06, + "loss": 0.2846, + "step": 39508 + }, + { + "epoch": 0.790911593223732, + "grad_norm": 1.1922752857208252, + "learning_rate": 1.1032027421564233e-06, + "loss": 0.3051, + "step": 39509 + }, + { + "epoch": 0.7909316117408602, + "grad_norm": 1.1049072742462158, + "learning_rate": 1.1029996247051956e-06, + "loss": 0.2882, + "step": 39510 + }, + { + "epoch": 0.7909516302579886, + "grad_norm": 1.2133961915969849, + "learning_rate": 1.1027965236361199e-06, + "loss": 0.3021, + "step": 39511 + }, + { + "epoch": 0.7909716487751169, + "grad_norm": 1.0133906602859497, + "learning_rate": 1.1025934389500493e-06, + "loss": 0.2665, + "step": 39512 + }, + { + "epoch": 0.7909916672922453, + "grad_norm": 1.4746729135513306, + "learning_rate": 1.1023903706478373e-06, + "loss": 0.3136, + "step": 39513 + }, + { + "epoch": 0.7910116858093736, + "grad_norm": 1.1225073337554932, + "learning_rate": 1.1021873187303355e-06, + "loss": 0.327, + "step": 39514 + }, + { + "epoch": 0.791031704326502, + "grad_norm": 2.1553244590759277, + "learning_rate": 1.1019842831983995e-06, + "loss": 0.796, + "step": 39515 + }, + { + "epoch": 0.7910517228436303, + "grad_norm": 1.2393001317977905, + "learning_rate": 1.1017812640528847e-06, + "loss": 0.2884, + "step": 39516 + }, + { + "epoch": 0.7910717413607588, + "grad_norm": 1.0390069484710693, + "learning_rate": 1.1015782612946419e-06, + "loss": 0.2731, + "step": 39517 + }, + { + "epoch": 0.791091759877887, + "grad_norm": 1.033122181892395, + "learning_rate": 1.1013752749245255e-06, + "loss": 0.3315, + "step": 39518 + }, + { + "epoch": 0.7911117783950153, + "grad_norm": 1.1660127639770508, + "learning_rate": 1.1011723049433864e-06, + "loss": 0.284, + "step": 39519 + }, + { + "epoch": 0.7911317969121437, + "grad_norm": 1.1071221828460693, + "learning_rate": 1.100969351352082e-06, + "loss": 0.2856, + "step": 39520 + }, + { + "epoch": 0.791151815429272, + "grad_norm": 1.0451241731643677, + "learning_rate": 1.1007664141514629e-06, + "loss": 0.3017, + "step": 39521 + }, + { + "epoch": 0.7911718339464004, + "grad_norm": 1.152814507484436, + "learning_rate": 1.1005634933423831e-06, + "loss": 0.2693, + "step": 39522 + }, + { + "epoch": 0.7911918524635287, + "grad_norm": 1.1120517253875732, + "learning_rate": 1.1003605889256936e-06, + "loss": 0.3096, + "step": 39523 + }, + { + "epoch": 0.7912118709806572, + "grad_norm": 1.1432491540908813, + "learning_rate": 1.1001577009022508e-06, + "loss": 0.316, + "step": 39524 + }, + { + "epoch": 0.7912318894977854, + "grad_norm": 1.26194167137146, + "learning_rate": 1.0999548292729051e-06, + "loss": 0.2702, + "step": 39525 + }, + { + "epoch": 0.7912519080149137, + "grad_norm": 1.1985071897506714, + "learning_rate": 1.0997519740385105e-06, + "loss": 0.3063, + "step": 39526 + }, + { + "epoch": 0.7912719265320421, + "grad_norm": 1.368471384048462, + "learning_rate": 1.0995491351999172e-06, + "loss": 0.2727, + "step": 39527 + }, + { + "epoch": 0.7912919450491704, + "grad_norm": 1.2213960886001587, + "learning_rate": 1.0993463127579806e-06, + "loss": 0.3482, + "step": 39528 + }, + { + "epoch": 0.7913119635662988, + "grad_norm": 1.1855453252792358, + "learning_rate": 1.0991435067135537e-06, + "loss": 0.309, + "step": 39529 + }, + { + "epoch": 0.7913319820834271, + "grad_norm": 1.2147681713104248, + "learning_rate": 1.0989407170674882e-06, + "loss": 0.3307, + "step": 39530 + }, + { + "epoch": 0.7913520006005555, + "grad_norm": 1.149747610092163, + "learning_rate": 1.0987379438206363e-06, + "loss": 0.2932, + "step": 39531 + }, + { + "epoch": 0.7913720191176838, + "grad_norm": 1.0145705938339233, + "learning_rate": 1.0985351869738491e-06, + "loss": 0.2853, + "step": 39532 + }, + { + "epoch": 0.7913920376348123, + "grad_norm": 1.0653181076049805, + "learning_rate": 1.098332446527982e-06, + "loss": 0.284, + "step": 39533 + }, + { + "epoch": 0.7914120561519405, + "grad_norm": 1.8668546676635742, + "learning_rate": 1.0981297224838855e-06, + "loss": 0.7363, + "step": 39534 + }, + { + "epoch": 0.7914320746690688, + "grad_norm": 1.161747694015503, + "learning_rate": 1.097927014842412e-06, + "loss": 0.3208, + "step": 39535 + }, + { + "epoch": 0.7914520931861972, + "grad_norm": 1.7873954772949219, + "learning_rate": 1.0977243236044138e-06, + "loss": 0.6946, + "step": 39536 + }, + { + "epoch": 0.7914721117033255, + "grad_norm": 1.209777593612671, + "learning_rate": 1.0975216487707407e-06, + "loss": 0.2931, + "step": 39537 + }, + { + "epoch": 0.791492130220454, + "grad_norm": 1.2124179601669312, + "learning_rate": 1.0973189903422487e-06, + "loss": 0.3629, + "step": 39538 + }, + { + "epoch": 0.7915121487375822, + "grad_norm": 1.132439374923706, + "learning_rate": 1.0971163483197878e-06, + "loss": 0.3639, + "step": 39539 + }, + { + "epoch": 0.7915321672547107, + "grad_norm": 1.067703127861023, + "learning_rate": 1.0969137227042086e-06, + "loss": 0.2975, + "step": 39540 + }, + { + "epoch": 0.791552185771839, + "grad_norm": 1.1342498064041138, + "learning_rate": 1.0967111134963636e-06, + "loss": 0.2727, + "step": 39541 + }, + { + "epoch": 0.7915722042889672, + "grad_norm": 1.0958541631698608, + "learning_rate": 1.096508520697107e-06, + "loss": 0.3099, + "step": 39542 + }, + { + "epoch": 0.7915922228060956, + "grad_norm": 1.1468695402145386, + "learning_rate": 1.0963059443072883e-06, + "loss": 0.2949, + "step": 39543 + }, + { + "epoch": 0.7916122413232239, + "grad_norm": 1.4316837787628174, + "learning_rate": 1.0961033843277592e-06, + "loss": 0.2985, + "step": 39544 + }, + { + "epoch": 0.7916322598403523, + "grad_norm": 1.2359181642532349, + "learning_rate": 1.0959008407593719e-06, + "loss": 0.3115, + "step": 39545 + }, + { + "epoch": 0.7916522783574806, + "grad_norm": 1.213343620300293, + "learning_rate": 1.0956983136029753e-06, + "loss": 0.3156, + "step": 39546 + }, + { + "epoch": 0.791672296874609, + "grad_norm": 1.1573009490966797, + "learning_rate": 1.0954958028594244e-06, + "loss": 0.3187, + "step": 39547 + }, + { + "epoch": 0.7916923153917373, + "grad_norm": 1.2419626712799072, + "learning_rate": 1.0952933085295686e-06, + "loss": 0.2656, + "step": 39548 + }, + { + "epoch": 0.7917123339088658, + "grad_norm": 1.2128584384918213, + "learning_rate": 1.09509083061426e-06, + "loss": 0.3224, + "step": 39549 + }, + { + "epoch": 0.791732352425994, + "grad_norm": 1.2306486368179321, + "learning_rate": 1.0948883691143475e-06, + "loss": 0.3094, + "step": 39550 + }, + { + "epoch": 0.7917523709431223, + "grad_norm": 1.2052972316741943, + "learning_rate": 1.094685924030685e-06, + "loss": 0.3176, + "step": 39551 + }, + { + "epoch": 0.7917723894602507, + "grad_norm": 1.1785104274749756, + "learning_rate": 1.0944834953641232e-06, + "loss": 0.267, + "step": 39552 + }, + { + "epoch": 0.791792407977379, + "grad_norm": 1.1258407831192017, + "learning_rate": 1.0942810831155105e-06, + "loss": 0.3112, + "step": 39553 + }, + { + "epoch": 0.7918124264945074, + "grad_norm": 1.142879843711853, + "learning_rate": 1.094078687285699e-06, + "loss": 0.2931, + "step": 39554 + }, + { + "epoch": 0.7918324450116357, + "grad_norm": 1.1636285781860352, + "learning_rate": 1.0938763078755421e-06, + "loss": 0.3068, + "step": 39555 + }, + { + "epoch": 0.7918524635287641, + "grad_norm": 1.8751111030578613, + "learning_rate": 1.0936739448858886e-06, + "loss": 0.8276, + "step": 39556 + }, + { + "epoch": 0.7918724820458924, + "grad_norm": 1.1763842105865479, + "learning_rate": 1.0934715983175892e-06, + "loss": 0.2777, + "step": 39557 + }, + { + "epoch": 0.7918925005630207, + "grad_norm": 1.1709613800048828, + "learning_rate": 1.0932692681714946e-06, + "loss": 0.2854, + "step": 39558 + }, + { + "epoch": 0.7919125190801491, + "grad_norm": 1.1859796047210693, + "learning_rate": 1.0930669544484535e-06, + "loss": 0.38, + "step": 39559 + }, + { + "epoch": 0.7919325375972774, + "grad_norm": 1.3617104291915894, + "learning_rate": 1.09286465714932e-06, + "loss": 0.2689, + "step": 39560 + }, + { + "epoch": 0.7919525561144058, + "grad_norm": 1.1618026494979858, + "learning_rate": 1.0926623762749423e-06, + "loss": 0.3278, + "step": 39561 + }, + { + "epoch": 0.7919725746315341, + "grad_norm": 1.0671206712722778, + "learning_rate": 1.0924601118261713e-06, + "loss": 0.2841, + "step": 39562 + }, + { + "epoch": 0.7919925931486625, + "grad_norm": 1.1698887348175049, + "learning_rate": 1.0922578638038555e-06, + "loss": 0.3279, + "step": 39563 + }, + { + "epoch": 0.7920126116657908, + "grad_norm": 0.9667761921882629, + "learning_rate": 1.0920556322088483e-06, + "loss": 0.2802, + "step": 39564 + }, + { + "epoch": 0.7920326301829193, + "grad_norm": 0.9521249532699585, + "learning_rate": 1.0918534170419982e-06, + "loss": 0.2902, + "step": 39565 + }, + { + "epoch": 0.7920526487000475, + "grad_norm": 1.3095749616622925, + "learning_rate": 1.0916512183041538e-06, + "loss": 0.2958, + "step": 39566 + }, + { + "epoch": 0.7920726672171758, + "grad_norm": 1.0489780902862549, + "learning_rate": 1.0914490359961683e-06, + "loss": 0.2934, + "step": 39567 + }, + { + "epoch": 0.7920926857343042, + "grad_norm": 1.1919642686843872, + "learning_rate": 1.0912468701188876e-06, + "loss": 0.3045, + "step": 39568 + }, + { + "epoch": 0.7921127042514325, + "grad_norm": 1.160111904144287, + "learning_rate": 1.0910447206731661e-06, + "loss": 0.3211, + "step": 39569 + }, + { + "epoch": 0.792132722768561, + "grad_norm": 1.124652624130249, + "learning_rate": 1.0908425876598512e-06, + "loss": 0.3249, + "step": 39570 + }, + { + "epoch": 0.7921527412856892, + "grad_norm": 1.195119857788086, + "learning_rate": 1.090640471079793e-06, + "loss": 0.3036, + "step": 39571 + }, + { + "epoch": 0.7921727598028176, + "grad_norm": 1.0107706785202026, + "learning_rate": 1.0904383709338395e-06, + "loss": 0.2694, + "step": 39572 + }, + { + "epoch": 0.792192778319946, + "grad_norm": 1.174052119255066, + "learning_rate": 1.0902362872228427e-06, + "loss": 0.3377, + "step": 39573 + }, + { + "epoch": 0.7922127968370742, + "grad_norm": 1.1153154373168945, + "learning_rate": 1.0900342199476517e-06, + "loss": 0.2695, + "step": 39574 + }, + { + "epoch": 0.7922328153542026, + "grad_norm": 0.9835078120231628, + "learning_rate": 1.0898321691091153e-06, + "loss": 0.2945, + "step": 39575 + }, + { + "epoch": 0.7922528338713309, + "grad_norm": 1.1687785387039185, + "learning_rate": 1.0896301347080822e-06, + "loss": 0.2923, + "step": 39576 + }, + { + "epoch": 0.7922728523884593, + "grad_norm": 1.1274199485778809, + "learning_rate": 1.0894281167454017e-06, + "loss": 0.2816, + "step": 39577 + }, + { + "epoch": 0.7922928709055876, + "grad_norm": 1.151161789894104, + "learning_rate": 1.0892261152219259e-06, + "loss": 0.3093, + "step": 39578 + }, + { + "epoch": 0.792312889422716, + "grad_norm": 1.186120867729187, + "learning_rate": 1.0890241301384996e-06, + "loss": 0.3449, + "step": 39579 + }, + { + "epoch": 0.7923329079398443, + "grad_norm": 1.8511528968811035, + "learning_rate": 1.088822161495976e-06, + "loss": 0.7303, + "step": 39580 + }, + { + "epoch": 0.7923529264569728, + "grad_norm": 1.205983281135559, + "learning_rate": 1.088620209295201e-06, + "loss": 0.2882, + "step": 39581 + }, + { + "epoch": 0.792372944974101, + "grad_norm": 1.138675332069397, + "learning_rate": 1.0884182735370258e-06, + "loss": 0.2739, + "step": 39582 + }, + { + "epoch": 0.7923929634912293, + "grad_norm": 1.1282958984375, + "learning_rate": 1.0882163542222983e-06, + "loss": 0.2707, + "step": 39583 + }, + { + "epoch": 0.7924129820083577, + "grad_norm": 1.1145274639129639, + "learning_rate": 1.088014451351868e-06, + "loss": 0.2984, + "step": 39584 + }, + { + "epoch": 0.792433000525486, + "grad_norm": 1.186307430267334, + "learning_rate": 1.0878125649265819e-06, + "loss": 0.2778, + "step": 39585 + }, + { + "epoch": 0.7924530190426144, + "grad_norm": 1.1487832069396973, + "learning_rate": 1.087610694947291e-06, + "loss": 0.3122, + "step": 39586 + }, + { + "epoch": 0.7924730375597427, + "grad_norm": 1.1357146501541138, + "learning_rate": 1.0874088414148426e-06, + "loss": 0.2969, + "step": 39587 + }, + { + "epoch": 0.7924930560768711, + "grad_norm": 1.0071049928665161, + "learning_rate": 1.0872070043300853e-06, + "loss": 0.2338, + "step": 39588 + }, + { + "epoch": 0.7925130745939994, + "grad_norm": 1.0389821529388428, + "learning_rate": 1.0870051836938678e-06, + "loss": 0.3133, + "step": 39589 + }, + { + "epoch": 0.7925330931111277, + "grad_norm": 1.2106361389160156, + "learning_rate": 1.086803379507037e-06, + "loss": 0.3121, + "step": 39590 + }, + { + "epoch": 0.7925531116282561, + "grad_norm": 1.199819803237915, + "learning_rate": 1.0866015917704443e-06, + "loss": 0.3157, + "step": 39591 + }, + { + "epoch": 0.7925731301453844, + "grad_norm": 1.1319078207015991, + "learning_rate": 1.0863998204849347e-06, + "loss": 0.3194, + "step": 39592 + }, + { + "epoch": 0.7925931486625128, + "grad_norm": 1.4211113452911377, + "learning_rate": 1.0861980656513598e-06, + "loss": 0.3169, + "step": 39593 + }, + { + "epoch": 0.7926131671796411, + "grad_norm": 1.1230318546295166, + "learning_rate": 1.0859963272705643e-06, + "loss": 0.3123, + "step": 39594 + }, + { + "epoch": 0.7926331856967695, + "grad_norm": 1.0307190418243408, + "learning_rate": 1.0857946053433993e-06, + "loss": 0.2744, + "step": 39595 + }, + { + "epoch": 0.7926532042138978, + "grad_norm": 2.0765039920806885, + "learning_rate": 1.0855928998707115e-06, + "loss": 0.7589, + "step": 39596 + }, + { + "epoch": 0.7926732227310263, + "grad_norm": 1.0085666179656982, + "learning_rate": 1.0853912108533487e-06, + "loss": 0.2618, + "step": 39597 + }, + { + "epoch": 0.7926932412481545, + "grad_norm": 1.085078477859497, + "learning_rate": 1.085189538292159e-06, + "loss": 0.3046, + "step": 39598 + }, + { + "epoch": 0.7927132597652828, + "grad_norm": 1.143752098083496, + "learning_rate": 1.0849878821879882e-06, + "loss": 0.3317, + "step": 39599 + }, + { + "epoch": 0.7927332782824112, + "grad_norm": 1.0775187015533447, + "learning_rate": 1.0847862425416878e-06, + "loss": 0.2912, + "step": 39600 + }, + { + "epoch": 0.7927532967995395, + "grad_norm": 1.1563020944595337, + "learning_rate": 1.0845846193541031e-06, + "loss": 0.2858, + "step": 39601 + }, + { + "epoch": 0.792773315316668, + "grad_norm": 1.1869324445724487, + "learning_rate": 1.0843830126260824e-06, + "loss": 0.3099, + "step": 39602 + }, + { + "epoch": 0.7927933338337962, + "grad_norm": 1.087066411972046, + "learning_rate": 1.0841814223584708e-06, + "loss": 0.2972, + "step": 39603 + }, + { + "epoch": 0.7928133523509246, + "grad_norm": 1.082373023033142, + "learning_rate": 1.0839798485521197e-06, + "loss": 0.2979, + "step": 39604 + }, + { + "epoch": 0.792833370868053, + "grad_norm": 1.1225529909133911, + "learning_rate": 1.0837782912078731e-06, + "loss": 0.3072, + "step": 39605 + }, + { + "epoch": 0.7928533893851812, + "grad_norm": 1.2330501079559326, + "learning_rate": 1.083576750326581e-06, + "loss": 0.2703, + "step": 39606 + }, + { + "epoch": 0.7928734079023096, + "grad_norm": 1.0137908458709717, + "learning_rate": 1.0833752259090886e-06, + "loss": 0.2534, + "step": 39607 + }, + { + "epoch": 0.7928934264194379, + "grad_norm": 1.3175047636032104, + "learning_rate": 1.0831737179562446e-06, + "loss": 0.3087, + "step": 39608 + }, + { + "epoch": 0.7929134449365663, + "grad_norm": 1.3394562005996704, + "learning_rate": 1.0829722264688957e-06, + "loss": 0.34, + "step": 39609 + }, + { + "epoch": 0.7929334634536946, + "grad_norm": 1.1332762241363525, + "learning_rate": 1.0827707514478885e-06, + "loss": 0.3048, + "step": 39610 + }, + { + "epoch": 0.792953481970823, + "grad_norm": 1.1149780750274658, + "learning_rate": 1.0825692928940702e-06, + "loss": 0.2529, + "step": 39611 + }, + { + "epoch": 0.7929735004879513, + "grad_norm": 1.0425105094909668, + "learning_rate": 1.0823678508082857e-06, + "loss": 0.297, + "step": 39612 + }, + { + "epoch": 0.7929935190050797, + "grad_norm": 1.1264528036117554, + "learning_rate": 1.082166425191386e-06, + "loss": 0.2607, + "step": 39613 + }, + { + "epoch": 0.793013537522208, + "grad_norm": 1.1561535596847534, + "learning_rate": 1.0819650160442146e-06, + "loss": 0.2827, + "step": 39614 + }, + { + "epoch": 0.7930335560393363, + "grad_norm": 1.3542109727859497, + "learning_rate": 1.0817636233676193e-06, + "loss": 0.332, + "step": 39615 + }, + { + "epoch": 0.7930535745564647, + "grad_norm": 1.106941819190979, + "learning_rate": 1.0815622471624454e-06, + "loss": 0.2513, + "step": 39616 + }, + { + "epoch": 0.793073593073593, + "grad_norm": 1.0298457145690918, + "learning_rate": 1.0813608874295423e-06, + "loss": 0.2845, + "step": 39617 + }, + { + "epoch": 0.7930936115907214, + "grad_norm": 1.211283802986145, + "learning_rate": 1.0811595441697525e-06, + "loss": 0.2904, + "step": 39618 + }, + { + "epoch": 0.7931136301078497, + "grad_norm": 1.0989829301834106, + "learning_rate": 1.0809582173839266e-06, + "loss": 0.2975, + "step": 39619 + }, + { + "epoch": 0.7931336486249781, + "grad_norm": 1.7921065092086792, + "learning_rate": 1.0807569070729085e-06, + "loss": 0.7682, + "step": 39620 + }, + { + "epoch": 0.7931536671421064, + "grad_norm": 1.0819278955459595, + "learning_rate": 1.080555613237544e-06, + "loss": 0.2917, + "step": 39621 + }, + { + "epoch": 0.7931736856592347, + "grad_norm": 1.2426056861877441, + "learning_rate": 1.0803543358786812e-06, + "loss": 0.2866, + "step": 39622 + }, + { + "epoch": 0.7931937041763631, + "grad_norm": 1.0856434106826782, + "learning_rate": 1.0801530749971656e-06, + "loss": 0.3015, + "step": 39623 + }, + { + "epoch": 0.7932137226934914, + "grad_norm": 1.0793050527572632, + "learning_rate": 1.0799518305938427e-06, + "loss": 0.2768, + "step": 39624 + }, + { + "epoch": 0.7932337412106198, + "grad_norm": 1.1917150020599365, + "learning_rate": 1.0797506026695575e-06, + "loss": 0.3133, + "step": 39625 + }, + { + "epoch": 0.7932537597277481, + "grad_norm": 1.0662872791290283, + "learning_rate": 1.0795493912251586e-06, + "loss": 0.2859, + "step": 39626 + }, + { + "epoch": 0.7932737782448765, + "grad_norm": 1.8680777549743652, + "learning_rate": 1.07934819626149e-06, + "loss": 0.8114, + "step": 39627 + }, + { + "epoch": 0.7932937967620048, + "grad_norm": 1.0189464092254639, + "learning_rate": 1.0791470177793977e-06, + "loss": 0.28, + "step": 39628 + }, + { + "epoch": 0.7933138152791332, + "grad_norm": 2.056243658065796, + "learning_rate": 1.0789458557797266e-06, + "loss": 0.7933, + "step": 39629 + }, + { + "epoch": 0.7933338337962615, + "grad_norm": 1.2123008966445923, + "learning_rate": 1.0787447102633247e-06, + "loss": 0.3243, + "step": 39630 + }, + { + "epoch": 0.7933538523133898, + "grad_norm": 1.0237009525299072, + "learning_rate": 1.0785435812310347e-06, + "loss": 0.3199, + "step": 39631 + }, + { + "epoch": 0.7933738708305182, + "grad_norm": 1.0952085256576538, + "learning_rate": 1.078342468683705e-06, + "loss": 0.3041, + "step": 39632 + }, + { + "epoch": 0.7933938893476465, + "grad_norm": 1.0895179510116577, + "learning_rate": 1.0781413726221796e-06, + "loss": 0.2719, + "step": 39633 + }, + { + "epoch": 0.793413907864775, + "grad_norm": 1.0901527404785156, + "learning_rate": 1.0779402930473026e-06, + "loss": 0.3121, + "step": 39634 + }, + { + "epoch": 0.7934339263819032, + "grad_norm": 1.191066861152649, + "learning_rate": 1.0777392299599214e-06, + "loss": 0.2675, + "step": 39635 + }, + { + "epoch": 0.7934539448990316, + "grad_norm": 1.0341153144836426, + "learning_rate": 1.0775381833608811e-06, + "loss": 0.3012, + "step": 39636 + }, + { + "epoch": 0.7934739634161599, + "grad_norm": 1.2725940942764282, + "learning_rate": 1.0773371532510257e-06, + "loss": 0.3077, + "step": 39637 + }, + { + "epoch": 0.7934939819332882, + "grad_norm": 1.1051534414291382, + "learning_rate": 1.0771361396311996e-06, + "loss": 0.2717, + "step": 39638 + }, + { + "epoch": 0.7935140004504166, + "grad_norm": 1.8737565279006958, + "learning_rate": 1.07693514250225e-06, + "loss": 0.7883, + "step": 39639 + }, + { + "epoch": 0.7935340189675449, + "grad_norm": 1.103576898574829, + "learning_rate": 1.0767341618650213e-06, + "loss": 0.2986, + "step": 39640 + }, + { + "epoch": 0.7935540374846733, + "grad_norm": 1.940037488937378, + "learning_rate": 1.0765331977203575e-06, + "loss": 0.7687, + "step": 39641 + }, + { + "epoch": 0.7935740560018016, + "grad_norm": 1.6565163135528564, + "learning_rate": 1.0763322500691025e-06, + "loss": 0.2791, + "step": 39642 + }, + { + "epoch": 0.79359407451893, + "grad_norm": 1.0751858949661255, + "learning_rate": 1.0761313189121036e-06, + "loss": 0.3203, + "step": 39643 + }, + { + "epoch": 0.7936140930360583, + "grad_norm": 1.1771951913833618, + "learning_rate": 1.0759304042502033e-06, + "loss": 0.2753, + "step": 39644 + }, + { + "epoch": 0.7936341115531867, + "grad_norm": 1.020949125289917, + "learning_rate": 1.0757295060842482e-06, + "loss": 0.2901, + "step": 39645 + }, + { + "epoch": 0.793654130070315, + "grad_norm": 1.1723599433898926, + "learning_rate": 1.075528624415082e-06, + "loss": 0.2982, + "step": 39646 + }, + { + "epoch": 0.7936741485874433, + "grad_norm": 1.068054437637329, + "learning_rate": 1.0753277592435469e-06, + "loss": 0.3081, + "step": 39647 + }, + { + "epoch": 0.7936941671045717, + "grad_norm": 1.0970370769500732, + "learning_rate": 1.0751269105704914e-06, + "loss": 0.2736, + "step": 39648 + }, + { + "epoch": 0.7937141856217, + "grad_norm": 1.9001078605651855, + "learning_rate": 1.0749260783967568e-06, + "loss": 0.7648, + "step": 39649 + }, + { + "epoch": 0.7937342041388284, + "grad_norm": 1.0707390308380127, + "learning_rate": 1.0747252627231891e-06, + "loss": 0.2844, + "step": 39650 + }, + { + "epoch": 0.7937542226559567, + "grad_norm": 1.2060993909835815, + "learning_rate": 1.0745244635506313e-06, + "loss": 0.2513, + "step": 39651 + }, + { + "epoch": 0.7937742411730851, + "grad_norm": 1.1020739078521729, + "learning_rate": 1.0743236808799268e-06, + "loss": 0.2783, + "step": 39652 + }, + { + "epoch": 0.7937942596902134, + "grad_norm": 1.1143773794174194, + "learning_rate": 1.074122914711922e-06, + "loss": 0.3447, + "step": 39653 + }, + { + "epoch": 0.7938142782073417, + "grad_norm": 1.1139838695526123, + "learning_rate": 1.0739221650474595e-06, + "loss": 0.2713, + "step": 39654 + }, + { + "epoch": 0.7938342967244701, + "grad_norm": 1.2250436544418335, + "learning_rate": 1.0737214318873823e-06, + "loss": 0.3372, + "step": 39655 + }, + { + "epoch": 0.7938543152415984, + "grad_norm": 1.843509316444397, + "learning_rate": 1.073520715232536e-06, + "loss": 0.7012, + "step": 39656 + }, + { + "epoch": 0.7938743337587268, + "grad_norm": 1.1689350605010986, + "learning_rate": 1.0733200150837625e-06, + "loss": 0.3098, + "step": 39657 + }, + { + "epoch": 0.7938943522758551, + "grad_norm": 1.528515100479126, + "learning_rate": 1.073119331441908e-06, + "loss": 0.3019, + "step": 39658 + }, + { + "epoch": 0.7939143707929835, + "grad_norm": 1.1505002975463867, + "learning_rate": 1.0729186643078148e-06, + "loss": 0.2868, + "step": 39659 + }, + { + "epoch": 0.7939343893101118, + "grad_norm": 1.9267408847808838, + "learning_rate": 1.0727180136823267e-06, + "loss": 0.312, + "step": 39660 + }, + { + "epoch": 0.7939544078272402, + "grad_norm": 1.1112371683120728, + "learning_rate": 1.0725173795662853e-06, + "loss": 0.3133, + "step": 39661 + }, + { + "epoch": 0.7939744263443685, + "grad_norm": 1.8506038188934326, + "learning_rate": 1.0723167619605369e-06, + "loss": 0.7577, + "step": 39662 + }, + { + "epoch": 0.7939944448614968, + "grad_norm": 1.1197915077209473, + "learning_rate": 1.0721161608659237e-06, + "loss": 0.3503, + "step": 39663 + }, + { + "epoch": 0.7940144633786252, + "grad_norm": 1.0433872938156128, + "learning_rate": 1.071915576283289e-06, + "loss": 0.2784, + "step": 39664 + }, + { + "epoch": 0.7940344818957535, + "grad_norm": 1.2299256324768066, + "learning_rate": 1.0717150082134742e-06, + "loss": 0.3077, + "step": 39665 + }, + { + "epoch": 0.794054500412882, + "grad_norm": 1.2268238067626953, + "learning_rate": 1.0715144566573255e-06, + "loss": 0.2948, + "step": 39666 + }, + { + "epoch": 0.7940745189300102, + "grad_norm": 1.0543372631072998, + "learning_rate": 1.0713139216156848e-06, + "loss": 0.2771, + "step": 39667 + }, + { + "epoch": 0.7940945374471386, + "grad_norm": 1.0622843503952026, + "learning_rate": 1.0711134030893938e-06, + "loss": 0.2451, + "step": 39668 + }, + { + "epoch": 0.7941145559642669, + "grad_norm": 1.9329626560211182, + "learning_rate": 1.0709129010792974e-06, + "loss": 0.7728, + "step": 39669 + }, + { + "epoch": 0.7941345744813952, + "grad_norm": 1.2196227312088013, + "learning_rate": 1.0707124155862364e-06, + "loss": 0.2757, + "step": 39670 + }, + { + "epoch": 0.7941545929985236, + "grad_norm": 1.0101978778839111, + "learning_rate": 1.0705119466110564e-06, + "loss": 0.2785, + "step": 39671 + }, + { + "epoch": 0.7941746115156519, + "grad_norm": 1.1935482025146484, + "learning_rate": 1.0703114941545983e-06, + "loss": 0.3399, + "step": 39672 + }, + { + "epoch": 0.7941946300327803, + "grad_norm": 1.1178184747695923, + "learning_rate": 1.0701110582177048e-06, + "loss": 0.3, + "step": 39673 + }, + { + "epoch": 0.7942146485499086, + "grad_norm": 1.2850602865219116, + "learning_rate": 1.0699106388012182e-06, + "loss": 0.2938, + "step": 39674 + }, + { + "epoch": 0.794234667067037, + "grad_norm": 1.7816152572631836, + "learning_rate": 1.0697102359059819e-06, + "loss": 0.7419, + "step": 39675 + }, + { + "epoch": 0.7942546855841653, + "grad_norm": 1.199425220489502, + "learning_rate": 1.0695098495328383e-06, + "loss": 0.2824, + "step": 39676 + }, + { + "epoch": 0.7942747041012937, + "grad_norm": 1.152103304862976, + "learning_rate": 1.0693094796826298e-06, + "loss": 0.3181, + "step": 39677 + }, + { + "epoch": 0.794294722618422, + "grad_norm": 1.1237610578536987, + "learning_rate": 1.0691091263561965e-06, + "loss": 0.3257, + "step": 39678 + }, + { + "epoch": 0.7943147411355503, + "grad_norm": 1.000199794769287, + "learning_rate": 1.0689087895543842e-06, + "loss": 0.2792, + "step": 39679 + }, + { + "epoch": 0.7943347596526787, + "grad_norm": 1.070751428604126, + "learning_rate": 1.068708469278033e-06, + "loss": 0.2412, + "step": 39680 + }, + { + "epoch": 0.794354778169807, + "grad_norm": 1.0593173503875732, + "learning_rate": 1.0685081655279843e-06, + "loss": 0.3187, + "step": 39681 + }, + { + "epoch": 0.7943747966869354, + "grad_norm": 1.049608826637268, + "learning_rate": 1.0683078783050826e-06, + "loss": 0.3202, + "step": 39682 + }, + { + "epoch": 0.7943948152040637, + "grad_norm": 1.1171005964279175, + "learning_rate": 1.0681076076101671e-06, + "loss": 0.2881, + "step": 39683 + }, + { + "epoch": 0.7944148337211921, + "grad_norm": 0.9733232259750366, + "learning_rate": 1.067907353444082e-06, + "loss": 0.2809, + "step": 39684 + }, + { + "epoch": 0.7944348522383204, + "grad_norm": 1.8477246761322021, + "learning_rate": 1.0677071158076684e-06, + "loss": 0.7763, + "step": 39685 + }, + { + "epoch": 0.7944548707554487, + "grad_norm": 1.2952296733856201, + "learning_rate": 1.067506894701768e-06, + "loss": 0.275, + "step": 39686 + }, + { + "epoch": 0.7944748892725771, + "grad_norm": 1.04755437374115, + "learning_rate": 1.067306690127221e-06, + "loss": 0.2939, + "step": 39687 + }, + { + "epoch": 0.7944949077897054, + "grad_norm": 1.0654542446136475, + "learning_rate": 1.0671065020848714e-06, + "loss": 0.2664, + "step": 39688 + }, + { + "epoch": 0.7945149263068338, + "grad_norm": 1.0785717964172363, + "learning_rate": 1.06690633057556e-06, + "loss": 0.3499, + "step": 39689 + }, + { + "epoch": 0.7945349448239621, + "grad_norm": 1.2587021589279175, + "learning_rate": 1.0667061756001279e-06, + "loss": 0.3828, + "step": 39690 + }, + { + "epoch": 0.7945549633410905, + "grad_norm": 1.1733827590942383, + "learning_rate": 1.0665060371594165e-06, + "loss": 0.2759, + "step": 39691 + }, + { + "epoch": 0.7945749818582188, + "grad_norm": 1.1531130075454712, + "learning_rate": 1.0663059152542666e-06, + "loss": 0.2896, + "step": 39692 + }, + { + "epoch": 0.7945950003753472, + "grad_norm": 1.0373926162719727, + "learning_rate": 1.0661058098855203e-06, + "loss": 0.3077, + "step": 39693 + }, + { + "epoch": 0.7946150188924755, + "grad_norm": 1.3038092851638794, + "learning_rate": 1.065905721054018e-06, + "loss": 0.3029, + "step": 39694 + }, + { + "epoch": 0.7946350374096038, + "grad_norm": 1.1961970329284668, + "learning_rate": 1.0657056487606027e-06, + "loss": 0.3101, + "step": 39695 + }, + { + "epoch": 0.7946550559267322, + "grad_norm": 1.2365959882736206, + "learning_rate": 1.0655055930061125e-06, + "loss": 0.3456, + "step": 39696 + }, + { + "epoch": 0.7946750744438605, + "grad_norm": 1.245745301246643, + "learning_rate": 1.0653055537913915e-06, + "loss": 0.2863, + "step": 39697 + }, + { + "epoch": 0.794695092960989, + "grad_norm": 2.144787073135376, + "learning_rate": 1.0651055311172792e-06, + "loss": 0.7247, + "step": 39698 + }, + { + "epoch": 0.7947151114781172, + "grad_norm": 1.023093819618225, + "learning_rate": 1.0649055249846164e-06, + "loss": 0.2746, + "step": 39699 + }, + { + "epoch": 0.7947351299952456, + "grad_norm": 1.0471028089523315, + "learning_rate": 1.0647055353942426e-06, + "loss": 0.3073, + "step": 39700 + }, + { + "epoch": 0.7947551485123739, + "grad_norm": 1.2307405471801758, + "learning_rate": 1.0645055623470012e-06, + "loss": 0.3068, + "step": 39701 + }, + { + "epoch": 0.7947751670295022, + "grad_norm": 1.2009410858154297, + "learning_rate": 1.0643056058437319e-06, + "loss": 0.2712, + "step": 39702 + }, + { + "epoch": 0.7947951855466306, + "grad_norm": 1.1570539474487305, + "learning_rate": 1.0641056658852744e-06, + "loss": 0.2839, + "step": 39703 + }, + { + "epoch": 0.7948152040637589, + "grad_norm": 1.1699224710464478, + "learning_rate": 1.0639057424724703e-06, + "loss": 0.2878, + "step": 39704 + }, + { + "epoch": 0.7948352225808873, + "grad_norm": 1.058603286743164, + "learning_rate": 1.0637058356061575e-06, + "loss": 0.3126, + "step": 39705 + }, + { + "epoch": 0.7948552410980156, + "grad_norm": 1.1410787105560303, + "learning_rate": 1.0635059452871792e-06, + "loss": 0.3213, + "step": 39706 + }, + { + "epoch": 0.794875259615144, + "grad_norm": 1.0826653242111206, + "learning_rate": 1.063306071516374e-06, + "loss": 0.2783, + "step": 39707 + }, + { + "epoch": 0.7948952781322723, + "grad_norm": 1.0912072658538818, + "learning_rate": 1.0631062142945842e-06, + "loss": 0.3441, + "step": 39708 + }, + { + "epoch": 0.7949152966494007, + "grad_norm": 0.9566130042076111, + "learning_rate": 1.0629063736226475e-06, + "loss": 0.2672, + "step": 39709 + }, + { + "epoch": 0.794935315166529, + "grad_norm": 1.1328387260437012, + "learning_rate": 1.062706549501406e-06, + "loss": 0.2949, + "step": 39710 + }, + { + "epoch": 0.7949553336836573, + "grad_norm": 1.0202686786651611, + "learning_rate": 1.0625067419316992e-06, + "loss": 0.2874, + "step": 39711 + }, + { + "epoch": 0.7949753522007857, + "grad_norm": 1.0821504592895508, + "learning_rate": 1.0623069509143663e-06, + "loss": 0.2856, + "step": 39712 + }, + { + "epoch": 0.794995370717914, + "grad_norm": 0.9466056823730469, + "learning_rate": 1.0621071764502477e-06, + "loss": 0.3002, + "step": 39713 + }, + { + "epoch": 0.7950153892350424, + "grad_norm": 1.1523780822753906, + "learning_rate": 1.0619074185401823e-06, + "loss": 0.3236, + "step": 39714 + }, + { + "epoch": 0.7950354077521707, + "grad_norm": 1.1550766229629517, + "learning_rate": 1.0617076771850116e-06, + "loss": 0.2389, + "step": 39715 + }, + { + "epoch": 0.7950554262692991, + "grad_norm": 1.1702414751052856, + "learning_rate": 1.0615079523855742e-06, + "loss": 0.3169, + "step": 39716 + }, + { + "epoch": 0.7950754447864274, + "grad_norm": 1.112273097038269, + "learning_rate": 1.0613082441427098e-06, + "loss": 0.273, + "step": 39717 + }, + { + "epoch": 0.7950954633035557, + "grad_norm": 1.2456060647964478, + "learning_rate": 1.0611085524572568e-06, + "loss": 0.3384, + "step": 39718 + }, + { + "epoch": 0.7951154818206841, + "grad_norm": 1.1404260396957397, + "learning_rate": 1.060908877330057e-06, + "loss": 0.2946, + "step": 39719 + }, + { + "epoch": 0.7951355003378124, + "grad_norm": 1.2591068744659424, + "learning_rate": 1.0607092187619466e-06, + "loss": 0.294, + "step": 39720 + }, + { + "epoch": 0.7951555188549408, + "grad_norm": 1.3225969076156616, + "learning_rate": 1.0605095767537694e-06, + "loss": 0.2782, + "step": 39721 + }, + { + "epoch": 0.7951755373720691, + "grad_norm": 1.048656702041626, + "learning_rate": 1.0603099513063615e-06, + "loss": 0.2899, + "step": 39722 + }, + { + "epoch": 0.7951955558891975, + "grad_norm": 1.0331028699874878, + "learning_rate": 1.060110342420561e-06, + "loss": 0.2949, + "step": 39723 + }, + { + "epoch": 0.7952155744063258, + "grad_norm": 1.136839509010315, + "learning_rate": 1.0599107500972106e-06, + "loss": 0.3376, + "step": 39724 + }, + { + "epoch": 0.7952355929234542, + "grad_norm": 1.16085946559906, + "learning_rate": 1.0597111743371474e-06, + "loss": 0.3258, + "step": 39725 + }, + { + "epoch": 0.7952556114405825, + "grad_norm": 1.215876579284668, + "learning_rate": 1.059511615141211e-06, + "loss": 0.3235, + "step": 39726 + }, + { + "epoch": 0.7952756299577108, + "grad_norm": 1.0975241661071777, + "learning_rate": 1.059312072510238e-06, + "loss": 0.2759, + "step": 39727 + }, + { + "epoch": 0.7952956484748392, + "grad_norm": 1.061024785041809, + "learning_rate": 1.0591125464450702e-06, + "loss": 0.258, + "step": 39728 + }, + { + "epoch": 0.7953156669919675, + "grad_norm": 1.098165512084961, + "learning_rate": 1.058913036946545e-06, + "loss": 0.3428, + "step": 39729 + }, + { + "epoch": 0.795335685509096, + "grad_norm": 1.0601065158843994, + "learning_rate": 1.0587135440155017e-06, + "loss": 0.2861, + "step": 39730 + }, + { + "epoch": 0.7953557040262242, + "grad_norm": 1.0106667280197144, + "learning_rate": 1.0585140676527767e-06, + "loss": 0.2451, + "step": 39731 + }, + { + "epoch": 0.7953757225433526, + "grad_norm": 1.2241004705429077, + "learning_rate": 1.058314607859212e-06, + "loss": 0.3199, + "step": 39732 + }, + { + "epoch": 0.7953957410604809, + "grad_norm": 1.0917819738388062, + "learning_rate": 1.058115164635643e-06, + "loss": 0.3012, + "step": 39733 + }, + { + "epoch": 0.7954157595776092, + "grad_norm": 1.128185749053955, + "learning_rate": 1.0579157379829109e-06, + "loss": 0.2953, + "step": 39734 + }, + { + "epoch": 0.7954357780947376, + "grad_norm": 1.0776671171188354, + "learning_rate": 1.0577163279018526e-06, + "loss": 0.3012, + "step": 39735 + }, + { + "epoch": 0.7954557966118659, + "grad_norm": 1.1007789373397827, + "learning_rate": 1.0575169343933056e-06, + "loss": 0.314, + "step": 39736 + }, + { + "epoch": 0.7954758151289943, + "grad_norm": 1.1343693733215332, + "learning_rate": 1.0573175574581096e-06, + "loss": 0.3155, + "step": 39737 + }, + { + "epoch": 0.7954958336461226, + "grad_norm": 1.0816985368728638, + "learning_rate": 1.0571181970971029e-06, + "loss": 0.3209, + "step": 39738 + }, + { + "epoch": 0.795515852163251, + "grad_norm": 1.0463781356811523, + "learning_rate": 1.0569188533111219e-06, + "loss": 0.3069, + "step": 39739 + }, + { + "epoch": 0.7955358706803793, + "grad_norm": 1.055216670036316, + "learning_rate": 1.056719526101005e-06, + "loss": 0.3222, + "step": 39740 + }, + { + "epoch": 0.7955558891975077, + "grad_norm": 1.0621988773345947, + "learning_rate": 1.0565202154675913e-06, + "loss": 0.2605, + "step": 39741 + }, + { + "epoch": 0.795575907714636, + "grad_norm": 1.0339381694793701, + "learning_rate": 1.056320921411718e-06, + "loss": 0.3307, + "step": 39742 + }, + { + "epoch": 0.7955959262317643, + "grad_norm": 1.1751551628112793, + "learning_rate": 1.0561216439342231e-06, + "loss": 0.2851, + "step": 39743 + }, + { + "epoch": 0.7956159447488927, + "grad_norm": 1.1113780736923218, + "learning_rate": 1.0559223830359427e-06, + "loss": 0.2658, + "step": 39744 + }, + { + "epoch": 0.795635963266021, + "grad_norm": 1.09814453125, + "learning_rate": 1.0557231387177175e-06, + "loss": 0.2907, + "step": 39745 + }, + { + "epoch": 0.7956559817831494, + "grad_norm": 1.0990705490112305, + "learning_rate": 1.0555239109803816e-06, + "loss": 0.2794, + "step": 39746 + }, + { + "epoch": 0.7956760003002777, + "grad_norm": 1.189914345741272, + "learning_rate": 1.0553246998247757e-06, + "loss": 0.3785, + "step": 39747 + }, + { + "epoch": 0.7956960188174061, + "grad_norm": 1.072234869003296, + "learning_rate": 1.0551255052517362e-06, + "loss": 0.3043, + "step": 39748 + }, + { + "epoch": 0.7957160373345344, + "grad_norm": 1.0430619716644287, + "learning_rate": 1.0549263272620986e-06, + "loss": 0.2881, + "step": 39749 + }, + { + "epoch": 0.7957360558516627, + "grad_norm": 1.0461066961288452, + "learning_rate": 1.0547271658567027e-06, + "loss": 0.2406, + "step": 39750 + }, + { + "epoch": 0.7957560743687911, + "grad_norm": 1.1061979532241821, + "learning_rate": 1.054528021036385e-06, + "loss": 0.3087, + "step": 39751 + }, + { + "epoch": 0.7957760928859194, + "grad_norm": 1.0101414918899536, + "learning_rate": 1.0543288928019829e-06, + "loss": 0.2693, + "step": 39752 + }, + { + "epoch": 0.7957961114030478, + "grad_norm": 1.177894115447998, + "learning_rate": 1.054129781154331e-06, + "loss": 0.3287, + "step": 39753 + }, + { + "epoch": 0.7958161299201761, + "grad_norm": 1.4308643341064453, + "learning_rate": 1.05393068609427e-06, + "loss": 0.2739, + "step": 39754 + }, + { + "epoch": 0.7958361484373045, + "grad_norm": 1.1868810653686523, + "learning_rate": 1.053731607622635e-06, + "loss": 0.3442, + "step": 39755 + }, + { + "epoch": 0.7958561669544328, + "grad_norm": 1.0716552734375, + "learning_rate": 1.0535325457402633e-06, + "loss": 0.3093, + "step": 39756 + }, + { + "epoch": 0.7958761854715612, + "grad_norm": 1.038137674331665, + "learning_rate": 1.05333350044799e-06, + "loss": 0.2895, + "step": 39757 + }, + { + "epoch": 0.7958962039886895, + "grad_norm": 1.0861140489578247, + "learning_rate": 1.0531344717466546e-06, + "loss": 0.3054, + "step": 39758 + }, + { + "epoch": 0.7959162225058178, + "grad_norm": 1.9535467624664307, + "learning_rate": 1.0529354596370917e-06, + "loss": 0.8004, + "step": 39759 + }, + { + "epoch": 0.7959362410229462, + "grad_norm": 1.1517640352249146, + "learning_rate": 1.0527364641201394e-06, + "loss": 0.2806, + "step": 39760 + }, + { + "epoch": 0.7959562595400745, + "grad_norm": 1.0589771270751953, + "learning_rate": 1.052537485196634e-06, + "loss": 0.2591, + "step": 39761 + }, + { + "epoch": 0.795976278057203, + "grad_norm": 1.1620928049087524, + "learning_rate": 1.0523385228674099e-06, + "loss": 0.2848, + "step": 39762 + }, + { + "epoch": 0.7959962965743312, + "grad_norm": 2.0113019943237305, + "learning_rate": 1.0521395771333064e-06, + "loss": 0.7774, + "step": 39763 + }, + { + "epoch": 0.7960163150914596, + "grad_norm": 1.339026927947998, + "learning_rate": 1.0519406479951589e-06, + "loss": 0.2587, + "step": 39764 + }, + { + "epoch": 0.7960363336085879, + "grad_norm": 1.1441227197647095, + "learning_rate": 1.0517417354538029e-06, + "loss": 0.2836, + "step": 39765 + }, + { + "epoch": 0.7960563521257162, + "grad_norm": 1.1144767999649048, + "learning_rate": 1.0515428395100753e-06, + "loss": 0.335, + "step": 39766 + }, + { + "epoch": 0.7960763706428446, + "grad_norm": 0.9809184074401855, + "learning_rate": 1.0513439601648102e-06, + "loss": 0.2807, + "step": 39767 + }, + { + "epoch": 0.7960963891599729, + "grad_norm": 1.237278938293457, + "learning_rate": 1.051145097418847e-06, + "loss": 0.2964, + "step": 39768 + }, + { + "epoch": 0.7961164076771013, + "grad_norm": 1.2396312952041626, + "learning_rate": 1.0509462512730196e-06, + "loss": 0.2692, + "step": 39769 + }, + { + "epoch": 0.7961364261942296, + "grad_norm": 1.1779838800430298, + "learning_rate": 1.0507474217281632e-06, + "loss": 0.2758, + "step": 39770 + }, + { + "epoch": 0.796156444711358, + "grad_norm": 1.1157211065292358, + "learning_rate": 1.0505486087851158e-06, + "loss": 0.3122, + "step": 39771 + }, + { + "epoch": 0.7961764632284863, + "grad_norm": 1.8831759691238403, + "learning_rate": 1.0503498124447116e-06, + "loss": 0.7142, + "step": 39772 + }, + { + "epoch": 0.7961964817456147, + "grad_norm": 1.3328425884246826, + "learning_rate": 1.0501510327077875e-06, + "loss": 0.3276, + "step": 39773 + }, + { + "epoch": 0.796216500262743, + "grad_norm": 1.0737311840057373, + "learning_rate": 1.0499522695751784e-06, + "loss": 0.3333, + "step": 39774 + }, + { + "epoch": 0.7962365187798713, + "grad_norm": 0.9975544214248657, + "learning_rate": 1.04975352304772e-06, + "loss": 0.2974, + "step": 39775 + }, + { + "epoch": 0.7962565372969997, + "grad_norm": 1.0006358623504639, + "learning_rate": 1.0495547931262461e-06, + "loss": 0.2491, + "step": 39776 + }, + { + "epoch": 0.796276555814128, + "grad_norm": 1.1373138427734375, + "learning_rate": 1.0493560798115953e-06, + "loss": 0.2749, + "step": 39777 + }, + { + "epoch": 0.7962965743312564, + "grad_norm": 1.2235018014907837, + "learning_rate": 1.049157383104602e-06, + "loss": 0.2673, + "step": 39778 + }, + { + "epoch": 0.7963165928483847, + "grad_norm": 1.2477622032165527, + "learning_rate": 1.0489587030061e-06, + "loss": 0.2982, + "step": 39779 + }, + { + "epoch": 0.7963366113655131, + "grad_norm": 1.1416696310043335, + "learning_rate": 1.0487600395169239e-06, + "loss": 0.295, + "step": 39780 + }, + { + "epoch": 0.7963566298826414, + "grad_norm": 1.2581486701965332, + "learning_rate": 1.0485613926379123e-06, + "loss": 0.2878, + "step": 39781 + }, + { + "epoch": 0.7963766483997697, + "grad_norm": 1.3347123861312866, + "learning_rate": 1.0483627623698978e-06, + "loss": 0.2952, + "step": 39782 + }, + { + "epoch": 0.7963966669168981, + "grad_norm": 1.2059905529022217, + "learning_rate": 1.0481641487137146e-06, + "loss": 0.2498, + "step": 39783 + }, + { + "epoch": 0.7964166854340264, + "grad_norm": 1.1311123371124268, + "learning_rate": 1.0479655516702002e-06, + "loss": 0.2979, + "step": 39784 + }, + { + "epoch": 0.7964367039511548, + "grad_norm": 1.1851600408554077, + "learning_rate": 1.0477669712401867e-06, + "loss": 0.314, + "step": 39785 + }, + { + "epoch": 0.7964567224682831, + "grad_norm": 1.145301342010498, + "learning_rate": 1.0475684074245117e-06, + "loss": 0.317, + "step": 39786 + }, + { + "epoch": 0.7964767409854115, + "grad_norm": 1.363951325416565, + "learning_rate": 1.047369860224009e-06, + "loss": 0.3069, + "step": 39787 + }, + { + "epoch": 0.7964967595025398, + "grad_norm": 1.152092456817627, + "learning_rate": 1.0471713296395126e-06, + "loss": 0.3157, + "step": 39788 + }, + { + "epoch": 0.7965167780196682, + "grad_norm": 1.1669832468032837, + "learning_rate": 1.0469728156718556e-06, + "loss": 0.3419, + "step": 39789 + }, + { + "epoch": 0.7965367965367965, + "grad_norm": 1.1165602207183838, + "learning_rate": 1.0467743183218754e-06, + "loss": 0.2799, + "step": 39790 + }, + { + "epoch": 0.7965568150539248, + "grad_norm": 1.0569322109222412, + "learning_rate": 1.0465758375904057e-06, + "loss": 0.3303, + "step": 39791 + }, + { + "epoch": 0.7965768335710532, + "grad_norm": 1.1564207077026367, + "learning_rate": 1.0463773734782801e-06, + "loss": 0.3284, + "step": 39792 + }, + { + "epoch": 0.7965968520881815, + "grad_norm": 1.148303747177124, + "learning_rate": 1.0461789259863319e-06, + "loss": 0.3114, + "step": 39793 + }, + { + "epoch": 0.79661687060531, + "grad_norm": 1.2336812019348145, + "learning_rate": 1.045980495115398e-06, + "loss": 0.2894, + "step": 39794 + }, + { + "epoch": 0.7966368891224382, + "grad_norm": 1.1994116306304932, + "learning_rate": 1.045782080866311e-06, + "loss": 0.272, + "step": 39795 + }, + { + "epoch": 0.7966569076395666, + "grad_norm": 1.1531991958618164, + "learning_rate": 1.045583683239904e-06, + "loss": 0.2301, + "step": 39796 + }, + { + "epoch": 0.7966769261566949, + "grad_norm": 1.4580605030059814, + "learning_rate": 1.0453853022370136e-06, + "loss": 0.3165, + "step": 39797 + }, + { + "epoch": 0.7966969446738232, + "grad_norm": 1.1569764614105225, + "learning_rate": 1.0451869378584712e-06, + "loss": 0.2894, + "step": 39798 + }, + { + "epoch": 0.7967169631909516, + "grad_norm": 1.2367165088653564, + "learning_rate": 1.0449885901051126e-06, + "loss": 0.2913, + "step": 39799 + }, + { + "epoch": 0.7967369817080799, + "grad_norm": 1.216853141784668, + "learning_rate": 1.0447902589777714e-06, + "loss": 0.3583, + "step": 39800 + }, + { + "epoch": 0.7967570002252083, + "grad_norm": 1.1150543689727783, + "learning_rate": 1.0445919444772805e-06, + "loss": 0.3122, + "step": 39801 + }, + { + "epoch": 0.7967770187423366, + "grad_norm": 1.1511712074279785, + "learning_rate": 1.0443936466044725e-06, + "loss": 0.3149, + "step": 39802 + }, + { + "epoch": 0.796797037259465, + "grad_norm": 1.8916321992874146, + "learning_rate": 1.044195365360184e-06, + "loss": 0.7526, + "step": 39803 + }, + { + "epoch": 0.7968170557765933, + "grad_norm": 1.3836431503295898, + "learning_rate": 1.0439971007452464e-06, + "loss": 0.2799, + "step": 39804 + }, + { + "epoch": 0.7968370742937217, + "grad_norm": 1.1095547676086426, + "learning_rate": 1.0437988527604936e-06, + "loss": 0.2835, + "step": 39805 + }, + { + "epoch": 0.79685709281085, + "grad_norm": 1.177846908569336, + "learning_rate": 1.0436006214067595e-06, + "loss": 0.3313, + "step": 39806 + }, + { + "epoch": 0.7968771113279783, + "grad_norm": 1.1685564517974854, + "learning_rate": 1.0434024066848754e-06, + "loss": 0.2905, + "step": 39807 + }, + { + "epoch": 0.7968971298451067, + "grad_norm": 1.0621660947799683, + "learning_rate": 1.0432042085956772e-06, + "loss": 0.2945, + "step": 39808 + }, + { + "epoch": 0.796917148362235, + "grad_norm": 1.0206235647201538, + "learning_rate": 1.043006027139996e-06, + "loss": 0.3235, + "step": 39809 + }, + { + "epoch": 0.7969371668793634, + "grad_norm": 1.0431280136108398, + "learning_rate": 1.0428078623186677e-06, + "loss": 0.2509, + "step": 39810 + }, + { + "epoch": 0.7969571853964917, + "grad_norm": 1.0503042936325073, + "learning_rate": 1.0426097141325213e-06, + "loss": 0.2842, + "step": 39811 + }, + { + "epoch": 0.7969772039136201, + "grad_norm": 1.120611310005188, + "learning_rate": 1.0424115825823939e-06, + "loss": 0.3062, + "step": 39812 + }, + { + "epoch": 0.7969972224307484, + "grad_norm": 1.1575696468353271, + "learning_rate": 1.0422134676691164e-06, + "loss": 0.2835, + "step": 39813 + }, + { + "epoch": 0.7970172409478767, + "grad_norm": 1.2230448722839355, + "learning_rate": 1.042015369393522e-06, + "loss": 0.2807, + "step": 39814 + }, + { + "epoch": 0.7970372594650051, + "grad_norm": 1.1103626489639282, + "learning_rate": 1.0418172877564413e-06, + "loss": 0.3315, + "step": 39815 + }, + { + "epoch": 0.7970572779821334, + "grad_norm": 1.0541282892227173, + "learning_rate": 1.0416192227587107e-06, + "loss": 0.2702, + "step": 39816 + }, + { + "epoch": 0.7970772964992618, + "grad_norm": 0.9787280559539795, + "learning_rate": 1.041421174401161e-06, + "loss": 0.2956, + "step": 39817 + }, + { + "epoch": 0.7970973150163901, + "grad_norm": 1.0782673358917236, + "learning_rate": 1.0412231426846247e-06, + "loss": 0.3016, + "step": 39818 + }, + { + "epoch": 0.7971173335335185, + "grad_norm": 1.852321982383728, + "learning_rate": 1.0410251276099343e-06, + "loss": 0.7821, + "step": 39819 + }, + { + "epoch": 0.7971373520506468, + "grad_norm": 1.124671459197998, + "learning_rate": 1.0408271291779209e-06, + "loss": 0.307, + "step": 39820 + }, + { + "epoch": 0.7971573705677752, + "grad_norm": 1.2996866703033447, + "learning_rate": 1.0406291473894198e-06, + "loss": 0.3115, + "step": 39821 + }, + { + "epoch": 0.7971773890849035, + "grad_norm": 1.1266182661056519, + "learning_rate": 1.0404311822452601e-06, + "loss": 0.3108, + "step": 39822 + }, + { + "epoch": 0.7971974076020318, + "grad_norm": 1.3890284299850464, + "learning_rate": 1.0402332337462773e-06, + "loss": 0.2387, + "step": 39823 + }, + { + "epoch": 0.7972174261191602, + "grad_norm": 1.048677921295166, + "learning_rate": 1.0400353018933003e-06, + "loss": 0.3248, + "step": 39824 + }, + { + "epoch": 0.7972374446362885, + "grad_norm": 1.0604828596115112, + "learning_rate": 1.0398373866871636e-06, + "loss": 0.3075, + "step": 39825 + }, + { + "epoch": 0.797257463153417, + "grad_norm": 1.1741801500320435, + "learning_rate": 1.039639488128699e-06, + "loss": 0.3165, + "step": 39826 + }, + { + "epoch": 0.7972774816705452, + "grad_norm": 1.2119957208633423, + "learning_rate": 1.0394416062187373e-06, + "loss": 0.3718, + "step": 39827 + }, + { + "epoch": 0.7972975001876736, + "grad_norm": 1.3462815284729004, + "learning_rate": 1.0392437409581107e-06, + "loss": 0.2726, + "step": 39828 + }, + { + "epoch": 0.7973175187048019, + "grad_norm": 1.0559927225112915, + "learning_rate": 1.0390458923476498e-06, + "loss": 0.3161, + "step": 39829 + }, + { + "epoch": 0.7973375372219302, + "grad_norm": 1.1021859645843506, + "learning_rate": 1.0388480603881894e-06, + "loss": 0.2867, + "step": 39830 + }, + { + "epoch": 0.7973575557390586, + "grad_norm": 0.948696494102478, + "learning_rate": 1.0386502450805585e-06, + "loss": 0.2666, + "step": 39831 + }, + { + "epoch": 0.7973775742561869, + "grad_norm": 1.0193673372268677, + "learning_rate": 1.0384524464255902e-06, + "loss": 0.2887, + "step": 39832 + }, + { + "epoch": 0.7973975927733153, + "grad_norm": 1.3136075735092163, + "learning_rate": 1.0382546644241138e-06, + "loss": 0.2933, + "step": 39833 + }, + { + "epoch": 0.7974176112904436, + "grad_norm": 1.9623348712921143, + "learning_rate": 1.038056899076963e-06, + "loss": 0.7293, + "step": 39834 + }, + { + "epoch": 0.797437629807572, + "grad_norm": 1.1371970176696777, + "learning_rate": 1.0378591503849677e-06, + "loss": 0.3299, + "step": 39835 + }, + { + "epoch": 0.7974576483247003, + "grad_norm": 1.2147988080978394, + "learning_rate": 1.037661418348961e-06, + "loss": 0.3039, + "step": 39836 + }, + { + "epoch": 0.7974776668418286, + "grad_norm": 1.0537630319595337, + "learning_rate": 1.0374637029697725e-06, + "loss": 0.2689, + "step": 39837 + }, + { + "epoch": 0.797497685358957, + "grad_norm": 1.0474082231521606, + "learning_rate": 1.0372660042482329e-06, + "loss": 0.2706, + "step": 39838 + }, + { + "epoch": 0.7975177038760853, + "grad_norm": 1.1427335739135742, + "learning_rate": 1.0370683221851757e-06, + "loss": 0.3037, + "step": 39839 + }, + { + "epoch": 0.7975377223932137, + "grad_norm": 1.0661426782608032, + "learning_rate": 1.03687065678143e-06, + "loss": 0.3038, + "step": 39840 + }, + { + "epoch": 0.797557740910342, + "grad_norm": 1.0459773540496826, + "learning_rate": 1.036673008037828e-06, + "loss": 0.2939, + "step": 39841 + }, + { + "epoch": 0.7975777594274704, + "grad_norm": 1.276397705078125, + "learning_rate": 1.0364753759551977e-06, + "loss": 0.295, + "step": 39842 + }, + { + "epoch": 0.7975977779445987, + "grad_norm": 1.950882911682129, + "learning_rate": 1.0362777605343737e-06, + "loss": 0.6863, + "step": 39843 + }, + { + "epoch": 0.7976177964617271, + "grad_norm": 1.1518735885620117, + "learning_rate": 1.0360801617761846e-06, + "loss": 0.3086, + "step": 39844 + }, + { + "epoch": 0.7976378149788554, + "grad_norm": 1.0194566249847412, + "learning_rate": 1.035882579681462e-06, + "loss": 0.2801, + "step": 39845 + }, + { + "epoch": 0.7976578334959837, + "grad_norm": 1.2032440900802612, + "learning_rate": 1.0356850142510338e-06, + "loss": 0.3237, + "step": 39846 + }, + { + "epoch": 0.7976778520131121, + "grad_norm": 1.1842609643936157, + "learning_rate": 1.0354874654857345e-06, + "loss": 0.2677, + "step": 39847 + }, + { + "epoch": 0.7976978705302404, + "grad_norm": 1.0283046960830688, + "learning_rate": 1.0352899333863913e-06, + "loss": 0.2641, + "step": 39848 + }, + { + "epoch": 0.7977178890473688, + "grad_norm": 1.0731909275054932, + "learning_rate": 1.0350924179538374e-06, + "loss": 0.2874, + "step": 39849 + }, + { + "epoch": 0.7977379075644971, + "grad_norm": 1.1401649713516235, + "learning_rate": 1.0348949191889018e-06, + "loss": 0.2513, + "step": 39850 + }, + { + "epoch": 0.7977579260816255, + "grad_norm": 1.1264573335647583, + "learning_rate": 1.0346974370924133e-06, + "loss": 0.2732, + "step": 39851 + }, + { + "epoch": 0.7977779445987538, + "grad_norm": 1.015160083770752, + "learning_rate": 1.0344999716652048e-06, + "loss": 0.2956, + "step": 39852 + }, + { + "epoch": 0.7977979631158821, + "grad_norm": 1.1460485458374023, + "learning_rate": 1.034302522908105e-06, + "loss": 0.3064, + "step": 39853 + }, + { + "epoch": 0.7978179816330105, + "grad_norm": 1.063007116317749, + "learning_rate": 1.034105090821944e-06, + "loss": 0.2958, + "step": 39854 + }, + { + "epoch": 0.7978380001501388, + "grad_norm": 2.025996685028076, + "learning_rate": 1.03390767540755e-06, + "loss": 0.6817, + "step": 39855 + }, + { + "epoch": 0.7978580186672672, + "grad_norm": 1.1097497940063477, + "learning_rate": 1.0337102766657564e-06, + "loss": 0.2854, + "step": 39856 + }, + { + "epoch": 0.7978780371843955, + "grad_norm": 1.1585354804992676, + "learning_rate": 1.033512894597391e-06, + "loss": 0.2988, + "step": 39857 + }, + { + "epoch": 0.797898055701524, + "grad_norm": 1.1281874179840088, + "learning_rate": 1.0333155292032843e-06, + "loss": 0.3387, + "step": 39858 + }, + { + "epoch": 0.7979180742186522, + "grad_norm": 1.098145604133606, + "learning_rate": 1.033118180484265e-06, + "loss": 0.3421, + "step": 39859 + }, + { + "epoch": 0.7979380927357806, + "grad_norm": 1.0849438905715942, + "learning_rate": 1.0329208484411618e-06, + "loss": 0.2963, + "step": 39860 + }, + { + "epoch": 0.7979581112529089, + "grad_norm": 1.1699705123901367, + "learning_rate": 1.032723533074806e-06, + "loss": 0.3497, + "step": 39861 + }, + { + "epoch": 0.7979781297700372, + "grad_norm": 1.1868128776550293, + "learning_rate": 1.0325262343860276e-06, + "loss": 0.2989, + "step": 39862 + }, + { + "epoch": 0.7979981482871656, + "grad_norm": 1.1851458549499512, + "learning_rate": 1.0323289523756551e-06, + "loss": 0.3058, + "step": 39863 + }, + { + "epoch": 0.7980181668042939, + "grad_norm": 1.0189344882965088, + "learning_rate": 1.032131687044517e-06, + "loss": 0.2459, + "step": 39864 + }, + { + "epoch": 0.7980381853214223, + "grad_norm": 1.170372486114502, + "learning_rate": 1.0319344383934443e-06, + "loss": 0.3041, + "step": 39865 + }, + { + "epoch": 0.7980582038385506, + "grad_norm": 1.8406562805175781, + "learning_rate": 1.0317372064232656e-06, + "loss": 0.7203, + "step": 39866 + }, + { + "epoch": 0.798078222355679, + "grad_norm": 1.105106234550476, + "learning_rate": 1.0315399911348094e-06, + "loss": 0.3204, + "step": 39867 + }, + { + "epoch": 0.7980982408728073, + "grad_norm": 1.84834885597229, + "learning_rate": 1.031342792528905e-06, + "loss": 0.7632, + "step": 39868 + }, + { + "epoch": 0.7981182593899356, + "grad_norm": 1.054532766342163, + "learning_rate": 1.0311456106063805e-06, + "loss": 0.3007, + "step": 39869 + }, + { + "epoch": 0.798138277907064, + "grad_norm": 1.8608452081680298, + "learning_rate": 1.0309484453680668e-06, + "loss": 0.8425, + "step": 39870 + }, + { + "epoch": 0.7981582964241923, + "grad_norm": 1.1272519826889038, + "learning_rate": 1.0307512968147916e-06, + "loss": 0.3158, + "step": 39871 + }, + { + "epoch": 0.7981783149413207, + "grad_norm": 0.9879357218742371, + "learning_rate": 1.0305541649473838e-06, + "loss": 0.2945, + "step": 39872 + }, + { + "epoch": 0.798198333458449, + "grad_norm": 1.8615416288375854, + "learning_rate": 1.030357049766671e-06, + "loss": 0.6912, + "step": 39873 + }, + { + "epoch": 0.7982183519755774, + "grad_norm": 1.2103378772735596, + "learning_rate": 1.0301599512734828e-06, + "loss": 0.3182, + "step": 39874 + }, + { + "epoch": 0.7982383704927057, + "grad_norm": 1.2986327409744263, + "learning_rate": 1.0299628694686487e-06, + "loss": 0.3266, + "step": 39875 + }, + { + "epoch": 0.7982583890098341, + "grad_norm": 1.1163941621780396, + "learning_rate": 1.0297658043529969e-06, + "loss": 0.2828, + "step": 39876 + }, + { + "epoch": 0.7982784075269624, + "grad_norm": 2.0174200534820557, + "learning_rate": 1.0295687559273538e-06, + "loss": 0.7147, + "step": 39877 + }, + { + "epoch": 0.7982984260440907, + "grad_norm": 1.1497390270233154, + "learning_rate": 1.0293717241925505e-06, + "loss": 0.332, + "step": 39878 + }, + { + "epoch": 0.7983184445612191, + "grad_norm": 1.0992387533187866, + "learning_rate": 1.0291747091494137e-06, + "loss": 0.2914, + "step": 39879 + }, + { + "epoch": 0.7983384630783474, + "grad_norm": 1.7489224672317505, + "learning_rate": 1.028977710798772e-06, + "loss": 0.7295, + "step": 39880 + }, + { + "epoch": 0.7983584815954758, + "grad_norm": 1.2529687881469727, + "learning_rate": 1.0287807291414537e-06, + "loss": 0.3489, + "step": 39881 + }, + { + "epoch": 0.7983785001126041, + "grad_norm": 1.1464855670928955, + "learning_rate": 1.0285837641782854e-06, + "loss": 0.2905, + "step": 39882 + }, + { + "epoch": 0.7983985186297325, + "grad_norm": 1.025822639465332, + "learning_rate": 1.0283868159100974e-06, + "loss": 0.2957, + "step": 39883 + }, + { + "epoch": 0.7984185371468608, + "grad_norm": 1.8741432428359985, + "learning_rate": 1.0281898843377164e-06, + "loss": 0.2774, + "step": 39884 + }, + { + "epoch": 0.7984385556639891, + "grad_norm": 1.0364270210266113, + "learning_rate": 1.0279929694619707e-06, + "loss": 0.2834, + "step": 39885 + }, + { + "epoch": 0.7984585741811175, + "grad_norm": 1.327661395072937, + "learning_rate": 1.027796071283686e-06, + "loss": 0.3208, + "step": 39886 + }, + { + "epoch": 0.7984785926982458, + "grad_norm": 1.068743348121643, + "learning_rate": 1.0275991898036918e-06, + "loss": 0.3005, + "step": 39887 + }, + { + "epoch": 0.7984986112153742, + "grad_norm": 1.1751519441604614, + "learning_rate": 1.0274023250228171e-06, + "loss": 0.2928, + "step": 39888 + }, + { + "epoch": 0.7985186297325025, + "grad_norm": 1.100555181503296, + "learning_rate": 1.0272054769418888e-06, + "loss": 0.2851, + "step": 39889 + }, + { + "epoch": 0.798538648249631, + "grad_norm": 1.0763262510299683, + "learning_rate": 1.0270086455617328e-06, + "loss": 0.2779, + "step": 39890 + }, + { + "epoch": 0.7985586667667592, + "grad_norm": 1.8444503545761108, + "learning_rate": 1.0268118308831765e-06, + "loss": 0.7745, + "step": 39891 + }, + { + "epoch": 0.7985786852838876, + "grad_norm": 1.1388986110687256, + "learning_rate": 1.0266150329070496e-06, + "loss": 0.3009, + "step": 39892 + }, + { + "epoch": 0.7985987038010159, + "grad_norm": 1.1336265802383423, + "learning_rate": 1.026418251634178e-06, + "loss": 0.2851, + "step": 39893 + }, + { + "epoch": 0.7986187223181442, + "grad_norm": 1.0991512537002563, + "learning_rate": 1.0262214870653892e-06, + "loss": 0.2818, + "step": 39894 + }, + { + "epoch": 0.7986387408352726, + "grad_norm": 1.1308224201202393, + "learning_rate": 1.0260247392015081e-06, + "loss": 0.3023, + "step": 39895 + }, + { + "epoch": 0.7986587593524009, + "grad_norm": 1.118638038635254, + "learning_rate": 1.0258280080433657e-06, + "loss": 0.2635, + "step": 39896 + }, + { + "epoch": 0.7986787778695293, + "grad_norm": 1.9107978343963623, + "learning_rate": 1.0256312935917873e-06, + "loss": 0.7533, + "step": 39897 + }, + { + "epoch": 0.7986987963866576, + "grad_norm": 1.055498480796814, + "learning_rate": 1.0254345958475992e-06, + "loss": 0.2604, + "step": 39898 + }, + { + "epoch": 0.798718814903786, + "grad_norm": 1.1109235286712646, + "learning_rate": 1.0252379148116276e-06, + "loss": 0.2792, + "step": 39899 + }, + { + "epoch": 0.7987388334209143, + "grad_norm": 1.238033413887024, + "learning_rate": 1.0250412504847e-06, + "loss": 0.3213, + "step": 39900 + }, + { + "epoch": 0.7987588519380426, + "grad_norm": 1.1370853185653687, + "learning_rate": 1.024844602867645e-06, + "loss": 0.2729, + "step": 39901 + }, + { + "epoch": 0.798778870455171, + "grad_norm": 1.1359487771987915, + "learning_rate": 1.0246479719612883e-06, + "loss": 0.2857, + "step": 39902 + }, + { + "epoch": 0.7987988889722993, + "grad_norm": 1.198131799697876, + "learning_rate": 1.0244513577664555e-06, + "loss": 0.2719, + "step": 39903 + }, + { + "epoch": 0.7988189074894277, + "grad_norm": 1.1632901430130005, + "learning_rate": 1.0242547602839725e-06, + "loss": 0.283, + "step": 39904 + }, + { + "epoch": 0.798838926006556, + "grad_norm": 1.1564390659332275, + "learning_rate": 1.0240581795146687e-06, + "loss": 0.2996, + "step": 39905 + }, + { + "epoch": 0.7988589445236844, + "grad_norm": 1.1684359312057495, + "learning_rate": 1.0238616154593682e-06, + "loss": 0.3202, + "step": 39906 + }, + { + "epoch": 0.7988789630408127, + "grad_norm": 1.1464903354644775, + "learning_rate": 1.0236650681188982e-06, + "loss": 0.3379, + "step": 39907 + }, + { + "epoch": 0.7988989815579411, + "grad_norm": 1.0087454319000244, + "learning_rate": 1.0234685374940823e-06, + "loss": 0.2593, + "step": 39908 + }, + { + "epoch": 0.7989190000750694, + "grad_norm": 1.1948575973510742, + "learning_rate": 1.0232720235857513e-06, + "loss": 0.3162, + "step": 39909 + }, + { + "epoch": 0.7989390185921977, + "grad_norm": 1.4134103059768677, + "learning_rate": 1.0230755263947283e-06, + "loss": 0.3187, + "step": 39910 + }, + { + "epoch": 0.7989590371093261, + "grad_norm": 1.1365232467651367, + "learning_rate": 1.0228790459218402e-06, + "loss": 0.354, + "step": 39911 + }, + { + "epoch": 0.7989790556264544, + "grad_norm": 1.0550435781478882, + "learning_rate": 1.022682582167911e-06, + "loss": 0.2684, + "step": 39912 + }, + { + "epoch": 0.7989990741435828, + "grad_norm": 1.2230181694030762, + "learning_rate": 1.0224861351337685e-06, + "loss": 0.2735, + "step": 39913 + }, + { + "epoch": 0.7990190926607111, + "grad_norm": 1.0642213821411133, + "learning_rate": 1.0222897048202397e-06, + "loss": 0.2722, + "step": 39914 + }, + { + "epoch": 0.7990391111778395, + "grad_norm": 1.1276271343231201, + "learning_rate": 1.022093291228149e-06, + "loss": 0.277, + "step": 39915 + }, + { + "epoch": 0.7990591296949678, + "grad_norm": 1.111162543296814, + "learning_rate": 1.021896894358322e-06, + "loss": 0.2816, + "step": 39916 + }, + { + "epoch": 0.7990791482120961, + "grad_norm": 0.984825611114502, + "learning_rate": 1.0217005142115832e-06, + "loss": 0.305, + "step": 39917 + }, + { + "epoch": 0.7990991667292245, + "grad_norm": 1.1189088821411133, + "learning_rate": 1.0215041507887602e-06, + "loss": 0.3005, + "step": 39918 + }, + { + "epoch": 0.7991191852463528, + "grad_norm": 1.2235167026519775, + "learning_rate": 1.0213078040906776e-06, + "loss": 0.3058, + "step": 39919 + }, + { + "epoch": 0.7991392037634812, + "grad_norm": 1.9205390214920044, + "learning_rate": 1.021111474118161e-06, + "loss": 0.7104, + "step": 39920 + }, + { + "epoch": 0.7991592222806095, + "grad_norm": 1.0563373565673828, + "learning_rate": 1.0209151608720359e-06, + "loss": 0.2836, + "step": 39921 + }, + { + "epoch": 0.7991792407977379, + "grad_norm": 0.9912059903144836, + "learning_rate": 1.0207188643531251e-06, + "loss": 0.2923, + "step": 39922 + }, + { + "epoch": 0.7991992593148662, + "grad_norm": 1.0881458520889282, + "learning_rate": 1.020522584562258e-06, + "loss": 0.3187, + "step": 39923 + }, + { + "epoch": 0.7992192778319946, + "grad_norm": 1.1702076196670532, + "learning_rate": 1.0203263215002569e-06, + "loss": 0.3506, + "step": 39924 + }, + { + "epoch": 0.7992392963491229, + "grad_norm": 1.163665771484375, + "learning_rate": 1.020130075167946e-06, + "loss": 0.3377, + "step": 39925 + }, + { + "epoch": 0.7992593148662512, + "grad_norm": 1.035310983657837, + "learning_rate": 1.0199338455661522e-06, + "loss": 0.2628, + "step": 39926 + }, + { + "epoch": 0.7992793333833796, + "grad_norm": 1.018328070640564, + "learning_rate": 1.0197376326957015e-06, + "loss": 0.3067, + "step": 39927 + }, + { + "epoch": 0.7992993519005079, + "grad_norm": 1.1510875225067139, + "learning_rate": 1.0195414365574168e-06, + "loss": 0.258, + "step": 39928 + }, + { + "epoch": 0.7993193704176363, + "grad_norm": 1.130305528640747, + "learning_rate": 1.0193452571521234e-06, + "loss": 0.3082, + "step": 39929 + }, + { + "epoch": 0.7993393889347646, + "grad_norm": 1.049579381942749, + "learning_rate": 1.0191490944806447e-06, + "loss": 0.2913, + "step": 39930 + }, + { + "epoch": 0.799359407451893, + "grad_norm": 1.047217845916748, + "learning_rate": 1.0189529485438078e-06, + "loss": 0.3035, + "step": 39931 + }, + { + "epoch": 0.7993794259690213, + "grad_norm": 1.2151612043380737, + "learning_rate": 1.0187568193424358e-06, + "loss": 0.3104, + "step": 39932 + }, + { + "epoch": 0.7993994444861496, + "grad_norm": 2.0781044960021973, + "learning_rate": 1.0185607068773534e-06, + "loss": 0.8121, + "step": 39933 + }, + { + "epoch": 0.799419463003278, + "grad_norm": 1.0525583028793335, + "learning_rate": 1.0183646111493856e-06, + "loss": 0.3085, + "step": 39934 + }, + { + "epoch": 0.7994394815204063, + "grad_norm": 1.1403077840805054, + "learning_rate": 1.0181685321593543e-06, + "loss": 0.3239, + "step": 39935 + }, + { + "epoch": 0.7994595000375347, + "grad_norm": 1.1438244581222534, + "learning_rate": 1.0179724699080867e-06, + "loss": 0.3487, + "step": 39936 + }, + { + "epoch": 0.799479518554663, + "grad_norm": 1.204958200454712, + "learning_rate": 1.0177764243964062e-06, + "loss": 0.2957, + "step": 39937 + }, + { + "epoch": 0.7994995370717914, + "grad_norm": 1.094529628753662, + "learning_rate": 1.017580395625135e-06, + "loss": 0.3303, + "step": 39938 + }, + { + "epoch": 0.7995195555889197, + "grad_norm": 1.2165892124176025, + "learning_rate": 1.017384383595099e-06, + "loss": 0.3979, + "step": 39939 + }, + { + "epoch": 0.7995395741060481, + "grad_norm": 1.000868558883667, + "learning_rate": 1.0171883883071231e-06, + "loss": 0.2541, + "step": 39940 + }, + { + "epoch": 0.7995595926231764, + "grad_norm": 1.1088701486587524, + "learning_rate": 1.0169924097620305e-06, + "loss": 0.2917, + "step": 39941 + }, + { + "epoch": 0.7995796111403047, + "grad_norm": 1.0571131706237793, + "learning_rate": 1.0167964479606445e-06, + "loss": 0.3059, + "step": 39942 + }, + { + "epoch": 0.7995996296574331, + "grad_norm": 1.1380252838134766, + "learning_rate": 1.0166005029037889e-06, + "loss": 0.2923, + "step": 39943 + }, + { + "epoch": 0.7996196481745614, + "grad_norm": 1.2412302494049072, + "learning_rate": 1.0164045745922863e-06, + "loss": 0.3109, + "step": 39944 + }, + { + "epoch": 0.7996396666916898, + "grad_norm": 1.0931869745254517, + "learning_rate": 1.016208663026963e-06, + "loss": 0.3244, + "step": 39945 + }, + { + "epoch": 0.7996596852088181, + "grad_norm": 1.0954477787017822, + "learning_rate": 1.0160127682086412e-06, + "loss": 0.2829, + "step": 39946 + }, + { + "epoch": 0.7996797037259465, + "grad_norm": 1.0953642129898071, + "learning_rate": 1.0158168901381442e-06, + "loss": 0.2999, + "step": 39947 + }, + { + "epoch": 0.7996997222430748, + "grad_norm": 1.8894374370574951, + "learning_rate": 1.0156210288162942e-06, + "loss": 0.7607, + "step": 39948 + }, + { + "epoch": 0.7997197407602031, + "grad_norm": 1.0883166790008545, + "learning_rate": 1.0154251842439173e-06, + "loss": 0.3096, + "step": 39949 + }, + { + "epoch": 0.7997397592773315, + "grad_norm": 1.2051059007644653, + "learning_rate": 1.0152293564218358e-06, + "loss": 0.3069, + "step": 39950 + }, + { + "epoch": 0.7997597777944598, + "grad_norm": 1.0988225936889648, + "learning_rate": 1.0150335453508707e-06, + "loss": 0.2639, + "step": 39951 + }, + { + "epoch": 0.7997797963115882, + "grad_norm": 1.0135451555252075, + "learning_rate": 1.0148377510318485e-06, + "loss": 0.2444, + "step": 39952 + }, + { + "epoch": 0.7997998148287165, + "grad_norm": 1.8185384273529053, + "learning_rate": 1.0146419734655893e-06, + "loss": 0.81, + "step": 39953 + }, + { + "epoch": 0.7998198333458449, + "grad_norm": 1.1335557699203491, + "learning_rate": 1.0144462126529192e-06, + "loss": 0.3119, + "step": 39954 + }, + { + "epoch": 0.7998398518629732, + "grad_norm": 1.2483742237091064, + "learning_rate": 1.0142504685946598e-06, + "loss": 0.3014, + "step": 39955 + }, + { + "epoch": 0.7998598703801016, + "grad_norm": 1.259408712387085, + "learning_rate": 1.0140547412916334e-06, + "loss": 0.303, + "step": 39956 + }, + { + "epoch": 0.7998798888972299, + "grad_norm": 1.1417750120162964, + "learning_rate": 1.0138590307446616e-06, + "loss": 0.3183, + "step": 39957 + }, + { + "epoch": 0.7998999074143582, + "grad_norm": 1.2120985984802246, + "learning_rate": 1.01366333695457e-06, + "loss": 0.2582, + "step": 39958 + }, + { + "epoch": 0.7999199259314866, + "grad_norm": 1.1187282800674438, + "learning_rate": 1.0134676599221798e-06, + "loss": 0.3075, + "step": 39959 + }, + { + "epoch": 0.7999399444486149, + "grad_norm": 1.204725742340088, + "learning_rate": 1.0132719996483136e-06, + "loss": 0.2883, + "step": 39960 + }, + { + "epoch": 0.7999599629657433, + "grad_norm": 1.030792474746704, + "learning_rate": 1.0130763561337932e-06, + "loss": 0.2688, + "step": 39961 + }, + { + "epoch": 0.7999799814828716, + "grad_norm": 1.0726995468139648, + "learning_rate": 1.0128807293794423e-06, + "loss": 0.2711, + "step": 39962 + }, + { + "epoch": 0.8, + "grad_norm": 1.0949245691299438, + "learning_rate": 1.012685119386083e-06, + "loss": 0.2696, + "step": 39963 + }, + { + "epoch": 0.8000200185171283, + "grad_norm": 1.9958282709121704, + "learning_rate": 1.0124895261545365e-06, + "loss": 0.7527, + "step": 39964 + }, + { + "epoch": 0.8000400370342566, + "grad_norm": 1.0121997594833374, + "learning_rate": 1.012293949685627e-06, + "loss": 0.2833, + "step": 39965 + }, + { + "epoch": 0.800060055551385, + "grad_norm": 1.1701165437698364, + "learning_rate": 1.0120983899801745e-06, + "loss": 0.3329, + "step": 39966 + }, + { + "epoch": 0.8000800740685133, + "grad_norm": 1.1341274976730347, + "learning_rate": 1.011902847039003e-06, + "loss": 0.2888, + "step": 39967 + }, + { + "epoch": 0.8001000925856417, + "grad_norm": 1.2421202659606934, + "learning_rate": 1.0117073208629342e-06, + "loss": 0.3224, + "step": 39968 + }, + { + "epoch": 0.80012011110277, + "grad_norm": 1.1976336240768433, + "learning_rate": 1.0115118114527888e-06, + "loss": 0.3268, + "step": 39969 + }, + { + "epoch": 0.8001401296198984, + "grad_norm": 1.1155496835708618, + "learning_rate": 1.0113163188093888e-06, + "loss": 0.315, + "step": 39970 + }, + { + "epoch": 0.8001601481370267, + "grad_norm": 1.2429826259613037, + "learning_rate": 1.0111208429335573e-06, + "loss": 0.2912, + "step": 39971 + }, + { + "epoch": 0.8001801666541551, + "grad_norm": 1.0994819402694702, + "learning_rate": 1.0109253838261157e-06, + "loss": 0.3242, + "step": 39972 + }, + { + "epoch": 0.8002001851712834, + "grad_norm": 1.0646389722824097, + "learning_rate": 1.0107299414878857e-06, + "loss": 0.3214, + "step": 39973 + }, + { + "epoch": 0.8002202036884117, + "grad_norm": 1.0783884525299072, + "learning_rate": 1.0105345159196877e-06, + "loss": 0.301, + "step": 39974 + }, + { + "epoch": 0.8002402222055401, + "grad_norm": 1.1087162494659424, + "learning_rate": 1.0103391071223435e-06, + "loss": 0.348, + "step": 39975 + }, + { + "epoch": 0.8002602407226684, + "grad_norm": 1.009706735610962, + "learning_rate": 1.0101437150966758e-06, + "loss": 0.2593, + "step": 39976 + }, + { + "epoch": 0.8002802592397968, + "grad_norm": 1.2016557455062866, + "learning_rate": 1.0099483398435044e-06, + "loss": 0.3331, + "step": 39977 + }, + { + "epoch": 0.8003002777569251, + "grad_norm": 1.0871660709381104, + "learning_rate": 1.0097529813636531e-06, + "loss": 0.2732, + "step": 39978 + }, + { + "epoch": 0.8003202962740535, + "grad_norm": 1.3872294425964355, + "learning_rate": 1.0095576396579398e-06, + "loss": 0.2958, + "step": 39979 + }, + { + "epoch": 0.8003403147911818, + "grad_norm": 1.0340416431427002, + "learning_rate": 1.0093623147271887e-06, + "loss": 0.3251, + "step": 39980 + }, + { + "epoch": 0.8003603333083101, + "grad_norm": 1.1766741275787354, + "learning_rate": 1.0091670065722199e-06, + "loss": 0.3433, + "step": 39981 + }, + { + "epoch": 0.8003803518254385, + "grad_norm": 1.2226463556289673, + "learning_rate": 1.0089717151938539e-06, + "loss": 0.3289, + "step": 39982 + }, + { + "epoch": 0.8004003703425668, + "grad_norm": 1.1742045879364014, + "learning_rate": 1.0087764405929117e-06, + "loss": 0.2926, + "step": 39983 + }, + { + "epoch": 0.8004203888596952, + "grad_norm": 1.029605746269226, + "learning_rate": 1.0085811827702135e-06, + "loss": 0.2964, + "step": 39984 + }, + { + "epoch": 0.8004404073768235, + "grad_norm": 1.1743645668029785, + "learning_rate": 1.0083859417265824e-06, + "loss": 0.3177, + "step": 39985 + }, + { + "epoch": 0.8004604258939519, + "grad_norm": 1.0791915655136108, + "learning_rate": 1.0081907174628381e-06, + "loss": 0.3284, + "step": 39986 + }, + { + "epoch": 0.8004804444110802, + "grad_norm": 1.1793211698532104, + "learning_rate": 1.0079955099798005e-06, + "loss": 0.3188, + "step": 39987 + }, + { + "epoch": 0.8005004629282086, + "grad_norm": 1.089341640472412, + "learning_rate": 1.0078003192782892e-06, + "loss": 0.3076, + "step": 39988 + }, + { + "epoch": 0.8005204814453369, + "grad_norm": 1.0236896276474, + "learning_rate": 1.0076051453591278e-06, + "loss": 0.2868, + "step": 39989 + }, + { + "epoch": 0.8005404999624652, + "grad_norm": 1.9397403001785278, + "learning_rate": 1.0074099882231342e-06, + "loss": 0.7739, + "step": 39990 + }, + { + "epoch": 0.8005605184795936, + "grad_norm": 1.0626171827316284, + "learning_rate": 1.0072148478711309e-06, + "loss": 0.2419, + "step": 39991 + }, + { + "epoch": 0.8005805369967219, + "grad_norm": 1.129252314567566, + "learning_rate": 1.0070197243039354e-06, + "loss": 0.3323, + "step": 39992 + }, + { + "epoch": 0.8006005555138503, + "grad_norm": 1.0152828693389893, + "learning_rate": 1.0068246175223712e-06, + "loss": 0.3137, + "step": 39993 + }, + { + "epoch": 0.8006205740309786, + "grad_norm": 1.8244184255599976, + "learning_rate": 1.006629527527257e-06, + "loss": 0.764, + "step": 39994 + }, + { + "epoch": 0.800640592548107, + "grad_norm": 1.2618168592453003, + "learning_rate": 1.0064344543194132e-06, + "loss": 0.2973, + "step": 39995 + }, + { + "epoch": 0.8006606110652353, + "grad_norm": 1.2200602293014526, + "learning_rate": 1.0062393978996594e-06, + "loss": 0.2752, + "step": 39996 + }, + { + "epoch": 0.8006806295823636, + "grad_norm": 1.1379090547561646, + "learning_rate": 1.006044358268814e-06, + "loss": 0.2888, + "step": 39997 + }, + { + "epoch": 0.800700648099492, + "grad_norm": 1.2839676141738892, + "learning_rate": 1.0058493354277005e-06, + "loss": 0.2513, + "step": 39998 + }, + { + "epoch": 0.8007206666166203, + "grad_norm": 1.0868111848831177, + "learning_rate": 1.0056543293771365e-06, + "loss": 0.3035, + "step": 39999 + }, + { + "epoch": 0.8007406851337487, + "grad_norm": 1.8489997386932373, + "learning_rate": 1.005459340117942e-06, + "loss": 0.7464, + "step": 40000 + }, + { + "epoch": 0.800760703650877, + "grad_norm": 1.1117827892303467, + "learning_rate": 1.0052643676509355e-06, + "loss": 0.315, + "step": 40001 + }, + { + "epoch": 0.8007807221680054, + "grad_norm": 1.0759509801864624, + "learning_rate": 1.0050694119769394e-06, + "loss": 0.2789, + "step": 40002 + }, + { + "epoch": 0.8008007406851337, + "grad_norm": 1.0731322765350342, + "learning_rate": 1.0048744730967703e-06, + "loss": 0.2817, + "step": 40003 + }, + { + "epoch": 0.8008207592022621, + "grad_norm": 1.4125492572784424, + "learning_rate": 1.0046795510112506e-06, + "loss": 0.2883, + "step": 40004 + }, + { + "epoch": 0.8008407777193904, + "grad_norm": 1.218282699584961, + "learning_rate": 1.0044846457211981e-06, + "loss": 0.2849, + "step": 40005 + }, + { + "epoch": 0.8008607962365187, + "grad_norm": 1.0079262256622314, + "learning_rate": 1.0042897572274312e-06, + "loss": 0.2975, + "step": 40006 + }, + { + "epoch": 0.8008808147536471, + "grad_norm": 1.1556508541107178, + "learning_rate": 1.004094885530772e-06, + "loss": 0.2376, + "step": 40007 + }, + { + "epoch": 0.8009008332707754, + "grad_norm": 1.0635219812393188, + "learning_rate": 1.0039000306320373e-06, + "loss": 0.2664, + "step": 40008 + }, + { + "epoch": 0.8009208517879038, + "grad_norm": 1.004259705543518, + "learning_rate": 1.0037051925320474e-06, + "loss": 0.293, + "step": 40009 + }, + { + "epoch": 0.8009408703050321, + "grad_norm": 1.1718692779541016, + "learning_rate": 1.003510371231619e-06, + "loss": 0.309, + "step": 40010 + }, + { + "epoch": 0.8009608888221605, + "grad_norm": 1.094936728477478, + "learning_rate": 1.0033155667315747e-06, + "loss": 0.2889, + "step": 40011 + }, + { + "epoch": 0.8009809073392888, + "grad_norm": 1.1284466981887817, + "learning_rate": 1.0031207790327313e-06, + "loss": 0.3106, + "step": 40012 + }, + { + "epoch": 0.8010009258564171, + "grad_norm": 1.0802406072616577, + "learning_rate": 1.0029260081359082e-06, + "loss": 0.3251, + "step": 40013 + }, + { + "epoch": 0.8010209443735455, + "grad_norm": 1.304214358329773, + "learning_rate": 1.0027312540419227e-06, + "loss": 0.2735, + "step": 40014 + }, + { + "epoch": 0.8010409628906738, + "grad_norm": 1.149325966835022, + "learning_rate": 1.002536516751596e-06, + "loss": 0.337, + "step": 40015 + }, + { + "epoch": 0.8010609814078022, + "grad_norm": 1.1639518737792969, + "learning_rate": 1.002341796265744e-06, + "loss": 0.2901, + "step": 40016 + }, + { + "epoch": 0.8010809999249305, + "grad_norm": 1.0534166097640991, + "learning_rate": 1.0021470925851884e-06, + "loss": 0.2853, + "step": 40017 + }, + { + "epoch": 0.8011010184420589, + "grad_norm": 1.2270903587341309, + "learning_rate": 1.0019524057107461e-06, + "loss": 0.2646, + "step": 40018 + }, + { + "epoch": 0.8011210369591872, + "grad_norm": 1.6255000829696655, + "learning_rate": 1.001757735643234e-06, + "loss": 0.275, + "step": 40019 + }, + { + "epoch": 0.8011410554763156, + "grad_norm": 1.8581045866012573, + "learning_rate": 1.0015630823834738e-06, + "loss": 0.7246, + "step": 40020 + }, + { + "epoch": 0.8011610739934439, + "grad_norm": 1.0898351669311523, + "learning_rate": 1.0013684459322814e-06, + "loss": 0.3075, + "step": 40021 + }, + { + "epoch": 0.8011810925105722, + "grad_norm": 1.1243056058883667, + "learning_rate": 1.0011738262904753e-06, + "loss": 0.2802, + "step": 40022 + }, + { + "epoch": 0.8012011110277006, + "grad_norm": 1.077285885810852, + "learning_rate": 1.0009792234588733e-06, + "loss": 0.2697, + "step": 40023 + }, + { + "epoch": 0.8012211295448289, + "grad_norm": 1.211892008781433, + "learning_rate": 1.0007846374382945e-06, + "loss": 0.3059, + "step": 40024 + }, + { + "epoch": 0.8012411480619573, + "grad_norm": 1.1092952489852905, + "learning_rate": 1.0005900682295571e-06, + "loss": 0.2999, + "step": 40025 + }, + { + "epoch": 0.8012611665790856, + "grad_norm": 1.7607347965240479, + "learning_rate": 1.0003955158334782e-06, + "loss": 0.7584, + "step": 40026 + }, + { + "epoch": 0.801281185096214, + "grad_norm": 1.1447969675064087, + "learning_rate": 1.0002009802508744e-06, + "loss": 0.3132, + "step": 40027 + }, + { + "epoch": 0.8013012036133423, + "grad_norm": 1.9262710809707642, + "learning_rate": 1.0000064614825667e-06, + "loss": 0.7631, + "step": 40028 + }, + { + "epoch": 0.8013212221304706, + "grad_norm": 1.0962361097335815, + "learning_rate": 9.998119595293694e-07, + "loss": 0.2744, + "step": 40029 + }, + { + "epoch": 0.801341240647599, + "grad_norm": 1.1431903839111328, + "learning_rate": 9.996174743921028e-07, + "loss": 0.3023, + "step": 40030 + }, + { + "epoch": 0.8013612591647273, + "grad_norm": 1.1190097332000732, + "learning_rate": 9.994230060715837e-07, + "loss": 0.3504, + "step": 40031 + }, + { + "epoch": 0.8013812776818557, + "grad_norm": 1.1159601211547852, + "learning_rate": 9.992285545686275e-07, + "loss": 0.2953, + "step": 40032 + }, + { + "epoch": 0.801401296198984, + "grad_norm": 1.1835284233093262, + "learning_rate": 9.990341198840558e-07, + "loss": 0.2586, + "step": 40033 + }, + { + "epoch": 0.8014213147161124, + "grad_norm": 1.037378191947937, + "learning_rate": 9.988397020186829e-07, + "loss": 0.2505, + "step": 40034 + }, + { + "epoch": 0.8014413332332407, + "grad_norm": 1.7257455587387085, + "learning_rate": 9.98645300973327e-07, + "loss": 0.6917, + "step": 40035 + }, + { + "epoch": 0.8014613517503691, + "grad_norm": 1.1355427503585815, + "learning_rate": 9.984509167488054e-07, + "loss": 0.2957, + "step": 40036 + }, + { + "epoch": 0.8014813702674974, + "grad_norm": 1.1907426118850708, + "learning_rate": 9.982565493459334e-07, + "loss": 0.3104, + "step": 40037 + }, + { + "epoch": 0.8015013887846257, + "grad_norm": 1.1395316123962402, + "learning_rate": 9.98062198765531e-07, + "loss": 0.3144, + "step": 40038 + }, + { + "epoch": 0.8015214073017541, + "grad_norm": 1.0966635942459106, + "learning_rate": 9.97867865008414e-07, + "loss": 0.282, + "step": 40039 + }, + { + "epoch": 0.8015414258188824, + "grad_norm": 1.176918625831604, + "learning_rate": 9.97673548075398e-07, + "loss": 0.3088, + "step": 40040 + }, + { + "epoch": 0.8015614443360108, + "grad_norm": 2.070373296737671, + "learning_rate": 9.974792479673023e-07, + "loss": 0.7953, + "step": 40041 + }, + { + "epoch": 0.8015814628531391, + "grad_norm": 1.8819549083709717, + "learning_rate": 9.972849646849414e-07, + "loss": 0.7587, + "step": 40042 + }, + { + "epoch": 0.8016014813702675, + "grad_norm": 1.1549562215805054, + "learning_rate": 9.97090698229134e-07, + "loss": 0.2675, + "step": 40043 + }, + { + "epoch": 0.8016214998873958, + "grad_norm": 1.136173963546753, + "learning_rate": 9.96896448600696e-07, + "loss": 0.3501, + "step": 40044 + }, + { + "epoch": 0.8016415184045241, + "grad_norm": 1.022451400756836, + "learning_rate": 9.96702215800443e-07, + "loss": 0.2725, + "step": 40045 + }, + { + "epoch": 0.8016615369216525, + "grad_norm": 1.1210393905639648, + "learning_rate": 9.965079998291932e-07, + "loss": 0.2592, + "step": 40046 + }, + { + "epoch": 0.8016815554387808, + "grad_norm": 1.1211923360824585, + "learning_rate": 9.963138006877621e-07, + "loss": 0.3026, + "step": 40047 + }, + { + "epoch": 0.8017015739559092, + "grad_norm": 1.2484326362609863, + "learning_rate": 9.961196183769666e-07, + "loss": 0.3235, + "step": 40048 + }, + { + "epoch": 0.8017215924730375, + "grad_norm": 1.0914530754089355, + "learning_rate": 9.95925452897622e-07, + "loss": 0.2828, + "step": 40049 + }, + { + "epoch": 0.8017416109901659, + "grad_norm": 1.1158440113067627, + "learning_rate": 9.957313042505446e-07, + "loss": 0.2574, + "step": 40050 + }, + { + "epoch": 0.8017616295072942, + "grad_norm": 1.9999761581420898, + "learning_rate": 9.95537172436552e-07, + "loss": 0.827, + "step": 40051 + }, + { + "epoch": 0.8017816480244226, + "grad_norm": 2.066915273666382, + "learning_rate": 9.953430574564593e-07, + "loss": 0.7409, + "step": 40052 + }, + { + "epoch": 0.8018016665415509, + "grad_norm": 1.2072335481643677, + "learning_rate": 9.951489593110807e-07, + "loss": 0.2971, + "step": 40053 + }, + { + "epoch": 0.8018216850586792, + "grad_norm": 1.1213557720184326, + "learning_rate": 9.949548780012363e-07, + "loss": 0.2827, + "step": 40054 + }, + { + "epoch": 0.8018417035758076, + "grad_norm": 0.9753834009170532, + "learning_rate": 9.947608135277382e-07, + "loss": 0.2832, + "step": 40055 + }, + { + "epoch": 0.8018617220929359, + "grad_norm": 1.094900131225586, + "learning_rate": 9.945667658914044e-07, + "loss": 0.3015, + "step": 40056 + }, + { + "epoch": 0.8018817406100643, + "grad_norm": 1.0094599723815918, + "learning_rate": 9.943727350930504e-07, + "loss": 0.2567, + "step": 40057 + }, + { + "epoch": 0.8019017591271926, + "grad_norm": 1.9538480043411255, + "learning_rate": 9.941787211334913e-07, + "loss": 0.7259, + "step": 40058 + }, + { + "epoch": 0.801921777644321, + "grad_norm": 1.0035195350646973, + "learning_rate": 9.939847240135413e-07, + "loss": 0.2663, + "step": 40059 + }, + { + "epoch": 0.8019417961614493, + "grad_norm": 1.0322750806808472, + "learning_rate": 9.937907437340194e-07, + "loss": 0.2856, + "step": 40060 + }, + { + "epoch": 0.8019618146785776, + "grad_norm": 2.04254412651062, + "learning_rate": 9.935967802957387e-07, + "loss": 0.7733, + "step": 40061 + }, + { + "epoch": 0.801981833195706, + "grad_norm": 1.0443812608718872, + "learning_rate": 9.93402833699515e-07, + "loss": 0.321, + "step": 40062 + }, + { + "epoch": 0.8020018517128343, + "grad_norm": 1.1491930484771729, + "learning_rate": 9.932089039461617e-07, + "loss": 0.309, + "step": 40063 + }, + { + "epoch": 0.8020218702299627, + "grad_norm": 1.064166784286499, + "learning_rate": 9.930149910364977e-07, + "loss": 0.2588, + "step": 40064 + }, + { + "epoch": 0.802041888747091, + "grad_norm": 1.0319167375564575, + "learning_rate": 9.928210949713362e-07, + "loss": 0.3159, + "step": 40065 + }, + { + "epoch": 0.8020619072642194, + "grad_norm": 1.0563915967941284, + "learning_rate": 9.926272157514916e-07, + "loss": 0.3216, + "step": 40066 + }, + { + "epoch": 0.8020819257813477, + "grad_norm": 1.2091784477233887, + "learning_rate": 9.924333533777803e-07, + "loss": 0.2701, + "step": 40067 + }, + { + "epoch": 0.8021019442984761, + "grad_norm": 1.065517783164978, + "learning_rate": 9.922395078510162e-07, + "loss": 0.2927, + "step": 40068 + }, + { + "epoch": 0.8021219628156044, + "grad_norm": 1.339160442352295, + "learning_rate": 9.920456791720157e-07, + "loss": 0.3381, + "step": 40069 + }, + { + "epoch": 0.8021419813327327, + "grad_norm": 1.0729923248291016, + "learning_rate": 9.918518673415927e-07, + "loss": 0.2606, + "step": 40070 + }, + { + "epoch": 0.8021619998498611, + "grad_norm": 1.070000410079956, + "learning_rate": 9.916580723605617e-07, + "loss": 0.2801, + "step": 40071 + }, + { + "epoch": 0.8021820183669894, + "grad_norm": 1.1688051223754883, + "learning_rate": 9.914642942297365e-07, + "loss": 0.3064, + "step": 40072 + }, + { + "epoch": 0.8022020368841178, + "grad_norm": 1.8739440441131592, + "learning_rate": 9.912705329499339e-07, + "loss": 0.7744, + "step": 40073 + }, + { + "epoch": 0.8022220554012461, + "grad_norm": 1.2121059894561768, + "learning_rate": 9.910767885219673e-07, + "loss": 0.3044, + "step": 40074 + }, + { + "epoch": 0.8022420739183745, + "grad_norm": 1.9507012367248535, + "learning_rate": 9.908830609466513e-07, + "loss": 0.7133, + "step": 40075 + }, + { + "epoch": 0.8022620924355028, + "grad_norm": 1.1114403009414673, + "learning_rate": 9.906893502247987e-07, + "loss": 0.249, + "step": 40076 + }, + { + "epoch": 0.8022821109526311, + "grad_norm": 1.2069613933563232, + "learning_rate": 9.904956563572266e-07, + "loss": 0.3096, + "step": 40077 + }, + { + "epoch": 0.8023021294697595, + "grad_norm": 1.1253737211227417, + "learning_rate": 9.903019793447476e-07, + "loss": 0.3075, + "step": 40078 + }, + { + "epoch": 0.8023221479868878, + "grad_norm": 1.0923411846160889, + "learning_rate": 9.901083191881754e-07, + "loss": 0.3021, + "step": 40079 + }, + { + "epoch": 0.8023421665040162, + "grad_norm": 1.0279268026351929, + "learning_rate": 9.89914675888326e-07, + "loss": 0.271, + "step": 40080 + }, + { + "epoch": 0.8023621850211445, + "grad_norm": 2.1174533367156982, + "learning_rate": 9.897210494460107e-07, + "loss": 0.8088, + "step": 40081 + }, + { + "epoch": 0.8023822035382729, + "grad_norm": 0.9735992550849915, + "learning_rate": 9.89527439862047e-07, + "loss": 0.269, + "step": 40082 + }, + { + "epoch": 0.8024022220554012, + "grad_norm": 0.9953395128250122, + "learning_rate": 9.893338471372465e-07, + "loss": 0.2925, + "step": 40083 + }, + { + "epoch": 0.8024222405725296, + "grad_norm": 1.16606867313385, + "learning_rate": 9.891402712724234e-07, + "loss": 0.3402, + "step": 40084 + }, + { + "epoch": 0.8024422590896579, + "grad_norm": 1.4728158712387085, + "learning_rate": 9.889467122683904e-07, + "loss": 0.3043, + "step": 40085 + }, + { + "epoch": 0.8024622776067862, + "grad_norm": 1.051871657371521, + "learning_rate": 9.887531701259635e-07, + "loss": 0.2808, + "step": 40086 + }, + { + "epoch": 0.8024822961239146, + "grad_norm": 1.131168246269226, + "learning_rate": 9.885596448459546e-07, + "loss": 0.2807, + "step": 40087 + }, + { + "epoch": 0.8025023146410429, + "grad_norm": 1.145673155784607, + "learning_rate": 9.88366136429178e-07, + "loss": 0.2861, + "step": 40088 + }, + { + "epoch": 0.8025223331581713, + "grad_norm": 1.2214263677597046, + "learning_rate": 9.881726448764468e-07, + "loss": 0.3122, + "step": 40089 + }, + { + "epoch": 0.8025423516752996, + "grad_norm": 1.1281288862228394, + "learning_rate": 9.879791701885732e-07, + "loss": 0.2917, + "step": 40090 + }, + { + "epoch": 0.802562370192428, + "grad_norm": 1.169081449508667, + "learning_rate": 9.87785712366373e-07, + "loss": 0.2761, + "step": 40091 + }, + { + "epoch": 0.8025823887095563, + "grad_norm": 1.029619812965393, + "learning_rate": 9.875922714106573e-07, + "loss": 0.2993, + "step": 40092 + }, + { + "epoch": 0.8026024072266846, + "grad_norm": 1.152266025543213, + "learning_rate": 9.873988473222412e-07, + "loss": 0.3081, + "step": 40093 + }, + { + "epoch": 0.802622425743813, + "grad_norm": 1.007009506225586, + "learning_rate": 9.872054401019354e-07, + "loss": 0.2429, + "step": 40094 + }, + { + "epoch": 0.8026424442609413, + "grad_norm": 1.1452410221099854, + "learning_rate": 9.87012049750556e-07, + "loss": 0.3211, + "step": 40095 + }, + { + "epoch": 0.8026624627780697, + "grad_norm": 1.1050145626068115, + "learning_rate": 9.868186762689135e-07, + "loss": 0.2727, + "step": 40096 + }, + { + "epoch": 0.802682481295198, + "grad_norm": 1.0039715766906738, + "learning_rate": 9.866253196578224e-07, + "loss": 0.2781, + "step": 40097 + }, + { + "epoch": 0.8027024998123264, + "grad_norm": 1.1702316999435425, + "learning_rate": 9.864319799180948e-07, + "loss": 0.3161, + "step": 40098 + }, + { + "epoch": 0.8027225183294547, + "grad_norm": 1.0416929721832275, + "learning_rate": 9.862386570505417e-07, + "loss": 0.2331, + "step": 40099 + }, + { + "epoch": 0.8027425368465831, + "grad_norm": 1.1284762620925903, + "learning_rate": 9.860453510559792e-07, + "loss": 0.3076, + "step": 40100 + }, + { + "epoch": 0.8027625553637114, + "grad_norm": 1.0475291013717651, + "learning_rate": 9.85852061935218e-07, + "loss": 0.3, + "step": 40101 + }, + { + "epoch": 0.8027825738808397, + "grad_norm": 1.116949200630188, + "learning_rate": 9.856587896890713e-07, + "loss": 0.2712, + "step": 40102 + }, + { + "epoch": 0.8028025923979681, + "grad_norm": 1.037132978439331, + "learning_rate": 9.854655343183495e-07, + "loss": 0.2932, + "step": 40103 + }, + { + "epoch": 0.8028226109150964, + "grad_norm": 1.0474939346313477, + "learning_rate": 9.852722958238682e-07, + "loss": 0.3267, + "step": 40104 + }, + { + "epoch": 0.8028426294322248, + "grad_norm": 1.0994657278060913, + "learning_rate": 9.850790742064365e-07, + "loss": 0.3439, + "step": 40105 + }, + { + "epoch": 0.8028626479493531, + "grad_norm": 1.1555837392807007, + "learning_rate": 9.848858694668695e-07, + "loss": 0.2789, + "step": 40106 + }, + { + "epoch": 0.8028826664664815, + "grad_norm": 1.2115213871002197, + "learning_rate": 9.846926816059776e-07, + "loss": 0.2798, + "step": 40107 + }, + { + "epoch": 0.8029026849836098, + "grad_norm": 1.4612292051315308, + "learning_rate": 9.844995106245741e-07, + "loss": 0.3608, + "step": 40108 + }, + { + "epoch": 0.8029227035007381, + "grad_norm": 1.1327276229858398, + "learning_rate": 9.843063565234713e-07, + "loss": 0.3498, + "step": 40109 + }, + { + "epoch": 0.8029427220178665, + "grad_norm": 1.2032654285430908, + "learning_rate": 9.841132193034796e-07, + "loss": 0.2924, + "step": 40110 + }, + { + "epoch": 0.8029627405349948, + "grad_norm": 1.123324990272522, + "learning_rate": 9.839200989654124e-07, + "loss": 0.3065, + "step": 40111 + }, + { + "epoch": 0.8029827590521232, + "grad_norm": 2.048349380493164, + "learning_rate": 9.837269955100792e-07, + "loss": 0.7003, + "step": 40112 + }, + { + "epoch": 0.8030027775692515, + "grad_norm": 1.088335394859314, + "learning_rate": 9.83533908938295e-07, + "loss": 0.2978, + "step": 40113 + }, + { + "epoch": 0.8030227960863799, + "grad_norm": 1.1136302947998047, + "learning_rate": 9.833408392508698e-07, + "loss": 0.3104, + "step": 40114 + }, + { + "epoch": 0.8030428146035082, + "grad_norm": 1.106978178024292, + "learning_rate": 9.83147786448615e-07, + "loss": 0.2672, + "step": 40115 + }, + { + "epoch": 0.8030628331206366, + "grad_norm": 1.1015630960464478, + "learning_rate": 9.82954750532341e-07, + "loss": 0.285, + "step": 40116 + }, + { + "epoch": 0.8030828516377649, + "grad_norm": 1.105273962020874, + "learning_rate": 9.827617315028626e-07, + "loss": 0.2547, + "step": 40117 + }, + { + "epoch": 0.8031028701548932, + "grad_norm": 1.0760232210159302, + "learning_rate": 9.825687293609882e-07, + "loss": 0.3133, + "step": 40118 + }, + { + "epoch": 0.8031228886720216, + "grad_norm": 1.0373281240463257, + "learning_rate": 9.82375744107531e-07, + "loss": 0.3005, + "step": 40119 + }, + { + "epoch": 0.8031429071891499, + "grad_norm": 1.065208911895752, + "learning_rate": 9.821827757433017e-07, + "loss": 0.3206, + "step": 40120 + }, + { + "epoch": 0.8031629257062783, + "grad_norm": 0.9938223958015442, + "learning_rate": 9.8198982426911e-07, + "loss": 0.2437, + "step": 40121 + }, + { + "epoch": 0.8031829442234066, + "grad_norm": 1.0436038970947266, + "learning_rate": 9.817968896857698e-07, + "loss": 0.2966, + "step": 40122 + }, + { + "epoch": 0.803202962740535, + "grad_norm": 1.8544219732284546, + "learning_rate": 9.816039719940912e-07, + "loss": 0.7871, + "step": 40123 + }, + { + "epoch": 0.8032229812576633, + "grad_norm": 1.18073570728302, + "learning_rate": 9.814110711948844e-07, + "loss": 0.2839, + "step": 40124 + }, + { + "epoch": 0.8032429997747916, + "grad_norm": 1.1153017282485962, + "learning_rate": 9.812181872889592e-07, + "loss": 0.2698, + "step": 40125 + }, + { + "epoch": 0.80326301829192, + "grad_norm": 1.2346875667572021, + "learning_rate": 9.810253202771292e-07, + "loss": 0.2624, + "step": 40126 + }, + { + "epoch": 0.8032830368090483, + "grad_norm": 0.9946538209915161, + "learning_rate": 9.80832470160204e-07, + "loss": 0.2262, + "step": 40127 + }, + { + "epoch": 0.8033030553261767, + "grad_norm": 1.248218297958374, + "learning_rate": 9.806396369389942e-07, + "loss": 0.3104, + "step": 40128 + }, + { + "epoch": 0.803323073843305, + "grad_norm": 1.9247792959213257, + "learning_rate": 9.804468206143091e-07, + "loss": 0.8028, + "step": 40129 + }, + { + "epoch": 0.8033430923604334, + "grad_norm": 1.0434452295303345, + "learning_rate": 9.802540211869615e-07, + "loss": 0.2266, + "step": 40130 + }, + { + "epoch": 0.8033631108775617, + "grad_norm": 1.2098782062530518, + "learning_rate": 9.800612386577602e-07, + "loss": 0.2743, + "step": 40131 + }, + { + "epoch": 0.8033831293946901, + "grad_norm": 1.0683610439300537, + "learning_rate": 9.798684730275172e-07, + "loss": 0.2763, + "step": 40132 + }, + { + "epoch": 0.8034031479118184, + "grad_norm": 1.1559185981750488, + "learning_rate": 9.796757242970423e-07, + "loss": 0.2827, + "step": 40133 + }, + { + "epoch": 0.8034231664289467, + "grad_norm": 1.0492204427719116, + "learning_rate": 9.79482992467144e-07, + "loss": 0.2685, + "step": 40134 + }, + { + "epoch": 0.8034431849460751, + "grad_norm": 1.0791032314300537, + "learning_rate": 9.792902775386353e-07, + "loss": 0.3053, + "step": 40135 + }, + { + "epoch": 0.8034632034632034, + "grad_norm": 2.09401273727417, + "learning_rate": 9.790975795123248e-07, + "loss": 0.7491, + "step": 40136 + }, + { + "epoch": 0.8034832219803318, + "grad_norm": 0.9901748299598694, + "learning_rate": 9.789048983890232e-07, + "loss": 0.2908, + "step": 40137 + }, + { + "epoch": 0.8035032404974601, + "grad_norm": 1.160799264907837, + "learning_rate": 9.787122341695383e-07, + "loss": 0.3044, + "step": 40138 + }, + { + "epoch": 0.8035232590145885, + "grad_norm": 1.1481472253799438, + "learning_rate": 9.785195868546833e-07, + "loss": 0.307, + "step": 40139 + }, + { + "epoch": 0.8035432775317168, + "grad_norm": 1.1461942195892334, + "learning_rate": 9.78326956445267e-07, + "loss": 0.2811, + "step": 40140 + }, + { + "epoch": 0.8035632960488451, + "grad_norm": 1.982587218284607, + "learning_rate": 9.78134342942098e-07, + "loss": 0.7819, + "step": 40141 + }, + { + "epoch": 0.8035833145659735, + "grad_norm": 1.0643666982650757, + "learning_rate": 9.779417463459857e-07, + "loss": 0.2845, + "step": 40142 + }, + { + "epoch": 0.8036033330831018, + "grad_norm": 1.1785519123077393, + "learning_rate": 9.777491666577416e-07, + "loss": 0.276, + "step": 40143 + }, + { + "epoch": 0.8036233516002302, + "grad_norm": 1.928009271621704, + "learning_rate": 9.775566038781737e-07, + "loss": 0.8019, + "step": 40144 + }, + { + "epoch": 0.8036433701173585, + "grad_norm": 1.0795537233352661, + "learning_rate": 9.773640580080934e-07, + "loss": 0.2891, + "step": 40145 + }, + { + "epoch": 0.8036633886344869, + "grad_norm": 1.0123436450958252, + "learning_rate": 9.771715290483085e-07, + "loss": 0.2977, + "step": 40146 + }, + { + "epoch": 0.8036834071516152, + "grad_norm": 1.2268741130828857, + "learning_rate": 9.769790169996279e-07, + "loss": 0.2597, + "step": 40147 + }, + { + "epoch": 0.8037034256687436, + "grad_norm": 1.1285430192947388, + "learning_rate": 9.76786521862863e-07, + "loss": 0.2965, + "step": 40148 + }, + { + "epoch": 0.8037234441858719, + "grad_norm": 1.2437087297439575, + "learning_rate": 9.765940436388216e-07, + "loss": 0.3186, + "step": 40149 + }, + { + "epoch": 0.8037434627030002, + "grad_norm": 1.0765299797058105, + "learning_rate": 9.76401582328313e-07, + "loss": 0.3232, + "step": 40150 + }, + { + "epoch": 0.8037634812201286, + "grad_norm": 1.0726878643035889, + "learning_rate": 9.762091379321465e-07, + "loss": 0.295, + "step": 40151 + }, + { + "epoch": 0.8037834997372569, + "grad_norm": 1.9182230234146118, + "learning_rate": 9.760167104511292e-07, + "loss": 0.6899, + "step": 40152 + }, + { + "epoch": 0.8038035182543853, + "grad_norm": 1.3064923286437988, + "learning_rate": 9.758242998860733e-07, + "loss": 0.3104, + "step": 40153 + }, + { + "epoch": 0.8038235367715136, + "grad_norm": 1.266028881072998, + "learning_rate": 9.756319062377862e-07, + "loss": 0.3216, + "step": 40154 + }, + { + "epoch": 0.803843555288642, + "grad_norm": 1.1153889894485474, + "learning_rate": 9.754395295070745e-07, + "loss": 0.3926, + "step": 40155 + }, + { + "epoch": 0.8038635738057703, + "grad_norm": 1.0578691959381104, + "learning_rate": 9.752471696947507e-07, + "loss": 0.2547, + "step": 40156 + }, + { + "epoch": 0.8038835923228986, + "grad_norm": 1.0893930196762085, + "learning_rate": 9.750548268016201e-07, + "loss": 0.3085, + "step": 40157 + }, + { + "epoch": 0.803903610840027, + "grad_norm": 1.8187665939331055, + "learning_rate": 9.748625008284945e-07, + "loss": 0.7587, + "step": 40158 + }, + { + "epoch": 0.8039236293571553, + "grad_norm": 1.0233820676803589, + "learning_rate": 9.74670191776181e-07, + "loss": 0.2929, + "step": 40159 + }, + { + "epoch": 0.8039436478742837, + "grad_norm": 1.7871564626693726, + "learning_rate": 9.744778996454867e-07, + "loss": 0.761, + "step": 40160 + }, + { + "epoch": 0.803963666391412, + "grad_norm": 1.1260170936584473, + "learning_rate": 9.742856244372207e-07, + "loss": 0.3079, + "step": 40161 + }, + { + "epoch": 0.8039836849085404, + "grad_norm": 1.1340713500976562, + "learning_rate": 9.740933661521923e-07, + "loss": 0.3389, + "step": 40162 + }, + { + "epoch": 0.8040037034256687, + "grad_norm": 1.2278141975402832, + "learning_rate": 9.739011247912094e-07, + "loss": 0.2635, + "step": 40163 + }, + { + "epoch": 0.8040237219427971, + "grad_norm": 1.116755723953247, + "learning_rate": 9.737089003550792e-07, + "loss": 0.3477, + "step": 40164 + }, + { + "epoch": 0.8040437404599254, + "grad_norm": 1.198998212814331, + "learning_rate": 9.735166928446094e-07, + "loss": 0.2667, + "step": 40165 + }, + { + "epoch": 0.8040637589770537, + "grad_norm": 1.0779918432235718, + "learning_rate": 9.733245022606102e-07, + "loss": 0.2494, + "step": 40166 + }, + { + "epoch": 0.8040837774941821, + "grad_norm": 1.1478077173233032, + "learning_rate": 9.731323286038885e-07, + "loss": 0.3058, + "step": 40167 + }, + { + "epoch": 0.8041037960113104, + "grad_norm": 1.2685052156448364, + "learning_rate": 9.729401718752501e-07, + "loss": 0.3149, + "step": 40168 + }, + { + "epoch": 0.8041238145284388, + "grad_norm": 1.0230392217636108, + "learning_rate": 9.727480320755062e-07, + "loss": 0.3231, + "step": 40169 + }, + { + "epoch": 0.8041438330455671, + "grad_norm": 1.7529001235961914, + "learning_rate": 9.725559092054615e-07, + "loss": 0.7794, + "step": 40170 + }, + { + "epoch": 0.8041638515626955, + "grad_norm": 1.0689603090286255, + "learning_rate": 9.723638032659265e-07, + "loss": 0.2906, + "step": 40171 + }, + { + "epoch": 0.8041838700798238, + "grad_norm": 1.099990963935852, + "learning_rate": 9.721717142577069e-07, + "loss": 0.3082, + "step": 40172 + }, + { + "epoch": 0.8042038885969521, + "grad_norm": 1.057706356048584, + "learning_rate": 9.719796421816109e-07, + "loss": 0.3102, + "step": 40173 + }, + { + "epoch": 0.8042239071140805, + "grad_norm": 1.1756064891815186, + "learning_rate": 9.717875870384446e-07, + "loss": 0.2979, + "step": 40174 + }, + { + "epoch": 0.8042439256312088, + "grad_norm": 1.0662257671356201, + "learning_rate": 9.715955488290175e-07, + "loss": 0.3044, + "step": 40175 + }, + { + "epoch": 0.8042639441483372, + "grad_norm": 1.1874220371246338, + "learning_rate": 9.714035275541356e-07, + "loss": 0.3243, + "step": 40176 + }, + { + "epoch": 0.8042839626654655, + "grad_norm": 1.9866714477539062, + "learning_rate": 9.712115232146063e-07, + "loss": 0.7833, + "step": 40177 + }, + { + "epoch": 0.8043039811825939, + "grad_norm": 1.9049561023712158, + "learning_rate": 9.710195358112356e-07, + "loss": 0.7656, + "step": 40178 + }, + { + "epoch": 0.8043239996997222, + "grad_norm": 1.0961953401565552, + "learning_rate": 9.708275653448329e-07, + "loss": 0.2921, + "step": 40179 + }, + { + "epoch": 0.8043440182168505, + "grad_norm": 1.1074823141098022, + "learning_rate": 9.70635611816204e-07, + "loss": 0.2488, + "step": 40180 + }, + { + "epoch": 0.8043640367339789, + "grad_norm": 1.839919090270996, + "learning_rate": 9.704436752261548e-07, + "loss": 0.7683, + "step": 40181 + }, + { + "epoch": 0.8043840552511072, + "grad_norm": 1.1009514331817627, + "learning_rate": 9.702517555754942e-07, + "loss": 0.2949, + "step": 40182 + }, + { + "epoch": 0.8044040737682356, + "grad_norm": 1.9105395078659058, + "learning_rate": 9.700598528650274e-07, + "loss": 0.7847, + "step": 40183 + }, + { + "epoch": 0.8044240922853639, + "grad_norm": 1.280717372894287, + "learning_rate": 9.698679670955624e-07, + "loss": 0.3473, + "step": 40184 + }, + { + "epoch": 0.8044441108024923, + "grad_norm": 1.1453121900558472, + "learning_rate": 9.696760982679054e-07, + "loss": 0.2614, + "step": 40185 + }, + { + "epoch": 0.8044641293196206, + "grad_norm": 1.2097738981246948, + "learning_rate": 9.694842463828625e-07, + "loss": 0.2887, + "step": 40186 + }, + { + "epoch": 0.804484147836749, + "grad_norm": 1.9891088008880615, + "learning_rate": 9.692924114412395e-07, + "loss": 0.7437, + "step": 40187 + }, + { + "epoch": 0.8045041663538773, + "grad_norm": 1.0837888717651367, + "learning_rate": 9.691005934438447e-07, + "loss": 0.2771, + "step": 40188 + }, + { + "epoch": 0.8045241848710056, + "grad_norm": 1.8327932357788086, + "learning_rate": 9.689087923914837e-07, + "loss": 0.6923, + "step": 40189 + }, + { + "epoch": 0.804544203388134, + "grad_norm": 1.140710473060608, + "learning_rate": 9.687170082849629e-07, + "loss": 0.2821, + "step": 40190 + }, + { + "epoch": 0.8045642219052623, + "grad_norm": 1.1291310787200928, + "learning_rate": 9.685252411250868e-07, + "loss": 0.2483, + "step": 40191 + }, + { + "epoch": 0.8045842404223907, + "grad_norm": 1.1155664920806885, + "learning_rate": 9.68333490912664e-07, + "loss": 0.2566, + "step": 40192 + }, + { + "epoch": 0.804604258939519, + "grad_norm": 1.3502463102340698, + "learning_rate": 9.681417576484997e-07, + "loss": 0.302, + "step": 40193 + }, + { + "epoch": 0.8046242774566474, + "grad_norm": 1.135304570198059, + "learning_rate": 9.679500413333985e-07, + "loss": 0.3103, + "step": 40194 + }, + { + "epoch": 0.8046442959737757, + "grad_norm": 1.1518418788909912, + "learning_rate": 9.67758341968169e-07, + "loss": 0.2751, + "step": 40195 + }, + { + "epoch": 0.804664314490904, + "grad_norm": 1.0980867147445679, + "learning_rate": 9.67566659553615e-07, + "loss": 0.2556, + "step": 40196 + }, + { + "epoch": 0.8046843330080324, + "grad_norm": 1.0890761613845825, + "learning_rate": 9.673749940905436e-07, + "loss": 0.3, + "step": 40197 + }, + { + "epoch": 0.8047043515251607, + "grad_norm": 1.131432056427002, + "learning_rate": 9.671833455797596e-07, + "loss": 0.3291, + "step": 40198 + }, + { + "epoch": 0.8047243700422891, + "grad_norm": 1.0568419694900513, + "learning_rate": 9.669917140220697e-07, + "loss": 0.2733, + "step": 40199 + }, + { + "epoch": 0.8047443885594174, + "grad_norm": 1.1240346431732178, + "learning_rate": 9.66800099418277e-07, + "loss": 0.3086, + "step": 40200 + }, + { + "epoch": 0.8047644070765458, + "grad_norm": 1.3001301288604736, + "learning_rate": 9.6660850176919e-07, + "loss": 0.3549, + "step": 40201 + }, + { + "epoch": 0.8047844255936741, + "grad_norm": 1.0146305561065674, + "learning_rate": 9.66416921075613e-07, + "loss": 0.2662, + "step": 40202 + }, + { + "epoch": 0.8048044441108025, + "grad_norm": 1.0103318691253662, + "learning_rate": 9.662253573383513e-07, + "loss": 0.2911, + "step": 40203 + }, + { + "epoch": 0.8048244626279308, + "grad_norm": 1.1448142528533936, + "learning_rate": 9.660338105582095e-07, + "loss": 0.2803, + "step": 40204 + }, + { + "epoch": 0.8048444811450591, + "grad_norm": 1.1881636381149292, + "learning_rate": 9.65842280735993e-07, + "loss": 0.2871, + "step": 40205 + }, + { + "epoch": 0.8048644996621875, + "grad_norm": 1.1817302703857422, + "learning_rate": 9.65650767872508e-07, + "loss": 0.2642, + "step": 40206 + }, + { + "epoch": 0.8048845181793158, + "grad_norm": 1.4237874746322632, + "learning_rate": 9.65459271968558e-07, + "loss": 0.3289, + "step": 40207 + }, + { + "epoch": 0.8049045366964442, + "grad_norm": 1.1398133039474487, + "learning_rate": 9.652677930249505e-07, + "loss": 0.2709, + "step": 40208 + }, + { + "epoch": 0.8049245552135725, + "grad_norm": 1.168412685394287, + "learning_rate": 9.65076331042487e-07, + "loss": 0.2912, + "step": 40209 + }, + { + "epoch": 0.8049445737307009, + "grad_norm": 1.0248409509658813, + "learning_rate": 9.648848860219761e-07, + "loss": 0.2527, + "step": 40210 + }, + { + "epoch": 0.8049645922478292, + "grad_norm": 1.0614004135131836, + "learning_rate": 9.64693457964221e-07, + "loss": 0.2565, + "step": 40211 + }, + { + "epoch": 0.8049846107649575, + "grad_norm": 1.1997569799423218, + "learning_rate": 9.645020468700255e-07, + "loss": 0.3044, + "step": 40212 + }, + { + "epoch": 0.8050046292820859, + "grad_norm": 1.0252530574798584, + "learning_rate": 9.643106527401951e-07, + "loss": 0.2807, + "step": 40213 + }, + { + "epoch": 0.8050246477992142, + "grad_norm": 1.249366283416748, + "learning_rate": 9.64119275575533e-07, + "loss": 0.3338, + "step": 40214 + }, + { + "epoch": 0.8050446663163426, + "grad_norm": 1.0713034868240356, + "learning_rate": 9.639279153768466e-07, + "loss": 0.2988, + "step": 40215 + }, + { + "epoch": 0.8050646848334709, + "grad_norm": 1.1154981851577759, + "learning_rate": 9.637365721449382e-07, + "loss": 0.3132, + "step": 40216 + }, + { + "epoch": 0.8050847033505993, + "grad_norm": 1.3456512689590454, + "learning_rate": 9.635452458806128e-07, + "loss": 0.2671, + "step": 40217 + }, + { + "epoch": 0.8051047218677276, + "grad_norm": 1.0491013526916504, + "learning_rate": 9.633539365846733e-07, + "loss": 0.2637, + "step": 40218 + }, + { + "epoch": 0.805124740384856, + "grad_norm": 1.2143969535827637, + "learning_rate": 9.631626442579268e-07, + "loss": 0.3204, + "step": 40219 + }, + { + "epoch": 0.8051447589019843, + "grad_norm": 1.0199995040893555, + "learning_rate": 9.629713689011738e-07, + "loss": 0.3243, + "step": 40220 + }, + { + "epoch": 0.8051647774191126, + "grad_norm": 1.1510710716247559, + "learning_rate": 9.627801105152217e-07, + "loss": 0.3088, + "step": 40221 + }, + { + "epoch": 0.805184795936241, + "grad_norm": 1.1210675239562988, + "learning_rate": 9.625888691008728e-07, + "loss": 0.3578, + "step": 40222 + }, + { + "epoch": 0.8052048144533693, + "grad_norm": 1.199695110321045, + "learning_rate": 9.623976446589317e-07, + "loss": 0.2863, + "step": 40223 + }, + { + "epoch": 0.8052248329704977, + "grad_norm": 1.32460355758667, + "learning_rate": 9.62206437190203e-07, + "loss": 0.2962, + "step": 40224 + }, + { + "epoch": 0.805244851487626, + "grad_norm": 1.073861837387085, + "learning_rate": 9.620152466954885e-07, + "loss": 0.2907, + "step": 40225 + }, + { + "epoch": 0.8052648700047544, + "grad_norm": 1.1967191696166992, + "learning_rate": 9.618240731755936e-07, + "loss": 0.287, + "step": 40226 + }, + { + "epoch": 0.8052848885218827, + "grad_norm": 1.2275431156158447, + "learning_rate": 9.6163291663132e-07, + "loss": 0.3257, + "step": 40227 + }, + { + "epoch": 0.805304907039011, + "grad_norm": 1.14738929271698, + "learning_rate": 9.61441777063473e-07, + "loss": 0.2805, + "step": 40228 + }, + { + "epoch": 0.8053249255561394, + "grad_norm": 1.037393569946289, + "learning_rate": 9.61250654472856e-07, + "loss": 0.3017, + "step": 40229 + }, + { + "epoch": 0.8053449440732677, + "grad_norm": 1.290526032447815, + "learning_rate": 9.610595488602725e-07, + "loss": 0.2484, + "step": 40230 + }, + { + "epoch": 0.8053649625903961, + "grad_norm": 1.1562410593032837, + "learning_rate": 9.608684602265245e-07, + "loss": 0.3015, + "step": 40231 + }, + { + "epoch": 0.8053849811075244, + "grad_norm": 1.0542211532592773, + "learning_rate": 9.60677388572417e-07, + "loss": 0.2808, + "step": 40232 + }, + { + "epoch": 0.8054049996246528, + "grad_norm": 1.2330436706542969, + "learning_rate": 9.604863338987513e-07, + "loss": 0.3044, + "step": 40233 + }, + { + "epoch": 0.8054250181417811, + "grad_norm": 1.1256004571914673, + "learning_rate": 9.602952962063328e-07, + "loss": 0.2901, + "step": 40234 + }, + { + "epoch": 0.8054450366589095, + "grad_norm": 1.1570241451263428, + "learning_rate": 9.60104275495964e-07, + "loss": 0.3215, + "step": 40235 + }, + { + "epoch": 0.8054650551760378, + "grad_norm": 1.0541719198226929, + "learning_rate": 9.599132717684455e-07, + "loss": 0.3002, + "step": 40236 + }, + { + "epoch": 0.8054850736931661, + "grad_norm": 1.3063546419143677, + "learning_rate": 9.59722285024584e-07, + "loss": 0.2999, + "step": 40237 + }, + { + "epoch": 0.8055050922102945, + "grad_norm": 1.8802554607391357, + "learning_rate": 9.595313152651803e-07, + "loss": 0.7033, + "step": 40238 + }, + { + "epoch": 0.8055251107274228, + "grad_norm": 1.0644680261611938, + "learning_rate": 9.593403624910374e-07, + "loss": 0.2766, + "step": 40239 + }, + { + "epoch": 0.8055451292445512, + "grad_norm": 1.2215080261230469, + "learning_rate": 9.591494267029567e-07, + "loss": 0.2583, + "step": 40240 + }, + { + "epoch": 0.8055651477616795, + "grad_norm": 1.108936071395874, + "learning_rate": 9.589585079017439e-07, + "loss": 0.3218, + "step": 40241 + }, + { + "epoch": 0.8055851662788079, + "grad_norm": 1.1419672966003418, + "learning_rate": 9.587676060881995e-07, + "loss": 0.3056, + "step": 40242 + }, + { + "epoch": 0.8056051847959362, + "grad_norm": 1.066598892211914, + "learning_rate": 9.585767212631263e-07, + "loss": 0.2767, + "step": 40243 + }, + { + "epoch": 0.8056252033130645, + "grad_norm": 2.032557487487793, + "learning_rate": 9.58385853427327e-07, + "loss": 0.8048, + "step": 40244 + }, + { + "epoch": 0.8056452218301929, + "grad_norm": 1.117875099182129, + "learning_rate": 9.581950025816028e-07, + "loss": 0.2946, + "step": 40245 + }, + { + "epoch": 0.8056652403473212, + "grad_norm": 1.0361177921295166, + "learning_rate": 9.58004168726757e-07, + "loss": 0.3052, + "step": 40246 + }, + { + "epoch": 0.8056852588644496, + "grad_norm": 1.0280917882919312, + "learning_rate": 9.578133518635929e-07, + "loss": 0.2464, + "step": 40247 + }, + { + "epoch": 0.8057052773815779, + "grad_norm": 1.0661654472351074, + "learning_rate": 9.576225519929116e-07, + "loss": 0.2966, + "step": 40248 + }, + { + "epoch": 0.8057252958987063, + "grad_norm": 1.107759714126587, + "learning_rate": 9.574317691155138e-07, + "loss": 0.2927, + "step": 40249 + }, + { + "epoch": 0.8057453144158346, + "grad_norm": 1.2970730066299438, + "learning_rate": 9.572410032322044e-07, + "loss": 0.2947, + "step": 40250 + }, + { + "epoch": 0.805765332932963, + "grad_norm": 1.2163150310516357, + "learning_rate": 9.570502543437837e-07, + "loss": 0.3653, + "step": 40251 + }, + { + "epoch": 0.8057853514500913, + "grad_norm": 1.1747245788574219, + "learning_rate": 9.568595224510535e-07, + "loss": 0.2933, + "step": 40252 + }, + { + "epoch": 0.8058053699672196, + "grad_norm": 1.1779879331588745, + "learning_rate": 9.566688075548147e-07, + "loss": 0.2438, + "step": 40253 + }, + { + "epoch": 0.805825388484348, + "grad_norm": 1.069846749305725, + "learning_rate": 9.564781096558711e-07, + "loss": 0.2821, + "step": 40254 + }, + { + "epoch": 0.8058454070014763, + "grad_norm": 1.0161246061325073, + "learning_rate": 9.56287428755024e-07, + "loss": 0.3101, + "step": 40255 + }, + { + "epoch": 0.8058654255186047, + "grad_norm": 1.1016013622283936, + "learning_rate": 9.560967648530733e-07, + "loss": 0.329, + "step": 40256 + }, + { + "epoch": 0.805885444035733, + "grad_norm": 1.0692247152328491, + "learning_rate": 9.559061179508223e-07, + "loss": 0.2919, + "step": 40257 + }, + { + "epoch": 0.8059054625528614, + "grad_norm": 1.109089732170105, + "learning_rate": 9.557154880490704e-07, + "loss": 0.3193, + "step": 40258 + }, + { + "epoch": 0.8059254810699897, + "grad_norm": 1.0798790454864502, + "learning_rate": 9.555248751486201e-07, + "loss": 0.2984, + "step": 40259 + }, + { + "epoch": 0.805945499587118, + "grad_norm": 1.2445703744888306, + "learning_rate": 9.55334279250274e-07, + "loss": 0.3319, + "step": 40260 + }, + { + "epoch": 0.8059655181042464, + "grad_norm": 1.0841724872589111, + "learning_rate": 9.55143700354832e-07, + "loss": 0.2686, + "step": 40261 + }, + { + "epoch": 0.8059855366213747, + "grad_norm": 1.0890367031097412, + "learning_rate": 9.549531384630945e-07, + "loss": 0.3172, + "step": 40262 + }, + { + "epoch": 0.8060055551385031, + "grad_norm": 2.1917953491210938, + "learning_rate": 9.547625935758648e-07, + "loss": 0.793, + "step": 40263 + }, + { + "epoch": 0.8060255736556314, + "grad_norm": 1.2541687488555908, + "learning_rate": 9.545720656939423e-07, + "loss": 0.3345, + "step": 40264 + }, + { + "epoch": 0.8060455921727598, + "grad_norm": 1.1883797645568848, + "learning_rate": 9.543815548181285e-07, + "loss": 0.3044, + "step": 40265 + }, + { + "epoch": 0.8060656106898881, + "grad_norm": 2.0461251735687256, + "learning_rate": 9.54191060949224e-07, + "loss": 0.7687, + "step": 40266 + }, + { + "epoch": 0.8060856292070165, + "grad_norm": 1.4105720520019531, + "learning_rate": 9.540005840880284e-07, + "loss": 0.2879, + "step": 40267 + }, + { + "epoch": 0.8061056477241448, + "grad_norm": 1.1180496215820312, + "learning_rate": 9.538101242353449e-07, + "loss": 0.331, + "step": 40268 + }, + { + "epoch": 0.8061256662412731, + "grad_norm": 1.102644920349121, + "learning_rate": 9.53619681391973e-07, + "loss": 0.3289, + "step": 40269 + }, + { + "epoch": 0.8061456847584015, + "grad_norm": 1.2278711795806885, + "learning_rate": 9.534292555587127e-07, + "loss": 0.3241, + "step": 40270 + }, + { + "epoch": 0.8061657032755298, + "grad_norm": 1.0468355417251587, + "learning_rate": 9.53238846736364e-07, + "loss": 0.2746, + "step": 40271 + }, + { + "epoch": 0.8061857217926582, + "grad_norm": 1.154284119606018, + "learning_rate": 9.53048454925728e-07, + "loss": 0.2927, + "step": 40272 + }, + { + "epoch": 0.8062057403097865, + "grad_norm": 1.2034051418304443, + "learning_rate": 9.528580801276072e-07, + "loss": 0.3258, + "step": 40273 + }, + { + "epoch": 0.8062257588269149, + "grad_norm": 1.1167526245117188, + "learning_rate": 9.526677223428e-07, + "loss": 0.2999, + "step": 40274 + }, + { + "epoch": 0.8062457773440432, + "grad_norm": 1.1087393760681152, + "learning_rate": 9.524773815721062e-07, + "loss": 0.3037, + "step": 40275 + }, + { + "epoch": 0.8062657958611715, + "grad_norm": 1.101654052734375, + "learning_rate": 9.522870578163257e-07, + "loss": 0.2609, + "step": 40276 + }, + { + "epoch": 0.8062858143782999, + "grad_norm": 1.0543445348739624, + "learning_rate": 9.520967510762602e-07, + "loss": 0.2765, + "step": 40277 + }, + { + "epoch": 0.8063058328954282, + "grad_norm": 1.0751805305480957, + "learning_rate": 9.51906461352709e-07, + "loss": 0.281, + "step": 40278 + }, + { + "epoch": 0.8063258514125566, + "grad_norm": 1.3113515377044678, + "learning_rate": 9.517161886464721e-07, + "loss": 0.339, + "step": 40279 + }, + { + "epoch": 0.8063458699296849, + "grad_norm": 1.2688440084457397, + "learning_rate": 9.515259329583476e-07, + "loss": 0.3224, + "step": 40280 + }, + { + "epoch": 0.8063658884468133, + "grad_norm": 2.041306495666504, + "learning_rate": 9.513356942891383e-07, + "loss": 0.7629, + "step": 40281 + }, + { + "epoch": 0.8063859069639416, + "grad_norm": 1.1658406257629395, + "learning_rate": 9.511454726396424e-07, + "loss": 0.2804, + "step": 40282 + }, + { + "epoch": 0.80640592548107, + "grad_norm": 1.1738348007202148, + "learning_rate": 9.509552680106593e-07, + "loss": 0.3166, + "step": 40283 + }, + { + "epoch": 0.8064259439981983, + "grad_norm": 1.0521714687347412, + "learning_rate": 9.507650804029883e-07, + "loss": 0.2728, + "step": 40284 + }, + { + "epoch": 0.8064459625153266, + "grad_norm": 1.2141079902648926, + "learning_rate": 9.505749098174288e-07, + "loss": 0.3193, + "step": 40285 + }, + { + "epoch": 0.806465981032455, + "grad_norm": 1.0077927112579346, + "learning_rate": 9.503847562547824e-07, + "loss": 0.3012, + "step": 40286 + }, + { + "epoch": 0.8064859995495833, + "grad_norm": 1.1526790857315063, + "learning_rate": 9.501946197158473e-07, + "loss": 0.281, + "step": 40287 + }, + { + "epoch": 0.8065060180667117, + "grad_norm": 1.1041556596755981, + "learning_rate": 9.500045002014225e-07, + "loss": 0.3089, + "step": 40288 + }, + { + "epoch": 0.80652603658384, + "grad_norm": 1.097333312034607, + "learning_rate": 9.498143977123053e-07, + "loss": 0.2966, + "step": 40289 + }, + { + "epoch": 0.8065460551009684, + "grad_norm": 1.125030755996704, + "learning_rate": 9.496243122492987e-07, + "loss": 0.3194, + "step": 40290 + }, + { + "epoch": 0.8065660736180967, + "grad_norm": 1.1026222705841064, + "learning_rate": 9.494342438131993e-07, + "loss": 0.2931, + "step": 40291 + }, + { + "epoch": 0.806586092135225, + "grad_norm": 1.0752816200256348, + "learning_rate": 9.492441924048074e-07, + "loss": 0.252, + "step": 40292 + }, + { + "epoch": 0.8066061106523534, + "grad_norm": 2.038219928741455, + "learning_rate": 9.490541580249191e-07, + "loss": 0.7758, + "step": 40293 + }, + { + "epoch": 0.8066261291694817, + "grad_norm": 1.2290974855422974, + "learning_rate": 9.488641406743371e-07, + "loss": 0.3321, + "step": 40294 + }, + { + "epoch": 0.8066461476866101, + "grad_norm": 2.077456474304199, + "learning_rate": 9.48674140353858e-07, + "loss": 0.7356, + "step": 40295 + }, + { + "epoch": 0.8066661662037384, + "grad_norm": 1.0848441123962402, + "learning_rate": 9.484841570642817e-07, + "loss": 0.3109, + "step": 40296 + }, + { + "epoch": 0.8066861847208668, + "grad_norm": 1.0631952285766602, + "learning_rate": 9.482941908064041e-07, + "loss": 0.2948, + "step": 40297 + }, + { + "epoch": 0.8067062032379951, + "grad_norm": 1.2832003831863403, + "learning_rate": 9.48104241581026e-07, + "loss": 0.2954, + "step": 40298 + }, + { + "epoch": 0.8067262217551235, + "grad_norm": 1.1675326824188232, + "learning_rate": 9.47914309388947e-07, + "loss": 0.3042, + "step": 40299 + }, + { + "epoch": 0.8067462402722518, + "grad_norm": 1.1413441896438599, + "learning_rate": 9.477243942309644e-07, + "loss": 0.2934, + "step": 40300 + }, + { + "epoch": 0.8067662587893801, + "grad_norm": 1.0109390020370483, + "learning_rate": 9.475344961078764e-07, + "loss": 0.3133, + "step": 40301 + }, + { + "epoch": 0.8067862773065085, + "grad_norm": 1.0826983451843262, + "learning_rate": 9.473446150204795e-07, + "loss": 0.3132, + "step": 40302 + }, + { + "epoch": 0.8068062958236368, + "grad_norm": 1.266242504119873, + "learning_rate": 9.471547509695755e-07, + "loss": 0.3239, + "step": 40303 + }, + { + "epoch": 0.8068263143407652, + "grad_norm": 1.1248950958251953, + "learning_rate": 9.469649039559609e-07, + "loss": 0.2883, + "step": 40304 + }, + { + "epoch": 0.8068463328578935, + "grad_norm": 1.1058744192123413, + "learning_rate": 9.467750739804332e-07, + "loss": 0.2995, + "step": 40305 + }, + { + "epoch": 0.8068663513750219, + "grad_norm": 1.0256274938583374, + "learning_rate": 9.465852610437898e-07, + "loss": 0.2701, + "step": 40306 + }, + { + "epoch": 0.8068863698921502, + "grad_norm": 1.1484354734420776, + "learning_rate": 9.463954651468304e-07, + "loss": 0.2921, + "step": 40307 + }, + { + "epoch": 0.8069063884092785, + "grad_norm": 1.1186875104904175, + "learning_rate": 9.46205686290353e-07, + "loss": 0.2834, + "step": 40308 + }, + { + "epoch": 0.8069264069264069, + "grad_norm": 1.1584147214889526, + "learning_rate": 9.460159244751538e-07, + "loss": 0.3279, + "step": 40309 + }, + { + "epoch": 0.8069464254435352, + "grad_norm": 1.9640989303588867, + "learning_rate": 9.458261797020301e-07, + "loss": 0.7637, + "step": 40310 + }, + { + "epoch": 0.8069664439606636, + "grad_norm": 1.1020963191986084, + "learning_rate": 9.456364519717809e-07, + "loss": 0.3199, + "step": 40311 + }, + { + "epoch": 0.8069864624777919, + "grad_norm": 1.1390503644943237, + "learning_rate": 9.454467412852042e-07, + "loss": 0.3388, + "step": 40312 + }, + { + "epoch": 0.8070064809949203, + "grad_norm": 1.2134714126586914, + "learning_rate": 9.452570476430978e-07, + "loss": 0.3198, + "step": 40313 + }, + { + "epoch": 0.8070264995120486, + "grad_norm": 1.0796613693237305, + "learning_rate": 9.450673710462577e-07, + "loss": 0.305, + "step": 40314 + }, + { + "epoch": 0.807046518029177, + "grad_norm": 1.0989924669265747, + "learning_rate": 9.448777114954805e-07, + "loss": 0.2737, + "step": 40315 + }, + { + "epoch": 0.8070665365463053, + "grad_norm": 1.081599235534668, + "learning_rate": 9.446880689915655e-07, + "loss": 0.2768, + "step": 40316 + }, + { + "epoch": 0.8070865550634336, + "grad_norm": 1.2436381578445435, + "learning_rate": 9.4449844353531e-07, + "loss": 0.2693, + "step": 40317 + }, + { + "epoch": 0.807106573580562, + "grad_norm": 1.045973777770996, + "learning_rate": 9.443088351275098e-07, + "loss": 0.2983, + "step": 40318 + }, + { + "epoch": 0.8071265920976903, + "grad_norm": 1.0351258516311646, + "learning_rate": 9.441192437689623e-07, + "loss": 0.2598, + "step": 40319 + }, + { + "epoch": 0.8071466106148187, + "grad_norm": 1.060835361480713, + "learning_rate": 9.439296694604639e-07, + "loss": 0.2971, + "step": 40320 + }, + { + "epoch": 0.807166629131947, + "grad_norm": 1.0348494052886963, + "learning_rate": 9.437401122028133e-07, + "loss": 0.2771, + "step": 40321 + }, + { + "epoch": 0.8071866476490754, + "grad_norm": 1.1177537441253662, + "learning_rate": 9.435505719968064e-07, + "loss": 0.2673, + "step": 40322 + }, + { + "epoch": 0.8072066661662037, + "grad_norm": 1.1415441036224365, + "learning_rate": 9.433610488432388e-07, + "loss": 0.2626, + "step": 40323 + }, + { + "epoch": 0.807226684683332, + "grad_norm": 1.082991600036621, + "learning_rate": 9.431715427429077e-07, + "loss": 0.3083, + "step": 40324 + }, + { + "epoch": 0.8072467032004604, + "grad_norm": 1.9076793193817139, + "learning_rate": 9.429820536966122e-07, + "loss": 0.7414, + "step": 40325 + }, + { + "epoch": 0.8072667217175887, + "grad_norm": 1.0996010303497314, + "learning_rate": 9.42792581705147e-07, + "loss": 0.3183, + "step": 40326 + }, + { + "epoch": 0.8072867402347171, + "grad_norm": 1.0875743627548218, + "learning_rate": 9.426031267693087e-07, + "loss": 0.3313, + "step": 40327 + }, + { + "epoch": 0.8073067587518454, + "grad_norm": 1.1160873174667358, + "learning_rate": 9.424136888898938e-07, + "loss": 0.3059, + "step": 40328 + }, + { + "epoch": 0.8073267772689738, + "grad_norm": 1.172419548034668, + "learning_rate": 9.422242680676974e-07, + "loss": 0.3244, + "step": 40329 + }, + { + "epoch": 0.8073467957861021, + "grad_norm": 1.1215274333953857, + "learning_rate": 9.420348643035176e-07, + "loss": 0.3053, + "step": 40330 + }, + { + "epoch": 0.8073668143032305, + "grad_norm": 0.9982451796531677, + "learning_rate": 9.418454775981506e-07, + "loss": 0.2634, + "step": 40331 + }, + { + "epoch": 0.8073868328203588, + "grad_norm": 1.231902003288269, + "learning_rate": 9.416561079523917e-07, + "loss": 0.3173, + "step": 40332 + }, + { + "epoch": 0.8074068513374871, + "grad_norm": 1.89140784740448, + "learning_rate": 9.414667553670354e-07, + "loss": 0.7471, + "step": 40333 + }, + { + "epoch": 0.8074268698546155, + "grad_norm": 1.3003387451171875, + "learning_rate": 9.412774198428815e-07, + "loss": 0.334, + "step": 40334 + }, + { + "epoch": 0.8074468883717438, + "grad_norm": 1.1061878204345703, + "learning_rate": 9.410881013807232e-07, + "loss": 0.2785, + "step": 40335 + }, + { + "epoch": 0.8074669068888722, + "grad_norm": 1.1501784324645996, + "learning_rate": 9.408987999813562e-07, + "loss": 0.2912, + "step": 40336 + }, + { + "epoch": 0.8074869254060005, + "grad_norm": 1.0451457500457764, + "learning_rate": 9.40709515645577e-07, + "loss": 0.3315, + "step": 40337 + }, + { + "epoch": 0.8075069439231289, + "grad_norm": 1.7810755968093872, + "learning_rate": 9.405202483741826e-07, + "loss": 0.7251, + "step": 40338 + }, + { + "epoch": 0.8075269624402572, + "grad_norm": 1.0939066410064697, + "learning_rate": 9.403309981679676e-07, + "loss": 0.3457, + "step": 40339 + }, + { + "epoch": 0.8075469809573855, + "grad_norm": 1.2116305828094482, + "learning_rate": 9.401417650277272e-07, + "loss": 0.3123, + "step": 40340 + }, + { + "epoch": 0.8075669994745139, + "grad_norm": 1.246282935142517, + "learning_rate": 9.399525489542572e-07, + "loss": 0.3571, + "step": 40341 + }, + { + "epoch": 0.8075870179916422, + "grad_norm": 1.089550256729126, + "learning_rate": 9.397633499483522e-07, + "loss": 0.2863, + "step": 40342 + }, + { + "epoch": 0.8076070365087706, + "grad_norm": 1.1045962572097778, + "learning_rate": 9.39574168010809e-07, + "loss": 0.2947, + "step": 40343 + }, + { + "epoch": 0.8076270550258989, + "grad_norm": 0.9914789795875549, + "learning_rate": 9.393850031424223e-07, + "loss": 0.3075, + "step": 40344 + }, + { + "epoch": 0.8076470735430273, + "grad_norm": 1.1168348789215088, + "learning_rate": 9.391958553439873e-07, + "loss": 0.2872, + "step": 40345 + }, + { + "epoch": 0.8076670920601556, + "grad_norm": 1.054684042930603, + "learning_rate": 9.390067246162976e-07, + "loss": 0.2814, + "step": 40346 + }, + { + "epoch": 0.807687110577284, + "grad_norm": 1.084898829460144, + "learning_rate": 9.38817610960151e-07, + "loss": 0.257, + "step": 40347 + }, + { + "epoch": 0.8077071290944123, + "grad_norm": 1.124638557434082, + "learning_rate": 9.386285143763413e-07, + "loss": 0.2967, + "step": 40348 + }, + { + "epoch": 0.8077271476115406, + "grad_norm": 1.2425981760025024, + "learning_rate": 9.384394348656617e-07, + "loss": 0.3126, + "step": 40349 + }, + { + "epoch": 0.807747166128669, + "grad_norm": 1.1133489608764648, + "learning_rate": 9.382503724289104e-07, + "loss": 0.2779, + "step": 40350 + }, + { + "epoch": 0.8077671846457973, + "grad_norm": 1.2905831336975098, + "learning_rate": 9.380613270668787e-07, + "loss": 0.2959, + "step": 40351 + }, + { + "epoch": 0.8077872031629257, + "grad_norm": 1.2012652158737183, + "learning_rate": 9.378722987803651e-07, + "loss": 0.2803, + "step": 40352 + }, + { + "epoch": 0.807807221680054, + "grad_norm": 1.0969325304031372, + "learning_rate": 9.376832875701614e-07, + "loss": 0.2057, + "step": 40353 + }, + { + "epoch": 0.8078272401971824, + "grad_norm": 1.170589804649353, + "learning_rate": 9.374942934370635e-07, + "loss": 0.2524, + "step": 40354 + }, + { + "epoch": 0.8078472587143107, + "grad_norm": 1.1570937633514404, + "learning_rate": 9.373053163818635e-07, + "loss": 0.2967, + "step": 40355 + }, + { + "epoch": 0.807867277231439, + "grad_norm": 1.0970791578292847, + "learning_rate": 9.371163564053593e-07, + "loss": 0.2625, + "step": 40356 + }, + { + "epoch": 0.8078872957485674, + "grad_norm": 1.1091314554214478, + "learning_rate": 9.369274135083433e-07, + "loss": 0.2974, + "step": 40357 + }, + { + "epoch": 0.8079073142656957, + "grad_norm": 1.8188990354537964, + "learning_rate": 9.367384876916103e-07, + "loss": 0.7066, + "step": 40358 + }, + { + "epoch": 0.8079273327828241, + "grad_norm": 1.1142016649246216, + "learning_rate": 9.365495789559542e-07, + "loss": 0.2604, + "step": 40359 + }, + { + "epoch": 0.8079473512999524, + "grad_norm": 1.064183235168457, + "learning_rate": 9.36360687302168e-07, + "loss": 0.274, + "step": 40360 + }, + { + "epoch": 0.8079673698170808, + "grad_norm": 0.9960860013961792, + "learning_rate": 9.361718127310481e-07, + "loss": 0.2822, + "step": 40361 + }, + { + "epoch": 0.8079873883342091, + "grad_norm": 1.0287446975708008, + "learning_rate": 9.359829552433864e-07, + "loss": 0.2759, + "step": 40362 + }, + { + "epoch": 0.8080074068513375, + "grad_norm": 1.894336223602295, + "learning_rate": 9.357941148399785e-07, + "loss": 0.7159, + "step": 40363 + }, + { + "epoch": 0.8080274253684658, + "grad_norm": 1.0975501537322998, + "learning_rate": 9.356052915216169e-07, + "loss": 0.3253, + "step": 40364 + }, + { + "epoch": 0.8080474438855941, + "grad_norm": 1.0842745304107666, + "learning_rate": 9.354164852890967e-07, + "loss": 0.2949, + "step": 40365 + }, + { + "epoch": 0.8080674624027225, + "grad_norm": 1.1019009351730347, + "learning_rate": 9.352276961432111e-07, + "loss": 0.2864, + "step": 40366 + }, + { + "epoch": 0.8080874809198508, + "grad_norm": 1.9639605283737183, + "learning_rate": 9.350389240847535e-07, + "loss": 0.7702, + "step": 40367 + }, + { + "epoch": 0.8081074994369792, + "grad_norm": 1.0005125999450684, + "learning_rate": 9.348501691145162e-07, + "loss": 0.2751, + "step": 40368 + }, + { + "epoch": 0.8081275179541075, + "grad_norm": 0.9897258281707764, + "learning_rate": 9.346614312332952e-07, + "loss": 0.2888, + "step": 40369 + }, + { + "epoch": 0.8081475364712359, + "grad_norm": 1.9771658182144165, + "learning_rate": 9.344727104418827e-07, + "loss": 0.7525, + "step": 40370 + }, + { + "epoch": 0.8081675549883642, + "grad_norm": 1.1938385963439941, + "learning_rate": 9.342840067410719e-07, + "loss": 0.2843, + "step": 40371 + }, + { + "epoch": 0.8081875735054925, + "grad_norm": 1.024031162261963, + "learning_rate": 9.340953201316566e-07, + "loss": 0.3123, + "step": 40372 + }, + { + "epoch": 0.8082075920226209, + "grad_norm": 0.9832437634468079, + "learning_rate": 9.339066506144279e-07, + "loss": 0.2727, + "step": 40373 + }, + { + "epoch": 0.8082276105397492, + "grad_norm": 1.272033452987671, + "learning_rate": 9.337179981901823e-07, + "loss": 0.3176, + "step": 40374 + }, + { + "epoch": 0.8082476290568776, + "grad_norm": 1.10066556930542, + "learning_rate": 9.335293628597091e-07, + "loss": 0.2991, + "step": 40375 + }, + { + "epoch": 0.8082676475740059, + "grad_norm": 1.1747474670410156, + "learning_rate": 9.333407446238052e-07, + "loss": 0.3058, + "step": 40376 + }, + { + "epoch": 0.8082876660911343, + "grad_norm": 1.1774661540985107, + "learning_rate": 9.331521434832607e-07, + "loss": 0.3244, + "step": 40377 + }, + { + "epoch": 0.8083076846082626, + "grad_norm": 1.0519217252731323, + "learning_rate": 9.329635594388703e-07, + "loss": 0.2849, + "step": 40378 + }, + { + "epoch": 0.808327703125391, + "grad_norm": 1.0331281423568726, + "learning_rate": 9.327749924914259e-07, + "loss": 0.2456, + "step": 40379 + }, + { + "epoch": 0.8083477216425193, + "grad_norm": 1.107338786125183, + "learning_rate": 9.325864426417197e-07, + "loss": 0.3178, + "step": 40380 + }, + { + "epoch": 0.8083677401596476, + "grad_norm": 1.138976812362671, + "learning_rate": 9.32397909890545e-07, + "loss": 0.3006, + "step": 40381 + }, + { + "epoch": 0.808387758676776, + "grad_norm": 1.0592503547668457, + "learning_rate": 9.322093942386934e-07, + "loss": 0.271, + "step": 40382 + }, + { + "epoch": 0.8084077771939043, + "grad_norm": 1.1530145406723022, + "learning_rate": 9.320208956869587e-07, + "loss": 0.2641, + "step": 40383 + }, + { + "epoch": 0.8084277957110327, + "grad_norm": 1.1260454654693604, + "learning_rate": 9.318324142361335e-07, + "loss": 0.2694, + "step": 40384 + }, + { + "epoch": 0.808447814228161, + "grad_norm": 1.162395715713501, + "learning_rate": 9.316439498870083e-07, + "loss": 0.3142, + "step": 40385 + }, + { + "epoch": 0.8084678327452894, + "grad_norm": 1.1068612337112427, + "learning_rate": 9.31455502640376e-07, + "loss": 0.2896, + "step": 40386 + }, + { + "epoch": 0.8084878512624177, + "grad_norm": 1.1344853639602661, + "learning_rate": 9.312670724970302e-07, + "loss": 0.3028, + "step": 40387 + }, + { + "epoch": 0.808507869779546, + "grad_norm": 1.1046264171600342, + "learning_rate": 9.310786594577609e-07, + "loss": 0.321, + "step": 40388 + }, + { + "epoch": 0.8085278882966744, + "grad_norm": 1.1090890169143677, + "learning_rate": 9.308902635233624e-07, + "loss": 0.3082, + "step": 40389 + }, + { + "epoch": 0.8085479068138027, + "grad_norm": 1.2012485265731812, + "learning_rate": 9.307018846946254e-07, + "loss": 0.3014, + "step": 40390 + }, + { + "epoch": 0.8085679253309311, + "grad_norm": 1.0884120464324951, + "learning_rate": 9.305135229723405e-07, + "loss": 0.285, + "step": 40391 + }, + { + "epoch": 0.8085879438480594, + "grad_norm": 1.048480749130249, + "learning_rate": 9.30325178357303e-07, + "loss": 0.2892, + "step": 40392 + }, + { + "epoch": 0.8086079623651878, + "grad_norm": 1.0241402387619019, + "learning_rate": 9.301368508503022e-07, + "loss": 0.2689, + "step": 40393 + }, + { + "epoch": 0.8086279808823161, + "grad_norm": 1.238000750541687, + "learning_rate": 9.299485404521302e-07, + "loss": 0.2873, + "step": 40394 + }, + { + "epoch": 0.8086479993994445, + "grad_norm": 1.0393481254577637, + "learning_rate": 9.297602471635775e-07, + "loss": 0.2672, + "step": 40395 + }, + { + "epoch": 0.8086680179165728, + "grad_norm": 1.089640498161316, + "learning_rate": 9.295719709854378e-07, + "loss": 0.2586, + "step": 40396 + }, + { + "epoch": 0.8086880364337011, + "grad_norm": 1.0315964221954346, + "learning_rate": 9.293837119185017e-07, + "loss": 0.2838, + "step": 40397 + }, + { + "epoch": 0.8087080549508295, + "grad_norm": 1.1990846395492554, + "learning_rate": 9.291954699635608e-07, + "loss": 0.2841, + "step": 40398 + }, + { + "epoch": 0.8087280734679578, + "grad_norm": 2.013101816177368, + "learning_rate": 9.290072451214044e-07, + "loss": 0.8081, + "step": 40399 + }, + { + "epoch": 0.8087480919850862, + "grad_norm": 1.0978882312774658, + "learning_rate": 9.288190373928269e-07, + "loss": 0.3053, + "step": 40400 + }, + { + "epoch": 0.8087681105022145, + "grad_norm": 1.2024251222610474, + "learning_rate": 9.286308467786165e-07, + "loss": 0.3222, + "step": 40401 + }, + { + "epoch": 0.8087881290193429, + "grad_norm": 1.890162467956543, + "learning_rate": 9.284426732795676e-07, + "loss": 0.7266, + "step": 40402 + }, + { + "epoch": 0.8088081475364712, + "grad_norm": 1.144850730895996, + "learning_rate": 9.282545168964691e-07, + "loss": 0.3388, + "step": 40403 + }, + { + "epoch": 0.8088281660535995, + "grad_norm": 1.0986666679382324, + "learning_rate": 9.280663776301114e-07, + "loss": 0.2969, + "step": 40404 + }, + { + "epoch": 0.8088481845707279, + "grad_norm": 1.2174731492996216, + "learning_rate": 9.278782554812877e-07, + "loss": 0.3147, + "step": 40405 + }, + { + "epoch": 0.8088682030878562, + "grad_norm": 1.1376888751983643, + "learning_rate": 9.276901504507873e-07, + "loss": 0.3116, + "step": 40406 + }, + { + "epoch": 0.8088882216049846, + "grad_norm": 1.5399974584579468, + "learning_rate": 9.27502062539401e-07, + "loss": 0.2752, + "step": 40407 + }, + { + "epoch": 0.8089082401221129, + "grad_norm": 1.230574369430542, + "learning_rate": 9.273139917479185e-07, + "loss": 0.3028, + "step": 40408 + }, + { + "epoch": 0.8089282586392413, + "grad_norm": 1.1312611103057861, + "learning_rate": 9.271259380771325e-07, + "loss": 0.298, + "step": 40409 + }, + { + "epoch": 0.8089482771563696, + "grad_norm": 0.9883987307548523, + "learning_rate": 9.269379015278329e-07, + "loss": 0.2486, + "step": 40410 + }, + { + "epoch": 0.808968295673498, + "grad_norm": 0.991461992263794, + "learning_rate": 9.267498821008098e-07, + "loss": 0.2534, + "step": 40411 + }, + { + "epoch": 0.8089883141906263, + "grad_norm": 1.1220662593841553, + "learning_rate": 9.265618797968523e-07, + "loss": 0.2909, + "step": 40412 + }, + { + "epoch": 0.8090083327077546, + "grad_norm": 1.157981038093567, + "learning_rate": 9.263738946167533e-07, + "loss": 0.2606, + "step": 40413 + }, + { + "epoch": 0.809028351224883, + "grad_norm": 1.1560065746307373, + "learning_rate": 9.261859265613005e-07, + "loss": 0.2546, + "step": 40414 + }, + { + "epoch": 0.8090483697420113, + "grad_norm": 1.08950936794281, + "learning_rate": 9.259979756312871e-07, + "loss": 0.3102, + "step": 40415 + }, + { + "epoch": 0.8090683882591397, + "grad_norm": 1.1054974794387817, + "learning_rate": 9.258100418275007e-07, + "loss": 0.2947, + "step": 40416 + }, + { + "epoch": 0.809088406776268, + "grad_norm": 1.154402732849121, + "learning_rate": 9.256221251507314e-07, + "loss": 0.2905, + "step": 40417 + }, + { + "epoch": 0.8091084252933964, + "grad_norm": 1.091028094291687, + "learning_rate": 9.254342256017707e-07, + "loss": 0.2891, + "step": 40418 + }, + { + "epoch": 0.8091284438105247, + "grad_norm": 1.179933786392212, + "learning_rate": 9.252463431814079e-07, + "loss": 0.3009, + "step": 40419 + }, + { + "epoch": 0.809148462327653, + "grad_norm": 1.0326381921768188, + "learning_rate": 9.250584778904326e-07, + "loss": 0.2739, + "step": 40420 + }, + { + "epoch": 0.8091684808447814, + "grad_norm": 1.8127851486206055, + "learning_rate": 9.248706297296345e-07, + "loss": 0.8253, + "step": 40421 + }, + { + "epoch": 0.8091884993619097, + "grad_norm": 1.846030592918396, + "learning_rate": 9.24682798699802e-07, + "loss": 0.7205, + "step": 40422 + }, + { + "epoch": 0.8092085178790381, + "grad_norm": 1.0851160287857056, + "learning_rate": 9.244949848017276e-07, + "loss": 0.3122, + "step": 40423 + }, + { + "epoch": 0.8092285363961664, + "grad_norm": 1.07524836063385, + "learning_rate": 9.243071880361987e-07, + "loss": 0.3273, + "step": 40424 + }, + { + "epoch": 0.8092485549132948, + "grad_norm": 1.2543874979019165, + "learning_rate": 9.241194084040045e-07, + "loss": 0.2935, + "step": 40425 + }, + { + "epoch": 0.8092685734304231, + "grad_norm": 1.1518594026565552, + "learning_rate": 9.239316459059361e-07, + "loss": 0.2654, + "step": 40426 + }, + { + "epoch": 0.8092885919475515, + "grad_norm": 1.040626883506775, + "learning_rate": 9.237439005427807e-07, + "loss": 0.3208, + "step": 40427 + }, + { + "epoch": 0.8093086104646798, + "grad_norm": 1.0636955499649048, + "learning_rate": 9.235561723153297e-07, + "loss": 0.3055, + "step": 40428 + }, + { + "epoch": 0.8093286289818081, + "grad_norm": 1.097160816192627, + "learning_rate": 9.233684612243715e-07, + "loss": 0.3328, + "step": 40429 + }, + { + "epoch": 0.8093486474989365, + "grad_norm": 1.150168538093567, + "learning_rate": 9.231807672706939e-07, + "loss": 0.2379, + "step": 40430 + }, + { + "epoch": 0.8093686660160648, + "grad_norm": 1.0200797319412231, + "learning_rate": 9.229930904550883e-07, + "loss": 0.3166, + "step": 40431 + }, + { + "epoch": 0.8093886845331932, + "grad_norm": 1.1975630521774292, + "learning_rate": 9.228054307783418e-07, + "loss": 0.2897, + "step": 40432 + }, + { + "epoch": 0.8094087030503215, + "grad_norm": 1.1173484325408936, + "learning_rate": 9.226177882412446e-07, + "loss": 0.301, + "step": 40433 + }, + { + "epoch": 0.8094287215674499, + "grad_norm": 1.1022835969924927, + "learning_rate": 9.224301628445842e-07, + "loss": 0.2771, + "step": 40434 + }, + { + "epoch": 0.8094487400845782, + "grad_norm": 1.1118873357772827, + "learning_rate": 9.222425545891489e-07, + "loss": 0.3083, + "step": 40435 + }, + { + "epoch": 0.8094687586017065, + "grad_norm": 1.9146990776062012, + "learning_rate": 9.220549634757297e-07, + "loss": 0.7384, + "step": 40436 + }, + { + "epoch": 0.8094887771188349, + "grad_norm": 1.133787989616394, + "learning_rate": 9.218673895051139e-07, + "loss": 0.2359, + "step": 40437 + }, + { + "epoch": 0.8095087956359632, + "grad_norm": 1.1293424367904663, + "learning_rate": 9.216798326780884e-07, + "loss": 0.3132, + "step": 40438 + }, + { + "epoch": 0.8095288141530916, + "grad_norm": 1.0919841527938843, + "learning_rate": 9.214922929954445e-07, + "loss": 0.2845, + "step": 40439 + }, + { + "epoch": 0.8095488326702199, + "grad_norm": 1.1421608924865723, + "learning_rate": 9.213047704579681e-07, + "loss": 0.3018, + "step": 40440 + }, + { + "epoch": 0.8095688511873483, + "grad_norm": 1.2028086185455322, + "learning_rate": 9.211172650664502e-07, + "loss": 0.3089, + "step": 40441 + }, + { + "epoch": 0.8095888697044766, + "grad_norm": 1.1844000816345215, + "learning_rate": 9.209297768216774e-07, + "loss": 0.2898, + "step": 40442 + }, + { + "epoch": 0.809608888221605, + "grad_norm": 1.0040185451507568, + "learning_rate": 9.20742305724438e-07, + "loss": 0.2755, + "step": 40443 + }, + { + "epoch": 0.8096289067387333, + "grad_norm": 1.0450869798660278, + "learning_rate": 9.205548517755186e-07, + "loss": 0.2519, + "step": 40444 + }, + { + "epoch": 0.8096489252558616, + "grad_norm": 1.0622931718826294, + "learning_rate": 9.203674149757102e-07, + "loss": 0.2496, + "step": 40445 + }, + { + "epoch": 0.80966894377299, + "grad_norm": 1.0797420740127563, + "learning_rate": 9.201799953257989e-07, + "loss": 0.2488, + "step": 40446 + }, + { + "epoch": 0.8096889622901183, + "grad_norm": 1.231719970703125, + "learning_rate": 9.199925928265735e-07, + "loss": 0.371, + "step": 40447 + }, + { + "epoch": 0.8097089808072467, + "grad_norm": 1.1342846155166626, + "learning_rate": 9.198052074788194e-07, + "loss": 0.2794, + "step": 40448 + }, + { + "epoch": 0.809728999324375, + "grad_norm": 1.116929054260254, + "learning_rate": 9.196178392833277e-07, + "loss": 0.3, + "step": 40449 + }, + { + "epoch": 0.8097490178415034, + "grad_norm": 1.1106795072555542, + "learning_rate": 9.194304882408844e-07, + "loss": 0.277, + "step": 40450 + }, + { + "epoch": 0.8097690363586317, + "grad_norm": 1.0731496810913086, + "learning_rate": 9.192431543522756e-07, + "loss": 0.2782, + "step": 40451 + }, + { + "epoch": 0.80978905487576, + "grad_norm": 1.279032588005066, + "learning_rate": 9.190558376182923e-07, + "loss": 0.2918, + "step": 40452 + }, + { + "epoch": 0.8098090733928884, + "grad_norm": 1.1989493370056152, + "learning_rate": 9.188685380397183e-07, + "loss": 0.3079, + "step": 40453 + }, + { + "epoch": 0.8098290919100167, + "grad_norm": 1.11650550365448, + "learning_rate": 9.186812556173442e-07, + "loss": 0.2972, + "step": 40454 + }, + { + "epoch": 0.8098491104271451, + "grad_norm": 1.1169188022613525, + "learning_rate": 9.184939903519557e-07, + "loss": 0.2741, + "step": 40455 + }, + { + "epoch": 0.8098691289442734, + "grad_norm": 1.0965570211410522, + "learning_rate": 9.183067422443404e-07, + "loss": 0.3267, + "step": 40456 + }, + { + "epoch": 0.8098891474614018, + "grad_norm": 1.0986534357070923, + "learning_rate": 9.181195112952834e-07, + "loss": 0.2937, + "step": 40457 + }, + { + "epoch": 0.8099091659785301, + "grad_norm": 0.9890199303627014, + "learning_rate": 9.179322975055749e-07, + "loss": 0.2727, + "step": 40458 + }, + { + "epoch": 0.8099291844956585, + "grad_norm": 1.1116180419921875, + "learning_rate": 9.177451008760008e-07, + "loss": 0.3214, + "step": 40459 + }, + { + "epoch": 0.8099492030127868, + "grad_norm": 1.2700468301773071, + "learning_rate": 9.175579214073477e-07, + "loss": 0.2932, + "step": 40460 + }, + { + "epoch": 0.8099692215299151, + "grad_norm": 1.1935490369796753, + "learning_rate": 9.173707591004011e-07, + "loss": 0.2936, + "step": 40461 + }, + { + "epoch": 0.8099892400470435, + "grad_norm": 1.5822936296463013, + "learning_rate": 9.171836139559509e-07, + "loss": 0.2967, + "step": 40462 + }, + { + "epoch": 0.8100092585641718, + "grad_norm": 1.0398926734924316, + "learning_rate": 9.169964859747815e-07, + "loss": 0.262, + "step": 40463 + }, + { + "epoch": 0.8100292770813002, + "grad_norm": 1.8468425273895264, + "learning_rate": 9.168093751576795e-07, + "loss": 0.6955, + "step": 40464 + }, + { + "epoch": 0.8100492955984285, + "grad_norm": 1.293968677520752, + "learning_rate": 9.166222815054332e-07, + "loss": 0.2823, + "step": 40465 + }, + { + "epoch": 0.8100693141155569, + "grad_norm": 1.0325490236282349, + "learning_rate": 9.164352050188268e-07, + "loss": 0.2979, + "step": 40466 + }, + { + "epoch": 0.8100893326326852, + "grad_norm": 1.8990174531936646, + "learning_rate": 9.162481456986494e-07, + "loss": 0.7501, + "step": 40467 + }, + { + "epoch": 0.8101093511498135, + "grad_norm": 1.2104690074920654, + "learning_rate": 9.160611035456862e-07, + "loss": 0.2821, + "step": 40468 + }, + { + "epoch": 0.8101293696669419, + "grad_norm": 1.1533862352371216, + "learning_rate": 9.158740785607229e-07, + "loss": 0.3052, + "step": 40469 + }, + { + "epoch": 0.8101493881840702, + "grad_norm": 1.145795464515686, + "learning_rate": 9.156870707445448e-07, + "loss": 0.3464, + "step": 40470 + }, + { + "epoch": 0.8101694067011986, + "grad_norm": 1.005907416343689, + "learning_rate": 9.155000800979408e-07, + "loss": 0.2876, + "step": 40471 + }, + { + "epoch": 0.8101894252183269, + "grad_norm": 1.1380586624145508, + "learning_rate": 9.153131066216952e-07, + "loss": 0.2965, + "step": 40472 + }, + { + "epoch": 0.8102094437354553, + "grad_norm": 1.1510690450668335, + "learning_rate": 9.151261503165943e-07, + "loss": 0.2658, + "step": 40473 + }, + { + "epoch": 0.8102294622525836, + "grad_norm": 1.0852768421173096, + "learning_rate": 9.149392111834243e-07, + "loss": 0.2656, + "step": 40474 + }, + { + "epoch": 0.810249480769712, + "grad_norm": 1.083315134048462, + "learning_rate": 9.147522892229693e-07, + "loss": 0.322, + "step": 40475 + }, + { + "epoch": 0.8102694992868403, + "grad_norm": 1.1212066411972046, + "learning_rate": 9.145653844360175e-07, + "loss": 0.3198, + "step": 40476 + }, + { + "epoch": 0.8102895178039686, + "grad_norm": 1.8686301708221436, + "learning_rate": 9.143784968233527e-07, + "loss": 0.7468, + "step": 40477 + }, + { + "epoch": 0.810309536321097, + "grad_norm": 1.0421282052993774, + "learning_rate": 9.141916263857625e-07, + "loss": 0.3154, + "step": 40478 + }, + { + "epoch": 0.8103295548382253, + "grad_norm": 1.0451939105987549, + "learning_rate": 9.140047731240303e-07, + "loss": 0.2765, + "step": 40479 + }, + { + "epoch": 0.8103495733553537, + "grad_norm": 1.0695717334747314, + "learning_rate": 9.138179370389438e-07, + "loss": 0.2917, + "step": 40480 + }, + { + "epoch": 0.810369591872482, + "grad_norm": 1.142829418182373, + "learning_rate": 9.136311181312874e-07, + "loss": 0.3174, + "step": 40481 + }, + { + "epoch": 0.8103896103896104, + "grad_norm": 1.1469923257827759, + "learning_rate": 9.134443164018463e-07, + "loss": 0.3111, + "step": 40482 + }, + { + "epoch": 0.8104096289067387, + "grad_norm": 2.0311737060546875, + "learning_rate": 9.132575318514053e-07, + "loss": 0.6966, + "step": 40483 + }, + { + "epoch": 0.810429647423867, + "grad_norm": 1.1124926805496216, + "learning_rate": 9.130707644807507e-07, + "loss": 0.2724, + "step": 40484 + }, + { + "epoch": 0.8104496659409954, + "grad_norm": 1.3924875259399414, + "learning_rate": 9.128840142906675e-07, + "loss": 0.3024, + "step": 40485 + }, + { + "epoch": 0.8104696844581237, + "grad_norm": 1.1299339532852173, + "learning_rate": 9.126972812819402e-07, + "loss": 0.3064, + "step": 40486 + }, + { + "epoch": 0.8104897029752521, + "grad_norm": 1.1161909103393555, + "learning_rate": 9.125105654553534e-07, + "loss": 0.325, + "step": 40487 + }, + { + "epoch": 0.8105097214923804, + "grad_norm": 1.0052883625030518, + "learning_rate": 9.123238668116919e-07, + "loss": 0.2491, + "step": 40488 + }, + { + "epoch": 0.8105297400095088, + "grad_norm": 1.9245332479476929, + "learning_rate": 9.121371853517424e-07, + "loss": 0.7124, + "step": 40489 + }, + { + "epoch": 0.8105497585266371, + "grad_norm": 1.3016929626464844, + "learning_rate": 9.119505210762869e-07, + "loss": 0.3323, + "step": 40490 + }, + { + "epoch": 0.8105697770437655, + "grad_norm": 1.1677427291870117, + "learning_rate": 9.117638739861135e-07, + "loss": 0.3728, + "step": 40491 + }, + { + "epoch": 0.8105897955608938, + "grad_norm": 1.1435545682907104, + "learning_rate": 9.115772440820031e-07, + "loss": 0.282, + "step": 40492 + }, + { + "epoch": 0.8106098140780221, + "grad_norm": 1.0768647193908691, + "learning_rate": 9.113906313647436e-07, + "loss": 0.285, + "step": 40493 + }, + { + "epoch": 0.8106298325951505, + "grad_norm": 1.7879691123962402, + "learning_rate": 9.112040358351176e-07, + "loss": 0.7414, + "step": 40494 + }, + { + "epoch": 0.8106498511122788, + "grad_norm": 1.1498888731002808, + "learning_rate": 9.110174574939107e-07, + "loss": 0.334, + "step": 40495 + }, + { + "epoch": 0.8106698696294072, + "grad_norm": 1.0328718423843384, + "learning_rate": 9.10830896341906e-07, + "loss": 0.3094, + "step": 40496 + }, + { + "epoch": 0.8106898881465355, + "grad_norm": 1.0914660692214966, + "learning_rate": 9.106443523798869e-07, + "loss": 0.3101, + "step": 40497 + }, + { + "epoch": 0.8107099066636639, + "grad_norm": 1.119338035583496, + "learning_rate": 9.104578256086405e-07, + "loss": 0.3098, + "step": 40498 + }, + { + "epoch": 0.8107299251807922, + "grad_norm": 1.0269674062728882, + "learning_rate": 9.10271316028949e-07, + "loss": 0.2781, + "step": 40499 + }, + { + "epoch": 0.8107499436979205, + "grad_norm": 1.214431881904602, + "learning_rate": 9.100848236415972e-07, + "loss": 0.3165, + "step": 40500 + }, + { + "epoch": 0.8107699622150489, + "grad_norm": 1.1204851865768433, + "learning_rate": 9.098983484473667e-07, + "loss": 0.2718, + "step": 40501 + }, + { + "epoch": 0.8107899807321772, + "grad_norm": 1.0087810754776, + "learning_rate": 9.097118904470448e-07, + "loss": 0.2679, + "step": 40502 + }, + { + "epoch": 0.8108099992493056, + "grad_norm": 1.127780556678772, + "learning_rate": 9.09525449641413e-07, + "loss": 0.2861, + "step": 40503 + }, + { + "epoch": 0.8108300177664339, + "grad_norm": 1.19563627243042, + "learning_rate": 9.093390260312568e-07, + "loss": 0.2744, + "step": 40504 + }, + { + "epoch": 0.8108500362835623, + "grad_norm": 1.098750114440918, + "learning_rate": 9.091526196173594e-07, + "loss": 0.2773, + "step": 40505 + }, + { + "epoch": 0.8108700548006906, + "grad_norm": 1.931276559829712, + "learning_rate": 9.089662304005026e-07, + "loss": 0.7903, + "step": 40506 + }, + { + "epoch": 0.810890073317819, + "grad_norm": 1.110565423965454, + "learning_rate": 9.087798583814722e-07, + "loss": 0.321, + "step": 40507 + }, + { + "epoch": 0.8109100918349473, + "grad_norm": 1.1530977487564087, + "learning_rate": 9.085935035610516e-07, + "loss": 0.3064, + "step": 40508 + }, + { + "epoch": 0.8109301103520756, + "grad_norm": 1.1984593868255615, + "learning_rate": 9.084071659400229e-07, + "loss": 0.2581, + "step": 40509 + }, + { + "epoch": 0.810950128869204, + "grad_norm": 1.968505620956421, + "learning_rate": 9.082208455191688e-07, + "loss": 0.792, + "step": 40510 + }, + { + "epoch": 0.8109701473863323, + "grad_norm": 1.086380958557129, + "learning_rate": 9.080345422992748e-07, + "loss": 0.2763, + "step": 40511 + }, + { + "epoch": 0.8109901659034607, + "grad_norm": 1.088871955871582, + "learning_rate": 9.078482562811236e-07, + "loss": 0.3, + "step": 40512 + }, + { + "epoch": 0.811010184420589, + "grad_norm": 1.05498206615448, + "learning_rate": 9.076619874654969e-07, + "loss": 0.2599, + "step": 40513 + }, + { + "epoch": 0.8110302029377174, + "grad_norm": 1.0773528814315796, + "learning_rate": 9.074757358531777e-07, + "loss": 0.2714, + "step": 40514 + }, + { + "epoch": 0.8110502214548457, + "grad_norm": 1.0233756303787231, + "learning_rate": 9.072895014449512e-07, + "loss": 0.2842, + "step": 40515 + }, + { + "epoch": 0.811070239971974, + "grad_norm": 1.3240776062011719, + "learning_rate": 9.071032842415972e-07, + "loss": 0.2748, + "step": 40516 + }, + { + "epoch": 0.8110902584891024, + "grad_norm": 1.155094861984253, + "learning_rate": 9.069170842439017e-07, + "loss": 0.2712, + "step": 40517 + }, + { + "epoch": 0.8111102770062307, + "grad_norm": 1.12661874294281, + "learning_rate": 9.067309014526459e-07, + "loss": 0.2762, + "step": 40518 + }, + { + "epoch": 0.8111302955233591, + "grad_norm": 1.02927565574646, + "learning_rate": 9.065447358686113e-07, + "loss": 0.287, + "step": 40519 + }, + { + "epoch": 0.8111503140404874, + "grad_norm": 1.153002142906189, + "learning_rate": 9.063585874925828e-07, + "loss": 0.2879, + "step": 40520 + }, + { + "epoch": 0.8111703325576158, + "grad_norm": 1.1240639686584473, + "learning_rate": 9.061724563253421e-07, + "loss": 0.3439, + "step": 40521 + }, + { + "epoch": 0.8111903510747441, + "grad_norm": 1.0937145948410034, + "learning_rate": 9.059863423676713e-07, + "loss": 0.2511, + "step": 40522 + }, + { + "epoch": 0.8112103695918724, + "grad_norm": 1.131560206413269, + "learning_rate": 9.058002456203513e-07, + "loss": 0.2779, + "step": 40523 + }, + { + "epoch": 0.8112303881090008, + "grad_norm": 1.0569584369659424, + "learning_rate": 9.056141660841678e-07, + "loss": 0.2618, + "step": 40524 + }, + { + "epoch": 0.8112504066261291, + "grad_norm": 1.1514431238174438, + "learning_rate": 9.054281037599005e-07, + "loss": 0.3038, + "step": 40525 + }, + { + "epoch": 0.8112704251432575, + "grad_norm": 1.9050190448760986, + "learning_rate": 9.052420586483329e-07, + "loss": 0.7228, + "step": 40526 + }, + { + "epoch": 0.8112904436603858, + "grad_norm": 1.3164305686950684, + "learning_rate": 9.050560307502448e-07, + "loss": 0.3031, + "step": 40527 + }, + { + "epoch": 0.8113104621775142, + "grad_norm": 1.2334415912628174, + "learning_rate": 9.048700200664218e-07, + "loss": 0.2635, + "step": 40528 + }, + { + "epoch": 0.8113304806946425, + "grad_norm": 1.1260859966278076, + "learning_rate": 9.04684026597642e-07, + "loss": 0.294, + "step": 40529 + }, + { + "epoch": 0.8113504992117709, + "grad_norm": 1.2162386178970337, + "learning_rate": 9.04498050344691e-07, + "loss": 0.2904, + "step": 40530 + }, + { + "epoch": 0.8113705177288992, + "grad_norm": 1.11371648311615, + "learning_rate": 9.043120913083487e-07, + "loss": 0.2999, + "step": 40531 + }, + { + "epoch": 0.8113905362460275, + "grad_norm": 1.1839487552642822, + "learning_rate": 9.041261494893955e-07, + "loss": 0.2955, + "step": 40532 + }, + { + "epoch": 0.8114105547631559, + "grad_norm": 1.229502558708191, + "learning_rate": 9.039402248886159e-07, + "loss": 0.3348, + "step": 40533 + }, + { + "epoch": 0.8114305732802842, + "grad_norm": 1.311725378036499, + "learning_rate": 9.037543175067898e-07, + "loss": 0.3, + "step": 40534 + }, + { + "epoch": 0.8114505917974126, + "grad_norm": 1.2047206163406372, + "learning_rate": 9.035684273446993e-07, + "loss": 0.2843, + "step": 40535 + }, + { + "epoch": 0.8114706103145409, + "grad_norm": 1.1212323904037476, + "learning_rate": 9.033825544031261e-07, + "loss": 0.2615, + "step": 40536 + }, + { + "epoch": 0.8114906288316693, + "grad_norm": 1.1103358268737793, + "learning_rate": 9.031966986828488e-07, + "loss": 0.3201, + "step": 40537 + }, + { + "epoch": 0.8115106473487976, + "grad_norm": 1.1582878828048706, + "learning_rate": 9.030108601846533e-07, + "loss": 0.2847, + "step": 40538 + }, + { + "epoch": 0.8115306658659259, + "grad_norm": 1.117828130722046, + "learning_rate": 9.028250389093173e-07, + "loss": 0.305, + "step": 40539 + }, + { + "epoch": 0.8115506843830543, + "grad_norm": 1.1311975717544556, + "learning_rate": 9.026392348576224e-07, + "loss": 0.2867, + "step": 40540 + }, + { + "epoch": 0.8115707029001826, + "grad_norm": 1.1139124631881714, + "learning_rate": 9.024534480303521e-07, + "loss": 0.3097, + "step": 40541 + }, + { + "epoch": 0.811590721417311, + "grad_norm": 1.1912165880203247, + "learning_rate": 9.022676784282836e-07, + "loss": 0.3058, + "step": 40542 + }, + { + "epoch": 0.8116107399344393, + "grad_norm": 1.1699074506759644, + "learning_rate": 9.020819260522018e-07, + "loss": 0.3023, + "step": 40543 + }, + { + "epoch": 0.8116307584515677, + "grad_norm": 1.1517133712768555, + "learning_rate": 9.018961909028856e-07, + "loss": 0.2664, + "step": 40544 + }, + { + "epoch": 0.811650776968696, + "grad_norm": 1.209451675415039, + "learning_rate": 9.017104729811144e-07, + "loss": 0.2829, + "step": 40545 + }, + { + "epoch": 0.8116707954858244, + "grad_norm": 1.2953920364379883, + "learning_rate": 9.015247722876719e-07, + "loss": 0.3379, + "step": 40546 + }, + { + "epoch": 0.8116908140029527, + "grad_norm": 1.191440224647522, + "learning_rate": 9.01339088823337e-07, + "loss": 0.3398, + "step": 40547 + }, + { + "epoch": 0.811710832520081, + "grad_norm": 1.99807608127594, + "learning_rate": 9.011534225888907e-07, + "loss": 0.7909, + "step": 40548 + }, + { + "epoch": 0.8117308510372094, + "grad_norm": 1.1342639923095703, + "learning_rate": 9.009677735851135e-07, + "loss": 0.3152, + "step": 40549 + }, + { + "epoch": 0.8117508695543377, + "grad_norm": 2.048189163208008, + "learning_rate": 9.007821418127844e-07, + "loss": 0.781, + "step": 40550 + }, + { + "epoch": 0.8117708880714661, + "grad_norm": 1.3114495277404785, + "learning_rate": 9.005965272726857e-07, + "loss": 0.3397, + "step": 40551 + }, + { + "epoch": 0.8117909065885944, + "grad_norm": 1.7693233489990234, + "learning_rate": 9.004109299655978e-07, + "loss": 0.7518, + "step": 40552 + }, + { + "epoch": 0.8118109251057228, + "grad_norm": 1.9661219120025635, + "learning_rate": 9.00225349892298e-07, + "loss": 0.7271, + "step": 40553 + }, + { + "epoch": 0.8118309436228511, + "grad_norm": 1.0447814464569092, + "learning_rate": 9.000397870535705e-07, + "loss": 0.2911, + "step": 40554 + }, + { + "epoch": 0.8118509621399794, + "grad_norm": 1.3317677974700928, + "learning_rate": 8.998542414501921e-07, + "loss": 0.2979, + "step": 40555 + }, + { + "epoch": 0.8118709806571078, + "grad_norm": 1.0485869646072388, + "learning_rate": 8.996687130829451e-07, + "loss": 0.3052, + "step": 40556 + }, + { + "epoch": 0.8118909991742361, + "grad_norm": 1.046555995941162, + "learning_rate": 8.994832019526084e-07, + "loss": 0.302, + "step": 40557 + }, + { + "epoch": 0.8119110176913645, + "grad_norm": 1.1438343524932861, + "learning_rate": 8.992977080599618e-07, + "loss": 0.3159, + "step": 40558 + }, + { + "epoch": 0.8119310362084928, + "grad_norm": 1.241478443145752, + "learning_rate": 8.991122314057843e-07, + "loss": 0.285, + "step": 40559 + }, + { + "epoch": 0.8119510547256212, + "grad_norm": 1.0551872253417969, + "learning_rate": 8.989267719908573e-07, + "loss": 0.2788, + "step": 40560 + }, + { + "epoch": 0.8119710732427495, + "grad_norm": 1.3830350637435913, + "learning_rate": 8.987413298159597e-07, + "loss": 0.3253, + "step": 40561 + }, + { + "epoch": 0.8119910917598779, + "grad_norm": 1.0859110355377197, + "learning_rate": 8.98555904881871e-07, + "loss": 0.3243, + "step": 40562 + }, + { + "epoch": 0.8120111102770062, + "grad_norm": 1.7741751670837402, + "learning_rate": 8.983704971893697e-07, + "loss": 0.7696, + "step": 40563 + }, + { + "epoch": 0.8120311287941345, + "grad_norm": 1.1540257930755615, + "learning_rate": 8.981851067392372e-07, + "loss": 0.2926, + "step": 40564 + }, + { + "epoch": 0.8120511473112629, + "grad_norm": 0.9728089570999146, + "learning_rate": 8.979997335322516e-07, + "loss": 0.2783, + "step": 40565 + }, + { + "epoch": 0.8120711658283912, + "grad_norm": 1.032272219657898, + "learning_rate": 8.978143775691911e-07, + "loss": 0.3168, + "step": 40566 + }, + { + "epoch": 0.8120911843455196, + "grad_norm": 1.192868709564209, + "learning_rate": 8.976290388508374e-07, + "loss": 0.2911, + "step": 40567 + }, + { + "epoch": 0.8121112028626479, + "grad_norm": 1.1896295547485352, + "learning_rate": 8.974437173779671e-07, + "loss": 0.2615, + "step": 40568 + }, + { + "epoch": 0.8121312213797763, + "grad_norm": 2.0271756649017334, + "learning_rate": 8.972584131513623e-07, + "loss": 0.8308, + "step": 40569 + }, + { + "epoch": 0.8121512398969046, + "grad_norm": 1.210208535194397, + "learning_rate": 8.970731261717997e-07, + "loss": 0.3104, + "step": 40570 + }, + { + "epoch": 0.8121712584140329, + "grad_norm": 1.0722113847732544, + "learning_rate": 8.968878564400591e-07, + "loss": 0.2706, + "step": 40571 + }, + { + "epoch": 0.8121912769311613, + "grad_norm": 1.0637904405593872, + "learning_rate": 8.967026039569171e-07, + "loss": 0.2813, + "step": 40572 + }, + { + "epoch": 0.8122112954482896, + "grad_norm": 1.1421558856964111, + "learning_rate": 8.96517368723156e-07, + "loss": 0.314, + "step": 40573 + }, + { + "epoch": 0.812231313965418, + "grad_norm": 1.0817772150039673, + "learning_rate": 8.963321507395528e-07, + "loss": 0.27, + "step": 40574 + }, + { + "epoch": 0.8122513324825463, + "grad_norm": 1.0028693675994873, + "learning_rate": 8.961469500068859e-07, + "loss": 0.3131, + "step": 40575 + }, + { + "epoch": 0.8122713509996747, + "grad_norm": 1.0434155464172363, + "learning_rate": 8.959617665259329e-07, + "loss": 0.2806, + "step": 40576 + }, + { + "epoch": 0.812291369516803, + "grad_norm": 1.2488353252410889, + "learning_rate": 8.957766002974749e-07, + "loss": 0.3303, + "step": 40577 + }, + { + "epoch": 0.8123113880339314, + "grad_norm": 1.1957122087478638, + "learning_rate": 8.955914513222886e-07, + "loss": 0.3355, + "step": 40578 + }, + { + "epoch": 0.8123314065510597, + "grad_norm": 1.1969108581542969, + "learning_rate": 8.954063196011514e-07, + "loss": 0.2971, + "step": 40579 + }, + { + "epoch": 0.812351425068188, + "grad_norm": 1.0177037715911865, + "learning_rate": 8.952212051348441e-07, + "loss": 0.2773, + "step": 40580 + }, + { + "epoch": 0.8123714435853164, + "grad_norm": 1.1713981628417969, + "learning_rate": 8.950361079241421e-07, + "loss": 0.307, + "step": 40581 + }, + { + "epoch": 0.8123914621024447, + "grad_norm": 1.103831171989441, + "learning_rate": 8.948510279698264e-07, + "loss": 0.3114, + "step": 40582 + }, + { + "epoch": 0.8124114806195731, + "grad_norm": 0.9795851111412048, + "learning_rate": 8.94665965272673e-07, + "loss": 0.2832, + "step": 40583 + }, + { + "epoch": 0.8124314991367014, + "grad_norm": 1.110137939453125, + "learning_rate": 8.944809198334608e-07, + "loss": 0.2944, + "step": 40584 + }, + { + "epoch": 0.8124515176538298, + "grad_norm": 2.0274720191955566, + "learning_rate": 8.942958916529659e-07, + "loss": 0.7689, + "step": 40585 + }, + { + "epoch": 0.8124715361709581, + "grad_norm": 1.1109814643859863, + "learning_rate": 8.941108807319687e-07, + "loss": 0.319, + "step": 40586 + }, + { + "epoch": 0.8124915546880864, + "grad_norm": 1.0031803846359253, + "learning_rate": 8.939258870712459e-07, + "loss": 0.2865, + "step": 40587 + }, + { + "epoch": 0.8125115732052148, + "grad_norm": 1.111887812614441, + "learning_rate": 8.93740910671575e-07, + "loss": 0.3153, + "step": 40588 + }, + { + "epoch": 0.8125315917223431, + "grad_norm": 1.1867908239364624, + "learning_rate": 8.935559515337339e-07, + "loss": 0.2781, + "step": 40589 + }, + { + "epoch": 0.8125516102394715, + "grad_norm": 1.1128642559051514, + "learning_rate": 8.93371009658498e-07, + "loss": 0.3159, + "step": 40590 + }, + { + "epoch": 0.8125716287565998, + "grad_norm": 1.2241753339767456, + "learning_rate": 8.931860850466484e-07, + "loss": 0.3261, + "step": 40591 + }, + { + "epoch": 0.8125916472737282, + "grad_norm": 1.1913245916366577, + "learning_rate": 8.930011776989594e-07, + "loss": 0.2604, + "step": 40592 + }, + { + "epoch": 0.8126116657908565, + "grad_norm": 1.5077077150344849, + "learning_rate": 8.928162876162105e-07, + "loss": 0.2893, + "step": 40593 + }, + { + "epoch": 0.8126316843079849, + "grad_norm": 1.0664938688278198, + "learning_rate": 8.926314147991771e-07, + "loss": 0.2445, + "step": 40594 + }, + { + "epoch": 0.8126517028251132, + "grad_norm": 0.9421786069869995, + "learning_rate": 8.924465592486386e-07, + "loss": 0.2797, + "step": 40595 + }, + { + "epoch": 0.8126717213422415, + "grad_norm": 1.1153019666671753, + "learning_rate": 8.922617209653705e-07, + "loss": 0.3058, + "step": 40596 + }, + { + "epoch": 0.8126917398593699, + "grad_norm": 1.1683813333511353, + "learning_rate": 8.920768999501506e-07, + "loss": 0.3214, + "step": 40597 + }, + { + "epoch": 0.8127117583764982, + "grad_norm": 1.9216371774673462, + "learning_rate": 8.918920962037536e-07, + "loss": 0.7481, + "step": 40598 + }, + { + "epoch": 0.8127317768936266, + "grad_norm": 1.1736005544662476, + "learning_rate": 8.917073097269596e-07, + "loss": 0.2721, + "step": 40599 + }, + { + "epoch": 0.8127517954107549, + "grad_norm": 1.3118867874145508, + "learning_rate": 8.915225405205441e-07, + "loss": 0.2901, + "step": 40600 + }, + { + "epoch": 0.8127718139278833, + "grad_norm": 1.0420451164245605, + "learning_rate": 8.913377885852837e-07, + "loss": 0.2994, + "step": 40601 + }, + { + "epoch": 0.8127918324450116, + "grad_norm": 1.1105613708496094, + "learning_rate": 8.911530539219548e-07, + "loss": 0.2787, + "step": 40602 + }, + { + "epoch": 0.8128118509621399, + "grad_norm": 1.0749517679214478, + "learning_rate": 8.909683365313332e-07, + "loss": 0.2841, + "step": 40603 + }, + { + "epoch": 0.8128318694792683, + "grad_norm": 1.146234154701233, + "learning_rate": 8.907836364141975e-07, + "loss": 0.3336, + "step": 40604 + }, + { + "epoch": 0.8128518879963966, + "grad_norm": 1.8953502178192139, + "learning_rate": 8.905989535713222e-07, + "loss": 0.7265, + "step": 40605 + }, + { + "epoch": 0.812871906513525, + "grad_norm": 1.8263952732086182, + "learning_rate": 8.904142880034855e-07, + "loss": 0.7673, + "step": 40606 + }, + { + "epoch": 0.8128919250306533, + "grad_norm": 1.1526546478271484, + "learning_rate": 8.902296397114618e-07, + "loss": 0.2772, + "step": 40607 + }, + { + "epoch": 0.8129119435477817, + "grad_norm": 1.1844885349273682, + "learning_rate": 8.900450086960288e-07, + "loss": 0.3126, + "step": 40608 + }, + { + "epoch": 0.81293196206491, + "grad_norm": 0.9999364018440247, + "learning_rate": 8.898603949579627e-07, + "loss": 0.2503, + "step": 40609 + }, + { + "epoch": 0.8129519805820384, + "grad_norm": 1.185995101928711, + "learning_rate": 8.896757984980392e-07, + "loss": 0.3188, + "step": 40610 + }, + { + "epoch": 0.8129719990991667, + "grad_norm": 1.1195207834243774, + "learning_rate": 8.894912193170336e-07, + "loss": 0.2762, + "step": 40611 + }, + { + "epoch": 0.812992017616295, + "grad_norm": 1.1679637432098389, + "learning_rate": 8.893066574157211e-07, + "loss": 0.2968, + "step": 40612 + }, + { + "epoch": 0.8130120361334234, + "grad_norm": 1.023130178451538, + "learning_rate": 8.891221127948801e-07, + "loss": 0.2617, + "step": 40613 + }, + { + "epoch": 0.8130320546505517, + "grad_norm": 1.0784242153167725, + "learning_rate": 8.889375854552851e-07, + "loss": 0.291, + "step": 40614 + }, + { + "epoch": 0.8130520731676801, + "grad_norm": 1.155632734298706, + "learning_rate": 8.887530753977119e-07, + "loss": 0.3155, + "step": 40615 + }, + { + "epoch": 0.8130720916848084, + "grad_norm": 1.9843498468399048, + "learning_rate": 8.885685826229345e-07, + "loss": 0.7809, + "step": 40616 + }, + { + "epoch": 0.8130921102019368, + "grad_norm": 2.1103298664093018, + "learning_rate": 8.883841071317317e-07, + "loss": 0.7283, + "step": 40617 + }, + { + "epoch": 0.8131121287190651, + "grad_norm": 1.1406513452529907, + "learning_rate": 8.881996489248756e-07, + "loss": 0.307, + "step": 40618 + }, + { + "epoch": 0.8131321472361934, + "grad_norm": 1.1371502876281738, + "learning_rate": 8.880152080031445e-07, + "loss": 0.2663, + "step": 40619 + }, + { + "epoch": 0.8131521657533218, + "grad_norm": 1.1391178369522095, + "learning_rate": 8.878307843673128e-07, + "loss": 0.3352, + "step": 40620 + }, + { + "epoch": 0.8131721842704501, + "grad_norm": 1.0315579175949097, + "learning_rate": 8.876463780181543e-07, + "loss": 0.2685, + "step": 40621 + }, + { + "epoch": 0.8131922027875785, + "grad_norm": 2.0898823738098145, + "learning_rate": 8.874619889564468e-07, + "loss": 0.7177, + "step": 40622 + }, + { + "epoch": 0.8132122213047068, + "grad_norm": 1.0801702737808228, + "learning_rate": 8.872776171829639e-07, + "loss": 0.2949, + "step": 40623 + }, + { + "epoch": 0.8132322398218352, + "grad_norm": 1.0859549045562744, + "learning_rate": 8.870932626984807e-07, + "loss": 0.3219, + "step": 40624 + }, + { + "epoch": 0.8132522583389635, + "grad_norm": 1.1189593076705933, + "learning_rate": 8.869089255037711e-07, + "loss": 0.2985, + "step": 40625 + }, + { + "epoch": 0.8132722768560919, + "grad_norm": 1.1271294355392456, + "learning_rate": 8.867246055996126e-07, + "loss": 0.3031, + "step": 40626 + }, + { + "epoch": 0.8132922953732202, + "grad_norm": 1.136833906173706, + "learning_rate": 8.865403029867786e-07, + "loss": 0.3221, + "step": 40627 + }, + { + "epoch": 0.8133123138903485, + "grad_norm": 1.0875582695007324, + "learning_rate": 8.86356017666044e-07, + "loss": 0.2606, + "step": 40628 + }, + { + "epoch": 0.8133323324074769, + "grad_norm": 1.008980631828308, + "learning_rate": 8.861717496381822e-07, + "loss": 0.2658, + "step": 40629 + }, + { + "epoch": 0.8133523509246052, + "grad_norm": 0.9617239236831665, + "learning_rate": 8.859874989039702e-07, + "loss": 0.233, + "step": 40630 + }, + { + "epoch": 0.8133723694417336, + "grad_norm": 1.1085201501846313, + "learning_rate": 8.858032654641801e-07, + "loss": 0.3047, + "step": 40631 + }, + { + "epoch": 0.8133923879588619, + "grad_norm": 1.1594949960708618, + "learning_rate": 8.856190493195893e-07, + "loss": 0.2681, + "step": 40632 + }, + { + "epoch": 0.8134124064759903, + "grad_norm": 1.192635178565979, + "learning_rate": 8.854348504709704e-07, + "loss": 0.323, + "step": 40633 + }, + { + "epoch": 0.8134324249931186, + "grad_norm": 1.0599006414413452, + "learning_rate": 8.852506689190965e-07, + "loss": 0.2592, + "step": 40634 + }, + { + "epoch": 0.8134524435102469, + "grad_norm": 1.105549693107605, + "learning_rate": 8.850665046647449e-07, + "loss": 0.3309, + "step": 40635 + }, + { + "epoch": 0.8134724620273753, + "grad_norm": 1.0904597043991089, + "learning_rate": 8.84882357708688e-07, + "loss": 0.3018, + "step": 40636 + }, + { + "epoch": 0.8134924805445036, + "grad_norm": 1.9762126207351685, + "learning_rate": 8.846982280516997e-07, + "loss": 0.6823, + "step": 40637 + }, + { + "epoch": 0.813512499061632, + "grad_norm": 1.1977413892745972, + "learning_rate": 8.845141156945536e-07, + "loss": 0.2906, + "step": 40638 + }, + { + "epoch": 0.8135325175787603, + "grad_norm": 0.9409886002540588, + "learning_rate": 8.843300206380257e-07, + "loss": 0.2783, + "step": 40639 + }, + { + "epoch": 0.8135525360958887, + "grad_norm": 1.0323281288146973, + "learning_rate": 8.841459428828885e-07, + "loss": 0.2933, + "step": 40640 + }, + { + "epoch": 0.813572554613017, + "grad_norm": 1.2156798839569092, + "learning_rate": 8.839618824299162e-07, + "loss": 0.3653, + "step": 40641 + }, + { + "epoch": 0.8135925731301454, + "grad_norm": 1.210142970085144, + "learning_rate": 8.837778392798823e-07, + "loss": 0.3144, + "step": 40642 + }, + { + "epoch": 0.8136125916472737, + "grad_norm": 1.0837607383728027, + "learning_rate": 8.835938134335592e-07, + "loss": 0.2889, + "step": 40643 + }, + { + "epoch": 0.813632610164402, + "grad_norm": 1.2546030282974243, + "learning_rate": 8.834098048917223e-07, + "loss": 0.3227, + "step": 40644 + }, + { + "epoch": 0.8136526286815304, + "grad_norm": 1.0383787155151367, + "learning_rate": 8.832258136551453e-07, + "loss": 0.2801, + "step": 40645 + }, + { + "epoch": 0.8136726471986587, + "grad_norm": 1.9385008811950684, + "learning_rate": 8.830418397246015e-07, + "loss": 0.8167, + "step": 40646 + }, + { + "epoch": 0.8136926657157871, + "grad_norm": 1.0578705072402954, + "learning_rate": 8.828578831008622e-07, + "loss": 0.2784, + "step": 40647 + }, + { + "epoch": 0.8137126842329154, + "grad_norm": 1.0906437635421753, + "learning_rate": 8.826739437847043e-07, + "loss": 0.3113, + "step": 40648 + }, + { + "epoch": 0.8137327027500438, + "grad_norm": 1.172167181968689, + "learning_rate": 8.824900217768983e-07, + "loss": 0.2967, + "step": 40649 + }, + { + "epoch": 0.8137527212671721, + "grad_norm": 1.1320067644119263, + "learning_rate": 8.823061170782183e-07, + "loss": 0.2746, + "step": 40650 + }, + { + "epoch": 0.8137727397843004, + "grad_norm": 1.8806641101837158, + "learning_rate": 8.821222296894378e-07, + "loss": 0.7509, + "step": 40651 + }, + { + "epoch": 0.8137927583014288, + "grad_norm": 1.134406566619873, + "learning_rate": 8.819383596113274e-07, + "loss": 0.3113, + "step": 40652 + }, + { + "epoch": 0.8138127768185571, + "grad_norm": 1.1972564458847046, + "learning_rate": 8.817545068446632e-07, + "loss": 0.282, + "step": 40653 + }, + { + "epoch": 0.8138327953356855, + "grad_norm": 1.1345465183258057, + "learning_rate": 8.815706713902173e-07, + "loss": 0.2931, + "step": 40654 + }, + { + "epoch": 0.8138528138528138, + "grad_norm": 1.1923346519470215, + "learning_rate": 8.813868532487613e-07, + "loss": 0.3346, + "step": 40655 + }, + { + "epoch": 0.8138728323699422, + "grad_norm": 1.014728307723999, + "learning_rate": 8.81203052421068e-07, + "loss": 0.274, + "step": 40656 + }, + { + "epoch": 0.8138928508870705, + "grad_norm": 1.2198463678359985, + "learning_rate": 8.810192689079106e-07, + "loss": 0.3372, + "step": 40657 + }, + { + "epoch": 0.8139128694041989, + "grad_norm": 1.8080527782440186, + "learning_rate": 8.808355027100624e-07, + "loss": 0.7363, + "step": 40658 + }, + { + "epoch": 0.8139328879213272, + "grad_norm": 1.0718470811843872, + "learning_rate": 8.80651753828296e-07, + "loss": 0.3207, + "step": 40659 + }, + { + "epoch": 0.8139529064384555, + "grad_norm": 1.0521496534347534, + "learning_rate": 8.804680222633815e-07, + "loss": 0.2889, + "step": 40660 + }, + { + "epoch": 0.8139729249555839, + "grad_norm": 1.1036049127578735, + "learning_rate": 8.802843080160945e-07, + "loss": 0.2402, + "step": 40661 + }, + { + "epoch": 0.8139929434727122, + "grad_norm": 2.2055726051330566, + "learning_rate": 8.801006110872052e-07, + "loss": 0.7802, + "step": 40662 + }, + { + "epoch": 0.8140129619898406, + "grad_norm": 1.0736452341079712, + "learning_rate": 8.799169314774869e-07, + "loss": 0.3312, + "step": 40663 + }, + { + "epoch": 0.8140329805069689, + "grad_norm": 1.2015584707260132, + "learning_rate": 8.797332691877109e-07, + "loss": 0.2713, + "step": 40664 + }, + { + "epoch": 0.8140529990240973, + "grad_norm": 1.121912956237793, + "learning_rate": 8.79549624218648e-07, + "loss": 0.3471, + "step": 40665 + }, + { + "epoch": 0.8140730175412256, + "grad_norm": 1.1565502882003784, + "learning_rate": 8.793659965710732e-07, + "loss": 0.2887, + "step": 40666 + }, + { + "epoch": 0.8140930360583539, + "grad_norm": 1.110698938369751, + "learning_rate": 8.791823862457571e-07, + "loss": 0.2507, + "step": 40667 + }, + { + "epoch": 0.8141130545754823, + "grad_norm": 1.1488850116729736, + "learning_rate": 8.789987932434718e-07, + "loss": 0.2747, + "step": 40668 + }, + { + "epoch": 0.8141330730926106, + "grad_norm": 1.1218820810317993, + "learning_rate": 8.788152175649866e-07, + "loss": 0.293, + "step": 40669 + }, + { + "epoch": 0.814153091609739, + "grad_norm": 1.2728753089904785, + "learning_rate": 8.786316592110761e-07, + "loss": 0.3253, + "step": 40670 + }, + { + "epoch": 0.8141731101268673, + "grad_norm": 1.0815637111663818, + "learning_rate": 8.784481181825122e-07, + "loss": 0.2913, + "step": 40671 + }, + { + "epoch": 0.8141931286439957, + "grad_norm": 1.1572608947753906, + "learning_rate": 8.782645944800655e-07, + "loss": 0.2767, + "step": 40672 + }, + { + "epoch": 0.814213147161124, + "grad_norm": 1.260055661201477, + "learning_rate": 8.78081088104507e-07, + "loss": 0.3117, + "step": 40673 + }, + { + "epoch": 0.8142331656782524, + "grad_norm": 1.1503227949142456, + "learning_rate": 8.778975990566074e-07, + "loss": 0.3448, + "step": 40674 + }, + { + "epoch": 0.8142531841953807, + "grad_norm": 1.176698088645935, + "learning_rate": 8.777141273371409e-07, + "loss": 0.322, + "step": 40675 + }, + { + "epoch": 0.814273202712509, + "grad_norm": 1.0973107814788818, + "learning_rate": 8.775306729468769e-07, + "loss": 0.2992, + "step": 40676 + }, + { + "epoch": 0.8142932212296374, + "grad_norm": 1.0683480501174927, + "learning_rate": 8.77347235886587e-07, + "loss": 0.3022, + "step": 40677 + }, + { + "epoch": 0.8143132397467657, + "grad_norm": 1.154708743095398, + "learning_rate": 8.771638161570406e-07, + "loss": 0.3146, + "step": 40678 + }, + { + "epoch": 0.8143332582638941, + "grad_norm": 1.1700197458267212, + "learning_rate": 8.769804137590115e-07, + "loss": 0.2803, + "step": 40679 + }, + { + "epoch": 0.8143532767810224, + "grad_norm": 1.9406665563583374, + "learning_rate": 8.767970286932692e-07, + "loss": 0.7339, + "step": 40680 + }, + { + "epoch": 0.8143732952981508, + "grad_norm": 1.132599115371704, + "learning_rate": 8.766136609605852e-07, + "loss": 0.2579, + "step": 40681 + }, + { + "epoch": 0.8143933138152791, + "grad_norm": 1.9509007930755615, + "learning_rate": 8.764303105617289e-07, + "loss": 0.7414, + "step": 40682 + }, + { + "epoch": 0.8144133323324074, + "grad_norm": 1.1069815158843994, + "learning_rate": 8.762469774974719e-07, + "loss": 0.2815, + "step": 40683 + }, + { + "epoch": 0.8144333508495358, + "grad_norm": 1.3040766716003418, + "learning_rate": 8.760636617685863e-07, + "loss": 0.312, + "step": 40684 + }, + { + "epoch": 0.8144533693666641, + "grad_norm": 1.1545681953430176, + "learning_rate": 8.758803633758423e-07, + "loss": 0.2389, + "step": 40685 + }, + { + "epoch": 0.8144733878837925, + "grad_norm": 1.18994140625, + "learning_rate": 8.75697082320009e-07, + "loss": 0.2952, + "step": 40686 + }, + { + "epoch": 0.8144934064009208, + "grad_norm": 1.0430470705032349, + "learning_rate": 8.755138186018564e-07, + "loss": 0.2977, + "step": 40687 + }, + { + "epoch": 0.8145134249180492, + "grad_norm": 1.1353273391723633, + "learning_rate": 8.753305722221577e-07, + "loss": 0.3327, + "step": 40688 + }, + { + "epoch": 0.8145334434351775, + "grad_norm": 1.074575424194336, + "learning_rate": 8.751473431816815e-07, + "loss": 0.2884, + "step": 40689 + }, + { + "epoch": 0.8145534619523059, + "grad_norm": 1.2512232065200806, + "learning_rate": 8.749641314811979e-07, + "loss": 0.3258, + "step": 40690 + }, + { + "epoch": 0.8145734804694342, + "grad_norm": 1.2316831350326538, + "learning_rate": 8.747809371214766e-07, + "loss": 0.3144, + "step": 40691 + }, + { + "epoch": 0.8145934989865625, + "grad_norm": 1.298284888267517, + "learning_rate": 8.745977601032895e-07, + "loss": 0.2922, + "step": 40692 + }, + { + "epoch": 0.8146135175036909, + "grad_norm": 1.2123371362686157, + "learning_rate": 8.744146004274057e-07, + "loss": 0.3014, + "step": 40693 + }, + { + "epoch": 0.8146335360208192, + "grad_norm": 2.016965866088867, + "learning_rate": 8.742314580945948e-07, + "loss": 0.7858, + "step": 40694 + }, + { + "epoch": 0.8146535545379476, + "grad_norm": 1.0662916898727417, + "learning_rate": 8.74048333105626e-07, + "loss": 0.2865, + "step": 40695 + }, + { + "epoch": 0.8146735730550759, + "grad_norm": 1.2290043830871582, + "learning_rate": 8.738652254612701e-07, + "loss": 0.3446, + "step": 40696 + }, + { + "epoch": 0.8146935915722043, + "grad_norm": 1.1237820386886597, + "learning_rate": 8.736821351622982e-07, + "loss": 0.2813, + "step": 40697 + }, + { + "epoch": 0.8147136100893326, + "grad_norm": 1.9674670696258545, + "learning_rate": 8.734990622094785e-07, + "loss": 0.7204, + "step": 40698 + }, + { + "epoch": 0.8147336286064609, + "grad_norm": 1.0906990766525269, + "learning_rate": 8.733160066035806e-07, + "loss": 0.3243, + "step": 40699 + }, + { + "epoch": 0.8147536471235893, + "grad_norm": 1.7764928340911865, + "learning_rate": 8.731329683453726e-07, + "loss": 0.7998, + "step": 40700 + }, + { + "epoch": 0.8147736656407176, + "grad_norm": 1.086042881011963, + "learning_rate": 8.729499474356273e-07, + "loss": 0.2932, + "step": 40701 + }, + { + "epoch": 0.814793684157846, + "grad_norm": 1.193028450012207, + "learning_rate": 8.727669438751119e-07, + "loss": 0.3068, + "step": 40702 + }, + { + "epoch": 0.8148137026749743, + "grad_norm": 2.202861785888672, + "learning_rate": 8.725839576645962e-07, + "loss": 0.7588, + "step": 40703 + }, + { + "epoch": 0.8148337211921027, + "grad_norm": 1.1099326610565186, + "learning_rate": 8.724009888048495e-07, + "loss": 0.2914, + "step": 40704 + }, + { + "epoch": 0.814853739709231, + "grad_norm": 1.782637119293213, + "learning_rate": 8.722180372966394e-07, + "loss": 0.7441, + "step": 40705 + }, + { + "epoch": 0.8148737582263594, + "grad_norm": 1.0503937005996704, + "learning_rate": 8.720351031407375e-07, + "loss": 0.2803, + "step": 40706 + }, + { + "epoch": 0.8148937767434877, + "grad_norm": 1.225469946861267, + "learning_rate": 8.718521863379115e-07, + "loss": 0.333, + "step": 40707 + }, + { + "epoch": 0.814913795260616, + "grad_norm": 0.9499435424804688, + "learning_rate": 8.716692868889299e-07, + "loss": 0.2631, + "step": 40708 + }, + { + "epoch": 0.8149338137777444, + "grad_norm": 1.140644907951355, + "learning_rate": 8.714864047945614e-07, + "loss": 0.3091, + "step": 40709 + }, + { + "epoch": 0.8149538322948727, + "grad_norm": 2.1369171142578125, + "learning_rate": 8.713035400555775e-07, + "loss": 0.7943, + "step": 40710 + }, + { + "epoch": 0.8149738508120011, + "grad_norm": 1.2751909494400024, + "learning_rate": 8.71120692672745e-07, + "loss": 0.3215, + "step": 40711 + }, + { + "epoch": 0.8149938693291294, + "grad_norm": 1.0434118509292603, + "learning_rate": 8.709378626468324e-07, + "loss": 0.2851, + "step": 40712 + }, + { + "epoch": 0.8150138878462578, + "grad_norm": 1.0756261348724365, + "learning_rate": 8.707550499786083e-07, + "loss": 0.3057, + "step": 40713 + }, + { + "epoch": 0.8150339063633861, + "grad_norm": 1.3372400999069214, + "learning_rate": 8.7057225466884e-07, + "loss": 0.3116, + "step": 40714 + }, + { + "epoch": 0.8150539248805144, + "grad_norm": 1.2597808837890625, + "learning_rate": 8.703894767182986e-07, + "loss": 0.2838, + "step": 40715 + }, + { + "epoch": 0.8150739433976428, + "grad_norm": 1.039674162864685, + "learning_rate": 8.70206716127751e-07, + "loss": 0.282, + "step": 40716 + }, + { + "epoch": 0.8150939619147711, + "grad_norm": 1.8365321159362793, + "learning_rate": 8.70023972897966e-07, + "loss": 0.7347, + "step": 40717 + }, + { + "epoch": 0.8151139804318995, + "grad_norm": 1.3454028367996216, + "learning_rate": 8.698412470297101e-07, + "loss": 0.2868, + "step": 40718 + }, + { + "epoch": 0.8151339989490278, + "grad_norm": 1.087822437286377, + "learning_rate": 8.696585385237538e-07, + "loss": 0.279, + "step": 40719 + }, + { + "epoch": 0.8151540174661562, + "grad_norm": 2.0015501976013184, + "learning_rate": 8.694758473808645e-07, + "loss": 0.7798, + "step": 40720 + }, + { + "epoch": 0.8151740359832845, + "grad_norm": 1.1029069423675537, + "learning_rate": 8.692931736018084e-07, + "loss": 0.2518, + "step": 40721 + }, + { + "epoch": 0.8151940545004129, + "grad_norm": 0.9953269362449646, + "learning_rate": 8.691105171873548e-07, + "loss": 0.2574, + "step": 40722 + }, + { + "epoch": 0.8152140730175412, + "grad_norm": 1.2375930547714233, + "learning_rate": 8.689278781382732e-07, + "loss": 0.2215, + "step": 40723 + }, + { + "epoch": 0.8152340915346695, + "grad_norm": 1.100856065750122, + "learning_rate": 8.687452564553294e-07, + "loss": 0.2881, + "step": 40724 + }, + { + "epoch": 0.8152541100517979, + "grad_norm": 1.0693501234054565, + "learning_rate": 8.685626521392915e-07, + "loss": 0.2708, + "step": 40725 + }, + { + "epoch": 0.8152741285689262, + "grad_norm": 1.158219337463379, + "learning_rate": 8.683800651909274e-07, + "loss": 0.2646, + "step": 40726 + }, + { + "epoch": 0.8152941470860546, + "grad_norm": 1.0744695663452148, + "learning_rate": 8.681974956110029e-07, + "loss": 0.2927, + "step": 40727 + }, + { + "epoch": 0.8153141656031829, + "grad_norm": 1.2198837995529175, + "learning_rate": 8.680149434002883e-07, + "loss": 0.2952, + "step": 40728 + }, + { + "epoch": 0.8153341841203113, + "grad_norm": 1.2274829149246216, + "learning_rate": 8.678324085595491e-07, + "loss": 0.2803, + "step": 40729 + }, + { + "epoch": 0.8153542026374396, + "grad_norm": 1.8837350606918335, + "learning_rate": 8.676498910895542e-07, + "loss": 0.8144, + "step": 40730 + }, + { + "epoch": 0.8153742211545679, + "grad_norm": 1.1002001762390137, + "learning_rate": 8.674673909910681e-07, + "loss": 0.2702, + "step": 40731 + }, + { + "epoch": 0.8153942396716963, + "grad_norm": 1.2508320808410645, + "learning_rate": 8.67284908264861e-07, + "loss": 0.3258, + "step": 40732 + }, + { + "epoch": 0.8154142581888246, + "grad_norm": 1.3505237102508545, + "learning_rate": 8.671024429116986e-07, + "loss": 0.295, + "step": 40733 + }, + { + "epoch": 0.815434276705953, + "grad_norm": 1.8044203519821167, + "learning_rate": 8.66919994932347e-07, + "loss": 0.6964, + "step": 40734 + }, + { + "epoch": 0.8154542952230813, + "grad_norm": 1.9288336038589478, + "learning_rate": 8.667375643275755e-07, + "loss": 0.7272, + "step": 40735 + }, + { + "epoch": 0.8154743137402097, + "grad_norm": 1.0418881177902222, + "learning_rate": 8.66555151098149e-07, + "loss": 0.309, + "step": 40736 + }, + { + "epoch": 0.815494332257338, + "grad_norm": 1.2260974645614624, + "learning_rate": 8.663727552448359e-07, + "loss": 0.2912, + "step": 40737 + }, + { + "epoch": 0.8155143507744664, + "grad_norm": 1.2531825304031372, + "learning_rate": 8.661903767684021e-07, + "loss": 0.33, + "step": 40738 + }, + { + "epoch": 0.8155343692915947, + "grad_norm": 1.255911111831665, + "learning_rate": 8.660080156696149e-07, + "loss": 0.2901, + "step": 40739 + }, + { + "epoch": 0.815554387808723, + "grad_norm": 1.162459135055542, + "learning_rate": 8.658256719492386e-07, + "loss": 0.3234, + "step": 40740 + }, + { + "epoch": 0.8155744063258514, + "grad_norm": 1.0227774381637573, + "learning_rate": 8.656433456080426e-07, + "loss": 0.2463, + "step": 40741 + }, + { + "epoch": 0.8155944248429797, + "grad_norm": 1.0878461599349976, + "learning_rate": 8.65461036646793e-07, + "loss": 0.2746, + "step": 40742 + }, + { + "epoch": 0.8156144433601081, + "grad_norm": 1.07719087600708, + "learning_rate": 8.652787450662548e-07, + "loss": 0.2998, + "step": 40743 + }, + { + "epoch": 0.8156344618772364, + "grad_norm": 1.0667163133621216, + "learning_rate": 8.650964708671938e-07, + "loss": 0.2659, + "step": 40744 + }, + { + "epoch": 0.8156544803943648, + "grad_norm": 1.1475106477737427, + "learning_rate": 8.649142140503785e-07, + "loss": 0.3264, + "step": 40745 + }, + { + "epoch": 0.8156744989114931, + "grad_norm": 1.0578638315200806, + "learning_rate": 8.647319746165744e-07, + "loss": 0.2048, + "step": 40746 + }, + { + "epoch": 0.8156945174286214, + "grad_norm": 1.136597990989685, + "learning_rate": 8.645497525665458e-07, + "loss": 0.3427, + "step": 40747 + }, + { + "epoch": 0.8157145359457498, + "grad_norm": 1.0054683685302734, + "learning_rate": 8.64367547901061e-07, + "loss": 0.313, + "step": 40748 + }, + { + "epoch": 0.8157345544628781, + "grad_norm": 1.17020845413208, + "learning_rate": 8.641853606208844e-07, + "loss": 0.3139, + "step": 40749 + }, + { + "epoch": 0.8157545729800065, + "grad_norm": 1.1515939235687256, + "learning_rate": 8.640031907267832e-07, + "loss": 0.2809, + "step": 40750 + }, + { + "epoch": 0.8157745914971348, + "grad_norm": 1.1575833559036255, + "learning_rate": 8.638210382195227e-07, + "loss": 0.3004, + "step": 40751 + }, + { + "epoch": 0.8157946100142632, + "grad_norm": 1.0927194356918335, + "learning_rate": 8.636389030998682e-07, + "loss": 0.3203, + "step": 40752 + }, + { + "epoch": 0.8158146285313915, + "grad_norm": 1.1207201480865479, + "learning_rate": 8.634567853685849e-07, + "loss": 0.2626, + "step": 40753 + }, + { + "epoch": 0.8158346470485199, + "grad_norm": 1.3413907289505005, + "learning_rate": 8.632746850264401e-07, + "loss": 0.2974, + "step": 40754 + }, + { + "epoch": 0.8158546655656482, + "grad_norm": 1.1561205387115479, + "learning_rate": 8.630926020741981e-07, + "loss": 0.3444, + "step": 40755 + }, + { + "epoch": 0.8158746840827765, + "grad_norm": 1.219788670539856, + "learning_rate": 8.629105365126245e-07, + "loss": 0.3206, + "step": 40756 + }, + { + "epoch": 0.8158947025999049, + "grad_norm": 1.300050139427185, + "learning_rate": 8.627284883424847e-07, + "loss": 0.2966, + "step": 40757 + }, + { + "epoch": 0.8159147211170332, + "grad_norm": 1.1158193349838257, + "learning_rate": 8.625464575645432e-07, + "loss": 0.299, + "step": 40758 + }, + { + "epoch": 0.8159347396341616, + "grad_norm": 1.4301426410675049, + "learning_rate": 8.623644441795664e-07, + "loss": 0.3369, + "step": 40759 + }, + { + "epoch": 0.8159547581512899, + "grad_norm": 1.1040101051330566, + "learning_rate": 8.621824481883184e-07, + "loss": 0.3052, + "step": 40760 + }, + { + "epoch": 0.8159747766684183, + "grad_norm": 1.3049708604812622, + "learning_rate": 8.620004695915663e-07, + "loss": 0.3082, + "step": 40761 + }, + { + "epoch": 0.8159947951855466, + "grad_norm": 1.1083884239196777, + "learning_rate": 8.618185083900721e-07, + "loss": 0.2791, + "step": 40762 + }, + { + "epoch": 0.8160148137026749, + "grad_norm": 1.123063087463379, + "learning_rate": 8.616365645846036e-07, + "loss": 0.3096, + "step": 40763 + }, + { + "epoch": 0.8160348322198033, + "grad_norm": 1.7060198783874512, + "learning_rate": 8.614546381759247e-07, + "loss": 0.3299, + "step": 40764 + }, + { + "epoch": 0.8160548507369316, + "grad_norm": 1.224179744720459, + "learning_rate": 8.612727291647999e-07, + "loss": 0.3124, + "step": 40765 + }, + { + "epoch": 0.81607486925406, + "grad_norm": 1.2502690553665161, + "learning_rate": 8.610908375519933e-07, + "loss": 0.3037, + "step": 40766 + }, + { + "epoch": 0.8160948877711883, + "grad_norm": 1.2315009832382202, + "learning_rate": 8.609089633382695e-07, + "loss": 0.3192, + "step": 40767 + }, + { + "epoch": 0.8161149062883167, + "grad_norm": 1.161002516746521, + "learning_rate": 8.607271065243944e-07, + "loss": 0.3154, + "step": 40768 + }, + { + "epoch": 0.816134924805445, + "grad_norm": 1.029975414276123, + "learning_rate": 8.605452671111319e-07, + "loss": 0.2975, + "step": 40769 + }, + { + "epoch": 0.8161549433225734, + "grad_norm": 1.221118688583374, + "learning_rate": 8.603634450992465e-07, + "loss": 0.2936, + "step": 40770 + }, + { + "epoch": 0.8161749618397017, + "grad_norm": 1.1851060390472412, + "learning_rate": 8.601816404895013e-07, + "loss": 0.2545, + "step": 40771 + }, + { + "epoch": 0.81619498035683, + "grad_norm": 1.2020361423492432, + "learning_rate": 8.599998532826625e-07, + "loss": 0.277, + "step": 40772 + }, + { + "epoch": 0.8162149988739584, + "grad_norm": 1.872132420539856, + "learning_rate": 8.598180834794923e-07, + "loss": 0.7562, + "step": 40773 + }, + { + "epoch": 0.8162350173910867, + "grad_norm": 1.0492489337921143, + "learning_rate": 8.596363310807565e-07, + "loss": 0.287, + "step": 40774 + }, + { + "epoch": 0.8162550359082151, + "grad_norm": 1.3517444133758545, + "learning_rate": 8.594545960872181e-07, + "loss": 0.2931, + "step": 40775 + }, + { + "epoch": 0.8162750544253434, + "grad_norm": 1.1132919788360596, + "learning_rate": 8.592728784996423e-07, + "loss": 0.3161, + "step": 40776 + }, + { + "epoch": 0.8162950729424718, + "grad_norm": 1.0629761219024658, + "learning_rate": 8.590911783187927e-07, + "loss": 0.2304, + "step": 40777 + }, + { + "epoch": 0.8163150914596001, + "grad_norm": 1.8760403394699097, + "learning_rate": 8.589094955454319e-07, + "loss": 0.7628, + "step": 40778 + }, + { + "epoch": 0.8163351099767284, + "grad_norm": 1.1572129726409912, + "learning_rate": 8.587278301803254e-07, + "loss": 0.2699, + "step": 40779 + }, + { + "epoch": 0.8163551284938568, + "grad_norm": 1.3333626985549927, + "learning_rate": 8.585461822242336e-07, + "loss": 0.3309, + "step": 40780 + }, + { + "epoch": 0.8163751470109851, + "grad_norm": 1.212814450263977, + "learning_rate": 8.583645516779244e-07, + "loss": 0.3172, + "step": 40781 + }, + { + "epoch": 0.8163951655281135, + "grad_norm": 1.1410800218582153, + "learning_rate": 8.581829385421592e-07, + "loss": 0.3024, + "step": 40782 + }, + { + "epoch": 0.8164151840452418, + "grad_norm": 1.2676796913146973, + "learning_rate": 8.580013428177014e-07, + "loss": 0.3293, + "step": 40783 + }, + { + "epoch": 0.8164352025623702, + "grad_norm": 1.053560495376587, + "learning_rate": 8.578197645053133e-07, + "loss": 0.2513, + "step": 40784 + }, + { + "epoch": 0.8164552210794985, + "grad_norm": 1.0864169597625732, + "learning_rate": 8.576382036057606e-07, + "loss": 0.2727, + "step": 40785 + }, + { + "epoch": 0.8164752395966269, + "grad_norm": 1.1224056482315063, + "learning_rate": 8.574566601198047e-07, + "loss": 0.2904, + "step": 40786 + }, + { + "epoch": 0.8164952581137552, + "grad_norm": 1.0677778720855713, + "learning_rate": 8.572751340482105e-07, + "loss": 0.2776, + "step": 40787 + }, + { + "epoch": 0.8165152766308835, + "grad_norm": 1.0318880081176758, + "learning_rate": 8.5709362539174e-07, + "loss": 0.2685, + "step": 40788 + }, + { + "epoch": 0.8165352951480119, + "grad_norm": 1.0399688482284546, + "learning_rate": 8.569121341511554e-07, + "loss": 0.3084, + "step": 40789 + }, + { + "epoch": 0.8165553136651402, + "grad_norm": 1.0843526124954224, + "learning_rate": 8.567306603272219e-07, + "loss": 0.3005, + "step": 40790 + }, + { + "epoch": 0.8165753321822686, + "grad_norm": 1.106688380241394, + "learning_rate": 8.565492039207007e-07, + "loss": 0.3036, + "step": 40791 + }, + { + "epoch": 0.8165953506993969, + "grad_norm": 1.032181978225708, + "learning_rate": 8.563677649323548e-07, + "loss": 0.2917, + "step": 40792 + }, + { + "epoch": 0.8166153692165253, + "grad_norm": 1.058010220527649, + "learning_rate": 8.561863433629464e-07, + "loss": 0.2686, + "step": 40793 + }, + { + "epoch": 0.8166353877336536, + "grad_norm": 0.991356372833252, + "learning_rate": 8.560049392132397e-07, + "loss": 0.2815, + "step": 40794 + }, + { + "epoch": 0.8166554062507819, + "grad_norm": 1.220840334892273, + "learning_rate": 8.558235524839964e-07, + "loss": 0.2693, + "step": 40795 + }, + { + "epoch": 0.8166754247679103, + "grad_norm": 1.1234960556030273, + "learning_rate": 8.556421831759793e-07, + "loss": 0.2761, + "step": 40796 + }, + { + "epoch": 0.8166954432850386, + "grad_norm": 0.9888127446174622, + "learning_rate": 8.554608312899492e-07, + "loss": 0.2493, + "step": 40797 + }, + { + "epoch": 0.816715461802167, + "grad_norm": 1.0252026319503784, + "learning_rate": 8.552794968266715e-07, + "loss": 0.2948, + "step": 40798 + }, + { + "epoch": 0.8167354803192953, + "grad_norm": 1.1580311059951782, + "learning_rate": 8.550981797869051e-07, + "loss": 0.3444, + "step": 40799 + }, + { + "epoch": 0.8167554988364237, + "grad_norm": 1.0998526811599731, + "learning_rate": 8.549168801714153e-07, + "loss": 0.3159, + "step": 40800 + }, + { + "epoch": 0.816775517353552, + "grad_norm": 1.0783120393753052, + "learning_rate": 8.547355979809629e-07, + "loss": 0.328, + "step": 40801 + }, + { + "epoch": 0.8167955358706804, + "grad_norm": 1.072144865989685, + "learning_rate": 8.545543332163092e-07, + "loss": 0.3195, + "step": 40802 + }, + { + "epoch": 0.8168155543878087, + "grad_norm": 1.220221996307373, + "learning_rate": 8.543730858782173e-07, + "loss": 0.3003, + "step": 40803 + }, + { + "epoch": 0.816835572904937, + "grad_norm": 1.0212657451629639, + "learning_rate": 8.541918559674495e-07, + "loss": 0.263, + "step": 40804 + }, + { + "epoch": 0.8168555914220654, + "grad_norm": 1.0735394954681396, + "learning_rate": 8.540106434847667e-07, + "loss": 0.3031, + "step": 40805 + }, + { + "epoch": 0.8168756099391937, + "grad_norm": 1.0474623441696167, + "learning_rate": 8.538294484309301e-07, + "loss": 0.303, + "step": 40806 + }, + { + "epoch": 0.8168956284563221, + "grad_norm": 2.0215137004852295, + "learning_rate": 8.536482708067029e-07, + "loss": 0.7686, + "step": 40807 + }, + { + "epoch": 0.8169156469734504, + "grad_norm": 1.1737282276153564, + "learning_rate": 8.534671106128461e-07, + "loss": 0.2959, + "step": 40808 + }, + { + "epoch": 0.8169356654905788, + "grad_norm": 1.116040825843811, + "learning_rate": 8.532859678501215e-07, + "loss": 0.269, + "step": 40809 + }, + { + "epoch": 0.8169556840077071, + "grad_norm": 1.233538031578064, + "learning_rate": 8.531048425192889e-07, + "loss": 0.3233, + "step": 40810 + }, + { + "epoch": 0.8169757025248354, + "grad_norm": 1.1459064483642578, + "learning_rate": 8.529237346211128e-07, + "loss": 0.2689, + "step": 40811 + }, + { + "epoch": 0.8169957210419638, + "grad_norm": 0.9928351044654846, + "learning_rate": 8.527426441563513e-07, + "loss": 0.2895, + "step": 40812 + }, + { + "epoch": 0.8170157395590921, + "grad_norm": 1.1336275339126587, + "learning_rate": 8.525615711257684e-07, + "loss": 0.2921, + "step": 40813 + }, + { + "epoch": 0.8170357580762205, + "grad_norm": 1.1196945905685425, + "learning_rate": 8.523805155301241e-07, + "loss": 0.3145, + "step": 40814 + }, + { + "epoch": 0.8170557765933488, + "grad_norm": 1.1784802675247192, + "learning_rate": 8.52199477370178e-07, + "loss": 0.3123, + "step": 40815 + }, + { + "epoch": 0.8170757951104772, + "grad_norm": 1.0936201810836792, + "learning_rate": 8.520184566466944e-07, + "loss": 0.3175, + "step": 40816 + }, + { + "epoch": 0.8170958136276055, + "grad_norm": 1.032076358795166, + "learning_rate": 8.518374533604324e-07, + "loss": 0.277, + "step": 40817 + }, + { + "epoch": 0.8171158321447339, + "grad_norm": 1.2340112924575806, + "learning_rate": 8.516564675121531e-07, + "loss": 0.336, + "step": 40818 + }, + { + "epoch": 0.8171358506618622, + "grad_norm": 1.146002173423767, + "learning_rate": 8.51475499102617e-07, + "loss": 0.2882, + "step": 40819 + }, + { + "epoch": 0.8171558691789905, + "grad_norm": 1.8130697011947632, + "learning_rate": 8.512945481325841e-07, + "loss": 0.7869, + "step": 40820 + }, + { + "epoch": 0.8171758876961189, + "grad_norm": 1.1773167848587036, + "learning_rate": 8.511136146028176e-07, + "loss": 0.2865, + "step": 40821 + }, + { + "epoch": 0.8171959062132472, + "grad_norm": 1.968011736869812, + "learning_rate": 8.509326985140765e-07, + "loss": 0.7019, + "step": 40822 + }, + { + "epoch": 0.8172159247303756, + "grad_norm": 1.1727538108825684, + "learning_rate": 8.507517998671199e-07, + "loss": 0.2913, + "step": 40823 + }, + { + "epoch": 0.8172359432475039, + "grad_norm": 1.082108736038208, + "learning_rate": 8.505709186627115e-07, + "loss": 0.3168, + "step": 40824 + }, + { + "epoch": 0.8172559617646323, + "grad_norm": 1.8703699111938477, + "learning_rate": 8.503900549016081e-07, + "loss": 0.7543, + "step": 40825 + }, + { + "epoch": 0.8172759802817606, + "grad_norm": 1.113889455795288, + "learning_rate": 8.502092085845737e-07, + "loss": 0.3294, + "step": 40826 + }, + { + "epoch": 0.8172959987988889, + "grad_norm": 1.2326630353927612, + "learning_rate": 8.500283797123665e-07, + "loss": 0.2635, + "step": 40827 + }, + { + "epoch": 0.8173160173160173, + "grad_norm": 1.123704433441162, + "learning_rate": 8.498475682857471e-07, + "loss": 0.2907, + "step": 40828 + }, + { + "epoch": 0.8173360358331456, + "grad_norm": 1.1399612426757812, + "learning_rate": 8.496667743054742e-07, + "loss": 0.2841, + "step": 40829 + }, + { + "epoch": 0.817356054350274, + "grad_norm": 1.8796213865280151, + "learning_rate": 8.494859977723097e-07, + "loss": 0.7418, + "step": 40830 + }, + { + "epoch": 0.8173760728674023, + "grad_norm": 1.0831265449523926, + "learning_rate": 8.493052386870137e-07, + "loss": 0.2833, + "step": 40831 + }, + { + "epoch": 0.8173960913845307, + "grad_norm": 2.089327573776245, + "learning_rate": 8.491244970503449e-07, + "loss": 0.8084, + "step": 40832 + }, + { + "epoch": 0.817416109901659, + "grad_norm": 1.278626561164856, + "learning_rate": 8.489437728630618e-07, + "loss": 0.2821, + "step": 40833 + }, + { + "epoch": 0.8174361284187874, + "grad_norm": 1.173883318901062, + "learning_rate": 8.487630661259272e-07, + "loss": 0.2759, + "step": 40834 + }, + { + "epoch": 0.8174561469359157, + "grad_norm": 1.0249415636062622, + "learning_rate": 8.485823768396995e-07, + "loss": 0.2833, + "step": 40835 + }, + { + "epoch": 0.817476165453044, + "grad_norm": 1.1867568492889404, + "learning_rate": 8.484017050051369e-07, + "loss": 0.3646, + "step": 40836 + }, + { + "epoch": 0.8174961839701724, + "grad_norm": 1.1924952268600464, + "learning_rate": 8.48221050623001e-07, + "loss": 0.3099, + "step": 40837 + }, + { + "epoch": 0.8175162024873007, + "grad_norm": 1.0183799266815186, + "learning_rate": 8.480404136940495e-07, + "loss": 0.279, + "step": 40838 + }, + { + "epoch": 0.8175362210044291, + "grad_norm": 1.2814724445343018, + "learning_rate": 8.478597942190436e-07, + "loss": 0.2907, + "step": 40839 + }, + { + "epoch": 0.8175562395215574, + "grad_norm": 1.066567301750183, + "learning_rate": 8.476791921987415e-07, + "loss": 0.3331, + "step": 40840 + }, + { + "epoch": 0.8175762580386858, + "grad_norm": 1.2755557298660278, + "learning_rate": 8.474986076339026e-07, + "loss": 0.3302, + "step": 40841 + }, + { + "epoch": 0.8175962765558141, + "grad_norm": 1.0671749114990234, + "learning_rate": 8.473180405252845e-07, + "loss": 0.2721, + "step": 40842 + }, + { + "epoch": 0.8176162950729424, + "grad_norm": 1.0865846872329712, + "learning_rate": 8.47137490873649e-07, + "loss": 0.2758, + "step": 40843 + }, + { + "epoch": 0.8176363135900708, + "grad_norm": 1.1970784664154053, + "learning_rate": 8.469569586797544e-07, + "loss": 0.3306, + "step": 40844 + }, + { + "epoch": 0.8176563321071991, + "grad_norm": 1.0516412258148193, + "learning_rate": 8.467764439443582e-07, + "loss": 0.2797, + "step": 40845 + }, + { + "epoch": 0.8176763506243275, + "grad_norm": 1.4129700660705566, + "learning_rate": 8.465959466682189e-07, + "loss": 0.2799, + "step": 40846 + }, + { + "epoch": 0.8176963691414558, + "grad_norm": 1.0796443223953247, + "learning_rate": 8.464154668520979e-07, + "loss": 0.3005, + "step": 40847 + }, + { + "epoch": 0.8177163876585842, + "grad_norm": 1.8173788785934448, + "learning_rate": 8.462350044967527e-07, + "loss": 0.7305, + "step": 40848 + }, + { + "epoch": 0.8177364061757125, + "grad_norm": 1.113623023033142, + "learning_rate": 8.460545596029401e-07, + "loss": 0.3398, + "step": 40849 + }, + { + "epoch": 0.8177564246928409, + "grad_norm": 1.012697696685791, + "learning_rate": 8.458741321714215e-07, + "loss": 0.3239, + "step": 40850 + }, + { + "epoch": 0.8177764432099692, + "grad_norm": 1.0586373805999756, + "learning_rate": 8.456937222029527e-07, + "loss": 0.2921, + "step": 40851 + }, + { + "epoch": 0.8177964617270975, + "grad_norm": 1.1501260995864868, + "learning_rate": 8.455133296982942e-07, + "loss": 0.2979, + "step": 40852 + }, + { + "epoch": 0.8178164802442259, + "grad_norm": 1.9823070764541626, + "learning_rate": 8.453329546582045e-07, + "loss": 0.7297, + "step": 40853 + }, + { + "epoch": 0.8178364987613542, + "grad_norm": 1.0746294260025024, + "learning_rate": 8.451525970834407e-07, + "loss": 0.2989, + "step": 40854 + }, + { + "epoch": 0.8178565172784826, + "grad_norm": 1.1411042213439941, + "learning_rate": 8.449722569747598e-07, + "loss": 0.322, + "step": 40855 + }, + { + "epoch": 0.8178765357956109, + "grad_norm": 1.1580783128738403, + "learning_rate": 8.447919343329225e-07, + "loss": 0.2904, + "step": 40856 + }, + { + "epoch": 0.8178965543127393, + "grad_norm": 1.124462604522705, + "learning_rate": 8.446116291586859e-07, + "loss": 0.2814, + "step": 40857 + }, + { + "epoch": 0.8179165728298676, + "grad_norm": 1.2669297456741333, + "learning_rate": 8.444313414528082e-07, + "loss": 0.3182, + "step": 40858 + }, + { + "epoch": 0.8179365913469959, + "grad_norm": 1.9620102643966675, + "learning_rate": 8.442510712160462e-07, + "loss": 0.7492, + "step": 40859 + }, + { + "epoch": 0.8179566098641243, + "grad_norm": 1.1142582893371582, + "learning_rate": 8.440708184491576e-07, + "loss": 0.3038, + "step": 40860 + }, + { + "epoch": 0.8179766283812526, + "grad_norm": 1.2163243293762207, + "learning_rate": 8.438905831529015e-07, + "loss": 0.3025, + "step": 40861 + }, + { + "epoch": 0.817996646898381, + "grad_norm": 1.217612385749817, + "learning_rate": 8.437103653280343e-07, + "loss": 0.2946, + "step": 40862 + }, + { + "epoch": 0.8180166654155093, + "grad_norm": 1.9739991426467896, + "learning_rate": 8.435301649753152e-07, + "loss": 0.7762, + "step": 40863 + }, + { + "epoch": 0.8180366839326377, + "grad_norm": 1.1670994758605957, + "learning_rate": 8.433499820954999e-07, + "loss": 0.2543, + "step": 40864 + }, + { + "epoch": 0.818056702449766, + "grad_norm": 1.093604326248169, + "learning_rate": 8.431698166893481e-07, + "loss": 0.28, + "step": 40865 + }, + { + "epoch": 0.8180767209668943, + "grad_norm": 1.207985758781433, + "learning_rate": 8.429896687576156e-07, + "loss": 0.228, + "step": 40866 + }, + { + "epoch": 0.8180967394840227, + "grad_norm": 1.177009105682373, + "learning_rate": 8.428095383010598e-07, + "loss": 0.2454, + "step": 40867 + }, + { + "epoch": 0.818116758001151, + "grad_norm": 1.183340072631836, + "learning_rate": 8.42629425320437e-07, + "loss": 0.2634, + "step": 40868 + }, + { + "epoch": 0.8181367765182794, + "grad_norm": 1.069079041481018, + "learning_rate": 8.424493298165065e-07, + "loss": 0.2765, + "step": 40869 + }, + { + "epoch": 0.8181567950354077, + "grad_norm": 1.927134394645691, + "learning_rate": 8.422692517900244e-07, + "loss": 0.743, + "step": 40870 + }, + { + "epoch": 0.8181768135525361, + "grad_norm": 1.4990304708480835, + "learning_rate": 8.420891912417478e-07, + "loss": 0.2761, + "step": 40871 + }, + { + "epoch": 0.8181968320696644, + "grad_norm": 1.1922426223754883, + "learning_rate": 8.419091481724328e-07, + "loss": 0.3127, + "step": 40872 + }, + { + "epoch": 0.8182168505867928, + "grad_norm": 1.0050573348999023, + "learning_rate": 8.417291225828362e-07, + "loss": 0.2776, + "step": 40873 + }, + { + "epoch": 0.8182368691039211, + "grad_norm": 1.2491399049758911, + "learning_rate": 8.41549114473717e-07, + "loss": 0.2679, + "step": 40874 + }, + { + "epoch": 0.8182568876210494, + "grad_norm": 1.8564754724502563, + "learning_rate": 8.413691238458288e-07, + "loss": 0.8014, + "step": 40875 + }, + { + "epoch": 0.8182769061381778, + "grad_norm": 1.2091354131698608, + "learning_rate": 8.411891506999309e-07, + "loss": 0.2784, + "step": 40876 + }, + { + "epoch": 0.8182969246553061, + "grad_norm": 1.1085081100463867, + "learning_rate": 8.410091950367777e-07, + "loss": 0.2438, + "step": 40877 + }, + { + "epoch": 0.8183169431724345, + "grad_norm": 1.2340211868286133, + "learning_rate": 8.408292568571286e-07, + "loss": 0.2941, + "step": 40878 + }, + { + "epoch": 0.8183369616895628, + "grad_norm": 1.10813307762146, + "learning_rate": 8.406493361617374e-07, + "loss": 0.3087, + "step": 40879 + }, + { + "epoch": 0.8183569802066912, + "grad_norm": 1.1382349729537964, + "learning_rate": 8.404694329513619e-07, + "loss": 0.2722, + "step": 40880 + }, + { + "epoch": 0.8183769987238195, + "grad_norm": 1.1592305898666382, + "learning_rate": 8.402895472267575e-07, + "loss": 0.3068, + "step": 40881 + }, + { + "epoch": 0.8183970172409478, + "grad_norm": 1.1863124370574951, + "learning_rate": 8.401096789886798e-07, + "loss": 0.2633, + "step": 40882 + }, + { + "epoch": 0.8184170357580762, + "grad_norm": 1.0845038890838623, + "learning_rate": 8.399298282378864e-07, + "loss": 0.2558, + "step": 40883 + }, + { + "epoch": 0.8184370542752045, + "grad_norm": 1.0934810638427734, + "learning_rate": 8.397499949751331e-07, + "loss": 0.3011, + "step": 40884 + }, + { + "epoch": 0.8184570727923329, + "grad_norm": 1.0594115257263184, + "learning_rate": 8.395701792011756e-07, + "loss": 0.2753, + "step": 40885 + }, + { + "epoch": 0.8184770913094612, + "grad_norm": 1.147605538368225, + "learning_rate": 8.39390380916768e-07, + "loss": 0.3023, + "step": 40886 + }, + { + "epoch": 0.8184971098265896, + "grad_norm": 1.9404739141464233, + "learning_rate": 8.392106001226696e-07, + "loss": 0.7713, + "step": 40887 + }, + { + "epoch": 0.8185171283437179, + "grad_norm": 1.0931918621063232, + "learning_rate": 8.390308368196331e-07, + "loss": 0.2891, + "step": 40888 + }, + { + "epoch": 0.8185371468608463, + "grad_norm": 1.1749716997146606, + "learning_rate": 8.388510910084169e-07, + "loss": 0.3065, + "step": 40889 + }, + { + "epoch": 0.8185571653779746, + "grad_norm": 1.2661693096160889, + "learning_rate": 8.386713626897735e-07, + "loss": 0.3217, + "step": 40890 + }, + { + "epoch": 0.8185771838951029, + "grad_norm": 1.0609517097473145, + "learning_rate": 8.38491651864462e-07, + "loss": 0.2888, + "step": 40891 + }, + { + "epoch": 0.8185972024122313, + "grad_norm": 1.065051555633545, + "learning_rate": 8.383119585332356e-07, + "loss": 0.2833, + "step": 40892 + }, + { + "epoch": 0.8186172209293596, + "grad_norm": 1.0379512310028076, + "learning_rate": 8.381322826968502e-07, + "loss": 0.3389, + "step": 40893 + }, + { + "epoch": 0.818637239446488, + "grad_norm": 1.0130457878112793, + "learning_rate": 8.379526243560615e-07, + "loss": 0.2896, + "step": 40894 + }, + { + "epoch": 0.8186572579636163, + "grad_norm": 1.9463990926742554, + "learning_rate": 8.377729835116227e-07, + "loss": 0.8364, + "step": 40895 + }, + { + "epoch": 0.8186772764807447, + "grad_norm": 1.1870225667953491, + "learning_rate": 8.375933601642916e-07, + "loss": 0.2297, + "step": 40896 + }, + { + "epoch": 0.818697294997873, + "grad_norm": 1.131351351737976, + "learning_rate": 8.374137543148225e-07, + "loss": 0.281, + "step": 40897 + }, + { + "epoch": 0.8187173135150013, + "grad_norm": 1.1873970031738281, + "learning_rate": 8.372341659639699e-07, + "loss": 0.3219, + "step": 40898 + }, + { + "epoch": 0.8187373320321297, + "grad_norm": 1.0825937986373901, + "learning_rate": 8.370545951124881e-07, + "loss": 0.3208, + "step": 40899 + }, + { + "epoch": 0.818757350549258, + "grad_norm": 1.0467643737792969, + "learning_rate": 8.368750417611337e-07, + "loss": 0.2447, + "step": 40900 + }, + { + "epoch": 0.8187773690663864, + "grad_norm": 1.1713529825210571, + "learning_rate": 8.366955059106602e-07, + "loss": 0.3176, + "step": 40901 + }, + { + "epoch": 0.8187973875835147, + "grad_norm": 1.17919921875, + "learning_rate": 8.365159875618234e-07, + "loss": 0.2408, + "step": 40902 + }, + { + "epoch": 0.8188174061006431, + "grad_norm": 1.0873323678970337, + "learning_rate": 8.363364867153778e-07, + "loss": 0.3219, + "step": 40903 + }, + { + "epoch": 0.8188374246177714, + "grad_norm": 1.2664200067520142, + "learning_rate": 8.361570033720762e-07, + "loss": 0.2717, + "step": 40904 + }, + { + "epoch": 0.8188574431348998, + "grad_norm": 1.226977825164795, + "learning_rate": 8.359775375326756e-07, + "loss": 0.284, + "step": 40905 + }, + { + "epoch": 0.8188774616520281, + "grad_norm": 1.2243624925613403, + "learning_rate": 8.357980891979295e-07, + "loss": 0.2894, + "step": 40906 + }, + { + "epoch": 0.8188974801691564, + "grad_norm": 1.1900873184204102, + "learning_rate": 8.356186583685921e-07, + "loss": 0.2989, + "step": 40907 + }, + { + "epoch": 0.8189174986862848, + "grad_norm": 1.280060052871704, + "learning_rate": 8.354392450454162e-07, + "loss": 0.2974, + "step": 40908 + }, + { + "epoch": 0.8189375172034131, + "grad_norm": 1.086698293685913, + "learning_rate": 8.352598492291592e-07, + "loss": 0.2751, + "step": 40909 + }, + { + "epoch": 0.8189575357205415, + "grad_norm": 1.0348117351531982, + "learning_rate": 8.350804709205729e-07, + "loss": 0.2987, + "step": 40910 + }, + { + "epoch": 0.8189775542376698, + "grad_norm": 1.184537649154663, + "learning_rate": 8.349011101204125e-07, + "loss": 0.2679, + "step": 40911 + }, + { + "epoch": 0.8189975727547982, + "grad_norm": 1.149537444114685, + "learning_rate": 8.347217668294299e-07, + "loss": 0.3103, + "step": 40912 + }, + { + "epoch": 0.8190175912719265, + "grad_norm": 1.093522548675537, + "learning_rate": 8.345424410483821e-07, + "loss": 0.2433, + "step": 40913 + }, + { + "epoch": 0.8190376097890548, + "grad_norm": 1.0768619775772095, + "learning_rate": 8.343631327780205e-07, + "loss": 0.2921, + "step": 40914 + }, + { + "epoch": 0.8190576283061832, + "grad_norm": 1.122167944908142, + "learning_rate": 8.341838420191006e-07, + "loss": 0.3009, + "step": 40915 + }, + { + "epoch": 0.8190776468233115, + "grad_norm": 1.1087392568588257, + "learning_rate": 8.340045687723753e-07, + "loss": 0.2904, + "step": 40916 + }, + { + "epoch": 0.8190976653404399, + "grad_norm": 1.0841891765594482, + "learning_rate": 8.338253130385971e-07, + "loss": 0.2873, + "step": 40917 + }, + { + "epoch": 0.8191176838575682, + "grad_norm": 1.1202740669250488, + "learning_rate": 8.336460748185221e-07, + "loss": 0.3111, + "step": 40918 + }, + { + "epoch": 0.8191377023746966, + "grad_norm": 1.0352421998977661, + "learning_rate": 8.334668541129026e-07, + "loss": 0.2813, + "step": 40919 + }, + { + "epoch": 0.8191577208918249, + "grad_norm": 1.0646255016326904, + "learning_rate": 8.332876509224913e-07, + "loss": 0.2692, + "step": 40920 + }, + { + "epoch": 0.8191777394089533, + "grad_norm": 1.0833961963653564, + "learning_rate": 8.331084652480409e-07, + "loss": 0.3367, + "step": 40921 + }, + { + "epoch": 0.8191977579260816, + "grad_norm": 1.7872499227523804, + "learning_rate": 8.329292970903074e-07, + "loss": 0.7717, + "step": 40922 + }, + { + "epoch": 0.8192177764432099, + "grad_norm": 1.1559120416641235, + "learning_rate": 8.32750146450042e-07, + "loss": 0.3008, + "step": 40923 + }, + { + "epoch": 0.8192377949603383, + "grad_norm": 1.150557041168213, + "learning_rate": 8.325710133279985e-07, + "loss": 0.3207, + "step": 40924 + }, + { + "epoch": 0.8192578134774666, + "grad_norm": 1.2035984992980957, + "learning_rate": 8.323918977249279e-07, + "loss": 0.3063, + "step": 40925 + }, + { + "epoch": 0.819277831994595, + "grad_norm": 1.8834277391433716, + "learning_rate": 8.322127996415869e-07, + "loss": 0.7443, + "step": 40926 + }, + { + "epoch": 0.8192978505117233, + "grad_norm": 1.0584214925765991, + "learning_rate": 8.320337190787243e-07, + "loss": 0.2492, + "step": 40927 + }, + { + "epoch": 0.8193178690288517, + "grad_norm": 1.1344149112701416, + "learning_rate": 8.318546560370966e-07, + "loss": 0.3055, + "step": 40928 + }, + { + "epoch": 0.81933788754598, + "grad_norm": 1.0003437995910645, + "learning_rate": 8.31675610517455e-07, + "loss": 0.327, + "step": 40929 + }, + { + "epoch": 0.8193579060631083, + "grad_norm": 1.0995794534683228, + "learning_rate": 8.314965825205507e-07, + "loss": 0.2798, + "step": 40930 + }, + { + "epoch": 0.8193779245802367, + "grad_norm": 1.039347529411316, + "learning_rate": 8.31317572047139e-07, + "loss": 0.2967, + "step": 40931 + }, + { + "epoch": 0.819397943097365, + "grad_norm": 1.0870147943496704, + "learning_rate": 8.31138579097971e-07, + "loss": 0.2807, + "step": 40932 + }, + { + "epoch": 0.8194179616144934, + "grad_norm": 1.083939552307129, + "learning_rate": 8.309596036737993e-07, + "loss": 0.3036, + "step": 40933 + }, + { + "epoch": 0.8194379801316217, + "grad_norm": 1.2226134538650513, + "learning_rate": 8.307806457753764e-07, + "loss": 0.2985, + "step": 40934 + }, + { + "epoch": 0.8194579986487501, + "grad_norm": 1.0223990678787231, + "learning_rate": 8.306017054034532e-07, + "loss": 0.2935, + "step": 40935 + }, + { + "epoch": 0.8194780171658784, + "grad_norm": 1.0342084169387817, + "learning_rate": 8.304227825587846e-07, + "loss": 0.2408, + "step": 40936 + }, + { + "epoch": 0.8194980356830068, + "grad_norm": 1.1108940839767456, + "learning_rate": 8.302438772421206e-07, + "loss": 0.2838, + "step": 40937 + }, + { + "epoch": 0.8195180542001351, + "grad_norm": 1.0612379312515259, + "learning_rate": 8.300649894542134e-07, + "loss": 0.2847, + "step": 40938 + }, + { + "epoch": 0.8195380727172634, + "grad_norm": 1.0799287557601929, + "learning_rate": 8.298861191958169e-07, + "loss": 0.3146, + "step": 40939 + }, + { + "epoch": 0.8195580912343918, + "grad_norm": 1.1094461679458618, + "learning_rate": 8.297072664676803e-07, + "loss": 0.2967, + "step": 40940 + }, + { + "epoch": 0.8195781097515201, + "grad_norm": 1.3197553157806396, + "learning_rate": 8.295284312705576e-07, + "loss": 0.3646, + "step": 40941 + }, + { + "epoch": 0.8195981282686485, + "grad_norm": 0.9841054677963257, + "learning_rate": 8.293496136052004e-07, + "loss": 0.2828, + "step": 40942 + }, + { + "epoch": 0.8196181467857768, + "grad_norm": 1.0952048301696777, + "learning_rate": 8.291708134723602e-07, + "loss": 0.2863, + "step": 40943 + }, + { + "epoch": 0.8196381653029052, + "grad_norm": 1.1862552165985107, + "learning_rate": 8.289920308727867e-07, + "loss": 0.3236, + "step": 40944 + }, + { + "epoch": 0.8196581838200335, + "grad_norm": 2.0016791820526123, + "learning_rate": 8.28813265807234e-07, + "loss": 0.7226, + "step": 40945 + }, + { + "epoch": 0.8196782023371618, + "grad_norm": 1.1726887226104736, + "learning_rate": 8.286345182764533e-07, + "loss": 0.2507, + "step": 40946 + }, + { + "epoch": 0.8196982208542902, + "grad_norm": 1.1542829275131226, + "learning_rate": 8.284557882811949e-07, + "loss": 0.3238, + "step": 40947 + }, + { + "epoch": 0.8197182393714185, + "grad_norm": 1.101891040802002, + "learning_rate": 8.282770758222097e-07, + "loss": 0.2971, + "step": 40948 + }, + { + "epoch": 0.8197382578885469, + "grad_norm": 1.0781629085540771, + "learning_rate": 8.280983809002513e-07, + "loss": 0.3153, + "step": 40949 + }, + { + "epoch": 0.8197582764056752, + "grad_norm": 1.0987415313720703, + "learning_rate": 8.279197035160691e-07, + "loss": 0.3038, + "step": 40950 + }, + { + "epoch": 0.8197782949228036, + "grad_norm": 1.0863516330718994, + "learning_rate": 8.277410436704136e-07, + "loss": 0.3166, + "step": 40951 + }, + { + "epoch": 0.8197983134399319, + "grad_norm": 1.829268217086792, + "learning_rate": 8.275624013640383e-07, + "loss": 0.7771, + "step": 40952 + }, + { + "epoch": 0.8198183319570603, + "grad_norm": 1.8745590448379517, + "learning_rate": 8.27383776597691e-07, + "loss": 0.8256, + "step": 40953 + }, + { + "epoch": 0.8198383504741886, + "grad_norm": 1.90697181224823, + "learning_rate": 8.272051693721261e-07, + "loss": 0.7864, + "step": 40954 + }, + { + "epoch": 0.8198583689913169, + "grad_norm": 1.1251208782196045, + "learning_rate": 8.270265796880928e-07, + "loss": 0.2955, + "step": 40955 + }, + { + "epoch": 0.8198783875084453, + "grad_norm": 1.0985071659088135, + "learning_rate": 8.268480075463408e-07, + "loss": 0.3376, + "step": 40956 + }, + { + "epoch": 0.8198984060255736, + "grad_norm": 2.028752565383911, + "learning_rate": 8.266694529476216e-07, + "loss": 0.7725, + "step": 40957 + }, + { + "epoch": 0.819918424542702, + "grad_norm": 1.1673904657363892, + "learning_rate": 8.264909158926865e-07, + "loss": 0.3152, + "step": 40958 + }, + { + "epoch": 0.8199384430598303, + "grad_norm": 1.1264785528182983, + "learning_rate": 8.263123963822856e-07, + "loss": 0.3259, + "step": 40959 + }, + { + "epoch": 0.8199584615769587, + "grad_norm": 1.0338134765625, + "learning_rate": 8.261338944171687e-07, + "loss": 0.3007, + "step": 40960 + }, + { + "epoch": 0.819978480094087, + "grad_norm": 1.1974053382873535, + "learning_rate": 8.25955409998086e-07, + "loss": 0.3149, + "step": 40961 + }, + { + "epoch": 0.8199984986112153, + "grad_norm": 1.1497880220413208, + "learning_rate": 8.257769431257895e-07, + "loss": 0.3115, + "step": 40962 + }, + { + "epoch": 0.8200185171283437, + "grad_norm": 1.2198927402496338, + "learning_rate": 8.255984938010286e-07, + "loss": 0.2953, + "step": 40963 + }, + { + "epoch": 0.820038535645472, + "grad_norm": 1.0490689277648926, + "learning_rate": 8.254200620245523e-07, + "loss": 0.2538, + "step": 40964 + }, + { + "epoch": 0.8200585541626004, + "grad_norm": 1.0684700012207031, + "learning_rate": 8.252416477971126e-07, + "loss": 0.268, + "step": 40965 + }, + { + "epoch": 0.8200785726797287, + "grad_norm": 1.1564244031906128, + "learning_rate": 8.250632511194573e-07, + "loss": 0.2996, + "step": 40966 + }, + { + "epoch": 0.8200985911968571, + "grad_norm": 1.011996865272522, + "learning_rate": 8.248848719923386e-07, + "loss": 0.2633, + "step": 40967 + }, + { + "epoch": 0.8201186097139854, + "grad_norm": 1.1701488494873047, + "learning_rate": 8.247065104165058e-07, + "loss": 0.3134, + "step": 40968 + }, + { + "epoch": 0.8201386282311138, + "grad_norm": 1.276440978050232, + "learning_rate": 8.245281663927085e-07, + "loss": 0.2327, + "step": 40969 + }, + { + "epoch": 0.8201586467482421, + "grad_norm": 1.1383973360061646, + "learning_rate": 8.243498399216948e-07, + "loss": 0.3059, + "step": 40970 + }, + { + "epoch": 0.8201786652653704, + "grad_norm": 1.92202627658844, + "learning_rate": 8.24171531004217e-07, + "loss": 0.71, + "step": 40971 + }, + { + "epoch": 0.8201986837824988, + "grad_norm": 1.0771970748901367, + "learning_rate": 8.239932396410233e-07, + "loss": 0.3415, + "step": 40972 + }, + { + "epoch": 0.8202187022996271, + "grad_norm": 2.0046002864837646, + "learning_rate": 8.238149658328632e-07, + "loss": 0.7728, + "step": 40973 + }, + { + "epoch": 0.8202387208167555, + "grad_norm": 1.332396149635315, + "learning_rate": 8.236367095804865e-07, + "loss": 0.3254, + "step": 40974 + }, + { + "epoch": 0.8202587393338838, + "grad_norm": 1.1073857545852661, + "learning_rate": 8.23458470884641e-07, + "loss": 0.3036, + "step": 40975 + }, + { + "epoch": 0.8202787578510122, + "grad_norm": 1.072992205619812, + "learning_rate": 8.232802497460784e-07, + "loss": 0.2795, + "step": 40976 + }, + { + "epoch": 0.8202987763681405, + "grad_norm": 1.2538416385650635, + "learning_rate": 8.231020461655459e-07, + "loss": 0.2611, + "step": 40977 + }, + { + "epoch": 0.8203187948852688, + "grad_norm": 1.1817110776901245, + "learning_rate": 8.229238601437945e-07, + "loss": 0.2876, + "step": 40978 + }, + { + "epoch": 0.8203388134023972, + "grad_norm": 1.0511990785598755, + "learning_rate": 8.227456916815712e-07, + "loss": 0.2609, + "step": 40979 + }, + { + "epoch": 0.8203588319195255, + "grad_norm": 0.9696854948997498, + "learning_rate": 8.225675407796269e-07, + "loss": 0.2743, + "step": 40980 + }, + { + "epoch": 0.8203788504366539, + "grad_norm": 2.116079568862915, + "learning_rate": 8.223894074387101e-07, + "loss": 0.7796, + "step": 40981 + }, + { + "epoch": 0.8203988689537822, + "grad_norm": 1.2075127363204956, + "learning_rate": 8.222112916595692e-07, + "loss": 0.3072, + "step": 40982 + }, + { + "epoch": 0.8204188874709106, + "grad_norm": 1.1216826438903809, + "learning_rate": 8.220331934429515e-07, + "loss": 0.2973, + "step": 40983 + }, + { + "epoch": 0.8204389059880389, + "grad_norm": 1.944391131401062, + "learning_rate": 8.218551127896085e-07, + "loss": 0.7813, + "step": 40984 + }, + { + "epoch": 0.8204589245051673, + "grad_norm": 1.1095165014266968, + "learning_rate": 8.216770497002874e-07, + "loss": 0.3023, + "step": 40985 + }, + { + "epoch": 0.8204789430222956, + "grad_norm": 1.0836009979248047, + "learning_rate": 8.214990041757364e-07, + "loss": 0.2536, + "step": 40986 + }, + { + "epoch": 0.8204989615394239, + "grad_norm": 1.1466615200042725, + "learning_rate": 8.213209762167052e-07, + "loss": 0.3117, + "step": 40987 + }, + { + "epoch": 0.8205189800565523, + "grad_norm": 1.22971510887146, + "learning_rate": 8.211429658239395e-07, + "loss": 0.3158, + "step": 40988 + }, + { + "epoch": 0.8205389985736806, + "grad_norm": 1.2119066715240479, + "learning_rate": 8.209649729981911e-07, + "loss": 0.2842, + "step": 40989 + }, + { + "epoch": 0.820559017090809, + "grad_norm": 1.1625968217849731, + "learning_rate": 8.207869977402049e-07, + "loss": 0.2743, + "step": 40990 + }, + { + "epoch": 0.8205790356079373, + "grad_norm": 1.1648755073547363, + "learning_rate": 8.206090400507327e-07, + "loss": 0.2959, + "step": 40991 + }, + { + "epoch": 0.8205990541250657, + "grad_norm": 1.0816224813461304, + "learning_rate": 8.204310999305187e-07, + "loss": 0.299, + "step": 40992 + }, + { + "epoch": 0.820619072642194, + "grad_norm": 1.0560359954833984, + "learning_rate": 8.202531773803146e-07, + "loss": 0.2798, + "step": 40993 + }, + { + "epoch": 0.8206390911593223, + "grad_norm": 1.919867753982544, + "learning_rate": 8.200752724008665e-07, + "loss": 0.7206, + "step": 40994 + }, + { + "epoch": 0.8206591096764507, + "grad_norm": 1.0664697885513306, + "learning_rate": 8.198973849929226e-07, + "loss": 0.3261, + "step": 40995 + }, + { + "epoch": 0.820679128193579, + "grad_norm": 1.0616977214813232, + "learning_rate": 8.197195151572301e-07, + "loss": 0.248, + "step": 40996 + }, + { + "epoch": 0.8206991467107074, + "grad_norm": 1.1383943557739258, + "learning_rate": 8.195416628945363e-07, + "loss": 0.3023, + "step": 40997 + }, + { + "epoch": 0.8207191652278357, + "grad_norm": 1.8313645124435425, + "learning_rate": 8.19363828205591e-07, + "loss": 0.7554, + "step": 40998 + }, + { + "epoch": 0.8207391837449641, + "grad_norm": 1.2648446559906006, + "learning_rate": 8.191860110911404e-07, + "loss": 0.3037, + "step": 40999 + }, + { + "epoch": 0.8207592022620924, + "grad_norm": 1.0719374418258667, + "learning_rate": 8.190082115519321e-07, + "loss": 0.3011, + "step": 41000 + }, + { + "epoch": 0.8207792207792208, + "grad_norm": 1.153232455253601, + "learning_rate": 8.188304295887118e-07, + "loss": 0.2746, + "step": 41001 + }, + { + "epoch": 0.8207992392963491, + "grad_norm": 1.1849762201309204, + "learning_rate": 8.186526652022303e-07, + "loss": 0.3348, + "step": 41002 + }, + { + "epoch": 0.8208192578134774, + "grad_norm": 1.092675805091858, + "learning_rate": 8.184749183932317e-07, + "loss": 0.2548, + "step": 41003 + }, + { + "epoch": 0.8208392763306058, + "grad_norm": 1.1499923467636108, + "learning_rate": 8.182971891624664e-07, + "loss": 0.2985, + "step": 41004 + }, + { + "epoch": 0.8208592948477341, + "grad_norm": 1.0211114883422852, + "learning_rate": 8.181194775106793e-07, + "loss": 0.2712, + "step": 41005 + }, + { + "epoch": 0.8208793133648625, + "grad_norm": 1.1347286701202393, + "learning_rate": 8.179417834386166e-07, + "loss": 0.3101, + "step": 41006 + }, + { + "epoch": 0.8208993318819908, + "grad_norm": 1.0477017164230347, + "learning_rate": 8.177641069470282e-07, + "loss": 0.3004, + "step": 41007 + }, + { + "epoch": 0.8209193503991192, + "grad_norm": 1.2859773635864258, + "learning_rate": 8.175864480366597e-07, + "loss": 0.2594, + "step": 41008 + }, + { + "epoch": 0.8209393689162475, + "grad_norm": 1.113061547279358, + "learning_rate": 8.174088067082575e-07, + "loss": 0.2798, + "step": 41009 + }, + { + "epoch": 0.8209593874333758, + "grad_norm": 1.1262729167938232, + "learning_rate": 8.172311829625673e-07, + "loss": 0.3128, + "step": 41010 + }, + { + "epoch": 0.8209794059505042, + "grad_norm": 1.049560785293579, + "learning_rate": 8.17053576800338e-07, + "loss": 0.3021, + "step": 41011 + }, + { + "epoch": 0.8209994244676325, + "grad_norm": 1.113318681716919, + "learning_rate": 8.16875988222316e-07, + "loss": 0.3082, + "step": 41012 + }, + { + "epoch": 0.8210194429847609, + "grad_norm": 1.1295547485351562, + "learning_rate": 8.166984172292463e-07, + "loss": 0.277, + "step": 41013 + }, + { + "epoch": 0.8210394615018892, + "grad_norm": 1.2664371728897095, + "learning_rate": 8.165208638218752e-07, + "loss": 0.3063, + "step": 41014 + }, + { + "epoch": 0.8210594800190176, + "grad_norm": 1.1718999147415161, + "learning_rate": 8.163433280009519e-07, + "loss": 0.2654, + "step": 41015 + }, + { + "epoch": 0.8210794985361459, + "grad_norm": 1.1545519828796387, + "learning_rate": 8.161658097672192e-07, + "loss": 0.2694, + "step": 41016 + }, + { + "epoch": 0.8210995170532743, + "grad_norm": 1.9881072044372559, + "learning_rate": 8.15988309121426e-07, + "loss": 0.7382, + "step": 41017 + }, + { + "epoch": 0.8211195355704026, + "grad_norm": 1.9380435943603516, + "learning_rate": 8.158108260643183e-07, + "loss": 0.7346, + "step": 41018 + }, + { + "epoch": 0.8211395540875309, + "grad_norm": 0.9729728698730469, + "learning_rate": 8.156333605966399e-07, + "loss": 0.2669, + "step": 41019 + }, + { + "epoch": 0.8211595726046593, + "grad_norm": 1.0840799808502197, + "learning_rate": 8.154559127191391e-07, + "loss": 0.31, + "step": 41020 + }, + { + "epoch": 0.8211795911217876, + "grad_norm": 1.370734691619873, + "learning_rate": 8.152784824325615e-07, + "loss": 0.3284, + "step": 41021 + }, + { + "epoch": 0.821199609638916, + "grad_norm": 1.0507349967956543, + "learning_rate": 8.151010697376527e-07, + "loss": 0.3118, + "step": 41022 + }, + { + "epoch": 0.8212196281560443, + "grad_norm": 1.0613157749176025, + "learning_rate": 8.149236746351574e-07, + "loss": 0.2587, + "step": 41023 + }, + { + "epoch": 0.8212396466731727, + "grad_norm": 1.0266770124435425, + "learning_rate": 8.14746297125823e-07, + "loss": 0.2862, + "step": 41024 + }, + { + "epoch": 0.821259665190301, + "grad_norm": 1.1193865537643433, + "learning_rate": 8.145689372103949e-07, + "loss": 0.2711, + "step": 41025 + }, + { + "epoch": 0.8212796837074293, + "grad_norm": 1.076338768005371, + "learning_rate": 8.14391594889618e-07, + "loss": 0.2936, + "step": 41026 + }, + { + "epoch": 0.8212997022245577, + "grad_norm": 0.9952670931816101, + "learning_rate": 8.142142701642369e-07, + "loss": 0.2586, + "step": 41027 + }, + { + "epoch": 0.821319720741686, + "grad_norm": 1.1155987977981567, + "learning_rate": 8.140369630349998e-07, + "loss": 0.2876, + "step": 41028 + }, + { + "epoch": 0.8213397392588144, + "grad_norm": 1.8310829401016235, + "learning_rate": 8.138596735026488e-07, + "loss": 0.736, + "step": 41029 + }, + { + "epoch": 0.8213597577759427, + "grad_norm": 1.1132402420043945, + "learning_rate": 8.136824015679324e-07, + "loss": 0.2818, + "step": 41030 + }, + { + "epoch": 0.8213797762930711, + "grad_norm": 1.0267627239227295, + "learning_rate": 8.135051472315941e-07, + "loss": 0.253, + "step": 41031 + }, + { + "epoch": 0.8213997948101994, + "grad_norm": 1.1347010135650635, + "learning_rate": 8.133279104943781e-07, + "loss": 0.2576, + "step": 41032 + }, + { + "epoch": 0.8214198133273278, + "grad_norm": 1.1999540328979492, + "learning_rate": 8.131506913570314e-07, + "loss": 0.2743, + "step": 41033 + }, + { + "epoch": 0.8214398318444561, + "grad_norm": 1.2670645713806152, + "learning_rate": 8.129734898202984e-07, + "loss": 0.3176, + "step": 41034 + }, + { + "epoch": 0.8214598503615844, + "grad_norm": 1.1126470565795898, + "learning_rate": 8.127963058849242e-07, + "loss": 0.2562, + "step": 41035 + }, + { + "epoch": 0.8214798688787128, + "grad_norm": 1.2292296886444092, + "learning_rate": 8.126191395516514e-07, + "loss": 0.3239, + "step": 41036 + }, + { + "epoch": 0.8214998873958411, + "grad_norm": 1.079817771911621, + "learning_rate": 8.124419908212283e-07, + "loss": 0.2938, + "step": 41037 + }, + { + "epoch": 0.8215199059129695, + "grad_norm": 1.127406120300293, + "learning_rate": 8.122648596943978e-07, + "loss": 0.3134, + "step": 41038 + }, + { + "epoch": 0.8215399244300978, + "grad_norm": 1.0783696174621582, + "learning_rate": 8.120877461719045e-07, + "loss": 0.3187, + "step": 41039 + }, + { + "epoch": 0.8215599429472262, + "grad_norm": 2.1248204708099365, + "learning_rate": 8.119106502544921e-07, + "loss": 0.7295, + "step": 41040 + }, + { + "epoch": 0.8215799614643545, + "grad_norm": 1.033504605293274, + "learning_rate": 8.11733571942907e-07, + "loss": 0.2677, + "step": 41041 + }, + { + "epoch": 0.8215999799814828, + "grad_norm": 1.1219301223754883, + "learning_rate": 8.115565112378915e-07, + "loss": 0.2766, + "step": 41042 + }, + { + "epoch": 0.8216199984986112, + "grad_norm": 1.98117196559906, + "learning_rate": 8.11379468140192e-07, + "loss": 0.8218, + "step": 41043 + }, + { + "epoch": 0.8216400170157395, + "grad_norm": 1.1023730039596558, + "learning_rate": 8.112024426505521e-07, + "loss": 0.294, + "step": 41044 + }, + { + "epoch": 0.8216600355328679, + "grad_norm": 1.0716180801391602, + "learning_rate": 8.110254347697144e-07, + "loss": 0.2986, + "step": 41045 + }, + { + "epoch": 0.8216800540499962, + "grad_norm": 1.0379459857940674, + "learning_rate": 8.108484444984261e-07, + "loss": 0.274, + "step": 41046 + }, + { + "epoch": 0.8217000725671246, + "grad_norm": 1.114403486251831, + "learning_rate": 8.106714718374286e-07, + "loss": 0.3059, + "step": 41047 + }, + { + "epoch": 0.8217200910842529, + "grad_norm": 0.9767380356788635, + "learning_rate": 8.104945167874668e-07, + "loss": 0.2761, + "step": 41048 + }, + { + "epoch": 0.8217401096013813, + "grad_norm": 1.2110377550125122, + "learning_rate": 8.103175793492845e-07, + "loss": 0.2303, + "step": 41049 + }, + { + "epoch": 0.8217601281185096, + "grad_norm": 1.1311089992523193, + "learning_rate": 8.101406595236244e-07, + "loss": 0.3189, + "step": 41050 + }, + { + "epoch": 0.8217801466356379, + "grad_norm": 1.0282607078552246, + "learning_rate": 8.099637573112328e-07, + "loss": 0.3188, + "step": 41051 + }, + { + "epoch": 0.8218001651527663, + "grad_norm": 1.8673616647720337, + "learning_rate": 8.097868727128516e-07, + "loss": 0.7192, + "step": 41052 + }, + { + "epoch": 0.8218201836698946, + "grad_norm": 1.0687479972839355, + "learning_rate": 8.096100057292233e-07, + "loss": 0.2997, + "step": 41053 + }, + { + "epoch": 0.821840202187023, + "grad_norm": 1.0370362997055054, + "learning_rate": 8.094331563610941e-07, + "loss": 0.2425, + "step": 41054 + }, + { + "epoch": 0.8218602207041513, + "grad_norm": 1.8753997087478638, + "learning_rate": 8.092563246092044e-07, + "loss": 0.7507, + "step": 41055 + }, + { + "epoch": 0.8218802392212797, + "grad_norm": 1.900342583656311, + "learning_rate": 8.09079510474301e-07, + "loss": 0.7238, + "step": 41056 + }, + { + "epoch": 0.821900257738408, + "grad_norm": 1.9038115739822388, + "learning_rate": 8.089027139571254e-07, + "loss": 0.7393, + "step": 41057 + }, + { + "epoch": 0.8219202762555363, + "grad_norm": 1.9261205196380615, + "learning_rate": 8.087259350584209e-07, + "loss": 0.6941, + "step": 41058 + }, + { + "epoch": 0.8219402947726647, + "grad_norm": 1.1410939693450928, + "learning_rate": 8.085491737789292e-07, + "loss": 0.3017, + "step": 41059 + }, + { + "epoch": 0.821960313289793, + "grad_norm": 1.1225600242614746, + "learning_rate": 8.083724301193963e-07, + "loss": 0.2966, + "step": 41060 + }, + { + "epoch": 0.8219803318069214, + "grad_norm": 1.1317472457885742, + "learning_rate": 8.081957040805632e-07, + "loss": 0.264, + "step": 41061 + }, + { + "epoch": 0.8220003503240497, + "grad_norm": 1.1167789697647095, + "learning_rate": 8.080189956631734e-07, + "loss": 0.2648, + "step": 41062 + }, + { + "epoch": 0.8220203688411781, + "grad_norm": 1.0420968532562256, + "learning_rate": 8.078423048679685e-07, + "loss": 0.2935, + "step": 41063 + }, + { + "epoch": 0.8220403873583064, + "grad_norm": 1.1457395553588867, + "learning_rate": 8.076656316956937e-07, + "loss": 0.2624, + "step": 41064 + }, + { + "epoch": 0.8220604058754348, + "grad_norm": 1.131595253944397, + "learning_rate": 8.0748897614709e-07, + "loss": 0.2786, + "step": 41065 + }, + { + "epoch": 0.8220804243925631, + "grad_norm": 1.0385463237762451, + "learning_rate": 8.073123382228997e-07, + "loss": 0.272, + "step": 41066 + }, + { + "epoch": 0.8221004429096914, + "grad_norm": 1.1924256086349487, + "learning_rate": 8.07135717923867e-07, + "loss": 0.3097, + "step": 41067 + }, + { + "epoch": 0.8221204614268198, + "grad_norm": 1.2048466205596924, + "learning_rate": 8.069591152507322e-07, + "loss": 0.3523, + "step": 41068 + }, + { + "epoch": 0.8221404799439481, + "grad_norm": 1.1417219638824463, + "learning_rate": 8.067825302042404e-07, + "loss": 0.2681, + "step": 41069 + }, + { + "epoch": 0.8221604984610765, + "grad_norm": 1.279915452003479, + "learning_rate": 8.06605962785132e-07, + "loss": 0.3237, + "step": 41070 + }, + { + "epoch": 0.8221805169782048, + "grad_norm": 1.040624737739563, + "learning_rate": 8.064294129941503e-07, + "loss": 0.2859, + "step": 41071 + }, + { + "epoch": 0.8222005354953332, + "grad_norm": 1.044464111328125, + "learning_rate": 8.062528808320353e-07, + "loss": 0.2568, + "step": 41072 + }, + { + "epoch": 0.8222205540124615, + "grad_norm": 1.1530451774597168, + "learning_rate": 8.06076366299532e-07, + "loss": 0.29, + "step": 41073 + }, + { + "epoch": 0.8222405725295898, + "grad_norm": 1.2189418077468872, + "learning_rate": 8.058998693973808e-07, + "loss": 0.3213, + "step": 41074 + }, + { + "epoch": 0.8222605910467182, + "grad_norm": 1.1815950870513916, + "learning_rate": 8.05723390126324e-07, + "loss": 0.294, + "step": 41075 + }, + { + "epoch": 0.8222806095638465, + "grad_norm": 2.0721631050109863, + "learning_rate": 8.055469284871026e-07, + "loss": 0.7855, + "step": 41076 + }, + { + "epoch": 0.8223006280809749, + "grad_norm": 1.1756404638290405, + "learning_rate": 8.0537048448046e-07, + "loss": 0.3085, + "step": 41077 + }, + { + "epoch": 0.8223206465981032, + "grad_norm": 1.07863450050354, + "learning_rate": 8.051940581071377e-07, + "loss": 0.3044, + "step": 41078 + }, + { + "epoch": 0.8223406651152316, + "grad_norm": 1.9115746021270752, + "learning_rate": 8.050176493678752e-07, + "loss": 0.7498, + "step": 41079 + }, + { + "epoch": 0.8223606836323599, + "grad_norm": 1.1248793601989746, + "learning_rate": 8.048412582634175e-07, + "loss": 0.3347, + "step": 41080 + }, + { + "epoch": 0.8223807021494883, + "grad_norm": 1.939950704574585, + "learning_rate": 8.046648847945027e-07, + "loss": 0.7975, + "step": 41081 + }, + { + "epoch": 0.8224007206666166, + "grad_norm": 1.2302922010421753, + "learning_rate": 8.044885289618753e-07, + "loss": 0.2883, + "step": 41082 + }, + { + "epoch": 0.8224207391837449, + "grad_norm": 1.2081918716430664, + "learning_rate": 8.043121907662749e-07, + "loss": 0.2968, + "step": 41083 + }, + { + "epoch": 0.8224407577008733, + "grad_norm": 1.2120921611785889, + "learning_rate": 8.041358702084434e-07, + "loss": 0.2945, + "step": 41084 + }, + { + "epoch": 0.8224607762180016, + "grad_norm": 1.0763583183288574, + "learning_rate": 8.039595672891204e-07, + "loss": 0.2478, + "step": 41085 + }, + { + "epoch": 0.82248079473513, + "grad_norm": 1.1935877799987793, + "learning_rate": 8.037832820090497e-07, + "loss": 0.3028, + "step": 41086 + }, + { + "epoch": 0.8225008132522583, + "grad_norm": 1.224122166633606, + "learning_rate": 8.036070143689711e-07, + "loss": 0.3637, + "step": 41087 + }, + { + "epoch": 0.8225208317693867, + "grad_norm": 1.1710703372955322, + "learning_rate": 8.034307643696254e-07, + "loss": 0.2868, + "step": 41088 + }, + { + "epoch": 0.822540850286515, + "grad_norm": 1.2600667476654053, + "learning_rate": 8.032545320117536e-07, + "loss": 0.2819, + "step": 41089 + }, + { + "epoch": 0.8225608688036433, + "grad_norm": 1.2075175046920776, + "learning_rate": 8.030783172960954e-07, + "loss": 0.2601, + "step": 41090 + }, + { + "epoch": 0.8225808873207717, + "grad_norm": 1.0584886074066162, + "learning_rate": 8.029021202233944e-07, + "loss": 0.2935, + "step": 41091 + }, + { + "epoch": 0.8226009058379, + "grad_norm": 1.2038524150848389, + "learning_rate": 8.027259407943883e-07, + "loss": 0.2375, + "step": 41092 + }, + { + "epoch": 0.8226209243550284, + "grad_norm": 1.0206801891326904, + "learning_rate": 8.025497790098196e-07, + "loss": 0.2534, + "step": 41093 + }, + { + "epoch": 0.8226409428721567, + "grad_norm": 1.1006457805633545, + "learning_rate": 8.023736348704281e-07, + "loss": 0.299, + "step": 41094 + }, + { + "epoch": 0.8226609613892851, + "grad_norm": 1.0715091228485107, + "learning_rate": 8.021975083769551e-07, + "loss": 0.3004, + "step": 41095 + }, + { + "epoch": 0.8226809799064134, + "grad_norm": 1.307420015335083, + "learning_rate": 8.020213995301407e-07, + "loss": 0.3016, + "step": 41096 + }, + { + "epoch": 0.8227009984235418, + "grad_norm": 1.1384817361831665, + "learning_rate": 8.018453083307242e-07, + "loss": 0.3159, + "step": 41097 + }, + { + "epoch": 0.8227210169406701, + "grad_norm": 1.0943830013275146, + "learning_rate": 8.016692347794464e-07, + "loss": 0.2973, + "step": 41098 + }, + { + "epoch": 0.8227410354577984, + "grad_norm": 1.026963472366333, + "learning_rate": 8.01493178877048e-07, + "loss": 0.3179, + "step": 41099 + }, + { + "epoch": 0.8227610539749268, + "grad_norm": 1.859194278717041, + "learning_rate": 8.013171406242692e-07, + "loss": 0.732, + "step": 41100 + }, + { + "epoch": 0.8227810724920551, + "grad_norm": 1.2297909259796143, + "learning_rate": 8.0114112002185e-07, + "loss": 0.2794, + "step": 41101 + }, + { + "epoch": 0.8228010910091835, + "grad_norm": 1.139810562133789, + "learning_rate": 8.009651170705291e-07, + "loss": 0.3156, + "step": 41102 + }, + { + "epoch": 0.8228211095263118, + "grad_norm": 1.0937838554382324, + "learning_rate": 8.007891317710465e-07, + "loss": 0.2986, + "step": 41103 + }, + { + "epoch": 0.8228411280434402, + "grad_norm": 1.106397271156311, + "learning_rate": 8.006131641241438e-07, + "loss": 0.3118, + "step": 41104 + }, + { + "epoch": 0.8228611465605685, + "grad_norm": 1.8623987436294556, + "learning_rate": 8.004372141305594e-07, + "loss": 0.7134, + "step": 41105 + }, + { + "epoch": 0.8228811650776968, + "grad_norm": 1.0839647054672241, + "learning_rate": 8.002612817910321e-07, + "loss": 0.2715, + "step": 41106 + }, + { + "epoch": 0.8229011835948252, + "grad_norm": 2.086945056915283, + "learning_rate": 8.000853671063025e-07, + "loss": 0.7147, + "step": 41107 + }, + { + "epoch": 0.8229212021119535, + "grad_norm": 1.1895822286605835, + "learning_rate": 7.999094700771116e-07, + "loss": 0.3066, + "step": 41108 + }, + { + "epoch": 0.8229412206290819, + "grad_norm": 1.239829421043396, + "learning_rate": 7.997335907041975e-07, + "loss": 0.3583, + "step": 41109 + }, + { + "epoch": 0.8229612391462102, + "grad_norm": 1.0756464004516602, + "learning_rate": 7.995577289882994e-07, + "loss": 0.2877, + "step": 41110 + }, + { + "epoch": 0.8229812576633386, + "grad_norm": 1.2059537172317505, + "learning_rate": 7.993818849301566e-07, + "loss": 0.3377, + "step": 41111 + }, + { + "epoch": 0.8230012761804669, + "grad_norm": 1.2815001010894775, + "learning_rate": 7.992060585305073e-07, + "loss": 0.2598, + "step": 41112 + }, + { + "epoch": 0.8230212946975953, + "grad_norm": 1.2239727973937988, + "learning_rate": 7.990302497900926e-07, + "loss": 0.2857, + "step": 41113 + }, + { + "epoch": 0.8230413132147236, + "grad_norm": 1.870288372039795, + "learning_rate": 7.988544587096508e-07, + "loss": 0.7658, + "step": 41114 + }, + { + "epoch": 0.8230613317318519, + "grad_norm": 1.1527669429779053, + "learning_rate": 7.986786852899209e-07, + "loss": 0.3429, + "step": 41115 + }, + { + "epoch": 0.8230813502489803, + "grad_norm": 1.1141972541809082, + "learning_rate": 7.985029295316404e-07, + "loss": 0.3252, + "step": 41116 + }, + { + "epoch": 0.8231013687661086, + "grad_norm": 1.1476410627365112, + "learning_rate": 7.983271914355506e-07, + "loss": 0.3031, + "step": 41117 + }, + { + "epoch": 0.823121387283237, + "grad_norm": 1.902109980583191, + "learning_rate": 7.981514710023891e-07, + "loss": 0.7444, + "step": 41118 + }, + { + "epoch": 0.8231414058003653, + "grad_norm": 1.0368692874908447, + "learning_rate": 7.979757682328931e-07, + "loss": 0.298, + "step": 41119 + }, + { + "epoch": 0.8231614243174937, + "grad_norm": 1.035689115524292, + "learning_rate": 7.978000831278043e-07, + "loss": 0.2448, + "step": 41120 + }, + { + "epoch": 0.823181442834622, + "grad_norm": 1.0052473545074463, + "learning_rate": 7.976244156878582e-07, + "loss": 0.243, + "step": 41121 + }, + { + "epoch": 0.8232014613517503, + "grad_norm": 1.0573809146881104, + "learning_rate": 7.974487659137958e-07, + "loss": 0.3106, + "step": 41122 + }, + { + "epoch": 0.8232214798688787, + "grad_norm": 1.2154433727264404, + "learning_rate": 7.97273133806355e-07, + "loss": 0.2903, + "step": 41123 + }, + { + "epoch": 0.823241498386007, + "grad_norm": 1.059944987297058, + "learning_rate": 7.970975193662728e-07, + "loss": 0.2609, + "step": 41124 + }, + { + "epoch": 0.8232615169031354, + "grad_norm": 1.1176117658615112, + "learning_rate": 7.969219225942871e-07, + "loss": 0.3189, + "step": 41125 + }, + { + "epoch": 0.8232815354202637, + "grad_norm": 1.0381059646606445, + "learning_rate": 7.967463434911382e-07, + "loss": 0.2712, + "step": 41126 + }, + { + "epoch": 0.8233015539373921, + "grad_norm": 1.0177689790725708, + "learning_rate": 7.965707820575635e-07, + "loss": 0.3344, + "step": 41127 + }, + { + "epoch": 0.8233215724545204, + "grad_norm": 1.069163203239441, + "learning_rate": 7.963952382943007e-07, + "loss": 0.3322, + "step": 41128 + }, + { + "epoch": 0.8233415909716488, + "grad_norm": 1.0031989812850952, + "learning_rate": 7.962197122020865e-07, + "loss": 0.3004, + "step": 41129 + }, + { + "epoch": 0.8233616094887771, + "grad_norm": 1.1743918657302856, + "learning_rate": 7.960442037816607e-07, + "loss": 0.278, + "step": 41130 + }, + { + "epoch": 0.8233816280059054, + "grad_norm": 1.122138500213623, + "learning_rate": 7.958687130337606e-07, + "loss": 0.2864, + "step": 41131 + }, + { + "epoch": 0.8234016465230338, + "grad_norm": 1.0460383892059326, + "learning_rate": 7.956932399591227e-07, + "loss": 0.2745, + "step": 41132 + }, + { + "epoch": 0.8234216650401621, + "grad_norm": 0.9967191219329834, + "learning_rate": 7.955177845584866e-07, + "loss": 0.2481, + "step": 41133 + }, + { + "epoch": 0.8234416835572905, + "grad_norm": 1.1382710933685303, + "learning_rate": 7.953423468325877e-07, + "loss": 0.2967, + "step": 41134 + }, + { + "epoch": 0.8234617020744188, + "grad_norm": 1.1104944944381714, + "learning_rate": 7.95166926782166e-07, + "loss": 0.2955, + "step": 41135 + }, + { + "epoch": 0.8234817205915472, + "grad_norm": 0.9689499735832214, + "learning_rate": 7.949915244079576e-07, + "loss": 0.2695, + "step": 41136 + }, + { + "epoch": 0.8235017391086755, + "grad_norm": 1.0971206426620483, + "learning_rate": 7.948161397107001e-07, + "loss": 0.279, + "step": 41137 + }, + { + "epoch": 0.8235217576258038, + "grad_norm": 1.098484992980957, + "learning_rate": 7.946407726911293e-07, + "loss": 0.2298, + "step": 41138 + }, + { + "epoch": 0.8235417761429322, + "grad_norm": 1.9792544841766357, + "learning_rate": 7.944654233499843e-07, + "loss": 0.7698, + "step": 41139 + }, + { + "epoch": 0.8235617946600605, + "grad_norm": 1.0602095127105713, + "learning_rate": 7.942900916880025e-07, + "loss": 0.2634, + "step": 41140 + }, + { + "epoch": 0.8235818131771889, + "grad_norm": 1.0079848766326904, + "learning_rate": 7.941147777059193e-07, + "loss": 0.3059, + "step": 41141 + }, + { + "epoch": 0.8236018316943172, + "grad_norm": 1.068855881690979, + "learning_rate": 7.939394814044732e-07, + "loss": 0.2986, + "step": 41142 + }, + { + "epoch": 0.8236218502114456, + "grad_norm": 1.1940481662750244, + "learning_rate": 7.93764202784399e-07, + "loss": 0.2708, + "step": 41143 + }, + { + "epoch": 0.8236418687285739, + "grad_norm": 1.1256071329116821, + "learning_rate": 7.935889418464354e-07, + "loss": 0.262, + "step": 41144 + }, + { + "epoch": 0.8236618872457023, + "grad_norm": 1.2198724746704102, + "learning_rate": 7.934136985913182e-07, + "loss": 0.3203, + "step": 41145 + }, + { + "epoch": 0.8236819057628306, + "grad_norm": 1.0962940454483032, + "learning_rate": 7.932384730197856e-07, + "loss": 0.2731, + "step": 41146 + }, + { + "epoch": 0.8237019242799589, + "grad_norm": 1.0126816034317017, + "learning_rate": 7.93063265132572e-07, + "loss": 0.2421, + "step": 41147 + }, + { + "epoch": 0.8237219427970873, + "grad_norm": 1.1916389465332031, + "learning_rate": 7.928880749304164e-07, + "loss": 0.3139, + "step": 41148 + }, + { + "epoch": 0.8237419613142156, + "grad_norm": 1.0691041946411133, + "learning_rate": 7.927129024140534e-07, + "loss": 0.2948, + "step": 41149 + }, + { + "epoch": 0.823761979831344, + "grad_norm": 1.2455856800079346, + "learning_rate": 7.925377475842205e-07, + "loss": 0.3132, + "step": 41150 + }, + { + "epoch": 0.8237819983484723, + "grad_norm": 1.123445749282837, + "learning_rate": 7.923626104416532e-07, + "loss": 0.3739, + "step": 41151 + }, + { + "epoch": 0.8238020168656007, + "grad_norm": 1.1605218648910522, + "learning_rate": 7.921874909870869e-07, + "loss": 0.2952, + "step": 41152 + }, + { + "epoch": 0.823822035382729, + "grad_norm": 1.3738641738891602, + "learning_rate": 7.920123892212595e-07, + "loss": 0.2727, + "step": 41153 + }, + { + "epoch": 0.8238420538998573, + "grad_norm": 1.915685772895813, + "learning_rate": 7.918373051449069e-07, + "loss": 0.8073, + "step": 41154 + }, + { + "epoch": 0.8238620724169857, + "grad_norm": 0.9965869784355164, + "learning_rate": 7.916622387587647e-07, + "loss": 0.287, + "step": 41155 + }, + { + "epoch": 0.823882090934114, + "grad_norm": 1.0436187982559204, + "learning_rate": 7.914871900635679e-07, + "loss": 0.3026, + "step": 41156 + }, + { + "epoch": 0.8239021094512424, + "grad_norm": 0.9637020230293274, + "learning_rate": 7.913121590600542e-07, + "loss": 0.2423, + "step": 41157 + }, + { + "epoch": 0.8239221279683707, + "grad_norm": 1.291487693786621, + "learning_rate": 7.911371457489569e-07, + "loss": 0.3225, + "step": 41158 + }, + { + "epoch": 0.8239421464854991, + "grad_norm": 1.9750412702560425, + "learning_rate": 7.90962150131015e-07, + "loss": 0.749, + "step": 41159 + }, + { + "epoch": 0.8239621650026274, + "grad_norm": 1.1156493425369263, + "learning_rate": 7.90787172206961e-07, + "loss": 0.328, + "step": 41160 + }, + { + "epoch": 0.8239821835197558, + "grad_norm": 1.0320535898208618, + "learning_rate": 7.906122119775333e-07, + "loss": 0.2674, + "step": 41161 + }, + { + "epoch": 0.8240022020368841, + "grad_norm": 1.0207479000091553, + "learning_rate": 7.904372694434659e-07, + "loss": 0.2783, + "step": 41162 + }, + { + "epoch": 0.8240222205540124, + "grad_norm": 1.1925227642059326, + "learning_rate": 7.902623446054942e-07, + "loss": 0.3075, + "step": 41163 + }, + { + "epoch": 0.8240422390711408, + "grad_norm": 1.1227787733078003, + "learning_rate": 7.900874374643535e-07, + "loss": 0.3238, + "step": 41164 + }, + { + "epoch": 0.8240622575882691, + "grad_norm": 1.1013803482055664, + "learning_rate": 7.899125480207781e-07, + "loss": 0.2762, + "step": 41165 + }, + { + "epoch": 0.8240822761053975, + "grad_norm": 1.0442900657653809, + "learning_rate": 7.897376762755061e-07, + "loss": 0.2954, + "step": 41166 + }, + { + "epoch": 0.8241022946225258, + "grad_norm": 1.071308970451355, + "learning_rate": 7.895628222292701e-07, + "loss": 0.3078, + "step": 41167 + }, + { + "epoch": 0.8241223131396542, + "grad_norm": 1.0797693729400635, + "learning_rate": 7.893879858828063e-07, + "loss": 0.282, + "step": 41168 + }, + { + "epoch": 0.8241423316567825, + "grad_norm": 1.1372249126434326, + "learning_rate": 7.89213167236848e-07, + "loss": 0.2572, + "step": 41169 + }, + { + "epoch": 0.8241623501739108, + "grad_norm": 0.9332150220870972, + "learning_rate": 7.890383662921325e-07, + "loss": 0.2478, + "step": 41170 + }, + { + "epoch": 0.8241823686910392, + "grad_norm": 1.2400981187820435, + "learning_rate": 7.888635830493924e-07, + "loss": 0.3075, + "step": 41171 + }, + { + "epoch": 0.8242023872081675, + "grad_norm": 1.899087905883789, + "learning_rate": 7.886888175093644e-07, + "loss": 0.7659, + "step": 41172 + }, + { + "epoch": 0.8242224057252959, + "grad_norm": 1.0710463523864746, + "learning_rate": 7.885140696727828e-07, + "loss": 0.2733, + "step": 41173 + }, + { + "epoch": 0.8242424242424242, + "grad_norm": 1.0079926252365112, + "learning_rate": 7.883393395403805e-07, + "loss": 0.2932, + "step": 41174 + }, + { + "epoch": 0.8242624427595526, + "grad_norm": 1.1917717456817627, + "learning_rate": 7.88164627112894e-07, + "loss": 0.3105, + "step": 41175 + }, + { + "epoch": 0.8242824612766809, + "grad_norm": 1.246786117553711, + "learning_rate": 7.879899323910572e-07, + "loss": 0.3209, + "step": 41176 + }, + { + "epoch": 0.8243024797938093, + "grad_norm": 1.3062635660171509, + "learning_rate": 7.878152553756047e-07, + "loss": 0.2868, + "step": 41177 + }, + { + "epoch": 0.8243224983109376, + "grad_norm": 1.8140403032302856, + "learning_rate": 7.87640596067269e-07, + "loss": 0.6878, + "step": 41178 + }, + { + "epoch": 0.8243425168280659, + "grad_norm": 1.9346626996994019, + "learning_rate": 7.874659544667868e-07, + "loss": 0.7101, + "step": 41179 + }, + { + "epoch": 0.8243625353451943, + "grad_norm": 1.1522549390792847, + "learning_rate": 7.872913305748909e-07, + "loss": 0.3472, + "step": 41180 + }, + { + "epoch": 0.8243825538623226, + "grad_norm": 1.1830722093582153, + "learning_rate": 7.871167243923162e-07, + "loss": 0.3338, + "step": 41181 + }, + { + "epoch": 0.824402572379451, + "grad_norm": 1.8619731664657593, + "learning_rate": 7.869421359197943e-07, + "loss": 0.8407, + "step": 41182 + }, + { + "epoch": 0.8244225908965793, + "grad_norm": 1.0729179382324219, + "learning_rate": 7.86767565158063e-07, + "loss": 0.2737, + "step": 41183 + }, + { + "epoch": 0.8244426094137077, + "grad_norm": 1.1657143831253052, + "learning_rate": 7.865930121078525e-07, + "loss": 0.3206, + "step": 41184 + }, + { + "epoch": 0.824462627930836, + "grad_norm": 1.0915089845657349, + "learning_rate": 7.864184767698996e-07, + "loss": 0.2652, + "step": 41185 + }, + { + "epoch": 0.8244826464479643, + "grad_norm": 1.0896135568618774, + "learning_rate": 7.862439591449366e-07, + "loss": 0.2973, + "step": 41186 + }, + { + "epoch": 0.8245026649650927, + "grad_norm": 1.0613369941711426, + "learning_rate": 7.860694592336959e-07, + "loss": 0.3006, + "step": 41187 + }, + { + "epoch": 0.824522683482221, + "grad_norm": 1.1524418592453003, + "learning_rate": 7.858949770369134e-07, + "loss": 0.2925, + "step": 41188 + }, + { + "epoch": 0.8245427019993494, + "grad_norm": 1.1100802421569824, + "learning_rate": 7.857205125553219e-07, + "loss": 0.2894, + "step": 41189 + }, + { + "epoch": 0.8245627205164777, + "grad_norm": 1.8140008449554443, + "learning_rate": 7.85546065789654e-07, + "loss": 0.6833, + "step": 41190 + }, + { + "epoch": 0.8245827390336061, + "grad_norm": 1.1586601734161377, + "learning_rate": 7.853716367406428e-07, + "loss": 0.356, + "step": 41191 + }, + { + "epoch": 0.8246027575507344, + "grad_norm": 1.052551031112671, + "learning_rate": 7.851972254090234e-07, + "loss": 0.3273, + "step": 41192 + }, + { + "epoch": 0.8246227760678628, + "grad_norm": 1.0233584642410278, + "learning_rate": 7.850228317955278e-07, + "loss": 0.2316, + "step": 41193 + }, + { + "epoch": 0.8246427945849911, + "grad_norm": 1.127558708190918, + "learning_rate": 7.848484559008885e-07, + "loss": 0.283, + "step": 41194 + }, + { + "epoch": 0.8246628131021194, + "grad_norm": 0.9696788787841797, + "learning_rate": 7.846740977258388e-07, + "loss": 0.2645, + "step": 41195 + }, + { + "epoch": 0.8246828316192478, + "grad_norm": 1.1945576667785645, + "learning_rate": 7.844997572711127e-07, + "loss": 0.3377, + "step": 41196 + }, + { + "epoch": 0.8247028501363761, + "grad_norm": 1.2365553379058838, + "learning_rate": 7.843254345374412e-07, + "loss": 0.3739, + "step": 41197 + }, + { + "epoch": 0.8247228686535045, + "grad_norm": 1.185776948928833, + "learning_rate": 7.841511295255599e-07, + "loss": 0.2942, + "step": 41198 + }, + { + "epoch": 0.8247428871706328, + "grad_norm": 1.133462905883789, + "learning_rate": 7.839768422361999e-07, + "loss": 0.3148, + "step": 41199 + }, + { + "epoch": 0.8247629056877612, + "grad_norm": 1.172979712486267, + "learning_rate": 7.838025726700927e-07, + "loss": 0.3503, + "step": 41200 + }, + { + "epoch": 0.8247829242048895, + "grad_norm": 1.048181414604187, + "learning_rate": 7.836283208279738e-07, + "loss": 0.3046, + "step": 41201 + }, + { + "epoch": 0.8248029427220178, + "grad_norm": 1.0803412199020386, + "learning_rate": 7.834540867105733e-07, + "loss": 0.2934, + "step": 41202 + }, + { + "epoch": 0.8248229612391462, + "grad_norm": 1.1621506214141846, + "learning_rate": 7.832798703186251e-07, + "loss": 0.293, + "step": 41203 + }, + { + "epoch": 0.8248429797562745, + "grad_norm": 1.9245754480361938, + "learning_rate": 7.831056716528601e-07, + "loss": 0.753, + "step": 41204 + }, + { + "epoch": 0.8248629982734029, + "grad_norm": 1.0990043878555298, + "learning_rate": 7.829314907140106e-07, + "loss": 0.2786, + "step": 41205 + }, + { + "epoch": 0.8248830167905312, + "grad_norm": 1.0612471103668213, + "learning_rate": 7.827573275028111e-07, + "loss": 0.3025, + "step": 41206 + }, + { + "epoch": 0.8249030353076596, + "grad_norm": 1.246120810508728, + "learning_rate": 7.82583182019992e-07, + "loss": 0.2717, + "step": 41207 + }, + { + "epoch": 0.8249230538247879, + "grad_norm": 1.2593700885772705, + "learning_rate": 7.824090542662843e-07, + "loss": 0.3298, + "step": 41208 + }, + { + "epoch": 0.8249430723419163, + "grad_norm": 1.124548316001892, + "learning_rate": 7.822349442424227e-07, + "loss": 0.2928, + "step": 41209 + }, + { + "epoch": 0.8249630908590446, + "grad_norm": 1.1050677299499512, + "learning_rate": 7.820608519491363e-07, + "loss": 0.3192, + "step": 41210 + }, + { + "epoch": 0.8249831093761729, + "grad_norm": 1.2333106994628906, + "learning_rate": 7.818867773871591e-07, + "loss": 0.2946, + "step": 41211 + }, + { + "epoch": 0.8250031278933013, + "grad_norm": 1.1072441339492798, + "learning_rate": 7.817127205572228e-07, + "loss": 0.267, + "step": 41212 + }, + { + "epoch": 0.8250231464104296, + "grad_norm": 1.0869579315185547, + "learning_rate": 7.815386814600572e-07, + "loss": 0.298, + "step": 41213 + }, + { + "epoch": 0.825043164927558, + "grad_norm": 1.0751479864120483, + "learning_rate": 7.813646600963964e-07, + "loss": 0.2574, + "step": 41214 + }, + { + "epoch": 0.8250631834446863, + "grad_norm": 1.3847694396972656, + "learning_rate": 7.811906564669702e-07, + "loss": 0.304, + "step": 41215 + }, + { + "epoch": 0.8250832019618147, + "grad_norm": 1.1092190742492676, + "learning_rate": 7.810166705725114e-07, + "loss": 0.3088, + "step": 41216 + }, + { + "epoch": 0.825103220478943, + "grad_norm": 1.9758249521255493, + "learning_rate": 7.8084270241375e-07, + "loss": 0.76, + "step": 41217 + }, + { + "epoch": 0.8251232389960713, + "grad_norm": 1.0611952543258667, + "learning_rate": 7.806687519914168e-07, + "loss": 0.3116, + "step": 41218 + }, + { + "epoch": 0.8251432575131997, + "grad_norm": 1.0397847890853882, + "learning_rate": 7.804948193062456e-07, + "loss": 0.2847, + "step": 41219 + }, + { + "epoch": 0.825163276030328, + "grad_norm": 1.028928518295288, + "learning_rate": 7.803209043589655e-07, + "loss": 0.294, + "step": 41220 + }, + { + "epoch": 0.8251832945474564, + "grad_norm": 1.135474443435669, + "learning_rate": 7.801470071503076e-07, + "loss": 0.2757, + "step": 41221 + }, + { + "epoch": 0.8252033130645847, + "grad_norm": 1.180584192276001, + "learning_rate": 7.799731276810046e-07, + "loss": 0.2951, + "step": 41222 + }, + { + "epoch": 0.8252233315817131, + "grad_norm": 1.061000943183899, + "learning_rate": 7.797992659517855e-07, + "loss": 0.2709, + "step": 41223 + }, + { + "epoch": 0.8252433500988414, + "grad_norm": 1.0399951934814453, + "learning_rate": 7.796254219633831e-07, + "loss": 0.275, + "step": 41224 + }, + { + "epoch": 0.8252633686159697, + "grad_norm": 1.092430591583252, + "learning_rate": 7.794515957165266e-07, + "loss": 0.2591, + "step": 41225 + }, + { + "epoch": 0.8252833871330981, + "grad_norm": 1.1767374277114868, + "learning_rate": 7.792777872119484e-07, + "loss": 0.3176, + "step": 41226 + }, + { + "epoch": 0.8253034056502264, + "grad_norm": 1.1121246814727783, + "learning_rate": 7.791039964503761e-07, + "loss": 0.2853, + "step": 41227 + }, + { + "epoch": 0.8253234241673548, + "grad_norm": 1.055038332939148, + "learning_rate": 7.789302234325435e-07, + "loss": 0.2945, + "step": 41228 + }, + { + "epoch": 0.8253434426844831, + "grad_norm": 1.1223573684692383, + "learning_rate": 7.787564681591803e-07, + "loss": 0.2969, + "step": 41229 + }, + { + "epoch": 0.8253634612016115, + "grad_norm": 1.156008005142212, + "learning_rate": 7.785827306310162e-07, + "loss": 0.2765, + "step": 41230 + }, + { + "epoch": 0.8253834797187398, + "grad_norm": 1.0795581340789795, + "learning_rate": 7.784090108487802e-07, + "loss": 0.2897, + "step": 41231 + }, + { + "epoch": 0.8254034982358682, + "grad_norm": 1.1036711931228638, + "learning_rate": 7.78235308813206e-07, + "loss": 0.2897, + "step": 41232 + }, + { + "epoch": 0.8254235167529965, + "grad_norm": 1.1134401559829712, + "learning_rate": 7.780616245250222e-07, + "loss": 0.2664, + "step": 41233 + }, + { + "epoch": 0.8254435352701248, + "grad_norm": 1.0283600091934204, + "learning_rate": 7.778879579849568e-07, + "loss": 0.3049, + "step": 41234 + }, + { + "epoch": 0.8254635537872532, + "grad_norm": 1.2558295726776123, + "learning_rate": 7.777143091937439e-07, + "loss": 0.3489, + "step": 41235 + }, + { + "epoch": 0.8254835723043815, + "grad_norm": 1.0663834810256958, + "learning_rate": 7.775406781521095e-07, + "loss": 0.3176, + "step": 41236 + }, + { + "epoch": 0.8255035908215099, + "grad_norm": 1.052668809890747, + "learning_rate": 7.773670648607868e-07, + "loss": 0.3205, + "step": 41237 + }, + { + "epoch": 0.8255236093386382, + "grad_norm": 1.1507517099380493, + "learning_rate": 7.771934693205046e-07, + "loss": 0.3102, + "step": 41238 + }, + { + "epoch": 0.8255436278557666, + "grad_norm": 1.0268628597259521, + "learning_rate": 7.77019891531992e-07, + "loss": 0.3042, + "step": 41239 + }, + { + "epoch": 0.8255636463728949, + "grad_norm": 1.0360294580459595, + "learning_rate": 7.768463314959779e-07, + "loss": 0.3323, + "step": 41240 + }, + { + "epoch": 0.8255836648900232, + "grad_norm": 1.1590690612792969, + "learning_rate": 7.766727892131936e-07, + "loss": 0.3443, + "step": 41241 + }, + { + "epoch": 0.8256036834071516, + "grad_norm": 1.143736720085144, + "learning_rate": 7.764992646843688e-07, + "loss": 0.3166, + "step": 41242 + }, + { + "epoch": 0.8256237019242799, + "grad_norm": 2.031613349914551, + "learning_rate": 7.763257579102318e-07, + "loss": 0.809, + "step": 41243 + }, + { + "epoch": 0.8256437204414083, + "grad_norm": 1.0578209161758423, + "learning_rate": 7.761522688915113e-07, + "loss": 0.2577, + "step": 41244 + }, + { + "epoch": 0.8256637389585366, + "grad_norm": 1.0295143127441406, + "learning_rate": 7.75978797628939e-07, + "loss": 0.2946, + "step": 41245 + }, + { + "epoch": 0.825683757475665, + "grad_norm": 1.7386566400527954, + "learning_rate": 7.758053441232427e-07, + "loss": 0.7159, + "step": 41246 + }, + { + "epoch": 0.8257037759927933, + "grad_norm": 1.1090184450149536, + "learning_rate": 7.756319083751501e-07, + "loss": 0.2877, + "step": 41247 + }, + { + "epoch": 0.8257237945099217, + "grad_norm": 2.13572096824646, + "learning_rate": 7.754584903853934e-07, + "loss": 0.7808, + "step": 41248 + }, + { + "epoch": 0.82574381302705, + "grad_norm": 1.0754629373550415, + "learning_rate": 7.752850901546987e-07, + "loss": 0.3217, + "step": 41249 + }, + { + "epoch": 0.8257638315441783, + "grad_norm": 0.9527814388275146, + "learning_rate": 7.751117076837977e-07, + "loss": 0.2218, + "step": 41250 + }, + { + "epoch": 0.8257838500613067, + "grad_norm": 1.094495415687561, + "learning_rate": 7.74938342973418e-07, + "loss": 0.2838, + "step": 41251 + }, + { + "epoch": 0.825803868578435, + "grad_norm": 1.0057605504989624, + "learning_rate": 7.747649960242881e-07, + "loss": 0.2281, + "step": 41252 + }, + { + "epoch": 0.8258238870955634, + "grad_norm": 0.9675562977790833, + "learning_rate": 7.745916668371356e-07, + "loss": 0.2596, + "step": 41253 + }, + { + "epoch": 0.8258439056126917, + "grad_norm": 1.1396411657333374, + "learning_rate": 7.744183554126916e-07, + "loss": 0.278, + "step": 41254 + }, + { + "epoch": 0.8258639241298201, + "grad_norm": 1.1436256170272827, + "learning_rate": 7.74245061751684e-07, + "loss": 0.2685, + "step": 41255 + }, + { + "epoch": 0.8258839426469484, + "grad_norm": 1.1913925409317017, + "learning_rate": 7.740717858548402e-07, + "loss": 0.3113, + "step": 41256 + }, + { + "epoch": 0.8259039611640767, + "grad_norm": 1.3015782833099365, + "learning_rate": 7.738985277228894e-07, + "loss": 0.3157, + "step": 41257 + }, + { + "epoch": 0.8259239796812051, + "grad_norm": 1.8536518812179565, + "learning_rate": 7.737252873565581e-07, + "loss": 0.7255, + "step": 41258 + }, + { + "epoch": 0.8259439981983334, + "grad_norm": 1.1942881345748901, + "learning_rate": 7.73552064756578e-07, + "loss": 0.3036, + "step": 41259 + }, + { + "epoch": 0.8259640167154618, + "grad_norm": 1.8392333984375, + "learning_rate": 7.733788599236741e-07, + "loss": 0.7928, + "step": 41260 + }, + { + "epoch": 0.8259840352325901, + "grad_norm": 1.315273642539978, + "learning_rate": 7.732056728585768e-07, + "loss": 0.2891, + "step": 41261 + }, + { + "epoch": 0.8260040537497185, + "grad_norm": 1.2006779909133911, + "learning_rate": 7.730325035620123e-07, + "loss": 0.3122, + "step": 41262 + }, + { + "epoch": 0.8260240722668468, + "grad_norm": 1.0621896982192993, + "learning_rate": 7.728593520347105e-07, + "loss": 0.2865, + "step": 41263 + }, + { + "epoch": 0.8260440907839752, + "grad_norm": 1.1550159454345703, + "learning_rate": 7.726862182773986e-07, + "loss": 0.2828, + "step": 41264 + }, + { + "epoch": 0.8260641093011035, + "grad_norm": 1.0691440105438232, + "learning_rate": 7.725131022908039e-07, + "loss": 0.3222, + "step": 41265 + }, + { + "epoch": 0.8260841278182318, + "grad_norm": 1.0842515230178833, + "learning_rate": 7.723400040756546e-07, + "loss": 0.3181, + "step": 41266 + }, + { + "epoch": 0.8261041463353602, + "grad_norm": 1.203224778175354, + "learning_rate": 7.721669236326767e-07, + "loss": 0.2666, + "step": 41267 + }, + { + "epoch": 0.8261241648524885, + "grad_norm": 1.0557583570480347, + "learning_rate": 7.719938609626005e-07, + "loss": 0.2857, + "step": 41268 + }, + { + "epoch": 0.8261441833696169, + "grad_norm": 1.0577815771102905, + "learning_rate": 7.718208160661522e-07, + "loss": 0.3079, + "step": 41269 + }, + { + "epoch": 0.8261642018867452, + "grad_norm": 1.1795815229415894, + "learning_rate": 7.716477889440594e-07, + "loss": 0.3205, + "step": 41270 + }, + { + "epoch": 0.8261842204038736, + "grad_norm": 1.2709558010101318, + "learning_rate": 7.714747795970478e-07, + "loss": 0.2713, + "step": 41271 + }, + { + "epoch": 0.8262042389210019, + "grad_norm": 1.1328638792037964, + "learning_rate": 7.713017880258478e-07, + "loss": 0.3112, + "step": 41272 + }, + { + "epoch": 0.8262242574381302, + "grad_norm": 1.0939750671386719, + "learning_rate": 7.711288142311835e-07, + "loss": 0.304, + "step": 41273 + }, + { + "epoch": 0.8262442759552586, + "grad_norm": 1.2772659063339233, + "learning_rate": 7.70955858213785e-07, + "loss": 0.3005, + "step": 41274 + }, + { + "epoch": 0.8262642944723869, + "grad_norm": 1.1355475187301636, + "learning_rate": 7.707829199743766e-07, + "loss": 0.3086, + "step": 41275 + }, + { + "epoch": 0.8262843129895153, + "grad_norm": 1.1553744077682495, + "learning_rate": 7.706099995136885e-07, + "loss": 0.3186, + "step": 41276 + }, + { + "epoch": 0.8263043315066436, + "grad_norm": 1.167802333831787, + "learning_rate": 7.704370968324449e-07, + "loss": 0.3209, + "step": 41277 + }, + { + "epoch": 0.826324350023772, + "grad_norm": 1.1364496946334839, + "learning_rate": 7.702642119313742e-07, + "loss": 0.2631, + "step": 41278 + }, + { + "epoch": 0.8263443685409003, + "grad_norm": 1.2970614433288574, + "learning_rate": 7.700913448112019e-07, + "loss": 0.3216, + "step": 41279 + }, + { + "epoch": 0.8263643870580287, + "grad_norm": 1.2895156145095825, + "learning_rate": 7.699184954726546e-07, + "loss": 0.2874, + "step": 41280 + }, + { + "epoch": 0.826384405575157, + "grad_norm": 1.065955638885498, + "learning_rate": 7.697456639164608e-07, + "loss": 0.2532, + "step": 41281 + }, + { + "epoch": 0.8264044240922853, + "grad_norm": 1.188093900680542, + "learning_rate": 7.695728501433458e-07, + "loss": 0.2893, + "step": 41282 + }, + { + "epoch": 0.8264244426094137, + "grad_norm": 1.280593991279602, + "learning_rate": 7.694000541540359e-07, + "loss": 0.2809, + "step": 41283 + }, + { + "epoch": 0.826444461126542, + "grad_norm": 1.1437273025512695, + "learning_rate": 7.692272759492569e-07, + "loss": 0.2986, + "step": 41284 + }, + { + "epoch": 0.8264644796436704, + "grad_norm": 1.1660906076431274, + "learning_rate": 7.690545155297369e-07, + "loss": 0.319, + "step": 41285 + }, + { + "epoch": 0.8264844981607987, + "grad_norm": 1.1332610845565796, + "learning_rate": 7.688817728962e-07, + "loss": 0.2777, + "step": 41286 + }, + { + "epoch": 0.8265045166779271, + "grad_norm": 1.1583983898162842, + "learning_rate": 7.687090480493748e-07, + "loss": 0.2913, + "step": 41287 + }, + { + "epoch": 0.8265245351950554, + "grad_norm": 1.9360052347183228, + "learning_rate": 7.685363409899865e-07, + "loss": 0.7076, + "step": 41288 + }, + { + "epoch": 0.8265445537121837, + "grad_norm": 1.229572057723999, + "learning_rate": 7.683636517187592e-07, + "loss": 0.3704, + "step": 41289 + }, + { + "epoch": 0.8265645722293121, + "grad_norm": 1.1428806781768799, + "learning_rate": 7.681909802364218e-07, + "loss": 0.3023, + "step": 41290 + }, + { + "epoch": 0.8265845907464404, + "grad_norm": 1.249334692955017, + "learning_rate": 7.680183265436986e-07, + "loss": 0.3257, + "step": 41291 + }, + { + "epoch": 0.8266046092635688, + "grad_norm": 1.1048730611801147, + "learning_rate": 7.678456906413162e-07, + "loss": 0.3204, + "step": 41292 + }, + { + "epoch": 0.8266246277806971, + "grad_norm": 1.0672364234924316, + "learning_rate": 7.676730725299981e-07, + "loss": 0.2866, + "step": 41293 + }, + { + "epoch": 0.8266446462978255, + "grad_norm": 1.2308496236801147, + "learning_rate": 7.675004722104728e-07, + "loss": 0.2931, + "step": 41294 + }, + { + "epoch": 0.8266646648149538, + "grad_norm": 1.1183782815933228, + "learning_rate": 7.673278896834646e-07, + "loss": 0.3136, + "step": 41295 + }, + { + "epoch": 0.8266846833320822, + "grad_norm": 1.0153454542160034, + "learning_rate": 7.671553249496993e-07, + "loss": 0.2939, + "step": 41296 + }, + { + "epoch": 0.8267047018492105, + "grad_norm": 1.081506371498108, + "learning_rate": 7.66982778009901e-07, + "loss": 0.272, + "step": 41297 + }, + { + "epoch": 0.8267247203663388, + "grad_norm": 1.0122406482696533, + "learning_rate": 7.668102488647972e-07, + "loss": 0.2607, + "step": 41298 + }, + { + "epoch": 0.8267447388834672, + "grad_norm": 1.2291477918624878, + "learning_rate": 7.666377375151107e-07, + "loss": 0.3001, + "step": 41299 + }, + { + "epoch": 0.8267647574005955, + "grad_norm": 1.2629501819610596, + "learning_rate": 7.664652439615694e-07, + "loss": 0.2961, + "step": 41300 + }, + { + "epoch": 0.8267847759177239, + "grad_norm": 1.0959053039550781, + "learning_rate": 7.662927682048976e-07, + "loss": 0.2944, + "step": 41301 + }, + { + "epoch": 0.8268047944348522, + "grad_norm": 1.1455447673797607, + "learning_rate": 7.661203102458181e-07, + "loss": 0.2973, + "step": 41302 + }, + { + "epoch": 0.8268248129519806, + "grad_norm": 1.1007707118988037, + "learning_rate": 7.659478700850592e-07, + "loss": 0.3313, + "step": 41303 + }, + { + "epoch": 0.8268448314691089, + "grad_norm": 1.659843921661377, + "learning_rate": 7.657754477233442e-07, + "loss": 0.2589, + "step": 41304 + }, + { + "epoch": 0.8268648499862372, + "grad_norm": 0.9796315431594849, + "learning_rate": 7.656030431613982e-07, + "loss": 0.282, + "step": 41305 + }, + { + "epoch": 0.8268848685033656, + "grad_norm": 1.1701382398605347, + "learning_rate": 7.654306563999442e-07, + "loss": 0.2992, + "step": 41306 + }, + { + "epoch": 0.8269048870204939, + "grad_norm": 1.1333701610565186, + "learning_rate": 7.652582874397096e-07, + "loss": 0.3269, + "step": 41307 + }, + { + "epoch": 0.8269249055376223, + "grad_norm": 1.18690824508667, + "learning_rate": 7.650859362814183e-07, + "loss": 0.3088, + "step": 41308 + }, + { + "epoch": 0.8269449240547506, + "grad_norm": 1.0886133909225464, + "learning_rate": 7.649136029257942e-07, + "loss": 0.3617, + "step": 41309 + }, + { + "epoch": 0.826964942571879, + "grad_norm": 1.131913423538208, + "learning_rate": 7.647412873735605e-07, + "loss": 0.2933, + "step": 41310 + }, + { + "epoch": 0.8269849610890073, + "grad_norm": 1.1512796878814697, + "learning_rate": 7.645689896254444e-07, + "loss": 0.3077, + "step": 41311 + }, + { + "epoch": 0.8270049796061357, + "grad_norm": 1.1299257278442383, + "learning_rate": 7.643967096821669e-07, + "loss": 0.3138, + "step": 41312 + }, + { + "epoch": 0.827024998123264, + "grad_norm": 1.340466022491455, + "learning_rate": 7.64224447544456e-07, + "loss": 0.327, + "step": 41313 + }, + { + "epoch": 0.8270450166403923, + "grad_norm": 1.074938178062439, + "learning_rate": 7.640522032130343e-07, + "loss": 0.293, + "step": 41314 + }, + { + "epoch": 0.8270650351575207, + "grad_norm": 1.1897565126419067, + "learning_rate": 7.638799766886235e-07, + "loss": 0.2682, + "step": 41315 + }, + { + "epoch": 0.827085053674649, + "grad_norm": 1.2109675407409668, + "learning_rate": 7.637077679719512e-07, + "loss": 0.3204, + "step": 41316 + }, + { + "epoch": 0.8271050721917774, + "grad_norm": 1.0291413068771362, + "learning_rate": 7.635355770637404e-07, + "loss": 0.2732, + "step": 41317 + }, + { + "epoch": 0.8271250907089057, + "grad_norm": 1.8756706714630127, + "learning_rate": 7.633634039647137e-07, + "loss": 0.7059, + "step": 41318 + }, + { + "epoch": 0.8271451092260341, + "grad_norm": 1.2164199352264404, + "learning_rate": 7.631912486755955e-07, + "loss": 0.3355, + "step": 41319 + }, + { + "epoch": 0.8271651277431624, + "grad_norm": 1.0015212297439575, + "learning_rate": 7.630191111971086e-07, + "loss": 0.2681, + "step": 41320 + }, + { + "epoch": 0.8271851462602907, + "grad_norm": 1.0893205404281616, + "learning_rate": 7.628469915299786e-07, + "loss": 0.2893, + "step": 41321 + }, + { + "epoch": 0.8272051647774191, + "grad_norm": 1.1048492193222046, + "learning_rate": 7.626748896749275e-07, + "loss": 0.3327, + "step": 41322 + }, + { + "epoch": 0.8272251832945474, + "grad_norm": 1.211538553237915, + "learning_rate": 7.625028056326789e-07, + "loss": 0.2541, + "step": 41323 + }, + { + "epoch": 0.8272452018116758, + "grad_norm": 1.1410475969314575, + "learning_rate": 7.623307394039575e-07, + "loss": 0.2995, + "step": 41324 + }, + { + "epoch": 0.8272652203288041, + "grad_norm": 1.133372187614441, + "learning_rate": 7.621586909894846e-07, + "loss": 0.2641, + "step": 41325 + }, + { + "epoch": 0.8272852388459325, + "grad_norm": 2.002066135406494, + "learning_rate": 7.619866603899856e-07, + "loss": 0.764, + "step": 41326 + }, + { + "epoch": 0.8273052573630608, + "grad_norm": 1.0717220306396484, + "learning_rate": 7.618146476061828e-07, + "loss": 0.2644, + "step": 41327 + }, + { + "epoch": 0.8273252758801892, + "grad_norm": 1.0998351573944092, + "learning_rate": 7.616426526387977e-07, + "loss": 0.2995, + "step": 41328 + }, + { + "epoch": 0.8273452943973175, + "grad_norm": 1.0613187551498413, + "learning_rate": 7.614706754885559e-07, + "loss": 0.2927, + "step": 41329 + }, + { + "epoch": 0.8273653129144458, + "grad_norm": 1.1627311706542969, + "learning_rate": 7.612987161561797e-07, + "loss": 0.3305, + "step": 41330 + }, + { + "epoch": 0.8273853314315742, + "grad_norm": 1.1320210695266724, + "learning_rate": 7.611267746423912e-07, + "loss": 0.2882, + "step": 41331 + }, + { + "epoch": 0.8274053499487025, + "grad_norm": 1.9529967308044434, + "learning_rate": 7.609548509479136e-07, + "loss": 0.8417, + "step": 41332 + }, + { + "epoch": 0.8274253684658309, + "grad_norm": 1.3760024309158325, + "learning_rate": 7.607829450734683e-07, + "loss": 0.3323, + "step": 41333 + }, + { + "epoch": 0.8274453869829592, + "grad_norm": 1.8784295320510864, + "learning_rate": 7.606110570197806e-07, + "loss": 0.7136, + "step": 41334 + }, + { + "epoch": 0.8274654055000876, + "grad_norm": 1.2402386665344238, + "learning_rate": 7.604391867875716e-07, + "loss": 0.2863, + "step": 41335 + }, + { + "epoch": 0.8274854240172159, + "grad_norm": 1.083044409751892, + "learning_rate": 7.602673343775624e-07, + "loss": 0.2856, + "step": 41336 + }, + { + "epoch": 0.8275054425343442, + "grad_norm": 1.8218740224838257, + "learning_rate": 7.600954997904786e-07, + "loss": 0.6679, + "step": 41337 + }, + { + "epoch": 0.8275254610514726, + "grad_norm": 1.1422874927520752, + "learning_rate": 7.599236830270396e-07, + "loss": 0.2976, + "step": 41338 + }, + { + "epoch": 0.8275454795686009, + "grad_norm": 1.8808341026306152, + "learning_rate": 7.597518840879697e-07, + "loss": 0.8517, + "step": 41339 + }, + { + "epoch": 0.8275654980857293, + "grad_norm": 1.254451870918274, + "learning_rate": 7.595801029739907e-07, + "loss": 0.2902, + "step": 41340 + }, + { + "epoch": 0.8275855166028576, + "grad_norm": 1.1132110357284546, + "learning_rate": 7.594083396858243e-07, + "loss": 0.2906, + "step": 41341 + }, + { + "epoch": 0.827605535119986, + "grad_norm": 1.0413364171981812, + "learning_rate": 7.59236594224192e-07, + "loss": 0.279, + "step": 41342 + }, + { + "epoch": 0.8276255536371143, + "grad_norm": 1.1252706050872803, + "learning_rate": 7.590648665898171e-07, + "loss": 0.3016, + "step": 41343 + }, + { + "epoch": 0.8276455721542427, + "grad_norm": 0.9761425256729126, + "learning_rate": 7.588931567834207e-07, + "loss": 0.2897, + "step": 41344 + }, + { + "epoch": 0.827665590671371, + "grad_norm": 1.0765470266342163, + "learning_rate": 7.58721464805725e-07, + "loss": 0.2743, + "step": 41345 + }, + { + "epoch": 0.8276856091884993, + "grad_norm": 1.4375641345977783, + "learning_rate": 7.585497906574501e-07, + "loss": 0.2846, + "step": 41346 + }, + { + "epoch": 0.8277056277056277, + "grad_norm": 1.0816017389297485, + "learning_rate": 7.58378134339321e-07, + "loss": 0.2993, + "step": 41347 + }, + { + "epoch": 0.827725646222756, + "grad_norm": 1.3829491138458252, + "learning_rate": 7.582064958520569e-07, + "loss": 0.3021, + "step": 41348 + }, + { + "epoch": 0.8277456647398844, + "grad_norm": 1.197835087776184, + "learning_rate": 7.580348751963784e-07, + "loss": 0.247, + "step": 41349 + }, + { + "epoch": 0.8277656832570127, + "grad_norm": 1.1383898258209229, + "learning_rate": 7.578632723730101e-07, + "loss": 0.3339, + "step": 41350 + }, + { + "epoch": 0.8277857017741411, + "grad_norm": 1.1313998699188232, + "learning_rate": 7.576916873826701e-07, + "loss": 0.2955, + "step": 41351 + }, + { + "epoch": 0.8278057202912694, + "grad_norm": 1.106009840965271, + "learning_rate": 7.575201202260829e-07, + "loss": 0.27, + "step": 41352 + }, + { + "epoch": 0.8278257388083977, + "grad_norm": 1.084848403930664, + "learning_rate": 7.57348570903968e-07, + "loss": 0.2607, + "step": 41353 + }, + { + "epoch": 0.8278457573255261, + "grad_norm": 1.2746843099594116, + "learning_rate": 7.571770394170464e-07, + "loss": 0.2687, + "step": 41354 + }, + { + "epoch": 0.8278657758426544, + "grad_norm": 1.231195330619812, + "learning_rate": 7.570055257660385e-07, + "loss": 0.3252, + "step": 41355 + }, + { + "epoch": 0.8278857943597828, + "grad_norm": 1.1438782215118408, + "learning_rate": 7.568340299516674e-07, + "loss": 0.2785, + "step": 41356 + }, + { + "epoch": 0.8279058128769111, + "grad_norm": 1.163212776184082, + "learning_rate": 7.566625519746529e-07, + "loss": 0.3146, + "step": 41357 + }, + { + "epoch": 0.8279258313940395, + "grad_norm": 1.1239041090011597, + "learning_rate": 7.56491091835716e-07, + "loss": 0.3532, + "step": 41358 + }, + { + "epoch": 0.8279458499111678, + "grad_norm": 1.8272875547409058, + "learning_rate": 7.563196495355756e-07, + "loss": 0.7804, + "step": 41359 + }, + { + "epoch": 0.8279658684282962, + "grad_norm": 1.891992211341858, + "learning_rate": 7.561482250749558e-07, + "loss": 0.7085, + "step": 41360 + }, + { + "epoch": 0.8279858869454245, + "grad_norm": 1.077736496925354, + "learning_rate": 7.559768184545757e-07, + "loss": 0.3072, + "step": 41361 + }, + { + "epoch": 0.8280059054625528, + "grad_norm": 1.2146679162979126, + "learning_rate": 7.558054296751543e-07, + "loss": 0.3221, + "step": 41362 + }, + { + "epoch": 0.8280259239796812, + "grad_norm": 1.1322338581085205, + "learning_rate": 7.556340587374145e-07, + "loss": 0.3408, + "step": 41363 + }, + { + "epoch": 0.8280459424968095, + "grad_norm": 1.0846648216247559, + "learning_rate": 7.554627056420749e-07, + "loss": 0.2941, + "step": 41364 + }, + { + "epoch": 0.8280659610139379, + "grad_norm": 1.2590806484222412, + "learning_rate": 7.552913703898578e-07, + "loss": 0.3323, + "step": 41365 + }, + { + "epoch": 0.8280859795310662, + "grad_norm": 1.1781415939331055, + "learning_rate": 7.551200529814823e-07, + "loss": 0.3086, + "step": 41366 + }, + { + "epoch": 0.8281059980481946, + "grad_norm": 1.2931078672409058, + "learning_rate": 7.549487534176686e-07, + "loss": 0.2899, + "step": 41367 + }, + { + "epoch": 0.8281260165653229, + "grad_norm": 1.1913498640060425, + "learning_rate": 7.547774716991351e-07, + "loss": 0.2774, + "step": 41368 + }, + { + "epoch": 0.8281460350824512, + "grad_norm": 1.0384564399719238, + "learning_rate": 7.546062078266053e-07, + "loss": 0.2669, + "step": 41369 + }, + { + "epoch": 0.8281660535995796, + "grad_norm": 1.1468818187713623, + "learning_rate": 7.544349618007973e-07, + "loss": 0.3062, + "step": 41370 + }, + { + "epoch": 0.8281860721167079, + "grad_norm": 1.0667239427566528, + "learning_rate": 7.542637336224312e-07, + "loss": 0.3117, + "step": 41371 + }, + { + "epoch": 0.8282060906338363, + "grad_norm": 1.136348009109497, + "learning_rate": 7.540925232922264e-07, + "loss": 0.2544, + "step": 41372 + }, + { + "epoch": 0.8282261091509646, + "grad_norm": 1.0328938961029053, + "learning_rate": 7.539213308109017e-07, + "loss": 0.2781, + "step": 41373 + }, + { + "epoch": 0.828246127668093, + "grad_norm": 1.057196021080017, + "learning_rate": 7.537501561791794e-07, + "loss": 0.282, + "step": 41374 + }, + { + "epoch": 0.8282661461852213, + "grad_norm": 1.116532325744629, + "learning_rate": 7.53578999397776e-07, + "loss": 0.3237, + "step": 41375 + }, + { + "epoch": 0.8282861647023497, + "grad_norm": 1.1960535049438477, + "learning_rate": 7.534078604674145e-07, + "loss": 0.3112, + "step": 41376 + }, + { + "epoch": 0.828306183219478, + "grad_norm": 1.1746972799301147, + "learning_rate": 7.532367393888107e-07, + "loss": 0.3429, + "step": 41377 + }, + { + "epoch": 0.8283262017366063, + "grad_norm": 1.0278019905090332, + "learning_rate": 7.53065636162687e-07, + "loss": 0.313, + "step": 41378 + }, + { + "epoch": 0.8283462202537347, + "grad_norm": 1.1056082248687744, + "learning_rate": 7.528945507897617e-07, + "loss": 0.333, + "step": 41379 + }, + { + "epoch": 0.828366238770863, + "grad_norm": 1.2034296989440918, + "learning_rate": 7.527234832707536e-07, + "loss": 0.3221, + "step": 41380 + }, + { + "epoch": 0.8283862572879914, + "grad_norm": 1.0132911205291748, + "learning_rate": 7.525524336063816e-07, + "loss": 0.2569, + "step": 41381 + }, + { + "epoch": 0.8284062758051197, + "grad_norm": 0.9899630546569824, + "learning_rate": 7.523814017973646e-07, + "loss": 0.2893, + "step": 41382 + }, + { + "epoch": 0.8284262943222481, + "grad_norm": 1.064307451248169, + "learning_rate": 7.522103878444226e-07, + "loss": 0.2879, + "step": 41383 + }, + { + "epoch": 0.8284463128393764, + "grad_norm": 1.419050693511963, + "learning_rate": 7.520393917482743e-07, + "loss": 0.3358, + "step": 41384 + }, + { + "epoch": 0.8284663313565047, + "grad_norm": 1.8835020065307617, + "learning_rate": 7.518684135096383e-07, + "loss": 0.8139, + "step": 41385 + }, + { + "epoch": 0.8284863498736331, + "grad_norm": 1.2067416906356812, + "learning_rate": 7.51697453129232e-07, + "loss": 0.3035, + "step": 41386 + }, + { + "epoch": 0.8285063683907614, + "grad_norm": 1.12295663356781, + "learning_rate": 7.515265106077768e-07, + "loss": 0.3531, + "step": 41387 + }, + { + "epoch": 0.8285263869078898, + "grad_norm": 1.0810719728469849, + "learning_rate": 7.513555859459882e-07, + "loss": 0.3203, + "step": 41388 + }, + { + "epoch": 0.8285464054250181, + "grad_norm": 1.0513378381729126, + "learning_rate": 7.511846791445881e-07, + "loss": 0.3194, + "step": 41389 + }, + { + "epoch": 0.8285664239421465, + "grad_norm": 1.1887609958648682, + "learning_rate": 7.510137902042919e-07, + "loss": 0.3001, + "step": 41390 + }, + { + "epoch": 0.8285864424592748, + "grad_norm": 1.1510926485061646, + "learning_rate": 7.508429191258204e-07, + "loss": 0.3232, + "step": 41391 + }, + { + "epoch": 0.8286064609764032, + "grad_norm": 1.27504301071167, + "learning_rate": 7.506720659098909e-07, + "loss": 0.2841, + "step": 41392 + }, + { + "epoch": 0.8286264794935315, + "grad_norm": 1.1017870903015137, + "learning_rate": 7.505012305572212e-07, + "loss": 0.3005, + "step": 41393 + }, + { + "epoch": 0.8286464980106598, + "grad_norm": 1.0099897384643555, + "learning_rate": 7.503304130685302e-07, + "loss": 0.2409, + "step": 41394 + }, + { + "epoch": 0.8286665165277882, + "grad_norm": 2.0514519214630127, + "learning_rate": 7.501596134445344e-07, + "loss": 0.7516, + "step": 41395 + }, + { + "epoch": 0.8286865350449165, + "grad_norm": 1.2448301315307617, + "learning_rate": 7.499888316859543e-07, + "loss": 0.3286, + "step": 41396 + }, + { + "epoch": 0.8287065535620449, + "grad_norm": 1.0353103876113892, + "learning_rate": 7.498180677935063e-07, + "loss": 0.2964, + "step": 41397 + }, + { + "epoch": 0.8287265720791732, + "grad_norm": 1.0879019498825073, + "learning_rate": 7.496473217679084e-07, + "loss": 0.2732, + "step": 41398 + }, + { + "epoch": 0.8287465905963016, + "grad_norm": 1.0381544828414917, + "learning_rate": 7.49476593609878e-07, + "loss": 0.2975, + "step": 41399 + }, + { + "epoch": 0.8287666091134299, + "grad_norm": 1.1447030305862427, + "learning_rate": 7.493058833201339e-07, + "loss": 0.2618, + "step": 41400 + }, + { + "epoch": 0.8287866276305582, + "grad_norm": 1.0775532722473145, + "learning_rate": 7.491351908993916e-07, + "loss": 0.3002, + "step": 41401 + }, + { + "epoch": 0.8288066461476866, + "grad_norm": 1.0065735578536987, + "learning_rate": 7.489645163483722e-07, + "loss": 0.3104, + "step": 41402 + }, + { + "epoch": 0.8288266646648149, + "grad_norm": 1.2897188663482666, + "learning_rate": 7.487938596677902e-07, + "loss": 0.2755, + "step": 41403 + }, + { + "epoch": 0.8288466831819433, + "grad_norm": 1.1686822175979614, + "learning_rate": 7.486232208583633e-07, + "loss": 0.3293, + "step": 41404 + }, + { + "epoch": 0.8288667016990716, + "grad_norm": 1.152694821357727, + "learning_rate": 7.484525999208109e-07, + "loss": 0.2925, + "step": 41405 + }, + { + "epoch": 0.8288867202162, + "grad_norm": 1.1259845495224, + "learning_rate": 7.482819968558486e-07, + "loss": 0.2738, + "step": 41406 + }, + { + "epoch": 0.8289067387333283, + "grad_norm": 1.7847422361373901, + "learning_rate": 7.481114116641936e-07, + "loss": 0.7643, + "step": 41407 + }, + { + "epoch": 0.8289267572504567, + "grad_norm": 1.129219889640808, + "learning_rate": 7.479408443465618e-07, + "loss": 0.327, + "step": 41408 + }, + { + "epoch": 0.828946775767585, + "grad_norm": 1.0490890741348267, + "learning_rate": 7.477702949036736e-07, + "loss": 0.2946, + "step": 41409 + }, + { + "epoch": 0.8289667942847133, + "grad_norm": 1.079442024230957, + "learning_rate": 7.475997633362431e-07, + "loss": 0.3162, + "step": 41410 + }, + { + "epoch": 0.8289868128018417, + "grad_norm": 0.9827859997749329, + "learning_rate": 7.474292496449886e-07, + "loss": 0.3142, + "step": 41411 + }, + { + "epoch": 0.82900683131897, + "grad_norm": 1.2161877155303955, + "learning_rate": 7.472587538306253e-07, + "loss": 0.3091, + "step": 41412 + }, + { + "epoch": 0.8290268498360984, + "grad_norm": 1.2044570446014404, + "learning_rate": 7.470882758938719e-07, + "loss": 0.2362, + "step": 41413 + }, + { + "epoch": 0.8290468683532267, + "grad_norm": 1.0770293474197388, + "learning_rate": 7.469178158354429e-07, + "loss": 0.304, + "step": 41414 + }, + { + "epoch": 0.8290668868703551, + "grad_norm": 1.1872942447662354, + "learning_rate": 7.467473736560576e-07, + "loss": 0.2838, + "step": 41415 + }, + { + "epoch": 0.8290869053874834, + "grad_norm": 1.2575017213821411, + "learning_rate": 7.465769493564313e-07, + "loss": 0.3197, + "step": 41416 + }, + { + "epoch": 0.8291069239046117, + "grad_norm": 1.119895339012146, + "learning_rate": 7.46406542937278e-07, + "loss": 0.2601, + "step": 41417 + }, + { + "epoch": 0.8291269424217401, + "grad_norm": 1.099380373954773, + "learning_rate": 7.462361543993179e-07, + "loss": 0.2962, + "step": 41418 + }, + { + "epoch": 0.8291469609388684, + "grad_norm": 1.4879812002182007, + "learning_rate": 7.460657837432661e-07, + "loss": 0.2596, + "step": 41419 + }, + { + "epoch": 0.8291669794559968, + "grad_norm": 1.137129306793213, + "learning_rate": 7.45895430969838e-07, + "loss": 0.3087, + "step": 41420 + }, + { + "epoch": 0.8291869979731251, + "grad_norm": 1.1149306297302246, + "learning_rate": 7.457250960797486e-07, + "loss": 0.276, + "step": 41421 + }, + { + "epoch": 0.8292070164902535, + "grad_norm": 1.1240451335906982, + "learning_rate": 7.455547790737166e-07, + "loss": 0.2857, + "step": 41422 + }, + { + "epoch": 0.8292270350073818, + "grad_norm": 1.1307300329208374, + "learning_rate": 7.453844799524568e-07, + "loss": 0.2592, + "step": 41423 + }, + { + "epoch": 0.8292470535245102, + "grad_norm": 1.2180335521697998, + "learning_rate": 7.45214198716685e-07, + "loss": 0.2878, + "step": 41424 + }, + { + "epoch": 0.8292670720416385, + "grad_norm": 1.1591341495513916, + "learning_rate": 7.450439353671157e-07, + "loss": 0.2748, + "step": 41425 + }, + { + "epoch": 0.8292870905587668, + "grad_norm": 1.1251097917556763, + "learning_rate": 7.448736899044673e-07, + "loss": 0.2994, + "step": 41426 + }, + { + "epoch": 0.8293071090758952, + "grad_norm": 1.9275349378585815, + "learning_rate": 7.44703462329453e-07, + "loss": 0.7468, + "step": 41427 + }, + { + "epoch": 0.8293271275930235, + "grad_norm": 1.0085376501083374, + "learning_rate": 7.445332526427912e-07, + "loss": 0.2861, + "step": 41428 + }, + { + "epoch": 0.8293471461101519, + "grad_norm": 1.0179144144058228, + "learning_rate": 7.443630608451952e-07, + "loss": 0.2765, + "step": 41429 + }, + { + "epoch": 0.8293671646272802, + "grad_norm": 1.081557035446167, + "learning_rate": 7.441928869373805e-07, + "loss": 0.2795, + "step": 41430 + }, + { + "epoch": 0.8293871831444086, + "grad_norm": 1.279144287109375, + "learning_rate": 7.440227309200637e-07, + "loss": 0.2919, + "step": 41431 + }, + { + "epoch": 0.8294072016615369, + "grad_norm": 1.8756861686706543, + "learning_rate": 7.438525927939599e-07, + "loss": 0.7299, + "step": 41432 + }, + { + "epoch": 0.8294272201786652, + "grad_norm": 1.2181980609893799, + "learning_rate": 7.436824725597836e-07, + "loss": 0.3203, + "step": 41433 + }, + { + "epoch": 0.8294472386957936, + "grad_norm": 1.1040329933166504, + "learning_rate": 7.4351237021825e-07, + "loss": 0.2638, + "step": 41434 + }, + { + "epoch": 0.8294672572129219, + "grad_norm": 1.2165948152542114, + "learning_rate": 7.433422857700733e-07, + "loss": 0.2632, + "step": 41435 + }, + { + "epoch": 0.8294872757300503, + "grad_norm": 1.1004548072814941, + "learning_rate": 7.431722192159712e-07, + "loss": 0.2765, + "step": 41436 + }, + { + "epoch": 0.8295072942471786, + "grad_norm": 1.0314024686813354, + "learning_rate": 7.43002170556657e-07, + "loss": 0.2731, + "step": 41437 + }, + { + "epoch": 0.829527312764307, + "grad_norm": 1.9273570775985718, + "learning_rate": 7.428321397928439e-07, + "loss": 0.7755, + "step": 41438 + }, + { + "epoch": 0.8295473312814353, + "grad_norm": 1.1629502773284912, + "learning_rate": 7.426621269252498e-07, + "loss": 0.3003, + "step": 41439 + }, + { + "epoch": 0.8295673497985637, + "grad_norm": 1.2281414270401, + "learning_rate": 7.424921319545864e-07, + "loss": 0.2814, + "step": 41440 + }, + { + "epoch": 0.829587368315692, + "grad_norm": 1.113594651222229, + "learning_rate": 7.423221548815712e-07, + "loss": 0.3175, + "step": 41441 + }, + { + "epoch": 0.8296073868328203, + "grad_norm": 1.2112351655960083, + "learning_rate": 7.421521957069178e-07, + "loss": 0.2803, + "step": 41442 + }, + { + "epoch": 0.8296274053499487, + "grad_norm": 1.252469539642334, + "learning_rate": 7.419822544313398e-07, + "loss": 0.3164, + "step": 41443 + }, + { + "epoch": 0.829647423867077, + "grad_norm": 1.2105568647384644, + "learning_rate": 7.418123310555514e-07, + "loss": 0.2321, + "step": 41444 + }, + { + "epoch": 0.8296674423842054, + "grad_norm": 1.0158246755599976, + "learning_rate": 7.416424255802685e-07, + "loss": 0.3151, + "step": 41445 + }, + { + "epoch": 0.8296874609013337, + "grad_norm": 1.0939151048660278, + "learning_rate": 7.414725380062043e-07, + "loss": 0.271, + "step": 41446 + }, + { + "epoch": 0.8297074794184621, + "grad_norm": 1.1280473470687866, + "learning_rate": 7.413026683340729e-07, + "loss": 0.2639, + "step": 41447 + }, + { + "epoch": 0.8297274979355904, + "grad_norm": 1.0617380142211914, + "learning_rate": 7.411328165645876e-07, + "loss": 0.2751, + "step": 41448 + }, + { + "epoch": 0.8297475164527187, + "grad_norm": 1.1351090669631958, + "learning_rate": 7.409629826984649e-07, + "loss": 0.3313, + "step": 41449 + }, + { + "epoch": 0.8297675349698471, + "grad_norm": 1.2179841995239258, + "learning_rate": 7.407931667364171e-07, + "loss": 0.2997, + "step": 41450 + }, + { + "epoch": 0.8297875534869754, + "grad_norm": 1.136687159538269, + "learning_rate": 7.40623368679157e-07, + "loss": 0.306, + "step": 41451 + }, + { + "epoch": 0.8298075720041038, + "grad_norm": 1.0103847980499268, + "learning_rate": 7.404535885274005e-07, + "loss": 0.2828, + "step": 41452 + }, + { + "epoch": 0.8298275905212321, + "grad_norm": 1.217280626296997, + "learning_rate": 7.402838262818595e-07, + "loss": 0.2957, + "step": 41453 + }, + { + "epoch": 0.8298476090383605, + "grad_norm": 0.9670287370681763, + "learning_rate": 7.4011408194325e-07, + "loss": 0.2609, + "step": 41454 + }, + { + "epoch": 0.8298676275554888, + "grad_norm": 1.2452657222747803, + "learning_rate": 7.399443555122843e-07, + "loss": 0.2617, + "step": 41455 + }, + { + "epoch": 0.8298876460726172, + "grad_norm": 1.208935260772705, + "learning_rate": 7.397746469896754e-07, + "loss": 0.2653, + "step": 41456 + }, + { + "epoch": 0.8299076645897455, + "grad_norm": 1.0314114093780518, + "learning_rate": 7.39604956376136e-07, + "loss": 0.2609, + "step": 41457 + }, + { + "epoch": 0.8299276831068738, + "grad_norm": 1.1106767654418945, + "learning_rate": 7.394352836723817e-07, + "loss": 0.2848, + "step": 41458 + }, + { + "epoch": 0.8299477016240022, + "grad_norm": 1.009279727935791, + "learning_rate": 7.392656288791245e-07, + "loss": 0.2656, + "step": 41459 + }, + { + "epoch": 0.8299677201411305, + "grad_norm": 1.3936225175857544, + "learning_rate": 7.390959919970775e-07, + "loss": 0.346, + "step": 41460 + }, + { + "epoch": 0.8299877386582589, + "grad_norm": 1.1949777603149414, + "learning_rate": 7.38926373026953e-07, + "loss": 0.3033, + "step": 41461 + }, + { + "epoch": 0.8300077571753872, + "grad_norm": 1.0324280261993408, + "learning_rate": 7.387567719694666e-07, + "loss": 0.2434, + "step": 41462 + }, + { + "epoch": 0.8300277756925156, + "grad_norm": 1.0198668241500854, + "learning_rate": 7.385871888253293e-07, + "loss": 0.3004, + "step": 41463 + }, + { + "epoch": 0.8300477942096439, + "grad_norm": 1.1338818073272705, + "learning_rate": 7.384176235952533e-07, + "loss": 0.3218, + "step": 41464 + }, + { + "epoch": 0.8300678127267722, + "grad_norm": 1.073864459991455, + "learning_rate": 7.382480762799538e-07, + "loss": 0.3049, + "step": 41465 + }, + { + "epoch": 0.8300878312439006, + "grad_norm": 1.1093798875808716, + "learning_rate": 7.380785468801405e-07, + "loss": 0.3162, + "step": 41466 + }, + { + "epoch": 0.8301078497610289, + "grad_norm": 1.1372584104537964, + "learning_rate": 7.379090353965296e-07, + "loss": 0.3303, + "step": 41467 + }, + { + "epoch": 0.8301278682781573, + "grad_norm": 2.036695957183838, + "learning_rate": 7.377395418298316e-07, + "loss": 0.7517, + "step": 41468 + }, + { + "epoch": 0.8301478867952856, + "grad_norm": 1.830599308013916, + "learning_rate": 7.375700661807595e-07, + "loss": 0.7758, + "step": 41469 + }, + { + "epoch": 0.830167905312414, + "grad_norm": 1.3226914405822754, + "learning_rate": 7.374006084500246e-07, + "loss": 0.295, + "step": 41470 + }, + { + "epoch": 0.8301879238295423, + "grad_norm": 1.13490891456604, + "learning_rate": 7.37231168638341e-07, + "loss": 0.2814, + "step": 41471 + }, + { + "epoch": 0.8302079423466707, + "grad_norm": 1.115424633026123, + "learning_rate": 7.370617467464203e-07, + "loss": 0.2883, + "step": 41472 + }, + { + "epoch": 0.830227960863799, + "grad_norm": 1.2361454963684082, + "learning_rate": 7.368923427749746e-07, + "loss": 0.3045, + "step": 41473 + }, + { + "epoch": 0.8302479793809273, + "grad_norm": 1.0357005596160889, + "learning_rate": 7.367229567247147e-07, + "loss": 0.3048, + "step": 41474 + }, + { + "epoch": 0.8302679978980557, + "grad_norm": 1.1330971717834473, + "learning_rate": 7.365535885963554e-07, + "loss": 0.3026, + "step": 41475 + }, + { + "epoch": 0.830288016415184, + "grad_norm": 1.2963064908981323, + "learning_rate": 7.363842383906078e-07, + "loss": 0.3021, + "step": 41476 + }, + { + "epoch": 0.8303080349323124, + "grad_norm": 1.0371259450912476, + "learning_rate": 7.362149061081814e-07, + "loss": 0.2889, + "step": 41477 + }, + { + "epoch": 0.8303280534494407, + "grad_norm": 1.1524710655212402, + "learning_rate": 7.360455917497916e-07, + "loss": 0.2806, + "step": 41478 + }, + { + "epoch": 0.8303480719665691, + "grad_norm": 1.0922679901123047, + "learning_rate": 7.358762953161469e-07, + "loss": 0.2877, + "step": 41479 + }, + { + "epoch": 0.8303680904836974, + "grad_norm": 1.8441399335861206, + "learning_rate": 7.357070168079622e-07, + "loss": 0.7314, + "step": 41480 + }, + { + "epoch": 0.8303881090008257, + "grad_norm": 1.1041866540908813, + "learning_rate": 7.355377562259475e-07, + "loss": 0.318, + "step": 41481 + }, + { + "epoch": 0.8304081275179541, + "grad_norm": 1.1433310508728027, + "learning_rate": 7.353685135708138e-07, + "loss": 0.2874, + "step": 41482 + }, + { + "epoch": 0.8304281460350824, + "grad_norm": 1.188289761543274, + "learning_rate": 7.351992888432724e-07, + "loss": 0.3273, + "step": 41483 + }, + { + "epoch": 0.8304481645522108, + "grad_norm": 1.12320077419281, + "learning_rate": 7.350300820440365e-07, + "loss": 0.2971, + "step": 41484 + }, + { + "epoch": 0.8304681830693391, + "grad_norm": 1.1832079887390137, + "learning_rate": 7.348608931738166e-07, + "loss": 0.3211, + "step": 41485 + }, + { + "epoch": 0.8304882015864675, + "grad_norm": 1.045658826828003, + "learning_rate": 7.346917222333233e-07, + "loss": 0.3007, + "step": 41486 + }, + { + "epoch": 0.8305082201035958, + "grad_norm": 1.0767096281051636, + "learning_rate": 7.345225692232683e-07, + "loss": 0.3163, + "step": 41487 + }, + { + "epoch": 0.8305282386207242, + "grad_norm": 1.0505093336105347, + "learning_rate": 7.343534341443609e-07, + "loss": 0.3074, + "step": 41488 + }, + { + "epoch": 0.8305482571378525, + "grad_norm": 1.145350456237793, + "learning_rate": 7.341843169973151e-07, + "loss": 0.2808, + "step": 41489 + }, + { + "epoch": 0.8305682756549808, + "grad_norm": 1.2795974016189575, + "learning_rate": 7.340152177828391e-07, + "loss": 0.2995, + "step": 41490 + }, + { + "epoch": 0.8305882941721092, + "grad_norm": 1.258416771888733, + "learning_rate": 7.338461365016464e-07, + "loss": 0.2601, + "step": 41491 + }, + { + "epoch": 0.8306083126892375, + "grad_norm": 1.0922688245773315, + "learning_rate": 7.336770731544457e-07, + "loss": 0.2764, + "step": 41492 + }, + { + "epoch": 0.8306283312063659, + "grad_norm": 0.9718204140663147, + "learning_rate": 7.335080277419487e-07, + "loss": 0.2226, + "step": 41493 + }, + { + "epoch": 0.8306483497234942, + "grad_norm": 1.0627861022949219, + "learning_rate": 7.333390002648666e-07, + "loss": 0.2992, + "step": 41494 + }, + { + "epoch": 0.8306683682406226, + "grad_norm": 1.697427749633789, + "learning_rate": 7.331699907239087e-07, + "loss": 0.755, + "step": 41495 + }, + { + "epoch": 0.8306883867577509, + "grad_norm": 1.090714931488037, + "learning_rate": 7.330009991197862e-07, + "loss": 0.2868, + "step": 41496 + }, + { + "epoch": 0.8307084052748792, + "grad_norm": 1.1375948190689087, + "learning_rate": 7.328320254532079e-07, + "loss": 0.3371, + "step": 41497 + }, + { + "epoch": 0.8307284237920076, + "grad_norm": 1.1352698802947998, + "learning_rate": 7.326630697248865e-07, + "loss": 0.2816, + "step": 41498 + }, + { + "epoch": 0.8307484423091359, + "grad_norm": 1.117409586906433, + "learning_rate": 7.324941319355317e-07, + "loss": 0.3033, + "step": 41499 + }, + { + "epoch": 0.8307684608262643, + "grad_norm": 1.1242989301681519, + "learning_rate": 7.323252120858527e-07, + "loss": 0.324, + "step": 41500 + }, + { + "epoch": 0.8307884793433926, + "grad_norm": 1.1466401815414429, + "learning_rate": 7.321563101765594e-07, + "loss": 0.279, + "step": 41501 + }, + { + "epoch": 0.830808497860521, + "grad_norm": 1.287434458732605, + "learning_rate": 7.319874262083632e-07, + "loss": 0.2778, + "step": 41502 + }, + { + "epoch": 0.8308285163776493, + "grad_norm": 1.1774357557296753, + "learning_rate": 7.318185601819721e-07, + "loss": 0.304, + "step": 41503 + }, + { + "epoch": 0.8308485348947777, + "grad_norm": 1.0158684253692627, + "learning_rate": 7.316497120980993e-07, + "loss": 0.2537, + "step": 41504 + }, + { + "epoch": 0.830868553411906, + "grad_norm": 1.168113350868225, + "learning_rate": 7.314808819574504e-07, + "loss": 0.3235, + "step": 41505 + }, + { + "epoch": 0.8308885719290343, + "grad_norm": 1.2066820859909058, + "learning_rate": 7.31312069760739e-07, + "loss": 0.3436, + "step": 41506 + }, + { + "epoch": 0.8309085904461627, + "grad_norm": 1.2741727828979492, + "learning_rate": 7.311432755086728e-07, + "loss": 0.3237, + "step": 41507 + }, + { + "epoch": 0.830928608963291, + "grad_norm": 1.3590419292449951, + "learning_rate": 7.309744992019619e-07, + "loss": 0.335, + "step": 41508 + }, + { + "epoch": 0.8309486274804194, + "grad_norm": 1.2233855724334717, + "learning_rate": 7.30805740841315e-07, + "loss": 0.2895, + "step": 41509 + }, + { + "epoch": 0.8309686459975477, + "grad_norm": 1.107909917831421, + "learning_rate": 7.30637000427441e-07, + "loss": 0.3018, + "step": 41510 + }, + { + "epoch": 0.8309886645146761, + "grad_norm": 1.2550548315048218, + "learning_rate": 7.304682779610517e-07, + "loss": 0.2614, + "step": 41511 + }, + { + "epoch": 0.8310086830318044, + "grad_norm": 1.1065164804458618, + "learning_rate": 7.302995734428547e-07, + "loss": 0.2598, + "step": 41512 + }, + { + "epoch": 0.8310287015489327, + "grad_norm": 1.1078851222991943, + "learning_rate": 7.301308868735596e-07, + "loss": 0.2634, + "step": 41513 + }, + { + "epoch": 0.8310487200660611, + "grad_norm": 1.803910732269287, + "learning_rate": 7.299622182538746e-07, + "loss": 0.7545, + "step": 41514 + }, + { + "epoch": 0.8310687385831894, + "grad_norm": 1.0071450471878052, + "learning_rate": 7.297935675845103e-07, + "loss": 0.2854, + "step": 41515 + }, + { + "epoch": 0.8310887571003178, + "grad_norm": 1.1510471105575562, + "learning_rate": 7.296249348661732e-07, + "loss": 0.2967, + "step": 41516 + }, + { + "epoch": 0.8311087756174461, + "grad_norm": 1.0050228834152222, + "learning_rate": 7.294563200995758e-07, + "loss": 0.3047, + "step": 41517 + }, + { + "epoch": 0.8311287941345745, + "grad_norm": 1.080786108970642, + "learning_rate": 7.292877232854246e-07, + "loss": 0.2621, + "step": 41518 + }, + { + "epoch": 0.8311488126517028, + "grad_norm": 1.1061925888061523, + "learning_rate": 7.291191444244278e-07, + "loss": 0.3011, + "step": 41519 + }, + { + "epoch": 0.8311688311688312, + "grad_norm": 0.9992131590843201, + "learning_rate": 7.289505835172961e-07, + "loss": 0.2986, + "step": 41520 + }, + { + "epoch": 0.8311888496859595, + "grad_norm": 1.2121983766555786, + "learning_rate": 7.287820405647366e-07, + "loss": 0.3153, + "step": 41521 + }, + { + "epoch": 0.8312088682030878, + "grad_norm": 1.2101471424102783, + "learning_rate": 7.286135155674589e-07, + "loss": 0.2661, + "step": 41522 + }, + { + "epoch": 0.8312288867202162, + "grad_norm": 1.1490830183029175, + "learning_rate": 7.28445008526169e-07, + "loss": 0.2955, + "step": 41523 + }, + { + "epoch": 0.8312489052373445, + "grad_norm": 1.0805041790008545, + "learning_rate": 7.282765194415786e-07, + "loss": 0.2858, + "step": 41524 + }, + { + "epoch": 0.8312689237544729, + "grad_norm": 1.0850396156311035, + "learning_rate": 7.281080483143943e-07, + "loss": 0.2642, + "step": 41525 + }, + { + "epoch": 0.8312889422716012, + "grad_norm": 1.1475147008895874, + "learning_rate": 7.279395951453244e-07, + "loss": 0.2684, + "step": 41526 + }, + { + "epoch": 0.8313089607887296, + "grad_norm": 1.0526108741760254, + "learning_rate": 7.277711599350773e-07, + "loss": 0.2697, + "step": 41527 + }, + { + "epoch": 0.8313289793058579, + "grad_norm": 1.1764459609985352, + "learning_rate": 7.276027426843596e-07, + "loss": 0.3017, + "step": 41528 + }, + { + "epoch": 0.8313489978229862, + "grad_norm": 1.0201176404953003, + "learning_rate": 7.274343433938801e-07, + "loss": 0.2786, + "step": 41529 + }, + { + "epoch": 0.8313690163401146, + "grad_norm": 1.0302109718322754, + "learning_rate": 7.272659620643491e-07, + "loss": 0.2885, + "step": 41530 + }, + { + "epoch": 0.8313890348572429, + "grad_norm": 1.8774704933166504, + "learning_rate": 7.270975986964717e-07, + "loss": 0.7317, + "step": 41531 + }, + { + "epoch": 0.8314090533743713, + "grad_norm": 1.3481297492980957, + "learning_rate": 7.269292532909561e-07, + "loss": 0.3176, + "step": 41532 + }, + { + "epoch": 0.8314290718914996, + "grad_norm": 1.1282135248184204, + "learning_rate": 7.267609258485109e-07, + "loss": 0.2741, + "step": 41533 + }, + { + "epoch": 0.831449090408628, + "grad_norm": 1.6570641994476318, + "learning_rate": 7.265926163698433e-07, + "loss": 0.7123, + "step": 41534 + }, + { + "epoch": 0.8314691089257563, + "grad_norm": 1.2113398313522339, + "learning_rate": 7.264243248556607e-07, + "loss": 0.3332, + "step": 41535 + }, + { + "epoch": 0.8314891274428847, + "grad_norm": 1.0077577829360962, + "learning_rate": 7.262560513066696e-07, + "loss": 0.2451, + "step": 41536 + }, + { + "epoch": 0.831509145960013, + "grad_norm": 1.0924986600875854, + "learning_rate": 7.260877957235796e-07, + "loss": 0.2767, + "step": 41537 + }, + { + "epoch": 0.8315291644771413, + "grad_norm": 1.7702029943466187, + "learning_rate": 7.25919558107096e-07, + "loss": 0.8048, + "step": 41538 + }, + { + "epoch": 0.8315491829942697, + "grad_norm": 1.034317135810852, + "learning_rate": 7.257513384579274e-07, + "loss": 0.2637, + "step": 41539 + }, + { + "epoch": 0.831569201511398, + "grad_norm": 1.1879682540893555, + "learning_rate": 7.255831367767802e-07, + "loss": 0.2824, + "step": 41540 + }, + { + "epoch": 0.8315892200285264, + "grad_norm": 1.1953284740447998, + "learning_rate": 7.254149530643606e-07, + "loss": 0.3155, + "step": 41541 + }, + { + "epoch": 0.8316092385456547, + "grad_norm": 1.071455717086792, + "learning_rate": 7.252467873213764e-07, + "loss": 0.3266, + "step": 41542 + }, + { + "epoch": 0.8316292570627831, + "grad_norm": 1.1117032766342163, + "learning_rate": 7.25078639548536e-07, + "loss": 0.3077, + "step": 41543 + }, + { + "epoch": 0.8316492755799114, + "grad_norm": 1.5229787826538086, + "learning_rate": 7.249105097465453e-07, + "loss": 0.3059, + "step": 41544 + }, + { + "epoch": 0.8316692940970397, + "grad_norm": 1.0441230535507202, + "learning_rate": 7.247423979161094e-07, + "loss": 0.2961, + "step": 41545 + }, + { + "epoch": 0.8316893126141681, + "grad_norm": 1.109947919845581, + "learning_rate": 7.245743040579372e-07, + "loss": 0.292, + "step": 41546 + }, + { + "epoch": 0.8317093311312964, + "grad_norm": 1.1653850078582764, + "learning_rate": 7.244062281727355e-07, + "loss": 0.3438, + "step": 41547 + }, + { + "epoch": 0.8317293496484248, + "grad_norm": 1.3138058185577393, + "learning_rate": 7.242381702612089e-07, + "loss": 0.2951, + "step": 41548 + }, + { + "epoch": 0.8317493681655531, + "grad_norm": 1.0802099704742432, + "learning_rate": 7.240701303240654e-07, + "loss": 0.3225, + "step": 41549 + }, + { + "epoch": 0.8317693866826815, + "grad_norm": 1.247321367263794, + "learning_rate": 7.239021083620096e-07, + "loss": 0.3007, + "step": 41550 + }, + { + "epoch": 0.8317894051998098, + "grad_norm": 1.0893702507019043, + "learning_rate": 7.237341043757507e-07, + "loss": 0.2507, + "step": 41551 + }, + { + "epoch": 0.8318094237169382, + "grad_norm": 1.3682891130447388, + "learning_rate": 7.235661183659925e-07, + "loss": 0.304, + "step": 41552 + }, + { + "epoch": 0.8318294422340665, + "grad_norm": 1.2970134019851685, + "learning_rate": 7.233981503334426e-07, + "loss": 0.2812, + "step": 41553 + }, + { + "epoch": 0.8318494607511948, + "grad_norm": 1.1063461303710938, + "learning_rate": 7.232302002788055e-07, + "loss": 0.2628, + "step": 41554 + }, + { + "epoch": 0.8318694792683232, + "grad_norm": 1.1340996026992798, + "learning_rate": 7.230622682027882e-07, + "loss": 0.2625, + "step": 41555 + }, + { + "epoch": 0.8318894977854515, + "grad_norm": 1.9202691316604614, + "learning_rate": 7.228943541060973e-07, + "loss": 0.8509, + "step": 41556 + }, + { + "epoch": 0.8319095163025799, + "grad_norm": 1.0590386390686035, + "learning_rate": 7.227264579894389e-07, + "loss": 0.3013, + "step": 41557 + }, + { + "epoch": 0.8319295348197082, + "grad_norm": 1.0582374334335327, + "learning_rate": 7.225585798535173e-07, + "loss": 0.2435, + "step": 41558 + }, + { + "epoch": 0.8319495533368366, + "grad_norm": 1.1260986328125, + "learning_rate": 7.223907196990381e-07, + "loss": 0.2821, + "step": 41559 + }, + { + "epoch": 0.8319695718539649, + "grad_norm": 1.1652947664260864, + "learning_rate": 7.222228775267093e-07, + "loss": 0.2879, + "step": 41560 + }, + { + "epoch": 0.8319895903710932, + "grad_norm": 1.0569974184036255, + "learning_rate": 7.220550533372345e-07, + "loss": 0.2748, + "step": 41561 + }, + { + "epoch": 0.8320096088882216, + "grad_norm": 1.9132962226867676, + "learning_rate": 7.218872471313193e-07, + "loss": 0.7503, + "step": 41562 + }, + { + "epoch": 0.8320296274053499, + "grad_norm": 1.2520815134048462, + "learning_rate": 7.217194589096688e-07, + "loss": 0.3251, + "step": 41563 + }, + { + "epoch": 0.8320496459224783, + "grad_norm": 0.9410869479179382, + "learning_rate": 7.215516886729901e-07, + "loss": 0.2424, + "step": 41564 + }, + { + "epoch": 0.8320696644396066, + "grad_norm": 1.0935213565826416, + "learning_rate": 7.213839364219871e-07, + "loss": 0.3243, + "step": 41565 + }, + { + "epoch": 0.832089682956735, + "grad_norm": 1.0637623071670532, + "learning_rate": 7.212162021573655e-07, + "loss": 0.296, + "step": 41566 + }, + { + "epoch": 0.8321097014738633, + "grad_norm": 1.1243702173233032, + "learning_rate": 7.210484858798289e-07, + "loss": 0.3285, + "step": 41567 + }, + { + "epoch": 0.8321297199909916, + "grad_norm": 1.229256510734558, + "learning_rate": 7.208807875900836e-07, + "loss": 0.2945, + "step": 41568 + }, + { + "epoch": 0.83214973850812, + "grad_norm": 0.9858840107917786, + "learning_rate": 7.207131072888363e-07, + "loss": 0.2459, + "step": 41569 + }, + { + "epoch": 0.8321697570252483, + "grad_norm": 1.212482213973999, + "learning_rate": 7.205454449767896e-07, + "loss": 0.2898, + "step": 41570 + }, + { + "epoch": 0.8321897755423767, + "grad_norm": 1.1375477313995361, + "learning_rate": 7.203778006546485e-07, + "loss": 0.3459, + "step": 41571 + }, + { + "epoch": 0.832209794059505, + "grad_norm": 1.0715886354446411, + "learning_rate": 7.202101743231177e-07, + "loss": 0.3129, + "step": 41572 + }, + { + "epoch": 0.8322298125766334, + "grad_norm": 1.1708457469940186, + "learning_rate": 7.200425659829035e-07, + "loss": 0.2958, + "step": 41573 + }, + { + "epoch": 0.8322498310937617, + "grad_norm": 1.1430901288986206, + "learning_rate": 7.198749756347089e-07, + "loss": 0.2911, + "step": 41574 + }, + { + "epoch": 0.8322698496108901, + "grad_norm": 1.9296960830688477, + "learning_rate": 7.19707403279239e-07, + "loss": 0.723, + "step": 41575 + }, + { + "epoch": 0.8322898681280184, + "grad_norm": 1.179599642753601, + "learning_rate": 7.195398489171968e-07, + "loss": 0.3057, + "step": 41576 + }, + { + "epoch": 0.8323098866451467, + "grad_norm": 0.9822899103164673, + "learning_rate": 7.193723125492891e-07, + "loss": 0.2793, + "step": 41577 + }, + { + "epoch": 0.8323299051622751, + "grad_norm": 1.1974955797195435, + "learning_rate": 7.192047941762192e-07, + "loss": 0.2796, + "step": 41578 + }, + { + "epoch": 0.8323499236794034, + "grad_norm": 1.1693572998046875, + "learning_rate": 7.190372937986912e-07, + "loss": 0.2559, + "step": 41579 + }, + { + "epoch": 0.8323699421965318, + "grad_norm": 1.1105968952178955, + "learning_rate": 7.188698114174075e-07, + "loss": 0.297, + "step": 41580 + }, + { + "epoch": 0.8323899607136601, + "grad_norm": 1.1233651638031006, + "learning_rate": 7.187023470330739e-07, + "loss": 0.2906, + "step": 41581 + }, + { + "epoch": 0.8324099792307885, + "grad_norm": 1.0781346559524536, + "learning_rate": 7.185349006463955e-07, + "loss": 0.2944, + "step": 41582 + }, + { + "epoch": 0.8324299977479168, + "grad_norm": 1.6169919967651367, + "learning_rate": 7.183674722580752e-07, + "loss": 0.7155, + "step": 41583 + }, + { + "epoch": 0.8324500162650451, + "grad_norm": 1.0386158227920532, + "learning_rate": 7.182000618688167e-07, + "loss": 0.3145, + "step": 41584 + }, + { + "epoch": 0.8324700347821735, + "grad_norm": 1.1648015975952148, + "learning_rate": 7.180326694793222e-07, + "loss": 0.3349, + "step": 41585 + }, + { + "epoch": 0.8324900532993018, + "grad_norm": 1.0942727327346802, + "learning_rate": 7.178652950902981e-07, + "loss": 0.2847, + "step": 41586 + }, + { + "epoch": 0.8325100718164302, + "grad_norm": 1.7817569971084595, + "learning_rate": 7.176979387024468e-07, + "loss": 0.7468, + "step": 41587 + }, + { + "epoch": 0.8325300903335585, + "grad_norm": 1.1121679544448853, + "learning_rate": 7.175306003164717e-07, + "loss": 0.2657, + "step": 41588 + }, + { + "epoch": 0.8325501088506869, + "grad_norm": 1.0701708793640137, + "learning_rate": 7.173632799330766e-07, + "loss": 0.3015, + "step": 41589 + }, + { + "epoch": 0.8325701273678152, + "grad_norm": 1.0412001609802246, + "learning_rate": 7.171959775529636e-07, + "loss": 0.2665, + "step": 41590 + }, + { + "epoch": 0.8325901458849436, + "grad_norm": 1.0802977085113525, + "learning_rate": 7.170286931768378e-07, + "loss": 0.2562, + "step": 41591 + }, + { + "epoch": 0.8326101644020719, + "grad_norm": 1.1406681537628174, + "learning_rate": 7.168614268054019e-07, + "loss": 0.3036, + "step": 41592 + }, + { + "epoch": 0.8326301829192002, + "grad_norm": 1.1273291110992432, + "learning_rate": 7.166941784393577e-07, + "loss": 0.3047, + "step": 41593 + }, + { + "epoch": 0.8326502014363286, + "grad_norm": 1.1418884992599487, + "learning_rate": 7.16526948079409e-07, + "loss": 0.3046, + "step": 41594 + }, + { + "epoch": 0.8326702199534569, + "grad_norm": 1.0847346782684326, + "learning_rate": 7.163597357262609e-07, + "loss": 0.2735, + "step": 41595 + }, + { + "epoch": 0.8326902384705853, + "grad_norm": 1.0981050729751587, + "learning_rate": 7.161925413806142e-07, + "loss": 0.2903, + "step": 41596 + }, + { + "epoch": 0.8327102569877136, + "grad_norm": 1.066314458847046, + "learning_rate": 7.160253650431725e-07, + "loss": 0.3004, + "step": 41597 + }, + { + "epoch": 0.832730275504842, + "grad_norm": 1.2730793952941895, + "learning_rate": 7.158582067146369e-07, + "loss": 0.2645, + "step": 41598 + }, + { + "epoch": 0.8327502940219703, + "grad_norm": 1.9773170948028564, + "learning_rate": 7.156910663957128e-07, + "loss": 0.7239, + "step": 41599 + }, + { + "epoch": 0.8327703125390986, + "grad_norm": 1.1363792419433594, + "learning_rate": 7.155239440871009e-07, + "loss": 0.2638, + "step": 41600 + }, + { + "epoch": 0.832790331056227, + "grad_norm": 2.075888156890869, + "learning_rate": 7.15356839789505e-07, + "loss": 0.8081, + "step": 41601 + }, + { + "epoch": 0.8328103495733553, + "grad_norm": 1.080776572227478, + "learning_rate": 7.151897535036261e-07, + "loss": 0.2924, + "step": 41602 + }, + { + "epoch": 0.8328303680904837, + "grad_norm": 1.0958606004714966, + "learning_rate": 7.150226852301673e-07, + "loss": 0.2974, + "step": 41603 + }, + { + "epoch": 0.832850386607612, + "grad_norm": 0.9625899195671082, + "learning_rate": 7.148556349698316e-07, + "loss": 0.2773, + "step": 41604 + }, + { + "epoch": 0.8328704051247404, + "grad_norm": 1.321461796760559, + "learning_rate": 7.146886027233201e-07, + "loss": 0.2897, + "step": 41605 + }, + { + "epoch": 0.8328904236418687, + "grad_norm": 1.0591208934783936, + "learning_rate": 7.145215884913348e-07, + "loss": 0.3365, + "step": 41606 + }, + { + "epoch": 0.8329104421589971, + "grad_norm": 1.1018493175506592, + "learning_rate": 7.143545922745787e-07, + "loss": 0.2502, + "step": 41607 + }, + { + "epoch": 0.8329304606761254, + "grad_norm": 1.1127341985702515, + "learning_rate": 7.141876140737547e-07, + "loss": 0.2875, + "step": 41608 + }, + { + "epoch": 0.8329504791932537, + "grad_norm": 2.025407314300537, + "learning_rate": 7.140206538895633e-07, + "loss": 0.7644, + "step": 41609 + }, + { + "epoch": 0.8329704977103821, + "grad_norm": 1.042997121810913, + "learning_rate": 7.138537117227073e-07, + "loss": 0.3031, + "step": 41610 + }, + { + "epoch": 0.8329905162275104, + "grad_norm": 1.1487972736358643, + "learning_rate": 7.136867875738873e-07, + "loss": 0.2839, + "step": 41611 + }, + { + "epoch": 0.8330105347446388, + "grad_norm": 1.1083593368530273, + "learning_rate": 7.135198814438049e-07, + "loss": 0.2838, + "step": 41612 + }, + { + "epoch": 0.8330305532617671, + "grad_norm": 1.1771819591522217, + "learning_rate": 7.133529933331634e-07, + "loss": 0.2951, + "step": 41613 + }, + { + "epoch": 0.8330505717788955, + "grad_norm": 1.0178635120391846, + "learning_rate": 7.131861232426635e-07, + "loss": 0.2902, + "step": 41614 + }, + { + "epoch": 0.8330705902960238, + "grad_norm": 1.1264455318450928, + "learning_rate": 7.130192711730066e-07, + "loss": 0.3134, + "step": 41615 + }, + { + "epoch": 0.8330906088131521, + "grad_norm": 1.059165596961975, + "learning_rate": 7.128524371248929e-07, + "loss": 0.2994, + "step": 41616 + }, + { + "epoch": 0.8331106273302805, + "grad_norm": 1.3557214736938477, + "learning_rate": 7.126856210990263e-07, + "loss": 0.3186, + "step": 41617 + }, + { + "epoch": 0.8331306458474088, + "grad_norm": 1.1039447784423828, + "learning_rate": 7.125188230961066e-07, + "loss": 0.2981, + "step": 41618 + }, + { + "epoch": 0.8331506643645372, + "grad_norm": 1.259583830833435, + "learning_rate": 7.123520431168341e-07, + "loss": 0.269, + "step": 41619 + }, + { + "epoch": 0.8331706828816655, + "grad_norm": 1.2534111738204956, + "learning_rate": 7.121852811619101e-07, + "loss": 0.3177, + "step": 41620 + }, + { + "epoch": 0.8331907013987939, + "grad_norm": 1.2124531269073486, + "learning_rate": 7.120185372320381e-07, + "loss": 0.2602, + "step": 41621 + }, + { + "epoch": 0.8332107199159222, + "grad_norm": 1.0927962064743042, + "learning_rate": 7.118518113279177e-07, + "loss": 0.3355, + "step": 41622 + }, + { + "epoch": 0.8332307384330506, + "grad_norm": 1.1789803504943848, + "learning_rate": 7.116851034502492e-07, + "loss": 0.2961, + "step": 41623 + }, + { + "epoch": 0.8332507569501789, + "grad_norm": 1.1700774431228638, + "learning_rate": 7.115184135997338e-07, + "loss": 0.2943, + "step": 41624 + }, + { + "epoch": 0.8332707754673072, + "grad_norm": 1.1131744384765625, + "learning_rate": 7.11351741777071e-07, + "loss": 0.2857, + "step": 41625 + }, + { + "epoch": 0.8332907939844356, + "grad_norm": 1.0589494705200195, + "learning_rate": 7.111850879829635e-07, + "loss": 0.2798, + "step": 41626 + }, + { + "epoch": 0.8333108125015639, + "grad_norm": 1.135787010192871, + "learning_rate": 7.110184522181107e-07, + "loss": 0.3132, + "step": 41627 + }, + { + "epoch": 0.8333308310186923, + "grad_norm": 1.071975588798523, + "learning_rate": 7.108518344832138e-07, + "loss": 0.3024, + "step": 41628 + }, + { + "epoch": 0.8333508495358206, + "grad_norm": 1.1727807521820068, + "learning_rate": 7.106852347789711e-07, + "loss": 0.3087, + "step": 41629 + }, + { + "epoch": 0.833370868052949, + "grad_norm": 1.0473088026046753, + "learning_rate": 7.105186531060865e-07, + "loss": 0.323, + "step": 41630 + }, + { + "epoch": 0.8333908865700773, + "grad_norm": 1.04685640335083, + "learning_rate": 7.103520894652577e-07, + "loss": 0.3238, + "step": 41631 + }, + { + "epoch": 0.8334109050872056, + "grad_norm": 1.132215976715088, + "learning_rate": 7.101855438571847e-07, + "loss": 0.32, + "step": 41632 + }, + { + "epoch": 0.833430923604334, + "grad_norm": 1.1544750928878784, + "learning_rate": 7.100190162825699e-07, + "loss": 0.2905, + "step": 41633 + }, + { + "epoch": 0.8334509421214623, + "grad_norm": 1.171453833580017, + "learning_rate": 7.098525067421103e-07, + "loss": 0.2865, + "step": 41634 + }, + { + "epoch": 0.8334709606385907, + "grad_norm": 1.867227554321289, + "learning_rate": 7.096860152365087e-07, + "loss": 0.788, + "step": 41635 + }, + { + "epoch": 0.833490979155719, + "grad_norm": 1.1993827819824219, + "learning_rate": 7.095195417664641e-07, + "loss": 0.2941, + "step": 41636 + }, + { + "epoch": 0.8335109976728474, + "grad_norm": 1.2324979305267334, + "learning_rate": 7.093530863326759e-07, + "loss": 0.3095, + "step": 41637 + }, + { + "epoch": 0.8335310161899757, + "grad_norm": 1.0225807428359985, + "learning_rate": 7.091866489358428e-07, + "loss": 0.2728, + "step": 41638 + }, + { + "epoch": 0.8335510347071041, + "grad_norm": 1.2462552785873413, + "learning_rate": 7.090202295766668e-07, + "loss": 0.3427, + "step": 41639 + }, + { + "epoch": 0.8335710532242324, + "grad_norm": 1.1917644739151, + "learning_rate": 7.08853828255846e-07, + "loss": 0.2905, + "step": 41640 + }, + { + "epoch": 0.8335910717413607, + "grad_norm": 1.1966365575790405, + "learning_rate": 7.086874449740804e-07, + "loss": 0.3482, + "step": 41641 + }, + { + "epoch": 0.8336110902584891, + "grad_norm": 1.3513307571411133, + "learning_rate": 7.085210797320696e-07, + "loss": 0.2896, + "step": 41642 + }, + { + "epoch": 0.8336311087756174, + "grad_norm": 1.204312801361084, + "learning_rate": 7.083547325305112e-07, + "loss": 0.2898, + "step": 41643 + }, + { + "epoch": 0.8336511272927458, + "grad_norm": 1.105241060256958, + "learning_rate": 7.081884033701076e-07, + "loss": 0.3132, + "step": 41644 + }, + { + "epoch": 0.8336711458098741, + "grad_norm": 1.0402793884277344, + "learning_rate": 7.080220922515546e-07, + "loss": 0.2779, + "step": 41645 + }, + { + "epoch": 0.8336911643270025, + "grad_norm": 0.986793041229248, + "learning_rate": 7.078557991755547e-07, + "loss": 0.2339, + "step": 41646 + }, + { + "epoch": 0.8337111828441308, + "grad_norm": 1.151406168937683, + "learning_rate": 7.076895241428039e-07, + "loss": 0.2945, + "step": 41647 + }, + { + "epoch": 0.8337312013612591, + "grad_norm": 1.1990795135498047, + "learning_rate": 7.075232671540038e-07, + "loss": 0.2763, + "step": 41648 + }, + { + "epoch": 0.8337512198783875, + "grad_norm": 1.8308683633804321, + "learning_rate": 7.073570282098519e-07, + "loss": 0.7471, + "step": 41649 + }, + { + "epoch": 0.8337712383955158, + "grad_norm": 1.0394785404205322, + "learning_rate": 7.071908073110478e-07, + "loss": 0.3031, + "step": 41650 + }, + { + "epoch": 0.8337912569126442, + "grad_norm": 1.0676954984664917, + "learning_rate": 7.070246044582879e-07, + "loss": 0.2839, + "step": 41651 + }, + { + "epoch": 0.8338112754297725, + "grad_norm": 1.024696946144104, + "learning_rate": 7.068584196522743e-07, + "loss": 0.2952, + "step": 41652 + }, + { + "epoch": 0.8338312939469009, + "grad_norm": 1.196763038635254, + "learning_rate": 7.066922528937043e-07, + "loss": 0.2745, + "step": 41653 + }, + { + "epoch": 0.8338513124640292, + "grad_norm": 1.3586010932922363, + "learning_rate": 7.065261041832754e-07, + "loss": 0.3381, + "step": 41654 + }, + { + "epoch": 0.8338713309811576, + "grad_norm": 1.1775020360946655, + "learning_rate": 7.063599735216875e-07, + "loss": 0.2852, + "step": 41655 + }, + { + "epoch": 0.8338913494982859, + "grad_norm": 1.1845803260803223, + "learning_rate": 7.061938609096364e-07, + "loss": 0.3161, + "step": 41656 + }, + { + "epoch": 0.8339113680154142, + "grad_norm": 1.1099706888198853, + "learning_rate": 7.060277663478237e-07, + "loss": 0.2657, + "step": 41657 + }, + { + "epoch": 0.8339313865325426, + "grad_norm": 1.2308762073516846, + "learning_rate": 7.058616898369452e-07, + "loss": 0.3424, + "step": 41658 + }, + { + "epoch": 0.8339514050496709, + "grad_norm": 1.1260669231414795, + "learning_rate": 7.056956313777013e-07, + "loss": 0.2727, + "step": 41659 + }, + { + "epoch": 0.8339714235667993, + "grad_norm": 1.128536581993103, + "learning_rate": 7.055295909707877e-07, + "loss": 0.3193, + "step": 41660 + }, + { + "epoch": 0.8339914420839276, + "grad_norm": 1.11117422580719, + "learning_rate": 7.053635686169041e-07, + "loss": 0.3183, + "step": 41661 + }, + { + "epoch": 0.834011460601056, + "grad_norm": 1.2105027437210083, + "learning_rate": 7.051975643167486e-07, + "loss": 0.2976, + "step": 41662 + }, + { + "epoch": 0.8340314791181843, + "grad_norm": 1.1329457759857178, + "learning_rate": 7.050315780710176e-07, + "loss": 0.3213, + "step": 41663 + }, + { + "epoch": 0.8340514976353126, + "grad_norm": 1.205447793006897, + "learning_rate": 7.048656098804102e-07, + "loss": 0.2841, + "step": 41664 + }, + { + "epoch": 0.834071516152441, + "grad_norm": 1.1179677248001099, + "learning_rate": 7.046996597456219e-07, + "loss": 0.2993, + "step": 41665 + }, + { + "epoch": 0.8340915346695693, + "grad_norm": 1.1682991981506348, + "learning_rate": 7.045337276673531e-07, + "loss": 0.3202, + "step": 41666 + }, + { + "epoch": 0.8341115531866977, + "grad_norm": 1.1414451599121094, + "learning_rate": 7.043678136463e-07, + "loss": 0.2719, + "step": 41667 + }, + { + "epoch": 0.834131571703826, + "grad_norm": 1.0119060277938843, + "learning_rate": 7.042019176831605e-07, + "loss": 0.2672, + "step": 41668 + }, + { + "epoch": 0.8341515902209544, + "grad_norm": 1.0278847217559814, + "learning_rate": 7.0403603977863e-07, + "loss": 0.3199, + "step": 41669 + }, + { + "epoch": 0.8341716087380827, + "grad_norm": 1.23222017288208, + "learning_rate": 7.038701799334091e-07, + "loss": 0.2751, + "step": 41670 + }, + { + "epoch": 0.8341916272552111, + "grad_norm": 1.1825190782546997, + "learning_rate": 7.037043381481923e-07, + "loss": 0.2901, + "step": 41671 + }, + { + "epoch": 0.8342116457723394, + "grad_norm": 1.1746245622634888, + "learning_rate": 7.03538514423679e-07, + "loss": 0.2884, + "step": 41672 + }, + { + "epoch": 0.8342316642894677, + "grad_norm": 1.200061321258545, + "learning_rate": 7.033727087605647e-07, + "loss": 0.3188, + "step": 41673 + }, + { + "epoch": 0.8342516828065961, + "grad_norm": 1.0389403104782104, + "learning_rate": 7.032069211595461e-07, + "loss": 0.3004, + "step": 41674 + }, + { + "epoch": 0.8342717013237244, + "grad_norm": 1.137922763824463, + "learning_rate": 7.03041151621322e-07, + "loss": 0.3226, + "step": 41675 + }, + { + "epoch": 0.8342917198408528, + "grad_norm": 1.0602047443389893, + "learning_rate": 7.028754001465882e-07, + "loss": 0.2475, + "step": 41676 + }, + { + "epoch": 0.8343117383579811, + "grad_norm": 1.083957314491272, + "learning_rate": 7.027096667360416e-07, + "loss": 0.3016, + "step": 41677 + }, + { + "epoch": 0.8343317568751095, + "grad_norm": 1.0427296161651611, + "learning_rate": 7.025439513903776e-07, + "loss": 0.2887, + "step": 41678 + }, + { + "epoch": 0.8343517753922378, + "grad_norm": 1.2437653541564941, + "learning_rate": 7.023782541102953e-07, + "loss": 0.2721, + "step": 41679 + }, + { + "epoch": 0.8343717939093661, + "grad_norm": 1.083665370941162, + "learning_rate": 7.022125748964897e-07, + "loss": 0.2603, + "step": 41680 + }, + { + "epoch": 0.8343918124264945, + "grad_norm": 1.1465445756912231, + "learning_rate": 7.020469137496578e-07, + "loss": 0.2807, + "step": 41681 + }, + { + "epoch": 0.8344118309436228, + "grad_norm": 1.113535761833191, + "learning_rate": 7.018812706704942e-07, + "loss": 0.3058, + "step": 41682 + }, + { + "epoch": 0.8344318494607512, + "grad_norm": 0.9976658821105957, + "learning_rate": 7.017156456596979e-07, + "loss": 0.2905, + "step": 41683 + }, + { + "epoch": 0.8344518679778795, + "grad_norm": 1.3499739170074463, + "learning_rate": 7.01550038717963e-07, + "loss": 0.3133, + "step": 41684 + }, + { + "epoch": 0.8344718864950079, + "grad_norm": 1.1261597871780396, + "learning_rate": 7.013844498459882e-07, + "loss": 0.3227, + "step": 41685 + }, + { + "epoch": 0.8344919050121362, + "grad_norm": 1.0673282146453857, + "learning_rate": 7.01218879044468e-07, + "loss": 0.2814, + "step": 41686 + }, + { + "epoch": 0.8345119235292646, + "grad_norm": 1.9591566324234009, + "learning_rate": 7.010533263140967e-07, + "loss": 0.7439, + "step": 41687 + }, + { + "epoch": 0.8345319420463929, + "grad_norm": 1.0491697788238525, + "learning_rate": 7.00887791655574e-07, + "loss": 0.3006, + "step": 41688 + }, + { + "epoch": 0.8345519605635212, + "grad_norm": 1.0146582126617432, + "learning_rate": 7.007222750695935e-07, + "loss": 0.2877, + "step": 41689 + }, + { + "epoch": 0.8345719790806496, + "grad_norm": 1.0508064031600952, + "learning_rate": 7.005567765568517e-07, + "loss": 0.2587, + "step": 41690 + }, + { + "epoch": 0.8345919975977779, + "grad_norm": 1.2335747480392456, + "learning_rate": 7.003912961180426e-07, + "loss": 0.2993, + "step": 41691 + }, + { + "epoch": 0.8346120161149063, + "grad_norm": 1.1930654048919678, + "learning_rate": 7.002258337538642e-07, + "loss": 0.2952, + "step": 41692 + }, + { + "epoch": 0.8346320346320346, + "grad_norm": 1.248374581336975, + "learning_rate": 7.00060389465011e-07, + "loss": 0.299, + "step": 41693 + }, + { + "epoch": 0.834652053149163, + "grad_norm": 1.4254391193389893, + "learning_rate": 6.99894963252179e-07, + "loss": 0.2622, + "step": 41694 + }, + { + "epoch": 0.8346720716662913, + "grad_norm": 1.0949773788452148, + "learning_rate": 6.997295551160615e-07, + "loss": 0.2789, + "step": 41695 + }, + { + "epoch": 0.8346920901834196, + "grad_norm": 1.2462972402572632, + "learning_rate": 6.99564165057357e-07, + "loss": 0.3171, + "step": 41696 + }, + { + "epoch": 0.834712108700548, + "grad_norm": 1.9102988243103027, + "learning_rate": 6.993987930767582e-07, + "loss": 0.7421, + "step": 41697 + }, + { + "epoch": 0.8347321272176763, + "grad_norm": 1.122743844985962, + "learning_rate": 6.992334391749623e-07, + "loss": 0.2678, + "step": 41698 + }, + { + "epoch": 0.8347521457348047, + "grad_norm": 1.1667617559432983, + "learning_rate": 6.990681033526636e-07, + "loss": 0.326, + "step": 41699 + }, + { + "epoch": 0.834772164251933, + "grad_norm": 1.1082795858383179, + "learning_rate": 6.989027856105557e-07, + "loss": 0.2648, + "step": 41700 + }, + { + "epoch": 0.8347921827690614, + "grad_norm": 1.2219599485397339, + "learning_rate": 6.987374859493362e-07, + "loss": 0.2846, + "step": 41701 + }, + { + "epoch": 0.8348122012861897, + "grad_norm": 1.0527899265289307, + "learning_rate": 6.985722043696985e-07, + "loss": 0.2929, + "step": 41702 + }, + { + "epoch": 0.8348322198033181, + "grad_norm": 0.9932075142860413, + "learning_rate": 6.984069408723377e-07, + "loss": 0.2628, + "step": 41703 + }, + { + "epoch": 0.8348522383204464, + "grad_norm": 1.075802206993103, + "learning_rate": 6.982416954579485e-07, + "loss": 0.2708, + "step": 41704 + }, + { + "epoch": 0.8348722568375747, + "grad_norm": 1.0303419828414917, + "learning_rate": 6.980764681272245e-07, + "loss": 0.2904, + "step": 41705 + }, + { + "epoch": 0.8348922753547031, + "grad_norm": 1.0548069477081299, + "learning_rate": 6.979112588808617e-07, + "loss": 0.2876, + "step": 41706 + }, + { + "epoch": 0.8349122938718314, + "grad_norm": 1.1496614217758179, + "learning_rate": 6.977460677195546e-07, + "loss": 0.3086, + "step": 41707 + }, + { + "epoch": 0.8349323123889598, + "grad_norm": 1.0724972486495972, + "learning_rate": 6.97580894643996e-07, + "loss": 0.2654, + "step": 41708 + }, + { + "epoch": 0.8349523309060881, + "grad_norm": 1.9429161548614502, + "learning_rate": 6.974157396548831e-07, + "loss": 0.7305, + "step": 41709 + }, + { + "epoch": 0.8349723494232165, + "grad_norm": 1.1363868713378906, + "learning_rate": 6.972506027529063e-07, + "loss": 0.3049, + "step": 41710 + }, + { + "epoch": 0.8349923679403448, + "grad_norm": 1.2050906419754028, + "learning_rate": 6.970854839387642e-07, + "loss": 0.3199, + "step": 41711 + }, + { + "epoch": 0.8350123864574731, + "grad_norm": 1.1599888801574707, + "learning_rate": 6.969203832131477e-07, + "loss": 0.3342, + "step": 41712 + }, + { + "epoch": 0.8350324049746015, + "grad_norm": 1.0509421825408936, + "learning_rate": 6.967553005767513e-07, + "loss": 0.297, + "step": 41713 + }, + { + "epoch": 0.8350524234917298, + "grad_norm": 1.9009068012237549, + "learning_rate": 6.965902360302706e-07, + "loss": 0.824, + "step": 41714 + }, + { + "epoch": 0.8350724420088582, + "grad_norm": 1.373481035232544, + "learning_rate": 6.96425189574399e-07, + "loss": 0.2863, + "step": 41715 + }, + { + "epoch": 0.8350924605259865, + "grad_norm": 1.0894155502319336, + "learning_rate": 6.962601612098291e-07, + "loss": 0.3021, + "step": 41716 + }, + { + "epoch": 0.8351124790431149, + "grad_norm": 1.157110333442688, + "learning_rate": 6.960951509372555e-07, + "loss": 0.2617, + "step": 41717 + }, + { + "epoch": 0.8351324975602432, + "grad_norm": 1.9580742120742798, + "learning_rate": 6.959301587573703e-07, + "loss": 0.7716, + "step": 41718 + }, + { + "epoch": 0.8351525160773716, + "grad_norm": 1.0435831546783447, + "learning_rate": 6.957651846708697e-07, + "loss": 0.3075, + "step": 41719 + }, + { + "epoch": 0.8351725345944999, + "grad_norm": 1.0530673265457153, + "learning_rate": 6.956002286784463e-07, + "loss": 0.314, + "step": 41720 + }, + { + "epoch": 0.8351925531116282, + "grad_norm": 1.0215766429901123, + "learning_rate": 6.954352907807921e-07, + "loss": 0.261, + "step": 41721 + }, + { + "epoch": 0.8352125716287566, + "grad_norm": 1.1106719970703125, + "learning_rate": 6.952703709786024e-07, + "loss": 0.3132, + "step": 41722 + }, + { + "epoch": 0.8352325901458849, + "grad_norm": 1.113956332206726, + "learning_rate": 6.951054692725684e-07, + "loss": 0.2815, + "step": 41723 + }, + { + "epoch": 0.8352526086630133, + "grad_norm": 1.2067487239837646, + "learning_rate": 6.949405856633862e-07, + "loss": 0.3671, + "step": 41724 + }, + { + "epoch": 0.8352726271801416, + "grad_norm": 1.149041771888733, + "learning_rate": 6.947757201517469e-07, + "loss": 0.369, + "step": 41725 + }, + { + "epoch": 0.83529264569727, + "grad_norm": 1.208918571472168, + "learning_rate": 6.946108727383438e-07, + "loss": 0.2712, + "step": 41726 + }, + { + "epoch": 0.8353126642143983, + "grad_norm": 1.0897612571716309, + "learning_rate": 6.944460434238698e-07, + "loss": 0.2708, + "step": 41727 + }, + { + "epoch": 0.8353326827315266, + "grad_norm": 1.0402623414993286, + "learning_rate": 6.942812322090187e-07, + "loss": 0.2882, + "step": 41728 + }, + { + "epoch": 0.835352701248655, + "grad_norm": 1.1712175607681274, + "learning_rate": 6.941164390944827e-07, + "loss": 0.2895, + "step": 41729 + }, + { + "epoch": 0.8353727197657833, + "grad_norm": 1.100721001625061, + "learning_rate": 6.939516640809541e-07, + "loss": 0.2743, + "step": 41730 + }, + { + "epoch": 0.8353927382829117, + "grad_norm": 1.1636964082717896, + "learning_rate": 6.937869071691256e-07, + "loss": 0.2704, + "step": 41731 + }, + { + "epoch": 0.83541275680004, + "grad_norm": 1.0974801778793335, + "learning_rate": 6.936221683596911e-07, + "loss": 0.2746, + "step": 41732 + }, + { + "epoch": 0.8354327753171684, + "grad_norm": 1.136203408241272, + "learning_rate": 6.934574476533424e-07, + "loss": 0.2576, + "step": 41733 + }, + { + "epoch": 0.8354527938342967, + "grad_norm": 1.0828676223754883, + "learning_rate": 6.932927450507703e-07, + "loss": 0.2997, + "step": 41734 + }, + { + "epoch": 0.8354728123514251, + "grad_norm": 1.100334882736206, + "learning_rate": 6.931280605526703e-07, + "loss": 0.3238, + "step": 41735 + }, + { + "epoch": 0.8354928308685534, + "grad_norm": 1.255719780921936, + "learning_rate": 6.929633941597313e-07, + "loss": 0.3066, + "step": 41736 + }, + { + "epoch": 0.8355128493856817, + "grad_norm": 1.088786244392395, + "learning_rate": 6.927987458726488e-07, + "loss": 0.2562, + "step": 41737 + }, + { + "epoch": 0.8355328679028101, + "grad_norm": 1.1653722524642944, + "learning_rate": 6.926341156921135e-07, + "loss": 0.3251, + "step": 41738 + }, + { + "epoch": 0.8355528864199384, + "grad_norm": 1.4093564748764038, + "learning_rate": 6.924695036188172e-07, + "loss": 0.2973, + "step": 41739 + }, + { + "epoch": 0.8355729049370668, + "grad_norm": 1.1569957733154297, + "learning_rate": 6.923049096534518e-07, + "loss": 0.2802, + "step": 41740 + }, + { + "epoch": 0.8355929234541951, + "grad_norm": 1.8896448612213135, + "learning_rate": 6.921403337967098e-07, + "loss": 0.7453, + "step": 41741 + }, + { + "epoch": 0.8356129419713235, + "grad_norm": 1.03568696975708, + "learning_rate": 6.919757760492829e-07, + "loss": 0.274, + "step": 41742 + }, + { + "epoch": 0.8356329604884518, + "grad_norm": 1.1122221946716309, + "learning_rate": 6.918112364118629e-07, + "loss": 0.3058, + "step": 41743 + }, + { + "epoch": 0.8356529790055801, + "grad_norm": 1.092818021774292, + "learning_rate": 6.916467148851402e-07, + "loss": 0.3185, + "step": 41744 + }, + { + "epoch": 0.8356729975227085, + "grad_norm": 1.085402488708496, + "learning_rate": 6.914822114698089e-07, + "loss": 0.3311, + "step": 41745 + }, + { + "epoch": 0.8356930160398368, + "grad_norm": 1.2066713571548462, + "learning_rate": 6.913177261665587e-07, + "loss": 0.2654, + "step": 41746 + }, + { + "epoch": 0.8357130345569652, + "grad_norm": 1.0056380033493042, + "learning_rate": 6.911532589760805e-07, + "loss": 0.2734, + "step": 41747 + }, + { + "epoch": 0.8357330530740935, + "grad_norm": 1.1336137056350708, + "learning_rate": 6.909888098990681e-07, + "loss": 0.3307, + "step": 41748 + }, + { + "epoch": 0.8357530715912219, + "grad_norm": 1.0762234926223755, + "learning_rate": 6.908243789362101e-07, + "loss": 0.2449, + "step": 41749 + }, + { + "epoch": 0.8357730901083502, + "grad_norm": 1.1008280515670776, + "learning_rate": 6.906599660882007e-07, + "loss": 0.2641, + "step": 41750 + }, + { + "epoch": 0.8357931086254786, + "grad_norm": 1.1157197952270508, + "learning_rate": 6.904955713557288e-07, + "loss": 0.3302, + "step": 41751 + }, + { + "epoch": 0.8358131271426069, + "grad_norm": 0.9834643006324768, + "learning_rate": 6.903311947394864e-07, + "loss": 0.2679, + "step": 41752 + }, + { + "epoch": 0.8358331456597352, + "grad_norm": 1.1175967454910278, + "learning_rate": 6.901668362401631e-07, + "loss": 0.3063, + "step": 41753 + }, + { + "epoch": 0.8358531641768636, + "grad_norm": 1.0977973937988281, + "learning_rate": 6.900024958584517e-07, + "loss": 0.3, + "step": 41754 + }, + { + "epoch": 0.8358731826939919, + "grad_norm": 1.1020221710205078, + "learning_rate": 6.898381735950427e-07, + "loss": 0.2926, + "step": 41755 + }, + { + "epoch": 0.8358932012111203, + "grad_norm": 1.9664661884307861, + "learning_rate": 6.896738694506261e-07, + "loss": 0.7355, + "step": 41756 + }, + { + "epoch": 0.8359132197282486, + "grad_norm": 1.2138549089431763, + "learning_rate": 6.89509583425893e-07, + "loss": 0.2958, + "step": 41757 + }, + { + "epoch": 0.835933238245377, + "grad_norm": 1.204679250717163, + "learning_rate": 6.893453155215335e-07, + "loss": 0.3305, + "step": 41758 + }, + { + "epoch": 0.8359532567625053, + "grad_norm": 1.1892824172973633, + "learning_rate": 6.891810657382392e-07, + "loss": 0.3563, + "step": 41759 + }, + { + "epoch": 0.8359732752796336, + "grad_norm": 1.0866565704345703, + "learning_rate": 6.890168340766989e-07, + "loss": 0.308, + "step": 41760 + }, + { + "epoch": 0.835993293796762, + "grad_norm": 1.7957637310028076, + "learning_rate": 6.888526205376051e-07, + "loss": 0.7423, + "step": 41761 + }, + { + "epoch": 0.8360133123138903, + "grad_norm": 1.7693639993667603, + "learning_rate": 6.886884251216463e-07, + "loss": 0.7557, + "step": 41762 + }, + { + "epoch": 0.8360333308310187, + "grad_norm": 1.0328506231307983, + "learning_rate": 6.885242478295145e-07, + "loss": 0.2761, + "step": 41763 + }, + { + "epoch": 0.836053349348147, + "grad_norm": 1.0844999551773071, + "learning_rate": 6.883600886618991e-07, + "loss": 0.2929, + "step": 41764 + }, + { + "epoch": 0.8360733678652754, + "grad_norm": 1.0185235738754272, + "learning_rate": 6.8819594761949e-07, + "loss": 0.2664, + "step": 41765 + }, + { + "epoch": 0.8360933863824037, + "grad_norm": 1.1084505319595337, + "learning_rate": 6.88031824702976e-07, + "loss": 0.2615, + "step": 41766 + }, + { + "epoch": 0.8361134048995321, + "grad_norm": 1.1419137716293335, + "learning_rate": 6.87867719913049e-07, + "loss": 0.2975, + "step": 41767 + }, + { + "epoch": 0.8361334234166604, + "grad_norm": 1.827390193939209, + "learning_rate": 6.877036332503989e-07, + "loss": 0.8072, + "step": 41768 + }, + { + "epoch": 0.8361534419337887, + "grad_norm": 1.1439485549926758, + "learning_rate": 6.875395647157141e-07, + "loss": 0.2677, + "step": 41769 + }, + { + "epoch": 0.8361734604509171, + "grad_norm": 1.1093604564666748, + "learning_rate": 6.87375514309685e-07, + "loss": 0.3494, + "step": 41770 + }, + { + "epoch": 0.8361934789680454, + "grad_norm": 1.0727980136871338, + "learning_rate": 6.87211482033e-07, + "loss": 0.2558, + "step": 41771 + }, + { + "epoch": 0.8362134974851738, + "grad_norm": 1.1523358821868896, + "learning_rate": 6.870474678863514e-07, + "loss": 0.3582, + "step": 41772 + }, + { + "epoch": 0.8362335160023021, + "grad_norm": 1.0799651145935059, + "learning_rate": 6.868834718704254e-07, + "loss": 0.2192, + "step": 41773 + }, + { + "epoch": 0.8362535345194305, + "grad_norm": 1.8084003925323486, + "learning_rate": 6.867194939859145e-07, + "loss": 0.7307, + "step": 41774 + }, + { + "epoch": 0.8362735530365588, + "grad_norm": 1.1035500764846802, + "learning_rate": 6.865555342335057e-07, + "loss": 0.2788, + "step": 41775 + }, + { + "epoch": 0.8362935715536871, + "grad_norm": 1.150740623474121, + "learning_rate": 6.863915926138898e-07, + "loss": 0.2934, + "step": 41776 + }, + { + "epoch": 0.8363135900708155, + "grad_norm": 1.0368560552597046, + "learning_rate": 6.862276691277558e-07, + "loss": 0.2515, + "step": 41777 + }, + { + "epoch": 0.8363336085879438, + "grad_norm": 1.1447317600250244, + "learning_rate": 6.860637637757922e-07, + "loss": 0.3181, + "step": 41778 + }, + { + "epoch": 0.8363536271050722, + "grad_norm": 1.1031049489974976, + "learning_rate": 6.858998765586877e-07, + "loss": 0.3231, + "step": 41779 + }, + { + "epoch": 0.8363736456222005, + "grad_norm": 1.0646767616271973, + "learning_rate": 6.857360074771313e-07, + "loss": 0.2825, + "step": 41780 + }, + { + "epoch": 0.8363936641393289, + "grad_norm": 1.0474979877471924, + "learning_rate": 6.85572156531813e-07, + "loss": 0.2956, + "step": 41781 + }, + { + "epoch": 0.8364136826564572, + "grad_norm": 1.0642094612121582, + "learning_rate": 6.85408323723421e-07, + "loss": 0.2454, + "step": 41782 + }, + { + "epoch": 0.8364337011735856, + "grad_norm": 1.2206151485443115, + "learning_rate": 6.852445090526439e-07, + "loss": 0.2683, + "step": 41783 + }, + { + "epoch": 0.8364537196907139, + "grad_norm": 1.017760992050171, + "learning_rate": 6.850807125201692e-07, + "loss": 0.3113, + "step": 41784 + }, + { + "epoch": 0.8364737382078422, + "grad_norm": 1.1638822555541992, + "learning_rate": 6.849169341266876e-07, + "loss": 0.2562, + "step": 41785 + }, + { + "epoch": 0.8364937567249706, + "grad_norm": 1.0848500728607178, + "learning_rate": 6.847531738728857e-07, + "loss": 0.3219, + "step": 41786 + }, + { + "epoch": 0.8365137752420989, + "grad_norm": 1.066335916519165, + "learning_rate": 6.845894317594537e-07, + "loss": 0.2858, + "step": 41787 + }, + { + "epoch": 0.8365337937592273, + "grad_norm": 1.0588239431381226, + "learning_rate": 6.844257077870791e-07, + "loss": 0.3179, + "step": 41788 + }, + { + "epoch": 0.8365538122763556, + "grad_norm": 0.9937403798103333, + "learning_rate": 6.842620019564495e-07, + "loss": 0.2942, + "step": 41789 + }, + { + "epoch": 0.836573830793484, + "grad_norm": 1.1603583097457886, + "learning_rate": 6.840983142682539e-07, + "loss": 0.2995, + "step": 41790 + }, + { + "epoch": 0.8365938493106123, + "grad_norm": 1.1244064569473267, + "learning_rate": 6.83934644723181e-07, + "loss": 0.3111, + "step": 41791 + }, + { + "epoch": 0.8366138678277406, + "grad_norm": 1.2391765117645264, + "learning_rate": 6.837709933219178e-07, + "loss": 0.2809, + "step": 41792 + }, + { + "epoch": 0.836633886344869, + "grad_norm": 0.9857776761054993, + "learning_rate": 6.83607360065151e-07, + "loss": 0.2707, + "step": 41793 + }, + { + "epoch": 0.8366539048619973, + "grad_norm": 1.0561134815216064, + "learning_rate": 6.834437449535713e-07, + "loss": 0.3111, + "step": 41794 + }, + { + "epoch": 0.8366739233791257, + "grad_norm": 1.1714390516281128, + "learning_rate": 6.832801479878654e-07, + "loss": 0.2852, + "step": 41795 + }, + { + "epoch": 0.836693941896254, + "grad_norm": 1.0381282567977905, + "learning_rate": 6.831165691687202e-07, + "loss": 0.2964, + "step": 41796 + }, + { + "epoch": 0.8367139604133824, + "grad_norm": 1.12740159034729, + "learning_rate": 6.829530084968233e-07, + "loss": 0.2795, + "step": 41797 + }, + { + "epoch": 0.8367339789305107, + "grad_norm": 1.7294508218765259, + "learning_rate": 6.82789465972864e-07, + "loss": 0.7989, + "step": 41798 + }, + { + "epoch": 0.8367539974476391, + "grad_norm": 1.807724952697754, + "learning_rate": 6.826259415975272e-07, + "loss": 0.7585, + "step": 41799 + }, + { + "epoch": 0.8367740159647674, + "grad_norm": 1.1026802062988281, + "learning_rate": 6.824624353715037e-07, + "loss": 0.2781, + "step": 41800 + }, + { + "epoch": 0.8367940344818957, + "grad_norm": 1.2012133598327637, + "learning_rate": 6.822989472954783e-07, + "loss": 0.3041, + "step": 41801 + }, + { + "epoch": 0.8368140529990241, + "grad_norm": 1.1097314357757568, + "learning_rate": 6.821354773701383e-07, + "loss": 0.3175, + "step": 41802 + }, + { + "epoch": 0.8368340715161524, + "grad_norm": 1.1639453172683716, + "learning_rate": 6.81972025596172e-07, + "loss": 0.2782, + "step": 41803 + }, + { + "epoch": 0.8368540900332808, + "grad_norm": 1.155455470085144, + "learning_rate": 6.818085919742667e-07, + "loss": 0.3151, + "step": 41804 + }, + { + "epoch": 0.8368741085504091, + "grad_norm": 1.3350549936294556, + "learning_rate": 6.816451765051086e-07, + "loss": 0.3435, + "step": 41805 + }, + { + "epoch": 0.8368941270675375, + "grad_norm": 1.2108230590820312, + "learning_rate": 6.814817791893835e-07, + "loss": 0.3028, + "step": 41806 + }, + { + "epoch": 0.8369141455846658, + "grad_norm": 1.1391531229019165, + "learning_rate": 6.813184000277812e-07, + "loss": 0.3238, + "step": 41807 + }, + { + "epoch": 0.8369341641017941, + "grad_norm": 1.1999099254608154, + "learning_rate": 6.811550390209864e-07, + "loss": 0.2959, + "step": 41808 + }, + { + "epoch": 0.8369541826189225, + "grad_norm": 1.0852084159851074, + "learning_rate": 6.809916961696871e-07, + "loss": 0.3205, + "step": 41809 + }, + { + "epoch": 0.8369742011360508, + "grad_norm": 1.0846011638641357, + "learning_rate": 6.808283714745678e-07, + "loss": 0.2946, + "step": 41810 + }, + { + "epoch": 0.8369942196531792, + "grad_norm": 1.9510009288787842, + "learning_rate": 6.806650649363172e-07, + "loss": 0.7398, + "step": 41811 + }, + { + "epoch": 0.8370142381703075, + "grad_norm": 1.0388505458831787, + "learning_rate": 6.805017765556204e-07, + "loss": 0.2844, + "step": 41812 + }, + { + "epoch": 0.8370342566874359, + "grad_norm": 1.0949214696884155, + "learning_rate": 6.803385063331658e-07, + "loss": 0.2897, + "step": 41813 + }, + { + "epoch": 0.8370542752045642, + "grad_norm": 1.0845391750335693, + "learning_rate": 6.801752542696388e-07, + "loss": 0.3005, + "step": 41814 + }, + { + "epoch": 0.8370742937216926, + "grad_norm": 1.1109055280685425, + "learning_rate": 6.800120203657234e-07, + "loss": 0.3077, + "step": 41815 + }, + { + "epoch": 0.8370943122388209, + "grad_norm": 1.0726535320281982, + "learning_rate": 6.798488046221096e-07, + "loss": 0.3032, + "step": 41816 + }, + { + "epoch": 0.8371143307559492, + "grad_norm": 1.0949674844741821, + "learning_rate": 6.796856070394814e-07, + "loss": 0.2937, + "step": 41817 + }, + { + "epoch": 0.8371343492730776, + "grad_norm": 1.0762312412261963, + "learning_rate": 6.795224276185252e-07, + "loss": 0.2815, + "step": 41818 + }, + { + "epoch": 0.8371543677902059, + "grad_norm": 1.9378687143325806, + "learning_rate": 6.793592663599274e-07, + "loss": 0.6998, + "step": 41819 + }, + { + "epoch": 0.8371743863073343, + "grad_norm": 1.960707664489746, + "learning_rate": 6.791961232643718e-07, + "loss": 0.8045, + "step": 41820 + }, + { + "epoch": 0.8371944048244626, + "grad_norm": 1.1713372468948364, + "learning_rate": 6.790329983325472e-07, + "loss": 0.3007, + "step": 41821 + }, + { + "epoch": 0.837214423341591, + "grad_norm": 1.030125379562378, + "learning_rate": 6.788698915651382e-07, + "loss": 0.2662, + "step": 41822 + }, + { + "epoch": 0.8372344418587193, + "grad_norm": 1.0368740558624268, + "learning_rate": 6.787068029628286e-07, + "loss": 0.2666, + "step": 41823 + }, + { + "epoch": 0.8372544603758476, + "grad_norm": 1.092093586921692, + "learning_rate": 6.785437325263066e-07, + "loss": 0.3111, + "step": 41824 + }, + { + "epoch": 0.837274478892976, + "grad_norm": 1.1886132955551147, + "learning_rate": 6.783806802562565e-07, + "loss": 0.3148, + "step": 41825 + }, + { + "epoch": 0.8372944974101043, + "grad_norm": 1.0299890041351318, + "learning_rate": 6.782176461533646e-07, + "loss": 0.314, + "step": 41826 + }, + { + "epoch": 0.8373145159272327, + "grad_norm": 0.9953134059906006, + "learning_rate": 6.780546302183155e-07, + "loss": 0.2949, + "step": 41827 + }, + { + "epoch": 0.837334534444361, + "grad_norm": 0.941887378692627, + "learning_rate": 6.778916324517936e-07, + "loss": 0.2273, + "step": 41828 + }, + { + "epoch": 0.8373545529614894, + "grad_norm": 1.0040814876556396, + "learning_rate": 6.777286528544869e-07, + "loss": 0.3023, + "step": 41829 + }, + { + "epoch": 0.8373745714786177, + "grad_norm": 1.144906997680664, + "learning_rate": 6.775656914270784e-07, + "loss": 0.2875, + "step": 41830 + }, + { + "epoch": 0.8373945899957461, + "grad_norm": 1.18894362449646, + "learning_rate": 6.774027481702533e-07, + "loss": 0.2928, + "step": 41831 + }, + { + "epoch": 0.8374146085128744, + "grad_norm": 1.098557949066162, + "learning_rate": 6.772398230846966e-07, + "loss": 0.2873, + "step": 41832 + }, + { + "epoch": 0.8374346270300027, + "grad_norm": 1.436842918395996, + "learning_rate": 6.770769161710928e-07, + "loss": 0.2784, + "step": 41833 + }, + { + "epoch": 0.8374546455471311, + "grad_norm": 1.1557339429855347, + "learning_rate": 6.769140274301284e-07, + "loss": 0.3052, + "step": 41834 + }, + { + "epoch": 0.8374746640642594, + "grad_norm": 1.0576622486114502, + "learning_rate": 6.767511568624868e-07, + "loss": 0.2813, + "step": 41835 + }, + { + "epoch": 0.8374946825813878, + "grad_norm": 1.2170202732086182, + "learning_rate": 6.76588304468852e-07, + "loss": 0.3077, + "step": 41836 + }, + { + "epoch": 0.8375147010985161, + "grad_norm": 1.3540467023849487, + "learning_rate": 6.764254702499107e-07, + "loss": 0.2628, + "step": 41837 + }, + { + "epoch": 0.8375347196156445, + "grad_norm": 1.2033021450042725, + "learning_rate": 6.762626542063455e-07, + "loss": 0.2747, + "step": 41838 + }, + { + "epoch": 0.8375547381327728, + "grad_norm": 1.1134123802185059, + "learning_rate": 6.760998563388422e-07, + "loss": 0.2634, + "step": 41839 + }, + { + "epoch": 0.8375747566499011, + "grad_norm": 1.1590648889541626, + "learning_rate": 6.759370766480849e-07, + "loss": 0.3032, + "step": 41840 + }, + { + "epoch": 0.8375947751670295, + "grad_norm": 0.9940987825393677, + "learning_rate": 6.757743151347573e-07, + "loss": 0.2635, + "step": 41841 + }, + { + "epoch": 0.8376147936841578, + "grad_norm": 1.1222715377807617, + "learning_rate": 6.756115717995432e-07, + "loss": 0.2914, + "step": 41842 + }, + { + "epoch": 0.8376348122012862, + "grad_norm": 1.1148873567581177, + "learning_rate": 6.754488466431281e-07, + "loss": 0.321, + "step": 41843 + }, + { + "epoch": 0.8376548307184145, + "grad_norm": 1.1113896369934082, + "learning_rate": 6.752861396661958e-07, + "loss": 0.2642, + "step": 41844 + }, + { + "epoch": 0.8376748492355429, + "grad_norm": 1.1713393926620483, + "learning_rate": 6.751234508694299e-07, + "loss": 0.3225, + "step": 41845 + }, + { + "epoch": 0.8376948677526712, + "grad_norm": 1.1614024639129639, + "learning_rate": 6.74960780253513e-07, + "loss": 0.2642, + "step": 41846 + }, + { + "epoch": 0.8377148862697996, + "grad_norm": 1.1146756410598755, + "learning_rate": 6.747981278191312e-07, + "loss": 0.3221, + "step": 41847 + }, + { + "epoch": 0.8377349047869279, + "grad_norm": 1.8940961360931396, + "learning_rate": 6.746354935669674e-07, + "loss": 0.7638, + "step": 41848 + }, + { + "epoch": 0.8377549233040562, + "grad_norm": 1.0965039730072021, + "learning_rate": 6.744728774977039e-07, + "loss": 0.304, + "step": 41849 + }, + { + "epoch": 0.8377749418211846, + "grad_norm": 0.9972425699234009, + "learning_rate": 6.743102796120266e-07, + "loss": 0.2991, + "step": 41850 + }, + { + "epoch": 0.8377949603383129, + "grad_norm": 1.1877835988998413, + "learning_rate": 6.741476999106172e-07, + "loss": 0.3537, + "step": 41851 + }, + { + "epoch": 0.8378149788554413, + "grad_norm": 1.2498581409454346, + "learning_rate": 6.739851383941609e-07, + "loss": 0.2536, + "step": 41852 + }, + { + "epoch": 0.8378349973725696, + "grad_norm": 1.241546392440796, + "learning_rate": 6.738225950633404e-07, + "loss": 0.3376, + "step": 41853 + }, + { + "epoch": 0.837855015889698, + "grad_norm": 1.083696961402893, + "learning_rate": 6.736600699188384e-07, + "loss": 0.2565, + "step": 41854 + }, + { + "epoch": 0.8378750344068263, + "grad_norm": 1.280792474746704, + "learning_rate": 6.734975629613371e-07, + "loss": 0.3134, + "step": 41855 + }, + { + "epoch": 0.8378950529239546, + "grad_norm": 1.15378737449646, + "learning_rate": 6.733350741915223e-07, + "loss": 0.2748, + "step": 41856 + }, + { + "epoch": 0.837915071441083, + "grad_norm": 1.1735563278198242, + "learning_rate": 6.731726036100761e-07, + "loss": 0.2385, + "step": 41857 + }, + { + "epoch": 0.8379350899582113, + "grad_norm": 1.4195573329925537, + "learning_rate": 6.730101512176806e-07, + "loss": 0.324, + "step": 41858 + }, + { + "epoch": 0.8379551084753397, + "grad_norm": 1.0633608102798462, + "learning_rate": 6.728477170150182e-07, + "loss": 0.318, + "step": 41859 + }, + { + "epoch": 0.837975126992468, + "grad_norm": 1.0455056428909302, + "learning_rate": 6.72685301002774e-07, + "loss": 0.2633, + "step": 41860 + }, + { + "epoch": 0.8379951455095964, + "grad_norm": 1.2238229513168335, + "learning_rate": 6.725229031816299e-07, + "loss": 0.3168, + "step": 41861 + }, + { + "epoch": 0.8380151640267247, + "grad_norm": 0.963043212890625, + "learning_rate": 6.723605235522668e-07, + "loss": 0.2288, + "step": 41862 + }, + { + "epoch": 0.8380351825438531, + "grad_norm": 1.450020432472229, + "learning_rate": 6.721981621153695e-07, + "loss": 0.32, + "step": 41863 + }, + { + "epoch": 0.8380552010609814, + "grad_norm": 1.6524850130081177, + "learning_rate": 6.720358188716191e-07, + "loss": 0.6414, + "step": 41864 + }, + { + "epoch": 0.8380752195781097, + "grad_norm": 1.7619271278381348, + "learning_rate": 6.718734938216997e-07, + "loss": 0.7364, + "step": 41865 + }, + { + "epoch": 0.8380952380952381, + "grad_norm": 1.1623759269714355, + "learning_rate": 6.717111869662928e-07, + "loss": 0.2792, + "step": 41866 + }, + { + "epoch": 0.8381152566123664, + "grad_norm": 1.0486111640930176, + "learning_rate": 6.715488983060808e-07, + "loss": 0.2556, + "step": 41867 + }, + { + "epoch": 0.8381352751294948, + "grad_norm": 1.1657565832138062, + "learning_rate": 6.713866278417441e-07, + "loss": 0.2947, + "step": 41868 + }, + { + "epoch": 0.8381552936466231, + "grad_norm": 1.1554851531982422, + "learning_rate": 6.712243755739678e-07, + "loss": 0.2811, + "step": 41869 + }, + { + "epoch": 0.8381753121637515, + "grad_norm": 1.2200247049331665, + "learning_rate": 6.71062141503433e-07, + "loss": 0.2977, + "step": 41870 + }, + { + "epoch": 0.8381953306808798, + "grad_norm": 1.1565773487091064, + "learning_rate": 6.708999256308213e-07, + "loss": 0.3024, + "step": 41871 + }, + { + "epoch": 0.8382153491980081, + "grad_norm": 1.3839970827102661, + "learning_rate": 6.707377279568145e-07, + "loss": 0.2813, + "step": 41872 + }, + { + "epoch": 0.8382353677151365, + "grad_norm": 2.011411666870117, + "learning_rate": 6.705755484820936e-07, + "loss": 0.7528, + "step": 41873 + }, + { + "epoch": 0.8382553862322648, + "grad_norm": 1.8614501953125, + "learning_rate": 6.704133872073421e-07, + "loss": 0.7534, + "step": 41874 + }, + { + "epoch": 0.8382754047493932, + "grad_norm": 1.8493797779083252, + "learning_rate": 6.702512441332404e-07, + "loss": 0.7459, + "step": 41875 + }, + { + "epoch": 0.8382954232665215, + "grad_norm": 1.157227635383606, + "learning_rate": 6.700891192604714e-07, + "loss": 0.2282, + "step": 41876 + }, + { + "epoch": 0.8383154417836499, + "grad_norm": 1.1325567960739136, + "learning_rate": 6.69927012589715e-07, + "loss": 0.2915, + "step": 41877 + }, + { + "epoch": 0.8383354603007782, + "grad_norm": 1.1768563985824585, + "learning_rate": 6.697649241216547e-07, + "loss": 0.2999, + "step": 41878 + }, + { + "epoch": 0.8383554788179066, + "grad_norm": 1.0761511325836182, + "learning_rate": 6.69602853856971e-07, + "loss": 0.3017, + "step": 41879 + }, + { + "epoch": 0.8383754973350349, + "grad_norm": 1.2672752141952515, + "learning_rate": 6.69440801796345e-07, + "loss": 0.2938, + "step": 41880 + }, + { + "epoch": 0.8383955158521632, + "grad_norm": 1.1954033374786377, + "learning_rate": 6.692787679404572e-07, + "loss": 0.3127, + "step": 41881 + }, + { + "epoch": 0.8384155343692916, + "grad_norm": 1.011738896369934, + "learning_rate": 6.691167522899894e-07, + "loss": 0.263, + "step": 41882 + }, + { + "epoch": 0.8384355528864199, + "grad_norm": 1.1263501644134521, + "learning_rate": 6.689547548456232e-07, + "loss": 0.3038, + "step": 41883 + }, + { + "epoch": 0.8384555714035483, + "grad_norm": 1.070959448814392, + "learning_rate": 6.687927756080392e-07, + "loss": 0.2794, + "step": 41884 + }, + { + "epoch": 0.8384755899206766, + "grad_norm": 1.0433237552642822, + "learning_rate": 6.686308145779185e-07, + "loss": 0.3049, + "step": 41885 + }, + { + "epoch": 0.838495608437805, + "grad_norm": 1.1036198139190674, + "learning_rate": 6.684688717559407e-07, + "loss": 0.3226, + "step": 41886 + }, + { + "epoch": 0.8385156269549333, + "grad_norm": 1.137839674949646, + "learning_rate": 6.683069471427883e-07, + "loss": 0.3311, + "step": 41887 + }, + { + "epoch": 0.8385356454720616, + "grad_norm": 1.0843398571014404, + "learning_rate": 6.681450407391404e-07, + "loss": 0.3083, + "step": 41888 + }, + { + "epoch": 0.83855566398919, + "grad_norm": 1.1124963760375977, + "learning_rate": 6.679831525456798e-07, + "loss": 0.309, + "step": 41889 + }, + { + "epoch": 0.8385756825063183, + "grad_norm": 0.9798847436904907, + "learning_rate": 6.678212825630842e-07, + "loss": 0.2909, + "step": 41890 + }, + { + "epoch": 0.8385957010234467, + "grad_norm": 1.1611257791519165, + "learning_rate": 6.676594307920375e-07, + "loss": 0.2876, + "step": 41891 + }, + { + "epoch": 0.838615719540575, + "grad_norm": 1.3083521127700806, + "learning_rate": 6.674975972332176e-07, + "loss": 0.2912, + "step": 41892 + }, + { + "epoch": 0.8386357380577034, + "grad_norm": 1.152471899986267, + "learning_rate": 6.673357818873055e-07, + "loss": 0.2884, + "step": 41893 + }, + { + "epoch": 0.8386557565748317, + "grad_norm": 1.0896984338760376, + "learning_rate": 6.671739847549819e-07, + "loss": 0.2627, + "step": 41894 + }, + { + "epoch": 0.8386757750919601, + "grad_norm": 1.0368895530700684, + "learning_rate": 6.670122058369249e-07, + "loss": 0.2863, + "step": 41895 + }, + { + "epoch": 0.8386957936090884, + "grad_norm": 1.0841304063796997, + "learning_rate": 6.668504451338171e-07, + "loss": 0.3143, + "step": 41896 + }, + { + "epoch": 0.8387158121262167, + "grad_norm": 1.0124510526657104, + "learning_rate": 6.666887026463376e-07, + "loss": 0.2491, + "step": 41897 + }, + { + "epoch": 0.8387358306433451, + "grad_norm": 1.0398794412612915, + "learning_rate": 6.665269783751666e-07, + "loss": 0.2603, + "step": 41898 + }, + { + "epoch": 0.8387558491604734, + "grad_norm": 1.2044105529785156, + "learning_rate": 6.663652723209818e-07, + "loss": 0.3327, + "step": 41899 + }, + { + "epoch": 0.8387758676776018, + "grad_norm": 1.0323055982589722, + "learning_rate": 6.662035844844667e-07, + "loss": 0.288, + "step": 41900 + }, + { + "epoch": 0.8387958861947301, + "grad_norm": 1.0583446025848389, + "learning_rate": 6.660419148662972e-07, + "loss": 0.2807, + "step": 41901 + }, + { + "epoch": 0.8388159047118585, + "grad_norm": 2.0014185905456543, + "learning_rate": 6.658802634671563e-07, + "loss": 0.7169, + "step": 41902 + }, + { + "epoch": 0.8388359232289868, + "grad_norm": 1.3473198413848877, + "learning_rate": 6.657186302877222e-07, + "loss": 0.3267, + "step": 41903 + }, + { + "epoch": 0.8388559417461151, + "grad_norm": 1.1870150566101074, + "learning_rate": 6.655570153286733e-07, + "loss": 0.3042, + "step": 41904 + }, + { + "epoch": 0.8388759602632435, + "grad_norm": 1.1170740127563477, + "learning_rate": 6.653954185906908e-07, + "loss": 0.3359, + "step": 41905 + }, + { + "epoch": 0.8388959787803718, + "grad_norm": 1.1090296506881714, + "learning_rate": 6.652338400744529e-07, + "loss": 0.2848, + "step": 41906 + }, + { + "epoch": 0.8389159972975002, + "grad_norm": 1.1180005073547363, + "learning_rate": 6.650722797806391e-07, + "loss": 0.2783, + "step": 41907 + }, + { + "epoch": 0.8389360158146285, + "grad_norm": 1.187146544456482, + "learning_rate": 6.649107377099278e-07, + "loss": 0.2786, + "step": 41908 + }, + { + "epoch": 0.8389560343317569, + "grad_norm": 1.218608021736145, + "learning_rate": 6.647492138629996e-07, + "loss": 0.3, + "step": 41909 + }, + { + "epoch": 0.8389760528488852, + "grad_norm": 1.131745457649231, + "learning_rate": 6.645877082405327e-07, + "loss": 0.3499, + "step": 41910 + }, + { + "epoch": 0.8389960713660135, + "grad_norm": 1.2521860599517822, + "learning_rate": 6.644262208432068e-07, + "loss": 0.326, + "step": 41911 + }, + { + "epoch": 0.8390160898831419, + "grad_norm": 1.979816198348999, + "learning_rate": 6.642647516716983e-07, + "loss": 0.768, + "step": 41912 + }, + { + "epoch": 0.8390361084002702, + "grad_norm": 1.1129562854766846, + "learning_rate": 6.64103300726689e-07, + "loss": 0.3194, + "step": 41913 + }, + { + "epoch": 0.8390561269173986, + "grad_norm": 1.154744267463684, + "learning_rate": 6.639418680088549e-07, + "loss": 0.2488, + "step": 41914 + }, + { + "epoch": 0.8390761454345269, + "grad_norm": 1.087944746017456, + "learning_rate": 6.637804535188774e-07, + "loss": 0.3234, + "step": 41915 + }, + { + "epoch": 0.8390961639516553, + "grad_norm": 1.0766953229904175, + "learning_rate": 6.636190572574341e-07, + "loss": 0.2966, + "step": 41916 + }, + { + "epoch": 0.8391161824687836, + "grad_norm": 1.8543187379837036, + "learning_rate": 6.634576792252018e-07, + "loss": 0.761, + "step": 41917 + }, + { + "epoch": 0.839136200985912, + "grad_norm": 1.1011366844177246, + "learning_rate": 6.632963194228609e-07, + "loss": 0.2964, + "step": 41918 + }, + { + "epoch": 0.8391562195030403, + "grad_norm": 1.1383873224258423, + "learning_rate": 6.631349778510893e-07, + "loss": 0.3401, + "step": 41919 + }, + { + "epoch": 0.8391762380201686, + "grad_norm": 1.1167534589767456, + "learning_rate": 6.629736545105648e-07, + "loss": 0.3249, + "step": 41920 + }, + { + "epoch": 0.839196256537297, + "grad_norm": 1.2470811605453491, + "learning_rate": 6.628123494019645e-07, + "loss": 0.3182, + "step": 41921 + }, + { + "epoch": 0.8392162750544253, + "grad_norm": 1.077439546585083, + "learning_rate": 6.626510625259691e-07, + "loss": 0.32, + "step": 41922 + }, + { + "epoch": 0.8392362935715537, + "grad_norm": 1.1878491640090942, + "learning_rate": 6.624897938832553e-07, + "loss": 0.3331, + "step": 41923 + }, + { + "epoch": 0.839256312088682, + "grad_norm": 1.1736106872558594, + "learning_rate": 6.623285434745009e-07, + "loss": 0.2618, + "step": 41924 + }, + { + "epoch": 0.8392763306058104, + "grad_norm": 0.9972468018531799, + "learning_rate": 6.621673113003835e-07, + "loss": 0.2602, + "step": 41925 + }, + { + "epoch": 0.8392963491229387, + "grad_norm": 1.1889398097991943, + "learning_rate": 6.620060973615799e-07, + "loss": 0.3369, + "step": 41926 + }, + { + "epoch": 0.839316367640067, + "grad_norm": 1.1049357652664185, + "learning_rate": 6.618449016587697e-07, + "loss": 0.3288, + "step": 41927 + }, + { + "epoch": 0.8393363861571954, + "grad_norm": 1.0658161640167236, + "learning_rate": 6.616837241926311e-07, + "loss": 0.3083, + "step": 41928 + }, + { + "epoch": 0.8393564046743237, + "grad_norm": 1.242735743522644, + "learning_rate": 6.615225649638401e-07, + "loss": 0.3161, + "step": 41929 + }, + { + "epoch": 0.8393764231914521, + "grad_norm": 1.1932320594787598, + "learning_rate": 6.613614239730736e-07, + "loss": 0.2976, + "step": 41930 + }, + { + "epoch": 0.8393964417085804, + "grad_norm": 1.2168736457824707, + "learning_rate": 6.612003012210116e-07, + "loss": 0.3004, + "step": 41931 + }, + { + "epoch": 0.8394164602257088, + "grad_norm": 1.043498158454895, + "learning_rate": 6.610391967083291e-07, + "loss": 0.273, + "step": 41932 + }, + { + "epoch": 0.8394364787428371, + "grad_norm": 1.2741892337799072, + "learning_rate": 6.608781104357042e-07, + "loss": 0.2996, + "step": 41933 + }, + { + "epoch": 0.8394564972599655, + "grad_norm": 1.1544557809829712, + "learning_rate": 6.607170424038146e-07, + "loss": 0.3134, + "step": 41934 + }, + { + "epoch": 0.8394765157770938, + "grad_norm": 1.3879778385162354, + "learning_rate": 6.605559926133348e-07, + "loss": 0.2835, + "step": 41935 + }, + { + "epoch": 0.8394965342942221, + "grad_norm": 1.1049680709838867, + "learning_rate": 6.603949610649452e-07, + "loss": 0.2741, + "step": 41936 + }, + { + "epoch": 0.8395165528113505, + "grad_norm": 1.8643404245376587, + "learning_rate": 6.602339477593212e-07, + "loss": 0.7903, + "step": 41937 + }, + { + "epoch": 0.8395365713284788, + "grad_norm": 1.2818374633789062, + "learning_rate": 6.6007295269714e-07, + "loss": 0.3157, + "step": 41938 + }, + { + "epoch": 0.8395565898456072, + "grad_norm": 1.0802218914031982, + "learning_rate": 6.599119758790768e-07, + "loss": 0.2831, + "step": 41939 + }, + { + "epoch": 0.8395766083627355, + "grad_norm": 1.2509715557098389, + "learning_rate": 6.597510173058097e-07, + "loss": 0.2809, + "step": 41940 + }, + { + "epoch": 0.8395966268798639, + "grad_norm": 1.1375415325164795, + "learning_rate": 6.595900769780161e-07, + "loss": 0.3124, + "step": 41941 + }, + { + "epoch": 0.8396166453969922, + "grad_norm": 1.0827994346618652, + "learning_rate": 6.594291548963722e-07, + "loss": 0.3202, + "step": 41942 + }, + { + "epoch": 0.8396366639141205, + "grad_norm": 1.0599170923233032, + "learning_rate": 6.592682510615528e-07, + "loss": 0.2872, + "step": 41943 + }, + { + "epoch": 0.8396566824312489, + "grad_norm": 1.043015480041504, + "learning_rate": 6.591073654742369e-07, + "loss": 0.3175, + "step": 41944 + }, + { + "epoch": 0.8396767009483772, + "grad_norm": 1.1386804580688477, + "learning_rate": 6.589464981350991e-07, + "loss": 0.2787, + "step": 41945 + }, + { + "epoch": 0.8396967194655056, + "grad_norm": 1.0365709066390991, + "learning_rate": 6.587856490448157e-07, + "loss": 0.2853, + "step": 41946 + }, + { + "epoch": 0.8397167379826339, + "grad_norm": 1.2824523448944092, + "learning_rate": 6.586248182040639e-07, + "loss": 0.3253, + "step": 41947 + }, + { + "epoch": 0.8397367564997623, + "grad_norm": 1.917974829673767, + "learning_rate": 6.584640056135177e-07, + "loss": 0.6876, + "step": 41948 + }, + { + "epoch": 0.8397567750168906, + "grad_norm": 1.129872441291809, + "learning_rate": 6.583032112738557e-07, + "loss": 0.2745, + "step": 41949 + }, + { + "epoch": 0.839776793534019, + "grad_norm": 1.1276386976242065, + "learning_rate": 6.581424351857524e-07, + "loss": 0.2942, + "step": 41950 + }, + { + "epoch": 0.8397968120511473, + "grad_norm": 1.13917875289917, + "learning_rate": 6.579816773498842e-07, + "loss": 0.2506, + "step": 41951 + }, + { + "epoch": 0.8398168305682756, + "grad_norm": 1.2114779949188232, + "learning_rate": 6.578209377669253e-07, + "loss": 0.3091, + "step": 41952 + }, + { + "epoch": 0.839836849085404, + "grad_norm": 1.1804053783416748, + "learning_rate": 6.576602164375528e-07, + "loss": 0.3428, + "step": 41953 + }, + { + "epoch": 0.8398568676025323, + "grad_norm": 1.09047269821167, + "learning_rate": 6.574995133624435e-07, + "loss": 0.2368, + "step": 41954 + }, + { + "epoch": 0.8398768861196607, + "grad_norm": 1.820162296295166, + "learning_rate": 6.573388285422716e-07, + "loss": 0.7737, + "step": 41955 + }, + { + "epoch": 0.839896904636789, + "grad_norm": 1.0499707460403442, + "learning_rate": 6.571781619777129e-07, + "loss": 0.2824, + "step": 41956 + }, + { + "epoch": 0.8399169231539174, + "grad_norm": 1.0953336954116821, + "learning_rate": 6.570175136694412e-07, + "loss": 0.3245, + "step": 41957 + }, + { + "epoch": 0.8399369416710457, + "grad_norm": 1.9973353147506714, + "learning_rate": 6.568568836181344e-07, + "loss": 0.7468, + "step": 41958 + }, + { + "epoch": 0.839956960188174, + "grad_norm": 1.1544373035430908, + "learning_rate": 6.566962718244668e-07, + "loss": 0.3192, + "step": 41959 + }, + { + "epoch": 0.8399769787053024, + "grad_norm": 1.3595452308654785, + "learning_rate": 6.56535678289113e-07, + "loss": 0.3126, + "step": 41960 + }, + { + "epoch": 0.8399969972224307, + "grad_norm": 1.1973273754119873, + "learning_rate": 6.56375103012748e-07, + "loss": 0.2873, + "step": 41961 + }, + { + "epoch": 0.8400170157395591, + "grad_norm": 1.2993625402450562, + "learning_rate": 6.562145459960478e-07, + "loss": 0.3133, + "step": 41962 + }, + { + "epoch": 0.8400370342566874, + "grad_norm": 1.186916708946228, + "learning_rate": 6.56054007239687e-07, + "loss": 0.2921, + "step": 41963 + }, + { + "epoch": 0.8400570527738158, + "grad_norm": 1.1341350078582764, + "learning_rate": 6.558934867443406e-07, + "loss": 0.2928, + "step": 41964 + }, + { + "epoch": 0.8400770712909441, + "grad_norm": 1.056979775428772, + "learning_rate": 6.557329845106819e-07, + "loss": 0.2995, + "step": 41965 + }, + { + "epoch": 0.8400970898080725, + "grad_norm": 1.1367789506912231, + "learning_rate": 6.55572500539387e-07, + "loss": 0.3248, + "step": 41966 + }, + { + "epoch": 0.8401171083252008, + "grad_norm": 1.2335318326950073, + "learning_rate": 6.554120348311316e-07, + "loss": 0.3661, + "step": 41967 + }, + { + "epoch": 0.8401371268423291, + "grad_norm": 1.1580885648727417, + "learning_rate": 6.552515873865883e-07, + "loss": 0.3163, + "step": 41968 + }, + { + "epoch": 0.8401571453594575, + "grad_norm": 1.8509727716445923, + "learning_rate": 6.550911582064328e-07, + "loss": 0.7601, + "step": 41969 + }, + { + "epoch": 0.8401771638765858, + "grad_norm": 1.2394485473632812, + "learning_rate": 6.54930747291338e-07, + "loss": 0.3141, + "step": 41970 + }, + { + "epoch": 0.8401971823937142, + "grad_norm": 1.082765817642212, + "learning_rate": 6.547703546419803e-07, + "loss": 0.3077, + "step": 41971 + }, + { + "epoch": 0.8402172009108425, + "grad_norm": 1.1485118865966797, + "learning_rate": 6.546099802590328e-07, + "loss": 0.279, + "step": 41972 + }, + { + "epoch": 0.8402372194279709, + "grad_norm": 1.1846063137054443, + "learning_rate": 6.544496241431702e-07, + "loss": 0.288, + "step": 41973 + }, + { + "epoch": 0.8402572379450992, + "grad_norm": 1.0270955562591553, + "learning_rate": 6.542892862950645e-07, + "loss": 0.2644, + "step": 41974 + }, + { + "epoch": 0.8402772564622275, + "grad_norm": 1.1828558444976807, + "learning_rate": 6.541289667153928e-07, + "loss": 0.286, + "step": 41975 + }, + { + "epoch": 0.8402972749793559, + "grad_norm": 1.2112643718719482, + "learning_rate": 6.539686654048277e-07, + "loss": 0.2728, + "step": 41976 + }, + { + "epoch": 0.8403172934964842, + "grad_norm": 1.1818463802337646, + "learning_rate": 6.538083823640434e-07, + "loss": 0.2776, + "step": 41977 + }, + { + "epoch": 0.8403373120136126, + "grad_norm": 1.025215744972229, + "learning_rate": 6.53648117593712e-07, + "loss": 0.2838, + "step": 41978 + }, + { + "epoch": 0.8403573305307409, + "grad_norm": 1.1025092601776123, + "learning_rate": 6.53487871094508e-07, + "loss": 0.2943, + "step": 41979 + }, + { + "epoch": 0.8403773490478693, + "grad_norm": 1.053052306175232, + "learning_rate": 6.533276428671076e-07, + "loss": 0.3475, + "step": 41980 + }, + { + "epoch": 0.8403973675649976, + "grad_norm": 1.150197148323059, + "learning_rate": 6.531674329121817e-07, + "loss": 0.3079, + "step": 41981 + }, + { + "epoch": 0.840417386082126, + "grad_norm": 1.132380723953247, + "learning_rate": 6.530072412304051e-07, + "loss": 0.3164, + "step": 41982 + }, + { + "epoch": 0.8404374045992543, + "grad_norm": 1.119093894958496, + "learning_rate": 6.528470678224491e-07, + "loss": 0.3089, + "step": 41983 + }, + { + "epoch": 0.8404574231163826, + "grad_norm": 1.0516701936721802, + "learning_rate": 6.526869126889895e-07, + "loss": 0.282, + "step": 41984 + }, + { + "epoch": 0.840477441633511, + "grad_norm": 1.280738115310669, + "learning_rate": 6.525267758306986e-07, + "loss": 0.2766, + "step": 41985 + }, + { + "epoch": 0.8404974601506393, + "grad_norm": 1.0933486223220825, + "learning_rate": 6.523666572482495e-07, + "loss": 0.2937, + "step": 41986 + }, + { + "epoch": 0.8405174786677677, + "grad_norm": 0.9937964081764221, + "learning_rate": 6.522065569423158e-07, + "loss": 0.2974, + "step": 41987 + }, + { + "epoch": 0.840537497184896, + "grad_norm": 1.0692986249923706, + "learning_rate": 6.520464749135685e-07, + "loss": 0.2712, + "step": 41988 + }, + { + "epoch": 0.8405575157020244, + "grad_norm": 1.0192255973815918, + "learning_rate": 6.518864111626838e-07, + "loss": 0.2603, + "step": 41989 + }, + { + "epoch": 0.8405775342191527, + "grad_norm": 1.1072428226470947, + "learning_rate": 6.517263656903322e-07, + "loss": 0.2813, + "step": 41990 + }, + { + "epoch": 0.840597552736281, + "grad_norm": 1.0899068117141724, + "learning_rate": 6.515663384971865e-07, + "loss": 0.2749, + "step": 41991 + }, + { + "epoch": 0.8406175712534094, + "grad_norm": 1.076515793800354, + "learning_rate": 6.514063295839202e-07, + "loss": 0.289, + "step": 41992 + }, + { + "epoch": 0.8406375897705377, + "grad_norm": 1.1494609117507935, + "learning_rate": 6.512463389512064e-07, + "loss": 0.331, + "step": 41993 + }, + { + "epoch": 0.8406576082876661, + "grad_norm": 1.9188770055770874, + "learning_rate": 6.510863665997174e-07, + "loss": 0.7248, + "step": 41994 + }, + { + "epoch": 0.8406776268047944, + "grad_norm": 2.056711435317993, + "learning_rate": 6.509264125301257e-07, + "loss": 0.6933, + "step": 41995 + }, + { + "epoch": 0.8406976453219228, + "grad_norm": 1.1132012605667114, + "learning_rate": 6.507664767431032e-07, + "loss": 0.2803, + "step": 41996 + }, + { + "epoch": 0.8407176638390511, + "grad_norm": 1.081449270248413, + "learning_rate": 6.506065592393218e-07, + "loss": 0.2788, + "step": 41997 + }, + { + "epoch": 0.8407376823561795, + "grad_norm": 2.0841636657714844, + "learning_rate": 6.504466600194548e-07, + "loss": 0.7721, + "step": 41998 + }, + { + "epoch": 0.8407577008733078, + "grad_norm": 1.0195014476776123, + "learning_rate": 6.502867790841744e-07, + "loss": 0.2653, + "step": 41999 + }, + { + "epoch": 0.8407777193904361, + "grad_norm": 1.103948712348938, + "learning_rate": 6.501269164341523e-07, + "loss": 0.2915, + "step": 42000 + }, + { + "epoch": 0.8407977379075645, + "grad_norm": 1.1327232122421265, + "learning_rate": 6.499670720700596e-07, + "loss": 0.3061, + "step": 42001 + }, + { + "epoch": 0.8408177564246928, + "grad_norm": 1.0532509088516235, + "learning_rate": 6.498072459925703e-07, + "loss": 0.3013, + "step": 42002 + }, + { + "epoch": 0.8408377749418212, + "grad_norm": 1.0542728900909424, + "learning_rate": 6.496474382023549e-07, + "loss": 0.2822, + "step": 42003 + }, + { + "epoch": 0.8408577934589495, + "grad_norm": 1.1139506101608276, + "learning_rate": 6.494876487000846e-07, + "loss": 0.2987, + "step": 42004 + }, + { + "epoch": 0.8408778119760779, + "grad_norm": 1.0859203338623047, + "learning_rate": 6.493278774864314e-07, + "loss": 0.2734, + "step": 42005 + }, + { + "epoch": 0.8408978304932062, + "grad_norm": 1.1239898204803467, + "learning_rate": 6.49168124562069e-07, + "loss": 0.2618, + "step": 42006 + }, + { + "epoch": 0.8409178490103345, + "grad_norm": 0.992682158946991, + "learning_rate": 6.490083899276678e-07, + "loss": 0.3105, + "step": 42007 + }, + { + "epoch": 0.8409378675274629, + "grad_norm": 1.1589113473892212, + "learning_rate": 6.488486735838989e-07, + "loss": 0.3204, + "step": 42008 + }, + { + "epoch": 0.8409578860445912, + "grad_norm": 1.7067930698394775, + "learning_rate": 6.486889755314335e-07, + "loss": 0.7501, + "step": 42009 + }, + { + "epoch": 0.8409779045617196, + "grad_norm": 1.03469979763031, + "learning_rate": 6.485292957709422e-07, + "loss": 0.3207, + "step": 42010 + }, + { + "epoch": 0.8409979230788479, + "grad_norm": 1.9394922256469727, + "learning_rate": 6.483696343030987e-07, + "loss": 0.754, + "step": 42011 + }, + { + "epoch": 0.8410179415959763, + "grad_norm": 1.0794868469238281, + "learning_rate": 6.482099911285722e-07, + "loss": 0.2444, + "step": 42012 + }, + { + "epoch": 0.8410379601131046, + "grad_norm": 1.1830334663391113, + "learning_rate": 6.480503662480348e-07, + "loss": 0.3138, + "step": 42013 + }, + { + "epoch": 0.841057978630233, + "grad_norm": 1.1662975549697876, + "learning_rate": 6.478907596621558e-07, + "loss": 0.2931, + "step": 42014 + }, + { + "epoch": 0.8410779971473613, + "grad_norm": 1.1791083812713623, + "learning_rate": 6.477311713716089e-07, + "loss": 0.3211, + "step": 42015 + }, + { + "epoch": 0.8410980156644896, + "grad_norm": 1.171356439590454, + "learning_rate": 6.475716013770633e-07, + "loss": 0.2903, + "step": 42016 + }, + { + "epoch": 0.841118034181618, + "grad_norm": 1.115302324295044, + "learning_rate": 6.474120496791892e-07, + "loss": 0.2938, + "step": 42017 + }, + { + "epoch": 0.8411380526987463, + "grad_norm": 1.1159236431121826, + "learning_rate": 6.472525162786586e-07, + "loss": 0.2986, + "step": 42018 + }, + { + "epoch": 0.8411580712158747, + "grad_norm": 1.9436582326889038, + "learning_rate": 6.470930011761412e-07, + "loss": 0.6933, + "step": 42019 + }, + { + "epoch": 0.841178089733003, + "grad_norm": 1.8516745567321777, + "learning_rate": 6.469335043723091e-07, + "loss": 0.7328, + "step": 42020 + }, + { + "epoch": 0.8411981082501314, + "grad_norm": 1.2098857164382935, + "learning_rate": 6.467740258678317e-07, + "loss": 0.2964, + "step": 42021 + }, + { + "epoch": 0.8412181267672597, + "grad_norm": 1.0814452171325684, + "learning_rate": 6.466145656633794e-07, + "loss": 0.2877, + "step": 42022 + }, + { + "epoch": 0.841238145284388, + "grad_norm": 1.1349139213562012, + "learning_rate": 6.464551237596212e-07, + "loss": 0.2734, + "step": 42023 + }, + { + "epoch": 0.8412581638015164, + "grad_norm": 1.0535513162612915, + "learning_rate": 6.462957001572301e-07, + "loss": 0.2808, + "step": 42024 + }, + { + "epoch": 0.8412781823186447, + "grad_norm": 1.983544945716858, + "learning_rate": 6.461362948568756e-07, + "loss": 0.7419, + "step": 42025 + }, + { + "epoch": 0.8412982008357731, + "grad_norm": 1.0506476163864136, + "learning_rate": 6.459769078592265e-07, + "loss": 0.3114, + "step": 42026 + }, + { + "epoch": 0.8413182193529014, + "grad_norm": 1.2341517210006714, + "learning_rate": 6.458175391649535e-07, + "loss": 0.3733, + "step": 42027 + }, + { + "epoch": 0.8413382378700298, + "grad_norm": 1.0636887550354004, + "learning_rate": 6.456581887747254e-07, + "loss": 0.2835, + "step": 42028 + }, + { + "epoch": 0.8413582563871581, + "grad_norm": 1.2353096008300781, + "learning_rate": 6.454988566892145e-07, + "loss": 0.3001, + "step": 42029 + }, + { + "epoch": 0.8413782749042865, + "grad_norm": 1.2016597986221313, + "learning_rate": 6.45339542909088e-07, + "loss": 0.2915, + "step": 42030 + }, + { + "epoch": 0.8413982934214148, + "grad_norm": 1.3818873167037964, + "learning_rate": 6.451802474350182e-07, + "loss": 0.2685, + "step": 42031 + }, + { + "epoch": 0.8414183119385431, + "grad_norm": 2.0865402221679688, + "learning_rate": 6.450209702676724e-07, + "loss": 0.7706, + "step": 42032 + }, + { + "epoch": 0.8414383304556715, + "grad_norm": 1.0426547527313232, + "learning_rate": 6.448617114077221e-07, + "loss": 0.274, + "step": 42033 + }, + { + "epoch": 0.8414583489727998, + "grad_norm": 1.1270018815994263, + "learning_rate": 6.447024708558364e-07, + "loss": 0.3491, + "step": 42034 + }, + { + "epoch": 0.8414783674899282, + "grad_norm": 1.963953971862793, + "learning_rate": 6.445432486126835e-07, + "loss": 0.7705, + "step": 42035 + }, + { + "epoch": 0.8414983860070565, + "grad_norm": 1.0177046060562134, + "learning_rate": 6.443840446789329e-07, + "loss": 0.2378, + "step": 42036 + }, + { + "epoch": 0.8415184045241849, + "grad_norm": 1.0244953632354736, + "learning_rate": 6.442248590552557e-07, + "loss": 0.2671, + "step": 42037 + }, + { + "epoch": 0.8415384230413132, + "grad_norm": 1.8598506450653076, + "learning_rate": 6.440656917423194e-07, + "loss": 0.8007, + "step": 42038 + }, + { + "epoch": 0.8415584415584415, + "grad_norm": 1.0640037059783936, + "learning_rate": 6.439065427407936e-07, + "loss": 0.2861, + "step": 42039 + }, + { + "epoch": 0.8415784600755699, + "grad_norm": 1.056937336921692, + "learning_rate": 6.437474120513476e-07, + "loss": 0.3001, + "step": 42040 + }, + { + "epoch": 0.8415984785926982, + "grad_norm": 1.0294348001480103, + "learning_rate": 6.435882996746484e-07, + "loss": 0.2602, + "step": 42041 + }, + { + "epoch": 0.8416184971098266, + "grad_norm": 1.1587862968444824, + "learning_rate": 6.434292056113677e-07, + "loss": 0.2854, + "step": 42042 + }, + { + "epoch": 0.8416385156269549, + "grad_norm": 1.1887086629867554, + "learning_rate": 6.432701298621724e-07, + "loss": 0.2701, + "step": 42043 + }, + { + "epoch": 0.8416585341440833, + "grad_norm": 1.0820358991622925, + "learning_rate": 6.431110724277329e-07, + "loss": 0.3063, + "step": 42044 + }, + { + "epoch": 0.8416785526612116, + "grad_norm": 1.0138623714447021, + "learning_rate": 6.429520333087152e-07, + "loss": 0.3204, + "step": 42045 + }, + { + "epoch": 0.84169857117834, + "grad_norm": 1.1458666324615479, + "learning_rate": 6.427930125057913e-07, + "loss": 0.2806, + "step": 42046 + }, + { + "epoch": 0.8417185896954683, + "grad_norm": 1.7935023307800293, + "learning_rate": 6.426340100196271e-07, + "loss": 0.6503, + "step": 42047 + }, + { + "epoch": 0.8417386082125966, + "grad_norm": 1.1726036071777344, + "learning_rate": 6.424750258508927e-07, + "loss": 0.3225, + "step": 42048 + }, + { + "epoch": 0.841758626729725, + "grad_norm": 1.246352195739746, + "learning_rate": 6.423160600002548e-07, + "loss": 0.3056, + "step": 42049 + }, + { + "epoch": 0.8417786452468533, + "grad_norm": 1.1307406425476074, + "learning_rate": 6.421571124683817e-07, + "loss": 0.3045, + "step": 42050 + }, + { + "epoch": 0.8417986637639817, + "grad_norm": 1.1623005867004395, + "learning_rate": 6.41998183255943e-07, + "loss": 0.2973, + "step": 42051 + }, + { + "epoch": 0.84181868228111, + "grad_norm": 1.0974276065826416, + "learning_rate": 6.418392723636064e-07, + "loss": 0.311, + "step": 42052 + }, + { + "epoch": 0.8418387007982384, + "grad_norm": 1.1210952997207642, + "learning_rate": 6.416803797920396e-07, + "loss": 0.333, + "step": 42053 + }, + { + "epoch": 0.8418587193153667, + "grad_norm": 1.139639973640442, + "learning_rate": 6.415215055419094e-07, + "loss": 0.3158, + "step": 42054 + }, + { + "epoch": 0.841878737832495, + "grad_norm": 1.11481773853302, + "learning_rate": 6.413626496138858e-07, + "loss": 0.2564, + "step": 42055 + }, + { + "epoch": 0.8418987563496234, + "grad_norm": 1.1184159517288208, + "learning_rate": 6.41203812008635e-07, + "loss": 0.3259, + "step": 42056 + }, + { + "epoch": 0.8419187748667517, + "grad_norm": 1.1134053468704224, + "learning_rate": 6.410449927268264e-07, + "loss": 0.3207, + "step": 42057 + }, + { + "epoch": 0.8419387933838801, + "grad_norm": 1.1679861545562744, + "learning_rate": 6.40886191769125e-07, + "loss": 0.3343, + "step": 42058 + }, + { + "epoch": 0.8419588119010084, + "grad_norm": 1.1383379697799683, + "learning_rate": 6.407274091362015e-07, + "loss": 0.3367, + "step": 42059 + }, + { + "epoch": 0.8419788304181368, + "grad_norm": 1.8616554737091064, + "learning_rate": 6.405686448287213e-07, + "loss": 0.77, + "step": 42060 + }, + { + "epoch": 0.8419988489352651, + "grad_norm": 0.9946395754814148, + "learning_rate": 6.404098988473529e-07, + "loss": 0.2475, + "step": 42061 + }, + { + "epoch": 0.8420188674523935, + "grad_norm": 1.0665730237960815, + "learning_rate": 6.402511711927628e-07, + "loss": 0.2858, + "step": 42062 + }, + { + "epoch": 0.8420388859695218, + "grad_norm": 1.1041673421859741, + "learning_rate": 6.400924618656174e-07, + "loss": 0.2658, + "step": 42063 + }, + { + "epoch": 0.8420589044866501, + "grad_norm": 1.2034701108932495, + "learning_rate": 6.399337708665865e-07, + "loss": 0.3321, + "step": 42064 + }, + { + "epoch": 0.8420789230037785, + "grad_norm": 1.2603628635406494, + "learning_rate": 6.397750981963352e-07, + "loss": 0.3096, + "step": 42065 + }, + { + "epoch": 0.8420989415209068, + "grad_norm": 1.2455414533615112, + "learning_rate": 6.396164438555314e-07, + "loss": 0.3124, + "step": 42066 + }, + { + "epoch": 0.8421189600380352, + "grad_norm": 1.1925522089004517, + "learning_rate": 6.394578078448411e-07, + "loss": 0.2967, + "step": 42067 + }, + { + "epoch": 0.8421389785551635, + "grad_norm": 1.2537380456924438, + "learning_rate": 6.39299190164932e-07, + "loss": 0.3021, + "step": 42068 + }, + { + "epoch": 0.8421589970722919, + "grad_norm": 1.0936475992202759, + "learning_rate": 6.3914059081647e-07, + "loss": 0.2988, + "step": 42069 + }, + { + "epoch": 0.8421790155894202, + "grad_norm": 2.197362184524536, + "learning_rate": 6.389820098001231e-07, + "loss": 0.7836, + "step": 42070 + }, + { + "epoch": 0.8421990341065485, + "grad_norm": 1.101701259613037, + "learning_rate": 6.388234471165577e-07, + "loss": 0.3264, + "step": 42071 + }, + { + "epoch": 0.8422190526236769, + "grad_norm": 1.081274390220642, + "learning_rate": 6.386649027664393e-07, + "loss": 0.2717, + "step": 42072 + }, + { + "epoch": 0.8422390711408052, + "grad_norm": 1.3132762908935547, + "learning_rate": 6.385063767504357e-07, + "loss": 0.3, + "step": 42073 + }, + { + "epoch": 0.8422590896579336, + "grad_norm": 1.852654218673706, + "learning_rate": 6.38347869069213e-07, + "loss": 0.7195, + "step": 42074 + }, + { + "epoch": 0.8422791081750619, + "grad_norm": 1.1012253761291504, + "learning_rate": 6.381893797234368e-07, + "loss": 0.3149, + "step": 42075 + }, + { + "epoch": 0.8422991266921903, + "grad_norm": 1.0015451908111572, + "learning_rate": 6.380309087137731e-07, + "loss": 0.2816, + "step": 42076 + }, + { + "epoch": 0.8423191452093186, + "grad_norm": 1.1391373872756958, + "learning_rate": 6.378724560408895e-07, + "loss": 0.3076, + "step": 42077 + }, + { + "epoch": 0.842339163726447, + "grad_norm": 1.0216373205184937, + "learning_rate": 6.377140217054517e-07, + "loss": 0.2771, + "step": 42078 + }, + { + "epoch": 0.8423591822435753, + "grad_norm": 1.0177620649337769, + "learning_rate": 6.37555605708125e-07, + "loss": 0.2912, + "step": 42079 + }, + { + "epoch": 0.8423792007607036, + "grad_norm": 2.0358922481536865, + "learning_rate": 6.373972080495749e-07, + "loss": 0.7898, + "step": 42080 + }, + { + "epoch": 0.842399219277832, + "grad_norm": 1.180909276008606, + "learning_rate": 6.372388287304688e-07, + "loss": 0.2923, + "step": 42081 + }, + { + "epoch": 0.8424192377949603, + "grad_norm": 1.1729905605316162, + "learning_rate": 6.370804677514708e-07, + "loss": 0.3134, + "step": 42082 + }, + { + "epoch": 0.8424392563120887, + "grad_norm": 1.0629600286483765, + "learning_rate": 6.36922125113249e-07, + "loss": 0.2647, + "step": 42083 + }, + { + "epoch": 0.842459274829217, + "grad_norm": 1.1393638849258423, + "learning_rate": 6.367638008164678e-07, + "loss": 0.2856, + "step": 42084 + }, + { + "epoch": 0.8424792933463454, + "grad_norm": 0.9915076494216919, + "learning_rate": 6.366054948617911e-07, + "loss": 0.2443, + "step": 42085 + }, + { + "epoch": 0.8424993118634737, + "grad_norm": 1.8883029222488403, + "learning_rate": 6.364472072498873e-07, + "loss": 0.6455, + "step": 42086 + }, + { + "epoch": 0.842519330380602, + "grad_norm": 1.0769375562667847, + "learning_rate": 6.362889379814202e-07, + "loss": 0.2768, + "step": 42087 + }, + { + "epoch": 0.8425393488977304, + "grad_norm": 2.0847926139831543, + "learning_rate": 6.361306870570549e-07, + "loss": 0.7216, + "step": 42088 + }, + { + "epoch": 0.8425593674148587, + "grad_norm": 1.0848166942596436, + "learning_rate": 6.359724544774564e-07, + "loss": 0.2876, + "step": 42089 + }, + { + "epoch": 0.8425793859319871, + "grad_norm": 1.0598822832107544, + "learning_rate": 6.358142402432921e-07, + "loss": 0.2911, + "step": 42090 + }, + { + "epoch": 0.8425994044491154, + "grad_norm": 1.7934107780456543, + "learning_rate": 6.356560443552251e-07, + "loss": 0.78, + "step": 42091 + }, + { + "epoch": 0.8426194229662438, + "grad_norm": 1.0986287593841553, + "learning_rate": 6.354978668139206e-07, + "loss": 0.2855, + "step": 42092 + }, + { + "epoch": 0.8426394414833721, + "grad_norm": 0.9904391765594482, + "learning_rate": 6.35339707620043e-07, + "loss": 0.2594, + "step": 42093 + }, + { + "epoch": 0.8426594600005005, + "grad_norm": 0.9962363839149475, + "learning_rate": 6.351815667742589e-07, + "loss": 0.3025, + "step": 42094 + }, + { + "epoch": 0.8426794785176288, + "grad_norm": 1.1569082736968994, + "learning_rate": 6.350234442772313e-07, + "loss": 0.2859, + "step": 42095 + }, + { + "epoch": 0.8426994970347571, + "grad_norm": 1.2071304321289062, + "learning_rate": 6.348653401296273e-07, + "loss": 0.2851, + "step": 42096 + }, + { + "epoch": 0.8427195155518855, + "grad_norm": 1.0649369955062866, + "learning_rate": 6.347072543321092e-07, + "loss": 0.2772, + "step": 42097 + }, + { + "epoch": 0.8427395340690138, + "grad_norm": 1.0765936374664307, + "learning_rate": 6.345491868853415e-07, + "loss": 0.283, + "step": 42098 + }, + { + "epoch": 0.8427595525861422, + "grad_norm": 1.7865712642669678, + "learning_rate": 6.343911377899908e-07, + "loss": 0.7234, + "step": 42099 + }, + { + "epoch": 0.8427795711032705, + "grad_norm": 1.772657871246338, + "learning_rate": 6.342331070467206e-07, + "loss": 0.7283, + "step": 42100 + }, + { + "epoch": 0.8427995896203989, + "grad_norm": 1.1776247024536133, + "learning_rate": 6.340750946561947e-07, + "loss": 0.2911, + "step": 42101 + }, + { + "epoch": 0.8428196081375272, + "grad_norm": 1.1427855491638184, + "learning_rate": 6.33917100619077e-07, + "loss": 0.3264, + "step": 42102 + }, + { + "epoch": 0.8428396266546555, + "grad_norm": 1.169378399848938, + "learning_rate": 6.33759124936032e-07, + "loss": 0.2988, + "step": 42103 + }, + { + "epoch": 0.8428596451717839, + "grad_norm": 1.1162511110305786, + "learning_rate": 6.336011676077242e-07, + "loss": 0.2683, + "step": 42104 + }, + { + "epoch": 0.8428796636889122, + "grad_norm": 1.3393871784210205, + "learning_rate": 6.33443228634818e-07, + "loss": 0.3417, + "step": 42105 + }, + { + "epoch": 0.8428996822060406, + "grad_norm": 1.0431658029556274, + "learning_rate": 6.332853080179757e-07, + "loss": 0.2763, + "step": 42106 + }, + { + "epoch": 0.8429197007231689, + "grad_norm": 0.9798051714897156, + "learning_rate": 6.331274057578629e-07, + "loss": 0.2824, + "step": 42107 + }, + { + "epoch": 0.8429397192402973, + "grad_norm": 1.1502447128295898, + "learning_rate": 6.329695218551423e-07, + "loss": 0.2826, + "step": 42108 + }, + { + "epoch": 0.8429597377574256, + "grad_norm": 1.0991661548614502, + "learning_rate": 6.328116563104786e-07, + "loss": 0.3173, + "step": 42109 + }, + { + "epoch": 0.842979756274554, + "grad_norm": 1.3744958639144897, + "learning_rate": 6.326538091245349e-07, + "loss": 0.2628, + "step": 42110 + }, + { + "epoch": 0.8429997747916823, + "grad_norm": 1.090827226638794, + "learning_rate": 6.324959802979747e-07, + "loss": 0.2562, + "step": 42111 + }, + { + "epoch": 0.8430197933088106, + "grad_norm": 1.1685529947280884, + "learning_rate": 6.323381698314607e-07, + "loss": 0.3289, + "step": 42112 + }, + { + "epoch": 0.843039811825939, + "grad_norm": 1.1749250888824463, + "learning_rate": 6.32180377725658e-07, + "loss": 0.308, + "step": 42113 + }, + { + "epoch": 0.8430598303430673, + "grad_norm": 1.1639938354492188, + "learning_rate": 6.320226039812294e-07, + "loss": 0.2768, + "step": 42114 + }, + { + "epoch": 0.8430798488601957, + "grad_norm": 1.143972635269165, + "learning_rate": 6.318648485988372e-07, + "loss": 0.3542, + "step": 42115 + }, + { + "epoch": 0.843099867377324, + "grad_norm": 1.0970760583877563, + "learning_rate": 6.317071115791446e-07, + "loss": 0.2707, + "step": 42116 + }, + { + "epoch": 0.8431198858944524, + "grad_norm": 1.1692782640457153, + "learning_rate": 6.315493929228155e-07, + "loss": 0.2675, + "step": 42117 + }, + { + "epoch": 0.8431399044115807, + "grad_norm": 1.06952702999115, + "learning_rate": 6.313916926305136e-07, + "loss": 0.2681, + "step": 42118 + }, + { + "epoch": 0.843159922928709, + "grad_norm": 1.108956217765808, + "learning_rate": 6.312340107028992e-07, + "loss": 0.2467, + "step": 42119 + }, + { + "epoch": 0.8431799414458374, + "grad_norm": 1.1698460578918457, + "learning_rate": 6.310763471406383e-07, + "loss": 0.2779, + "step": 42120 + }, + { + "epoch": 0.8431999599629657, + "grad_norm": 1.16512930393219, + "learning_rate": 6.309187019443907e-07, + "loss": 0.2742, + "step": 42121 + }, + { + "epoch": 0.8432199784800941, + "grad_norm": 2.011115789413452, + "learning_rate": 6.307610751148219e-07, + "loss": 0.7405, + "step": 42122 + }, + { + "epoch": 0.8432399969972224, + "grad_norm": 1.9360053539276123, + "learning_rate": 6.306034666525934e-07, + "loss": 0.7792, + "step": 42123 + }, + { + "epoch": 0.8432600155143508, + "grad_norm": 1.1365671157836914, + "learning_rate": 6.304458765583676e-07, + "loss": 0.339, + "step": 42124 + }, + { + "epoch": 0.8432800340314791, + "grad_norm": 1.123223066329956, + "learning_rate": 6.302883048328057e-07, + "loss": 0.2866, + "step": 42125 + }, + { + "epoch": 0.8433000525486075, + "grad_norm": 1.1403958797454834, + "learning_rate": 6.301307514765731e-07, + "loss": 0.2896, + "step": 42126 + }, + { + "epoch": 0.8433200710657358, + "grad_norm": 1.0445377826690674, + "learning_rate": 6.299732164903299e-07, + "loss": 0.3051, + "step": 42127 + }, + { + "epoch": 0.8433400895828641, + "grad_norm": 1.130692720413208, + "learning_rate": 6.29815699874739e-07, + "loss": 0.2703, + "step": 42128 + }, + { + "epoch": 0.8433601080999925, + "grad_norm": 1.0631667375564575, + "learning_rate": 6.296582016304609e-07, + "loss": 0.29, + "step": 42129 + }, + { + "epoch": 0.8433801266171208, + "grad_norm": 1.164420485496521, + "learning_rate": 6.295007217581611e-07, + "loss": 0.2973, + "step": 42130 + }, + { + "epoch": 0.8434001451342492, + "grad_norm": 1.090103030204773, + "learning_rate": 6.293432602584993e-07, + "loss": 0.2973, + "step": 42131 + }, + { + "epoch": 0.8434201636513775, + "grad_norm": 1.0155761241912842, + "learning_rate": 6.291858171321364e-07, + "loss": 0.2658, + "step": 42132 + }, + { + "epoch": 0.8434401821685059, + "grad_norm": 1.0629653930664062, + "learning_rate": 6.290283923797375e-07, + "loss": 0.2778, + "step": 42133 + }, + { + "epoch": 0.8434602006856342, + "grad_norm": 1.11709725856781, + "learning_rate": 6.288709860019609e-07, + "loss": 0.3517, + "step": 42134 + }, + { + "epoch": 0.8434802192027625, + "grad_norm": 0.9935476183891296, + "learning_rate": 6.287135979994713e-07, + "loss": 0.2877, + "step": 42135 + }, + { + "epoch": 0.8435002377198909, + "grad_norm": 1.1157817840576172, + "learning_rate": 6.285562283729291e-07, + "loss": 0.3071, + "step": 42136 + }, + { + "epoch": 0.8435202562370192, + "grad_norm": 1.8847367763519287, + "learning_rate": 6.28398877122996e-07, + "loss": 0.7048, + "step": 42137 + }, + { + "epoch": 0.8435402747541476, + "grad_norm": 1.1162132024765015, + "learning_rate": 6.28241544250332e-07, + "loss": 0.3015, + "step": 42138 + }, + { + "epoch": 0.8435602932712759, + "grad_norm": 1.154740333557129, + "learning_rate": 6.280842297556005e-07, + "loss": 0.3214, + "step": 42139 + }, + { + "epoch": 0.8435803117884043, + "grad_norm": 1.0237624645233154, + "learning_rate": 6.279269336394623e-07, + "loss": 0.2629, + "step": 42140 + }, + { + "epoch": 0.8436003303055326, + "grad_norm": 1.1793311834335327, + "learning_rate": 6.277696559025787e-07, + "loss": 0.3022, + "step": 42141 + }, + { + "epoch": 0.843620348822661, + "grad_norm": 1.36082923412323, + "learning_rate": 6.276123965456099e-07, + "loss": 0.2892, + "step": 42142 + }, + { + "epoch": 0.8436403673397893, + "grad_norm": 1.063740611076355, + "learning_rate": 6.274551555692171e-07, + "loss": 0.3525, + "step": 42143 + }, + { + "epoch": 0.8436603858569176, + "grad_norm": 1.8443739414215088, + "learning_rate": 6.272979329740625e-07, + "loss": 0.7319, + "step": 42144 + }, + { + "epoch": 0.843680404374046, + "grad_norm": 1.1182894706726074, + "learning_rate": 6.271407287608056e-07, + "loss": 0.2533, + "step": 42145 + }, + { + "epoch": 0.8437004228911743, + "grad_norm": 1.2404890060424805, + "learning_rate": 6.269835429301091e-07, + "loss": 0.3058, + "step": 42146 + }, + { + "epoch": 0.8437204414083027, + "grad_norm": 1.9134818315505981, + "learning_rate": 6.268263754826315e-07, + "loss": 0.7634, + "step": 42147 + }, + { + "epoch": 0.843740459925431, + "grad_norm": 1.0918300151824951, + "learning_rate": 6.266692264190355e-07, + "loss": 0.3216, + "step": 42148 + }, + { + "epoch": 0.8437604784425594, + "grad_norm": 2.0034427642822266, + "learning_rate": 6.265120957399812e-07, + "loss": 0.7195, + "step": 42149 + }, + { + "epoch": 0.8437804969596877, + "grad_norm": 0.9952355623245239, + "learning_rate": 6.263549834461291e-07, + "loss": 0.2698, + "step": 42150 + }, + { + "epoch": 0.843800515476816, + "grad_norm": 1.8958740234375, + "learning_rate": 6.261978895381382e-07, + "loss": 0.7914, + "step": 42151 + }, + { + "epoch": 0.8438205339939444, + "grad_norm": 1.8309968709945679, + "learning_rate": 6.260408140166713e-07, + "loss": 0.7544, + "step": 42152 + }, + { + "epoch": 0.8438405525110727, + "grad_norm": 1.4048041105270386, + "learning_rate": 6.258837568823872e-07, + "loss": 0.2739, + "step": 42153 + }, + { + "epoch": 0.8438605710282011, + "grad_norm": 1.0664345026016235, + "learning_rate": 6.257267181359466e-07, + "loss": 0.2535, + "step": 42154 + }, + { + "epoch": 0.8438805895453294, + "grad_norm": 1.05260169506073, + "learning_rate": 6.255696977780096e-07, + "loss": 0.3273, + "step": 42155 + }, + { + "epoch": 0.8439006080624578, + "grad_norm": 1.0271100997924805, + "learning_rate": 6.254126958092349e-07, + "loss": 0.2593, + "step": 42156 + }, + { + "epoch": 0.8439206265795861, + "grad_norm": 1.0091642141342163, + "learning_rate": 6.252557122302849e-07, + "loss": 0.2758, + "step": 42157 + }, + { + "epoch": 0.8439406450967145, + "grad_norm": 1.0451072454452515, + "learning_rate": 6.25098747041818e-07, + "loss": 0.3151, + "step": 42158 + }, + { + "epoch": 0.8439606636138428, + "grad_norm": 1.1787910461425781, + "learning_rate": 6.249418002444952e-07, + "loss": 0.2698, + "step": 42159 + }, + { + "epoch": 0.8439806821309711, + "grad_norm": 1.0516499280929565, + "learning_rate": 6.247848718389743e-07, + "loss": 0.279, + "step": 42160 + }, + { + "epoch": 0.8440007006480995, + "grad_norm": 1.1091222763061523, + "learning_rate": 6.246279618259171e-07, + "loss": 0.282, + "step": 42161 + }, + { + "epoch": 0.8440207191652278, + "grad_norm": 1.129343032836914, + "learning_rate": 6.244710702059825e-07, + "loss": 0.3039, + "step": 42162 + }, + { + "epoch": 0.8440407376823562, + "grad_norm": 1.8996587991714478, + "learning_rate": 6.2431419697983e-07, + "loss": 0.7173, + "step": 42163 + }, + { + "epoch": 0.8440607561994845, + "grad_norm": 1.1711293458938599, + "learning_rate": 6.24157342148119e-07, + "loss": 0.2917, + "step": 42164 + }, + { + "epoch": 0.8440807747166129, + "grad_norm": 1.1049588918685913, + "learning_rate": 6.240005057115078e-07, + "loss": 0.3023, + "step": 42165 + }, + { + "epoch": 0.8441007932337412, + "grad_norm": 1.0989174842834473, + "learning_rate": 6.238436876706578e-07, + "loss": 0.3029, + "step": 42166 + }, + { + "epoch": 0.8441208117508695, + "grad_norm": 1.1821889877319336, + "learning_rate": 6.23686888026227e-07, + "loss": 0.2822, + "step": 42167 + }, + { + "epoch": 0.8441408302679979, + "grad_norm": 1.781825065612793, + "learning_rate": 6.235301067788752e-07, + "loss": 0.7739, + "step": 42168 + }, + { + "epoch": 0.8441608487851262, + "grad_norm": 1.030977487564087, + "learning_rate": 6.233733439292594e-07, + "loss": 0.2834, + "step": 42169 + }, + { + "epoch": 0.8441808673022546, + "grad_norm": 1.1722118854522705, + "learning_rate": 6.232165994780415e-07, + "loss": 0.3428, + "step": 42170 + }, + { + "epoch": 0.8442008858193829, + "grad_norm": 1.1032662391662598, + "learning_rate": 6.23059873425878e-07, + "loss": 0.2861, + "step": 42171 + }, + { + "epoch": 0.8442209043365113, + "grad_norm": 1.0147864818572998, + "learning_rate": 6.229031657734303e-07, + "loss": 0.2649, + "step": 42172 + }, + { + "epoch": 0.8442409228536396, + "grad_norm": 1.2657701969146729, + "learning_rate": 6.227464765213554e-07, + "loss": 0.3221, + "step": 42173 + }, + { + "epoch": 0.844260941370768, + "grad_norm": 1.2754943370819092, + "learning_rate": 6.225898056703111e-07, + "loss": 0.3529, + "step": 42174 + }, + { + "epoch": 0.8442809598878963, + "grad_norm": 1.0736606121063232, + "learning_rate": 6.224331532209582e-07, + "loss": 0.2506, + "step": 42175 + }, + { + "epoch": 0.8443009784050246, + "grad_norm": 1.004884123802185, + "learning_rate": 6.22276519173955e-07, + "loss": 0.2504, + "step": 42176 + }, + { + "epoch": 0.844320996922153, + "grad_norm": 1.8295719623565674, + "learning_rate": 6.221199035299586e-07, + "loss": 0.7875, + "step": 42177 + }, + { + "epoch": 0.8443410154392813, + "grad_norm": 1.221914291381836, + "learning_rate": 6.219633062896268e-07, + "loss": 0.281, + "step": 42178 + }, + { + "epoch": 0.8443610339564097, + "grad_norm": 2.046651601791382, + "learning_rate": 6.218067274536205e-07, + "loss": 0.8021, + "step": 42179 + }, + { + "epoch": 0.844381052473538, + "grad_norm": 0.9876418709754944, + "learning_rate": 6.216501670225966e-07, + "loss": 0.2666, + "step": 42180 + }, + { + "epoch": 0.8444010709906664, + "grad_norm": 1.2733681201934814, + "learning_rate": 6.214936249972126e-07, + "loss": 0.3068, + "step": 42181 + }, + { + "epoch": 0.8444210895077947, + "grad_norm": 1.1014490127563477, + "learning_rate": 6.213371013781266e-07, + "loss": 0.2732, + "step": 42182 + }, + { + "epoch": 0.844441108024923, + "grad_norm": 1.2055904865264893, + "learning_rate": 6.211805961659983e-07, + "loss": 0.2781, + "step": 42183 + }, + { + "epoch": 0.8444611265420514, + "grad_norm": 1.0482211112976074, + "learning_rate": 6.210241093614827e-07, + "loss": 0.2483, + "step": 42184 + }, + { + "epoch": 0.8444811450591797, + "grad_norm": 1.0718199014663696, + "learning_rate": 6.208676409652415e-07, + "loss": 0.3018, + "step": 42185 + }, + { + "epoch": 0.8445011635763081, + "grad_norm": 1.030397891998291, + "learning_rate": 6.207111909779296e-07, + "loss": 0.2824, + "step": 42186 + }, + { + "epoch": 0.8445211820934364, + "grad_norm": 1.3374816179275513, + "learning_rate": 6.205547594002043e-07, + "loss": 0.2737, + "step": 42187 + }, + { + "epoch": 0.8445412006105648, + "grad_norm": 1.0902760028839111, + "learning_rate": 6.203983462327256e-07, + "loss": 0.3022, + "step": 42188 + }, + { + "epoch": 0.8445612191276931, + "grad_norm": 1.1656692028045654, + "learning_rate": 6.202419514761499e-07, + "loss": 0.3366, + "step": 42189 + }, + { + "epoch": 0.8445812376448215, + "grad_norm": 1.2452268600463867, + "learning_rate": 6.200855751311341e-07, + "loss": 0.3343, + "step": 42190 + }, + { + "epoch": 0.8446012561619498, + "grad_norm": 1.0232062339782715, + "learning_rate": 6.199292171983351e-07, + "loss": 0.2665, + "step": 42191 + }, + { + "epoch": 0.8446212746790781, + "grad_norm": 1.0219789743423462, + "learning_rate": 6.197728776784124e-07, + "loss": 0.2987, + "step": 42192 + }, + { + "epoch": 0.8446412931962065, + "grad_norm": 1.1058636903762817, + "learning_rate": 6.196165565720213e-07, + "loss": 0.3018, + "step": 42193 + }, + { + "epoch": 0.8446613117133348, + "grad_norm": 1.104961633682251, + "learning_rate": 6.1946025387982e-07, + "loss": 0.291, + "step": 42194 + }, + { + "epoch": 0.8446813302304632, + "grad_norm": 1.0932880640029907, + "learning_rate": 6.193039696024639e-07, + "loss": 0.3124, + "step": 42195 + }, + { + "epoch": 0.8447013487475915, + "grad_norm": 1.0686025619506836, + "learning_rate": 6.191477037406124e-07, + "loss": 0.2729, + "step": 42196 + }, + { + "epoch": 0.8447213672647199, + "grad_norm": 1.1226786375045776, + "learning_rate": 6.189914562949196e-07, + "loss": 0.297, + "step": 42197 + }, + { + "epoch": 0.8447413857818482, + "grad_norm": 1.1131712198257446, + "learning_rate": 6.188352272660453e-07, + "loss": 0.3094, + "step": 42198 + }, + { + "epoch": 0.8447614042989765, + "grad_norm": 1.1927409172058105, + "learning_rate": 6.186790166546452e-07, + "loss": 0.2891, + "step": 42199 + }, + { + "epoch": 0.8447814228161049, + "grad_norm": 1.1224274635314941, + "learning_rate": 6.185228244613745e-07, + "loss": 0.3138, + "step": 42200 + }, + { + "epoch": 0.8448014413332332, + "grad_norm": 1.2018264532089233, + "learning_rate": 6.183666506868913e-07, + "loss": 0.349, + "step": 42201 + }, + { + "epoch": 0.8448214598503616, + "grad_norm": 1.1466290950775146, + "learning_rate": 6.182104953318524e-07, + "loss": 0.2843, + "step": 42202 + }, + { + "epoch": 0.8448414783674899, + "grad_norm": 1.1779791116714478, + "learning_rate": 6.180543583969134e-07, + "loss": 0.298, + "step": 42203 + }, + { + "epoch": 0.8448614968846183, + "grad_norm": 1.1025428771972656, + "learning_rate": 6.178982398827299e-07, + "loss": 0.3208, + "step": 42204 + }, + { + "epoch": 0.8448815154017466, + "grad_norm": 1.103843331336975, + "learning_rate": 6.177421397899602e-07, + "loss": 0.2871, + "step": 42205 + }, + { + "epoch": 0.844901533918875, + "grad_norm": 1.2509655952453613, + "learning_rate": 6.175860581192594e-07, + "loss": 0.2726, + "step": 42206 + }, + { + "epoch": 0.8449215524360033, + "grad_norm": 1.0940285921096802, + "learning_rate": 6.174299948712837e-07, + "loss": 0.3074, + "step": 42207 + }, + { + "epoch": 0.8449415709531316, + "grad_norm": 1.1074596643447876, + "learning_rate": 6.172739500466884e-07, + "loss": 0.295, + "step": 42208 + }, + { + "epoch": 0.84496158947026, + "grad_norm": 1.240576148033142, + "learning_rate": 6.171179236461311e-07, + "loss": 0.3201, + "step": 42209 + }, + { + "epoch": 0.8449816079873883, + "grad_norm": 1.114803433418274, + "learning_rate": 6.169619156702655e-07, + "loss": 0.2832, + "step": 42210 + }, + { + "epoch": 0.8450016265045167, + "grad_norm": 1.0993597507476807, + "learning_rate": 6.168059261197501e-07, + "loss": 0.3089, + "step": 42211 + }, + { + "epoch": 0.845021645021645, + "grad_norm": 1.1201084852218628, + "learning_rate": 6.166499549952393e-07, + "loss": 0.3597, + "step": 42212 + }, + { + "epoch": 0.8450416635387734, + "grad_norm": 1.8117313385009766, + "learning_rate": 6.164940022973881e-07, + "loss": 0.7741, + "step": 42213 + }, + { + "epoch": 0.8450616820559017, + "grad_norm": 1.974021553993225, + "learning_rate": 6.163380680268532e-07, + "loss": 0.7214, + "step": 42214 + }, + { + "epoch": 0.84508170057303, + "grad_norm": 1.1957333087921143, + "learning_rate": 6.161821521842898e-07, + "loss": 0.352, + "step": 42215 + }, + { + "epoch": 0.8451017190901584, + "grad_norm": 1.0218905210494995, + "learning_rate": 6.160262547703532e-07, + "loss": 0.2956, + "step": 42216 + }, + { + "epoch": 0.8451217376072867, + "grad_norm": 1.2198927402496338, + "learning_rate": 6.158703757856993e-07, + "loss": 0.2472, + "step": 42217 + }, + { + "epoch": 0.8451417561244151, + "grad_norm": 1.1720435619354248, + "learning_rate": 6.157145152309812e-07, + "loss": 0.3181, + "step": 42218 + }, + { + "epoch": 0.8451617746415434, + "grad_norm": 1.047529935836792, + "learning_rate": 6.155586731068574e-07, + "loss": 0.3499, + "step": 42219 + }, + { + "epoch": 0.8451817931586718, + "grad_norm": 1.1820071935653687, + "learning_rate": 6.154028494139807e-07, + "loss": 0.306, + "step": 42220 + }, + { + "epoch": 0.8452018116758001, + "grad_norm": 1.255934715270996, + "learning_rate": 6.152470441530062e-07, + "loss": 0.3348, + "step": 42221 + }, + { + "epoch": 0.8452218301929285, + "grad_norm": 1.1531109809875488, + "learning_rate": 6.15091257324591e-07, + "loss": 0.3148, + "step": 42222 + }, + { + "epoch": 0.8452418487100568, + "grad_norm": 1.131093978881836, + "learning_rate": 6.149354889293868e-07, + "loss": 0.3058, + "step": 42223 + }, + { + "epoch": 0.8452618672271851, + "grad_norm": 1.036325216293335, + "learning_rate": 6.147797389680515e-07, + "loss": 0.291, + "step": 42224 + }, + { + "epoch": 0.8452818857443135, + "grad_norm": 1.1779600381851196, + "learning_rate": 6.14624007441238e-07, + "loss": 0.2752, + "step": 42225 + }, + { + "epoch": 0.8453019042614418, + "grad_norm": 1.1866077184677124, + "learning_rate": 6.144682943496016e-07, + "loss": 0.3124, + "step": 42226 + }, + { + "epoch": 0.8453219227785702, + "grad_norm": 1.1985585689544678, + "learning_rate": 6.14312599693796e-07, + "loss": 0.2788, + "step": 42227 + }, + { + "epoch": 0.8453419412956985, + "grad_norm": 1.002379298210144, + "learning_rate": 6.141569234744777e-07, + "loss": 0.2776, + "step": 42228 + }, + { + "epoch": 0.8453619598128269, + "grad_norm": 1.1566669940948486, + "learning_rate": 6.140012656922994e-07, + "loss": 0.2744, + "step": 42229 + }, + { + "epoch": 0.8453819783299552, + "grad_norm": 1.279024600982666, + "learning_rate": 6.138456263479159e-07, + "loss": 0.2824, + "step": 42230 + }, + { + "epoch": 0.8454019968470835, + "grad_norm": 1.1225547790527344, + "learning_rate": 6.136900054419803e-07, + "loss": 0.3263, + "step": 42231 + }, + { + "epoch": 0.8454220153642119, + "grad_norm": 1.0998681783676147, + "learning_rate": 6.135344029751494e-07, + "loss": 0.2864, + "step": 42232 + }, + { + "epoch": 0.8454420338813402, + "grad_norm": 1.2019085884094238, + "learning_rate": 6.133788189480755e-07, + "loss": 0.3112, + "step": 42233 + }, + { + "epoch": 0.8454620523984686, + "grad_norm": 1.1045745611190796, + "learning_rate": 6.132232533614124e-07, + "loss": 0.2865, + "step": 42234 + }, + { + "epoch": 0.8454820709155969, + "grad_norm": 1.3180859088897705, + "learning_rate": 6.130677062158153e-07, + "loss": 0.3093, + "step": 42235 + }, + { + "epoch": 0.8455020894327253, + "grad_norm": 1.2080590724945068, + "learning_rate": 6.129121775119368e-07, + "loss": 0.3342, + "step": 42236 + }, + { + "epoch": 0.8455221079498536, + "grad_norm": 1.045031189918518, + "learning_rate": 6.12756667250432e-07, + "loss": 0.2722, + "step": 42237 + }, + { + "epoch": 0.845542126466982, + "grad_norm": 1.17351233959198, + "learning_rate": 6.126011754319544e-07, + "loss": 0.3281, + "step": 42238 + }, + { + "epoch": 0.8455621449841103, + "grad_norm": 0.9580721855163574, + "learning_rate": 6.124457020571573e-07, + "loss": 0.238, + "step": 42239 + }, + { + "epoch": 0.8455821635012386, + "grad_norm": 1.8902852535247803, + "learning_rate": 6.122902471266934e-07, + "loss": 0.7158, + "step": 42240 + }, + { + "epoch": 0.845602182018367, + "grad_norm": 0.9915464520454407, + "learning_rate": 6.121348106412178e-07, + "loss": 0.2652, + "step": 42241 + }, + { + "epoch": 0.8456222005354953, + "grad_norm": 1.7906534671783447, + "learning_rate": 6.119793926013828e-07, + "loss": 0.7513, + "step": 42242 + }, + { + "epoch": 0.8456422190526237, + "grad_norm": 1.0568876266479492, + "learning_rate": 6.118239930078429e-07, + "loss": 0.3019, + "step": 42243 + }, + { + "epoch": 0.845662237569752, + "grad_norm": 1.9145210981369019, + "learning_rate": 6.116686118612492e-07, + "loss": 0.6939, + "step": 42244 + }, + { + "epoch": 0.8456822560868804, + "grad_norm": 1.8206908702850342, + "learning_rate": 6.115132491622572e-07, + "loss": 0.7255, + "step": 42245 + }, + { + "epoch": 0.8457022746040087, + "grad_norm": 1.1172572374343872, + "learning_rate": 6.113579049115192e-07, + "loss": 0.3059, + "step": 42246 + }, + { + "epoch": 0.845722293121137, + "grad_norm": 1.16953706741333, + "learning_rate": 6.112025791096871e-07, + "loss": 0.3284, + "step": 42247 + }, + { + "epoch": 0.8457423116382654, + "grad_norm": 1.0970046520233154, + "learning_rate": 6.110472717574156e-07, + "loss": 0.2722, + "step": 42248 + }, + { + "epoch": 0.8457623301553937, + "grad_norm": 1.8059958219528198, + "learning_rate": 6.108919828553561e-07, + "loss": 0.7336, + "step": 42249 + }, + { + "epoch": 0.8457823486725221, + "grad_norm": 1.0578795671463013, + "learning_rate": 6.10736712404163e-07, + "loss": 0.2922, + "step": 42250 + }, + { + "epoch": 0.8458023671896504, + "grad_norm": 1.1760647296905518, + "learning_rate": 6.105814604044885e-07, + "loss": 0.2753, + "step": 42251 + }, + { + "epoch": 0.8458223857067788, + "grad_norm": 1.0870064496994019, + "learning_rate": 6.104262268569849e-07, + "loss": 0.2949, + "step": 42252 + }, + { + "epoch": 0.8458424042239071, + "grad_norm": 1.0627976655960083, + "learning_rate": 6.102710117623034e-07, + "loss": 0.2657, + "step": 42253 + }, + { + "epoch": 0.8458624227410354, + "grad_norm": 1.1637829542160034, + "learning_rate": 6.101158151210995e-07, + "loss": 0.3455, + "step": 42254 + }, + { + "epoch": 0.8458824412581638, + "grad_norm": 1.0187965631484985, + "learning_rate": 6.099606369340233e-07, + "loss": 0.3105, + "step": 42255 + }, + { + "epoch": 0.8459024597752921, + "grad_norm": 1.1014798879623413, + "learning_rate": 6.098054772017287e-07, + "loss": 0.327, + "step": 42256 + }, + { + "epoch": 0.8459224782924205, + "grad_norm": 1.3061797618865967, + "learning_rate": 6.096503359248662e-07, + "loss": 0.2788, + "step": 42257 + }, + { + "epoch": 0.8459424968095488, + "grad_norm": 1.9472700357437134, + "learning_rate": 6.094952131040882e-07, + "loss": 0.7657, + "step": 42258 + }, + { + "epoch": 0.8459625153266772, + "grad_norm": 1.4226040840148926, + "learning_rate": 6.093401087400486e-07, + "loss": 0.2646, + "step": 42259 + }, + { + "epoch": 0.8459825338438055, + "grad_norm": 1.1442835330963135, + "learning_rate": 6.091850228333973e-07, + "loss": 0.2921, + "step": 42260 + }, + { + "epoch": 0.8460025523609339, + "grad_norm": 1.1217035055160522, + "learning_rate": 6.090299553847884e-07, + "loss": 0.3134, + "step": 42261 + }, + { + "epoch": 0.8460225708780622, + "grad_norm": 1.0499579906463623, + "learning_rate": 6.088749063948712e-07, + "loss": 0.2727, + "step": 42262 + }, + { + "epoch": 0.8460425893951905, + "grad_norm": 0.9851587414741516, + "learning_rate": 6.087198758643003e-07, + "loss": 0.2873, + "step": 42263 + }, + { + "epoch": 0.8460626079123189, + "grad_norm": 1.0999751091003418, + "learning_rate": 6.085648637937258e-07, + "loss": 0.2966, + "step": 42264 + }, + { + "epoch": 0.8460826264294472, + "grad_norm": 1.0677711963653564, + "learning_rate": 6.084098701837998e-07, + "loss": 0.2896, + "step": 42265 + }, + { + "epoch": 0.8461026449465756, + "grad_norm": 1.0626896619796753, + "learning_rate": 6.082548950351724e-07, + "loss": 0.2816, + "step": 42266 + }, + { + "epoch": 0.8461226634637039, + "grad_norm": 1.1215087175369263, + "learning_rate": 6.08099938348497e-07, + "loss": 0.2813, + "step": 42267 + }, + { + "epoch": 0.8461426819808323, + "grad_norm": 1.0929228067398071, + "learning_rate": 6.079450001244247e-07, + "loss": 0.3034, + "step": 42268 + }, + { + "epoch": 0.8461627004979606, + "grad_norm": 1.1080647706985474, + "learning_rate": 6.077900803636066e-07, + "loss": 0.3005, + "step": 42269 + }, + { + "epoch": 0.8461827190150889, + "grad_norm": 1.096007227897644, + "learning_rate": 6.076351790666934e-07, + "loss": 0.2878, + "step": 42270 + }, + { + "epoch": 0.8462027375322173, + "grad_norm": 1.089907169342041, + "learning_rate": 6.074802962343362e-07, + "loss": 0.2598, + "step": 42271 + }, + { + "epoch": 0.8462227560493456, + "grad_norm": 1.0877747535705566, + "learning_rate": 6.073254318671873e-07, + "loss": 0.2748, + "step": 42272 + }, + { + "epoch": 0.846242774566474, + "grad_norm": 1.1044472455978394, + "learning_rate": 6.071705859658955e-07, + "loss": 0.3293, + "step": 42273 + }, + { + "epoch": 0.8462627930836023, + "grad_norm": 1.0821248292922974, + "learning_rate": 6.070157585311148e-07, + "loss": 0.2975, + "step": 42274 + }, + { + "epoch": 0.8462828116007307, + "grad_norm": 1.0605612993240356, + "learning_rate": 6.068609495634931e-07, + "loss": 0.3338, + "step": 42275 + }, + { + "epoch": 0.846302830117859, + "grad_norm": 1.0767899751663208, + "learning_rate": 6.067061590636836e-07, + "loss": 0.2863, + "step": 42276 + }, + { + "epoch": 0.8463228486349874, + "grad_norm": 1.0296149253845215, + "learning_rate": 6.065513870323364e-07, + "loss": 0.292, + "step": 42277 + }, + { + "epoch": 0.8463428671521157, + "grad_norm": 1.1191526651382446, + "learning_rate": 6.063966334701011e-07, + "loss": 0.2666, + "step": 42278 + }, + { + "epoch": 0.846362885669244, + "grad_norm": 1.2850109338760376, + "learning_rate": 6.062418983776292e-07, + "loss": 0.3228, + "step": 42279 + }, + { + "epoch": 0.8463829041863724, + "grad_norm": 1.2763820886611938, + "learning_rate": 6.060871817555697e-07, + "loss": 0.2854, + "step": 42280 + }, + { + "epoch": 0.8464029227035007, + "grad_norm": 1.108572006225586, + "learning_rate": 6.059324836045749e-07, + "loss": 0.3254, + "step": 42281 + }, + { + "epoch": 0.8464229412206291, + "grad_norm": 1.1247053146362305, + "learning_rate": 6.057778039252948e-07, + "loss": 0.3561, + "step": 42282 + }, + { + "epoch": 0.8464429597377574, + "grad_norm": 1.1123064756393433, + "learning_rate": 6.05623142718379e-07, + "loss": 0.3221, + "step": 42283 + }, + { + "epoch": 0.8464629782548858, + "grad_norm": 1.2052726745605469, + "learning_rate": 6.054684999844768e-07, + "loss": 0.322, + "step": 42284 + }, + { + "epoch": 0.8464829967720141, + "grad_norm": 1.1167551279067993, + "learning_rate": 6.053138757242399e-07, + "loss": 0.2449, + "step": 42285 + }, + { + "epoch": 0.8465030152891424, + "grad_norm": 1.1859575510025024, + "learning_rate": 6.051592699383168e-07, + "loss": 0.3185, + "step": 42286 + }, + { + "epoch": 0.8465230338062708, + "grad_norm": 1.9533390998840332, + "learning_rate": 6.050046826273598e-07, + "loss": 0.7753, + "step": 42287 + }, + { + "epoch": 0.8465430523233991, + "grad_norm": 1.1331208944320679, + "learning_rate": 6.048501137920171e-07, + "loss": 0.2934, + "step": 42288 + }, + { + "epoch": 0.8465630708405275, + "grad_norm": 1.1466474533081055, + "learning_rate": 6.046955634329377e-07, + "loss": 0.3292, + "step": 42289 + }, + { + "epoch": 0.8465830893576558, + "grad_norm": 1.1303437948226929, + "learning_rate": 6.045410315507733e-07, + "loss": 0.3064, + "step": 42290 + }, + { + "epoch": 0.8466031078747842, + "grad_norm": 1.0720136165618896, + "learning_rate": 6.043865181461728e-07, + "loss": 0.2726, + "step": 42291 + }, + { + "epoch": 0.8466231263919125, + "grad_norm": 1.148892879486084, + "learning_rate": 6.042320232197851e-07, + "loss": 0.2514, + "step": 42292 + }, + { + "epoch": 0.8466431449090409, + "grad_norm": 1.0621427297592163, + "learning_rate": 6.040775467722587e-07, + "loss": 0.3039, + "step": 42293 + }, + { + "epoch": 0.8466631634261692, + "grad_norm": 2.069265842437744, + "learning_rate": 6.039230888042458e-07, + "loss": 0.7292, + "step": 42294 + }, + { + "epoch": 0.8466831819432975, + "grad_norm": 1.0939191579818726, + "learning_rate": 6.037686493163935e-07, + "loss": 0.2973, + "step": 42295 + }, + { + "epoch": 0.8467032004604259, + "grad_norm": 1.2064341306686401, + "learning_rate": 6.036142283093527e-07, + "loss": 0.3281, + "step": 42296 + }, + { + "epoch": 0.8467232189775542, + "grad_norm": 1.7718069553375244, + "learning_rate": 6.034598257837698e-07, + "loss": 0.7093, + "step": 42297 + }, + { + "epoch": 0.8467432374946826, + "grad_norm": 1.0806176662445068, + "learning_rate": 6.033054417402967e-07, + "loss": 0.3171, + "step": 42298 + }, + { + "epoch": 0.8467632560118109, + "grad_norm": 1.184311866760254, + "learning_rate": 6.031510761795806e-07, + "loss": 0.3081, + "step": 42299 + }, + { + "epoch": 0.8467832745289393, + "grad_norm": 1.1835495233535767, + "learning_rate": 6.02996729102272e-07, + "loss": 0.2666, + "step": 42300 + }, + { + "epoch": 0.8468032930460676, + "grad_norm": 0.9647976756095886, + "learning_rate": 6.028424005090189e-07, + "loss": 0.2539, + "step": 42301 + }, + { + "epoch": 0.8468233115631959, + "grad_norm": 1.0748603343963623, + "learning_rate": 6.026880904004695e-07, + "loss": 0.2988, + "step": 42302 + }, + { + "epoch": 0.8468433300803243, + "grad_norm": 1.1400827169418335, + "learning_rate": 6.025337987772734e-07, + "loss": 0.3007, + "step": 42303 + }, + { + "epoch": 0.8468633485974526, + "grad_norm": 1.0766310691833496, + "learning_rate": 6.023795256400794e-07, + "loss": 0.3022, + "step": 42304 + }, + { + "epoch": 0.846883367114581, + "grad_norm": 1.0028377771377563, + "learning_rate": 6.022252709895354e-07, + "loss": 0.287, + "step": 42305 + }, + { + "epoch": 0.8469033856317093, + "grad_norm": 1.292222499847412, + "learning_rate": 6.020710348262887e-07, + "loss": 0.2956, + "step": 42306 + }, + { + "epoch": 0.8469234041488377, + "grad_norm": 1.079715609550476, + "learning_rate": 6.019168171509898e-07, + "loss": 0.2905, + "step": 42307 + }, + { + "epoch": 0.846943422665966, + "grad_norm": 1.1211552619934082, + "learning_rate": 6.017626179642866e-07, + "loss": 0.3159, + "step": 42308 + }, + { + "epoch": 0.8469634411830944, + "grad_norm": 2.2529211044311523, + "learning_rate": 6.016084372668268e-07, + "loss": 0.7478, + "step": 42309 + }, + { + "epoch": 0.8469834597002227, + "grad_norm": 1.0954492092132568, + "learning_rate": 6.014542750592583e-07, + "loss": 0.3355, + "step": 42310 + }, + { + "epoch": 0.847003478217351, + "grad_norm": 1.1325044631958008, + "learning_rate": 6.013001313422284e-07, + "loss": 0.2773, + "step": 42311 + }, + { + "epoch": 0.8470234967344794, + "grad_norm": 1.068867564201355, + "learning_rate": 6.011460061163865e-07, + "loss": 0.2764, + "step": 42312 + }, + { + "epoch": 0.8470435152516077, + "grad_norm": 1.1558905839920044, + "learning_rate": 6.00991899382381e-07, + "loss": 0.2877, + "step": 42313 + }, + { + "epoch": 0.8470635337687361, + "grad_norm": 2.0556654930114746, + "learning_rate": 6.008378111408586e-07, + "loss": 0.7746, + "step": 42314 + }, + { + "epoch": 0.8470835522858644, + "grad_norm": 2.057579517364502, + "learning_rate": 6.006837413924666e-07, + "loss": 0.7838, + "step": 42315 + }, + { + "epoch": 0.8471035708029928, + "grad_norm": 1.1883641481399536, + "learning_rate": 6.005296901378543e-07, + "loss": 0.3141, + "step": 42316 + }, + { + "epoch": 0.8471235893201211, + "grad_norm": 1.2393300533294678, + "learning_rate": 6.003756573776687e-07, + "loss": 0.3225, + "step": 42317 + }, + { + "epoch": 0.8471436078372494, + "grad_norm": 1.0543190240859985, + "learning_rate": 6.002216431125568e-07, + "loss": 0.3018, + "step": 42318 + }, + { + "epoch": 0.8471636263543778, + "grad_norm": 1.825214147567749, + "learning_rate": 6.00067647343166e-07, + "loss": 0.7262, + "step": 42319 + }, + { + "epoch": 0.8471836448715061, + "grad_norm": 1.031328797340393, + "learning_rate": 5.99913670070143e-07, + "loss": 0.275, + "step": 42320 + }, + { + "epoch": 0.8472036633886345, + "grad_norm": 1.0997893810272217, + "learning_rate": 5.997597112941372e-07, + "loss": 0.3191, + "step": 42321 + }, + { + "epoch": 0.8472236819057628, + "grad_norm": 1.091471552848816, + "learning_rate": 5.996057710157949e-07, + "loss": 0.3093, + "step": 42322 + }, + { + "epoch": 0.8472437004228912, + "grad_norm": 1.8651063442230225, + "learning_rate": 5.994518492357626e-07, + "loss": 0.7102, + "step": 42323 + }, + { + "epoch": 0.8472637189400195, + "grad_norm": 1.1675046682357788, + "learning_rate": 5.992979459546871e-07, + "loss": 0.3173, + "step": 42324 + }, + { + "epoch": 0.8472837374571479, + "grad_norm": 1.2104219198226929, + "learning_rate": 5.991440611732158e-07, + "loss": 0.2563, + "step": 42325 + }, + { + "epoch": 0.8473037559742762, + "grad_norm": 1.0789529085159302, + "learning_rate": 5.989901948919963e-07, + "loss": 0.3469, + "step": 42326 + }, + { + "epoch": 0.8473237744914045, + "grad_norm": 1.1216791868209839, + "learning_rate": 5.988363471116754e-07, + "loss": 0.2879, + "step": 42327 + }, + { + "epoch": 0.8473437930085329, + "grad_norm": 0.9758076667785645, + "learning_rate": 5.986825178328987e-07, + "loss": 0.2294, + "step": 42328 + }, + { + "epoch": 0.8473638115256612, + "grad_norm": 1.0603700876235962, + "learning_rate": 5.98528707056314e-07, + "loss": 0.2621, + "step": 42329 + }, + { + "epoch": 0.8473838300427896, + "grad_norm": 1.4024555683135986, + "learning_rate": 5.983749147825674e-07, + "loss": 0.3374, + "step": 42330 + }, + { + "epoch": 0.8474038485599179, + "grad_norm": 1.1743987798690796, + "learning_rate": 5.982211410123057e-07, + "loss": 0.2843, + "step": 42331 + }, + { + "epoch": 0.8474238670770463, + "grad_norm": 1.1055352687835693, + "learning_rate": 5.980673857461749e-07, + "loss": 0.299, + "step": 42332 + }, + { + "epoch": 0.8474438855941746, + "grad_norm": 1.0551704168319702, + "learning_rate": 5.979136489848203e-07, + "loss": 0.2584, + "step": 42333 + }, + { + "epoch": 0.8474639041113029, + "grad_norm": 1.0760899782180786, + "learning_rate": 5.977599307288906e-07, + "loss": 0.2967, + "step": 42334 + }, + { + "epoch": 0.8474839226284313, + "grad_norm": 1.0963401794433594, + "learning_rate": 5.976062309790309e-07, + "loss": 0.2932, + "step": 42335 + }, + { + "epoch": 0.8475039411455596, + "grad_norm": 1.0731037855148315, + "learning_rate": 5.974525497358868e-07, + "loss": 0.2903, + "step": 42336 + }, + { + "epoch": 0.847523959662688, + "grad_norm": 1.125837802886963, + "learning_rate": 5.972988870001039e-07, + "loss": 0.3081, + "step": 42337 + }, + { + "epoch": 0.8475439781798163, + "grad_norm": 1.0749986171722412, + "learning_rate": 5.97145242772329e-07, + "loss": 0.2928, + "step": 42338 + }, + { + "epoch": 0.8475639966969447, + "grad_norm": 1.0933078527450562, + "learning_rate": 5.969916170532092e-07, + "loss": 0.3345, + "step": 42339 + }, + { + "epoch": 0.847584015214073, + "grad_norm": 1.0477430820465088, + "learning_rate": 5.96838009843389e-07, + "loss": 0.2251, + "step": 42340 + }, + { + "epoch": 0.8476040337312014, + "grad_norm": 1.1089788675308228, + "learning_rate": 5.966844211435141e-07, + "loss": 0.3128, + "step": 42341 + }, + { + "epoch": 0.8476240522483297, + "grad_norm": 1.1567949056625366, + "learning_rate": 5.965308509542294e-07, + "loss": 0.3078, + "step": 42342 + }, + { + "epoch": 0.847644070765458, + "grad_norm": 1.2464790344238281, + "learning_rate": 5.963772992761823e-07, + "loss": 0.3128, + "step": 42343 + }, + { + "epoch": 0.8476640892825864, + "grad_norm": 1.2019829750061035, + "learning_rate": 5.962237661100179e-07, + "loss": 0.2735, + "step": 42344 + }, + { + "epoch": 0.8476841077997147, + "grad_norm": 1.147963047027588, + "learning_rate": 5.960702514563804e-07, + "loss": 0.2661, + "step": 42345 + }, + { + "epoch": 0.8477041263168431, + "grad_norm": 1.061000943183899, + "learning_rate": 5.959167553159151e-07, + "loss": 0.2962, + "step": 42346 + }, + { + "epoch": 0.8477241448339714, + "grad_norm": 1.2687747478485107, + "learning_rate": 5.957632776892691e-07, + "loss": 0.3365, + "step": 42347 + }, + { + "epoch": 0.8477441633510998, + "grad_norm": 1.9706796407699585, + "learning_rate": 5.956098185770864e-07, + "loss": 0.7509, + "step": 42348 + }, + { + "epoch": 0.8477641818682281, + "grad_norm": 1.3769769668579102, + "learning_rate": 5.954563779800121e-07, + "loss": 0.3472, + "step": 42349 + }, + { + "epoch": 0.8477842003853564, + "grad_norm": 1.1140775680541992, + "learning_rate": 5.953029558986901e-07, + "loss": 0.3144, + "step": 42350 + }, + { + "epoch": 0.8478042189024848, + "grad_norm": 1.1863629817962646, + "learning_rate": 5.95149552333767e-07, + "loss": 0.267, + "step": 42351 + }, + { + "epoch": 0.8478242374196131, + "grad_norm": 1.1814939975738525, + "learning_rate": 5.949961672858878e-07, + "loss": 0.3045, + "step": 42352 + }, + { + "epoch": 0.8478442559367415, + "grad_norm": 1.1727092266082764, + "learning_rate": 5.948428007556972e-07, + "loss": 0.2943, + "step": 42353 + }, + { + "epoch": 0.8478642744538698, + "grad_norm": 1.1048028469085693, + "learning_rate": 5.946894527438391e-07, + "loss": 0.3141, + "step": 42354 + }, + { + "epoch": 0.8478842929709982, + "grad_norm": 1.2746922969818115, + "learning_rate": 5.945361232509578e-07, + "loss": 0.275, + "step": 42355 + }, + { + "epoch": 0.8479043114881265, + "grad_norm": 1.0969756841659546, + "learning_rate": 5.943828122776995e-07, + "loss": 0.3077, + "step": 42356 + }, + { + "epoch": 0.8479243300052549, + "grad_norm": 1.4043172597885132, + "learning_rate": 5.942295198247078e-07, + "loss": 0.2919, + "step": 42357 + }, + { + "epoch": 0.8479443485223832, + "grad_norm": 1.1474475860595703, + "learning_rate": 5.940762458926269e-07, + "loss": 0.2972, + "step": 42358 + }, + { + "epoch": 0.8479643670395115, + "grad_norm": 1.1715956926345825, + "learning_rate": 5.939229904821004e-07, + "loss": 0.3458, + "step": 42359 + }, + { + "epoch": 0.8479843855566399, + "grad_norm": 1.077290415763855, + "learning_rate": 5.937697535937742e-07, + "loss": 0.3266, + "step": 42360 + }, + { + "epoch": 0.8480044040737682, + "grad_norm": 1.1009881496429443, + "learning_rate": 5.936165352282919e-07, + "loss": 0.2526, + "step": 42361 + }, + { + "epoch": 0.8480244225908966, + "grad_norm": 1.025207281112671, + "learning_rate": 5.934633353862973e-07, + "loss": 0.267, + "step": 42362 + }, + { + "epoch": 0.8480444411080249, + "grad_norm": 1.113267183303833, + "learning_rate": 5.933101540684338e-07, + "loss": 0.3476, + "step": 42363 + }, + { + "epoch": 0.8480644596251533, + "grad_norm": 1.160463809967041, + "learning_rate": 5.93156991275346e-07, + "loss": 0.3095, + "step": 42364 + }, + { + "epoch": 0.8480844781422816, + "grad_norm": 1.2343136072158813, + "learning_rate": 5.930038470076787e-07, + "loss": 0.3455, + "step": 42365 + }, + { + "epoch": 0.8481044966594099, + "grad_norm": 1.0995203256607056, + "learning_rate": 5.92850721266075e-07, + "loss": 0.2911, + "step": 42366 + }, + { + "epoch": 0.8481245151765383, + "grad_norm": 1.2130959033966064, + "learning_rate": 5.926976140511787e-07, + "loss": 0.3039, + "step": 42367 + }, + { + "epoch": 0.8481445336936666, + "grad_norm": 1.32475745677948, + "learning_rate": 5.925445253636313e-07, + "loss": 0.2991, + "step": 42368 + }, + { + "epoch": 0.848164552210795, + "grad_norm": 1.337185263633728, + "learning_rate": 5.923914552040799e-07, + "loss": 0.3091, + "step": 42369 + }, + { + "epoch": 0.8481845707279233, + "grad_norm": 1.0640887022018433, + "learning_rate": 5.922384035731665e-07, + "loss": 0.2673, + "step": 42370 + }, + { + "epoch": 0.8482045892450517, + "grad_norm": 1.0104817152023315, + "learning_rate": 5.92085370471534e-07, + "loss": 0.2475, + "step": 42371 + }, + { + "epoch": 0.84822460776218, + "grad_norm": 1.1101754903793335, + "learning_rate": 5.919323558998258e-07, + "loss": 0.2732, + "step": 42372 + }, + { + "epoch": 0.8482446262793084, + "grad_norm": 1.1700905561447144, + "learning_rate": 5.917793598586846e-07, + "loss": 0.2594, + "step": 42373 + }, + { + "epoch": 0.8482646447964367, + "grad_norm": 1.1704883575439453, + "learning_rate": 5.91626382348755e-07, + "loss": 0.3185, + "step": 42374 + }, + { + "epoch": 0.848284663313565, + "grad_norm": 1.0182007551193237, + "learning_rate": 5.914734233706798e-07, + "loss": 0.2631, + "step": 42375 + }, + { + "epoch": 0.8483046818306934, + "grad_norm": 1.1468627452850342, + "learning_rate": 5.913204829251007e-07, + "loss": 0.2957, + "step": 42376 + }, + { + "epoch": 0.8483247003478217, + "grad_norm": 1.7698948383331299, + "learning_rate": 5.91167561012661e-07, + "loss": 0.7227, + "step": 42377 + }, + { + "epoch": 0.8483447188649501, + "grad_norm": 1.1250157356262207, + "learning_rate": 5.910146576340054e-07, + "loss": 0.2663, + "step": 42378 + }, + { + "epoch": 0.8483647373820784, + "grad_norm": 1.0997631549835205, + "learning_rate": 5.908617727897753e-07, + "loss": 0.2395, + "step": 42379 + }, + { + "epoch": 0.8483847558992068, + "grad_norm": 1.2106437683105469, + "learning_rate": 5.907089064806137e-07, + "loss": 0.345, + "step": 42380 + }, + { + "epoch": 0.8484047744163351, + "grad_norm": 1.2572338581085205, + "learning_rate": 5.905560587071613e-07, + "loss": 0.3252, + "step": 42381 + }, + { + "epoch": 0.8484247929334634, + "grad_norm": 1.1053335666656494, + "learning_rate": 5.90403229470064e-07, + "loss": 0.2962, + "step": 42382 + }, + { + "epoch": 0.8484448114505918, + "grad_norm": 1.1649317741394043, + "learning_rate": 5.902504187699621e-07, + "loss": 0.3127, + "step": 42383 + }, + { + "epoch": 0.8484648299677201, + "grad_norm": 1.189156174659729, + "learning_rate": 5.900976266074987e-07, + "loss": 0.3585, + "step": 42384 + }, + { + "epoch": 0.8484848484848485, + "grad_norm": 1.0630940198898315, + "learning_rate": 5.899448529833157e-07, + "loss": 0.2795, + "step": 42385 + }, + { + "epoch": 0.8485048670019768, + "grad_norm": 1.118595838546753, + "learning_rate": 5.897920978980548e-07, + "loss": 0.3384, + "step": 42386 + }, + { + "epoch": 0.8485248855191052, + "grad_norm": 1.3590209484100342, + "learning_rate": 5.896393613523599e-07, + "loss": 0.286, + "step": 42387 + }, + { + "epoch": 0.8485449040362335, + "grad_norm": 1.9210636615753174, + "learning_rate": 5.894866433468716e-07, + "loss": 0.7183, + "step": 42388 + }, + { + "epoch": 0.8485649225533619, + "grad_norm": 1.0744824409484863, + "learning_rate": 5.893339438822315e-07, + "loss": 0.3203, + "step": 42389 + }, + { + "epoch": 0.8485849410704902, + "grad_norm": 1.0967166423797607, + "learning_rate": 5.891812629590821e-07, + "loss": 0.2867, + "step": 42390 + }, + { + "epoch": 0.8486049595876185, + "grad_norm": 1.247046947479248, + "learning_rate": 5.890286005780665e-07, + "loss": 0.3132, + "step": 42391 + }, + { + "epoch": 0.8486249781047469, + "grad_norm": 0.9889549612998962, + "learning_rate": 5.888759567398256e-07, + "loss": 0.2612, + "step": 42392 + }, + { + "epoch": 0.8486449966218752, + "grad_norm": 1.08772611618042, + "learning_rate": 5.887233314450008e-07, + "loss": 0.274, + "step": 42393 + }, + { + "epoch": 0.8486650151390036, + "grad_norm": 1.19194757938385, + "learning_rate": 5.885707246942335e-07, + "loss": 0.2899, + "step": 42394 + }, + { + "epoch": 0.8486850336561319, + "grad_norm": 1.050731897354126, + "learning_rate": 5.884181364881653e-07, + "loss": 0.3041, + "step": 42395 + }, + { + "epoch": 0.8487050521732603, + "grad_norm": 1.0555659532546997, + "learning_rate": 5.882655668274379e-07, + "loss": 0.2744, + "step": 42396 + }, + { + "epoch": 0.8487250706903886, + "grad_norm": 1.055417776107788, + "learning_rate": 5.881130157126935e-07, + "loss": 0.2892, + "step": 42397 + }, + { + "epoch": 0.8487450892075169, + "grad_norm": 1.1920162439346313, + "learning_rate": 5.879604831445717e-07, + "loss": 0.3278, + "step": 42398 + }, + { + "epoch": 0.8487651077246453, + "grad_norm": 1.2987180948257446, + "learning_rate": 5.878079691237143e-07, + "loss": 0.3545, + "step": 42399 + }, + { + "epoch": 0.8487851262417736, + "grad_norm": 1.0989415645599365, + "learning_rate": 5.876554736507634e-07, + "loss": 0.3067, + "step": 42400 + }, + { + "epoch": 0.848805144758902, + "grad_norm": 1.0434895753860474, + "learning_rate": 5.875029967263596e-07, + "loss": 0.2921, + "step": 42401 + }, + { + "epoch": 0.8488251632760303, + "grad_norm": 1.9468021392822266, + "learning_rate": 5.873505383511419e-07, + "loss": 0.8297, + "step": 42402 + }, + { + "epoch": 0.8488451817931587, + "grad_norm": 1.009535312652588, + "learning_rate": 5.871980985257547e-07, + "loss": 0.2605, + "step": 42403 + }, + { + "epoch": 0.848865200310287, + "grad_norm": 1.112114667892456, + "learning_rate": 5.870456772508354e-07, + "loss": 0.261, + "step": 42404 + }, + { + "epoch": 0.8488852188274154, + "grad_norm": 1.220266580581665, + "learning_rate": 5.868932745270278e-07, + "loss": 0.2819, + "step": 42405 + }, + { + "epoch": 0.8489052373445437, + "grad_norm": 1.1694458723068237, + "learning_rate": 5.86740890354971e-07, + "loss": 0.2592, + "step": 42406 + }, + { + "epoch": 0.848925255861672, + "grad_norm": 1.0550857782363892, + "learning_rate": 5.865885247353059e-07, + "loss": 0.291, + "step": 42407 + }, + { + "epoch": 0.8489452743788004, + "grad_norm": 1.1371939182281494, + "learning_rate": 5.864361776686722e-07, + "loss": 0.3012, + "step": 42408 + }, + { + "epoch": 0.8489652928959287, + "grad_norm": 1.1983897686004639, + "learning_rate": 5.862838491557116e-07, + "loss": 0.318, + "step": 42409 + }, + { + "epoch": 0.8489853114130571, + "grad_norm": 1.1035137176513672, + "learning_rate": 5.861315391970634e-07, + "loss": 0.2859, + "step": 42410 + }, + { + "epoch": 0.8490053299301854, + "grad_norm": 1.1042736768722534, + "learning_rate": 5.85979247793369e-07, + "loss": 0.3002, + "step": 42411 + }, + { + "epoch": 0.8490253484473138, + "grad_norm": 1.0802216529846191, + "learning_rate": 5.858269749452666e-07, + "loss": 0.3101, + "step": 42412 + }, + { + "epoch": 0.8490453669644421, + "grad_norm": 1.0931490659713745, + "learning_rate": 5.856747206533991e-07, + "loss": 0.2839, + "step": 42413 + }, + { + "epoch": 0.8490653854815704, + "grad_norm": 1.1819506883621216, + "learning_rate": 5.855224849184049e-07, + "loss": 0.2826, + "step": 42414 + }, + { + "epoch": 0.8490854039986988, + "grad_norm": 1.0448567867279053, + "learning_rate": 5.853702677409229e-07, + "loss": 0.2886, + "step": 42415 + }, + { + "epoch": 0.8491054225158271, + "grad_norm": 1.0408474206924438, + "learning_rate": 5.852180691215953e-07, + "loss": 0.2813, + "step": 42416 + }, + { + "epoch": 0.8491254410329555, + "grad_norm": 1.0816153287887573, + "learning_rate": 5.8506588906106e-07, + "loss": 0.3549, + "step": 42417 + }, + { + "epoch": 0.8491454595500838, + "grad_norm": 1.120379090309143, + "learning_rate": 5.849137275599587e-07, + "loss": 0.2789, + "step": 42418 + }, + { + "epoch": 0.8491654780672122, + "grad_norm": 1.0230433940887451, + "learning_rate": 5.847615846189297e-07, + "loss": 0.2648, + "step": 42419 + }, + { + "epoch": 0.8491854965843405, + "grad_norm": 1.136983871459961, + "learning_rate": 5.846094602386132e-07, + "loss": 0.3039, + "step": 42420 + }, + { + "epoch": 0.8492055151014689, + "grad_norm": 1.3037508726119995, + "learning_rate": 5.844573544196469e-07, + "loss": 0.3089, + "step": 42421 + }, + { + "epoch": 0.8492255336185972, + "grad_norm": 1.2131729125976562, + "learning_rate": 5.843052671626726e-07, + "loss": 0.2724, + "step": 42422 + }, + { + "epoch": 0.8492455521357255, + "grad_norm": 1.0863314867019653, + "learning_rate": 5.841531984683291e-07, + "loss": 0.3018, + "step": 42423 + }, + { + "epoch": 0.8492655706528539, + "grad_norm": 1.272749900817871, + "learning_rate": 5.840011483372548e-07, + "loss": 0.2864, + "step": 42424 + }, + { + "epoch": 0.8492855891699822, + "grad_norm": 1.0393260717391968, + "learning_rate": 5.8384911677009e-07, + "loss": 0.2818, + "step": 42425 + }, + { + "epoch": 0.8493056076871106, + "grad_norm": 1.0518124103546143, + "learning_rate": 5.836971037674716e-07, + "loss": 0.2416, + "step": 42426 + }, + { + "epoch": 0.8493256262042389, + "grad_norm": 1.8930673599243164, + "learning_rate": 5.835451093300415e-07, + "loss": 0.714, + "step": 42427 + }, + { + "epoch": 0.8493456447213673, + "grad_norm": 0.9998301863670349, + "learning_rate": 5.833931334584358e-07, + "loss": 0.2675, + "step": 42428 + }, + { + "epoch": 0.8493656632384956, + "grad_norm": 1.0216938257217407, + "learning_rate": 5.832411761532963e-07, + "loss": 0.268, + "step": 42429 + }, + { + "epoch": 0.8493856817556239, + "grad_norm": 1.1255109310150146, + "learning_rate": 5.830892374152597e-07, + "loss": 0.2772, + "step": 42430 + }, + { + "epoch": 0.8494057002727523, + "grad_norm": 1.1811981201171875, + "learning_rate": 5.82937317244966e-07, + "loss": 0.29, + "step": 42431 + }, + { + "epoch": 0.8494257187898806, + "grad_norm": 1.2474322319030762, + "learning_rate": 5.827854156430534e-07, + "loss": 0.3063, + "step": 42432 + }, + { + "epoch": 0.849445737307009, + "grad_norm": 1.9404120445251465, + "learning_rate": 5.826335326101601e-07, + "loss": 0.7799, + "step": 42433 + }, + { + "epoch": 0.8494657558241373, + "grad_norm": 1.533233404159546, + "learning_rate": 5.824816681469248e-07, + "loss": 0.3293, + "step": 42434 + }, + { + "epoch": 0.8494857743412657, + "grad_norm": 1.1319515705108643, + "learning_rate": 5.823298222539853e-07, + "loss": 0.2949, + "step": 42435 + }, + { + "epoch": 0.849505792858394, + "grad_norm": 1.1153857707977295, + "learning_rate": 5.821779949319812e-07, + "loss": 0.314, + "step": 42436 + }, + { + "epoch": 0.8495258113755224, + "grad_norm": 1.1777774095535278, + "learning_rate": 5.820261861815502e-07, + "loss": 0.3074, + "step": 42437 + }, + { + "epoch": 0.8495458298926507, + "grad_norm": 1.391668438911438, + "learning_rate": 5.818743960033302e-07, + "loss": 0.3058, + "step": 42438 + }, + { + "epoch": 0.849565848409779, + "grad_norm": 1.1491873264312744, + "learning_rate": 5.817226243979584e-07, + "loss": 0.3196, + "step": 42439 + }, + { + "epoch": 0.8495858669269074, + "grad_norm": 1.1961967945098877, + "learning_rate": 5.815708713660756e-07, + "loss": 0.2538, + "step": 42440 + }, + { + "epoch": 0.8496058854440357, + "grad_norm": 1.1765403747558594, + "learning_rate": 5.814191369083161e-07, + "loss": 0.3035, + "step": 42441 + }, + { + "epoch": 0.8496259039611641, + "grad_norm": 1.1475471258163452, + "learning_rate": 5.812674210253211e-07, + "loss": 0.2644, + "step": 42442 + }, + { + "epoch": 0.8496459224782924, + "grad_norm": 1.1288419961929321, + "learning_rate": 5.811157237177256e-07, + "loss": 0.2767, + "step": 42443 + }, + { + "epoch": 0.8496659409954208, + "grad_norm": 1.1138134002685547, + "learning_rate": 5.809640449861703e-07, + "loss": 0.2663, + "step": 42444 + }, + { + "epoch": 0.8496859595125491, + "grad_norm": 1.9522860050201416, + "learning_rate": 5.808123848312908e-07, + "loss": 0.7677, + "step": 42445 + }, + { + "epoch": 0.8497059780296774, + "grad_norm": 1.2219921350479126, + "learning_rate": 5.806607432537248e-07, + "loss": 0.3009, + "step": 42446 + }, + { + "epoch": 0.8497259965468058, + "grad_norm": 1.1194380521774292, + "learning_rate": 5.805091202541102e-07, + "loss": 0.2813, + "step": 42447 + }, + { + "epoch": 0.8497460150639341, + "grad_norm": 1.011678695678711, + "learning_rate": 5.803575158330832e-07, + "loss": 0.299, + "step": 42448 + }, + { + "epoch": 0.8497660335810625, + "grad_norm": 1.1749759912490845, + "learning_rate": 5.802059299912826e-07, + "loss": 0.305, + "step": 42449 + }, + { + "epoch": 0.8497860520981908, + "grad_norm": 1.109976053237915, + "learning_rate": 5.800543627293459e-07, + "loss": 0.3308, + "step": 42450 + }, + { + "epoch": 0.8498060706153192, + "grad_norm": 1.2395740747451782, + "learning_rate": 5.79902814047909e-07, + "loss": 0.3095, + "step": 42451 + }, + { + "epoch": 0.8498260891324475, + "grad_norm": 2.0757241249084473, + "learning_rate": 5.797512839476083e-07, + "loss": 0.7446, + "step": 42452 + }, + { + "epoch": 0.8498461076495759, + "grad_norm": 1.2051962614059448, + "learning_rate": 5.795997724290836e-07, + "loss": 0.2825, + "step": 42453 + }, + { + "epoch": 0.8498661261667042, + "grad_norm": 1.1503099203109741, + "learning_rate": 5.794482794929684e-07, + "loss": 0.2963, + "step": 42454 + }, + { + "epoch": 0.8498861446838325, + "grad_norm": 1.0252043008804321, + "learning_rate": 5.792968051399028e-07, + "loss": 0.2879, + "step": 42455 + }, + { + "epoch": 0.8499061632009609, + "grad_norm": 1.1453423500061035, + "learning_rate": 5.791453493705223e-07, + "loss": 0.3181, + "step": 42456 + }, + { + "epoch": 0.8499261817180892, + "grad_norm": 1.336055040359497, + "learning_rate": 5.789939121854621e-07, + "loss": 0.2957, + "step": 42457 + }, + { + "epoch": 0.8499462002352176, + "grad_norm": 1.9327198266983032, + "learning_rate": 5.788424935853609e-07, + "loss": 0.69, + "step": 42458 + }, + { + "epoch": 0.8499662187523459, + "grad_norm": 1.2212072610855103, + "learning_rate": 5.786910935708545e-07, + "loss": 0.3432, + "step": 42459 + }, + { + "epoch": 0.8499862372694743, + "grad_norm": 1.0573769807815552, + "learning_rate": 5.785397121425796e-07, + "loss": 0.2887, + "step": 42460 + }, + { + "epoch": 0.8500062557866026, + "grad_norm": 1.3319041728973389, + "learning_rate": 5.783883493011705e-07, + "loss": 0.3513, + "step": 42461 + }, + { + "epoch": 0.8500262743037309, + "grad_norm": 1.1433637142181396, + "learning_rate": 5.782370050472669e-07, + "loss": 0.2975, + "step": 42462 + }, + { + "epoch": 0.8500462928208593, + "grad_norm": 1.1286885738372803, + "learning_rate": 5.780856793815032e-07, + "loss": 0.2682, + "step": 42463 + }, + { + "epoch": 0.8500663113379876, + "grad_norm": 1.070513129234314, + "learning_rate": 5.779343723045155e-07, + "loss": 0.3029, + "step": 42464 + }, + { + "epoch": 0.850086329855116, + "grad_norm": 1.1318119764328003, + "learning_rate": 5.777830838169396e-07, + "loss": 0.3072, + "step": 42465 + }, + { + "epoch": 0.8501063483722443, + "grad_norm": 1.0546728372573853, + "learning_rate": 5.776318139194126e-07, + "loss": 0.2672, + "step": 42466 + }, + { + "epoch": 0.8501263668893727, + "grad_norm": 1.1751585006713867, + "learning_rate": 5.774805626125685e-07, + "loss": 0.2846, + "step": 42467 + }, + { + "epoch": 0.850146385406501, + "grad_norm": 1.1061911582946777, + "learning_rate": 5.773293298970456e-07, + "loss": 0.2791, + "step": 42468 + }, + { + "epoch": 0.8501664039236294, + "grad_norm": 1.1462647914886475, + "learning_rate": 5.771781157734785e-07, + "loss": 0.2947, + "step": 42469 + }, + { + "epoch": 0.8501864224407577, + "grad_norm": 1.1064051389694214, + "learning_rate": 5.770269202425016e-07, + "loss": 0.2643, + "step": 42470 + }, + { + "epoch": 0.850206440957886, + "grad_norm": 1.1710257530212402, + "learning_rate": 5.768757433047529e-07, + "loss": 0.3319, + "step": 42471 + }, + { + "epoch": 0.8502264594750144, + "grad_norm": 1.1087043285369873, + "learning_rate": 5.767245849608666e-07, + "loss": 0.3013, + "step": 42472 + }, + { + "epoch": 0.8502464779921427, + "grad_norm": 1.2803703546524048, + "learning_rate": 5.765734452114785e-07, + "loss": 0.2998, + "step": 42473 + }, + { + "epoch": 0.8502664965092711, + "grad_norm": 1.234357237815857, + "learning_rate": 5.76422324057222e-07, + "loss": 0.3134, + "step": 42474 + }, + { + "epoch": 0.8502865150263994, + "grad_norm": 1.0633267164230347, + "learning_rate": 5.76271221498736e-07, + "loss": 0.2767, + "step": 42475 + }, + { + "epoch": 0.8503065335435278, + "grad_norm": 1.1495732069015503, + "learning_rate": 5.761201375366531e-07, + "loss": 0.2719, + "step": 42476 + }, + { + "epoch": 0.8503265520606561, + "grad_norm": 1.2160770893096924, + "learning_rate": 5.759690721716093e-07, + "loss": 0.3329, + "step": 42477 + }, + { + "epoch": 0.8503465705777844, + "grad_norm": 0.9216701984405518, + "learning_rate": 5.758180254042383e-07, + "loss": 0.306, + "step": 42478 + }, + { + "epoch": 0.8503665890949128, + "grad_norm": 1.0880542993545532, + "learning_rate": 5.756669972351775e-07, + "loss": 0.3241, + "step": 42479 + }, + { + "epoch": 0.8503866076120411, + "grad_norm": 1.2389980554580688, + "learning_rate": 5.755159876650596e-07, + "loss": 0.2964, + "step": 42480 + }, + { + "epoch": 0.8504066261291695, + "grad_norm": 1.0015647411346436, + "learning_rate": 5.753649966945213e-07, + "loss": 0.2638, + "step": 42481 + }, + { + "epoch": 0.8504266446462978, + "grad_norm": 1.1747326850891113, + "learning_rate": 5.75214024324196e-07, + "loss": 0.3213, + "step": 42482 + }, + { + "epoch": 0.8504466631634262, + "grad_norm": 1.2820578813552856, + "learning_rate": 5.750630705547184e-07, + "loss": 0.354, + "step": 42483 + }, + { + "epoch": 0.8504666816805545, + "grad_norm": 1.1386709213256836, + "learning_rate": 5.749121353867238e-07, + "loss": 0.2644, + "step": 42484 + }, + { + "epoch": 0.8504867001976829, + "grad_norm": 1.9321320056915283, + "learning_rate": 5.747612188208462e-07, + "loss": 0.7617, + "step": 42485 + }, + { + "epoch": 0.8505067187148112, + "grad_norm": 1.1174957752227783, + "learning_rate": 5.746103208577208e-07, + "loss": 0.2691, + "step": 42486 + }, + { + "epoch": 0.8505267372319395, + "grad_norm": 1.1652765274047852, + "learning_rate": 5.744594414979804e-07, + "loss": 0.2928, + "step": 42487 + }, + { + "epoch": 0.8505467557490679, + "grad_norm": 1.0133880376815796, + "learning_rate": 5.743085807422599e-07, + "loss": 0.2983, + "step": 42488 + }, + { + "epoch": 0.8505667742661962, + "grad_norm": 1.1536855697631836, + "learning_rate": 5.741577385911945e-07, + "loss": 0.2894, + "step": 42489 + }, + { + "epoch": 0.8505867927833246, + "grad_norm": 1.1445059776306152, + "learning_rate": 5.74006915045417e-07, + "loss": 0.3308, + "step": 42490 + }, + { + "epoch": 0.8506068113004529, + "grad_norm": 1.124525547027588, + "learning_rate": 5.738561101055612e-07, + "loss": 0.2787, + "step": 42491 + }, + { + "epoch": 0.8506268298175813, + "grad_norm": 1.0742413997650146, + "learning_rate": 5.737053237722634e-07, + "loss": 0.2427, + "step": 42492 + }, + { + "epoch": 0.8506468483347096, + "grad_norm": 1.3992431163787842, + "learning_rate": 5.735545560461542e-07, + "loss": 0.2681, + "step": 42493 + }, + { + "epoch": 0.8506668668518379, + "grad_norm": 1.0625883340835571, + "learning_rate": 5.734038069278702e-07, + "loss": 0.3181, + "step": 42494 + }, + { + "epoch": 0.8506868853689663, + "grad_norm": 1.0624873638153076, + "learning_rate": 5.732530764180438e-07, + "loss": 0.231, + "step": 42495 + }, + { + "epoch": 0.8507069038860946, + "grad_norm": 1.0760165452957153, + "learning_rate": 5.731023645173084e-07, + "loss": 0.2922, + "step": 42496 + }, + { + "epoch": 0.850726922403223, + "grad_norm": 1.084511160850525, + "learning_rate": 5.729516712262984e-07, + "loss": 0.2768, + "step": 42497 + }, + { + "epoch": 0.8507469409203513, + "grad_norm": 1.1185606718063354, + "learning_rate": 5.728009965456471e-07, + "loss": 0.3199, + "step": 42498 + }, + { + "epoch": 0.8507669594374797, + "grad_norm": 1.1614384651184082, + "learning_rate": 5.726503404759875e-07, + "loss": 0.2844, + "step": 42499 + }, + { + "epoch": 0.850786977954608, + "grad_norm": 1.1623197793960571, + "learning_rate": 5.724997030179535e-07, + "loss": 0.2829, + "step": 42500 + }, + { + "epoch": 0.8508069964717364, + "grad_norm": 1.1027835607528687, + "learning_rate": 5.723490841721768e-07, + "loss": 0.2899, + "step": 42501 + }, + { + "epoch": 0.8508270149888647, + "grad_norm": 1.113385558128357, + "learning_rate": 5.721984839392924e-07, + "loss": 0.3071, + "step": 42502 + }, + { + "epoch": 0.850847033505993, + "grad_norm": 1.053114891052246, + "learning_rate": 5.720479023199332e-07, + "loss": 0.2909, + "step": 42503 + }, + { + "epoch": 0.8508670520231214, + "grad_norm": 1.1293416023254395, + "learning_rate": 5.718973393147303e-07, + "loss": 0.2583, + "step": 42504 + }, + { + "epoch": 0.8508870705402497, + "grad_norm": 1.9180973768234253, + "learning_rate": 5.717467949243189e-07, + "loss": 0.7227, + "step": 42505 + }, + { + "epoch": 0.8509070890573781, + "grad_norm": 1.985036849975586, + "learning_rate": 5.715962691493304e-07, + "loss": 0.7206, + "step": 42506 + }, + { + "epoch": 0.8509271075745064, + "grad_norm": 1.0692638158798218, + "learning_rate": 5.71445761990399e-07, + "loss": 0.2192, + "step": 42507 + }, + { + "epoch": 0.8509471260916348, + "grad_norm": 1.1056584119796753, + "learning_rate": 5.712952734481569e-07, + "loss": 0.2857, + "step": 42508 + }, + { + "epoch": 0.8509671446087631, + "grad_norm": 1.143398404121399, + "learning_rate": 5.711448035232359e-07, + "loss": 0.2779, + "step": 42509 + }, + { + "epoch": 0.8509871631258914, + "grad_norm": 1.1180931329727173, + "learning_rate": 5.709943522162681e-07, + "loss": 0.3076, + "step": 42510 + }, + { + "epoch": 0.8510071816430198, + "grad_norm": 1.0755046606063843, + "learning_rate": 5.708439195278881e-07, + "loss": 0.2607, + "step": 42511 + }, + { + "epoch": 0.8510272001601481, + "grad_norm": 1.167966604232788, + "learning_rate": 5.70693505458727e-07, + "loss": 0.2662, + "step": 42512 + }, + { + "epoch": 0.8510472186772765, + "grad_norm": 1.1592968702316284, + "learning_rate": 5.705431100094177e-07, + "loss": 0.2832, + "step": 42513 + }, + { + "epoch": 0.8510672371944048, + "grad_norm": 1.1996135711669922, + "learning_rate": 5.703927331805903e-07, + "loss": 0.3219, + "step": 42514 + }, + { + "epoch": 0.8510872557115332, + "grad_norm": 1.746888279914856, + "learning_rate": 5.702423749728797e-07, + "loss": 0.8338, + "step": 42515 + }, + { + "epoch": 0.8511072742286615, + "grad_norm": 1.116175651550293, + "learning_rate": 5.700920353869172e-07, + "loss": 0.2843, + "step": 42516 + }, + { + "epoch": 0.8511272927457899, + "grad_norm": 1.1213593482971191, + "learning_rate": 5.699417144233333e-07, + "loss": 0.3143, + "step": 42517 + }, + { + "epoch": 0.8511473112629182, + "grad_norm": 1.064620018005371, + "learning_rate": 5.697914120827619e-07, + "loss": 0.2766, + "step": 42518 + }, + { + "epoch": 0.8511673297800465, + "grad_norm": 1.1304683685302734, + "learning_rate": 5.696411283658331e-07, + "loss": 0.2828, + "step": 42519 + }, + { + "epoch": 0.8511873482971749, + "grad_norm": 1.1244066953659058, + "learning_rate": 5.694908632731805e-07, + "loss": 0.3148, + "step": 42520 + }, + { + "epoch": 0.8512073668143032, + "grad_norm": 1.0513852834701538, + "learning_rate": 5.69340616805435e-07, + "loss": 0.2784, + "step": 42521 + }, + { + "epoch": 0.8512273853314316, + "grad_norm": 1.2233078479766846, + "learning_rate": 5.691903889632284e-07, + "loss": 0.3054, + "step": 42522 + }, + { + "epoch": 0.8512474038485599, + "grad_norm": 1.0305927991867065, + "learning_rate": 5.6904017974719e-07, + "loss": 0.3493, + "step": 42523 + }, + { + "epoch": 0.8512674223656883, + "grad_norm": 1.15713369846344, + "learning_rate": 5.688899891579547e-07, + "loss": 0.2562, + "step": 42524 + }, + { + "epoch": 0.8512874408828166, + "grad_norm": 1.0973472595214844, + "learning_rate": 5.687398171961523e-07, + "loss": 0.2834, + "step": 42525 + }, + { + "epoch": 0.8513074593999449, + "grad_norm": 1.2474586963653564, + "learning_rate": 5.685896638624138e-07, + "loss": 0.3912, + "step": 42526 + }, + { + "epoch": 0.8513274779170733, + "grad_norm": 1.1215343475341797, + "learning_rate": 5.684395291573702e-07, + "loss": 0.3095, + "step": 42527 + }, + { + "epoch": 0.8513474964342016, + "grad_norm": 1.1761060953140259, + "learning_rate": 5.682894130816535e-07, + "loss": 0.3178, + "step": 42528 + }, + { + "epoch": 0.85136751495133, + "grad_norm": 1.0875718593597412, + "learning_rate": 5.68139315635895e-07, + "loss": 0.3079, + "step": 42529 + }, + { + "epoch": 0.8513875334684583, + "grad_norm": 1.1523795127868652, + "learning_rate": 5.67989236820724e-07, + "loss": 0.2877, + "step": 42530 + }, + { + "epoch": 0.8514075519855867, + "grad_norm": 1.1475176811218262, + "learning_rate": 5.67839176636773e-07, + "loss": 0.3041, + "step": 42531 + }, + { + "epoch": 0.851427570502715, + "grad_norm": 0.9984130263328552, + "learning_rate": 5.676891350846714e-07, + "loss": 0.2498, + "step": 42532 + }, + { + "epoch": 0.8514475890198434, + "grad_norm": 0.9837369918823242, + "learning_rate": 5.67539112165052e-07, + "loss": 0.2762, + "step": 42533 + }, + { + "epoch": 0.8514676075369717, + "grad_norm": 1.89650559425354, + "learning_rate": 5.673891078785443e-07, + "loss": 0.7671, + "step": 42534 + }, + { + "epoch": 0.8514876260541, + "grad_norm": 1.0640904903411865, + "learning_rate": 5.672391222257789e-07, + "loss": 0.3178, + "step": 42535 + }, + { + "epoch": 0.8515076445712284, + "grad_norm": 1.1268398761749268, + "learning_rate": 5.670891552073854e-07, + "loss": 0.3087, + "step": 42536 + }, + { + "epoch": 0.8515276630883567, + "grad_norm": 1.0478049516677856, + "learning_rate": 5.669392068239959e-07, + "loss": 0.2999, + "step": 42537 + }, + { + "epoch": 0.8515476816054851, + "grad_norm": 1.1694897413253784, + "learning_rate": 5.667892770762401e-07, + "loss": 0.2883, + "step": 42538 + }, + { + "epoch": 0.8515677001226134, + "grad_norm": 1.1350539922714233, + "learning_rate": 5.666393659647485e-07, + "loss": 0.2917, + "step": 42539 + }, + { + "epoch": 0.8515877186397418, + "grad_norm": 1.1861919164657593, + "learning_rate": 5.664894734901505e-07, + "loss": 0.2979, + "step": 42540 + }, + { + "epoch": 0.8516077371568701, + "grad_norm": 1.0988600254058838, + "learning_rate": 5.663395996530757e-07, + "loss": 0.2972, + "step": 42541 + }, + { + "epoch": 0.8516277556739984, + "grad_norm": 1.0304217338562012, + "learning_rate": 5.661897444541558e-07, + "loss": 0.2576, + "step": 42542 + }, + { + "epoch": 0.8516477741911268, + "grad_norm": 1.0294588804244995, + "learning_rate": 5.660399078940193e-07, + "loss": 0.285, + "step": 42543 + }, + { + "epoch": 0.8516677927082551, + "grad_norm": 1.3238224983215332, + "learning_rate": 5.658900899732977e-07, + "loss": 0.2983, + "step": 42544 + }, + { + "epoch": 0.8516878112253835, + "grad_norm": 1.0004355907440186, + "learning_rate": 5.65740290692619e-07, + "loss": 0.2703, + "step": 42545 + }, + { + "epoch": 0.8517078297425118, + "grad_norm": 1.9645973443984985, + "learning_rate": 5.655905100526155e-07, + "loss": 0.7718, + "step": 42546 + }, + { + "epoch": 0.8517278482596402, + "grad_norm": 1.221652626991272, + "learning_rate": 5.654407480539142e-07, + "loss": 0.3153, + "step": 42547 + }, + { + "epoch": 0.8517478667767685, + "grad_norm": 1.0606210231781006, + "learning_rate": 5.65291004697146e-07, + "loss": 0.2964, + "step": 42548 + }, + { + "epoch": 0.8517678852938969, + "grad_norm": 1.1345093250274658, + "learning_rate": 5.651412799829404e-07, + "loss": 0.3104, + "step": 42549 + }, + { + "epoch": 0.8517879038110252, + "grad_norm": 1.0571681261062622, + "learning_rate": 5.649915739119255e-07, + "loss": 0.2455, + "step": 42550 + }, + { + "epoch": 0.8518079223281535, + "grad_norm": 1.1199674606323242, + "learning_rate": 5.648418864847322e-07, + "loss": 0.3062, + "step": 42551 + }, + { + "epoch": 0.8518279408452819, + "grad_norm": 1.1952215433120728, + "learning_rate": 5.646922177019893e-07, + "loss": 0.3085, + "step": 42552 + }, + { + "epoch": 0.8518479593624102, + "grad_norm": 1.0036332607269287, + "learning_rate": 5.645425675643256e-07, + "loss": 0.2757, + "step": 42553 + }, + { + "epoch": 0.8518679778795386, + "grad_norm": 1.1327756643295288, + "learning_rate": 5.643929360723699e-07, + "loss": 0.3529, + "step": 42554 + }, + { + "epoch": 0.8518879963966669, + "grad_norm": 2.0159828662872314, + "learning_rate": 5.642433232267525e-07, + "loss": 0.6906, + "step": 42555 + }, + { + "epoch": 0.8519080149137953, + "grad_norm": 1.1062430143356323, + "learning_rate": 5.640937290281002e-07, + "loss": 0.3217, + "step": 42556 + }, + { + "epoch": 0.8519280334309236, + "grad_norm": 1.0782654285430908, + "learning_rate": 5.639441534770446e-07, + "loss": 0.2555, + "step": 42557 + }, + { + "epoch": 0.8519480519480519, + "grad_norm": 1.161376714706421, + "learning_rate": 5.637945965742125e-07, + "loss": 0.34, + "step": 42558 + }, + { + "epoch": 0.8519680704651803, + "grad_norm": 1.1924266815185547, + "learning_rate": 5.636450583202336e-07, + "loss": 0.2856, + "step": 42559 + }, + { + "epoch": 0.8519880889823086, + "grad_norm": 1.1327850818634033, + "learning_rate": 5.634955387157365e-07, + "loss": 0.286, + "step": 42560 + }, + { + "epoch": 0.852008107499437, + "grad_norm": 1.1218910217285156, + "learning_rate": 5.633460377613492e-07, + "loss": 0.3285, + "step": 42561 + }, + { + "epoch": 0.8520281260165653, + "grad_norm": 1.2379590272903442, + "learning_rate": 5.631965554577007e-07, + "loss": 0.3032, + "step": 42562 + }, + { + "epoch": 0.8520481445336937, + "grad_norm": 1.157106637954712, + "learning_rate": 5.630470918054176e-07, + "loss": 0.3414, + "step": 42563 + }, + { + "epoch": 0.852068163050822, + "grad_norm": 1.1286836862564087, + "learning_rate": 5.628976468051312e-07, + "loss": 0.3028, + "step": 42564 + }, + { + "epoch": 0.8520881815679504, + "grad_norm": 1.103921890258789, + "learning_rate": 5.62748220457468e-07, + "loss": 0.3021, + "step": 42565 + }, + { + "epoch": 0.8521082000850787, + "grad_norm": 1.065752387046814, + "learning_rate": 5.625988127630566e-07, + "loss": 0.2952, + "step": 42566 + }, + { + "epoch": 0.852128218602207, + "grad_norm": 1.2537564039230347, + "learning_rate": 5.624494237225231e-07, + "loss": 0.2761, + "step": 42567 + }, + { + "epoch": 0.8521482371193354, + "grad_norm": 1.3076646327972412, + "learning_rate": 5.623000533364992e-07, + "loss": 0.2463, + "step": 42568 + }, + { + "epoch": 0.8521682556364637, + "grad_norm": 1.0980677604675293, + "learning_rate": 5.621507016056094e-07, + "loss": 0.3064, + "step": 42569 + }, + { + "epoch": 0.8521882741535921, + "grad_norm": 1.0561603307724, + "learning_rate": 5.620013685304837e-07, + "loss": 0.2767, + "step": 42570 + }, + { + "epoch": 0.8522082926707204, + "grad_norm": 2.1246025562286377, + "learning_rate": 5.618520541117495e-07, + "loss": 0.8007, + "step": 42571 + }, + { + "epoch": 0.8522283111878488, + "grad_norm": 1.1558563709259033, + "learning_rate": 5.617027583500334e-07, + "loss": 0.3023, + "step": 42572 + }, + { + "epoch": 0.8522483297049771, + "grad_norm": 1.1384090185165405, + "learning_rate": 5.615534812459645e-07, + "loss": 0.3052, + "step": 42573 + }, + { + "epoch": 0.8522683482221054, + "grad_norm": 1.2017916440963745, + "learning_rate": 5.614042228001698e-07, + "loss": 0.3182, + "step": 42574 + }, + { + "epoch": 0.8522883667392338, + "grad_norm": 1.0605058670043945, + "learning_rate": 5.612549830132763e-07, + "loss": 0.3017, + "step": 42575 + }, + { + "epoch": 0.8523083852563621, + "grad_norm": 1.1768282651901245, + "learning_rate": 5.611057618859105e-07, + "loss": 0.3001, + "step": 42576 + }, + { + "epoch": 0.8523284037734905, + "grad_norm": 1.0647988319396973, + "learning_rate": 5.609565594187022e-07, + "loss": 0.2719, + "step": 42577 + }, + { + "epoch": 0.8523484222906188, + "grad_norm": 1.0574198961257935, + "learning_rate": 5.608073756122773e-07, + "loss": 0.3266, + "step": 42578 + }, + { + "epoch": 0.8523684408077472, + "grad_norm": 1.0630205869674683, + "learning_rate": 5.606582104672626e-07, + "loss": 0.271, + "step": 42579 + }, + { + "epoch": 0.8523884593248755, + "grad_norm": 1.2042816877365112, + "learning_rate": 5.605090639842847e-07, + "loss": 0.3155, + "step": 42580 + }, + { + "epoch": 0.8524084778420039, + "grad_norm": 1.8196790218353271, + "learning_rate": 5.603599361639717e-07, + "loss": 0.7313, + "step": 42581 + }, + { + "epoch": 0.8524284963591322, + "grad_norm": 2.0574092864990234, + "learning_rate": 5.602108270069495e-07, + "loss": 0.767, + "step": 42582 + }, + { + "epoch": 0.8524485148762605, + "grad_norm": 1.0521403551101685, + "learning_rate": 5.600617365138467e-07, + "loss": 0.2786, + "step": 42583 + }, + { + "epoch": 0.8524685333933889, + "grad_norm": 1.1839278936386108, + "learning_rate": 5.599126646852887e-07, + "loss": 0.2912, + "step": 42584 + }, + { + "epoch": 0.8524885519105172, + "grad_norm": 1.0106043815612793, + "learning_rate": 5.597636115219013e-07, + "loss": 0.2859, + "step": 42585 + }, + { + "epoch": 0.8525085704276456, + "grad_norm": 2.025994300842285, + "learning_rate": 5.596145770243133e-07, + "loss": 0.7859, + "step": 42586 + }, + { + "epoch": 0.8525285889447739, + "grad_norm": 1.7983767986297607, + "learning_rate": 5.594655611931499e-07, + "loss": 0.784, + "step": 42587 + }, + { + "epoch": 0.8525486074619023, + "grad_norm": 1.929390788078308, + "learning_rate": 5.593165640290377e-07, + "loss": 0.7477, + "step": 42588 + }, + { + "epoch": 0.8525686259790306, + "grad_norm": 1.022465705871582, + "learning_rate": 5.591675855326017e-07, + "loss": 0.2768, + "step": 42589 + }, + { + "epoch": 0.8525886444961589, + "grad_norm": 1.0779306888580322, + "learning_rate": 5.59018625704471e-07, + "loss": 0.2881, + "step": 42590 + }, + { + "epoch": 0.8526086630132873, + "grad_norm": 1.088168978691101, + "learning_rate": 5.588696845452701e-07, + "loss": 0.3318, + "step": 42591 + }, + { + "epoch": 0.8526286815304156, + "grad_norm": 1.0524688959121704, + "learning_rate": 5.58720762055625e-07, + "loss": 0.3081, + "step": 42592 + }, + { + "epoch": 0.852648700047544, + "grad_norm": 1.060364007949829, + "learning_rate": 5.585718582361616e-07, + "loss": 0.271, + "step": 42593 + }, + { + "epoch": 0.8526687185646723, + "grad_norm": 1.1147422790527344, + "learning_rate": 5.584229730875068e-07, + "loss": 0.3039, + "step": 42594 + }, + { + "epoch": 0.8526887370818007, + "grad_norm": 1.2476874589920044, + "learning_rate": 5.582741066102853e-07, + "loss": 0.2784, + "step": 42595 + }, + { + "epoch": 0.852708755598929, + "grad_norm": 1.072714924812317, + "learning_rate": 5.581252588051244e-07, + "loss": 0.3037, + "step": 42596 + }, + { + "epoch": 0.8527287741160573, + "grad_norm": 1.0752921104431152, + "learning_rate": 5.579764296726492e-07, + "loss": 0.2618, + "step": 42597 + }, + { + "epoch": 0.8527487926331857, + "grad_norm": 1.0646891593933105, + "learning_rate": 5.578276192134841e-07, + "loss": 0.2412, + "step": 42598 + }, + { + "epoch": 0.852768811150314, + "grad_norm": 1.1238867044448853, + "learning_rate": 5.576788274282568e-07, + "loss": 0.2999, + "step": 42599 + }, + { + "epoch": 0.8527888296674424, + "grad_norm": 1.088754415512085, + "learning_rate": 5.575300543175915e-07, + "loss": 0.2763, + "step": 42600 + }, + { + "epoch": 0.8528088481845707, + "grad_norm": 1.1970213651657104, + "learning_rate": 5.573812998821137e-07, + "loss": 0.2851, + "step": 42601 + }, + { + "epoch": 0.8528288667016991, + "grad_norm": 1.112720251083374, + "learning_rate": 5.572325641224491e-07, + "loss": 0.2528, + "step": 42602 + }, + { + "epoch": 0.8528488852188274, + "grad_norm": 0.957224428653717, + "learning_rate": 5.570838470392221e-07, + "loss": 0.2484, + "step": 42603 + }, + { + "epoch": 0.8528689037359558, + "grad_norm": 1.0605049133300781, + "learning_rate": 5.569351486330588e-07, + "loss": 0.2748, + "step": 42604 + }, + { + "epoch": 0.8528889222530841, + "grad_norm": 1.0707268714904785, + "learning_rate": 5.567864689045843e-07, + "loss": 0.3211, + "step": 42605 + }, + { + "epoch": 0.8529089407702124, + "grad_norm": 1.0872496366500854, + "learning_rate": 5.566378078544222e-07, + "loss": 0.2961, + "step": 42606 + }, + { + "epoch": 0.8529289592873408, + "grad_norm": 1.1160439252853394, + "learning_rate": 5.564891654831994e-07, + "loss": 0.2854, + "step": 42607 + }, + { + "epoch": 0.8529489778044691, + "grad_norm": 1.0587180852890015, + "learning_rate": 5.56340541791539e-07, + "loss": 0.3073, + "step": 42608 + }, + { + "epoch": 0.8529689963215975, + "grad_norm": 1.106156349182129, + "learning_rate": 5.561919367800677e-07, + "loss": 0.3053, + "step": 42609 + }, + { + "epoch": 0.8529890148387258, + "grad_norm": 2.089900493621826, + "learning_rate": 5.560433504494095e-07, + "loss": 0.6603, + "step": 42610 + }, + { + "epoch": 0.8530090333558542, + "grad_norm": 1.151230812072754, + "learning_rate": 5.558947828001882e-07, + "loss": 0.2927, + "step": 42611 + }, + { + "epoch": 0.8530290518729825, + "grad_norm": 1.0216697454452515, + "learning_rate": 5.557462338330282e-07, + "loss": 0.2646, + "step": 42612 + }, + { + "epoch": 0.8530490703901108, + "grad_norm": 1.2146283388137817, + "learning_rate": 5.555977035485554e-07, + "loss": 0.2703, + "step": 42613 + }, + { + "epoch": 0.8530690889072392, + "grad_norm": 1.1592143774032593, + "learning_rate": 5.554491919473937e-07, + "loss": 0.2187, + "step": 42614 + }, + { + "epoch": 0.8530891074243675, + "grad_norm": 1.1892812252044678, + "learning_rate": 5.553006990301669e-07, + "loss": 0.3098, + "step": 42615 + }, + { + "epoch": 0.8531091259414959, + "grad_norm": 1.126829743385315, + "learning_rate": 5.551522247974983e-07, + "loss": 0.3235, + "step": 42616 + }, + { + "epoch": 0.8531291444586242, + "grad_norm": 1.1111019849777222, + "learning_rate": 5.550037692500143e-07, + "loss": 0.3431, + "step": 42617 + }, + { + "epoch": 0.8531491629757526, + "grad_norm": 1.225833535194397, + "learning_rate": 5.548553323883382e-07, + "loss": 0.3563, + "step": 42618 + }, + { + "epoch": 0.8531691814928809, + "grad_norm": 1.2848167419433594, + "learning_rate": 5.547069142130923e-07, + "loss": 0.3461, + "step": 42619 + }, + { + "epoch": 0.8531892000100093, + "grad_norm": 1.0577601194381714, + "learning_rate": 5.545585147249028e-07, + "loss": 0.2934, + "step": 42620 + }, + { + "epoch": 0.8532092185271376, + "grad_norm": 1.108527660369873, + "learning_rate": 5.544101339243918e-07, + "loss": 0.2562, + "step": 42621 + }, + { + "epoch": 0.8532292370442659, + "grad_norm": 1.08705735206604, + "learning_rate": 5.542617718121851e-07, + "loss": 0.2818, + "step": 42622 + }, + { + "epoch": 0.8532492555613943, + "grad_norm": 1.2631016969680786, + "learning_rate": 5.541134283889049e-07, + "loss": 0.3378, + "step": 42623 + }, + { + "epoch": 0.8532692740785226, + "grad_norm": 1.341374397277832, + "learning_rate": 5.539651036551752e-07, + "loss": 0.294, + "step": 42624 + }, + { + "epoch": 0.853289292595651, + "grad_norm": 1.2309612035751343, + "learning_rate": 5.538167976116188e-07, + "loss": 0.2876, + "step": 42625 + }, + { + "epoch": 0.8533093111127793, + "grad_norm": 1.1459580659866333, + "learning_rate": 5.536685102588602e-07, + "loss": 0.2588, + "step": 42626 + }, + { + "epoch": 0.8533293296299077, + "grad_norm": 1.0746363401412964, + "learning_rate": 5.535202415975227e-07, + "loss": 0.2927, + "step": 42627 + }, + { + "epoch": 0.853349348147036, + "grad_norm": 1.0383354425430298, + "learning_rate": 5.533719916282288e-07, + "loss": 0.2802, + "step": 42628 + }, + { + "epoch": 0.8533693666641643, + "grad_norm": 1.2191052436828613, + "learning_rate": 5.532237603516011e-07, + "loss": 0.3096, + "step": 42629 + }, + { + "epoch": 0.8533893851812927, + "grad_norm": 1.1408811807632446, + "learning_rate": 5.530755477682653e-07, + "loss": 0.3315, + "step": 42630 + }, + { + "epoch": 0.853409403698421, + "grad_norm": 1.2984685897827148, + "learning_rate": 5.529273538788427e-07, + "loss": 0.2614, + "step": 42631 + }, + { + "epoch": 0.8534294222155494, + "grad_norm": 1.103971242904663, + "learning_rate": 5.527791786839554e-07, + "loss": 0.3796, + "step": 42632 + }, + { + "epoch": 0.8534494407326777, + "grad_norm": 1.0277208089828491, + "learning_rate": 5.52631022184228e-07, + "loss": 0.2445, + "step": 42633 + }, + { + "epoch": 0.8534694592498061, + "grad_norm": 1.082661509513855, + "learning_rate": 5.524828843802821e-07, + "loss": 0.2917, + "step": 42634 + }, + { + "epoch": 0.8534894777669344, + "grad_norm": 1.1392707824707031, + "learning_rate": 5.523347652727417e-07, + "loss": 0.2912, + "step": 42635 + }, + { + "epoch": 0.8535094962840628, + "grad_norm": 1.0819196701049805, + "learning_rate": 5.521866648622292e-07, + "loss": 0.3185, + "step": 42636 + }, + { + "epoch": 0.8535295148011911, + "grad_norm": 1.188233733177185, + "learning_rate": 5.520385831493669e-07, + "loss": 0.3586, + "step": 42637 + }, + { + "epoch": 0.8535495333183194, + "grad_norm": 1.4528570175170898, + "learning_rate": 5.518905201347757e-07, + "loss": 0.2903, + "step": 42638 + }, + { + "epoch": 0.8535695518354478, + "grad_norm": 1.0531708002090454, + "learning_rate": 5.517424758190809e-07, + "loss": 0.2604, + "step": 42639 + }, + { + "epoch": 0.8535895703525761, + "grad_norm": 1.2191810607910156, + "learning_rate": 5.515944502029036e-07, + "loss": 0.3115, + "step": 42640 + }, + { + "epoch": 0.8536095888697045, + "grad_norm": 1.8916324377059937, + "learning_rate": 5.514464432868654e-07, + "loss": 0.7718, + "step": 42641 + }, + { + "epoch": 0.8536296073868328, + "grad_norm": 1.1216083765029907, + "learning_rate": 5.512984550715883e-07, + "loss": 0.2891, + "step": 42642 + }, + { + "epoch": 0.8536496259039612, + "grad_norm": 1.9387903213500977, + "learning_rate": 5.51150485557696e-07, + "loss": 0.8212, + "step": 42643 + }, + { + "epoch": 0.8536696444210895, + "grad_norm": 1.231136441230774, + "learning_rate": 5.510025347458097e-07, + "loss": 0.2846, + "step": 42644 + }, + { + "epoch": 0.8536896629382178, + "grad_norm": 1.0392625331878662, + "learning_rate": 5.508546026365502e-07, + "loss": 0.2887, + "step": 42645 + }, + { + "epoch": 0.8537096814553462, + "grad_norm": 1.093319058418274, + "learning_rate": 5.507066892305418e-07, + "loss": 0.3225, + "step": 42646 + }, + { + "epoch": 0.8537296999724745, + "grad_norm": 1.1966384649276733, + "learning_rate": 5.505587945284035e-07, + "loss": 0.311, + "step": 42647 + }, + { + "epoch": 0.8537497184896029, + "grad_norm": 1.0970821380615234, + "learning_rate": 5.504109185307599e-07, + "loss": 0.2805, + "step": 42648 + }, + { + "epoch": 0.8537697370067312, + "grad_norm": 1.1520030498504639, + "learning_rate": 5.502630612382315e-07, + "loss": 0.3056, + "step": 42649 + }, + { + "epoch": 0.8537897555238596, + "grad_norm": 1.2365621328353882, + "learning_rate": 5.501152226514389e-07, + "loss": 0.2868, + "step": 42650 + }, + { + "epoch": 0.8538097740409879, + "grad_norm": 1.1442688703536987, + "learning_rate": 5.49967402771004e-07, + "loss": 0.3236, + "step": 42651 + }, + { + "epoch": 0.8538297925581163, + "grad_norm": 0.9935909509658813, + "learning_rate": 5.498196015975488e-07, + "loss": 0.2543, + "step": 42652 + }, + { + "epoch": 0.8538498110752446, + "grad_norm": 1.0424449443817139, + "learning_rate": 5.496718191316946e-07, + "loss": 0.2586, + "step": 42653 + }, + { + "epoch": 0.8538698295923729, + "grad_norm": 1.1283893585205078, + "learning_rate": 5.495240553740628e-07, + "loss": 0.2422, + "step": 42654 + }, + { + "epoch": 0.8538898481095013, + "grad_norm": 1.0639638900756836, + "learning_rate": 5.493763103252736e-07, + "loss": 0.3023, + "step": 42655 + }, + { + "epoch": 0.8539098666266296, + "grad_norm": 1.0711488723754883, + "learning_rate": 5.492285839859474e-07, + "loss": 0.3097, + "step": 42656 + }, + { + "epoch": 0.853929885143758, + "grad_norm": 1.1270209550857544, + "learning_rate": 5.49080876356708e-07, + "loss": 0.2689, + "step": 42657 + }, + { + "epoch": 0.8539499036608863, + "grad_norm": 1.0320522785186768, + "learning_rate": 5.489331874381732e-07, + "loss": 0.2909, + "step": 42658 + }, + { + "epoch": 0.8539699221780147, + "grad_norm": 1.1430511474609375, + "learning_rate": 5.487855172309664e-07, + "loss": 0.251, + "step": 42659 + }, + { + "epoch": 0.853989940695143, + "grad_norm": 1.0278174877166748, + "learning_rate": 5.486378657357066e-07, + "loss": 0.2788, + "step": 42660 + }, + { + "epoch": 0.8540099592122713, + "grad_norm": 1.071193814277649, + "learning_rate": 5.484902329530167e-07, + "loss": 0.253, + "step": 42661 + }, + { + "epoch": 0.8540299777293997, + "grad_norm": 1.1457569599151611, + "learning_rate": 5.48342618883515e-07, + "loss": 0.3087, + "step": 42662 + }, + { + "epoch": 0.854049996246528, + "grad_norm": 1.9092363119125366, + "learning_rate": 5.481950235278239e-07, + "loss": 0.7599, + "step": 42663 + }, + { + "epoch": 0.8540700147636564, + "grad_norm": 1.0957422256469727, + "learning_rate": 5.480474468865621e-07, + "loss": 0.2897, + "step": 42664 + }, + { + "epoch": 0.8540900332807847, + "grad_norm": 1.1576505899429321, + "learning_rate": 5.478998889603499e-07, + "loss": 0.3264, + "step": 42665 + }, + { + "epoch": 0.8541100517979131, + "grad_norm": 0.9680042266845703, + "learning_rate": 5.4775234974981e-07, + "loss": 0.2437, + "step": 42666 + }, + { + "epoch": 0.8541300703150414, + "grad_norm": 1.1383090019226074, + "learning_rate": 5.476048292555602e-07, + "loss": 0.3014, + "step": 42667 + }, + { + "epoch": 0.8541500888321698, + "grad_norm": 1.1253265142440796, + "learning_rate": 5.474573274782225e-07, + "loss": 0.2872, + "step": 42668 + }, + { + "epoch": 0.8541701073492981, + "grad_norm": 1.4085432291030884, + "learning_rate": 5.473098444184144e-07, + "loss": 0.3042, + "step": 42669 + }, + { + "epoch": 0.8541901258664264, + "grad_norm": 1.1673535108566284, + "learning_rate": 5.471623800767584e-07, + "loss": 0.2982, + "step": 42670 + }, + { + "epoch": 0.8542101443835548, + "grad_norm": 1.1391053199768066, + "learning_rate": 5.470149344538727e-07, + "loss": 0.3429, + "step": 42671 + }, + { + "epoch": 0.8542301629006831, + "grad_norm": 1.1334381103515625, + "learning_rate": 5.468675075503788e-07, + "loss": 0.3362, + "step": 42672 + }, + { + "epoch": 0.8542501814178115, + "grad_norm": 1.9647176265716553, + "learning_rate": 5.467200993668947e-07, + "loss": 0.7952, + "step": 42673 + }, + { + "epoch": 0.8542701999349398, + "grad_norm": 1.1625335216522217, + "learning_rate": 5.465727099040418e-07, + "loss": 0.2656, + "step": 42674 + }, + { + "epoch": 0.8542902184520682, + "grad_norm": 1.1741353273391724, + "learning_rate": 5.464253391624391e-07, + "loss": 0.3027, + "step": 42675 + }, + { + "epoch": 0.8543102369691965, + "grad_norm": 1.7727396488189697, + "learning_rate": 5.462779871427059e-07, + "loss": 0.7942, + "step": 42676 + }, + { + "epoch": 0.8543302554863248, + "grad_norm": 1.1266618967056274, + "learning_rate": 5.461306538454614e-07, + "loss": 0.2962, + "step": 42677 + }, + { + "epoch": 0.8543502740034532, + "grad_norm": 1.0608346462249756, + "learning_rate": 5.459833392713243e-07, + "loss": 0.2867, + "step": 42678 + }, + { + "epoch": 0.8543702925205815, + "grad_norm": 1.1341229677200317, + "learning_rate": 5.458360434209154e-07, + "loss": 0.2616, + "step": 42679 + }, + { + "epoch": 0.8543903110377099, + "grad_norm": 1.1620315313339233, + "learning_rate": 5.456887662948535e-07, + "loss": 0.3108, + "step": 42680 + }, + { + "epoch": 0.8544103295548382, + "grad_norm": 1.0403317213058472, + "learning_rate": 5.455415078937576e-07, + "loss": 0.3031, + "step": 42681 + }, + { + "epoch": 0.8544303480719666, + "grad_norm": 1.0613597631454468, + "learning_rate": 5.45394268218245e-07, + "loss": 0.2513, + "step": 42682 + }, + { + "epoch": 0.8544503665890949, + "grad_norm": 1.0567240715026855, + "learning_rate": 5.452470472689375e-07, + "loss": 0.304, + "step": 42683 + }, + { + "epoch": 0.8544703851062233, + "grad_norm": 1.1696385145187378, + "learning_rate": 5.450998450464523e-07, + "loss": 0.3062, + "step": 42684 + }, + { + "epoch": 0.8544904036233516, + "grad_norm": 2.135319948196411, + "learning_rate": 5.449526615514089e-07, + "loss": 0.7485, + "step": 42685 + }, + { + "epoch": 0.8545104221404799, + "grad_norm": 2.044144868850708, + "learning_rate": 5.448054967844263e-07, + "loss": 0.744, + "step": 42686 + }, + { + "epoch": 0.8545304406576083, + "grad_norm": 1.177493929862976, + "learning_rate": 5.446583507461212e-07, + "loss": 0.3181, + "step": 42687 + }, + { + "epoch": 0.8545504591747366, + "grad_norm": 1.0044629573822021, + "learning_rate": 5.445112234371147e-07, + "loss": 0.3002, + "step": 42688 + }, + { + "epoch": 0.854570477691865, + "grad_norm": 1.1233818531036377, + "learning_rate": 5.443641148580242e-07, + "loss": 0.2522, + "step": 42689 + }, + { + "epoch": 0.8545904962089933, + "grad_norm": 1.0001280307769775, + "learning_rate": 5.442170250094686e-07, + "loss": 0.2711, + "step": 42690 + }, + { + "epoch": 0.8546105147261217, + "grad_norm": 1.1695661544799805, + "learning_rate": 5.44069953892064e-07, + "loss": 0.3338, + "step": 42691 + }, + { + "epoch": 0.85463053324325, + "grad_norm": 0.9941595792770386, + "learning_rate": 5.439229015064317e-07, + "loss": 0.2726, + "step": 42692 + }, + { + "epoch": 0.8546505517603783, + "grad_norm": 1.1145646572113037, + "learning_rate": 5.437758678531884e-07, + "loss": 0.2389, + "step": 42693 + }, + { + "epoch": 0.8546705702775067, + "grad_norm": 2.215088367462158, + "learning_rate": 5.436288529329525e-07, + "loss": 0.7427, + "step": 42694 + }, + { + "epoch": 0.854690588794635, + "grad_norm": 1.1290524005889893, + "learning_rate": 5.434818567463423e-07, + "loss": 0.2898, + "step": 42695 + }, + { + "epoch": 0.8547106073117634, + "grad_norm": 1.113609790802002, + "learning_rate": 5.433348792939735e-07, + "loss": 0.288, + "step": 42696 + }, + { + "epoch": 0.8547306258288917, + "grad_norm": 1.1743227243423462, + "learning_rate": 5.431879205764662e-07, + "loss": 0.3191, + "step": 42697 + }, + { + "epoch": 0.8547506443460201, + "grad_norm": 1.3575222492218018, + "learning_rate": 5.430409805944392e-07, + "loss": 0.326, + "step": 42698 + }, + { + "epoch": 0.8547706628631484, + "grad_norm": 1.9426147937774658, + "learning_rate": 5.428940593485083e-07, + "loss": 0.7523, + "step": 42699 + }, + { + "epoch": 0.8547906813802768, + "grad_norm": 1.1270620822906494, + "learning_rate": 5.427471568392906e-07, + "loss": 0.3357, + "step": 42700 + }, + { + "epoch": 0.8548106998974051, + "grad_norm": 1.041898250579834, + "learning_rate": 5.426002730674057e-07, + "loss": 0.2667, + "step": 42701 + }, + { + "epoch": 0.8548307184145334, + "grad_norm": 1.1299562454223633, + "learning_rate": 5.424534080334698e-07, + "loss": 0.3003, + "step": 42702 + }, + { + "epoch": 0.8548507369316618, + "grad_norm": 1.2137482166290283, + "learning_rate": 5.423065617381007e-07, + "loss": 0.2768, + "step": 42703 + }, + { + "epoch": 0.8548707554487901, + "grad_norm": 1.1208113431930542, + "learning_rate": 5.421597341819146e-07, + "loss": 0.2966, + "step": 42704 + }, + { + "epoch": 0.8548907739659185, + "grad_norm": 1.11642324924469, + "learning_rate": 5.420129253655304e-07, + "loss": 0.2925, + "step": 42705 + }, + { + "epoch": 0.8549107924830468, + "grad_norm": 1.1068217754364014, + "learning_rate": 5.418661352895649e-07, + "loss": 0.3043, + "step": 42706 + }, + { + "epoch": 0.8549308110001752, + "grad_norm": 1.1519644260406494, + "learning_rate": 5.417193639546342e-07, + "loss": 0.2932, + "step": 42707 + }, + { + "epoch": 0.8549508295173035, + "grad_norm": 1.9288456439971924, + "learning_rate": 5.415726113613562e-07, + "loss": 0.7382, + "step": 42708 + }, + { + "epoch": 0.8549708480344318, + "grad_norm": 1.1709858179092407, + "learning_rate": 5.414258775103464e-07, + "loss": 0.3066, + "step": 42709 + }, + { + "epoch": 0.8549908665515602, + "grad_norm": 1.1529844999313354, + "learning_rate": 5.412791624022229e-07, + "loss": 0.3285, + "step": 42710 + }, + { + "epoch": 0.8550108850686885, + "grad_norm": 1.3464305400848389, + "learning_rate": 5.411324660376027e-07, + "loss": 0.303, + "step": 42711 + }, + { + "epoch": 0.8550309035858169, + "grad_norm": 1.039739966392517, + "learning_rate": 5.409857884171027e-07, + "loss": 0.2738, + "step": 42712 + }, + { + "epoch": 0.8550509221029452, + "grad_norm": 1.1361812353134155, + "learning_rate": 5.408391295413378e-07, + "loss": 0.2784, + "step": 42713 + }, + { + "epoch": 0.8550709406200736, + "grad_norm": 1.8021680116653442, + "learning_rate": 5.406924894109261e-07, + "loss": 0.7936, + "step": 42714 + }, + { + "epoch": 0.8550909591372019, + "grad_norm": 1.3482986688613892, + "learning_rate": 5.405458680264841e-07, + "loss": 0.3079, + "step": 42715 + }, + { + "epoch": 0.8551109776543303, + "grad_norm": 1.0511088371276855, + "learning_rate": 5.40399265388627e-07, + "loss": 0.3102, + "step": 42716 + }, + { + "epoch": 0.8551309961714586, + "grad_norm": 0.9969722032546997, + "learning_rate": 5.40252681497972e-07, + "loss": 0.2752, + "step": 42717 + }, + { + "epoch": 0.8551510146885869, + "grad_norm": 1.5909324884414673, + "learning_rate": 5.401061163551341e-07, + "loss": 0.3022, + "step": 42718 + }, + { + "epoch": 0.8551710332057153, + "grad_norm": 1.0909336805343628, + "learning_rate": 5.399595699607313e-07, + "loss": 0.2678, + "step": 42719 + }, + { + "epoch": 0.8551910517228436, + "grad_norm": 1.1340044736862183, + "learning_rate": 5.398130423153785e-07, + "loss": 0.3378, + "step": 42720 + }, + { + "epoch": 0.855211070239972, + "grad_norm": 1.1075615882873535, + "learning_rate": 5.396665334196915e-07, + "loss": 0.2785, + "step": 42721 + }, + { + "epoch": 0.8552310887571003, + "grad_norm": 1.0882567167282104, + "learning_rate": 5.395200432742858e-07, + "loss": 0.3375, + "step": 42722 + }, + { + "epoch": 0.8552511072742287, + "grad_norm": 2.067868232727051, + "learning_rate": 5.393735718797782e-07, + "loss": 0.7747, + "step": 42723 + }, + { + "epoch": 0.855271125791357, + "grad_norm": 1.0198427438735962, + "learning_rate": 5.392271192367849e-07, + "loss": 0.3041, + "step": 42724 + }, + { + "epoch": 0.8552911443084853, + "grad_norm": 1.0849698781967163, + "learning_rate": 5.390806853459207e-07, + "loss": 0.2649, + "step": 42725 + }, + { + "epoch": 0.8553111628256137, + "grad_norm": 1.1262116432189941, + "learning_rate": 5.389342702078016e-07, + "loss": 0.2918, + "step": 42726 + }, + { + "epoch": 0.855331181342742, + "grad_norm": 0.9755104780197144, + "learning_rate": 5.387878738230418e-07, + "loss": 0.2698, + "step": 42727 + }, + { + "epoch": 0.8553511998598704, + "grad_norm": 1.3459392786026, + "learning_rate": 5.386414961922587e-07, + "loss": 0.278, + "step": 42728 + }, + { + "epoch": 0.8553712183769987, + "grad_norm": 1.1608706712722778, + "learning_rate": 5.384951373160663e-07, + "loss": 0.2807, + "step": 42729 + }, + { + "epoch": 0.8553912368941271, + "grad_norm": 1.863012671470642, + "learning_rate": 5.383487971950807e-07, + "loss": 0.796, + "step": 42730 + }, + { + "epoch": 0.8554112554112554, + "grad_norm": 1.8143237829208374, + "learning_rate": 5.382024758299153e-07, + "loss": 0.6991, + "step": 42731 + }, + { + "epoch": 0.8554312739283838, + "grad_norm": 1.2201849222183228, + "learning_rate": 5.380561732211875e-07, + "loss": 0.2848, + "step": 42732 + }, + { + "epoch": 0.8554512924455121, + "grad_norm": 1.1591479778289795, + "learning_rate": 5.379098893695117e-07, + "loss": 0.3082, + "step": 42733 + }, + { + "epoch": 0.8554713109626404, + "grad_norm": 1.339948296546936, + "learning_rate": 5.377636242755018e-07, + "loss": 0.3053, + "step": 42734 + }, + { + "epoch": 0.8554913294797688, + "grad_norm": 1.0273208618164062, + "learning_rate": 5.376173779397731e-07, + "loss": 0.2862, + "step": 42735 + }, + { + "epoch": 0.8555113479968971, + "grad_norm": 1.1880640983581543, + "learning_rate": 5.374711503629399e-07, + "loss": 0.2541, + "step": 42736 + }, + { + "epoch": 0.8555313665140255, + "grad_norm": 1.081028938293457, + "learning_rate": 5.373249415456189e-07, + "loss": 0.294, + "step": 42737 + }, + { + "epoch": 0.8555513850311538, + "grad_norm": 1.0621509552001953, + "learning_rate": 5.371787514884236e-07, + "loss": 0.2738, + "step": 42738 + }, + { + "epoch": 0.8555714035482822, + "grad_norm": 1.196510672569275, + "learning_rate": 5.370325801919685e-07, + "loss": 0.2691, + "step": 42739 + }, + { + "epoch": 0.8555914220654105, + "grad_norm": 1.1821916103363037, + "learning_rate": 5.36886427656867e-07, + "loss": 0.2982, + "step": 42740 + }, + { + "epoch": 0.8556114405825388, + "grad_norm": 1.1597223281860352, + "learning_rate": 5.367402938837351e-07, + "loss": 0.3208, + "step": 42741 + }, + { + "epoch": 0.8556314590996672, + "grad_norm": 1.145685076713562, + "learning_rate": 5.365941788731865e-07, + "loss": 0.3162, + "step": 42742 + }, + { + "epoch": 0.8556514776167955, + "grad_norm": 1.1727508306503296, + "learning_rate": 5.364480826258361e-07, + "loss": 0.3347, + "step": 42743 + }, + { + "epoch": 0.8556714961339239, + "grad_norm": 1.1809037923812866, + "learning_rate": 5.363020051422957e-07, + "loss": 0.3056, + "step": 42744 + }, + { + "epoch": 0.8556915146510522, + "grad_norm": 1.0441133975982666, + "learning_rate": 5.361559464231824e-07, + "loss": 0.2519, + "step": 42745 + }, + { + "epoch": 0.8557115331681806, + "grad_norm": 1.012093186378479, + "learning_rate": 5.360099064691087e-07, + "loss": 0.2634, + "step": 42746 + }, + { + "epoch": 0.8557315516853089, + "grad_norm": 1.1918517351150513, + "learning_rate": 5.358638852806891e-07, + "loss": 0.3185, + "step": 42747 + }, + { + "epoch": 0.8557515702024373, + "grad_norm": 1.7776881456375122, + "learning_rate": 5.357178828585357e-07, + "loss": 0.7498, + "step": 42748 + }, + { + "epoch": 0.8557715887195656, + "grad_norm": 1.2496142387390137, + "learning_rate": 5.355718992032638e-07, + "loss": 0.3711, + "step": 42749 + }, + { + "epoch": 0.8557916072366939, + "grad_norm": 1.058579921722412, + "learning_rate": 5.354259343154872e-07, + "loss": 0.2707, + "step": 42750 + }, + { + "epoch": 0.8558116257538223, + "grad_norm": 1.1199376583099365, + "learning_rate": 5.352799881958199e-07, + "loss": 0.3009, + "step": 42751 + }, + { + "epoch": 0.8558316442709506, + "grad_norm": 1.0502396821975708, + "learning_rate": 5.351340608448747e-07, + "loss": 0.2935, + "step": 42752 + }, + { + "epoch": 0.855851662788079, + "grad_norm": 1.1372785568237305, + "learning_rate": 5.349881522632638e-07, + "loss": 0.3381, + "step": 42753 + }, + { + "epoch": 0.8558716813052073, + "grad_norm": 1.1627273559570312, + "learning_rate": 5.348422624516025e-07, + "loss": 0.3246, + "step": 42754 + }, + { + "epoch": 0.8558916998223357, + "grad_norm": 1.0706892013549805, + "learning_rate": 5.346963914105041e-07, + "loss": 0.3215, + "step": 42755 + }, + { + "epoch": 0.855911718339464, + "grad_norm": 1.8074805736541748, + "learning_rate": 5.345505391405809e-07, + "loss": 0.7085, + "step": 42756 + }, + { + "epoch": 0.8559317368565923, + "grad_norm": 1.0274724960327148, + "learning_rate": 5.344047056424462e-07, + "loss": 0.3056, + "step": 42757 + }, + { + "epoch": 0.8559517553737207, + "grad_norm": 1.2076414823532104, + "learning_rate": 5.342588909167117e-07, + "loss": 0.3038, + "step": 42758 + }, + { + "epoch": 0.855971773890849, + "grad_norm": 1.0564018487930298, + "learning_rate": 5.341130949639933e-07, + "loss": 0.2701, + "step": 42759 + }, + { + "epoch": 0.8559917924079774, + "grad_norm": 1.0672682523727417, + "learning_rate": 5.339673177849026e-07, + "loss": 0.2504, + "step": 42760 + }, + { + "epoch": 0.8560118109251057, + "grad_norm": 1.0585987567901611, + "learning_rate": 5.338215593800505e-07, + "loss": 0.3165, + "step": 42761 + }, + { + "epoch": 0.8560318294422341, + "grad_norm": 1.2730075120925903, + "learning_rate": 5.336758197500519e-07, + "loss": 0.312, + "step": 42762 + }, + { + "epoch": 0.8560518479593624, + "grad_norm": 1.1106566190719604, + "learning_rate": 5.335300988955199e-07, + "loss": 0.3048, + "step": 42763 + }, + { + "epoch": 0.8560718664764908, + "grad_norm": 1.1582918167114258, + "learning_rate": 5.33384396817066e-07, + "loss": 0.2985, + "step": 42764 + }, + { + "epoch": 0.8560918849936191, + "grad_norm": 1.087546706199646, + "learning_rate": 5.332387135153034e-07, + "loss": 0.2633, + "step": 42765 + }, + { + "epoch": 0.8561119035107474, + "grad_norm": 1.2117913961410522, + "learning_rate": 5.330930489908425e-07, + "loss": 0.2616, + "step": 42766 + }, + { + "epoch": 0.8561319220278758, + "grad_norm": 1.128677487373352, + "learning_rate": 5.329474032442983e-07, + "loss": 0.3105, + "step": 42767 + }, + { + "epoch": 0.8561519405450041, + "grad_norm": 1.1601357460021973, + "learning_rate": 5.328017762762816e-07, + "loss": 0.3546, + "step": 42768 + }, + { + "epoch": 0.8561719590621325, + "grad_norm": 1.0845086574554443, + "learning_rate": 5.326561680874054e-07, + "loss": 0.3058, + "step": 42769 + }, + { + "epoch": 0.8561919775792608, + "grad_norm": 0.9999661445617676, + "learning_rate": 5.325105786782808e-07, + "loss": 0.2112, + "step": 42770 + }, + { + "epoch": 0.8562119960963892, + "grad_norm": 1.741969108581543, + "learning_rate": 5.323650080495197e-07, + "loss": 0.7239, + "step": 42771 + }, + { + "epoch": 0.8562320146135175, + "grad_norm": 2.107140064239502, + "learning_rate": 5.322194562017352e-07, + "loss": 0.7583, + "step": 42772 + }, + { + "epoch": 0.8562520331306458, + "grad_norm": 1.0920844078063965, + "learning_rate": 5.320739231355393e-07, + "loss": 0.2889, + "step": 42773 + }, + { + "epoch": 0.8562720516477742, + "grad_norm": 1.0627329349517822, + "learning_rate": 5.319284088515414e-07, + "loss": 0.2725, + "step": 42774 + }, + { + "epoch": 0.8562920701649025, + "grad_norm": 1.186665654182434, + "learning_rate": 5.317829133503555e-07, + "loss": 0.3068, + "step": 42775 + }, + { + "epoch": 0.8563120886820309, + "grad_norm": 1.0519614219665527, + "learning_rate": 5.316374366325933e-07, + "loss": 0.2691, + "step": 42776 + }, + { + "epoch": 0.8563321071991592, + "grad_norm": 1.1342490911483765, + "learning_rate": 5.31491978698866e-07, + "loss": 0.2851, + "step": 42777 + }, + { + "epoch": 0.8563521257162876, + "grad_norm": 1.0519130229949951, + "learning_rate": 5.313465395497847e-07, + "loss": 0.3143, + "step": 42778 + }, + { + "epoch": 0.8563721442334159, + "grad_norm": 1.0133730173110962, + "learning_rate": 5.312011191859606e-07, + "loss": 0.3027, + "step": 42779 + }, + { + "epoch": 0.8563921627505443, + "grad_norm": 1.1561709642410278, + "learning_rate": 5.310557176080045e-07, + "loss": 0.2874, + "step": 42780 + }, + { + "epoch": 0.8564121812676726, + "grad_norm": 1.0336641073226929, + "learning_rate": 5.309103348165296e-07, + "loss": 0.2886, + "step": 42781 + }, + { + "epoch": 0.8564321997848009, + "grad_norm": 1.10044264793396, + "learning_rate": 5.307649708121454e-07, + "loss": 0.3304, + "step": 42782 + }, + { + "epoch": 0.8564522183019293, + "grad_norm": 1.0410820245742798, + "learning_rate": 5.306196255954637e-07, + "loss": 0.2552, + "step": 42783 + }, + { + "epoch": 0.8564722368190576, + "grad_norm": 1.0990484952926636, + "learning_rate": 5.304742991670947e-07, + "loss": 0.2954, + "step": 42784 + }, + { + "epoch": 0.856492255336186, + "grad_norm": 2.02907657623291, + "learning_rate": 5.303289915276504e-07, + "loss": 0.7767, + "step": 42785 + }, + { + "epoch": 0.8565122738533143, + "grad_norm": 1.069189190864563, + "learning_rate": 5.301837026777412e-07, + "loss": 0.2492, + "step": 42786 + }, + { + "epoch": 0.8565322923704427, + "grad_norm": 1.8875110149383545, + "learning_rate": 5.300384326179769e-07, + "loss": 0.7086, + "step": 42787 + }, + { + "epoch": 0.856552310887571, + "grad_norm": 1.036864161491394, + "learning_rate": 5.298931813489694e-07, + "loss": 0.3209, + "step": 42788 + }, + { + "epoch": 0.8565723294046993, + "grad_norm": 1.0488524436950684, + "learning_rate": 5.297479488713297e-07, + "loss": 0.2531, + "step": 42789 + }, + { + "epoch": 0.8565923479218277, + "grad_norm": 1.152270793914795, + "learning_rate": 5.296027351856676e-07, + "loss": 0.2741, + "step": 42790 + }, + { + "epoch": 0.856612366438956, + "grad_norm": 1.008353590965271, + "learning_rate": 5.294575402925933e-07, + "loss": 0.2668, + "step": 42791 + }, + { + "epoch": 0.8566323849560844, + "grad_norm": 1.0610963106155396, + "learning_rate": 5.293123641927178e-07, + "loss": 0.3304, + "step": 42792 + }, + { + "epoch": 0.8566524034732127, + "grad_norm": 1.1684012413024902, + "learning_rate": 5.291672068866499e-07, + "loss": 0.3022, + "step": 42793 + }, + { + "epoch": 0.8566724219903411, + "grad_norm": 1.3116271495819092, + "learning_rate": 5.29022068375002e-07, + "loss": 0.2908, + "step": 42794 + }, + { + "epoch": 0.8566924405074694, + "grad_norm": 1.1570615768432617, + "learning_rate": 5.288769486583833e-07, + "loss": 0.3373, + "step": 42795 + }, + { + "epoch": 0.8567124590245978, + "grad_norm": 1.238624930381775, + "learning_rate": 5.287318477374032e-07, + "loss": 0.2545, + "step": 42796 + }, + { + "epoch": 0.8567324775417261, + "grad_norm": 1.173790693283081, + "learning_rate": 5.285867656126714e-07, + "loss": 0.2873, + "step": 42797 + }, + { + "epoch": 0.8567524960588544, + "grad_norm": 1.0348271131515503, + "learning_rate": 5.284417022847999e-07, + "loss": 0.2527, + "step": 42798 + }, + { + "epoch": 0.8567725145759828, + "grad_norm": 1.1711831092834473, + "learning_rate": 5.282966577543969e-07, + "loss": 0.3075, + "step": 42799 + }, + { + "epoch": 0.8567925330931111, + "grad_norm": 1.06321382522583, + "learning_rate": 5.281516320220714e-07, + "loss": 0.2913, + "step": 42800 + }, + { + "epoch": 0.8568125516102395, + "grad_norm": 1.917043924331665, + "learning_rate": 5.280066250884353e-07, + "loss": 0.7217, + "step": 42801 + }, + { + "epoch": 0.8568325701273678, + "grad_norm": 1.315806269645691, + "learning_rate": 5.278616369540956e-07, + "loss": 0.2729, + "step": 42802 + }, + { + "epoch": 0.8568525886444962, + "grad_norm": 1.0832712650299072, + "learning_rate": 5.277166676196644e-07, + "loss": 0.279, + "step": 42803 + }, + { + "epoch": 0.8568726071616245, + "grad_norm": 1.0242321491241455, + "learning_rate": 5.275717170857503e-07, + "loss": 0.298, + "step": 42804 + }, + { + "epoch": 0.8568926256787528, + "grad_norm": 1.2817224264144897, + "learning_rate": 5.274267853529614e-07, + "loss": 0.3082, + "step": 42805 + }, + { + "epoch": 0.8569126441958812, + "grad_norm": 1.0781389474868774, + "learning_rate": 5.272818724219075e-07, + "loss": 0.3, + "step": 42806 + }, + { + "epoch": 0.8569326627130095, + "grad_norm": 1.2463654279708862, + "learning_rate": 5.271369782931984e-07, + "loss": 0.3449, + "step": 42807 + }, + { + "epoch": 0.8569526812301379, + "grad_norm": 1.1561579704284668, + "learning_rate": 5.269921029674435e-07, + "loss": 0.2558, + "step": 42808 + }, + { + "epoch": 0.8569726997472662, + "grad_norm": 1.1760358810424805, + "learning_rate": 5.268472464452506e-07, + "loss": 0.2984, + "step": 42809 + }, + { + "epoch": 0.8569927182643946, + "grad_norm": 2.07034969329834, + "learning_rate": 5.267024087272299e-07, + "loss": 0.7623, + "step": 42810 + }, + { + "epoch": 0.8570127367815229, + "grad_norm": 1.3279274702072144, + "learning_rate": 5.265575898139886e-07, + "loss": 0.3545, + "step": 42811 + }, + { + "epoch": 0.8570327552986513, + "grad_norm": 1.1323840618133545, + "learning_rate": 5.264127897061366e-07, + "loss": 0.2307, + "step": 42812 + }, + { + "epoch": 0.8570527738157796, + "grad_norm": 1.0178022384643555, + "learning_rate": 5.262680084042826e-07, + "loss": 0.2852, + "step": 42813 + }, + { + "epoch": 0.8570727923329079, + "grad_norm": 1.104878544807434, + "learning_rate": 5.261232459090354e-07, + "loss": 0.2816, + "step": 42814 + }, + { + "epoch": 0.8570928108500363, + "grad_norm": 1.7909919023513794, + "learning_rate": 5.259785022210023e-07, + "loss": 0.7856, + "step": 42815 + }, + { + "epoch": 0.8571128293671646, + "grad_norm": 1.905276894569397, + "learning_rate": 5.25833777340794e-07, + "loss": 0.7678, + "step": 42816 + }, + { + "epoch": 0.857132847884293, + "grad_norm": 1.1274034976959229, + "learning_rate": 5.256890712690177e-07, + "loss": 0.3106, + "step": 42817 + }, + { + "epoch": 0.8571528664014213, + "grad_norm": 1.3134504556655884, + "learning_rate": 5.255443840062819e-07, + "loss": 0.2794, + "step": 42818 + }, + { + "epoch": 0.8571728849185497, + "grad_norm": 1.2060538530349731, + "learning_rate": 5.253997155531931e-07, + "loss": 0.3064, + "step": 42819 + }, + { + "epoch": 0.857192903435678, + "grad_norm": 1.1886250972747803, + "learning_rate": 5.252550659103622e-07, + "loss": 0.2751, + "step": 42820 + }, + { + "epoch": 0.8572129219528063, + "grad_norm": 1.942913293838501, + "learning_rate": 5.251104350783959e-07, + "loss": 0.7769, + "step": 42821 + }, + { + "epoch": 0.8572329404699347, + "grad_norm": 1.1816529035568237, + "learning_rate": 5.249658230579025e-07, + "loss": 0.3075, + "step": 42822 + }, + { + "epoch": 0.857252958987063, + "grad_norm": 1.9292031526565552, + "learning_rate": 5.248212298494892e-07, + "loss": 0.7349, + "step": 42823 + }, + { + "epoch": 0.8572729775041914, + "grad_norm": 1.035219669342041, + "learning_rate": 5.246766554537641e-07, + "loss": 0.2536, + "step": 42824 + }, + { + "epoch": 0.8572929960213197, + "grad_norm": 1.1423141956329346, + "learning_rate": 5.24532099871336e-07, + "loss": 0.2814, + "step": 42825 + }, + { + "epoch": 0.8573130145384481, + "grad_norm": 1.0271738767623901, + "learning_rate": 5.243875631028105e-07, + "loss": 0.2662, + "step": 42826 + }, + { + "epoch": 0.8573330330555764, + "grad_norm": 1.1041502952575684, + "learning_rate": 5.242430451487978e-07, + "loss": 0.3032, + "step": 42827 + }, + { + "epoch": 0.8573530515727048, + "grad_norm": 1.041914939880371, + "learning_rate": 5.24098546009903e-07, + "loss": 0.2943, + "step": 42828 + }, + { + "epoch": 0.8573730700898331, + "grad_norm": 1.3343498706817627, + "learning_rate": 5.239540656867359e-07, + "loss": 0.3118, + "step": 42829 + }, + { + "epoch": 0.8573930886069614, + "grad_norm": 1.158718228340149, + "learning_rate": 5.238096041799029e-07, + "loss": 0.3113, + "step": 42830 + }, + { + "epoch": 0.8574131071240898, + "grad_norm": 1.3173940181732178, + "learning_rate": 5.236651614900107e-07, + "loss": 0.2587, + "step": 42831 + }, + { + "epoch": 0.8574331256412181, + "grad_norm": 0.991831362247467, + "learning_rate": 5.235207376176666e-07, + "loss": 0.2606, + "step": 42832 + }, + { + "epoch": 0.8574531441583465, + "grad_norm": 1.369191288948059, + "learning_rate": 5.233763325634772e-07, + "loss": 0.2893, + "step": 42833 + }, + { + "epoch": 0.8574731626754748, + "grad_norm": 1.1183162927627563, + "learning_rate": 5.232319463280511e-07, + "loss": 0.2909, + "step": 42834 + }, + { + "epoch": 0.8574931811926032, + "grad_norm": 1.219927430152893, + "learning_rate": 5.230875789119949e-07, + "loss": 0.2916, + "step": 42835 + }, + { + "epoch": 0.8575131997097315, + "grad_norm": 1.1246466636657715, + "learning_rate": 5.229432303159148e-07, + "loss": 0.3317, + "step": 42836 + }, + { + "epoch": 0.8575332182268598, + "grad_norm": 2.0270934104919434, + "learning_rate": 5.227989005404171e-07, + "loss": 0.7735, + "step": 42837 + }, + { + "epoch": 0.8575532367439882, + "grad_norm": 1.9964345693588257, + "learning_rate": 5.226545895861102e-07, + "loss": 0.7375, + "step": 42838 + }, + { + "epoch": 0.8575732552611165, + "grad_norm": 1.1414371728897095, + "learning_rate": 5.225102974535989e-07, + "loss": 0.2583, + "step": 42839 + }, + { + "epoch": 0.8575932737782449, + "grad_norm": 1.0836513042449951, + "learning_rate": 5.223660241434919e-07, + "loss": 0.3252, + "step": 42840 + }, + { + "epoch": 0.8576132922953732, + "grad_norm": 1.1806724071502686, + "learning_rate": 5.222217696563941e-07, + "loss": 0.3108, + "step": 42841 + }, + { + "epoch": 0.8576333108125016, + "grad_norm": 1.0513322353363037, + "learning_rate": 5.220775339929118e-07, + "loss": 0.2836, + "step": 42842 + }, + { + "epoch": 0.8576533293296299, + "grad_norm": 1.1292228698730469, + "learning_rate": 5.219333171536528e-07, + "loss": 0.2806, + "step": 42843 + }, + { + "epoch": 0.8576733478467583, + "grad_norm": 1.0012239217758179, + "learning_rate": 5.217891191392222e-07, + "loss": 0.2731, + "step": 42844 + }, + { + "epoch": 0.8576933663638866, + "grad_norm": 1.0559991598129272, + "learning_rate": 5.216449399502266e-07, + "loss": 0.2714, + "step": 42845 + }, + { + "epoch": 0.8577133848810149, + "grad_norm": 1.2088488340377808, + "learning_rate": 5.215007795872707e-07, + "loss": 0.3456, + "step": 42846 + }, + { + "epoch": 0.8577334033981433, + "grad_norm": 1.046721339225769, + "learning_rate": 5.213566380509627e-07, + "loss": 0.2708, + "step": 42847 + }, + { + "epoch": 0.8577534219152716, + "grad_norm": 1.079679012298584, + "learning_rate": 5.212125153419079e-07, + "loss": 0.2909, + "step": 42848 + }, + { + "epoch": 0.8577734404324, + "grad_norm": 1.0711052417755127, + "learning_rate": 5.210684114607118e-07, + "loss": 0.3052, + "step": 42849 + }, + { + "epoch": 0.8577934589495283, + "grad_norm": 1.9101415872573853, + "learning_rate": 5.209243264079795e-07, + "loss": 0.8101, + "step": 42850 + }, + { + "epoch": 0.8578134774666567, + "grad_norm": 1.011770486831665, + "learning_rate": 5.207802601843176e-07, + "loss": 0.2987, + "step": 42851 + }, + { + "epoch": 0.857833495983785, + "grad_norm": 1.03118097782135, + "learning_rate": 5.206362127903314e-07, + "loss": 0.3339, + "step": 42852 + }, + { + "epoch": 0.8578535145009133, + "grad_norm": 1.2083768844604492, + "learning_rate": 5.204921842266269e-07, + "loss": 0.3144, + "step": 42853 + }, + { + "epoch": 0.8578735330180417, + "grad_norm": 1.2020814418792725, + "learning_rate": 5.203481744938099e-07, + "loss": 0.2544, + "step": 42854 + }, + { + "epoch": 0.85789355153517, + "grad_norm": 1.243911862373352, + "learning_rate": 5.202041835924837e-07, + "loss": 0.3015, + "step": 42855 + }, + { + "epoch": 0.8579135700522984, + "grad_norm": 1.8954145908355713, + "learning_rate": 5.200602115232562e-07, + "loss": 0.7245, + "step": 42856 + }, + { + "epoch": 0.8579335885694267, + "grad_norm": 1.0541529655456543, + "learning_rate": 5.199162582867312e-07, + "loss": 0.2726, + "step": 42857 + }, + { + "epoch": 0.8579536070865551, + "grad_norm": 1.1877790689468384, + "learning_rate": 5.197723238835145e-07, + "loss": 0.2929, + "step": 42858 + }, + { + "epoch": 0.8579736256036834, + "grad_norm": 1.1067317724227905, + "learning_rate": 5.196284083142095e-07, + "loss": 0.2538, + "step": 42859 + }, + { + "epoch": 0.8579936441208118, + "grad_norm": 1.9063515663146973, + "learning_rate": 5.194845115794239e-07, + "loss": 0.7831, + "step": 42860 + }, + { + "epoch": 0.8580136626379401, + "grad_norm": 1.043674111366272, + "learning_rate": 5.193406336797607e-07, + "loss": 0.3511, + "step": 42861 + }, + { + "epoch": 0.8580336811550684, + "grad_norm": 1.0253173112869263, + "learning_rate": 5.191967746158256e-07, + "loss": 0.3395, + "step": 42862 + }, + { + "epoch": 0.8580536996721968, + "grad_norm": 1.2677891254425049, + "learning_rate": 5.190529343882217e-07, + "loss": 0.3271, + "step": 42863 + }, + { + "epoch": 0.8580737181893251, + "grad_norm": 1.9152095317840576, + "learning_rate": 5.189091129975559e-07, + "loss": 0.2913, + "step": 42864 + }, + { + "epoch": 0.8580937367064535, + "grad_norm": 1.2911677360534668, + "learning_rate": 5.187653104444313e-07, + "loss": 0.2943, + "step": 42865 + }, + { + "epoch": 0.8581137552235818, + "grad_norm": 1.1423635482788086, + "learning_rate": 5.186215267294536e-07, + "loss": 0.3525, + "step": 42866 + }, + { + "epoch": 0.8581337737407102, + "grad_norm": 1.96064031124115, + "learning_rate": 5.184777618532272e-07, + "loss": 0.7447, + "step": 42867 + }, + { + "epoch": 0.8581537922578385, + "grad_norm": 1.1736968755722046, + "learning_rate": 5.183340158163542e-07, + "loss": 0.3134, + "step": 42868 + }, + { + "epoch": 0.8581738107749668, + "grad_norm": 1.0576443672180176, + "learning_rate": 5.181902886194418e-07, + "loss": 0.3214, + "step": 42869 + }, + { + "epoch": 0.8581938292920952, + "grad_norm": 1.877232551574707, + "learning_rate": 5.180465802630924e-07, + "loss": 0.7442, + "step": 42870 + }, + { + "epoch": 0.8582138478092235, + "grad_norm": 1.226257085800171, + "learning_rate": 5.179028907479111e-07, + "loss": 0.2988, + "step": 42871 + }, + { + "epoch": 0.8582338663263519, + "grad_norm": 1.1687302589416504, + "learning_rate": 5.177592200745019e-07, + "loss": 0.3145, + "step": 42872 + }, + { + "epoch": 0.8582538848434802, + "grad_norm": 1.0710431337356567, + "learning_rate": 5.176155682434669e-07, + "loss": 0.278, + "step": 42873 + }, + { + "epoch": 0.8582739033606086, + "grad_norm": 1.3606536388397217, + "learning_rate": 5.174719352554125e-07, + "loss": 0.3257, + "step": 42874 + }, + { + "epoch": 0.8582939218777369, + "grad_norm": 1.1306639909744263, + "learning_rate": 5.173283211109409e-07, + "loss": 0.3171, + "step": 42875 + }, + { + "epoch": 0.8583139403948653, + "grad_norm": 1.049800157546997, + "learning_rate": 5.17184725810656e-07, + "loss": 0.2539, + "step": 42876 + }, + { + "epoch": 0.8583339589119936, + "grad_norm": 1.9917771816253662, + "learning_rate": 5.170411493551625e-07, + "loss": 0.7411, + "step": 42877 + }, + { + "epoch": 0.8583539774291219, + "grad_norm": 1.2126705646514893, + "learning_rate": 5.168975917450625e-07, + "loss": 0.3058, + "step": 42878 + }, + { + "epoch": 0.8583739959462503, + "grad_norm": 1.1567150354385376, + "learning_rate": 5.167540529809612e-07, + "loss": 0.2724, + "step": 42879 + }, + { + "epoch": 0.8583940144633786, + "grad_norm": 1.1074576377868652, + "learning_rate": 5.166105330634613e-07, + "loss": 0.2814, + "step": 42880 + }, + { + "epoch": 0.858414032980507, + "grad_norm": 1.1187766790390015, + "learning_rate": 5.164670319931641e-07, + "loss": 0.2799, + "step": 42881 + }, + { + "epoch": 0.8584340514976353, + "grad_norm": 1.0027673244476318, + "learning_rate": 5.163235497706764e-07, + "loss": 0.2406, + "step": 42882 + }, + { + "epoch": 0.8584540700147637, + "grad_norm": 1.1051751375198364, + "learning_rate": 5.161800863965994e-07, + "loss": 0.3065, + "step": 42883 + }, + { + "epoch": 0.858474088531892, + "grad_norm": 1.1230791807174683, + "learning_rate": 5.160366418715363e-07, + "loss": 0.3299, + "step": 42884 + }, + { + "epoch": 0.8584941070490203, + "grad_norm": 1.0638128519058228, + "learning_rate": 5.1589321619609e-07, + "loss": 0.2617, + "step": 42885 + }, + { + "epoch": 0.8585141255661487, + "grad_norm": 1.0720417499542236, + "learning_rate": 5.157498093708629e-07, + "loss": 0.317, + "step": 42886 + }, + { + "epoch": 0.858534144083277, + "grad_norm": 1.1949750185012817, + "learning_rate": 5.156064213964596e-07, + "loss": 0.2997, + "step": 42887 + }, + { + "epoch": 0.8585541626004054, + "grad_norm": 1.1530051231384277, + "learning_rate": 5.154630522734816e-07, + "loss": 0.3322, + "step": 42888 + }, + { + "epoch": 0.8585741811175337, + "grad_norm": 1.9457111358642578, + "learning_rate": 5.153197020025313e-07, + "loss": 0.7325, + "step": 42889 + }, + { + "epoch": 0.8585941996346621, + "grad_norm": 1.1450060606002808, + "learning_rate": 5.151763705842122e-07, + "loss": 0.3125, + "step": 42890 + }, + { + "epoch": 0.8586142181517904, + "grad_norm": 1.1273068189620972, + "learning_rate": 5.15033058019126e-07, + "loss": 0.247, + "step": 42891 + }, + { + "epoch": 0.8586342366689188, + "grad_norm": 1.2428666353225708, + "learning_rate": 5.148897643078765e-07, + "loss": 0.2911, + "step": 42892 + }, + { + "epoch": 0.8586542551860471, + "grad_norm": 1.1011145114898682, + "learning_rate": 5.14746489451065e-07, + "loss": 0.2788, + "step": 42893 + }, + { + "epoch": 0.8586742737031754, + "grad_norm": 1.0262022018432617, + "learning_rate": 5.146032334492945e-07, + "loss": 0.3018, + "step": 42894 + }, + { + "epoch": 0.8586942922203038, + "grad_norm": 1.1188620328903198, + "learning_rate": 5.144599963031654e-07, + "loss": 0.2761, + "step": 42895 + }, + { + "epoch": 0.8587143107374321, + "grad_norm": 1.0956029891967773, + "learning_rate": 5.143167780132824e-07, + "loss": 0.2749, + "step": 42896 + }, + { + "epoch": 0.8587343292545605, + "grad_norm": 1.216616153717041, + "learning_rate": 5.141735785802465e-07, + "loss": 0.3233, + "step": 42897 + }, + { + "epoch": 0.8587543477716888, + "grad_norm": 1.092951774597168, + "learning_rate": 5.14030398004659e-07, + "loss": 0.2988, + "step": 42898 + }, + { + "epoch": 0.8587743662888172, + "grad_norm": 1.173191785812378, + "learning_rate": 5.138872362871217e-07, + "loss": 0.3013, + "step": 42899 + }, + { + "epoch": 0.8587943848059455, + "grad_norm": 1.2089793682098389, + "learning_rate": 5.137440934282379e-07, + "loss": 0.2745, + "step": 42900 + }, + { + "epoch": 0.8588144033230738, + "grad_norm": 1.1735172271728516, + "learning_rate": 5.136009694286082e-07, + "loss": 0.3561, + "step": 42901 + }, + { + "epoch": 0.8588344218402022, + "grad_norm": 1.0945414304733276, + "learning_rate": 5.134578642888339e-07, + "loss": 0.3233, + "step": 42902 + }, + { + "epoch": 0.8588544403573305, + "grad_norm": 1.0600193738937378, + "learning_rate": 5.133147780095183e-07, + "loss": 0.2748, + "step": 42903 + }, + { + "epoch": 0.8588744588744589, + "grad_norm": 1.0229287147521973, + "learning_rate": 5.131717105912603e-07, + "loss": 0.3003, + "step": 42904 + }, + { + "epoch": 0.8588944773915872, + "grad_norm": 1.2541260719299316, + "learning_rate": 5.130286620346642e-07, + "loss": 0.2831, + "step": 42905 + }, + { + "epoch": 0.8589144959087156, + "grad_norm": 0.9421502947807312, + "learning_rate": 5.128856323403303e-07, + "loss": 0.2539, + "step": 42906 + }, + { + "epoch": 0.8589345144258439, + "grad_norm": 1.1047650575637817, + "learning_rate": 5.127426215088594e-07, + "loss": 0.294, + "step": 42907 + }, + { + "epoch": 0.8589545329429723, + "grad_norm": 0.99848872423172, + "learning_rate": 5.125996295408514e-07, + "loss": 0.287, + "step": 42908 + }, + { + "epoch": 0.8589745514601006, + "grad_norm": 1.2470015287399292, + "learning_rate": 5.124566564369105e-07, + "loss": 0.2691, + "step": 42909 + }, + { + "epoch": 0.8589945699772289, + "grad_norm": 1.024312138557434, + "learning_rate": 5.123137021976354e-07, + "loss": 0.26, + "step": 42910 + }, + { + "epoch": 0.8590145884943573, + "grad_norm": 1.1842516660690308, + "learning_rate": 5.121707668236281e-07, + "loss": 0.3107, + "step": 42911 + }, + { + "epoch": 0.8590346070114856, + "grad_norm": 1.0779370069503784, + "learning_rate": 5.12027850315488e-07, + "loss": 0.33, + "step": 42912 + }, + { + "epoch": 0.859054625528614, + "grad_norm": 1.245513916015625, + "learning_rate": 5.118849526738185e-07, + "loss": 0.3276, + "step": 42913 + }, + { + "epoch": 0.8590746440457423, + "grad_norm": 1.9915422201156616, + "learning_rate": 5.11742073899218e-07, + "loss": 0.7532, + "step": 42914 + }, + { + "epoch": 0.8590946625628707, + "grad_norm": 1.1833891868591309, + "learning_rate": 5.115992139922877e-07, + "loss": 0.2805, + "step": 42915 + }, + { + "epoch": 0.859114681079999, + "grad_norm": 1.1370861530303955, + "learning_rate": 5.114563729536287e-07, + "loss": 0.2781, + "step": 42916 + }, + { + "epoch": 0.8591346995971273, + "grad_norm": 1.9023667573928833, + "learning_rate": 5.113135507838407e-07, + "loss": 0.7418, + "step": 42917 + }, + { + "epoch": 0.8591547181142557, + "grad_norm": 2.1017706394195557, + "learning_rate": 5.111707474835253e-07, + "loss": 0.7199, + "step": 42918 + }, + { + "epoch": 0.859174736631384, + "grad_norm": 1.9496188163757324, + "learning_rate": 5.110279630532821e-07, + "loss": 0.6665, + "step": 42919 + }, + { + "epoch": 0.8591947551485124, + "grad_norm": 1.1291418075561523, + "learning_rate": 5.108851974937112e-07, + "loss": 0.3034, + "step": 42920 + }, + { + "epoch": 0.8592147736656407, + "grad_norm": 1.0407938957214355, + "learning_rate": 5.10742450805412e-07, + "loss": 0.2869, + "step": 42921 + }, + { + "epoch": 0.8592347921827691, + "grad_norm": 1.1585359573364258, + "learning_rate": 5.105997229889864e-07, + "loss": 0.3343, + "step": 42922 + }, + { + "epoch": 0.8592548106998974, + "grad_norm": 1.041872501373291, + "learning_rate": 5.104570140450338e-07, + "loss": 0.3126, + "step": 42923 + }, + { + "epoch": 0.8592748292170258, + "grad_norm": 1.2953648567199707, + "learning_rate": 5.103143239741532e-07, + "loss": 0.3346, + "step": 42924 + }, + { + "epoch": 0.8592948477341541, + "grad_norm": 1.1202248334884644, + "learning_rate": 5.101716527769452e-07, + "loss": 0.2622, + "step": 42925 + }, + { + "epoch": 0.8593148662512824, + "grad_norm": 1.8080356121063232, + "learning_rate": 5.100290004540081e-07, + "loss": 0.7073, + "step": 42926 + }, + { + "epoch": 0.8593348847684108, + "grad_norm": 1.2747591733932495, + "learning_rate": 5.098863670059446e-07, + "loss": 0.2815, + "step": 42927 + }, + { + "epoch": 0.8593549032855391, + "grad_norm": 1.029640555381775, + "learning_rate": 5.097437524333509e-07, + "loss": 0.2969, + "step": 42928 + }, + { + "epoch": 0.8593749218026675, + "grad_norm": 1.1992608308792114, + "learning_rate": 5.096011567368292e-07, + "loss": 0.2714, + "step": 42929 + }, + { + "epoch": 0.8593949403197958, + "grad_norm": 1.0864050388336182, + "learning_rate": 5.094585799169772e-07, + "loss": 0.3024, + "step": 42930 + }, + { + "epoch": 0.8594149588369242, + "grad_norm": 1.1020087003707886, + "learning_rate": 5.093160219743953e-07, + "loss": 0.362, + "step": 42931 + }, + { + "epoch": 0.8594349773540525, + "grad_norm": 1.136897325515747, + "learning_rate": 5.091734829096829e-07, + "loss": 0.2725, + "step": 42932 + }, + { + "epoch": 0.8594549958711808, + "grad_norm": 1.1659305095672607, + "learning_rate": 5.090309627234386e-07, + "loss": 0.2867, + "step": 42933 + }, + { + "epoch": 0.8594750143883092, + "grad_norm": 1.049367070198059, + "learning_rate": 5.088884614162609e-07, + "loss": 0.2546, + "step": 42934 + }, + { + "epoch": 0.8594950329054375, + "grad_norm": 2.0253355503082275, + "learning_rate": 5.087459789887506e-07, + "loss": 0.79, + "step": 42935 + }, + { + "epoch": 0.8595150514225659, + "grad_norm": 1.2589430809020996, + "learning_rate": 5.086035154415054e-07, + "loss": 0.3382, + "step": 42936 + }, + { + "epoch": 0.8595350699396942, + "grad_norm": 0.9675493836402893, + "learning_rate": 5.084610707751247e-07, + "loss": 0.2545, + "step": 42937 + }, + { + "epoch": 0.8595550884568226, + "grad_norm": 1.1937670707702637, + "learning_rate": 5.083186449902067e-07, + "loss": 0.3117, + "step": 42938 + }, + { + "epoch": 0.8595751069739509, + "grad_norm": 1.9943355321884155, + "learning_rate": 5.0817623808735e-07, + "loss": 0.7985, + "step": 42939 + }, + { + "epoch": 0.8595951254910793, + "grad_norm": 1.145662784576416, + "learning_rate": 5.080338500671545e-07, + "loss": 0.2948, + "step": 42940 + }, + { + "epoch": 0.8596151440082076, + "grad_norm": 1.0294209718704224, + "learning_rate": 5.078914809302171e-07, + "loss": 0.2786, + "step": 42941 + }, + { + "epoch": 0.8596351625253359, + "grad_norm": 1.0891814231872559, + "learning_rate": 5.077491306771382e-07, + "loss": 0.2703, + "step": 42942 + }, + { + "epoch": 0.8596551810424643, + "grad_norm": 1.1325550079345703, + "learning_rate": 5.076067993085138e-07, + "loss": 0.2813, + "step": 42943 + }, + { + "epoch": 0.8596751995595926, + "grad_norm": 1.0723680257797241, + "learning_rate": 5.074644868249451e-07, + "loss": 0.3391, + "step": 42944 + }, + { + "epoch": 0.859695218076721, + "grad_norm": 2.2305452823638916, + "learning_rate": 5.073221932270289e-07, + "loss": 0.7765, + "step": 42945 + }, + { + "epoch": 0.8597152365938493, + "grad_norm": 1.2087981700897217, + "learning_rate": 5.071799185153631e-07, + "loss": 0.3444, + "step": 42946 + }, + { + "epoch": 0.8597352551109777, + "grad_norm": 1.1498711109161377, + "learning_rate": 5.07037662690546e-07, + "loss": 0.3106, + "step": 42947 + }, + { + "epoch": 0.859755273628106, + "grad_norm": 1.2937417030334473, + "learning_rate": 5.068954257531749e-07, + "loss": 0.3419, + "step": 42948 + }, + { + "epoch": 0.8597752921452343, + "grad_norm": 1.1992874145507812, + "learning_rate": 5.067532077038495e-07, + "loss": 0.311, + "step": 42949 + }, + { + "epoch": 0.8597953106623627, + "grad_norm": 1.1078699827194214, + "learning_rate": 5.066110085431669e-07, + "loss": 0.2757, + "step": 42950 + }, + { + "epoch": 0.859815329179491, + "grad_norm": 1.9009047746658325, + "learning_rate": 5.06468828271724e-07, + "loss": 0.6917, + "step": 42951 + }, + { + "epoch": 0.8598353476966194, + "grad_norm": 1.1639457941055298, + "learning_rate": 5.063266668901184e-07, + "loss": 0.2777, + "step": 42952 + }, + { + "epoch": 0.8598553662137477, + "grad_norm": 1.1798020601272583, + "learning_rate": 5.0618452439895e-07, + "loss": 0.2475, + "step": 42953 + }, + { + "epoch": 0.8598753847308761, + "grad_norm": 1.165616512298584, + "learning_rate": 5.060424007988129e-07, + "loss": 0.3318, + "step": 42954 + }, + { + "epoch": 0.8598954032480044, + "grad_norm": 1.0738826990127563, + "learning_rate": 5.059002960903081e-07, + "loss": 0.2674, + "step": 42955 + }, + { + "epoch": 0.8599154217651327, + "grad_norm": 1.1424028873443604, + "learning_rate": 5.057582102740311e-07, + "loss": 0.2784, + "step": 42956 + }, + { + "epoch": 0.8599354402822611, + "grad_norm": 1.2198277711868286, + "learning_rate": 5.056161433505779e-07, + "loss": 0.2836, + "step": 42957 + }, + { + "epoch": 0.8599554587993894, + "grad_norm": 1.1429417133331299, + "learning_rate": 5.054740953205489e-07, + "loss": 0.2853, + "step": 42958 + }, + { + "epoch": 0.8599754773165178, + "grad_norm": 1.1629177331924438, + "learning_rate": 5.053320661845396e-07, + "loss": 0.2681, + "step": 42959 + }, + { + "epoch": 0.8599954958336461, + "grad_norm": 1.1853927373886108, + "learning_rate": 5.051900559431466e-07, + "loss": 0.2511, + "step": 42960 + }, + { + "epoch": 0.8600155143507745, + "grad_norm": 1.0904124975204468, + "learning_rate": 5.050480645969663e-07, + "loss": 0.2955, + "step": 42961 + }, + { + "epoch": 0.8600355328679028, + "grad_norm": 1.0376460552215576, + "learning_rate": 5.049060921465981e-07, + "loss": 0.2856, + "step": 42962 + }, + { + "epoch": 0.8600555513850312, + "grad_norm": 1.0430339574813843, + "learning_rate": 5.047641385926371e-07, + "loss": 0.2741, + "step": 42963 + }, + { + "epoch": 0.8600755699021595, + "grad_norm": 1.056605339050293, + "learning_rate": 5.046222039356802e-07, + "loss": 0.2529, + "step": 42964 + }, + { + "epoch": 0.8600955884192878, + "grad_norm": 1.12298583984375, + "learning_rate": 5.044802881763233e-07, + "loss": 0.3294, + "step": 42965 + }, + { + "epoch": 0.8601156069364162, + "grad_norm": 1.1647623777389526, + "learning_rate": 5.043383913151645e-07, + "loss": 0.328, + "step": 42966 + }, + { + "epoch": 0.8601356254535445, + "grad_norm": 1.0222485065460205, + "learning_rate": 5.041965133527988e-07, + "loss": 0.2647, + "step": 42967 + }, + { + "epoch": 0.8601556439706729, + "grad_norm": 1.2329243421554565, + "learning_rate": 5.040546542898245e-07, + "loss": 0.2581, + "step": 42968 + }, + { + "epoch": 0.8601756624878012, + "grad_norm": 1.2374626398086548, + "learning_rate": 5.039128141268368e-07, + "loss": 0.2659, + "step": 42969 + }, + { + "epoch": 0.8601956810049296, + "grad_norm": 1.7844756841659546, + "learning_rate": 5.037709928644313e-07, + "loss": 0.778, + "step": 42970 + }, + { + "epoch": 0.8602156995220579, + "grad_norm": 1.037981629371643, + "learning_rate": 5.036291905032059e-07, + "loss": 0.298, + "step": 42971 + }, + { + "epoch": 0.8602357180391862, + "grad_norm": 1.1395994424819946, + "learning_rate": 5.034874070437557e-07, + "loss": 0.3122, + "step": 42972 + }, + { + "epoch": 0.8602557365563146, + "grad_norm": 1.323920488357544, + "learning_rate": 5.033456424866762e-07, + "loss": 0.3628, + "step": 42973 + }, + { + "epoch": 0.8602757550734429, + "grad_norm": 1.1664015054702759, + "learning_rate": 5.032038968325637e-07, + "loss": 0.3258, + "step": 42974 + }, + { + "epoch": 0.8602957735905713, + "grad_norm": 1.126717209815979, + "learning_rate": 5.030621700820149e-07, + "loss": 0.282, + "step": 42975 + }, + { + "epoch": 0.8603157921076996, + "grad_norm": 1.0454747676849365, + "learning_rate": 5.029204622356254e-07, + "loss": 0.269, + "step": 42976 + }, + { + "epoch": 0.860335810624828, + "grad_norm": 1.023478627204895, + "learning_rate": 5.027787732939898e-07, + "loss": 0.243, + "step": 42977 + }, + { + "epoch": 0.8603558291419563, + "grad_norm": 1.2529151439666748, + "learning_rate": 5.026371032577044e-07, + "loss": 0.2484, + "step": 42978 + }, + { + "epoch": 0.8603758476590847, + "grad_norm": 1.1066395044326782, + "learning_rate": 5.02495452127365e-07, + "loss": 0.271, + "step": 42979 + }, + { + "epoch": 0.860395866176213, + "grad_norm": 1.0894321203231812, + "learning_rate": 5.02353819903566e-07, + "loss": 0.3353, + "step": 42980 + }, + { + "epoch": 0.8604158846933413, + "grad_norm": 1.1806514263153076, + "learning_rate": 5.022122065869045e-07, + "loss": 0.2857, + "step": 42981 + }, + { + "epoch": 0.8604359032104697, + "grad_norm": 1.0543642044067383, + "learning_rate": 5.02070612177975e-07, + "loss": 0.2925, + "step": 42982 + }, + { + "epoch": 0.860455921727598, + "grad_norm": 1.0889586210250854, + "learning_rate": 5.019290366773721e-07, + "loss": 0.3057, + "step": 42983 + }, + { + "epoch": 0.8604759402447264, + "grad_norm": 2.006068706512451, + "learning_rate": 5.01787480085692e-07, + "loss": 0.7935, + "step": 42984 + }, + { + "epoch": 0.8604959587618547, + "grad_norm": 1.0585529804229736, + "learning_rate": 5.016459424035297e-07, + "loss": 0.3363, + "step": 42985 + }, + { + "epoch": 0.8605159772789831, + "grad_norm": 1.0207144021987915, + "learning_rate": 5.015044236314797e-07, + "loss": 0.3019, + "step": 42986 + }, + { + "epoch": 0.8605359957961114, + "grad_norm": 2.0058045387268066, + "learning_rate": 5.013629237701373e-07, + "loss": 0.7761, + "step": 42987 + }, + { + "epoch": 0.8605560143132397, + "grad_norm": 1.1712583303451538, + "learning_rate": 5.012214428200956e-07, + "loss": 0.2971, + "step": 42988 + }, + { + "epoch": 0.8605760328303681, + "grad_norm": 1.0528026819229126, + "learning_rate": 5.010799807819517e-07, + "loss": 0.3042, + "step": 42989 + }, + { + "epoch": 0.8605960513474964, + "grad_norm": 1.1769777536392212, + "learning_rate": 5.009385376562997e-07, + "loss": 0.3075, + "step": 42990 + }, + { + "epoch": 0.8606160698646248, + "grad_norm": 1.021708369255066, + "learning_rate": 5.007971134437329e-07, + "loss": 0.2718, + "step": 42991 + }, + { + "epoch": 0.8606360883817531, + "grad_norm": 1.1208335161209106, + "learning_rate": 5.006557081448477e-07, + "loss": 0.263, + "step": 42992 + }, + { + "epoch": 0.8606561068988815, + "grad_norm": 1.8557579517364502, + "learning_rate": 5.005143217602365e-07, + "loss": 0.7026, + "step": 42993 + }, + { + "epoch": 0.8606761254160098, + "grad_norm": 1.1021270751953125, + "learning_rate": 5.003729542904961e-07, + "loss": 0.3149, + "step": 42994 + }, + { + "epoch": 0.8606961439331382, + "grad_norm": 1.0377413034439087, + "learning_rate": 5.002316057362194e-07, + "loss": 0.2664, + "step": 42995 + }, + { + "epoch": 0.8607161624502665, + "grad_norm": 1.8850407600402832, + "learning_rate": 5.000902760979992e-07, + "loss": 0.7549, + "step": 42996 + }, + { + "epoch": 0.8607361809673948, + "grad_norm": 1.9303032159805298, + "learning_rate": 4.999489653764328e-07, + "loss": 0.6754, + "step": 42997 + }, + { + "epoch": 0.8607561994845232, + "grad_norm": 1.1412900686264038, + "learning_rate": 4.998076735721119e-07, + "loss": 0.272, + "step": 42998 + }, + { + "epoch": 0.8607762180016515, + "grad_norm": 1.330025315284729, + "learning_rate": 4.996664006856317e-07, + "loss": 0.3214, + "step": 42999 + }, + { + "epoch": 0.8607962365187799, + "grad_norm": 1.1487878561019897, + "learning_rate": 4.995251467175849e-07, + "loss": 0.2909, + "step": 43000 + }, + { + "epoch": 0.8608162550359082, + "grad_norm": 1.1579700708389282, + "learning_rate": 4.99383911668565e-07, + "loss": 0.3163, + "step": 43001 + }, + { + "epoch": 0.8608362735530366, + "grad_norm": 1.0976969003677368, + "learning_rate": 4.992426955391677e-07, + "loss": 0.3233, + "step": 43002 + }, + { + "epoch": 0.8608562920701649, + "grad_norm": 1.0989537239074707, + "learning_rate": 4.991014983299858e-07, + "loss": 0.278, + "step": 43003 + }, + { + "epoch": 0.8608763105872932, + "grad_norm": 1.7342809438705444, + "learning_rate": 4.98960320041611e-07, + "loss": 0.6842, + "step": 43004 + }, + { + "epoch": 0.8608963291044216, + "grad_norm": 2.1561615467071533, + "learning_rate": 4.988191606746395e-07, + "loss": 0.7374, + "step": 43005 + }, + { + "epoch": 0.8609163476215499, + "grad_norm": 1.1550030708312988, + "learning_rate": 4.986780202296631e-07, + "loss": 0.2958, + "step": 43006 + }, + { + "epoch": 0.8609363661386783, + "grad_norm": 1.0676695108413696, + "learning_rate": 4.985368987072764e-07, + "loss": 0.3105, + "step": 43007 + }, + { + "epoch": 0.8609563846558066, + "grad_norm": 1.1556143760681152, + "learning_rate": 4.983957961080715e-07, + "loss": 0.2838, + "step": 43008 + }, + { + "epoch": 0.860976403172935, + "grad_norm": 1.0964293479919434, + "learning_rate": 4.982547124326425e-07, + "loss": 0.3194, + "step": 43009 + }, + { + "epoch": 0.8609964216900633, + "grad_norm": 1.8448340892791748, + "learning_rate": 4.981136476815806e-07, + "loss": 0.7722, + "step": 43010 + }, + { + "epoch": 0.8610164402071917, + "grad_norm": 1.1119195222854614, + "learning_rate": 4.979726018554809e-07, + "loss": 0.2741, + "step": 43011 + }, + { + "epoch": 0.86103645872432, + "grad_norm": 0.9903481602668762, + "learning_rate": 4.978315749549356e-07, + "loss": 0.2683, + "step": 43012 + }, + { + "epoch": 0.8610564772414483, + "grad_norm": 1.1364079713821411, + "learning_rate": 4.976905669805376e-07, + "loss": 0.36, + "step": 43013 + }, + { + "epoch": 0.8610764957585767, + "grad_norm": 1.0505090951919556, + "learning_rate": 4.975495779328787e-07, + "loss": 0.2343, + "step": 43014 + }, + { + "epoch": 0.861096514275705, + "grad_norm": 1.9615685939788818, + "learning_rate": 4.974086078125534e-07, + "loss": 0.7479, + "step": 43015 + }, + { + "epoch": 0.8611165327928334, + "grad_norm": 1.0438393354415894, + "learning_rate": 4.972676566201528e-07, + "loss": 0.2787, + "step": 43016 + }, + { + "epoch": 0.8611365513099617, + "grad_norm": 1.160446047782898, + "learning_rate": 4.971267243562694e-07, + "loss": 0.3634, + "step": 43017 + }, + { + "epoch": 0.8611565698270901, + "grad_norm": 1.0187000036239624, + "learning_rate": 4.969858110214975e-07, + "loss": 0.2915, + "step": 43018 + }, + { + "epoch": 0.8611765883442184, + "grad_norm": 1.0869579315185547, + "learning_rate": 4.968449166164269e-07, + "loss": 0.2973, + "step": 43019 + }, + { + "epoch": 0.8611966068613467, + "grad_norm": 1.1510483026504517, + "learning_rate": 4.967040411416524e-07, + "loss": 0.2919, + "step": 43020 + }, + { + "epoch": 0.8612166253784751, + "grad_norm": 1.09141206741333, + "learning_rate": 4.965631845977647e-07, + "loss": 0.3103, + "step": 43021 + }, + { + "epoch": 0.8612366438956034, + "grad_norm": 1.3147655725479126, + "learning_rate": 4.964223469853563e-07, + "loss": 0.3194, + "step": 43022 + }, + { + "epoch": 0.8612566624127318, + "grad_norm": 1.1332119703292847, + "learning_rate": 4.962815283050182e-07, + "loss": 0.3051, + "step": 43023 + }, + { + "epoch": 0.8612766809298601, + "grad_norm": 1.2240333557128906, + "learning_rate": 4.961407285573438e-07, + "loss": 0.3229, + "step": 43024 + }, + { + "epoch": 0.8612966994469885, + "grad_norm": 1.0777772665023804, + "learning_rate": 4.959999477429256e-07, + "loss": 0.3079, + "step": 43025 + }, + { + "epoch": 0.8613167179641168, + "grad_norm": 1.2139570713043213, + "learning_rate": 4.958591858623535e-07, + "loss": 0.3319, + "step": 43026 + }, + { + "epoch": 0.8613367364812452, + "grad_norm": 1.141097903251648, + "learning_rate": 4.957184429162193e-07, + "loss": 0.2906, + "step": 43027 + }, + { + "epoch": 0.8613567549983735, + "grad_norm": 1.1390440464019775, + "learning_rate": 4.955777189051165e-07, + "loss": 0.3642, + "step": 43028 + }, + { + "epoch": 0.8613767735155018, + "grad_norm": 1.0682224035263062, + "learning_rate": 4.954370138296355e-07, + "loss": 0.2868, + "step": 43029 + }, + { + "epoch": 0.8613967920326302, + "grad_norm": 1.090389370918274, + "learning_rate": 4.952963276903672e-07, + "loss": 0.252, + "step": 43030 + }, + { + "epoch": 0.8614168105497585, + "grad_norm": 1.1009957790374756, + "learning_rate": 4.951556604879049e-07, + "loss": 0.3148, + "step": 43031 + }, + { + "epoch": 0.8614368290668869, + "grad_norm": 1.7664638757705688, + "learning_rate": 4.95015012222837e-07, + "loss": 0.6967, + "step": 43032 + }, + { + "epoch": 0.8614568475840152, + "grad_norm": 1.2700772285461426, + "learning_rate": 4.94874382895758e-07, + "loss": 0.3535, + "step": 43033 + }, + { + "epoch": 0.8614768661011436, + "grad_norm": 1.9645577669143677, + "learning_rate": 4.947337725072577e-07, + "loss": 0.7848, + "step": 43034 + }, + { + "epoch": 0.8614968846182719, + "grad_norm": 1.103902816772461, + "learning_rate": 4.945931810579268e-07, + "loss": 0.2932, + "step": 43035 + }, + { + "epoch": 0.8615169031354002, + "grad_norm": 1.045048713684082, + "learning_rate": 4.944526085483553e-07, + "loss": 0.2781, + "step": 43036 + }, + { + "epoch": 0.8615369216525286, + "grad_norm": 1.1258978843688965, + "learning_rate": 4.943120549791369e-07, + "loss": 0.3282, + "step": 43037 + }, + { + "epoch": 0.8615569401696569, + "grad_norm": 1.1758674383163452, + "learning_rate": 4.941715203508607e-07, + "loss": 0.2746, + "step": 43038 + }, + { + "epoch": 0.8615769586867853, + "grad_norm": 1.1501740217208862, + "learning_rate": 4.940310046641184e-07, + "loss": 0.3266, + "step": 43039 + }, + { + "epoch": 0.8615969772039136, + "grad_norm": 1.0109409093856812, + "learning_rate": 4.938905079194994e-07, + "loss": 0.2046, + "step": 43040 + }, + { + "epoch": 0.861616995721042, + "grad_norm": 1.1428327560424805, + "learning_rate": 4.93750030117594e-07, + "loss": 0.2912, + "step": 43041 + }, + { + "epoch": 0.8616370142381703, + "grad_norm": 1.1259934902191162, + "learning_rate": 4.93609571258995e-07, + "loss": 0.2723, + "step": 43042 + }, + { + "epoch": 0.8616570327552987, + "grad_norm": 1.1466403007507324, + "learning_rate": 4.934691313442902e-07, + "loss": 0.3125, + "step": 43043 + }, + { + "epoch": 0.861677051272427, + "grad_norm": 1.1573388576507568, + "learning_rate": 4.93328710374073e-07, + "loss": 0.34, + "step": 43044 + }, + { + "epoch": 0.8616970697895553, + "grad_norm": 0.9930062294006348, + "learning_rate": 4.931883083489308e-07, + "loss": 0.326, + "step": 43045 + }, + { + "epoch": 0.8617170883066837, + "grad_norm": 1.0937297344207764, + "learning_rate": 4.930479252694559e-07, + "loss": 0.2767, + "step": 43046 + }, + { + "epoch": 0.861737106823812, + "grad_norm": 1.0456335544586182, + "learning_rate": 4.929075611362377e-07, + "loss": 0.2409, + "step": 43047 + }, + { + "epoch": 0.8617571253409404, + "grad_norm": 0.9533478021621704, + "learning_rate": 4.927672159498659e-07, + "loss": 0.2968, + "step": 43048 + }, + { + "epoch": 0.8617771438580687, + "grad_norm": 1.1878467798233032, + "learning_rate": 4.926268897109309e-07, + "loss": 0.3028, + "step": 43049 + }, + { + "epoch": 0.8617971623751971, + "grad_norm": 1.2272508144378662, + "learning_rate": 4.924865824200215e-07, + "loss": 0.2852, + "step": 43050 + }, + { + "epoch": 0.8618171808923254, + "grad_norm": 1.177042841911316, + "learning_rate": 4.923462940777296e-07, + "loss": 0.2996, + "step": 43051 + }, + { + "epoch": 0.8618371994094537, + "grad_norm": 1.0594464540481567, + "learning_rate": 4.922060246846439e-07, + "loss": 0.2978, + "step": 43052 + }, + { + "epoch": 0.8618572179265821, + "grad_norm": 1.0373677015304565, + "learning_rate": 4.920657742413537e-07, + "loss": 0.2974, + "step": 43053 + }, + { + "epoch": 0.8618772364437104, + "grad_norm": 1.0109001398086548, + "learning_rate": 4.919255427484482e-07, + "loss": 0.298, + "step": 43054 + }, + { + "epoch": 0.8618972549608388, + "grad_norm": 1.2125712633132935, + "learning_rate": 4.917853302065184e-07, + "loss": 0.3384, + "step": 43055 + }, + { + "epoch": 0.8619172734779671, + "grad_norm": 1.0954079627990723, + "learning_rate": 4.916451366161518e-07, + "loss": 0.2785, + "step": 43056 + }, + { + "epoch": 0.8619372919950955, + "grad_norm": 1.077621579170227, + "learning_rate": 4.9150496197794e-07, + "loss": 0.2449, + "step": 43057 + }, + { + "epoch": 0.8619573105122238, + "grad_norm": 1.0583512783050537, + "learning_rate": 4.913648062924698e-07, + "loss": 0.3173, + "step": 43058 + }, + { + "epoch": 0.8619773290293522, + "grad_norm": 1.1649497747421265, + "learning_rate": 4.912246695603329e-07, + "loss": 0.2717, + "step": 43059 + }, + { + "epoch": 0.8619973475464805, + "grad_norm": 1.174268364906311, + "learning_rate": 4.910845517821172e-07, + "loss": 0.3113, + "step": 43060 + }, + { + "epoch": 0.8620173660636088, + "grad_norm": 1.266257882118225, + "learning_rate": 4.909444529584118e-07, + "loss": 0.3234, + "step": 43061 + }, + { + "epoch": 0.8620373845807372, + "grad_norm": 1.135867953300476, + "learning_rate": 4.908043730898054e-07, + "loss": 0.2731, + "step": 43062 + }, + { + "epoch": 0.8620574030978655, + "grad_norm": 1.1262441873550415, + "learning_rate": 4.906643121768862e-07, + "loss": 0.313, + "step": 43063 + }, + { + "epoch": 0.8620774216149939, + "grad_norm": 1.1176766157150269, + "learning_rate": 4.905242702202445e-07, + "loss": 0.2622, + "step": 43064 + }, + { + "epoch": 0.8620974401321222, + "grad_norm": 1.2154498100280762, + "learning_rate": 4.903842472204684e-07, + "loss": 0.2671, + "step": 43065 + }, + { + "epoch": 0.8621174586492506, + "grad_norm": 1.9452955722808838, + "learning_rate": 4.902442431781468e-07, + "loss": 0.7026, + "step": 43066 + }, + { + "epoch": 0.8621374771663789, + "grad_norm": 1.8802814483642578, + "learning_rate": 4.901042580938665e-07, + "loss": 0.7578, + "step": 43067 + }, + { + "epoch": 0.8621574956835072, + "grad_norm": 1.1235278844833374, + "learning_rate": 4.899642919682185e-07, + "loss": 0.2449, + "step": 43068 + }, + { + "epoch": 0.8621775142006356, + "grad_norm": 1.2476156949996948, + "learning_rate": 4.898243448017892e-07, + "loss": 0.2928, + "step": 43069 + }, + { + "epoch": 0.8621975327177639, + "grad_norm": 1.1150740385055542, + "learning_rate": 4.896844165951686e-07, + "loss": 0.238, + "step": 43070 + }, + { + "epoch": 0.8622175512348923, + "grad_norm": 1.180956244468689, + "learning_rate": 4.895445073489441e-07, + "loss": 0.2989, + "step": 43071 + }, + { + "epoch": 0.8622375697520206, + "grad_norm": 1.2912887334823608, + "learning_rate": 4.894046170637029e-07, + "loss": 0.3122, + "step": 43072 + }, + { + "epoch": 0.862257588269149, + "grad_norm": 1.227518081665039, + "learning_rate": 4.892647457400346e-07, + "loss": 0.3249, + "step": 43073 + }, + { + "epoch": 0.8622776067862773, + "grad_norm": 1.112762451171875, + "learning_rate": 4.891248933785269e-07, + "loss": 0.2701, + "step": 43074 + }, + { + "epoch": 0.8622976253034057, + "grad_norm": 1.1194391250610352, + "learning_rate": 4.889850599797675e-07, + "loss": 0.2405, + "step": 43075 + }, + { + "epoch": 0.862317643820534, + "grad_norm": 1.0860517024993896, + "learning_rate": 4.888452455443432e-07, + "loss": 0.2831, + "step": 43076 + }, + { + "epoch": 0.8623376623376623, + "grad_norm": 1.2534123659133911, + "learning_rate": 4.887054500728433e-07, + "loss": 0.304, + "step": 43077 + }, + { + "epoch": 0.8623576808547907, + "grad_norm": 1.0103504657745361, + "learning_rate": 4.88565673565855e-07, + "loss": 0.2872, + "step": 43078 + }, + { + "epoch": 0.862377699371919, + "grad_norm": 1.301382064819336, + "learning_rate": 4.884259160239657e-07, + "loss": 0.2893, + "step": 43079 + }, + { + "epoch": 0.8623977178890474, + "grad_norm": 1.1109966039657593, + "learning_rate": 4.88286177447762e-07, + "loss": 0.3318, + "step": 43080 + }, + { + "epoch": 0.8624177364061757, + "grad_norm": 1.1666769981384277, + "learning_rate": 4.881464578378336e-07, + "loss": 0.3321, + "step": 43081 + }, + { + "epoch": 0.8624377549233041, + "grad_norm": 1.0293042659759521, + "learning_rate": 4.880067571947649e-07, + "loss": 0.3095, + "step": 43082 + }, + { + "epoch": 0.8624577734404324, + "grad_norm": 0.9774541258811951, + "learning_rate": 4.878670755191467e-07, + "loss": 0.274, + "step": 43083 + }, + { + "epoch": 0.8624777919575607, + "grad_norm": 1.0089099407196045, + "learning_rate": 4.877274128115634e-07, + "loss": 0.2507, + "step": 43084 + }, + { + "epoch": 0.8624978104746891, + "grad_norm": 1.2139756679534912, + "learning_rate": 4.875877690726022e-07, + "loss": 0.3268, + "step": 43085 + }, + { + "epoch": 0.8625178289918174, + "grad_norm": 1.1171607971191406, + "learning_rate": 4.874481443028522e-07, + "loss": 0.2674, + "step": 43086 + }, + { + "epoch": 0.8625378475089458, + "grad_norm": 1.11946702003479, + "learning_rate": 4.873085385028992e-07, + "loss": 0.3011, + "step": 43087 + }, + { + "epoch": 0.8625578660260741, + "grad_norm": 1.071311116218567, + "learning_rate": 4.871689516733297e-07, + "loss": 0.2661, + "step": 43088 + }, + { + "epoch": 0.8625778845432025, + "grad_norm": 1.2018784284591675, + "learning_rate": 4.8702938381473e-07, + "loss": 0.3555, + "step": 43089 + }, + { + "epoch": 0.8625979030603308, + "grad_norm": 1.2477176189422607, + "learning_rate": 4.86889834927688e-07, + "loss": 0.3342, + "step": 43090 + }, + { + "epoch": 0.8626179215774592, + "grad_norm": 1.1098406314849854, + "learning_rate": 4.867503050127903e-07, + "loss": 0.3183, + "step": 43091 + }, + { + "epoch": 0.8626379400945875, + "grad_norm": 1.0694493055343628, + "learning_rate": 4.866107940706228e-07, + "loss": 0.2805, + "step": 43092 + }, + { + "epoch": 0.8626579586117158, + "grad_norm": 1.0750328302383423, + "learning_rate": 4.864713021017725e-07, + "loss": 0.2447, + "step": 43093 + }, + { + "epoch": 0.8626779771288442, + "grad_norm": 1.0286606550216675, + "learning_rate": 4.863318291068248e-07, + "loss": 0.2836, + "step": 43094 + }, + { + "epoch": 0.8626979956459725, + "grad_norm": 1.2155810594558716, + "learning_rate": 4.861923750863662e-07, + "loss": 0.3178, + "step": 43095 + }, + { + "epoch": 0.8627180141631009, + "grad_norm": 1.1939764022827148, + "learning_rate": 4.860529400409847e-07, + "loss": 0.254, + "step": 43096 + }, + { + "epoch": 0.8627380326802292, + "grad_norm": 1.1203935146331787, + "learning_rate": 4.859135239712654e-07, + "loss": 0.2682, + "step": 43097 + }, + { + "epoch": 0.8627580511973576, + "grad_norm": 1.0907273292541504, + "learning_rate": 4.857741268777927e-07, + "loss": 0.2709, + "step": 43098 + }, + { + "epoch": 0.8627780697144859, + "grad_norm": 1.120539903640747, + "learning_rate": 4.856347487611557e-07, + "loss": 0.2512, + "step": 43099 + }, + { + "epoch": 0.8627980882316142, + "grad_norm": 2.0514891147613525, + "learning_rate": 4.854953896219383e-07, + "loss": 0.7236, + "step": 43100 + }, + { + "epoch": 0.8628181067487426, + "grad_norm": 1.1408170461654663, + "learning_rate": 4.853560494607268e-07, + "loss": 0.306, + "step": 43101 + }, + { + "epoch": 0.8628381252658709, + "grad_norm": 1.072874903678894, + "learning_rate": 4.852167282781067e-07, + "loss": 0.2673, + "step": 43102 + }, + { + "epoch": 0.8628581437829993, + "grad_norm": 1.0546602010726929, + "learning_rate": 4.850774260746633e-07, + "loss": 0.3334, + "step": 43103 + }, + { + "epoch": 0.8628781623001276, + "grad_norm": 1.0888227224349976, + "learning_rate": 4.849381428509836e-07, + "loss": 0.2759, + "step": 43104 + }, + { + "epoch": 0.862898180817256, + "grad_norm": 1.9632837772369385, + "learning_rate": 4.847988786076524e-07, + "loss": 0.7118, + "step": 43105 + }, + { + "epoch": 0.8629181993343843, + "grad_norm": 1.9089587926864624, + "learning_rate": 4.846596333452553e-07, + "loss": 0.7823, + "step": 43106 + }, + { + "epoch": 0.8629382178515127, + "grad_norm": 1.0901044607162476, + "learning_rate": 4.845204070643761e-07, + "loss": 0.2891, + "step": 43107 + }, + { + "epoch": 0.862958236368641, + "grad_norm": 1.0775474309921265, + "learning_rate": 4.843811997656012e-07, + "loss": 0.2895, + "step": 43108 + }, + { + "epoch": 0.8629782548857693, + "grad_norm": 1.1741012334823608, + "learning_rate": 4.842420114495172e-07, + "loss": 0.2615, + "step": 43109 + }, + { + "epoch": 0.8629982734028977, + "grad_norm": 1.0518591403961182, + "learning_rate": 4.841028421167077e-07, + "loss": 0.2538, + "step": 43110 + }, + { + "epoch": 0.863018291920026, + "grad_norm": 2.0311942100524902, + "learning_rate": 4.839636917677571e-07, + "loss": 0.7617, + "step": 43111 + }, + { + "epoch": 0.8630383104371544, + "grad_norm": 1.0208511352539062, + "learning_rate": 4.838245604032527e-07, + "loss": 0.2664, + "step": 43112 + }, + { + "epoch": 0.8630583289542827, + "grad_norm": 1.107090711593628, + "learning_rate": 4.836854480237773e-07, + "loss": 0.2739, + "step": 43113 + }, + { + "epoch": 0.8630783474714111, + "grad_norm": 1.0673432350158691, + "learning_rate": 4.835463546299168e-07, + "loss": 0.2947, + "step": 43114 + }, + { + "epoch": 0.8630983659885394, + "grad_norm": 1.2782751321792603, + "learning_rate": 4.83407280222255e-07, + "loss": 0.2843, + "step": 43115 + }, + { + "epoch": 0.8631183845056677, + "grad_norm": 1.1059396266937256, + "learning_rate": 4.832682248013765e-07, + "loss": 0.3466, + "step": 43116 + }, + { + "epoch": 0.8631384030227961, + "grad_norm": 1.126992106437683, + "learning_rate": 4.831291883678673e-07, + "loss": 0.3051, + "step": 43117 + }, + { + "epoch": 0.8631584215399244, + "grad_norm": 1.0473573207855225, + "learning_rate": 4.829901709223112e-07, + "loss": 0.2951, + "step": 43118 + }, + { + "epoch": 0.8631784400570528, + "grad_norm": 1.136580228805542, + "learning_rate": 4.828511724652918e-07, + "loss": 0.2917, + "step": 43119 + }, + { + "epoch": 0.8631984585741811, + "grad_norm": 1.2240633964538574, + "learning_rate": 4.827121929973933e-07, + "loss": 0.3154, + "step": 43120 + }, + { + "epoch": 0.8632184770913095, + "grad_norm": 1.0456340312957764, + "learning_rate": 4.825732325192006e-07, + "loss": 0.2564, + "step": 43121 + }, + { + "epoch": 0.8632384956084378, + "grad_norm": 1.9044134616851807, + "learning_rate": 4.824342910312984e-07, + "loss": 0.772, + "step": 43122 + }, + { + "epoch": 0.8632585141255662, + "grad_norm": 1.1697584390640259, + "learning_rate": 4.822953685342707e-07, + "loss": 0.3193, + "step": 43123 + }, + { + "epoch": 0.8632785326426945, + "grad_norm": 1.1801197528839111, + "learning_rate": 4.821564650287008e-07, + "loss": 0.3592, + "step": 43124 + }, + { + "epoch": 0.8632985511598228, + "grad_norm": 1.0956859588623047, + "learning_rate": 4.820175805151722e-07, + "loss": 0.2954, + "step": 43125 + }, + { + "epoch": 0.8633185696769512, + "grad_norm": 1.202386736869812, + "learning_rate": 4.818787149942705e-07, + "loss": 0.3305, + "step": 43126 + }, + { + "epoch": 0.8633385881940795, + "grad_norm": 1.29774808883667, + "learning_rate": 4.81739868466578e-07, + "loss": 0.3136, + "step": 43127 + }, + { + "epoch": 0.8633586067112079, + "grad_norm": 1.2091790437698364, + "learning_rate": 4.816010409326788e-07, + "loss": 0.307, + "step": 43128 + }, + { + "epoch": 0.8633786252283362, + "grad_norm": 1.1367772817611694, + "learning_rate": 4.814622323931556e-07, + "loss": 0.2719, + "step": 43129 + }, + { + "epoch": 0.8633986437454646, + "grad_norm": 1.1495050191879272, + "learning_rate": 4.813234428485936e-07, + "loss": 0.2966, + "step": 43130 + }, + { + "epoch": 0.8634186622625929, + "grad_norm": 1.079633355140686, + "learning_rate": 4.811846722995756e-07, + "loss": 0.2745, + "step": 43131 + }, + { + "epoch": 0.8634386807797212, + "grad_norm": 1.2729054689407349, + "learning_rate": 4.81045920746685e-07, + "loss": 0.2832, + "step": 43132 + }, + { + "epoch": 0.8634586992968496, + "grad_norm": 1.3510088920593262, + "learning_rate": 4.809071881905036e-07, + "loss": 0.2937, + "step": 43133 + }, + { + "epoch": 0.8634787178139779, + "grad_norm": 1.1165450811386108, + "learning_rate": 4.807684746316154e-07, + "loss": 0.2958, + "step": 43134 + }, + { + "epoch": 0.8634987363311063, + "grad_norm": 1.1884640455245972, + "learning_rate": 4.806297800706055e-07, + "loss": 0.2515, + "step": 43135 + }, + { + "epoch": 0.8635187548482346, + "grad_norm": 1.0170618295669556, + "learning_rate": 4.804911045080557e-07, + "loss": 0.2902, + "step": 43136 + }, + { + "epoch": 0.863538773365363, + "grad_norm": 2.074122428894043, + "learning_rate": 4.803524479445481e-07, + "loss": 0.7635, + "step": 43137 + }, + { + "epoch": 0.8635587918824913, + "grad_norm": 1.0550951957702637, + "learning_rate": 4.802138103806653e-07, + "loss": 0.3023, + "step": 43138 + }, + { + "epoch": 0.8635788103996197, + "grad_norm": 1.286841869354248, + "learning_rate": 4.800751918169922e-07, + "loss": 0.3184, + "step": 43139 + }, + { + "epoch": 0.863598828916748, + "grad_norm": 1.2824653387069702, + "learning_rate": 4.799365922541099e-07, + "loss": 0.2653, + "step": 43140 + }, + { + "epoch": 0.8636188474338763, + "grad_norm": 2.0107181072235107, + "learning_rate": 4.797980116926021e-07, + "loss": 0.6994, + "step": 43141 + }, + { + "epoch": 0.8636388659510047, + "grad_norm": 1.2494802474975586, + "learning_rate": 4.796594501330493e-07, + "loss": 0.2735, + "step": 43142 + }, + { + "epoch": 0.863658884468133, + "grad_norm": 1.0361578464508057, + "learning_rate": 4.795209075760359e-07, + "loss": 0.2872, + "step": 43143 + }, + { + "epoch": 0.8636789029852614, + "grad_norm": 0.9992208480834961, + "learning_rate": 4.793823840221446e-07, + "loss": 0.3011, + "step": 43144 + }, + { + "epoch": 0.8636989215023897, + "grad_norm": 1.854109287261963, + "learning_rate": 4.792438794719567e-07, + "loss": 0.7801, + "step": 43145 + }, + { + "epoch": 0.8637189400195181, + "grad_norm": 1.337477445602417, + "learning_rate": 4.791053939260532e-07, + "loss": 0.2885, + "step": 43146 + }, + { + "epoch": 0.8637389585366464, + "grad_norm": 1.1995021104812622, + "learning_rate": 4.789669273850184e-07, + "loss": 0.3223, + "step": 43147 + }, + { + "epoch": 0.8637589770537747, + "grad_norm": 1.0196055173873901, + "learning_rate": 4.788284798494341e-07, + "loss": 0.2846, + "step": 43148 + }, + { + "epoch": 0.8637789955709031, + "grad_norm": 1.2292536497116089, + "learning_rate": 4.786900513198822e-07, + "loss": 0.2882, + "step": 43149 + }, + { + "epoch": 0.8637990140880314, + "grad_norm": 1.2408077716827393, + "learning_rate": 4.785516417969444e-07, + "loss": 0.3092, + "step": 43150 + }, + { + "epoch": 0.8638190326051598, + "grad_norm": 1.1570477485656738, + "learning_rate": 4.784132512812012e-07, + "loss": 0.3061, + "step": 43151 + }, + { + "epoch": 0.8638390511222881, + "grad_norm": 0.9742785096168518, + "learning_rate": 4.782748797732368e-07, + "loss": 0.3041, + "step": 43152 + }, + { + "epoch": 0.8638590696394165, + "grad_norm": 1.1115046739578247, + "learning_rate": 4.781365272736316e-07, + "loss": 0.2974, + "step": 43153 + }, + { + "epoch": 0.8638790881565448, + "grad_norm": 1.0262025594711304, + "learning_rate": 4.779981937829675e-07, + "loss": 0.2649, + "step": 43154 + }, + { + "epoch": 0.8638991066736732, + "grad_norm": 1.0755460262298584, + "learning_rate": 4.778598793018257e-07, + "loss": 0.2623, + "step": 43155 + }, + { + "epoch": 0.8639191251908015, + "grad_norm": 1.057110071182251, + "learning_rate": 4.777215838307869e-07, + "loss": 0.2725, + "step": 43156 + }, + { + "epoch": 0.8639391437079298, + "grad_norm": 1.1198234558105469, + "learning_rate": 4.775833073704345e-07, + "loss": 0.3249, + "step": 43157 + }, + { + "epoch": 0.8639591622250582, + "grad_norm": 1.1191890239715576, + "learning_rate": 4.774450499213485e-07, + "loss": 0.305, + "step": 43158 + }, + { + "epoch": 0.8639791807421865, + "grad_norm": 1.0049892663955688, + "learning_rate": 4.773068114841095e-07, + "loss": 0.288, + "step": 43159 + }, + { + "epoch": 0.8639991992593149, + "grad_norm": 0.9907408356666565, + "learning_rate": 4.771685920592989e-07, + "loss": 0.2454, + "step": 43160 + }, + { + "epoch": 0.8640192177764432, + "grad_norm": 1.5152097940444946, + "learning_rate": 4.770303916474994e-07, + "loss": 0.3114, + "step": 43161 + }, + { + "epoch": 0.8640392362935716, + "grad_norm": 1.0993411540985107, + "learning_rate": 4.768922102492906e-07, + "loss": 0.2699, + "step": 43162 + }, + { + "epoch": 0.8640592548106999, + "grad_norm": 1.3390902280807495, + "learning_rate": 4.767540478652538e-07, + "loss": 0.294, + "step": 43163 + }, + { + "epoch": 0.8640792733278282, + "grad_norm": 1.883368730545044, + "learning_rate": 4.766159044959695e-07, + "loss": 0.7274, + "step": 43164 + }, + { + "epoch": 0.8640992918449566, + "grad_norm": 1.314326286315918, + "learning_rate": 4.7647778014201787e-07, + "loss": 0.3516, + "step": 43165 + }, + { + "epoch": 0.8641193103620849, + "grad_norm": 1.0698155164718628, + "learning_rate": 4.7633967480398056e-07, + "loss": 0.2796, + "step": 43166 + }, + { + "epoch": 0.8641393288792133, + "grad_norm": 1.1363401412963867, + "learning_rate": 4.762015884824378e-07, + "loss": 0.3215, + "step": 43167 + }, + { + "epoch": 0.8641593473963416, + "grad_norm": 1.123076319694519, + "learning_rate": 4.7606352117797015e-07, + "loss": 0.3405, + "step": 43168 + }, + { + "epoch": 0.86417936591347, + "grad_norm": 1.1395715475082397, + "learning_rate": 4.759254728911572e-07, + "loss": 0.311, + "step": 43169 + }, + { + "epoch": 0.8641993844305983, + "grad_norm": 1.0589145421981812, + "learning_rate": 4.757874436225807e-07, + "loss": 0.317, + "step": 43170 + }, + { + "epoch": 0.8642194029477267, + "grad_norm": 1.1891990900039673, + "learning_rate": 4.756494333728201e-07, + "loss": 0.3072, + "step": 43171 + }, + { + "epoch": 0.864239421464855, + "grad_norm": 2.102682113647461, + "learning_rate": 4.7551144214245505e-07, + "loss": 0.7383, + "step": 43172 + }, + { + "epoch": 0.8642594399819833, + "grad_norm": 1.4911067485809326, + "learning_rate": 4.7537346993206615e-07, + "loss": 0.2996, + "step": 43173 + }, + { + "epoch": 0.8642794584991117, + "grad_norm": 1.2993770837783813, + "learning_rate": 4.75235516742234e-07, + "loss": 0.3119, + "step": 43174 + }, + { + "epoch": 0.86429947701624, + "grad_norm": 1.0061002969741821, + "learning_rate": 4.750975825735382e-07, + "loss": 0.2529, + "step": 43175 + }, + { + "epoch": 0.8643194955333684, + "grad_norm": 1.1182897090911865, + "learning_rate": 4.749596674265583e-07, + "loss": 0.3102, + "step": 43176 + }, + { + "epoch": 0.8643395140504967, + "grad_norm": 1.0809297561645508, + "learning_rate": 4.7482177130187437e-07, + "loss": 0.2882, + "step": 43177 + }, + { + "epoch": 0.8643595325676251, + "grad_norm": 1.1013518571853638, + "learning_rate": 4.746838942000648e-07, + "loss": 0.2837, + "step": 43178 + }, + { + "epoch": 0.8643795510847534, + "grad_norm": 1.15336275100708, + "learning_rate": 4.7454603612171137e-07, + "loss": 0.3092, + "step": 43179 + }, + { + "epoch": 0.8643995696018817, + "grad_norm": 1.0612099170684814, + "learning_rate": 4.744081970673925e-07, + "loss": 0.2943, + "step": 43180 + }, + { + "epoch": 0.8644195881190101, + "grad_norm": 1.926324725151062, + "learning_rate": 4.7427037703768774e-07, + "loss": 0.8295, + "step": 43181 + }, + { + "epoch": 0.8644396066361384, + "grad_norm": 1.060944676399231, + "learning_rate": 4.74132576033175e-07, + "loss": 0.2617, + "step": 43182 + }, + { + "epoch": 0.8644596251532668, + "grad_norm": 1.1052767038345337, + "learning_rate": 4.7399479405443595e-07, + "loss": 0.2959, + "step": 43183 + }, + { + "epoch": 0.8644796436703951, + "grad_norm": 1.0525411367416382, + "learning_rate": 4.7385703110204906e-07, + "loss": 0.2612, + "step": 43184 + }, + { + "epoch": 0.8644996621875235, + "grad_norm": 1.118072748184204, + "learning_rate": 4.737192871765922e-07, + "loss": 0.3208, + "step": 43185 + }, + { + "epoch": 0.8645196807046518, + "grad_norm": 1.1422114372253418, + "learning_rate": 4.7358156227864603e-07, + "loss": 0.2487, + "step": 43186 + }, + { + "epoch": 0.8645396992217802, + "grad_norm": 1.1519091129302979, + "learning_rate": 4.734438564087884e-07, + "loss": 0.2907, + "step": 43187 + }, + { + "epoch": 0.8645597177389085, + "grad_norm": 1.323992133140564, + "learning_rate": 4.7330616956759945e-07, + "loss": 0.2894, + "step": 43188 + }, + { + "epoch": 0.8645797362560368, + "grad_norm": 1.0367754697799683, + "learning_rate": 4.73168501755657e-07, + "loss": 0.3158, + "step": 43189 + }, + { + "epoch": 0.8645997547731652, + "grad_norm": 1.0511513948440552, + "learning_rate": 4.7303085297354003e-07, + "loss": 0.3305, + "step": 43190 + }, + { + "epoch": 0.8646197732902935, + "grad_norm": 0.9964761137962341, + "learning_rate": 4.7289322322182643e-07, + "loss": 0.2494, + "step": 43191 + }, + { + "epoch": 0.8646397918074219, + "grad_norm": 1.434326410293579, + "learning_rate": 4.7275561250109624e-07, + "loss": 0.2707, + "step": 43192 + }, + { + "epoch": 0.8646598103245502, + "grad_norm": 1.2823073863983154, + "learning_rate": 4.726180208119274e-07, + "loss": 0.3128, + "step": 43193 + }, + { + "epoch": 0.8646798288416786, + "grad_norm": 1.1608214378356934, + "learning_rate": 4.724804481548978e-07, + "loss": 0.2835, + "step": 43194 + }, + { + "epoch": 0.8646998473588069, + "grad_norm": 1.1894965171813965, + "learning_rate": 4.7234289453058633e-07, + "loss": 0.3251, + "step": 43195 + }, + { + "epoch": 0.8647198658759352, + "grad_norm": 1.0746690034866333, + "learning_rate": 4.7220535993956983e-07, + "loss": 0.3216, + "step": 43196 + }, + { + "epoch": 0.8647398843930636, + "grad_norm": 1.1222286224365234, + "learning_rate": 4.7206784438242837e-07, + "loss": 0.3048, + "step": 43197 + }, + { + "epoch": 0.8647599029101919, + "grad_norm": 1.174290418624878, + "learning_rate": 4.7193034785973866e-07, + "loss": 0.2751, + "step": 43198 + }, + { + "epoch": 0.8647799214273203, + "grad_norm": 1.0438129901885986, + "learning_rate": 4.7179287037208034e-07, + "loss": 0.2832, + "step": 43199 + }, + { + "epoch": 0.8647999399444486, + "grad_norm": 1.0702224969863892, + "learning_rate": 4.7165541192002893e-07, + "loss": 0.2797, + "step": 43200 + }, + { + "epoch": 0.864819958461577, + "grad_norm": 1.0850991010665894, + "learning_rate": 4.715179725041652e-07, + "loss": 0.278, + "step": 43201 + }, + { + "epoch": 0.8648399769787053, + "grad_norm": 1.264815330505371, + "learning_rate": 4.7138055212506475e-07, + "loss": 0.3008, + "step": 43202 + }, + { + "epoch": 0.8648599954958337, + "grad_norm": 1.043457269668579, + "learning_rate": 4.7124315078330597e-07, + "loss": 0.2993, + "step": 43203 + }, + { + "epoch": 0.864880014012962, + "grad_norm": 1.0369266271591187, + "learning_rate": 4.7110576847946564e-07, + "loss": 0.2933, + "step": 43204 + }, + { + "epoch": 0.8649000325300903, + "grad_norm": 1.1041213274002075, + "learning_rate": 4.7096840521412334e-07, + "loss": 0.3095, + "step": 43205 + }, + { + "epoch": 0.8649200510472187, + "grad_norm": 1.213514804840088, + "learning_rate": 4.708310609878547e-07, + "loss": 0.2971, + "step": 43206 + }, + { + "epoch": 0.864940069564347, + "grad_norm": 1.033410906791687, + "learning_rate": 4.7069373580123757e-07, + "loss": 0.2848, + "step": 43207 + }, + { + "epoch": 0.8649600880814754, + "grad_norm": 1.220672845840454, + "learning_rate": 4.705564296548493e-07, + "loss": 0.2707, + "step": 43208 + }, + { + "epoch": 0.8649801065986037, + "grad_norm": 1.12308669090271, + "learning_rate": 4.7041914254926657e-07, + "loss": 0.2854, + "step": 43209 + }, + { + "epoch": 0.8650001251157321, + "grad_norm": 1.1511067152023315, + "learning_rate": 4.7028187448506736e-07, + "loss": 0.3002, + "step": 43210 + }, + { + "epoch": 0.8650201436328604, + "grad_norm": 1.1838895082473755, + "learning_rate": 4.701446254628278e-07, + "loss": 0.2717, + "step": 43211 + }, + { + "epoch": 0.8650401621499887, + "grad_norm": 1.2259948253631592, + "learning_rate": 4.7000739548312635e-07, + "loss": 0.2641, + "step": 43212 + }, + { + "epoch": 0.8650601806671171, + "grad_norm": 1.268123745918274, + "learning_rate": 4.6987018454653757e-07, + "loss": 0.3162, + "step": 43213 + }, + { + "epoch": 0.8650801991842454, + "grad_norm": 1.248238205909729, + "learning_rate": 4.6973299265364094e-07, + "loss": 0.2975, + "step": 43214 + }, + { + "epoch": 0.8651002177013738, + "grad_norm": 1.1461156606674194, + "learning_rate": 4.695958198050116e-07, + "loss": 0.3221, + "step": 43215 + }, + { + "epoch": 0.8651202362185021, + "grad_norm": 1.1004607677459717, + "learning_rate": 4.694586660012268e-07, + "loss": 0.288, + "step": 43216 + }, + { + "epoch": 0.8651402547356305, + "grad_norm": 1.0649244785308838, + "learning_rate": 4.6932153124286226e-07, + "loss": 0.3508, + "step": 43217 + }, + { + "epoch": 0.8651602732527588, + "grad_norm": 0.9617221355438232, + "learning_rate": 4.691844155304948e-07, + "loss": 0.2838, + "step": 43218 + }, + { + "epoch": 0.8651802917698872, + "grad_norm": 1.1479811668395996, + "learning_rate": 4.6904731886470157e-07, + "loss": 0.3337, + "step": 43219 + }, + { + "epoch": 0.8652003102870155, + "grad_norm": 1.4616119861602783, + "learning_rate": 4.689102412460583e-07, + "loss": 0.31, + "step": 43220 + }, + { + "epoch": 0.8652203288041438, + "grad_norm": 1.0212945938110352, + "learning_rate": 4.6877318267514125e-07, + "loss": 0.3145, + "step": 43221 + }, + { + "epoch": 0.8652403473212722, + "grad_norm": 1.1473212242126465, + "learning_rate": 4.6863614315252605e-07, + "loss": 0.3136, + "step": 43222 + }, + { + "epoch": 0.8652603658384005, + "grad_norm": 1.2508512735366821, + "learning_rate": 4.6849912267878995e-07, + "loss": 0.3174, + "step": 43223 + }, + { + "epoch": 0.8652803843555289, + "grad_norm": 1.328832745552063, + "learning_rate": 4.68362121254507e-07, + "loss": 0.3, + "step": 43224 + }, + { + "epoch": 0.8653004028726572, + "grad_norm": 1.0656795501708984, + "learning_rate": 4.682251388802561e-07, + "loss": 0.2895, + "step": 43225 + }, + { + "epoch": 0.8653204213897856, + "grad_norm": 1.211018443107605, + "learning_rate": 4.6808817555661023e-07, + "loss": 0.2722, + "step": 43226 + }, + { + "epoch": 0.8653404399069139, + "grad_norm": 1.1173115968704224, + "learning_rate": 4.6795123128414723e-07, + "loss": 0.2974, + "step": 43227 + }, + { + "epoch": 0.8653604584240422, + "grad_norm": 1.1125156879425049, + "learning_rate": 4.6781430606344214e-07, + "loss": 0.3049, + "step": 43228 + }, + { + "epoch": 0.8653804769411706, + "grad_norm": 1.1195333003997803, + "learning_rate": 4.676773998950707e-07, + "loss": 0.3215, + "step": 43229 + }, + { + "epoch": 0.8654004954582989, + "grad_norm": 1.0863332748413086, + "learning_rate": 4.6754051277960735e-07, + "loss": 0.2468, + "step": 43230 + }, + { + "epoch": 0.8654205139754273, + "grad_norm": 1.0121134519577026, + "learning_rate": 4.674036447176278e-07, + "loss": 0.2827, + "step": 43231 + }, + { + "epoch": 0.8654405324925556, + "grad_norm": 1.8895467519760132, + "learning_rate": 4.672667957097088e-07, + "loss": 0.7973, + "step": 43232 + }, + { + "epoch": 0.865460551009684, + "grad_norm": 1.8840471506118774, + "learning_rate": 4.671299657564249e-07, + "loss": 0.7562, + "step": 43233 + }, + { + "epoch": 0.8654805695268123, + "grad_norm": 1.904219388961792, + "learning_rate": 4.669931548583506e-07, + "loss": 0.8106, + "step": 43234 + }, + { + "epoch": 0.8655005880439407, + "grad_norm": 1.0612115859985352, + "learning_rate": 4.668563630160611e-07, + "loss": 0.3091, + "step": 43235 + }, + { + "epoch": 0.865520606561069, + "grad_norm": 1.2334221601486206, + "learning_rate": 4.667195902301325e-07, + "loss": 0.2887, + "step": 43236 + }, + { + "epoch": 0.8655406250781973, + "grad_norm": 1.0541800260543823, + "learning_rate": 4.665828365011388e-07, + "loss": 0.2893, + "step": 43237 + }, + { + "epoch": 0.8655606435953257, + "grad_norm": 1.0925579071044922, + "learning_rate": 4.664461018296557e-07, + "loss": 0.2701, + "step": 43238 + }, + { + "epoch": 0.865580662112454, + "grad_norm": 1.3482041358947754, + "learning_rate": 4.6630938621625767e-07, + "loss": 0.3187, + "step": 43239 + }, + { + "epoch": 0.8656006806295824, + "grad_norm": 1.1415382623672485, + "learning_rate": 4.661726896615182e-07, + "loss": 0.3097, + "step": 43240 + }, + { + "epoch": 0.8656206991467107, + "grad_norm": 1.2615817785263062, + "learning_rate": 4.660360121660146e-07, + "loss": 0.3245, + "step": 43241 + }, + { + "epoch": 0.8656407176638391, + "grad_norm": 1.1428592205047607, + "learning_rate": 4.6589935373031916e-07, + "loss": 0.3284, + "step": 43242 + }, + { + "epoch": 0.8656607361809674, + "grad_norm": 1.2618762254714966, + "learning_rate": 4.657627143550075e-07, + "loss": 0.3398, + "step": 43243 + }, + { + "epoch": 0.8656807546980957, + "grad_norm": 1.039379358291626, + "learning_rate": 4.6562609404065264e-07, + "loss": 0.307, + "step": 43244 + }, + { + "epoch": 0.8657007732152241, + "grad_norm": 1.062949299812317, + "learning_rate": 4.654894927878306e-07, + "loss": 0.2754, + "step": 43245 + }, + { + "epoch": 0.8657207917323524, + "grad_norm": 1.139896035194397, + "learning_rate": 4.6535291059711497e-07, + "loss": 0.2985, + "step": 43246 + }, + { + "epoch": 0.8657408102494808, + "grad_norm": 1.086503028869629, + "learning_rate": 4.652163474690802e-07, + "loss": 0.315, + "step": 43247 + }, + { + "epoch": 0.8657608287666091, + "grad_norm": 1.8879410028457642, + "learning_rate": 4.6507980340429857e-07, + "loss": 0.7533, + "step": 43248 + }, + { + "epoch": 0.8657808472837375, + "grad_norm": 1.1131778955459595, + "learning_rate": 4.64943278403347e-07, + "loss": 0.3435, + "step": 43249 + }, + { + "epoch": 0.8658008658008658, + "grad_norm": 1.0652157068252563, + "learning_rate": 4.648067724667965e-07, + "loss": 0.2877, + "step": 43250 + }, + { + "epoch": 0.8658208843179942, + "grad_norm": 1.2459471225738525, + "learning_rate": 4.646702855952234e-07, + "loss": 0.2805, + "step": 43251 + }, + { + "epoch": 0.8658409028351225, + "grad_norm": 1.2998559474945068, + "learning_rate": 4.645338177892006e-07, + "loss": 0.3389, + "step": 43252 + }, + { + "epoch": 0.8658609213522508, + "grad_norm": 1.9843711853027344, + "learning_rate": 4.643973690493003e-07, + "loss": 0.761, + "step": 43253 + }, + { + "epoch": 0.8658809398693792, + "grad_norm": 1.0618586540222168, + "learning_rate": 4.6426093937609885e-07, + "loss": 0.2483, + "step": 43254 + }, + { + "epoch": 0.8659009583865075, + "grad_norm": 1.1817889213562012, + "learning_rate": 4.641245287701679e-07, + "loss": 0.3086, + "step": 43255 + }, + { + "epoch": 0.8659209769036359, + "grad_norm": 1.0093327760696411, + "learning_rate": 4.6398813723208145e-07, + "loss": 0.3028, + "step": 43256 + }, + { + "epoch": 0.8659409954207642, + "grad_norm": 1.1363345384597778, + "learning_rate": 4.6385176476241135e-07, + "loss": 0.2811, + "step": 43257 + }, + { + "epoch": 0.8659610139378926, + "grad_norm": 1.1605948209762573, + "learning_rate": 4.637154113617337e-07, + "loss": 0.2981, + "step": 43258 + }, + { + "epoch": 0.8659810324550209, + "grad_norm": 1.0728800296783447, + "learning_rate": 4.635790770306203e-07, + "loss": 0.3026, + "step": 43259 + }, + { + "epoch": 0.8660010509721492, + "grad_norm": 1.133091688156128, + "learning_rate": 4.6344276176964355e-07, + "loss": 0.2746, + "step": 43260 + }, + { + "epoch": 0.8660210694892776, + "grad_norm": 1.1233880519866943, + "learning_rate": 4.6330646557937676e-07, + "loss": 0.325, + "step": 43261 + }, + { + "epoch": 0.8660410880064059, + "grad_norm": 1.188385248184204, + "learning_rate": 4.63170188460394e-07, + "loss": 0.2979, + "step": 43262 + }, + { + "epoch": 0.8660611065235343, + "grad_norm": 1.213526964187622, + "learning_rate": 4.6303393041326695e-07, + "loss": 0.2999, + "step": 43263 + }, + { + "epoch": 0.8660811250406626, + "grad_norm": 1.1136584281921387, + "learning_rate": 4.6289769143856913e-07, + "loss": 0.3142, + "step": 43264 + }, + { + "epoch": 0.866101143557791, + "grad_norm": 2.2801005840301514, + "learning_rate": 4.6276147153687334e-07, + "loss": 0.7788, + "step": 43265 + }, + { + "epoch": 0.8661211620749193, + "grad_norm": 1.2515738010406494, + "learning_rate": 4.6262527070875143e-07, + "loss": 0.3093, + "step": 43266 + }, + { + "epoch": 0.8661411805920477, + "grad_norm": 1.1373612880706787, + "learning_rate": 4.6248908895477674e-07, + "loss": 0.277, + "step": 43267 + }, + { + "epoch": 0.866161199109176, + "grad_norm": 1.1641684770584106, + "learning_rate": 4.6235292627552164e-07, + "loss": 0.3149, + "step": 43268 + }, + { + "epoch": 0.8661812176263043, + "grad_norm": 1.3397754430770874, + "learning_rate": 4.6221678267155847e-07, + "loss": 0.2681, + "step": 43269 + }, + { + "epoch": 0.8662012361434327, + "grad_norm": 1.0371971130371094, + "learning_rate": 4.620806581434589e-07, + "loss": 0.2739, + "step": 43270 + }, + { + "epoch": 0.866221254660561, + "grad_norm": 1.2500321865081787, + "learning_rate": 4.619445526917954e-07, + "loss": 0.2889, + "step": 43271 + }, + { + "epoch": 0.8662412731776894, + "grad_norm": 1.0515676736831665, + "learning_rate": 4.618084663171407e-07, + "loss": 0.3425, + "step": 43272 + }, + { + "epoch": 0.8662612916948177, + "grad_norm": 1.0853885412216187, + "learning_rate": 4.616723990200672e-07, + "loss": 0.3594, + "step": 43273 + }, + { + "epoch": 0.8662813102119461, + "grad_norm": 1.0223830938339233, + "learning_rate": 4.6153635080114503e-07, + "loss": 0.26, + "step": 43274 + }, + { + "epoch": 0.8663013287290744, + "grad_norm": 1.078554391860962, + "learning_rate": 4.6140032166094806e-07, + "loss": 0.2494, + "step": 43275 + }, + { + "epoch": 0.8663213472462027, + "grad_norm": 1.234878659248352, + "learning_rate": 4.6126431160004704e-07, + "loss": 0.2823, + "step": 43276 + }, + { + "epoch": 0.8663413657633311, + "grad_norm": 1.130197525024414, + "learning_rate": 4.611283206190148e-07, + "loss": 0.2952, + "step": 43277 + }, + { + "epoch": 0.8663613842804594, + "grad_norm": 1.196658968925476, + "learning_rate": 4.60992348718422e-07, + "loss": 0.3697, + "step": 43278 + }, + { + "epoch": 0.8663814027975878, + "grad_norm": 1.1484190225601196, + "learning_rate": 4.60856395898841e-07, + "loss": 0.2958, + "step": 43279 + }, + { + "epoch": 0.8664014213147161, + "grad_norm": 1.17378568649292, + "learning_rate": 4.6072046216084186e-07, + "loss": 0.3232, + "step": 43280 + }, + { + "epoch": 0.8664214398318445, + "grad_norm": 1.0424060821533203, + "learning_rate": 4.605845475049975e-07, + "loss": 0.2898, + "step": 43281 + }, + { + "epoch": 0.8664414583489728, + "grad_norm": 1.120621919631958, + "learning_rate": 4.60448651931879e-07, + "loss": 0.296, + "step": 43282 + }, + { + "epoch": 0.8664614768661012, + "grad_norm": 1.2422562837600708, + "learning_rate": 4.603127754420578e-07, + "loss": 0.3821, + "step": 43283 + }, + { + "epoch": 0.8664814953832295, + "grad_norm": 1.2461471557617188, + "learning_rate": 4.6017691803610375e-07, + "loss": 0.2805, + "step": 43284 + }, + { + "epoch": 0.8665015139003578, + "grad_norm": 1.29631769657135, + "learning_rate": 4.600410797145899e-07, + "loss": 0.2792, + "step": 43285 + }, + { + "epoch": 0.8665215324174862, + "grad_norm": 1.1427838802337646, + "learning_rate": 4.5990526047808626e-07, + "loss": 0.2636, + "step": 43286 + }, + { + "epoch": 0.8665415509346145, + "grad_norm": 1.0232422351837158, + "learning_rate": 4.597694603271624e-07, + "loss": 0.3039, + "step": 43287 + }, + { + "epoch": 0.8665615694517429, + "grad_norm": 1.123066782951355, + "learning_rate": 4.5963367926239233e-07, + "loss": 0.2514, + "step": 43288 + }, + { + "epoch": 0.8665815879688712, + "grad_norm": 1.046257495880127, + "learning_rate": 4.594979172843439e-07, + "loss": 0.3383, + "step": 43289 + }, + { + "epoch": 0.8666016064859996, + "grad_norm": 1.2324711084365845, + "learning_rate": 4.5936217439359e-07, + "loss": 0.279, + "step": 43290 + }, + { + "epoch": 0.8666216250031279, + "grad_norm": 1.1386600732803345, + "learning_rate": 4.592264505907007e-07, + "loss": 0.3089, + "step": 43291 + }, + { + "epoch": 0.8666416435202562, + "grad_norm": 1.1273878812789917, + "learning_rate": 4.5909074587624613e-07, + "loss": 0.303, + "step": 43292 + }, + { + "epoch": 0.8666616620373846, + "grad_norm": 1.070641040802002, + "learning_rate": 4.589550602507958e-07, + "loss": 0.2814, + "step": 43293 + }, + { + "epoch": 0.8666816805545129, + "grad_norm": 1.1031274795532227, + "learning_rate": 4.588193937149216e-07, + "loss": 0.3621, + "step": 43294 + }, + { + "epoch": 0.8667016990716413, + "grad_norm": 1.278655767440796, + "learning_rate": 4.5868374626919396e-07, + "loss": 0.2649, + "step": 43295 + }, + { + "epoch": 0.8667217175887696, + "grad_norm": 1.1693685054779053, + "learning_rate": 4.5854811791418263e-07, + "loss": 0.2435, + "step": 43296 + }, + { + "epoch": 0.866741736105898, + "grad_norm": 0.9707403779029846, + "learning_rate": 4.584125086504565e-07, + "loss": 0.2906, + "step": 43297 + }, + { + "epoch": 0.8667617546230263, + "grad_norm": 1.082209587097168, + "learning_rate": 4.582769184785879e-07, + "loss": 0.2416, + "step": 43298 + }, + { + "epoch": 0.8667817731401546, + "grad_norm": 1.1362419128417969, + "learning_rate": 4.5814134739914587e-07, + "loss": 0.2793, + "step": 43299 + }, + { + "epoch": 0.866801791657283, + "grad_norm": 1.2871930599212646, + "learning_rate": 4.5800579541269875e-07, + "loss": 0.2709, + "step": 43300 + }, + { + "epoch": 0.8668218101744113, + "grad_norm": 1.0712119340896606, + "learning_rate": 4.5787026251981894e-07, + "loss": 0.2653, + "step": 43301 + }, + { + "epoch": 0.8668418286915397, + "grad_norm": 1.1607133150100708, + "learning_rate": 4.5773474872107425e-07, + "loss": 0.2689, + "step": 43302 + }, + { + "epoch": 0.866861847208668, + "grad_norm": 1.0521764755249023, + "learning_rate": 4.5759925401703655e-07, + "loss": 0.3005, + "step": 43303 + }, + { + "epoch": 0.8668818657257964, + "grad_norm": 1.2496892213821411, + "learning_rate": 4.574637784082736e-07, + "loss": 0.3411, + "step": 43304 + }, + { + "epoch": 0.8669018842429247, + "grad_norm": 1.068479299545288, + "learning_rate": 4.5732832189535504e-07, + "loss": 0.251, + "step": 43305 + }, + { + "epoch": 0.8669219027600531, + "grad_norm": 1.0735398530960083, + "learning_rate": 4.5719288447885034e-07, + "loss": 0.2989, + "step": 43306 + }, + { + "epoch": 0.8669419212771814, + "grad_norm": 1.730351448059082, + "learning_rate": 4.5705746615932966e-07, + "loss": 0.7134, + "step": 43307 + }, + { + "epoch": 0.8669619397943097, + "grad_norm": 0.9545879364013672, + "learning_rate": 4.569220669373614e-07, + "loss": 0.2422, + "step": 43308 + }, + { + "epoch": 0.8669819583114381, + "grad_norm": 1.3150047063827515, + "learning_rate": 4.567866868135157e-07, + "loss": 0.3146, + "step": 43309 + }, + { + "epoch": 0.8670019768285664, + "grad_norm": 1.4149116277694702, + "learning_rate": 4.566513257883609e-07, + "loss": 0.3265, + "step": 43310 + }, + { + "epoch": 0.8670219953456948, + "grad_norm": 1.1927883625030518, + "learning_rate": 4.565159838624655e-07, + "loss": 0.2794, + "step": 43311 + }, + { + "epoch": 0.8670420138628231, + "grad_norm": 1.0656989812850952, + "learning_rate": 4.563806610363997e-07, + "loss": 0.3368, + "step": 43312 + }, + { + "epoch": 0.8670620323799515, + "grad_norm": 1.185996651649475, + "learning_rate": 4.562453573107306e-07, + "loss": 0.3557, + "step": 43313 + }, + { + "epoch": 0.8670820508970798, + "grad_norm": 1.1203473806381226, + "learning_rate": 4.561100726860296e-07, + "loss": 0.3073, + "step": 43314 + }, + { + "epoch": 0.8671020694142081, + "grad_norm": 0.999326229095459, + "learning_rate": 4.5597480716286336e-07, + "loss": 0.2431, + "step": 43315 + }, + { + "epoch": 0.8671220879313365, + "grad_norm": 1.0171940326690674, + "learning_rate": 4.5583956074180147e-07, + "loss": 0.2717, + "step": 43316 + }, + { + "epoch": 0.8671421064484648, + "grad_norm": 1.1537368297576904, + "learning_rate": 4.557043334234124e-07, + "loss": 0.287, + "step": 43317 + }, + { + "epoch": 0.8671621249655932, + "grad_norm": 1.0415118932724, + "learning_rate": 4.5556912520826444e-07, + "loss": 0.304, + "step": 43318 + }, + { + "epoch": 0.8671821434827215, + "grad_norm": 1.545345425605774, + "learning_rate": 4.5543393609692453e-07, + "loss": 0.3201, + "step": 43319 + }, + { + "epoch": 0.8672021619998499, + "grad_norm": 1.120516300201416, + "learning_rate": 4.552987660899638e-07, + "loss": 0.2901, + "step": 43320 + }, + { + "epoch": 0.8672221805169782, + "grad_norm": 1.2570394277572632, + "learning_rate": 4.551636151879485e-07, + "loss": 0.2952, + "step": 43321 + }, + { + "epoch": 0.8672421990341066, + "grad_norm": 1.0490624904632568, + "learning_rate": 4.550284833914481e-07, + "loss": 0.2459, + "step": 43322 + }, + { + "epoch": 0.8672622175512349, + "grad_norm": 1.176782488822937, + "learning_rate": 4.5489337070102947e-07, + "loss": 0.3273, + "step": 43323 + }, + { + "epoch": 0.8672822360683632, + "grad_norm": 1.0023839473724365, + "learning_rate": 4.5475827711725983e-07, + "loss": 0.2573, + "step": 43324 + }, + { + "epoch": 0.8673022545854916, + "grad_norm": 1.2280479669570923, + "learning_rate": 4.546232026407094e-07, + "loss": 0.3489, + "step": 43325 + }, + { + "epoch": 0.8673222731026199, + "grad_norm": 1.0587811470031738, + "learning_rate": 4.544881472719437e-07, + "loss": 0.2603, + "step": 43326 + }, + { + "epoch": 0.8673422916197483, + "grad_norm": 1.1747348308563232, + "learning_rate": 4.543531110115329e-07, + "loss": 0.2578, + "step": 43327 + }, + { + "epoch": 0.8673623101368766, + "grad_norm": 1.2522590160369873, + "learning_rate": 4.542180938600421e-07, + "loss": 0.2868, + "step": 43328 + }, + { + "epoch": 0.867382328654005, + "grad_norm": 1.114154577255249, + "learning_rate": 4.5408309581804145e-07, + "loss": 0.2892, + "step": 43329 + }, + { + "epoch": 0.8674023471711333, + "grad_norm": 1.900081992149353, + "learning_rate": 4.5394811688609705e-07, + "loss": 0.7892, + "step": 43330 + }, + { + "epoch": 0.8674223656882616, + "grad_norm": 1.2771317958831787, + "learning_rate": 4.5381315706477634e-07, + "loss": 0.2842, + "step": 43331 + }, + { + "epoch": 0.86744238420539, + "grad_norm": 1.2393161058425903, + "learning_rate": 4.536782163546466e-07, + "loss": 0.2893, + "step": 43332 + }, + { + "epoch": 0.8674624027225183, + "grad_norm": 1.0421959161758423, + "learning_rate": 4.5354329475627454e-07, + "loss": 0.2737, + "step": 43333 + }, + { + "epoch": 0.8674824212396467, + "grad_norm": 1.0720163583755493, + "learning_rate": 4.5340839227022926e-07, + "loss": 0.2988, + "step": 43334 + }, + { + "epoch": 0.867502439756775, + "grad_norm": 1.072218418121338, + "learning_rate": 4.5327350889707634e-07, + "loss": 0.3385, + "step": 43335 + }, + { + "epoch": 0.8675224582739034, + "grad_norm": 1.132986068725586, + "learning_rate": 4.5313864463738313e-07, + "loss": 0.2776, + "step": 43336 + }, + { + "epoch": 0.8675424767910317, + "grad_norm": 1.1051981449127197, + "learning_rate": 4.530037994917158e-07, + "loss": 0.3034, + "step": 43337 + }, + { + "epoch": 0.8675624953081601, + "grad_norm": 1.150586724281311, + "learning_rate": 4.5286897346064287e-07, + "loss": 0.313, + "step": 43338 + }, + { + "epoch": 0.8675825138252884, + "grad_norm": 1.0693488121032715, + "learning_rate": 4.5273416654472877e-07, + "loss": 0.3419, + "step": 43339 + }, + { + "epoch": 0.8676025323424167, + "grad_norm": 1.1325353384017944, + "learning_rate": 4.5259937874454316e-07, + "loss": 0.358, + "step": 43340 + }, + { + "epoch": 0.8676225508595451, + "grad_norm": 1.2809158563613892, + "learning_rate": 4.524646100606511e-07, + "loss": 0.2756, + "step": 43341 + }, + { + "epoch": 0.8676425693766734, + "grad_norm": 1.9106154441833496, + "learning_rate": 4.523298604936183e-07, + "loss": 0.6886, + "step": 43342 + }, + { + "epoch": 0.8676625878938018, + "grad_norm": 1.1739228963851929, + "learning_rate": 4.521951300440125e-07, + "loss": 0.2727, + "step": 43343 + }, + { + "epoch": 0.8676826064109301, + "grad_norm": 1.4033544063568115, + "learning_rate": 4.5206041871240005e-07, + "loss": 0.3491, + "step": 43344 + }, + { + "epoch": 0.8677026249280585, + "grad_norm": 1.1706392765045166, + "learning_rate": 4.5192572649934705e-07, + "loss": 0.2561, + "step": 43345 + }, + { + "epoch": 0.8677226434451868, + "grad_norm": 1.0261560678482056, + "learning_rate": 4.517910534054187e-07, + "loss": 0.3087, + "step": 43346 + }, + { + "epoch": 0.8677426619623151, + "grad_norm": 1.8825567960739136, + "learning_rate": 4.5165639943118236e-07, + "loss": 0.7423, + "step": 43347 + }, + { + "epoch": 0.8677626804794435, + "grad_norm": 1.1263033151626587, + "learning_rate": 4.515217645772041e-07, + "loss": 0.2918, + "step": 43348 + }, + { + "epoch": 0.8677826989965718, + "grad_norm": 1.23583984375, + "learning_rate": 4.513871488440491e-07, + "loss": 0.2943, + "step": 43349 + }, + { + "epoch": 0.8678027175137002, + "grad_norm": 1.0663819313049316, + "learning_rate": 4.51252552232283e-07, + "loss": 0.288, + "step": 43350 + }, + { + "epoch": 0.8678227360308285, + "grad_norm": 1.1521459817886353, + "learning_rate": 4.5111797474247376e-07, + "loss": 0.2957, + "step": 43351 + }, + { + "epoch": 0.8678427545479569, + "grad_norm": 1.141484022140503, + "learning_rate": 4.509834163751842e-07, + "loss": 0.2619, + "step": 43352 + }, + { + "epoch": 0.8678627730650852, + "grad_norm": 1.0863981246948242, + "learning_rate": 4.5084887713098215e-07, + "loss": 0.3092, + "step": 43353 + }, + { + "epoch": 0.8678827915822136, + "grad_norm": 1.078707218170166, + "learning_rate": 4.507143570104322e-07, + "loss": 0.2822, + "step": 43354 + }, + { + "epoch": 0.8679028100993419, + "grad_norm": 1.1533238887786865, + "learning_rate": 4.505798560140995e-07, + "loss": 0.2737, + "step": 43355 + }, + { + "epoch": 0.8679228286164702, + "grad_norm": 1.1953186988830566, + "learning_rate": 4.504453741425513e-07, + "loss": 0.2634, + "step": 43356 + }, + { + "epoch": 0.8679428471335986, + "grad_norm": 1.2331645488739014, + "learning_rate": 4.503109113963511e-07, + "loss": 0.2669, + "step": 43357 + }, + { + "epoch": 0.8679628656507269, + "grad_norm": 1.9331809282302856, + "learning_rate": 4.5017646777606505e-07, + "loss": 0.6927, + "step": 43358 + }, + { + "epoch": 0.8679828841678553, + "grad_norm": 1.1460529565811157, + "learning_rate": 4.500420432822572e-07, + "loss": 0.2818, + "step": 43359 + }, + { + "epoch": 0.8680029026849836, + "grad_norm": 1.1534450054168701, + "learning_rate": 4.4990763791549374e-07, + "loss": 0.3258, + "step": 43360 + }, + { + "epoch": 0.868022921202112, + "grad_norm": 1.167515516281128, + "learning_rate": 4.497732516763398e-07, + "loss": 0.3166, + "step": 43361 + }, + { + "epoch": 0.8680429397192403, + "grad_norm": 1.1208480596542358, + "learning_rate": 4.496388845653599e-07, + "loss": 0.2736, + "step": 43362 + }, + { + "epoch": 0.8680629582363686, + "grad_norm": 1.269972801208496, + "learning_rate": 4.49504536583118e-07, + "loss": 0.3041, + "step": 43363 + }, + { + "epoch": 0.868082976753497, + "grad_norm": 1.0660747289657593, + "learning_rate": 4.493702077301809e-07, + "loss": 0.2871, + "step": 43364 + }, + { + "epoch": 0.8681029952706253, + "grad_norm": 1.0620933771133423, + "learning_rate": 4.4923589800711096e-07, + "loss": 0.2642, + "step": 43365 + }, + { + "epoch": 0.8681230137877537, + "grad_norm": 1.3108831644058228, + "learning_rate": 4.4910160741447495e-07, + "loss": 0.2759, + "step": 43366 + }, + { + "epoch": 0.868143032304882, + "grad_norm": 1.0755280256271362, + "learning_rate": 4.4896733595283625e-07, + "loss": 0.2698, + "step": 43367 + }, + { + "epoch": 0.8681630508220104, + "grad_norm": 1.0879559516906738, + "learning_rate": 4.4883308362275836e-07, + "loss": 0.2753, + "step": 43368 + }, + { + "epoch": 0.8681830693391387, + "grad_norm": 1.932428002357483, + "learning_rate": 4.48698850424808e-07, + "loss": 0.7002, + "step": 43369 + }, + { + "epoch": 0.8682030878562671, + "grad_norm": 1.0934295654296875, + "learning_rate": 4.4856463635954804e-07, + "loss": 0.3436, + "step": 43370 + }, + { + "epoch": 0.8682231063733954, + "grad_norm": 1.9698225259780884, + "learning_rate": 4.484304414275431e-07, + "loss": 0.8064, + "step": 43371 + }, + { + "epoch": 0.8682431248905237, + "grad_norm": 1.0980963706970215, + "learning_rate": 4.4829626562935603e-07, + "loss": 0.2742, + "step": 43372 + }, + { + "epoch": 0.8682631434076521, + "grad_norm": 1.153314232826233, + "learning_rate": 4.48162108965553e-07, + "loss": 0.2874, + "step": 43373 + }, + { + "epoch": 0.8682831619247804, + "grad_norm": 1.172562599182129, + "learning_rate": 4.4802797143669696e-07, + "loss": 0.2785, + "step": 43374 + }, + { + "epoch": 0.8683031804419088, + "grad_norm": 1.0715669393539429, + "learning_rate": 4.4789385304335133e-07, + "loss": 0.2835, + "step": 43375 + }, + { + "epoch": 0.8683231989590371, + "grad_norm": 1.070934534072876, + "learning_rate": 4.477597537860795e-07, + "loss": 0.312, + "step": 43376 + }, + { + "epoch": 0.8683432174761655, + "grad_norm": 1.2157504558563232, + "learning_rate": 4.4762567366544715e-07, + "loss": 0.3573, + "step": 43377 + }, + { + "epoch": 0.8683632359932938, + "grad_norm": 1.225344181060791, + "learning_rate": 4.474916126820161e-07, + "loss": 0.2617, + "step": 43378 + }, + { + "epoch": 0.8683832545104221, + "grad_norm": 0.9743168950080872, + "learning_rate": 4.473575708363509e-07, + "loss": 0.263, + "step": 43379 + }, + { + "epoch": 0.8684032730275505, + "grad_norm": 1.0765600204467773, + "learning_rate": 4.4722354812901493e-07, + "loss": 0.301, + "step": 43380 + }, + { + "epoch": 0.8684232915446788, + "grad_norm": 1.06340754032135, + "learning_rate": 4.4708954456057054e-07, + "loss": 0.2708, + "step": 43381 + }, + { + "epoch": 0.8684433100618072, + "grad_norm": 1.0069080591201782, + "learning_rate": 4.4695556013158335e-07, + "loss": 0.2942, + "step": 43382 + }, + { + "epoch": 0.8684633285789355, + "grad_norm": 1.1368461847305298, + "learning_rate": 4.4682159484261465e-07, + "loss": 0.3241, + "step": 43383 + }, + { + "epoch": 0.8684833470960639, + "grad_norm": 1.1351367235183716, + "learning_rate": 4.4668764869422786e-07, + "loss": 0.3056, + "step": 43384 + }, + { + "epoch": 0.8685033656131922, + "grad_norm": 1.734864354133606, + "learning_rate": 4.465537216869864e-07, + "loss": 0.7036, + "step": 43385 + }, + { + "epoch": 0.8685233841303206, + "grad_norm": 1.0574829578399658, + "learning_rate": 4.464198138214526e-07, + "loss": 0.327, + "step": 43386 + }, + { + "epoch": 0.8685434026474489, + "grad_norm": 1.085240364074707, + "learning_rate": 4.46285925098191e-07, + "loss": 0.3121, + "step": 43387 + }, + { + "epoch": 0.8685634211645772, + "grad_norm": 1.2215083837509155, + "learning_rate": 4.4615205551776286e-07, + "loss": 0.2973, + "step": 43388 + }, + { + "epoch": 0.8685834396817056, + "grad_norm": 1.1023975610733032, + "learning_rate": 4.46018205080731e-07, + "loss": 0.3402, + "step": 43389 + }, + { + "epoch": 0.8686034581988339, + "grad_norm": 2.0655887126922607, + "learning_rate": 4.458843737876589e-07, + "loss": 0.7587, + "step": 43390 + }, + { + "epoch": 0.8686234767159623, + "grad_norm": 1.0392755270004272, + "learning_rate": 4.4575056163910833e-07, + "loss": 0.2963, + "step": 43391 + }, + { + "epoch": 0.8686434952330906, + "grad_norm": 1.1563289165496826, + "learning_rate": 4.456167686356433e-07, + "loss": 0.2803, + "step": 43392 + }, + { + "epoch": 0.868663513750219, + "grad_norm": 1.1276062726974487, + "learning_rate": 4.4548299477782554e-07, + "loss": 0.286, + "step": 43393 + }, + { + "epoch": 0.8686835322673473, + "grad_norm": 1.3188399076461792, + "learning_rate": 4.4534924006621685e-07, + "loss": 0.307, + "step": 43394 + }, + { + "epoch": 0.8687035507844756, + "grad_norm": 1.1258710622787476, + "learning_rate": 4.452155045013784e-07, + "loss": 0.3035, + "step": 43395 + }, + { + "epoch": 0.868723569301604, + "grad_norm": 0.9753737449645996, + "learning_rate": 4.4508178808387536e-07, + "loss": 0.2762, + "step": 43396 + }, + { + "epoch": 0.8687435878187323, + "grad_norm": 1.0098685026168823, + "learning_rate": 4.4494809081426784e-07, + "loss": 0.2417, + "step": 43397 + }, + { + "epoch": 0.8687636063358607, + "grad_norm": 1.1421144008636475, + "learning_rate": 4.4481441269311863e-07, + "loss": 0.2306, + "step": 43398 + }, + { + "epoch": 0.868783624852989, + "grad_norm": 1.0886104106903076, + "learning_rate": 4.4468075372098795e-07, + "loss": 0.2569, + "step": 43399 + }, + { + "epoch": 0.8688036433701174, + "grad_norm": 1.1032944917678833, + "learning_rate": 4.445471138984403e-07, + "loss": 0.3216, + "step": 43400 + }, + { + "epoch": 0.8688236618872457, + "grad_norm": 1.096547245979309, + "learning_rate": 4.444134932260363e-07, + "loss": 0.3341, + "step": 43401 + }, + { + "epoch": 0.8688436804043741, + "grad_norm": 1.2285641431808472, + "learning_rate": 4.4427989170433673e-07, + "loss": 0.3146, + "step": 43402 + }, + { + "epoch": 0.8688636989215024, + "grad_norm": 1.9495519399642944, + "learning_rate": 4.441463093339049e-07, + "loss": 0.7169, + "step": 43403 + }, + { + "epoch": 0.8688837174386307, + "grad_norm": 1.0500328540802002, + "learning_rate": 4.440127461153004e-07, + "loss": 0.3069, + "step": 43404 + }, + { + "epoch": 0.8689037359557591, + "grad_norm": 1.0456503629684448, + "learning_rate": 4.4387920204908677e-07, + "loss": 0.2828, + "step": 43405 + }, + { + "epoch": 0.8689237544728874, + "grad_norm": 1.1005653142929077, + "learning_rate": 4.4374567713582504e-07, + "loss": 0.2982, + "step": 43406 + }, + { + "epoch": 0.8689437729900158, + "grad_norm": 0.9923685789108276, + "learning_rate": 4.436121713760755e-07, + "loss": 0.2725, + "step": 43407 + }, + { + "epoch": 0.8689637915071441, + "grad_norm": 2.0507123470306396, + "learning_rate": 4.434786847703987e-07, + "loss": 0.7413, + "step": 43408 + }, + { + "epoch": 0.8689838100242725, + "grad_norm": 1.081204891204834, + "learning_rate": 4.4334521731935866e-07, + "loss": 0.306, + "step": 43409 + }, + { + "epoch": 0.8690038285414008, + "grad_norm": 1.020263433456421, + "learning_rate": 4.432117690235138e-07, + "loss": 0.281, + "step": 43410 + }, + { + "epoch": 0.8690238470585291, + "grad_norm": 1.1866904497146606, + "learning_rate": 4.4307833988342654e-07, + "loss": 0.2747, + "step": 43411 + }, + { + "epoch": 0.8690438655756575, + "grad_norm": 1.1216871738433838, + "learning_rate": 4.429449298996563e-07, + "loss": 0.2843, + "step": 43412 + }, + { + "epoch": 0.8690638840927858, + "grad_norm": 1.0889582633972168, + "learning_rate": 4.4281153907276606e-07, + "loss": 0.2858, + "step": 43413 + }, + { + "epoch": 0.8690839026099142, + "grad_norm": 1.094348669052124, + "learning_rate": 4.426781674033154e-07, + "loss": 0.2766, + "step": 43414 + }, + { + "epoch": 0.8691039211270425, + "grad_norm": 1.9368820190429688, + "learning_rate": 4.4254481489186374e-07, + "loss": 0.7758, + "step": 43415 + }, + { + "epoch": 0.8691239396441709, + "grad_norm": 1.2290499210357666, + "learning_rate": 4.424114815389735e-07, + "loss": 0.3096, + "step": 43416 + }, + { + "epoch": 0.8691439581612992, + "grad_norm": 1.1630029678344727, + "learning_rate": 4.4227816734520424e-07, + "loss": 0.3209, + "step": 43417 + }, + { + "epoch": 0.8691639766784276, + "grad_norm": 1.0666362047195435, + "learning_rate": 4.421448723111177e-07, + "loss": 0.2871, + "step": 43418 + }, + { + "epoch": 0.8691839951955559, + "grad_norm": 1.8791650533676147, + "learning_rate": 4.4201159643727286e-07, + "loss": 0.7989, + "step": 43419 + }, + { + "epoch": 0.8692040137126842, + "grad_norm": 1.1816214323043823, + "learning_rate": 4.418783397242304e-07, + "loss": 0.298, + "step": 43420 + }, + { + "epoch": 0.8692240322298126, + "grad_norm": 1.9455374479293823, + "learning_rate": 4.4174510217254994e-07, + "loss": 0.7244, + "step": 43421 + }, + { + "epoch": 0.8692440507469409, + "grad_norm": 1.1781691312789917, + "learning_rate": 4.4161188378279264e-07, + "loss": 0.2714, + "step": 43422 + }, + { + "epoch": 0.8692640692640693, + "grad_norm": 1.1353832483291626, + "learning_rate": 4.4147868455551803e-07, + "loss": 0.3145, + "step": 43423 + }, + { + "epoch": 0.8692840877811976, + "grad_norm": 0.9245299696922302, + "learning_rate": 4.413455044912862e-07, + "loss": 0.2783, + "step": 43424 + }, + { + "epoch": 0.869304106298326, + "grad_norm": 1.125831127166748, + "learning_rate": 4.4121234359065677e-07, + "loss": 0.3174, + "step": 43425 + }, + { + "epoch": 0.8693241248154543, + "grad_norm": 1.1813772916793823, + "learning_rate": 4.410792018541887e-07, + "loss": 0.2765, + "step": 43426 + }, + { + "epoch": 0.8693441433325826, + "grad_norm": 1.0913734436035156, + "learning_rate": 4.4094607928244383e-07, + "loss": 0.3034, + "step": 43427 + }, + { + "epoch": 0.869364161849711, + "grad_norm": 1.1146366596221924, + "learning_rate": 4.408129758759788e-07, + "loss": 0.2889, + "step": 43428 + }, + { + "epoch": 0.8693841803668393, + "grad_norm": 1.1722127199172974, + "learning_rate": 4.4067989163535664e-07, + "loss": 0.3302, + "step": 43429 + }, + { + "epoch": 0.8694041988839677, + "grad_norm": 1.0289756059646606, + "learning_rate": 4.405468265611335e-07, + "loss": 0.2627, + "step": 43430 + }, + { + "epoch": 0.869424217401096, + "grad_norm": 1.030978798866272, + "learning_rate": 4.4041378065387165e-07, + "loss": 0.2664, + "step": 43431 + }, + { + "epoch": 0.8694442359182244, + "grad_norm": 1.0179942846298218, + "learning_rate": 4.402807539141285e-07, + "loss": 0.2896, + "step": 43432 + }, + { + "epoch": 0.8694642544353527, + "grad_norm": 1.0720887184143066, + "learning_rate": 4.4014774634246417e-07, + "loss": 0.3063, + "step": 43433 + }, + { + "epoch": 0.8694842729524811, + "grad_norm": 1.167923927307129, + "learning_rate": 4.400147579394365e-07, + "loss": 0.3228, + "step": 43434 + }, + { + "epoch": 0.8695042914696094, + "grad_norm": 1.346488356590271, + "learning_rate": 4.3988178870560673e-07, + "loss": 0.3156, + "step": 43435 + }, + { + "epoch": 0.8695243099867377, + "grad_norm": 1.1299245357513428, + "learning_rate": 4.397488386415322e-07, + "loss": 0.3013, + "step": 43436 + }, + { + "epoch": 0.8695443285038661, + "grad_norm": 1.1512655019760132, + "learning_rate": 4.3961590774777185e-07, + "loss": 0.3272, + "step": 43437 + }, + { + "epoch": 0.8695643470209944, + "grad_norm": 1.0368553400039673, + "learning_rate": 4.3948299602488533e-07, + "loss": 0.2677, + "step": 43438 + }, + { + "epoch": 0.8695843655381228, + "grad_norm": 1.0744948387145996, + "learning_rate": 4.3935010347342935e-07, + "loss": 0.2879, + "step": 43439 + }, + { + "epoch": 0.8696043840552511, + "grad_norm": 1.0047824382781982, + "learning_rate": 4.3921723009396565e-07, + "loss": 0.2584, + "step": 43440 + }, + { + "epoch": 0.8696244025723795, + "grad_norm": 1.2273497581481934, + "learning_rate": 4.3908437588705e-07, + "loss": 0.2994, + "step": 43441 + }, + { + "epoch": 0.8696444210895078, + "grad_norm": 1.105535864830017, + "learning_rate": 4.38951540853243e-07, + "loss": 0.2606, + "step": 43442 + }, + { + "epoch": 0.8696644396066361, + "grad_norm": 1.1197727918624878, + "learning_rate": 4.388187249931014e-07, + "loss": 0.2597, + "step": 43443 + }, + { + "epoch": 0.8696844581237645, + "grad_norm": 1.1098620891571045, + "learning_rate": 4.3868592830718536e-07, + "loss": 0.3132, + "step": 43444 + }, + { + "epoch": 0.8697044766408928, + "grad_norm": 1.007338047027588, + "learning_rate": 4.3855315079605163e-07, + "loss": 0.2802, + "step": 43445 + }, + { + "epoch": 0.8697244951580212, + "grad_norm": 2.129802703857422, + "learning_rate": 4.3842039246025925e-07, + "loss": 0.8075, + "step": 43446 + }, + { + "epoch": 0.8697445136751495, + "grad_norm": 1.2246370315551758, + "learning_rate": 4.382876533003655e-07, + "loss": 0.3053, + "step": 43447 + }, + { + "epoch": 0.8697645321922779, + "grad_norm": 1.247824788093567, + "learning_rate": 4.3815493331692827e-07, + "loss": 0.3375, + "step": 43448 + }, + { + "epoch": 0.8697845507094062, + "grad_norm": 2.070194959640503, + "learning_rate": 4.380222325105066e-07, + "loss": 0.7536, + "step": 43449 + }, + { + "epoch": 0.8698045692265346, + "grad_norm": 1.0388214588165283, + "learning_rate": 4.3788955088165777e-07, + "loss": 0.2832, + "step": 43450 + }, + { + "epoch": 0.8698245877436629, + "grad_norm": 1.1254935264587402, + "learning_rate": 4.377568884309391e-07, + "loss": 0.3108, + "step": 43451 + }, + { + "epoch": 0.8698446062607912, + "grad_norm": 1.1125670671463013, + "learning_rate": 4.3762424515890853e-07, + "loss": 0.2944, + "step": 43452 + }, + { + "epoch": 0.8698646247779196, + "grad_norm": 1.1551494598388672, + "learning_rate": 4.37491621066124e-07, + "loss": 0.2637, + "step": 43453 + }, + { + "epoch": 0.8698846432950479, + "grad_norm": 1.9820659160614014, + "learning_rate": 4.373590161531427e-07, + "loss": 0.751, + "step": 43454 + }, + { + "epoch": 0.8699046618121763, + "grad_norm": 1.8736132383346558, + "learning_rate": 4.372264304205226e-07, + "loss": 0.8141, + "step": 43455 + }, + { + "epoch": 0.8699246803293046, + "grad_norm": 1.0401781797409058, + "learning_rate": 4.370938638688205e-07, + "loss": 0.2949, + "step": 43456 + }, + { + "epoch": 0.869944698846433, + "grad_norm": 1.1673550605773926, + "learning_rate": 4.369613164985931e-07, + "loss": 0.3176, + "step": 43457 + }, + { + "epoch": 0.8699647173635613, + "grad_norm": 1.1298127174377441, + "learning_rate": 4.3682878831039943e-07, + "loss": 0.3645, + "step": 43458 + }, + { + "epoch": 0.8699847358806896, + "grad_norm": 1.0583549737930298, + "learning_rate": 4.366962793047952e-07, + "loss": 0.2632, + "step": 43459 + }, + { + "epoch": 0.870004754397818, + "grad_norm": 1.1470173597335815, + "learning_rate": 4.365637894823382e-07, + "loss": 0.2657, + "step": 43460 + }, + { + "epoch": 0.8700247729149463, + "grad_norm": 0.9960917830467224, + "learning_rate": 4.364313188435837e-07, + "loss": 0.2906, + "step": 43461 + }, + { + "epoch": 0.8700447914320747, + "grad_norm": 1.107572317123413, + "learning_rate": 4.362988673890911e-07, + "loss": 0.2769, + "step": 43462 + }, + { + "epoch": 0.870064809949203, + "grad_norm": 1.2585033178329468, + "learning_rate": 4.361664351194156e-07, + "loss": 0.296, + "step": 43463 + }, + { + "epoch": 0.8700848284663314, + "grad_norm": 1.2937016487121582, + "learning_rate": 4.360340220351145e-07, + "loss": 0.2805, + "step": 43464 + }, + { + "epoch": 0.8701048469834597, + "grad_norm": 1.1991493701934814, + "learning_rate": 4.359016281367434e-07, + "loss": 0.297, + "step": 43465 + }, + { + "epoch": 0.8701248655005881, + "grad_norm": 1.3362467288970947, + "learning_rate": 4.357692534248603e-07, + "loss": 0.2523, + "step": 43466 + }, + { + "epoch": 0.8701448840177164, + "grad_norm": 1.1141834259033203, + "learning_rate": 4.356368979000203e-07, + "loss": 0.269, + "step": 43467 + }, + { + "epoch": 0.8701649025348447, + "grad_norm": 1.3777480125427246, + "learning_rate": 4.355045615627812e-07, + "loss": 0.2833, + "step": 43468 + }, + { + "epoch": 0.8701849210519731, + "grad_norm": 1.1290218830108643, + "learning_rate": 4.3537224441369873e-07, + "loss": 0.3442, + "step": 43469 + }, + { + "epoch": 0.8702049395691014, + "grad_norm": 0.9960277080535889, + "learning_rate": 4.35239946453328e-07, + "loss": 0.2633, + "step": 43470 + }, + { + "epoch": 0.8702249580862298, + "grad_norm": 1.1528069972991943, + "learning_rate": 4.3510766768222743e-07, + "loss": 0.3166, + "step": 43471 + }, + { + "epoch": 0.8702449766033581, + "grad_norm": 1.1081137657165527, + "learning_rate": 4.349754081009516e-07, + "loss": 0.3224, + "step": 43472 + }, + { + "epoch": 0.8702649951204865, + "grad_norm": 1.953397274017334, + "learning_rate": 4.3484316771005676e-07, + "loss": 0.7864, + "step": 43473 + }, + { + "epoch": 0.8702850136376148, + "grad_norm": 0.9800125956535339, + "learning_rate": 4.3471094651009795e-07, + "loss": 0.2463, + "step": 43474 + }, + { + "epoch": 0.8703050321547431, + "grad_norm": 1.1802031993865967, + "learning_rate": 4.345787445016325e-07, + "loss": 0.3523, + "step": 43475 + }, + { + "epoch": 0.8703250506718715, + "grad_norm": 1.0972076654434204, + "learning_rate": 4.3444656168521614e-07, + "loss": 0.29, + "step": 43476 + }, + { + "epoch": 0.8703450691889998, + "grad_norm": 1.1056056022644043, + "learning_rate": 4.343143980614034e-07, + "loss": 0.3481, + "step": 43477 + }, + { + "epoch": 0.8703650877061282, + "grad_norm": 1.0524015426635742, + "learning_rate": 4.341822536307494e-07, + "loss": 0.2807, + "step": 43478 + }, + { + "epoch": 0.8703851062232565, + "grad_norm": 1.171859860420227, + "learning_rate": 4.34050128393812e-07, + "loss": 0.3039, + "step": 43479 + }, + { + "epoch": 0.8704051247403849, + "grad_norm": 1.0842729806900024, + "learning_rate": 4.339180223511441e-07, + "loss": 0.2503, + "step": 43480 + }, + { + "epoch": 0.8704251432575132, + "grad_norm": 1.515572190284729, + "learning_rate": 4.3378593550330305e-07, + "loss": 0.2991, + "step": 43481 + }, + { + "epoch": 0.8704451617746416, + "grad_norm": 1.008750319480896, + "learning_rate": 4.336538678508434e-07, + "loss": 0.2882, + "step": 43482 + }, + { + "epoch": 0.8704651802917699, + "grad_norm": 1.1482253074645996, + "learning_rate": 4.3352181939431916e-07, + "loss": 0.3333, + "step": 43483 + }, + { + "epoch": 0.8704851988088982, + "grad_norm": 1.0459730625152588, + "learning_rate": 4.333897901342876e-07, + "loss": 0.3295, + "step": 43484 + }, + { + "epoch": 0.8705052173260266, + "grad_norm": 1.266347050666809, + "learning_rate": 4.332577800713023e-07, + "loss": 0.2965, + "step": 43485 + }, + { + "epoch": 0.8705252358431549, + "grad_norm": 1.0856989622116089, + "learning_rate": 4.331257892059187e-07, + "loss": 0.2694, + "step": 43486 + }, + { + "epoch": 0.8705452543602833, + "grad_norm": 1.1807105541229248, + "learning_rate": 4.329938175386916e-07, + "loss": 0.3232, + "step": 43487 + }, + { + "epoch": 0.8705652728774116, + "grad_norm": 2.0216827392578125, + "learning_rate": 4.3286186507017437e-07, + "loss": 0.7716, + "step": 43488 + }, + { + "epoch": 0.87058529139454, + "grad_norm": 1.0684212446212769, + "learning_rate": 4.327299318009243e-07, + "loss": 0.2919, + "step": 43489 + }, + { + "epoch": 0.8706053099116683, + "grad_norm": 1.3026198148727417, + "learning_rate": 4.325980177314948e-07, + "loss": 0.3261, + "step": 43490 + }, + { + "epoch": 0.8706253284287966, + "grad_norm": 1.2860287427902222, + "learning_rate": 4.3246612286243886e-07, + "loss": 0.3094, + "step": 43491 + }, + { + "epoch": 0.870645346945925, + "grad_norm": 1.060723900794983, + "learning_rate": 4.323342471943137e-07, + "loss": 0.2789, + "step": 43492 + }, + { + "epoch": 0.8706653654630533, + "grad_norm": 1.168474793434143, + "learning_rate": 4.3220239072767124e-07, + "loss": 0.2834, + "step": 43493 + }, + { + "epoch": 0.8706853839801817, + "grad_norm": 1.0054411888122559, + "learning_rate": 4.320705534630681e-07, + "loss": 0.2414, + "step": 43494 + }, + { + "epoch": 0.87070540249731, + "grad_norm": 1.9439674615859985, + "learning_rate": 4.3193873540105735e-07, + "loss": 0.7752, + "step": 43495 + }, + { + "epoch": 0.8707254210144384, + "grad_norm": 1.9284521341323853, + "learning_rate": 4.318069365421923e-07, + "loss": 0.7049, + "step": 43496 + }, + { + "epoch": 0.8707454395315667, + "grad_norm": 1.1274081468582153, + "learning_rate": 4.316751568870281e-07, + "loss": 0.3449, + "step": 43497 + }, + { + "epoch": 0.8707654580486951, + "grad_norm": 1.0266081094741821, + "learning_rate": 4.3154339643611873e-07, + "loss": 0.3281, + "step": 43498 + }, + { + "epoch": 0.8707854765658234, + "grad_norm": 1.1235721111297607, + "learning_rate": 4.314116551900183e-07, + "loss": 0.287, + "step": 43499 + }, + { + "epoch": 0.8708054950829517, + "grad_norm": 2.018846273422241, + "learning_rate": 4.312799331492795e-07, + "loss": 0.8368, + "step": 43500 + }, + { + "epoch": 0.8708255136000801, + "grad_norm": 1.0579582452774048, + "learning_rate": 4.31148230314456e-07, + "loss": 0.283, + "step": 43501 + }, + { + "epoch": 0.8708455321172084, + "grad_norm": 1.0559409856796265, + "learning_rate": 4.310165466861027e-07, + "loss": 0.2426, + "step": 43502 + }, + { + "epoch": 0.8708655506343368, + "grad_norm": 1.044628620147705, + "learning_rate": 4.3088488226477263e-07, + "loss": 0.299, + "step": 43503 + }, + { + "epoch": 0.8708855691514651, + "grad_norm": 1.2601436376571655, + "learning_rate": 4.307532370510187e-07, + "loss": 0.3064, + "step": 43504 + }, + { + "epoch": 0.8709055876685935, + "grad_norm": 1.1374155282974243, + "learning_rate": 4.3062161104539535e-07, + "loss": 0.2883, + "step": 43505 + }, + { + "epoch": 0.8709256061857218, + "grad_norm": 1.1754666566848755, + "learning_rate": 4.30490004248455e-07, + "loss": 0.347, + "step": 43506 + }, + { + "epoch": 0.8709456247028501, + "grad_norm": 1.1174674034118652, + "learning_rate": 4.3035841666075163e-07, + "loss": 0.3496, + "step": 43507 + }, + { + "epoch": 0.8709656432199785, + "grad_norm": 1.292251706123352, + "learning_rate": 4.3022684828283866e-07, + "loss": 0.302, + "step": 43508 + }, + { + "epoch": 0.8709856617371068, + "grad_norm": 1.1460679769515991, + "learning_rate": 4.300952991152679e-07, + "loss": 0.3135, + "step": 43509 + }, + { + "epoch": 0.8710056802542352, + "grad_norm": 1.8661633729934692, + "learning_rate": 4.299637691585928e-07, + "loss": 0.7313, + "step": 43510 + }, + { + "epoch": 0.8710256987713635, + "grad_norm": 1.075132966041565, + "learning_rate": 4.298322584133674e-07, + "loss": 0.2826, + "step": 43511 + }, + { + "epoch": 0.8710457172884919, + "grad_norm": 1.9094948768615723, + "learning_rate": 4.297007668801434e-07, + "loss": 0.7394, + "step": 43512 + }, + { + "epoch": 0.8710657358056202, + "grad_norm": 1.1112638711929321, + "learning_rate": 4.295692945594737e-07, + "loss": 0.2717, + "step": 43513 + }, + { + "epoch": 0.8710857543227486, + "grad_norm": 1.157559871673584, + "learning_rate": 4.294378414519107e-07, + "loss": 0.3121, + "step": 43514 + }, + { + "epoch": 0.8711057728398769, + "grad_norm": 1.3846683502197266, + "learning_rate": 4.293064075580078e-07, + "loss": 0.3037, + "step": 43515 + }, + { + "epoch": 0.8711257913570052, + "grad_norm": 1.1256601810455322, + "learning_rate": 4.2917499287831734e-07, + "loss": 0.2956, + "step": 43516 + }, + { + "epoch": 0.8711458098741336, + "grad_norm": 1.096571683883667, + "learning_rate": 4.290435974133905e-07, + "loss": 0.2679, + "step": 43517 + }, + { + "epoch": 0.8711658283912619, + "grad_norm": 1.1457264423370361, + "learning_rate": 4.289122211637819e-07, + "loss": 0.269, + "step": 43518 + }, + { + "epoch": 0.8711858469083903, + "grad_norm": 1.1963508129119873, + "learning_rate": 4.287808641300417e-07, + "loss": 0.2836, + "step": 43519 + }, + { + "epoch": 0.8712058654255186, + "grad_norm": 1.0601551532745361, + "learning_rate": 4.286495263127238e-07, + "loss": 0.263, + "step": 43520 + }, + { + "epoch": 0.871225883942647, + "grad_norm": 1.1423271894454956, + "learning_rate": 4.285182077123795e-07, + "loss": 0.252, + "step": 43521 + }, + { + "epoch": 0.8712459024597753, + "grad_norm": 1.3501547574996948, + "learning_rate": 4.283869083295611e-07, + "loss": 0.312, + "step": 43522 + }, + { + "epoch": 0.8712659209769036, + "grad_norm": 1.0563081502914429, + "learning_rate": 4.282556281648187e-07, + "loss": 0.2831, + "step": 43523 + }, + { + "epoch": 0.871285939494032, + "grad_norm": 1.3096344470977783, + "learning_rate": 4.281243672187074e-07, + "loss": 0.2692, + "step": 43524 + }, + { + "epoch": 0.8713059580111603, + "grad_norm": 1.1501202583312988, + "learning_rate": 4.279931254917774e-07, + "loss": 0.3129, + "step": 43525 + }, + { + "epoch": 0.8713259765282887, + "grad_norm": 1.202858328819275, + "learning_rate": 4.2786190298457986e-07, + "loss": 0.2779, + "step": 43526 + }, + { + "epoch": 0.871345995045417, + "grad_norm": 2.045492649078369, + "learning_rate": 4.2773069969766655e-07, + "loss": 0.7212, + "step": 43527 + }, + { + "epoch": 0.8713660135625454, + "grad_norm": 1.1457685232162476, + "learning_rate": 4.2759951563158983e-07, + "loss": 0.2811, + "step": 43528 + }, + { + "epoch": 0.8713860320796737, + "grad_norm": 1.021554708480835, + "learning_rate": 4.2746835078690097e-07, + "loss": 0.2496, + "step": 43529 + }, + { + "epoch": 0.8714060505968021, + "grad_norm": 1.155186653137207, + "learning_rate": 4.273372051641506e-07, + "loss": 0.32, + "step": 43530 + }, + { + "epoch": 0.8714260691139304, + "grad_norm": 1.2705848217010498, + "learning_rate": 4.27206078763891e-07, + "loss": 0.2779, + "step": 43531 + }, + { + "epoch": 0.8714460876310587, + "grad_norm": 1.1323624849319458, + "learning_rate": 4.2707497158667234e-07, + "loss": 0.2976, + "step": 43532 + }, + { + "epoch": 0.8714661061481871, + "grad_norm": 1.1727755069732666, + "learning_rate": 4.2694388363304753e-07, + "loss": 0.2847, + "step": 43533 + }, + { + "epoch": 0.8714861246653154, + "grad_norm": 1.1038237810134888, + "learning_rate": 4.268128149035661e-07, + "loss": 0.3105, + "step": 43534 + }, + { + "epoch": 0.8715061431824438, + "grad_norm": 1.049606442451477, + "learning_rate": 4.2668176539877925e-07, + "loss": 0.2421, + "step": 43535 + }, + { + "epoch": 0.8715261616995721, + "grad_norm": 1.7903200387954712, + "learning_rate": 4.265507351192377e-07, + "loss": 0.7089, + "step": 43536 + }, + { + "epoch": 0.8715461802167005, + "grad_norm": 1.254354476928711, + "learning_rate": 4.2641972406549326e-07, + "loss": 0.3021, + "step": 43537 + }, + { + "epoch": 0.8715661987338288, + "grad_norm": 1.177062749862671, + "learning_rate": 4.26288732238096e-07, + "loss": 0.2677, + "step": 43538 + }, + { + "epoch": 0.8715862172509571, + "grad_norm": 1.2392390966415405, + "learning_rate": 4.2615775963759656e-07, + "loss": 0.2597, + "step": 43539 + }, + { + "epoch": 0.8716062357680855, + "grad_norm": 1.1745280027389526, + "learning_rate": 4.260268062645456e-07, + "loss": 0.3369, + "step": 43540 + }, + { + "epoch": 0.8716262542852138, + "grad_norm": 1.0925631523132324, + "learning_rate": 4.2589587211949277e-07, + "loss": 0.2573, + "step": 43541 + }, + { + "epoch": 0.8716462728023422, + "grad_norm": 1.121009349822998, + "learning_rate": 4.257649572029904e-07, + "loss": 0.2723, + "step": 43542 + }, + { + "epoch": 0.8716662913194705, + "grad_norm": 1.0267337560653687, + "learning_rate": 4.2563406151558683e-07, + "loss": 0.2659, + "step": 43543 + }, + { + "epoch": 0.8716863098365989, + "grad_norm": 1.8763701915740967, + "learning_rate": 4.255031850578345e-07, + "loss": 0.7751, + "step": 43544 + }, + { + "epoch": 0.8717063283537272, + "grad_norm": 1.1766098737716675, + "learning_rate": 4.2537232783028073e-07, + "loss": 0.3003, + "step": 43545 + }, + { + "epoch": 0.8717263468708556, + "grad_norm": 1.118794322013855, + "learning_rate": 4.2524148983347837e-07, + "loss": 0.2888, + "step": 43546 + }, + { + "epoch": 0.8717463653879839, + "grad_norm": 1.1873219013214111, + "learning_rate": 4.2511067106797643e-07, + "loss": 0.3133, + "step": 43547 + }, + { + "epoch": 0.8717663839051122, + "grad_norm": 2.075695276260376, + "learning_rate": 4.249798715343245e-07, + "loss": 0.7438, + "step": 43548 + }, + { + "epoch": 0.8717864024222406, + "grad_norm": 1.1883519887924194, + "learning_rate": 4.2484909123307216e-07, + "loss": 0.3088, + "step": 43549 + }, + { + "epoch": 0.8718064209393689, + "grad_norm": 0.9851283431053162, + "learning_rate": 4.2471833016477e-07, + "loss": 0.284, + "step": 43550 + }, + { + "epoch": 0.8718264394564973, + "grad_norm": 1.0105122327804565, + "learning_rate": 4.245875883299677e-07, + "loss": 0.2727, + "step": 43551 + }, + { + "epoch": 0.8718464579736256, + "grad_norm": 1.186518907546997, + "learning_rate": 4.2445686572921473e-07, + "loss": 0.3072, + "step": 43552 + }, + { + "epoch": 0.871866476490754, + "grad_norm": 1.0305372476577759, + "learning_rate": 4.243261623630601e-07, + "loss": 0.2959, + "step": 43553 + }, + { + "epoch": 0.8718864950078823, + "grad_norm": 1.0422013998031616, + "learning_rate": 4.241954782320529e-07, + "loss": 0.3303, + "step": 43554 + }, + { + "epoch": 0.8719065135250106, + "grad_norm": 1.1981632709503174, + "learning_rate": 4.2406481333674365e-07, + "loss": 0.2976, + "step": 43555 + }, + { + "epoch": 0.871926532042139, + "grad_norm": 1.048462152481079, + "learning_rate": 4.239341676776809e-07, + "loss": 0.2872, + "step": 43556 + }, + { + "epoch": 0.8719465505592673, + "grad_norm": 1.0370793342590332, + "learning_rate": 4.238035412554142e-07, + "loss": 0.3305, + "step": 43557 + }, + { + "epoch": 0.8719665690763957, + "grad_norm": 1.3606150150299072, + "learning_rate": 4.236729340704926e-07, + "loss": 0.2712, + "step": 43558 + }, + { + "epoch": 0.871986587593524, + "grad_norm": 0.9409019947052002, + "learning_rate": 4.2354234612346557e-07, + "loss": 0.2491, + "step": 43559 + }, + { + "epoch": 0.8720066061106524, + "grad_norm": 1.144163727760315, + "learning_rate": 4.234117774148816e-07, + "loss": 0.3078, + "step": 43560 + }, + { + "epoch": 0.8720266246277807, + "grad_norm": 1.6427006721496582, + "learning_rate": 4.232812279452897e-07, + "loss": 0.3017, + "step": 43561 + }, + { + "epoch": 0.8720466431449091, + "grad_norm": 1.1137721538543701, + "learning_rate": 4.2315069771523887e-07, + "loss": 0.263, + "step": 43562 + }, + { + "epoch": 0.8720666616620374, + "grad_norm": 1.1260594129562378, + "learning_rate": 4.2302018672527643e-07, + "loss": 0.2857, + "step": 43563 + }, + { + "epoch": 0.8720866801791657, + "grad_norm": 1.0599825382232666, + "learning_rate": 4.228896949759531e-07, + "loss": 0.2552, + "step": 43564 + }, + { + "epoch": 0.8721066986962941, + "grad_norm": 1.1744213104248047, + "learning_rate": 4.227592224678162e-07, + "loss": 0.2713, + "step": 43565 + }, + { + "epoch": 0.8721267172134224, + "grad_norm": 1.1263110637664795, + "learning_rate": 4.226287692014147e-07, + "loss": 0.3025, + "step": 43566 + }, + { + "epoch": 0.8721467357305508, + "grad_norm": 1.0395864248275757, + "learning_rate": 4.22498335177296e-07, + "loss": 0.278, + "step": 43567 + }, + { + "epoch": 0.8721667542476791, + "grad_norm": 1.7975070476531982, + "learning_rate": 4.2236792039601015e-07, + "loss": 0.764, + "step": 43568 + }, + { + "epoch": 0.8721867727648075, + "grad_norm": 1.0575305223464966, + "learning_rate": 4.222375248581034e-07, + "loss": 0.3404, + "step": 43569 + }, + { + "epoch": 0.8722067912819358, + "grad_norm": 1.1307830810546875, + "learning_rate": 4.2210714856412594e-07, + "loss": 0.3179, + "step": 43570 + }, + { + "epoch": 0.8722268097990641, + "grad_norm": 1.199392318725586, + "learning_rate": 4.21976791514625e-07, + "loss": 0.2888, + "step": 43571 + }, + { + "epoch": 0.8722468283161925, + "grad_norm": 1.1052711009979248, + "learning_rate": 4.218464537101469e-07, + "loss": 0.2448, + "step": 43572 + }, + { + "epoch": 0.8722668468333208, + "grad_norm": 1.7843267917633057, + "learning_rate": 4.2171613515124276e-07, + "loss": 0.7373, + "step": 43573 + }, + { + "epoch": 0.8722868653504492, + "grad_norm": 1.9162510633468628, + "learning_rate": 4.2158583583845836e-07, + "loss": 0.7849, + "step": 43574 + }, + { + "epoch": 0.8723068838675775, + "grad_norm": 1.0378957986831665, + "learning_rate": 4.214555557723415e-07, + "loss": 0.2919, + "step": 43575 + }, + { + "epoch": 0.8723269023847059, + "grad_norm": 1.2323110103607178, + "learning_rate": 4.2132529495343966e-07, + "loss": 0.2655, + "step": 43576 + }, + { + "epoch": 0.8723469209018342, + "grad_norm": 1.0607613325119019, + "learning_rate": 4.211950533823017e-07, + "loss": 0.299, + "step": 43577 + }, + { + "epoch": 0.8723669394189626, + "grad_norm": 1.318534016609192, + "learning_rate": 4.210648310594745e-07, + "loss": 0.3196, + "step": 43578 + }, + { + "epoch": 0.8723869579360909, + "grad_norm": 1.019620656967163, + "learning_rate": 4.209346279855053e-07, + "loss": 0.3195, + "step": 43579 + }, + { + "epoch": 0.8724069764532192, + "grad_norm": 1.1668471097946167, + "learning_rate": 4.20804444160941e-07, + "loss": 0.3079, + "step": 43580 + }, + { + "epoch": 0.8724269949703476, + "grad_norm": 2.218069076538086, + "learning_rate": 4.2067427958632947e-07, + "loss": 0.7605, + "step": 43581 + }, + { + "epoch": 0.8724470134874759, + "grad_norm": 1.098017692565918, + "learning_rate": 4.205441342622174e-07, + "loss": 0.2822, + "step": 43582 + }, + { + "epoch": 0.8724670320046043, + "grad_norm": 1.3271864652633667, + "learning_rate": 4.2041400818915333e-07, + "loss": 0.294, + "step": 43583 + }, + { + "epoch": 0.8724870505217326, + "grad_norm": 1.0392528772354126, + "learning_rate": 4.2028390136768294e-07, + "loss": 0.2676, + "step": 43584 + }, + { + "epoch": 0.872507069038861, + "grad_norm": 1.1624773740768433, + "learning_rate": 4.201538137983524e-07, + "loss": 0.3026, + "step": 43585 + }, + { + "epoch": 0.8725270875559893, + "grad_norm": 1.1676675081253052, + "learning_rate": 4.2002374548171076e-07, + "loss": 0.2933, + "step": 43586 + }, + { + "epoch": 0.8725471060731176, + "grad_norm": 1.1253975629806519, + "learning_rate": 4.1989369641830367e-07, + "loss": 0.3087, + "step": 43587 + }, + { + "epoch": 0.872567124590246, + "grad_norm": 1.2039790153503418, + "learning_rate": 4.1976366660867793e-07, + "loss": 0.3443, + "step": 43588 + }, + { + "epoch": 0.8725871431073743, + "grad_norm": 1.0968730449676514, + "learning_rate": 4.1963365605337867e-07, + "loss": 0.2958, + "step": 43589 + }, + { + "epoch": 0.8726071616245027, + "grad_norm": 1.2174502611160278, + "learning_rate": 4.1950366475295487e-07, + "loss": 0.2817, + "step": 43590 + }, + { + "epoch": 0.872627180141631, + "grad_norm": 1.184468150138855, + "learning_rate": 4.1937369270795223e-07, + "loss": 0.3127, + "step": 43591 + }, + { + "epoch": 0.8726471986587594, + "grad_norm": 1.1379048824310303, + "learning_rate": 4.19243739918917e-07, + "loss": 0.3282, + "step": 43592 + }, + { + "epoch": 0.8726672171758877, + "grad_norm": 1.2296526432037354, + "learning_rate": 4.1911380638639477e-07, + "loss": 0.293, + "step": 43593 + }, + { + "epoch": 0.8726872356930161, + "grad_norm": 1.1106756925582886, + "learning_rate": 4.189838921109318e-07, + "loss": 0.3024, + "step": 43594 + }, + { + "epoch": 0.8727072542101444, + "grad_norm": 1.0737874507904053, + "learning_rate": 4.188539970930744e-07, + "loss": 0.2787, + "step": 43595 + }, + { + "epoch": 0.8727272727272727, + "grad_norm": 1.146722435951233, + "learning_rate": 4.1872412133336983e-07, + "loss": 0.2953, + "step": 43596 + }, + { + "epoch": 0.8727472912444011, + "grad_norm": 1.140323281288147, + "learning_rate": 4.1859426483236377e-07, + "loss": 0.2875, + "step": 43597 + }, + { + "epoch": 0.8727673097615294, + "grad_norm": 1.0525784492492676, + "learning_rate": 4.1846442759059967e-07, + "loss": 0.3217, + "step": 43598 + }, + { + "epoch": 0.8727873282786578, + "grad_norm": 1.3565433025360107, + "learning_rate": 4.183346096086266e-07, + "loss": 0.3209, + "step": 43599 + }, + { + "epoch": 0.8728073467957861, + "grad_norm": 1.2138590812683105, + "learning_rate": 4.182048108869885e-07, + "loss": 0.3302, + "step": 43600 + }, + { + "epoch": 0.8728273653129145, + "grad_norm": 1.1000255346298218, + "learning_rate": 4.180750314262316e-07, + "loss": 0.218, + "step": 43601 + }, + { + "epoch": 0.8728473838300428, + "grad_norm": 1.0892573595046997, + "learning_rate": 4.1794527122690166e-07, + "loss": 0.3086, + "step": 43602 + }, + { + "epoch": 0.8728674023471711, + "grad_norm": 1.1098169088363647, + "learning_rate": 4.17815530289542e-07, + "loss": 0.3262, + "step": 43603 + }, + { + "epoch": 0.8728874208642995, + "grad_norm": 1.0510523319244385, + "learning_rate": 4.176858086147012e-07, + "loss": 0.2474, + "step": 43604 + }, + { + "epoch": 0.8729074393814278, + "grad_norm": 1.083421230316162, + "learning_rate": 4.175561062029232e-07, + "loss": 0.2896, + "step": 43605 + }, + { + "epoch": 0.8729274578985562, + "grad_norm": 1.0765935182571411, + "learning_rate": 4.174264230547526e-07, + "loss": 0.2748, + "step": 43606 + }, + { + "epoch": 0.8729474764156845, + "grad_norm": 1.0140323638916016, + "learning_rate": 4.1729675917073496e-07, + "loss": 0.236, + "step": 43607 + }, + { + "epoch": 0.8729674949328129, + "grad_norm": 1.0341663360595703, + "learning_rate": 4.17167114551415e-07, + "loss": 0.3216, + "step": 43608 + }, + { + "epoch": 0.8729875134499412, + "grad_norm": 1.0583189725875854, + "learning_rate": 4.1703748919733944e-07, + "loss": 0.2958, + "step": 43609 + }, + { + "epoch": 0.8730075319670696, + "grad_norm": 1.1254276037216187, + "learning_rate": 4.169078831090517e-07, + "loss": 0.2836, + "step": 43610 + }, + { + "epoch": 0.8730275504841979, + "grad_norm": 1.041313886642456, + "learning_rate": 4.1677829628709643e-07, + "loss": 0.241, + "step": 43611 + }, + { + "epoch": 0.8730475690013262, + "grad_norm": 1.2236727476119995, + "learning_rate": 4.166487287320192e-07, + "loss": 0.2993, + "step": 43612 + }, + { + "epoch": 0.8730675875184546, + "grad_norm": 1.3747295141220093, + "learning_rate": 4.1651918044436467e-07, + "loss": 0.3148, + "step": 43613 + }, + { + "epoch": 0.8730876060355829, + "grad_norm": 1.1422959566116333, + "learning_rate": 4.163896514246774e-07, + "loss": 0.3025, + "step": 43614 + }, + { + "epoch": 0.8731076245527113, + "grad_norm": 1.1172436475753784, + "learning_rate": 4.162601416735013e-07, + "loss": 0.2586, + "step": 43615 + }, + { + "epoch": 0.8731276430698396, + "grad_norm": 1.9511966705322266, + "learning_rate": 4.161306511913804e-07, + "loss": 0.7472, + "step": 43616 + }, + { + "epoch": 0.873147661586968, + "grad_norm": 1.0237667560577393, + "learning_rate": 4.1600117997886103e-07, + "loss": 0.2881, + "step": 43617 + }, + { + "epoch": 0.8731676801040963, + "grad_norm": 1.0286097526550293, + "learning_rate": 4.1587172803648547e-07, + "loss": 0.2767, + "step": 43618 + }, + { + "epoch": 0.8731876986212246, + "grad_norm": 1.0759565830230713, + "learning_rate": 4.1574229536479936e-07, + "loss": 0.2922, + "step": 43619 + }, + { + "epoch": 0.873207717138353, + "grad_norm": 1.203782081604004, + "learning_rate": 4.156128819643446e-07, + "loss": 0.316, + "step": 43620 + }, + { + "epoch": 0.8732277356554813, + "grad_norm": 1.136674404144287, + "learning_rate": 4.1548348783566673e-07, + "loss": 0.2402, + "step": 43621 + }, + { + "epoch": 0.8732477541726097, + "grad_norm": 1.2050508260726929, + "learning_rate": 4.153541129793109e-07, + "loss": 0.3118, + "step": 43622 + }, + { + "epoch": 0.873267772689738, + "grad_norm": 1.1982613801956177, + "learning_rate": 4.152247573958196e-07, + "loss": 0.2954, + "step": 43623 + }, + { + "epoch": 0.8732877912068664, + "grad_norm": 1.0728381872177124, + "learning_rate": 4.1509542108573717e-07, + "loss": 0.267, + "step": 43624 + }, + { + "epoch": 0.8733078097239947, + "grad_norm": 1.1735891103744507, + "learning_rate": 4.149661040496056e-07, + "loss": 0.2878, + "step": 43625 + }, + { + "epoch": 0.8733278282411231, + "grad_norm": 1.2517004013061523, + "learning_rate": 4.1483680628797043e-07, + "loss": 0.2645, + "step": 43626 + }, + { + "epoch": 0.8733478467582514, + "grad_norm": 1.062541127204895, + "learning_rate": 4.1470752780137513e-07, + "loss": 0.2717, + "step": 43627 + }, + { + "epoch": 0.8733678652753797, + "grad_norm": 1.0692052841186523, + "learning_rate": 4.1457826859036266e-07, + "loss": 0.2774, + "step": 43628 + }, + { + "epoch": 0.8733878837925081, + "grad_norm": 1.2439206838607788, + "learning_rate": 4.144490286554748e-07, + "loss": 0.3401, + "step": 43629 + }, + { + "epoch": 0.8734079023096364, + "grad_norm": 1.1291797161102295, + "learning_rate": 4.143198079972577e-07, + "loss": 0.2843, + "step": 43630 + }, + { + "epoch": 0.8734279208267648, + "grad_norm": 1.0985009670257568, + "learning_rate": 4.1419060661625374e-07, + "loss": 0.3039, + "step": 43631 + }, + { + "epoch": 0.8734479393438931, + "grad_norm": 1.0519282817840576, + "learning_rate": 4.140614245130048e-07, + "loss": 0.3029, + "step": 43632 + }, + { + "epoch": 0.8734679578610215, + "grad_norm": 1.7912235260009766, + "learning_rate": 4.1393226168805423e-07, + "loss": 0.7459, + "step": 43633 + }, + { + "epoch": 0.8734879763781498, + "grad_norm": 1.0756711959838867, + "learning_rate": 4.138031181419455e-07, + "loss": 0.2801, + "step": 43634 + }, + { + "epoch": 0.8735079948952781, + "grad_norm": 1.1340869665145874, + "learning_rate": 4.136739938752227e-07, + "loss": 0.2981, + "step": 43635 + }, + { + "epoch": 0.8735280134124065, + "grad_norm": 1.1250317096710205, + "learning_rate": 4.135448888884269e-07, + "loss": 0.247, + "step": 43636 + }, + { + "epoch": 0.8735480319295348, + "grad_norm": 1.191375494003296, + "learning_rate": 4.1341580318210115e-07, + "loss": 0.3606, + "step": 43637 + }, + { + "epoch": 0.8735680504466632, + "grad_norm": 1.0978763103485107, + "learning_rate": 4.1328673675678835e-07, + "loss": 0.2876, + "step": 43638 + }, + { + "epoch": 0.8735880689637915, + "grad_norm": 1.070329189300537, + "learning_rate": 4.1315768961303126e-07, + "loss": 0.2351, + "step": 43639 + }, + { + "epoch": 0.8736080874809199, + "grad_norm": 1.042517066001892, + "learning_rate": 4.1302866175137233e-07, + "loss": 0.2926, + "step": 43640 + }, + { + "epoch": 0.8736281059980482, + "grad_norm": 1.2609221935272217, + "learning_rate": 4.128996531723534e-07, + "loss": 0.2991, + "step": 43641 + }, + { + "epoch": 0.8736481245151765, + "grad_norm": 1.0567235946655273, + "learning_rate": 4.1277066387651666e-07, + "loss": 0.295, + "step": 43642 + }, + { + "epoch": 0.8736681430323049, + "grad_norm": 1.1353827714920044, + "learning_rate": 4.1264169386440567e-07, + "loss": 0.2994, + "step": 43643 + }, + { + "epoch": 0.8736881615494332, + "grad_norm": 1.1128208637237549, + "learning_rate": 4.125127431365622e-07, + "loss": 0.2761, + "step": 43644 + }, + { + "epoch": 0.8737081800665616, + "grad_norm": 1.1671987771987915, + "learning_rate": 4.123838116935275e-07, + "loss": 0.2924, + "step": 43645 + }, + { + "epoch": 0.8737281985836899, + "grad_norm": 1.2083758115768433, + "learning_rate": 4.122548995358433e-07, + "loss": 0.3565, + "step": 43646 + }, + { + "epoch": 0.8737482171008183, + "grad_norm": 1.0208503007888794, + "learning_rate": 4.12126006664052e-07, + "loss": 0.3054, + "step": 43647 + }, + { + "epoch": 0.8737682356179466, + "grad_norm": 1.1190364360809326, + "learning_rate": 4.1199713307869647e-07, + "loss": 0.2818, + "step": 43648 + }, + { + "epoch": 0.873788254135075, + "grad_norm": 1.122189998626709, + "learning_rate": 4.11868278780318e-07, + "loss": 0.2856, + "step": 43649 + }, + { + "epoch": 0.8738082726522033, + "grad_norm": 1.1466537714004517, + "learning_rate": 4.1173944376945775e-07, + "loss": 0.2928, + "step": 43650 + }, + { + "epoch": 0.8738282911693316, + "grad_norm": 1.968129277229309, + "learning_rate": 4.116106280466564e-07, + "loss": 0.8278, + "step": 43651 + }, + { + "epoch": 0.87384830968646, + "grad_norm": 1.1186578273773193, + "learning_rate": 4.114818316124575e-07, + "loss": 0.3138, + "step": 43652 + }, + { + "epoch": 0.8738683282035883, + "grad_norm": 1.0389959812164307, + "learning_rate": 4.1135305446740157e-07, + "loss": 0.299, + "step": 43653 + }, + { + "epoch": 0.8738883467207167, + "grad_norm": 1.9655512571334839, + "learning_rate": 4.1122429661203003e-07, + "loss": 0.7708, + "step": 43654 + }, + { + "epoch": 0.873908365237845, + "grad_norm": 1.1453717947006226, + "learning_rate": 4.1109555804688396e-07, + "loss": 0.343, + "step": 43655 + }, + { + "epoch": 0.8739283837549734, + "grad_norm": 1.2770143747329712, + "learning_rate": 4.109668387725035e-07, + "loss": 0.2944, + "step": 43656 + }, + { + "epoch": 0.8739484022721017, + "grad_norm": 1.0572923421859741, + "learning_rate": 4.108381387894317e-07, + "loss": 0.2594, + "step": 43657 + }, + { + "epoch": 0.87396842078923, + "grad_norm": 1.1072304248809814, + "learning_rate": 4.1070945809820907e-07, + "loss": 0.3121, + "step": 43658 + }, + { + "epoch": 0.8739884393063584, + "grad_norm": 1.0828559398651123, + "learning_rate": 4.1058079669937524e-07, + "loss": 0.2535, + "step": 43659 + }, + { + "epoch": 0.8740084578234867, + "grad_norm": 1.1714333295822144, + "learning_rate": 4.1045215459347253e-07, + "loss": 0.3023, + "step": 43660 + }, + { + "epoch": 0.8740284763406151, + "grad_norm": 1.1408030986785889, + "learning_rate": 4.1032353178104165e-07, + "loss": 0.3099, + "step": 43661 + }, + { + "epoch": 0.8740484948577434, + "grad_norm": 1.1814464330673218, + "learning_rate": 4.101949282626233e-07, + "loss": 0.2871, + "step": 43662 + }, + { + "epoch": 0.8740685133748718, + "grad_norm": 1.0430141687393188, + "learning_rate": 4.1006634403875757e-07, + "loss": 0.2357, + "step": 43663 + }, + { + "epoch": 0.8740885318920001, + "grad_norm": 0.9954050183296204, + "learning_rate": 4.09937779109984e-07, + "loss": 0.2591, + "step": 43664 + }, + { + "epoch": 0.8741085504091285, + "grad_norm": 1.863113284111023, + "learning_rate": 4.098092334768455e-07, + "loss": 0.7682, + "step": 43665 + }, + { + "epoch": 0.8741285689262568, + "grad_norm": 1.0908981561660767, + "learning_rate": 4.0968070713988063e-07, + "loss": 0.2684, + "step": 43666 + }, + { + "epoch": 0.8741485874433851, + "grad_norm": 1.0729581117630005, + "learning_rate": 4.095522000996305e-07, + "loss": 0.2729, + "step": 43667 + }, + { + "epoch": 0.8741686059605135, + "grad_norm": 1.1084277629852295, + "learning_rate": 4.0942371235663527e-07, + "loss": 0.3064, + "step": 43668 + }, + { + "epoch": 0.8741886244776418, + "grad_norm": 1.1747409105300903, + "learning_rate": 4.092952439114339e-07, + "loss": 0.2939, + "step": 43669 + }, + { + "epoch": 0.8742086429947702, + "grad_norm": 1.2014198303222656, + "learning_rate": 4.0916679476456833e-07, + "loss": 0.3231, + "step": 43670 + }, + { + "epoch": 0.8742286615118985, + "grad_norm": 1.0677675008773804, + "learning_rate": 4.0903836491657743e-07, + "loss": 0.311, + "step": 43671 + }, + { + "epoch": 0.8742486800290269, + "grad_norm": 1.086115837097168, + "learning_rate": 4.089099543680003e-07, + "loss": 0.2698, + "step": 43672 + }, + { + "epoch": 0.8742686985461552, + "grad_norm": 1.0786319971084595, + "learning_rate": 4.0878156311937753e-07, + "loss": 0.2968, + "step": 43673 + }, + { + "epoch": 0.8742887170632835, + "grad_norm": 1.1945998668670654, + "learning_rate": 4.0865319117125045e-07, + "loss": 0.2697, + "step": 43674 + }, + { + "epoch": 0.8743087355804119, + "grad_norm": 1.3139972686767578, + "learning_rate": 4.0852483852415694e-07, + "loss": 0.3274, + "step": 43675 + }, + { + "epoch": 0.8743287540975402, + "grad_norm": 1.1706562042236328, + "learning_rate": 4.0839650517863703e-07, + "loss": 0.2785, + "step": 43676 + }, + { + "epoch": 0.8743487726146686, + "grad_norm": 0.9850627779960632, + "learning_rate": 4.0826819113522986e-07, + "loss": 0.3093, + "step": 43677 + }, + { + "epoch": 0.8743687911317969, + "grad_norm": 2.055305004119873, + "learning_rate": 4.0813989639447435e-07, + "loss": 0.7479, + "step": 43678 + }, + { + "epoch": 0.8743888096489253, + "grad_norm": 1.166588544845581, + "learning_rate": 4.080116209569118e-07, + "loss": 0.2625, + "step": 43679 + }, + { + "epoch": 0.8744088281660536, + "grad_norm": 1.0421186685562134, + "learning_rate": 4.0788336482307954e-07, + "loss": 0.2247, + "step": 43680 + }, + { + "epoch": 0.874428846683182, + "grad_norm": 1.209985613822937, + "learning_rate": 4.077551279935177e-07, + "loss": 0.3002, + "step": 43681 + }, + { + "epoch": 0.8744488652003103, + "grad_norm": 1.0587435960769653, + "learning_rate": 4.0762691046876415e-07, + "loss": 0.2812, + "step": 43682 + }, + { + "epoch": 0.8744688837174386, + "grad_norm": 1.1384090185165405, + "learning_rate": 4.074987122493601e-07, + "loss": 0.3261, + "step": 43683 + }, + { + "epoch": 0.874488902234567, + "grad_norm": 1.1265156269073486, + "learning_rate": 4.073705333358424e-07, + "loss": 0.2572, + "step": 43684 + }, + { + "epoch": 0.8745089207516953, + "grad_norm": 1.0474739074707031, + "learning_rate": 4.072423737287501e-07, + "loss": 0.2931, + "step": 43685 + }, + { + "epoch": 0.8745289392688237, + "grad_norm": 1.2119132280349731, + "learning_rate": 4.0711423342862324e-07, + "loss": 0.3258, + "step": 43686 + }, + { + "epoch": 0.874548957785952, + "grad_norm": 1.0625132322311401, + "learning_rate": 4.069861124359992e-07, + "loss": 0.3003, + "step": 43687 + }, + { + "epoch": 0.8745689763030804, + "grad_norm": 1.2860487699508667, + "learning_rate": 4.068580107514175e-07, + "loss": 0.2824, + "step": 43688 + }, + { + "epoch": 0.8745889948202087, + "grad_norm": 1.0961065292358398, + "learning_rate": 4.067299283754167e-07, + "loss": 0.2824, + "step": 43689 + }, + { + "epoch": 0.874609013337337, + "grad_norm": 1.1791175603866577, + "learning_rate": 4.0660186530853465e-07, + "loss": 0.3314, + "step": 43690 + }, + { + "epoch": 0.8746290318544654, + "grad_norm": 1.1104909181594849, + "learning_rate": 4.0647382155130866e-07, + "loss": 0.3552, + "step": 43691 + }, + { + "epoch": 0.8746490503715937, + "grad_norm": 1.1892602443695068, + "learning_rate": 4.063457971042795e-07, + "loss": 0.3207, + "step": 43692 + }, + { + "epoch": 0.8746690688887221, + "grad_norm": 1.2486735582351685, + "learning_rate": 4.0621779196798383e-07, + "loss": 0.3229, + "step": 43693 + }, + { + "epoch": 0.8746890874058504, + "grad_norm": 1.0929038524627686, + "learning_rate": 4.0608980614295967e-07, + "loss": 0.2826, + "step": 43694 + }, + { + "epoch": 0.8747091059229788, + "grad_norm": 1.080583930015564, + "learning_rate": 4.0596183962974487e-07, + "loss": 0.2643, + "step": 43695 + }, + { + "epoch": 0.8747291244401071, + "grad_norm": 1.1712466478347778, + "learning_rate": 4.0583389242887905e-07, + "loss": 0.3003, + "step": 43696 + }, + { + "epoch": 0.8747491429572355, + "grad_norm": 1.9457101821899414, + "learning_rate": 4.057059645408984e-07, + "loss": 0.8141, + "step": 43697 + }, + { + "epoch": 0.8747691614743638, + "grad_norm": 1.1838805675506592, + "learning_rate": 4.0557805596634023e-07, + "loss": 0.3091, + "step": 43698 + }, + { + "epoch": 0.8747891799914921, + "grad_norm": 1.2112184762954712, + "learning_rate": 4.0545016670574424e-07, + "loss": 0.2849, + "step": 43699 + }, + { + "epoch": 0.8748091985086205, + "grad_norm": 1.137460470199585, + "learning_rate": 4.05322296759646e-07, + "loss": 0.3068, + "step": 43700 + }, + { + "epoch": 0.8748292170257488, + "grad_norm": 1.0803085565567017, + "learning_rate": 4.05194446128585e-07, + "loss": 0.2893, + "step": 43701 + }, + { + "epoch": 0.8748492355428772, + "grad_norm": 1.0103501081466675, + "learning_rate": 4.0506661481309775e-07, + "loss": 0.2752, + "step": 43702 + }, + { + "epoch": 0.8748692540600055, + "grad_norm": 1.155433177947998, + "learning_rate": 4.0493880281372186e-07, + "loss": 0.275, + "step": 43703 + }, + { + "epoch": 0.8748892725771339, + "grad_norm": 1.4014408588409424, + "learning_rate": 4.048110101309932e-07, + "loss": 0.2933, + "step": 43704 + }, + { + "epoch": 0.8749092910942622, + "grad_norm": 1.9631049633026123, + "learning_rate": 4.0468323676545074e-07, + "loss": 0.7825, + "step": 43705 + }, + { + "epoch": 0.8749293096113905, + "grad_norm": 1.0722289085388184, + "learning_rate": 4.0455548271763123e-07, + "loss": 0.2881, + "step": 43706 + }, + { + "epoch": 0.8749493281285189, + "grad_norm": 1.1285241842269897, + "learning_rate": 4.0442774798807204e-07, + "loss": 0.3175, + "step": 43707 + }, + { + "epoch": 0.8749693466456472, + "grad_norm": 1.1901588439941406, + "learning_rate": 4.0430003257730886e-07, + "loss": 0.2979, + "step": 43708 + }, + { + "epoch": 0.8749893651627756, + "grad_norm": 1.2149436473846436, + "learning_rate": 4.0417233648587906e-07, + "loss": 0.2774, + "step": 43709 + }, + { + "epoch": 0.8750093836799039, + "grad_norm": 1.1315054893493652, + "learning_rate": 4.040446597143205e-07, + "loss": 0.2822, + "step": 43710 + }, + { + "epoch": 0.8750294021970323, + "grad_norm": 2.2243216037750244, + "learning_rate": 4.0391700226316775e-07, + "loss": 0.7272, + "step": 43711 + }, + { + "epoch": 0.8750494207141606, + "grad_norm": 1.1246086359024048, + "learning_rate": 4.037893641329604e-07, + "loss": 0.2671, + "step": 43712 + }, + { + "epoch": 0.875069439231289, + "grad_norm": 1.2296030521392822, + "learning_rate": 4.0366174532423196e-07, + "loss": 0.2512, + "step": 43713 + }, + { + "epoch": 0.8750894577484173, + "grad_norm": 1.2829022407531738, + "learning_rate": 4.0353414583752195e-07, + "loss": 0.3403, + "step": 43714 + }, + { + "epoch": 0.8751094762655456, + "grad_norm": 1.4673402309417725, + "learning_rate": 4.034065656733643e-07, + "loss": 0.3034, + "step": 43715 + }, + { + "epoch": 0.875129494782674, + "grad_norm": 1.125627875328064, + "learning_rate": 4.032790048322971e-07, + "loss": 0.3266, + "step": 43716 + }, + { + "epoch": 0.8751495132998023, + "grad_norm": 1.118312954902649, + "learning_rate": 4.0315146331485535e-07, + "loss": 0.3142, + "step": 43717 + }, + { + "epoch": 0.8751695318169307, + "grad_norm": 1.1564135551452637, + "learning_rate": 4.030239411215747e-07, + "loss": 0.2988, + "step": 43718 + }, + { + "epoch": 0.875189550334059, + "grad_norm": 1.1697725057601929, + "learning_rate": 4.0289643825299316e-07, + "loss": 0.3362, + "step": 43719 + }, + { + "epoch": 0.8752095688511874, + "grad_norm": 1.2156542539596558, + "learning_rate": 4.027689547096458e-07, + "loss": 0.316, + "step": 43720 + }, + { + "epoch": 0.8752295873683157, + "grad_norm": 1.1913131475448608, + "learning_rate": 4.0264149049206835e-07, + "loss": 0.3264, + "step": 43721 + }, + { + "epoch": 0.875249605885444, + "grad_norm": 1.1737428903579712, + "learning_rate": 4.025140456007953e-07, + "loss": 0.2786, + "step": 43722 + }, + { + "epoch": 0.8752696244025724, + "grad_norm": 1.792447566986084, + "learning_rate": 4.023866200363652e-07, + "loss": 0.7372, + "step": 43723 + }, + { + "epoch": 0.8752896429197007, + "grad_norm": 1.2244523763656616, + "learning_rate": 4.0225921379931144e-07, + "loss": 0.2754, + "step": 43724 + }, + { + "epoch": 0.8753096614368291, + "grad_norm": 1.218828558921814, + "learning_rate": 4.021318268901714e-07, + "loss": 0.2882, + "step": 43725 + }, + { + "epoch": 0.8753296799539574, + "grad_norm": 1.965551733970642, + "learning_rate": 4.020044593094785e-07, + "loss": 0.6847, + "step": 43726 + }, + { + "epoch": 0.8753496984710858, + "grad_norm": 1.0843247175216675, + "learning_rate": 4.018771110577707e-07, + "loss": 0.2599, + "step": 43727 + }, + { + "epoch": 0.8753697169882141, + "grad_norm": 1.1234798431396484, + "learning_rate": 4.0174978213558145e-07, + "loss": 0.301, + "step": 43728 + }, + { + "epoch": 0.8753897355053425, + "grad_norm": 0.9845779538154602, + "learning_rate": 4.01622472543447e-07, + "loss": 0.2646, + "step": 43729 + }, + { + "epoch": 0.8754097540224708, + "grad_norm": 1.0964446067810059, + "learning_rate": 4.0149518228190186e-07, + "loss": 0.3018, + "step": 43730 + }, + { + "epoch": 0.8754297725395991, + "grad_norm": 1.0994101762771606, + "learning_rate": 4.0136791135148066e-07, + "loss": 0.3397, + "step": 43731 + }, + { + "epoch": 0.8754497910567275, + "grad_norm": 1.112939476966858, + "learning_rate": 4.012406597527202e-07, + "loss": 0.3231, + "step": 43732 + }, + { + "epoch": 0.8754698095738558, + "grad_norm": 1.2473191022872925, + "learning_rate": 4.011134274861539e-07, + "loss": 0.275, + "step": 43733 + }, + { + "epoch": 0.8754898280909842, + "grad_norm": 1.1884498596191406, + "learning_rate": 4.009862145523169e-07, + "loss": 0.284, + "step": 43734 + }, + { + "epoch": 0.8755098466081125, + "grad_norm": 1.0890250205993652, + "learning_rate": 4.008590209517438e-07, + "loss": 0.2746, + "step": 43735 + }, + { + "epoch": 0.8755298651252409, + "grad_norm": 1.3757916688919067, + "learning_rate": 4.0073184668497023e-07, + "loss": 0.2838, + "step": 43736 + }, + { + "epoch": 0.8755498836423692, + "grad_norm": 1.1653012037277222, + "learning_rate": 4.006046917525297e-07, + "loss": 0.2996, + "step": 43737 + }, + { + "epoch": 0.8755699021594975, + "grad_norm": 1.1443946361541748, + "learning_rate": 4.004775561549573e-07, + "loss": 0.3169, + "step": 43738 + }, + { + "epoch": 0.8755899206766259, + "grad_norm": 1.196825623512268, + "learning_rate": 4.003504398927882e-07, + "loss": 0.3176, + "step": 43739 + }, + { + "epoch": 0.8756099391937542, + "grad_norm": 1.9190369844436646, + "learning_rate": 4.002233429665547e-07, + "loss": 0.7593, + "step": 43740 + }, + { + "epoch": 0.8756299577108826, + "grad_norm": 1.1111608743667603, + "learning_rate": 4.000962653767937e-07, + "loss": 0.3297, + "step": 43741 + }, + { + "epoch": 0.8756499762280109, + "grad_norm": 1.059239149093628, + "learning_rate": 3.999692071240374e-07, + "loss": 0.265, + "step": 43742 + }, + { + "epoch": 0.8756699947451393, + "grad_norm": 1.0892972946166992, + "learning_rate": 3.9984216820882103e-07, + "loss": 0.2789, + "step": 43743 + }, + { + "epoch": 0.8756900132622676, + "grad_norm": 1.1663165092468262, + "learning_rate": 3.997151486316775e-07, + "loss": 0.2721, + "step": 43744 + }, + { + "epoch": 0.875710031779396, + "grad_norm": 1.8738425970077515, + "learning_rate": 3.995881483931424e-07, + "loss": 0.7789, + "step": 43745 + }, + { + "epoch": 0.8757300502965243, + "grad_norm": 1.2078332901000977, + "learning_rate": 3.994611674937482e-07, + "loss": 0.287, + "step": 43746 + }, + { + "epoch": 0.8757500688136526, + "grad_norm": 1.7872895002365112, + "learning_rate": 3.993342059340294e-07, + "loss": 0.7519, + "step": 43747 + }, + { + "epoch": 0.875770087330781, + "grad_norm": 1.2209888696670532, + "learning_rate": 3.992072637145189e-07, + "loss": 0.3135, + "step": 43748 + }, + { + "epoch": 0.8757901058479093, + "grad_norm": 1.176598072052002, + "learning_rate": 3.9908034083575186e-07, + "loss": 0.275, + "step": 43749 + }, + { + "epoch": 0.8758101243650377, + "grad_norm": 1.1846078634262085, + "learning_rate": 3.9895343729825955e-07, + "loss": 0.3154, + "step": 43750 + }, + { + "epoch": 0.875830142882166, + "grad_norm": 1.8277311325073242, + "learning_rate": 3.988265531025781e-07, + "loss": 0.8284, + "step": 43751 + }, + { + "epoch": 0.8758501613992944, + "grad_norm": 1.000972867012024, + "learning_rate": 3.9869968824924e-07, + "loss": 0.2309, + "step": 43752 + }, + { + "epoch": 0.8758701799164227, + "grad_norm": 1.1965402364730835, + "learning_rate": 3.985728427387764e-07, + "loss": 0.3377, + "step": 43753 + }, + { + "epoch": 0.875890198433551, + "grad_norm": 1.038507103919983, + "learning_rate": 3.9844601657172413e-07, + "loss": 0.2737, + "step": 43754 + }, + { + "epoch": 0.8759102169506794, + "grad_norm": 1.1251970529556274, + "learning_rate": 3.9831920974861383e-07, + "loss": 0.3132, + "step": 43755 + }, + { + "epoch": 0.8759302354678077, + "grad_norm": 1.1236791610717773, + "learning_rate": 3.9819242226997957e-07, + "loss": 0.3119, + "step": 43756 + }, + { + "epoch": 0.8759502539849361, + "grad_norm": 1.0863912105560303, + "learning_rate": 3.980656541363531e-07, + "loss": 0.2921, + "step": 43757 + }, + { + "epoch": 0.8759702725020644, + "grad_norm": 2.0582211017608643, + "learning_rate": 3.9793890534826964e-07, + "loss": 0.7755, + "step": 43758 + }, + { + "epoch": 0.8759902910191928, + "grad_norm": 1.3142322301864624, + "learning_rate": 3.978121759062603e-07, + "loss": 0.277, + "step": 43759 + }, + { + "epoch": 0.8760103095363211, + "grad_norm": 1.1512733697891235, + "learning_rate": 3.976854658108581e-07, + "loss": 0.3131, + "step": 43760 + }, + { + "epoch": 0.8760303280534495, + "grad_norm": 1.2836180925369263, + "learning_rate": 3.9755877506259475e-07, + "loss": 0.2975, + "step": 43761 + }, + { + "epoch": 0.8760503465705778, + "grad_norm": 1.062869906425476, + "learning_rate": 3.974321036620049e-07, + "loss": 0.3234, + "step": 43762 + }, + { + "epoch": 0.8760703650877061, + "grad_norm": 1.2287113666534424, + "learning_rate": 3.973054516096192e-07, + "loss": 0.3245, + "step": 43763 + }, + { + "epoch": 0.8760903836048345, + "grad_norm": 1.1901743412017822, + "learning_rate": 3.9717881890597163e-07, + "loss": 0.2797, + "step": 43764 + }, + { + "epoch": 0.8761104021219628, + "grad_norm": 1.049163579940796, + "learning_rate": 3.970522055515935e-07, + "loss": 0.2807, + "step": 43765 + }, + { + "epoch": 0.8761304206390912, + "grad_norm": 1.2102302312850952, + "learning_rate": 3.9692561154701717e-07, + "loss": 0.292, + "step": 43766 + }, + { + "epoch": 0.8761504391562195, + "grad_norm": 1.4849437475204468, + "learning_rate": 3.967990368927749e-07, + "loss": 0.2373, + "step": 43767 + }, + { + "epoch": 0.8761704576733479, + "grad_norm": 1.1716620922088623, + "learning_rate": 3.9667248158939964e-07, + "loss": 0.3299, + "step": 43768 + }, + { + "epoch": 0.8761904761904762, + "grad_norm": 1.0952374935150146, + "learning_rate": 3.965459456374221e-07, + "loss": 0.2719, + "step": 43769 + }, + { + "epoch": 0.8762104947076045, + "grad_norm": 1.0523453950881958, + "learning_rate": 3.9641942903737516e-07, + "loss": 0.3141, + "step": 43770 + }, + { + "epoch": 0.8762305132247329, + "grad_norm": 1.1914303302764893, + "learning_rate": 3.9629293178978956e-07, + "loss": 0.2608, + "step": 43771 + }, + { + "epoch": 0.8762505317418612, + "grad_norm": 1.1648370027542114, + "learning_rate": 3.9616645389519813e-07, + "loss": 0.3015, + "step": 43772 + }, + { + "epoch": 0.8762705502589896, + "grad_norm": 1.1124062538146973, + "learning_rate": 3.9603999535413217e-07, + "loss": 0.2967, + "step": 43773 + }, + { + "epoch": 0.8762905687761179, + "grad_norm": 1.330215573310852, + "learning_rate": 3.9591355616712234e-07, + "loss": 0.3367, + "step": 43774 + }, + { + "epoch": 0.8763105872932463, + "grad_norm": 1.2701677083969116, + "learning_rate": 3.9578713633470265e-07, + "loss": 0.2644, + "step": 43775 + }, + { + "epoch": 0.8763306058103746, + "grad_norm": 1.2435302734375, + "learning_rate": 3.956607358574016e-07, + "loss": 0.2929, + "step": 43776 + }, + { + "epoch": 0.876350624327503, + "grad_norm": 1.0978147983551025, + "learning_rate": 3.9553435473575263e-07, + "loss": 0.2883, + "step": 43777 + }, + { + "epoch": 0.8763706428446313, + "grad_norm": 1.0317232608795166, + "learning_rate": 3.9540799297028645e-07, + "loss": 0.2976, + "step": 43778 + }, + { + "epoch": 0.8763906613617596, + "grad_norm": 1.0935484170913696, + "learning_rate": 3.9528165056153475e-07, + "loss": 0.3201, + "step": 43779 + }, + { + "epoch": 0.876410679878888, + "grad_norm": 1.0348527431488037, + "learning_rate": 3.9515532751002674e-07, + "loss": 0.2998, + "step": 43780 + }, + { + "epoch": 0.8764306983960163, + "grad_norm": 1.0725115537643433, + "learning_rate": 3.9502902381629514e-07, + "loss": 0.3401, + "step": 43781 + }, + { + "epoch": 0.8764507169131447, + "grad_norm": 1.0322067737579346, + "learning_rate": 3.949027394808713e-07, + "loss": 0.2729, + "step": 43782 + }, + { + "epoch": 0.876470735430273, + "grad_norm": 1.2043447494506836, + "learning_rate": 3.947764745042848e-07, + "loss": 0.3183, + "step": 43783 + }, + { + "epoch": 0.8764907539474014, + "grad_norm": 1.94242262840271, + "learning_rate": 3.946502288870663e-07, + "loss": 0.6851, + "step": 43784 + }, + { + "epoch": 0.8765107724645297, + "grad_norm": 1.1468262672424316, + "learning_rate": 3.945240026297481e-07, + "loss": 0.2352, + "step": 43785 + }, + { + "epoch": 0.876530790981658, + "grad_norm": 1.601962924003601, + "learning_rate": 3.943977957328599e-07, + "loss": 0.2869, + "step": 43786 + }, + { + "epoch": 0.8765508094987864, + "grad_norm": 1.2668272256851196, + "learning_rate": 3.942716081969311e-07, + "loss": 0.2879, + "step": 43787 + }, + { + "epoch": 0.8765708280159147, + "grad_norm": 1.05027437210083, + "learning_rate": 3.941454400224942e-07, + "loss": 0.3099, + "step": 43788 + }, + { + "epoch": 0.8765908465330431, + "grad_norm": 1.0358320474624634, + "learning_rate": 3.940192912100782e-07, + "loss": 0.2618, + "step": 43789 + }, + { + "epoch": 0.8766108650501714, + "grad_norm": 1.2847522497177124, + "learning_rate": 3.938931617602143e-07, + "loss": 0.3156, + "step": 43790 + }, + { + "epoch": 0.8766308835672998, + "grad_norm": 1.2346004247665405, + "learning_rate": 3.9376705167343265e-07, + "loss": 0.322, + "step": 43791 + }, + { + "epoch": 0.8766509020844281, + "grad_norm": 1.2693099975585938, + "learning_rate": 3.936409609502628e-07, + "loss": 0.2814, + "step": 43792 + }, + { + "epoch": 0.8766709206015565, + "grad_norm": 1.0692552328109741, + "learning_rate": 3.9351488959123443e-07, + "loss": 0.2946, + "step": 43793 + }, + { + "epoch": 0.8766909391186848, + "grad_norm": 1.1379691362380981, + "learning_rate": 3.933888375968786e-07, + "loss": 0.264, + "step": 43794 + }, + { + "epoch": 0.8767109576358131, + "grad_norm": 1.1010388135910034, + "learning_rate": 3.9326280496772505e-07, + "loss": 0.3089, + "step": 43795 + }, + { + "epoch": 0.8767309761529415, + "grad_norm": 1.1980751752853394, + "learning_rate": 3.931367917043033e-07, + "loss": 0.2706, + "step": 43796 + }, + { + "epoch": 0.8767509946700698, + "grad_norm": 1.2060846090316772, + "learning_rate": 3.9301079780714233e-07, + "loss": 0.2884, + "step": 43797 + }, + { + "epoch": 0.8767710131871982, + "grad_norm": 1.04645574092865, + "learning_rate": 3.928848232767729e-07, + "loss": 0.2742, + "step": 43798 + }, + { + "epoch": 0.8767910317043265, + "grad_norm": 1.1892462968826294, + "learning_rate": 3.9275886811372455e-07, + "loss": 0.2929, + "step": 43799 + }, + { + "epoch": 0.8768110502214549, + "grad_norm": 1.0676844120025635, + "learning_rate": 3.926329323185257e-07, + "loss": 0.2923, + "step": 43800 + }, + { + "epoch": 0.8768310687385832, + "grad_norm": 1.1804678440093994, + "learning_rate": 3.9250701589170714e-07, + "loss": 0.3235, + "step": 43801 + }, + { + "epoch": 0.8768510872557115, + "grad_norm": 1.1186467409133911, + "learning_rate": 3.923811188337967e-07, + "loss": 0.3316, + "step": 43802 + }, + { + "epoch": 0.8768711057728399, + "grad_norm": 1.4934048652648926, + "learning_rate": 3.922552411453251e-07, + "loss": 0.3371, + "step": 43803 + }, + { + "epoch": 0.8768911242899682, + "grad_norm": 1.03628671169281, + "learning_rate": 3.9212938282682135e-07, + "loss": 0.2857, + "step": 43804 + }, + { + "epoch": 0.8769111428070966, + "grad_norm": 0.953217089176178, + "learning_rate": 3.920035438788139e-07, + "loss": 0.2528, + "step": 43805 + }, + { + "epoch": 0.8769311613242249, + "grad_norm": 1.218187689781189, + "learning_rate": 3.918777243018307e-07, + "loss": 0.2883, + "step": 43806 + }, + { + "epoch": 0.8769511798413533, + "grad_norm": 1.0565862655639648, + "learning_rate": 3.917519240964029e-07, + "loss": 0.2859, + "step": 43807 + }, + { + "epoch": 0.8769711983584816, + "grad_norm": 1.0512419939041138, + "learning_rate": 3.916261432630586e-07, + "loss": 0.283, + "step": 43808 + }, + { + "epoch": 0.87699121687561, + "grad_norm": 1.7397087812423706, + "learning_rate": 3.9150038180232606e-07, + "loss": 0.708, + "step": 43809 + }, + { + "epoch": 0.8770112353927383, + "grad_norm": 1.038722276687622, + "learning_rate": 3.913746397147333e-07, + "loss": 0.3281, + "step": 43810 + }, + { + "epoch": 0.8770312539098666, + "grad_norm": 1.4286800622940063, + "learning_rate": 3.9124891700081045e-07, + "loss": 0.3108, + "step": 43811 + }, + { + "epoch": 0.877051272426995, + "grad_norm": 1.1511749029159546, + "learning_rate": 3.911232136610854e-07, + "loss": 0.3284, + "step": 43812 + }, + { + "epoch": 0.8770712909441233, + "grad_norm": 1.0870901346206665, + "learning_rate": 3.9099752969608604e-07, + "loss": 0.3333, + "step": 43813 + }, + { + "epoch": 0.8770913094612517, + "grad_norm": 1.084768533706665, + "learning_rate": 3.9087186510634143e-07, + "loss": 0.2904, + "step": 43814 + }, + { + "epoch": 0.87711132797838, + "grad_norm": 1.2136446237564087, + "learning_rate": 3.9074621989237893e-07, + "loss": 0.3173, + "step": 43815 + }, + { + "epoch": 0.8771313464955084, + "grad_norm": 1.2432564496994019, + "learning_rate": 3.906205940547286e-07, + "loss": 0.2982, + "step": 43816 + }, + { + "epoch": 0.8771513650126367, + "grad_norm": 1.0437160730361938, + "learning_rate": 3.9049498759391736e-07, + "loss": 0.2875, + "step": 43817 + }, + { + "epoch": 0.877171383529765, + "grad_norm": 1.2985002994537354, + "learning_rate": 3.90369400510473e-07, + "loss": 0.2373, + "step": 43818 + }, + { + "epoch": 0.8771914020468934, + "grad_norm": 1.1603275537490845, + "learning_rate": 3.902438328049224e-07, + "loss": 0.2792, + "step": 43819 + }, + { + "epoch": 0.8772114205640217, + "grad_norm": 1.163995385169983, + "learning_rate": 3.901182844777962e-07, + "loss": 0.3076, + "step": 43820 + }, + { + "epoch": 0.8772314390811501, + "grad_norm": 0.9837761521339417, + "learning_rate": 3.899927555296207e-07, + "loss": 0.2444, + "step": 43821 + }, + { + "epoch": 0.8772514575982784, + "grad_norm": 1.123483419418335, + "learning_rate": 3.898672459609232e-07, + "loss": 0.3308, + "step": 43822 + }, + { + "epoch": 0.8772714761154068, + "grad_norm": 1.2037478685379028, + "learning_rate": 3.8974175577223163e-07, + "loss": 0.3442, + "step": 43823 + }, + { + "epoch": 0.8772914946325351, + "grad_norm": 1.0513700246810913, + "learning_rate": 3.8961628496407277e-07, + "loss": 0.3009, + "step": 43824 + }, + { + "epoch": 0.8773115131496635, + "grad_norm": 1.1291494369506836, + "learning_rate": 3.8949083353697625e-07, + "loss": 0.2832, + "step": 43825 + }, + { + "epoch": 0.8773315316667918, + "grad_norm": 1.1562471389770508, + "learning_rate": 3.893654014914666e-07, + "loss": 0.321, + "step": 43826 + }, + { + "epoch": 0.8773515501839201, + "grad_norm": 1.259597659111023, + "learning_rate": 3.89239988828074e-07, + "loss": 0.3345, + "step": 43827 + }, + { + "epoch": 0.8773715687010485, + "grad_norm": 1.059286117553711, + "learning_rate": 3.89114595547323e-07, + "loss": 0.261, + "step": 43828 + }, + { + "epoch": 0.8773915872181768, + "grad_norm": 1.042852520942688, + "learning_rate": 3.8898922164974264e-07, + "loss": 0.2929, + "step": 43829 + }, + { + "epoch": 0.8774116057353052, + "grad_norm": 1.1189333200454712, + "learning_rate": 3.8886386713585964e-07, + "loss": 0.2599, + "step": 43830 + }, + { + "epoch": 0.8774316242524335, + "grad_norm": 1.1651350259780884, + "learning_rate": 3.887385320062004e-07, + "loss": 0.3294, + "step": 43831 + }, + { + "epoch": 0.8774516427695619, + "grad_norm": 1.0387639999389648, + "learning_rate": 3.886132162612921e-07, + "loss": 0.2679, + "step": 43832 + }, + { + "epoch": 0.8774716612866902, + "grad_norm": 1.7136133909225464, + "learning_rate": 3.8848791990166057e-07, + "loss": 0.7725, + "step": 43833 + }, + { + "epoch": 0.8774916798038185, + "grad_norm": 1.1621661186218262, + "learning_rate": 3.883626429278342e-07, + "loss": 0.2857, + "step": 43834 + }, + { + "epoch": 0.8775116983209469, + "grad_norm": 1.926119089126587, + "learning_rate": 3.882373853403393e-07, + "loss": 0.7628, + "step": 43835 + }, + { + "epoch": 0.8775317168380752, + "grad_norm": 1.0872539281845093, + "learning_rate": 3.8811214713970147e-07, + "loss": 0.2806, + "step": 43836 + }, + { + "epoch": 0.8775517353552036, + "grad_norm": 1.0571757555007935, + "learning_rate": 3.87986928326447e-07, + "loss": 0.3022, + "step": 43837 + }, + { + "epoch": 0.8775717538723319, + "grad_norm": 1.9684724807739258, + "learning_rate": 3.8786172890110383e-07, + "loss": 0.7853, + "step": 43838 + }, + { + "epoch": 0.8775917723894603, + "grad_norm": 1.3741748332977295, + "learning_rate": 3.8773654886419656e-07, + "loss": 0.3025, + "step": 43839 + }, + { + "epoch": 0.8776117909065886, + "grad_norm": 1.0950268507003784, + "learning_rate": 3.87611388216253e-07, + "loss": 0.302, + "step": 43840 + }, + { + "epoch": 0.877631809423717, + "grad_norm": 2.1024179458618164, + "learning_rate": 3.874862469577978e-07, + "loss": 0.7227, + "step": 43841 + }, + { + "epoch": 0.8776518279408453, + "grad_norm": 1.138925552368164, + "learning_rate": 3.8736112508935885e-07, + "loss": 0.2711, + "step": 43842 + }, + { + "epoch": 0.8776718464579736, + "grad_norm": 1.2235928773880005, + "learning_rate": 3.8723602261146074e-07, + "loss": 0.3036, + "step": 43843 + }, + { + "epoch": 0.877691864975102, + "grad_norm": 1.1175251007080078, + "learning_rate": 3.871109395246303e-07, + "loss": 0.2725, + "step": 43844 + }, + { + "epoch": 0.8777118834922303, + "grad_norm": 1.1908934116363525, + "learning_rate": 3.869858758293921e-07, + "loss": 0.3034, + "step": 43845 + }, + { + "epoch": 0.8777319020093587, + "grad_norm": 2.07685923576355, + "learning_rate": 3.8686083152627174e-07, + "loss": 0.6576, + "step": 43846 + }, + { + "epoch": 0.877751920526487, + "grad_norm": 1.1464775800704956, + "learning_rate": 3.8673580661579667e-07, + "loss": 0.2873, + "step": 43847 + }, + { + "epoch": 0.8777719390436154, + "grad_norm": 2.053363800048828, + "learning_rate": 3.86610801098492e-07, + "loss": 0.7796, + "step": 43848 + }, + { + "epoch": 0.8777919575607437, + "grad_norm": 1.0017874240875244, + "learning_rate": 3.864858149748824e-07, + "loss": 0.2576, + "step": 43849 + }, + { + "epoch": 0.877811976077872, + "grad_norm": 1.0622529983520508, + "learning_rate": 3.8636084824549226e-07, + "loss": 0.2908, + "step": 43850 + }, + { + "epoch": 0.8778319945950004, + "grad_norm": 1.0674402713775635, + "learning_rate": 3.8623590091084963e-07, + "loss": 0.2875, + "step": 43851 + }, + { + "epoch": 0.8778520131121287, + "grad_norm": 1.0754703283309937, + "learning_rate": 3.8611097297147737e-07, + "loss": 0.3139, + "step": 43852 + }, + { + "epoch": 0.8778720316292571, + "grad_norm": 1.1922091245651245, + "learning_rate": 3.859860644279023e-07, + "loss": 0.3273, + "step": 43853 + }, + { + "epoch": 0.8778920501463854, + "grad_norm": 1.1292858123779297, + "learning_rate": 3.8586117528064903e-07, + "loss": 0.2862, + "step": 43854 + }, + { + "epoch": 0.8779120686635138, + "grad_norm": 1.0564881563186646, + "learning_rate": 3.8573630553024157e-07, + "loss": 0.3136, + "step": 43855 + }, + { + "epoch": 0.8779320871806421, + "grad_norm": 1.159408688545227, + "learning_rate": 3.8561145517720666e-07, + "loss": 0.3364, + "step": 43856 + }, + { + "epoch": 0.8779521056977705, + "grad_norm": 1.002853274345398, + "learning_rate": 3.854866242220678e-07, + "loss": 0.3046, + "step": 43857 + }, + { + "epoch": 0.8779721242148988, + "grad_norm": 1.948907732963562, + "learning_rate": 3.853618126653502e-07, + "loss": 0.761, + "step": 43858 + }, + { + "epoch": 0.8779921427320271, + "grad_norm": 1.1392083168029785, + "learning_rate": 3.8523702050757784e-07, + "loss": 0.3229, + "step": 43859 + }, + { + "epoch": 0.8780121612491555, + "grad_norm": 1.12760591506958, + "learning_rate": 3.851122477492769e-07, + "loss": 0.307, + "step": 43860 + }, + { + "epoch": 0.8780321797662838, + "grad_norm": 1.1256829500198364, + "learning_rate": 3.8498749439097095e-07, + "loss": 0.3004, + "step": 43861 + }, + { + "epoch": 0.8780521982834122, + "grad_norm": 1.1585321426391602, + "learning_rate": 3.8486276043318395e-07, + "loss": 0.2627, + "step": 43862 + }, + { + "epoch": 0.8780722168005405, + "grad_norm": 1.8191072940826416, + "learning_rate": 3.847380458764399e-07, + "loss": 0.7402, + "step": 43863 + }, + { + "epoch": 0.8780922353176689, + "grad_norm": 1.0516785383224487, + "learning_rate": 3.8461335072126515e-07, + "loss": 0.3001, + "step": 43864 + }, + { + "epoch": 0.8781122538347972, + "grad_norm": 1.130510926246643, + "learning_rate": 3.8448867496818145e-07, + "loss": 0.3045, + "step": 43865 + }, + { + "epoch": 0.8781322723519255, + "grad_norm": 1.2537270784378052, + "learning_rate": 3.8436401861771556e-07, + "loss": 0.2698, + "step": 43866 + }, + { + "epoch": 0.8781522908690539, + "grad_norm": 1.0332521200180054, + "learning_rate": 3.8423938167038934e-07, + "loss": 0.289, + "step": 43867 + }, + { + "epoch": 0.8781723093861822, + "grad_norm": 1.1189287900924683, + "learning_rate": 3.8411476412672676e-07, + "loss": 0.3116, + "step": 43868 + }, + { + "epoch": 0.8781923279033106, + "grad_norm": 1.0956205129623413, + "learning_rate": 3.83990165987253e-07, + "loss": 0.2906, + "step": 43869 + }, + { + "epoch": 0.8782123464204389, + "grad_norm": 1.03311288356781, + "learning_rate": 3.8386558725249156e-07, + "loss": 0.299, + "step": 43870 + }, + { + "epoch": 0.8782323649375673, + "grad_norm": 1.2271654605865479, + "learning_rate": 3.8374102792296584e-07, + "loss": 0.2881, + "step": 43871 + }, + { + "epoch": 0.8782523834546956, + "grad_norm": 1.0164501667022705, + "learning_rate": 3.8361648799919824e-07, + "loss": 0.2504, + "step": 43872 + }, + { + "epoch": 0.878272401971824, + "grad_norm": 1.153576374053955, + "learning_rate": 3.8349196748171446e-07, + "loss": 0.3122, + "step": 43873 + }, + { + "epoch": 0.8782924204889523, + "grad_norm": 1.2802858352661133, + "learning_rate": 3.8336746637103737e-07, + "loss": 0.3004, + "step": 43874 + }, + { + "epoch": 0.8783124390060806, + "grad_norm": 1.0989381074905396, + "learning_rate": 3.832429846676894e-07, + "loss": 0.2785, + "step": 43875 + }, + { + "epoch": 0.878332457523209, + "grad_norm": 1.1363303661346436, + "learning_rate": 3.8311852237219394e-07, + "loss": 0.2251, + "step": 43876 + }, + { + "epoch": 0.8783524760403373, + "grad_norm": 1.1845698356628418, + "learning_rate": 3.8299407948507506e-07, + "loss": 0.2868, + "step": 43877 + }, + { + "epoch": 0.8783724945574657, + "grad_norm": 1.122432827949524, + "learning_rate": 3.828696560068551e-07, + "loss": 0.2605, + "step": 43878 + }, + { + "epoch": 0.878392513074594, + "grad_norm": 1.14560067653656, + "learning_rate": 3.827452519380576e-07, + "loss": 0.3236, + "step": 43879 + }, + { + "epoch": 0.8784125315917224, + "grad_norm": 1.2546955347061157, + "learning_rate": 3.8262086727920587e-07, + "loss": 0.3119, + "step": 43880 + }, + { + "epoch": 0.8784325501088507, + "grad_norm": 1.1010940074920654, + "learning_rate": 3.824965020308219e-07, + "loss": 0.3151, + "step": 43881 + }, + { + "epoch": 0.878452568625979, + "grad_norm": 1.1385127305984497, + "learning_rate": 3.8237215619342907e-07, + "loss": 0.3024, + "step": 43882 + }, + { + "epoch": 0.8784725871431074, + "grad_norm": 1.068158507347107, + "learning_rate": 3.8224782976755027e-07, + "loss": 0.2769, + "step": 43883 + }, + { + "epoch": 0.8784926056602357, + "grad_norm": 1.3172069787979126, + "learning_rate": 3.821235227537079e-07, + "loss": 0.303, + "step": 43884 + }, + { + "epoch": 0.8785126241773641, + "grad_norm": 1.9774454832077026, + "learning_rate": 3.819992351524249e-07, + "loss": 0.7255, + "step": 43885 + }, + { + "epoch": 0.8785326426944924, + "grad_norm": 1.0760867595672607, + "learning_rate": 3.818749669642219e-07, + "loss": 0.303, + "step": 43886 + }, + { + "epoch": 0.8785526612116208, + "grad_norm": 1.1650617122650146, + "learning_rate": 3.817507181896235e-07, + "loss": 0.3145, + "step": 43887 + }, + { + "epoch": 0.8785726797287491, + "grad_norm": 1.1390577554702759, + "learning_rate": 3.8162648882915153e-07, + "loss": 0.3048, + "step": 43888 + }, + { + "epoch": 0.8785926982458775, + "grad_norm": 1.1271092891693115, + "learning_rate": 3.815022788833267e-07, + "loss": 0.2778, + "step": 43889 + }, + { + "epoch": 0.8786127167630058, + "grad_norm": 1.0041016340255737, + "learning_rate": 3.8137808835267354e-07, + "loss": 0.2912, + "step": 43890 + }, + { + "epoch": 0.8786327352801341, + "grad_norm": 1.2723208665847778, + "learning_rate": 3.8125391723771167e-07, + "loss": 0.2937, + "step": 43891 + }, + { + "epoch": 0.8786527537972625, + "grad_norm": 1.0499707460403442, + "learning_rate": 3.8112976553896565e-07, + "loss": 0.2976, + "step": 43892 + }, + { + "epoch": 0.8786727723143908, + "grad_norm": 1.1075191497802734, + "learning_rate": 3.8100563325695616e-07, + "loss": 0.318, + "step": 43893 + }, + { + "epoch": 0.8786927908315192, + "grad_norm": 1.2000229358673096, + "learning_rate": 3.8088152039220507e-07, + "loss": 0.2894, + "step": 43894 + }, + { + "epoch": 0.8787128093486475, + "grad_norm": 1.0731817483901978, + "learning_rate": 3.8075742694523245e-07, + "loss": 0.2535, + "step": 43895 + }, + { + "epoch": 0.8787328278657759, + "grad_norm": 1.067562460899353, + "learning_rate": 3.806333529165629e-07, + "loss": 0.2493, + "step": 43896 + }, + { + "epoch": 0.8787528463829042, + "grad_norm": 1.1013214588165283, + "learning_rate": 3.8050929830671656e-07, + "loss": 0.2888, + "step": 43897 + }, + { + "epoch": 0.8787728649000325, + "grad_norm": 1.06818687915802, + "learning_rate": 3.8038526311621525e-07, + "loss": 0.3089, + "step": 43898 + }, + { + "epoch": 0.8787928834171609, + "grad_norm": 1.1517730951309204, + "learning_rate": 3.8026124734557913e-07, + "loss": 0.3501, + "step": 43899 + }, + { + "epoch": 0.8788129019342892, + "grad_norm": 1.0848886966705322, + "learning_rate": 3.8013725099533104e-07, + "loss": 0.2967, + "step": 43900 + }, + { + "epoch": 0.8788329204514176, + "grad_norm": 1.1660149097442627, + "learning_rate": 3.8001327406599233e-07, + "loss": 0.3097, + "step": 43901 + }, + { + "epoch": 0.8788529389685459, + "grad_norm": 1.0822570323944092, + "learning_rate": 3.798893165580825e-07, + "loss": 0.2939, + "step": 43902 + }, + { + "epoch": 0.8788729574856743, + "grad_norm": 1.1202678680419922, + "learning_rate": 3.7976537847212445e-07, + "loss": 0.2812, + "step": 43903 + }, + { + "epoch": 0.8788929760028026, + "grad_norm": 1.1316510438919067, + "learning_rate": 3.7964145980863787e-07, + "loss": 0.3181, + "step": 43904 + }, + { + "epoch": 0.878912994519931, + "grad_norm": 1.1129297018051147, + "learning_rate": 3.795175605681445e-07, + "loss": 0.2544, + "step": 43905 + }, + { + "epoch": 0.8789330130370593, + "grad_norm": 1.1827856302261353, + "learning_rate": 3.7939368075116557e-07, + "loss": 0.257, + "step": 43906 + }, + { + "epoch": 0.8789530315541876, + "grad_norm": 1.1843119859695435, + "learning_rate": 3.7926982035822126e-07, + "loss": 0.3343, + "step": 43907 + }, + { + "epoch": 0.878973050071316, + "grad_norm": 1.0788438320159912, + "learning_rate": 3.791459793898311e-07, + "loss": 0.3104, + "step": 43908 + }, + { + "epoch": 0.8789930685884443, + "grad_norm": 1.1106359958648682, + "learning_rate": 3.7902215784651806e-07, + "loss": 0.3063, + "step": 43909 + }, + { + "epoch": 0.8790130871055727, + "grad_norm": 1.0841248035430908, + "learning_rate": 3.7889835572880065e-07, + "loss": 0.3185, + "step": 43910 + }, + { + "epoch": 0.879033105622701, + "grad_norm": 1.9065275192260742, + "learning_rate": 3.7877457303720054e-07, + "loss": 0.7378, + "step": 43911 + }, + { + "epoch": 0.8790531241398294, + "grad_norm": 2.161985397338867, + "learning_rate": 3.786508097722369e-07, + "loss": 0.7579, + "step": 43912 + }, + { + "epoch": 0.8790731426569577, + "grad_norm": 1.0649256706237793, + "learning_rate": 3.7852706593443143e-07, + "loss": 0.2847, + "step": 43913 + }, + { + "epoch": 0.879093161174086, + "grad_norm": 1.069559931755066, + "learning_rate": 3.784033415243038e-07, + "loss": 0.2798, + "step": 43914 + }, + { + "epoch": 0.8791131796912144, + "grad_norm": 1.1215473413467407, + "learning_rate": 3.78279636542373e-07, + "loss": 0.2766, + "step": 43915 + }, + { + "epoch": 0.8791331982083427, + "grad_norm": 1.7726956605911255, + "learning_rate": 3.781559509891608e-07, + "loss": 0.7268, + "step": 43916 + }, + { + "epoch": 0.8791532167254711, + "grad_norm": 2.081278085708618, + "learning_rate": 3.7803228486518574e-07, + "loss": 0.7139, + "step": 43917 + }, + { + "epoch": 0.8791732352425994, + "grad_norm": 1.1866990327835083, + "learning_rate": 3.77908638170969e-07, + "loss": 0.2737, + "step": 43918 + }, + { + "epoch": 0.8791932537597278, + "grad_norm": 1.148154854774475, + "learning_rate": 3.7778501090702967e-07, + "loss": 0.2869, + "step": 43919 + }, + { + "epoch": 0.8792132722768561, + "grad_norm": 1.0613815784454346, + "learning_rate": 3.7766140307388733e-07, + "loss": 0.2855, + "step": 43920 + }, + { + "epoch": 0.8792332907939845, + "grad_norm": 1.0226311683654785, + "learning_rate": 3.7753781467206097e-07, + "loss": 0.286, + "step": 43921 + }, + { + "epoch": 0.8792533093111128, + "grad_norm": 2.020376682281494, + "learning_rate": 3.774142457020713e-07, + "loss": 0.7421, + "step": 43922 + }, + { + "epoch": 0.8792733278282411, + "grad_norm": 1.9641833305358887, + "learning_rate": 3.7729069616443794e-07, + "loss": 0.7058, + "step": 43923 + }, + { + "epoch": 0.8792933463453695, + "grad_norm": 1.9986423254013062, + "learning_rate": 3.7716716605967927e-07, + "loss": 0.7346, + "step": 43924 + }, + { + "epoch": 0.8793133648624978, + "grad_norm": 1.135107159614563, + "learning_rate": 3.77043655388315e-07, + "loss": 0.2972, + "step": 43925 + }, + { + "epoch": 0.8793333833796262, + "grad_norm": 1.2080148458480835, + "learning_rate": 3.7692016415086406e-07, + "loss": 0.3221, + "step": 43926 + }, + { + "epoch": 0.8793534018967545, + "grad_norm": 1.3103969097137451, + "learning_rate": 3.767966923478461e-07, + "loss": 0.3834, + "step": 43927 + }, + { + "epoch": 0.8793734204138829, + "grad_norm": 2.055467128753662, + "learning_rate": 3.76673239979779e-07, + "loss": 0.782, + "step": 43928 + }, + { + "epoch": 0.8793934389310112, + "grad_norm": 1.321230411529541, + "learning_rate": 3.7654980704718403e-07, + "loss": 0.2662, + "step": 43929 + }, + { + "epoch": 0.8794134574481395, + "grad_norm": 1.872721791267395, + "learning_rate": 3.7642639355057743e-07, + "loss": 0.796, + "step": 43930 + }, + { + "epoch": 0.8794334759652679, + "grad_norm": 2.0106966495513916, + "learning_rate": 3.7630299949048044e-07, + "loss": 0.7571, + "step": 43931 + }, + { + "epoch": 0.8794534944823962, + "grad_norm": 1.036575198173523, + "learning_rate": 3.761796248674104e-07, + "loss": 0.2966, + "step": 43932 + }, + { + "epoch": 0.8794735129995246, + "grad_norm": 0.9978123307228088, + "learning_rate": 3.760562696818859e-07, + "loss": 0.2897, + "step": 43933 + }, + { + "epoch": 0.8794935315166529, + "grad_norm": 1.196596622467041, + "learning_rate": 3.759329339344253e-07, + "loss": 0.3366, + "step": 43934 + }, + { + "epoch": 0.8795135500337813, + "grad_norm": 1.093870759010315, + "learning_rate": 3.7580961762554825e-07, + "loss": 0.2728, + "step": 43935 + }, + { + "epoch": 0.8795335685509096, + "grad_norm": 1.9086788892745972, + "learning_rate": 3.756863207557726e-07, + "loss": 0.7423, + "step": 43936 + }, + { + "epoch": 0.879553587068038, + "grad_norm": 1.1815565824508667, + "learning_rate": 3.7556304332561634e-07, + "loss": 0.2861, + "step": 43937 + }, + { + "epoch": 0.8795736055851663, + "grad_norm": 1.1453887224197388, + "learning_rate": 3.754397853355979e-07, + "loss": 0.2929, + "step": 43938 + }, + { + "epoch": 0.8795936241022946, + "grad_norm": 1.117816686630249, + "learning_rate": 3.7531654678623464e-07, + "loss": 0.2725, + "step": 43939 + }, + { + "epoch": 0.879613642619423, + "grad_norm": 1.0161086320877075, + "learning_rate": 3.751933276780462e-07, + "loss": 0.2735, + "step": 43940 + }, + { + "epoch": 0.8796336611365513, + "grad_norm": 1.0280373096466064, + "learning_rate": 3.7507012801154874e-07, + "loss": 0.2312, + "step": 43941 + }, + { + "epoch": 0.8796536796536797, + "grad_norm": 1.1398438215255737, + "learning_rate": 3.749469477872619e-07, + "loss": 0.2364, + "step": 43942 + }, + { + "epoch": 0.879673698170808, + "grad_norm": 1.1072800159454346, + "learning_rate": 3.74823787005702e-07, + "loss": 0.2808, + "step": 43943 + }, + { + "epoch": 0.8796937166879364, + "grad_norm": 1.0995380878448486, + "learning_rate": 3.7470064566738853e-07, + "loss": 0.312, + "step": 43944 + }, + { + "epoch": 0.8797137352050647, + "grad_norm": 1.1548556089401245, + "learning_rate": 3.745775237728383e-07, + "loss": 0.2665, + "step": 43945 + }, + { + "epoch": 0.879733753722193, + "grad_norm": 1.1908888816833496, + "learning_rate": 3.7445442132256816e-07, + "loss": 0.3105, + "step": 43946 + }, + { + "epoch": 0.8797537722393214, + "grad_norm": 1.114359974861145, + "learning_rate": 3.7433133831709655e-07, + "loss": 0.2652, + "step": 43947 + }, + { + "epoch": 0.8797737907564497, + "grad_norm": 1.0838406085968018, + "learning_rate": 3.7420827475693977e-07, + "loss": 0.2866, + "step": 43948 + }, + { + "epoch": 0.8797938092735781, + "grad_norm": 1.1256701946258545, + "learning_rate": 3.7408523064261684e-07, + "loss": 0.2871, + "step": 43949 + }, + { + "epoch": 0.8798138277907064, + "grad_norm": 1.1174248456954956, + "learning_rate": 3.7396220597464397e-07, + "loss": 0.3159, + "step": 43950 + }, + { + "epoch": 0.8798338463078348, + "grad_norm": 1.0438477993011475, + "learning_rate": 3.7383920075353854e-07, + "loss": 0.2713, + "step": 43951 + }, + { + "epoch": 0.8798538648249631, + "grad_norm": 1.181139349937439, + "learning_rate": 3.737162149798168e-07, + "loss": 0.2811, + "step": 43952 + }, + { + "epoch": 0.8798738833420915, + "grad_norm": 1.1698415279388428, + "learning_rate": 3.735932486539973e-07, + "loss": 0.3389, + "step": 43953 + }, + { + "epoch": 0.8798939018592198, + "grad_norm": 1.1870107650756836, + "learning_rate": 3.7347030177659507e-07, + "loss": 0.2851, + "step": 43954 + }, + { + "epoch": 0.8799139203763481, + "grad_norm": 1.8489370346069336, + "learning_rate": 3.7334737434812916e-07, + "loss": 0.7777, + "step": 43955 + }, + { + "epoch": 0.8799339388934765, + "grad_norm": 1.242085576057434, + "learning_rate": 3.7322446636911425e-07, + "loss": 0.3552, + "step": 43956 + }, + { + "epoch": 0.8799539574106048, + "grad_norm": 1.210475206375122, + "learning_rate": 3.731015778400687e-07, + "loss": 0.2972, + "step": 43957 + }, + { + "epoch": 0.8799739759277332, + "grad_norm": 1.110480546951294, + "learning_rate": 3.729787087615089e-07, + "loss": 0.2759, + "step": 43958 + }, + { + "epoch": 0.8799939944448615, + "grad_norm": 1.033428430557251, + "learning_rate": 3.728558591339504e-07, + "loss": 0.2522, + "step": 43959 + }, + { + "epoch": 0.8800140129619899, + "grad_norm": 1.798481822013855, + "learning_rate": 3.727330289579101e-07, + "loss": 0.6841, + "step": 43960 + }, + { + "epoch": 0.8800340314791182, + "grad_norm": 1.12140691280365, + "learning_rate": 3.7261021823390364e-07, + "loss": 0.3335, + "step": 43961 + }, + { + "epoch": 0.8800540499962465, + "grad_norm": 1.1056076288223267, + "learning_rate": 3.7248742696244897e-07, + "loss": 0.3096, + "step": 43962 + }, + { + "epoch": 0.8800740685133749, + "grad_norm": 1.1327502727508545, + "learning_rate": 3.7236465514406075e-07, + "loss": 0.273, + "step": 43963 + }, + { + "epoch": 0.8800940870305032, + "grad_norm": 1.857465386390686, + "learning_rate": 3.722419027792562e-07, + "loss": 0.7523, + "step": 43964 + }, + { + "epoch": 0.8801141055476316, + "grad_norm": 1.0139597654342651, + "learning_rate": 3.7211916986855e-07, + "loss": 0.2717, + "step": 43965 + }, + { + "epoch": 0.8801341240647599, + "grad_norm": 0.9895359873771667, + "learning_rate": 3.719964564124595e-07, + "loss": 0.2695, + "step": 43966 + }, + { + "epoch": 0.8801541425818883, + "grad_norm": 1.1098849773406982, + "learning_rate": 3.7187376241149877e-07, + "loss": 0.3138, + "step": 43967 + }, + { + "epoch": 0.8801741610990166, + "grad_norm": 1.0515589714050293, + "learning_rate": 3.7175108786618616e-07, + "loss": 0.2972, + "step": 43968 + }, + { + "epoch": 0.880194179616145, + "grad_norm": 1.1941754817962646, + "learning_rate": 3.7162843277703587e-07, + "loss": 0.3122, + "step": 43969 + }, + { + "epoch": 0.8802141981332733, + "grad_norm": 1.1620382070541382, + "learning_rate": 3.7150579714456234e-07, + "loss": 0.3176, + "step": 43970 + }, + { + "epoch": 0.8802342166504016, + "grad_norm": 1.1219888925552368, + "learning_rate": 3.713831809692836e-07, + "loss": 0.3237, + "step": 43971 + }, + { + "epoch": 0.88025423516753, + "grad_norm": 1.123770833015442, + "learning_rate": 3.7126058425171363e-07, + "loss": 0.3271, + "step": 43972 + }, + { + "epoch": 0.8802742536846583, + "grad_norm": 1.1683831214904785, + "learning_rate": 3.711380069923687e-07, + "loss": 0.3082, + "step": 43973 + }, + { + "epoch": 0.8802942722017867, + "grad_norm": 1.2811170816421509, + "learning_rate": 3.710154491917617e-07, + "loss": 0.2876, + "step": 43974 + }, + { + "epoch": 0.880314290718915, + "grad_norm": 1.1121063232421875, + "learning_rate": 3.708929108504111e-07, + "loss": 0.3114, + "step": 43975 + }, + { + "epoch": 0.8803343092360434, + "grad_norm": 1.0859003067016602, + "learning_rate": 3.707703919688299e-07, + "loss": 0.326, + "step": 43976 + }, + { + "epoch": 0.8803543277531717, + "grad_norm": 1.777479887008667, + "learning_rate": 3.706478925475343e-07, + "loss": 0.7066, + "step": 43977 + }, + { + "epoch": 0.8803743462703, + "grad_norm": 1.1016961336135864, + "learning_rate": 3.7052541258703836e-07, + "loss": 0.2751, + "step": 43978 + }, + { + "epoch": 0.8803943647874284, + "grad_norm": 1.2586758136749268, + "learning_rate": 3.704029520878566e-07, + "loss": 0.2991, + "step": 43979 + }, + { + "epoch": 0.8804143833045567, + "grad_norm": 1.874062418937683, + "learning_rate": 3.7028051105050477e-07, + "loss": 0.7537, + "step": 43980 + }, + { + "epoch": 0.8804344018216851, + "grad_norm": 1.0839509963989258, + "learning_rate": 3.70158089475498e-07, + "loss": 0.2831, + "step": 43981 + }, + { + "epoch": 0.8804544203388134, + "grad_norm": 1.2108662128448486, + "learning_rate": 3.7003568736335037e-07, + "loss": 0.2656, + "step": 43982 + }, + { + "epoch": 0.8804744388559418, + "grad_norm": 1.0538097620010376, + "learning_rate": 3.6991330471457534e-07, + "loss": 0.2821, + "step": 43983 + }, + { + "epoch": 0.8804944573730701, + "grad_norm": 1.092227816581726, + "learning_rate": 3.697909415296891e-07, + "loss": 0.2631, + "step": 43984 + }, + { + "epoch": 0.8805144758901984, + "grad_norm": 1.082730770111084, + "learning_rate": 3.696685978092057e-07, + "loss": 0.2689, + "step": 43985 + }, + { + "epoch": 0.8805344944073268, + "grad_norm": 1.1497050523757935, + "learning_rate": 3.695462735536387e-07, + "loss": 0.3058, + "step": 43986 + }, + { + "epoch": 0.8805545129244551, + "grad_norm": 1.0665496587753296, + "learning_rate": 3.694239687635021e-07, + "loss": 0.2484, + "step": 43987 + }, + { + "epoch": 0.8805745314415835, + "grad_norm": 1.1023030281066895, + "learning_rate": 3.6930168343931096e-07, + "loss": 0.3034, + "step": 43988 + }, + { + "epoch": 0.8805945499587118, + "grad_norm": 1.0411893129348755, + "learning_rate": 3.6917941758157936e-07, + "loss": 0.2867, + "step": 43989 + }, + { + "epoch": 0.8806145684758402, + "grad_norm": 1.045218586921692, + "learning_rate": 3.6905717119082086e-07, + "loss": 0.2665, + "step": 43990 + }, + { + "epoch": 0.8806345869929685, + "grad_norm": 0.9946260452270508, + "learning_rate": 3.689349442675494e-07, + "loss": 0.2941, + "step": 43991 + }, + { + "epoch": 0.8806546055100969, + "grad_norm": 1.0224698781967163, + "learning_rate": 3.6881273681227845e-07, + "loss": 0.2686, + "step": 43992 + }, + { + "epoch": 0.8806746240272252, + "grad_norm": 1.3035293817520142, + "learning_rate": 3.6869054882552157e-07, + "loss": 0.3464, + "step": 43993 + }, + { + "epoch": 0.8806946425443535, + "grad_norm": 1.0911046266555786, + "learning_rate": 3.6856838030779385e-07, + "loss": 0.268, + "step": 43994 + }, + { + "epoch": 0.8807146610614819, + "grad_norm": 1.1042746305465698, + "learning_rate": 3.684462312596082e-07, + "loss": 0.3095, + "step": 43995 + }, + { + "epoch": 0.8807346795786102, + "grad_norm": 1.1499806642532349, + "learning_rate": 3.683241016814765e-07, + "loss": 0.2905, + "step": 43996 + }, + { + "epoch": 0.8807546980957386, + "grad_norm": 1.134732961654663, + "learning_rate": 3.682019915739149e-07, + "loss": 0.2673, + "step": 43997 + }, + { + "epoch": 0.8807747166128669, + "grad_norm": 1.1361480951309204, + "learning_rate": 3.680799009374353e-07, + "loss": 0.2417, + "step": 43998 + }, + { + "epoch": 0.8807947351299953, + "grad_norm": 1.0349787473678589, + "learning_rate": 3.679578297725511e-07, + "loss": 0.2629, + "step": 43999 + }, + { + "epoch": 0.8808147536471236, + "grad_norm": 1.0730571746826172, + "learning_rate": 3.6783577807977476e-07, + "loss": 0.2689, + "step": 44000 + }, + { + "epoch": 0.8808347721642519, + "grad_norm": 1.0709118843078613, + "learning_rate": 3.6771374585961973e-07, + "loss": 0.2801, + "step": 44001 + }, + { + "epoch": 0.8808547906813803, + "grad_norm": 1.9208418130874634, + "learning_rate": 3.6759173311259997e-07, + "loss": 0.7615, + "step": 44002 + }, + { + "epoch": 0.8808748091985086, + "grad_norm": 1.1192378997802734, + "learning_rate": 3.674697398392274e-07, + "loss": 0.3311, + "step": 44003 + }, + { + "epoch": 0.880894827715637, + "grad_norm": 1.2068581581115723, + "learning_rate": 3.6734776604001544e-07, + "loss": 0.3372, + "step": 44004 + }, + { + "epoch": 0.8809148462327653, + "grad_norm": 1.2573853731155396, + "learning_rate": 3.672258117154753e-07, + "loss": 0.3247, + "step": 44005 + }, + { + "epoch": 0.8809348647498937, + "grad_norm": 1.076032280921936, + "learning_rate": 3.6710387686612113e-07, + "loss": 0.2944, + "step": 44006 + }, + { + "epoch": 0.880954883267022, + "grad_norm": 1.304340124130249, + "learning_rate": 3.669819614924658e-07, + "loss": 0.2982, + "step": 44007 + }, + { + "epoch": 0.8809749017841504, + "grad_norm": 1.161026120185852, + "learning_rate": 3.6686006559502166e-07, + "loss": 0.2337, + "step": 44008 + }, + { + "epoch": 0.8809949203012787, + "grad_norm": 1.1221990585327148, + "learning_rate": 3.667381891743005e-07, + "loss": 0.2814, + "step": 44009 + }, + { + "epoch": 0.881014938818407, + "grad_norm": 1.0586904287338257, + "learning_rate": 3.6661633223081417e-07, + "loss": 0.3136, + "step": 44010 + }, + { + "epoch": 0.8810349573355354, + "grad_norm": 1.1320360898971558, + "learning_rate": 3.664944947650767e-07, + "loss": 0.2708, + "step": 44011 + }, + { + "epoch": 0.8810549758526637, + "grad_norm": 1.0875186920166016, + "learning_rate": 3.663726767775988e-07, + "loss": 0.2916, + "step": 44012 + }, + { + "epoch": 0.8810749943697921, + "grad_norm": 1.0542831420898438, + "learning_rate": 3.6625087826889336e-07, + "loss": 0.3167, + "step": 44013 + }, + { + "epoch": 0.8810950128869204, + "grad_norm": 1.0939735174179077, + "learning_rate": 3.6612909923947117e-07, + "loss": 0.3027, + "step": 44014 + }, + { + "epoch": 0.8811150314040488, + "grad_norm": 1.0125267505645752, + "learning_rate": 3.6600733968984556e-07, + "loss": 0.2891, + "step": 44015 + }, + { + "epoch": 0.8811350499211771, + "grad_norm": 1.1309114694595337, + "learning_rate": 3.6588559962052795e-07, + "loss": 0.3062, + "step": 44016 + }, + { + "epoch": 0.8811550684383054, + "grad_norm": 1.1097205877304077, + "learning_rate": 3.6576387903203e-07, + "loss": 0.2726, + "step": 44017 + }, + { + "epoch": 0.8811750869554338, + "grad_norm": 1.1986823081970215, + "learning_rate": 3.6564217792486256e-07, + "loss": 0.3148, + "step": 44018 + }, + { + "epoch": 0.8811951054725621, + "grad_norm": 1.1130657196044922, + "learning_rate": 3.6552049629953844e-07, + "loss": 0.3084, + "step": 44019 + }, + { + "epoch": 0.8812151239896905, + "grad_norm": 1.1856988668441772, + "learning_rate": 3.6539883415656894e-07, + "loss": 0.2655, + "step": 44020 + }, + { + "epoch": 0.8812351425068188, + "grad_norm": 1.275437355041504, + "learning_rate": 3.652771914964659e-07, + "loss": 0.2912, + "step": 44021 + }, + { + "epoch": 0.8812551610239472, + "grad_norm": 1.1078872680664062, + "learning_rate": 3.6515556831974e-07, + "loss": 0.31, + "step": 44022 + }, + { + "epoch": 0.8812751795410755, + "grad_norm": 1.0615366697311401, + "learning_rate": 3.6503396462690186e-07, + "loss": 0.2942, + "step": 44023 + }, + { + "epoch": 0.8812951980582039, + "grad_norm": 1.19742751121521, + "learning_rate": 3.6491238041846456e-07, + "loss": 0.275, + "step": 44024 + }, + { + "epoch": 0.8813152165753322, + "grad_norm": 1.025529146194458, + "learning_rate": 3.6479081569493756e-07, + "loss": 0.2947, + "step": 44025 + }, + { + "epoch": 0.8813352350924605, + "grad_norm": 1.1490161418914795, + "learning_rate": 3.6466927045683276e-07, + "loss": 0.2863, + "step": 44026 + }, + { + "epoch": 0.8813552536095889, + "grad_norm": 2.002774238586426, + "learning_rate": 3.6454774470466025e-07, + "loss": 0.769, + "step": 44027 + }, + { + "epoch": 0.8813752721267172, + "grad_norm": 1.1299893856048584, + "learning_rate": 3.644262384389319e-07, + "loss": 0.3489, + "step": 44028 + }, + { + "epoch": 0.8813952906438456, + "grad_norm": 1.1326892375946045, + "learning_rate": 3.6430475166015834e-07, + "loss": 0.2836, + "step": 44029 + }, + { + "epoch": 0.8814153091609739, + "grad_norm": 1.1045244932174683, + "learning_rate": 3.6418328436884973e-07, + "loss": 0.3105, + "step": 44030 + }, + { + "epoch": 0.8814353276781023, + "grad_norm": 1.0376591682434082, + "learning_rate": 3.640618365655163e-07, + "loss": 0.2281, + "step": 44031 + }, + { + "epoch": 0.8814553461952306, + "grad_norm": 1.2485065460205078, + "learning_rate": 3.6394040825066925e-07, + "loss": 0.3054, + "step": 44032 + }, + { + "epoch": 0.8814753647123589, + "grad_norm": 1.2954074144363403, + "learning_rate": 3.638189994248198e-07, + "loss": 0.3278, + "step": 44033 + }, + { + "epoch": 0.8814953832294873, + "grad_norm": 1.0487885475158691, + "learning_rate": 3.636976100884776e-07, + "loss": 0.2679, + "step": 44034 + }, + { + "epoch": 0.8815154017466156, + "grad_norm": 1.0992757081985474, + "learning_rate": 3.635762402421528e-07, + "loss": 0.2964, + "step": 44035 + }, + { + "epoch": 0.881535420263744, + "grad_norm": 1.03860604763031, + "learning_rate": 3.634548898863549e-07, + "loss": 0.2844, + "step": 44036 + }, + { + "epoch": 0.8815554387808723, + "grad_norm": 1.110599398612976, + "learning_rate": 3.6333355902159585e-07, + "loss": 0.3014, + "step": 44037 + }, + { + "epoch": 0.8815754572980007, + "grad_norm": 1.0993802547454834, + "learning_rate": 3.632122476483846e-07, + "loss": 0.3031, + "step": 44038 + }, + { + "epoch": 0.881595475815129, + "grad_norm": 1.1399046182632446, + "learning_rate": 3.630909557672313e-07, + "loss": 0.3096, + "step": 44039 + }, + { + "epoch": 0.8816154943322574, + "grad_norm": 1.1457340717315674, + "learning_rate": 3.629696833786456e-07, + "loss": 0.3041, + "step": 44040 + }, + { + "epoch": 0.8816355128493857, + "grad_norm": 1.1191593408584595, + "learning_rate": 3.6284843048313644e-07, + "loss": 0.2728, + "step": 44041 + }, + { + "epoch": 0.881655531366514, + "grad_norm": 1.8239288330078125, + "learning_rate": 3.627271970812157e-07, + "loss": 0.746, + "step": 44042 + }, + { + "epoch": 0.8816755498836424, + "grad_norm": 1.0574071407318115, + "learning_rate": 3.626059831733919e-07, + "loss": 0.2992, + "step": 44043 + }, + { + "epoch": 0.8816955684007707, + "grad_norm": 1.1538889408111572, + "learning_rate": 3.624847887601735e-07, + "loss": 0.2677, + "step": 44044 + }, + { + "epoch": 0.8817155869178991, + "grad_norm": 1.7787683010101318, + "learning_rate": 3.623636138420711e-07, + "loss": 0.7214, + "step": 44045 + }, + { + "epoch": 0.8817356054350274, + "grad_norm": 1.248321771621704, + "learning_rate": 3.6224245841959494e-07, + "loss": 0.3096, + "step": 44046 + }, + { + "epoch": 0.8817556239521558, + "grad_norm": 1.1635345220565796, + "learning_rate": 3.621213224932535e-07, + "loss": 0.2726, + "step": 44047 + }, + { + "epoch": 0.8817756424692841, + "grad_norm": 1.0299628973007202, + "learning_rate": 3.620002060635558e-07, + "loss": 0.264, + "step": 44048 + }, + { + "epoch": 0.8817956609864124, + "grad_norm": 1.1237754821777344, + "learning_rate": 3.6187910913101033e-07, + "loss": 0.3154, + "step": 44049 + }, + { + "epoch": 0.8818156795035408, + "grad_norm": 1.2091082334518433, + "learning_rate": 3.6175803169612776e-07, + "loss": 0.2796, + "step": 44050 + }, + { + "epoch": 0.8818356980206691, + "grad_norm": 1.2537981271743774, + "learning_rate": 3.616369737594161e-07, + "loss": 0.2898, + "step": 44051 + }, + { + "epoch": 0.8818557165377975, + "grad_norm": 1.0713858604431152, + "learning_rate": 3.6151593532138484e-07, + "loss": 0.2716, + "step": 44052 + }, + { + "epoch": 0.8818757350549258, + "grad_norm": 1.2212296724319458, + "learning_rate": 3.6139491638254197e-07, + "loss": 0.2931, + "step": 44053 + }, + { + "epoch": 0.8818957535720542, + "grad_norm": 1.1066020727157593, + "learning_rate": 3.61273916943396e-07, + "loss": 0.2672, + "step": 44054 + }, + { + "epoch": 0.8819157720891825, + "grad_norm": 1.302193522453308, + "learning_rate": 3.6115293700445697e-07, + "loss": 0.3342, + "step": 44055 + }, + { + "epoch": 0.8819357906063109, + "grad_norm": 1.0133270025253296, + "learning_rate": 3.610319765662329e-07, + "loss": 0.2939, + "step": 44056 + }, + { + "epoch": 0.8819558091234392, + "grad_norm": 1.398392915725708, + "learning_rate": 3.6091103562923115e-07, + "loss": 0.2969, + "step": 44057 + }, + { + "epoch": 0.8819758276405675, + "grad_norm": 1.181129813194275, + "learning_rate": 3.6079011419396073e-07, + "loss": 0.2981, + "step": 44058 + }, + { + "epoch": 0.8819958461576959, + "grad_norm": 1.0507835149765015, + "learning_rate": 3.6066921226093174e-07, + "loss": 0.3046, + "step": 44059 + }, + { + "epoch": 0.8820158646748242, + "grad_norm": 1.102952003479004, + "learning_rate": 3.605483298306506e-07, + "loss": 0.318, + "step": 44060 + }, + { + "epoch": 0.8820358831919526, + "grad_norm": 1.2270973920822144, + "learning_rate": 3.604274669036256e-07, + "loss": 0.339, + "step": 44061 + }, + { + "epoch": 0.8820559017090809, + "grad_norm": 1.0826754570007324, + "learning_rate": 3.6030662348036594e-07, + "loss": 0.2678, + "step": 44062 + }, + { + "epoch": 0.8820759202262093, + "grad_norm": 1.2325717210769653, + "learning_rate": 3.6018579956137723e-07, + "loss": 0.35, + "step": 44063 + }, + { + "epoch": 0.8820959387433376, + "grad_norm": 1.2801958322525024, + "learning_rate": 3.600649951471696e-07, + "loss": 0.2983, + "step": 44064 + }, + { + "epoch": 0.8821159572604659, + "grad_norm": 1.0634733438491821, + "learning_rate": 3.599442102382505e-07, + "loss": 0.2891, + "step": 44065 + }, + { + "epoch": 0.8821359757775943, + "grad_norm": 1.0130407810211182, + "learning_rate": 3.598234448351279e-07, + "loss": 0.2622, + "step": 44066 + }, + { + "epoch": 0.8821559942947226, + "grad_norm": 1.0109431743621826, + "learning_rate": 3.5970269893830733e-07, + "loss": 0.2934, + "step": 44067 + }, + { + "epoch": 0.882176012811851, + "grad_norm": 1.0479648113250732, + "learning_rate": 3.595819725482991e-07, + "loss": 0.2799, + "step": 44068 + }, + { + "epoch": 0.8821960313289793, + "grad_norm": 1.1022311449050903, + "learning_rate": 3.594612656656099e-07, + "loss": 0.3046, + "step": 44069 + }, + { + "epoch": 0.8822160498461077, + "grad_norm": 1.0932223796844482, + "learning_rate": 3.593405782907461e-07, + "loss": 0.3133, + "step": 44070 + }, + { + "epoch": 0.882236068363236, + "grad_norm": 1.1737085580825806, + "learning_rate": 3.592199104242161e-07, + "loss": 0.2955, + "step": 44071 + }, + { + "epoch": 0.8822560868803644, + "grad_norm": 1.2738521099090576, + "learning_rate": 3.5909926206652623e-07, + "loss": 0.3261, + "step": 44072 + }, + { + "epoch": 0.8822761053974927, + "grad_norm": 1.1031243801116943, + "learning_rate": 3.589786332181855e-07, + "loss": 0.2659, + "step": 44073 + }, + { + "epoch": 0.882296123914621, + "grad_norm": 0.992310106754303, + "learning_rate": 3.5885802387969957e-07, + "loss": 0.2731, + "step": 44074 + }, + { + "epoch": 0.8823161424317494, + "grad_norm": 1.2715076208114624, + "learning_rate": 3.587374340515759e-07, + "loss": 0.3248, + "step": 44075 + }, + { + "epoch": 0.8823361609488777, + "grad_norm": 1.0211961269378662, + "learning_rate": 3.5861686373432015e-07, + "loss": 0.2848, + "step": 44076 + }, + { + "epoch": 0.8823561794660061, + "grad_norm": 1.3258181810379028, + "learning_rate": 3.5849631292844134e-07, + "loss": 0.3245, + "step": 44077 + }, + { + "epoch": 0.8823761979831344, + "grad_norm": 1.0671731233596802, + "learning_rate": 3.583757816344452e-07, + "loss": 0.2778, + "step": 44078 + }, + { + "epoch": 0.8823962165002628, + "grad_norm": 1.0631777048110962, + "learning_rate": 3.58255269852838e-07, + "loss": 0.2613, + "step": 44079 + }, + { + "epoch": 0.8824162350173911, + "grad_norm": 1.1176656484603882, + "learning_rate": 3.5813477758412595e-07, + "loss": 0.2823, + "step": 44080 + }, + { + "epoch": 0.8824362535345194, + "grad_norm": 1.151721477508545, + "learning_rate": 3.580143048288176e-07, + "loss": 0.33, + "step": 44081 + }, + { + "epoch": 0.8824562720516478, + "grad_norm": 1.2778733968734741, + "learning_rate": 3.5789385158741807e-07, + "loss": 0.2832, + "step": 44082 + }, + { + "epoch": 0.8824762905687761, + "grad_norm": 1.18098783493042, + "learning_rate": 3.577734178604325e-07, + "loss": 0.27, + "step": 44083 + }, + { + "epoch": 0.8824963090859045, + "grad_norm": 1.1174925565719604, + "learning_rate": 3.5765300364836996e-07, + "loss": 0.3077, + "step": 44084 + }, + { + "epoch": 0.8825163276030328, + "grad_norm": 1.178439736366272, + "learning_rate": 3.5753260895173393e-07, + "loss": 0.2679, + "step": 44085 + }, + { + "epoch": 0.8825363461201612, + "grad_norm": 1.119165301322937, + "learning_rate": 3.5741223377103283e-07, + "loss": 0.3184, + "step": 44086 + }, + { + "epoch": 0.8825563646372895, + "grad_norm": 1.1632194519042969, + "learning_rate": 3.5729187810677134e-07, + "loss": 0.2921, + "step": 44087 + }, + { + "epoch": 0.8825763831544179, + "grad_norm": 1.3089567422866821, + "learning_rate": 3.571715419594557e-07, + "loss": 0.3058, + "step": 44088 + }, + { + "epoch": 0.8825964016715462, + "grad_norm": 1.9377264976501465, + "learning_rate": 3.5705122532959104e-07, + "loss": 0.7064, + "step": 44089 + }, + { + "epoch": 0.8826164201886745, + "grad_norm": 1.146549105644226, + "learning_rate": 3.5693092821768417e-07, + "loss": 0.2993, + "step": 44090 + }, + { + "epoch": 0.8826364387058029, + "grad_norm": 1.1173958778381348, + "learning_rate": 3.5681065062424137e-07, + "loss": 0.3331, + "step": 44091 + }, + { + "epoch": 0.8826564572229312, + "grad_norm": 1.092799186706543, + "learning_rate": 3.566903925497667e-07, + "loss": 0.296, + "step": 44092 + }, + { + "epoch": 0.8826764757400596, + "grad_norm": 1.108107566833496, + "learning_rate": 3.5657015399476634e-07, + "loss": 0.3169, + "step": 44093 + }, + { + "epoch": 0.8826964942571879, + "grad_norm": 1.181644320487976, + "learning_rate": 3.564499349597456e-07, + "loss": 0.2457, + "step": 44094 + }, + { + "epoch": 0.8827165127743163, + "grad_norm": 1.04583740234375, + "learning_rate": 3.5632973544521e-07, + "loss": 0.2673, + "step": 44095 + }, + { + "epoch": 0.8827365312914446, + "grad_norm": 1.0139909982681274, + "learning_rate": 3.562095554516648e-07, + "loss": 0.2789, + "step": 44096 + }, + { + "epoch": 0.8827565498085729, + "grad_norm": 1.2306222915649414, + "learning_rate": 3.560893949796157e-07, + "loss": 0.2997, + "step": 44097 + }, + { + "epoch": 0.8827765683257013, + "grad_norm": 1.0997167825698853, + "learning_rate": 3.559692540295667e-07, + "loss": 0.2858, + "step": 44098 + }, + { + "epoch": 0.8827965868428296, + "grad_norm": 1.1917647123336792, + "learning_rate": 3.5584913260202414e-07, + "loss": 0.3138, + "step": 44099 + }, + { + "epoch": 0.882816605359958, + "grad_norm": 1.1506937742233276, + "learning_rate": 3.5572903069749256e-07, + "loss": 0.2862, + "step": 44100 + }, + { + "epoch": 0.8828366238770863, + "grad_norm": 1.1159268617630005, + "learning_rate": 3.5560894831647707e-07, + "loss": 0.298, + "step": 44101 + }, + { + "epoch": 0.8828566423942147, + "grad_norm": 0.9974951148033142, + "learning_rate": 3.554888854594807e-07, + "loss": 0.2961, + "step": 44102 + }, + { + "epoch": 0.882876660911343, + "grad_norm": 1.2892628908157349, + "learning_rate": 3.553688421270107e-07, + "loss": 0.3018, + "step": 44103 + }, + { + "epoch": 0.8828966794284714, + "grad_norm": 1.1103394031524658, + "learning_rate": 3.5524881831957013e-07, + "loss": 0.2845, + "step": 44104 + }, + { + "epoch": 0.8829166979455997, + "grad_norm": 1.1795042753219604, + "learning_rate": 3.551288140376641e-07, + "loss": 0.2983, + "step": 44105 + }, + { + "epoch": 0.882936716462728, + "grad_norm": 1.2294683456420898, + "learning_rate": 3.5500882928179716e-07, + "loss": 0.3196, + "step": 44106 + }, + { + "epoch": 0.8829567349798564, + "grad_norm": 1.1707696914672852, + "learning_rate": 3.548888640524728e-07, + "loss": 0.2471, + "step": 44107 + }, + { + "epoch": 0.8829767534969847, + "grad_norm": 1.1745833158493042, + "learning_rate": 3.5476891835019676e-07, + "loss": 0.3014, + "step": 44108 + }, + { + "epoch": 0.8829967720141131, + "grad_norm": 0.9464795589447021, + "learning_rate": 3.5464899217547146e-07, + "loss": 0.2509, + "step": 44109 + }, + { + "epoch": 0.8830167905312414, + "grad_norm": 1.134194254875183, + "learning_rate": 3.5452908552880304e-07, + "loss": 0.2687, + "step": 44110 + }, + { + "epoch": 0.8830368090483698, + "grad_norm": 1.1089015007019043, + "learning_rate": 3.54409198410694e-07, + "loss": 0.3126, + "step": 44111 + }, + { + "epoch": 0.8830568275654981, + "grad_norm": 1.118809700012207, + "learning_rate": 3.542893308216494e-07, + "loss": 0.3001, + "step": 44112 + }, + { + "epoch": 0.8830768460826264, + "grad_norm": 1.22903311252594, + "learning_rate": 3.5416948276217277e-07, + "loss": 0.3157, + "step": 44113 + }, + { + "epoch": 0.8830968645997548, + "grad_norm": 2.016503095626831, + "learning_rate": 3.5404965423276813e-07, + "loss": 0.7162, + "step": 44114 + }, + { + "epoch": 0.8831168831168831, + "grad_norm": 1.1358516216278076, + "learning_rate": 3.539298452339385e-07, + "loss": 0.269, + "step": 44115 + }, + { + "epoch": 0.8831369016340115, + "grad_norm": 1.1174925565719604, + "learning_rate": 3.538100557661872e-07, + "loss": 0.3352, + "step": 44116 + }, + { + "epoch": 0.8831569201511398, + "grad_norm": 1.2003517150878906, + "learning_rate": 3.536902858300195e-07, + "loss": 0.2889, + "step": 44117 + }, + { + "epoch": 0.8831769386682682, + "grad_norm": 1.1875133514404297, + "learning_rate": 3.5357053542593834e-07, + "loss": 0.3075, + "step": 44118 + }, + { + "epoch": 0.8831969571853965, + "grad_norm": 1.1438288688659668, + "learning_rate": 3.5345080455444605e-07, + "loss": 0.3096, + "step": 44119 + }, + { + "epoch": 0.8832169757025249, + "grad_norm": 1.0512253046035767, + "learning_rate": 3.5333109321604607e-07, + "loss": 0.2995, + "step": 44120 + }, + { + "epoch": 0.8832369942196532, + "grad_norm": 1.172356367111206, + "learning_rate": 3.532114014112431e-07, + "loss": 0.275, + "step": 44121 + }, + { + "epoch": 0.8832570127367815, + "grad_norm": 1.0484157800674438, + "learning_rate": 3.5309172914053893e-07, + "loss": 0.3005, + "step": 44122 + }, + { + "epoch": 0.8832770312539099, + "grad_norm": 1.7833151817321777, + "learning_rate": 3.529720764044375e-07, + "loss": 0.782, + "step": 44123 + }, + { + "epoch": 0.8832970497710382, + "grad_norm": 0.9852306246757507, + "learning_rate": 3.528524432034414e-07, + "loss": 0.2515, + "step": 44124 + }, + { + "epoch": 0.8833170682881666, + "grad_norm": 1.1186758279800415, + "learning_rate": 3.5273282953805275e-07, + "loss": 0.3194, + "step": 44125 + }, + { + "epoch": 0.8833370868052949, + "grad_norm": 2.0134851932525635, + "learning_rate": 3.526132354087758e-07, + "loss": 0.7888, + "step": 44126 + }, + { + "epoch": 0.8833571053224233, + "grad_norm": 1.1594717502593994, + "learning_rate": 3.524936608161134e-07, + "loss": 0.3063, + "step": 44127 + }, + { + "epoch": 0.8833771238395516, + "grad_norm": 1.0857326984405518, + "learning_rate": 3.5237410576056675e-07, + "loss": 0.3059, + "step": 44128 + }, + { + "epoch": 0.8833971423566799, + "grad_norm": 1.0659167766571045, + "learning_rate": 3.522545702426389e-07, + "loss": 0.2746, + "step": 44129 + }, + { + "epoch": 0.8834171608738083, + "grad_norm": 1.064165472984314, + "learning_rate": 3.5213505426283266e-07, + "loss": 0.2965, + "step": 44130 + }, + { + "epoch": 0.8834371793909366, + "grad_norm": 1.2114653587341309, + "learning_rate": 3.520155578216511e-07, + "loss": 0.306, + "step": 44131 + }, + { + "epoch": 0.883457197908065, + "grad_norm": 1.1677159070968628, + "learning_rate": 3.518960809195959e-07, + "loss": 0.3076, + "step": 44132 + }, + { + "epoch": 0.8834772164251933, + "grad_norm": 1.0602576732635498, + "learning_rate": 3.5177662355716845e-07, + "loss": 0.293, + "step": 44133 + }, + { + "epoch": 0.8834972349423217, + "grad_norm": 1.1566507816314697, + "learning_rate": 3.516571857348722e-07, + "loss": 0.2461, + "step": 44134 + }, + { + "epoch": 0.88351725345945, + "grad_norm": 1.1446064710617065, + "learning_rate": 3.5153776745320835e-07, + "loss": 0.29, + "step": 44135 + }, + { + "epoch": 0.8835372719765784, + "grad_norm": 1.2050385475158691, + "learning_rate": 3.514183687126799e-07, + "loss": 0.3067, + "step": 44136 + }, + { + "epoch": 0.8835572904937067, + "grad_norm": 1.183337926864624, + "learning_rate": 3.512989895137886e-07, + "loss": 0.2718, + "step": 44137 + }, + { + "epoch": 0.883577309010835, + "grad_norm": 1.1421602964401245, + "learning_rate": 3.511796298570347e-07, + "loss": 0.2597, + "step": 44138 + }, + { + "epoch": 0.8835973275279634, + "grad_norm": 1.1137133836746216, + "learning_rate": 3.510602897429222e-07, + "loss": 0.2908, + "step": 44139 + }, + { + "epoch": 0.8836173460450917, + "grad_norm": 1.100144624710083, + "learning_rate": 3.509409691719518e-07, + "loss": 0.3057, + "step": 44140 + }, + { + "epoch": 0.8836373645622201, + "grad_norm": 1.0595289468765259, + "learning_rate": 3.5082166814462524e-07, + "loss": 0.2655, + "step": 44141 + }, + { + "epoch": 0.8836573830793484, + "grad_norm": 1.0826199054718018, + "learning_rate": 3.5070238666144285e-07, + "loss": 0.3502, + "step": 44142 + }, + { + "epoch": 0.8836774015964768, + "grad_norm": 1.1634502410888672, + "learning_rate": 3.50583124722908e-07, + "loss": 0.2706, + "step": 44143 + }, + { + "epoch": 0.8836974201136051, + "grad_norm": 1.0791393518447876, + "learning_rate": 3.504638823295209e-07, + "loss": 0.261, + "step": 44144 + }, + { + "epoch": 0.8837174386307334, + "grad_norm": 1.7388951778411865, + "learning_rate": 3.503446594817833e-07, + "loss": 0.7595, + "step": 44145 + }, + { + "epoch": 0.8837374571478618, + "grad_norm": 1.087918996810913, + "learning_rate": 3.502254561801949e-07, + "loss": 0.2756, + "step": 44146 + }, + { + "epoch": 0.8837574756649901, + "grad_norm": 2.0071256160736084, + "learning_rate": 3.5010627242525965e-07, + "loss": 0.7018, + "step": 44147 + }, + { + "epoch": 0.8837774941821185, + "grad_norm": 1.0631743669509888, + "learning_rate": 3.499871082174755e-07, + "loss": 0.3124, + "step": 44148 + }, + { + "epoch": 0.8837975126992468, + "grad_norm": 1.945065975189209, + "learning_rate": 3.49867963557346e-07, + "loss": 0.8204, + "step": 44149 + }, + { + "epoch": 0.8838175312163752, + "grad_norm": 1.9270001649856567, + "learning_rate": 3.4974883844537065e-07, + "loss": 0.7461, + "step": 44150 + }, + { + "epoch": 0.8838375497335035, + "grad_norm": 1.0708091259002686, + "learning_rate": 3.4962973288204973e-07, + "loss": 0.235, + "step": 44151 + }, + { + "epoch": 0.8838575682506319, + "grad_norm": 1.9533051252365112, + "learning_rate": 3.495106468678855e-07, + "loss": 0.7588, + "step": 44152 + }, + { + "epoch": 0.8838775867677602, + "grad_norm": 1.200710654258728, + "learning_rate": 3.493915804033776e-07, + "loss": 0.2828, + "step": 44153 + }, + { + "epoch": 0.8838976052848885, + "grad_norm": 1.1993441581726074, + "learning_rate": 3.492725334890268e-07, + "loss": 0.2605, + "step": 44154 + }, + { + "epoch": 0.8839176238020169, + "grad_norm": 1.1138124465942383, + "learning_rate": 3.491535061253332e-07, + "loss": 0.2997, + "step": 44155 + }, + { + "epoch": 0.8839376423191452, + "grad_norm": 1.0223463773727417, + "learning_rate": 3.4903449831279636e-07, + "loss": 0.2738, + "step": 44156 + }, + { + "epoch": 0.8839576608362736, + "grad_norm": 1.306536078453064, + "learning_rate": 3.489155100519187e-07, + "loss": 0.2848, + "step": 44157 + }, + { + "epoch": 0.8839776793534019, + "grad_norm": 1.150512933731079, + "learning_rate": 3.4879654134319986e-07, + "loss": 0.3069, + "step": 44158 + }, + { + "epoch": 0.8839976978705303, + "grad_norm": 2.1474204063415527, + "learning_rate": 3.486775921871377e-07, + "loss": 0.7157, + "step": 44159 + }, + { + "epoch": 0.8840177163876586, + "grad_norm": 1.1302087306976318, + "learning_rate": 3.485586625842352e-07, + "loss": 0.3034, + "step": 44160 + }, + { + "epoch": 0.8840377349047869, + "grad_norm": 1.983232855796814, + "learning_rate": 3.484397525349903e-07, + "loss": 0.7111, + "step": 44161 + }, + { + "epoch": 0.8840577534219153, + "grad_norm": 1.143283724784851, + "learning_rate": 3.4832086203990424e-07, + "loss": 0.3019, + "step": 44162 + }, + { + "epoch": 0.8840777719390436, + "grad_norm": 1.0762447118759155, + "learning_rate": 3.482019910994766e-07, + "loss": 0.3013, + "step": 44163 + }, + { + "epoch": 0.884097790456172, + "grad_norm": 1.0985389947891235, + "learning_rate": 3.4808313971420593e-07, + "loss": 0.291, + "step": 44164 + }, + { + "epoch": 0.8841178089733003, + "grad_norm": 1.0854345560073853, + "learning_rate": 3.479643078845929e-07, + "loss": 0.3174, + "step": 44165 + }, + { + "epoch": 0.8841378274904287, + "grad_norm": 1.9017657041549683, + "learning_rate": 3.4784549561113713e-07, + "loss": 0.8051, + "step": 44166 + }, + { + "epoch": 0.884157846007557, + "grad_norm": 1.7769100666046143, + "learning_rate": 3.477267028943376e-07, + "loss": 0.7409, + "step": 44167 + }, + { + "epoch": 0.8841778645246854, + "grad_norm": 0.9874953627586365, + "learning_rate": 3.47607929734694e-07, + "loss": 0.2756, + "step": 44168 + }, + { + "epoch": 0.8841978830418137, + "grad_norm": 1.1305102109909058, + "learning_rate": 3.4748917613270484e-07, + "loss": 0.2519, + "step": 44169 + }, + { + "epoch": 0.884217901558942, + "grad_norm": 1.2008699178695679, + "learning_rate": 3.4737044208887015e-07, + "loss": 0.3041, + "step": 44170 + }, + { + "epoch": 0.8842379200760704, + "grad_norm": 1.1408131122589111, + "learning_rate": 3.4725172760368965e-07, + "loss": 0.3111, + "step": 44171 + }, + { + "epoch": 0.8842579385931987, + "grad_norm": 1.0426914691925049, + "learning_rate": 3.471330326776601e-07, + "loss": 0.2922, + "step": 44172 + }, + { + "epoch": 0.8842779571103271, + "grad_norm": 1.1491624116897583, + "learning_rate": 3.470143573112833e-07, + "loss": 0.3058, + "step": 44173 + }, + { + "epoch": 0.8842979756274554, + "grad_norm": 1.0262328386306763, + "learning_rate": 3.4689570150505556e-07, + "loss": 0.2777, + "step": 44174 + }, + { + "epoch": 0.8843179941445838, + "grad_norm": 1.8384052515029907, + "learning_rate": 3.467770652594782e-07, + "loss": 0.6993, + "step": 44175 + }, + { + "epoch": 0.8843380126617121, + "grad_norm": 1.007417917251587, + "learning_rate": 3.466584485750485e-07, + "loss": 0.2595, + "step": 44176 + }, + { + "epoch": 0.8843580311788404, + "grad_norm": 1.2960686683654785, + "learning_rate": 3.46539851452265e-07, + "loss": 0.2727, + "step": 44177 + }, + { + "epoch": 0.8843780496959688, + "grad_norm": 1.0826061964035034, + "learning_rate": 3.464212738916256e-07, + "loss": 0.3167, + "step": 44178 + }, + { + "epoch": 0.8843980682130971, + "grad_norm": 1.889567255973816, + "learning_rate": 3.4630271589363106e-07, + "loss": 0.7737, + "step": 44179 + }, + { + "epoch": 0.8844180867302255, + "grad_norm": 1.1717314720153809, + "learning_rate": 3.461841774587782e-07, + "loss": 0.3185, + "step": 44180 + }, + { + "epoch": 0.8844381052473538, + "grad_norm": 1.1571496725082397, + "learning_rate": 3.460656585875655e-07, + "loss": 0.2912, + "step": 44181 + }, + { + "epoch": 0.8844581237644822, + "grad_norm": 1.1389094591140747, + "learning_rate": 3.4594715928049026e-07, + "loss": 0.2805, + "step": 44182 + }, + { + "epoch": 0.8844781422816105, + "grad_norm": 1.4243706464767456, + "learning_rate": 3.458286795380528e-07, + "loss": 0.2897, + "step": 44183 + }, + { + "epoch": 0.8844981607987389, + "grad_norm": 1.1813714504241943, + "learning_rate": 3.457102193607498e-07, + "loss": 0.3384, + "step": 44184 + }, + { + "epoch": 0.8845181793158672, + "grad_norm": 1.0113885402679443, + "learning_rate": 3.4559177874907867e-07, + "loss": 0.276, + "step": 44185 + }, + { + "epoch": 0.8845381978329955, + "grad_norm": 1.1802845001220703, + "learning_rate": 3.454733577035385e-07, + "loss": 0.3471, + "step": 44186 + }, + { + "epoch": 0.8845582163501239, + "grad_norm": 1.025285243988037, + "learning_rate": 3.4535495622462613e-07, + "loss": 0.2892, + "step": 44187 + }, + { + "epoch": 0.8845782348672522, + "grad_norm": 1.0843313932418823, + "learning_rate": 3.452365743128405e-07, + "loss": 0.3205, + "step": 44188 + }, + { + "epoch": 0.8845982533843806, + "grad_norm": 2.0036137104034424, + "learning_rate": 3.4511821196867914e-07, + "loss": 0.7019, + "step": 44189 + }, + { + "epoch": 0.8846182719015089, + "grad_norm": 1.1124639511108398, + "learning_rate": 3.449998691926387e-07, + "loss": 0.2801, + "step": 44190 + }, + { + "epoch": 0.8846382904186373, + "grad_norm": 1.261418342590332, + "learning_rate": 3.4488154598521615e-07, + "loss": 0.3094, + "step": 44191 + }, + { + "epoch": 0.8846583089357656, + "grad_norm": 1.0516552925109863, + "learning_rate": 3.447632423469105e-07, + "loss": 0.2568, + "step": 44192 + }, + { + "epoch": 0.8846783274528939, + "grad_norm": 1.106294870376587, + "learning_rate": 3.4464495827821854e-07, + "loss": 0.2783, + "step": 44193 + }, + { + "epoch": 0.8846983459700223, + "grad_norm": 1.966168999671936, + "learning_rate": 3.4452669377963766e-07, + "loss": 0.7576, + "step": 44194 + }, + { + "epoch": 0.8847183644871506, + "grad_norm": 1.1554309129714966, + "learning_rate": 3.44408448851663e-07, + "loss": 0.3046, + "step": 44195 + }, + { + "epoch": 0.884738383004279, + "grad_norm": 0.9657979607582092, + "learning_rate": 3.442902234947948e-07, + "loss": 0.2658, + "step": 44196 + }, + { + "epoch": 0.8847584015214073, + "grad_norm": 1.2123640775680542, + "learning_rate": 3.441720177095287e-07, + "loss": 0.3199, + "step": 44197 + }, + { + "epoch": 0.8847784200385357, + "grad_norm": 1.1130802631378174, + "learning_rate": 3.440538314963604e-07, + "loss": 0.2874, + "step": 44198 + }, + { + "epoch": 0.884798438555664, + "grad_norm": 1.0918033123016357, + "learning_rate": 3.4393566485578845e-07, + "loss": 0.3077, + "step": 44199 + }, + { + "epoch": 0.8848184570727924, + "grad_norm": 1.1540992259979248, + "learning_rate": 3.4381751778830853e-07, + "loss": 0.3249, + "step": 44200 + }, + { + "epoch": 0.8848384755899207, + "grad_norm": 1.1911048889160156, + "learning_rate": 3.436993902944186e-07, + "loss": 0.3271, + "step": 44201 + }, + { + "epoch": 0.884858494107049, + "grad_norm": 1.1521106958389282, + "learning_rate": 3.435812823746143e-07, + "loss": 0.3227, + "step": 44202 + }, + { + "epoch": 0.8848785126241774, + "grad_norm": 1.0035395622253418, + "learning_rate": 3.4346319402939196e-07, + "loss": 0.2778, + "step": 44203 + }, + { + "epoch": 0.8848985311413057, + "grad_norm": 1.080153226852417, + "learning_rate": 3.4334512525924736e-07, + "loss": 0.2728, + "step": 44204 + }, + { + "epoch": 0.8849185496584341, + "grad_norm": 1.9552950859069824, + "learning_rate": 3.4322707606467887e-07, + "loss": 0.7775, + "step": 44205 + }, + { + "epoch": 0.8849385681755624, + "grad_norm": 1.074739933013916, + "learning_rate": 3.431090464461817e-07, + "loss": 0.3409, + "step": 44206 + }, + { + "epoch": 0.8849585866926908, + "grad_norm": 0.9516934156417847, + "learning_rate": 3.429910364042516e-07, + "loss": 0.292, + "step": 44207 + }, + { + "epoch": 0.8849786052098191, + "grad_norm": 1.0112643241882324, + "learning_rate": 3.428730459393853e-07, + "loss": 0.3044, + "step": 44208 + }, + { + "epoch": 0.8849986237269474, + "grad_norm": 1.0431362390518188, + "learning_rate": 3.427550750520775e-07, + "loss": 0.2909, + "step": 44209 + }, + { + "epoch": 0.8850186422440758, + "grad_norm": 1.2437899112701416, + "learning_rate": 3.4263712374282555e-07, + "loss": 0.2934, + "step": 44210 + }, + { + "epoch": 0.8850386607612041, + "grad_norm": 1.0227224826812744, + "learning_rate": 3.425191920121246e-07, + "loss": 0.2674, + "step": 44211 + }, + { + "epoch": 0.8850586792783325, + "grad_norm": 2.0351178646087646, + "learning_rate": 3.4240127986047147e-07, + "loss": 0.8038, + "step": 44212 + }, + { + "epoch": 0.8850786977954608, + "grad_norm": 1.9025944471359253, + "learning_rate": 3.4228338728835963e-07, + "loss": 0.6629, + "step": 44213 + }, + { + "epoch": 0.8850987163125892, + "grad_norm": 1.0685402154922485, + "learning_rate": 3.4216551429628766e-07, + "loss": 0.2694, + "step": 44214 + }, + { + "epoch": 0.8851187348297175, + "grad_norm": 1.074669599533081, + "learning_rate": 3.4204766088474895e-07, + "loss": 0.3146, + "step": 44215 + }, + { + "epoch": 0.8851387533468459, + "grad_norm": 1.1032792329788208, + "learning_rate": 3.419298270542398e-07, + "loss": 0.2681, + "step": 44216 + }, + { + "epoch": 0.8851587718639742, + "grad_norm": 0.9981982111930847, + "learning_rate": 3.418120128052549e-07, + "loss": 0.2924, + "step": 44217 + }, + { + "epoch": 0.8851787903811025, + "grad_norm": 0.9893403649330139, + "learning_rate": 3.416942181382893e-07, + "loss": 0.2585, + "step": 44218 + }, + { + "epoch": 0.8851988088982309, + "grad_norm": 1.1044831275939941, + "learning_rate": 3.415764430538393e-07, + "loss": 0.256, + "step": 44219 + }, + { + "epoch": 0.8852188274153592, + "grad_norm": 1.093894124031067, + "learning_rate": 3.414586875523995e-07, + "loss": 0.2737, + "step": 44220 + }, + { + "epoch": 0.8852388459324876, + "grad_norm": 1.1309564113616943, + "learning_rate": 3.413409516344651e-07, + "loss": 0.2624, + "step": 44221 + }, + { + "epoch": 0.8852588644496159, + "grad_norm": 1.0427242517471313, + "learning_rate": 3.4122323530053016e-07, + "loss": 0.2724, + "step": 44222 + }, + { + "epoch": 0.8852788829667443, + "grad_norm": 1.0467393398284912, + "learning_rate": 3.411055385510903e-07, + "loss": 0.2824, + "step": 44223 + }, + { + "epoch": 0.8852989014838726, + "grad_norm": 1.134317398071289, + "learning_rate": 3.4098786138663963e-07, + "loss": 0.2807, + "step": 44224 + }, + { + "epoch": 0.8853189200010009, + "grad_norm": 1.1304363012313843, + "learning_rate": 3.4087020380767445e-07, + "loss": 0.2779, + "step": 44225 + }, + { + "epoch": 0.8853389385181293, + "grad_norm": 1.0226531028747559, + "learning_rate": 3.4075256581468705e-07, + "loss": 0.264, + "step": 44226 + }, + { + "epoch": 0.8853589570352576, + "grad_norm": 1.0062568187713623, + "learning_rate": 3.406349474081744e-07, + "loss": 0.2595, + "step": 44227 + }, + { + "epoch": 0.885378975552386, + "grad_norm": 1.1114822626113892, + "learning_rate": 3.405173485886293e-07, + "loss": 0.2687, + "step": 44228 + }, + { + "epoch": 0.8853989940695143, + "grad_norm": 1.0878379344940186, + "learning_rate": 3.403997693565464e-07, + "loss": 0.3028, + "step": 44229 + }, + { + "epoch": 0.8854190125866427, + "grad_norm": 1.176080346107483, + "learning_rate": 3.402822097124209e-07, + "loss": 0.2784, + "step": 44230 + }, + { + "epoch": 0.885439031103771, + "grad_norm": 1.2014566659927368, + "learning_rate": 3.4016466965674455e-07, + "loss": 0.2756, + "step": 44231 + }, + { + "epoch": 0.8854590496208994, + "grad_norm": 1.1345552206039429, + "learning_rate": 3.400471491900142e-07, + "loss": 0.2948, + "step": 44232 + }, + { + "epoch": 0.8854790681380277, + "grad_norm": 1.0931727886199951, + "learning_rate": 3.3992964831272345e-07, + "loss": 0.2838, + "step": 44233 + }, + { + "epoch": 0.885499086655156, + "grad_norm": 1.096104383468628, + "learning_rate": 3.3981216702536456e-07, + "loss": 0.323, + "step": 44234 + }, + { + "epoch": 0.8855191051722844, + "grad_norm": 1.0123944282531738, + "learning_rate": 3.3969470532843217e-07, + "loss": 0.2966, + "step": 44235 + }, + { + "epoch": 0.8855391236894127, + "grad_norm": 1.2234885692596436, + "learning_rate": 3.395772632224215e-07, + "loss": 0.2465, + "step": 44236 + }, + { + "epoch": 0.8855591422065411, + "grad_norm": 1.0647379159927368, + "learning_rate": 3.3945984070782366e-07, + "loss": 0.2884, + "step": 44237 + }, + { + "epoch": 0.8855791607236694, + "grad_norm": 1.471832036972046, + "learning_rate": 3.3934243778513455e-07, + "loss": 0.3244, + "step": 44238 + }, + { + "epoch": 0.8855991792407978, + "grad_norm": 1.053971529006958, + "learning_rate": 3.39225054454847e-07, + "loss": 0.2988, + "step": 44239 + }, + { + "epoch": 0.8856191977579261, + "grad_norm": 1.1149402856826782, + "learning_rate": 3.3910769071745396e-07, + "loss": 0.3459, + "step": 44240 + }, + { + "epoch": 0.8856392162750544, + "grad_norm": 1.146907925605774, + "learning_rate": 3.3899034657344954e-07, + "loss": 0.3519, + "step": 44241 + }, + { + "epoch": 0.8856592347921828, + "grad_norm": 1.153232455253601, + "learning_rate": 3.3887302202332604e-07, + "loss": 0.3127, + "step": 44242 + }, + { + "epoch": 0.8856792533093111, + "grad_norm": 1.1108410358428955, + "learning_rate": 3.3875571706757817e-07, + "loss": 0.2796, + "step": 44243 + }, + { + "epoch": 0.8856992718264395, + "grad_norm": 1.1219085454940796, + "learning_rate": 3.386384317066971e-07, + "loss": 0.2589, + "step": 44244 + }, + { + "epoch": 0.8857192903435678, + "grad_norm": 1.439004898071289, + "learning_rate": 3.3852116594117746e-07, + "loss": 0.2967, + "step": 44245 + }, + { + "epoch": 0.8857393088606962, + "grad_norm": 1.077208161354065, + "learning_rate": 3.384039197715122e-07, + "loss": 0.3118, + "step": 44246 + }, + { + "epoch": 0.8857593273778245, + "grad_norm": 0.9656902551651001, + "learning_rate": 3.382866931981932e-07, + "loss": 0.2433, + "step": 44247 + }, + { + "epoch": 0.8857793458949529, + "grad_norm": 1.156363606452942, + "learning_rate": 3.3816948622171273e-07, + "loss": 0.298, + "step": 44248 + }, + { + "epoch": 0.8857993644120812, + "grad_norm": 1.065558910369873, + "learning_rate": 3.380522988425661e-07, + "loss": 0.3003, + "step": 44249 + }, + { + "epoch": 0.8858193829292095, + "grad_norm": 1.1385325193405151, + "learning_rate": 3.3793513106124275e-07, + "loss": 0.3227, + "step": 44250 + }, + { + "epoch": 0.8858394014463379, + "grad_norm": 1.270806074142456, + "learning_rate": 3.37817982878238e-07, + "loss": 0.3034, + "step": 44251 + }, + { + "epoch": 0.8858594199634662, + "grad_norm": 1.1747578382492065, + "learning_rate": 3.37700854294043e-07, + "loss": 0.2953, + "step": 44252 + }, + { + "epoch": 0.8858794384805946, + "grad_norm": 1.1716214418411255, + "learning_rate": 3.375837453091496e-07, + "loss": 0.3081, + "step": 44253 + }, + { + "epoch": 0.8858994569977229, + "grad_norm": 1.0871672630310059, + "learning_rate": 3.374666559240519e-07, + "loss": 0.2565, + "step": 44254 + }, + { + "epoch": 0.8859194755148513, + "grad_norm": 1.2156747579574585, + "learning_rate": 3.3734958613924064e-07, + "loss": 0.3034, + "step": 44255 + }, + { + "epoch": 0.8859394940319796, + "grad_norm": 1.1925479173660278, + "learning_rate": 3.372325359552081e-07, + "loss": 0.3071, + "step": 44256 + }, + { + "epoch": 0.8859595125491079, + "grad_norm": 1.1821743249893188, + "learning_rate": 3.371155053724462e-07, + "loss": 0.2798, + "step": 44257 + }, + { + "epoch": 0.8859795310662363, + "grad_norm": 1.238708734512329, + "learning_rate": 3.369984943914473e-07, + "loss": 0.3291, + "step": 44258 + }, + { + "epoch": 0.8859995495833646, + "grad_norm": 1.898571252822876, + "learning_rate": 3.3688150301270373e-07, + "loss": 0.6906, + "step": 44259 + }, + { + "epoch": 0.886019568100493, + "grad_norm": 1.1122853755950928, + "learning_rate": 3.3676453123670683e-07, + "loss": 0.2941, + "step": 44260 + }, + { + "epoch": 0.8860395866176213, + "grad_norm": 1.0890604257583618, + "learning_rate": 3.3664757906394786e-07, + "loss": 0.271, + "step": 44261 + }, + { + "epoch": 0.8860596051347497, + "grad_norm": 1.2005045413970947, + "learning_rate": 3.3653064649491915e-07, + "loss": 0.2847, + "step": 44262 + }, + { + "epoch": 0.886079623651878, + "grad_norm": 1.3088170289993286, + "learning_rate": 3.364137335301115e-07, + "loss": 0.3512, + "step": 44263 + }, + { + "epoch": 0.8860996421690064, + "grad_norm": 1.146424412727356, + "learning_rate": 3.362968401700173e-07, + "loss": 0.2973, + "step": 44264 + }, + { + "epoch": 0.8861196606861347, + "grad_norm": 1.0616952180862427, + "learning_rate": 3.3617996641512774e-07, + "loss": 0.2976, + "step": 44265 + }, + { + "epoch": 0.886139679203263, + "grad_norm": 1.0307443141937256, + "learning_rate": 3.360631122659336e-07, + "loss": 0.2533, + "step": 44266 + }, + { + "epoch": 0.8861596977203914, + "grad_norm": 1.1777809858322144, + "learning_rate": 3.3594627772292667e-07, + "loss": 0.2809, + "step": 44267 + }, + { + "epoch": 0.8861797162375197, + "grad_norm": 1.1160634756088257, + "learning_rate": 3.358294627865982e-07, + "loss": 0.2684, + "step": 44268 + }, + { + "epoch": 0.8861997347546481, + "grad_norm": 1.0890141725540161, + "learning_rate": 3.35712667457439e-07, + "loss": 0.2526, + "step": 44269 + }, + { + "epoch": 0.8862197532717764, + "grad_norm": 1.1308809518814087, + "learning_rate": 3.3559589173593974e-07, + "loss": 0.3084, + "step": 44270 + }, + { + "epoch": 0.8862397717889048, + "grad_norm": 1.213998794555664, + "learning_rate": 3.354791356225906e-07, + "loss": 0.285, + "step": 44271 + }, + { + "epoch": 0.8862597903060331, + "grad_norm": 1.153664231300354, + "learning_rate": 3.3536239911788446e-07, + "loss": 0.2487, + "step": 44272 + }, + { + "epoch": 0.8862798088231614, + "grad_norm": 1.2040644884109497, + "learning_rate": 3.3524568222231104e-07, + "loss": 0.305, + "step": 44273 + }, + { + "epoch": 0.8862998273402898, + "grad_norm": 1.091762661933899, + "learning_rate": 3.351289849363598e-07, + "loss": 0.2539, + "step": 44274 + }, + { + "epoch": 0.8863198458574181, + "grad_norm": 1.1322399377822876, + "learning_rate": 3.3501230726052323e-07, + "loss": 0.3273, + "step": 44275 + }, + { + "epoch": 0.8863398643745465, + "grad_norm": 1.1240079402923584, + "learning_rate": 3.348956491952904e-07, + "loss": 0.2774, + "step": 44276 + }, + { + "epoch": 0.8863598828916748, + "grad_norm": 1.6471601724624634, + "learning_rate": 3.3477901074115303e-07, + "loss": 0.3038, + "step": 44277 + }, + { + "epoch": 0.8863799014088032, + "grad_norm": 1.13846755027771, + "learning_rate": 3.3466239189860084e-07, + "loss": 0.2746, + "step": 44278 + }, + { + "epoch": 0.8863999199259315, + "grad_norm": 1.1410870552062988, + "learning_rate": 3.3454579266812283e-07, + "loss": 0.2985, + "step": 44279 + }, + { + "epoch": 0.8864199384430599, + "grad_norm": 1.0487993955612183, + "learning_rate": 3.344292130502114e-07, + "loss": 0.2839, + "step": 44280 + }, + { + "epoch": 0.8864399569601882, + "grad_norm": 1.082478404045105, + "learning_rate": 3.343126530453555e-07, + "loss": 0.322, + "step": 44281 + }, + { + "epoch": 0.8864599754773165, + "grad_norm": 1.1804649829864502, + "learning_rate": 3.3419611265404495e-07, + "loss": 0.2948, + "step": 44282 + }, + { + "epoch": 0.8864799939944449, + "grad_norm": 1.1346523761749268, + "learning_rate": 3.3407959187676977e-07, + "loss": 0.2873, + "step": 44283 + }, + { + "epoch": 0.8865000125115732, + "grad_norm": 1.028365969657898, + "learning_rate": 3.3396309071401954e-07, + "loss": 0.324, + "step": 44284 + }, + { + "epoch": 0.8865200310287016, + "grad_norm": 1.2446850538253784, + "learning_rate": 3.3384660916628455e-07, + "loss": 0.3322, + "step": 44285 + }, + { + "epoch": 0.8865400495458299, + "grad_norm": 1.0868399143218994, + "learning_rate": 3.3373014723405486e-07, + "loss": 0.3113, + "step": 44286 + }, + { + "epoch": 0.8865600680629583, + "grad_norm": 1.12575101852417, + "learning_rate": 3.336137049178184e-07, + "loss": 0.2726, + "step": 44287 + }, + { + "epoch": 0.8865800865800866, + "grad_norm": 1.190237283706665, + "learning_rate": 3.33497282218066e-07, + "loss": 0.2688, + "step": 44288 + }, + { + "epoch": 0.8866001050972149, + "grad_norm": 1.180072546005249, + "learning_rate": 3.333808791352866e-07, + "loss": 0.2705, + "step": 44289 + }, + { + "epoch": 0.8866201236143433, + "grad_norm": 2.0264501571655273, + "learning_rate": 3.3326449566997046e-07, + "loss": 0.7449, + "step": 44290 + }, + { + "epoch": 0.8866401421314716, + "grad_norm": 1.1274904012680054, + "learning_rate": 3.33148131822606e-07, + "loss": 0.3078, + "step": 44291 + }, + { + "epoch": 0.8866601606486, + "grad_norm": 1.8499153852462769, + "learning_rate": 3.3303178759368225e-07, + "loss": 0.7439, + "step": 44292 + }, + { + "epoch": 0.8866801791657283, + "grad_norm": 1.2347877025604248, + "learning_rate": 3.3291546298368784e-07, + "loss": 0.282, + "step": 44293 + }, + { + "epoch": 0.8867001976828567, + "grad_norm": 1.4242030382156372, + "learning_rate": 3.3279915799311334e-07, + "loss": 0.3007, + "step": 44294 + }, + { + "epoch": 0.886720216199985, + "grad_norm": 1.2480432987213135, + "learning_rate": 3.326828726224468e-07, + "loss": 0.3281, + "step": 44295 + }, + { + "epoch": 0.8867402347171134, + "grad_norm": 1.1430867910385132, + "learning_rate": 3.325666068721772e-07, + "loss": 0.3135, + "step": 44296 + }, + { + "epoch": 0.8867602532342417, + "grad_norm": 1.0050396919250488, + "learning_rate": 3.32450360742792e-07, + "loss": 0.2962, + "step": 44297 + }, + { + "epoch": 0.88678027175137, + "grad_norm": 1.201914668083191, + "learning_rate": 3.323341342347819e-07, + "loss": 0.2672, + "step": 44298 + }, + { + "epoch": 0.8868002902684984, + "grad_norm": 0.9593555331230164, + "learning_rate": 3.322179273486348e-07, + "loss": 0.2114, + "step": 44299 + }, + { + "epoch": 0.8868203087856267, + "grad_norm": 1.09955894947052, + "learning_rate": 3.321017400848381e-07, + "loss": 0.3187, + "step": 44300 + }, + { + "epoch": 0.8868403273027551, + "grad_norm": 1.8953429460525513, + "learning_rate": 3.31985572443882e-07, + "loss": 0.732, + "step": 44301 + }, + { + "epoch": 0.8868603458198834, + "grad_norm": 1.8711650371551514, + "learning_rate": 3.3186942442625323e-07, + "loss": 0.7636, + "step": 44302 + }, + { + "epoch": 0.8868803643370118, + "grad_norm": 1.1558459997177124, + "learning_rate": 3.3175329603244156e-07, + "loss": 0.2775, + "step": 44303 + }, + { + "epoch": 0.8869003828541401, + "grad_norm": 1.2607996463775635, + "learning_rate": 3.3163718726293426e-07, + "loss": 0.2649, + "step": 44304 + }, + { + "epoch": 0.8869204013712684, + "grad_norm": 1.4126232862472534, + "learning_rate": 3.3152109811821986e-07, + "loss": 0.2806, + "step": 44305 + }, + { + "epoch": 0.8869404198883968, + "grad_norm": 1.1452250480651855, + "learning_rate": 3.314050285987852e-07, + "loss": 0.2692, + "step": 44306 + }, + { + "epoch": 0.8869604384055251, + "grad_norm": 1.2159806489944458, + "learning_rate": 3.312889787051199e-07, + "loss": 0.3207, + "step": 44307 + }, + { + "epoch": 0.8869804569226535, + "grad_norm": 1.0205272436141968, + "learning_rate": 3.3117294843771075e-07, + "loss": 0.2404, + "step": 44308 + }, + { + "epoch": 0.8870004754397818, + "grad_norm": 1.0178941488265991, + "learning_rate": 3.3105693779704573e-07, + "loss": 0.2853, + "step": 44309 + }, + { + "epoch": 0.8870204939569102, + "grad_norm": 1.401395559310913, + "learning_rate": 3.309409467836122e-07, + "loss": 0.3117, + "step": 44310 + }, + { + "epoch": 0.8870405124740385, + "grad_norm": 1.0451111793518066, + "learning_rate": 3.308249753978987e-07, + "loss": 0.3084, + "step": 44311 + }, + { + "epoch": 0.8870605309911669, + "grad_norm": 1.1027777194976807, + "learning_rate": 3.3070902364039257e-07, + "loss": 0.2778, + "step": 44312 + }, + { + "epoch": 0.8870805495082952, + "grad_norm": 1.2912482023239136, + "learning_rate": 3.3059309151157956e-07, + "loss": 0.3069, + "step": 44313 + }, + { + "epoch": 0.8871005680254235, + "grad_norm": 2.154956817626953, + "learning_rate": 3.304771790119493e-07, + "loss": 0.6951, + "step": 44314 + }, + { + "epoch": 0.8871205865425519, + "grad_norm": 1.2263147830963135, + "learning_rate": 3.303612861419875e-07, + "loss": 0.3065, + "step": 44315 + }, + { + "epoch": 0.8871406050596802, + "grad_norm": 1.1296669244766235, + "learning_rate": 3.302454129021826e-07, + "loss": 0.315, + "step": 44316 + }, + { + "epoch": 0.8871606235768086, + "grad_norm": 1.1867924928665161, + "learning_rate": 3.3012955929302093e-07, + "loss": 0.324, + "step": 44317 + }, + { + "epoch": 0.8871806420939369, + "grad_norm": 1.3836164474487305, + "learning_rate": 3.300137253149893e-07, + "loss": 0.2571, + "step": 44318 + }, + { + "epoch": 0.8872006606110653, + "grad_norm": 1.158173680305481, + "learning_rate": 3.298979109685746e-07, + "loss": 0.3083, + "step": 44319 + }, + { + "epoch": 0.8872206791281936, + "grad_norm": 1.2118300199508667, + "learning_rate": 3.297821162542647e-07, + "loss": 0.2299, + "step": 44320 + }, + { + "epoch": 0.8872406976453219, + "grad_norm": 1.0227065086364746, + "learning_rate": 3.2966634117254593e-07, + "loss": 0.3031, + "step": 44321 + }, + { + "epoch": 0.8872607161624503, + "grad_norm": 1.1583172082901, + "learning_rate": 3.295505857239051e-07, + "loss": 0.2619, + "step": 44322 + }, + { + "epoch": 0.8872807346795786, + "grad_norm": 1.096001386642456, + "learning_rate": 3.294348499088279e-07, + "loss": 0.2606, + "step": 44323 + }, + { + "epoch": 0.887300753196707, + "grad_norm": 1.1058216094970703, + "learning_rate": 3.293191337278007e-07, + "loss": 0.2918, + "step": 44324 + }, + { + "epoch": 0.8873207717138353, + "grad_norm": 1.1285128593444824, + "learning_rate": 3.292034371813119e-07, + "loss": 0.3125, + "step": 44325 + }, + { + "epoch": 0.8873407902309637, + "grad_norm": 1.9394210577011108, + "learning_rate": 3.2908776026984555e-07, + "loss": 0.7295, + "step": 44326 + }, + { + "epoch": 0.887360808748092, + "grad_norm": 1.3064297437667847, + "learning_rate": 3.2897210299389026e-07, + "loss": 0.2722, + "step": 44327 + }, + { + "epoch": 0.8873808272652203, + "grad_norm": 1.0875601768493652, + "learning_rate": 3.2885646535393e-07, + "loss": 0.2553, + "step": 44328 + }, + { + "epoch": 0.8874008457823487, + "grad_norm": 1.7937642335891724, + "learning_rate": 3.2874084735045275e-07, + "loss": 0.7855, + "step": 44329 + }, + { + "epoch": 0.887420864299477, + "grad_norm": 1.1048829555511475, + "learning_rate": 3.2862524898394367e-07, + "loss": 0.2799, + "step": 44330 + }, + { + "epoch": 0.8874408828166054, + "grad_norm": 1.1229538917541504, + "learning_rate": 3.2850967025488846e-07, + "loss": 0.306, + "step": 44331 + }, + { + "epoch": 0.8874609013337337, + "grad_norm": 1.088464617729187, + "learning_rate": 3.2839411116377395e-07, + "loss": 0.2955, + "step": 44332 + }, + { + "epoch": 0.8874809198508621, + "grad_norm": 1.2633540630340576, + "learning_rate": 3.2827857171108366e-07, + "loss": 0.3355, + "step": 44333 + }, + { + "epoch": 0.8875009383679904, + "grad_norm": 1.2589011192321777, + "learning_rate": 3.2816305189730615e-07, + "loss": 0.2615, + "step": 44334 + }, + { + "epoch": 0.8875209568851188, + "grad_norm": 1.2003231048583984, + "learning_rate": 3.2804755172292593e-07, + "loss": 0.2614, + "step": 44335 + }, + { + "epoch": 0.8875409754022471, + "grad_norm": 1.241660714149475, + "learning_rate": 3.279320711884282e-07, + "loss": 0.3125, + "step": 44336 + }, + { + "epoch": 0.8875609939193754, + "grad_norm": 1.1743391752243042, + "learning_rate": 3.278166102942976e-07, + "loss": 0.3014, + "step": 44337 + }, + { + "epoch": 0.8875810124365038, + "grad_norm": 1.1475176811218262, + "learning_rate": 3.2770116904102144e-07, + "loss": 0.274, + "step": 44338 + }, + { + "epoch": 0.8876010309536321, + "grad_norm": 1.1542894840240479, + "learning_rate": 3.275857474290839e-07, + "loss": 0.294, + "step": 44339 + }, + { + "epoch": 0.8876210494707605, + "grad_norm": 1.1843886375427246, + "learning_rate": 3.274703454589706e-07, + "loss": 0.2873, + "step": 44340 + }, + { + "epoch": 0.8876410679878888, + "grad_norm": 1.1262946128845215, + "learning_rate": 3.273549631311651e-07, + "loss": 0.3146, + "step": 44341 + }, + { + "epoch": 0.8876610865050172, + "grad_norm": 1.2704215049743652, + "learning_rate": 3.272396004461553e-07, + "loss": 0.2842, + "step": 44342 + }, + { + "epoch": 0.8876811050221455, + "grad_norm": 1.097183346748352, + "learning_rate": 3.271242574044248e-07, + "loss": 0.2769, + "step": 44343 + }, + { + "epoch": 0.8877011235392738, + "grad_norm": 1.0239102840423584, + "learning_rate": 3.2700893400645753e-07, + "loss": 0.265, + "step": 44344 + }, + { + "epoch": 0.8877211420564022, + "grad_norm": 1.008187174797058, + "learning_rate": 3.268936302527398e-07, + "loss": 0.2745, + "step": 44345 + }, + { + "epoch": 0.8877411605735305, + "grad_norm": 1.2127026319503784, + "learning_rate": 3.2677834614375404e-07, + "loss": 0.2672, + "step": 44346 + }, + { + "epoch": 0.8877611790906589, + "grad_norm": 1.1974146366119385, + "learning_rate": 3.266630816799876e-07, + "loss": 0.3414, + "step": 44347 + }, + { + "epoch": 0.8877811976077872, + "grad_norm": 1.0554581880569458, + "learning_rate": 3.2654783686192406e-07, + "loss": 0.2709, + "step": 44348 + }, + { + "epoch": 0.8878012161249156, + "grad_norm": 2.038328170776367, + "learning_rate": 3.2643261169004734e-07, + "loss": 0.7575, + "step": 44349 + }, + { + "epoch": 0.8878212346420439, + "grad_norm": 1.027191162109375, + "learning_rate": 3.2631740616484165e-07, + "loss": 0.2654, + "step": 44350 + }, + { + "epoch": 0.8878412531591723, + "grad_norm": 1.119447112083435, + "learning_rate": 3.262022202867926e-07, + "loss": 0.2854, + "step": 44351 + }, + { + "epoch": 0.8878612716763006, + "grad_norm": 1.1271919012069702, + "learning_rate": 3.2608705405638263e-07, + "loss": 0.279, + "step": 44352 + }, + { + "epoch": 0.8878812901934289, + "grad_norm": 1.1859030723571777, + "learning_rate": 3.2597190747409803e-07, + "loss": 0.2886, + "step": 44353 + }, + { + "epoch": 0.8879013087105573, + "grad_norm": 2.194321393966675, + "learning_rate": 3.2585678054042116e-07, + "loss": 0.7195, + "step": 44354 + }, + { + "epoch": 0.8879213272276856, + "grad_norm": 1.1869559288024902, + "learning_rate": 3.2574167325583607e-07, + "loss": 0.2774, + "step": 44355 + }, + { + "epoch": 0.887941345744814, + "grad_norm": 1.1280630826950073, + "learning_rate": 3.256265856208274e-07, + "loss": 0.2836, + "step": 44356 + }, + { + "epoch": 0.8879613642619423, + "grad_norm": 1.6914315223693848, + "learning_rate": 3.255115176358792e-07, + "loss": 0.6951, + "step": 44357 + }, + { + "epoch": 0.8879813827790707, + "grad_norm": 1.0966928005218506, + "learning_rate": 3.2539646930147384e-07, + "loss": 0.3176, + "step": 44358 + }, + { + "epoch": 0.888001401296199, + "grad_norm": 1.1194047927856445, + "learning_rate": 3.252814406180954e-07, + "loss": 0.2455, + "step": 44359 + }, + { + "epoch": 0.8880214198133273, + "grad_norm": 1.1892428398132324, + "learning_rate": 3.251664315862291e-07, + "loss": 0.3435, + "step": 44360 + }, + { + "epoch": 0.8880414383304557, + "grad_norm": 1.18165922164917, + "learning_rate": 3.250514422063561e-07, + "loss": 0.3201, + "step": 44361 + }, + { + "epoch": 0.888061456847584, + "grad_norm": 1.181455373764038, + "learning_rate": 3.2493647247896167e-07, + "loss": 0.328, + "step": 44362 + }, + { + "epoch": 0.8880814753647124, + "grad_norm": 1.1471858024597168, + "learning_rate": 3.2482152240452756e-07, + "loss": 0.2499, + "step": 44363 + }, + { + "epoch": 0.8881014938818407, + "grad_norm": 1.26261568069458, + "learning_rate": 3.2470659198353736e-07, + "loss": 0.2817, + "step": 44364 + }, + { + "epoch": 0.8881215123989691, + "grad_norm": 1.8718297481536865, + "learning_rate": 3.24591681216474e-07, + "loss": 0.8397, + "step": 44365 + }, + { + "epoch": 0.8881415309160974, + "grad_norm": 1.073390245437622, + "learning_rate": 3.2447679010382205e-07, + "loss": 0.2699, + "step": 44366 + }, + { + "epoch": 0.8881615494332258, + "grad_norm": 1.1004191637039185, + "learning_rate": 3.2436191864606395e-07, + "loss": 0.3318, + "step": 44367 + }, + { + "epoch": 0.8881815679503541, + "grad_norm": 1.127471923828125, + "learning_rate": 3.242470668436809e-07, + "loss": 0.3212, + "step": 44368 + }, + { + "epoch": 0.8882015864674824, + "grad_norm": 1.1869947910308838, + "learning_rate": 3.241322346971576e-07, + "loss": 0.3075, + "step": 44369 + }, + { + "epoch": 0.8882216049846108, + "grad_norm": 1.1555886268615723, + "learning_rate": 3.240174222069764e-07, + "loss": 0.3328, + "step": 44370 + }, + { + "epoch": 0.8882416235017391, + "grad_norm": 1.098239779472351, + "learning_rate": 3.239026293736197e-07, + "loss": 0.2864, + "step": 44371 + }, + { + "epoch": 0.8882616420188675, + "grad_norm": 1.1706929206848145, + "learning_rate": 3.237878561975688e-07, + "loss": 0.3294, + "step": 44372 + }, + { + "epoch": 0.8882816605359958, + "grad_norm": 1.1076266765594482, + "learning_rate": 3.2367310267930885e-07, + "loss": 0.2685, + "step": 44373 + }, + { + "epoch": 0.8883016790531242, + "grad_norm": 1.1850581169128418, + "learning_rate": 3.2355836881932055e-07, + "loss": 0.2728, + "step": 44374 + }, + { + "epoch": 0.8883216975702525, + "grad_norm": 1.073420763015747, + "learning_rate": 3.2344365461808636e-07, + "loss": 0.2731, + "step": 44375 + }, + { + "epoch": 0.8883417160873808, + "grad_norm": 1.163936734199524, + "learning_rate": 3.233289600760886e-07, + "loss": 0.2754, + "step": 44376 + }, + { + "epoch": 0.8883617346045092, + "grad_norm": 1.2645361423492432, + "learning_rate": 3.2321428519380916e-07, + "loss": 0.3354, + "step": 44377 + }, + { + "epoch": 0.8883817531216375, + "grad_norm": 1.281901478767395, + "learning_rate": 3.230996299717298e-07, + "loss": 0.3509, + "step": 44378 + }, + { + "epoch": 0.8884017716387659, + "grad_norm": 1.2764389514923096, + "learning_rate": 3.2298499441033416e-07, + "loss": 0.285, + "step": 44379 + }, + { + "epoch": 0.8884217901558942, + "grad_norm": 1.1304104328155518, + "learning_rate": 3.2287037851010285e-07, + "loss": 0.3125, + "step": 44380 + }, + { + "epoch": 0.8884418086730226, + "grad_norm": 1.144365668296814, + "learning_rate": 3.227557822715177e-07, + "loss": 0.2861, + "step": 44381 + }, + { + "epoch": 0.8884618271901509, + "grad_norm": 1.1994580030441284, + "learning_rate": 3.2264120569506117e-07, + "loss": 0.3388, + "step": 44382 + }, + { + "epoch": 0.8884818457072793, + "grad_norm": 1.1187338829040527, + "learning_rate": 3.2252664878121455e-07, + "loss": 0.337, + "step": 44383 + }, + { + "epoch": 0.8885018642244076, + "grad_norm": 1.0029464960098267, + "learning_rate": 3.2241211153045906e-07, + "loss": 0.2847, + "step": 44384 + }, + { + "epoch": 0.8885218827415359, + "grad_norm": 1.1241657733917236, + "learning_rate": 3.222975939432765e-07, + "loss": 0.2981, + "step": 44385 + }, + { + "epoch": 0.8885419012586643, + "grad_norm": 1.1811591386795044, + "learning_rate": 3.2218309602014774e-07, + "loss": 0.2696, + "step": 44386 + }, + { + "epoch": 0.8885619197757926, + "grad_norm": 1.1884925365447998, + "learning_rate": 3.220686177615551e-07, + "loss": 0.3095, + "step": 44387 + }, + { + "epoch": 0.888581938292921, + "grad_norm": 1.9067161083221436, + "learning_rate": 3.219541591679792e-07, + "loss": 0.8116, + "step": 44388 + }, + { + "epoch": 0.8886019568100493, + "grad_norm": 1.0589712858200073, + "learning_rate": 3.21839720239901e-07, + "loss": 0.2602, + "step": 44389 + }, + { + "epoch": 0.8886219753271777, + "grad_norm": 1.0870113372802734, + "learning_rate": 3.217253009778015e-07, + "loss": 0.2844, + "step": 44390 + }, + { + "epoch": 0.888641993844306, + "grad_norm": 1.0967376232147217, + "learning_rate": 3.216109013821622e-07, + "loss": 0.243, + "step": 44391 + }, + { + "epoch": 0.8886620123614343, + "grad_norm": 1.1040217876434326, + "learning_rate": 3.214965214534643e-07, + "loss": 0.2994, + "step": 44392 + }, + { + "epoch": 0.8886820308785627, + "grad_norm": 1.0970064401626587, + "learning_rate": 3.213821611921886e-07, + "loss": 0.2793, + "step": 44393 + }, + { + "epoch": 0.888702049395691, + "grad_norm": 1.0834447145462036, + "learning_rate": 3.21267820598814e-07, + "loss": 0.3062, + "step": 44394 + }, + { + "epoch": 0.8887220679128194, + "grad_norm": 1.142909049987793, + "learning_rate": 3.2115349967382416e-07, + "loss": 0.2965, + "step": 44395 + }, + { + "epoch": 0.8887420864299477, + "grad_norm": 1.067031741142273, + "learning_rate": 3.210391984176975e-07, + "loss": 0.2524, + "step": 44396 + }, + { + "epoch": 0.8887621049470761, + "grad_norm": 1.1650912761688232, + "learning_rate": 3.209249168309153e-07, + "loss": 0.3035, + "step": 44397 + }, + { + "epoch": 0.8887821234642044, + "grad_norm": 1.116911768913269, + "learning_rate": 3.208106549139578e-07, + "loss": 0.29, + "step": 44398 + }, + { + "epoch": 0.8888021419813328, + "grad_norm": 1.0258426666259766, + "learning_rate": 3.206964126673051e-07, + "loss": 0.2431, + "step": 44399 + }, + { + "epoch": 0.8888221604984611, + "grad_norm": 1.2804911136627197, + "learning_rate": 3.20582190091438e-07, + "loss": 0.301, + "step": 44400 + }, + { + "epoch": 0.8888421790155894, + "grad_norm": 1.4444581270217896, + "learning_rate": 3.2046798718683605e-07, + "loss": 0.3442, + "step": 44401 + }, + { + "epoch": 0.8888621975327178, + "grad_norm": 1.120295763015747, + "learning_rate": 3.2035380395398e-07, + "loss": 0.2559, + "step": 44402 + }, + { + "epoch": 0.8888822160498461, + "grad_norm": 1.0767405033111572, + "learning_rate": 3.202396403933489e-07, + "loss": 0.2483, + "step": 44403 + }, + { + "epoch": 0.8889022345669745, + "grad_norm": 1.0080889463424683, + "learning_rate": 3.201254965054229e-07, + "loss": 0.2716, + "step": 44404 + }, + { + "epoch": 0.8889222530841028, + "grad_norm": 1.0631417036056519, + "learning_rate": 3.200113722906828e-07, + "loss": 0.2538, + "step": 44405 + }, + { + "epoch": 0.8889422716012312, + "grad_norm": 1.4175477027893066, + "learning_rate": 3.1989726774960816e-07, + "loss": 0.3244, + "step": 44406 + }, + { + "epoch": 0.8889622901183595, + "grad_norm": 1.1713773012161255, + "learning_rate": 3.197831828826781e-07, + "loss": 0.3353, + "step": 44407 + }, + { + "epoch": 0.8889823086354878, + "grad_norm": 1.0404653549194336, + "learning_rate": 3.196691176903716e-07, + "loss": 0.3201, + "step": 44408 + }, + { + "epoch": 0.8890023271526162, + "grad_norm": 1.2206153869628906, + "learning_rate": 3.195550721731694e-07, + "loss": 0.3075, + "step": 44409 + }, + { + "epoch": 0.8890223456697445, + "grad_norm": 1.2589083909988403, + "learning_rate": 3.194410463315506e-07, + "loss": 0.3199, + "step": 44410 + }, + { + "epoch": 0.8890423641868729, + "grad_norm": 1.049231767654419, + "learning_rate": 3.193270401659948e-07, + "loss": 0.2826, + "step": 44411 + }, + { + "epoch": 0.8890623827040012, + "grad_norm": 1.8837555646896362, + "learning_rate": 3.192130536769794e-07, + "loss": 0.7263, + "step": 44412 + }, + { + "epoch": 0.8890824012211296, + "grad_norm": 1.1013802289962769, + "learning_rate": 3.1909908686498625e-07, + "loss": 0.2774, + "step": 44413 + }, + { + "epoch": 0.8891024197382579, + "grad_norm": 0.9923580884933472, + "learning_rate": 3.1898513973049325e-07, + "loss": 0.276, + "step": 44414 + }, + { + "epoch": 0.8891224382553863, + "grad_norm": 1.0933549404144287, + "learning_rate": 3.18871212273979e-07, + "loss": 0.2906, + "step": 44415 + }, + { + "epoch": 0.8891424567725146, + "grad_norm": 2.0098376274108887, + "learning_rate": 3.187573044959219e-07, + "loss": 0.745, + "step": 44416 + }, + { + "epoch": 0.8891624752896429, + "grad_norm": 1.8757847547531128, + "learning_rate": 3.1864341639680217e-07, + "loss": 0.7509, + "step": 44417 + }, + { + "epoch": 0.8891824938067713, + "grad_norm": 1.9602442979812622, + "learning_rate": 3.185295479770989e-07, + "loss": 0.778, + "step": 44418 + }, + { + "epoch": 0.8892025123238996, + "grad_norm": 1.152122974395752, + "learning_rate": 3.1841569923728943e-07, + "loss": 0.3028, + "step": 44419 + }, + { + "epoch": 0.889222530841028, + "grad_norm": 1.0271188020706177, + "learning_rate": 3.183018701778529e-07, + "loss": 0.2857, + "step": 44420 + }, + { + "epoch": 0.8892425493581563, + "grad_norm": 1.1068577766418457, + "learning_rate": 3.1818806079926714e-07, + "loss": 0.2737, + "step": 44421 + }, + { + "epoch": 0.8892625678752847, + "grad_norm": 2.0243303775787354, + "learning_rate": 3.180742711020124e-07, + "loss": 0.7794, + "step": 44422 + }, + { + "epoch": 0.889282586392413, + "grad_norm": 1.088634967803955, + "learning_rate": 3.179605010865655e-07, + "loss": 0.2884, + "step": 44423 + }, + { + "epoch": 0.8893026049095413, + "grad_norm": 1.1314215660095215, + "learning_rate": 3.1784675075340496e-07, + "loss": 0.2971, + "step": 44424 + }, + { + "epoch": 0.8893226234266697, + "grad_norm": 1.256047248840332, + "learning_rate": 3.1773302010300876e-07, + "loss": 0.3238, + "step": 44425 + }, + { + "epoch": 0.889342641943798, + "grad_norm": 1.2604354619979858, + "learning_rate": 3.176193091358554e-07, + "loss": 0.3088, + "step": 44426 + }, + { + "epoch": 0.8893626604609264, + "grad_norm": 1.936539888381958, + "learning_rate": 3.175056178524233e-07, + "loss": 0.7942, + "step": 44427 + }, + { + "epoch": 0.8893826789780547, + "grad_norm": 1.098270297050476, + "learning_rate": 3.1739194625318993e-07, + "loss": 0.2856, + "step": 44428 + }, + { + "epoch": 0.8894026974951831, + "grad_norm": 1.15238356590271, + "learning_rate": 3.172782943386321e-07, + "loss": 0.2899, + "step": 44429 + }, + { + "epoch": 0.8894227160123114, + "grad_norm": 1.237528681755066, + "learning_rate": 3.171646621092289e-07, + "loss": 0.3267, + "step": 44430 + }, + { + "epoch": 0.8894427345294398, + "grad_norm": 1.039182424545288, + "learning_rate": 3.170510495654583e-07, + "loss": 0.2646, + "step": 44431 + }, + { + "epoch": 0.8894627530465681, + "grad_norm": 1.9231892824172974, + "learning_rate": 3.1693745670779705e-07, + "loss": 0.7959, + "step": 44432 + }, + { + "epoch": 0.8894827715636964, + "grad_norm": 1.8992568254470825, + "learning_rate": 3.168238835367232e-07, + "loss": 0.7581, + "step": 44433 + }, + { + "epoch": 0.8895027900808248, + "grad_norm": 1.1953562498092651, + "learning_rate": 3.16710330052713e-07, + "loss": 0.3112, + "step": 44434 + }, + { + "epoch": 0.8895228085979531, + "grad_norm": 1.2654950618743896, + "learning_rate": 3.165967962562455e-07, + "loss": 0.2844, + "step": 44435 + }, + { + "epoch": 0.8895428271150815, + "grad_norm": 1.0448238849639893, + "learning_rate": 3.1648328214779754e-07, + "loss": 0.2614, + "step": 44436 + }, + { + "epoch": 0.8895628456322098, + "grad_norm": 1.1113935708999634, + "learning_rate": 3.1636978772784545e-07, + "loss": 0.2681, + "step": 44437 + }, + { + "epoch": 0.8895828641493382, + "grad_norm": 1.0424154996871948, + "learning_rate": 3.162563129968671e-07, + "loss": 0.2814, + "step": 44438 + }, + { + "epoch": 0.8896028826664665, + "grad_norm": 1.2569174766540527, + "learning_rate": 3.161428579553383e-07, + "loss": 0.2903, + "step": 44439 + }, + { + "epoch": 0.8896229011835948, + "grad_norm": 1.1403273344039917, + "learning_rate": 3.1602942260373806e-07, + "loss": 0.301, + "step": 44440 + }, + { + "epoch": 0.8896429197007232, + "grad_norm": 1.1770578622817993, + "learning_rate": 3.159160069425421e-07, + "loss": 0.3316, + "step": 44441 + }, + { + "epoch": 0.8896629382178515, + "grad_norm": 1.2981287240982056, + "learning_rate": 3.158026109722262e-07, + "loss": 0.3033, + "step": 44442 + }, + { + "epoch": 0.8896829567349799, + "grad_norm": 1.0188238620758057, + "learning_rate": 3.1568923469326827e-07, + "loss": 0.2643, + "step": 44443 + }, + { + "epoch": 0.8897029752521082, + "grad_norm": 1.9904202222824097, + "learning_rate": 3.155758781061452e-07, + "loss": 0.7367, + "step": 44444 + }, + { + "epoch": 0.8897229937692366, + "grad_norm": 1.0950727462768555, + "learning_rate": 3.1546254121133324e-07, + "loss": 0.2754, + "step": 44445 + }, + { + "epoch": 0.8897430122863649, + "grad_norm": 1.0915268659591675, + "learning_rate": 3.153492240093087e-07, + "loss": 0.2736, + "step": 44446 + }, + { + "epoch": 0.8897630308034933, + "grad_norm": 1.1244540214538574, + "learning_rate": 3.152359265005478e-07, + "loss": 0.2693, + "step": 44447 + }, + { + "epoch": 0.8897830493206216, + "grad_norm": 1.9087485074996948, + "learning_rate": 3.151226486855258e-07, + "loss": 0.7144, + "step": 44448 + }, + { + "epoch": 0.8898030678377499, + "grad_norm": 1.7593402862548828, + "learning_rate": 3.150093905647211e-07, + "loss": 0.7508, + "step": 44449 + }, + { + "epoch": 0.8898230863548783, + "grad_norm": 1.0554776191711426, + "learning_rate": 3.14896152138609e-07, + "loss": 0.3026, + "step": 44450 + }, + { + "epoch": 0.8898431048720066, + "grad_norm": 1.0904030799865723, + "learning_rate": 3.1478293340766464e-07, + "loss": 0.2898, + "step": 44451 + }, + { + "epoch": 0.889863123389135, + "grad_norm": 1.1549214124679565, + "learning_rate": 3.1466973437236425e-07, + "loss": 0.3017, + "step": 44452 + }, + { + "epoch": 0.8898831419062633, + "grad_norm": 1.1790218353271484, + "learning_rate": 3.145565550331847e-07, + "loss": 0.2914, + "step": 44453 + }, + { + "epoch": 0.8899031604233917, + "grad_norm": 1.1355841159820557, + "learning_rate": 3.1444339539060065e-07, + "loss": 0.299, + "step": 44454 + }, + { + "epoch": 0.88992317894052, + "grad_norm": 1.0184829235076904, + "learning_rate": 3.1433025544508776e-07, + "loss": 0.2613, + "step": 44455 + }, + { + "epoch": 0.8899431974576483, + "grad_norm": 1.0113826990127563, + "learning_rate": 3.1421713519712236e-07, + "loss": 0.2714, + "step": 44456 + }, + { + "epoch": 0.8899632159747767, + "grad_norm": 1.1967756748199463, + "learning_rate": 3.141040346471802e-07, + "loss": 0.2705, + "step": 44457 + }, + { + "epoch": 0.889983234491905, + "grad_norm": 1.250002384185791, + "learning_rate": 3.139909537957359e-07, + "loss": 0.2871, + "step": 44458 + }, + { + "epoch": 0.8900032530090334, + "grad_norm": 1.0073341131210327, + "learning_rate": 3.1387789264326565e-07, + "loss": 0.3141, + "step": 44459 + }, + { + "epoch": 0.8900232715261617, + "grad_norm": 1.0554553270339966, + "learning_rate": 3.137648511902441e-07, + "loss": 0.2639, + "step": 44460 + }, + { + "epoch": 0.8900432900432901, + "grad_norm": 1.2187913656234741, + "learning_rate": 3.13651829437146e-07, + "loss": 0.2696, + "step": 44461 + }, + { + "epoch": 0.8900633085604184, + "grad_norm": 1.1087201833724976, + "learning_rate": 3.1353882738444805e-07, + "loss": 0.2892, + "step": 44462 + }, + { + "epoch": 0.8900833270775468, + "grad_norm": 1.072535514831543, + "learning_rate": 3.134258450326244e-07, + "loss": 0.3217, + "step": 44463 + }, + { + "epoch": 0.8901033455946751, + "grad_norm": 1.0891810655593872, + "learning_rate": 3.133128823821496e-07, + "loss": 0.247, + "step": 44464 + }, + { + "epoch": 0.8901233641118034, + "grad_norm": 1.1456416845321655, + "learning_rate": 3.131999394334978e-07, + "loss": 0.2701, + "step": 44465 + }, + { + "epoch": 0.8901433826289318, + "grad_norm": 1.9572663307189941, + "learning_rate": 3.130870161871463e-07, + "loss": 0.7794, + "step": 44466 + }, + { + "epoch": 0.8901634011460601, + "grad_norm": 1.133335828781128, + "learning_rate": 3.129741126435676e-07, + "loss": 0.3077, + "step": 44467 + }, + { + "epoch": 0.8901834196631885, + "grad_norm": 0.9993687272071838, + "learning_rate": 3.1286122880323676e-07, + "loss": 0.2829, + "step": 44468 + }, + { + "epoch": 0.8902034381803168, + "grad_norm": 1.1411032676696777, + "learning_rate": 3.1274836466662963e-07, + "loss": 0.3072, + "step": 44469 + }, + { + "epoch": 0.8902234566974452, + "grad_norm": 1.0722641944885254, + "learning_rate": 3.126355202342185e-07, + "loss": 0.2716, + "step": 44470 + }, + { + "epoch": 0.8902434752145735, + "grad_norm": 1.1892014741897583, + "learning_rate": 3.1252269550647973e-07, + "loss": 0.3279, + "step": 44471 + }, + { + "epoch": 0.8902634937317018, + "grad_norm": 1.1040946245193481, + "learning_rate": 3.124098904838868e-07, + "loss": 0.3141, + "step": 44472 + }, + { + "epoch": 0.8902835122488302, + "grad_norm": 1.0862053632736206, + "learning_rate": 3.1229710516691434e-07, + "loss": 0.2642, + "step": 44473 + }, + { + "epoch": 0.8903035307659585, + "grad_norm": 1.075552225112915, + "learning_rate": 3.121843395560348e-07, + "loss": 0.2807, + "step": 44474 + }, + { + "epoch": 0.8903235492830869, + "grad_norm": 1.1374778747558594, + "learning_rate": 3.120715936517243e-07, + "loss": 0.3002, + "step": 44475 + }, + { + "epoch": 0.8903435678002152, + "grad_norm": 1.0346081256866455, + "learning_rate": 3.11958867454456e-07, + "loss": 0.264, + "step": 44476 + }, + { + "epoch": 0.8903635863173436, + "grad_norm": 1.0771394968032837, + "learning_rate": 3.118461609647039e-07, + "loss": 0.3029, + "step": 44477 + }, + { + "epoch": 0.8903836048344719, + "grad_norm": 1.2515628337860107, + "learning_rate": 3.1173347418294144e-07, + "loss": 0.2778, + "step": 44478 + }, + { + "epoch": 0.8904036233516003, + "grad_norm": 1.141185998916626, + "learning_rate": 3.116208071096416e-07, + "loss": 0.29, + "step": 44479 + }, + { + "epoch": 0.8904236418687286, + "grad_norm": 1.1551733016967773, + "learning_rate": 3.1150815974528016e-07, + "loss": 0.3219, + "step": 44480 + }, + { + "epoch": 0.8904436603858569, + "grad_norm": 1.0394854545593262, + "learning_rate": 3.113955320903284e-07, + "loss": 0.3024, + "step": 44481 + }, + { + "epoch": 0.8904636789029853, + "grad_norm": 1.0496721267700195, + "learning_rate": 3.112829241452614e-07, + "loss": 0.263, + "step": 44482 + }, + { + "epoch": 0.8904836974201136, + "grad_norm": 1.1933560371398926, + "learning_rate": 3.111703359105517e-07, + "loss": 0.294, + "step": 44483 + }, + { + "epoch": 0.890503715937242, + "grad_norm": 1.097851276397705, + "learning_rate": 3.110577673866733e-07, + "loss": 0.2913, + "step": 44484 + }, + { + "epoch": 0.8905237344543703, + "grad_norm": 1.0525397062301636, + "learning_rate": 3.109452185740991e-07, + "loss": 0.2956, + "step": 44485 + }, + { + "epoch": 0.8905437529714987, + "grad_norm": 1.1208797693252563, + "learning_rate": 3.108326894733016e-07, + "loss": 0.2738, + "step": 44486 + }, + { + "epoch": 0.890563771488627, + "grad_norm": 1.186903715133667, + "learning_rate": 3.1072018008475426e-07, + "loss": 0.2991, + "step": 44487 + }, + { + "epoch": 0.8905837900057553, + "grad_norm": 1.1293129920959473, + "learning_rate": 3.1060769040893055e-07, + "loss": 0.2641, + "step": 44488 + }, + { + "epoch": 0.8906038085228837, + "grad_norm": 1.1399012804031372, + "learning_rate": 3.10495220446303e-07, + "loss": 0.2843, + "step": 44489 + }, + { + "epoch": 0.890623827040012, + "grad_norm": 1.164185643196106, + "learning_rate": 3.1038277019734443e-07, + "loss": 0.2598, + "step": 44490 + }, + { + "epoch": 0.8906438455571404, + "grad_norm": 1.1608178615570068, + "learning_rate": 3.102703396625273e-07, + "loss": 0.2692, + "step": 44491 + }, + { + "epoch": 0.8906638640742687, + "grad_norm": 1.20218825340271, + "learning_rate": 3.10157928842324e-07, + "loss": 0.3072, + "step": 44492 + }, + { + "epoch": 0.8906838825913971, + "grad_norm": 1.1283186674118042, + "learning_rate": 3.1004553773720804e-07, + "loss": 0.3078, + "step": 44493 + }, + { + "epoch": 0.8907039011085254, + "grad_norm": 1.0779616832733154, + "learning_rate": 3.099331663476507e-07, + "loss": 0.2657, + "step": 44494 + }, + { + "epoch": 0.8907239196256538, + "grad_norm": 1.2010842561721802, + "learning_rate": 3.098208146741255e-07, + "loss": 0.251, + "step": 44495 + }, + { + "epoch": 0.8907439381427821, + "grad_norm": 1.2164064645767212, + "learning_rate": 3.0970848271710374e-07, + "loss": 0.3045, + "step": 44496 + }, + { + "epoch": 0.8907639566599104, + "grad_norm": 1.0118834972381592, + "learning_rate": 3.095961704770589e-07, + "loss": 0.2247, + "step": 44497 + }, + { + "epoch": 0.8907839751770388, + "grad_norm": 1.1286391019821167, + "learning_rate": 3.094838779544623e-07, + "loss": 0.2964, + "step": 44498 + }, + { + "epoch": 0.8908039936941671, + "grad_norm": 1.1893541812896729, + "learning_rate": 3.0937160514978636e-07, + "loss": 0.3405, + "step": 44499 + }, + { + "epoch": 0.8908240122112955, + "grad_norm": 1.0401760339736938, + "learning_rate": 3.092593520635023e-07, + "loss": 0.2582, + "step": 44500 + }, + { + "epoch": 0.8908440307284238, + "grad_norm": 1.1233999729156494, + "learning_rate": 3.0914711869608205e-07, + "loss": 0.3562, + "step": 44501 + }, + { + "epoch": 0.8908640492455522, + "grad_norm": 1.140811800956726, + "learning_rate": 3.090349050479985e-07, + "loss": 0.305, + "step": 44502 + }, + { + "epoch": 0.8908840677626805, + "grad_norm": 1.207554817199707, + "learning_rate": 3.0892271111972237e-07, + "loss": 0.3133, + "step": 44503 + }, + { + "epoch": 0.8909040862798088, + "grad_norm": 1.9055614471435547, + "learning_rate": 3.0881053691172616e-07, + "loss": 0.6843, + "step": 44504 + }, + { + "epoch": 0.8909241047969372, + "grad_norm": 1.242095708847046, + "learning_rate": 3.0869838242447993e-07, + "loss": 0.327, + "step": 44505 + }, + { + "epoch": 0.8909441233140655, + "grad_norm": 1.1193724870681763, + "learning_rate": 3.085862476584567e-07, + "loss": 0.2784, + "step": 44506 + }, + { + "epoch": 0.8909641418311939, + "grad_norm": 1.2009865045547485, + "learning_rate": 3.0847413261412664e-07, + "loss": 0.3135, + "step": 44507 + }, + { + "epoch": 0.8909841603483222, + "grad_norm": 1.0815823078155518, + "learning_rate": 3.083620372919627e-07, + "loss": 0.2802, + "step": 44508 + }, + { + "epoch": 0.8910041788654506, + "grad_norm": 1.091772198677063, + "learning_rate": 3.082499616924345e-07, + "loss": 0.2874, + "step": 44509 + }, + { + "epoch": 0.8910241973825789, + "grad_norm": 1.1118031740188599, + "learning_rate": 3.081379058160133e-07, + "loss": 0.3386, + "step": 44510 + }, + { + "epoch": 0.8910442158997073, + "grad_norm": 1.2237595319747925, + "learning_rate": 3.080258696631716e-07, + "loss": 0.2767, + "step": 44511 + }, + { + "epoch": 0.8910642344168356, + "grad_norm": 1.0681980848312378, + "learning_rate": 3.079138532343795e-07, + "loss": 0.28, + "step": 44512 + }, + { + "epoch": 0.8910842529339639, + "grad_norm": 1.1294364929199219, + "learning_rate": 3.0780185653010773e-07, + "loss": 0.3287, + "step": 44513 + }, + { + "epoch": 0.8911042714510923, + "grad_norm": 1.102419137954712, + "learning_rate": 3.0768987955082654e-07, + "loss": 0.2971, + "step": 44514 + }, + { + "epoch": 0.8911242899682206, + "grad_norm": 1.2654328346252441, + "learning_rate": 3.075779222970077e-07, + "loss": 0.3526, + "step": 44515 + }, + { + "epoch": 0.891144308485349, + "grad_norm": 1.1080121994018555, + "learning_rate": 3.0746598476912135e-07, + "loss": 0.2943, + "step": 44516 + }, + { + "epoch": 0.8911643270024773, + "grad_norm": 1.1737513542175293, + "learning_rate": 3.0735406696763836e-07, + "loss": 0.2909, + "step": 44517 + }, + { + "epoch": 0.8911843455196057, + "grad_norm": 1.1430182456970215, + "learning_rate": 3.072421688930283e-07, + "loss": 0.3014, + "step": 44518 + }, + { + "epoch": 0.891204364036734, + "grad_norm": 1.2362189292907715, + "learning_rate": 3.0713029054576346e-07, + "loss": 0.3155, + "step": 44519 + }, + { + "epoch": 0.8912243825538623, + "grad_norm": 1.1402627229690552, + "learning_rate": 3.070184319263114e-07, + "loss": 0.2992, + "step": 44520 + }, + { + "epoch": 0.8912444010709907, + "grad_norm": 1.090337872505188, + "learning_rate": 3.0690659303514505e-07, + "loss": 0.3075, + "step": 44521 + }, + { + "epoch": 0.891264419588119, + "grad_norm": 1.1971794366836548, + "learning_rate": 3.0679477387273337e-07, + "loss": 0.2795, + "step": 44522 + }, + { + "epoch": 0.8912844381052474, + "grad_norm": 1.1861613988876343, + "learning_rate": 3.066829744395461e-07, + "loss": 0.3248, + "step": 44523 + }, + { + "epoch": 0.8913044566223757, + "grad_norm": 1.3322632312774658, + "learning_rate": 3.065711947360539e-07, + "loss": 0.2302, + "step": 44524 + }, + { + "epoch": 0.8913244751395041, + "grad_norm": 1.1319223642349243, + "learning_rate": 3.0645943476272645e-07, + "loss": 0.2724, + "step": 44525 + }, + { + "epoch": 0.8913444936566324, + "grad_norm": 1.1424287557601929, + "learning_rate": 3.063476945200333e-07, + "loss": 0.3025, + "step": 44526 + }, + { + "epoch": 0.8913645121737608, + "grad_norm": 1.0510005950927734, + "learning_rate": 3.062359740084442e-07, + "loss": 0.3072, + "step": 44527 + }, + { + "epoch": 0.8913845306908891, + "grad_norm": 1.1579079627990723, + "learning_rate": 3.061242732284292e-07, + "loss": 0.2969, + "step": 44528 + }, + { + "epoch": 0.8914045492080174, + "grad_norm": 1.189153790473938, + "learning_rate": 3.060125921804574e-07, + "loss": 0.3203, + "step": 44529 + }, + { + "epoch": 0.8914245677251458, + "grad_norm": 1.1069749593734741, + "learning_rate": 3.0590093086499904e-07, + "loss": 0.3074, + "step": 44530 + }, + { + "epoch": 0.8914445862422741, + "grad_norm": 1.228304147720337, + "learning_rate": 3.0578928928252207e-07, + "loss": 0.2936, + "step": 44531 + }, + { + "epoch": 0.8914646047594025, + "grad_norm": 1.0717321634292603, + "learning_rate": 3.0567766743349714e-07, + "loss": 0.2823, + "step": 44532 + }, + { + "epoch": 0.8914846232765308, + "grad_norm": 1.1913410425186157, + "learning_rate": 3.055660653183928e-07, + "loss": 0.2993, + "step": 44533 + }, + { + "epoch": 0.8915046417936592, + "grad_norm": 1.1334890127182007, + "learning_rate": 3.0545448293767874e-07, + "loss": 0.2342, + "step": 44534 + }, + { + "epoch": 0.8915246603107875, + "grad_norm": 1.1157844066619873, + "learning_rate": 3.0534292029182457e-07, + "loss": 0.3321, + "step": 44535 + }, + { + "epoch": 0.8915446788279158, + "grad_norm": 1.1670687198638916, + "learning_rate": 3.05231377381297e-07, + "loss": 0.326, + "step": 44536 + }, + { + "epoch": 0.8915646973450442, + "grad_norm": 1.143570065498352, + "learning_rate": 3.0511985420656753e-07, + "loss": 0.2911, + "step": 44537 + }, + { + "epoch": 0.8915847158621725, + "grad_norm": 1.0190296173095703, + "learning_rate": 3.050083507681034e-07, + "loss": 0.2944, + "step": 44538 + }, + { + "epoch": 0.8916047343793009, + "grad_norm": 1.1266340017318726, + "learning_rate": 3.0489686706637377e-07, + "loss": 0.3098, + "step": 44539 + }, + { + "epoch": 0.8916247528964292, + "grad_norm": 1.0917785167694092, + "learning_rate": 3.047854031018471e-07, + "loss": 0.2889, + "step": 44540 + }, + { + "epoch": 0.8916447714135576, + "grad_norm": 1.1578574180603027, + "learning_rate": 3.046739588749925e-07, + "loss": 0.2539, + "step": 44541 + }, + { + "epoch": 0.8916647899306859, + "grad_norm": 1.1202340126037598, + "learning_rate": 3.0456253438627847e-07, + "loss": 0.2491, + "step": 44542 + }, + { + "epoch": 0.8916848084478143, + "grad_norm": 1.116097331047058, + "learning_rate": 3.0445112963617296e-07, + "loss": 0.2876, + "step": 44543 + }, + { + "epoch": 0.8917048269649426, + "grad_norm": 1.1472781896591187, + "learning_rate": 3.0433974462514336e-07, + "loss": 0.2883, + "step": 44544 + }, + { + "epoch": 0.8917248454820709, + "grad_norm": 1.1073509454727173, + "learning_rate": 3.042283793536599e-07, + "loss": 0.2847, + "step": 44545 + }, + { + "epoch": 0.8917448639991993, + "grad_norm": 1.17271888256073, + "learning_rate": 3.041170338221894e-07, + "loss": 0.3091, + "step": 44546 + }, + { + "epoch": 0.8917648825163276, + "grad_norm": 1.9479377269744873, + "learning_rate": 3.0400570803120033e-07, + "loss": 0.7583, + "step": 44547 + }, + { + "epoch": 0.891784901033456, + "grad_norm": 2.130554676055908, + "learning_rate": 3.038944019811613e-07, + "loss": 0.8346, + "step": 44548 + }, + { + "epoch": 0.8918049195505843, + "grad_norm": 1.0908452272415161, + "learning_rate": 3.037831156725385e-07, + "loss": 0.3068, + "step": 44549 + }, + { + "epoch": 0.8918249380677127, + "grad_norm": 1.3134026527404785, + "learning_rate": 3.036718491058016e-07, + "loss": 0.292, + "step": 44550 + }, + { + "epoch": 0.891844956584841, + "grad_norm": 1.0934902429580688, + "learning_rate": 3.0356060228141803e-07, + "loss": 0.3192, + "step": 44551 + }, + { + "epoch": 0.8918649751019693, + "grad_norm": 1.9045261144638062, + "learning_rate": 3.034493751998546e-07, + "loss": 0.754, + "step": 44552 + }, + { + "epoch": 0.8918849936190977, + "grad_norm": 1.2179510593414307, + "learning_rate": 3.0333816786157934e-07, + "loss": 0.2905, + "step": 44553 + }, + { + "epoch": 0.891905012136226, + "grad_norm": 1.0356446504592896, + "learning_rate": 3.03226980267059e-07, + "loss": 0.2928, + "step": 44554 + }, + { + "epoch": 0.8919250306533544, + "grad_norm": 1.1790863275527954, + "learning_rate": 3.0311581241676216e-07, + "loss": 0.3139, + "step": 44555 + }, + { + "epoch": 0.8919450491704827, + "grad_norm": 1.1215205192565918, + "learning_rate": 3.0300466431115615e-07, + "loss": 0.3062, + "step": 44556 + }, + { + "epoch": 0.8919650676876111, + "grad_norm": 1.2699934244155884, + "learning_rate": 3.028935359507068e-07, + "loss": 0.3185, + "step": 44557 + }, + { + "epoch": 0.8919850862047394, + "grad_norm": 1.1855835914611816, + "learning_rate": 3.027824273358826e-07, + "loss": 0.3203, + "step": 44558 + }, + { + "epoch": 0.8920051047218678, + "grad_norm": 1.0782815217971802, + "learning_rate": 3.026713384671498e-07, + "loss": 0.3028, + "step": 44559 + }, + { + "epoch": 0.8920251232389961, + "grad_norm": 1.3318723440170288, + "learning_rate": 3.025602693449764e-07, + "loss": 0.2954, + "step": 44560 + }, + { + "epoch": 0.8920451417561244, + "grad_norm": 1.0588748455047607, + "learning_rate": 3.0244921996982926e-07, + "loss": 0.3174, + "step": 44561 + }, + { + "epoch": 0.8920651602732528, + "grad_norm": 1.009445071220398, + "learning_rate": 3.0233819034217404e-07, + "loss": 0.2899, + "step": 44562 + }, + { + "epoch": 0.8920851787903811, + "grad_norm": 1.1297845840454102, + "learning_rate": 3.022271804624777e-07, + "loss": 0.3135, + "step": 44563 + }, + { + "epoch": 0.8921051973075095, + "grad_norm": 1.3437942266464233, + "learning_rate": 3.021161903312081e-07, + "loss": 0.3032, + "step": 44564 + }, + { + "epoch": 0.8921252158246378, + "grad_norm": 1.061867594718933, + "learning_rate": 3.020052199488305e-07, + "loss": 0.2729, + "step": 44565 + }, + { + "epoch": 0.8921452343417662, + "grad_norm": 1.125880479812622, + "learning_rate": 3.018942693158128e-07, + "loss": 0.3119, + "step": 44566 + }, + { + "epoch": 0.8921652528588945, + "grad_norm": 1.2856242656707764, + "learning_rate": 3.017833384326191e-07, + "loss": 0.357, + "step": 44567 + }, + { + "epoch": 0.8921852713760228, + "grad_norm": 1.1407393217086792, + "learning_rate": 3.016724272997179e-07, + "loss": 0.3307, + "step": 44568 + }, + { + "epoch": 0.8922052898931512, + "grad_norm": 1.0880964994430542, + "learning_rate": 3.01561535917575e-07, + "loss": 0.2982, + "step": 44569 + }, + { + "epoch": 0.8922253084102795, + "grad_norm": 1.0377939939498901, + "learning_rate": 3.014506642866555e-07, + "loss": 0.2544, + "step": 44570 + }, + { + "epoch": 0.8922453269274079, + "grad_norm": 1.1066231727600098, + "learning_rate": 3.013398124074274e-07, + "loss": 0.2999, + "step": 44571 + }, + { + "epoch": 0.8922653454445362, + "grad_norm": 1.0777534246444702, + "learning_rate": 3.012289802803542e-07, + "loss": 0.3336, + "step": 44572 + }, + { + "epoch": 0.8922853639616646, + "grad_norm": 1.1015782356262207, + "learning_rate": 3.0111816790590444e-07, + "loss": 0.28, + "step": 44573 + }, + { + "epoch": 0.8923053824787929, + "grad_norm": 1.0687705278396606, + "learning_rate": 3.0100737528454216e-07, + "loss": 0.2804, + "step": 44574 + }, + { + "epoch": 0.8923254009959213, + "grad_norm": 1.149383783340454, + "learning_rate": 3.0089660241673424e-07, + "loss": 0.313, + "step": 44575 + }, + { + "epoch": 0.8923454195130496, + "grad_norm": 1.274428129196167, + "learning_rate": 3.0078584930294473e-07, + "loss": 0.3778, + "step": 44576 + }, + { + "epoch": 0.8923654380301779, + "grad_norm": 1.0491100549697876, + "learning_rate": 3.0067511594364053e-07, + "loss": 0.3068, + "step": 44577 + }, + { + "epoch": 0.8923854565473063, + "grad_norm": 1.201869249343872, + "learning_rate": 3.0056440233928727e-07, + "loss": 0.3071, + "step": 44578 + }, + { + "epoch": 0.8924054750644346, + "grad_norm": 1.0059088468551636, + "learning_rate": 3.0045370849035026e-07, + "loss": 0.2759, + "step": 44579 + }, + { + "epoch": 0.892425493581563, + "grad_norm": 1.253221035003662, + "learning_rate": 3.0034303439729296e-07, + "loss": 0.3313, + "step": 44580 + }, + { + "epoch": 0.8924455120986913, + "grad_norm": 1.8215361833572388, + "learning_rate": 3.0023238006058333e-07, + "loss": 0.6938, + "step": 44581 + }, + { + "epoch": 0.8924655306158197, + "grad_norm": 1.2358556985855103, + "learning_rate": 3.001217454806854e-07, + "loss": 0.3087, + "step": 44582 + }, + { + "epoch": 0.892485549132948, + "grad_norm": 1.1720681190490723, + "learning_rate": 3.000111306580639e-07, + "loss": 0.2661, + "step": 44583 + }, + { + "epoch": 0.8925055676500763, + "grad_norm": 1.0951979160308838, + "learning_rate": 2.9990053559318444e-07, + "loss": 0.3108, + "step": 44584 + }, + { + "epoch": 0.8925255861672047, + "grad_norm": 1.0165555477142334, + "learning_rate": 2.997899602865106e-07, + "loss": 0.2809, + "step": 44585 + }, + { + "epoch": 0.892545604684333, + "grad_norm": 1.2408699989318848, + "learning_rate": 2.9967940473850934e-07, + "loss": 0.262, + "step": 44586 + }, + { + "epoch": 0.8925656232014614, + "grad_norm": 1.1446871757507324, + "learning_rate": 2.9956886894964453e-07, + "loss": 0.3227, + "step": 44587 + }, + { + "epoch": 0.8925856417185897, + "grad_norm": 1.0963714122772217, + "learning_rate": 2.994583529203804e-07, + "loss": 0.3106, + "step": 44588 + }, + { + "epoch": 0.8926056602357181, + "grad_norm": 1.0758979320526123, + "learning_rate": 2.993478566511809e-07, + "loss": 0.2302, + "step": 44589 + }, + { + "epoch": 0.8926256787528464, + "grad_norm": 1.1257933378219604, + "learning_rate": 2.9923738014251244e-07, + "loss": 0.2934, + "step": 44590 + }, + { + "epoch": 0.8926456972699748, + "grad_norm": 1.1268596649169922, + "learning_rate": 2.991269233948385e-07, + "loss": 0.3106, + "step": 44591 + }, + { + "epoch": 0.8926657157871031, + "grad_norm": 1.0878605842590332, + "learning_rate": 2.9901648640862314e-07, + "loss": 0.3025, + "step": 44592 + }, + { + "epoch": 0.8926857343042314, + "grad_norm": 1.0730496644973755, + "learning_rate": 2.9890606918433094e-07, + "loss": 0.2749, + "step": 44593 + }, + { + "epoch": 0.8927057528213598, + "grad_norm": 1.0563112497329712, + "learning_rate": 2.9879567172242495e-07, + "loss": 0.3086, + "step": 44594 + }, + { + "epoch": 0.8927257713384881, + "grad_norm": 1.0995863676071167, + "learning_rate": 2.9868529402337145e-07, + "loss": 0.2846, + "step": 44595 + }, + { + "epoch": 0.8927457898556165, + "grad_norm": 1.07853364944458, + "learning_rate": 2.985749360876322e-07, + "loss": 0.276, + "step": 44596 + }, + { + "epoch": 0.8927658083727448, + "grad_norm": 1.0088971853256226, + "learning_rate": 2.9846459791567304e-07, + "loss": 0.2845, + "step": 44597 + }, + { + "epoch": 0.8927858268898732, + "grad_norm": 2.027386426925659, + "learning_rate": 2.983542795079558e-07, + "loss": 0.7801, + "step": 44598 + }, + { + "epoch": 0.8928058454070015, + "grad_norm": 1.0316020250320435, + "learning_rate": 2.982439808649462e-07, + "loss": 0.2636, + "step": 44599 + }, + { + "epoch": 0.8928258639241298, + "grad_norm": 1.0169932842254639, + "learning_rate": 2.981337019871072e-07, + "loss": 0.2331, + "step": 44600 + }, + { + "epoch": 0.8928458824412582, + "grad_norm": 1.056185007095337, + "learning_rate": 2.9802344287490235e-07, + "loss": 0.3057, + "step": 44601 + }, + { + "epoch": 0.8928659009583865, + "grad_norm": 1.9770827293395996, + "learning_rate": 2.979132035287946e-07, + "loss": 0.6924, + "step": 44602 + }, + { + "epoch": 0.8928859194755149, + "grad_norm": 1.4396460056304932, + "learning_rate": 2.97802983949248e-07, + "loss": 0.3307, + "step": 44603 + }, + { + "epoch": 0.8929059379926432, + "grad_norm": 1.079951524734497, + "learning_rate": 2.976927841367261e-07, + "loss": 0.3274, + "step": 44604 + }, + { + "epoch": 0.8929259565097716, + "grad_norm": 1.8987360000610352, + "learning_rate": 2.975826040916918e-07, + "loss": 0.7487, + "step": 44605 + }, + { + "epoch": 0.8929459750268999, + "grad_norm": 1.0637011528015137, + "learning_rate": 2.9747244381460815e-07, + "loss": 0.3008, + "step": 44606 + }, + { + "epoch": 0.8929659935440283, + "grad_norm": 1.2130225896835327, + "learning_rate": 2.973623033059375e-07, + "loss": 0.2851, + "step": 44607 + }, + { + "epoch": 0.8929860120611566, + "grad_norm": 1.1813563108444214, + "learning_rate": 2.972521825661445e-07, + "loss": 0.2948, + "step": 44608 + }, + { + "epoch": 0.8930060305782849, + "grad_norm": 1.1378272771835327, + "learning_rate": 2.971420815956905e-07, + "loss": 0.2524, + "step": 44609 + }, + { + "epoch": 0.8930260490954133, + "grad_norm": 1.0894362926483154, + "learning_rate": 2.9703200039504e-07, + "loss": 0.247, + "step": 44610 + }, + { + "epoch": 0.8930460676125416, + "grad_norm": 1.2118948698043823, + "learning_rate": 2.9692193896465435e-07, + "loss": 0.2954, + "step": 44611 + }, + { + "epoch": 0.89306608612967, + "grad_norm": 1.3466540575027466, + "learning_rate": 2.9681189730499716e-07, + "loss": 0.3045, + "step": 44612 + }, + { + "epoch": 0.8930861046467983, + "grad_norm": 1.220012903213501, + "learning_rate": 2.967018754165307e-07, + "loss": 0.3058, + "step": 44613 + }, + { + "epoch": 0.8931061231639267, + "grad_norm": 1.0374435186386108, + "learning_rate": 2.9659187329971753e-07, + "loss": 0.2835, + "step": 44614 + }, + { + "epoch": 0.893126141681055, + "grad_norm": 1.2131215333938599, + "learning_rate": 2.964818909550193e-07, + "loss": 0.2569, + "step": 44615 + }, + { + "epoch": 0.8931461601981833, + "grad_norm": 1.930864930152893, + "learning_rate": 2.963719283828992e-07, + "loss": 0.8271, + "step": 44616 + }, + { + "epoch": 0.8931661787153117, + "grad_norm": 1.1912846565246582, + "learning_rate": 2.9626198558381945e-07, + "loss": 0.301, + "step": 44617 + }, + { + "epoch": 0.89318619723244, + "grad_norm": 0.9936534762382507, + "learning_rate": 2.96152062558242e-07, + "loss": 0.2615, + "step": 44618 + }, + { + "epoch": 0.8932062157495684, + "grad_norm": 1.0945500135421753, + "learning_rate": 2.9604215930662925e-07, + "loss": 0.2314, + "step": 44619 + }, + { + "epoch": 0.8932262342666967, + "grad_norm": 1.1374059915542603, + "learning_rate": 2.9593227582944253e-07, + "loss": 0.306, + "step": 44620 + }, + { + "epoch": 0.8932462527838251, + "grad_norm": 1.191551923751831, + "learning_rate": 2.958224121271441e-07, + "loss": 0.2521, + "step": 44621 + }, + { + "epoch": 0.8932662713009534, + "grad_norm": 1.2500007152557373, + "learning_rate": 2.95712568200196e-07, + "loss": 0.2949, + "step": 44622 + }, + { + "epoch": 0.8932862898180818, + "grad_norm": 1.0945290327072144, + "learning_rate": 2.9560274404905996e-07, + "loss": 0.2822, + "step": 44623 + }, + { + "epoch": 0.8933063083352101, + "grad_norm": 1.2309342622756958, + "learning_rate": 2.954929396741979e-07, + "loss": 0.2935, + "step": 44624 + }, + { + "epoch": 0.8933263268523384, + "grad_norm": 1.2358238697052002, + "learning_rate": 2.953831550760705e-07, + "loss": 0.2696, + "step": 44625 + }, + { + "epoch": 0.8933463453694668, + "grad_norm": 1.1607974767684937, + "learning_rate": 2.9527339025514023e-07, + "loss": 0.2968, + "step": 44626 + }, + { + "epoch": 0.8933663638865951, + "grad_norm": 1.0631864070892334, + "learning_rate": 2.9516364521186834e-07, + "loss": 0.2845, + "step": 44627 + }, + { + "epoch": 0.8933863824037235, + "grad_norm": 0.9860683083534241, + "learning_rate": 2.950539199467162e-07, + "loss": 0.2698, + "step": 44628 + }, + { + "epoch": 0.8934064009208518, + "grad_norm": 1.0895475149154663, + "learning_rate": 2.949442144601439e-07, + "loss": 0.2756, + "step": 44629 + }, + { + "epoch": 0.8934264194379802, + "grad_norm": 1.1507185697555542, + "learning_rate": 2.948345287526144e-07, + "loss": 0.2932, + "step": 44630 + }, + { + "epoch": 0.8934464379551085, + "grad_norm": 1.177942156791687, + "learning_rate": 2.94724862824588e-07, + "loss": 0.2538, + "step": 44631 + }, + { + "epoch": 0.8934664564722368, + "grad_norm": 1.1816152334213257, + "learning_rate": 2.9461521667652536e-07, + "loss": 0.2522, + "step": 44632 + }, + { + "epoch": 0.8934864749893652, + "grad_norm": 1.9568976163864136, + "learning_rate": 2.9450559030888724e-07, + "loss": 0.7474, + "step": 44633 + }, + { + "epoch": 0.8935064935064935, + "grad_norm": 1.17697274684906, + "learning_rate": 2.9439598372213606e-07, + "loss": 0.2527, + "step": 44634 + }, + { + "epoch": 0.8935265120236219, + "grad_norm": 1.9342446327209473, + "learning_rate": 2.9428639691673034e-07, + "loss": 0.801, + "step": 44635 + }, + { + "epoch": 0.8935465305407502, + "grad_norm": 1.8750919103622437, + "learning_rate": 2.94176829893133e-07, + "loss": 0.7112, + "step": 44636 + }, + { + "epoch": 0.8935665490578786, + "grad_norm": 1.130165696144104, + "learning_rate": 2.9406728265180315e-07, + "loss": 0.3056, + "step": 44637 + }, + { + "epoch": 0.8935865675750069, + "grad_norm": 1.8121836185455322, + "learning_rate": 2.939577551932016e-07, + "loss": 0.7834, + "step": 44638 + }, + { + "epoch": 0.8936065860921353, + "grad_norm": 1.1859111785888672, + "learning_rate": 2.9384824751778896e-07, + "loss": 0.2576, + "step": 44639 + }, + { + "epoch": 0.8936266046092636, + "grad_norm": 1.1430327892303467, + "learning_rate": 2.937387596260255e-07, + "loss": 0.2973, + "step": 44640 + }, + { + "epoch": 0.8936466231263919, + "grad_norm": 1.1571495532989502, + "learning_rate": 2.936292915183719e-07, + "loss": 0.2812, + "step": 44641 + }, + { + "epoch": 0.8936666416435203, + "grad_norm": 1.1342692375183105, + "learning_rate": 2.935198431952874e-07, + "loss": 0.2849, + "step": 44642 + }, + { + "epoch": 0.8936866601606486, + "grad_norm": 1.2762203216552734, + "learning_rate": 2.934104146572331e-07, + "loss": 0.2892, + "step": 44643 + }, + { + "epoch": 0.893706678677777, + "grad_norm": 1.066110372543335, + "learning_rate": 2.9330100590466814e-07, + "loss": 0.2287, + "step": 44644 + }, + { + "epoch": 0.8937266971949053, + "grad_norm": 1.2076411247253418, + "learning_rate": 2.9319161693805334e-07, + "loss": 0.2799, + "step": 44645 + }, + { + "epoch": 0.8937467157120337, + "grad_norm": 1.061935305595398, + "learning_rate": 2.9308224775784713e-07, + "loss": 0.2741, + "step": 44646 + }, + { + "epoch": 0.893766734229162, + "grad_norm": 1.000795602798462, + "learning_rate": 2.929728983645108e-07, + "loss": 0.268, + "step": 44647 + }, + { + "epoch": 0.8937867527462903, + "grad_norm": 1.0419580936431885, + "learning_rate": 2.92863568758503e-07, + "loss": 0.2966, + "step": 44648 + }, + { + "epoch": 0.8938067712634187, + "grad_norm": 1.1690291166305542, + "learning_rate": 2.927542589402843e-07, + "loss": 0.3161, + "step": 44649 + }, + { + "epoch": 0.893826789780547, + "grad_norm": 1.0855562686920166, + "learning_rate": 2.926449689103139e-07, + "loss": 0.295, + "step": 44650 + }, + { + "epoch": 0.8938468082976754, + "grad_norm": 1.0320242643356323, + "learning_rate": 2.925356986690503e-07, + "loss": 0.2687, + "step": 44651 + }, + { + "epoch": 0.8938668268148037, + "grad_norm": 1.0797635316848755, + "learning_rate": 2.924264482169542e-07, + "loss": 0.2732, + "step": 44652 + }, + { + "epoch": 0.8938868453319321, + "grad_norm": 1.1719231605529785, + "learning_rate": 2.9231721755448415e-07, + "loss": 0.2515, + "step": 44653 + }, + { + "epoch": 0.8939068638490604, + "grad_norm": 1.0896738767623901, + "learning_rate": 2.922080066820998e-07, + "loss": 0.2797, + "step": 44654 + }, + { + "epoch": 0.8939268823661888, + "grad_norm": 1.0052900314331055, + "learning_rate": 2.9209881560025966e-07, + "loss": 0.2862, + "step": 44655 + }, + { + "epoch": 0.8939469008833171, + "grad_norm": 1.0871316194534302, + "learning_rate": 2.9198964430942165e-07, + "loss": 0.3064, + "step": 44656 + }, + { + "epoch": 0.8939669194004454, + "grad_norm": 1.0647361278533936, + "learning_rate": 2.918804928100472e-07, + "loss": 0.3199, + "step": 44657 + }, + { + "epoch": 0.8939869379175738, + "grad_norm": 1.208267331123352, + "learning_rate": 2.917713611025941e-07, + "loss": 0.2728, + "step": 44658 + }, + { + "epoch": 0.8940069564347021, + "grad_norm": 1.0079171657562256, + "learning_rate": 2.916622491875198e-07, + "loss": 0.2576, + "step": 44659 + }, + { + "epoch": 0.8940269749518305, + "grad_norm": 1.11976957321167, + "learning_rate": 2.9155315706528576e-07, + "loss": 0.2879, + "step": 44660 + }, + { + "epoch": 0.8940469934689588, + "grad_norm": 1.1410037279129028, + "learning_rate": 2.9144408473634756e-07, + "loss": 0.282, + "step": 44661 + }, + { + "epoch": 0.8940670119860872, + "grad_norm": 1.0655996799468994, + "learning_rate": 2.91335032201166e-07, + "loss": 0.3067, + "step": 44662 + }, + { + "epoch": 0.8940870305032155, + "grad_norm": 1.0194406509399414, + "learning_rate": 2.912259994601985e-07, + "loss": 0.2841, + "step": 44663 + }, + { + "epoch": 0.8941070490203438, + "grad_norm": 1.111738681793213, + "learning_rate": 2.91116986513903e-07, + "loss": 0.2957, + "step": 44664 + }, + { + "epoch": 0.8941270675374722, + "grad_norm": 1.1075024604797363, + "learning_rate": 2.910079933627391e-07, + "loss": 0.2858, + "step": 44665 + }, + { + "epoch": 0.8941470860546005, + "grad_norm": 1.0736886262893677, + "learning_rate": 2.9089902000716376e-07, + "loss": 0.2743, + "step": 44666 + }, + { + "epoch": 0.8941671045717289, + "grad_norm": 1.0892691612243652, + "learning_rate": 2.9079006644763595e-07, + "loss": 0.2965, + "step": 44667 + }, + { + "epoch": 0.8941871230888572, + "grad_norm": 1.1141732931137085, + "learning_rate": 2.906811326846132e-07, + "loss": 0.2476, + "step": 44668 + }, + { + "epoch": 0.8942071416059856, + "grad_norm": 1.199562907218933, + "learning_rate": 2.905722187185528e-07, + "loss": 0.3183, + "step": 44669 + }, + { + "epoch": 0.8942271601231139, + "grad_norm": 1.0695759057998657, + "learning_rate": 2.9046332454991386e-07, + "loss": 0.2944, + "step": 44670 + }, + { + "epoch": 0.8942471786402423, + "grad_norm": 1.0852218866348267, + "learning_rate": 2.9035445017915387e-07, + "loss": 0.3051, + "step": 44671 + }, + { + "epoch": 0.8942671971573706, + "grad_norm": 1.2839431762695312, + "learning_rate": 2.902455956067296e-07, + "loss": 0.319, + "step": 44672 + }, + { + "epoch": 0.8942872156744989, + "grad_norm": 1.1164391040802002, + "learning_rate": 2.901367608330996e-07, + "loss": 0.2733, + "step": 44673 + }, + { + "epoch": 0.8943072341916273, + "grad_norm": 1.1982749700546265, + "learning_rate": 2.9002794585872075e-07, + "loss": 0.2928, + "step": 44674 + }, + { + "epoch": 0.8943272527087556, + "grad_norm": 1.0470750331878662, + "learning_rate": 2.899191506840515e-07, + "loss": 0.2671, + "step": 44675 + }, + { + "epoch": 0.894347271225884, + "grad_norm": 1.0731244087219238, + "learning_rate": 2.8981037530954825e-07, + "loss": 0.3263, + "step": 44676 + }, + { + "epoch": 0.8943672897430123, + "grad_norm": 1.0844371318817139, + "learning_rate": 2.897016197356689e-07, + "loss": 0.3178, + "step": 44677 + }, + { + "epoch": 0.8943873082601407, + "grad_norm": 1.1766421794891357, + "learning_rate": 2.895928839628692e-07, + "loss": 0.3086, + "step": 44678 + }, + { + "epoch": 0.894407326777269, + "grad_norm": 1.092698335647583, + "learning_rate": 2.894841679916083e-07, + "loss": 0.2472, + "step": 44679 + }, + { + "epoch": 0.8944273452943973, + "grad_norm": 1.1276707649230957, + "learning_rate": 2.8937547182234183e-07, + "loss": 0.3092, + "step": 44680 + }, + { + "epoch": 0.8944473638115257, + "grad_norm": 1.0528990030288696, + "learning_rate": 2.892667954555278e-07, + "loss": 0.2505, + "step": 44681 + }, + { + "epoch": 0.894467382328654, + "grad_norm": 1.0961155891418457, + "learning_rate": 2.891581388916209e-07, + "loss": 0.2708, + "step": 44682 + }, + { + "epoch": 0.8944874008457824, + "grad_norm": 1.1018781661987305, + "learning_rate": 2.8904950213108074e-07, + "loss": 0.2682, + "step": 44683 + }, + { + "epoch": 0.8945074193629107, + "grad_norm": 1.8846124410629272, + "learning_rate": 2.889408851743625e-07, + "loss": 0.7315, + "step": 44684 + }, + { + "epoch": 0.8945274378800391, + "grad_norm": 1.1836400032043457, + "learning_rate": 2.8883228802192244e-07, + "loss": 0.2706, + "step": 44685 + }, + { + "epoch": 0.8945474563971674, + "grad_norm": 1.260042428970337, + "learning_rate": 2.88723710674218e-07, + "loss": 0.2773, + "step": 44686 + }, + { + "epoch": 0.8945674749142957, + "grad_norm": 1.070786714553833, + "learning_rate": 2.886151531317044e-07, + "loss": 0.2899, + "step": 44687 + }, + { + "epoch": 0.8945874934314241, + "grad_norm": 1.0759093761444092, + "learning_rate": 2.885066153948396e-07, + "loss": 0.2861, + "step": 44688 + }, + { + "epoch": 0.8946075119485524, + "grad_norm": 1.0293259620666504, + "learning_rate": 2.8839809746407934e-07, + "loss": 0.2993, + "step": 44689 + }, + { + "epoch": 0.8946275304656808, + "grad_norm": 1.1957635879516602, + "learning_rate": 2.882895993398793e-07, + "loss": 0.2843, + "step": 44690 + }, + { + "epoch": 0.8946475489828091, + "grad_norm": 1.1707683801651, + "learning_rate": 2.881811210226948e-07, + "loss": 0.3007, + "step": 44691 + }, + { + "epoch": 0.8946675674999375, + "grad_norm": 1.0902565717697144, + "learning_rate": 2.8807266251298426e-07, + "loss": 0.2651, + "step": 44692 + }, + { + "epoch": 0.8946875860170658, + "grad_norm": 1.1216264963150024, + "learning_rate": 2.879642238112018e-07, + "loss": 0.2617, + "step": 44693 + }, + { + "epoch": 0.8947076045341942, + "grad_norm": 1.2761842012405396, + "learning_rate": 2.878558049178037e-07, + "loss": 0.2996, + "step": 44694 + }, + { + "epoch": 0.8947276230513225, + "grad_norm": 1.3982394933700562, + "learning_rate": 2.8774740583324524e-07, + "loss": 0.2836, + "step": 44695 + }, + { + "epoch": 0.8947476415684508, + "grad_norm": 1.0799113512039185, + "learning_rate": 2.8763902655798313e-07, + "loss": 0.3391, + "step": 44696 + }, + { + "epoch": 0.8947676600855792, + "grad_norm": 1.2678381204605103, + "learning_rate": 2.8753066709247214e-07, + "loss": 0.2851, + "step": 44697 + }, + { + "epoch": 0.8947876786027075, + "grad_norm": 1.0970640182495117, + "learning_rate": 2.8742232743716793e-07, + "loss": 0.2901, + "step": 44698 + }, + { + "epoch": 0.8948076971198359, + "grad_norm": 1.1153830289840698, + "learning_rate": 2.8731400759252684e-07, + "loss": 0.3207, + "step": 44699 + }, + { + "epoch": 0.8948277156369642, + "grad_norm": 1.1193290948867798, + "learning_rate": 2.8720570755900234e-07, + "loss": 0.2848, + "step": 44700 + }, + { + "epoch": 0.8948477341540926, + "grad_norm": 1.2220664024353027, + "learning_rate": 2.8709742733705136e-07, + "loss": 0.3197, + "step": 44701 + }, + { + "epoch": 0.8948677526712209, + "grad_norm": 1.147994041442871, + "learning_rate": 2.8698916692712907e-07, + "loss": 0.2573, + "step": 44702 + }, + { + "epoch": 0.8948877711883492, + "grad_norm": 1.2244737148284912, + "learning_rate": 2.868809263296901e-07, + "loss": 0.33, + "step": 44703 + }, + { + "epoch": 0.8949077897054776, + "grad_norm": 1.1872117519378662, + "learning_rate": 2.8677270554518856e-07, + "loss": 0.2904, + "step": 44704 + }, + { + "epoch": 0.8949278082226059, + "grad_norm": 1.2010457515716553, + "learning_rate": 2.866645045740812e-07, + "loss": 0.3706, + "step": 44705 + }, + { + "epoch": 0.8949478267397343, + "grad_norm": 1.1503294706344604, + "learning_rate": 2.8655632341682173e-07, + "loss": 0.3091, + "step": 44706 + }, + { + "epoch": 0.8949678452568626, + "grad_norm": 1.148463249206543, + "learning_rate": 2.8644816207386517e-07, + "loss": 0.2886, + "step": 44707 + }, + { + "epoch": 0.894987863773991, + "grad_norm": 1.030400276184082, + "learning_rate": 2.863400205456657e-07, + "loss": 0.3066, + "step": 44708 + }, + { + "epoch": 0.8950078822911193, + "grad_norm": 1.1986570358276367, + "learning_rate": 2.8623189883267843e-07, + "loss": 0.3164, + "step": 44709 + }, + { + "epoch": 0.8950279008082477, + "grad_norm": 1.9193989038467407, + "learning_rate": 2.861237969353581e-07, + "loss": 0.7942, + "step": 44710 + }, + { + "epoch": 0.895047919325376, + "grad_norm": 1.145930528640747, + "learning_rate": 2.860157148541581e-07, + "loss": 0.3167, + "step": 44711 + }, + { + "epoch": 0.8950679378425043, + "grad_norm": 1.1304941177368164, + "learning_rate": 2.859076525895349e-07, + "loss": 0.292, + "step": 44712 + }, + { + "epoch": 0.8950879563596327, + "grad_norm": 1.1735374927520752, + "learning_rate": 2.8579961014194027e-07, + "loss": 0.3012, + "step": 44713 + }, + { + "epoch": 0.895107974876761, + "grad_norm": 1.2221037149429321, + "learning_rate": 2.8569158751183046e-07, + "loss": 0.3349, + "step": 44714 + }, + { + "epoch": 0.8951279933938894, + "grad_norm": 1.1336032152175903, + "learning_rate": 2.855835846996585e-07, + "loss": 0.276, + "step": 44715 + }, + { + "epoch": 0.8951480119110177, + "grad_norm": 1.2088242769241333, + "learning_rate": 2.854756017058785e-07, + "loss": 0.3409, + "step": 44716 + }, + { + "epoch": 0.8951680304281461, + "grad_norm": 1.1546740531921387, + "learning_rate": 2.8536763853094394e-07, + "loss": 0.2871, + "step": 44717 + }, + { + "epoch": 0.8951880489452744, + "grad_norm": 1.012495517730713, + "learning_rate": 2.8525969517531005e-07, + "loss": 0.276, + "step": 44718 + }, + { + "epoch": 0.8952080674624027, + "grad_norm": 1.4491931200027466, + "learning_rate": 2.851517716394298e-07, + "loss": 0.3069, + "step": 44719 + }, + { + "epoch": 0.8952280859795311, + "grad_norm": 2.071211338043213, + "learning_rate": 2.850438679237566e-07, + "loss": 0.7024, + "step": 44720 + }, + { + "epoch": 0.8952481044966594, + "grad_norm": 1.0722837448120117, + "learning_rate": 2.849359840287447e-07, + "loss": 0.3007, + "step": 44721 + }, + { + "epoch": 0.8952681230137878, + "grad_norm": 1.138691782951355, + "learning_rate": 2.8482811995484584e-07, + "loss": 0.2637, + "step": 44722 + }, + { + "epoch": 0.8952881415309161, + "grad_norm": 1.1706794500350952, + "learning_rate": 2.847202757025164e-07, + "loss": 0.3236, + "step": 44723 + }, + { + "epoch": 0.8953081600480445, + "grad_norm": 1.8771579265594482, + "learning_rate": 2.846124512722065e-07, + "loss": 0.7536, + "step": 44724 + }, + { + "epoch": 0.8953281785651728, + "grad_norm": 1.1150658130645752, + "learning_rate": 2.845046466643725e-07, + "loss": 0.2728, + "step": 44725 + }, + { + "epoch": 0.8953481970823012, + "grad_norm": 1.2863537073135376, + "learning_rate": 2.843968618794657e-07, + "loss": 0.2851, + "step": 44726 + }, + { + "epoch": 0.8953682155994295, + "grad_norm": 1.1178590059280396, + "learning_rate": 2.842890969179396e-07, + "loss": 0.2938, + "step": 44727 + }, + { + "epoch": 0.8953882341165578, + "grad_norm": 1.419823169708252, + "learning_rate": 2.8418135178024776e-07, + "loss": 0.2876, + "step": 44728 + }, + { + "epoch": 0.8954082526336862, + "grad_norm": 1.1542859077453613, + "learning_rate": 2.840736264668426e-07, + "loss": 0.2867, + "step": 44729 + }, + { + "epoch": 0.8954282711508145, + "grad_norm": 1.8215866088867188, + "learning_rate": 2.83965920978177e-07, + "loss": 0.7263, + "step": 44730 + }, + { + "epoch": 0.8954482896679429, + "grad_norm": 1.0625888109207153, + "learning_rate": 2.8385823531470345e-07, + "loss": 0.3101, + "step": 44731 + }, + { + "epoch": 0.8954683081850712, + "grad_norm": 1.048191785812378, + "learning_rate": 2.83750569476875e-07, + "loss": 0.3031, + "step": 44732 + }, + { + "epoch": 0.8954883267021996, + "grad_norm": 1.166987657546997, + "learning_rate": 2.83642923465145e-07, + "loss": 0.2921, + "step": 44733 + }, + { + "epoch": 0.8955083452193279, + "grad_norm": 1.0899684429168701, + "learning_rate": 2.835352972799643e-07, + "loss": 0.285, + "step": 44734 + }, + { + "epoch": 0.8955283637364562, + "grad_norm": 1.1625369787216187, + "learning_rate": 2.834276909217859e-07, + "loss": 0.3181, + "step": 44735 + }, + { + "epoch": 0.8955483822535846, + "grad_norm": 1.1770858764648438, + "learning_rate": 2.833201043910633e-07, + "loss": 0.2998, + "step": 44736 + }, + { + "epoch": 0.8955684007707129, + "grad_norm": 1.4073978662490845, + "learning_rate": 2.832125376882472e-07, + "loss": 0.3112, + "step": 44737 + }, + { + "epoch": 0.8955884192878413, + "grad_norm": 1.0557059049606323, + "learning_rate": 2.831049908137912e-07, + "loss": 0.2963, + "step": 44738 + }, + { + "epoch": 0.8956084378049696, + "grad_norm": 1.8897722959518433, + "learning_rate": 2.8299746376814653e-07, + "loss": 0.7619, + "step": 44739 + }, + { + "epoch": 0.895628456322098, + "grad_norm": 1.163428544998169, + "learning_rate": 2.828899565517651e-07, + "loss": 0.3251, + "step": 44740 + }, + { + "epoch": 0.8956484748392263, + "grad_norm": 0.9865432977676392, + "learning_rate": 2.827824691650993e-07, + "loss": 0.2818, + "step": 44741 + }, + { + "epoch": 0.8956684933563547, + "grad_norm": 1.1687678098678589, + "learning_rate": 2.826750016086016e-07, + "loss": 0.2359, + "step": 44742 + }, + { + "epoch": 0.895688511873483, + "grad_norm": 1.1188980340957642, + "learning_rate": 2.825675538827227e-07, + "loss": 0.2805, + "step": 44743 + }, + { + "epoch": 0.8957085303906113, + "grad_norm": 1.1778024435043335, + "learning_rate": 2.824601259879134e-07, + "loss": 0.3204, + "step": 44744 + }, + { + "epoch": 0.8957285489077397, + "grad_norm": 1.093585729598999, + "learning_rate": 2.823527179246277e-07, + "loss": 0.2866, + "step": 44745 + }, + { + "epoch": 0.895748567424868, + "grad_norm": 1.2238788604736328, + "learning_rate": 2.8224532969331587e-07, + "loss": 0.2506, + "step": 44746 + }, + { + "epoch": 0.8957685859419964, + "grad_norm": 1.0921262502670288, + "learning_rate": 2.8213796129442915e-07, + "loss": 0.3159, + "step": 44747 + }, + { + "epoch": 0.8957886044591247, + "grad_norm": 1.1034027338027954, + "learning_rate": 2.8203061272841894e-07, + "loss": 0.2491, + "step": 44748 + }, + { + "epoch": 0.8958086229762531, + "grad_norm": 1.194352149963379, + "learning_rate": 2.8192328399573696e-07, + "loss": 0.2659, + "step": 44749 + }, + { + "epoch": 0.8958286414933814, + "grad_norm": 1.0234901905059814, + "learning_rate": 2.818159750968336e-07, + "loss": 0.2656, + "step": 44750 + }, + { + "epoch": 0.8958486600105097, + "grad_norm": 1.1790728569030762, + "learning_rate": 2.8170868603216163e-07, + "loss": 0.2844, + "step": 44751 + }, + { + "epoch": 0.8958686785276381, + "grad_norm": 1.1968539953231812, + "learning_rate": 2.8160141680217025e-07, + "loss": 0.2846, + "step": 44752 + }, + { + "epoch": 0.8958886970447664, + "grad_norm": 1.1291640996932983, + "learning_rate": 2.814941674073107e-07, + "loss": 0.2721, + "step": 44753 + }, + { + "epoch": 0.8959087155618948, + "grad_norm": 1.055787205696106, + "learning_rate": 2.8138693784803495e-07, + "loss": 0.2875, + "step": 44754 + }, + { + "epoch": 0.8959287340790231, + "grad_norm": 1.0398749113082886, + "learning_rate": 2.812797281247931e-07, + "loss": 0.2399, + "step": 44755 + }, + { + "epoch": 0.8959487525961515, + "grad_norm": 1.110905647277832, + "learning_rate": 2.811725382380359e-07, + "loss": 0.2997, + "step": 44756 + }, + { + "epoch": 0.8959687711132798, + "grad_norm": 1.1065011024475098, + "learning_rate": 2.8106536818821304e-07, + "loss": 0.2969, + "step": 44757 + }, + { + "epoch": 0.8959887896304082, + "grad_norm": 1.1814101934432983, + "learning_rate": 2.809582179757764e-07, + "loss": 0.3093, + "step": 44758 + }, + { + "epoch": 0.8960088081475365, + "grad_norm": 1.1433100700378418, + "learning_rate": 2.808510876011761e-07, + "loss": 0.3171, + "step": 44759 + }, + { + "epoch": 0.8960288266646648, + "grad_norm": 1.066794514656067, + "learning_rate": 2.807439770648618e-07, + "loss": 0.3447, + "step": 44760 + }, + { + "epoch": 0.8960488451817932, + "grad_norm": 1.165589451789856, + "learning_rate": 2.806368863672848e-07, + "loss": 0.2609, + "step": 44761 + }, + { + "epoch": 0.8960688636989215, + "grad_norm": 1.1049232482910156, + "learning_rate": 2.805298155088937e-07, + "loss": 0.2759, + "step": 44762 + }, + { + "epoch": 0.8960888822160499, + "grad_norm": 1.124577283859253, + "learning_rate": 2.8042276449013973e-07, + "loss": 0.3016, + "step": 44763 + }, + { + "epoch": 0.8961089007331782, + "grad_norm": 1.0120348930358887, + "learning_rate": 2.8031573331147365e-07, + "loss": 0.2665, + "step": 44764 + }, + { + "epoch": 0.8961289192503066, + "grad_norm": 1.1299638748168945, + "learning_rate": 2.8020872197334405e-07, + "loss": 0.3466, + "step": 44765 + }, + { + "epoch": 0.8961489377674349, + "grad_norm": 1.0175052881240845, + "learning_rate": 2.80101730476201e-07, + "loss": 0.2531, + "step": 44766 + }, + { + "epoch": 0.8961689562845632, + "grad_norm": 1.1024996042251587, + "learning_rate": 2.799947588204949e-07, + "loss": 0.3013, + "step": 44767 + }, + { + "epoch": 0.8961889748016916, + "grad_norm": 1.0617350339889526, + "learning_rate": 2.798878070066752e-07, + "loss": 0.3152, + "step": 44768 + }, + { + "epoch": 0.8962089933188199, + "grad_norm": 1.149780511856079, + "learning_rate": 2.797808750351916e-07, + "loss": 0.2752, + "step": 44769 + }, + { + "epoch": 0.8962290118359483, + "grad_norm": 1.2123417854309082, + "learning_rate": 2.7967396290649317e-07, + "loss": 0.297, + "step": 44770 + }, + { + "epoch": 0.8962490303530766, + "grad_norm": 1.0309375524520874, + "learning_rate": 2.7956707062102907e-07, + "loss": 0.2468, + "step": 44771 + }, + { + "epoch": 0.896269048870205, + "grad_norm": 1.1210017204284668, + "learning_rate": 2.7946019817925e-07, + "loss": 0.3075, + "step": 44772 + }, + { + "epoch": 0.8962890673873333, + "grad_norm": 1.0419193506240845, + "learning_rate": 2.793533455816039e-07, + "loss": 0.2565, + "step": 44773 + }, + { + "epoch": 0.8963090859044617, + "grad_norm": 1.1041511297225952, + "learning_rate": 2.7924651282854043e-07, + "loss": 0.2445, + "step": 44774 + }, + { + "epoch": 0.89632910442159, + "grad_norm": 1.1490752696990967, + "learning_rate": 2.7913969992050873e-07, + "loss": 0.2577, + "step": 44775 + }, + { + "epoch": 0.8963491229387183, + "grad_norm": 1.165178656578064, + "learning_rate": 2.790329068579573e-07, + "loss": 0.2949, + "step": 44776 + }, + { + "epoch": 0.8963691414558467, + "grad_norm": 1.3460986614227295, + "learning_rate": 2.789261336413357e-07, + "loss": 0.3063, + "step": 44777 + }, + { + "epoch": 0.896389159972975, + "grad_norm": 1.093011736869812, + "learning_rate": 2.7881938027109367e-07, + "loss": 0.3016, + "step": 44778 + }, + { + "epoch": 0.8964091784901034, + "grad_norm": 1.0494166612625122, + "learning_rate": 2.787126467476775e-07, + "loss": 0.276, + "step": 44779 + }, + { + "epoch": 0.8964291970072317, + "grad_norm": 1.238036036491394, + "learning_rate": 2.786059330715385e-07, + "loss": 0.2722, + "step": 44780 + }, + { + "epoch": 0.8964492155243601, + "grad_norm": 1.9885947704315186, + "learning_rate": 2.7849923924312403e-07, + "loss": 0.7548, + "step": 44781 + }, + { + "epoch": 0.8964692340414884, + "grad_norm": 1.0840423107147217, + "learning_rate": 2.7839256526288274e-07, + "loss": 0.3196, + "step": 44782 + }, + { + "epoch": 0.8964892525586167, + "grad_norm": 1.0540170669555664, + "learning_rate": 2.7828591113126245e-07, + "loss": 0.3142, + "step": 44783 + }, + { + "epoch": 0.8965092710757451, + "grad_norm": 1.1610647439956665, + "learning_rate": 2.781792768487118e-07, + "loss": 0.3292, + "step": 44784 + }, + { + "epoch": 0.8965292895928734, + "grad_norm": 1.0870182514190674, + "learning_rate": 2.7807266241567985e-07, + "loss": 0.3021, + "step": 44785 + }, + { + "epoch": 0.8965493081100018, + "grad_norm": 1.961601734161377, + "learning_rate": 2.779660678326146e-07, + "loss": 0.7113, + "step": 44786 + }, + { + "epoch": 0.8965693266271301, + "grad_norm": 1.2080247402191162, + "learning_rate": 2.7785949309996343e-07, + "loss": 0.2627, + "step": 44787 + }, + { + "epoch": 0.8965893451442585, + "grad_norm": 1.191384196281433, + "learning_rate": 2.7775293821817427e-07, + "loss": 0.3113, + "step": 44788 + }, + { + "epoch": 0.8966093636613868, + "grad_norm": 1.1247851848602295, + "learning_rate": 2.7764640318769575e-07, + "loss": 0.3226, + "step": 44789 + }, + { + "epoch": 0.8966293821785152, + "grad_norm": 1.2834504842758179, + "learning_rate": 2.775398880089758e-07, + "loss": 0.2939, + "step": 44790 + }, + { + "epoch": 0.8966494006956435, + "grad_norm": 1.0771372318267822, + "learning_rate": 2.7743339268246184e-07, + "loss": 0.3069, + "step": 44791 + }, + { + "epoch": 0.8966694192127718, + "grad_norm": 1.785638689994812, + "learning_rate": 2.773269172086018e-07, + "loss": 0.7764, + "step": 44792 + }, + { + "epoch": 0.8966894377299002, + "grad_norm": 1.0960131883621216, + "learning_rate": 2.772204615878421e-07, + "loss": 0.2918, + "step": 44793 + }, + { + "epoch": 0.8967094562470285, + "grad_norm": 1.1183429956436157, + "learning_rate": 2.7711402582063227e-07, + "loss": 0.294, + "step": 44794 + }, + { + "epoch": 0.8967294747641569, + "grad_norm": 1.161387324333191, + "learning_rate": 2.770076099074187e-07, + "loss": 0.3392, + "step": 44795 + }, + { + "epoch": 0.8967494932812852, + "grad_norm": 1.9819985628128052, + "learning_rate": 2.769012138486482e-07, + "loss": 0.8365, + "step": 44796 + }, + { + "epoch": 0.8967695117984136, + "grad_norm": 1.1101487874984741, + "learning_rate": 2.767948376447688e-07, + "loss": 0.2969, + "step": 44797 + }, + { + "epoch": 0.8967895303155419, + "grad_norm": 1.0955528020858765, + "learning_rate": 2.766884812962273e-07, + "loss": 0.2965, + "step": 44798 + }, + { + "epoch": 0.8968095488326702, + "grad_norm": 1.0542384386062622, + "learning_rate": 2.765821448034711e-07, + "loss": 0.3156, + "step": 44799 + }, + { + "epoch": 0.8968295673497986, + "grad_norm": 1.2337441444396973, + "learning_rate": 2.764758281669477e-07, + "loss": 0.2877, + "step": 44800 + }, + { + "epoch": 0.8968495858669269, + "grad_norm": 1.1061362028121948, + "learning_rate": 2.7636953138710176e-07, + "loss": 0.2905, + "step": 44801 + }, + { + "epoch": 0.8968696043840553, + "grad_norm": 1.4056442975997925, + "learning_rate": 2.762632544643823e-07, + "loss": 0.3402, + "step": 44802 + }, + { + "epoch": 0.8968896229011836, + "grad_norm": 1.2556602954864502, + "learning_rate": 2.761569973992362e-07, + "loss": 0.2713, + "step": 44803 + }, + { + "epoch": 0.896909641418312, + "grad_norm": 1.2513785362243652, + "learning_rate": 2.7605076019210976e-07, + "loss": 0.3285, + "step": 44804 + }, + { + "epoch": 0.8969296599354403, + "grad_norm": 1.1854015588760376, + "learning_rate": 2.759445428434493e-07, + "loss": 0.3264, + "step": 44805 + }, + { + "epoch": 0.8969496784525687, + "grad_norm": 1.1645253896713257, + "learning_rate": 2.7583834535370004e-07, + "loss": 0.3156, + "step": 44806 + }, + { + "epoch": 0.896969696969697, + "grad_norm": 1.0672531127929688, + "learning_rate": 2.75732167723311e-07, + "loss": 0.2918, + "step": 44807 + }, + { + "epoch": 0.8969897154868253, + "grad_norm": 1.0606284141540527, + "learning_rate": 2.7562600995272695e-07, + "loss": 0.292, + "step": 44808 + }, + { + "epoch": 0.8970097340039537, + "grad_norm": 1.1110121011734009, + "learning_rate": 2.755198720423946e-07, + "loss": 0.2586, + "step": 44809 + }, + { + "epoch": 0.897029752521082, + "grad_norm": 1.0517874956130981, + "learning_rate": 2.7541375399275926e-07, + "loss": 0.275, + "step": 44810 + }, + { + "epoch": 0.8970497710382104, + "grad_norm": 0.9531985521316528, + "learning_rate": 2.7530765580426886e-07, + "loss": 0.268, + "step": 44811 + }, + { + "epoch": 0.8970697895553387, + "grad_norm": 1.0786532163619995, + "learning_rate": 2.752015774773675e-07, + "loss": 0.2873, + "step": 44812 + }, + { + "epoch": 0.8970898080724671, + "grad_norm": 1.3271979093551636, + "learning_rate": 2.7509551901250265e-07, + "loss": 0.2972, + "step": 44813 + }, + { + "epoch": 0.8971098265895954, + "grad_norm": 1.0301533937454224, + "learning_rate": 2.749894804101183e-07, + "loss": 0.242, + "step": 44814 + }, + { + "epoch": 0.8971298451067237, + "grad_norm": 1.1334303617477417, + "learning_rate": 2.7488346167066136e-07, + "loss": 0.3137, + "step": 44815 + }, + { + "epoch": 0.8971498636238521, + "grad_norm": 1.0831260681152344, + "learning_rate": 2.747774627945782e-07, + "loss": 0.2671, + "step": 44816 + }, + { + "epoch": 0.8971698821409804, + "grad_norm": 1.9538253545761108, + "learning_rate": 2.746714837823139e-07, + "loss": 0.8107, + "step": 44817 + }, + { + "epoch": 0.8971899006581088, + "grad_norm": 1.0442501306533813, + "learning_rate": 2.745655246343137e-07, + "loss": 0.2445, + "step": 44818 + }, + { + "epoch": 0.8972099191752371, + "grad_norm": 1.1576805114746094, + "learning_rate": 2.744595853510223e-07, + "loss": 0.3174, + "step": 44819 + }, + { + "epoch": 0.8972299376923655, + "grad_norm": 1.0200790166854858, + "learning_rate": 2.743536659328866e-07, + "loss": 0.282, + "step": 44820 + }, + { + "epoch": 0.8972499562094938, + "grad_norm": 1.0337715148925781, + "learning_rate": 2.742477663803511e-07, + "loss": 0.2902, + "step": 44821 + }, + { + "epoch": 0.8972699747266222, + "grad_norm": 1.1314268112182617, + "learning_rate": 2.741418866938611e-07, + "loss": 0.273, + "step": 44822 + }, + { + "epoch": 0.8972899932437505, + "grad_norm": 1.081320881843567, + "learning_rate": 2.740360268738618e-07, + "loss": 0.2764, + "step": 44823 + }, + { + "epoch": 0.8973100117608788, + "grad_norm": 1.8022332191467285, + "learning_rate": 2.739301869207972e-07, + "loss": 0.7655, + "step": 44824 + }, + { + "epoch": 0.8973300302780072, + "grad_norm": 1.0289876461029053, + "learning_rate": 2.7382436683511314e-07, + "loss": 0.2364, + "step": 44825 + }, + { + "epoch": 0.8973500487951355, + "grad_norm": 1.0554553270339966, + "learning_rate": 2.737185666172548e-07, + "loss": 0.2666, + "step": 44826 + }, + { + "epoch": 0.8973700673122639, + "grad_norm": 1.1042304039001465, + "learning_rate": 2.736127862676663e-07, + "loss": 0.2725, + "step": 44827 + }, + { + "epoch": 0.8973900858293922, + "grad_norm": 1.1237202882766724, + "learning_rate": 2.7350702578679167e-07, + "loss": 0.2747, + "step": 44828 + }, + { + "epoch": 0.8974101043465206, + "grad_norm": 1.0393128395080566, + "learning_rate": 2.7340128517507727e-07, + "loss": 0.2838, + "step": 44829 + }, + { + "epoch": 0.8974301228636489, + "grad_norm": 1.126542568206787, + "learning_rate": 2.7329556443296713e-07, + "loss": 0.2633, + "step": 44830 + }, + { + "epoch": 0.8974501413807772, + "grad_norm": 1.135533332824707, + "learning_rate": 2.731898635609048e-07, + "loss": 0.2967, + "step": 44831 + }, + { + "epoch": 0.8974701598979056, + "grad_norm": 1.2528754472732544, + "learning_rate": 2.7308418255933443e-07, + "loss": 0.3093, + "step": 44832 + }, + { + "epoch": 0.8974901784150339, + "grad_norm": 0.9826223254203796, + "learning_rate": 2.729785214287017e-07, + "loss": 0.2801, + "step": 44833 + }, + { + "epoch": 0.8975101969321623, + "grad_norm": 1.2325865030288696, + "learning_rate": 2.7287288016945026e-07, + "loss": 0.269, + "step": 44834 + }, + { + "epoch": 0.8975302154492906, + "grad_norm": 1.055904746055603, + "learning_rate": 2.7276725878202347e-07, + "loss": 0.2607, + "step": 44835 + }, + { + "epoch": 0.897550233966419, + "grad_norm": 1.0957378149032593, + "learning_rate": 2.7266165726686613e-07, + "loss": 0.2951, + "step": 44836 + }, + { + "epoch": 0.8975702524835473, + "grad_norm": 1.3290910720825195, + "learning_rate": 2.725560756244211e-07, + "loss": 0.2836, + "step": 44837 + }, + { + "epoch": 0.8975902710006757, + "grad_norm": 1.2122615575790405, + "learning_rate": 2.724505138551331e-07, + "loss": 0.3077, + "step": 44838 + }, + { + "epoch": 0.897610289517804, + "grad_norm": 1.212389349937439, + "learning_rate": 2.7234497195944676e-07, + "loss": 0.3033, + "step": 44839 + }, + { + "epoch": 0.8976303080349323, + "grad_norm": 1.1565895080566406, + "learning_rate": 2.722394499378034e-07, + "loss": 0.3012, + "step": 44840 + }, + { + "epoch": 0.8976503265520607, + "grad_norm": 1.1624037027359009, + "learning_rate": 2.7213394779064815e-07, + "loss": 0.2646, + "step": 44841 + }, + { + "epoch": 0.897670345069189, + "grad_norm": 1.109520435333252, + "learning_rate": 2.7202846551842463e-07, + "loss": 0.3134, + "step": 44842 + }, + { + "epoch": 0.8976903635863174, + "grad_norm": 1.2108690738677979, + "learning_rate": 2.719230031215764e-07, + "loss": 0.3021, + "step": 44843 + }, + { + "epoch": 0.8977103821034457, + "grad_norm": 1.084887981414795, + "learning_rate": 2.718175606005463e-07, + "loss": 0.2919, + "step": 44844 + }, + { + "epoch": 0.8977304006205741, + "grad_norm": 1.085008144378662, + "learning_rate": 2.71712137955778e-07, + "loss": 0.2775, + "step": 44845 + }, + { + "epoch": 0.8977504191377024, + "grad_norm": 1.1343257427215576, + "learning_rate": 2.716067351877133e-07, + "loss": 0.3464, + "step": 44846 + }, + { + "epoch": 0.8977704376548307, + "grad_norm": 1.440780758857727, + "learning_rate": 2.7150135229679685e-07, + "loss": 0.298, + "step": 44847 + }, + { + "epoch": 0.8977904561719591, + "grad_norm": 1.2565946578979492, + "learning_rate": 2.713959892834711e-07, + "loss": 0.2619, + "step": 44848 + }, + { + "epoch": 0.8978104746890874, + "grad_norm": 1.1026182174682617, + "learning_rate": 2.712906461481796e-07, + "loss": 0.3506, + "step": 44849 + }, + { + "epoch": 0.8978304932062158, + "grad_norm": 1.207568883895874, + "learning_rate": 2.711853228913636e-07, + "loss": 0.2504, + "step": 44850 + }, + { + "epoch": 0.8978505117233441, + "grad_norm": 1.2472037076950073, + "learning_rate": 2.7108001951346717e-07, + "loss": 0.2872, + "step": 44851 + }, + { + "epoch": 0.8978705302404725, + "grad_norm": 1.239471197128296, + "learning_rate": 2.7097473601493284e-07, + "loss": 0.3289, + "step": 44852 + }, + { + "epoch": 0.8978905487576008, + "grad_norm": 1.8977967500686646, + "learning_rate": 2.7086947239620245e-07, + "loss": 0.7341, + "step": 44853 + }, + { + "epoch": 0.8979105672747292, + "grad_norm": 1.1612796783447266, + "learning_rate": 2.7076422865772003e-07, + "loss": 0.2838, + "step": 44854 + }, + { + "epoch": 0.8979305857918575, + "grad_norm": 1.342922329902649, + "learning_rate": 2.706590047999258e-07, + "loss": 0.286, + "step": 44855 + }, + { + "epoch": 0.8979506043089858, + "grad_norm": 2.00761342048645, + "learning_rate": 2.705538008232644e-07, + "loss": 0.7031, + "step": 44856 + }, + { + "epoch": 0.8979706228261142, + "grad_norm": 1.116091251373291, + "learning_rate": 2.7044861672817723e-07, + "loss": 0.3224, + "step": 44857 + }, + { + "epoch": 0.8979906413432425, + "grad_norm": 1.1140327453613281, + "learning_rate": 2.703434525151055e-07, + "loss": 0.2878, + "step": 44858 + }, + { + "epoch": 0.8980106598603709, + "grad_norm": 1.0405412912368774, + "learning_rate": 2.7023830818449227e-07, + "loss": 0.2476, + "step": 44859 + }, + { + "epoch": 0.8980306783774992, + "grad_norm": 1.2554117441177368, + "learning_rate": 2.7013318373677933e-07, + "loss": 0.2757, + "step": 44860 + }, + { + "epoch": 0.8980506968946276, + "grad_norm": 1.2301185131072998, + "learning_rate": 2.700280791724091e-07, + "loss": 0.3491, + "step": 44861 + }, + { + "epoch": 0.8980707154117559, + "grad_norm": 1.1176644563674927, + "learning_rate": 2.6992299449182246e-07, + "loss": 0.3082, + "step": 44862 + }, + { + "epoch": 0.8980907339288842, + "grad_norm": 1.2685378789901733, + "learning_rate": 2.698179296954612e-07, + "loss": 0.3644, + "step": 44863 + }, + { + "epoch": 0.8981107524460126, + "grad_norm": 1.1405799388885498, + "learning_rate": 2.6971288478376825e-07, + "loss": 0.2596, + "step": 44864 + }, + { + "epoch": 0.8981307709631409, + "grad_norm": 1.1212033033370972, + "learning_rate": 2.696078597571844e-07, + "loss": 0.3039, + "step": 44865 + }, + { + "epoch": 0.8981507894802693, + "grad_norm": 1.19236421585083, + "learning_rate": 2.6950285461614987e-07, + "loss": 0.2937, + "step": 44866 + }, + { + "epoch": 0.8981708079973976, + "grad_norm": 1.920615315437317, + "learning_rate": 2.693978693611088e-07, + "loss": 0.6969, + "step": 44867 + }, + { + "epoch": 0.898190826514526, + "grad_norm": 1.015052080154419, + "learning_rate": 2.692929039924996e-07, + "loss": 0.2568, + "step": 44868 + }, + { + "epoch": 0.8982108450316543, + "grad_norm": 1.7895809412002563, + "learning_rate": 2.6918795851076594e-07, + "loss": 0.7479, + "step": 44869 + }, + { + "epoch": 0.8982308635487827, + "grad_norm": 1.23838210105896, + "learning_rate": 2.690830329163485e-07, + "loss": 0.3371, + "step": 44870 + }, + { + "epoch": 0.898250882065911, + "grad_norm": 1.1773446798324585, + "learning_rate": 2.689781272096875e-07, + "loss": 0.2958, + "step": 44871 + }, + { + "epoch": 0.8982709005830393, + "grad_norm": 1.1554341316223145, + "learning_rate": 2.688732413912232e-07, + "loss": 0.3263, + "step": 44872 + }, + { + "epoch": 0.8982909191001677, + "grad_norm": 1.3096097707748413, + "learning_rate": 2.68768375461399e-07, + "loss": 0.2513, + "step": 44873 + }, + { + "epoch": 0.898310937617296, + "grad_norm": 1.1149230003356934, + "learning_rate": 2.686635294206541e-07, + "loss": 0.2581, + "step": 44874 + }, + { + "epoch": 0.8983309561344244, + "grad_norm": 1.1496174335479736, + "learning_rate": 2.6855870326942977e-07, + "loss": 0.2686, + "step": 44875 + }, + { + "epoch": 0.8983509746515527, + "grad_norm": 0.9890714287757874, + "learning_rate": 2.6845389700816624e-07, + "loss": 0.2963, + "step": 44876 + }, + { + "epoch": 0.8983709931686811, + "grad_norm": 1.7395837306976318, + "learning_rate": 2.683491106373032e-07, + "loss": 0.7644, + "step": 44877 + }, + { + "epoch": 0.8983910116858094, + "grad_norm": 1.1174612045288086, + "learning_rate": 2.682443441572835e-07, + "loss": 0.3141, + "step": 44878 + }, + { + "epoch": 0.8984110302029377, + "grad_norm": 1.2015483379364014, + "learning_rate": 2.6813959756854523e-07, + "loss": 0.299, + "step": 44879 + }, + { + "epoch": 0.8984310487200661, + "grad_norm": 1.7630561590194702, + "learning_rate": 2.680348708715308e-07, + "loss": 0.8098, + "step": 44880 + }, + { + "epoch": 0.8984510672371944, + "grad_norm": 1.2127420902252197, + "learning_rate": 2.679301640666787e-07, + "loss": 0.2989, + "step": 44881 + }, + { + "epoch": 0.8984710857543228, + "grad_norm": 1.0142840147018433, + "learning_rate": 2.678254771544303e-07, + "loss": 0.26, + "step": 44882 + }, + { + "epoch": 0.8984911042714511, + "grad_norm": 1.1097702980041504, + "learning_rate": 2.677208101352252e-07, + "loss": 0.2912, + "step": 44883 + }, + { + "epoch": 0.8985111227885795, + "grad_norm": 1.9570826292037964, + "learning_rate": 2.6761616300950367e-07, + "loss": 0.7628, + "step": 44884 + }, + { + "epoch": 0.8985311413057078, + "grad_norm": 1.0097633600234985, + "learning_rate": 2.6751153577770526e-07, + "loss": 0.3057, + "step": 44885 + }, + { + "epoch": 0.8985511598228362, + "grad_norm": 2.1293280124664307, + "learning_rate": 2.6740692844026915e-07, + "loss": 0.8106, + "step": 44886 + }, + { + "epoch": 0.8985711783399645, + "grad_norm": 0.9516837000846863, + "learning_rate": 2.6730234099763666e-07, + "loss": 0.2786, + "step": 44887 + }, + { + "epoch": 0.8985911968570928, + "grad_norm": 1.1489496231079102, + "learning_rate": 2.671977734502468e-07, + "loss": 0.2626, + "step": 44888 + }, + { + "epoch": 0.8986112153742212, + "grad_norm": 1.1041109561920166, + "learning_rate": 2.670932257985387e-07, + "loss": 0.2817, + "step": 44889 + }, + { + "epoch": 0.8986312338913495, + "grad_norm": 1.1719627380371094, + "learning_rate": 2.669886980429515e-07, + "loss": 0.3137, + "step": 44890 + }, + { + "epoch": 0.8986512524084779, + "grad_norm": 1.1525377035140991, + "learning_rate": 2.6688419018392587e-07, + "loss": 0.2884, + "step": 44891 + }, + { + "epoch": 0.8986712709256062, + "grad_norm": 1.0880404710769653, + "learning_rate": 2.6677970222189986e-07, + "loss": 0.2945, + "step": 44892 + }, + { + "epoch": 0.8986912894427346, + "grad_norm": 1.0331474542617798, + "learning_rate": 2.666752341573142e-07, + "loss": 0.3132, + "step": 44893 + }, + { + "epoch": 0.8987113079598629, + "grad_norm": 1.1548012495040894, + "learning_rate": 2.665707859906064e-07, + "loss": 0.2622, + "step": 44894 + }, + { + "epoch": 0.8987313264769912, + "grad_norm": 1.0413744449615479, + "learning_rate": 2.664663577222176e-07, + "loss": 0.2973, + "step": 44895 + }, + { + "epoch": 0.8987513449941196, + "grad_norm": 1.1406092643737793, + "learning_rate": 2.6636194935258486e-07, + "loss": 0.3336, + "step": 44896 + }, + { + "epoch": 0.8987713635112479, + "grad_norm": 1.155386209487915, + "learning_rate": 2.662575608821483e-07, + "loss": 0.3407, + "step": 44897 + }, + { + "epoch": 0.8987913820283763, + "grad_norm": 1.8404897451400757, + "learning_rate": 2.6615319231134583e-07, + "loss": 0.7106, + "step": 44898 + }, + { + "epoch": 0.8988114005455046, + "grad_norm": 1.095011830329895, + "learning_rate": 2.660488436406161e-07, + "loss": 0.3033, + "step": 44899 + }, + { + "epoch": 0.898831419062633, + "grad_norm": 1.3037703037261963, + "learning_rate": 2.6594451487039875e-07, + "loss": 0.2737, + "step": 44900 + }, + { + "epoch": 0.8988514375797613, + "grad_norm": 1.1534446477890015, + "learning_rate": 2.6584020600113223e-07, + "loss": 0.3211, + "step": 44901 + }, + { + "epoch": 0.8988714560968897, + "grad_norm": 1.0446141958236694, + "learning_rate": 2.6573591703325463e-07, + "loss": 0.2686, + "step": 44902 + }, + { + "epoch": 0.898891474614018, + "grad_norm": 1.11061692237854, + "learning_rate": 2.6563164796720333e-07, + "loss": 0.2998, + "step": 44903 + }, + { + "epoch": 0.8989114931311463, + "grad_norm": 1.0070477724075317, + "learning_rate": 2.6552739880341907e-07, + "loss": 0.2599, + "step": 44904 + }, + { + "epoch": 0.8989315116482747, + "grad_norm": 1.7501906156539917, + "learning_rate": 2.654231695423376e-07, + "loss": 0.7438, + "step": 44905 + }, + { + "epoch": 0.898951530165403, + "grad_norm": 1.058129072189331, + "learning_rate": 2.6531896018439916e-07, + "loss": 0.2869, + "step": 44906 + }, + { + "epoch": 0.8989715486825314, + "grad_norm": 1.1628456115722656, + "learning_rate": 2.6521477073004067e-07, + "loss": 0.2753, + "step": 44907 + }, + { + "epoch": 0.8989915671996597, + "grad_norm": 1.0832239389419556, + "learning_rate": 2.651106011797e-07, + "loss": 0.2834, + "step": 44908 + }, + { + "epoch": 0.8990115857167881, + "grad_norm": 1.2346457242965698, + "learning_rate": 2.650064515338158e-07, + "loss": 0.3289, + "step": 44909 + }, + { + "epoch": 0.8990316042339164, + "grad_norm": 1.0833079814910889, + "learning_rate": 2.649023217928254e-07, + "loss": 0.297, + "step": 44910 + }, + { + "epoch": 0.8990516227510447, + "grad_norm": 1.3061593770980835, + "learning_rate": 2.6479821195716683e-07, + "loss": 0.3424, + "step": 44911 + }, + { + "epoch": 0.8990716412681731, + "grad_norm": 1.8531715869903564, + "learning_rate": 2.6469412202727694e-07, + "loss": 0.7845, + "step": 44912 + }, + { + "epoch": 0.8990916597853014, + "grad_norm": 1.1511369943618774, + "learning_rate": 2.645900520035943e-07, + "loss": 0.3003, + "step": 44913 + }, + { + "epoch": 0.8991116783024298, + "grad_norm": 1.1011542081832886, + "learning_rate": 2.6448600188655636e-07, + "loss": 0.2682, + "step": 44914 + }, + { + "epoch": 0.8991316968195581, + "grad_norm": 1.1163781881332397, + "learning_rate": 2.6438197167659996e-07, + "loss": 0.2726, + "step": 44915 + }, + { + "epoch": 0.8991517153366865, + "grad_norm": 1.1323729753494263, + "learning_rate": 2.642779613741619e-07, + "loss": 0.2566, + "step": 44916 + }, + { + "epoch": 0.8991717338538148, + "grad_norm": 1.2660139799118042, + "learning_rate": 2.641739709796809e-07, + "loss": 0.2745, + "step": 44917 + }, + { + "epoch": 0.8991917523709432, + "grad_norm": 1.0941554307937622, + "learning_rate": 2.640700004935925e-07, + "loss": 0.2926, + "step": 44918 + }, + { + "epoch": 0.8992117708880715, + "grad_norm": 1.1433451175689697, + "learning_rate": 2.6396604991633545e-07, + "loss": 0.2834, + "step": 44919 + }, + { + "epoch": 0.8992317894051998, + "grad_norm": 1.8932526111602783, + "learning_rate": 2.638621192483465e-07, + "loss": 0.7442, + "step": 44920 + }, + { + "epoch": 0.8992518079223282, + "grad_norm": 1.371140480041504, + "learning_rate": 2.6375820849006027e-07, + "loss": 0.3442, + "step": 44921 + }, + { + "epoch": 0.8992718264394565, + "grad_norm": 1.3308738470077515, + "learning_rate": 2.636543176419165e-07, + "loss": 0.2865, + "step": 44922 + }, + { + "epoch": 0.8992918449565849, + "grad_norm": 1.006735920906067, + "learning_rate": 2.635504467043509e-07, + "loss": 0.2685, + "step": 44923 + }, + { + "epoch": 0.8993118634737132, + "grad_norm": 1.9126161336898804, + "learning_rate": 2.634465956777998e-07, + "loss": 0.7658, + "step": 44924 + }, + { + "epoch": 0.8993318819908416, + "grad_norm": 1.2563878297805786, + "learning_rate": 2.63342764562699e-07, + "loss": 0.3444, + "step": 44925 + }, + { + "epoch": 0.8993519005079699, + "grad_norm": 1.2766704559326172, + "learning_rate": 2.632389533594865e-07, + "loss": 0.2988, + "step": 44926 + }, + { + "epoch": 0.8993719190250982, + "grad_norm": 1.1364420652389526, + "learning_rate": 2.631351620685985e-07, + "loss": 0.2521, + "step": 44927 + }, + { + "epoch": 0.8993919375422266, + "grad_norm": 1.082489013671875, + "learning_rate": 2.6303139069047077e-07, + "loss": 0.2468, + "step": 44928 + }, + { + "epoch": 0.8994119560593549, + "grad_norm": 1.0708048343658447, + "learning_rate": 2.6292763922553867e-07, + "loss": 0.3204, + "step": 44929 + }, + { + "epoch": 0.8994319745764833, + "grad_norm": 1.8708807229995728, + "learning_rate": 2.6282390767424003e-07, + "loss": 0.7371, + "step": 44930 + }, + { + "epoch": 0.8994519930936116, + "grad_norm": 1.0355693101882935, + "learning_rate": 2.6272019603701013e-07, + "loss": 0.2537, + "step": 44931 + }, + { + "epoch": 0.89947201161074, + "grad_norm": 1.0635473728179932, + "learning_rate": 2.626165043142853e-07, + "loss": 0.2661, + "step": 44932 + }, + { + "epoch": 0.8994920301278683, + "grad_norm": 1.1475430727005005, + "learning_rate": 2.625128325065013e-07, + "loss": 0.2982, + "step": 44933 + }, + { + "epoch": 0.8995120486449967, + "grad_norm": 1.054018259048462, + "learning_rate": 2.624091806140927e-07, + "loss": 0.2485, + "step": 44934 + }, + { + "epoch": 0.899532067162125, + "grad_norm": 1.0968308448791504, + "learning_rate": 2.6230554863749756e-07, + "loss": 0.2642, + "step": 44935 + }, + { + "epoch": 0.8995520856792533, + "grad_norm": 1.0454429388046265, + "learning_rate": 2.6220193657715056e-07, + "loss": 0.2923, + "step": 44936 + }, + { + "epoch": 0.8995721041963817, + "grad_norm": 1.1107993125915527, + "learning_rate": 2.6209834443348624e-07, + "loss": 0.3224, + "step": 44937 + }, + { + "epoch": 0.89959212271351, + "grad_norm": 1.034205675125122, + "learning_rate": 2.6199477220694156e-07, + "loss": 0.293, + "step": 44938 + }, + { + "epoch": 0.8996121412306384, + "grad_norm": 1.1861166954040527, + "learning_rate": 2.6189121989795006e-07, + "loss": 0.3114, + "step": 44939 + }, + { + "epoch": 0.8996321597477667, + "grad_norm": 2.049426794052124, + "learning_rate": 2.6178768750694916e-07, + "loss": 0.7916, + "step": 44940 + }, + { + "epoch": 0.8996521782648951, + "grad_norm": 1.0257470607757568, + "learning_rate": 2.616841750343735e-07, + "loss": 0.2736, + "step": 44941 + }, + { + "epoch": 0.8996721967820234, + "grad_norm": 1.0884966850280762, + "learning_rate": 2.615806824806566e-07, + "loss": 0.285, + "step": 44942 + }, + { + "epoch": 0.8996922152991517, + "grad_norm": 1.3093304634094238, + "learning_rate": 2.614772098462359e-07, + "loss": 0.2727, + "step": 44943 + }, + { + "epoch": 0.8997122338162801, + "grad_norm": 1.116391897201538, + "learning_rate": 2.6137375713154447e-07, + "loss": 0.3498, + "step": 44944 + }, + { + "epoch": 0.8997322523334084, + "grad_norm": 1.0955979824066162, + "learning_rate": 2.612703243370185e-07, + "loss": 0.3059, + "step": 44945 + }, + { + "epoch": 0.8997522708505368, + "grad_norm": 1.206113576889038, + "learning_rate": 2.6116691146309215e-07, + "loss": 0.3285, + "step": 44946 + }, + { + "epoch": 0.8997722893676651, + "grad_norm": 1.0862360000610352, + "learning_rate": 2.610635185102006e-07, + "loss": 0.27, + "step": 44947 + }, + { + "epoch": 0.8997923078847935, + "grad_norm": 1.1479390859603882, + "learning_rate": 2.6096014547877746e-07, + "loss": 0.2873, + "step": 44948 + }, + { + "epoch": 0.8998123264019218, + "grad_norm": 1.0988614559173584, + "learning_rate": 2.6085679236925843e-07, + "loss": 0.2986, + "step": 44949 + }, + { + "epoch": 0.8998323449190502, + "grad_norm": 1.1747167110443115, + "learning_rate": 2.6075345918207815e-07, + "loss": 0.3066, + "step": 44950 + }, + { + "epoch": 0.8998523634361785, + "grad_norm": 1.00822114944458, + "learning_rate": 2.6065014591767025e-07, + "loss": 0.2814, + "step": 44951 + }, + { + "epoch": 0.8998723819533068, + "grad_norm": 1.125325322151184, + "learning_rate": 2.605468525764682e-07, + "loss": 0.2709, + "step": 44952 + }, + { + "epoch": 0.8998924004704352, + "grad_norm": 1.0853692293167114, + "learning_rate": 2.604435791589083e-07, + "loss": 0.291, + "step": 44953 + }, + { + "epoch": 0.8999124189875635, + "grad_norm": 1.2154284715652466, + "learning_rate": 2.60340325665423e-07, + "loss": 0.3138, + "step": 44954 + }, + { + "epoch": 0.8999324375046919, + "grad_norm": 0.9989015460014343, + "learning_rate": 2.60237092096447e-07, + "loss": 0.2852, + "step": 44955 + }, + { + "epoch": 0.8999524560218202, + "grad_norm": 1.0962212085723877, + "learning_rate": 2.601338784524149e-07, + "loss": 0.2847, + "step": 44956 + }, + { + "epoch": 0.8999724745389486, + "grad_norm": 1.1633638143539429, + "learning_rate": 2.6003068473375857e-07, + "loss": 0.3129, + "step": 44957 + }, + { + "epoch": 0.8999924930560769, + "grad_norm": 1.2950701713562012, + "learning_rate": 2.599275109409144e-07, + "loss": 0.3091, + "step": 44958 + }, + { + "epoch": 0.9000125115732052, + "grad_norm": 1.120045781135559, + "learning_rate": 2.5982435707431474e-07, + "loss": 0.3243, + "step": 44959 + }, + { + "epoch": 0.9000325300903336, + "grad_norm": 1.15053129196167, + "learning_rate": 2.5972122313439373e-07, + "loss": 0.2945, + "step": 44960 + }, + { + "epoch": 0.9000525486074619, + "grad_norm": 1.0896399021148682, + "learning_rate": 2.596181091215838e-07, + "loss": 0.325, + "step": 44961 + }, + { + "epoch": 0.9000725671245903, + "grad_norm": 0.9833456873893738, + "learning_rate": 2.595150150363196e-07, + "loss": 0.2724, + "step": 44962 + }, + { + "epoch": 0.9000925856417186, + "grad_norm": 1.1203041076660156, + "learning_rate": 2.594119408790341e-07, + "loss": 0.3351, + "step": 44963 + }, + { + "epoch": 0.900112604158847, + "grad_norm": 1.1400202512741089, + "learning_rate": 2.5930888665016083e-07, + "loss": 0.3176, + "step": 44964 + }, + { + "epoch": 0.9001326226759753, + "grad_norm": 1.135416030883789, + "learning_rate": 2.592058523501323e-07, + "loss": 0.2677, + "step": 44965 + }, + { + "epoch": 0.9001526411931037, + "grad_norm": 0.9961979389190674, + "learning_rate": 2.5910283797938254e-07, + "loss": 0.2824, + "step": 44966 + }, + { + "epoch": 0.900172659710232, + "grad_norm": 1.2278343439102173, + "learning_rate": 2.5899984353834396e-07, + "loss": 0.3443, + "step": 44967 + }, + { + "epoch": 0.9001926782273603, + "grad_norm": 1.0294721126556396, + "learning_rate": 2.588968690274496e-07, + "loss": 0.2696, + "step": 44968 + }, + { + "epoch": 0.9002126967444887, + "grad_norm": 1.0393434762954712, + "learning_rate": 2.58793914447133e-07, + "loss": 0.268, + "step": 44969 + }, + { + "epoch": 0.900232715261617, + "grad_norm": 1.043534517288208, + "learning_rate": 2.586909797978254e-07, + "loss": 0.2707, + "step": 44970 + }, + { + "epoch": 0.9002527337787454, + "grad_norm": 1.4606369733810425, + "learning_rate": 2.585880650799616e-07, + "loss": 0.2884, + "step": 44971 + }, + { + "epoch": 0.9002727522958737, + "grad_norm": 1.209822654724121, + "learning_rate": 2.5848517029397336e-07, + "loss": 0.2854, + "step": 44972 + }, + { + "epoch": 0.9002927708130021, + "grad_norm": 1.2161405086517334, + "learning_rate": 2.5838229544029257e-07, + "loss": 0.3046, + "step": 44973 + }, + { + "epoch": 0.9003127893301304, + "grad_norm": 1.1208281517028809, + "learning_rate": 2.5827944051935173e-07, + "loss": 0.2934, + "step": 44974 + }, + { + "epoch": 0.9003328078472587, + "grad_norm": 1.2464065551757812, + "learning_rate": 2.581766055315849e-07, + "loss": 0.2566, + "step": 44975 + }, + { + "epoch": 0.9003528263643871, + "grad_norm": 1.0094741582870483, + "learning_rate": 2.580737904774228e-07, + "loss": 0.3215, + "step": 44976 + }, + { + "epoch": 0.9003728448815154, + "grad_norm": 1.1106706857681274, + "learning_rate": 2.5797099535729786e-07, + "loss": 0.2771, + "step": 44977 + }, + { + "epoch": 0.9003928633986438, + "grad_norm": 0.9674801826477051, + "learning_rate": 2.578682201716415e-07, + "loss": 0.271, + "step": 44978 + }, + { + "epoch": 0.9004128819157721, + "grad_norm": 1.0598676204681396, + "learning_rate": 2.5776546492088773e-07, + "loss": 0.2913, + "step": 44979 + }, + { + "epoch": 0.9004329004329005, + "grad_norm": 1.0216569900512695, + "learning_rate": 2.5766272960546735e-07, + "loss": 0.2725, + "step": 44980 + }, + { + "epoch": 0.9004529189500288, + "grad_norm": 1.0460339784622192, + "learning_rate": 2.5756001422581166e-07, + "loss": 0.2648, + "step": 44981 + }, + { + "epoch": 0.9004729374671572, + "grad_norm": 1.0240821838378906, + "learning_rate": 2.574573187823537e-07, + "loss": 0.2507, + "step": 44982 + }, + { + "epoch": 0.9004929559842855, + "grad_norm": 1.330464482307434, + "learning_rate": 2.5735464327552364e-07, + "loss": 0.2777, + "step": 44983 + }, + { + "epoch": 0.9005129745014138, + "grad_norm": 1.0132135152816772, + "learning_rate": 2.57251987705755e-07, + "loss": 0.2706, + "step": 44984 + }, + { + "epoch": 0.9005329930185422, + "grad_norm": 1.165131688117981, + "learning_rate": 2.57149352073478e-07, + "loss": 0.2993, + "step": 44985 + }, + { + "epoch": 0.9005530115356705, + "grad_norm": 1.1477031707763672, + "learning_rate": 2.570467363791251e-07, + "loss": 0.2649, + "step": 44986 + }, + { + "epoch": 0.9005730300527989, + "grad_norm": 1.8641550540924072, + "learning_rate": 2.5694414062312546e-07, + "loss": 0.6635, + "step": 44987 + }, + { + "epoch": 0.9005930485699272, + "grad_norm": 1.109158992767334, + "learning_rate": 2.568415648059136e-07, + "loss": 0.3046, + "step": 44988 + }, + { + "epoch": 0.9006130670870556, + "grad_norm": 0.9759003520011902, + "learning_rate": 2.5673900892791814e-07, + "loss": 0.2391, + "step": 44989 + }, + { + "epoch": 0.9006330856041839, + "grad_norm": 1.0979660749435425, + "learning_rate": 2.566364729895715e-07, + "loss": 0.2906, + "step": 44990 + }, + { + "epoch": 0.9006531041213122, + "grad_norm": 1.1156128644943237, + "learning_rate": 2.5653395699130445e-07, + "loss": 0.2869, + "step": 44991 + }, + { + "epoch": 0.9006731226384406, + "grad_norm": 1.114396095275879, + "learning_rate": 2.5643146093354723e-07, + "loss": 0.3288, + "step": 44992 + }, + { + "epoch": 0.9006931411555689, + "grad_norm": 1.038644790649414, + "learning_rate": 2.563289848167322e-07, + "loss": 0.3103, + "step": 44993 + }, + { + "epoch": 0.9007131596726973, + "grad_norm": 1.4239436388015747, + "learning_rate": 2.562265286412879e-07, + "loss": 0.308, + "step": 44994 + }, + { + "epoch": 0.9007331781898256, + "grad_norm": 1.1875290870666504, + "learning_rate": 2.561240924076475e-07, + "loss": 0.2907, + "step": 44995 + }, + { + "epoch": 0.900753196706954, + "grad_norm": 1.1350696086883545, + "learning_rate": 2.5602167611623984e-07, + "loss": 0.2717, + "step": 44996 + }, + { + "epoch": 0.9007732152240823, + "grad_norm": 1.2374721765518188, + "learning_rate": 2.55919279767497e-07, + "loss": 0.2689, + "step": 44997 + }, + { + "epoch": 0.9007932337412107, + "grad_norm": 1.087686538696289, + "learning_rate": 2.5581690336184793e-07, + "loss": 0.3031, + "step": 44998 + }, + { + "epoch": 0.900813252258339, + "grad_norm": 1.1297855377197266, + "learning_rate": 2.557145468997241e-07, + "loss": 0.2876, + "step": 44999 + }, + { + "epoch": 0.9008332707754673, + "grad_norm": 1.204698920249939, + "learning_rate": 2.556122103815556e-07, + "loss": 0.2719, + "step": 45000 + }, + { + "epoch": 0.9008532892925957, + "grad_norm": 1.1019734144210815, + "learning_rate": 2.5550989380777104e-07, + "loss": 0.3228, + "step": 45001 + }, + { + "epoch": 0.900873307809724, + "grad_norm": 1.8659987449645996, + "learning_rate": 2.5540759717880283e-07, + "loss": 0.7556, + "step": 45002 + }, + { + "epoch": 0.9008933263268524, + "grad_norm": 1.154270887374878, + "learning_rate": 2.5530532049508006e-07, + "loss": 0.2939, + "step": 45003 + }, + { + "epoch": 0.9009133448439807, + "grad_norm": 1.238142490386963, + "learning_rate": 2.5520306375703295e-07, + "loss": 0.3345, + "step": 45004 + }, + { + "epoch": 0.9009333633611091, + "grad_norm": 1.7722567319869995, + "learning_rate": 2.551008269650901e-07, + "loss": 0.2915, + "step": 45005 + }, + { + "epoch": 0.9009533818782374, + "grad_norm": 1.184051275253296, + "learning_rate": 2.549986101196827e-07, + "loss": 0.3037, + "step": 45006 + }, + { + "epoch": 0.9009734003953657, + "grad_norm": 1.131211757659912, + "learning_rate": 2.548964132212395e-07, + "loss": 0.2755, + "step": 45007 + }, + { + "epoch": 0.9009934189124941, + "grad_norm": 1.0657217502593994, + "learning_rate": 2.5479423627019163e-07, + "loss": 0.2909, + "step": 45008 + }, + { + "epoch": 0.9010134374296224, + "grad_norm": 1.2585928440093994, + "learning_rate": 2.546920792669666e-07, + "loss": 0.2761, + "step": 45009 + }, + { + "epoch": 0.9010334559467508, + "grad_norm": 1.1373677253723145, + "learning_rate": 2.5458994221199573e-07, + "loss": 0.2868, + "step": 45010 + }, + { + "epoch": 0.9010534744638791, + "grad_norm": 1.0115958452224731, + "learning_rate": 2.54487825105707e-07, + "loss": 0.2835, + "step": 45011 + }, + { + "epoch": 0.9010734929810075, + "grad_norm": 1.1125251054763794, + "learning_rate": 2.543857279485307e-07, + "loss": 0.2633, + "step": 45012 + }, + { + "epoch": 0.9010935114981358, + "grad_norm": 1.4481310844421387, + "learning_rate": 2.542836507408958e-07, + "loss": 0.3006, + "step": 45013 + }, + { + "epoch": 0.9011135300152642, + "grad_norm": 1.0756911039352417, + "learning_rate": 2.541815934832298e-07, + "loss": 0.2627, + "step": 45014 + }, + { + "epoch": 0.9011335485323925, + "grad_norm": 1.1445342302322388, + "learning_rate": 2.5407955617596404e-07, + "loss": 0.2581, + "step": 45015 + }, + { + "epoch": 0.9011535670495208, + "grad_norm": 1.043048620223999, + "learning_rate": 2.5397753881952647e-07, + "loss": 0.3097, + "step": 45016 + }, + { + "epoch": 0.9011735855666492, + "grad_norm": 2.1566905975341797, + "learning_rate": 2.5387554141434625e-07, + "loss": 0.7567, + "step": 45017 + }, + { + "epoch": 0.9011936040837775, + "grad_norm": 1.0066367387771606, + "learning_rate": 2.5377356396085076e-07, + "loss": 0.2818, + "step": 45018 + }, + { + "epoch": 0.9012136226009059, + "grad_norm": 1.0822445154190063, + "learning_rate": 2.5367160645947077e-07, + "loss": 0.3104, + "step": 45019 + }, + { + "epoch": 0.9012336411180342, + "grad_norm": 1.1757326126098633, + "learning_rate": 2.535696689106332e-07, + "loss": 0.3256, + "step": 45020 + }, + { + "epoch": 0.9012536596351626, + "grad_norm": 1.270904779434204, + "learning_rate": 2.534677513147682e-07, + "loss": 0.3054, + "step": 45021 + }, + { + "epoch": 0.9012736781522909, + "grad_norm": 1.1523463726043701, + "learning_rate": 2.533658536723027e-07, + "loss": 0.2757, + "step": 45022 + }, + { + "epoch": 0.9012936966694192, + "grad_norm": 1.1239484548568726, + "learning_rate": 2.5326397598366524e-07, + "loss": 0.2915, + "step": 45023 + }, + { + "epoch": 0.9013137151865476, + "grad_norm": 1.1189892292022705, + "learning_rate": 2.531621182492855e-07, + "loss": 0.3074, + "step": 45024 + }, + { + "epoch": 0.9013337337036759, + "grad_norm": 1.0440220832824707, + "learning_rate": 2.5306028046959033e-07, + "loss": 0.2695, + "step": 45025 + }, + { + "epoch": 0.9013537522208043, + "grad_norm": 1.2538689374923706, + "learning_rate": 2.529584626450088e-07, + "loss": 0.3022, + "step": 45026 + }, + { + "epoch": 0.9013737707379326, + "grad_norm": 1.2632410526275635, + "learning_rate": 2.528566647759667e-07, + "loss": 0.3457, + "step": 45027 + }, + { + "epoch": 0.901393789255061, + "grad_norm": 1.0835002660751343, + "learning_rate": 2.5275488686289483e-07, + "loss": 0.2951, + "step": 45028 + }, + { + "epoch": 0.9014138077721893, + "grad_norm": 1.2656432390213013, + "learning_rate": 2.5265312890622006e-07, + "loss": 0.3273, + "step": 45029 + }, + { + "epoch": 0.9014338262893176, + "grad_norm": 1.1941763162612915, + "learning_rate": 2.525513909063693e-07, + "loss": 0.311, + "step": 45030 + }, + { + "epoch": 0.901453844806446, + "grad_norm": 1.0801032781600952, + "learning_rate": 2.524496728637704e-07, + "loss": 0.2952, + "step": 45031 + }, + { + "epoch": 0.9014738633235743, + "grad_norm": 1.320565104484558, + "learning_rate": 2.523479747788521e-07, + "loss": 0.3398, + "step": 45032 + }, + { + "epoch": 0.9014938818407027, + "grad_norm": 1.0494446754455566, + "learning_rate": 2.5224629665204057e-07, + "loss": 0.3043, + "step": 45033 + }, + { + "epoch": 0.901513900357831, + "grad_norm": 1.0482608079910278, + "learning_rate": 2.5214463848376447e-07, + "loss": 0.2736, + "step": 45034 + }, + { + "epoch": 0.9015339188749594, + "grad_norm": 0.926938533782959, + "learning_rate": 2.520430002744506e-07, + "loss": 0.2281, + "step": 45035 + }, + { + "epoch": 0.9015539373920877, + "grad_norm": 1.1051265001296997, + "learning_rate": 2.5194138202452536e-07, + "loss": 0.315, + "step": 45036 + }, + { + "epoch": 0.9015739559092161, + "grad_norm": 1.0820982456207275, + "learning_rate": 2.518397837344172e-07, + "loss": 0.32, + "step": 45037 + }, + { + "epoch": 0.9015939744263444, + "grad_norm": 1.124380111694336, + "learning_rate": 2.5173820540455363e-07, + "loss": 0.2924, + "step": 45038 + }, + { + "epoch": 0.9016139929434727, + "grad_norm": 1.0400487184524536, + "learning_rate": 2.516366470353598e-07, + "loss": 0.2874, + "step": 45039 + }, + { + "epoch": 0.9016340114606011, + "grad_norm": 1.1501301527023315, + "learning_rate": 2.515351086272638e-07, + "loss": 0.2904, + "step": 45040 + }, + { + "epoch": 0.9016540299777294, + "grad_norm": 1.1046785116195679, + "learning_rate": 2.514335901806925e-07, + "loss": 0.2868, + "step": 45041 + }, + { + "epoch": 0.9016740484948578, + "grad_norm": 1.136149525642395, + "learning_rate": 2.5133209169607265e-07, + "loss": 0.2895, + "step": 45042 + }, + { + "epoch": 0.9016940670119861, + "grad_norm": 1.1929336786270142, + "learning_rate": 2.512306131738307e-07, + "loss": 0.2789, + "step": 45043 + }, + { + "epoch": 0.9017140855291145, + "grad_norm": 1.102159857749939, + "learning_rate": 2.511291546143929e-07, + "loss": 0.2793, + "step": 45044 + }, + { + "epoch": 0.9017341040462428, + "grad_norm": 1.0979597568511963, + "learning_rate": 2.5102771601818677e-07, + "loss": 0.3083, + "step": 45045 + }, + { + "epoch": 0.9017541225633711, + "grad_norm": 0.9815446734428406, + "learning_rate": 2.509262973856369e-07, + "loss": 0.2777, + "step": 45046 + }, + { + "epoch": 0.9017741410804995, + "grad_norm": 1.1358578205108643, + "learning_rate": 2.508248987171724e-07, + "loss": 0.3197, + "step": 45047 + }, + { + "epoch": 0.9017941595976278, + "grad_norm": 0.9947620034217834, + "learning_rate": 2.5072352001321744e-07, + "loss": 0.2772, + "step": 45048 + }, + { + "epoch": 0.9018141781147562, + "grad_norm": 1.309170126914978, + "learning_rate": 2.5062216127419823e-07, + "loss": 0.2558, + "step": 45049 + }, + { + "epoch": 0.9018341966318845, + "grad_norm": 1.118869662284851, + "learning_rate": 2.5052082250054175e-07, + "loss": 0.3035, + "step": 45050 + }, + { + "epoch": 0.9018542151490129, + "grad_norm": 1.0734736919403076, + "learning_rate": 2.504195036926738e-07, + "loss": 0.3124, + "step": 45051 + }, + { + "epoch": 0.9018742336661412, + "grad_norm": 1.2677661180496216, + "learning_rate": 2.5031820485102054e-07, + "loss": 0.2985, + "step": 45052 + }, + { + "epoch": 0.9018942521832696, + "grad_norm": 1.1490215063095093, + "learning_rate": 2.502169259760068e-07, + "loss": 0.3048, + "step": 45053 + }, + { + "epoch": 0.9019142707003979, + "grad_norm": 1.9986786842346191, + "learning_rate": 2.501156670680588e-07, + "loss": 0.7529, + "step": 45054 + }, + { + "epoch": 0.9019342892175262, + "grad_norm": 1.1365330219268799, + "learning_rate": 2.500144281276029e-07, + "loss": 0.3056, + "step": 45055 + }, + { + "epoch": 0.9019543077346546, + "grad_norm": 1.1057007312774658, + "learning_rate": 2.4991320915506377e-07, + "loss": 0.3241, + "step": 45056 + }, + { + "epoch": 0.9019743262517829, + "grad_norm": 1.169650673866272, + "learning_rate": 2.4981201015086665e-07, + "loss": 0.3003, + "step": 45057 + }, + { + "epoch": 0.9019943447689113, + "grad_norm": 1.1912121772766113, + "learning_rate": 2.497108311154389e-07, + "loss": 0.3276, + "step": 45058 + }, + { + "epoch": 0.9020143632860396, + "grad_norm": 1.041382074356079, + "learning_rate": 2.4960967204920303e-07, + "loss": 0.3045, + "step": 45059 + }, + { + "epoch": 0.902034381803168, + "grad_norm": 1.4245365858078003, + "learning_rate": 2.4950853295258705e-07, + "loss": 0.3283, + "step": 45060 + }, + { + "epoch": 0.9020544003202963, + "grad_norm": 1.1102126836776733, + "learning_rate": 2.49407413826015e-07, + "loss": 0.3159, + "step": 45061 + }, + { + "epoch": 0.9020744188374246, + "grad_norm": 1.1742024421691895, + "learning_rate": 2.493063146699115e-07, + "loss": 0.278, + "step": 45062 + }, + { + "epoch": 0.902094437354553, + "grad_norm": 1.0798146724700928, + "learning_rate": 2.4920523548470075e-07, + "loss": 0.284, + "step": 45063 + }, + { + "epoch": 0.9021144558716813, + "grad_norm": 1.0504671335220337, + "learning_rate": 2.491041762708102e-07, + "loss": 0.2626, + "step": 45064 + }, + { + "epoch": 0.9021344743888097, + "grad_norm": 1.1287275552749634, + "learning_rate": 2.490031370286633e-07, + "loss": 0.3181, + "step": 45065 + }, + { + "epoch": 0.902154492905938, + "grad_norm": 1.3276801109313965, + "learning_rate": 2.4890211775868425e-07, + "loss": 0.3433, + "step": 45066 + }, + { + "epoch": 0.9021745114230664, + "grad_norm": 1.3686373233795166, + "learning_rate": 2.4880111846129815e-07, + "loss": 0.2706, + "step": 45067 + }, + { + "epoch": 0.9021945299401947, + "grad_norm": 1.3785649538040161, + "learning_rate": 2.487001391369298e-07, + "loss": 0.3362, + "step": 45068 + }, + { + "epoch": 0.9022145484573231, + "grad_norm": 1.1200447082519531, + "learning_rate": 2.4859917978600436e-07, + "loss": 0.3279, + "step": 45069 + }, + { + "epoch": 0.9022345669744514, + "grad_norm": 1.0758204460144043, + "learning_rate": 2.4849824040894423e-07, + "loss": 0.2993, + "step": 45070 + }, + { + "epoch": 0.9022545854915797, + "grad_norm": 1.9618852138519287, + "learning_rate": 2.4839732100617577e-07, + "loss": 0.7171, + "step": 45071 + }, + { + "epoch": 0.9022746040087081, + "grad_norm": 1.0668200254440308, + "learning_rate": 2.482964215781214e-07, + "loss": 0.2966, + "step": 45072 + }, + { + "epoch": 0.9022946225258364, + "grad_norm": 1.045182466506958, + "learning_rate": 2.4819554212520744e-07, + "loss": 0.2659, + "step": 45073 + }, + { + "epoch": 0.9023146410429648, + "grad_norm": 1.1623731851577759, + "learning_rate": 2.480946826478564e-07, + "loss": 0.2718, + "step": 45074 + }, + { + "epoch": 0.9023346595600931, + "grad_norm": 1.2481188774108887, + "learning_rate": 2.4799384314649287e-07, + "loss": 0.291, + "step": 45075 + }, + { + "epoch": 0.9023546780772215, + "grad_norm": 1.2120105028152466, + "learning_rate": 2.478930236215399e-07, + "loss": 0.3163, + "step": 45076 + }, + { + "epoch": 0.9023746965943498, + "grad_norm": 1.9345406293869019, + "learning_rate": 2.477922240734226e-07, + "loss": 0.7913, + "step": 45077 + }, + { + "epoch": 0.9023947151114781, + "grad_norm": 1.0913822650909424, + "learning_rate": 2.4769144450256467e-07, + "loss": 0.2504, + "step": 45078 + }, + { + "epoch": 0.9024147336286065, + "grad_norm": 1.9728572368621826, + "learning_rate": 2.4759068490938844e-07, + "loss": 0.7406, + "step": 45079 + }, + { + "epoch": 0.9024347521457348, + "grad_norm": 1.371781349182129, + "learning_rate": 2.47489945294318e-07, + "loss": 0.321, + "step": 45080 + }, + { + "epoch": 0.9024547706628632, + "grad_norm": 1.1163257360458374, + "learning_rate": 2.4738922565777756e-07, + "loss": 0.2654, + "step": 45081 + }, + { + "epoch": 0.9024747891799915, + "grad_norm": 1.356658697128296, + "learning_rate": 2.4728852600019004e-07, + "loss": 0.2928, + "step": 45082 + }, + { + "epoch": 0.9024948076971199, + "grad_norm": 0.9817155599594116, + "learning_rate": 2.4718784632197845e-07, + "loss": 0.3037, + "step": 45083 + }, + { + "epoch": 0.9025148262142482, + "grad_norm": 1.032581090927124, + "learning_rate": 2.470871866235669e-07, + "loss": 0.3096, + "step": 45084 + }, + { + "epoch": 0.9025348447313766, + "grad_norm": 1.0355019569396973, + "learning_rate": 2.469865469053767e-07, + "loss": 0.2468, + "step": 45085 + }, + { + "epoch": 0.9025548632485049, + "grad_norm": 1.0952564477920532, + "learning_rate": 2.4688592716783365e-07, + "loss": 0.3053, + "step": 45086 + }, + { + "epoch": 0.9025748817656332, + "grad_norm": 1.1079128980636597, + "learning_rate": 2.467853274113585e-07, + "loss": 0.2697, + "step": 45087 + }, + { + "epoch": 0.9025949002827616, + "grad_norm": 1.0141520500183105, + "learning_rate": 2.466847476363754e-07, + "loss": 0.2916, + "step": 45088 + }, + { + "epoch": 0.9026149187998899, + "grad_norm": 1.1207736730575562, + "learning_rate": 2.465841878433062e-07, + "loss": 0.2867, + "step": 45089 + }, + { + "epoch": 0.9026349373170183, + "grad_norm": 0.9921389818191528, + "learning_rate": 2.464836480325744e-07, + "loss": 0.2884, + "step": 45090 + }, + { + "epoch": 0.9026549558341466, + "grad_norm": 1.191131353378296, + "learning_rate": 2.4638312820460253e-07, + "loss": 0.3665, + "step": 45091 + }, + { + "epoch": 0.902674974351275, + "grad_norm": 1.1537927389144897, + "learning_rate": 2.462826283598135e-07, + "loss": 0.2899, + "step": 45092 + }, + { + "epoch": 0.9026949928684033, + "grad_norm": 1.0025110244750977, + "learning_rate": 2.4618214849862877e-07, + "loss": 0.2656, + "step": 45093 + }, + { + "epoch": 0.9027150113855316, + "grad_norm": 1.1184489727020264, + "learning_rate": 2.4608168862147064e-07, + "loss": 0.2839, + "step": 45094 + }, + { + "epoch": 0.90273502990266, + "grad_norm": 1.0565788745880127, + "learning_rate": 2.459812487287627e-07, + "loss": 0.3055, + "step": 45095 + }, + { + "epoch": 0.9027550484197883, + "grad_norm": 1.1781575679779053, + "learning_rate": 2.4588082882092577e-07, + "loss": 0.2989, + "step": 45096 + }, + { + "epoch": 0.9027750669369167, + "grad_norm": 1.0861377716064453, + "learning_rate": 2.457804288983834e-07, + "loss": 0.2987, + "step": 45097 + }, + { + "epoch": 0.902795085454045, + "grad_norm": 1.2112327814102173, + "learning_rate": 2.456800489615563e-07, + "loss": 0.2671, + "step": 45098 + }, + { + "epoch": 0.9028151039711734, + "grad_norm": 1.7589012384414673, + "learning_rate": 2.4557968901086806e-07, + "loss": 0.7714, + "step": 45099 + }, + { + "epoch": 0.9028351224883017, + "grad_norm": 1.186907172203064, + "learning_rate": 2.454793490467394e-07, + "loss": 0.3056, + "step": 45100 + }, + { + "epoch": 0.9028551410054301, + "grad_norm": 1.0839083194732666, + "learning_rate": 2.4537902906959235e-07, + "loss": 0.3238, + "step": 45101 + }, + { + "epoch": 0.9028751595225584, + "grad_norm": 1.0386978387832642, + "learning_rate": 2.452787290798481e-07, + "loss": 0.3086, + "step": 45102 + }, + { + "epoch": 0.9028951780396867, + "grad_norm": 1.1112313270568848, + "learning_rate": 2.4517844907792964e-07, + "loss": 0.2822, + "step": 45103 + }, + { + "epoch": 0.9029151965568151, + "grad_norm": 1.8570780754089355, + "learning_rate": 2.4507818906425784e-07, + "loss": 0.744, + "step": 45104 + }, + { + "epoch": 0.9029352150739434, + "grad_norm": 1.6409261226654053, + "learning_rate": 2.4497794903925345e-07, + "loss": 0.7545, + "step": 45105 + }, + { + "epoch": 0.9029552335910718, + "grad_norm": 1.830622911453247, + "learning_rate": 2.448777290033388e-07, + "loss": 0.7324, + "step": 45106 + }, + { + "epoch": 0.9029752521082001, + "grad_norm": 0.9696758985519409, + "learning_rate": 2.447775289569343e-07, + "loss": 0.2929, + "step": 45107 + }, + { + "epoch": 0.9029952706253285, + "grad_norm": 1.0441339015960693, + "learning_rate": 2.4467734890046226e-07, + "loss": 0.2838, + "step": 45108 + }, + { + "epoch": 0.9030152891424568, + "grad_norm": 1.1718246936798096, + "learning_rate": 2.445771888343429e-07, + "loss": 0.2877, + "step": 45109 + }, + { + "epoch": 0.9030353076595851, + "grad_norm": 1.084403157234192, + "learning_rate": 2.4447704875899813e-07, + "loss": 0.3154, + "step": 45110 + }, + { + "epoch": 0.9030553261767135, + "grad_norm": 1.0683341026306152, + "learning_rate": 2.4437692867484766e-07, + "loss": 0.2694, + "step": 45111 + }, + { + "epoch": 0.9030753446938418, + "grad_norm": 1.0468841791152954, + "learning_rate": 2.442768285823138e-07, + "loss": 0.3189, + "step": 45112 + }, + { + "epoch": 0.9030953632109702, + "grad_norm": 1.1146724224090576, + "learning_rate": 2.441767484818169e-07, + "loss": 0.2807, + "step": 45113 + }, + { + "epoch": 0.9031153817280985, + "grad_norm": 1.1144616603851318, + "learning_rate": 2.4407668837377775e-07, + "loss": 0.3162, + "step": 45114 + }, + { + "epoch": 0.9031354002452269, + "grad_norm": 1.2185485363006592, + "learning_rate": 2.439766482586164e-07, + "loss": 0.3324, + "step": 45115 + }, + { + "epoch": 0.9031554187623552, + "grad_norm": 1.2698910236358643, + "learning_rate": 2.438766281367527e-07, + "loss": 0.3204, + "step": 45116 + }, + { + "epoch": 0.9031754372794836, + "grad_norm": 1.0710550546646118, + "learning_rate": 2.437766280086096e-07, + "loss": 0.3522, + "step": 45117 + }, + { + "epoch": 0.9031954557966119, + "grad_norm": 1.0799537897109985, + "learning_rate": 2.4367664787460555e-07, + "loss": 0.2768, + "step": 45118 + }, + { + "epoch": 0.9032154743137402, + "grad_norm": 1.0538564920425415, + "learning_rate": 2.435766877351614e-07, + "loss": 0.2785, + "step": 45119 + }, + { + "epoch": 0.9032354928308686, + "grad_norm": 1.1303116083145142, + "learning_rate": 2.434767475906963e-07, + "loss": 0.2811, + "step": 45120 + }, + { + "epoch": 0.9032555113479969, + "grad_norm": 1.958365797996521, + "learning_rate": 2.4337682744163205e-07, + "loss": 0.6787, + "step": 45121 + }, + { + "epoch": 0.9032755298651253, + "grad_norm": 1.0466923713684082, + "learning_rate": 2.432769272883878e-07, + "loss": 0.2747, + "step": 45122 + }, + { + "epoch": 0.9032955483822536, + "grad_norm": 1.0222746133804321, + "learning_rate": 2.431770471313838e-07, + "loss": 0.2602, + "step": 45123 + }, + { + "epoch": 0.903315566899382, + "grad_norm": 1.123231053352356, + "learning_rate": 2.430771869710391e-07, + "loss": 0.3249, + "step": 45124 + }, + { + "epoch": 0.9033355854165103, + "grad_norm": 1.0984537601470947, + "learning_rate": 2.4297734680777565e-07, + "loss": 0.2836, + "step": 45125 + }, + { + "epoch": 0.9033556039336386, + "grad_norm": 0.9164889454841614, + "learning_rate": 2.4287752664201083e-07, + "loss": 0.2733, + "step": 45126 + }, + { + "epoch": 0.903375622450767, + "grad_norm": 1.2243428230285645, + "learning_rate": 2.427777264741654e-07, + "loss": 0.3131, + "step": 45127 + }, + { + "epoch": 0.9033956409678953, + "grad_norm": 1.8830440044403076, + "learning_rate": 2.4267794630465913e-07, + "loss": 0.7605, + "step": 45128 + }, + { + "epoch": 0.9034156594850237, + "grad_norm": 1.4551151990890503, + "learning_rate": 2.4257818613390996e-07, + "loss": 0.275, + "step": 45129 + }, + { + "epoch": 0.903435678002152, + "grad_norm": 1.0189863443374634, + "learning_rate": 2.424784459623386e-07, + "loss": 0.2482, + "step": 45130 + }, + { + "epoch": 0.9034556965192804, + "grad_norm": 1.2576247453689575, + "learning_rate": 2.423787257903648e-07, + "loss": 0.3255, + "step": 45131 + }, + { + "epoch": 0.9034757150364087, + "grad_norm": 1.1020820140838623, + "learning_rate": 2.4227902561840656e-07, + "loss": 0.3005, + "step": 45132 + }, + { + "epoch": 0.9034957335535371, + "grad_norm": 1.1330740451812744, + "learning_rate": 2.421793454468824e-07, + "loss": 0.3049, + "step": 45133 + }, + { + "epoch": 0.9035157520706654, + "grad_norm": 1.0720399618148804, + "learning_rate": 2.420796852762136e-07, + "loss": 0.2385, + "step": 45134 + }, + { + "epoch": 0.9035357705877937, + "grad_norm": 1.117697834968567, + "learning_rate": 2.4198004510681716e-07, + "loss": 0.3093, + "step": 45135 + }, + { + "epoch": 0.9035557891049221, + "grad_norm": 1.8762321472167969, + "learning_rate": 2.4188042493911326e-07, + "loss": 0.804, + "step": 45136 + }, + { + "epoch": 0.9035758076220504, + "grad_norm": 1.104445457458496, + "learning_rate": 2.417808247735198e-07, + "loss": 0.2844, + "step": 45137 + }, + { + "epoch": 0.9035958261391788, + "grad_norm": 1.1769604682922363, + "learning_rate": 2.416812446104555e-07, + "loss": 0.2689, + "step": 45138 + }, + { + "epoch": 0.9036158446563071, + "grad_norm": 1.163915991783142, + "learning_rate": 2.4158168445033935e-07, + "loss": 0.3113, + "step": 45139 + }, + { + "epoch": 0.9036358631734355, + "grad_norm": 1.8876513242721558, + "learning_rate": 2.414821442935905e-07, + "loss": 0.7915, + "step": 45140 + }, + { + "epoch": 0.9036558816905638, + "grad_norm": 1.0932728052139282, + "learning_rate": 2.413826241406264e-07, + "loss": 0.3282, + "step": 45141 + }, + { + "epoch": 0.9036759002076921, + "grad_norm": 1.3417717218399048, + "learning_rate": 2.4128312399186446e-07, + "loss": 0.2717, + "step": 45142 + }, + { + "epoch": 0.9036959187248205, + "grad_norm": 1.0547080039978027, + "learning_rate": 2.4118364384772553e-07, + "loss": 0.3059, + "step": 45143 + }, + { + "epoch": 0.9037159372419488, + "grad_norm": 1.156641960144043, + "learning_rate": 2.410841837086264e-07, + "loss": 0.3154, + "step": 45144 + }, + { + "epoch": 0.9037359557590772, + "grad_norm": 1.1734659671783447, + "learning_rate": 2.4098474357498516e-07, + "loss": 0.2548, + "step": 45145 + }, + { + "epoch": 0.9037559742762055, + "grad_norm": 1.493592381477356, + "learning_rate": 2.408853234472197e-07, + "loss": 0.304, + "step": 45146 + }, + { + "epoch": 0.9037759927933339, + "grad_norm": 1.212181806564331, + "learning_rate": 2.407859233257476e-07, + "loss": 0.2993, + "step": 45147 + }, + { + "epoch": 0.9037960113104622, + "grad_norm": 1.1738663911819458, + "learning_rate": 2.406865432109873e-07, + "loss": 0.3032, + "step": 45148 + }, + { + "epoch": 0.9038160298275906, + "grad_norm": 1.1358349323272705, + "learning_rate": 2.405871831033574e-07, + "loss": 0.3217, + "step": 45149 + }, + { + "epoch": 0.9038360483447189, + "grad_norm": 1.1444867849349976, + "learning_rate": 2.4048784300327475e-07, + "loss": 0.2972, + "step": 45150 + }, + { + "epoch": 0.9038560668618472, + "grad_norm": 0.9721025228500366, + "learning_rate": 2.4038852291115576e-07, + "loss": 0.2997, + "step": 45151 + }, + { + "epoch": 0.9038760853789756, + "grad_norm": 1.161059856414795, + "learning_rate": 2.402892228274206e-07, + "loss": 0.3182, + "step": 45152 + }, + { + "epoch": 0.9038961038961039, + "grad_norm": 1.0892621278762817, + "learning_rate": 2.401899427524845e-07, + "loss": 0.3028, + "step": 45153 + }, + { + "epoch": 0.9039161224132323, + "grad_norm": 1.050843596458435, + "learning_rate": 2.4009068268676607e-07, + "loss": 0.2821, + "step": 45154 + }, + { + "epoch": 0.9039361409303606, + "grad_norm": 1.1077176332473755, + "learning_rate": 2.39991442630681e-07, + "loss": 0.2873, + "step": 45155 + }, + { + "epoch": 0.903956159447489, + "grad_norm": 1.1610445976257324, + "learning_rate": 2.3989222258464837e-07, + "loss": 0.2742, + "step": 45156 + }, + { + "epoch": 0.9039761779646173, + "grad_norm": 1.1565734148025513, + "learning_rate": 2.3979302254908467e-07, + "loss": 0.3133, + "step": 45157 + }, + { + "epoch": 0.9039961964817456, + "grad_norm": 1.0737135410308838, + "learning_rate": 2.3969384252440663e-07, + "loss": 0.328, + "step": 45158 + }, + { + "epoch": 0.904016214998874, + "grad_norm": 1.1361124515533447, + "learning_rate": 2.3959468251103126e-07, + "loss": 0.2598, + "step": 45159 + }, + { + "epoch": 0.9040362335160023, + "grad_norm": 1.148627519607544, + "learning_rate": 2.3949554250937424e-07, + "loss": 0.2826, + "step": 45160 + }, + { + "epoch": 0.9040562520331307, + "grad_norm": 1.0696591138839722, + "learning_rate": 2.3939642251985417e-07, + "loss": 0.2535, + "step": 45161 + }, + { + "epoch": 0.904076270550259, + "grad_norm": 1.08328378200531, + "learning_rate": 2.392973225428874e-07, + "loss": 0.3178, + "step": 45162 + }, + { + "epoch": 0.9040962890673874, + "grad_norm": 1.8358938694000244, + "learning_rate": 2.391982425788902e-07, + "loss": 0.7531, + "step": 45163 + }, + { + "epoch": 0.9041163075845157, + "grad_norm": 1.1183091402053833, + "learning_rate": 2.390991826282779e-07, + "loss": 0.2619, + "step": 45164 + }, + { + "epoch": 0.9041363261016441, + "grad_norm": 1.0897042751312256, + "learning_rate": 2.390001426914695e-07, + "loss": 0.3429, + "step": 45165 + }, + { + "epoch": 0.9041563446187724, + "grad_norm": 1.169759750366211, + "learning_rate": 2.3890112276887975e-07, + "loss": 0.2911, + "step": 45166 + }, + { + "epoch": 0.9041763631359007, + "grad_norm": 1.0984089374542236, + "learning_rate": 2.38802122860925e-07, + "loss": 0.3004, + "step": 45167 + }, + { + "epoch": 0.9041963816530291, + "grad_norm": 1.9743667840957642, + "learning_rate": 2.3870314296802144e-07, + "loss": 0.8454, + "step": 45168 + }, + { + "epoch": 0.9042164001701574, + "grad_norm": 1.125244140625, + "learning_rate": 2.386041830905844e-07, + "loss": 0.2596, + "step": 45169 + }, + { + "epoch": 0.9042364186872858, + "grad_norm": 1.0337239503860474, + "learning_rate": 2.3850524322903134e-07, + "loss": 0.2215, + "step": 45170 + }, + { + "epoch": 0.9042564372044141, + "grad_norm": 1.1541897058486938, + "learning_rate": 2.38406323383778e-07, + "loss": 0.2425, + "step": 45171 + }, + { + "epoch": 0.9042764557215425, + "grad_norm": 1.8971761465072632, + "learning_rate": 2.3830742355523962e-07, + "loss": 0.7219, + "step": 45172 + }, + { + "epoch": 0.9042964742386708, + "grad_norm": 1.1530135869979858, + "learning_rate": 2.382085437438314e-07, + "loss": 0.3079, + "step": 45173 + }, + { + "epoch": 0.9043164927557991, + "grad_norm": 1.1648653745651245, + "learning_rate": 2.381096839499697e-07, + "loss": 0.3372, + "step": 45174 + }, + { + "epoch": 0.9043365112729275, + "grad_norm": 1.0482826232910156, + "learning_rate": 2.3801084417407082e-07, + "loss": 0.3017, + "step": 45175 + }, + { + "epoch": 0.9043565297900558, + "grad_norm": 1.1638977527618408, + "learning_rate": 2.3791202441654947e-07, + "loss": 0.3045, + "step": 45176 + }, + { + "epoch": 0.9043765483071842, + "grad_norm": 1.319924235343933, + "learning_rate": 2.3781322467782086e-07, + "loss": 0.2994, + "step": 45177 + }, + { + "epoch": 0.9043965668243125, + "grad_norm": 1.0358916521072388, + "learning_rate": 2.377144449583002e-07, + "loss": 0.2782, + "step": 45178 + }, + { + "epoch": 0.9044165853414409, + "grad_norm": 1.1493206024169922, + "learning_rate": 2.3761568525840384e-07, + "loss": 0.3109, + "step": 45179 + }, + { + "epoch": 0.9044366038585692, + "grad_norm": 1.099380612373352, + "learning_rate": 2.3751694557854643e-07, + "loss": 0.2873, + "step": 45180 + }, + { + "epoch": 0.9044566223756976, + "grad_norm": 0.9474595785140991, + "learning_rate": 2.3741822591914265e-07, + "loss": 0.2463, + "step": 45181 + }, + { + "epoch": 0.9044766408928259, + "grad_norm": 1.1296888589859009, + "learning_rate": 2.3731952628060718e-07, + "loss": 0.2843, + "step": 45182 + }, + { + "epoch": 0.9044966594099542, + "grad_norm": 1.0919716358184814, + "learning_rate": 2.3722084666335632e-07, + "loss": 0.3097, + "step": 45183 + }, + { + "epoch": 0.9045166779270826, + "grad_norm": 1.1210505962371826, + "learning_rate": 2.371221870678042e-07, + "loss": 0.321, + "step": 45184 + }, + { + "epoch": 0.9045366964442109, + "grad_norm": 1.1448003053665161, + "learning_rate": 2.3702354749436495e-07, + "loss": 0.2688, + "step": 45185 + }, + { + "epoch": 0.9045567149613393, + "grad_norm": 1.0623347759246826, + "learning_rate": 2.3692492794345323e-07, + "loss": 0.3157, + "step": 45186 + }, + { + "epoch": 0.9045767334784676, + "grad_norm": 1.1074752807617188, + "learning_rate": 2.368263284154837e-07, + "loss": 0.2907, + "step": 45187 + }, + { + "epoch": 0.904596751995596, + "grad_norm": 1.1219794750213623, + "learning_rate": 2.367277489108727e-07, + "loss": 0.2986, + "step": 45188 + }, + { + "epoch": 0.9046167705127243, + "grad_norm": 1.4064053297042847, + "learning_rate": 2.3662918943003266e-07, + "loss": 0.2917, + "step": 45189 + }, + { + "epoch": 0.9046367890298526, + "grad_norm": 1.1672383546829224, + "learning_rate": 2.3653064997337827e-07, + "loss": 0.329, + "step": 45190 + }, + { + "epoch": 0.904656807546981, + "grad_norm": 1.883473515510559, + "learning_rate": 2.3643213054132365e-07, + "loss": 0.733, + "step": 45191 + }, + { + "epoch": 0.9046768260641093, + "grad_norm": 1.9123696088790894, + "learning_rate": 2.3633363113428343e-07, + "loss": 0.7469, + "step": 45192 + }, + { + "epoch": 0.9046968445812377, + "grad_norm": 1.087920069694519, + "learning_rate": 2.3623515175267175e-07, + "loss": 0.2572, + "step": 45193 + }, + { + "epoch": 0.904716863098366, + "grad_norm": 1.0485271215438843, + "learning_rate": 2.3613669239690274e-07, + "loss": 0.2664, + "step": 45194 + }, + { + "epoch": 0.9047368816154944, + "grad_norm": 1.142441987991333, + "learning_rate": 2.360382530673888e-07, + "loss": 0.2685, + "step": 45195 + }, + { + "epoch": 0.9047569001326227, + "grad_norm": 2.0196313858032227, + "learning_rate": 2.359398337645452e-07, + "loss": 0.7202, + "step": 45196 + }, + { + "epoch": 0.9047769186497511, + "grad_norm": 1.1706411838531494, + "learning_rate": 2.3584143448878548e-07, + "loss": 0.3137, + "step": 45197 + }, + { + "epoch": 0.9047969371668794, + "grad_norm": 2.1119027137756348, + "learning_rate": 2.3574305524052378e-07, + "loss": 0.7494, + "step": 45198 + }, + { + "epoch": 0.9048169556840077, + "grad_norm": 1.145102858543396, + "learning_rate": 2.3564469602017137e-07, + "loss": 0.3337, + "step": 45199 + }, + { + "epoch": 0.9048369742011361, + "grad_norm": 1.0626386404037476, + "learning_rate": 2.355463568281441e-07, + "loss": 0.3101, + "step": 45200 + }, + { + "epoch": 0.9048569927182644, + "grad_norm": 1.118269920349121, + "learning_rate": 2.354480376648549e-07, + "loss": 0.2668, + "step": 45201 + }, + { + "epoch": 0.9048770112353928, + "grad_norm": 1.083713412284851, + "learning_rate": 2.3534973853071686e-07, + "loss": 0.2974, + "step": 45202 + }, + { + "epoch": 0.9048970297525211, + "grad_norm": 1.957718849182129, + "learning_rate": 2.352514594261429e-07, + "loss": 0.679, + "step": 45203 + }, + { + "epoch": 0.9049170482696495, + "grad_norm": 1.1735302209854126, + "learning_rate": 2.351532003515461e-07, + "loss": 0.3231, + "step": 45204 + }, + { + "epoch": 0.9049370667867778, + "grad_norm": 2.0762808322906494, + "learning_rate": 2.350549613073405e-07, + "loss": 0.7167, + "step": 45205 + }, + { + "epoch": 0.9049570853039061, + "grad_norm": 1.1378757953643799, + "learning_rate": 2.3495674229393806e-07, + "loss": 0.2555, + "step": 45206 + }, + { + "epoch": 0.9049771038210345, + "grad_norm": 2.0972816944122314, + "learning_rate": 2.3485854331175228e-07, + "loss": 0.7032, + "step": 45207 + }, + { + "epoch": 0.9049971223381628, + "grad_norm": 1.0497748851776123, + "learning_rate": 2.347603643611962e-07, + "loss": 0.3261, + "step": 45208 + }, + { + "epoch": 0.9050171408552912, + "grad_norm": 1.1462780237197876, + "learning_rate": 2.3466220544268058e-07, + "loss": 0.3144, + "step": 45209 + }, + { + "epoch": 0.9050371593724195, + "grad_norm": 2.1912481784820557, + "learning_rate": 2.3456406655662067e-07, + "loss": 0.7887, + "step": 45210 + }, + { + "epoch": 0.9050571778895479, + "grad_norm": 2.0962886810302734, + "learning_rate": 2.344659477034278e-07, + "loss": 0.7699, + "step": 45211 + }, + { + "epoch": 0.9050771964066762, + "grad_norm": 1.129006266593933, + "learning_rate": 2.3436784888351384e-07, + "loss": 0.3313, + "step": 45212 + }, + { + "epoch": 0.9050972149238046, + "grad_norm": 1.0470528602600098, + "learning_rate": 2.342697700972918e-07, + "loss": 0.2435, + "step": 45213 + }, + { + "epoch": 0.9051172334409329, + "grad_norm": 1.192152976989746, + "learning_rate": 2.3417171134517469e-07, + "loss": 0.3213, + "step": 45214 + }, + { + "epoch": 0.9051372519580612, + "grad_norm": 1.059415340423584, + "learning_rate": 2.3407367262757442e-07, + "loss": 0.3264, + "step": 45215 + }, + { + "epoch": 0.9051572704751896, + "grad_norm": 1.273194432258606, + "learning_rate": 2.339756539449023e-07, + "loss": 0.2633, + "step": 45216 + }, + { + "epoch": 0.9051772889923179, + "grad_norm": 1.026776909828186, + "learning_rate": 2.338776552975708e-07, + "loss": 0.2658, + "step": 45217 + }, + { + "epoch": 0.9051973075094463, + "grad_norm": 1.055187702178955, + "learning_rate": 2.3377967668599233e-07, + "loss": 0.2886, + "step": 45218 + }, + { + "epoch": 0.9052173260265746, + "grad_norm": 1.1289761066436768, + "learning_rate": 2.3368171811057827e-07, + "loss": 0.2788, + "step": 45219 + }, + { + "epoch": 0.905237344543703, + "grad_norm": 1.0544624328613281, + "learning_rate": 2.335837795717405e-07, + "loss": 0.2803, + "step": 45220 + }, + { + "epoch": 0.9052573630608313, + "grad_norm": 1.0723552703857422, + "learning_rate": 2.3348586106989091e-07, + "loss": 0.3098, + "step": 45221 + }, + { + "epoch": 0.9052773815779596, + "grad_norm": 1.1784496307373047, + "learning_rate": 2.3338796260544027e-07, + "loss": 0.3342, + "step": 45222 + }, + { + "epoch": 0.905297400095088, + "grad_norm": 1.1191600561141968, + "learning_rate": 2.3329008417880162e-07, + "loss": 0.3187, + "step": 45223 + }, + { + "epoch": 0.9053174186122163, + "grad_norm": 1.1343530416488647, + "learning_rate": 2.331922257903857e-07, + "loss": 0.301, + "step": 45224 + }, + { + "epoch": 0.9053374371293447, + "grad_norm": 1.0820616483688354, + "learning_rate": 2.3309438744060275e-07, + "loss": 0.2864, + "step": 45225 + }, + { + "epoch": 0.905357455646473, + "grad_norm": 2.1413369178771973, + "learning_rate": 2.329965691298658e-07, + "loss": 0.7007, + "step": 45226 + }, + { + "epoch": 0.9053774741636014, + "grad_norm": 1.1509745121002197, + "learning_rate": 2.328987708585856e-07, + "loss": 0.3189, + "step": 45227 + }, + { + "epoch": 0.9053974926807297, + "grad_norm": 1.1597967147827148, + "learning_rate": 2.3280099262717294e-07, + "loss": 0.2962, + "step": 45228 + }, + { + "epoch": 0.9054175111978581, + "grad_norm": 1.1259888410568237, + "learning_rate": 2.3270323443603916e-07, + "loss": 0.2626, + "step": 45229 + }, + { + "epoch": 0.9054375297149864, + "grad_norm": 1.0852024555206299, + "learning_rate": 2.326054962855956e-07, + "loss": 0.2967, + "step": 45230 + }, + { + "epoch": 0.9054575482321147, + "grad_norm": 1.0732204914093018, + "learning_rate": 2.3250777817625135e-07, + "loss": 0.3256, + "step": 45231 + }, + { + "epoch": 0.9054775667492431, + "grad_norm": 1.1510802507400513, + "learning_rate": 2.3241008010841892e-07, + "loss": 0.2739, + "step": 45232 + }, + { + "epoch": 0.9054975852663714, + "grad_norm": 1.0310287475585938, + "learning_rate": 2.3231240208250905e-07, + "loss": 0.3304, + "step": 45233 + }, + { + "epoch": 0.9055176037834998, + "grad_norm": 1.1711323261260986, + "learning_rate": 2.322147440989314e-07, + "loss": 0.2756, + "step": 45234 + }, + { + "epoch": 0.9055376223006281, + "grad_norm": 1.2385565042495728, + "learning_rate": 2.3211710615809624e-07, + "loss": 0.3246, + "step": 45235 + }, + { + "epoch": 0.9055576408177565, + "grad_norm": 1.2901216745376587, + "learning_rate": 2.3201948826041597e-07, + "loss": 0.2994, + "step": 45236 + }, + { + "epoch": 0.9055776593348848, + "grad_norm": 2.3118834495544434, + "learning_rate": 2.319218904062992e-07, + "loss": 0.7351, + "step": 45237 + }, + { + "epoch": 0.9055976778520131, + "grad_norm": 1.079317331314087, + "learning_rate": 2.3182431259615613e-07, + "loss": 0.3192, + "step": 45238 + }, + { + "epoch": 0.9056176963691415, + "grad_norm": 1.1178054809570312, + "learning_rate": 2.3172675483039754e-07, + "loss": 0.3135, + "step": 45239 + }, + { + "epoch": 0.9056377148862698, + "grad_norm": 1.2257720232009888, + "learning_rate": 2.316292171094342e-07, + "loss": 0.3, + "step": 45240 + }, + { + "epoch": 0.9056577334033982, + "grad_norm": 1.125600814819336, + "learning_rate": 2.3153169943367526e-07, + "loss": 0.302, + "step": 45241 + }, + { + "epoch": 0.9056777519205265, + "grad_norm": 1.0181055068969727, + "learning_rate": 2.3143420180353094e-07, + "loss": 0.2854, + "step": 45242 + }, + { + "epoch": 0.9056977704376549, + "grad_norm": 1.1132216453552246, + "learning_rate": 2.313367242194109e-07, + "loss": 0.2958, + "step": 45243 + }, + { + "epoch": 0.9057177889547832, + "grad_norm": 1.2297600507736206, + "learning_rate": 2.3123926668172482e-07, + "loss": 0.3326, + "step": 45244 + }, + { + "epoch": 0.9057378074719116, + "grad_norm": 1.0316977500915527, + "learning_rate": 2.311418291908829e-07, + "loss": 0.2636, + "step": 45245 + }, + { + "epoch": 0.9057578259890399, + "grad_norm": 1.165908694267273, + "learning_rate": 2.3104441174729487e-07, + "loss": 0.2911, + "step": 45246 + }, + { + "epoch": 0.9057778445061682, + "grad_norm": 1.154542326927185, + "learning_rate": 2.309470143513698e-07, + "loss": 0.3208, + "step": 45247 + }, + { + "epoch": 0.9057978630232966, + "grad_norm": 1.0815837383270264, + "learning_rate": 2.3084963700351626e-07, + "loss": 0.295, + "step": 45248 + }, + { + "epoch": 0.9058178815404249, + "grad_norm": 1.1135938167572021, + "learning_rate": 2.3075227970414503e-07, + "loss": 0.297, + "step": 45249 + }, + { + "epoch": 0.9058379000575533, + "grad_norm": 1.2810173034667969, + "learning_rate": 2.3065494245366526e-07, + "loss": 0.3107, + "step": 45250 + }, + { + "epoch": 0.9058579185746816, + "grad_norm": 1.1013158559799194, + "learning_rate": 2.3055762525248438e-07, + "loss": 0.2927, + "step": 45251 + }, + { + "epoch": 0.90587793709181, + "grad_norm": 1.072710633277893, + "learning_rate": 2.3046032810101428e-07, + "loss": 0.2953, + "step": 45252 + }, + { + "epoch": 0.9058979556089383, + "grad_norm": 1.1127614974975586, + "learning_rate": 2.3036305099966183e-07, + "loss": 0.2637, + "step": 45253 + }, + { + "epoch": 0.9059179741260666, + "grad_norm": 1.0602139234542847, + "learning_rate": 2.3026579394883675e-07, + "loss": 0.2738, + "step": 45254 + }, + { + "epoch": 0.905937992643195, + "grad_norm": 1.094612956047058, + "learning_rate": 2.3016855694894814e-07, + "loss": 0.2927, + "step": 45255 + }, + { + "epoch": 0.9059580111603233, + "grad_norm": 1.9961528778076172, + "learning_rate": 2.3007134000040455e-07, + "loss": 0.6876, + "step": 45256 + }, + { + "epoch": 0.9059780296774517, + "grad_norm": 1.0017307996749878, + "learning_rate": 2.29974143103614e-07, + "loss": 0.2713, + "step": 45257 + }, + { + "epoch": 0.90599804819458, + "grad_norm": 1.8929076194763184, + "learning_rate": 2.2987696625898614e-07, + "loss": 0.6929, + "step": 45258 + }, + { + "epoch": 0.9060180667117084, + "grad_norm": 1.2210339307785034, + "learning_rate": 2.29779809466929e-07, + "loss": 0.3305, + "step": 45259 + }, + { + "epoch": 0.9060380852288367, + "grad_norm": 1.254391074180603, + "learning_rate": 2.296826727278506e-07, + "loss": 0.339, + "step": 45260 + }, + { + "epoch": 0.9060581037459651, + "grad_norm": 0.9897557497024536, + "learning_rate": 2.2958555604216004e-07, + "loss": 0.2972, + "step": 45261 + }, + { + "epoch": 0.9060781222630934, + "grad_norm": 1.0645461082458496, + "learning_rate": 2.2948845941026421e-07, + "loss": 0.2989, + "step": 45262 + }, + { + "epoch": 0.9060981407802217, + "grad_norm": 1.11197829246521, + "learning_rate": 2.2939138283257333e-07, + "loss": 0.3148, + "step": 45263 + }, + { + "epoch": 0.9061181592973501, + "grad_norm": 1.0095545053482056, + "learning_rate": 2.2929432630949378e-07, + "loss": 0.272, + "step": 45264 + }, + { + "epoch": 0.9061381778144784, + "grad_norm": 1.876800537109375, + "learning_rate": 2.2919728984143464e-07, + "loss": 0.7771, + "step": 45265 + }, + { + "epoch": 0.9061581963316068, + "grad_norm": 1.0440393686294556, + "learning_rate": 2.2910027342880282e-07, + "loss": 0.2979, + "step": 45266 + }, + { + "epoch": 0.9061782148487351, + "grad_norm": 0.939526379108429, + "learning_rate": 2.2900327707200744e-07, + "loss": 0.2601, + "step": 45267 + }, + { + "epoch": 0.9061982333658635, + "grad_norm": 1.2750048637390137, + "learning_rate": 2.2890630077145538e-07, + "loss": 0.3092, + "step": 45268 + }, + { + "epoch": 0.9062182518829918, + "grad_norm": 1.1204885244369507, + "learning_rate": 2.2880934452755464e-07, + "loss": 0.2623, + "step": 45269 + }, + { + "epoch": 0.9062382704001201, + "grad_norm": 1.063011884689331, + "learning_rate": 2.287124083407122e-07, + "loss": 0.3001, + "step": 45270 + }, + { + "epoch": 0.9062582889172485, + "grad_norm": 1.0312162637710571, + "learning_rate": 2.286154922113365e-07, + "loss": 0.2977, + "step": 45271 + }, + { + "epoch": 0.9062783074343768, + "grad_norm": 1.207927942276001, + "learning_rate": 2.2851859613983453e-07, + "loss": 0.2558, + "step": 45272 + }, + { + "epoch": 0.9062983259515052, + "grad_norm": 1.8862899541854858, + "learning_rate": 2.2842172012661313e-07, + "loss": 0.6863, + "step": 45273 + }, + { + "epoch": 0.9063183444686335, + "grad_norm": 1.7099709510803223, + "learning_rate": 2.2832486417208034e-07, + "loss": 0.3025, + "step": 45274 + }, + { + "epoch": 0.9063383629857619, + "grad_norm": 1.0757393836975098, + "learning_rate": 2.2822802827664193e-07, + "loss": 0.2429, + "step": 45275 + }, + { + "epoch": 0.9063583815028902, + "grad_norm": 1.1415026187896729, + "learning_rate": 2.2813121244070703e-07, + "loss": 0.277, + "step": 45276 + }, + { + "epoch": 0.9063784000200186, + "grad_norm": 1.362255334854126, + "learning_rate": 2.2803441666468085e-07, + "loss": 0.2635, + "step": 45277 + }, + { + "epoch": 0.9063984185371469, + "grad_norm": 1.0860490798950195, + "learning_rate": 2.279376409489714e-07, + "loss": 0.2828, + "step": 45278 + }, + { + "epoch": 0.9064184370542752, + "grad_norm": 1.161346673965454, + "learning_rate": 2.2784088529398506e-07, + "loss": 0.2968, + "step": 45279 + }, + { + "epoch": 0.9064384555714036, + "grad_norm": 1.0646286010742188, + "learning_rate": 2.277441497001287e-07, + "loss": 0.3116, + "step": 45280 + }, + { + "epoch": 0.9064584740885319, + "grad_norm": 1.1866270303726196, + "learning_rate": 2.2764743416780976e-07, + "loss": 0.2437, + "step": 45281 + }, + { + "epoch": 0.9064784926056603, + "grad_norm": 1.0998051166534424, + "learning_rate": 2.2755073869743293e-07, + "loss": 0.3156, + "step": 45282 + }, + { + "epoch": 0.9064985111227886, + "grad_norm": 1.1805692911148071, + "learning_rate": 2.274540632894068e-07, + "loss": 0.3171, + "step": 45283 + }, + { + "epoch": 0.906518529639917, + "grad_norm": 1.1326543092727661, + "learning_rate": 2.273574079441354e-07, + "loss": 0.286, + "step": 45284 + }, + { + "epoch": 0.9065385481570453, + "grad_norm": 1.0385143756866455, + "learning_rate": 2.272607726620274e-07, + "loss": 0.2662, + "step": 45285 + }, + { + "epoch": 0.9065585666741736, + "grad_norm": 1.138074517250061, + "learning_rate": 2.2716415744348741e-07, + "loss": 0.3325, + "step": 45286 + }, + { + "epoch": 0.906578585191302, + "grad_norm": 1.0451688766479492, + "learning_rate": 2.2706756228892292e-07, + "loss": 0.2909, + "step": 45287 + }, + { + "epoch": 0.9065986037084303, + "grad_norm": 1.203372836112976, + "learning_rate": 2.26970987198738e-07, + "loss": 0.2926, + "step": 45288 + }, + { + "epoch": 0.9066186222255587, + "grad_norm": 1.881827473640442, + "learning_rate": 2.2687443217334072e-07, + "loss": 0.7728, + "step": 45289 + }, + { + "epoch": 0.906638640742687, + "grad_norm": 1.2085981369018555, + "learning_rate": 2.267778972131357e-07, + "loss": 0.3287, + "step": 45290 + }, + { + "epoch": 0.9066586592598154, + "grad_norm": 1.3527153730392456, + "learning_rate": 2.266813823185293e-07, + "loss": 0.2922, + "step": 45291 + }, + { + "epoch": 0.9066786777769437, + "grad_norm": 1.4834917783737183, + "learning_rate": 2.265848874899279e-07, + "loss": 0.3368, + "step": 45292 + }, + { + "epoch": 0.9066986962940721, + "grad_norm": 1.0062917470932007, + "learning_rate": 2.2648841272773502e-07, + "loss": 0.2707, + "step": 45293 + }, + { + "epoch": 0.9067187148112004, + "grad_norm": 1.0642000436782837, + "learning_rate": 2.2639195803235868e-07, + "loss": 0.3089, + "step": 45294 + }, + { + "epoch": 0.9067387333283287, + "grad_norm": 1.1428996324539185, + "learning_rate": 2.2629552340420303e-07, + "loss": 0.2917, + "step": 45295 + }, + { + "epoch": 0.9067587518454571, + "grad_norm": 1.9072874784469604, + "learning_rate": 2.261991088436738e-07, + "loss": 0.7647, + "step": 45296 + }, + { + "epoch": 0.9067787703625854, + "grad_norm": 1.1455528736114502, + "learning_rate": 2.261027143511757e-07, + "loss": 0.2807, + "step": 45297 + }, + { + "epoch": 0.9067987888797138, + "grad_norm": 1.7868590354919434, + "learning_rate": 2.2600633992711507e-07, + "loss": 0.7355, + "step": 45298 + }, + { + "epoch": 0.9068188073968421, + "grad_norm": 1.110422134399414, + "learning_rate": 2.2590998557189604e-07, + "loss": 0.2632, + "step": 45299 + }, + { + "epoch": 0.9068388259139705, + "grad_norm": 1.8571600914001465, + "learning_rate": 2.2581365128592435e-07, + "loss": 0.7412, + "step": 45300 + }, + { + "epoch": 0.9068588444310988, + "grad_norm": 1.2716923952102661, + "learning_rate": 2.257173370696042e-07, + "loss": 0.3345, + "step": 45301 + }, + { + "epoch": 0.9068788629482271, + "grad_norm": 1.1437218189239502, + "learning_rate": 2.256210429233413e-07, + "loss": 0.2944, + "step": 45302 + }, + { + "epoch": 0.9068988814653555, + "grad_norm": 1.080315113067627, + "learning_rate": 2.2552476884753927e-07, + "loss": 0.2255, + "step": 45303 + }, + { + "epoch": 0.9069188999824838, + "grad_norm": 2.037466526031494, + "learning_rate": 2.254285148426044e-07, + "loss": 0.776, + "step": 45304 + }, + { + "epoch": 0.9069389184996122, + "grad_norm": 1.1046180725097656, + "learning_rate": 2.2533228090894031e-07, + "loss": 0.2797, + "step": 45305 + }, + { + "epoch": 0.9069589370167405, + "grad_norm": 1.111194372177124, + "learning_rate": 2.2523606704695167e-07, + "loss": 0.3021, + "step": 45306 + }, + { + "epoch": 0.9069789555338689, + "grad_norm": 0.9960152506828308, + "learning_rate": 2.2513987325704367e-07, + "loss": 0.2722, + "step": 45307 + }, + { + "epoch": 0.9069989740509972, + "grad_norm": 1.1467355489730835, + "learning_rate": 2.2504369953961992e-07, + "loss": 0.2905, + "step": 45308 + }, + { + "epoch": 0.9070189925681256, + "grad_norm": 1.8239800930023193, + "learning_rate": 2.249475458950845e-07, + "loss": 0.7673, + "step": 45309 + }, + { + "epoch": 0.9070390110852539, + "grad_norm": 1.321864128112793, + "learning_rate": 2.2485141232384155e-07, + "loss": 0.2839, + "step": 45310 + }, + { + "epoch": 0.9070590296023822, + "grad_norm": 1.0959060192108154, + "learning_rate": 2.247552988262963e-07, + "loss": 0.312, + "step": 45311 + }, + { + "epoch": 0.9070790481195106, + "grad_norm": 1.1587239503860474, + "learning_rate": 2.2465920540285237e-07, + "loss": 0.294, + "step": 45312 + }, + { + "epoch": 0.9070990666366389, + "grad_norm": 1.154548168182373, + "learning_rate": 2.2456313205391378e-07, + "loss": 0.327, + "step": 45313 + }, + { + "epoch": 0.9071190851537673, + "grad_norm": 1.1925946474075317, + "learning_rate": 2.244670787798825e-07, + "loss": 0.268, + "step": 45314 + }, + { + "epoch": 0.9071391036708956, + "grad_norm": 1.1464109420776367, + "learning_rate": 2.243710455811654e-07, + "loss": 0.3392, + "step": 45315 + }, + { + "epoch": 0.907159122188024, + "grad_norm": 1.102429747581482, + "learning_rate": 2.2427503245816385e-07, + "loss": 0.2761, + "step": 45316 + }, + { + "epoch": 0.9071791407051523, + "grad_norm": 1.1650941371917725, + "learning_rate": 2.241790394112825e-07, + "loss": 0.2921, + "step": 45317 + }, + { + "epoch": 0.9071991592222806, + "grad_norm": 1.1337357759475708, + "learning_rate": 2.2408306644092492e-07, + "loss": 0.2453, + "step": 45318 + }, + { + "epoch": 0.907219177739409, + "grad_norm": 1.10621178150177, + "learning_rate": 2.2398711354749414e-07, + "loss": 0.2641, + "step": 45319 + }, + { + "epoch": 0.9072391962565373, + "grad_norm": 1.0828288793563843, + "learning_rate": 2.238911807313937e-07, + "loss": 0.2882, + "step": 45320 + }, + { + "epoch": 0.9072592147736657, + "grad_norm": 1.8037749528884888, + "learning_rate": 2.2379526799302775e-07, + "loss": 0.7206, + "step": 45321 + }, + { + "epoch": 0.907279233290794, + "grad_norm": 1.1035243272781372, + "learning_rate": 2.2369937533279817e-07, + "loss": 0.3226, + "step": 45322 + }, + { + "epoch": 0.9072992518079224, + "grad_norm": 1.243450403213501, + "learning_rate": 2.2360350275110855e-07, + "loss": 0.3029, + "step": 45323 + }, + { + "epoch": 0.9073192703250507, + "grad_norm": 1.189978003501892, + "learning_rate": 2.2350765024836075e-07, + "loss": 0.3435, + "step": 45324 + }, + { + "epoch": 0.9073392888421791, + "grad_norm": 1.0437322854995728, + "learning_rate": 2.2341181782496002e-07, + "loss": 0.2603, + "step": 45325 + }, + { + "epoch": 0.9073593073593074, + "grad_norm": 1.9207427501678467, + "learning_rate": 2.2331600548130827e-07, + "loss": 0.7906, + "step": 45326 + }, + { + "epoch": 0.9073793258764357, + "grad_norm": 1.0698174238204956, + "learning_rate": 2.232202132178074e-07, + "loss": 0.3335, + "step": 45327 + }, + { + "epoch": 0.9073993443935641, + "grad_norm": 1.2755227088928223, + "learning_rate": 2.2312444103486098e-07, + "loss": 0.302, + "step": 45328 + }, + { + "epoch": 0.9074193629106924, + "grad_norm": 1.1385859251022339, + "learning_rate": 2.2302868893287088e-07, + "loss": 0.3263, + "step": 45329 + }, + { + "epoch": 0.9074393814278208, + "grad_norm": 1.287654995918274, + "learning_rate": 2.2293295691224125e-07, + "loss": 0.3024, + "step": 45330 + }, + { + "epoch": 0.9074593999449491, + "grad_norm": 1.0445373058319092, + "learning_rate": 2.228372449733729e-07, + "loss": 0.3145, + "step": 45331 + }, + { + "epoch": 0.9074794184620775, + "grad_norm": 1.0705316066741943, + "learning_rate": 2.2274155311666877e-07, + "loss": 0.3211, + "step": 45332 + }, + { + "epoch": 0.9074994369792058, + "grad_norm": 1.021147608757019, + "learning_rate": 2.2264588134253085e-07, + "loss": 0.248, + "step": 45333 + }, + { + "epoch": 0.9075194554963341, + "grad_norm": 1.8965245485305786, + "learning_rate": 2.225502296513621e-07, + "loss": 0.7681, + "step": 45334 + }, + { + "epoch": 0.9075394740134625, + "grad_norm": 1.2987531423568726, + "learning_rate": 2.2245459804356384e-07, + "loss": 0.3411, + "step": 45335 + }, + { + "epoch": 0.9075594925305908, + "grad_norm": 1.1157712936401367, + "learning_rate": 2.2235898651953858e-07, + "loss": 0.2521, + "step": 45336 + }, + { + "epoch": 0.9075795110477192, + "grad_norm": 1.1275540590286255, + "learning_rate": 2.2226339507968707e-07, + "loss": 0.349, + "step": 45337 + }, + { + "epoch": 0.9075995295648475, + "grad_norm": 1.1681193113327026, + "learning_rate": 2.221678237244129e-07, + "loss": 0.3324, + "step": 45338 + }, + { + "epoch": 0.9076195480819759, + "grad_norm": 1.232259750366211, + "learning_rate": 2.220722724541169e-07, + "loss": 0.2819, + "step": 45339 + }, + { + "epoch": 0.9076395665991042, + "grad_norm": 1.0961028337478638, + "learning_rate": 2.2197674126920032e-07, + "loss": 0.2766, + "step": 45340 + }, + { + "epoch": 0.9076595851162326, + "grad_norm": 1.1423022747039795, + "learning_rate": 2.2188123017006567e-07, + "loss": 0.285, + "step": 45341 + }, + { + "epoch": 0.9076796036333609, + "grad_norm": 1.0973188877105713, + "learning_rate": 2.217857391571132e-07, + "loss": 0.2565, + "step": 45342 + }, + { + "epoch": 0.9076996221504892, + "grad_norm": 1.0930166244506836, + "learning_rate": 2.2169026823074645e-07, + "loss": 0.3144, + "step": 45343 + }, + { + "epoch": 0.9077196406676176, + "grad_norm": 1.0191489458084106, + "learning_rate": 2.2159481739136513e-07, + "loss": 0.3105, + "step": 45344 + }, + { + "epoch": 0.9077396591847459, + "grad_norm": 1.1186046600341797, + "learning_rate": 2.2149938663937108e-07, + "loss": 0.3018, + "step": 45345 + }, + { + "epoch": 0.9077596777018743, + "grad_norm": 1.935933232307434, + "learning_rate": 2.214039759751646e-07, + "loss": 0.8179, + "step": 45346 + }, + { + "epoch": 0.9077796962190026, + "grad_norm": 1.1612637042999268, + "learning_rate": 2.2130858539914812e-07, + "loss": 0.2996, + "step": 45347 + }, + { + "epoch": 0.907799714736131, + "grad_norm": 1.0611881017684937, + "learning_rate": 2.2121321491172188e-07, + "loss": 0.3041, + "step": 45348 + }, + { + "epoch": 0.9078197332532593, + "grad_norm": 1.120809555053711, + "learning_rate": 2.2111786451328666e-07, + "loss": 0.301, + "step": 45349 + }, + { + "epoch": 0.9078397517703876, + "grad_norm": 2.0241341590881348, + "learning_rate": 2.2102253420424324e-07, + "loss": 0.723, + "step": 45350 + }, + { + "epoch": 0.907859770287516, + "grad_norm": 1.0901198387145996, + "learning_rate": 2.20927223984993e-07, + "loss": 0.3234, + "step": 45351 + }, + { + "epoch": 0.9078797888046443, + "grad_norm": 1.051758885383606, + "learning_rate": 2.2083193385593616e-07, + "loss": 0.2516, + "step": 45352 + }, + { + "epoch": 0.9078998073217727, + "grad_norm": 1.9924230575561523, + "learning_rate": 2.2073666381747295e-07, + "loss": 0.7722, + "step": 45353 + }, + { + "epoch": 0.907919825838901, + "grad_norm": 2.099536657333374, + "learning_rate": 2.2064141387000526e-07, + "loss": 0.7284, + "step": 45354 + }, + { + "epoch": 0.9079398443560294, + "grad_norm": 1.08603036403656, + "learning_rate": 2.2054618401393113e-07, + "loss": 0.2707, + "step": 45355 + }, + { + "epoch": 0.9079598628731577, + "grad_norm": 1.0628174543380737, + "learning_rate": 2.2045097424965357e-07, + "loss": 0.2622, + "step": 45356 + }, + { + "epoch": 0.9079798813902861, + "grad_norm": 1.2577433586120605, + "learning_rate": 2.2035578457757112e-07, + "loss": 0.3478, + "step": 45357 + }, + { + "epoch": 0.9079998999074144, + "grad_norm": 1.1068284511566162, + "learning_rate": 2.202606149980846e-07, + "loss": 0.3228, + "step": 45358 + }, + { + "epoch": 0.9080199184245427, + "grad_norm": 1.1371896266937256, + "learning_rate": 2.2016546551159314e-07, + "loss": 0.3126, + "step": 45359 + }, + { + "epoch": 0.9080399369416711, + "grad_norm": 1.0055992603302002, + "learning_rate": 2.200703361184986e-07, + "loss": 0.2731, + "step": 45360 + }, + { + "epoch": 0.9080599554587994, + "grad_norm": 1.135599136352539, + "learning_rate": 2.1997522681919903e-07, + "loss": 0.2807, + "step": 45361 + }, + { + "epoch": 0.9080799739759278, + "grad_norm": 1.8678144216537476, + "learning_rate": 2.198801376140952e-07, + "loss": 0.6927, + "step": 45362 + }, + { + "epoch": 0.9080999924930561, + "grad_norm": 1.012615442276001, + "learning_rate": 2.1978506850358572e-07, + "loss": 0.2639, + "step": 45363 + }, + { + "epoch": 0.9081200110101845, + "grad_norm": 1.8535709381103516, + "learning_rate": 2.196900194880719e-07, + "loss": 0.8466, + "step": 45364 + }, + { + "epoch": 0.9081400295273128, + "grad_norm": 1.287627935409546, + "learning_rate": 2.195949905679523e-07, + "loss": 0.3033, + "step": 45365 + }, + { + "epoch": 0.9081600480444411, + "grad_norm": 1.0676944255828857, + "learning_rate": 2.194999817436261e-07, + "loss": 0.2961, + "step": 45366 + }, + { + "epoch": 0.9081800665615695, + "grad_norm": 1.1653685569763184, + "learning_rate": 2.19404993015494e-07, + "loss": 0.356, + "step": 45367 + }, + { + "epoch": 0.9082000850786978, + "grad_norm": 0.943455159664154, + "learning_rate": 2.193100243839541e-07, + "loss": 0.2585, + "step": 45368 + }, + { + "epoch": 0.9082201035958262, + "grad_norm": 1.924511194229126, + "learning_rate": 2.1921507584940604e-07, + "loss": 0.7792, + "step": 45369 + }, + { + "epoch": 0.9082401221129545, + "grad_norm": 1.0466152429580688, + "learning_rate": 2.1912014741224953e-07, + "loss": 0.2886, + "step": 45370 + }, + { + "epoch": 0.9082601406300829, + "grad_norm": 1.2742328643798828, + "learning_rate": 2.1902523907288254e-07, + "loss": 0.3137, + "step": 45371 + }, + { + "epoch": 0.9082801591472112, + "grad_norm": 0.990848183631897, + "learning_rate": 2.189303508317042e-07, + "loss": 0.3095, + "step": 45372 + }, + { + "epoch": 0.9083001776643395, + "grad_norm": 1.1104826927185059, + "learning_rate": 2.1883548268911426e-07, + "loss": 0.2928, + "step": 45373 + }, + { + "epoch": 0.9083201961814679, + "grad_norm": 1.0824347734451294, + "learning_rate": 2.1874063464551122e-07, + "loss": 0.2437, + "step": 45374 + }, + { + "epoch": 0.9083402146985962, + "grad_norm": 1.130986213684082, + "learning_rate": 2.1864580670129365e-07, + "loss": 0.2929, + "step": 45375 + }, + { + "epoch": 0.9083602332157246, + "grad_norm": 1.9397495985031128, + "learning_rate": 2.1855099885686015e-07, + "loss": 0.8065, + "step": 45376 + }, + { + "epoch": 0.9083802517328529, + "grad_norm": 1.2246003150939941, + "learning_rate": 2.1845621111260872e-07, + "loss": 0.3053, + "step": 45377 + }, + { + "epoch": 0.9084002702499813, + "grad_norm": 1.1489652395248413, + "learning_rate": 2.183614434689385e-07, + "loss": 0.3158, + "step": 45378 + }, + { + "epoch": 0.9084202887671096, + "grad_norm": 1.077878713607788, + "learning_rate": 2.182666959262475e-07, + "loss": 0.2888, + "step": 45379 + }, + { + "epoch": 0.908440307284238, + "grad_norm": 1.2293477058410645, + "learning_rate": 2.1817196848493483e-07, + "loss": 0.331, + "step": 45380 + }, + { + "epoch": 0.9084603258013663, + "grad_norm": 1.0985994338989258, + "learning_rate": 2.180772611453974e-07, + "loss": 0.308, + "step": 45381 + }, + { + "epoch": 0.9084803443184946, + "grad_norm": 1.1722320318222046, + "learning_rate": 2.179825739080349e-07, + "loss": 0.2929, + "step": 45382 + }, + { + "epoch": 0.908500362835623, + "grad_norm": 1.0690205097198486, + "learning_rate": 2.1788790677324479e-07, + "loss": 0.3335, + "step": 45383 + }, + { + "epoch": 0.9085203813527513, + "grad_norm": 1.0064406394958496, + "learning_rate": 2.1779325974142452e-07, + "loss": 0.2855, + "step": 45384 + }, + { + "epoch": 0.9085403998698797, + "grad_norm": 1.0991491079330444, + "learning_rate": 2.1769863281297154e-07, + "loss": 0.2526, + "step": 45385 + }, + { + "epoch": 0.908560418387008, + "grad_norm": 1.1113728284835815, + "learning_rate": 2.1760402598828501e-07, + "loss": 0.3088, + "step": 45386 + }, + { + "epoch": 0.9085804369041364, + "grad_norm": 1.1145777702331543, + "learning_rate": 2.1750943926776236e-07, + "loss": 0.3292, + "step": 45387 + }, + { + "epoch": 0.9086004554212647, + "grad_norm": 1.06135094165802, + "learning_rate": 2.174148726518005e-07, + "loss": 0.2961, + "step": 45388 + }, + { + "epoch": 0.908620473938393, + "grad_norm": 1.084197759628296, + "learning_rate": 2.173203261407969e-07, + "loss": 0.2967, + "step": 45389 + }, + { + "epoch": 0.9086404924555214, + "grad_norm": 1.1885418891906738, + "learning_rate": 2.17225799735149e-07, + "loss": 0.2935, + "step": 45390 + }, + { + "epoch": 0.9086605109726497, + "grad_norm": 1.1599935293197632, + "learning_rate": 2.1713129343525542e-07, + "loss": 0.2747, + "step": 45391 + }, + { + "epoch": 0.9086805294897781, + "grad_norm": 1.7642725706100464, + "learning_rate": 2.1703680724151188e-07, + "loss": 0.7726, + "step": 45392 + }, + { + "epoch": 0.9087005480069064, + "grad_norm": 1.1156725883483887, + "learning_rate": 2.16942341154317e-07, + "loss": 0.2688, + "step": 45393 + }, + { + "epoch": 0.9087205665240348, + "grad_norm": 1.1116424798965454, + "learning_rate": 2.1684789517406657e-07, + "loss": 0.2944, + "step": 45394 + }, + { + "epoch": 0.9087405850411631, + "grad_norm": 1.035525918006897, + "learning_rate": 2.1675346930115914e-07, + "loss": 0.2938, + "step": 45395 + }, + { + "epoch": 0.9087606035582915, + "grad_norm": 1.110024094581604, + "learning_rate": 2.1665906353598998e-07, + "loss": 0.3396, + "step": 45396 + }, + { + "epoch": 0.9087806220754198, + "grad_norm": 1.0551567077636719, + "learning_rate": 2.1656467787895762e-07, + "loss": 0.2851, + "step": 45397 + }, + { + "epoch": 0.9088006405925481, + "grad_norm": 1.0602608919143677, + "learning_rate": 2.1647031233045736e-07, + "loss": 0.2439, + "step": 45398 + }, + { + "epoch": 0.9088206591096765, + "grad_norm": 1.0714442729949951, + "learning_rate": 2.1637596689088603e-07, + "loss": 0.3042, + "step": 45399 + }, + { + "epoch": 0.9088406776268048, + "grad_norm": 1.0449912548065186, + "learning_rate": 2.1628164156064113e-07, + "loss": 0.2875, + "step": 45400 + }, + { + "epoch": 0.9088606961439332, + "grad_norm": 1.1692347526550293, + "learning_rate": 2.1618733634011902e-07, + "loss": 0.2858, + "step": 45401 + }, + { + "epoch": 0.9088807146610615, + "grad_norm": 1.0404762029647827, + "learning_rate": 2.1609305122971547e-07, + "loss": 0.2441, + "step": 45402 + }, + { + "epoch": 0.9089007331781899, + "grad_norm": 1.0881778001785278, + "learning_rate": 2.1599878622982684e-07, + "loss": 0.2845, + "step": 45403 + }, + { + "epoch": 0.9089207516953182, + "grad_norm": 1.0421571731567383, + "learning_rate": 2.159045413408506e-07, + "loss": 0.2833, + "step": 45404 + }, + { + "epoch": 0.9089407702124465, + "grad_norm": 1.2208575010299683, + "learning_rate": 2.158103165631814e-07, + "loss": 0.2724, + "step": 45405 + }, + { + "epoch": 0.9089607887295749, + "grad_norm": 1.0035253763198853, + "learning_rate": 2.1571611189721674e-07, + "loss": 0.2926, + "step": 45406 + }, + { + "epoch": 0.9089808072467032, + "grad_norm": 1.120078444480896, + "learning_rate": 2.1562192734335184e-07, + "loss": 0.3027, + "step": 45407 + }, + { + "epoch": 0.9090008257638316, + "grad_norm": 1.0777088403701782, + "learning_rate": 2.1552776290198197e-07, + "loss": 0.2694, + "step": 45408 + }, + { + "epoch": 0.9090208442809599, + "grad_norm": 1.7147184610366821, + "learning_rate": 2.1543361857350453e-07, + "loss": 0.6916, + "step": 45409 + }, + { + "epoch": 0.9090408627980883, + "grad_norm": 1.1373727321624756, + "learning_rate": 2.1533949435831425e-07, + "loss": 0.3131, + "step": 45410 + }, + { + "epoch": 0.9090608813152166, + "grad_norm": 1.075724482536316, + "learning_rate": 2.1524539025680745e-07, + "loss": 0.2886, + "step": 45411 + }, + { + "epoch": 0.909080899832345, + "grad_norm": 1.1464265584945679, + "learning_rate": 2.1515130626937886e-07, + "loss": 0.2728, + "step": 45412 + }, + { + "epoch": 0.9091009183494733, + "grad_norm": 1.2315360307693481, + "learning_rate": 2.1505724239642478e-07, + "loss": 0.2836, + "step": 45413 + }, + { + "epoch": 0.9091209368666016, + "grad_norm": 1.2128031253814697, + "learning_rate": 2.149631986383405e-07, + "loss": 0.3037, + "step": 45414 + }, + { + "epoch": 0.90914095538373, + "grad_norm": 1.145203948020935, + "learning_rate": 2.1486917499552061e-07, + "loss": 0.2419, + "step": 45415 + }, + { + "epoch": 0.9091609739008583, + "grad_norm": 1.4322212934494019, + "learning_rate": 2.1477517146836102e-07, + "loss": 0.2835, + "step": 45416 + }, + { + "epoch": 0.9091809924179867, + "grad_norm": 1.0816762447357178, + "learning_rate": 2.146811880572569e-07, + "loss": 0.281, + "step": 45417 + }, + { + "epoch": 0.909201010935115, + "grad_norm": 1.0717893838882446, + "learning_rate": 2.1458722476260296e-07, + "loss": 0.2855, + "step": 45418 + }, + { + "epoch": 0.9092210294522434, + "grad_norm": 1.1486068964004517, + "learning_rate": 2.14493281584795e-07, + "loss": 0.3042, + "step": 45419 + }, + { + "epoch": 0.9092410479693717, + "grad_norm": 1.044468641281128, + "learning_rate": 2.1439935852422767e-07, + "loss": 0.2695, + "step": 45420 + }, + { + "epoch": 0.9092610664865, + "grad_norm": 1.069092869758606, + "learning_rate": 2.1430545558129457e-07, + "loss": 0.28, + "step": 45421 + }, + { + "epoch": 0.9092810850036284, + "grad_norm": 1.1174513101577759, + "learning_rate": 2.1421157275639203e-07, + "loss": 0.2777, + "step": 45422 + }, + { + "epoch": 0.9093011035207567, + "grad_norm": 1.2327425479888916, + "learning_rate": 2.1411771004991477e-07, + "loss": 0.3089, + "step": 45423 + }, + { + "epoch": 0.9093211220378851, + "grad_norm": 1.270241141319275, + "learning_rate": 2.1402386746225635e-07, + "loss": 0.3058, + "step": 45424 + }, + { + "epoch": 0.9093411405550134, + "grad_norm": 1.0796444416046143, + "learning_rate": 2.1393004499381088e-07, + "loss": 0.3028, + "step": 45425 + }, + { + "epoch": 0.9093611590721418, + "grad_norm": 1.1555880308151245, + "learning_rate": 2.1383624264497415e-07, + "loss": 0.2715, + "step": 45426 + }, + { + "epoch": 0.9093811775892701, + "grad_norm": 1.0217854976654053, + "learning_rate": 2.1374246041613978e-07, + "loss": 0.2601, + "step": 45427 + }, + { + "epoch": 0.9094011961063985, + "grad_norm": 1.2150238752365112, + "learning_rate": 2.136486983077024e-07, + "loss": 0.3114, + "step": 45428 + }, + { + "epoch": 0.9094212146235268, + "grad_norm": 1.0158414840698242, + "learning_rate": 2.1355495632005509e-07, + "loss": 0.279, + "step": 45429 + }, + { + "epoch": 0.9094412331406551, + "grad_norm": 1.8703478574752808, + "learning_rate": 2.13461234453593e-07, + "loss": 0.8306, + "step": 45430 + }, + { + "epoch": 0.9094612516577835, + "grad_norm": 1.0701717138290405, + "learning_rate": 2.133675327087098e-07, + "loss": 0.2986, + "step": 45431 + }, + { + "epoch": 0.9094812701749118, + "grad_norm": 1.1842166185379028, + "learning_rate": 2.1327385108579956e-07, + "loss": 0.2787, + "step": 45432 + }, + { + "epoch": 0.9095012886920402, + "grad_norm": 1.168994665145874, + "learning_rate": 2.1318018958525644e-07, + "loss": 0.2682, + "step": 45433 + }, + { + "epoch": 0.9095213072091685, + "grad_norm": 1.3581918478012085, + "learning_rate": 2.130865482074723e-07, + "loss": 0.3028, + "step": 45434 + }, + { + "epoch": 0.9095413257262969, + "grad_norm": 1.066469669342041, + "learning_rate": 2.1299292695284356e-07, + "loss": 0.3028, + "step": 45435 + }, + { + "epoch": 0.9095613442434252, + "grad_norm": 1.0712523460388184, + "learning_rate": 2.1289932582176153e-07, + "loss": 0.2716, + "step": 45436 + }, + { + "epoch": 0.9095813627605535, + "grad_norm": 0.973519504070282, + "learning_rate": 2.1280574481462147e-07, + "loss": 0.3294, + "step": 45437 + }, + { + "epoch": 0.9096013812776819, + "grad_norm": 1.0930804014205933, + "learning_rate": 2.1271218393181526e-07, + "loss": 0.291, + "step": 45438 + }, + { + "epoch": 0.9096213997948102, + "grad_norm": 1.0922995805740356, + "learning_rate": 2.1261864317373592e-07, + "loss": 0.2841, + "step": 45439 + }, + { + "epoch": 0.9096414183119386, + "grad_norm": 1.8271015882492065, + "learning_rate": 2.1252512254077873e-07, + "loss": 0.7141, + "step": 45440 + }, + { + "epoch": 0.9096614368290669, + "grad_norm": 1.1967836618423462, + "learning_rate": 2.1243162203333555e-07, + "loss": 0.2983, + "step": 45441 + }, + { + "epoch": 0.9096814553461953, + "grad_norm": 1.1103253364562988, + "learning_rate": 2.1233814165179888e-07, + "loss": 0.2847, + "step": 45442 + }, + { + "epoch": 0.9097014738633236, + "grad_norm": 1.113596796989441, + "learning_rate": 2.1224468139656284e-07, + "loss": 0.2667, + "step": 45443 + }, + { + "epoch": 0.909721492380452, + "grad_norm": 1.0943570137023926, + "learning_rate": 2.1215124126801934e-07, + "loss": 0.3429, + "step": 45444 + }, + { + "epoch": 0.9097415108975803, + "grad_norm": 1.0511270761489868, + "learning_rate": 2.1205782126656195e-07, + "loss": 0.2953, + "step": 45445 + }, + { + "epoch": 0.9097615294147086, + "grad_norm": 1.045889139175415, + "learning_rate": 2.1196442139258367e-07, + "loss": 0.2739, + "step": 45446 + }, + { + "epoch": 0.909781547931837, + "grad_norm": 1.11112380027771, + "learning_rate": 2.1187104164647531e-07, + "loss": 0.2888, + "step": 45447 + }, + { + "epoch": 0.9098015664489653, + "grad_norm": 1.3240129947662354, + "learning_rate": 2.1177768202863158e-07, + "loss": 0.3265, + "step": 45448 + }, + { + "epoch": 0.9098215849660937, + "grad_norm": 1.0872530937194824, + "learning_rate": 2.1168434253944437e-07, + "loss": 0.3005, + "step": 45449 + }, + { + "epoch": 0.909841603483222, + "grad_norm": 1.1735806465148926, + "learning_rate": 2.1159102317930558e-07, + "loss": 0.2937, + "step": 45450 + }, + { + "epoch": 0.9098616220003504, + "grad_norm": 1.1591168642044067, + "learning_rate": 2.1149772394860713e-07, + "loss": 0.2538, + "step": 45451 + }, + { + "epoch": 0.9098816405174787, + "grad_norm": 1.0503019094467163, + "learning_rate": 2.1140444484774146e-07, + "loss": 0.2721, + "step": 45452 + }, + { + "epoch": 0.909901659034607, + "grad_norm": 1.1261377334594727, + "learning_rate": 2.1131118587710165e-07, + "loss": 0.3548, + "step": 45453 + }, + { + "epoch": 0.9099216775517354, + "grad_norm": 1.1175119876861572, + "learning_rate": 2.1121794703707953e-07, + "loss": 0.3078, + "step": 45454 + }, + { + "epoch": 0.9099416960688637, + "grad_norm": 1.1609421968460083, + "learning_rate": 2.1112472832806541e-07, + "loss": 0.3074, + "step": 45455 + }, + { + "epoch": 0.9099617145859921, + "grad_norm": 1.316381573677063, + "learning_rate": 2.1103152975045281e-07, + "loss": 0.3301, + "step": 45456 + }, + { + "epoch": 0.9099817331031204, + "grad_norm": 1.017549753189087, + "learning_rate": 2.1093835130463313e-07, + "loss": 0.2514, + "step": 45457 + }, + { + "epoch": 0.9100017516202488, + "grad_norm": 1.1389025449752808, + "learning_rate": 2.1084519299099825e-07, + "loss": 0.2811, + "step": 45458 + }, + { + "epoch": 0.9100217701373771, + "grad_norm": 1.8752340078353882, + "learning_rate": 2.1075205480993954e-07, + "loss": 0.7489, + "step": 45459 + }, + { + "epoch": 0.9100417886545055, + "grad_norm": 0.9977772235870361, + "learning_rate": 2.1065893676184834e-07, + "loss": 0.2688, + "step": 45460 + }, + { + "epoch": 0.9100618071716338, + "grad_norm": 1.014175295829773, + "learning_rate": 2.10565838847116e-07, + "loss": 0.3186, + "step": 45461 + }, + { + "epoch": 0.9100818256887621, + "grad_norm": 1.1306159496307373, + "learning_rate": 2.1047276106613447e-07, + "loss": 0.2978, + "step": 45462 + }, + { + "epoch": 0.9101018442058905, + "grad_norm": 2.0022976398468018, + "learning_rate": 2.1037970341929504e-07, + "loss": 0.7543, + "step": 45463 + }, + { + "epoch": 0.9101218627230188, + "grad_norm": 1.0331807136535645, + "learning_rate": 2.1028666590698854e-07, + "loss": 0.2529, + "step": 45464 + }, + { + "epoch": 0.9101418812401472, + "grad_norm": 2.1646013259887695, + "learning_rate": 2.101936485296052e-07, + "loss": 0.7314, + "step": 45465 + }, + { + "epoch": 0.9101618997572755, + "grad_norm": 1.191173791885376, + "learning_rate": 2.1010065128753808e-07, + "loss": 0.3089, + "step": 45466 + }, + { + "epoch": 0.9101819182744039, + "grad_norm": 1.103357195854187, + "learning_rate": 2.1000767418117685e-07, + "loss": 0.272, + "step": 45467 + }, + { + "epoch": 0.9102019367915322, + "grad_norm": 1.1192593574523926, + "learning_rate": 2.0991471721091172e-07, + "loss": 0.3186, + "step": 45468 + }, + { + "epoch": 0.9102219553086605, + "grad_norm": 1.4610621929168701, + "learning_rate": 2.0982178037713518e-07, + "loss": 0.291, + "step": 45469 + }, + { + "epoch": 0.9102419738257889, + "grad_norm": 1.292542576789856, + "learning_rate": 2.0972886368023582e-07, + "loss": 0.2661, + "step": 45470 + }, + { + "epoch": 0.9102619923429172, + "grad_norm": 1.0434379577636719, + "learning_rate": 2.0963596712060664e-07, + "loss": 0.2232, + "step": 45471 + }, + { + "epoch": 0.9102820108600456, + "grad_norm": 1.0680129528045654, + "learning_rate": 2.0954309069863677e-07, + "loss": 0.3022, + "step": 45472 + }, + { + "epoch": 0.9103020293771739, + "grad_norm": 1.9655206203460693, + "learning_rate": 2.09450234414717e-07, + "loss": 0.7275, + "step": 45473 + }, + { + "epoch": 0.9103220478943023, + "grad_norm": 1.1090569496154785, + "learning_rate": 2.0935739826923652e-07, + "loss": 0.2628, + "step": 45474 + }, + { + "epoch": 0.9103420664114306, + "grad_norm": 1.044716477394104, + "learning_rate": 2.092645822625877e-07, + "loss": 0.278, + "step": 45475 + }, + { + "epoch": 0.910362084928559, + "grad_norm": 1.0776087045669556, + "learning_rate": 2.0917178639515922e-07, + "loss": 0.311, + "step": 45476 + }, + { + "epoch": 0.9103821034456873, + "grad_norm": 1.0717769861221313, + "learning_rate": 2.0907901066734181e-07, + "loss": 0.2776, + "step": 45477 + }, + { + "epoch": 0.9104021219628156, + "grad_norm": 1.054732084274292, + "learning_rate": 2.0898625507952463e-07, + "loss": 0.2882, + "step": 45478 + }, + { + "epoch": 0.910422140479944, + "grad_norm": 1.5667126178741455, + "learning_rate": 2.0889351963209846e-07, + "loss": 0.3269, + "step": 45479 + }, + { + "epoch": 0.9104421589970723, + "grad_norm": 1.0122650861740112, + "learning_rate": 2.0880080432545303e-07, + "loss": 0.2872, + "step": 45480 + }, + { + "epoch": 0.9104621775142007, + "grad_norm": 1.0806375741958618, + "learning_rate": 2.087081091599774e-07, + "loss": 0.3048, + "step": 45481 + }, + { + "epoch": 0.910482196031329, + "grad_norm": 0.9894842505455017, + "learning_rate": 2.0861543413606245e-07, + "loss": 0.2835, + "step": 45482 + }, + { + "epoch": 0.9105022145484574, + "grad_norm": 1.1082854270935059, + "learning_rate": 2.0852277925409613e-07, + "loss": 0.3105, + "step": 45483 + }, + { + "epoch": 0.9105222330655857, + "grad_norm": 1.084561824798584, + "learning_rate": 2.0843014451446985e-07, + "loss": 0.3078, + "step": 45484 + }, + { + "epoch": 0.910542251582714, + "grad_norm": 1.0792052745819092, + "learning_rate": 2.0833752991757216e-07, + "loss": 0.3283, + "step": 45485 + }, + { + "epoch": 0.9105622700998424, + "grad_norm": 1.497384786605835, + "learning_rate": 2.082449354637922e-07, + "loss": 0.3033, + "step": 45486 + }, + { + "epoch": 0.9105822886169707, + "grad_norm": 1.0335701704025269, + "learning_rate": 2.0815236115351856e-07, + "loss": 0.3018, + "step": 45487 + }, + { + "epoch": 0.9106023071340991, + "grad_norm": 1.1464364528656006, + "learning_rate": 2.08059806987142e-07, + "loss": 0.3071, + "step": 45488 + }, + { + "epoch": 0.9106223256512274, + "grad_norm": 1.0742732286453247, + "learning_rate": 2.0796727296505058e-07, + "loss": 0.2904, + "step": 45489 + }, + { + "epoch": 0.9106423441683558, + "grad_norm": 1.083559513092041, + "learning_rate": 2.07874759087634e-07, + "loss": 0.252, + "step": 45490 + }, + { + "epoch": 0.9106623626854841, + "grad_norm": 1.0523251295089722, + "learning_rate": 2.0778226535528022e-07, + "loss": 0.281, + "step": 45491 + }, + { + "epoch": 0.9106823812026125, + "grad_norm": 1.9790517091751099, + "learning_rate": 2.076897917683779e-07, + "loss": 0.784, + "step": 45492 + }, + { + "epoch": 0.9107023997197408, + "grad_norm": 1.1156325340270996, + "learning_rate": 2.0759733832731665e-07, + "loss": 0.2404, + "step": 45493 + }, + { + "epoch": 0.9107224182368691, + "grad_norm": 1.1120082139968872, + "learning_rate": 2.075049050324851e-07, + "loss": 0.3282, + "step": 45494 + }, + { + "epoch": 0.9107424367539975, + "grad_norm": 1.2110708951950073, + "learning_rate": 2.0741249188427125e-07, + "loss": 0.2979, + "step": 45495 + }, + { + "epoch": 0.9107624552711258, + "grad_norm": 1.3051483631134033, + "learning_rate": 2.073200988830637e-07, + "loss": 0.2936, + "step": 45496 + }, + { + "epoch": 0.9107824737882542, + "grad_norm": 1.0357372760772705, + "learning_rate": 2.0722772602925212e-07, + "loss": 0.2516, + "step": 45497 + }, + { + "epoch": 0.9108024923053825, + "grad_norm": 1.0608206987380981, + "learning_rate": 2.0713537332322342e-07, + "loss": 0.262, + "step": 45498 + }, + { + "epoch": 0.9108225108225109, + "grad_norm": 1.0273261070251465, + "learning_rate": 2.0704304076536618e-07, + "loss": 0.2771, + "step": 45499 + }, + { + "epoch": 0.9108425293396392, + "grad_norm": 1.2202249765396118, + "learning_rate": 2.0695072835606844e-07, + "loss": 0.3025, + "step": 45500 + }, + { + "epoch": 0.9108625478567675, + "grad_norm": 1.053948998451233, + "learning_rate": 2.0685843609571764e-07, + "loss": 0.2879, + "step": 45501 + }, + { + "epoch": 0.9108825663738959, + "grad_norm": 1.8057750463485718, + "learning_rate": 2.0676616398470295e-07, + "loss": 0.7569, + "step": 45502 + }, + { + "epoch": 0.9109025848910242, + "grad_norm": 1.2757197618484497, + "learning_rate": 2.0667391202341235e-07, + "loss": 0.2807, + "step": 45503 + }, + { + "epoch": 0.9109226034081526, + "grad_norm": 0.9843984842300415, + "learning_rate": 2.0658168021223278e-07, + "loss": 0.2854, + "step": 45504 + }, + { + "epoch": 0.9109426219252809, + "grad_norm": 1.1344105005264282, + "learning_rate": 2.0648946855155172e-07, + "loss": 0.312, + "step": 45505 + }, + { + "epoch": 0.9109626404424093, + "grad_norm": 1.174813985824585, + "learning_rate": 2.063972770417577e-07, + "loss": 0.3103, + "step": 45506 + }, + { + "epoch": 0.9109826589595376, + "grad_norm": 1.113076090812683, + "learning_rate": 2.063051056832377e-07, + "loss": 0.2903, + "step": 45507 + }, + { + "epoch": 0.911002677476666, + "grad_norm": 1.2087512016296387, + "learning_rate": 2.0621295447638024e-07, + "loss": 0.28, + "step": 45508 + }, + { + "epoch": 0.9110226959937943, + "grad_norm": 1.1808871030807495, + "learning_rate": 2.061208234215706e-07, + "loss": 0.3203, + "step": 45509 + }, + { + "epoch": 0.9110427145109226, + "grad_norm": 1.1108500957489014, + "learning_rate": 2.0602871251919844e-07, + "loss": 0.326, + "step": 45510 + }, + { + "epoch": 0.911062733028051, + "grad_norm": 1.0798786878585815, + "learning_rate": 2.0593662176964958e-07, + "loss": 0.3099, + "step": 45511 + }, + { + "epoch": 0.9110827515451793, + "grad_norm": 1.137980341911316, + "learning_rate": 2.0584455117331203e-07, + "loss": 0.3274, + "step": 45512 + }, + { + "epoch": 0.9111027700623077, + "grad_norm": 1.0500553846359253, + "learning_rate": 2.0575250073057162e-07, + "loss": 0.2691, + "step": 45513 + }, + { + "epoch": 0.911122788579436, + "grad_norm": 1.0715436935424805, + "learning_rate": 2.0566047044181581e-07, + "loss": 0.2675, + "step": 45514 + }, + { + "epoch": 0.9111428070965644, + "grad_norm": 1.948626160621643, + "learning_rate": 2.0556846030743204e-07, + "loss": 0.7197, + "step": 45515 + }, + { + "epoch": 0.9111628256136927, + "grad_norm": 1.9885401725769043, + "learning_rate": 2.0547647032780672e-07, + "loss": 0.7792, + "step": 45516 + }, + { + "epoch": 0.911182844130821, + "grad_norm": 2.0676896572113037, + "learning_rate": 2.053845005033267e-07, + "loss": 0.773, + "step": 45517 + }, + { + "epoch": 0.9112028626479494, + "grad_norm": 1.0588761568069458, + "learning_rate": 2.0529255083437726e-07, + "loss": 0.2691, + "step": 45518 + }, + { + "epoch": 0.9112228811650777, + "grad_norm": 1.582711935043335, + "learning_rate": 2.05200621321347e-07, + "loss": 0.2919, + "step": 45519 + }, + { + "epoch": 0.9112428996822061, + "grad_norm": 1.0544297695159912, + "learning_rate": 2.0510871196462113e-07, + "loss": 0.239, + "step": 45520 + }, + { + "epoch": 0.9112629181993344, + "grad_norm": 1.098355770111084, + "learning_rate": 2.0501682276458658e-07, + "loss": 0.2733, + "step": 45521 + }, + { + "epoch": 0.9112829367164628, + "grad_norm": 1.2785236835479736, + "learning_rate": 2.0492495372162967e-07, + "loss": 0.3215, + "step": 45522 + }, + { + "epoch": 0.9113029552335911, + "grad_norm": 1.0421432256698608, + "learning_rate": 2.048331048361357e-07, + "loss": 0.2523, + "step": 45523 + }, + { + "epoch": 0.9113229737507195, + "grad_norm": 1.1512202024459839, + "learning_rate": 2.0474127610849215e-07, + "loss": 0.2875, + "step": 45524 + }, + { + "epoch": 0.9113429922678478, + "grad_norm": 1.1640377044677734, + "learning_rate": 2.0464946753908421e-07, + "loss": 0.3044, + "step": 45525 + }, + { + "epoch": 0.9113630107849761, + "grad_norm": 1.030387282371521, + "learning_rate": 2.0455767912829828e-07, + "loss": 0.2603, + "step": 45526 + }, + { + "epoch": 0.9113830293021045, + "grad_norm": 1.2962442636489868, + "learning_rate": 2.0446591087651845e-07, + "loss": 0.318, + "step": 45527 + }, + { + "epoch": 0.9114030478192328, + "grad_norm": 1.95806086063385, + "learning_rate": 2.0437416278413335e-07, + "loss": 0.7903, + "step": 45528 + }, + { + "epoch": 0.9114230663363612, + "grad_norm": 1.0394548177719116, + "learning_rate": 2.0428243485152655e-07, + "loss": 0.2624, + "step": 45529 + }, + { + "epoch": 0.9114430848534895, + "grad_norm": 1.0851861238479614, + "learning_rate": 2.0419072707908495e-07, + "loss": 0.2553, + "step": 45530 + }, + { + "epoch": 0.9114631033706179, + "grad_norm": 1.072670578956604, + "learning_rate": 2.0409903946719268e-07, + "loss": 0.3192, + "step": 45531 + }, + { + "epoch": 0.9114831218877462, + "grad_norm": 1.1449742317199707, + "learning_rate": 2.040073720162361e-07, + "loss": 0.3012, + "step": 45532 + }, + { + "epoch": 0.9115031404048745, + "grad_norm": 1.9655271768569946, + "learning_rate": 2.039157247266005e-07, + "loss": 0.7566, + "step": 45533 + }, + { + "epoch": 0.9115231589220029, + "grad_norm": 1.0495755672454834, + "learning_rate": 2.0382409759867105e-07, + "loss": 0.2582, + "step": 45534 + }, + { + "epoch": 0.9115431774391312, + "grad_norm": 1.2610106468200684, + "learning_rate": 2.0373249063283363e-07, + "loss": 0.2844, + "step": 45535 + }, + { + "epoch": 0.9115631959562596, + "grad_norm": 1.5777313709259033, + "learning_rate": 2.0364090382947177e-07, + "loss": 0.2736, + "step": 45536 + }, + { + "epoch": 0.9115832144733879, + "grad_norm": 1.0705353021621704, + "learning_rate": 2.0354933718897186e-07, + "loss": 0.2781, + "step": 45537 + }, + { + "epoch": 0.9116032329905163, + "grad_norm": 1.1597278118133545, + "learning_rate": 2.03457790711718e-07, + "loss": 0.2554, + "step": 45538 + }, + { + "epoch": 0.9116232515076446, + "grad_norm": 1.2756024599075317, + "learning_rate": 2.0336626439809604e-07, + "loss": 0.2819, + "step": 45539 + }, + { + "epoch": 0.911643270024773, + "grad_norm": 1.249193549156189, + "learning_rate": 2.0327475824848897e-07, + "loss": 0.3, + "step": 45540 + }, + { + "epoch": 0.9116632885419013, + "grad_norm": 1.1648527383804321, + "learning_rate": 2.0318327226328315e-07, + "loss": 0.2972, + "step": 45541 + }, + { + "epoch": 0.9116833070590296, + "grad_norm": 1.1847025156021118, + "learning_rate": 2.0309180644286275e-07, + "loss": 0.3102, + "step": 45542 + }, + { + "epoch": 0.911703325576158, + "grad_norm": 1.268180251121521, + "learning_rate": 2.0300036078761188e-07, + "loss": 0.3137, + "step": 45543 + }, + { + "epoch": 0.9117233440932863, + "grad_norm": 1.115407943725586, + "learning_rate": 2.0290893529791468e-07, + "loss": 0.3318, + "step": 45544 + }, + { + "epoch": 0.9117433626104147, + "grad_norm": 1.0890388488769531, + "learning_rate": 2.0281752997415694e-07, + "loss": 0.2784, + "step": 45545 + }, + { + "epoch": 0.911763381127543, + "grad_norm": 1.256805181503296, + "learning_rate": 2.0272614481672114e-07, + "loss": 0.2914, + "step": 45546 + }, + { + "epoch": 0.9117833996446714, + "grad_norm": 1.2126530408859253, + "learning_rate": 2.0263477982599255e-07, + "loss": 0.3176, + "step": 45547 + }, + { + "epoch": 0.9118034181617997, + "grad_norm": 1.0071603059768677, + "learning_rate": 2.0254343500235529e-07, + "loss": 0.2533, + "step": 45548 + }, + { + "epoch": 0.911823436678928, + "grad_norm": 1.007006287574768, + "learning_rate": 2.0245211034619184e-07, + "loss": 0.2805, + "step": 45549 + }, + { + "epoch": 0.9118434551960564, + "grad_norm": 1.057705044746399, + "learning_rate": 2.0236080585788852e-07, + "loss": 0.2733, + "step": 45550 + }, + { + "epoch": 0.9118634737131847, + "grad_norm": 1.8050764799118042, + "learning_rate": 2.0226952153782786e-07, + "loss": 0.7278, + "step": 45551 + }, + { + "epoch": 0.9118834922303131, + "grad_norm": 0.9985877871513367, + "learning_rate": 2.021782573863934e-07, + "loss": 0.2683, + "step": 45552 + }, + { + "epoch": 0.9119035107474414, + "grad_norm": 1.8787903785705566, + "learning_rate": 2.0208701340396875e-07, + "loss": 0.7409, + "step": 45553 + }, + { + "epoch": 0.9119235292645698, + "grad_norm": 1.1234419345855713, + "learning_rate": 2.0199578959093747e-07, + "loss": 0.2517, + "step": 45554 + }, + { + "epoch": 0.9119435477816981, + "grad_norm": 1.0695863962173462, + "learning_rate": 2.0190458594768424e-07, + "loss": 0.3401, + "step": 45555 + }, + { + "epoch": 0.9119635662988265, + "grad_norm": 1.1802735328674316, + "learning_rate": 2.0181340247459104e-07, + "loss": 0.3114, + "step": 45556 + }, + { + "epoch": 0.9119835848159548, + "grad_norm": 1.7981353998184204, + "learning_rate": 2.0172223917204137e-07, + "loss": 0.7065, + "step": 45557 + }, + { + "epoch": 0.9120036033330831, + "grad_norm": 1.1394116878509521, + "learning_rate": 2.0163109604041942e-07, + "loss": 0.3021, + "step": 45558 + }, + { + "epoch": 0.9120236218502115, + "grad_norm": 1.0854593515396118, + "learning_rate": 2.0153997308010764e-07, + "loss": 0.3175, + "step": 45559 + }, + { + "epoch": 0.9120436403673398, + "grad_norm": 2.0014777183532715, + "learning_rate": 2.0144887029148908e-07, + "loss": 0.7222, + "step": 45560 + }, + { + "epoch": 0.9120636588844682, + "grad_norm": 1.1722899675369263, + "learning_rate": 2.0135778767494673e-07, + "loss": 0.3105, + "step": 45561 + }, + { + "epoch": 0.9120836774015965, + "grad_norm": 1.204690933227539, + "learning_rate": 2.0126672523086365e-07, + "loss": 0.3047, + "step": 45562 + }, + { + "epoch": 0.9121036959187249, + "grad_norm": 1.372035264968872, + "learning_rate": 2.011756829596223e-07, + "loss": 0.3129, + "step": 45563 + }, + { + "epoch": 0.9121237144358532, + "grad_norm": 0.9573718905448914, + "learning_rate": 2.0108466086160627e-07, + "loss": 0.2543, + "step": 45564 + }, + { + "epoch": 0.9121437329529815, + "grad_norm": 1.907619595527649, + "learning_rate": 2.0099365893719747e-07, + "loss": 0.7302, + "step": 45565 + }, + { + "epoch": 0.9121637514701099, + "grad_norm": 1.0738059282302856, + "learning_rate": 2.0090267718677836e-07, + "loss": 0.3008, + "step": 45566 + }, + { + "epoch": 0.9121837699872382, + "grad_norm": 2.0128462314605713, + "learning_rate": 2.0081171561073144e-07, + "loss": 0.775, + "step": 45567 + }, + { + "epoch": 0.9122037885043666, + "grad_norm": 1.0530998706817627, + "learning_rate": 2.0072077420943968e-07, + "loss": 0.3087, + "step": 45568 + }, + { + "epoch": 0.9122238070214949, + "grad_norm": 1.1508889198303223, + "learning_rate": 2.0062985298328453e-07, + "loss": 0.3057, + "step": 45569 + }, + { + "epoch": 0.9122438255386233, + "grad_norm": 1.283405065536499, + "learning_rate": 2.005389519326484e-07, + "loss": 0.3331, + "step": 45570 + }, + { + "epoch": 0.9122638440557516, + "grad_norm": 1.197701096534729, + "learning_rate": 2.004480710579143e-07, + "loss": 0.3468, + "step": 45571 + }, + { + "epoch": 0.91228386257288, + "grad_norm": 1.3231751918792725, + "learning_rate": 2.003572103594631e-07, + "loss": 0.3018, + "step": 45572 + }, + { + "epoch": 0.9123038810900083, + "grad_norm": 1.056962013244629, + "learning_rate": 2.0026636983767778e-07, + "loss": 0.3111, + "step": 45573 + }, + { + "epoch": 0.9123238996071366, + "grad_norm": 1.8090922832489014, + "learning_rate": 2.001755494929397e-07, + "loss": 0.7544, + "step": 45574 + }, + { + "epoch": 0.912343918124265, + "grad_norm": 1.2338742017745972, + "learning_rate": 2.0008474932563083e-07, + "loss": 0.3417, + "step": 45575 + }, + { + "epoch": 0.9123639366413933, + "grad_norm": 1.153189778327942, + "learning_rate": 1.999939693361319e-07, + "loss": 0.3072, + "step": 45576 + }, + { + "epoch": 0.9123839551585217, + "grad_norm": 1.1568888425827026, + "learning_rate": 1.99903209524826e-07, + "loss": 0.3347, + "step": 45577 + }, + { + "epoch": 0.91240397367565, + "grad_norm": 1.2640608549118042, + "learning_rate": 1.9981246989209446e-07, + "loss": 0.3224, + "step": 45578 + }, + { + "epoch": 0.9124239921927784, + "grad_norm": 1.1115400791168213, + "learning_rate": 1.9972175043831754e-07, + "loss": 0.295, + "step": 45579 + }, + { + "epoch": 0.9124440107099067, + "grad_norm": 1.1295742988586426, + "learning_rate": 1.9963105116387715e-07, + "loss": 0.3244, + "step": 45580 + }, + { + "epoch": 0.912464029227035, + "grad_norm": 1.1932952404022217, + "learning_rate": 1.995403720691552e-07, + "loss": 0.2585, + "step": 45581 + }, + { + "epoch": 0.9124840477441634, + "grad_norm": 1.168219804763794, + "learning_rate": 1.9944971315453255e-07, + "loss": 0.3545, + "step": 45582 + }, + { + "epoch": 0.9125040662612917, + "grad_norm": 1.2054989337921143, + "learning_rate": 1.9935907442038993e-07, + "loss": 0.3147, + "step": 45583 + }, + { + "epoch": 0.9125240847784201, + "grad_norm": 0.9514439702033997, + "learning_rate": 1.9926845586710876e-07, + "loss": 0.2246, + "step": 45584 + }, + { + "epoch": 0.9125441032955484, + "grad_norm": 1.1559457778930664, + "learning_rate": 1.991778574950698e-07, + "loss": 0.3002, + "step": 45585 + }, + { + "epoch": 0.9125641218126768, + "grad_norm": 1.1395187377929688, + "learning_rate": 1.9908727930465387e-07, + "loss": 0.3003, + "step": 45586 + }, + { + "epoch": 0.9125841403298051, + "grad_norm": 1.0419392585754395, + "learning_rate": 1.9899672129624237e-07, + "loss": 0.3007, + "step": 45587 + }, + { + "epoch": 0.9126041588469335, + "grad_norm": 1.0965278148651123, + "learning_rate": 1.9890618347021551e-07, + "loss": 0.2713, + "step": 45588 + }, + { + "epoch": 0.9126241773640618, + "grad_norm": 1.2293736934661865, + "learning_rate": 1.98815665826953e-07, + "loss": 0.2914, + "step": 45589 + }, + { + "epoch": 0.9126441958811901, + "grad_norm": 1.2593142986297607, + "learning_rate": 1.9872516836683676e-07, + "loss": 0.2781, + "step": 45590 + }, + { + "epoch": 0.9126642143983185, + "grad_norm": 1.1615238189697266, + "learning_rate": 1.9863469109024702e-07, + "loss": 0.3139, + "step": 45591 + }, + { + "epoch": 0.9126842329154468, + "grad_norm": 1.9928910732269287, + "learning_rate": 1.9854423399756352e-07, + "loss": 0.7727, + "step": 45592 + }, + { + "epoch": 0.9127042514325752, + "grad_norm": 1.0418397188186646, + "learning_rate": 1.984537970891659e-07, + "loss": 0.2964, + "step": 45593 + }, + { + "epoch": 0.9127242699497035, + "grad_norm": 1.0217143297195435, + "learning_rate": 1.9836338036543613e-07, + "loss": 0.297, + "step": 45594 + }, + { + "epoch": 0.9127442884668319, + "grad_norm": 1.2246005535125732, + "learning_rate": 1.9827298382675332e-07, + "loss": 0.2785, + "step": 45595 + }, + { + "epoch": 0.9127643069839602, + "grad_norm": 1.1163312196731567, + "learning_rate": 1.9818260747349716e-07, + "loss": 0.2568, + "step": 45596 + }, + { + "epoch": 0.9127843255010885, + "grad_norm": 1.061636209487915, + "learning_rate": 1.9809225130604847e-07, + "loss": 0.2759, + "step": 45597 + }, + { + "epoch": 0.9128043440182169, + "grad_norm": 1.9674798250198364, + "learning_rate": 1.9800191532478585e-07, + "loss": 0.7944, + "step": 45598 + }, + { + "epoch": 0.9128243625353452, + "grad_norm": 1.8105216026306152, + "learning_rate": 1.9791159953009064e-07, + "loss": 0.7205, + "step": 45599 + }, + { + "epoch": 0.9128443810524736, + "grad_norm": 1.250091314315796, + "learning_rate": 1.978213039223409e-07, + "loss": 0.3015, + "step": 45600 + }, + { + "epoch": 0.9128643995696019, + "grad_norm": 1.0107442140579224, + "learning_rate": 1.9773102850191793e-07, + "loss": 0.2885, + "step": 45601 + }, + { + "epoch": 0.9128844180867303, + "grad_norm": 1.1039047241210938, + "learning_rate": 1.9764077326919873e-07, + "loss": 0.2861, + "step": 45602 + }, + { + "epoch": 0.9129044366038586, + "grad_norm": 1.1850796937942505, + "learning_rate": 1.975505382245657e-07, + "loss": 0.2503, + "step": 45603 + }, + { + "epoch": 0.912924455120987, + "grad_norm": 1.2968940734863281, + "learning_rate": 1.974603233683964e-07, + "loss": 0.3253, + "step": 45604 + }, + { + "epoch": 0.9129444736381153, + "grad_norm": 1.1367499828338623, + "learning_rate": 1.9737012870106986e-07, + "loss": 0.2444, + "step": 45605 + }, + { + "epoch": 0.9129644921552436, + "grad_norm": 1.15297269821167, + "learning_rate": 1.972799542229664e-07, + "loss": 0.288, + "step": 45606 + }, + { + "epoch": 0.912984510672372, + "grad_norm": 1.1634010076522827, + "learning_rate": 1.9718979993446352e-07, + "loss": 0.3102, + "step": 45607 + }, + { + "epoch": 0.9130045291895003, + "grad_norm": 1.0728679895401, + "learning_rate": 1.97099665835942e-07, + "loss": 0.321, + "step": 45608 + }, + { + "epoch": 0.9130245477066287, + "grad_norm": 1.1309940814971924, + "learning_rate": 1.9700955192777928e-07, + "loss": 0.3042, + "step": 45609 + }, + { + "epoch": 0.913044566223757, + "grad_norm": 1.2038344144821167, + "learning_rate": 1.9691945821035509e-07, + "loss": 0.2808, + "step": 45610 + }, + { + "epoch": 0.9130645847408854, + "grad_norm": 1.1010996103286743, + "learning_rate": 1.9682938468404744e-07, + "loss": 0.2667, + "step": 45611 + }, + { + "epoch": 0.9130846032580137, + "grad_norm": 1.1380774974822998, + "learning_rate": 1.9673933134923606e-07, + "loss": 0.2844, + "step": 45612 + }, + { + "epoch": 0.913104621775142, + "grad_norm": 1.0583786964416504, + "learning_rate": 1.9664929820629898e-07, + "loss": 0.318, + "step": 45613 + }, + { + "epoch": 0.9131246402922704, + "grad_norm": 1.0168023109436035, + "learning_rate": 1.965592852556142e-07, + "loss": 0.2658, + "step": 45614 + }, + { + "epoch": 0.9131446588093987, + "grad_norm": 2.074655532836914, + "learning_rate": 1.9646929249756085e-07, + "loss": 0.6988, + "step": 45615 + }, + { + "epoch": 0.9131646773265271, + "grad_norm": 1.163155436515808, + "learning_rate": 1.9637931993251592e-07, + "loss": 0.3029, + "step": 45616 + }, + { + "epoch": 0.9131846958436554, + "grad_norm": 1.1097568273544312, + "learning_rate": 1.9628936756085958e-07, + "loss": 0.2854, + "step": 45617 + }, + { + "epoch": 0.9132047143607838, + "grad_norm": 1.7406741380691528, + "learning_rate": 1.961994353829688e-07, + "loss": 0.7145, + "step": 45618 + }, + { + "epoch": 0.9132247328779121, + "grad_norm": 1.2678056955337524, + "learning_rate": 1.9610952339922163e-07, + "loss": 0.2678, + "step": 45619 + }, + { + "epoch": 0.9132447513950405, + "grad_norm": 1.0275797843933105, + "learning_rate": 1.9601963160999548e-07, + "loss": 0.2751, + "step": 45620 + }, + { + "epoch": 0.9132647699121688, + "grad_norm": 1.1049449443817139, + "learning_rate": 1.959297600156701e-07, + "loss": 0.2973, + "step": 45621 + }, + { + "epoch": 0.9132847884292971, + "grad_norm": 1.0515080690383911, + "learning_rate": 1.9583990861662127e-07, + "loss": 0.2557, + "step": 45622 + }, + { + "epoch": 0.9133048069464255, + "grad_norm": 1.208556890487671, + "learning_rate": 1.9575007741322815e-07, + "loss": 0.2773, + "step": 45623 + }, + { + "epoch": 0.9133248254635538, + "grad_norm": 1.1172902584075928, + "learning_rate": 1.9566026640586712e-07, + "loss": 0.3318, + "step": 45624 + }, + { + "epoch": 0.9133448439806822, + "grad_norm": 1.1141536235809326, + "learning_rate": 1.9557047559491727e-07, + "loss": 0.3067, + "step": 45625 + }, + { + "epoch": 0.9133648624978105, + "grad_norm": 1.0973222255706787, + "learning_rate": 1.95480704980755e-07, + "loss": 0.2554, + "step": 45626 + }, + { + "epoch": 0.9133848810149389, + "grad_norm": 1.0882967710494995, + "learning_rate": 1.9539095456375834e-07, + "loss": 0.3129, + "step": 45627 + }, + { + "epoch": 0.9134048995320672, + "grad_norm": 1.8824195861816406, + "learning_rate": 1.9530122434430365e-07, + "loss": 0.8168, + "step": 45628 + }, + { + "epoch": 0.9134249180491955, + "grad_norm": 1.0982812643051147, + "learning_rate": 1.9521151432276786e-07, + "loss": 0.2797, + "step": 45629 + }, + { + "epoch": 0.9134449365663239, + "grad_norm": 1.0781699419021606, + "learning_rate": 1.951218244995301e-07, + "loss": 0.2925, + "step": 45630 + }, + { + "epoch": 0.9134649550834522, + "grad_norm": 1.0466840267181396, + "learning_rate": 1.9503215487496563e-07, + "loss": 0.2492, + "step": 45631 + }, + { + "epoch": 0.9134849736005806, + "grad_norm": 1.2749121189117432, + "learning_rate": 1.9494250544945247e-07, + "loss": 0.317, + "step": 45632 + }, + { + "epoch": 0.9135049921177089, + "grad_norm": 1.0324455499649048, + "learning_rate": 1.948528762233659e-07, + "loss": 0.2608, + "step": 45633 + }, + { + "epoch": 0.9135250106348373, + "grad_norm": 1.0260205268859863, + "learning_rate": 1.947632671970845e-07, + "loss": 0.3085, + "step": 45634 + }, + { + "epoch": 0.9135450291519656, + "grad_norm": 1.1250879764556885, + "learning_rate": 1.946736783709835e-07, + "loss": 0.2983, + "step": 45635 + }, + { + "epoch": 0.913565047669094, + "grad_norm": 1.246936559677124, + "learning_rate": 1.945841097454415e-07, + "loss": 0.2806, + "step": 45636 + }, + { + "epoch": 0.9135850661862223, + "grad_norm": 1.2277636528015137, + "learning_rate": 1.9449456132083323e-07, + "loss": 0.296, + "step": 45637 + }, + { + "epoch": 0.9136050847033506, + "grad_norm": 1.201078176498413, + "learning_rate": 1.94405033097535e-07, + "loss": 0.3008, + "step": 45638 + }, + { + "epoch": 0.913625103220479, + "grad_norm": 1.15877103805542, + "learning_rate": 1.9431552507592433e-07, + "loss": 0.3057, + "step": 45639 + }, + { + "epoch": 0.9136451217376073, + "grad_norm": 1.0667107105255127, + "learning_rate": 1.94226037256377e-07, + "loss": 0.2573, + "step": 45640 + }, + { + "epoch": 0.9136651402547357, + "grad_norm": 1.1277071237564087, + "learning_rate": 1.941365696392694e-07, + "loss": 0.2814, + "step": 45641 + }, + { + "epoch": 0.913685158771864, + "grad_norm": 0.9788466095924377, + "learning_rate": 1.9404712222497733e-07, + "loss": 0.2394, + "step": 45642 + }, + { + "epoch": 0.9137051772889924, + "grad_norm": 1.0764120817184448, + "learning_rate": 1.9395769501387662e-07, + "loss": 0.2657, + "step": 45643 + }, + { + "epoch": 0.9137251958061207, + "grad_norm": 1.2666035890579224, + "learning_rate": 1.9386828800634416e-07, + "loss": 0.3105, + "step": 45644 + }, + { + "epoch": 0.913745214323249, + "grad_norm": 1.1263437271118164, + "learning_rate": 1.9377890120275466e-07, + "loss": 0.2778, + "step": 45645 + }, + { + "epoch": 0.9137652328403774, + "grad_norm": 1.0696078538894653, + "learning_rate": 1.936895346034845e-07, + "loss": 0.3324, + "step": 45646 + }, + { + "epoch": 0.9137852513575057, + "grad_norm": 1.2284080982208252, + "learning_rate": 1.936001882089089e-07, + "loss": 0.2677, + "step": 45647 + }, + { + "epoch": 0.9138052698746341, + "grad_norm": 1.0308994054794312, + "learning_rate": 1.9351086201940316e-07, + "loss": 0.3183, + "step": 45648 + }, + { + "epoch": 0.9138252883917624, + "grad_norm": 1.1119571924209595, + "learning_rate": 1.9342155603534417e-07, + "loss": 0.2512, + "step": 45649 + }, + { + "epoch": 0.9138453069088908, + "grad_norm": 1.132161021232605, + "learning_rate": 1.9333227025710722e-07, + "loss": 0.2609, + "step": 45650 + }, + { + "epoch": 0.9138653254260191, + "grad_norm": 1.0777448415756226, + "learning_rate": 1.9324300468506584e-07, + "loss": 0.2631, + "step": 45651 + }, + { + "epoch": 0.9138853439431475, + "grad_norm": 1.1383352279663086, + "learning_rate": 1.93153759319597e-07, + "loss": 0.3088, + "step": 45652 + }, + { + "epoch": 0.9139053624602758, + "grad_norm": 1.0351991653442383, + "learning_rate": 1.930645341610754e-07, + "loss": 0.2984, + "step": 45653 + }, + { + "epoch": 0.9139253809774041, + "grad_norm": 1.010772705078125, + "learning_rate": 1.9297532920987573e-07, + "loss": 0.226, + "step": 45654 + }, + { + "epoch": 0.9139453994945325, + "grad_norm": 1.09443199634552, + "learning_rate": 1.928861444663732e-07, + "loss": 0.2608, + "step": 45655 + }, + { + "epoch": 0.9139654180116608, + "grad_norm": 1.2240840196609497, + "learning_rate": 1.9279697993094316e-07, + "loss": 0.3239, + "step": 45656 + }, + { + "epoch": 0.9139854365287892, + "grad_norm": 1.1427314281463623, + "learning_rate": 1.9270783560396024e-07, + "loss": 0.3019, + "step": 45657 + }, + { + "epoch": 0.9140054550459175, + "grad_norm": 1.8708258867263794, + "learning_rate": 1.9261871148579915e-07, + "loss": 0.7365, + "step": 45658 + }, + { + "epoch": 0.9140254735630459, + "grad_norm": 1.3830243349075317, + "learning_rate": 1.9252960757683404e-07, + "loss": 0.3116, + "step": 45659 + }, + { + "epoch": 0.9140454920801742, + "grad_norm": 1.1527775526046753, + "learning_rate": 1.9244052387743962e-07, + "loss": 0.2868, + "step": 45660 + }, + { + "epoch": 0.9140655105973025, + "grad_norm": 1.9302107095718384, + "learning_rate": 1.9235146038799056e-07, + "loss": 0.7628, + "step": 45661 + }, + { + "epoch": 0.9140855291144309, + "grad_norm": 1.0799015760421753, + "learning_rate": 1.922624171088616e-07, + "loss": 0.2697, + "step": 45662 + }, + { + "epoch": 0.9141055476315592, + "grad_norm": 1.0949509143829346, + "learning_rate": 1.9217339404042746e-07, + "loss": 0.3163, + "step": 45663 + }, + { + "epoch": 0.9141255661486876, + "grad_norm": 1.4360181093215942, + "learning_rate": 1.9208439118306055e-07, + "loss": 0.3226, + "step": 45664 + }, + { + "epoch": 0.9141455846658159, + "grad_norm": 1.089195966720581, + "learning_rate": 1.9199540853713672e-07, + "loss": 0.3109, + "step": 45665 + }, + { + "epoch": 0.9141656031829443, + "grad_norm": 1.1027228832244873, + "learning_rate": 1.919064461030301e-07, + "loss": 0.3107, + "step": 45666 + }, + { + "epoch": 0.9141856217000726, + "grad_norm": 1.0463054180145264, + "learning_rate": 1.9181750388111374e-07, + "loss": 0.2799, + "step": 45667 + }, + { + "epoch": 0.914205640217201, + "grad_norm": 1.2400203943252563, + "learning_rate": 1.917285818717618e-07, + "loss": 0.2878, + "step": 45668 + }, + { + "epoch": 0.9142256587343293, + "grad_norm": 1.1190121173858643, + "learning_rate": 1.9163968007534728e-07, + "loss": 0.2627, + "step": 45669 + }, + { + "epoch": 0.9142456772514576, + "grad_norm": 1.127625584602356, + "learning_rate": 1.91550798492246e-07, + "loss": 0.2878, + "step": 45670 + }, + { + "epoch": 0.914265695768586, + "grad_norm": 1.004067301750183, + "learning_rate": 1.914619371228299e-07, + "loss": 0.2839, + "step": 45671 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 1.168832778930664, + "learning_rate": 1.9137309596747312e-07, + "loss": 0.3036, + "step": 45672 + }, + { + "epoch": 0.9143057328028427, + "grad_norm": 1.040024995803833, + "learning_rate": 1.9128427502654866e-07, + "loss": 0.2844, + "step": 45673 + }, + { + "epoch": 0.914325751319971, + "grad_norm": 1.250359296798706, + "learning_rate": 1.9119547430043018e-07, + "loss": 0.3412, + "step": 45674 + }, + { + "epoch": 0.9143457698370994, + "grad_norm": 0.9940930604934692, + "learning_rate": 1.911066937894912e-07, + "loss": 0.2655, + "step": 45675 + }, + { + "epoch": 0.9143657883542277, + "grad_norm": 1.2119470834732056, + "learning_rate": 1.9101793349410536e-07, + "loss": 0.2822, + "step": 45676 + }, + { + "epoch": 0.914385806871356, + "grad_norm": 1.1260892152786255, + "learning_rate": 1.9092919341464456e-07, + "loss": 0.3022, + "step": 45677 + }, + { + "epoch": 0.9144058253884844, + "grad_norm": 1.113008975982666, + "learning_rate": 1.9084047355148293e-07, + "loss": 0.2514, + "step": 45678 + }, + { + "epoch": 0.9144258439056127, + "grad_norm": 1.3599621057510376, + "learning_rate": 1.9075177390499299e-07, + "loss": 0.2881, + "step": 45679 + }, + { + "epoch": 0.9144458624227411, + "grad_norm": 0.9691912531852722, + "learning_rate": 1.9066309447554776e-07, + "loss": 0.2463, + "step": 45680 + }, + { + "epoch": 0.9144658809398694, + "grad_norm": 1.0817142724990845, + "learning_rate": 1.9057443526351972e-07, + "loss": 0.2843, + "step": 45681 + }, + { + "epoch": 0.9144858994569978, + "grad_norm": 1.0930300951004028, + "learning_rate": 1.9048579626928133e-07, + "loss": 0.2643, + "step": 45682 + }, + { + "epoch": 0.9145059179741261, + "grad_norm": 1.2014617919921875, + "learning_rate": 1.9039717749320618e-07, + "loss": 0.2881, + "step": 45683 + }, + { + "epoch": 0.9145259364912545, + "grad_norm": 1.2548980712890625, + "learning_rate": 1.9030857893566623e-07, + "loss": 0.3291, + "step": 45684 + }, + { + "epoch": 0.9145459550083828, + "grad_norm": 1.2914977073669434, + "learning_rate": 1.9022000059703394e-07, + "loss": 0.3281, + "step": 45685 + }, + { + "epoch": 0.9145659735255111, + "grad_norm": 1.4179227352142334, + "learning_rate": 1.9013144247768067e-07, + "loss": 0.2696, + "step": 45686 + }, + { + "epoch": 0.9145859920426395, + "grad_norm": 1.0588243007659912, + "learning_rate": 1.9004290457798058e-07, + "loss": 0.3059, + "step": 45687 + }, + { + "epoch": 0.9146060105597678, + "grad_norm": 1.0744305849075317, + "learning_rate": 1.8995438689830502e-07, + "loss": 0.2851, + "step": 45688 + }, + { + "epoch": 0.9146260290768962, + "grad_norm": 1.8168127536773682, + "learning_rate": 1.898658894390265e-07, + "loss": 0.7137, + "step": 45689 + }, + { + "epoch": 0.9146460475940245, + "grad_norm": 1.3261795043945312, + "learning_rate": 1.8977741220051637e-07, + "loss": 0.3105, + "step": 45690 + }, + { + "epoch": 0.9146660661111529, + "grad_norm": 1.12502920627594, + "learning_rate": 1.8968895518314601e-07, + "loss": 0.2611, + "step": 45691 + }, + { + "epoch": 0.9146860846282812, + "grad_norm": 1.1238415241241455, + "learning_rate": 1.8960051838728898e-07, + "loss": 0.2846, + "step": 45692 + }, + { + "epoch": 0.9147061031454095, + "grad_norm": 1.074618935585022, + "learning_rate": 1.8951210181331614e-07, + "loss": 0.2635, + "step": 45693 + }, + { + "epoch": 0.9147261216625379, + "grad_norm": 1.2046098709106445, + "learning_rate": 1.8942370546159881e-07, + "loss": 0.3016, + "step": 45694 + }, + { + "epoch": 0.9147461401796662, + "grad_norm": 1.1415811777114868, + "learning_rate": 1.8933532933250897e-07, + "loss": 0.2884, + "step": 45695 + }, + { + "epoch": 0.9147661586967946, + "grad_norm": 1.1539440155029297, + "learning_rate": 1.8924697342641852e-07, + "loss": 0.2572, + "step": 45696 + }, + { + "epoch": 0.9147861772139229, + "grad_norm": 1.06162691116333, + "learning_rate": 1.8915863774369825e-07, + "loss": 0.3312, + "step": 45697 + }, + { + "epoch": 0.9148061957310513, + "grad_norm": 1.1163620948791504, + "learning_rate": 1.890703222847201e-07, + "loss": 0.3019, + "step": 45698 + }, + { + "epoch": 0.9148262142481796, + "grad_norm": 1.1128324270248413, + "learning_rate": 1.8898202704985436e-07, + "loss": 0.2934, + "step": 45699 + }, + { + "epoch": 0.914846232765308, + "grad_norm": 1.1811045408248901, + "learning_rate": 1.888937520394729e-07, + "loss": 0.3281, + "step": 45700 + }, + { + "epoch": 0.9148662512824363, + "grad_norm": 1.1933661699295044, + "learning_rate": 1.8880549725394715e-07, + "loss": 0.2679, + "step": 45701 + }, + { + "epoch": 0.9148862697995646, + "grad_norm": 1.1895774602890015, + "learning_rate": 1.8871726269364789e-07, + "loss": 0.3276, + "step": 45702 + }, + { + "epoch": 0.914906288316693, + "grad_norm": 1.0973875522613525, + "learning_rate": 1.8862904835894536e-07, + "loss": 0.2742, + "step": 45703 + }, + { + "epoch": 0.9149263068338213, + "grad_norm": 0.9558532238006592, + "learning_rate": 1.8854085425021096e-07, + "loss": 0.2099, + "step": 45704 + }, + { + "epoch": 0.9149463253509497, + "grad_norm": 1.0241655111312866, + "learning_rate": 1.884526803678155e-07, + "loss": 0.2207, + "step": 45705 + }, + { + "epoch": 0.914966343868078, + "grad_norm": 1.1324039697647095, + "learning_rate": 1.883645267121298e-07, + "loss": 0.3002, + "step": 45706 + }, + { + "epoch": 0.9149863623852064, + "grad_norm": 1.0755220651626587, + "learning_rate": 1.882763932835241e-07, + "loss": 0.2449, + "step": 45707 + }, + { + "epoch": 0.9150063809023347, + "grad_norm": 1.8015700578689575, + "learning_rate": 1.881882800823681e-07, + "loss": 0.7051, + "step": 45708 + }, + { + "epoch": 0.915026399419463, + "grad_norm": 1.307689905166626, + "learning_rate": 1.8810018710903377e-07, + "loss": 0.3494, + "step": 45709 + }, + { + "epoch": 0.9150464179365914, + "grad_norm": 1.7426759004592896, + "learning_rate": 1.8801211436389077e-07, + "loss": 0.7858, + "step": 45710 + }, + { + "epoch": 0.9150664364537197, + "grad_norm": 1.1319981813430786, + "learning_rate": 1.879240618473094e-07, + "loss": 0.2935, + "step": 45711 + }, + { + "epoch": 0.9150864549708481, + "grad_norm": 1.2150959968566895, + "learning_rate": 1.8783602955965874e-07, + "loss": 0.2919, + "step": 45712 + }, + { + "epoch": 0.9151064734879764, + "grad_norm": 1.117963433265686, + "learning_rate": 1.8774801750131023e-07, + "loss": 0.2727, + "step": 45713 + }, + { + "epoch": 0.9151264920051048, + "grad_norm": 1.1550337076187134, + "learning_rate": 1.876600256726341e-07, + "loss": 0.323, + "step": 45714 + }, + { + "epoch": 0.9151465105222331, + "grad_norm": 0.9607649445533752, + "learning_rate": 1.875720540739995e-07, + "loss": 0.247, + "step": 45715 + }, + { + "epoch": 0.9151665290393614, + "grad_norm": 1.089637279510498, + "learning_rate": 1.874841027057761e-07, + "loss": 0.3115, + "step": 45716 + }, + { + "epoch": 0.9151865475564898, + "grad_norm": 0.9761909246444702, + "learning_rate": 1.8739617156833367e-07, + "loss": 0.2945, + "step": 45717 + }, + { + "epoch": 0.915206566073618, + "grad_norm": 1.0577175617218018, + "learning_rate": 1.8730826066204245e-07, + "loss": 0.2942, + "step": 45718 + }, + { + "epoch": 0.9152265845907465, + "grad_norm": 1.1102293729782104, + "learning_rate": 1.87220369987271e-07, + "loss": 0.2777, + "step": 45719 + }, + { + "epoch": 0.9152466031078748, + "grad_norm": 1.1820317506790161, + "learning_rate": 1.8713249954439016e-07, + "loss": 0.2844, + "step": 45720 + }, + { + "epoch": 0.9152666216250032, + "grad_norm": 1.1916662454605103, + "learning_rate": 1.8704464933376853e-07, + "loss": 0.2916, + "step": 45721 + }, + { + "epoch": 0.9152866401421315, + "grad_norm": 1.1310737133026123, + "learning_rate": 1.869568193557747e-07, + "loss": 0.2614, + "step": 45722 + }, + { + "epoch": 0.9153066586592599, + "grad_norm": 1.0927975177764893, + "learning_rate": 1.8686900961077892e-07, + "loss": 0.2906, + "step": 45723 + }, + { + "epoch": 0.9153266771763882, + "grad_norm": 1.1527442932128906, + "learning_rate": 1.8678122009915033e-07, + "loss": 0.2711, + "step": 45724 + }, + { + "epoch": 0.9153466956935165, + "grad_norm": 1.185950756072998, + "learning_rate": 1.86693450821257e-07, + "loss": 0.3221, + "step": 45725 + }, + { + "epoch": 0.9153667142106449, + "grad_norm": 1.921019196510315, + "learning_rate": 1.8660570177746862e-07, + "loss": 0.7402, + "step": 45726 + }, + { + "epoch": 0.9153867327277732, + "grad_norm": 1.1514755487442017, + "learning_rate": 1.8651797296815432e-07, + "loss": 0.3063, + "step": 45727 + }, + { + "epoch": 0.9154067512449016, + "grad_norm": 1.0512382984161377, + "learning_rate": 1.8643026439368327e-07, + "loss": 0.3045, + "step": 45728 + }, + { + "epoch": 0.9154267697620299, + "grad_norm": 1.1078723669052124, + "learning_rate": 1.8634257605442297e-07, + "loss": 0.2976, + "step": 45729 + }, + { + "epoch": 0.9154467882791583, + "grad_norm": 1.0992927551269531, + "learning_rate": 1.8625490795074253e-07, + "loss": 0.3163, + "step": 45730 + }, + { + "epoch": 0.9154668067962866, + "grad_norm": 1.194284439086914, + "learning_rate": 1.8616726008301002e-07, + "loss": 0.3542, + "step": 45731 + }, + { + "epoch": 0.9154868253134149, + "grad_norm": 1.133138656616211, + "learning_rate": 1.860796324515951e-07, + "loss": 0.2529, + "step": 45732 + }, + { + "epoch": 0.9155068438305433, + "grad_norm": 1.1654820442199707, + "learning_rate": 1.8599202505686532e-07, + "loss": 0.3243, + "step": 45733 + }, + { + "epoch": 0.9155268623476716, + "grad_norm": 1.0578421354293823, + "learning_rate": 1.859044378991892e-07, + "loss": 0.2557, + "step": 45734 + }, + { + "epoch": 0.9155468808648, + "grad_norm": 1.1135609149932861, + "learning_rate": 1.858168709789343e-07, + "loss": 0.3387, + "step": 45735 + }, + { + "epoch": 0.9155668993819283, + "grad_norm": 1.1796354055404663, + "learning_rate": 1.8572932429646973e-07, + "loss": 0.3192, + "step": 45736 + }, + { + "epoch": 0.9155869178990567, + "grad_norm": 1.1556987762451172, + "learning_rate": 1.8564179785216297e-07, + "loss": 0.3064, + "step": 45737 + }, + { + "epoch": 0.915606936416185, + "grad_norm": 1.216835379600525, + "learning_rate": 1.855542916463815e-07, + "loss": 0.3188, + "step": 45738 + }, + { + "epoch": 0.9156269549333134, + "grad_norm": 1.1241395473480225, + "learning_rate": 1.854668056794934e-07, + "loss": 0.2632, + "step": 45739 + }, + { + "epoch": 0.9156469734504417, + "grad_norm": 1.1195091009140015, + "learning_rate": 1.8537933995186775e-07, + "loss": 0.2509, + "step": 45740 + }, + { + "epoch": 0.91566699196757, + "grad_norm": 1.1309200525283813, + "learning_rate": 1.8529189446387098e-07, + "loss": 0.2983, + "step": 45741 + }, + { + "epoch": 0.9156870104846984, + "grad_norm": 1.2772836685180664, + "learning_rate": 1.852044692158711e-07, + "loss": 0.3012, + "step": 45742 + }, + { + "epoch": 0.9157070290018267, + "grad_norm": 1.9566336870193481, + "learning_rate": 1.851170642082356e-07, + "loss": 0.758, + "step": 45743 + }, + { + "epoch": 0.9157270475189551, + "grad_norm": 1.1429532766342163, + "learning_rate": 1.8502967944133144e-07, + "loss": 0.2894, + "step": 45744 + }, + { + "epoch": 0.9157470660360834, + "grad_norm": 1.1425760984420776, + "learning_rate": 1.8494231491552606e-07, + "loss": 0.2685, + "step": 45745 + }, + { + "epoch": 0.9157670845532118, + "grad_norm": 1.1134915351867676, + "learning_rate": 1.848549706311875e-07, + "loss": 0.2983, + "step": 45746 + }, + { + "epoch": 0.9157871030703401, + "grad_norm": 1.9685779809951782, + "learning_rate": 1.8476764658868273e-07, + "loss": 0.7217, + "step": 45747 + }, + { + "epoch": 0.9158071215874684, + "grad_norm": 1.754305362701416, + "learning_rate": 1.8468034278837754e-07, + "loss": 0.7904, + "step": 45748 + }, + { + "epoch": 0.9158271401045968, + "grad_norm": 1.1002246141433716, + "learning_rate": 1.8459305923064053e-07, + "loss": 0.2939, + "step": 45749 + }, + { + "epoch": 0.915847158621725, + "grad_norm": 1.9075332880020142, + "learning_rate": 1.8450579591583806e-07, + "loss": 0.7317, + "step": 45750 + }, + { + "epoch": 0.9158671771388535, + "grad_norm": 1.0869710445404053, + "learning_rate": 1.8441855284433596e-07, + "loss": 0.3571, + "step": 45751 + }, + { + "epoch": 0.9158871956559818, + "grad_norm": 1.2231639623641968, + "learning_rate": 1.8433133001650284e-07, + "loss": 0.2437, + "step": 45752 + }, + { + "epoch": 0.9159072141731102, + "grad_norm": 0.9924407005310059, + "learning_rate": 1.8424412743270393e-07, + "loss": 0.3286, + "step": 45753 + }, + { + "epoch": 0.9159272326902385, + "grad_norm": 1.0928370952606201, + "learning_rate": 1.8415694509330728e-07, + "loss": 0.3106, + "step": 45754 + }, + { + "epoch": 0.9159472512073669, + "grad_norm": 1.1282211542129517, + "learning_rate": 1.840697829986776e-07, + "loss": 0.321, + "step": 45755 + }, + { + "epoch": 0.9159672697244952, + "grad_norm": 1.0544557571411133, + "learning_rate": 1.8398264114918297e-07, + "loss": 0.3282, + "step": 45756 + }, + { + "epoch": 0.9159872882416235, + "grad_norm": 1.0997754335403442, + "learning_rate": 1.8389551954518802e-07, + "loss": 0.3097, + "step": 45757 + }, + { + "epoch": 0.9160073067587519, + "grad_norm": 1.2080119848251343, + "learning_rate": 1.838084181870603e-07, + "loss": 0.2608, + "step": 45758 + }, + { + "epoch": 0.9160273252758802, + "grad_norm": 1.1454256772994995, + "learning_rate": 1.8372133707516558e-07, + "loss": 0.3178, + "step": 45759 + }, + { + "epoch": 0.9160473437930086, + "grad_norm": 1.2325853109359741, + "learning_rate": 1.8363427620986973e-07, + "loss": 0.3339, + "step": 45760 + }, + { + "epoch": 0.9160673623101369, + "grad_norm": 1.1740778684616089, + "learning_rate": 1.835472355915391e-07, + "loss": 0.2691, + "step": 45761 + }, + { + "epoch": 0.9160873808272653, + "grad_norm": 1.2218881845474243, + "learning_rate": 1.8346021522053891e-07, + "loss": 0.3258, + "step": 45762 + }, + { + "epoch": 0.9161073993443936, + "grad_norm": 1.0760390758514404, + "learning_rate": 1.8337321509723615e-07, + "loss": 0.2895, + "step": 45763 + }, + { + "epoch": 0.9161274178615219, + "grad_norm": 1.0391536951065063, + "learning_rate": 1.8328623522199494e-07, + "loss": 0.3018, + "step": 45764 + }, + { + "epoch": 0.9161474363786503, + "grad_norm": 1.1108319759368896, + "learning_rate": 1.8319927559518225e-07, + "loss": 0.2988, + "step": 45765 + }, + { + "epoch": 0.9161674548957786, + "grad_norm": 1.1743022203445435, + "learning_rate": 1.8311233621716274e-07, + "loss": 0.2792, + "step": 45766 + }, + { + "epoch": 0.916187473412907, + "grad_norm": 0.9493387341499329, + "learning_rate": 1.8302541708830336e-07, + "loss": 0.2859, + "step": 45767 + }, + { + "epoch": 0.9162074919300353, + "grad_norm": 1.1671174764633179, + "learning_rate": 1.8293851820896824e-07, + "loss": 0.3321, + "step": 45768 + }, + { + "epoch": 0.9162275104471637, + "grad_norm": 1.272980809211731, + "learning_rate": 1.8285163957952267e-07, + "loss": 0.2596, + "step": 45769 + }, + { + "epoch": 0.916247528964292, + "grad_norm": 1.1051898002624512, + "learning_rate": 1.8276478120033193e-07, + "loss": 0.3077, + "step": 45770 + }, + { + "epoch": 0.9162675474814204, + "grad_norm": 1.1216020584106445, + "learning_rate": 1.826779430717618e-07, + "loss": 0.3071, + "step": 45771 + }, + { + "epoch": 0.9162875659985487, + "grad_norm": 1.1158462762832642, + "learning_rate": 1.8259112519417698e-07, + "loss": 0.2947, + "step": 45772 + }, + { + "epoch": 0.916307584515677, + "grad_norm": 1.0781009197235107, + "learning_rate": 1.8250432756794223e-07, + "loss": 0.2729, + "step": 45773 + }, + { + "epoch": 0.9163276030328054, + "grad_norm": 1.0732485055923462, + "learning_rate": 1.8241755019342277e-07, + "loss": 0.291, + "step": 45774 + }, + { + "epoch": 0.9163476215499337, + "grad_norm": 1.1594635248184204, + "learning_rate": 1.8233079307098277e-07, + "loss": 0.3123, + "step": 45775 + }, + { + "epoch": 0.9163676400670621, + "grad_norm": 1.0554364919662476, + "learning_rate": 1.8224405620098752e-07, + "loss": 0.2634, + "step": 45776 + }, + { + "epoch": 0.9163876585841904, + "grad_norm": 1.1260031461715698, + "learning_rate": 1.8215733958380056e-07, + "loss": 0.3157, + "step": 45777 + }, + { + "epoch": 0.9164076771013188, + "grad_norm": 1.8995565176010132, + "learning_rate": 1.8207064321978884e-07, + "loss": 0.7188, + "step": 45778 + }, + { + "epoch": 0.9164276956184471, + "grad_norm": 1.0988202095031738, + "learning_rate": 1.8198396710931377e-07, + "loss": 0.2794, + "step": 45779 + }, + { + "epoch": 0.9164477141355754, + "grad_norm": 2.0259106159210205, + "learning_rate": 1.8189731125274224e-07, + "loss": 0.73, + "step": 45780 + }, + { + "epoch": 0.9164677326527038, + "grad_norm": 1.1785907745361328, + "learning_rate": 1.8181067565043787e-07, + "loss": 0.2855, + "step": 45781 + }, + { + "epoch": 0.916487751169832, + "grad_norm": 1.1252490282058716, + "learning_rate": 1.8172406030276425e-07, + "loss": 0.2468, + "step": 45782 + }, + { + "epoch": 0.9165077696869605, + "grad_norm": 1.0936610698699951, + "learning_rate": 1.8163746521008553e-07, + "loss": 0.3144, + "step": 45783 + }, + { + "epoch": 0.9165277882040888, + "grad_norm": 1.1660926342010498, + "learning_rate": 1.8155089037276587e-07, + "loss": 0.3176, + "step": 45784 + }, + { + "epoch": 0.9165478067212172, + "grad_norm": 1.923211932182312, + "learning_rate": 1.814643357911694e-07, + "loss": 0.7187, + "step": 45785 + }, + { + "epoch": 0.9165678252383455, + "grad_norm": 1.0548219680786133, + "learning_rate": 1.813778014656603e-07, + "loss": 0.2963, + "step": 45786 + }, + { + "epoch": 0.9165878437554739, + "grad_norm": 1.7292059659957886, + "learning_rate": 1.8129128739660217e-07, + "loss": 0.6953, + "step": 45787 + }, + { + "epoch": 0.9166078622726022, + "grad_norm": 1.083051085472107, + "learning_rate": 1.812047935843575e-07, + "loss": 0.3061, + "step": 45788 + }, + { + "epoch": 0.9166278807897305, + "grad_norm": 1.1422247886657715, + "learning_rate": 1.8111832002929153e-07, + "loss": 0.2863, + "step": 45789 + }, + { + "epoch": 0.9166478993068589, + "grad_norm": 1.1053053140640259, + "learning_rate": 1.810318667317662e-07, + "loss": 0.3017, + "step": 45790 + }, + { + "epoch": 0.9166679178239872, + "grad_norm": 1.043148159980774, + "learning_rate": 1.809454336921468e-07, + "loss": 0.2763, + "step": 45791 + }, + { + "epoch": 0.9166879363411156, + "grad_norm": 1.0273007154464722, + "learning_rate": 1.8085902091079577e-07, + "loss": 0.2609, + "step": 45792 + }, + { + "epoch": 0.9167079548582439, + "grad_norm": 1.1073588132858276, + "learning_rate": 1.8077262838807564e-07, + "loss": 0.3079, + "step": 45793 + }, + { + "epoch": 0.9167279733753723, + "grad_norm": 0.9677736163139343, + "learning_rate": 1.8068625612435053e-07, + "loss": 0.2157, + "step": 45794 + }, + { + "epoch": 0.9167479918925006, + "grad_norm": 1.2251837253570557, + "learning_rate": 1.8059990411998353e-07, + "loss": 0.286, + "step": 45795 + }, + { + "epoch": 0.9167680104096289, + "grad_norm": 1.0855889320373535, + "learning_rate": 1.8051357237533762e-07, + "loss": 0.3136, + "step": 45796 + }, + { + "epoch": 0.9167880289267573, + "grad_norm": 1.166052222251892, + "learning_rate": 1.8042726089077478e-07, + "loss": 0.3005, + "step": 45797 + }, + { + "epoch": 0.9168080474438856, + "grad_norm": 1.4621336460113525, + "learning_rate": 1.8034096966665914e-07, + "loss": 0.2833, + "step": 45798 + }, + { + "epoch": 0.916828065961014, + "grad_norm": 1.0651013851165771, + "learning_rate": 1.8025469870335267e-07, + "loss": 0.2659, + "step": 45799 + }, + { + "epoch": 0.9168480844781423, + "grad_norm": 1.0358763933181763, + "learning_rate": 1.8016844800121836e-07, + "loss": 0.2563, + "step": 45800 + }, + { + "epoch": 0.9168681029952707, + "grad_norm": 1.062985897064209, + "learning_rate": 1.8008221756061816e-07, + "loss": 0.2775, + "step": 45801 + }, + { + "epoch": 0.916888121512399, + "grad_norm": 1.0705316066741943, + "learning_rate": 1.7999600738191569e-07, + "loss": 0.3347, + "step": 45802 + }, + { + "epoch": 0.9169081400295274, + "grad_norm": 1.3284481763839722, + "learning_rate": 1.7990981746547176e-07, + "loss": 0.3193, + "step": 45803 + }, + { + "epoch": 0.9169281585466557, + "grad_norm": 1.1935017108917236, + "learning_rate": 1.7982364781165053e-07, + "loss": 0.3509, + "step": 45804 + }, + { + "epoch": 0.916948177063784, + "grad_norm": 1.1028554439544678, + "learning_rate": 1.7973749842081335e-07, + "loss": 0.2836, + "step": 45805 + }, + { + "epoch": 0.9169681955809124, + "grad_norm": 1.148008942604065, + "learning_rate": 1.7965136929332162e-07, + "loss": 0.3155, + "step": 45806 + }, + { + "epoch": 0.9169882140980407, + "grad_norm": 1.9321141242980957, + "learning_rate": 1.7956526042953893e-07, + "loss": 0.7802, + "step": 45807 + }, + { + "epoch": 0.9170082326151691, + "grad_norm": 1.273065209388733, + "learning_rate": 1.7947917182982666e-07, + "loss": 0.3461, + "step": 45808 + }, + { + "epoch": 0.9170282511322974, + "grad_norm": 1.0090407133102417, + "learning_rate": 1.793931034945462e-07, + "loss": 0.273, + "step": 45809 + }, + { + "epoch": 0.9170482696494258, + "grad_norm": 1.0762310028076172, + "learning_rate": 1.7930705542405947e-07, + "loss": 0.2527, + "step": 45810 + }, + { + "epoch": 0.9170682881665541, + "grad_norm": 1.0831362009048462, + "learning_rate": 1.7922102761872896e-07, + "loss": 0.2577, + "step": 45811 + }, + { + "epoch": 0.9170883066836824, + "grad_norm": 1.1083818674087524, + "learning_rate": 1.7913502007891604e-07, + "loss": 0.2731, + "step": 45812 + }, + { + "epoch": 0.9171083252008108, + "grad_norm": 1.1081591844558716, + "learning_rate": 1.7904903280498153e-07, + "loss": 0.3133, + "step": 45813 + }, + { + "epoch": 0.917128343717939, + "grad_norm": 1.0363410711288452, + "learning_rate": 1.7896306579728684e-07, + "loss": 0.2501, + "step": 45814 + }, + { + "epoch": 0.9171483622350675, + "grad_norm": 1.1526437997817993, + "learning_rate": 1.7887711905619497e-07, + "loss": 0.2676, + "step": 45815 + }, + { + "epoch": 0.9171683807521958, + "grad_norm": 0.956182062625885, + "learning_rate": 1.7879119258206512e-07, + "loss": 0.2921, + "step": 45816 + }, + { + "epoch": 0.9171883992693242, + "grad_norm": 1.1476532220840454, + "learning_rate": 1.787052863752603e-07, + "loss": 0.3158, + "step": 45817 + }, + { + "epoch": 0.9172084177864525, + "grad_norm": 1.1508655548095703, + "learning_rate": 1.7861940043614136e-07, + "loss": 0.2984, + "step": 45818 + }, + { + "epoch": 0.9172284363035809, + "grad_norm": 1.0538685321807861, + "learning_rate": 1.7853353476506796e-07, + "loss": 0.2792, + "step": 45819 + }, + { + "epoch": 0.9172484548207092, + "grad_norm": 1.283413290977478, + "learning_rate": 1.7844768936240264e-07, + "loss": 0.2891, + "step": 45820 + }, + { + "epoch": 0.9172684733378375, + "grad_norm": 1.052093505859375, + "learning_rate": 1.7836186422850509e-07, + "loss": 0.3196, + "step": 45821 + }, + { + "epoch": 0.9172884918549659, + "grad_norm": 1.1342476606369019, + "learning_rate": 1.7827605936373726e-07, + "loss": 0.3171, + "step": 45822 + }, + { + "epoch": 0.9173085103720942, + "grad_norm": 1.129760980606079, + "learning_rate": 1.781902747684583e-07, + "loss": 0.2873, + "step": 45823 + }, + { + "epoch": 0.9173285288892226, + "grad_norm": 1.1494086980819702, + "learning_rate": 1.7810451044303067e-07, + "loss": 0.3084, + "step": 45824 + }, + { + "epoch": 0.9173485474063509, + "grad_norm": 1.0855518579483032, + "learning_rate": 1.7801876638781356e-07, + "loss": 0.2606, + "step": 45825 + }, + { + "epoch": 0.9173685659234793, + "grad_norm": 1.0817128419876099, + "learning_rate": 1.7793304260316835e-07, + "loss": 0.2944, + "step": 45826 + }, + { + "epoch": 0.9173885844406076, + "grad_norm": 1.0482196807861328, + "learning_rate": 1.7784733908945417e-07, + "loss": 0.2506, + "step": 45827 + }, + { + "epoch": 0.9174086029577359, + "grad_norm": 0.9932364225387573, + "learning_rate": 1.777616558470324e-07, + "loss": 0.2892, + "step": 45828 + }, + { + "epoch": 0.9174286214748643, + "grad_norm": 1.8595407009124756, + "learning_rate": 1.7767599287626226e-07, + "loss": 0.7768, + "step": 45829 + }, + { + "epoch": 0.9174486399919926, + "grad_norm": 1.0871676206588745, + "learning_rate": 1.775903501775056e-07, + "loss": 0.2704, + "step": 45830 + }, + { + "epoch": 0.917468658509121, + "grad_norm": 1.1808964014053345, + "learning_rate": 1.7750472775112048e-07, + "loss": 0.2818, + "step": 45831 + }, + { + "epoch": 0.9174886770262493, + "grad_norm": 1.0952543020248413, + "learning_rate": 1.7741912559746778e-07, + "loss": 0.2638, + "step": 45832 + }, + { + "epoch": 0.9175086955433777, + "grad_norm": 1.1410448551177979, + "learning_rate": 1.773335437169077e-07, + "loss": 0.2994, + "step": 45833 + }, + { + "epoch": 0.917528714060506, + "grad_norm": 1.137434959411621, + "learning_rate": 1.7724798210979944e-07, + "loss": 0.3075, + "step": 45834 + }, + { + "epoch": 0.9175487325776344, + "grad_norm": 1.295447587966919, + "learning_rate": 1.7716244077650267e-07, + "loss": 0.2989, + "step": 45835 + }, + { + "epoch": 0.9175687510947627, + "grad_norm": 1.1298424005508423, + "learning_rate": 1.7707691971737718e-07, + "loss": 0.2962, + "step": 45836 + }, + { + "epoch": 0.917588769611891, + "grad_norm": 1.099031686782837, + "learning_rate": 1.7699141893278204e-07, + "loss": 0.3051, + "step": 45837 + }, + { + "epoch": 0.9176087881290194, + "grad_norm": 1.2114911079406738, + "learning_rate": 1.7690593842307757e-07, + "loss": 0.3611, + "step": 45838 + }, + { + "epoch": 0.9176288066461477, + "grad_norm": 1.063836932182312, + "learning_rate": 1.7682047818862236e-07, + "loss": 0.2919, + "step": 45839 + }, + { + "epoch": 0.9176488251632761, + "grad_norm": 0.9588640928268433, + "learning_rate": 1.7673503822977556e-07, + "loss": 0.2616, + "step": 45840 + }, + { + "epoch": 0.9176688436804044, + "grad_norm": 1.0846213102340698, + "learning_rate": 1.7664961854689688e-07, + "loss": 0.2825, + "step": 45841 + }, + { + "epoch": 0.9176888621975328, + "grad_norm": 1.0380586385726929, + "learning_rate": 1.7656421914034493e-07, + "loss": 0.258, + "step": 45842 + }, + { + "epoch": 0.9177088807146611, + "grad_norm": 1.1078959703445435, + "learning_rate": 1.7647884001048e-07, + "loss": 0.309, + "step": 45843 + }, + { + "epoch": 0.9177288992317894, + "grad_norm": 1.1115802526474, + "learning_rate": 1.7639348115765952e-07, + "loss": 0.3235, + "step": 45844 + }, + { + "epoch": 0.9177489177489178, + "grad_norm": 1.9254602193832397, + "learning_rate": 1.7630814258224272e-07, + "loss": 0.7436, + "step": 45845 + }, + { + "epoch": 0.917768936266046, + "grad_norm": 1.0498285293579102, + "learning_rate": 1.7622282428458758e-07, + "loss": 0.2923, + "step": 45846 + }, + { + "epoch": 0.9177889547831745, + "grad_norm": 1.1446053981781006, + "learning_rate": 1.76137526265055e-07, + "loss": 0.2984, + "step": 45847 + }, + { + "epoch": 0.9178089733003028, + "grad_norm": 1.1301496028900146, + "learning_rate": 1.7605224852400126e-07, + "loss": 0.2593, + "step": 45848 + }, + { + "epoch": 0.9178289918174312, + "grad_norm": 1.1870638132095337, + "learning_rate": 1.7596699106178615e-07, + "loss": 0.3223, + "step": 45849 + }, + { + "epoch": 0.9178490103345595, + "grad_norm": 1.8860033750534058, + "learning_rate": 1.7588175387876716e-07, + "loss": 0.7462, + "step": 45850 + }, + { + "epoch": 0.9178690288516879, + "grad_norm": 2.0673933029174805, + "learning_rate": 1.7579653697530397e-07, + "loss": 0.7533, + "step": 45851 + }, + { + "epoch": 0.9178890473688162, + "grad_norm": 1.1550605297088623, + "learning_rate": 1.7571134035175353e-07, + "loss": 0.318, + "step": 45852 + }, + { + "epoch": 0.9179090658859445, + "grad_norm": 1.0742615461349487, + "learning_rate": 1.7562616400847388e-07, + "loss": 0.2644, + "step": 45853 + }, + { + "epoch": 0.9179290844030729, + "grad_norm": 1.8045638799667358, + "learning_rate": 1.7554100794582417e-07, + "loss": 0.7442, + "step": 45854 + }, + { + "epoch": 0.9179491029202012, + "grad_norm": 1.8721224069595337, + "learning_rate": 1.7545587216416138e-07, + "loss": 0.7142, + "step": 45855 + }, + { + "epoch": 0.9179691214373296, + "grad_norm": 1.9087384939193726, + "learning_rate": 1.7537075666384463e-07, + "loss": 0.7687, + "step": 45856 + }, + { + "epoch": 0.9179891399544579, + "grad_norm": 1.3524130582809448, + "learning_rate": 1.7528566144523084e-07, + "loss": 0.3348, + "step": 45857 + }, + { + "epoch": 0.9180091584715863, + "grad_norm": 1.2009451389312744, + "learning_rate": 1.7520058650867755e-07, + "loss": 0.3446, + "step": 45858 + }, + { + "epoch": 0.9180291769887146, + "grad_norm": 1.1736667156219482, + "learning_rate": 1.7511553185454222e-07, + "loss": 0.3158, + "step": 45859 + }, + { + "epoch": 0.9180491955058429, + "grad_norm": 1.147814154624939, + "learning_rate": 1.7503049748318346e-07, + "loss": 0.3108, + "step": 45860 + }, + { + "epoch": 0.9180692140229713, + "grad_norm": 1.4618256092071533, + "learning_rate": 1.7494548339495822e-07, + "loss": 0.2962, + "step": 45861 + }, + { + "epoch": 0.9180892325400996, + "grad_norm": 1.1065360307693481, + "learning_rate": 1.7486048959022396e-07, + "loss": 0.2841, + "step": 45862 + }, + { + "epoch": 0.918109251057228, + "grad_norm": 0.9872549772262573, + "learning_rate": 1.7477551606933652e-07, + "loss": 0.2661, + "step": 45863 + }, + { + "epoch": 0.9181292695743563, + "grad_norm": 1.0764027833938599, + "learning_rate": 1.746905628326556e-07, + "loss": 0.2617, + "step": 45864 + }, + { + "epoch": 0.9181492880914847, + "grad_norm": 1.1704891920089722, + "learning_rate": 1.746056298805371e-07, + "loss": 0.3479, + "step": 45865 + }, + { + "epoch": 0.918169306608613, + "grad_norm": 1.0306795835494995, + "learning_rate": 1.745207172133373e-07, + "loss": 0.3105, + "step": 45866 + }, + { + "epoch": 0.9181893251257414, + "grad_norm": 1.021086573600769, + "learning_rate": 1.7443582483141486e-07, + "loss": 0.3137, + "step": 45867 + }, + { + "epoch": 0.9182093436428697, + "grad_norm": 1.1550602912902832, + "learning_rate": 1.7435095273512503e-07, + "loss": 0.2938, + "step": 45868 + }, + { + "epoch": 0.918229362159998, + "grad_norm": 0.9835066795349121, + "learning_rate": 1.7426610092482588e-07, + "loss": 0.2751, + "step": 45869 + }, + { + "epoch": 0.9182493806771264, + "grad_norm": 1.1078404188156128, + "learning_rate": 1.7418126940087376e-07, + "loss": 0.3056, + "step": 45870 + }, + { + "epoch": 0.9182693991942547, + "grad_norm": 1.7529926300048828, + "learning_rate": 1.7409645816362452e-07, + "loss": 0.7205, + "step": 45871 + }, + { + "epoch": 0.9182894177113831, + "grad_norm": 1.0904430150985718, + "learning_rate": 1.740116672134351e-07, + "loss": 0.3555, + "step": 45872 + }, + { + "epoch": 0.9183094362285114, + "grad_norm": 0.9979493618011475, + "learning_rate": 1.739268965506624e-07, + "loss": 0.2772, + "step": 45873 + }, + { + "epoch": 0.9183294547456398, + "grad_norm": 1.1435356140136719, + "learning_rate": 1.738421461756623e-07, + "loss": 0.3153, + "step": 45874 + }, + { + "epoch": 0.9183494732627681, + "grad_norm": 1.0496033430099487, + "learning_rate": 1.737574160887917e-07, + "loss": 0.2953, + "step": 45875 + }, + { + "epoch": 0.9183694917798964, + "grad_norm": 1.1206810474395752, + "learning_rate": 1.736727062904059e-07, + "loss": 0.3133, + "step": 45876 + }, + { + "epoch": 0.9183895102970248, + "grad_norm": 1.1553820371627808, + "learning_rate": 1.7358801678086068e-07, + "loss": 0.2484, + "step": 45877 + }, + { + "epoch": 0.918409528814153, + "grad_norm": 1.078431487083435, + "learning_rate": 1.7350334756051413e-07, + "loss": 0.2972, + "step": 45878 + }, + { + "epoch": 0.9184295473312815, + "grad_norm": 1.424530267715454, + "learning_rate": 1.7341869862971928e-07, + "loss": 0.3542, + "step": 45879 + }, + { + "epoch": 0.9184495658484098, + "grad_norm": 1.1522910594940186, + "learning_rate": 1.7333406998883473e-07, + "loss": 0.3171, + "step": 45880 + }, + { + "epoch": 0.9184695843655382, + "grad_norm": 1.203560709953308, + "learning_rate": 1.7324946163821465e-07, + "loss": 0.3326, + "step": 45881 + }, + { + "epoch": 0.9184896028826665, + "grad_norm": 1.218279480934143, + "learning_rate": 1.7316487357821488e-07, + "loss": 0.32, + "step": 45882 + }, + { + "epoch": 0.9185096213997949, + "grad_norm": 1.2564750909805298, + "learning_rate": 1.7308030580919178e-07, + "loss": 0.2816, + "step": 45883 + }, + { + "epoch": 0.9185296399169232, + "grad_norm": 1.0483382940292358, + "learning_rate": 1.7299575833150063e-07, + "loss": 0.266, + "step": 45884 + }, + { + "epoch": 0.9185496584340515, + "grad_norm": 1.0136027336120605, + "learning_rate": 1.72911231145495e-07, + "loss": 0.2938, + "step": 45885 + }, + { + "epoch": 0.9185696769511799, + "grad_norm": 1.1131269931793213, + "learning_rate": 1.7282672425153302e-07, + "loss": 0.2855, + "step": 45886 + }, + { + "epoch": 0.9185896954683082, + "grad_norm": 1.1430515050888062, + "learning_rate": 1.7274223764996879e-07, + "loss": 0.2582, + "step": 45887 + }, + { + "epoch": 0.9186097139854366, + "grad_norm": 1.2060011625289917, + "learning_rate": 1.7265777134115702e-07, + "loss": 0.3255, + "step": 45888 + }, + { + "epoch": 0.9186297325025649, + "grad_norm": 1.1440787315368652, + "learning_rate": 1.7257332532545301e-07, + "loss": 0.2786, + "step": 45889 + }, + { + "epoch": 0.9186497510196933, + "grad_norm": 1.1049004793167114, + "learning_rate": 1.7248889960321148e-07, + "loss": 0.3719, + "step": 45890 + }, + { + "epoch": 0.9186697695368216, + "grad_norm": 1.0067005157470703, + "learning_rate": 1.7240449417478877e-07, + "loss": 0.2659, + "step": 45891 + }, + { + "epoch": 0.9186897880539499, + "grad_norm": 1.1656293869018555, + "learning_rate": 1.7232010904053743e-07, + "loss": 0.3155, + "step": 45892 + }, + { + "epoch": 0.9187098065710783, + "grad_norm": 1.1245009899139404, + "learning_rate": 1.7223574420081434e-07, + "loss": 0.3115, + "step": 45893 + }, + { + "epoch": 0.9187298250882066, + "grad_norm": 1.152315616607666, + "learning_rate": 1.7215139965597205e-07, + "loss": 0.3213, + "step": 45894 + }, + { + "epoch": 0.918749843605335, + "grad_norm": 1.225813627243042, + "learning_rate": 1.7206707540636748e-07, + "loss": 0.2659, + "step": 45895 + }, + { + "epoch": 0.9187698621224633, + "grad_norm": 1.092580795288086, + "learning_rate": 1.7198277145235366e-07, + "loss": 0.2534, + "step": 45896 + }, + { + "epoch": 0.9187898806395917, + "grad_norm": 1.0965991020202637, + "learning_rate": 1.7189848779428586e-07, + "loss": 0.2818, + "step": 45897 + }, + { + "epoch": 0.91880989915672, + "grad_norm": 1.2449157238006592, + "learning_rate": 1.7181422443251718e-07, + "loss": 0.3142, + "step": 45898 + }, + { + "epoch": 0.9188299176738484, + "grad_norm": 1.0988720655441284, + "learning_rate": 1.7172998136740226e-07, + "loss": 0.2921, + "step": 45899 + }, + { + "epoch": 0.9188499361909767, + "grad_norm": 1.1489980220794678, + "learning_rate": 1.716457585992959e-07, + "loss": 0.2665, + "step": 45900 + }, + { + "epoch": 0.918869954708105, + "grad_norm": 1.0896426439285278, + "learning_rate": 1.7156155612855108e-07, + "loss": 0.3168, + "step": 45901 + }, + { + "epoch": 0.9188899732252334, + "grad_norm": 1.157909870147705, + "learning_rate": 1.714773739555231e-07, + "loss": 0.3288, + "step": 45902 + }, + { + "epoch": 0.9189099917423617, + "grad_norm": 1.2581770420074463, + "learning_rate": 1.7139321208056449e-07, + "loss": 0.3066, + "step": 45903 + }, + { + "epoch": 0.9189300102594901, + "grad_norm": 1.0411226749420166, + "learning_rate": 1.713090705040299e-07, + "loss": 0.3313, + "step": 45904 + }, + { + "epoch": 0.9189500287766184, + "grad_norm": 1.0698022842407227, + "learning_rate": 1.7122494922627297e-07, + "loss": 0.2994, + "step": 45905 + }, + { + "epoch": 0.9189700472937468, + "grad_norm": 1.1455745697021484, + "learning_rate": 1.7114084824764733e-07, + "loss": 0.3041, + "step": 45906 + }, + { + "epoch": 0.9189900658108751, + "grad_norm": 1.148835301399231, + "learning_rate": 1.7105676756850598e-07, + "loss": 0.2854, + "step": 45907 + }, + { + "epoch": 0.9190100843280034, + "grad_norm": 1.1150017976760864, + "learning_rate": 1.7097270718920257e-07, + "loss": 0.3077, + "step": 45908 + }, + { + "epoch": 0.9190301028451318, + "grad_norm": 1.1402273178100586, + "learning_rate": 1.7088866711009123e-07, + "loss": 0.301, + "step": 45909 + }, + { + "epoch": 0.91905012136226, + "grad_norm": 1.1742630004882812, + "learning_rate": 1.7080464733152503e-07, + "loss": 0.307, + "step": 45910 + }, + { + "epoch": 0.9190701398793885, + "grad_norm": 1.3398218154907227, + "learning_rate": 1.7072064785385646e-07, + "loss": 0.333, + "step": 45911 + }, + { + "epoch": 0.9190901583965168, + "grad_norm": 1.11098051071167, + "learning_rate": 1.7063666867743856e-07, + "loss": 0.2697, + "step": 45912 + }, + { + "epoch": 0.9191101769136452, + "grad_norm": 1.1401820182800293, + "learning_rate": 1.705527098026255e-07, + "loss": 0.2779, + "step": 45913 + }, + { + "epoch": 0.9191301954307735, + "grad_norm": 1.2177796363830566, + "learning_rate": 1.7046877122976923e-07, + "loss": 0.3307, + "step": 45914 + }, + { + "epoch": 0.9191502139479019, + "grad_norm": 1.054758906364441, + "learning_rate": 1.703848529592228e-07, + "loss": 0.3535, + "step": 45915 + }, + { + "epoch": 0.9191702324650302, + "grad_norm": 1.1127808094024658, + "learning_rate": 1.7030095499133925e-07, + "loss": 0.299, + "step": 45916 + }, + { + "epoch": 0.9191902509821585, + "grad_norm": 1.8263579607009888, + "learning_rate": 1.7021707732647107e-07, + "loss": 0.7273, + "step": 45917 + }, + { + "epoch": 0.9192102694992869, + "grad_norm": 1.730924367904663, + "learning_rate": 1.7013321996497022e-07, + "loss": 0.736, + "step": 45918 + }, + { + "epoch": 0.9192302880164152, + "grad_norm": 1.8406950235366821, + "learning_rate": 1.7004938290719087e-07, + "loss": 0.6963, + "step": 45919 + }, + { + "epoch": 0.9192503065335436, + "grad_norm": 1.0901472568511963, + "learning_rate": 1.6996556615348437e-07, + "loss": 0.2594, + "step": 45920 + }, + { + "epoch": 0.9192703250506719, + "grad_norm": 1.0699348449707031, + "learning_rate": 1.6988176970420267e-07, + "loss": 0.2773, + "step": 45921 + }, + { + "epoch": 0.9192903435678003, + "grad_norm": 1.8151273727416992, + "learning_rate": 1.6979799355969938e-07, + "loss": 0.735, + "step": 45922 + }, + { + "epoch": 0.9193103620849286, + "grad_norm": 1.3964951038360596, + "learning_rate": 1.6971423772032536e-07, + "loss": 0.3581, + "step": 45923 + }, + { + "epoch": 0.9193303806020569, + "grad_norm": 1.0269811153411865, + "learning_rate": 1.6963050218643363e-07, + "loss": 0.2966, + "step": 45924 + }, + { + "epoch": 0.9193503991191853, + "grad_norm": 1.244611382484436, + "learning_rate": 1.69546786958375e-07, + "loss": 0.2971, + "step": 45925 + }, + { + "epoch": 0.9193704176363136, + "grad_norm": 1.171271562576294, + "learning_rate": 1.6946309203650257e-07, + "loss": 0.3129, + "step": 45926 + }, + { + "epoch": 0.919390436153442, + "grad_norm": 1.1229599714279175, + "learning_rate": 1.6937941742116826e-07, + "loss": 0.2937, + "step": 45927 + }, + { + "epoch": 0.9194104546705703, + "grad_norm": 1.0721958875656128, + "learning_rate": 1.6929576311272288e-07, + "loss": 0.3057, + "step": 45928 + }, + { + "epoch": 0.9194304731876987, + "grad_norm": 1.178397536277771, + "learning_rate": 1.6921212911151785e-07, + "loss": 0.2701, + "step": 45929 + }, + { + "epoch": 0.919450491704827, + "grad_norm": 0.9676401019096375, + "learning_rate": 1.691285154179062e-07, + "loss": 0.2372, + "step": 45930 + }, + { + "epoch": 0.9194705102219554, + "grad_norm": 0.9888320565223694, + "learning_rate": 1.690449220322382e-07, + "loss": 0.2804, + "step": 45931 + }, + { + "epoch": 0.9194905287390837, + "grad_norm": 1.0856685638427734, + "learning_rate": 1.6896134895486637e-07, + "loss": 0.3231, + "step": 45932 + }, + { + "epoch": 0.919510547256212, + "grad_norm": 1.0547010898590088, + "learning_rate": 1.6887779618614098e-07, + "loss": 0.2941, + "step": 45933 + }, + { + "epoch": 0.9195305657733404, + "grad_norm": 1.954025149345398, + "learning_rate": 1.687942637264134e-07, + "loss": 0.7406, + "step": 45934 + }, + { + "epoch": 0.9195505842904687, + "grad_norm": 1.1758171319961548, + "learning_rate": 1.6871075157603556e-07, + "loss": 0.3302, + "step": 45935 + }, + { + "epoch": 0.9195706028075971, + "grad_norm": 1.0816826820373535, + "learning_rate": 1.6862725973535777e-07, + "loss": 0.2876, + "step": 45936 + }, + { + "epoch": 0.9195906213247254, + "grad_norm": 1.2059613466262817, + "learning_rate": 1.6854378820473139e-07, + "loss": 0.3452, + "step": 45937 + }, + { + "epoch": 0.9196106398418538, + "grad_norm": 1.1528905630111694, + "learning_rate": 1.6846033698450726e-07, + "loss": 0.3073, + "step": 45938 + }, + { + "epoch": 0.9196306583589821, + "grad_norm": 1.0748649835586548, + "learning_rate": 1.6837690607503566e-07, + "loss": 0.2919, + "step": 45939 + }, + { + "epoch": 0.9196506768761104, + "grad_norm": 1.0928338766098022, + "learning_rate": 1.682934954766685e-07, + "loss": 0.2816, + "step": 45940 + }, + { + "epoch": 0.9196706953932388, + "grad_norm": 0.9854532480239868, + "learning_rate": 1.68210105189755e-07, + "loss": 0.315, + "step": 45941 + }, + { + "epoch": 0.919690713910367, + "grad_norm": 1.0222547054290771, + "learning_rate": 1.681267352146465e-07, + "loss": 0.2659, + "step": 45942 + }, + { + "epoch": 0.9197107324274955, + "grad_norm": 1.1535325050354004, + "learning_rate": 1.6804338555169385e-07, + "loss": 0.2522, + "step": 45943 + }, + { + "epoch": 0.9197307509446238, + "grad_norm": 1.3017569780349731, + "learning_rate": 1.6796005620124623e-07, + "loss": 0.3123, + "step": 45944 + }, + { + "epoch": 0.9197507694617522, + "grad_norm": 1.0618008375167847, + "learning_rate": 1.67876747163655e-07, + "loss": 0.2764, + "step": 45945 + }, + { + "epoch": 0.9197707879788805, + "grad_norm": 1.122038722038269, + "learning_rate": 1.677934584392704e-07, + "loss": 0.2972, + "step": 45946 + }, + { + "epoch": 0.9197908064960089, + "grad_norm": 1.173250675201416, + "learning_rate": 1.6771019002844168e-07, + "loss": 0.3102, + "step": 45947 + }, + { + "epoch": 0.9198108250131372, + "grad_norm": 0.9600649476051331, + "learning_rate": 1.6762694193151963e-07, + "loss": 0.2775, + "step": 45948 + }, + { + "epoch": 0.9198308435302655, + "grad_norm": 1.037049412727356, + "learning_rate": 1.6754371414885395e-07, + "loss": 0.2764, + "step": 45949 + }, + { + "epoch": 0.9198508620473939, + "grad_norm": 1.0785176753997803, + "learning_rate": 1.6746050668079495e-07, + "loss": 0.2936, + "step": 45950 + }, + { + "epoch": 0.9198708805645222, + "grad_norm": 1.2581411600112915, + "learning_rate": 1.6737731952769176e-07, + "loss": 0.3057, + "step": 45951 + }, + { + "epoch": 0.9198908990816506, + "grad_norm": 1.105582356452942, + "learning_rate": 1.6729415268989358e-07, + "loss": 0.2554, + "step": 45952 + }, + { + "epoch": 0.9199109175987789, + "grad_norm": 1.0736082792282104, + "learning_rate": 1.6721100616775177e-07, + "loss": 0.2889, + "step": 45953 + }, + { + "epoch": 0.9199309361159073, + "grad_norm": 1.1415716409683228, + "learning_rate": 1.671278799616144e-07, + "loss": 0.3229, + "step": 45954 + }, + { + "epoch": 0.9199509546330356, + "grad_norm": 1.1642214059829712, + "learning_rate": 1.6704477407183117e-07, + "loss": 0.2809, + "step": 45955 + }, + { + "epoch": 0.9199709731501639, + "grad_norm": 1.0231571197509766, + "learning_rate": 1.669616884987524e-07, + "loss": 0.2822, + "step": 45956 + }, + { + "epoch": 0.9199909916672923, + "grad_norm": 1.0616631507873535, + "learning_rate": 1.6687862324272553e-07, + "loss": 0.2811, + "step": 45957 + }, + { + "epoch": 0.9200110101844206, + "grad_norm": 1.097044587135315, + "learning_rate": 1.6679557830410144e-07, + "loss": 0.2991, + "step": 45958 + }, + { + "epoch": 0.920031028701549, + "grad_norm": 0.9492030739784241, + "learning_rate": 1.667125536832287e-07, + "loss": 0.3166, + "step": 45959 + }, + { + "epoch": 0.9200510472186773, + "grad_norm": 1.3368974924087524, + "learning_rate": 1.666295493804565e-07, + "loss": 0.3107, + "step": 45960 + }, + { + "epoch": 0.9200710657358057, + "grad_norm": 1.1314669847488403, + "learning_rate": 1.665465653961329e-07, + "loss": 0.2803, + "step": 45961 + }, + { + "epoch": 0.920091084252934, + "grad_norm": 1.1908624172210693, + "learning_rate": 1.6646360173060762e-07, + "loss": 0.2956, + "step": 45962 + }, + { + "epoch": 0.9201111027700624, + "grad_norm": 1.3229039907455444, + "learning_rate": 1.6638065838422978e-07, + "loss": 0.316, + "step": 45963 + }, + { + "epoch": 0.9201311212871907, + "grad_norm": 1.0850419998168945, + "learning_rate": 1.6629773535734694e-07, + "loss": 0.3355, + "step": 45964 + }, + { + "epoch": 0.920151139804319, + "grad_norm": 1.046620488166809, + "learning_rate": 1.662148326503077e-07, + "loss": 0.2488, + "step": 45965 + }, + { + "epoch": 0.9201711583214474, + "grad_norm": 1.8040608167648315, + "learning_rate": 1.6613195026346174e-07, + "loss": 0.7683, + "step": 45966 + }, + { + "epoch": 0.9201911768385757, + "grad_norm": 1.1061079502105713, + "learning_rate": 1.660490881971566e-07, + "loss": 0.2406, + "step": 45967 + }, + { + "epoch": 0.9202111953557041, + "grad_norm": 1.0404108762741089, + "learning_rate": 1.659662464517403e-07, + "loss": 0.2822, + "step": 45968 + }, + { + "epoch": 0.9202312138728324, + "grad_norm": 1.0992059707641602, + "learning_rate": 1.6588342502756204e-07, + "loss": 0.2934, + "step": 45969 + }, + { + "epoch": 0.9202512323899608, + "grad_norm": 1.1466858386993408, + "learning_rate": 1.6580062392496932e-07, + "loss": 0.2944, + "step": 45970 + }, + { + "epoch": 0.9202712509070891, + "grad_norm": 1.0955758094787598, + "learning_rate": 1.657178431443107e-07, + "loss": 0.3138, + "step": 45971 + }, + { + "epoch": 0.9202912694242174, + "grad_norm": 1.8147586584091187, + "learning_rate": 1.6563508268593432e-07, + "loss": 0.8012, + "step": 45972 + }, + { + "epoch": 0.9203112879413458, + "grad_norm": 1.04817533493042, + "learning_rate": 1.655523425501876e-07, + "loss": 0.3355, + "step": 45973 + }, + { + "epoch": 0.920331306458474, + "grad_norm": 1.084166169166565, + "learning_rate": 1.654696227374175e-07, + "loss": 0.2715, + "step": 45974 + }, + { + "epoch": 0.9203513249756025, + "grad_norm": 1.1077736616134644, + "learning_rate": 1.6538692324797377e-07, + "loss": 0.2739, + "step": 45975 + }, + { + "epoch": 0.9203713434927308, + "grad_norm": 1.0664795637130737, + "learning_rate": 1.653042440822028e-07, + "loss": 0.2823, + "step": 45976 + }, + { + "epoch": 0.9203913620098592, + "grad_norm": 1.1344821453094482, + "learning_rate": 1.6522158524045206e-07, + "loss": 0.2933, + "step": 45977 + }, + { + "epoch": 0.9204113805269875, + "grad_norm": 1.2043317556381226, + "learning_rate": 1.6513894672306906e-07, + "loss": 0.2801, + "step": 45978 + }, + { + "epoch": 0.9204313990441159, + "grad_norm": 1.272667646408081, + "learning_rate": 1.6505632853040188e-07, + "loss": 0.3504, + "step": 45979 + }, + { + "epoch": 0.9204514175612442, + "grad_norm": 1.0638400316238403, + "learning_rate": 1.6497373066279744e-07, + "loss": 0.3251, + "step": 45980 + }, + { + "epoch": 0.9204714360783725, + "grad_norm": 1.0857231616973877, + "learning_rate": 1.648911531206021e-07, + "loss": 0.3392, + "step": 45981 + }, + { + "epoch": 0.9204914545955009, + "grad_norm": 1.1075222492218018, + "learning_rate": 1.648085959041651e-07, + "loss": 0.2996, + "step": 45982 + }, + { + "epoch": 0.9205114731126292, + "grad_norm": 1.180023431777954, + "learning_rate": 1.6472605901383109e-07, + "loss": 0.2736, + "step": 45983 + }, + { + "epoch": 0.9205314916297576, + "grad_norm": 1.1643751859664917, + "learning_rate": 1.6464354244994873e-07, + "loss": 0.284, + "step": 45984 + }, + { + "epoch": 0.9205515101468859, + "grad_norm": 1.1332062482833862, + "learning_rate": 1.645610462128644e-07, + "loss": 0.3164, + "step": 45985 + }, + { + "epoch": 0.9205715286640143, + "grad_norm": 1.8864165544509888, + "learning_rate": 1.6447857030292503e-07, + "loss": 0.7253, + "step": 45986 + }, + { + "epoch": 0.9205915471811426, + "grad_norm": 1.3002413511276245, + "learning_rate": 1.6439611472047646e-07, + "loss": 0.24, + "step": 45987 + }, + { + "epoch": 0.9206115656982709, + "grad_norm": 1.3261483907699585, + "learning_rate": 1.6431367946586674e-07, + "loss": 0.3055, + "step": 45988 + }, + { + "epoch": 0.9206315842153993, + "grad_norm": 1.1628646850585938, + "learning_rate": 1.6423126453944115e-07, + "loss": 0.3132, + "step": 45989 + }, + { + "epoch": 0.9206516027325276, + "grad_norm": 1.1823866367340088, + "learning_rate": 1.641488699415472e-07, + "loss": 0.263, + "step": 45990 + }, + { + "epoch": 0.920671621249656, + "grad_norm": 1.1323292255401611, + "learning_rate": 1.640664956725302e-07, + "loss": 0.2895, + "step": 45991 + }, + { + "epoch": 0.9206916397667843, + "grad_norm": 1.3120487928390503, + "learning_rate": 1.6398414173273702e-07, + "loss": 0.2776, + "step": 45992 + }, + { + "epoch": 0.9207116582839127, + "grad_norm": 1.1277897357940674, + "learning_rate": 1.6390180812251355e-07, + "loss": 0.2705, + "step": 45993 + }, + { + "epoch": 0.920731676801041, + "grad_norm": 1.1271452903747559, + "learning_rate": 1.6381949484220617e-07, + "loss": 0.3068, + "step": 45994 + }, + { + "epoch": 0.9207516953181694, + "grad_norm": 1.0736562013626099, + "learning_rate": 1.6373720189216124e-07, + "loss": 0.2928, + "step": 45995 + }, + { + "epoch": 0.9207717138352977, + "grad_norm": 1.1663541793823242, + "learning_rate": 1.636549292727241e-07, + "loss": 0.3015, + "step": 45996 + }, + { + "epoch": 0.920791732352426, + "grad_norm": 0.9785362482070923, + "learning_rate": 1.6357267698424106e-07, + "loss": 0.2672, + "step": 45997 + }, + { + "epoch": 0.9208117508695544, + "grad_norm": 1.067569375038147, + "learning_rate": 1.6349044502705748e-07, + "loss": 0.3004, + "step": 45998 + }, + { + "epoch": 0.9208317693866827, + "grad_norm": 1.0222889184951782, + "learning_rate": 1.634082334015197e-07, + "loss": 0.2998, + "step": 45999 + }, + { + "epoch": 0.9208517879038111, + "grad_norm": 2.3148763179779053, + "learning_rate": 1.6332604210797188e-07, + "loss": 0.272, + "step": 46000 + }, + { + "epoch": 0.9208718064209394, + "grad_norm": 1.2692214250564575, + "learning_rate": 1.6324387114676155e-07, + "loss": 0.2884, + "step": 46001 + }, + { + "epoch": 0.9208918249380678, + "grad_norm": 1.0897972583770752, + "learning_rate": 1.6316172051823287e-07, + "loss": 0.3613, + "step": 46002 + }, + { + "epoch": 0.920911843455196, + "grad_norm": 1.1904321908950806, + "learning_rate": 1.6307959022273168e-07, + "loss": 0.2696, + "step": 46003 + }, + { + "epoch": 0.9209318619723244, + "grad_norm": 1.2479506731033325, + "learning_rate": 1.629974802606027e-07, + "loss": 0.2849, + "step": 46004 + }, + { + "epoch": 0.9209518804894528, + "grad_norm": 1.221642255783081, + "learning_rate": 1.6291539063219065e-07, + "loss": 0.2441, + "step": 46005 + }, + { + "epoch": 0.920971899006581, + "grad_norm": 1.8948469161987305, + "learning_rate": 1.628333213378419e-07, + "loss": 0.6699, + "step": 46006 + }, + { + "epoch": 0.9209919175237095, + "grad_norm": 1.1710216999053955, + "learning_rate": 1.6275127237790066e-07, + "loss": 0.2836, + "step": 46007 + }, + { + "epoch": 0.9210119360408378, + "grad_norm": 1.12979257106781, + "learning_rate": 1.626692437527122e-07, + "loss": 0.265, + "step": 46008 + }, + { + "epoch": 0.9210319545579662, + "grad_norm": 1.1493669748306274, + "learning_rate": 1.6258723546262124e-07, + "loss": 0.2833, + "step": 46009 + }, + { + "epoch": 0.9210519730750945, + "grad_norm": 1.3005353212356567, + "learning_rate": 1.6250524750797247e-07, + "loss": 0.2769, + "step": 46010 + }, + { + "epoch": 0.9210719915922229, + "grad_norm": 2.0156900882720947, + "learning_rate": 1.6242327988911122e-07, + "loss": 0.7423, + "step": 46011 + }, + { + "epoch": 0.9210920101093512, + "grad_norm": 1.0522278547286987, + "learning_rate": 1.6234133260638106e-07, + "loss": 0.2476, + "step": 46012 + }, + { + "epoch": 0.9211120286264795, + "grad_norm": 1.1465767621994019, + "learning_rate": 1.6225940566012733e-07, + "loss": 0.2863, + "step": 46013 + }, + { + "epoch": 0.9211320471436079, + "grad_norm": 1.26909601688385, + "learning_rate": 1.62177499050693e-07, + "loss": 0.3303, + "step": 46014 + }, + { + "epoch": 0.9211520656607362, + "grad_norm": 1.0571142435073853, + "learning_rate": 1.6209561277842346e-07, + "loss": 0.2692, + "step": 46015 + }, + { + "epoch": 0.9211720841778646, + "grad_norm": 1.3369433879852295, + "learning_rate": 1.6201374684366333e-07, + "loss": 0.3192, + "step": 46016 + }, + { + "epoch": 0.9211921026949929, + "grad_norm": 1.2198654413223267, + "learning_rate": 1.619319012467563e-07, + "loss": 0.3342, + "step": 46017 + }, + { + "epoch": 0.9212121212121213, + "grad_norm": 1.1460403203964233, + "learning_rate": 1.6185007598804592e-07, + "loss": 0.3266, + "step": 46018 + }, + { + "epoch": 0.9212321397292496, + "grad_norm": 1.2239148616790771, + "learning_rate": 1.6176827106787696e-07, + "loss": 0.2532, + "step": 46019 + }, + { + "epoch": 0.9212521582463779, + "grad_norm": 1.0163854360580444, + "learning_rate": 1.6168648648659247e-07, + "loss": 0.2704, + "step": 46020 + }, + { + "epoch": 0.9212721767635063, + "grad_norm": 1.1478246450424194, + "learning_rate": 1.6160472224453772e-07, + "loss": 0.2975, + "step": 46021 + }, + { + "epoch": 0.9212921952806346, + "grad_norm": 1.3801798820495605, + "learning_rate": 1.615229783420552e-07, + "loss": 0.2991, + "step": 46022 + }, + { + "epoch": 0.921312213797763, + "grad_norm": 1.1608847379684448, + "learning_rate": 1.6144125477948802e-07, + "loss": 0.249, + "step": 46023 + }, + { + "epoch": 0.9213322323148913, + "grad_norm": 1.1904993057250977, + "learning_rate": 1.6135955155718142e-07, + "loss": 0.301, + "step": 46024 + }, + { + "epoch": 0.9213522508320197, + "grad_norm": 1.144263744354248, + "learning_rate": 1.612778686754779e-07, + "loss": 0.2625, + "step": 46025 + }, + { + "epoch": 0.921372269349148, + "grad_norm": 1.244123935699463, + "learning_rate": 1.6119620613472108e-07, + "loss": 0.2506, + "step": 46026 + }, + { + "epoch": 0.9213922878662764, + "grad_norm": 1.1587802171707153, + "learning_rate": 1.6111456393525294e-07, + "loss": 0.323, + "step": 46027 + }, + { + "epoch": 0.9214123063834047, + "grad_norm": 1.0933327674865723, + "learning_rate": 1.6103294207741872e-07, + "loss": 0.3149, + "step": 46028 + }, + { + "epoch": 0.921432324900533, + "grad_norm": 1.0874203443527222, + "learning_rate": 1.609513405615609e-07, + "loss": 0.3064, + "step": 46029 + }, + { + "epoch": 0.9214523434176614, + "grad_norm": 1.1654361486434937, + "learning_rate": 1.6086975938802207e-07, + "loss": 0.3148, + "step": 46030 + }, + { + "epoch": 0.9214723619347897, + "grad_norm": 1.2590121030807495, + "learning_rate": 1.6078819855714468e-07, + "loss": 0.2575, + "step": 46031 + }, + { + "epoch": 0.9214923804519181, + "grad_norm": 1.1243480443954468, + "learning_rate": 1.6070665806927344e-07, + "loss": 0.3048, + "step": 46032 + }, + { + "epoch": 0.9215123989690464, + "grad_norm": 1.0667307376861572, + "learning_rate": 1.6062513792474866e-07, + "loss": 0.2587, + "step": 46033 + }, + { + "epoch": 0.9215324174861748, + "grad_norm": 1.1897265911102295, + "learning_rate": 1.605436381239156e-07, + "loss": 0.3037, + "step": 46034 + }, + { + "epoch": 0.921552436003303, + "grad_norm": 1.0912193059921265, + "learning_rate": 1.6046215866711512e-07, + "loss": 0.2877, + "step": 46035 + }, + { + "epoch": 0.9215724545204314, + "grad_norm": 1.26504647731781, + "learning_rate": 1.6038069955468972e-07, + "loss": 0.2943, + "step": 46036 + }, + { + "epoch": 0.9215924730375598, + "grad_norm": 1.0959516763687134, + "learning_rate": 1.6029926078698298e-07, + "loss": 0.3096, + "step": 46037 + }, + { + "epoch": 0.921612491554688, + "grad_norm": 1.196108341217041, + "learning_rate": 1.602178423643369e-07, + "loss": 0.3731, + "step": 46038 + }, + { + "epoch": 0.9216325100718165, + "grad_norm": 1.1608829498291016, + "learning_rate": 1.601364442870934e-07, + "loss": 0.2914, + "step": 46039 + }, + { + "epoch": 0.9216525285889448, + "grad_norm": 1.1228710412979126, + "learning_rate": 1.6005506655559389e-07, + "loss": 0.298, + "step": 46040 + }, + { + "epoch": 0.9216725471060732, + "grad_norm": 1.2032549381256104, + "learning_rate": 1.5997370917018195e-07, + "loss": 0.2737, + "step": 46041 + }, + { + "epoch": 0.9216925656232015, + "grad_norm": 1.1238640546798706, + "learning_rate": 1.59892372131199e-07, + "loss": 0.2664, + "step": 46042 + }, + { + "epoch": 0.9217125841403299, + "grad_norm": 2.1031932830810547, + "learning_rate": 1.5981105543898702e-07, + "loss": 0.7381, + "step": 46043 + }, + { + "epoch": 0.9217326026574582, + "grad_norm": 1.0589284896850586, + "learning_rate": 1.5972975909388787e-07, + "loss": 0.3104, + "step": 46044 + }, + { + "epoch": 0.9217526211745865, + "grad_norm": 1.0715694427490234, + "learning_rate": 1.5964848309624193e-07, + "loss": 0.2704, + "step": 46045 + }, + { + "epoch": 0.9217726396917149, + "grad_norm": 1.1513739824295044, + "learning_rate": 1.5956722744639273e-07, + "loss": 0.319, + "step": 46046 + }, + { + "epoch": 0.9217926582088432, + "grad_norm": 1.170754313468933, + "learning_rate": 1.5948599214468175e-07, + "loss": 0.2961, + "step": 46047 + }, + { + "epoch": 0.9218126767259716, + "grad_norm": 1.0577088594436646, + "learning_rate": 1.5940477719144975e-07, + "loss": 0.2688, + "step": 46048 + }, + { + "epoch": 0.9218326952430999, + "grad_norm": 1.892126441001892, + "learning_rate": 1.5932358258703817e-07, + "loss": 0.6619, + "step": 46049 + }, + { + "epoch": 0.9218527137602283, + "grad_norm": 1.1206316947937012, + "learning_rate": 1.5924240833178894e-07, + "loss": 0.2822, + "step": 46050 + }, + { + "epoch": 0.9218727322773566, + "grad_norm": 1.0373239517211914, + "learning_rate": 1.5916125442604235e-07, + "loss": 0.2819, + "step": 46051 + }, + { + "epoch": 0.9218927507944848, + "grad_norm": 1.1481724977493286, + "learning_rate": 1.590801208701409e-07, + "loss": 0.2955, + "step": 46052 + }, + { + "epoch": 0.9219127693116133, + "grad_norm": 1.0944570302963257, + "learning_rate": 1.5899900766442432e-07, + "loss": 0.2962, + "step": 46053 + }, + { + "epoch": 0.9219327878287416, + "grad_norm": 1.05928635597229, + "learning_rate": 1.5891791480923347e-07, + "loss": 0.2925, + "step": 46054 + }, + { + "epoch": 0.92195280634587, + "grad_norm": 2.1934616565704346, + "learning_rate": 1.588368423049108e-07, + "loss": 0.7308, + "step": 46055 + }, + { + "epoch": 0.9219728248629983, + "grad_norm": 1.2004985809326172, + "learning_rate": 1.587557901517961e-07, + "loss": 0.2626, + "step": 46056 + }, + { + "epoch": 0.9219928433801267, + "grad_norm": 1.1181854009628296, + "learning_rate": 1.5867475835023016e-07, + "loss": 0.3216, + "step": 46057 + }, + { + "epoch": 0.922012861897255, + "grad_norm": 2.130485773086548, + "learning_rate": 1.5859374690055328e-07, + "loss": 0.7738, + "step": 46058 + }, + { + "epoch": 0.9220328804143832, + "grad_norm": 1.1564873456954956, + "learning_rate": 1.5851275580310632e-07, + "loss": 0.2983, + "step": 46059 + }, + { + "epoch": 0.9220528989315117, + "grad_norm": 1.150425910949707, + "learning_rate": 1.584317850582301e-07, + "loss": 0.2855, + "step": 46060 + }, + { + "epoch": 0.92207291744864, + "grad_norm": 1.1171482801437378, + "learning_rate": 1.5835083466626544e-07, + "loss": 0.2835, + "step": 46061 + }, + { + "epoch": 0.9220929359657684, + "grad_norm": 1.0993505716323853, + "learning_rate": 1.5826990462755098e-07, + "loss": 0.2609, + "step": 46062 + }, + { + "epoch": 0.9221129544828967, + "grad_norm": 1.1396254301071167, + "learning_rate": 1.5818899494242812e-07, + "loss": 0.3249, + "step": 46063 + }, + { + "epoch": 0.9221329730000251, + "grad_norm": 1.0419883728027344, + "learning_rate": 1.5810810561123712e-07, + "loss": 0.309, + "step": 46064 + }, + { + "epoch": 0.9221529915171534, + "grad_norm": 1.032854676246643, + "learning_rate": 1.5802723663431775e-07, + "loss": 0.2843, + "step": 46065 + }, + { + "epoch": 0.9221730100342818, + "grad_norm": 1.0805416107177734, + "learning_rate": 1.5794638801200966e-07, + "loss": 0.2979, + "step": 46066 + }, + { + "epoch": 0.92219302855141, + "grad_norm": 1.227967381477356, + "learning_rate": 1.578655597446521e-07, + "loss": 0.2449, + "step": 46067 + }, + { + "epoch": 0.9222130470685383, + "grad_norm": 1.1760503053665161, + "learning_rate": 1.5778475183258646e-07, + "loss": 0.3191, + "step": 46068 + }, + { + "epoch": 0.9222330655856668, + "grad_norm": 1.8696125745773315, + "learning_rate": 1.5770396427615187e-07, + "loss": 0.7441, + "step": 46069 + }, + { + "epoch": 0.922253084102795, + "grad_norm": 1.2510840892791748, + "learning_rate": 1.576231970756875e-07, + "loss": 0.2497, + "step": 46070 + }, + { + "epoch": 0.9222731026199235, + "grad_norm": 1.1346737146377563, + "learning_rate": 1.5754245023153313e-07, + "loss": 0.3385, + "step": 46071 + }, + { + "epoch": 0.9222931211370518, + "grad_norm": 1.0298187732696533, + "learning_rate": 1.5746172374402735e-07, + "loss": 0.3077, + "step": 46072 + }, + { + "epoch": 0.9223131396541802, + "grad_norm": 1.355005145072937, + "learning_rate": 1.5738101761351156e-07, + "loss": 0.272, + "step": 46073 + }, + { + "epoch": 0.9223331581713085, + "grad_norm": 1.1630724668502808, + "learning_rate": 1.5730033184032378e-07, + "loss": 0.3036, + "step": 46074 + }, + { + "epoch": 0.9223531766884367, + "grad_norm": 1.2022037506103516, + "learning_rate": 1.5721966642480267e-07, + "loss": 0.3363, + "step": 46075 + }, + { + "epoch": 0.9223731952055652, + "grad_norm": 1.0963115692138672, + "learning_rate": 1.5713902136728798e-07, + "loss": 0.2908, + "step": 46076 + }, + { + "epoch": 0.9223932137226935, + "grad_norm": 1.2751835584640503, + "learning_rate": 1.5705839666811885e-07, + "loss": 0.3211, + "step": 46077 + }, + { + "epoch": 0.9224132322398219, + "grad_norm": 1.336405873298645, + "learning_rate": 1.5697779232763444e-07, + "loss": 0.2814, + "step": 46078 + }, + { + "epoch": 0.9224332507569502, + "grad_norm": 1.0706175565719604, + "learning_rate": 1.5689720834617283e-07, + "loss": 0.2602, + "step": 46079 + }, + { + "epoch": 0.9224532692740786, + "grad_norm": 1.1548991203308105, + "learning_rate": 1.5681664472407209e-07, + "loss": 0.2925, + "step": 46080 + }, + { + "epoch": 0.9224732877912069, + "grad_norm": 0.9949164390563965, + "learning_rate": 1.5673610146167306e-07, + "loss": 0.3397, + "step": 46081 + }, + { + "epoch": 0.9224933063083353, + "grad_norm": 1.1906660795211792, + "learning_rate": 1.5665557855931325e-07, + "loss": 0.3474, + "step": 46082 + }, + { + "epoch": 0.9225133248254636, + "grad_norm": 1.0863409042358398, + "learning_rate": 1.5657507601733069e-07, + "loss": 0.2729, + "step": 46083 + }, + { + "epoch": 0.9225333433425918, + "grad_norm": 1.0431777238845825, + "learning_rate": 1.5649459383606403e-07, + "loss": 0.3024, + "step": 46084 + }, + { + "epoch": 0.9225533618597203, + "grad_norm": 1.3179112672805786, + "learning_rate": 1.564141320158513e-07, + "loss": 0.2935, + "step": 46085 + }, + { + "epoch": 0.9225733803768486, + "grad_norm": 1.0964741706848145, + "learning_rate": 1.5633369055703228e-07, + "loss": 0.3147, + "step": 46086 + }, + { + "epoch": 0.922593398893977, + "grad_norm": 1.076130986213684, + "learning_rate": 1.562532694599439e-07, + "loss": 0.2806, + "step": 46087 + }, + { + "epoch": 0.9226134174111053, + "grad_norm": 1.4620715379714966, + "learning_rate": 1.561728687249242e-07, + "loss": 0.2957, + "step": 46088 + }, + { + "epoch": 0.9226334359282337, + "grad_norm": 1.3998408317565918, + "learning_rate": 1.5609248835231072e-07, + "loss": 0.2492, + "step": 46089 + }, + { + "epoch": 0.922653454445362, + "grad_norm": 1.2272061109542847, + "learning_rate": 1.5601212834244316e-07, + "loss": 0.2888, + "step": 46090 + }, + { + "epoch": 0.9226734729624902, + "grad_norm": 1.0552124977111816, + "learning_rate": 1.5593178869565738e-07, + "loss": 0.3205, + "step": 46091 + }, + { + "epoch": 0.9226934914796187, + "grad_norm": 1.9049170017242432, + "learning_rate": 1.5585146941229256e-07, + "loss": 0.7369, + "step": 46092 + }, + { + "epoch": 0.922713509996747, + "grad_norm": 1.1246154308319092, + "learning_rate": 1.5577117049268453e-07, + "loss": 0.3158, + "step": 46093 + }, + { + "epoch": 0.9227335285138754, + "grad_norm": 1.7417856454849243, + "learning_rate": 1.5569089193717302e-07, + "loss": 0.7635, + "step": 46094 + }, + { + "epoch": 0.9227535470310037, + "grad_norm": 1.8895987272262573, + "learning_rate": 1.556106337460944e-07, + "loss": 0.6404, + "step": 46095 + }, + { + "epoch": 0.9227735655481321, + "grad_norm": 1.0524625778198242, + "learning_rate": 1.5553039591978624e-07, + "loss": 0.2965, + "step": 46096 + }, + { + "epoch": 0.9227935840652604, + "grad_norm": 1.0819252729415894, + "learning_rate": 1.554501784585849e-07, + "loss": 0.2798, + "step": 46097 + }, + { + "epoch": 0.9228136025823888, + "grad_norm": 0.9945530891418457, + "learning_rate": 1.55369981362829e-07, + "loss": 0.2746, + "step": 46098 + }, + { + "epoch": 0.922833621099517, + "grad_norm": 1.089390516281128, + "learning_rate": 1.5528980463285494e-07, + "loss": 0.3306, + "step": 46099 + }, + { + "epoch": 0.9228536396166453, + "grad_norm": 1.0334687232971191, + "learning_rate": 1.552096482690002e-07, + "loss": 0.2737, + "step": 46100 + }, + { + "epoch": 0.9228736581337738, + "grad_norm": 1.1667835712432861, + "learning_rate": 1.5512951227160177e-07, + "loss": 0.3151, + "step": 46101 + }, + { + "epoch": 0.922893676650902, + "grad_norm": 1.0765970945358276, + "learning_rate": 1.550493966409955e-07, + "loss": 0.2954, + "step": 46102 + }, + { + "epoch": 0.9229136951680305, + "grad_norm": 1.326134204864502, + "learning_rate": 1.5496930137751943e-07, + "loss": 0.2957, + "step": 46103 + }, + { + "epoch": 0.9229337136851588, + "grad_norm": 0.9838245511054993, + "learning_rate": 1.5488922648150994e-07, + "loss": 0.3043, + "step": 46104 + }, + { + "epoch": 0.9229537322022872, + "grad_norm": 1.8376034498214722, + "learning_rate": 1.5480917195330348e-07, + "loss": 0.7391, + "step": 46105 + }, + { + "epoch": 0.9229737507194155, + "grad_norm": 1.8167582750320435, + "learning_rate": 1.547291377932364e-07, + "loss": 0.7714, + "step": 46106 + }, + { + "epoch": 0.9229937692365437, + "grad_norm": 1.0207973718643188, + "learning_rate": 1.5464912400164456e-07, + "loss": 0.3269, + "step": 46107 + }, + { + "epoch": 0.9230137877536722, + "grad_norm": 1.2745163440704346, + "learning_rate": 1.5456913057886603e-07, + "loss": 0.3272, + "step": 46108 + }, + { + "epoch": 0.9230338062708004, + "grad_norm": 1.0854909420013428, + "learning_rate": 1.5448915752523607e-07, + "loss": 0.3429, + "step": 46109 + }, + { + "epoch": 0.9230538247879289, + "grad_norm": 1.216163158416748, + "learning_rate": 1.5440920484109002e-07, + "loss": 0.2642, + "step": 46110 + }, + { + "epoch": 0.9230738433050572, + "grad_norm": 1.4414782524108887, + "learning_rate": 1.5432927252676533e-07, + "loss": 0.2837, + "step": 46111 + }, + { + "epoch": 0.9230938618221856, + "grad_norm": 1.0007838010787964, + "learning_rate": 1.5424936058259787e-07, + "loss": 0.2866, + "step": 46112 + }, + { + "epoch": 0.9231138803393139, + "grad_norm": 1.3023775815963745, + "learning_rate": 1.5416946900892348e-07, + "loss": 0.3157, + "step": 46113 + }, + { + "epoch": 0.9231338988564423, + "grad_norm": 1.1123913526535034, + "learning_rate": 1.5408959780607747e-07, + "loss": 0.3022, + "step": 46114 + }, + { + "epoch": 0.9231539173735706, + "grad_norm": 1.0984570980072021, + "learning_rate": 1.5400974697439565e-07, + "loss": 0.2957, + "step": 46115 + }, + { + "epoch": 0.9231739358906988, + "grad_norm": 1.0539785623550415, + "learning_rate": 1.539299165142144e-07, + "loss": 0.3324, + "step": 46116 + }, + { + "epoch": 0.9231939544078273, + "grad_norm": 1.0810935497283936, + "learning_rate": 1.5385010642586906e-07, + "loss": 0.2891, + "step": 46117 + }, + { + "epoch": 0.9232139729249556, + "grad_norm": 1.2969955205917358, + "learning_rate": 1.5377031670969488e-07, + "loss": 0.3007, + "step": 46118 + }, + { + "epoch": 0.923233991442084, + "grad_norm": 1.3380452394485474, + "learning_rate": 1.5369054736602718e-07, + "loss": 0.2835, + "step": 46119 + }, + { + "epoch": 0.9232540099592123, + "grad_norm": 1.1031376123428345, + "learning_rate": 1.5361079839520121e-07, + "loss": 0.2763, + "step": 46120 + }, + { + "epoch": 0.9232740284763407, + "grad_norm": 1.0389198064804077, + "learning_rate": 1.5353106979755284e-07, + "loss": 0.2756, + "step": 46121 + }, + { + "epoch": 0.923294046993469, + "grad_norm": 1.1742111444473267, + "learning_rate": 1.5345136157341677e-07, + "loss": 0.3314, + "step": 46122 + }, + { + "epoch": 0.9233140655105972, + "grad_norm": 1.0959415435791016, + "learning_rate": 1.5337167372312834e-07, + "loss": 0.3158, + "step": 46123 + }, + { + "epoch": 0.9233340840277257, + "grad_norm": 1.1080116033554077, + "learning_rate": 1.5329200624702166e-07, + "loss": 0.2928, + "step": 46124 + }, + { + "epoch": 0.923354102544854, + "grad_norm": 1.1635560989379883, + "learning_rate": 1.532123591454332e-07, + "loss": 0.3238, + "step": 46125 + }, + { + "epoch": 0.9233741210619824, + "grad_norm": 1.0402477979660034, + "learning_rate": 1.5313273241869707e-07, + "loss": 0.2954, + "step": 46126 + }, + { + "epoch": 0.9233941395791107, + "grad_norm": 1.0565452575683594, + "learning_rate": 1.530531260671475e-07, + "loss": 0.31, + "step": 46127 + }, + { + "epoch": 0.9234141580962391, + "grad_norm": 1.0421651601791382, + "learning_rate": 1.5297354009111976e-07, + "loss": 0.2843, + "step": 46128 + }, + { + "epoch": 0.9234341766133674, + "grad_norm": 1.0829895734786987, + "learning_rate": 1.5289397449094745e-07, + "loss": 0.2804, + "step": 46129 + }, + { + "epoch": 0.9234541951304958, + "grad_norm": 1.2289568185806274, + "learning_rate": 1.5281442926696644e-07, + "loss": 0.3447, + "step": 46130 + }, + { + "epoch": 0.923474213647624, + "grad_norm": 2.0776419639587402, + "learning_rate": 1.5273490441951035e-07, + "loss": 0.739, + "step": 46131 + }, + { + "epoch": 0.9234942321647523, + "grad_norm": 1.0536468029022217, + "learning_rate": 1.5265539994891386e-07, + "loss": 0.2742, + "step": 46132 + }, + { + "epoch": 0.9235142506818808, + "grad_norm": 1.8943817615509033, + "learning_rate": 1.5257591585551012e-07, + "loss": 0.768, + "step": 46133 + }, + { + "epoch": 0.923534269199009, + "grad_norm": 1.107501745223999, + "learning_rate": 1.5249645213963438e-07, + "loss": 0.2943, + "step": 46134 + }, + { + "epoch": 0.9235542877161375, + "grad_norm": 1.1763592958450317, + "learning_rate": 1.524170088016208e-07, + "loss": 0.3097, + "step": 46135 + }, + { + "epoch": 0.9235743062332658, + "grad_norm": 1.0519402027130127, + "learning_rate": 1.523375858418019e-07, + "loss": 0.2503, + "step": 46136 + }, + { + "epoch": 0.9235943247503942, + "grad_norm": 0.9931122660636902, + "learning_rate": 1.5225818326051355e-07, + "loss": 0.2782, + "step": 46137 + }, + { + "epoch": 0.9236143432675225, + "grad_norm": 1.0194103717803955, + "learning_rate": 1.521788010580877e-07, + "loss": 0.2612, + "step": 46138 + }, + { + "epoch": 0.9236343617846507, + "grad_norm": 1.2188911437988281, + "learning_rate": 1.520994392348596e-07, + "loss": 0.2844, + "step": 46139 + }, + { + "epoch": 0.9236543803017792, + "grad_norm": 1.1127314567565918, + "learning_rate": 1.5202009779116233e-07, + "loss": 0.3102, + "step": 46140 + }, + { + "epoch": 0.9236743988189074, + "grad_norm": 1.030930757522583, + "learning_rate": 1.5194077672732899e-07, + "loss": 0.2714, + "step": 46141 + }, + { + "epoch": 0.9236944173360359, + "grad_norm": 1.1683391332626343, + "learning_rate": 1.5186147604369318e-07, + "loss": 0.2895, + "step": 46142 + }, + { + "epoch": 0.9237144358531642, + "grad_norm": 1.0298080444335938, + "learning_rate": 1.517821957405885e-07, + "loss": 0.278, + "step": 46143 + }, + { + "epoch": 0.9237344543702926, + "grad_norm": 1.939247965812683, + "learning_rate": 1.517029358183486e-07, + "loss": 0.7533, + "step": 46144 + }, + { + "epoch": 0.9237544728874209, + "grad_norm": 1.1404571533203125, + "learning_rate": 1.5162369627730545e-07, + "loss": 0.2789, + "step": 46145 + }, + { + "epoch": 0.9237744914045493, + "grad_norm": 1.0632355213165283, + "learning_rate": 1.5154447711779264e-07, + "loss": 0.2865, + "step": 46146 + }, + { + "epoch": 0.9237945099216776, + "grad_norm": 1.0682373046875, + "learning_rate": 1.5146527834014436e-07, + "loss": 0.2977, + "step": 46147 + }, + { + "epoch": 0.9238145284388058, + "grad_norm": 1.146541714668274, + "learning_rate": 1.5138609994469256e-07, + "loss": 0.3047, + "step": 46148 + }, + { + "epoch": 0.9238345469559343, + "grad_norm": 1.8472734689712524, + "learning_rate": 1.5130694193176921e-07, + "loss": 0.7822, + "step": 46149 + }, + { + "epoch": 0.9238545654730626, + "grad_norm": 1.1027562618255615, + "learning_rate": 1.5122780430170902e-07, + "loss": 0.2545, + "step": 46150 + }, + { + "epoch": 0.923874583990191, + "grad_norm": 1.1248146295547485, + "learning_rate": 1.5114868705484287e-07, + "loss": 0.3129, + "step": 46151 + }, + { + "epoch": 0.9238946025073193, + "grad_norm": 1.0259265899658203, + "learning_rate": 1.5106959019150492e-07, + "loss": 0.2595, + "step": 46152 + }, + { + "epoch": 0.9239146210244477, + "grad_norm": 1.1674965620040894, + "learning_rate": 1.5099051371202655e-07, + "loss": 0.2831, + "step": 46153 + }, + { + "epoch": 0.923934639541576, + "grad_norm": 1.1013118028640747, + "learning_rate": 1.5091145761674087e-07, + "loss": 0.2965, + "step": 46154 + }, + { + "epoch": 0.9239546580587042, + "grad_norm": 1.243544340133667, + "learning_rate": 1.5083242190597924e-07, + "loss": 0.3171, + "step": 46155 + }, + { + "epoch": 0.9239746765758327, + "grad_norm": 1.189224123954773, + "learning_rate": 1.5075340658007475e-07, + "loss": 0.2856, + "step": 46156 + }, + { + "epoch": 0.923994695092961, + "grad_norm": 1.0902308225631714, + "learning_rate": 1.5067441163935992e-07, + "loss": 0.3348, + "step": 46157 + }, + { + "epoch": 0.9240147136100894, + "grad_norm": 1.1219159364700317, + "learning_rate": 1.5059543708416612e-07, + "loss": 0.3204, + "step": 46158 + }, + { + "epoch": 0.9240347321272177, + "grad_norm": 1.0841187238693237, + "learning_rate": 1.5051648291482536e-07, + "loss": 0.2501, + "step": 46159 + }, + { + "epoch": 0.9240547506443461, + "grad_norm": 1.0986565351486206, + "learning_rate": 1.5043754913166896e-07, + "loss": 0.2884, + "step": 46160 + }, + { + "epoch": 0.9240747691614744, + "grad_norm": 0.9592015147209167, + "learning_rate": 1.5035863573503007e-07, + "loss": 0.2582, + "step": 46161 + }, + { + "epoch": 0.9240947876786028, + "grad_norm": 1.1400431394577026, + "learning_rate": 1.502797427252395e-07, + "loss": 0.2922, + "step": 46162 + }, + { + "epoch": 0.924114806195731, + "grad_norm": 1.0634663105010986, + "learning_rate": 1.5020087010262973e-07, + "loss": 0.2997, + "step": 46163 + }, + { + "epoch": 0.9241348247128593, + "grad_norm": 1.075124979019165, + "learning_rate": 1.5012201786753166e-07, + "loss": 0.3159, + "step": 46164 + }, + { + "epoch": 0.9241548432299878, + "grad_norm": 1.1291207075119019, + "learning_rate": 1.5004318602027667e-07, + "loss": 0.2677, + "step": 46165 + }, + { + "epoch": 0.924174861747116, + "grad_norm": 1.1606204509735107, + "learning_rate": 1.4996437456119727e-07, + "loss": 0.321, + "step": 46166 + }, + { + "epoch": 0.9241948802642445, + "grad_norm": 1.7813020944595337, + "learning_rate": 1.498855834906232e-07, + "loss": 0.7529, + "step": 46167 + }, + { + "epoch": 0.9242148987813728, + "grad_norm": 2.1698319911956787, + "learning_rate": 1.4980681280888641e-07, + "loss": 0.7349, + "step": 46168 + }, + { + "epoch": 0.9242349172985012, + "grad_norm": 1.9076550006866455, + "learning_rate": 1.4972806251631776e-07, + "loss": 0.7575, + "step": 46169 + }, + { + "epoch": 0.9242549358156295, + "grad_norm": 1.1881300210952759, + "learning_rate": 1.496493326132492e-07, + "loss": 0.2714, + "step": 46170 + }, + { + "epoch": 0.9242749543327577, + "grad_norm": 1.0675387382507324, + "learning_rate": 1.4957062310001047e-07, + "loss": 0.3041, + "step": 46171 + }, + { + "epoch": 0.9242949728498862, + "grad_norm": 1.1302969455718994, + "learning_rate": 1.4949193397693352e-07, + "loss": 0.2731, + "step": 46172 + }, + { + "epoch": 0.9243149913670144, + "grad_norm": 1.024715542793274, + "learning_rate": 1.4941326524434807e-07, + "loss": 0.2915, + "step": 46173 + }, + { + "epoch": 0.9243350098841429, + "grad_norm": 1.1406060457229614, + "learning_rate": 1.4933461690258554e-07, + "loss": 0.3039, + "step": 46174 + }, + { + "epoch": 0.9243550284012712, + "grad_norm": 1.2045402526855469, + "learning_rate": 1.4925598895197623e-07, + "loss": 0.2497, + "step": 46175 + }, + { + "epoch": 0.9243750469183996, + "grad_norm": 1.0018198490142822, + "learning_rate": 1.4917738139285154e-07, + "loss": 0.2528, + "step": 46176 + }, + { + "epoch": 0.9243950654355279, + "grad_norm": 1.2504762411117554, + "learning_rate": 1.4909879422554063e-07, + "loss": 0.2842, + "step": 46177 + }, + { + "epoch": 0.9244150839526563, + "grad_norm": 1.2050343751907349, + "learning_rate": 1.4902022745037438e-07, + "loss": 0.2779, + "step": 46178 + }, + { + "epoch": 0.9244351024697846, + "grad_norm": 1.9574394226074219, + "learning_rate": 1.489416810676836e-07, + "loss": 0.6974, + "step": 46179 + }, + { + "epoch": 0.9244551209869128, + "grad_norm": 2.010655641555786, + "learning_rate": 1.4886315507779802e-07, + "loss": 0.7824, + "step": 46180 + }, + { + "epoch": 0.9244751395040413, + "grad_norm": 1.074812650680542, + "learning_rate": 1.4878464948104797e-07, + "loss": 0.3174, + "step": 46181 + }, + { + "epoch": 0.9244951580211695, + "grad_norm": 1.0785210132598877, + "learning_rate": 1.4870616427776264e-07, + "loss": 0.2711, + "step": 46182 + }, + { + "epoch": 0.924515176538298, + "grad_norm": 1.4706504344940186, + "learning_rate": 1.4862769946827281e-07, + "loss": 0.2894, + "step": 46183 + }, + { + "epoch": 0.9245351950554263, + "grad_norm": 1.1512800455093384, + "learning_rate": 1.4854925505290829e-07, + "loss": 0.2949, + "step": 46184 + }, + { + "epoch": 0.9245552135725547, + "grad_norm": 1.0284438133239746, + "learning_rate": 1.4847083103199876e-07, + "loss": 0.2975, + "step": 46185 + }, + { + "epoch": 0.924575232089683, + "grad_norm": 1.134382963180542, + "learning_rate": 1.4839242740587345e-07, + "loss": 0.2992, + "step": 46186 + }, + { + "epoch": 0.9245952506068112, + "grad_norm": 1.308489441871643, + "learning_rate": 1.4831404417486205e-07, + "loss": 0.3093, + "step": 46187 + }, + { + "epoch": 0.9246152691239397, + "grad_norm": 1.0467557907104492, + "learning_rate": 1.4823568133929433e-07, + "loss": 0.297, + "step": 46188 + }, + { + "epoch": 0.924635287641068, + "grad_norm": 1.1482051610946655, + "learning_rate": 1.4815733889950058e-07, + "loss": 0.2775, + "step": 46189 + }, + { + "epoch": 0.9246553061581964, + "grad_norm": 1.2827485799789429, + "learning_rate": 1.4807901685580883e-07, + "loss": 0.3128, + "step": 46190 + }, + { + "epoch": 0.9246753246753247, + "grad_norm": 1.1665478944778442, + "learning_rate": 1.480007152085483e-07, + "loss": 0.3379, + "step": 46191 + }, + { + "epoch": 0.9246953431924531, + "grad_norm": 1.1211533546447754, + "learning_rate": 1.4792243395804874e-07, + "loss": 0.2868, + "step": 46192 + }, + { + "epoch": 0.9247153617095814, + "grad_norm": 1.1286076307296753, + "learning_rate": 1.4784417310463983e-07, + "loss": 0.3267, + "step": 46193 + }, + { + "epoch": 0.9247353802267098, + "grad_norm": 1.0967371463775635, + "learning_rate": 1.4776593264864914e-07, + "loss": 0.2911, + "step": 46194 + }, + { + "epoch": 0.924755398743838, + "grad_norm": 1.041587233543396, + "learning_rate": 1.4768771259040582e-07, + "loss": 0.2846, + "step": 46195 + }, + { + "epoch": 0.9247754172609663, + "grad_norm": 1.1401442289352417, + "learning_rate": 1.4760951293024018e-07, + "loss": 0.2676, + "step": 46196 + }, + { + "epoch": 0.9247954357780948, + "grad_norm": 1.0208009481430054, + "learning_rate": 1.4753133366847916e-07, + "loss": 0.2751, + "step": 46197 + }, + { + "epoch": 0.924815454295223, + "grad_norm": 1.108161211013794, + "learning_rate": 1.474531748054525e-07, + "loss": 0.2805, + "step": 46198 + }, + { + "epoch": 0.9248354728123515, + "grad_norm": 1.0933201313018799, + "learning_rate": 1.473750363414883e-07, + "loss": 0.3044, + "step": 46199 + }, + { + "epoch": 0.9248554913294798, + "grad_norm": 1.3517118692398071, + "learning_rate": 1.4729691827691518e-07, + "loss": 0.2555, + "step": 46200 + }, + { + "epoch": 0.9248755098466082, + "grad_norm": 1.1063929796218872, + "learning_rate": 1.4721882061206116e-07, + "loss": 0.245, + "step": 46201 + }, + { + "epoch": 0.9248955283637365, + "grad_norm": 1.0702898502349854, + "learning_rate": 1.4714074334725548e-07, + "loss": 0.2796, + "step": 46202 + }, + { + "epoch": 0.9249155468808647, + "grad_norm": 1.1558643579483032, + "learning_rate": 1.470626864828256e-07, + "loss": 0.284, + "step": 46203 + }, + { + "epoch": 0.9249355653979932, + "grad_norm": 1.2486058473587036, + "learning_rate": 1.4698465001909913e-07, + "loss": 0.2834, + "step": 46204 + }, + { + "epoch": 0.9249555839151214, + "grad_norm": 1.1316542625427246, + "learning_rate": 1.469066339564057e-07, + "loss": 0.3098, + "step": 46205 + }, + { + "epoch": 0.9249756024322499, + "grad_norm": 1.131811499595642, + "learning_rate": 1.4682863829507233e-07, + "loss": 0.2906, + "step": 46206 + }, + { + "epoch": 0.9249956209493782, + "grad_norm": 1.052544116973877, + "learning_rate": 1.467506630354265e-07, + "loss": 0.2758, + "step": 46207 + }, + { + "epoch": 0.9250156394665066, + "grad_norm": 0.9476642608642578, + "learning_rate": 1.4667270817779632e-07, + "loss": 0.2322, + "step": 46208 + }, + { + "epoch": 0.9250356579836349, + "grad_norm": 1.2247343063354492, + "learning_rate": 1.4659477372251042e-07, + "loss": 0.3133, + "step": 46209 + }, + { + "epoch": 0.9250556765007633, + "grad_norm": 1.082931399345398, + "learning_rate": 1.4651685966989515e-07, + "loss": 0.3048, + "step": 46210 + }, + { + "epoch": 0.9250756950178916, + "grad_norm": 2.0195541381835938, + "learning_rate": 1.4643896602027862e-07, + "loss": 0.7342, + "step": 46211 + }, + { + "epoch": 0.9250957135350198, + "grad_norm": 1.1833304166793823, + "learning_rate": 1.463610927739878e-07, + "loss": 0.319, + "step": 46212 + }, + { + "epoch": 0.9251157320521483, + "grad_norm": 1.0566273927688599, + "learning_rate": 1.4628323993135074e-07, + "loss": 0.2958, + "step": 46213 + }, + { + "epoch": 0.9251357505692765, + "grad_norm": 1.1700913906097412, + "learning_rate": 1.462054074926944e-07, + "loss": 0.2705, + "step": 46214 + }, + { + "epoch": 0.925155769086405, + "grad_norm": 1.1408863067626953, + "learning_rate": 1.4612759545834576e-07, + "loss": 0.3131, + "step": 46215 + }, + { + "epoch": 0.9251757876035333, + "grad_norm": 1.1392093896865845, + "learning_rate": 1.4604980382863286e-07, + "loss": 0.2606, + "step": 46216 + }, + { + "epoch": 0.9251958061206617, + "grad_norm": 1.7617443799972534, + "learning_rate": 1.4597203260388104e-07, + "loss": 0.7081, + "step": 46217 + }, + { + "epoch": 0.92521582463779, + "grad_norm": 1.1705971956253052, + "learning_rate": 1.4589428178441888e-07, + "loss": 0.2817, + "step": 46218 + }, + { + "epoch": 0.9252358431549182, + "grad_norm": 1.0621153116226196, + "learning_rate": 1.4581655137057226e-07, + "loss": 0.2448, + "step": 46219 + }, + { + "epoch": 0.9252558616720467, + "grad_norm": 1.0888841152191162, + "learning_rate": 1.4573884136266868e-07, + "loss": 0.318, + "step": 46220 + }, + { + "epoch": 0.925275880189175, + "grad_norm": 1.2217100858688354, + "learning_rate": 1.4566115176103402e-07, + "loss": 0.3104, + "step": 46221 + }, + { + "epoch": 0.9252958987063034, + "grad_norm": 1.1250401735305786, + "learning_rate": 1.4558348256599463e-07, + "loss": 0.2928, + "step": 46222 + }, + { + "epoch": 0.9253159172234316, + "grad_norm": 1.2672988176345825, + "learning_rate": 1.4550583377787865e-07, + "loss": 0.3008, + "step": 46223 + }, + { + "epoch": 0.9253359357405601, + "grad_norm": 1.0313438177108765, + "learning_rate": 1.4542820539701075e-07, + "loss": 0.2481, + "step": 46224 + }, + { + "epoch": 0.9253559542576884, + "grad_norm": 1.508297324180603, + "learning_rate": 1.4535059742371794e-07, + "loss": 0.3363, + "step": 46225 + }, + { + "epoch": 0.9253759727748168, + "grad_norm": 1.0010138750076294, + "learning_rate": 1.452730098583266e-07, + "loss": 0.2686, + "step": 46226 + }, + { + "epoch": 0.925395991291945, + "grad_norm": 1.0814064741134644, + "learning_rate": 1.4519544270116258e-07, + "loss": 0.2581, + "step": 46227 + }, + { + "epoch": 0.9254160098090733, + "grad_norm": 1.208234190940857, + "learning_rate": 1.4511789595255288e-07, + "loss": 0.3026, + "step": 46228 + }, + { + "epoch": 0.9254360283262018, + "grad_norm": 1.2343523502349854, + "learning_rate": 1.450403696128222e-07, + "loss": 0.2549, + "step": 46229 + }, + { + "epoch": 0.92545604684333, + "grad_norm": 1.1329675912857056, + "learning_rate": 1.449628636822975e-07, + "loss": 0.3203, + "step": 46230 + }, + { + "epoch": 0.9254760653604585, + "grad_norm": 1.0524680614471436, + "learning_rate": 1.4488537816130298e-07, + "loss": 0.3226, + "step": 46231 + }, + { + "epoch": 0.9254960838775868, + "grad_norm": 1.0824708938598633, + "learning_rate": 1.4480791305016673e-07, + "loss": 0.3167, + "step": 46232 + }, + { + "epoch": 0.9255161023947152, + "grad_norm": 1.2104966640472412, + "learning_rate": 1.447304683492129e-07, + "loss": 0.3288, + "step": 46233 + }, + { + "epoch": 0.9255361209118435, + "grad_norm": 1.0664335489273071, + "learning_rate": 1.446530440587668e-07, + "loss": 0.2675, + "step": 46234 + }, + { + "epoch": 0.9255561394289717, + "grad_norm": 1.2780753374099731, + "learning_rate": 1.4457564017915483e-07, + "loss": 0.2595, + "step": 46235 + }, + { + "epoch": 0.9255761579461002, + "grad_norm": 1.0104032754898071, + "learning_rate": 1.4449825671070173e-07, + "loss": 0.2779, + "step": 46236 + }, + { + "epoch": 0.9255961764632284, + "grad_norm": 1.3351601362228394, + "learning_rate": 1.4442089365373336e-07, + "loss": 0.3016, + "step": 46237 + }, + { + "epoch": 0.9256161949803569, + "grad_norm": 1.1798491477966309, + "learning_rate": 1.4434355100857388e-07, + "loss": 0.2649, + "step": 46238 + }, + { + "epoch": 0.9256362134974851, + "grad_norm": 1.084571123123169, + "learning_rate": 1.442662287755503e-07, + "loss": 0.3272, + "step": 46239 + }, + { + "epoch": 0.9256562320146136, + "grad_norm": 1.0365592241287231, + "learning_rate": 1.441889269549851e-07, + "loss": 0.2941, + "step": 46240 + }, + { + "epoch": 0.9256762505317419, + "grad_norm": 2.2115707397460938, + "learning_rate": 1.441116455472058e-07, + "loss": 0.8086, + "step": 46241 + }, + { + "epoch": 0.9256962690488703, + "grad_norm": 1.133729100227356, + "learning_rate": 1.4403438455253605e-07, + "loss": 0.2969, + "step": 46242 + }, + { + "epoch": 0.9257162875659986, + "grad_norm": 1.2250264883041382, + "learning_rate": 1.4395714397130055e-07, + "loss": 0.3123, + "step": 46243 + }, + { + "epoch": 0.9257363060831268, + "grad_norm": 1.012528657913208, + "learning_rate": 1.438799238038241e-07, + "loss": 0.2519, + "step": 46244 + }, + { + "epoch": 0.9257563246002553, + "grad_norm": 1.7679284811019897, + "learning_rate": 1.4380272405043137e-07, + "loss": 0.8199, + "step": 46245 + }, + { + "epoch": 0.9257763431173835, + "grad_norm": 1.2110987901687622, + "learning_rate": 1.4372554471144718e-07, + "loss": 0.338, + "step": 46246 + }, + { + "epoch": 0.925796361634512, + "grad_norm": 1.2114826440811157, + "learning_rate": 1.4364838578719564e-07, + "loss": 0.3259, + "step": 46247 + }, + { + "epoch": 0.9258163801516403, + "grad_norm": 1.0193283557891846, + "learning_rate": 1.4357124727800098e-07, + "loss": 0.2454, + "step": 46248 + }, + { + "epoch": 0.9258363986687687, + "grad_norm": 1.3988581895828247, + "learning_rate": 1.4349412918418792e-07, + "loss": 0.3022, + "step": 46249 + }, + { + "epoch": 0.925856417185897, + "grad_norm": 1.1882531642913818, + "learning_rate": 1.4341703150608067e-07, + "loss": 0.3258, + "step": 46250 + }, + { + "epoch": 0.9258764357030252, + "grad_norm": 1.1986829042434692, + "learning_rate": 1.4333995424400227e-07, + "loss": 0.2664, + "step": 46251 + }, + { + "epoch": 0.9258964542201537, + "grad_norm": 1.1438241004943848, + "learning_rate": 1.4326289739827804e-07, + "loss": 0.3047, + "step": 46252 + }, + { + "epoch": 0.925916472737282, + "grad_norm": 1.0997005701065063, + "learning_rate": 1.4318586096923105e-07, + "loss": 0.2796, + "step": 46253 + }, + { + "epoch": 0.9259364912544104, + "grad_norm": 1.0090625286102295, + "learning_rate": 1.43108844957186e-07, + "loss": 0.2405, + "step": 46254 + }, + { + "epoch": 0.9259565097715386, + "grad_norm": 1.2207813262939453, + "learning_rate": 1.4303184936246606e-07, + "loss": 0.297, + "step": 46255 + }, + { + "epoch": 0.9259765282886671, + "grad_norm": 1.1091945171356201, + "learning_rate": 1.4295487418539478e-07, + "loss": 0.2951, + "step": 46256 + }, + { + "epoch": 0.9259965468057954, + "grad_norm": 1.0852198600769043, + "learning_rate": 1.4287791942629582e-07, + "loss": 0.3171, + "step": 46257 + }, + { + "epoch": 0.9260165653229238, + "grad_norm": 1.1898268461227417, + "learning_rate": 1.4280098508549335e-07, + "loss": 0.3025, + "step": 46258 + }, + { + "epoch": 0.926036583840052, + "grad_norm": 1.1170107126235962, + "learning_rate": 1.4272407116330988e-07, + "loss": 0.2644, + "step": 46259 + }, + { + "epoch": 0.9260566023571803, + "grad_norm": 1.8402717113494873, + "learning_rate": 1.4264717766006964e-07, + "loss": 0.7637, + "step": 46260 + }, + { + "epoch": 0.9260766208743088, + "grad_norm": 1.1247221231460571, + "learning_rate": 1.4257030457609456e-07, + "loss": 0.2896, + "step": 46261 + }, + { + "epoch": 0.926096639391437, + "grad_norm": 1.1582280397415161, + "learning_rate": 1.4249345191170884e-07, + "loss": 0.3574, + "step": 46262 + }, + { + "epoch": 0.9261166579085655, + "grad_norm": 1.0240180492401123, + "learning_rate": 1.4241661966723553e-07, + "loss": 0.2852, + "step": 46263 + }, + { + "epoch": 0.9261366764256938, + "grad_norm": 1.0652098655700684, + "learning_rate": 1.4233980784299716e-07, + "loss": 0.2876, + "step": 46264 + }, + { + "epoch": 0.9261566949428222, + "grad_norm": 1.0380162000656128, + "learning_rate": 1.4226301643931683e-07, + "loss": 0.2472, + "step": 46265 + }, + { + "epoch": 0.9261767134599505, + "grad_norm": 1.0902364253997803, + "learning_rate": 1.4218624545651705e-07, + "loss": 0.2689, + "step": 46266 + }, + { + "epoch": 0.9261967319770787, + "grad_norm": 1.1429380178451538, + "learning_rate": 1.4210949489492198e-07, + "loss": 0.3055, + "step": 46267 + }, + { + "epoch": 0.9262167504942072, + "grad_norm": 1.2561005353927612, + "learning_rate": 1.420327647548525e-07, + "loss": 0.2903, + "step": 46268 + }, + { + "epoch": 0.9262367690113354, + "grad_norm": 1.283789873123169, + "learning_rate": 1.4195605503663223e-07, + "loss": 0.2682, + "step": 46269 + }, + { + "epoch": 0.9262567875284639, + "grad_norm": 1.0212093591690063, + "learning_rate": 1.4187936574058203e-07, + "loss": 0.2679, + "step": 46270 + }, + { + "epoch": 0.9262768060455921, + "grad_norm": 1.1898870468139648, + "learning_rate": 1.4180269686702663e-07, + "loss": 0.3082, + "step": 46271 + }, + { + "epoch": 0.9262968245627206, + "grad_norm": 1.8375341892242432, + "learning_rate": 1.417260484162869e-07, + "loss": 0.7233, + "step": 46272 + }, + { + "epoch": 0.9263168430798489, + "grad_norm": 1.0780540704727173, + "learning_rate": 1.416494203886859e-07, + "loss": 0.3016, + "step": 46273 + }, + { + "epoch": 0.9263368615969773, + "grad_norm": 1.1864526271820068, + "learning_rate": 1.415728127845445e-07, + "loss": 0.3057, + "step": 46274 + }, + { + "epoch": 0.9263568801141056, + "grad_norm": 1.9137810468673706, + "learning_rate": 1.414962256041852e-07, + "loss": 0.7593, + "step": 46275 + }, + { + "epoch": 0.9263768986312338, + "grad_norm": 1.1160719394683838, + "learning_rate": 1.4141965884793108e-07, + "loss": 0.2814, + "step": 46276 + }, + { + "epoch": 0.9263969171483623, + "grad_norm": 1.539696455001831, + "learning_rate": 1.4134311251610245e-07, + "loss": 0.3489, + "step": 46277 + }, + { + "epoch": 0.9264169356654905, + "grad_norm": 1.1413031816482544, + "learning_rate": 1.4126658660902182e-07, + "loss": 0.2468, + "step": 46278 + }, + { + "epoch": 0.926436954182619, + "grad_norm": 1.2746084928512573, + "learning_rate": 1.411900811270106e-07, + "loss": 0.3139, + "step": 46279 + }, + { + "epoch": 0.9264569726997472, + "grad_norm": 1.1535320281982422, + "learning_rate": 1.4111359607039132e-07, + "loss": 0.2887, + "step": 46280 + }, + { + "epoch": 0.9264769912168757, + "grad_norm": 0.9942392706871033, + "learning_rate": 1.4103713143948484e-07, + "loss": 0.2528, + "step": 46281 + }, + { + "epoch": 0.926497009734004, + "grad_norm": 0.9827131628990173, + "learning_rate": 1.4096068723461253e-07, + "loss": 0.2682, + "step": 46282 + }, + { + "epoch": 0.9265170282511322, + "grad_norm": 1.0083976984024048, + "learning_rate": 1.4088426345609586e-07, + "loss": 0.342, + "step": 46283 + }, + { + "epoch": 0.9265370467682607, + "grad_norm": 1.3228678703308105, + "learning_rate": 1.4080786010425562e-07, + "loss": 0.2522, + "step": 46284 + }, + { + "epoch": 0.926557065285389, + "grad_norm": 1.1060879230499268, + "learning_rate": 1.4073147717941383e-07, + "loss": 0.2925, + "step": 46285 + }, + { + "epoch": 0.9265770838025174, + "grad_norm": 1.1178109645843506, + "learning_rate": 1.4065511468189075e-07, + "loss": 0.2856, + "step": 46286 + }, + { + "epoch": 0.9265971023196456, + "grad_norm": 1.062820315361023, + "learning_rate": 1.4057877261200835e-07, + "loss": 0.2503, + "step": 46287 + }, + { + "epoch": 0.926617120836774, + "grad_norm": 1.2007088661193848, + "learning_rate": 1.4050245097008641e-07, + "loss": 0.2708, + "step": 46288 + }, + { + "epoch": 0.9266371393539024, + "grad_norm": 1.189517617225647, + "learning_rate": 1.4042614975644685e-07, + "loss": 0.3028, + "step": 46289 + }, + { + "epoch": 0.9266571578710308, + "grad_norm": 2.08807635307312, + "learning_rate": 1.4034986897140945e-07, + "loss": 0.7312, + "step": 46290 + }, + { + "epoch": 0.926677176388159, + "grad_norm": 1.0930670499801636, + "learning_rate": 1.4027360861529615e-07, + "loss": 0.2385, + "step": 46291 + }, + { + "epoch": 0.9266971949052873, + "grad_norm": 1.0918630361557007, + "learning_rate": 1.4019736868842614e-07, + "loss": 0.2854, + "step": 46292 + }, + { + "epoch": 0.9267172134224158, + "grad_norm": 1.2701466083526611, + "learning_rate": 1.4012114919112086e-07, + "loss": 0.3016, + "step": 46293 + }, + { + "epoch": 0.926737231939544, + "grad_norm": 1.0686522722244263, + "learning_rate": 1.4004495012370057e-07, + "loss": 0.316, + "step": 46294 + }, + { + "epoch": 0.9267572504566725, + "grad_norm": 1.1419556140899658, + "learning_rate": 1.3996877148648558e-07, + "loss": 0.3259, + "step": 46295 + }, + { + "epoch": 0.9267772689738007, + "grad_norm": 1.119436264038086, + "learning_rate": 1.3989261327979565e-07, + "loss": 0.2797, + "step": 46296 + }, + { + "epoch": 0.9267972874909292, + "grad_norm": 1.1145002841949463, + "learning_rate": 1.3981647550395106e-07, + "loss": 0.3532, + "step": 46297 + }, + { + "epoch": 0.9268173060080575, + "grad_norm": 1.2601559162139893, + "learning_rate": 1.397403581592721e-07, + "loss": 0.2706, + "step": 46298 + }, + { + "epoch": 0.9268373245251857, + "grad_norm": 1.080556035041809, + "learning_rate": 1.3966426124607913e-07, + "loss": 0.2798, + "step": 46299 + }, + { + "epoch": 0.9268573430423142, + "grad_norm": 1.9357599020004272, + "learning_rate": 1.3958818476469181e-07, + "loss": 0.7599, + "step": 46300 + }, + { + "epoch": 0.9268773615594424, + "grad_norm": 1.0742942094802856, + "learning_rate": 1.3951212871542886e-07, + "loss": 0.3592, + "step": 46301 + }, + { + "epoch": 0.9268973800765709, + "grad_norm": 1.0285334587097168, + "learning_rate": 1.394360930986116e-07, + "loss": 0.2621, + "step": 46302 + }, + { + "epoch": 0.9269173985936991, + "grad_norm": 1.1818710565567017, + "learning_rate": 1.3936007791455874e-07, + "loss": 0.3169, + "step": 46303 + }, + { + "epoch": 0.9269374171108276, + "grad_norm": 1.091166615486145, + "learning_rate": 1.3928408316359055e-07, + "loss": 0.2787, + "step": 46304 + }, + { + "epoch": 0.9269574356279559, + "grad_norm": 1.356568694114685, + "learning_rate": 1.3920810884602565e-07, + "loss": 0.342, + "step": 46305 + }, + { + "epoch": 0.9269774541450843, + "grad_norm": 1.1612493991851807, + "learning_rate": 1.391321549621838e-07, + "loss": 0.2746, + "step": 46306 + }, + { + "epoch": 0.9269974726622126, + "grad_norm": 1.05604887008667, + "learning_rate": 1.3905622151238418e-07, + "loss": 0.295, + "step": 46307 + }, + { + "epoch": 0.9270174911793408, + "grad_norm": 1.1044772863388062, + "learning_rate": 1.3898030849694655e-07, + "loss": 0.304, + "step": 46308 + }, + { + "epoch": 0.9270375096964693, + "grad_norm": 1.0733747482299805, + "learning_rate": 1.3890441591618952e-07, + "loss": 0.2484, + "step": 46309 + }, + { + "epoch": 0.9270575282135975, + "grad_norm": 1.1571569442749023, + "learning_rate": 1.3882854377043175e-07, + "loss": 0.2907, + "step": 46310 + }, + { + "epoch": 0.927077546730726, + "grad_norm": 1.0586769580841064, + "learning_rate": 1.3875269205999298e-07, + "loss": 0.2945, + "step": 46311 + }, + { + "epoch": 0.9270975652478542, + "grad_norm": 1.695270299911499, + "learning_rate": 1.3867686078519183e-07, + "loss": 0.7135, + "step": 46312 + }, + { + "epoch": 0.9271175837649827, + "grad_norm": 1.179298996925354, + "learning_rate": 1.3860104994634748e-07, + "loss": 0.296, + "step": 46313 + }, + { + "epoch": 0.927137602282111, + "grad_norm": 1.0366342067718506, + "learning_rate": 1.3852525954377693e-07, + "loss": 0.2918, + "step": 46314 + }, + { + "epoch": 0.9271576207992392, + "grad_norm": 1.1799777746200562, + "learning_rate": 1.38449489577801e-07, + "loss": 0.3484, + "step": 46315 + }, + { + "epoch": 0.9271776393163677, + "grad_norm": 1.1649763584136963, + "learning_rate": 1.3837374004873616e-07, + "loss": 0.3015, + "step": 46316 + }, + { + "epoch": 0.927197657833496, + "grad_norm": 1.1262162923812866, + "learning_rate": 1.3829801095690264e-07, + "loss": 0.3094, + "step": 46317 + }, + { + "epoch": 0.9272176763506244, + "grad_norm": 1.1442559957504272, + "learning_rate": 1.3822230230261802e-07, + "loss": 0.2884, + "step": 46318 + }, + { + "epoch": 0.9272376948677526, + "grad_norm": 1.1955370903015137, + "learning_rate": 1.3814661408620035e-07, + "loss": 0.2864, + "step": 46319 + }, + { + "epoch": 0.927257713384881, + "grad_norm": 1.1784700155258179, + "learning_rate": 1.3807094630796824e-07, + "loss": 0.3159, + "step": 46320 + }, + { + "epoch": 0.9272777319020094, + "grad_norm": 1.1008248329162598, + "learning_rate": 1.3799529896823982e-07, + "loss": 0.3068, + "step": 46321 + }, + { + "epoch": 0.9272977504191378, + "grad_norm": 1.0767585039138794, + "learning_rate": 1.379196720673326e-07, + "loss": 0.2939, + "step": 46322 + }, + { + "epoch": 0.927317768936266, + "grad_norm": 1.224315881729126, + "learning_rate": 1.3784406560556407e-07, + "loss": 0.2889, + "step": 46323 + }, + { + "epoch": 0.9273377874533943, + "grad_norm": 1.0950812101364136, + "learning_rate": 1.3776847958325345e-07, + "loss": 0.3476, + "step": 46324 + }, + { + "epoch": 0.9273578059705228, + "grad_norm": 1.151111364364624, + "learning_rate": 1.3769291400071772e-07, + "loss": 0.2554, + "step": 46325 + }, + { + "epoch": 0.927377824487651, + "grad_norm": 1.128775715827942, + "learning_rate": 1.3761736885827492e-07, + "loss": 0.2714, + "step": 46326 + }, + { + "epoch": 0.9273978430047795, + "grad_norm": 1.0923811197280884, + "learning_rate": 1.375418441562415e-07, + "loss": 0.2852, + "step": 46327 + }, + { + "epoch": 0.9274178615219077, + "grad_norm": 1.0732260942459106, + "learning_rate": 1.3746633989493608e-07, + "loss": 0.311, + "step": 46328 + }, + { + "epoch": 0.9274378800390362, + "grad_norm": 1.220704197883606, + "learning_rate": 1.373908560746756e-07, + "loss": 0.319, + "step": 46329 + }, + { + "epoch": 0.9274578985561645, + "grad_norm": 1.1109719276428223, + "learning_rate": 1.373153926957782e-07, + "loss": 0.3126, + "step": 46330 + }, + { + "epoch": 0.9274779170732927, + "grad_norm": 2.1109602451324463, + "learning_rate": 1.372399497585597e-07, + "loss": 0.7886, + "step": 46331 + }, + { + "epoch": 0.9274979355904212, + "grad_norm": 1.103706955909729, + "learning_rate": 1.3716452726333818e-07, + "loss": 0.2501, + "step": 46332 + }, + { + "epoch": 0.9275179541075494, + "grad_norm": 1.155854344367981, + "learning_rate": 1.370891252104306e-07, + "loss": 0.337, + "step": 46333 + }, + { + "epoch": 0.9275379726246779, + "grad_norm": 1.218770146369934, + "learning_rate": 1.3701374360015395e-07, + "loss": 0.2805, + "step": 46334 + }, + { + "epoch": 0.9275579911418061, + "grad_norm": 1.2670233249664307, + "learning_rate": 1.369383824328252e-07, + "loss": 0.2862, + "step": 46335 + }, + { + "epoch": 0.9275780096589346, + "grad_norm": 1.1908650398254395, + "learning_rate": 1.3686304170876074e-07, + "loss": 0.316, + "step": 46336 + }, + { + "epoch": 0.9275980281760628, + "grad_norm": 1.0841751098632812, + "learning_rate": 1.3678772142827644e-07, + "loss": 0.2767, + "step": 46337 + }, + { + "epoch": 0.9276180466931913, + "grad_norm": 1.0404376983642578, + "learning_rate": 1.367124215916915e-07, + "loss": 0.3024, + "step": 46338 + }, + { + "epoch": 0.9276380652103196, + "grad_norm": 1.200929045677185, + "learning_rate": 1.366371421993201e-07, + "loss": 0.3015, + "step": 46339 + }, + { + "epoch": 0.9276580837274478, + "grad_norm": 1.1871726512908936, + "learning_rate": 1.365618832514798e-07, + "loss": 0.2822, + "step": 46340 + }, + { + "epoch": 0.9276781022445763, + "grad_norm": 1.1572109460830688, + "learning_rate": 1.3648664474848638e-07, + "loss": 0.2944, + "step": 46341 + }, + { + "epoch": 0.9276981207617045, + "grad_norm": 1.131581425666809, + "learning_rate": 1.3641142669065687e-07, + "loss": 0.2754, + "step": 46342 + }, + { + "epoch": 0.927718139278833, + "grad_norm": 1.068325400352478, + "learning_rate": 1.3633622907830713e-07, + "loss": 0.2728, + "step": 46343 + }, + { + "epoch": 0.9277381577959612, + "grad_norm": 1.1407325267791748, + "learning_rate": 1.36261051911753e-07, + "loss": 0.2822, + "step": 46344 + }, + { + "epoch": 0.9277581763130897, + "grad_norm": 1.0800701379776, + "learning_rate": 1.3618589519131088e-07, + "loss": 0.2639, + "step": 46345 + }, + { + "epoch": 0.927778194830218, + "grad_norm": 1.1462079286575317, + "learning_rate": 1.361107589172961e-07, + "loss": 0.2804, + "step": 46346 + }, + { + "epoch": 0.9277982133473462, + "grad_norm": 1.8718067407608032, + "learning_rate": 1.360356430900256e-07, + "loss": 0.7295, + "step": 46347 + }, + { + "epoch": 0.9278182318644747, + "grad_norm": 1.1801249980926514, + "learning_rate": 1.3596054770981415e-07, + "loss": 0.2907, + "step": 46348 + }, + { + "epoch": 0.927838250381603, + "grad_norm": 1.1802349090576172, + "learning_rate": 1.3588547277697815e-07, + "loss": 0.2736, + "step": 46349 + }, + { + "epoch": 0.9278582688987314, + "grad_norm": 1.0851887464523315, + "learning_rate": 1.358104182918324e-07, + "loss": 0.2579, + "step": 46350 + }, + { + "epoch": 0.9278782874158596, + "grad_norm": 1.152563214302063, + "learning_rate": 1.3573538425469323e-07, + "loss": 0.3098, + "step": 46351 + }, + { + "epoch": 0.927898305932988, + "grad_norm": 1.0349814891815186, + "learning_rate": 1.3566037066587545e-07, + "loss": 0.2895, + "step": 46352 + }, + { + "epoch": 0.9279183244501163, + "grad_norm": 1.149889588356018, + "learning_rate": 1.3558537752569435e-07, + "loss": 0.299, + "step": 46353 + }, + { + "epoch": 0.9279383429672448, + "grad_norm": 1.125019907951355, + "learning_rate": 1.3551040483446632e-07, + "loss": 0.293, + "step": 46354 + }, + { + "epoch": 0.927958361484373, + "grad_norm": 1.3231606483459473, + "learning_rate": 1.3543545259250502e-07, + "loss": 0.2732, + "step": 46355 + }, + { + "epoch": 0.9279783800015013, + "grad_norm": 1.0222740173339844, + "learning_rate": 1.353605208001263e-07, + "loss": 0.2828, + "step": 46356 + }, + { + "epoch": 0.9279983985186298, + "grad_norm": 1.0887454748153687, + "learning_rate": 1.3528560945764546e-07, + "loss": 0.2654, + "step": 46357 + }, + { + "epoch": 0.928018417035758, + "grad_norm": 1.0928758382797241, + "learning_rate": 1.3521071856537672e-07, + "loss": 0.29, + "step": 46358 + }, + { + "epoch": 0.9280384355528865, + "grad_norm": 1.056277871131897, + "learning_rate": 1.3513584812363479e-07, + "loss": 0.3084, + "step": 46359 + }, + { + "epoch": 0.9280584540700147, + "grad_norm": 1.1802539825439453, + "learning_rate": 1.3506099813273556e-07, + "loss": 0.2892, + "step": 46360 + }, + { + "epoch": 0.9280784725871432, + "grad_norm": 1.1429102420806885, + "learning_rate": 1.3498616859299317e-07, + "loss": 0.2747, + "step": 46361 + }, + { + "epoch": 0.9280984911042715, + "grad_norm": 1.0762654542922974, + "learning_rate": 1.3491135950472135e-07, + "loss": 0.2427, + "step": 46362 + }, + { + "epoch": 0.9281185096213997, + "grad_norm": 1.1003040075302124, + "learning_rate": 1.3483657086823533e-07, + "loss": 0.26, + "step": 46363 + }, + { + "epoch": 0.9281385281385282, + "grad_norm": 1.0133956670761108, + "learning_rate": 1.3476180268384932e-07, + "loss": 0.2464, + "step": 46364 + }, + { + "epoch": 0.9281585466556564, + "grad_norm": 1.1655932664871216, + "learning_rate": 1.3468705495187805e-07, + "loss": 0.2621, + "step": 46365 + }, + { + "epoch": 0.9281785651727849, + "grad_norm": 1.0394257307052612, + "learning_rate": 1.3461232767263467e-07, + "loss": 0.2811, + "step": 46366 + }, + { + "epoch": 0.9281985836899131, + "grad_norm": 1.0450832843780518, + "learning_rate": 1.345376208464344e-07, + "loss": 0.2751, + "step": 46367 + }, + { + "epoch": 0.9282186022070416, + "grad_norm": 1.174816608428955, + "learning_rate": 1.344629344735904e-07, + "loss": 0.3325, + "step": 46368 + }, + { + "epoch": 0.9282386207241698, + "grad_norm": 1.22574782371521, + "learning_rate": 1.343882685544179e-07, + "loss": 0.3008, + "step": 46369 + }, + { + "epoch": 0.9282586392412983, + "grad_norm": 1.115655779838562, + "learning_rate": 1.3431362308923003e-07, + "loss": 0.2969, + "step": 46370 + }, + { + "epoch": 0.9282786577584266, + "grad_norm": 0.990218997001648, + "learning_rate": 1.342389980783404e-07, + "loss": 0.2777, + "step": 46371 + }, + { + "epoch": 0.9282986762755548, + "grad_norm": 1.1033670902252197, + "learning_rate": 1.3416439352206267e-07, + "loss": 0.3225, + "step": 46372 + }, + { + "epoch": 0.9283186947926833, + "grad_norm": 1.103644847869873, + "learning_rate": 1.3408980942071105e-07, + "loss": 0.3189, + "step": 46373 + }, + { + "epoch": 0.9283387133098115, + "grad_norm": 1.9623348712921143, + "learning_rate": 1.3401524577459857e-07, + "loss": 0.7811, + "step": 46374 + }, + { + "epoch": 0.92835873182694, + "grad_norm": 1.755049467086792, + "learning_rate": 1.339407025840389e-07, + "loss": 0.713, + "step": 46375 + }, + { + "epoch": 0.9283787503440682, + "grad_norm": 1.098254919052124, + "learning_rate": 1.3386617984934514e-07, + "loss": 0.3167, + "step": 46376 + }, + { + "epoch": 0.9283987688611967, + "grad_norm": 1.0341792106628418, + "learning_rate": 1.3379167757083033e-07, + "loss": 0.3158, + "step": 46377 + }, + { + "epoch": 0.928418787378325, + "grad_norm": 1.0166034698486328, + "learning_rate": 1.3371719574880926e-07, + "loss": 0.2698, + "step": 46378 + }, + { + "epoch": 0.9284388058954532, + "grad_norm": 1.1906764507293701, + "learning_rate": 1.3364273438359276e-07, + "loss": 0.2958, + "step": 46379 + }, + { + "epoch": 0.9284588244125817, + "grad_norm": 1.0988975763320923, + "learning_rate": 1.335682934754956e-07, + "loss": 0.3086, + "step": 46380 + }, + { + "epoch": 0.92847884292971, + "grad_norm": 1.1128123998641968, + "learning_rate": 1.334938730248292e-07, + "loss": 0.2569, + "step": 46381 + }, + { + "epoch": 0.9284988614468384, + "grad_norm": 1.2718982696533203, + "learning_rate": 1.3341947303190827e-07, + "loss": 0.2802, + "step": 46382 + }, + { + "epoch": 0.9285188799639666, + "grad_norm": 1.0259642601013184, + "learning_rate": 1.3334509349704428e-07, + "loss": 0.2549, + "step": 46383 + }, + { + "epoch": 0.928538898481095, + "grad_norm": 1.147858738899231, + "learning_rate": 1.3327073442055026e-07, + "loss": 0.2972, + "step": 46384 + }, + { + "epoch": 0.9285589169982233, + "grad_norm": 1.2607347965240479, + "learning_rate": 1.3319639580273823e-07, + "loss": 0.278, + "step": 46385 + }, + { + "epoch": 0.9285789355153518, + "grad_norm": 1.888607382774353, + "learning_rate": 1.331220776439218e-07, + "loss": 0.736, + "step": 46386 + }, + { + "epoch": 0.92859895403248, + "grad_norm": 1.9512746334075928, + "learning_rate": 1.3304777994441241e-07, + "loss": 0.756, + "step": 46387 + }, + { + "epoch": 0.9286189725496083, + "grad_norm": 1.9879310131072998, + "learning_rate": 1.3297350270452315e-07, + "loss": 0.7939, + "step": 46388 + }, + { + "epoch": 0.9286389910667368, + "grad_norm": 1.1526950597763062, + "learning_rate": 1.3289924592456594e-07, + "loss": 0.3252, + "step": 46389 + }, + { + "epoch": 0.928659009583865, + "grad_norm": 1.1619206666946411, + "learning_rate": 1.3282500960485223e-07, + "loss": 0.2907, + "step": 46390 + }, + { + "epoch": 0.9286790281009935, + "grad_norm": 1.1654795408248901, + "learning_rate": 1.3275079374569566e-07, + "loss": 0.2763, + "step": 46391 + }, + { + "epoch": 0.9286990466181217, + "grad_norm": 2.061793565750122, + "learning_rate": 1.3267659834740653e-07, + "loss": 0.7615, + "step": 46392 + }, + { + "epoch": 0.9287190651352502, + "grad_norm": 1.0556576251983643, + "learning_rate": 1.3260242341029795e-07, + "loss": 0.2871, + "step": 46393 + }, + { + "epoch": 0.9287390836523784, + "grad_norm": 1.1843668222427368, + "learning_rate": 1.3252826893468074e-07, + "loss": 0.2976, + "step": 46394 + }, + { + "epoch": 0.9287591021695067, + "grad_norm": 1.0014846324920654, + "learning_rate": 1.3245413492086745e-07, + "loss": 0.2662, + "step": 46395 + }, + { + "epoch": 0.9287791206866352, + "grad_norm": 1.1239537000656128, + "learning_rate": 1.3238002136917004e-07, + "loss": 0.2637, + "step": 46396 + }, + { + "epoch": 0.9287991392037634, + "grad_norm": 1.0756006240844727, + "learning_rate": 1.3230592827989941e-07, + "loss": 0.2945, + "step": 46397 + }, + { + "epoch": 0.9288191577208919, + "grad_norm": 1.063383936882019, + "learning_rate": 1.3223185565336693e-07, + "loss": 0.2865, + "step": 46398 + }, + { + "epoch": 0.9288391762380201, + "grad_norm": 1.129280686378479, + "learning_rate": 1.321578034898835e-07, + "loss": 0.317, + "step": 46399 + }, + { + "epoch": 0.9288591947551486, + "grad_norm": 1.0369575023651123, + "learning_rate": 1.320837717897616e-07, + "loss": 0.2512, + "step": 46400 + }, + { + "epoch": 0.9288792132722768, + "grad_norm": 2.22717547416687, + "learning_rate": 1.3200976055331215e-07, + "loss": 0.8064, + "step": 46401 + }, + { + "epoch": 0.9288992317894053, + "grad_norm": 1.1032984256744385, + "learning_rate": 1.3193576978084598e-07, + "loss": 0.3238, + "step": 46402 + }, + { + "epoch": 0.9289192503065336, + "grad_norm": 1.3699514865875244, + "learning_rate": 1.3186179947267396e-07, + "loss": 0.3063, + "step": 46403 + }, + { + "epoch": 0.9289392688236618, + "grad_norm": 1.770195484161377, + "learning_rate": 1.3178784962910751e-07, + "loss": 0.7255, + "step": 46404 + }, + { + "epoch": 0.9289592873407903, + "grad_norm": 1.0233418941497803, + "learning_rate": 1.3171392025045693e-07, + "loss": 0.2546, + "step": 46405 + }, + { + "epoch": 0.9289793058579185, + "grad_norm": 1.026668906211853, + "learning_rate": 1.316400113370342e-07, + "loss": 0.2601, + "step": 46406 + }, + { + "epoch": 0.928999324375047, + "grad_norm": 1.2033989429473877, + "learning_rate": 1.3156612288914795e-07, + "loss": 0.2894, + "step": 46407 + }, + { + "epoch": 0.9290193428921752, + "grad_norm": 1.10775625705719, + "learning_rate": 1.314922549071107e-07, + "loss": 0.2771, + "step": 46408 + }, + { + "epoch": 0.9290393614093037, + "grad_norm": 1.0700610876083374, + "learning_rate": 1.314184073912328e-07, + "loss": 0.2973, + "step": 46409 + }, + { + "epoch": 0.929059379926432, + "grad_norm": 1.22794771194458, + "learning_rate": 1.3134458034182396e-07, + "loss": 0.3582, + "step": 46410 + }, + { + "epoch": 0.9290793984435602, + "grad_norm": 1.2482126951217651, + "learning_rate": 1.312707737591945e-07, + "loss": 0.2729, + "step": 46411 + }, + { + "epoch": 0.9290994169606887, + "grad_norm": 1.1401580572128296, + "learning_rate": 1.3119698764365474e-07, + "loss": 0.2934, + "step": 46412 + }, + { + "epoch": 0.929119435477817, + "grad_norm": 1.0058389902114868, + "learning_rate": 1.3112322199551553e-07, + "loss": 0.288, + "step": 46413 + }, + { + "epoch": 0.9291394539949454, + "grad_norm": 1.2879698276519775, + "learning_rate": 1.3104947681508607e-07, + "loss": 0.2992, + "step": 46414 + }, + { + "epoch": 0.9291594725120736, + "grad_norm": 1.017587661743164, + "learning_rate": 1.3097575210267722e-07, + "loss": 0.2418, + "step": 46415 + }, + { + "epoch": 0.929179491029202, + "grad_norm": 1.0665470361709595, + "learning_rate": 1.3090204785859762e-07, + "loss": 0.268, + "step": 46416 + }, + { + "epoch": 0.9291995095463303, + "grad_norm": 1.0941731929779053, + "learning_rate": 1.3082836408315869e-07, + "loss": 0.2807, + "step": 46417 + }, + { + "epoch": 0.9292195280634586, + "grad_norm": 1.0004544258117676, + "learning_rate": 1.3075470077666908e-07, + "loss": 0.2789, + "step": 46418 + }, + { + "epoch": 0.929239546580587, + "grad_norm": 1.1015952825546265, + "learning_rate": 1.3068105793943964e-07, + "loss": 0.3129, + "step": 46419 + }, + { + "epoch": 0.9292595650977153, + "grad_norm": 1.1871446371078491, + "learning_rate": 1.3060743557177846e-07, + "loss": 0.2898, + "step": 46420 + }, + { + "epoch": 0.9292795836148438, + "grad_norm": 1.2098227739334106, + "learning_rate": 1.3053383367399587e-07, + "loss": 0.2698, + "step": 46421 + }, + { + "epoch": 0.929299602131972, + "grad_norm": 1.1429953575134277, + "learning_rate": 1.3046025224640102e-07, + "loss": 0.3504, + "step": 46422 + }, + { + "epoch": 0.9293196206491005, + "grad_norm": 1.1659014225006104, + "learning_rate": 1.303866912893037e-07, + "loss": 0.2706, + "step": 46423 + }, + { + "epoch": 0.9293396391662287, + "grad_norm": 0.9968857765197754, + "learning_rate": 1.3031315080301254e-07, + "loss": 0.2426, + "step": 46424 + }, + { + "epoch": 0.9293596576833572, + "grad_norm": 1.1925506591796875, + "learning_rate": 1.3023963078783674e-07, + "loss": 0.3379, + "step": 46425 + }, + { + "epoch": 0.9293796762004854, + "grad_norm": 0.9369866847991943, + "learning_rate": 1.3016613124408552e-07, + "loss": 0.2678, + "step": 46426 + }, + { + "epoch": 0.9293996947176137, + "grad_norm": 1.1016489267349243, + "learning_rate": 1.3009265217206856e-07, + "loss": 0.2977, + "step": 46427 + }, + { + "epoch": 0.9294197132347422, + "grad_norm": 1.0526845455169678, + "learning_rate": 1.3001919357209347e-07, + "loss": 0.2797, + "step": 46428 + }, + { + "epoch": 0.9294397317518704, + "grad_norm": 1.0383193492889404, + "learning_rate": 1.2994575544446998e-07, + "loss": 0.2603, + "step": 46429 + }, + { + "epoch": 0.9294597502689989, + "grad_norm": 1.1485084295272827, + "learning_rate": 1.2987233778950614e-07, + "loss": 0.3059, + "step": 46430 + }, + { + "epoch": 0.9294797687861271, + "grad_norm": 1.0171399116516113, + "learning_rate": 1.2979894060751064e-07, + "loss": 0.31, + "step": 46431 + }, + { + "epoch": 0.9294997873032556, + "grad_norm": 1.1528377532958984, + "learning_rate": 1.2972556389879265e-07, + "loss": 0.2375, + "step": 46432 + }, + { + "epoch": 0.9295198058203838, + "grad_norm": 1.1571320295333862, + "learning_rate": 1.2965220766366083e-07, + "loss": 0.3337, + "step": 46433 + }, + { + "epoch": 0.9295398243375121, + "grad_norm": 0.9904710054397583, + "learning_rate": 1.2957887190242213e-07, + "loss": 0.251, + "step": 46434 + }, + { + "epoch": 0.9295598428546406, + "grad_norm": 1.0477887392044067, + "learning_rate": 1.2950555661538632e-07, + "loss": 0.3221, + "step": 46435 + }, + { + "epoch": 0.9295798613717688, + "grad_norm": 1.0176353454589844, + "learning_rate": 1.2943226180286096e-07, + "loss": 0.2503, + "step": 46436 + }, + { + "epoch": 0.9295998798888973, + "grad_norm": 1.103701114654541, + "learning_rate": 1.2935898746515407e-07, + "loss": 0.3089, + "step": 46437 + }, + { + "epoch": 0.9296198984060255, + "grad_norm": 1.1243778467178345, + "learning_rate": 1.2928573360257378e-07, + "loss": 0.2743, + "step": 46438 + }, + { + "epoch": 0.929639916923154, + "grad_norm": 1.0700675249099731, + "learning_rate": 1.2921250021542819e-07, + "loss": 0.2514, + "step": 46439 + }, + { + "epoch": 0.9296599354402822, + "grad_norm": 1.0960758924484253, + "learning_rate": 1.2913928730402536e-07, + "loss": 0.3226, + "step": 46440 + }, + { + "epoch": 0.9296799539574107, + "grad_norm": 1.390613079071045, + "learning_rate": 1.2906609486867228e-07, + "loss": 0.3245, + "step": 46441 + }, + { + "epoch": 0.929699972474539, + "grad_norm": 1.2258150577545166, + "learning_rate": 1.2899292290967758e-07, + "loss": 0.2997, + "step": 46442 + }, + { + "epoch": 0.9297199909916672, + "grad_norm": 1.0538007020950317, + "learning_rate": 1.289197714273477e-07, + "loss": 0.2737, + "step": 46443 + }, + { + "epoch": 0.9297400095087957, + "grad_norm": 1.1277681589126587, + "learning_rate": 1.2884664042199068e-07, + "loss": 0.3016, + "step": 46444 + }, + { + "epoch": 0.929760028025924, + "grad_norm": 1.1411811113357544, + "learning_rate": 1.2877352989391468e-07, + "loss": 0.297, + "step": 46445 + }, + { + "epoch": 0.9297800465430524, + "grad_norm": 1.303861379623413, + "learning_rate": 1.287004398434266e-07, + "loss": 0.3221, + "step": 46446 + }, + { + "epoch": 0.9298000650601806, + "grad_norm": 0.9874771237373352, + "learning_rate": 1.2862737027083293e-07, + "loss": 0.2821, + "step": 46447 + }, + { + "epoch": 0.929820083577309, + "grad_norm": 1.3862947225570679, + "learning_rate": 1.2855432117644228e-07, + "loss": 0.3177, + "step": 46448 + }, + { + "epoch": 0.9298401020944373, + "grad_norm": 1.8507660627365112, + "learning_rate": 1.2848129256056053e-07, + "loss": 0.8131, + "step": 46449 + }, + { + "epoch": 0.9298601206115656, + "grad_norm": 1.1697709560394287, + "learning_rate": 1.2840828442349518e-07, + "loss": 0.2786, + "step": 46450 + }, + { + "epoch": 0.929880139128694, + "grad_norm": 1.126789927482605, + "learning_rate": 1.2833529676555267e-07, + "loss": 0.2766, + "step": 46451 + }, + { + "epoch": 0.9299001576458223, + "grad_norm": 0.9674414992332458, + "learning_rate": 1.2826232958704054e-07, + "loss": 0.2466, + "step": 46452 + }, + { + "epoch": 0.9299201761629508, + "grad_norm": 1.0673104524612427, + "learning_rate": 1.2818938288826465e-07, + "loss": 0.3233, + "step": 46453 + }, + { + "epoch": 0.929940194680079, + "grad_norm": 1.0628141164779663, + "learning_rate": 1.2811645666953308e-07, + "loss": 0.2905, + "step": 46454 + }, + { + "epoch": 0.9299602131972075, + "grad_norm": 1.1081352233886719, + "learning_rate": 1.2804355093115117e-07, + "loss": 0.2517, + "step": 46455 + }, + { + "epoch": 0.9299802317143357, + "grad_norm": 1.128003478050232, + "learning_rate": 1.2797066567342476e-07, + "loss": 0.239, + "step": 46456 + }, + { + "epoch": 0.9300002502314642, + "grad_norm": 1.9257713556289673, + "learning_rate": 1.2789780089666192e-07, + "loss": 0.7337, + "step": 46457 + }, + { + "epoch": 0.9300202687485924, + "grad_norm": 1.0639362335205078, + "learning_rate": 1.27824956601168e-07, + "loss": 0.2727, + "step": 46458 + }, + { + "epoch": 0.9300402872657207, + "grad_norm": 1.053646445274353, + "learning_rate": 1.2775213278724996e-07, + "loss": 0.2973, + "step": 46459 + }, + { + "epoch": 0.9300603057828492, + "grad_norm": 1.8420571088790894, + "learning_rate": 1.276793294552131e-07, + "loss": 0.7871, + "step": 46460 + }, + { + "epoch": 0.9300803242999774, + "grad_norm": 1.030129313468933, + "learning_rate": 1.2760654660536387e-07, + "loss": 0.272, + "step": 46461 + }, + { + "epoch": 0.9301003428171059, + "grad_norm": 1.066957950592041, + "learning_rate": 1.275337842380081e-07, + "loss": 0.2722, + "step": 46462 + }, + { + "epoch": 0.9301203613342341, + "grad_norm": 1.0176829099655151, + "learning_rate": 1.2746104235345224e-07, + "loss": 0.2353, + "step": 46463 + }, + { + "epoch": 0.9301403798513626, + "grad_norm": 1.1454381942749023, + "learning_rate": 1.2738832095200105e-07, + "loss": 0.304, + "step": 46464 + }, + { + "epoch": 0.9301603983684908, + "grad_norm": 1.1650135517120361, + "learning_rate": 1.2731562003396035e-07, + "loss": 0.3053, + "step": 46465 + }, + { + "epoch": 0.9301804168856191, + "grad_norm": 1.1038914918899536, + "learning_rate": 1.272429395996372e-07, + "loss": 0.2544, + "step": 46466 + }, + { + "epoch": 0.9302004354027475, + "grad_norm": 1.176080584526062, + "learning_rate": 1.271702796493357e-07, + "loss": 0.3011, + "step": 46467 + }, + { + "epoch": 0.9302204539198758, + "grad_norm": 1.0384684801101685, + "learning_rate": 1.2709764018336123e-07, + "loss": 0.3022, + "step": 46468 + }, + { + "epoch": 0.9302404724370043, + "grad_norm": 1.1124118566513062, + "learning_rate": 1.2702502120201965e-07, + "loss": 0.3185, + "step": 46469 + }, + { + "epoch": 0.9302604909541325, + "grad_norm": 1.0661708116531372, + "learning_rate": 1.2695242270561625e-07, + "loss": 0.2376, + "step": 46470 + }, + { + "epoch": 0.930280509471261, + "grad_norm": 1.075717806816101, + "learning_rate": 1.268798446944569e-07, + "loss": 0.3001, + "step": 46471 + }, + { + "epoch": 0.9303005279883892, + "grad_norm": 1.0222772359848022, + "learning_rate": 1.2680728716884529e-07, + "loss": 0.3317, + "step": 46472 + }, + { + "epoch": 0.9303205465055177, + "grad_norm": 1.0699970722198486, + "learning_rate": 1.2673475012908776e-07, + "loss": 0.2887, + "step": 46473 + }, + { + "epoch": 0.930340565022646, + "grad_norm": 1.2703440189361572, + "learning_rate": 1.2666223357548803e-07, + "loss": 0.2697, + "step": 46474 + }, + { + "epoch": 0.9303605835397742, + "grad_norm": 1.1569488048553467, + "learning_rate": 1.2658973750835135e-07, + "loss": 0.3045, + "step": 46475 + }, + { + "epoch": 0.9303806020569027, + "grad_norm": 1.0148476362228394, + "learning_rate": 1.2651726192798308e-07, + "loss": 0.2962, + "step": 46476 + }, + { + "epoch": 0.930400620574031, + "grad_norm": 1.0641769170761108, + "learning_rate": 1.2644480683468797e-07, + "loss": 0.2769, + "step": 46477 + }, + { + "epoch": 0.9304206390911594, + "grad_norm": 1.0345628261566162, + "learning_rate": 1.2637237222876908e-07, + "loss": 0.2971, + "step": 46478 + }, + { + "epoch": 0.9304406576082876, + "grad_norm": 1.1026663780212402, + "learning_rate": 1.262999581105323e-07, + "loss": 0.2903, + "step": 46479 + }, + { + "epoch": 0.930460676125416, + "grad_norm": 1.21244215965271, + "learning_rate": 1.2622756448028183e-07, + "loss": 0.311, + "step": 46480 + }, + { + "epoch": 0.9304806946425443, + "grad_norm": 1.0529251098632812, + "learning_rate": 1.2615519133832188e-07, + "loss": 0.2934, + "step": 46481 + }, + { + "epoch": 0.9305007131596726, + "grad_norm": 1.0806039571762085, + "learning_rate": 1.2608283868495608e-07, + "loss": 0.2706, + "step": 46482 + }, + { + "epoch": 0.930520731676801, + "grad_norm": 1.121205449104309, + "learning_rate": 1.2601050652048918e-07, + "loss": 0.2836, + "step": 46483 + }, + { + "epoch": 0.9305407501939293, + "grad_norm": 1.0576368570327759, + "learning_rate": 1.259381948452254e-07, + "loss": 0.2796, + "step": 46484 + }, + { + "epoch": 0.9305607687110578, + "grad_norm": 1.0485655069351196, + "learning_rate": 1.2586590365946893e-07, + "loss": 0.2949, + "step": 46485 + }, + { + "epoch": 0.930580787228186, + "grad_norm": 1.1824467182159424, + "learning_rate": 1.2579363296352344e-07, + "loss": 0.337, + "step": 46486 + }, + { + "epoch": 0.9306008057453145, + "grad_norm": 1.181793451309204, + "learning_rate": 1.257213827576914e-07, + "loss": 0.3162, + "step": 46487 + }, + { + "epoch": 0.9306208242624427, + "grad_norm": 1.1412105560302734, + "learning_rate": 1.2564915304227876e-07, + "loss": 0.3235, + "step": 46488 + }, + { + "epoch": 0.9306408427795712, + "grad_norm": 1.1655880212783813, + "learning_rate": 1.25576943817588e-07, + "loss": 0.2762, + "step": 46489 + }, + { + "epoch": 0.9306608612966994, + "grad_norm": 1.2398921251296997, + "learning_rate": 1.255047550839228e-07, + "loss": 0.321, + "step": 46490 + }, + { + "epoch": 0.9306808798138277, + "grad_norm": 1.239074945449829, + "learning_rate": 1.2543258684158622e-07, + "loss": 0.2942, + "step": 46491 + }, + { + "epoch": 0.9307008983309562, + "grad_norm": 1.1341713666915894, + "learning_rate": 1.253604390908819e-07, + "loss": 0.2593, + "step": 46492 + }, + { + "epoch": 0.9307209168480844, + "grad_norm": 1.061214804649353, + "learning_rate": 1.252883118321141e-07, + "loss": 0.3043, + "step": 46493 + }, + { + "epoch": 0.9307409353652129, + "grad_norm": 1.9603941440582275, + "learning_rate": 1.2521620506558473e-07, + "loss": 0.7877, + "step": 46494 + }, + { + "epoch": 0.9307609538823411, + "grad_norm": 1.1028878688812256, + "learning_rate": 1.2514411879159695e-07, + "loss": 0.2985, + "step": 46495 + }, + { + "epoch": 0.9307809723994696, + "grad_norm": 1.065894365310669, + "learning_rate": 1.2507205301045432e-07, + "loss": 0.2935, + "step": 46496 + }, + { + "epoch": 0.9308009909165978, + "grad_norm": 1.1654232740402222, + "learning_rate": 1.2500000772246002e-07, + "loss": 0.3001, + "step": 46497 + }, + { + "epoch": 0.9308210094337261, + "grad_norm": 0.9792236685752869, + "learning_rate": 1.2492798292791653e-07, + "loss": 0.2545, + "step": 46498 + }, + { + "epoch": 0.9308410279508545, + "grad_norm": 1.1148570775985718, + "learning_rate": 1.2485597862712696e-07, + "loss": 0.315, + "step": 46499 + }, + { + "epoch": 0.9308610464679828, + "grad_norm": 1.0495858192443848, + "learning_rate": 1.2478399482039272e-07, + "loss": 0.322, + "step": 46500 + }, + { + "epoch": 0.9308810649851113, + "grad_norm": 1.1343543529510498, + "learning_rate": 1.2471203150801803e-07, + "loss": 0.2821, + "step": 46501 + }, + { + "epoch": 0.9309010835022395, + "grad_norm": 1.1140402555465698, + "learning_rate": 1.2464008869030542e-07, + "loss": 0.3221, + "step": 46502 + }, + { + "epoch": 0.930921102019368, + "grad_norm": 1.0423513650894165, + "learning_rate": 1.2456816636755576e-07, + "loss": 0.2858, + "step": 46503 + }, + { + "epoch": 0.9309411205364962, + "grad_norm": 1.0830124616622925, + "learning_rate": 1.2449626454007325e-07, + "loss": 0.263, + "step": 46504 + }, + { + "epoch": 0.9309611390536247, + "grad_norm": 1.2140470743179321, + "learning_rate": 1.244243832081582e-07, + "loss": 0.3022, + "step": 46505 + }, + { + "epoch": 0.930981157570753, + "grad_norm": 1.8453515768051147, + "learning_rate": 1.2435252237211427e-07, + "loss": 0.7149, + "step": 46506 + }, + { + "epoch": 0.9310011760878812, + "grad_norm": 1.9005672931671143, + "learning_rate": 1.242806820322434e-07, + "loss": 0.7342, + "step": 46507 + }, + { + "epoch": 0.9310211946050096, + "grad_norm": 1.1705435514450073, + "learning_rate": 1.2420886218884652e-07, + "loss": 0.3399, + "step": 46508 + }, + { + "epoch": 0.931041213122138, + "grad_norm": 1.1328877210617065, + "learning_rate": 1.2413706284222615e-07, + "loss": 0.2798, + "step": 46509 + }, + { + "epoch": 0.9310612316392664, + "grad_norm": 1.058316707611084, + "learning_rate": 1.240652839926848e-07, + "loss": 0.2711, + "step": 46510 + }, + { + "epoch": 0.9310812501563946, + "grad_norm": 1.0486901998519897, + "learning_rate": 1.2399352564052392e-07, + "loss": 0.2948, + "step": 46511 + }, + { + "epoch": 0.931101268673523, + "grad_norm": 1.290992021560669, + "learning_rate": 1.2392178778604492e-07, + "loss": 0.3234, + "step": 46512 + }, + { + "epoch": 0.9311212871906513, + "grad_norm": 1.2597405910491943, + "learning_rate": 1.2385007042954923e-07, + "loss": 0.2778, + "step": 46513 + }, + { + "epoch": 0.9311413057077796, + "grad_norm": 2.002495765686035, + "learning_rate": 1.2377837357133827e-07, + "loss": 0.8046, + "step": 46514 + }, + { + "epoch": 0.931161324224908, + "grad_norm": 1.1474158763885498, + "learning_rate": 1.2370669721171403e-07, + "loss": 0.2744, + "step": 46515 + }, + { + "epoch": 0.9311813427420363, + "grad_norm": 1.1572849750518799, + "learning_rate": 1.2363504135097736e-07, + "loss": 0.2938, + "step": 46516 + }, + { + "epoch": 0.9312013612591648, + "grad_norm": 1.07037353515625, + "learning_rate": 1.2356340598942974e-07, + "loss": 0.2594, + "step": 46517 + }, + { + "epoch": 0.931221379776293, + "grad_norm": 1.4136525392532349, + "learning_rate": 1.2349179112737143e-07, + "loss": 0.3125, + "step": 46518 + }, + { + "epoch": 0.9312413982934215, + "grad_norm": 1.0874004364013672, + "learning_rate": 1.2342019676510443e-07, + "loss": 0.2803, + "step": 46519 + }, + { + "epoch": 0.9312614168105497, + "grad_norm": 1.1514660120010376, + "learning_rate": 1.2334862290293014e-07, + "loss": 0.3133, + "step": 46520 + }, + { + "epoch": 0.9312814353276782, + "grad_norm": 1.1193060874938965, + "learning_rate": 1.2327706954114782e-07, + "loss": 0.2952, + "step": 46521 + }, + { + "epoch": 0.9313014538448064, + "grad_norm": 1.1327797174453735, + "learning_rate": 1.2320553668005997e-07, + "loss": 0.3128, + "step": 46522 + }, + { + "epoch": 0.9313214723619347, + "grad_norm": 1.2043230533599854, + "learning_rate": 1.231340243199658e-07, + "loss": 0.3457, + "step": 46523 + }, + { + "epoch": 0.9313414908790631, + "grad_norm": 1.0600686073303223, + "learning_rate": 1.230625324611673e-07, + "loss": 0.3045, + "step": 46524 + }, + { + "epoch": 0.9313615093961914, + "grad_norm": 1.3587114810943604, + "learning_rate": 1.229910611039642e-07, + "loss": 0.3233, + "step": 46525 + }, + { + "epoch": 0.9313815279133199, + "grad_norm": 1.2018715143203735, + "learning_rate": 1.229196102486574e-07, + "loss": 0.311, + "step": 46526 + }, + { + "epoch": 0.9314015464304481, + "grad_norm": 1.0783523321151733, + "learning_rate": 1.2284817989554666e-07, + "loss": 0.2814, + "step": 46527 + }, + { + "epoch": 0.9314215649475766, + "grad_norm": 1.0282530784606934, + "learning_rate": 1.2277677004493282e-07, + "loss": 0.2645, + "step": 46528 + }, + { + "epoch": 0.9314415834647048, + "grad_norm": 1.119165062904358, + "learning_rate": 1.2270538069711567e-07, + "loss": 0.2949, + "step": 46529 + }, + { + "epoch": 0.9314616019818331, + "grad_norm": 1.114982008934021, + "learning_rate": 1.2263401185239554e-07, + "loss": 0.2752, + "step": 46530 + }, + { + "epoch": 0.9314816204989615, + "grad_norm": 1.1003745794296265, + "learning_rate": 1.2256266351107216e-07, + "loss": 0.3277, + "step": 46531 + }, + { + "epoch": 0.9315016390160898, + "grad_norm": 1.0453453063964844, + "learning_rate": 1.2249133567344583e-07, + "loss": 0.2902, + "step": 46532 + }, + { + "epoch": 0.9315216575332183, + "grad_norm": 1.2439746856689453, + "learning_rate": 1.2242002833981692e-07, + "loss": 0.307, + "step": 46533 + }, + { + "epoch": 0.9315416760503465, + "grad_norm": 1.1692478656768799, + "learning_rate": 1.2234874151048348e-07, + "loss": 0.2812, + "step": 46534 + }, + { + "epoch": 0.931561694567475, + "grad_norm": 1.159562587738037, + "learning_rate": 1.2227747518574695e-07, + "loss": 0.3053, + "step": 46535 + }, + { + "epoch": 0.9315817130846032, + "grad_norm": 1.1542285680770874, + "learning_rate": 1.22206229365906e-07, + "loss": 0.2553, + "step": 46536 + }, + { + "epoch": 0.9316017316017317, + "grad_norm": 1.037811040878296, + "learning_rate": 1.2213500405126033e-07, + "loss": 0.2989, + "step": 46537 + }, + { + "epoch": 0.93162175011886, + "grad_norm": 1.079737663269043, + "learning_rate": 1.2206379924210977e-07, + "loss": 0.2757, + "step": 46538 + }, + { + "epoch": 0.9316417686359882, + "grad_norm": 1.1011433601379395, + "learning_rate": 1.219926149387529e-07, + "loss": 0.2725, + "step": 46539 + }, + { + "epoch": 0.9316617871531166, + "grad_norm": 1.0580425262451172, + "learning_rate": 1.21921451141489e-07, + "loss": 0.2662, + "step": 46540 + }, + { + "epoch": 0.931681805670245, + "grad_norm": 1.9027308225631714, + "learning_rate": 1.218503078506178e-07, + "loss": 0.7787, + "step": 46541 + }, + { + "epoch": 0.9317018241873734, + "grad_norm": 1.4942119121551514, + "learning_rate": 1.217791850664385e-07, + "loss": 0.2631, + "step": 46542 + }, + { + "epoch": 0.9317218427045016, + "grad_norm": 1.0850166082382202, + "learning_rate": 1.2170808278924974e-07, + "loss": 0.2914, + "step": 46543 + }, + { + "epoch": 0.93174186122163, + "grad_norm": 1.4295190572738647, + "learning_rate": 1.2163700101935017e-07, + "loss": 0.296, + "step": 46544 + }, + { + "epoch": 0.9317618797387583, + "grad_norm": 1.9511098861694336, + "learning_rate": 1.2156593975703846e-07, + "loss": 0.7108, + "step": 46545 + }, + { + "epoch": 0.9317818982558866, + "grad_norm": 2.061591386795044, + "learning_rate": 1.214948990026138e-07, + "loss": 0.68, + "step": 46546 + }, + { + "epoch": 0.931801916773015, + "grad_norm": 1.0056517124176025, + "learning_rate": 1.2142387875637485e-07, + "loss": 0.2502, + "step": 46547 + }, + { + "epoch": 0.9318219352901433, + "grad_norm": 1.1836957931518555, + "learning_rate": 1.213528790186197e-07, + "loss": 0.3318, + "step": 46548 + }, + { + "epoch": 0.9318419538072718, + "grad_norm": 1.0146077871322632, + "learning_rate": 1.2128189978964754e-07, + "loss": 0.2861, + "step": 46549 + }, + { + "epoch": 0.9318619723244, + "grad_norm": 1.1517298221588135, + "learning_rate": 1.212109410697565e-07, + "loss": 0.3078, + "step": 46550 + }, + { + "epoch": 0.9318819908415285, + "grad_norm": 1.1575809717178345, + "learning_rate": 1.2114000285924467e-07, + "loss": 0.3529, + "step": 46551 + }, + { + "epoch": 0.9319020093586567, + "grad_norm": 1.0760550498962402, + "learning_rate": 1.210690851584101e-07, + "loss": 0.2885, + "step": 46552 + }, + { + "epoch": 0.9319220278757852, + "grad_norm": 1.0790584087371826, + "learning_rate": 1.209981879675509e-07, + "loss": 0.2405, + "step": 46553 + }, + { + "epoch": 0.9319420463929134, + "grad_norm": 1.34153151512146, + "learning_rate": 1.209273112869658e-07, + "loss": 0.2651, + "step": 46554 + }, + { + "epoch": 0.9319620649100417, + "grad_norm": 1.0998042821884155, + "learning_rate": 1.208564551169522e-07, + "loss": 0.3034, + "step": 46555 + }, + { + "epoch": 0.9319820834271701, + "grad_norm": 1.946123719215393, + "learning_rate": 1.207856194578083e-07, + "loss": 0.6741, + "step": 46556 + }, + { + "epoch": 0.9320021019442984, + "grad_norm": 1.9489094018936157, + "learning_rate": 1.207148043098316e-07, + "loss": 0.759, + "step": 46557 + }, + { + "epoch": 0.9320221204614269, + "grad_norm": 1.0742686986923218, + "learning_rate": 1.2064400967331913e-07, + "loss": 0.276, + "step": 46558 + }, + { + "epoch": 0.9320421389785551, + "grad_norm": 1.1387457847595215, + "learning_rate": 1.2057323554857004e-07, + "loss": 0.3204, + "step": 46559 + }, + { + "epoch": 0.9320621574956836, + "grad_norm": 1.085007667541504, + "learning_rate": 1.2050248193588022e-07, + "loss": 0.2733, + "step": 46560 + }, + { + "epoch": 0.9320821760128118, + "grad_norm": 1.7574533224105835, + "learning_rate": 1.204317488355483e-07, + "loss": 0.7184, + "step": 46561 + }, + { + "epoch": 0.9321021945299401, + "grad_norm": 1.0803455114364624, + "learning_rate": 1.2036103624787077e-07, + "loss": 0.3259, + "step": 46562 + }, + { + "epoch": 0.9321222130470685, + "grad_norm": 1.0812757015228271, + "learning_rate": 1.2029034417314622e-07, + "loss": 0.2704, + "step": 46563 + }, + { + "epoch": 0.9321422315641968, + "grad_norm": 1.0548158884048462, + "learning_rate": 1.2021967261167055e-07, + "loss": 0.2791, + "step": 46564 + }, + { + "epoch": 0.9321622500813252, + "grad_norm": 1.0514590740203857, + "learning_rate": 1.2014902156374132e-07, + "loss": 0.2881, + "step": 46565 + }, + { + "epoch": 0.9321822685984535, + "grad_norm": 1.1416667699813843, + "learning_rate": 1.2007839102965546e-07, + "loss": 0.2668, + "step": 46566 + }, + { + "epoch": 0.932202287115582, + "grad_norm": 1.0372896194458008, + "learning_rate": 1.200077810097089e-07, + "loss": 0.3356, + "step": 46567 + }, + { + "epoch": 0.9322223056327102, + "grad_norm": 1.0056504011154175, + "learning_rate": 1.199371915042008e-07, + "loss": 0.2876, + "step": 46568 + }, + { + "epoch": 0.9322423241498387, + "grad_norm": 1.08391535282135, + "learning_rate": 1.1986662251342595e-07, + "loss": 0.2755, + "step": 46569 + }, + { + "epoch": 0.932262342666967, + "grad_norm": 1.009731650352478, + "learning_rate": 1.197960740376819e-07, + "loss": 0.3204, + "step": 46570 + }, + { + "epoch": 0.9322823611840952, + "grad_norm": 1.2603617906570435, + "learning_rate": 1.197255460772645e-07, + "loss": 0.3016, + "step": 46571 + }, + { + "epoch": 0.9323023797012236, + "grad_norm": 2.113762617111206, + "learning_rate": 1.1965503863247074e-07, + "loss": 0.7141, + "step": 46572 + }, + { + "epoch": 0.9323223982183519, + "grad_norm": 0.9663031697273254, + "learning_rate": 1.195845517035965e-07, + "loss": 0.244, + "step": 46573 + }, + { + "epoch": 0.9323424167354804, + "grad_norm": 1.1077152490615845, + "learning_rate": 1.1951408529093934e-07, + "loss": 0.2424, + "step": 46574 + }, + { + "epoch": 0.9323624352526086, + "grad_norm": 1.1393084526062012, + "learning_rate": 1.1944363939479452e-07, + "loss": 0.319, + "step": 46575 + }, + { + "epoch": 0.932382453769737, + "grad_norm": 1.0823365449905396, + "learning_rate": 1.1937321401545798e-07, + "loss": 0.2945, + "step": 46576 + }, + { + "epoch": 0.9324024722868653, + "grad_norm": 1.7881455421447754, + "learning_rate": 1.1930280915322666e-07, + "loss": 0.7019, + "step": 46577 + }, + { + "epoch": 0.9324224908039936, + "grad_norm": 1.8851344585418701, + "learning_rate": 1.1923242480839538e-07, + "loss": 0.7254, + "step": 46578 + }, + { + "epoch": 0.932442509321122, + "grad_norm": 1.1201097965240479, + "learning_rate": 1.1916206098126104e-07, + "loss": 0.309, + "step": 46579 + }, + { + "epoch": 0.9324625278382503, + "grad_norm": 1.1018954515457153, + "learning_rate": 1.1909171767211903e-07, + "loss": 0.3284, + "step": 46580 + }, + { + "epoch": 0.9324825463553787, + "grad_norm": 1.9621236324310303, + "learning_rate": 1.1902139488126462e-07, + "loss": 0.7261, + "step": 46581 + }, + { + "epoch": 0.932502564872507, + "grad_norm": 1.8515323400497437, + "learning_rate": 1.1895109260899485e-07, + "loss": 0.7184, + "step": 46582 + }, + { + "epoch": 0.9325225833896355, + "grad_norm": 0.9781535267829895, + "learning_rate": 1.1888081085560332e-07, + "loss": 0.2628, + "step": 46583 + }, + { + "epoch": 0.9325426019067637, + "grad_norm": 1.1933389902114868, + "learning_rate": 1.1881054962138649e-07, + "loss": 0.3108, + "step": 46584 + }, + { + "epoch": 0.9325626204238922, + "grad_norm": 1.8855394124984741, + "learning_rate": 1.1874030890664024e-07, + "loss": 0.7088, + "step": 46585 + }, + { + "epoch": 0.9325826389410204, + "grad_norm": 1.868457555770874, + "learning_rate": 1.1867008871165874e-07, + "loss": 0.7098, + "step": 46586 + }, + { + "epoch": 0.9326026574581487, + "grad_norm": 1.134111762046814, + "learning_rate": 1.1859988903673791e-07, + "loss": 0.2796, + "step": 46587 + }, + { + "epoch": 0.9326226759752771, + "grad_norm": 1.1645126342773438, + "learning_rate": 1.185297098821725e-07, + "loss": 0.3242, + "step": 46588 + }, + { + "epoch": 0.9326426944924054, + "grad_norm": 1.1764206886291504, + "learning_rate": 1.1845955124825782e-07, + "loss": 0.288, + "step": 46589 + }, + { + "epoch": 0.9326627130095339, + "grad_norm": 1.1112728118896484, + "learning_rate": 1.1838941313528862e-07, + "loss": 0.3103, + "step": 46590 + }, + { + "epoch": 0.9326827315266621, + "grad_norm": 1.0987181663513184, + "learning_rate": 1.1831929554356026e-07, + "loss": 0.2874, + "step": 46591 + }, + { + "epoch": 0.9327027500437906, + "grad_norm": 1.1424908638000488, + "learning_rate": 1.1824919847336635e-07, + "loss": 0.2975, + "step": 46592 + }, + { + "epoch": 0.9327227685609188, + "grad_norm": 1.0384351015090942, + "learning_rate": 1.1817912192500226e-07, + "loss": 0.2672, + "step": 46593 + }, + { + "epoch": 0.9327427870780471, + "grad_norm": 1.0877748727798462, + "learning_rate": 1.1810906589876325e-07, + "loss": 0.3464, + "step": 46594 + }, + { + "epoch": 0.9327628055951755, + "grad_norm": 1.1381093263626099, + "learning_rate": 1.1803903039494303e-07, + "loss": 0.3028, + "step": 46595 + }, + { + "epoch": 0.9327828241123038, + "grad_norm": 1.133062720298767, + "learning_rate": 1.1796901541383576e-07, + "loss": 0.3066, + "step": 46596 + }, + { + "epoch": 0.9328028426294322, + "grad_norm": 1.0374809503555298, + "learning_rate": 1.1789902095573569e-07, + "loss": 0.3001, + "step": 46597 + }, + { + "epoch": 0.9328228611465605, + "grad_norm": 1.8577027320861816, + "learning_rate": 1.1782904702093811e-07, + "loss": 0.7934, + "step": 46598 + }, + { + "epoch": 0.932842879663689, + "grad_norm": 1.1747968196868896, + "learning_rate": 1.1775909360973615e-07, + "loss": 0.2521, + "step": 46599 + }, + { + "epoch": 0.9328628981808172, + "grad_norm": 1.2467001676559448, + "learning_rate": 1.1768916072242453e-07, + "loss": 0.2794, + "step": 46600 + }, + { + "epoch": 0.9328829166979457, + "grad_norm": 1.8108354806900024, + "learning_rate": 1.176192483592975e-07, + "loss": 0.7578, + "step": 46601 + }, + { + "epoch": 0.932902935215074, + "grad_norm": 1.240097165107727, + "learning_rate": 1.1754935652064758e-07, + "loss": 0.3098, + "step": 46602 + }, + { + "epoch": 0.9329229537322022, + "grad_norm": 1.2104551792144775, + "learning_rate": 1.174794852067701e-07, + "loss": 0.2711, + "step": 46603 + }, + { + "epoch": 0.9329429722493306, + "grad_norm": 1.152047038078308, + "learning_rate": 1.1740963441795761e-07, + "loss": 0.3248, + "step": 46604 + }, + { + "epoch": 0.9329629907664589, + "grad_norm": 1.0830841064453125, + "learning_rate": 1.1733980415450485e-07, + "loss": 0.2847, + "step": 46605 + }, + { + "epoch": 0.9329830092835874, + "grad_norm": 1.1616257429122925, + "learning_rate": 1.1726999441670495e-07, + "loss": 0.3101, + "step": 46606 + }, + { + "epoch": 0.9330030278007156, + "grad_norm": 1.222694993019104, + "learning_rate": 1.1720020520485042e-07, + "loss": 0.2987, + "step": 46607 + }, + { + "epoch": 0.933023046317844, + "grad_norm": 1.3211146593093872, + "learning_rate": 1.171304365192355e-07, + "loss": 0.3284, + "step": 46608 + }, + { + "epoch": 0.9330430648349723, + "grad_norm": 1.266697645187378, + "learning_rate": 1.1706068836015383e-07, + "loss": 0.2727, + "step": 46609 + }, + { + "epoch": 0.9330630833521006, + "grad_norm": 1.0009901523590088, + "learning_rate": 1.1699096072789795e-07, + "loss": 0.318, + "step": 46610 + }, + { + "epoch": 0.933083101869229, + "grad_norm": 1.1236580610275269, + "learning_rate": 1.1692125362276152e-07, + "loss": 0.3269, + "step": 46611 + }, + { + "epoch": 0.9331031203863573, + "grad_norm": 1.1697758436203003, + "learning_rate": 1.1685156704503653e-07, + "loss": 0.3242, + "step": 46612 + }, + { + "epoch": 0.9331231389034857, + "grad_norm": 1.0594186782836914, + "learning_rate": 1.1678190099501774e-07, + "loss": 0.2944, + "step": 46613 + }, + { + "epoch": 0.933143157420614, + "grad_norm": 1.0406720638275146, + "learning_rate": 1.1671225547299658e-07, + "loss": 0.2698, + "step": 46614 + }, + { + "epoch": 0.9331631759377425, + "grad_norm": 1.1508796215057373, + "learning_rate": 1.166426304792656e-07, + "loss": 0.336, + "step": 46615 + }, + { + "epoch": 0.9331831944548707, + "grad_norm": 1.0764150619506836, + "learning_rate": 1.1657302601411902e-07, + "loss": 0.2874, + "step": 46616 + }, + { + "epoch": 0.9332032129719992, + "grad_norm": 1.0420587062835693, + "learning_rate": 1.1650344207784825e-07, + "loss": 0.2898, + "step": 46617 + }, + { + "epoch": 0.9332232314891274, + "grad_norm": 1.0473642349243164, + "learning_rate": 1.1643387867074585e-07, + "loss": 0.3062, + "step": 46618 + }, + { + "epoch": 0.9332432500062557, + "grad_norm": 1.095810890197754, + "learning_rate": 1.1636433579310491e-07, + "loss": 0.3002, + "step": 46619 + }, + { + "epoch": 0.9332632685233841, + "grad_norm": 1.1076604127883911, + "learning_rate": 1.1629481344521631e-07, + "loss": 0.315, + "step": 46620 + }, + { + "epoch": 0.9332832870405124, + "grad_norm": 1.2507718801498413, + "learning_rate": 1.1622531162737428e-07, + "loss": 0.2927, + "step": 46621 + }, + { + "epoch": 0.9333033055576408, + "grad_norm": 1.9246734380722046, + "learning_rate": 1.1615583033986966e-07, + "loss": 0.7589, + "step": 46622 + }, + { + "epoch": 0.9333233240747691, + "grad_norm": 1.0339611768722534, + "learning_rate": 1.1608636958299502e-07, + "loss": 0.2821, + "step": 46623 + }, + { + "epoch": 0.9333433425918976, + "grad_norm": 0.993481457233429, + "learning_rate": 1.1601692935704234e-07, + "loss": 0.2685, + "step": 46624 + }, + { + "epoch": 0.9333633611090258, + "grad_norm": 1.1566256284713745, + "learning_rate": 1.1594750966230306e-07, + "loss": 0.2959, + "step": 46625 + }, + { + "epoch": 0.9333833796261541, + "grad_norm": 1.1344164609909058, + "learning_rate": 1.1587811049906972e-07, + "loss": 0.3061, + "step": 46626 + }, + { + "epoch": 0.9334033981432825, + "grad_norm": 1.4095072746276855, + "learning_rate": 1.1580873186763375e-07, + "loss": 0.3422, + "step": 46627 + }, + { + "epoch": 0.9334234166604108, + "grad_norm": 0.9600489735603333, + "learning_rate": 1.1573937376828714e-07, + "loss": 0.2817, + "step": 46628 + }, + { + "epoch": 0.9334434351775392, + "grad_norm": 1.1506125926971436, + "learning_rate": 1.1567003620132022e-07, + "loss": 0.3433, + "step": 46629 + }, + { + "epoch": 0.9334634536946675, + "grad_norm": 1.2560672760009766, + "learning_rate": 1.1560071916702553e-07, + "loss": 0.3007, + "step": 46630 + }, + { + "epoch": 0.933483472211796, + "grad_norm": 0.9922316670417786, + "learning_rate": 1.1553142266569506e-07, + "loss": 0.2838, + "step": 46631 + }, + { + "epoch": 0.9335034907289242, + "grad_norm": 1.0743069648742676, + "learning_rate": 1.1546214669761912e-07, + "loss": 0.2569, + "step": 46632 + }, + { + "epoch": 0.9335235092460527, + "grad_norm": 1.045896053314209, + "learning_rate": 1.1539289126308806e-07, + "loss": 0.2675, + "step": 46633 + }, + { + "epoch": 0.933543527763181, + "grad_norm": 1.116499900817871, + "learning_rate": 1.153236563623955e-07, + "loss": 0.2532, + "step": 46634 + }, + { + "epoch": 0.9335635462803092, + "grad_norm": 1.1396443843841553, + "learning_rate": 1.1525444199583013e-07, + "loss": 0.269, + "step": 46635 + }, + { + "epoch": 0.9335835647974376, + "grad_norm": 1.034041166305542, + "learning_rate": 1.1518524816368392e-07, + "loss": 0.2975, + "step": 46636 + }, + { + "epoch": 0.9336035833145659, + "grad_norm": 1.087202548980713, + "learning_rate": 1.1511607486624831e-07, + "loss": 0.2721, + "step": 46637 + }, + { + "epoch": 0.9336236018316943, + "grad_norm": 1.0761014223098755, + "learning_rate": 1.1504692210381308e-07, + "loss": 0.2478, + "step": 46638 + }, + { + "epoch": 0.9336436203488226, + "grad_norm": 1.85226571559906, + "learning_rate": 1.1497778987666964e-07, + "loss": 0.7515, + "step": 46639 + }, + { + "epoch": 0.933663638865951, + "grad_norm": 1.0798434019088745, + "learning_rate": 1.1490867818510831e-07, + "loss": 0.2454, + "step": 46640 + }, + { + "epoch": 0.9336836573830793, + "grad_norm": 1.0302871465682983, + "learning_rate": 1.1483958702941944e-07, + "loss": 0.2921, + "step": 46641 + }, + { + "epoch": 0.9337036759002076, + "grad_norm": 2.0411360263824463, + "learning_rate": 1.1477051640989335e-07, + "loss": 0.6815, + "step": 46642 + }, + { + "epoch": 0.933723694417336, + "grad_norm": 2.1320626735687256, + "learning_rate": 1.1470146632682089e-07, + "loss": 0.6682, + "step": 46643 + }, + { + "epoch": 0.9337437129344643, + "grad_norm": 1.0919370651245117, + "learning_rate": 1.146324367804924e-07, + "loss": 0.313, + "step": 46644 + }, + { + "epoch": 0.9337637314515927, + "grad_norm": 1.119718074798584, + "learning_rate": 1.1456342777119767e-07, + "loss": 0.2704, + "step": 46645 + }, + { + "epoch": 0.933783749968721, + "grad_norm": 1.2767609357833862, + "learning_rate": 1.1449443929922699e-07, + "loss": 0.3525, + "step": 46646 + }, + { + "epoch": 0.9338037684858495, + "grad_norm": 1.231726050376892, + "learning_rate": 1.1442547136487014e-07, + "loss": 0.3506, + "step": 46647 + }, + { + "epoch": 0.9338237870029777, + "grad_norm": 0.9506151676177979, + "learning_rate": 1.1435652396841745e-07, + "loss": 0.2615, + "step": 46648 + }, + { + "epoch": 0.9338438055201062, + "grad_norm": 1.1758043766021729, + "learning_rate": 1.1428759711015813e-07, + "loss": 0.2722, + "step": 46649 + }, + { + "epoch": 0.9338638240372344, + "grad_norm": 1.016464114189148, + "learning_rate": 1.142186907903825e-07, + "loss": 0.2673, + "step": 46650 + }, + { + "epoch": 0.9338838425543627, + "grad_norm": 1.271959662437439, + "learning_rate": 1.1414980500937977e-07, + "loss": 0.32, + "step": 46651 + }, + { + "epoch": 0.9339038610714911, + "grad_norm": 1.0611463785171509, + "learning_rate": 1.1408093976744028e-07, + "loss": 0.2804, + "step": 46652 + }, + { + "epoch": 0.9339238795886194, + "grad_norm": 1.271989345550537, + "learning_rate": 1.1401209506485323e-07, + "loss": 0.2992, + "step": 46653 + }, + { + "epoch": 0.9339438981057478, + "grad_norm": 1.1015815734863281, + "learning_rate": 1.1394327090190782e-07, + "loss": 0.2972, + "step": 46654 + }, + { + "epoch": 0.9339639166228761, + "grad_norm": 1.137408971786499, + "learning_rate": 1.1387446727889274e-07, + "loss": 0.3046, + "step": 46655 + }, + { + "epoch": 0.9339839351400046, + "grad_norm": 0.9937083721160889, + "learning_rate": 1.1380568419609828e-07, + "loss": 0.253, + "step": 46656 + }, + { + "epoch": 0.9340039536571328, + "grad_norm": 1.0304309129714966, + "learning_rate": 1.1373692165381367e-07, + "loss": 0.294, + "step": 46657 + }, + { + "epoch": 0.9340239721742611, + "grad_norm": 1.183349847793579, + "learning_rate": 1.1366817965232702e-07, + "loss": 0.3052, + "step": 46658 + }, + { + "epoch": 0.9340439906913895, + "grad_norm": 1.0949780941009521, + "learning_rate": 1.1359945819192807e-07, + "loss": 0.2929, + "step": 46659 + }, + { + "epoch": 0.9340640092085178, + "grad_norm": 1.0291963815689087, + "learning_rate": 1.1353075727290497e-07, + "loss": 0.2783, + "step": 46660 + }, + { + "epoch": 0.9340840277256462, + "grad_norm": 1.1230636835098267, + "learning_rate": 1.1346207689554745e-07, + "loss": 0.2967, + "step": 46661 + }, + { + "epoch": 0.9341040462427745, + "grad_norm": 1.1836849451065063, + "learning_rate": 1.1339341706014306e-07, + "loss": 0.3611, + "step": 46662 + }, + { + "epoch": 0.934124064759903, + "grad_norm": 1.1398751735687256, + "learning_rate": 1.1332477776698214e-07, + "loss": 0.2808, + "step": 46663 + }, + { + "epoch": 0.9341440832770312, + "grad_norm": 1.123340129852295, + "learning_rate": 1.1325615901635112e-07, + "loss": 0.3178, + "step": 46664 + }, + { + "epoch": 0.9341641017941597, + "grad_norm": 2.057709217071533, + "learning_rate": 1.1318756080854032e-07, + "loss": 0.6918, + "step": 46665 + }, + { + "epoch": 0.934184120311288, + "grad_norm": 1.142225980758667, + "learning_rate": 1.1311898314383729e-07, + "loss": 0.285, + "step": 46666 + }, + { + "epoch": 0.9342041388284162, + "grad_norm": 1.1075439453125, + "learning_rate": 1.1305042602253069e-07, + "loss": 0.2789, + "step": 46667 + }, + { + "epoch": 0.9342241573455446, + "grad_norm": 1.893929123878479, + "learning_rate": 1.1298188944490807e-07, + "loss": 0.6803, + "step": 46668 + }, + { + "epoch": 0.9342441758626729, + "grad_norm": 1.1430939435958862, + "learning_rate": 1.1291337341125751e-07, + "loss": 0.3282, + "step": 46669 + }, + { + "epoch": 0.9342641943798013, + "grad_norm": 1.9709328413009644, + "learning_rate": 1.1284487792186772e-07, + "loss": 0.7324, + "step": 46670 + }, + { + "epoch": 0.9342842128969296, + "grad_norm": 1.213210940361023, + "learning_rate": 1.1277640297702674e-07, + "loss": 0.296, + "step": 46671 + }, + { + "epoch": 0.934304231414058, + "grad_norm": 1.862112045288086, + "learning_rate": 1.1270794857702161e-07, + "loss": 0.7083, + "step": 46672 + }, + { + "epoch": 0.9343242499311863, + "grad_norm": 1.2191824913024902, + "learning_rate": 1.126395147221404e-07, + "loss": 0.2615, + "step": 46673 + }, + { + "epoch": 0.9343442684483146, + "grad_norm": 1.1131792068481445, + "learning_rate": 1.1257110141267124e-07, + "loss": 0.3197, + "step": 46674 + }, + { + "epoch": 0.934364286965443, + "grad_norm": 1.086605191230774, + "learning_rate": 1.1250270864890056e-07, + "loss": 0.238, + "step": 46675 + }, + { + "epoch": 0.9343843054825713, + "grad_norm": 1.0274841785430908, + "learning_rate": 1.1243433643111756e-07, + "loss": 0.264, + "step": 46676 + }, + { + "epoch": 0.9344043239996997, + "grad_norm": 1.1053959131240845, + "learning_rate": 1.1236598475960813e-07, + "loss": 0.327, + "step": 46677 + }, + { + "epoch": 0.934424342516828, + "grad_norm": 1.0563806295394897, + "learning_rate": 1.1229765363466095e-07, + "loss": 0.3078, + "step": 46678 + }, + { + "epoch": 0.9344443610339564, + "grad_norm": 1.0583407878875732, + "learning_rate": 1.1222934305656242e-07, + "loss": 0.3118, + "step": 46679 + }, + { + "epoch": 0.9344643795510847, + "grad_norm": 1.1144720315933228, + "learning_rate": 1.121610530256001e-07, + "loss": 0.2974, + "step": 46680 + }, + { + "epoch": 0.9344843980682132, + "grad_norm": 1.0413310527801514, + "learning_rate": 1.1209278354206043e-07, + "loss": 0.3045, + "step": 46681 + }, + { + "epoch": 0.9345044165853414, + "grad_norm": 1.2139275074005127, + "learning_rate": 1.1202453460623097e-07, + "loss": 0.2954, + "step": 46682 + }, + { + "epoch": 0.9345244351024697, + "grad_norm": 1.7788598537445068, + "learning_rate": 1.1195630621839815e-07, + "loss": 0.7181, + "step": 46683 + }, + { + "epoch": 0.9345444536195981, + "grad_norm": 1.1826285123825073, + "learning_rate": 1.118880983788495e-07, + "loss": 0.2673, + "step": 46684 + }, + { + "epoch": 0.9345644721367264, + "grad_norm": 1.295570731163025, + "learning_rate": 1.1181991108787149e-07, + "loss": 0.2941, + "step": 46685 + }, + { + "epoch": 0.9345844906538548, + "grad_norm": 1.0426455736160278, + "learning_rate": 1.1175174434574997e-07, + "loss": 0.3279, + "step": 46686 + }, + { + "epoch": 0.9346045091709831, + "grad_norm": 1.0532002449035645, + "learning_rate": 1.1168359815277252e-07, + "loss": 0.2612, + "step": 46687 + }, + { + "epoch": 0.9346245276881116, + "grad_norm": 1.0946002006530762, + "learning_rate": 1.11615472509225e-07, + "loss": 0.2756, + "step": 46688 + }, + { + "epoch": 0.9346445462052398, + "grad_norm": 1.159805178642273, + "learning_rate": 1.115473674153944e-07, + "loss": 0.2767, + "step": 46689 + }, + { + "epoch": 0.9346645647223681, + "grad_norm": 1.3348824977874756, + "learning_rate": 1.1147928287156661e-07, + "loss": 0.3144, + "step": 46690 + }, + { + "epoch": 0.9346845832394965, + "grad_norm": 1.092900037765503, + "learning_rate": 1.1141121887802752e-07, + "loss": 0.2806, + "step": 46691 + }, + { + "epoch": 0.9347046017566248, + "grad_norm": 1.3097292184829712, + "learning_rate": 1.1134317543506412e-07, + "loss": 0.3043, + "step": 46692 + }, + { + "epoch": 0.9347246202737532, + "grad_norm": 1.2220656871795654, + "learning_rate": 1.1127515254296228e-07, + "loss": 0.321, + "step": 46693 + }, + { + "epoch": 0.9347446387908815, + "grad_norm": 1.102475643157959, + "learning_rate": 1.1120715020200734e-07, + "loss": 0.2579, + "step": 46694 + }, + { + "epoch": 0.93476465730801, + "grad_norm": 1.264849066734314, + "learning_rate": 1.1113916841248462e-07, + "loss": 0.2937, + "step": 46695 + }, + { + "epoch": 0.9347846758251382, + "grad_norm": 1.1707615852355957, + "learning_rate": 1.1107120717468167e-07, + "loss": 0.3091, + "step": 46696 + }, + { + "epoch": 0.9348046943422667, + "grad_norm": 1.2058769464492798, + "learning_rate": 1.1100326648888327e-07, + "loss": 0.2784, + "step": 46697 + }, + { + "epoch": 0.934824712859395, + "grad_norm": 1.3410942554473877, + "learning_rate": 1.1093534635537473e-07, + "loss": 0.2854, + "step": 46698 + }, + { + "epoch": 0.9348447313765232, + "grad_norm": 1.0034852027893066, + "learning_rate": 1.1086744677444139e-07, + "loss": 0.2824, + "step": 46699 + }, + { + "epoch": 0.9348647498936516, + "grad_norm": 1.1604586839675903, + "learning_rate": 1.1079956774636969e-07, + "loss": 0.2868, + "step": 46700 + }, + { + "epoch": 0.9348847684107799, + "grad_norm": 1.159403920173645, + "learning_rate": 1.1073170927144384e-07, + "loss": 0.2885, + "step": 46701 + }, + { + "epoch": 0.9349047869279083, + "grad_norm": 1.1390091180801392, + "learning_rate": 1.1066387134995027e-07, + "loss": 0.3138, + "step": 46702 + }, + { + "epoch": 0.9349248054450366, + "grad_norm": 1.0661238431930542, + "learning_rate": 1.1059605398217377e-07, + "loss": 0.3404, + "step": 46703 + }, + { + "epoch": 0.934944823962165, + "grad_norm": 1.1747441291809082, + "learning_rate": 1.1052825716839854e-07, + "loss": 0.3009, + "step": 46704 + }, + { + "epoch": 0.9349648424792933, + "grad_norm": 1.0249170064926147, + "learning_rate": 1.1046048090891104e-07, + "loss": 0.266, + "step": 46705 + }, + { + "epoch": 0.9349848609964216, + "grad_norm": 1.058831810951233, + "learning_rate": 1.1039272520399547e-07, + "loss": 0.2692, + "step": 46706 + }, + { + "epoch": 0.93500487951355, + "grad_norm": 1.840488076210022, + "learning_rate": 1.103249900539366e-07, + "loss": 0.7716, + "step": 46707 + }, + { + "epoch": 0.9350248980306783, + "grad_norm": 1.0355273485183716, + "learning_rate": 1.102572754590181e-07, + "loss": 0.3113, + "step": 46708 + }, + { + "epoch": 0.9350449165478067, + "grad_norm": 0.9769294261932373, + "learning_rate": 1.1018958141952696e-07, + "loss": 0.2928, + "step": 46709 + }, + { + "epoch": 0.935064935064935, + "grad_norm": 1.1464427709579468, + "learning_rate": 1.1012190793574629e-07, + "loss": 0.3337, + "step": 46710 + }, + { + "epoch": 0.9350849535820634, + "grad_norm": 1.2005354166030884, + "learning_rate": 1.1005425500796085e-07, + "loss": 0.3384, + "step": 46711 + }, + { + "epoch": 0.9351049720991917, + "grad_norm": 1.1315267086029053, + "learning_rate": 1.0998662263645432e-07, + "loss": 0.2862, + "step": 46712 + }, + { + "epoch": 0.9351249906163202, + "grad_norm": 1.0692369937896729, + "learning_rate": 1.0991901082151202e-07, + "loss": 0.3402, + "step": 46713 + }, + { + "epoch": 0.9351450091334484, + "grad_norm": 1.110763430595398, + "learning_rate": 1.098514195634176e-07, + "loss": 0.3241, + "step": 46714 + }, + { + "epoch": 0.9351650276505767, + "grad_norm": 1.0702896118164062, + "learning_rate": 1.0978384886245586e-07, + "loss": 0.3356, + "step": 46715 + }, + { + "epoch": 0.9351850461677051, + "grad_norm": 1.097727656364441, + "learning_rate": 1.0971629871891098e-07, + "loss": 0.2704, + "step": 46716 + }, + { + "epoch": 0.9352050646848334, + "grad_norm": 1.1654409170150757, + "learning_rate": 1.0964876913306499e-07, + "loss": 0.242, + "step": 46717 + }, + { + "epoch": 0.9352250832019618, + "grad_norm": 1.101261854171753, + "learning_rate": 1.0958126010520431e-07, + "loss": 0.3154, + "step": 46718 + }, + { + "epoch": 0.9352451017190901, + "grad_norm": 1.1009119749069214, + "learning_rate": 1.095137716356115e-07, + "loss": 0.2786, + "step": 46719 + }, + { + "epoch": 0.9352651202362186, + "grad_norm": 1.1269882917404175, + "learning_rate": 1.0944630372457021e-07, + "loss": 0.2936, + "step": 46720 + }, + { + "epoch": 0.9352851387533468, + "grad_norm": 1.0413579940795898, + "learning_rate": 1.0937885637236411e-07, + "loss": 0.2571, + "step": 46721 + }, + { + "epoch": 0.9353051572704751, + "grad_norm": 1.045711874961853, + "learning_rate": 1.0931142957927632e-07, + "loss": 0.2752, + "step": 46722 + }, + { + "epoch": 0.9353251757876035, + "grad_norm": 1.1119589805603027, + "learning_rate": 1.0924402334559159e-07, + "loss": 0.2839, + "step": 46723 + }, + { + "epoch": 0.9353451943047318, + "grad_norm": 1.2448323965072632, + "learning_rate": 1.0917663767159193e-07, + "loss": 0.2774, + "step": 46724 + }, + { + "epoch": 0.9353652128218602, + "grad_norm": 1.1279902458190918, + "learning_rate": 1.0910927255756098e-07, + "loss": 0.3146, + "step": 46725 + }, + { + "epoch": 0.9353852313389885, + "grad_norm": 1.1243529319763184, + "learning_rate": 1.0904192800378299e-07, + "loss": 0.2544, + "step": 46726 + }, + { + "epoch": 0.935405249856117, + "grad_norm": 1.911428689956665, + "learning_rate": 1.0897460401053938e-07, + "loss": 0.759, + "step": 46727 + }, + { + "epoch": 0.9354252683732452, + "grad_norm": 0.9407288432121277, + "learning_rate": 1.0890730057811439e-07, + "loss": 0.281, + "step": 46728 + }, + { + "epoch": 0.9354452868903737, + "grad_norm": 1.1359995603561401, + "learning_rate": 1.0884001770679053e-07, + "loss": 0.2814, + "step": 46729 + }, + { + "epoch": 0.935465305407502, + "grad_norm": 1.1771596670150757, + "learning_rate": 1.0877275539684984e-07, + "loss": 0.3189, + "step": 46730 + }, + { + "epoch": 0.9354853239246302, + "grad_norm": 1.0690423250198364, + "learning_rate": 1.0870551364857706e-07, + "loss": 0.2561, + "step": 46731 + }, + { + "epoch": 0.9355053424417586, + "grad_norm": 1.0640045404434204, + "learning_rate": 1.086382924622531e-07, + "loss": 0.3112, + "step": 46732 + }, + { + "epoch": 0.9355253609588869, + "grad_norm": 1.0761394500732422, + "learning_rate": 1.0857109183816161e-07, + "loss": 0.2542, + "step": 46733 + }, + { + "epoch": 0.9355453794760153, + "grad_norm": 1.0914756059646606, + "learning_rate": 1.0850391177658404e-07, + "loss": 0.3027, + "step": 46734 + }, + { + "epoch": 0.9355653979931436, + "grad_norm": 1.9228764772415161, + "learning_rate": 1.0843675227780348e-07, + "loss": 0.716, + "step": 46735 + }, + { + "epoch": 0.935585416510272, + "grad_norm": 1.0464322566986084, + "learning_rate": 1.083696133421025e-07, + "loss": 0.2779, + "step": 46736 + }, + { + "epoch": 0.9356054350274003, + "grad_norm": 1.1326029300689697, + "learning_rate": 1.0830249496976253e-07, + "loss": 0.305, + "step": 46737 + }, + { + "epoch": 0.9356254535445286, + "grad_norm": 1.1302528381347656, + "learning_rate": 1.0823539716106612e-07, + "loss": 0.3317, + "step": 46738 + }, + { + "epoch": 0.935645472061657, + "grad_norm": 1.0971447229385376, + "learning_rate": 1.0816831991629584e-07, + "loss": 0.3188, + "step": 46739 + }, + { + "epoch": 0.9356654905787853, + "grad_norm": 1.2036062479019165, + "learning_rate": 1.0810126323573256e-07, + "loss": 0.3157, + "step": 46740 + }, + { + "epoch": 0.9356855090959137, + "grad_norm": 1.1784007549285889, + "learning_rate": 1.0803422711965938e-07, + "loss": 0.3382, + "step": 46741 + }, + { + "epoch": 0.935705527613042, + "grad_norm": 1.082832932472229, + "learning_rate": 1.0796721156835777e-07, + "loss": 0.258, + "step": 46742 + }, + { + "epoch": 0.9357255461301704, + "grad_norm": 2.0234854221343994, + "learning_rate": 1.0790021658210858e-07, + "loss": 0.7004, + "step": 46743 + }, + { + "epoch": 0.9357455646472987, + "grad_norm": 2.0039730072021484, + "learning_rate": 1.078332421611944e-07, + "loss": 0.6989, + "step": 46744 + }, + { + "epoch": 0.9357655831644272, + "grad_norm": 0.9817933440208435, + "learning_rate": 1.0776628830589609e-07, + "loss": 0.2511, + "step": 46745 + }, + { + "epoch": 0.9357856016815554, + "grad_norm": 1.1135568618774414, + "learning_rate": 1.076993550164962e-07, + "loss": 0.2586, + "step": 46746 + }, + { + "epoch": 0.9358056201986837, + "grad_norm": 1.010529637336731, + "learning_rate": 1.0763244229327507e-07, + "loss": 0.2973, + "step": 46747 + }, + { + "epoch": 0.9358256387158121, + "grad_norm": 1.1040658950805664, + "learning_rate": 1.0756555013651415e-07, + "loss": 0.3139, + "step": 46748 + }, + { + "epoch": 0.9358456572329404, + "grad_norm": 1.0730605125427246, + "learning_rate": 1.0749867854649487e-07, + "loss": 0.2613, + "step": 46749 + }, + { + "epoch": 0.9358656757500688, + "grad_norm": 1.0260907411575317, + "learning_rate": 1.0743182752349813e-07, + "loss": 0.2863, + "step": 46750 + }, + { + "epoch": 0.9358856942671971, + "grad_norm": 1.168684482574463, + "learning_rate": 1.0736499706780534e-07, + "loss": 0.2788, + "step": 46751 + }, + { + "epoch": 0.9359057127843255, + "grad_norm": 1.107977271080017, + "learning_rate": 1.0729818717969686e-07, + "loss": 0.2518, + "step": 46752 + }, + { + "epoch": 0.9359257313014538, + "grad_norm": 1.1875325441360474, + "learning_rate": 1.0723139785945413e-07, + "loss": 0.3147, + "step": 46753 + }, + { + "epoch": 0.9359457498185821, + "grad_norm": 1.0091606378555298, + "learning_rate": 1.0716462910735748e-07, + "loss": 0.2975, + "step": 46754 + }, + { + "epoch": 0.9359657683357105, + "grad_norm": 1.1376348733901978, + "learning_rate": 1.0709788092368833e-07, + "loss": 0.2678, + "step": 46755 + }, + { + "epoch": 0.9359857868528388, + "grad_norm": 1.14530611038208, + "learning_rate": 1.0703115330872649e-07, + "loss": 0.2614, + "step": 46756 + }, + { + "epoch": 0.9360058053699672, + "grad_norm": 1.1391940116882324, + "learning_rate": 1.0696444626275226e-07, + "loss": 0.2815, + "step": 46757 + }, + { + "epoch": 0.9360258238870955, + "grad_norm": 1.0842751264572144, + "learning_rate": 1.0689775978604656e-07, + "loss": 0.274, + "step": 46758 + }, + { + "epoch": 0.936045842404224, + "grad_norm": 1.1552749872207642, + "learning_rate": 1.0683109387889023e-07, + "loss": 0.2554, + "step": 46759 + }, + { + "epoch": 0.9360658609213522, + "grad_norm": 1.0437275171279907, + "learning_rate": 1.0676444854156253e-07, + "loss": 0.276, + "step": 46760 + }, + { + "epoch": 0.9360858794384805, + "grad_norm": 1.1924563646316528, + "learning_rate": 1.066978237743438e-07, + "loss": 0.3277, + "step": 46761 + }, + { + "epoch": 0.936105897955609, + "grad_norm": 2.095844268798828, + "learning_rate": 1.0663121957751488e-07, + "loss": 0.8042, + "step": 46762 + }, + { + "epoch": 0.9361259164727372, + "grad_norm": 1.2581233978271484, + "learning_rate": 1.0656463595135558e-07, + "loss": 0.2797, + "step": 46763 + }, + { + "epoch": 0.9361459349898656, + "grad_norm": 1.136004090309143, + "learning_rate": 1.0649807289614455e-07, + "loss": 0.3065, + "step": 46764 + }, + { + "epoch": 0.9361659535069939, + "grad_norm": 1.1194528341293335, + "learning_rate": 1.0643153041216325e-07, + "loss": 0.2923, + "step": 46765 + }, + { + "epoch": 0.9361859720241223, + "grad_norm": 2.038884401321411, + "learning_rate": 1.0636500849968978e-07, + "loss": 0.7437, + "step": 46766 + }, + { + "epoch": 0.9362059905412506, + "grad_norm": 1.0197771787643433, + "learning_rate": 1.0629850715900558e-07, + "loss": 0.3053, + "step": 46767 + }, + { + "epoch": 0.936226009058379, + "grad_norm": 0.9683752059936523, + "learning_rate": 1.0623202639038932e-07, + "loss": 0.2271, + "step": 46768 + }, + { + "epoch": 0.9362460275755073, + "grad_norm": 1.1577281951904297, + "learning_rate": 1.0616556619412022e-07, + "loss": 0.3427, + "step": 46769 + }, + { + "epoch": 0.9362660460926356, + "grad_norm": 1.007233738899231, + "learning_rate": 1.060991265704775e-07, + "loss": 0.2428, + "step": 46770 + }, + { + "epoch": 0.936286064609764, + "grad_norm": 1.0914936065673828, + "learning_rate": 1.0603270751974148e-07, + "loss": 0.3036, + "step": 46771 + }, + { + "epoch": 0.9363060831268923, + "grad_norm": 1.236859679222107, + "learning_rate": 1.0596630904219085e-07, + "loss": 0.3158, + "step": 46772 + }, + { + "epoch": 0.9363261016440207, + "grad_norm": 1.2384958267211914, + "learning_rate": 1.0589993113810427e-07, + "loss": 0.3125, + "step": 46773 + }, + { + "epoch": 0.936346120161149, + "grad_norm": 1.08217453956604, + "learning_rate": 1.0583357380776149e-07, + "loss": 0.2879, + "step": 46774 + }, + { + "epoch": 0.9363661386782774, + "grad_norm": 1.0285615921020508, + "learning_rate": 1.0576723705144065e-07, + "loss": 0.3033, + "step": 46775 + }, + { + "epoch": 0.9363861571954057, + "grad_norm": 1.156620740890503, + "learning_rate": 1.0570092086942153e-07, + "loss": 0.3116, + "step": 46776 + }, + { + "epoch": 0.936406175712534, + "grad_norm": 1.796294927597046, + "learning_rate": 1.0563462526198165e-07, + "loss": 0.8139, + "step": 46777 + }, + { + "epoch": 0.9364261942296624, + "grad_norm": 1.0904806852340698, + "learning_rate": 1.0556835022940137e-07, + "loss": 0.3352, + "step": 46778 + }, + { + "epoch": 0.9364462127467907, + "grad_norm": 1.2709304094314575, + "learning_rate": 1.0550209577195769e-07, + "loss": 0.3611, + "step": 46779 + }, + { + "epoch": 0.9364662312639191, + "grad_norm": 1.0755454301834106, + "learning_rate": 1.0543586188993093e-07, + "loss": 0.2763, + "step": 46780 + }, + { + "epoch": 0.9364862497810474, + "grad_norm": 1.1035094261169434, + "learning_rate": 1.0536964858359811e-07, + "loss": 0.2818, + "step": 46781 + }, + { + "epoch": 0.9365062682981758, + "grad_norm": 1.1920500993728638, + "learning_rate": 1.0530345585323787e-07, + "loss": 0.2978, + "step": 46782 + }, + { + "epoch": 0.9365262868153041, + "grad_norm": 1.0981855392456055, + "learning_rate": 1.0523728369912833e-07, + "loss": 0.319, + "step": 46783 + }, + { + "epoch": 0.9365463053324325, + "grad_norm": 1.140101432800293, + "learning_rate": 1.0517113212154817e-07, + "loss": 0.298, + "step": 46784 + }, + { + "epoch": 0.9365663238495608, + "grad_norm": 1.1252750158309937, + "learning_rate": 1.0510500112077492e-07, + "loss": 0.3504, + "step": 46785 + }, + { + "epoch": 0.9365863423666891, + "grad_norm": 1.9244619607925415, + "learning_rate": 1.0503889069708729e-07, + "loss": 0.6753, + "step": 46786 + }, + { + "epoch": 0.9366063608838175, + "grad_norm": 1.0240391492843628, + "learning_rate": 1.0497280085076278e-07, + "loss": 0.2897, + "step": 46787 + }, + { + "epoch": 0.9366263794009458, + "grad_norm": 1.155544638633728, + "learning_rate": 1.0490673158207843e-07, + "loss": 0.3151, + "step": 46788 + }, + { + "epoch": 0.9366463979180742, + "grad_norm": 1.030817985534668, + "learning_rate": 1.0484068289131344e-07, + "loss": 0.2955, + "step": 46789 + }, + { + "epoch": 0.9366664164352025, + "grad_norm": 1.0617889165878296, + "learning_rate": 1.0477465477874426e-07, + "loss": 0.2688, + "step": 46790 + }, + { + "epoch": 0.936686434952331, + "grad_norm": 1.072761058807373, + "learning_rate": 1.0470864724464902e-07, + "loss": 0.3203, + "step": 46791 + }, + { + "epoch": 0.9367064534694592, + "grad_norm": 1.155699610710144, + "learning_rate": 1.0464266028930526e-07, + "loss": 0.2963, + "step": 46792 + }, + { + "epoch": 0.9367264719865875, + "grad_norm": 1.2480984926223755, + "learning_rate": 1.0457669391299052e-07, + "loss": 0.3389, + "step": 46793 + }, + { + "epoch": 0.936746490503716, + "grad_norm": 1.1739623546600342, + "learning_rate": 1.0451074811598183e-07, + "loss": 0.332, + "step": 46794 + }, + { + "epoch": 0.9367665090208442, + "grad_norm": 1.0687459707260132, + "learning_rate": 1.0444482289855673e-07, + "loss": 0.2697, + "step": 46795 + }, + { + "epoch": 0.9367865275379726, + "grad_norm": 1.2464691400527954, + "learning_rate": 1.0437891826099167e-07, + "loss": 0.2868, + "step": 46796 + }, + { + "epoch": 0.9368065460551009, + "grad_norm": 1.2314302921295166, + "learning_rate": 1.0431303420356365e-07, + "loss": 0.3617, + "step": 46797 + }, + { + "epoch": 0.9368265645722293, + "grad_norm": 1.154403567314148, + "learning_rate": 1.0424717072655077e-07, + "loss": 0.2871, + "step": 46798 + }, + { + "epoch": 0.9368465830893576, + "grad_norm": 1.1277861595153809, + "learning_rate": 1.0418132783022895e-07, + "loss": 0.26, + "step": 46799 + }, + { + "epoch": 0.936866601606486, + "grad_norm": 1.1413042545318604, + "learning_rate": 1.0411550551487515e-07, + "loss": 0.3179, + "step": 46800 + }, + { + "epoch": 0.9368866201236143, + "grad_norm": 1.2208315134048462, + "learning_rate": 1.0404970378076584e-07, + "loss": 0.2961, + "step": 46801 + }, + { + "epoch": 0.9369066386407426, + "grad_norm": 1.744120717048645, + "learning_rate": 1.0398392262817858e-07, + "loss": 0.7844, + "step": 46802 + }, + { + "epoch": 0.936926657157871, + "grad_norm": 1.164155125617981, + "learning_rate": 1.0391816205738869e-07, + "loss": 0.2924, + "step": 46803 + }, + { + "epoch": 0.9369466756749993, + "grad_norm": 1.0401034355163574, + "learning_rate": 1.0385242206867374e-07, + "loss": 0.2699, + "step": 46804 + }, + { + "epoch": 0.9369666941921277, + "grad_norm": 1.0659838914871216, + "learning_rate": 1.0378670266230961e-07, + "loss": 0.2656, + "step": 46805 + }, + { + "epoch": 0.936986712709256, + "grad_norm": 0.9628075957298279, + "learning_rate": 1.0372100383857165e-07, + "loss": 0.232, + "step": 46806 + }, + { + "epoch": 0.9370067312263844, + "grad_norm": 1.0415985584259033, + "learning_rate": 1.0365532559773794e-07, + "loss": 0.2589, + "step": 46807 + }, + { + "epoch": 0.9370267497435127, + "grad_norm": 1.1864981651306152, + "learning_rate": 1.0358966794008329e-07, + "loss": 0.3347, + "step": 46808 + }, + { + "epoch": 0.937046768260641, + "grad_norm": 1.2359274625778198, + "learning_rate": 1.0352403086588358e-07, + "loss": 0.2871, + "step": 46809 + }, + { + "epoch": 0.9370667867777694, + "grad_norm": 1.8824087381362915, + "learning_rate": 1.0345841437541526e-07, + "loss": 0.7036, + "step": 46810 + }, + { + "epoch": 0.9370868052948977, + "grad_norm": 1.7949429750442505, + "learning_rate": 1.0339281846895366e-07, + "loss": 0.7467, + "step": 46811 + }, + { + "epoch": 0.9371068238120261, + "grad_norm": 1.7843618392944336, + "learning_rate": 1.0332724314677578e-07, + "loss": 0.7842, + "step": 46812 + }, + { + "epoch": 0.9371268423291544, + "grad_norm": 1.8968347311019897, + "learning_rate": 1.0326168840915585e-07, + "loss": 0.7658, + "step": 46813 + }, + { + "epoch": 0.9371468608462828, + "grad_norm": 0.9899919629096985, + "learning_rate": 1.031961542563703e-07, + "loss": 0.2677, + "step": 46814 + }, + { + "epoch": 0.9371668793634111, + "grad_norm": 1.391788363456726, + "learning_rate": 1.0313064068869339e-07, + "loss": 0.2544, + "step": 46815 + }, + { + "epoch": 0.9371868978805395, + "grad_norm": 1.1598546504974365, + "learning_rate": 1.0306514770640209e-07, + "loss": 0.2768, + "step": 46816 + }, + { + "epoch": 0.9372069163976678, + "grad_norm": 1.1070077419281006, + "learning_rate": 1.0299967530977118e-07, + "loss": 0.3103, + "step": 46817 + }, + { + "epoch": 0.9372269349147961, + "grad_norm": 0.9961767196655273, + "learning_rate": 1.02934223499076e-07, + "loss": 0.2698, + "step": 46818 + }, + { + "epoch": 0.9372469534319245, + "grad_norm": 1.2387866973876953, + "learning_rate": 1.0286879227459079e-07, + "loss": 0.2729, + "step": 46819 + }, + { + "epoch": 0.9372669719490528, + "grad_norm": 1.2399036884307861, + "learning_rate": 1.0280338163659143e-07, + "loss": 0.3288, + "step": 46820 + }, + { + "epoch": 0.9372869904661812, + "grad_norm": 1.1912864446640015, + "learning_rate": 1.0273799158535325e-07, + "loss": 0.2912, + "step": 46821 + }, + { + "epoch": 0.9373070089833095, + "grad_norm": 1.1058611869812012, + "learning_rate": 1.0267262212115047e-07, + "loss": 0.2767, + "step": 46822 + }, + { + "epoch": 0.937327027500438, + "grad_norm": 2.0439341068267822, + "learning_rate": 1.026072732442579e-07, + "loss": 0.7641, + "step": 46823 + }, + { + "epoch": 0.9373470460175662, + "grad_norm": 1.8377735614776611, + "learning_rate": 1.0254194495495085e-07, + "loss": 0.7041, + "step": 46824 + }, + { + "epoch": 0.9373670645346945, + "grad_norm": 1.1762639284133911, + "learning_rate": 1.0247663725350299e-07, + "loss": 0.2879, + "step": 46825 + }, + { + "epoch": 0.937387083051823, + "grad_norm": 1.05818772315979, + "learning_rate": 1.0241135014019021e-07, + "loss": 0.2938, + "step": 46826 + }, + { + "epoch": 0.9374071015689512, + "grad_norm": 1.1273077726364136, + "learning_rate": 1.0234608361528564e-07, + "loss": 0.2894, + "step": 46827 + }, + { + "epoch": 0.9374271200860796, + "grad_norm": 1.110885500907898, + "learning_rate": 1.022808376790635e-07, + "loss": 0.3213, + "step": 46828 + }, + { + "epoch": 0.9374471386032079, + "grad_norm": 1.101097822189331, + "learning_rate": 1.0221561233179911e-07, + "loss": 0.2985, + "step": 46829 + }, + { + "epoch": 0.9374671571203363, + "grad_norm": 1.1239715814590454, + "learning_rate": 1.021504075737667e-07, + "loss": 0.2778, + "step": 46830 + }, + { + "epoch": 0.9374871756374646, + "grad_norm": 1.1039572954177856, + "learning_rate": 1.0208522340523941e-07, + "loss": 0.3181, + "step": 46831 + }, + { + "epoch": 0.937507194154593, + "grad_norm": 1.0235265493392944, + "learning_rate": 1.0202005982649199e-07, + "loss": 0.2606, + "step": 46832 + }, + { + "epoch": 0.9375272126717213, + "grad_norm": 0.988786518573761, + "learning_rate": 1.0195491683779812e-07, + "loss": 0.2578, + "step": 46833 + }, + { + "epoch": 0.9375472311888496, + "grad_norm": 1.825206995010376, + "learning_rate": 1.0188979443943204e-07, + "loss": 0.7385, + "step": 46834 + }, + { + "epoch": 0.937567249705978, + "grad_norm": 1.094564437866211, + "learning_rate": 1.0182469263166739e-07, + "loss": 0.2997, + "step": 46835 + }, + { + "epoch": 0.9375872682231063, + "grad_norm": 1.0375818014144897, + "learning_rate": 1.017596114147773e-07, + "loss": 0.2793, + "step": 46836 + }, + { + "epoch": 0.9376072867402347, + "grad_norm": 1.041731357574463, + "learning_rate": 1.0169455078903546e-07, + "loss": 0.258, + "step": 46837 + }, + { + "epoch": 0.937627305257363, + "grad_norm": 1.0128674507141113, + "learning_rate": 1.016295107547155e-07, + "loss": 0.2991, + "step": 46838 + }, + { + "epoch": 0.9376473237744914, + "grad_norm": 1.0148371458053589, + "learning_rate": 1.0156449131209167e-07, + "loss": 0.2613, + "step": 46839 + }, + { + "epoch": 0.9376673422916197, + "grad_norm": 1.1807199716567993, + "learning_rate": 1.0149949246143598e-07, + "loss": 0.2913, + "step": 46840 + }, + { + "epoch": 0.937687360808748, + "grad_norm": 1.2081358432769775, + "learning_rate": 1.0143451420302208e-07, + "loss": 0.3035, + "step": 46841 + }, + { + "epoch": 0.9377073793258764, + "grad_norm": 1.1459733247756958, + "learning_rate": 1.013695565371231e-07, + "loss": 0.3164, + "step": 46842 + }, + { + "epoch": 0.9377273978430047, + "grad_norm": 1.0740920305252075, + "learning_rate": 1.0130461946401326e-07, + "loss": 0.2621, + "step": 46843 + }, + { + "epoch": 0.9377474163601331, + "grad_norm": 1.348753571510315, + "learning_rate": 1.0123970298396402e-07, + "loss": 0.3915, + "step": 46844 + }, + { + "epoch": 0.9377674348772614, + "grad_norm": 1.164341688156128, + "learning_rate": 1.0117480709724847e-07, + "loss": 0.3077, + "step": 46845 + }, + { + "epoch": 0.9377874533943898, + "grad_norm": 1.2285428047180176, + "learning_rate": 1.0110993180414086e-07, + "loss": 0.319, + "step": 46846 + }, + { + "epoch": 0.9378074719115181, + "grad_norm": 1.23320734500885, + "learning_rate": 1.0104507710491208e-07, + "loss": 0.3185, + "step": 46847 + }, + { + "epoch": 0.9378274904286465, + "grad_norm": 1.1573976278305054, + "learning_rate": 1.0098024299983577e-07, + "loss": 0.3745, + "step": 46848 + }, + { + "epoch": 0.9378475089457748, + "grad_norm": 1.102415680885315, + "learning_rate": 1.0091542948918454e-07, + "loss": 0.2954, + "step": 46849 + }, + { + "epoch": 0.9378675274629031, + "grad_norm": 1.2541710138320923, + "learning_rate": 1.008506365732298e-07, + "loss": 0.2888, + "step": 46850 + }, + { + "epoch": 0.9378875459800315, + "grad_norm": 1.1855849027633667, + "learning_rate": 1.0078586425224523e-07, + "loss": 0.303, + "step": 46851 + }, + { + "epoch": 0.9379075644971598, + "grad_norm": 1.1340556144714355, + "learning_rate": 1.007211125265023e-07, + "loss": 0.3371, + "step": 46852 + }, + { + "epoch": 0.9379275830142882, + "grad_norm": 1.128849983215332, + "learning_rate": 1.006563813962741e-07, + "loss": 0.2464, + "step": 46853 + }, + { + "epoch": 0.9379476015314165, + "grad_norm": 1.196178674697876, + "learning_rate": 1.0059167086183097e-07, + "loss": 0.3033, + "step": 46854 + }, + { + "epoch": 0.937967620048545, + "grad_norm": 1.0566526651382446, + "learning_rate": 1.005269809234466e-07, + "loss": 0.2812, + "step": 46855 + }, + { + "epoch": 0.9379876385656732, + "grad_norm": 1.0464155673980713, + "learning_rate": 1.0046231158139241e-07, + "loss": 0.2758, + "step": 46856 + }, + { + "epoch": 0.9380076570828015, + "grad_norm": 1.0823227167129517, + "learning_rate": 1.0039766283594043e-07, + "loss": 0.3079, + "step": 46857 + }, + { + "epoch": 0.9380276755999299, + "grad_norm": 1.218798279762268, + "learning_rate": 1.0033303468736266e-07, + "loss": 0.3185, + "step": 46858 + }, + { + "epoch": 0.9380476941170582, + "grad_norm": 1.0487040281295776, + "learning_rate": 1.0026842713592943e-07, + "loss": 0.2828, + "step": 46859 + }, + { + "epoch": 0.9380677126341866, + "grad_norm": 1.1067336797714233, + "learning_rate": 1.0020384018191387e-07, + "loss": 0.2805, + "step": 46860 + }, + { + "epoch": 0.9380877311513149, + "grad_norm": 1.1046130657196045, + "learning_rate": 1.0013927382558741e-07, + "loss": 0.2952, + "step": 46861 + }, + { + "epoch": 0.9381077496684433, + "grad_norm": 1.0844519138336182, + "learning_rate": 1.0007472806722041e-07, + "loss": 0.2727, + "step": 46862 + }, + { + "epoch": 0.9381277681855716, + "grad_norm": 1.0951939821243286, + "learning_rate": 1.000102029070843e-07, + "loss": 0.3409, + "step": 46863 + }, + { + "epoch": 0.9381477867027, + "grad_norm": 1.1184852123260498, + "learning_rate": 9.994569834545164e-08, + "loss": 0.2631, + "step": 46864 + }, + { + "epoch": 0.9381678052198283, + "grad_norm": 1.1662335395812988, + "learning_rate": 9.988121438259279e-08, + "loss": 0.271, + "step": 46865 + }, + { + "epoch": 0.9381878237369566, + "grad_norm": 1.1995078325271606, + "learning_rate": 9.981675101877864e-08, + "loss": 0.2473, + "step": 46866 + }, + { + "epoch": 0.938207842254085, + "grad_norm": 1.0140563249588013, + "learning_rate": 9.97523082542795e-08, + "loss": 0.3144, + "step": 46867 + }, + { + "epoch": 0.9382278607712133, + "grad_norm": 1.1667160987854004, + "learning_rate": 9.968788608936741e-08, + "loss": 0.2905, + "step": 46868 + }, + { + "epoch": 0.9382478792883417, + "grad_norm": 1.1301851272583008, + "learning_rate": 9.962348452431325e-08, + "loss": 0.3048, + "step": 46869 + }, + { + "epoch": 0.93826789780547, + "grad_norm": 1.1910200119018555, + "learning_rate": 9.955910355938792e-08, + "loss": 0.3058, + "step": 46870 + }, + { + "epoch": 0.9382879163225984, + "grad_norm": 1.2228642702102661, + "learning_rate": 9.949474319486119e-08, + "loss": 0.2632, + "step": 46871 + }, + { + "epoch": 0.9383079348397267, + "grad_norm": 1.267590045928955, + "learning_rate": 9.943040343100341e-08, + "loss": 0.2814, + "step": 46872 + }, + { + "epoch": 0.938327953356855, + "grad_norm": 1.3238264322280884, + "learning_rate": 9.936608426808603e-08, + "loss": 0.3318, + "step": 46873 + }, + { + "epoch": 0.9383479718739834, + "grad_norm": 1.9804483652114868, + "learning_rate": 9.930178570637882e-08, + "loss": 0.7897, + "step": 46874 + }, + { + "epoch": 0.9383679903911117, + "grad_norm": 1.8985965251922607, + "learning_rate": 9.92375077461527e-08, + "loss": 0.7074, + "step": 46875 + }, + { + "epoch": 0.9383880089082401, + "grad_norm": 1.8720036745071411, + "learning_rate": 9.917325038767689e-08, + "loss": 0.6982, + "step": 46876 + }, + { + "epoch": 0.9384080274253684, + "grad_norm": 1.1926194429397583, + "learning_rate": 9.910901363122228e-08, + "loss": 0.2856, + "step": 46877 + }, + { + "epoch": 0.9384280459424968, + "grad_norm": 1.1273303031921387, + "learning_rate": 9.904479747705865e-08, + "loss": 0.3324, + "step": 46878 + }, + { + "epoch": 0.9384480644596251, + "grad_norm": 1.249806523323059, + "learning_rate": 9.898060192545633e-08, + "loss": 0.3576, + "step": 46879 + }, + { + "epoch": 0.9384680829767535, + "grad_norm": 2.0756030082702637, + "learning_rate": 9.891642697668403e-08, + "loss": 0.783, + "step": 46880 + }, + { + "epoch": 0.9384881014938818, + "grad_norm": 1.2187148332595825, + "learning_rate": 9.88522726310126e-08, + "loss": 0.2855, + "step": 46881 + }, + { + "epoch": 0.9385081200110101, + "grad_norm": 1.1477246284484863, + "learning_rate": 9.878813888871186e-08, + "loss": 0.2568, + "step": 46882 + }, + { + "epoch": 0.9385281385281385, + "grad_norm": 1.1113988161087036, + "learning_rate": 9.872402575005103e-08, + "loss": 0.2967, + "step": 46883 + }, + { + "epoch": 0.9385481570452668, + "grad_norm": 1.0771219730377197, + "learning_rate": 9.865993321529987e-08, + "loss": 0.2593, + "step": 46884 + }, + { + "epoch": 0.9385681755623952, + "grad_norm": 1.0338817834854126, + "learning_rate": 9.859586128472708e-08, + "loss": 0.2708, + "step": 46885 + }, + { + "epoch": 0.9385881940795235, + "grad_norm": 1.0934839248657227, + "learning_rate": 9.853180995860245e-08, + "loss": 0.3171, + "step": 46886 + }, + { + "epoch": 0.938608212596652, + "grad_norm": 1.228009819984436, + "learning_rate": 9.846777923719575e-08, + "loss": 0.2451, + "step": 46887 + }, + { + "epoch": 0.9386282311137802, + "grad_norm": 1.1939687728881836, + "learning_rate": 9.840376912077564e-08, + "loss": 0.2698, + "step": 46888 + }, + { + "epoch": 0.9386482496309085, + "grad_norm": 1.4042690992355347, + "learning_rate": 9.833977960961083e-08, + "loss": 0.2782, + "step": 46889 + }, + { + "epoch": 0.9386682681480369, + "grad_norm": 1.197139024734497, + "learning_rate": 9.827581070397052e-08, + "loss": 0.2895, + "step": 46890 + }, + { + "epoch": 0.9386882866651652, + "grad_norm": 1.1468251943588257, + "learning_rate": 9.82118624041245e-08, + "loss": 0.2826, + "step": 46891 + }, + { + "epoch": 0.9387083051822936, + "grad_norm": 1.0415077209472656, + "learning_rate": 9.81479347103409e-08, + "loss": 0.2943, + "step": 46892 + }, + { + "epoch": 0.9387283236994219, + "grad_norm": 1.619124174118042, + "learning_rate": 9.808402762288782e-08, + "loss": 0.2909, + "step": 46893 + }, + { + "epoch": 0.9387483422165503, + "grad_norm": 1.1484096050262451, + "learning_rate": 9.802014114203506e-08, + "loss": 0.3023, + "step": 46894 + }, + { + "epoch": 0.9387683607336786, + "grad_norm": 1.1192678213119507, + "learning_rate": 9.795627526805074e-08, + "loss": 0.2604, + "step": 46895 + }, + { + "epoch": 0.938788379250807, + "grad_norm": 1.2131901979446411, + "learning_rate": 9.789243000120351e-08, + "loss": 0.3046, + "step": 46896 + }, + { + "epoch": 0.9388083977679353, + "grad_norm": 1.1118946075439453, + "learning_rate": 9.782860534176098e-08, + "loss": 0.2735, + "step": 46897 + }, + { + "epoch": 0.9388284162850636, + "grad_norm": 1.1865769624710083, + "learning_rate": 9.776480128999288e-08, + "loss": 0.3091, + "step": 46898 + }, + { + "epoch": 0.938848434802192, + "grad_norm": 1.2474948167800903, + "learning_rate": 9.77010178461657e-08, + "loss": 0.318, + "step": 46899 + }, + { + "epoch": 0.9388684533193203, + "grad_norm": 1.0726003646850586, + "learning_rate": 9.763725501054866e-08, + "loss": 0.3159, + "step": 46900 + }, + { + "epoch": 0.9388884718364487, + "grad_norm": 1.1762232780456543, + "learning_rate": 9.757351278340987e-08, + "loss": 0.3281, + "step": 46901 + }, + { + "epoch": 0.938908490353577, + "grad_norm": 1.0532786846160889, + "learning_rate": 9.750979116501691e-08, + "loss": 0.2751, + "step": 46902 + }, + { + "epoch": 0.9389285088707054, + "grad_norm": 1.0929995775222778, + "learning_rate": 9.744609015563789e-08, + "loss": 0.2716, + "step": 46903 + }, + { + "epoch": 0.9389485273878337, + "grad_norm": 1.169217586517334, + "learning_rate": 9.738240975554036e-08, + "loss": 0.3023, + "step": 46904 + }, + { + "epoch": 0.938968545904962, + "grad_norm": 1.069730520248413, + "learning_rate": 9.731874996499191e-08, + "loss": 0.2778, + "step": 46905 + }, + { + "epoch": 0.9389885644220904, + "grad_norm": 1.0883172750473022, + "learning_rate": 9.725511078426064e-08, + "loss": 0.3221, + "step": 46906 + }, + { + "epoch": 0.9390085829392187, + "grad_norm": 1.2017709016799927, + "learning_rate": 9.719149221361301e-08, + "loss": 0.3485, + "step": 46907 + }, + { + "epoch": 0.9390286014563471, + "grad_norm": 1.8801028728485107, + "learning_rate": 9.712789425331826e-08, + "loss": 0.7885, + "step": 46908 + }, + { + "epoch": 0.9390486199734754, + "grad_norm": 0.9760286808013916, + "learning_rate": 9.706431690364282e-08, + "loss": 0.2433, + "step": 46909 + }, + { + "epoch": 0.9390686384906038, + "grad_norm": 1.0372955799102783, + "learning_rate": 9.700076016485316e-08, + "loss": 0.2963, + "step": 46910 + }, + { + "epoch": 0.9390886570077321, + "grad_norm": 1.053061842918396, + "learning_rate": 9.693722403721795e-08, + "loss": 0.2407, + "step": 46911 + }, + { + "epoch": 0.9391086755248605, + "grad_norm": 1.0590616464614868, + "learning_rate": 9.687370852100253e-08, + "loss": 0.2911, + "step": 46912 + }, + { + "epoch": 0.9391286940419888, + "grad_norm": 1.1222001314163208, + "learning_rate": 9.681021361647558e-08, + "loss": 0.2868, + "step": 46913 + }, + { + "epoch": 0.9391487125591171, + "grad_norm": 1.1137681007385254, + "learning_rate": 9.6746739323903e-08, + "loss": 0.2549, + "step": 46914 + }, + { + "epoch": 0.9391687310762455, + "grad_norm": 1.301655650138855, + "learning_rate": 9.668328564355233e-08, + "loss": 0.3471, + "step": 46915 + }, + { + "epoch": 0.9391887495933738, + "grad_norm": 1.0123084783554077, + "learning_rate": 9.661985257568951e-08, + "loss": 0.2617, + "step": 46916 + }, + { + "epoch": 0.9392087681105022, + "grad_norm": 1.0614793300628662, + "learning_rate": 9.655644012058152e-08, + "loss": 0.2831, + "step": 46917 + }, + { + "epoch": 0.9392287866276305, + "grad_norm": 1.056126356124878, + "learning_rate": 9.649304827849537e-08, + "loss": 0.2993, + "step": 46918 + }, + { + "epoch": 0.939248805144759, + "grad_norm": 2.084754228591919, + "learning_rate": 9.642967704969696e-08, + "loss": 0.7558, + "step": 46919 + }, + { + "epoch": 0.9392688236618872, + "grad_norm": 1.0617595911026, + "learning_rate": 9.636632643445332e-08, + "loss": 0.33, + "step": 46920 + }, + { + "epoch": 0.9392888421790155, + "grad_norm": 1.1111682653427124, + "learning_rate": 9.630299643302976e-08, + "loss": 0.3017, + "step": 46921 + }, + { + "epoch": 0.9393088606961439, + "grad_norm": 1.7745704650878906, + "learning_rate": 9.623968704569331e-08, + "loss": 0.7369, + "step": 46922 + }, + { + "epoch": 0.9393288792132722, + "grad_norm": 1.8160992860794067, + "learning_rate": 9.617639827271041e-08, + "loss": 0.7596, + "step": 46923 + }, + { + "epoch": 0.9393488977304006, + "grad_norm": 1.1690595149993896, + "learning_rate": 9.611313011434642e-08, + "loss": 0.2697, + "step": 46924 + }, + { + "epoch": 0.9393689162475289, + "grad_norm": 1.3272188901901245, + "learning_rate": 9.604988257086667e-08, + "loss": 0.2967, + "step": 46925 + }, + { + "epoch": 0.9393889347646573, + "grad_norm": 1.0976749658584595, + "learning_rate": 9.598665564253872e-08, + "loss": 0.2592, + "step": 46926 + }, + { + "epoch": 0.9394089532817856, + "grad_norm": 1.2320109605789185, + "learning_rate": 9.592344932962738e-08, + "loss": 0.2935, + "step": 46927 + }, + { + "epoch": 0.939428971798914, + "grad_norm": 1.1749446392059326, + "learning_rate": 9.586026363239853e-08, + "loss": 0.2709, + "step": 46928 + }, + { + "epoch": 0.9394489903160423, + "grad_norm": 1.0964765548706055, + "learning_rate": 9.579709855111808e-08, + "loss": 0.2929, + "step": 46929 + }, + { + "epoch": 0.9394690088331706, + "grad_norm": 1.1668421030044556, + "learning_rate": 9.573395408605024e-08, + "loss": 0.2683, + "step": 46930 + }, + { + "epoch": 0.939489027350299, + "grad_norm": 1.0221565961837769, + "learning_rate": 9.567083023746204e-08, + "loss": 0.2548, + "step": 46931 + }, + { + "epoch": 0.9395090458674273, + "grad_norm": 1.7452882528305054, + "learning_rate": 9.560772700561827e-08, + "loss": 0.7706, + "step": 46932 + }, + { + "epoch": 0.9395290643845557, + "grad_norm": 1.0551671981811523, + "learning_rate": 9.554464439078425e-08, + "loss": 0.2751, + "step": 46933 + }, + { + "epoch": 0.939549082901684, + "grad_norm": 1.075756311416626, + "learning_rate": 9.54815823932248e-08, + "loss": 0.2574, + "step": 46934 + }, + { + "epoch": 0.9395691014188124, + "grad_norm": 1.3487805128097534, + "learning_rate": 9.541854101320525e-08, + "loss": 0.2937, + "step": 46935 + }, + { + "epoch": 0.9395891199359407, + "grad_norm": 1.3685234785079956, + "learning_rate": 9.535552025099093e-08, + "loss": 0.3161, + "step": 46936 + }, + { + "epoch": 0.939609138453069, + "grad_norm": 1.1091969013214111, + "learning_rate": 9.529252010684664e-08, + "loss": 0.3307, + "step": 46937 + }, + { + "epoch": 0.9396291569701974, + "grad_norm": 1.1685653924942017, + "learning_rate": 9.522954058103662e-08, + "loss": 0.3256, + "step": 46938 + }, + { + "epoch": 0.9396491754873257, + "grad_norm": 1.0250129699707031, + "learning_rate": 9.516658167382675e-08, + "loss": 0.2903, + "step": 46939 + }, + { + "epoch": 0.9396691940044541, + "grad_norm": 1.9719984531402588, + "learning_rate": 9.510364338548073e-08, + "loss": 0.7752, + "step": 46940 + }, + { + "epoch": 0.9396892125215824, + "grad_norm": 1.1700526475906372, + "learning_rate": 9.504072571626388e-08, + "loss": 0.2868, + "step": 46941 + }, + { + "epoch": 0.9397092310387108, + "grad_norm": 1.3175987005233765, + "learning_rate": 9.49778286664399e-08, + "loss": 0.2987, + "step": 46942 + }, + { + "epoch": 0.9397292495558391, + "grad_norm": 1.2935930490493774, + "learning_rate": 9.491495223627301e-08, + "loss": 0.3232, + "step": 46943 + }, + { + "epoch": 0.9397492680729675, + "grad_norm": 2.0530455112457275, + "learning_rate": 9.485209642602855e-08, + "loss": 0.7655, + "step": 46944 + }, + { + "epoch": 0.9397692865900958, + "grad_norm": 1.0818549394607544, + "learning_rate": 9.478926123597021e-08, + "loss": 0.295, + "step": 46945 + }, + { + "epoch": 0.9397893051072241, + "grad_norm": 1.8653275966644287, + "learning_rate": 9.472644666636221e-08, + "loss": 0.753, + "step": 46946 + }, + { + "epoch": 0.9398093236243525, + "grad_norm": 1.254841923713684, + "learning_rate": 9.466365271746825e-08, + "loss": 0.2933, + "step": 46947 + }, + { + "epoch": 0.9398293421414808, + "grad_norm": 1.4133630990982056, + "learning_rate": 9.460087938955309e-08, + "loss": 0.3299, + "step": 46948 + }, + { + "epoch": 0.9398493606586092, + "grad_norm": 0.995145857334137, + "learning_rate": 9.453812668288043e-08, + "loss": 0.3145, + "step": 46949 + }, + { + "epoch": 0.9398693791757375, + "grad_norm": 1.118930697441101, + "learning_rate": 9.447539459771338e-08, + "loss": 0.3023, + "step": 46950 + }, + { + "epoch": 0.939889397692866, + "grad_norm": 1.855201244354248, + "learning_rate": 9.441268313431673e-08, + "loss": 0.7221, + "step": 46951 + }, + { + "epoch": 0.9399094162099942, + "grad_norm": 1.2727024555206299, + "learning_rate": 9.434999229295249e-08, + "loss": 0.2988, + "step": 46952 + }, + { + "epoch": 0.9399294347271225, + "grad_norm": 1.1710419654846191, + "learning_rate": 9.428732207388603e-08, + "loss": 0.284, + "step": 46953 + }, + { + "epoch": 0.9399494532442509, + "grad_norm": 1.0984519720077515, + "learning_rate": 9.422467247737932e-08, + "loss": 0.3377, + "step": 46954 + }, + { + "epoch": 0.9399694717613792, + "grad_norm": 1.0703221559524536, + "learning_rate": 9.416204350369662e-08, + "loss": 0.2674, + "step": 46955 + }, + { + "epoch": 0.9399894902785076, + "grad_norm": 1.079702377319336, + "learning_rate": 9.409943515310105e-08, + "loss": 0.2782, + "step": 46956 + }, + { + "epoch": 0.9400095087956359, + "grad_norm": 0.997610330581665, + "learning_rate": 9.403684742585572e-08, + "loss": 0.2804, + "step": 46957 + }, + { + "epoch": 0.9400295273127643, + "grad_norm": 1.0577837228775024, + "learning_rate": 9.397428032222322e-08, + "loss": 0.2807, + "step": 46958 + }, + { + "epoch": 0.9400495458298926, + "grad_norm": 1.0606952905654907, + "learning_rate": 9.391173384246721e-08, + "loss": 0.2717, + "step": 46959 + }, + { + "epoch": 0.940069564347021, + "grad_norm": 1.0606130361557007, + "learning_rate": 9.384920798685081e-08, + "loss": 0.2757, + "step": 46960 + }, + { + "epoch": 0.9400895828641493, + "grad_norm": 1.0752012729644775, + "learning_rate": 9.378670275563606e-08, + "loss": 0.2763, + "step": 46961 + }, + { + "epoch": 0.9401096013812776, + "grad_norm": 1.061901330947876, + "learning_rate": 9.372421814908661e-08, + "loss": 0.2763, + "step": 46962 + }, + { + "epoch": 0.940129619898406, + "grad_norm": 1.1207468509674072, + "learning_rate": 9.366175416746503e-08, + "loss": 0.2746, + "step": 46963 + }, + { + "epoch": 0.9401496384155343, + "grad_norm": 1.0347223281860352, + "learning_rate": 9.359931081103279e-08, + "loss": 0.2873, + "step": 46964 + }, + { + "epoch": 0.9401696569326627, + "grad_norm": 1.076094388961792, + "learning_rate": 9.353688808005356e-08, + "loss": 0.2923, + "step": 46965 + }, + { + "epoch": 0.940189675449791, + "grad_norm": 1.0805320739746094, + "learning_rate": 9.347448597478936e-08, + "loss": 0.2956, + "step": 46966 + }, + { + "epoch": 0.9402096939669194, + "grad_norm": 1.1104557514190674, + "learning_rate": 9.341210449550276e-08, + "loss": 0.3119, + "step": 46967 + }, + { + "epoch": 0.9402297124840477, + "grad_norm": 1.1043815612792969, + "learning_rate": 9.334974364245574e-08, + "loss": 0.2813, + "step": 46968 + }, + { + "epoch": 0.940249731001176, + "grad_norm": 1.1358115673065186, + "learning_rate": 9.32874034159098e-08, + "loss": 0.2854, + "step": 46969 + }, + { + "epoch": 0.9402697495183044, + "grad_norm": 1.11507248878479, + "learning_rate": 9.322508381612805e-08, + "loss": 0.3031, + "step": 46970 + }, + { + "epoch": 0.9402897680354327, + "grad_norm": 1.2061890363693237, + "learning_rate": 9.316278484337193e-08, + "loss": 0.2866, + "step": 46971 + }, + { + "epoch": 0.9403097865525611, + "grad_norm": 1.1263238191604614, + "learning_rate": 9.310050649790403e-08, + "loss": 0.2723, + "step": 46972 + }, + { + "epoch": 0.9403298050696894, + "grad_norm": 1.0825046300888062, + "learning_rate": 9.303824877998524e-08, + "loss": 0.2591, + "step": 46973 + }, + { + "epoch": 0.9403498235868178, + "grad_norm": 1.09467613697052, + "learning_rate": 9.297601168987758e-08, + "loss": 0.2817, + "step": 46974 + }, + { + "epoch": 0.9403698421039461, + "grad_norm": 1.054560661315918, + "learning_rate": 9.291379522784305e-08, + "loss": 0.2697, + "step": 46975 + }, + { + "epoch": 0.9403898606210745, + "grad_norm": 1.0665284395217896, + "learning_rate": 9.285159939414257e-08, + "loss": 0.2836, + "step": 46976 + }, + { + "epoch": 0.9404098791382028, + "grad_norm": 1.1301798820495605, + "learning_rate": 9.27894241890387e-08, + "loss": 0.3027, + "step": 46977 + }, + { + "epoch": 0.9404298976553311, + "grad_norm": 1.0776662826538086, + "learning_rate": 9.272726961279122e-08, + "loss": 0.3379, + "step": 46978 + }, + { + "epoch": 0.9404499161724595, + "grad_norm": 1.1715980768203735, + "learning_rate": 9.266513566566271e-08, + "loss": 0.2977, + "step": 46979 + }, + { + "epoch": 0.9404699346895878, + "grad_norm": 1.1497563123703003, + "learning_rate": 9.260302234791351e-08, + "loss": 0.2963, + "step": 46980 + }, + { + "epoch": 0.9404899532067162, + "grad_norm": 1.1191819906234741, + "learning_rate": 9.254092965980566e-08, + "loss": 0.3137, + "step": 46981 + }, + { + "epoch": 0.9405099717238445, + "grad_norm": 1.0595688819885254, + "learning_rate": 9.247885760159892e-08, + "loss": 0.2749, + "step": 46982 + }, + { + "epoch": 0.940529990240973, + "grad_norm": 1.1059659719467163, + "learning_rate": 9.24168061735553e-08, + "loss": 0.3163, + "step": 46983 + }, + { + "epoch": 0.9405500087581012, + "grad_norm": 1.0477848052978516, + "learning_rate": 9.235477537593518e-08, + "loss": 0.2411, + "step": 46984 + }, + { + "epoch": 0.9405700272752295, + "grad_norm": 1.0051219463348389, + "learning_rate": 9.229276520899999e-08, + "loss": 0.2896, + "step": 46985 + }, + { + "epoch": 0.9405900457923579, + "grad_norm": 1.0635597705841064, + "learning_rate": 9.223077567300953e-08, + "loss": 0.2819, + "step": 46986 + }, + { + "epoch": 0.9406100643094862, + "grad_norm": 1.1185085773468018, + "learning_rate": 9.216880676822415e-08, + "loss": 0.3202, + "step": 46987 + }, + { + "epoch": 0.9406300828266146, + "grad_norm": 1.8306981325149536, + "learning_rate": 9.210685849490586e-08, + "loss": 0.785, + "step": 46988 + }, + { + "epoch": 0.9406501013437429, + "grad_norm": 1.2567973136901855, + "learning_rate": 9.20449308533139e-08, + "loss": 0.2697, + "step": 46989 + }, + { + "epoch": 0.9406701198608713, + "grad_norm": 1.7746928930282593, + "learning_rate": 9.198302384370861e-08, + "loss": 0.7882, + "step": 46990 + }, + { + "epoch": 0.9406901383779996, + "grad_norm": 1.1527498960494995, + "learning_rate": 9.192113746635033e-08, + "loss": 0.325, + "step": 46991 + }, + { + "epoch": 0.940710156895128, + "grad_norm": 1.201133370399475, + "learning_rate": 9.185927172149944e-08, + "loss": 0.3136, + "step": 46992 + }, + { + "epoch": 0.9407301754122563, + "grad_norm": 1.1511508226394653, + "learning_rate": 9.179742660941626e-08, + "loss": 0.2874, + "step": 46993 + }, + { + "epoch": 0.9407501939293846, + "grad_norm": 0.9970879554748535, + "learning_rate": 9.173560213036059e-08, + "loss": 0.2495, + "step": 46994 + }, + { + "epoch": 0.940770212446513, + "grad_norm": 1.0988025665283203, + "learning_rate": 9.167379828459166e-08, + "loss": 0.2856, + "step": 46995 + }, + { + "epoch": 0.9407902309636413, + "grad_norm": 1.0996296405792236, + "learning_rate": 9.161201507236983e-08, + "loss": 0.2153, + "step": 46996 + }, + { + "epoch": 0.9408102494807697, + "grad_norm": 1.0491324663162231, + "learning_rate": 9.155025249395488e-08, + "loss": 0.2649, + "step": 46997 + }, + { + "epoch": 0.940830267997898, + "grad_norm": 1.9467543363571167, + "learning_rate": 9.148851054960605e-08, + "loss": 0.7548, + "step": 46998 + }, + { + "epoch": 0.9408502865150264, + "grad_norm": 1.300904393196106, + "learning_rate": 9.14267892395837e-08, + "loss": 0.3049, + "step": 46999 + }, + { + "epoch": 0.9408703050321547, + "grad_norm": 1.0094870328903198, + "learning_rate": 9.13650885641465e-08, + "loss": 0.2635, + "step": 47000 + }, + { + "epoch": 0.940890323549283, + "grad_norm": 1.3505303859710693, + "learning_rate": 9.130340852355479e-08, + "loss": 0.3506, + "step": 47001 + }, + { + "epoch": 0.9409103420664114, + "grad_norm": 1.0664393901824951, + "learning_rate": 9.124174911806671e-08, + "loss": 0.2835, + "step": 47002 + }, + { + "epoch": 0.9409303605835397, + "grad_norm": 1.1217957735061646, + "learning_rate": 9.118011034794205e-08, + "loss": 0.2953, + "step": 47003 + }, + { + "epoch": 0.9409503791006681, + "grad_norm": 1.1523699760437012, + "learning_rate": 9.111849221343949e-08, + "loss": 0.2707, + "step": 47004 + }, + { + "epoch": 0.9409703976177964, + "grad_norm": 1.1071434020996094, + "learning_rate": 9.105689471481827e-08, + "loss": 0.2856, + "step": 47005 + }, + { + "epoch": 0.9409904161349248, + "grad_norm": 1.1288083791732788, + "learning_rate": 9.099531785233817e-08, + "loss": 0.2999, + "step": 47006 + }, + { + "epoch": 0.9410104346520531, + "grad_norm": 1.07986581325531, + "learning_rate": 9.093376162625678e-08, + "loss": 0.29, + "step": 47007 + }, + { + "epoch": 0.9410304531691815, + "grad_norm": 1.1381769180297852, + "learning_rate": 9.087222603683333e-08, + "loss": 0.2603, + "step": 47008 + }, + { + "epoch": 0.9410504716863098, + "grad_norm": 1.188348412513733, + "learning_rate": 9.081071108432704e-08, + "loss": 0.3285, + "step": 47009 + }, + { + "epoch": 0.9410704902034381, + "grad_norm": 1.2571989297866821, + "learning_rate": 9.07492167689955e-08, + "loss": 0.2901, + "step": 47010 + }, + { + "epoch": 0.9410905087205665, + "grad_norm": 1.168286681175232, + "learning_rate": 9.068774309109796e-08, + "loss": 0.3082, + "step": 47011 + }, + { + "epoch": 0.9411105272376948, + "grad_norm": 1.1044435501098633, + "learning_rate": 9.062629005089307e-08, + "loss": 0.2775, + "step": 47012 + }, + { + "epoch": 0.9411305457548232, + "grad_norm": 1.0707247257232666, + "learning_rate": 9.056485764863843e-08, + "loss": 0.3002, + "step": 47013 + }, + { + "epoch": 0.9411505642719515, + "grad_norm": 1.1334105730056763, + "learning_rate": 9.050344588459215e-08, + "loss": 0.3021, + "step": 47014 + }, + { + "epoch": 0.94117058278908, + "grad_norm": 1.6496942043304443, + "learning_rate": 9.044205475901346e-08, + "loss": 0.289, + "step": 47015 + }, + { + "epoch": 0.9411906013062082, + "grad_norm": 1.1290274858474731, + "learning_rate": 9.038068427215996e-08, + "loss": 0.2851, + "step": 47016 + }, + { + "epoch": 0.9412106198233365, + "grad_norm": 1.1111558675765991, + "learning_rate": 9.031933442428919e-08, + "loss": 0.2837, + "step": 47017 + }, + { + "epoch": 0.9412306383404649, + "grad_norm": 1.0576144456863403, + "learning_rate": 9.02580052156593e-08, + "loss": 0.3179, + "step": 47018 + }, + { + "epoch": 0.9412506568575932, + "grad_norm": 1.102301836013794, + "learning_rate": 9.019669664652841e-08, + "loss": 0.3158, + "step": 47019 + }, + { + "epoch": 0.9412706753747216, + "grad_norm": 1.107846736907959, + "learning_rate": 9.01354087171541e-08, + "loss": 0.2854, + "step": 47020 + }, + { + "epoch": 0.9412906938918499, + "grad_norm": 1.0189288854599, + "learning_rate": 9.007414142779337e-08, + "loss": 0.3037, + "step": 47021 + }, + { + "epoch": 0.9413107124089783, + "grad_norm": 1.0905020236968994, + "learning_rate": 9.001289477870433e-08, + "loss": 0.2711, + "step": 47022 + }, + { + "epoch": 0.9413307309261066, + "grad_norm": 1.8859587907791138, + "learning_rate": 8.99516687701446e-08, + "loss": 0.7095, + "step": 47023 + }, + { + "epoch": 0.941350749443235, + "grad_norm": 0.9705358743667603, + "learning_rate": 8.989046340237173e-08, + "loss": 0.2699, + "step": 47024 + }, + { + "epoch": 0.9413707679603633, + "grad_norm": 1.0915255546569824, + "learning_rate": 8.982927867564273e-08, + "loss": 0.3032, + "step": 47025 + }, + { + "epoch": 0.9413907864774916, + "grad_norm": 0.9895134568214417, + "learning_rate": 8.976811459021461e-08, + "loss": 0.2653, + "step": 47026 + }, + { + "epoch": 0.94141080499462, + "grad_norm": 1.1475120782852173, + "learning_rate": 8.970697114634441e-08, + "loss": 0.3386, + "step": 47027 + }, + { + "epoch": 0.9414308235117483, + "grad_norm": 1.2917430400848389, + "learning_rate": 8.964584834428968e-08, + "loss": 0.2612, + "step": 47028 + }, + { + "epoch": 0.9414508420288767, + "grad_norm": 1.1994785070419312, + "learning_rate": 8.95847461843069e-08, + "loss": 0.3019, + "step": 47029 + }, + { + "epoch": 0.941470860546005, + "grad_norm": 1.104344367980957, + "learning_rate": 8.952366466665307e-08, + "loss": 0.2805, + "step": 47030 + }, + { + "epoch": 0.9414908790631334, + "grad_norm": 1.0980570316314697, + "learning_rate": 8.946260379158466e-08, + "loss": 0.2172, + "step": 47031 + }, + { + "epoch": 0.9415108975802617, + "grad_norm": 1.0852386951446533, + "learning_rate": 8.940156355935925e-08, + "loss": 0.2708, + "step": 47032 + }, + { + "epoch": 0.94153091609739, + "grad_norm": 1.8608421087265015, + "learning_rate": 8.934054397023328e-08, + "loss": 0.7591, + "step": 47033 + }, + { + "epoch": 0.9415509346145184, + "grad_norm": 1.1096514463424683, + "learning_rate": 8.927954502446212e-08, + "loss": 0.314, + "step": 47034 + }, + { + "epoch": 0.9415709531316467, + "grad_norm": 1.0834345817565918, + "learning_rate": 8.921856672230334e-08, + "loss": 0.275, + "step": 47035 + }, + { + "epoch": 0.9415909716487751, + "grad_norm": 1.9815845489501953, + "learning_rate": 8.915760906401227e-08, + "loss": 0.7495, + "step": 47036 + }, + { + "epoch": 0.9416109901659034, + "grad_norm": 1.0887629985809326, + "learning_rate": 8.909667204984651e-08, + "loss": 0.2919, + "step": 47037 + }, + { + "epoch": 0.9416310086830318, + "grad_norm": 1.2619155645370483, + "learning_rate": 8.90357556800614e-08, + "loss": 0.2769, + "step": 47038 + }, + { + "epoch": 0.9416510272001601, + "grad_norm": 1.1501655578613281, + "learning_rate": 8.897485995491284e-08, + "loss": 0.3001, + "step": 47039 + }, + { + "epoch": 0.9416710457172885, + "grad_norm": 1.2752318382263184, + "learning_rate": 8.89139848746573e-08, + "loss": 0.3183, + "step": 47040 + }, + { + "epoch": 0.9416910642344168, + "grad_norm": 1.2203682661056519, + "learning_rate": 8.885313043955068e-08, + "loss": 0.3465, + "step": 47041 + }, + { + "epoch": 0.9417110827515451, + "grad_norm": 1.0661311149597168, + "learning_rate": 8.879229664984834e-08, + "loss": 0.3202, + "step": 47042 + }, + { + "epoch": 0.9417311012686735, + "grad_norm": 1.1821961402893066, + "learning_rate": 8.873148350580618e-08, + "loss": 0.309, + "step": 47043 + }, + { + "epoch": 0.9417511197858018, + "grad_norm": 1.1442126035690308, + "learning_rate": 8.867069100768066e-08, + "loss": 0.3069, + "step": 47044 + }, + { + "epoch": 0.9417711383029302, + "grad_norm": 1.2541940212249756, + "learning_rate": 8.860991915572548e-08, + "loss": 0.2787, + "step": 47045 + }, + { + "epoch": 0.9417911568200585, + "grad_norm": 1.1505019664764404, + "learning_rate": 8.854916795019819e-08, + "loss": 0.3352, + "step": 47046 + }, + { + "epoch": 0.941811175337187, + "grad_norm": 1.0240980386734009, + "learning_rate": 8.848843739135249e-08, + "loss": 0.3054, + "step": 47047 + }, + { + "epoch": 0.9418311938543152, + "grad_norm": 1.8593254089355469, + "learning_rate": 8.842772747944483e-08, + "loss": 0.7603, + "step": 47048 + }, + { + "epoch": 0.9418512123714435, + "grad_norm": 1.06184720993042, + "learning_rate": 8.836703821473003e-08, + "loss": 0.2897, + "step": 47049 + }, + { + "epoch": 0.9418712308885719, + "grad_norm": 1.2410838603973389, + "learning_rate": 8.830636959746285e-08, + "loss": 0.336, + "step": 47050 + }, + { + "epoch": 0.9418912494057002, + "grad_norm": 1.2363367080688477, + "learning_rate": 8.824572162789924e-08, + "loss": 0.3505, + "step": 47051 + }, + { + "epoch": 0.9419112679228286, + "grad_norm": 1.0355573892593384, + "learning_rate": 8.81850943062934e-08, + "loss": 0.2926, + "step": 47052 + }, + { + "epoch": 0.9419312864399569, + "grad_norm": 1.0878078937530518, + "learning_rate": 8.812448763290015e-08, + "loss": 0.3439, + "step": 47053 + }, + { + "epoch": 0.9419513049570853, + "grad_norm": 0.9935939908027649, + "learning_rate": 8.806390160797485e-08, + "loss": 0.255, + "step": 47054 + }, + { + "epoch": 0.9419713234742136, + "grad_norm": 1.0897948741912842, + "learning_rate": 8.800333623177226e-08, + "loss": 0.3084, + "step": 47055 + }, + { + "epoch": 0.941991341991342, + "grad_norm": 1.1253050565719604, + "learning_rate": 8.79427915045461e-08, + "loss": 0.322, + "step": 47056 + }, + { + "epoch": 0.9420113605084703, + "grad_norm": 1.1389132738113403, + "learning_rate": 8.788226742655115e-08, + "loss": 0.2877, + "step": 47057 + }, + { + "epoch": 0.9420313790255986, + "grad_norm": 1.0759867429733276, + "learning_rate": 8.782176399804221e-08, + "loss": 0.2859, + "step": 47058 + }, + { + "epoch": 0.942051397542727, + "grad_norm": 1.1553951501846313, + "learning_rate": 8.776128121927352e-08, + "loss": 0.3061, + "step": 47059 + }, + { + "epoch": 0.9420714160598553, + "grad_norm": 1.5794068574905396, + "learning_rate": 8.770081909049876e-08, + "loss": 0.2833, + "step": 47060 + }, + { + "epoch": 0.9420914345769837, + "grad_norm": 2.030622959136963, + "learning_rate": 8.76403776119733e-08, + "loss": 0.7669, + "step": 47061 + }, + { + "epoch": 0.942111453094112, + "grad_norm": 1.0715745687484741, + "learning_rate": 8.75799567839497e-08, + "loss": 0.3043, + "step": 47062 + }, + { + "epoch": 0.9421314716112404, + "grad_norm": 1.0773768424987793, + "learning_rate": 8.751955660668388e-08, + "loss": 0.2771, + "step": 47063 + }, + { + "epoch": 0.9421514901283687, + "grad_norm": 1.0609546899795532, + "learning_rate": 8.74591770804284e-08, + "loss": 0.27, + "step": 47064 + }, + { + "epoch": 0.942171508645497, + "grad_norm": 1.1089234352111816, + "learning_rate": 8.739881820543694e-08, + "loss": 0.329, + "step": 47065 + }, + { + "epoch": 0.9421915271626254, + "grad_norm": 1.1260044574737549, + "learning_rate": 8.733847998196376e-08, + "loss": 0.3021, + "step": 47066 + }, + { + "epoch": 0.9422115456797537, + "grad_norm": 1.161374568939209, + "learning_rate": 8.727816241026254e-08, + "loss": 0.2865, + "step": 47067 + }, + { + "epoch": 0.9422315641968821, + "grad_norm": 1.2099896669387817, + "learning_rate": 8.721786549058642e-08, + "loss": 0.276, + "step": 47068 + }, + { + "epoch": 0.9422515827140104, + "grad_norm": 2.0956039428710938, + "learning_rate": 8.715758922318962e-08, + "loss": 0.7536, + "step": 47069 + }, + { + "epoch": 0.9422716012311388, + "grad_norm": 1.2528181076049805, + "learning_rate": 8.709733360832474e-08, + "loss": 0.297, + "step": 47070 + }, + { + "epoch": 0.9422916197482671, + "grad_norm": 1.0455495119094849, + "learning_rate": 8.703709864624543e-08, + "loss": 0.3066, + "step": 47071 + }, + { + "epoch": 0.9423116382653955, + "grad_norm": 1.048363208770752, + "learning_rate": 8.697688433720486e-08, + "loss": 0.3025, + "step": 47072 + }, + { + "epoch": 0.9423316567825238, + "grad_norm": 1.0945316553115845, + "learning_rate": 8.691669068145614e-08, + "loss": 0.2542, + "step": 47073 + }, + { + "epoch": 0.9423516752996521, + "grad_norm": 1.1528983116149902, + "learning_rate": 8.68565176792524e-08, + "loss": 0.3179, + "step": 47074 + }, + { + "epoch": 0.9423716938167805, + "grad_norm": 1.8632127046585083, + "learning_rate": 8.679636533084679e-08, + "loss": 0.7337, + "step": 47075 + }, + { + "epoch": 0.9423917123339088, + "grad_norm": 1.1818292140960693, + "learning_rate": 8.67362336364913e-08, + "loss": 0.3047, + "step": 47076 + }, + { + "epoch": 0.9424117308510372, + "grad_norm": 1.0707958936691284, + "learning_rate": 8.66761225964402e-08, + "loss": 0.264, + "step": 47077 + }, + { + "epoch": 0.9424317493681655, + "grad_norm": 1.1797233819961548, + "learning_rate": 8.661603221094495e-08, + "loss": 0.2937, + "step": 47078 + }, + { + "epoch": 0.942451767885294, + "grad_norm": 1.3385318517684937, + "learning_rate": 8.65559624802581e-08, + "loss": 0.2927, + "step": 47079 + }, + { + "epoch": 0.9424717864024222, + "grad_norm": 1.0705268383026123, + "learning_rate": 8.649591340463281e-08, + "loss": 0.3242, + "step": 47080 + }, + { + "epoch": 0.9424918049195505, + "grad_norm": 1.0410056114196777, + "learning_rate": 8.643588498432165e-08, + "loss": 0.2905, + "step": 47081 + }, + { + "epoch": 0.9425118234366789, + "grad_norm": 1.1284537315368652, + "learning_rate": 8.637587721957664e-08, + "loss": 0.2536, + "step": 47082 + }, + { + "epoch": 0.9425318419538072, + "grad_norm": 1.2788374423980713, + "learning_rate": 8.631589011064978e-08, + "loss": 0.3266, + "step": 47083 + }, + { + "epoch": 0.9425518604709356, + "grad_norm": 1.1362724304199219, + "learning_rate": 8.625592365779312e-08, + "loss": 0.3086, + "step": 47084 + }, + { + "epoch": 0.9425718789880639, + "grad_norm": 1.2079030275344849, + "learning_rate": 8.619597786125977e-08, + "loss": 0.2968, + "step": 47085 + }, + { + "epoch": 0.9425918975051923, + "grad_norm": 1.2519006729125977, + "learning_rate": 8.613605272130066e-08, + "loss": 0.3258, + "step": 47086 + }, + { + "epoch": 0.9426119160223206, + "grad_norm": 2.044551134109497, + "learning_rate": 8.607614823816779e-08, + "loss": 0.725, + "step": 47087 + }, + { + "epoch": 0.942631934539449, + "grad_norm": 1.0854462385177612, + "learning_rate": 8.60162644121143e-08, + "loss": 0.2917, + "step": 47088 + }, + { + "epoch": 0.9426519530565773, + "grad_norm": 1.2133426666259766, + "learning_rate": 8.595640124339e-08, + "loss": 0.3464, + "step": 47089 + }, + { + "epoch": 0.9426719715737056, + "grad_norm": 1.0882468223571777, + "learning_rate": 8.589655873224801e-08, + "loss": 0.3082, + "step": 47090 + }, + { + "epoch": 0.942691990090834, + "grad_norm": 1.1735897064208984, + "learning_rate": 8.583673687893923e-08, + "loss": 0.2664, + "step": 47091 + }, + { + "epoch": 0.9427120086079623, + "grad_norm": 1.8268687725067139, + "learning_rate": 8.577693568371515e-08, + "loss": 0.7352, + "step": 47092 + }, + { + "epoch": 0.9427320271250907, + "grad_norm": 1.0396829843521118, + "learning_rate": 8.571715514682721e-08, + "loss": 0.28, + "step": 47093 + }, + { + "epoch": 0.942752045642219, + "grad_norm": 1.0345158576965332, + "learning_rate": 8.56573952685269e-08, + "loss": 0.2754, + "step": 47094 + }, + { + "epoch": 0.9427720641593474, + "grad_norm": 1.1957324743270874, + "learning_rate": 8.559765604906567e-08, + "loss": 0.2921, + "step": 47095 + }, + { + "epoch": 0.9427920826764757, + "grad_norm": 1.8931900262832642, + "learning_rate": 8.553793748869388e-08, + "loss": 0.7699, + "step": 47096 + }, + { + "epoch": 0.942812101193604, + "grad_norm": 1.1822986602783203, + "learning_rate": 8.547823958766299e-08, + "loss": 0.3218, + "step": 47097 + }, + { + "epoch": 0.9428321197107324, + "grad_norm": 1.07866632938385, + "learning_rate": 8.541856234622391e-08, + "loss": 0.2782, + "step": 47098 + }, + { + "epoch": 0.9428521382278607, + "grad_norm": 1.003787875175476, + "learning_rate": 8.535890576462758e-08, + "loss": 0.3111, + "step": 47099 + }, + { + "epoch": 0.9428721567449891, + "grad_norm": 1.255661129951477, + "learning_rate": 8.529926984312542e-08, + "loss": 0.2825, + "step": 47100 + }, + { + "epoch": 0.9428921752621174, + "grad_norm": 1.0212684869766235, + "learning_rate": 8.523965458196671e-08, + "loss": 0.2237, + "step": 47101 + }, + { + "epoch": 0.9429121937792458, + "grad_norm": 1.0496876239776611, + "learning_rate": 8.518005998140288e-08, + "loss": 0.2865, + "step": 47102 + }, + { + "epoch": 0.9429322122963741, + "grad_norm": 1.089223027229309, + "learning_rate": 8.512048604168487e-08, + "loss": 0.2902, + "step": 47103 + }, + { + "epoch": 0.9429522308135024, + "grad_norm": 1.0862797498703003, + "learning_rate": 8.506093276306248e-08, + "loss": 0.2533, + "step": 47104 + }, + { + "epoch": 0.9429722493306308, + "grad_norm": 1.1099038124084473, + "learning_rate": 8.500140014578606e-08, + "loss": 0.2541, + "step": 47105 + }, + { + "epoch": 0.9429922678477591, + "grad_norm": 1.0490916967391968, + "learning_rate": 8.494188819010651e-08, + "loss": 0.2779, + "step": 47106 + }, + { + "epoch": 0.9430122863648875, + "grad_norm": 1.925284504890442, + "learning_rate": 8.488239689627253e-08, + "loss": 0.7692, + "step": 47107 + }, + { + "epoch": 0.9430323048820158, + "grad_norm": 1.1554954051971436, + "learning_rate": 8.482292626453615e-08, + "loss": 0.3031, + "step": 47108 + }, + { + "epoch": 0.9430523233991442, + "grad_norm": 1.0507827997207642, + "learning_rate": 8.476347629514603e-08, + "loss": 0.2946, + "step": 47109 + }, + { + "epoch": 0.9430723419162725, + "grad_norm": 1.0384423732757568, + "learning_rate": 8.4704046988352e-08, + "loss": 0.29, + "step": 47110 + }, + { + "epoch": 0.943092360433401, + "grad_norm": 1.1410473585128784, + "learning_rate": 8.46446383444055e-08, + "loss": 0.301, + "step": 47111 + }, + { + "epoch": 0.9431123789505292, + "grad_norm": 1.0255165100097656, + "learning_rate": 8.458525036355414e-08, + "loss": 0.2591, + "step": 47112 + }, + { + "epoch": 0.9431323974676575, + "grad_norm": 1.2161401510238647, + "learning_rate": 8.452588304604881e-08, + "loss": 0.3116, + "step": 47113 + }, + { + "epoch": 0.9431524159847859, + "grad_norm": 1.1912178993225098, + "learning_rate": 8.446653639213931e-08, + "loss": 0.3668, + "step": 47114 + }, + { + "epoch": 0.9431724345019142, + "grad_norm": 1.2636842727661133, + "learning_rate": 8.440721040207434e-08, + "loss": 0.2951, + "step": 47115 + }, + { + "epoch": 0.9431924530190426, + "grad_norm": 1.0276448726654053, + "learning_rate": 8.434790507610368e-08, + "loss": 0.2643, + "step": 47116 + }, + { + "epoch": 0.9432124715361709, + "grad_norm": 1.1130836009979248, + "learning_rate": 8.428862041447661e-08, + "loss": 0.3291, + "step": 47117 + }, + { + "epoch": 0.9432324900532993, + "grad_norm": 1.181265115737915, + "learning_rate": 8.422935641744235e-08, + "loss": 0.3059, + "step": 47118 + }, + { + "epoch": 0.9432525085704276, + "grad_norm": 1.0669552087783813, + "learning_rate": 8.417011308524958e-08, + "loss": 0.2452, + "step": 47119 + }, + { + "epoch": 0.9432725270875559, + "grad_norm": 1.2462908029556274, + "learning_rate": 8.411089041814813e-08, + "loss": 0.3185, + "step": 47120 + }, + { + "epoch": 0.9432925456046843, + "grad_norm": 1.1766846179962158, + "learning_rate": 8.405168841638666e-08, + "loss": 0.326, + "step": 47121 + }, + { + "epoch": 0.9433125641218126, + "grad_norm": 1.0645594596862793, + "learning_rate": 8.399250708021389e-08, + "loss": 0.3024, + "step": 47122 + }, + { + "epoch": 0.943332582638941, + "grad_norm": 1.1334550380706787, + "learning_rate": 8.393334640987793e-08, + "loss": 0.3117, + "step": 47123 + }, + { + "epoch": 0.9433526011560693, + "grad_norm": 1.166847586631775, + "learning_rate": 8.387420640562916e-08, + "loss": 0.2787, + "step": 47124 + }, + { + "epoch": 0.9433726196731977, + "grad_norm": 1.081578254699707, + "learning_rate": 8.381508706771458e-08, + "loss": 0.2999, + "step": 47125 + }, + { + "epoch": 0.943392638190326, + "grad_norm": 1.2486814260482788, + "learning_rate": 8.375598839638399e-08, + "loss": 0.2958, + "step": 47126 + }, + { + "epoch": 0.9434126567074544, + "grad_norm": 1.111689567565918, + "learning_rate": 8.369691039188554e-08, + "loss": 0.2832, + "step": 47127 + }, + { + "epoch": 0.9434326752245827, + "grad_norm": 1.0724360942840576, + "learning_rate": 8.36378530544668e-08, + "loss": 0.2876, + "step": 47128 + }, + { + "epoch": 0.943452693741711, + "grad_norm": 1.083793044090271, + "learning_rate": 8.357881638437593e-08, + "loss": 0.2524, + "step": 47129 + }, + { + "epoch": 0.9434727122588394, + "grad_norm": 0.9609453678131104, + "learning_rate": 8.351980038186269e-08, + "loss": 0.3026, + "step": 47130 + }, + { + "epoch": 0.9434927307759677, + "grad_norm": 1.9066245555877686, + "learning_rate": 8.346080504717357e-08, + "loss": 0.7353, + "step": 47131 + }, + { + "epoch": 0.9435127492930961, + "grad_norm": 1.1414777040481567, + "learning_rate": 8.340183038055782e-08, + "loss": 0.2763, + "step": 47132 + }, + { + "epoch": 0.9435327678102244, + "grad_norm": 1.0714237689971924, + "learning_rate": 8.33428763822619e-08, + "loss": 0.2935, + "step": 47133 + }, + { + "epoch": 0.9435527863273528, + "grad_norm": 1.2700908184051514, + "learning_rate": 8.328394305253506e-08, + "loss": 0.303, + "step": 47134 + }, + { + "epoch": 0.9435728048444811, + "grad_norm": 1.061913251876831, + "learning_rate": 8.322503039162433e-08, + "loss": 0.2581, + "step": 47135 + }, + { + "epoch": 0.9435928233616094, + "grad_norm": 1.1417803764343262, + "learning_rate": 8.316613839977672e-08, + "loss": 0.319, + "step": 47136 + }, + { + "epoch": 0.9436128418787378, + "grad_norm": 1.182305097579956, + "learning_rate": 8.310726707724148e-08, + "loss": 0.3796, + "step": 47137 + }, + { + "epoch": 0.9436328603958661, + "grad_norm": 2.0095858573913574, + "learning_rate": 8.304841642426453e-08, + "loss": 0.7288, + "step": 47138 + }, + { + "epoch": 0.9436528789129945, + "grad_norm": 1.2789314985275269, + "learning_rate": 8.2989586441094e-08, + "loss": 0.31, + "step": 47139 + }, + { + "epoch": 0.9436728974301228, + "grad_norm": 1.0487350225448608, + "learning_rate": 8.293077712797748e-08, + "loss": 0.2865, + "step": 47140 + }, + { + "epoch": 0.9436929159472512, + "grad_norm": 1.0519697666168213, + "learning_rate": 8.287198848516198e-08, + "loss": 0.2569, + "step": 47141 + }, + { + "epoch": 0.9437129344643795, + "grad_norm": 2.133652448654175, + "learning_rate": 8.281322051289398e-08, + "loss": 0.7959, + "step": 47142 + }, + { + "epoch": 0.9437329529815079, + "grad_norm": 1.1328623294830322, + "learning_rate": 8.275447321142105e-08, + "loss": 0.2897, + "step": 47143 + }, + { + "epoch": 0.9437529714986362, + "grad_norm": 1.2008789777755737, + "learning_rate": 8.269574658099022e-08, + "loss": 0.3074, + "step": 47144 + }, + { + "epoch": 0.9437729900157645, + "grad_norm": 1.1367474794387817, + "learning_rate": 8.263704062184852e-08, + "loss": 0.2804, + "step": 47145 + }, + { + "epoch": 0.9437930085328929, + "grad_norm": 1.0787990093231201, + "learning_rate": 8.257835533424185e-08, + "loss": 0.231, + "step": 47146 + }, + { + "epoch": 0.9438130270500212, + "grad_norm": 1.1522698402404785, + "learning_rate": 8.251969071841781e-08, + "loss": 0.2858, + "step": 47147 + }, + { + "epoch": 0.9438330455671496, + "grad_norm": 1.0485645532608032, + "learning_rate": 8.246104677462286e-08, + "loss": 0.2637, + "step": 47148 + }, + { + "epoch": 0.9438530640842779, + "grad_norm": 1.772186279296875, + "learning_rate": 8.240242350310346e-08, + "loss": 0.7938, + "step": 47149 + }, + { + "epoch": 0.9438730826014063, + "grad_norm": 1.1418192386627197, + "learning_rate": 8.234382090410553e-08, + "loss": 0.3165, + "step": 47150 + }, + { + "epoch": 0.9438931011185346, + "grad_norm": 1.988803744316101, + "learning_rate": 8.22852389778761e-08, + "loss": 0.7117, + "step": 47151 + }, + { + "epoch": 0.9439131196356629, + "grad_norm": 1.018604040145874, + "learning_rate": 8.222667772466164e-08, + "loss": 0.2916, + "step": 47152 + }, + { + "epoch": 0.9439331381527913, + "grad_norm": 1.1664425134658813, + "learning_rate": 8.216813714470751e-08, + "loss": 0.2596, + "step": 47153 + }, + { + "epoch": 0.9439531566699196, + "grad_norm": 1.1470537185668945, + "learning_rate": 8.210961723826072e-08, + "loss": 0.2872, + "step": 47154 + }, + { + "epoch": 0.943973175187048, + "grad_norm": 1.0813313722610474, + "learning_rate": 8.20511180055661e-08, + "loss": 0.3052, + "step": 47155 + }, + { + "epoch": 0.9439931937041763, + "grad_norm": 0.9892376661300659, + "learning_rate": 8.199263944687064e-08, + "loss": 0.274, + "step": 47156 + }, + { + "epoch": 0.9440132122213047, + "grad_norm": 1.9622071981430054, + "learning_rate": 8.193418156241972e-08, + "loss": 0.759, + "step": 47157 + }, + { + "epoch": 0.944033230738433, + "grad_norm": 1.0491944551467896, + "learning_rate": 8.18757443524587e-08, + "loss": 0.3216, + "step": 47158 + }, + { + "epoch": 0.9440532492555614, + "grad_norm": 1.0974568128585815, + "learning_rate": 8.181732781723406e-08, + "loss": 0.2738, + "step": 47159 + }, + { + "epoch": 0.9440732677726897, + "grad_norm": 1.0791209936141968, + "learning_rate": 8.175893195699058e-08, + "loss": 0.3011, + "step": 47160 + }, + { + "epoch": 0.944093286289818, + "grad_norm": 1.9867039918899536, + "learning_rate": 8.170055677197475e-08, + "loss": 0.6742, + "step": 47161 + }, + { + "epoch": 0.9441133048069464, + "grad_norm": 1.2512240409851074, + "learning_rate": 8.164220226243081e-08, + "loss": 0.2807, + "step": 47162 + }, + { + "epoch": 0.9441333233240747, + "grad_norm": 1.9587913751602173, + "learning_rate": 8.158386842860466e-08, + "loss": 0.7044, + "step": 47163 + }, + { + "epoch": 0.9441533418412031, + "grad_norm": 1.0500775575637817, + "learning_rate": 8.152555527074168e-08, + "loss": 0.2861, + "step": 47164 + }, + { + "epoch": 0.9441733603583314, + "grad_norm": 1.0925430059432983, + "learning_rate": 8.146726278908668e-08, + "loss": 0.299, + "step": 47165 + }, + { + "epoch": 0.9441933788754598, + "grad_norm": 1.071274757385254, + "learning_rate": 8.140899098388499e-08, + "loss": 0.2653, + "step": 47166 + }, + { + "epoch": 0.9442133973925881, + "grad_norm": 0.997732400894165, + "learning_rate": 8.135073985538144e-08, + "loss": 0.2935, + "step": 47167 + }, + { + "epoch": 0.9442334159097164, + "grad_norm": 1.9063881635665894, + "learning_rate": 8.129250940382028e-08, + "loss": 0.7583, + "step": 47168 + }, + { + "epoch": 0.9442534344268448, + "grad_norm": 1.1609410047531128, + "learning_rate": 8.123429962944795e-08, + "loss": 0.3107, + "step": 47169 + }, + { + "epoch": 0.9442734529439731, + "grad_norm": 1.1350294351577759, + "learning_rate": 8.117611053250762e-08, + "loss": 0.2763, + "step": 47170 + }, + { + "epoch": 0.9442934714611015, + "grad_norm": 1.8068991899490356, + "learning_rate": 8.111794211324408e-08, + "loss": 0.7195, + "step": 47171 + }, + { + "epoch": 0.9443134899782298, + "grad_norm": 1.4156712293624878, + "learning_rate": 8.10597943719027e-08, + "loss": 0.2955, + "step": 47172 + }, + { + "epoch": 0.9443335084953582, + "grad_norm": 1.3696916103363037, + "learning_rate": 8.10016673087266e-08, + "loss": 0.2689, + "step": 47173 + }, + { + "epoch": 0.9443535270124865, + "grad_norm": 1.2322667837142944, + "learning_rate": 8.094356092396116e-08, + "loss": 0.2648, + "step": 47174 + }, + { + "epoch": 0.9443735455296149, + "grad_norm": 1.2960708141326904, + "learning_rate": 8.088547521785062e-08, + "loss": 0.2893, + "step": 47175 + }, + { + "epoch": 0.9443935640467432, + "grad_norm": 1.886671781539917, + "learning_rate": 8.082741019063867e-08, + "loss": 0.706, + "step": 47176 + }, + { + "epoch": 0.9444135825638715, + "grad_norm": 1.0392992496490479, + "learning_rate": 8.076936584256956e-08, + "loss": 0.2845, + "step": 47177 + }, + { + "epoch": 0.9444336010809999, + "grad_norm": 1.0921212434768677, + "learning_rate": 8.071134217388809e-08, + "loss": 0.302, + "step": 47178 + }, + { + "epoch": 0.9444536195981282, + "grad_norm": 1.0526034832000732, + "learning_rate": 8.065333918483743e-08, + "loss": 0.2607, + "step": 47179 + }, + { + "epoch": 0.9444736381152566, + "grad_norm": 1.1073206663131714, + "learning_rate": 8.059535687566123e-08, + "loss": 0.2769, + "step": 47180 + }, + { + "epoch": 0.9444936566323849, + "grad_norm": 1.2841532230377197, + "learning_rate": 8.05373952466032e-08, + "loss": 0.308, + "step": 47181 + }, + { + "epoch": 0.9445136751495133, + "grad_norm": 1.1774346828460693, + "learning_rate": 8.04794542979076e-08, + "loss": 0.3133, + "step": 47182 + }, + { + "epoch": 0.9445336936666416, + "grad_norm": 1.0999600887298584, + "learning_rate": 8.042153402981757e-08, + "loss": 0.3264, + "step": 47183 + }, + { + "epoch": 0.9445537121837699, + "grad_norm": 1.1160454750061035, + "learning_rate": 8.036363444257678e-08, + "loss": 0.2699, + "step": 47184 + }, + { + "epoch": 0.9445737307008983, + "grad_norm": 1.1326332092285156, + "learning_rate": 8.030575553642839e-08, + "loss": 0.2887, + "step": 47185 + }, + { + "epoch": 0.9445937492180266, + "grad_norm": 1.093134880065918, + "learning_rate": 8.024789731161609e-08, + "loss": 0.3087, + "step": 47186 + }, + { + "epoch": 0.944613767735155, + "grad_norm": 1.0704470872879028, + "learning_rate": 8.019005976838301e-08, + "loss": 0.3033, + "step": 47187 + }, + { + "epoch": 0.9446337862522833, + "grad_norm": 1.1036899089813232, + "learning_rate": 8.013224290697119e-08, + "loss": 0.3034, + "step": 47188 + }, + { + "epoch": 0.9446538047694117, + "grad_norm": 1.1007286310195923, + "learning_rate": 8.007444672762598e-08, + "loss": 0.3102, + "step": 47189 + }, + { + "epoch": 0.94467382328654, + "grad_norm": 1.1071157455444336, + "learning_rate": 8.001667123058831e-08, + "loss": 0.232, + "step": 47190 + }, + { + "epoch": 0.9446938418036684, + "grad_norm": 1.1472116708755493, + "learning_rate": 7.995891641610187e-08, + "loss": 0.2928, + "step": 47191 + }, + { + "epoch": 0.9447138603207967, + "grad_norm": 1.9705164432525635, + "learning_rate": 7.990118228440924e-08, + "loss": 0.7853, + "step": 47192 + }, + { + "epoch": 0.944733878837925, + "grad_norm": 1.1144990921020508, + "learning_rate": 7.984346883575355e-08, + "loss": 0.3185, + "step": 47193 + }, + { + "epoch": 0.9447538973550534, + "grad_norm": 0.9466654658317566, + "learning_rate": 7.978577607037685e-08, + "loss": 0.3254, + "step": 47194 + }, + { + "epoch": 0.9447739158721817, + "grad_norm": 1.1033843755722046, + "learning_rate": 7.972810398852172e-08, + "loss": 0.2905, + "step": 47195 + }, + { + "epoch": 0.9447939343893101, + "grad_norm": 1.0820692777633667, + "learning_rate": 7.967045259043127e-08, + "loss": 0.2691, + "step": 47196 + }, + { + "epoch": 0.9448139529064384, + "grad_norm": 1.591565489768982, + "learning_rate": 7.9612821876347e-08, + "loss": 0.3226, + "step": 47197 + }, + { + "epoch": 0.9448339714235668, + "grad_norm": 1.077283501625061, + "learning_rate": 7.955521184651205e-08, + "loss": 0.2957, + "step": 47198 + }, + { + "epoch": 0.9448539899406951, + "grad_norm": 1.047857403755188, + "learning_rate": 7.949762250116733e-08, + "loss": 0.3035, + "step": 47199 + }, + { + "epoch": 0.9448740084578234, + "grad_norm": 1.1070852279663086, + "learning_rate": 7.944005384055597e-08, + "loss": 0.2217, + "step": 47200 + }, + { + "epoch": 0.9448940269749518, + "grad_norm": 1.7227919101715088, + "learning_rate": 7.938250586491947e-08, + "loss": 0.7766, + "step": 47201 + }, + { + "epoch": 0.9449140454920801, + "grad_norm": 1.2357228994369507, + "learning_rate": 7.932497857450039e-08, + "loss": 0.2902, + "step": 47202 + }, + { + "epoch": 0.9449340640092085, + "grad_norm": 1.2737690210342407, + "learning_rate": 7.926747196954021e-08, + "loss": 0.2733, + "step": 47203 + }, + { + "epoch": 0.9449540825263368, + "grad_norm": 1.1005115509033203, + "learning_rate": 7.920998605028041e-08, + "loss": 0.2773, + "step": 47204 + }, + { + "epoch": 0.9449741010434652, + "grad_norm": 1.3267563581466675, + "learning_rate": 7.915252081696301e-08, + "loss": 0.3474, + "step": 47205 + }, + { + "epoch": 0.9449941195605935, + "grad_norm": 1.1867237091064453, + "learning_rate": 7.909507626982948e-08, + "loss": 0.3157, + "step": 47206 + }, + { + "epoch": 0.9450141380777219, + "grad_norm": 1.1175912618637085, + "learning_rate": 7.903765240912131e-08, + "loss": 0.3125, + "step": 47207 + }, + { + "epoch": 0.9450341565948502, + "grad_norm": 1.004040241241455, + "learning_rate": 7.898024923507941e-08, + "loss": 0.2716, + "step": 47208 + }, + { + "epoch": 0.9450541751119785, + "grad_norm": 1.1148910522460938, + "learning_rate": 7.89228667479458e-08, + "loss": 0.2969, + "step": 47209 + }, + { + "epoch": 0.9450741936291069, + "grad_norm": 1.098165512084961, + "learning_rate": 7.886550494796197e-08, + "loss": 0.2538, + "step": 47210 + }, + { + "epoch": 0.9450942121462352, + "grad_norm": 1.0578327178955078, + "learning_rate": 7.880816383536771e-08, + "loss": 0.2603, + "step": 47211 + }, + { + "epoch": 0.9451142306633636, + "grad_norm": 1.2059016227722168, + "learning_rate": 7.875084341040562e-08, + "loss": 0.3616, + "step": 47212 + }, + { + "epoch": 0.9451342491804919, + "grad_norm": 1.0903517007827759, + "learning_rate": 7.869354367331495e-08, + "loss": 0.3003, + "step": 47213 + }, + { + "epoch": 0.9451542676976203, + "grad_norm": 1.1303142309188843, + "learning_rate": 7.863626462433771e-08, + "loss": 0.2543, + "step": 47214 + }, + { + "epoch": 0.9451742862147486, + "grad_norm": 1.1480234861373901, + "learning_rate": 7.857900626371484e-08, + "loss": 0.2805, + "step": 47215 + }, + { + "epoch": 0.9451943047318769, + "grad_norm": 1.8578230142593384, + "learning_rate": 7.852176859168725e-08, + "loss": 0.719, + "step": 47216 + }, + { + "epoch": 0.9452143232490053, + "grad_norm": 1.0776551961898804, + "learning_rate": 7.84645516084942e-08, + "loss": 0.3089, + "step": 47217 + }, + { + "epoch": 0.9452343417661336, + "grad_norm": 1.0551451444625854, + "learning_rate": 7.840735531437715e-08, + "loss": 0.3015, + "step": 47218 + }, + { + "epoch": 0.945254360283262, + "grad_norm": 1.0612704753875732, + "learning_rate": 7.835017970957703e-08, + "loss": 0.2764, + "step": 47219 + }, + { + "epoch": 0.9452743788003903, + "grad_norm": 1.1382479667663574, + "learning_rate": 7.829302479433365e-08, + "loss": 0.2809, + "step": 47220 + }, + { + "epoch": 0.9452943973175187, + "grad_norm": 1.161299467086792, + "learning_rate": 7.823589056888681e-08, + "loss": 0.2909, + "step": 47221 + }, + { + "epoch": 0.945314415834647, + "grad_norm": 1.0281578302383423, + "learning_rate": 7.817877703347687e-08, + "loss": 0.2959, + "step": 47222 + }, + { + "epoch": 0.9453344343517754, + "grad_norm": 1.0879325866699219, + "learning_rate": 7.81216841883442e-08, + "loss": 0.3339, + "step": 47223 + }, + { + "epoch": 0.9453544528689037, + "grad_norm": 1.1644216775894165, + "learning_rate": 7.806461203372917e-08, + "loss": 0.2819, + "step": 47224 + }, + { + "epoch": 0.945374471386032, + "grad_norm": 1.2412803173065186, + "learning_rate": 7.800756056987158e-08, + "loss": 0.2911, + "step": 47225 + }, + { + "epoch": 0.9453944899031604, + "grad_norm": 0.9882930517196655, + "learning_rate": 7.795052979701013e-08, + "loss": 0.2188, + "step": 47226 + }, + { + "epoch": 0.9454145084202887, + "grad_norm": 1.2606432437896729, + "learning_rate": 7.789351971538572e-08, + "loss": 0.3038, + "step": 47227 + }, + { + "epoch": 0.9454345269374171, + "grad_norm": 1.0572408437728882, + "learning_rate": 7.783653032523764e-08, + "loss": 0.3035, + "step": 47228 + }, + { + "epoch": 0.9454545454545454, + "grad_norm": 0.9222933650016785, + "learning_rate": 7.777956162680567e-08, + "loss": 0.2762, + "step": 47229 + }, + { + "epoch": 0.9454745639716738, + "grad_norm": 1.1145501136779785, + "learning_rate": 7.772261362032907e-08, + "loss": 0.3375, + "step": 47230 + }, + { + "epoch": 0.9454945824888021, + "grad_norm": 1.0789860486984253, + "learning_rate": 7.766568630604766e-08, + "loss": 0.2716, + "step": 47231 + }, + { + "epoch": 0.9455146010059304, + "grad_norm": 1.8322067260742188, + "learning_rate": 7.760877968420011e-08, + "loss": 0.7537, + "step": 47232 + }, + { + "epoch": 0.9455346195230588, + "grad_norm": 1.2692621946334839, + "learning_rate": 7.755189375502626e-08, + "loss": 0.3052, + "step": 47233 + }, + { + "epoch": 0.9455546380401871, + "grad_norm": 1.1440248489379883, + "learning_rate": 7.749502851876478e-08, + "loss": 0.314, + "step": 47234 + }, + { + "epoch": 0.9455746565573155, + "grad_norm": 1.0912631750106812, + "learning_rate": 7.74381839756544e-08, + "loss": 0.2924, + "step": 47235 + }, + { + "epoch": 0.9455946750744438, + "grad_norm": 0.9667620062828064, + "learning_rate": 7.738136012593489e-08, + "loss": 0.242, + "step": 47236 + }, + { + "epoch": 0.9456146935915722, + "grad_norm": 1.0765987634658813, + "learning_rate": 7.732455696984497e-08, + "loss": 0.2842, + "step": 47237 + }, + { + "epoch": 0.9456347121087005, + "grad_norm": 1.1263587474822998, + "learning_rate": 7.72677745076228e-08, + "loss": 0.3043, + "step": 47238 + }, + { + "epoch": 0.9456547306258289, + "grad_norm": 1.010040044784546, + "learning_rate": 7.72110127395076e-08, + "loss": 0.2536, + "step": 47239 + }, + { + "epoch": 0.9456747491429572, + "grad_norm": 1.0517337322235107, + "learning_rate": 7.715427166573807e-08, + "loss": 0.299, + "step": 47240 + }, + { + "epoch": 0.9456947676600855, + "grad_norm": 1.0993107557296753, + "learning_rate": 7.709755128655239e-08, + "loss": 0.2512, + "step": 47241 + }, + { + "epoch": 0.9457147861772139, + "grad_norm": 1.1440136432647705, + "learning_rate": 7.704085160218977e-08, + "loss": 0.2691, + "step": 47242 + }, + { + "epoch": 0.9457348046943422, + "grad_norm": 1.1296625137329102, + "learning_rate": 7.698417261288727e-08, + "loss": 0.3339, + "step": 47243 + }, + { + "epoch": 0.9457548232114706, + "grad_norm": 1.197094202041626, + "learning_rate": 7.692751431888412e-08, + "loss": 0.2965, + "step": 47244 + }, + { + "epoch": 0.9457748417285989, + "grad_norm": 1.0048037767410278, + "learning_rate": 7.687087672041849e-08, + "loss": 0.2778, + "step": 47245 + }, + { + "epoch": 0.9457948602457273, + "grad_norm": 1.0692335367202759, + "learning_rate": 7.681425981772794e-08, + "loss": 0.2718, + "step": 47246 + }, + { + "epoch": 0.9458148787628556, + "grad_norm": 1.0136330127716064, + "learning_rate": 7.67576636110512e-08, + "loss": 0.2632, + "step": 47247 + }, + { + "epoch": 0.9458348972799839, + "grad_norm": 1.0602984428405762, + "learning_rate": 7.670108810062526e-08, + "loss": 0.2804, + "step": 47248 + }, + { + "epoch": 0.9458549157971123, + "grad_norm": 1.1514641046524048, + "learning_rate": 7.66445332866883e-08, + "loss": 0.3014, + "step": 47249 + }, + { + "epoch": 0.9458749343142406, + "grad_norm": 1.1365877389907837, + "learning_rate": 7.6587999169479e-08, + "loss": 0.2797, + "step": 47250 + }, + { + "epoch": 0.945894952831369, + "grad_norm": 1.1531410217285156, + "learning_rate": 7.653148574923386e-08, + "loss": 0.296, + "step": 47251 + }, + { + "epoch": 0.9459149713484973, + "grad_norm": 1.1453747749328613, + "learning_rate": 7.647499302618988e-08, + "loss": 0.3057, + "step": 47252 + }, + { + "epoch": 0.9459349898656257, + "grad_norm": 1.065258502960205, + "learning_rate": 7.641852100058633e-08, + "loss": 0.2749, + "step": 47253 + }, + { + "epoch": 0.945955008382754, + "grad_norm": 1.1320687532424927, + "learning_rate": 7.636206967265969e-08, + "loss": 0.2906, + "step": 47254 + }, + { + "epoch": 0.9459750268998824, + "grad_norm": 1.209517240524292, + "learning_rate": 7.630563904264754e-08, + "loss": 0.3101, + "step": 47255 + }, + { + "epoch": 0.9459950454170107, + "grad_norm": 1.1569204330444336, + "learning_rate": 7.624922911078691e-08, + "loss": 0.3091, + "step": 47256 + }, + { + "epoch": 0.946015063934139, + "grad_norm": 1.0688917636871338, + "learning_rate": 7.61928398773143e-08, + "loss": 0.323, + "step": 47257 + }, + { + "epoch": 0.9460350824512674, + "grad_norm": 1.0890493392944336, + "learning_rate": 7.613647134246782e-08, + "loss": 0.2647, + "step": 47258 + }, + { + "epoch": 0.9460551009683957, + "grad_norm": 1.0863806009292603, + "learning_rate": 7.608012350648397e-08, + "loss": 0.3025, + "step": 47259 + }, + { + "epoch": 0.9460751194855241, + "grad_norm": 1.1533513069152832, + "learning_rate": 7.602379636959978e-08, + "loss": 0.2866, + "step": 47260 + }, + { + "epoch": 0.9460951380026524, + "grad_norm": 1.7996104955673218, + "learning_rate": 7.596748993205116e-08, + "loss": 0.7181, + "step": 47261 + }, + { + "epoch": 0.9461151565197808, + "grad_norm": 1.2780030965805054, + "learning_rate": 7.591120419407627e-08, + "loss": 0.2679, + "step": 47262 + }, + { + "epoch": 0.9461351750369091, + "grad_norm": 1.1443616151809692, + "learning_rate": 7.585493915591103e-08, + "loss": 0.2825, + "step": 47263 + }, + { + "epoch": 0.9461551935540374, + "grad_norm": 1.0566304922103882, + "learning_rate": 7.579869481779189e-08, + "loss": 0.3275, + "step": 47264 + }, + { + "epoch": 0.9461752120711658, + "grad_norm": 1.0337369441986084, + "learning_rate": 7.574247117995482e-08, + "loss": 0.2832, + "step": 47265 + }, + { + "epoch": 0.9461952305882941, + "grad_norm": 1.1807551383972168, + "learning_rate": 7.568626824263681e-08, + "loss": 0.2756, + "step": 47266 + }, + { + "epoch": 0.9462152491054225, + "grad_norm": 1.0953456163406372, + "learning_rate": 7.563008600607436e-08, + "loss": 0.3106, + "step": 47267 + }, + { + "epoch": 0.9462352676225508, + "grad_norm": 1.1656501293182373, + "learning_rate": 7.557392447050338e-08, + "loss": 0.3288, + "step": 47268 + }, + { + "epoch": 0.9462552861396792, + "grad_norm": 1.008266806602478, + "learning_rate": 7.55177836361598e-08, + "loss": 0.2618, + "step": 47269 + }, + { + "epoch": 0.9462753046568075, + "grad_norm": 1.025099515914917, + "learning_rate": 7.5461663503279e-08, + "loss": 0.2601, + "step": 47270 + }, + { + "epoch": 0.9462953231739359, + "grad_norm": 1.1674362421035767, + "learning_rate": 7.540556407209853e-08, + "loss": 0.2936, + "step": 47271 + }, + { + "epoch": 0.9463153416910642, + "grad_norm": 1.0126469135284424, + "learning_rate": 7.534948534285324e-08, + "loss": 0.2657, + "step": 47272 + }, + { + "epoch": 0.9463353602081925, + "grad_norm": 1.1481664180755615, + "learning_rate": 7.529342731577849e-08, + "loss": 0.2867, + "step": 47273 + }, + { + "epoch": 0.9463553787253209, + "grad_norm": 1.1579749584197998, + "learning_rate": 7.523738999111075e-08, + "loss": 0.3172, + "step": 47274 + }, + { + "epoch": 0.9463753972424492, + "grad_norm": 1.117627739906311, + "learning_rate": 7.518137336908426e-08, + "loss": 0.288, + "step": 47275 + }, + { + "epoch": 0.9463954157595776, + "grad_norm": 1.2167128324508667, + "learning_rate": 7.512537744993665e-08, + "loss": 0.3183, + "step": 47276 + }, + { + "epoch": 0.9464154342767059, + "grad_norm": 1.044924259185791, + "learning_rate": 7.506940223390158e-08, + "loss": 0.2863, + "step": 47277 + }, + { + "epoch": 0.9464354527938343, + "grad_norm": 1.145876407623291, + "learning_rate": 7.501344772121444e-08, + "loss": 0.2705, + "step": 47278 + }, + { + "epoch": 0.9464554713109626, + "grad_norm": 1.0639272928237915, + "learning_rate": 7.495751391211115e-08, + "loss": 0.3016, + "step": 47279 + }, + { + "epoch": 0.9464754898280909, + "grad_norm": 1.1108717918395996, + "learning_rate": 7.490160080682651e-08, + "loss": 0.3324, + "step": 47280 + }, + { + "epoch": 0.9464955083452193, + "grad_norm": 1.0118063688278198, + "learning_rate": 7.484570840559647e-08, + "loss": 0.2745, + "step": 47281 + }, + { + "epoch": 0.9465155268623476, + "grad_norm": 0.9802471995353699, + "learning_rate": 7.47898367086547e-08, + "loss": 0.1982, + "step": 47282 + }, + { + "epoch": 0.946535545379476, + "grad_norm": 1.241594910621643, + "learning_rate": 7.473398571623602e-08, + "loss": 0.3565, + "step": 47283 + }, + { + "epoch": 0.9465555638966043, + "grad_norm": 1.4895495176315308, + "learning_rate": 7.467815542857637e-08, + "loss": 0.2285, + "step": 47284 + }, + { + "epoch": 0.9465755824137327, + "grad_norm": 1.1058356761932373, + "learning_rate": 7.462234584590944e-08, + "loss": 0.2658, + "step": 47285 + }, + { + "epoch": 0.946595600930861, + "grad_norm": 0.9733139276504517, + "learning_rate": 7.456655696847059e-08, + "loss": 0.2362, + "step": 47286 + }, + { + "epoch": 0.9466156194479894, + "grad_norm": 1.0757030248641968, + "learning_rate": 7.451078879649409e-08, + "loss": 0.2733, + "step": 47287 + }, + { + "epoch": 0.9466356379651177, + "grad_norm": 1.2175897359848022, + "learning_rate": 7.445504133021364e-08, + "loss": 0.2572, + "step": 47288 + }, + { + "epoch": 0.946655656482246, + "grad_norm": 1.189624309539795, + "learning_rate": 7.43993145698646e-08, + "loss": 0.2785, + "step": 47289 + }, + { + "epoch": 0.9466756749993744, + "grad_norm": 1.1100903749465942, + "learning_rate": 7.434360851568067e-08, + "loss": 0.3146, + "step": 47290 + }, + { + "epoch": 0.9466956935165027, + "grad_norm": 1.1807277202606201, + "learning_rate": 7.428792316789613e-08, + "loss": 0.2828, + "step": 47291 + }, + { + "epoch": 0.9467157120336311, + "grad_norm": 1.9551359415054321, + "learning_rate": 7.423225852674465e-08, + "loss": 0.7057, + "step": 47292 + }, + { + "epoch": 0.9467357305507594, + "grad_norm": 1.247690200805664, + "learning_rate": 7.417661459246162e-08, + "loss": 0.3165, + "step": 47293 + }, + { + "epoch": 0.9467557490678878, + "grad_norm": 1.1621923446655273, + "learning_rate": 7.412099136527961e-08, + "loss": 0.3508, + "step": 47294 + }, + { + "epoch": 0.9467757675850161, + "grad_norm": 1.0914306640625, + "learning_rate": 7.406538884543346e-08, + "loss": 0.2911, + "step": 47295 + }, + { + "epoch": 0.9467957861021444, + "grad_norm": 1.061378836631775, + "learning_rate": 7.400980703315574e-08, + "loss": 0.2836, + "step": 47296 + }, + { + "epoch": 0.9468158046192728, + "grad_norm": 1.1869004964828491, + "learning_rate": 7.395424592868072e-08, + "loss": 0.3035, + "step": 47297 + }, + { + "epoch": 0.9468358231364011, + "grad_norm": 1.1539608240127563, + "learning_rate": 7.389870553224209e-08, + "loss": 0.2848, + "step": 47298 + }, + { + "epoch": 0.9468558416535295, + "grad_norm": 1.115132451057434, + "learning_rate": 7.384318584407358e-08, + "loss": 0.3255, + "step": 47299 + }, + { + "epoch": 0.9468758601706578, + "grad_norm": 1.0499238967895508, + "learning_rate": 7.378768686440773e-08, + "loss": 0.31, + "step": 47300 + }, + { + "epoch": 0.9468958786877862, + "grad_norm": 1.1333709955215454, + "learning_rate": 7.37322085934783e-08, + "loss": 0.3025, + "step": 47301 + }, + { + "epoch": 0.9469158972049145, + "grad_norm": 1.122819185256958, + "learning_rate": 7.367675103151894e-08, + "loss": 0.3082, + "step": 47302 + }, + { + "epoch": 0.9469359157220429, + "grad_norm": 1.0498346090316772, + "learning_rate": 7.362131417876228e-08, + "loss": 0.2813, + "step": 47303 + }, + { + "epoch": 0.9469559342391712, + "grad_norm": 1.7972084283828735, + "learning_rate": 7.35658980354409e-08, + "loss": 0.7198, + "step": 47304 + }, + { + "epoch": 0.9469759527562995, + "grad_norm": 1.5661070346832275, + "learning_rate": 7.351050260178849e-08, + "loss": 0.2833, + "step": 47305 + }, + { + "epoch": 0.9469959712734279, + "grad_norm": 1.1113489866256714, + "learning_rate": 7.345512787803765e-08, + "loss": 0.2822, + "step": 47306 + }, + { + "epoch": 0.9470159897905562, + "grad_norm": 1.0402880907058716, + "learning_rate": 7.339977386442155e-08, + "loss": 0.2855, + "step": 47307 + }, + { + "epoch": 0.9470360083076846, + "grad_norm": 1.1004287004470825, + "learning_rate": 7.334444056117274e-08, + "loss": 0.293, + "step": 47308 + }, + { + "epoch": 0.9470560268248129, + "grad_norm": 1.1736222505569458, + "learning_rate": 7.328912796852384e-08, + "loss": 0.2808, + "step": 47309 + }, + { + "epoch": 0.9470760453419413, + "grad_norm": 1.0283046960830688, + "learning_rate": 7.323383608670631e-08, + "loss": 0.3057, + "step": 47310 + }, + { + "epoch": 0.9470960638590696, + "grad_norm": 1.145013451576233, + "learning_rate": 7.317856491595388e-08, + "loss": 0.306, + "step": 47311 + }, + { + "epoch": 0.9471160823761979, + "grad_norm": 1.1031421422958374, + "learning_rate": 7.312331445649912e-08, + "loss": 0.3186, + "step": 47312 + }, + { + "epoch": 0.9471361008933263, + "grad_norm": 1.5443977117538452, + "learning_rate": 7.306808470857296e-08, + "loss": 0.3257, + "step": 47313 + }, + { + "epoch": 0.9471561194104546, + "grad_norm": 1.1139942407608032, + "learning_rate": 7.301287567240856e-08, + "loss": 0.2783, + "step": 47314 + }, + { + "epoch": 0.947176137927583, + "grad_norm": 1.2223379611968994, + "learning_rate": 7.295768734823739e-08, + "loss": 0.3042, + "step": 47315 + }, + { + "epoch": 0.9471961564447113, + "grad_norm": 1.045095682144165, + "learning_rate": 7.290251973629259e-08, + "loss": 0.3132, + "step": 47316 + }, + { + "epoch": 0.9472161749618397, + "grad_norm": 1.7434699535369873, + "learning_rate": 7.284737283680398e-08, + "loss": 0.7474, + "step": 47317 + }, + { + "epoch": 0.947236193478968, + "grad_norm": 1.0404253005981445, + "learning_rate": 7.279224665000584e-08, + "loss": 0.2881, + "step": 47318 + }, + { + "epoch": 0.9472562119960964, + "grad_norm": 1.2138978242874146, + "learning_rate": 7.273714117612796e-08, + "loss": 0.2438, + "step": 47319 + }, + { + "epoch": 0.9472762305132247, + "grad_norm": 1.285320520401001, + "learning_rate": 7.268205641540349e-08, + "loss": 0.3372, + "step": 47320 + }, + { + "epoch": 0.947296249030353, + "grad_norm": 1.1706041097640991, + "learning_rate": 7.26269923680628e-08, + "loss": 0.3209, + "step": 47321 + }, + { + "epoch": 0.9473162675474814, + "grad_norm": 1.0745352506637573, + "learning_rate": 7.25719490343385e-08, + "loss": 0.2752, + "step": 47322 + }, + { + "epoch": 0.9473362860646097, + "grad_norm": 1.2889076471328735, + "learning_rate": 7.25169264144604e-08, + "loss": 0.3072, + "step": 47323 + }, + { + "epoch": 0.9473563045817381, + "grad_norm": 1.0523139238357544, + "learning_rate": 7.246192450866108e-08, + "loss": 0.2791, + "step": 47324 + }, + { + "epoch": 0.9473763230988664, + "grad_norm": 1.1352167129516602, + "learning_rate": 7.240694331717202e-08, + "loss": 0.3087, + "step": 47325 + }, + { + "epoch": 0.9473963416159948, + "grad_norm": 1.2170381546020508, + "learning_rate": 7.235198284022304e-08, + "loss": 0.27, + "step": 47326 + }, + { + "epoch": 0.9474163601331231, + "grad_norm": 1.0878747701644897, + "learning_rate": 7.22970430780462e-08, + "loss": 0.2968, + "step": 47327 + }, + { + "epoch": 0.9474363786502514, + "grad_norm": 1.09671151638031, + "learning_rate": 7.224212403087183e-08, + "loss": 0.3064, + "step": 47328 + }, + { + "epoch": 0.9474563971673798, + "grad_norm": 1.150364637374878, + "learning_rate": 7.218722569893144e-08, + "loss": 0.2756, + "step": 47329 + }, + { + "epoch": 0.9474764156845081, + "grad_norm": 1.2150967121124268, + "learning_rate": 7.213234808245484e-08, + "loss": 0.2725, + "step": 47330 + }, + { + "epoch": 0.9474964342016365, + "grad_norm": 1.1757938861846924, + "learning_rate": 7.207749118167406e-08, + "loss": 0.3258, + "step": 47331 + }, + { + "epoch": 0.9475164527187648, + "grad_norm": 1.0996713638305664, + "learning_rate": 7.202265499681837e-08, + "loss": 0.3129, + "step": 47332 + }, + { + "epoch": 0.9475364712358932, + "grad_norm": 1.1440889835357666, + "learning_rate": 7.196783952811981e-08, + "loss": 0.2533, + "step": 47333 + }, + { + "epoch": 0.9475564897530215, + "grad_norm": 1.0761550664901733, + "learning_rate": 7.191304477580763e-08, + "loss": 0.2921, + "step": 47334 + }, + { + "epoch": 0.9475765082701499, + "grad_norm": 1.1905200481414795, + "learning_rate": 7.18582707401122e-08, + "loss": 0.2871, + "step": 47335 + }, + { + "epoch": 0.9475965267872782, + "grad_norm": 1.1440919637680054, + "learning_rate": 7.180351742126446e-08, + "loss": 0.3198, + "step": 47336 + }, + { + "epoch": 0.9476165453044065, + "grad_norm": 1.0285753011703491, + "learning_rate": 7.17487848194931e-08, + "loss": 0.2885, + "step": 47337 + }, + { + "epoch": 0.9476365638215349, + "grad_norm": 1.0294725894927979, + "learning_rate": 7.169407293503017e-08, + "loss": 0.2987, + "step": 47338 + }, + { + "epoch": 0.9476565823386632, + "grad_norm": 1.1453166007995605, + "learning_rate": 7.163938176810492e-08, + "loss": 0.3031, + "step": 47339 + }, + { + "epoch": 0.9476766008557916, + "grad_norm": 1.1915777921676636, + "learning_rate": 7.158471131894717e-08, + "loss": 0.2739, + "step": 47340 + }, + { + "epoch": 0.9476966193729199, + "grad_norm": 1.1903705596923828, + "learning_rate": 7.15300615877862e-08, + "loss": 0.2849, + "step": 47341 + }, + { + "epoch": 0.9477166378900483, + "grad_norm": 1.2753431797027588, + "learning_rate": 7.147543257485234e-08, + "loss": 0.2904, + "step": 47342 + }, + { + "epoch": 0.9477366564071766, + "grad_norm": 2.1588194370269775, + "learning_rate": 7.142082428037489e-08, + "loss": 0.7225, + "step": 47343 + }, + { + "epoch": 0.9477566749243049, + "grad_norm": 0.951806902885437, + "learning_rate": 7.13662367045842e-08, + "loss": 0.2486, + "step": 47344 + }, + { + "epoch": 0.9477766934414333, + "grad_norm": 1.0894858837127686, + "learning_rate": 7.131166984770843e-08, + "loss": 0.2773, + "step": 47345 + }, + { + "epoch": 0.9477967119585616, + "grad_norm": 1.8527255058288574, + "learning_rate": 7.12571237099785e-08, + "loss": 0.7845, + "step": 47346 + }, + { + "epoch": 0.94781673047569, + "grad_norm": 1.8387914896011353, + "learning_rate": 7.120259829162257e-08, + "loss": 0.795, + "step": 47347 + }, + { + "epoch": 0.9478367489928183, + "grad_norm": 1.9610947370529175, + "learning_rate": 7.114809359287044e-08, + "loss": 0.7428, + "step": 47348 + }, + { + "epoch": 0.9478567675099467, + "grad_norm": 1.0484167337417603, + "learning_rate": 7.10936096139514e-08, + "loss": 0.2883, + "step": 47349 + }, + { + "epoch": 0.947876786027075, + "grad_norm": 1.0852437019348145, + "learning_rate": 7.103914635509301e-08, + "loss": 0.2379, + "step": 47350 + }, + { + "epoch": 0.9478968045442034, + "grad_norm": 1.1426304578781128, + "learning_rate": 7.098470381652623e-08, + "loss": 0.3678, + "step": 47351 + }, + { + "epoch": 0.9479168230613317, + "grad_norm": 1.0178686380386353, + "learning_rate": 7.093028199847918e-08, + "loss": 0.2871, + "step": 47352 + }, + { + "epoch": 0.94793684157846, + "grad_norm": 1.2955430746078491, + "learning_rate": 7.087588090118003e-08, + "loss": 0.2804, + "step": 47353 + }, + { + "epoch": 0.9479568600955884, + "grad_norm": 1.1602764129638672, + "learning_rate": 7.082150052485748e-08, + "loss": 0.3155, + "step": 47354 + }, + { + "epoch": 0.9479768786127167, + "grad_norm": 1.3044830560684204, + "learning_rate": 7.076714086974135e-08, + "loss": 0.2668, + "step": 47355 + }, + { + "epoch": 0.9479968971298451, + "grad_norm": 1.064666509628296, + "learning_rate": 7.071280193605868e-08, + "loss": 0.2973, + "step": 47356 + }, + { + "epoch": 0.9480169156469734, + "grad_norm": 1.3265796899795532, + "learning_rate": 7.065848372403928e-08, + "loss": 0.3154, + "step": 47357 + }, + { + "epoch": 0.9480369341641018, + "grad_norm": 1.0183006525039673, + "learning_rate": 7.060418623391019e-08, + "loss": 0.2633, + "step": 47358 + }, + { + "epoch": 0.9480569526812301, + "grad_norm": 0.9780211448669434, + "learning_rate": 7.054990946590067e-08, + "loss": 0.2685, + "step": 47359 + }, + { + "epoch": 0.9480769711983584, + "grad_norm": 1.1631349325180054, + "learning_rate": 7.049565342023835e-08, + "loss": 0.2326, + "step": 47360 + }, + { + "epoch": 0.9480969897154868, + "grad_norm": 1.141784429550171, + "learning_rate": 7.044141809715188e-08, + "loss": 0.2944, + "step": 47361 + }, + { + "epoch": 0.9481170082326151, + "grad_norm": 1.1219695806503296, + "learning_rate": 7.038720349686834e-08, + "loss": 0.264, + "step": 47362 + }, + { + "epoch": 0.9481370267497435, + "grad_norm": 1.1683708429336548, + "learning_rate": 7.033300961961587e-08, + "loss": 0.3136, + "step": 47363 + }, + { + "epoch": 0.9481570452668718, + "grad_norm": 2.232259750366211, + "learning_rate": 7.027883646562317e-08, + "loss": 0.7429, + "step": 47364 + }, + { + "epoch": 0.9481770637840002, + "grad_norm": 0.9820797443389893, + "learning_rate": 7.022468403511728e-08, + "loss": 0.2673, + "step": 47365 + }, + { + "epoch": 0.9481970823011285, + "grad_norm": 1.1950963735580444, + "learning_rate": 7.017055232832582e-08, + "loss": 0.3438, + "step": 47366 + }, + { + "epoch": 0.9482171008182569, + "grad_norm": 1.2083929777145386, + "learning_rate": 7.011644134547635e-08, + "loss": 0.2941, + "step": 47367 + }, + { + "epoch": 0.9482371193353852, + "grad_norm": 1.4654814004898071, + "learning_rate": 7.006235108679648e-08, + "loss": 0.3175, + "step": 47368 + }, + { + "epoch": 0.9482571378525135, + "grad_norm": 1.1180366277694702, + "learning_rate": 7.000828155251327e-08, + "loss": 0.2757, + "step": 47369 + }, + { + "epoch": 0.9482771563696419, + "grad_norm": 1.1333367824554443, + "learning_rate": 6.995423274285484e-08, + "loss": 0.2643, + "step": 47370 + }, + { + "epoch": 0.9482971748867702, + "grad_norm": 1.123960256576538, + "learning_rate": 6.99002046580477e-08, + "loss": 0.3233, + "step": 47371 + }, + { + "epoch": 0.9483171934038986, + "grad_norm": 1.0869450569152832, + "learning_rate": 6.984619729831888e-08, + "loss": 0.2996, + "step": 47372 + }, + { + "epoch": 0.9483372119210269, + "grad_norm": 1.8240901231765747, + "learning_rate": 6.979221066389597e-08, + "loss": 0.6605, + "step": 47373 + }, + { + "epoch": 0.9483572304381553, + "grad_norm": 1.175708293914795, + "learning_rate": 6.973824475500546e-08, + "loss": 0.297, + "step": 47374 + }, + { + "epoch": 0.9483772489552836, + "grad_norm": 1.1742799282073975, + "learning_rate": 6.968429957187439e-08, + "loss": 0.3406, + "step": 47375 + }, + { + "epoch": 0.9483972674724119, + "grad_norm": 1.0483007431030273, + "learning_rate": 6.963037511472926e-08, + "loss": 0.2476, + "step": 47376 + }, + { + "epoch": 0.9484172859895403, + "grad_norm": 1.948854923248291, + "learning_rate": 6.957647138379764e-08, + "loss": 0.7429, + "step": 47377 + }, + { + "epoch": 0.9484373045066686, + "grad_norm": 1.1689105033874512, + "learning_rate": 6.952258837930492e-08, + "loss": 0.3235, + "step": 47378 + }, + { + "epoch": 0.948457323023797, + "grad_norm": 1.039229393005371, + "learning_rate": 6.94687261014787e-08, + "loss": 0.2629, + "step": 47379 + }, + { + "epoch": 0.9484773415409253, + "grad_norm": 1.7054316997528076, + "learning_rate": 6.941488455054434e-08, + "loss": 0.8166, + "step": 47380 + }, + { + "epoch": 0.9484973600580537, + "grad_norm": 1.6618525981903076, + "learning_rate": 6.936106372672891e-08, + "loss": 0.3024, + "step": 47381 + }, + { + "epoch": 0.948517378575182, + "grad_norm": 1.3067384958267212, + "learning_rate": 6.93072636302583e-08, + "loss": 0.3352, + "step": 47382 + }, + { + "epoch": 0.9485373970923104, + "grad_norm": 1.0596038103103638, + "learning_rate": 6.925348426135959e-08, + "loss": 0.2761, + "step": 47383 + }, + { + "epoch": 0.9485574156094387, + "grad_norm": 1.4630144834518433, + "learning_rate": 6.919972562025756e-08, + "loss": 0.3024, + "step": 47384 + }, + { + "epoch": 0.948577434126567, + "grad_norm": 1.0335732698440552, + "learning_rate": 6.914598770717872e-08, + "loss": 0.2485, + "step": 47385 + }, + { + "epoch": 0.9485974526436954, + "grad_norm": 1.2317994832992554, + "learning_rate": 6.909227052234901e-08, + "loss": 0.3009, + "step": 47386 + }, + { + "epoch": 0.9486174711608237, + "grad_norm": 1.153226613998413, + "learning_rate": 6.903857406599435e-08, + "loss": 0.3207, + "step": 47387 + }, + { + "epoch": 0.9486374896779521, + "grad_norm": 1.0560662746429443, + "learning_rate": 6.898489833834066e-08, + "loss": 0.3527, + "step": 47388 + }, + { + "epoch": 0.9486575081950804, + "grad_norm": 1.1576321125030518, + "learning_rate": 6.893124333961331e-08, + "loss": 0.3287, + "step": 47389 + }, + { + "epoch": 0.9486775267122088, + "grad_norm": 1.2398685216903687, + "learning_rate": 6.887760907003716e-08, + "loss": 0.3333, + "step": 47390 + }, + { + "epoch": 0.9486975452293371, + "grad_norm": 1.1037791967391968, + "learning_rate": 6.882399552983865e-08, + "loss": 0.3176, + "step": 47391 + }, + { + "epoch": 0.9487175637464654, + "grad_norm": 1.1851648092269897, + "learning_rate": 6.877040271924318e-08, + "loss": 0.2795, + "step": 47392 + }, + { + "epoch": 0.9487375822635938, + "grad_norm": 1.1848840713500977, + "learning_rate": 6.871683063847501e-08, + "loss": 0.2979, + "step": 47393 + }, + { + "epoch": 0.9487576007807221, + "grad_norm": 1.119690179824829, + "learning_rate": 6.866327928776061e-08, + "loss": 0.2664, + "step": 47394 + }, + { + "epoch": 0.9487776192978505, + "grad_norm": 1.0985051393508911, + "learning_rate": 6.860974866732428e-08, + "loss": 0.3282, + "step": 47395 + }, + { + "epoch": 0.9487976378149788, + "grad_norm": 0.9800220727920532, + "learning_rate": 6.855623877739192e-08, + "loss": 0.232, + "step": 47396 + }, + { + "epoch": 0.9488176563321072, + "grad_norm": 1.1504933834075928, + "learning_rate": 6.850274961818725e-08, + "loss": 0.2954, + "step": 47397 + }, + { + "epoch": 0.9488376748492355, + "grad_norm": 1.1978468894958496, + "learning_rate": 6.844928118993621e-08, + "loss": 0.3512, + "step": 47398 + }, + { + "epoch": 0.9488576933663639, + "grad_norm": 1.2174794673919678, + "learning_rate": 6.83958334928625e-08, + "loss": 0.3296, + "step": 47399 + }, + { + "epoch": 0.9488777118834922, + "grad_norm": 1.9459893703460693, + "learning_rate": 6.834240652719204e-08, + "loss": 0.696, + "step": 47400 + }, + { + "epoch": 0.9488977304006205, + "grad_norm": 1.912887692451477, + "learning_rate": 6.828900029314856e-08, + "loss": 0.7181, + "step": 47401 + }, + { + "epoch": 0.9489177489177489, + "grad_norm": 1.2591255903244019, + "learning_rate": 6.823561479095686e-08, + "loss": 0.3081, + "step": 47402 + }, + { + "epoch": 0.9489377674348772, + "grad_norm": 1.0303982496261597, + "learning_rate": 6.818225002084122e-08, + "loss": 0.2502, + "step": 47403 + }, + { + "epoch": 0.9489577859520056, + "grad_norm": 1.1095610857009888, + "learning_rate": 6.81289059830259e-08, + "loss": 0.2674, + "step": 47404 + }, + { + "epoch": 0.9489778044691339, + "grad_norm": 1.0688832998275757, + "learning_rate": 6.807558267773573e-08, + "loss": 0.3076, + "step": 47405 + }, + { + "epoch": 0.9489978229862623, + "grad_norm": 1.1549572944641113, + "learning_rate": 6.802228010519385e-08, + "loss": 0.2962, + "step": 47406 + }, + { + "epoch": 0.9490178415033906, + "grad_norm": 1.168579339981079, + "learning_rate": 6.796899826562564e-08, + "loss": 0.2851, + "step": 47407 + }, + { + "epoch": 0.9490378600205189, + "grad_norm": 1.0561497211456299, + "learning_rate": 6.79157371592537e-08, + "loss": 0.2972, + "step": 47408 + }, + { + "epoch": 0.9490578785376473, + "grad_norm": 1.104791522026062, + "learning_rate": 6.786249678630286e-08, + "loss": 0.2649, + "step": 47409 + }, + { + "epoch": 0.9490778970547756, + "grad_norm": 1.217697024345398, + "learning_rate": 6.78092771469968e-08, + "loss": 0.2586, + "step": 47410 + }, + { + "epoch": 0.949097915571904, + "grad_norm": 1.1348379850387573, + "learning_rate": 6.775607824155927e-08, + "loss": 0.2848, + "step": 47411 + }, + { + "epoch": 0.9491179340890323, + "grad_norm": 1.0748481750488281, + "learning_rate": 6.770290007021341e-08, + "loss": 0.2398, + "step": 47412 + }, + { + "epoch": 0.9491379526061607, + "grad_norm": 1.0607850551605225, + "learning_rate": 6.764974263318347e-08, + "loss": 0.3232, + "step": 47413 + }, + { + "epoch": 0.949157971123289, + "grad_norm": 1.0209920406341553, + "learning_rate": 6.759660593069262e-08, + "loss": 0.3682, + "step": 47414 + }, + { + "epoch": 0.9491779896404174, + "grad_norm": 1.2229400873184204, + "learning_rate": 6.7543489962964e-08, + "loss": 0.3459, + "step": 47415 + }, + { + "epoch": 0.9491980081575457, + "grad_norm": 1.2025190591812134, + "learning_rate": 6.749039473022023e-08, + "loss": 0.2829, + "step": 47416 + }, + { + "epoch": 0.949218026674674, + "grad_norm": 1.1278058290481567, + "learning_rate": 6.743732023268668e-08, + "loss": 0.2823, + "step": 47417 + }, + { + "epoch": 0.9492380451918024, + "grad_norm": 1.1400530338287354, + "learning_rate": 6.738426647058427e-08, + "loss": 0.342, + "step": 47418 + }, + { + "epoch": 0.9492580637089307, + "grad_norm": 1.1455447673797607, + "learning_rate": 6.733123344413727e-08, + "loss": 0.2693, + "step": 47419 + }, + { + "epoch": 0.9492780822260591, + "grad_norm": 1.159491777420044, + "learning_rate": 6.72782211535683e-08, + "loss": 0.3044, + "step": 47420 + }, + { + "epoch": 0.9492981007431874, + "grad_norm": 1.100279688835144, + "learning_rate": 6.722522959909938e-08, + "loss": 0.3211, + "step": 47421 + }, + { + "epoch": 0.9493181192603158, + "grad_norm": 1.2506896257400513, + "learning_rate": 6.717225878095479e-08, + "loss": 0.3149, + "step": 47422 + }, + { + "epoch": 0.9493381377774441, + "grad_norm": 1.1382933855056763, + "learning_rate": 6.711930869935656e-08, + "loss": 0.2817, + "step": 47423 + }, + { + "epoch": 0.9493581562945724, + "grad_norm": 1.0547285079956055, + "learning_rate": 6.70663793545273e-08, + "loss": 0.2423, + "step": 47424 + }, + { + "epoch": 0.9493781748117008, + "grad_norm": 0.9845942854881287, + "learning_rate": 6.701347074668852e-08, + "loss": 0.2729, + "step": 47425 + }, + { + "epoch": 0.9493981933288291, + "grad_norm": 1.1836789846420288, + "learning_rate": 6.696058287606444e-08, + "loss": 0.3416, + "step": 47426 + }, + { + "epoch": 0.9494182118459575, + "grad_norm": 1.0137501955032349, + "learning_rate": 6.690771574287602e-08, + "loss": 0.2653, + "step": 47427 + }, + { + "epoch": 0.9494382303630858, + "grad_norm": 1.0793967247009277, + "learning_rate": 6.685486934734641e-08, + "loss": 0.2999, + "step": 47428 + }, + { + "epoch": 0.9494582488802142, + "grad_norm": 0.9693064093589783, + "learning_rate": 6.680204368969657e-08, + "loss": 0.2858, + "step": 47429 + }, + { + "epoch": 0.9494782673973425, + "grad_norm": 1.167803168296814, + "learning_rate": 6.674923877014961e-08, + "loss": 0.3282, + "step": 47430 + }, + { + "epoch": 0.9494982859144709, + "grad_norm": 1.196152925491333, + "learning_rate": 6.669645458892759e-08, + "loss": 0.3267, + "step": 47431 + }, + { + "epoch": 0.9495183044315992, + "grad_norm": 1.328920602798462, + "learning_rate": 6.664369114625146e-08, + "loss": 0.3068, + "step": 47432 + }, + { + "epoch": 0.9495383229487275, + "grad_norm": 1.8964163064956665, + "learning_rate": 6.659094844234382e-08, + "loss": 0.75, + "step": 47433 + }, + { + "epoch": 0.9495583414658559, + "grad_norm": 1.0657176971435547, + "learning_rate": 6.653822647742558e-08, + "loss": 0.2754, + "step": 47434 + }, + { + "epoch": 0.9495783599829842, + "grad_norm": 1.1680198907852173, + "learning_rate": 6.648552525171992e-08, + "loss": 0.299, + "step": 47435 + }, + { + "epoch": 0.9495983785001126, + "grad_norm": 1.2033405303955078, + "learning_rate": 6.64328447654472e-08, + "loss": 0.3385, + "step": 47436 + }, + { + "epoch": 0.9496183970172409, + "grad_norm": 1.1522350311279297, + "learning_rate": 6.638018501882892e-08, + "loss": 0.3028, + "step": 47437 + }, + { + "epoch": 0.9496384155343693, + "grad_norm": 1.0734659433364868, + "learning_rate": 6.632754601208601e-08, + "loss": 0.3128, + "step": 47438 + }, + { + "epoch": 0.9496584340514976, + "grad_norm": 1.1102854013442993, + "learning_rate": 6.627492774544109e-08, + "loss": 0.2771, + "step": 47439 + }, + { + "epoch": 0.9496784525686259, + "grad_norm": 1.1066628694534302, + "learning_rate": 6.622233021911451e-08, + "loss": 0.27, + "step": 47440 + }, + { + "epoch": 0.9496984710857543, + "grad_norm": 1.0494986772537231, + "learning_rate": 6.616975343332777e-08, + "loss": 0.2763, + "step": 47441 + }, + { + "epoch": 0.9497184896028826, + "grad_norm": 1.042073130607605, + "learning_rate": 6.611719738830125e-08, + "loss": 0.2892, + "step": 47442 + }, + { + "epoch": 0.949738508120011, + "grad_norm": 1.191631555557251, + "learning_rate": 6.606466208425588e-08, + "loss": 0.3357, + "step": 47443 + }, + { + "epoch": 0.9497585266371393, + "grad_norm": 0.9856221079826355, + "learning_rate": 6.601214752141372e-08, + "loss": 0.2535, + "step": 47444 + }, + { + "epoch": 0.9497785451542677, + "grad_norm": 1.1581382751464844, + "learning_rate": 6.595965369999402e-08, + "loss": 0.2883, + "step": 47445 + }, + { + "epoch": 0.949798563671396, + "grad_norm": 1.0489764213562012, + "learning_rate": 6.590718062021828e-08, + "loss": 0.2408, + "step": 47446 + }, + { + "epoch": 0.9498185821885243, + "grad_norm": 1.0857043266296387, + "learning_rate": 6.585472828230688e-08, + "loss": 0.3159, + "step": 47447 + }, + { + "epoch": 0.9498386007056527, + "grad_norm": 1.1834570169448853, + "learning_rate": 6.580229668648019e-08, + "loss": 0.3263, + "step": 47448 + }, + { + "epoch": 0.949858619222781, + "grad_norm": 1.9255082607269287, + "learning_rate": 6.574988583295916e-08, + "loss": 0.7679, + "step": 47449 + }, + { + "epoch": 0.9498786377399094, + "grad_norm": 1.835647463798523, + "learning_rate": 6.569749572196416e-08, + "loss": 0.717, + "step": 47450 + }, + { + "epoch": 0.9498986562570377, + "grad_norm": 1.0982164144515991, + "learning_rate": 6.564512635371444e-08, + "loss": 0.2749, + "step": 47451 + }, + { + "epoch": 0.9499186747741661, + "grad_norm": 1.0810258388519287, + "learning_rate": 6.559277772843042e-08, + "loss": 0.2805, + "step": 47452 + }, + { + "epoch": 0.9499386932912944, + "grad_norm": 1.0204401016235352, + "learning_rate": 6.554044984633301e-08, + "loss": 0.3058, + "step": 47453 + }, + { + "epoch": 0.9499587118084228, + "grad_norm": 1.0142790079116821, + "learning_rate": 6.548814270764148e-08, + "loss": 0.2662, + "step": 47454 + }, + { + "epoch": 0.9499787303255511, + "grad_norm": 1.1852061748504639, + "learning_rate": 6.543585631257621e-08, + "loss": 0.3042, + "step": 47455 + }, + { + "epoch": 0.9499987488426794, + "grad_norm": 1.051099181175232, + "learning_rate": 6.538359066135591e-08, + "loss": 0.2741, + "step": 47456 + }, + { + "epoch": 0.9500187673598078, + "grad_norm": 1.1167253255844116, + "learning_rate": 6.533134575420152e-08, + "loss": 0.2939, + "step": 47457 + }, + { + "epoch": 0.9500387858769361, + "grad_norm": 1.0512646436691284, + "learning_rate": 6.527912159133177e-08, + "loss": 0.3116, + "step": 47458 + }, + { + "epoch": 0.9500588043940645, + "grad_norm": 1.1049755811691284, + "learning_rate": 6.522691817296756e-08, + "loss": 0.2411, + "step": 47459 + }, + { + "epoch": 0.9500788229111928, + "grad_norm": 1.1186083555221558, + "learning_rate": 6.517473549932651e-08, + "loss": 0.3384, + "step": 47460 + }, + { + "epoch": 0.9500988414283212, + "grad_norm": 1.1002541780471802, + "learning_rate": 6.512257357062902e-08, + "loss": 0.2537, + "step": 47461 + }, + { + "epoch": 0.9501188599454495, + "grad_norm": 1.2104740142822266, + "learning_rate": 6.507043238709487e-08, + "loss": 0.3408, + "step": 47462 + }, + { + "epoch": 0.9501388784625778, + "grad_norm": 1.165095567703247, + "learning_rate": 6.501831194894226e-08, + "loss": 0.2673, + "step": 47463 + }, + { + "epoch": 0.9501588969797062, + "grad_norm": 1.1610400676727295, + "learning_rate": 6.496621225639044e-08, + "loss": 0.307, + "step": 47464 + }, + { + "epoch": 0.9501789154968345, + "grad_norm": 1.0888267755508423, + "learning_rate": 6.491413330965868e-08, + "loss": 0.2996, + "step": 47465 + }, + { + "epoch": 0.9501989340139629, + "grad_norm": 1.1821469068527222, + "learning_rate": 6.486207510896625e-08, + "loss": 0.2736, + "step": 47466 + }, + { + "epoch": 0.9502189525310912, + "grad_norm": 1.2967830896377563, + "learning_rate": 6.481003765453131e-08, + "loss": 0.2979, + "step": 47467 + }, + { + "epoch": 0.9502389710482196, + "grad_norm": 1.0958857536315918, + "learning_rate": 6.475802094657313e-08, + "loss": 0.2843, + "step": 47468 + }, + { + "epoch": 0.9502589895653479, + "grad_norm": 1.0500187873840332, + "learning_rate": 6.470602498530931e-08, + "loss": 0.2881, + "step": 47469 + }, + { + "epoch": 0.9502790080824763, + "grad_norm": 1.0969805717468262, + "learning_rate": 6.465404977096024e-08, + "loss": 0.2672, + "step": 47470 + }, + { + "epoch": 0.9502990265996046, + "grad_norm": 1.0492949485778809, + "learning_rate": 6.460209530374296e-08, + "loss": 0.3419, + "step": 47471 + }, + { + "epoch": 0.9503190451167329, + "grad_norm": 1.3086134195327759, + "learning_rate": 6.455016158387672e-08, + "loss": 0.3229, + "step": 47472 + }, + { + "epoch": 0.9503390636338613, + "grad_norm": 1.1443983316421509, + "learning_rate": 6.449824861157916e-08, + "loss": 0.3138, + "step": 47473 + }, + { + "epoch": 0.9503590821509896, + "grad_norm": 1.1227315664291382, + "learning_rate": 6.444635638706842e-08, + "loss": 0.2755, + "step": 47474 + }, + { + "epoch": 0.950379100668118, + "grad_norm": 1.282123327255249, + "learning_rate": 6.439448491056378e-08, + "loss": 0.2782, + "step": 47475 + }, + { + "epoch": 0.9503991191852463, + "grad_norm": 1.0691250562667847, + "learning_rate": 6.434263418228282e-08, + "loss": 0.2468, + "step": 47476 + }, + { + "epoch": 0.9504191377023747, + "grad_norm": 1.0475013256072998, + "learning_rate": 6.42908042024426e-08, + "loss": 0.2582, + "step": 47477 + }, + { + "epoch": 0.950439156219503, + "grad_norm": 1.3452448844909668, + "learning_rate": 6.423899497126185e-08, + "loss": 0.2885, + "step": 47478 + }, + { + "epoch": 0.9504591747366313, + "grad_norm": 1.1218565702438354, + "learning_rate": 6.418720648895816e-08, + "loss": 0.3358, + "step": 47479 + }, + { + "epoch": 0.9504791932537597, + "grad_norm": 1.1683090925216675, + "learning_rate": 6.413543875574912e-08, + "loss": 0.3301, + "step": 47480 + }, + { + "epoch": 0.950499211770888, + "grad_norm": 1.2894898653030396, + "learning_rate": 6.408369177185292e-08, + "loss": 0.2833, + "step": 47481 + }, + { + "epoch": 0.9505192302880164, + "grad_norm": 1.4660108089447021, + "learning_rate": 6.403196553748603e-08, + "loss": 0.3105, + "step": 47482 + }, + { + "epoch": 0.9505392488051447, + "grad_norm": 1.978385329246521, + "learning_rate": 6.398026005286718e-08, + "loss": 0.7174, + "step": 47483 + }, + { + "epoch": 0.9505592673222731, + "grad_norm": 1.1087702512741089, + "learning_rate": 6.392857531821283e-08, + "loss": 0.2755, + "step": 47484 + }, + { + "epoch": 0.9505792858394014, + "grad_norm": 1.197685956954956, + "learning_rate": 6.387691133374008e-08, + "loss": 0.283, + "step": 47485 + }, + { + "epoch": 0.9505993043565298, + "grad_norm": 1.0720206499099731, + "learning_rate": 6.382526809966704e-08, + "loss": 0.3164, + "step": 47486 + }, + { + "epoch": 0.9506193228736581, + "grad_norm": 1.0955127477645874, + "learning_rate": 6.377364561621024e-08, + "loss": 0.2726, + "step": 47487 + }, + { + "epoch": 0.9506393413907864, + "grad_norm": 1.0044889450073242, + "learning_rate": 6.372204388358671e-08, + "loss": 0.2558, + "step": 47488 + }, + { + "epoch": 0.9506593599079148, + "grad_norm": 1.0726348161697388, + "learning_rate": 6.367046290201351e-08, + "loss": 0.2695, + "step": 47489 + }, + { + "epoch": 0.9506793784250431, + "grad_norm": 1.0793331861495972, + "learning_rate": 6.361890267170712e-08, + "loss": 0.2656, + "step": 47490 + }, + { + "epoch": 0.9506993969421715, + "grad_norm": 1.1438958644866943, + "learning_rate": 6.356736319288459e-08, + "loss": 0.292, + "step": 47491 + }, + { + "epoch": 0.9507194154592998, + "grad_norm": 1.1220351457595825, + "learning_rate": 6.351584446576243e-08, + "loss": 0.2932, + "step": 47492 + }, + { + "epoch": 0.9507394339764282, + "grad_norm": 1.2331584692001343, + "learning_rate": 6.346434649055767e-08, + "loss": 0.2958, + "step": 47493 + }, + { + "epoch": 0.9507594524935565, + "grad_norm": 1.1993659734725952, + "learning_rate": 6.341286926748624e-08, + "loss": 0.2975, + "step": 47494 + }, + { + "epoch": 0.9507794710106848, + "grad_norm": 1.209485650062561, + "learning_rate": 6.336141279676467e-08, + "loss": 0.3093, + "step": 47495 + }, + { + "epoch": 0.9507994895278132, + "grad_norm": 1.8570165634155273, + "learning_rate": 6.330997707860942e-08, + "loss": 0.7218, + "step": 47496 + }, + { + "epoch": 0.9508195080449415, + "grad_norm": 1.0852059125900269, + "learning_rate": 6.325856211323644e-08, + "loss": 0.3016, + "step": 47497 + }, + { + "epoch": 0.9508395265620699, + "grad_norm": 1.097345232963562, + "learning_rate": 6.320716790086224e-08, + "loss": 0.2814, + "step": 47498 + }, + { + "epoch": 0.9508595450791982, + "grad_norm": 1.0398201942443848, + "learning_rate": 6.315579444170273e-08, + "loss": 0.3089, + "step": 47499 + }, + { + "epoch": 0.9508795635963266, + "grad_norm": 1.0316208600997925, + "learning_rate": 6.310444173597386e-08, + "loss": 0.2961, + "step": 47500 + }, + { + "epoch": 0.9508995821134549, + "grad_norm": 1.1471707820892334, + "learning_rate": 6.305310978389156e-08, + "loss": 0.3111, + "step": 47501 + }, + { + "epoch": 0.9509196006305833, + "grad_norm": 1.7903530597686768, + "learning_rate": 6.300179858567124e-08, + "loss": 0.7271, + "step": 47502 + }, + { + "epoch": 0.9509396191477116, + "grad_norm": 1.1375516653060913, + "learning_rate": 6.295050814152937e-08, + "loss": 0.3242, + "step": 47503 + }, + { + "epoch": 0.9509596376648399, + "grad_norm": 1.1604377031326294, + "learning_rate": 6.289923845168077e-08, + "loss": 0.2755, + "step": 47504 + }, + { + "epoch": 0.9509796561819683, + "grad_norm": 1.0825802087783813, + "learning_rate": 6.28479895163414e-08, + "loss": 0.29, + "step": 47505 + }, + { + "epoch": 0.9509996746990966, + "grad_norm": 1.116461157798767, + "learning_rate": 6.279676133572665e-08, + "loss": 0.2844, + "step": 47506 + }, + { + "epoch": 0.951019693216225, + "grad_norm": 1.0995166301727295, + "learning_rate": 6.274555391005189e-08, + "loss": 0.3136, + "step": 47507 + }, + { + "epoch": 0.9510397117333533, + "grad_norm": 1.4442778825759888, + "learning_rate": 6.269436723953193e-08, + "loss": 0.3239, + "step": 47508 + }, + { + "epoch": 0.9510597302504817, + "grad_norm": 1.1670632362365723, + "learning_rate": 6.264320132438273e-08, + "loss": 0.3373, + "step": 47509 + }, + { + "epoch": 0.95107974876761, + "grad_norm": 1.1842352151870728, + "learning_rate": 6.259205616481856e-08, + "loss": 0.3025, + "step": 47510 + }, + { + "epoch": 0.9510997672847383, + "grad_norm": 1.1749597787857056, + "learning_rate": 6.254093176105481e-08, + "loss": 0.3311, + "step": 47511 + }, + { + "epoch": 0.9511197858018667, + "grad_norm": 1.0809733867645264, + "learning_rate": 6.248982811330684e-08, + "loss": 0.26, + "step": 47512 + }, + { + "epoch": 0.951139804318995, + "grad_norm": 1.1198586225509644, + "learning_rate": 6.243874522178894e-08, + "loss": 0.287, + "step": 47513 + }, + { + "epoch": 0.9511598228361234, + "grad_norm": 1.386350393295288, + "learning_rate": 6.238768308671594e-08, + "loss": 0.2775, + "step": 47514 + }, + { + "epoch": 0.9511798413532517, + "grad_norm": 1.249365210533142, + "learning_rate": 6.233664170830267e-08, + "loss": 0.2676, + "step": 47515 + }, + { + "epoch": 0.9511998598703801, + "grad_norm": 1.0407336950302124, + "learning_rate": 6.228562108676339e-08, + "loss": 0.2523, + "step": 47516 + }, + { + "epoch": 0.9512198783875084, + "grad_norm": 1.0453529357910156, + "learning_rate": 6.223462122231295e-08, + "loss": 0.2731, + "step": 47517 + }, + { + "epoch": 0.9512398969046368, + "grad_norm": 1.140432596206665, + "learning_rate": 6.218364211516559e-08, + "loss": 0.3154, + "step": 47518 + }, + { + "epoch": 0.9512599154217651, + "grad_norm": 1.0357441902160645, + "learning_rate": 6.213268376553506e-08, + "loss": 0.2986, + "step": 47519 + }, + { + "epoch": 0.9512799339388934, + "grad_norm": 1.05140221118927, + "learning_rate": 6.208174617363672e-08, + "loss": 0.2472, + "step": 47520 + }, + { + "epoch": 0.9512999524560218, + "grad_norm": 1.3159438371658325, + "learning_rate": 6.203082933968374e-08, + "loss": 0.3342, + "step": 47521 + }, + { + "epoch": 0.9513199709731501, + "grad_norm": 1.056949496269226, + "learning_rate": 6.19799332638904e-08, + "loss": 0.2995, + "step": 47522 + }, + { + "epoch": 0.9513399894902785, + "grad_norm": 1.2322825193405151, + "learning_rate": 6.192905794647041e-08, + "loss": 0.265, + "step": 47523 + }, + { + "epoch": 0.9513600080074068, + "grad_norm": 1.1804068088531494, + "learning_rate": 6.187820338763861e-08, + "loss": 0.2901, + "step": 47524 + }, + { + "epoch": 0.9513800265245352, + "grad_norm": 1.0789471864700317, + "learning_rate": 6.182736958760816e-08, + "loss": 0.2673, + "step": 47525 + }, + { + "epoch": 0.9514000450416635, + "grad_norm": 1.0491697788238525, + "learning_rate": 6.177655654659276e-08, + "loss": 0.2524, + "step": 47526 + }, + { + "epoch": 0.9514200635587918, + "grad_norm": 1.794939637184143, + "learning_rate": 6.172576426480559e-08, + "loss": 0.7533, + "step": 47527 + }, + { + "epoch": 0.9514400820759202, + "grad_norm": 1.2323061227798462, + "learning_rate": 6.167499274246092e-08, + "loss": 0.3037, + "step": 47528 + }, + { + "epoch": 0.9514601005930485, + "grad_norm": 1.050787329673767, + "learning_rate": 6.162424197977135e-08, + "loss": 0.2887, + "step": 47529 + }, + { + "epoch": 0.9514801191101769, + "grad_norm": 1.148905873298645, + "learning_rate": 6.157351197695116e-08, + "loss": 0.2991, + "step": 47530 + }, + { + "epoch": 0.9515001376273052, + "grad_norm": 1.1598128080368042, + "learning_rate": 6.152280273421296e-08, + "loss": 0.2933, + "step": 47531 + }, + { + "epoch": 0.9515201561444336, + "grad_norm": 0.9826052188873291, + "learning_rate": 6.147211425177047e-08, + "loss": 0.2778, + "step": 47532 + }, + { + "epoch": 0.9515401746615619, + "grad_norm": 1.0981471538543701, + "learning_rate": 6.142144652983628e-08, + "loss": 0.3214, + "step": 47533 + }, + { + "epoch": 0.9515601931786903, + "grad_norm": 1.1966861486434937, + "learning_rate": 6.137079956862302e-08, + "loss": 0.3016, + "step": 47534 + }, + { + "epoch": 0.9515802116958186, + "grad_norm": 1.0583455562591553, + "learning_rate": 6.132017336834494e-08, + "loss": 0.2785, + "step": 47535 + }, + { + "epoch": 0.9516002302129469, + "grad_norm": 1.0694438219070435, + "learning_rate": 6.126956792921302e-08, + "loss": 0.2642, + "step": 47536 + }, + { + "epoch": 0.9516202487300753, + "grad_norm": 1.1243910789489746, + "learning_rate": 6.121898325144204e-08, + "loss": 0.2614, + "step": 47537 + }, + { + "epoch": 0.9516402672472036, + "grad_norm": 1.1849641799926758, + "learning_rate": 6.116841933524298e-08, + "loss": 0.3516, + "step": 47538 + }, + { + "epoch": 0.951660285764332, + "grad_norm": 1.9422043561935425, + "learning_rate": 6.111787618082954e-08, + "loss": 0.773, + "step": 47539 + }, + { + "epoch": 0.9516803042814603, + "grad_norm": 1.906715750694275, + "learning_rate": 6.106735378841322e-08, + "loss": 0.7415, + "step": 47540 + }, + { + "epoch": 0.9517003227985887, + "grad_norm": 1.1386257410049438, + "learning_rate": 6.101685215820719e-08, + "loss": 0.3138, + "step": 47541 + }, + { + "epoch": 0.951720341315717, + "grad_norm": 1.1810531616210938, + "learning_rate": 6.096637129042348e-08, + "loss": 0.3005, + "step": 47542 + }, + { + "epoch": 0.9517403598328453, + "grad_norm": 1.13753342628479, + "learning_rate": 6.091591118527417e-08, + "loss": 0.3028, + "step": 47543 + }, + { + "epoch": 0.9517603783499737, + "grad_norm": 0.9731810092926025, + "learning_rate": 6.086547184297132e-08, + "loss": 0.2703, + "step": 47544 + }, + { + "epoch": 0.951780396867102, + "grad_norm": 0.9127434492111206, + "learning_rate": 6.081505326372694e-08, + "loss": 0.2767, + "step": 47545 + }, + { + "epoch": 0.9518004153842304, + "grad_norm": 1.0702199935913086, + "learning_rate": 6.076465544775367e-08, + "loss": 0.2853, + "step": 47546 + }, + { + "epoch": 0.9518204339013587, + "grad_norm": 1.8096587657928467, + "learning_rate": 6.071427839526189e-08, + "loss": 0.7891, + "step": 47547 + }, + { + "epoch": 0.9518404524184871, + "grad_norm": 1.13643217086792, + "learning_rate": 6.066392210646532e-08, + "loss": 0.3079, + "step": 47548 + }, + { + "epoch": 0.9518604709356154, + "grad_norm": 1.0633745193481445, + "learning_rate": 6.061358658157434e-08, + "loss": 0.2631, + "step": 47549 + }, + { + "epoch": 0.9518804894527438, + "grad_norm": 1.1304007768630981, + "learning_rate": 6.056327182080101e-08, + "loss": 0.2838, + "step": 47550 + }, + { + "epoch": 0.9519005079698721, + "grad_norm": 1.0614047050476074, + "learning_rate": 6.051297782435628e-08, + "loss": 0.2617, + "step": 47551 + }, + { + "epoch": 0.9519205264870004, + "grad_norm": 1.097947597503662, + "learning_rate": 6.046270459245274e-08, + "loss": 0.3381, + "step": 47552 + }, + { + "epoch": 0.9519405450041288, + "grad_norm": 1.079525351524353, + "learning_rate": 6.041245212530023e-08, + "loss": 0.2665, + "step": 47553 + }, + { + "epoch": 0.9519605635212571, + "grad_norm": 1.0755647420883179, + "learning_rate": 6.036222042311135e-08, + "loss": 0.2586, + "step": 47554 + }, + { + "epoch": 0.9519805820383855, + "grad_norm": 1.1310237646102905, + "learning_rate": 6.03120094860965e-08, + "loss": 0.3176, + "step": 47555 + }, + { + "epoch": 0.9520006005555138, + "grad_norm": 1.183484673500061, + "learning_rate": 6.026181931446661e-08, + "loss": 0.3048, + "step": 47556 + }, + { + "epoch": 0.9520206190726422, + "grad_norm": 1.0450587272644043, + "learning_rate": 6.02116499084332e-08, + "loss": 0.2814, + "step": 47557 + }, + { + "epoch": 0.9520406375897705, + "grad_norm": 1.063481092453003, + "learning_rate": 6.01615012682072e-08, + "loss": 0.2618, + "step": 47558 + }, + { + "epoch": 0.9520606561068988, + "grad_norm": 1.0306241512298584, + "learning_rate": 6.011137339399897e-08, + "loss": 0.2654, + "step": 47559 + }, + { + "epoch": 0.9520806746240272, + "grad_norm": 1.144608497619629, + "learning_rate": 6.006126628601894e-08, + "loss": 0.2926, + "step": 47560 + }, + { + "epoch": 0.9521006931411555, + "grad_norm": 1.058989405632019, + "learning_rate": 6.001117994447913e-08, + "loss": 0.2911, + "step": 47561 + }, + { + "epoch": 0.9521207116582839, + "grad_norm": 1.0584890842437744, + "learning_rate": 5.996111436958829e-08, + "loss": 0.2821, + "step": 47562 + }, + { + "epoch": 0.9521407301754122, + "grad_norm": 1.8182190656661987, + "learning_rate": 5.991106956155845e-08, + "loss": 0.772, + "step": 47563 + }, + { + "epoch": 0.9521607486925406, + "grad_norm": 1.157728672027588, + "learning_rate": 5.986104552059946e-08, + "loss": 0.3065, + "step": 47564 + }, + { + "epoch": 0.9521807672096689, + "grad_norm": 1.2274956703186035, + "learning_rate": 5.98110422469217e-08, + "loss": 0.3567, + "step": 47565 + }, + { + "epoch": 0.9522007857267973, + "grad_norm": 2.038994550704956, + "learning_rate": 5.976105974073499e-08, + "loss": 0.7659, + "step": 47566 + }, + { + "epoch": 0.9522208042439256, + "grad_norm": 1.9851622581481934, + "learning_rate": 5.971109800224917e-08, + "loss": 0.748, + "step": 47567 + }, + { + "epoch": 0.9522408227610539, + "grad_norm": 1.099266529083252, + "learning_rate": 5.966115703167519e-08, + "loss": 0.2644, + "step": 47568 + }, + { + "epoch": 0.9522608412781823, + "grad_norm": 1.210937261581421, + "learning_rate": 5.961123682922232e-08, + "loss": 0.2685, + "step": 47569 + }, + { + "epoch": 0.9522808597953106, + "grad_norm": 1.1586227416992188, + "learning_rate": 5.956133739510095e-08, + "loss": 0.3078, + "step": 47570 + }, + { + "epoch": 0.952300878312439, + "grad_norm": 1.1851788759231567, + "learning_rate": 5.95114587295198e-08, + "loss": 0.2918, + "step": 47571 + }, + { + "epoch": 0.9523208968295673, + "grad_norm": 1.107500672340393, + "learning_rate": 5.946160083268981e-08, + "loss": 0.2821, + "step": 47572 + }, + { + "epoch": 0.9523409153466957, + "grad_norm": 1.1796621084213257, + "learning_rate": 5.941176370481971e-08, + "loss": 0.2898, + "step": 47573 + }, + { + "epoch": 0.952360933863824, + "grad_norm": 1.2002713680267334, + "learning_rate": 5.936194734611933e-08, + "loss": 0.2994, + "step": 47574 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 1.252072811126709, + "learning_rate": 5.9312151756798495e-08, + "loss": 0.276, + "step": 47575 + }, + { + "epoch": 0.9524009708980807, + "grad_norm": 1.2832190990447998, + "learning_rate": 5.926237693706594e-08, + "loss": 0.2685, + "step": 47576 + }, + { + "epoch": 0.952420989415209, + "grad_norm": 1.1880404949188232, + "learning_rate": 5.921262288713092e-08, + "loss": 0.3701, + "step": 47577 + }, + { + "epoch": 0.9524410079323374, + "grad_norm": 1.090000033378601, + "learning_rate": 5.916288960720329e-08, + "loss": 0.3136, + "step": 47578 + }, + { + "epoch": 0.9524610264494657, + "grad_norm": 1.2795990705490112, + "learning_rate": 5.9113177097491204e-08, + "loss": 0.2731, + "step": 47579 + }, + { + "epoch": 0.9524810449665941, + "grad_norm": 1.1267520189285278, + "learning_rate": 5.9063485358203943e-08, + "loss": 0.3043, + "step": 47580 + }, + { + "epoch": 0.9525010634837224, + "grad_norm": 1.0676449537277222, + "learning_rate": 5.9013814389550785e-08, + "loss": 0.3291, + "step": 47581 + }, + { + "epoch": 0.9525210820008508, + "grad_norm": 1.0540218353271484, + "learning_rate": 5.8964164191739894e-08, + "loss": 0.2738, + "step": 47582 + }, + { + "epoch": 0.9525411005179791, + "grad_norm": 1.2047016620635986, + "learning_rate": 5.891453476498055e-08, + "loss": 0.3257, + "step": 47583 + }, + { + "epoch": 0.9525611190351074, + "grad_norm": 1.2401559352874756, + "learning_rate": 5.8864926109480915e-08, + "loss": 0.2511, + "step": 47584 + }, + { + "epoch": 0.9525811375522358, + "grad_norm": 1.3189163208007812, + "learning_rate": 5.881533822545027e-08, + "loss": 0.2547, + "step": 47585 + }, + { + "epoch": 0.9526011560693641, + "grad_norm": 1.125735878944397, + "learning_rate": 5.8765771113096224e-08, + "loss": 0.3067, + "step": 47586 + }, + { + "epoch": 0.9526211745864925, + "grad_norm": 1.089719533920288, + "learning_rate": 5.871622477262751e-08, + "loss": 0.2491, + "step": 47587 + }, + { + "epoch": 0.9526411931036208, + "grad_norm": 0.9623180031776428, + "learning_rate": 5.866669920425283e-08, + "loss": 0.2912, + "step": 47588 + }, + { + "epoch": 0.9526612116207492, + "grad_norm": 1.1101444959640503, + "learning_rate": 5.861719440817926e-08, + "loss": 0.3118, + "step": 47589 + }, + { + "epoch": 0.9526812301378775, + "grad_norm": 1.118778944015503, + "learning_rate": 5.856771038461606e-08, + "loss": 0.3125, + "step": 47590 + }, + { + "epoch": 0.9527012486550058, + "grad_norm": 1.034368872642517, + "learning_rate": 5.851824713377086e-08, + "loss": 0.2746, + "step": 47591 + }, + { + "epoch": 0.9527212671721342, + "grad_norm": 1.8340595960617065, + "learning_rate": 5.8468804655851254e-08, + "loss": 0.747, + "step": 47592 + }, + { + "epoch": 0.9527412856892625, + "grad_norm": 1.1556028127670288, + "learning_rate": 5.841938295106542e-08, + "loss": 0.2704, + "step": 47593 + }, + { + "epoch": 0.9527613042063909, + "grad_norm": 1.2416672706604004, + "learning_rate": 5.836998201962152e-08, + "loss": 0.3251, + "step": 47594 + }, + { + "epoch": 0.9527813227235192, + "grad_norm": 1.0104708671569824, + "learning_rate": 5.8320601861726056e-08, + "loss": 0.2554, + "step": 47595 + }, + { + "epoch": 0.9528013412406476, + "grad_norm": 1.0504711866378784, + "learning_rate": 5.827124247758775e-08, + "loss": 0.2594, + "step": 47596 + }, + { + "epoch": 0.9528213597577759, + "grad_norm": 1.0546739101409912, + "learning_rate": 5.822190386741367e-08, + "loss": 0.27, + "step": 47597 + }, + { + "epoch": 0.9528413782749043, + "grad_norm": 1.106290578842163, + "learning_rate": 5.817258603141085e-08, + "loss": 0.3137, + "step": 47598 + }, + { + "epoch": 0.9528613967920326, + "grad_norm": 0.9822888374328613, + "learning_rate": 5.812328896978692e-08, + "loss": 0.2845, + "step": 47599 + }, + { + "epoch": 0.9528814153091609, + "grad_norm": 2.0137698650360107, + "learning_rate": 5.807401268275004e-08, + "loss": 0.7303, + "step": 47600 + }, + { + "epoch": 0.9529014338262893, + "grad_norm": 1.9047304391860962, + "learning_rate": 5.8024757170506154e-08, + "loss": 0.7649, + "step": 47601 + }, + { + "epoch": 0.9529214523434176, + "grad_norm": 1.1459193229675293, + "learning_rate": 5.797552243326177e-08, + "loss": 0.2865, + "step": 47602 + }, + { + "epoch": 0.952941470860546, + "grad_norm": 1.5553152561187744, + "learning_rate": 5.79263084712256e-08, + "loss": 0.3048, + "step": 47603 + }, + { + "epoch": 0.9529614893776743, + "grad_norm": 2.0143723487854004, + "learning_rate": 5.78771152846036e-08, + "loss": 0.7459, + "step": 47604 + }, + { + "epoch": 0.9529815078948027, + "grad_norm": 1.20638108253479, + "learning_rate": 5.7827942873602824e-08, + "loss": 0.3042, + "step": 47605 + }, + { + "epoch": 0.953001526411931, + "grad_norm": 1.0085813999176025, + "learning_rate": 5.777879123842922e-08, + "loss": 0.2502, + "step": 47606 + }, + { + "epoch": 0.9530215449290593, + "grad_norm": 1.1837151050567627, + "learning_rate": 5.772966037929095e-08, + "loss": 0.3063, + "step": 47607 + }, + { + "epoch": 0.9530415634461877, + "grad_norm": 1.4218449592590332, + "learning_rate": 5.7680550296392857e-08, + "loss": 0.3223, + "step": 47608 + }, + { + "epoch": 0.953061581963316, + "grad_norm": 0.9728146195411682, + "learning_rate": 5.763146098994254e-08, + "loss": 0.2287, + "step": 47609 + }, + { + "epoch": 0.9530816004804444, + "grad_norm": 1.0882512331008911, + "learning_rate": 5.7582392460145406e-08, + "loss": 0.2966, + "step": 47610 + }, + { + "epoch": 0.9531016189975727, + "grad_norm": 1.1719856262207031, + "learning_rate": 5.753334470720851e-08, + "loss": 0.3586, + "step": 47611 + }, + { + "epoch": 0.9531216375147011, + "grad_norm": 1.061241865158081, + "learning_rate": 5.748431773133778e-08, + "loss": 0.2528, + "step": 47612 + }, + { + "epoch": 0.9531416560318294, + "grad_norm": 1.2930090427398682, + "learning_rate": 5.743531153273918e-08, + "loss": 0.3019, + "step": 47613 + }, + { + "epoch": 0.9531616745489578, + "grad_norm": 1.117423415184021, + "learning_rate": 5.738632611161921e-08, + "loss": 0.2805, + "step": 47614 + }, + { + "epoch": 0.9531816930660861, + "grad_norm": 1.2002973556518555, + "learning_rate": 5.733736146818325e-08, + "loss": 0.2721, + "step": 47615 + }, + { + "epoch": 0.9532017115832144, + "grad_norm": 1.4271039962768555, + "learning_rate": 5.728841760263782e-08, + "loss": 0.317, + "step": 47616 + }, + { + "epoch": 0.9532217301003428, + "grad_norm": 1.0912340879440308, + "learning_rate": 5.723949451518773e-08, + "loss": 0.2896, + "step": 47617 + }, + { + "epoch": 0.9532417486174711, + "grad_norm": 1.1653114557266235, + "learning_rate": 5.719059220603951e-08, + "loss": 0.2589, + "step": 47618 + }, + { + "epoch": 0.9532617671345995, + "grad_norm": 1.0553823709487915, + "learning_rate": 5.7141710675397976e-08, + "loss": 0.2823, + "step": 47619 + }, + { + "epoch": 0.9532817856517278, + "grad_norm": 1.0998883247375488, + "learning_rate": 5.709284992346908e-08, + "loss": 0.2916, + "step": 47620 + }, + { + "epoch": 0.9533018041688562, + "grad_norm": 1.0400949716567993, + "learning_rate": 5.704400995045822e-08, + "loss": 0.2842, + "step": 47621 + }, + { + "epoch": 0.9533218226859845, + "grad_norm": 1.10109543800354, + "learning_rate": 5.699519075657023e-08, + "loss": 0.3111, + "step": 47622 + }, + { + "epoch": 0.9533418412031128, + "grad_norm": 1.0821559429168701, + "learning_rate": 5.6946392342011044e-08, + "loss": 0.2943, + "step": 47623 + }, + { + "epoch": 0.9533618597202412, + "grad_norm": 1.0209927558898926, + "learning_rate": 5.689761470698551e-08, + "loss": 0.3024, + "step": 47624 + }, + { + "epoch": 0.9533818782373695, + "grad_norm": 1.169171929359436, + "learning_rate": 5.6848857851697914e-08, + "loss": 0.2901, + "step": 47625 + }, + { + "epoch": 0.9534018967544979, + "grad_norm": 1.1501926183700562, + "learning_rate": 5.6800121776354746e-08, + "loss": 0.3198, + "step": 47626 + }, + { + "epoch": 0.9534219152716262, + "grad_norm": 1.1491779088974, + "learning_rate": 5.675140648116029e-08, + "loss": 0.3311, + "step": 47627 + }, + { + "epoch": 0.9534419337887546, + "grad_norm": 1.0990207195281982, + "learning_rate": 5.6702711966318267e-08, + "loss": 0.3586, + "step": 47628 + }, + { + "epoch": 0.9534619523058829, + "grad_norm": 1.3561326265335083, + "learning_rate": 5.6654038232034635e-08, + "loss": 0.2268, + "step": 47629 + }, + { + "epoch": 0.9534819708230113, + "grad_norm": 2.2948484420776367, + "learning_rate": 5.660538527851367e-08, + "loss": 0.2756, + "step": 47630 + }, + { + "epoch": 0.9535019893401396, + "grad_norm": 1.879265308380127, + "learning_rate": 5.655675310596021e-08, + "loss": 0.7618, + "step": 47631 + }, + { + "epoch": 0.9535220078572679, + "grad_norm": 1.0515559911727905, + "learning_rate": 5.6508141714577966e-08, + "loss": 0.3179, + "step": 47632 + }, + { + "epoch": 0.9535420263743963, + "grad_norm": 1.8932723999023438, + "learning_rate": 5.645955110457124e-08, + "loss": 0.7533, + "step": 47633 + }, + { + "epoch": 0.9535620448915246, + "grad_norm": 1.2500410079956055, + "learning_rate": 5.641098127614486e-08, + "loss": 0.333, + "step": 47634 + }, + { + "epoch": 0.953582063408653, + "grad_norm": 1.250877022743225, + "learning_rate": 5.636243222950366e-08, + "loss": 0.2779, + "step": 47635 + }, + { + "epoch": 0.9536020819257813, + "grad_norm": 1.182557463645935, + "learning_rate": 5.6313903964850257e-08, + "loss": 0.303, + "step": 47636 + }, + { + "epoch": 0.9536221004429097, + "grad_norm": 1.2852396965026855, + "learning_rate": 5.626539648238893e-08, + "loss": 0.3416, + "step": 47637 + }, + { + "epoch": 0.953642118960038, + "grad_norm": 1.1765199899673462, + "learning_rate": 5.621690978232397e-08, + "loss": 0.3194, + "step": 47638 + }, + { + "epoch": 0.9536621374771663, + "grad_norm": 1.2924083471298218, + "learning_rate": 5.61684438648602e-08, + "loss": 0.3282, + "step": 47639 + }, + { + "epoch": 0.9536821559942947, + "grad_norm": 1.0892773866653442, + "learning_rate": 5.611999873019969e-08, + "loss": 0.2496, + "step": 47640 + }, + { + "epoch": 0.953702174511423, + "grad_norm": 1.0413953065872192, + "learning_rate": 5.607157437854671e-08, + "loss": 0.2892, + "step": 47641 + }, + { + "epoch": 0.9537221930285514, + "grad_norm": 1.0980992317199707, + "learning_rate": 5.6023170810105e-08, + "loss": 0.2677, + "step": 47642 + }, + { + "epoch": 0.9537422115456797, + "grad_norm": 1.096090316772461, + "learning_rate": 5.597478802507827e-08, + "loss": 0.2909, + "step": 47643 + }, + { + "epoch": 0.9537622300628081, + "grad_norm": 1.443550944328308, + "learning_rate": 5.5926426023669155e-08, + "loss": 0.304, + "step": 47644 + }, + { + "epoch": 0.9537822485799364, + "grad_norm": 1.1882113218307495, + "learning_rate": 5.5878084806081365e-08, + "loss": 0.3071, + "step": 47645 + }, + { + "epoch": 0.9538022670970648, + "grad_norm": 0.9889510869979858, + "learning_rate": 5.582976437251808e-08, + "loss": 0.2758, + "step": 47646 + }, + { + "epoch": 0.9538222856141931, + "grad_norm": 1.4786726236343384, + "learning_rate": 5.578146472318302e-08, + "loss": 0.3015, + "step": 47647 + }, + { + "epoch": 0.9538423041313214, + "grad_norm": 1.1896439790725708, + "learning_rate": 5.573318585827825e-08, + "loss": 0.3316, + "step": 47648 + }, + { + "epoch": 0.9538623226484498, + "grad_norm": 1.2284806966781616, + "learning_rate": 5.5684927778006934e-08, + "loss": 0.2943, + "step": 47649 + }, + { + "epoch": 0.9538823411655781, + "grad_norm": 1.1755211353302002, + "learning_rate": 5.563669048257225e-08, + "loss": 0.2804, + "step": 47650 + }, + { + "epoch": 0.9539023596827065, + "grad_norm": 1.2202435731887817, + "learning_rate": 5.5588473972176814e-08, + "loss": 0.2617, + "step": 47651 + }, + { + "epoch": 0.9539223781998348, + "grad_norm": 1.05641508102417, + "learning_rate": 5.5540278247023794e-08, + "loss": 0.2662, + "step": 47652 + }, + { + "epoch": 0.9539423967169632, + "grad_norm": 1.1400748491287231, + "learning_rate": 5.549210330731525e-08, + "loss": 0.2912, + "step": 47653 + }, + { + "epoch": 0.9539624152340915, + "grad_norm": 1.194610834121704, + "learning_rate": 5.5443949153253794e-08, + "loss": 0.3214, + "step": 47654 + }, + { + "epoch": 0.9539824337512198, + "grad_norm": 1.8989205360412598, + "learning_rate": 5.53958157850415e-08, + "loss": 0.7528, + "step": 47655 + }, + { + "epoch": 0.9540024522683482, + "grad_norm": 1.9172934293746948, + "learning_rate": 5.5347703202882074e-08, + "loss": 0.7447, + "step": 47656 + }, + { + "epoch": 0.9540224707854765, + "grad_norm": 1.111065149307251, + "learning_rate": 5.529961140697593e-08, + "loss": 0.268, + "step": 47657 + }, + { + "epoch": 0.9540424893026049, + "grad_norm": 1.0394493341445923, + "learning_rate": 5.525154039752678e-08, + "loss": 0.3046, + "step": 47658 + }, + { + "epoch": 0.9540625078197332, + "grad_norm": 1.8707458972930908, + "learning_rate": 5.5203490174735584e-08, + "loss": 0.7671, + "step": 47659 + }, + { + "epoch": 0.9540825263368616, + "grad_norm": 1.1456862688064575, + "learning_rate": 5.51554607388044e-08, + "loss": 0.3219, + "step": 47660 + }, + { + "epoch": 0.9541025448539899, + "grad_norm": 1.991039752960205, + "learning_rate": 5.51074520899364e-08, + "loss": 0.7504, + "step": 47661 + }, + { + "epoch": 0.9541225633711183, + "grad_norm": 1.2033292055130005, + "learning_rate": 5.505946422833197e-08, + "loss": 0.3235, + "step": 47662 + }, + { + "epoch": 0.9541425818882466, + "grad_norm": 1.0100139379501343, + "learning_rate": 5.501149715419318e-08, + "loss": 0.2658, + "step": 47663 + }, + { + "epoch": 0.9541626004053749, + "grad_norm": 1.9923135042190552, + "learning_rate": 5.4963550867722095e-08, + "loss": 0.7114, + "step": 47664 + }, + { + "epoch": 0.9541826189225033, + "grad_norm": 1.0630794763565063, + "learning_rate": 5.4915625369120206e-08, + "loss": 0.2633, + "step": 47665 + }, + { + "epoch": 0.9542026374396316, + "grad_norm": 1.0454199314117432, + "learning_rate": 5.486772065858903e-08, + "loss": 0.2523, + "step": 47666 + }, + { + "epoch": 0.95422265595676, + "grad_norm": 1.1296510696411133, + "learning_rate": 5.481983673632951e-08, + "loss": 0.3298, + "step": 47667 + }, + { + "epoch": 0.9542426744738883, + "grad_norm": 1.1303086280822754, + "learning_rate": 5.477197360254261e-08, + "loss": 0.2663, + "step": 47668 + }, + { + "epoch": 0.9542626929910167, + "grad_norm": 1.9473859071731567, + "learning_rate": 5.472413125743092e-08, + "loss": 0.7448, + "step": 47669 + }, + { + "epoch": 0.954282711508145, + "grad_norm": 0.9943922162055969, + "learning_rate": 5.4676309701194866e-08, + "loss": 0.2697, + "step": 47670 + }, + { + "epoch": 0.9543027300252733, + "grad_norm": 1.1665873527526855, + "learning_rate": 5.462850893403482e-08, + "loss": 0.2401, + "step": 47671 + }, + { + "epoch": 0.9543227485424017, + "grad_norm": 1.072697639465332, + "learning_rate": 5.458072895615285e-08, + "loss": 0.2885, + "step": 47672 + }, + { + "epoch": 0.95434276705953, + "grad_norm": 0.9959391355514526, + "learning_rate": 5.4532969767748244e-08, + "loss": 0.2723, + "step": 47673 + }, + { + "epoch": 0.9543627855766584, + "grad_norm": 1.3406280279159546, + "learning_rate": 5.4485231369023614e-08, + "loss": 0.2804, + "step": 47674 + }, + { + "epoch": 0.9543828040937867, + "grad_norm": 1.2233505249023438, + "learning_rate": 5.443751376017825e-08, + "loss": 0.2806, + "step": 47675 + }, + { + "epoch": 0.9544028226109151, + "grad_norm": 1.116141438484192, + "learning_rate": 5.438981694141365e-08, + "loss": 0.3089, + "step": 47676 + }, + { + "epoch": 0.9544228411280434, + "grad_norm": 1.9452131986618042, + "learning_rate": 5.4342140912929665e-08, + "loss": 0.7639, + "step": 47677 + }, + { + "epoch": 0.9544428596451718, + "grad_norm": 1.050639033317566, + "learning_rate": 5.429448567492723e-08, + "loss": 0.2742, + "step": 47678 + }, + { + "epoch": 0.9544628781623001, + "grad_norm": 1.1655665636062622, + "learning_rate": 5.42468512276062e-08, + "loss": 0.2868, + "step": 47679 + }, + { + "epoch": 0.9544828966794284, + "grad_norm": 1.2529598474502563, + "learning_rate": 5.419923757116752e-08, + "loss": 0.2727, + "step": 47680 + }, + { + "epoch": 0.9545029151965568, + "grad_norm": 1.2767369747161865, + "learning_rate": 5.415164470581102e-08, + "loss": 0.2947, + "step": 47681 + }, + { + "epoch": 0.9545229337136851, + "grad_norm": 1.010335087776184, + "learning_rate": 5.4104072631736e-08, + "loss": 0.2779, + "step": 47682 + }, + { + "epoch": 0.9545429522308135, + "grad_norm": 1.0959423780441284, + "learning_rate": 5.405652134914341e-08, + "loss": 0.2956, + "step": 47683 + }, + { + "epoch": 0.9545629707479418, + "grad_norm": 1.2347450256347656, + "learning_rate": 5.4008990858232524e-08, + "loss": 0.3264, + "step": 47684 + }, + { + "epoch": 0.9545829892650702, + "grad_norm": 1.172698736190796, + "learning_rate": 5.396148115920374e-08, + "loss": 0.3122, + "step": 47685 + }, + { + "epoch": 0.9546030077821985, + "grad_norm": 1.1031485795974731, + "learning_rate": 5.391399225225635e-08, + "loss": 0.3211, + "step": 47686 + }, + { + "epoch": 0.9546230262993268, + "grad_norm": 1.3417404890060425, + "learning_rate": 5.3866524137590195e-08, + "loss": 0.2491, + "step": 47687 + }, + { + "epoch": 0.9546430448164552, + "grad_norm": 1.234938144683838, + "learning_rate": 5.3819076815405106e-08, + "loss": 0.3261, + "step": 47688 + }, + { + "epoch": 0.9546630633335835, + "grad_norm": 1.1517263650894165, + "learning_rate": 5.377165028589981e-08, + "loss": 0.2843, + "step": 47689 + }, + { + "epoch": 0.9546830818507119, + "grad_norm": 1.0169758796691895, + "learning_rate": 5.372424454927416e-08, + "loss": 0.2708, + "step": 47690 + }, + { + "epoch": 0.9547031003678402, + "grad_norm": 1.2664871215820312, + "learning_rate": 5.367685960572688e-08, + "loss": 0.2558, + "step": 47691 + }, + { + "epoch": 0.9547231188849686, + "grad_norm": 1.1705175638198853, + "learning_rate": 5.362949545545837e-08, + "loss": 0.332, + "step": 47692 + }, + { + "epoch": 0.9547431374020969, + "grad_norm": 1.0707029104232788, + "learning_rate": 5.35821520986668e-08, + "loss": 0.2999, + "step": 47693 + }, + { + "epoch": 0.9547631559192253, + "grad_norm": 1.3794984817504883, + "learning_rate": 5.353482953555089e-08, + "loss": 0.3034, + "step": 47694 + }, + { + "epoch": 0.9547831744363536, + "grad_norm": 1.0284641981124878, + "learning_rate": 5.3487527766310496e-08, + "loss": 0.275, + "step": 47695 + }, + { + "epoch": 0.9548031929534819, + "grad_norm": 1.1005380153656006, + "learning_rate": 5.344024679114379e-08, + "loss": 0.3945, + "step": 47696 + }, + { + "epoch": 0.9548232114706103, + "grad_norm": 1.2248351573944092, + "learning_rate": 5.339298661025005e-08, + "loss": 0.3083, + "step": 47697 + }, + { + "epoch": 0.9548432299877386, + "grad_norm": 1.1991158723831177, + "learning_rate": 5.3345747223828015e-08, + "loss": 0.2923, + "step": 47698 + }, + { + "epoch": 0.954863248504867, + "grad_norm": 1.36908757686615, + "learning_rate": 5.329852863207474e-08, + "loss": 0.306, + "step": 47699 + }, + { + "epoch": 0.9548832670219953, + "grad_norm": 1.2597460746765137, + "learning_rate": 5.3251330835191186e-08, + "loss": 0.2797, + "step": 47700 + }, + { + "epoch": 0.9549032855391237, + "grad_norm": 1.1562339067459106, + "learning_rate": 5.320415383337385e-08, + "loss": 0.3038, + "step": 47701 + }, + { + "epoch": 0.954923304056252, + "grad_norm": 1.0979070663452148, + "learning_rate": 5.3156997626821474e-08, + "loss": 0.2791, + "step": 47702 + }, + { + "epoch": 0.9549433225733803, + "grad_norm": 1.1823967695236206, + "learning_rate": 5.310986221573278e-08, + "loss": 0.3151, + "step": 47703 + }, + { + "epoch": 0.9549633410905087, + "grad_norm": 1.1289918422698975, + "learning_rate": 5.3062747600305384e-08, + "loss": 0.3099, + "step": 47704 + }, + { + "epoch": 0.954983359607637, + "grad_norm": 1.1039077043533325, + "learning_rate": 5.301565378073803e-08, + "loss": 0.2802, + "step": 47705 + }, + { + "epoch": 0.9550033781247654, + "grad_norm": 1.0506705045700073, + "learning_rate": 5.2968580757228325e-08, + "loss": 0.2818, + "step": 47706 + }, + { + "epoch": 0.9550233966418937, + "grad_norm": 1.3658326864242554, + "learning_rate": 5.2921528529973895e-08, + "loss": 0.307, + "step": 47707 + }, + { + "epoch": 0.9550434151590221, + "grad_norm": 1.8367186784744263, + "learning_rate": 5.287449709917292e-08, + "loss": 0.7389, + "step": 47708 + }, + { + "epoch": 0.9550634336761504, + "grad_norm": 1.1527258157730103, + "learning_rate": 5.282748646502245e-08, + "loss": 0.3071, + "step": 47709 + }, + { + "epoch": 0.9550834521932788, + "grad_norm": 1.1656486988067627, + "learning_rate": 5.278049662772122e-08, + "loss": 0.2754, + "step": 47710 + }, + { + "epoch": 0.9551034707104071, + "grad_norm": 0.9764650464057922, + "learning_rate": 5.273352758746575e-08, + "loss": 0.2624, + "step": 47711 + }, + { + "epoch": 0.9551234892275354, + "grad_norm": 1.074008584022522, + "learning_rate": 5.26865793444542e-08, + "loss": 0.2907, + "step": 47712 + }, + { + "epoch": 0.9551435077446638, + "grad_norm": 1.2305359840393066, + "learning_rate": 5.2639651898883094e-08, + "loss": 0.2683, + "step": 47713 + }, + { + "epoch": 0.9551635262617921, + "grad_norm": 1.1137831211090088, + "learning_rate": 5.259274525095004e-08, + "loss": 0.3153, + "step": 47714 + }, + { + "epoch": 0.9551835447789205, + "grad_norm": 1.0543570518493652, + "learning_rate": 5.254585940085266e-08, + "loss": 0.321, + "step": 47715 + }, + { + "epoch": 0.9552035632960488, + "grad_norm": 1.1194231510162354, + "learning_rate": 5.249899434878802e-08, + "loss": 0.305, + "step": 47716 + }, + { + "epoch": 0.9552235818131772, + "grad_norm": 1.1283437013626099, + "learning_rate": 5.245215009495264e-08, + "loss": 0.3009, + "step": 47717 + }, + { + "epoch": 0.9552436003303055, + "grad_norm": 1.074021577835083, + "learning_rate": 5.240532663954412e-08, + "loss": 0.2706, + "step": 47718 + }, + { + "epoch": 0.9552636188474338, + "grad_norm": 1.9489437341690063, + "learning_rate": 5.2358523982758426e-08, + "loss": 0.7326, + "step": 47719 + }, + { + "epoch": 0.9552836373645622, + "grad_norm": 1.2012070417404175, + "learning_rate": 5.2311742124793175e-08, + "loss": 0.3072, + "step": 47720 + }, + { + "epoch": 0.9553036558816905, + "grad_norm": 1.1505203247070312, + "learning_rate": 5.2264981065843764e-08, + "loss": 0.2611, + "step": 47721 + }, + { + "epoch": 0.9553236743988189, + "grad_norm": 1.1011468172073364, + "learning_rate": 5.221824080610838e-08, + "loss": 0.2644, + "step": 47722 + }, + { + "epoch": 0.9553436929159472, + "grad_norm": 1.14754056930542, + "learning_rate": 5.21715213457824e-08, + "loss": 0.2764, + "step": 47723 + }, + { + "epoch": 0.9553637114330756, + "grad_norm": 1.0571585893630981, + "learning_rate": 5.212482268506291e-08, + "loss": 0.2833, + "step": 47724 + }, + { + "epoch": 0.9553837299502039, + "grad_norm": 1.9578297138214111, + "learning_rate": 5.207814482414586e-08, + "loss": 0.6965, + "step": 47725 + }, + { + "epoch": 0.9554037484673323, + "grad_norm": 0.9975510239601135, + "learning_rate": 5.2031487763227194e-08, + "loss": 0.2721, + "step": 47726 + }, + { + "epoch": 0.9554237669844606, + "grad_norm": 1.069947361946106, + "learning_rate": 5.198485150250343e-08, + "loss": 0.2544, + "step": 47727 + }, + { + "epoch": 0.9554437855015889, + "grad_norm": 0.9992867112159729, + "learning_rate": 5.1938236042169966e-08, + "loss": 0.2884, + "step": 47728 + }, + { + "epoch": 0.9554638040187173, + "grad_norm": 1.2147024869918823, + "learning_rate": 5.189164138242442e-08, + "loss": 0.3207, + "step": 47729 + }, + { + "epoch": 0.9554838225358456, + "grad_norm": 1.0471794605255127, + "learning_rate": 5.184506752346052e-08, + "loss": 0.3149, + "step": 47730 + }, + { + "epoch": 0.955503841052974, + "grad_norm": 1.007078766822815, + "learning_rate": 5.17985144654759e-08, + "loss": 0.3112, + "step": 47731 + }, + { + "epoch": 0.9555238595701023, + "grad_norm": 1.3591433763504028, + "learning_rate": 5.175198220866484e-08, + "loss": 0.313, + "step": 47732 + }, + { + "epoch": 0.9555438780872307, + "grad_norm": 1.9607008695602417, + "learning_rate": 5.170547075322441e-08, + "loss": 0.8021, + "step": 47733 + }, + { + "epoch": 0.955563896604359, + "grad_norm": 1.0922753810882568, + "learning_rate": 5.1658980099348885e-08, + "loss": 0.2474, + "step": 47734 + }, + { + "epoch": 0.9555839151214873, + "grad_norm": 1.1277230978012085, + "learning_rate": 5.161251024723368e-08, + "loss": 0.2948, + "step": 47735 + }, + { + "epoch": 0.9556039336386157, + "grad_norm": 1.2356541156768799, + "learning_rate": 5.1566061197074745e-08, + "loss": 0.3061, + "step": 47736 + }, + { + "epoch": 0.955623952155744, + "grad_norm": 1.2744982242584229, + "learning_rate": 5.151963294906748e-08, + "loss": 0.2832, + "step": 47737 + }, + { + "epoch": 0.9556439706728724, + "grad_norm": 1.1057127714157104, + "learning_rate": 5.147322550340672e-08, + "loss": 0.3139, + "step": 47738 + }, + { + "epoch": 0.9556639891900007, + "grad_norm": 1.0468605756759644, + "learning_rate": 5.142683886028732e-08, + "loss": 0.3037, + "step": 47739 + }, + { + "epoch": 0.9556840077071291, + "grad_norm": 1.0054970979690552, + "learning_rate": 5.1380473019904675e-08, + "loss": 0.2526, + "step": 47740 + }, + { + "epoch": 0.9557040262242574, + "grad_norm": 1.1060240268707275, + "learning_rate": 5.133412798245363e-08, + "loss": 0.272, + "step": 47741 + }, + { + "epoch": 0.9557240447413858, + "grad_norm": 1.3390696048736572, + "learning_rate": 5.128780374812903e-08, + "loss": 0.3337, + "step": 47742 + }, + { + "epoch": 0.9557440632585141, + "grad_norm": 1.152761697769165, + "learning_rate": 5.124150031712516e-08, + "loss": 0.3256, + "step": 47743 + }, + { + "epoch": 0.9557640817756424, + "grad_norm": 1.2825263738632202, + "learning_rate": 5.119521768963742e-08, + "loss": 0.2654, + "step": 47744 + }, + { + "epoch": 0.9557841002927708, + "grad_norm": 1.114885926246643, + "learning_rate": 5.114895586585955e-08, + "loss": 0.3154, + "step": 47745 + }, + { + "epoch": 0.9558041188098991, + "grad_norm": 1.0178003311157227, + "learning_rate": 5.110271484598695e-08, + "loss": 0.2502, + "step": 47746 + }, + { + "epoch": 0.9558241373270275, + "grad_norm": 1.0355371236801147, + "learning_rate": 5.105649463021334e-08, + "loss": 0.2637, + "step": 47747 + }, + { + "epoch": 0.9558441558441558, + "grad_norm": 1.0530487298965454, + "learning_rate": 5.101029521873302e-08, + "loss": 0.3069, + "step": 47748 + }, + { + "epoch": 0.9558641743612842, + "grad_norm": 0.994594931602478, + "learning_rate": 5.096411661174083e-08, + "loss": 0.2918, + "step": 47749 + }, + { + "epoch": 0.9558841928784125, + "grad_norm": 1.1183127164840698, + "learning_rate": 5.091795880942996e-08, + "loss": 0.3072, + "step": 47750 + }, + { + "epoch": 0.9559042113955408, + "grad_norm": 1.1858282089233398, + "learning_rate": 5.087182181199524e-08, + "loss": 0.2489, + "step": 47751 + }, + { + "epoch": 0.9559242299126692, + "grad_norm": 1.2761237621307373, + "learning_rate": 5.0825705619630406e-08, + "loss": 0.3046, + "step": 47752 + }, + { + "epoch": 0.9559442484297975, + "grad_norm": 1.2163933515548706, + "learning_rate": 5.0779610232529195e-08, + "loss": 0.3493, + "step": 47753 + }, + { + "epoch": 0.9559642669469259, + "grad_norm": 1.0864033699035645, + "learning_rate": 5.073353565088479e-08, + "loss": 0.2852, + "step": 47754 + }, + { + "epoch": 0.9559842854640542, + "grad_norm": 1.1703475713729858, + "learning_rate": 5.068748187489203e-08, + "loss": 0.2525, + "step": 47755 + }, + { + "epoch": 0.9560043039811826, + "grad_norm": 1.0425196886062622, + "learning_rate": 5.0641448904744095e-08, + "loss": 0.2833, + "step": 47756 + }, + { + "epoch": 0.9560243224983109, + "grad_norm": 1.0483592748641968, + "learning_rate": 5.0595436740634165e-08, + "loss": 0.2807, + "step": 47757 + }, + { + "epoch": 0.9560443410154393, + "grad_norm": 1.0658438205718994, + "learning_rate": 5.054944538275597e-08, + "loss": 0.2885, + "step": 47758 + }, + { + "epoch": 0.9560643595325676, + "grad_norm": 1.146392583847046, + "learning_rate": 5.0503474831303246e-08, + "loss": 0.2886, + "step": 47759 + }, + { + "epoch": 0.9560843780496959, + "grad_norm": 0.957477867603302, + "learning_rate": 5.045752508646806e-08, + "loss": 0.2323, + "step": 47760 + }, + { + "epoch": 0.9561043965668243, + "grad_norm": 1.1021968126296997, + "learning_rate": 5.0411596148444706e-08, + "loss": 0.3185, + "step": 47761 + }, + { + "epoch": 0.9561244150839526, + "grad_norm": 1.3329368829727173, + "learning_rate": 5.036568801742525e-08, + "loss": 0.3503, + "step": 47762 + }, + { + "epoch": 0.956144433601081, + "grad_norm": 1.0856670141220093, + "learning_rate": 5.031980069360398e-08, + "loss": 0.3004, + "step": 47763 + }, + { + "epoch": 0.9561644521182093, + "grad_norm": 1.0401169061660767, + "learning_rate": 5.0273934177172965e-08, + "loss": 0.281, + "step": 47764 + }, + { + "epoch": 0.9561844706353377, + "grad_norm": 1.0500715970993042, + "learning_rate": 5.0228088468324834e-08, + "loss": 0.329, + "step": 47765 + }, + { + "epoch": 0.956204489152466, + "grad_norm": 2.0047054290771484, + "learning_rate": 5.018226356725275e-08, + "loss": 0.7273, + "step": 47766 + }, + { + "epoch": 0.9562245076695943, + "grad_norm": 1.3230746984481812, + "learning_rate": 5.0136459474148805e-08, + "loss": 0.3223, + "step": 47767 + }, + { + "epoch": 0.9562445261867227, + "grad_norm": 1.0289525985717773, + "learning_rate": 5.009067618920671e-08, + "loss": 0.2793, + "step": 47768 + }, + { + "epoch": 0.956264544703851, + "grad_norm": 1.0859780311584473, + "learning_rate": 5.0044913712617995e-08, + "loss": 0.3497, + "step": 47769 + }, + { + "epoch": 0.9562845632209794, + "grad_norm": 1.02741539478302, + "learning_rate": 4.999917204457472e-08, + "loss": 0.2809, + "step": 47770 + }, + { + "epoch": 0.9563045817381077, + "grad_norm": 1.1942644119262695, + "learning_rate": 4.9953451185270066e-08, + "loss": 0.2651, + "step": 47771 + }, + { + "epoch": 0.9563246002552361, + "grad_norm": 1.1074631214141846, + "learning_rate": 4.99077511348961e-08, + "loss": 0.2852, + "step": 47772 + }, + { + "epoch": 0.9563446187723644, + "grad_norm": 1.1908798217773438, + "learning_rate": 4.9862071893644336e-08, + "loss": 0.3125, + "step": 47773 + }, + { + "epoch": 0.9563646372894928, + "grad_norm": 1.1120859384536743, + "learning_rate": 4.98164134617074e-08, + "loss": 0.2645, + "step": 47774 + }, + { + "epoch": 0.9563846558066211, + "grad_norm": 1.135130763053894, + "learning_rate": 4.977077583927625e-08, + "loss": 0.324, + "step": 47775 + }, + { + "epoch": 0.9564046743237494, + "grad_norm": 1.1789952516555786, + "learning_rate": 4.9725159026544065e-08, + "loss": 0.2896, + "step": 47776 + }, + { + "epoch": 0.9564246928408778, + "grad_norm": 1.139692783355713, + "learning_rate": 4.967956302370236e-08, + "loss": 0.2977, + "step": 47777 + }, + { + "epoch": 0.9564447113580061, + "grad_norm": 1.128755807876587, + "learning_rate": 4.963398783094153e-08, + "loss": 0.3142, + "step": 47778 + }, + { + "epoch": 0.9564647298751345, + "grad_norm": 1.9020100831985474, + "learning_rate": 4.958843344845477e-08, + "loss": 0.7335, + "step": 47779 + }, + { + "epoch": 0.9564847483922628, + "grad_norm": 1.0733789205551147, + "learning_rate": 4.954289987643246e-08, + "loss": 0.2552, + "step": 47780 + }, + { + "epoch": 0.9565047669093912, + "grad_norm": 1.1832809448242188, + "learning_rate": 4.9497387115066684e-08, + "loss": 0.2902, + "step": 47781 + }, + { + "epoch": 0.9565247854265195, + "grad_norm": 1.1276459693908691, + "learning_rate": 4.94518951645484e-08, + "loss": 0.3477, + "step": 47782 + }, + { + "epoch": 0.9565448039436478, + "grad_norm": 1.3461188077926636, + "learning_rate": 4.940642402506912e-08, + "loss": 0.3183, + "step": 47783 + }, + { + "epoch": 0.9565648224607762, + "grad_norm": 1.3569022417068481, + "learning_rate": 4.93609736968198e-08, + "loss": 0.3217, + "step": 47784 + }, + { + "epoch": 0.9565848409779045, + "grad_norm": 1.2491320371627808, + "learning_rate": 4.931554417999196e-08, + "loss": 0.264, + "step": 47785 + }, + { + "epoch": 0.9566048594950329, + "grad_norm": 1.0855281352996826, + "learning_rate": 4.927013547477544e-08, + "loss": 0.3111, + "step": 47786 + }, + { + "epoch": 0.9566248780121612, + "grad_norm": 1.0857616662979126, + "learning_rate": 4.922474758136231e-08, + "loss": 0.3061, + "step": 47787 + }, + { + "epoch": 0.9566448965292896, + "grad_norm": 1.1238669157028198, + "learning_rate": 4.9179380499942975e-08, + "loss": 0.3008, + "step": 47788 + }, + { + "epoch": 0.9566649150464179, + "grad_norm": 1.126487374305725, + "learning_rate": 4.913403423070784e-08, + "loss": 0.2937, + "step": 47789 + }, + { + "epoch": 0.9566849335635462, + "grad_norm": 1.8537681102752686, + "learning_rate": 4.908870877384786e-08, + "loss": 0.7414, + "step": 47790 + }, + { + "epoch": 0.9567049520806746, + "grad_norm": 1.0515947341918945, + "learning_rate": 4.904340412955344e-08, + "loss": 0.3091, + "step": 47791 + }, + { + "epoch": 0.9567249705978029, + "grad_norm": 1.1031655073165894, + "learning_rate": 4.899812029801554e-08, + "loss": 0.3147, + "step": 47792 + }, + { + "epoch": 0.9567449891149313, + "grad_norm": 2.0275659561157227, + "learning_rate": 4.895285727942345e-08, + "loss": 0.807, + "step": 47793 + }, + { + "epoch": 0.9567650076320596, + "grad_norm": 1.0585241317749023, + "learning_rate": 4.890761507396813e-08, + "loss": 0.3042, + "step": 47794 + }, + { + "epoch": 0.956785026149188, + "grad_norm": 1.2269020080566406, + "learning_rate": 4.8862393681839983e-08, + "loss": 0.3092, + "step": 47795 + }, + { + "epoch": 0.9568050446663163, + "grad_norm": 1.9950116872787476, + "learning_rate": 4.8817193103228857e-08, + "loss": 0.7172, + "step": 47796 + }, + { + "epoch": 0.9568250631834447, + "grad_norm": 1.6904733180999756, + "learning_rate": 4.8772013338324595e-08, + "loss": 0.656, + "step": 47797 + }, + { + "epoch": 0.956845081700573, + "grad_norm": 1.1170986890792847, + "learning_rate": 4.872685438731761e-08, + "loss": 0.2895, + "step": 47798 + }, + { + "epoch": 0.9568651002177013, + "grad_norm": 1.1185424327850342, + "learning_rate": 4.8681716250397194e-08, + "loss": 0.3138, + "step": 47799 + }, + { + "epoch": 0.9568851187348297, + "grad_norm": 1.8825253248214722, + "learning_rate": 4.863659892775319e-08, + "loss": 0.7316, + "step": 47800 + }, + { + "epoch": 0.956905137251958, + "grad_norm": 1.1944859027862549, + "learning_rate": 4.8591502419575444e-08, + "loss": 0.3133, + "step": 47801 + }, + { + "epoch": 0.9569251557690864, + "grad_norm": 1.0773390531539917, + "learning_rate": 4.854642672605325e-08, + "loss": 0.2535, + "step": 47802 + }, + { + "epoch": 0.9569451742862147, + "grad_norm": 1.1695665121078491, + "learning_rate": 4.850137184737702e-08, + "loss": 0.3336, + "step": 47803 + }, + { + "epoch": 0.9569651928033431, + "grad_norm": 1.2515050172805786, + "learning_rate": 4.845633778373437e-08, + "loss": 0.2895, + "step": 47804 + }, + { + "epoch": 0.9569852113204714, + "grad_norm": 1.0979212522506714, + "learning_rate": 4.841132453531627e-08, + "loss": 0.2998, + "step": 47805 + }, + { + "epoch": 0.9570052298375997, + "grad_norm": 1.138648509979248, + "learning_rate": 4.836633210231145e-08, + "loss": 0.2572, + "step": 47806 + }, + { + "epoch": 0.9570252483547281, + "grad_norm": 1.2125004529953003, + "learning_rate": 4.832136048490865e-08, + "loss": 0.3165, + "step": 47807 + }, + { + "epoch": 0.9570452668718564, + "grad_norm": 1.2595518827438354, + "learning_rate": 4.827640968329772e-08, + "loss": 0.2975, + "step": 47808 + }, + { + "epoch": 0.9570652853889848, + "grad_norm": 1.045607089996338, + "learning_rate": 4.823147969766684e-08, + "loss": 0.2874, + "step": 47809 + }, + { + "epoch": 0.9570853039061131, + "grad_norm": 1.1853289604187012, + "learning_rate": 4.81865705282053e-08, + "loss": 0.3146, + "step": 47810 + }, + { + "epoch": 0.9571053224232415, + "grad_norm": 1.1119405031204224, + "learning_rate": 4.814168217510185e-08, + "loss": 0.2606, + "step": 47811 + }, + { + "epoch": 0.9571253409403698, + "grad_norm": 1.9761137962341309, + "learning_rate": 4.809681463854521e-08, + "loss": 0.7476, + "step": 47812 + }, + { + "epoch": 0.9571453594574982, + "grad_norm": 1.3234182596206665, + "learning_rate": 4.805196791872357e-08, + "loss": 0.3123, + "step": 47813 + }, + { + "epoch": 0.9571653779746265, + "grad_norm": 1.0605180263519287, + "learning_rate": 4.800714201582568e-08, + "loss": 0.317, + "step": 47814 + }, + { + "epoch": 0.9571853964917548, + "grad_norm": 1.0653661489486694, + "learning_rate": 4.796233693004027e-08, + "loss": 0.2854, + "step": 47815 + }, + { + "epoch": 0.9572054150088832, + "grad_norm": 1.103908658027649, + "learning_rate": 4.791755266155551e-08, + "loss": 0.2903, + "step": 47816 + }, + { + "epoch": 0.9572254335260115, + "grad_norm": 0.9860993027687073, + "learning_rate": 4.78727892105596e-08, + "loss": 0.2326, + "step": 47817 + }, + { + "epoch": 0.9572454520431399, + "grad_norm": 1.041894555091858, + "learning_rate": 4.782804657724127e-08, + "loss": 0.3002, + "step": 47818 + }, + { + "epoch": 0.9572654705602682, + "grad_norm": 1.0681241750717163, + "learning_rate": 4.7783324761787044e-08, + "loss": 0.2438, + "step": 47819 + }, + { + "epoch": 0.9572854890773966, + "grad_norm": 1.067176103591919, + "learning_rate": 4.773862376438676e-08, + "loss": 0.2932, + "step": 47820 + }, + { + "epoch": 0.9573055075945249, + "grad_norm": 1.0515037775039673, + "learning_rate": 4.76939435852275e-08, + "loss": 0.2074, + "step": 47821 + }, + { + "epoch": 0.9573255261116532, + "grad_norm": 1.1010795831680298, + "learning_rate": 4.764928422449744e-08, + "loss": 0.309, + "step": 47822 + }, + { + "epoch": 0.9573455446287816, + "grad_norm": 1.113953709602356, + "learning_rate": 4.7604645682383657e-08, + "loss": 0.3069, + "step": 47823 + }, + { + "epoch": 0.9573655631459099, + "grad_norm": 1.2440334558486938, + "learning_rate": 4.7560027959074325e-08, + "loss": 0.3045, + "step": 47824 + }, + { + "epoch": 0.9573855816630383, + "grad_norm": 1.2469583749771118, + "learning_rate": 4.751543105475653e-08, + "loss": 0.2885, + "step": 47825 + }, + { + "epoch": 0.9574056001801666, + "grad_norm": 1.8502832651138306, + "learning_rate": 4.747085496961845e-08, + "loss": 0.7456, + "step": 47826 + }, + { + "epoch": 0.957425618697295, + "grad_norm": 1.1294052600860596, + "learning_rate": 4.742629970384716e-08, + "loss": 0.3253, + "step": 47827 + }, + { + "epoch": 0.9574456372144233, + "grad_norm": 1.1044148206710815, + "learning_rate": 4.738176525762916e-08, + "loss": 0.294, + "step": 47828 + }, + { + "epoch": 0.9574656557315517, + "grad_norm": 1.1304854154586792, + "learning_rate": 4.733725163115321e-08, + "loss": 0.3361, + "step": 47829 + }, + { + "epoch": 0.95748567424868, + "grad_norm": 1.2409412860870361, + "learning_rate": 4.729275882460527e-08, + "loss": 0.3314, + "step": 47830 + }, + { + "epoch": 0.9575056927658083, + "grad_norm": 1.1167935132980347, + "learning_rate": 4.7248286838172955e-08, + "loss": 0.2976, + "step": 47831 + }, + { + "epoch": 0.9575257112829367, + "grad_norm": 1.1113468408584595, + "learning_rate": 4.720383567204223e-08, + "loss": 0.3123, + "step": 47832 + }, + { + "epoch": 0.957545729800065, + "grad_norm": 1.9541447162628174, + "learning_rate": 4.715940532640184e-08, + "loss": 0.701, + "step": 47833 + }, + { + "epoch": 0.9575657483171934, + "grad_norm": 1.142420768737793, + "learning_rate": 4.711499580143664e-08, + "loss": 0.3042, + "step": 47834 + }, + { + "epoch": 0.9575857668343217, + "grad_norm": 1.228255033493042, + "learning_rate": 4.7070607097334795e-08, + "loss": 0.2901, + "step": 47835 + }, + { + "epoch": 0.9576057853514501, + "grad_norm": 1.0237988233566284, + "learning_rate": 4.7026239214281734e-08, + "loss": 0.2397, + "step": 47836 + }, + { + "epoch": 0.9576258038685784, + "grad_norm": 1.2627514600753784, + "learning_rate": 4.6981892152464516e-08, + "loss": 0.2877, + "step": 47837 + }, + { + "epoch": 0.9576458223857067, + "grad_norm": 1.8399280309677124, + "learning_rate": 4.693756591206966e-08, + "loss": 0.7359, + "step": 47838 + }, + { + "epoch": 0.9576658409028351, + "grad_norm": 1.027021050453186, + "learning_rate": 4.689326049328369e-08, + "loss": 0.285, + "step": 47839 + }, + { + "epoch": 0.9576858594199634, + "grad_norm": 1.0670366287231445, + "learning_rate": 4.6848975896292e-08, + "loss": 0.2985, + "step": 47840 + }, + { + "epoch": 0.9577058779370918, + "grad_norm": 1.266205072402954, + "learning_rate": 4.6804712121281124e-08, + "loss": 0.3, + "step": 47841 + }, + { + "epoch": 0.9577258964542201, + "grad_norm": 1.1483668088912964, + "learning_rate": 4.676046916843813e-08, + "loss": 0.3158, + "step": 47842 + }, + { + "epoch": 0.9577459149713485, + "grad_norm": 1.1048243045806885, + "learning_rate": 4.67162470379473e-08, + "loss": 0.2826, + "step": 47843 + }, + { + "epoch": 0.9577659334884768, + "grad_norm": 1.8808178901672363, + "learning_rate": 4.667204572999573e-08, + "loss": 0.6856, + "step": 47844 + }, + { + "epoch": 0.9577859520056052, + "grad_norm": 1.2194459438323975, + "learning_rate": 4.662786524476881e-08, + "loss": 0.2961, + "step": 47845 + }, + { + "epoch": 0.9578059705227335, + "grad_norm": 1.0766181945800781, + "learning_rate": 4.658370558245251e-08, + "loss": 0.2716, + "step": 47846 + }, + { + "epoch": 0.9578259890398618, + "grad_norm": 1.9239437580108643, + "learning_rate": 4.653956674323279e-08, + "loss": 0.7605, + "step": 47847 + }, + { + "epoch": 0.9578460075569902, + "grad_norm": 1.0644241571426392, + "learning_rate": 4.6495448727293946e-08, + "loss": 0.2713, + "step": 47848 + }, + { + "epoch": 0.9578660260741185, + "grad_norm": 1.30924654006958, + "learning_rate": 4.6451351534823054e-08, + "loss": 0.3123, + "step": 47849 + }, + { + "epoch": 0.9578860445912469, + "grad_norm": 1.023941159248352, + "learning_rate": 4.640727516600385e-08, + "loss": 0.2281, + "step": 47850 + }, + { + "epoch": 0.9579060631083752, + "grad_norm": 1.271996021270752, + "learning_rate": 4.636321962102286e-08, + "loss": 0.3258, + "step": 47851 + }, + { + "epoch": 0.9579260816255036, + "grad_norm": 1.0598727464675903, + "learning_rate": 4.631918490006493e-08, + "loss": 0.2765, + "step": 47852 + }, + { + "epoch": 0.9579461001426319, + "grad_norm": 1.2158106565475464, + "learning_rate": 4.627517100331491e-08, + "loss": 0.3605, + "step": 47853 + }, + { + "epoch": 0.9579661186597602, + "grad_norm": 1.0895682573318481, + "learning_rate": 4.623117793095766e-08, + "loss": 0.2847, + "step": 47854 + }, + { + "epoch": 0.9579861371768886, + "grad_norm": 1.0445407629013062, + "learning_rate": 4.618720568317914e-08, + "loss": 0.275, + "step": 47855 + }, + { + "epoch": 0.9580061556940169, + "grad_norm": 1.1034393310546875, + "learning_rate": 4.6143254260163086e-08, + "loss": 0.3112, + "step": 47856 + }, + { + "epoch": 0.9580261742111453, + "grad_norm": 1.1630808115005493, + "learning_rate": 4.6099323662094905e-08, + "loss": 0.3251, + "step": 47857 + }, + { + "epoch": 0.9580461927282736, + "grad_norm": 1.1911789178848267, + "learning_rate": 4.605541388915946e-08, + "loss": 0.3298, + "step": 47858 + }, + { + "epoch": 0.958066211245402, + "grad_norm": 1.2793011665344238, + "learning_rate": 4.601152494153993e-08, + "loss": 0.2997, + "step": 47859 + }, + { + "epoch": 0.9580862297625303, + "grad_norm": 1.1598596572875977, + "learning_rate": 4.596765681942284e-08, + "loss": 0.3213, + "step": 47860 + }, + { + "epoch": 0.9581062482796587, + "grad_norm": 1.0495883226394653, + "learning_rate": 4.592380952299136e-08, + "loss": 0.2839, + "step": 47861 + }, + { + "epoch": 0.958126266796787, + "grad_norm": 1.1235243082046509, + "learning_rate": 4.58799830524298e-08, + "loss": 0.2767, + "step": 47862 + }, + { + "epoch": 0.9581462853139153, + "grad_norm": 1.2058300971984863, + "learning_rate": 4.583617740792245e-08, + "loss": 0.31, + "step": 47863 + }, + { + "epoch": 0.9581663038310437, + "grad_norm": 1.0631136894226074, + "learning_rate": 4.579239258965418e-08, + "loss": 0.3027, + "step": 47864 + }, + { + "epoch": 0.958186322348172, + "grad_norm": 0.9822171926498413, + "learning_rate": 4.5748628597808156e-08, + "loss": 0.2714, + "step": 47865 + }, + { + "epoch": 0.9582063408653004, + "grad_norm": 1.0616940259933472, + "learning_rate": 4.5704885432569235e-08, + "loss": 0.3036, + "step": 47866 + }, + { + "epoch": 0.9582263593824287, + "grad_norm": 1.2056442499160767, + "learning_rate": 4.5661163094120055e-08, + "loss": 0.3151, + "step": 47867 + }, + { + "epoch": 0.9582463778995571, + "grad_norm": 1.0980422496795654, + "learning_rate": 4.5617461582646014e-08, + "loss": 0.3103, + "step": 47868 + }, + { + "epoch": 0.9582663964166854, + "grad_norm": 1.0365647077560425, + "learning_rate": 4.55737808983292e-08, + "loss": 0.3016, + "step": 47869 + }, + { + "epoch": 0.9582864149338137, + "grad_norm": 1.1389083862304688, + "learning_rate": 4.553012104135446e-08, + "loss": 0.3298, + "step": 47870 + }, + { + "epoch": 0.9583064334509421, + "grad_norm": 1.2579683065414429, + "learning_rate": 4.548648201190498e-08, + "loss": 0.3068, + "step": 47871 + }, + { + "epoch": 0.9583264519680704, + "grad_norm": 1.1912500858306885, + "learning_rate": 4.544286381016339e-08, + "loss": 0.3159, + "step": 47872 + }, + { + "epoch": 0.9583464704851988, + "grad_norm": 1.1015275716781616, + "learning_rate": 4.539926643631454e-08, + "loss": 0.2932, + "step": 47873 + }, + { + "epoch": 0.9583664890023271, + "grad_norm": 1.038643717765808, + "learning_rate": 4.535568989054051e-08, + "loss": 0.3308, + "step": 47874 + }, + { + "epoch": 0.9583865075194555, + "grad_norm": 2.012760639190674, + "learning_rate": 4.531213417302505e-08, + "loss": 0.719, + "step": 47875 + }, + { + "epoch": 0.9584065260365838, + "grad_norm": 0.9950318336486816, + "learning_rate": 4.526859928395133e-08, + "loss": 0.2529, + "step": 47876 + }, + { + "epoch": 0.9584265445537122, + "grad_norm": 1.2147024869918823, + "learning_rate": 4.5225085223502e-08, + "loss": 0.2926, + "step": 47877 + }, + { + "epoch": 0.9584465630708405, + "grad_norm": 1.0933541059494019, + "learning_rate": 4.518159199186023e-08, + "loss": 0.2705, + "step": 47878 + }, + { + "epoch": 0.9584665815879688, + "grad_norm": 1.1775462627410889, + "learning_rate": 4.513811958920866e-08, + "loss": 0.3106, + "step": 47879 + }, + { + "epoch": 0.9584866001050972, + "grad_norm": 1.1738550662994385, + "learning_rate": 4.5094668015729926e-08, + "loss": 0.3209, + "step": 47880 + }, + { + "epoch": 0.9585066186222255, + "grad_norm": 1.1299173831939697, + "learning_rate": 4.505123727160665e-08, + "loss": 0.3383, + "step": 47881 + }, + { + "epoch": 0.9585266371393539, + "grad_norm": 1.047112226486206, + "learning_rate": 4.500782735702203e-08, + "loss": 0.3163, + "step": 47882 + }, + { + "epoch": 0.9585466556564822, + "grad_norm": 1.0939563512802124, + "learning_rate": 4.496443827215813e-08, + "loss": 0.2685, + "step": 47883 + }, + { + "epoch": 0.9585666741736106, + "grad_norm": 1.781524896621704, + "learning_rate": 4.4921070017197586e-08, + "loss": 0.2668, + "step": 47884 + }, + { + "epoch": 0.9585866926907389, + "grad_norm": 1.0270124673843384, + "learning_rate": 4.487772259232248e-08, + "loss": 0.2887, + "step": 47885 + }, + { + "epoch": 0.9586067112078672, + "grad_norm": 1.1476489305496216, + "learning_rate": 4.483439599771489e-08, + "loss": 0.2549, + "step": 47886 + }, + { + "epoch": 0.9586267297249956, + "grad_norm": 1.240216612815857, + "learning_rate": 4.479109023355743e-08, + "loss": 0.3042, + "step": 47887 + }, + { + "epoch": 0.9586467482421239, + "grad_norm": 1.1325794458389282, + "learning_rate": 4.474780530003164e-08, + "loss": 0.2992, + "step": 47888 + }, + { + "epoch": 0.9586667667592523, + "grad_norm": 1.1305286884307861, + "learning_rate": 4.470454119732015e-08, + "loss": 0.3091, + "step": 47889 + }, + { + "epoch": 0.9586867852763806, + "grad_norm": 1.108583927154541, + "learning_rate": 4.4661297925603364e-08, + "loss": 0.3085, + "step": 47890 + }, + { + "epoch": 0.958706803793509, + "grad_norm": 1.1167263984680176, + "learning_rate": 4.461807548506503e-08, + "loss": 0.2696, + "step": 47891 + }, + { + "epoch": 0.9587268223106373, + "grad_norm": 1.06256103515625, + "learning_rate": 4.4574873875885546e-08, + "loss": 0.2692, + "step": 47892 + }, + { + "epoch": 0.9587468408277657, + "grad_norm": 1.2537016868591309, + "learning_rate": 4.4531693098246454e-08, + "loss": 0.3352, + "step": 47893 + }, + { + "epoch": 0.958766859344894, + "grad_norm": 1.1973577737808228, + "learning_rate": 4.448853315233037e-08, + "loss": 0.2605, + "step": 47894 + }, + { + "epoch": 0.9587868778620223, + "grad_norm": 1.0646599531173706, + "learning_rate": 4.444539403831771e-08, + "loss": 0.2924, + "step": 47895 + }, + { + "epoch": 0.9588068963791507, + "grad_norm": 1.0901854038238525, + "learning_rate": 4.440227575639e-08, + "loss": 0.2842, + "step": 47896 + }, + { + "epoch": 0.958826914896279, + "grad_norm": 2.1737911701202393, + "learning_rate": 4.435917830672931e-08, + "loss": 0.7296, + "step": 47897 + }, + { + "epoch": 0.9588469334134074, + "grad_norm": 1.1298470497131348, + "learning_rate": 4.43161016895155e-08, + "loss": 0.3315, + "step": 47898 + }, + { + "epoch": 0.9588669519305357, + "grad_norm": 1.184075117111206, + "learning_rate": 4.427304590493065e-08, + "loss": 0.3405, + "step": 47899 + }, + { + "epoch": 0.9588869704476641, + "grad_norm": 1.1172586679458618, + "learning_rate": 4.4230010953155154e-08, + "loss": 0.3087, + "step": 47900 + }, + { + "epoch": 0.9589069889647924, + "grad_norm": 1.1701430082321167, + "learning_rate": 4.4186996834370555e-08, + "loss": 0.3421, + "step": 47901 + }, + { + "epoch": 0.9589270074819207, + "grad_norm": 1.1764144897460938, + "learning_rate": 4.4144003548757255e-08, + "loss": 0.3335, + "step": 47902 + }, + { + "epoch": 0.9589470259990491, + "grad_norm": 1.0777966976165771, + "learning_rate": 4.410103109649566e-08, + "loss": 0.2444, + "step": 47903 + }, + { + "epoch": 0.9589670445161774, + "grad_norm": 1.1896973848342896, + "learning_rate": 4.4058079477767305e-08, + "loss": 0.2496, + "step": 47904 + }, + { + "epoch": 0.9589870630333058, + "grad_norm": 1.047389268875122, + "learning_rate": 4.401514869275259e-08, + "loss": 0.2603, + "step": 47905 + }, + { + "epoch": 0.9590070815504341, + "grad_norm": 1.066266655921936, + "learning_rate": 4.3972238741630816e-08, + "loss": 0.271, + "step": 47906 + }, + { + "epoch": 0.9590271000675625, + "grad_norm": 1.2389262914657593, + "learning_rate": 4.3929349624583505e-08, + "loss": 0.3563, + "step": 47907 + }, + { + "epoch": 0.9590471185846908, + "grad_norm": 1.1621371507644653, + "learning_rate": 4.3886481341790524e-08, + "loss": 0.3116, + "step": 47908 + }, + { + "epoch": 0.9590671371018192, + "grad_norm": 1.1467386484146118, + "learning_rate": 4.384363389343227e-08, + "loss": 0.2933, + "step": 47909 + }, + { + "epoch": 0.9590871556189475, + "grad_norm": 1.8951020240783691, + "learning_rate": 4.380080727968916e-08, + "loss": 0.6717, + "step": 47910 + }, + { + "epoch": 0.9591071741360758, + "grad_norm": 1.141941785812378, + "learning_rate": 4.37580015007405e-08, + "loss": 0.2907, + "step": 47911 + }, + { + "epoch": 0.9591271926532042, + "grad_norm": 1.070955753326416, + "learning_rate": 4.3715216556766695e-08, + "loss": 0.272, + "step": 47912 + }, + { + "epoch": 0.9591472111703325, + "grad_norm": 1.0753943920135498, + "learning_rate": 4.36724524479476e-08, + "loss": 0.3075, + "step": 47913 + }, + { + "epoch": 0.9591672296874609, + "grad_norm": 1.1559609174728394, + "learning_rate": 4.362970917446252e-08, + "loss": 0.3014, + "step": 47914 + }, + { + "epoch": 0.9591872482045892, + "grad_norm": 1.1343443393707275, + "learning_rate": 4.3586986736491866e-08, + "loss": 0.2631, + "step": 47915 + }, + { + "epoch": 0.9592072667217176, + "grad_norm": 1.100277304649353, + "learning_rate": 4.3544285134214936e-08, + "loss": 0.3122, + "step": 47916 + }, + { + "epoch": 0.9592272852388459, + "grad_norm": 1.1428539752960205, + "learning_rate": 4.3501604367811035e-08, + "loss": 0.2975, + "step": 47917 + }, + { + "epoch": 0.9592473037559742, + "grad_norm": 1.2289435863494873, + "learning_rate": 4.345894443746001e-08, + "loss": 0.3424, + "step": 47918 + }, + { + "epoch": 0.9592673222731026, + "grad_norm": 1.2534897327423096, + "learning_rate": 4.341630534334063e-08, + "loss": 0.2966, + "step": 47919 + }, + { + "epoch": 0.9592873407902309, + "grad_norm": 0.9938738346099854, + "learning_rate": 4.337368708563272e-08, + "loss": 0.2595, + "step": 47920 + }, + { + "epoch": 0.9593073593073593, + "grad_norm": 1.4037777185440063, + "learning_rate": 4.333108966451449e-08, + "loss": 0.3197, + "step": 47921 + }, + { + "epoch": 0.9593273778244876, + "grad_norm": 1.1389771699905396, + "learning_rate": 4.328851308016635e-08, + "loss": 0.2857, + "step": 47922 + }, + { + "epoch": 0.959347396341616, + "grad_norm": 1.9320003986358643, + "learning_rate": 4.324595733276649e-08, + "loss": 0.7108, + "step": 47923 + }, + { + "epoch": 0.9593674148587443, + "grad_norm": 1.0909055471420288, + "learning_rate": 4.3203422422493644e-08, + "loss": 0.2457, + "step": 47924 + }, + { + "epoch": 0.9593874333758727, + "grad_norm": 1.3975105285644531, + "learning_rate": 4.316090834952713e-08, + "loss": 0.2815, + "step": 47925 + }, + { + "epoch": 0.959407451893001, + "grad_norm": 1.091104507446289, + "learning_rate": 4.311841511404513e-08, + "loss": 0.2954, + "step": 47926 + }, + { + "epoch": 0.9594274704101293, + "grad_norm": 1.0997881889343262, + "learning_rate": 4.307594271622695e-08, + "loss": 0.2921, + "step": 47927 + }, + { + "epoch": 0.9594474889272577, + "grad_norm": 1.0650379657745361, + "learning_rate": 4.303349115625077e-08, + "loss": 0.2828, + "step": 47928 + }, + { + "epoch": 0.959467507444386, + "grad_norm": 1.0883013010025024, + "learning_rate": 4.299106043429479e-08, + "loss": 0.267, + "step": 47929 + }, + { + "epoch": 0.9594875259615144, + "grad_norm": 1.0871937274932861, + "learning_rate": 4.294865055053776e-08, + "loss": 0.2566, + "step": 47930 + }, + { + "epoch": 0.9595075444786427, + "grad_norm": 1.209738850593567, + "learning_rate": 4.2906261505157864e-08, + "loss": 0.2953, + "step": 47931 + }, + { + "epoch": 0.9595275629957711, + "grad_norm": 1.3140428066253662, + "learning_rate": 4.286389329833329e-08, + "loss": 0.274, + "step": 47932 + }, + { + "epoch": 0.9595475815128994, + "grad_norm": 1.9278322458267212, + "learning_rate": 4.282154593024168e-08, + "loss": 0.7387, + "step": 47933 + }, + { + "epoch": 0.9595676000300277, + "grad_norm": 1.3877232074737549, + "learning_rate": 4.277921940106178e-08, + "loss": 0.282, + "step": 47934 + }, + { + "epoch": 0.9595876185471561, + "grad_norm": 1.2151756286621094, + "learning_rate": 4.273691371097177e-08, + "loss": 0.2989, + "step": 47935 + }, + { + "epoch": 0.9596076370642844, + "grad_norm": 0.9619989395141602, + "learning_rate": 4.2694628860148194e-08, + "loss": 0.3018, + "step": 47936 + }, + { + "epoch": 0.9596276555814128, + "grad_norm": 2.101593017578125, + "learning_rate": 4.265236484877033e-08, + "loss": 0.8296, + "step": 47937 + }, + { + "epoch": 0.9596476740985411, + "grad_norm": 1.0873544216156006, + "learning_rate": 4.261012167701417e-08, + "loss": 0.3082, + "step": 47938 + }, + { + "epoch": 0.9596676926156695, + "grad_norm": 1.217703104019165, + "learning_rate": 4.2567899345059007e-08, + "loss": 0.3074, + "step": 47939 + }, + { + "epoch": 0.9596877111327978, + "grad_norm": 1.0131593942642212, + "learning_rate": 4.252569785308136e-08, + "loss": 0.2551, + "step": 47940 + }, + { + "epoch": 0.9597077296499262, + "grad_norm": 1.169514775276184, + "learning_rate": 4.248351720125887e-08, + "loss": 0.3053, + "step": 47941 + }, + { + "epoch": 0.9597277481670545, + "grad_norm": 1.0532721281051636, + "learning_rate": 4.2441357389768624e-08, + "loss": 0.3166, + "step": 47942 + }, + { + "epoch": 0.9597477666841828, + "grad_norm": 1.1094814538955688, + "learning_rate": 4.239921841878769e-08, + "loss": 0.2629, + "step": 47943 + }, + { + "epoch": 0.9597677852013112, + "grad_norm": 1.0025895833969116, + "learning_rate": 4.2357100288494266e-08, + "loss": 0.2505, + "step": 47944 + }, + { + "epoch": 0.9597878037184395, + "grad_norm": 1.1052709817886353, + "learning_rate": 4.231500299906377e-08, + "loss": 0.3151, + "step": 47945 + }, + { + "epoch": 0.9598078222355679, + "grad_norm": 1.1164519786834717, + "learning_rate": 4.227292655067494e-08, + "loss": 0.2652, + "step": 47946 + }, + { + "epoch": 0.9598278407526962, + "grad_norm": 1.2296111583709717, + "learning_rate": 4.22308709435032e-08, + "loss": 0.278, + "step": 47947 + }, + { + "epoch": 0.9598478592698246, + "grad_norm": 0.9925984144210815, + "learning_rate": 4.2188836177726176e-08, + "loss": 0.2658, + "step": 47948 + }, + { + "epoch": 0.9598678777869529, + "grad_norm": 1.1825112104415894, + "learning_rate": 4.214682225352096e-08, + "loss": 0.2896, + "step": 47949 + }, + { + "epoch": 0.9598878963040812, + "grad_norm": 1.9834918975830078, + "learning_rate": 4.2104829171062956e-08, + "loss": 0.8123, + "step": 47950 + }, + { + "epoch": 0.9599079148212096, + "grad_norm": 1.033634066581726, + "learning_rate": 4.206285693052981e-08, + "loss": 0.3035, + "step": 47951 + }, + { + "epoch": 0.9599279333383379, + "grad_norm": 1.0490968227386475, + "learning_rate": 4.2020905532096366e-08, + "loss": 0.2748, + "step": 47952 + }, + { + "epoch": 0.9599479518554663, + "grad_norm": 1.0504778623580933, + "learning_rate": 4.1978974975940833e-08, + "loss": 0.2678, + "step": 47953 + }, + { + "epoch": 0.9599679703725946, + "grad_norm": 1.3126904964447021, + "learning_rate": 4.1937065262239175e-08, + "loss": 0.272, + "step": 47954 + }, + { + "epoch": 0.959987988889723, + "grad_norm": 1.0446796417236328, + "learning_rate": 4.189517639116625e-08, + "loss": 0.2833, + "step": 47955 + }, + { + "epoch": 0.9600080074068513, + "grad_norm": 2.1144227981567383, + "learning_rate": 4.185330836289914e-08, + "loss": 0.7167, + "step": 47956 + }, + { + "epoch": 0.9600280259239797, + "grad_norm": 1.2940161228179932, + "learning_rate": 4.1811461177614366e-08, + "loss": 0.4105, + "step": 47957 + }, + { + "epoch": 0.960048044441108, + "grad_norm": 1.481912612915039, + "learning_rate": 4.176963483548624e-08, + "loss": 0.2532, + "step": 47958 + }, + { + "epoch": 0.9600680629582363, + "grad_norm": 1.2481168508529663, + "learning_rate": 4.17278293366924e-08, + "loss": 0.3085, + "step": 47959 + }, + { + "epoch": 0.9600880814753647, + "grad_norm": 2.109511137008667, + "learning_rate": 4.1686044681407137e-08, + "loss": 0.7443, + "step": 47960 + }, + { + "epoch": 0.960108099992493, + "grad_norm": 1.2151002883911133, + "learning_rate": 4.1644280869806985e-08, + "loss": 0.2904, + "step": 47961 + }, + { + "epoch": 0.9601281185096214, + "grad_norm": 1.1807284355163574, + "learning_rate": 4.160253790206681e-08, + "loss": 0.3394, + "step": 47962 + }, + { + "epoch": 0.9601481370267497, + "grad_norm": 1.0807589292526245, + "learning_rate": 4.156081577836313e-08, + "loss": 0.2918, + "step": 47963 + }, + { + "epoch": 0.9601681555438781, + "grad_norm": 1.1607041358947754, + "learning_rate": 4.151911449887081e-08, + "loss": 0.2704, + "step": 47964 + }, + { + "epoch": 0.9601881740610064, + "grad_norm": 2.2225661277770996, + "learning_rate": 4.1477434063764145e-08, + "loss": 0.7394, + "step": 47965 + }, + { + "epoch": 0.9602081925781347, + "grad_norm": 1.3574374914169312, + "learning_rate": 4.1435774473220223e-08, + "loss": 0.274, + "step": 47966 + }, + { + "epoch": 0.9602282110952631, + "grad_norm": 1.1691805124282837, + "learning_rate": 4.13941357274128e-08, + "loss": 0.2975, + "step": 47967 + }, + { + "epoch": 0.9602482296123914, + "grad_norm": 1.0345118045806885, + "learning_rate": 4.135251782651728e-08, + "loss": 0.2711, + "step": 47968 + }, + { + "epoch": 0.9602682481295198, + "grad_norm": 1.2164356708526611, + "learning_rate": 4.131092077070853e-08, + "loss": 0.2924, + "step": 47969 + }, + { + "epoch": 0.9602882666466481, + "grad_norm": 1.2172629833221436, + "learning_rate": 4.126934456016196e-08, + "loss": 0.2758, + "step": 47970 + }, + { + "epoch": 0.9603082851637765, + "grad_norm": 1.106513261795044, + "learning_rate": 4.1227789195051325e-08, + "loss": 0.3055, + "step": 47971 + }, + { + "epoch": 0.9603283036809048, + "grad_norm": 1.0354344844818115, + "learning_rate": 4.1186254675552593e-08, + "loss": 0.3223, + "step": 47972 + }, + { + "epoch": 0.9603483221980332, + "grad_norm": 1.0371891260147095, + "learning_rate": 4.114474100183952e-08, + "loss": 0.2646, + "step": 47973 + }, + { + "epoch": 0.9603683407151615, + "grad_norm": 1.1700351238250732, + "learning_rate": 4.1103248174086394e-08, + "loss": 0.3263, + "step": 47974 + }, + { + "epoch": 0.9603883592322898, + "grad_norm": 1.1785143613815308, + "learning_rate": 4.1061776192468096e-08, + "loss": 0.3027, + "step": 47975 + }, + { + "epoch": 0.9604083777494182, + "grad_norm": 1.0634976625442505, + "learning_rate": 4.102032505715947e-08, + "loss": 0.2808, + "step": 47976 + }, + { + "epoch": 0.9604283962665465, + "grad_norm": 1.097535490989685, + "learning_rate": 4.097889476833372e-08, + "loss": 0.3094, + "step": 47977 + }, + { + "epoch": 0.9604484147836749, + "grad_norm": 1.0274267196655273, + "learning_rate": 4.0937485326165703e-08, + "loss": 0.2969, + "step": 47978 + }, + { + "epoch": 0.9604684333008032, + "grad_norm": 1.93130362033844, + "learning_rate": 4.0896096730829174e-08, + "loss": 0.6868, + "step": 47979 + }, + { + "epoch": 0.9604884518179316, + "grad_norm": 1.2977608442306519, + "learning_rate": 4.085472898249843e-08, + "loss": 0.3119, + "step": 47980 + }, + { + "epoch": 0.9605084703350599, + "grad_norm": 1.0899126529693604, + "learning_rate": 4.081338208134722e-08, + "loss": 0.2826, + "step": 47981 + }, + { + "epoch": 0.9605284888521882, + "grad_norm": 1.1084749698638916, + "learning_rate": 4.07720560275493e-08, + "loss": 0.3063, + "step": 47982 + }, + { + "epoch": 0.9605485073693166, + "grad_norm": 1.1924182176589966, + "learning_rate": 4.0730750821278416e-08, + "loss": 0.2646, + "step": 47983 + }, + { + "epoch": 0.9605685258864449, + "grad_norm": 1.0301029682159424, + "learning_rate": 4.068946646270777e-08, + "loss": 0.2733, + "step": 47984 + }, + { + "epoch": 0.9605885444035733, + "grad_norm": 1.0826481580734253, + "learning_rate": 4.064820295201166e-08, + "loss": 0.287, + "step": 47985 + }, + { + "epoch": 0.9606085629207016, + "grad_norm": 1.015712857246399, + "learning_rate": 4.0606960289363284e-08, + "loss": 0.2901, + "step": 47986 + }, + { + "epoch": 0.96062858143783, + "grad_norm": 1.1083502769470215, + "learning_rate": 4.056573847493584e-08, + "loss": 0.2715, + "step": 47987 + }, + { + "epoch": 0.9606485999549583, + "grad_norm": 1.0526880025863647, + "learning_rate": 4.052453750890306e-08, + "loss": 0.258, + "step": 47988 + }, + { + "epoch": 0.9606686184720867, + "grad_norm": 1.075892686843872, + "learning_rate": 4.0483357391437606e-08, + "loss": 0.3021, + "step": 47989 + }, + { + "epoch": 0.960688636989215, + "grad_norm": 1.1524596214294434, + "learning_rate": 4.044219812271322e-08, + "loss": 0.3296, + "step": 47990 + }, + { + "epoch": 0.9607086555063433, + "grad_norm": 1.0691535472869873, + "learning_rate": 4.040105970290198e-08, + "loss": 0.2813, + "step": 47991 + }, + { + "epoch": 0.9607286740234717, + "grad_norm": 1.188208818435669, + "learning_rate": 4.0359942132177645e-08, + "loss": 0.2645, + "step": 47992 + }, + { + "epoch": 0.9607486925406, + "grad_norm": 1.1110715866088867, + "learning_rate": 4.031884541071285e-08, + "loss": 0.3185, + "step": 47993 + }, + { + "epoch": 0.9607687110577284, + "grad_norm": 1.0349235534667969, + "learning_rate": 4.027776953868023e-08, + "loss": 0.3183, + "step": 47994 + }, + { + "epoch": 0.9607887295748567, + "grad_norm": 1.4030998945236206, + "learning_rate": 4.0236714516251884e-08, + "loss": 0.3402, + "step": 47995 + }, + { + "epoch": 0.9608087480919851, + "grad_norm": 1.3892145156860352, + "learning_rate": 4.0195680343602107e-08, + "loss": 0.2852, + "step": 47996 + }, + { + "epoch": 0.9608287666091134, + "grad_norm": 1.2202816009521484, + "learning_rate": 4.015466702090132e-08, + "loss": 0.293, + "step": 47997 + }, + { + "epoch": 0.9608487851262417, + "grad_norm": 1.186785340309143, + "learning_rate": 4.0113674548323264e-08, + "loss": 0.2908, + "step": 47998 + }, + { + "epoch": 0.9608688036433701, + "grad_norm": 1.2604860067367554, + "learning_rate": 4.0072702926040596e-08, + "loss": 0.3099, + "step": 47999 + }, + { + "epoch": 0.9608888221604984, + "grad_norm": 1.0769861936569214, + "learning_rate": 4.0031752154224276e-08, + "loss": 0.2907, + "step": 48000 + }, + { + "epoch": 0.9609088406776268, + "grad_norm": 1.1778042316436768, + "learning_rate": 3.9990822233046954e-08, + "loss": 0.3344, + "step": 48001 + }, + { + "epoch": 0.9609288591947551, + "grad_norm": 1.0136972665786743, + "learning_rate": 3.9949913162681266e-08, + "loss": 0.2573, + "step": 48002 + }, + { + "epoch": 0.9609488777118835, + "grad_norm": 1.0907988548278809, + "learning_rate": 3.9909024943298736e-08, + "loss": 0.2834, + "step": 48003 + }, + { + "epoch": 0.9609688962290118, + "grad_norm": 1.0880814790725708, + "learning_rate": 3.98681575750709e-08, + "loss": 0.3157, + "step": 48004 + }, + { + "epoch": 0.9609889147461402, + "grad_norm": 1.1423407793045044, + "learning_rate": 3.9827311058169835e-08, + "loss": 0.2959, + "step": 48005 + }, + { + "epoch": 0.9610089332632685, + "grad_norm": 1.0655261278152466, + "learning_rate": 3.978648539276764e-08, + "loss": 0.3223, + "step": 48006 + }, + { + "epoch": 0.9610289517803968, + "grad_norm": 1.2701371908187866, + "learning_rate": 3.974568057903583e-08, + "loss": 0.3151, + "step": 48007 + }, + { + "epoch": 0.9610489702975252, + "grad_norm": 1.1047042608261108, + "learning_rate": 3.970489661714483e-08, + "loss": 0.2717, + "step": 48008 + }, + { + "epoch": 0.9610689888146535, + "grad_norm": 1.0180895328521729, + "learning_rate": 3.966413350726783e-08, + "loss": 0.2662, + "step": 48009 + }, + { + "epoch": 0.9610890073317819, + "grad_norm": 1.0807571411132812, + "learning_rate": 3.9623391249574703e-08, + "loss": 0.2465, + "step": 48010 + }, + { + "epoch": 0.9611090258489102, + "grad_norm": 1.108568787574768, + "learning_rate": 3.958266984423753e-08, + "loss": 0.2999, + "step": 48011 + }, + { + "epoch": 0.9611290443660386, + "grad_norm": 1.89537513256073, + "learning_rate": 3.954196929142784e-08, + "loss": 0.8144, + "step": 48012 + }, + { + "epoch": 0.9611490628831669, + "grad_norm": 1.0633727312088013, + "learning_rate": 3.9501289591315494e-08, + "loss": 0.2963, + "step": 48013 + }, + { + "epoch": 0.9611690814002952, + "grad_norm": 1.3372142314910889, + "learning_rate": 3.946063074407258e-08, + "loss": 0.2856, + "step": 48014 + }, + { + "epoch": 0.9611890999174236, + "grad_norm": 1.198966383934021, + "learning_rate": 3.9419992749869516e-08, + "loss": 0.3067, + "step": 48015 + }, + { + "epoch": 0.9612091184345519, + "grad_norm": 1.067764163017273, + "learning_rate": 3.937937560887784e-08, + "loss": 0.2871, + "step": 48016 + }, + { + "epoch": 0.9612291369516803, + "grad_norm": 1.146151065826416, + "learning_rate": 3.933877932126739e-08, + "loss": 0.2995, + "step": 48017 + }, + { + "epoch": 0.9612491554688086, + "grad_norm": 1.1733579635620117, + "learning_rate": 3.929820388720862e-08, + "loss": 0.2705, + "step": 48018 + }, + { + "epoch": 0.961269173985937, + "grad_norm": 1.1710044145584106, + "learning_rate": 3.925764930687303e-08, + "loss": 0.318, + "step": 48019 + }, + { + "epoch": 0.9612891925030653, + "grad_norm": 1.1216591596603394, + "learning_rate": 3.921711558043107e-08, + "loss": 0.3165, + "step": 48020 + }, + { + "epoch": 0.9613092110201937, + "grad_norm": 1.2724800109863281, + "learning_rate": 3.917660270805201e-08, + "loss": 0.337, + "step": 48021 + }, + { + "epoch": 0.961329229537322, + "grad_norm": 1.1692899465560913, + "learning_rate": 3.913611068990797e-08, + "loss": 0.347, + "step": 48022 + }, + { + "epoch": 0.9613492480544503, + "grad_norm": 1.1309927701950073, + "learning_rate": 3.909563952616713e-08, + "loss": 0.275, + "step": 48023 + }, + { + "epoch": 0.9613692665715787, + "grad_norm": 1.005388855934143, + "learning_rate": 3.905518921700102e-08, + "loss": 0.2786, + "step": 48024 + }, + { + "epoch": 0.961389285088707, + "grad_norm": 1.998597502708435, + "learning_rate": 3.9014759762579516e-08, + "loss": 0.8589, + "step": 48025 + }, + { + "epoch": 0.9614093036058354, + "grad_norm": 1.011004090309143, + "learning_rate": 3.897435116307247e-08, + "loss": 0.2643, + "step": 48026 + }, + { + "epoch": 0.9614293221229637, + "grad_norm": 1.2393853664398193, + "learning_rate": 3.8933963418648636e-08, + "loss": 0.3015, + "step": 48027 + }, + { + "epoch": 0.9614493406400921, + "grad_norm": 1.1148680448532104, + "learning_rate": 3.889359652947955e-08, + "loss": 0.2736, + "step": 48028 + }, + { + "epoch": 0.9614693591572204, + "grad_norm": 1.9819185733795166, + "learning_rate": 3.885325049573452e-08, + "loss": 0.7975, + "step": 48029 + }, + { + "epoch": 0.9614893776743487, + "grad_norm": 1.0982072353363037, + "learning_rate": 3.8812925317582296e-08, + "loss": 0.3223, + "step": 48030 + }, + { + "epoch": 0.9615093961914771, + "grad_norm": 1.1807016134262085, + "learning_rate": 3.8772620995192745e-08, + "loss": 0.3466, + "step": 48031 + }, + { + "epoch": 0.9615294147086054, + "grad_norm": 1.0951428413391113, + "learning_rate": 3.873233752873573e-08, + "loss": 0.3143, + "step": 48032 + }, + { + "epoch": 0.9615494332257338, + "grad_norm": 1.2239114046096802, + "learning_rate": 3.8692074918380005e-08, + "loss": 0.2795, + "step": 48033 + }, + { + "epoch": 0.9615694517428621, + "grad_norm": 1.1195663213729858, + "learning_rate": 3.865183316429488e-08, + "loss": 0.2749, + "step": 48034 + }, + { + "epoch": 0.9615894702599905, + "grad_norm": 1.1187382936477661, + "learning_rate": 3.8611612266650224e-08, + "loss": 0.2898, + "step": 48035 + }, + { + "epoch": 0.9616094887771188, + "grad_norm": 1.2477349042892456, + "learning_rate": 3.857141222561423e-08, + "loss": 0.2981, + "step": 48036 + }, + { + "epoch": 0.9616295072942472, + "grad_norm": 1.1187469959259033, + "learning_rate": 3.8531233041356755e-08, + "loss": 0.3273, + "step": 48037 + }, + { + "epoch": 0.9616495258113755, + "grad_norm": 1.147828221321106, + "learning_rate": 3.849107471404656e-08, + "loss": 0.2643, + "step": 48038 + }, + { + "epoch": 0.9616695443285038, + "grad_norm": 1.1455358266830444, + "learning_rate": 3.84509372438513e-08, + "loss": 0.3103, + "step": 48039 + }, + { + "epoch": 0.9616895628456322, + "grad_norm": 1.0297154188156128, + "learning_rate": 3.8410820630940815e-08, + "loss": 0.2931, + "step": 48040 + }, + { + "epoch": 0.9617095813627605, + "grad_norm": 1.371256947517395, + "learning_rate": 3.8370724875483875e-08, + "loss": 0.2717, + "step": 48041 + }, + { + "epoch": 0.9617295998798889, + "grad_norm": 2.037186622619629, + "learning_rate": 3.833064997764868e-08, + "loss": 0.7979, + "step": 48042 + }, + { + "epoch": 0.9617496183970172, + "grad_norm": 1.1119314432144165, + "learning_rate": 3.829059593760342e-08, + "loss": 0.3031, + "step": 48043 + }, + { + "epoch": 0.9617696369141456, + "grad_norm": 1.4517043828964233, + "learning_rate": 3.825056275551631e-08, + "loss": 0.3558, + "step": 48044 + }, + { + "epoch": 0.9617896554312739, + "grad_norm": 1.2335163354873657, + "learning_rate": 3.8210550431556635e-08, + "loss": 0.301, + "step": 48045 + }, + { + "epoch": 0.9618096739484022, + "grad_norm": 1.2802940607070923, + "learning_rate": 3.8170558965892054e-08, + "loss": 0.2912, + "step": 48046 + }, + { + "epoch": 0.9618296924655306, + "grad_norm": 1.15003502368927, + "learning_rate": 3.813058835869021e-08, + "loss": 0.2786, + "step": 48047 + }, + { + "epoch": 0.9618497109826589, + "grad_norm": 1.066282033920288, + "learning_rate": 3.8090638610119854e-08, + "loss": 0.281, + "step": 48048 + }, + { + "epoch": 0.9618697294997873, + "grad_norm": 1.2302134037017822, + "learning_rate": 3.805070972034863e-08, + "loss": 0.3204, + "step": 48049 + }, + { + "epoch": 0.9618897480169156, + "grad_norm": 1.1244169473648071, + "learning_rate": 3.8010801689544186e-08, + "loss": 0.2928, + "step": 48050 + }, + { + "epoch": 0.961909766534044, + "grad_norm": 1.1313360929489136, + "learning_rate": 3.7970914517874715e-08, + "loss": 0.2793, + "step": 48051 + }, + { + "epoch": 0.9619297850511723, + "grad_norm": 1.1351628303527832, + "learning_rate": 3.793104820550786e-08, + "loss": 0.3188, + "step": 48052 + }, + { + "epoch": 0.9619498035683007, + "grad_norm": 1.3381547927856445, + "learning_rate": 3.7891202752610714e-08, + "loss": 0.3123, + "step": 48053 + }, + { + "epoch": 0.961969822085429, + "grad_norm": 1.1300647258758545, + "learning_rate": 3.785137815935091e-08, + "loss": 0.2833, + "step": 48054 + }, + { + "epoch": 0.9619898406025573, + "grad_norm": 1.2634868621826172, + "learning_rate": 3.781157442589667e-08, + "loss": 0.2974, + "step": 48055 + }, + { + "epoch": 0.9620098591196857, + "grad_norm": 1.1507779359817505, + "learning_rate": 3.7771791552414505e-08, + "loss": 0.2991, + "step": 48056 + }, + { + "epoch": 0.962029877636814, + "grad_norm": 1.0547198057174683, + "learning_rate": 3.773202953907151e-08, + "loss": 0.2874, + "step": 48057 + }, + { + "epoch": 0.9620498961539424, + "grad_norm": 1.1254674196243286, + "learning_rate": 3.7692288386035335e-08, + "loss": 0.2918, + "step": 48058 + }, + { + "epoch": 0.9620699146710707, + "grad_norm": 1.134830117225647, + "learning_rate": 3.765256809347251e-08, + "loss": 0.2921, + "step": 48059 + }, + { + "epoch": 0.9620899331881991, + "grad_norm": 1.0226749181747437, + "learning_rate": 3.761286866155067e-08, + "loss": 0.2207, + "step": 48060 + }, + { + "epoch": 0.9621099517053274, + "grad_norm": 1.2443522214889526, + "learning_rate": 3.7573190090436916e-08, + "loss": 0.3084, + "step": 48061 + }, + { + "epoch": 0.9621299702224557, + "grad_norm": 1.2493349313735962, + "learning_rate": 3.7533532380296664e-08, + "loss": 0.3359, + "step": 48062 + }, + { + "epoch": 0.9621499887395841, + "grad_norm": 1.109063982963562, + "learning_rate": 3.749389553129812e-08, + "loss": 0.2987, + "step": 48063 + }, + { + "epoch": 0.9621700072567124, + "grad_norm": 1.0867000818252563, + "learning_rate": 3.7454279543607255e-08, + "loss": 0.2766, + "step": 48064 + }, + { + "epoch": 0.9621900257738408, + "grad_norm": 1.081153154373169, + "learning_rate": 3.74146844173906e-08, + "loss": 0.309, + "step": 48065 + }, + { + "epoch": 0.9622100442909691, + "grad_norm": 1.1229655742645264, + "learning_rate": 3.73751101528147e-08, + "loss": 0.3614, + "step": 48066 + }, + { + "epoch": 0.9622300628080975, + "grad_norm": 1.1419298648834229, + "learning_rate": 3.733555675004552e-08, + "loss": 0.3343, + "step": 48067 + }, + { + "epoch": 0.9622500813252258, + "grad_norm": 1.042171835899353, + "learning_rate": 3.729602420925016e-08, + "loss": 0.2557, + "step": 48068 + }, + { + "epoch": 0.9622700998423542, + "grad_norm": 0.9763078689575195, + "learning_rate": 3.7256512530594035e-08, + "loss": 0.2389, + "step": 48069 + }, + { + "epoch": 0.9622901183594825, + "grad_norm": 1.1921888589859009, + "learning_rate": 3.721702171424368e-08, + "loss": 0.2946, + "step": 48070 + }, + { + "epoch": 0.9623101368766108, + "grad_norm": 1.2387586832046509, + "learning_rate": 3.7177551760364526e-08, + "loss": 0.2764, + "step": 48071 + }, + { + "epoch": 0.9623301553937392, + "grad_norm": 1.0479892492294312, + "learning_rate": 3.7138102669123653e-08, + "loss": 0.2948, + "step": 48072 + }, + { + "epoch": 0.9623501739108675, + "grad_norm": 1.1135239601135254, + "learning_rate": 3.709867444068538e-08, + "loss": 0.2932, + "step": 48073 + }, + { + "epoch": 0.9623701924279959, + "grad_norm": 2.1645288467407227, + "learning_rate": 3.705926707521679e-08, + "loss": 0.7348, + "step": 48074 + }, + { + "epoch": 0.9623902109451242, + "grad_norm": 1.1183563470840454, + "learning_rate": 3.701988057288275e-08, + "loss": 0.2972, + "step": 48075 + }, + { + "epoch": 0.9624102294622526, + "grad_norm": 1.1180236339569092, + "learning_rate": 3.698051493384924e-08, + "loss": 0.289, + "step": 48076 + }, + { + "epoch": 0.9624302479793809, + "grad_norm": 1.013871669769287, + "learning_rate": 3.694117015828169e-08, + "loss": 0.3053, + "step": 48077 + }, + { + "epoch": 0.9624502664965092, + "grad_norm": 1.085302710533142, + "learning_rate": 3.6901846246345516e-08, + "loss": 0.3231, + "step": 48078 + }, + { + "epoch": 0.9624702850136376, + "grad_norm": 1.9184415340423584, + "learning_rate": 3.686254319820559e-08, + "loss": 0.75, + "step": 48079 + }, + { + "epoch": 0.9624903035307659, + "grad_norm": 1.1376065015792847, + "learning_rate": 3.6823261014027334e-08, + "loss": 0.3054, + "step": 48080 + }, + { + "epoch": 0.9625103220478943, + "grad_norm": 1.2484354972839355, + "learning_rate": 3.678399969397617e-08, + "loss": 0.2984, + "step": 48081 + }, + { + "epoch": 0.9625303405650226, + "grad_norm": 1.3680860996246338, + "learning_rate": 3.674475923821697e-08, + "loss": 0.2603, + "step": 48082 + }, + { + "epoch": 0.962550359082151, + "grad_norm": 1.0813795328140259, + "learning_rate": 3.6705539646915147e-08, + "loss": 0.3334, + "step": 48083 + }, + { + "epoch": 0.9625703775992793, + "grad_norm": 0.9962717890739441, + "learning_rate": 3.666634092023447e-08, + "loss": 0.259, + "step": 48084 + }, + { + "epoch": 0.9625903961164077, + "grad_norm": 1.100517749786377, + "learning_rate": 3.6627163058340906e-08, + "loss": 0.271, + "step": 48085 + }, + { + "epoch": 0.962610414633536, + "grad_norm": 1.1348216533660889, + "learning_rate": 3.658800606139767e-08, + "loss": 0.3801, + "step": 48086 + }, + { + "epoch": 0.9626304331506643, + "grad_norm": 1.064502239227295, + "learning_rate": 3.65488699295713e-08, + "loss": 0.2741, + "step": 48087 + }, + { + "epoch": 0.9626504516677927, + "grad_norm": 1.2526986598968506, + "learning_rate": 3.6509754663024974e-08, + "loss": 0.2842, + "step": 48088 + }, + { + "epoch": 0.962670470184921, + "grad_norm": 1.1465998888015747, + "learning_rate": 3.6470660261923585e-08, + "loss": 0.2893, + "step": 48089 + }, + { + "epoch": 0.9626904887020494, + "grad_norm": 1.0750670433044434, + "learning_rate": 3.6431586726431435e-08, + "loss": 0.3187, + "step": 48090 + }, + { + "epoch": 0.9627105072191777, + "grad_norm": 1.0860756635665894, + "learning_rate": 3.639253405671228e-08, + "loss": 0.3099, + "step": 48091 + }, + { + "epoch": 0.9627305257363061, + "grad_norm": 1.1333997249603271, + "learning_rate": 3.635350225293155e-08, + "loss": 0.286, + "step": 48092 + }, + { + "epoch": 0.9627505442534344, + "grad_norm": 1.0509711503982544, + "learning_rate": 3.631449131525189e-08, + "loss": 0.24, + "step": 48093 + }, + { + "epoch": 0.9627705627705627, + "grad_norm": 1.0660678148269653, + "learning_rate": 3.627550124383761e-08, + "loss": 0.2669, + "step": 48094 + }, + { + "epoch": 0.9627905812876911, + "grad_norm": 1.119645118713379, + "learning_rate": 3.623653203885358e-08, + "loss": 0.2956, + "step": 48095 + }, + { + "epoch": 0.9628105998048194, + "grad_norm": 1.216103434562683, + "learning_rate": 3.6197583700463e-08, + "loss": 0.3033, + "step": 48096 + }, + { + "epoch": 0.9628306183219478, + "grad_norm": 1.1059564352035522, + "learning_rate": 3.6158656228829083e-08, + "loss": 0.2926, + "step": 48097 + }, + { + "epoch": 0.9628506368390761, + "grad_norm": 1.0868514776229858, + "learning_rate": 3.611974962411613e-08, + "loss": 0.3029, + "step": 48098 + }, + { + "epoch": 0.9628706553562045, + "grad_norm": 1.0561245679855347, + "learning_rate": 3.608086388648735e-08, + "loss": 0.2395, + "step": 48099 + }, + { + "epoch": 0.9628906738733328, + "grad_norm": 1.1935653686523438, + "learning_rate": 3.60419990161065e-08, + "loss": 0.3026, + "step": 48100 + }, + { + "epoch": 0.9629106923904612, + "grad_norm": 1.3091102838516235, + "learning_rate": 3.600315501313678e-08, + "loss": 0.3218, + "step": 48101 + }, + { + "epoch": 0.9629307109075895, + "grad_norm": 1.1578707695007324, + "learning_rate": 3.5964331877741955e-08, + "loss": 0.2888, + "step": 48102 + }, + { + "epoch": 0.9629507294247178, + "grad_norm": 1.2668945789337158, + "learning_rate": 3.592552961008411e-08, + "loss": 0.2925, + "step": 48103 + }, + { + "epoch": 0.9629707479418462, + "grad_norm": 1.0762040615081787, + "learning_rate": 3.588674821032756e-08, + "loss": 0.2995, + "step": 48104 + }, + { + "epoch": 0.9629907664589745, + "grad_norm": 1.1886826753616333, + "learning_rate": 3.584798767863495e-08, + "loss": 0.2808, + "step": 48105 + }, + { + "epoch": 0.9630107849761029, + "grad_norm": 1.244197130203247, + "learning_rate": 3.580924801516839e-08, + "loss": 0.3309, + "step": 48106 + }, + { + "epoch": 0.9630308034932312, + "grad_norm": 1.8258816003799438, + "learning_rate": 3.577052922009216e-08, + "loss": 0.7048, + "step": 48107 + }, + { + "epoch": 0.9630508220103596, + "grad_norm": 1.1693263053894043, + "learning_rate": 3.5731831293568386e-08, + "loss": 0.3327, + "step": 48108 + }, + { + "epoch": 0.9630708405274879, + "grad_norm": 1.0149321556091309, + "learning_rate": 3.5693154235759144e-08, + "loss": 0.287, + "step": 48109 + }, + { + "epoch": 0.9630908590446162, + "grad_norm": 1.0498965978622437, + "learning_rate": 3.565449804682764e-08, + "loss": 0.3292, + "step": 48110 + }, + { + "epoch": 0.9631108775617446, + "grad_norm": 1.0425724983215332, + "learning_rate": 3.5615862726936515e-08, + "loss": 0.3237, + "step": 48111 + }, + { + "epoch": 0.9631308960788729, + "grad_norm": 1.155027985572815, + "learning_rate": 3.557724827624731e-08, + "loss": 0.3206, + "step": 48112 + }, + { + "epoch": 0.9631509145960013, + "grad_norm": 1.1890637874603271, + "learning_rate": 3.553865469492379e-08, + "loss": 0.304, + "step": 48113 + }, + { + "epoch": 0.9631709331131296, + "grad_norm": 2.0760817527770996, + "learning_rate": 3.550008198312749e-08, + "loss": 0.8106, + "step": 48114 + }, + { + "epoch": 0.963190951630258, + "grad_norm": 0.9914320111274719, + "learning_rate": 3.546153014101994e-08, + "loss": 0.3099, + "step": 48115 + }, + { + "epoch": 0.9632109701473863, + "grad_norm": 1.0331101417541504, + "learning_rate": 3.542299916876379e-08, + "loss": 0.2787, + "step": 48116 + }, + { + "epoch": 0.9632309886645147, + "grad_norm": 2.3530869483947754, + "learning_rate": 3.5384489066521145e-08, + "loss": 0.7467, + "step": 48117 + }, + { + "epoch": 0.963251007181643, + "grad_norm": 1.8526345491409302, + "learning_rate": 3.534599983445353e-08, + "loss": 0.7407, + "step": 48118 + }, + { + "epoch": 0.9632710256987713, + "grad_norm": 1.2083488702774048, + "learning_rate": 3.530753147272359e-08, + "loss": 0.2916, + "step": 48119 + }, + { + "epoch": 0.9632910442158997, + "grad_norm": 1.1691443920135498, + "learning_rate": 3.52690839814912e-08, + "loss": 0.283, + "step": 48120 + }, + { + "epoch": 0.963311062733028, + "grad_norm": 1.1244916915893555, + "learning_rate": 3.523065736092013e-08, + "loss": 0.3014, + "step": 48121 + }, + { + "epoch": 0.9633310812501564, + "grad_norm": 1.2024904489517212, + "learning_rate": 3.519225161117079e-08, + "loss": 0.3067, + "step": 48122 + }, + { + "epoch": 0.9633510997672847, + "grad_norm": 1.0541129112243652, + "learning_rate": 3.515386673240473e-08, + "loss": 0.2697, + "step": 48123 + }, + { + "epoch": 0.9633711182844131, + "grad_norm": 1.0425413846969604, + "learning_rate": 3.511550272478293e-08, + "loss": 0.2953, + "step": 48124 + }, + { + "epoch": 0.9633911368015414, + "grad_norm": 1.2333000898361206, + "learning_rate": 3.5077159588466913e-08, + "loss": 0.3064, + "step": 48125 + }, + { + "epoch": 0.9634111553186697, + "grad_norm": 1.112115502357483, + "learning_rate": 3.503883732361879e-08, + "loss": 0.3137, + "step": 48126 + }, + { + "epoch": 0.9634311738357981, + "grad_norm": 1.9909824132919312, + "learning_rate": 3.500053593039898e-08, + "loss": 0.7177, + "step": 48127 + }, + { + "epoch": 0.9634511923529264, + "grad_norm": 1.235971450805664, + "learning_rate": 3.496225540896791e-08, + "loss": 0.2819, + "step": 48128 + }, + { + "epoch": 0.9634712108700548, + "grad_norm": 1.0300523042678833, + "learning_rate": 3.4923995759487126e-08, + "loss": 0.2668, + "step": 48129 + }, + { + "epoch": 0.9634912293871831, + "grad_norm": 1.2890651226043701, + "learning_rate": 3.4885756982117603e-08, + "loss": 0.3035, + "step": 48130 + }, + { + "epoch": 0.9635112479043115, + "grad_norm": 1.1092140674591064, + "learning_rate": 3.4847539077019764e-08, + "loss": 0.2776, + "step": 48131 + }, + { + "epoch": 0.9635312664214398, + "grad_norm": 1.0144633054733276, + "learning_rate": 3.4809342044354596e-08, + "loss": 0.2726, + "step": 48132 + }, + { + "epoch": 0.9635512849385682, + "grad_norm": 1.209885835647583, + "learning_rate": 3.477116588428198e-08, + "loss": 0.2956, + "step": 48133 + }, + { + "epoch": 0.9635713034556965, + "grad_norm": 1.1635740995407104, + "learning_rate": 3.473301059696288e-08, + "loss": 0.3007, + "step": 48134 + }, + { + "epoch": 0.9635913219728248, + "grad_norm": 1.1600052118301392, + "learning_rate": 3.469487618255829e-08, + "loss": 0.295, + "step": 48135 + }, + { + "epoch": 0.9636113404899532, + "grad_norm": 1.1574629545211792, + "learning_rate": 3.4656762641227526e-08, + "loss": 0.3119, + "step": 48136 + }, + { + "epoch": 0.9636313590070815, + "grad_norm": 1.1995400190353394, + "learning_rate": 3.461866997313101e-08, + "loss": 0.293, + "step": 48137 + }, + { + "epoch": 0.9636513775242099, + "grad_norm": 1.0417903661727905, + "learning_rate": 3.458059817842862e-08, + "loss": 0.2677, + "step": 48138 + }, + { + "epoch": 0.9636713960413382, + "grad_norm": 1.1210047006607056, + "learning_rate": 3.4542547257281346e-08, + "loss": 0.2659, + "step": 48139 + }, + { + "epoch": 0.9636914145584666, + "grad_norm": 1.2165511846542358, + "learning_rate": 3.450451720984904e-08, + "loss": 0.3013, + "step": 48140 + }, + { + "epoch": 0.9637114330755949, + "grad_norm": 1.095350742340088, + "learning_rate": 3.446650803629048e-08, + "loss": 0.2604, + "step": 48141 + }, + { + "epoch": 0.9637314515927232, + "grad_norm": 1.0998221635818481, + "learning_rate": 3.442851973676664e-08, + "loss": 0.3025, + "step": 48142 + }, + { + "epoch": 0.9637514701098516, + "grad_norm": 1.0356683731079102, + "learning_rate": 3.439055231143629e-08, + "loss": 0.2919, + "step": 48143 + }, + { + "epoch": 0.9637714886269799, + "grad_norm": 1.120836615562439, + "learning_rate": 3.435260576045985e-08, + "loss": 0.3201, + "step": 48144 + }, + { + "epoch": 0.9637915071441083, + "grad_norm": 1.3291288614273071, + "learning_rate": 3.431468008399608e-08, + "loss": 0.3098, + "step": 48145 + }, + { + "epoch": 0.9638115256612366, + "grad_norm": 1.1845905780792236, + "learning_rate": 3.4276775282204856e-08, + "loss": 0.309, + "step": 48146 + }, + { + "epoch": 0.963831544178365, + "grad_norm": 1.1240988969802856, + "learning_rate": 3.4238891355245494e-08, + "loss": 0.2818, + "step": 48147 + }, + { + "epoch": 0.9638515626954933, + "grad_norm": 1.1958253383636475, + "learning_rate": 3.420102830327732e-08, + "loss": 0.3377, + "step": 48148 + }, + { + "epoch": 0.9638715812126216, + "grad_norm": 1.168550968170166, + "learning_rate": 3.416318612645908e-08, + "loss": 0.3382, + "step": 48149 + }, + { + "epoch": 0.96389159972975, + "grad_norm": 1.0641977787017822, + "learning_rate": 3.41253648249501e-08, + "loss": 0.3069, + "step": 48150 + }, + { + "epoch": 0.9639116182468783, + "grad_norm": 1.1387417316436768, + "learning_rate": 3.408756439890915e-08, + "loss": 0.2727, + "step": 48151 + }, + { + "epoch": 0.9639316367640067, + "grad_norm": 1.1624189615249634, + "learning_rate": 3.404978484849608e-08, + "loss": 0.2982, + "step": 48152 + }, + { + "epoch": 0.963951655281135, + "grad_norm": 1.2798738479614258, + "learning_rate": 3.401202617386912e-08, + "loss": 0.2973, + "step": 48153 + }, + { + "epoch": 0.9639716737982634, + "grad_norm": 1.2690378427505493, + "learning_rate": 3.397428837518646e-08, + "loss": 0.3343, + "step": 48154 + }, + { + "epoch": 0.9639916923153917, + "grad_norm": 1.0390905141830444, + "learning_rate": 3.393657145260687e-08, + "loss": 0.3337, + "step": 48155 + }, + { + "epoch": 0.9640117108325201, + "grad_norm": 1.1101988554000854, + "learning_rate": 3.3898875406289664e-08, + "loss": 0.281, + "step": 48156 + }, + { + "epoch": 0.9640317293496484, + "grad_norm": 1.8877043724060059, + "learning_rate": 3.386120023639306e-08, + "loss": 0.7695, + "step": 48157 + }, + { + "epoch": 0.9640517478667767, + "grad_norm": 1.083581805229187, + "learning_rate": 3.382354594307524e-08, + "loss": 0.3125, + "step": 48158 + }, + { + "epoch": 0.9640717663839051, + "grad_norm": 1.1844722032546997, + "learning_rate": 3.378591252649388e-08, + "loss": 0.3244, + "step": 48159 + }, + { + "epoch": 0.9640917849010334, + "grad_norm": 1.136742115020752, + "learning_rate": 3.374829998680884e-08, + "loss": 0.3143, + "step": 48160 + }, + { + "epoch": 0.9641118034181618, + "grad_norm": 1.1561473608016968, + "learning_rate": 3.3710708324176664e-08, + "loss": 0.2897, + "step": 48161 + }, + { + "epoch": 0.9641318219352901, + "grad_norm": 1.2787432670593262, + "learning_rate": 3.3673137538755565e-08, + "loss": 0.2924, + "step": 48162 + }, + { + "epoch": 0.9641518404524185, + "grad_norm": 1.1007483005523682, + "learning_rate": 3.3635587630704294e-08, + "loss": 0.2915, + "step": 48163 + }, + { + "epoch": 0.9641718589695468, + "grad_norm": 1.1017944812774658, + "learning_rate": 3.3598058600179395e-08, + "loss": 0.3203, + "step": 48164 + }, + { + "epoch": 0.9641918774866751, + "grad_norm": 1.353251576423645, + "learning_rate": 3.356055044734019e-08, + "loss": 0.307, + "step": 48165 + }, + { + "epoch": 0.9642118960038035, + "grad_norm": 1.0398468971252441, + "learning_rate": 3.3523063172343774e-08, + "loss": 0.2504, + "step": 48166 + }, + { + "epoch": 0.9642319145209318, + "grad_norm": 1.2336395978927612, + "learning_rate": 3.34855967753478e-08, + "loss": 0.3267, + "step": 48167 + }, + { + "epoch": 0.9642519330380602, + "grad_norm": 1.0428189039230347, + "learning_rate": 3.3448151256508795e-08, + "loss": 0.2612, + "step": 48168 + }, + { + "epoch": 0.9642719515551885, + "grad_norm": 1.1487547159194946, + "learning_rate": 3.3410726615985546e-08, + "loss": 0.2776, + "step": 48169 + }, + { + "epoch": 0.9642919700723169, + "grad_norm": 1.1293482780456543, + "learning_rate": 3.337332285393457e-08, + "loss": 0.3019, + "step": 48170 + }, + { + "epoch": 0.9643119885894452, + "grad_norm": 1.0911612510681152, + "learning_rate": 3.333593997051354e-08, + "loss": 0.2714, + "step": 48171 + }, + { + "epoch": 0.9643320071065736, + "grad_norm": 1.9730466604232788, + "learning_rate": 3.329857796587899e-08, + "loss": 0.8199, + "step": 48172 + }, + { + "epoch": 0.9643520256237019, + "grad_norm": 1.1715997457504272, + "learning_rate": 3.326123684018856e-08, + "loss": 0.3015, + "step": 48173 + }, + { + "epoch": 0.9643720441408302, + "grad_norm": 1.1202672719955444, + "learning_rate": 3.3223916593598803e-08, + "loss": 0.2896, + "step": 48174 + }, + { + "epoch": 0.9643920626579586, + "grad_norm": 2.124952554702759, + "learning_rate": 3.3186617226267374e-08, + "loss": 0.7296, + "step": 48175 + }, + { + "epoch": 0.9644120811750869, + "grad_norm": 1.004709005355835, + "learning_rate": 3.314933873834969e-08, + "loss": 0.2458, + "step": 48176 + }, + { + "epoch": 0.9644320996922153, + "grad_norm": 1.1784439086914062, + "learning_rate": 3.311208113000397e-08, + "loss": 0.3399, + "step": 48177 + }, + { + "epoch": 0.9644521182093436, + "grad_norm": 1.1189614534378052, + "learning_rate": 3.3074844401386196e-08, + "loss": 0.3202, + "step": 48178 + }, + { + "epoch": 0.964472136726472, + "grad_norm": 1.0634527206420898, + "learning_rate": 3.303762855265236e-08, + "loss": 0.2829, + "step": 48179 + }, + { + "epoch": 0.9644921552436003, + "grad_norm": 1.1280176639556885, + "learning_rate": 3.30004335839601e-08, + "loss": 0.3075, + "step": 48180 + }, + { + "epoch": 0.9645121737607286, + "grad_norm": 1.1910450458526611, + "learning_rate": 3.2963259495464864e-08, + "loss": 0.3137, + "step": 48181 + }, + { + "epoch": 0.964532192277857, + "grad_norm": 1.1811598539352417, + "learning_rate": 3.292610628732318e-08, + "loss": 0.3026, + "step": 48182 + }, + { + "epoch": 0.9645522107949853, + "grad_norm": 1.816787838935852, + "learning_rate": 3.288897395969104e-08, + "loss": 0.7187, + "step": 48183 + }, + { + "epoch": 0.9645722293121137, + "grad_norm": 1.0486587285995483, + "learning_rate": 3.285186251272499e-08, + "loss": 0.3066, + "step": 48184 + }, + { + "epoch": 0.964592247829242, + "grad_norm": 1.2093180418014526, + "learning_rate": 3.281477194658045e-08, + "loss": 0.309, + "step": 48185 + }, + { + "epoch": 0.9646122663463704, + "grad_norm": 1.092863917350769, + "learning_rate": 3.2777702261413414e-08, + "loss": 0.3347, + "step": 48186 + }, + { + "epoch": 0.9646322848634987, + "grad_norm": 1.1764540672302246, + "learning_rate": 3.274065345738098e-08, + "loss": 0.3081, + "step": 48187 + }, + { + "epoch": 0.9646523033806271, + "grad_norm": 1.2281829118728638, + "learning_rate": 3.270362553463691e-08, + "loss": 0.2883, + "step": 48188 + }, + { + "epoch": 0.9646723218977554, + "grad_norm": 1.0748677253723145, + "learning_rate": 3.26666184933383e-08, + "loss": 0.3088, + "step": 48189 + }, + { + "epoch": 0.9646923404148837, + "grad_norm": 1.645585298538208, + "learning_rate": 3.262963233363947e-08, + "loss": 0.2746, + "step": 48190 + }, + { + "epoch": 0.9647123589320121, + "grad_norm": 1.0413371324539185, + "learning_rate": 3.259266705569753e-08, + "loss": 0.2585, + "step": 48191 + }, + { + "epoch": 0.9647323774491404, + "grad_norm": 1.2190661430358887, + "learning_rate": 3.255572265966678e-08, + "loss": 0.3126, + "step": 48192 + }, + { + "epoch": 0.9647523959662688, + "grad_norm": 1.1474156379699707, + "learning_rate": 3.251879914570322e-08, + "loss": 0.2952, + "step": 48193 + }, + { + "epoch": 0.9647724144833971, + "grad_norm": 1.1411479711532593, + "learning_rate": 3.248189651396116e-08, + "loss": 0.2807, + "step": 48194 + }, + { + "epoch": 0.9647924330005255, + "grad_norm": 1.4121778011322021, + "learning_rate": 3.244501476459605e-08, + "loss": 0.3504, + "step": 48195 + }, + { + "epoch": 0.9648124515176538, + "grad_norm": 1.959243655204773, + "learning_rate": 3.2408153897763306e-08, + "loss": 0.7547, + "step": 48196 + }, + { + "epoch": 0.9648324700347821, + "grad_norm": 1.0895017385482788, + "learning_rate": 3.2371313913617807e-08, + "loss": 0.2793, + "step": 48197 + }, + { + "epoch": 0.9648524885519105, + "grad_norm": 1.1219831705093384, + "learning_rate": 3.2334494812313876e-08, + "loss": 0.3101, + "step": 48198 + }, + { + "epoch": 0.9648725070690388, + "grad_norm": 1.096988558769226, + "learning_rate": 3.229769659400639e-08, + "loss": 0.2777, + "step": 48199 + }, + { + "epoch": 0.9648925255861672, + "grad_norm": 1.1294211149215698, + "learning_rate": 3.226091925885078e-08, + "loss": 0.2817, + "step": 48200 + }, + { + "epoch": 0.9649125441032955, + "grad_norm": 1.046797752380371, + "learning_rate": 3.222416280700136e-08, + "loss": 0.2677, + "step": 48201 + }, + { + "epoch": 0.9649325626204239, + "grad_norm": 1.198838472366333, + "learning_rate": 3.218742723861246e-08, + "loss": 0.3333, + "step": 48202 + }, + { + "epoch": 0.9649525811375522, + "grad_norm": 1.2274309396743774, + "learning_rate": 3.215071255383839e-08, + "loss": 0.2721, + "step": 48203 + }, + { + "epoch": 0.9649725996546806, + "grad_norm": 1.012868881225586, + "learning_rate": 3.2114018752833485e-08, + "loss": 0.2434, + "step": 48204 + }, + { + "epoch": 0.9649926181718089, + "grad_norm": 1.1565632820129395, + "learning_rate": 3.207734583575206e-08, + "loss": 0.2688, + "step": 48205 + }, + { + "epoch": 0.9650126366889372, + "grad_norm": 1.2096703052520752, + "learning_rate": 3.204069380274844e-08, + "loss": 0.2913, + "step": 48206 + }, + { + "epoch": 0.9650326552060656, + "grad_norm": 1.0326993465423584, + "learning_rate": 3.200406265397693e-08, + "loss": 0.3048, + "step": 48207 + }, + { + "epoch": 0.9650526737231939, + "grad_norm": 1.102911353111267, + "learning_rate": 3.1967452389590755e-08, + "loss": 0.3442, + "step": 48208 + }, + { + "epoch": 0.9650726922403223, + "grad_norm": 1.0505433082580566, + "learning_rate": 3.193086300974479e-08, + "loss": 0.2954, + "step": 48209 + }, + { + "epoch": 0.9650927107574506, + "grad_norm": 1.1718326807022095, + "learning_rate": 3.189429451459225e-08, + "loss": 0.3342, + "step": 48210 + }, + { + "epoch": 0.965112729274579, + "grad_norm": 1.9047857522964478, + "learning_rate": 3.1857746904286893e-08, + "loss": 0.7088, + "step": 48211 + }, + { + "epoch": 0.9651327477917073, + "grad_norm": 1.229901909828186, + "learning_rate": 3.1821220178982484e-08, + "loss": 0.2872, + "step": 48212 + }, + { + "epoch": 0.9651527663088356, + "grad_norm": 1.1795836687088013, + "learning_rate": 3.1784714338832235e-08, + "loss": 0.2949, + "step": 48213 + }, + { + "epoch": 0.965172784825964, + "grad_norm": 1.083992838859558, + "learning_rate": 3.174822938398992e-08, + "loss": 0.2593, + "step": 48214 + }, + { + "epoch": 0.9651928033430923, + "grad_norm": 1.2733079195022583, + "learning_rate": 3.171176531460873e-08, + "loss": 0.3229, + "step": 48215 + }, + { + "epoch": 0.9652128218602207, + "grad_norm": 1.2657099962234497, + "learning_rate": 3.167532213084246e-08, + "loss": 0.3323, + "step": 48216 + }, + { + "epoch": 0.965232840377349, + "grad_norm": 1.1372878551483154, + "learning_rate": 3.1638899832843736e-08, + "loss": 0.3113, + "step": 48217 + }, + { + "epoch": 0.9652528588944774, + "grad_norm": 1.0411819219589233, + "learning_rate": 3.1602498420765794e-08, + "loss": 0.3471, + "step": 48218 + }, + { + "epoch": 0.9652728774116057, + "grad_norm": 1.0110729932785034, + "learning_rate": 3.1566117894761825e-08, + "loss": 0.2902, + "step": 48219 + }, + { + "epoch": 0.9652928959287341, + "grad_norm": 1.098994255065918, + "learning_rate": 3.1529758254985055e-08, + "loss": 0.2831, + "step": 48220 + }, + { + "epoch": 0.9653129144458624, + "grad_norm": 1.1759523153305054, + "learning_rate": 3.149341950158702e-08, + "loss": 0.2805, + "step": 48221 + }, + { + "epoch": 0.9653329329629907, + "grad_norm": 1.142346739768982, + "learning_rate": 3.1457101634722045e-08, + "loss": 0.3062, + "step": 48222 + }, + { + "epoch": 0.9653529514801191, + "grad_norm": 1.0184836387634277, + "learning_rate": 3.1420804654542226e-08, + "loss": 0.3015, + "step": 48223 + }, + { + "epoch": 0.9653729699972474, + "grad_norm": 1.1465394496917725, + "learning_rate": 3.1384528561200225e-08, + "loss": 0.3136, + "step": 48224 + }, + { + "epoch": 0.9653929885143758, + "grad_norm": 1.2482300996780396, + "learning_rate": 3.1348273354848136e-08, + "loss": 0.3323, + "step": 48225 + }, + { + "epoch": 0.9654130070315041, + "grad_norm": 1.1619071960449219, + "learning_rate": 3.131203903563862e-08, + "loss": 0.2747, + "step": 48226 + }, + { + "epoch": 0.9654330255486325, + "grad_norm": 1.1102206707000732, + "learning_rate": 3.127582560372433e-08, + "loss": 0.3076, + "step": 48227 + }, + { + "epoch": 0.9654530440657608, + "grad_norm": 1.7676217555999756, + "learning_rate": 3.1239633059256816e-08, + "loss": 0.7379, + "step": 48228 + }, + { + "epoch": 0.9654730625828891, + "grad_norm": 1.054017186164856, + "learning_rate": 3.120346140238872e-08, + "loss": 0.2399, + "step": 48229 + }, + { + "epoch": 0.9654930811000175, + "grad_norm": 1.145888090133667, + "learning_rate": 3.1167310633271607e-08, + "loss": 0.2783, + "step": 48230 + }, + { + "epoch": 0.9655130996171458, + "grad_norm": 1.0735985040664673, + "learning_rate": 3.113118075205812e-08, + "loss": 0.2915, + "step": 48231 + }, + { + "epoch": 0.9655331181342742, + "grad_norm": 1.0108617544174194, + "learning_rate": 3.109507175890036e-08, + "loss": 0.2575, + "step": 48232 + }, + { + "epoch": 0.9655531366514025, + "grad_norm": 1.0721592903137207, + "learning_rate": 3.1058983653948774e-08, + "loss": 0.2795, + "step": 48233 + }, + { + "epoch": 0.9655731551685309, + "grad_norm": 1.203499436378479, + "learning_rate": 3.1022916437356554e-08, + "loss": 0.2995, + "step": 48234 + }, + { + "epoch": 0.9655931736856592, + "grad_norm": 1.852662205696106, + "learning_rate": 3.098687010927415e-08, + "loss": 0.6978, + "step": 48235 + }, + { + "epoch": 0.9656131922027876, + "grad_norm": 1.207871913909912, + "learning_rate": 3.095084466985421e-08, + "loss": 0.304, + "step": 48236 + }, + { + "epoch": 0.9656332107199159, + "grad_norm": 1.1772726774215698, + "learning_rate": 3.091484011924717e-08, + "loss": 0.2488, + "step": 48237 + }, + { + "epoch": 0.9656532292370442, + "grad_norm": 1.0581560134887695, + "learning_rate": 3.0878856457604575e-08, + "loss": 0.2953, + "step": 48238 + }, + { + "epoch": 0.9656732477541726, + "grad_norm": 1.0634039640426636, + "learning_rate": 3.084289368507798e-08, + "loss": 0.2772, + "step": 48239 + }, + { + "epoch": 0.9656932662713009, + "grad_norm": 1.3138340711593628, + "learning_rate": 3.080695180181892e-08, + "loss": 0.3201, + "step": 48240 + }, + { + "epoch": 0.9657132847884293, + "grad_norm": 1.233116626739502, + "learning_rate": 3.077103080797728e-08, + "loss": 0.3431, + "step": 48241 + }, + { + "epoch": 0.9657333033055576, + "grad_norm": 1.2169158458709717, + "learning_rate": 3.073513070370571e-08, + "loss": 0.2476, + "step": 48242 + }, + { + "epoch": 0.965753321822686, + "grad_norm": 1.0544360876083374, + "learning_rate": 3.069925148915354e-08, + "loss": 0.2994, + "step": 48243 + }, + { + "epoch": 0.9657733403398143, + "grad_norm": 1.0368598699569702, + "learning_rate": 3.066339316447231e-08, + "loss": 0.2497, + "step": 48244 + }, + { + "epoch": 0.9657933588569426, + "grad_norm": 1.2460225820541382, + "learning_rate": 3.0627555729813575e-08, + "loss": 0.275, + "step": 48245 + }, + { + "epoch": 0.965813377374071, + "grad_norm": 1.1781240701675415, + "learning_rate": 3.059173918532665e-08, + "loss": 0.3087, + "step": 48246 + }, + { + "epoch": 0.9658333958911993, + "grad_norm": 1.217594861984253, + "learning_rate": 3.0555943531162533e-08, + "loss": 0.3399, + "step": 48247 + }, + { + "epoch": 0.9658534144083277, + "grad_norm": 1.0797802209854126, + "learning_rate": 3.052016876747166e-08, + "loss": 0.2732, + "step": 48248 + }, + { + "epoch": 0.965873432925456, + "grad_norm": 1.157822847366333, + "learning_rate": 3.0484414894405015e-08, + "loss": 0.274, + "step": 48249 + }, + { + "epoch": 0.9658934514425844, + "grad_norm": 1.9598503112792969, + "learning_rate": 3.044868191211248e-08, + "loss": 0.7399, + "step": 48250 + }, + { + "epoch": 0.9659134699597127, + "grad_norm": 1.7985429763793945, + "learning_rate": 3.041296982074393e-08, + "loss": 0.7225, + "step": 48251 + }, + { + "epoch": 0.9659334884768411, + "grad_norm": 1.9392439126968384, + "learning_rate": 3.037727862044981e-08, + "loss": 0.7381, + "step": 48252 + }, + { + "epoch": 0.9659535069939694, + "grad_norm": 1.3214658498764038, + "learning_rate": 3.034160831138e-08, + "loss": 0.306, + "step": 48253 + }, + { + "epoch": 0.9659735255110977, + "grad_norm": 1.046663761138916, + "learning_rate": 3.030595889368437e-08, + "loss": 0.3001, + "step": 48254 + }, + { + "epoch": 0.9659935440282261, + "grad_norm": 1.8264304399490356, + "learning_rate": 3.027033036751337e-08, + "loss": 0.7439, + "step": 48255 + }, + { + "epoch": 0.9660135625453544, + "grad_norm": 1.0748212337493896, + "learning_rate": 3.023472273301686e-08, + "loss": 0.2907, + "step": 48256 + }, + { + "epoch": 0.9660335810624828, + "grad_norm": 1.1581872701644897, + "learning_rate": 3.019913599034308e-08, + "loss": 0.3017, + "step": 48257 + }, + { + "epoch": 0.9660535995796111, + "grad_norm": 1.2444809675216675, + "learning_rate": 3.016357013964355e-08, + "loss": 0.2951, + "step": 48258 + }, + { + "epoch": 0.9660736180967395, + "grad_norm": 0.9938634037971497, + "learning_rate": 3.01280251810665e-08, + "loss": 0.2719, + "step": 48259 + }, + { + "epoch": 0.9660936366138678, + "grad_norm": 1.3491135835647583, + "learning_rate": 3.009250111476181e-08, + "loss": 0.3011, + "step": 48260 + }, + { + "epoch": 0.9661136551309961, + "grad_norm": 1.151774525642395, + "learning_rate": 3.005699794087824e-08, + "loss": 0.3137, + "step": 48261 + }, + { + "epoch": 0.9661336736481245, + "grad_norm": 1.2403539419174194, + "learning_rate": 3.002151565956568e-08, + "loss": 0.3155, + "step": 48262 + }, + { + "epoch": 0.9661536921652528, + "grad_norm": 1.0623801946640015, + "learning_rate": 2.9986054270973455e-08, + "loss": 0.2909, + "step": 48263 + }, + { + "epoch": 0.9661737106823812, + "grad_norm": 1.153331995010376, + "learning_rate": 2.9950613775250327e-08, + "loss": 0.2995, + "step": 48264 + }, + { + "epoch": 0.9661937291995095, + "grad_norm": 1.2713289260864258, + "learning_rate": 2.991519417254452e-08, + "loss": 0.3106, + "step": 48265 + }, + { + "epoch": 0.9662137477166379, + "grad_norm": 1.0288242101669312, + "learning_rate": 2.987979546300646e-08, + "loss": 0.2755, + "step": 48266 + }, + { + "epoch": 0.9662337662337662, + "grad_norm": 1.1169404983520508, + "learning_rate": 2.9844417646783807e-08, + "loss": 0.3102, + "step": 48267 + }, + { + "epoch": 0.9662537847508946, + "grad_norm": 1.3413777351379395, + "learning_rate": 2.9809060724025895e-08, + "loss": 0.2741, + "step": 48268 + }, + { + "epoch": 0.9662738032680229, + "grad_norm": 1.072590947151184, + "learning_rate": 2.9773724694880936e-08, + "loss": 0.2908, + "step": 48269 + }, + { + "epoch": 0.9662938217851512, + "grad_norm": 1.1598057746887207, + "learning_rate": 2.973840955949714e-08, + "loss": 0.3296, + "step": 48270 + }, + { + "epoch": 0.9663138403022796, + "grad_norm": 1.240644931793213, + "learning_rate": 2.9703115318023835e-08, + "loss": 0.2779, + "step": 48271 + }, + { + "epoch": 0.9663338588194079, + "grad_norm": 2.143620252609253, + "learning_rate": 2.9667841970609235e-08, + "loss": 0.7917, + "step": 48272 + }, + { + "epoch": 0.9663538773365363, + "grad_norm": 1.1006964445114136, + "learning_rate": 2.9632589517401555e-08, + "loss": 0.2675, + "step": 48273 + }, + { + "epoch": 0.9663738958536646, + "grad_norm": 1.0767138004302979, + "learning_rate": 2.9597357958548456e-08, + "loss": 0.2899, + "step": 48274 + }, + { + "epoch": 0.966393914370793, + "grad_norm": 1.1066032648086548, + "learning_rate": 2.956214729419815e-08, + "loss": 0.3109, + "step": 48275 + }, + { + "epoch": 0.9664139328879213, + "grad_norm": 1.1574218273162842, + "learning_rate": 2.952695752449941e-08, + "loss": 0.2893, + "step": 48276 + }, + { + "epoch": 0.9664339514050496, + "grad_norm": 1.075238823890686, + "learning_rate": 2.9491788649599896e-08, + "loss": 0.2428, + "step": 48277 + }, + { + "epoch": 0.966453969922178, + "grad_norm": 1.2136497497558594, + "learning_rate": 2.9456640669646707e-08, + "loss": 0.3001, + "step": 48278 + }, + { + "epoch": 0.9664739884393063, + "grad_norm": 1.148816466331482, + "learning_rate": 2.9421513584788063e-08, + "loss": 0.2897, + "step": 48279 + }, + { + "epoch": 0.9664940069564347, + "grad_norm": 1.1320431232452393, + "learning_rate": 2.9386407395172177e-08, + "loss": 0.3093, + "step": 48280 + }, + { + "epoch": 0.966514025473563, + "grad_norm": 1.0994601249694824, + "learning_rate": 2.93513221009456e-08, + "loss": 0.2549, + "step": 48281 + }, + { + "epoch": 0.9665340439906914, + "grad_norm": 1.9888209104537964, + "learning_rate": 2.93162577022571e-08, + "loss": 0.787, + "step": 48282 + }, + { + "epoch": 0.9665540625078197, + "grad_norm": 1.2729145288467407, + "learning_rate": 2.9281214199252673e-08, + "loss": 0.3535, + "step": 48283 + }, + { + "epoch": 0.9665740810249481, + "grad_norm": 1.1075959205627441, + "learning_rate": 2.924619159208053e-08, + "loss": 0.2916, + "step": 48284 + }, + { + "epoch": 0.9665940995420764, + "grad_norm": 1.1755902767181396, + "learning_rate": 2.921118988088778e-08, + "loss": 0.2544, + "step": 48285 + }, + { + "epoch": 0.9666141180592047, + "grad_norm": 1.0563308000564575, + "learning_rate": 2.9176209065821527e-08, + "loss": 0.3319, + "step": 48286 + }, + { + "epoch": 0.9666341365763331, + "grad_norm": 0.9607957601547241, + "learning_rate": 2.9141249147028872e-08, + "loss": 0.2668, + "step": 48287 + }, + { + "epoch": 0.9666541550934614, + "grad_norm": 1.0678746700286865, + "learning_rate": 2.9106310124655813e-08, + "loss": 0.291, + "step": 48288 + }, + { + "epoch": 0.9666741736105898, + "grad_norm": 1.0268197059631348, + "learning_rate": 2.9071391998851118e-08, + "loss": 0.2585, + "step": 48289 + }, + { + "epoch": 0.9666941921277181, + "grad_norm": 1.130885362625122, + "learning_rate": 2.9036494769760227e-08, + "loss": 0.2859, + "step": 48290 + }, + { + "epoch": 0.9667142106448465, + "grad_norm": 1.0167787075042725, + "learning_rate": 2.900161843752969e-08, + "loss": 0.2667, + "step": 48291 + }, + { + "epoch": 0.9667342291619748, + "grad_norm": 1.176308035850525, + "learning_rate": 2.8966763002307163e-08, + "loss": 0.2876, + "step": 48292 + }, + { + "epoch": 0.9667542476791031, + "grad_norm": 1.0334196090698242, + "learning_rate": 2.893192846423809e-08, + "loss": 0.3304, + "step": 48293 + }, + { + "epoch": 0.9667742661962315, + "grad_norm": 1.136173963546753, + "learning_rate": 2.8897114823469573e-08, + "loss": 0.321, + "step": 48294 + }, + { + "epoch": 0.9667942847133598, + "grad_norm": 1.8147624731063843, + "learning_rate": 2.8862322080148164e-08, + "loss": 0.7666, + "step": 48295 + }, + { + "epoch": 0.9668143032304882, + "grad_norm": 1.2141891717910767, + "learning_rate": 2.88275502344193e-08, + "loss": 0.3235, + "step": 48296 + }, + { + "epoch": 0.9668343217476165, + "grad_norm": 1.0477981567382812, + "learning_rate": 2.879279928642953e-08, + "loss": 0.2791, + "step": 48297 + }, + { + "epoch": 0.9668543402647449, + "grad_norm": 1.2737504243850708, + "learning_rate": 2.8758069236325402e-08, + "loss": 0.2693, + "step": 48298 + }, + { + "epoch": 0.9668743587818732, + "grad_norm": 1.0841189622879028, + "learning_rate": 2.872336008425236e-08, + "loss": 0.2709, + "step": 48299 + }, + { + "epoch": 0.9668943772990016, + "grad_norm": 1.1355478763580322, + "learning_rate": 2.8688671830356395e-08, + "loss": 0.3112, + "step": 48300 + }, + { + "epoch": 0.9669143958161299, + "grad_norm": 1.1627753973007202, + "learning_rate": 2.8654004474783504e-08, + "loss": 0.2937, + "step": 48301 + }, + { + "epoch": 0.9669344143332582, + "grad_norm": 1.148637294769287, + "learning_rate": 2.8619358017679677e-08, + "loss": 0.2529, + "step": 48302 + }, + { + "epoch": 0.9669544328503866, + "grad_norm": 1.0772086381912231, + "learning_rate": 2.85847324591898e-08, + "loss": 0.3, + "step": 48303 + }, + { + "epoch": 0.9669744513675149, + "grad_norm": 1.1159045696258545, + "learning_rate": 2.855012779945987e-08, + "loss": 0.3178, + "step": 48304 + }, + { + "epoch": 0.9669944698846433, + "grad_norm": 1.0316283702850342, + "learning_rate": 2.8515544038635322e-08, + "loss": 0.296, + "step": 48305 + }, + { + "epoch": 0.9670144884017716, + "grad_norm": 1.125137209892273, + "learning_rate": 2.84809811768616e-08, + "loss": 0.304, + "step": 48306 + }, + { + "epoch": 0.9670345069189, + "grad_norm": 1.9082932472229004, + "learning_rate": 2.844643921428414e-08, + "loss": 0.7904, + "step": 48307 + }, + { + "epoch": 0.9670545254360283, + "grad_norm": 1.1473093032836914, + "learning_rate": 2.8411918151047822e-08, + "loss": 0.3338, + "step": 48308 + }, + { + "epoch": 0.9670745439531566, + "grad_norm": 1.0848664045333862, + "learning_rate": 2.8377417987298093e-08, + "loss": 0.3033, + "step": 48309 + }, + { + "epoch": 0.967094562470285, + "grad_norm": 0.9684690833091736, + "learning_rate": 2.834293872317928e-08, + "loss": 0.2282, + "step": 48310 + }, + { + "epoch": 0.9671145809874133, + "grad_norm": 1.1674362421035767, + "learning_rate": 2.830848035883682e-08, + "loss": 0.2769, + "step": 48311 + }, + { + "epoch": 0.9671345995045417, + "grad_norm": 1.069334626197815, + "learning_rate": 2.8274042894416155e-08, + "loss": 0.2828, + "step": 48312 + }, + { + "epoch": 0.96715461802167, + "grad_norm": 1.161171555519104, + "learning_rate": 2.8239626330061608e-08, + "loss": 0.2536, + "step": 48313 + }, + { + "epoch": 0.9671746365387984, + "grad_norm": 1.1160818338394165, + "learning_rate": 2.8205230665916962e-08, + "loss": 0.278, + "step": 48314 + }, + { + "epoch": 0.9671946550559267, + "grad_norm": 1.15389084815979, + "learning_rate": 2.8170855902128203e-08, + "loss": 0.2554, + "step": 48315 + }, + { + "epoch": 0.9672146735730551, + "grad_norm": 1.0993448495864868, + "learning_rate": 2.813650203883911e-08, + "loss": 0.2994, + "step": 48316 + }, + { + "epoch": 0.9672346920901834, + "grad_norm": 1.1307307481765747, + "learning_rate": 2.810216907619401e-08, + "loss": 0.3194, + "step": 48317 + }, + { + "epoch": 0.9672547106073117, + "grad_norm": 1.1803487539291382, + "learning_rate": 2.806785701433723e-08, + "loss": 0.3102, + "step": 48318 + }, + { + "epoch": 0.9672747291244401, + "grad_norm": 1.2132508754730225, + "learning_rate": 2.8033565853413658e-08, + "loss": 0.345, + "step": 48319 + }, + { + "epoch": 0.9672947476415684, + "grad_norm": 1.1679587364196777, + "learning_rate": 2.799929559356651e-08, + "loss": 0.2447, + "step": 48320 + }, + { + "epoch": 0.9673147661586968, + "grad_norm": 1.1306095123291016, + "learning_rate": 2.7965046234940672e-08, + "loss": 0.3177, + "step": 48321 + }, + { + "epoch": 0.9673347846758251, + "grad_norm": 0.9738054275512695, + "learning_rate": 2.793081777767992e-08, + "loss": 0.2737, + "step": 48322 + }, + { + "epoch": 0.9673548031929535, + "grad_norm": 1.0514172315597534, + "learning_rate": 2.7896610221928022e-08, + "loss": 0.2535, + "step": 48323 + }, + { + "epoch": 0.9673748217100818, + "grad_norm": 1.2778983116149902, + "learning_rate": 2.7862423567828757e-08, + "loss": 0.2824, + "step": 48324 + }, + { + "epoch": 0.9673948402272101, + "grad_norm": 1.1101911067962646, + "learning_rate": 2.782825781552534e-08, + "loss": 0.2832, + "step": 48325 + }, + { + "epoch": 0.9674148587443385, + "grad_norm": 1.1670798063278198, + "learning_rate": 2.7794112965162657e-08, + "loss": 0.3057, + "step": 48326 + }, + { + "epoch": 0.9674348772614668, + "grad_norm": 1.9366673231124878, + "learning_rate": 2.7759989016882814e-08, + "loss": 0.7948, + "step": 48327 + }, + { + "epoch": 0.9674548957785952, + "grad_norm": 1.1149810552597046, + "learning_rate": 2.7725885970830147e-08, + "loss": 0.2368, + "step": 48328 + }, + { + "epoch": 0.9674749142957235, + "grad_norm": 1.0785882472991943, + "learning_rate": 2.7691803827147867e-08, + "loss": 0.2424, + "step": 48329 + }, + { + "epoch": 0.9674949328128519, + "grad_norm": 1.0258467197418213, + "learning_rate": 2.76577425859792e-08, + "loss": 0.2691, + "step": 48330 + }, + { + "epoch": 0.9675149513299802, + "grad_norm": 1.1428896188735962, + "learning_rate": 2.7623702247467353e-08, + "loss": 0.2785, + "step": 48331 + }, + { + "epoch": 0.9675349698471086, + "grad_norm": 1.126899003982544, + "learning_rate": 2.758968281175556e-08, + "loss": 0.2606, + "step": 48332 + }, + { + "epoch": 0.9675549883642369, + "grad_norm": 1.435562014579773, + "learning_rate": 2.755568427898647e-08, + "loss": 0.2749, + "step": 48333 + }, + { + "epoch": 0.9675750068813652, + "grad_norm": 1.751943826675415, + "learning_rate": 2.7521706649303315e-08, + "loss": 0.7108, + "step": 48334 + }, + { + "epoch": 0.9675950253984936, + "grad_norm": 1.0496587753295898, + "learning_rate": 2.748774992284875e-08, + "loss": 0.3, + "step": 48335 + }, + { + "epoch": 0.9676150439156219, + "grad_norm": 1.0995395183563232, + "learning_rate": 2.7453814099765442e-08, + "loss": 0.2532, + "step": 48336 + }, + { + "epoch": 0.9676350624327503, + "grad_norm": 2.014695882797241, + "learning_rate": 2.7419899180196053e-08, + "loss": 0.868, + "step": 48337 + }, + { + "epoch": 0.9676550809498786, + "grad_norm": 1.3323607444763184, + "learning_rate": 2.7386005164283803e-08, + "loss": 0.2794, + "step": 48338 + }, + { + "epoch": 0.967675099467007, + "grad_norm": 1.076614499092102, + "learning_rate": 2.7352132052170243e-08, + "loss": 0.3371, + "step": 48339 + }, + { + "epoch": 0.9676951179841353, + "grad_norm": 0.9962447285652161, + "learning_rate": 2.7318279843998597e-08, + "loss": 0.264, + "step": 48340 + }, + { + "epoch": 0.9677151365012636, + "grad_norm": 1.267886996269226, + "learning_rate": 2.7284448539910413e-08, + "loss": 0.3287, + "step": 48341 + }, + { + "epoch": 0.967735155018392, + "grad_norm": 1.7267974615097046, + "learning_rate": 2.725063814004836e-08, + "loss": 0.7608, + "step": 48342 + }, + { + "epoch": 0.9677551735355203, + "grad_norm": 1.0776660442352295, + "learning_rate": 2.7216848644554538e-08, + "loss": 0.3381, + "step": 48343 + }, + { + "epoch": 0.9677751920526487, + "grad_norm": 1.1381027698516846, + "learning_rate": 2.7183080053571064e-08, + "loss": 0.2803, + "step": 48344 + }, + { + "epoch": 0.967795210569777, + "grad_norm": 1.2498645782470703, + "learning_rate": 2.7149332367238933e-08, + "loss": 0.2746, + "step": 48345 + }, + { + "epoch": 0.9678152290869054, + "grad_norm": 1.1547000408172607, + "learning_rate": 2.711560558570192e-08, + "loss": 0.3206, + "step": 48346 + }, + { + "epoch": 0.9678352476040337, + "grad_norm": 1.0933133363723755, + "learning_rate": 2.708189970910047e-08, + "loss": 0.3115, + "step": 48347 + }, + { + "epoch": 0.9678552661211621, + "grad_norm": 1.191101312637329, + "learning_rate": 2.7048214737576128e-08, + "loss": 0.2907, + "step": 48348 + }, + { + "epoch": 0.9678752846382904, + "grad_norm": 1.1143772602081299, + "learning_rate": 2.7014550671271013e-08, + "loss": 0.2999, + "step": 48349 + }, + { + "epoch": 0.9678953031554187, + "grad_norm": 1.1243915557861328, + "learning_rate": 2.698090751032667e-08, + "loss": 0.2948, + "step": 48350 + }, + { + "epoch": 0.9679153216725471, + "grad_norm": 1.7331944704055786, + "learning_rate": 2.694728525488466e-08, + "loss": 0.6704, + "step": 48351 + }, + { + "epoch": 0.9679353401896754, + "grad_norm": 1.1088168621063232, + "learning_rate": 2.6913683905085975e-08, + "loss": 0.3353, + "step": 48352 + }, + { + "epoch": 0.9679553587068038, + "grad_norm": 1.902943730354309, + "learning_rate": 2.6880103461071615e-08, + "loss": 0.7884, + "step": 48353 + }, + { + "epoch": 0.9679753772239321, + "grad_norm": 1.224359154701233, + "learning_rate": 2.6846543922983138e-08, + "loss": 0.3399, + "step": 48354 + }, + { + "epoch": 0.9679953957410605, + "grad_norm": 1.229252815246582, + "learning_rate": 2.6813005290961536e-08, + "loss": 0.2929, + "step": 48355 + }, + { + "epoch": 0.9680154142581888, + "grad_norm": 1.9307657480239868, + "learning_rate": 2.677948756514781e-08, + "loss": 0.697, + "step": 48356 + }, + { + "epoch": 0.9680354327753171, + "grad_norm": 2.0062053203582764, + "learning_rate": 2.674599074568296e-08, + "loss": 0.7421, + "step": 48357 + }, + { + "epoch": 0.9680554512924455, + "grad_norm": 1.0301414728164673, + "learning_rate": 2.6712514832707426e-08, + "loss": 0.3007, + "step": 48358 + }, + { + "epoch": 0.9680754698095738, + "grad_norm": 1.8847862482070923, + "learning_rate": 2.6679059826362762e-08, + "loss": 0.8266, + "step": 48359 + }, + { + "epoch": 0.9680954883267022, + "grad_norm": 1.3085273504257202, + "learning_rate": 2.664562572678886e-08, + "loss": 0.3269, + "step": 48360 + }, + { + "epoch": 0.9681155068438305, + "grad_norm": 1.198737382888794, + "learning_rate": 2.6612212534126157e-08, + "loss": 0.3506, + "step": 48361 + }, + { + "epoch": 0.9681355253609589, + "grad_norm": 1.1317381858825684, + "learning_rate": 2.6578820248515657e-08, + "loss": 0.2745, + "step": 48362 + }, + { + "epoch": 0.9681555438780872, + "grad_norm": 1.1048270463943481, + "learning_rate": 2.6545448870097245e-08, + "loss": 0.3225, + "step": 48363 + }, + { + "epoch": 0.9681755623952156, + "grad_norm": 1.8344839811325073, + "learning_rate": 2.651209839901192e-08, + "loss": 0.7347, + "step": 48364 + }, + { + "epoch": 0.9681955809123439, + "grad_norm": 1.9534317255020142, + "learning_rate": 2.6478768835399572e-08, + "loss": 0.8135, + "step": 48365 + }, + { + "epoch": 0.9682155994294722, + "grad_norm": 1.0493805408477783, + "learning_rate": 2.644546017939953e-08, + "loss": 0.3207, + "step": 48366 + }, + { + "epoch": 0.9682356179466006, + "grad_norm": 1.0180259943008423, + "learning_rate": 2.6412172431152794e-08, + "loss": 0.2568, + "step": 48367 + }, + { + "epoch": 0.9682556364637289, + "grad_norm": 2.075906276702881, + "learning_rate": 2.6378905590799254e-08, + "loss": 0.3219, + "step": 48368 + }, + { + "epoch": 0.9682756549808573, + "grad_norm": 1.0879358053207397, + "learning_rate": 2.6345659658477686e-08, + "loss": 0.2869, + "step": 48369 + }, + { + "epoch": 0.9682956734979856, + "grad_norm": 1.2161831855773926, + "learning_rate": 2.6312434634329088e-08, + "loss": 0.2504, + "step": 48370 + }, + { + "epoch": 0.968315692015114, + "grad_norm": 1.58950674533844, + "learning_rate": 2.6279230518492793e-08, + "loss": 0.2868, + "step": 48371 + }, + { + "epoch": 0.9683357105322423, + "grad_norm": 1.1542330980300903, + "learning_rate": 2.6246047311108135e-08, + "loss": 0.32, + "step": 48372 + }, + { + "epoch": 0.9683557290493706, + "grad_norm": 1.0174436569213867, + "learning_rate": 2.6212885012314447e-08, + "loss": 0.3037, + "step": 48373 + }, + { + "epoch": 0.968375747566499, + "grad_norm": 1.0268208980560303, + "learning_rate": 2.617974362225162e-08, + "loss": 0.2947, + "step": 48374 + }, + { + "epoch": 0.9683957660836273, + "grad_norm": 1.2227188348770142, + "learning_rate": 2.614662314105898e-08, + "loss": 0.3456, + "step": 48375 + }, + { + "epoch": 0.9684157846007557, + "grad_norm": 1.184281349182129, + "learning_rate": 2.611352356887531e-08, + "loss": 0.3182, + "step": 48376 + }, + { + "epoch": 0.968435803117884, + "grad_norm": 1.054263949394226, + "learning_rate": 2.6080444905839942e-08, + "loss": 0.2845, + "step": 48377 + }, + { + "epoch": 0.9684558216350124, + "grad_norm": 1.0861608982086182, + "learning_rate": 2.6047387152092206e-08, + "loss": 0.2621, + "step": 48378 + }, + { + "epoch": 0.9684758401521407, + "grad_norm": 1.0981484651565552, + "learning_rate": 2.6014350307770887e-08, + "loss": 0.2633, + "step": 48379 + }, + { + "epoch": 0.9684958586692691, + "grad_norm": 1.9103188514709473, + "learning_rate": 2.5981334373014755e-08, + "loss": 0.8065, + "step": 48380 + }, + { + "epoch": 0.9685158771863974, + "grad_norm": 1.1201125383377075, + "learning_rate": 2.594833934796259e-08, + "loss": 0.306, + "step": 48381 + }, + { + "epoch": 0.9685358957035257, + "grad_norm": 1.1337090730667114, + "learning_rate": 2.5915365232753177e-08, + "loss": 0.2712, + "step": 48382 + }, + { + "epoch": 0.9685559142206541, + "grad_norm": 1.0666383504867554, + "learning_rate": 2.5882412027525284e-08, + "loss": 0.2552, + "step": 48383 + }, + { + "epoch": 0.9685759327377824, + "grad_norm": 1.1715160608291626, + "learning_rate": 2.5849479732417137e-08, + "loss": 0.288, + "step": 48384 + }, + { + "epoch": 0.9685959512549108, + "grad_norm": 1.013267159461975, + "learning_rate": 2.581656834756696e-08, + "loss": 0.2818, + "step": 48385 + }, + { + "epoch": 0.9686159697720391, + "grad_norm": 1.146870493888855, + "learning_rate": 2.578367787311409e-08, + "loss": 0.2846, + "step": 48386 + }, + { + "epoch": 0.9686359882891675, + "grad_norm": 1.0985348224639893, + "learning_rate": 2.5750808309196184e-08, + "loss": 0.3066, + "step": 48387 + }, + { + "epoch": 0.9686560068062958, + "grad_norm": 1.0791807174682617, + "learning_rate": 2.571795965595092e-08, + "loss": 0.3174, + "step": 48388 + }, + { + "epoch": 0.9686760253234241, + "grad_norm": 1.0345172882080078, + "learning_rate": 2.5685131913517625e-08, + "loss": 0.3021, + "step": 48389 + }, + { + "epoch": 0.9686960438405525, + "grad_norm": 1.217299222946167, + "learning_rate": 2.565232508203286e-08, + "loss": 0.251, + "step": 48390 + }, + { + "epoch": 0.9687160623576808, + "grad_norm": 1.08676016330719, + "learning_rate": 2.56195391616354e-08, + "loss": 0.2876, + "step": 48391 + }, + { + "epoch": 0.9687360808748092, + "grad_norm": 0.9875795841217041, + "learning_rate": 2.5586774152462913e-08, + "loss": 0.2374, + "step": 48392 + }, + { + "epoch": 0.9687560993919375, + "grad_norm": 1.768169641494751, + "learning_rate": 2.555403005465307e-08, + "loss": 0.729, + "step": 48393 + }, + { + "epoch": 0.9687761179090659, + "grad_norm": 1.8809669017791748, + "learning_rate": 2.552130686834353e-08, + "loss": 0.7031, + "step": 48394 + }, + { + "epoch": 0.9687961364261942, + "grad_norm": 1.22110116481781, + "learning_rate": 2.5488604593671973e-08, + "loss": 0.2834, + "step": 48395 + }, + { + "epoch": 0.9688161549433226, + "grad_norm": 1.2951637506484985, + "learning_rate": 2.5455923230776057e-08, + "loss": 0.2821, + "step": 48396 + }, + { + "epoch": 0.9688361734604509, + "grad_norm": 1.12847900390625, + "learning_rate": 2.5423262779792345e-08, + "loss": 0.2683, + "step": 48397 + }, + { + "epoch": 0.9688561919775792, + "grad_norm": 1.0920811891555786, + "learning_rate": 2.5390623240859057e-08, + "loss": 0.3034, + "step": 48398 + }, + { + "epoch": 0.9688762104947076, + "grad_norm": 1.0786579847335815, + "learning_rate": 2.535800461411331e-08, + "loss": 0.269, + "step": 48399 + }, + { + "epoch": 0.9688962290118359, + "grad_norm": 1.2978938817977905, + "learning_rate": 2.5325406899691653e-08, + "loss": 0.3038, + "step": 48400 + }, + { + "epoch": 0.9689162475289643, + "grad_norm": 1.1798378229141235, + "learning_rate": 2.5292830097731203e-08, + "loss": 0.2756, + "step": 48401 + }, + { + "epoch": 0.9689362660460926, + "grad_norm": 1.225642442703247, + "learning_rate": 2.526027420836963e-08, + "loss": 0.3094, + "step": 48402 + }, + { + "epoch": 0.968956284563221, + "grad_norm": 1.1836227178573608, + "learning_rate": 2.5227739231742932e-08, + "loss": 0.3128, + "step": 48403 + }, + { + "epoch": 0.9689763030803493, + "grad_norm": 1.0202065706253052, + "learning_rate": 2.5195225167988223e-08, + "loss": 0.2877, + "step": 48404 + }, + { + "epoch": 0.9689963215974776, + "grad_norm": 1.0973782539367676, + "learning_rate": 2.516273201724262e-08, + "loss": 0.2987, + "step": 48405 + }, + { + "epoch": 0.969016340114606, + "grad_norm": 1.136085867881775, + "learning_rate": 2.5130259779641564e-08, + "loss": 0.296, + "step": 48406 + }, + { + "epoch": 0.9690363586317343, + "grad_norm": 1.9444572925567627, + "learning_rate": 2.5097808455322725e-08, + "loss": 0.7079, + "step": 48407 + }, + { + "epoch": 0.9690563771488627, + "grad_norm": 1.090529203414917, + "learning_rate": 2.5065378044422105e-08, + "loss": 0.3304, + "step": 48408 + }, + { + "epoch": 0.969076395665991, + "grad_norm": 1.1562649011611938, + "learning_rate": 2.503296854707571e-08, + "loss": 0.2939, + "step": 48409 + }, + { + "epoch": 0.9690964141831194, + "grad_norm": 1.1079928874969482, + "learning_rate": 2.5000579963420646e-08, + "loss": 0.2783, + "step": 48410 + }, + { + "epoch": 0.9691164327002477, + "grad_norm": 1.9010562896728516, + "learning_rate": 2.4968212293592363e-08, + "loss": 0.7389, + "step": 48411 + }, + { + "epoch": 0.9691364512173761, + "grad_norm": 1.1462773084640503, + "learning_rate": 2.4935865537726868e-08, + "loss": 0.3048, + "step": 48412 + }, + { + "epoch": 0.9691564697345044, + "grad_norm": 1.2220152616500854, + "learning_rate": 2.4903539695960155e-08, + "loss": 0.3034, + "step": 48413 + }, + { + "epoch": 0.9691764882516327, + "grad_norm": 1.1574194431304932, + "learning_rate": 2.4871234768428786e-08, + "loss": 0.2731, + "step": 48414 + }, + { + "epoch": 0.9691965067687611, + "grad_norm": 1.076589584350586, + "learning_rate": 2.483895075526821e-08, + "loss": 0.2529, + "step": 48415 + }, + { + "epoch": 0.9692165252858894, + "grad_norm": 1.5634719133377075, + "learning_rate": 2.4806687656613315e-08, + "loss": 0.2556, + "step": 48416 + }, + { + "epoch": 0.9692365438030178, + "grad_norm": 1.0943267345428467, + "learning_rate": 2.477444547260066e-08, + "loss": 0.2506, + "step": 48417 + }, + { + "epoch": 0.9692565623201461, + "grad_norm": 1.0633625984191895, + "learning_rate": 2.4742224203365695e-08, + "loss": 0.2853, + "step": 48418 + }, + { + "epoch": 0.9692765808372745, + "grad_norm": 1.213840126991272, + "learning_rate": 2.471002384904386e-08, + "loss": 0.3033, + "step": 48419 + }, + { + "epoch": 0.9692965993544028, + "grad_norm": 1.0972480773925781, + "learning_rate": 2.4677844409770056e-08, + "loss": 0.291, + "step": 48420 + }, + { + "epoch": 0.9693166178715311, + "grad_norm": 1.1199450492858887, + "learning_rate": 2.464568588568028e-08, + "loss": 0.3343, + "step": 48421 + }, + { + "epoch": 0.9693366363886595, + "grad_norm": 2.076976776123047, + "learning_rate": 2.461354827690887e-08, + "loss": 0.7511, + "step": 48422 + }, + { + "epoch": 0.9693566549057878, + "grad_norm": 1.0735251903533936, + "learning_rate": 2.458143158359183e-08, + "loss": 0.2586, + "step": 48423 + }, + { + "epoch": 0.9693766734229162, + "grad_norm": 1.2499275207519531, + "learning_rate": 2.4549335805863494e-08, + "loss": 0.2582, + "step": 48424 + }, + { + "epoch": 0.9693966919400445, + "grad_norm": 1.0805163383483887, + "learning_rate": 2.4517260943859312e-08, + "loss": 0.2866, + "step": 48425 + }, + { + "epoch": 0.9694167104571729, + "grad_norm": 1.126056432723999, + "learning_rate": 2.4485206997713617e-08, + "loss": 0.2998, + "step": 48426 + }, + { + "epoch": 0.9694367289743012, + "grad_norm": 1.0754971504211426, + "learning_rate": 2.4453173967561305e-08, + "loss": 0.3109, + "step": 48427 + }, + { + "epoch": 0.9694567474914296, + "grad_norm": 1.1600139141082764, + "learning_rate": 2.4421161853537266e-08, + "loss": 0.3071, + "step": 48428 + }, + { + "epoch": 0.9694767660085579, + "grad_norm": 1.0814480781555176, + "learning_rate": 2.438917065577584e-08, + "loss": 0.3145, + "step": 48429 + }, + { + "epoch": 0.9694967845256862, + "grad_norm": 1.1410549879074097, + "learning_rate": 2.4357200374411362e-08, + "loss": 0.2976, + "step": 48430 + }, + { + "epoch": 0.9695168030428146, + "grad_norm": 1.2636085748672485, + "learning_rate": 2.432525100957872e-08, + "loss": 0.2982, + "step": 48431 + }, + { + "epoch": 0.9695368215599429, + "grad_norm": 1.0820318460464478, + "learning_rate": 2.42933225614117e-08, + "loss": 0.3057, + "step": 48432 + }, + { + "epoch": 0.9695568400770713, + "grad_norm": 1.2178027629852295, + "learning_rate": 2.4261415030044644e-08, + "loss": 0.273, + "step": 48433 + }, + { + "epoch": 0.9695768585941996, + "grad_norm": 1.1047513484954834, + "learning_rate": 2.4229528415612436e-08, + "loss": 0.2845, + "step": 48434 + }, + { + "epoch": 0.969596877111328, + "grad_norm": 1.051742672920227, + "learning_rate": 2.419766271824775e-08, + "loss": 0.3217, + "step": 48435 + }, + { + "epoch": 0.9696168956284563, + "grad_norm": 1.0879573822021484, + "learning_rate": 2.4165817938086035e-08, + "loss": 0.2731, + "step": 48436 + }, + { + "epoch": 0.9696369141455846, + "grad_norm": 1.259476900100708, + "learning_rate": 2.413399407525996e-08, + "loss": 0.3076, + "step": 48437 + }, + { + "epoch": 0.969656932662713, + "grad_norm": 1.9734982252120972, + "learning_rate": 2.4102191129903862e-08, + "loss": 0.7522, + "step": 48438 + }, + { + "epoch": 0.9696769511798413, + "grad_norm": 1.1353988647460938, + "learning_rate": 2.407040910215097e-08, + "loss": 0.3124, + "step": 48439 + }, + { + "epoch": 0.9696969696969697, + "grad_norm": 1.1703170537948608, + "learning_rate": 2.403864799213562e-08, + "loss": 0.3137, + "step": 48440 + }, + { + "epoch": 0.969716988214098, + "grad_norm": 1.2924083471298218, + "learning_rate": 2.400690779999104e-08, + "loss": 0.3238, + "step": 48441 + }, + { + "epoch": 0.9697370067312264, + "grad_norm": 1.0423451662063599, + "learning_rate": 2.3975188525850456e-08, + "loss": 0.2625, + "step": 48442 + }, + { + "epoch": 0.9697570252483547, + "grad_norm": 1.0714198350906372, + "learning_rate": 2.394349016984765e-08, + "loss": 0.2718, + "step": 48443 + }, + { + "epoch": 0.9697770437654831, + "grad_norm": 1.0643588304519653, + "learning_rate": 2.391181273211529e-08, + "loss": 0.3059, + "step": 48444 + }, + { + "epoch": 0.9697970622826114, + "grad_norm": 1.104183316230774, + "learning_rate": 2.388015621278661e-08, + "loss": 0.2699, + "step": 48445 + }, + { + "epoch": 0.9698170807997397, + "grad_norm": 1.3231607675552368, + "learning_rate": 2.384852061199483e-08, + "loss": 0.3212, + "step": 48446 + }, + { + "epoch": 0.9698370993168681, + "grad_norm": 0.9915632605552673, + "learning_rate": 2.381690592987318e-08, + "loss": 0.2429, + "step": 48447 + }, + { + "epoch": 0.9698571178339964, + "grad_norm": 1.0224937200546265, + "learning_rate": 2.378531216655433e-08, + "loss": 0.2798, + "step": 48448 + }, + { + "epoch": 0.9698771363511248, + "grad_norm": 1.140443205833435, + "learning_rate": 2.3753739322170955e-08, + "loss": 0.3241, + "step": 48449 + }, + { + "epoch": 0.9698971548682531, + "grad_norm": 1.142723560333252, + "learning_rate": 2.3722187396856276e-08, + "loss": 0.3266, + "step": 48450 + }, + { + "epoch": 0.9699171733853815, + "grad_norm": 0.9906192421913147, + "learning_rate": 2.3690656390742416e-08, + "loss": 0.2593, + "step": 48451 + }, + { + "epoch": 0.9699371919025098, + "grad_norm": 1.0598920583724976, + "learning_rate": 2.3659146303962045e-08, + "loss": 0.295, + "step": 48452 + }, + { + "epoch": 0.9699572104196381, + "grad_norm": 1.3463494777679443, + "learning_rate": 2.3627657136648386e-08, + "loss": 0.3325, + "step": 48453 + }, + { + "epoch": 0.9699772289367665, + "grad_norm": 1.1505024433135986, + "learning_rate": 2.3596188888932446e-08, + "loss": 0.2615, + "step": 48454 + }, + { + "epoch": 0.9699972474538948, + "grad_norm": 1.129586935043335, + "learning_rate": 2.3564741560947457e-08, + "loss": 0.2645, + "step": 48455 + }, + { + "epoch": 0.9700172659710232, + "grad_norm": 1.1355712413787842, + "learning_rate": 2.3533315152824977e-08, + "loss": 0.2902, + "step": 48456 + }, + { + "epoch": 0.9700372844881515, + "grad_norm": 1.0104148387908936, + "learning_rate": 2.3501909664697675e-08, + "loss": 0.2898, + "step": 48457 + }, + { + "epoch": 0.9700573030052799, + "grad_norm": 1.02992582321167, + "learning_rate": 2.347052509669767e-08, + "loss": 0.3009, + "step": 48458 + }, + { + "epoch": 0.9700773215224082, + "grad_norm": 1.1412484645843506, + "learning_rate": 2.3439161448956527e-08, + "loss": 0.3277, + "step": 48459 + }, + { + "epoch": 0.9700973400395366, + "grad_norm": 1.0687365531921387, + "learning_rate": 2.3407818721605802e-08, + "loss": 0.2782, + "step": 48460 + }, + { + "epoch": 0.9701173585566649, + "grad_norm": 1.2025219202041626, + "learning_rate": 2.337649691477817e-08, + "loss": 0.3397, + "step": 48461 + }, + { + "epoch": 0.9701373770737932, + "grad_norm": 1.1713370084762573, + "learning_rate": 2.3345196028604635e-08, + "loss": 0.3092, + "step": 48462 + }, + { + "epoch": 0.9701573955909216, + "grad_norm": 1.1034537553787231, + "learning_rate": 2.3313916063216758e-08, + "loss": 0.3018, + "step": 48463 + }, + { + "epoch": 0.9701774141080499, + "grad_norm": 1.2569615840911865, + "learning_rate": 2.3282657018746104e-08, + "loss": 0.3034, + "step": 48464 + }, + { + "epoch": 0.9701974326251783, + "grad_norm": 2.009946346282959, + "learning_rate": 2.325141889532423e-08, + "loss": 0.7127, + "step": 48465 + }, + { + "epoch": 0.9702174511423066, + "grad_norm": 1.2855027914047241, + "learning_rate": 2.3220201693082145e-08, + "loss": 0.3339, + "step": 48466 + }, + { + "epoch": 0.970237469659435, + "grad_norm": 1.078134536743164, + "learning_rate": 2.3189005412151967e-08, + "loss": 0.314, + "step": 48467 + }, + { + "epoch": 0.9702574881765633, + "grad_norm": 1.068562388420105, + "learning_rate": 2.315783005266359e-08, + "loss": 0.33, + "step": 48468 + }, + { + "epoch": 0.9702775066936916, + "grad_norm": 1.1317532062530518, + "learning_rate": 2.3126675614749128e-08, + "loss": 0.2814, + "step": 48469 + }, + { + "epoch": 0.97029752521082, + "grad_norm": 1.2153578996658325, + "learning_rate": 2.3095542098538483e-08, + "loss": 0.286, + "step": 48470 + }, + { + "epoch": 0.9703175437279483, + "grad_norm": 1.124259352684021, + "learning_rate": 2.3064429504163766e-08, + "loss": 0.2731, + "step": 48471 + }, + { + "epoch": 0.9703375622450767, + "grad_norm": 1.1558046340942383, + "learning_rate": 2.3033337831754875e-08, + "loss": 0.3358, + "step": 48472 + }, + { + "epoch": 0.970357580762205, + "grad_norm": 1.1334326267242432, + "learning_rate": 2.300226708144282e-08, + "loss": 0.2833, + "step": 48473 + }, + { + "epoch": 0.9703775992793334, + "grad_norm": 1.081502914428711, + "learning_rate": 2.2971217253358047e-08, + "loss": 0.3112, + "step": 48474 + }, + { + "epoch": 0.9703976177964617, + "grad_norm": 1.060191035270691, + "learning_rate": 2.294018834763101e-08, + "loss": 0.2842, + "step": 48475 + }, + { + "epoch": 0.9704176363135901, + "grad_norm": 1.1386855840682983, + "learning_rate": 2.2909180364392715e-08, + "loss": 0.2941, + "step": 48476 + }, + { + "epoch": 0.9704376548307184, + "grad_norm": 1.1418359279632568, + "learning_rate": 2.2878193303773054e-08, + "loss": 0.3115, + "step": 48477 + }, + { + "epoch": 0.9704576733478467, + "grad_norm": 1.2042102813720703, + "learning_rate": 2.2847227165902486e-08, + "loss": 0.3102, + "step": 48478 + }, + { + "epoch": 0.9704776918649751, + "grad_norm": 1.049302101135254, + "learning_rate": 2.28162819509109e-08, + "loss": 0.2696, + "step": 48479 + }, + { + "epoch": 0.9704977103821034, + "grad_norm": 1.1710277795791626, + "learning_rate": 2.2785357658928754e-08, + "loss": 0.3125, + "step": 48480 + }, + { + "epoch": 0.9705177288992318, + "grad_norm": 1.0991662740707397, + "learning_rate": 2.2754454290085937e-08, + "loss": 0.2987, + "step": 48481 + }, + { + "epoch": 0.9705377474163601, + "grad_norm": 1.0124316215515137, + "learning_rate": 2.2723571844511794e-08, + "loss": 0.262, + "step": 48482 + }, + { + "epoch": 0.9705577659334885, + "grad_norm": 1.159606695175171, + "learning_rate": 2.2692710322336774e-08, + "loss": 0.3033, + "step": 48483 + }, + { + "epoch": 0.9705777844506168, + "grad_norm": 1.0344599485397339, + "learning_rate": 2.2661869723690778e-08, + "loss": 0.2969, + "step": 48484 + }, + { + "epoch": 0.9705978029677451, + "grad_norm": 1.1252909898757935, + "learning_rate": 2.263105004870314e-08, + "loss": 0.31, + "step": 48485 + }, + { + "epoch": 0.9706178214848735, + "grad_norm": 1.0237635374069214, + "learning_rate": 2.2600251297503208e-08, + "loss": 0.3021, + "step": 48486 + }, + { + "epoch": 0.9706378400020018, + "grad_norm": 1.0926254987716675, + "learning_rate": 2.2569473470220316e-08, + "loss": 0.2594, + "step": 48487 + }, + { + "epoch": 0.9706578585191302, + "grad_norm": 1.0576330423355103, + "learning_rate": 2.253871656698492e-08, + "loss": 0.2665, + "step": 48488 + }, + { + "epoch": 0.9706778770362585, + "grad_norm": 1.2419131994247437, + "learning_rate": 2.250798058792525e-08, + "loss": 0.2764, + "step": 48489 + }, + { + "epoch": 0.9706978955533869, + "grad_norm": 1.0577797889709473, + "learning_rate": 2.24772655331712e-08, + "loss": 0.3432, + "step": 48490 + }, + { + "epoch": 0.9707179140705152, + "grad_norm": 1.0881389379501343, + "learning_rate": 2.2446571402851002e-08, + "loss": 0.3159, + "step": 48491 + }, + { + "epoch": 0.9707379325876435, + "grad_norm": 1.058932900428772, + "learning_rate": 2.2415898197094556e-08, + "loss": 0.3075, + "step": 48492 + }, + { + "epoch": 0.9707579511047719, + "grad_norm": 1.7523977756500244, + "learning_rate": 2.238524591603064e-08, + "loss": 0.7788, + "step": 48493 + }, + { + "epoch": 0.9707779696219002, + "grad_norm": 0.9795716404914856, + "learning_rate": 2.235461455978749e-08, + "loss": 0.2315, + "step": 48494 + }, + { + "epoch": 0.9707979881390286, + "grad_norm": 1.211500644683838, + "learning_rate": 2.2324004128495004e-08, + "loss": 0.2761, + "step": 48495 + }, + { + "epoch": 0.9708180066561569, + "grad_norm": 1.1876239776611328, + "learning_rate": 2.2293414622280295e-08, + "loss": 0.339, + "step": 48496 + }, + { + "epoch": 0.9708380251732853, + "grad_norm": 1.3468809127807617, + "learning_rate": 2.2262846041273268e-08, + "loss": 0.3181, + "step": 48497 + }, + { + "epoch": 0.9708580436904136, + "grad_norm": 1.1296461820602417, + "learning_rate": 2.2232298385602147e-08, + "loss": 0.2828, + "step": 48498 + }, + { + "epoch": 0.970878062207542, + "grad_norm": 1.1254184246063232, + "learning_rate": 2.2201771655395166e-08, + "loss": 0.31, + "step": 48499 + }, + { + "epoch": 0.9708980807246703, + "grad_norm": 1.086694598197937, + "learning_rate": 2.2171265850780554e-08, + "loss": 0.3304, + "step": 48500 + }, + { + "epoch": 0.9709180992417986, + "grad_norm": 1.0841176509857178, + "learning_rate": 2.2140780971887098e-08, + "loss": 0.2849, + "step": 48501 + }, + { + "epoch": 0.970938117758927, + "grad_norm": 1.2089749574661255, + "learning_rate": 2.2110317018842475e-08, + "loss": 0.293, + "step": 48502 + }, + { + "epoch": 0.9709581362760553, + "grad_norm": 1.2674959897994995, + "learning_rate": 2.207987399177436e-08, + "loss": 0.2863, + "step": 48503 + }, + { + "epoch": 0.9709781547931837, + "grad_norm": 1.3127351999282837, + "learning_rate": 2.2049451890811537e-08, + "loss": 0.2537, + "step": 48504 + }, + { + "epoch": 0.970998173310312, + "grad_norm": 1.028056025505066, + "learning_rate": 2.201905071608168e-08, + "loss": 0.2592, + "step": 48505 + }, + { + "epoch": 0.9710181918274404, + "grad_norm": 1.1087607145309448, + "learning_rate": 2.1988670467711913e-08, + "loss": 0.306, + "step": 48506 + }, + { + "epoch": 0.9710382103445687, + "grad_norm": 1.2076728343963623, + "learning_rate": 2.1958311145831023e-08, + "loss": 0.2445, + "step": 48507 + }, + { + "epoch": 0.971058228861697, + "grad_norm": 1.4517213106155396, + "learning_rate": 2.192797275056613e-08, + "loss": 0.2992, + "step": 48508 + }, + { + "epoch": 0.9710782473788254, + "grad_norm": 1.2070273160934448, + "learning_rate": 2.189765528204435e-08, + "loss": 0.26, + "step": 48509 + }, + { + "epoch": 0.9710982658959537, + "grad_norm": 1.0919674634933472, + "learning_rate": 2.1867358740393363e-08, + "loss": 0.2836, + "step": 48510 + }, + { + "epoch": 0.9711182844130821, + "grad_norm": 1.1159700155258179, + "learning_rate": 2.1837083125741398e-08, + "loss": 0.3167, + "step": 48511 + }, + { + "epoch": 0.9711383029302104, + "grad_norm": 1.1146361827850342, + "learning_rate": 2.1806828438215023e-08, + "loss": 0.263, + "step": 48512 + }, + { + "epoch": 0.9711583214473388, + "grad_norm": 1.8830691576004028, + "learning_rate": 2.17765946779408e-08, + "loss": 0.806, + "step": 48513 + }, + { + "epoch": 0.9711783399644671, + "grad_norm": 1.1005706787109375, + "learning_rate": 2.1746381845046404e-08, + "loss": 0.3288, + "step": 48514 + }, + { + "epoch": 0.9711983584815955, + "grad_norm": 2.045367956161499, + "learning_rate": 2.1716189939659515e-08, + "loss": 0.7119, + "step": 48515 + }, + { + "epoch": 0.9712183769987238, + "grad_norm": 1.088970422744751, + "learning_rate": 2.168601896190614e-08, + "loss": 0.2724, + "step": 48516 + }, + { + "epoch": 0.9712383955158521, + "grad_norm": 1.0901669263839722, + "learning_rate": 2.16558689119134e-08, + "loss": 0.2956, + "step": 48517 + }, + { + "epoch": 0.9712584140329805, + "grad_norm": 1.0881752967834473, + "learning_rate": 2.1625739789807865e-08, + "loss": 0.2855, + "step": 48518 + }, + { + "epoch": 0.9712784325501088, + "grad_norm": 1.1217520236968994, + "learning_rate": 2.159563159571665e-08, + "loss": 0.2903, + "step": 48519 + }, + { + "epoch": 0.9712984510672372, + "grad_norm": 1.133615255355835, + "learning_rate": 2.156554432976632e-08, + "loss": 0.3575, + "step": 48520 + }, + { + "epoch": 0.9713184695843655, + "grad_norm": 1.1834421157836914, + "learning_rate": 2.153547799208233e-08, + "loss": 0.2768, + "step": 48521 + }, + { + "epoch": 0.9713384881014939, + "grad_norm": 1.2608718872070312, + "learning_rate": 2.1505432582792362e-08, + "loss": 0.3457, + "step": 48522 + }, + { + "epoch": 0.9713585066186222, + "grad_norm": 1.22684907913208, + "learning_rate": 2.147540810202131e-08, + "loss": 0.3176, + "step": 48523 + }, + { + "epoch": 0.9713785251357505, + "grad_norm": 1.1249847412109375, + "learning_rate": 2.144540454989741e-08, + "loss": 0.2729, + "step": 48524 + }, + { + "epoch": 0.9713985436528789, + "grad_norm": 1.0461808443069458, + "learning_rate": 2.1415421926545e-08, + "loss": 0.307, + "step": 48525 + }, + { + "epoch": 0.9714185621700072, + "grad_norm": 1.15355384349823, + "learning_rate": 2.1385460232090648e-08, + "loss": 0.2563, + "step": 48526 + }, + { + "epoch": 0.9714385806871356, + "grad_norm": 1.0809097290039062, + "learning_rate": 2.1355519466660922e-08, + "loss": 0.2763, + "step": 48527 + }, + { + "epoch": 0.9714585992042639, + "grad_norm": 0.9327124357223511, + "learning_rate": 2.1325599630380723e-08, + "loss": 0.2434, + "step": 48528 + }, + { + "epoch": 0.9714786177213923, + "grad_norm": 1.258650779724121, + "learning_rate": 2.129570072337661e-08, + "loss": 0.2743, + "step": 48529 + }, + { + "epoch": 0.9714986362385206, + "grad_norm": 1.0449585914611816, + "learning_rate": 2.126582274577349e-08, + "loss": 0.3126, + "step": 48530 + }, + { + "epoch": 0.971518654755649, + "grad_norm": 1.0317143201828003, + "learning_rate": 2.1235965697697923e-08, + "loss": 0.2716, + "step": 48531 + }, + { + "epoch": 0.9715386732727773, + "grad_norm": 1.0420902967453003, + "learning_rate": 2.1206129579274814e-08, + "loss": 0.3013, + "step": 48532 + }, + { + "epoch": 0.9715586917899056, + "grad_norm": 1.132095456123352, + "learning_rate": 2.1176314390629617e-08, + "loss": 0.3252, + "step": 48533 + }, + { + "epoch": 0.971578710307034, + "grad_norm": 0.9407181143760681, + "learning_rate": 2.114652013188778e-08, + "loss": 0.2396, + "step": 48534 + }, + { + "epoch": 0.9715987288241623, + "grad_norm": 1.0864547491073608, + "learning_rate": 2.1116746803174215e-08, + "loss": 0.2956, + "step": 48535 + }, + { + "epoch": 0.9716187473412907, + "grad_norm": 1.0340036153793335, + "learning_rate": 2.1086994404614368e-08, + "loss": 0.2811, + "step": 48536 + }, + { + "epoch": 0.971638765858419, + "grad_norm": 1.1078327894210815, + "learning_rate": 2.10572629363337e-08, + "loss": 0.2697, + "step": 48537 + }, + { + "epoch": 0.9716587843755474, + "grad_norm": 1.1159452199935913, + "learning_rate": 2.102755239845655e-08, + "loss": 0.3048, + "step": 48538 + }, + { + "epoch": 0.9716788028926757, + "grad_norm": 1.080417275428772, + "learning_rate": 2.099786279110838e-08, + "loss": 0.2896, + "step": 48539 + }, + { + "epoch": 0.971698821409804, + "grad_norm": 1.3037283420562744, + "learning_rate": 2.0968194114412977e-08, + "loss": 0.3248, + "step": 48540 + }, + { + "epoch": 0.9717188399269324, + "grad_norm": 1.1816198825836182, + "learning_rate": 2.0938546368496903e-08, + "loss": 0.3288, + "step": 48541 + }, + { + "epoch": 0.9717388584440607, + "grad_norm": 1.1378709077835083, + "learning_rate": 2.0908919553482843e-08, + "loss": 0.293, + "step": 48542 + }, + { + "epoch": 0.9717588769611891, + "grad_norm": 1.9119086265563965, + "learning_rate": 2.087931366949625e-08, + "loss": 0.7088, + "step": 48543 + }, + { + "epoch": 0.9717788954783174, + "grad_norm": 1.2683583498001099, + "learning_rate": 2.084972871666202e-08, + "loss": 0.3237, + "step": 48544 + }, + { + "epoch": 0.9717989139954458, + "grad_norm": 1.0920542478561401, + "learning_rate": 2.0820164695103397e-08, + "loss": 0.2389, + "step": 48545 + }, + { + "epoch": 0.9718189325125741, + "grad_norm": 1.0312775373458862, + "learning_rate": 2.0790621604945828e-08, + "loss": 0.2987, + "step": 48546 + }, + { + "epoch": 0.9718389510297025, + "grad_norm": 2.219008684158325, + "learning_rate": 2.076109944631255e-08, + "loss": 0.7303, + "step": 48547 + }, + { + "epoch": 0.9718589695468308, + "grad_norm": 1.0854225158691406, + "learning_rate": 2.073159821932791e-08, + "loss": 0.3032, + "step": 48548 + }, + { + "epoch": 0.9718789880639591, + "grad_norm": 1.0468666553497314, + "learning_rate": 2.070211792411625e-08, + "loss": 0.2771, + "step": 48549 + }, + { + "epoch": 0.9718990065810875, + "grad_norm": 1.0986987352371216, + "learning_rate": 2.0672658560801363e-08, + "loss": 0.2402, + "step": 48550 + }, + { + "epoch": 0.9719190250982158, + "grad_norm": 1.3196876049041748, + "learning_rate": 2.064322012950759e-08, + "loss": 0.3119, + "step": 48551 + }, + { + "epoch": 0.9719390436153442, + "grad_norm": 1.027937650680542, + "learning_rate": 2.0613802630357616e-08, + "loss": 0.308, + "step": 48552 + }, + { + "epoch": 0.9719590621324725, + "grad_norm": 1.0935360193252563, + "learning_rate": 2.0584406063475228e-08, + "loss": 0.2661, + "step": 48553 + }, + { + "epoch": 0.9719790806496009, + "grad_norm": 1.1602703332901, + "learning_rate": 2.0555030428985323e-08, + "loss": 0.2944, + "step": 48554 + }, + { + "epoch": 0.9719990991667292, + "grad_norm": 1.1713656187057495, + "learning_rate": 2.052567572701003e-08, + "loss": 0.29, + "step": 48555 + }, + { + "epoch": 0.9720191176838575, + "grad_norm": 1.2147244215011597, + "learning_rate": 2.049634195767314e-08, + "loss": 0.3471, + "step": 48556 + }, + { + "epoch": 0.9720391362009859, + "grad_norm": 1.132551670074463, + "learning_rate": 2.0467029121098438e-08, + "loss": 0.2723, + "step": 48557 + }, + { + "epoch": 0.9720591547181142, + "grad_norm": 1.1849756240844727, + "learning_rate": 2.043773721740805e-08, + "loss": 0.3722, + "step": 48558 + }, + { + "epoch": 0.9720791732352426, + "grad_norm": 1.1879080533981323, + "learning_rate": 2.040846624672632e-08, + "loss": 0.3111, + "step": 48559 + }, + { + "epoch": 0.9720991917523709, + "grad_norm": 1.0802959203720093, + "learning_rate": 2.0379216209175378e-08, + "loss": 0.2927, + "step": 48560 + }, + { + "epoch": 0.9721192102694993, + "grad_norm": 1.2149581909179688, + "learning_rate": 2.034998710487901e-08, + "loss": 0.2756, + "step": 48561 + }, + { + "epoch": 0.9721392287866276, + "grad_norm": 1.2160981893539429, + "learning_rate": 2.032077893395934e-08, + "loss": 0.3231, + "step": 48562 + }, + { + "epoch": 0.972159247303756, + "grad_norm": 2.017564296722412, + "learning_rate": 2.0291591696539604e-08, + "loss": 0.7295, + "step": 48563 + }, + { + "epoch": 0.9721792658208843, + "grad_norm": 1.2241687774658203, + "learning_rate": 2.026242539274248e-08, + "loss": 0.3232, + "step": 48564 + }, + { + "epoch": 0.9721992843380126, + "grad_norm": 1.200676441192627, + "learning_rate": 2.0233280022690093e-08, + "loss": 0.2828, + "step": 48565 + }, + { + "epoch": 0.972219302855141, + "grad_norm": 1.1052583456039429, + "learning_rate": 2.020415558650568e-08, + "loss": 0.2834, + "step": 48566 + }, + { + "epoch": 0.9722393213722693, + "grad_norm": 1.0447814464569092, + "learning_rate": 2.0175052084311364e-08, + "loss": 0.2924, + "step": 48567 + }, + { + "epoch": 0.9722593398893977, + "grad_norm": 1.2772581577301025, + "learning_rate": 2.014596951622927e-08, + "loss": 0.3083, + "step": 48568 + }, + { + "epoch": 0.972279358406526, + "grad_norm": 1.043911337852478, + "learning_rate": 2.0116907882382075e-08, + "loss": 0.2437, + "step": 48569 + }, + { + "epoch": 0.9722993769236544, + "grad_norm": 0.9867713451385498, + "learning_rate": 2.0087867182891352e-08, + "loss": 0.2518, + "step": 48570 + }, + { + "epoch": 0.9723193954407827, + "grad_norm": 1.1694378852844238, + "learning_rate": 2.005884741787978e-08, + "loss": 0.2701, + "step": 48571 + }, + { + "epoch": 0.972339413957911, + "grad_norm": 1.254332184791565, + "learning_rate": 2.002984858746948e-08, + "loss": 0.2778, + "step": 48572 + }, + { + "epoch": 0.9723594324750394, + "grad_norm": 1.1990361213684082, + "learning_rate": 2.0000870691781473e-08, + "loss": 0.2473, + "step": 48573 + }, + { + "epoch": 0.9723794509921677, + "grad_norm": 1.1897039413452148, + "learning_rate": 1.9971913730937874e-08, + "loss": 0.316, + "step": 48574 + }, + { + "epoch": 0.9723994695092961, + "grad_norm": 1.8096481561660767, + "learning_rate": 1.9942977705060817e-08, + "loss": 0.6948, + "step": 48575 + }, + { + "epoch": 0.9724194880264244, + "grad_norm": 1.0047630071640015, + "learning_rate": 1.991406261427242e-08, + "loss": 0.2679, + "step": 48576 + }, + { + "epoch": 0.9724395065435528, + "grad_norm": 1.8694229125976562, + "learning_rate": 1.988516845869315e-08, + "loss": 0.7194, + "step": 48577 + }, + { + "epoch": 0.9724595250606811, + "grad_norm": 1.106974482536316, + "learning_rate": 1.9856295238444566e-08, + "loss": 0.2892, + "step": 48578 + }, + { + "epoch": 0.9724795435778095, + "grad_norm": 0.984417736530304, + "learning_rate": 1.98274429536488e-08, + "loss": 0.2574, + "step": 48579 + }, + { + "epoch": 0.9724995620949378, + "grad_norm": 1.0715513229370117, + "learning_rate": 1.979861160442631e-08, + "loss": 0.2625, + "step": 48580 + }, + { + "epoch": 0.9725195806120661, + "grad_norm": 1.168666124343872, + "learning_rate": 1.9769801190899217e-08, + "loss": 0.333, + "step": 48581 + }, + { + "epoch": 0.9725395991291945, + "grad_norm": 1.2141741514205933, + "learning_rate": 1.9741011713187985e-08, + "loss": 0.2759, + "step": 48582 + }, + { + "epoch": 0.9725596176463228, + "grad_norm": 1.135390281677246, + "learning_rate": 1.9712243171413627e-08, + "loss": 0.2569, + "step": 48583 + }, + { + "epoch": 0.9725796361634512, + "grad_norm": 1.8983542919158936, + "learning_rate": 1.968349556569715e-08, + "loss": 0.751, + "step": 48584 + }, + { + "epoch": 0.9725996546805795, + "grad_norm": 1.7988988161087036, + "learning_rate": 1.9654768896159583e-08, + "loss": 0.738, + "step": 48585 + }, + { + "epoch": 0.9726196731977079, + "grad_norm": 1.009543538093567, + "learning_rate": 1.9626063162921927e-08, + "loss": 0.2554, + "step": 48586 + }, + { + "epoch": 0.9726396917148362, + "grad_norm": 1.182830572128296, + "learning_rate": 1.9597378366104092e-08, + "loss": 0.3243, + "step": 48587 + }, + { + "epoch": 0.9726597102319645, + "grad_norm": 1.1367217302322388, + "learning_rate": 1.9568714505827092e-08, + "loss": 0.3065, + "step": 48588 + }, + { + "epoch": 0.9726797287490929, + "grad_norm": 1.331910490989685, + "learning_rate": 1.9540071582211383e-08, + "loss": 0.2903, + "step": 48589 + }, + { + "epoch": 0.9726997472662212, + "grad_norm": 1.1575076580047607, + "learning_rate": 1.951144959537743e-08, + "loss": 0.2919, + "step": 48590 + }, + { + "epoch": 0.9727197657833496, + "grad_norm": 1.0522551536560059, + "learning_rate": 1.9482848545445686e-08, + "loss": 0.2901, + "step": 48591 + }, + { + "epoch": 0.9727397843004779, + "grad_norm": 1.1634622812271118, + "learning_rate": 1.9454268432536616e-08, + "loss": 0.2974, + "step": 48592 + }, + { + "epoch": 0.9727598028176063, + "grad_norm": 1.1119234561920166, + "learning_rate": 1.9425709256769566e-08, + "loss": 0.325, + "step": 48593 + }, + { + "epoch": 0.9727798213347346, + "grad_norm": 1.906032919883728, + "learning_rate": 1.939717101826499e-08, + "loss": 0.6768, + "step": 48594 + }, + { + "epoch": 0.972799839851863, + "grad_norm": 1.2535505294799805, + "learning_rate": 1.9368653717142805e-08, + "loss": 0.3006, + "step": 48595 + }, + { + "epoch": 0.9728198583689913, + "grad_norm": 1.0041130781173706, + "learning_rate": 1.9340157353522902e-08, + "loss": 0.2677, + "step": 48596 + }, + { + "epoch": 0.9728398768861196, + "grad_norm": 1.121770977973938, + "learning_rate": 1.9311681927525193e-08, + "loss": 0.297, + "step": 48597 + }, + { + "epoch": 0.972859895403248, + "grad_norm": 1.1141753196716309, + "learning_rate": 1.928322743926958e-08, + "loss": 0.2654, + "step": 48598 + }, + { + "epoch": 0.9728799139203763, + "grad_norm": 1.0469664335250854, + "learning_rate": 1.9254793888874856e-08, + "loss": 0.2884, + "step": 48599 + }, + { + "epoch": 0.9728999324375047, + "grad_norm": 1.1235761642456055, + "learning_rate": 1.922638127646148e-08, + "loss": 0.2977, + "step": 48600 + }, + { + "epoch": 0.972919950954633, + "grad_norm": 1.056192398071289, + "learning_rate": 1.9197989602148804e-08, + "loss": 0.2966, + "step": 48601 + }, + { + "epoch": 0.9729399694717614, + "grad_norm": 1.9196393489837646, + "learning_rate": 1.9169618866055064e-08, + "loss": 0.7368, + "step": 48602 + }, + { + "epoch": 0.9729599879888897, + "grad_norm": 1.3981513977050781, + "learning_rate": 1.914126906830127e-08, + "loss": 0.2433, + "step": 48603 + }, + { + "epoch": 0.972980006506018, + "grad_norm": 1.132285475730896, + "learning_rate": 1.9112940209005116e-08, + "loss": 0.2839, + "step": 48604 + }, + { + "epoch": 0.9730000250231464, + "grad_norm": 1.1736361980438232, + "learning_rate": 1.90846322882865e-08, + "loss": 0.3018, + "step": 48605 + }, + { + "epoch": 0.9730200435402747, + "grad_norm": 1.03656804561615, + "learning_rate": 1.9056345306264214e-08, + "loss": 0.2843, + "step": 48606 + }, + { + "epoch": 0.9730400620574031, + "grad_norm": 1.0706499814987183, + "learning_rate": 1.9028079263057052e-08, + "loss": 0.3115, + "step": 48607 + }, + { + "epoch": 0.9730600805745314, + "grad_norm": 1.1348178386688232, + "learning_rate": 1.8999834158783815e-08, + "loss": 0.2933, + "step": 48608 + }, + { + "epoch": 0.9730800990916598, + "grad_norm": 1.1620105504989624, + "learning_rate": 1.8971609993563844e-08, + "loss": 0.3157, + "step": 48609 + }, + { + "epoch": 0.9731001176087881, + "grad_norm": 1.1349689960479736, + "learning_rate": 1.8943406767514828e-08, + "loss": 0.2671, + "step": 48610 + }, + { + "epoch": 0.9731201361259165, + "grad_norm": 1.184888482093811, + "learning_rate": 1.8915224480755555e-08, + "loss": 0.3253, + "step": 48611 + }, + { + "epoch": 0.9731401546430448, + "grad_norm": 1.887453556060791, + "learning_rate": 1.8887063133405382e-08, + "loss": 0.7502, + "step": 48612 + }, + { + "epoch": 0.9731601731601731, + "grad_norm": 1.0982285737991333, + "learning_rate": 1.885892272558143e-08, + "loss": 0.2884, + "step": 48613 + }, + { + "epoch": 0.9731801916773015, + "grad_norm": 1.1118271350860596, + "learning_rate": 1.883080325740305e-08, + "loss": 0.2616, + "step": 48614 + }, + { + "epoch": 0.9732002101944298, + "grad_norm": 1.1641180515289307, + "learning_rate": 1.880270472898793e-08, + "loss": 0.3132, + "step": 48615 + }, + { + "epoch": 0.9732202287115582, + "grad_norm": 1.2553097009658813, + "learning_rate": 1.8774627140454303e-08, + "loss": 0.2876, + "step": 48616 + }, + { + "epoch": 0.9732402472286865, + "grad_norm": 1.1358094215393066, + "learning_rate": 1.8746570491919858e-08, + "loss": 0.3688, + "step": 48617 + }, + { + "epoch": 0.9732602657458149, + "grad_norm": 1.0930116176605225, + "learning_rate": 1.8718534783503383e-08, + "loss": 0.2527, + "step": 48618 + }, + { + "epoch": 0.9732802842629432, + "grad_norm": 1.8493934869766235, + "learning_rate": 1.869052001532201e-08, + "loss": 0.6741, + "step": 48619 + }, + { + "epoch": 0.9733003027800715, + "grad_norm": 1.1193894147872925, + "learning_rate": 1.8662526187493424e-08, + "loss": 0.3104, + "step": 48620 + }, + { + "epoch": 0.9733203212971999, + "grad_norm": 1.2078286409378052, + "learning_rate": 1.863455330013586e-08, + "loss": 0.3117, + "step": 48621 + }, + { + "epoch": 0.9733403398143282, + "grad_norm": 1.1484452486038208, + "learning_rate": 1.860660135336645e-08, + "loss": 0.3004, + "step": 48622 + }, + { + "epoch": 0.9733603583314566, + "grad_norm": 1.1691149473190308, + "learning_rate": 1.8578670347302875e-08, + "loss": 0.3036, + "step": 48623 + }, + { + "epoch": 0.9733803768485849, + "grad_norm": 1.0965874195098877, + "learning_rate": 1.855076028206282e-08, + "loss": 0.2406, + "step": 48624 + }, + { + "epoch": 0.9734003953657133, + "grad_norm": 1.147460699081421, + "learning_rate": 1.8522871157762856e-08, + "loss": 0.2553, + "step": 48625 + }, + { + "epoch": 0.9734204138828416, + "grad_norm": 0.997397243976593, + "learning_rate": 1.8495002974521225e-08, + "loss": 0.2574, + "step": 48626 + }, + { + "epoch": 0.97344043239997, + "grad_norm": 1.1398504972457886, + "learning_rate": 1.8467155732454502e-08, + "loss": 0.2902, + "step": 48627 + }, + { + "epoch": 0.9734604509170983, + "grad_norm": 1.1270664930343628, + "learning_rate": 1.8439329431679254e-08, + "loss": 0.2688, + "step": 48628 + }, + { + "epoch": 0.9734804694342266, + "grad_norm": 1.1283223628997803, + "learning_rate": 1.8411524072313724e-08, + "loss": 0.2926, + "step": 48629 + }, + { + "epoch": 0.973500487951355, + "grad_norm": 1.105615258216858, + "learning_rate": 1.838373965447393e-08, + "loss": 0.2979, + "step": 48630 + }, + { + "epoch": 0.9735205064684833, + "grad_norm": 1.0286835432052612, + "learning_rate": 1.8355976178276445e-08, + "loss": 0.3545, + "step": 48631 + }, + { + "epoch": 0.9735405249856117, + "grad_norm": 1.0727862119674683, + "learning_rate": 1.8328233643838957e-08, + "loss": 0.2782, + "step": 48632 + }, + { + "epoch": 0.97356054350274, + "grad_norm": 1.1296992301940918, + "learning_rate": 1.830051205127692e-08, + "loss": 0.3044, + "step": 48633 + }, + { + "epoch": 0.9735805620198684, + "grad_norm": 1.1159849166870117, + "learning_rate": 1.8272811400708024e-08, + "loss": 0.298, + "step": 48634 + }, + { + "epoch": 0.9736005805369967, + "grad_norm": 1.0456234216690063, + "learning_rate": 1.824513169224773e-08, + "loss": 0.3218, + "step": 48635 + }, + { + "epoch": 0.973620599054125, + "grad_norm": 1.1329320669174194, + "learning_rate": 1.821747292601317e-08, + "loss": 0.3064, + "step": 48636 + }, + { + "epoch": 0.9736406175712534, + "grad_norm": 1.8821648359298706, + "learning_rate": 1.81898351021198e-08, + "loss": 0.691, + "step": 48637 + }, + { + "epoch": 0.9736606360883817, + "grad_norm": 1.2042617797851562, + "learning_rate": 1.8162218220684757e-08, + "loss": 0.3181, + "step": 48638 + }, + { + "epoch": 0.9736806546055101, + "grad_norm": 1.2461916208267212, + "learning_rate": 1.81346222818235e-08, + "loss": 0.2842, + "step": 48639 + }, + { + "epoch": 0.9737006731226384, + "grad_norm": 1.2939329147338867, + "learning_rate": 1.8107047285652047e-08, + "loss": 0.2612, + "step": 48640 + }, + { + "epoch": 0.9737206916397668, + "grad_norm": 1.0915213823318481, + "learning_rate": 1.8079493232286415e-08, + "loss": 0.2979, + "step": 48641 + }, + { + "epoch": 0.9737407101568951, + "grad_norm": 1.170615553855896, + "learning_rate": 1.8051960121842628e-08, + "loss": 0.2659, + "step": 48642 + }, + { + "epoch": 0.9737607286740235, + "grad_norm": 1.1800369024276733, + "learning_rate": 1.8024447954436143e-08, + "loss": 0.3341, + "step": 48643 + }, + { + "epoch": 0.9737807471911518, + "grad_norm": 1.1639145612716675, + "learning_rate": 1.799695673018298e-08, + "loss": 0.3093, + "step": 48644 + }, + { + "epoch": 0.9738007657082801, + "grad_norm": 1.3546086549758911, + "learning_rate": 1.796948644919805e-08, + "loss": 0.3371, + "step": 48645 + }, + { + "epoch": 0.9738207842254085, + "grad_norm": 1.1364833116531372, + "learning_rate": 1.7942037111597366e-08, + "loss": 0.2607, + "step": 48646 + }, + { + "epoch": 0.9738408027425368, + "grad_norm": 1.524544596672058, + "learning_rate": 1.7914608717496397e-08, + "loss": 0.2803, + "step": 48647 + }, + { + "epoch": 0.9738608212596652, + "grad_norm": 1.180748462677002, + "learning_rate": 1.78872012670106e-08, + "loss": 0.2899, + "step": 48648 + }, + { + "epoch": 0.9738808397767935, + "grad_norm": 1.1555578708648682, + "learning_rate": 1.7859814760254335e-08, + "loss": 0.3031, + "step": 48649 + }, + { + "epoch": 0.9739008582939219, + "grad_norm": 1.0143400430679321, + "learning_rate": 1.783244919734306e-08, + "loss": 0.249, + "step": 48650 + }, + { + "epoch": 0.9739208768110502, + "grad_norm": 2.0357282161712646, + "learning_rate": 1.7805104578392795e-08, + "loss": 0.8401, + "step": 48651 + }, + { + "epoch": 0.9739408953281785, + "grad_norm": 2.1094236373901367, + "learning_rate": 1.7777780903516785e-08, + "loss": 0.7346, + "step": 48652 + }, + { + "epoch": 0.9739609138453069, + "grad_norm": 1.3552391529083252, + "learning_rate": 1.7750478172831597e-08, + "loss": 0.2623, + "step": 48653 + }, + { + "epoch": 0.9739809323624352, + "grad_norm": 1.0880690813064575, + "learning_rate": 1.772319638645048e-08, + "loss": 0.323, + "step": 48654 + }, + { + "epoch": 0.9740009508795636, + "grad_norm": 1.1451656818389893, + "learning_rate": 1.7695935544489452e-08, + "loss": 0.2806, + "step": 48655 + }, + { + "epoch": 0.9740209693966919, + "grad_norm": 1.237801194190979, + "learning_rate": 1.7668695647061753e-08, + "loss": 0.2955, + "step": 48656 + }, + { + "epoch": 0.9740409879138203, + "grad_norm": 0.987919807434082, + "learning_rate": 1.7641476694283398e-08, + "loss": 0.2396, + "step": 48657 + }, + { + "epoch": 0.9740610064309486, + "grad_norm": 1.0671066045761108, + "learning_rate": 1.761427868626764e-08, + "loss": 0.312, + "step": 48658 + }, + { + "epoch": 0.974081024948077, + "grad_norm": 1.0632613897323608, + "learning_rate": 1.7587101623129376e-08, + "loss": 0.3108, + "step": 48659 + }, + { + "epoch": 0.9741010434652053, + "grad_norm": 1.1981362104415894, + "learning_rate": 1.7559945504982966e-08, + "loss": 0.304, + "step": 48660 + }, + { + "epoch": 0.9741210619823336, + "grad_norm": 1.9981526136398315, + "learning_rate": 1.7532810331942208e-08, + "loss": 0.7825, + "step": 48661 + }, + { + "epoch": 0.974141080499462, + "grad_norm": 1.1589614152908325, + "learning_rate": 1.7505696104120894e-08, + "loss": 0.3503, + "step": 48662 + }, + { + "epoch": 0.9741610990165903, + "grad_norm": 1.195522665977478, + "learning_rate": 1.747860282163394e-08, + "loss": 0.2926, + "step": 48663 + }, + { + "epoch": 0.9741811175337187, + "grad_norm": 1.1613483428955078, + "learning_rate": 1.7451530484594026e-08, + "loss": 0.2535, + "step": 48664 + }, + { + "epoch": 0.974201136050847, + "grad_norm": 1.746883511543274, + "learning_rate": 1.7424479093116064e-08, + "loss": 0.7734, + "step": 48665 + }, + { + "epoch": 0.9742211545679754, + "grad_norm": 1.709048867225647, + "learning_rate": 1.7397448647313296e-08, + "loss": 0.7257, + "step": 48666 + }, + { + "epoch": 0.9742411730851037, + "grad_norm": 1.1067510843276978, + "learning_rate": 1.7370439147299523e-08, + "loss": 0.3061, + "step": 48667 + }, + { + "epoch": 0.974261191602232, + "grad_norm": 1.0205180644989014, + "learning_rate": 1.7343450593187426e-08, + "loss": 0.2808, + "step": 48668 + }, + { + "epoch": 0.9742812101193604, + "grad_norm": 1.7104002237319946, + "learning_rate": 1.7316482985091922e-08, + "loss": 0.7501, + "step": 48669 + }, + { + "epoch": 0.9743012286364887, + "grad_norm": 1.0670361518859863, + "learning_rate": 1.7289536323125688e-08, + "loss": 0.3136, + "step": 48670 + }, + { + "epoch": 0.9743212471536171, + "grad_norm": 1.1596921682357788, + "learning_rate": 1.726261060740142e-08, + "loss": 0.3126, + "step": 48671 + }, + { + "epoch": 0.9743412656707454, + "grad_norm": 1.1055834293365479, + "learning_rate": 1.723570583803347e-08, + "loss": 0.297, + "step": 48672 + }, + { + "epoch": 0.9743612841878738, + "grad_norm": 1.1194965839385986, + "learning_rate": 1.720882201513341e-08, + "loss": 0.3551, + "step": 48673 + }, + { + "epoch": 0.9743813027050021, + "grad_norm": 1.213504672050476, + "learning_rate": 1.718195913881615e-08, + "loss": 0.3346, + "step": 48674 + }, + { + "epoch": 0.9744013212221305, + "grad_norm": 1.0628273487091064, + "learning_rate": 1.7155117209193273e-08, + "loss": 0.3147, + "step": 48675 + }, + { + "epoch": 0.9744213397392588, + "grad_norm": 1.1085542440414429, + "learning_rate": 1.712829622637746e-08, + "loss": 0.3092, + "step": 48676 + }, + { + "epoch": 0.9744413582563871, + "grad_norm": 1.2032997608184814, + "learning_rate": 1.7101496190482514e-08, + "loss": 0.2633, + "step": 48677 + }, + { + "epoch": 0.9744613767735155, + "grad_norm": 1.824163794517517, + "learning_rate": 1.707471710162001e-08, + "loss": 0.6807, + "step": 48678 + }, + { + "epoch": 0.9744813952906438, + "grad_norm": 1.238358497619629, + "learning_rate": 1.7047958959903742e-08, + "loss": 0.3336, + "step": 48679 + }, + { + "epoch": 0.9745014138077722, + "grad_norm": 1.868330717086792, + "learning_rate": 1.7021221765445296e-08, + "loss": 0.7978, + "step": 48680 + }, + { + "epoch": 0.9745214323249005, + "grad_norm": 1.1193925142288208, + "learning_rate": 1.6994505518357352e-08, + "loss": 0.3008, + "step": 48681 + }, + { + "epoch": 0.9745414508420289, + "grad_norm": 0.9991190433502197, + "learning_rate": 1.696781021875149e-08, + "loss": 0.2551, + "step": 48682 + }, + { + "epoch": 0.9745614693591572, + "grad_norm": 1.207215666770935, + "learning_rate": 1.694113586674151e-08, + "loss": 0.3316, + "step": 48683 + }, + { + "epoch": 0.9745814878762855, + "grad_norm": 1.1990690231323242, + "learning_rate": 1.691448246243843e-08, + "loss": 0.3051, + "step": 48684 + }, + { + "epoch": 0.9746015063934139, + "grad_norm": 1.1651803255081177, + "learning_rate": 1.688785000595383e-08, + "loss": 0.3151, + "step": 48685 + }, + { + "epoch": 0.9746215249105422, + "grad_norm": 1.139172911643982, + "learning_rate": 1.6861238497400955e-08, + "loss": 0.3191, + "step": 48686 + }, + { + "epoch": 0.9746415434276706, + "grad_norm": 1.2830007076263428, + "learning_rate": 1.6834647936890826e-08, + "loss": 0.2905, + "step": 48687 + }, + { + "epoch": 0.9746615619447989, + "grad_norm": 1.1029633283615112, + "learning_rate": 1.680807832453557e-08, + "loss": 0.3085, + "step": 48688 + }, + { + "epoch": 0.9746815804619273, + "grad_norm": 1.3464537858963013, + "learning_rate": 1.6781529660446772e-08, + "loss": 0.2907, + "step": 48689 + }, + { + "epoch": 0.9747015989790556, + "grad_norm": 1.8482882976531982, + "learning_rate": 1.6755001944736005e-08, + "loss": 0.7384, + "step": 48690 + }, + { + "epoch": 0.974721617496184, + "grad_norm": 1.1569641828536987, + "learning_rate": 1.672849517751429e-08, + "loss": 0.2897, + "step": 48691 + }, + { + "epoch": 0.9747416360133123, + "grad_norm": 1.2310916185379028, + "learning_rate": 1.670200935889432e-08, + "loss": 0.3243, + "step": 48692 + }, + { + "epoch": 0.9747616545304406, + "grad_norm": 1.2704182863235474, + "learning_rate": 1.6675544488986562e-08, + "loss": 0.3382, + "step": 48693 + }, + { + "epoch": 0.974781673047569, + "grad_norm": 1.9406366348266602, + "learning_rate": 1.6649100567902032e-08, + "loss": 0.6919, + "step": 48694 + }, + { + "epoch": 0.9748016915646973, + "grad_norm": 1.2215279340744019, + "learning_rate": 1.662267759575231e-08, + "loss": 0.2977, + "step": 48695 + }, + { + "epoch": 0.9748217100818257, + "grad_norm": 1.1827250719070435, + "learning_rate": 1.6596275572648423e-08, + "loss": 0.2865, + "step": 48696 + }, + { + "epoch": 0.974841728598954, + "grad_norm": 1.3837862014770508, + "learning_rate": 1.6569894498701945e-08, + "loss": 0.261, + "step": 48697 + }, + { + "epoch": 0.9748617471160824, + "grad_norm": 1.1338763236999512, + "learning_rate": 1.6543534374022786e-08, + "loss": 0.279, + "step": 48698 + }, + { + "epoch": 0.9748817656332107, + "grad_norm": 1.822783350944519, + "learning_rate": 1.651719519872197e-08, + "loss": 0.8175, + "step": 48699 + }, + { + "epoch": 0.974901784150339, + "grad_norm": 1.9495718479156494, + "learning_rate": 1.649087697291052e-08, + "loss": 0.7308, + "step": 48700 + }, + { + "epoch": 0.9749218026674674, + "grad_norm": 1.0690958499908447, + "learning_rate": 1.64645796966989e-08, + "loss": 0.2918, + "step": 48701 + }, + { + "epoch": 0.9749418211845957, + "grad_norm": 1.2142853736877441, + "learning_rate": 1.6438303370197585e-08, + "loss": 0.2611, + "step": 48702 + }, + { + "epoch": 0.9749618397017241, + "grad_norm": 1.0279532670974731, + "learning_rate": 1.6412047993517588e-08, + "loss": 0.2705, + "step": 48703 + }, + { + "epoch": 0.9749818582188524, + "grad_norm": 1.0793180465698242, + "learning_rate": 1.6385813566768272e-08, + "loss": 0.2654, + "step": 48704 + }, + { + "epoch": 0.9750018767359808, + "grad_norm": 1.8219144344329834, + "learning_rate": 1.6359600090061212e-08, + "loss": 0.7623, + "step": 48705 + }, + { + "epoch": 0.9750218952531091, + "grad_norm": 1.0937986373901367, + "learning_rate": 1.633340756350521e-08, + "loss": 0.2918, + "step": 48706 + }, + { + "epoch": 0.9750419137702375, + "grad_norm": 1.8929048776626587, + "learning_rate": 1.6307235987211845e-08, + "loss": 0.8026, + "step": 48707 + }, + { + "epoch": 0.9750619322873658, + "grad_norm": 1.1030094623565674, + "learning_rate": 1.6281085361289915e-08, + "loss": 0.3267, + "step": 48708 + }, + { + "epoch": 0.9750819508044941, + "grad_norm": 0.9844688177108765, + "learning_rate": 1.6254955685849338e-08, + "loss": 0.2878, + "step": 48709 + }, + { + "epoch": 0.9751019693216225, + "grad_norm": 1.208163857460022, + "learning_rate": 1.622884696100113e-08, + "loss": 0.2765, + "step": 48710 + }, + { + "epoch": 0.9751219878387508, + "grad_norm": 1.4030848741531372, + "learning_rate": 1.62027591868541e-08, + "loss": 0.3396, + "step": 48711 + }, + { + "epoch": 0.9751420063558792, + "grad_norm": 0.9597117900848389, + "learning_rate": 1.6176692363518153e-08, + "loss": 0.2297, + "step": 48712 + }, + { + "epoch": 0.9751620248730075, + "grad_norm": 1.1743625402450562, + "learning_rate": 1.6150646491102652e-08, + "loss": 0.2951, + "step": 48713 + }, + { + "epoch": 0.9751820433901359, + "grad_norm": 1.1532799005508423, + "learning_rate": 1.6124621569717502e-08, + "loss": 0.2596, + "step": 48714 + }, + { + "epoch": 0.9752020619072642, + "grad_norm": 1.1605781316757202, + "learning_rate": 1.6098617599472065e-08, + "loss": 0.2929, + "step": 48715 + }, + { + "epoch": 0.9752220804243925, + "grad_norm": 1.2166039943695068, + "learning_rate": 1.6072634580475143e-08, + "loss": 0.2742, + "step": 48716 + }, + { + "epoch": 0.9752420989415209, + "grad_norm": 1.1005733013153076, + "learning_rate": 1.6046672512836647e-08, + "loss": 0.2885, + "step": 48717 + }, + { + "epoch": 0.9752621174586492, + "grad_norm": 1.055711269378662, + "learning_rate": 1.602073139666538e-08, + "loss": 0.2688, + "step": 48718 + }, + { + "epoch": 0.9752821359757776, + "grad_norm": 1.2800562381744385, + "learning_rate": 1.599481123207014e-08, + "loss": 0.2756, + "step": 48719 + }, + { + "epoch": 0.9753021544929059, + "grad_norm": 1.0516372919082642, + "learning_rate": 1.5968912019160286e-08, + "loss": 0.3016, + "step": 48720 + }, + { + "epoch": 0.9753221730100343, + "grad_norm": 1.041600227355957, + "learning_rate": 1.594303375804407e-08, + "loss": 0.31, + "step": 48721 + }, + { + "epoch": 0.9753421915271626, + "grad_norm": 1.8925327062606812, + "learning_rate": 1.5917176448831396e-08, + "loss": 0.803, + "step": 48722 + }, + { + "epoch": 0.975362210044291, + "grad_norm": 1.1298617124557495, + "learning_rate": 1.589134009162996e-08, + "loss": 0.2859, + "step": 48723 + }, + { + "epoch": 0.9753822285614193, + "grad_norm": 1.228020429611206, + "learning_rate": 1.5865524686549117e-08, + "loss": 0.3023, + "step": 48724 + }, + { + "epoch": 0.9754022470785476, + "grad_norm": 1.2017008066177368, + "learning_rate": 1.5839730233697116e-08, + "loss": 0.3414, + "step": 48725 + }, + { + "epoch": 0.975422265595676, + "grad_norm": 1.1249796152114868, + "learning_rate": 1.5813956733181645e-08, + "loss": 0.287, + "step": 48726 + }, + { + "epoch": 0.9754422841128043, + "grad_norm": 1.064598798751831, + "learning_rate": 1.5788204185112067e-08, + "loss": 0.259, + "step": 48727 + }, + { + "epoch": 0.9754623026299327, + "grad_norm": 1.099987506866455, + "learning_rate": 1.5762472589596067e-08, + "loss": 0.2736, + "step": 48728 + }, + { + "epoch": 0.975482321147061, + "grad_norm": 1.1461621522903442, + "learning_rate": 1.573676194674245e-08, + "loss": 0.2613, + "step": 48729 + }, + { + "epoch": 0.9755023396641894, + "grad_norm": 1.794359803199768, + "learning_rate": 1.5711072256658355e-08, + "loss": 0.7383, + "step": 48730 + }, + { + "epoch": 0.9755223581813177, + "grad_norm": 1.0512394905090332, + "learning_rate": 1.568540351945258e-08, + "loss": 0.2662, + "step": 48731 + }, + { + "epoch": 0.975542376698446, + "grad_norm": 1.186198115348816, + "learning_rate": 1.5659755735232262e-08, + "loss": 0.3261, + "step": 48732 + }, + { + "epoch": 0.9755623952155744, + "grad_norm": 1.059051752090454, + "learning_rate": 1.5634128904106207e-08, + "loss": 0.2656, + "step": 48733 + }, + { + "epoch": 0.9755824137327027, + "grad_norm": 1.0910311937332153, + "learning_rate": 1.5608523026181545e-08, + "loss": 0.2758, + "step": 48734 + }, + { + "epoch": 0.9756024322498311, + "grad_norm": 0.9744884371757507, + "learning_rate": 1.558293810156597e-08, + "loss": 0.2557, + "step": 48735 + }, + { + "epoch": 0.9756224507669594, + "grad_norm": 1.2559922933578491, + "learning_rate": 1.5557374130367177e-08, + "loss": 0.2954, + "step": 48736 + }, + { + "epoch": 0.9756424692840878, + "grad_norm": 1.0179609060287476, + "learning_rate": 1.5531831112692296e-08, + "loss": 0.2863, + "step": 48737 + }, + { + "epoch": 0.9756624878012161, + "grad_norm": 1.9322367906570435, + "learning_rate": 1.5506309048649026e-08, + "loss": 0.7484, + "step": 48738 + }, + { + "epoch": 0.9756825063183445, + "grad_norm": 1.1412932872772217, + "learning_rate": 1.54808079383445e-08, + "loss": 0.2786, + "step": 48739 + }, + { + "epoch": 0.9757025248354728, + "grad_norm": 1.3237855434417725, + "learning_rate": 1.5455327781885853e-08, + "loss": 0.2722, + "step": 48740 + }, + { + "epoch": 0.9757225433526011, + "grad_norm": 1.8138442039489746, + "learning_rate": 1.542986857938078e-08, + "loss": 0.7908, + "step": 48741 + }, + { + "epoch": 0.9757425618697295, + "grad_norm": 1.1359586715698242, + "learning_rate": 1.5404430330935304e-08, + "loss": 0.2592, + "step": 48742 + }, + { + "epoch": 0.9757625803868578, + "grad_norm": 1.1435346603393555, + "learning_rate": 1.537901303665712e-08, + "loss": 0.2862, + "step": 48743 + }, + { + "epoch": 0.9757825989039862, + "grad_norm": 1.0093940496444702, + "learning_rate": 1.5353616696652805e-08, + "loss": 0.3248, + "step": 48744 + }, + { + "epoch": 0.9758026174211145, + "grad_norm": 1.9429887533187866, + "learning_rate": 1.5328241311029503e-08, + "loss": 0.7571, + "step": 48745 + }, + { + "epoch": 0.9758226359382429, + "grad_norm": 1.0791984796524048, + "learning_rate": 1.530288687989323e-08, + "loss": 0.3018, + "step": 48746 + }, + { + "epoch": 0.9758426544553712, + "grad_norm": 1.3517709970474243, + "learning_rate": 1.527755340335113e-08, + "loss": 0.2956, + "step": 48747 + }, + { + "epoch": 0.9758626729724995, + "grad_norm": 1.1449310779571533, + "learning_rate": 1.5252240881509227e-08, + "loss": 0.3202, + "step": 48748 + }, + { + "epoch": 0.9758826914896279, + "grad_norm": 1.0798897743225098, + "learning_rate": 1.5226949314474106e-08, + "loss": 0.2709, + "step": 48749 + }, + { + "epoch": 0.9759027100067562, + "grad_norm": 1.1646983623504639, + "learning_rate": 1.520167870235234e-08, + "loss": 0.2981, + "step": 48750 + }, + { + "epoch": 0.9759227285238846, + "grad_norm": 1.1532468795776367, + "learning_rate": 1.517642904525052e-08, + "loss": 0.3639, + "step": 48751 + }, + { + "epoch": 0.9759427470410129, + "grad_norm": 1.066763997077942, + "learning_rate": 1.5151200343273554e-08, + "loss": 0.3145, + "step": 48752 + }, + { + "epoch": 0.9759627655581413, + "grad_norm": 1.1097604036331177, + "learning_rate": 1.512599259652803e-08, + "loss": 0.2857, + "step": 48753 + }, + { + "epoch": 0.9759827840752696, + "grad_norm": 1.1422169208526611, + "learning_rate": 1.510080580512052e-08, + "loss": 0.2564, + "step": 48754 + }, + { + "epoch": 0.976002802592398, + "grad_norm": 1.258430004119873, + "learning_rate": 1.507563996915651e-08, + "loss": 0.2977, + "step": 48755 + }, + { + "epoch": 0.9760228211095263, + "grad_norm": 1.1118277311325073, + "learning_rate": 1.5050495088741456e-08, + "loss": 0.3128, + "step": 48756 + }, + { + "epoch": 0.9760428396266546, + "grad_norm": 1.1767367124557495, + "learning_rate": 1.5025371163981396e-08, + "loss": 0.2811, + "step": 48757 + }, + { + "epoch": 0.976062858143783, + "grad_norm": 1.0525977611541748, + "learning_rate": 1.5000268194981794e-08, + "loss": 0.2785, + "step": 48758 + }, + { + "epoch": 0.9760828766609113, + "grad_norm": 2.0333967208862305, + "learning_rate": 1.4975186181848676e-08, + "loss": 0.7537, + "step": 48759 + }, + { + "epoch": 0.9761028951780397, + "grad_norm": 1.2088898420333862, + "learning_rate": 1.4950125124686964e-08, + "loss": 0.3127, + "step": 48760 + }, + { + "epoch": 0.976122913695168, + "grad_norm": 1.1236379146575928, + "learning_rate": 1.492508502360157e-08, + "loss": 0.3098, + "step": 48761 + }, + { + "epoch": 0.9761429322122964, + "grad_norm": 1.0006505250930786, + "learning_rate": 1.4900065878699077e-08, + "loss": 0.2587, + "step": 48762 + }, + { + "epoch": 0.9761629507294247, + "grad_norm": 1.1474076509475708, + "learning_rate": 1.487506769008329e-08, + "loss": 0.2893, + "step": 48763 + }, + { + "epoch": 0.976182969246553, + "grad_norm": 1.177288293838501, + "learning_rate": 1.4850090457860233e-08, + "loss": 0.2669, + "step": 48764 + }, + { + "epoch": 0.9762029877636814, + "grad_norm": 1.8862059116363525, + "learning_rate": 1.4825134182134826e-08, + "loss": 0.7445, + "step": 48765 + }, + { + "epoch": 0.9762230062808097, + "grad_norm": 1.1789467334747314, + "learning_rate": 1.4800198863011428e-08, + "loss": 0.2896, + "step": 48766 + }, + { + "epoch": 0.9762430247979381, + "grad_norm": 1.1548097133636475, + "learning_rate": 1.4775284500594956e-08, + "loss": 0.3253, + "step": 48767 + }, + { + "epoch": 0.9762630433150664, + "grad_norm": 1.1147561073303223, + "learning_rate": 1.4750391094990879e-08, + "loss": 0.3304, + "step": 48768 + }, + { + "epoch": 0.9762830618321948, + "grad_norm": 1.1336164474487305, + "learning_rate": 1.4725518646303006e-08, + "loss": 0.3116, + "step": 48769 + }, + { + "epoch": 0.9763030803493231, + "grad_norm": 1.1862220764160156, + "learning_rate": 1.4700667154635695e-08, + "loss": 0.3134, + "step": 48770 + }, + { + "epoch": 0.9763230988664515, + "grad_norm": 1.1709154844284058, + "learning_rate": 1.4675836620094974e-08, + "loss": 0.2999, + "step": 48771 + }, + { + "epoch": 0.9763431173835798, + "grad_norm": 1.2333403825759888, + "learning_rate": 1.4651027042783539e-08, + "loss": 0.3167, + "step": 48772 + }, + { + "epoch": 0.9763631359007081, + "grad_norm": 1.152936339378357, + "learning_rate": 1.4626238422806305e-08, + "loss": 0.2915, + "step": 48773 + }, + { + "epoch": 0.9763831544178365, + "grad_norm": 1.3370230197906494, + "learning_rate": 1.4601470760267632e-08, + "loss": 0.2964, + "step": 48774 + }, + { + "epoch": 0.9764031729349648, + "grad_norm": 1.1280051469802856, + "learning_rate": 1.4576724055271884e-08, + "loss": 0.2676, + "step": 48775 + }, + { + "epoch": 0.9764231914520932, + "grad_norm": 1.097994327545166, + "learning_rate": 1.455199830792231e-08, + "loss": 0.2978, + "step": 48776 + }, + { + "epoch": 0.9764432099692215, + "grad_norm": 1.0877548456192017, + "learning_rate": 1.452729351832327e-08, + "loss": 0.2764, + "step": 48777 + }, + { + "epoch": 0.9764632284863499, + "grad_norm": 1.2461605072021484, + "learning_rate": 1.4502609686578573e-08, + "loss": 0.2809, + "step": 48778 + }, + { + "epoch": 0.9764832470034782, + "grad_norm": 1.336328148841858, + "learning_rate": 1.4477946812792021e-08, + "loss": 0.3115, + "step": 48779 + }, + { + "epoch": 0.9765032655206065, + "grad_norm": 1.1445770263671875, + "learning_rate": 1.4453304897067421e-08, + "loss": 0.327, + "step": 48780 + }, + { + "epoch": 0.9765232840377349, + "grad_norm": 1.1300222873687744, + "learning_rate": 1.4428683939508025e-08, + "loss": 0.2927, + "step": 48781 + }, + { + "epoch": 0.9765433025548632, + "grad_norm": 1.2648829221725464, + "learning_rate": 1.4404083940217639e-08, + "loss": 0.3014, + "step": 48782 + }, + { + "epoch": 0.9765633210719916, + "grad_norm": 2.1917710304260254, + "learning_rate": 1.4379504899299512e-08, + "loss": 0.7343, + "step": 48783 + }, + { + "epoch": 0.9765833395891199, + "grad_norm": 1.1681220531463623, + "learning_rate": 1.4354946816857451e-08, + "loss": 0.2649, + "step": 48784 + }, + { + "epoch": 0.9766033581062483, + "grad_norm": 1.2685333490371704, + "learning_rate": 1.4330409692994152e-08, + "loss": 0.2656, + "step": 48785 + }, + { + "epoch": 0.9766233766233766, + "grad_norm": 1.0022542476654053, + "learning_rate": 1.4305893527812309e-08, + "loss": 0.2804, + "step": 48786 + }, + { + "epoch": 0.976643395140505, + "grad_norm": 1.146060824394226, + "learning_rate": 1.4281398321416284e-08, + "loss": 0.2642, + "step": 48787 + }, + { + "epoch": 0.9766634136576333, + "grad_norm": 1.05622398853302, + "learning_rate": 1.4256924073907663e-08, + "loss": 0.2652, + "step": 48788 + }, + { + "epoch": 0.9766834321747616, + "grad_norm": 2.0742838382720947, + "learning_rate": 1.4232470785390251e-08, + "loss": 0.7023, + "step": 48789 + }, + { + "epoch": 0.97670345069189, + "grad_norm": 1.1791132688522339, + "learning_rate": 1.4208038455966744e-08, + "loss": 0.2823, + "step": 48790 + }, + { + "epoch": 0.9767234692090183, + "grad_norm": 0.999646008014679, + "learning_rate": 1.4183627085739282e-08, + "loss": 0.2793, + "step": 48791 + }, + { + "epoch": 0.9767434877261467, + "grad_norm": 1.1806528568267822, + "learning_rate": 1.4159236674811117e-08, + "loss": 0.2936, + "step": 48792 + }, + { + "epoch": 0.976763506243275, + "grad_norm": 1.0645004510879517, + "learning_rate": 1.4134867223284942e-08, + "loss": 0.3161, + "step": 48793 + }, + { + "epoch": 0.9767835247604034, + "grad_norm": 1.0698527097702026, + "learning_rate": 1.4110518731262345e-08, + "loss": 0.2961, + "step": 48794 + }, + { + "epoch": 0.9768035432775317, + "grad_norm": 1.1204841136932373, + "learning_rate": 1.4086191198846577e-08, + "loss": 0.3283, + "step": 48795 + }, + { + "epoch": 0.97682356179466, + "grad_norm": 1.00403892993927, + "learning_rate": 1.4061884626139222e-08, + "loss": 0.2657, + "step": 48796 + }, + { + "epoch": 0.9768435803117884, + "grad_norm": 1.0107073783874512, + "learning_rate": 1.403759901324242e-08, + "loss": 0.25, + "step": 48797 + }, + { + "epoch": 0.9768635988289167, + "grad_norm": 1.1606192588806152, + "learning_rate": 1.4013334360258867e-08, + "loss": 0.2703, + "step": 48798 + }, + { + "epoch": 0.9768836173460451, + "grad_norm": 1.3021161556243896, + "learning_rate": 1.398909066729015e-08, + "loss": 0.3008, + "step": 48799 + }, + { + "epoch": 0.9769036358631734, + "grad_norm": 1.0891731977462769, + "learning_rate": 1.3964867934438408e-08, + "loss": 0.2966, + "step": 48800 + }, + { + "epoch": 0.9769236543803018, + "grad_norm": 1.145978331565857, + "learning_rate": 1.3940666161805228e-08, + "loss": 0.3067, + "step": 48801 + }, + { + "epoch": 0.9769436728974301, + "grad_norm": 2.008358955383301, + "learning_rate": 1.3916485349492748e-08, + "loss": 0.7315, + "step": 48802 + }, + { + "epoch": 0.9769636914145585, + "grad_norm": 1.043145775794983, + "learning_rate": 1.3892325497602e-08, + "loss": 0.3114, + "step": 48803 + }, + { + "epoch": 0.9769837099316868, + "grad_norm": 1.0593687295913696, + "learning_rate": 1.3868186606235124e-08, + "loss": 0.2777, + "step": 48804 + }, + { + "epoch": 0.9770037284488151, + "grad_norm": 1.0982880592346191, + "learning_rate": 1.384406867549315e-08, + "loss": 0.269, + "step": 48805 + }, + { + "epoch": 0.9770237469659435, + "grad_norm": 1.1627306938171387, + "learning_rate": 1.3819971705477664e-08, + "loss": 0.3138, + "step": 48806 + }, + { + "epoch": 0.9770437654830718, + "grad_norm": 1.1369431018829346, + "learning_rate": 1.3795895696290252e-08, + "loss": 0.2813, + "step": 48807 + }, + { + "epoch": 0.9770637840002002, + "grad_norm": 1.1176133155822754, + "learning_rate": 1.3771840648031943e-08, + "loss": 0.2758, + "step": 48808 + }, + { + "epoch": 0.9770838025173285, + "grad_norm": 1.2082443237304688, + "learning_rate": 1.3747806560803213e-08, + "loss": 0.3142, + "step": 48809 + }, + { + "epoch": 0.9771038210344569, + "grad_norm": 1.1109131574630737, + "learning_rate": 1.37237934347062e-08, + "loss": 0.3239, + "step": 48810 + }, + { + "epoch": 0.9771238395515852, + "grad_norm": 1.028389573097229, + "learning_rate": 1.369980126984083e-08, + "loss": 0.3373, + "step": 48811 + }, + { + "epoch": 0.9771438580687135, + "grad_norm": 1.0756322145462036, + "learning_rate": 1.3675830066309236e-08, + "loss": 0.295, + "step": 48812 + }, + { + "epoch": 0.9771638765858419, + "grad_norm": 1.158679723739624, + "learning_rate": 1.365187982421079e-08, + "loss": 0.3258, + "step": 48813 + }, + { + "epoch": 0.9771838951029702, + "grad_norm": 1.2078243494033813, + "learning_rate": 1.3627950543646518e-08, + "loss": 0.2655, + "step": 48814 + }, + { + "epoch": 0.9772039136200986, + "grad_norm": 1.1159995794296265, + "learning_rate": 1.3604042224717451e-08, + "loss": 0.2686, + "step": 48815 + }, + { + "epoch": 0.9772239321372269, + "grad_norm": 1.0570045709609985, + "learning_rate": 1.3580154867524065e-08, + "loss": 0.2938, + "step": 48816 + }, + { + "epoch": 0.9772439506543553, + "grad_norm": 1.2110984325408936, + "learning_rate": 1.3556288472166835e-08, + "loss": 0.3176, + "step": 48817 + }, + { + "epoch": 0.9772639691714836, + "grad_norm": 1.2171411514282227, + "learning_rate": 1.353244303874568e-08, + "loss": 0.2935, + "step": 48818 + }, + { + "epoch": 0.977283987688612, + "grad_norm": 1.175248146057129, + "learning_rate": 1.3508618567360521e-08, + "loss": 0.305, + "step": 48819 + }, + { + "epoch": 0.9773040062057403, + "grad_norm": 1.8865770101547241, + "learning_rate": 1.3484815058112944e-08, + "loss": 0.7756, + "step": 48820 + }, + { + "epoch": 0.9773240247228686, + "grad_norm": 1.2033628225326538, + "learning_rate": 1.3461032511101202e-08, + "loss": 0.3049, + "step": 48821 + }, + { + "epoch": 0.977344043239997, + "grad_norm": 1.1564933061599731, + "learning_rate": 1.3437270926426882e-08, + "loss": 0.2668, + "step": 48822 + }, + { + "epoch": 0.9773640617571253, + "grad_norm": 1.0833452939987183, + "learning_rate": 1.3413530304188794e-08, + "loss": 0.2698, + "step": 48823 + }, + { + "epoch": 0.9773840802742537, + "grad_norm": 1.2357282638549805, + "learning_rate": 1.3389810644487411e-08, + "loss": 0.3167, + "step": 48824 + }, + { + "epoch": 0.977404098791382, + "grad_norm": 1.0599826574325562, + "learning_rate": 1.3366111947421545e-08, + "loss": 0.2807, + "step": 48825 + }, + { + "epoch": 0.9774241173085104, + "grad_norm": 1.1159709692001343, + "learning_rate": 1.334243421309167e-08, + "loss": 0.2714, + "step": 48826 + }, + { + "epoch": 0.9774441358256387, + "grad_norm": 1.1142663955688477, + "learning_rate": 1.3318777441597707e-08, + "loss": 0.2336, + "step": 48827 + }, + { + "epoch": 0.977464154342767, + "grad_norm": 1.1248993873596191, + "learning_rate": 1.3295141633037357e-08, + "loss": 0.2577, + "step": 48828 + }, + { + "epoch": 0.9774841728598954, + "grad_norm": 1.1808786392211914, + "learning_rate": 1.3271526787511647e-08, + "loss": 0.3095, + "step": 48829 + }, + { + "epoch": 0.9775041913770237, + "grad_norm": 1.1371818780899048, + "learning_rate": 1.3247932905119387e-08, + "loss": 0.2775, + "step": 48830 + }, + { + "epoch": 0.9775242098941521, + "grad_norm": 1.0713344812393188, + "learning_rate": 1.322435998595939e-08, + "loss": 0.2725, + "step": 48831 + }, + { + "epoch": 0.9775442284112804, + "grad_norm": 1.204803228378296, + "learning_rate": 1.3200808030131019e-08, + "loss": 0.3413, + "step": 48832 + }, + { + "epoch": 0.9775642469284088, + "grad_norm": 1.0962214469909668, + "learning_rate": 1.3177277037733637e-08, + "loss": 0.3172, + "step": 48833 + }, + { + "epoch": 0.9775842654455371, + "grad_norm": 1.0910362005233765, + "learning_rate": 1.3153767008865503e-08, + "loss": 0.3048, + "step": 48834 + }, + { + "epoch": 0.9776042839626654, + "grad_norm": 1.1076494455337524, + "learning_rate": 1.313027794362598e-08, + "loss": 0.3071, + "step": 48835 + }, + { + "epoch": 0.9776243024797938, + "grad_norm": 1.321288824081421, + "learning_rate": 1.3106809842113322e-08, + "loss": 0.3031, + "step": 48836 + }, + { + "epoch": 0.9776443209969221, + "grad_norm": 1.1224563121795654, + "learning_rate": 1.308336270442634e-08, + "loss": 0.2555, + "step": 48837 + }, + { + "epoch": 0.9776643395140505, + "grad_norm": 1.8520503044128418, + "learning_rate": 1.3059936530664396e-08, + "loss": 0.7938, + "step": 48838 + }, + { + "epoch": 0.9776843580311788, + "grad_norm": 1.1365621089935303, + "learning_rate": 1.303653132092464e-08, + "loss": 0.2795, + "step": 48839 + }, + { + "epoch": 0.9777043765483072, + "grad_norm": 1.1258339881896973, + "learning_rate": 1.3013147075306431e-08, + "loss": 0.2892, + "step": 48840 + }, + { + "epoch": 0.9777243950654355, + "grad_norm": 1.1112794876098633, + "learning_rate": 1.2989783793907474e-08, + "loss": 0.304, + "step": 48841 + }, + { + "epoch": 0.9777444135825639, + "grad_norm": 2.075307607650757, + "learning_rate": 1.296644147682602e-08, + "loss": 0.7647, + "step": 48842 + }, + { + "epoch": 0.9777644320996922, + "grad_norm": 1.9248567819595337, + "learning_rate": 1.294312012416088e-08, + "loss": 0.7166, + "step": 48843 + }, + { + "epoch": 0.9777844506168205, + "grad_norm": 1.0600717067718506, + "learning_rate": 1.2919819736009753e-08, + "loss": 0.2337, + "step": 48844 + }, + { + "epoch": 0.9778044691339489, + "grad_norm": 1.043534278869629, + "learning_rate": 1.2896540312469784e-08, + "loss": 0.3181, + "step": 48845 + }, + { + "epoch": 0.9778244876510772, + "grad_norm": 1.0540651082992554, + "learning_rate": 1.2873281853640341e-08, + "loss": 0.3164, + "step": 48846 + }, + { + "epoch": 0.9778445061682056, + "grad_norm": 1.2772709131240845, + "learning_rate": 1.2850044359618009e-08, + "loss": 0.3188, + "step": 48847 + }, + { + "epoch": 0.9778645246853339, + "grad_norm": 1.0993043184280396, + "learning_rate": 1.2826827830500488e-08, + "loss": 0.3046, + "step": 48848 + }, + { + "epoch": 0.9778845432024623, + "grad_norm": 1.2083556652069092, + "learning_rate": 1.280363226638659e-08, + "loss": 0.2987, + "step": 48849 + }, + { + "epoch": 0.9779045617195906, + "grad_norm": 1.0466002225875854, + "learning_rate": 1.2780457667371793e-08, + "loss": 0.2744, + "step": 48850 + }, + { + "epoch": 0.9779245802367189, + "grad_norm": 1.154701590538025, + "learning_rate": 1.2757304033555462e-08, + "loss": 0.293, + "step": 48851 + }, + { + "epoch": 0.9779445987538473, + "grad_norm": 0.9728411436080933, + "learning_rate": 1.2734171365034187e-08, + "loss": 0.2467, + "step": 48852 + }, + { + "epoch": 0.9779646172709756, + "grad_norm": 1.957300066947937, + "learning_rate": 1.2711059661905111e-08, + "loss": 0.7369, + "step": 48853 + }, + { + "epoch": 0.977984635788104, + "grad_norm": 1.0652830600738525, + "learning_rate": 1.2687968924265382e-08, + "loss": 0.2345, + "step": 48854 + }, + { + "epoch": 0.9780046543052323, + "grad_norm": 1.0837353467941284, + "learning_rate": 1.266489915221214e-08, + "loss": 0.2632, + "step": 48855 + }, + { + "epoch": 0.9780246728223607, + "grad_norm": 1.934062123298645, + "learning_rate": 1.2641850345842532e-08, + "loss": 0.7897, + "step": 48856 + }, + { + "epoch": 0.978044691339489, + "grad_norm": 1.1150367259979248, + "learning_rate": 1.2618822505253148e-08, + "loss": 0.2967, + "step": 48857 + }, + { + "epoch": 0.9780647098566174, + "grad_norm": 1.1977237462997437, + "learning_rate": 1.2595815630541131e-08, + "loss": 0.3144, + "step": 48858 + }, + { + "epoch": 0.9780847283737457, + "grad_norm": 1.2388451099395752, + "learning_rate": 1.2572829721802515e-08, + "loss": 0.3279, + "step": 48859 + }, + { + "epoch": 0.978104746890874, + "grad_norm": 1.1043192148208618, + "learning_rate": 1.2549864779135e-08, + "loss": 0.2885, + "step": 48860 + }, + { + "epoch": 0.9781247654080024, + "grad_norm": 1.3522554636001587, + "learning_rate": 1.2526920802634069e-08, + "loss": 0.3245, + "step": 48861 + }, + { + "epoch": 0.9781447839251307, + "grad_norm": 1.0790657997131348, + "learning_rate": 1.250399779239686e-08, + "loss": 0.2717, + "step": 48862 + }, + { + "epoch": 0.9781648024422591, + "grad_norm": 1.0740993022918701, + "learning_rate": 1.2481095748519412e-08, + "loss": 0.3222, + "step": 48863 + }, + { + "epoch": 0.9781848209593874, + "grad_norm": 1.2797359228134155, + "learning_rate": 1.2458214671098311e-08, + "loss": 0.2596, + "step": 48864 + }, + { + "epoch": 0.9782048394765158, + "grad_norm": 1.757164716720581, + "learning_rate": 1.2435354560229595e-08, + "loss": 0.7559, + "step": 48865 + }, + { + "epoch": 0.9782248579936441, + "grad_norm": 0.9860491752624512, + "learning_rate": 1.2412515416009296e-08, + "loss": 0.2645, + "step": 48866 + }, + { + "epoch": 0.9782448765107724, + "grad_norm": 1.0147862434387207, + "learning_rate": 1.2389697238532893e-08, + "loss": 0.2717, + "step": 48867 + }, + { + "epoch": 0.9782648950279008, + "grad_norm": 1.0628418922424316, + "learning_rate": 1.2366900027897533e-08, + "loss": 0.2937, + "step": 48868 + }, + { + "epoch": 0.9782849135450291, + "grad_norm": 1.0830944776535034, + "learning_rate": 1.234412378419758e-08, + "loss": 0.2732, + "step": 48869 + }, + { + "epoch": 0.9783049320621575, + "grad_norm": 1.136449933052063, + "learning_rate": 1.2321368507530184e-08, + "loss": 0.3089, + "step": 48870 + }, + { + "epoch": 0.9783249505792858, + "grad_norm": 1.0465691089630127, + "learning_rate": 1.2298634197990822e-08, + "loss": 0.2822, + "step": 48871 + }, + { + "epoch": 0.9783449690964142, + "grad_norm": 1.0571551322937012, + "learning_rate": 1.2275920855673862e-08, + "loss": 0.309, + "step": 48872 + }, + { + "epoch": 0.9783649876135425, + "grad_norm": 1.122092843055725, + "learning_rate": 1.2253228480675894e-08, + "loss": 0.2866, + "step": 48873 + }, + { + "epoch": 0.9783850061306709, + "grad_norm": 1.3747565746307373, + "learning_rate": 1.2230557073091843e-08, + "loss": 0.2916, + "step": 48874 + }, + { + "epoch": 0.9784050246477992, + "grad_norm": 1.2578564882278442, + "learning_rate": 1.2207906633017186e-08, + "loss": 0.2898, + "step": 48875 + }, + { + "epoch": 0.9784250431649275, + "grad_norm": 1.208968162536621, + "learning_rate": 1.2185277160547404e-08, + "loss": 0.298, + "step": 48876 + }, + { + "epoch": 0.9784450616820559, + "grad_norm": 1.1323015689849854, + "learning_rate": 1.2162668655776866e-08, + "loss": 0.2853, + "step": 48877 + }, + { + "epoch": 0.9784650801991842, + "grad_norm": 1.1260986328125, + "learning_rate": 1.214008111880105e-08, + "loss": 0.2891, + "step": 48878 + }, + { + "epoch": 0.9784850987163126, + "grad_norm": 1.1743053197860718, + "learning_rate": 1.2117514549715436e-08, + "loss": 0.3137, + "step": 48879 + }, + { + "epoch": 0.9785051172334409, + "grad_norm": 1.0714348554611206, + "learning_rate": 1.2094968948614394e-08, + "loss": 0.2871, + "step": 48880 + }, + { + "epoch": 0.9785251357505693, + "grad_norm": 2.0271804332733154, + "learning_rate": 1.2072444315592292e-08, + "loss": 0.728, + "step": 48881 + }, + { + "epoch": 0.9785451542676976, + "grad_norm": 1.1020606756210327, + "learning_rate": 1.2049940650744053e-08, + "loss": 0.2575, + "step": 48882 + }, + { + "epoch": 0.9785651727848259, + "grad_norm": 1.0841416120529175, + "learning_rate": 1.2027457954164602e-08, + "loss": 0.3217, + "step": 48883 + }, + { + "epoch": 0.9785851913019543, + "grad_norm": 1.1255621910095215, + "learning_rate": 1.2004996225948862e-08, + "loss": 0.3205, + "step": 48884 + }, + { + "epoch": 0.9786052098190826, + "grad_norm": 1.9552730321884155, + "learning_rate": 1.1982555466190094e-08, + "loss": 0.7209, + "step": 48885 + }, + { + "epoch": 0.978625228336211, + "grad_norm": 1.0663127899169922, + "learning_rate": 1.196013567498322e-08, + "loss": 0.2625, + "step": 48886 + }, + { + "epoch": 0.9786452468533393, + "grad_norm": 1.0828756093978882, + "learning_rate": 1.193773685242261e-08, + "loss": 0.3136, + "step": 48887 + }, + { + "epoch": 0.9786652653704677, + "grad_norm": 1.8564045429229736, + "learning_rate": 1.1915358998602634e-08, + "loss": 0.7604, + "step": 48888 + }, + { + "epoch": 0.978685283887596, + "grad_norm": 1.1228641271591187, + "learning_rate": 1.1893002113616548e-08, + "loss": 0.3019, + "step": 48889 + }, + { + "epoch": 0.9787053024047244, + "grad_norm": 1.2406920194625854, + "learning_rate": 1.1870666197559278e-08, + "loss": 0.3029, + "step": 48890 + }, + { + "epoch": 0.9787253209218527, + "grad_norm": 1.9509769678115845, + "learning_rate": 1.1848351250524082e-08, + "loss": 0.8715, + "step": 48891 + }, + { + "epoch": 0.978745339438981, + "grad_norm": 1.9094582796096802, + "learning_rate": 1.1826057272604774e-08, + "loss": 0.7868, + "step": 48892 + }, + { + "epoch": 0.9787653579561094, + "grad_norm": 1.0383877754211426, + "learning_rate": 1.1803784263895168e-08, + "loss": 0.2993, + "step": 48893 + }, + { + "epoch": 0.9787853764732377, + "grad_norm": 1.2081682682037354, + "learning_rate": 1.1781532224489634e-08, + "loss": 0.2959, + "step": 48894 + }, + { + "epoch": 0.9788053949903661, + "grad_norm": 1.297035574913025, + "learning_rate": 1.1759301154480318e-08, + "loss": 0.2906, + "step": 48895 + }, + { + "epoch": 0.9788254135074944, + "grad_norm": 1.841045618057251, + "learning_rate": 1.1737091053961591e-08, + "loss": 0.3308, + "step": 48896 + }, + { + "epoch": 0.9788454320246228, + "grad_norm": 1.2106446027755737, + "learning_rate": 1.1714901923026711e-08, + "loss": 0.2702, + "step": 48897 + }, + { + "epoch": 0.9788654505417511, + "grad_norm": 1.1423524618148804, + "learning_rate": 1.1692733761768937e-08, + "loss": 0.3062, + "step": 48898 + }, + { + "epoch": 0.9788854690588794, + "grad_norm": 1.135310411453247, + "learning_rate": 1.1670586570281528e-08, + "loss": 0.2953, + "step": 48899 + }, + { + "epoch": 0.9789054875760078, + "grad_norm": 1.0087506771087646, + "learning_rate": 1.1648460348657742e-08, + "loss": 0.2776, + "step": 48900 + }, + { + "epoch": 0.9789255060931361, + "grad_norm": 1.3897485733032227, + "learning_rate": 1.1626355096989728e-08, + "loss": 0.2757, + "step": 48901 + }, + { + "epoch": 0.9789455246102645, + "grad_norm": 1.198128342628479, + "learning_rate": 1.1604270815371298e-08, + "loss": 0.2833, + "step": 48902 + }, + { + "epoch": 0.9789655431273928, + "grad_norm": 1.1823829412460327, + "learning_rate": 1.158220750389516e-08, + "loss": 0.3234, + "step": 48903 + }, + { + "epoch": 0.9789855616445212, + "grad_norm": 1.2917416095733643, + "learning_rate": 1.1560165162653459e-08, + "loss": 0.3347, + "step": 48904 + }, + { + "epoch": 0.9790055801616495, + "grad_norm": 1.1822230815887451, + "learning_rate": 1.1538143791739453e-08, + "loss": 0.2759, + "step": 48905 + }, + { + "epoch": 0.9790255986787779, + "grad_norm": 1.2113710641860962, + "learning_rate": 1.1516143391245848e-08, + "loss": 0.2942, + "step": 48906 + }, + { + "epoch": 0.9790456171959062, + "grad_norm": 1.9462096691131592, + "learning_rate": 1.1494163961264792e-08, + "loss": 0.7518, + "step": 48907 + }, + { + "epoch": 0.9790656357130345, + "grad_norm": 0.9673193693161011, + "learning_rate": 1.1472205501888435e-08, + "loss": 0.2476, + "step": 48908 + }, + { + "epoch": 0.9790856542301629, + "grad_norm": 1.1328006982803345, + "learning_rate": 1.1450268013209475e-08, + "loss": 0.3339, + "step": 48909 + }, + { + "epoch": 0.9791056727472912, + "grad_norm": 1.2029342651367188, + "learning_rate": 1.1428351495320066e-08, + "loss": 0.2836, + "step": 48910 + }, + { + "epoch": 0.9791256912644196, + "grad_norm": 1.098634958267212, + "learning_rate": 1.1406455948312357e-08, + "loss": 0.3158, + "step": 48911 + }, + { + "epoch": 0.9791457097815479, + "grad_norm": 1.0767215490341187, + "learning_rate": 1.1384581372278492e-08, + "loss": 0.3119, + "step": 48912 + }, + { + "epoch": 0.9791657282986763, + "grad_norm": 1.166406273841858, + "learning_rate": 1.1362727767310067e-08, + "loss": 0.293, + "step": 48913 + }, + { + "epoch": 0.9791857468158046, + "grad_norm": 1.1172776222229004, + "learning_rate": 1.134089513349923e-08, + "loss": 0.2646, + "step": 48914 + }, + { + "epoch": 0.9792057653329329, + "grad_norm": 1.073569893836975, + "learning_rate": 1.1319083470937575e-08, + "loss": 0.2464, + "step": 48915 + }, + { + "epoch": 0.9792257838500613, + "grad_norm": 1.1217283010482788, + "learning_rate": 1.1297292779717251e-08, + "loss": 0.3412, + "step": 48916 + }, + { + "epoch": 0.9792458023671896, + "grad_norm": 1.1756117343902588, + "learning_rate": 1.1275523059929295e-08, + "loss": 0.3101, + "step": 48917 + }, + { + "epoch": 0.979265820884318, + "grad_norm": 1.0453383922576904, + "learning_rate": 1.12537743116653e-08, + "loss": 0.328, + "step": 48918 + }, + { + "epoch": 0.9792858394014463, + "grad_norm": 0.9941471219062805, + "learning_rate": 1.1232046535017415e-08, + "loss": 0.2457, + "step": 48919 + }, + { + "epoch": 0.9793058579185747, + "grad_norm": 1.1974774599075317, + "learning_rate": 1.121033973007557e-08, + "loss": 0.3038, + "step": 48920 + }, + { + "epoch": 0.979325876435703, + "grad_norm": 1.1631184816360474, + "learning_rate": 1.1188653896932466e-08, + "loss": 0.286, + "step": 48921 + }, + { + "epoch": 0.9793458949528314, + "grad_norm": 1.1621707677841187, + "learning_rate": 1.1166989035678588e-08, + "loss": 0.2772, + "step": 48922 + }, + { + "epoch": 0.9793659134699597, + "grad_norm": 1.4187594652175903, + "learning_rate": 1.1145345146404974e-08, + "loss": 0.2852, + "step": 48923 + }, + { + "epoch": 0.979385931987088, + "grad_norm": 1.1189038753509521, + "learning_rate": 1.112372222920266e-08, + "loss": 0.3203, + "step": 48924 + }, + { + "epoch": 0.9794059505042164, + "grad_norm": 1.3411266803741455, + "learning_rate": 1.1102120284162687e-08, + "loss": 0.3065, + "step": 48925 + }, + { + "epoch": 0.9794259690213447, + "grad_norm": 1.0798499584197998, + "learning_rate": 1.1080539311375537e-08, + "loss": 0.3007, + "step": 48926 + }, + { + "epoch": 0.9794459875384731, + "grad_norm": 1.117393970489502, + "learning_rate": 1.1058979310932805e-08, + "loss": 0.3007, + "step": 48927 + }, + { + "epoch": 0.9794660060556014, + "grad_norm": 1.0617589950561523, + "learning_rate": 1.1037440282923862e-08, + "loss": 0.2564, + "step": 48928 + }, + { + "epoch": 0.9794860245727298, + "grad_norm": 1.2546764612197876, + "learning_rate": 1.1015922227440302e-08, + "loss": 0.3002, + "step": 48929 + }, + { + "epoch": 0.9795060430898581, + "grad_norm": 1.3320014476776123, + "learning_rate": 1.0994425144572052e-08, + "loss": 0.313, + "step": 48930 + }, + { + "epoch": 0.9795260616069864, + "grad_norm": 1.2572932243347168, + "learning_rate": 1.0972949034409597e-08, + "loss": 0.3399, + "step": 48931 + }, + { + "epoch": 0.9795460801241148, + "grad_norm": 1.1471316814422607, + "learning_rate": 1.0951493897042864e-08, + "loss": 0.2832, + "step": 48932 + }, + { + "epoch": 0.9795660986412431, + "grad_norm": 1.2136163711547852, + "learning_rate": 1.0930059732562891e-08, + "loss": 0.354, + "step": 48933 + }, + { + "epoch": 0.9795861171583715, + "grad_norm": 1.1428346633911133, + "learning_rate": 1.0908646541058498e-08, + "loss": 0.2335, + "step": 48934 + }, + { + "epoch": 0.9796061356754998, + "grad_norm": 1.117563009262085, + "learning_rate": 1.0887254322621277e-08, + "loss": 0.3164, + "step": 48935 + }, + { + "epoch": 0.9796261541926282, + "grad_norm": 1.2140854597091675, + "learning_rate": 1.0865883077340044e-08, + "loss": 0.3219, + "step": 48936 + }, + { + "epoch": 0.9796461727097565, + "grad_norm": 1.78026282787323, + "learning_rate": 1.0844532805305285e-08, + "loss": 0.7437, + "step": 48937 + }, + { + "epoch": 0.9796661912268849, + "grad_norm": 1.161179780960083, + "learning_rate": 1.0823203506605817e-08, + "loss": 0.2584, + "step": 48938 + }, + { + "epoch": 0.9796862097440132, + "grad_norm": 1.8406394720077515, + "learning_rate": 1.0801895181332677e-08, + "loss": 0.7569, + "step": 48939 + }, + { + "epoch": 0.9797062282611415, + "grad_norm": 1.1447774171829224, + "learning_rate": 1.078060782957413e-08, + "loss": 0.3262, + "step": 48940 + }, + { + "epoch": 0.9797262467782699, + "grad_norm": 1.16380774974823, + "learning_rate": 1.0759341451420657e-08, + "loss": 0.3052, + "step": 48941 + }, + { + "epoch": 0.9797462652953982, + "grad_norm": 1.1608911752700806, + "learning_rate": 1.0738096046961077e-08, + "loss": 0.2886, + "step": 48942 + }, + { + "epoch": 0.9797662838125266, + "grad_norm": 1.111557126045227, + "learning_rate": 1.0716871616284762e-08, + "loss": 0.2545, + "step": 48943 + }, + { + "epoch": 0.9797863023296549, + "grad_norm": 1.1021939516067505, + "learning_rate": 1.0695668159480532e-08, + "loss": 0.2989, + "step": 48944 + }, + { + "epoch": 0.9798063208467833, + "grad_norm": 1.0732629299163818, + "learning_rate": 1.0674485676638869e-08, + "loss": 0.2392, + "step": 48945 + }, + { + "epoch": 0.9798263393639116, + "grad_norm": 1.2029787302017212, + "learning_rate": 1.0653324167847478e-08, + "loss": 0.2786, + "step": 48946 + }, + { + "epoch": 0.9798463578810399, + "grad_norm": 1.0637660026550293, + "learning_rate": 1.0632183633195737e-08, + "loss": 0.28, + "step": 48947 + }, + { + "epoch": 0.9798663763981683, + "grad_norm": 1.0089303255081177, + "learning_rate": 1.0611064072772459e-08, + "loss": 0.2821, + "step": 48948 + }, + { + "epoch": 0.9798863949152966, + "grad_norm": 1.1135061979293823, + "learning_rate": 1.058996548666702e-08, + "loss": 0.3065, + "step": 48949 + }, + { + "epoch": 0.979906413432425, + "grad_norm": 1.0329804420471191, + "learning_rate": 1.0568887874967127e-08, + "loss": 0.281, + "step": 48950 + }, + { + "epoch": 0.9799264319495533, + "grad_norm": 1.8074809312820435, + "learning_rate": 1.0547831237762152e-08, + "loss": 0.6762, + "step": 48951 + }, + { + "epoch": 0.9799464504666817, + "grad_norm": 1.592052698135376, + "learning_rate": 1.0526795575140359e-08, + "loss": 0.3218, + "step": 48952 + }, + { + "epoch": 0.97996646898381, + "grad_norm": 1.2486852407455444, + "learning_rate": 1.050578088719001e-08, + "loss": 0.2878, + "step": 48953 + }, + { + "epoch": 0.9799864875009384, + "grad_norm": 1.1256003379821777, + "learning_rate": 1.0484787173999923e-08, + "loss": 0.3135, + "step": 48954 + }, + { + "epoch": 0.9800065060180667, + "grad_norm": 1.0699365139007568, + "learning_rate": 1.0463814435657805e-08, + "loss": 0.3012, + "step": 48955 + }, + { + "epoch": 0.980026524535195, + "grad_norm": 1.114693522453308, + "learning_rate": 1.0442862672252473e-08, + "loss": 0.2966, + "step": 48956 + }, + { + "epoch": 0.9800465430523234, + "grad_norm": 1.065228819847107, + "learning_rate": 1.0421931883871084e-08, + "loss": 0.3097, + "step": 48957 + }, + { + "epoch": 0.9800665615694517, + "grad_norm": 1.1680318117141724, + "learning_rate": 1.0401022070602451e-08, + "loss": 0.3079, + "step": 48958 + }, + { + "epoch": 0.9800865800865801, + "grad_norm": 1.9559385776519775, + "learning_rate": 1.0380133232533729e-08, + "loss": 0.6539, + "step": 48959 + }, + { + "epoch": 0.9801065986037084, + "grad_norm": 1.382124423980713, + "learning_rate": 1.035926536975318e-08, + "loss": 0.3452, + "step": 48960 + }, + { + "epoch": 0.9801266171208368, + "grad_norm": 1.0261924266815186, + "learning_rate": 1.033841848234851e-08, + "loss": 0.2401, + "step": 48961 + }, + { + "epoch": 0.9801466356379651, + "grad_norm": 1.979056477546692, + "learning_rate": 1.0317592570407987e-08, + "loss": 0.7264, + "step": 48962 + }, + { + "epoch": 0.9801666541550934, + "grad_norm": 1.2330628633499146, + "learning_rate": 1.0296787634017647e-08, + "loss": 0.2709, + "step": 48963 + }, + { + "epoch": 0.9801866726722218, + "grad_norm": 1.1284853219985962, + "learning_rate": 1.027600367326631e-08, + "loss": 0.2739, + "step": 48964 + }, + { + "epoch": 0.9802066911893501, + "grad_norm": 1.2281208038330078, + "learning_rate": 1.0255240688240576e-08, + "loss": 0.2911, + "step": 48965 + }, + { + "epoch": 0.9802267097064785, + "grad_norm": 1.1220426559448242, + "learning_rate": 1.0234498679028704e-08, + "loss": 0.2894, + "step": 48966 + }, + { + "epoch": 0.9802467282236068, + "grad_norm": 1.1815160512924194, + "learning_rate": 1.0213777645716739e-08, + "loss": 0.2897, + "step": 48967 + }, + { + "epoch": 0.9802667467407352, + "grad_norm": 1.2569735050201416, + "learning_rate": 1.0193077588391831e-08, + "loss": 0.3283, + "step": 48968 + }, + { + "epoch": 0.9802867652578635, + "grad_norm": 1.1474950313568115, + "learning_rate": 1.017239850714169e-08, + "loss": 0.278, + "step": 48969 + }, + { + "epoch": 0.9803067837749919, + "grad_norm": 1.9407448768615723, + "learning_rate": 1.0151740402052912e-08, + "loss": 0.7402, + "step": 48970 + }, + { + "epoch": 0.9803268022921202, + "grad_norm": 1.0752068758010864, + "learning_rate": 1.013110327321265e-08, + "loss": 0.2839, + "step": 48971 + }, + { + "epoch": 0.9803468208092485, + "grad_norm": 1.1269272565841675, + "learning_rate": 1.0110487120707501e-08, + "loss": 0.3224, + "step": 48972 + }, + { + "epoch": 0.9803668393263769, + "grad_norm": 1.0799541473388672, + "learning_rate": 1.0089891944623509e-08, + "loss": 0.2756, + "step": 48973 + }, + { + "epoch": 0.9803868578435052, + "grad_norm": 1.1390705108642578, + "learning_rate": 1.0069317745047824e-08, + "loss": 0.2825, + "step": 48974 + }, + { + "epoch": 0.9804068763606336, + "grad_norm": 1.9425735473632812, + "learning_rate": 1.00487645220676e-08, + "loss": 0.7309, + "step": 48975 + }, + { + "epoch": 0.9804268948777619, + "grad_norm": 1.160962700843811, + "learning_rate": 1.0028232275767769e-08, + "loss": 0.2877, + "step": 48976 + }, + { + "epoch": 0.9804469133948903, + "grad_norm": 2.0258569717407227, + "learning_rate": 1.0007721006235482e-08, + "loss": 0.8005, + "step": 48977 + }, + { + "epoch": 0.9804669319120186, + "grad_norm": 1.9106166362762451, + "learning_rate": 9.987230713557339e-09, + "loss": 0.7352, + "step": 48978 + }, + { + "epoch": 0.9804869504291469, + "grad_norm": 1.9609017372131348, + "learning_rate": 9.966761397818825e-09, + "loss": 0.7505, + "step": 48979 + }, + { + "epoch": 0.9805069689462753, + "grad_norm": 1.12134850025177, + "learning_rate": 9.946313059105983e-09, + "loss": 0.3066, + "step": 48980 + }, + { + "epoch": 0.9805269874634036, + "grad_norm": 0.9802868962287903, + "learning_rate": 9.925885697504855e-09, + "loss": 0.2777, + "step": 48981 + }, + { + "epoch": 0.980547005980532, + "grad_norm": 1.0268908739089966, + "learning_rate": 9.905479313101484e-09, + "loss": 0.2818, + "step": 48982 + }, + { + "epoch": 0.9805670244976603, + "grad_norm": 1.0794681310653687, + "learning_rate": 9.885093905981913e-09, + "loss": 0.2548, + "step": 48983 + }, + { + "epoch": 0.9805870430147887, + "grad_norm": 1.0958750247955322, + "learning_rate": 9.864729476231628e-09, + "loss": 0.2842, + "step": 48984 + }, + { + "epoch": 0.980607061531917, + "grad_norm": 1.172064185142517, + "learning_rate": 9.844386023935559e-09, + "loss": 0.3291, + "step": 48985 + }, + { + "epoch": 0.9806270800490454, + "grad_norm": 1.0679913759231567, + "learning_rate": 9.824063549180307e-09, + "loss": 0.2789, + "step": 48986 + }, + { + "epoch": 0.9806470985661737, + "grad_norm": 1.0414347648620605, + "learning_rate": 9.803762052050247e-09, + "loss": 0.2598, + "step": 48987 + }, + { + "epoch": 0.980667117083302, + "grad_norm": 1.2993361949920654, + "learning_rate": 9.783481532631978e-09, + "loss": 0.32, + "step": 48988 + }, + { + "epoch": 0.9806871356004304, + "grad_norm": 1.1654905080795288, + "learning_rate": 9.76322199100932e-09, + "loss": 0.3104, + "step": 48989 + }, + { + "epoch": 0.9807071541175587, + "grad_norm": 1.206892967224121, + "learning_rate": 9.74298342726887e-09, + "loss": 0.3091, + "step": 48990 + }, + { + "epoch": 0.9807271726346871, + "grad_norm": 1.8759634494781494, + "learning_rate": 9.722765841494452e-09, + "loss": 0.7607, + "step": 48991 + }, + { + "epoch": 0.9807471911518154, + "grad_norm": 1.1081732511520386, + "learning_rate": 9.702569233771552e-09, + "loss": 0.3231, + "step": 48992 + }, + { + "epoch": 0.9807672096689438, + "grad_norm": 1.4617396593093872, + "learning_rate": 9.682393604185658e-09, + "loss": 0.3095, + "step": 48993 + }, + { + "epoch": 0.9807872281860721, + "grad_norm": 1.041467547416687, + "learning_rate": 9.66223895282059e-09, + "loss": 0.2683, + "step": 48994 + }, + { + "epoch": 0.9808072467032004, + "grad_norm": 1.0953501462936401, + "learning_rate": 9.642105279761837e-09, + "loss": 0.3046, + "step": 48995 + }, + { + "epoch": 0.9808272652203288, + "grad_norm": 1.1794706583023071, + "learning_rate": 9.621992585093776e-09, + "loss": 0.3095, + "step": 48996 + }, + { + "epoch": 0.9808472837374571, + "grad_norm": 1.0934187173843384, + "learning_rate": 9.601900868900782e-09, + "loss": 0.298, + "step": 48997 + }, + { + "epoch": 0.9808673022545855, + "grad_norm": 1.8697532415390015, + "learning_rate": 9.581830131267233e-09, + "loss": 0.7983, + "step": 48998 + }, + { + "epoch": 0.9808873207717138, + "grad_norm": 1.1845654249191284, + "learning_rate": 9.561780372278062e-09, + "loss": 0.3063, + "step": 48999 + }, + { + "epoch": 0.9809073392888422, + "grad_norm": 1.1905235052108765, + "learning_rate": 9.54175159201709e-09, + "loss": 0.3392, + "step": 49000 + } + ], + "logging_steps": 1.0, + "max_steps": 49953, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 1.628771379269809e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}