| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9970958373668926, | |
| "eval_steps": 400, | |
| "global_step": 1935, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003097773475314618, | |
| "grad_norm": 25.331384658813477, | |
| "learning_rate": 4.99741468459152e-05, | |
| "loss": 7.6182, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.006195546950629236, | |
| "grad_norm": 23.0921688079834, | |
| "learning_rate": 4.994829369183041e-05, | |
| "loss": 6.4939, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.009293320425943852, | |
| "grad_norm": 15.174758911132812, | |
| "learning_rate": 4.9896587383660806e-05, | |
| "loss": 5.8833, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.012391093901258471, | |
| "grad_norm": 13.869624137878418, | |
| "learning_rate": 4.984488107549121e-05, | |
| "loss": 5.1932, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.015488867376573089, | |
| "grad_norm": 8.06086254119873, | |
| "learning_rate": 4.9793174767321616e-05, | |
| "loss": 4.6732, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.018586640851887704, | |
| "grad_norm": 7.566930294036865, | |
| "learning_rate": 4.974146845915202e-05, | |
| "loss": 4.2104, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.021684414327202323, | |
| "grad_norm": 7.272712230682373, | |
| "learning_rate": 4.968976215098242e-05, | |
| "loss": 3.9414, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.024782187802516942, | |
| "grad_norm": 5.09606409072876, | |
| "learning_rate": 4.9638055842812824e-05, | |
| "loss": 3.7442, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.027879961277831558, | |
| "grad_norm": 4.3098273277282715, | |
| "learning_rate": 4.958634953464323e-05, | |
| "loss": 3.4832, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.030977734753146177, | |
| "grad_norm": 4.051149368286133, | |
| "learning_rate": 4.9534643226473634e-05, | |
| "loss": 3.2805, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.034075508228460796, | |
| "grad_norm": 3.865760326385498, | |
| "learning_rate": 4.948293691830403e-05, | |
| "loss": 3.0996, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03717328170377541, | |
| "grad_norm": 3.1818125247955322, | |
| "learning_rate": 4.943123061013444e-05, | |
| "loss": 2.9279, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.04027105517909003, | |
| "grad_norm": 2.931936502456665, | |
| "learning_rate": 4.937952430196484e-05, | |
| "loss": 2.748, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04336882865440465, | |
| "grad_norm": 3.031834125518799, | |
| "learning_rate": 4.932781799379524e-05, | |
| "loss": 2.6392, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.046466602129719266, | |
| "grad_norm": 2.5047967433929443, | |
| "learning_rate": 4.9276111685625646e-05, | |
| "loss": 2.4719, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.049564375605033885, | |
| "grad_norm": 2.0622289180755615, | |
| "learning_rate": 4.922440537745605e-05, | |
| "loss": 2.33, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0526621490803485, | |
| "grad_norm": 1.961929202079773, | |
| "learning_rate": 4.9172699069286456e-05, | |
| "loss": 2.2305, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.055759922555663116, | |
| "grad_norm": 2.2184677124023438, | |
| "learning_rate": 4.9120992761116855e-05, | |
| "loss": 2.1613, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.058857696030977735, | |
| "grad_norm": 1.6713696718215942, | |
| "learning_rate": 4.906928645294726e-05, | |
| "loss": 2.1162, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.061955469506292354, | |
| "grad_norm": 1.4867547750473022, | |
| "learning_rate": 4.9017580144777665e-05, | |
| "loss": 2.0524, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06505324298160697, | |
| "grad_norm": 1.3332496881484985, | |
| "learning_rate": 4.896587383660807e-05, | |
| "loss": 2.0135, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06815101645692159, | |
| "grad_norm": 1.1923631429672241, | |
| "learning_rate": 4.891416752843847e-05, | |
| "loss": 1.947, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.07124878993223621, | |
| "grad_norm": 1.1316183805465698, | |
| "learning_rate": 4.886246122026887e-05, | |
| "loss": 1.9088, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07434656340755082, | |
| "grad_norm": 1.0954861640930176, | |
| "learning_rate": 4.881075491209928e-05, | |
| "loss": 1.8897, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07744433688286544, | |
| "grad_norm": 1.076762318611145, | |
| "learning_rate": 4.8759048603929683e-05, | |
| "loss": 1.8766, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08054211035818006, | |
| "grad_norm": 0.9524347186088562, | |
| "learning_rate": 4.870734229576008e-05, | |
| "loss": 1.8171, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08363988383349467, | |
| "grad_norm": 1.0217920541763306, | |
| "learning_rate": 4.865563598759049e-05, | |
| "loss": 1.8053, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0867376573088093, | |
| "grad_norm": 0.9322136044502258, | |
| "learning_rate": 4.860392967942089e-05, | |
| "loss": 1.7908, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08983543078412391, | |
| "grad_norm": 1.0043178796768188, | |
| "learning_rate": 4.855222337125129e-05, | |
| "loss": 1.7848, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.09293320425943853, | |
| "grad_norm": 0.8848307132720947, | |
| "learning_rate": 4.8500517063081695e-05, | |
| "loss": 1.753, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09603097773475315, | |
| "grad_norm": 0.8497102856636047, | |
| "learning_rate": 4.84488107549121e-05, | |
| "loss": 1.7496, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09912875121006777, | |
| "grad_norm": 0.8451825380325317, | |
| "learning_rate": 4.8397104446742505e-05, | |
| "loss": 1.7324, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.10222652468538238, | |
| "grad_norm": 0.8965175747871399, | |
| "learning_rate": 4.8345398138572904e-05, | |
| "loss": 1.7452, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.105324298160697, | |
| "grad_norm": 0.8017619252204895, | |
| "learning_rate": 4.829369183040331e-05, | |
| "loss": 1.7167, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10842207163601161, | |
| "grad_norm": 0.8092736601829529, | |
| "learning_rate": 4.8241985522233714e-05, | |
| "loss": 1.7169, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11151984511132623, | |
| "grad_norm": 0.7606578469276428, | |
| "learning_rate": 4.819027921406412e-05, | |
| "loss": 1.7131, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.11461761858664085, | |
| "grad_norm": 0.8365290760993958, | |
| "learning_rate": 4.813857290589452e-05, | |
| "loss": 1.707, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11771539206195547, | |
| "grad_norm": 0.7831740975379944, | |
| "learning_rate": 4.808686659772492e-05, | |
| "loss": 1.7044, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.12081316553727009, | |
| "grad_norm": 0.7931554913520813, | |
| "learning_rate": 4.803516028955533e-05, | |
| "loss": 1.6685, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.12391093901258471, | |
| "grad_norm": 0.7500888109207153, | |
| "learning_rate": 4.7983453981385726e-05, | |
| "loss": 1.6628, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12700871248789933, | |
| "grad_norm": 0.6964195966720581, | |
| "learning_rate": 4.793174767321613e-05, | |
| "loss": 1.6568, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.13010648596321395, | |
| "grad_norm": 0.8971940875053406, | |
| "learning_rate": 4.788004136504654e-05, | |
| "loss": 1.6734, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.13320425943852857, | |
| "grad_norm": 0.7129445672035217, | |
| "learning_rate": 4.782833505687694e-05, | |
| "loss": 1.628, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.13630203291384319, | |
| "grad_norm": 0.8455966114997864, | |
| "learning_rate": 4.7776628748707346e-05, | |
| "loss": 1.6518, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.1393998063891578, | |
| "grad_norm": 0.7180489301681519, | |
| "learning_rate": 4.772492244053775e-05, | |
| "loss": 1.6516, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.14249757986447242, | |
| "grad_norm": 0.7145109176635742, | |
| "learning_rate": 4.7673216132368156e-05, | |
| "loss": 1.6319, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.14559535333978701, | |
| "grad_norm": 0.8121469020843506, | |
| "learning_rate": 4.7621509824198554e-05, | |
| "loss": 1.6627, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14869312681510163, | |
| "grad_norm": 0.7496806383132935, | |
| "learning_rate": 4.756980351602896e-05, | |
| "loss": 1.6319, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.15179090029041625, | |
| "grad_norm": 0.7465183734893799, | |
| "learning_rate": 4.7518097207859365e-05, | |
| "loss": 1.6222, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.15488867376573087, | |
| "grad_norm": 0.6814384460449219, | |
| "learning_rate": 4.746639089968977e-05, | |
| "loss": 1.6235, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1579864472410455, | |
| "grad_norm": 0.7421935796737671, | |
| "learning_rate": 4.741468459152017e-05, | |
| "loss": 1.6075, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1610842207163601, | |
| "grad_norm": 0.7680038809776306, | |
| "learning_rate": 4.736297828335057e-05, | |
| "loss": 1.6302, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.16418199419167473, | |
| "grad_norm": 0.6994872093200684, | |
| "learning_rate": 4.731127197518098e-05, | |
| "loss": 1.6262, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.16727976766698935, | |
| "grad_norm": 0.7842795848846436, | |
| "learning_rate": 4.7259565667011376e-05, | |
| "loss": 1.6095, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.17037754114230397, | |
| "grad_norm": 0.9475075006484985, | |
| "learning_rate": 4.720785935884178e-05, | |
| "loss": 1.6044, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1734753146176186, | |
| "grad_norm": 0.7090861201286316, | |
| "learning_rate": 4.7156153050672187e-05, | |
| "loss": 1.6177, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1765730880929332, | |
| "grad_norm": 0.70943683385849, | |
| "learning_rate": 4.710444674250259e-05, | |
| "loss": 1.6166, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.17967086156824782, | |
| "grad_norm": 0.7159855365753174, | |
| "learning_rate": 4.705274043433299e-05, | |
| "loss": 1.6084, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.18276863504356244, | |
| "grad_norm": 0.7541035413742065, | |
| "learning_rate": 4.7001034126163395e-05, | |
| "loss": 1.5959, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.18586640851887706, | |
| "grad_norm": 0.6943245530128479, | |
| "learning_rate": 4.69493278179938e-05, | |
| "loss": 1.5732, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.18896418199419168, | |
| "grad_norm": 0.6735599637031555, | |
| "learning_rate": 4.6897621509824205e-05, | |
| "loss": 1.582, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1920619554695063, | |
| "grad_norm": 0.6189006567001343, | |
| "learning_rate": 4.6845915201654603e-05, | |
| "loss": 1.5789, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.19515972894482092, | |
| "grad_norm": 0.6355108022689819, | |
| "learning_rate": 4.679420889348501e-05, | |
| "loss": 1.5852, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.19825750242013554, | |
| "grad_norm": 0.7827373743057251, | |
| "learning_rate": 4.6742502585315414e-05, | |
| "loss": 1.5868, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.20135527589545016, | |
| "grad_norm": 0.7391951680183411, | |
| "learning_rate": 4.669079627714581e-05, | |
| "loss": 1.5543, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.20445304937076475, | |
| "grad_norm": 0.659102201461792, | |
| "learning_rate": 4.663908996897622e-05, | |
| "loss": 1.5933, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.20755082284607937, | |
| "grad_norm": 0.7462721467018127, | |
| "learning_rate": 4.658738366080662e-05, | |
| "loss": 1.5757, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.210648596321394, | |
| "grad_norm": 0.6847939491271973, | |
| "learning_rate": 4.653567735263703e-05, | |
| "loss": 1.5737, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2137463697967086, | |
| "grad_norm": 0.7970624566078186, | |
| "learning_rate": 4.6483971044467425e-05, | |
| "loss": 1.5801, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.21684414327202323, | |
| "grad_norm": 0.6875075101852417, | |
| "learning_rate": 4.643226473629783e-05, | |
| "loss": 1.5971, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.21994191674733785, | |
| "grad_norm": 0.708543062210083, | |
| "learning_rate": 4.6380558428128236e-05, | |
| "loss": 1.5693, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.22303969022265246, | |
| "grad_norm": 0.6866245865821838, | |
| "learning_rate": 4.632885211995864e-05, | |
| "loss": 1.5673, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.22613746369796708, | |
| "grad_norm": 0.7913303375244141, | |
| "learning_rate": 4.627714581178904e-05, | |
| "loss": 1.5746, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.2292352371732817, | |
| "grad_norm": 0.7373950481414795, | |
| "learning_rate": 4.6225439503619444e-05, | |
| "loss": 1.5535, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.23233301064859632, | |
| "grad_norm": 0.6925334334373474, | |
| "learning_rate": 4.617373319544985e-05, | |
| "loss": 1.5516, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.23543078412391094, | |
| "grad_norm": 1.0897713899612427, | |
| "learning_rate": 4.6122026887280254e-05, | |
| "loss": 1.5388, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.23852855759922556, | |
| "grad_norm": 0.7056815028190613, | |
| "learning_rate": 4.607032057911065e-05, | |
| "loss": 1.5398, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.24162633107454018, | |
| "grad_norm": 0.7982754111289978, | |
| "learning_rate": 4.601861427094106e-05, | |
| "loss": 1.5655, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2447241045498548, | |
| "grad_norm": 0.6022890210151672, | |
| "learning_rate": 4.596690796277146e-05, | |
| "loss": 1.5439, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.24782187802516942, | |
| "grad_norm": 0.7785722017288208, | |
| "learning_rate": 4.591520165460186e-05, | |
| "loss": 1.5396, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.250919651500484, | |
| "grad_norm": 0.8552553653717041, | |
| "learning_rate": 4.5863495346432266e-05, | |
| "loss": 1.5387, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.25401742497579866, | |
| "grad_norm": 0.7920766472816467, | |
| "learning_rate": 4.581178903826267e-05, | |
| "loss": 1.5309, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.25711519845111325, | |
| "grad_norm": 0.6684227585792542, | |
| "learning_rate": 4.5760082730093076e-05, | |
| "loss": 1.5686, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.2602129719264279, | |
| "grad_norm": 0.8071036338806152, | |
| "learning_rate": 4.5708376421923474e-05, | |
| "loss": 1.546, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2633107454017425, | |
| "grad_norm": 0.7032163739204407, | |
| "learning_rate": 4.565667011375388e-05, | |
| "loss": 1.5496, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.26640851887705713, | |
| "grad_norm": 0.7134162783622742, | |
| "learning_rate": 4.5604963805584284e-05, | |
| "loss": 1.5376, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2695062923523717, | |
| "grad_norm": 0.6842672228813171, | |
| "learning_rate": 4.555325749741469e-05, | |
| "loss": 1.5381, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.27260406582768637, | |
| "grad_norm": 0.8091727495193481, | |
| "learning_rate": 4.550155118924509e-05, | |
| "loss": 1.5487, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.27570183930300096, | |
| "grad_norm": 0.8914667963981628, | |
| "learning_rate": 4.544984488107549e-05, | |
| "loss": 1.5411, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.2787996127783156, | |
| "grad_norm": 0.676179826259613, | |
| "learning_rate": 4.53981385729059e-05, | |
| "loss": 1.5694, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2818973862536302, | |
| "grad_norm": 0.9339081645011902, | |
| "learning_rate": 4.5346432264736296e-05, | |
| "loss": 1.5377, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.28499515972894485, | |
| "grad_norm": 0.7921097874641418, | |
| "learning_rate": 4.52947259565667e-05, | |
| "loss": 1.5275, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.28809293320425944, | |
| "grad_norm": 0.7746542692184448, | |
| "learning_rate": 4.5243019648397106e-05, | |
| "loss": 1.5319, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.29119070667957403, | |
| "grad_norm": 0.9051031470298767, | |
| "learning_rate": 4.519131334022751e-05, | |
| "loss": 1.546, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2942884801548887, | |
| "grad_norm": 0.9229975342750549, | |
| "learning_rate": 4.513960703205791e-05, | |
| "loss": 1.5389, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.29738625363020327, | |
| "grad_norm": 0.7976638674736023, | |
| "learning_rate": 4.5087900723888315e-05, | |
| "loss": 1.5345, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3004840271055179, | |
| "grad_norm": 0.6807886362075806, | |
| "learning_rate": 4.503619441571872e-05, | |
| "loss": 1.5139, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.3035818005808325, | |
| "grad_norm": 0.7215619683265686, | |
| "learning_rate": 4.4984488107549125e-05, | |
| "loss": 1.5431, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.30667957405614715, | |
| "grad_norm": 0.6181749105453491, | |
| "learning_rate": 4.493278179937952e-05, | |
| "loss": 1.5301, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.30977734753146174, | |
| "grad_norm": 0.7625611424446106, | |
| "learning_rate": 4.488107549120993e-05, | |
| "loss": 1.5185, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3128751210067764, | |
| "grad_norm": 0.8464534878730774, | |
| "learning_rate": 4.4829369183040333e-05, | |
| "loss": 1.5293, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.315972894482091, | |
| "grad_norm": 1.0860360860824585, | |
| "learning_rate": 4.477766287487073e-05, | |
| "loss": 1.5289, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.31907066795740563, | |
| "grad_norm": 0.6855882406234741, | |
| "learning_rate": 4.472595656670114e-05, | |
| "loss": 1.513, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.3221684414327202, | |
| "grad_norm": 0.6964966654777527, | |
| "learning_rate": 4.467425025853154e-05, | |
| "loss": 1.5256, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.32526621490803487, | |
| "grad_norm": 0.9584571719169617, | |
| "learning_rate": 4.462254395036195e-05, | |
| "loss": 1.5307, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.32836398838334946, | |
| "grad_norm": 0.8495706915855408, | |
| "learning_rate": 4.4570837642192345e-05, | |
| "loss": 1.517, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3314617618586641, | |
| "grad_norm": 0.7504775524139404, | |
| "learning_rate": 4.451913133402275e-05, | |
| "loss": 1.5006, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.3345595353339787, | |
| "grad_norm": 0.8532968759536743, | |
| "learning_rate": 4.4467425025853155e-05, | |
| "loss": 1.5357, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.33765730880929334, | |
| "grad_norm": 1.1161147356033325, | |
| "learning_rate": 4.441571871768356e-05, | |
| "loss": 1.5194, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.34075508228460794, | |
| "grad_norm": 0.9745946526527405, | |
| "learning_rate": 4.436401240951396e-05, | |
| "loss": 1.5254, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3438528557599226, | |
| "grad_norm": 1.0027096271514893, | |
| "learning_rate": 4.4312306101344364e-05, | |
| "loss": 1.5295, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.3469506292352372, | |
| "grad_norm": 0.9923873543739319, | |
| "learning_rate": 4.426059979317477e-05, | |
| "loss": 1.517, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.35004840271055176, | |
| "grad_norm": 1.348768711090088, | |
| "learning_rate": 4.420889348500517e-05, | |
| "loss": 1.535, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.3531461761858664, | |
| "grad_norm": 0.808506190776825, | |
| "learning_rate": 4.415718717683557e-05, | |
| "loss": 1.5003, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.356243949661181, | |
| "grad_norm": 0.9981195330619812, | |
| "learning_rate": 4.410548086866598e-05, | |
| "loss": 1.491, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.35934172313649565, | |
| "grad_norm": 0.75667804479599, | |
| "learning_rate": 4.405377456049638e-05, | |
| "loss": 1.5006, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.36243949661181024, | |
| "grad_norm": 0.6836598515510559, | |
| "learning_rate": 4.400206825232678e-05, | |
| "loss": 1.5165, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3655372700871249, | |
| "grad_norm": 0.817143976688385, | |
| "learning_rate": 4.3950361944157186e-05, | |
| "loss": 1.4953, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3686350435624395, | |
| "grad_norm": 0.9403025507926941, | |
| "learning_rate": 4.389865563598759e-05, | |
| "loss": 1.5093, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3717328170377541, | |
| "grad_norm": 0.8944310545921326, | |
| "learning_rate": 4.3846949327817996e-05, | |
| "loss": 1.4989, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3748305905130687, | |
| "grad_norm": 0.8741413950920105, | |
| "learning_rate": 4.3795243019648394e-05, | |
| "loss": 1.5084, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.37792836398838336, | |
| "grad_norm": 0.7388432621955872, | |
| "learning_rate": 4.3743536711478806e-05, | |
| "loss": 1.4904, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.38102613746369796, | |
| "grad_norm": 1.061517596244812, | |
| "learning_rate": 4.369183040330921e-05, | |
| "loss": 1.4917, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3841239109390126, | |
| "grad_norm": 0.9416221380233765, | |
| "learning_rate": 4.364012409513961e-05, | |
| "loss": 1.4798, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3872216844143272, | |
| "grad_norm": 0.9135381579399109, | |
| "learning_rate": 4.3588417786970015e-05, | |
| "loss": 1.4923, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.39031945788964184, | |
| "grad_norm": 0.7566073536872864, | |
| "learning_rate": 4.353671147880042e-05, | |
| "loss": 1.5062, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.39341723136495643, | |
| "grad_norm": 0.8875766396522522, | |
| "learning_rate": 4.3485005170630825e-05, | |
| "loss": 1.5138, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3965150048402711, | |
| "grad_norm": 0.9574587345123291, | |
| "learning_rate": 4.343329886246122e-05, | |
| "loss": 1.5071, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.39961277831558567, | |
| "grad_norm": 0.8392723202705383, | |
| "learning_rate": 4.338159255429163e-05, | |
| "loss": 1.4951, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.4027105517909003, | |
| "grad_norm": 1.0290050506591797, | |
| "learning_rate": 4.332988624612203e-05, | |
| "loss": 1.5016, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4058083252662149, | |
| "grad_norm": 0.8251259922981262, | |
| "learning_rate": 4.327817993795243e-05, | |
| "loss": 1.5002, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4089060987415295, | |
| "grad_norm": 0.645811140537262, | |
| "learning_rate": 4.3226473629782837e-05, | |
| "loss": 1.4892, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.41200387221684415, | |
| "grad_norm": 0.8079040050506592, | |
| "learning_rate": 4.317476732161324e-05, | |
| "loss": 1.4952, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.41510164569215874, | |
| "grad_norm": 0.72142094373703, | |
| "learning_rate": 4.312306101344365e-05, | |
| "loss": 1.4852, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.4181994191674734, | |
| "grad_norm": 0.6570938229560852, | |
| "learning_rate": 4.3071354705274045e-05, | |
| "loss": 1.4905, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.421297192642788, | |
| "grad_norm": 0.8945090770721436, | |
| "learning_rate": 4.301964839710445e-05, | |
| "loss": 1.4761, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.4243949661181026, | |
| "grad_norm": 0.6998433470726013, | |
| "learning_rate": 4.2967942088934855e-05, | |
| "loss": 1.4766, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4274927395934172, | |
| "grad_norm": 0.8252823948860168, | |
| "learning_rate": 4.291623578076526e-05, | |
| "loss": 1.4991, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.43059051306873186, | |
| "grad_norm": 0.7982930541038513, | |
| "learning_rate": 4.286452947259566e-05, | |
| "loss": 1.491, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.43368828654404645, | |
| "grad_norm": 0.7150436043739319, | |
| "learning_rate": 4.2812823164426064e-05, | |
| "loss": 1.4785, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4367860600193611, | |
| "grad_norm": 0.7387247681617737, | |
| "learning_rate": 4.276111685625647e-05, | |
| "loss": 1.4922, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4398838334946757, | |
| "grad_norm": 0.6871611475944519, | |
| "learning_rate": 4.270941054808687e-05, | |
| "loss": 1.4837, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.44298160696999034, | |
| "grad_norm": 0.8222848176956177, | |
| "learning_rate": 4.265770423991727e-05, | |
| "loss": 1.4894, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.44607938044530493, | |
| "grad_norm": 0.7156632542610168, | |
| "learning_rate": 4.260599793174768e-05, | |
| "loss": 1.4967, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4491771539206196, | |
| "grad_norm": 0.8731895685195923, | |
| "learning_rate": 4.255429162357808e-05, | |
| "loss": 1.4721, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.45227492739593417, | |
| "grad_norm": 0.7288525104522705, | |
| "learning_rate": 4.250258531540848e-05, | |
| "loss": 1.4777, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4553727008712488, | |
| "grad_norm": 0.582266092300415, | |
| "learning_rate": 4.2450879007238886e-05, | |
| "loss": 1.4536, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4584704743465634, | |
| "grad_norm": 0.8917784094810486, | |
| "learning_rate": 4.239917269906929e-05, | |
| "loss": 1.4913, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.46156824782187805, | |
| "grad_norm": 0.7268736958503723, | |
| "learning_rate": 4.2347466390899696e-05, | |
| "loss": 1.5163, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.46466602129719264, | |
| "grad_norm": 0.7215520143508911, | |
| "learning_rate": 4.2295760082730094e-05, | |
| "loss": 1.5, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.46776379477250724, | |
| "grad_norm": 0.7149571180343628, | |
| "learning_rate": 4.22440537745605e-05, | |
| "loss": 1.5089, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4708615682478219, | |
| "grad_norm": 0.8469173312187195, | |
| "learning_rate": 4.2192347466390904e-05, | |
| "loss": 1.4913, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.4739593417231365, | |
| "grad_norm": 0.7890795469284058, | |
| "learning_rate": 4.21406411582213e-05, | |
| "loss": 1.4625, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4770571151984511, | |
| "grad_norm": 0.6617977023124695, | |
| "learning_rate": 4.208893485005171e-05, | |
| "loss": 1.468, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4801548886737657, | |
| "grad_norm": 0.8981150388717651, | |
| "learning_rate": 4.203722854188211e-05, | |
| "loss": 1.4746, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.48325266214908036, | |
| "grad_norm": 0.733159601688385, | |
| "learning_rate": 4.198552223371252e-05, | |
| "loss": 1.4685, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.48635043562439495, | |
| "grad_norm": 0.8201707601547241, | |
| "learning_rate": 4.1933815925542916e-05, | |
| "loss": 1.4735, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4894482090997096, | |
| "grad_norm": 0.7135993242263794, | |
| "learning_rate": 4.188210961737332e-05, | |
| "loss": 1.4879, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.4925459825750242, | |
| "grad_norm": 0.9840111136436462, | |
| "learning_rate": 4.1830403309203726e-05, | |
| "loss": 1.5049, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.49564375605033884, | |
| "grad_norm": 0.8042699098587036, | |
| "learning_rate": 4.177869700103413e-05, | |
| "loss": 1.4553, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4987415295256534, | |
| "grad_norm": 0.8507205843925476, | |
| "learning_rate": 4.172699069286453e-05, | |
| "loss": 1.4756, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.501839303000968, | |
| "grad_norm": 0.9723739624023438, | |
| "learning_rate": 4.1675284384694934e-05, | |
| "loss": 1.4621, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5049370764762827, | |
| "grad_norm": 0.9678776264190674, | |
| "learning_rate": 4.162357807652534e-05, | |
| "loss": 1.4715, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.5080348499515973, | |
| "grad_norm": 1.031358242034912, | |
| "learning_rate": 4.1571871768355745e-05, | |
| "loss": 1.4919, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.5111326234269119, | |
| "grad_norm": 0.7192648649215698, | |
| "learning_rate": 4.152016546018614e-05, | |
| "loss": 1.4659, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5142303969022265, | |
| "grad_norm": 0.9962320923805237, | |
| "learning_rate": 4.146845915201655e-05, | |
| "loss": 1.4544, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.5173281703775412, | |
| "grad_norm": 0.7830464243888855, | |
| "learning_rate": 4.141675284384695e-05, | |
| "loss": 1.457, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.5204259438528558, | |
| "grad_norm": 0.8971250653266907, | |
| "learning_rate": 4.136504653567735e-05, | |
| "loss": 1.4669, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.5235237173281704, | |
| "grad_norm": 0.7550873160362244, | |
| "learning_rate": 4.1313340227507756e-05, | |
| "loss": 1.464, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.526621490803485, | |
| "grad_norm": 0.9138239622116089, | |
| "learning_rate": 4.126163391933816e-05, | |
| "loss": 1.4861, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5297192642787996, | |
| "grad_norm": 0.8233465552330017, | |
| "learning_rate": 4.1209927611168567e-05, | |
| "loss": 1.4731, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5328170377541143, | |
| "grad_norm": 0.8664296865463257, | |
| "learning_rate": 4.1158221302998965e-05, | |
| "loss": 1.4576, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5359148112294289, | |
| "grad_norm": 0.8798326849937439, | |
| "learning_rate": 4.110651499482937e-05, | |
| "loss": 1.4711, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5390125847047434, | |
| "grad_norm": 0.9018617272377014, | |
| "learning_rate": 4.1054808686659775e-05, | |
| "loss": 1.4753, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.542110358180058, | |
| "grad_norm": 1.0225512981414795, | |
| "learning_rate": 4.100310237849018e-05, | |
| "loss": 1.4546, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5452081316553727, | |
| "grad_norm": 0.8715205192565918, | |
| "learning_rate": 4.095139607032058e-05, | |
| "loss": 1.4685, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5483059051306873, | |
| "grad_norm": 0.912350058555603, | |
| "learning_rate": 4.0899689762150983e-05, | |
| "loss": 1.4591, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5514036786060019, | |
| "grad_norm": 0.8565901517868042, | |
| "learning_rate": 4.084798345398139e-05, | |
| "loss": 1.45, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5545014520813165, | |
| "grad_norm": 1.0796242952346802, | |
| "learning_rate": 4.079627714581179e-05, | |
| "loss": 1.4703, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5575992255566312, | |
| "grad_norm": 0.8250919580459595, | |
| "learning_rate": 4.074457083764219e-05, | |
| "loss": 1.4465, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5606969990319458, | |
| "grad_norm": 1.1239674091339111, | |
| "learning_rate": 4.06928645294726e-05, | |
| "loss": 1.4664, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5637947725072604, | |
| "grad_norm": 0.8356937170028687, | |
| "learning_rate": 4.0641158221303e-05, | |
| "loss": 1.4736, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.566892545982575, | |
| "grad_norm": 0.9538320302963257, | |
| "learning_rate": 4.05894519131334e-05, | |
| "loss": 1.4685, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5699903194578897, | |
| "grad_norm": 0.9279565215110779, | |
| "learning_rate": 4.0537745604963805e-05, | |
| "loss": 1.4837, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5730880929332043, | |
| "grad_norm": 0.7745909690856934, | |
| "learning_rate": 4.048603929679421e-05, | |
| "loss": 1.4496, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5761858664085189, | |
| "grad_norm": 1.0521866083145142, | |
| "learning_rate": 4.0434332988624616e-05, | |
| "loss": 1.4368, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5792836398838335, | |
| "grad_norm": 1.0289032459259033, | |
| "learning_rate": 4.0382626680455014e-05, | |
| "loss": 1.4522, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5823814133591481, | |
| "grad_norm": 1.0961400270462036, | |
| "learning_rate": 4.033092037228542e-05, | |
| "loss": 1.4528, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5854791868344628, | |
| "grad_norm": 0.8788878321647644, | |
| "learning_rate": 4.0279214064115824e-05, | |
| "loss": 1.4644, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5885769603097774, | |
| "grad_norm": 0.8058956861495972, | |
| "learning_rate": 4.022750775594622e-05, | |
| "loss": 1.4437, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5916747337850919, | |
| "grad_norm": 0.7191058993339539, | |
| "learning_rate": 4.017580144777663e-05, | |
| "loss": 1.4553, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5947725072604065, | |
| "grad_norm": 0.9950371980667114, | |
| "learning_rate": 4.012409513960703e-05, | |
| "loss": 1.4601, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5978702807357212, | |
| "grad_norm": 1.0689505338668823, | |
| "learning_rate": 4.007238883143744e-05, | |
| "loss": 1.4298, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.6009680542110358, | |
| "grad_norm": 1.0722795724868774, | |
| "learning_rate": 4.0020682523267836e-05, | |
| "loss": 1.4477, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.6040658276863504, | |
| "grad_norm": 1.1227514743804932, | |
| "learning_rate": 3.996897621509824e-05, | |
| "loss": 1.4691, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.607163601161665, | |
| "grad_norm": 0.9659287333488464, | |
| "learning_rate": 3.9917269906928646e-05, | |
| "loss": 1.4575, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.6102613746369797, | |
| "grad_norm": 1.0061907768249512, | |
| "learning_rate": 3.986556359875905e-05, | |
| "loss": 1.4678, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.6133591481122943, | |
| "grad_norm": 1.0684504508972168, | |
| "learning_rate": 3.981385729058945e-05, | |
| "loss": 1.451, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.6164569215876089, | |
| "grad_norm": 0.7698823809623718, | |
| "learning_rate": 3.9762150982419854e-05, | |
| "loss": 1.468, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.6195546950629235, | |
| "grad_norm": 1.1838396787643433, | |
| "learning_rate": 3.971044467425026e-05, | |
| "loss": 1.4412, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6195546950629235, | |
| "eval_loss": 1.4199515581130981, | |
| "eval_runtime": 500.1361, | |
| "eval_samples_per_second": 41.309, | |
| "eval_steps_per_second": 5.165, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6226524685382382, | |
| "grad_norm": 0.9022509455680847, | |
| "learning_rate": 3.965873836608066e-05, | |
| "loss": 1.4453, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.6257502420135528, | |
| "grad_norm": 0.9168387055397034, | |
| "learning_rate": 3.960703205791106e-05, | |
| "loss": 1.4888, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.6288480154888674, | |
| "grad_norm": 1.0163190364837646, | |
| "learning_rate": 3.9555325749741475e-05, | |
| "loss": 1.449, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.631945788964182, | |
| "grad_norm": 1.0365885496139526, | |
| "learning_rate": 3.950361944157187e-05, | |
| "loss": 1.4691, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6350435624394967, | |
| "grad_norm": 0.9853517413139343, | |
| "learning_rate": 3.945191313340228e-05, | |
| "loss": 1.4544, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6381413359148113, | |
| "grad_norm": 1.1572599411010742, | |
| "learning_rate": 3.940020682523268e-05, | |
| "loss": 1.4577, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6412391093901258, | |
| "grad_norm": 0.9835549592971802, | |
| "learning_rate": 3.934850051706309e-05, | |
| "loss": 1.4408, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6443368828654404, | |
| "grad_norm": 0.792939305305481, | |
| "learning_rate": 3.9296794208893487e-05, | |
| "loss": 1.4579, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.647434656340755, | |
| "grad_norm": 0.9052080512046814, | |
| "learning_rate": 3.924508790072389e-05, | |
| "loss": 1.452, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6505324298160697, | |
| "grad_norm": 0.8612220287322998, | |
| "learning_rate": 3.91933815925543e-05, | |
| "loss": 1.4652, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6536302032913843, | |
| "grad_norm": 0.9994277358055115, | |
| "learning_rate": 3.91416752843847e-05, | |
| "loss": 1.4543, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6567279767666989, | |
| "grad_norm": 0.9547328948974609, | |
| "learning_rate": 3.90899689762151e-05, | |
| "loss": 1.4588, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6598257502420135, | |
| "grad_norm": 0.9314976930618286, | |
| "learning_rate": 3.9038262668045505e-05, | |
| "loss": 1.4889, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6629235237173282, | |
| "grad_norm": 0.891118586063385, | |
| "learning_rate": 3.898655635987591e-05, | |
| "loss": 1.4451, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6660212971926428, | |
| "grad_norm": 0.8572834730148315, | |
| "learning_rate": 3.8934850051706315e-05, | |
| "loss": 1.4312, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6691190706679574, | |
| "grad_norm": 0.994143545627594, | |
| "learning_rate": 3.8883143743536714e-05, | |
| "loss": 1.4357, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.672216844143272, | |
| "grad_norm": 1.099258542060852, | |
| "learning_rate": 3.883143743536712e-05, | |
| "loss": 1.4601, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6753146176185867, | |
| "grad_norm": 0.7827516794204712, | |
| "learning_rate": 3.8779731127197524e-05, | |
| "loss": 1.4351, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6784123910939013, | |
| "grad_norm": 0.952328085899353, | |
| "learning_rate": 3.872802481902792e-05, | |
| "loss": 1.4374, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6815101645692159, | |
| "grad_norm": 0.8575209379196167, | |
| "learning_rate": 3.867631851085833e-05, | |
| "loss": 1.4427, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6846079380445305, | |
| "grad_norm": 0.848095178604126, | |
| "learning_rate": 3.862461220268873e-05, | |
| "loss": 1.4337, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6877057115198452, | |
| "grad_norm": 0.8709319233894348, | |
| "learning_rate": 3.857290589451914e-05, | |
| "loss": 1.4647, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6908034849951598, | |
| "grad_norm": 0.9184627532958984, | |
| "learning_rate": 3.8521199586349535e-05, | |
| "loss": 1.4248, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6939012584704743, | |
| "grad_norm": 0.7510619759559631, | |
| "learning_rate": 3.846949327817994e-05, | |
| "loss": 1.4524, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.6969990319457889, | |
| "grad_norm": 0.9397213459014893, | |
| "learning_rate": 3.8417786970010346e-05, | |
| "loss": 1.4324, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7000968054211035, | |
| "grad_norm": 0.9663003087043762, | |
| "learning_rate": 3.836608066184075e-05, | |
| "loss": 1.4538, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.7031945788964182, | |
| "grad_norm": 1.021768569946289, | |
| "learning_rate": 3.831437435367115e-05, | |
| "loss": 1.4208, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.7062923523717328, | |
| "grad_norm": 0.7242577075958252, | |
| "learning_rate": 3.8262668045501554e-05, | |
| "loss": 1.4426, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.7093901258470474, | |
| "grad_norm": 0.8870192766189575, | |
| "learning_rate": 3.821096173733196e-05, | |
| "loss": 1.4317, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.712487899322362, | |
| "grad_norm": 0.7298364043235779, | |
| "learning_rate": 3.815925542916236e-05, | |
| "loss": 1.453, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7155856727976767, | |
| "grad_norm": 1.1145060062408447, | |
| "learning_rate": 3.810754912099276e-05, | |
| "loss": 1.422, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.7186834462729913, | |
| "grad_norm": 0.7591832876205444, | |
| "learning_rate": 3.805584281282317e-05, | |
| "loss": 1.4558, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.7217812197483059, | |
| "grad_norm": 0.9220917224884033, | |
| "learning_rate": 3.800413650465357e-05, | |
| "loss": 1.4594, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.7248789932236205, | |
| "grad_norm": 0.768500030040741, | |
| "learning_rate": 3.795243019648397e-05, | |
| "loss": 1.4314, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.7279767666989352, | |
| "grad_norm": 1.0767769813537598, | |
| "learning_rate": 3.7900723888314376e-05, | |
| "loss": 1.4502, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7310745401742498, | |
| "grad_norm": 1.253908634185791, | |
| "learning_rate": 3.784901758014478e-05, | |
| "loss": 1.4252, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.7341723136495644, | |
| "grad_norm": 0.943250298500061, | |
| "learning_rate": 3.7797311271975186e-05, | |
| "loss": 1.4248, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.737270087124879, | |
| "grad_norm": 0.8801707625389099, | |
| "learning_rate": 3.7745604963805584e-05, | |
| "loss": 1.4419, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.7403678606001937, | |
| "grad_norm": 0.6957564949989319, | |
| "learning_rate": 3.769389865563599e-05, | |
| "loss": 1.4259, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7434656340755083, | |
| "grad_norm": 0.8454000353813171, | |
| "learning_rate": 3.7642192347466395e-05, | |
| "loss": 1.4236, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7465634075508228, | |
| "grad_norm": 0.8354184031486511, | |
| "learning_rate": 3.759048603929679e-05, | |
| "loss": 1.4391, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7496611810261374, | |
| "grad_norm": 0.8452537059783936, | |
| "learning_rate": 3.75387797311272e-05, | |
| "loss": 1.4217, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.752758954501452, | |
| "grad_norm": 0.7954672574996948, | |
| "learning_rate": 3.74870734229576e-05, | |
| "loss": 1.4427, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7558567279767667, | |
| "grad_norm": 0.8007389307022095, | |
| "learning_rate": 3.743536711478801e-05, | |
| "loss": 1.4352, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7589545014520813, | |
| "grad_norm": 0.9226048588752747, | |
| "learning_rate": 3.7383660806618406e-05, | |
| "loss": 1.4431, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7620522749273959, | |
| "grad_norm": 0.9642147421836853, | |
| "learning_rate": 3.733195449844881e-05, | |
| "loss": 1.4365, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7651500484027105, | |
| "grad_norm": 0.8833573460578918, | |
| "learning_rate": 3.7280248190279217e-05, | |
| "loss": 1.4198, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7682478218780252, | |
| "grad_norm": 0.7712429165840149, | |
| "learning_rate": 3.722854188210962e-05, | |
| "loss": 1.4407, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7713455953533398, | |
| "grad_norm": 0.8324020504951477, | |
| "learning_rate": 3.717683557394002e-05, | |
| "loss": 1.4472, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7744433688286544, | |
| "grad_norm": 0.855499267578125, | |
| "learning_rate": 3.7125129265770425e-05, | |
| "loss": 1.4107, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.777541142303969, | |
| "grad_norm": 0.9357978105545044, | |
| "learning_rate": 3.707342295760083e-05, | |
| "loss": 1.4538, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7806389157792837, | |
| "grad_norm": 0.8887574076652527, | |
| "learning_rate": 3.702171664943123e-05, | |
| "loss": 1.4268, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7837366892545983, | |
| "grad_norm": 0.7522751688957214, | |
| "learning_rate": 3.6970010341261633e-05, | |
| "loss": 1.4156, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7868344627299129, | |
| "grad_norm": 0.8015060424804688, | |
| "learning_rate": 3.691830403309204e-05, | |
| "loss": 1.4157, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7899322362052275, | |
| "grad_norm": 0.8720948100090027, | |
| "learning_rate": 3.6866597724922444e-05, | |
| "loss": 1.4153, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7930300096805422, | |
| "grad_norm": 0.9627205729484558, | |
| "learning_rate": 3.681489141675284e-05, | |
| "loss": 1.4361, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7961277831558567, | |
| "grad_norm": 0.8693482279777527, | |
| "learning_rate": 3.676318510858325e-05, | |
| "loss": 1.4587, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7992255566311713, | |
| "grad_norm": 0.9331101775169373, | |
| "learning_rate": 3.671147880041365e-05, | |
| "loss": 1.4454, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.8023233301064859, | |
| "grad_norm": 0.9783412218093872, | |
| "learning_rate": 3.665977249224406e-05, | |
| "loss": 1.4434, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.8054211035818006, | |
| "grad_norm": 0.828762412071228, | |
| "learning_rate": 3.6608066184074455e-05, | |
| "loss": 1.4054, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8085188770571152, | |
| "grad_norm": 1.0303949117660522, | |
| "learning_rate": 3.655635987590486e-05, | |
| "loss": 1.4385, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.8116166505324298, | |
| "grad_norm": 0.7832565903663635, | |
| "learning_rate": 3.6504653567735266e-05, | |
| "loss": 1.4367, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.8147144240077444, | |
| "grad_norm": 0.8763161301612854, | |
| "learning_rate": 3.645294725956567e-05, | |
| "loss": 1.4005, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.817812197483059, | |
| "grad_norm": 0.8118229508399963, | |
| "learning_rate": 3.640124095139607e-05, | |
| "loss": 1.4349, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.8209099709583737, | |
| "grad_norm": 0.9044933319091797, | |
| "learning_rate": 3.6349534643226474e-05, | |
| "loss": 1.4393, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8240077444336883, | |
| "grad_norm": 1.0195043087005615, | |
| "learning_rate": 3.629782833505688e-05, | |
| "loss": 1.4283, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.8271055179090029, | |
| "grad_norm": 0.8480585813522339, | |
| "learning_rate": 3.624612202688728e-05, | |
| "loss": 1.4317, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.8302032913843175, | |
| "grad_norm": 0.9876317381858826, | |
| "learning_rate": 3.619441571871768e-05, | |
| "loss": 1.4356, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.8333010648596322, | |
| "grad_norm": 0.880251944065094, | |
| "learning_rate": 3.614270941054809e-05, | |
| "loss": 1.4226, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.8363988383349468, | |
| "grad_norm": 0.9709184169769287, | |
| "learning_rate": 3.609100310237849e-05, | |
| "loss": 1.4318, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8394966118102614, | |
| "grad_norm": 1.1328480243682861, | |
| "learning_rate": 3.603929679420889e-05, | |
| "loss": 1.4243, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.842594385285576, | |
| "grad_norm": 1.0186697244644165, | |
| "learning_rate": 3.5987590486039296e-05, | |
| "loss": 1.4318, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8456921587608907, | |
| "grad_norm": 1.1424143314361572, | |
| "learning_rate": 3.59358841778697e-05, | |
| "loss": 1.434, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8487899322362052, | |
| "grad_norm": 0.8991900086402893, | |
| "learning_rate": 3.5884177869700106e-05, | |
| "loss": 1.425, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8518877057115198, | |
| "grad_norm": 1.0953640937805176, | |
| "learning_rate": 3.5832471561530504e-05, | |
| "loss": 1.4208, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8549854791868344, | |
| "grad_norm": 0.8714589476585388, | |
| "learning_rate": 3.578076525336091e-05, | |
| "loss": 1.4133, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8580832526621491, | |
| "grad_norm": 1.155774474143982, | |
| "learning_rate": 3.5729058945191315e-05, | |
| "loss": 1.4217, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8611810261374637, | |
| "grad_norm": 0.7970629334449768, | |
| "learning_rate": 3.567735263702171e-05, | |
| "loss": 1.4225, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8642787996127783, | |
| "grad_norm": 0.8982592821121216, | |
| "learning_rate": 3.562564632885212e-05, | |
| "loss": 1.4445, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8673765730880929, | |
| "grad_norm": 0.8706935048103333, | |
| "learning_rate": 3.557394002068252e-05, | |
| "loss": 1.4471, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8704743465634075, | |
| "grad_norm": 0.8587532043457031, | |
| "learning_rate": 3.552223371251293e-05, | |
| "loss": 1.4198, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8735721200387222, | |
| "grad_norm": 1.0104795694351196, | |
| "learning_rate": 3.5470527404343326e-05, | |
| "loss": 1.4024, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8766698935140368, | |
| "grad_norm": 0.9810327887535095, | |
| "learning_rate": 3.541882109617373e-05, | |
| "loss": 1.4077, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8797676669893514, | |
| "grad_norm": 0.8219797611236572, | |
| "learning_rate": 3.536711478800414e-05, | |
| "loss": 1.4101, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.882865440464666, | |
| "grad_norm": 0.9589121341705322, | |
| "learning_rate": 3.531540847983454e-05, | |
| "loss": 1.4186, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8859632139399807, | |
| "grad_norm": 0.7841024398803711, | |
| "learning_rate": 3.526370217166495e-05, | |
| "loss": 1.4176, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8890609874152953, | |
| "grad_norm": 0.8052421808242798, | |
| "learning_rate": 3.521199586349535e-05, | |
| "loss": 1.423, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8921587608906099, | |
| "grad_norm": 1.0032799243927002, | |
| "learning_rate": 3.516028955532576e-05, | |
| "loss": 1.3992, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8952565343659245, | |
| "grad_norm": 0.934697151184082, | |
| "learning_rate": 3.5108583247156155e-05, | |
| "loss": 1.4145, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8983543078412392, | |
| "grad_norm": 0.8936486840248108, | |
| "learning_rate": 3.505687693898656e-05, | |
| "loss": 1.4508, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9014520813165537, | |
| "grad_norm": 1.3050090074539185, | |
| "learning_rate": 3.5005170630816965e-05, | |
| "loss": 1.4105, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.9045498547918683, | |
| "grad_norm": 1.2944504022598267, | |
| "learning_rate": 3.4953464322647364e-05, | |
| "loss": 1.4274, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.9076476282671829, | |
| "grad_norm": 0.7632983922958374, | |
| "learning_rate": 3.490175801447777e-05, | |
| "loss": 1.43, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.9107454017424976, | |
| "grad_norm": 1.2242131233215332, | |
| "learning_rate": 3.4850051706308174e-05, | |
| "loss": 1.4083, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.9138431752178122, | |
| "grad_norm": 0.9511488676071167, | |
| "learning_rate": 3.479834539813858e-05, | |
| "loss": 1.411, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9169409486931268, | |
| "grad_norm": 0.9538015723228455, | |
| "learning_rate": 3.474663908996898e-05, | |
| "loss": 1.4136, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.9200387221684414, | |
| "grad_norm": 0.9940462708473206, | |
| "learning_rate": 3.469493278179938e-05, | |
| "loss": 1.4199, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.9231364956437561, | |
| "grad_norm": 1.045178771018982, | |
| "learning_rate": 3.464322647362979e-05, | |
| "loss": 1.4096, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.9262342691190707, | |
| "grad_norm": 0.9768006205558777, | |
| "learning_rate": 3.459152016546019e-05, | |
| "loss": 1.4235, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.9293320425943853, | |
| "grad_norm": 1.0503100156784058, | |
| "learning_rate": 3.453981385729059e-05, | |
| "loss": 1.4259, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9324298160696999, | |
| "grad_norm": 1.1488350629806519, | |
| "learning_rate": 3.4488107549120996e-05, | |
| "loss": 1.4031, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.9355275895450145, | |
| "grad_norm": 1.0303666591644287, | |
| "learning_rate": 3.44364012409514e-05, | |
| "loss": 1.4023, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.9386253630203292, | |
| "grad_norm": 0.9419746398925781, | |
| "learning_rate": 3.4384694932781806e-05, | |
| "loss": 1.4151, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.9417231364956438, | |
| "grad_norm": 0.9052528738975525, | |
| "learning_rate": 3.4332988624612204e-05, | |
| "loss": 1.4177, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.9448209099709584, | |
| "grad_norm": 1.0753135681152344, | |
| "learning_rate": 3.428128231644261e-05, | |
| "loss": 1.4063, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.947918683446273, | |
| "grad_norm": 0.7896863222122192, | |
| "learning_rate": 3.4229576008273014e-05, | |
| "loss": 1.4309, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.9510164569215876, | |
| "grad_norm": 1.0495641231536865, | |
| "learning_rate": 3.417786970010341e-05, | |
| "loss": 1.4284, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9541142303969022, | |
| "grad_norm": 1.0048576593399048, | |
| "learning_rate": 3.412616339193382e-05, | |
| "loss": 1.4346, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9572120038722168, | |
| "grad_norm": 1.0318708419799805, | |
| "learning_rate": 3.407445708376422e-05, | |
| "loss": 1.4106, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9603097773475314, | |
| "grad_norm": 0.9739704132080078, | |
| "learning_rate": 3.402275077559463e-05, | |
| "loss": 1.4137, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9634075508228461, | |
| "grad_norm": 1.0641124248504639, | |
| "learning_rate": 3.3971044467425026e-05, | |
| "loss": 1.4154, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9665053242981607, | |
| "grad_norm": 1.143355369567871, | |
| "learning_rate": 3.391933815925543e-05, | |
| "loss": 1.4049, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9696030977734753, | |
| "grad_norm": 0.7641253471374512, | |
| "learning_rate": 3.3867631851085836e-05, | |
| "loss": 1.4153, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9727008712487899, | |
| "grad_norm": 0.9126153588294983, | |
| "learning_rate": 3.381592554291624e-05, | |
| "loss": 1.4219, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9757986447241046, | |
| "grad_norm": 0.8339759111404419, | |
| "learning_rate": 3.376421923474664e-05, | |
| "loss": 1.4234, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9788964181994192, | |
| "grad_norm": 1.062849760055542, | |
| "learning_rate": 3.3712512926577045e-05, | |
| "loss": 1.4298, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9819941916747338, | |
| "grad_norm": 0.880806565284729, | |
| "learning_rate": 3.366080661840745e-05, | |
| "loss": 1.4041, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9850919651500484, | |
| "grad_norm": 0.9244954586029053, | |
| "learning_rate": 3.360910031023785e-05, | |
| "loss": 1.4208, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.988189738625363, | |
| "grad_norm": 0.9386717677116394, | |
| "learning_rate": 3.355739400206825e-05, | |
| "loss": 1.4153, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9912875121006777, | |
| "grad_norm": 0.7148683667182922, | |
| "learning_rate": 3.350568769389866e-05, | |
| "loss": 1.4211, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9943852855759923, | |
| "grad_norm": 1.0988705158233643, | |
| "learning_rate": 3.345398138572906e-05, | |
| "loss": 1.4169, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9974830590513069, | |
| "grad_norm": 0.9161446690559387, | |
| "learning_rate": 3.340227507755946e-05, | |
| "loss": 1.4126, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.0005808325266214, | |
| "grad_norm": 0.9653096199035645, | |
| "learning_rate": 3.3350568769389867e-05, | |
| "loss": 1.4004, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.003678606001936, | |
| "grad_norm": 1.2281991243362427, | |
| "learning_rate": 3.329886246122027e-05, | |
| "loss": 1.3948, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.0067763794772506, | |
| "grad_norm": 0.8875632882118225, | |
| "learning_rate": 3.324715615305068e-05, | |
| "loss": 1.3869, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.0098741529525654, | |
| "grad_norm": 1.2403393983840942, | |
| "learning_rate": 3.3195449844881075e-05, | |
| "loss": 1.3794, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.01297192642788, | |
| "grad_norm": 0.9899982810020447, | |
| "learning_rate": 3.314374353671148e-05, | |
| "loss": 1.3781, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.0160696999031946, | |
| "grad_norm": 1.2559030055999756, | |
| "learning_rate": 3.3092037228541885e-05, | |
| "loss": 1.3978, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.0191674733785092, | |
| "grad_norm": 0.9205394387245178, | |
| "learning_rate": 3.3040330920372283e-05, | |
| "loss": 1.384, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.0222652468538238, | |
| "grad_norm": 1.1866810321807861, | |
| "learning_rate": 3.298862461220269e-05, | |
| "loss": 1.3989, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0253630203291384, | |
| "grad_norm": 0.8332041501998901, | |
| "learning_rate": 3.2936918304033094e-05, | |
| "loss": 1.373, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.028460793804453, | |
| "grad_norm": 0.9644818902015686, | |
| "learning_rate": 3.28852119958635e-05, | |
| "loss": 1.3982, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.0315585672797676, | |
| "grad_norm": 0.9065265655517578, | |
| "learning_rate": 3.28335056876939e-05, | |
| "loss": 1.388, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.0346563407550824, | |
| "grad_norm": 0.8498512506484985, | |
| "learning_rate": 3.27817993795243e-05, | |
| "loss": 1.3833, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.037754114230397, | |
| "grad_norm": 0.7631977796554565, | |
| "learning_rate": 3.273009307135471e-05, | |
| "loss": 1.381, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0408518877057116, | |
| "grad_norm": 0.9017680883407593, | |
| "learning_rate": 3.267838676318511e-05, | |
| "loss": 1.4068, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.0439496611810262, | |
| "grad_norm": 1.02823007106781, | |
| "learning_rate": 3.262668045501551e-05, | |
| "loss": 1.4036, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.0470474346563408, | |
| "grad_norm": 1.0055862665176392, | |
| "learning_rate": 3.2574974146845916e-05, | |
| "loss": 1.3691, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.0501452081316553, | |
| "grad_norm": 0.9213855862617493, | |
| "learning_rate": 3.252326783867632e-05, | |
| "loss": 1.4011, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.05324298160697, | |
| "grad_norm": 0.9935958385467529, | |
| "learning_rate": 3.247156153050672e-05, | |
| "loss": 1.3893, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.0563407550822845, | |
| "grad_norm": 0.8763697743415833, | |
| "learning_rate": 3.2419855222337124e-05, | |
| "loss": 1.3952, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.0594385285575991, | |
| "grad_norm": 1.15850830078125, | |
| "learning_rate": 3.236814891416753e-05, | |
| "loss": 1.3923, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.062536302032914, | |
| "grad_norm": 0.9471246600151062, | |
| "learning_rate": 3.2316442605997934e-05, | |
| "loss": 1.3884, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0656340755082285, | |
| "grad_norm": 0.7925785779953003, | |
| "learning_rate": 3.226473629782833e-05, | |
| "loss": 1.3752, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0687318489835431, | |
| "grad_norm": 0.9303650856018066, | |
| "learning_rate": 3.221302998965874e-05, | |
| "loss": 1.3719, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0718296224588577, | |
| "grad_norm": 0.9009895324707031, | |
| "learning_rate": 3.216132368148914e-05, | |
| "loss": 1.376, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.0749273959341723, | |
| "grad_norm": 0.922558605670929, | |
| "learning_rate": 3.210961737331955e-05, | |
| "loss": 1.3873, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.078025169409487, | |
| "grad_norm": 0.9685287475585938, | |
| "learning_rate": 3.2057911065149946e-05, | |
| "loss": 1.3911, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0811229428848015, | |
| "grad_norm": 1.0427310466766357, | |
| "learning_rate": 3.200620475698035e-05, | |
| "loss": 1.3862, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.084220716360116, | |
| "grad_norm": 0.8039479851722717, | |
| "learning_rate": 3.1954498448810756e-05, | |
| "loss": 1.3968, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0873184898354307, | |
| "grad_norm": 0.7638404965400696, | |
| "learning_rate": 3.190279214064116e-05, | |
| "loss": 1.3917, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0904162633107455, | |
| "grad_norm": 0.8520601391792297, | |
| "learning_rate": 3.185108583247156e-05, | |
| "loss": 1.3864, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.09351403678606, | |
| "grad_norm": 0.7571600079536438, | |
| "learning_rate": 3.1799379524301965e-05, | |
| "loss": 1.3905, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0966118102613747, | |
| "grad_norm": 0.8143354654312134, | |
| "learning_rate": 3.174767321613237e-05, | |
| "loss": 1.3478, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0997095837366893, | |
| "grad_norm": 0.9007526636123657, | |
| "learning_rate": 3.169596690796277e-05, | |
| "loss": 1.3845, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.1028073572120038, | |
| "grad_norm": 0.7659597396850586, | |
| "learning_rate": 3.164426059979317e-05, | |
| "loss": 1.4011, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.1059051306873184, | |
| "grad_norm": 0.9849894642829895, | |
| "learning_rate": 3.159255429162358e-05, | |
| "loss": 1.4066, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.109002904162633, | |
| "grad_norm": 0.7712810635566711, | |
| "learning_rate": 3.154084798345398e-05, | |
| "loss": 1.3568, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.1121006776379476, | |
| "grad_norm": 0.9364888668060303, | |
| "learning_rate": 3.148914167528438e-05, | |
| "loss": 1.3842, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.1151984511132624, | |
| "grad_norm": 0.8143067359924316, | |
| "learning_rate": 3.1437435367114786e-05, | |
| "loss": 1.3957, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.118296224588577, | |
| "grad_norm": 1.0575618743896484, | |
| "learning_rate": 3.138572905894519e-05, | |
| "loss": 1.3808, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.1213939980638916, | |
| "grad_norm": 0.9788165092468262, | |
| "learning_rate": 3.13340227507756e-05, | |
| "loss": 1.4097, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.1244917715392062, | |
| "grad_norm": 0.8391342163085938, | |
| "learning_rate": 3.1282316442605995e-05, | |
| "loss": 1.3683, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.1275895450145208, | |
| "grad_norm": 1.1145310401916504, | |
| "learning_rate": 3.123061013443641e-05, | |
| "loss": 1.3898, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.1306873184898354, | |
| "grad_norm": 1.1125495433807373, | |
| "learning_rate": 3.117890382626681e-05, | |
| "loss": 1.383, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.13378509196515, | |
| "grad_norm": 0.8851980566978455, | |
| "learning_rate": 3.112719751809721e-05, | |
| "loss": 1.3912, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.1368828654404646, | |
| "grad_norm": 0.869816243648529, | |
| "learning_rate": 3.1075491209927615e-05, | |
| "loss": 1.3855, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.1399806389157794, | |
| "grad_norm": 0.9421548247337341, | |
| "learning_rate": 3.102378490175802e-05, | |
| "loss": 1.3921, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.143078412391094, | |
| "grad_norm": 0.9962127208709717, | |
| "learning_rate": 3.097207859358842e-05, | |
| "loss": 1.3872, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.1461761858664086, | |
| "grad_norm": 0.8962863087654114, | |
| "learning_rate": 3.0920372285418824e-05, | |
| "loss": 1.3813, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.1492739593417232, | |
| "grad_norm": 1.142207384109497, | |
| "learning_rate": 3.086866597724923e-05, | |
| "loss": 1.379, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.1523717328170378, | |
| "grad_norm": 0.839261531829834, | |
| "learning_rate": 3.0816959669079634e-05, | |
| "loss": 1.3581, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.1554695062923523, | |
| "grad_norm": 1.087727665901184, | |
| "learning_rate": 3.076525336091003e-05, | |
| "loss": 1.3847, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.158567279767667, | |
| "grad_norm": 1.204419732093811, | |
| "learning_rate": 3.071354705274044e-05, | |
| "loss": 1.3883, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.1616650532429815, | |
| "grad_norm": 0.9747138023376465, | |
| "learning_rate": 3.066184074457084e-05, | |
| "loss": 1.3939, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1647628267182961, | |
| "grad_norm": 1.068014144897461, | |
| "learning_rate": 3.061013443640125e-05, | |
| "loss": 1.3776, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.167860600193611, | |
| "grad_norm": 0.9767001271247864, | |
| "learning_rate": 3.0558428128231646e-05, | |
| "loss": 1.4095, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1709583736689255, | |
| "grad_norm": 0.8887537717819214, | |
| "learning_rate": 3.050672182006205e-05, | |
| "loss": 1.3689, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1740561471442401, | |
| "grad_norm": 1.0799994468688965, | |
| "learning_rate": 3.0455015511892452e-05, | |
| "loss": 1.3742, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1771539206195547, | |
| "grad_norm": 0.8181743025779724, | |
| "learning_rate": 3.0403309203722857e-05, | |
| "loss": 1.377, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1802516940948693, | |
| "grad_norm": 0.8285690546035767, | |
| "learning_rate": 3.035160289555326e-05, | |
| "loss": 1.3915, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.1833494675701839, | |
| "grad_norm": 0.8738031387329102, | |
| "learning_rate": 3.0299896587383664e-05, | |
| "loss": 1.4063, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1864472410454985, | |
| "grad_norm": 0.8122093677520752, | |
| "learning_rate": 3.0248190279214066e-05, | |
| "loss": 1.3806, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.189545014520813, | |
| "grad_norm": 0.9666309952735901, | |
| "learning_rate": 3.019648397104447e-05, | |
| "loss": 1.3841, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1926427879961277, | |
| "grad_norm": 0.7673875689506531, | |
| "learning_rate": 3.0144777662874873e-05, | |
| "loss": 1.3989, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1957405614714425, | |
| "grad_norm": 1.207763910293579, | |
| "learning_rate": 3.0093071354705278e-05, | |
| "loss": 1.398, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.198838334946757, | |
| "grad_norm": 1.1100952625274658, | |
| "learning_rate": 3.004136504653568e-05, | |
| "loss": 1.3796, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.2019361084220717, | |
| "grad_norm": 0.8612858653068542, | |
| "learning_rate": 2.9989658738366084e-05, | |
| "loss": 1.3632, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.2050338818973862, | |
| "grad_norm": 0.9066482782363892, | |
| "learning_rate": 2.9937952430196486e-05, | |
| "loss": 1.3791, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.2081316553727008, | |
| "grad_norm": 0.8077186346054077, | |
| "learning_rate": 2.988624612202689e-05, | |
| "loss": 1.3735, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.2112294288480154, | |
| "grad_norm": 0.7607460618019104, | |
| "learning_rate": 2.9834539813857293e-05, | |
| "loss": 1.376, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.21432720232333, | |
| "grad_norm": 0.8543524146080017, | |
| "learning_rate": 2.9782833505687695e-05, | |
| "loss": 1.3708, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.2174249757986448, | |
| "grad_norm": 0.8722901344299316, | |
| "learning_rate": 2.97311271975181e-05, | |
| "loss": 1.3886, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.2205227492739594, | |
| "grad_norm": 0.8278937935829163, | |
| "learning_rate": 2.96794208893485e-05, | |
| "loss": 1.3654, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.223620522749274, | |
| "grad_norm": 0.8393619656562805, | |
| "learning_rate": 2.9627714581178906e-05, | |
| "loss": 1.3842, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.2267182962245886, | |
| "grad_norm": 0.8890239596366882, | |
| "learning_rate": 2.9576008273009308e-05, | |
| "loss": 1.3767, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.2298160696999032, | |
| "grad_norm": 0.9097030162811279, | |
| "learning_rate": 2.9524301964839713e-05, | |
| "loss": 1.3511, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.2329138431752178, | |
| "grad_norm": 1.070699691772461, | |
| "learning_rate": 2.9472595656670115e-05, | |
| "loss": 1.372, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.2360116166505324, | |
| "grad_norm": 0.994193971157074, | |
| "learning_rate": 2.942088934850052e-05, | |
| "loss": 1.3826, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.239109390125847, | |
| "grad_norm": 0.8802192807197571, | |
| "learning_rate": 2.936918304033092e-05, | |
| "loss": 1.3608, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.239109390125847, | |
| "eval_loss": 1.3684968948364258, | |
| "eval_runtime": 500.0031, | |
| "eval_samples_per_second": 41.32, | |
| "eval_steps_per_second": 5.166, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.2422071636011616, | |
| "grad_norm": 0.9239991903305054, | |
| "learning_rate": 2.9317476732161327e-05, | |
| "loss": 1.4047, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.2453049370764764, | |
| "grad_norm": 0.7434487342834473, | |
| "learning_rate": 2.926577042399173e-05, | |
| "loss": 1.3958, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.248402710551791, | |
| "grad_norm": 0.7530505061149597, | |
| "learning_rate": 2.921406411582213e-05, | |
| "loss": 1.3642, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.2515004840271056, | |
| "grad_norm": 0.79911208152771, | |
| "learning_rate": 2.9162357807652535e-05, | |
| "loss": 1.3506, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.2545982575024202, | |
| "grad_norm": 0.6747287511825562, | |
| "learning_rate": 2.9110651499482937e-05, | |
| "loss": 1.3716, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.2576960309777347, | |
| "grad_norm": 0.8054267764091492, | |
| "learning_rate": 2.9058945191313342e-05, | |
| "loss": 1.3587, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.2607938044530493, | |
| "grad_norm": 0.8911522030830383, | |
| "learning_rate": 2.9007238883143744e-05, | |
| "loss": 1.3895, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.263891577928364, | |
| "grad_norm": 0.9144203662872314, | |
| "learning_rate": 2.895553257497415e-05, | |
| "loss": 1.3768, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.2669893514036787, | |
| "grad_norm": 0.7684288024902344, | |
| "learning_rate": 2.890382626680455e-05, | |
| "loss": 1.3694, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.2700871248789931, | |
| "grad_norm": 0.868026077747345, | |
| "learning_rate": 2.8852119958634955e-05, | |
| "loss": 1.3739, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.273184898354308, | |
| "grad_norm": 0.9168595671653748, | |
| "learning_rate": 2.8800413650465357e-05, | |
| "loss": 1.3813, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2762826718296225, | |
| "grad_norm": 0.9085325002670288, | |
| "learning_rate": 2.8748707342295762e-05, | |
| "loss": 1.3727, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.279380445304937, | |
| "grad_norm": 0.8276653289794922, | |
| "learning_rate": 2.8697001034126164e-05, | |
| "loss": 1.3726, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2824782187802517, | |
| "grad_norm": 0.9563087821006775, | |
| "learning_rate": 2.864529472595657e-05, | |
| "loss": 1.3881, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2855759922555663, | |
| "grad_norm": 0.8035735487937927, | |
| "learning_rate": 2.859358841778697e-05, | |
| "loss": 1.3732, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2886737657308809, | |
| "grad_norm": 0.9055673480033875, | |
| "learning_rate": 2.8541882109617372e-05, | |
| "loss": 1.3691, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2917715392061955, | |
| "grad_norm": 0.8407905697822571, | |
| "learning_rate": 2.8490175801447777e-05, | |
| "loss": 1.368, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.2948693126815103, | |
| "grad_norm": 0.8415255546569824, | |
| "learning_rate": 2.843846949327818e-05, | |
| "loss": 1.396, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2979670861568247, | |
| "grad_norm": 0.8884280920028687, | |
| "learning_rate": 2.8386763185108584e-05, | |
| "loss": 1.3676, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.3010648596321395, | |
| "grad_norm": 0.7399088740348816, | |
| "learning_rate": 2.8335056876938986e-05, | |
| "loss": 1.3803, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.304162633107454, | |
| "grad_norm": 0.9572098851203918, | |
| "learning_rate": 2.828335056876939e-05, | |
| "loss": 1.365, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.3072604065827687, | |
| "grad_norm": 1.2893517017364502, | |
| "learning_rate": 2.8231644260599793e-05, | |
| "loss": 1.3971, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.3103581800580832, | |
| "grad_norm": 0.9179888963699341, | |
| "learning_rate": 2.8179937952430198e-05, | |
| "loss": 1.3602, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.3134559535333978, | |
| "grad_norm": 0.867713987827301, | |
| "learning_rate": 2.81282316442606e-05, | |
| "loss": 1.3971, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.3165537270087124, | |
| "grad_norm": 0.9152940511703491, | |
| "learning_rate": 2.8076525336091004e-05, | |
| "loss": 1.3714, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.319651500484027, | |
| "grad_norm": 1.19929039478302, | |
| "learning_rate": 2.8024819027921406e-05, | |
| "loss": 1.3897, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.3227492739593418, | |
| "grad_norm": 1.3702645301818848, | |
| "learning_rate": 2.7973112719751808e-05, | |
| "loss": 1.3727, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.3258470474346564, | |
| "grad_norm": 0.8655095100402832, | |
| "learning_rate": 2.7921406411582213e-05, | |
| "loss": 1.377, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.328944820909971, | |
| "grad_norm": 0.9268757104873657, | |
| "learning_rate": 2.7869700103412615e-05, | |
| "loss": 1.3847, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.3320425943852856, | |
| "grad_norm": 0.9466863870620728, | |
| "learning_rate": 2.781799379524302e-05, | |
| "loss": 1.3784, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.3351403678606002, | |
| "grad_norm": 0.8696274757385254, | |
| "learning_rate": 2.776628748707342e-05, | |
| "loss": 1.3832, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.3382381413359148, | |
| "grad_norm": 0.8872708678245544, | |
| "learning_rate": 2.7714581178903826e-05, | |
| "loss": 1.3613, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.3413359148112294, | |
| "grad_norm": 0.9495521187782288, | |
| "learning_rate": 2.7662874870734228e-05, | |
| "loss": 1.3744, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.344433688286544, | |
| "grad_norm": 0.9160442352294922, | |
| "learning_rate": 2.7611168562564633e-05, | |
| "loss": 1.3635, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.3475314617618586, | |
| "grad_norm": 1.015899896621704, | |
| "learning_rate": 2.7559462254395035e-05, | |
| "loss": 1.3958, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.3506292352371734, | |
| "grad_norm": 1.0616685152053833, | |
| "learning_rate": 2.750775594622544e-05, | |
| "loss": 1.3594, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.353727008712488, | |
| "grad_norm": 0.9547629952430725, | |
| "learning_rate": 2.745604963805584e-05, | |
| "loss": 1.3835, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.3568247821878026, | |
| "grad_norm": 0.8538408279418945, | |
| "learning_rate": 2.7404343329886247e-05, | |
| "loss": 1.3441, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.3599225556631171, | |
| "grad_norm": 1.0383230447769165, | |
| "learning_rate": 2.735263702171665e-05, | |
| "loss": 1.3528, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.3630203291384317, | |
| "grad_norm": 1.053682804107666, | |
| "learning_rate": 2.730093071354705e-05, | |
| "loss": 1.3871, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.3661181026137463, | |
| "grad_norm": 0.8979085683822632, | |
| "learning_rate": 2.7249224405377455e-05, | |
| "loss": 1.3763, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.369215876089061, | |
| "grad_norm": 0.8620943427085876, | |
| "learning_rate": 2.7197518097207857e-05, | |
| "loss": 1.3604, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.3723136495643757, | |
| "grad_norm": 0.8336718082427979, | |
| "learning_rate": 2.7145811789038262e-05, | |
| "loss": 1.3479, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.37541142303969, | |
| "grad_norm": 0.9134451150894165, | |
| "learning_rate": 2.7094105480868664e-05, | |
| "loss": 1.3716, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.378509196515005, | |
| "grad_norm": 0.8077151775360107, | |
| "learning_rate": 2.7042399172699072e-05, | |
| "loss": 1.3766, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3816069699903195, | |
| "grad_norm": 1.0257856845855713, | |
| "learning_rate": 2.6990692864529477e-05, | |
| "loss": 1.4035, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.384704743465634, | |
| "grad_norm": 0.9979709386825562, | |
| "learning_rate": 2.693898655635988e-05, | |
| "loss": 1.3616, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.3878025169409487, | |
| "grad_norm": 0.9246943593025208, | |
| "learning_rate": 2.6887280248190284e-05, | |
| "loss": 1.3599, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.3909002904162633, | |
| "grad_norm": 0.9028282165527344, | |
| "learning_rate": 2.6835573940020685e-05, | |
| "loss": 1.3517, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.3939980638915779, | |
| "grad_norm": 1.0988807678222656, | |
| "learning_rate": 2.678386763185109e-05, | |
| "loss": 1.3631, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3970958373668925, | |
| "grad_norm": 0.928338885307312, | |
| "learning_rate": 2.6732161323681492e-05, | |
| "loss": 1.3746, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.4001936108422073, | |
| "grad_norm": 0.8771430253982544, | |
| "learning_rate": 2.6680455015511897e-05, | |
| "loss": 1.3812, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.4032913843175217, | |
| "grad_norm": 0.8561460971832275, | |
| "learning_rate": 2.66287487073423e-05, | |
| "loss": 1.3785, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.4063891577928365, | |
| "grad_norm": 0.808969259262085, | |
| "learning_rate": 2.6577042399172704e-05, | |
| "loss": 1.3753, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.409486931268151, | |
| "grad_norm": 0.935157299041748, | |
| "learning_rate": 2.6525336091003106e-05, | |
| "loss": 1.3408, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.4125847047434656, | |
| "grad_norm": 0.884665310382843, | |
| "learning_rate": 2.6473629782833507e-05, | |
| "loss": 1.3733, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.4156824782187802, | |
| "grad_norm": 0.8393154740333557, | |
| "learning_rate": 2.6421923474663913e-05, | |
| "loss": 1.3707, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.4187802516940948, | |
| "grad_norm": 0.9536909461021423, | |
| "learning_rate": 2.6370217166494314e-05, | |
| "loss": 1.3876, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.4218780251694094, | |
| "grad_norm": 1.0733585357666016, | |
| "learning_rate": 2.631851085832472e-05, | |
| "loss": 1.3852, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.424975798644724, | |
| "grad_norm": 0.9174052476882935, | |
| "learning_rate": 2.626680455015512e-05, | |
| "loss": 1.3534, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.4280735721200388, | |
| "grad_norm": 0.9805439114570618, | |
| "learning_rate": 2.6215098241985526e-05, | |
| "loss": 1.3651, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.4311713455953534, | |
| "grad_norm": 1.0409832000732422, | |
| "learning_rate": 2.6163391933815928e-05, | |
| "loss": 1.3703, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.434269119070668, | |
| "grad_norm": 0.8675093650817871, | |
| "learning_rate": 2.6111685625646333e-05, | |
| "loss": 1.3563, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.4373668925459826, | |
| "grad_norm": 0.9059470295906067, | |
| "learning_rate": 2.6059979317476734e-05, | |
| "loss": 1.3721, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.4404646660212972, | |
| "grad_norm": 1.078581690788269, | |
| "learning_rate": 2.600827300930714e-05, | |
| "loss": 1.3809, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.4435624394966118, | |
| "grad_norm": 0.7785590291023254, | |
| "learning_rate": 2.595656670113754e-05, | |
| "loss": 1.365, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.4466602129719264, | |
| "grad_norm": 0.7777726650238037, | |
| "learning_rate": 2.5904860392967943e-05, | |
| "loss": 1.3651, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.449757986447241, | |
| "grad_norm": 1.0098230838775635, | |
| "learning_rate": 2.5853154084798348e-05, | |
| "loss": 1.3776, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.4528557599225556, | |
| "grad_norm": 0.8591383695602417, | |
| "learning_rate": 2.580144777662875e-05, | |
| "loss": 1.3874, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.4559535333978704, | |
| "grad_norm": 0.8739891052246094, | |
| "learning_rate": 2.5749741468459155e-05, | |
| "loss": 1.3532, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.459051306873185, | |
| "grad_norm": 1.0277025699615479, | |
| "learning_rate": 2.5698035160289556e-05, | |
| "loss": 1.3717, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.4621490803484996, | |
| "grad_norm": 0.8807665705680847, | |
| "learning_rate": 2.564632885211996e-05, | |
| "loss": 1.3819, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.4652468538238141, | |
| "grad_norm": 0.8883543610572815, | |
| "learning_rate": 2.5594622543950363e-05, | |
| "loss": 1.3844, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.4683446272991287, | |
| "grad_norm": 0.9567596316337585, | |
| "learning_rate": 2.5542916235780768e-05, | |
| "loss": 1.361, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.4714424007744433, | |
| "grad_norm": 0.8780364990234375, | |
| "learning_rate": 2.549120992761117e-05, | |
| "loss": 1.3509, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.474540174249758, | |
| "grad_norm": 0.9909296631813049, | |
| "learning_rate": 2.5439503619441575e-05, | |
| "loss": 1.3497, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.4776379477250727, | |
| "grad_norm": 0.802102267742157, | |
| "learning_rate": 2.5387797311271977e-05, | |
| "loss": 1.3924, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.480735721200387, | |
| "grad_norm": 0.9961832165718079, | |
| "learning_rate": 2.5336091003102382e-05, | |
| "loss": 1.3684, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.483833494675702, | |
| "grad_norm": 0.8613944053649902, | |
| "learning_rate": 2.5284384694932783e-05, | |
| "loss": 1.4083, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.4869312681510165, | |
| "grad_norm": 0.8471227884292603, | |
| "learning_rate": 2.5232678386763185e-05, | |
| "loss": 1.3734, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.490029041626331, | |
| "grad_norm": 1.068398118019104, | |
| "learning_rate": 2.518097207859359e-05, | |
| "loss": 1.358, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.4931268151016457, | |
| "grad_norm": 0.8443578481674194, | |
| "learning_rate": 2.5129265770423992e-05, | |
| "loss": 1.3777, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4962245885769603, | |
| "grad_norm": 0.8146198391914368, | |
| "learning_rate": 2.5077559462254397e-05, | |
| "loss": 1.3617, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4993223620522749, | |
| "grad_norm": 0.8507145643234253, | |
| "learning_rate": 2.50258531540848e-05, | |
| "loss": 1.3742, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.5024201355275895, | |
| "grad_norm": 1.1908308267593384, | |
| "learning_rate": 2.4974146845915204e-05, | |
| "loss": 1.3716, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.5055179090029043, | |
| "grad_norm": 0.9929447770118713, | |
| "learning_rate": 2.4922440537745605e-05, | |
| "loss": 1.3774, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.5086156824782186, | |
| "grad_norm": 0.8368676900863647, | |
| "learning_rate": 2.487073422957601e-05, | |
| "loss": 1.3765, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.5117134559535335, | |
| "grad_norm": 1.0263066291809082, | |
| "learning_rate": 2.4819027921406412e-05, | |
| "loss": 1.3573, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.514811229428848, | |
| "grad_norm": 0.8279297947883606, | |
| "learning_rate": 2.4767321613236817e-05, | |
| "loss": 1.3713, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.5179090029041626, | |
| "grad_norm": 1.0560111999511719, | |
| "learning_rate": 2.471561530506722e-05, | |
| "loss": 1.3709, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.5210067763794772, | |
| "grad_norm": 0.9566187262535095, | |
| "learning_rate": 2.466390899689762e-05, | |
| "loss": 1.3839, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.5241045498547918, | |
| "grad_norm": 0.9892044067382812, | |
| "learning_rate": 2.4612202688728026e-05, | |
| "loss": 1.3733, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.5272023233301066, | |
| "grad_norm": 0.8076044321060181, | |
| "learning_rate": 2.4560496380558427e-05, | |
| "loss": 1.3751, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.530300096805421, | |
| "grad_norm": 0.7843705415725708, | |
| "learning_rate": 2.4508790072388832e-05, | |
| "loss": 1.3499, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.5333978702807358, | |
| "grad_norm": 0.8170126676559448, | |
| "learning_rate": 2.4457083764219234e-05, | |
| "loss": 1.3634, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.5364956437560502, | |
| "grad_norm": 0.9551861882209778, | |
| "learning_rate": 2.440537745604964e-05, | |
| "loss": 1.3563, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.539593417231365, | |
| "grad_norm": 0.8271490931510925, | |
| "learning_rate": 2.435367114788004e-05, | |
| "loss": 1.3608, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.5426911907066796, | |
| "grad_norm": 0.9556779861450195, | |
| "learning_rate": 2.4301964839710446e-05, | |
| "loss": 1.3652, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.5457889641819942, | |
| "grad_norm": 0.9999971985816956, | |
| "learning_rate": 2.4250258531540848e-05, | |
| "loss": 1.3429, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.5488867376573088, | |
| "grad_norm": 0.8976193070411682, | |
| "learning_rate": 2.4198552223371253e-05, | |
| "loss": 1.3591, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5519845111326234, | |
| "grad_norm": 0.9682601690292358, | |
| "learning_rate": 2.4146845915201654e-05, | |
| "loss": 1.3683, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.5550822846079382, | |
| "grad_norm": 1.0349007844924927, | |
| "learning_rate": 2.409513960703206e-05, | |
| "loss": 1.3673, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.5581800580832526, | |
| "grad_norm": 0.9338064193725586, | |
| "learning_rate": 2.404343329886246e-05, | |
| "loss": 1.391, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.5612778315585674, | |
| "grad_norm": 0.8901142477989197, | |
| "learning_rate": 2.3991726990692863e-05, | |
| "loss": 1.3488, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.5643756050338817, | |
| "grad_norm": 0.9426191449165344, | |
| "learning_rate": 2.394002068252327e-05, | |
| "loss": 1.3566, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.5674733785091965, | |
| "grad_norm": 0.9889611601829529, | |
| "learning_rate": 2.3888314374353673e-05, | |
| "loss": 1.3834, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.5705711519845111, | |
| "grad_norm": 0.8977054357528687, | |
| "learning_rate": 2.3836608066184078e-05, | |
| "loss": 1.3682, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.5736689254598257, | |
| "grad_norm": 1.0564823150634766, | |
| "learning_rate": 2.378490175801448e-05, | |
| "loss": 1.3758, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.5767666989351403, | |
| "grad_norm": 1.1357567310333252, | |
| "learning_rate": 2.3733195449844885e-05, | |
| "loss": 1.3696, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.579864472410455, | |
| "grad_norm": 0.933795690536499, | |
| "learning_rate": 2.3681489141675287e-05, | |
| "loss": 1.3749, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.5829622458857697, | |
| "grad_norm": 0.8098678588867188, | |
| "learning_rate": 2.3629782833505688e-05, | |
| "loss": 1.3497, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.586060019361084, | |
| "grad_norm": 0.9030234813690186, | |
| "learning_rate": 2.3578076525336093e-05, | |
| "loss": 1.3555, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.589157792836399, | |
| "grad_norm": 0.9926664233207703, | |
| "learning_rate": 2.3526370217166495e-05, | |
| "loss": 1.3732, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.5922555663117135, | |
| "grad_norm": 1.2584630250930786, | |
| "learning_rate": 2.34746639089969e-05, | |
| "loss": 1.3608, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.595353339787028, | |
| "grad_norm": 1.0363199710845947, | |
| "learning_rate": 2.3422957600827302e-05, | |
| "loss": 1.3647, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5984511132623427, | |
| "grad_norm": 1.238027572631836, | |
| "learning_rate": 2.3371251292657707e-05, | |
| "loss": 1.3522, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.6015488867376573, | |
| "grad_norm": 0.6948149800300598, | |
| "learning_rate": 2.331954498448811e-05, | |
| "loss": 1.3697, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.604646660212972, | |
| "grad_norm": 0.8757950663566589, | |
| "learning_rate": 2.3267838676318514e-05, | |
| "loss": 1.3593, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.6077444336882865, | |
| "grad_norm": 1.110155701637268, | |
| "learning_rate": 2.3216132368148915e-05, | |
| "loss": 1.3813, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.6108422071636013, | |
| "grad_norm": 0.8213835954666138, | |
| "learning_rate": 2.316442605997932e-05, | |
| "loss": 1.3739, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.6139399806389156, | |
| "grad_norm": 0.8836016654968262, | |
| "learning_rate": 2.3112719751809722e-05, | |
| "loss": 1.3706, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.6170377541142305, | |
| "grad_norm": 1.0370168685913086, | |
| "learning_rate": 2.3061013443640127e-05, | |
| "loss": 1.3669, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.620135527589545, | |
| "grad_norm": 0.8061625957489014, | |
| "learning_rate": 2.300930713547053e-05, | |
| "loss": 1.3712, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.6232333010648596, | |
| "grad_norm": 0.8144744038581848, | |
| "learning_rate": 2.295760082730093e-05, | |
| "loss": 1.3686, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.6263310745401742, | |
| "grad_norm": 1.1386702060699463, | |
| "learning_rate": 2.2905894519131335e-05, | |
| "loss": 1.3453, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.6294288480154888, | |
| "grad_norm": 0.9613929986953735, | |
| "learning_rate": 2.2854188210961737e-05, | |
| "loss": 1.3518, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.6325266214908036, | |
| "grad_norm": 0.7813166975975037, | |
| "learning_rate": 2.2802481902792142e-05, | |
| "loss": 1.3571, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.635624394966118, | |
| "grad_norm": 0.8500548601150513, | |
| "learning_rate": 2.2750775594622544e-05, | |
| "loss": 1.3408, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.6387221684414328, | |
| "grad_norm": 0.8827762603759766, | |
| "learning_rate": 2.269906928645295e-05, | |
| "loss": 1.3491, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.6418199419167472, | |
| "grad_norm": 0.8917422890663147, | |
| "learning_rate": 2.264736297828335e-05, | |
| "loss": 1.3676, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.644917715392062, | |
| "grad_norm": 0.9541721940040588, | |
| "learning_rate": 2.2595656670113756e-05, | |
| "loss": 1.3504, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.6480154888673766, | |
| "grad_norm": 1.1979867219924927, | |
| "learning_rate": 2.2543950361944157e-05, | |
| "loss": 1.3873, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.6511132623426912, | |
| "grad_norm": 0.9107701182365417, | |
| "learning_rate": 2.2492244053774563e-05, | |
| "loss": 1.3822, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.6542110358180058, | |
| "grad_norm": 1.0378977060317993, | |
| "learning_rate": 2.2440537745604964e-05, | |
| "loss": 1.3589, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.6573088092933204, | |
| "grad_norm": 0.9246495962142944, | |
| "learning_rate": 2.2388831437435366e-05, | |
| "loss": 1.338, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.6604065827686352, | |
| "grad_norm": 1.034191370010376, | |
| "learning_rate": 2.233712512926577e-05, | |
| "loss": 1.3443, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.6635043562439495, | |
| "grad_norm": 0.812461256980896, | |
| "learning_rate": 2.2285418821096173e-05, | |
| "loss": 1.3717, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.6666021297192644, | |
| "grad_norm": 1.0656987428665161, | |
| "learning_rate": 2.2233712512926578e-05, | |
| "loss": 1.3782, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.669699903194579, | |
| "grad_norm": 0.9221978783607483, | |
| "learning_rate": 2.218200620475698e-05, | |
| "loss": 1.3511, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.6727976766698935, | |
| "grad_norm": 1.0364100933074951, | |
| "learning_rate": 2.2130299896587384e-05, | |
| "loss": 1.3518, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.6758954501452081, | |
| "grad_norm": 1.0864959955215454, | |
| "learning_rate": 2.2078593588417786e-05, | |
| "loss": 1.3379, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.6789932236205227, | |
| "grad_norm": 0.9392344951629639, | |
| "learning_rate": 2.202688728024819e-05, | |
| "loss": 1.3683, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.6820909970958373, | |
| "grad_norm": 0.8275219798088074, | |
| "learning_rate": 2.1975180972078593e-05, | |
| "loss": 1.3516, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.685188770571152, | |
| "grad_norm": 0.9747416377067566, | |
| "learning_rate": 2.1923474663908998e-05, | |
| "loss": 1.3571, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.6882865440464667, | |
| "grad_norm": 0.8325587511062622, | |
| "learning_rate": 2.1871768355739403e-05, | |
| "loss": 1.3732, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.691384317521781, | |
| "grad_norm": 1.1191556453704834, | |
| "learning_rate": 2.1820062047569805e-05, | |
| "loss": 1.3539, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.694482090997096, | |
| "grad_norm": 0.9786492586135864, | |
| "learning_rate": 2.176835573940021e-05, | |
| "loss": 1.3506, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.6975798644724105, | |
| "grad_norm": 0.7830746173858643, | |
| "learning_rate": 2.171664943123061e-05, | |
| "loss": 1.3699, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.700677637947725, | |
| "grad_norm": 0.8177460432052612, | |
| "learning_rate": 2.1664943123061017e-05, | |
| "loss": 1.35, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.7037754114230397, | |
| "grad_norm": 0.7574586868286133, | |
| "learning_rate": 2.1613236814891418e-05, | |
| "loss": 1.3609, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.7068731848983543, | |
| "grad_norm": 0.909091055393219, | |
| "learning_rate": 2.1561530506721823e-05, | |
| "loss": 1.3593, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.709970958373669, | |
| "grad_norm": 0.8122137188911438, | |
| "learning_rate": 2.1509824198552225e-05, | |
| "loss": 1.3665, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.7130687318489835, | |
| "grad_norm": 0.7794236540794373, | |
| "learning_rate": 2.145811789038263e-05, | |
| "loss": 1.3435, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.7161665053242983, | |
| "grad_norm": 0.7815309166908264, | |
| "learning_rate": 2.1406411582213032e-05, | |
| "loss": 1.3652, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.7192642787996126, | |
| "grad_norm": 0.791810154914856, | |
| "learning_rate": 2.1354705274043433e-05, | |
| "loss": 1.3294, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.7223620522749274, | |
| "grad_norm": 1.0140234231948853, | |
| "learning_rate": 2.130299896587384e-05, | |
| "loss": 1.3682, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.725459825750242, | |
| "grad_norm": 0.9673962593078613, | |
| "learning_rate": 2.125129265770424e-05, | |
| "loss": 1.3639, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.7285575992255566, | |
| "grad_norm": 0.8091711401939392, | |
| "learning_rate": 2.1199586349534645e-05, | |
| "loss": 1.3487, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.7316553727008712, | |
| "grad_norm": 0.8248768448829651, | |
| "learning_rate": 2.1147880041365047e-05, | |
| "loss": 1.3643, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.7347531461761858, | |
| "grad_norm": 0.9795010089874268, | |
| "learning_rate": 2.1096173733195452e-05, | |
| "loss": 1.3715, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.7378509196515006, | |
| "grad_norm": 0.7902389764785767, | |
| "learning_rate": 2.1044467425025854e-05, | |
| "loss": 1.3501, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.740948693126815, | |
| "grad_norm": 1.280175805091858, | |
| "learning_rate": 2.099276111685626e-05, | |
| "loss": 1.3497, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.7440464666021298, | |
| "grad_norm": 0.9128603339195251, | |
| "learning_rate": 2.094105480868666e-05, | |
| "loss": 1.3568, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.7471442400774442, | |
| "grad_norm": 0.7820084095001221, | |
| "learning_rate": 2.0889348500517066e-05, | |
| "loss": 1.3853, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.750242013552759, | |
| "grad_norm": 0.9994757771492004, | |
| "learning_rate": 2.0837642192347467e-05, | |
| "loss": 1.3501, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.7533397870280736, | |
| "grad_norm": 0.9045569896697998, | |
| "learning_rate": 2.0785935884177872e-05, | |
| "loss": 1.3594, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.7564375605033882, | |
| "grad_norm": 0.8555303812026978, | |
| "learning_rate": 2.0734229576008274e-05, | |
| "loss": 1.3539, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.7595353339787028, | |
| "grad_norm": 1.0530476570129395, | |
| "learning_rate": 2.0682523267838676e-05, | |
| "loss": 1.3504, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.7626331074540174, | |
| "grad_norm": 0.904148519039154, | |
| "learning_rate": 2.063081695966908e-05, | |
| "loss": 1.3544, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.7657308809293322, | |
| "grad_norm": 0.8729182481765747, | |
| "learning_rate": 2.0579110651499482e-05, | |
| "loss": 1.3694, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.7688286544046465, | |
| "grad_norm": 0.8215417265892029, | |
| "learning_rate": 2.0527404343329888e-05, | |
| "loss": 1.3497, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.7719264278799614, | |
| "grad_norm": 0.8960113525390625, | |
| "learning_rate": 2.047569803516029e-05, | |
| "loss": 1.3416, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.775024201355276, | |
| "grad_norm": 0.8761835098266602, | |
| "learning_rate": 2.0423991726990694e-05, | |
| "loss": 1.3485, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.7781219748305905, | |
| "grad_norm": 0.9275888204574585, | |
| "learning_rate": 2.0372285418821096e-05, | |
| "loss": 1.3792, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.7812197483059051, | |
| "grad_norm": 1.0560438632965088, | |
| "learning_rate": 2.03205791106515e-05, | |
| "loss": 1.3543, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.7843175217812197, | |
| "grad_norm": 0.8371681571006775, | |
| "learning_rate": 2.0268872802481903e-05, | |
| "loss": 1.3322, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.7874152952565345, | |
| "grad_norm": 1.2260630130767822, | |
| "learning_rate": 2.0217166494312308e-05, | |
| "loss": 1.365, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.790513068731849, | |
| "grad_norm": 0.9227527976036072, | |
| "learning_rate": 2.016546018614271e-05, | |
| "loss": 1.3334, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.7936108422071637, | |
| "grad_norm": 0.8147873878479004, | |
| "learning_rate": 2.011375387797311e-05, | |
| "loss": 1.3524, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.796708615682478, | |
| "grad_norm": 1.107546091079712, | |
| "learning_rate": 2.0062047569803516e-05, | |
| "loss": 1.3383, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.799806389157793, | |
| "grad_norm": 0.9934420585632324, | |
| "learning_rate": 2.0010341261633918e-05, | |
| "loss": 1.3381, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.8029041626331075, | |
| "grad_norm": 0.9304853677749634, | |
| "learning_rate": 1.9958634953464323e-05, | |
| "loss": 1.3344, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.806001936108422, | |
| "grad_norm": 0.9126875996589661, | |
| "learning_rate": 1.9906928645294725e-05, | |
| "loss": 1.3671, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.8090997095837367, | |
| "grad_norm": 1.0258123874664307, | |
| "learning_rate": 1.985522233712513e-05, | |
| "loss": 1.3598, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.8121974830590513, | |
| "grad_norm": 0.8520185947418213, | |
| "learning_rate": 1.980351602895553e-05, | |
| "loss": 1.3646, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.815295256534366, | |
| "grad_norm": 1.0395876169204712, | |
| "learning_rate": 1.9751809720785936e-05, | |
| "loss": 1.3408, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.8183930300096804, | |
| "grad_norm": 0.8687078952789307, | |
| "learning_rate": 1.970010341261634e-05, | |
| "loss": 1.3568, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.8214908034849953, | |
| "grad_norm": 1.1169476509094238, | |
| "learning_rate": 1.9648397104446743e-05, | |
| "loss": 1.3519, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.8245885769603096, | |
| "grad_norm": 0.9429073929786682, | |
| "learning_rate": 1.959669079627715e-05, | |
| "loss": 1.3437, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.8276863504356244, | |
| "grad_norm": 0.9102051258087158, | |
| "learning_rate": 1.954498448810755e-05, | |
| "loss": 1.3629, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.830784123910939, | |
| "grad_norm": 0.8507852554321289, | |
| "learning_rate": 1.9493278179937955e-05, | |
| "loss": 1.3536, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.8338818973862536, | |
| "grad_norm": 0.8034945130348206, | |
| "learning_rate": 1.9441571871768357e-05, | |
| "loss": 1.3421, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.8369796708615682, | |
| "grad_norm": 0.9575487971305847, | |
| "learning_rate": 1.9389865563598762e-05, | |
| "loss": 1.3565, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.8400774443368828, | |
| "grad_norm": 0.8735955953598022, | |
| "learning_rate": 1.9338159255429164e-05, | |
| "loss": 1.3537, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.8431752178121976, | |
| "grad_norm": 0.834586501121521, | |
| "learning_rate": 1.928645294725957e-05, | |
| "loss": 1.3935, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.846272991287512, | |
| "grad_norm": 0.8274103999137878, | |
| "learning_rate": 1.923474663908997e-05, | |
| "loss": 1.347, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.8493707647628268, | |
| "grad_norm": 0.9628223180770874, | |
| "learning_rate": 1.9183040330920375e-05, | |
| "loss": 1.3623, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.8524685382381412, | |
| "grad_norm": 0.8151761293411255, | |
| "learning_rate": 1.9131334022750777e-05, | |
| "loss": 1.3565, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.855566311713456, | |
| "grad_norm": 0.8839893341064453, | |
| "learning_rate": 1.907962771458118e-05, | |
| "loss": 1.3636, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.8586640851887706, | |
| "grad_norm": 0.8234869837760925, | |
| "learning_rate": 1.9027921406411584e-05, | |
| "loss": 1.3188, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.8586640851887706, | |
| "eval_loss": 1.341654896736145, | |
| "eval_runtime": 499.7205, | |
| "eval_samples_per_second": 41.343, | |
| "eval_steps_per_second": 5.169, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.8617618586640852, | |
| "grad_norm": 0.7479894757270813, | |
| "learning_rate": 1.8976215098241985e-05, | |
| "loss": 1.3527, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.8648596321393998, | |
| "grad_norm": 0.8051818609237671, | |
| "learning_rate": 1.892450879007239e-05, | |
| "loss": 1.339, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.8679574056147144, | |
| "grad_norm": 0.9621079564094543, | |
| "learning_rate": 1.8872802481902792e-05, | |
| "loss": 1.3735, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.8710551790900292, | |
| "grad_norm": 0.9237180352210999, | |
| "learning_rate": 1.8821096173733197e-05, | |
| "loss": 1.3566, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.8741529525653435, | |
| "grad_norm": 1.00318443775177, | |
| "learning_rate": 1.87693898655636e-05, | |
| "loss": 1.3396, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.8772507260406583, | |
| "grad_norm": 0.8000593185424805, | |
| "learning_rate": 1.8717683557394004e-05, | |
| "loss": 1.3669, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.880348499515973, | |
| "grad_norm": 0.827609121799469, | |
| "learning_rate": 1.8665977249224406e-05, | |
| "loss": 1.3355, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.8834462729912875, | |
| "grad_norm": 1.0329563617706299, | |
| "learning_rate": 1.861427094105481e-05, | |
| "loss": 1.3708, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.8865440464666021, | |
| "grad_norm": 1.0466892719268799, | |
| "learning_rate": 1.8562564632885213e-05, | |
| "loss": 1.3493, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.8896418199419167, | |
| "grad_norm": 0.9400922060012817, | |
| "learning_rate": 1.8510858324715614e-05, | |
| "loss": 1.3441, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.8927395934172315, | |
| "grad_norm": 0.9035273194313049, | |
| "learning_rate": 1.845915201654602e-05, | |
| "loss": 1.3534, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.895837366892546, | |
| "grad_norm": 0.8702762126922607, | |
| "learning_rate": 1.840744570837642e-05, | |
| "loss": 1.3616, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.8989351403678607, | |
| "grad_norm": 0.9379782676696777, | |
| "learning_rate": 1.8355739400206826e-05, | |
| "loss": 1.3557, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.902032913843175, | |
| "grad_norm": 1.03324294090271, | |
| "learning_rate": 1.8304033092037228e-05, | |
| "loss": 1.3486, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.90513068731849, | |
| "grad_norm": 0.7788193821907043, | |
| "learning_rate": 1.8252326783867633e-05, | |
| "loss": 1.3658, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.9082284607938045, | |
| "grad_norm": 0.879900336265564, | |
| "learning_rate": 1.8200620475698034e-05, | |
| "loss": 1.3363, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.911326234269119, | |
| "grad_norm": 0.9988526105880737, | |
| "learning_rate": 1.814891416752844e-05, | |
| "loss": 1.3396, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.9144240077444337, | |
| "grad_norm": 1.0158812999725342, | |
| "learning_rate": 1.809720785935884e-05, | |
| "loss": 1.3513, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.9175217812197483, | |
| "grad_norm": 0.8834120035171509, | |
| "learning_rate": 1.8045501551189246e-05, | |
| "loss": 1.3536, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.920619554695063, | |
| "grad_norm": 1.0367848873138428, | |
| "learning_rate": 1.7993795243019648e-05, | |
| "loss": 1.3462, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.9237173281703774, | |
| "grad_norm": 0.9409236311912537, | |
| "learning_rate": 1.7942088934850053e-05, | |
| "loss": 1.3435, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.9268151016456923, | |
| "grad_norm": 0.7982214093208313, | |
| "learning_rate": 1.7890382626680455e-05, | |
| "loss": 1.3319, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.9299128751210066, | |
| "grad_norm": 1.1070462465286255, | |
| "learning_rate": 1.7838676318510856e-05, | |
| "loss": 1.3373, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.9330106485963214, | |
| "grad_norm": 1.0409610271453857, | |
| "learning_rate": 1.778697001034126e-05, | |
| "loss": 1.3551, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.936108422071636, | |
| "grad_norm": 0.9913906455039978, | |
| "learning_rate": 1.7735263702171663e-05, | |
| "loss": 1.3651, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.9392061955469506, | |
| "grad_norm": 0.9144983887672424, | |
| "learning_rate": 1.768355739400207e-05, | |
| "loss": 1.3688, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.9423039690222652, | |
| "grad_norm": 1.01792311668396, | |
| "learning_rate": 1.7631851085832473e-05, | |
| "loss": 1.3368, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.9454017424975798, | |
| "grad_norm": 0.8271951675415039, | |
| "learning_rate": 1.758014477766288e-05, | |
| "loss": 1.3565, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.9484995159728946, | |
| "grad_norm": 1.008579134941101, | |
| "learning_rate": 1.752843846949328e-05, | |
| "loss": 1.3498, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.951597289448209, | |
| "grad_norm": 1.0562330484390259, | |
| "learning_rate": 1.7476732161323682e-05, | |
| "loss": 1.3745, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.9546950629235238, | |
| "grad_norm": 0.9627982974052429, | |
| "learning_rate": 1.7425025853154087e-05, | |
| "loss": 1.3569, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.9577928363988384, | |
| "grad_norm": 1.1486949920654297, | |
| "learning_rate": 1.737331954498449e-05, | |
| "loss": 1.3576, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.960890609874153, | |
| "grad_norm": 0.8623875379562378, | |
| "learning_rate": 1.7321613236814894e-05, | |
| "loss": 1.3331, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.9639883833494676, | |
| "grad_norm": 0.8003185987472534, | |
| "learning_rate": 1.7269906928645295e-05, | |
| "loss": 1.3126, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.9670861568247822, | |
| "grad_norm": 0.8993198275566101, | |
| "learning_rate": 1.72182006204757e-05, | |
| "loss": 1.3348, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.9701839303000968, | |
| "grad_norm": 0.7497487664222717, | |
| "learning_rate": 1.7166494312306102e-05, | |
| "loss": 1.3587, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.9732817037754113, | |
| "grad_norm": 0.9242996573448181, | |
| "learning_rate": 1.7114788004136507e-05, | |
| "loss": 1.3526, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.9763794772507262, | |
| "grad_norm": 0.9362899661064148, | |
| "learning_rate": 1.706308169596691e-05, | |
| "loss": 1.3403, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.9794772507260405, | |
| "grad_norm": 0.9214730262756348, | |
| "learning_rate": 1.7011375387797314e-05, | |
| "loss": 1.3561, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.9825750242013553, | |
| "grad_norm": 1.1729867458343506, | |
| "learning_rate": 1.6959669079627716e-05, | |
| "loss": 1.3712, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.98567279767667, | |
| "grad_norm": 0.8579219579696655, | |
| "learning_rate": 1.690796277145812e-05, | |
| "loss": 1.3562, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.9887705711519845, | |
| "grad_norm": 0.819837212562561, | |
| "learning_rate": 1.6856256463288522e-05, | |
| "loss": 1.3196, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.9918683446272991, | |
| "grad_norm": 1.0892577171325684, | |
| "learning_rate": 1.6804550155118924e-05, | |
| "loss": 1.3139, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.9949661181026137, | |
| "grad_norm": 0.9745960831642151, | |
| "learning_rate": 1.675284384694933e-05, | |
| "loss": 1.3474, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.9980638915779285, | |
| "grad_norm": 0.9176591038703918, | |
| "learning_rate": 1.670113753877973e-05, | |
| "loss": 1.3377, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.001161665053243, | |
| "grad_norm": 0.8982537388801575, | |
| "learning_rate": 1.6649431230610136e-05, | |
| "loss": 1.3667, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.0042594385285577, | |
| "grad_norm": 0.8645797371864319, | |
| "learning_rate": 1.6597724922440538e-05, | |
| "loss": 1.3238, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.007357212003872, | |
| "grad_norm": 1.0574814081192017, | |
| "learning_rate": 1.6546018614270943e-05, | |
| "loss": 1.3436, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.010454985479187, | |
| "grad_norm": 0.8636010885238647, | |
| "learning_rate": 1.6494312306101344e-05, | |
| "loss": 1.3321, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.0135527589545013, | |
| "grad_norm": 1.040042519569397, | |
| "learning_rate": 1.644260599793175e-05, | |
| "loss": 1.3475, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.016650532429816, | |
| "grad_norm": 0.7811307907104492, | |
| "learning_rate": 1.639089968976215e-05, | |
| "loss": 1.3386, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.019748305905131, | |
| "grad_norm": 0.9275119304656982, | |
| "learning_rate": 1.6339193381592556e-05, | |
| "loss": 1.338, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.0228460793804452, | |
| "grad_norm": 0.8792182803153992, | |
| "learning_rate": 1.6287487073422958e-05, | |
| "loss": 1.341, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.02594385285576, | |
| "grad_norm": 0.9327546954154968, | |
| "learning_rate": 1.623578076525336e-05, | |
| "loss": 1.325, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.0290416263310744, | |
| "grad_norm": 0.9593343734741211, | |
| "learning_rate": 1.6184074457083765e-05, | |
| "loss": 1.3111, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.0321393998063892, | |
| "grad_norm": 0.8487372994422913, | |
| "learning_rate": 1.6132368148914166e-05, | |
| "loss": 1.3308, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.0352371732817036, | |
| "grad_norm": 1.0663917064666748, | |
| "learning_rate": 1.608066184074457e-05, | |
| "loss": 1.3126, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.0383349467570184, | |
| "grad_norm": 0.9352003931999207, | |
| "learning_rate": 1.6028955532574973e-05, | |
| "loss": 1.3374, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.041432720232333, | |
| "grad_norm": 0.8087659478187561, | |
| "learning_rate": 1.5977249224405378e-05, | |
| "loss": 1.3381, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.0445304937076476, | |
| "grad_norm": 0.8721085786819458, | |
| "learning_rate": 1.592554291623578e-05, | |
| "loss": 1.3492, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.0476282671829624, | |
| "grad_norm": 1.012121319770813, | |
| "learning_rate": 1.5873836608066185e-05, | |
| "loss": 1.3422, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.050726040658277, | |
| "grad_norm": 0.8746726512908936, | |
| "learning_rate": 1.5822130299896586e-05, | |
| "loss": 1.3518, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.0538238141335916, | |
| "grad_norm": 0.9453880786895752, | |
| "learning_rate": 1.577042399172699e-05, | |
| "loss": 1.3224, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.056921587608906, | |
| "grad_norm": 1.383927583694458, | |
| "learning_rate": 1.5718717683557393e-05, | |
| "loss": 1.3334, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.060019361084221, | |
| "grad_norm": 0.8216990232467651, | |
| "learning_rate": 1.56670113753878e-05, | |
| "loss": 1.3386, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.063117134559535, | |
| "grad_norm": 0.8967849612236023, | |
| "learning_rate": 1.5615305067218203e-05, | |
| "loss": 1.3212, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.06621490803485, | |
| "grad_norm": 0.960881233215332, | |
| "learning_rate": 1.5563598759048605e-05, | |
| "loss": 1.3269, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.069312681510165, | |
| "grad_norm": 0.8327577114105225, | |
| "learning_rate": 1.551189245087901e-05, | |
| "loss": 1.3226, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.072410454985479, | |
| "grad_norm": 0.9150763154029846, | |
| "learning_rate": 1.5460186142709412e-05, | |
| "loss": 1.3438, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.075508228460794, | |
| "grad_norm": 0.7916013598442078, | |
| "learning_rate": 1.5408479834539817e-05, | |
| "loss": 1.3305, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.0786060019361083, | |
| "grad_norm": 0.9902190566062927, | |
| "learning_rate": 1.535677352637022e-05, | |
| "loss": 1.3598, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.081703775411423, | |
| "grad_norm": 0.9081457853317261, | |
| "learning_rate": 1.5305067218200624e-05, | |
| "loss": 1.3469, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.0848015488867375, | |
| "grad_norm": 0.9101652503013611, | |
| "learning_rate": 1.5253360910031025e-05, | |
| "loss": 1.3147, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.0878993223620523, | |
| "grad_norm": 0.9023634791374207, | |
| "learning_rate": 1.5201654601861429e-05, | |
| "loss": 1.3674, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.0909970958373667, | |
| "grad_norm": 0.9344819188117981, | |
| "learning_rate": 1.5149948293691832e-05, | |
| "loss": 1.3074, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.0940948693126815, | |
| "grad_norm": 0.8912569880485535, | |
| "learning_rate": 1.5098241985522235e-05, | |
| "loss": 1.3398, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.0971926427879963, | |
| "grad_norm": 0.9664559960365295, | |
| "learning_rate": 1.5046535677352639e-05, | |
| "loss": 1.3286, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.1002904162633107, | |
| "grad_norm": 0.8646228909492493, | |
| "learning_rate": 1.4994829369183042e-05, | |
| "loss": 1.346, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.1033881897386255, | |
| "grad_norm": 0.939831554889679, | |
| "learning_rate": 1.4943123061013446e-05, | |
| "loss": 1.3479, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.10648596321394, | |
| "grad_norm": 1.0427614450454712, | |
| "learning_rate": 1.4891416752843847e-05, | |
| "loss": 1.3293, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.1095837366892547, | |
| "grad_norm": 0.8650059700012207, | |
| "learning_rate": 1.483971044467425e-05, | |
| "loss": 1.3368, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.112681510164569, | |
| "grad_norm": 0.8453037142753601, | |
| "learning_rate": 1.4788004136504654e-05, | |
| "loss": 1.3463, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.115779283639884, | |
| "grad_norm": 1.010087490081787, | |
| "learning_rate": 1.4736297828335057e-05, | |
| "loss": 1.3337, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.1188770571151982, | |
| "grad_norm": 0.9835523962974548, | |
| "learning_rate": 1.468459152016546e-05, | |
| "loss": 1.3423, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.121974830590513, | |
| "grad_norm": 0.9542858600616455, | |
| "learning_rate": 1.4632885211995864e-05, | |
| "loss": 1.3606, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.125072604065828, | |
| "grad_norm": 0.9768648147583008, | |
| "learning_rate": 1.4581178903826268e-05, | |
| "loss": 1.3388, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.1281703775411422, | |
| "grad_norm": 0.87848961353302, | |
| "learning_rate": 1.4529472595656671e-05, | |
| "loss": 1.3323, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.131268151016457, | |
| "grad_norm": 0.9899694919586182, | |
| "learning_rate": 1.4477766287487074e-05, | |
| "loss": 1.3188, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.1343659244917714, | |
| "grad_norm": 0.9858341813087463, | |
| "learning_rate": 1.4426059979317478e-05, | |
| "loss": 1.3228, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.1374636979670862, | |
| "grad_norm": 0.9731205105781555, | |
| "learning_rate": 1.4374353671147881e-05, | |
| "loss": 1.3143, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.1405614714424006, | |
| "grad_norm": 0.9824615716934204, | |
| "learning_rate": 1.4322647362978284e-05, | |
| "loss": 1.3515, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.1436592449177154, | |
| "grad_norm": 0.90585857629776, | |
| "learning_rate": 1.4270941054808686e-05, | |
| "loss": 1.3191, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.1467570183930302, | |
| "grad_norm": 1.0936884880065918, | |
| "learning_rate": 1.421923474663909e-05, | |
| "loss": 1.3297, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.1498547918683446, | |
| "grad_norm": 0.9065744280815125, | |
| "learning_rate": 1.4167528438469493e-05, | |
| "loss": 1.3311, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.1529525653436594, | |
| "grad_norm": 0.8460130095481873, | |
| "learning_rate": 1.4115822130299896e-05, | |
| "loss": 1.3343, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.156050338818974, | |
| "grad_norm": 0.7978271842002869, | |
| "learning_rate": 1.40641158221303e-05, | |
| "loss": 1.3213, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.1591481122942886, | |
| "grad_norm": 0.9271676540374756, | |
| "learning_rate": 1.4012409513960703e-05, | |
| "loss": 1.3292, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.162245885769603, | |
| "grad_norm": 0.963083028793335, | |
| "learning_rate": 1.3960703205791106e-05, | |
| "loss": 1.3412, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.165343659244918, | |
| "grad_norm": 0.8550283908843994, | |
| "learning_rate": 1.390899689762151e-05, | |
| "loss": 1.3149, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.168441432720232, | |
| "grad_norm": 1.3056062459945679, | |
| "learning_rate": 1.3857290589451913e-05, | |
| "loss": 1.3165, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.171539206195547, | |
| "grad_norm": 0.825268566608429, | |
| "learning_rate": 1.3805584281282317e-05, | |
| "loss": 1.3398, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.1746369796708613, | |
| "grad_norm": 0.8259047269821167, | |
| "learning_rate": 1.375387797311272e-05, | |
| "loss": 1.3278, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.177734753146176, | |
| "grad_norm": 0.7692115306854248, | |
| "learning_rate": 1.3702171664943123e-05, | |
| "loss": 1.3364, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.180832526621491, | |
| "grad_norm": 0.9740896224975586, | |
| "learning_rate": 1.3650465356773525e-05, | |
| "loss": 1.3327, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.1839303000968053, | |
| "grad_norm": 0.7695585489273071, | |
| "learning_rate": 1.3598759048603928e-05, | |
| "loss": 1.3565, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.18702807357212, | |
| "grad_norm": 0.8049722909927368, | |
| "learning_rate": 1.3547052740434332e-05, | |
| "loss": 1.3434, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.1901258470474345, | |
| "grad_norm": 0.8844389915466309, | |
| "learning_rate": 1.3495346432264739e-05, | |
| "loss": 1.3263, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.1932236205227493, | |
| "grad_norm": 0.7504433393478394, | |
| "learning_rate": 1.3443640124095142e-05, | |
| "loss": 1.3431, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.1963213939980637, | |
| "grad_norm": 0.9303148984909058, | |
| "learning_rate": 1.3391933815925545e-05, | |
| "loss": 1.3294, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.1994191674733785, | |
| "grad_norm": 0.9886261224746704, | |
| "learning_rate": 1.3340227507755949e-05, | |
| "loss": 1.3321, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.2025169409486933, | |
| "grad_norm": 0.8835451006889343, | |
| "learning_rate": 1.3288521199586352e-05, | |
| "loss": 1.321, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.2056147144240077, | |
| "grad_norm": 1.0087958574295044, | |
| "learning_rate": 1.3236814891416754e-05, | |
| "loss": 1.3516, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.2087124878993225, | |
| "grad_norm": 0.8284295201301575, | |
| "learning_rate": 1.3185108583247157e-05, | |
| "loss": 1.321, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.211810261374637, | |
| "grad_norm": 0.8967974781990051, | |
| "learning_rate": 1.313340227507756e-05, | |
| "loss": 1.3416, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.2149080348499517, | |
| "grad_norm": 1.1133179664611816, | |
| "learning_rate": 1.3081695966907964e-05, | |
| "loss": 1.3238, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.218005808325266, | |
| "grad_norm": 0.9074902534484863, | |
| "learning_rate": 1.3029989658738367e-05, | |
| "loss": 1.3198, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.221103581800581, | |
| "grad_norm": 0.8816152215003967, | |
| "learning_rate": 1.297828335056877e-05, | |
| "loss": 1.3507, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.2242013552758952, | |
| "grad_norm": 1.0369545221328735, | |
| "learning_rate": 1.2926577042399174e-05, | |
| "loss": 1.297, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.22729912875121, | |
| "grad_norm": 0.8075978755950928, | |
| "learning_rate": 1.2874870734229577e-05, | |
| "loss": 1.3501, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.230396902226525, | |
| "grad_norm": 1.2508447170257568, | |
| "learning_rate": 1.282316442605998e-05, | |
| "loss": 1.3539, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.2334946757018392, | |
| "grad_norm": 0.9969581365585327, | |
| "learning_rate": 1.2771458117890384e-05, | |
| "loss": 1.3049, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.236592449177154, | |
| "grad_norm": 0.921631932258606, | |
| "learning_rate": 1.2719751809720788e-05, | |
| "loss": 1.3437, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.2396902226524684, | |
| "grad_norm": 1.0279971361160278, | |
| "learning_rate": 1.2668045501551191e-05, | |
| "loss": 1.3413, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.2427879961277832, | |
| "grad_norm": 1.0447874069213867, | |
| "learning_rate": 1.2616339193381593e-05, | |
| "loss": 1.3308, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.2458857696030976, | |
| "grad_norm": 0.843579113483429, | |
| "learning_rate": 1.2564632885211996e-05, | |
| "loss": 1.3133, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.2489835430784124, | |
| "grad_norm": 0.8838000297546387, | |
| "learning_rate": 1.25129265770424e-05, | |
| "loss": 1.3288, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.252081316553727, | |
| "grad_norm": 0.9393033385276794, | |
| "learning_rate": 1.2461220268872803e-05, | |
| "loss": 1.3425, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.2551790900290416, | |
| "grad_norm": 0.8526115417480469, | |
| "learning_rate": 1.2409513960703206e-05, | |
| "loss": 1.3372, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.2582768635043564, | |
| "grad_norm": 0.9398928880691528, | |
| "learning_rate": 1.235780765253361e-05, | |
| "loss": 1.3173, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.261374636979671, | |
| "grad_norm": 1.0209931135177612, | |
| "learning_rate": 1.2306101344364013e-05, | |
| "loss": 1.3368, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.2644724104549856, | |
| "grad_norm": 0.9040766954421997, | |
| "learning_rate": 1.2254395036194416e-05, | |
| "loss": 1.357, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.2675701839303, | |
| "grad_norm": 1.052363395690918, | |
| "learning_rate": 1.220268872802482e-05, | |
| "loss": 1.3252, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.270667957405615, | |
| "grad_norm": 1.0517691373825073, | |
| "learning_rate": 1.2150982419855223e-05, | |
| "loss": 1.333, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.273765730880929, | |
| "grad_norm": 1.0435551404953003, | |
| "learning_rate": 1.2099276111685626e-05, | |
| "loss": 1.3338, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.276863504356244, | |
| "grad_norm": 0.9312208890914917, | |
| "learning_rate": 1.204756980351603e-05, | |
| "loss": 1.318, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.2799612778315588, | |
| "grad_norm": 1.1775990724563599, | |
| "learning_rate": 1.1995863495346431e-05, | |
| "loss": 1.3202, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.283059051306873, | |
| "grad_norm": 1.0058298110961914, | |
| "learning_rate": 1.1944157187176836e-05, | |
| "loss": 1.3165, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.286156824782188, | |
| "grad_norm": 1.1250808238983154, | |
| "learning_rate": 1.189245087900724e-05, | |
| "loss": 1.3298, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.2892545982575023, | |
| "grad_norm": 0.8414492607116699, | |
| "learning_rate": 1.1840744570837643e-05, | |
| "loss": 1.3391, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.292352371732817, | |
| "grad_norm": 0.8035596609115601, | |
| "learning_rate": 1.1789038262668047e-05, | |
| "loss": 1.3327, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.2954501452081315, | |
| "grad_norm": 0.8101987242698669, | |
| "learning_rate": 1.173733195449845e-05, | |
| "loss": 1.3102, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.2985479186834463, | |
| "grad_norm": 0.9012944102287292, | |
| "learning_rate": 1.1685625646328853e-05, | |
| "loss": 1.3534, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.301645692158761, | |
| "grad_norm": 0.7570741176605225, | |
| "learning_rate": 1.1633919338159257e-05, | |
| "loss": 1.317, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.3047434656340755, | |
| "grad_norm": 0.7619568109512329, | |
| "learning_rate": 1.158221302998966e-05, | |
| "loss": 1.3601, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.3078412391093903, | |
| "grad_norm": 0.9099006056785583, | |
| "learning_rate": 1.1530506721820064e-05, | |
| "loss": 1.3239, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.3109390125847047, | |
| "grad_norm": 0.7822088599205017, | |
| "learning_rate": 1.1478800413650465e-05, | |
| "loss": 1.3185, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.3140367860600195, | |
| "grad_norm": 0.862535834312439, | |
| "learning_rate": 1.1427094105480869e-05, | |
| "loss": 1.3329, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.317134559535334, | |
| "grad_norm": 1.3833560943603516, | |
| "learning_rate": 1.1375387797311272e-05, | |
| "loss": 1.3377, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.3202323330106487, | |
| "grad_norm": 0.8927620053291321, | |
| "learning_rate": 1.1323681489141675e-05, | |
| "loss": 1.3084, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.323330106485963, | |
| "grad_norm": 0.8435688018798828, | |
| "learning_rate": 1.1271975180972079e-05, | |
| "loss": 1.3322, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.326427879961278, | |
| "grad_norm": 0.9227290153503418, | |
| "learning_rate": 1.1220268872802482e-05, | |
| "loss": 1.3394, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.3295256534365922, | |
| "grad_norm": 0.8425549268722534, | |
| "learning_rate": 1.1168562564632885e-05, | |
| "loss": 1.3486, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.332623426911907, | |
| "grad_norm": 0.8057267069816589, | |
| "learning_rate": 1.1116856256463289e-05, | |
| "loss": 1.3151, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.335721200387222, | |
| "grad_norm": 0.9685359597206116, | |
| "learning_rate": 1.1065149948293692e-05, | |
| "loss": 1.339, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.3388189738625362, | |
| "grad_norm": 0.9330448508262634, | |
| "learning_rate": 1.1013443640124096e-05, | |
| "loss": 1.3112, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.341916747337851, | |
| "grad_norm": 0.9905188083648682, | |
| "learning_rate": 1.0961737331954499e-05, | |
| "loss": 1.3447, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.3450145208131654, | |
| "grad_norm": 0.9230495691299438, | |
| "learning_rate": 1.0910031023784902e-05, | |
| "loss": 1.3457, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.3481122942884802, | |
| "grad_norm": 0.7517797350883484, | |
| "learning_rate": 1.0858324715615306e-05, | |
| "loss": 1.3053, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.3512100677637946, | |
| "grad_norm": 1.13046395778656, | |
| "learning_rate": 1.0806618407445709e-05, | |
| "loss": 1.3442, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.3543078412391094, | |
| "grad_norm": 0.9535288214683533, | |
| "learning_rate": 1.0754912099276113e-05, | |
| "loss": 1.3437, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.3574056147144242, | |
| "grad_norm": 0.9758418798446655, | |
| "learning_rate": 1.0703205791106516e-05, | |
| "loss": 1.3327, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.3605033881897386, | |
| "grad_norm": 0.8258436322212219, | |
| "learning_rate": 1.065149948293692e-05, | |
| "loss": 1.3508, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.3636011616650534, | |
| "grad_norm": 0.9934467077255249, | |
| "learning_rate": 1.0599793174767323e-05, | |
| "loss": 1.3058, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.3666989351403678, | |
| "grad_norm": 0.8944813013076782, | |
| "learning_rate": 1.0548086866597726e-05, | |
| "loss": 1.3083, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.3697967086156826, | |
| "grad_norm": 0.8731038570404053, | |
| "learning_rate": 1.049638055842813e-05, | |
| "loss": 1.3299, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.372894482090997, | |
| "grad_norm": 0.9087830781936646, | |
| "learning_rate": 1.0444674250258533e-05, | |
| "loss": 1.3284, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.3759922555663118, | |
| "grad_norm": 0.8245522975921631, | |
| "learning_rate": 1.0392967942088936e-05, | |
| "loss": 1.3308, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.379090029041626, | |
| "grad_norm": 0.9423663020133972, | |
| "learning_rate": 1.0341261633919338e-05, | |
| "loss": 1.3403, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.382187802516941, | |
| "grad_norm": 0.9050272107124329, | |
| "learning_rate": 1.0289555325749741e-05, | |
| "loss": 1.3267, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.3852855759922553, | |
| "grad_norm": 0.7859249711036682, | |
| "learning_rate": 1.0237849017580145e-05, | |
| "loss": 1.3241, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.38838334946757, | |
| "grad_norm": 0.8981680274009705, | |
| "learning_rate": 1.0186142709410548e-05, | |
| "loss": 1.3367, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.391481122942885, | |
| "grad_norm": 0.9353106021881104, | |
| "learning_rate": 1.0134436401240951e-05, | |
| "loss": 1.3391, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.3945788964181993, | |
| "grad_norm": 0.9247782826423645, | |
| "learning_rate": 1.0082730093071355e-05, | |
| "loss": 1.3047, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.397676669893514, | |
| "grad_norm": 1.141741156578064, | |
| "learning_rate": 1.0031023784901758e-05, | |
| "loss": 1.3307, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.4007744433688285, | |
| "grad_norm": 0.8088661432266235, | |
| "learning_rate": 9.979317476732161e-06, | |
| "loss": 1.3363, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.4038722168441433, | |
| "grad_norm": 0.8670098185539246, | |
| "learning_rate": 9.927611168562565e-06, | |
| "loss": 1.3425, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.4069699903194577, | |
| "grad_norm": 0.8612157702445984, | |
| "learning_rate": 9.875904860392968e-06, | |
| "loss": 1.3253, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.4100677637947725, | |
| "grad_norm": 0.9053961634635925, | |
| "learning_rate": 9.824198552223372e-06, | |
| "loss": 1.3154, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.4131655372700873, | |
| "grad_norm": 0.84452223777771, | |
| "learning_rate": 9.772492244053775e-06, | |
| "loss": 1.3404, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.4162633107454017, | |
| "grad_norm": 0.839474618434906, | |
| "learning_rate": 9.720785935884178e-06, | |
| "loss": 1.3392, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.4193610842207165, | |
| "grad_norm": 0.8587937355041504, | |
| "learning_rate": 9.669079627714582e-06, | |
| "loss": 1.3269, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.422458857696031, | |
| "grad_norm": 0.781345009803772, | |
| "learning_rate": 9.617373319544985e-06, | |
| "loss": 1.3248, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.4255566311713457, | |
| "grad_norm": 0.775817334651947, | |
| "learning_rate": 9.565667011375389e-06, | |
| "loss": 1.3338, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.42865440464666, | |
| "grad_norm": 0.7844461798667908, | |
| "learning_rate": 9.513960703205792e-06, | |
| "loss": 1.3266, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.431752178121975, | |
| "grad_norm": 0.7972658276557922, | |
| "learning_rate": 9.462254395036195e-06, | |
| "loss": 1.3392, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.4348499515972897, | |
| "grad_norm": 0.850536048412323, | |
| "learning_rate": 9.410548086866599e-06, | |
| "loss": 1.3425, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.437947725072604, | |
| "grad_norm": 1.1697067022323608, | |
| "learning_rate": 9.358841778697002e-06, | |
| "loss": 1.3243, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.441045498547919, | |
| "grad_norm": 0.8385635614395142, | |
| "learning_rate": 9.307135470527405e-06, | |
| "loss": 1.3275, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.4441432720232332, | |
| "grad_norm": 0.8202130794525146, | |
| "learning_rate": 9.255429162357807e-06, | |
| "loss": 1.3032, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.447241045498548, | |
| "grad_norm": 0.8149744272232056, | |
| "learning_rate": 9.20372285418821e-06, | |
| "loss": 1.3303, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.4503388189738624, | |
| "grad_norm": 0.9332587122917175, | |
| "learning_rate": 9.152016546018614e-06, | |
| "loss": 1.3391, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.4534365924491772, | |
| "grad_norm": 1.014574646949768, | |
| "learning_rate": 9.100310237849017e-06, | |
| "loss": 1.3455, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.4565343659244916, | |
| "grad_norm": 0.7690302133560181, | |
| "learning_rate": 9.04860392967942e-06, | |
| "loss": 1.3276, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.4596321393998064, | |
| "grad_norm": 1.0496488809585571, | |
| "learning_rate": 8.996897621509824e-06, | |
| "loss": 1.3287, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.4627299128751208, | |
| "grad_norm": 1.0259231328964233, | |
| "learning_rate": 8.945191313340227e-06, | |
| "loss": 1.3298, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.4658276863504356, | |
| "grad_norm": 0.8106045126914978, | |
| "learning_rate": 8.89348500517063e-06, | |
| "loss": 1.311, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.4689254598257504, | |
| "grad_norm": 0.9428908824920654, | |
| "learning_rate": 8.841778697001036e-06, | |
| "loss": 1.336, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.4720232333010648, | |
| "grad_norm": 0.9283081293106079, | |
| "learning_rate": 8.79007238883144e-06, | |
| "loss": 1.3321, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.4751210067763796, | |
| "grad_norm": 0.97194504737854, | |
| "learning_rate": 8.738366080661841e-06, | |
| "loss": 1.3358, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.478218780251694, | |
| "grad_norm": 0.8776614665985107, | |
| "learning_rate": 8.686659772492244e-06, | |
| "loss": 1.3296, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.478218780251694, | |
| "eval_loss": 1.326022982597351, | |
| "eval_runtime": 499.9577, | |
| "eval_samples_per_second": 41.323, | |
| "eval_steps_per_second": 5.166, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.4813165537270088, | |
| "grad_norm": 0.8625020384788513, | |
| "learning_rate": 8.634953464322648e-06, | |
| "loss": 1.3126, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.484414327202323, | |
| "grad_norm": 0.7639974355697632, | |
| "learning_rate": 8.583247156153051e-06, | |
| "loss": 1.3452, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.487512100677638, | |
| "grad_norm": 0.7709546089172363, | |
| "learning_rate": 8.531540847983454e-06, | |
| "loss": 1.3427, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.4906098741529528, | |
| "grad_norm": 0.7982486486434937, | |
| "learning_rate": 8.479834539813858e-06, | |
| "loss": 1.3243, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.493707647628267, | |
| "grad_norm": 0.9817091226577759, | |
| "learning_rate": 8.428128231644261e-06, | |
| "loss": 1.3376, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.496805421103582, | |
| "grad_norm": 0.7842255234718323, | |
| "learning_rate": 8.376421923474665e-06, | |
| "loss": 1.3327, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.4999031945788963, | |
| "grad_norm": 0.8229419589042664, | |
| "learning_rate": 8.324715615305068e-06, | |
| "loss": 1.3284, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.503000968054211, | |
| "grad_norm": 1.0628559589385986, | |
| "learning_rate": 8.273009307135471e-06, | |
| "loss": 1.3413, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.5060987415295255, | |
| "grad_norm": 0.8059125542640686, | |
| "learning_rate": 8.221302998965875e-06, | |
| "loss": 1.3313, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.5091965150048403, | |
| "grad_norm": 0.8325386047363281, | |
| "learning_rate": 8.169596690796278e-06, | |
| "loss": 1.3221, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.512294288480155, | |
| "grad_norm": 0.9245994091033936, | |
| "learning_rate": 8.11789038262668e-06, | |
| "loss": 1.3133, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.5153920619554695, | |
| "grad_norm": 0.9119100570678711, | |
| "learning_rate": 8.066184074457083e-06, | |
| "loss": 1.3211, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.518489835430784, | |
| "grad_norm": 0.9153457283973694, | |
| "learning_rate": 8.014477766287486e-06, | |
| "loss": 1.3226, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.5215876089060987, | |
| "grad_norm": 0.8128604292869568, | |
| "learning_rate": 7.96277145811789e-06, | |
| "loss": 1.3218, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.5246853823814135, | |
| "grad_norm": 0.8200322985649109, | |
| "learning_rate": 7.911065149948293e-06, | |
| "loss": 1.3358, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.527783155856728, | |
| "grad_norm": 0.9329957365989685, | |
| "learning_rate": 7.859358841778697e-06, | |
| "loss": 1.3175, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.5308809293320427, | |
| "grad_norm": 0.9390591979026794, | |
| "learning_rate": 7.807652533609102e-06, | |
| "loss": 1.3442, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.5339787028073575, | |
| "grad_norm": 0.8232764601707458, | |
| "learning_rate": 7.755946225439505e-06, | |
| "loss": 1.3388, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.537076476282672, | |
| "grad_norm": 0.810404360294342, | |
| "learning_rate": 7.704239917269908e-06, | |
| "loss": 1.3216, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.5401742497579862, | |
| "grad_norm": 0.7799009084701538, | |
| "learning_rate": 7.652533609100312e-06, | |
| "loss": 1.3386, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.543272023233301, | |
| "grad_norm": 0.8875685334205627, | |
| "learning_rate": 7.600827300930714e-06, | |
| "loss": 1.3168, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.546369796708616, | |
| "grad_norm": 0.9037766456604004, | |
| "learning_rate": 7.549120992761118e-06, | |
| "loss": 1.3129, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.54946757018393, | |
| "grad_norm": 0.825951099395752, | |
| "learning_rate": 7.497414684591521e-06, | |
| "loss": 1.306, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.552565343659245, | |
| "grad_norm": 0.9290631413459778, | |
| "learning_rate": 7.445708376421924e-06, | |
| "loss": 1.3159, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.5556631171345594, | |
| "grad_norm": 0.9565717577934265, | |
| "learning_rate": 7.394002068252327e-06, | |
| "loss": 1.3072, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.558760890609874, | |
| "grad_norm": 0.7212813496589661, | |
| "learning_rate": 7.34229576008273e-06, | |
| "loss": 1.3388, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.5618586640851886, | |
| "grad_norm": 0.950728178024292, | |
| "learning_rate": 7.290589451913134e-06, | |
| "loss": 1.3248, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.5649564375605034, | |
| "grad_norm": 0.850387454032898, | |
| "learning_rate": 7.238883143743537e-06, | |
| "loss": 1.3425, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.568054211035818, | |
| "grad_norm": 0.9093496203422546, | |
| "learning_rate": 7.1871768355739405e-06, | |
| "loss": 1.3259, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.5711519845111326, | |
| "grad_norm": 0.8944652676582336, | |
| "learning_rate": 7.135470527404343e-06, | |
| "loss": 1.333, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.5742497579864474, | |
| "grad_norm": 0.7491154670715332, | |
| "learning_rate": 7.0837642192347465e-06, | |
| "loss": 1.3226, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.5773475314617618, | |
| "grad_norm": 0.8667898178100586, | |
| "learning_rate": 7.03205791106515e-06, | |
| "loss": 1.3275, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.5804453049370766, | |
| "grad_norm": 1.0023432970046997, | |
| "learning_rate": 6.980351602895553e-06, | |
| "loss": 1.3132, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.583543078412391, | |
| "grad_norm": 0.8249279856681824, | |
| "learning_rate": 6.928645294725957e-06, | |
| "loss": 1.3388, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.5866408518877058, | |
| "grad_norm": 0.9107469320297241, | |
| "learning_rate": 6.87693898655636e-06, | |
| "loss": 1.3089, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.5897386253630206, | |
| "grad_norm": 0.8998382687568665, | |
| "learning_rate": 6.8252326783867625e-06, | |
| "loss": 1.3584, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.592836398838335, | |
| "grad_norm": 0.9280401468276978, | |
| "learning_rate": 6.773526370217166e-06, | |
| "loss": 1.3507, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.5959341723136493, | |
| "grad_norm": 0.7948800921440125, | |
| "learning_rate": 6.721820062047571e-06, | |
| "loss": 1.3209, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.599031945788964, | |
| "grad_norm": 0.7695499062538147, | |
| "learning_rate": 6.670113753877974e-06, | |
| "loss": 1.3084, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.602129719264279, | |
| "grad_norm": 0.8108364939689636, | |
| "learning_rate": 6.618407445708377e-06, | |
| "loss": 1.3213, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.6052274927395933, | |
| "grad_norm": 0.9111447930335999, | |
| "learning_rate": 6.56670113753878e-06, | |
| "loss": 1.2948, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.608325266214908, | |
| "grad_norm": 1.0574729442596436, | |
| "learning_rate": 6.514994829369184e-06, | |
| "loss": 1.3369, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.6114230396902225, | |
| "grad_norm": 0.8202560544013977, | |
| "learning_rate": 6.463288521199587e-06, | |
| "loss": 1.3296, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.6145208131655373, | |
| "grad_norm": 0.754135251045227, | |
| "learning_rate": 6.41158221302999e-06, | |
| "loss": 1.3332, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.6176185866408517, | |
| "grad_norm": 1.0181078910827637, | |
| "learning_rate": 6.359875904860394e-06, | |
| "loss": 1.3354, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.6207163601161665, | |
| "grad_norm": 0.8614677786827087, | |
| "learning_rate": 6.308169596690796e-06, | |
| "loss": 1.3197, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.6238141335914813, | |
| "grad_norm": 0.8117063641548157, | |
| "learning_rate": 6.2564632885212e-06, | |
| "loss": 1.3158, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.6269119070667957, | |
| "grad_norm": 0.9908379912376404, | |
| "learning_rate": 6.204756980351603e-06, | |
| "loss": 1.3188, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.6300096805421105, | |
| "grad_norm": 0.9882792830467224, | |
| "learning_rate": 6.153050672182006e-06, | |
| "loss": 1.3113, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.633107454017425, | |
| "grad_norm": 0.7588277459144592, | |
| "learning_rate": 6.10134436401241e-06, | |
| "loss": 1.316, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.6362052274927397, | |
| "grad_norm": 0.8949116468429565, | |
| "learning_rate": 6.049638055842813e-06, | |
| "loss": 1.3356, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.639303000968054, | |
| "grad_norm": 0.8811196088790894, | |
| "learning_rate": 5.997931747673216e-06, | |
| "loss": 1.3548, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.642400774443369, | |
| "grad_norm": 0.8543995022773743, | |
| "learning_rate": 5.94622543950362e-06, | |
| "loss": 1.3364, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.6454985479186837, | |
| "grad_norm": 0.7959784865379333, | |
| "learning_rate": 5.894519131334023e-06, | |
| "loss": 1.3041, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.648596321393998, | |
| "grad_norm": 0.7849721908569336, | |
| "learning_rate": 5.842812823164427e-06, | |
| "loss": 1.3543, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.651694094869313, | |
| "grad_norm": 0.7859067916870117, | |
| "learning_rate": 5.79110651499483e-06, | |
| "loss": 1.311, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.654791868344627, | |
| "grad_norm": 0.9818124771118164, | |
| "learning_rate": 5.739400206825233e-06, | |
| "loss": 1.3261, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.657889641819942, | |
| "grad_norm": 0.8855445981025696, | |
| "learning_rate": 5.687693898655636e-06, | |
| "loss": 1.3115, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.6609874152952564, | |
| "grad_norm": 0.8744826316833496, | |
| "learning_rate": 5.635987590486039e-06, | |
| "loss": 1.3077, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.664085188770571, | |
| "grad_norm": 0.8999655246734619, | |
| "learning_rate": 5.584281282316443e-06, | |
| "loss": 1.3253, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.667182962245886, | |
| "grad_norm": 0.8045452833175659, | |
| "learning_rate": 5.532574974146846e-06, | |
| "loss": 1.3129, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.6702807357212004, | |
| "grad_norm": 0.8236184120178223, | |
| "learning_rate": 5.4808686659772495e-06, | |
| "loss": 1.3116, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.6733785091965148, | |
| "grad_norm": 0.8479505777359009, | |
| "learning_rate": 5.429162357807653e-06, | |
| "loss": 1.3516, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.6764762826718296, | |
| "grad_norm": 0.9431778192520142, | |
| "learning_rate": 5.377456049638056e-06, | |
| "loss": 1.3424, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.6795740561471444, | |
| "grad_norm": 0.9968345165252686, | |
| "learning_rate": 5.32574974146846e-06, | |
| "loss": 1.3224, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.6826718296224588, | |
| "grad_norm": 0.9029082655906677, | |
| "learning_rate": 5.274043433298863e-06, | |
| "loss": 1.3157, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.6857696030977736, | |
| "grad_norm": 1.0283854007720947, | |
| "learning_rate": 5.222337125129266e-06, | |
| "loss": 1.3191, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.688867376573088, | |
| "grad_norm": 0.7591708898544312, | |
| "learning_rate": 5.170630816959669e-06, | |
| "loss": 1.3148, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.6919651500484028, | |
| "grad_norm": 0.8233999609947205, | |
| "learning_rate": 5.118924508790072e-06, | |
| "loss": 1.323, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.695062923523717, | |
| "grad_norm": 0.82486891746521, | |
| "learning_rate": 5.067218200620476e-06, | |
| "loss": 1.3123, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.698160696999032, | |
| "grad_norm": 0.9283408522605896, | |
| "learning_rate": 5.015511892450879e-06, | |
| "loss": 1.3116, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.7012584704743468, | |
| "grad_norm": 0.8742640614509583, | |
| "learning_rate": 4.9638055842812824e-06, | |
| "loss": 1.3175, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.704356243949661, | |
| "grad_norm": 0.8373504877090454, | |
| "learning_rate": 4.912099276111686e-06, | |
| "loss": 1.3284, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.707454017424976, | |
| "grad_norm": 0.9012168645858765, | |
| "learning_rate": 4.860392967942089e-06, | |
| "loss": 1.3171, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.7105517909002903, | |
| "grad_norm": 0.9048004746437073, | |
| "learning_rate": 4.8086866597724926e-06, | |
| "loss": 1.3148, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.713649564375605, | |
| "grad_norm": 0.8509101271629333, | |
| "learning_rate": 4.756980351602896e-06, | |
| "loss": 1.3238, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.7167473378509195, | |
| "grad_norm": 0.8519226908683777, | |
| "learning_rate": 4.705274043433299e-06, | |
| "loss": 1.3065, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.7198451113262343, | |
| "grad_norm": 0.869109034538269, | |
| "learning_rate": 4.653567735263703e-06, | |
| "loss": 1.3211, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.722942884801549, | |
| "grad_norm": 0.9159611463546753, | |
| "learning_rate": 4.601861427094105e-06, | |
| "loss": 1.334, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.7260406582768635, | |
| "grad_norm": 1.0725860595703125, | |
| "learning_rate": 4.550155118924509e-06, | |
| "loss": 1.33, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.729138431752178, | |
| "grad_norm": 1.0192230939865112, | |
| "learning_rate": 4.498448810754912e-06, | |
| "loss": 1.3405, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.7322362052274927, | |
| "grad_norm": 0.7872644066810608, | |
| "learning_rate": 4.446742502585315e-06, | |
| "loss": 1.3092, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.7353339787028075, | |
| "grad_norm": 0.8949226140975952, | |
| "learning_rate": 4.39503619441572e-06, | |
| "loss": 1.339, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.738431752178122, | |
| "grad_norm": 0.9832562208175659, | |
| "learning_rate": 4.343329886246122e-06, | |
| "loss": 1.2972, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.7415295256534367, | |
| "grad_norm": 1.0453011989593506, | |
| "learning_rate": 4.2916235780765255e-06, | |
| "loss": 1.3298, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.7446272991287515, | |
| "grad_norm": 0.9331648945808411, | |
| "learning_rate": 4.239917269906929e-06, | |
| "loss": 1.3314, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.747725072604066, | |
| "grad_norm": 0.7941224575042725, | |
| "learning_rate": 4.188210961737332e-06, | |
| "loss": 1.3243, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.75082284607938, | |
| "grad_norm": 0.7843746542930603, | |
| "learning_rate": 4.136504653567736e-06, | |
| "loss": 1.3183, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.753920619554695, | |
| "grad_norm": 0.8948341608047485, | |
| "learning_rate": 4.084798345398139e-06, | |
| "loss": 1.3386, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.75701839303001, | |
| "grad_norm": 0.8391401171684265, | |
| "learning_rate": 4.0330920372285416e-06, | |
| "loss": 1.3312, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.760116166505324, | |
| "grad_norm": 0.7821982502937317, | |
| "learning_rate": 3.981385729058945e-06, | |
| "loss": 1.3155, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.763213939980639, | |
| "grad_norm": 0.8525044322013855, | |
| "learning_rate": 3.929679420889348e-06, | |
| "loss": 1.3249, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.7663117134559534, | |
| "grad_norm": 0.8398758172988892, | |
| "learning_rate": 3.8779731127197525e-06, | |
| "loss": 1.3264, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.769409486931268, | |
| "grad_norm": 0.9518385529518127, | |
| "learning_rate": 3.826266804550156e-06, | |
| "loss": 1.3416, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.7725072604065826, | |
| "grad_norm": 0.7718288898468018, | |
| "learning_rate": 3.774560496380559e-06, | |
| "loss": 1.3353, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.7756050338818974, | |
| "grad_norm": 0.8064902424812317, | |
| "learning_rate": 3.722854188210962e-06, | |
| "loss": 1.3179, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.778702807357212, | |
| "grad_norm": 0.9301968216896057, | |
| "learning_rate": 3.671147880041365e-06, | |
| "loss": 1.3467, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.7818005808325266, | |
| "grad_norm": 0.8251471519470215, | |
| "learning_rate": 3.6194415718717686e-06, | |
| "loss": 1.3189, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.7848983543078414, | |
| "grad_norm": 0.9618167877197266, | |
| "learning_rate": 3.5677352637021715e-06, | |
| "loss": 1.3134, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.7879961277831558, | |
| "grad_norm": 0.8314012885093689, | |
| "learning_rate": 3.516028955532575e-06, | |
| "loss": 1.3318, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.7910939012584706, | |
| "grad_norm": 0.7383018136024475, | |
| "learning_rate": 3.4643226473629783e-06, | |
| "loss": 1.3161, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.794191674733785, | |
| "grad_norm": 0.7468191385269165, | |
| "learning_rate": 3.4126163391933813e-06, | |
| "loss": 1.3165, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.7972894482090997, | |
| "grad_norm": 0.9003493785858154, | |
| "learning_rate": 3.3609100310237855e-06, | |
| "loss": 1.3431, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.8003872216844146, | |
| "grad_norm": 0.9882494807243347, | |
| "learning_rate": 3.3092037228541884e-06, | |
| "loss": 1.347, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.803484995159729, | |
| "grad_norm": 0.8110159039497375, | |
| "learning_rate": 3.257497414684592e-06, | |
| "loss": 1.3262, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.8065827686350433, | |
| "grad_norm": 1.0101655721664429, | |
| "learning_rate": 3.205791106514995e-06, | |
| "loss": 1.3372, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.809680542110358, | |
| "grad_norm": 0.8719637393951416, | |
| "learning_rate": 3.154084798345398e-06, | |
| "loss": 1.3051, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.812778315585673, | |
| "grad_norm": 0.868802011013031, | |
| "learning_rate": 3.1023784901758015e-06, | |
| "loss": 1.3299, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.8158760890609873, | |
| "grad_norm": 0.832973301410675, | |
| "learning_rate": 3.050672182006205e-06, | |
| "loss": 1.3182, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.818973862536302, | |
| "grad_norm": 0.8184823393821716, | |
| "learning_rate": 2.998965873836608e-06, | |
| "loss": 1.3083, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.822071636011617, | |
| "grad_norm": 0.909972071647644, | |
| "learning_rate": 2.9472595656670117e-06, | |
| "loss": 1.3224, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.8251694094869313, | |
| "grad_norm": 0.7917800545692444, | |
| "learning_rate": 2.895553257497415e-06, | |
| "loss": 1.325, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.8282671829622457, | |
| "grad_norm": 0.847621738910675, | |
| "learning_rate": 2.843846949327818e-06, | |
| "loss": 1.2891, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.8313649564375605, | |
| "grad_norm": 0.8181946873664856, | |
| "learning_rate": 2.7921406411582214e-06, | |
| "loss": 1.3118, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.8344627299128753, | |
| "grad_norm": 0.8904102444648743, | |
| "learning_rate": 2.7404343329886247e-06, | |
| "loss": 1.323, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.8375605033881897, | |
| "grad_norm": 0.9051535129547119, | |
| "learning_rate": 2.688728024819028e-06, | |
| "loss": 1.3313, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.8406582768635045, | |
| "grad_norm": 0.9703993201255798, | |
| "learning_rate": 2.6370217166494315e-06, | |
| "loss": 1.3269, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.843756050338819, | |
| "grad_norm": 0.9004696011543274, | |
| "learning_rate": 2.5853154084798345e-06, | |
| "loss": 1.3368, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.8468538238141337, | |
| "grad_norm": 0.8330061435699463, | |
| "learning_rate": 2.533609100310238e-06, | |
| "loss": 1.3121, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.849951597289448, | |
| "grad_norm": 0.8227541446685791, | |
| "learning_rate": 2.4819027921406412e-06, | |
| "loss": 1.3146, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.853049370764763, | |
| "grad_norm": 0.945093035697937, | |
| "learning_rate": 2.4301964839710446e-06, | |
| "loss": 1.3237, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.8561471442400777, | |
| "grad_norm": 0.7945578694343567, | |
| "learning_rate": 2.378490175801448e-06, | |
| "loss": 1.3121, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.859244917715392, | |
| "grad_norm": 0.7976880669593811, | |
| "learning_rate": 2.3267838676318514e-06, | |
| "loss": 1.3276, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.862342691190707, | |
| "grad_norm": 0.8588410019874573, | |
| "learning_rate": 2.2750775594622543e-06, | |
| "loss": 1.3509, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.865440464666021, | |
| "grad_norm": 0.8236850500106812, | |
| "learning_rate": 2.2233712512926577e-06, | |
| "loss": 1.3435, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.868538238141336, | |
| "grad_norm": 1.0473836660385132, | |
| "learning_rate": 2.171664943123061e-06, | |
| "loss": 1.3452, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.8716360116166504, | |
| "grad_norm": 0.8921092748641968, | |
| "learning_rate": 2.1199586349534644e-06, | |
| "loss": 1.3129, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.874733785091965, | |
| "grad_norm": 0.9309847950935364, | |
| "learning_rate": 2.068252326783868e-06, | |
| "loss": 1.3445, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.87783155856728, | |
| "grad_norm": 0.8526076078414917, | |
| "learning_rate": 2.0165460186142708e-06, | |
| "loss": 1.3123, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.8809293320425944, | |
| "grad_norm": 0.8316435813903809, | |
| "learning_rate": 1.964839710444674e-06, | |
| "loss": 1.3425, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.8840271055179088, | |
| "grad_norm": 0.7766702771186829, | |
| "learning_rate": 1.913133402275078e-06, | |
| "loss": 1.3024, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.8871248789932236, | |
| "grad_norm": 0.7230123281478882, | |
| "learning_rate": 1.861427094105481e-06, | |
| "loss": 1.3261, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.8902226524685384, | |
| "grad_norm": 0.9641100168228149, | |
| "learning_rate": 1.8097207859358843e-06, | |
| "loss": 1.3273, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.8933204259438527, | |
| "grad_norm": 0.9102080464363098, | |
| "learning_rate": 1.7580144777662875e-06, | |
| "loss": 1.3343, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.8964181994191676, | |
| "grad_norm": 0.8651391267776489, | |
| "learning_rate": 1.7063081695966906e-06, | |
| "loss": 1.3075, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.899515972894482, | |
| "grad_norm": 0.7797631025314331, | |
| "learning_rate": 1.6546018614270942e-06, | |
| "loss": 1.3214, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.9026137463697967, | |
| "grad_norm": 0.7178356051445007, | |
| "learning_rate": 1.6028955532574976e-06, | |
| "loss": 1.3413, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.905711519845111, | |
| "grad_norm": 0.7826308608055115, | |
| "learning_rate": 1.5511892450879008e-06, | |
| "loss": 1.3227, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.908809293320426, | |
| "grad_norm": 0.7831746339797974, | |
| "learning_rate": 1.499482936918304e-06, | |
| "loss": 1.3056, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.9119070667957407, | |
| "grad_norm": 0.9170383214950562, | |
| "learning_rate": 1.4477766287487075e-06, | |
| "loss": 1.2875, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.915004840271055, | |
| "grad_norm": 0.8851009607315063, | |
| "learning_rate": 1.3960703205791107e-06, | |
| "loss": 1.3318, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.91810261374637, | |
| "grad_norm": 0.8773500323295593, | |
| "learning_rate": 1.344364012409514e-06, | |
| "loss": 1.3284, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.9212003872216843, | |
| "grad_norm": 0.8349604606628418, | |
| "learning_rate": 1.2926577042399172e-06, | |
| "loss": 1.3202, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.924298160696999, | |
| "grad_norm": 0.7429217100143433, | |
| "learning_rate": 1.2409513960703206e-06, | |
| "loss": 1.2914, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.9273959341723135, | |
| "grad_norm": 0.8590971231460571, | |
| "learning_rate": 1.189245087900724e-06, | |
| "loss": 1.2972, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.9304937076476283, | |
| "grad_norm": 0.7867938280105591, | |
| "learning_rate": 1.1375387797311272e-06, | |
| "loss": 1.3253, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.933591481122943, | |
| "grad_norm": 0.8447002172470093, | |
| "learning_rate": 1.0858324715615305e-06, | |
| "loss": 1.3536, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.9366892545982575, | |
| "grad_norm": 0.778304934501648, | |
| "learning_rate": 1.034126163391934e-06, | |
| "loss": 1.3141, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.9397870280735723, | |
| "grad_norm": 0.9816215634346008, | |
| "learning_rate": 9.82419855222337e-07, | |
| "loss": 1.3013, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.9428848015488867, | |
| "grad_norm": 0.7762349843978882, | |
| "learning_rate": 9.307135470527405e-07, | |
| "loss": 1.319, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.9459825750242015, | |
| "grad_norm": 0.807734489440918, | |
| "learning_rate": 8.790072388831437e-07, | |
| "loss": 1.2891, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.949080348499516, | |
| "grad_norm": 0.9189450144767761, | |
| "learning_rate": 8.273009307135471e-07, | |
| "loss": 1.314, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.9521781219748306, | |
| "grad_norm": 0.8544689416885376, | |
| "learning_rate": 7.755946225439504e-07, | |
| "loss": 1.3229, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.9552758954501455, | |
| "grad_norm": 0.7630512714385986, | |
| "learning_rate": 7.238883143743538e-07, | |
| "loss": 1.305, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.95837366892546, | |
| "grad_norm": 0.770119845867157, | |
| "learning_rate": 6.72182006204757e-07, | |
| "loss": 1.3178, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.961471442400774, | |
| "grad_norm": 0.7441233396530151, | |
| "learning_rate": 6.204756980351603e-07, | |
| "loss": 1.3228, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.964569215876089, | |
| "grad_norm": 0.849139392375946, | |
| "learning_rate": 5.687693898655636e-07, | |
| "loss": 1.322, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.967666989351404, | |
| "grad_norm": 0.976645290851593, | |
| "learning_rate": 5.17063081695967e-07, | |
| "loss": 1.3295, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.970764762826718, | |
| "grad_norm": 0.8053218722343445, | |
| "learning_rate": 4.6535677352637023e-07, | |
| "loss": 1.3309, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.973862536302033, | |
| "grad_norm": 0.824623167514801, | |
| "learning_rate": 4.1365046535677355e-07, | |
| "loss": 1.3316, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.9769603097773474, | |
| "grad_norm": 0.8549762964248657, | |
| "learning_rate": 3.619441571871769e-07, | |
| "loss": 1.3427, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.980058083252662, | |
| "grad_norm": 0.8293663859367371, | |
| "learning_rate": 3.1023784901758015e-07, | |
| "loss": 1.321, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.9831558567279766, | |
| "grad_norm": 0.810080885887146, | |
| "learning_rate": 2.585315408479835e-07, | |
| "loss": 1.3027, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.9862536302032914, | |
| "grad_norm": 0.863522469997406, | |
| "learning_rate": 2.0682523267838678e-07, | |
| "loss": 1.3436, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.989351403678606, | |
| "grad_norm": 0.9304227828979492, | |
| "learning_rate": 1.5511892450879008e-07, | |
| "loss": 1.3263, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.9924491771539206, | |
| "grad_norm": 0.8415968418121338, | |
| "learning_rate": 1.0341261633919339e-07, | |
| "loss": 1.309, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.9955469506292354, | |
| "grad_norm": 0.9068368673324585, | |
| "learning_rate": 5.1706308169596694e-08, | |
| "loss": 1.3331, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.9970958373668926, | |
| "step": 1935, | |
| "total_flos": 9.200823520827802e+17, | |
| "train_loss": 1.443482890055161, | |
| "train_runtime": 51287.5168, | |
| "train_samples_per_second": 9.668, | |
| "train_steps_per_second": 0.038 | |
| }, | |
| { | |
| "epoch": 2.9970958373668926, | |
| "eval_loss": 1.3220494985580444, | |
| "eval_runtime": 499.8645, | |
| "eval_samples_per_second": 41.331, | |
| "eval_steps_per_second": 5.167, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 2.9970958373668926, | |
| "eval_loss": 1.3218390941619873, | |
| "eval_runtime": 500.7224, | |
| "eval_samples_per_second": 41.26, | |
| "eval_steps_per_second": 5.159, | |
| "step": 1935 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1935, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.200823520827802e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |