| { | |
| "best_metric": 0.03919154778122902, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.378016085790884, | |
| "eval_steps": 10000, | |
| "global_step": 50000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008378016085790885, | |
| "grad_norm": 0.1641591489315033, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2693, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01675603217158177, | |
| "grad_norm": 0.12203536182641983, | |
| "learning_rate": 1.99832299178266e-05, | |
| "loss": 0.1092, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.025134048257372654, | |
| "grad_norm": 0.1460294872522354, | |
| "learning_rate": 1.9966459835653196e-05, | |
| "loss": 0.0971, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03351206434316354, | |
| "grad_norm": 0.18384236097335815, | |
| "learning_rate": 1.9949689753479794e-05, | |
| "loss": 0.0891, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.041890080428954424, | |
| "grad_norm": 0.14532588422298431, | |
| "learning_rate": 1.993291967130639e-05, | |
| "loss": 0.0825, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05026809651474531, | |
| "grad_norm": 0.1657487004995346, | |
| "learning_rate": 1.991614958913299e-05, | |
| "loss": 0.085, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05864611260053619, | |
| "grad_norm": 0.16286590695381165, | |
| "learning_rate": 1.9899379506959587e-05, | |
| "loss": 0.074, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.06702412868632708, | |
| "grad_norm": 0.17318418622016907, | |
| "learning_rate": 1.9882609424786182e-05, | |
| "loss": 0.0723, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.07540214477211796, | |
| "grad_norm": 0.20146086812019348, | |
| "learning_rate": 1.986583934261278e-05, | |
| "loss": 0.0726, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.08378016085790885, | |
| "grad_norm": 0.25428423285484314, | |
| "learning_rate": 1.9849069260439376e-05, | |
| "loss": 0.0745, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09215817694369974, | |
| "grad_norm": 0.25297069549560547, | |
| "learning_rate": 1.9832299178265975e-05, | |
| "loss": 0.0686, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.10053619302949061, | |
| "grad_norm": 0.24197077751159668, | |
| "learning_rate": 1.981552909609257e-05, | |
| "loss": 0.0689, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1089142091152815, | |
| "grad_norm": 0.2025458812713623, | |
| "learning_rate": 1.979875901391917e-05, | |
| "loss": 0.064, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.11729222520107238, | |
| "grad_norm": 0.2370821088552475, | |
| "learning_rate": 1.9781988931745768e-05, | |
| "loss": 0.0661, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.12567024128686327, | |
| "grad_norm": 0.23466931283473969, | |
| "learning_rate": 1.9765218849572367e-05, | |
| "loss": 0.0628, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.13404825737265416, | |
| "grad_norm": 0.27670082449913025, | |
| "learning_rate": 1.9748448767398962e-05, | |
| "loss": 0.0608, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.14242627345844505, | |
| "grad_norm": 0.25532266497612, | |
| "learning_rate": 1.973167868522556e-05, | |
| "loss": 0.0578, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.15080428954423591, | |
| "grad_norm": 0.2733491063117981, | |
| "learning_rate": 1.9714908603052156e-05, | |
| "loss": 0.0586, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.1591823056300268, | |
| "grad_norm": 0.31975990533828735, | |
| "learning_rate": 1.9698138520878755e-05, | |
| "loss": 0.063, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.1675603217158177, | |
| "grad_norm": 0.2980721592903137, | |
| "learning_rate": 1.968136843870535e-05, | |
| "loss": 0.0615, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.17593833780160859, | |
| "grad_norm": 0.2662040591239929, | |
| "learning_rate": 1.966459835653195e-05, | |
| "loss": 0.0573, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.18431635388739948, | |
| "grad_norm": 0.24934068322181702, | |
| "learning_rate": 1.9647828274358547e-05, | |
| "loss": 0.0578, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.19269436997319034, | |
| "grad_norm": 0.35513001680374146, | |
| "learning_rate": 1.9631058192185143e-05, | |
| "loss": 0.0567, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.20107238605898123, | |
| "grad_norm": 0.2941363453865051, | |
| "learning_rate": 1.961428811001174e-05, | |
| "loss": 0.0613, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.20945040214477212, | |
| "grad_norm": 0.2334873378276825, | |
| "learning_rate": 1.9597518027838337e-05, | |
| "loss": 0.0564, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.217828418230563, | |
| "grad_norm": 0.3162507116794586, | |
| "learning_rate": 1.9580747945664935e-05, | |
| "loss": 0.0546, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2262064343163539, | |
| "grad_norm": 0.3287353217601776, | |
| "learning_rate": 1.956397786349153e-05, | |
| "loss": 0.0561, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.23458445040214476, | |
| "grad_norm": 0.34116727113723755, | |
| "learning_rate": 1.954720778131813e-05, | |
| "loss": 0.0541, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.24296246648793565, | |
| "grad_norm": 0.2549584209918976, | |
| "learning_rate": 1.9530437699144725e-05, | |
| "loss": 0.0512, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.25134048257372654, | |
| "grad_norm": 0.2564306855201721, | |
| "learning_rate": 1.9513667616971327e-05, | |
| "loss": 0.053, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2597184986595174, | |
| "grad_norm": 0.39897748827934265, | |
| "learning_rate": 1.9496897534797922e-05, | |
| "loss": 0.0499, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.2680965147453083, | |
| "grad_norm": 0.3399379849433899, | |
| "learning_rate": 1.948012745262452e-05, | |
| "loss": 0.0527, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.2764745308310992, | |
| "grad_norm": 0.3706755042076111, | |
| "learning_rate": 1.9463357370451116e-05, | |
| "loss": 0.0522, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.2848525469168901, | |
| "grad_norm": 0.3208563029766083, | |
| "learning_rate": 1.9446587288277715e-05, | |
| "loss": 0.0502, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.29323056300268097, | |
| "grad_norm": 0.30643364787101746, | |
| "learning_rate": 1.942981720610431e-05, | |
| "loss": 0.0517, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.30160857908847183, | |
| "grad_norm": 0.28462880849838257, | |
| "learning_rate": 1.941304712393091e-05, | |
| "loss": 0.0483, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.30998659517426275, | |
| "grad_norm": 0.46007809042930603, | |
| "learning_rate": 1.9396277041757504e-05, | |
| "loss": 0.0534, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.3183646112600536, | |
| "grad_norm": 0.27532362937927246, | |
| "learning_rate": 1.9379506959584103e-05, | |
| "loss": 0.049, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.3267426273458445, | |
| "grad_norm": 0.3934316337108612, | |
| "learning_rate": 1.93627368774107e-05, | |
| "loss": 0.0504, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3351206434316354, | |
| "grad_norm": 0.38043123483657837, | |
| "learning_rate": 1.93459667952373e-05, | |
| "loss": 0.0474, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.34349865951742625, | |
| "grad_norm": 0.33170923590660095, | |
| "learning_rate": 1.9329196713063896e-05, | |
| "loss": 0.0474, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.35187667560321717, | |
| "grad_norm": 0.34464696049690247, | |
| "learning_rate": 1.9312426630890494e-05, | |
| "loss": 0.0494, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.36025469168900803, | |
| "grad_norm": 0.33678779006004333, | |
| "learning_rate": 1.929565654871709e-05, | |
| "loss": 0.0465, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.36863270777479895, | |
| "grad_norm": 0.34634217619895935, | |
| "learning_rate": 1.927888646654369e-05, | |
| "loss": 0.046, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.3770107238605898, | |
| "grad_norm": 0.48531678318977356, | |
| "learning_rate": 1.9262116384370284e-05, | |
| "loss": 0.0463, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3853887399463807, | |
| "grad_norm": 0.33334800601005554, | |
| "learning_rate": 1.9245346302196882e-05, | |
| "loss": 0.047, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.3937667560321716, | |
| "grad_norm": 0.4677096903324127, | |
| "learning_rate": 1.922857622002348e-05, | |
| "loss": 0.0474, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.40214477211796246, | |
| "grad_norm": 0.35066741704940796, | |
| "learning_rate": 1.9211806137850076e-05, | |
| "loss": 0.0449, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.4105227882037534, | |
| "grad_norm": 0.3857254087924957, | |
| "learning_rate": 1.9195036055676675e-05, | |
| "loss": 0.0453, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.41890080428954424, | |
| "grad_norm": 0.36052629351615906, | |
| "learning_rate": 1.917826597350327e-05, | |
| "loss": 0.0469, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.4272788203753351, | |
| "grad_norm": 0.3650895059108734, | |
| "learning_rate": 1.916149589132987e-05, | |
| "loss": 0.0483, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.435656836461126, | |
| "grad_norm": 0.34670376777648926, | |
| "learning_rate": 1.9144725809156465e-05, | |
| "loss": 0.0449, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.4440348525469169, | |
| "grad_norm": 0.36593642830848694, | |
| "learning_rate": 1.9127955726983063e-05, | |
| "loss": 0.0449, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.4524128686327078, | |
| "grad_norm": 0.31553247570991516, | |
| "learning_rate": 1.9111185644809662e-05, | |
| "loss": 0.0448, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.46079088471849866, | |
| "grad_norm": 0.30997416377067566, | |
| "learning_rate": 1.909441556263626e-05, | |
| "loss": 0.0488, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.4691689008042895, | |
| "grad_norm": 0.4204448461532593, | |
| "learning_rate": 1.9077645480462856e-05, | |
| "loss": 0.0443, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.47754691689008044, | |
| "grad_norm": 0.36868560314178467, | |
| "learning_rate": 1.9060875398289455e-05, | |
| "loss": 0.0428, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.4859249329758713, | |
| "grad_norm": 0.37285274267196655, | |
| "learning_rate": 1.904410531611605e-05, | |
| "loss": 0.0439, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.4943029490616622, | |
| "grad_norm": 0.4258297085762024, | |
| "learning_rate": 1.902733523394265e-05, | |
| "loss": 0.0446, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.5026809651474531, | |
| "grad_norm": 0.34184491634368896, | |
| "learning_rate": 1.9010565151769244e-05, | |
| "loss": 0.0399, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.511058981233244, | |
| "grad_norm": 0.404744029045105, | |
| "learning_rate": 1.8993795069595843e-05, | |
| "loss": 0.0434, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.5194369973190348, | |
| "grad_norm": 0.31526079773902893, | |
| "learning_rate": 1.8977024987422438e-05, | |
| "loss": 0.042, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.5278150134048257, | |
| "grad_norm": 0.39627355337142944, | |
| "learning_rate": 1.8960254905249037e-05, | |
| "loss": 0.0418, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.5361930294906166, | |
| "grad_norm": 0.39220544695854187, | |
| "learning_rate": 1.8943484823075635e-05, | |
| "loss": 0.0434, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.5445710455764075, | |
| "grad_norm": 0.4202696979045868, | |
| "learning_rate": 1.892671474090223e-05, | |
| "loss": 0.0453, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5529490616621984, | |
| "grad_norm": 0.31564274430274963, | |
| "learning_rate": 1.890994465872883e-05, | |
| "loss": 0.0423, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5613270777479893, | |
| "grad_norm": 0.43861642479896545, | |
| "learning_rate": 1.8893174576555425e-05, | |
| "loss": 0.0432, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5697050938337802, | |
| "grad_norm": 0.41774672269821167, | |
| "learning_rate": 1.8876404494382024e-05, | |
| "loss": 0.0424, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.578083109919571, | |
| "grad_norm": 0.44408470392227173, | |
| "learning_rate": 1.8859634412208622e-05, | |
| "loss": 0.0393, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5864611260053619, | |
| "grad_norm": 0.5111362338066101, | |
| "learning_rate": 1.8842864330035218e-05, | |
| "loss": 0.0402, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5948391420911529, | |
| "grad_norm": 0.48010021448135376, | |
| "learning_rate": 1.8826094247861816e-05, | |
| "loss": 0.0403, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.6032171581769437, | |
| "grad_norm": 0.41536250710487366, | |
| "learning_rate": 1.8809324165688415e-05, | |
| "loss": 0.0441, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.6115951742627346, | |
| "grad_norm": 0.40686219930648804, | |
| "learning_rate": 1.879255408351501e-05, | |
| "loss": 0.0408, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.6199731903485255, | |
| "grad_norm": 0.4435434937477112, | |
| "learning_rate": 1.877578400134161e-05, | |
| "loss": 0.0434, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.6283512064343163, | |
| "grad_norm": 0.4401046633720398, | |
| "learning_rate": 1.8759013919168204e-05, | |
| "loss": 0.042, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.6367292225201072, | |
| "grad_norm": 0.40911954641342163, | |
| "learning_rate": 1.8742243836994803e-05, | |
| "loss": 0.0413, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.6451072386058981, | |
| "grad_norm": 0.490383118391037, | |
| "learning_rate": 1.87254737548214e-05, | |
| "loss": 0.0423, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.653485254691689, | |
| "grad_norm": 0.4375227093696594, | |
| "learning_rate": 1.8708703672647997e-05, | |
| "loss": 0.0386, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6618632707774799, | |
| "grad_norm": 0.336227685213089, | |
| "learning_rate": 1.8691933590474596e-05, | |
| "loss": 0.042, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6702412868632708, | |
| "grad_norm": 0.5190924406051636, | |
| "learning_rate": 1.8675163508301194e-05, | |
| "loss": 0.0405, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6786193029490617, | |
| "grad_norm": 0.3751809298992157, | |
| "learning_rate": 1.865839342612779e-05, | |
| "loss": 0.0383, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.6869973190348525, | |
| "grad_norm": 0.34148427844047546, | |
| "learning_rate": 1.864162334395439e-05, | |
| "loss": 0.0388, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.6953753351206434, | |
| "grad_norm": 0.5154247879981995, | |
| "learning_rate": 1.8624853261780984e-05, | |
| "loss": 0.0392, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.7037533512064343, | |
| "grad_norm": 0.3212796151638031, | |
| "learning_rate": 1.8608083179607583e-05, | |
| "loss": 0.0397, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.7121313672922251, | |
| "grad_norm": 0.3693840503692627, | |
| "learning_rate": 1.8591313097434178e-05, | |
| "loss": 0.0399, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.7205093833780161, | |
| "grad_norm": 0.384682297706604, | |
| "learning_rate": 1.8574543015260777e-05, | |
| "loss": 0.0394, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.728887399463807, | |
| "grad_norm": 0.5106825828552246, | |
| "learning_rate": 1.8557772933087375e-05, | |
| "loss": 0.0376, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.7372654155495979, | |
| "grad_norm": 0.5798951983451843, | |
| "learning_rate": 1.854100285091397e-05, | |
| "loss": 0.0384, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.7456434316353887, | |
| "grad_norm": 0.4215037226676941, | |
| "learning_rate": 1.852423276874057e-05, | |
| "loss": 0.0371, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.7540214477211796, | |
| "grad_norm": 0.41392162442207336, | |
| "learning_rate": 1.8507462686567165e-05, | |
| "loss": 0.0395, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7623994638069705, | |
| "grad_norm": 0.38111090660095215, | |
| "learning_rate": 1.8490692604393763e-05, | |
| "loss": 0.0397, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7707774798927614, | |
| "grad_norm": 0.5323607325553894, | |
| "learning_rate": 1.847392252222036e-05, | |
| "loss": 0.0389, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7791554959785523, | |
| "grad_norm": 0.3730742335319519, | |
| "learning_rate": 1.8457152440046957e-05, | |
| "loss": 0.037, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7875335120643432, | |
| "grad_norm": 0.5167490243911743, | |
| "learning_rate": 1.8440382357873553e-05, | |
| "loss": 0.0398, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.795911528150134, | |
| "grad_norm": 0.3720487356185913, | |
| "learning_rate": 1.8423612275700155e-05, | |
| "loss": 0.0353, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.8042895442359249, | |
| "grad_norm": 0.49233752489089966, | |
| "learning_rate": 1.840684219352675e-05, | |
| "loss": 0.0375, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.8126675603217158, | |
| "grad_norm": 0.35151785612106323, | |
| "learning_rate": 1.839007211135335e-05, | |
| "loss": 0.0367, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.8210455764075067, | |
| "grad_norm": 0.4015248119831085, | |
| "learning_rate": 1.8373302029179944e-05, | |
| "loss": 0.0402, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.8294235924932976, | |
| "grad_norm": 0.3868032395839691, | |
| "learning_rate": 1.8356531947006543e-05, | |
| "loss": 0.0364, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.8378016085790885, | |
| "grad_norm": 0.3618241250514984, | |
| "learning_rate": 1.8339761864833138e-05, | |
| "loss": 0.0365, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.8461796246648794, | |
| "grad_norm": 0.4246107041835785, | |
| "learning_rate": 1.8322991782659737e-05, | |
| "loss": 0.0383, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.8545576407506702, | |
| "grad_norm": 0.4502660632133484, | |
| "learning_rate": 1.8306221700486332e-05, | |
| "loss": 0.0384, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.8629356568364611, | |
| "grad_norm": 0.38021931052207947, | |
| "learning_rate": 1.828945161831293e-05, | |
| "loss": 0.036, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.871313672922252, | |
| "grad_norm": 0.49084368348121643, | |
| "learning_rate": 1.827268153613953e-05, | |
| "loss": 0.0403, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8796916890080428, | |
| "grad_norm": 0.4013173282146454, | |
| "learning_rate": 1.8255911453966125e-05, | |
| "loss": 0.038, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.8880697050938338, | |
| "grad_norm": 0.4591931700706482, | |
| "learning_rate": 1.8239141371792724e-05, | |
| "loss": 0.0345, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.8964477211796247, | |
| "grad_norm": 0.3261602520942688, | |
| "learning_rate": 1.822237128961932e-05, | |
| "loss": 0.0371, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.9048257372654156, | |
| "grad_norm": 0.5109397172927856, | |
| "learning_rate": 1.8205601207445918e-05, | |
| "loss": 0.0352, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.9132037533512064, | |
| "grad_norm": 0.4951651990413666, | |
| "learning_rate": 1.8188831125272516e-05, | |
| "loss": 0.0384, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.9215817694369973, | |
| "grad_norm": 0.2933291494846344, | |
| "learning_rate": 1.817206104309911e-05, | |
| "loss": 0.0367, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.9299597855227882, | |
| "grad_norm": 0.518692135810852, | |
| "learning_rate": 1.815529096092571e-05, | |
| "loss": 0.0362, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.938337801608579, | |
| "grad_norm": 0.4825911223888397, | |
| "learning_rate": 1.813852087875231e-05, | |
| "loss": 0.0363, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.94671581769437, | |
| "grad_norm": 0.45922228693962097, | |
| "learning_rate": 1.8121750796578904e-05, | |
| "loss": 0.0381, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.9550938337801609, | |
| "grad_norm": 0.3230240046977997, | |
| "learning_rate": 1.8104980714405503e-05, | |
| "loss": 0.0342, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.9634718498659517, | |
| "grad_norm": 0.3606482744216919, | |
| "learning_rate": 1.80882106322321e-05, | |
| "loss": 0.0355, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.9718498659517426, | |
| "grad_norm": 0.4341330826282501, | |
| "learning_rate": 1.8071440550058697e-05, | |
| "loss": 0.037, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9802278820375335, | |
| "grad_norm": 0.42356178164482117, | |
| "learning_rate": 1.8054670467885292e-05, | |
| "loss": 0.0353, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9886058981233244, | |
| "grad_norm": 0.39021754264831543, | |
| "learning_rate": 1.803790038571189e-05, | |
| "loss": 0.0352, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.9969839142091153, | |
| "grad_norm": 0.29827752709388733, | |
| "learning_rate": 1.802113030353849e-05, | |
| "loss": 0.0355, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.0053619302949062, | |
| "grad_norm": 0.38858547806739807, | |
| "learning_rate": 1.800436022136509e-05, | |
| "loss": 0.0278, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.013739946380697, | |
| "grad_norm": 0.2972586452960968, | |
| "learning_rate": 1.7987590139191684e-05, | |
| "loss": 0.0236, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.022117962466488, | |
| "grad_norm": 0.36482104659080505, | |
| "learning_rate": 1.7970820057018283e-05, | |
| "loss": 0.0251, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.030495978552279, | |
| "grad_norm": 0.37719279527664185, | |
| "learning_rate": 1.7954049974844878e-05, | |
| "loss": 0.023, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.0388739946380696, | |
| "grad_norm": 0.4385906457901001, | |
| "learning_rate": 1.7937279892671477e-05, | |
| "loss": 0.0248, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.0472520107238605, | |
| "grad_norm": 0.508695662021637, | |
| "learning_rate": 1.7920509810498072e-05, | |
| "loss": 0.0254, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.0556300268096515, | |
| "grad_norm": 0.36647507548332214, | |
| "learning_rate": 1.790373972832467e-05, | |
| "loss": 0.0254, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.0640080428954424, | |
| "grad_norm": 0.4308232069015503, | |
| "learning_rate": 1.7886969646151266e-05, | |
| "loss": 0.0226, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.0723860589812333, | |
| "grad_norm": 0.3477235734462738, | |
| "learning_rate": 1.7870199563977865e-05, | |
| "loss": 0.0235, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.0807640750670242, | |
| "grad_norm": 0.45611080527305603, | |
| "learning_rate": 1.7853429481804463e-05, | |
| "loss": 0.0255, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.089142091152815, | |
| "grad_norm": 0.41645970940589905, | |
| "learning_rate": 1.783665939963106e-05, | |
| "loss": 0.0236, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.0975201072386058, | |
| "grad_norm": 0.5107206702232361, | |
| "learning_rate": 1.7819889317457657e-05, | |
| "loss": 0.0243, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.1058981233243967, | |
| "grad_norm": 0.37085390090942383, | |
| "learning_rate": 1.7803119235284253e-05, | |
| "loss": 0.0231, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.1142761394101877, | |
| "grad_norm": 0.3882488012313843, | |
| "learning_rate": 1.778634915311085e-05, | |
| "loss": 0.0226, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.1226541554959786, | |
| "grad_norm": 0.3104082942008972, | |
| "learning_rate": 1.7769579070937447e-05, | |
| "loss": 0.0243, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.1310321715817695, | |
| "grad_norm": 0.500109076499939, | |
| "learning_rate": 1.7752808988764045e-05, | |
| "loss": 0.0245, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.1394101876675604, | |
| "grad_norm": 0.6070294380187988, | |
| "learning_rate": 1.7736038906590644e-05, | |
| "loss": 0.024, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.147788203753351, | |
| "grad_norm": 0.4429844915866852, | |
| "learning_rate": 1.7719268824417243e-05, | |
| "loss": 0.024, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.156166219839142, | |
| "grad_norm": 0.532455563545227, | |
| "learning_rate": 1.7702498742243838e-05, | |
| "loss": 0.0231, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.164544235924933, | |
| "grad_norm": 0.4723723828792572, | |
| "learning_rate": 1.7685728660070437e-05, | |
| "loss": 0.0227, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.1729222520107239, | |
| "grad_norm": 0.40511298179626465, | |
| "learning_rate": 1.7668958577897032e-05, | |
| "loss": 0.0228, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.1813002680965148, | |
| "grad_norm": 0.4623141884803772, | |
| "learning_rate": 1.765218849572363e-05, | |
| "loss": 0.0213, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.1896782841823057, | |
| "grad_norm": 0.5076983571052551, | |
| "learning_rate": 1.7635418413550226e-05, | |
| "loss": 0.0247, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.1980563002680964, | |
| "grad_norm": 0.438363641500473, | |
| "learning_rate": 1.7618648331376825e-05, | |
| "loss": 0.0247, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.2064343163538873, | |
| "grad_norm": 0.4427433907985687, | |
| "learning_rate": 1.7601878249203424e-05, | |
| "loss": 0.0266, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.2148123324396782, | |
| "grad_norm": 0.4235341548919678, | |
| "learning_rate": 1.758510816703002e-05, | |
| "loss": 0.0249, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.2231903485254692, | |
| "grad_norm": 0.3872547149658203, | |
| "learning_rate": 1.7568338084856618e-05, | |
| "loss": 0.0241, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.23156836461126, | |
| "grad_norm": 0.4646087884902954, | |
| "learning_rate": 1.7551568002683216e-05, | |
| "loss": 0.0239, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.239946380697051, | |
| "grad_norm": 0.3509279489517212, | |
| "learning_rate": 1.753479792050981e-05, | |
| "loss": 0.0247, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.248324396782842, | |
| "grad_norm": 0.5066854357719421, | |
| "learning_rate": 1.751802783833641e-05, | |
| "loss": 0.0223, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.2567024128686328, | |
| "grad_norm": 0.5363894104957581, | |
| "learning_rate": 1.7501257756163006e-05, | |
| "loss": 0.0231, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.2650804289544235, | |
| "grad_norm": 0.6059328317642212, | |
| "learning_rate": 1.7484487673989604e-05, | |
| "loss": 0.0247, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.2734584450402144, | |
| "grad_norm": 0.458574503660202, | |
| "learning_rate": 1.7467717591816203e-05, | |
| "loss": 0.0252, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.2818364611260054, | |
| "grad_norm": 0.4931676983833313, | |
| "learning_rate": 1.74509475096428e-05, | |
| "loss": 0.0242, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.2902144772117963, | |
| "grad_norm": 0.4417158365249634, | |
| "learning_rate": 1.7434177427469397e-05, | |
| "loss": 0.0245, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.2985924932975872, | |
| "grad_norm": 0.3258965015411377, | |
| "learning_rate": 1.7417407345295992e-05, | |
| "loss": 0.0254, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.3069705093833779, | |
| "grad_norm": 0.4492965638637543, | |
| "learning_rate": 1.740063726312259e-05, | |
| "loss": 0.0247, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.3153485254691688, | |
| "grad_norm": 0.4415794312953949, | |
| "learning_rate": 1.7383867180949186e-05, | |
| "loss": 0.0243, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.3237265415549597, | |
| "grad_norm": 0.5353983044624329, | |
| "learning_rate": 1.7367097098775785e-05, | |
| "loss": 0.0221, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.3321045576407506, | |
| "grad_norm": 0.5296221375465393, | |
| "learning_rate": 1.735032701660238e-05, | |
| "loss": 0.0241, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.3404825737265416, | |
| "grad_norm": 0.5558563470840454, | |
| "learning_rate": 1.7333556934428983e-05, | |
| "loss": 0.0278, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.3488605898123325, | |
| "grad_norm": 0.5353667736053467, | |
| "learning_rate": 1.7316786852255578e-05, | |
| "loss": 0.0221, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.3572386058981234, | |
| "grad_norm": 0.47603583335876465, | |
| "learning_rate": 1.7300016770082177e-05, | |
| "loss": 0.0241, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.3656166219839143, | |
| "grad_norm": 0.5160461068153381, | |
| "learning_rate": 1.7283246687908772e-05, | |
| "loss": 0.0237, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.3739946380697052, | |
| "grad_norm": 0.5242166519165039, | |
| "learning_rate": 1.726647660573537e-05, | |
| "loss": 0.0241, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.382372654155496, | |
| "grad_norm": 0.4098646342754364, | |
| "learning_rate": 1.7249706523561966e-05, | |
| "loss": 0.0237, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.3907506702412868, | |
| "grad_norm": 0.488899290561676, | |
| "learning_rate": 1.7232936441388565e-05, | |
| "loss": 0.024, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.3991286863270778, | |
| "grad_norm": 0.4955669641494751, | |
| "learning_rate": 1.721616635921516e-05, | |
| "loss": 0.0233, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.4075067024128687, | |
| "grad_norm": 0.4925636053085327, | |
| "learning_rate": 1.719939627704176e-05, | |
| "loss": 0.0241, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.4158847184986596, | |
| "grad_norm": 0.4332300126552582, | |
| "learning_rate": 1.7182626194868357e-05, | |
| "loss": 0.023, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.4242627345844503, | |
| "grad_norm": 0.5092645883560181, | |
| "learning_rate": 1.7165856112694953e-05, | |
| "loss": 0.0247, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.4326407506702412, | |
| "grad_norm": 0.4245849847793579, | |
| "learning_rate": 1.714908603052155e-05, | |
| "loss": 0.0229, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.4410187667560321, | |
| "grad_norm": 0.5392746329307556, | |
| "learning_rate": 1.7132315948348147e-05, | |
| "loss": 0.0243, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.449396782841823, | |
| "grad_norm": 0.4076955020427704, | |
| "learning_rate": 1.7115545866174745e-05, | |
| "loss": 0.023, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.457774798927614, | |
| "grad_norm": 0.39265647530555725, | |
| "learning_rate": 1.709877578400134e-05, | |
| "loss": 0.023, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.4661528150134049, | |
| "grad_norm": 0.49499258399009705, | |
| "learning_rate": 1.708200570182794e-05, | |
| "loss": 0.025, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.4745308310991958, | |
| "grad_norm": 0.38666218519210815, | |
| "learning_rate": 1.7065235619654538e-05, | |
| "loss": 0.0222, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.4829088471849867, | |
| "grad_norm": 0.4817696809768677, | |
| "learning_rate": 1.7048465537481137e-05, | |
| "loss": 0.0248, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.4912868632707774, | |
| "grad_norm": 0.5351291298866272, | |
| "learning_rate": 1.7031695455307732e-05, | |
| "loss": 0.0249, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.4996648793565683, | |
| "grad_norm": 0.37309539318084717, | |
| "learning_rate": 1.701492537313433e-05, | |
| "loss": 0.0236, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.5080428954423593, | |
| "grad_norm": 0.3458901345729828, | |
| "learning_rate": 1.6998155290960926e-05, | |
| "loss": 0.0234, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.5164209115281502, | |
| "grad_norm": 0.4059881269931793, | |
| "learning_rate": 1.6981385208787525e-05, | |
| "loss": 0.0241, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.5247989276139409, | |
| "grad_norm": 0.3939747214317322, | |
| "learning_rate": 1.696461512661412e-05, | |
| "loss": 0.0232, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.5331769436997318, | |
| "grad_norm": 0.43895846605300903, | |
| "learning_rate": 1.694784504444072e-05, | |
| "loss": 0.0233, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.5415549597855227, | |
| "grad_norm": 0.48546019196510315, | |
| "learning_rate": 1.6931074962267314e-05, | |
| "loss": 0.0231, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.5499329758713136, | |
| "grad_norm": 0.7542991638183594, | |
| "learning_rate": 1.6914304880093916e-05, | |
| "loss": 0.0227, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.5583109919571045, | |
| "grad_norm": 0.5190153121948242, | |
| "learning_rate": 1.6897534797920512e-05, | |
| "loss": 0.0257, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.5666890080428955, | |
| "grad_norm": 0.36860191822052, | |
| "learning_rate": 1.688076471574711e-05, | |
| "loss": 0.0237, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.5750670241286864, | |
| "grad_norm": 0.4412299394607544, | |
| "learning_rate": 1.6863994633573706e-05, | |
| "loss": 0.023, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.5834450402144773, | |
| "grad_norm": 0.44900405406951904, | |
| "learning_rate": 1.6847224551400304e-05, | |
| "loss": 0.0219, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.5918230563002682, | |
| "grad_norm": 0.4734587073326111, | |
| "learning_rate": 1.68304544692269e-05, | |
| "loss": 0.0244, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.6002010723860591, | |
| "grad_norm": 0.4086250364780426, | |
| "learning_rate": 1.68136843870535e-05, | |
| "loss": 0.0233, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.6085790884718498, | |
| "grad_norm": 0.33544018864631653, | |
| "learning_rate": 1.6796914304880094e-05, | |
| "loss": 0.0228, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.6169571045576407, | |
| "grad_norm": 0.5728262066841125, | |
| "learning_rate": 1.6780144222706692e-05, | |
| "loss": 0.0247, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.6253351206434317, | |
| "grad_norm": 0.40683993697166443, | |
| "learning_rate": 1.676337414053329e-05, | |
| "loss": 0.0236, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.6337131367292224, | |
| "grad_norm": 0.5062201619148254, | |
| "learning_rate": 1.6746604058359887e-05, | |
| "loss": 0.0227, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.6420911528150133, | |
| "grad_norm": 0.6048519015312195, | |
| "learning_rate": 1.6729833976186485e-05, | |
| "loss": 0.024, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.6504691689008042, | |
| "grad_norm": 0.42808210849761963, | |
| "learning_rate": 1.671306389401308e-05, | |
| "loss": 0.0214, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.6588471849865951, | |
| "grad_norm": 0.452362060546875, | |
| "learning_rate": 1.669629381183968e-05, | |
| "loss": 0.025, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.667225201072386, | |
| "grad_norm": 0.45486265420913696, | |
| "learning_rate": 1.6679523729666275e-05, | |
| "loss": 0.0241, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.675603217158177, | |
| "grad_norm": 0.39436477422714233, | |
| "learning_rate": 1.6662753647492873e-05, | |
| "loss": 0.0238, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.675603217158177, | |
| "eval_loss": 0.042236872017383575, | |
| "eval_runtime": 0.3154, | |
| "eval_samples_per_second": 63.415, | |
| "eval_steps_per_second": 3.171, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.6839812332439679, | |
| "grad_norm": 0.3792615234851837, | |
| "learning_rate": 1.6645983565319472e-05, | |
| "loss": 0.0236, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.6923592493297588, | |
| "grad_norm": 0.4568031132221222, | |
| "learning_rate": 1.662921348314607e-05, | |
| "loss": 0.0245, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.7007372654155497, | |
| "grad_norm": 0.30801689624786377, | |
| "learning_rate": 1.6612443400972666e-05, | |
| "loss": 0.0242, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.7091152815013406, | |
| "grad_norm": 0.404593825340271, | |
| "learning_rate": 1.6595673318799265e-05, | |
| "loss": 0.0244, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.7174932975871313, | |
| "grad_norm": 0.41899484395980835, | |
| "learning_rate": 1.657890323662586e-05, | |
| "loss": 0.0237, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.7258713136729222, | |
| "grad_norm": 0.7052549719810486, | |
| "learning_rate": 1.656213315445246e-05, | |
| "loss": 0.0219, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.7342493297587132, | |
| "grad_norm": 0.6043505072593689, | |
| "learning_rate": 1.6545363072279054e-05, | |
| "loss": 0.023, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.742627345844504, | |
| "grad_norm": 0.47521620988845825, | |
| "learning_rate": 1.6528592990105653e-05, | |
| "loss": 0.0213, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.7510053619302948, | |
| "grad_norm": 0.43603143095970154, | |
| "learning_rate": 1.651182290793225e-05, | |
| "loss": 0.0222, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.7593833780160857, | |
| "grad_norm": 0.5093141794204712, | |
| "learning_rate": 1.6495052825758847e-05, | |
| "loss": 0.0241, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.7677613941018766, | |
| "grad_norm": 0.4269144535064697, | |
| "learning_rate": 1.6478282743585445e-05, | |
| "loss": 0.0225, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.7761394101876675, | |
| "grad_norm": 0.3798427879810333, | |
| "learning_rate": 1.646151266141204e-05, | |
| "loss": 0.0246, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.7845174262734584, | |
| "grad_norm": 0.35155215859413147, | |
| "learning_rate": 1.644474257923864e-05, | |
| "loss": 0.0223, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.7928954423592494, | |
| "grad_norm": 0.3362865447998047, | |
| "learning_rate": 1.6427972497065238e-05, | |
| "loss": 0.0222, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.8012734584450403, | |
| "grad_norm": 0.4176539480686188, | |
| "learning_rate": 1.6411202414891834e-05, | |
| "loss": 0.0221, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.8096514745308312, | |
| "grad_norm": 0.38857483863830566, | |
| "learning_rate": 1.6394432332718432e-05, | |
| "loss": 0.0231, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.8180294906166221, | |
| "grad_norm": 0.5508946776390076, | |
| "learning_rate": 1.637766225054503e-05, | |
| "loss": 0.0244, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 1.826407506702413, | |
| "grad_norm": 0.23041021823883057, | |
| "learning_rate": 1.6360892168371626e-05, | |
| "loss": 0.0214, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.8347855227882037, | |
| "grad_norm": 0.4554728865623474, | |
| "learning_rate": 1.6344122086198225e-05, | |
| "loss": 0.0216, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.8431635388739946, | |
| "grad_norm": 0.3926387131214142, | |
| "learning_rate": 1.632735200402482e-05, | |
| "loss": 0.0231, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.8515415549597856, | |
| "grad_norm": 0.4310173988342285, | |
| "learning_rate": 1.631058192185142e-05, | |
| "loss": 0.0217, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 1.8599195710455763, | |
| "grad_norm": 0.5301809310913086, | |
| "learning_rate": 1.6293811839678014e-05, | |
| "loss": 0.0233, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.8682975871313672, | |
| "grad_norm": 0.5201212167739868, | |
| "learning_rate": 1.6277041757504613e-05, | |
| "loss": 0.0238, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 1.876675603217158, | |
| "grad_norm": 0.5420696139335632, | |
| "learning_rate": 1.626027167533121e-05, | |
| "loss": 0.0228, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.885053619302949, | |
| "grad_norm": 0.449569433927536, | |
| "learning_rate": 1.624350159315781e-05, | |
| "loss": 0.0229, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.89343163538874, | |
| "grad_norm": 0.41790249943733215, | |
| "learning_rate": 1.6226731510984406e-05, | |
| "loss": 0.0211, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.9018096514745308, | |
| "grad_norm": 0.49417269229888916, | |
| "learning_rate": 1.6209961428811004e-05, | |
| "loss": 0.0238, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 1.9101876675603218, | |
| "grad_norm": 0.7904441952705383, | |
| "learning_rate": 1.61931913466376e-05, | |
| "loss": 0.0238, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.9185656836461127, | |
| "grad_norm": 0.5102431178092957, | |
| "learning_rate": 1.61764212644642e-05, | |
| "loss": 0.0234, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 1.9269436997319036, | |
| "grad_norm": 0.5872859954833984, | |
| "learning_rate": 1.6159651182290794e-05, | |
| "loss": 0.023, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.9353217158176945, | |
| "grad_norm": 0.4397691488265991, | |
| "learning_rate": 1.6142881100117393e-05, | |
| "loss": 0.0225, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.9436997319034852, | |
| "grad_norm": 0.5159376263618469, | |
| "learning_rate": 1.6126111017943988e-05, | |
| "loss": 0.0236, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.9520777479892761, | |
| "grad_norm": 0.5699421763420105, | |
| "learning_rate": 1.6109340935770587e-05, | |
| "loss": 0.0221, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 1.960455764075067, | |
| "grad_norm": 0.5751481056213379, | |
| "learning_rate": 1.6092570853597185e-05, | |
| "loss": 0.0238, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.9688337801608577, | |
| "grad_norm": 0.4952080249786377, | |
| "learning_rate": 1.607580077142378e-05, | |
| "loss": 0.0245, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 1.9772117962466487, | |
| "grad_norm": 0.3852183520793915, | |
| "learning_rate": 1.605903068925038e-05, | |
| "loss": 0.0237, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.9855898123324396, | |
| "grad_norm": 0.5378175973892212, | |
| "learning_rate": 1.6042260607076975e-05, | |
| "loss": 0.0217, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 1.9939678284182305, | |
| "grad_norm": 0.48786741495132446, | |
| "learning_rate": 1.6025490524903573e-05, | |
| "loss": 0.0212, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.0023458445040214, | |
| "grad_norm": 0.2508140206336975, | |
| "learning_rate": 1.600872044273017e-05, | |
| "loss": 0.0221, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.0107238605898123, | |
| "grad_norm": 0.32956379652023315, | |
| "learning_rate": 1.5991950360556767e-05, | |
| "loss": 0.0115, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.0191018766756033, | |
| "grad_norm": 0.22912301123142242, | |
| "learning_rate": 1.5975180278383366e-05, | |
| "loss": 0.0127, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.027479892761394, | |
| "grad_norm": 0.29201629757881165, | |
| "learning_rate": 1.5958410196209965e-05, | |
| "loss": 0.0103, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.035857908847185, | |
| "grad_norm": 0.3595946431159973, | |
| "learning_rate": 1.594164011403656e-05, | |
| "loss": 0.0105, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.044235924932976, | |
| "grad_norm": 0.22679433226585388, | |
| "learning_rate": 1.592487003186316e-05, | |
| "loss": 0.0107, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.052613941018767, | |
| "grad_norm": 0.40025532245635986, | |
| "learning_rate": 1.5908099949689754e-05, | |
| "loss": 0.0117, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.060991957104558, | |
| "grad_norm": 0.32900357246398926, | |
| "learning_rate": 1.5891329867516353e-05, | |
| "loss": 0.0115, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.0693699731903483, | |
| "grad_norm": 0.2236577868461609, | |
| "learning_rate": 1.5874559785342948e-05, | |
| "loss": 0.0123, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.0777479892761392, | |
| "grad_norm": 0.3712753355503082, | |
| "learning_rate": 1.5857789703169547e-05, | |
| "loss": 0.0114, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.08612600536193, | |
| "grad_norm": 0.4136362373828888, | |
| "learning_rate": 1.5841019620996142e-05, | |
| "loss": 0.011, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.094504021447721, | |
| "grad_norm": 0.3658868074417114, | |
| "learning_rate": 1.582424953882274e-05, | |
| "loss": 0.0124, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.102882037533512, | |
| "grad_norm": 0.44573381543159485, | |
| "learning_rate": 1.580747945664934e-05, | |
| "loss": 0.0112, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.111260053619303, | |
| "grad_norm": 0.4188709557056427, | |
| "learning_rate": 1.5790709374475935e-05, | |
| "loss": 0.0115, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.119638069705094, | |
| "grad_norm": 0.3570314645767212, | |
| "learning_rate": 1.5773939292302534e-05, | |
| "loss": 0.0112, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.1280160857908847, | |
| "grad_norm": 0.3598877191543579, | |
| "learning_rate": 1.5757169210129132e-05, | |
| "loss": 0.0118, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.1363941018766757, | |
| "grad_norm": 0.3769216239452362, | |
| "learning_rate": 1.5740399127955728e-05, | |
| "loss": 0.0107, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.1447721179624666, | |
| "grad_norm": 0.2821277678012848, | |
| "learning_rate": 1.5723629045782326e-05, | |
| "loss": 0.0118, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.1531501340482575, | |
| "grad_norm": 0.26597416400909424, | |
| "learning_rate": 1.570685896360892e-05, | |
| "loss": 0.0113, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 2.1615281501340484, | |
| "grad_norm": 0.26788029074668884, | |
| "learning_rate": 1.569008888143552e-05, | |
| "loss": 0.0119, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.1699061662198393, | |
| "grad_norm": 0.4225537180900574, | |
| "learning_rate": 1.567331879926212e-05, | |
| "loss": 0.0111, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 2.17828418230563, | |
| "grad_norm": 0.2967151403427124, | |
| "learning_rate": 1.5656548717088714e-05, | |
| "loss": 0.0116, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.1866621983914207, | |
| "grad_norm": 0.37873271107673645, | |
| "learning_rate": 1.5639778634915313e-05, | |
| "loss": 0.0122, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.1950402144772116, | |
| "grad_norm": 0.3496306836605072, | |
| "learning_rate": 1.562300855274191e-05, | |
| "loss": 0.0115, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.2034182305630026, | |
| "grad_norm": 0.2340189516544342, | |
| "learning_rate": 1.5606238470568507e-05, | |
| "loss": 0.0114, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 2.2117962466487935, | |
| "grad_norm": 0.34111320972442627, | |
| "learning_rate": 1.5589468388395102e-05, | |
| "loss": 0.0106, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.2201742627345844, | |
| "grad_norm": 0.4557114839553833, | |
| "learning_rate": 1.55726983062217e-05, | |
| "loss": 0.0124, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 2.2285522788203753, | |
| "grad_norm": 0.3776351511478424, | |
| "learning_rate": 1.55559282240483e-05, | |
| "loss": 0.0109, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.2369302949061662, | |
| "grad_norm": 0.49314960837364197, | |
| "learning_rate": 1.55391581418749e-05, | |
| "loss": 0.0127, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.245308310991957, | |
| "grad_norm": 0.2994402348995209, | |
| "learning_rate": 1.5522388059701494e-05, | |
| "loss": 0.0123, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.253686327077748, | |
| "grad_norm": 0.6113381385803223, | |
| "learning_rate": 1.5505617977528093e-05, | |
| "loss": 0.0122, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 2.262064343163539, | |
| "grad_norm": 0.43357163667678833, | |
| "learning_rate": 1.5488847895354688e-05, | |
| "loss": 0.0109, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.27044235924933, | |
| "grad_norm": 0.5021244287490845, | |
| "learning_rate": 1.5472077813181287e-05, | |
| "loss": 0.013, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 2.278820375335121, | |
| "grad_norm": 0.4794227182865143, | |
| "learning_rate": 1.5455307731007882e-05, | |
| "loss": 0.0125, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.2871983914209117, | |
| "grad_norm": 0.2409118264913559, | |
| "learning_rate": 1.543853764883448e-05, | |
| "loss": 0.012, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.295576407506702, | |
| "grad_norm": 0.36879080533981323, | |
| "learning_rate": 1.542176756666108e-05, | |
| "loss": 0.0101, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.303954423592493, | |
| "grad_norm": 0.2825350761413574, | |
| "learning_rate": 1.5404997484487675e-05, | |
| "loss": 0.0113, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 2.312332439678284, | |
| "grad_norm": 0.5339875221252441, | |
| "learning_rate": 1.5388227402314273e-05, | |
| "loss": 0.0115, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.320710455764075, | |
| "grad_norm": 0.5463636517524719, | |
| "learning_rate": 1.537145732014087e-05, | |
| "loss": 0.0127, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 2.329088471849866, | |
| "grad_norm": 0.37746766209602356, | |
| "learning_rate": 1.5354687237967467e-05, | |
| "loss": 0.0116, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.337466487935657, | |
| "grad_norm": 0.6131693124771118, | |
| "learning_rate": 1.5337917155794063e-05, | |
| "loss": 0.0115, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.3458445040214477, | |
| "grad_norm": 0.3330284059047699, | |
| "learning_rate": 1.532114707362066e-05, | |
| "loss": 0.0111, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.3542225201072386, | |
| "grad_norm": 0.47551050782203674, | |
| "learning_rate": 1.530437699144726e-05, | |
| "loss": 0.0117, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 2.3626005361930296, | |
| "grad_norm": 0.5559821128845215, | |
| "learning_rate": 1.528760690927386e-05, | |
| "loss": 0.0134, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.3709785522788205, | |
| "grad_norm": 0.32303518056869507, | |
| "learning_rate": 1.5270836827100454e-05, | |
| "loss": 0.012, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 2.3793565683646114, | |
| "grad_norm": 0.4595315754413605, | |
| "learning_rate": 1.5254066744927051e-05, | |
| "loss": 0.012, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.3877345844504023, | |
| "grad_norm": 0.5437060594558716, | |
| "learning_rate": 1.5237296662753648e-05, | |
| "loss": 0.0124, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.396112600536193, | |
| "grad_norm": 0.3886863589286804, | |
| "learning_rate": 1.5220526580580245e-05, | |
| "loss": 0.0129, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.4044906166219837, | |
| "grad_norm": 0.5083261132240295, | |
| "learning_rate": 1.5203756498406844e-05, | |
| "loss": 0.0134, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 2.4128686327077746, | |
| "grad_norm": 0.35092031955718994, | |
| "learning_rate": 1.5186986416233441e-05, | |
| "loss": 0.0116, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.4212466487935655, | |
| "grad_norm": 0.4511415660381317, | |
| "learning_rate": 1.5170216334060038e-05, | |
| "loss": 0.013, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 2.4296246648793565, | |
| "grad_norm": 0.5314837694168091, | |
| "learning_rate": 1.5153446251886637e-05, | |
| "loss": 0.0128, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.4380026809651474, | |
| "grad_norm": 0.3129260540008545, | |
| "learning_rate": 1.5136676169713234e-05, | |
| "loss": 0.0121, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 2.4463806970509383, | |
| "grad_norm": 0.3153856694698334, | |
| "learning_rate": 1.511990608753983e-05, | |
| "loss": 0.0138, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 2.454758713136729, | |
| "grad_norm": 0.8036394715309143, | |
| "learning_rate": 1.5103136005366428e-05, | |
| "loss": 0.0121, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 2.46313672922252, | |
| "grad_norm": 0.50925213098526, | |
| "learning_rate": 1.5086365923193025e-05, | |
| "loss": 0.0124, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.471514745308311, | |
| "grad_norm": 0.5606102347373962, | |
| "learning_rate": 1.5069595841019622e-05, | |
| "loss": 0.0124, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 2.479892761394102, | |
| "grad_norm": 0.5037418603897095, | |
| "learning_rate": 1.5052825758846219e-05, | |
| "loss": 0.0136, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 2.488270777479893, | |
| "grad_norm": 0.3871222138404846, | |
| "learning_rate": 1.5036055676672816e-05, | |
| "loss": 0.0129, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 2.496648793565684, | |
| "grad_norm": 0.5839509963989258, | |
| "learning_rate": 1.5019285594499416e-05, | |
| "loss": 0.0123, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 2.5050268096514747, | |
| "grad_norm": 0.7268586754798889, | |
| "learning_rate": 1.5002515512326013e-05, | |
| "loss": 0.012, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 2.5134048257372656, | |
| "grad_norm": 0.3473876118659973, | |
| "learning_rate": 1.498574543015261e-05, | |
| "loss": 0.0126, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.5217828418230566, | |
| "grad_norm": 0.49601665139198303, | |
| "learning_rate": 1.4968975347979207e-05, | |
| "loss": 0.0121, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 2.530160857908847, | |
| "grad_norm": 0.23973305523395538, | |
| "learning_rate": 1.4952205265805804e-05, | |
| "loss": 0.0111, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 2.538538873994638, | |
| "grad_norm": 0.5663930177688599, | |
| "learning_rate": 1.4935435183632401e-05, | |
| "loss": 0.0106, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 2.546916890080429, | |
| "grad_norm": 0.24828468263149261, | |
| "learning_rate": 1.4918665101458998e-05, | |
| "loss": 0.0132, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 2.55529490616622, | |
| "grad_norm": 0.17071287333965302, | |
| "learning_rate": 1.4901895019285595e-05, | |
| "loss": 0.0132, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 2.5636729222520107, | |
| "grad_norm": 0.5064595937728882, | |
| "learning_rate": 1.4885124937112192e-05, | |
| "loss": 0.013, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 2.5720509383378016, | |
| "grad_norm": 0.48466721177101135, | |
| "learning_rate": 1.4868354854938791e-05, | |
| "loss": 0.0112, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 2.5804289544235925, | |
| "grad_norm": 0.6913251876831055, | |
| "learning_rate": 1.4851584772765388e-05, | |
| "loss": 0.0129, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 2.5888069705093835, | |
| "grad_norm": 0.4608655869960785, | |
| "learning_rate": 1.4834814690591985e-05, | |
| "loss": 0.0125, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 2.5971849865951744, | |
| "grad_norm": 0.5575762391090393, | |
| "learning_rate": 1.4818044608418582e-05, | |
| "loss": 0.0122, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.6055630026809653, | |
| "grad_norm": 0.3975880444049835, | |
| "learning_rate": 1.4801274526245179e-05, | |
| "loss": 0.0115, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 2.6139410187667558, | |
| "grad_norm": 0.4161764681339264, | |
| "learning_rate": 1.4784504444071776e-05, | |
| "loss": 0.0143, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 2.6223190348525467, | |
| "grad_norm": 0.5338849425315857, | |
| "learning_rate": 1.4767734361898373e-05, | |
| "loss": 0.0127, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 2.6306970509383376, | |
| "grad_norm": 0.3128230571746826, | |
| "learning_rate": 1.475096427972497e-05, | |
| "loss": 0.0135, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 2.6390750670241285, | |
| "grad_norm": 0.5028887987136841, | |
| "learning_rate": 1.473419419755157e-05, | |
| "loss": 0.0117, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 2.6474530831099194, | |
| "grad_norm": 0.3744266927242279, | |
| "learning_rate": 1.4717424115378167e-05, | |
| "loss": 0.0115, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 2.6558310991957104, | |
| "grad_norm": 0.4230741560459137, | |
| "learning_rate": 1.4700654033204764e-05, | |
| "loss": 0.013, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 2.6642091152815013, | |
| "grad_norm": 0.49401816725730896, | |
| "learning_rate": 1.4683883951031361e-05, | |
| "loss": 0.0115, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 2.672587131367292, | |
| "grad_norm": 0.4584721028804779, | |
| "learning_rate": 1.4667113868857958e-05, | |
| "loss": 0.0133, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 2.680965147453083, | |
| "grad_norm": 0.760981559753418, | |
| "learning_rate": 1.4650343786684555e-05, | |
| "loss": 0.0127, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.689343163538874, | |
| "grad_norm": 0.37186485528945923, | |
| "learning_rate": 1.4633573704511152e-05, | |
| "loss": 0.0134, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 2.697721179624665, | |
| "grad_norm": 0.62066251039505, | |
| "learning_rate": 1.461680362233775e-05, | |
| "loss": 0.0141, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 2.706099195710456, | |
| "grad_norm": 0.3157498240470886, | |
| "learning_rate": 1.4600033540164348e-05, | |
| "loss": 0.0118, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 2.714477211796247, | |
| "grad_norm": 0.4527428448200226, | |
| "learning_rate": 1.4583263457990945e-05, | |
| "loss": 0.0134, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 2.7228552278820377, | |
| "grad_norm": 0.31555086374282837, | |
| "learning_rate": 1.4566493375817544e-05, | |
| "loss": 0.0132, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 2.7312332439678286, | |
| "grad_norm": 0.44448813796043396, | |
| "learning_rate": 1.4549723293644141e-05, | |
| "loss": 0.0124, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 2.7396112600536195, | |
| "grad_norm": 0.4281978905200958, | |
| "learning_rate": 1.4532953211470738e-05, | |
| "loss": 0.0122, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 2.7479892761394105, | |
| "grad_norm": 0.45892074704170227, | |
| "learning_rate": 1.4516183129297335e-05, | |
| "loss": 0.012, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 2.756367292225201, | |
| "grad_norm": 0.30029842257499695, | |
| "learning_rate": 1.4499413047123932e-05, | |
| "loss": 0.0119, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 2.764745308310992, | |
| "grad_norm": 0.3950155973434448, | |
| "learning_rate": 1.4482642964950529e-05, | |
| "loss": 0.0136, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.7731233243967828, | |
| "grad_norm": 0.4550629258155823, | |
| "learning_rate": 1.4465872882777128e-05, | |
| "loss": 0.0122, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 2.7815013404825737, | |
| "grad_norm": 0.5514039397239685, | |
| "learning_rate": 1.4449102800603725e-05, | |
| "loss": 0.0135, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 2.7898793565683646, | |
| "grad_norm": 0.5131493806838989, | |
| "learning_rate": 1.4432332718430322e-05, | |
| "loss": 0.0132, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 2.7982573726541555, | |
| "grad_norm": 0.39987483620643616, | |
| "learning_rate": 1.4415562636256919e-05, | |
| "loss": 0.0126, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 2.8066353887399464, | |
| "grad_norm": 0.5557750463485718, | |
| "learning_rate": 1.4398792554083516e-05, | |
| "loss": 0.0121, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 2.8150134048257374, | |
| "grad_norm": 0.34864020347595215, | |
| "learning_rate": 1.4382022471910113e-05, | |
| "loss": 0.0129, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 2.8233914209115283, | |
| "grad_norm": 0.4396969974040985, | |
| "learning_rate": 1.436525238973671e-05, | |
| "loss": 0.0137, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 2.831769436997319, | |
| "grad_norm": 0.4104606509208679, | |
| "learning_rate": 1.4348482307563307e-05, | |
| "loss": 0.0146, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 2.8401474530831097, | |
| "grad_norm": 0.6937008500099182, | |
| "learning_rate": 1.4331712225389907e-05, | |
| "loss": 0.0123, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 2.8485254691689006, | |
| "grad_norm": 0.5880556106567383, | |
| "learning_rate": 1.4314942143216504e-05, | |
| "loss": 0.0131, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.8569034852546915, | |
| "grad_norm": 0.4264618158340454, | |
| "learning_rate": 1.4298172061043101e-05, | |
| "loss": 0.0133, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 2.8652815013404824, | |
| "grad_norm": 0.5207853317260742, | |
| "learning_rate": 1.4281401978869698e-05, | |
| "loss": 0.0137, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 2.8736595174262733, | |
| "grad_norm": 0.4656062424182892, | |
| "learning_rate": 1.4264631896696295e-05, | |
| "loss": 0.0145, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 2.8820375335120643, | |
| "grad_norm": 0.38702937960624695, | |
| "learning_rate": 1.4247861814522892e-05, | |
| "loss": 0.0122, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 2.890415549597855, | |
| "grad_norm": 0.246555358171463, | |
| "learning_rate": 1.423109173234949e-05, | |
| "loss": 0.0135, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 2.898793565683646, | |
| "grad_norm": 0.2863421142101288, | |
| "learning_rate": 1.4214321650176086e-05, | |
| "loss": 0.0132, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 2.907171581769437, | |
| "grad_norm": 0.31063777208328247, | |
| "learning_rate": 1.4197551568002685e-05, | |
| "loss": 0.0122, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 2.915549597855228, | |
| "grad_norm": 0.5885173082351685, | |
| "learning_rate": 1.4180781485829282e-05, | |
| "loss": 0.0117, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 2.923927613941019, | |
| "grad_norm": 0.41046226024627686, | |
| "learning_rate": 1.4164011403655879e-05, | |
| "loss": 0.0136, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 2.9323056300268098, | |
| "grad_norm": 0.45641854405403137, | |
| "learning_rate": 1.4147241321482476e-05, | |
| "loss": 0.0136, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.9406836461126007, | |
| "grad_norm": 0.3291575610637665, | |
| "learning_rate": 1.4130471239309073e-05, | |
| "loss": 0.0128, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 2.9490616621983916, | |
| "grad_norm": 0.4031969904899597, | |
| "learning_rate": 1.411370115713567e-05, | |
| "loss": 0.0144, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 2.9574396782841825, | |
| "grad_norm": 0.4048541486263275, | |
| "learning_rate": 1.4096931074962267e-05, | |
| "loss": 0.0131, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 2.9658176943699734, | |
| "grad_norm": 0.42356961965560913, | |
| "learning_rate": 1.4080160992788866e-05, | |
| "loss": 0.0136, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 2.974195710455764, | |
| "grad_norm": 0.499991774559021, | |
| "learning_rate": 1.4063390910615464e-05, | |
| "loss": 0.0129, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 2.982573726541555, | |
| "grad_norm": 0.4582955837249756, | |
| "learning_rate": 1.4046620828442061e-05, | |
| "loss": 0.0128, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 2.9909517426273458, | |
| "grad_norm": 0.40763500332832336, | |
| "learning_rate": 1.4029850746268658e-05, | |
| "loss": 0.0125, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 2.9993297587131367, | |
| "grad_norm": 0.2882692515850067, | |
| "learning_rate": 1.4013080664095256e-05, | |
| "loss": 0.0131, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 3.0077077747989276, | |
| "grad_norm": 0.25621238350868225, | |
| "learning_rate": 1.3996310581921853e-05, | |
| "loss": 0.007, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 3.0160857908847185, | |
| "grad_norm": 0.2496500015258789, | |
| "learning_rate": 1.397954049974845e-05, | |
| "loss": 0.0058, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.0244638069705094, | |
| "grad_norm": 0.5392020344734192, | |
| "learning_rate": 1.3962770417575047e-05, | |
| "loss": 0.0064, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 3.0328418230563003, | |
| "grad_norm": 0.142150416970253, | |
| "learning_rate": 1.3946000335401644e-05, | |
| "loss": 0.0049, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 3.0412198391420913, | |
| "grad_norm": 0.2814841866493225, | |
| "learning_rate": 1.392923025322824e-05, | |
| "loss": 0.0048, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 3.049597855227882, | |
| "grad_norm": 0.9062692523002625, | |
| "learning_rate": 1.3912460171054841e-05, | |
| "loss": 0.0054, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 3.057975871313673, | |
| "grad_norm": 0.17520900070667267, | |
| "learning_rate": 1.3895690088881438e-05, | |
| "loss": 0.0047, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 3.066353887399464, | |
| "grad_norm": 0.2684191167354584, | |
| "learning_rate": 1.3878920006708035e-05, | |
| "loss": 0.0055, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 3.0747319034852545, | |
| "grad_norm": 0.2762264013290405, | |
| "learning_rate": 1.3862149924534632e-05, | |
| "loss": 0.006, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 3.0831099195710454, | |
| "grad_norm": 0.16580019891262054, | |
| "learning_rate": 1.3845379842361229e-05, | |
| "loss": 0.0053, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 3.0914879356568363, | |
| "grad_norm": 0.30021271109580994, | |
| "learning_rate": 1.3828609760187826e-05, | |
| "loss": 0.0052, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 3.0998659517426272, | |
| "grad_norm": 0.3511424958705902, | |
| "learning_rate": 1.3811839678014423e-05, | |
| "loss": 0.0053, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.108243967828418, | |
| "grad_norm": 0.2431810349225998, | |
| "learning_rate": 1.379506959584102e-05, | |
| "loss": 0.0054, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 3.116621983914209, | |
| "grad_norm": 0.2419600486755371, | |
| "learning_rate": 1.3778299513667619e-05, | |
| "loss": 0.0053, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 0.3268046975135803, | |
| "learning_rate": 1.3761529431494216e-05, | |
| "loss": 0.0055, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 3.133378016085791, | |
| "grad_norm": 0.38957932591438293, | |
| "learning_rate": 1.3744759349320813e-05, | |
| "loss": 0.0051, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 3.141756032171582, | |
| "grad_norm": 0.31418824195861816, | |
| "learning_rate": 1.372798926714741e-05, | |
| "loss": 0.0052, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 3.1501340482573728, | |
| "grad_norm": 0.3322865068912506, | |
| "learning_rate": 1.3711219184974007e-05, | |
| "loss": 0.0051, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 3.1585120643431637, | |
| "grad_norm": 0.22010941803455353, | |
| "learning_rate": 1.3694449102800604e-05, | |
| "loss": 0.0053, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 3.1668900804289546, | |
| "grad_norm": 0.23425912857055664, | |
| "learning_rate": 1.3677679020627201e-05, | |
| "loss": 0.0055, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 3.1752680965147455, | |
| "grad_norm": 0.30269861221313477, | |
| "learning_rate": 1.3660908938453798e-05, | |
| "loss": 0.0051, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 3.1836461126005364, | |
| "grad_norm": 0.28305545449256897, | |
| "learning_rate": 1.3644138856280398e-05, | |
| "loss": 0.0057, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.192024128686327, | |
| "grad_norm": 0.313149631023407, | |
| "learning_rate": 1.3627368774106995e-05, | |
| "loss": 0.0048, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 3.200402144772118, | |
| "grad_norm": 0.30681276321411133, | |
| "learning_rate": 1.3610598691933592e-05, | |
| "loss": 0.0056, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 3.2087801608579087, | |
| "grad_norm": 0.17815206944942474, | |
| "learning_rate": 1.359382860976019e-05, | |
| "loss": 0.0055, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 3.2171581769436997, | |
| "grad_norm": 0.29173994064331055, | |
| "learning_rate": 1.3577058527586786e-05, | |
| "loss": 0.0057, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 3.2255361930294906, | |
| "grad_norm": 0.3214263916015625, | |
| "learning_rate": 1.3560288445413383e-05, | |
| "loss": 0.0063, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 3.2339142091152815, | |
| "grad_norm": 0.2251535803079605, | |
| "learning_rate": 1.354351836323998e-05, | |
| "loss": 0.006, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 3.2422922252010724, | |
| "grad_norm": 0.4358842372894287, | |
| "learning_rate": 1.3526748281066577e-05, | |
| "loss": 0.0047, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 3.2506702412868633, | |
| "grad_norm": 0.23471078276634216, | |
| "learning_rate": 1.3509978198893176e-05, | |
| "loss": 0.0058, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 3.2590482573726542, | |
| "grad_norm": 0.28291311860084534, | |
| "learning_rate": 1.3493208116719773e-05, | |
| "loss": 0.005, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 3.267426273458445, | |
| "grad_norm": 0.23490838706493378, | |
| "learning_rate": 1.347643803454637e-05, | |
| "loss": 0.0055, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.275804289544236, | |
| "grad_norm": 0.3439931571483612, | |
| "learning_rate": 1.3459667952372967e-05, | |
| "loss": 0.0053, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 3.284182305630027, | |
| "grad_norm": 0.19748039543628693, | |
| "learning_rate": 1.3442897870199564e-05, | |
| "loss": 0.0054, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 3.2925603217158175, | |
| "grad_norm": 0.3718995749950409, | |
| "learning_rate": 1.3426127788026163e-05, | |
| "loss": 0.0051, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 3.3009383378016084, | |
| "grad_norm": 0.49980103969573975, | |
| "learning_rate": 1.340935770585276e-05, | |
| "loss": 0.0056, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 3.3093163538873993, | |
| "grad_norm": 0.5253378748893738, | |
| "learning_rate": 1.3392587623679357e-05, | |
| "loss": 0.0052, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 3.3176943699731902, | |
| "grad_norm": 0.14330442249774933, | |
| "learning_rate": 1.3375817541505956e-05, | |
| "loss": 0.0058, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 3.326072386058981, | |
| "grad_norm": 0.2218172401189804, | |
| "learning_rate": 1.3359047459332553e-05, | |
| "loss": 0.006, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 3.334450402144772, | |
| "grad_norm": 0.4102313816547394, | |
| "learning_rate": 1.334227737715915e-05, | |
| "loss": 0.0062, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 3.342828418230563, | |
| "grad_norm": 0.37395352125167847, | |
| "learning_rate": 1.3325507294985747e-05, | |
| "loss": 0.0061, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 3.351206434316354, | |
| "grad_norm": 0.2626063823699951, | |
| "learning_rate": 1.3308737212812344e-05, | |
| "loss": 0.0053, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.351206434316354, | |
| "eval_loss": 0.03919154778122902, | |
| "eval_runtime": 0.3143, | |
| "eval_samples_per_second": 63.636, | |
| "eval_steps_per_second": 3.182, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.359584450402145, | |
| "grad_norm": 0.1779392808675766, | |
| "learning_rate": 1.329196713063894e-05, | |
| "loss": 0.0059, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 3.3679624664879357, | |
| "grad_norm": 0.37775570154190063, | |
| "learning_rate": 1.3275197048465538e-05, | |
| "loss": 0.0064, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 3.3763404825737267, | |
| "grad_norm": 0.26373809576034546, | |
| "learning_rate": 1.3258426966292135e-05, | |
| "loss": 0.0064, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 3.3847184986595176, | |
| "grad_norm": 0.330445259809494, | |
| "learning_rate": 1.3241656884118735e-05, | |
| "loss": 0.0063, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 3.3930965147453085, | |
| "grad_norm": 0.294837087392807, | |
| "learning_rate": 1.3224886801945332e-05, | |
| "loss": 0.0054, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 3.4014745308310994, | |
| "grad_norm": 0.16401290893554688, | |
| "learning_rate": 1.3208116719771929e-05, | |
| "loss": 0.0057, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 3.4098525469168903, | |
| "grad_norm": 0.5002830624580383, | |
| "learning_rate": 1.3191346637598526e-05, | |
| "loss": 0.0058, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 3.418230563002681, | |
| "grad_norm": 0.321429580450058, | |
| "learning_rate": 1.3174576555425123e-05, | |
| "loss": 0.0067, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 3.4266085790884717, | |
| "grad_norm": 0.28548842668533325, | |
| "learning_rate": 1.315780647325172e-05, | |
| "loss": 0.0055, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 3.4349865951742626, | |
| "grad_norm": 0.20685793459415436, | |
| "learning_rate": 1.3141036391078317e-05, | |
| "loss": 0.0057, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.4433646112600536, | |
| "grad_norm": 0.30995652079582214, | |
| "learning_rate": 1.3124266308904914e-05, | |
| "loss": 0.0055, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 3.4517426273458445, | |
| "grad_norm": 0.41536813974380493, | |
| "learning_rate": 1.3107496226731513e-05, | |
| "loss": 0.0062, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 3.4601206434316354, | |
| "grad_norm": 0.29047590494155884, | |
| "learning_rate": 1.309072614455811e-05, | |
| "loss": 0.0063, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 3.4684986595174263, | |
| "grad_norm": 0.23248636722564697, | |
| "learning_rate": 1.3073956062384707e-05, | |
| "loss": 0.006, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 3.4768766756032172, | |
| "grad_norm": 0.2018858790397644, | |
| "learning_rate": 1.3057185980211304e-05, | |
| "loss": 0.0055, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 3.485254691689008, | |
| "grad_norm": 0.3976786732673645, | |
| "learning_rate": 1.3040415898037901e-05, | |
| "loss": 0.0053, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 3.493632707774799, | |
| "grad_norm": 0.34822383522987366, | |
| "learning_rate": 1.3023645815864498e-05, | |
| "loss": 0.0065, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 3.5020107238605895, | |
| "grad_norm": 0.23607690632343292, | |
| "learning_rate": 1.3006875733691095e-05, | |
| "loss": 0.0059, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 3.5103887399463805, | |
| "grad_norm": 0.40768417716026306, | |
| "learning_rate": 1.2990105651517692e-05, | |
| "loss": 0.0057, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 3.5187667560321714, | |
| "grad_norm": 0.42595741152763367, | |
| "learning_rate": 1.2973335569344292e-05, | |
| "loss": 0.0066, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.5271447721179623, | |
| "grad_norm": 0.4516412615776062, | |
| "learning_rate": 1.295656548717089e-05, | |
| "loss": 0.0062, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 3.535522788203753, | |
| "grad_norm": 0.42684000730514526, | |
| "learning_rate": 1.2939795404997486e-05, | |
| "loss": 0.0059, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 3.543900804289544, | |
| "grad_norm": 0.5775489211082458, | |
| "learning_rate": 1.2923025322824083e-05, | |
| "loss": 0.0054, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 3.552278820375335, | |
| "grad_norm": 0.7901192307472229, | |
| "learning_rate": 1.290625524065068e-05, | |
| "loss": 0.0052, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 3.560656836461126, | |
| "grad_norm": 0.2339819371700287, | |
| "learning_rate": 1.2889485158477277e-05, | |
| "loss": 0.006, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 3.569034852546917, | |
| "grad_norm": 0.34695181250572205, | |
| "learning_rate": 1.2872715076303874e-05, | |
| "loss": 0.005, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 3.577412868632708, | |
| "grad_norm": 0.3339728116989136, | |
| "learning_rate": 1.2855944994130471e-05, | |
| "loss": 0.0056, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 3.5857908847184987, | |
| "grad_norm": 0.22279733419418335, | |
| "learning_rate": 1.2839174911957068e-05, | |
| "loss": 0.0062, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 3.5941689008042896, | |
| "grad_norm": 0.2896275222301483, | |
| "learning_rate": 1.2822404829783667e-05, | |
| "loss": 0.0062, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 3.6025469168900806, | |
| "grad_norm": 0.4125616252422333, | |
| "learning_rate": 1.2805634747610264e-05, | |
| "loss": 0.0064, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.6109249329758715, | |
| "grad_norm": 0.3267725110054016, | |
| "learning_rate": 1.2788864665436861e-05, | |
| "loss": 0.0065, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 3.6193029490616624, | |
| "grad_norm": 0.4519464373588562, | |
| "learning_rate": 1.277209458326346e-05, | |
| "loss": 0.0062, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 3.6276809651474533, | |
| "grad_norm": 0.4503564238548279, | |
| "learning_rate": 1.2755324501090057e-05, | |
| "loss": 0.0068, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 3.6360589812332442, | |
| "grad_norm": 0.14587004482746124, | |
| "learning_rate": 1.2738554418916654e-05, | |
| "loss": 0.0056, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 3.6444369973190347, | |
| "grad_norm": 0.3932003378868103, | |
| "learning_rate": 1.2721784336743251e-05, | |
| "loss": 0.0055, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 3.6528150134048256, | |
| "grad_norm": 0.41266146302223206, | |
| "learning_rate": 1.2705014254569848e-05, | |
| "loss": 0.0058, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 3.6611930294906165, | |
| "grad_norm": 0.40585076808929443, | |
| "learning_rate": 1.2688244172396447e-05, | |
| "loss": 0.0057, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 3.6695710455764075, | |
| "grad_norm": 0.4181327819824219, | |
| "learning_rate": 1.2671474090223044e-05, | |
| "loss": 0.0069, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 3.6779490616621984, | |
| "grad_norm": 0.20495828986167908, | |
| "learning_rate": 1.265470400804964e-05, | |
| "loss": 0.0066, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 3.6863270777479893, | |
| "grad_norm": 0.22110895812511444, | |
| "learning_rate": 1.2637933925876238e-05, | |
| "loss": 0.006, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.69470509383378, | |
| "grad_norm": 0.43901216983795166, | |
| "learning_rate": 1.2621163843702835e-05, | |
| "loss": 0.0064, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 3.703083109919571, | |
| "grad_norm": 0.34933629631996155, | |
| "learning_rate": 1.2604393761529432e-05, | |
| "loss": 0.0062, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 3.711461126005362, | |
| "grad_norm": 0.3028928339481354, | |
| "learning_rate": 1.2587623679356029e-05, | |
| "loss": 0.0065, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 3.719839142091153, | |
| "grad_norm": 0.25583240389823914, | |
| "learning_rate": 1.2570853597182626e-05, | |
| "loss": 0.0067, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 3.7282171581769434, | |
| "grad_norm": 0.15429948270320892, | |
| "learning_rate": 1.2554083515009226e-05, | |
| "loss": 0.0067, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 3.7365951742627344, | |
| "grad_norm": 0.35330894589424133, | |
| "learning_rate": 1.2537313432835823e-05, | |
| "loss": 0.0073, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 3.7449731903485253, | |
| "grad_norm": 0.5058137774467468, | |
| "learning_rate": 1.252054335066242e-05, | |
| "loss": 0.0061, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 3.753351206434316, | |
| "grad_norm": 0.27442070841789246, | |
| "learning_rate": 1.2503773268489017e-05, | |
| "loss": 0.0072, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 3.761729222520107, | |
| "grad_norm": 0.34210237860679626, | |
| "learning_rate": 1.2487003186315614e-05, | |
| "loss": 0.0059, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 3.770107238605898, | |
| "grad_norm": 0.36563077569007874, | |
| "learning_rate": 1.2470233104142211e-05, | |
| "loss": 0.0064, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.778485254691689, | |
| "grad_norm": 0.3370627760887146, | |
| "learning_rate": 1.2453463021968808e-05, | |
| "loss": 0.0054, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 3.78686327077748, | |
| "grad_norm": 0.17612957954406738, | |
| "learning_rate": 1.2436692939795405e-05, | |
| "loss": 0.0071, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 3.795241286863271, | |
| "grad_norm": 0.33844587206840515, | |
| "learning_rate": 1.2419922857622004e-05, | |
| "loss": 0.0059, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 3.8036193029490617, | |
| "grad_norm": 0.22707916796207428, | |
| "learning_rate": 1.2403152775448601e-05, | |
| "loss": 0.0062, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 3.8119973190348526, | |
| "grad_norm": 0.2754456102848053, | |
| "learning_rate": 1.2386382693275198e-05, | |
| "loss": 0.0067, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 3.8203753351206435, | |
| "grad_norm": 0.411072313785553, | |
| "learning_rate": 1.2369612611101795e-05, | |
| "loss": 0.0066, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 3.8287533512064345, | |
| "grad_norm": 0.3319416344165802, | |
| "learning_rate": 1.2352842528928392e-05, | |
| "loss": 0.0061, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 3.8371313672922254, | |
| "grad_norm": 0.20878171920776367, | |
| "learning_rate": 1.2336072446754989e-05, | |
| "loss": 0.0059, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 3.8455093833780163, | |
| "grad_norm": 0.1912664771080017, | |
| "learning_rate": 1.2319302364581586e-05, | |
| "loss": 0.0059, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 3.853887399463807, | |
| "grad_norm": 0.3744626045227051, | |
| "learning_rate": 1.2302532282408185e-05, | |
| "loss": 0.006, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.862265415549598, | |
| "grad_norm": 0.37646523118019104, | |
| "learning_rate": 1.2285762200234783e-05, | |
| "loss": 0.0056, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 3.8706434316353886, | |
| "grad_norm": 0.17005406320095062, | |
| "learning_rate": 1.226899211806138e-05, | |
| "loss": 0.0062, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 3.8790214477211795, | |
| "grad_norm": 0.35684868693351746, | |
| "learning_rate": 1.2252222035887977e-05, | |
| "loss": 0.0069, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 3.8873994638069704, | |
| "grad_norm": 0.37645256519317627, | |
| "learning_rate": 1.2235451953714574e-05, | |
| "loss": 0.0071, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 3.8957774798927614, | |
| "grad_norm": 0.5175814032554626, | |
| "learning_rate": 1.2218681871541171e-05, | |
| "loss": 0.0059, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 3.9041554959785523, | |
| "grad_norm": 0.3317829966545105, | |
| "learning_rate": 1.2201911789367768e-05, | |
| "loss": 0.0064, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 3.912533512064343, | |
| "grad_norm": 0.2565181255340576, | |
| "learning_rate": 1.2185141707194365e-05, | |
| "loss": 0.0069, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 3.920911528150134, | |
| "grad_norm": 0.38244709372520447, | |
| "learning_rate": 1.2168371625020963e-05, | |
| "loss": 0.0068, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 3.929289544235925, | |
| "grad_norm": 0.4136451184749603, | |
| "learning_rate": 1.2151601542847561e-05, | |
| "loss": 0.0067, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 3.937667560321716, | |
| "grad_norm": 0.2662147581577301, | |
| "learning_rate": 1.213483146067416e-05, | |
| "loss": 0.0061, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.946045576407507, | |
| "grad_norm": 0.291955828666687, | |
| "learning_rate": 1.2118061378500757e-05, | |
| "loss": 0.0058, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 3.9544235924932973, | |
| "grad_norm": 0.30662792921066284, | |
| "learning_rate": 1.2101291296327354e-05, | |
| "loss": 0.0071, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 3.9628016085790883, | |
| "grad_norm": 0.5435032844543457, | |
| "learning_rate": 1.2084521214153951e-05, | |
| "loss": 0.0071, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 3.971179624664879, | |
| "grad_norm": 0.2924433648586273, | |
| "learning_rate": 1.2067751131980548e-05, | |
| "loss": 0.0074, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 3.97955764075067, | |
| "grad_norm": 0.47101885080337524, | |
| "learning_rate": 1.2050981049807145e-05, | |
| "loss": 0.0065, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 3.987935656836461, | |
| "grad_norm": 0.39184531569480896, | |
| "learning_rate": 1.2034210967633742e-05, | |
| "loss": 0.0065, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 3.996313672922252, | |
| "grad_norm": 0.27226710319519043, | |
| "learning_rate": 1.201744088546034e-05, | |
| "loss": 0.0058, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 4.004691689008043, | |
| "grad_norm": 0.37524715065956116, | |
| "learning_rate": 1.2000670803286938e-05, | |
| "loss": 0.0045, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 4.013069705093834, | |
| "grad_norm": 0.0983668640255928, | |
| "learning_rate": 1.1983900721113535e-05, | |
| "loss": 0.0023, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 4.021447721179625, | |
| "grad_norm": 0.32168978452682495, | |
| "learning_rate": 1.1967130638940132e-05, | |
| "loss": 0.0024, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.029825737265416, | |
| "grad_norm": 0.2205764651298523, | |
| "learning_rate": 1.1950360556766729e-05, | |
| "loss": 0.0022, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 4.0382037533512065, | |
| "grad_norm": 0.21505975723266602, | |
| "learning_rate": 1.1933590474593326e-05, | |
| "loss": 0.0023, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 4.046581769436997, | |
| "grad_norm": 0.0701180100440979, | |
| "learning_rate": 1.1916820392419923e-05, | |
| "loss": 0.0019, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 4.054959785522788, | |
| "grad_norm": 0.3256973624229431, | |
| "learning_rate": 1.190005031024652e-05, | |
| "loss": 0.0026, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 4.063337801608579, | |
| "grad_norm": 0.308699369430542, | |
| "learning_rate": 1.1883280228073117e-05, | |
| "loss": 0.0024, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 4.07171581769437, | |
| "grad_norm": 0.29565149545669556, | |
| "learning_rate": 1.1866510145899717e-05, | |
| "loss": 0.0026, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 4.080093833780161, | |
| "grad_norm": 0.23600581288337708, | |
| "learning_rate": 1.1849740063726314e-05, | |
| "loss": 0.0021, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 4.088471849865952, | |
| "grad_norm": 0.188632994890213, | |
| "learning_rate": 1.1832969981552911e-05, | |
| "loss": 0.0025, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 4.096849865951743, | |
| "grad_norm": 0.35330700874328613, | |
| "learning_rate": 1.1816199899379508e-05, | |
| "loss": 0.0025, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 4.105227882037534, | |
| "grad_norm": 0.14944002032279968, | |
| "learning_rate": 1.1799429817206105e-05, | |
| "loss": 0.0026, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 4.113605898123325, | |
| "grad_norm": 0.23015423119068146, | |
| "learning_rate": 1.1782659735032702e-05, | |
| "loss": 0.0023, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 4.121983914209116, | |
| "grad_norm": 0.43203842639923096, | |
| "learning_rate": 1.17658896528593e-05, | |
| "loss": 0.0024, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 4.130361930294907, | |
| "grad_norm": 0.16286316514015198, | |
| "learning_rate": 1.1749119570685896e-05, | |
| "loss": 0.0024, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 4.138739946380697, | |
| "grad_norm": 0.44359683990478516, | |
| "learning_rate": 1.1732349488512495e-05, | |
| "loss": 0.0024, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 4.147117962466488, | |
| "grad_norm": 0.07397326827049255, | |
| "learning_rate": 1.1715579406339092e-05, | |
| "loss": 0.0027, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 4.1554959785522785, | |
| "grad_norm": 0.07963547110557556, | |
| "learning_rate": 1.1698809324165689e-05, | |
| "loss": 0.0026, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 4.163873994638069, | |
| "grad_norm": 0.27886438369750977, | |
| "learning_rate": 1.1682039241992286e-05, | |
| "loss": 0.0026, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 4.17225201072386, | |
| "grad_norm": 0.11975943297147751, | |
| "learning_rate": 1.1665269159818883e-05, | |
| "loss": 0.0025, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 4.180630026809651, | |
| "grad_norm": 0.1815500408411026, | |
| "learning_rate": 1.1648499077645482e-05, | |
| "loss": 0.0024, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 4.189008042895442, | |
| "grad_norm": 0.7938502430915833, | |
| "learning_rate": 1.1631728995472079e-05, | |
| "loss": 0.0024, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.197386058981233, | |
| "grad_norm": 0.32321181893348694, | |
| "learning_rate": 1.1614958913298676e-05, | |
| "loss": 0.0026, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 4.205764075067024, | |
| "grad_norm": 0.09507790207862854, | |
| "learning_rate": 1.1598188831125274e-05, | |
| "loss": 0.0023, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 4.214142091152815, | |
| "grad_norm": 0.31748858094215393, | |
| "learning_rate": 1.1581418748951872e-05, | |
| "loss": 0.0025, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 4.222520107238606, | |
| "grad_norm": 0.39395052194595337, | |
| "learning_rate": 1.1564648666778469e-05, | |
| "loss": 0.0027, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 4.230898123324397, | |
| "grad_norm": 0.12810911238193512, | |
| "learning_rate": 1.1547878584605066e-05, | |
| "loss": 0.0023, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 4.239276139410188, | |
| "grad_norm": 0.18891964852809906, | |
| "learning_rate": 1.1531108502431663e-05, | |
| "loss": 0.0024, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 4.247654155495979, | |
| "grad_norm": 0.16266460716724396, | |
| "learning_rate": 1.151433842025826e-05, | |
| "loss": 0.0032, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 4.2560321715817695, | |
| "grad_norm": 0.3381274342536926, | |
| "learning_rate": 1.1497568338084857e-05, | |
| "loss": 0.0027, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 4.26441018766756, | |
| "grad_norm": 0.7718698978424072, | |
| "learning_rate": 1.1480798255911454e-05, | |
| "loss": 0.0026, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 4.272788203753351, | |
| "grad_norm": 0.7734161615371704, | |
| "learning_rate": 1.1464028173738054e-05, | |
| "loss": 0.0029, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 4.281166219839142, | |
| "grad_norm": 0.24889783561229706, | |
| "learning_rate": 1.1447258091564651e-05, | |
| "loss": 0.0027, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 4.289544235924933, | |
| "grad_norm": 0.1416139155626297, | |
| "learning_rate": 1.1430488009391248e-05, | |
| "loss": 0.0023, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 4.297922252010724, | |
| "grad_norm": 0.2628386318683624, | |
| "learning_rate": 1.1413717927217845e-05, | |
| "loss": 0.0026, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 4.306300268096515, | |
| "grad_norm": 0.1891651153564453, | |
| "learning_rate": 1.1396947845044442e-05, | |
| "loss": 0.0021, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 4.314678284182306, | |
| "grad_norm": 0.33927446603775024, | |
| "learning_rate": 1.1380177762871039e-05, | |
| "loss": 0.0023, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 4.323056300268097, | |
| "grad_norm": 0.2871659994125366, | |
| "learning_rate": 1.1363407680697636e-05, | |
| "loss": 0.0025, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 4.331434316353888, | |
| "grad_norm": 0.16000057756900787, | |
| "learning_rate": 1.1346637598524233e-05, | |
| "loss": 0.0028, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 4.339812332439679, | |
| "grad_norm": 0.2464749813079834, | |
| "learning_rate": 1.1329867516350832e-05, | |
| "loss": 0.0027, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 4.348190348525469, | |
| "grad_norm": 0.22312916815280914, | |
| "learning_rate": 1.1313097434177429e-05, | |
| "loss": 0.0021, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 4.35656836461126, | |
| "grad_norm": 0.04916452243924141, | |
| "learning_rate": 1.1296327352004026e-05, | |
| "loss": 0.0028, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.3649463806970505, | |
| "grad_norm": 0.09145969897508621, | |
| "learning_rate": 1.1279557269830623e-05, | |
| "loss": 0.0024, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 4.3733243967828415, | |
| "grad_norm": 0.15269909799098969, | |
| "learning_rate": 1.126278718765722e-05, | |
| "loss": 0.0027, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 4.381702412868632, | |
| "grad_norm": 0.14302955567836761, | |
| "learning_rate": 1.1246017105483817e-05, | |
| "loss": 0.0026, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 4.390080428954423, | |
| "grad_norm": 0.38677042722702026, | |
| "learning_rate": 1.1229247023310414e-05, | |
| "loss": 0.0025, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 4.398458445040214, | |
| "grad_norm": 0.18998374044895172, | |
| "learning_rate": 1.1212476941137011e-05, | |
| "loss": 0.0026, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 4.406836461126005, | |
| "grad_norm": 0.07754815369844437, | |
| "learning_rate": 1.1195706858963611e-05, | |
| "loss": 0.0031, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 4.415214477211796, | |
| "grad_norm": 0.31846073269844055, | |
| "learning_rate": 1.1178936776790208e-05, | |
| "loss": 0.0024, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 4.423592493297587, | |
| "grad_norm": 0.3100847601890564, | |
| "learning_rate": 1.1162166694616805e-05, | |
| "loss": 0.0029, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 4.431970509383378, | |
| "grad_norm": 0.08093760907649994, | |
| "learning_rate": 1.1145396612443402e-05, | |
| "loss": 0.0023, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 4.440348525469169, | |
| "grad_norm": 0.12065700441598892, | |
| "learning_rate": 1.112862653027e-05, | |
| "loss": 0.0024, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 4.44872654155496, | |
| "grad_norm": 0.18668776750564575, | |
| "learning_rate": 1.1111856448096596e-05, | |
| "loss": 0.0021, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 4.457104557640751, | |
| "grad_norm": 0.06442166119813919, | |
| "learning_rate": 1.1095086365923193e-05, | |
| "loss": 0.0022, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 4.465482573726542, | |
| "grad_norm": 0.3723543584346771, | |
| "learning_rate": 1.107831628374979e-05, | |
| "loss": 0.0026, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 4.4738605898123325, | |
| "grad_norm": 0.17430204153060913, | |
| "learning_rate": 1.1061546201576389e-05, | |
| "loss": 0.0025, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 4.482238605898123, | |
| "grad_norm": 0.24499832093715668, | |
| "learning_rate": 1.1044776119402986e-05, | |
| "loss": 0.0026, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 4.490616621983914, | |
| "grad_norm": 0.12160493433475494, | |
| "learning_rate": 1.1028006037229583e-05, | |
| "loss": 0.0027, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 4.498994638069705, | |
| "grad_norm": 0.13980576395988464, | |
| "learning_rate": 1.101123595505618e-05, | |
| "loss": 0.0025, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 4.507372654155496, | |
| "grad_norm": 0.9759100079536438, | |
| "learning_rate": 1.0994465872882779e-05, | |
| "loss": 0.0027, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 4.515750670241287, | |
| "grad_norm": 0.26894333958625793, | |
| "learning_rate": 1.0977695790709376e-05, | |
| "loss": 0.0025, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 4.524128686327078, | |
| "grad_norm": 0.17025631666183472, | |
| "learning_rate": 1.0960925708535973e-05, | |
| "loss": 0.0026, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.532506702412869, | |
| "grad_norm": 0.38619130849838257, | |
| "learning_rate": 1.094415562636257e-05, | |
| "loss": 0.0027, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 4.54088471849866, | |
| "grad_norm": 0.2916272282600403, | |
| "learning_rate": 1.0927385544189169e-05, | |
| "loss": 0.003, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 4.549262734584451, | |
| "grad_norm": 0.26764917373657227, | |
| "learning_rate": 1.0910615462015766e-05, | |
| "loss": 0.0024, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 4.557640750670242, | |
| "grad_norm": 0.1611230969429016, | |
| "learning_rate": 1.0893845379842363e-05, | |
| "loss": 0.003, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 4.566018766756033, | |
| "grad_norm": 0.23360604047775269, | |
| "learning_rate": 1.087707529766896e-05, | |
| "loss": 0.003, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 4.5743967828418235, | |
| "grad_norm": 0.19292519986629486, | |
| "learning_rate": 1.0860305215495557e-05, | |
| "loss": 0.0026, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 4.582774798927614, | |
| "grad_norm": 0.4826861023902893, | |
| "learning_rate": 1.0843535133322154e-05, | |
| "loss": 0.0029, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 4.591152815013404, | |
| "grad_norm": 0.11598275601863861, | |
| "learning_rate": 1.082676505114875e-05, | |
| "loss": 0.003, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 4.599530831099195, | |
| "grad_norm": 0.2721264958381653, | |
| "learning_rate": 1.0809994968975348e-05, | |
| "loss": 0.0032, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 4.607908847184986, | |
| "grad_norm": 0.1644926518201828, | |
| "learning_rate": 1.0793224886801945e-05, | |
| "loss": 0.003, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.616286863270777, | |
| "grad_norm": 0.17666374146938324, | |
| "learning_rate": 1.0776454804628545e-05, | |
| "loss": 0.0028, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 4.624664879356568, | |
| "grad_norm": 0.21726448833942413, | |
| "learning_rate": 1.0759684722455142e-05, | |
| "loss": 0.0025, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 4.633042895442359, | |
| "grad_norm": 0.21972903609275818, | |
| "learning_rate": 1.0742914640281739e-05, | |
| "loss": 0.0028, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 4.64142091152815, | |
| "grad_norm": 0.2247893065214157, | |
| "learning_rate": 1.0726144558108336e-05, | |
| "loss": 0.0026, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 4.649798927613941, | |
| "grad_norm": 0.1289321780204773, | |
| "learning_rate": 1.0709374475934933e-05, | |
| "loss": 0.0032, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 4.658176943699732, | |
| "grad_norm": 0.4954499304294586, | |
| "learning_rate": 1.069260439376153e-05, | |
| "loss": 0.0028, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 4.666554959785523, | |
| "grad_norm": 0.18240614235401154, | |
| "learning_rate": 1.0675834311588127e-05, | |
| "loss": 0.0027, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 4.674932975871314, | |
| "grad_norm": 0.1933482438325882, | |
| "learning_rate": 1.0659064229414724e-05, | |
| "loss": 0.0031, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 4.6833109919571045, | |
| "grad_norm": 0.11678989976644516, | |
| "learning_rate": 1.0642294147241323e-05, | |
| "loss": 0.0032, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 4.6916890080428955, | |
| "grad_norm": 0.18577493727207184, | |
| "learning_rate": 1.062552406506792e-05, | |
| "loss": 0.0026, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.700067024128686, | |
| "grad_norm": 0.07589305937290192, | |
| "learning_rate": 1.0608753982894517e-05, | |
| "loss": 0.003, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 4.708445040214477, | |
| "grad_norm": 0.20295588672161102, | |
| "learning_rate": 1.0591983900721114e-05, | |
| "loss": 0.0028, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 4.716823056300268, | |
| "grad_norm": 0.27186664938926697, | |
| "learning_rate": 1.0575213818547711e-05, | |
| "loss": 0.0033, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 4.725201072386059, | |
| "grad_norm": 0.3505285382270813, | |
| "learning_rate": 1.0558443736374308e-05, | |
| "loss": 0.0029, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 4.73357908847185, | |
| "grad_norm": 0.4643058776855469, | |
| "learning_rate": 1.0541673654200905e-05, | |
| "loss": 0.0033, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 4.741957104557641, | |
| "grad_norm": 0.29970914125442505, | |
| "learning_rate": 1.0524903572027502e-05, | |
| "loss": 0.0029, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 4.750335120643432, | |
| "grad_norm": 0.3563650846481323, | |
| "learning_rate": 1.0508133489854102e-05, | |
| "loss": 0.0026, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 4.758713136729223, | |
| "grad_norm": 0.2816406190395355, | |
| "learning_rate": 1.04913634076807e-05, | |
| "loss": 0.0033, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 4.767091152815014, | |
| "grad_norm": 0.3998458981513977, | |
| "learning_rate": 1.0474593325507296e-05, | |
| "loss": 0.0033, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 4.775469168900805, | |
| "grad_norm": 0.29152771830558777, | |
| "learning_rate": 1.0457823243333893e-05, | |
| "loss": 0.0031, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.783847184986596, | |
| "grad_norm": 0.3733079731464386, | |
| "learning_rate": 1.044105316116049e-05, | |
| "loss": 0.0031, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 4.792225201072386, | |
| "grad_norm": 0.2442307472229004, | |
| "learning_rate": 1.0424283078987087e-05, | |
| "loss": 0.0032, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 4.8006032171581765, | |
| "grad_norm": 0.6178602576255798, | |
| "learning_rate": 1.0407512996813684e-05, | |
| "loss": 0.0029, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 4.808981233243967, | |
| "grad_norm": 0.3169240951538086, | |
| "learning_rate": 1.0390742914640281e-05, | |
| "loss": 0.0029, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 4.817359249329758, | |
| "grad_norm": 0.21497473120689392, | |
| "learning_rate": 1.037397283246688e-05, | |
| "loss": 0.0032, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 4.825737265415549, | |
| "grad_norm": 0.4647163450717926, | |
| "learning_rate": 1.0357202750293477e-05, | |
| "loss": 0.0027, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 4.83411528150134, | |
| "grad_norm": 0.18522508442401886, | |
| "learning_rate": 1.0340432668120076e-05, | |
| "loss": 0.0026, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 4.842493297587131, | |
| "grad_norm": 0.201819509267807, | |
| "learning_rate": 1.0323662585946673e-05, | |
| "loss": 0.0025, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 4.850871313672922, | |
| "grad_norm": 0.2343200445175171, | |
| "learning_rate": 1.030689250377327e-05, | |
| "loss": 0.0027, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 4.859249329758713, | |
| "grad_norm": 0.164067804813385, | |
| "learning_rate": 1.0290122421599867e-05, | |
| "loss": 0.0032, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.867627345844504, | |
| "grad_norm": 1.0820327997207642, | |
| "learning_rate": 1.0273352339426464e-05, | |
| "loss": 0.0033, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 4.876005361930295, | |
| "grad_norm": 0.18911249935626984, | |
| "learning_rate": 1.0256582257253061e-05, | |
| "loss": 0.0024, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 4.884383378016086, | |
| "grad_norm": 0.1403694599866867, | |
| "learning_rate": 1.023981217507966e-05, | |
| "loss": 0.0028, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 4.892761394101877, | |
| "grad_norm": 0.20968593657016754, | |
| "learning_rate": 1.0223042092906257e-05, | |
| "loss": 0.0034, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 4.9011394101876675, | |
| "grad_norm": 0.26832762360572815, | |
| "learning_rate": 1.0206272010732854e-05, | |
| "loss": 0.0035, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 4.909517426273458, | |
| "grad_norm": 0.26747608184814453, | |
| "learning_rate": 1.018950192855945e-05, | |
| "loss": 0.0034, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 4.917895442359249, | |
| "grad_norm": 0.999813437461853, | |
| "learning_rate": 1.0172731846386048e-05, | |
| "loss": 0.0029, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 4.92627345844504, | |
| "grad_norm": 0.33220162987709045, | |
| "learning_rate": 1.0155961764212645e-05, | |
| "loss": 0.0033, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 4.934651474530831, | |
| "grad_norm": 0.13821391761302948, | |
| "learning_rate": 1.0139191682039242e-05, | |
| "loss": 0.003, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 4.943029490616622, | |
| "grad_norm": 0.33970770239830017, | |
| "learning_rate": 1.0122421599865839e-05, | |
| "loss": 0.0028, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.951407506702413, | |
| "grad_norm": 0.09418370574712753, | |
| "learning_rate": 1.0105651517692439e-05, | |
| "loss": 0.0029, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 4.959785522788204, | |
| "grad_norm": 0.10416509956121445, | |
| "learning_rate": 1.0088881435519036e-05, | |
| "loss": 0.0031, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 4.968163538873995, | |
| "grad_norm": 0.7082052230834961, | |
| "learning_rate": 1.0072111353345633e-05, | |
| "loss": 0.0025, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 4.976541554959786, | |
| "grad_norm": 0.3095639944076538, | |
| "learning_rate": 1.005534127117223e-05, | |
| "loss": 0.0035, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 4.984919571045577, | |
| "grad_norm": 0.119889035820961, | |
| "learning_rate": 1.0038571188998827e-05, | |
| "loss": 0.0033, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 4.993297587131368, | |
| "grad_norm": 0.29492849111557007, | |
| "learning_rate": 1.0021801106825424e-05, | |
| "loss": 0.0029, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 5.0016756032171585, | |
| "grad_norm": 0.0865137055516243, | |
| "learning_rate": 1.0005031024652021e-05, | |
| "loss": 0.0028, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 5.0100536193029495, | |
| "grad_norm": 0.22691671550273895, | |
| "learning_rate": 9.98826094247862e-06, | |
| "loss": 0.0011, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 5.01843163538874, | |
| "grad_norm": 0.0516495518386364, | |
| "learning_rate": 9.971490860305217e-06, | |
| "loss": 0.0012, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 5.02680965147453, | |
| "grad_norm": 0.08190739154815674, | |
| "learning_rate": 9.954720778131814e-06, | |
| "loss": 0.0011, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.02680965147453, | |
| "eval_loss": 0.05592558532953262, | |
| "eval_runtime": 0.3145, | |
| "eval_samples_per_second": 63.586, | |
| "eval_steps_per_second": 3.179, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.035187667560321, | |
| "grad_norm": 0.025882409885525703, | |
| "learning_rate": 9.937950695958411e-06, | |
| "loss": 0.0008, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 5.043565683646112, | |
| "grad_norm": 0.12556754052639008, | |
| "learning_rate": 9.921180613785008e-06, | |
| "loss": 0.001, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 5.051943699731903, | |
| "grad_norm": 0.09527916461229324, | |
| "learning_rate": 9.904410531611605e-06, | |
| "loss": 0.0011, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 5.060321715817694, | |
| "grad_norm": 0.30216673016548157, | |
| "learning_rate": 9.887640449438202e-06, | |
| "loss": 0.0013, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 5.068699731903485, | |
| "grad_norm": 0.030431820079684258, | |
| "learning_rate": 9.8708703672648e-06, | |
| "loss": 0.0011, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 5.077077747989276, | |
| "grad_norm": 0.08387458324432373, | |
| "learning_rate": 9.854100285091398e-06, | |
| "loss": 0.0008, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 5.085455764075067, | |
| "grad_norm": 0.12422385066747665, | |
| "learning_rate": 9.837330202917995e-06, | |
| "loss": 0.001, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 5.093833780160858, | |
| "grad_norm": 0.05497809499502182, | |
| "learning_rate": 9.820560120744592e-06, | |
| "loss": 0.001, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 5.102211796246649, | |
| "grad_norm": 0.21757960319519043, | |
| "learning_rate": 9.803790038571189e-06, | |
| "loss": 0.0011, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 5.11058981233244, | |
| "grad_norm": 0.2624013423919678, | |
| "learning_rate": 9.787019956397787e-06, | |
| "loss": 0.0012, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 5.1189678284182305, | |
| "grad_norm": 0.12058177590370178, | |
| "learning_rate": 9.770249874224384e-06, | |
| "loss": 0.0011, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 5.127345844504021, | |
| "grad_norm": 0.09058215469121933, | |
| "learning_rate": 9.753479792050981e-06, | |
| "loss": 0.001, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 5.135723860589812, | |
| "grad_norm": 0.07257585972547531, | |
| "learning_rate": 9.736709709877578e-06, | |
| "loss": 0.0011, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 5.144101876675603, | |
| "grad_norm": 0.07394664734601974, | |
| "learning_rate": 9.719939627704177e-06, | |
| "loss": 0.0014, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 5.152479892761394, | |
| "grad_norm": 0.08835545182228088, | |
| "learning_rate": 9.703169545530774e-06, | |
| "loss": 0.0011, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 5.160857908847185, | |
| "grad_norm": 0.05140378698706627, | |
| "learning_rate": 9.686399463357371e-06, | |
| "loss": 0.0011, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 5.169235924932976, | |
| "grad_norm": 0.11998426169157028, | |
| "learning_rate": 9.669629381183968e-06, | |
| "loss": 0.0013, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 5.177613941018767, | |
| "grad_norm": 0.11740259826183319, | |
| "learning_rate": 9.652859299010567e-06, | |
| "loss": 0.0011, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 5.185991957104558, | |
| "grad_norm": 0.3295953869819641, | |
| "learning_rate": 9.636089216837164e-06, | |
| "loss": 0.0011, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 5.194369973190349, | |
| "grad_norm": 0.21252810955047607, | |
| "learning_rate": 9.619319134663761e-06, | |
| "loss": 0.0012, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 5.20274798927614, | |
| "grad_norm": 0.1684710681438446, | |
| "learning_rate": 9.602549052490358e-06, | |
| "loss": 0.001, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 5.211126005361931, | |
| "grad_norm": 0.30938273668289185, | |
| "learning_rate": 9.585778970316955e-06, | |
| "loss": 0.0013, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 5.2195040214477215, | |
| "grad_norm": 0.13435423374176025, | |
| "learning_rate": 9.569008888143552e-06, | |
| "loss": 0.0012, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 5.227882037533512, | |
| "grad_norm": 0.24395543336868286, | |
| "learning_rate": 9.552238805970149e-06, | |
| "loss": 0.001, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 5.236260053619303, | |
| "grad_norm": 0.07691800594329834, | |
| "learning_rate": 9.535468723796748e-06, | |
| "loss": 0.0011, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 5.244638069705093, | |
| "grad_norm": 0.07506980746984482, | |
| "learning_rate": 9.518698641623345e-06, | |
| "loss": 0.001, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 5.253016085790884, | |
| "grad_norm": 0.09802160412073135, | |
| "learning_rate": 9.501928559449942e-06, | |
| "loss": 0.0009, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 5.261394101876675, | |
| "grad_norm": 0.08386828005313873, | |
| "learning_rate": 9.485158477276539e-06, | |
| "loss": 0.0011, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 5.269772117962466, | |
| "grad_norm": 0.23838509619235992, | |
| "learning_rate": 9.468388395103136e-06, | |
| "loss": 0.0011, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 5.278150134048257, | |
| "grad_norm": 0.06687796860933304, | |
| "learning_rate": 9.451618312929734e-06, | |
| "loss": 0.001, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 5.286528150134048, | |
| "grad_norm": 0.06589027494192123, | |
| "learning_rate": 9.434848230756332e-06, | |
| "loss": 0.0011, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 5.294906166219839, | |
| "grad_norm": 0.21493591368198395, | |
| "learning_rate": 9.418078148582929e-06, | |
| "loss": 0.0012, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 5.30328418230563, | |
| "grad_norm": 0.20591777563095093, | |
| "learning_rate": 9.401308066409526e-06, | |
| "loss": 0.0012, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 5.311662198391421, | |
| "grad_norm": 0.16145972907543182, | |
| "learning_rate": 9.384537984236124e-06, | |
| "loss": 0.0012, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 5.320040214477212, | |
| "grad_norm": 0.07270830124616623, | |
| "learning_rate": 9.367767902062721e-06, | |
| "loss": 0.001, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 5.328418230563003, | |
| "grad_norm": 0.12921959161758423, | |
| "learning_rate": 9.350997819889318e-06, | |
| "loss": 0.0012, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 5.3367962466487935, | |
| "grad_norm": 0.24689610302448273, | |
| "learning_rate": 9.334227737715915e-06, | |
| "loss": 0.0011, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 5.345174262734584, | |
| "grad_norm": 0.14935541152954102, | |
| "learning_rate": 9.317457655542514e-06, | |
| "loss": 0.001, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 5.353552278820375, | |
| "grad_norm": 0.05026477575302124, | |
| "learning_rate": 9.300687573369111e-06, | |
| "loss": 0.0009, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 5.361930294906166, | |
| "grad_norm": 0.11298377066850662, | |
| "learning_rate": 9.283917491195708e-06, | |
| "loss": 0.0012, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.370308310991957, | |
| "grad_norm": 0.04898526519536972, | |
| "learning_rate": 9.267147409022305e-06, | |
| "loss": 0.0015, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 5.378686327077748, | |
| "grad_norm": 0.07678736001253128, | |
| "learning_rate": 9.250377326848902e-06, | |
| "loss": 0.0013, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 5.387064343163539, | |
| "grad_norm": 0.08714163303375244, | |
| "learning_rate": 9.233607244675499e-06, | |
| "loss": 0.001, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 5.39544235924933, | |
| "grad_norm": 0.14269877970218658, | |
| "learning_rate": 9.216837162502098e-06, | |
| "loss": 0.001, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 5.403820375335121, | |
| "grad_norm": 0.0840928927063942, | |
| "learning_rate": 9.200067080328695e-06, | |
| "loss": 0.0012, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 5.412198391420912, | |
| "grad_norm": 0.049975261092185974, | |
| "learning_rate": 9.183296998155292e-06, | |
| "loss": 0.0013, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 5.420576407506703, | |
| "grad_norm": 0.10942261666059494, | |
| "learning_rate": 9.166526915981889e-06, | |
| "loss": 0.0013, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 5.428954423592494, | |
| "grad_norm": 0.09510983526706696, | |
| "learning_rate": 9.149756833808486e-06, | |
| "loss": 0.0013, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 5.4373324396782845, | |
| "grad_norm": 0.07059191167354584, | |
| "learning_rate": 9.132986751635083e-06, | |
| "loss": 0.0013, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 5.445710455764075, | |
| "grad_norm": 0.09271156042814255, | |
| "learning_rate": 9.116216669461682e-06, | |
| "loss": 0.0014, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 5.454088471849866, | |
| "grad_norm": 0.5445387363433838, | |
| "learning_rate": 9.099446587288279e-06, | |
| "loss": 0.0017, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 5.462466487935657, | |
| "grad_norm": 0.5922443270683289, | |
| "learning_rate": 9.082676505114876e-06, | |
| "loss": 0.0012, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 5.470844504021448, | |
| "grad_norm": 0.08508787304162979, | |
| "learning_rate": 9.065906422941473e-06, | |
| "loss": 0.0013, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 5.479222520107238, | |
| "grad_norm": 0.10297244787216187, | |
| "learning_rate": 9.049136340768071e-06, | |
| "loss": 0.0015, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 5.487600536193029, | |
| "grad_norm": 0.20003701746463776, | |
| "learning_rate": 9.032366258594668e-06, | |
| "loss": 0.001, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 5.49597855227882, | |
| "grad_norm": 0.33047032356262207, | |
| "learning_rate": 9.015596176421265e-06, | |
| "loss": 0.0011, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 5.504356568364611, | |
| "grad_norm": 0.12265091389417648, | |
| "learning_rate": 8.998826094247862e-06, | |
| "loss": 0.0012, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 5.512734584450402, | |
| "grad_norm": 0.1573624163866043, | |
| "learning_rate": 8.982056012074461e-06, | |
| "loss": 0.0013, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 5.521112600536193, | |
| "grad_norm": 0.13570530712604523, | |
| "learning_rate": 8.965285929901058e-06, | |
| "loss": 0.0012, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 5.529490616621984, | |
| "grad_norm": 0.1362573504447937, | |
| "learning_rate": 8.948515847727655e-06, | |
| "loss": 0.0015, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.537868632707775, | |
| "grad_norm": 0.16211983561515808, | |
| "learning_rate": 8.931745765554252e-06, | |
| "loss": 0.0013, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 5.5462466487935655, | |
| "grad_norm": 0.1816491037607193, | |
| "learning_rate": 8.914975683380849e-06, | |
| "loss": 0.0013, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 5.5546246648793565, | |
| "grad_norm": 0.12222578376531601, | |
| "learning_rate": 8.898205601207446e-06, | |
| "loss": 0.0012, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 5.563002680965147, | |
| "grad_norm": 0.24587097764015198, | |
| "learning_rate": 8.881435519034045e-06, | |
| "loss": 0.0014, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 5.571380697050938, | |
| "grad_norm": 0.13261163234710693, | |
| "learning_rate": 8.864665436860642e-06, | |
| "loss": 0.0012, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 5.579758713136729, | |
| "grad_norm": 0.1050226092338562, | |
| "learning_rate": 8.847895354687239e-06, | |
| "loss": 0.0013, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 5.58813672922252, | |
| "grad_norm": 0.1119270995259285, | |
| "learning_rate": 8.831125272513836e-06, | |
| "loss": 0.0013, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 5.596514745308311, | |
| "grad_norm": 0.41666361689567566, | |
| "learning_rate": 8.814355190340433e-06, | |
| "loss": 0.0013, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 5.604892761394102, | |
| "grad_norm": 0.21499872207641602, | |
| "learning_rate": 8.79758510816703e-06, | |
| "loss": 0.0013, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 5.613270777479893, | |
| "grad_norm": 0.1437048465013504, | |
| "learning_rate": 8.780815025993627e-06, | |
| "loss": 0.0013, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 5.621648793565684, | |
| "grad_norm": 0.10052605718374252, | |
| "learning_rate": 8.764044943820226e-06, | |
| "loss": 0.0016, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 5.630026809651475, | |
| "grad_norm": 0.16671398282051086, | |
| "learning_rate": 8.747274861646823e-06, | |
| "loss": 0.0013, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 5.638404825737266, | |
| "grad_norm": 0.1302991360425949, | |
| "learning_rate": 8.73050477947342e-06, | |
| "loss": 0.0013, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 5.646782841823057, | |
| "grad_norm": 0.07595470547676086, | |
| "learning_rate": 8.713734697300017e-06, | |
| "loss": 0.0013, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 5.6551608579088475, | |
| "grad_norm": 0.08618602156639099, | |
| "learning_rate": 8.696964615126615e-06, | |
| "loss": 0.0011, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 5.663538873994638, | |
| "grad_norm": 0.18707716464996338, | |
| "learning_rate": 8.680194532953212e-06, | |
| "loss": 0.0014, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 5.671916890080429, | |
| "grad_norm": 0.167672261595726, | |
| "learning_rate": 8.66342445077981e-06, | |
| "loss": 0.0015, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 5.680294906166219, | |
| "grad_norm": 0.15763333439826965, | |
| "learning_rate": 8.646654368606406e-06, | |
| "loss": 0.0015, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 5.68867292225201, | |
| "grad_norm": 0.1692523956298828, | |
| "learning_rate": 8.629884286433005e-06, | |
| "loss": 0.0014, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 5.697050938337801, | |
| "grad_norm": 0.06355728209018707, | |
| "learning_rate": 8.613114204259602e-06, | |
| "loss": 0.0016, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.705428954423592, | |
| "grad_norm": 0.06145229935646057, | |
| "learning_rate": 8.596344122086199e-06, | |
| "loss": 0.0014, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 5.713806970509383, | |
| "grad_norm": 0.1229192316532135, | |
| "learning_rate": 8.579574039912796e-06, | |
| "loss": 0.0016, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 5.722184986595174, | |
| "grad_norm": 0.07723601907491684, | |
| "learning_rate": 8.562803957739395e-06, | |
| "loss": 0.0012, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 5.730563002680965, | |
| "grad_norm": 0.16651087999343872, | |
| "learning_rate": 8.546033875565992e-06, | |
| "loss": 0.0012, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 5.738941018766756, | |
| "grad_norm": 0.19212378561496735, | |
| "learning_rate": 8.529263793392589e-06, | |
| "loss": 0.0015, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 5.747319034852547, | |
| "grad_norm": 0.14563122391700745, | |
| "learning_rate": 8.512493711219186e-06, | |
| "loss": 0.0013, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 5.755697050938338, | |
| "grad_norm": 0.14981712400913239, | |
| "learning_rate": 8.495723629045783e-06, | |
| "loss": 0.0013, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 5.7640750670241285, | |
| "grad_norm": 0.3324640989303589, | |
| "learning_rate": 8.47895354687238e-06, | |
| "loss": 0.0014, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 5.7724530831099194, | |
| "grad_norm": 0.2827085256576538, | |
| "learning_rate": 8.462183464698977e-06, | |
| "loss": 0.0014, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 5.78083109919571, | |
| "grad_norm": 0.3784811198711395, | |
| "learning_rate": 8.445413382525574e-06, | |
| "loss": 0.0015, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 5.789209115281501, | |
| "grad_norm": 0.08754808455705643, | |
| "learning_rate": 8.428643300352173e-06, | |
| "loss": 0.0013, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 5.797587131367292, | |
| "grad_norm": 0.2719215750694275, | |
| "learning_rate": 8.41187321817877e-06, | |
| "loss": 0.0014, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 5.805965147453083, | |
| "grad_norm": 0.17088165879249573, | |
| "learning_rate": 8.395103136005367e-06, | |
| "loss": 0.0016, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 5.814343163538874, | |
| "grad_norm": 0.3564954400062561, | |
| "learning_rate": 8.378333053831964e-06, | |
| "loss": 0.0013, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 5.822721179624665, | |
| "grad_norm": 0.1912204474210739, | |
| "learning_rate": 8.361562971658562e-06, | |
| "loss": 0.0017, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 5.831099195710456, | |
| "grad_norm": 0.2299826443195343, | |
| "learning_rate": 8.34479288948516e-06, | |
| "loss": 0.0011, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 5.839477211796247, | |
| "grad_norm": 0.07874714583158493, | |
| "learning_rate": 8.328022807311756e-06, | |
| "loss": 0.0016, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 5.847855227882038, | |
| "grad_norm": 0.1063261479139328, | |
| "learning_rate": 8.311252725138353e-06, | |
| "loss": 0.0014, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 5.856233243967829, | |
| "grad_norm": 0.07933440804481506, | |
| "learning_rate": 8.294482642964952e-06, | |
| "loss": 0.0013, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 5.8646112600536195, | |
| "grad_norm": 0.14268645644187927, | |
| "learning_rate": 8.277712560791549e-06, | |
| "loss": 0.0014, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.8729892761394105, | |
| "grad_norm": 0.24926510453224182, | |
| "learning_rate": 8.260942478618146e-06, | |
| "loss": 0.0014, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 5.881367292225201, | |
| "grad_norm": 0.12582330405712128, | |
| "learning_rate": 8.244172396444743e-06, | |
| "loss": 0.0013, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 5.889745308310992, | |
| "grad_norm": 0.04589623957872391, | |
| "learning_rate": 8.227402314271342e-06, | |
| "loss": 0.0012, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 5.898123324396783, | |
| "grad_norm": 0.3229510188102722, | |
| "learning_rate": 8.210632232097939e-06, | |
| "loss": 0.0011, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 5.906501340482574, | |
| "grad_norm": 0.12953703105449677, | |
| "learning_rate": 8.193862149924536e-06, | |
| "loss": 0.0013, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 5.914879356568365, | |
| "grad_norm": 0.31099674105644226, | |
| "learning_rate": 8.177092067751133e-06, | |
| "loss": 0.0013, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 5.923257372654156, | |
| "grad_norm": 0.17244383692741394, | |
| "learning_rate": 8.16032198557773e-06, | |
| "loss": 0.0012, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 5.931635388739946, | |
| "grad_norm": 0.6794390678405762, | |
| "learning_rate": 8.143551903404327e-06, | |
| "loss": 0.0011, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 5.940013404825737, | |
| "grad_norm": 0.15421807765960693, | |
| "learning_rate": 8.126781821230924e-06, | |
| "loss": 0.0015, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 5.948391420911528, | |
| "grad_norm": 0.09070286899805069, | |
| "learning_rate": 8.110011739057521e-06, | |
| "loss": 0.0015, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 5.956769436997319, | |
| "grad_norm": 0.13311493396759033, | |
| "learning_rate": 8.09324165688412e-06, | |
| "loss": 0.0014, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 5.96514745308311, | |
| "grad_norm": 0.3226371705532074, | |
| "learning_rate": 8.076471574710717e-06, | |
| "loss": 0.0015, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 5.973525469168901, | |
| "grad_norm": 0.25139835476875305, | |
| "learning_rate": 8.059701492537314e-06, | |
| "loss": 0.0016, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 5.9819034852546915, | |
| "grad_norm": 0.14944802224636078, | |
| "learning_rate": 8.04293141036391e-06, | |
| "loss": 0.0013, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 5.990281501340482, | |
| "grad_norm": 0.210645392537117, | |
| "learning_rate": 8.02616132819051e-06, | |
| "loss": 0.0014, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 5.998659517426273, | |
| "grad_norm": 0.3287517726421356, | |
| "learning_rate": 8.009391246017106e-06, | |
| "loss": 0.0013, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 6.007037533512064, | |
| "grad_norm": 0.08051057904958725, | |
| "learning_rate": 7.992621163843703e-06, | |
| "loss": 0.0007, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 6.015415549597855, | |
| "grad_norm": 0.03923693299293518, | |
| "learning_rate": 7.9758510816703e-06, | |
| "loss": 0.0006, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 6.023793565683646, | |
| "grad_norm": 0.03783218562602997, | |
| "learning_rate": 7.959080999496899e-06, | |
| "loss": 0.0009, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 6.032171581769437, | |
| "grad_norm": 0.251902312040329, | |
| "learning_rate": 7.942310917323496e-06, | |
| "loss": 0.0007, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 6.040549597855228, | |
| "grad_norm": 0.04270997270941734, | |
| "learning_rate": 7.925540835150093e-06, | |
| "loss": 0.0006, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 6.048927613941019, | |
| "grad_norm": 0.07284736633300781, | |
| "learning_rate": 7.90877075297669e-06, | |
| "loss": 0.0007, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 6.05730563002681, | |
| "grad_norm": 0.06689571589231491, | |
| "learning_rate": 7.892000670803289e-06, | |
| "loss": 0.0007, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 6.065683646112601, | |
| "grad_norm": 0.024217478930950165, | |
| "learning_rate": 7.875230588629886e-06, | |
| "loss": 0.0006, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 6.074061662198392, | |
| "grad_norm": 0.09656205028295517, | |
| "learning_rate": 7.858460506456483e-06, | |
| "loss": 0.0006, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 6.0824396782841825, | |
| "grad_norm": 0.030422423034906387, | |
| "learning_rate": 7.84169042428308e-06, | |
| "loss": 0.0007, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 6.0908176943699734, | |
| "grad_norm": 0.08347397297620773, | |
| "learning_rate": 7.824920342109677e-06, | |
| "loss": 0.0006, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 6.099195710455764, | |
| "grad_norm": 0.027809837833046913, | |
| "learning_rate": 7.808150259936274e-06, | |
| "loss": 0.0006, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 6.107573726541555, | |
| "grad_norm": 0.2227599024772644, | |
| "learning_rate": 7.791380177762871e-06, | |
| "loss": 0.0007, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 6.115951742627346, | |
| "grad_norm": 0.1868954300880432, | |
| "learning_rate": 7.774610095589468e-06, | |
| "loss": 0.0007, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 6.124329758713137, | |
| "grad_norm": 0.1395631730556488, | |
| "learning_rate": 7.757840013416067e-06, | |
| "loss": 0.0005, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 6.132707774798928, | |
| "grad_norm": 0.029958348721265793, | |
| "learning_rate": 7.741069931242664e-06, | |
| "loss": 0.0005, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 6.141085790884718, | |
| "grad_norm": 0.04532192647457123, | |
| "learning_rate": 7.72429984906926e-06, | |
| "loss": 0.0007, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 6.149463806970509, | |
| "grad_norm": 0.0366247221827507, | |
| "learning_rate": 7.707529766895858e-06, | |
| "loss": 0.0005, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 6.1578418230563, | |
| "grad_norm": 0.07616298645734787, | |
| "learning_rate": 7.690759684722455e-06, | |
| "loss": 0.0007, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 6.166219839142091, | |
| "grad_norm": 0.05013656988739967, | |
| "learning_rate": 7.673989602549053e-06, | |
| "loss": 0.0007, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 6.174597855227882, | |
| "grad_norm": 0.08419755846261978, | |
| "learning_rate": 7.65721952037565e-06, | |
| "loss": 0.0005, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 6.182975871313673, | |
| "grad_norm": 0.11614430695772171, | |
| "learning_rate": 7.640449438202247e-06, | |
| "loss": 0.0004, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 6.191353887399464, | |
| "grad_norm": 0.1487479954957962, | |
| "learning_rate": 7.6236793560288445e-06, | |
| "loss": 0.0007, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 6.1997319034852545, | |
| "grad_norm": 0.1483132392168045, | |
| "learning_rate": 7.606909273855443e-06, | |
| "loss": 0.0006, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 6.208109919571045, | |
| "grad_norm": 0.034125398844480515, | |
| "learning_rate": 7.59013919168204e-06, | |
| "loss": 0.0005, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 6.216487935656836, | |
| "grad_norm": 0.0705786794424057, | |
| "learning_rate": 7.573369109508637e-06, | |
| "loss": 0.0007, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 6.224865951742627, | |
| "grad_norm": 0.03942383453249931, | |
| "learning_rate": 7.556599027335234e-06, | |
| "loss": 0.0007, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 6.233243967828418, | |
| "grad_norm": 0.045597631484270096, | |
| "learning_rate": 7.539828945161832e-06, | |
| "loss": 0.0006, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 6.241621983914209, | |
| "grad_norm": 0.06973922997713089, | |
| "learning_rate": 7.523058862988429e-06, | |
| "loss": 0.0005, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 0.042143791913986206, | |
| "learning_rate": 7.506288780815026e-06, | |
| "loss": 0.0006, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 6.258378016085791, | |
| "grad_norm": 0.02553519792854786, | |
| "learning_rate": 7.489518698641623e-06, | |
| "loss": 0.0005, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 6.266756032171582, | |
| "grad_norm": 0.07629157602787018, | |
| "learning_rate": 7.472748616468222e-06, | |
| "loss": 0.0005, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 6.275134048257373, | |
| "grad_norm": 0.13083019852638245, | |
| "learning_rate": 7.455978534294819e-06, | |
| "loss": 0.0006, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 6.283512064343164, | |
| "grad_norm": 0.024578507989645004, | |
| "learning_rate": 7.439208452121416e-06, | |
| "loss": 0.0005, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 6.291890080428955, | |
| "grad_norm": 0.2308337688446045, | |
| "learning_rate": 7.422438369948013e-06, | |
| "loss": 0.0005, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 6.3002680965147455, | |
| "grad_norm": 0.09496274590492249, | |
| "learning_rate": 7.405668287774611e-06, | |
| "loss": 0.0005, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 6.308646112600536, | |
| "grad_norm": 0.06111710146069527, | |
| "learning_rate": 7.388898205601209e-06, | |
| "loss": 0.0007, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 6.317024128686327, | |
| "grad_norm": 0.30509406328201294, | |
| "learning_rate": 7.372128123427806e-06, | |
| "loss": 0.0006, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 6.325402144772118, | |
| "grad_norm": 0.041681017726659775, | |
| "learning_rate": 7.355358041254403e-06, | |
| "loss": 0.0006, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 6.333780160857909, | |
| "grad_norm": 0.05730760842561722, | |
| "learning_rate": 7.3385879590810005e-06, | |
| "loss": 0.0005, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 6.3421581769437, | |
| "grad_norm": 0.03064553625881672, | |
| "learning_rate": 7.3218178769075975e-06, | |
| "loss": 0.0006, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 6.350536193029491, | |
| "grad_norm": 0.0326654389500618, | |
| "learning_rate": 7.3050477947341945e-06, | |
| "loss": 0.0006, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 6.358914209115282, | |
| "grad_norm": 0.058062855154275894, | |
| "learning_rate": 7.2882777125607915e-06, | |
| "loss": 0.0007, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 6.367292225201073, | |
| "grad_norm": 0.1029849499464035, | |
| "learning_rate": 7.27150763038739e-06, | |
| "loss": 0.0005, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 6.375670241286863, | |
| "grad_norm": 0.05858965218067169, | |
| "learning_rate": 7.254737548213987e-06, | |
| "loss": 0.0006, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 6.384048257372654, | |
| "grad_norm": 0.0990440845489502, | |
| "learning_rate": 7.237967466040584e-06, | |
| "loss": 0.0005, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 6.392426273458445, | |
| "grad_norm": 0.18513937294483185, | |
| "learning_rate": 7.221197383867181e-06, | |
| "loss": 0.0005, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 6.400804289544236, | |
| "grad_norm": 0.045414622873067856, | |
| "learning_rate": 7.204427301693779e-06, | |
| "loss": 0.0006, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 6.4091823056300266, | |
| "grad_norm": 0.14755046367645264, | |
| "learning_rate": 7.187657219520376e-06, | |
| "loss": 0.0006, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 6.4175603217158175, | |
| "grad_norm": 0.03956648334860802, | |
| "learning_rate": 7.170887137346973e-06, | |
| "loss": 0.0005, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 6.425938337801608, | |
| "grad_norm": 0.03931158035993576, | |
| "learning_rate": 7.154117055173571e-06, | |
| "loss": 0.0006, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 6.434316353887399, | |
| "grad_norm": 0.030736852437257767, | |
| "learning_rate": 7.137346973000169e-06, | |
| "loss": 0.0006, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 6.44269436997319, | |
| "grad_norm": 0.04715625196695328, | |
| "learning_rate": 7.120576890826766e-06, | |
| "loss": 0.0006, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 6.451072386058981, | |
| "grad_norm": 0.03468763083219528, | |
| "learning_rate": 7.103806808653363e-06, | |
| "loss": 0.0006, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 6.459450402144772, | |
| "grad_norm": 0.04370501637458801, | |
| "learning_rate": 7.08703672647996e-06, | |
| "loss": 0.0008, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 6.467828418230563, | |
| "grad_norm": 0.08410083502531052, | |
| "learning_rate": 7.070266644306559e-06, | |
| "loss": 0.0007, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 6.476206434316354, | |
| "grad_norm": 0.07396062463521957, | |
| "learning_rate": 7.053496562133156e-06, | |
| "loss": 0.001, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 6.484584450402145, | |
| "grad_norm": 0.09718171507120132, | |
| "learning_rate": 7.036726479959753e-06, | |
| "loss": 0.0006, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 6.492962466487936, | |
| "grad_norm": 0.20317842066287994, | |
| "learning_rate": 7.01995639778635e-06, | |
| "loss": 0.0007, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 6.501340482573727, | |
| "grad_norm": 0.1610729843378067, | |
| "learning_rate": 7.0031863156129475e-06, | |
| "loss": 0.0007, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 6.509718498659518, | |
| "grad_norm": 0.2847572863101959, | |
| "learning_rate": 6.9864162334395445e-06, | |
| "loss": 0.0006, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 6.5180965147453085, | |
| "grad_norm": 0.05428579822182655, | |
| "learning_rate": 6.9696461512661415e-06, | |
| "loss": 0.0007, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 6.526474530831099, | |
| "grad_norm": 0.03034658171236515, | |
| "learning_rate": 6.9528760690927385e-06, | |
| "loss": 0.0006, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 6.53485254691689, | |
| "grad_norm": 0.08986043930053711, | |
| "learning_rate": 6.936105986919337e-06, | |
| "loss": 0.0005, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.543230563002681, | |
| "grad_norm": 0.05553920567035675, | |
| "learning_rate": 6.919335904745934e-06, | |
| "loss": 0.0009, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 6.551608579088472, | |
| "grad_norm": 0.018183773383498192, | |
| "learning_rate": 6.902565822572531e-06, | |
| "loss": 0.0008, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 6.559986595174263, | |
| "grad_norm": 0.06645216047763824, | |
| "learning_rate": 6.885795740399128e-06, | |
| "loss": 0.0008, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 6.568364611260054, | |
| "grad_norm": 0.04686279594898224, | |
| "learning_rate": 6.869025658225726e-06, | |
| "loss": 0.0006, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 6.576742627345844, | |
| "grad_norm": 0.14523954689502716, | |
| "learning_rate": 6.852255576052323e-06, | |
| "loss": 0.0007, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 6.585120643431635, | |
| "grad_norm": 0.07832646369934082, | |
| "learning_rate": 6.83548549387892e-06, | |
| "loss": 0.0007, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 6.593498659517426, | |
| "grad_norm": 0.07805398851633072, | |
| "learning_rate": 6.818715411705518e-06, | |
| "loss": 0.0006, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 6.601876675603217, | |
| "grad_norm": 0.07783017307519913, | |
| "learning_rate": 6.801945329532115e-06, | |
| "loss": 0.0006, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 6.610254691689008, | |
| "grad_norm": 0.044575657695531845, | |
| "learning_rate": 6.785175247358713e-06, | |
| "loss": 0.0006, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 6.618632707774799, | |
| "grad_norm": 0.31225234270095825, | |
| "learning_rate": 6.76840516518531e-06, | |
| "loss": 0.0006, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 6.6270107238605895, | |
| "grad_norm": 0.08033174276351929, | |
| "learning_rate": 6.751635083011907e-06, | |
| "loss": 0.0008, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 6.6353887399463805, | |
| "grad_norm": 0.06084591895341873, | |
| "learning_rate": 6.734865000838504e-06, | |
| "loss": 0.0007, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 6.643766756032171, | |
| "grad_norm": 0.05018865689635277, | |
| "learning_rate": 6.718094918665103e-06, | |
| "loss": 0.0008, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 6.652144772117962, | |
| "grad_norm": 0.05032634735107422, | |
| "learning_rate": 6.7013248364917e-06, | |
| "loss": 0.0006, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 6.660522788203753, | |
| "grad_norm": 0.08206313848495483, | |
| "learning_rate": 6.684554754318297e-06, | |
| "loss": 0.0009, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 6.668900804289544, | |
| "grad_norm": 0.1278487741947174, | |
| "learning_rate": 6.667784672144894e-06, | |
| "loss": 0.0006, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 6.677278820375335, | |
| "grad_norm": 0.05226191505789757, | |
| "learning_rate": 6.6510145899714915e-06, | |
| "loss": 0.0008, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 6.685656836461126, | |
| "grad_norm": 0.04356776922941208, | |
| "learning_rate": 6.6342445077980886e-06, | |
| "loss": 0.0006, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 6.694034852546917, | |
| "grad_norm": 0.06333254277706146, | |
| "learning_rate": 6.6174744256246856e-06, | |
| "loss": 0.0007, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 6.702412868632708, | |
| "grad_norm": 0.08492754399776459, | |
| "learning_rate": 6.600704343451283e-06, | |
| "loss": 0.0007, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.702412868632708, | |
| "eval_loss": 0.061279989778995514, | |
| "eval_runtime": 0.3146, | |
| "eval_samples_per_second": 63.58, | |
| "eval_steps_per_second": 3.179, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.710790884718499, | |
| "grad_norm": 0.3515622317790985, | |
| "learning_rate": 6.583934261277881e-06, | |
| "loss": 0.0009, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 6.71916890080429, | |
| "grad_norm": 0.10627135634422302, | |
| "learning_rate": 6.567164179104478e-06, | |
| "loss": 0.0007, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 6.7275469168900806, | |
| "grad_norm": 0.17090724408626556, | |
| "learning_rate": 6.550394096931075e-06, | |
| "loss": 0.0008, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 6.7359249329758715, | |
| "grad_norm": 0.03962019085884094, | |
| "learning_rate": 6.533624014757672e-06, | |
| "loss": 0.0005, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 6.744302949061662, | |
| "grad_norm": 0.5243228077888489, | |
| "learning_rate": 6.51685393258427e-06, | |
| "loss": 0.0007, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 6.752680965147453, | |
| "grad_norm": 0.04279276728630066, | |
| "learning_rate": 6.500083850410868e-06, | |
| "loss": 0.0007, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 6.761058981233244, | |
| "grad_norm": 0.05610975995659828, | |
| "learning_rate": 6.483313768237465e-06, | |
| "loss": 0.0006, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 6.769436997319035, | |
| "grad_norm": 0.05965403839945793, | |
| "learning_rate": 6.466543686064062e-06, | |
| "loss": 0.0007, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 6.777815013404826, | |
| "grad_norm": 0.1390516757965088, | |
| "learning_rate": 6.44977360389066e-06, | |
| "loss": 0.0008, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 6.786193029490617, | |
| "grad_norm": 0.06495050340890884, | |
| "learning_rate": 6.433003521717257e-06, | |
| "loss": 0.0007, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 6.794571045576408, | |
| "grad_norm": 0.031072689220309258, | |
| "learning_rate": 6.416233439543854e-06, | |
| "loss": 0.0007, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 6.802949061662199, | |
| "grad_norm": 0.07859810441732407, | |
| "learning_rate": 6.399463357370451e-06, | |
| "loss": 0.0006, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 6.81132707774799, | |
| "grad_norm": 0.032027170062065125, | |
| "learning_rate": 6.38269327519705e-06, | |
| "loss": 0.0006, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 6.819705093833781, | |
| "grad_norm": 0.04607260972261429, | |
| "learning_rate": 6.365923193023647e-06, | |
| "loss": 0.0006, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 6.828083109919571, | |
| "grad_norm": 0.3754972815513611, | |
| "learning_rate": 6.349153110850244e-06, | |
| "loss": 0.0008, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 6.836461126005362, | |
| "grad_norm": 0.2285059541463852, | |
| "learning_rate": 6.332383028676841e-06, | |
| "loss": 0.0008, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 6.8448391420911525, | |
| "grad_norm": 0.0579165481030941, | |
| "learning_rate": 6.3156129465034386e-06, | |
| "loss": 0.0007, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 6.853217158176943, | |
| "grad_norm": 0.045123569667339325, | |
| "learning_rate": 6.298842864330036e-06, | |
| "loss": 0.0009, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 6.861595174262734, | |
| "grad_norm": 0.05418705940246582, | |
| "learning_rate": 6.282072782156633e-06, | |
| "loss": 0.0007, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 6.869973190348525, | |
| "grad_norm": 0.09083729982376099, | |
| "learning_rate": 6.2653026999832305e-06, | |
| "loss": 0.0007, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.878351206434316, | |
| "grad_norm": 0.04620116204023361, | |
| "learning_rate": 6.248532617809828e-06, | |
| "loss": 0.0008, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 6.886729222520107, | |
| "grad_norm": 0.18256771564483643, | |
| "learning_rate": 6.231762535636425e-06, | |
| "loss": 0.0006, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 6.895107238605898, | |
| "grad_norm": 0.09917750209569931, | |
| "learning_rate": 6.214992453463022e-06, | |
| "loss": 0.0006, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 6.903485254691689, | |
| "grad_norm": 0.07544329017400742, | |
| "learning_rate": 6.198222371289619e-06, | |
| "loss": 0.0008, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 6.91186327077748, | |
| "grad_norm": 0.23120667040348053, | |
| "learning_rate": 6.181452289116217e-06, | |
| "loss": 0.0006, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 6.920241286863271, | |
| "grad_norm": 0.030208513140678406, | |
| "learning_rate": 6.164682206942815e-06, | |
| "loss": 0.0009, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 6.928619302949062, | |
| "grad_norm": 0.06649070233106613, | |
| "learning_rate": 6.147912124769412e-06, | |
| "loss": 0.0007, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 6.936997319034853, | |
| "grad_norm": 0.0900665819644928, | |
| "learning_rate": 6.131142042596009e-06, | |
| "loss": 0.0006, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 6.9453753351206435, | |
| "grad_norm": 0.046752411872148514, | |
| "learning_rate": 6.114371960422607e-06, | |
| "loss": 0.0006, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 6.9537533512064345, | |
| "grad_norm": 0.1204705610871315, | |
| "learning_rate": 6.097601878249204e-06, | |
| "loss": 0.0007, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 6.962131367292225, | |
| "grad_norm": 0.0734005719423294, | |
| "learning_rate": 6.080831796075801e-06, | |
| "loss": 0.0007, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 6.970509383378016, | |
| "grad_norm": 0.0475836880505085, | |
| "learning_rate": 6.064061713902398e-06, | |
| "loss": 0.0008, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 6.978887399463807, | |
| "grad_norm": 0.08627843111753464, | |
| "learning_rate": 6.047291631728997e-06, | |
| "loss": 0.0007, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 6.987265415549598, | |
| "grad_norm": 0.04399965703487396, | |
| "learning_rate": 6.030521549555594e-06, | |
| "loss": 0.0007, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 6.995643431635389, | |
| "grad_norm": 0.09367845952510834, | |
| "learning_rate": 6.013751467382191e-06, | |
| "loss": 0.0007, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 7.00402144772118, | |
| "grad_norm": 0.042545393109321594, | |
| "learning_rate": 5.996981385208788e-06, | |
| "loss": 0.0006, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 7.012399463806971, | |
| "grad_norm": 0.06873136013746262, | |
| "learning_rate": 5.980211303035386e-06, | |
| "loss": 0.0003, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 7.020777479892762, | |
| "grad_norm": 0.016868956387043, | |
| "learning_rate": 5.963441220861983e-06, | |
| "loss": 0.0003, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 7.029155495978553, | |
| "grad_norm": 0.040632057934999466, | |
| "learning_rate": 5.94667113868858e-06, | |
| "loss": 0.0004, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 7.037533512064343, | |
| "grad_norm": 0.02409088797867298, | |
| "learning_rate": 5.9299010565151775e-06, | |
| "loss": 0.0003, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 7.045911528150134, | |
| "grad_norm": 0.04467145353555679, | |
| "learning_rate": 5.913130974341775e-06, | |
| "loss": 0.0003, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 7.054289544235925, | |
| "grad_norm": 0.03071122244000435, | |
| "learning_rate": 5.896360892168372e-06, | |
| "loss": 0.0003, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 7.0626675603217155, | |
| "grad_norm": 0.028979197144508362, | |
| "learning_rate": 5.879590809994969e-06, | |
| "loss": 0.0003, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 7.071045576407506, | |
| "grad_norm": 0.01919564977288246, | |
| "learning_rate": 5.862820727821566e-06, | |
| "loss": 0.0004, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 7.079423592493297, | |
| "grad_norm": 0.027090469375252724, | |
| "learning_rate": 5.846050645648163e-06, | |
| "loss": 0.0003, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 7.087801608579088, | |
| "grad_norm": 0.02643194980919361, | |
| "learning_rate": 5.829280563474762e-06, | |
| "loss": 0.0003, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 7.096179624664879, | |
| "grad_norm": 0.021015044301748276, | |
| "learning_rate": 5.812510481301359e-06, | |
| "loss": 0.0003, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 7.10455764075067, | |
| "grad_norm": 0.030943244695663452, | |
| "learning_rate": 5.795740399127956e-06, | |
| "loss": 0.0003, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 7.112935656836461, | |
| "grad_norm": 0.027034178376197815, | |
| "learning_rate": 5.778970316954553e-06, | |
| "loss": 0.0004, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 7.121313672922252, | |
| "grad_norm": 0.022702839225530624, | |
| "learning_rate": 5.762200234781151e-06, | |
| "loss": 0.0003, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 7.129691689008043, | |
| "grad_norm": 0.030643748119473457, | |
| "learning_rate": 5.745430152607748e-06, | |
| "loss": 0.0004, | |
| "step": 42550 | |
| }, | |
| { | |
| "epoch": 7.138069705093834, | |
| "grad_norm": 0.07582689076662064, | |
| "learning_rate": 5.728660070434345e-06, | |
| "loss": 0.0003, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 7.146447721179625, | |
| "grad_norm": 0.024221094325184822, | |
| "learning_rate": 5.711889988260942e-06, | |
| "loss": 0.0003, | |
| "step": 42650 | |
| }, | |
| { | |
| "epoch": 7.154825737265416, | |
| "grad_norm": 0.04585973173379898, | |
| "learning_rate": 5.695119906087541e-06, | |
| "loss": 0.0003, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 7.1632037533512065, | |
| "grad_norm": 0.1931953877210617, | |
| "learning_rate": 5.678349823914138e-06, | |
| "loss": 0.0004, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 7.171581769436997, | |
| "grad_norm": 0.02779720537364483, | |
| "learning_rate": 5.661579741740735e-06, | |
| "loss": 0.0002, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 7.179959785522788, | |
| "grad_norm": 0.0313730388879776, | |
| "learning_rate": 5.644809659567332e-06, | |
| "loss": 0.0004, | |
| "step": 42850 | |
| }, | |
| { | |
| "epoch": 7.188337801608579, | |
| "grad_norm": 0.0504007451236248, | |
| "learning_rate": 5.62803957739393e-06, | |
| "loss": 0.0006, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 7.19671581769437, | |
| "grad_norm": 0.019938312470912933, | |
| "learning_rate": 5.6112694952205275e-06, | |
| "loss": 0.0003, | |
| "step": 42950 | |
| }, | |
| { | |
| "epoch": 7.205093833780161, | |
| "grad_norm": 0.020967137068510056, | |
| "learning_rate": 5.5944994130471245e-06, | |
| "loss": 0.0003, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 7.213471849865952, | |
| "grad_norm": 0.02640225552022457, | |
| "learning_rate": 5.5777293308737215e-06, | |
| "loss": 0.0003, | |
| "step": 43050 | |
| }, | |
| { | |
| "epoch": 7.221849865951743, | |
| "grad_norm": 0.02011190541088581, | |
| "learning_rate": 5.560959248700319e-06, | |
| "loss": 0.0003, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 7.230227882037534, | |
| "grad_norm": 0.05346396192908287, | |
| "learning_rate": 5.544189166526916e-06, | |
| "loss": 0.0003, | |
| "step": 43150 | |
| }, | |
| { | |
| "epoch": 7.238605898123325, | |
| "grad_norm": 0.038962822407484055, | |
| "learning_rate": 5.5274190843535134e-06, | |
| "loss": 0.0003, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 7.246983914209116, | |
| "grad_norm": 0.031206265091896057, | |
| "learning_rate": 5.5106490021801104e-06, | |
| "loss": 0.0003, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 7.255361930294907, | |
| "grad_norm": 0.0827702060341835, | |
| "learning_rate": 5.493878920006709e-06, | |
| "loss": 0.0004, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 7.263739946380697, | |
| "grad_norm": 0.023083705455064774, | |
| "learning_rate": 5.477108837833306e-06, | |
| "loss": 0.0004, | |
| "step": 43350 | |
| }, | |
| { | |
| "epoch": 7.272117962466488, | |
| "grad_norm": 0.024041956290602684, | |
| "learning_rate": 5.460338755659903e-06, | |
| "loss": 0.0003, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 7.2804959785522785, | |
| "grad_norm": 0.07957682758569717, | |
| "learning_rate": 5.4435686734865e-06, | |
| "loss": 0.0003, | |
| "step": 43450 | |
| }, | |
| { | |
| "epoch": 7.288873994638069, | |
| "grad_norm": 0.18736758828163147, | |
| "learning_rate": 5.426798591313098e-06, | |
| "loss": 0.0005, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 7.29725201072386, | |
| "grad_norm": 0.04302476719021797, | |
| "learning_rate": 5.410028509139695e-06, | |
| "loss": 0.0003, | |
| "step": 43550 | |
| }, | |
| { | |
| "epoch": 7.305630026809651, | |
| "grad_norm": 0.35610461235046387, | |
| "learning_rate": 5.393258426966292e-06, | |
| "loss": 0.0005, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 7.314008042895442, | |
| "grad_norm": 0.023378223180770874, | |
| "learning_rate": 5.376488344792889e-06, | |
| "loss": 0.0003, | |
| "step": 43650 | |
| }, | |
| { | |
| "epoch": 7.322386058981233, | |
| "grad_norm": 0.031664662063121796, | |
| "learning_rate": 5.359718262619488e-06, | |
| "loss": 0.0003, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 7.330764075067024, | |
| "grad_norm": 0.01903984695672989, | |
| "learning_rate": 5.342948180446085e-06, | |
| "loss": 0.0003, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 7.339142091152815, | |
| "grad_norm": 0.031017929315567017, | |
| "learning_rate": 5.326178098272682e-06, | |
| "loss": 0.0004, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 7.347520107238606, | |
| "grad_norm": 0.028874006122350693, | |
| "learning_rate": 5.309408016099279e-06, | |
| "loss": 0.0003, | |
| "step": 43850 | |
| }, | |
| { | |
| "epoch": 7.355898123324397, | |
| "grad_norm": 0.016122756525874138, | |
| "learning_rate": 5.292637933925877e-06, | |
| "loss": 0.0003, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 7.364276139410188, | |
| "grad_norm": 0.037943582981824875, | |
| "learning_rate": 5.2758678517524745e-06, | |
| "loss": 0.0003, | |
| "step": 43950 | |
| }, | |
| { | |
| "epoch": 7.372654155495979, | |
| "grad_norm": 0.02277122251689434, | |
| "learning_rate": 5.2590977695790716e-06, | |
| "loss": 0.0004, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 7.3810321715817695, | |
| "grad_norm": 0.049923092126846313, | |
| "learning_rate": 5.2423276874056686e-06, | |
| "loss": 0.0003, | |
| "step": 44050 | |
| }, | |
| { | |
| "epoch": 7.38941018766756, | |
| "grad_norm": 0.04292990267276764, | |
| "learning_rate": 5.2255576052322664e-06, | |
| "loss": 0.0005, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 7.397788203753351, | |
| "grad_norm": 0.04316338151693344, | |
| "learning_rate": 5.2087875230588634e-06, | |
| "loss": 0.0003, | |
| "step": 44150 | |
| }, | |
| { | |
| "epoch": 7.406166219839142, | |
| "grad_norm": 0.0165548212826252, | |
| "learning_rate": 5.1920174408854605e-06, | |
| "loss": 0.0003, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 7.414544235924933, | |
| "grad_norm": 0.041038576513528824, | |
| "learning_rate": 5.1752473587120575e-06, | |
| "loss": 0.0004, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 7.422922252010724, | |
| "grad_norm": 0.07028800249099731, | |
| "learning_rate": 5.158477276538656e-06, | |
| "loss": 0.0003, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 7.431300268096515, | |
| "grad_norm": 0.03722773492336273, | |
| "learning_rate": 5.141707194365253e-06, | |
| "loss": 0.0003, | |
| "step": 44350 | |
| }, | |
| { | |
| "epoch": 7.439678284182306, | |
| "grad_norm": 0.015645667910575867, | |
| "learning_rate": 5.12493711219185e-06, | |
| "loss": 0.0004, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 7.448056300268097, | |
| "grad_norm": 0.18982096016407013, | |
| "learning_rate": 5.108167030018447e-06, | |
| "loss": 0.0005, | |
| "step": 44450 | |
| }, | |
| { | |
| "epoch": 7.456434316353888, | |
| "grad_norm": 0.02524687349796295, | |
| "learning_rate": 5.091396947845045e-06, | |
| "loss": 0.0004, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 7.464812332439679, | |
| "grad_norm": 0.02892642468214035, | |
| "learning_rate": 5.074626865671642e-06, | |
| "loss": 0.0006, | |
| "step": 44550 | |
| }, | |
| { | |
| "epoch": 7.473190348525469, | |
| "grad_norm": 0.04107584059238434, | |
| "learning_rate": 5.057856783498239e-06, | |
| "loss": 0.0007, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 7.48156836461126, | |
| "grad_norm": 0.05140475928783417, | |
| "learning_rate": 5.041086701324837e-06, | |
| "loss": 0.0003, | |
| "step": 44650 | |
| }, | |
| { | |
| "epoch": 7.4899463806970505, | |
| "grad_norm": 0.054712191224098206, | |
| "learning_rate": 5.024316619151435e-06, | |
| "loss": 0.0004, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 7.4983243967828415, | |
| "grad_norm": 0.022500043734908104, | |
| "learning_rate": 5.007546536978032e-06, | |
| "loss": 0.0004, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 7.506702412868632, | |
| "grad_norm": 0.6084216833114624, | |
| "learning_rate": 4.990776454804629e-06, | |
| "loss": 0.0006, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 7.515080428954423, | |
| "grad_norm": 0.07291937619447708, | |
| "learning_rate": 4.974006372631227e-06, | |
| "loss": 0.0004, | |
| "step": 44850 | |
| }, | |
| { | |
| "epoch": 7.523458445040214, | |
| "grad_norm": 0.06688908487558365, | |
| "learning_rate": 4.957236290457824e-06, | |
| "loss": 0.0005, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 7.531836461126005, | |
| "grad_norm": 0.025029698386788368, | |
| "learning_rate": 4.940466208284422e-06, | |
| "loss": 0.0005, | |
| "step": 44950 | |
| }, | |
| { | |
| "epoch": 7.540214477211796, | |
| "grad_norm": 0.08454358577728271, | |
| "learning_rate": 4.923696126111019e-06, | |
| "loss": 0.0003, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.548592493297587, | |
| "grad_norm": 0.0553942508995533, | |
| "learning_rate": 4.906926043937616e-06, | |
| "loss": 0.0003, | |
| "step": 45050 | |
| }, | |
| { | |
| "epoch": 7.556970509383378, | |
| "grad_norm": 0.05681919679045677, | |
| "learning_rate": 4.890155961764213e-06, | |
| "loss": 0.0005, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 7.565348525469169, | |
| "grad_norm": 0.019724005833268166, | |
| "learning_rate": 4.8733858795908105e-06, | |
| "loss": 0.0004, | |
| "step": 45150 | |
| }, | |
| { | |
| "epoch": 7.57372654155496, | |
| "grad_norm": 0.027729319408535957, | |
| "learning_rate": 4.8566157974174075e-06, | |
| "loss": 0.0003, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 7.582104557640751, | |
| "grad_norm": 0.05744357779622078, | |
| "learning_rate": 4.8398457152440045e-06, | |
| "loss": 0.0004, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 7.590482573726542, | |
| "grad_norm": 0.061757415533065796, | |
| "learning_rate": 4.823075633070602e-06, | |
| "loss": 0.0003, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 7.5988605898123325, | |
| "grad_norm": 0.04031449928879738, | |
| "learning_rate": 4.806305550897199e-06, | |
| "loss": 0.0005, | |
| "step": 45350 | |
| }, | |
| { | |
| "epoch": 7.607238605898123, | |
| "grad_norm": 0.10854317247867584, | |
| "learning_rate": 4.789535468723797e-06, | |
| "loss": 0.0004, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 7.615616621983914, | |
| "grad_norm": 0.01701934076845646, | |
| "learning_rate": 4.772765386550394e-06, | |
| "loss": 0.0003, | |
| "step": 45450 | |
| }, | |
| { | |
| "epoch": 7.623994638069705, | |
| "grad_norm": 0.04535774141550064, | |
| "learning_rate": 4.755995304376992e-06, | |
| "loss": 0.0003, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 7.632372654155496, | |
| "grad_norm": 0.03864599019289017, | |
| "learning_rate": 4.739225222203589e-06, | |
| "loss": 0.0005, | |
| "step": 45550 | |
| }, | |
| { | |
| "epoch": 7.640750670241287, | |
| "grad_norm": 0.03809565305709839, | |
| "learning_rate": 4.722455140030187e-06, | |
| "loss": 0.0004, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 7.649128686327078, | |
| "grad_norm": 0.043824635446071625, | |
| "learning_rate": 4.705685057856784e-06, | |
| "loss": 0.0004, | |
| "step": 45650 | |
| }, | |
| { | |
| "epoch": 7.657506702412869, | |
| "grad_norm": 0.02916356548666954, | |
| "learning_rate": 4.688914975683381e-06, | |
| "loss": 0.0004, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 7.66588471849866, | |
| "grad_norm": 0.1915924996137619, | |
| "learning_rate": 4.672144893509978e-06, | |
| "loss": 0.0004, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 7.674262734584451, | |
| "grad_norm": 0.07420273870229721, | |
| "learning_rate": 4.655374811336576e-06, | |
| "loss": 0.0005, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 7.682640750670242, | |
| "grad_norm": 0.026272999122738838, | |
| "learning_rate": 4.638604729163173e-06, | |
| "loss": 0.0004, | |
| "step": 45850 | |
| }, | |
| { | |
| "epoch": 7.691018766756033, | |
| "grad_norm": 0.3181280493736267, | |
| "learning_rate": 4.621834646989771e-06, | |
| "loss": 0.0004, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 7.6993967828418235, | |
| "grad_norm": 0.035441432148218155, | |
| "learning_rate": 4.605064564816368e-06, | |
| "loss": 0.0004, | |
| "step": 45950 | |
| }, | |
| { | |
| "epoch": 7.707774798927614, | |
| "grad_norm": 0.01749766804277897, | |
| "learning_rate": 4.588294482642966e-06, | |
| "loss": 0.0004, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.716152815013404, | |
| "grad_norm": 0.07927963137626648, | |
| "learning_rate": 4.571524400469563e-06, | |
| "loss": 0.0004, | |
| "step": 46050 | |
| }, | |
| { | |
| "epoch": 7.724530831099195, | |
| "grad_norm": 0.03702886402606964, | |
| "learning_rate": 4.5547543182961605e-06, | |
| "loss": 0.0004, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 7.732908847184986, | |
| "grad_norm": 0.04682036116719246, | |
| "learning_rate": 4.5379842361227575e-06, | |
| "loss": 0.0003, | |
| "step": 46150 | |
| }, | |
| { | |
| "epoch": 7.741286863270777, | |
| "grad_norm": 0.013787736184895039, | |
| "learning_rate": 4.5212141539493545e-06, | |
| "loss": 0.0004, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 7.749664879356568, | |
| "grad_norm": 0.029385261237621307, | |
| "learning_rate": 4.5044440717759515e-06, | |
| "loss": 0.0005, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 7.758042895442359, | |
| "grad_norm": 0.018814504146575928, | |
| "learning_rate": 4.487673989602549e-06, | |
| "loss": 0.0004, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 7.76642091152815, | |
| "grad_norm": 0.017043303698301315, | |
| "learning_rate": 4.470903907429146e-06, | |
| "loss": 0.0004, | |
| "step": 46350 | |
| }, | |
| { | |
| "epoch": 7.774798927613941, | |
| "grad_norm": 0.12016864866018295, | |
| "learning_rate": 4.454133825255744e-06, | |
| "loss": 0.0004, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 7.783176943699732, | |
| "grad_norm": 0.051153432577848434, | |
| "learning_rate": 4.437363743082341e-06, | |
| "loss": 0.0003, | |
| "step": 46450 | |
| }, | |
| { | |
| "epoch": 7.791554959785523, | |
| "grad_norm": 0.030375001952052116, | |
| "learning_rate": 4.420593660908939e-06, | |
| "loss": 0.0004, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 7.799932975871314, | |
| "grad_norm": 0.060530513525009155, | |
| "learning_rate": 4.403823578735536e-06, | |
| "loss": 0.0004, | |
| "step": 46550 | |
| }, | |
| { | |
| "epoch": 7.8083109919571045, | |
| "grad_norm": 0.034854013472795486, | |
| "learning_rate": 4.387053496562134e-06, | |
| "loss": 0.0004, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 7.8166890080428955, | |
| "grad_norm": 0.03328954800963402, | |
| "learning_rate": 4.370283414388731e-06, | |
| "loss": 0.0004, | |
| "step": 46650 | |
| }, | |
| { | |
| "epoch": 7.825067024128686, | |
| "grad_norm": 0.04314300790429115, | |
| "learning_rate": 4.353513332215328e-06, | |
| "loss": 0.0004, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 7.833445040214477, | |
| "grad_norm": 0.09990786761045456, | |
| "learning_rate": 4.336743250041925e-06, | |
| "loss": 0.0004, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 7.841823056300268, | |
| "grad_norm": 0.035922013223171234, | |
| "learning_rate": 4.319973167868523e-06, | |
| "loss": 0.0003, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 7.850201072386059, | |
| "grad_norm": 0.05447731912136078, | |
| "learning_rate": 4.30320308569512e-06, | |
| "loss": 0.0004, | |
| "step": 46850 | |
| }, | |
| { | |
| "epoch": 7.85857908847185, | |
| "grad_norm": 0.01950427144765854, | |
| "learning_rate": 4.286433003521718e-06, | |
| "loss": 0.0004, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 7.866957104557641, | |
| "grad_norm": 0.04952532425522804, | |
| "learning_rate": 4.269662921348315e-06, | |
| "loss": 0.0004, | |
| "step": 46950 | |
| }, | |
| { | |
| "epoch": 7.875335120643432, | |
| "grad_norm": 0.04123789444565773, | |
| "learning_rate": 4.252892839174913e-06, | |
| "loss": 0.0004, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 7.883713136729223, | |
| "grad_norm": 0.0161293838173151, | |
| "learning_rate": 4.23612275700151e-06, | |
| "loss": 0.0003, | |
| "step": 47050 | |
| }, | |
| { | |
| "epoch": 7.892091152815014, | |
| "grad_norm": 0.039569880813360214, | |
| "learning_rate": 4.2193526748281075e-06, | |
| "loss": 0.0004, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 7.900469168900805, | |
| "grad_norm": 0.209671750664711, | |
| "learning_rate": 4.2025825926547045e-06, | |
| "loss": 0.0004, | |
| "step": 47150 | |
| }, | |
| { | |
| "epoch": 7.908847184986596, | |
| "grad_norm": 0.049620840698480606, | |
| "learning_rate": 4.1858125104813016e-06, | |
| "loss": 0.0005, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 7.917225201072386, | |
| "grad_norm": 0.03689347580075264, | |
| "learning_rate": 4.1690424283078986e-06, | |
| "loss": 0.0004, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 7.9256032171581765, | |
| "grad_norm": 0.05554811283946037, | |
| "learning_rate": 4.1522723461344964e-06, | |
| "loss": 0.0004, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 7.933981233243967, | |
| "grad_norm": 0.040197305381298065, | |
| "learning_rate": 4.1355022639610934e-06, | |
| "loss": 0.0003, | |
| "step": 47350 | |
| }, | |
| { | |
| "epoch": 7.942359249329758, | |
| "grad_norm": 0.01716030202805996, | |
| "learning_rate": 4.118732181787691e-06, | |
| "loss": 0.0003, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 7.950737265415549, | |
| "grad_norm": 0.0522179938852787, | |
| "learning_rate": 4.101962099614288e-06, | |
| "loss": 0.0003, | |
| "step": 47450 | |
| }, | |
| { | |
| "epoch": 7.95911528150134, | |
| "grad_norm": 0.3440731167793274, | |
| "learning_rate": 4.085192017440886e-06, | |
| "loss": 0.0004, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 7.967493297587131, | |
| "grad_norm": 0.03338254243135452, | |
| "learning_rate": 4.068421935267483e-06, | |
| "loss": 0.0004, | |
| "step": 47550 | |
| }, | |
| { | |
| "epoch": 7.975871313672922, | |
| "grad_norm": 0.061764348298311234, | |
| "learning_rate": 4.051651853094081e-06, | |
| "loss": 0.0004, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 7.984249329758713, | |
| "grad_norm": 0.046575699001550674, | |
| "learning_rate": 4.034881770920678e-06, | |
| "loss": 0.0004, | |
| "step": 47650 | |
| }, | |
| { | |
| "epoch": 7.992627345844504, | |
| "grad_norm": 0.062236297875642776, | |
| "learning_rate": 4.018111688747275e-06, | |
| "loss": 0.0004, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 8.001005361930295, | |
| "grad_norm": 0.028931235894560814, | |
| "learning_rate": 4.001341606573872e-06, | |
| "loss": 0.0003, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 8.009383378016086, | |
| "grad_norm": 0.01811792142689228, | |
| "learning_rate": 3.98457152440047e-06, | |
| "loss": 0.0002, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 8.017761394101877, | |
| "grad_norm": 0.01822470873594284, | |
| "learning_rate": 3.967801442227067e-06, | |
| "loss": 0.0002, | |
| "step": 47850 | |
| }, | |
| { | |
| "epoch": 8.026139410187668, | |
| "grad_norm": 0.04903008043766022, | |
| "learning_rate": 3.951031360053665e-06, | |
| "loss": 0.0002, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 8.034517426273458, | |
| "grad_norm": 0.022891085594892502, | |
| "learning_rate": 3.934261277880262e-06, | |
| "loss": 0.0002, | |
| "step": 47950 | |
| }, | |
| { | |
| "epoch": 8.04289544235925, | |
| "grad_norm": 0.015373194590210915, | |
| "learning_rate": 3.91749119570686e-06, | |
| "loss": 0.0002, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 8.05127345844504, | |
| "grad_norm": 0.01921216771006584, | |
| "learning_rate": 3.900721113533457e-06, | |
| "loss": 0.0002, | |
| "step": 48050 | |
| }, | |
| { | |
| "epoch": 8.059651474530831, | |
| "grad_norm": 0.01771024614572525, | |
| "learning_rate": 3.8839510313600546e-06, | |
| "loss": 0.0002, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 8.068029490616622, | |
| "grad_norm": 0.012290588580071926, | |
| "learning_rate": 3.867180949186652e-06, | |
| "loss": 0.0002, | |
| "step": 48150 | |
| }, | |
| { | |
| "epoch": 8.076407506702413, | |
| "grad_norm": 0.013452921062707901, | |
| "learning_rate": 3.850410867013249e-06, | |
| "loss": 0.0002, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 8.084785522788204, | |
| "grad_norm": 0.024924032390117645, | |
| "learning_rate": 3.833640784839846e-06, | |
| "loss": 0.0002, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 8.093163538873995, | |
| "grad_norm": 0.010622446425259113, | |
| "learning_rate": 3.8168707026664435e-06, | |
| "loss": 0.0002, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 8.101541554959786, | |
| "grad_norm": 0.02651936188340187, | |
| "learning_rate": 3.800100620493041e-06, | |
| "loss": 0.0003, | |
| "step": 48350 | |
| }, | |
| { | |
| "epoch": 8.109919571045577, | |
| "grad_norm": 0.011863762512803078, | |
| "learning_rate": 3.783330538319638e-06, | |
| "loss": 0.0002, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 8.118297587131368, | |
| "grad_norm": 0.02366674318909645, | |
| "learning_rate": 3.7665604561462354e-06, | |
| "loss": 0.0002, | |
| "step": 48450 | |
| }, | |
| { | |
| "epoch": 8.126675603217159, | |
| "grad_norm": 0.026062361896038055, | |
| "learning_rate": 3.7497903739728324e-06, | |
| "loss": 0.0002, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 8.13505361930295, | |
| "grad_norm": 0.016329048201441765, | |
| "learning_rate": 3.7330202917994302e-06, | |
| "loss": 0.0002, | |
| "step": 48550 | |
| }, | |
| { | |
| "epoch": 8.14343163538874, | |
| "grad_norm": 0.09319298714399338, | |
| "learning_rate": 3.7162502096260272e-06, | |
| "loss": 0.0002, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 8.151809651474531, | |
| "grad_norm": 0.014463214203715324, | |
| "learning_rate": 3.6994801274526247e-06, | |
| "loss": 0.0002, | |
| "step": 48650 | |
| }, | |
| { | |
| "epoch": 8.160187667560322, | |
| "grad_norm": 0.030192028731107712, | |
| "learning_rate": 3.682710045279222e-06, | |
| "loss": 0.0004, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 8.168565683646113, | |
| "grad_norm": 0.014410781674087048, | |
| "learning_rate": 3.6659399631058195e-06, | |
| "loss": 0.0002, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 8.176943699731904, | |
| "grad_norm": 0.028254050761461258, | |
| "learning_rate": 3.6491698809324166e-06, | |
| "loss": 0.0002, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 8.185321715817695, | |
| "grad_norm": 0.022153843194246292, | |
| "learning_rate": 3.6323997987590144e-06, | |
| "loss": 0.0002, | |
| "step": 48850 | |
| }, | |
| { | |
| "epoch": 8.193699731903486, | |
| "grad_norm": 0.01866259053349495, | |
| "learning_rate": 3.6156297165856114e-06, | |
| "loss": 0.0002, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 8.202077747989277, | |
| "grad_norm": 0.014017355628311634, | |
| "learning_rate": 3.598859634412209e-06, | |
| "loss": 0.0002, | |
| "step": 48950 | |
| }, | |
| { | |
| "epoch": 8.210455764075068, | |
| "grad_norm": 0.017173465341329575, | |
| "learning_rate": 3.582089552238806e-06, | |
| "loss": 0.0002, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 8.218833780160859, | |
| "grad_norm": 0.20775650441646576, | |
| "learning_rate": 3.5653194700654037e-06, | |
| "loss": 0.0002, | |
| "step": 49050 | |
| }, | |
| { | |
| "epoch": 8.22721179624665, | |
| "grad_norm": 0.014167393557727337, | |
| "learning_rate": 3.5485493878920008e-06, | |
| "loss": 0.0002, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 8.23558981233244, | |
| "grad_norm": 0.02049107290804386, | |
| "learning_rate": 3.531779305718598e-06, | |
| "loss": 0.0002, | |
| "step": 49150 | |
| }, | |
| { | |
| "epoch": 8.243967828418231, | |
| "grad_norm": 0.026173189282417297, | |
| "learning_rate": 3.5150092235451956e-06, | |
| "loss": 0.0002, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 8.25234584450402, | |
| "grad_norm": 0.02050282247364521, | |
| "learning_rate": 3.498239141371793e-06, | |
| "loss": 0.0002, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 8.260723860589813, | |
| "grad_norm": 0.015291319228708744, | |
| "learning_rate": 3.48146905919839e-06, | |
| "loss": 0.0002, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 8.269101876675602, | |
| "grad_norm": 0.026897624135017395, | |
| "learning_rate": 3.464698977024988e-06, | |
| "loss": 0.0002, | |
| "step": 49350 | |
| }, | |
| { | |
| "epoch": 8.277479892761393, | |
| "grad_norm": 0.016716543585062027, | |
| "learning_rate": 3.447928894851585e-06, | |
| "loss": 0.0002, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 8.285857908847184, | |
| "grad_norm": 0.024870146065950394, | |
| "learning_rate": 3.4311588126781824e-06, | |
| "loss": 0.0002, | |
| "step": 49450 | |
| }, | |
| { | |
| "epoch": 8.294235924932975, | |
| "grad_norm": 0.017461460083723068, | |
| "learning_rate": 3.4143887305047794e-06, | |
| "loss": 0.0002, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 8.302613941018766, | |
| "grad_norm": 0.014370834454894066, | |
| "learning_rate": 3.3976186483313773e-06, | |
| "loss": 0.0002, | |
| "step": 49550 | |
| }, | |
| { | |
| "epoch": 8.310991957104557, | |
| "grad_norm": 0.026379108428955078, | |
| "learning_rate": 3.3808485661579743e-06, | |
| "loss": 0.0003, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 8.319369973190348, | |
| "grad_norm": 0.012285185977816582, | |
| "learning_rate": 3.364078483984572e-06, | |
| "loss": 0.0002, | |
| "step": 49650 | |
| }, | |
| { | |
| "epoch": 8.327747989276139, | |
| "grad_norm": 0.02005821093916893, | |
| "learning_rate": 3.347308401811169e-06, | |
| "loss": 0.0002, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 8.33612600536193, | |
| "grad_norm": 0.016585618257522583, | |
| "learning_rate": 3.3305383196377666e-06, | |
| "loss": 0.0003, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 8.34450402144772, | |
| "grad_norm": 0.023517385125160217, | |
| "learning_rate": 3.3137682374643636e-06, | |
| "loss": 0.0002, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 8.352882037533512, | |
| "grad_norm": 0.013065959326922894, | |
| "learning_rate": 3.2969981552909615e-06, | |
| "loss": 0.0002, | |
| "step": 49850 | |
| }, | |
| { | |
| "epoch": 8.361260053619302, | |
| "grad_norm": 0.028112584725022316, | |
| "learning_rate": 3.2802280731175585e-06, | |
| "loss": 0.0002, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 8.369638069705093, | |
| "grad_norm": 0.011142577044665813, | |
| "learning_rate": 3.263457990944156e-06, | |
| "loss": 0.0002, | |
| "step": 49950 | |
| }, | |
| { | |
| "epoch": 8.378016085790884, | |
| "grad_norm": 0.02244596742093563, | |
| "learning_rate": 3.246687908770753e-06, | |
| "loss": 0.0002, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 8.378016085790884, | |
| "eval_loss": 0.050992656499147415, | |
| "eval_runtime": 0.3133, | |
| "eval_samples_per_second": 63.832, | |
| "eval_steps_per_second": 3.192, | |
| "step": 50000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 59680, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0705940805709087e+20, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
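
The state above ends at global step 50000 of 59680 (epoch ~8.38 of 10), with the single `eval_loss` record logged at that step. As a minimal sketch of how such a file might be inspected, the snippet below assumes the JSON has been saved locally as `trainer_state.json` (the path is illustrative, not from the source) and that it follows the standard Trainer layout, where `log_history` mixes training entries (keyed by `loss`) and evaluation entries (keyed by `eval_loss`).

```python
# Minimal sketch: summarize a trainer_state.json like the one above.
# Assumptions: the file is stored locally as "trainer_state.json" (illustrative path),
# and log_history entries carry either a training "loss" or an "eval_loss" key.
import json

with open("trainer_state.json") as f:  # assumed local copy of the state shown above
    state = json.load(f)

history = state["log_history"]

# Training entries log "loss"/"learning_rate"; evaluation entries log "eval_loss".
train_logs = [e for e in history if "loss" in e and "eval_loss" not in e]
eval_logs = [e for e in history if "eval_loss" in e]

print(f"global step: {state['global_step']} / {state['max_steps']}")
print(f"epoch:       {state['epoch']:.3f} of {state['num_train_epochs']}")
print(f"best metric: {state['best_metric']}")

if train_logs:
    last = train_logs[-1]
    print(f"last train loss {last['loss']} at step {last['step']} "
          f"(lr {last['learning_rate']:.3e})")

for e in eval_logs:
    print(f"eval_loss {e['eval_loss']:.5f} at step {e['step']}")
```

For this file the summary would report the final logged training loss of 0.0002 at step 50000 and the evaluation loss of ~0.05099 recorded at the same step; plotting `step` against `loss` over `train_logs` (e.g. with matplotlib) is a natural next step if a loss curve is wanted.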