| { | |
| "tests": { | |
| "22": { | |
| "id": 22, | |
| "task": 2, | |
| "model": "xlm-roberta-large", | |
| "languages": [ | |
| "cy" | |
| ], | |
| "augmentation": [ | |
| "" | |
| ], | |
| "data_percentage": 1, | |
| "use_token_type_ids": false, | |
| "tokenizer_config": { | |
| "strip_accent": false, | |
| "add_prefix_space": true | |
| }, | |
| "optimizer_config": { | |
| "adafactor": true, | |
| "num_train_epochs": 2 | |
| }, | |
| "result": [ | |
| { | |
| "loss": 1.882, | |
| "grad_norm": null, | |
| "learning_rate": 0.0, | |
| "epoch": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "loss": 0.5285, | |
| "grad_norm": 2.4953722953796387, | |
| "learning_rate": 3.99453087019932e-05, | |
| "epoch": 0.01, | |
| "step": 100 | |
| }, | |
| { | |
| "loss": 0.1702, | |
| "grad_norm": 1.8322360515594482, | |
| "learning_rate": 3.982377248420029e-05, | |
| "epoch": 0.01, | |
| "step": 200 | |
| }, | |
| { | |
| "loss": 0.1472, | |
| "grad_norm": 1.7121275663375854, | |
| "learning_rate": 3.970223626640739e-05, | |
| "epoch": 0.02, | |
| "step": 300 | |
| }, | |
| { | |
| "loss": 0.1342, | |
| "grad_norm": 1.7097556591033936, | |
| "learning_rate": 3.958070004861449e-05, | |
| "epoch": 0.02, | |
| "step": 400 | |
| }, | |
| { | |
| "loss": 0.1288, | |
| "grad_norm": 1.57424795627594, | |
| "learning_rate": 3.9459163830821586e-05, | |
| "epoch": 0.03, | |
| "step": 500 | |
| }, | |
| { | |
| "loss": 0.1247, | |
| "grad_norm": 1.7552311420440674, | |
| "learning_rate": 3.9337627613028686e-05, | |
| "epoch": 0.04, | |
| "step": 600 | |
| }, | |
| { | |
| "loss": 0.1218, | |
| "grad_norm": 1.6224812269210815, | |
| "learning_rate": 3.9216091395235786e-05, | |
| "epoch": 0.04, | |
| "step": 700 | |
| }, | |
| { | |
| "loss": 0.1176, | |
| "grad_norm": 1.8368713855743408, | |
| "learning_rate": 3.909455517744288e-05, | |
| "epoch": 0.05, | |
| "step": 800 | |
| }, | |
| { | |
| "loss": 0.1119, | |
| "grad_norm": 1.4631482362747192, | |
| "learning_rate": 3.897301895964998e-05, | |
| "epoch": 0.05, | |
| "step": 900 | |
| }, | |
| { | |
| "loss": 0.1098, | |
| "grad_norm": 1.2774118185043335, | |
| "learning_rate": 3.885148274185708e-05, | |
| "epoch": 0.06, | |
| "step": 1000 | |
| }, | |
| { | |
| "loss": 0.1083, | |
| "grad_norm": 1.187245488166809, | |
| "learning_rate": 3.872994652406417e-05, | |
| "epoch": 0.07, | |
| "step": 1100 | |
| }, | |
| { | |
| "loss": 0.1075, | |
| "grad_norm": 1.6492900848388672, | |
| "learning_rate": 3.860841030627127e-05, | |
| "epoch": 0.07, | |
| "step": 1200 | |
| }, | |
| { | |
| "loss": 0.107, | |
| "grad_norm": 1.4514034986495972, | |
| "learning_rate": 3.8486874088478366e-05, | |
| "epoch": 0.08, | |
| "step": 1300 | |
| }, | |
| { | |
| "loss": 0.1036, | |
| "grad_norm": 1.0488823652267456, | |
| "learning_rate": 3.8365337870685466e-05, | |
| "epoch": 0.08, | |
| "step": 1400 | |
| }, | |
| { | |
| "loss": 0.1021, | |
| "grad_norm": 1.5489355325698853, | |
| "learning_rate": 3.8243801652892566e-05, | |
| "epoch": 0.09, | |
| "step": 1500 | |
| }, | |
| { | |
| "loss": 0.1008, | |
| "grad_norm": 1.2730894088745117, | |
| "learning_rate": 3.812226543509966e-05, | |
| "epoch": 0.1, | |
| "step": 1600 | |
| }, | |
| { | |
| "loss": 0.1004, | |
| "grad_norm": 1.6920459270477295, | |
| "learning_rate": 3.800072921730676e-05, | |
| "epoch": 0.1, | |
| "step": 1700 | |
| }, | |
| { | |
| "loss": 0.1006, | |
| "grad_norm": 0.9863981008529663, | |
| "learning_rate": 3.787919299951386e-05, | |
| "epoch": 0.11, | |
| "step": 1800 | |
| }, | |
| { | |
| "loss": 0.0982, | |
| "grad_norm": 0.9981995820999146, | |
| "learning_rate": 3.775765678172095e-05, | |
| "epoch": 0.12, | |
| "step": 1900 | |
| }, | |
| { | |
| "loss": 0.0975, | |
| "grad_norm": 1.021620273590088, | |
| "learning_rate": 3.763612056392805e-05, | |
| "epoch": 0.12, | |
| "step": 2000 | |
| }, | |
| { | |
| "loss": 0.0989, | |
| "grad_norm": 1.2811397314071655, | |
| "learning_rate": 3.751458434613515e-05, | |
| "epoch": 0.13, | |
| "step": 2100 | |
| }, | |
| { | |
| "loss": 0.0959, | |
| "grad_norm": 1.5976190567016602, | |
| "learning_rate": 3.7393048128342246e-05, | |
| "epoch": 0.13, | |
| "step": 2200 | |
| }, | |
| { | |
| "loss": 0.0961, | |
| "grad_norm": 0.9754481911659241, | |
| "learning_rate": 3.7271511910549346e-05, | |
| "epoch": 0.14, | |
| "step": 2300 | |
| }, | |
| { | |
| "loss": 0.0956, | |
| "grad_norm": 0.9418678283691406, | |
| "learning_rate": 3.7149975692756447e-05, | |
| "epoch": 0.15, | |
| "step": 2400 | |
| }, | |
| { | |
| "loss": 0.0954, | |
| "grad_norm": 1.294745922088623, | |
| "learning_rate": 3.702843947496354e-05, | |
| "epoch": 0.15, | |
| "step": 2500 | |
| }, | |
| { | |
| "loss": 0.0943, | |
| "grad_norm": 1.3049461841583252, | |
| "learning_rate": 3.690690325717064e-05, | |
| "epoch": 0.16, | |
| "step": 2600 | |
| }, | |
| { | |
| "loss": 0.0936, | |
| "grad_norm": 1.1144427061080933, | |
| "learning_rate": 3.678536703937774e-05, | |
| "epoch": 0.16, | |
| "step": 2700 | |
| }, | |
| { | |
| "loss": 0.0939, | |
| "grad_norm": 1.3424856662750244, | |
| "learning_rate": 3.666383082158483e-05, | |
| "epoch": 0.17, | |
| "step": 2800 | |
| }, | |
| { | |
| "loss": 0.0947, | |
| "grad_norm": 1.123299241065979, | |
| "learning_rate": 3.6542294603791933e-05, | |
| "epoch": 0.18, | |
| "step": 2900 | |
| }, | |
| { | |
| "loss": 0.0932, | |
| "grad_norm": 1.456009864807129, | |
| "learning_rate": 3.642075838599903e-05, | |
| "epoch": 0.18, | |
| "step": 3000 | |
| }, | |
| { | |
| "loss": 0.0927, | |
| "grad_norm": 1.4363266229629517, | |
| "learning_rate": 3.629922216820613e-05, | |
| "epoch": 0.19, | |
| "step": 3100 | |
| }, | |
| { | |
| "loss": 0.0907, | |
| "grad_norm": 0.7776892185211182, | |
| "learning_rate": 3.617768595041323e-05, | |
| "epoch": 0.19, | |
| "step": 3200 | |
| }, | |
| { | |
| "loss": 0.092, | |
| "grad_norm": 25.731966018676758, | |
| "learning_rate": 3.605614973262032e-05, | |
| "epoch": 0.2, | |
| "step": 3300 | |
| }, | |
| { | |
| "loss": 0.091, | |
| "grad_norm": 0.9259088039398193, | |
| "learning_rate": 3.593461351482742e-05, | |
| "epoch": 0.21, | |
| "step": 3400 | |
| }, | |
| { | |
| "loss": 0.0915, | |
| "grad_norm": 0.851094663143158, | |
| "learning_rate": 3.581307729703452e-05, | |
| "epoch": 0.21, | |
| "step": 3500 | |
| }, | |
| { | |
| "loss": 0.0902, | |
| "grad_norm": 1.5700650215148926, | |
| "learning_rate": 3.5691541079241614e-05, | |
| "epoch": 0.22, | |
| "step": 3600 | |
| }, | |
| { | |
| "loss": 0.0888, | |
| "grad_norm": 1.13387930393219, | |
| "learning_rate": 3.5570004861448714e-05, | |
| "epoch": 0.22, | |
| "step": 3700 | |
| }, | |
| { | |
| "loss": 0.089, | |
| "grad_norm": 1.2357937097549438, | |
| "learning_rate": 3.5448468643655814e-05, | |
| "epoch": 0.23, | |
| "step": 3800 | |
| }, | |
| { | |
| "loss": 0.0898, | |
| "grad_norm": 0.9063655734062195, | |
| "learning_rate": 3.532693242586291e-05, | |
| "epoch": 0.24, | |
| "step": 3900 | |
| }, | |
| { | |
| "loss": 0.0893, | |
| "grad_norm": 1.1259723901748657, | |
| "learning_rate": 3.520539620807001e-05, | |
| "epoch": 0.24, | |
| "step": 4000 | |
| }, | |
| { | |
| "loss": 0.0889, | |
| "grad_norm": 0.8327601552009583, | |
| "learning_rate": 3.508385999027711e-05, | |
| "epoch": 0.25, | |
| "step": 4100 | |
| }, | |
| { | |
| "loss": 0.0862, | |
| "grad_norm": 1.2368316650390625, | |
| "learning_rate": 3.49623237724842e-05, | |
| "epoch": 0.25, | |
| "step": 4200 | |
| }, | |
| { | |
| "loss": 0.0867, | |
| "grad_norm": 1.1474043130874634, | |
| "learning_rate": 3.48407875546913e-05, | |
| "epoch": 0.26, | |
| "step": 4300 | |
| }, | |
| { | |
| "loss": 0.0858, | |
| "grad_norm": 0.6887868046760559, | |
| "learning_rate": 3.47192513368984e-05, | |
| "epoch": 0.27, | |
| "step": 4400 | |
| }, | |
| { | |
| "loss": 0.0877, | |
| "grad_norm": 0.8170347809791565, | |
| "learning_rate": 3.4597715119105494e-05, | |
| "epoch": 0.27, | |
| "step": 4500 | |
| }, | |
| { | |
| "loss": 0.0871, | |
| "grad_norm": 0.7361243367195129, | |
| "learning_rate": 3.4476178901312594e-05, | |
| "epoch": 0.28, | |
| "step": 4600 | |
| }, | |
| { | |
| "loss": 0.0878, | |
| "grad_norm": 1.0975162982940674, | |
| "learning_rate": 3.435464268351969e-05, | |
| "epoch": 0.29, | |
| "step": 4700 | |
| }, | |
| { | |
| "loss": 0.0863, | |
| "grad_norm": 0.931176483631134, | |
| "learning_rate": 3.4233106465726794e-05, | |
| "epoch": 0.29, | |
| "step": 4800 | |
| }, | |
| { | |
| "loss": 0.0853, | |
| "grad_norm": 1.0259523391723633, | |
| "learning_rate": 3.411157024793389e-05, | |
| "epoch": 0.3, | |
| "step": 4900 | |
| }, | |
| { | |
| "loss": 0.0876, | |
| "grad_norm": 1.1680504083633423, | |
| "learning_rate": 3.399003403014098e-05, | |
| "epoch": 0.3, | |
| "step": 5000 | |
| }, | |
| { | |
| "loss": 0.0855, | |
| "grad_norm": 1.2358198165893555, | |
| "learning_rate": 3.386849781234809e-05, | |
| "epoch": 0.31, | |
| "step": 5100 | |
| }, | |
| { | |
| "loss": 0.085, | |
| "grad_norm": 0.8484376668930054, | |
| "learning_rate": 3.374696159455518e-05, | |
| "epoch": 0.32, | |
| "step": 5200 | |
| }, | |
| { | |
| "loss": 0.085, | |
| "grad_norm": 1.5419291257858276, | |
| "learning_rate": 3.3625425376762274e-05, | |
| "epoch": 0.32, | |
| "step": 5300 | |
| }, | |
| { | |
| "loss": 0.0849, | |
| "grad_norm": 1.0334900617599487, | |
| "learning_rate": 3.3503889158969374e-05, | |
| "epoch": 0.33, | |
| "step": 5400 | |
| }, | |
| { | |
| "loss": 0.0854, | |
| "grad_norm": 1.0367408990859985, | |
| "learning_rate": 3.3382352941176474e-05, | |
| "epoch": 0.33, | |
| "step": 5500 | |
| }, | |
| { | |
| "loss": 0.0853, | |
| "grad_norm": 0.8429509401321411, | |
| "learning_rate": 3.326081672338357e-05, | |
| "epoch": 0.34, | |
| "step": 5600 | |
| }, | |
| { | |
| "loss": 0.086, | |
| "grad_norm": 0.9059005379676819, | |
| "learning_rate": 3.313928050559067e-05, | |
| "epoch": 0.35, | |
| "step": 5700 | |
| }, | |
| { | |
| "loss": 0.0846, | |
| "grad_norm": 1.1803362369537354, | |
| "learning_rate": 3.301774428779777e-05, | |
| "epoch": 0.35, | |
| "step": 5800 | |
| }, | |
| { | |
| "loss": 0.0817, | |
| "grad_norm": 0.7263641357421875, | |
| "learning_rate": 3.289620807000487e-05, | |
| "epoch": 0.36, | |
| "step": 5900 | |
| }, | |
| { | |
| "loss": 0.0831, | |
| "grad_norm": 0.8227238655090332, | |
| "learning_rate": 3.277467185221196e-05, | |
| "epoch": 0.36, | |
| "step": 6000 | |
| }, | |
| { | |
| "loss": 0.0839, | |
| "grad_norm": 1.0349544286727905, | |
| "learning_rate": 3.2653135634419055e-05, | |
| "epoch": 0.37, | |
| "step": 6100 | |
| }, | |
| { | |
| "loss": 0.0827, | |
| "grad_norm": 0.8446714282035828, | |
| "learning_rate": 3.253159941662616e-05, | |
| "epoch": 0.38, | |
| "step": 6200 | |
| }, | |
| { | |
| "loss": 0.082, | |
| "grad_norm": 1.1419836282730103, | |
| "learning_rate": 3.2410063198833255e-05, | |
| "epoch": 0.38, | |
| "step": 6300 | |
| }, | |
| { | |
| "loss": 0.0812, | |
| "grad_norm": 0.9505990147590637, | |
| "learning_rate": 3.228852698104035e-05, | |
| "epoch": 0.39, | |
| "step": 6400 | |
| }, | |
| { | |
| "loss": 0.0806, | |
| "grad_norm": 1.0036993026733398, | |
| "learning_rate": 3.2166990763247455e-05, | |
| "epoch": 0.39, | |
| "step": 6500 | |
| }, | |
| { | |
| "loss": 0.0819, | |
| "grad_norm": 0.7694116234779358, | |
| "learning_rate": 3.204545454545455e-05, | |
| "epoch": 0.4, | |
| "step": 6600 | |
| }, | |
| { | |
| "loss": 0.0818, | |
| "grad_norm": 0.7389699220657349, | |
| "learning_rate": 3.192391832766165e-05, | |
| "epoch": 0.41, | |
| "step": 6700 | |
| }, | |
| { | |
| "loss": 0.0829, | |
| "grad_norm": 0.8264873623847961, | |
| "learning_rate": 3.180238210986874e-05, | |
| "epoch": 0.41, | |
| "step": 6800 | |
| }, | |
| { | |
| "loss": 0.0849, | |
| "grad_norm": 0.8844084143638611, | |
| "learning_rate": 3.168084589207584e-05, | |
| "epoch": 0.42, | |
| "step": 6900 | |
| }, | |
| { | |
| "loss": 0.0816, | |
| "grad_norm": 0.8728023171424866, | |
| "learning_rate": 3.155930967428294e-05, | |
| "epoch": 0.42, | |
| "step": 7000 | |
| }, | |
| { | |
| "loss": 0.0799, | |
| "grad_norm": 1.218404769897461, | |
| "learning_rate": 3.1437773456490035e-05, | |
| "epoch": 0.43, | |
| "step": 7100 | |
| }, | |
| { | |
| "loss": 0.0797, | |
| "grad_norm": 0.7085688710212708, | |
| "learning_rate": 3.1316237238697135e-05, | |
| "epoch": 0.44, | |
| "step": 7200 | |
| }, | |
| { | |
| "loss": 0.0795, | |
| "grad_norm": 0.8446517586708069, | |
| "learning_rate": 3.1194701020904235e-05, | |
| "epoch": 0.44, | |
| "step": 7300 | |
| }, | |
| { | |
| "loss": 0.0817, | |
| "grad_norm": 1.3226453065872192, | |
| "learning_rate": 3.107316480311133e-05, | |
| "epoch": 0.45, | |
| "step": 7400 | |
| }, | |
| { | |
| "loss": 0.0816, | |
| "grad_norm": 0.7685155868530273, | |
| "learning_rate": 3.095162858531843e-05, | |
| "epoch": 0.46, | |
| "step": 7500 | |
| }, | |
| { | |
| "loss": 0.0806, | |
| "grad_norm": 0.7135798335075378, | |
| "learning_rate": 3.083009236752553e-05, | |
| "epoch": 0.46, | |
| "step": 7600 | |
| }, | |
| { | |
| "loss": 0.0795, | |
| "grad_norm": 1.0276037454605103, | |
| "learning_rate": 3.070855614973262e-05, | |
| "epoch": 0.47, | |
| "step": 7700 | |
| }, | |
| { | |
| "loss": 0.081, | |
| "grad_norm": 1.1788092851638794, | |
| "learning_rate": 3.058701993193972e-05, | |
| "epoch": 0.47, | |
| "step": 7800 | |
| }, | |
| { | |
| "loss": 0.0791, | |
| "grad_norm": 1.0305782556533813, | |
| "learning_rate": 3.046548371414682e-05, | |
| "epoch": 0.48, | |
| "step": 7900 | |
| }, | |
| { | |
| "loss": 0.0805, | |
| "grad_norm": 1.4414223432540894, | |
| "learning_rate": 3.0343947496353915e-05, | |
| "epoch": 0.49, | |
| "step": 8000 | |
| }, | |
| { | |
| "loss": 0.0799, | |
| "grad_norm": 0.8137165904045105, | |
| "learning_rate": 3.0222411278561012e-05, | |
| "epoch": 0.49, | |
| "step": 8100 | |
| }, | |
| { | |
| "loss": 0.08, | |
| "grad_norm": 1.1238079071044922, | |
| "learning_rate": 3.0100875060768112e-05, | |
| "epoch": 0.5, | |
| "step": 8200 | |
| }, | |
| { | |
| "loss": 0.0792, | |
| "grad_norm": 0.9724037647247314, | |
| "learning_rate": 2.997933884297521e-05, | |
| "epoch": 0.5, | |
| "step": 8300 | |
| }, | |
| { | |
| "loss": 0.0793, | |
| "grad_norm": 1.0247116088867188, | |
| "learning_rate": 2.9857802625182306e-05, | |
| "epoch": 0.51, | |
| "step": 8400 | |
| }, | |
| { | |
| "loss": 0.0783, | |
| "grad_norm": 1.454062581062317, | |
| "learning_rate": 2.9737481769567335e-05, | |
| "epoch": 0.52, | |
| "step": 8500 | |
| }, | |
| { | |
| "loss": 0.0788, | |
| "grad_norm": 0.7570217251777649, | |
| "learning_rate": 2.961594555177443e-05, | |
| "epoch": 0.52, | |
| "step": 8600 | |
| }, | |
| { | |
| "loss": 0.0768, | |
| "grad_norm": 1.1738083362579346, | |
| "learning_rate": 2.9494409333981528e-05, | |
| "epoch": 0.53, | |
| "step": 8700 | |
| }, | |
| { | |
| "loss": 0.0778, | |
| "grad_norm": 0.7776427268981934, | |
| "learning_rate": 2.9372873116188625e-05, | |
| "epoch": 0.53, | |
| "step": 8800 | |
| }, | |
| { | |
| "loss": 0.0763, | |
| "grad_norm": 1.226198673248291, | |
| "learning_rate": 2.9251336898395725e-05, | |
| "epoch": 0.54, | |
| "step": 8900 | |
| }, | |
| { | |
| "loss": 0.0761, | |
| "grad_norm": 0.8859773874282837, | |
| "learning_rate": 2.912980068060282e-05, | |
| "epoch": 0.55, | |
| "step": 9000 | |
| }, | |
| { | |
| "loss": 0.0765, | |
| "grad_norm": 1.0220259428024292, | |
| "learning_rate": 2.9008264462809918e-05, | |
| "epoch": 0.55, | |
| "step": 9100 | |
| }, | |
| { | |
| "loss": 0.0777, | |
| "grad_norm": 1.0430243015289307, | |
| "learning_rate": 2.888672824501702e-05, | |
| "epoch": 0.56, | |
| "step": 9200 | |
| }, | |
| { | |
| "loss": 0.0775, | |
| "grad_norm": 1.1380356550216675, | |
| "learning_rate": 2.8765192027224115e-05, | |
| "epoch": 0.56, | |
| "step": 9300 | |
| }, | |
| { | |
| "loss": 0.0775, | |
| "grad_norm": 0.6778531670570374, | |
| "learning_rate": 2.8643655809431212e-05, | |
| "epoch": 0.57, | |
| "step": 9400 | |
| }, | |
| { | |
| "loss": 0.0782, | |
| "grad_norm": 1.0413175821304321, | |
| "learning_rate": 2.852211959163831e-05, | |
| "epoch": 0.58, | |
| "step": 9500 | |
| }, | |
| { | |
| "loss": 0.0791, | |
| "grad_norm": 1.1399835348129272, | |
| "learning_rate": 2.840058337384541e-05, | |
| "epoch": 0.58, | |
| "step": 9600 | |
| }, | |
| { | |
| "loss": 0.0763, | |
| "grad_norm": 0.968399703502655, | |
| "learning_rate": 2.8279047156052505e-05, | |
| "epoch": 0.59, | |
| "step": 9700 | |
| }, | |
| { | |
| "loss": 0.0763, | |
| "grad_norm": 1.0254497528076172, | |
| "learning_rate": 2.8157510938259602e-05, | |
| "epoch": 0.59, | |
| "step": 9800 | |
| }, | |
| { | |
| "loss": 0.0771, | |
| "grad_norm": 0.8642473220825195, | |
| "learning_rate": 2.8035974720466702e-05, | |
| "epoch": 0.6, | |
| "step": 9900 | |
| }, | |
| { | |
| "loss": 0.0772, | |
| "grad_norm": 1.1130231618881226, | |
| "learning_rate": 2.79144385026738e-05, | |
| "epoch": 0.61, | |
| "step": 10000 | |
| }, | |
| { | |
| "loss": 0.0793, | |
| "grad_norm": 1.4455962181091309, | |
| "learning_rate": 2.7792902284880895e-05, | |
| "epoch": 0.61, | |
| "step": 10100 | |
| }, | |
| { | |
| "loss": 0.077, | |
| "grad_norm": 0.9273576736450195, | |
| "learning_rate": 2.7671366067087992e-05, | |
| "epoch": 0.62, | |
| "step": 10200 | |
| }, | |
| { | |
| "loss": 0.0766, | |
| "grad_norm": 0.8223456740379333, | |
| "learning_rate": 2.7549829849295092e-05, | |
| "epoch": 0.62, | |
| "step": 10300 | |
| }, | |
| { | |
| "loss": 0.0765, | |
| "grad_norm": 1.1068949699401855, | |
| "learning_rate": 2.742829363150219e-05, | |
| "epoch": 0.63, | |
| "step": 10400 | |
| }, | |
| { | |
| "loss": 0.0762, | |
| "grad_norm": 1.0787135362625122, | |
| "learning_rate": 2.7306757413709285e-05, | |
| "epoch": 0.64, | |
| "step": 10500 | |
| }, | |
| { | |
| "loss": 0.0765, | |
| "grad_norm": 0.6019480228424072, | |
| "learning_rate": 2.7185221195916386e-05, | |
| "epoch": 0.64, | |
| "step": 10600 | |
| }, | |
| { | |
| "loss": 0.0756, | |
| "grad_norm": 0.7752580046653748, | |
| "learning_rate": 2.7063684978123482e-05, | |
| "epoch": 0.65, | |
| "step": 10700 | |
| }, | |
| { | |
| "loss": 0.0762, | |
| "grad_norm": 0.9023341536521912, | |
| "learning_rate": 2.6943364122508508e-05, | |
| "epoch": 0.66, | |
| "step": 10800 | |
| }, | |
| { | |
| "loss": 0.0759, | |
| "grad_norm": 1.1154266595840454, | |
| "learning_rate": 2.6821827904715608e-05, | |
| "epoch": 0.66, | |
| "step": 10900 | |
| }, | |
| { | |
| "loss": 0.0752, | |
| "grad_norm": 1.5197564363479614, | |
| "learning_rate": 2.6700291686922705e-05, | |
| "epoch": 0.67, | |
| "step": 11000 | |
| }, | |
| { | |
| "loss": 0.0757, | |
| "grad_norm": 0.8111494183540344, | |
| "learning_rate": 2.65787554691298e-05, | |
| "epoch": 0.67, | |
| "step": 11100 | |
| }, | |
| { | |
| "loss": 0.0749, | |
| "grad_norm": 0.6413083076477051, | |
| "learning_rate": 2.6457219251336898e-05, | |
| "epoch": 0.68, | |
| "step": 11200 | |
| }, | |
| { | |
| "loss": 0.0754, | |
| "grad_norm": 0.8996323943138123, | |
| "learning_rate": 2.6335683033544e-05, | |
| "epoch": 0.69, | |
| "step": 11300 | |
| }, | |
| { | |
| "loss": 0.0744, | |
| "grad_norm": 0.7931196093559265, | |
| "learning_rate": 2.6214146815751095e-05, | |
| "epoch": 0.69, | |
| "step": 11400 | |
| }, | |
| { | |
| "loss": 0.0742, | |
| "grad_norm": 1.0821586847305298, | |
| "learning_rate": 2.609261059795819e-05, | |
| "epoch": 0.7, | |
| "step": 11500 | |
| }, | |
| { | |
| "loss": 0.0722, | |
| "grad_norm": 0.9964590072631836, | |
| "learning_rate": 2.5971074380165292e-05, | |
| "epoch": 0.7, | |
| "step": 11600 | |
| }, | |
| { | |
| "loss": 0.0752, | |
| "grad_norm": 0.7918893694877625, | |
| "learning_rate": 2.584953816237239e-05, | |
| "epoch": 0.71, | |
| "step": 11700 | |
| }, | |
| { | |
| "loss": 0.0734, | |
| "grad_norm": 0.6565855145454407, | |
| "learning_rate": 2.5728001944579485e-05, | |
| "epoch": 0.72, | |
| "step": 11800 | |
| }, | |
| { | |
| "loss": 0.0717, | |
| "grad_norm": 1.9885566234588623, | |
| "learning_rate": 2.5606465726786582e-05, | |
| "epoch": 0.72, | |
| "step": 11900 | |
| }, | |
| { | |
| "loss": 0.0747, | |
| "grad_norm": 0.6101750135421753, | |
| "learning_rate": 2.5484929508993682e-05, | |
| "epoch": 0.73, | |
| "step": 12000 | |
| }, | |
| { | |
| "loss": 0.073, | |
| "grad_norm": 1.001930594444275, | |
| "learning_rate": 2.536339329120078e-05, | |
| "epoch": 0.73, | |
| "step": 12100 | |
| }, | |
| { | |
| "loss": 0.074, | |
| "grad_norm": 0.880673348903656, | |
| "learning_rate": 2.5241857073407875e-05, | |
| "epoch": 0.74, | |
| "step": 12200 | |
| }, | |
| { | |
| "loss": 0.0738, | |
| "grad_norm": 0.7980429530143738, | |
| "learning_rate": 2.5120320855614975e-05, | |
| "epoch": 0.75, | |
| "step": 12300 | |
| }, | |
| { | |
| "loss": 0.0758, | |
| "grad_norm": 1.0153135061264038, | |
| "learning_rate": 2.4998784637822072e-05, | |
| "epoch": 0.75, | |
| "step": 12400 | |
| }, | |
| { | |
| "loss": 0.0742, | |
| "grad_norm": 0.8344822525978088, | |
| "learning_rate": 2.487724842002917e-05, | |
| "epoch": 0.76, | |
| "step": 12500 | |
| }, | |
| { | |
| "loss": 0.0738, | |
| "grad_norm": 0.6752304434776306, | |
| "learning_rate": 2.4755712202236272e-05, | |
| "epoch": 0.76, | |
| "step": 12600 | |
| }, | |
| { | |
| "loss": 0.0732, | |
| "grad_norm": 1.1106210947036743, | |
| "learning_rate": 2.4634175984443366e-05, | |
| "epoch": 0.77, | |
| "step": 12700 | |
| }, | |
| { | |
| "loss": 0.0754, | |
| "grad_norm": 0.8022058606147766, | |
| "learning_rate": 2.4512639766650462e-05, | |
| "epoch": 0.78, | |
| "step": 12800 | |
| }, | |
| { | |
| "loss": 0.0735, | |
| "grad_norm": 0.737308144569397, | |
| "learning_rate": 2.439110354885756e-05, | |
| "epoch": 0.78, | |
| "step": 12900 | |
| }, | |
| { | |
| "loss": 0.0738, | |
| "grad_norm": 2.094043493270874, | |
| "learning_rate": 2.4269567331064662e-05, | |
| "epoch": 0.79, | |
| "step": 13000 | |
| }, | |
| { | |
| "loss": 0.072, | |
| "grad_norm": 1.1105279922485352, | |
| "learning_rate": 2.4148031113271756e-05, | |
| "epoch": 0.79, | |
| "step": 13100 | |
| }, | |
| { | |
| "loss": 0.0716, | |
| "grad_norm": 1.2243571281433105, | |
| "learning_rate": 2.4026494895478852e-05, | |
| "epoch": 0.8, | |
| "step": 13200 | |
| }, | |
| { | |
| "loss": 0.0718, | |
| "grad_norm": 1.0883300304412842, | |
| "learning_rate": 2.3904958677685956e-05, | |
| "epoch": 0.81, | |
| "step": 13300 | |
| }, | |
| { | |
| "loss": 0.0727, | |
| "grad_norm": 0.9934273362159729, | |
| "learning_rate": 2.378342245989305e-05, | |
| "epoch": 0.81, | |
| "step": 13400 | |
| }, | |
| { | |
| "loss": 0.0721, | |
| "grad_norm": 0.7145100831985474, | |
| "learning_rate": 2.3661886242100146e-05, | |
| "epoch": 0.82, | |
| "step": 13500 | |
| }, | |
| { | |
| "loss": 0.0721, | |
| "grad_norm": 0.8873516321182251, | |
| "learning_rate": 2.3540350024307243e-05, | |
| "epoch": 0.83, | |
| "step": 13600 | |
| }, | |
| { | |
| "loss": 0.0723, | |
| "grad_norm": 0.7798359990119934, | |
| "learning_rate": 2.3418813806514346e-05, | |
| "epoch": 0.83, | |
| "step": 13700 | |
| }, | |
| { | |
| "loss": 0.0726, | |
| "grad_norm": 0.9411553740501404, | |
| "learning_rate": 2.329727758872144e-05, | |
| "epoch": 0.84, | |
| "step": 13800 | |
| }, | |
| { | |
| "loss": 0.0715, | |
| "grad_norm": 0.7994709610939026, | |
| "learning_rate": 2.3175741370928536e-05, | |
| "epoch": 0.84, | |
| "step": 13900 | |
| }, | |
| { | |
| "loss": 0.0732, | |
| "grad_norm": 0.5489715337753296, | |
| "learning_rate": 2.305420515313564e-05, | |
| "epoch": 0.85, | |
| "step": 14000 | |
| }, | |
| { | |
| "loss": 0.0699, | |
| "grad_norm": 0.5710996389389038, | |
| "learning_rate": 2.2932668935342736e-05, | |
| "epoch": 0.86, | |
| "step": 14100 | |
| }, | |
| { | |
| "loss": 0.073, | |
| "grad_norm": 0.7003745436668396, | |
| "learning_rate": 2.281113271754983e-05, | |
| "epoch": 0.86, | |
| "step": 14200 | |
| }, | |
| { | |
| "loss": 0.0722, | |
| "grad_norm": 0.6743086576461792, | |
| "learning_rate": 2.2689596499756926e-05, | |
| "epoch": 0.87, | |
| "step": 14300 | |
| }, | |
| { | |
| "loss": 0.0699, | |
| "grad_norm": 0.6730968356132507, | |
| "learning_rate": 2.256806028196403e-05, | |
| "epoch": 0.87, | |
| "step": 14400 | |
| }, | |
| { | |
| "loss": 0.0719, | |
| "grad_norm": 0.7155641913414001, | |
| "learning_rate": 2.2446524064171126e-05, | |
| "epoch": 0.88, | |
| "step": 14500 | |
| }, | |
| { | |
| "loss": 0.0708, | |
| "grad_norm": 0.8122462630271912, | |
| "learning_rate": 2.232498784637822e-05, | |
| "epoch": 0.89, | |
| "step": 14600 | |
| }, | |
| { | |
| "loss": 0.0718, | |
| "grad_norm": 0.8022533655166626, | |
| "learning_rate": 2.2203451628585323e-05, | |
| "epoch": 0.89, | |
| "step": 14700 | |
| }, | |
| { | |
| "loss": 0.0712, | |
| "grad_norm": 0.545359194278717, | |
| "learning_rate": 2.208191541079242e-05, | |
| "epoch": 0.9, | |
| "step": 14800 | |
| }, | |
| { | |
| "loss": 0.0711, | |
| "grad_norm": 0.8318025469779968, | |
| "learning_rate": 2.1960379192999513e-05, | |
| "epoch": 0.9, | |
| "step": 14900 | |
| }, | |
| { | |
| "loss": 0.0706, | |
| "grad_norm": 0.9334779381752014, | |
| "learning_rate": 2.1838842975206616e-05, | |
| "epoch": 0.91, | |
| "step": 15000 | |
| }, | |
| { | |
| "loss": 0.0701, | |
| "grad_norm": 0.8202875256538391, | |
| "learning_rate": 2.1717306757413713e-05, | |
| "epoch": 0.92, | |
| "step": 15100 | |
| }, | |
| { | |
| "loss": 0.07, | |
| "grad_norm": 0.8788963556289673, | |
| "learning_rate": 2.159577053962081e-05, | |
| "epoch": 0.92, | |
| "step": 15200 | |
| }, | |
| { | |
| "loss": 0.0713, | |
| "grad_norm": 1.023823618888855, | |
| "learning_rate": 2.1474234321827903e-05, | |
| "epoch": 0.93, | |
| "step": 15300 | |
| }, | |
| { | |
| "loss": 0.0697, | |
| "grad_norm": 0.8784018158912659, | |
| "learning_rate": 2.1353913466212936e-05, | |
| "epoch": 0.93, | |
| "step": 15400 | |
| }, | |
| { | |
| "loss": 0.0695, | |
| "grad_norm": 1.1254814863204956, | |
| "learning_rate": 2.1232377248420032e-05, | |
| "epoch": 0.94, | |
| "step": 15500 | |
| }, | |
| { | |
| "loss": 0.0697, | |
| "grad_norm": 0.9760749340057373, | |
| "learning_rate": 2.1110841030627126e-05, | |
| "epoch": 0.95, | |
| "step": 15600 | |
| }, | |
| { | |
| "loss": 0.0709, | |
| "grad_norm": 1.0121357440948486, | |
| "learning_rate": 2.098930481283423e-05, | |
| "epoch": 0.95, | |
| "step": 15700 | |
| }, | |
| { | |
| "loss": 0.0717, | |
| "grad_norm": 0.7810111045837402, | |
| "learning_rate": 2.0867768595041326e-05, | |
| "epoch": 0.96, | |
| "step": 15800 | |
| }, | |
| { | |
| "loss": 0.0692, | |
| "grad_norm": 0.6813214421272278, | |
| "learning_rate": 2.074623237724842e-05, | |
| "epoch": 0.96, | |
| "step": 15900 | |
| }, | |
| { | |
| "loss": 0.0696, | |
| "grad_norm": 0.7685451507568359, | |
| "learning_rate": 2.0624696159455516e-05, | |
| "epoch": 0.97, | |
| "step": 16000 | |
| }, | |
| { | |
| "loss": 0.0702, | |
| "grad_norm": 3.3225691318511963, | |
| "learning_rate": 2.050315994166262e-05, | |
| "epoch": 0.98, | |
| "step": 16100 | |
| }, | |
| { | |
| "loss": 0.0702, | |
| "grad_norm": 0.7979671955108643, | |
| "learning_rate": 2.0381623723869716e-05, | |
| "epoch": 0.98, | |
| "step": 16200 | |
| }, | |
| { | |
| "loss": 0.0691, | |
| "grad_norm": 3.4929583072662354, | |
| "learning_rate": 2.026008750607681e-05, | |
| "epoch": 0.99, | |
| "step": 16300 | |
| }, | |
| { | |
| "loss": 0.0703, | |
| "grad_norm": 0.7738245725631714, | |
| "learning_rate": 2.0138551288283913e-05, | |
| "epoch": 1.0, | |
| "step": 16400 | |
| }, | |
| { | |
| "eval_loss": 0.06881729513406754, | |
| "eval_f1": 0.8973916467400326, | |
| "eval_precision": 0.9049522471305407, | |
| "eval_recall": 0.8906029559155776, | |
| "eval_accuracy": 0.9730252863363563, | |
| "eval_runtime": 304.4852, | |
| "eval_samples_per_second": 86.796, | |
| "eval_steps_per_second": 10.851, | |
| "epoch": 1.0, | |
| "step": 16481 | |
| }, | |
| { | |
| "loss": 0.0684, | |
| "grad_norm": 0.891858696937561, | |
| "learning_rate": 2.001701507049101e-05, | |
| "epoch": 1.0, | |
| "step": 16500 | |
| }, | |
| { | |
| "loss": 0.0619, | |
| "grad_norm": 0.6408938765525818, | |
| "learning_rate": 1.9895478852698106e-05, | |
| "epoch": 1.01, | |
| "step": 16600 | |
| }, | |
| { | |
| "loss": 0.0629, | |
| "grad_norm": 0.7390792965888977, | |
| "learning_rate": 1.9773942634905203e-05, | |
| "epoch": 1.01, | |
| "step": 16700 | |
| }, | |
| { | |
| "loss": 0.0604, | |
| "grad_norm": 0.5206795930862427, | |
| "learning_rate": 1.9652406417112303e-05, | |
| "epoch": 1.02, | |
| "step": 16800 | |
| }, | |
| { | |
| "loss": 0.0613, | |
| "grad_norm": 0.909116268157959, | |
| "learning_rate": 1.95308701993194e-05, | |
| "epoch": 1.03, | |
| "step": 16900 | |
| }, | |
| { | |
| "loss": 0.0616, | |
| "grad_norm": 0.8701964020729065, | |
| "learning_rate": 1.9409333981526496e-05, | |
| "epoch": 1.03, | |
| "step": 17000 | |
| }, | |
| { | |
| "loss": 0.0625, | |
| "grad_norm": 1.0762407779693604, | |
| "learning_rate": 1.9287797763733593e-05, | |
| "epoch": 1.04, | |
| "step": 17100 | |
| }, | |
| { | |
| "loss": 0.0615, | |
| "grad_norm": 0.7816362380981445, | |
| "learning_rate": 1.9166261545940693e-05, | |
| "epoch": 1.04, | |
| "step": 17200 | |
| }, | |
| { | |
| "loss": 0.0626, | |
| "grad_norm": 0.6983965039253235, | |
| "learning_rate": 1.904594069032572e-05, | |
| "epoch": 1.05, | |
| "step": 17300 | |
| }, | |
| { | |
| "loss": 0.0621, | |
| "grad_norm": 0.910698413848877, | |
| "learning_rate": 1.8924404472532816e-05, | |
| "epoch": 1.06, | |
| "step": 17400 | |
| }, | |
| { | |
| "loss": 0.0631, | |
| "grad_norm": 0.8654133677482605, | |
| "learning_rate": 1.8802868254739916e-05, | |
| "epoch": 1.06, | |
| "step": 17500 | |
| }, | |
| { | |
| "loss": 0.062, | |
| "grad_norm": 0.8351789712905884, | |
| "learning_rate": 1.8681332036947012e-05, | |
| "epoch": 1.07, | |
| "step": 17600 | |
| }, | |
| { | |
| "loss": 0.0604, | |
| "grad_norm": 0.7861587405204773, | |
| "learning_rate": 1.855979581915411e-05, | |
| "epoch": 1.07, | |
| "step": 17700 | |
| }, | |
| { | |
| "loss": 0.0609, | |
| "grad_norm": 0.7295276522636414, | |
| "learning_rate": 1.843825960136121e-05, | |
| "epoch": 1.08, | |
| "step": 17800 | |
| }, | |
| { | |
| "loss": 0.0616, | |
| "grad_norm": 1.0210868120193481, | |
| "learning_rate": 1.8316723383568306e-05, | |
| "epoch": 1.09, | |
| "step": 17900 | |
| }, | |
| { | |
| "loss": 0.0616, | |
| "grad_norm": 0.8220874071121216, | |
| "learning_rate": 1.8195187165775403e-05, | |
| "epoch": 1.09, | |
| "step": 18000 | |
| }, | |
| { | |
| "loss": 0.0607, | |
| "grad_norm": 0.7961727380752563, | |
| "learning_rate": 1.80736509479825e-05, | |
| "epoch": 1.1, | |
| "step": 18100 | |
| }, | |
| { | |
| "loss": 0.0614, | |
| "grad_norm": 1.0390113592147827, | |
| "learning_rate": 1.79521147301896e-05, | |
| "epoch": 1.1, | |
| "step": 18200 | |
| }, | |
| { | |
| "loss": 0.0625, | |
| "grad_norm": 0.8423497080802917, | |
| "learning_rate": 1.7830578512396696e-05, | |
| "epoch": 1.11, | |
| "step": 18300 | |
| }, | |
| { | |
| "loss": 0.0618, | |
| "grad_norm": 0.7576957941055298, | |
| "learning_rate": 1.7709042294603793e-05, | |
| "epoch": 1.12, | |
| "step": 18400 | |
| }, | |
| { | |
| "loss": 0.061, | |
| "grad_norm": 0.7174555659294128, | |
| "learning_rate": 1.7587506076810893e-05, | |
| "epoch": 1.12, | |
| "step": 18500 | |
| }, | |
| { | |
| "loss": 0.0602, | |
| "grad_norm": 0.7977816462516785, | |
| "learning_rate": 1.746596985901799e-05, | |
| "epoch": 1.13, | |
| "step": 18600 | |
| }, | |
| { | |
| "loss": 0.0617, | |
| "grad_norm": 0.8125550150871277, | |
| "learning_rate": 1.7344433641225086e-05, | |
| "epoch": 1.13, | |
| "step": 18700 | |
| }, | |
| { | |
| "loss": 0.0605, | |
| "grad_norm": 1.3914258480072021, | |
| "learning_rate": 1.7222897423432183e-05, | |
| "epoch": 1.14, | |
| "step": 18800 | |
| }, | |
| { | |
| "loss": 0.0614, | |
| "grad_norm": 0.8273860812187195, | |
| "learning_rate": 1.7101361205639283e-05, | |
| "epoch": 1.15, | |
| "step": 18900 | |
| }, | |
| { | |
| "loss": 0.0606, | |
| "grad_norm": 0.7267687916755676, | |
| "learning_rate": 1.697982498784638e-05, | |
| "epoch": 1.15, | |
| "step": 19000 | |
| }, | |
| { | |
| "loss": 0.0624, | |
| "grad_norm": 1.075861930847168, | |
| "learning_rate": 1.6858288770053476e-05, | |
| "epoch": 1.16, | |
| "step": 19100 | |
| }, | |
| { | |
| "loss": 0.062, | |
| "grad_norm": 0.867139995098114, | |
| "learning_rate": 1.6736752552260576e-05, | |
| "epoch": 1.16, | |
| "step": 19200 | |
| }, | |
| { | |
| "loss": 0.0595, | |
| "grad_norm": 0.6730388402938843, | |
| "learning_rate": 1.6615216334467673e-05, | |
| "epoch": 1.17, | |
| "step": 19300 | |
| }, | |
| { | |
| "loss": 0.0603, | |
| "grad_norm": 0.7329290509223938, | |
| "learning_rate": 1.649368011667477e-05, | |
| "epoch": 1.18, | |
| "step": 19400 | |
| }, | |
| { | |
| "loss": 0.0605, | |
| "grad_norm": 1.0000228881835938, | |
| "learning_rate": 1.6372143898881866e-05, | |
| "epoch": 1.18, | |
| "step": 19500 | |
| }, | |
| { | |
| "loss": 0.0599, | |
| "grad_norm": 1.0037493705749512, | |
| "learning_rate": 1.6250607681088967e-05, | |
| "epoch": 1.19, | |
| "step": 19600 | |
| }, | |
| { | |
| "loss": 0.0616, | |
| "grad_norm": 0.7647894024848938, | |
| "learning_rate": 1.6129071463296063e-05, | |
| "epoch": 1.2, | |
| "step": 19700 | |
| }, | |
| { | |
| "loss": 0.0604, | |
| "grad_norm": 0.78948575258255, | |
| "learning_rate": 1.600753524550316e-05, | |
| "epoch": 1.2, | |
| "step": 19800 | |
| }, | |
| { | |
| "loss": 0.0609, | |
| "grad_norm": 0.8443770408630371, | |
| "learning_rate": 1.588599902771026e-05, | |
| "epoch": 1.21, | |
| "step": 19900 | |
| }, | |
| { | |
| "loss": 0.0599, | |
| "grad_norm": 1.1531789302825928, | |
| "learning_rate": 1.5764462809917357e-05, | |
| "epoch": 1.21, | |
| "step": 20000 | |
| }, | |
| { | |
| "loss": 0.0605, | |
| "grad_norm": 0.7325319647789001, | |
| "learning_rate": 1.5642926592124453e-05, | |
| "epoch": 1.22, | |
| "step": 20100 | |
| }, | |
| { | |
| "loss": 0.0606, | |
| "grad_norm": 0.8585038185119629, | |
| "learning_rate": 1.5521390374331553e-05, | |
| "epoch": 1.23, | |
| "step": 20200 | |
| }, | |
| { | |
| "loss": 0.0602, | |
| "grad_norm": 0.6652311086654663, | |
| "learning_rate": 1.539985415653865e-05, | |
| "epoch": 1.23, | |
| "step": 20300 | |
| }, | |
| { | |
| "loss": 0.0605, | |
| "grad_norm": 0.9240396618843079, | |
| "learning_rate": 1.5278317938745747e-05, | |
| "epoch": 1.24, | |
| "step": 20400 | |
| }, | |
| { | |
| "loss": 0.0609, | |
| "grad_norm": 0.9992942214012146, | |
| "learning_rate": 1.5156781720952845e-05, | |
| "epoch": 1.24, | |
| "step": 20500 | |
| }, | |
| { | |
| "loss": 0.0604, | |
| "grad_norm": 0.7454150915145874, | |
| "learning_rate": 1.5035245503159944e-05, | |
| "epoch": 1.25, | |
| "step": 20600 | |
| }, | |
| { | |
| "loss": 0.0598, | |
| "grad_norm": 0.8551883101463318, | |
| "learning_rate": 1.491370928536704e-05, | |
| "epoch": 1.26, | |
| "step": 20700 | |
| }, | |
| { | |
| "loss": 0.061, | |
| "grad_norm": 0.8273564577102661, | |
| "learning_rate": 1.4792173067574139e-05, | |
| "epoch": 1.26, | |
| "step": 20800 | |
| }, | |
| { | |
| "loss": 0.06, | |
| "grad_norm": 0.925244927406311, | |
| "learning_rate": 1.4671852211959166e-05, | |
| "epoch": 1.27, | |
| "step": 20900 | |
| }, | |
| { | |
| "loss": 0.0587, | |
| "grad_norm": 0.5892955660820007, | |
| "learning_rate": 1.4550315994166261e-05, | |
| "epoch": 1.27, | |
| "step": 21000 | |
| }, | |
| { | |
| "loss": 0.0602, | |
| "grad_norm": 0.7904210090637207, | |
| "learning_rate": 1.4428779776373361e-05, | |
| "epoch": 1.28, | |
| "step": 21100 | |
| }, | |
| { | |
| "loss": 0.0625, | |
| "grad_norm": 1.2804646492004395, | |
| "learning_rate": 1.430724355858046e-05, | |
| "epoch": 1.29, | |
| "step": 21200 | |
| }, | |
| { | |
| "loss": 0.0607, | |
| "grad_norm": 0.9952909350395203, | |
| "learning_rate": 1.4185707340787556e-05, | |
| "epoch": 1.29, | |
| "step": 21300 | |
| }, | |
| { | |
| "loss": 0.0602, | |
| "grad_norm": 0.9036094546318054, | |
| "learning_rate": 1.4064171122994655e-05, | |
| "epoch": 1.3, | |
| "step": 21400 | |
| }, | |
| { | |
| "loss": 0.0594, | |
| "grad_norm": 0.8128438591957092, | |
| "learning_rate": 1.3942634905201751e-05, | |
| "epoch": 1.3, | |
| "step": 21500 | |
| }, | |
| { | |
| "loss": 0.0593, | |
| "grad_norm": 0.786703884601593, | |
| "learning_rate": 1.382109868740885e-05, | |
| "epoch": 1.31, | |
| "step": 21600 | |
| }, | |
| { | |
| "loss": 0.0604, | |
| "grad_norm": 1.107258677482605, | |
| "learning_rate": 1.3699562469615946e-05, | |
| "epoch": 1.32, | |
| "step": 21700 | |
| }, | |
| { | |
| "loss": 0.0596, | |
| "grad_norm": 1.0990906953811646, | |
| "learning_rate": 1.3578026251823045e-05, | |
| "epoch": 1.32, | |
| "step": 21800 | |
| }, | |
| { | |
| "loss": 0.0611, | |
| "grad_norm": 0.7040949463844299, | |
| "learning_rate": 1.3456490034030143e-05, | |
| "epoch": 1.33, | |
| "step": 21900 | |
| }, | |
| { | |
| "loss": 0.0582, | |
| "grad_norm": 0.7568740248680115, | |
| "learning_rate": 1.333495381623724e-05, | |
| "epoch": 1.33, | |
| "step": 22000 | |
| }, | |
| { | |
| "loss": 0.0595, | |
| "grad_norm": 0.6342681646347046, | |
| "learning_rate": 1.3213417598444338e-05, | |
| "epoch": 1.34, | |
| "step": 22100 | |
| }, | |
| { | |
| "loss": 0.0597, | |
| "grad_norm": 0.7555422186851501, | |
| "learning_rate": 1.3091881380651435e-05, | |
| "epoch": 1.35, | |
| "step": 22200 | |
| }, | |
| { | |
| "loss": 0.0587, | |
| "grad_norm": 0.8620259165763855, | |
| "learning_rate": 1.2970345162858533e-05, | |
| "epoch": 1.35, | |
| "step": 22300 | |
| }, | |
| { | |
| "loss": 0.0586, | |
| "grad_norm": 1.4132779836654663, | |
| "learning_rate": 1.2848808945065632e-05, | |
| "epoch": 1.36, | |
| "step": 22400 | |
| }, | |
| { | |
| "loss": 0.0594, | |
| "grad_norm": 0.9352446794509888, | |
| "learning_rate": 1.2727272727272728e-05, | |
| "epoch": 1.37, | |
| "step": 22500 | |
| }, | |
| { | |
| "loss": 0.0581, | |
| "grad_norm": 0.8808399438858032, | |
| "learning_rate": 1.2605736509479827e-05, | |
| "epoch": 1.37, | |
| "step": 22600 | |
| }, | |
| { | |
| "loss": 0.0603, | |
| "grad_norm": 0.8254494071006775, | |
| "learning_rate": 1.2484200291686924e-05, | |
| "epoch": 1.38, | |
| "step": 22700 | |
| }, | |
| { | |
| "loss": 0.0589, | |
| "grad_norm": 0.9145941138267517, | |
| "learning_rate": 1.2362664073894022e-05, | |
| "epoch": 1.38, | |
| "step": 22800 | |
| }, | |
| { | |
| "loss": 0.0594, | |
| "grad_norm": 1.267179012298584, | |
| "learning_rate": 1.2241127856101119e-05, | |
| "epoch": 1.39, | |
| "step": 22900 | |
| }, | |
| { | |
| "loss": 0.0585, | |
| "grad_norm": 0.9012957215309143, | |
| "learning_rate": 1.2119591638308217e-05, | |
| "epoch": 1.4, | |
| "step": 23000 | |
| }, | |
| { | |
| "loss": 0.0581, | |
| "grad_norm": 1.053276777267456, | |
| "learning_rate": 1.1998055420515315e-05, | |
| "epoch": 1.4, | |
| "step": 23100 | |
| }, | |
| { | |
| "loss": 0.0579, | |
| "grad_norm": 1.031724214553833, | |
| "learning_rate": 1.1876519202722412e-05, | |
| "epoch": 1.41, | |
| "step": 23200 | |
| }, | |
| { | |
| "loss": 0.0574, | |
| "grad_norm": 0.8730105757713318, | |
| "learning_rate": 1.175498298492951e-05, | |
| "epoch": 1.41, | |
| "step": 23300 | |
| }, | |
| { | |
| "loss": 0.0589, | |
| "grad_norm": 0.871724545955658, | |
| "learning_rate": 1.1633446767136607e-05, | |
| "epoch": 1.42, | |
| "step": 23400 | |
| }, | |
| { | |
| "loss": 0.0585, | |
| "grad_norm": 0.9031744599342346, | |
| "learning_rate": 1.1511910549343706e-05, | |
| "epoch": 1.43, | |
| "step": 23500 | |
| }, | |
| { | |
| "loss": 0.0586, | |
| "grad_norm": 0.5891318917274475, | |
| "learning_rate": 1.1390374331550802e-05, | |
| "epoch": 1.43, | |
| "step": 23600 | |
| }, | |
| { | |
| "loss": 0.0584, | |
| "grad_norm": 0.7399836182594299, | |
| "learning_rate": 1.12688381137579e-05, | |
| "epoch": 1.44, | |
| "step": 23700 | |
| }, | |
| { | |
| "loss": 0.0596, | |
| "grad_norm": 0.47165361046791077, | |
| "learning_rate": 1.1147301895964999e-05, | |
| "epoch": 1.44, | |
| "step": 23800 | |
| }, | |
| { | |
| "loss": 0.0588, | |
| "grad_norm": 0.8805158734321594, | |
| "learning_rate": 1.1025765678172096e-05, | |
| "epoch": 1.45, | |
| "step": 23900 | |
| }, | |
| { | |
| "loss": 0.0587, | |
| "grad_norm": 0.6524300575256348, | |
| "learning_rate": 1.0904229460379194e-05, | |
| "epoch": 1.46, | |
| "step": 24000 | |
| }, | |
| { | |
| "loss": 0.0599, | |
| "grad_norm": 0.7314462661743164, | |
| "learning_rate": 1.078269324258629e-05, | |
| "epoch": 1.46, | |
| "step": 24100 | |
| }, | |
| { | |
| "loss": 0.0587, | |
| "grad_norm": 0.7969116568565369, | |
| "learning_rate": 1.0661157024793389e-05, | |
| "epoch": 1.47, | |
| "step": 24200 | |
| }, | |
| { | |
| "loss": 0.0574, | |
| "grad_norm": 0.6548510193824768, | |
| "learning_rate": 1.0539620807000488e-05, | |
| "epoch": 1.47, | |
| "step": 24300 | |
| }, | |
| { | |
| "loss": 0.0601, | |
| "grad_norm": 0.6944112181663513, | |
| "learning_rate": 1.0418084589207584e-05, | |
| "epoch": 1.48, | |
| "step": 24400 | |
| }, | |
| { | |
| "loss": 0.0595, | |
| "grad_norm": 1.0091618299484253, | |
| "learning_rate": 1.0296548371414683e-05, | |
| "epoch": 1.49, | |
| "step": 24500 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.7692497372627258, | |
| "learning_rate": 1.017501215362178e-05, | |
| "epoch": 1.49, | |
| "step": 24600 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 1.2263282537460327, | |
| "learning_rate": 1.0053475935828878e-05, | |
| "epoch": 1.5, | |
| "step": 24700 | |
| }, | |
| { | |
| "loss": 0.058, | |
| "grad_norm": 1.412335753440857, | |
| "learning_rate": 9.931939718035976e-06, | |
| "epoch": 1.5, | |
| "step": 24800 | |
| }, | |
| { | |
| "loss": 0.0584, | |
| "grad_norm": 0.9114163517951965, | |
| "learning_rate": 9.810403500243073e-06, | |
| "epoch": 1.51, | |
| "step": 24900 | |
| }, | |
| { | |
| "loss": 0.0579, | |
| "grad_norm": 0.8343012928962708, | |
| "learning_rate": 9.688867282450171e-06, | |
| "epoch": 1.52, | |
| "step": 25000 | |
| }, | |
| { | |
| "loss": 0.0581, | |
| "grad_norm": 0.7137165665626526, | |
| "learning_rate": 9.567331064657268e-06, | |
| "epoch": 1.52, | |
| "step": 25100 | |
| }, | |
| { | |
| "loss": 0.0572, | |
| "grad_norm": 0.8871126174926758, | |
| "learning_rate": 9.445794846864366e-06, | |
| "epoch": 1.53, | |
| "step": 25200 | |
| }, | |
| { | |
| "loss": 0.0588, | |
| "grad_norm": 1.9913699626922607, | |
| "learning_rate": 9.324258629071465e-06, | |
| "epoch": 1.54, | |
| "step": 25300 | |
| }, | |
| { | |
| "loss": 0.0586, | |
| "grad_norm": 0.702129065990448, | |
| "learning_rate": 9.202722411278561e-06, | |
| "epoch": 1.54, | |
| "step": 25400 | |
| }, | |
| { | |
| "loss": 0.0589, | |
| "grad_norm": 0.759503960609436, | |
| "learning_rate": 9.08118619348566e-06, | |
| "epoch": 1.55, | |
| "step": 25500 | |
| }, | |
| { | |
| "loss": 0.0598, | |
| "grad_norm": 0.7731884717941284, | |
| "learning_rate": 8.959649975692756e-06, | |
| "epoch": 1.55, | |
| "step": 25600 | |
| }, | |
| { | |
| "loss": 0.0574, | |
| "grad_norm": 0.830560028553009, | |
| "learning_rate": 8.838113757899855e-06, | |
| "epoch": 1.56, | |
| "step": 25700 | |
| }, | |
| { | |
| "loss": 0.0561, | |
| "grad_norm": 0.612714946269989, | |
| "learning_rate": 8.716577540106953e-06, | |
| "epoch": 1.57, | |
| "step": 25800 | |
| }, | |
| { | |
| "loss": 0.0583, | |
| "grad_norm": 0.6476453542709351, | |
| "learning_rate": 8.59504132231405e-06, | |
| "epoch": 1.57, | |
| "step": 25900 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.6660561561584473, | |
| "learning_rate": 8.473505104521148e-06, | |
| "epoch": 1.58, | |
| "step": 26000 | |
| }, | |
| { | |
| "loss": 0.0575, | |
| "grad_norm": 0.6638226509094238, | |
| "learning_rate": 8.351968886728245e-06, | |
| "epoch": 1.58, | |
| "step": 26100 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.6452857255935669, | |
| "learning_rate": 8.231648031113272e-06, | |
| "epoch": 1.59, | |
| "step": 26200 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.819333016872406, | |
| "learning_rate": 8.11011181332037e-06, | |
| "epoch": 1.6, | |
| "step": 26300 | |
| }, | |
| { | |
| "loss": 0.0571, | |
| "grad_norm": 1.2114768028259277, | |
| "learning_rate": 7.988575595527467e-06, | |
| "epoch": 1.6, | |
| "step": 26400 | |
| }, | |
| { | |
| "loss": 0.0577, | |
| "grad_norm": 0.7581117153167725, | |
| "learning_rate": 7.867039377734566e-06, | |
| "epoch": 1.61, | |
| "step": 26500 | |
| }, | |
| { | |
| "loss": 0.0575, | |
| "grad_norm": 0.5861278772354126, | |
| "learning_rate": 7.745503159941663e-06, | |
| "epoch": 1.61, | |
| "step": 26600 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.7154746055603027, | |
| "learning_rate": 7.623966942148761e-06, | |
| "epoch": 1.62, | |
| "step": 26700 | |
| }, | |
| { | |
| "loss": 0.0574, | |
| "grad_norm": 1.072407841682434, | |
| "learning_rate": 7.502430724355859e-06, | |
| "epoch": 1.63, | |
| "step": 26800 | |
| }, | |
| { | |
| "loss": 0.0572, | |
| "grad_norm": 0.8198044896125793, | |
| "learning_rate": 7.380894506562957e-06, | |
| "epoch": 1.63, | |
| "step": 26900 | |
| }, | |
| { | |
| "loss": 0.0562, | |
| "grad_norm": 0.7912253141403198, | |
| "learning_rate": 7.259358288770054e-06, | |
| "epoch": 1.64, | |
| "step": 27000 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.9015645980834961, | |
| "learning_rate": 7.137822070977152e-06, | |
| "epoch": 1.64, | |
| "step": 27100 | |
| }, | |
| { | |
| "loss": 0.0551, | |
| "grad_norm": 0.6205886602401733, | |
| "learning_rate": 7.0162858531842495e-06, | |
| "epoch": 1.65, | |
| "step": 27200 | |
| }, | |
| { | |
| "loss": 0.0581, | |
| "grad_norm": 0.8834924697875977, | |
| "learning_rate": 6.894749635391347e-06, | |
| "epoch": 1.66, | |
| "step": 27300 | |
| }, | |
| { | |
| "loss": 0.0565, | |
| "grad_norm": 0.7698688507080078, | |
| "learning_rate": 6.773213417598445e-06, | |
| "epoch": 1.66, | |
| "step": 27400 | |
| }, | |
| { | |
| "loss": 0.0575, | |
| "grad_norm": 0.8447450399398804, | |
| "learning_rate": 6.651677199805543e-06, | |
| "epoch": 1.67, | |
| "step": 27500 | |
| }, | |
| { | |
| "loss": 0.057, | |
| "grad_norm": 1.6002224683761597, | |
| "learning_rate": 6.5301409820126404e-06, | |
| "epoch": 1.67, | |
| "step": 27600 | |
| }, | |
| { | |
| "loss": 0.0558, | |
| "grad_norm": 0.8625892996788025, | |
| "learning_rate": 6.408604764219738e-06, | |
| "epoch": 1.68, | |
| "step": 27700 | |
| }, | |
| { | |
| "loss": 0.0566, | |
| "grad_norm": 0.7483322024345398, | |
| "learning_rate": 6.2870685464268355e-06, | |
| "epoch": 1.69, | |
| "step": 27800 | |
| }, | |
| { | |
| "loss": 0.0571, | |
| "grad_norm": 0.781535804271698, | |
| "learning_rate": 6.165532328633933e-06, | |
| "epoch": 1.69, | |
| "step": 27900 | |
| }, | |
| { | |
| "loss": 0.0563, | |
| "grad_norm": 0.8761783838272095, | |
| "learning_rate": 6.0439961108410314e-06, | |
| "epoch": 1.7, | |
| "step": 28000 | |
| }, | |
| { | |
| "loss": 0.0565, | |
| "grad_norm": 0.5183244943618774, | |
| "learning_rate": 5.922459893048129e-06, | |
| "epoch": 1.7, | |
| "step": 28100 | |
| }, | |
| { | |
| "loss": 0.0564, | |
| "grad_norm": 0.7939796447753906, | |
| "learning_rate": 5.8009236752552265e-06, | |
| "epoch": 1.71, | |
| "step": 28200 | |
| }, | |
| { | |
| "loss": 0.0576, | |
| "grad_norm": 0.7260966300964355, | |
| "learning_rate": 5.679387457462324e-06, | |
| "epoch": 1.72, | |
| "step": 28300 | |
| }, | |
| { | |
| "loss": 0.0569, | |
| "grad_norm": 0.9087544083595276, | |
| "learning_rate": 5.557851239669422e-06, | |
| "epoch": 1.72, | |
| "step": 28400 | |
| }, | |
| { | |
| "loss": 0.056, | |
| "grad_norm": 0.7275218367576599, | |
| "learning_rate": 5.436315021876519e-06, | |
| "epoch": 1.73, | |
| "step": 28500 | |
| }, | |
| { | |
| "loss": 0.0563, | |
| "grad_norm": 0.5983753800392151, | |
| "learning_rate": 5.315994166261547e-06, | |
| "epoch": 1.74, | |
| "step": 28600 | |
| }, | |
| { | |
| "loss": 0.0564, | |
| "grad_norm": 0.912756085395813, | |
| "learning_rate": 5.194457948468644e-06, | |
| "epoch": 1.74, | |
| "step": 28700 | |
| }, | |
| { | |
| "loss": 0.0555, | |
| "grad_norm": 0.6085710525512695, | |
| "learning_rate": 5.072921730675742e-06, | |
| "epoch": 1.75, | |
| "step": 28800 | |
| }, | |
| { | |
| "loss": 0.0571, | |
| "grad_norm": 0.6775307655334473, | |
| "learning_rate": 4.95138551288284e-06, | |
| "epoch": 1.75, | |
| "step": 28900 | |
| }, | |
| { | |
| "loss": 0.0543, | |
| "grad_norm": 0.7438898682594299, | |
| "learning_rate": 4.829849295089938e-06, | |
| "epoch": 1.76, | |
| "step": 29000 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.719668984413147, | |
| "learning_rate": 4.708313077297035e-06, | |
| "epoch": 1.77, | |
| "step": 29100 | |
| }, | |
| { | |
| "loss": 0.0565, | |
| "grad_norm": 0.8647979497909546, | |
| "learning_rate": 4.586776859504133e-06, | |
| "epoch": 1.77, | |
| "step": 29200 | |
| }, | |
| { | |
| "loss": 0.057, | |
| "grad_norm": 0.8238335847854614, | |
| "learning_rate": 4.46524064171123e-06, | |
| "epoch": 1.78, | |
| "step": 29300 | |
| }, | |
| { | |
| "loss": 0.0563, | |
| "grad_norm": 3.2504589557647705, | |
| "learning_rate": 4.343704423918328e-06, | |
| "epoch": 1.78, | |
| "step": 29400 | |
| }, | |
| { | |
| "loss": 0.0536, | |
| "grad_norm": 0.7106683850288391, | |
| "learning_rate": 4.222168206125426e-06, | |
| "epoch": 1.79, | |
| "step": 29500 | |
| }, | |
| { | |
| "loss": 0.056, | |
| "grad_norm": 0.9477577209472656, | |
| "learning_rate": 4.100631988332524e-06, | |
| "epoch": 1.8, | |
| "step": 29600 | |
| }, | |
| { | |
| "loss": 0.0562, | |
| "grad_norm": 0.8888897895812988, | |
| "learning_rate": 3.979095770539621e-06, | |
| "epoch": 1.8, | |
| "step": 29700 | |
| }, | |
| { | |
| "loss": 0.0562, | |
| "grad_norm": 0.7125309705734253, | |
| "learning_rate": 3.857559552746719e-06, | |
| "epoch": 1.81, | |
| "step": 29800 | |
| }, | |
| { | |
| "loss": 0.0552, | |
| "grad_norm": 0.7241693139076233, | |
| "learning_rate": 3.7360233349538167e-06, | |
| "epoch": 1.81, | |
| "step": 29900 | |
| }, | |
| { | |
| "loss": 0.0556, | |
| "grad_norm": 0.9381842613220215, | |
| "learning_rate": 3.6144871171609143e-06, | |
| "epoch": 1.82, | |
| "step": 30000 | |
| }, | |
| { | |
| "loss": 0.0551, | |
| "grad_norm": 0.6808192133903503, | |
| "learning_rate": 3.492950899368012e-06, | |
| "epoch": 1.83, | |
| "step": 30100 | |
| }, | |
| { | |
| "loss": 0.0561, | |
| "grad_norm": 0.6042631268501282, | |
| "learning_rate": 3.3714146815751098e-06, | |
| "epoch": 1.83, | |
| "step": 30200 | |
| }, | |
| { | |
| "loss": 0.0553, | |
| "grad_norm": 0.5585273504257202, | |
| "learning_rate": 3.2498784637822073e-06, | |
| "epoch": 1.84, | |
| "step": 30300 | |
| }, | |
| { | |
| "loss": 0.0545, | |
| "grad_norm": 0.9048868417739868, | |
| "learning_rate": 3.128342245989305e-06, | |
| "epoch": 1.84, | |
| "step": 30400 | |
| }, | |
| { | |
| "loss": 0.0557, | |
| "grad_norm": 0.8429957628250122, | |
| "learning_rate": 3.006806028196403e-06, | |
| "epoch": 1.85, | |
| "step": 30500 | |
| }, | |
| { | |
| "loss": 0.0563, | |
| "grad_norm": 0.7962875962257385, | |
| "learning_rate": 2.8852698104035003e-06, | |
| "epoch": 1.86, | |
| "step": 30600 | |
| }, | |
| { | |
| "loss": 0.0559, | |
| "grad_norm": 0.7854676246643066, | |
| "learning_rate": 2.763733592610598e-06, | |
| "epoch": 1.86, | |
| "step": 30700 | |
| }, | |
| { | |
| "loss": 0.0561, | |
| "grad_norm": 1.694869041442871, | |
| "learning_rate": 2.642197374817696e-06, | |
| "epoch": 1.87, | |
| "step": 30800 | |
| }, | |
| { | |
| "loss": 0.0568, | |
| "grad_norm": 0.6683087944984436, | |
| "learning_rate": 2.5206611570247934e-06, | |
| "epoch": 1.87, | |
| "step": 30900 | |
| }, | |
| { | |
| "loss": 0.0548, | |
| "grad_norm": 0.5675504803657532, | |
| "learning_rate": 2.3991249392318913e-06, | |
| "epoch": 1.88, | |
| "step": 31000 | |
| }, | |
| { | |
| "loss": 0.0552, | |
| "grad_norm": 0.9730797410011292, | |
| "learning_rate": 2.2775887214389893e-06, | |
| "epoch": 1.89, | |
| "step": 31100 | |
| }, | |
| { | |
| "loss": 0.0568, | |
| "grad_norm": 0.8015105128288269, | |
| "learning_rate": 2.156052503646087e-06, | |
| "epoch": 1.89, | |
| "step": 31200 | |
| }, | |
| { | |
| "loss": 0.0552, | |
| "grad_norm": 0.5437925457954407, | |
| "learning_rate": 2.0345162858531844e-06, | |
| "epoch": 1.9, | |
| "step": 31300 | |
| }, | |
| { | |
| "loss": 0.0558, | |
| "grad_norm": 0.8105918765068054, | |
| "learning_rate": 1.9129800680602823e-06, | |
| "epoch": 1.91, | |
| "step": 31400 | |
| }, | |
| { | |
| "loss": 0.0567, | |
| "grad_norm": 0.8699814677238464, | |
| "learning_rate": 1.7914438502673799e-06, | |
| "epoch": 1.91, | |
| "step": 31500 | |
| }, | |
| { | |
| "loss": 0.0556, | |
| "grad_norm": 0.542261004447937, | |
| "learning_rate": 1.6699076324744776e-06, | |
| "epoch": 1.92, | |
| "step": 31600 | |
| }, | |
| { | |
| "loss": 0.0553, | |
| "grad_norm": 0.6852170825004578, | |
| "learning_rate": 1.5483714146815754e-06, | |
| "epoch": 1.92, | |
| "step": 31700 | |
| }, | |
| { | |
| "loss": 0.0559, | |
| "grad_norm": 0.8324136137962341, | |
| "learning_rate": 1.426835196888673e-06, | |
| "epoch": 1.93, | |
| "step": 31800 | |
| }, | |
| { | |
| "loss": 0.0539, | |
| "grad_norm": 0.5395376086235046, | |
| "learning_rate": 1.3052989790957707e-06, | |
| "epoch": 1.94, | |
| "step": 31900 | |
| }, | |
| { | |
| "loss": 0.0557, | |
| "grad_norm": 1.0665556192398071, | |
| "learning_rate": 1.1837627613028684e-06, | |
| "epoch": 1.94, | |
| "step": 32000 | |
| }, | |
| { | |
| "loss": 0.0556, | |
| "grad_norm": 0.5730076432228088, | |
| "learning_rate": 1.062226543509966e-06, | |
| "epoch": 1.95, | |
| "step": 32100 | |
| }, | |
| { | |
| "loss": 0.0566, | |
| "grad_norm": 0.8526155352592468, | |
| "learning_rate": 9.406903257170638e-07, | |
| "epoch": 1.95, | |
| "step": 32200 | |
| }, | |
| { | |
| "loss": 0.0554, | |
| "grad_norm": 0.47227638959884644, | |
| "learning_rate": 8.191541079241614e-07, | |
| "epoch": 1.96, | |
| "step": 32300 | |
| }, | |
| { | |
| "loss": 0.0559, | |
| "grad_norm": 0.5771980881690979, | |
| "learning_rate": 6.976178901312592e-07, | |
| "epoch": 1.97, | |
| "step": 32400 | |
| }, | |
| { | |
| "loss": 0.0553, | |
| "grad_norm": 0.7183811068534851, | |
| "learning_rate": 5.772970345162859e-07, | |
| "epoch": 1.97, | |
| "step": 32500 | |
| }, | |
| { | |
| "loss": 0.0556, | |
| "grad_norm": 0.7808952927589417, | |
| "learning_rate": 4.557608167233836e-07, | |
| "epoch": 1.98, | |
| "step": 32600 | |
| }, | |
| { | |
| "loss": 0.0549, | |
| "grad_norm": 0.7201197743415833, | |
| "learning_rate": 3.3422459893048135e-07, | |
| "epoch": 1.98, | |
| "step": 32700 | |
| }, | |
| { | |
| "loss": 0.0546, | |
| "grad_norm": 0.822515606880188, | |
| "learning_rate": 2.1268838113757902e-07, | |
| "epoch": 1.99, | |
| "step": 32800 | |
| }, | |
| { | |
| "loss": 0.0556, | |
| "grad_norm": 0.6968460083007812, | |
| "learning_rate": 9.115216334467672e-08, | |
| "epoch": 2.0, | |
| "step": 32900 | |
| }, | |
| { | |
| "eval_loss": 0.06514331698417664, | |
| "eval_f1": 0.9055283859012663, | |
| "eval_precision": 0.9128121708644065, | |
| "eval_recall": 0.898553824781504, | |
| "eval_accuracy": 0.9750088848296079, | |
| "eval_runtime": 304.326, | |
| "eval_samples_per_second": 86.841, | |
| "eval_steps_per_second": 10.857, | |
| "epoch": 2.0, | |
| "step": 32962 | |
| }, | |
| { | |
| "train_runtime": 12949.9436, | |
| "train_samples_per_second": 20.363, | |
| "train_steps_per_second": 2.545, | |
| "total_flos": 2.448996403000443e+17, | |
| "train_loss": 0.07225221031304233, | |
| "epoch": 2.0, | |
| "step": 32962 | |
| } | |
| ] | |
| } | |
| } | |
| } |