{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 15948,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009405568096313018,
      "grad_norm": 4.406911849975586,
      "learning_rate": 1.9938550288437425e-05,
      "loss": 0.6174,
      "step": 50
    },
    {
      "epoch": 0.018811136192626036,
      "grad_norm": 6.521998405456543,
      "learning_rate": 1.987584650112867e-05,
      "loss": 0.398,
      "step": 100
    },
    {
      "epoch": 0.028216704288939052,
      "grad_norm": 4.9869771003723145,
      "learning_rate": 1.9813142713819916e-05,
      "loss": 0.3699,
      "step": 150
    },
    {
      "epoch": 0.03762227238525207,
      "grad_norm": 4.50770378112793,
      "learning_rate": 1.975043892651116e-05,
      "loss": 0.3522,
      "step": 200
    },
    {
      "epoch": 0.04702784048156509,
      "grad_norm": 4.725740909576416,
      "learning_rate": 1.9687735139202408e-05,
      "loss": 0.3176,
      "step": 250
    },
    {
      "epoch": 0.056433408577878104,
      "grad_norm": 2.476680040359497,
      "learning_rate": 1.9625031351893655e-05,
      "loss": 0.3091,
      "step": 300
    },
    {
      "epoch": 0.06583897667419113,
      "grad_norm": 5.042876720428467,
      "learning_rate": 1.9562327564584903e-05,
      "loss": 0.3506,
      "step": 350
    },
    {
      "epoch": 0.07524454477050414,
      "grad_norm": 7.000358581542969,
      "learning_rate": 1.949962377727615e-05,
      "loss": 0.3493,
      "step": 400
    },
    {
      "epoch": 0.08465011286681716,
      "grad_norm": 9.695847511291504,
      "learning_rate": 1.9436919989967394e-05,
      "loss": 0.2922,
      "step": 450
    },
    {
      "epoch": 0.09405568096313018,
      "grad_norm": 5.6148552894592285,
      "learning_rate": 1.9374216202658642e-05,
      "loss": 0.3225,
      "step": 500
    },
    {
      "epoch": 0.10346124905944319,
      "grad_norm": 3.7483432292938232,
      "learning_rate": 1.931151241534989e-05,
      "loss": 0.3329,
      "step": 550
    },
    {
      "epoch": 0.11286681715575621,
      "grad_norm": 3.2282767295837402,
      "learning_rate": 1.9248808628041137e-05,
      "loss": 0.3107,
      "step": 600
    },
    {
      "epoch": 0.12227238525206922,
      "grad_norm": 9.40439224243164,
      "learning_rate": 1.918610484073238e-05,
      "loss": 0.3059,
      "step": 650
    },
    {
      "epoch": 0.13167795334838225,
      "grad_norm": 2.8919079303741455,
      "learning_rate": 1.912340105342363e-05,
      "loss": 0.2929,
      "step": 700
    },
    {
      "epoch": 0.14108352144469527,
      "grad_norm": 3.744126558303833,
      "learning_rate": 1.9060697266114876e-05,
      "loss": 0.3426,
      "step": 750
    },
    {
      "epoch": 0.1504890895410083,
      "grad_norm": 2.0327718257904053,
      "learning_rate": 1.899799347880612e-05,
      "loss": 0.3259,
      "step": 800
    },
    {
      "epoch": 0.1598946576373213,
      "grad_norm": 3.749131679534912,
      "learning_rate": 1.8935289691497367e-05,
      "loss": 0.2781,
      "step": 850
    },
    {
      "epoch": 0.16930022573363432,
      "grad_norm": 2.2363057136535645,
      "learning_rate": 1.8872585904188615e-05,
      "loss": 0.3797,
      "step": 900
    },
    {
      "epoch": 0.17870579382994733,
      "grad_norm": 8.247345924377441,
      "learning_rate": 1.8809882116879862e-05,
      "loss": 0.3156,
      "step": 950
    },
    {
      "epoch": 0.18811136192626035,
      "grad_norm": 4.1785454750061035,
      "learning_rate": 1.8747178329571106e-05,
      "loss": 0.2728,
      "step": 1000
    },
    {
      "epoch": 0.19751693002257337,
      "grad_norm": 2.009939670562744,
      "learning_rate": 1.8684474542262354e-05,
      "loss": 0.2674,
      "step": 1050
    },
    {
      "epoch": 0.20692249811888638,
      "grad_norm": 14.008905410766602,
      "learning_rate": 1.86217707549536e-05,
      "loss": 0.3228,
      "step": 1100
    },
    {
      "epoch": 0.2163280662151994,
      "grad_norm": 7.390902042388916,
      "learning_rate": 1.855906696764485e-05,
      "loss": 0.2759,
      "step": 1150
    },
    {
      "epoch": 0.22573363431151242,
      "grad_norm": 5.746609210968018,
      "learning_rate": 1.8496363180336093e-05,
      "loss": 0.257,
      "step": 1200
    },
    {
      "epoch": 0.23513920240782543,
      "grad_norm": 5.413491725921631,
      "learning_rate": 1.843365939302734e-05,
      "loss": 0.2821,
      "step": 1250
    },
    {
      "epoch": 0.24454477050413845,
      "grad_norm": 17.94203758239746,
      "learning_rate": 1.8370955605718588e-05,
      "loss": 0.2749,
      "step": 1300
    },
    {
      "epoch": 0.25395033860045146,
      "grad_norm": 4.912784099578857,
      "learning_rate": 1.8308251818409832e-05,
      "loss": 0.2733,
      "step": 1350
    },
    {
      "epoch": 0.2633559066967645,
      "grad_norm": 3.2884740829467773,
      "learning_rate": 1.824554803110108e-05,
      "loss": 0.2873,
      "step": 1400
    },
    {
      "epoch": 0.2727614747930775,
      "grad_norm": 3.9251766204833984,
      "learning_rate": 1.8182844243792327e-05,
      "loss": 0.2802,
      "step": 1450
    },
    {
      "epoch": 0.28216704288939054,
      "grad_norm": 1.8012003898620605,
      "learning_rate": 1.8120140456483574e-05,
      "loss": 0.2765,
      "step": 1500
    },
    {
      "epoch": 0.29157261098570353,
      "grad_norm": 3.162705183029175,
      "learning_rate": 1.805743666917482e-05,
      "loss": 0.2761,
      "step": 1550
    },
    {
      "epoch": 0.3009781790820166,
      "grad_norm": 2.2068610191345215,
      "learning_rate": 1.7994732881866066e-05,
      "loss": 0.263,
      "step": 1600
    },
    {
      "epoch": 0.31038374717832956,
      "grad_norm": 2.723480224609375,
      "learning_rate": 1.7932029094557313e-05,
      "loss": 0.2496,
      "step": 1650
    },
    {
      "epoch": 0.3197893152746426,
      "grad_norm": 2.9920785427093506,
      "learning_rate": 1.786932530724856e-05,
      "loss": 0.2955,
      "step": 1700
    },
    {
      "epoch": 0.3291948833709556,
      "grad_norm": 4.665702819824219,
      "learning_rate": 1.7806621519939805e-05,
      "loss": 0.306,
      "step": 1750
    },
    {
      "epoch": 0.33860045146726864,
      "grad_norm": 1.996135950088501,
      "learning_rate": 1.7743917732631052e-05,
      "loss": 0.329,
      "step": 1800
    },
    {
      "epoch": 0.3480060195635816,
      "grad_norm": 2.1975622177124023,
      "learning_rate": 1.76812139453223e-05,
      "loss": 0.2896,
      "step": 1850
    },
    {
      "epoch": 0.35741158765989467,
      "grad_norm": 23.333040237426758,
      "learning_rate": 1.7618510158013547e-05,
      "loss": 0.2878,
      "step": 1900
    },
    {
      "epoch": 0.36681715575620766,
      "grad_norm": 1.7410361766815186,
      "learning_rate": 1.755580637070479e-05,
      "loss": 0.2731,
      "step": 1950
    },
    {
      "epoch": 0.3762227238525207,
      "grad_norm": 5.49874210357666,
      "learning_rate": 1.749310258339604e-05,
      "loss": 0.3071,
      "step": 2000
    },
    {
      "epoch": 0.3856282919488337,
      "grad_norm": 2.9172000885009766,
      "learning_rate": 1.7430398796087283e-05,
      "loss": 0.2777,
      "step": 2050
    },
    {
      "epoch": 0.39503386004514673,
      "grad_norm": 2.531278371810913,
      "learning_rate": 1.736769500877853e-05,
      "loss": 0.2683,
      "step": 2100
    },
    {
      "epoch": 0.4044394281414597,
      "grad_norm": 3.2860658168792725,
      "learning_rate": 1.7304991221469778e-05,
      "loss": 0.2599,
      "step": 2150
    },
    {
      "epoch": 0.41384499623777277,
      "grad_norm": 1.781692624092102,
      "learning_rate": 1.7242287434161025e-05,
      "loss": 0.2867,
      "step": 2200
    },
    {
      "epoch": 0.42325056433408575,
      "grad_norm": 2.29233717918396,
      "learning_rate": 1.7179583646852273e-05,
      "loss": 0.2744,
      "step": 2250
    },
    {
      "epoch": 0.4326561324303988,
      "grad_norm": 2.741166591644287,
      "learning_rate": 1.7116879859543517e-05,
      "loss": 0.2595,
      "step": 2300
    },
    {
      "epoch": 0.4420617005267118,
      "grad_norm": 5.684919834136963,
      "learning_rate": 1.7054176072234764e-05,
      "loss": 0.3033,
      "step": 2350
    },
    {
      "epoch": 0.45146726862302483,
      "grad_norm": 2.437774181365967,
      "learning_rate": 1.699147228492601e-05,
      "loss": 0.2934,
      "step": 2400
    },
    {
      "epoch": 0.4608728367193379,
      "grad_norm": 6.011141300201416,
      "learning_rate": 1.692876849761726e-05,
      "loss": 0.2793,
      "step": 2450
    },
    {
      "epoch": 0.47027840481565086,
      "grad_norm": 24.469600677490234,
      "learning_rate": 1.6866064710308507e-05,
      "loss": 0.2492,
      "step": 2500
    },
    {
      "epoch": 0.4796839729119639,
      "grad_norm": 4.883657455444336,
      "learning_rate": 1.680336092299975e-05,
      "loss": 0.2216,
      "step": 2550
    },
    {
      "epoch": 0.4890895410082769,
      "grad_norm": 2.0113911628723145,
      "learning_rate": 1.6740657135690995e-05,
      "loss": 0.2999,
      "step": 2600
    },
    {
      "epoch": 0.49849510910458994,
      "grad_norm": 2.1354928016662598,
      "learning_rate": 1.6677953348382242e-05,
      "loss": 0.2635,
      "step": 2650
    },
    {
      "epoch": 0.5079006772009029,
      "grad_norm": 3.987088918685913,
      "learning_rate": 1.661524956107349e-05,
      "loss": 0.2405,
      "step": 2700
    },
    {
      "epoch": 0.5173062452972159,
      "grad_norm": 4.9606709480285645,
      "learning_rate": 1.6552545773764737e-05,
      "loss": 0.239,
      "step": 2750
    },
    {
      "epoch": 0.526711813393529,
      "grad_norm": 1.6401499509811401,
      "learning_rate": 1.6489841986455985e-05,
      "loss": 0.3246,
      "step": 2800
    },
    {
      "epoch": 0.536117381489842,
      "grad_norm": 5.161315441131592,
      "learning_rate": 1.642713819914723e-05,
      "loss": 0.2625,
      "step": 2850
    },
    {
      "epoch": 0.545522949586155,
      "grad_norm": 1.054700255393982,
      "learning_rate": 1.6364434411838476e-05,
      "loss": 0.2741,
      "step": 2900
    },
    {
      "epoch": 0.554928517682468,
      "grad_norm": 2.2569172382354736,
      "learning_rate": 1.6301730624529724e-05,
      "loss": 0.2622,
      "step": 2950
    },
    {
      "epoch": 0.5643340857787811,
      "grad_norm": 11.479528427124023,
      "learning_rate": 1.623902683722097e-05,
      "loss": 0.2235,
      "step": 3000
    },
    {
      "epoch": 0.5737396538750941,
      "grad_norm": 2.314810276031494,
      "learning_rate": 1.617632304991222e-05,
      "loss": 0.284,
      "step": 3050
    },
    {
      "epoch": 0.5831452219714071,
      "grad_norm": 2.623328924179077,
      "learning_rate": 1.6113619262603463e-05,
      "loss": 0.2951,
      "step": 3100
    },
    {
      "epoch": 0.59255079006772,
      "grad_norm": 6.059717655181885,
      "learning_rate": 1.6050915475294707e-05,
      "loss": 0.2613,
      "step": 3150
    },
    {
      "epoch": 0.6019563581640331,
      "grad_norm": 1.6962251663208008,
      "learning_rate": 1.5988211687985954e-05,
      "loss": 0.2676,
      "step": 3200
    },
    {
      "epoch": 0.6113619262603461,
      "grad_norm": 6.8796586990356445,
      "learning_rate": 1.59255079006772e-05,
      "loss": 0.2314,
      "step": 3250
    },
    {
      "epoch": 0.6207674943566591,
      "grad_norm": 5.26965856552124,
      "learning_rate": 1.586280411336845e-05,
      "loss": 0.2655,
      "step": 3300
    },
    {
      "epoch": 0.6301730624529721,
      "grad_norm": 2.5264058113098145,
      "learning_rate": 1.5800100326059697e-05,
      "loss": 0.2399,
      "step": 3350
    },
    {
      "epoch": 0.6395786305492852,
      "grad_norm": 7.36959171295166,
      "learning_rate": 1.573739653875094e-05,
      "loss": 0.2645,
      "step": 3400
    },
    {
      "epoch": 0.6489841986455982,
      "grad_norm": 6.5851874351501465,
      "learning_rate": 1.5674692751442188e-05,
      "loss": 0.2544,
      "step": 3450
    },
    {
      "epoch": 0.6583897667419112,
      "grad_norm": 1.4328551292419434,
      "learning_rate": 1.5611988964133436e-05,
      "loss": 0.2385,
      "step": 3500
    },
    {
      "epoch": 0.6677953348382242,
      "grad_norm": 1.747718095779419,
      "learning_rate": 1.5549285176824683e-05,
      "loss": 0.2371,
      "step": 3550
    },
    {
      "epoch": 0.6772009029345373,
      "grad_norm": 5.544091701507568,
      "learning_rate": 1.548658138951593e-05,
      "loss": 0.2347,
      "step": 3600
    },
    {
      "epoch": 0.6866064710308503,
      "grad_norm": 2.0724775791168213,
      "learning_rate": 1.5423877602207175e-05,
      "loss": 0.2817,
      "step": 3650
    },
    {
      "epoch": 0.6960120391271633,
      "grad_norm": 3.6699838638305664,
      "learning_rate": 1.536117381489842e-05,
      "loss": 0.282,
      "step": 3700
    },
    {
      "epoch": 0.7054176072234764,
      "grad_norm": 2.081963539123535,
      "learning_rate": 1.5298470027589666e-05,
      "loss": 0.3104,
      "step": 3750
    },
    {
      "epoch": 0.7148231753197893,
      "grad_norm": 2.8969521522521973,
      "learning_rate": 1.5235766240280914e-05,
      "loss": 0.2473,
      "step": 3800
    },
    {
      "epoch": 0.7242287434161023,
      "grad_norm": 2.79297137260437,
      "learning_rate": 1.5173062452972161e-05,
      "loss": 0.2199,
      "step": 3850
    },
    {
      "epoch": 0.7336343115124153,
      "grad_norm": 1.689758062362671,
      "learning_rate": 1.5110358665663407e-05,
      "loss": 0.2487,
      "step": 3900
    },
    {
      "epoch": 0.7430398796087284,
      "grad_norm": 2.0919642448425293,
      "learning_rate": 1.5047654878354654e-05,
      "loss": 0.2558,
      "step": 3950
    },
    {
      "epoch": 0.7524454477050414,
      "grad_norm": 8.588711738586426,
      "learning_rate": 1.49849510910459e-05,
      "loss": 0.2798,
      "step": 4000
    },
    {
      "epoch": 0.7618510158013544,
      "grad_norm": 2.519028425216675,
      "learning_rate": 1.4922247303737148e-05,
      "loss": 0.2862,
      "step": 4050
    },
    {
      "epoch": 0.7712565838976674,
      "grad_norm": 5.890926837921143,
      "learning_rate": 1.4859543516428393e-05,
      "loss": 0.2243,
      "step": 4100
    },
    {
      "epoch": 0.7806621519939805,
      "grad_norm": 2.727581262588501,
      "learning_rate": 1.479683972911964e-05,
      "loss": 0.2578,
      "step": 4150
    },
    {
      "epoch": 0.7900677200902935,
      "grad_norm": 3.8163578510284424,
      "learning_rate": 1.4734135941810888e-05,
      "loss": 0.2844,
      "step": 4200
    },
    {
      "epoch": 0.7994732881866065,
      "grad_norm": 3.923978567123413,
      "learning_rate": 1.4671432154502132e-05,
      "loss": 0.2344,
      "step": 4250
    },
    {
      "epoch": 0.8088788562829194,
      "grad_norm": 1.962684154510498,
      "learning_rate": 1.4608728367193378e-05,
      "loss": 0.2366,
      "step": 4300
    },
    {
      "epoch": 0.8182844243792325,
      "grad_norm": 4.875962734222412,
      "learning_rate": 1.4546024579884626e-05,
      "loss": 0.266,
      "step": 4350
    },
    {
      "epoch": 0.8276899924755455,
      "grad_norm": 3.953382730484009,
      "learning_rate": 1.4483320792575873e-05,
      "loss": 0.2949,
      "step": 4400
    },
    {
      "epoch": 0.8370955605718585,
      "grad_norm": 4.6789870262146,
      "learning_rate": 1.4420617005267119e-05,
      "loss": 0.2429,
      "step": 4450
    },
    {
      "epoch": 0.8465011286681715,
      "grad_norm": 1.2689917087554932,
      "learning_rate": 1.4357913217958366e-05,
      "loss": 0.2278,
      "step": 4500
    },
    {
      "epoch": 0.8559066967644846,
      "grad_norm": 1.1619006395339966,
      "learning_rate": 1.4295209430649612e-05,
      "loss": 0.2125,
      "step": 4550
    },
    {
      "epoch": 0.8653122648607976,
      "grad_norm": 2.665306329727173,
      "learning_rate": 1.423250564334086e-05,
      "loss": 0.2627,
      "step": 4600
    },
    {
      "epoch": 0.8747178329571106,
      "grad_norm": 2.67232084274292,
      "learning_rate": 1.4169801856032105e-05,
      "loss": 0.2747,
      "step": 4650
    },
    {
      "epoch": 0.8841234010534236,
      "grad_norm": 1.467073678970337,
      "learning_rate": 1.4107098068723353e-05,
      "loss": 0.2261,
      "step": 4700
    },
    {
      "epoch": 0.8935289691497367,
      "grad_norm": 7.690640449523926,
      "learning_rate": 1.40443942814146e-05,
      "loss": 0.2209,
      "step": 4750
    },
    {
      "epoch": 0.9029345372460497,
      "grad_norm": 2.4795353412628174,
      "learning_rate": 1.3981690494105846e-05,
      "loss": 0.247,
      "step": 4800
    },
    {
      "epoch": 0.9123401053423627,
      "grad_norm": 2.3668243885040283,
      "learning_rate": 1.391898670679709e-05,
      "loss": 0.2378,
      "step": 4850
    },
    {
      "epoch": 0.9217456734386757,
      "grad_norm": 2.6995575428009033,
      "learning_rate": 1.3856282919488338e-05,
      "loss": 0.2156,
      "step": 4900
    },
    {
      "epoch": 0.9311512415349887,
      "grad_norm": 3.877608299255371,
      "learning_rate": 1.3793579132179585e-05,
      "loss": 0.2434,
      "step": 4950
    },
    {
      "epoch": 0.9405568096313017,
      "grad_norm": 5.548897743225098,
      "learning_rate": 1.373087534487083e-05,
      "loss": 0.2571,
      "step": 5000
    },
    {
      "epoch": 0.9499623777276147,
      "grad_norm": 1.6183255910873413,
      "learning_rate": 1.3668171557562078e-05,
      "loss": 0.2118,
      "step": 5050
    },
    {
      "epoch": 0.9593679458239278,
      "grad_norm": 8.709449768066406,
      "learning_rate": 1.3605467770253324e-05,
      "loss": 0.2548,
      "step": 5100
    },
    {
      "epoch": 0.9687735139202408,
      "grad_norm": 1.0707285404205322,
      "learning_rate": 1.3542763982944572e-05,
      "loss": 0.1974,
      "step": 5150
    },
    {
      "epoch": 0.9781790820165538,
      "grad_norm": 2.0646872520446777,
      "learning_rate": 1.3480060195635817e-05,
      "loss": 0.2524,
      "step": 5200
    },
    {
      "epoch": 0.9875846501128668,
      "grad_norm": 2.3454248905181885,
      "learning_rate": 1.3417356408327065e-05,
      "loss": 0.2698,
      "step": 5250
    },
    {
      "epoch": 0.9969902182091799,
      "grad_norm": 3.7309887409210205,
      "learning_rate": 1.3354652621018312e-05,
      "loss": 0.2508,
      "step": 5300
    },
    {
      "epoch": 1.0063957863054929,
      "grad_norm": 10.15404987335205,
      "learning_rate": 1.3291948833709558e-05,
      "loss": 0.2409,
      "step": 5350
    },
    {
      "epoch": 1.0158013544018059,
      "grad_norm": 2.018286943435669,
      "learning_rate": 1.3229245046400802e-05,
      "loss": 0.2634,
      "step": 5400
    },
    {
      "epoch": 1.0252069224981188,
      "grad_norm": 1.0378094911575317,
      "learning_rate": 1.316654125909205e-05,
      "loss": 0.2317,
      "step": 5450
    },
    {
      "epoch": 1.0346124905944318,
      "grad_norm": 2.633552074432373,
      "learning_rate": 1.3103837471783295e-05,
      "loss": 0.2087,
      "step": 5500
    },
    {
      "epoch": 1.0440180586907448,
      "grad_norm": 2.9494006633758545,
      "learning_rate": 1.3041133684474543e-05,
      "loss": 0.2294,
      "step": 5550
    },
    {
      "epoch": 1.053423626787058,
      "grad_norm": 1.539960265159607,
      "learning_rate": 1.297842989716579e-05,
      "loss": 0.1995,
      "step": 5600
    },
    {
      "epoch": 1.062829194883371,
      "grad_norm": 1.6446877717971802,
      "learning_rate": 1.2915726109857036e-05,
      "loss": 0.2609,
      "step": 5650
    },
    {
      "epoch": 1.072234762979684,
      "grad_norm": 0.5871282815933228,
      "learning_rate": 1.2853022322548283e-05,
      "loss": 0.2095,
      "step": 5700
    },
    {
      "epoch": 1.081640331075997,
      "grad_norm": 3.3624796867370605,
      "learning_rate": 1.279031853523953e-05,
      "loss": 0.2342,
      "step": 5750
    },
    {
      "epoch": 1.09104589917231,
      "grad_norm": 6.334434509277344,
      "learning_rate": 1.2727614747930777e-05,
      "loss": 0.2143,
      "step": 5800
    },
    {
      "epoch": 1.100451467268623,
      "grad_norm": 3.2644360065460205,
      "learning_rate": 1.2664910960622022e-05,
      "loss": 0.2094,
      "step": 5850
    },
    {
      "epoch": 1.109857035364936,
      "grad_norm": 2.850273847579956,
      "learning_rate": 1.260220717331327e-05,
      "loss": 0.2012,
      "step": 5900
    },
    {
      "epoch": 1.119262603461249,
      "grad_norm": 6.344181537628174,
      "learning_rate": 1.2539503386004517e-05,
      "loss": 0.1993,
      "step": 5950
    },
    {
      "epoch": 1.1286681715575622,
      "grad_norm": 3.1634130477905273,
      "learning_rate": 1.2476799598695761e-05,
      "loss": 0.2035,
      "step": 6000
    },
    {
      "epoch": 1.1380737396538751,
      "grad_norm": 1.3129241466522217,
      "learning_rate": 1.2414095811387007e-05,
      "loss": 0.2632,
      "step": 6050
    },
    {
      "epoch": 1.1474793077501881,
      "grad_norm": 1.7623401880264282,
      "learning_rate": 1.2351392024078255e-05,
      "loss": 0.1882,
      "step": 6100
    },
    {
      "epoch": 1.1568848758465011,
      "grad_norm": 1.544403076171875,
      "learning_rate": 1.2288688236769502e-05,
      "loss": 0.2814,
      "step": 6150
    },
    {
      "epoch": 1.1662904439428141,
      "grad_norm": 2.739286184310913,
      "learning_rate": 1.2225984449460748e-05,
      "loss": 0.1824,
      "step": 6200
    },
    {
      "epoch": 1.175696012039127,
      "grad_norm": 6.419041633605957,
      "learning_rate": 1.2163280662151995e-05,
      "loss": 0.2174,
      "step": 6250
    },
    {
      "epoch": 1.18510158013544,
      "grad_norm": 2.975383996963501,
      "learning_rate": 1.2100576874843241e-05,
      "loss": 0.2511,
      "step": 6300
    },
    {
      "epoch": 1.1945071482317533,
      "grad_norm": 2.4400739669799805,
      "learning_rate": 1.2037873087534489e-05,
      "loss": 0.2021,
      "step": 6350
    },
    {
      "epoch": 1.2039127163280663,
      "grad_norm": 3.1182546615600586,
      "learning_rate": 1.1975169300225734e-05,
      "loss": 0.2323,
      "step": 6400
    },
    {
      "epoch": 1.2133182844243793,
      "grad_norm": 1.4824222326278687,
      "learning_rate": 1.1912465512916982e-05,
      "loss": 0.2289,
      "step": 6450
    },
    {
      "epoch": 1.2227238525206923,
      "grad_norm": 5.336580753326416,
      "learning_rate": 1.184976172560823e-05,
      "loss": 0.1726,
      "step": 6500
    },
    {
      "epoch": 1.2321294206170053,
      "grad_norm": 14.752867698669434,
      "learning_rate": 1.1787057938299473e-05,
      "loss": 0.253,
      "step": 6550
    },
    {
      "epoch": 1.2415349887133182,
      "grad_norm": 1.371951699256897,
      "learning_rate": 1.172435415099072e-05,
      "loss": 0.1525,
      "step": 6600
    },
    {
      "epoch": 1.2509405568096312,
      "grad_norm": 2.216179847717285,
      "learning_rate": 1.1661650363681967e-05,
      "loss": 0.2001,
      "step": 6650
    },
    {
      "epoch": 1.2603461249059444,
      "grad_norm": 6.2752299308776855,
      "learning_rate": 1.1598946576373214e-05,
      "loss": 0.2261,
      "step": 6700
    },
    {
      "epoch": 1.2697516930022572,
      "grad_norm": 3.347257137298584,
      "learning_rate": 1.153624278906446e-05,
      "loss": 0.2291,
      "step": 6750
    },
    {
      "epoch": 1.2791572610985704,
      "grad_norm": 8.093568801879883,
      "learning_rate": 1.1473539001755707e-05,
      "loss": 0.1976,
      "step": 6800
    },
    {
      "epoch": 1.2885628291948834,
      "grad_norm": 1.470790147781372,
      "learning_rate": 1.1410835214446953e-05,
      "loss": 0.1928,
      "step": 6850
    },
    {
      "epoch": 1.2979683972911964,
      "grad_norm": 3.1566500663757324,
      "learning_rate": 1.13481314271382e-05,
      "loss": 0.2028,
      "step": 6900
    },
    {
      "epoch": 1.3073739653875094,
      "grad_norm": 9.452258110046387,
      "learning_rate": 1.1285427639829446e-05,
      "loss": 0.213,
      "step": 6950
    },
    {
      "epoch": 1.3167795334838224,
      "grad_norm": 7.935844898223877,
      "learning_rate": 1.1222723852520694e-05,
      "loss": 0.193,
      "step": 7000
    },
    {
      "epoch": 1.3261851015801354,
      "grad_norm": 1.4266091585159302,
      "learning_rate": 1.1160020065211941e-05,
      "loss": 0.1707,
      "step": 7050
    },
    {
      "epoch": 1.3355906696764483,
      "grad_norm": 11.033124923706055,
      "learning_rate": 1.1097316277903187e-05,
      "loss": 0.1936,
      "step": 7100
    },
    {
      "epoch": 1.3449962377727616,
      "grad_norm": 1.1958593130111694,
      "learning_rate": 1.1034612490594431e-05,
      "loss": 0.196,
      "step": 7150
    },
    {
      "epoch": 1.3544018058690745,
      "grad_norm": 1.222621202468872,
      "learning_rate": 1.0971908703285679e-05,
      "loss": 0.2003,
      "step": 7200
    },
    {
      "epoch": 1.3638073739653875,
      "grad_norm": 2.297128200531006,
      "learning_rate": 1.0909204915976926e-05,
      "loss": 0.1994,
      "step": 7250
    },
    {
      "epoch": 1.3732129420617005,
      "grad_norm": 4.549992561340332,
      "learning_rate": 1.0846501128668172e-05,
      "loss": 0.1951,
      "step": 7300
    },
    {
      "epoch": 1.3826185101580135,
      "grad_norm": 2.43581223487854,
      "learning_rate": 1.078379734135942e-05,
      "loss": 0.2665,
      "step": 7350
    },
    {
      "epoch": 1.3920240782543265,
      "grad_norm": 2.75065016746521,
      "learning_rate": 1.0721093554050665e-05,
      "loss": 0.2465,
      "step": 7400
    },
    {
      "epoch": 1.4014296463506395,
      "grad_norm": 5.422140121459961,
      "learning_rate": 1.0658389766741913e-05,
      "loss": 0.1747,
      "step": 7450
    },
    {
      "epoch": 1.4108352144469527,
      "grad_norm": 0.8706988096237183,
      "learning_rate": 1.0595685979433158e-05,
      "loss": 0.2454,
      "step": 7500
    },
    {
      "epoch": 1.4202407825432657,
      "grad_norm": 1.9640963077545166,
      "learning_rate": 1.0532982192124406e-05,
      "loss": 0.2293,
      "step": 7550
    },
    {
      "epoch": 1.4296463506395787,
      "grad_norm": 2.4464077949523926,
      "learning_rate": 1.0470278404815653e-05,
      "loss": 0.2183,
      "step": 7600
    },
    {
      "epoch": 1.4390519187358917,
      "grad_norm": 1.8322765827178955,
      "learning_rate": 1.0407574617506899e-05,
      "loss": 0.1933,
      "step": 7650
    },
    {
      "epoch": 1.4484574868322047,
      "grad_norm": 1.6448564529418945,
      "learning_rate": 1.0344870830198143e-05,
      "loss": 0.2198,
      "step": 7700
    },
    {
      "epoch": 1.4578630549285176,
      "grad_norm": 1.1031991243362427,
      "learning_rate": 1.028216704288939e-05,
      "loss": 0.2223,
      "step": 7750
    },
    {
      "epoch": 1.4672686230248306,
      "grad_norm": 2.653724193572998,
      "learning_rate": 1.0219463255580638e-05,
      "loss": 0.1873,
      "step": 7800
    },
    {
      "epoch": 1.4766741911211438,
      "grad_norm": 9.545223236083984,
      "learning_rate": 1.0156759468271884e-05,
      "loss": 0.2573,
      "step": 7850
    },
    {
      "epoch": 1.4860797592174566,
      "grad_norm": 0.947347104549408,
      "learning_rate": 1.0094055680963131e-05,
      "loss": 0.1485,
      "step": 7900
    },
    {
      "epoch": 1.4954853273137698,
      "grad_norm": 1.778729796409607,
      "learning_rate": 1.0031351893654377e-05,
      "loss": 0.2725,
      "step": 7950
    },
    {
      "epoch": 1.5048908954100828,
      "grad_norm": 4.2415995597839355,
      "learning_rate": 9.968648106345625e-06,
      "loss": 0.1629,
      "step": 8000
    },
    {
      "epoch": 1.5142964635063958,
      "grad_norm": 2.495288133621216,
      "learning_rate": 9.90594431903687e-06,
      "loss": 0.1961,
      "step": 8050
    },
    {
      "epoch": 1.5237020316027088,
      "grad_norm": 15.494341850280762,
      "learning_rate": 9.843240531728118e-06,
      "loss": 0.2032,
      "step": 8100
    },
    {
      "epoch": 1.5331075996990218,
      "grad_norm": 0.8584136962890625,
      "learning_rate": 9.780536744419364e-06,
      "loss": 0.2194,
      "step": 8150
    },
    {
      "epoch": 1.542513167795335,
      "grad_norm": 1.7161898612976074,
      "learning_rate": 9.71783295711061e-06,
      "loss": 0.1985,
      "step": 8200
    },
    {
      "epoch": 1.5519187358916477,
      "grad_norm": 13.85793399810791,
      "learning_rate": 9.655129169801857e-06,
      "loss": 0.1902,
      "step": 8250
    },
    {
      "epoch": 1.561324303987961,
      "grad_norm": 1.6476123332977295,
      "learning_rate": 9.592425382493104e-06,
      "loss": 0.2398,
      "step": 8300
    },
    {
      "epoch": 1.5707298720842737,
      "grad_norm": 2.9998719692230225,
      "learning_rate": 9.52972159518435e-06,
      "loss": 0.1788,
      "step": 8350
    },
    {
      "epoch": 1.580135440180587,
      "grad_norm": 7.067188262939453,
      "learning_rate": 9.467017807875598e-06,
      "loss": 0.211,
      "step": 8400
    },
    {
      "epoch": 1.5895410082769,
      "grad_norm": 4.7561936378479,
      "learning_rate": 9.404314020566843e-06,
      "loss": 0.1814,
      "step": 8450
    },
    {
      "epoch": 1.598946576373213,
      "grad_norm": 7.8336873054504395,
      "learning_rate": 9.341610233258089e-06,
      "loss": 0.2003,
      "step": 8500
    },
    {
      "epoch": 1.6083521444695261,
      "grad_norm": 3.6782350540161133,
      "learning_rate": 9.278906445949337e-06,
      "loss": 0.2555,
      "step": 8550
    },
    {
      "epoch": 1.617757712565839,
      "grad_norm": 1.2770379781723022,
      "learning_rate": 9.216202658640582e-06,
      "loss": 0.1839,
      "step": 8600
    },
    {
      "epoch": 1.627163280662152,
      "grad_norm": 2.8836193084716797,
      "learning_rate": 9.15349887133183e-06,
      "loss": 0.2034,
      "step": 8650
    },
    {
      "epoch": 1.6365688487584649,
      "grad_norm": 3.362605094909668,
      "learning_rate": 9.090795084023076e-06,
      "loss": 0.1872,
      "step": 8700
    },
    {
      "epoch": 1.645974416854778,
      "grad_norm": 3.509291172027588,
      "learning_rate": 9.028091296714321e-06,
      "loss": 0.2519,
      "step": 8750
    },
    {
      "epoch": 1.655379984951091,
      "grad_norm": 12.957924842834473,
      "learning_rate": 8.965387509405569e-06,
      "loss": 0.2648,
      "step": 8800
    },
    {
      "epoch": 1.664785553047404,
      "grad_norm": 3.217221975326538,
      "learning_rate": 8.902683722096816e-06,
      "loss": 0.1699,
      "step": 8850
    },
    {
      "epoch": 1.674191121143717,
      "grad_norm": 2.8752570152282715,
      "learning_rate": 8.839979934788062e-06,
      "loss": 0.1924,
      "step": 8900
    },
    {
      "epoch": 1.68359668924003,
      "grad_norm": 3.4973011016845703,
      "learning_rate": 8.77727614747931e-06,
      "loss": 0.183,
      "step": 8950
    },
    {
      "epoch": 1.6930022573363432,
      "grad_norm": 1.2514209747314453,
      "learning_rate": 8.714572360170555e-06,
      "loss": 0.1837,
      "step": 9000
    },
    {
      "epoch": 1.702407825432656,
      "grad_norm": 6.367992877960205,
      "learning_rate": 8.651868572861801e-06,
      "loss": 0.1828,
      "step": 9050
    },
    {
      "epoch": 1.7118133935289692,
      "grad_norm": 1.3052902221679688,
      "learning_rate": 8.589164785553048e-06,
      "loss": 0.1863,
      "step": 9100
    },
    {
      "epoch": 1.7212189616252822,
      "grad_norm": 1.235916256904602,
      "learning_rate": 8.526460998244294e-06,
      "loss": 0.2599,
      "step": 9150
    },
    {
      "epoch": 1.7306245297215952,
      "grad_norm": 1.0772079229354858,
      "learning_rate": 8.463757210935542e-06,
      "loss": 0.1842,
      "step": 9200
    },
    {
      "epoch": 1.7400300978179082,
      "grad_norm": 8.388031959533691,
      "learning_rate": 8.401053423626787e-06,
      "loss": 0.1866,
      "step": 9250
    },
    {
      "epoch": 1.7494356659142212,
      "grad_norm": 4.407077789306641,
      "learning_rate": 8.338349636318033e-06,
      "loss": 0.2991,
      "step": 9300
    },
    {
      "epoch": 1.7588412340105344,
      "grad_norm": 5.840625762939453,
      "learning_rate": 8.27564584900928e-06,
      "loss": 0.19,
      "step": 9350
    },
    {
      "epoch": 1.7682468021068471,
      "grad_norm": 2.0648770332336426,
      "learning_rate": 8.212942061700526e-06,
      "loss": 0.1934,
      "step": 9400
    },
    {
      "epoch": 1.7776523702031604,
      "grad_norm": 4.299741744995117,
      "learning_rate": 8.150238274391774e-06,
      "loss": 0.155,
      "step": 9450
    },
    {
      "epoch": 1.7870579382994731,
      "grad_norm": 1.6990511417388916,
      "learning_rate": 8.087534487083021e-06,
      "loss": 0.237,
      "step": 9500
    },
    {
      "epoch": 1.7964635063957863,
      "grad_norm": 2.469029664993286,
      "learning_rate": 8.024830699774267e-06,
      "loss": 0.2569,
      "step": 9550
    },
    {
      "epoch": 1.8058690744920993,
      "grad_norm": 0.9023020267486572,
      "learning_rate": 7.962126912465513e-06,
      "loss": 0.1873,
      "step": 9600
    },
    {
      "epoch": 1.8152746425884123,
      "grad_norm": 3.4308788776397705,
      "learning_rate": 7.89942312515676e-06,
      "loss": 0.2105,
      "step": 9650
    },
    {
      "epoch": 1.8246802106847255,
      "grad_norm": 2.518071174621582,
      "learning_rate": 7.836719337848006e-06,
      "loss": 0.2872,
      "step": 9700
    },
    {
      "epoch": 1.8340857787810383,
      "grad_norm": 1.2336055040359497,
      "learning_rate": 7.774015550539254e-06,
      "loss": 0.1982,
      "step": 9750
    },
    {
      "epoch": 1.8434913468773515,
      "grad_norm": 4.147019863128662,
      "learning_rate": 7.7113117632305e-06,
      "loss": 0.1778,
      "step": 9800
    },
    {
      "epoch": 1.8528969149736643,
      "grad_norm": 2.8657143115997314,
      "learning_rate": 7.648607975921745e-06,
      "loss": 0.2742,
      "step": 9850
    },
    {
      "epoch": 1.8623024830699775,
      "grad_norm": 4.490947246551514,
      "learning_rate": 7.585904188612993e-06,
      "loss": 0.2063,
      "step": 9900
    },
    {
      "epoch": 1.8717080511662905,
      "grad_norm": 13.179983139038086,
      "learning_rate": 7.523200401304239e-06,
      "loss": 0.2251,
      "step": 9950
    },
    {
      "epoch": 1.8811136192626035,
      "grad_norm": 2.9998207092285156,
      "learning_rate": 7.460496613995486e-06,
      "loss": 0.1819,
      "step": 10000
    },
    {
      "epoch": 1.8905191873589164,
      "grad_norm": 3.122727394104004,
      "learning_rate": 7.3977928266867325e-06,
      "loss": 0.1712,
      "step": 10050
    },
    {
      "epoch": 1.8999247554552294,
      "grad_norm": 12.002041816711426,
      "learning_rate": 7.335089039377979e-06,
      "loss": 0.1908,
      "step": 10100
    },
    {
      "epoch": 1.9093303235515426,
      "grad_norm": 3.0402774810791016,
      "learning_rate": 7.272385252069225e-06,
      "loss": 0.2027,
      "step": 10150
    },
    {
      "epoch": 1.9187358916478554,
      "grad_norm": 2.971097707748413,
      "learning_rate": 7.2096814647604716e-06,
      "loss": 0.1789,
      "step": 10200
    },
    {
      "epoch": 1.9281414597441686,
      "grad_norm": 1.380375862121582,
      "learning_rate": 7.146977677451718e-06,
      "loss": 0.2104,
      "step": 10250
    },
    {
      "epoch": 1.9375470278404816,
      "grad_norm": 1.4936792850494385,
      "learning_rate": 7.084273890142966e-06,
      "loss": 0.171,
      "step": 10300
    },
    {
      "epoch": 1.9469525959367946,
      "grad_norm": 1.463129997253418,
      "learning_rate": 7.021570102834212e-06,
      "loss": 0.1944,
      "step": 10350
    },
    {
      "epoch": 1.9563581640331076,
      "grad_norm": 2.7813374996185303,
      "learning_rate": 6.958866315525459e-06,
      "loss": 0.1901,
      "step": 10400
    },
    {
      "epoch": 1.9657637321294206,
      "grad_norm": 5.770429611206055,
      "learning_rate": 6.896162528216705e-06,
      "loss": 0.1671,
      "step": 10450
    },
    {
      "epoch": 1.9751693002257338,
      "grad_norm": 12.642657279968262,
      "learning_rate": 6.833458740907951e-06,
      "loss": 0.1977,
      "step": 10500
    },
    {
      "epoch": 1.9845748683220465,
      "grad_norm": 5.965068817138672,
      "learning_rate": 6.770754953599198e-06,
      "loss": 0.1535,
      "step": 10550
    },
    {
      "epoch": 1.9939804364183598,
      "grad_norm": 1.6920294761657715,
      "learning_rate": 6.7080511662904445e-06,
      "loss": 0.1837,
      "step": 10600
    },
    {
      "epoch": 2.0033860045146725,
      "grad_norm": 1.7706254720687866,
      "learning_rate": 6.645347378981691e-06,
      "loss": 0.1791,
      "step": 10650
    },
    {
      "epoch": 2.0127915726109857,
      "grad_norm": 8.003987312316895,
      "learning_rate": 6.582643591672938e-06,
      "loss": 0.2644,
      "step": 10700
    },
    {
      "epoch": 2.0221971407072985,
      "grad_norm": 1.5629470348358154,
      "learning_rate": 6.5199398043641835e-06,
      "loss": 0.1612,
      "step": 10750
    },
    {
      "epoch": 2.0316027088036117,
      "grad_norm": 2.4208626747131348,
      "learning_rate": 6.45723601705543e-06,
      "loss": 0.2278,
      "step": 10800
    },
    {
      "epoch": 2.041008276899925,
      "grad_norm": 1.0424669981002808,
      "learning_rate": 6.394532229746678e-06,
      "loss": 0.1516,
      "step": 10850
    },
    {
      "epoch": 2.0504138449962377,
      "grad_norm": 2.8615269660949707,
      "learning_rate": 6.331828442437924e-06,
      "loss": 0.2025,
      "step": 10900
    },
    {
      "epoch": 2.059819413092551,
      "grad_norm": 13.714409828186035,
      "learning_rate": 6.269124655129171e-06,
      "loss": 0.1939,
      "step": 10950
    },
    {
      "epoch": 2.0692249811888637,
      "grad_norm": 0.9942559003829956,
      "learning_rate": 6.206420867820417e-06,
      "loss": 0.1828,
      "step": 11000
    },
    {
      "epoch": 2.078630549285177,
      "grad_norm": 2.9414799213409424,
      "learning_rate": 6.143717080511663e-06,
      "loss": 0.1681,
      "step": 11050
    },
    {
      "epoch": 2.0880361173814896,
      "grad_norm": 3.001040458679199,
      "learning_rate": 6.08101329320291e-06,
      "loss": 0.165,
      "step": 11100
    },
    {
      "epoch": 2.097441685477803,
      "grad_norm": 4.616268634796143,
      "learning_rate": 6.0183095058941565e-06,
      "loss": 0.1328,
      "step": 11150
    },
    {
      "epoch": 2.106847253574116,
      "grad_norm": 16.67197608947754,
      "learning_rate": 5.955605718585403e-06,
      "loss": 0.1496,
      "step": 11200
    },
    {
      "epoch": 2.116252821670429,
      "grad_norm": 3.5193891525268555,
      "learning_rate": 5.89290193127665e-06,
      "loss": 0.1478,
      "step": 11250
    },
    {
      "epoch": 2.125658389766742,
      "grad_norm": 4.846385478973389,
      "learning_rate": 5.8301981439678955e-06,
      "loss": 0.1585,
      "step": 11300
    },
    {
      "epoch": 2.135063957863055,
      "grad_norm": 2.8305087089538574,
      "learning_rate": 5.767494356659142e-06,
      "loss": 0.1743,
      "step": 11350
    },
    {
      "epoch": 2.144469525959368,
      "grad_norm": 8.07402229309082,
      "learning_rate": 5.704790569350389e-06,
      "loss": 0.1978,
      "step": 11400
    },
    {
      "epoch": 2.153875094055681,
      "grad_norm": 5.102453231811523,
      "learning_rate": 5.642086782041636e-06,
      "loss": 0.23,
      "step": 11450
    },
    {
      "epoch": 2.163280662151994,
      "grad_norm": 5.685837268829346,
      "learning_rate": 5.579382994732883e-06,
      "loss": 0.151,
      "step": 11500
    },
    {
      "epoch": 2.172686230248307,
      "grad_norm": 1.282105803489685,
      "learning_rate": 5.516679207424129e-06,
      "loss": 0.2055,
      "step": 11550
    },
    {
      "epoch": 2.18209179834462,
      "grad_norm": 1.42794930934906,
      "learning_rate": 5.453975420115375e-06,
      "loss": 0.2316,
      "step": 11600
    },
    {
      "epoch": 2.191497366440933,
      "grad_norm": 0.9819298982620239,
      "learning_rate": 5.391271632806622e-06,
      "loss": 0.1529,
      "step": 11650
    },
    {
      "epoch": 2.200902934537246,
      "grad_norm": 6.298890113830566,
      "learning_rate": 5.3285678454978684e-06,
      "loss": 0.1741,
      "step": 11700
    },
    {
      "epoch": 2.210308502633559,
      "grad_norm": 2.593261480331421,
      "learning_rate": 5.265864058189115e-06,
      "loss": 0.2045,
      "step": 11750
    },
    {
      "epoch": 2.219714070729872,
      "grad_norm": 1.2109441757202148,
      "learning_rate": 5.203160270880362e-06,
      "loss": 0.1566,
      "step": 11800
    },
    {
      "epoch": 2.229119638826185,
      "grad_norm": 2.688478469848633,
      "learning_rate": 5.140456483571608e-06,
      "loss": 0.1741,
      "step": 11850
    },
    {
      "epoch": 2.238525206922498,
      "grad_norm": 1.0694407224655151,
      "learning_rate": 5.077752696262854e-06,
      "loss": 0.1442,
      "step": 11900
    },
    {
      "epoch": 2.247930775018811,
      "grad_norm": 3.074937343597412,
      "learning_rate": 5.015048908954101e-06,
      "loss": 0.1743,
      "step": 11950
    },
    {
      "epoch": 2.2573363431151243,
      "grad_norm": 1.3289566040039062,
      "learning_rate": 4.952345121645348e-06,
      "loss": 0.1526,
      "step": 12000
    },
    {
      "epoch": 2.266741911211437,
      "grad_norm": 1.629441261291504,
      "learning_rate": 4.889641334336595e-06,
      "loss": 0.2462,
      "step": 12050
    },
    {
      "epoch": 2.2761474793077503,
      "grad_norm": 5.780508518218994,
      "learning_rate": 4.8269375470278405e-06,
      "loss": 0.1472,
      "step": 12100
    },
    {
      "epoch": 2.285553047404063,
      "grad_norm": 10.06039810180664,
      "learning_rate": 4.764233759719087e-06,
      "loss": 0.1585,
      "step": 12150
    },
    {
      "epoch": 2.2949586155003763,
      "grad_norm": 11.558574676513672,
      "learning_rate": 4.701529972410334e-06,
      "loss": 0.1261,
      "step": 12200
    },
    {
      "epoch": 2.3043641835966895,
      "grad_norm": 1.2220011949539185,
      "learning_rate": 4.63882618510158e-06,
      "loss": 0.1866,
      "step": 12250
    },
    {
      "epoch": 2.3137697516930023,
      "grad_norm": 4.4303460121154785,
      "learning_rate": 4.576122397792827e-06,
      "loss": 0.1733,
      "step": 12300
    },
    {
      "epoch": 2.3231753197893155,
      "grad_norm": 2.550745964050293,
      "learning_rate": 4.513418610484074e-06,
      "loss": 0.2603,
      "step": 12350
    },
    {
      "epoch": 2.3325808878856282,
      "grad_norm": 3.003775119781494,
      "learning_rate": 4.45071482317532e-06,
      "loss": 0.141,
      "step": 12400
    },
    {
      "epoch": 2.3419864559819414,
      "grad_norm": 1.6381702423095703,
      "learning_rate": 4.388011035866567e-06,
      "loss": 0.1607,
      "step": 12450
    },
    {
      "epoch": 2.351392024078254,
      "grad_norm": 6.195992469787598,
      "learning_rate": 4.3253072485578135e-06,
      "loss": 0.1694,
      "step": 12500
    },
    {
      "epoch": 2.3607975921745674,
      "grad_norm": 1.3570517301559448,
      "learning_rate": 4.262603461249059e-06,
      "loss": 0.1823,
      "step": 12550
    },
    {
      "epoch": 2.37020316027088,
      "grad_norm": 1.8480778932571411,
      "learning_rate": 4.199899673940307e-06,
      "loss": 0.1476,
      "step": 12600
    },
    {
      "epoch": 2.3796087283671934,
      "grad_norm": 1.602105736732483,
      "learning_rate": 4.137195886631553e-06,
      "loss": 0.145,
      "step": 12650
    },
    {
      "epoch": 2.3890142964635066,
      "grad_norm": 2.0385992527008057,
      "learning_rate": 4.074492099322799e-06,
      "loss": 0.1685,
      "step": 12700
    },
    {
      "epoch": 2.3984198645598194,
      "grad_norm": 1.7218743562698364,
      "learning_rate": 4.011788312014046e-06,
      "loss": 0.1782,
      "step": 12750
    },
    {
      "epoch": 2.4078254326561326,
      "grad_norm": 5.287595272064209,
      "learning_rate": 3.949084524705293e-06,
      "loss": 0.173,
      "step": 12800
    },
    {
      "epoch": 2.4172310007524453,
      "grad_norm": 7.334980010986328,
      "learning_rate": 3.886380737396539e-06,
      "loss": 0.2384,
      "step": 12850
    },
    {
      "epoch": 2.4266365688487586,
      "grad_norm": 5.126362323760986,
      "learning_rate": 3.823676950087786e-06,
      "loss": 0.14,
      "step": 12900
    },
    {
      "epoch": 2.4360421369450713,
      "grad_norm": 1.547075867652893,
      "learning_rate": 3.760973162779032e-06,
      "loss": 0.1706,
      "step": 12950
    },
    {
      "epoch": 2.4454477050413845,
      "grad_norm": 4.503610134124756,
      "learning_rate": 3.698269375470279e-06,
      "loss": 0.2401,
      "step": 13000
    },
    {
      "epoch": 2.4548532731376973,
      "grad_norm": 2.71994686126709,
      "learning_rate": 3.6355655881615255e-06,
      "loss": 0.1496,
      "step": 13050
    },
    {
      "epoch": 2.4642588412340105,
      "grad_norm": 8.66321849822998,
      "learning_rate": 3.5728618008527716e-06,
      "loss": 0.1299,
      "step": 13100
    },
    {
      "epoch": 2.4736644093303237,
      "grad_norm": 11.310059547424316,
      "learning_rate": 3.5101580135440183e-06,
      "loss": 0.1402,
      "step": 13150
    },
    {
      "epoch": 2.4830699774266365,
      "grad_norm": 5.4790873527526855,
      "learning_rate": 3.447454226235265e-06,
      "loss": 0.1768,
      "step": 13200
    },
    {
      "epoch": 2.4924755455229497,
      "grad_norm": 3.9280734062194824,
      "learning_rate": 3.384750438926511e-06,
      "loss": 0.1844,
      "step": 13250
    },
    {
      "epoch": 2.5018811136192625,
      "grad_norm": 2.2618765830993652,
      "learning_rate": 3.322046651617758e-06,
      "loss": 0.1641,
      "step": 13300
    },
    {
      "epoch": 2.5112866817155757,
      "grad_norm": 1.4370334148406982,
      "learning_rate": 3.2593428643090047e-06,
      "loss": 0.1846,
      "step": 13350
    },
    {
      "epoch": 2.520692249811889,
      "grad_norm": 0.5764915347099304,
      "learning_rate": 3.196639077000251e-06,
      "loss": 0.1174,
      "step": 13400
    },
    {
      "epoch": 2.5300978179082017,
      "grad_norm": 13.661349296569824,
      "learning_rate": 3.1339352896914976e-06,
      "loss": 0.1453,
      "step": 13450
    },
    {
      "epoch": 2.5395033860045144,
      "grad_norm": 1.970798373222351,
      "learning_rate": 3.071231502382744e-06,
      "loss": 0.1712,
      "step": 13500
    },
    {
      "epoch": 2.5489089541008276,
      "grad_norm": 5.565087795257568,
      "learning_rate": 3.0085277150739904e-06,
      "loss": 0.1335,
      "step": 13550
    },
    {
      "epoch": 2.558314522197141,
      "grad_norm": 1.0521272420883179,
      "learning_rate": 2.9458239277652374e-06,
      "loss": 0.142,
      "step": 13600
    },
    {
      "epoch": 2.5677200902934536,
      "grad_norm": 2.4545013904571533,
      "learning_rate": 2.883120140456484e-06,
      "loss": 0.1364,
      "step": 13650
    },
    {
      "epoch": 2.577125658389767,
      "grad_norm": 2.9941937923431396,
      "learning_rate": 2.8204163531477302e-06,
      "loss": 0.1781,
      "step": 13700
    },
    {
      "epoch": 2.5865312264860796,
      "grad_norm": 1.3698679208755493,
      "learning_rate": 2.757712565838977e-06,
      "loss": 0.223,
      "step": 13750
    },
    {
      "epoch": 2.595936794582393,
      "grad_norm": 1.4991743564605713,
      "learning_rate": 2.695008778530224e-06,
      "loss": 0.1749,
      "step": 13800
    },
    {
      "epoch": 2.605342362678706,
      "grad_norm": 1.5825111865997314,
      "learning_rate": 2.63230499122147e-06,
      "loss": 0.2057,
      "step": 13850
    },
    {
      "epoch": 2.6147479307750188,
      "grad_norm": 9.330909729003906,
      "learning_rate": 2.5696012039127167e-06,
      "loss": 0.1412,
      "step": 13900
    },
    {
      "epoch": 2.624153498871332,
      "grad_norm": 4.595930576324463,
      "learning_rate": 2.5068974166039633e-06,
      "loss": 0.1576,
      "step": 13950
    },
    {
      "epoch": 2.6335590669676447,
      "grad_norm": 2.5993683338165283,
      "learning_rate": 2.4441936292952095e-06,
      "loss": 0.1585,
      "step": 14000
    },
    {
      "epoch": 2.642964635063958,
      "grad_norm": 1.360910177230835,
      "learning_rate": 2.381489841986456e-06,
      "loss": 0.1438,
      "step": 14050
    },
    {
      "epoch": 2.6523702031602707,
      "grad_norm": 7.876944541931152,
      "learning_rate": 2.3187860546777028e-06,
      "loss": 0.2808,
      "step": 14100
    },
    {
      "epoch": 2.661775771256584,
      "grad_norm": 7.476833820343018,
      "learning_rate": 2.2560822673689494e-06,
      "loss": 0.1367,
      "step": 14150
    },
    {
      "epoch": 2.6711813393528967,
      "grad_norm": 12.081886291503906,
      "learning_rate": 2.193378480060196e-06,
      "loss": 0.1393,
      "step": 14200
    },
    {
      "epoch": 2.68058690744921,
      "grad_norm": 2.116596221923828,
      "learning_rate": 2.130674692751442e-06,
      "loss": 0.1329,
      "step": 14250
    },
    {
      "epoch": 2.689992475545523,
      "grad_norm": 9.647782325744629,
      "learning_rate": 2.067970905442689e-06,
      "loss": 0.1475,
      "step": 14300
    },
    {
      "epoch": 2.699398043641836,
      "grad_norm": 5.909144878387451,
      "learning_rate": 2.0052671181339354e-06,
      "loss": 0.2392,
      "step": 14350
    },
    {
      "epoch": 2.708803611738149,
      "grad_norm": 3.9596285820007324,
      "learning_rate": 1.942563330825182e-06,
      "loss": 0.225,
      "step": 14400
    },
    {
      "epoch": 2.718209179834462,
      "grad_norm": 0.5444441437721252,
      "learning_rate": 1.8798595435164285e-06,
      "loss": 0.1488,
      "step": 14450
    },
    {
      "epoch": 2.727614747930775,
      "grad_norm": 1.5310776233673096,
      "learning_rate": 1.817155756207675e-06,
      "loss": 0.1905,
      "step": 14500
    },
    {
      "epoch": 2.7370203160270883,
      "grad_norm": 9.348706245422363,
      "learning_rate": 1.7544519688989217e-06,
      "loss": 0.1475,
      "step": 14550
    },
    {
      "epoch": 2.746425884123401,
      "grad_norm": 3.9080257415771484,
      "learning_rate": 1.691748181590168e-06,
      "loss": 0.1715,
      "step": 14600
    },
    {
      "epoch": 2.755831452219714,
      "grad_norm": 1.6289005279541016,
      "learning_rate": 1.6290443942814147e-06,
      "loss": 0.1468,
      "step": 14650
    },
    {
      "epoch": 2.765237020316027,
      "grad_norm": 8.900264739990234,
      "learning_rate": 1.5663406069726613e-06,
      "loss": 0.175,
      "step": 14700
    },
    {
      "epoch": 2.7746425884123402,
      "grad_norm": 7.342721462249756,
      "learning_rate": 1.5036368196639077e-06,
      "loss": 0.1888,
      "step": 14750
    },
    {
      "epoch": 2.784048156508653,
      "grad_norm": 9.584351539611816,
      "learning_rate": 1.4409330323551544e-06,
      "loss": 0.1692,
      "step": 14800
    },
    {
      "epoch": 2.793453724604966,
      "grad_norm": 1.2614892721176147,
      "learning_rate": 1.378229245046401e-06,
      "loss": 0.1522,
      "step": 14850
    },
    {
      "epoch": 2.802859292701279,
      "grad_norm": 4.842260360717773,
      "learning_rate": 1.3155254577376476e-06,
      "loss": 0.1783,
      "step": 14900
    },
    {
      "epoch": 2.812264860797592,
      "grad_norm": 1.9376300573349,
      "learning_rate": 1.252821670428894e-06,
      "loss": 0.1997,
      "step": 14950
    },
    {
      "epoch": 2.8216704288939054,
      "grad_norm": 1.777771234512329,
      "learning_rate": 1.1901178831201406e-06,
      "loss": 0.1452,
      "step": 15000
    },
    {
      "epoch": 2.831075996990218,
      "grad_norm": 8.967829704284668,
      "learning_rate": 1.1274140958113872e-06,
      "loss": 0.1644,
      "step": 15050
    },
    {
      "epoch": 2.8404815650865314,
      "grad_norm": 7.596806526184082,
      "learning_rate": 1.0647103085026337e-06,
      "loss": 0.2017,
      "step": 15100
    },
    {
      "epoch": 2.849887133182844,
      "grad_norm": 7.003665447235107,
      "learning_rate": 1.0020065211938803e-06,
      "loss": 0.1263,
      "step": 15150
    },
    {
      "epoch": 2.8592927012791574,
      "grad_norm": 2.5569920539855957,
      "learning_rate": 9.393027338851267e-07,
      "loss": 0.2034,
      "step": 15200
    },
    {
      "epoch": 2.86869826937547,
      "grad_norm": 4.9097490310668945,
      "learning_rate": 8.765989465763733e-07,
      "loss": 0.1402,
      "step": 15250
    },
    {
      "epoch": 2.8781038374717833,
      "grad_norm": 2.4663660526275635,
      "learning_rate": 8.138951592676199e-07,
      "loss": 0.1558,
      "step": 15300
    },
    {
      "epoch": 2.887509405568096,
      "grad_norm": 2.791815996170044,
      "learning_rate": 7.511913719588663e-07,
      "loss": 0.129,
      "step": 15350
    },
    {
      "epoch": 2.8969149736644093,
      "grad_norm": 2.119055986404419,
      "learning_rate": 6.884875846501129e-07,
      "loss": 0.2119,
      "step": 15400
    },
    {
      "epoch": 2.9063205417607225,
      "grad_norm": 12.687973976135254,
      "learning_rate": 6.257837973413595e-07,
      "loss": 0.1316,
      "step": 15450
    },
    {
      "epoch": 2.9157261098570353,
      "grad_norm": 1.892902135848999,
      "learning_rate": 5.63080010032606e-07,
      "loss": 0.1586,
      "step": 15500
    },
    {
      "epoch": 2.9251316779533485,
      "grad_norm": 1.8958168029785156,
      "learning_rate": 5.003762227238526e-07,
      "loss": 0.1394,
      "step": 15550
    },
    {
      "epoch": 2.9345372460496613,
      "grad_norm": 3.460698366165161,
      "learning_rate": 4.3767243541509916e-07,
      "loss": 0.1432,
      "step": 15600
    },
    {
      "epoch": 2.9439428141459745,
      "grad_norm": 8.283778190612793,
      "learning_rate": 3.7496864810634567e-07,
      "loss": 0.1961,
      "step": 15650
    },
    {
      "epoch": 2.9533483822422877,
      "grad_norm": 4.339179515838623,
      "learning_rate": 3.122648607975922e-07,
      "loss": 0.1535,
      "step": 15700
    },
    {
      "epoch": 2.9627539503386005,
      "grad_norm": 4.080807685852051,
      "learning_rate": 2.4956107348883875e-07,
      "loss": 0.1448,
      "step": 15750
    },
    {
      "epoch": 2.972159518434913,
      "grad_norm": 1.3516851663589478,
      "learning_rate": 1.868572861800853e-07,
      "loss": 0.1516,
      "step": 15800
    },
    {
      "epoch": 2.9815650865312264,
      "grad_norm": 6.547214031219482,
      "learning_rate": 1.2415349887133183e-07,
      "loss": 0.1415,
      "step": 15850
    },
    {
      "epoch": 2.9909706546275396,
      "grad_norm": 2.244124412536621,
      "learning_rate": 6.144971156257838e-08,
      "loss": 0.1463,
      "step": 15900
    },
    {
      "epoch": 3.0,
      "step": 15948,
      "total_flos": 1.6575120153138816e+16,
      "train_loss": 0.21908686365553812,
      "train_runtime": 7613.9058,
      "train_samples_per_second": 8.378,
      "train_steps_per_second": 2.095
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9558278538012962,
      "eval_f1": 0.5528281962901715,
      "eval_loss": 0.3851251006126404,
      "eval_pos_rate_pred": 0.06444756279730685,
      "eval_pos_rate_true": 0.034333589843004275,
      "eval_precision": 0.42367006657301653,
      "eval_recall": 0.7952708512467755,
      "eval_runtime": 37.8976,
      "eval_samples_per_second": 63.381,
      "eval_steps_per_second": 7.942,
      "step": 15948
    }
  ],
  "logging_steps": 50,
  "max_steps": 15948,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6575120153138816e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}