| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.8377807640622, | |
| "eval_steps": 500, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007679017085813016, | |
| "grad_norm": 9.727725365375315, | |
| "learning_rate": 1.9969278033794163e-05, | |
| "loss": 1.0813, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015358034171626032, | |
| "grad_norm": 5.891390510276554, | |
| "learning_rate": 1.993855606758833e-05, | |
| "loss": 0.2409, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02303705125743905, | |
| "grad_norm": 5.316733043697516, | |
| "learning_rate": 1.990783410138249e-05, | |
| "loss": 0.1393, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.030716068343252065, | |
| "grad_norm": 1.7740681981145512, | |
| "learning_rate": 1.9877112135176652e-05, | |
| "loss": 0.0493, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03839508542906508, | |
| "grad_norm": 0.21916356325831438, | |
| "learning_rate": 1.9846390168970814e-05, | |
| "loss": 0.0162, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0460741025148781, | |
| "grad_norm": 0.10526533851381863, | |
| "learning_rate": 1.981566820276498e-05, | |
| "loss": 0.0091, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05375311960069111, | |
| "grad_norm": 0.09814045024832824, | |
| "learning_rate": 1.978494623655914e-05, | |
| "loss": 0.0059, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06143213668650413, | |
| "grad_norm": 0.07005501460032271, | |
| "learning_rate": 1.9754224270353303e-05, | |
| "loss": 0.004, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06911115377231715, | |
| "grad_norm": 0.05895087166496951, | |
| "learning_rate": 1.9723502304147465e-05, | |
| "loss": 0.0029, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07679017085813016, | |
| "grad_norm": 0.05370999256341492, | |
| "learning_rate": 1.969278033794163e-05, | |
| "loss": 0.0021, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08446918794394317, | |
| "grad_norm": 0.03834816398116016, | |
| "learning_rate": 1.9662058371735792e-05, | |
| "loss": 0.0017, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0921482050297562, | |
| "grad_norm": 0.02734932863664795, | |
| "learning_rate": 1.9631336405529954e-05, | |
| "loss": 0.0013, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0998272221155692, | |
| "grad_norm": 0.036636985676517964, | |
| "learning_rate": 1.960061443932412e-05, | |
| "loss": 0.0011, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10750623920138222, | |
| "grad_norm": 0.02624781225455215, | |
| "learning_rate": 1.956989247311828e-05, | |
| "loss": 0.001, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11518525628719524, | |
| "grad_norm": 0.020107478390890325, | |
| "learning_rate": 1.9539170506912443e-05, | |
| "loss": 0.0009, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12286427337300826, | |
| "grad_norm": 0.01800807960296447, | |
| "learning_rate": 1.9508448540706605e-05, | |
| "loss": 0.0008, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.13054329045882127, | |
| "grad_norm": 0.020228009242971072, | |
| "learning_rate": 1.947772657450077e-05, | |
| "loss": 0.0008, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1382223075446343, | |
| "grad_norm": 0.014493824912246944, | |
| "learning_rate": 1.9447004608294932e-05, | |
| "loss": 0.0007, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1459013246304473, | |
| "grad_norm": 0.017644672369553523, | |
| "learning_rate": 1.9416282642089094e-05, | |
| "loss": 0.0007, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1535803417162603, | |
| "grad_norm": 0.010751743718197211, | |
| "learning_rate": 1.9385560675883256e-05, | |
| "loss": 0.0007, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16125935880207334, | |
| "grad_norm": 0.010740308456505745, | |
| "learning_rate": 1.935483870967742e-05, | |
| "loss": 0.0006, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16893837588788635, | |
| "grad_norm": 0.012713528482636717, | |
| "learning_rate": 1.9324116743471583e-05, | |
| "loss": 0.0006, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17661739297369936, | |
| "grad_norm": 0.015802969815143914, | |
| "learning_rate": 1.9293394777265745e-05, | |
| "loss": 0.0006, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1842964100595124, | |
| "grad_norm": 0.014091331879241371, | |
| "learning_rate": 1.926267281105991e-05, | |
| "loss": 0.0006, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1919754271453254, | |
| "grad_norm": 0.015514382294291343, | |
| "learning_rate": 1.923195084485407e-05, | |
| "loss": 0.0006, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1996544442311384, | |
| "grad_norm": 0.013206328203358594, | |
| "learning_rate": 1.9201228878648233e-05, | |
| "loss": 0.0005, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.20733346131695143, | |
| "grad_norm": 0.009242022580630511, | |
| "learning_rate": 1.91705069124424e-05, | |
| "loss": 0.0006, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21501247840276444, | |
| "grad_norm": 0.020571611640529425, | |
| "learning_rate": 1.913978494623656e-05, | |
| "loss": 0.0006, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.22269149548857747, | |
| "grad_norm": 0.010955465950333863, | |
| "learning_rate": 1.9109062980030722e-05, | |
| "loss": 0.0006, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.23037051257439048, | |
| "grad_norm": 0.010007446119499878, | |
| "learning_rate": 1.9078341013824884e-05, | |
| "loss": 0.0005, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23804952966020348, | |
| "grad_norm": 0.007350763980205989, | |
| "learning_rate": 1.904761904761905e-05, | |
| "loss": 0.0005, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24572854674601652, | |
| "grad_norm": 0.01779827202427705, | |
| "learning_rate": 1.901689708141321e-05, | |
| "loss": 0.0005, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.25340756383182955, | |
| "grad_norm": 0.011252332653060129, | |
| "learning_rate": 1.8986175115207373e-05, | |
| "loss": 0.0005, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26108658091764253, | |
| "grad_norm": 0.008048724916684773, | |
| "learning_rate": 1.895545314900154e-05, | |
| "loss": 0.0005, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.26876559800345556, | |
| "grad_norm": 0.008708169011978722, | |
| "learning_rate": 1.89247311827957e-05, | |
| "loss": 0.0005, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2764446150892686, | |
| "grad_norm": 0.011334018261947772, | |
| "learning_rate": 1.8894009216589862e-05, | |
| "loss": 0.0005, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2841236321750816, | |
| "grad_norm": 0.012541055386254458, | |
| "learning_rate": 1.8863287250384027e-05, | |
| "loss": 0.0005, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2918026492608946, | |
| "grad_norm": 0.00797864009319901, | |
| "learning_rate": 1.883256528417819e-05, | |
| "loss": 0.0005, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.29948166634670764, | |
| "grad_norm": 0.020543218292001533, | |
| "learning_rate": 1.880184331797235e-05, | |
| "loss": 0.0005, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3071606834325206, | |
| "grad_norm": 0.008833716167768964, | |
| "learning_rate": 1.8771121351766516e-05, | |
| "loss": 0.0005, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.31483970051833365, | |
| "grad_norm": 0.01304769806714082, | |
| "learning_rate": 1.8740399385560678e-05, | |
| "loss": 0.0005, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3225187176041467, | |
| "grad_norm": 0.0069234640254174085, | |
| "learning_rate": 1.870967741935484e-05, | |
| "loss": 0.0005, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.33019773468995967, | |
| "grad_norm": 0.006673799615850355, | |
| "learning_rate": 1.8678955453149005e-05, | |
| "loss": 0.0005, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3378767517757727, | |
| "grad_norm": 0.008446803942572372, | |
| "learning_rate": 1.8648233486943167e-05, | |
| "loss": 0.0005, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.34555576886158573, | |
| "grad_norm": 0.007927264290360291, | |
| "learning_rate": 1.861751152073733e-05, | |
| "loss": 0.0005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3532347859473987, | |
| "grad_norm": 0.008214795632921428, | |
| "learning_rate": 1.858678955453149e-05, | |
| "loss": 0.0005, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.36091380303321174, | |
| "grad_norm": 0.0753894373723428, | |
| "learning_rate": 1.8556067588325656e-05, | |
| "loss": 0.0005, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3685928201190248, | |
| "grad_norm": 0.023758962572301385, | |
| "learning_rate": 1.8525345622119818e-05, | |
| "loss": 0.0011, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.37627183720483776, | |
| "grad_norm": 0.008016402676739795, | |
| "learning_rate": 1.849462365591398e-05, | |
| "loss": 0.0007, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3839508542906508, | |
| "grad_norm": 0.008412657527867743, | |
| "learning_rate": 1.8463901689708145e-05, | |
| "loss": 0.0005, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3916298713764638, | |
| "grad_norm": 0.009837140434012532, | |
| "learning_rate": 1.8433179723502307e-05, | |
| "loss": 0.0005, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3993088884622768, | |
| "grad_norm": 0.007645537883711128, | |
| "learning_rate": 1.840245775729647e-05, | |
| "loss": 0.0005, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.40698790554808983, | |
| "grad_norm": 0.009833102435437406, | |
| "learning_rate": 1.837173579109063e-05, | |
| "loss": 0.0005, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.41466692263390287, | |
| "grad_norm": 0.011054813245860173, | |
| "learning_rate": 1.8341013824884796e-05, | |
| "loss": 0.0004, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4223459397197159, | |
| "grad_norm": 0.007490270162929283, | |
| "learning_rate": 1.8310291858678958e-05, | |
| "loss": 0.0004, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4300249568055289, | |
| "grad_norm": 0.004870404631077036, | |
| "learning_rate": 1.827956989247312e-05, | |
| "loss": 0.0004, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4377039738913419, | |
| "grad_norm": 0.0066189666941918876, | |
| "learning_rate": 1.8248847926267285e-05, | |
| "loss": 0.0004, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.44538299097715495, | |
| "grad_norm": 0.006057086083441172, | |
| "learning_rate": 1.8218125960061447e-05, | |
| "loss": 0.0004, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4530620080629679, | |
| "grad_norm": 0.013285236122907804, | |
| "learning_rate": 1.818740399385561e-05, | |
| "loss": 0.0004, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.46074102514878096, | |
| "grad_norm": 0.0060099205496900975, | |
| "learning_rate": 1.815668202764977e-05, | |
| "loss": 0.0004, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.468420042234594, | |
| "grad_norm": 0.014047318223341236, | |
| "learning_rate": 1.8125960061443936e-05, | |
| "loss": 0.0004, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.47609905932040697, | |
| "grad_norm": 0.006607072647025668, | |
| "learning_rate": 1.8095238095238097e-05, | |
| "loss": 0.0004, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.48377807640622, | |
| "grad_norm": 0.006231396108942757, | |
| "learning_rate": 1.806451612903226e-05, | |
| "loss": 0.0004, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.49145709349203304, | |
| "grad_norm": 0.007977818806990321, | |
| "learning_rate": 1.803379416282642e-05, | |
| "loss": 0.0004, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.499136110577846, | |
| "grad_norm": 0.004452562656745065, | |
| "learning_rate": 1.8003072196620586e-05, | |
| "loss": 0.0004, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5068151276636591, | |
| "grad_norm": 0.014825032602307369, | |
| "learning_rate": 1.7972350230414748e-05, | |
| "loss": 0.0004, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5144941447494721, | |
| "grad_norm": 0.007726258660291533, | |
| "learning_rate": 1.794162826420891e-05, | |
| "loss": 0.0004, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5221731618352851, | |
| "grad_norm": 0.004901555909874864, | |
| "learning_rate": 1.7910906298003075e-05, | |
| "loss": 0.0004, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5298521789210981, | |
| "grad_norm": 0.008374026896139112, | |
| "learning_rate": 1.7880184331797237e-05, | |
| "loss": 0.0004, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5375311960069111, | |
| "grad_norm": 0.012547419917709076, | |
| "learning_rate": 1.78494623655914e-05, | |
| "loss": 0.0004, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5452102130927241, | |
| "grad_norm": 0.006258152294015092, | |
| "learning_rate": 1.781874039938556e-05, | |
| "loss": 0.0003, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5528892301785372, | |
| "grad_norm": 0.005083584292665606, | |
| "learning_rate": 1.7788018433179726e-05, | |
| "loss": 0.0004, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5605682472643502, | |
| "grad_norm": 0.004891854049254944, | |
| "learning_rate": 1.7757296466973888e-05, | |
| "loss": 0.0003, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5682472643501632, | |
| "grad_norm": 0.0072081474858841825, | |
| "learning_rate": 1.772657450076805e-05, | |
| "loss": 0.0003, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5759262814359762, | |
| "grad_norm": 0.007757147865401733, | |
| "learning_rate": 1.7695852534562215e-05, | |
| "loss": 0.0003, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5836052985217892, | |
| "grad_norm": 0.009479916603674836, | |
| "learning_rate": 1.7665130568356377e-05, | |
| "loss": 0.0003, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5912843156076022, | |
| "grad_norm": 0.0032364838449939247, | |
| "learning_rate": 1.763440860215054e-05, | |
| "loss": 0.0003, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5989633326934153, | |
| "grad_norm": 0.006901552705099111, | |
| "learning_rate": 1.76036866359447e-05, | |
| "loss": 0.0003, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6066423497792283, | |
| "grad_norm": 0.015422089144709121, | |
| "learning_rate": 1.7572964669738866e-05, | |
| "loss": 0.0004, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6143213668650412, | |
| "grad_norm": 0.008708065391951672, | |
| "learning_rate": 1.7542242703533028e-05, | |
| "loss": 0.0003, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6220003839508543, | |
| "grad_norm": 0.005612862812218234, | |
| "learning_rate": 1.751152073732719e-05, | |
| "loss": 0.0004, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6296794010366673, | |
| "grad_norm": 0.004164448177684985, | |
| "learning_rate": 1.748079877112135e-05, | |
| "loss": 0.0003, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6373584181224803, | |
| "grad_norm": 0.005805489468439844, | |
| "learning_rate": 1.7450076804915517e-05, | |
| "loss": 0.0003, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6450374352082934, | |
| "grad_norm": 0.0058336408998533274, | |
| "learning_rate": 1.741935483870968e-05, | |
| "loss": 0.0003, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6527164522941064, | |
| "grad_norm": 0.004581976763955479, | |
| "learning_rate": 1.738863287250384e-05, | |
| "loss": 0.0003, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6603954693799193, | |
| "grad_norm": 0.007306575244655952, | |
| "learning_rate": 1.7357910906298005e-05, | |
| "loss": 0.0003, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6680744864657324, | |
| "grad_norm": 0.003820631484850308, | |
| "learning_rate": 1.7327188940092167e-05, | |
| "loss": 0.0003, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6757535035515454, | |
| "grad_norm": 0.0051408749682255814, | |
| "learning_rate": 1.729646697388633e-05, | |
| "loss": 0.0003, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6834325206373584, | |
| "grad_norm": 0.006921982048724673, | |
| "learning_rate": 1.726574500768049e-05, | |
| "loss": 0.0003, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6911115377231715, | |
| "grad_norm": 0.004784894049846067, | |
| "learning_rate": 1.7235023041474656e-05, | |
| "loss": 0.0003, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6987905548089844, | |
| "grad_norm": 0.0031159854313391635, | |
| "learning_rate": 1.7204301075268818e-05, | |
| "loss": 0.0003, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7064695718947974, | |
| "grad_norm": 0.006605791714352256, | |
| "learning_rate": 1.717357910906298e-05, | |
| "loss": 0.0003, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7141485889806105, | |
| "grad_norm": 0.004936488600986064, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.0003, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7218276060664235, | |
| "grad_norm": 0.007674349562955001, | |
| "learning_rate": 1.7112135176651307e-05, | |
| "loss": 0.0003, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7295066231522365, | |
| "grad_norm": 0.0076727356784045695, | |
| "learning_rate": 1.708141321044547e-05, | |
| "loss": 0.0003, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7371856402380496, | |
| "grad_norm": 0.00553702977922986, | |
| "learning_rate": 1.705069124423963e-05, | |
| "loss": 0.0003, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7448646573238625, | |
| "grad_norm": 0.007299313911910154, | |
| "learning_rate": 1.7019969278033796e-05, | |
| "loss": 0.0003, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7525436744096755, | |
| "grad_norm": 0.004258926555903475, | |
| "learning_rate": 1.6989247311827958e-05, | |
| "loss": 0.0003, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7602226914954886, | |
| "grad_norm": 0.004547740732820229, | |
| "learning_rate": 1.695852534562212e-05, | |
| "loss": 0.0003, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7679017085813016, | |
| "grad_norm": 0.005203964533047756, | |
| "learning_rate": 1.6927803379416285e-05, | |
| "loss": 0.0003, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7755807256671146, | |
| "grad_norm": 0.01302332966364172, | |
| "learning_rate": 1.6897081413210447e-05, | |
| "loss": 0.0003, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7832597427529276, | |
| "grad_norm": 0.0093180048231896, | |
| "learning_rate": 1.686635944700461e-05, | |
| "loss": 0.0003, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.7909387598387406, | |
| "grad_norm": 0.008803117247590506, | |
| "learning_rate": 1.683563748079877e-05, | |
| "loss": 0.0003, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.7986177769245536, | |
| "grad_norm": 0.0048423473942718395, | |
| "learning_rate": 1.6804915514592936e-05, | |
| "loss": 0.0003, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8062967940103667, | |
| "grad_norm": 0.00871594900669087, | |
| "learning_rate": 1.6774193548387098e-05, | |
| "loss": 0.0003, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8139758110961797, | |
| "grad_norm": 0.004213591332826992, | |
| "learning_rate": 1.674347158218126e-05, | |
| "loss": 0.0003, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8216548281819928, | |
| "grad_norm": 0.008233410874863824, | |
| "learning_rate": 1.6712749615975425e-05, | |
| "loss": 0.0003, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8293338452678057, | |
| "grad_norm": 0.004842583832484554, | |
| "learning_rate": 1.6682027649769587e-05, | |
| "loss": 0.0003, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8370128623536187, | |
| "grad_norm": 0.012551492004310723, | |
| "learning_rate": 1.665130568356375e-05, | |
| "loss": 0.0003, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8446918794394318, | |
| "grad_norm": 0.010999047619194315, | |
| "learning_rate": 1.6620583717357914e-05, | |
| "loss": 0.0003, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8523708965252448, | |
| "grad_norm": 0.00940161449318046, | |
| "learning_rate": 1.6589861751152075e-05, | |
| "loss": 0.0003, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8600499136110578, | |
| "grad_norm": 0.005629135501887116, | |
| "learning_rate": 1.6559139784946237e-05, | |
| "loss": 0.0003, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8677289306968708, | |
| "grad_norm": 0.005695864014209226, | |
| "learning_rate": 1.6528417818740403e-05, | |
| "loss": 0.0003, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8754079477826838, | |
| "grad_norm": 0.008141397000999681, | |
| "learning_rate": 1.6497695852534564e-05, | |
| "loss": 0.0003, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8830869648684968, | |
| "grad_norm": 0.010312822716836551, | |
| "learning_rate": 1.6466973886328726e-05, | |
| "loss": 0.0003, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8907659819543099, | |
| "grad_norm": 0.004299526696605698, | |
| "learning_rate": 1.643625192012289e-05, | |
| "loss": 0.0003, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.8984449990401229, | |
| "grad_norm": 0.007880227129562899, | |
| "learning_rate": 1.6405529953917053e-05, | |
| "loss": 0.0003, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9061240161259358, | |
| "grad_norm": 0.0038386080131062204, | |
| "learning_rate": 1.6374807987711215e-05, | |
| "loss": 0.0003, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9138030332117489, | |
| "grad_norm": 0.005902343886207709, | |
| "learning_rate": 1.6344086021505377e-05, | |
| "loss": 0.0003, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9214820502975619, | |
| "grad_norm": 0.005315908218014497, | |
| "learning_rate": 1.6313364055299542e-05, | |
| "loss": 0.0003, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9291610673833749, | |
| "grad_norm": 0.004817638329770463, | |
| "learning_rate": 1.6282642089093704e-05, | |
| "loss": 0.0003, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.936840084469188, | |
| "grad_norm": 0.004282276912369252, | |
| "learning_rate": 1.6251920122887866e-05, | |
| "loss": 0.0003, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.944519101555001, | |
| "grad_norm": 0.0030553467454727474, | |
| "learning_rate": 1.622119815668203e-05, | |
| "loss": 0.0003, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.9521981186408139, | |
| "grad_norm": 0.003669066757736195, | |
| "learning_rate": 1.6190476190476193e-05, | |
| "loss": 0.0003, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.959877135726627, | |
| "grad_norm": 0.007997281797816693, | |
| "learning_rate": 1.6159754224270355e-05, | |
| "loss": 0.0003, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.96755615281244, | |
| "grad_norm": 0.0033293025096136717, | |
| "learning_rate": 1.6129032258064517e-05, | |
| "loss": 0.0003, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.975235169898253, | |
| "grad_norm": 0.0090924908378621, | |
| "learning_rate": 1.6098310291858682e-05, | |
| "loss": 0.0003, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.9829141869840661, | |
| "grad_norm": 0.01333845903017061, | |
| "learning_rate": 1.6067588325652844e-05, | |
| "loss": 0.0003, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.990593204069879, | |
| "grad_norm": 0.004317437784916082, | |
| "learning_rate": 1.6036866359447006e-05, | |
| "loss": 0.0003, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.998272221155692, | |
| "grad_norm": 0.006218136692693676, | |
| "learning_rate": 1.600614439324117e-05, | |
| "loss": 0.0003, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.005375311960069, | |
| "grad_norm": 0.012722862337500376, | |
| "learning_rate": 1.5975422427035333e-05, | |
| "loss": 0.0003, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.0130543290458822, | |
| "grad_norm": 0.012347661878448313, | |
| "learning_rate": 1.5944700460829495e-05, | |
| "loss": 0.0003, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.0207333461316952, | |
| "grad_norm": 0.013680655581236744, | |
| "learning_rate": 1.5913978494623657e-05, | |
| "loss": 0.0003, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.0284123632175082, | |
| "grad_norm": 0.008212234787974039, | |
| "learning_rate": 1.5883256528417822e-05, | |
| "loss": 0.0003, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.0360913803033212, | |
| "grad_norm": 0.004326507441533446, | |
| "learning_rate": 1.5852534562211984e-05, | |
| "loss": 0.0003, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.0437703973891341, | |
| "grad_norm": 0.006702741273813344, | |
| "learning_rate": 1.5821812596006145e-05, | |
| "loss": 0.0003, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.051449414474947, | |
| "grad_norm": 0.007151020722561525, | |
| "learning_rate": 1.5791090629800307e-05, | |
| "loss": 0.0003, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.0591284315607603, | |
| "grad_norm": 0.012743255489323653, | |
| "learning_rate": 1.5760368663594473e-05, | |
| "loss": 0.0003, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.0668074486465733, | |
| "grad_norm": 0.008818497275557231, | |
| "learning_rate": 1.5729646697388634e-05, | |
| "loss": 0.0003, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.0744864657323863, | |
| "grad_norm": 0.002839593309524096, | |
| "learning_rate": 1.5698924731182796e-05, | |
| "loss": 0.0003, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0821654828181992, | |
| "grad_norm": 0.006499416407964513, | |
| "learning_rate": 1.566820276497696e-05, | |
| "loss": 0.0003, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.0898444999040122, | |
| "grad_norm": 0.0043902156072960176, | |
| "learning_rate": 1.5637480798771123e-05, | |
| "loss": 0.0003, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.0975235169898252, | |
| "grad_norm": 0.011284861249978177, | |
| "learning_rate": 1.5606758832565285e-05, | |
| "loss": 0.0003, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.1052025340756384, | |
| "grad_norm": 0.004036096758704275, | |
| "learning_rate": 1.5576036866359447e-05, | |
| "loss": 0.0003, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.1128815511614514, | |
| "grad_norm": 0.006193352867852987, | |
| "learning_rate": 1.5545314900153612e-05, | |
| "loss": 0.0003, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.1205605682472644, | |
| "grad_norm": 0.0043243328755989815, | |
| "learning_rate": 1.5514592933947774e-05, | |
| "loss": 0.0003, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.1282395853330773, | |
| "grad_norm": 0.005942275681812123, | |
| "learning_rate": 1.5483870967741936e-05, | |
| "loss": 0.0003, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.1359186024188903, | |
| "grad_norm": 0.004708153262372667, | |
| "learning_rate": 1.5453149001536098e-05, | |
| "loss": 0.0003, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.1435976195047033, | |
| "grad_norm": 0.007691673975032789, | |
| "learning_rate": 1.5422427035330263e-05, | |
| "loss": 0.0003, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.1512766365905165, | |
| "grad_norm": 0.012308448692961365, | |
| "learning_rate": 1.5391705069124425e-05, | |
| "loss": 0.0003, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.1589556536763295, | |
| "grad_norm": 0.008574072497790838, | |
| "learning_rate": 1.5360983102918587e-05, | |
| "loss": 0.0003, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.1666346707621424, | |
| "grad_norm": 0.0022349584876796874, | |
| "learning_rate": 1.5330261136712752e-05, | |
| "loss": 0.0003, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.1743136878479554, | |
| "grad_norm": 0.0039426732001856726, | |
| "learning_rate": 1.5299539170506914e-05, | |
| "loss": 0.0003, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.1819927049337684, | |
| "grad_norm": 0.0076643241645843055, | |
| "learning_rate": 1.5268817204301076e-05, | |
| "loss": 0.0003, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.1896717220195816, | |
| "grad_norm": 0.00801480580123044, | |
| "learning_rate": 1.523809523809524e-05, | |
| "loss": 0.0003, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.1973507391053946, | |
| "grad_norm": 0.0063236711582617235, | |
| "learning_rate": 1.5207373271889403e-05, | |
| "loss": 0.0003, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.2050297561912076, | |
| "grad_norm": 0.0026532800936888, | |
| "learning_rate": 1.5176651305683565e-05, | |
| "loss": 0.0003, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.2127087732770205, | |
| "grad_norm": 0.004399678225568667, | |
| "learning_rate": 1.5145929339477728e-05, | |
| "loss": 0.0003, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.2203877903628335, | |
| "grad_norm": 0.003175980432596953, | |
| "learning_rate": 1.511520737327189e-05, | |
| "loss": 0.0003, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.2280668074486465, | |
| "grad_norm": 0.003690809213724381, | |
| "learning_rate": 1.5084485407066054e-05, | |
| "loss": 0.0003, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2357458245344595, | |
| "grad_norm": 0.002885795037112433, | |
| "learning_rate": 1.5053763440860215e-05, | |
| "loss": 0.0003, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.2434248416202727, | |
| "grad_norm": 0.004662223632430856, | |
| "learning_rate": 1.5023041474654379e-05, | |
| "loss": 0.0003, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.2511038587060856, | |
| "grad_norm": 0.0038277697276091482, | |
| "learning_rate": 1.4992319508448543e-05, | |
| "loss": 0.0003, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.2587828757918986, | |
| "grad_norm": 0.004926059671498492, | |
| "learning_rate": 1.4961597542242704e-05, | |
| "loss": 0.0003, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.2664618928777116, | |
| "grad_norm": 0.00463367088053743, | |
| "learning_rate": 1.4930875576036868e-05, | |
| "loss": 0.0003, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.2741409099635246, | |
| "grad_norm": 0.005726418059037605, | |
| "learning_rate": 1.490015360983103e-05, | |
| "loss": 0.0003, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.2818199270493378, | |
| "grad_norm": 0.005247011723204908, | |
| "learning_rate": 1.4869431643625193e-05, | |
| "loss": 0.0003, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.2894989441351508, | |
| "grad_norm": 0.007164559324630275, | |
| "learning_rate": 1.4838709677419357e-05, | |
| "loss": 0.0003, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.2971779612209637, | |
| "grad_norm": 0.005041996622130852, | |
| "learning_rate": 1.4807987711213519e-05, | |
| "loss": 0.0003, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.3048569783067767, | |
| "grad_norm": 0.005487598186113812, | |
| "learning_rate": 1.477726574500768e-05, | |
| "loss": 0.0003, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.3125359953925897, | |
| "grad_norm": 0.003135256998223166, | |
| "learning_rate": 1.4746543778801846e-05, | |
| "loss": 0.0003, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.320215012478403, | |
| "grad_norm": 0.0069210555218662635, | |
| "learning_rate": 1.4715821812596008e-05, | |
| "loss": 0.0003, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.3278940295642156, | |
| "grad_norm": 0.003417481046551585, | |
| "learning_rate": 1.468509984639017e-05, | |
| "loss": 0.0003, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.3355730466500289, | |
| "grad_norm": 0.0076945646315396184, | |
| "learning_rate": 1.4654377880184335e-05, | |
| "loss": 0.0003, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.3432520637358418, | |
| "grad_norm": 0.004664070136688662, | |
| "learning_rate": 1.4623655913978497e-05, | |
| "loss": 0.0003, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.3509310808216548, | |
| "grad_norm": 0.003957679244338998, | |
| "learning_rate": 1.4592933947772658e-05, | |
| "loss": 0.0003, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.3586100979074678, | |
| "grad_norm": 0.003471942169350295, | |
| "learning_rate": 1.456221198156682e-05, | |
| "loss": 0.0003, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.3662891149932808, | |
| "grad_norm": 0.006157537653481742, | |
| "learning_rate": 1.4531490015360986e-05, | |
| "loss": 0.0003, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.373968132079094, | |
| "grad_norm": 0.008728892287992982, | |
| "learning_rate": 1.4500768049155147e-05, | |
| "loss": 0.0003, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.381647149164907, | |
| "grad_norm": 0.00566547815883834, | |
| "learning_rate": 1.447004608294931e-05, | |
| "loss": 0.0003, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.38932616625072, | |
| "grad_norm": 0.008658651412825806, | |
| "learning_rate": 1.4439324116743471e-05, | |
| "loss": 0.0003, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.397005183336533, | |
| "grad_norm": 0.005166116309061509, | |
| "learning_rate": 1.4408602150537636e-05, | |
| "loss": 0.0003, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.4046842004223459, | |
| "grad_norm": 0.0028075866905398405, | |
| "learning_rate": 1.4377880184331798e-05, | |
| "loss": 0.0003, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.412363217508159, | |
| "grad_norm": 0.005329755957597182, | |
| "learning_rate": 1.434715821812596e-05, | |
| "loss": 0.0003, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.420042234593972, | |
| "grad_norm": 0.0025789510506500093, | |
| "learning_rate": 1.4316436251920125e-05, | |
| "loss": 0.0003, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.427721251679785, | |
| "grad_norm": 0.00305980645847167, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.0003, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.435400268765598, | |
| "grad_norm": 0.0070892296996524756, | |
| "learning_rate": 1.4254992319508449e-05, | |
| "loss": 0.0003, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.443079285851411, | |
| "grad_norm": 0.004636892035159531, | |
| "learning_rate": 1.422427035330261e-05, | |
| "loss": 0.0003, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.450758302937224, | |
| "grad_norm": 0.007196283698693798, | |
| "learning_rate": 1.4193548387096776e-05, | |
| "loss": 0.0003, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.458437320023037, | |
| "grad_norm": 0.002244250906729436, | |
| "learning_rate": 1.4162826420890938e-05, | |
| "loss": 0.0003, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.4661163371088501, | |
| "grad_norm": 0.003662139205215455, | |
| "learning_rate": 1.41321044546851e-05, | |
| "loss": 0.0003, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.4737953541946631, | |
| "grad_norm": 0.00899296235470264, | |
| "learning_rate": 1.4101382488479263e-05, | |
| "loss": 0.0003, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.481474371280476, | |
| "grad_norm": 0.007554011023119674, | |
| "learning_rate": 1.4070660522273427e-05, | |
| "loss": 0.0003, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.489153388366289, | |
| "grad_norm": 0.006664687059239841, | |
| "learning_rate": 1.4039938556067589e-05, | |
| "loss": 0.0003, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.496832405452102, | |
| "grad_norm": 0.003654880039156468, | |
| "learning_rate": 1.4009216589861752e-05, | |
| "loss": 0.0003, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.5045114225379153, | |
| "grad_norm": 0.007520628392798319, | |
| "learning_rate": 1.3978494623655916e-05, | |
| "loss": 0.0003, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.512190439623728, | |
| "grad_norm": 0.0074722854847562, | |
| "learning_rate": 1.3947772657450078e-05, | |
| "loss": 0.0003, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.5198694567095412, | |
| "grad_norm": 0.009940245405058647, | |
| "learning_rate": 1.3917050691244241e-05, | |
| "loss": 0.0003, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.5275484737953542, | |
| "grad_norm": 0.006304573730328701, | |
| "learning_rate": 1.3886328725038403e-05, | |
| "loss": 0.0003, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.5352274908811672, | |
| "grad_norm": 0.0055550452573098165, | |
| "learning_rate": 1.3855606758832567e-05, | |
| "loss": 0.0003, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.5429065079669804, | |
| "grad_norm": 0.008909260445155288, | |
| "learning_rate": 1.382488479262673e-05, | |
| "loss": 0.0003, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.5505855250527931, | |
| "grad_norm": 0.002599064360242108, | |
| "learning_rate": 1.3794162826420892e-05, | |
| "loss": 0.0003, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.5582645421386063, | |
| "grad_norm": 0.0034038539972756267, | |
| "learning_rate": 1.3763440860215056e-05, | |
| "loss": 0.0003, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.5659435592244193, | |
| "grad_norm": 0.013808170488498894, | |
| "learning_rate": 1.3732718894009217e-05, | |
| "loss": 0.0003, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.5736225763102323, | |
| "grad_norm": 0.0017545504012708383, | |
| "learning_rate": 1.3701996927803381e-05, | |
| "loss": 0.0003, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.5813015933960453, | |
| "grad_norm": 0.003842048923808368, | |
| "learning_rate": 1.3671274961597543e-05, | |
| "loss": 0.0003, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.5889806104818582, | |
| "grad_norm": 0.004326236565390302, | |
| "learning_rate": 1.3640552995391706e-05, | |
| "loss": 0.0003, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.5966596275676714, | |
| "grad_norm": 0.0035407621847916644, | |
| "learning_rate": 1.360983102918587e-05, | |
| "loss": 0.0003, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.6043386446534842, | |
| "grad_norm": 0.004974256724902474, | |
| "learning_rate": 1.3579109062980032e-05, | |
| "loss": 0.0003, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.6120176617392974, | |
| "grad_norm": 0.002810217043936606, | |
| "learning_rate": 1.3548387096774194e-05, | |
| "loss": 0.0003, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.6196966788251104, | |
| "grad_norm": 0.002954993487071089, | |
| "learning_rate": 1.3517665130568359e-05, | |
| "loss": 0.0003, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.6273756959109233, | |
| "grad_norm": 0.003028755651427543, | |
| "learning_rate": 1.348694316436252e-05, | |
| "loss": 0.0003, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.6350547129967365, | |
| "grad_norm": 0.0026674905313092702, | |
| "learning_rate": 1.3456221198156683e-05, | |
| "loss": 0.0003, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.6427337300825493, | |
| "grad_norm": 0.006409597579717579, | |
| "learning_rate": 1.3425499231950848e-05, | |
| "loss": 0.0003, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.6504127471683625, | |
| "grad_norm": 0.0036828809413129507, | |
| "learning_rate": 1.339477726574501e-05, | |
| "loss": 0.0003, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.6580917642541755, | |
| "grad_norm": 0.007891670676920814, | |
| "learning_rate": 1.3364055299539171e-05, | |
| "loss": 0.0003, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.6657707813399885, | |
| "grad_norm": 0.006626613367159732, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0003, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.6734497984258017, | |
| "grad_norm": 0.003534695341590609, | |
| "learning_rate": 1.3302611367127499e-05, | |
| "loss": 0.0003, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.6811288155116144, | |
| "grad_norm": 0.002799573693646372, | |
| "learning_rate": 1.327188940092166e-05, | |
| "loss": 0.0003, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.6888078325974276, | |
| "grad_norm": 0.007920181925607207, | |
| "learning_rate": 1.3241167434715822e-05, | |
| "loss": 0.0003, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.6964868496832406, | |
| "grad_norm": 0.003197909917687604, | |
| "learning_rate": 1.3210445468509984e-05, | |
| "loss": 0.0003, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.7041658667690536, | |
| "grad_norm": 0.0019020952054658064, | |
| "learning_rate": 1.317972350230415e-05, | |
| "loss": 0.0003, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.7118448838548666, | |
| "grad_norm": 0.003430345573430971, | |
| "learning_rate": 1.3149001536098311e-05, | |
| "loss": 0.0003, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.7195239009406795, | |
| "grad_norm": 0.005966901533330741, | |
| "learning_rate": 1.3118279569892473e-05, | |
| "loss": 0.0003, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.7272029180264927, | |
| "grad_norm": 0.003453325797399688, | |
| "learning_rate": 1.3087557603686638e-05, | |
| "loss": 0.0003, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.7348819351123055, | |
| "grad_norm": 0.004117138090969933, | |
| "learning_rate": 1.30568356374808e-05, | |
| "loss": 0.0003, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.7425609521981187, | |
| "grad_norm": 0.015933078305414367, | |
| "learning_rate": 1.3026113671274962e-05, | |
| "loss": 0.0003, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.7502399692839317, | |
| "grad_norm": 0.00695674170034877, | |
| "learning_rate": 1.2995391705069126e-05, | |
| "loss": 0.0003, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.7579189863697446, | |
| "grad_norm": 0.0030599729808705334, | |
| "learning_rate": 1.2964669738863289e-05, | |
| "loss": 0.0003, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.7655980034555578, | |
| "grad_norm": 0.005764241880693109, | |
| "learning_rate": 1.2933947772657451e-05, | |
| "loss": 0.0003, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.7732770205413706, | |
| "grad_norm": 0.0024572213319480774, | |
| "learning_rate": 1.2903225806451613e-05, | |
| "loss": 0.0003, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.7809560376271838, | |
| "grad_norm": 0.005442108190635756, | |
| "learning_rate": 1.2872503840245776e-05, | |
| "loss": 0.0003, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.7886350547129968, | |
| "grad_norm": 0.005249849493945225, | |
| "learning_rate": 1.284178187403994e-05, | |
| "loss": 0.0003, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.7963140717988098, | |
| "grad_norm": 0.003760686952515486, | |
| "learning_rate": 1.2811059907834102e-05, | |
| "loss": 0.0003, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.8039930888846227, | |
| "grad_norm": 0.003326565830912095, | |
| "learning_rate": 1.2780337941628265e-05, | |
| "loss": 0.0003, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.8116721059704357, | |
| "grad_norm": 0.00687928832968373, | |
| "learning_rate": 1.2749615975422429e-05, | |
| "loss": 0.0003, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.819351123056249, | |
| "grad_norm": 0.002523920816120679, | |
| "learning_rate": 1.271889400921659e-05, | |
| "loss": 0.0003, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.8270301401420617, | |
| "grad_norm": 0.0035826335995729104, | |
| "learning_rate": 1.2688172043010754e-05, | |
| "loss": 0.0003, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.8347091572278749, | |
| "grad_norm": 0.0042202748472642045, | |
| "learning_rate": 1.2657450076804916e-05, | |
| "loss": 0.0003, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.8423881743136878, | |
| "grad_norm": 0.00433061878504225, | |
| "learning_rate": 1.262672811059908e-05, | |
| "loss": 0.0003, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.8500671913995008, | |
| "grad_norm": 0.006032498913999335, | |
| "learning_rate": 1.2596006144393243e-05, | |
| "loss": 0.0003, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.857746208485314, | |
| "grad_norm": 0.002731884836982076, | |
| "learning_rate": 1.2565284178187405e-05, | |
| "loss": 0.0003, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.8654252255711268, | |
| "grad_norm": 0.003451471394001781, | |
| "learning_rate": 1.2534562211981567e-05, | |
| "loss": 0.0003, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.87310424265694, | |
| "grad_norm": 0.002472343027285463, | |
| "learning_rate": 1.2503840245775732e-05, | |
| "loss": 0.0003, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.880783259742753, | |
| "grad_norm": 0.003320944102999081, | |
| "learning_rate": 1.2473118279569894e-05, | |
| "loss": 0.0003, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.888462276828566, | |
| "grad_norm": 0.0018486199226611809, | |
| "learning_rate": 1.2442396313364056e-05, | |
| "loss": 0.0003, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.896141293914379, | |
| "grad_norm": 0.0024515391018999654, | |
| "learning_rate": 1.2411674347158221e-05, | |
| "loss": 0.0003, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.903820311000192, | |
| "grad_norm": 0.0039409109050494015, | |
| "learning_rate": 1.2380952380952383e-05, | |
| "loss": 0.0003, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.911499328086005, | |
| "grad_norm": 0.0020024603662128597, | |
| "learning_rate": 1.2350230414746545e-05, | |
| "loss": 0.0003, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.9191783451718178, | |
| "grad_norm": 0.004837460528352513, | |
| "learning_rate": 1.2319508448540707e-05, | |
| "loss": 0.0003, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.926857362257631, | |
| "grad_norm": 0.001684979658088187, | |
| "learning_rate": 1.2288786482334872e-05, | |
| "loss": 0.0003, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.934536379343444, | |
| "grad_norm": 0.0036424135207038444, | |
| "learning_rate": 1.2258064516129034e-05, | |
| "loss": 0.0003, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.942215396429257, | |
| "grad_norm": 0.003460021636897484, | |
| "learning_rate": 1.2227342549923195e-05, | |
| "loss": 0.0003, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.9498944135150702, | |
| "grad_norm": 0.0012857496658247277, | |
| "learning_rate": 1.2196620583717357e-05, | |
| "loss": 0.0003, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.957573430600883, | |
| "grad_norm": 0.006405995384537319, | |
| "learning_rate": 1.2165898617511523e-05, | |
| "loss": 0.0003, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.9652524476866962, | |
| "grad_norm": 0.0027885557407680427, | |
| "learning_rate": 1.2135176651305684e-05, | |
| "loss": 0.0003, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.9729314647725091, | |
| "grad_norm": 0.0046197235806071674, | |
| "learning_rate": 1.2104454685099846e-05, | |
| "loss": 0.0003, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.9806104818583221, | |
| "grad_norm": 0.002923714387690518, | |
| "learning_rate": 1.2073732718894012e-05, | |
| "loss": 0.0003, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.9882894989441353, | |
| "grad_norm": 0.004546312959559587, | |
| "learning_rate": 1.2043010752688173e-05, | |
| "loss": 0.0003, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.995968516029948, | |
| "grad_norm": 0.0027543773295028978, | |
| "learning_rate": 1.2012288786482335e-05, | |
| "loss": 0.0003, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.0030716068343253, | |
| "grad_norm": 0.00445399533446676, | |
| "learning_rate": 1.1981566820276497e-05, | |
| "loss": 0.0002, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.010750623920138, | |
| "grad_norm": 0.009066513947313037, | |
| "learning_rate": 1.1950844854070662e-05, | |
| "loss": 0.0003, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.0184296410059512, | |
| "grad_norm": 0.0022089803112435333, | |
| "learning_rate": 1.1920122887864824e-05, | |
| "loss": 0.0003, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.0261086580917644, | |
| "grad_norm": 0.007859382516213974, | |
| "learning_rate": 1.1889400921658986e-05, | |
| "loss": 0.0003, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.033787675177577, | |
| "grad_norm": 0.002280973635314502, | |
| "learning_rate": 1.185867895545315e-05, | |
| "loss": 0.0003, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.0414666922633904, | |
| "grad_norm": 0.003812714680306737, | |
| "learning_rate": 1.1827956989247313e-05, | |
| "loss": 0.0003, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.049145709349203, | |
| "grad_norm": 0.003865003763224919, | |
| "learning_rate": 1.1797235023041475e-05, | |
| "loss": 0.0003, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.0568247264350163, | |
| "grad_norm": 0.0056210714836978015, | |
| "learning_rate": 1.1766513056835639e-05, | |
| "loss": 0.0003, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.0645037435208295, | |
| "grad_norm": 0.0029786676822015994, | |
| "learning_rate": 1.1735791090629802e-05, | |
| "loss": 0.0003, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.0721827606066423, | |
| "grad_norm": 0.007577207373633205, | |
| "learning_rate": 1.1705069124423964e-05, | |
| "loss": 0.0003, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.0798617776924555, | |
| "grad_norm": 0.004911935203582005, | |
| "learning_rate": 1.1674347158218127e-05, | |
| "loss": 0.0003, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.0875407947782683, | |
| "grad_norm": 0.002348567372539777, | |
| "learning_rate": 1.164362519201229e-05, | |
| "loss": 0.0003, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.0952198118640815, | |
| "grad_norm": 0.0021975557273255456, | |
| "learning_rate": 1.1612903225806453e-05, | |
| "loss": 0.0003, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.102898828949894, | |
| "grad_norm": 0.004404237994021701, | |
| "learning_rate": 1.1582181259600616e-05, | |
| "loss": 0.0003, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.1105778460357074, | |
| "grad_norm": 0.0018039936664214415, | |
| "learning_rate": 1.1551459293394778e-05, | |
| "loss": 0.0003, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.1182568631215206, | |
| "grad_norm": 0.010542570288986714, | |
| "learning_rate": 1.152073732718894e-05, | |
| "loss": 0.0003, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.1259358802073334, | |
| "grad_norm": 0.007212811136834576, | |
| "learning_rate": 1.1490015360983104e-05, | |
| "loss": 0.0003, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.1336148972931466, | |
| "grad_norm": 0.002463133011224361, | |
| "learning_rate": 1.1459293394777267e-05, | |
| "loss": 0.0003, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.1412939143789593, | |
| "grad_norm": 0.0030969432185738934, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.0003, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.1489729314647725, | |
| "grad_norm": 0.0009640256914357676, | |
| "learning_rate": 1.1397849462365593e-05, | |
| "loss": 0.0003, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.1566519485505857, | |
| "grad_norm": 0.0062156621929774095, | |
| "learning_rate": 1.1367127496159756e-05, | |
| "loss": 0.0003, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.1643309656363985, | |
| "grad_norm": 0.006538407111075363, | |
| "learning_rate": 1.1336405529953918e-05, | |
| "loss": 0.0003, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.1720099827222117, | |
| "grad_norm": 0.0028212937587065077, | |
| "learning_rate": 1.130568356374808e-05, | |
| "loss": 0.0003, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.1796889998080244, | |
| "grad_norm": 0.005023107122791538, | |
| "learning_rate": 1.1274961597542245e-05, | |
| "loss": 0.0003, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.1873680168938376, | |
| "grad_norm": 0.0011156564572317208, | |
| "learning_rate": 1.1244239631336407e-05, | |
| "loss": 0.0003, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.1950470339796504, | |
| "grad_norm": 0.001942852230799726, | |
| "learning_rate": 1.1213517665130569e-05, | |
| "loss": 0.0003, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.2027260510654636, | |
| "grad_norm": 0.008378616547487394, | |
| "learning_rate": 1.118279569892473e-05, | |
| "loss": 0.0003, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.210405068151277, | |
| "grad_norm": 0.002600735996609255, | |
| "learning_rate": 1.1152073732718896e-05, | |
| "loss": 0.0003, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.2180840852370896, | |
| "grad_norm": 0.0015136314382080984, | |
| "learning_rate": 1.1121351766513058e-05, | |
| "loss": 0.0003, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.2257631023229028, | |
| "grad_norm": 0.007485965818712587, | |
| "learning_rate": 1.109062980030722e-05, | |
| "loss": 0.0003, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.2334421194087155, | |
| "grad_norm": 0.006013194482848518, | |
| "learning_rate": 1.1059907834101385e-05, | |
| "loss": 0.0003, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.2411211364945287, | |
| "grad_norm": 0.005253526138111572, | |
| "learning_rate": 1.1029185867895547e-05, | |
| "loss": 0.0003, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.248800153580342, | |
| "grad_norm": 0.0030844920407275436, | |
| "learning_rate": 1.0998463901689708e-05, | |
| "loss": 0.0003, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.2564791706661547, | |
| "grad_norm": 0.00448225831555134, | |
| "learning_rate": 1.096774193548387e-05, | |
| "loss": 0.0003, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.264158187751968, | |
| "grad_norm": 0.002766089015696827, | |
| "learning_rate": 1.0937019969278036e-05, | |
| "loss": 0.0003, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.2718372048377806, | |
| "grad_norm": 0.00432406954984362, | |
| "learning_rate": 1.0906298003072197e-05, | |
| "loss": 0.0003, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.279516221923594, | |
| "grad_norm": 0.005297571043727681, | |
| "learning_rate": 1.087557603686636e-05, | |
| "loss": 0.0003, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.2871952390094066, | |
| "grad_norm": 0.005051571714735924, | |
| "learning_rate": 1.0844854070660523e-05, | |
| "loss": 0.0003, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.2948742560952198, | |
| "grad_norm": 0.0036863856281938016, | |
| "learning_rate": 1.0814132104454686e-05, | |
| "loss": 0.0003, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.302553273181033, | |
| "grad_norm": 0.0024327974031678975, | |
| "learning_rate": 1.0783410138248848e-05, | |
| "loss": 0.0003, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.3102322902668457, | |
| "grad_norm": 0.0032986912710764884, | |
| "learning_rate": 1.0752688172043012e-05, | |
| "loss": 0.0003, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.317911307352659, | |
| "grad_norm": 0.002895373170239971, | |
| "learning_rate": 1.0721966205837175e-05, | |
| "loss": 0.0003, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.3255903244384717, | |
| "grad_norm": 0.0037467096830764678, | |
| "learning_rate": 1.0691244239631337e-05, | |
| "loss": 0.0003, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.333269341524285, | |
| "grad_norm": 0.0041338587084730925, | |
| "learning_rate": 1.0660522273425499e-05, | |
| "loss": 0.0003, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.340948358610098, | |
| "grad_norm": 0.004584463378907932, | |
| "learning_rate": 1.0629800307219663e-05, | |
| "loss": 0.0003, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.348627375695911, | |
| "grad_norm": 0.005390217173101364, | |
| "learning_rate": 1.0599078341013826e-05, | |
| "loss": 0.0003, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.356306392781724, | |
| "grad_norm": 0.006574519516791052, | |
| "learning_rate": 1.0568356374807988e-05, | |
| "loss": 0.0003, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.363985409867537, | |
| "grad_norm": 0.0031029456990706457, | |
| "learning_rate": 1.0537634408602151e-05, | |
| "loss": 0.0003, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.37166442695335, | |
| "grad_norm": 0.003485382502658449, | |
| "learning_rate": 1.0506912442396313e-05, | |
| "loss": 0.0003, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.379343444039163, | |
| "grad_norm": 0.0033652977877385503, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 0.0003, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.387022461124976, | |
| "grad_norm": 0.001994262459548078, | |
| "learning_rate": 1.044546850998464e-05, | |
| "loss": 0.0003, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.394701478210789, | |
| "grad_norm": 0.0027030822644481534, | |
| "learning_rate": 1.0414746543778802e-05, | |
| "loss": 0.0003, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.402380495296602, | |
| "grad_norm": 0.0075004858676295996, | |
| "learning_rate": 1.0384024577572966e-05, | |
| "loss": 0.0003, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.410059512382415, | |
| "grad_norm": 0.005277345871616036, | |
| "learning_rate": 1.035330261136713e-05, | |
| "loss": 0.0003, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.4177385294682283, | |
| "grad_norm": 0.0034500505732812984, | |
| "learning_rate": 1.0322580645161291e-05, | |
| "loss": 0.0003, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.425417546554041, | |
| "grad_norm": 0.004072496071322172, | |
| "learning_rate": 1.0291858678955453e-05, | |
| "loss": 0.0003, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.4330965636398543, | |
| "grad_norm": 0.0037519391939282247, | |
| "learning_rate": 1.0261136712749618e-05, | |
| "loss": 0.0003, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.440775580725667, | |
| "grad_norm": 0.0029126430566717857, | |
| "learning_rate": 1.023041474654378e-05, | |
| "loss": 0.0003, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.4484545978114802, | |
| "grad_norm": 0.006804725581164672, | |
| "learning_rate": 1.0199692780337942e-05, | |
| "loss": 0.0003, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.456133614897293, | |
| "grad_norm": 0.003394434324862927, | |
| "learning_rate": 1.0168970814132104e-05, | |
| "loss": 0.0003, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.463812631983106, | |
| "grad_norm": 0.0047839322011928136, | |
| "learning_rate": 1.0138248847926269e-05, | |
| "loss": 0.0003, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.471491649068919, | |
| "grad_norm": 0.0021047452286355496, | |
| "learning_rate": 1.0107526881720431e-05, | |
| "loss": 0.0003, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.479170666154732, | |
| "grad_norm": 0.002910893106529187, | |
| "learning_rate": 1.0076804915514593e-05, | |
| "loss": 0.0003, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.4868496832405453, | |
| "grad_norm": 0.0025188863038248495, | |
| "learning_rate": 1.0046082949308758e-05, | |
| "loss": 0.0003, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.494528700326358, | |
| "grad_norm": 0.005110455562647162, | |
| "learning_rate": 1.001536098310292e-05, | |
| "loss": 0.0003, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.5022077174121713, | |
| "grad_norm": 0.0035194967193856925, | |
| "learning_rate": 9.984639016897082e-06, | |
| "loss": 0.0003, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.509886734497984, | |
| "grad_norm": 0.004112839294946175, | |
| "learning_rate": 9.953917050691245e-06, | |
| "loss": 0.0003, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.5175657515837973, | |
| "grad_norm": 0.003219601043538841, | |
| "learning_rate": 9.923195084485407e-06, | |
| "loss": 0.0003, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.5252447686696105, | |
| "grad_norm": 0.0021615101929151476, | |
| "learning_rate": 9.89247311827957e-06, | |
| "loss": 0.0003, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.532923785755423, | |
| "grad_norm": 0.0026956859004236954, | |
| "learning_rate": 9.861751152073733e-06, | |
| "loss": 0.0003, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.5406028028412364, | |
| "grad_norm": 0.0012769547330191422, | |
| "learning_rate": 9.831029185867896e-06, | |
| "loss": 0.0003, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.548281819927049, | |
| "grad_norm": 0.004837667895037462, | |
| "learning_rate": 9.80030721966206e-06, | |
| "loss": 0.0003, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.5559608370128624, | |
| "grad_norm": 0.004331759593272771, | |
| "learning_rate": 9.769585253456221e-06, | |
| "loss": 0.0003, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.5636398540986756, | |
| "grad_norm": 0.003806749090919161, | |
| "learning_rate": 9.738863287250385e-06, | |
| "loss": 0.0003, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.5713188711844883, | |
| "grad_norm": 0.003405808798578046, | |
| "learning_rate": 9.708141321044547e-06, | |
| "loss": 0.0003, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.5789978882703015, | |
| "grad_norm": 0.0018090209587433655, | |
| "learning_rate": 9.67741935483871e-06, | |
| "loss": 0.0003, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.5866769053561143, | |
| "grad_norm": 0.0024778977279488216, | |
| "learning_rate": 9.646697388632872e-06, | |
| "loss": 0.0003, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.5943559224419275, | |
| "grad_norm": 0.005359718689721543, | |
| "learning_rate": 9.615975422427036e-06, | |
| "loss": 0.0003, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.6020349395277407, | |
| "grad_norm": 0.0012868512440999584, | |
| "learning_rate": 9.5852534562212e-06, | |
| "loss": 0.0003, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.6097139566135534, | |
| "grad_norm": 0.005696079089497556, | |
| "learning_rate": 9.554531490015361e-06, | |
| "loss": 0.0003, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.6173929736993666, | |
| "grad_norm": 0.0032109625254054023, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 0.0003, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.6250719907851794, | |
| "grad_norm": 0.002903890729728573, | |
| "learning_rate": 9.493087557603687e-06, | |
| "loss": 0.0003, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.6327510078709926, | |
| "grad_norm": 0.003523170524614984, | |
| "learning_rate": 9.46236559139785e-06, | |
| "loss": 0.0003, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.640430024956806, | |
| "grad_norm": 0.0036252760580902602, | |
| "learning_rate": 9.431643625192014e-06, | |
| "loss": 0.0003, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.6481090420426185, | |
| "grad_norm": 0.002714708446513513, | |
| "learning_rate": 9.400921658986176e-06, | |
| "loss": 0.0003, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.6557880591284313, | |
| "grad_norm": 0.0037700018885341927, | |
| "learning_rate": 9.370199692780339e-06, | |
| "loss": 0.0003, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.6634670762142445, | |
| "grad_norm": 0.005122776482125785, | |
| "learning_rate": 9.339477726574503e-06, | |
| "loss": 0.0003, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.6711460933000577, | |
| "grad_norm": 0.0013201671036341795, | |
| "learning_rate": 9.308755760368664e-06, | |
| "loss": 0.0003, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.6788251103858705, | |
| "grad_norm": 0.003020186145598462, | |
| "learning_rate": 9.278033794162828e-06, | |
| "loss": 0.0003, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.6865041274716837, | |
| "grad_norm": 0.011069671357401941, | |
| "learning_rate": 9.24731182795699e-06, | |
| "loss": 0.0003, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.6941831445574964, | |
| "grad_norm": 0.007485690656388163, | |
| "learning_rate": 9.216589861751153e-06, | |
| "loss": 0.0003, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.7018621616433096, | |
| "grad_norm": 0.00636654094660092, | |
| "learning_rate": 9.185867895545315e-06, | |
| "loss": 0.0003, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.709541178729123, | |
| "grad_norm": 0.0039619478349876185, | |
| "learning_rate": 9.155145929339479e-06, | |
| "loss": 0.0003, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.7172201958149356, | |
| "grad_norm": 0.0054654628329376094, | |
| "learning_rate": 9.124423963133642e-06, | |
| "loss": 0.0003, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.7248992129007488, | |
| "grad_norm": 0.0023486404063996135, | |
| "learning_rate": 9.093701996927804e-06, | |
| "loss": 0.0003, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.7325782299865615, | |
| "grad_norm": 0.002849175524143881, | |
| "learning_rate": 9.062980030721968e-06, | |
| "loss": 0.0003, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.7402572470723747, | |
| "grad_norm": 0.0040654911920692495, | |
| "learning_rate": 9.03225806451613e-06, | |
| "loss": 0.0003, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.747936264158188, | |
| "grad_norm": 0.003611352214922628, | |
| "learning_rate": 9.001536098310293e-06, | |
| "loss": 0.0003, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.7556152812440007, | |
| "grad_norm": 0.002668802943203528, | |
| "learning_rate": 8.970814132104455e-06, | |
| "loss": 0.0003, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.763294298329814, | |
| "grad_norm": 0.0033794836750422907, | |
| "learning_rate": 8.940092165898619e-06, | |
| "loss": 0.0003, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.7709733154156266, | |
| "grad_norm": 0.0019714078072412106, | |
| "learning_rate": 8.90937019969278e-06, | |
| "loss": 0.0003, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.77865233250144, | |
| "grad_norm": 0.0036424341616288908, | |
| "learning_rate": 8.878648233486944e-06, | |
| "loss": 0.0003, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.786331349587253, | |
| "grad_norm": 0.0025731242429069758, | |
| "learning_rate": 8.847926267281107e-06, | |
| "loss": 0.0003, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.794010366673066, | |
| "grad_norm": 0.003112848869608825, | |
| "learning_rate": 8.81720430107527e-06, | |
| "loss": 0.0003, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.801689383758879, | |
| "grad_norm": 0.0024507056173105482, | |
| "learning_rate": 8.786482334869433e-06, | |
| "loss": 0.0003, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.8093684008446917, | |
| "grad_norm": 0.003942355547919082, | |
| "learning_rate": 8.755760368663595e-06, | |
| "loss": 0.0003, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.817047417930505, | |
| "grad_norm": 0.007985074482092179, | |
| "learning_rate": 8.725038402457758e-06, | |
| "loss": 0.0003, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.824726435016318, | |
| "grad_norm": 0.006570601645436398, | |
| "learning_rate": 8.69431643625192e-06, | |
| "loss": 0.0003, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.832405452102131, | |
| "grad_norm": 0.002711244143718766, | |
| "learning_rate": 8.663594470046084e-06, | |
| "loss": 0.0003, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.840084469187944, | |
| "grad_norm": 0.0021197096154111942, | |
| "learning_rate": 8.632872503840246e-06, | |
| "loss": 0.0003, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.847763486273757, | |
| "grad_norm": 0.0030651493700618623, | |
| "learning_rate": 8.602150537634409e-06, | |
| "loss": 0.0003, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.85544250335957, | |
| "grad_norm": 0.0038564973802781943, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.0003, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.8631215204453833, | |
| "grad_norm": 0.0019345140882013855, | |
| "learning_rate": 8.540706605222734e-06, | |
| "loss": 0.0003, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.870800537531196, | |
| "grad_norm": 0.003530046987716614, | |
| "learning_rate": 8.509984639016898e-06, | |
| "loss": 0.0003, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.8784795546170088, | |
| "grad_norm": 0.0045638471491203396, | |
| "learning_rate": 8.47926267281106e-06, | |
| "loss": 0.0003, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.886158571702822, | |
| "grad_norm": 0.002548977990330342, | |
| "learning_rate": 8.448540706605223e-06, | |
| "loss": 0.0003, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.893837588788635, | |
| "grad_norm": 0.004637676739276992, | |
| "learning_rate": 8.417818740399385e-06, | |
| "loss": 0.0003, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.901516605874448, | |
| "grad_norm": 0.004249182216049807, | |
| "learning_rate": 8.387096774193549e-06, | |
| "loss": 0.0003, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.909195622960261, | |
| "grad_norm": 0.0020834658623780523, | |
| "learning_rate": 8.356374807987712e-06, | |
| "loss": 0.0003, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.916874640046074, | |
| "grad_norm": 0.005570319306302508, | |
| "learning_rate": 8.325652841781874e-06, | |
| "loss": 0.0003, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.924553657131887, | |
| "grad_norm": 0.004441691840249149, | |
| "learning_rate": 8.294930875576038e-06, | |
| "loss": 0.0003, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.9322326742177003, | |
| "grad_norm": 0.004852997098567165, | |
| "learning_rate": 8.264208909370201e-06, | |
| "loss": 0.0003, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.939911691303513, | |
| "grad_norm": 0.004796673169729304, | |
| "learning_rate": 8.233486943164363e-06, | |
| "loss": 0.0003, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.9475907083893262, | |
| "grad_norm": 0.00303272221365764, | |
| "learning_rate": 8.202764976958527e-06, | |
| "loss": 0.0003, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.955269725475139, | |
| "grad_norm": 0.009776414261676187, | |
| "learning_rate": 8.172043010752689e-06, | |
| "loss": 0.0003, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.962948742560952, | |
| "grad_norm": 0.0019734177655631514, | |
| "learning_rate": 8.141321044546852e-06, | |
| "loss": 0.0003, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.9706277596467654, | |
| "grad_norm": 0.005414209178109374, | |
| "learning_rate": 8.110599078341016e-06, | |
| "loss": 0.0003, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.978306776732578, | |
| "grad_norm": 0.0020584308196663873, | |
| "learning_rate": 8.079877112135177e-06, | |
| "loss": 0.0003, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.9859857938183914, | |
| "grad_norm": 0.002197200567243655, | |
| "learning_rate": 8.049155145929341e-06, | |
| "loss": 0.0003, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.993664810904204, | |
| "grad_norm": 0.0012909809217368014, | |
| "learning_rate": 8.018433179723503e-06, | |
| "loss": 0.0003, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.0007679017085813, | |
| "grad_norm": 0.005912347993649129, | |
| "learning_rate": 7.987711213517666e-06, | |
| "loss": 0.0003, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 3.0084469187943945, | |
| "grad_norm": 0.0024746506269591262, | |
| "learning_rate": 7.956989247311828e-06, | |
| "loss": 0.0003, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 3.0161259358802073, | |
| "grad_norm": 0.003743031987316316, | |
| "learning_rate": 7.926267281105992e-06, | |
| "loss": 0.0003, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 3.0238049529660205, | |
| "grad_norm": 0.0020760288460637535, | |
| "learning_rate": 7.895545314900154e-06, | |
| "loss": 0.0003, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 3.0314839700518332, | |
| "grad_norm": 0.010111413246967849, | |
| "learning_rate": 7.864823348694317e-06, | |
| "loss": 0.0003, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 3.0391629871376464, | |
| "grad_norm": 0.00321449927030987, | |
| "learning_rate": 7.83410138248848e-06, | |
| "loss": 0.0003, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 3.046842004223459, | |
| "grad_norm": 0.0022266492617884176, | |
| "learning_rate": 7.803379416282643e-06, | |
| "loss": 0.0003, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 3.0545210213092724, | |
| "grad_norm": 0.3330681312874651, | |
| "learning_rate": 7.772657450076806e-06, | |
| "loss": 0.0006, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 3.0622000383950856, | |
| "grad_norm": 0.0929644329244732, | |
| "learning_rate": 7.741935483870968e-06, | |
| "loss": 0.0031, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 3.0698790554808983, | |
| "grad_norm": 2.5444639341537942, | |
| "learning_rate": 7.711213517665132e-06, | |
| "loss": 0.0499, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.0775580725667115, | |
| "grad_norm": 2.706681402967684, | |
| "learning_rate": 7.680491551459293e-06, | |
| "loss": 0.0123, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 3.0852370896525243, | |
| "grad_norm": 0.3106533133384103, | |
| "learning_rate": 7.649769585253457e-06, | |
| "loss": 0.012, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 3.0929161067383375, | |
| "grad_norm": 0.011659651180711347, | |
| "learning_rate": 7.61904761904762e-06, | |
| "loss": 0.0007, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 3.1005951238241507, | |
| "grad_norm": 0.01130908041600874, | |
| "learning_rate": 7.588325652841782e-06, | |
| "loss": 0.0004, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 3.1082741409099635, | |
| "grad_norm": 0.008227145166831094, | |
| "learning_rate": 7.557603686635945e-06, | |
| "loss": 0.0003, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 3.1159531579957767, | |
| "grad_norm": 0.0055737792826447054, | |
| "learning_rate": 7.526881720430108e-06, | |
| "loss": 0.0003, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 3.1236321750815894, | |
| "grad_norm": 0.002555421706411845, | |
| "learning_rate": 7.496159754224271e-06, | |
| "loss": 0.0003, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 3.1313111921674026, | |
| "grad_norm": 0.006555677014598616, | |
| "learning_rate": 7.465437788018434e-06, | |
| "loss": 0.0003, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 3.1389902092532154, | |
| "grad_norm": 0.009434120428132338, | |
| "learning_rate": 7.434715821812597e-06, | |
| "loss": 0.0003, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 3.1466692263390286, | |
| "grad_norm": 0.0041818007155617395, | |
| "learning_rate": 7.403993855606759e-06, | |
| "loss": 0.0003, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.1543482434248418, | |
| "grad_norm": 0.002877849745067617, | |
| "learning_rate": 7.373271889400923e-06, | |
| "loss": 0.0003, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 3.1620272605106545, | |
| "grad_norm": 0.0030447348846358723, | |
| "learning_rate": 7.342549923195085e-06, | |
| "loss": 0.0003, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 3.1697062775964677, | |
| "grad_norm": 0.0060218718801864175, | |
| "learning_rate": 7.311827956989248e-06, | |
| "loss": 0.0003, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 3.1773852946822805, | |
| "grad_norm": 0.006332525678995614, | |
| "learning_rate": 7.28110599078341e-06, | |
| "loss": 0.0003, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 3.1850643117680937, | |
| "grad_norm": 0.003215146996700883, | |
| "learning_rate": 7.250384024577574e-06, | |
| "loss": 0.0003, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 3.192743328853907, | |
| "grad_norm": 0.005935997258576502, | |
| "learning_rate": 7.2196620583717355e-06, | |
| "loss": 0.0003, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 3.2004223459397196, | |
| "grad_norm": 0.004157021390061587, | |
| "learning_rate": 7.188940092165899e-06, | |
| "loss": 0.0003, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 3.208101363025533, | |
| "grad_norm": 0.004859684777513284, | |
| "learning_rate": 7.158218125960063e-06, | |
| "loss": 0.0003, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 3.2157803801113456, | |
| "grad_norm": 0.004857019863143934, | |
| "learning_rate": 7.1274961597542245e-06, | |
| "loss": 0.0003, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 3.223459397197159, | |
| "grad_norm": 0.003878455168237981, | |
| "learning_rate": 7.096774193548388e-06, | |
| "loss": 0.0003, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.231138414282972, | |
| "grad_norm": 0.004425680070729378, | |
| "learning_rate": 7.06605222734255e-06, | |
| "loss": 0.0003, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 3.2388174313687847, | |
| "grad_norm": 0.0012776092639396753, | |
| "learning_rate": 7.0353302611367134e-06, | |
| "loss": 0.0003, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 3.246496448454598, | |
| "grad_norm": 0.0032977925007112736, | |
| "learning_rate": 7.004608294930876e-06, | |
| "loss": 0.0003, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 3.2541754655404107, | |
| "grad_norm": 0.00520607446673023, | |
| "learning_rate": 6.973886328725039e-06, | |
| "loss": 0.0003, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 3.261854482626224, | |
| "grad_norm": 0.005908417082190133, | |
| "learning_rate": 6.9431643625192015e-06, | |
| "loss": 0.0003, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 3.269533499712037, | |
| "grad_norm": 0.005104388415579662, | |
| "learning_rate": 6.912442396313365e-06, | |
| "loss": 0.0003, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 3.27721251679785, | |
| "grad_norm": 0.00279843439440394, | |
| "learning_rate": 6.881720430107528e-06, | |
| "loss": 0.0003, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 3.284891533883663, | |
| "grad_norm": 0.004221914284256829, | |
| "learning_rate": 6.8509984639016905e-06, | |
| "loss": 0.0003, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 3.292570550969476, | |
| "grad_norm": 0.002789017236217476, | |
| "learning_rate": 6.820276497695853e-06, | |
| "loss": 0.0003, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 3.300249568055289, | |
| "grad_norm": 0.003591747007618594, | |
| "learning_rate": 6.789554531490016e-06, | |
| "loss": 0.0003, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.3079285851411018, | |
| "grad_norm": 0.0056326237606971275, | |
| "learning_rate": 6.758832565284179e-06, | |
| "loss": 0.0003, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 3.315607602226915, | |
| "grad_norm": 0.0027487580853665806, | |
| "learning_rate": 6.728110599078341e-06, | |
| "loss": 0.0003, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 3.3232866193127277, | |
| "grad_norm": 0.005221571566163444, | |
| "learning_rate": 6.697388632872505e-06, | |
| "loss": 0.0003, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 3.330965636398541, | |
| "grad_norm": 0.005764385454293689, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0003, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 3.338644653484354, | |
| "grad_norm": 0.0019604837190694527, | |
| "learning_rate": 6.63594470046083e-06, | |
| "loss": 0.0003, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 3.346323670570167, | |
| "grad_norm": 0.002896385918252742, | |
| "learning_rate": 6.605222734254992e-06, | |
| "loss": 0.0003, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 3.35400268765598, | |
| "grad_norm": 0.00606820751300495, | |
| "learning_rate": 6.574500768049156e-06, | |
| "loss": 0.0003, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 3.361681704741793, | |
| "grad_norm": 0.004591568141116001, | |
| "learning_rate": 6.543778801843319e-06, | |
| "loss": 0.0003, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 3.369360721827606, | |
| "grad_norm": 0.002433120666611045, | |
| "learning_rate": 6.513056835637481e-06, | |
| "loss": 0.0003, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 3.3770397389134192, | |
| "grad_norm": 0.00531173620847677, | |
| "learning_rate": 6.4823348694316445e-06, | |
| "loss": 0.0003, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.384718755999232, | |
| "grad_norm": 0.005122482104609612, | |
| "learning_rate": 6.451612903225806e-06, | |
| "loss": 0.0003, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 3.392397773085045, | |
| "grad_norm": 0.004100500912973401, | |
| "learning_rate": 6.42089093701997e-06, | |
| "loss": 0.0003, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 3.400076790170858, | |
| "grad_norm": 0.0023809509840975367, | |
| "learning_rate": 6.390168970814133e-06, | |
| "loss": 0.0003, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 3.407755807256671, | |
| "grad_norm": 0.0026348003137030064, | |
| "learning_rate": 6.359447004608295e-06, | |
| "loss": 0.0003, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 3.4154348243424844, | |
| "grad_norm": 0.0013064532447140981, | |
| "learning_rate": 6.328725038402458e-06, | |
| "loss": 0.0003, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.423113841428297, | |
| "grad_norm": 0.004540940802478115, | |
| "learning_rate": 6.2980030721966216e-06, | |
| "loss": 0.0003, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 3.4307928585141103, | |
| "grad_norm": 0.0026972329703791845, | |
| "learning_rate": 6.267281105990783e-06, | |
| "loss": 0.0003, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 3.438471875599923, | |
| "grad_norm": 0.004636444780767564, | |
| "learning_rate": 6.236559139784947e-06, | |
| "loss": 0.0003, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 3.4461508926857363, | |
| "grad_norm": 0.003960381641830104, | |
| "learning_rate": 6.2058371735791105e-06, | |
| "loss": 0.0003, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 3.4538299097715495, | |
| "grad_norm": 0.00474082033962727, | |
| "learning_rate": 6.175115207373272e-06, | |
| "loss": 0.0003, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.4615089268573622, | |
| "grad_norm": 0.0038782360963232256, | |
| "learning_rate": 6.144393241167436e-06, | |
| "loss": 0.0003, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 3.4691879439431754, | |
| "grad_norm": 0.0035297720880386315, | |
| "learning_rate": 6.113671274961598e-06, | |
| "loss": 0.0003, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 3.476866961028988, | |
| "grad_norm": 0.004232110212456926, | |
| "learning_rate": 6.082949308755761e-06, | |
| "loss": 0.0003, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 3.4845459781148014, | |
| "grad_norm": 0.002775233265931185, | |
| "learning_rate": 6.052227342549923e-06, | |
| "loss": 0.0002, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 3.492224995200614, | |
| "grad_norm": 0.0022484687297900418, | |
| "learning_rate": 6.021505376344087e-06, | |
| "loss": 0.0003, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.4999040122864273, | |
| "grad_norm": 0.010059652937527065, | |
| "learning_rate": 5.9907834101382485e-06, | |
| "loss": 0.0003, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 3.50758302937224, | |
| "grad_norm": 0.005860058288903284, | |
| "learning_rate": 5.960061443932412e-06, | |
| "loss": 0.0003, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 3.5152620464580533, | |
| "grad_norm": 0.001771873531568768, | |
| "learning_rate": 5.929339477726575e-06, | |
| "loss": 0.0003, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 3.5229410635438665, | |
| "grad_norm": 0.0037856677349401477, | |
| "learning_rate": 5.8986175115207375e-06, | |
| "loss": 0.0003, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 3.5306200806296792, | |
| "grad_norm": 0.00262376974978298, | |
| "learning_rate": 5.867895545314901e-06, | |
| "loss": 0.0003, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.5382990977154924, | |
| "grad_norm": 0.003097531567235156, | |
| "learning_rate": 5.837173579109064e-06, | |
| "loss": 0.0003, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 3.545978114801305, | |
| "grad_norm": 0.0019368382797062594, | |
| "learning_rate": 5.806451612903226e-06, | |
| "loss": 0.0003, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 3.5536571318871184, | |
| "grad_norm": 0.004336527548689335, | |
| "learning_rate": 5.775729646697389e-06, | |
| "loss": 0.0003, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 3.5613361489729316, | |
| "grad_norm": 0.001542836625256801, | |
| "learning_rate": 5.745007680491552e-06, | |
| "loss": 0.0003, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 3.5690151660587444, | |
| "grad_norm": 0.006404316601820908, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.0003, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.5766941831445576, | |
| "grad_norm": 0.0029147588687753185, | |
| "learning_rate": 5.683563748079878e-06, | |
| "loss": 0.0003, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 3.5843732002303703, | |
| "grad_norm": 0.009747259780473038, | |
| "learning_rate": 5.65284178187404e-06, | |
| "loss": 0.0003, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 3.5920522173161835, | |
| "grad_norm": 0.005435655230219059, | |
| "learning_rate": 5.6221198156682035e-06, | |
| "loss": 0.0003, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 3.5997312344019967, | |
| "grad_norm": 0.0011809589959859185, | |
| "learning_rate": 5.591397849462365e-06, | |
| "loss": 0.0003, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 3.6074102514878095, | |
| "grad_norm": 0.0043535225911655855, | |
| "learning_rate": 5.560675883256529e-06, | |
| "loss": 0.0003, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.6150892685736227, | |
| "grad_norm": 0.002409686221015935, | |
| "learning_rate": 5.529953917050692e-06, | |
| "loss": 0.0003, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 3.6227682856594354, | |
| "grad_norm": 0.0021895075558719937, | |
| "learning_rate": 5.499231950844854e-06, | |
| "loss": 0.0003, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 3.6304473027452486, | |
| "grad_norm": 0.002325096639061682, | |
| "learning_rate": 5.468509984639018e-06, | |
| "loss": 0.0003, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 3.638126319831062, | |
| "grad_norm": 0.005217362404820908, | |
| "learning_rate": 5.43778801843318e-06, | |
| "loss": 0.0003, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 3.6458053369168746, | |
| "grad_norm": 0.0025128558421436426, | |
| "learning_rate": 5.407066052227343e-06, | |
| "loss": 0.0003, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.653484354002688, | |
| "grad_norm": 0.003260458572828957, | |
| "learning_rate": 5.376344086021506e-06, | |
| "loss": 0.0003, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 3.6611633710885005, | |
| "grad_norm": 0.004920360483939583, | |
| "learning_rate": 5.345622119815669e-06, | |
| "loss": 0.0003, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 3.6688423881743137, | |
| "grad_norm": 0.006017572517411142, | |
| "learning_rate": 5.314900153609831e-06, | |
| "loss": 0.0003, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 3.676521405260127, | |
| "grad_norm": 0.003921825806007615, | |
| "learning_rate": 5.284178187403994e-06, | |
| "loss": 0.0003, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 3.6842004223459397, | |
| "grad_norm": 0.0026317828696058375, | |
| "learning_rate": 5.253456221198157e-06, | |
| "loss": 0.0003, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.691879439431753, | |
| "grad_norm": 0.0018516494095851901, | |
| "learning_rate": 5.22273425499232e-06, | |
| "loss": 0.0003, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 3.6995584565175657, | |
| "grad_norm": 0.0018334101081003715, | |
| "learning_rate": 5.192012288786483e-06, | |
| "loss": 0.0003, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 3.707237473603379, | |
| "grad_norm": 0.0026102899650512555, | |
| "learning_rate": 5.161290322580646e-06, | |
| "loss": 0.0003, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 3.714916490689192, | |
| "grad_norm": 0.0041980444717698105, | |
| "learning_rate": 5.130568356374809e-06, | |
| "loss": 0.0002, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 3.722595507775005, | |
| "grad_norm": 0.002162407360619667, | |
| "learning_rate": 5.099846390168971e-06, | |
| "loss": 0.0003, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.7302745248608176, | |
| "grad_norm": 0.003927892142640596, | |
| "learning_rate": 5.0691244239631346e-06, | |
| "loss": 0.0003, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 3.7379535419466308, | |
| "grad_norm": 0.006489628564389115, | |
| "learning_rate": 5.038402457757296e-06, | |
| "loss": 0.0003, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 3.745632559032444, | |
| "grad_norm": 0.0021265975651571616, | |
| "learning_rate": 5.00768049155146e-06, | |
| "loss": 0.0003, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 3.7533115761182567, | |
| "grad_norm": 0.008523718567678173, | |
| "learning_rate": 4.976958525345623e-06, | |
| "loss": 0.0002, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 3.76099059320407, | |
| "grad_norm": 0.00219483779562531, | |
| "learning_rate": 4.946236559139785e-06, | |
| "loss": 0.0003, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.7686696102898827, | |
| "grad_norm": 0.0026242522484158557, | |
| "learning_rate": 4.915514592933948e-06, | |
| "loss": 0.0003, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 3.776348627375696, | |
| "grad_norm": 0.0030264706878712897, | |
| "learning_rate": 4.884792626728111e-06, | |
| "loss": 0.0003, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 3.784027644461509, | |
| "grad_norm": 0.006051061980646295, | |
| "learning_rate": 4.8540706605222734e-06, | |
| "loss": 0.0003, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 3.791706661547322, | |
| "grad_norm": 0.0076497369041367514, | |
| "learning_rate": 4.823348694316436e-06, | |
| "loss": 0.0003, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 3.799385678633135, | |
| "grad_norm": 0.003304104700477534, | |
| "learning_rate": 4.7926267281106e-06, | |
| "loss": 0.0003, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.807064695718948, | |
| "grad_norm": 0.001277891356393671, | |
| "learning_rate": 4.761904761904762e-06, | |
| "loss": 0.0003, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 3.814743712804761, | |
| "grad_norm": 0.0016109781447346389, | |
| "learning_rate": 4.731182795698925e-06, | |
| "loss": 0.0002, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 3.822422729890574, | |
| "grad_norm": 0.0032221247449133765, | |
| "learning_rate": 4.700460829493088e-06, | |
| "loss": 0.0003, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 3.830101746976387, | |
| "grad_norm": 0.0023617321542409296, | |
| "learning_rate": 4.669738863287251e-06, | |
| "loss": 0.0003, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 3.8377807640622, | |
| "grad_norm": 0.0016875889883648163, | |
| "learning_rate": 4.639016897081414e-06, | |
| "loss": 0.0002, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 6510, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.088806659181773e+16, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |