| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 363, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002758620689655172, | |
| "grad_norm": 2.2012167823201163, | |
| "learning_rate": 0.0, | |
| "loss": 0.0852, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005517241379310344, | |
| "grad_norm": 2.684327612345962, | |
| "learning_rate": 5.405405405405406e-07, | |
| "loss": 0.1171, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008275862068965517, | |
| "grad_norm": 2.922848494791373, | |
| "learning_rate": 1.0810810810810812e-06, | |
| "loss": 0.1201, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.011034482758620689, | |
| "grad_norm": 2.923985908786216, | |
| "learning_rate": 1.6216216216216219e-06, | |
| "loss": 0.0976, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013793103448275862, | |
| "grad_norm": 1.2724240051553533, | |
| "learning_rate": 2.1621621621621623e-06, | |
| "loss": 0.1046, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.016551724137931035, | |
| "grad_norm": 1.673019205535178, | |
| "learning_rate": 2.702702702702703e-06, | |
| "loss": 0.0609, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.019310344827586208, | |
| "grad_norm": 2.504855019369174, | |
| "learning_rate": 3.2432432432432437e-06, | |
| "loss": 0.081, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.022068965517241378, | |
| "grad_norm": 1.878885411504625, | |
| "learning_rate": 3.7837837837837844e-06, | |
| "loss": 0.1121, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02482758620689655, | |
| "grad_norm": 2.653287813889321, | |
| "learning_rate": 4.324324324324325e-06, | |
| "loss": 0.1125, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.027586206896551724, | |
| "grad_norm": 1.7453919996030804, | |
| "learning_rate": 4.864864864864866e-06, | |
| "loss": 0.0724, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.030344827586206897, | |
| "grad_norm": 3.333638766021421, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 0.1475, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03310344827586207, | |
| "grad_norm": 2.292734722563975, | |
| "learning_rate": 5.945945945945947e-06, | |
| "loss": 0.0974, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03586206896551724, | |
| "grad_norm": 1.099049620799708, | |
| "learning_rate": 6.486486486486487e-06, | |
| "loss": 0.0597, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.038620689655172416, | |
| "grad_norm": 1.727408246250002, | |
| "learning_rate": 7.027027027027028e-06, | |
| "loss": 0.0778, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.041379310344827586, | |
| "grad_norm": 1.761342077131181, | |
| "learning_rate": 7.567567567567569e-06, | |
| "loss": 0.0955, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.044137931034482755, | |
| "grad_norm": 1.9277700023635902, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 0.0869, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04689655172413793, | |
| "grad_norm": 1.568808525211501, | |
| "learning_rate": 8.64864864864865e-06, | |
| "loss": 0.0807, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0496551724137931, | |
| "grad_norm": 1.3523306531543753, | |
| "learning_rate": 9.189189189189191e-06, | |
| "loss": 0.0665, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.05241379310344828, | |
| "grad_norm": 1.7180837689557336, | |
| "learning_rate": 9.729729729729732e-06, | |
| "loss": 0.0868, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05517241379310345, | |
| "grad_norm": 1.5131863973051378, | |
| "learning_rate": 1.027027027027027e-05, | |
| "loss": 0.0736, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.057931034482758624, | |
| "grad_norm": 1.6815268723910783, | |
| "learning_rate": 1.0810810810810812e-05, | |
| "loss": 0.079, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.060689655172413794, | |
| "grad_norm": 1.4834028913829107, | |
| "learning_rate": 1.1351351351351352e-05, | |
| "loss": 0.0437, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06344827586206897, | |
| "grad_norm": 2.1417510445007792, | |
| "learning_rate": 1.1891891891891894e-05, | |
| "loss": 0.0905, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06620689655172414, | |
| "grad_norm": 1.7871991104284883, | |
| "learning_rate": 1.2432432432432433e-05, | |
| "loss": 0.1024, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06896551724137931, | |
| "grad_norm": 2.0156063129423485, | |
| "learning_rate": 1.2972972972972975e-05, | |
| "loss": 0.0701, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07172413793103448, | |
| "grad_norm": 1.9160557707810737, | |
| "learning_rate": 1.3513513513513515e-05, | |
| "loss": 0.0748, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07448275862068965, | |
| "grad_norm": 2.000911025201981, | |
| "learning_rate": 1.4054054054054055e-05, | |
| "loss": 0.101, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07724137931034483, | |
| "grad_norm": 2.115047269982351, | |
| "learning_rate": 1.4594594594594596e-05, | |
| "loss": 0.0918, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.8098326272573262, | |
| "learning_rate": 1.5135135135135138e-05, | |
| "loss": 0.0915, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08275862068965517, | |
| "grad_norm": 2.1207994498886764, | |
| "learning_rate": 1.5675675675675676e-05, | |
| "loss": 0.1025, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08551724137931034, | |
| "grad_norm": 2.171868532095742, | |
| "learning_rate": 1.6216216216216218e-05, | |
| "loss": 0.1059, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08827586206896551, | |
| "grad_norm": 1.6943229112776936, | |
| "learning_rate": 1.6756756756756757e-05, | |
| "loss": 0.0677, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0910344827586207, | |
| "grad_norm": 1.994899794317826, | |
| "learning_rate": 1.72972972972973e-05, | |
| "loss": 0.0894, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.09379310344827586, | |
| "grad_norm": 2.3711691862715907, | |
| "learning_rate": 1.783783783783784e-05, | |
| "loss": 0.1158, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09655172413793103, | |
| "grad_norm": 1.9877327316155244, | |
| "learning_rate": 1.8378378378378383e-05, | |
| "loss": 0.0924, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0993103448275862, | |
| "grad_norm": 2.0641113315593898, | |
| "learning_rate": 1.891891891891892e-05, | |
| "loss": 0.0978, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.10206896551724139, | |
| "grad_norm": 2.5636633454788593, | |
| "learning_rate": 1.9459459459459463e-05, | |
| "loss": 0.1327, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.10482758620689656, | |
| "grad_norm": 2.3262721461116396, | |
| "learning_rate": 2e-05, | |
| "loss": 0.095, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.10758620689655173, | |
| "grad_norm": 2.0685116596847832, | |
| "learning_rate": 1.9999535665248e-05, | |
| "loss": 0.0706, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1103448275862069, | |
| "grad_norm": 2.5954824427417007, | |
| "learning_rate": 1.999814270411335e-05, | |
| "loss": 0.1205, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11310344827586206, | |
| "grad_norm": 2.144122161715886, | |
| "learning_rate": 1.99958212459561e-05, | |
| "loss": 0.0896, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.11586206896551725, | |
| "grad_norm": 1.9322201299634434, | |
| "learning_rate": 1.9992571506362997e-05, | |
| "loss": 0.1012, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.11862068965517242, | |
| "grad_norm": 2.075048476162322, | |
| "learning_rate": 1.9988393787127444e-05, | |
| "loss": 0.1087, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.12137931034482759, | |
| "grad_norm": 2.259726318820954, | |
| "learning_rate": 1.9983288476221482e-05, | |
| "loss": 0.1382, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12413793103448276, | |
| "grad_norm": 1.360664005910794, | |
| "learning_rate": 1.9977256047759765e-05, | |
| "loss": 0.0538, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.12689655172413794, | |
| "grad_norm": 1.6435792074598727, | |
| "learning_rate": 1.9970297061955533e-05, | |
| "loss": 0.0855, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1296551724137931, | |
| "grad_norm": 2.3601345815092483, | |
| "learning_rate": 1.9962412165068575e-05, | |
| "loss": 0.1386, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.13241379310344828, | |
| "grad_norm": 1.7105484837560168, | |
| "learning_rate": 1.9953602089345215e-05, | |
| "loss": 0.0961, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.13517241379310344, | |
| "grad_norm": 1.6496292161975452, | |
| "learning_rate": 1.9943867652950323e-05, | |
| "loss": 0.0857, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.13793103448275862, | |
| "grad_norm": 1.2468755808496865, | |
| "learning_rate": 1.9933209759891318e-05, | |
| "loss": 0.0563, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1406896551724138, | |
| "grad_norm": 2.0793413384987183, | |
| "learning_rate": 1.9921629399934224e-05, | |
| "loss": 0.108, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.14344827586206896, | |
| "grad_norm": 2.0093677538398524, | |
| "learning_rate": 1.9909127648511758e-05, | |
| "loss": 0.1289, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14620689655172414, | |
| "grad_norm": 1.2736371995174656, | |
| "learning_rate": 1.989570566662345e-05, | |
| "loss": 0.0615, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.1489655172413793, | |
| "grad_norm": 1.5368399292599437, | |
| "learning_rate": 1.9881364700727827e-05, | |
| "loss": 0.0559, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.15172413793103448, | |
| "grad_norm": 1.6143124587473046, | |
| "learning_rate": 1.986610608262665e-05, | |
| "loss": 0.0512, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.15448275862068966, | |
| "grad_norm": 2.0640396297545323, | |
| "learning_rate": 1.9849931229341258e-05, | |
| "loss": 0.1492, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.15724137931034482, | |
| "grad_norm": 1.9321341515451917, | |
| "learning_rate": 1.9832841642980948e-05, | |
| "loss": 0.1277, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 3.5565020190489753, | |
| "learning_rate": 1.981483891060348e-05, | |
| "loss": 0.1266, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.16275862068965516, | |
| "grad_norm": 1.6731150384360727, | |
| "learning_rate": 1.979592470406772e-05, | |
| "loss": 0.095, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.16551724137931034, | |
| "grad_norm": 1.3348447458614452, | |
| "learning_rate": 1.9776100779878344e-05, | |
| "loss": 0.0602, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16827586206896553, | |
| "grad_norm": 1.211440894700164, | |
| "learning_rate": 1.9755368979022734e-05, | |
| "loss": 0.0597, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.17103448275862068, | |
| "grad_norm": 2.045395762464389, | |
| "learning_rate": 1.9733731226800016e-05, | |
| "loss": 0.0902, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.17379310344827587, | |
| "grad_norm": 1.681889406229311, | |
| "learning_rate": 1.9711189532642244e-05, | |
| "loss": 0.0851, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.17655172413793102, | |
| "grad_norm": 2.156485405646681, | |
| "learning_rate": 1.9687745989927823e-05, | |
| "loss": 0.1022, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.1793103448275862, | |
| "grad_norm": 2.2694346385487765, | |
| "learning_rate": 1.9663402775787066e-05, | |
| "loss": 0.1402, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1820689655172414, | |
| "grad_norm": 2.2102729735649502, | |
| "learning_rate": 1.9638162150900028e-05, | |
| "loss": 0.0595, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.18482758620689654, | |
| "grad_norm": 2.4808367225986023, | |
| "learning_rate": 1.961202645928658e-05, | |
| "loss": 0.1526, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.18758620689655173, | |
| "grad_norm": 2.125991623159161, | |
| "learning_rate": 1.9584998128088686e-05, | |
| "loss": 0.1149, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.19034482758620688, | |
| "grad_norm": 3.5959738930248553, | |
| "learning_rate": 1.955707966734505e-05, | |
| "loss": 0.1724, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.19310344827586207, | |
| "grad_norm": 2.217349082976429, | |
| "learning_rate": 1.9528273669757974e-05, | |
| "loss": 0.1357, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19586206896551725, | |
| "grad_norm": 2.9019793725624288, | |
| "learning_rate": 1.9498582810452607e-05, | |
| "loss": 0.1289, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1986206896551724, | |
| "grad_norm": 2.677822777383495, | |
| "learning_rate": 1.9468009846728515e-05, | |
| "loss": 0.1201, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2013793103448276, | |
| "grad_norm": 2.1904068713477693, | |
| "learning_rate": 1.9436557617803594e-05, | |
| "loss": 0.1343, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.20413793103448277, | |
| "grad_norm": 1.9333752870288448, | |
| "learning_rate": 1.9404229044550432e-05, | |
| "loss": 0.1014, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.20689655172413793, | |
| "grad_norm": 1.2594973301567354, | |
| "learning_rate": 1.9371027129225042e-05, | |
| "loss": 0.08, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.2096551724137931, | |
| "grad_norm": 1.1588871349219974, | |
| "learning_rate": 1.9336954955188042e-05, | |
| "loss": 0.0664, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.21241379310344827, | |
| "grad_norm": 2.424390700582275, | |
| "learning_rate": 1.9302015686618328e-05, | |
| "loss": 0.1228, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.21517241379310345, | |
| "grad_norm": 1.839851381331024, | |
| "learning_rate": 1.9266212568219223e-05, | |
| "loss": 0.1181, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.21793103448275863, | |
| "grad_norm": 2.9112052255264627, | |
| "learning_rate": 1.9229548924917146e-05, | |
| "loss": 0.1593, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2206896551724138, | |
| "grad_norm": 2.6573066473445124, | |
| "learning_rate": 1.9192028161552848e-05, | |
| "loss": 0.1068, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22344827586206897, | |
| "grad_norm": 2.228711734634549, | |
| "learning_rate": 1.915365376256519e-05, | |
| "loss": 0.1395, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.22620689655172413, | |
| "grad_norm": 2.1487793287842463, | |
| "learning_rate": 1.9114429291667583e-05, | |
| "loss": 0.1322, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.2289655172413793, | |
| "grad_norm": 1.418589529134045, | |
| "learning_rate": 1.9074358391517026e-05, | |
| "loss": 0.1298, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.2317241379310345, | |
| "grad_norm": 1.6184969490675134, | |
| "learning_rate": 1.9033444783375806e-05, | |
| "loss": 0.1375, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.23448275862068965, | |
| "grad_norm": 2.636414425888468, | |
| "learning_rate": 1.8991692266765947e-05, | |
| "loss": 0.1454, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.23724137931034484, | |
| "grad_norm": 1.5999297559125214, | |
| "learning_rate": 1.8949104719116334e-05, | |
| "loss": 0.1004, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.7047937938863038, | |
| "learning_rate": 1.8905686095402648e-05, | |
| "loss": 0.0729, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.24275862068965517, | |
| "grad_norm": 2.213860086801788, | |
| "learning_rate": 1.886144042778006e-05, | |
| "loss": 0.1219, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.24551724137931036, | |
| "grad_norm": 1.9830213856101813, | |
| "learning_rate": 1.881637182520879e-05, | |
| "loss": 0.1277, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2482758620689655, | |
| "grad_norm": 2.1526995896994543, | |
| "learning_rate": 1.8770484473072518e-05, | |
| "loss": 0.133, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.25103448275862067, | |
| "grad_norm": 5.615536299423343, | |
| "learning_rate": 1.87237826327897e-05, | |
| "loss": 0.1271, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.2537931034482759, | |
| "grad_norm": 2.9892624763616213, | |
| "learning_rate": 1.8676270641417824e-05, | |
| "loss": 0.1337, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.25655172413793104, | |
| "grad_norm": 1.7670570813048203, | |
| "learning_rate": 1.8627952911250632e-05, | |
| "loss": 0.1188, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.2593103448275862, | |
| "grad_norm": 1.8832817735619782, | |
| "learning_rate": 1.857883392940837e-05, | |
| "loss": 0.09, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2620689655172414, | |
| "grad_norm": 2.2995757188920964, | |
| "learning_rate": 1.85289182574211e-05, | |
| "loss": 0.1084, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.26482758620689656, | |
| "grad_norm": 2.268436925129853, | |
| "learning_rate": 1.847821053080505e-05, | |
| "loss": 0.1003, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2675862068965517, | |
| "grad_norm": 1.8417005302085403, | |
| "learning_rate": 1.8426715458632154e-05, | |
| "loss": 0.0747, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.27034482758620687, | |
| "grad_norm": 2.497572736496612, | |
| "learning_rate": 1.8374437823092726e-05, | |
| "loss": 0.1513, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2731034482758621, | |
| "grad_norm": 2.0212629509138766, | |
| "learning_rate": 1.832138247905135e-05, | |
| "loss": 0.088, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 1.806243660764203, | |
| "learning_rate": 1.8267554353596027e-05, | |
| "loss": 0.0617, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2786206896551724, | |
| "grad_norm": 1.43840041921586, | |
| "learning_rate": 1.8212958445580623e-05, | |
| "loss": 0.0809, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2813793103448276, | |
| "grad_norm": 1.5624416473825227, | |
| "learning_rate": 1.815759982516061e-05, | |
| "loss": 0.0987, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.28413793103448276, | |
| "grad_norm": 1.3722413173440258, | |
| "learning_rate": 1.8101483633322255e-05, | |
| "loss": 0.1055, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2868965517241379, | |
| "grad_norm": 1.5055322976041905, | |
| "learning_rate": 1.8044615081405153e-05, | |
| "loss": 0.12, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2896551724137931, | |
| "grad_norm": 1.9238100509571232, | |
| "learning_rate": 1.7986999450618295e-05, | |
| "loss": 0.1293, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2924137931034483, | |
| "grad_norm": 1.4169309751550185, | |
| "learning_rate": 1.7928642091549616e-05, | |
| "loss": 0.0945, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.29517241379310344, | |
| "grad_norm": 1.7127369268478376, | |
| "learning_rate": 1.7869548423669075e-05, | |
| "loss": 0.1124, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2979310344827586, | |
| "grad_norm": 1.859903898855105, | |
| "learning_rate": 1.7809723934825405e-05, | |
| "loss": 0.1182, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3006896551724138, | |
| "grad_norm": 1.4775939025780847, | |
| "learning_rate": 1.7749174180736443e-05, | |
| "loss": 0.0857, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.30344827586206896, | |
| "grad_norm": 1.4354799536836982, | |
| "learning_rate": 1.768790478447319e-05, | |
| "loss": 0.0781, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3062068965517241, | |
| "grad_norm": 1.0273549786544405, | |
| "learning_rate": 1.762592143593764e-05, | |
| "loss": 0.0713, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.30896551724137933, | |
| "grad_norm": 1.9640486386494376, | |
| "learning_rate": 1.756322989133434e-05, | |
| "loss": 0.1301, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3117241379310345, | |
| "grad_norm": 1.7089277562540073, | |
| "learning_rate": 1.749983597263586e-05, | |
| "loss": 0.0681, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.31448275862068964, | |
| "grad_norm": 1.9429869321530793, | |
| "learning_rate": 1.7435745567042096e-05, | |
| "loss": 0.1331, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.31724137931034485, | |
| "grad_norm": 1.26536650090277, | |
| "learning_rate": 1.737096462643357e-05, | |
| "loss": 0.0604, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.087216494160791, | |
| "learning_rate": 1.730549916681868e-05, | |
| "loss": 0.1494, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.32275862068965516, | |
| "grad_norm": 1.9975034441736954, | |
| "learning_rate": 1.723935526777502e-05, | |
| "loss": 0.1092, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3255172413793103, | |
| "grad_norm": 2.038235893575754, | |
| "learning_rate": 1.717253907188477e-05, | |
| "loss": 0.1082, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.32827586206896553, | |
| "grad_norm": 3.3184113391653396, | |
| "learning_rate": 1.7105056784164295e-05, | |
| "loss": 0.1703, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3310344827586207, | |
| "grad_norm": 2.0270058507038966, | |
| "learning_rate": 1.7036914671487854e-05, | |
| "loss": 0.1303, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.33379310344827584, | |
| "grad_norm": 1.8154593381383104, | |
| "learning_rate": 1.6968119062005644e-05, | |
| "loss": 0.1077, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.33655172413793105, | |
| "grad_norm": 1.2350092975243878, | |
| "learning_rate": 1.689867634455612e-05, | |
| "loss": 0.0924, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3393103448275862, | |
| "grad_norm": 1.792900391384705, | |
| "learning_rate": 1.682859296807268e-05, | |
| "loss": 0.1221, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.34206896551724136, | |
| "grad_norm": 1.9404357508554042, | |
| "learning_rate": 1.675787544098477e-05, | |
| "loss": 0.1106, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 1.7814508590384097, | |
| "learning_rate": 1.6686530330613472e-05, | |
| "loss": 0.1122, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.34758620689655173, | |
| "grad_norm": 1.53815702992395, | |
| "learning_rate": 1.661456426256161e-05, | |
| "loss": 0.087, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3503448275862069, | |
| "grad_norm": 1.9077349161199095, | |
| "learning_rate": 1.6541983920098462e-05, | |
| "loss": 0.087, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.35310344827586204, | |
| "grad_norm": 2.2996250559408704, | |
| "learning_rate": 1.6468796043539082e-05, | |
| "loss": 0.1254, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.35586206896551725, | |
| "grad_norm": 1.2031295382564096, | |
| "learning_rate": 1.639500742961838e-05, | |
| "loss": 0.0498, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.3586206896551724, | |
| "grad_norm": 1.3436281870681093, | |
| "learning_rate": 1.6320624930859905e-05, | |
| "loss": 0.077, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.36137931034482756, | |
| "grad_norm": 0.6314939772663396, | |
| "learning_rate": 1.6245655454939474e-05, | |
| "loss": 0.0352, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3641379310344828, | |
| "grad_norm": 2.420910473317855, | |
| "learning_rate": 1.6170105964043698e-05, | |
| "loss": 0.1228, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.36689655172413793, | |
| "grad_norm": 2.80050435155021, | |
| "learning_rate": 1.6093983474223392e-05, | |
| "loss": 0.1799, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3696551724137931, | |
| "grad_norm": 1.9733179937634817, | |
| "learning_rate": 1.6017295054742045e-05, | |
| "loss": 0.1088, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3724137931034483, | |
| "grad_norm": 0.8866446082715613, | |
| "learning_rate": 1.5940047827419305e-05, | |
| "loss": 0.0722, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.37517241379310345, | |
| "grad_norm": 1.4747083716633578, | |
| "learning_rate": 1.5862248965969604e-05, | |
| "loss": 0.0902, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3779310344827586, | |
| "grad_norm": 1.203872871772734, | |
| "learning_rate": 1.5783905695335947e-05, | |
| "loss": 0.0763, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.38068965517241377, | |
| "grad_norm": 1.295759494638433, | |
| "learning_rate": 1.570502529101896e-05, | |
| "loss": 0.0582, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.383448275862069, | |
| "grad_norm": 2.082719193654199, | |
| "learning_rate": 1.5625615078401244e-05, | |
| "loss": 0.1131, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.38620689655172413, | |
| "grad_norm": 2.4303248947365046, | |
| "learning_rate": 1.5545682432067068e-05, | |
| "loss": 0.1319, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3889655172413793, | |
| "grad_norm": 2.308746027421277, | |
| "learning_rate": 1.5465234775117538e-05, | |
| "loss": 0.1232, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3917241379310345, | |
| "grad_norm": 2.9364629760972907, | |
| "learning_rate": 1.5384279578481223e-05, | |
| "loss": 0.1537, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.39448275862068966, | |
| "grad_norm": 1.8350345374035169, | |
| "learning_rate": 1.5302824360220352e-05, | |
| "loss": 0.1161, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3972413793103448, | |
| "grad_norm": 1.8562459888809821, | |
| "learning_rate": 1.522087668483264e-05, | |
| "loss": 0.1054, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.2050577332052317, | |
| "learning_rate": 1.5138444162548791e-05, | |
| "loss": 0.072, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.4027586206896552, | |
| "grad_norm": 2.576862971551694, | |
| "learning_rate": 1.5055534448625766e-05, | |
| "loss": 0.1956, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.40551724137931033, | |
| "grad_norm": 2.526412596716041, | |
| "learning_rate": 1.4972155242635853e-05, | |
| "loss": 0.1163, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.40827586206896554, | |
| "grad_norm": 2.4122326865926325, | |
| "learning_rate": 1.488831428775164e-05, | |
| "loss": 0.1588, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4110344827586207, | |
| "grad_norm": 1.5900334279122388, | |
| "learning_rate": 1.4804019370026927e-05, | |
| "loss": 0.0874, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.41379310344827586, | |
| "grad_norm": 2.2583849148863284, | |
| "learning_rate": 1.4719278317673655e-05, | |
| "loss": 0.1225, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.416551724137931, | |
| "grad_norm": 1.5043786808378643, | |
| "learning_rate": 1.4634099000334932e-05, | |
| "loss": 0.0628, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.4193103448275862, | |
| "grad_norm": 2.6867593671378707, | |
| "learning_rate": 1.4548489328354197e-05, | |
| "loss": 0.1119, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4220689655172414, | |
| "grad_norm": 1.892183158003583, | |
| "learning_rate": 1.4462457252040606e-05, | |
| "loss": 0.1371, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.42482758620689653, | |
| "grad_norm": 0.7376897959603039, | |
| "learning_rate": 1.437601076093073e-05, | |
| "loss": 0.0715, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.42758620689655175, | |
| "grad_norm": 1.2313754414547655, | |
| "learning_rate": 1.4289157883046567e-05, | |
| "loss": 0.0671, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.4303448275862069, | |
| "grad_norm": 2.443959323743539, | |
| "learning_rate": 1.420190668415002e-05, | |
| "loss": 0.0937, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.43310344827586206, | |
| "grad_norm": 1.8279711072151712, | |
| "learning_rate": 1.4114265266993847e-05, | |
| "loss": 0.1114, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.43586206896551727, | |
| "grad_norm": 2.722814716999782, | |
| "learning_rate": 1.4026241770569198e-05, | |
| "loss": 0.1388, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4386206896551724, | |
| "grad_norm": 2.522323402694141, | |
| "learning_rate": 1.3937844369349736e-05, | |
| "loss": 0.0862, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.4413793103448276, | |
| "grad_norm": 1.0652613518383625, | |
| "learning_rate": 1.3849081272532545e-05, | |
| "loss": 0.0752, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.44413793103448274, | |
| "grad_norm": 2.6542750874407335, | |
| "learning_rate": 1.375996072327573e-05, | |
| "loss": 0.1456, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.44689655172413795, | |
| "grad_norm": 1.8544625800539327, | |
| "learning_rate": 1.3670490997932922e-05, | |
| "loss": 0.109, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.4496551724137931, | |
| "grad_norm": 1.9735546120483556, | |
| "learning_rate": 1.3580680405284666e-05, | |
| "loss": 0.1144, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.45241379310344826, | |
| "grad_norm": 1.2177687301571833, | |
| "learning_rate": 1.3490537285766809e-05, | |
| "loss": 0.0838, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.45517241379310347, | |
| "grad_norm": 2.866049013585789, | |
| "learning_rate": 1.3400070010695966e-05, | |
| "loss": 0.1309, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4579310344827586, | |
| "grad_norm": 1.7714947880406966, | |
| "learning_rate": 1.3309286981492084e-05, | |
| "loss": 0.1336, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4606896551724138, | |
| "grad_norm": 2.3528937198669966, | |
| "learning_rate": 1.3218196628898232e-05, | |
| "loss": 0.0878, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.463448275862069, | |
| "grad_norm": 1.32763797639713, | |
| "learning_rate": 1.3126807412197666e-05, | |
| "loss": 0.0866, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.46620689655172415, | |
| "grad_norm": 1.0489060017302254, | |
| "learning_rate": 1.3035127818428239e-05, | |
| "loss": 0.0863, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4689655172413793, | |
| "grad_norm": 1.4769228384530726, | |
| "learning_rate": 1.2943166361594242e-05, | |
| "loss": 0.1393, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.47172413793103446, | |
| "grad_norm": 1.9367121483246261, | |
| "learning_rate": 1.2850931581875723e-05, | |
| "loss": 0.1762, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.47448275862068967, | |
| "grad_norm": 1.5440366202640854, | |
| "learning_rate": 1.275843204483539e-05, | |
| "loss": 0.0599, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4772413793103448, | |
| "grad_norm": 1.691410284752133, | |
| "learning_rate": 1.2665676340623172e-05, | |
| "loss": 0.0938, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.2010801724859, | |
| "learning_rate": 1.2572673083178448e-05, | |
| "loss": 0.098, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4827586206896552, | |
| "grad_norm": 2.223070626201405, | |
| "learning_rate": 1.2479430909430109e-05, | |
| "loss": 0.0936, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.48551724137931035, | |
| "grad_norm": 2.1176352077764107, | |
| "learning_rate": 1.2385958478494487e-05, | |
| "loss": 0.1279, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4882758620689655, | |
| "grad_norm": 1.3628256498935367, | |
| "learning_rate": 1.2292264470871183e-05, | |
| "loss": 0.0783, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4910344827586207, | |
| "grad_norm": 1.064266823164612, | |
| "learning_rate": 1.2198357587636958e-05, | |
| "loss": 0.0525, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.49379310344827587, | |
| "grad_norm": 2.00945904980985, | |
| "learning_rate": 1.2104246549637683e-05, | |
| "loss": 0.0978, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.496551724137931, | |
| "grad_norm": 1.3141391526330848, | |
| "learning_rate": 1.2009940096678451e-05, | |
| "loss": 0.0963, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4993103448275862, | |
| "grad_norm": 1.6531005740449383, | |
| "learning_rate": 1.1915446986711953e-05, | |
| "loss": 0.1092, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.5020689655172413, | |
| "grad_norm": 1.4945974838001637, | |
| "learning_rate": 1.1820775995025147e-05, | |
| "loss": 0.0667, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5048275862068966, | |
| "grad_norm": 1.3762608443334923, | |
| "learning_rate": 1.172593591342432e-05, | |
| "loss": 0.0882, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.5075862068965518, | |
| "grad_norm": 1.5415877422060749, | |
| "learning_rate": 1.1630935549418627e-05, | |
| "loss": 0.0865, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5103448275862069, | |
| "grad_norm": 2.0042018778537622, | |
| "learning_rate": 1.1535783725402163e-05, | |
| "loss": 0.1353, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5131034482758621, | |
| "grad_norm": 2.6818407651740266, | |
| "learning_rate": 1.1440489277834645e-05, | |
| "loss": 0.1393, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5158620689655172, | |
| "grad_norm": 2.3807831512162374, | |
| "learning_rate": 1.134506105642081e-05, | |
| "loss": 0.1169, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5186206896551724, | |
| "grad_norm": 2.202610904326787, | |
| "learning_rate": 1.1249507923288563e-05, | |
| "loss": 0.1241, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5213793103448275, | |
| "grad_norm": 1.8110167173727552, | |
| "learning_rate": 1.115383875216598e-05, | |
| "loss": 0.0945, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5241379310344828, | |
| "grad_norm": 1.2395129394737805, | |
| "learning_rate": 1.105806242755723e-05, | |
| "loss": 0.0791, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.526896551724138, | |
| "grad_norm": 2.269164321603063, | |
| "learning_rate": 1.0962187843917498e-05, | |
| "loss": 0.0995, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5296551724137931, | |
| "grad_norm": 2.1091615955126217, | |
| "learning_rate": 1.0866223904826992e-05, | |
| "loss": 0.0978, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5324137931034483, | |
| "grad_norm": 1.1661181979597077, | |
| "learning_rate": 1.0770179522164079e-05, | |
| "loss": 0.0654, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5351724137931034, | |
| "grad_norm": 1.529220319087916, | |
| "learning_rate": 1.0674063615277681e-05, | |
| "loss": 0.0795, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5379310344827586, | |
| "grad_norm": 1.61462843303553, | |
| "learning_rate": 1.0577885110158959e-05, | |
| "loss": 0.0722, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5406896551724137, | |
| "grad_norm": 1.0229094996985268, | |
| "learning_rate": 1.0481652938612374e-05, | |
| "loss": 0.1059, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.543448275862069, | |
| "grad_norm": 1.8905308810375994, | |
| "learning_rate": 1.0385376037426227e-05, | |
| "loss": 0.1007, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5462068965517242, | |
| "grad_norm": 1.4659160591839386, | |
| "learning_rate": 1.0289063347542727e-05, | |
| "loss": 0.0997, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5489655172413793, | |
| "grad_norm": 3.3227958982974637, | |
| "learning_rate": 1.0192723813227672e-05, | |
| "loss": 0.1803, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 1.963767479554977, | |
| "learning_rate": 1.0096366381239808e-05, | |
| "loss": 0.1275, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5544827586206896, | |
| "grad_norm": 1.6295769776645304, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0784, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5572413793103448, | |
| "grad_norm": 2.440021613726331, | |
| "learning_rate": 9.903633618760195e-06, | |
| "loss": 0.0884, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.4242019780978539, | |
| "learning_rate": 9.807276186772335e-06, | |
| "loss": 0.0852, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5627586206896552, | |
| "grad_norm": 0.9963914089293635, | |
| "learning_rate": 9.710936652457276e-06, | |
| "loss": 0.0528, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5655172413793104, | |
| "grad_norm": 2.0171601668683863, | |
| "learning_rate": 9.614623962573776e-06, | |
| "loss": 0.1213, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5682758620689655, | |
| "grad_norm": 1.8302600165169711, | |
| "learning_rate": 9.518347061387629e-06, | |
| "loss": 0.0813, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5710344827586207, | |
| "grad_norm": 2.135574797850369, | |
| "learning_rate": 9.422114889841045e-06, | |
| "loss": 0.1114, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5737931034482758, | |
| "grad_norm": 1.856147430619363, | |
| "learning_rate": 9.325936384722322e-06, | |
| "loss": 0.1072, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.576551724137931, | |
| "grad_norm": 1.3734175489711284, | |
| "learning_rate": 9.229820477835926e-06, | |
| "loss": 0.0629, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5793103448275863, | |
| "grad_norm": 1.4955582653472665, | |
| "learning_rate": 9.133776095173015e-06, | |
| "loss": 0.0798, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5820689655172414, | |
| "grad_norm": 2.2098641671152075, | |
| "learning_rate": 9.037812156082503e-06, | |
| "loss": 0.0832, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5848275862068966, | |
| "grad_norm": 2.1234204791453446, | |
| "learning_rate": 8.941937572442773e-06, | |
| "loss": 0.0912, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5875862068965517, | |
| "grad_norm": 1.7048969824409477, | |
| "learning_rate": 8.846161247834024e-06, | |
| "loss": 0.0757, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5903448275862069, | |
| "grad_norm": 1.2386625705533991, | |
| "learning_rate": 8.750492076711439e-06, | |
| "loss": 0.0607, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.593103448275862, | |
| "grad_norm": 2.4577783979073518, | |
| "learning_rate": 8.654938943579194e-06, | |
| "loss": 0.1315, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5958620689655172, | |
| "grad_norm": 1.7822551401830198, | |
| "learning_rate": 8.55951072216536e-06, | |
| "loss": 0.1015, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5986206896551725, | |
| "grad_norm": 1.627034418887004, | |
| "learning_rate": 8.464216274597839e-06, | |
| "loss": 0.083, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.6013793103448276, | |
| "grad_norm": 2.4236805021780907, | |
| "learning_rate": 8.369064450581374e-06, | |
| "loss": 0.144, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.6041379310344828, | |
| "grad_norm": 2.0852606023844547, | |
| "learning_rate": 8.274064086575682e-06, | |
| "loss": 0.1187, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.6068965517241379, | |
| "grad_norm": 1.554124619394613, | |
| "learning_rate": 8.179224004974857e-06, | |
| "loss": 0.101, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6096551724137931, | |
| "grad_norm": 1.0086858963161975, | |
| "learning_rate": 8.084553013288048e-06, | |
| "loss": 0.0562, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.6124137931034482, | |
| "grad_norm": 1.744957411692858, | |
| "learning_rate": 7.990059903321554e-06, | |
| "loss": 0.1144, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6151724137931035, | |
| "grad_norm": 1.115741854823727, | |
| "learning_rate": 7.89575345036232e-06, | |
| "loss": 0.0431, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.6179310344827587, | |
| "grad_norm": 2.4048467960954523, | |
| "learning_rate": 7.801642412363042e-06, | |
| "loss": 0.1239, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6206896551724138, | |
| "grad_norm": 1.7045994682708523, | |
| "learning_rate": 7.707735529128819e-06, | |
| "loss": 0.0861, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.623448275862069, | |
| "grad_norm": 1.5844433019812807, | |
| "learning_rate": 7.614041521505517e-06, | |
| "loss": 0.0939, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6262068965517241, | |
| "grad_norm": 1.817274462365455, | |
| "learning_rate": 7.520569090569894e-06, | |
| "loss": 0.0961, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6289655172413793, | |
| "grad_norm": 2.169489761841419, | |
| "learning_rate": 7.427326916821557e-06, | |
| "loss": 0.1141, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6317241379310344, | |
| "grad_norm": 1.2611061121212517, | |
| "learning_rate": 7.3343236593768295e-06, | |
| "loss": 0.0631, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6344827586206897, | |
| "grad_norm": 1.4981316942094398, | |
| "learning_rate": 7.24156795516461e-06, | |
| "loss": 0.0607, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6372413793103449, | |
| "grad_norm": 1.889688470210782, | |
| "learning_rate": 7.149068418124281e-06, | |
| "loss": 0.1243, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.6091621479461797, | |
| "learning_rate": 7.056833638405762e-06, | |
| "loss": 0.1154, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6427586206896552, | |
| "grad_norm": 1.7584988460897566, | |
| "learning_rate": 6.964872181571765e-06, | |
| "loss": 0.0782, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6455172413793103, | |
| "grad_norm": 1.9608264553670172, | |
| "learning_rate": 6.87319258780234e-06, | |
| "loss": 0.1088, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6482758620689655, | |
| "grad_norm": 1.1583598512074385, | |
| "learning_rate": 6.781803371101774e-06, | |
| "loss": 0.0635, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6510344827586206, | |
| "grad_norm": 1.5744270190667782, | |
| "learning_rate": 6.690713018507917e-06, | |
| "loss": 0.065, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6537931034482759, | |
| "grad_norm": 2.0489496115781147, | |
| "learning_rate": 6.599929989304034e-06, | |
| "loss": 0.0941, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6565517241379311, | |
| "grad_norm": 1.3833240601648478, | |
| "learning_rate": 6.509462714233194e-06, | |
| "loss": 0.1049, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6593103448275862, | |
| "grad_norm": 1.105761243006651, | |
| "learning_rate": 6.419319594715338e-06, | |
| "loss": 0.0795, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6620689655172414, | |
| "grad_norm": 2.2243011538195323, | |
| "learning_rate": 6.32950900206708e-06, | |
| "loss": 0.1389, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6648275862068965, | |
| "grad_norm": 1.6204675243320001, | |
| "learning_rate": 6.240039276724273e-06, | |
| "loss": 0.0904, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6675862068965517, | |
| "grad_norm": 1.7416445698806022, | |
| "learning_rate": 6.150918727467455e-06, | |
| "loss": 0.0988, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.670344827586207, | |
| "grad_norm": 1.5865476897173794, | |
| "learning_rate": 6.062155630650265e-06, | |
| "loss": 0.0887, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6731034482758621, | |
| "grad_norm": 1.0908181878235528, | |
| "learning_rate": 5.973758229430806e-06, | |
| "loss": 0.0406, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6758620689655173, | |
| "grad_norm": 1.2409267373149415, | |
| "learning_rate": 5.8857347330061545e-06, | |
| "loss": 0.0613, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6786206896551724, | |
| "grad_norm": 1.5366160191792417, | |
| "learning_rate": 5.798093315849984e-06, | |
| "loss": 0.0621, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6813793103448276, | |
| "grad_norm": 2.401971583677582, | |
| "learning_rate": 5.7108421169534376e-06, | |
| "loss": 0.135, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6841379310344827, | |
| "grad_norm": 1.7382959732076737, | |
| "learning_rate": 5.623989239069275e-06, | |
| "loss": 0.0912, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6868965517241379, | |
| "grad_norm": 1.1164138593496515, | |
| "learning_rate": 5.5375427479593945e-06, | |
| "loss": 0.0481, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 1.9207044725285578, | |
| "learning_rate": 5.451510671645806e-06, | |
| "loss": 0.0915, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6924137931034483, | |
| "grad_norm": 1.4554330468757273, | |
| "learning_rate": 5.3659009996650704e-06, | |
| "loss": 0.0932, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6951724137931035, | |
| "grad_norm": 1.5637751353358034, | |
| "learning_rate": 5.280721682326349e-06, | |
| "loss": 0.1258, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6979310344827586, | |
| "grad_norm": 1.6864022667103549, | |
| "learning_rate": 5.195980629973077e-06, | |
| "loss": 0.136, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.7006896551724138, | |
| "grad_norm": 1.5424417657042435, | |
| "learning_rate": 5.111685712248364e-06, | |
| "loss": 0.0744, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.7034482758620689, | |
| "grad_norm": 2.4152541945255965, | |
| "learning_rate": 5.02784475736415e-06, | |
| "loss": 0.1436, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.7062068965517241, | |
| "grad_norm": 1.8800510082119968, | |
| "learning_rate": 4.944465551374238e-06, | |
| "loss": 0.0864, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7089655172413794, | |
| "grad_norm": 1.6391965299287088, | |
| "learning_rate": 4.861555837451213e-06, | |
| "loss": 0.0931, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.7117241379310345, | |
| "grad_norm": 2.1164230178199688, | |
| "learning_rate": 4.779123315167362e-06, | |
| "loss": 0.1589, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7144827586206897, | |
| "grad_norm": 2.1736287568680615, | |
| "learning_rate": 4.6971756397796506e-06, | |
| "loss": 0.075, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.7172413793103448, | |
| "grad_norm": 0.981201558647046, | |
| "learning_rate": 4.61572042151878e-06, | |
| "loss": 0.0423, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.2068505093305162, | |
| "learning_rate": 4.534765224882463e-06, | |
| "loss": 0.0538, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.7227586206896551, | |
| "grad_norm": 0.9541230040360105, | |
| "learning_rate": 4.4543175679329345e-06, | |
| "loss": 0.0591, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7255172413793104, | |
| "grad_norm": 1.7162320823485342, | |
| "learning_rate": 4.37438492159876e-06, | |
| "loss": 0.0965, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7282758620689656, | |
| "grad_norm": 1.184768855122373, | |
| "learning_rate": 4.294974708981041e-06, | |
| "loss": 0.0383, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7310344827586207, | |
| "grad_norm": 1.5098290827125584, | |
| "learning_rate": 4.216094304664056e-06, | |
| "loss": 0.0952, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7337931034482759, | |
| "grad_norm": 1.3641276614652302, | |
| "learning_rate": 4.1377510340304e-06, | |
| "loss": 0.0687, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.736551724137931, | |
| "grad_norm": 1.9886328182742048, | |
| "learning_rate": 4.059952172580694e-06, | |
| "loss": 0.1397, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7393103448275862, | |
| "grad_norm": 2.0623334755221494, | |
| "learning_rate": 3.982704945257957e-06, | |
| "loss": 0.1271, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7420689655172413, | |
| "grad_norm": 2.027142606530138, | |
| "learning_rate": 3.9060165257766116e-06, | |
| "loss": 0.0736, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7448275862068966, | |
| "grad_norm": 1.8884432574197467, | |
| "learning_rate": 3.829894035956306e-06, | |
| "loss": 0.1243, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7475862068965518, | |
| "grad_norm": 1.5259208901463874, | |
| "learning_rate": 3.754344545060529e-06, | |
| "loss": 0.0971, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7503448275862069, | |
| "grad_norm": 1.454648528529707, | |
| "learning_rate": 3.6793750691400996e-06, | |
| "loss": 0.1226, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7531034482758621, | |
| "grad_norm": 1.1852288533881499, | |
| "learning_rate": 3.604992570381621e-06, | |
| "loss": 0.0573, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7558620689655172, | |
| "grad_norm": 0.9888790128432269, | |
| "learning_rate": 3.5312039564609203e-06, | |
| "loss": 0.0357, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7586206896551724, | |
| "grad_norm": 1.9227683916123242, | |
| "learning_rate": 3.458016079901544e-06, | |
| "loss": 0.0841, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7613793103448275, | |
| "grad_norm": 1.6925865088399465, | |
| "learning_rate": 3.3854357374383905e-06, | |
| "loss": 0.0749, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7641379310344828, | |
| "grad_norm": 2.482457504595889, | |
| "learning_rate": 3.313469669386532e-06, | |
| "loss": 0.1234, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.766896551724138, | |
| "grad_norm": 1.2830007481095762, | |
| "learning_rate": 3.242124559015234e-06, | |
| "loss": 0.0752, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7696551724137931, | |
| "grad_norm": 2.0768576257082114, | |
| "learning_rate": 3.171407031927325e-06, | |
| "loss": 0.0991, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7724137931034483, | |
| "grad_norm": 1.8987466668093202, | |
| "learning_rate": 3.101323655443882e-06, | |
| "loss": 0.109, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7751724137931034, | |
| "grad_norm": 1.6001709673447158, | |
| "learning_rate": 3.0318809379943594e-06, | |
| "loss": 0.0599, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7779310344827586, | |
| "grad_norm": 1.309542383665605, | |
| "learning_rate": 2.9630853285121506e-06, | |
| "loss": 0.0641, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7806896551724138, | |
| "grad_norm": 1.5565630583605414, | |
| "learning_rate": 2.8949432158357083e-06, | |
| "loss": 0.0909, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.783448275862069, | |
| "grad_norm": 1.2776706094735155, | |
| "learning_rate": 2.8274609281152322e-06, | |
| "loss": 0.1223, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7862068965517242, | |
| "grad_norm": 1.6300368148462467, | |
| "learning_rate": 2.7606447322249876e-06, | |
| "loss": 0.087, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7889655172413793, | |
| "grad_norm": 1.5096693670735182, | |
| "learning_rate": 2.694500833181323e-06, | |
| "loss": 0.1002, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7917241379310345, | |
| "grad_norm": 1.719399247233759, | |
| "learning_rate": 2.629035373566433e-06, | |
| "loss": 0.0915, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7944827586206896, | |
| "grad_norm": 1.1425139227274304, | |
| "learning_rate": 2.5642544329579088e-06, | |
| "loss": 0.0555, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7972413793103448, | |
| "grad_norm": 1.2082856945935607, | |
| "learning_rate": 2.500164027364147e-06, | |
| "loss": 0.0555, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.028172728126609, | |
| "learning_rate": 2.4367701086656625e-06, | |
| "loss": 0.104, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.8027586206896552, | |
| "grad_norm": 1.498164820422529, | |
| "learning_rate": 2.374078564062364e-06, | |
| "loss": 0.0936, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.8055172413793104, | |
| "grad_norm": 1.3254283577777912, | |
| "learning_rate": 2.312095215526814e-06, | |
| "loss": 0.0898, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8082758620689655, | |
| "grad_norm": 1.3758428200048072, | |
| "learning_rate": 2.2508258192635614e-06, | |
| "loss": 0.073, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.8110344827586207, | |
| "grad_norm": 1.2825112587820704, | |
| "learning_rate": 2.190276065174596e-06, | |
| "loss": 0.0675, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8137931034482758, | |
| "grad_norm": 1.704844614821693, | |
| "learning_rate": 2.130451576330925e-06, | |
| "loss": 0.0871, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.8165517241379311, | |
| "grad_norm": 1.8236093781558738, | |
| "learning_rate": 2.0713579084503877e-06, | |
| "loss": 0.075, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8193103448275862, | |
| "grad_norm": 1.7159210186184939, | |
| "learning_rate": 2.0130005493817063e-06, | |
| "loss": 0.0726, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8220689655172414, | |
| "grad_norm": 1.5402879375212146, | |
| "learning_rate": 1.9553849185948514e-06, | |
| "loss": 0.0585, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.8248275862068966, | |
| "grad_norm": 2.088587364963122, | |
| "learning_rate": 1.8985163666777473e-06, | |
| "loss": 0.1192, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 1.385102288521542, | |
| "learning_rate": 1.8424001748393905e-06, | |
| "loss": 0.0639, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8303448275862069, | |
| "grad_norm": 1.199103692035072, | |
| "learning_rate": 1.7870415544193808e-06, | |
| "loss": 0.0644, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.833103448275862, | |
| "grad_norm": 1.576550399440244, | |
| "learning_rate": 1.7324456464039751e-06, | |
| "loss": 0.0806, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8358620689655173, | |
| "grad_norm": 1.9033469567963237, | |
| "learning_rate": 1.6786175209486565e-06, | |
| "loss": 0.1166, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.8386206896551724, | |
| "grad_norm": 1.5481679752062283, | |
| "learning_rate": 1.6255621769072805e-06, | |
| "loss": 0.0883, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8413793103448276, | |
| "grad_norm": 1.7374538978001977, | |
| "learning_rate": 1.5732845413678477e-06, | |
| "loss": 0.105, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8441379310344828, | |
| "grad_norm": 1.3465892642582866, | |
| "learning_rate": 1.521789469194952e-06, | |
| "loss": 0.0618, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8468965517241379, | |
| "grad_norm": 2.312489005340463, | |
| "learning_rate": 1.4710817425789015e-06, | |
| "loss": 0.0992, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8496551724137931, | |
| "grad_norm": 1.1318530850342379, | |
| "learning_rate": 1.4211660705916286e-06, | |
| "loss": 0.0458, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8524137931034482, | |
| "grad_norm": 1.5063935617388766, | |
| "learning_rate": 1.372047088749372e-06, | |
| "loss": 0.0516, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8551724137931035, | |
| "grad_norm": 1.4001415457936668, | |
| "learning_rate": 1.3237293585821786e-06, | |
| "loss": 0.1156, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8579310344827586, | |
| "grad_norm": 2.487040806365276, | |
| "learning_rate": 1.2762173672102996e-06, | |
| "loss": 0.1332, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8606896551724138, | |
| "grad_norm": 1.514447174356807, | |
| "learning_rate": 1.2295155269274827e-06, | |
| "loss": 0.0735, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.863448275862069, | |
| "grad_norm": 1.9664878278885487, | |
| "learning_rate": 1.1836281747912125e-06, | |
| "loss": 0.0825, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8662068965517241, | |
| "grad_norm": 1.7248118984472842, | |
| "learning_rate": 1.1385595722199438e-06, | |
| "loss": 0.0945, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8689655172413793, | |
| "grad_norm": 1.232176840002336, | |
| "learning_rate": 1.094313904597355e-06, | |
| "loss": 0.0761, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8717241379310345, | |
| "grad_norm": 2.3846939660082636, | |
| "learning_rate": 1.0508952808836682e-06, | |
| "loss": 0.1104, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8744827586206897, | |
| "grad_norm": 1.5107849502345858, | |
| "learning_rate": 1.0083077332340563e-06, | |
| "loss": 0.137, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8772413793103448, | |
| "grad_norm": 2.5218758909483077, | |
| "learning_rate": 9.665552166241965e-07, | |
| "loss": 0.1395, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.913603235284768, | |
| "learning_rate": 9.256416084829778e-07, | |
| "loss": 0.1331, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8827586206896552, | |
| "grad_norm": 2.4049905043471806, | |
| "learning_rate": 8.855707083324183e-07, | |
| "loss": 0.1077, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8855172413793103, | |
| "grad_norm": 1.108589495662786, | |
| "learning_rate": 8.46346237434813e-07, | |
| "loss": 0.0638, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8882758620689655, | |
| "grad_norm": 1.222025656507595, | |
| "learning_rate": 8.079718384471557e-07, | |
| "loss": 0.0515, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8910344827586207, | |
| "grad_norm": 1.6319377780473996, | |
| "learning_rate": 7.704510750828542e-07, | |
| "loss": 0.0623, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8937931034482759, | |
| "grad_norm": 1.402983153361783, | |
| "learning_rate": 7.337874317807803e-07, | |
| "loss": 0.0516, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.896551724137931, | |
| "grad_norm": 1.4500270750731776, | |
| "learning_rate": 6.979843133816744e-07, | |
| "loss": 0.0612, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8993103448275862, | |
| "grad_norm": 1.095211249638046, | |
| "learning_rate": 6.630450448119618e-07, | |
| "loss": 0.0333, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.9020689655172414, | |
| "grad_norm": 1.8925065532997027, | |
| "learning_rate": 6.289728707749609e-07, | |
| "loss": 0.0953, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.9048275862068965, | |
| "grad_norm": 1.747624078137272, | |
| "learning_rate": 5.957709554495683e-07, | |
| "loss": 0.0893, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9075862068965517, | |
| "grad_norm": 0.9113610050884592, | |
| "learning_rate": 5.634423821964074e-07, | |
| "loss": 0.0423, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.9103448275862069, | |
| "grad_norm": 1.6463257868897792, | |
| "learning_rate": 5.319901532714877e-07, | |
| "loss": 0.1055, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.9131034482758621, | |
| "grad_norm": 1.84950657136513, | |
| "learning_rate": 5.014171895473929e-07, | |
| "loss": 0.1232, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.9158620689655173, | |
| "grad_norm": 1.5561236996647523, | |
| "learning_rate": 4.717263302420283e-07, | |
| "loss": 0.076, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.9186206896551724, | |
| "grad_norm": 1.2285714349711996, | |
| "learning_rate": 4.429203326549525e-07, | |
| "loss": 0.0835, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.9213793103448276, | |
| "grad_norm": 2.8092718918728288, | |
| "learning_rate": 4.150018719113147e-07, | |
| "loss": 0.0734, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9241379310344827, | |
| "grad_norm": 1.766273016183224, | |
| "learning_rate": 3.8797354071342443e-07, | |
| "loss": 0.0768, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.926896551724138, | |
| "grad_norm": 2.1729531887535285, | |
| "learning_rate": 3.618378490999719e-07, | |
| "loss": 0.1027, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9296551724137931, | |
| "grad_norm": 1.7482937409483954, | |
| "learning_rate": 3.365972242129378e-07, | |
| "loss": 0.1176, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.9324137931034483, | |
| "grad_norm": 1.2860014321281648, | |
| "learning_rate": 3.122540100721794e-07, | |
| "loss": 0.068, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9351724137931035, | |
| "grad_norm": 1.8682982161376196, | |
| "learning_rate": 2.888104673577574e-07, | |
| "loss": 0.0826, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9379310344827586, | |
| "grad_norm": 2.1192579972688548, | |
| "learning_rate": 2.66268773199988e-07, | |
| "loss": 0.1094, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9406896551724138, | |
| "grad_norm": 1.812620322889224, | |
| "learning_rate": 2.4463102097726843e-07, | |
| "loss": 0.1397, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9434482758620689, | |
| "grad_norm": 1.55323368762906, | |
| "learning_rate": 2.2389922012165944e-07, | |
| "loss": 0.0883, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9462068965517242, | |
| "grad_norm": 1.3537994599803411, | |
| "learning_rate": 2.0407529593228114e-07, | |
| "loss": 0.0537, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9489655172413793, | |
| "grad_norm": 1.9963802514280435, | |
| "learning_rate": 1.8516108939651945e-07, | |
| "loss": 0.0841, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9517241379310345, | |
| "grad_norm": 1.9489250790151131, | |
| "learning_rate": 1.6715835701905604e-07, | |
| "loss": 0.1278, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9544827586206897, | |
| "grad_norm": 1.208403698921063, | |
| "learning_rate": 1.5006877065874338e-07, | |
| "loss": 0.062, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9572413793103448, | |
| "grad_norm": 1.9902454269295837, | |
| "learning_rate": 1.3389391737335112e-07, | |
| "loss": 0.1123, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.5549649154456184, | |
| "learning_rate": 1.1863529927217731e-07, | |
| "loss": 0.0859, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9627586206896551, | |
| "grad_norm": 1.5672800792400794, | |
| "learning_rate": 1.0429433337655115e-07, | |
| "loss": 0.0664, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9655172413793104, | |
| "grad_norm": 1.980266389039632, | |
| "learning_rate": 9.08723514882437e-08, | |
| "loss": 0.0978, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9682758620689655, | |
| "grad_norm": 1.924180909114094, | |
| "learning_rate": 7.837060006577801e-08, | |
| "loss": 0.082, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9710344827586207, | |
| "grad_norm": 1.360244814652006, | |
| "learning_rate": 6.679024010868617e-08, | |
| "loss": 0.0937, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9737931034482759, | |
| "grad_norm": 1.3638474049460683, | |
| "learning_rate": 5.6132347049679955e-08, | |
| "loss": 0.0944, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.976551724137931, | |
| "grad_norm": 1.0043906816455541, | |
| "learning_rate": 4.639791065478738e-08, | |
| "loss": 0.0373, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9793103448275862, | |
| "grad_norm": 1.3136551040176567, | |
| "learning_rate": 3.758783493142737e-08, | |
| "loss": 0.0938, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9820689655172414, | |
| "grad_norm": 1.2330770172633143, | |
| "learning_rate": 2.9702938044468e-08, | |
| "loss": 0.1083, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9848275862068966, | |
| "grad_norm": 1.4318266541987408, | |
| "learning_rate": 2.274395224023618e-08, | |
| "loss": 0.0667, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9875862068965517, | |
| "grad_norm": 1.4889881534099365, | |
| "learning_rate": 1.671152377852092e-08, | |
| "loss": 0.0907, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9903448275862069, | |
| "grad_norm": 1.9950572073445185, | |
| "learning_rate": 1.1606212872559142e-08, | |
| "loss": 0.0937, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.993103448275862, | |
| "grad_norm": 1.2536334792292063, | |
| "learning_rate": 7.42849363700282e-09, | |
| "loss": 0.0863, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9958620689655172, | |
| "grad_norm": 1.2816200143172458, | |
| "learning_rate": 4.178754043898669e-09, | |
| "loss": 0.0593, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9986206896551724, | |
| "grad_norm": 0.7637431538197891, | |
| "learning_rate": 1.8572958866514e-09, | |
| "loss": 0.0859, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.3907025925168472, | |
| "learning_rate": 4.643347520005836e-10, | |
| "loss": 0.0658, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 363, | |
| "total_flos": 50754799140864.0, | |
| "train_loss": 0.09636665488652289, | |
| "train_runtime": 845.3624, | |
| "train_samples_per_second": 6.855, | |
| "train_steps_per_second": 0.429 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 363, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 50754799140864.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
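
A minimal sketch of how a state file like the one above can be consumed, assuming the underlying file is the plain `trainer_state.json` that the Hugging Face Trainer writes into each checkpoint directory (the pipe characters above are only display formatting). The path `checkpoint-363/trainer_state.json` is a placeholder, not a path taken from this run.

```python
import json

# Hypothetical location of the state file shown above; adjust to your checkpoint dir.
STATE_PATH = "checkpoint-363/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# "log_history" holds one dict per logged step (logging_steps = 1 here),
# plus a final summary entry carrying train_loss, train_runtime, etc.
step_logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]
summary = state["log_history"][-1]

losses = [e["loss"] for e in step_logs]
print(f"logged steps        : {len(step_logs)} / max_steps {state['max_steps']}")
print(f"final logged loss   : {losses[-1]:.4f}")
print(f"mean logged loss    : {sum(losses) / len(losses):.4f}")
print(f"reported train_loss : {summary.get('train_loss'):.4f}")
print(f"train_runtime (s)   : {summary.get('train_runtime')}")
```

The filter on `"loss"` and `"learning_rate"` keeps only the per-step entries and skips the final summary record, which reports aggregate metrics (`train_loss`, `train_runtime`) instead of a step loss; the mean of the logged losses should land close to the reported `train_loss` of roughly 0.096.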