| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1355, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007380073800738007, |
| "grad_norm": 52.96585464477539, |
| "learning_rate": 0.0, |
| "loss": 7.329597473144531, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0014760147601476014, |
| "grad_norm": 42.55315017700195, |
| "learning_rate": 1.4e-05, |
| "loss": 7.168418884277344, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.002214022140221402, |
| "grad_norm": 21.52372169494629, |
| "learning_rate": 2.8e-05, |
| "loss": 6.947352409362793, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002952029520295203, |
| "grad_norm": 19.89319610595703, |
| "learning_rate": 4.2e-05, |
| "loss": 6.611477851867676, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0036900369003690036, |
| "grad_norm": 12.127403259277344, |
| "learning_rate": 5.6e-05, |
| "loss": 6.690403938293457, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.004428044280442804, |
| "grad_norm": 11.600789070129395, |
| "learning_rate": 7.000000000000001e-05, |
| "loss": 6.540159225463867, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0051660516605166054, |
| "grad_norm": 8.64883804321289, |
| "learning_rate": 8.4e-05, |
| "loss": 6.4675188064575195, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.005904059040590406, |
| "grad_norm": 8.694304466247559, |
| "learning_rate": 9.800000000000001e-05, |
| "loss": 6.344979286193848, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.006642066420664207, |
| "grad_norm": 8.474891662597656, |
| "learning_rate": 0.000112, |
| "loss": 6.483427047729492, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.007380073800738007, |
| "grad_norm": 8.267909049987793, |
| "learning_rate": 0.000126, |
| "loss": 6.328839302062988, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.008118081180811807, |
| "grad_norm": 6.391619682312012, |
| "learning_rate": 0.00014000000000000001, |
| "loss": 6.344330787658691, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.008856088560885609, |
| "grad_norm": 4.2130842208862305, |
| "learning_rate": 0.000154, |
| "loss": 6.2279744148254395, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00959409594095941, |
| "grad_norm": 4.580661296844482, |
| "learning_rate": 0.000168, |
| "loss": 6.267205715179443, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.010332103321033211, |
| "grad_norm": 4.600402355194092, |
| "learning_rate": 0.000182, |
| "loss": 6.177546501159668, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01107011070110701, |
| "grad_norm": 6.000468730926514, |
| "learning_rate": 0.00019600000000000002, |
| "loss": 6.123383522033691, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.011808118081180811, |
| "grad_norm": 6.2054548263549805, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 6.029158592224121, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.012546125461254613, |
| "grad_norm": 5.766181945800781, |
| "learning_rate": 0.000224, |
| "loss": 6.146026611328125, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.013284132841328414, |
| "grad_norm": 3.5282742977142334, |
| "learning_rate": 0.000238, |
| "loss": 6.347329139709473, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.014022140221402213, |
| "grad_norm": 10.378168106079102, |
| "learning_rate": 0.000252, |
| "loss": 5.982622146606445, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.014760147601476014, |
| "grad_norm": 6.26217794418335, |
| "learning_rate": 0.000266, |
| "loss": 6.232936859130859, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.015498154981549815, |
| "grad_norm": 3.640542984008789, |
| "learning_rate": 0.00028000000000000003, |
| "loss": 6.282479763031006, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.016236162361623615, |
| "grad_norm": 4.074864864349365, |
| "learning_rate": 0.000294, |
| "loss": 6.1197309494018555, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.016974169741697416, |
| "grad_norm": 4.15755558013916, |
| "learning_rate": 0.000308, |
| "loss": 6.1190900802612305, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.017712177121771217, |
| "grad_norm": 8.528851509094238, |
| "learning_rate": 0.000322, |
| "loss": 6.108499050140381, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01845018450184502, |
| "grad_norm": 4.248746395111084, |
| "learning_rate": 0.000336, |
| "loss": 5.993032932281494, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01918819188191882, |
| "grad_norm": 5.643017292022705, |
| "learning_rate": 0.00035, |
| "loss": 6.173605918884277, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01992619926199262, |
| "grad_norm": 3.0032365322113037, |
| "learning_rate": 0.000364, |
| "loss": 5.9087629318237305, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.020664206642066422, |
| "grad_norm": 6.890568733215332, |
| "learning_rate": 0.000378, |
| "loss": 6.026180267333984, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.021402214022140223, |
| "grad_norm": 4.55826473236084, |
| "learning_rate": 0.00039200000000000004, |
| "loss": 6.302541732788086, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.02214022140221402, |
| "grad_norm": 5.366292953491211, |
| "learning_rate": 0.00040599999999999995, |
| "loss": 6.086678981781006, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.022878228782287822, |
| "grad_norm": 2.9198176860809326, |
| "learning_rate": 0.00041999999999999996, |
| "loss": 6.034950256347656, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.023616236162361623, |
| "grad_norm": 3.0416109561920166, |
| "learning_rate": 0.000434, |
| "loss": 5.959887981414795, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.024354243542435424, |
| "grad_norm": 3.6983375549316406, |
| "learning_rate": 0.000448, |
| "loss": 5.958649635314941, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.025092250922509225, |
| "grad_norm": 3.3332769870758057, |
| "learning_rate": 0.000462, |
| "loss": 6.053283214569092, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.025830258302583026, |
| "grad_norm": 4.3135857582092285, |
| "learning_rate": 0.000476, |
| "loss": 5.938570499420166, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.026568265682656828, |
| "grad_norm": 3.9662985801696777, |
| "learning_rate": 0.00049, |
| "loss": 6.1224799156188965, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02730627306273063, |
| "grad_norm": 3.4459118843078613, |
| "learning_rate": 0.000504, |
| "loss": 6.048614501953125, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.028044280442804426, |
| "grad_norm": 4.011275768280029, |
| "learning_rate": 0.000518, |
| "loss": 6.095024108886719, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.028782287822878228, |
| "grad_norm": 4.109455108642578, |
| "learning_rate": 0.000532, |
| "loss": 6.097041130065918, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02952029520295203, |
| "grad_norm": 2.0187416076660156, |
| "learning_rate": 0.000546, |
| "loss": 5.855551719665527, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03025830258302583, |
| "grad_norm": 4.543977737426758, |
| "learning_rate": 0.0005600000000000001, |
| "loss": 5.990810394287109, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03099630996309963, |
| "grad_norm": 3.6285860538482666, |
| "learning_rate": 0.000574, |
| "loss": 6.1089982986450195, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03173431734317343, |
| "grad_norm": 2.802408218383789, |
| "learning_rate": 0.000588, |
| "loss": 6.059175491333008, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03247232472324723, |
| "grad_norm": 5.055509090423584, |
| "learning_rate": 0.000602, |
| "loss": 6.0541791915893555, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.033210332103321034, |
| "grad_norm": 5.420635223388672, |
| "learning_rate": 0.000616, |
| "loss": 5.914989471435547, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.03394833948339483, |
| "grad_norm": 3.779264211654663, |
| "learning_rate": 0.00063, |
| "loss": 5.772123336791992, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03468634686346864, |
| "grad_norm": 4.194505214691162, |
| "learning_rate": 0.000644, |
| "loss": 6.127632141113281, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.035424354243542434, |
| "grad_norm": 2.183096170425415, |
| "learning_rate": 0.000658, |
| "loss": 5.898839950561523, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03616236162361624, |
| "grad_norm": 3.0196142196655273, |
| "learning_rate": 0.000672, |
| "loss": 5.775443077087402, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03690036900369004, |
| "grad_norm": 4.503098011016846, |
| "learning_rate": 0.000686, |
| "loss": 5.992775917053223, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.037638376383763834, |
| "grad_norm": 2.646671772003174, |
| "learning_rate": 0.0007, |
| "loss": 5.891811370849609, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03837638376383764, |
| "grad_norm": 4.828780651092529, |
| "learning_rate": 0.0006999989858164525, |
| "loss": 5.944026947021484, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03911439114391144, |
| "grad_norm": 5.056863784790039, |
| "learning_rate": 0.0006999959432716873, |
| "loss": 6.092121601104736, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03985239852398524, |
| "grad_norm": 2.9205923080444336, |
| "learning_rate": 0.0006999908723833372, |
| "loss": 6.128796577453613, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.04059040590405904, |
| "grad_norm": 2.503229856491089, |
| "learning_rate": 0.0006999837731807897, |
| "loss": 5.857043266296387, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.041328413284132844, |
| "grad_norm": 2.815605640411377, |
| "learning_rate": 0.0006999746457051868, |
| "loss": 5.79864501953125, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.04206642066420664, |
| "grad_norm": 2.630692481994629, |
| "learning_rate": 0.0006999634900094256, |
| "loss": 6.038992881774902, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.042804428044280446, |
| "grad_norm": 2.103322982788086, |
| "learning_rate": 0.0006999503061581567, |
| "loss": 5.8827619552612305, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.043542435424354244, |
| "grad_norm": 4.4402265548706055, |
| "learning_rate": 0.0006999350942277852, |
| "loss": 6.193219184875488, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.04428044280442804, |
| "grad_norm": 2.784449815750122, |
| "learning_rate": 0.0006999178543064694, |
| "loss": 5.896166801452637, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.045018450184501846, |
| "grad_norm": 2.158843755722046, |
| "learning_rate": 0.0006998985864941203, |
| "loss": 5.9794487953186035, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.045756457564575644, |
| "grad_norm": 3.826530933380127, |
| "learning_rate": 0.0006998772909024012, |
| "loss": 5.747754096984863, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04649446494464945, |
| "grad_norm": 1.7147290706634521, |
| "learning_rate": 0.0006998539676547274, |
| "loss": 5.834345817565918, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.047232472324723246, |
| "grad_norm": 2.6357598304748535, |
| "learning_rate": 0.0006998286168862646, |
| "loss": 5.970273017883301, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04797047970479705, |
| "grad_norm": 2.0640320777893066, |
| "learning_rate": 0.0006998012387439294, |
| "loss": 6.20042610168457, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04870848708487085, |
| "grad_norm": 1.840738296508789, |
| "learning_rate": 0.0006997718333863869, |
| "loss": 5.69251823425293, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04944649446494465, |
| "grad_norm": 1.5103991031646729, |
| "learning_rate": 0.0006997404009840512, |
| "loss": 5.718031883239746, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.05018450184501845, |
| "grad_norm": 2.454057455062866, |
| "learning_rate": 0.0006997069417190837, |
| "loss": 5.718637466430664, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.05092250922509225, |
| "grad_norm": 1.7299764156341553, |
| "learning_rate": 0.0006996714557853919, |
| "loss": 5.874034404754639, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.05166051660516605, |
| "grad_norm": 1.983879566192627, |
| "learning_rate": 0.0006996339433886285, |
| "loss": 5.866864204406738, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05239852398523985, |
| "grad_norm": 1.7243304252624512, |
| "learning_rate": 0.0006995944047461907, |
| "loss": 5.6140642166137695, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.053136531365313655, |
| "grad_norm": 2.1467807292938232, |
| "learning_rate": 0.0006995528400872179, |
| "loss": 5.7456207275390625, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.05387453874538745, |
| "grad_norm": 1.8860361576080322, |
| "learning_rate": 0.0006995092496525912, |
| "loss": 5.868312835693359, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.05461254612546126, |
| "grad_norm": 1.9977107048034668, |
| "learning_rate": 0.000699463633694932, |
| "loss": 5.8535919189453125, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.055350553505535055, |
| "grad_norm": 1.6792536973953247, |
| "learning_rate": 0.0006994159924785998, |
| "loss": 5.957564353942871, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05608856088560885, |
| "grad_norm": 1.83674955368042, |
| "learning_rate": 0.0006993663262796917, |
| "loss": 5.801642894744873, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.05682656826568266, |
| "grad_norm": 1.7811754941940308, |
| "learning_rate": 0.0006993146353860395, |
| "loss": 5.8649001121521, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.057564575645756455, |
| "grad_norm": 2.130631446838379, |
| "learning_rate": 0.0006992609200972095, |
| "loss": 5.959519386291504, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05830258302583026, |
| "grad_norm": 1.914402961730957, |
| "learning_rate": 0.0006992051807244997, |
| "loss": 5.6643877029418945, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05904059040590406, |
| "grad_norm": 2.480494737625122, |
| "learning_rate": 0.0006991474175909385, |
| "loss": 5.705104827880859, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05977859778597786, |
| "grad_norm": 1.6274583339691162, |
| "learning_rate": 0.0006990876310312825, |
| "loss": 5.786376953125, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.06051660516605166, |
| "grad_norm": 3.1301629543304443, |
| "learning_rate": 0.0006990258213920147, |
| "loss": 5.652984142303467, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.061254612546125464, |
| "grad_norm": 1.7219048738479614, |
| "learning_rate": 0.0006989619890313428, |
| "loss": 5.684242248535156, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.06199261992619926, |
| "grad_norm": 2.016432046890259, |
| "learning_rate": 0.0006988961343191968, |
| "loss": 5.919116973876953, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.06273062730627306, |
| "grad_norm": 1.9674099683761597, |
| "learning_rate": 0.0006988282576372264, |
| "loss": 5.706339359283447, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.06346863468634686, |
| "grad_norm": 1.6389487981796265, |
| "learning_rate": 0.0006987583593788001, |
| "loss": 6.144864082336426, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.06420664206642067, |
| "grad_norm": 1.9105358123779297, |
| "learning_rate": 0.0006986864399490014, |
| "loss": 5.812554359436035, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.06494464944649446, |
| "grad_norm": 2.2395148277282715, |
| "learning_rate": 0.0006986124997646276, |
| "loss": 5.818288803100586, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.06568265682656826, |
| "grad_norm": 1.4297045469284058, |
| "learning_rate": 0.0006985365392541869, |
| "loss": 5.988651275634766, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.06642066420664207, |
| "grad_norm": 2.393372058868408, |
| "learning_rate": 0.0006984585588578955, |
| "loss": 5.834245681762695, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06715867158671587, |
| "grad_norm": 1.7424498796463013, |
| "learning_rate": 0.0006983785590276763, |
| "loss": 5.847927570343018, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.06789667896678966, |
| "grad_norm": 1.7180150747299194, |
| "learning_rate": 0.0006982965402271549, |
| "loss": 5.745847702026367, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.06863468634686347, |
| "grad_norm": 1.5406665802001953, |
| "learning_rate": 0.0006982125029316576, |
| "loss": 5.680943012237549, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06937269372693727, |
| "grad_norm": 1.9634002447128296, |
| "learning_rate": 0.0006981264476282089, |
| "loss": 5.660253524780273, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.07011070110701106, |
| "grad_norm": 1.7053471803665161, |
| "learning_rate": 0.0006980383748155278, |
| "loss": 5.777673721313477, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.07084870848708487, |
| "grad_norm": 1.8611632585525513, |
| "learning_rate": 0.0006979482850040258, |
| "loss": 5.753267288208008, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.07158671586715867, |
| "grad_norm": 2.236954689025879, |
| "learning_rate": 0.0006978561787158036, |
| "loss": 5.762792587280273, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.07232472324723248, |
| "grad_norm": 1.5513856410980225, |
| "learning_rate": 0.0006977620564846479, |
| "loss": 5.847312927246094, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.07306273062730627, |
| "grad_norm": 1.6298314332962036, |
| "learning_rate": 0.0006976659188560285, |
| "loss": 5.568481922149658, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.07380073800738007, |
| "grad_norm": 1.6806327104568481, |
| "learning_rate": 0.0006975677663870951, |
| "loss": 5.746288776397705, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07453874538745388, |
| "grad_norm": 1.5393524169921875, |
| "learning_rate": 0.0006974675996466741, |
| "loss": 5.562119960784912, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.07527675276752767, |
| "grad_norm": 1.3935660123825073, |
| "learning_rate": 0.0006973654192152653, |
| "loss": 5.655695915222168, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.07601476014760147, |
| "grad_norm": 1.8384559154510498, |
| "learning_rate": 0.0006972612256850385, |
| "loss": 5.717691421508789, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.07675276752767528, |
| "grad_norm": 1.4056777954101562, |
| "learning_rate": 0.00069715501965983, |
| "loss": 5.4914422035217285, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.07749077490774908, |
| "grad_norm": 1.5063185691833496, |
| "learning_rate": 0.0006970468017551393, |
| "loss": 5.804128170013428, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.07822878228782287, |
| "grad_norm": 1.5670958757400513, |
| "learning_rate": 0.0006969365725981253, |
| "loss": 5.555459976196289, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.07896678966789668, |
| "grad_norm": 1.4736913442611694, |
| "learning_rate": 0.000696824332827603, |
| "loss": 5.734355926513672, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07970479704797048, |
| "grad_norm": 1.2875981330871582, |
| "learning_rate": 0.0006967100830940393, |
| "loss": 5.615688800811768, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.08044280442804429, |
| "grad_norm": 1.6725730895996094, |
| "learning_rate": 0.0006965938240595497, |
| "loss": 5.705436706542969, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.08118081180811808, |
| "grad_norm": 1.2822149991989136, |
| "learning_rate": 0.000696475556397894, |
| "loss": 5.77439022064209, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08191881918819188, |
| "grad_norm": 1.5231584310531616, |
| "learning_rate": 0.0006963552807944731, |
| "loss": 5.540444374084473, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.08265682656826569, |
| "grad_norm": 1.3938168287277222, |
| "learning_rate": 0.0006962329979463242, |
| "loss": 5.578408241271973, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.08339483394833948, |
| "grad_norm": 1.80418062210083, |
| "learning_rate": 0.0006961087085621174, |
| "loss": 5.822021484375, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.08413284132841328, |
| "grad_norm": 1.3559857606887817, |
| "learning_rate": 0.0006959824133621514, |
| "loss": 5.527395248413086, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.08487084870848709, |
| "grad_norm": 1.6934373378753662, |
| "learning_rate": 0.0006958541130783489, |
| "loss": 5.64322566986084, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.08560885608856089, |
| "grad_norm": 2.645036220550537, |
| "learning_rate": 0.0006957238084542531, |
| "loss": 5.786375999450684, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.08634686346863468, |
| "grad_norm": 1.7617570161819458, |
| "learning_rate": 0.0006955915002450227, |
| "loss": 5.706923484802246, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.08708487084870849, |
| "grad_norm": 1.4721003770828247, |
| "learning_rate": 0.0006954571892174282, |
| "loss": 5.816807746887207, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.08782287822878229, |
| "grad_norm": 1.4024418592453003, |
| "learning_rate": 0.0006953208761498471, |
| "loss": 5.504622459411621, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.08856088560885608, |
| "grad_norm": 1.1762746572494507, |
| "learning_rate": 0.0006951825618322589, |
| "loss": 5.638977527618408, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08929889298892989, |
| "grad_norm": 1.4858025312423706, |
| "learning_rate": 0.0006950422470662416, |
| "loss": 5.7883405685424805, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.09003690036900369, |
| "grad_norm": 1.197791576385498, |
| "learning_rate": 0.0006948999326649661, |
| "loss": 5.5270586013793945, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0907749077490775, |
| "grad_norm": 1.280106782913208, |
| "learning_rate": 0.000694755619453192, |
| "loss": 5.614171504974365, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.09151291512915129, |
| "grad_norm": 1.1635382175445557, |
| "learning_rate": 0.0006946093082672625, |
| "loss": 5.714271545410156, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.09225092250922509, |
| "grad_norm": 1.5833303928375244, |
| "learning_rate": 0.0006944609999551001, |
| "loss": 5.534208297729492, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0929889298892989, |
| "grad_norm": 1.2109582424163818, |
| "learning_rate": 0.0006943106953762009, |
| "loss": 5.419297218322754, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.09372693726937269, |
| "grad_norm": 1.551060676574707, |
| "learning_rate": 0.0006941583954016304, |
| "loss": 5.700986385345459, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.09446494464944649, |
| "grad_norm": 1.1270159482955933, |
| "learning_rate": 0.0006940041009140178, |
| "loss": 5.61196231842041, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0952029520295203, |
| "grad_norm": 1.288231372833252, |
| "learning_rate": 0.0006938478128075513, |
| "loss": 5.599189758300781, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0959409594095941, |
| "grad_norm": 1.7800358533859253, |
| "learning_rate": 0.0006936895319879727, |
| "loss": 5.359455108642578, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09667896678966789, |
| "grad_norm": 1.5556919574737549, |
| "learning_rate": 0.0006935292593725724, |
| "loss": 5.530261516571045, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0974169741697417, |
| "grad_norm": 1.737862229347229, |
| "learning_rate": 0.0006933669958901836, |
| "loss": 5.362129211425781, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0981549815498155, |
| "grad_norm": 1.5239074230194092, |
| "learning_rate": 0.0006932027424811779, |
| "loss": 5.559414863586426, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0988929889298893, |
| "grad_norm": 1.206781029701233, |
| "learning_rate": 0.0006930365000974584, |
| "loss": 5.415935516357422, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.0996309963099631, |
| "grad_norm": 1.5241954326629639, |
| "learning_rate": 0.0006928682697024555, |
| "loss": 5.514790058135986, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.1003690036900369, |
| "grad_norm": 1.7540452480316162, |
| "learning_rate": 0.0006926980522711204, |
| "loss": 5.370218276977539, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1011070110701107, |
| "grad_norm": 1.4406752586364746, |
| "learning_rate": 0.0006925258487899203, |
| "loss": 5.334672451019287, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.1018450184501845, |
| "grad_norm": 1.2946128845214844, |
| "learning_rate": 0.000692351660256832, |
| "loss": 5.602551460266113, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.1025830258302583, |
| "grad_norm": 1.2579693794250488, |
| "learning_rate": 0.0006921754876813361, |
| "loss": 5.522645473480225, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.1033210332103321, |
| "grad_norm": 1.2886651754379272, |
| "learning_rate": 0.0006919973320844118, |
| "loss": 5.577740669250488, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10405904059040591, |
| "grad_norm": 1.0571826696395874, |
| "learning_rate": 0.0006918171944985303, |
| "loss": 5.557397842407227, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.1047970479704797, |
| "grad_norm": 1.4176267385482788, |
| "learning_rate": 0.0006916350759676493, |
| "loss": 5.38129997253418, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.1055350553505535, |
| "grad_norm": 1.2939625978469849, |
| "learning_rate": 0.0006914509775472065, |
| "loss": 5.3804121017456055, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.10627306273062731, |
| "grad_norm": 1.3399301767349243, |
| "learning_rate": 0.0006912649003041137, |
| "loss": 5.509670734405518, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1070110701107011, |
| "grad_norm": 1.1282126903533936, |
| "learning_rate": 0.000691076845316751, |
| "loss": 5.5377583503723145, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.1077490774907749, |
| "grad_norm": 1.372504711151123, |
| "learning_rate": 0.00069088681367496, |
| "loss": 5.6342878341674805, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.10848708487084871, |
| "grad_norm": 1.4673429727554321, |
| "learning_rate": 0.0006906948064800376, |
| "loss": 5.346056938171387, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.10922509225092251, |
| "grad_norm": 1.4786832332611084, |
| "learning_rate": 0.0006905008248447296, |
| "loss": 5.530672073364258, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1099630996309963, |
| "grad_norm": 1.14403235912323, |
| "learning_rate": 0.0006903048698932245, |
| "loss": 5.126125812530518, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.11070110701107011, |
| "grad_norm": 1.4274934530258179, |
| "learning_rate": 0.0006901069427611469, |
| "loss": 5.36081600189209, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11143911439114391, |
| "grad_norm": 1.224621295928955, |
| "learning_rate": 0.0006899070445955507, |
| "loss": 5.192722797393799, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1121771217712177, |
| "grad_norm": 1.1289647817611694, |
| "learning_rate": 0.0006897051765549127, |
| "loss": 5.438913822174072, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.11291512915129151, |
| "grad_norm": 1.3115386962890625, |
| "learning_rate": 0.0006895013398091256, |
| "loss": 5.402008533477783, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.11365313653136531, |
| "grad_norm": 1.4054917097091675, |
| "learning_rate": 0.0006892955355394918, |
| "loss": 5.593056678771973, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.11439114391143912, |
| "grad_norm": 1.2027919292449951, |
| "learning_rate": 0.0006890877649387155, |
| "loss": 5.359673500061035, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.11512915129151291, |
| "grad_norm": 1.1730295419692993, |
| "learning_rate": 0.0006888780292108971, |
| "loss": 5.578248023986816, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.11586715867158671, |
| "grad_norm": 1.2120227813720703, |
| "learning_rate": 0.0006886663295715254, |
| "loss": 5.643091678619385, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.11660516605166052, |
| "grad_norm": 1.2268054485321045, |
| "learning_rate": 0.0006884526672474704, |
| "loss": 5.381834030151367, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.11734317343173432, |
| "grad_norm": 1.3834030628204346, |
| "learning_rate": 0.0006882370434769769, |
| "loss": 5.615821838378906, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.11808118081180811, |
| "grad_norm": 1.7289725542068481, |
| "learning_rate": 0.0006880194595096567, |
| "loss": 5.346611499786377, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11881918819188192, |
| "grad_norm": 1.434497356414795, |
| "learning_rate": 0.0006877999166064817, |
| "loss": 5.427518844604492, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.11955719557195572, |
| "grad_norm": 1.2287393808364868, |
| "learning_rate": 0.0006875784160397766, |
| "loss": 5.595153331756592, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.12029520295202951, |
| "grad_norm": 1.327791690826416, |
| "learning_rate": 0.0006873549590932111, |
| "loss": 5.294317722320557, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.12103321033210332, |
| "grad_norm": 1.358208179473877, |
| "learning_rate": 0.0006871295470617932, |
| "loss": 5.65151309967041, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.12177121771217712, |
| "grad_norm": 1.1277738809585571, |
| "learning_rate": 0.0006869021812518607, |
| "loss": 5.721683979034424, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.12250922509225093, |
| "grad_norm": 1.407368540763855, |
| "learning_rate": 0.0006866728629810749, |
| "loss": 5.473011016845703, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.12324723247232472, |
| "grad_norm": 1.3105313777923584, |
| "learning_rate": 0.0006864415935784116, |
| "loss": 5.670052528381348, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.12398523985239852, |
| "grad_norm": 1.4188215732574463, |
| "learning_rate": 0.0006862083743841545, |
| "loss": 5.493824005126953, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.12472324723247233, |
| "grad_norm": 1.2717117071151733, |
| "learning_rate": 0.0006859732067498869, |
| "loss": 5.524445056915283, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.12546125461254612, |
| "grad_norm": 1.1162827014923096, |
| "learning_rate": 0.0006857360920384839, |
| "loss": 5.39989709854126, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12619926199261994, |
| "grad_norm": 1.166066288948059, |
| "learning_rate": 0.0006854970316241045, |
| "loss": 5.495843887329102, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.12693726937269373, |
| "grad_norm": 1.9042305946350098, |
| "learning_rate": 0.0006852560268921838, |
| "loss": 5.403502464294434, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.12767527675276752, |
| "grad_norm": 1.0880268812179565, |
| "learning_rate": 0.0006850130792394249, |
| "loss": 5.439591407775879, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.12841328413284134, |
| "grad_norm": 1.0691889524459839, |
| "learning_rate": 0.0006847681900737907, |
| "loss": 5.504947185516357, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.12915129151291513, |
| "grad_norm": 1.2986247539520264, |
| "learning_rate": 0.0006845213608144958, |
| "loss": 5.43480920791626, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.12988929889298892, |
| "grad_norm": 1.1326215267181396, |
| "learning_rate": 0.0006842725928919984, |
| "loss": 5.448299407958984, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.13062730627306274, |
| "grad_norm": 1.1839748620986938, |
| "learning_rate": 0.0006840218877479918, |
| "loss": 5.370269775390625, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.13136531365313653, |
| "grad_norm": 1.3466558456420898, |
| "learning_rate": 0.0006837692468353963, |
| "loss": 5.503698348999023, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.13210332103321032, |
| "grad_norm": 1.2086361646652222, |
| "learning_rate": 0.0006835146716183503, |
| "loss": 5.3210554122924805, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.13284132841328414, |
| "grad_norm": 1.0457011461257935, |
| "learning_rate": 0.0006832581635722026, |
| "loss": 5.430882930755615, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13357933579335793, |
| "grad_norm": 1.2964543104171753, |
| "learning_rate": 0.0006829997241835029, |
| "loss": 5.3685688972473145, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.13431734317343175, |
| "grad_norm": 1.12661612033844, |
| "learning_rate": 0.0006827393549499941, |
| "loss": 5.366943359375, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.13505535055350554, |
| "grad_norm": 1.4851716756820679, |
| "learning_rate": 0.0006824770573806029, |
| "loss": 5.4124755859375, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.13579335793357933, |
| "grad_norm": 2.0913474559783936, |
| "learning_rate": 0.0006822128329954316, |
| "loss": 5.477243423461914, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.13653136531365315, |
| "grad_norm": 1.6759217977523804, |
| "learning_rate": 0.0006819466833257487, |
| "loss": 5.315946578979492, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.13726937269372694, |
| "grad_norm": 1.5114970207214355, |
| "learning_rate": 0.0006816786099139809, |
| "loss": 5.488532066345215, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.13800738007380073, |
| "grad_norm": 1.229912519454956, |
| "learning_rate": 0.0006814086143137029, |
| "loss": 5.235088348388672, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.13874538745387455, |
| "grad_norm": 1.1838656663894653, |
| "learning_rate": 0.0006811366980896299, |
| "loss": 5.650766372680664, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.13948339483394834, |
| "grad_norm": 1.2359192371368408, |
| "learning_rate": 0.0006808628628176073, |
| "loss": 5.51072883605957, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.14022140221402213, |
| "grad_norm": 1.2534209489822388, |
| "learning_rate": 0.0006805871100846018, |
| "loss": 5.4855170249938965, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.14095940959409595, |
| "grad_norm": 1.1044737100601196, |
| "learning_rate": 0.0006803094414886932, |
| "loss": 5.416131973266602, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.14169741697416974, |
| "grad_norm": 1.1578259468078613, |
| "learning_rate": 0.0006800298586390637, |
| "loss": 5.303211688995361, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.14243542435424356, |
| "grad_norm": 1.2732160091400146, |
| "learning_rate": 0.0006797483631559893, |
| "loss": 5.596409320831299, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.14317343173431735, |
| "grad_norm": 1.3185418844223022, |
| "learning_rate": 0.0006794649566708308, |
| "loss": 5.081386089324951, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.14391143911439114, |
| "grad_norm": 1.2399559020996094, |
| "learning_rate": 0.0006791796408260233, |
| "loss": 5.367499828338623, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.14464944649446496, |
| "grad_norm": 1.4244142770767212, |
| "learning_rate": 0.000678892417275068, |
| "loss": 5.420333385467529, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.14538745387453875, |
| "grad_norm": 1.079671025276184, |
| "learning_rate": 0.000678603287682521, |
| "loss": 5.452577114105225, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.14612546125461254, |
| "grad_norm": 1.2236963510513306, |
| "learning_rate": 0.0006783122537239852, |
| "loss": 5.477599143981934, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.14686346863468636, |
| "grad_norm": 1.2248585224151611, |
| "learning_rate": 0.0006780193170860999, |
| "loss": 5.277920722961426, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.14760147601476015, |
| "grad_norm": 1.1838936805725098, |
| "learning_rate": 0.0006777244794665307, |
| "loss": 5.3089447021484375, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14833948339483394, |
| "grad_norm": 1.0920487642288208, |
| "learning_rate": 0.0006774277425739603, |
| "loss": 5.312920570373535, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.14907749077490776, |
| "grad_norm": 1.5156118869781494, |
| "learning_rate": 0.0006771291081280784, |
| "loss": 5.365443229675293, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.14981549815498155, |
| "grad_norm": 1.1590790748596191, |
| "learning_rate": 0.0006768285778595714, |
| "loss": 5.726003646850586, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.15055350553505534, |
| "grad_norm": 1.1078206300735474, |
| "learning_rate": 0.0006765261535101128, |
| "loss": 5.49555778503418, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.15129151291512916, |
| "grad_norm": 1.1094913482666016, |
| "learning_rate": 0.0006762218368323528, |
| "loss": 5.463008880615234, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.15202952029520295, |
| "grad_norm": 1.043042540550232, |
| "learning_rate": 0.0006759156295899086, |
| "loss": 5.329763889312744, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.15276752767527677, |
| "grad_norm": 0.9944074153900146, |
| "learning_rate": 0.0006756075335573533, |
| "loss": 5.15687370300293, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.15350553505535056, |
| "grad_norm": 1.320447564125061, |
| "learning_rate": 0.0006752975505202067, |
| "loss": 5.366092681884766, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.15424354243542435, |
| "grad_norm": 0.9683417081832886, |
| "learning_rate": 0.0006749856822749241, |
| "loss": 5.286744117736816, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.15498154981549817, |
| "grad_norm": 1.0429140329360962, |
| "learning_rate": 0.0006746719306288863, |
| "loss": 5.36182165145874, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15571955719557196, |
| "grad_norm": 0.9789266586303711, |
| "learning_rate": 0.0006743562974003891, |
| "loss": 5.401203155517578, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.15645756457564575, |
| "grad_norm": 1.5062106847763062, |
| "learning_rate": 0.0006740387844186328, |
| "loss": 5.4269890785217285, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.15719557195571957, |
| "grad_norm": 1.2152825593948364, |
| "learning_rate": 0.0006737193935237112, |
| "loss": 5.164780616760254, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.15793357933579336, |
| "grad_norm": 1.0402345657348633, |
| "learning_rate": 0.0006733981265666012, |
| "loss": 5.193200588226318, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.15867158671586715, |
| "grad_norm": 1.123574137687683, |
| "learning_rate": 0.0006730749854091528, |
| "loss": 5.191850185394287, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.15940959409594097, |
| "grad_norm": 1.2188745737075806, |
| "learning_rate": 0.0006727499719240766, |
| "loss": 5.239185810089111, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.16014760147601476, |
| "grad_norm": 1.1848610639572144, |
| "learning_rate": 0.0006724230879949348, |
| "loss": 5.381966590881348, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.16088560885608857, |
| "grad_norm": 1.0746642351150513, |
| "learning_rate": 0.000672094335516129, |
| "loss": 5.212441444396973, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.16162361623616237, |
| "grad_norm": 1.389511227607727, |
| "learning_rate": 0.0006717637163928899, |
| "loss": 5.391989707946777, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.16236162361623616, |
| "grad_norm": 1.4301270246505737, |
| "learning_rate": 0.0006714312325412659, |
| "loss": 5.462432861328125, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16309963099630997, |
| "grad_norm": 0.9488272666931152, |
| "learning_rate": 0.000671096885888112, |
| "loss": 5.5252790451049805, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.16383763837638377, |
| "grad_norm": 1.0613595247268677, |
| "learning_rate": 0.0006707606783710791, |
| "loss": 5.263217926025391, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.16457564575645756, |
| "grad_norm": 1.04259192943573, |
| "learning_rate": 0.0006704226119386022, |
| "loss": 5.378625869750977, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.16531365313653137, |
| "grad_norm": 1.0206512212753296, |
| "learning_rate": 0.0006700826885498893, |
| "loss": 5.315357208251953, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.16605166051660517, |
| "grad_norm": 0.9734926819801331, |
| "learning_rate": 0.0006697409101749102, |
| "loss": 5.143043518066406, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.16678966789667896, |
| "grad_norm": 1.2763937711715698, |
| "learning_rate": 0.0006693972787943851, |
| "loss": 5.372148513793945, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.16752767527675277, |
| "grad_norm": 1.0929063558578491, |
| "learning_rate": 0.0006690517963997727, |
| "loss": 5.465537071228027, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.16826568265682657, |
| "grad_norm": 1.098317265510559, |
| "learning_rate": 0.0006687044649932588, |
| "loss": 5.2183990478515625, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.16900369003690036, |
| "grad_norm": 1.4758684635162354, |
| "learning_rate": 0.0006683552865877454, |
| "loss": 5.08128023147583, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.16974169741697417, |
| "grad_norm": 1.425257921218872, |
| "learning_rate": 0.0006680042632068382, |
| "loss": 5.4712233543396, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.17047970479704797, |
| "grad_norm": 1.0762962102890015, |
| "learning_rate": 0.000667651396884835, |
| "loss": 5.113118648529053, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.17121771217712178, |
| "grad_norm": 1.028893232345581, |
| "learning_rate": 0.0006672966896667142, |
| "loss": 5.485983848571777, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.17195571955719557, |
| "grad_norm": 0.9485227465629578, |
| "learning_rate": 0.0006669401436081229, |
| "loss": 5.1485090255737305, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.17269372693726937, |
| "grad_norm": 1.146479606628418, |
| "learning_rate": 0.0006665817607753645, |
| "loss": 5.232944011688232, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.17343173431734318, |
| "grad_norm": 1.1146334409713745, |
| "learning_rate": 0.0006662215432453878, |
| "loss": 5.381141662597656, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.17416974169741697, |
| "grad_norm": 1.4399679899215698, |
| "learning_rate": 0.0006658594931057739, |
| "loss": 5.011406421661377, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.17490774907749077, |
| "grad_norm": 1.268681287765503, |
| "learning_rate": 0.0006654956124547241, |
| "loss": 5.245846748352051, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.17564575645756458, |
| "grad_norm": 1.001254677772522, |
| "learning_rate": 0.0006651299034010487, |
| "loss": 5.424437522888184, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.17638376383763837, |
| "grad_norm": 1.2181994915008545, |
| "learning_rate": 0.0006647623680641542, |
| "loss": 5.456673622131348, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.17712177121771217, |
| "grad_norm": 1.1333999633789062, |
| "learning_rate": 0.0006643930085740306, |
| "loss": 5.315772533416748, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17785977859778598, |
| "grad_norm": 1.0992910861968994, |
| "learning_rate": 0.0006640218270712397, |
| "loss": 5.436305999755859, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.17859778597785977, |
| "grad_norm": 1.0746663808822632, |
| "learning_rate": 0.0006636488257069027, |
| "loss": 5.308970928192139, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1793357933579336, |
| "grad_norm": 1.0561422109603882, |
| "learning_rate": 0.0006632740066426873, |
| "loss": 5.426042079925537, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.18007380073800738, |
| "grad_norm": 1.031684160232544, |
| "learning_rate": 0.0006628973720507951, |
| "loss": 5.2547478675842285, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.18081180811808117, |
| "grad_norm": 1.0969058275222778, |
| "learning_rate": 0.0006625189241139498, |
| "loss": 5.28012752532959, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.181549815498155, |
| "grad_norm": 1.047112226486206, |
| "learning_rate": 0.0006621386650253838, |
| "loss": 5.20250129699707, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.18228782287822878, |
| "grad_norm": 0.9869337677955627, |
| "learning_rate": 0.0006617565969888257, |
| "loss": 5.25740909576416, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.18302583025830257, |
| "grad_norm": 1.0927937030792236, |
| "learning_rate": 0.0006613727222184874, |
| "loss": 5.5139288902282715, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1837638376383764, |
| "grad_norm": 1.2841873168945312, |
| "learning_rate": 0.000660987042939052, |
| "loss": 5.233647346496582, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.18450184501845018, |
| "grad_norm": 0.9890136122703552, |
| "learning_rate": 0.0006605995613856595, |
| "loss": 5.420958518981934, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18523985239852397, |
| "grad_norm": 0.8926162719726562, |
| "learning_rate": 0.0006602102798038957, |
| "loss": 5.308608055114746, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1859778597785978, |
| "grad_norm": 1.0019422769546509, |
| "learning_rate": 0.0006598192004497771, |
| "loss": 5.302347660064697, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.18671586715867158, |
| "grad_norm": 0.8486745953559875, |
| "learning_rate": 0.0006594263255897396, |
| "loss": 5.099376678466797, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.18745387453874537, |
| "grad_norm": 1.0783238410949707, |
| "learning_rate": 0.0006590316575006244, |
| "loss": 5.218788146972656, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1881918819188192, |
| "grad_norm": 0.9183611869812012, |
| "learning_rate": 0.0006586351984696653, |
| "loss": 5.240777969360352, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.18892988929889298, |
| "grad_norm": 0.9513900876045227, |
| "learning_rate": 0.0006582369507944747, |
| "loss": 5.222758769989014, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.1896678966789668, |
| "grad_norm": 0.9337455630302429, |
| "learning_rate": 0.0006578369167830314, |
| "loss": 5.062905311584473, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.1904059040590406, |
| "grad_norm": 1.158604383468628, |
| "learning_rate": 0.0006574350987536662, |
| "loss": 5.026293754577637, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.19114391143911438, |
| "grad_norm": 1.0550696849822998, |
| "learning_rate": 0.000657031499035049, |
| "loss": 5.1148905754089355, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1918819188191882, |
| "grad_norm": 0.9606300592422485, |
| "learning_rate": 0.0006566261199661753, |
| "loss": 5.163092613220215, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.192619926199262, |
| "grad_norm": 1.0590009689331055, |
| "learning_rate": 0.0006562189638963524, |
| "loss": 5.3179521560668945, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.19335793357933578, |
| "grad_norm": 0.9940695762634277, |
| "learning_rate": 0.0006558100331851859, |
| "loss": 5.129310607910156, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1940959409594096, |
| "grad_norm": 1.0227980613708496, |
| "learning_rate": 0.0006553993302025659, |
| "loss": 5.162182807922363, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1948339483394834, |
| "grad_norm": 1.0441575050354004, |
| "learning_rate": 0.0006549868573286539, |
| "loss": 5.2034454345703125, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.19557195571955718, |
| "grad_norm": 1.1191506385803223, |
| "learning_rate": 0.0006545726169538681, |
| "loss": 4.916297435760498, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.196309963099631, |
| "grad_norm": 1.1132999658584595, |
| "learning_rate": 0.00065415661147887, |
| "loss": 5.2382707595825195, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1970479704797048, |
| "grad_norm": 1.352728247642517, |
| "learning_rate": 0.0006537388433145504, |
| "loss": 5.228781700134277, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.1977859778597786, |
| "grad_norm": 1.0661629438400269, |
| "learning_rate": 0.0006533193148820159, |
| "loss": 5.341499328613281, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1985239852398524, |
| "grad_norm": 1.1771162748336792, |
| "learning_rate": 0.0006528980286125739, |
| "loss": 5.339306831359863, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1992619926199262, |
| "grad_norm": 0.9680821895599365, |
| "learning_rate": 0.0006524749869477192, |
| "loss": 5.367077827453613, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.0213592052459717, |
| "learning_rate": 0.00065205019233912, |
| "loss": 5.1391096115112305, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.2007380073800738, |
| "grad_norm": 0.8894791603088379, |
| "learning_rate": 0.0006516236472486032, |
| "loss": 5.218973159790039, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.2014760147601476, |
| "grad_norm": 1.1796555519104004, |
| "learning_rate": 0.00065119535414814, |
| "loss": 5.110937118530273, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.2022140221402214, |
| "grad_norm": 0.9279013872146606, |
| "learning_rate": 0.0006507653155198322, |
| "loss": 5.301558494567871, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.2029520295202952, |
| "grad_norm": 0.9340477585792542, |
| "learning_rate": 0.000650333533855898, |
| "loss": 5.283820152282715, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.203690036900369, |
| "grad_norm": 1.0362911224365234, |
| "learning_rate": 0.0006499000116586562, |
| "loss": 4.982748031616211, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.2044280442804428, |
| "grad_norm": 1.1206884384155273, |
| "learning_rate": 0.0006494647514405131, |
| "loss": 4.973568916320801, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.2051660516605166, |
| "grad_norm": 1.0366051197052002, |
| "learning_rate": 0.0006490277557239472, |
| "loss": 5.242402076721191, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.2059040590405904, |
| "grad_norm": 1.0412499904632568, |
| "learning_rate": 0.000648589027041495, |
| "loss": 5.113008499145508, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.2066420664206642, |
| "grad_norm": 1.0954289436340332, |
| "learning_rate": 0.0006481485679357359, |
| "loss": 5.448449611663818, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.207380073800738, |
| "grad_norm": 0.9032571911811829, |
| "learning_rate": 0.0006477063809592778, |
| "loss": 4.939189910888672, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.20811808118081182, |
| "grad_norm": 0.890612006187439, |
| "learning_rate": 0.0006472624686747421, |
| "loss": 5.256400108337402, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.2088560885608856, |
| "grad_norm": 0.9753661751747131, |
| "learning_rate": 0.000646816833654749, |
| "loss": 5.353178024291992, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.2095940959409594, |
| "grad_norm": 0.8233433365821838, |
| "learning_rate": 0.0006463694784819029, |
| "loss": 5.223405838012695, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.21033210332103322, |
| "grad_norm": 1.0614573955535889, |
| "learning_rate": 0.0006459204057487762, |
| "loss": 5.132536888122559, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.211070110701107, |
| "grad_norm": 1.074107050895691, |
| "learning_rate": 0.0006454696180578957, |
| "loss": 5.2558369636535645, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2118081180811808, |
| "grad_norm": 1.0157700777053833, |
| "learning_rate": 0.0006450171180217273, |
| "loss": 4.989593505859375, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.21254612546125462, |
| "grad_norm": 0.886896550655365, |
| "learning_rate": 0.0006445629082626595, |
| "loss": 5.041266441345215, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.2132841328413284, |
| "grad_norm": 0.8866286873817444, |
| "learning_rate": 0.0006441069914129903, |
| "loss": 5.1668171882629395, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.2140221402214022, |
| "grad_norm": 0.9136367440223694, |
| "learning_rate": 0.0006436493701149102, |
| "loss": 5.044548988342285, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21476014760147602, |
| "grad_norm": 1.0716575384140015, |
| "learning_rate": 0.0006431900470204876, |
| "loss": 4.962906837463379, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.2154981549815498, |
| "grad_norm": 1.0485093593597412, |
| "learning_rate": 0.0006427290247916537, |
| "loss": 5.0265655517578125, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.21623616236162363, |
| "grad_norm": 0.9726313352584839, |
| "learning_rate": 0.0006422663061001865, |
| "loss": 5.10546875, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.21697416974169742, |
| "grad_norm": 0.8890307545661926, |
| "learning_rate": 0.0006418018936276956, |
| "loss": 4.885697841644287, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2177121771217712, |
| "grad_norm": 1.256881594657898, |
| "learning_rate": 0.0006413357900656066, |
| "loss": 5.05020809173584, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.21845018450184503, |
| "grad_norm": 0.8236335515975952, |
| "learning_rate": 0.0006408679981151456, |
| "loss": 5.077518463134766, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.21918819188191882, |
| "grad_norm": 1.0636200904846191, |
| "learning_rate": 0.0006403985204873235, |
| "loss": 5.087857246398926, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.2199261992619926, |
| "grad_norm": 1.0351738929748535, |
| "learning_rate": 0.0006399273599029202, |
| "loss": 5.218321800231934, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.22066420664206643, |
| "grad_norm": 1.1184179782867432, |
| "learning_rate": 0.000639454519092469, |
| "loss": 5.41963529586792, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.22140221402214022, |
| "grad_norm": 1.005051851272583, |
| "learning_rate": 0.0006389800007962404, |
| "loss": 5.267976760864258, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.222140221402214, |
| "grad_norm": 0.8542754054069519, |
| "learning_rate": 0.0006385038077642268, |
| "loss": 5.143088340759277, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.22287822878228783, |
| "grad_norm": 1.0211315155029297, |
| "learning_rate": 0.0006380259427561262, |
| "loss": 5.287484169006348, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.22361623616236162, |
| "grad_norm": 0.9097702503204346, |
| "learning_rate": 0.000637546408541326, |
| "loss": 5.212584972381592, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.2243542435424354, |
| "grad_norm": 1.0342856645584106, |
| "learning_rate": 0.0006370652078988876, |
| "loss": 5.081629753112793, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.22509225092250923, |
| "grad_norm": 1.046463131904602, |
| "learning_rate": 0.0006365823436175296, |
| "loss": 5.043882369995117, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.22583025830258302, |
| "grad_norm": 0.9955232739448547, |
| "learning_rate": 0.0006360978184956121, |
| "loss": 5.135004997253418, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.22656826568265684, |
| "grad_norm": 0.8027132153511047, |
| "learning_rate": 0.0006356116353411203, |
| "loss": 5.337245941162109, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.22730627306273063, |
| "grad_norm": 1.2090736627578735, |
| "learning_rate": 0.0006351237969716482, |
| "loss": 5.095905780792236, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.22804428044280442, |
| "grad_norm": 1.090334177017212, |
| "learning_rate": 0.0006346343062143824, |
| "loss": 5.060598373413086, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.22878228782287824, |
| "grad_norm": 1.190928339958191, |
| "learning_rate": 0.0006341431659060856, |
| "loss": 5.230974197387695, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.22952029520295203, |
| "grad_norm": 1.0362082719802856, |
| "learning_rate": 0.0006336503788930801, |
| "loss": 4.835149765014648, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.23025830258302582, |
| "grad_norm": 1.1221998929977417, |
| "learning_rate": 0.0006331559480312316, |
| "loss": 5.359483242034912, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.23099630996309964, |
| "grad_norm": 0.8904932737350464, |
| "learning_rate": 0.0006326598761859323, |
| "loss": 5.057035446166992, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.23173431734317343, |
| "grad_norm": 0.9242574572563171, |
| "learning_rate": 0.0006321621662320847, |
| "loss": 5.011726379394531, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.23247232472324722, |
| "grad_norm": 1.059004306793213, |
| "learning_rate": 0.0006316628210540842, |
| "loss": 4.693303108215332, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.23321033210332104, |
| "grad_norm": 1.2370541095733643, |
| "learning_rate": 0.0006311618435458034, |
| "loss": 5.188898086547852, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.23394833948339483, |
| "grad_norm": 1.0214468240737915, |
| "learning_rate": 0.0006306592366105744, |
| "loss": 5.003267288208008, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.23468634686346865, |
| "grad_norm": 0.9486777782440186, |
| "learning_rate": 0.0006301550031611726, |
| "loss": 4.848117828369141, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.23542435424354244, |
| "grad_norm": 0.9645982980728149, |
| "learning_rate": 0.0006296491461197996, |
| "loss": 5.02429723739624, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.23616236162361623, |
| "grad_norm": 1.2168879508972168, |
| "learning_rate": 0.0006291416684180662, |
| "loss": 5.0632429122924805, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.23690036900369005, |
| "grad_norm": 0.8020527362823486, |
| "learning_rate": 0.0006286325729969753, |
| "loss": 4.867977142333984, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.23763837638376384, |
| "grad_norm": 0.900245726108551, |
| "learning_rate": 0.0006281218628069054, |
| "loss": 4.880187511444092, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.23837638376383763, |
| "grad_norm": 0.9947592616081238, |
| "learning_rate": 0.0006276095408075927, |
| "loss": 5.083812236785889, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.23911439114391145, |
| "grad_norm": 0.8965998888015747, |
| "learning_rate": 0.0006270956099681148, |
| "loss": 4.908682823181152, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.23985239852398524, |
| "grad_norm": 1.1312414407730103, |
| "learning_rate": 0.0006265800732668727, |
| "loss": 5.093230247497559, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.24059040590405903, |
| "grad_norm": 0.925629734992981, |
| "learning_rate": 0.0006260629336915741, |
| "loss": 4.874239444732666, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.24132841328413285, |
| "grad_norm": 1.307646632194519, |
| "learning_rate": 0.0006255441942392159, |
| "loss": 5.057682514190674, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.24206642066420664, |
| "grad_norm": 1.0218299627304077, |
| "learning_rate": 0.0006250238579160666, |
| "loss": 5.127986907958984, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.24280442804428043, |
| "grad_norm": 0.8645507097244263, |
| "learning_rate": 0.0006245019277376496, |
| "loss": 5.13686466217041, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.24354243542435425, |
| "grad_norm": 0.9296532273292542, |
| "learning_rate": 0.0006239784067287245, |
| "loss": 5.124481678009033, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.24428044280442804, |
| "grad_norm": 0.9728212952613831, |
| "learning_rate": 0.0006234532979232711, |
| "loss": 5.0022687911987305, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.24501845018450186, |
| "grad_norm": 0.8225215077400208, |
| "learning_rate": 0.0006229266043644702, |
| "loss": 4.9633378982543945, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.24575645756457565, |
| "grad_norm": 0.960574209690094, |
| "learning_rate": 0.0006223983291046875, |
| "loss": 4.844850540161133, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.24649446494464944, |
| "grad_norm": 0.9003048539161682, |
| "learning_rate": 0.0006218684752054549, |
| "loss": 5.180695056915283, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.24723247232472326, |
| "grad_norm": 0.9519006013870239, |
| "learning_rate": 0.0006213370457374527, |
| "loss": 4.989326477050781, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.24797047970479705, |
| "grad_norm": 0.9743554592132568, |
| "learning_rate": 0.0006208040437804927, |
| "loss": 4.731540679931641, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.24870848708487084, |
| "grad_norm": 0.9855546951293945, |
| "learning_rate": 0.0006202694724234994, |
| "loss": 5.105901718139648, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.24944649446494466, |
| "grad_norm": 1.8261312246322632, |
| "learning_rate": 0.0006197333347644928, |
| "loss": 5.079566478729248, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.25018450184501845, |
| "grad_norm": 1.058996558189392, |
| "learning_rate": 0.0006191956339105701, |
| "loss": 4.985716819763184, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.25092250922509224, |
| "grad_norm": 1.018185019493103, |
| "learning_rate": 0.0006186563729778875, |
| "loss": 4.921426296234131, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.25166051660516603, |
| "grad_norm": 1.154246211051941, |
| "learning_rate": 0.0006181155550916423, |
| "loss": 5.044010162353516, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2523985239852399, |
| "grad_norm": 0.9054587483406067, |
| "learning_rate": 0.0006175731833860554, |
| "loss": 4.953484535217285, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.25313653136531367, |
| "grad_norm": 0.8154107928276062, |
| "learning_rate": 0.0006170292610043523, |
| "loss": 5.044363975524902, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.25387453874538746, |
| "grad_norm": 1.3822500705718994, |
| "learning_rate": 0.0006164837910987449, |
| "loss": 5.227883338928223, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.25461254612546125, |
| "grad_norm": 0.9022698402404785, |
| "learning_rate": 0.000615936776830414, |
| "loss": 4.860300064086914, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.25535055350553504, |
| "grad_norm": 1.0594429969787598, |
| "learning_rate": 0.0006153882213694903, |
| "loss": 5.256074905395508, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.25608856088560883, |
| "grad_norm": 0.9493646025657654, |
| "learning_rate": 0.0006148381278950362, |
| "loss": 4.957509994506836, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2568265682656827, |
| "grad_norm": 1.0270938873291016, |
| "learning_rate": 0.0006142864995950273, |
| "loss": 4.809982776641846, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.25756457564575647, |
| "grad_norm": 1.663167953491211, |
| "learning_rate": 0.0006137333396663342, |
| "loss": 4.888598918914795, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.25830258302583026, |
| "grad_norm": 0.983447253704071, |
| "learning_rate": 0.0006131786513147038, |
| "loss": 5.165590763092041, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.25904059040590405, |
| "grad_norm": 0.980798065662384, |
| "learning_rate": 0.0006126224377547408, |
| "loss": 4.966999053955078, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.25977859778597784, |
| "grad_norm": 1.074250340461731, |
| "learning_rate": 0.0006120647022098887, |
| "loss": 4.936653137207031, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2605166051660517, |
| "grad_norm": 0.9961046576499939, |
| "learning_rate": 0.0006115054479124115, |
| "loss": 5.0761308670043945, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.2612546125461255, |
| "grad_norm": 0.937942385673523, |
| "learning_rate": 0.0006109446781033752, |
| "loss": 4.909850597381592, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.26199261992619927, |
| "grad_norm": 1.0551375150680542, |
| "learning_rate": 0.0006103823960326283, |
| "loss": 4.967006683349609, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.26273062730627306, |
| "grad_norm": 1.0866034030914307, |
| "learning_rate": 0.0006098186049587834, |
| "loss": 5.049051284790039, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.26346863468634685, |
| "grad_norm": 1.0815985202789307, |
| "learning_rate": 0.0006092533081491987, |
| "loss": 4.931700229644775, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.26420664206642064, |
| "grad_norm": 1.0863465070724487, |
| "learning_rate": 0.000608686508879958, |
| "loss": 5.032581329345703, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.2649446494464945, |
| "grad_norm": 0.871529757976532, |
| "learning_rate": 0.000608118210435853, |
| "loss": 5.066904544830322, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2656826568265683, |
| "grad_norm": 0.9786545038223267, |
| "learning_rate": 0.0006075484161103631, |
| "loss": 5.073785305023193, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.26642066420664207, |
| "grad_norm": 0.8924750089645386, |
| "learning_rate": 0.000606977129205637, |
| "loss": 4.997740745544434, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.26715867158671586, |
| "grad_norm": 1.49006986618042, |
| "learning_rate": 0.0006064043530324738, |
| "loss": 5.006748676300049, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.26789667896678965, |
| "grad_norm": 1.0208152532577515, |
| "learning_rate": 0.0006058300909103026, |
| "loss": 5.057985305786133, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.2686346863468635, |
| "grad_norm": 0.8836379051208496, |
| "learning_rate": 0.000605254346167165, |
| "loss": 4.999290466308594, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.2693726937269373, |
| "grad_norm": 0.8716019988059998, |
| "learning_rate": 0.0006046771221396938, |
| "loss": 5.058474540710449, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2701107011070111, |
| "grad_norm": 1.1286225318908691, |
| "learning_rate": 0.0006040984221730958, |
| "loss": 4.990628719329834, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.27084870848708487, |
| "grad_norm": 1.099913477897644, |
| "learning_rate": 0.0006035182496211308, |
| "loss": 4.981925010681152, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.27158671586715866, |
| "grad_norm": 1.0594391822814941, |
| "learning_rate": 0.0006029366078460929, |
| "loss": 4.859918594360352, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.27232472324723245, |
| "grad_norm": 0.8651653528213501, |
| "learning_rate": 0.0006023535002187907, |
| "loss": 4.930809020996094, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.2730627306273063, |
| "grad_norm": 0.9700394868850708, |
| "learning_rate": 0.0006017689301185279, |
| "loss": 4.7630720138549805, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2738007380073801, |
| "grad_norm": 0.9684885740280151, |
| "learning_rate": 0.000601182900933084, |
| "loss": 4.800506114959717, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2745387453874539, |
| "grad_norm": 1.2140804529190063, |
| "learning_rate": 0.0006005954160586941, |
| "loss": 5.034149646759033, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.27527675276752767, |
| "grad_norm": 1.0811138153076172, |
| "learning_rate": 0.0006000064789000295, |
| "loss": 4.837162494659424, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.27601476014760146, |
| "grad_norm": 1.328092098236084, |
| "learning_rate": 0.0005994160928701782, |
| "loss": 5.215338706970215, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.2767527675276753, |
| "grad_norm": 0.9813052415847778, |
| "learning_rate": 0.0005988242613906248, |
| "loss": 5.164502143859863, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.2774907749077491, |
| "grad_norm": 1.1087919473648071, |
| "learning_rate": 0.0005982309878912306, |
| "loss": 5.113296031951904, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2782287822878229, |
| "grad_norm": 1.0566635131835938, |
| "learning_rate": 0.000597636275810214, |
| "loss": 4.566821098327637, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.2789667896678967, |
| "grad_norm": 1.1309762001037598, |
| "learning_rate": 0.0005970401285941305, |
| "loss": 5.184887886047363, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.27970479704797047, |
| "grad_norm": 1.3037056922912598, |
| "learning_rate": 0.0005964425496978528, |
| "loss": 4.654736042022705, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.28044280442804426, |
| "grad_norm": 1.0882046222686768, |
| "learning_rate": 0.0005958435425845504, |
| "loss": 4.828828811645508, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2811808118081181, |
| "grad_norm": 0.9877819418907166, |
| "learning_rate": 0.0005952431107256698, |
| "loss": 4.909351348876953, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2819188191881919, |
| "grad_norm": 1.0387706756591797, |
| "learning_rate": 0.0005946412576009148, |
| "loss": 4.700501441955566, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.2826568265682657, |
| "grad_norm": 0.9511588215827942, |
| "learning_rate": 0.0005940379866982255, |
| "loss": 4.84822940826416, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.2833948339483395, |
| "grad_norm": 1.4258911609649658, |
| "learning_rate": 0.0005934333015137585, |
| "loss": 4.82274055480957, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.28413284132841327, |
| "grad_norm": 0.9327899217605591, |
| "learning_rate": 0.0005928272055518667, |
| "loss": 4.844176292419434, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2848708487084871, |
| "grad_norm": 0.9245155453681946, |
| "learning_rate": 0.0005922197023250793, |
| "loss": 5.153466701507568, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2856088560885609, |
| "grad_norm": 1.0576754808425903, |
| "learning_rate": 0.0005916107953540805, |
| "loss": 4.96760368347168, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2863468634686347, |
| "grad_norm": 0.8730959892272949, |
| "learning_rate": 0.0005910004881676898, |
| "loss": 4.808976650238037, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2870848708487085, |
| "grad_norm": 0.8937351107597351, |
| "learning_rate": 0.0005903887843028418, |
| "loss": 4.953003883361816, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.2878228782287823, |
| "grad_norm": 0.9199606776237488, |
| "learning_rate": 0.0005897756873045648, |
| "loss": 5.063399314880371, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.28856088560885607, |
| "grad_norm": 0.9909579753875732, |
| "learning_rate": 0.0005891612007259613, |
| "loss": 4.7940473556518555, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2892988929889299, |
| "grad_norm": 0.9309024214744568, |
| "learning_rate": 0.0005885453281281863, |
| "loss": 4.881161689758301, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.2900369003690037, |
| "grad_norm": 1.1199829578399658, |
| "learning_rate": 0.0005879280730804277, |
| "loss": 5.138465404510498, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.2907749077490775, |
| "grad_norm": 0.9535595178604126, |
| "learning_rate": 0.000587309439159885, |
| "loss": 4.985682487487793, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2915129151291513, |
| "grad_norm": 0.9754979014396667, |
| "learning_rate": 0.0005866894299517488, |
| "loss": 4.827736854553223, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.2922509225092251, |
| "grad_norm": 0.9567784667015076, |
| "learning_rate": 0.0005860680490491798, |
| "loss": 4.916905879974365, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.29298892988929887, |
| "grad_norm": 0.9050018191337585, |
| "learning_rate": 0.0005854453000532884, |
| "loss": 5.034615993499756, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2937269372693727, |
| "grad_norm": 0.8965482115745544, |
| "learning_rate": 0.0005848211865731131, |
| "loss": 4.918941497802734, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2944649446494465, |
| "grad_norm": 0.9906476140022278, |
| "learning_rate": 0.0005841957122256004, |
| "loss": 4.973904609680176, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2952029520295203, |
| "grad_norm": 0.9818461537361145, |
| "learning_rate": 0.0005835688806355835, |
| "loss": 4.993786811828613, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2959409594095941, |
| "grad_norm": 0.99074786901474, |
| "learning_rate": 0.0005829406954357611, |
| "loss": 5.0351457595825195, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2966789667896679, |
| "grad_norm": 0.9858592748641968, |
| "learning_rate": 0.0005823111602666765, |
| "loss": 4.854518413543701, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.2974169741697417, |
| "grad_norm": 1.0143122673034668, |
| "learning_rate": 0.0005816802787766969, |
| "loss": 4.9962921142578125, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.2981549815498155, |
| "grad_norm": 0.8965126276016235, |
| "learning_rate": 0.0005810480546219914, |
| "loss": 4.845615386962891, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2988929889298893, |
| "grad_norm": 0.9247124791145325, |
| "learning_rate": 0.0005804144914665105, |
| "loss": 4.576415061950684, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2996309963099631, |
| "grad_norm": 1.0036989450454712, |
| "learning_rate": 0.0005797795929819646, |
| "loss": 4.833454132080078, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.3003690036900369, |
| "grad_norm": 1.1179319620132446, |
| "learning_rate": 0.0005791433628478031, |
| "loss": 5.014064311981201, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.3011070110701107, |
| "grad_norm": 1.1040972471237183, |
| "learning_rate": 0.0005785058047511922, |
| "loss": 4.786684513092041, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.3018450184501845, |
| "grad_norm": 0.9538096785545349, |
| "learning_rate": 0.0005778669223869945, |
| "loss": 4.815490245819092, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.3025830258302583, |
| "grad_norm": 1.1620954275131226, |
| "learning_rate": 0.0005772267194577469, |
| "loss": 4.706133842468262, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3033210332103321, |
| "grad_norm": 1.137211799621582, |
| "learning_rate": 0.0005765851996736397, |
| "loss": 4.959315299987793, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.3040590405904059, |
| "grad_norm": 0.9818885922431946, |
| "learning_rate": 0.0005759423667524947, |
| "loss": 4.72605037689209, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.3047970479704797, |
| "grad_norm": 0.9897336959838867, |
| "learning_rate": 0.0005752982244197436, |
| "loss": 4.857034683227539, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.30553505535055353, |
| "grad_norm": 0.9276419281959534, |
| "learning_rate": 0.0005746527764084068, |
| "loss": 4.825818061828613, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.3062730627306273, |
| "grad_norm": 0.9956037998199463, |
| "learning_rate": 0.0005740060264590714, |
| "loss": 4.663302421569824, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.3070110701107011, |
| "grad_norm": 0.9424338340759277, |
| "learning_rate": 0.00057335797831987, |
| "loss": 4.876203536987305, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.3077490774907749, |
| "grad_norm": 0.9253562092781067, |
| "learning_rate": 0.000572708635746458, |
| "loss": 4.914989471435547, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.3084870848708487, |
| "grad_norm": 1.0256803035736084, |
| "learning_rate": 0.000572058002501993, |
| "loss": 4.6925859451293945, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.3092250922509225, |
| "grad_norm": 0.9552437663078308, |
| "learning_rate": 0.0005714060823571126, |
| "loss": 4.923905372619629, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.30996309963099633, |
| "grad_norm": 0.8474723696708679, |
| "learning_rate": 0.0005707528790899117, |
| "loss": 4.9794769287109375, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3107011070110701, |
| "grad_norm": 0.9562621712684631, |
| "learning_rate": 0.0005700983964859219, |
| "loss": 4.790196418762207, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.3114391143911439, |
| "grad_norm": 0.9205673336982727, |
| "learning_rate": 0.000569442638338089, |
| "loss": 4.9875407218933105, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.3121771217712177, |
| "grad_norm": 0.9803183674812317, |
| "learning_rate": 0.0005687856084467509, |
| "loss": 4.777838230133057, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.3129151291512915, |
| "grad_norm": 1.1361783742904663, |
| "learning_rate": 0.0005681273106196154, |
| "loss": 4.891695976257324, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.31365313653136534, |
| "grad_norm": 0.937116265296936, |
| "learning_rate": 0.0005674677486717386, |
| "loss": 4.8178324699401855, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.31439114391143913, |
| "grad_norm": 0.7977975606918335, |
| "learning_rate": 0.000566806926425503, |
| "loss": 4.76682710647583, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.3151291512915129, |
| "grad_norm": 1.0328474044799805, |
| "learning_rate": 0.0005661448477105944, |
| "loss": 4.845677852630615, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.3158671586715867, |
| "grad_norm": 0.977131187915802, |
| "learning_rate": 0.0005654815163639804, |
| "loss": 4.799696922302246, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.3166051660516605, |
| "grad_norm": 1.0650629997253418, |
| "learning_rate": 0.0005648169362298881, |
| "loss": 4.999658584594727, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.3173431734317343, |
| "grad_norm": 1.0764038562774658, |
| "learning_rate": 0.0005641511111597818, |
| "loss": 4.789190292358398, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.31808118081180814, |
| "grad_norm": 0.8251600861549377, |
| "learning_rate": 0.0005634840450123405, |
| "loss": 4.80035400390625, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.31881918819188193, |
| "grad_norm": 0.9509308338165283, |
| "learning_rate": 0.0005628157416534356, |
| "loss": 4.975335597991943, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.3195571955719557, |
| "grad_norm": 0.9815534353256226, |
| "learning_rate": 0.000562146204956109, |
| "loss": 4.860112190246582, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.3202952029520295, |
| "grad_norm": 1.0006123781204224, |
| "learning_rate": 0.0005614754388005494, |
| "loss": 4.970834732055664, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.3210332103321033, |
| "grad_norm": 0.9528962969779968, |
| "learning_rate": 0.0005608034470740712, |
| "loss": 4.864804267883301, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.32177121771217715, |
| "grad_norm": 0.9165347218513489, |
| "learning_rate": 0.0005601302336710914, |
| "loss": 4.844846725463867, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.32250922509225094, |
| "grad_norm": 0.8079850673675537, |
| "learning_rate": 0.0005594558024931068, |
| "loss": 4.501960754394531, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.32324723247232473, |
| "grad_norm": 1.0351104736328125, |
| "learning_rate": 0.000558780157448672, |
| "loss": 4.843764305114746, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.3239852398523985, |
| "grad_norm": 1.180903673171997, |
| "learning_rate": 0.0005581033024533757, |
| "loss": 4.818984508514404, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.3247232472324723, |
| "grad_norm": 1.0522313117980957, |
| "learning_rate": 0.0005574252414298192, |
| "loss": 4.750001907348633, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3254612546125461, |
| "grad_norm": 1.0479143857955933, |
| "learning_rate": 0.0005567459783075928, |
| "loss": 4.75580358505249, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.32619926199261995, |
| "grad_norm": 0.9943499565124512, |
| "learning_rate": 0.000556065517023254, |
| "loss": 4.778286933898926, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.32693726937269374, |
| "grad_norm": 1.0374329090118408, |
| "learning_rate": 0.0005553838615203031, |
| "loss": 4.718173027038574, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.32767527675276753, |
| "grad_norm": 0.8165338635444641, |
| "learning_rate": 0.0005547010157491621, |
| "loss": 4.73118257522583, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.3284132841328413, |
| "grad_norm": 0.840587854385376, |
| "learning_rate": 0.0005540169836671505, |
| "loss": 4.625949859619141, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.3291512915129151, |
| "grad_norm": 0.9079518914222717, |
| "learning_rate": 0.0005533317692384632, |
| "loss": 4.873010158538818, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.3298892988929889, |
| "grad_norm": 0.9068413376808167, |
| "learning_rate": 0.000552645376434147, |
| "loss": 4.96326208114624, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.33062730627306275, |
| "grad_norm": 0.91420578956604, |
| "learning_rate": 0.0005519578092320779, |
| "loss": 4.897895336151123, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.33136531365313654, |
| "grad_norm": 0.986501932144165, |
| "learning_rate": 0.0005512690716169378, |
| "loss": 5.1402740478515625, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.33210332103321033, |
| "grad_norm": 0.9404821395874023, |
| "learning_rate": 0.0005505791675801916, |
| "loss": 4.783200740814209, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3328413284132841, |
| "grad_norm": 0.9507829546928406, |
| "learning_rate": 0.0005498881011200641, |
| "loss": 4.688559532165527, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.3335793357933579, |
| "grad_norm": 0.9329268932342529, |
| "learning_rate": 0.0005491958762415166, |
| "loss": 4.877443790435791, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.33431734317343176, |
| "grad_norm": 1.0837727785110474, |
| "learning_rate": 0.0005485024969562237, |
| "loss": 4.7941789627075195, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.33505535055350555, |
| "grad_norm": 1.0305438041687012, |
| "learning_rate": 0.0005478079672825504, |
| "loss": 4.639592170715332, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.33579335793357934, |
| "grad_norm": 0.8832004070281982, |
| "learning_rate": 0.0005471122912455287, |
| "loss": 4.873642444610596, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.33653136531365313, |
| "grad_norm": 0.9681616425514221, |
| "learning_rate": 0.0005464154728768339, |
| "loss": 4.844632148742676, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.3372693726937269, |
| "grad_norm": 0.9066919088363647, |
| "learning_rate": 0.0005457175162147614, |
| "loss": 4.705622673034668, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.3380073800738007, |
| "grad_norm": 0.8449764251708984, |
| "learning_rate": 0.0005450184253042037, |
| "loss": 4.834818363189697, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.33874538745387456, |
| "grad_norm": 1.009404182434082, |
| "learning_rate": 0.0005443182041966266, |
| "loss": 4.893503665924072, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.33948339483394835, |
| "grad_norm": 0.9231082201004028, |
| "learning_rate": 0.0005436168569500456, |
| "loss": 4.946817398071289, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.34022140221402214, |
| "grad_norm": 0.9415441155433655, |
| "learning_rate": 0.0005429143876290025, |
| "loss": 4.941875457763672, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.34095940959409593, |
| "grad_norm": 0.8538420796394348, |
| "learning_rate": 0.0005422108003045423, |
| "loss": 4.770623207092285, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.3416974169741697, |
| "grad_norm": 1.1796035766601562, |
| "learning_rate": 0.0005415060990541887, |
| "loss": 5.057588577270508, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.34243542435424357, |
| "grad_norm": 0.9784668684005737, |
| "learning_rate": 0.0005408002879619213, |
| "loss": 4.873748779296875, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.34317343173431736, |
| "grad_norm": 0.7987930774688721, |
| "learning_rate": 0.0005400933711181515, |
| "loss": 4.990841865539551, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.34391143911439115, |
| "grad_norm": 0.9904403686523438, |
| "learning_rate": 0.0005393853526196988, |
| "loss": 4.766284942626953, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.34464944649446494, |
| "grad_norm": 1.3431742191314697, |
| "learning_rate": 0.0005386762365697678, |
| "loss": 4.805080413818359, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.34538745387453873, |
| "grad_norm": 0.9435102343559265, |
| "learning_rate": 0.0005379660270779224, |
| "loss": 4.853346824645996, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.3461254612546125, |
| "grad_norm": 0.9755591154098511, |
| "learning_rate": 0.0005372547282600649, |
| "loss": 4.719388008117676, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.34686346863468637, |
| "grad_norm": 0.8468083739280701, |
| "learning_rate": 0.0005365423442384097, |
| "loss": 4.8452301025390625, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.34760147601476016, |
| "grad_norm": 0.8244897723197937, |
| "learning_rate": 0.0005358288791414604, |
| "loss": 4.7897844314575195, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.34833948339483395, |
| "grad_norm": 1.2169724702835083, |
| "learning_rate": 0.0005351143371039861, |
| "loss": 4.922556400299072, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.34907749077490774, |
| "grad_norm": 0.7928814888000488, |
| "learning_rate": 0.0005343987222669969, |
| "loss": 4.509468078613281, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.34981549815498153, |
| "grad_norm": 0.8514037132263184, |
| "learning_rate": 0.0005336820387777202, |
| "loss": 4.7827959060668945, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.3505535055350554, |
| "grad_norm": 1.0094245672225952, |
| "learning_rate": 0.0005329642907895766, |
| "loss": 4.922459602355957, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.35129151291512917, |
| "grad_norm": 0.8496381044387817, |
| "learning_rate": 0.0005322454824621558, |
| "loss": 4.833901405334473, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.35202952029520296, |
| "grad_norm": 0.9164729714393616, |
| "learning_rate": 0.0005315256179611926, |
| "loss": 4.579873085021973, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.35276752767527675, |
| "grad_norm": 0.9636603593826294, |
| "learning_rate": 0.0005308047014585427, |
| "loss": 4.682124614715576, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.35350553505535054, |
| "grad_norm": 0.8201807141304016, |
| "learning_rate": 0.000530082737132158, |
| "loss": 4.792829513549805, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.35424354243542433, |
| "grad_norm": 1.1766455173492432, |
| "learning_rate": 0.0005293597291660638, |
| "loss": 4.957970142364502, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3549815498154982, |
| "grad_norm": 1.2056677341461182, |
| "learning_rate": 0.0005286356817503329, |
| "loss": 4.584798812866211, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.35571955719557197, |
| "grad_norm": 0.9210802316665649, |
| "learning_rate": 0.0005279105990810624, |
| "loss": 4.629232406616211, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.35645756457564576, |
| "grad_norm": 0.9124312400817871, |
| "learning_rate": 0.0005271844853603489, |
| "loss": 4.6753435134887695, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.35719557195571955, |
| "grad_norm": 0.9933983087539673, |
| "learning_rate": 0.0005264573447962644, |
| "loss": 4.6301984786987305, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.35793357933579334, |
| "grad_norm": 0.93276047706604, |
| "learning_rate": 0.0005257291816028317, |
| "loss": 4.541720390319824, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.3586715867158672, |
| "grad_norm": 1.028709053993225, |
| "learning_rate": 0.000525, |
| "loss": 4.660837650299072, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.359409594095941, |
| "grad_norm": 0.935407817363739, |
| "learning_rate": 0.0005242698042136208, |
| "loss": 4.810506820678711, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.36014760147601477, |
| "grad_norm": 0.9050341844558716, |
| "learning_rate": 0.000523538598475423, |
| "loss": 4.896993637084961, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.36088560885608856, |
| "grad_norm": 0.9780540466308594, |
| "learning_rate": 0.0005228063870229883, |
| "loss": 4.808036804199219, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.36162361623616235, |
| "grad_norm": 1.107608675956726, |
| "learning_rate": 0.0005220731740997273, |
| "loss": 4.784989833831787, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.36236162361623614, |
| "grad_norm": 1.0185608863830566, |
| "learning_rate": 0.0005213389639548539, |
| "loss": 4.635310173034668, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.36309963099631, |
| "grad_norm": 0.9982399940490723, |
| "learning_rate": 0.0005206037608433617, |
| "loss": 4.810551643371582, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3638376383763838, |
| "grad_norm": 0.7861829400062561, |
| "learning_rate": 0.0005198675690259988, |
| "loss": 4.704036712646484, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.36457564575645757, |
| "grad_norm": 0.935389518737793, |
| "learning_rate": 0.0005191303927692428, |
| "loss": 5.006328582763672, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.36531365313653136, |
| "grad_norm": 0.9794636368751526, |
| "learning_rate": 0.0005183922363452768, |
| "loss": 4.736790180206299, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.36605166051660515, |
| "grad_norm": 0.915691614151001, |
| "learning_rate": 0.0005176531040319643, |
| "loss": 4.851039409637451, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.36678966789667894, |
| "grad_norm": 1.203650712966919, |
| "learning_rate": 0.0005169130001128246, |
| "loss": 4.811964988708496, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3675276752767528, |
| "grad_norm": 1.001997947692871, |
| "learning_rate": 0.000516171928877007, |
| "loss": 4.62536096572876, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.3682656826568266, |
| "grad_norm": 0.9129559993743896, |
| "learning_rate": 0.0005154298946192679, |
| "loss": 4.895463466644287, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.36900369003690037, |
| "grad_norm": 0.9465571045875549, |
| "learning_rate": 0.0005146869016399432, |
| "loss": 4.418019771575928, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.36974169741697416, |
| "grad_norm": 1.1695398092269897, |
| "learning_rate": 0.0005139429542449265, |
| "loss": 4.949154376983643, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.37047970479704795, |
| "grad_norm": 0.9523534774780273, |
| "learning_rate": 0.0005131980567456417, |
| "loss": 4.633477687835693, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3712177121771218, |
| "grad_norm": 0.9152795076370239, |
| "learning_rate": 0.0005124522134590188, |
| "loss": 4.966670989990234, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.3719557195571956, |
| "grad_norm": 1.2700462341308594, |
| "learning_rate": 0.0005117054287074694, |
| "loss": 4.756679534912109, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3726937269372694, |
| "grad_norm": 1.0250760316848755, |
| "learning_rate": 0.0005109577068188609, |
| "loss": 5.008725166320801, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.37343173431734317, |
| "grad_norm": 1.0058673620224, |
| "learning_rate": 0.0005102090521264917, |
| "loss": 4.794961452484131, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.37416974169741696, |
| "grad_norm": 0.9521406292915344, |
| "learning_rate": 0.0005094594689690664, |
| "loss": 4.621726989746094, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.37490774907749075, |
| "grad_norm": 1.1385680437088013, |
| "learning_rate": 0.0005087089616906701, |
| "loss": 4.789394378662109, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3756457564575646, |
| "grad_norm": 1.1471185684204102, |
| "learning_rate": 0.0005079575346407434, |
| "loss": 4.895359039306641, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3763837638376384, |
| "grad_norm": 0.9710941910743713, |
| "learning_rate": 0.0005072051921740577, |
| "loss": 4.706118583679199, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3771217712177122, |
| "grad_norm": 1.0084307193756104, |
| "learning_rate": 0.0005064519386506892, |
| "loss": 4.653249740600586, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.37785977859778597, |
| "grad_norm": 0.8812188506126404, |
| "learning_rate": 0.000505697778435994, |
| "loss": 4.762184143066406, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.37859778597785976, |
| "grad_norm": 0.902651846408844, |
| "learning_rate": 0.0005049427159005829, |
| "loss": 4.499927520751953, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.3793357933579336, |
| "grad_norm": 0.9034081697463989, |
| "learning_rate": 0.000504186755420296, |
| "loss": 4.713489055633545, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3800738007380074, |
| "grad_norm": 0.9847108721733093, |
| "learning_rate": 0.000503429901376177, |
| "loss": 4.567604064941406, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.3808118081180812, |
| "grad_norm": 1.009543776512146, |
| "learning_rate": 0.0005026721581544485, |
| "loss": 4.737997055053711, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.381549815498155, |
| "grad_norm": 0.8869209289550781, |
| "learning_rate": 0.0005019135301464861, |
| "loss": 4.873485565185547, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.38228782287822877, |
| "grad_norm": 1.083253026008606, |
| "learning_rate": 0.0005011540217487924, |
| "loss": 4.698840618133545, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.38302583025830256, |
| "grad_norm": 0.9394760131835938, |
| "learning_rate": 0.0005003936373629732, |
| "loss": 4.629981994628906, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3837638376383764, |
| "grad_norm": 0.9423143863677979, |
| "learning_rate": 0.00049963238139571, |
| "loss": 4.612939834594727, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3845018450184502, |
| "grad_norm": 0.9476136565208435, |
| "learning_rate": 0.000498870258258736, |
| "loss": 4.849869728088379, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.385239852398524, |
| "grad_norm": 0.9509242177009583, |
| "learning_rate": 0.0004981072723688098, |
| "loss": 4.818325996398926, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3859778597785978, |
| "grad_norm": 0.834679901599884, |
| "learning_rate": 0.0004973434281476899, |
| "loss": 4.750872611999512, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.38671586715867157, |
| "grad_norm": 0.799146831035614, |
| "learning_rate": 0.0004965787300221089, |
| "loss": 4.632112503051758, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3874538745387454, |
| "grad_norm": 0.8336266875267029, |
| "learning_rate": 0.0004958131824237484, |
| "loss": 4.630362510681152, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3881918819188192, |
| "grad_norm": 0.9787878394126892, |
| "learning_rate": 0.0004950467897892132, |
| "loss": 4.63228702545166, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.388929889298893, |
| "grad_norm": 1.0535902976989746, |
| "learning_rate": 0.0004942795565600044, |
| "loss": 4.849504470825195, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3896678966789668, |
| "grad_norm": 0.953353226184845, |
| "learning_rate": 0.0004935114871824956, |
| "loss": 4.9335222244262695, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.3904059040590406, |
| "grad_norm": 0.9515429735183716, |
| "learning_rate": 0.0004927425861079057, |
| "loss": 4.6670451164245605, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.39114391143911437, |
| "grad_norm": 1.0168793201446533, |
| "learning_rate": 0.0004919728577922739, |
| "loss": 4.654256820678711, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3918819188191882, |
| "grad_norm": 0.9733904600143433, |
| "learning_rate": 0.000491202306696433, |
| "loss": 4.637208938598633, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.392619926199262, |
| "grad_norm": 0.9687494039535522, |
| "learning_rate": 0.0004904309372859844, |
| "loss": 4.994683742523193, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3933579335793358, |
| "grad_norm": 1.066312313079834, |
| "learning_rate": 0.0004896587540312722, |
| "loss": 4.801863670349121, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.3940959409594096, |
| "grad_norm": 0.9010938405990601, |
| "learning_rate": 0.0004888857614073565, |
| "loss": 4.627843856811523, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3948339483394834, |
| "grad_norm": 0.9718567728996277, |
| "learning_rate": 0.00048811196389398823, |
| "loss": 4.693809509277344, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3955719557195572, |
| "grad_norm": 1.1631206274032593, |
| "learning_rate": 0.00048733736597558264, |
| "loss": 4.649688720703125, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.396309963099631, |
| "grad_norm": 1.0241073369979858, |
| "learning_rate": 0.0004865619721411941, |
| "loss": 4.654960632324219, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3970479704797048, |
| "grad_norm": 0.9926833510398865, |
| "learning_rate": 0.0004857857868844891, |
| "loss": 4.601881504058838, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3977859778597786, |
| "grad_norm": 1.0237977504730225, |
| "learning_rate": 0.0004850088147037211, |
| "loss": 4.73300838470459, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.3985239852398524, |
| "grad_norm": 1.060038685798645, |
| "learning_rate": 0.0004842310601017036, |
| "loss": 4.862484931945801, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3992619926199262, |
| "grad_norm": 1.2825253009796143, |
| "learning_rate": 0.00048345252758578484, |
| "loss": 4.497199058532715, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.9599003791809082, |
| "learning_rate": 0.00048267322166782123, |
| "loss": 4.726795673370361, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.4007380073800738, |
| "grad_norm": 0.9577689170837402, |
| "learning_rate": 0.0004818931468641511, |
| "loss": 4.560695648193359, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.4014760147601476, |
| "grad_norm": 0.9004948139190674, |
| "learning_rate": 0.0004811123076955693, |
| "loss": 4.900054931640625, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.4022140221402214, |
| "grad_norm": 1.0973906517028809, |
| "learning_rate": 0.0004803307086872996, |
| "loss": 4.605217933654785, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4029520295202952, |
| "grad_norm": 0.9591420888900757, |
| "learning_rate": 0.0004795483543689701, |
| "loss": 4.580148696899414, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.40369003690036903, |
| "grad_norm": 0.9317168593406677, |
| "learning_rate": 0.00047876524927458554, |
| "loss": 4.676855087280273, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.4044280442804428, |
| "grad_norm": 0.8841069936752319, |
| "learning_rate": 0.0004779813979425022, |
| "loss": 4.510927677154541, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.4051660516605166, |
| "grad_norm": 0.8826556205749512, |
| "learning_rate": 0.0004771968049154005, |
| "loss": 4.688409805297852, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.4059040590405904, |
| "grad_norm": 1.0118300914764404, |
| "learning_rate": 0.00047641147474025973, |
| "loss": 4.612986087799072, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4066420664206642, |
| "grad_norm": 0.7752644419670105, |
| "learning_rate": 0.00047562541196833106, |
| "loss": 4.80881929397583, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.407380073800738, |
| "grad_norm": 0.8387100100517273, |
| "learning_rate": 0.000474838621155111, |
| "loss": 4.728845596313477, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.40811808118081183, |
| "grad_norm": 0.8829529285430908, |
| "learning_rate": 0.00047405110686031575, |
| "loss": 4.959627151489258, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.4088560885608856, |
| "grad_norm": 0.8168521523475647, |
| "learning_rate": 0.000473262873647854, |
| "loss": 4.744089603424072, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.4095940959409594, |
| "grad_norm": 0.8763787150382996, |
| "learning_rate": 0.000472473926085801, |
| "loss": 4.78373908996582, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.4103321033210332, |
| "grad_norm": 0.8329116106033325, |
| "learning_rate": 0.00047168426874637167, |
| "loss": 4.739480018615723, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.411070110701107, |
| "grad_norm": 0.8480055332183838, |
| "learning_rate": 0.0004708939062058946, |
| "loss": 4.604061126708984, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.4118081180811808, |
| "grad_norm": 0.8095789551734924, |
| "learning_rate": 0.0004701028430447852, |
| "loss": 4.522249221801758, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.41254612546125463, |
| "grad_norm": 1.0202033519744873, |
| "learning_rate": 0.00046931108384751897, |
| "loss": 4.50852632522583, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.4132841328413284, |
| "grad_norm": 1.0811773538589478, |
| "learning_rate": 0.00046851863320260544, |
| "loss": 4.552791118621826, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4140221402214022, |
| "grad_norm": 1.1033447980880737, |
| "learning_rate": 0.00046772549570256125, |
| "loss": 4.458186149597168, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.414760147601476, |
| "grad_norm": 0.8855783939361572, |
| "learning_rate": 0.00046693167594388357, |
| "loss": 4.8609724044799805, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.4154981549815498, |
| "grad_norm": 0.8844220042228699, |
| "learning_rate": 0.00046613717852702345, |
| "loss": 4.472495079040527, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.41623616236162364, |
| "grad_norm": 1.000057339668274, |
| "learning_rate": 0.0004653420080563592, |
| "loss": 4.571652412414551, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.41697416974169743, |
| "grad_norm": 1.2004189491271973, |
| "learning_rate": 0.0004645461691401697, |
| "loss": 4.222049713134766, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.4177121771217712, |
| "grad_norm": 0.891960859298706, |
| "learning_rate": 0.0004637496663906077, |
| "loss": 4.547060966491699, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.418450184501845, |
| "grad_norm": 0.895974338054657, |
| "learning_rate": 0.0004629525044236733, |
| "loss": 4.556779861450195, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.4191881918819188, |
| "grad_norm": 0.9765421152114868, |
| "learning_rate": 0.0004621546878591865, |
| "loss": 4.732317924499512, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.4199261992619926, |
| "grad_norm": 0.8740888237953186, |
| "learning_rate": 0.00046135622132076153, |
| "loss": 4.561002731323242, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.42066420664206644, |
| "grad_norm": 0.768431544303894, |
| "learning_rate": 0.00046055710943577896, |
| "loss": 4.428035259246826, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.42140221402214023, |
| "grad_norm": 0.9561269879341125, |
| "learning_rate": 0.0004597573568353595, |
| "loss": 4.324114799499512, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.422140221402214, |
| "grad_norm": 0.9126472473144531, |
| "learning_rate": 0.00045895696815433687, |
| "loss": 4.664113521575928, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.4228782287822878, |
| "grad_norm": 0.8882591128349304, |
| "learning_rate": 0.0004581559480312316, |
| "loss": 4.339204788208008, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.4236162361623616, |
| "grad_norm": 1.081982135772705, |
| "learning_rate": 0.00045735430110822303, |
| "loss": 4.641040802001953, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.42435424354243545, |
| "grad_norm": 0.7895275950431824, |
| "learning_rate": 0.0004565520320311235, |
| "loss": 4.488674163818359, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.42509225092250924, |
| "grad_norm": 0.9767966866493225, |
| "learning_rate": 0.0004557491454493504, |
| "loss": 5.026608943939209, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.42583025830258303, |
| "grad_norm": 0.8868175148963928, |
| "learning_rate": 0.0004549456460159004, |
| "loss": 4.576347351074219, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.4265682656826568, |
| "grad_norm": 0.8501465320587158, |
| "learning_rate": 0.00045414153838732135, |
| "loss": 4.619839668273926, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.4273062730627306, |
| "grad_norm": 0.8614507913589478, |
| "learning_rate": 0.00045333682722368597, |
| "loss": 4.661761283874512, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.4280442804428044, |
| "grad_norm": 1.0277959108352661, |
| "learning_rate": 0.0004525315171885648, |
| "loss": 4.562242031097412, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.42878228782287825, |
| "grad_norm": 0.9864504933357239, |
| "learning_rate": 0.00045172561294899884, |
| "loss": 4.4832258224487305, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.42952029520295204, |
| "grad_norm": 0.8841885924339294, |
| "learning_rate": 0.0004509191191754728, |
| "loss": 4.594321250915527, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.43025830258302583, |
| "grad_norm": 0.8487964272499084, |
| "learning_rate": 0.00045011204054188784, |
| "loss": 4.805062294006348, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.4309963099630996, |
| "grad_norm": 1.027441143989563, |
| "learning_rate": 0.0004493043817255347, |
| "loss": 4.6832685470581055, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.4317343173431734, |
| "grad_norm": 0.9376983046531677, |
| "learning_rate": 0.0004484961474070665, |
| "loss": 4.687745094299316, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.43247232472324726, |
| "grad_norm": 0.927667498588562, |
| "learning_rate": 0.00044768734227047146, |
| "loss": 4.67139196395874, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.43321033210332105, |
| "grad_norm": 0.8729023933410645, |
| "learning_rate": 0.00044687797100304596, |
| "loss": 4.648367404937744, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.43394833948339484, |
| "grad_norm": 0.9207971692085266, |
| "learning_rate": 0.0004460680382953672, |
| "loss": 4.687824249267578, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.43468634686346863, |
| "grad_norm": 0.8276870846748352, |
| "learning_rate": 0.00044525754884126634, |
| "loss": 4.622544288635254, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.4354243542435424, |
| "grad_norm": 0.9223991632461548, |
| "learning_rate": 0.0004444465073378007, |
| "loss": 4.5522003173828125, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4361623616236162, |
| "grad_norm": 1.1231549978256226, |
| "learning_rate": 0.00044363491848522737, |
| "loss": 4.543008804321289, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.43690036900369006, |
| "grad_norm": 0.867957592010498, |
| "learning_rate": 0.00044282278698697504, |
| "loss": 4.716594219207764, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.43763837638376385, |
| "grad_norm": 0.7886962890625, |
| "learning_rate": 0.0004420101175496176, |
| "loss": 4.6924920082092285, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.43837638376383764, |
| "grad_norm": 0.8600431680679321, |
| "learning_rate": 0.00044119691488284644, |
| "loss": 4.623996257781982, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.43911439114391143, |
| "grad_norm": 0.8535895347595215, |
| "learning_rate": 0.0004403831836994428, |
| "loss": 4.559450149536133, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.4398523985239852, |
| "grad_norm": 0.8784546256065369, |
| "learning_rate": 0.00043956892871525123, |
| "loss": 4.410243988037109, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.44059040590405907, |
| "grad_norm": 0.9997196197509766, |
| "learning_rate": 0.0004387541546491518, |
| "loss": 4.677160739898682, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.44132841328413286, |
| "grad_norm": 0.9354564547538757, |
| "learning_rate": 0.000437938866223033, |
| "loss": 4.577181816101074, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.44206642066420665, |
| "grad_norm": 0.8507137298583984, |
| "learning_rate": 0.00043712306816176365, |
| "loss": 4.933267593383789, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.44280442804428044, |
| "grad_norm": 0.7964354753494263, |
| "learning_rate": 0.0004363067651931667, |
| "loss": 4.742018222808838, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.44354243542435423, |
| "grad_norm": 0.8398452997207642, |
| "learning_rate": 0.0004354899620479909, |
| "loss": 4.496376991271973, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.444280442804428, |
| "grad_norm": 0.8302538990974426, |
| "learning_rate": 0.00043467266345988365, |
| "loss": 4.4834885597229, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.44501845018450187, |
| "grad_norm": 0.8685540556907654, |
| "learning_rate": 0.00043385487416536397, |
| "loss": 4.598426342010498, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.44575645756457566, |
| "grad_norm": 1.008470892906189, |
| "learning_rate": 0.0004330365989037941, |
| "loss": 4.579464912414551, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.44649446494464945, |
| "grad_norm": 0.9266964793205261, |
| "learning_rate": 0.00043221784241735315, |
| "loss": 4.776824474334717, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.44723247232472324, |
| "grad_norm": 0.8900343775749207, |
| "learning_rate": 0.00043139860945100864, |
| "loss": 4.573504447937012, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.44797047970479703, |
| "grad_norm": 0.9872782826423645, |
| "learning_rate": 0.0004305789047524901, |
| "loss": 4.563179969787598, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.4487084870848708, |
| "grad_norm": 0.8987732529640198, |
| "learning_rate": 0.00042975873307226, |
| "loss": 4.483942031860352, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.44944649446494467, |
| "grad_norm": 0.9567626714706421, |
| "learning_rate": 0.000428938099163488, |
| "loss": 4.630576133728027, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.45018450184501846, |
| "grad_norm": 0.8059940934181213, |
| "learning_rate": 0.000428117007782022, |
| "loss": 4.429983615875244, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.45092250922509225, |
| "grad_norm": 0.8970604538917542, |
| "learning_rate": 0.0004272954636863613, |
| "loss": 4.672665596008301, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.45166051660516604, |
| "grad_norm": 0.9387950301170349, |
| "learning_rate": 0.0004264734716376287, |
| "loss": 4.554316520690918, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.45239852398523983, |
| "grad_norm": 0.8920540809631348, |
| "learning_rate": 0.0004256510363995433, |
| "loss": 4.600342750549316, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.4531365313653137, |
| "grad_norm": 1.0435482263565063, |
| "learning_rate": 0.0004248281627383923, |
| "loss": 4.5729475021362305, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.45387453874538747, |
| "grad_norm": 0.8200010657310486, |
| "learning_rate": 0.0004240048554230039, |
| "loss": 4.369121551513672, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.45461254612546126, |
| "grad_norm": 0.9972869157791138, |
| "learning_rate": 0.0004231811192247195, |
| "loss": 4.570677757263184, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.45535055350553505, |
| "grad_norm": 0.9263824224472046, |
| "learning_rate": 0.00042235695891736585, |
| "loss": 4.355930328369141, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.45608856088560884, |
| "grad_norm": 1.002906084060669, |
| "learning_rate": 0.00042153237927722775, |
| "loss": 4.620849609375, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.45682656826568263, |
| "grad_norm": 0.9105566143989563, |
| "learning_rate": 0.00042070738508302003, |
| "loss": 4.353985786437988, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.4575645756457565, |
| "grad_norm": 0.8016074895858765, |
| "learning_rate": 0.0004198819811158601, |
| "loss": 4.468338966369629, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.45830258302583027, |
| "grad_norm": 0.8135733604431152, |
| "learning_rate": 0.00041905617215924, |
| "loss": 4.608132362365723, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.45904059040590406, |
| "grad_norm": 0.9293224215507507, |
| "learning_rate": 0.00041822996299899906, |
| "loss": 4.565390586853027, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.45977859778597785, |
| "grad_norm": 1.1056631803512573, |
| "learning_rate": 0.00041740335842329566, |
| "loss": 4.949249267578125, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.46051660516605164, |
| "grad_norm": 0.840045154094696, |
| "learning_rate": 0.00041657636322257993, |
| "loss": 4.710245609283447, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.4612546125461255, |
| "grad_norm": 0.9296345710754395, |
| "learning_rate": 0.0004157489821895657, |
| "loss": 4.73885440826416, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.4619926199261993, |
| "grad_norm": 0.8654890656471252, |
| "learning_rate": 0.0004149212201192029, |
| "loss": 4.420188903808594, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.46273062730627307, |
| "grad_norm": 1.0963070392608643, |
| "learning_rate": 0.0004140930818086497, |
| "loss": 4.5778985023498535, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.46346863468634686, |
| "grad_norm": 0.8319039940834045, |
| "learning_rate": 0.00041326457205724445, |
| "loss": 4.544205188751221, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.46420664206642065, |
| "grad_norm": 0.9679455757141113, |
| "learning_rate": 0.0004124356956664786, |
| "loss": 4.58363151550293, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.46494464944649444, |
| "grad_norm": 0.9498420357704163, |
| "learning_rate": 0.00041160645743996803, |
| "loss": 4.450014114379883, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4656826568265683, |
| "grad_norm": 0.8234408497810364, |
| "learning_rate": 0.0004107768621834257, |
| "loss": 4.5670857429504395, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.4664206642066421, |
| "grad_norm": 1.0177075862884521, |
| "learning_rate": 0.0004099469147046336, |
| "loss": 4.445223808288574, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.46715867158671587, |
| "grad_norm": 0.7691503167152405, |
| "learning_rate": 0.0004091166198134151, |
| "loss": 4.425694465637207, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.46789667896678966, |
| "grad_norm": 0.966654896736145, |
| "learning_rate": 0.00040828598232160696, |
| "loss": 4.650933265686035, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.46863468634686345, |
| "grad_norm": 1.0035220384597778, |
| "learning_rate": 0.0004074550070430312, |
| "loss": 4.69790506362915, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.4693726937269373, |
| "grad_norm": 0.8333247900009155, |
| "learning_rate": 0.0004066236987934677, |
| "loss": 4.4094438552856445, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.4701107011070111, |
| "grad_norm": 0.9272137880325317, |
| "learning_rate": 0.0004057920623906257, |
| "loss": 4.437854766845703, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.4708487084870849, |
| "grad_norm": 0.9257310628890991, |
| "learning_rate": 0.0004049601026541166, |
| "loss": 4.607282638549805, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.47158671586715867, |
| "grad_norm": 0.9032636880874634, |
| "learning_rate": 0.0004041278244054253, |
| "loss": 4.529732704162598, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.47232472324723246, |
| "grad_norm": 0.8978585600852966, |
| "learning_rate": 0.0004032952324678826, |
| "loss": 4.577826499938965, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.47306273062730625, |
| "grad_norm": 0.9967110753059387, |
| "learning_rate": 0.0004024623316666376, |
| "loss": 4.280439376831055, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.4738007380073801, |
| "grad_norm": 0.9799245595932007, |
| "learning_rate": 0.00040162912682862884, |
| "loss": 4.567631721496582, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.4745387453874539, |
| "grad_norm": 0.9125800728797913, |
| "learning_rate": 0.00040079562278255726, |
| "loss": 4.556615352630615, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.4752767527675277, |
| "grad_norm": 0.8560841679573059, |
| "learning_rate": 0.00039996182435885744, |
| "loss": 4.567816734313965, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.47601476014760147, |
| "grad_norm": 0.8384515643119812, |
| "learning_rate": 0.00039912773638967053, |
| "loss": 4.32409143447876, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.47675276752767526, |
| "grad_norm": 0.9469295144081116, |
| "learning_rate": 0.0003982933637088151, |
| "loss": 4.505819797515869, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4774907749077491, |
| "grad_norm": 0.8418838381767273, |
| "learning_rate": 0.0003974587111517601, |
| "loss": 4.288963317871094, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.4782287822878229, |
| "grad_norm": 0.9017887711524963, |
| "learning_rate": 0.00039662378355559636, |
| "loss": 4.349027633666992, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4789667896678967, |
| "grad_norm": 0.9656051993370056, |
| "learning_rate": 0.00039578858575900857, |
| "loss": 4.5458478927612305, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4797047970479705, |
| "grad_norm": 0.8434630632400513, |
| "learning_rate": 0.0003949531226022474, |
| "loss": 4.536887168884277, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.48044280442804427, |
| "grad_norm": 0.8146916627883911, |
| "learning_rate": 0.0003941173989271013, |
| "loss": 4.554960250854492, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.48118081180811806, |
| "grad_norm": 0.8592056035995483, |
| "learning_rate": 0.0003932814195768687, |
| "loss": 4.47853422164917, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.4819188191881919, |
| "grad_norm": 0.8136284351348877, |
| "learning_rate": 0.0003924451893963294, |
| "loss": 4.614603042602539, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4826568265682657, |
| "grad_norm": 0.8898813724517822, |
| "learning_rate": 0.0003916087132317173, |
| "loss": 4.604781150817871, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4833948339483395, |
| "grad_norm": 0.9451072216033936, |
| "learning_rate": 0.0003907719959306915, |
| "loss": 4.379412651062012, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.4841328413284133, |
| "grad_norm": 1.0912781953811646, |
| "learning_rate": 0.0003899350423423087, |
| "loss": 4.53802490234375, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.48487084870848707, |
| "grad_norm": 0.9553581476211548, |
| "learning_rate": 0.0003890978573169949, |
| "loss": 4.305476188659668, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.48560885608856086, |
| "grad_norm": 0.942167341709137, |
| "learning_rate": 0.00038826044570651756, |
| "loss": 4.399786949157715, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4863468634686347, |
| "grad_norm": 0.9437850117683411, |
| "learning_rate": 0.00038742281236395703, |
| "loss": 4.361236572265625, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.4870848708487085, |
| "grad_norm": 0.9068073034286499, |
| "learning_rate": 0.00038658496214367873, |
| "loss": 4.441727638244629, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4878228782287823, |
| "grad_norm": 0.9844712615013123, |
| "learning_rate": 0.00038574689990130513, |
| "loss": 4.4561309814453125, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.4885608856088561, |
| "grad_norm": 0.8944956064224243, |
| "learning_rate": 0.00038490863049368704, |
| "loss": 4.5960493087768555, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.48929889298892987, |
| "grad_norm": 0.8984336853027344, |
| "learning_rate": 0.0003840701587788765, |
| "loss": 4.440349578857422, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4900369003690037, |
| "grad_norm": 1.0019009113311768, |
| "learning_rate": 0.0003832314896160973, |
| "loss": 4.5855865478515625, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.4907749077490775, |
| "grad_norm": 0.949760913848877, |
| "learning_rate": 0.00038239262786571787, |
| "loss": 4.4265828132629395, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.4915129151291513, |
| "grad_norm": 1.0857264995574951, |
| "learning_rate": 0.0003815535783892229, |
| "loss": 4.488886833190918, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4922509225092251, |
| "grad_norm": 1.0607296228408813, |
| "learning_rate": 0.00038071434604918463, |
| "loss": 4.221587657928467, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.4929889298892989, |
| "grad_norm": 1.056148648262024, |
| "learning_rate": 0.0003798749357092352, |
| "loss": 4.554340362548828, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.49372693726937267, |
| "grad_norm": 0.8420839309692383, |
| "learning_rate": 0.00037903535223403855, |
| "loss": 4.401950359344482, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4944649446494465, |
| "grad_norm": 0.8287214040756226, |
| "learning_rate": 0.00037819560048926173, |
| "loss": 4.45570182800293, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4952029520295203, |
| "grad_norm": 1.0356512069702148, |
| "learning_rate": 0.000377355685341547, |
| "loss": 4.568255424499512, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.4959409594095941, |
| "grad_norm": 0.9578806161880493, |
| "learning_rate": 0.0003765156116584837, |
| "loss": 4.606746673583984, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4966789667896679, |
| "grad_norm": 0.8309308886528015, |
| "learning_rate": 0.00037567538430857976, |
| "loss": 4.480656147003174, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.4974169741697417, |
| "grad_norm": 0.9627919793128967, |
| "learning_rate": 0.0003748350081612339, |
| "loss": 4.540738105773926, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.4981549815498155, |
| "grad_norm": 0.7901818752288818, |
| "learning_rate": 0.00037399448808670706, |
| "loss": 4.378629684448242, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4988929889298893, |
| "grad_norm": 1.1135075092315674, |
| "learning_rate": 0.0003731538289560941, |
| "loss": 4.591548442840576, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.4996309963099631, |
| "grad_norm": 0.8672391772270203, |
| "learning_rate": 0.0003723130356412962, |
| "loss": 4.584698677062988, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.5003690036900369, |
| "grad_norm": 0.879558265209198, |
| "learning_rate": 0.00037147211301499176, |
| "loss": 4.36656379699707, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.5011070110701107, |
| "grad_norm": 0.8770106434822083, |
| "learning_rate": 0.0003706310659506087, |
| "loss": 4.566497802734375, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.5018450184501845, |
| "grad_norm": 0.8778314590454102, |
| "learning_rate": 0.0003697898993222961, |
| "loss": 4.343081474304199, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5025830258302583, |
| "grad_norm": 0.9130513668060303, |
| "learning_rate": 0.00036894861800489614, |
| "loss": 4.3984694480896, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.5033210332103321, |
| "grad_norm": 0.8704879879951477, |
| "learning_rate": 0.00036810722687391544, |
| "loss": 4.561816215515137, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.5040590405904058, |
| "grad_norm": 1.010489821434021, |
| "learning_rate": 0.00036726573080549704, |
| "loss": 4.25577449798584, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.5047970479704798, |
| "grad_norm": 0.9569144248962402, |
| "learning_rate": 0.0003664241346763924, |
| "loss": 4.4627227783203125, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.5055350553505535, |
| "grad_norm": 0.8847797513008118, |
| "learning_rate": 0.00036558244336393236, |
| "loss": 4.437929153442383, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.5062730627306273, |
| "grad_norm": 0.8487216830253601, |
| "learning_rate": 0.00036474066174599986, |
| "loss": 4.435924053192139, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.5070110701107011, |
| "grad_norm": 0.8881345391273499, |
| "learning_rate": 0.00036389879470100095, |
| "loss": 4.873279094696045, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5077490774907749, |
| "grad_norm": 0.8549903035163879, |
| "learning_rate": 0.00036305684710783684, |
| "loss": 4.272536754608154, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5084870848708487, |
| "grad_norm": 0.906299889087677, |
| "learning_rate": 0.0003622148238458754, |
| "loss": 4.555997848510742, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.5092250922509225, |
| "grad_norm": 0.922178328037262, |
| "learning_rate": 0.0003613727297949232, |
| "loss": 4.573604583740234, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5099630996309963, |
| "grad_norm": 0.8890010118484497, |
| "learning_rate": 0.00036053056983519706, |
| "loss": 4.512640953063965, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.5107011070110701, |
| "grad_norm": 0.8093462586402893, |
| "learning_rate": 0.00035968834884729555, |
| "loss": 4.304255485534668, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.5114391143911439, |
| "grad_norm": 0.9470013380050659, |
| "learning_rate": 0.00035884607171217126, |
| "loss": 4.261716365814209, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.5121771217712177, |
| "grad_norm": 1.0242949724197388, |
| "learning_rate": 0.0003580037433111018, |
| "loss": 4.365228652954102, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5129151291512916, |
| "grad_norm": 0.8128859996795654, |
| "learning_rate": 0.0003571613685256623, |
| "loss": 4.409188270568848, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5136531365313654, |
| "grad_norm": 0.9906793236732483, |
| "learning_rate": 0.00035631895223769614, |
| "loss": 4.144466876983643, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5143911439114391, |
| "grad_norm": 0.8540819883346558, |
| "learning_rate": 0.0003554764993292878, |
| "loss": 4.1609907150268555, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.5151291512915129, |
| "grad_norm": 0.8967404365539551, |
| "learning_rate": 0.00035463401468273365, |
| "loss": 4.335708141326904, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5158671586715867, |
| "grad_norm": 0.9019063115119934, |
| "learning_rate": 0.00035379150318051397, |
| "loss": 4.435550689697266, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.5166051660516605, |
| "grad_norm": 0.8940041065216064, |
| "learning_rate": 0.00035294896970526504, |
| "loss": 4.551334381103516, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5173431734317343, |
| "grad_norm": 0.9932311773300171, |
| "learning_rate": 0.0003521064191397499, |
| "loss": 4.3837890625, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5180811808118081, |
| "grad_norm": 0.8308423757553101, |
| "learning_rate": 0.0003512638563668313, |
| "loss": 4.352203845977783, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.5188191881918819, |
| "grad_norm": 1.0316524505615234, |
| "learning_rate": 0.00035042128626944203, |
| "loss": 4.69419527053833, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.5195571955719557, |
| "grad_norm": 0.845513105392456, |
| "learning_rate": 0.00034957871373055796, |
| "loss": 4.403134346008301, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.5202952029520295, |
| "grad_norm": 1.1955550909042358, |
| "learning_rate": 0.0003487361436331689, |
| "loss": 4.507143974304199, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5210332103321034, |
| "grad_norm": 0.9265637993812561, |
| "learning_rate": 0.0003478935808602501, |
| "loss": 4.529629707336426, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.5217712177121772, |
| "grad_norm": 0.7645063400268555, |
| "learning_rate": 0.0003470510302947351, |
| "loss": 4.37443733215332, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.522509225092251, |
| "grad_norm": 0.8971974849700928, |
| "learning_rate": 0.0003462084968194861, |
| "loss": 4.33015251159668, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.5232472324723247, |
| "grad_norm": 0.813549816608429, |
| "learning_rate": 0.00034536598531726646, |
| "loss": 4.5936079025268555, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.5239852398523985, |
| "grad_norm": 0.953991711139679, |
| "learning_rate": 0.0003445235006707122, |
| "loss": 4.403616905212402, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5247232472324723, |
| "grad_norm": 0.8936543464660645, |
| "learning_rate": 0.0003436810477623038, |
| "loss": 4.279123306274414, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.5254612546125461, |
| "grad_norm": 0.9178246855735779, |
| "learning_rate": 0.00034283863147433776, |
| "loss": 4.134098052978516, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.5261992619926199, |
| "grad_norm": 0.9482448101043701, |
| "learning_rate": 0.0003419962566888981, |
| "loss": 4.32552433013916, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.5269372693726937, |
| "grad_norm": 0.787865161895752, |
| "learning_rate": 0.0003411539282878288, |
| "loss": 4.394043922424316, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.5276752767527675, |
| "grad_norm": 0.9969366788864136, |
| "learning_rate": 0.00034031165115270444, |
| "loss": 4.486443996429443, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.5284132841328413, |
| "grad_norm": 0.8682870268821716, |
| "learning_rate": 0.00033946943016480304, |
| "loss": 4.508628845214844, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.5291512915129152, |
| "grad_norm": 0.9052722454071045, |
| "learning_rate": 0.0003386272702050769, |
| "loss": 4.4580583572387695, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.529889298892989, |
| "grad_norm": 0.909764289855957, |
| "learning_rate": 0.00033778517615412477, |
| "loss": 4.225852012634277, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.5306273062730628, |
| "grad_norm": 0.8234180808067322, |
| "learning_rate": 0.0003369431528921632, |
| "loss": 4.583276748657227, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.5313653136531366, |
| "grad_norm": 0.8727805614471436, |
| "learning_rate": 0.0003361012052989992, |
| "loss": 4.535766124725342, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5321033210332103, |
| "grad_norm": 1.0564109086990356, |
| "learning_rate": 0.00033525933825400014, |
| "loss": 4.4810943603515625, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.5328413284132841, |
| "grad_norm": 0.9636712074279785, |
| "learning_rate": 0.0003344175566360676, |
| "loss": 4.364070415496826, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.5335793357933579, |
| "grad_norm": 0.9482673406600952, |
| "learning_rate": 0.00033357586532360765, |
| "loss": 4.78449821472168, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.5343173431734317, |
| "grad_norm": 0.7933990955352783, |
| "learning_rate": 0.00033273426919450285, |
| "loss": 4.399996280670166, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.5350553505535055, |
| "grad_norm": 0.8797454237937927, |
| "learning_rate": 0.0003318927731260846, |
| "loss": 4.585580825805664, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.5357933579335793, |
| "grad_norm": 0.8431602716445923, |
| "learning_rate": 0.00033105138199510386, |
| "loss": 4.289941787719727, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.5365313653136531, |
| "grad_norm": 0.8969424962997437, |
| "learning_rate": 0.00033021010067770396, |
| "loss": 4.353963375091553, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.537269372693727, |
| "grad_norm": 0.7266989350318909, |
| "learning_rate": 0.00032936893404939135, |
| "loss": 4.287866592407227, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.5380073800738008, |
| "grad_norm": 0.9192749261856079, |
| "learning_rate": 0.0003285278869850084, |
| "loss": 4.431896209716797, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.5387453874538746, |
| "grad_norm": 0.9108367562294006, |
| "learning_rate": 0.0003276869643587038, |
| "loss": 4.330748558044434, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5394833948339484, |
| "grad_norm": 0.789059579372406, |
| "learning_rate": 0.000326846171043906, |
| "loss": 4.409814834594727, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.5402214022140222, |
| "grad_norm": 0.931719183921814, |
| "learning_rate": 0.000326005511913293, |
| "loss": 4.5224928855896, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.5409594095940959, |
| "grad_norm": 0.9140210747718811, |
| "learning_rate": 0.00032516499183876614, |
| "loss": 4.469390869140625, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.5416974169741697, |
| "grad_norm": 0.7886836528778076, |
| "learning_rate": 0.0003243246156914203, |
| "loss": 4.169953346252441, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.5424354243542435, |
| "grad_norm": 0.9898924827575684, |
| "learning_rate": 0.00032348438834151636, |
| "loss": 4.523615837097168, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.5431734317343173, |
| "grad_norm": 0.9171273112297058, |
| "learning_rate": 0.00032264431465845307, |
| "loss": 4.362099647521973, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.5439114391143911, |
| "grad_norm": 0.8603449463844299, |
| "learning_rate": 0.0003218043995107383, |
| "loss": 4.252144813537598, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.5446494464944649, |
| "grad_norm": 0.9839322566986084, |
| "learning_rate": 0.0003209646477659615, |
| "loss": 4.401839256286621, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.5453874538745388, |
| "grad_norm": 1.1770368814468384, |
| "learning_rate": 0.00032012506429076476, |
| "loss": 4.247356414794922, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.5461254612546126, |
| "grad_norm": 0.8217732310295105, |
| "learning_rate": 0.0003192856539508155, |
| "loss": 4.566009521484375, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5468634686346864, |
| "grad_norm": 1.1834269762039185, |
| "learning_rate": 0.00031844642161077717, |
| "loss": 4.510600566864014, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.5476014760147602, |
| "grad_norm": 0.9773359298706055, |
| "learning_rate": 0.0003176073721342822, |
| "loss": 4.310590744018555, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.548339483394834, |
| "grad_norm": 0.9322510957717896, |
| "learning_rate": 0.00031676851038390277, |
| "loss": 4.397828102111816, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.5490774907749078, |
| "grad_norm": 0.9611193537712097, |
| "learning_rate": 0.00031592984122112363, |
| "loss": 4.509471893310547, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.5498154981549815, |
| "grad_norm": 0.8511263132095337, |
| "learning_rate": 0.00031509136950631295, |
| "loss": 4.605403900146484, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.5505535055350553, |
| "grad_norm": 1.1331124305725098, |
| "learning_rate": 0.00031425310009869497, |
| "loss": 4.705798625946045, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.5512915129151291, |
| "grad_norm": 0.9317970871925354, |
| "learning_rate": 0.0003134150378563213, |
| "loss": 4.538765907287598, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.5520295202952029, |
| "grad_norm": 0.7060513496398926, |
| "learning_rate": 0.00031257718763604296, |
| "loss": 4.484154224395752, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.5527675276752767, |
| "grad_norm": 0.9105408191680908, |
| "learning_rate": 0.00031173955429348254, |
| "loss": 4.227485656738281, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.5535055350553506, |
| "grad_norm": 0.8890596628189087, |
| "learning_rate": 0.000310902142683005, |
| "loss": 4.158082008361816, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5542435424354244, |
| "grad_norm": 1.074188470840454, |
| "learning_rate": 0.00031006495765769135, |
| "loss": 4.741909980773926, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.5549815498154982, |
| "grad_norm": 1.0221657752990723, |
| "learning_rate": 0.0003092280040693085, |
| "loss": 4.476526260375977, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.555719557195572, |
| "grad_norm": 0.9633339643478394, |
| "learning_rate": 0.00030839128676828277, |
| "loss": 4.336530685424805, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.5564575645756458, |
| "grad_norm": 1.0310927629470825, |
| "learning_rate": 0.0003075548106036706, |
| "loss": 4.326992988586426, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.5571955719557196, |
| "grad_norm": 0.8011588454246521, |
| "learning_rate": 0.0003067185804231314, |
| "loss": 4.4770827293396, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.5579335793357934, |
| "grad_norm": 0.921048641204834, |
| "learning_rate": 0.00030588260107289875, |
| "loss": 4.608548164367676, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.5586715867158671, |
| "grad_norm": 0.9670724272727966, |
| "learning_rate": 0.0003050468773977527, |
| "loss": 4.357841491699219, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.5594095940959409, |
| "grad_norm": 1.0081647634506226, |
| "learning_rate": 0.00030421141424099153, |
| "loss": 4.160003662109375, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.5601476014760147, |
| "grad_norm": 0.8587222695350647, |
| "learning_rate": 0.0003033762164444036, |
| "loss": 4.5625104904174805, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.5608856088560885, |
| "grad_norm": 0.9064732789993286, |
| "learning_rate": 0.00030254128884823995, |
| "loss": 4.558137893676758, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5616236162361624, |
| "grad_norm": 0.9167226552963257, |
| "learning_rate": 0.00030170663629118484, |
| "loss": 4.650042533874512, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.5623616236162362, |
| "grad_norm": 0.9208563566207886, |
| "learning_rate": 0.0003008722636103295, |
| "loss": 4.311919212341309, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.56309963099631, |
| "grad_norm": 0.8243905305862427, |
| "learning_rate": 0.0003000381756411425, |
| "loss": 4.592479705810547, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.5638376383763838, |
| "grad_norm": 0.88048255443573, |
| "learning_rate": 0.00029920437721744285, |
| "loss": 4.383855819702148, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.5645756457564576, |
| "grad_norm": 0.8309145569801331, |
| "learning_rate": 0.0002983708731713712, |
| "loss": 4.523615837097168, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.5653136531365314, |
| "grad_norm": 0.9054703116416931, |
| "learning_rate": 0.0002975376683333625, |
| "loss": 4.120911121368408, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.5660516605166052, |
| "grad_norm": 0.8789876103401184, |
| "learning_rate": 0.0002967047675321174, |
| "loss": 4.314697265625, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.566789667896679, |
| "grad_norm": 1.055936336517334, |
| "learning_rate": 0.0002958721755945748, |
| "loss": 4.497006416320801, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5675276752767527, |
| "grad_norm": 1.1139589548110962, |
| "learning_rate": 0.00029503989734588345, |
| "loss": 4.493967056274414, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.5682656826568265, |
| "grad_norm": 0.8091505169868469, |
| "learning_rate": 0.0002942079376093742, |
| "loss": 4.10081672668457, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5690036900369003, |
| "grad_norm": 0.8381765484809875, |
| "learning_rate": 0.00029337630120653235, |
| "loss": 4.278990745544434, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.5697416974169742, |
| "grad_norm": 0.8964424729347229, |
| "learning_rate": 0.00029254499295696876, |
| "loss": 4.365828514099121, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.570479704797048, |
| "grad_norm": 0.8812311887741089, |
| "learning_rate": 0.0002917140176783931, |
| "loss": 4.407172679901123, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.5712177121771218, |
| "grad_norm": 0.9463404417037964, |
| "learning_rate": 0.0002908833801865849, |
| "loss": 4.176614761352539, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5719557195571956, |
| "grad_norm": 0.9128312468528748, |
| "learning_rate": 0.0002900530852953665, |
| "loss": 4.4936604499816895, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5726937269372694, |
| "grad_norm": 0.9788138270378113, |
| "learning_rate": 0.0002892231378165744, |
| "loss": 4.425959587097168, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.5734317343173432, |
| "grad_norm": 0.8016911149024963, |
| "learning_rate": 0.0002883935425600321, |
| "loss": 4.351809024810791, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.574169741697417, |
| "grad_norm": 0.8947065472602844, |
| "learning_rate": 0.00028756430433352146, |
| "loss": 4.333946228027344, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5749077490774908, |
| "grad_norm": 0.8357275724411011, |
| "learning_rate": 0.0002867354279427556, |
| "loss": 4.609579086303711, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5756457564575646, |
| "grad_norm": 0.9476321339607239, |
| "learning_rate": 0.0002859069181913503, |
| "loss": 4.475932598114014, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5763837638376383, |
| "grad_norm": 0.9456009268760681, |
| "learning_rate": 0.00028507877988079717, |
| "loss": 4.241294860839844, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5771217712177121, |
| "grad_norm": 0.7762236595153809, |
| "learning_rate": 0.0002842510178104343, |
| "loss": 4.2514777183532715, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.5778597785977859, |
| "grad_norm": 0.8480483889579773, |
| "learning_rate": 0.00028342363677742, |
| "loss": 4.5362043380737305, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.5785977859778598, |
| "grad_norm": 0.8248271942138672, |
| "learning_rate": 0.00028259664157670434, |
| "loss": 4.289585113525391, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5793357933579336, |
| "grad_norm": 0.9554965496063232, |
| "learning_rate": 0.00028177003700100093, |
| "loss": 4.234594345092773, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5800738007380074, |
| "grad_norm": 1.0218883752822876, |
| "learning_rate": 0.00028094382784076005, |
| "loss": 4.032539367675781, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5808118081180812, |
| "grad_norm": 0.9201107621192932, |
| "learning_rate": 0.00028011801888413996, |
| "loss": 4.4474711418151855, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.581549815498155, |
| "grad_norm": 0.9545875191688538, |
| "learning_rate": 0.00027929261491698, |
| "loss": 4.290918350219727, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5822878228782288, |
| "grad_norm": 0.9154767394065857, |
| "learning_rate": 0.00027846762072277235, |
| "loss": 4.266115188598633, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5830258302583026, |
| "grad_norm": 0.9572087526321411, |
| "learning_rate": 0.00027764304108263425, |
| "loss": 4.489130973815918, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5837638376383764, |
| "grad_norm": 0.864920973777771, |
| "learning_rate": 0.0002768188807752806, |
| "loss": 4.22702693939209, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5845018450184502, |
| "grad_norm": 0.9186403751373291, |
| "learning_rate": 0.0002759951445769962, |
| "loss": 4.370454788208008, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5852398523985239, |
| "grad_norm": 0.9486933350563049, |
| "learning_rate": 0.00027517183726160775, |
| "loss": 4.345991611480713, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.5859778597785977, |
| "grad_norm": 0.9103389382362366, |
| "learning_rate": 0.0002743489636004567, |
| "loss": 4.232224941253662, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5867158671586716, |
| "grad_norm": 0.9209710359573364, |
| "learning_rate": 0.0002735265283623713, |
| "loss": 3.9969122409820557, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5874538745387454, |
| "grad_norm": 1.2172404527664185, |
| "learning_rate": 0.00027270453631363876, |
| "loss": 4.3851318359375, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.5881918819188192, |
| "grad_norm": 1.0857105255126953, |
| "learning_rate": 0.00027188299221797806, |
| "loss": 4.543056488037109, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.588929889298893, |
| "grad_norm": 0.8917638659477234, |
| "learning_rate": 0.00027106190083651206, |
| "loss": 4.233307838439941, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5896678966789668, |
| "grad_norm": 0.9834994077682495, |
| "learning_rate": 0.0002702412669277401, |
| "loss": 4.3369035720825195, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5904059040590406, |
| "grad_norm": 0.9920309782028198, |
| "learning_rate": 0.00026942109524751, |
| "loss": 4.263988971710205, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5911439114391144, |
| "grad_norm": 0.7995727062225342, |
| "learning_rate": 0.00026860139054899146, |
| "loss": 4.237081050872803, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5918819188191882, |
| "grad_norm": 0.8966661095619202, |
| "learning_rate": 0.00026778215758264696, |
| "loss": 4.278907299041748, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.592619926199262, |
| "grad_norm": 0.8927947282791138, |
| "learning_rate": 0.000266963401096206, |
| "loss": 4.3486151695251465, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5933579335793358, |
| "grad_norm": 0.7980582118034363, |
| "learning_rate": 0.0002661451258346361, |
| "loss": 4.231438636779785, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.5940959409594095, |
| "grad_norm": 0.8703809380531311, |
| "learning_rate": 0.00026532733654011635, |
| "loss": 4.2430419921875, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5948339483394834, |
| "grad_norm": 1.0357931852340698, |
| "learning_rate": 0.00026451003795200913, |
| "loss": 4.256633281707764, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5955719557195572, |
| "grad_norm": 0.8626582026481628, |
| "learning_rate": 0.00026369323480683333, |
| "loss": 4.278927326202393, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.596309963099631, |
| "grad_norm": 0.8148908615112305, |
| "learning_rate": 0.0002628769318382364, |
| "loss": 4.354986190795898, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5970479704797048, |
| "grad_norm": 0.7945446372032166, |
| "learning_rate": 0.000262061133776967, |
| "loss": 4.433017730712891, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5977859778597786, |
| "grad_norm": 0.8125186562538147, |
| "learning_rate": 0.00026124584535084825, |
| "loss": 4.323663711547852, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5985239852398524, |
| "grad_norm": 0.8656073808670044, |
| "learning_rate": 0.00026043107128474876, |
| "loss": 4.364239692687988, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5992619926199262, |
| "grad_norm": 0.7823298573493958, |
| "learning_rate": 0.00025961681630055737, |
| "loss": 4.095296382904053, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.8082625865936279, |
| "learning_rate": 0.00025880308511715366, |
| "loss": 4.251285552978516, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.6007380073800738, |
| "grad_norm": 0.8128904104232788, |
| "learning_rate": 0.00025798988245038243, |
| "loss": 4.234792709350586, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.6014760147601476, |
| "grad_norm": 0.9591745138168335, |
| "learning_rate": 0.00025717721301302495, |
| "loss": 4.191695213317871, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6022140221402214, |
| "grad_norm": 0.8306787014007568, |
| "learning_rate": 0.0002563650815147728, |
| "loss": 4.182519912719727, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.6029520295202953, |
| "grad_norm": 1.0368632078170776, |
| "learning_rate": 0.0002555534926621994, |
| "loss": 4.357141971588135, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.603690036900369, |
| "grad_norm": 0.9401784539222717, |
| "learning_rate": 0.00025474245115873377, |
| "loss": 4.2874016761779785, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.6044280442804428, |
| "grad_norm": 0.9086504578590393, |
| "learning_rate": 0.00025393196170463286, |
| "loss": 4.135937690734863, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.6051660516605166, |
| "grad_norm": 0.8185088634490967, |
| "learning_rate": 0.00025312202899695403, |
| "loss": 4.31793212890625, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6059040590405904, |
| "grad_norm": 0.8340873718261719, |
| "learning_rate": 0.00025231265772952864, |
| "loss": 4.332757949829102, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.6066420664206642, |
| "grad_norm": 0.9770723581314087, |
| "learning_rate": 0.00025150385259293346, |
| "loss": 4.115085124969482, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.607380073800738, |
| "grad_norm": 1.0393363237380981, |
| "learning_rate": 0.0002506956182744653, |
| "loss": 4.164813995361328, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.6081180811808118, |
| "grad_norm": 0.9465534090995789, |
| "learning_rate": 0.00024988795945811215, |
| "loss": 4.53727912902832, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.6088560885608856, |
| "grad_norm": 0.8929158449172974, |
| "learning_rate": 0.00024908088082452724, |
| "loss": 4.265376091003418, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6095940959409594, |
| "grad_norm": 0.7848824262619019, |
| "learning_rate": 0.00024827438705100116, |
| "loss": 4.300992965698242, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.6103321033210332, |
| "grad_norm": 0.7737518548965454, |
| "learning_rate": 0.00024746848281143524, |
| "loss": 4.297072410583496, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.6110701107011071, |
| "grad_norm": 1.0166592597961426, |
| "learning_rate": 0.00024666317277631403, |
| "loss": 4.4208478927612305, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.6118081180811809, |
| "grad_norm": 0.8515886664390564, |
| "learning_rate": 0.00024585846161267875, |
| "loss": 4.542513847351074, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.6125461254612546, |
| "grad_norm": 0.8427137732505798, |
| "learning_rate": 0.00024505435398409966, |
| "loss": 4.270936965942383, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6132841328413284, |
| "grad_norm": 0.811477541923523, |
| "learning_rate": 0.0002442508545506495, |
| "loss": 4.223374366760254, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.6140221402214022, |
| "grad_norm": 0.9186045527458191, |
| "learning_rate": 0.00024344796796887656, |
| "loss": 4.369760036468506, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.614760147601476, |
| "grad_norm": 0.809533417224884, |
| "learning_rate": 0.0002426456988917769, |
| "loss": 4.350223541259766, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.6154981549815498, |
| "grad_norm": 0.8991212248802185, |
| "learning_rate": 0.00024184405196876844, |
| "loss": 4.136372089385986, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.6162361623616236, |
| "grad_norm": 0.8988363742828369, |
| "learning_rate": 0.00024104303184566307, |
| "loss": 4.202424049377441, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.6169741697416974, |
| "grad_norm": 1.3087947368621826, |
| "learning_rate": 0.00024024264316464065, |
| "loss": 4.428619384765625, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.6177121771217712, |
| "grad_norm": 0.7776771783828735, |
| "learning_rate": 0.0002394428905642211, |
| "loss": 4.351472854614258, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.618450184501845, |
| "grad_norm": 0.996083676815033, |
| "learning_rate": 0.00023864377867923852, |
| "loss": 3.9067325592041016, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.6191881918819189, |
| "grad_norm": 0.8904930949211121, |
| "learning_rate": 0.00023784531214081348, |
| "loss": 4.205554008483887, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.6199261992619927, |
| "grad_norm": 0.9460301399230957, |
| "learning_rate": 0.00023704749557632688, |
| "loss": 4.3381452560424805, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6206642066420665, |
| "grad_norm": 0.8847654461860657, |
| "learning_rate": 0.00023625033360939239, |
| "loss": 4.210631370544434, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.6214022140221402, |
| "grad_norm": 0.9049587249755859, |
| "learning_rate": 0.00023545383085983034, |
| "loss": 4.128975868225098, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.622140221402214, |
| "grad_norm": 0.881879985332489, |
| "learning_rate": 0.00023465799194364087, |
| "loss": 4.109155654907227, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.6228782287822878, |
| "grad_norm": 0.9331649541854858, |
| "learning_rate": 0.00023386282147297657, |
| "loss": 4.180877685546875, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.6236162361623616, |
| "grad_norm": 1.0155686140060425, |
| "learning_rate": 0.00023306832405611643, |
| "loss": 4.2506818771362305, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.6243542435424354, |
| "grad_norm": 0.9788922667503357, |
| "learning_rate": 0.00023227450429743867, |
| "loss": 4.536131858825684, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.6250922509225092, |
| "grad_norm": 1.0663362741470337, |
| "learning_rate": 0.00023148136679739453, |
| "loss": 4.059211730957031, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.625830258302583, |
| "grad_norm": 0.8880152702331543, |
| "learning_rate": 0.00023068891615248102, |
| "loss": 4.163819313049316, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.6265682656826568, |
| "grad_norm": 0.9166035056114197, |
| "learning_rate": 0.0002298971569552149, |
| "loss": 4.22659158706665, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.6273062730627307, |
| "grad_norm": 1.1947702169418335, |
| "learning_rate": 0.00022910609379410546, |
| "loss": 4.3044633865356445, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6280442804428045, |
| "grad_norm": 1.251198410987854, |
| "learning_rate": 0.0002283157312536284, |
| "loss": 4.213165283203125, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.6287822878228783, |
| "grad_norm": 0.9441475868225098, |
| "learning_rate": 0.00022752607391419904, |
| "loss": 4.37963342666626, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.629520295202952, |
| "grad_norm": 0.8944138884544373, |
| "learning_rate": 0.0002267371263521461, |
| "loss": 4.479311943054199, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.6302583025830258, |
| "grad_norm": 0.9756674766540527, |
| "learning_rate": 0.00022594889313968424, |
| "loss": 4.323942184448242, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.6309963099630996, |
| "grad_norm": 0.9520359039306641, |
| "learning_rate": 0.00022516137884488895, |
| "loss": 4.259498596191406, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.6317343173431734, |
| "grad_norm": 0.8389827609062195, |
| "learning_rate": 0.000224374588031669, |
| "loss": 4.353797435760498, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.6324723247232472, |
| "grad_norm": 0.9523439407348633, |
| "learning_rate": 0.0002235885252597402, |
| "loss": 4.485894203186035, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.633210332103321, |
| "grad_norm": 0.8450521230697632, |
| "learning_rate": 0.00022280319508459953, |
| "loss": 4.3302717208862305, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.6339483394833948, |
| "grad_norm": 0.9799603819847107, |
| "learning_rate": 0.00022201860205749792, |
| "loss": 4.216465950012207, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.6346863468634686, |
| "grad_norm": 0.8215528726577759, |
| "learning_rate": 0.00022123475072541456, |
| "loss": 4.218143463134766, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6354243542435425, |
| "grad_norm": 0.8392944931983948, |
| "learning_rate": 0.00022045164563102993, |
| "loss": 4.393090724945068, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.6361623616236163, |
| "grad_norm": 0.9801323413848877, |
| "learning_rate": 0.00021966929131270053, |
| "loss": 4.3347978591918945, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.6369003690036901, |
| "grad_norm": 1.0346145629882812, |
| "learning_rate": 0.00021888769230443076, |
| "loss": 4.304266452789307, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.6376383763837639, |
| "grad_norm": 0.8837590217590332, |
| "learning_rate": 0.00021810685313584894, |
| "loss": 4.318976879119873, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.6383763837638377, |
| "grad_norm": 0.9550504088401794, |
| "learning_rate": 0.00021732677833217884, |
| "loss": 4.0572285652160645, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.6391143911439114, |
| "grad_norm": 0.9023411273956299, |
| "learning_rate": 0.00021654747241421515, |
| "loss": 4.210879325866699, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.6398523985239852, |
| "grad_norm": 1.2458837032318115, |
| "learning_rate": 0.00021576893989829648, |
| "loss": 4.031771183013916, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.640590405904059, |
| "grad_norm": 0.931896448135376, |
| "learning_rate": 0.00021499118529627893, |
| "loss": 4.238314151763916, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.6413284132841328, |
| "grad_norm": 0.8535945415496826, |
| "learning_rate": 0.00021421421311551095, |
| "loss": 4.30747652053833, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.6420664206642066, |
| "grad_norm": 0.8937339186668396, |
| "learning_rate": 0.0002134380278588059, |
| "loss": 4.368441581726074, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6428044280442804, |
| "grad_norm": 0.9691210985183716, |
| "learning_rate": 0.00021266263402441746, |
| "loss": 4.286958694458008, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.6435424354243543, |
| "grad_norm": 0.9562344551086426, |
| "learning_rate": 0.00021188803610601187, |
| "loss": 4.331124305725098, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.6442804428044281, |
| "grad_norm": 0.9085299372673035, |
| "learning_rate": 0.00021111423859264362, |
| "loss": 4.204074859619141, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.6450184501845019, |
| "grad_norm": 1.0217558145523071, |
| "learning_rate": 0.00021034124596872776, |
| "loss": 4.061552047729492, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.6457564575645757, |
| "grad_norm": 0.8775967359542847, |
| "learning_rate": 0.00020956906271401554, |
| "loss": 4.252497673034668, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.6464944649446495, |
| "grad_norm": 0.9603700637817383, |
| "learning_rate": 0.00020879769330356705, |
| "loss": 4.17333984375, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.6472324723247233, |
| "grad_norm": 0.9519745707511902, |
| "learning_rate": 0.0002080271422077262, |
| "loss": 4.414155006408691, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.647970479704797, |
| "grad_norm": 0.8470144271850586, |
| "learning_rate": 0.00020725741389209423, |
| "loss": 4.405782699584961, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.6487084870848708, |
| "grad_norm": 0.872512698173523, |
| "learning_rate": 0.00020648851281750437, |
| "loss": 4.448093414306641, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.6494464944649446, |
| "grad_norm": 1.0624064207077026, |
| "learning_rate": 0.00020572044343999566, |
| "loss": 4.4731950759887695, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6501845018450184, |
| "grad_norm": 0.9333707094192505, |
| "learning_rate": 0.00020495321021078686, |
| "loss": 4.351503849029541, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.6509225092250922, |
| "grad_norm": 0.8607699275016785, |
| "learning_rate": 0.00020418681757625152, |
| "loss": 4.024420738220215, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.6516605166051661, |
| "grad_norm": 0.8372026085853577, |
| "learning_rate": 0.00020342126997789113, |
| "loss": 4.254813194274902, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.6523985239852399, |
| "grad_norm": 0.8102350234985352, |
| "learning_rate": 0.00020265657185231017, |
| "loss": 4.309717178344727, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.6531365313653137, |
| "grad_norm": 0.8655620217323303, |
| "learning_rate": 0.0002018927276311902, |
| "loss": 4.270059108734131, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.6538745387453875, |
| "grad_norm": 0.8550220727920532, |
| "learning_rate": 0.00020112974174126406, |
| "loss": 4.238635063171387, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.6546125461254613, |
| "grad_norm": 0.8815758228302002, |
| "learning_rate": 0.00020036761860428999, |
| "loss": 4.169132232666016, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.6553505535055351, |
| "grad_norm": 0.9161958694458008, |
| "learning_rate": 0.00019960636263702692, |
| "loss": 4.314050674438477, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.6560885608856089, |
| "grad_norm": 1.0340604782104492, |
| "learning_rate": 0.00019884597825120762, |
| "loss": 3.9258623123168945, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.6568265682656826, |
| "grad_norm": 0.896084725856781, |
| "learning_rate": 0.000198086469853514, |
| "loss": 4.141365051269531, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6575645756457564, |
| "grad_norm": 0.9871026277542114, |
| "learning_rate": 0.00019732784184555138, |
| "loss": 4.212796211242676, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.6583025830258302, |
| "grad_norm": 1.0540019273757935, |
| "learning_rate": 0.00019657009862382286, |
| "loss": 4.061999797821045, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.659040590405904, |
| "grad_norm": 0.8863611817359924, |
| "learning_rate": 0.00019581324457970407, |
| "loss": 4.253866195678711, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.6597785977859778, |
| "grad_norm": 1.1371312141418457, |
| "learning_rate": 0.00019505728409941711, |
| "loss": 4.08126163482666, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.6605166051660517, |
| "grad_norm": 2.1047496795654297, |
| "learning_rate": 0.00019430222156400606, |
| "loss": 4.196209907531738, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.6612546125461255, |
| "grad_norm": 0.85357266664505, |
| "learning_rate": 0.00019354806134931087, |
| "loss": 4.412619590759277, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.6619926199261993, |
| "grad_norm": 1.048453450202942, |
| "learning_rate": 0.00019279480782594244, |
| "loss": 4.392220497131348, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.6627306273062731, |
| "grad_norm": 0.8711747527122498, |
| "learning_rate": 0.00019204246535925654, |
| "loss": 4.262413024902344, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.6634686346863469, |
| "grad_norm": 0.7952659130096436, |
| "learning_rate": 0.00019129103830933008, |
| "loss": 4.36223840713501, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.6642066420664207, |
| "grad_norm": 0.8127221465110779, |
| "learning_rate": 0.00019054053103093366, |
| "loss": 4.27398681640625, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6649446494464945, |
| "grad_norm": 0.8177223801612854, |
| "learning_rate": 0.0001897909478735083, |
| "loss": 3.997640609741211, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.6656826568265682, |
| "grad_norm": 1.2305352687835693, |
| "learning_rate": 0.00018904229318113914, |
| "loss": 4.09181022644043, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.666420664206642, |
| "grad_norm": 0.862445056438446, |
| "learning_rate": 0.00018829457129253057, |
| "loss": 4.322624206542969, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.6671586715867158, |
| "grad_norm": 0.8462716937065125, |
| "learning_rate": 0.00018754778654098123, |
| "loss": 4.413826942443848, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.6678966789667896, |
| "grad_norm": 0.8606178164482117, |
| "learning_rate": 0.00018680194325435839, |
| "loss": 4.309714317321777, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.6686346863468635, |
| "grad_norm": 0.8559933304786682, |
| "learning_rate": 0.00018605704575507347, |
| "loss": 4.162710189819336, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.6693726937269373, |
| "grad_norm": 0.9497646689414978, |
| "learning_rate": 0.00018531309836005675, |
| "loss": 4.144913673400879, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.6701107011070111, |
| "grad_norm": 0.8656502962112427, |
| "learning_rate": 0.00018457010538073236, |
| "loss": 4.23277473449707, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.6708487084870849, |
| "grad_norm": 0.9220851063728333, |
| "learning_rate": 0.00018382807112299283, |
| "loss": 4.004146099090576, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.6715867158671587, |
| "grad_norm": 0.8644999265670776, |
| "learning_rate": 0.0001830869998871755, |
| "loss": 4.135645389556885, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6723247232472325, |
| "grad_norm": 0.9802985787391663, |
| "learning_rate": 0.0001823468959680356, |
| "loss": 4.413508892059326, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.6730627306273063, |
| "grad_norm": 0.8389285802841187, |
| "learning_rate": 0.0001816077636547232, |
| "loss": 4.484038829803467, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.67380073800738, |
| "grad_norm": 0.9547582864761353, |
| "learning_rate": 0.00018086960723075727, |
| "loss": 4.3295416831970215, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.6745387453874538, |
| "grad_norm": 0.8170531392097473, |
| "learning_rate": 0.00018013243097400128, |
| "loss": 4.145027160644531, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.6752767527675276, |
| "grad_norm": 0.8581196665763855, |
| "learning_rate": 0.00017939623915663833, |
| "loss": 4.246807098388672, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.6760147601476014, |
| "grad_norm": 0.9968565702438354, |
| "learning_rate": 0.000178661036045146, |
| "loss": 4.355518817901611, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.6767527675276753, |
| "grad_norm": 1.08475923538208, |
| "learning_rate": 0.00017792682590027278, |
| "loss": 4.216618061065674, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.6774907749077491, |
| "grad_norm": 0.9199729561805725, |
| "learning_rate": 0.00017719361297701167, |
| "loss": 4.03296422958374, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.6782287822878229, |
| "grad_norm": 0.9441756010055542, |
| "learning_rate": 0.00017646140152457717, |
| "loss": 4.32381010055542, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.6789667896678967, |
| "grad_norm": 0.8643115162849426, |
| "learning_rate": 0.00017573019578637913, |
| "loss": 4.274387359619141, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6797047970479705, |
| "grad_norm": 0.8102643489837646, |
| "learning_rate": 0.00017500000000000008, |
| "loss": 4.232758522033691, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.6804428044280443, |
| "grad_norm": 1.063491702079773, |
| "learning_rate": 0.0001742708183971684, |
| "loss": 4.3541483879089355, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.6811808118081181, |
| "grad_norm": 0.7408610582351685, |
| "learning_rate": 0.00017354265520373567, |
| "loss": 4.151790618896484, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.6819188191881919, |
| "grad_norm": 0.7934446930885315, |
| "learning_rate": 0.0001728155146396511, |
| "loss": 4.396363258361816, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.6826568265682657, |
| "grad_norm": 0.954188883304596, |
| "learning_rate": 0.00017208940091893756, |
| "loss": 3.97440767288208, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6833948339483394, |
| "grad_norm": 1.0053012371063232, |
| "learning_rate": 0.00017136431824966715, |
| "loss": 4.055703163146973, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.6841328413284132, |
| "grad_norm": 0.8948765397071838, |
| "learning_rate": 0.00017064027083393612, |
| "loss": 4.3566484451293945, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.6848708487084871, |
| "grad_norm": 0.8520956039428711, |
| "learning_rate": 0.000169917262867842, |
| "loss": 3.928354263305664, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6856088560885609, |
| "grad_norm": 0.8816937804222107, |
| "learning_rate": 0.00016919529854145745, |
| "loss": 4.179725170135498, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.6863468634686347, |
| "grad_norm": 1.3806109428405762, |
| "learning_rate": 0.00016847438203880735, |
| "loss": 4.185024738311768, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6870848708487085, |
| "grad_norm": 0.7780953049659729, |
| "learning_rate": 0.00016775451753784414, |
| "loss": 4.327208995819092, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.6878228782287823, |
| "grad_norm": 1.103068470954895, |
| "learning_rate": 0.00016703570921042344, |
| "loss": 4.4666948318481445, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.6885608856088561, |
| "grad_norm": 0.8747889995574951, |
| "learning_rate": 0.00016631796122227983, |
| "loss": 4.146649360656738, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.6892988929889299, |
| "grad_norm": 0.9435983896255493, |
| "learning_rate": 0.00016560127773300313, |
| "loss": 4.554599761962891, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.6900369003690037, |
| "grad_norm": 0.800839364528656, |
| "learning_rate": 0.00016488566289601388, |
| "loss": 4.008693218231201, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.6907749077490775, |
| "grad_norm": 0.8912091851234436, |
| "learning_rate": 0.00016417112085853969, |
| "loss": 4.274938583374023, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.6915129151291513, |
| "grad_norm": 0.9369155168533325, |
| "learning_rate": 0.00016345765576159042, |
| "loss": 4.299943447113037, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.692250922509225, |
| "grad_norm": 0.7870283722877502, |
| "learning_rate": 0.000162745271739935, |
| "loss": 4.334400177001953, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.6929889298892989, |
| "grad_norm": 0.8504934310913086, |
| "learning_rate": 0.00016203397292207758, |
| "loss": 4.140174865722656, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.6937269372693727, |
| "grad_norm": 1.016496181488037, |
| "learning_rate": 0.00016132376343023233, |
| "loss": 4.296517848968506, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6944649446494465, |
| "grad_norm": 1.214504599571228, |
| "learning_rate": 0.00016061464738030106, |
| "loss": 4.107439041137695, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.6952029520295203, |
| "grad_norm": 0.9972517490386963, |
| "learning_rate": 0.0001599066288818485, |
| "loss": 4.203211307525635, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.6959409594095941, |
| "grad_norm": 0.8465280532836914, |
| "learning_rate": 0.0001591997120380788, |
| "loss": 4.145995616912842, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.6966789667896679, |
| "grad_norm": 0.9349222779273987, |
| "learning_rate": 0.00015849390094581142, |
| "loss": 4.01326847076416, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6974169741697417, |
| "grad_norm": 1.018925666809082, |
| "learning_rate": 0.0001577891996954578, |
| "loss": 4.1635026931762695, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.6981549815498155, |
| "grad_norm": 0.7925598621368408, |
| "learning_rate": 0.0001570856123709975, |
| "loss": 4.1779022216796875, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.6988929889298893, |
| "grad_norm": 0.932461142539978, |
| "learning_rate": 0.00015638314304995454, |
| "loss": 4.21356201171875, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.6996309963099631, |
| "grad_norm": 0.9300697445869446, |
| "learning_rate": 0.00015568179580337333, |
| "loss": 4.0165696144104, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.7003690036900369, |
| "grad_norm": 0.8354659676551819, |
| "learning_rate": 0.0001549815746957962, |
| "loss": 4.235401630401611, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.7011070110701108, |
| "grad_norm": 0.926152765750885, |
| "learning_rate": 0.00015428248378523865, |
| "loss": 4.415463447570801, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7018450184501845, |
| "grad_norm": 1.0506153106689453, |
| "learning_rate": 0.0001535845271231662, |
| "loss": 4.269872665405273, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.7025830258302583, |
| "grad_norm": 0.8655766248703003, |
| "learning_rate": 0.00015288770875447128, |
| "loss": 4.350858688354492, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.7033210332103321, |
| "grad_norm": 0.7818317413330078, |
| "learning_rate": 0.00015219203271744954, |
| "loss": 4.015618801116943, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.7040590405904059, |
| "grad_norm": 0.8752409815788269, |
| "learning_rate": 0.00015149750304377645, |
| "loss": 4.2518510818481445, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.7047970479704797, |
| "grad_norm": 1.2109910249710083, |
| "learning_rate": 0.00015080412375848357, |
| "loss": 4.2393035888671875, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.7055350553505535, |
| "grad_norm": 0.925682544708252, |
| "learning_rate": 0.00015011189887993598, |
| "loss": 4.126298904418945, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.7062730627306273, |
| "grad_norm": 0.9237503409385681, |
| "learning_rate": 0.00014942083241980837, |
| "loss": 3.981215476989746, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.7070110701107011, |
| "grad_norm": 0.9711774587631226, |
| "learning_rate": 0.0001487309283830623, |
| "loss": 4.350025177001953, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.7077490774907749, |
| "grad_norm": 0.9577892422676086, |
| "learning_rate": 0.00014804219076792202, |
| "loss": 4.178315162658691, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.7084870848708487, |
| "grad_norm": 0.9610137343406677, |
| "learning_rate": 0.00014735462356585302, |
| "loss": 4.048961639404297, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7092250922509226, |
| "grad_norm": 0.8772600889205933, |
| "learning_rate": 0.0001466682307615368, |
| "loss": 4.249874114990234, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.7099630996309964, |
| "grad_norm": 0.8270952105522156, |
| "learning_rate": 0.00014598301633284952, |
| "loss": 4.296774387359619, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.7107011070110701, |
| "grad_norm": 0.8505011796951294, |
| "learning_rate": 0.00014529898425083793, |
| "loss": 4.15446662902832, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.7114391143911439, |
| "grad_norm": 0.7727055549621582, |
| "learning_rate": 0.00014461613847969687, |
| "loss": 4.255171298980713, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.7121771217712177, |
| "grad_norm": 1.0215280055999756, |
| "learning_rate": 0.00014393448297674613, |
| "loss": 4.0843987464904785, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.7129151291512915, |
| "grad_norm": 0.9580904841423035, |
| "learning_rate": 0.00014325402169240717, |
| "loss": 4.22476863861084, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.7136531365313653, |
| "grad_norm": 0.8007642030715942, |
| "learning_rate": 0.0001425747585701809, |
| "loss": 4.2899861335754395, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.7143911439114391, |
| "grad_norm": 1.048153042793274, |
| "learning_rate": 0.00014189669754662433, |
| "loss": 4.137915134429932, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.7151291512915129, |
| "grad_norm": 0.9277073740959167, |
| "learning_rate": 0.00014121984255132812, |
| "loss": 4.19291877746582, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.7158671586715867, |
| "grad_norm": 0.9412124752998352, |
| "learning_rate": 0.00014054419750689302, |
| "loss": 4.134371757507324, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7166051660516605, |
| "grad_norm": 0.9520360827445984, |
| "learning_rate": 0.0001398697663289086, |
| "loss": 3.9994983673095703, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.7173431734317344, |
| "grad_norm": 1.108952522277832, |
| "learning_rate": 0.00013919655292592885, |
| "loss": 4.142839431762695, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.7180811808118082, |
| "grad_norm": 0.882947564125061, |
| "learning_rate": 0.0001385245611994507, |
| "loss": 4.331300258636475, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.718819188191882, |
| "grad_norm": 0.9011394381523132, |
| "learning_rate": 0.00013785379504389108, |
| "loss": 4.304719924926758, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.7195571955719557, |
| "grad_norm": 0.9489427208900452, |
| "learning_rate": 0.00013718425834656427, |
| "loss": 3.873215675354004, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.7202952029520295, |
| "grad_norm": 0.8889840841293335, |
| "learning_rate": 0.00013651595498765954, |
| "loss": 4.21721076965332, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.7210332103321033, |
| "grad_norm": 0.8962631821632385, |
| "learning_rate": 0.0001358488888402181, |
| "loss": 4.268343925476074, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.7217712177121771, |
| "grad_norm": 1.0096079111099243, |
| "learning_rate": 0.0001351830637701119, |
| "loss": 4.305258750915527, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.7225092250922509, |
| "grad_norm": 0.8910917043685913, |
| "learning_rate": 0.0001345184836360196, |
| "loss": 4.095419883728027, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.7232472324723247, |
| "grad_norm": 0.8660383224487305, |
| "learning_rate": 0.00013385515228940572, |
| "loss": 4.2480149269104, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7239852398523985, |
| "grad_norm": 0.7730628252029419, |
| "learning_rate": 0.00013319307357449696, |
| "loss": 4.004230499267578, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.7247232472324723, |
| "grad_norm": 0.9015150666236877, |
| "learning_rate": 0.00013253225132826138, |
| "loss": 4.344229698181152, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.7254612546125462, |
| "grad_norm": 0.8757840991020203, |
| "learning_rate": 0.0001318726893803847, |
| "loss": 4.284424781799316, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.72619926199262, |
| "grad_norm": 0.8267972469329834, |
| "learning_rate": 0.00013121439155324918, |
| "loss": 3.9191102981567383, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.7269372693726938, |
| "grad_norm": 0.998901903629303, |
| "learning_rate": 0.00013055736166191095, |
| "loss": 4.020920276641846, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.7276752767527676, |
| "grad_norm": 0.9288577437400818, |
| "learning_rate": 0.00012990160351407804, |
| "loss": 4.161448001861572, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.7284132841328413, |
| "grad_norm": 0.8598924279212952, |
| "learning_rate": 0.00012924712091008842, |
| "loss": 4.157841205596924, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.7291512915129151, |
| "grad_norm": 0.8927615880966187, |
| "learning_rate": 0.0001285939176428874, |
| "loss": 4.054559230804443, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.7298892988929889, |
| "grad_norm": 0.8624060750007629, |
| "learning_rate": 0.00012794199749800698, |
| "loss": 4.096704006195068, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.7306273062730627, |
| "grad_norm": 0.9361541271209717, |
| "learning_rate": 0.00012729136425354204, |
| "loss": 4.233707427978516, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7313653136531365, |
| "grad_norm": 0.9343904256820679, |
| "learning_rate": 0.00012664202168013005, |
| "loss": 3.9704904556274414, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.7321033210332103, |
| "grad_norm": 0.9579162001609802, |
| "learning_rate": 0.0001259939735409285, |
| "loss": 4.047106742858887, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.7328413284132841, |
| "grad_norm": 0.9848127365112305, |
| "learning_rate": 0.0001253472235915933, |
| "loss": 4.055668830871582, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.7335793357933579, |
| "grad_norm": 0.8801389932632446, |
| "learning_rate": 0.00012470177558025652, |
| "loss": 4.0792717933654785, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.7343173431734318, |
| "grad_norm": 1.0689746141433716, |
| "learning_rate": 0.0001240576332475054, |
| "loss": 4.3891496658325195, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.7350553505535056, |
| "grad_norm": 0.9340549111366272, |
| "learning_rate": 0.00012341480032636035, |
| "loss": 3.9269206523895264, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.7357933579335794, |
| "grad_norm": 1.6336300373077393, |
| "learning_rate": 0.0001227732805422531, |
| "loss": 4.390302658081055, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.7365313653136532, |
| "grad_norm": 1.0127570629119873, |
| "learning_rate": 0.00012213307761300567, |
| "loss": 4.110518455505371, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.7372693726937269, |
| "grad_norm": 0.9814800024032593, |
| "learning_rate": 0.00012149419524880778, |
| "loss": 4.395967960357666, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.7380073800738007, |
| "grad_norm": 0.8709611892700195, |
| "learning_rate": 0.00012085663715219694, |
| "loss": 4.2395758628845215, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7387453874538745, |
| "grad_norm": 0.810457706451416, |
| "learning_rate": 0.00012022040701803532, |
| "loss": 4.192559242248535, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.7394833948339483, |
| "grad_norm": 0.8743382692337036, |
| "learning_rate": 0.00011958550853348949, |
| "loss": 4.053243637084961, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.7402214022140221, |
| "grad_norm": 1.0082160234451294, |
| "learning_rate": 0.0001189519453780086, |
| "loss": 4.024561405181885, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.7409594095940959, |
| "grad_norm": 0.9494944214820862, |
| "learning_rate": 0.00011831972122330317, |
| "loss": 4.133411407470703, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.7416974169741697, |
| "grad_norm": 0.729489266872406, |
| "learning_rate": 0.00011768883973332351, |
| "loss": 4.2208356857299805, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.7424354243542436, |
| "grad_norm": 0.8364970684051514, |
| "learning_rate": 0.000117059304564239, |
| "loss": 4.144787788391113, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.7431734317343174, |
| "grad_norm": 1.0048389434814453, |
| "learning_rate": 0.00011643111936441654, |
| "loss": 4.1646552085876465, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.7439114391143912, |
| "grad_norm": 0.8014469742774963, |
| "learning_rate": 0.00011580428777439973, |
| "loss": 4.183121681213379, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.744649446494465, |
| "grad_norm": 1.1298073530197144, |
| "learning_rate": 0.00011517881342688705, |
| "loss": 4.2498016357421875, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.7453874538745388, |
| "grad_norm": 0.9686313271522522, |
| "learning_rate": 0.00011455469994671158, |
| "loss": 4.157444000244141, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7461254612546125, |
| "grad_norm": 0.7569875121116638, |
| "learning_rate": 0.00011393195095082015, |
| "loss": 4.179769515991211, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.7468634686346863, |
| "grad_norm": 0.9126372933387756, |
| "learning_rate": 0.00011331057004825114, |
| "loss": 4.2508544921875, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.7476014760147601, |
| "grad_norm": 0.9252088069915771, |
| "learning_rate": 0.00011269056084011492, |
| "loss": 4.427289009094238, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.7483394833948339, |
| "grad_norm": 0.8704126477241516, |
| "learning_rate": 0.00011207192691957224, |
| "loss": 4.120467185974121, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.7490774907749077, |
| "grad_norm": 0.7337223291397095, |
| "learning_rate": 0.00011145467187181378, |
| "loss": 4.24467658996582, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.7498154981549815, |
| "grad_norm": 1.0976858139038086, |
| "learning_rate": 0.0001108387992740388, |
| "loss": 4.356447696685791, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.7505535055350554, |
| "grad_norm": 0.9983663558959961, |
| "learning_rate": 0.00011022431269543517, |
| "loss": 4.160353660583496, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.7512915129151292, |
| "grad_norm": 1.2688814401626587, |
| "learning_rate": 0.00010961121569715825, |
| "loss": 4.209506988525391, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.752029520295203, |
| "grad_norm": 0.8176230788230896, |
| "learning_rate": 0.00010899951183231028, |
| "loss": 4.172100067138672, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.7527675276752768, |
| "grad_norm": 0.8766177892684937, |
| "learning_rate": 0.00010838920464591952, |
| "loss": 4.03950834274292, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7535055350553506, |
| "grad_norm": 0.8611599802970886, |
| "learning_rate": 0.00010778029767492066, |
| "loss": 4.484358787536621, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.7542435424354244, |
| "grad_norm": 0.8686861395835876, |
| "learning_rate": 0.00010717279444813325, |
| "loss": 4.179934501647949, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.7549815498154981, |
| "grad_norm": 0.8060294985771179, |
| "learning_rate": 0.00010656669848624154, |
| "loss": 4.116765975952148, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.7557195571955719, |
| "grad_norm": 0.9301735162734985, |
| "learning_rate": 0.0001059620133017745, |
| "loss": 3.9561753273010254, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.7564575645756457, |
| "grad_norm": 0.8732739686965942, |
| "learning_rate": 0.00010535874239908514, |
| "loss": 4.087579250335693, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.7571955719557195, |
| "grad_norm": 0.8588765859603882, |
| "learning_rate": 0.00010475688927433018, |
| "loss": 4.3742876052856445, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.7579335793357933, |
| "grad_norm": 0.7727426290512085, |
| "learning_rate": 0.0001041564574154497, |
| "loss": 4.053977012634277, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.7586715867158672, |
| "grad_norm": 0.7998579144477844, |
| "learning_rate": 0.00010355745030214725, |
| "loss": 4.124699592590332, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.759409594095941, |
| "grad_norm": 0.8935081362724304, |
| "learning_rate": 0.00010295987140586949, |
| "loss": 4.136198997497559, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.7601476014760148, |
| "grad_norm": 0.8178191184997559, |
| "learning_rate": 0.00010236372418978614, |
| "loss": 4.050296783447266, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7608856088560886, |
| "grad_norm": 0.8848076462745667, |
| "learning_rate": 0.00010176901210876947, |
| "loss": 3.9550304412841797, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.7616236162361624, |
| "grad_norm": 0.9924454689025879, |
| "learning_rate": 0.00010117573860937533, |
| "loss": 4.258056640625, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.7623616236162362, |
| "grad_norm": 1.1687978506088257, |
| "learning_rate": 0.00010058390712982184, |
| "loss": 4.050140380859375, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.76309963099631, |
| "grad_norm": 0.8403159379959106, |
| "learning_rate": 9.999352109997051e-05, |
| "loss": 4.150047302246094, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.7638376383763837, |
| "grad_norm": 0.92585289478302, |
| "learning_rate": 9.940458394130595e-05, |
| "loss": 3.9567012786865234, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.7645756457564575, |
| "grad_norm": 0.9140007495880127, |
| "learning_rate": 9.881709906691602e-05, |
| "loss": 4.100074291229248, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.7653136531365313, |
| "grad_norm": 0.9550725817680359, |
| "learning_rate": 9.823106988147217e-05, |
| "loss": 4.270690441131592, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.7660516605166051, |
| "grad_norm": 1.0491443872451782, |
| "learning_rate": 9.764649978120944e-05, |
| "loss": 4.158552169799805, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.766789667896679, |
| "grad_norm": 0.827170729637146, |
| "learning_rate": 9.706339215390715e-05, |
| "loss": 4.432864189147949, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.7675276752767528, |
| "grad_norm": 1.0315954685211182, |
| "learning_rate": 9.64817503788692e-05, |
| "loss": 4.234312534332275, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7682656826568266, |
| "grad_norm": 0.9796032905578613, |
| "learning_rate": 9.590157782690429e-05, |
| "loss": 3.9558591842651367, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.7690036900369004, |
| "grad_norm": 1.082369327545166, |
| "learning_rate": 9.532287786030617e-05, |
| "loss": 4.016860485076904, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.7697416974169742, |
| "grad_norm": 0.9409294724464417, |
| "learning_rate": 9.474565383283518e-05, |
| "loss": 4.121254920959473, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.770479704797048, |
| "grad_norm": 0.9006357192993164, |
| "learning_rate": 9.416990908969736e-05, |
| "loss": 4.089673042297363, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.7712177121771218, |
| "grad_norm": 1.0219764709472656, |
| "learning_rate": 9.359564696752622e-05, |
| "loss": 3.96942138671875, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.7719557195571956, |
| "grad_norm": 0.9810526967048645, |
| "learning_rate": 9.302287079436289e-05, |
| "loss": 3.9760637283325195, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.7726937269372693, |
| "grad_norm": 0.9141161441802979, |
| "learning_rate": 9.245158388963689e-05, |
| "loss": 4.305903434753418, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.7734317343173431, |
| "grad_norm": 2.4086737632751465, |
| "learning_rate": 9.188178956414705e-05, |
| "loss": 4.438955307006836, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.7741697416974169, |
| "grad_norm": 0.9075452089309692, |
| "learning_rate": 9.131349112004189e-05, |
| "loss": 4.143951416015625, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.7749077490774908, |
| "grad_norm": 0.8627065420150757, |
| "learning_rate": 9.074669185080134e-05, |
| "loss": 4.145493984222412, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7756457564575646, |
| "grad_norm": 0.9488852620124817, |
| "learning_rate": 9.018139504121653e-05, |
| "loss": 4.0962677001953125, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.7763837638376384, |
| "grad_norm": 0.8987521529197693, |
| "learning_rate": 8.96176039673717e-05, |
| "loss": 4.037075996398926, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.7771217712177122, |
| "grad_norm": 1.5608737468719482, |
| "learning_rate": 8.905532189662476e-05, |
| "loss": 4.093520164489746, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.777859778597786, |
| "grad_norm": 1.0437077283859253, |
| "learning_rate": 8.849455208758849e-05, |
| "loss": 4.453344821929932, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.7785977859778598, |
| "grad_norm": 0.9098041653633118, |
| "learning_rate": 8.793529779011133e-05, |
| "loss": 3.896477699279785, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.7793357933579336, |
| "grad_norm": 0.7569055557250977, |
| "learning_rate": 8.737756224525918e-05, |
| "loss": 4.115358352661133, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.7800738007380074, |
| "grad_norm": 1.0293289422988892, |
| "learning_rate": 8.68213486852961e-05, |
| "loss": 4.121119976043701, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.7808118081180812, |
| "grad_norm": 0.8127309083938599, |
| "learning_rate": 8.626666033366578e-05, |
| "loss": 4.106558799743652, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.7815498154981549, |
| "grad_norm": 0.9031323790550232, |
| "learning_rate": 8.57135004049728e-05, |
| "loss": 3.9452483654022217, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.7822878228782287, |
| "grad_norm": 1.2205437421798706, |
| "learning_rate": 8.516187210496385e-05, |
| "loss": 3.8204894065856934, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7830258302583026, |
| "grad_norm": 0.907437801361084, |
| "learning_rate": 8.461177863050975e-05, |
| "loss": 4.430585861206055, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.7837638376383764, |
| "grad_norm": 0.8979167342185974, |
| "learning_rate": 8.406322316958601e-05, |
| "loss": 4.146002292633057, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.7845018450184502, |
| "grad_norm": 0.9116492867469788, |
| "learning_rate": 8.351620890125513e-05, |
| "loss": 4.052881240844727, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.785239852398524, |
| "grad_norm": 1.1355615854263306, |
| "learning_rate": 8.297073899564777e-05, |
| "loss": 4.160739898681641, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.7859778597785978, |
| "grad_norm": 0.8989261984825134, |
| "learning_rate": 8.242681661394466e-05, |
| "loss": 3.9555885791778564, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.7867158671586716, |
| "grad_norm": 1.0729719400405884, |
| "learning_rate": 8.188444490835773e-05, |
| "loss": 4.048243999481201, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.7874538745387454, |
| "grad_norm": 0.9004929661750793, |
| "learning_rate": 8.134362702211263e-05, |
| "loss": 4.261412143707275, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.7881918819188192, |
| "grad_norm": 0.7723477482795715, |
| "learning_rate": 8.080436608942988e-05, |
| "loss": 3.9241394996643066, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.788929889298893, |
| "grad_norm": 0.833265483379364, |
| "learning_rate": 8.026666523550708e-05, |
| "loss": 4.336735248565674, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.7896678966789668, |
| "grad_norm": 0.9609919190406799, |
| "learning_rate": 7.973052757650058e-05, |
| "loss": 3.9808225631713867, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7904059040590405, |
| "grad_norm": 1.0244325399398804, |
| "learning_rate": 7.919595621950728e-05, |
| "loss": 4.093958854675293, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.7911439114391144, |
| "grad_norm": 0.7694634199142456, |
| "learning_rate": 7.866295426254735e-05, |
| "loss": 3.9361343383789062, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.7918819188191882, |
| "grad_norm": 0.8412328958511353, |
| "learning_rate": 7.813152479454516e-05, |
| "loss": 4.3025431632995605, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.792619926199262, |
| "grad_norm": 0.9007997512817383, |
| "learning_rate": 7.760167089531244e-05, |
| "loss": 4.1600799560546875, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.7933579335793358, |
| "grad_norm": 0.8552005887031555, |
| "learning_rate": 7.707339563552973e-05, |
| "loss": 3.9373395442962646, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.7940959409594096, |
| "grad_norm": 0.9131635427474976, |
| "learning_rate": 7.654670207672905e-05, |
| "loss": 4.242855072021484, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.7948339483394834, |
| "grad_norm": 0.8459916710853577, |
| "learning_rate": 7.602159327127555e-05, |
| "loss": 4.222464084625244, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.7955719557195572, |
| "grad_norm": 0.9173424243927002, |
| "learning_rate": 7.549807226235051e-05, |
| "loss": 4.072568416595459, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.796309963099631, |
| "grad_norm": 0.9082213640213013, |
| "learning_rate": 7.497614208393341e-05, |
| "loss": 3.9589667320251465, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.7970479704797048, |
| "grad_norm": 0.8568102717399597, |
| "learning_rate": 7.44558057607843e-05, |
| "loss": 4.217202663421631, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7977859778597786, |
| "grad_norm": 0.9027300477027893, |
| "learning_rate": 7.393706630842592e-05, |
| "loss": 4.339812278747559, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.7985239852398524, |
| "grad_norm": 0.8236647844314575, |
| "learning_rate": 7.341992673312733e-05, |
| "loss": 3.9794492721557617, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.7992619926199263, |
| "grad_norm": 1.059795618057251, |
| "learning_rate": 7.290439003188531e-05, |
| "loss": 4.107804298400879, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0308328866958618, |
| "learning_rate": 7.239045919240731e-05, |
| "loss": 4.0905232429504395, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.8007380073800738, |
| "grad_norm": 0.9931803345680237, |
| "learning_rate": 7.187813719309466e-05, |
| "loss": 3.8613810539245605, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.8014760147601476, |
| "grad_norm": 0.9674167037010193, |
| "learning_rate": 7.136742700302469e-05, |
| "loss": 4.229313850402832, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.8022140221402214, |
| "grad_norm": 1.1589391231536865, |
| "learning_rate": 7.085833158193391e-05, |
| "loss": 4.372422695159912, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.8029520295202952, |
| "grad_norm": 1.1077316999435425, |
| "learning_rate": 7.035085388020041e-05, |
| "loss": 4.1049089431762695, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.803690036900369, |
| "grad_norm": 0.9430045485496521, |
| "learning_rate": 6.984499683882739e-05, |
| "loss": 4.282869338989258, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.8044280442804428, |
| "grad_norm": 1.254410982131958, |
| "learning_rate": 6.934076338942564e-05, |
| "loss": 3.9536659717559814, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8051660516605166, |
| "grad_norm": 0.8754069209098816, |
| "learning_rate": 6.883815645419675e-05, |
| "loss": 4.139862060546875, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.8059040590405904, |
| "grad_norm": 0.9515761733055115, |
| "learning_rate": 6.833717894591579e-05, |
| "loss": 4.331487655639648, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.8066420664206642, |
| "grad_norm": 1.1361658573150635, |
| "learning_rate": 6.783783376791533e-05, |
| "loss": 4.143629550933838, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.8073800738007381, |
| "grad_norm": 0.8871273398399353, |
| "learning_rate": 6.734012381406767e-05, |
| "loss": 4.211644172668457, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.8081180811808119, |
| "grad_norm": 0.8796087503433228, |
| "learning_rate": 6.684405196876843e-05, |
| "loss": 4.109099864959717, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.8088560885608856, |
| "grad_norm": 1.0282338857650757, |
| "learning_rate": 6.634962110691991e-05, |
| "loss": 3.9217135906219482, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.8095940959409594, |
| "grad_norm": 0.8852423429489136, |
| "learning_rate": 6.585683409391441e-05, |
| "loss": 3.826831579208374, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.8103321033210332, |
| "grad_norm": 1.1207947731018066, |
| "learning_rate": 6.536569378561766e-05, |
| "loss": 4.236572265625, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.811070110701107, |
| "grad_norm": 0.7631810307502747, |
| "learning_rate": 6.487620302835181e-05, |
| "loss": 4.135857582092285, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.8118081180811808, |
| "grad_norm": 1.0373399257659912, |
| "learning_rate": 6.438836465887968e-05, |
| "loss": 3.926546096801758, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8125461254612546, |
| "grad_norm": 0.8193474411964417, |
| "learning_rate": 6.390218150438787e-05, |
| "loss": 4.056336402893066, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.8132841328413284, |
| "grad_norm": 0.8076398968696594, |
| "learning_rate": 6.341765638247046e-05, |
| "loss": 4.038424968719482, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.8140221402214022, |
| "grad_norm": 0.9038758873939514, |
| "learning_rate": 6.29347921011124e-05, |
| "loss": 4.076757431030273, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.814760147601476, |
| "grad_norm": 1.0241302251815796, |
| "learning_rate": 6.245359145867404e-05, |
| "loss": 4.188800811767578, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.8154981549815498, |
| "grad_norm": 0.8670378923416138, |
| "learning_rate": 6.197405724387391e-05, |
| "loss": 3.7736902236938477, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.8162361623616237, |
| "grad_norm": 0.8043569922447205, |
| "learning_rate": 6.149619223577322e-05, |
| "loss": 4.0094099044799805, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.8169741697416975, |
| "grad_norm": 1.0722813606262207, |
| "learning_rate": 6.101999920375964e-05, |
| "loss": 4.505285263061523, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.8177121771217712, |
| "grad_norm": 0.8136195540428162, |
| "learning_rate": 6.054548090753103e-05, |
| "loss": 3.993842840194702, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.818450184501845, |
| "grad_norm": 0.8687028288841248, |
| "learning_rate": 6.0072640097079836e-05, |
| "loss": 4.127281188964844, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.8191881918819188, |
| "grad_norm": 0.879191517829895, |
| "learning_rate": 5.960147951267643e-05, |
| "loss": 4.027138710021973, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8199261992619926, |
| "grad_norm": 0.8649862408638, |
| "learning_rate": 5.913200188485442e-05, |
| "loss": 4.080497741699219, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.8206642066420664, |
| "grad_norm": 0.9337714314460754, |
| "learning_rate": 5.866420993439344e-05, |
| "loss": 4.245942115783691, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.8214022140221402, |
| "grad_norm": 0.8696949481964111, |
| "learning_rate": 5.81981063723045e-05, |
| "loss": 4.227627754211426, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.822140221402214, |
| "grad_norm": 0.9521300792694092, |
| "learning_rate": 5.773369389981347e-05, |
| "loss": 4.130904197692871, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.8228782287822878, |
| "grad_norm": 1.0789848566055298, |
| "learning_rate": 5.7270975208346306e-05, |
| "loss": 4.207403182983398, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.8236162361623616, |
| "grad_norm": 0.8551551103591919, |
| "learning_rate": 5.680995297951237e-05, |
| "loss": 4.2299041748046875, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.8243542435424355, |
| "grad_norm": 0.790813684463501, |
| "learning_rate": 5.635062988508984e-05, |
| "loss": 4.201531410217285, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.8250922509225093, |
| "grad_norm": 0.7844054698944092, |
| "learning_rate": 5.5893008587009665e-05, |
| "loss": 3.9883697032928467, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.825830258302583, |
| "grad_norm": 0.8120241165161133, |
| "learning_rate": 5.543709173734044e-05, |
| "loss": 3.9854788780212402, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.8265682656826568, |
| "grad_norm": 1.1635088920593262, |
| "learning_rate": 5.498288197827285e-05, |
| "loss": 3.948390007019043, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8273062730627306, |
| "grad_norm": 0.8426750898361206, |
| "learning_rate": 5.4530381942104213e-05, |
| "loss": 4.034334182739258, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.8280442804428044, |
| "grad_norm": 0.8258629441261292, |
| "learning_rate": 5.4079594251223894e-05, |
| "loss": 4.009230613708496, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.8287822878228782, |
| "grad_norm": 0.8874958157539368, |
| "learning_rate": 5.363052151809721e-05, |
| "loss": 3.9225668907165527, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.829520295202952, |
| "grad_norm": 0.9092878103256226, |
| "learning_rate": 5.318316634525092e-05, |
| "loss": 4.106935977935791, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.8302583025830258, |
| "grad_norm": 1.0611941814422607, |
| "learning_rate": 5.273753132525793e-05, |
| "loss": 4.086188793182373, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.8309963099630996, |
| "grad_norm": 0.9324346780776978, |
| "learning_rate": 5.229361904072231e-05, |
| "loss": 4.163631916046143, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.8317343173431734, |
| "grad_norm": 0.901092529296875, |
| "learning_rate": 5.1851432064264184e-05, |
| "loss": 3.8213887214660645, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.8324723247232473, |
| "grad_norm": 0.8883295655250549, |
| "learning_rate": 5.141097295850506e-05, |
| "loss": 4.020335674285889, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.8332103321033211, |
| "grad_norm": 0.8910163044929504, |
| "learning_rate": 5.0972244276052794e-05, |
| "loss": 3.904737949371338, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.8339483394833949, |
| "grad_norm": 0.9039924144744873, |
| "learning_rate": 5.053524855948689e-05, |
| "loss": 3.9267964363098145, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8346863468634687, |
| "grad_norm": 0.7226197123527527, |
| "learning_rate": 5.0099988341343834e-05, |
| "loss": 4.004914283752441, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.8354243542435424, |
| "grad_norm": 1.0319029092788696, |
| "learning_rate": 4.966646614410193e-05, |
| "loss": 3.922898769378662, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.8361623616236162, |
| "grad_norm": 0.8679114580154419, |
| "learning_rate": 4.92346844801677e-05, |
| "loss": 4.130770206451416, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.83690036900369, |
| "grad_norm": 1.1186548471450806, |
| "learning_rate": 4.8804645851860066e-05, |
| "loss": 4.051120758056641, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.8376383763837638, |
| "grad_norm": 0.9294642806053162, |
| "learning_rate": 4.8376352751396885e-05, |
| "loss": 4.042642593383789, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.8383763837638376, |
| "grad_norm": 0.9107891917228699, |
| "learning_rate": 4.794980766087991e-05, |
| "loss": 4.207566261291504, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.8391143911439114, |
| "grad_norm": 0.8764515519142151, |
| "learning_rate": 4.752501305228076e-05, |
| "loss": 3.926863670349121, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.8398523985239852, |
| "grad_norm": 0.7595376372337341, |
| "learning_rate": 4.7101971387426126e-05, |
| "loss": 4.053175926208496, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.8405904059040591, |
| "grad_norm": 1.015899419784546, |
| "learning_rate": 4.668068511798407e-05, |
| "loss": 4.323257923126221, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.8413284132841329, |
| "grad_norm": 0.7909930348396301, |
| "learning_rate": 4.62611566854495e-05, |
| "loss": 4.0056915283203125, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8420664206642067, |
| "grad_norm": 0.9827620387077332, |
| "learning_rate": 4.5843388521130024e-05, |
| "loss": 4.075970649719238, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.8428044280442805, |
| "grad_norm": 0.8181502223014832, |
| "learning_rate": 4.5427383046131974e-05, |
| "loss": 4.204850673675537, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.8435424354243543, |
| "grad_norm": 0.917636513710022, |
| "learning_rate": 4.5013142671346035e-05, |
| "loss": 4.204797744750977, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.844280442804428, |
| "grad_norm": 0.8521405458450317, |
| "learning_rate": 4.46006697974341e-05, |
| "loss": 3.8248231410980225, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.8450184501845018, |
| "grad_norm": 0.9880871176719666, |
| "learning_rate": 4.41899668148142e-05, |
| "loss": 4.135077476501465, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.8457564575645756, |
| "grad_norm": 0.7729653120040894, |
| "learning_rate": 4.3781036103647625e-05, |
| "loss": 4.0869975090026855, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.8464944649446494, |
| "grad_norm": 0.8864187598228455, |
| "learning_rate": 4.337388003382462e-05, |
| "loss": 3.949108600616455, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.8472324723247232, |
| "grad_norm": 0.7802934646606445, |
| "learning_rate": 4.296850096495096e-05, |
| "loss": 4.134548664093018, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.847970479704797, |
| "grad_norm": 1.1349821090698242, |
| "learning_rate": 4.2564901246333816e-05, |
| "loss": 3.8663113117218018, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.8487084870848709, |
| "grad_norm": 0.8297522068023682, |
| "learning_rate": 4.216308321696862e-05, |
| "loss": 4.069552421569824, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8494464944649447, |
| "grad_norm": 0.8287118077278137, |
| "learning_rate": 4.1763049205525295e-05, |
| "loss": 4.17302131652832, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.8501845018450185, |
| "grad_norm": 0.9213622212409973, |
| "learning_rate": 4.136480153033484e-05, |
| "loss": 3.975867748260498, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.8509225092250923, |
| "grad_norm": 0.937636137008667, |
| "learning_rate": 4.096834249937555e-05, |
| "loss": 4.308503150939941, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.8516605166051661, |
| "grad_norm": 0.894312858581543, |
| "learning_rate": 4.0573674410260384e-05, |
| "loss": 4.0722808837890625, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.8523985239852399, |
| "grad_norm": 0.8254060745239258, |
| "learning_rate": 4.0180799550222964e-05, |
| "loss": 4.237331390380859, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.8531365313653136, |
| "grad_norm": 0.9793713092803955, |
| "learning_rate": 3.9789720196104374e-05, |
| "loss": 3.960724115371704, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.8538745387453874, |
| "grad_norm": 0.9438844323158264, |
| "learning_rate": 3.940043861434043e-05, |
| "loss": 4.011446952819824, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.8546125461254612, |
| "grad_norm": 0.9776595234870911, |
| "learning_rate": 3.901295706094806e-05, |
| "loss": 4.202037334442139, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.855350553505535, |
| "grad_norm": 0.8546213507652283, |
| "learning_rate": 3.862727778151262e-05, |
| "loss": 4.176602363586426, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.8560885608856088, |
| "grad_norm": 0.9939232468605042, |
| "learning_rate": 3.8243403011174406e-05, |
| "loss": 4.394288063049316, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8568265682656827, |
| "grad_norm": 0.8461161851882935, |
| "learning_rate": 3.786133497461622e-05, |
| "loss": 4.105259895324707, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.8575645756457565, |
| "grad_norm": 0.8759531378746033, |
| "learning_rate": 3.748107588605018e-05, |
| "loss": 3.866830348968506, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.8583025830258303, |
| "grad_norm": 0.9277933239936829, |
| "learning_rate": 3.710262794920493e-05, |
| "loss": 4.112336158752441, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.8590405904059041, |
| "grad_norm": 0.8277254104614258, |
| "learning_rate": 3.672599335731272e-05, |
| "loss": 4.080126762390137, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.8597785977859779, |
| "grad_norm": 0.9238406419754028, |
| "learning_rate": 3.635117429309721e-05, |
| "loss": 3.9586308002471924, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.8605166051660517, |
| "grad_norm": 0.8009730577468872, |
| "learning_rate": 3.597817292876031e-05, |
| "loss": 4.31672477722168, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.8612546125461255, |
| "grad_norm": 0.9162194728851318, |
| "learning_rate": 3.560699142596952e-05, |
| "loss": 4.007983684539795, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.8619926199261992, |
| "grad_norm": 0.809906542301178, |
| "learning_rate": 3.523763193584591e-05, |
| "loss": 4.362383842468262, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.862730627306273, |
| "grad_norm": 1.0050405263900757, |
| "learning_rate": 3.487009659895132e-05, |
| "loss": 3.949605941772461, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.8634686346863468, |
| "grad_norm": 0.821631669998169, |
| "learning_rate": 3.4504387545276056e-05, |
| "loss": 4.222439765930176, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8642066420664206, |
| "grad_norm": 1.0776225328445435, |
| "learning_rate": 3.414050689422626e-05, |
| "loss": 4.083227157592773, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.8649446494464945, |
| "grad_norm": 0.9266446232795715, |
| "learning_rate": 3.3778456754612195e-05, |
| "loss": 3.8666300773620605, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.8656826568265683, |
| "grad_norm": 0.7478688359260559, |
| "learning_rate": 3.341823922463545e-05, |
| "loss": 3.9161956310272217, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.8664206642066421, |
| "grad_norm": 0.9101294875144958, |
| "learning_rate": 3.305985639187726e-05, |
| "loss": 4.048511505126953, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.8671586715867159, |
| "grad_norm": 0.9844819903373718, |
| "learning_rate": 3.270331033328581e-05, |
| "loss": 4.01615571975708, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.8678966789667897, |
| "grad_norm": 0.8775573968887329, |
| "learning_rate": 3.2348603115165085e-05, |
| "loss": 4.202104568481445, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.8686346863468635, |
| "grad_norm": 0.8407407999038696, |
| "learning_rate": 3.199573679316183e-05, |
| "loss": 4.121450424194336, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.8693726937269373, |
| "grad_norm": 0.9224722981452942, |
| "learning_rate": 3.164471341225457e-05, |
| "loss": 3.914332389831543, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.870110701107011, |
| "grad_norm": 0.8708797693252563, |
| "learning_rate": 3.1295535006741184e-05, |
| "loss": 3.9288840293884277, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.8708487084870848, |
| "grad_norm": 0.8619300127029419, |
| "learning_rate": 3.0948203600227365e-05, |
| "loss": 4.033664226531982, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8715867158671586, |
| "grad_norm": 1.0247814655303955, |
| "learning_rate": 3.060272120561491e-05, |
| "loss": 3.908498764038086, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.8723247232472324, |
| "grad_norm": 0.8571381568908691, |
| "learning_rate": 3.0259089825089657e-05, |
| "loss": 3.937492847442627, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.8730627306273063, |
| "grad_norm": 0.8049359917640686, |
| "learning_rate": 2.9917311450110688e-05, |
| "loss": 4.154782295227051, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.8738007380073801, |
| "grad_norm": 0.8404570817947388, |
| "learning_rate": 2.9577388061397813e-05, |
| "loss": 3.8062617778778076, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.8745387453874539, |
| "grad_norm": 0.8313830494880676, |
| "learning_rate": 2.92393216289209e-05, |
| "loss": 4.034999847412109, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.8752767527675277, |
| "grad_norm": 0.8629732131958008, |
| "learning_rate": 2.8903114111887997e-05, |
| "loss": 3.9214658737182617, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.8760147601476015, |
| "grad_norm": 0.788813054561615, |
| "learning_rate": 2.8568767458734206e-05, |
| "loss": 4.004258155822754, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.8767527675276753, |
| "grad_norm": 1.0045006275177002, |
| "learning_rate": 2.8236283607110122e-05, |
| "loss": 4.084541320800781, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.8774907749077491, |
| "grad_norm": 0.9397458434104919, |
| "learning_rate": 2.7905664483871018e-05, |
| "loss": 4.225802421569824, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.8782287822878229, |
| "grad_norm": 0.9364494681358337, |
| "learning_rate": 2.757691200506522e-05, |
| "loss": 4.048999786376953, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8789667896678967, |
| "grad_norm": 0.9341748952865601, |
| "learning_rate": 2.7250028075923393e-05, |
| "loss": 4.081840515136719, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.8797047970479704, |
| "grad_norm": 1.1266894340515137, |
| "learning_rate": 2.6925014590847357e-05, |
| "loss": 4.127097129821777, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.8804428044280442, |
| "grad_norm": 1.646851658821106, |
| "learning_rate": 2.660187343339872e-05, |
| "loss": 3.9492740631103516, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.8811808118081181, |
| "grad_norm": 0.8900485038757324, |
| "learning_rate": 2.628060647628891e-05, |
| "loss": 4.004465103149414, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.8819188191881919, |
| "grad_norm": 1.2839107513427734, |
| "learning_rate": 2.596121558136723e-05, |
| "loss": 3.9589195251464844, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.8826568265682657, |
| "grad_norm": 0.9112879037857056, |
| "learning_rate": 2.564370259961085e-05, |
| "loss": 3.956997871398926, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.8833948339483395, |
| "grad_norm": 0.9632598161697388, |
| "learning_rate": 2.532806937111368e-05, |
| "loss": 4.068366050720215, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.8841328413284133, |
| "grad_norm": 1.0053609609603882, |
| "learning_rate": 2.5014317725075963e-05, |
| "loss": 4.128815650939941, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.8848708487084871, |
| "grad_norm": 0.9752780199050903, |
| "learning_rate": 2.470244947979335e-05, |
| "loss": 4.243814468383789, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.8856088560885609, |
| "grad_norm": 0.8026096820831299, |
| "learning_rate": 2.439246644264672e-05, |
| "loss": 3.8507800102233887, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8863468634686347, |
| "grad_norm": 1.0604981184005737, |
| "learning_rate": 2.4084370410091432e-05, |
| "loss": 4.058777332305908, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.8870848708487085, |
| "grad_norm": 0.9318236708641052, |
| "learning_rate": 2.377816316764712e-05, |
| "loss": 3.807260751724243, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.8878228782287823, |
| "grad_norm": 0.902949333190918, |
| "learning_rate": 2.347384648988722e-05, |
| "loss": 4.102638244628906, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.888560885608856, |
| "grad_norm": 0.8091539144515991, |
| "learning_rate": 2.317142214042854e-05, |
| "loss": 4.0851216316223145, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.8892988929889298, |
| "grad_norm": 0.9384903907775879, |
| "learning_rate": 2.28708918719216e-05, |
| "loss": 3.8504605293273926, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.8900369003690037, |
| "grad_norm": 1.5000700950622559, |
| "learning_rate": 2.2572257426039673e-05, |
| "loss": 3.7018003463745117, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.8907749077490775, |
| "grad_norm": 0.853367269039154, |
| "learning_rate": 2.2275520533469324e-05, |
| "loss": 4.134353160858154, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.8915129151291513, |
| "grad_norm": 0.9626766443252563, |
| "learning_rate": 2.1980682913900136e-05, |
| "loss": 3.798292636871338, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.8922509225092251, |
| "grad_norm": 1.0532819032669067, |
| "learning_rate": 2.1687746276014825e-05, |
| "loss": 3.908432960510254, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.8929889298892989, |
| "grad_norm": 0.9355469346046448, |
| "learning_rate": 2.1396712317479066e-05, |
| "loss": 3.97414493560791, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8937269372693727, |
| "grad_norm": 0.9721041917800903, |
| "learning_rate": 2.110758272493209e-05, |
| "loss": 4.170253753662109, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.8944649446494465, |
| "grad_norm": 1.4950439929962158, |
| "learning_rate": 2.082035917397661e-05, |
| "loss": 3.9789202213287354, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.8952029520295203, |
| "grad_norm": 0.7959940433502197, |
| "learning_rate": 2.05350433291692e-05, |
| "loss": 3.8894667625427246, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.8959409594095941, |
| "grad_norm": 0.885735034942627, |
| "learning_rate": 2.0251636844010645e-05, |
| "loss": 4.206930637359619, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.8966789667896679, |
| "grad_norm": 0.8489437103271484, |
| "learning_rate": 1.997014136093635e-05, |
| "loss": 4.1217241287231445, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.8974169741697416, |
| "grad_norm": 0.9283955693244934, |
| "learning_rate": 1.9690558511306816e-05, |
| "loss": 4.022772789001465, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.8981549815498155, |
| "grad_norm": 1.0239266157150269, |
| "learning_rate": 1.9412889915398164e-05, |
| "loss": 3.9056153297424316, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.8988929889298893, |
| "grad_norm": 0.8964755535125732, |
| "learning_rate": 1.91371371823928e-05, |
| "loss": 4.042991638183594, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.8996309963099631, |
| "grad_norm": 0.8521272540092468, |
| "learning_rate": 1.88633019103701e-05, |
| "loss": 4.2974958419799805, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.9003690036900369, |
| "grad_norm": 0.8455408215522766, |
| "learning_rate": 1.859138568629708e-05, |
| "loss": 4.03305721282959, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9011070110701107, |
| "grad_norm": 0.9025142788887024, |
| "learning_rate": 1.832139008601918e-05, |
| "loss": 4.064189434051514, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.9018450184501845, |
| "grad_norm": 0.789932131767273, |
| "learning_rate": 1.8053316674251256e-05, |
| "loss": 3.8885226249694824, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.9025830258302583, |
| "grad_norm": 0.8709638118743896, |
| "learning_rate": 1.7787167004568416e-05, |
| "loss": 4.0818986892700195, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.9033210332103321, |
| "grad_norm": 1.0169392824172974, |
| "learning_rate": 1.75229426193971e-05, |
| "loss": 4.12254524230957, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.9040590405904059, |
| "grad_norm": 0.9385191798210144, |
| "learning_rate": 1.7260645050005903e-05, |
| "loss": 3.894554853439331, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.9047970479704797, |
| "grad_norm": 0.87216717004776, |
| "learning_rate": 1.7000275816497063e-05, |
| "loss": 4.0138773918151855, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.9055350553505535, |
| "grad_norm": 0.8227195143699646, |
| "learning_rate": 1.6741836427797447e-05, |
| "loss": 3.842376708984375, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.9062730627306274, |
| "grad_norm": 0.9171870350837708, |
| "learning_rate": 1.6485328381649667e-05, |
| "loss": 4.184534072875977, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.9070110701107011, |
| "grad_norm": 0.8216118216514587, |
| "learning_rate": 1.6230753164603735e-05, |
| "loss": 3.9486520290374756, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.9077490774907749, |
| "grad_norm": 0.8233117461204529, |
| "learning_rate": 1.597811225200816e-05, |
| "loss": 4.167961597442627, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9084870848708487, |
| "grad_norm": 0.9360010623931885, |
| "learning_rate": 1.5727407108001634e-05, |
| "loss": 4.118611812591553, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.9092250922509225, |
| "grad_norm": 0.8383175730705261, |
| "learning_rate": 1.5478639185504255e-05, |
| "loss": 4.2346062660217285, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.9099630996309963, |
| "grad_norm": 0.7830789685249329, |
| "learning_rate": 1.52318099262094e-05, |
| "loss": 4.022680759429932, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.9107011070110701, |
| "grad_norm": 0.8860730528831482, |
| "learning_rate": 1.4986920760575173e-05, |
| "loss": 3.8851001262664795, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.9114391143911439, |
| "grad_norm": 1.0096216201782227, |
| "learning_rate": 1.4743973107816294e-05, |
| "loss": 4.16072940826416, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.9121771217712177, |
| "grad_norm": 0.8702698945999146, |
| "learning_rate": 1.4502968375895542e-05, |
| "loss": 4.074400901794434, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.9129151291512915, |
| "grad_norm": 1.0048964023590088, |
| "learning_rate": 1.4263907961516103e-05, |
| "loss": 4.206517219543457, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.9136531365313653, |
| "grad_norm": 1.0056480169296265, |
| "learning_rate": 1.40267932501131e-05, |
| "loss": 4.110833644866943, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.9143911439114392, |
| "grad_norm": 0.9925635457038879, |
| "learning_rate": 1.379162561584547e-05, |
| "loss": 3.903393507003784, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.915129151291513, |
| "grad_norm": 1.1309577226638794, |
| "learning_rate": 1.3558406421588386e-05, |
| "loss": 4.20203971862793, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9158671586715867, |
| "grad_norm": 0.9794964790344238, |
| "learning_rate": 1.332713701892514e-05, |
| "loss": 4.138725280761719, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.9166051660516605, |
| "grad_norm": 0.9882869720458984, |
| "learning_rate": 1.3097818748139284e-05, |
| "loss": 3.934995174407959, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.9173431734317343, |
| "grad_norm": 0.9639918804168701, |
| "learning_rate": 1.2870452938206834e-05, |
| "loss": 3.992349147796631, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.9180811808118081, |
| "grad_norm": 0.761677086353302, |
| "learning_rate": 1.2645040906788873e-05, |
| "loss": 4.091512680053711, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.9188191881918819, |
| "grad_norm": 0.8653919100761414, |
| "learning_rate": 1.2421583960223403e-05, |
| "loss": 4.175684452056885, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.9195571955719557, |
| "grad_norm": 0.8463162779808044, |
| "learning_rate": 1.22000833935183e-05, |
| "loss": 3.7140493392944336, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.9202952029520295, |
| "grad_norm": 1.5709336996078491, |
| "learning_rate": 1.1980540490343322e-05, |
| "loss": 4.196260452270508, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.9210332103321033, |
| "grad_norm": 1.0555191040039062, |
| "learning_rate": 1.1762956523023177e-05, |
| "loss": 4.01348876953125, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.9217712177121771, |
| "grad_norm": 0.8740063309669495, |
| "learning_rate": 1.1547332752529649e-05, |
| "loss": 4.2362470626831055, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.922509225092251, |
| "grad_norm": 0.7975241541862488, |
| "learning_rate": 1.1333670428474634e-05, |
| "loss": 3.9546077251434326, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9232472324723248, |
| "grad_norm": 0.9999665021896362, |
| "learning_rate": 1.1121970789102842e-05, |
| "loss": 4.26607608795166, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.9239852398523986, |
| "grad_norm": 0.8974668979644775, |
| "learning_rate": 1.0912235061284481e-05, |
| "loss": 3.8792271614074707, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.9247232472324723, |
| "grad_norm": 0.9566179513931274, |
| "learning_rate": 1.0704464460508312e-05, |
| "loss": 3.9453883171081543, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.9254612546125461, |
| "grad_norm": 0.8908551335334778, |
| "learning_rate": 1.0498660190874298e-05, |
| "loss": 4.036525726318359, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.9261992619926199, |
| "grad_norm": 0.8145546317100525, |
| "learning_rate": 1.0294823445087275e-05, |
| "loss": 4.188867568969727, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.9269372693726937, |
| "grad_norm": 0.8675177097320557, |
| "learning_rate": 1.0092955404449255e-05, |
| "loss": 4.099850654602051, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.9276752767527675, |
| "grad_norm": 0.8431053757667542, |
| "learning_rate": 9.893057238853053e-06, |
| "loss": 4.123414039611816, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.9284132841328413, |
| "grad_norm": 0.8683810830116272, |
| "learning_rate": 9.69513010677545e-06, |
| "loss": 4.246320724487305, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.9291512915129151, |
| "grad_norm": 0.8188611268997192, |
| "learning_rate": 9.499175155270433e-06, |
| "loss": 4.140353202819824, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.9298892988929889, |
| "grad_norm": 0.9163283705711365, |
| "learning_rate": 9.30519351996243e-06, |
| "loss": 4.289680480957031, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9306273062730628, |
| "grad_norm": 0.9404510855674744, |
| "learning_rate": 9.113186325039935e-06, |
| "loss": 3.925518035888672, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.9313653136531366, |
| "grad_norm": 1.0115693807601929, |
| "learning_rate": 8.923154683248873e-06, |
| "loss": 4.255781173706055, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.9321033210332104, |
| "grad_norm": 0.9950158596038818, |
| "learning_rate": 8.735099695886261e-06, |
| "loss": 4.2205657958984375, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.9328413284132842, |
| "grad_norm": 0.886117160320282, |
| "learning_rate": 8.549022452793597e-06, |
| "loss": 4.172645568847656, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.933579335793358, |
| "grad_norm": 0.8960427045822144, |
| "learning_rate": 8.364924032350728e-06, |
| "loss": 4.060420513153076, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.9343173431734317, |
| "grad_norm": 0.9799672961235046, |
| "learning_rate": 8.18280550146967e-06, |
| "loss": 4.113704681396484, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.9350553505535055, |
| "grad_norm": 1.0600509643554688, |
| "learning_rate": 8.002667915588191e-06, |
| "loss": 4.008590221405029, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.9357933579335793, |
| "grad_norm": 1.0815836191177368, |
| "learning_rate": 7.824512318663873e-06, |
| "loss": 3.9697742462158203, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.9365313653136531, |
| "grad_norm": 0.8676935434341431, |
| "learning_rate": 7.648339743168008e-06, |
| "loss": 3.9918062686920166, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.9372693726937269, |
| "grad_norm": 0.8141337633132935, |
| "learning_rate": 7.474151210079654e-06, |
| "loss": 4.000406742095947, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9380073800738007, |
| "grad_norm": 0.9304051995277405, |
| "learning_rate": 7.301947728879571e-06, |
| "loss": 4.1023969650268555, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.9387453874538746, |
| "grad_norm": 0.8569762110710144, |
| "learning_rate": 7.131730297544547e-06, |
| "loss": 3.9308419227600098, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.9394833948339484, |
| "grad_norm": 0.9410969018936157, |
| "learning_rate": 6.963499902541575e-06, |
| "loss": 4.188969612121582, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.9402214022140222, |
| "grad_norm": 1.1418845653533936, |
| "learning_rate": 6.7972575188220975e-06, |
| "loss": 3.789651870727539, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.940959409594096, |
| "grad_norm": 0.8757267594337463, |
| "learning_rate": 6.633004109816293e-06, |
| "loss": 4.139651298522949, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.9416974169741698, |
| "grad_norm": 0.794495165348053, |
| "learning_rate": 6.4707406274276015e-06, |
| "loss": 4.01513671875, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.9424354243542435, |
| "grad_norm": 0.8835275769233704, |
| "learning_rate": 6.310468012027321e-06, |
| "loss": 4.235984802246094, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.9431734317343173, |
| "grad_norm": 0.8845841884613037, |
| "learning_rate": 6.152187192448738e-06, |
| "loss": 4.170893669128418, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.9439114391143911, |
| "grad_norm": 0.8521038293838501, |
| "learning_rate": 5.995899085982198e-06, |
| "loss": 4.143123626708984, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.9446494464944649, |
| "grad_norm": 0.8681446313858032, |
| "learning_rate": 5.841604598369543e-06, |
| "loss": 3.9806013107299805, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9453874538745387, |
| "grad_norm": 0.8543822765350342, |
| "learning_rate": 5.689304623799063e-06, |
| "loss": 4.092264175415039, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.9461254612546125, |
| "grad_norm": 0.8498107194900513, |
| "learning_rate": 5.5390000448999e-06, |
| "loss": 3.961348056793213, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.9468634686346864, |
| "grad_norm": 0.9987668991088867, |
| "learning_rate": 5.390691732737501e-06, |
| "loss": 3.853841781616211, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.9476014760147602, |
| "grad_norm": 0.8435112833976746, |
| "learning_rate": 5.244380546808064e-06, |
| "loss": 4.074121475219727, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.948339483394834, |
| "grad_norm": 0.9243870377540588, |
| "learning_rate": 5.100067335033909e-06, |
| "loss": 3.9349491596221924, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.9490774907749078, |
| "grad_norm": 0.9198288917541504, |
| "learning_rate": 4.957752933758391e-06, |
| "loss": 4.085498332977295, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.9498154981549816, |
| "grad_norm": 1.0337499380111694, |
| "learning_rate": 4.817438167741045e-06, |
| "loss": 4.015393257141113, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.9505535055350554, |
| "grad_norm": 0.9356955289840698, |
| "learning_rate": 4.679123850152955e-06, |
| "loss": 4.079366683959961, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.9512915129151291, |
| "grad_norm": 0.8707476854324341, |
| "learning_rate": 4.542810782571749e-06, |
| "loss": 3.9717764854431152, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.9520295202952029, |
| "grad_norm": 0.8522350788116455, |
| "learning_rate": 4.4084997549773184e-06, |
| "loss": 3.9818825721740723, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9527675276752767, |
| "grad_norm": 0.9296215176582336, |
| "learning_rate": 4.276191545747004e-06, |
| "loss": 4.0599365234375, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.9535055350553505, |
| "grad_norm": 0.9785073399543762, |
| "learning_rate": 4.145886921651165e-06, |
| "loss": 3.9496049880981445, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.9542435424354243, |
| "grad_norm": 1.0265014171600342, |
| "learning_rate": 4.017586637848669e-06, |
| "loss": 3.9062700271606445, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.9549815498154982, |
| "grad_norm": 0.8512267470359802, |
| "learning_rate": 3.891291437882544e-06, |
| "loss": 3.8862087726593018, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.955719557195572, |
| "grad_norm": 0.9458624124526978, |
| "learning_rate": 3.7670020536757775e-06, |
| "loss": 4.076284408569336, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.9564575645756458, |
| "grad_norm": 1.0091397762298584, |
| "learning_rate": 3.6447192055269694e-06, |
| "loss": 4.171298503875732, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.9571955719557196, |
| "grad_norm": 1.085514783859253, |
| "learning_rate": 3.5244436021060143e-06, |
| "loss": 4.2273736000061035, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.9579335793357934, |
| "grad_norm": 0.9626381397247314, |
| "learning_rate": 3.4061759404503734e-06, |
| "loss": 3.9600830078125, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.9586715867158672, |
| "grad_norm": 0.892805814743042, |
| "learning_rate": 3.2899169059607216e-06, |
| "loss": 4.168842315673828, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.959409594095941, |
| "grad_norm": 1.0419422388076782, |
| "learning_rate": 3.1756671723969843e-06, |
| "loss": 4.045411109924316, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9601476014760147, |
| "grad_norm": 0.9553176760673523, |
| "learning_rate": 3.0634274018746466e-06, |
| "loss": 4.090331077575684, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.9608856088560885, |
| "grad_norm": 0.7899879813194275, |
| "learning_rate": 2.9531982448607108e-06, |
| "loss": 4.1696577072143555, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.9616236162361623, |
| "grad_norm": 0.8720894455909729, |
| "learning_rate": 2.8449803401700445e-06, |
| "loss": 4.093484878540039, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.9623616236162361, |
| "grad_norm": 0.93953937292099, |
| "learning_rate": 2.738774314961534e-06, |
| "loss": 4.138238430023193, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.9630996309963099, |
| "grad_norm": 0.8301063179969788, |
| "learning_rate": 2.6345807847347413e-06, |
| "loss": 4.250385761260986, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.9638376383763838, |
| "grad_norm": 0.9756575226783752, |
| "learning_rate": 2.532400353325903e-06, |
| "loss": 4.020941734313965, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.9645756457564576, |
| "grad_norm": 0.955294132232666, |
| "learning_rate": 2.4322336129049384e-06, |
| "loss": 4.259998321533203, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.9653136531365314, |
| "grad_norm": 0.8463285565376282, |
| "learning_rate": 2.3340811439715223e-06, |
| "loss": 4.142585754394531, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.9660516605166052, |
| "grad_norm": 1.4179046154022217, |
| "learning_rate": 2.237943515352098e-06, |
| "loss": 3.9881856441497803, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.966789667896679, |
| "grad_norm": 0.9249223470687866, |
| "learning_rate": 2.1438212841963734e-06, |
| "loss": 4.063835144042969, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9675276752767528, |
| "grad_norm": 0.8510631322860718, |
| "learning_rate": 2.051714995974141e-06, |
| "loss": 3.82594895362854, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.9682656826568266, |
| "grad_norm": 1.0183271169662476, |
| "learning_rate": 1.9616251844722042e-06, |
| "loss": 4.1191205978393555, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.9690036900369003, |
| "grad_norm": 0.9534389972686768, |
| "learning_rate": 1.873552371791115e-06, |
| "loss": 4.130201816558838, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.9697416974169741, |
| "grad_norm": 0.9956035017967224, |
| "learning_rate": 1.7874970683423364e-06, |
| "loss": 3.9721202850341797, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.9704797047970479, |
| "grad_norm": 0.8220584988594055, |
| "learning_rate": 1.703459772845095e-06, |
| "loss": 4.076860427856445, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.9712177121771217, |
| "grad_norm": 0.9613221287727356, |
| "learning_rate": 1.6214409723236623e-06, |
| "loss": 3.937884569168091, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.9719557195571956, |
| "grad_norm": 0.8680334687232971, |
| "learning_rate": 1.5414411421044382e-06, |
| "loss": 4.201882362365723, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.9726937269372694, |
| "grad_norm": 0.759444534778595, |
| "learning_rate": 1.4634607458131555e-06, |
| "loss": 4.007597923278809, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.9734317343173432, |
| "grad_norm": 1.0436582565307617, |
| "learning_rate": 1.387500235372352e-06, |
| "loss": 4.142274379730225, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.974169741697417, |
| "grad_norm": 0.9300816059112549, |
| "learning_rate": 1.3135600509985745e-06, |
| "loss": 4.145612716674805, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9749077490774908, |
| "grad_norm": 0.9446290731430054, |
| "learning_rate": 1.2416406211999298e-06, |
| "loss": 4.090052604675293, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.9756457564575646, |
| "grad_norm": 0.8639572262763977, |
| "learning_rate": 1.171742362773559e-06, |
| "loss": 3.974188804626465, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.9763837638376384, |
| "grad_norm": 0.8869412541389465, |
| "learning_rate": 1.1038656808032675e-06, |
| "loss": 3.864497184753418, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.9771217712177122, |
| "grad_norm": 1.0420503616333008, |
| "learning_rate": 1.0380109686571549e-06, |
| "loss": 4.054100036621094, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.977859778597786, |
| "grad_norm": 0.8223108053207397, |
| "learning_rate": 9.74178607985282e-07, |
| "loss": 3.966116428375244, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.9785977859778597, |
| "grad_norm": 0.9738601446151733, |
| "learning_rate": 9.123689687175751e-07, |
| "loss": 4.062865257263184, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.9793357933579335, |
| "grad_norm": 0.9468348026275635, |
| "learning_rate": 8.525824090615308e-07, |
| "loss": 3.845522165298462, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.9800738007380074, |
| "grad_norm": 0.9388923048973083, |
| "learning_rate": 7.948192755002747e-07, |
| "loss": 4.0565595626831055, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.9808118081180812, |
| "grad_norm": 0.8884272575378418, |
| "learning_rate": 7.390799027904627e-07, |
| "loss": 3.9518604278564453, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.981549815498155, |
| "grad_norm": 0.8875818252563477, |
| "learning_rate": 6.85364613960493e-07, |
| "loss": 4.188823223114014, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9822878228782288, |
| "grad_norm": 0.7383257150650024, |
| "learning_rate": 6.336737203083698e-07, |
| "loss": 4.029225826263428, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.9830258302583026, |
| "grad_norm": 0.8472557663917542, |
| "learning_rate": 5.840075214001095e-07, |
| "loss": 4.239529609680176, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.9837638376383764, |
| "grad_norm": 0.8474605679512024, |
| "learning_rate": 5.363663050679535e-07, |
| "loss": 4.032186508178711, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.9845018450184502, |
| "grad_norm": 1.059869647026062, |
| "learning_rate": 4.90750347408736e-07, |
| "loss": 4.1005859375, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.985239852398524, |
| "grad_norm": 0.8023120760917664, |
| "learning_rate": 4.4715991278213576e-07, |
| "loss": 3.901467800140381, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.9859778597785978, |
| "grad_norm": 0.8805201053619385, |
| "learning_rate": 4.0559525380935435e-07, |
| "loss": 3.9754929542541504, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.9867158671586715, |
| "grad_norm": 0.844008207321167, |
| "learning_rate": 3.660566113714847e-07, |
| "loss": 4.112626552581787, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.9874538745387453, |
| "grad_norm": 0.8604761958122253, |
| "learning_rate": 3.2854421460815075e-07, |
| "loss": 3.6430814266204834, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.9881918819188192, |
| "grad_norm": 0.8547300100326538, |
| "learning_rate": 2.930582809162641e-07, |
| "loss": 4.121878623962402, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.988929889298893, |
| "grad_norm": 0.9670364260673523, |
| "learning_rate": 2.5959901594870273e-07, |
| "loss": 3.9410769939422607, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9896678966789668, |
| "grad_norm": 1.0530565977096558, |
| "learning_rate": 2.281666136130678e-07, |
| "loss": 3.989541530609131, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.9904059040590406, |
| "grad_norm": 1.0222362279891968, |
| "learning_rate": 1.9876125607067309e-07, |
| "loss": 4.04118537902832, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.9911439114391144, |
| "grad_norm": 0.7860491275787354, |
| "learning_rate": 1.713831137353794e-07, |
| "loss": 3.8432321548461914, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.9918819188191882, |
| "grad_norm": 0.8329381942749023, |
| "learning_rate": 1.460323452727008e-07, |
| "loss": 3.9724719524383545, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.992619926199262, |
| "grad_norm": 0.8542125225067139, |
| "learning_rate": 1.2270909759879432e-07, |
| "loss": 4.1086506843566895, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.9933579335793358, |
| "grad_norm": 0.9141987562179565, |
| "learning_rate": 1.0141350587972164e-07, |
| "loss": 3.7987635135650635, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.9940959409594096, |
| "grad_norm": 0.8679295778274536, |
| "learning_rate": 8.214569353055534e-08, |
| "loss": 4.131080627441406, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.9948339483394834, |
| "grad_norm": 0.9177278876304626, |
| "learning_rate": 6.490577221467953e-08, |
| "loss": 4.434652328491211, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.9955719557195571, |
| "grad_norm": 0.8580910563468933, |
| "learning_rate": 4.9693841843245764e-08, |
| "loss": 4.102374076843262, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.996309963099631, |
| "grad_norm": 0.9112648367881775, |
| "learning_rate": 3.6509990574473684e-08, |
| "loss": 4.2107343673706055, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9970479704797048, |
| "grad_norm": 1.0079255104064941, |
| "learning_rate": 2.535429481318463e-08, |
| "loss": 3.846949577331543, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.9977859778597786, |
| "grad_norm": 0.7972182035446167, |
| "learning_rate": 1.622681921033542e-08, |
| "loss": 3.9979095458984375, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.9985239852398524, |
| "grad_norm": 0.8567417860031128, |
| "learning_rate": 9.127616662746307e-09, |
| "loss": 3.994305372238159, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.9992619926199262, |
| "grad_norm": 0.8455564379692078, |
| "learning_rate": 4.0567283126347055e-09, |
| "loss": 4.195075988769531, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7844815254211426, |
| "learning_rate": 1.0141835475374616e-09, |
| "loss": 4.3078813552856445, |
| "step": 1355 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1355, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.572106671245492e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|