| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.9949212136997, | |
| "eval_steps": 500, | |
| "global_step": 4795, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010418023180101576, | |
| "grad_norm": 11.121468544006348, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "loss": 1.0155, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.020836046360203152, | |
| "grad_norm": 10.899005889892578, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "loss": 1.0247, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03125406954030473, | |
| "grad_norm": 11.787379264831543, | |
| "learning_rate": 6.25e-07, | |
| "loss": 0.9569, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.041672092720406305, | |
| "grad_norm": 10.255136489868164, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 0.9531, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.052090115900507876, | |
| "grad_norm": 11.36585807800293, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "loss": 0.913, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06250813908060945, | |
| "grad_norm": 8.4786958694458, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.9508, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07292616226071102, | |
| "grad_norm": 13.468831062316895, | |
| "learning_rate": 1.4583333333333335e-06, | |
| "loss": 0.9788, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08334418544081261, | |
| "grad_norm": 13.184584617614746, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.9664, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09376220862091418, | |
| "grad_norm": 8.695037841796875, | |
| "learning_rate": 1.8750000000000003e-06, | |
| "loss": 0.9333, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10418023180101575, | |
| "grad_norm": 10.278554916381836, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 0.8748, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11459825498111734, | |
| "grad_norm": 10.934048652648926, | |
| "learning_rate": 2.2916666666666666e-06, | |
| "loss": 0.8221, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1250162781612189, | |
| "grad_norm": 6.949201583862305, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.7869, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13543430134132048, | |
| "grad_norm": 9.884735107421875, | |
| "learning_rate": 2.7083333333333334e-06, | |
| "loss": 0.7208, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14585232452142205, | |
| "grad_norm": 8.795683860778809, | |
| "learning_rate": 2.916666666666667e-06, | |
| "loss": 0.7022, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15627034770152365, | |
| "grad_norm": 7.725055694580078, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.6499, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16668837088162522, | |
| "grad_norm": 5.934634208679199, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.5223, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1771063940617268, | |
| "grad_norm": 5.791957378387451, | |
| "learning_rate": 3.5416666666666673e-06, | |
| "loss": 0.426, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18752441724182836, | |
| "grad_norm": 4.230670928955078, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.2739, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19794244042192993, | |
| "grad_norm": 3.5117030143737793, | |
| "learning_rate": 3.958333333333333e-06, | |
| "loss": 0.2038, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2083604636020315, | |
| "grad_norm": 1.7673850059509277, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.1047, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2187784867821331, | |
| "grad_norm": 2.710550308227539, | |
| "learning_rate": 4.3750000000000005e-06, | |
| "loss": 0.0695, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22919650996223467, | |
| "grad_norm": 1.8703516721725464, | |
| "learning_rate": 4.583333333333333e-06, | |
| "loss": 0.0448, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23961453314233624, | |
| "grad_norm": 2.233571767807007, | |
| "learning_rate": 4.791666666666668e-06, | |
| "loss": 0.0482, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2500325563224378, | |
| "grad_norm": 2.937638282775879, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0464, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2604505795025394, | |
| "grad_norm": 3.5171966552734375, | |
| "learning_rate": 5.208333333333334e-06, | |
| "loss": 0.026, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27086860268264096, | |
| "grad_norm": 3.3302793502807617, | |
| "learning_rate": 5.416666666666667e-06, | |
| "loss": 0.0131, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28128662586274256, | |
| "grad_norm": 4.264321804046631, | |
| "learning_rate": 5.625e-06, | |
| "loss": 0.0267, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2917046490428441, | |
| "grad_norm": 1.3102387189865112, | |
| "learning_rate": 5.833333333333334e-06, | |
| "loss": 0.0192, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3021226722229457, | |
| "grad_norm": 2.629667043685913, | |
| "learning_rate": 6.041666666666667e-06, | |
| "loss": 0.0221, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3125406954030473, | |
| "grad_norm": 5.036803245544434, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.043, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.32295871858314884, | |
| "grad_norm": 1.3269509077072144, | |
| "learning_rate": 6.458333333333334e-06, | |
| "loss": 0.049, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.33337674176325044, | |
| "grad_norm": 3.0598790645599365, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0131, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.343794764943352, | |
| "grad_norm": 0.758718729019165, | |
| "learning_rate": 6.875e-06, | |
| "loss": 0.0244, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3542127881234536, | |
| "grad_norm": 1.2521103620529175, | |
| "learning_rate": 7.083333333333335e-06, | |
| "loss": 0.0149, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3646308113035551, | |
| "grad_norm": 0.35801389813423157, | |
| "learning_rate": 7.291666666666667e-06, | |
| "loss": 0.0086, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3750488344836567, | |
| "grad_norm": 1.5886716842651367, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.0158, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3854668576637583, | |
| "grad_norm": 4.816068649291992, | |
| "learning_rate": 7.708333333333334e-06, | |
| "loss": 0.0331, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.39588488084385987, | |
| "grad_norm": 1.0439949035644531, | |
| "learning_rate": 7.916666666666667e-06, | |
| "loss": 0.0109, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.40630290402396146, | |
| "grad_norm": 0.6839566826820374, | |
| "learning_rate": 8.125000000000001e-06, | |
| "loss": 0.0116, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.416720927204063, | |
| "grad_norm": 2.3680825233459473, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0086, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4271389503841646, | |
| "grad_norm": 2.888700485229492, | |
| "learning_rate": 8.541666666666666e-06, | |
| "loss": 0.0257, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4375569735642662, | |
| "grad_norm": 1.5104542970657349, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 0.0211, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.44797499674436775, | |
| "grad_norm": 3.3087830543518066, | |
| "learning_rate": 8.958333333333334e-06, | |
| "loss": 0.0195, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.45839301992446935, | |
| "grad_norm": 0.7269104719161987, | |
| "learning_rate": 9.166666666666666e-06, | |
| "loss": 0.0042, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4688110431045709, | |
| "grad_norm": 0.03662832826375961, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.0225, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4792290662846725, | |
| "grad_norm": 0.027355490252375603, | |
| "learning_rate": 9.583333333333335e-06, | |
| "loss": 0.0207, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.48964708946477403, | |
| "grad_norm": 1.296669602394104, | |
| "learning_rate": 9.791666666666666e-06, | |
| "loss": 0.0184, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5000651126448756, | |
| "grad_norm": 0.7718325853347778, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0082, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5104831358249772, | |
| "grad_norm": 0.6721673607826233, | |
| "learning_rate": 9.999867481584167e-06, | |
| "loss": 0.0201, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5209011590050788, | |
| "grad_norm": 2.800171136856079, | |
| "learning_rate": 9.99946993336112e-06, | |
| "loss": 0.0118, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5313191821851804, | |
| "grad_norm": 0.13001485168933868, | |
| "learning_rate": 9.998807376403843e-06, | |
| "loss": 0.0028, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5417372053652819, | |
| "grad_norm": 0.11868428438901901, | |
| "learning_rate": 9.997879845832736e-06, | |
| "loss": 0.0248, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5521552285453835, | |
| "grad_norm": 0.1077343001961708, | |
| "learning_rate": 9.996687390813751e-06, | |
| "loss": 0.0026, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5625732517254851, | |
| "grad_norm": 0.004652015864849091, | |
| "learning_rate": 9.995230074555788e-06, | |
| "loss": 0.0216, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5729912749055867, | |
| "grad_norm": 2.4979023933410645, | |
| "learning_rate": 9.993507974307346e-06, | |
| "loss": 0.0169, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5834092980856882, | |
| "grad_norm": 2.996638059616089, | |
| "learning_rate": 9.991521181352419e-06, | |
| "loss": 0.0111, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5938273212657899, | |
| "grad_norm": 1.996310830116272, | |
| "learning_rate": 9.989269801005675e-06, | |
| "loss": 0.0115, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6042453444458914, | |
| "grad_norm": 0.1705595701932907, | |
| "learning_rate": 9.986753952606851e-06, | |
| "loss": 0.0072, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6146633676259929, | |
| "grad_norm": 1.5530083179473877, | |
| "learning_rate": 9.983973769514448e-06, | |
| "loss": 0.0056, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6250813908060946, | |
| "grad_norm": 0.45140549540519714, | |
| "learning_rate": 9.98092939909865e-06, | |
| "loss": 0.0153, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6354994139861961, | |
| "grad_norm": 0.16771870851516724, | |
| "learning_rate": 9.977621002733512e-06, | |
| "loss": 0.0186, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6459174371662977, | |
| "grad_norm": 1.3226218223571777, | |
| "learning_rate": 9.974048755788416e-06, | |
| "loss": 0.0042, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6563354603463992, | |
| "grad_norm": 0.0040002502501010895, | |
| "learning_rate": 9.970212847618761e-06, | |
| "loss": 0.0136, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6667534835265009, | |
| "grad_norm": 0.26981475949287415, | |
| "learning_rate": 9.96611348155594e-06, | |
| "loss": 0.0068, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6771715067066024, | |
| "grad_norm": 0.12803907692432404, | |
| "learning_rate": 9.961750874896548e-06, | |
| "loss": 0.0075, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.687589529886704, | |
| "grad_norm": 0.15524764358997345, | |
| "learning_rate": 9.957125258890877e-06, | |
| "loss": 0.0154, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6980075530668056, | |
| "grad_norm": 1.0714681148529053, | |
| "learning_rate": 9.952236878730648e-06, | |
| "loss": 0.008, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7084255762469072, | |
| "grad_norm": 5.858039379119873, | |
| "learning_rate": 9.947085993536019e-06, | |
| "loss": 0.0293, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7188435994270087, | |
| "grad_norm": 0.6457405090332031, | |
| "learning_rate": 9.941672876341848e-06, | |
| "loss": 0.0225, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7292616226071102, | |
| "grad_norm": 0.08024393022060394, | |
| "learning_rate": 9.935997814083221e-06, | |
| "loss": 0.003, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7396796457872119, | |
| "grad_norm": 2.8071186542510986, | |
| "learning_rate": 9.930061107580245e-06, | |
| "loss": 0.0134, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7500976689673134, | |
| "grad_norm": 1.7718358039855957, | |
| "learning_rate": 9.923863071522092e-06, | |
| "loss": 0.0228, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.760515692147415, | |
| "grad_norm": 0.015169305726885796, | |
| "learning_rate": 9.917404034450333e-06, | |
| "loss": 0.0066, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7709337153275166, | |
| "grad_norm": 0.020366037264466286, | |
| "learning_rate": 9.91068433874151e-06, | |
| "loss": 0.0034, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7813517385076182, | |
| "grad_norm": 0.4552544951438904, | |
| "learning_rate": 9.903704340589e-06, | |
| "loss": 0.0172, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7917697616877197, | |
| "grad_norm": 0.586904764175415, | |
| "learning_rate": 9.896464409984115e-06, | |
| "loss": 0.0158, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8021877848678214, | |
| "grad_norm": 0.003931673243641853, | |
| "learning_rate": 9.888964930696514e-06, | |
| "loss": 0.0118, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8126058080479229, | |
| "grad_norm": 0.14547383785247803, | |
| "learning_rate": 9.881206300253838e-06, | |
| "loss": 0.0091, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8230238312280245, | |
| "grad_norm": 3.7878336906433105, | |
| "learning_rate": 9.87318892992066e-06, | |
| "loss": 0.0195, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.833441854408126, | |
| "grad_norm": 0.4627358913421631, | |
| "learning_rate": 9.864913244676661e-06, | |
| "loss": 0.0072, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8438598775882277, | |
| "grad_norm": 4.011754035949707, | |
| "learning_rate": 9.856379683194122e-06, | |
| "loss": 0.0102, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8542779007683292, | |
| "grad_norm": 0.15841282904148102, | |
| "learning_rate": 9.847588697814662e-06, | |
| "loss": 0.0037, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8646959239484308, | |
| "grad_norm": 0.011311404407024384, | |
| "learning_rate": 9.838540754525266e-06, | |
| "loss": 0.0057, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8751139471285324, | |
| "grad_norm": 2.975158929824829, | |
| "learning_rate": 9.829236332933573e-06, | |
| "loss": 0.0094, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.885531970308634, | |
| "grad_norm": 0.0023622314911335707, | |
| "learning_rate": 9.819675926242473e-06, | |
| "loss": 0.0051, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8959499934887355, | |
| "grad_norm": 1.6916306018829346, | |
| "learning_rate": 9.809860041223945e-06, | |
| "loss": 0.0179, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.906368016668837, | |
| "grad_norm": 0.002486775629222393, | |
| "learning_rate": 9.799789198192197e-06, | |
| "loss": 0.0257, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9167860398489387, | |
| "grad_norm": 0.03367191180586815, | |
| "learning_rate": 9.789463930976098e-06, | |
| "loss": 0.0044, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9272040630290402, | |
| "grad_norm": 3.230694055557251, | |
| "learning_rate": 9.77888478689087e-06, | |
| "loss": 0.0096, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9376220862091418, | |
| "grad_norm": 0.7497555017471313, | |
| "learning_rate": 9.76805232670908e-06, | |
| "loss": 0.0132, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9480401093892434, | |
| "grad_norm": 0.5059885382652283, | |
| "learning_rate": 9.756967124630911e-06, | |
| "loss": 0.0021, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.958458132569345, | |
| "grad_norm": 0.031383853405714035, | |
| "learning_rate": 9.745629768253735e-06, | |
| "loss": 0.0027, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9688761557494465, | |
| "grad_norm": 0.2179183065891266, | |
| "learning_rate": 9.734040858540947e-06, | |
| "loss": 0.0081, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9792941789295481, | |
| "grad_norm": 0.02150609716773033, | |
| "learning_rate": 9.722201009790135e-06, | |
| "loss": 0.0013, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9897122021096497, | |
| "grad_norm": 0.7268407344818115, | |
| "learning_rate": 9.710110849600498e-06, | |
| "loss": 0.0154, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 4.076342582702637, | |
| "learning_rate": 9.697771018839586e-06, | |
| "loss": 0.0182, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.0104180231801017, | |
| "grad_norm": 0.0026843964587897062, | |
| "learning_rate": 9.685182171609328e-06, | |
| "loss": 0.0078, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.020836046360203, | |
| "grad_norm": 1.928469181060791, | |
| "learning_rate": 9.672344975211361e-06, | |
| "loss": 0.0026, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.0312540695403047, | |
| "grad_norm": 0.19345267117023468, | |
| "learning_rate": 9.659260110111659e-06, | |
| "loss": 0.0013, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.0416720927204064, | |
| "grad_norm": 0.10904184728860855, | |
| "learning_rate": 9.645928269904459e-06, | |
| "loss": 0.0006, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0520901159005078, | |
| "grad_norm": 0.0044245533645153046, | |
| "learning_rate": 9.632350161275496e-06, | |
| "loss": 0.0078, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.0625081390806095, | |
| "grad_norm": 0.052690137177705765, | |
| "learning_rate": 9.618526503964552e-06, | |
| "loss": 0.0033, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.072926162260711, | |
| "grad_norm": 8.081074338406324e-05, | |
| "learning_rate": 9.604458030727292e-06, | |
| "loss": 0.0041, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.0833441854408126, | |
| "grad_norm": 0.9879348278045654, | |
| "learning_rate": 9.590145487296431e-06, | |
| "loss": 0.0015, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.0937622086209142, | |
| "grad_norm": 0.1349502056837082, | |
| "learning_rate": 9.575589632342201e-06, | |
| "loss": 0.0003, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.1041802318010157, | |
| "grad_norm": 0.0031191352754831314, | |
| "learning_rate": 9.560791237432141e-06, | |
| "loss": 0.0157, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.1145982549811173, | |
| "grad_norm": 0.003794798394665122, | |
| "learning_rate": 9.545751086990187e-06, | |
| "loss": 0.0074, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.125016278161219, | |
| "grad_norm": 0.04094767943024635, | |
| "learning_rate": 9.530469978255105e-06, | |
| "loss": 0.0001, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1354343013413204, | |
| "grad_norm": 1.0790066880872473e-05, | |
| "learning_rate": 9.514948721238227e-06, | |
| "loss": 0.0074, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.145852324521422, | |
| "grad_norm": 3.5826034545898438, | |
| "learning_rate": 9.499188138680504e-06, | |
| "loss": 0.0108, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1562703477015237, | |
| "grad_norm": 0.39376917481422424, | |
| "learning_rate": 9.48318906600891e-06, | |
| "loss": 0.002, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.1666883708816251, | |
| "grad_norm": 0.008306131698191166, | |
| "learning_rate": 9.466952351292158e-06, | |
| "loss": 0.0033, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.1771063940617268, | |
| "grad_norm": 0.08543845266103745, | |
| "learning_rate": 9.450478855195724e-06, | |
| "loss": 0.0002, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.1875244172418284, | |
| "grad_norm": 1.0472006797790527, | |
| "learning_rate": 9.433769450936254e-06, | |
| "loss": 0.0033, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.1979424404219299, | |
| "grad_norm": 0.0014151977375149727, | |
| "learning_rate": 9.416825024235262e-06, | |
| "loss": 0.0005, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2083604636020315, | |
| "grad_norm": 3.7921109199523926, | |
| "learning_rate": 9.399646473272181e-06, | |
| "loss": 0.0062, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.2187784867821332, | |
| "grad_norm": 0.0031763967126607895, | |
| "learning_rate": 9.382234708636753e-06, | |
| "loss": 0.0018, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.2291965099622346, | |
| "grad_norm": 0.00694266939535737, | |
| "learning_rate": 9.364590653280767e-06, | |
| "loss": 0.0023, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.2396145331423363, | |
| "grad_norm": 0.0004896274767816067, | |
| "learning_rate": 9.346715242469128e-06, | |
| "loss": 0.0065, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.250032556322438, | |
| "grad_norm": 0.008612466044723988, | |
| "learning_rate": 9.328609423730285e-06, | |
| "loss": 0.0004, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.2604505795025394, | |
| "grad_norm": 0.00032048547291196883, | |
| "learning_rate": 9.310274156806006e-06, | |
| "loss": 0.0122, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.270868602682641, | |
| "grad_norm": 0.024016475304961205, | |
| "learning_rate": 9.291710413600498e-06, | |
| "loss": 0.0105, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.2812866258627427, | |
| "grad_norm": 0.059294044971466064, | |
| "learning_rate": 9.272919178128902e-06, | |
| "loss": 0.0005, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.291704649042844, | |
| "grad_norm": 0.0428699254989624, | |
| "learning_rate": 9.253901446465116e-06, | |
| "loss": 0.0032, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.3021226722229458, | |
| "grad_norm": 0.1531277745962143, | |
| "learning_rate": 9.234658226689015e-06, | |
| "loss": 0.0003, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3125406954030474, | |
| "grad_norm": 2.18913197517395, | |
| "learning_rate": 9.215190538832995e-06, | |
| "loss": 0.0046, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.3229587185831488, | |
| "grad_norm": 0.017851542681455612, | |
| "learning_rate": 9.195499414827917e-06, | |
| "loss": 0.0001, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3333767417632505, | |
| "grad_norm": 9.223073959350586, | |
| "learning_rate": 9.175585898448408e-06, | |
| "loss": 0.0267, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.343794764943352, | |
| "grad_norm": 0.0561683289706707, | |
| "learning_rate": 9.155451045257523e-06, | |
| "loss": 0.0157, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.3542127881234536, | |
| "grad_norm": 0.02189466543495655, | |
| "learning_rate": 9.135095922550801e-06, | |
| "loss": 0.002, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.364630811303555, | |
| "grad_norm": 3.704211950302124, | |
| "learning_rate": 9.114521609299691e-06, | |
| "loss": 0.008, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.3750488344836567, | |
| "grad_norm": 0.09731532633304596, | |
| "learning_rate": 9.09372919609435e-06, | |
| "loss": 0.0023, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.3854668576637583, | |
| "grad_norm": 0.18077053129673004, | |
| "learning_rate": 9.072719785085842e-06, | |
| "loss": 0.013, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3958848808438598, | |
| "grad_norm": 0.026519620791077614, | |
| "learning_rate": 9.051494489927714e-06, | |
| "loss": 0.0098, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.4063029040239614, | |
| "grad_norm": 3.807239294052124, | |
| "learning_rate": 9.030054435716961e-06, | |
| "loss": 0.0152, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.416720927204063, | |
| "grad_norm": 1.5597429275512695, | |
| "learning_rate": 9.008400758934392e-06, | |
| "loss": 0.0014, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.4271389503841645, | |
| "grad_norm": 0.00023376860190182924, | |
| "learning_rate": 8.986534607384383e-06, | |
| "loss": 0.0089, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.4375569735642661, | |
| "grad_norm": 0.13520453870296478, | |
| "learning_rate": 8.96445714013404e-06, | |
| "loss": 0.0008, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.4479749967443678, | |
| "grad_norm": 3.3357255458831787, | |
| "learning_rate": 8.942169527451756e-06, | |
| "loss": 0.0177, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.4583930199244692, | |
| "grad_norm": 0.13861112296581268, | |
| "learning_rate": 8.919672950745185e-06, | |
| "loss": 0.0177, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.468811043104571, | |
| "grad_norm": 0.0005054327775724232, | |
| "learning_rate": 8.896968602498605e-06, | |
| "loss": 0.0087, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.4792290662846725, | |
| "grad_norm": 0.188633993268013, | |
| "learning_rate": 8.874057686209727e-06, | |
| "loss": 0.0023, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.489647089464774, | |
| "grad_norm": 4.461554527282715, | |
| "learning_rate": 8.850941416325876e-06, | |
| "loss": 0.01, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.5000651126448756, | |
| "grad_norm": 0.0012838880065828562, | |
| "learning_rate": 8.827621018179644e-06, | |
| "loss": 0.0129, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.5104831358249773, | |
| "grad_norm": 0.40561285614967346, | |
| "learning_rate": 8.804097727923916e-06, | |
| "loss": 0.0049, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.5209011590050787, | |
| "grad_norm": 0.038120679557323456, | |
| "learning_rate": 8.780372792466356e-06, | |
| "loss": 0.0104, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.5313191821851804, | |
| "grad_norm": 2.760854959487915, | |
| "learning_rate": 8.75644746940331e-06, | |
| "loss": 0.0032, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.541737205365282, | |
| "grad_norm": 0.5340994596481323, | |
| "learning_rate": 8.732323026953141e-06, | |
| "loss": 0.008, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.5521552285453835, | |
| "grad_norm": 2.0399329662323, | |
| "learning_rate": 8.708000743889009e-06, | |
| "loss": 0.0052, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.5625732517254851, | |
| "grad_norm": 2.4984633922576904, | |
| "learning_rate": 8.68348190947108e-06, | |
| "loss": 0.0151, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.5729912749055868, | |
| "grad_norm": 0.07911086082458496, | |
| "learning_rate": 8.658767823378198e-06, | |
| "loss": 0.0012, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.5834092980856882, | |
| "grad_norm": 0.12755373120307922, | |
| "learning_rate": 8.63385979563897e-06, | |
| "loss": 0.0039, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.5938273212657899, | |
| "grad_norm": 0.03218844160437584, | |
| "learning_rate": 8.608759146562352e-06, | |
| "loss": 0.0013, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.6042453444458915, | |
| "grad_norm": 0.00013593579933512956, | |
| "learning_rate": 8.583467206667643e-06, | |
| "loss": 0.0007, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.614663367625993, | |
| "grad_norm": 0.00014157673285808414, | |
| "learning_rate": 8.557985316613967e-06, | |
| "loss": 0.0005, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.6250813908060946, | |
| "grad_norm": 0.001298804534599185, | |
| "learning_rate": 8.532314827129207e-06, | |
| "loss": 0.0003, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.6354994139861962, | |
| "grad_norm": 0.00196313438937068, | |
| "learning_rate": 8.5064570989384e-06, | |
| "loss": 0.0042, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.6459174371662977, | |
| "grad_norm": 0.05907834693789482, | |
| "learning_rate": 8.480413502691618e-06, | |
| "loss": 0.0042, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.6563354603463991, | |
| "grad_norm": 2.9666049480438232, | |
| "learning_rate": 8.454185418891305e-06, | |
| "loss": 0.0009, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.666753483526501, | |
| "grad_norm": 0.04065735638141632, | |
| "learning_rate": 8.427774237819113e-06, | |
| "loss": 0.0071, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.6771715067066024, | |
| "grad_norm": 2.4857747554779053, | |
| "learning_rate": 8.401181359462187e-06, | |
| "loss": 0.0078, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.6875895298867039, | |
| "grad_norm": 0.0004068031266797334, | |
| "learning_rate": 8.374408193438977e-06, | |
| "loss": 0.0003, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.6980075530668057, | |
| "grad_norm": 0.000535597384441644, | |
| "learning_rate": 8.347456158924496e-06, | |
| "loss": 0.0054, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.7084255762469072, | |
| "grad_norm": 0.06767778843641281, | |
| "learning_rate": 8.320326684575116e-06, | |
| "loss": 0.0016, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.7188435994270086, | |
| "grad_norm": 0.00011920960969291627, | |
| "learning_rate": 8.29302120845282e-06, | |
| "loss": 0.0008, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.7292616226071102, | |
| "grad_norm": 0.1632106900215149, | |
| "learning_rate": 8.265541177948986e-06, | |
| "loss": 0.0011, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.739679645787212, | |
| "grad_norm": 0.03428950533270836, | |
| "learning_rate": 8.237888049707656e-06, | |
| "loss": 0.0087, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.7500976689673133, | |
| "grad_norm": 0.0007083571399562061, | |
| "learning_rate": 8.210063289548328e-06, | |
| "loss": 0.0025, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.760515692147415, | |
| "grad_norm": 1.6527312254766002e-05, | |
| "learning_rate": 8.182068372388259e-06, | |
| "loss": 0.0044, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.7709337153275166, | |
| "grad_norm": 0.0002037805679719895, | |
| "learning_rate": 8.153904782164275e-06, | |
| "loss": 0.0011, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.781351738507618, | |
| "grad_norm": 0.0011550731724128127, | |
| "learning_rate": 8.125574011754125e-06, | |
| "loss": 0.0028, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.7917697616877197, | |
| "grad_norm": 0.058955416083335876, | |
| "learning_rate": 8.097077562897332e-06, | |
| "loss": 0.0003, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.8021877848678214, | |
| "grad_norm": 0.6716377139091492, | |
| "learning_rate": 8.0684169461156e-06, | |
| "loss": 0.0082, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.8126058080479228, | |
| "grad_norm": 0.0014718943275511265, | |
| "learning_rate": 8.03959368063274e-06, | |
| "loss": 0.0069, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.8230238312280245, | |
| "grad_norm": 0.00023426729603670537, | |
| "learning_rate": 8.01060929429415e-06, | |
| "loss": 0.0107, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.8334418544081261, | |
| "grad_norm": 0.002175110625103116, | |
| "learning_rate": 7.981465323485808e-06, | |
| "loss": 0.0082, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.8438598775882276, | |
| "grad_norm": 0.719397783279419, | |
| "learning_rate": 7.952163313052856e-06, | |
| "loss": 0.0009, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.8542779007683292, | |
| "grad_norm": 0.976002037525177, | |
| "learning_rate": 7.922704816217693e-06, | |
| "loss": 0.0082, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.8646959239484309, | |
| "grad_norm": 3.530712604522705, | |
| "learning_rate": 7.893091394497651e-06, | |
| "loss": 0.0239, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.8751139471285323, | |
| "grad_norm": 1.4467989206314087, | |
| "learning_rate": 7.863324617622227e-06, | |
| "loss": 0.0019, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.885531970308634, | |
| "grad_norm": 0.0016262925928458571, | |
| "learning_rate": 7.833406063449866e-06, | |
| "loss": 0.0004, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.8959499934887356, | |
| "grad_norm": 0.007764583453536034, | |
| "learning_rate": 7.803337317884328e-06, | |
| "loss": 0.0025, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.906368016668837, | |
| "grad_norm": 4.686193278757855e-06, | |
| "learning_rate": 7.773119974790626e-06, | |
| "loss": 0.0008, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.9167860398489387, | |
| "grad_norm": 0.02094154804944992, | |
| "learning_rate": 7.742755635910532e-06, | |
| "loss": 0.0, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.9272040630290403, | |
| "grad_norm": 0.026232223957777023, | |
| "learning_rate": 7.712245910777684e-06, | |
| "loss": 0.0004, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.9376220862091418, | |
| "grad_norm": 0.07305476814508438, | |
| "learning_rate": 7.68159241663226e-06, | |
| "loss": 0.0108, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.9480401093892434, | |
| "grad_norm": 0.11340963840484619, | |
| "learning_rate": 7.650796778335248e-06, | |
| "loss": 0.0006, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.958458132569345, | |
| "grad_norm": 2.027043592534028e-05, | |
| "learning_rate": 7.619860628282333e-06, | |
| "loss": 0.0178, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.9688761557494465, | |
| "grad_norm": 0.14668358862400055, | |
| "learning_rate": 7.5887856063173525e-06, | |
| "loss": 0.0031, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.979294178929548, | |
| "grad_norm": 0.056070610880851746, | |
| "learning_rate": 7.5575733596453805e-06, | |
| "loss": 0.0006, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.9897122021096498, | |
| "grad_norm": 0.01226119790226221, | |
| "learning_rate": 7.526225542745408e-06, | |
| "loss": 0.0086, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.026876511052250862, | |
| "learning_rate": 7.494743817282651e-06, | |
| "loss": 0.0021, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.0104180231801014, | |
| "grad_norm": 1.010679006576538, | |
| "learning_rate": 7.463129852020465e-06, | |
| "loss": 0.001, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.0208360463602033, | |
| "grad_norm": 0.05851694196462631, | |
| "learning_rate": 7.431385322731885e-06, | |
| "loss": 0.0002, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.0312540695403047, | |
| "grad_norm": 0.0013823857298120856, | |
| "learning_rate": 7.399511912110807e-06, | |
| "loss": 0.0003, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.041672092720406, | |
| "grad_norm": 0.0012095741694793105, | |
| "learning_rate": 7.367511309682782e-06, | |
| "loss": 0.0009, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.052090115900508, | |
| "grad_norm": 5.620197296142578, | |
| "learning_rate": 7.335385211715467e-06, | |
| "loss": 0.0064, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.0625081390806095, | |
| "grad_norm": 0.0062335277907550335, | |
| "learning_rate": 7.303135321128705e-06, | |
| "loss": 0.0002, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.072926162260711, | |
| "grad_norm": 0.001201906125061214, | |
| "learning_rate": 7.270763347404262e-06, | |
| "loss": 0.0017, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.083344185440813, | |
| "grad_norm": 0.016548430547118187, | |
| "learning_rate": 7.238271006495206e-06, | |
| "loss": 0.0015, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0937622086209142, | |
| "grad_norm": 0.0003294723283033818, | |
| "learning_rate": 7.205660020734955e-06, | |
| "loss": 0.0002, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.1041802318010157, | |
| "grad_norm": 0.02431473508477211, | |
| "learning_rate": 7.172932118745978e-06, | |
| "loss": 0.0001, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.1145982549811175, | |
| "grad_norm": 0.006785488221794367, | |
| "learning_rate": 7.140089035348166e-06, | |
| "loss": 0.0018, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.125016278161219, | |
| "grad_norm": 0.06791669875383377, | |
| "learning_rate": 7.107132511466872e-06, | |
| "loss": 0.0005, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.1354343013413204, | |
| "grad_norm": 0.0033805356360971928, | |
| "learning_rate": 7.074064294040629e-06, | |
| "loss": 0.0026, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.145852324521422, | |
| "grad_norm": 0.0271758995950222, | |
| "learning_rate": 7.040886135928554e-06, | |
| "loss": 0.0004, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.1562703477015237, | |
| "grad_norm": 0.07952375710010529, | |
| "learning_rate": 7.007599795817426e-06, | |
| "loss": 0.0, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.166688370881625, | |
| "grad_norm": 0.12942220270633698, | |
| "learning_rate": 6.9742070381284724e-06, | |
| "loss": 0.0006, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.1771063940617266, | |
| "grad_norm": 5.3233266953611746e-05, | |
| "learning_rate": 6.94070963292383e-06, | |
| "loss": 0.0001, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.1875244172418284, | |
| "grad_norm": 1.8653046254257788e-06, | |
| "learning_rate": 6.907109355812729e-06, | |
| "loss": 0.0014, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.19794244042193, | |
| "grad_norm": 0.2578833997249603, | |
| "learning_rate": 6.873407987857367e-06, | |
| "loss": 0.0005, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.2083604636020313, | |
| "grad_norm": 4.3816239667648915e-06, | |
| "learning_rate": 6.8396073154785e-06, | |
| "loss": 0.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.218778486782133, | |
| "grad_norm": 0.4164877235889435, | |
| "learning_rate": 6.805709130360754e-06, | |
| "loss": 0.0017, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.2291965099622346, | |
| "grad_norm": 7.609223303006729e-06, | |
| "learning_rate": 6.771715229357643e-06, | |
| "loss": 0.0002, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.239614533142336, | |
| "grad_norm": 0.17523327469825745, | |
| "learning_rate": 6.73762741439633e-06, | |
| "loss": 0.0018, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.250032556322438, | |
| "grad_norm": 9.692726135253906, | |
| "learning_rate": 6.7034474923821135e-06, | |
| "loss": 0.0034, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.2604505795025394, | |
| "grad_norm": 3.903836841345765e-05, | |
| "learning_rate": 6.669177275102637e-06, | |
| "loss": 0.0004, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.270868602682641, | |
| "grad_norm": 0.26724299788475037, | |
| "learning_rate": 6.634818579131865e-06, | |
| "loss": 0.0007, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.2812866258627427, | |
| "grad_norm": 0.002805814379826188, | |
| "learning_rate": 6.6003732257337805e-06, | |
| "loss": 0.0009, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.291704649042844, | |
| "grad_norm": 0.00600125128403306, | |
| "learning_rate": 6.565843040765849e-06, | |
| "loss": 0.0003, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.3021226722229455, | |
| "grad_norm": 0.07256772369146347, | |
| "learning_rate": 6.531229854582235e-06, | |
| "loss": 0.0002, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.3125406954030474, | |
| "grad_norm": 0.05456492677330971, | |
| "learning_rate": 6.496535501936781e-06, | |
| "loss": 0.0, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.322958718583149, | |
| "grad_norm": 0.002974656643345952, | |
| "learning_rate": 6.461761821885742e-06, | |
| "loss": 0.0008, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.3333767417632503, | |
| "grad_norm": 0.32170239090919495, | |
| "learning_rate": 6.42691065769032e-06, | |
| "loss": 0.0002, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.343794764943352, | |
| "grad_norm": 0.0577409565448761, | |
| "learning_rate": 6.391983856718941e-06, | |
| "loss": 0.0138, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.3542127881234536, | |
| "grad_norm": 0.5012153387069702, | |
| "learning_rate": 6.356983270349339e-06, | |
| "loss": 0.0047, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.364630811303555, | |
| "grad_norm": 0.0003676996275316924, | |
| "learning_rate": 6.321910753870416e-06, | |
| "loss": 0.0003, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.375048834483657, | |
| "grad_norm": 0.14126235246658325, | |
| "learning_rate": 6.286768166383905e-06, | |
| "loss": 0.0001, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.3854668576637583, | |
| "grad_norm": 0.0022616020869463682, | |
| "learning_rate": 6.251557370705809e-06, | |
| "loss": 0.0002, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.3958848808438598, | |
| "grad_norm": 6.679229736328125, | |
| "learning_rate": 6.216280233267681e-06, | |
| "loss": 0.0135, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.4063029040239616, | |
| "grad_norm": 0.006057079881429672, | |
| "learning_rate": 6.180938624017663e-06, | |
| "loss": 0.0067, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.416720927204063, | |
| "grad_norm": 0.058568984270095825, | |
| "learning_rate": 6.145534416321384e-06, | |
| "loss": 0.0002, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.4271389503841645, | |
| "grad_norm": 0.001173496013507247, | |
| "learning_rate": 6.110069486862652e-06, | |
| "loss": 0.0083, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.4375569735642664, | |
| "grad_norm": 7.219270706176758, | |
| "learning_rate": 6.074545715543975e-06, | |
| "loss": 0.0173, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.447974996744368, | |
| "grad_norm": 0.1928091198205948, | |
| "learning_rate": 6.0389649853869116e-06, | |
| "loss": 0.0001, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.4583930199244692, | |
| "grad_norm": 0.2435365468263626, | |
| "learning_rate": 6.00332918243226e-06, | |
| "loss": 0.0003, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.468811043104571, | |
| "grad_norm": 1.0856934785842896, | |
| "learning_rate": 5.967640195640083e-06, | |
| "loss": 0.0016, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.4792290662846725, | |
| "grad_norm": 0.0007231564377434552, | |
| "learning_rate": 5.931899916789576e-06, | |
| "loss": 0.001, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.489647089464774, | |
| "grad_norm": 0.0067249564453959465, | |
| "learning_rate": 5.896110240378794e-06, | |
| "loss": 0.001, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.500065112644876, | |
| "grad_norm": 0.00024194463912863284, | |
| "learning_rate": 5.860273063524227e-06, | |
| "loss": 0.0019, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.5104831358249773, | |
| "grad_norm": 0.011660280637443066, | |
| "learning_rate": 5.824390285860233e-06, | |
| "loss": 0.0, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.5209011590050787, | |
| "grad_norm": 7.278788689291105e-05, | |
| "learning_rate": 5.788463809438356e-06, | |
| "loss": 0.0, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.5313191821851806, | |
| "grad_norm": 0.0003818434779532254, | |
| "learning_rate": 5.752495538626493e-06, | |
| "loss": 0.0042, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.541737205365282, | |
| "grad_norm": 4.750408172607422, | |
| "learning_rate": 5.7164873800079475e-06, | |
| "loss": 0.009, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.5521552285453835, | |
| "grad_norm": 9.329643830824352e-07, | |
| "learning_rate": 5.680441242280378e-06, | |
| "loss": 0.0067, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.5625732517254853, | |
| "grad_norm": 3.5650442441692576e-05, | |
| "learning_rate": 5.6443590361546095e-06, | |
| "loss": 0.0017, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.5729912749055868, | |
| "grad_norm": 0.030909501016139984, | |
| "learning_rate": 5.608242674253362e-06, | |
| "loss": 0.0, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.583409298085688, | |
| "grad_norm": 0.011211927980184555, | |
| "learning_rate": 5.57209407100986e-06, | |
| "loss": 0.0101, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.59382732126579, | |
| "grad_norm": 0.0002943580038845539, | |
| "learning_rate": 5.535915142566361e-06, | |
| "loss": 0.0003, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.6042453444458915, | |
| "grad_norm": 6.406533066183329e-05, | |
| "learning_rate": 5.499707806672575e-06, | |
| "loss": 0.0077, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.614663367625993, | |
| "grad_norm": 0.0030457451939582825, | |
| "learning_rate": 5.463473982584023e-06, | |
| "loss": 0.0019, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.625081390806095, | |
| "grad_norm": 0.00026603075093589723, | |
| "learning_rate": 5.4272155909602875e-06, | |
| "loss": 0.0161, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.6354994139861962, | |
| "grad_norm": 0.03001578524708748, | |
| "learning_rate": 5.3909345537632205e-06, | |
| "loss": 0.0153, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.6459174371662977, | |
| "grad_norm": 0.5489205121994019, | |
| "learning_rate": 5.354632794155049e-06, | |
| "loss": 0.0008, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.656335460346399, | |
| "grad_norm": 0.00020121924171689898, | |
| "learning_rate": 5.318312236396445e-06, | |
| "loss": 0.0025, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.666753483526501, | |
| "grad_norm": 0.16872790455818176, | |
| "learning_rate": 5.281974805744516e-06, | |
| "loss": 0.0009, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.6771715067066024, | |
| "grad_norm": 1.6018518635974033e-06, | |
| "learning_rate": 5.245622428350764e-06, | |
| "loss": 0.0019, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.687589529886704, | |
| "grad_norm": 0.15927599370479584, | |
| "learning_rate": 5.209257031158972e-06, | |
| "loss": 0.0042, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.6980075530668057, | |
| "grad_norm": 0.002104927320033312, | |
| "learning_rate": 5.1728805418030725e-06, | |
| "loss": 0.002, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.708425576246907, | |
| "grad_norm": 0.00043586574611254036, | |
| "learning_rate": 5.13649488850496e-06, | |
| "loss": 0.011, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.7188435994270086, | |
| "grad_norm": 0.5086974501609802, | |
| "learning_rate": 5.100101999972291e-06, | |
| "loss": 0.0004, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.72926162260711, | |
| "grad_norm": 0.04724502190947533, | |
| "learning_rate": 5.063703805296239e-06, | |
| "loss": 0.0001, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.739679645787212, | |
| "grad_norm": 2.4284263417939655e-06, | |
| "learning_rate": 5.027302233849243e-06, | |
| "loss": 0.0028, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.7500976689673133, | |
| "grad_norm": 0.24547390639781952, | |
| "learning_rate": 4.990899215182735e-06, | |
| "loss": 0.0032, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.7605156921474148, | |
| "grad_norm": 0.001153081888332963, | |
| "learning_rate": 4.954496678924861e-06, | |
| "loss": 0.0004, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.7709337153275166, | |
| "grad_norm": 0.005142812617123127, | |
| "learning_rate": 4.9180965546781985e-06, | |
| "loss": 0.0001, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.781351738507618, | |
| "grad_norm": 0.0020258312579244375, | |
| "learning_rate": 4.8817007719174635e-06, | |
| "loss": 0.0014, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.7917697616877195, | |
| "grad_norm": 0.09598887711763382, | |
| "learning_rate": 4.845311259887251e-06, | |
| "loss": 0.0017, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.8021877848678214, | |
| "grad_norm": 5.523069921764545e-05, | |
| "learning_rate": 4.808929947499751e-06, | |
| "loss": 0.0001, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.812605808047923, | |
| "grad_norm": 4.3237105273874477e-05, | |
| "learning_rate": 4.772558763232521e-06, | |
| "loss": 0.0, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.8230238312280242, | |
| "grad_norm": 0.017797382548451424, | |
| "learning_rate": 4.736199635026247e-06, | |
| "loss": 0.0004, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.833441854408126, | |
| "grad_norm": 0.0372743122279644, | |
| "learning_rate": 4.699854490182558e-06, | |
| "loss": 0.0001, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.8438598775882276, | |
| "grad_norm": 0.017943061888217926, | |
| "learning_rate": 4.663525255261861e-06, | |
| "loss": 0.0026, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.854277900768329, | |
| "grad_norm": 0.0005965960444882512, | |
| "learning_rate": 4.627213855981221e-06, | |
| "loss": 0.0002, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.864695923948431, | |
| "grad_norm": 8.808985148789361e-05, | |
| "learning_rate": 4.590922217112279e-06, | |
| "loss": 0.0005, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.8751139471285323, | |
| "grad_norm": 0.010554715059697628, | |
| "learning_rate": 4.554652262379236e-06, | |
| "loss": 0.0031, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.8855319703086337, | |
| "grad_norm": 0.030894100666046143, | |
| "learning_rate": 4.518405914356865e-06, | |
| "loss": 0.0002, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.8959499934887356, | |
| "grad_norm": 0.13782520592212677, | |
| "learning_rate": 4.4821850943686164e-06, | |
| "loss": 0.0003, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.906368016668837, | |
| "grad_norm": 0.0024414719082415104, | |
| "learning_rate": 4.445991722384763e-06, | |
| "loss": 0.0002, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.9167860398489385, | |
| "grad_norm": 0.0004072992014698684, | |
| "learning_rate": 4.4098277169206315e-06, | |
| "loss": 0.0001, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.9272040630290403, | |
| "grad_norm": 0.0007056115427985787, | |
| "learning_rate": 4.373694994934911e-06, | |
| "loss": 0.0194, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.937622086209142, | |
| "grad_norm": 0.005580561701208353, | |
| "learning_rate": 4.337595471728029e-06, | |
| "loss": 0.0004, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.948040109389243, | |
| "grad_norm": 1.4271708726882935, | |
| "learning_rate": 4.30153106084064e-06, | |
| "loss": 0.0005, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.958458132569345, | |
| "grad_norm": 0.6616150736808777, | |
| "learning_rate": 4.2655036739521795e-06, | |
| "loss": 0.0016, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.9688761557494465, | |
| "grad_norm": 0.0038947018329054117, | |
| "learning_rate": 4.229515220779545e-06, | |
| "loss": 0.0001, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.979294178929548, | |
| "grad_norm": 0.049666061997413635, | |
| "learning_rate": 4.193567608975856e-06, | |
| "loss": 0.0002, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.98971220210965, | |
| "grad_norm": 0.001238349243067205, | |
| "learning_rate": 4.1576627440293425e-06, | |
| "loss": 0.0006, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.0001485509128542617, | |
| "learning_rate": 4.121802529162331e-06, | |
| "loss": 0.0, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 3.0104180231801014, | |
| "grad_norm": 0.0052583469077944756, | |
| "learning_rate": 4.08598886523037e-06, | |
| "loss": 0.0008, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 3.0208360463602033, | |
| "grad_norm": 0.0016122941160574555, | |
| "learning_rate": 4.050223650621461e-06, | |
| "loss": 0.0003, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.0312540695403047, | |
| "grad_norm": 3.2575666409684345e-05, | |
| "learning_rate": 4.014508781155441e-06, | |
| "loss": 0.0003, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 3.041672092720406, | |
| "grad_norm": 0.056297626346349716, | |
| "learning_rate": 3.978846149983473e-06, | |
| "loss": 0.0002, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 3.052090115900508, | |
| "grad_norm": 2.323356420674827e-05, | |
| "learning_rate": 3.943237647487719e-06, | |
| "loss": 0.0014, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 3.0625081390806095, | |
| "grad_norm": 0.05314822122454643, | |
| "learning_rate": 3.907685161181109e-06, | |
| "loss": 0.0001, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 3.072926162260711, | |
| "grad_norm": 1.262199878692627, | |
| "learning_rate": 3.8721905756073135e-06, | |
| "loss": 0.0004, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 3.083344185440813, | |
| "grad_norm": 0.012801315635442734, | |
| "learning_rate": 3.836755772240829e-06, | |
| "loss": 0.0005, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 3.0937622086209142, | |
| "grad_norm": 5.073728516435949e-06, | |
| "learning_rate": 3.8013826293872623e-06, | |
| "loss": 0.0, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 3.1041802318010157, | |
| "grad_norm": 0.0036370500456541777, | |
| "learning_rate": 3.7660730220837512e-06, | |
| "loss": 0.0001, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 3.1145982549811175, | |
| "grad_norm": 0.006650363560765982, | |
| "learning_rate": 3.7308288219995884e-06, | |
| "loss": 0.0001, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 3.125016278161219, | |
| "grad_norm": 0.00013867947563994676, | |
| "learning_rate": 3.695651897336996e-06, | |
| "loss": 0.0, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.1354343013413204, | |
| "grad_norm": 4.687119508162141e-05, | |
| "learning_rate": 3.6605441127321074e-06, | |
| "loss": 0.0001, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 3.145852324521422, | |
| "grad_norm": 0.0005026389262638986, | |
| "learning_rate": 3.6255073291561248e-06, | |
| "loss": 0.0, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 3.1562703477015237, | |
| "grad_norm": 0.01530701294541359, | |
| "learning_rate": 3.5905434038166653e-06, | |
| "loss": 0.0001, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 3.166688370881625, | |
| "grad_norm": 0.15575547516345978, | |
| "learning_rate": 3.555654190059333e-06, | |
| "loss": 0.004, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 3.1771063940617266, | |
| "grad_norm": 0.0063483756966888905, | |
| "learning_rate": 3.520841537269458e-06, | |
| "loss": 0.0025, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 3.1875244172418284, | |
| "grad_norm": 0.0002552935038693249, | |
| "learning_rate": 3.486107290774083e-06, | |
| "loss": 0.0004, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 3.19794244042193, | |
| "grad_norm": 1.8458106296748156e-06, | |
| "learning_rate": 3.4514532917441345e-06, | |
| "loss": 0.0004, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 3.2083604636020313, | |
| "grad_norm": 3.37355140800355e-06, | |
| "learning_rate": 3.4168813770968357e-06, | |
| "loss": 0.0001, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 3.218778486782133, | |
| "grad_norm": 0.00028720268164761364, | |
| "learning_rate": 3.3823933793983298e-06, | |
| "loss": 0.0003, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 3.2291965099622346, | |
| "grad_norm": 4.652118150261231e-06, | |
| "learning_rate": 3.347991126766545e-06, | |
| "loss": 0.0, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.239614533142336, | |
| "grad_norm": 0.0003041624731849879, | |
| "learning_rate": 3.3136764427742885e-06, | |
| "loss": 0.0001, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 3.250032556322438, | |
| "grad_norm": 0.00011942172568524256, | |
| "learning_rate": 3.279451146352588e-06, | |
| "loss": 0.0001, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 3.2604505795025394, | |
| "grad_norm": 0.044789642095565796, | |
| "learning_rate": 3.2453170516942657e-06, | |
| "loss": 0.0001, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 3.270868602682641, | |
| "grad_norm": 0.02395668812096119, | |
| "learning_rate": 3.211275968157784e-06, | |
| "loss": 0.0001, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 3.2812866258627427, | |
| "grad_norm": 0.02200975827872753, | |
| "learning_rate": 3.1773297001713266e-06, | |
| "loss": 0.0002, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 3.291704649042844, | |
| "grad_norm": 0.0002672448754310608, | |
| "learning_rate": 3.1434800471371586e-06, | |
| "loss": 0.0, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 3.3021226722229455, | |
| "grad_norm": 1.4595940228900872e-05, | |
| "learning_rate": 3.109728803336234e-06, | |
| "loss": 0.0, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 3.3125406954030474, | |
| "grad_norm": 0.0023994040675461292, | |
| "learning_rate": 3.076077757833103e-06, | |
| "loss": 0.0001, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 3.322958718583149, | |
| "grad_norm": 0.009379798546433449, | |
| "learning_rate": 3.042528694381057e-06, | |
| "loss": 0.0, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 3.3333767417632503, | |
| "grad_norm": 0.008108630776405334, | |
| "learning_rate": 3.0090833913275965e-06, | |
| "loss": 0.0001, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.343794764943352, | |
| "grad_norm": 0.03200116753578186, | |
| "learning_rate": 2.9757436215201497e-06, | |
| "loss": 0.0, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 3.3542127881234536, | |
| "grad_norm": 7.638196279913245e-07, | |
| "learning_rate": 2.9425111522121085e-06, | |
| "loss": 0.0009, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 3.364630811303555, | |
| "grad_norm": 3.822772123385221e-05, | |
| "learning_rate": 2.9093877449691523e-06, | |
| "loss": 0.0, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 3.375048834483657, | |
| "grad_norm": 1.9638941287994385, | |
| "learning_rate": 2.8763751555758544e-06, | |
| "loss": 0.0011, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.3854668576637583, | |
| "grad_norm": 8.83774831095252e-08, | |
| "learning_rate": 2.843475133942642e-06, | |
| "loss": 0.0007, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.3958848808438598, | |
| "grad_norm": 5.0836853915825486e-05, | |
| "learning_rate": 2.810689424013011e-06, | |
| "loss": 0.0, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 3.4063029040239616, | |
| "grad_norm": 3.2860607461770996e-05, | |
| "learning_rate": 2.7780197636710993e-06, | |
| "loss": 0.0, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 3.416720927204063, | |
| "grad_norm": 0.1506471037864685, | |
| "learning_rate": 2.7454678846495593e-06, | |
| "loss": 0.0002, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 3.4271389503841645, | |
| "grad_norm": 0.0003242450475227088, | |
| "learning_rate": 2.7130355124377684e-06, | |
| "loss": 0.0001, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 3.4375569735642664, | |
| "grad_norm": 4.1059661270992365e-06, | |
| "learning_rate": 2.6807243661903597e-06, | |
| "loss": 0.0, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.447974996744368, | |
| "grad_norm": 0.007434395141899586, | |
| "learning_rate": 2.6485361586361012e-06, | |
| "loss": 0.0001, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 3.4583930199244692, | |
| "grad_norm": 0.2851642966270447, | |
| "learning_rate": 2.6164725959871005e-06, | |
| "loss": 0.0008, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 3.468811043104571, | |
| "grad_norm": 0.001458955928683281, | |
| "learning_rate": 2.5845353778483707e-06, | |
| "loss": 0.0, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 3.4792290662846725, | |
| "grad_norm": 3.6944745716027683e-06, | |
| "learning_rate": 2.552726197127732e-06, | |
| "loss": 0.0001, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 3.489647089464774, | |
| "grad_norm": 0.00017191942606586963, | |
| "learning_rate": 2.5210467399460794e-06, | |
| "loss": 0.0, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 3.500065112644876, | |
| "grad_norm": 8.9817083789967e-05, | |
| "learning_rate": 2.489498685548005e-06, | |
| "loss": 0.0002, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 3.5104831358249773, | |
| "grad_norm": 0.0007733273669146001, | |
| "learning_rate": 2.4580837062127867e-06, | |
| "loss": 0.0, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 3.5209011590050787, | |
| "grad_norm": 0.05479538068175316, | |
| "learning_rate": 2.4268034671657412e-06, | |
| "loss": 0.0002, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 3.5313191821851806, | |
| "grad_norm": 0.02038147673010826, | |
| "learning_rate": 2.39565962648996e-06, | |
| "loss": 0.0, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 3.541737205365282, | |
| "grad_norm": 1.4380566426552832e-06, | |
| "learning_rate": 2.364653835038413e-06, | |
| "loss": 0.0001, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.5521552285453835, | |
| "grad_norm": 0.00023164517187979072, | |
| "learning_rate": 2.3337877363464475e-06, | |
| "loss": 0.0008, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 3.5625732517254853, | |
| "grad_norm": 0.00041943430551327765, | |
| "learning_rate": 2.3030629665446635e-06, | |
| "loss": 0.0001, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 3.5729912749055868, | |
| "grad_norm": 5.017073154449463, | |
| "learning_rate": 2.272481154272188e-06, | |
| "loss": 0.0062, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 3.583409298085688, | |
| "grad_norm": 0.13844065368175507, | |
| "learning_rate": 2.2420439205903523e-06, | |
| "loss": 0.0006, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 3.59382732126579, | |
| "grad_norm": 5.108922004699707, | |
| "learning_rate": 2.211752878896745e-06, | |
| "loss": 0.0131, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 3.6042453444458915, | |
| "grad_norm": 0.012104752473533154, | |
| "learning_rate": 2.1816096348397176e-06, | |
| "loss": 0.0, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 3.614663367625993, | |
| "grad_norm": 0.31581804156303406, | |
| "learning_rate": 2.1516157862332425e-06, | |
| "loss": 0.0002, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 3.625081390806095, | |
| "grad_norm": 0.0012989668175578117, | |
| "learning_rate": 2.1217729229722485e-06, | |
| "loss": 0.0001, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 3.6354994139861962, | |
| "grad_norm": 5.394697291194461e-05, | |
| "learning_rate": 2.092082626948313e-06, | |
| "loss": 0.0001, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 3.6459174371662977, | |
| "grad_norm": 0.0002785645192489028, | |
| "learning_rate": 2.062546471965841e-06, | |
| "loss": 0.0017, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.656335460346399, | |
| "grad_norm": 0.04909211024641991, | |
| "learning_rate": 2.033166023658613e-06, | |
| "loss": 0.0055, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 3.666753483526501, | |
| "grad_norm": 0.007108056452125311, | |
| "learning_rate": 2.0039428394068224e-06, | |
| "loss": 0.0, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 3.6771715067066024, | |
| "grad_norm": 0.0002842152607627213, | |
| "learning_rate": 1.9748784682544955e-06, | |
| "loss": 0.0, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 3.687589529886704, | |
| "grad_norm": 0.037801098078489304, | |
| "learning_rate": 1.9459744508274076e-06, | |
| "loss": 0.0014, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 3.6980075530668057, | |
| "grad_norm": 0.004841256886720657, | |
| "learning_rate": 1.917232319251392e-06, | |
| "loss": 0.0008, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 3.708425576246907, | |
| "grad_norm": 0.3873371481895447, | |
| "learning_rate": 1.8886535970711522e-06, | |
| "loss": 0.0062, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 3.7188435994270086, | |
| "grad_norm": 1.5684008758398704e-07, | |
| "learning_rate": 1.860239799169482e-06, | |
| "loss": 0.0001, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 3.72926162260711, | |
| "grad_norm": 0.0005140849971212447, | |
| "learning_rate": 1.8319924316869763e-06, | |
| "loss": 0.0003, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 3.739679645787212, | |
| "grad_norm": 8.463976882921997e-06, | |
| "learning_rate": 1.8039129919421905e-06, | |
| "loss": 0.0036, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 3.7500976689673133, | |
| "grad_norm": 0.0017335577867925167, | |
| "learning_rate": 1.7760029683522734e-06, | |
| "loss": 0.0, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.7605156921474148, | |
| "grad_norm": 6.008242417010479e-05, | |
| "learning_rate": 1.7482638403540703e-06, | |
| "loss": 0.0001, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 3.7709337153275166, | |
| "grad_norm": 0.0020610829815268517, | |
| "learning_rate": 1.720697078325701e-06, | |
| "loss": 0.0005, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 3.781351738507618, | |
| "grad_norm": 0.009846694767475128, | |
| "learning_rate": 1.693304143508618e-06, | |
| "loss": 0.0, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 3.7917697616877195, | |
| "grad_norm": 0.00021927843044977635, | |
| "learning_rate": 1.666086487930153e-06, | |
| "loss": 0.0002, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 3.8021877848678214, | |
| "grad_norm": 6.052812750567682e-05, | |
| "learning_rate": 1.6390455543265454e-06, | |
| "loss": 0.0001, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 3.812605808047923, | |
| "grad_norm": 0.0021989853121340275, | |
| "learning_rate": 1.6121827760664677e-06, | |
| "loss": 0.0001, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 3.8230238312280242, | |
| "grad_norm": 0.00010219099931418896, | |
| "learning_rate": 1.5854995770750526e-06, | |
| "loss": 0.0001, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 3.833441854408126, | |
| "grad_norm": 0.00012348932796157897, | |
| "learning_rate": 1.5589973717583968e-06, | |
| "loss": 0.0, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 3.8438598775882276, | |
| "grad_norm": 0.008015728555619717, | |
| "learning_rate": 1.5326775649286135e-06, | |
| "loss": 0.0001, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 3.854277900768329, | |
| "grad_norm": 3.6400449516804656e-06, | |
| "learning_rate": 1.506541551729338e-06, | |
| "loss": 0.0001, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.864695923948431, | |
| "grad_norm": 0.05682307854294777, | |
| "learning_rate": 1.4805907175618034e-06, | |
| "loss": 0.0001, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 3.8751139471285323, | |
| "grad_norm": 2.3464983314624988e-05, | |
| "learning_rate": 1.4548264380113763e-06, | |
| "loss": 0.0, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 3.8855319703086337, | |
| "grad_norm": 3.6688052205136046e-05, | |
| "learning_rate": 1.4292500787746677e-06, | |
| "loss": 0.0, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 3.8959499934887356, | |
| "grad_norm": 6.737604053341784e-06, | |
| "learning_rate": 1.4038629955871147e-06, | |
| "loss": 0.0, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 3.906368016668837, | |
| "grad_norm": 0.3771498501300812, | |
| "learning_rate": 1.3786665341511424e-06, | |
| "loss": 0.0002, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.9167860398489385, | |
| "grad_norm": 0.21408240497112274, | |
| "learning_rate": 1.3536620300648062e-06, | |
| "loss": 0.0001, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 3.9272040630290403, | |
| "grad_norm": 0.020450890064239502, | |
| "learning_rate": 1.3288508087510198e-06, | |
| "loss": 0.0, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 3.937622086209142, | |
| "grad_norm": 0.0006295586354099214, | |
| "learning_rate": 1.3042341853872753e-06, | |
| "loss": 0.0001, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 3.948040109389243, | |
| "grad_norm": 0.018750308081507683, | |
| "learning_rate": 1.2798134648359512e-06, | |
| "loss": 0.0, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 3.958458132569345, | |
| "grad_norm": 0.00036703175283037126, | |
| "learning_rate": 1.2555899415751211e-06, | |
| "loss": 0.0002, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.9688761557494465, | |
| "grad_norm": 0.005201701074838638, | |
| "learning_rate": 1.2315648996299628e-06, | |
| "loss": 0.0, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 3.979294178929548, | |
| "grad_norm": 1.7224958355654962e-05, | |
| "learning_rate": 1.2077396125046703e-06, | |
| "loss": 0.0002, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 3.98971220210965, | |
| "grad_norm": 0.0006623083609156311, | |
| "learning_rate": 1.1841153431149715e-06, | |
| "loss": 0.0, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.05106138437986374, | |
| "learning_rate": 1.160693343721163e-06, | |
| "loss": 0.0, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 4.010418023180102, | |
| "grad_norm": 0.13067518174648285, | |
| "learning_rate": 1.137474855861751e-06, | |
| "loss": 0.0001, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 4.020836046360203, | |
| "grad_norm": 0.024557417258620262, | |
| "learning_rate": 1.1144611102876251e-06, | |
| "loss": 0.0, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 4.031254069540305, | |
| "grad_norm": 0.00017925056454259902, | |
| "learning_rate": 1.0916533268968293e-06, | |
| "loss": 0.0, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 4.041672092720407, | |
| "grad_norm": 0.0006559378234669566, | |
| "learning_rate": 1.0690527146698915e-06, | |
| "loss": 0.0001, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 4.052090115900508, | |
| "grad_norm": 0.0031467515509575605, | |
| "learning_rate": 1.046660471605744e-06, | |
| "loss": 0.0, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 4.0625081390806095, | |
| "grad_norm": 1.6517557241968461e-06, | |
| "learning_rate": 1.0244777846582172e-06, | |
| "loss": 0.0001, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 4.072926162260711, | |
| "grad_norm": 3.2786699648568174e-06, | |
| "learning_rate": 1.0025058296731254e-06, | |
| "loss": 0.0004, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 4.083344185440812, | |
| "grad_norm": 2.993703674292192e-05, | |
| "learning_rate": 9.807457713259354e-07, | |
| "loss": 0.0025, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 4.093762208620914, | |
| "grad_norm": 0.00212846789509058, | |
| "learning_rate": 9.591987630600313e-07, | |
| "loss": 0.0002, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 4.104180231801016, | |
| "grad_norm": 0.0008644104236736894, | |
| "learning_rate": 9.37865947025573e-07, | |
| "loss": 0.0, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 4.114598254981117, | |
| "grad_norm": 4.824515417567454e-05, | |
| "learning_rate": 9.167484540189558e-07, | |
| "loss": 0.0, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 4.125016278161219, | |
| "grad_norm": 0.010090484283864498, | |
| "learning_rate": 8.958474034228676e-07, | |
| "loss": 0.0002, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 4.135434301341321, | |
| "grad_norm": 0.0014343432849273086, | |
| "learning_rate": 8.751639031469522e-07, | |
| "loss": 0.0001, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 4.145852324521422, | |
| "grad_norm": 0.0008025519782677293, | |
| "learning_rate": 8.546990495690893e-07, | |
| "loss": 0.0, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 4.156270347701524, | |
| "grad_norm": 0.0038316240534186363, | |
| "learning_rate": 8.344539274772657e-07, | |
| "loss": 0.0, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 4.166688370881626, | |
| "grad_norm": 0.0011604970786720514, | |
| "learning_rate": 8.144296100120886e-07, | |
| "loss": 0.0001, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.177106394061727, | |
| "grad_norm": 0.0030226227827370167, | |
| "learning_rate": 7.946271586098847e-07, | |
| "loss": 0.0001, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 4.1875244172418284, | |
| "grad_norm": 0.024601083248853683, | |
| "learning_rate": 7.750476229464543e-07, | |
| "loss": 0.0, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 4.19794244042193, | |
| "grad_norm": 0.00018696175538934767, | |
| "learning_rate": 7.556920408814133e-07, | |
| "loss": 0.0, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 4.208360463602031, | |
| "grad_norm": 2.6345056539867073e-05, | |
| "learning_rate": 7.365614384031944e-07, | |
| "loss": 0.0002, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 4.218778486782133, | |
| "grad_norm": 0.00012209890701342374, | |
| "learning_rate": 7.176568295746467e-07, | |
| "loss": 0.0, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 4.229196509962235, | |
| "grad_norm": 0.0035056450869888067, | |
| "learning_rate": 6.989792164793003e-07, | |
| "loss": 0.0, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 4.239614533142336, | |
| "grad_norm": 0.00014033827756065875, | |
| "learning_rate": 6.805295891682323e-07, | |
| "loss": 0.0, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 4.250032556322438, | |
| "grad_norm": 8.16184274299303e-06, | |
| "learning_rate": 6.62308925607597e-07, | |
| "loss": 0.0, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 4.26045057950254, | |
| "grad_norm": 0.0005164833273738623, | |
| "learning_rate": 6.443181916267826e-07, | |
| "loss": 0.0, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 4.270868602682641, | |
| "grad_norm": 0.01795101910829544, | |
| "learning_rate": 6.26558340867216e-07, | |
| "loss": 0.0, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 4.281286625862743, | |
| "grad_norm": 0.01263391226530075, | |
| "learning_rate": 6.090303147318122e-07, | |
| "loss": 0.0, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 4.291704649042844, | |
| "grad_norm": 0.0007271830691024661, | |
| "learning_rate": 5.917350423350731e-07, | |
| "loss": 0.0, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 4.3021226722229455, | |
| "grad_norm": 5.166072696738411e-06, | |
| "learning_rate": 5.74673440453839e-07, | |
| "loss": 0.0001, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 4.312540695403047, | |
| "grad_norm": 0.005551379173994064, | |
| "learning_rate": 5.578464134786916e-07, | |
| "loss": 0.0, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 4.322958718583148, | |
| "grad_norm": 0.0028216512873768806, | |
| "learning_rate": 5.412548533660133e-07, | |
| "loss": 0.0, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 4.33337674176325, | |
| "grad_norm": 0.0007700317073613405, | |
| "learning_rate": 5.248996395907091e-07, | |
| "loss": 0.0, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 4.343794764943352, | |
| "grad_norm": 0.01867193728685379, | |
| "learning_rate": 5.08781639099587e-07, | |
| "loss": 0.0, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 4.354212788123453, | |
| "grad_norm": 0.0008056411170400679, | |
| "learning_rate": 4.92901706265404e-07, | |
| "loss": 0.0, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 4.364630811303555, | |
| "grad_norm": 0.008770433254539967, | |
| "learning_rate": 4.772606828415772e-07, | |
| "loss": 0.001, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 4.375048834483657, | |
| "grad_norm": 0.005740197841078043, | |
| "learning_rate": 4.6185939791756397e-07, | |
| "loss": 0.0, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.385466857663758, | |
| "grad_norm": 0.03286215290427208, | |
| "learning_rate": 4.46698667874918e-07, | |
| "loss": 0.0025, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 4.39588488084386, | |
| "grad_norm": 0.000286923284875229, | |
| "learning_rate": 4.3177929634400926e-07, | |
| "loss": 0.0, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 4.406302904023962, | |
| "grad_norm": 0.00014024133270140737, | |
| "learning_rate": 4.1710207416143066e-07, | |
| "loss": 0.0, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 4.416720927204063, | |
| "grad_norm": 0.004977494943886995, | |
| "learning_rate": 4.026677793280748e-07, | |
| "loss": 0.0001, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 4.4271389503841645, | |
| "grad_norm": 6.539112655445933e-05, | |
| "learning_rate": 3.884771769678952e-07, | |
| "loss": 0.0002, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 4.437556973564266, | |
| "grad_norm": 0.009294900111854076, | |
| "learning_rate": 3.745310192873508e-07, | |
| "loss": 0.0, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 4.447974996744367, | |
| "grad_norm": 5.570728262682678e-06, | |
| "learning_rate": 3.6083004553552905e-07, | |
| "loss": 0.0001, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 4.458393019924469, | |
| "grad_norm": 0.0003835258539766073, | |
| "learning_rate": 3.4737498196496686e-07, | |
| "loss": 0.0, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 4.468811043104571, | |
| "grad_norm": 2.6702812405687837e-08, | |
| "learning_rate": 3.3416654179314665e-07, | |
| "loss": 0.0, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 4.479229066284672, | |
| "grad_norm": 0.00016556118498556316, | |
| "learning_rate": 3.212054251646962e-07, | |
| "loss": 0.0, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 4.489647089464774, | |
| "grad_norm": 3.6678444303106517e-05, | |
| "learning_rate": 3.0849231911427134e-07, | |
| "loss": 0.0, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 4.500065112644876, | |
| "grad_norm": 0.00038032321026548743, | |
| "learning_rate": 2.960278975301428e-07, | |
| "loss": 0.0, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 4.510483135824977, | |
| "grad_norm": 0.00020073131599929184, | |
| "learning_rate": 2.8381282111847097e-07, | |
| "loss": 0.0, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 4.520901159005079, | |
| "grad_norm": 4.448674735613167e-05, | |
| "learning_rate": 2.718477373682865e-07, | |
| "loss": 0.0001, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 4.531319182185181, | |
| "grad_norm": 0.17879758775234222, | |
| "learning_rate": 2.601332805171669e-07, | |
| "loss": 0.0001, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 4.541737205365282, | |
| "grad_norm": 0.0016016721492633224, | |
| "learning_rate": 2.4867007151761766e-07, | |
| "loss": 0.0002, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 4.5521552285453835, | |
| "grad_norm": 0.011347103863954544, | |
| "learning_rate": 2.3745871800415765e-07, | |
| "loss": 0.0049, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 4.562573251725485, | |
| "grad_norm": 0.000959846016485244, | |
| "learning_rate": 2.2649981426110967e-07, | |
| "loss": 0.0002, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 4.572991274905586, | |
| "grad_norm": 0.00030386244179680943, | |
| "learning_rate": 2.1579394119109863e-07, | |
| "loss": 0.0, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 4.583409298085688, | |
| "grad_norm": 0.000289548363070935, | |
| "learning_rate": 2.053416662842611e-07, | |
| "loss": 0.014, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.59382732126579, | |
| "grad_norm": 0.009739086031913757, | |
| "learning_rate": 1.9514354358816102e-07, | |
| "loss": 0.0005, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 4.604245344445891, | |
| "grad_norm": 0.0015085875056684017, | |
| "learning_rate": 1.8520011367842538e-07, | |
| "loss": 0.0, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 4.614663367625993, | |
| "grad_norm": 8.586205694882665e-06, | |
| "learning_rate": 1.7551190363008453e-07, | |
| "loss": 0.0001, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 4.625081390806095, | |
| "grad_norm": 5.23238668392878e-07, | |
| "learning_rate": 1.6607942698963875e-07, | |
| "loss": 0.0, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 4.635499413986196, | |
| "grad_norm": 0.006870610639452934, | |
| "learning_rate": 1.5690318374783243e-07, | |
| "loss": 0.0, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 4.645917437166298, | |
| "grad_norm": 0.005947893485426903, | |
| "learning_rate": 1.4798366031315292e-07, | |
| "loss": 0.0026, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 4.6563354603464, | |
| "grad_norm": 1.6190052747333539e-06, | |
| "learning_rate": 1.393213294860457e-07, | |
| "loss": 0.0, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 4.6667534835265005, | |
| "grad_norm": 0.11864887177944183, | |
| "learning_rate": 1.3091665043385383e-07, | |
| "loss": 0.0001, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 4.677171506706602, | |
| "grad_norm": 0.002785899443551898, | |
| "learning_rate": 1.2277006866648122e-07, | |
| "loss": 0.0003, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 4.687589529886704, | |
| "grad_norm": 2.433152758385404e-06, | |
| "learning_rate": 1.1488201601276894e-07, | |
| "loss": 0.0, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.698007553066805, | |
| "grad_norm": 0.0006944002816453576, | |
| "learning_rate": 1.0725291059761611e-07, | |
| "loss": 0.0001, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 4.708425576246907, | |
| "grad_norm": 4.231273487675935e-05, | |
| "learning_rate": 9.98831568198061e-08, | |
| "loss": 0.0, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 4.718843599427009, | |
| "grad_norm": 1.5037702496556449e-06, | |
| "learning_rate": 9.277314533057913e-08, | |
| "loss": 0.0, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 4.72926162260711, | |
| "grad_norm": 9.992829745897325e-07, | |
| "learning_rate": 8.592325301291782e-08, | |
| "loss": 0.0, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 4.739679645787212, | |
| "grad_norm": 0.0003942627226933837, | |
| "learning_rate": 7.933384296157365e-08, | |
| "loss": 0.0, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 4.750097668967314, | |
| "grad_norm": 0.09215450286865234, | |
| "learning_rate": 7.300526446381906e-08, | |
| "loss": 0.0002, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 4.760515692147415, | |
| "grad_norm": 2.6581383281154558e-05, | |
| "learning_rate": 6.693785298093336e-08, | |
| "loss": 0.0, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 4.770933715327517, | |
| "grad_norm": 9.696443470375016e-08, | |
| "learning_rate": 6.113193013041918e-08, | |
| "loss": 0.0, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 4.7813517385076185, | |
| "grad_norm": 0.0001541363453725353, | |
| "learning_rate": 5.558780366895611e-08, | |
| "loss": 0.0, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 4.7917697616877195, | |
| "grad_norm": 0.001118164393119514, | |
| "learning_rate": 5.0305767476087066e-08, | |
| "loss": 0.0001, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.802187784867821, | |
| "grad_norm": 5.372874511522241e-06, | |
| "learning_rate": 4.528610153863855e-08, | |
| "loss": 0.0001, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 4.812605808047923, | |
| "grad_norm": 4.5697615860262886e-05, | |
| "learning_rate": 4.052907193588251e-08, | |
| "loss": 0.0, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 4.823023831228024, | |
| "grad_norm": 1.0059036867460236e-05, | |
| "learning_rate": 3.60349308254293e-08, | |
| "loss": 0.0001, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 4.833441854408126, | |
| "grad_norm": 0.00015976910071913153, | |
| "learning_rate": 3.1803916429863355e-08, | |
| "loss": 0.0, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 4.843859877588228, | |
| "grad_norm": 1.2175183296203613, | |
| "learning_rate": 2.7836253024114412e-08, | |
| "loss": 0.0003, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 4.854277900768329, | |
| "grad_norm": 0.0008595722611062229, | |
| "learning_rate": 2.4132150923570353e-08, | |
| "loss": 0.0003, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 4.864695923948431, | |
| "grad_norm": 1.9864066871377872e-06, | |
| "learning_rate": 2.0691806472928344e-08, | |
| "loss": 0.0001, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 4.875113947128533, | |
| "grad_norm": 1.0369881238148082e-06, | |
| "learning_rate": 1.7515402035787053e-08, | |
| "loss": 0.0, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 4.885531970308634, | |
| "grad_norm": 0.08182670176029205, | |
| "learning_rate": 1.4603105984979382e-08, | |
| "loss": 0.0001, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 4.895949993488736, | |
| "grad_norm": 1.3140484043105971e-06, | |
| "learning_rate": 1.1955072693649594e-08, | |
| "loss": 0.0, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 4.9063680166688375, | |
| "grad_norm": 0.008761771954596043, | |
| "learning_rate": 9.571442527068209e-09, | |
| "loss": 0.0, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 4.9167860398489385, | |
| "grad_norm": 9.459521243115887e-05, | |
| "learning_rate": 7.452341835192388e-09, | |
| "loss": 0.0, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 4.92720406302904, | |
| "grad_norm": 0.00031195214251056314, | |
| "learning_rate": 5.597882945969635e-09, | |
| "loss": 0.0001, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 4.937622086209142, | |
| "grad_norm": 0.00505842175334692, | |
| "learning_rate": 4.008164159380879e-09, | |
| "loss": 0.0001, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 4.948040109389243, | |
| "grad_norm": 0.028236793354153633, | |
| "learning_rate": 2.6832697422324307e-09, | |
| "loss": 0.0002, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 4.958458132569345, | |
| "grad_norm": 3.2701405871193856e-05, | |
| "learning_rate": 1.623269923688442e-09, | |
| "loss": 0.0001, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 4.968876155749447, | |
| "grad_norm": 0.0009896554984152317, | |
| "learning_rate": 8.282208915466605e-10, | |
| "loss": 0.0001, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 4.979294178929548, | |
| "grad_norm": 4.405805157148279e-06, | |
| "learning_rate": 2.981647892635886e-10, | |
| "loss": 0.0, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 4.98971220210965, | |
| "grad_norm": 2.589948280729004e-06, | |
| "learning_rate": 3.312971371627338e-11, | |
| "loss": 0.0001, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 4.9949212136997, | |
| "step": 4795, | |
| "total_flos": 0.0, | |
| "train_loss": 0.03522291941339332, | |
| "train_runtime": 16429.2929, | |
| "train_samples_per_second": 18.696, | |
| "train_steps_per_second": 0.292 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4795, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 1, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |